diff --git a/apps/task-scheduler/.dockerignore b/apps/task-scheduler/.dockerignore new file mode 100644 index 0000000..f775fa9 --- /dev/null +++ b/apps/task-scheduler/.dockerignore @@ -0,0 +1,61 @@ +# T10:Docker build 時排除以下檔案,避免進 production image +# +# 重點: +# 1. .env / *.env — secret 不該進 image,由 docker-compose / secret manager 注入 +# 2. node_modules — Dockerfile 的 `npm ci` 會在 image 內重新安裝(production-only) +# 3. tests / fixtures — 測試檔不該進 production image,減少 attack surface 與 image size +# 4. IDE / VCS — .vscode, .idea, .git 都是開發工具產物 +# 5. Coverage / 暫存 — 任何 build artifact + +# === 環境變數 / 密鑰 === +.env +.env.* +!env.example + +# === Node === +node_modules +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.npm +.pnpm-store + +# === 測試 === +**/__tests__ +**/*.test.js +**/*.spec.js +coverage +.nyc_output +jest.config.js + +# === IDE / OS === +.vscode +.idea +.DS_Store +*.swp +*.swo +*~ + +# === Git === +.git +.gitignore +.gitattributes + +# === Docker(避免遞迴)=== +Dockerfile* +.dockerignore +docker-compose*.yml + +# === 文件(不需進 image)=== +README.md +CHANGELOG.md +LICENSE +docs + +# === 暫存 / build artifact === +*.log +*.pid +*.seed +dist +build +tmp diff --git a/apps/task-scheduler/Dockerfile b/apps/task-scheduler/Dockerfile index 6740ad4..9f8e728 100644 --- a/apps/task-scheduler/Dockerfile +++ b/apps/task-scheduler/Dockerfile @@ -1,23 +1,46 @@ +# Task Scheduler Dockerfile(Phase 1) +# +# 設計重點(T10 補強): +# 1. 用 node:18-alpine —— 最小化 image 大小(~150MB vs node:18 的 ~1GB) +# 2. 兩階段:先複製 package*.json + npm ci,再 COPY 其他檔,善用 Docker 層快取 +# 3. 只裝 production deps(--only=production / --omit=dev)—— jest / nodemon 不進 image +# 4. 非 root user 執行,降低 RCE 後的影響面 +# 5. .dockerignore 已排除 .env / tests / node_modules / IDE 設定 +# 6. HEALTHCHECK 對接 /health 端點(T8 已實作) +# 7. 
環境變數透過 docker-compose / Kubernetes secret 注入,不在 image 內 + FROM node:18-alpine WORKDIR /app +# curl 只給 HEALTHCHECK 用;alpine 預設無 RUN apk add --no-cache curl +# === 第一層:依賴(變動較少,快取友善)=== +# 先 COPY package*.json,npm ci 後再 COPY 原始碼,避免改 src 就 invalidate npm install 層 COPY package*.json ./ -RUN npm ci --only=production +# --omit=dev 對齊新版 npm(替代 --only=production);jest / nodemon 等 devDependencies 不會被裝 +RUN npm ci --omit=dev && npm cache clean --force +# === 第二層:原始碼 === +# .dockerignore 已排除 .env / tests / __tests__ / node_modules,這裡 COPY . . 是安全的 COPY . . +# === 安全:非 root user === RUN addgroup -g 1001 -S appgroup && \ adduser -S appuser -u 1001 -G appgroup +# 建立 job data dir 並改 owner(worker / scheduler 共用 volume 用) RUN mkdir -p /data/jobs && chown -R appuser:appgroup /app /data/jobs + USER appuser EXPOSE 4000 -HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ +# Health check 對接 /health 端點(T8:含 redis / Member Center / FAA reachability) +HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \ CMD curl -f http://localhost:4000/health || exit 1 -CMD ["npm", "start"] +# 啟動:直接 node server.js(不用 npm start)以利 SIGTERM signal 直接送到 node +# - npm start 會 fork 一層,SIGTERM 不一定傳到 child,影響 graceful shutdown +CMD ["node", "server.js"] diff --git a/apps/task-scheduler/README.md b/apps/task-scheduler/README.md new file mode 100644 index 0000000..62408db --- /dev/null +++ b/apps/task-scheduler/README.md @@ -0,0 +1,498 @@ +# Task Scheduler — Kneron Model Converter Phase 1 + +Kneron Model Converter 的 Job 管理與 queue orchestration 服務。負責接收上游 +(visionA-backend / Web UI)的轉檔請求,協調 ONNX → BIE → NEF pipeline,並把成功 +的結果檔 promote 到 File Access Agent / NAS 模型庫。 + +> **Phase 1 對外 API 完整規格** → 見 `docs/openapi.yaml` + +--- + +## 1. 
專案介紹 + +### 1.1 服務角色 + +``` + public Internet internal + ↓ +visionA-backend ─→ Nginx (443, public vhost) ─→ /api/v1/* ─→ task-scheduler ─→ Worker + │ +Web UI ─→ Nginx (80, internal vhost) ─→ /jobs ──┘ │ + ↓ + ONNX → BIE → NEF + ↓ + MinIO Bucket + ↓ + POST /api/v1/jobs/:id/promote + ↓ + File Access Agent + ↓ + NAS 模型庫 +``` + +task-scheduler 是 Phase 1 唯一暴露給上游的應用層元件,承擔: + +- 對外 API(**Phase 1 新增**):`/api/v1/*` 共 4 個端點 + 2 個 Phase 2 預留 +- 內部 API(**保留既有**):`/jobs/*` 共 6 個 legacy 端點(Web UI 用) +- 健康檢查:`/health`(公開) + +### 1.2 技術堆疊 + +| 層級 | 技術 | 版本 | +|------|------|------| +| 執行環境 | Node.js | 18+ (alpine image, 部署用) | +| Web framework | Express | 4.x | +| Queue | Redis Stream + ioredis | 5.x | +| 物件儲存 | MinIO(S3 compatible,AWS SDK v3) | latest | +| 認證 | OAuth 2.0 + JWT(jose) | jose 5.x | +| 上傳 | multer (memoryStorage) | 1.4.x | +| 速率限制 | express-rate-limit | 6.x | +| 安全 headers | helmet | 7.x | +| 測試 | Jest | 29.x | + +--- + +## 2. 前置需求 + +| 項目 | 版本 / 說明 | +|------|-----------| +| Node.js | 18+(fetch 原生支援、`duplex: 'half'`) | +| npm | 9+ | +| Docker / docker-compose(可選) | 24.x+ | +| Redis | 7.x(dev / prod 都需要) | +| MinIO | latest(POST /api/v1/jobs 必須啟用) | +| Member Center | OAuth 2.0 Authorization Server,提供 JWKS / token endpoint | +| File Access Agent | promote 階段呼叫,需支援 `PUT /files/{key}` | + +dev 環境若無真實 Member Center / FAA,可用 placeholder 值(見 `env.example`)。 + +--- + +## 3. 
啟動方式 + +### 3.1 本機開發(純 Node) + +```bash +cd apps/task-scheduler +cp env.example .env +# 編輯 .env,至少把以下 placeholder 替換為真實值: +# - MEMBER_CENTER_*(若要實際打 Member Center) +# - KNERON_CONVERTER_CLIENT_SECRET +# - MINIO_*(若 STORAGE_BACKEND=minio) + +npm install +npm start +# → 監聽 PORT(預設 4000) +``` + +### 3.2 Docker 單體 + +```bash +docker build -t task-scheduler:dev apps/task-scheduler +docker run --rm --env-file apps/task-scheduler/.env -p 4000:4000 task-scheduler:dev +``` + +### 3.3 docker-compose(推薦) + +專案根目錄已有 `docker-compose.yml`,會一併啟動 Redis、MinIO、Workers、frontend: + +```bash +cd /path/to/kneron_model_converter +cp apps/task-scheduler/env.example .env # 或維護一份 root .env +docker compose up -d --build +``` + +服務埠對外: +- Scheduler API:`http://localhost:4000` +- Web UI:`http://localhost:3000` +- MinIO Console:`http://localhost:9001` + +### 3.4 Health check + +```bash +curl http://localhost:4000/health | jq . +``` + +回應為三層 status(healthy / degraded / unhealthy)+ 各依賴狀態, +詳見 [§ 10. 監控](#10-監控)。 + +### 3.5 Graceful shutdown + +服務監聽 `SIGTERM` / `SIGINT`:收到後會先停掉 health background polling, +再讓 Express 自然關閉。容器 / K8s 部署時 `terminationGracePeriodSeconds` +建議至少 30 秒。 + +--- + +## 4. 
專案結構 + +``` +apps/task-scheduler/ +├── server.js ← entry(< 140 行;組裝 deps、啟動 listener、listen) +├── src/ +│ ├── app.js ← Express app factory +│ ├── config.js ← 集中讀 env,啟動時 fail-fast +│ ├── redis.js ← Redis client + helpers +│ ├── auth/ +│ │ ├── jwks.js ← jose remote JWKS cache + jwtVerify +│ │ ├── middleware.js ← requireAuth(scope) Express middleware +│ │ └── oauthClient.js ← Converter as OAuth Client(client_credentials) +│ ├── fileAccessAgent/ +│ │ ├── client.js ← FAA HTTP client(PUT only,重試 + 401 invalidate) +│ │ └── errors.js +│ ├── middleware/ +│ │ ├── errorHandler.js ← 統一 error 格式(v1 限定) +│ │ ├── requestId.js ← X-Request-Id 透傳 / 生成 +│ │ ├── perClientRateLimit.js ← per-client_id rate limiter +│ │ ├── upload.js ← multer 設定 +│ │ └── uploadConcurrency.js ← per-process upload semaphore(防 OOM) +│ ├── routes/ +│ │ ├── legacy.js ← /jobs* 6 個端點(Web UI 用) +│ │ └── v1/ +│ │ ├── index.js ← /api/v1 mount + 內部 errorHandler +│ │ ├── jobs.js ← POST/GET /jobs, GET /jobs/:id, 預留 501 +│ │ ├── promote.js ← POST /jobs/:id/promote +│ │ └── validators/ +│ │ └── createJob.js ← multipart fields validator +│ ├── services/ +│ │ ├── jobService.js ← Job CRUD + claim_active / advance / fail +│ │ ├── doneListener.js ← Redis Stream 背景 listener +│ │ ├── healthService.js ← /health 背景 polling cache +│ │ ├── statusMapper.js ← 內部大寫 status → 對外 status + stage +│ │ └── sseService.js ← SSE 推送(legacy) +│ ├── storage/ +│ │ ├── minio.js ← AWS SDK v3 S3 facade +│ │ └── local.js ← STORAGE_BACKEND=local 模式 +│ ├── redis/ +│ │ └── luaScripts.js ← claim_active_job / release_active_job +│ └── utils/ +│ └── sanitize.js ← filename / user_id / path 安全處理 +├── docs/ +│ └── openapi.yaml ← Phase 1 對外 API spec(給 visionA 等消費者) +├── tests/ ← 單元 + 整合測試(見 src/**/__tests__/) +├── package.json +├── Dockerfile ← 多層快取 + 非 root user + HEALTHCHECK +├── env.example ← 完整環境變數範本(不含真實 secret) +└── README.md ← 本檔 +``` + +--- + +## 5. 
環境變數 + +完整清單(含預設、必填與否、說明)見 [`env.example`](./env.example)。 + +簡表(依分類): + +### 5.1 必填(缺漏會 fail-fast、process exit code 1) + +| 變數 | 用途 | +|------|------| +| `REDIS_URL` | Redis 連線(含 password) | +| `STORAGE_BACKEND` | `local` / `minio`;POST /api/v1/jobs 必須 `minio` | +| `MEMBER_CENTER_ISSUER` | JWT iss 比對基準 | +| `MEMBER_CENTER_JWKS_URL` | JWKS endpoint(驗 token 用) | +| `MEMBER_CENTER_TOKEN_URL` | token endpoint(取 promote 用 token) | +| `KNERON_CONVERTER_AUDIENCE` | 接受 JWT 的 aud | +| `KNERON_CONVERTER_CLIENT_ID` | Converter 自己 OAuth client | +| `KNERON_CONVERTER_CLIENT_SECRET` | **不要進 git;用 secret manager** | +| `FILE_ACCESS_AGENT_BASE_URL` | promote 目標;production 強制 https | +| `FILE_ACCESS_AGENT_AUDIENCE` | promote token 的 aud | + +`STORAGE_BACKEND=minio` 時還需:`MINIO_ENDPOINT_URL` / `MINIO_BUCKET` / +`MINIO_ACCESS_KEY` / `MINIO_SECRET_KEY`。 + +### 5.2 可選(有合理預設) + +涵蓋: + +- 上傳上限(`MULTIPART_MODEL_MAX_BYTES` 預設 500MB、`MULTIPART_REF_IMAGE_MAX_BYTES` + 預設 10MB、`MULTIPART_REF_IMAGES_MAX_COUNT` 預設 100) +- 上傳並發(`MAX_CONCURRENT_UPLOADS` 預設 5、`UPLOAD_RETRY_AFTER_SECONDS` 預設 30) +- Rate limit(`API_V1_RATE_LIMIT_WINDOW_MS` 預設 5min、`API_V1_RATE_LIMIT_MAX` 預設 300) +- JWKS 行為(`JWKS_CACHE_MAX_AGE_MS`、`JWKS_COOLDOWN_MS`、`JWT_CLOCK_TOLERANCE_SEC`) +- OAuth client(`OAUTH_TOKEN_REFRESH_SKEW_MS`、`OAUTH_TOKEN_TIMEOUT_MS`) +- promote timeout(`PROMOTE_TIMEOUT_MS` 預設 300s) +- Tenant 隔離(`CONVERTER_TENANT_ID`,空字串 = 不檢查) +- Scope 命名覆寫(`CONVERTER_SCOPE_WRITE` / `CONVERTER_SCOPE_READ`) + +### 5.3 安全提醒 + +- `.env` 已在 `.gitignore`;不要 commit +- production 用 secret manager(Vault / AWS Secrets Manager / K8s Secret), + 而不是把 secret 直接放進 docker-compose env +- 任何含 `REPLACE-ME` 字樣或 `.invalid` TLD 的 placeholder,**部署前必須替換** + +--- + +## 6. 
API 概覽 + +### 6.1 Phase 1 對外 API(`/api/v1/*`) + +| 方法 | 路徑 | scope | 說明 | +|------|------|-------|------| +| POST | `/api/v1/jobs` | `converter:job.write` | 建立轉檔 job(multipart) | +| GET | `/api/v1/jobs` | `converter:job.read` | Recovery 列表(user_id 必填) | +| GET | `/api/v1/jobs/:id` | `converter:job.read` | 單一 job 狀態(含 ETag) | +| POST | `/api/v1/jobs/:id/promote` | `converter:job.write` | 結果檔搬到 FAA | +| POST | `/api/v1/jobs/:id/download-tokens` | `converter:job.read` | **Phase 2 預留**,回 501 | +| DELETE | `/api/v1/jobs/:id` | `converter:job.write` | **Phase 2 預留**,回 501 | + +完整規格、所有 schema、所有錯誤情境的 example:見 [`docs/openapi.yaml`](./docs/openapi.yaml)。 + +### 6.2 Legacy / 內部 API(`/jobs/*`,僅內網 vhost 暴露) + +對 Web UI 100% 不變更行為(T4 重構僅是「移動 + 抽象」): + +| 方法 | 路徑 | 說明 | +|------|------|------| +| POST | `/jobs` | Web UI 上傳建 job(multipart,無 user_id 概念) | +| GET | `/jobs` | 列出全部 job(legacy KEYS scan) | +| GET | `/jobs/:jobId` | 查單一 job | +| GET | `/jobs/:jobId/events` | SSE 推送 | +| GET | `/jobs/:jobId/download/:filename` | 下載結果檔 | +| GET | `/queues/stats` | Redis Stream / Group 統計 | + +### 6.3 健康檢查 + +| 方法 | 路徑 | 說明 | +|------|------|------| +| GET | `/health` | 公開,不需認證 | + +--- + +## 7. Auth 流程 + +### 7.1 上游消費者(visionA-backend)取 token + +Converter 是 OAuth 2.0 Resource Server。建議消費者用 `client_credentials` +grant 從 Member Center 取得 service-to-service token: + +``` +POST {member-center}/oauth/token +Content-Type: application/x-www-form-urlencoded + +grant_type=client_credentials +&client_id={client_id} +&client_secret={client_secret} +&scope=converter:job.write converter:job.read +&audience=kneron_converter_api +``` + +### 7.2 Converter 端驗證 + +每個 `/api/v1/*` request 進入時: + +1. Bearer token 驗章(`jose.createRemoteJWKSet` + `jwtVerify`) +2. `iss` / `aud` / `exp`(含 60 秒 clock skew) +3. `scope`(端點要求的 scope 必須在 token claim 內) +4. `tenant_id`(若 `CONVERTER_TENANT_ID` 非空則檢查) +5. 
`client_id`(用於 rate limit / log / job 隔離) + +驗證失敗時: + +- 回 v1 標準錯誤格式(`{error: {code, message, details, request_id}}`) +- **設 `Connection: close` header + `req.socket.destroy()`**:阻止 + unauthorized client 繼續灌大檔。但這是 best-effort;真正的 body 上限 + 靠 Nginx `client_max_body_size`(部署層) + +### 7.3 Converter 取 promote 用 token + +promote 時 Converter 切換成 OAuth Client,用 `client_credentials` 取 +`files:upload.write` scope token,PUT 到 FAA。 + +token cache per scope,過期前 60s 主動 refresh;FAA 回 401 時自動 +invalidate cache 並重試一次。 + +--- + +## 8. 錯誤碼總表 + +| HTTP | code | 說明 | +|------|------|------| +| 400 | `validation_error` | 欄位格式錯(`details.fields[]` 列具體欄位) | +| 400 | `invalid_multipart` | multipart parse 失敗、缺必要 file、副檔名不符 | +| 401 | `invalid_token` | JWT 無效 / 簽章錯 / 缺 claim | +| 401 | `token_expired` | JWT 過期 | +| 403 | `insufficient_scope` | scope 不足(`details.required_scope` / `provided_scopes`) | +| 403 | `tenant_mismatch` | tenant_id 不符 | +| 404 | `job_not_found` | job 不存在或不屬於該 client(不洩漏存在性) | +| 404 | `not_found` | 路徑不存在 | +| 409 | `user_has_active_job` | 同 user 已有未完成 job(`details.active_job_*`) | +| 409 | `job_not_ready_for_promote` | promote 時 job 非 completed | +| 409 | `source_not_available` | promote 的 source stage 沒產出 | +| 413 | `file_too_large` | 上傳超過大小上限(model 500MB / ref_image 10MB) | +| 422 | `invalid_object_key` | promote target_object_key 格式不合法 | +| 429 | `rate_limit_exceeded` | per-client rate limit | +| 500 | `misconfiguration` | 伺服器設定錯(如 STORAGE_BACKEND 非 minio) | +| 500 | `internal_error` | 其他未分類錯誤 | +| 501 | `not_implemented` | Phase 2 預留端點 | +| 502 | `storage_unavailable` | MinIO 寫入失敗 | +| 502 | `file_gateway_unavailable` | FAA 不可用 / 拒絕 | +| 503 | `auth_service_unavailable` | Member Center 取 token 失敗 | +| 503 | `service_busy` | upload concurrency 已滿(`Retry-After` header) | + +response 完整 schema 見 [`docs/openapi.yaml`](./docs/openapi.yaml#components/schemas/ApiError)。 + +--- + +## 9. 
與其他服務的關係 + +| 服務 | 連接方式 | 用途 | 失敗影響 | +|------|---------|------|---------| +| Member Center | HTTPS | 驗 visionA token / 取 promote token | 新 token 無法驗(cache 內舊 token 仍可用);promote 階段失敗 | +| File Access Agent | HTTPS | promote 結果檔搬到 NAS | promote 失敗,但 job 本身已 completed,可重試 | +| MinIO | HTTP / HTTPS | 原始模型 / 結果檔暫存(7 天 lifecycle) | POST /jobs 直接 502,promote 也會失敗 | +| Redis | TCP | Job state、active_job lock、Stream queue | 整個服務 unhealthy | +| Worker(onnx / bie / nef) | Redis Stream | 跑 pipeline | Job 卡在某個 stage,TTL 7 天會自動清 | + +--- + +## 10. 監控 + +### 10.1 `/health` 的三層 status + +| status | HTTP | 對應狀態 | +|--------|------|---------| +| `healthy` | 200 | Redis / MC / FAA 都連通 | +| `degraded` | 200 | Redis 連通,但 MC / FAA 任一不可達 | +| `unhealthy` | 503 | Redis 斷線 | + +response body 同時包含 `dependencies.{redis, member_center, file_access_agent}` +細節,可給 K8s readiness / liveness probe 區分嚴重度。 + +### 10.2 結構化日誌 + +所有 v1 路徑的 handler 都輸出 JSON log(stdout): + +```json +{ + "service": "task-scheduler", + "timestamp": "2026-04-25T12:00:00.123Z", + "level": "INFO", + "action": "jobs.create.success", + "request_id": "7c6e4f3b-...", + "job_id": "550e8400-...", + "user_id": "alice", + "client_id": "kneron_converter_dev", + "size_bytes": 204800000, + "ref_images_count": 0, + "duration_ms": 234 +} +``` + +`action` 欄位採 `domain.event` 格式,便於用 jq / loki 過濾。 + +### 10.3 Rate limit headers + +回應自動帶: + +- `X-RateLimit-Limit` / `RateLimit-Limit` +- `X-RateLimit-Remaining` / `RateLimit-Remaining` +- 超限時:`Retry-After`(秒) + +--- + +## 11. Phase 1 已知接受風險 + +> 本節為摘要,完整內容見 [`.autoflow/04-architecture/security.md`](../../.autoflow/04-architecture/security.md)。 + +### 11.1 user_id 信任邊界(最重要) + +- `user_id` 來自 multipart form field(POST)或 query string(GET), + **不**從 JWT claim derive +- Converter 完全信任 visionA-backend 帶來的 user_id 是對的,**不做 user 層級 ACL** +- visionA-backend 一旦被 compromise,attacker 可冒充任何 user_id + +**Phase 1 接受此風險的理由**: + +1. visionA-backend 是內部受控系統,非 Internet-facing +2. 
Phase 1 重點是 pipeline 跑通;安全強化排在 Phase 2 +3. HMAC / OBO 流程要 visionA / Member Center 配合,已對齊但尚未實作 + +**Phase 1 mitigation**: + +- per-client_id rate limit(300 req / 5 min) +- 結構化 audit log 含 `client_id` + `user_id` +- 7 天 active_job TTL(避免 lock 永久不釋放) +- `user_id` 嚴格白名單(`^[A-Za-z0-9._-]{1,128}$`)擋 XSS / Redis key injection + +**Phase 2 候補**:HMAC-signed user_id(短期)/ OAuth Token Exchange(中期)。 + +### 11.2 大檔上傳的 OOM 風險 + +- multer 用 `memoryStorage` — 每個並發 upload 吃 model size 大小的 heap +- 5 並發 × 500MB = 2.5GB;`MAX_CONCURRENT_UPLOADS` 預設 5(4GB 容器安全) +- 超過時 503 + `Retry-After`,client 主動 backoff + +### 11.3 Trust boundary 與 Nginx 層 + +- 401/403 後 server 雖會 `socket.destroy()`,但這是 best-effort +- 真正的 body 大小上限由 Nginx vhost `client_max_body_size 600M` 把關 +- Nginx 雙 vhost 設定詳見 TDD §7.1(DevOps 範圍,非後端) + +### 11.4 Per-process state(Phase 2 才需處理) + +- rate limiter / upload concurrency 都是 in-process counter +- Phase 1 部署為單 instance,無問題;Phase 2 多 instance 時要改 Redis store + +--- + +## 12. 測試 + +```bash +npm test # 跑所有 unit + integration test(630 tests,~4 秒) +npm test -- --watch # watch 模式 +npm test -- src/auth # 只跑 auth 模組的測試 +``` + +測試金字塔: +- 單元測試(70%):service / validator / utils / middleware +- 整合測試(20%):route + middleware + Redis 模擬 / FAA mock +- E2E(10%):由 Testing Agent 跑(不在本套件內) + +CI 用:`npm test`。 + +--- + +## 13. 
故障排除(常見場景) + +| 症狀 | 可能原因 | 排查 | +|------|---------|------| +| 啟動立刻 exit 1 | env 缺漏 | 看 `[Scheduler] Config validation failed` log;對照 `env.example` | +| 401 invalid_token / token_expired | clock skew、JWKS cache 沒拿到新 kid | 檢查 server 時鐘、`MEMBER_CENTER_JWKS_URL` 可達性 | +| 401 後 client 連線立刻斷 | 設計如此(`Connection: close` + `socket.destroy()`) | 正常行為,避免 client 繼續灌 body | +| 409 user_has_active_job 但前一個 job 已 failed | active_job lock 沒被釋放 | 看 worker done listener 是否運作;最壞情況 7 天 TTL 會自動清 | +| 502 storage_unavailable | MinIO 不可達 / 認證錯 | 檢查 `MINIO_*` env、bucket 是否存在 | +| 502 file_gateway_unavailable | FAA 5xx 或 4xx 拒絕(非 401) | 看 server log `promote.faa_put_failed`,FAA 端排查 | +| 503 auth_service_unavailable | Member Center token endpoint 死 / 401 兩次 | 確認 `MEMBER_CENTER_TOKEN_URL` 可達、`KNERON_CONVERTER_CLIENT_*` 對 | +| 503 service_busy + Retry-After | upload concurrency 已滿 | 等 Retry-After,或調高 `MAX_CONCURRENT_UPLOADS`(注意 OOM) | +| 503 unhealthy(/health) | Redis 斷線 | 檢查 `REDIS_URL` 與 Redis 服務狀態 | +| GET /api/v1/jobs 回 400 validation_error(缺 user_id) | Phase 1 強制 user_id 必填(legacy `GET /jobs` 不受此限) | client 端帶 `user_id` query string | +| 大檔上傳跑到一半 5xx | Nginx `client_max_body_size` 太小 | 部署層調 `client_max_body_size 600M`(不在 backend 範圍) | + +更多細節: + +- `.autoflow/04-architecture/TDD.md`(完整規格) +- `.autoflow/04-architecture/security.md`(安全模型 / 接受風險) +- `.autoflow/05-implementation/tasks-phase1.md`(任務拆分與決策紀錄) + +--- + +## 14. 文件參照 + +| 文件 | 內容 | +|------|------| +| [`docs/openapi.yaml`](./docs/openapi.yaml) | Phase 1 對外 API spec(給 visionA-backend 等消費者 import) | +| [`env.example`](./env.example) | 完整環境變數清單(含說明、預設、必填與否) | +| `../../.autoflow/04-architecture/TDD.md` | 完整技術設計文件 | +| `../../.autoflow/04-architecture/security.md` | 安全模型 / 接受風險 / Phase 2 候補 | +| `../../.autoflow/04-architecture/design-doc.md` | 架構決策(為什麼選這些方案) | +| `../../.autoflow/02-prd/PRD.md` | 產品需求 / user stories | +| `../../.autoflow/05-implementation/tasks-phase1.md` | T1-T11 任務拆分與審查紀錄 | + +--- + +## 15. 
License + +MIT diff --git a/apps/task-scheduler/docs/openapi.yaml b/apps/task-scheduler/docs/openapi.yaml new file mode 100644 index 0000000..60c1d39 --- /dev/null +++ b/apps/task-scheduler/docs/openapi.yaml @@ -0,0 +1,1593 @@ +openapi: 3.0.3 + +# ============================================================================= +# Kneron Converter — Phase 1 對外 API +# +# 此 spec 對齊「實作端」(`apps/task-scheduler/src/routes/v1/`), +# 與 TDD.md §1 在以下幾處刻意不一致(皆為實作端進化、TDD 尚未同步): +# +# 1. status 映射對外只用 `created` / `running` / `completed` / `failed` +# (TDD §2.7.1 仍提到內部大寫 ONNX/BIE/NEF;對外 spec 不暴露內部值) +# 2. GET /jobs 採 cursor-based 分頁(base64-url-encoded opaque string), +# 非 TDD §1.4.4 的 offset 欄位 +# 3. GET /jobs 列表回應使用 `{jobs, total, next_cursor}`,非 TDD 的 +# `{items, limit, offset, total}` +# 4. GET /jobs/:id 回應實作端會 strip 內部欄位 `created_by_client_id`, +# 不對外曝露 +# 5. model 副檔名只接受 `.onnx` / `.tflite`(PRD §4.1 F-01;TDD §1.4.2 +# 列了 6 種但 PRD 才是對 user-facing 的合約 — doc-review m6 已記錄) +# 6. ref_image per-file 上限 10MB(Sec C2)— TDD §1.4.2 沒明確此限制 +# 7. promote response 不含 partial-failure 結構;採「全成功 200 / 任一失敗 502」 +# 模型(Phase 1 簡化決策,TDD §1.4.5 範例描述部分失敗為 502 + details, +# 實作端因 stream 模型難以原子化已改採全失敗 502 + classifyFaaError) +# +# 兼容說明:spec 為對外契約來源,當 TDD 與實作不一致時,以實作 + 此 spec 為準。 +# 預期下輪 TDD 維護會同步上述差異。 +# ============================================================================= + +info: + title: Kneron Converter API + description: | + Kneron Model Converter 對外 API(Phase 1)。 + + 本服務提供深度學習模型的轉檔服務(ONNX → BIE → NEF),目標是讓上游 + 應用(如 visionA-backend)能: + + 1. 上傳原始模型 + 參考圖片 → 建立轉檔 job + 2. Polling job 狀態,直到 `completed` 或 `failed` + 3. 
把成功的結果檔 promote(推送)到 File Access Agent / NAS 模型庫 + + ## 認證 + + 所有 `/api/v1/*` 端點都需要 `Authorization: Bearer {access_token}`,token + 需由 Innovedus Member Center 簽發、`aud=kneron_converter_api`,並含對應 scope。 + + Converter 是 OAuth 2.0 Resource Server。上游消費者建議用 + `client_credentials` grant 取得 service-to-service token。 + + ## user_id 與 trust boundary + + `user_id` 不是來自 JWT claim,而是 multipart form field(POST)或 + query string(GET)。Converter **完全信任**呼叫端帶來的 user_id 是 + 對的,不做 user 層級 ACL。 + + 這是 Phase 1 刻意接受的設計風險,詳見: + `.autoflow/04-architecture/security.md` § Trust Boundary。 + + ## 錯誤格式 + + 所有 4xx / 5xx 一律使用: + + ```json + { + "error": { + "code": "snake_case_code", + "message": "human readable zh-TW", + "details": { "..." }, + "request_id": "uuid-v4" + } + } + ``` + + `details` 欄位視 code 而定(schema 中各 example 已展示)。 + + ## Phase 1 已知限制 / 接受風險 + + - `user_id` 信任邊界:見 security.md + - 大檔上傳(500MB)依賴 Nginx `client_max_body_size`,後端 multer + 在 OAuth 驗證前已開始 buffer;建議 Nginx vhost 設 `client_max_body_size 600M` + - 單 Scheduler instance(per-process rate limiter / upload concurrency) + - Crash 即 Reset:未完成的 job 在 Scheduler / Worker crash 後不保證恢復 + + version: 1.0.0 + contact: + name: Kneron Converter Team + license: + name: MIT + +servers: + - url: https://your-converter.example.com + description: 部署範例 — 替換為實際 Converter public host + - url: http://localhost:4000 + description: 本地開發(無 Nginx 反代理) + +# ============================================================================= +# Tags +# ============================================================================= + +tags: + - name: Health + description: 服務健康檢查(公開、無需認證) + - name: Jobs + description: 轉檔 job 生命週期管理 + - name: Promote + description: 結果檔搬移到 File Access Agent + - name: Phase 2 (Reserved) + description: 預留路由,Phase 1 一律回 501 not_implemented + +# ============================================================================= +# 全域 security default:除標明 security: [] 外,所有 path 都需 Bearer JWT +# 
============================================================================= + +security: + - BearerAuth: [] + +paths: + + # =========================================================================== + # Health + # =========================================================================== + + /health: + get: + tags: [Health] + summary: 健康檢查 + operationId: getHealth + description: | + 回傳服務與依賴(Redis / Member Center / File Access Agent)健康狀態。 + + - **公開**:不需要 Authorization + - **三層 status**: + - `healthy`:所有依賴連通 → HTTP 200 + - `degraded`:Redis 連通但 MC / FAA 任一不可達 → HTTP 200 + - `unhealthy`:Redis 不連通 → HTTP 503 + + MC / FAA 使用背景 polling(每 30 秒)+ cache,呼叫此端點本身永遠 < 5ms。 + 部署初期(first poll 完成前)狀態為 `pending`。 + security: [] + responses: + '200': + description: 服務健康(healthy 或 degraded) + content: + application/json: + schema: + $ref: '#/components/schemas/HealthSnapshot' + examples: + healthy: + summary: 所有依賴正常 + value: + service: task-scheduler + status: healthy + timestamp: '2026-04-25T12:00:00Z' + redis: connected + version: '1.0.0' + dependencies: + redis: connected + member_center: reachable + file_access_agent: reachable + degraded: + summary: Redis OK 但 FAA 不可達 + value: + service: task-scheduler + status: degraded + timestamp: '2026-04-25T12:00:00Z' + redis: connected + version: '1.0.0' + dependencies: + redis: connected + member_center: reachable + file_access_agent: unreachable + '503': + description: 服務不健康(Redis 斷線) + content: + application/json: + schema: + $ref: '#/components/schemas/HealthSnapshot' + examples: + unhealthy: + summary: Redis 斷線 + value: + service: task-scheduler + status: unhealthy + timestamp: '2026-04-25T12:00:00Z' + redis: disconnected + version: '1.0.0' + dependencies: + redis: disconnected + member_center: reachable + file_access_agent: reachable + + # =========================================================================== + # POST /api/v1/jobs — 建立轉檔 job + # =========================================================================== + + 
/api/v1/jobs: + post: + tags: [Jobs] + summary: 建立轉檔 job + operationId: createJob + description: | + 以 multipart/form-data 上傳原始模型(+ 可選的參考圖片)+ 參數, + 建立一個轉檔 job,後續會自動跑 ONNX → BIE → NEF pipeline。 + + ## 同 user 同時只能有一個進行中 job + + 若 `user_id` 已有未完成 job(`created` 或 `running` 狀態),會回 409 + `user_has_active_job` 並附上既有 job 詳情。 + + ## 大檔處理 + + - `model` 單檔 ≤ 500 MB(multer `LIMIT_FILE_SIZE`) + - `ref_images[]` 每張 ≤ 10 MB、共 ≤ 100 張 + - 部署層 Nginx vhost 應設 `client_max_body_size 600M` + - 建議 client 使用 chunked transfer + 連線重試 + + ## 寫入順序(M5 方案 A) + + 1. validate auth + multipart fields + 2. 先寫 MinIO(input + ref_images)— 失敗 → 502 storage_unavailable, + Redis 完全乾淨 + 3. Lua script 原子寫入 active_job lock + job record + user index + 4. enqueue 第一階段(onnx) + 5. 回 201 `created` + + security: + - BearerAuth: [converter:job.write] + parameters: + - $ref: '#/components/parameters/XRequestId' + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/CreateJobRequest' + encoding: + model: + contentType: application/octet-stream + 'ref_images[]': + contentType: image/jpeg, image/png, application/octet-stream + examples: + minimal: + summary: 最小必填欄位 + description: | + curl 範例: + + ``` + curl -X POST https://your-converter.example.com/api/v1/jobs \ + -H "Authorization: Bearer " \ + -F "model=@./model.onnx" \ + -F "user_id=alice" \ + -F "model_id=1001" \ + -F "version=v1.0.0" \ + -F "platform=520" + ``` + with_ref_images: + summary: 含 ref_images 與所有 enable flag + description: | + ``` + curl -X POST https://your-converter.example.com/api/v1/jobs \ + -H "Authorization: Bearer " \ + -F "model=@./model.onnx" \ + -F "ref_images[]=@./img_0.jpg" \ + -F "ref_images[]=@./img_1.jpg" \ + -F "user_id=alice" \ + -F "model_id=1001" \ + -F "version=v1.0.0" \ + -F "platform=520" \ + -F "enable_evaluate=true" \ + -F "enable_sim_fp=false" + ``` + responses: + '201': + description: Job 建立成功 + headers: + X-Request-Id: + $ref: '#/components/headers/XRequestId' + 
X-RateLimit-Limit: + $ref: '#/components/headers/XRateLimitLimit' + X-RateLimit-Remaining: + $ref: '#/components/headers/XRateLimitRemaining' + content: + application/json: + schema: + $ref: '#/components/schemas/CreateJobResponse' + examples: + created: + value: + job_id: '550e8400-e29b-41d4-a716-446655440000' + status: created + stage: onnx + progress: 0 + created_at: '2026-04-25T12:00:00Z' + expires_at: '2026-05-02T12:00:00Z' + user_id: alice + '400': + description: 欄位驗證失敗 / multipart 解析失敗 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + validation_error: + summary: model_id 不在範圍 + value: + error: + code: validation_error + message: 欄位驗證失敗 + details: + fields: + - field: model_id + message: model_id 範圍必須在 1 ~ 65535 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + invalid_multipart: + summary: 不是 multipart + value: + error: + code: invalid_multipart + message: multipart 解析失敗:LIMIT_UNEXPECTED_FILE + details: + code: LIMIT_UNEXPECTED_FILE + field: model + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '409': + description: 該 user_id 已有進行中 job + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + user_has_active_job: + value: + error: + code: user_has_active_job + message: 使用者目前已有進行中的轉檔任務 + details: + active_job_id: 550e8400-e29b-41d4-a716-446655440000 + active_job_status: running + active_job_stage: bie + active_job_progress: 45 + active_job_created_at: '2026-04-25T12:00:00Z' + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '413': + description: 上傳檔案超過大小上限 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + model_too_large: + summary: model 檔超過 500MB + value: + error: + code: file_too_large + message: 上傳檔案超過 500MB 上限 + details: + field: model + limit_bytes: 524288000 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + 
ref_image_too_large: + summary: 單張 ref_image 超過 10MB + value: + error: + code: file_too_large + message: ref_image 超過單張 10485760 bytes 上限 + details: + field: 'ref_images[3]' + size_bytes: 12000000 + limit_bytes: 10485760 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '429': + $ref: '#/components/responses/RateLimited' + '500': + description: 伺服器設定錯或其他內部錯誤 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + misconfiguration: + value: + error: + code: misconfiguration + message: POST /api/v1/jobs 需 STORAGE_BACKEND=minio + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '502': + description: 物件儲存(MinIO)短暫無法寫入 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + storage_unavailable: + value: + error: + code: storage_unavailable + message: 檔案儲存服務暫時無法使用,請稍後重試 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '503': + description: 並發 upload 超過 process semaphore 上限 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + service_busy: + value: + error: + code: service_busy + message: 系統繁忙中,請稍後重試 + details: + retry_after_seconds: 30 + max_concurrent: 5 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + # ========================================================================= + # GET /api/v1/jobs — Recovery 列表(user_id 必填) + # ========================================================================= + + get: + tags: [Jobs] + summary: 列出 job(Recovery / polling) + operationId: listJobs + description: | + 列出指定 `user_id` 的 job 清單,支援 status 過濾與 cursor-based 分頁。 + + ## 為什麼 user_id 必填 + + Phase 1 強制 user_id 必填,避免「不帶條件的全掃」與資訊洩露。 + 未來若新增 admin scope 可再放寬(TDD §8.1 預留)。 + + ## 隔離 + + 列出的 job 一律自動以 token 內的 `client_id` 過濾 — 即同一 user_id 但 + 由不同 client 建立的 job 不會出現在結果中。 + + ## 分頁 + + 使用 base64-url-encoded opaque cursor。client 不該假設 cursor + 內容格式(未來可能改為 keyset)。當沒有更多資料時 `next_cursor: null`。 + security: + - BearerAuth: [converter:job.read] + parameters: 
+ - name: user_id + in: query + required: true + description: | + 必填。1-128 字元,只允許 `^[A-Za-z0-9._-]+$`(嚴格白名單, + 擋 XSS / Redis key injection / log injection) + schema: + type: string + pattern: '^[A-Za-z0-9._-]{1,128}$' + example: alice + - name: status + in: query + required: false + description: | + 過濾條件: + + - `in_progress`:預設值,相當於 `created` ∪ `running` + - `completed` / `failed` / `all` + schema: + type: string + enum: [in_progress, completed, failed, all] + default: in_progress + - name: limit + in: query + required: false + description: 一頁筆數,1-50(預設 10) + schema: + type: integer + minimum: 1 + maximum: 50 + default: 10 + - name: cursor + in: query + required: false + description: | + base64-url-encoded opaque cursor。從上一頁的 `next_cursor` + 欄位取得。第一頁不要帶。 + schema: + type: string + example: eyJvZmZzZXQiOjEwfQ + - $ref: '#/components/parameters/XRequestId' + responses: + '200': + description: 查詢成功 + headers: + X-Request-Id: + $ref: '#/components/headers/XRequestId' + X-RateLimit-Limit: + $ref: '#/components/headers/XRateLimitLimit' + X-RateLimit-Remaining: + $ref: '#/components/headers/XRateLimitRemaining' + content: + application/json: + schema: + $ref: '#/components/schemas/ListJobsResponse' + examples: + with_results: + value: + jobs: + - job_id: 550e8400-e29b-41d4-a716-446655440000 + user_id: alice + status: running + stage: bie + progress: 45 + stage_progress: 60 + created_at: '2026-04-25T12:00:00Z' + updated_at: '2026-04-25T12:05:30Z' + expires_at: '2026-05-02T12:00:00Z' + stage_timings: + onnx: + started_at: '2026-04-25T12:00:00Z' + completed_at: '2026-04-25T12:02:10Z' + bie: + started_at: '2026-04-25T12:02:15Z' + completed_at: null + nef: + started_at: null + completed_at: null + input: + filename: model.onnx + object_key: jobs/550e8400-e29b-41d4-a716-446655440000/input/model.onnx + size_bytes: 204800000 + ref_images_count: 0 + result_object_keys: null + error: null + parameters: + model_id: 1001 + version: v1.0.0 + platform: '520' + enable_evaluate: false + 
enable_sim_fp: false + enable_sim_fixed: false + enable_sim_hw: false + metadata: {} + total: 1 + next_cursor: null + empty: + value: + jobs: [] + total: 0 + next_cursor: null + '400': + description: 查詢參數驗證失敗(user_id 缺漏、status 不在 enum 等) + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + missing_user_id: + value: + error: + code: validation_error + message: 查詢參數驗證失敗 + details: + fields: + - field: user_id + message: user_id 必填,1-128 字元,僅可包含英數字 / `.` / `_` / `-` + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '429': + $ref: '#/components/responses/RateLimited' + + # =========================================================================== + # GET /api/v1/jobs/:id — 單一 job 狀態 + ETag + # =========================================================================== + + /api/v1/jobs/{id}: + parameters: + - $ref: '#/components/parameters/JobIdPath' + get: + tags: [Jobs] + summary: 取得單一 job 狀態 + operationId: getJob + description: | + 取得單一 job 詳情。支援 ETag 304。 + + ## Client 隔離 + + 即使 jobId 真實存在,若 token 內 `client_id` 與 job 的 + `created_by_client_id` 不符,**一律回 404**(不洩漏存在性)。 + + ## ETag + + - response 帶 `ETag: W/"{hash}"`(weak ETag) + - client 後續 polling 帶 `If-None-Match: W/"{hash}"`(原樣帶回上次收到的 ETag 值)→ + 若 job 未變化回 304(無 body) + + security: + - BearerAuth: [converter:job.read] + parameters: + - name: If-None-Match + in: header + required: false + description: 上次 polling 拿到的 ETag。命中則回 304。 + schema: + type: string + example: W/"a1b2c3d4..." 
+ - $ref: '#/components/parameters/XRequestId' + responses: + '200': + description: Job 詳情 + headers: + ETag: + schema: + type: string + description: weak ETag,hash(updated_at) + X-Request-Id: + $ref: '#/components/headers/XRequestId' + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + examples: + running: + value: + job_id: 550e8400-e29b-41d4-a716-446655440000 + user_id: alice + status: running + stage: bie + progress: 45 + stage_progress: 60 + created_at: '2026-04-25T12:00:00Z' + updated_at: '2026-04-25T12:05:30Z' + expires_at: '2026-05-02T12:00:00Z' + stage_timings: + onnx: + started_at: '2026-04-25T12:00:00Z' + completed_at: '2026-04-25T12:02:10Z' + bie: + started_at: '2026-04-25T12:02:15Z' + completed_at: null + nef: + started_at: null + completed_at: null + input: + filename: model.onnx + object_key: jobs/550e8400-e29b-41d4-a716-446655440000/input/model.onnx + size_bytes: 204800000 + ref_images_count: 0 + result_object_keys: null + error: null + parameters: + model_id: 1001 + version: v1.0.0 + platform: '520' + enable_evaluate: false + enable_sim_fp: false + enable_sim_fixed: false + enable_sim_hw: false + metadata: {} + completed: + value: + job_id: 550e8400-e29b-41d4-a716-446655440000 + user_id: alice + status: completed + stage: null + progress: 100 + stage_progress: 100 + created_at: '2026-04-25T12:00:00Z' + updated_at: '2026-04-25T12:08:30Z' + expires_at: '2026-05-02T12:00:00Z' + stage_timings: + onnx: + started_at: '2026-04-25T12:00:00Z' + completed_at: '2026-04-25T12:02:10Z' + bie: + started_at: '2026-04-25T12:02:15Z' + completed_at: '2026-04-25T12:05:00Z' + nef: + started_at: '2026-04-25T12:05:05Z' + completed_at: '2026-04-25T12:08:30Z' + input: + filename: model.onnx + object_key: jobs/550e8400-e29b-41d4-a716-446655440000/input/model.onnx + size_bytes: 204800000 + ref_images_count: 0 + result_object_keys: + onnx: jobs/550e8400-e29b-41d4-a716-446655440000/output/model.onnx + bie: 
jobs/550e8400-e29b-41d4-a716-446655440000/output/model.bie + nef: jobs/550e8400-e29b-41d4-a716-446655440000/output/model.nef + error: null + parameters: + model_id: 1001 + version: v1.0.0 + platform: '520' + enable_evaluate: false + enable_sim_fp: false + enable_sim_fixed: false + enable_sim_hw: false + metadata: {} + failed: + value: + job_id: 550e8400-e29b-41d4-a716-446655440000 + user_id: alice + status: failed + stage: bie + progress: 33 + stage_progress: 0 + created_at: '2026-04-25T12:00:00Z' + updated_at: '2026-04-25T12:03:00Z' + expires_at: '2026-05-02T12:00:00Z' + stage_timings: + onnx: + started_at: '2026-04-25T12:00:00Z' + completed_at: '2026-04-25T12:02:10Z' + bie: + started_at: '2026-04-25T12:02:15Z' + completed_at: null + nef: + started_at: null + completed_at: null + input: + filename: model.onnx + object_key: jobs/550e8400-e29b-41d4-a716-446655440000/input/model.onnx + size_bytes: 204800000 + ref_images_count: 0 + result_object_keys: null + error: + stage: bie + code: quantization_failed + message: 參考圖片不足或格式不符,BIE 量化階段失敗 + parameters: + model_id: 1001 + version: v1.0.0 + platform: '520' + enable_evaluate: false + enable_sim_fp: false + enable_sim_fixed: false + enable_sim_hw: false + metadata: {} + '304': + description: ETag 命中,job 自上次取以來無變化 + headers: + ETag: + schema: + type: string + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/JobNotFound' + '429': + $ref: '#/components/responses/RateLimited' + + delete: + tags: [Phase 2 (Reserved)] + summary: '[Phase 2] 取消 / 刪除 job' + operationId: deleteJob + description: | + Phase 2 規劃的端點。Phase 1 一律回 501 `not_implemented`。 + deprecated: false + security: + - BearerAuth: [converter:job.write] + responses: + '501': + $ref: '#/components/responses/NotImplemented' + + # =========================================================================== + # POST /api/v1/jobs/:id/promote — 把結果檔搬到 FAA + # 
=========================================================================== + + /api/v1/jobs/{id}/promote: + parameters: + - $ref: '#/components/parameters/JobIdPath' + post: + tags: [Promote] + summary: 把成功結果檔 PUT 到 File Access Agent + operationId: promoteJob + description: | + 把已完成(`status=completed`)的 job 指定 stage 結果檔,stream PUT 到 + File Access Agent(NAS 模型庫)。 + + ## 冪等 + + - 若 job 已 promoted 過 → 直接回 200 + 既有 promoted_object_keys + (不重打 FAA、不重新讀 MinIO) + - 同 `target_object_key` 多次 PUT,FAA 端會覆蓋(client 安全重試) + + ## 序列執行 + + 多 target 在 server 端**序列**處理(避免對 FAA 並發壓力與 OOM)。 + + ## 重試策略 + + FAA 5xx / network timeout:server 端內部已重試最多 2 次(500ms / 2s + 指數退避)。client 不需要再重試 502 之外的 case。 + + FAA 401:server 自動 invalidate token + 重取一次。仍 401 → 503 + `auth_service_unavailable`。 + + security: + - BearerAuth: [converter:job.write] + parameters: + - $ref: '#/components/parameters/XRequestId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/PromoteRequest' + examples: + single_target: + value: + targets: + - source: nef + target_object_key: visionA/models/alice/m-1001/v1.0.0/out.nef + multi_target: + value: + targets: + - source: bie + target_object_key: visionA/models/alice/m-1001/v1.0.0/out.bie + - source: nef + target_object_key: visionA/models/alice/m-1001/v1.0.0/out.nef + responses: + '200': + description: Promote 成功(或冪等命中) + headers: + X-Request-Id: + $ref: '#/components/headers/XRequestId' + content: + application/json: + schema: + $ref: '#/components/schemas/PromoteResponse' + examples: + success: + value: + job_id: 550e8400-e29b-41d4-a716-446655440000 + promoted: + - source: nef + target_object_key: visionA/models/alice/m-1001/v1.0.0/out.nef + size_bytes: 10485760 + file_access_agent_etag: 'abc123' + promoted_at: '2026-04-25T12:30:00Z' + '400': + description: targets 格式錯誤 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + empty_targets: + value: + error: + code: 
validation_error + message: targets 不可為空 + details: + fields: + - field: targets + message: must contain at least 1 item + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/JobNotFound' + '409': + description: Job 尚未完成或指定 source 沒有產出 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + not_ready: + value: + error: + code: job_not_ready_for_promote + message: Job 尚未完成,無法 promote + details: + current_status: ONNX + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + source_not_available: + value: + error: + code: source_not_available + message: Job 沒有 bie 階段的結果可 promote + details: + source: bie + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '422': + description: target_object_key 格式不合法 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + invalid_object_key: + value: + error: + code: invalid_object_key + message: target_object_key 格式不合法 + details: + field: 'targets[0].target_object_key' + reason: 不可為空、不可含 .. / 反斜線 / 控制字元 / 開頭斜線 / ? 
/ # / %;長度 ≤ 1024 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '429': + $ref: '#/components/responses/RateLimited' + '502': + description: File Access Agent 不可用或拒絕請求 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + gateway_unavailable: + value: + error: + code: file_gateway_unavailable + message: 檔案存取服務暫時無法使用,請稍後重試 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + gateway_rejected: + summary: FAA 4xx 拒絕(如 target_object_key 命名違規) + value: + error: + code: file_gateway_unavailable + message: 檔案存取服務拒絕此請求 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + '503': + description: 認證服務無法簽發 promote 用 token + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + auth_unavailable: + value: + error: + code: auth_service_unavailable + message: 認證服務目前無法簽發必要 token,請稍後重試 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + # =========================================================================== + # Phase 2 預留端點 — 一律 501 not_implemented + # =========================================================================== + + /api/v1/jobs/{id}/download-tokens: + parameters: + - $ref: '#/components/parameters/JobIdPath' + post: + tags: [Phase 2 (Reserved)] + summary: '[Phase 2] 換 delegated download token' + operationId: createDownloadTokens + description: | + Phase 2 規劃的端點,待 Member Center 完成 delegated token 流程後實作。 + Phase 1 一律回 501 `not_implemented`。 + deprecated: false + security: + - BearerAuth: [converter:job.read] + responses: + '501': + $ref: '#/components/responses/NotImplemented' + +# ============================================================================= +# Components +# ============================================================================= + +components: + + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + description: | + OAuth 2.0 Bearer JWT,由 Innovedus Member Center 簽發。 + + - `iss`:與 `MEMBER_CENTER_ISSUER` env 相符 + - `aud`:含 
`kneron_converter_api`(或 env `KNERON_CONVERTER_AUDIENCE` 值) + - `exp`:未過期(含 60 秒 clock skew) + - `scope`:空白分隔字串,必須包含端點要求的 scope + - `client_id`:必須有(用於識別與 rate limit) + - `tenant_id`(可選):若 server 設了 `CONVERTER_TENANT_ID`,須吻合 + + 建議消費者使用 `client_credentials` grant(見下方 `OAuth2ClientCredentials` scheme, + 提供 SDK generator 自動處理 token 取得與 refresh)。 + + OAuth2ClientCredentials: + type: oauth2 + description: | + OAuth 2.0 client credentials grant — 用於服務間(VisionA → Converter)認證。 + Member Center 簽發 JWT,Converter 透過 JWKS 驗簽(`MEMBER_CENTER_JWKS_URL`)。 + + Token endpoint URL 由部署環境決定,請查 `MEMBER_CENTER_TOKEN_URL` env 或 ops 文件。 + flows: + clientCredentials: + tokenUrl: https://member-center.example.com/oauth/token + scopes: + 'converter:job.write': 建立 / 修改 job(POST /jobs, POST /jobs/:id/promote) + 'converter:job.read': 查詢 job(GET /jobs, GET /jobs/:id) + + parameters: + + XRequestId: + name: X-Request-Id + in: header + required: false + description: | + Trace ID。若 client 帶入則 server 沿用;未帶則 server 產 UUIDv4 並回給 client。 + schema: + type: string + format: uuid + example: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + JobIdPath: + name: id + in: path + required: true + description: Job UUIDv4(建立時 server 生成) + schema: + type: string + format: uuid + example: 550e8400-e29b-41d4-a716-446655440000 + + headers: + + XRequestId: + schema: + type: string + format: uuid + description: 請求對應的 trace ID(與 X-Request-Id request header 相同) + + XRateLimitLimit: + schema: + type: integer + description: 該 client 在當前 window 的最大允許 request 數(預設 300) + + XRateLimitRemaining: + schema: + type: integer + description: 該 client 當前 window 還剩多少 request 額度 + + responses: + + Unauthorized: + description: Token 無效 / 過期 / 簽章錯 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + invalid_token: + value: + error: + code: invalid_token + message: Token 無效或已過期 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + token_expired: + value: + error: + code: token_expired + message: Token 已過期 + 
request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + Forbidden: + description: scope 不足 / tenant 不符 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + insufficient_scope: + value: + error: + code: insufficient_scope + message: token 缺少必要權限 + details: + required_scope: converter:job.write + provided_scopes: [converter:job.read] + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + tenant_mismatch: + value: + error: + code: tenant_mismatch + message: tenant_id 與 Converter 配置不符 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + JobNotFound: + description: | + Job 不存在 — 或存在但屬於不同 client_id(為避免存在性洩露,一律回此) + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + not_found: + value: + error: + code: job_not_found + message: Job 不存在 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + RateLimited: + description: 超過 per-client_id rate limit + headers: + Retry-After: + schema: + type: integer + description: 多少秒後可再嘗試 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + rate_limited: + value: + error: + code: rate_limit_exceeded + message: 請求頻率過高,請稍後再試 + details: + retry_after_seconds: 30 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + NotImplemented: + description: Phase 2 預留功能,Phase 1 不提供 + content: + application/json: + schema: + $ref: '#/components/schemas/ApiError' + examples: + not_implemented: + value: + error: + code: not_implemented + message: 此端點為 Phase 2 預留,尚未實作 + request_id: 7c6e4f3b-1a2b-4c3d-9e8f-aabbccddeeff + + schemas: + + # ------------------------------------------------------------------------- + # 共用:錯誤格式 + # ------------------------------------------------------------------------- + + ApiError: + type: object + required: [error] + properties: + error: + type: object + required: [code, message, request_id] + properties: + code: + type: string + description: | + 錯誤分類碼(snake_case)。完整清單見 README §Error Codes。 + enum: + - 
validation_error + - invalid_multipart + - invalid_token + - token_expired + - insufficient_scope + - tenant_mismatch + - job_not_found + - not_found + - user_has_active_job + - job_not_ready_for_promote + - source_not_available + - file_too_large + - invalid_object_key + - misconfiguration + - storage_unavailable + - file_gateway_unavailable + - auth_service_unavailable + - service_busy + - rate_limit_exceeded + - internal_error + - not_implemented + message: + type: string + description: 人類可讀的訊息(zh-TW) + details: + type: object + description: 視 code 而定的補充資訊;可能不存在 + additionalProperties: true + request_id: + type: string + description: 對應的 X-Request-Id(uuid) + format: uuid + + # ------------------------------------------------------------------------- + # /health + # ------------------------------------------------------------------------- + + HealthSnapshot: + type: object + required: [service, status, timestamp, redis, dependencies] + properties: + service: + type: string + enum: [task-scheduler] + status: + type: string + enum: [healthy, degraded, unhealthy] + timestamp: + type: string + format: date-time + redis: + type: string + description: 向後相容欄位(與 dependencies.redis 同值) + enum: [connected, disconnected] + version: + type: string + description: 服務版本 + example: '1.0.0' + dependencies: + type: object + required: [redis, member_center, file_access_agent] + properties: + redis: + type: string + enum: [connected, disconnected] + member_center: + type: string + enum: [reachable, unreachable, pending] + file_access_agent: + type: string + enum: [reachable, unreachable, pending] + + # ------------------------------------------------------------------------- + # POST /jobs request body + # ------------------------------------------------------------------------- + + CreateJobRequest: + type: object + required: [model, user_id, model_id, version, platform] + properties: + model: + type: string + format: binary + description: | + 原始模型檔(必填)。 + + - 副檔名只接受 `.onnx` / 
`.tflite`(PRD §4.4) + - 大小 ≤ 500 MB + - 不可為空 + ref_images[]: + type: array + description: | + 可選的參考圖片陣列(用於 BIE 校正)。 + + - 0 ~ 100 張 + - 每張 ≤ 10 MB + items: + type: string + format: binary + user_id: + type: string + description: | + **trust boundary**:由呼叫端決定,server 完全信任。 + + 限制:1-128 字元,`^[A-Za-z0-9._-]+$`,不可含 `..` + pattern: '^[A-Za-z0-9._-]{1,128}$' + example: alice + model_id: + type: string + description: 數字字串,整數值範圍 1 ~ 65535 + example: '1001' + version: + type: string + description: | + 版本識別。1-32 字元,`^[A-Za-z0-9._-]+$`(拒含 XSS / 控制字元) + pattern: '^[A-Za-z0-9._-]{1,32}$' + example: v1.0.0 + platform: + type: string + description: 目標 Kneron 平台 + enum: ['520', '720', '530', '630', '730'] + enable_evaluate: + type: string + description: 是否啟用 IP evaluation。`'true'` / `'false'`,缺漏視為 `'false'` + enum: ['true', 'false'] + default: 'false' + enable_sim_fp: + type: string + description: 是否執行浮點 E2E 模擬。`'true'` / `'false'` + enum: ['true', 'false'] + default: 'false' + enable_sim_fixed: + type: string + description: 是否執行定點 E2E 模擬。`'true'` / `'false'` + enum: ['true', 'false'] + default: 'false' + enable_sim_hw: + type: string + description: 是否執行硬體 E2E 模擬。`'true'` / `'false'` + enum: ['true', 'false'] + default: 'false' + metadata: + type: string + description: | + 可選;若有需為合法 JSON object 字串(不可為 array / null / primitive)。 + 未來擴展用,server 原樣保留。 + example: '{"source":"visionA-web"}' + + CreateJobResponse: + type: object + required: [job_id, status, stage, progress, created_at, expires_at, user_id] + properties: + job_id: + type: string + format: uuid + description: server 生成的 UUIDv4 + status: + type: string + enum: [created] + description: 建立成功時固定為 `created` + stage: + type: string + enum: [onnx] + description: 第一階段固定為 `onnx` + progress: + type: integer + enum: [0] + description: 建立成功時固定為 0 + created_at: + type: string + format: date-time + expires_at: + type: string + format: date-time + description: 建立後 7 天的時間戳,過期 Redis / MinIO 會清掉相關資料 + user_id: + type: string + + # 
------------------------------------------------------------------------- + # Job 完整 schema(GET / list 共用) + # ------------------------------------------------------------------------- + + Job: + type: object + required: + - job_id + - user_id + - status + - stage + - progress + - stage_progress + - created_at + - updated_at + - expires_at + - stage_timings + - input + - result_object_keys + - error + - parameters + - metadata + properties: + job_id: + type: string + format: uuid + user_id: + type: string + nullable: true + status: + type: string + enum: [created, running, completed, failed] + description: | + 對外狀態: + + - `created`:剛建立,第一階段尚未開工 + - `running`:某個 stage 進行中(看 `stage` 欄位) + - `completed`:全部完成,`result_object_keys` 有值 + - `failed`:某階段失敗,`error` 有值 + stage: + type: string + nullable: true + enum: [onnx, bie, nef, null] + description: | + 當前進行中的 stage。`completed` 時為 `null`; + `failed` 時為失敗發生時的 stage。 + progress: + type: integer + minimum: 0 + maximum: 100 + description: 整體 pipeline 進度 0-100 + stage_progress: + type: integer + minimum: 0 + maximum: 100 + description: 當前 stage 內的進度(worker 上報;Phase 1 多為 0 或 100) + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + expires_at: + type: string + format: date-time + stage_timings: + $ref: '#/components/schemas/StageTimings' + input: + type: object + nullable: true + required: [filename, object_key, size_bytes, ref_images_count] + properties: + filename: + type: string + nullable: true + description: 上傳當下的原始檔名(已 sanitize) + object_key: + type: string + nullable: true + description: Converter Bucket 內的物件 key(內部使用) + size_bytes: + type: integer + nullable: true + description: 模型檔大小(bytes) + ref_images_count: + type: integer + minimum: 0 + description: 參考圖片數量 + result_object_keys: + type: object + nullable: true + description: | + 僅 `status=completed` 時有值。各 stage 結果檔在 Converter Bucket + 內的 object key(後續 promote 用)。 + properties: + onnx: + type: string + bie: + type: 
string + nef: + type: string + error: + type: object + nullable: true + description: 僅 `status=failed` 時有值 + properties: + stage: + type: string + enum: [onnx, bie, nef] + code: + type: string + description: Worker 端的錯誤碼(如 `quantization_failed`) + message: + type: string + parameters: + type: object + required: [model_id, version, platform] + properties: + model_id: + type: integer + minimum: 1 + maximum: 65535 + version: + type: string + platform: + type: string + enum: ['520', '720', '530', '630', '730'] + enable_evaluate: + type: boolean + enable_sim_fp: + type: boolean + enable_sim_fixed: + type: boolean + enable_sim_hw: + type: boolean + metadata: + type: object + additionalProperties: true + description: client 提交時帶的 metadata,server 原樣保留 + + StageTimings: + type: object + description: | + 每個 stage 的開始與完成時間。 + + **Phase 1 限制**:`started_at` 是「Scheduler enqueue 該 stage 的時間」, + 非 worker 真的拿起任務的時間。worker 等待 queue 的時間會被算入。 + Phase 2 若需精確區分,由 worker 上報 `worker_started_at`。 + required: [onnx, bie, nef] + properties: + onnx: + $ref: '#/components/schemas/StageTiming' + bie: + $ref: '#/components/schemas/StageTiming' + nef: + $ref: '#/components/schemas/StageTiming' + + StageTiming: + type: object + nullable: true + properties: + started_at: + type: string + format: date-time + nullable: true + completed_at: + type: string + format: date-time + nullable: true + + # ------------------------------------------------------------------------- + # GET /jobs (list) + # ------------------------------------------------------------------------- + + ListJobsResponse: + type: object + required: [jobs, total, next_cursor] + properties: + jobs: + type: array + items: + $ref: '#/components/schemas/Job' + total: + type: integer + minimum: 0 + description: 過濾條件下的總筆數(不只是當頁) + next_cursor: + type: string + nullable: true + description: | + 下一頁的 opaque cursor(base64-url)。null 表示已是最後一頁。 + + # ------------------------------------------------------------------------- + # POST 
/jobs/:id/promote + # ------------------------------------------------------------------------- + + PromoteRequest: + type: object + required: [targets] + properties: + targets: + type: array + minItems: 1 + maxItems: 10 + description: | + 要 promote 的清單。每個 source 在同一個 request 中只能出現一次(重複會 400)。 + items: + $ref: '#/components/schemas/PromoteTarget' + + PromoteTarget: + type: object + required: [source, target_object_key] + properties: + source: + type: string + enum: [onnx, bie, nef] + description: 要從哪個 stage 結果取 + target_object_key: + type: string + maxLength: 1024 + description: | + File Access Agent 端的目標 key。caller(visionA)決定命名規則。 + + 禁止字元(會回 422 invalid_object_key): + - 空字串 + - 開頭 `/` + - `..`(path traversal) + - 反斜線 `\\`(Windows path) + - 控制字元 / null byte + - `?` `#` `%`(URL 結構字元 / 雙重編碼攻擊) + - 長度 > 1024 + example: visionA/models/alice/m-1001/v1.0.0/out.nef + + PromoteResponse: + type: object + required: [job_id, promoted] + properties: + job_id: + type: string + format: uuid + promoted: + type: array + items: + type: object + required: [source, target_object_key, size_bytes, file_access_agent_etag, promoted_at] + properties: + source: + type: string + enum: [onnx, bie, nef] + target_object_key: + type: string + size_bytes: + type: integer + description: PUT 到 FAA 的 bytes 數 + file_access_agent_etag: + type: string + nullable: true + description: FAA 回的 ETag(若有) + promoted_at: + type: string + format: date-time diff --git a/apps/task-scheduler/env.example b/apps/task-scheduler/env.example index 475d869..bc6331a 100644 --- a/apps/task-scheduler/env.example +++ b/apps/task-scheduler/env.example @@ -1,23 +1,200 @@ -# Task Scheduler Configuration +############################################################################### +# Task Scheduler 環境變數範本(Phase 1 完整版,T10 收斂) +# +# 三類分區(依顯示順序): +# 1. 必填(production 必須設真實值)— 缺漏會 fail-fast,process exit code 1 +# 2. 可選(合理預設)— 不設會用程式內 default +# 3. 
開發 placeholder — 用 RFC 2606 `.invalid` TLD 確保不會誤連到真實服務 +# +# 部署準則: +# - 切勿 commit `.env`(已在 .gitignore;歷史 commit 待 D7 處理) +# - production 用 secret manager(Vault / AWS Secrets Manager),不要直接設環境變數 +# - 任何含 `REPLACE-ME` 字樣或 `.invalid` TLD 的值,部署前必須替換 +############################################################################### + + +# ============================================================================= +# 1. 應用基本設定 +# ============================================================================= + +# 監聽 port(可選;未設定時預設 4000) PORT=4000 + +# Node 環境(development / staging / production) +# - production 時 FILE_ACCESS_AGENT_BASE_URL 強制 HTTPS NODE_ENV=development -# Redis +# Log 等級(debug / info / warn / error) +LOG_LEVEL=info + + +# ============================================================================= +# 2. Redis(必填) +# ============================================================================= +# - 不設會用 default,但實際部署需指向真實 Redis +# - 帶 password:redis://:password@host:6379 + REDIS_URL=redis://localhost:6379 -# Job data directory (shared volume with workers) + +# ============================================================================= +# 3. Job 資料目錄(local storage 用) +# ============================================================================= +# - STORAGE_BACKEND=local 時,此目錄為 worker / scheduler 共用 volume +# - STORAGE_BACKEND=minio 時,仍會用此目錄存暫時檔(如 health check) + JOB_DATA_DIR=/data/jobs -# Frontend URL (for CORS) + +# ============================================================================= +# 4. CORS(必填) +# ============================================================================= + FRONTEND_URL=http://localhost:3000 -# Storage backend: "local" (shared volume) or "minio" + +# ============================================================================= +# 5. 
Storage backend(必填) +# ============================================================================= +# - "local":用 JOB_DATA_DIR 共用 volume(單機開發 / docker-compose) +# - "minio":用 MinIO / S3-compatible(production 推薦;POST /api/v1/jobs 必須 minio) + STORAGE_BACKEND=local -# MinIO settings (only used when STORAGE_BACKEND=minio) + +# ============================================================================= +# 6. MinIO / S3 設定 +# ============================================================================= +# - STORAGE_BACKEND=minio 時為必填 +# - STORAGE_BACKEND=local 時可留空 +# - 注意:production 不要把真實 secret 寫在這裡,改用 secret manager + MINIO_ENDPOINT_URL=http://192.168.0.130:9000 MINIO_BUCKET=convertet-working-space MINIO_ACCESS_KEY=convuser -MINIO_SECRET_KEY=your-secret-here +MINIO_SECRET_KEY=REPLACE-ME-IN-PRODUCTION MINIO_REGION=us-east-1 +# bucket lifecycle(天)— 上傳後 N 天自動清,避免 orphan 累積 MINIO_LIFECYCLE_DAYS=7 + + +# ============================================================================= +# 7. OAuth / Member Center(必填) +# ============================================================================= +# +# ⚠️ 下方 `*.invalid` 主機名都是 RFC 2606 保留 TLD,DNS 永不解析。 +# 本地開發跑「不需 OAuth 的 legacy /jobs 流程」可直接照抄; +# production 部署前務必替換為真實 Member Center URL,否則 token 驗證 / 取得會 DNS 失敗。 +# +# 三組 URL 通常來自同一個 Member Center 服務: +# - ISSUER:JWT 的 iss claim 比對基準 +# - JWKS_URL:取公鑰用,做 JWT 簽章驗證 +# - TOKEN_URL:Converter 自己取 token 用(client_credentials grant) + +MEMBER_CENTER_ISSUER=https://auth.example.invalid +MEMBER_CENTER_JWKS_URL=https://auth.example.invalid/.well-known/jwks +MEMBER_CENTER_TOKEN_URL=https://auth.example.invalid/oauth/token + + +# ============================================================================= +# 8. 
Converter 身份(必填) +# ============================================================================= +# +# Converter 同時是: +# - Resource Server:接收 visionA-backend 的 token,audience 必須為 KNERON_CONVERTER_AUDIENCE +# - OAuth Client:自己去 Member Center 取 token 打 File Access Agent;身份用 client_id / secret + +KNERON_CONVERTER_AUDIENCE=kneron_converter_api +KNERON_CONVERTER_CLIENT_ID=kneron_converter_dev +KNERON_CONVERTER_CLIENT_SECRET=REPLACE-ME-IN-PRODUCTION + +# 若需 tenant 隔離,設此值;空字串代表不檢查 tenant claim +CONVERTER_TENANT_ID= + + +# ============================================================================= +# 9. Scope 命名(可選,預設對齊 TDD §8) +# ============================================================================= +# 通常不需改;除非 Member Center 端命名不一樣 + +# CONVERTER_SCOPE_WRITE=converter:job.write +# CONVERTER_SCOPE_READ=converter:job.read + + +# ============================================================================= +# 10. File Access Agent(必填) +# ============================================================================= +# +# Promote 時 Converter 把產出 stream PUT 到 FAA。 +# - URL 必須是合法 http(s) URL;NODE_ENV=production 強制 https +# - 本地開發可用 placeholder(.invalid TLD),不影響非 promote 流程 + +FILE_ACCESS_AGENT_BASE_URL=https://files.example.invalid +FILE_ACCESS_AGENT_AUDIENCE=file_access_api + + +# ============================================================================= +# 11. Promote 行為(可選) +# ============================================================================= +# 單檔 PUT timeout(毫秒)。預設 300000(300s,覆蓋 500MB @ 5MB/s 最壞)。 +# 部署環境檔案普遍較小可調低;GB 級檔案可調高。 + +# PROMOTE_TIMEOUT_MS=300000 + + +# ============================================================================= +# 12. 
JWKS / JWT 行為(可選) +# ============================================================================= +# 預設值對齊 TDD §5.1。 + +# JWKS_CACHE_MAX_AGE_MS=600000 # JWKS cache 有效期(10 分鐘) +# JWKS_COOLDOWN_MS=30000 # 同 kid 連續 miss 的 cooldown(30 秒) +# JWT_CLOCK_TOLERANCE_SEC=60 # 時鐘偏差容忍(秒) + + +# ============================================================================= +# 13. OAuth Client cache(可選) +# ============================================================================= + +# OAUTH_TOKEN_REFRESH_SKEW_MS=60000 # token 距 expiresAt 還剩多少 ms 主動 refresh +# OAUTH_TOKEN_TIMEOUT_MS=10000 # 取 token timeout(10s) + + +# ============================================================================= +# 14. Multipart 上傳上限(可選,T10 修 D5) +# ============================================================================= +# +# 為什麼用 env: +# 不同部署環境記憶體配額差異大(dev 容器 2GB / prod 16GB),固定 500MB 不夠彈性。 +# 調這些值不需改原始碼。 +# +# 三個值都必須 > 0;非法值會 fail-fast。 + +# MULTIPART_MODEL_MAX_BYTES=524288000 # 500MB(model 檔案上限) +# MULTIPART_REF_IMAGE_MAX_BYTES=10485760 # 10MB(單張 ref_image 上限) +# MULTIPART_REF_IMAGES_MAX_COUNT=100 # ref_images 張數上限 + + +# ============================================================================= +# 15. Upload concurrency(可選,T10 修 D5) +# ============================================================================= +# +# 為什麼需要: +# multer memoryStorage 把整份 multipart load 進 buffer,每個並發 upload 吃掉 +# model size 大小的 heap。5 並發 × 500MB ≈ 2.5GB heap,4GB 容器有風險。 +# per-process counter 限制同時間 multipart parse + handler 進行中的請求數量。 +# +# 超過上限時:直接 503 service_busy + Retry-After header(不 queue),讓 client 主動 backoff。 + +# MAX_CONCURRENT_UPLOADS=5 # 同時間最多 5 個 upload 進行中 +# UPLOAD_RETRY_AFTER_SECONDS=30 # 503 response 的 Retry-After(秒) + + +# ============================================================================= +# 16. 
Per-client_id rate limit(可選,T3 起) +# ============================================================================= +# 對 /api/v1/* 套用,window 內每個 client_id 最多 max 個 request。 +# 預設 5min / 300 req(對齊 TDD §1.1)。 + +# API_V1_RATE_LIMIT_WINDOW_MS=300000 +# API_V1_RATE_LIMIT_MAX=300 diff --git a/apps/task-scheduler/package-lock.json b/apps/task-scheduler/package-lock.json index 5d335d6..f3ebd95 100644 --- a/apps/task-scheduler/package-lock.json +++ b/apps/task-scheduler/package-lock.json @@ -17,6 +17,7 @@ "express-rate-limit": "^6.10.0", "helmet": "^7.0.0", "ioredis": "^5.3.2", + "jose": "^5.10.0", "morgan": "^1.10.0", "multer": "^1.4.5-lts.1", "uuid": "^9.0.0" @@ -5062,6 +5063,15 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, + "node_modules/jose": { + "version": "5.10.0", + "resolved": "https://registry.npmjs.org/jose/-/jose-5.10.0.tgz", + "integrity": "sha512-s+3Al/p9g32Iq+oqXxkW//7jk2Vig6FF1CFqzVXoTUXt2qz89YWbL+OwS17NFYEvxC35n0FKeGO2LGYSxeM2Gg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", diff --git a/apps/task-scheduler/package.json b/apps/task-scheduler/package.json index 91dcac8..07574e0 100644 --- a/apps/task-scheduler/package.json +++ b/apps/task-scheduler/package.json @@ -9,21 +9,22 @@ "test": "jest" }, "dependencies": { - "express": "^4.18.2", - "cors": "^2.8.5", - "multer": "^1.4.5-lts.1", - "ioredis": "^5.3.2", - "uuid": "^9.0.0", - "dotenv": "^16.3.1", - "helmet": "^7.0.0", - "express-rate-limit": "^6.10.0", - "morgan": "^1.10.0", + "@aws-sdk/client-s3": "^3.400.0", "compression": "^1.7.4", - "@aws-sdk/client-s3": "^3.400.0" + "cors": "^2.8.5", + "dotenv": "^16.3.1", + "express": "^4.18.2", + "express-rate-limit": "^6.10.0", + "helmet": "^7.0.0", + "ioredis": "^5.3.2", + "jose": "^5.10.0", + "morgan": "^1.10.0", + "multer": "^1.4.5-lts.1", + "uuid": "^9.0.0" }, 
"devDependencies": { - "nodemon": "^3.0.1", - "jest": "^29.6.2" + "jest": "^29.6.2", + "nodemon": "^3.0.1" }, "keywords": [ "kneron", diff --git a/apps/task-scheduler/server.js b/apps/task-scheduler/server.js index 7534788..b1651d8 100644 --- a/apps/task-scheduler/server.js +++ b/apps/task-scheduler/server.js @@ -1,641 +1,133 @@ /** - * Kneron Toolchain Task Scheduler + * Kneron Toolchain Task Scheduler — entry point * * 職責: - * 1. REST API — 建立 job、查詢狀態、上傳檔案、下載結果 - * 2. Job State — 透過 Redis Hash 管理 job 生命週期 - * 3. Queue 調度 — 透過 Redis Stream 派工給 Worker - * 4. Done 監聽 — 接收 Worker 完成事件,推進到下一階段 - * 5. SSE — 即時推送 job 狀態給前端 + * 1. 啟動時 fail-fast 驗證 config(修 D3 — T1-deviations.md) + * 2. 建立各層 dependency(redis / minio / sseService / jobService) + * 3. 組裝 Express app,mount legacy 路由 + * 4. 在背景啟動 done queue listener + * 5. listen port + * + * **本檔不應再寫業務邏輯**。所有路由 / service / storage 細節都在 src/ 下。 + * + * 重構說明(T4): + * src/redis.js — Redis client 與 helper + * src/storage/minio.js — MinIO facade + * src/storage/local.js — local volume helper + * src/services/sseService.js — SSE client 管理 + * src/services/jobService.js — Job CRUD / advance / fail + * src/services/doneListener.js— done queue 背景監聽 + * src/middleware/upload.js — multer 上傳設定 + * src/routes/legacy.js — 既有 7 個路由 + * src/app.js — Express app 組裝 + * + * 既有 /jobs* 端點行為**完全不變**(byte-for-byte,除時間戳)。 + * D3 修復:本檔在 require 階段即呼叫 loadConfig() — 必填 env 缺漏會 throw 並 exit(1)。 */ -const express = require('express'); -const cors = require('cors'); -const multer = require('multer'); -const helmet = require('helmet'); -const rateLimit = require('express-rate-limit'); -const morgan = require('morgan'); -const compression = require('compression'); -const { v4: uuidv4 } = require('uuid'); -const Redis = require('ioredis'); -const path = require('path'); -const fs = require('fs'); -const { S3Client, PutObjectCommand, GetObjectCommand } = require('@aws-sdk/client-s3'); +'use strict'; + +/* eslint-disable no-console */ + 
require('dotenv').config(); -// --------------------------------------------------------------------------- -// Config -// --------------------------------------------------------------------------- +const { loadConfig } = require('./src/config'); +const { createClients } = require('./src/redis'); +const { createMinioFacade } = require('./src/storage/minio'); +const { createSseService } = require('./src/services/sseService'); +const { createJobService, STAGES } = require('./src/services/jobService'); +const { ensureWorkerGroups, startListenDone } = require('./src/services/doneListener'); +const { createUploader } = require('./src/middleware/upload'); +const { createHealthService } = require('./src/services/healthService'); +const { createApp } = require('./src/app'); + +// D3 fail-fast:缺必填 env 即 process.exit(1) +let config; +try { + config = loadConfig(); +} catch (err) { + console.error('[Scheduler] Config validation failed:', err.message); + process.exit(1); +} + +// 既有 env — 待後續整合到 config.js const PORT = process.env.PORT || 4000; const REDIS_URL = process.env.REDIS_URL || 'redis://localhost:6379'; const JOB_DATA_DIR = process.env.JOB_DATA_DIR || '/data/jobs'; -const FRONTEND_URL = process.env.FRONTEND_URL || 'http://localhost:3000'; - -// MinIO config const STORAGE_BACKEND = process.env.STORAGE_BACKEND || 'local'; -const MINIO_ENDPOINT = process.env.MINIO_ENDPOINT_URL || 'http://192.168.0.130:9000'; -const MINIO_BUCKET = process.env.MINIO_BUCKET || 'convertet-working-space'; -const MINIO_ACCESS_KEY = process.env.MINIO_ACCESS_KEY || 'convuser'; -const MINIO_SECRET_KEY = process.env.MINIO_SECRET_KEY || ''; -const MINIO_REGION = process.env.MINIO_REGION || 'us-east-1'; -let minio = null; -if (STORAGE_BACKEND === 'minio') { - minio = new S3Client({ - endpoint: MINIO_ENDPOINT, - region: MINIO_REGION, - credentials: { - accessKeyId: MINIO_ACCESS_KEY, - secretAccessKey: MINIO_SECRET_KEY, - }, - forcePathStyle: true, // Required for MinIO - }); - 
console.log(`[Scheduler] MinIO storage enabled: ${MINIO_ENDPOINT}/${MINIO_BUCKET}`); +// 依賴組裝 +const { redis, redisSub } = createClients(REDIS_URL); +const minio = createMinioFacade(); +if (minio.client) { + console.log(`[Scheduler] MinIO storage enabled: ${minio.endpoint}/${minio.bucket}`); } +const sseService = createSseService(); +const jobService = createJobService({ redis, sseService, jobDataDir: JOB_DATA_DIR }); -async function uploadToMinIO(key, body, contentType) { - if (!minio) return; - await minio.send(new PutObjectCommand({ - Bucket: MINIO_BUCKET, - Key: key, - Body: body, - ContentType: contentType, - })); -} - -async function getFromMinIO(key) { - if (!minio) return null; - const response = await minio.send(new GetObjectCommand({ - Bucket: MINIO_BUCKET, - Key: key, - })); - // Convert Body to Buffer (AWS SDK v3 Body is a web stream in Node 18) - const chunks = []; - for await (const chunk of response.Body) { - chunks.push(chunk); - } - return { - body: Buffer.concat(chunks), - contentLength: response.ContentLength, - }; -} - -// Pipeline: fixed stage order -const STAGES = ['onnx', 'bie', 'nef']; -const STAGE_QUEUES = { - onnx: 'queue:onnx', - bie: 'queue:bie', - nef: 'queue:nef', -}; -const DONE_QUEUE = 'queue:done'; -const DONE_GROUP = 'scheduler'; - -// --------------------------------------------------------------------------- -// Redis clients (one for commands, one for blocking reads) -// --------------------------------------------------------------------------- -const redis = new Redis(REDIS_URL); -const redisSub = new Redis(REDIS_URL); - -redis.on('error', (err) => console.error('Redis error:', err)); -redisSub.on('error', (err) => console.error('Redis subscriber error:', err)); - -// --------------------------------------------------------------------------- -// Express setup -// --------------------------------------------------------------------------- -const app = express(); - -app.use(helmet()); -app.use(compression()); 
-app.use(morgan('short')); -app.use(cors({ origin: FRONTEND_URL, credentials: true })); - -const limiter = rateLimit({ - windowMs: 15 * 60 * 1000, - max: 200, - message: 'Too many requests, please try again later.', +// T10:multer uploader 從 config 取上限(修 D5) +// - maxFileSize = MULTIPART_MODEL_MAX_BYTES(預設 500MB) +// - maxRefImages = MULTIPART_REF_IMAGES_MAX_COUNT(預設 100) +// ref_image per-file 10MB 上限由 validator 用 config.multipart.refImageMaxBytes 把關 +const uploader = createUploader({ + maxFileSize: config.multipart.modelMaxBytes, + maxRefImages: config.multipart.refImagesMaxCount, }); -app.use('/api', limiter); +// T8:建立 healthService(不在這裡 start,等 listenDoneQueue 起來後再 start) +const healthService = createHealthService({ redis, config }); +const app = createApp( + { redis, jobService, sseService, minio, uploader, healthService }, + { config, storageBackend: STORAGE_BACKEND } +); -app.use(express.json({ limit: '10mb' })); -app.use(express.urlencoded({ extended: true, limit: '10mb' })); - -// File upload — store to job directory -const upload = multer({ - storage: multer.memoryStorage(), - limits: { fileSize: 500 * 1024 * 1024 }, // 500 MB -}); - -// --------------------------------------------------------------------------- -// SSE: keep track of connected clients per job_id -// --------------------------------------------------------------------------- -const sseClients = new Map(); // job_id -> Set - -function sendSSE(jobId, data) { - const clients = sseClients.get(jobId); - if (!clients) return; - const payload = `data: ${JSON.stringify(data)}\n\n`; - for (const res of clients) { - res.write(payload); - } -} - -// --------------------------------------------------------------------------- -// Helper: get / set job record in Redis -// --------------------------------------------------------------------------- -async function getJob(jobId) { - const raw = await redis.get(`job:${jobId}`); - if (!raw) return null; - return JSON.parse(raw); -} - -async function 
setJob(jobId, job) { - job.updated_at = new Date().toISOString(); - await redis.set(`job:${jobId}`, JSON.stringify(job)); - // Notify SSE clients - sendSSE(jobId, job); -} - -// --------------------------------------------------------------------------- -// Helper: enqueue a task to a stage queue -// --------------------------------------------------------------------------- -async function enqueueStage(stage, job) { - const queue = STAGE_QUEUES[stage]; - const message = { - job_id: job.job_id, - created_at: job.created_at, - input_dir: path.join(JOB_DATA_DIR, job.job_id), - parameters: job.parameters || {}, - }; - await redis.xadd(queue, '*', 'data', JSON.stringify(message)); - console.log(`[Scheduler] Enqueued job ${job.job_id} to ${queue}`); -} - -// --------------------------------------------------------------------------- -// Helper: advance job to next stage or mark completed -// --------------------------------------------------------------------------- -async function advanceJob(jobId, completedStage) { - const job = await getJob(jobId); - if (!job) { - console.warn(`[Scheduler] Job ${jobId} not found, ignoring done event`); - return; - } - - const currentIndex = STAGES.indexOf(completedStage); - if (currentIndex < 0) { - console.warn(`[Scheduler] Unknown stage: ${completedStage}`); - return; - } - - const nextIndex = currentIndex + 1; - - if (nextIndex < STAGES.length) { - // Advance to next stage - const nextStage = STAGES[nextIndex]; - job.status = nextStage.toUpperCase(); - job.stage = nextStage; - job.progress = Math.round(((nextIndex) / STAGES.length) * 100); - await setJob(jobId, job); - await enqueueStage(nextStage, job); - } else { - // All stages completed - job.status = 'COMPLETED'; - job.stage = null; - job.progress = 100; - await setJob(jobId, job); - console.log(`[Scheduler] Job ${jobId} COMPLETED`); - } -} - -// --------------------------------------------------------------------------- -// Helper: mark job as failed -// 
--------------------------------------------------------------------------- -async function failJob(jobId, step, reason) { - const job = await getJob(jobId); - if (!job) return; - - job.status = 'FAILED'; - job.error = { step, reason }; - await setJob(jobId, job); - console.log(`[Scheduler] Job ${jobId} FAILED at ${step}: ${reason}`); -} - -// --------------------------------------------------------------------------- -// Done queue listener — runs in background -// --------------------------------------------------------------------------- -async function ensureConsumerGroup(queue, group) { - try { - await redis.xgroup('CREATE', queue, group, '0', 'MKSTREAM'); - } catch (err) { - // Group already exists — OK - if (!err.message.includes('BUSYGROUP')) throw err; - } -} - -async function listenDoneQueue() { - const consumerName = `scheduler-${process.pid}`; - await ensureConsumerGroup(DONE_QUEUE, DONE_GROUP); - - console.log(`[Scheduler] Listening on ${DONE_QUEUE} as ${consumerName}`); - - while (true) { - try { - const results = await redisSub.xreadgroup( - 'GROUP', DONE_GROUP, consumerName, - 'COUNT', 10, - 'BLOCK', 5000, - 'STREAMS', DONE_QUEUE, '>' - ); - - if (!results) continue; - - for (const [, messages] of results) { - for (const [messageId, fields] of messages) { - try { - const data = JSON.parse(fields[1]); // fields = ['data', '{...}'] - const { job_id, step, result, reason } = data; - - console.log(`[Scheduler] Done event: job=${job_id} step=${step} result=${result}`); - - if (result === 'ok') { - await advanceJob(job_id, step); - } else { - await failJob(job_id, step, reason || 'Unknown error'); - } - - // ACK the message - await redisSub.xack(DONE_QUEUE, DONE_GROUP, messageId); - } catch (err) { - console.error('[Scheduler] Error processing done event:', err); - } - } - } - } catch (err) { - if (err.message.includes('Connection is closed')) { - console.error('[Scheduler] Redis connection lost, retrying in 3s...'); - await new Promise((r) => 
setTimeout(r, 3000)); - } else { - console.error('[Scheduler] Done listener error:', err); - await new Promise((r) => setTimeout(r, 1000)); - } - } - } -} - -// --------------------------------------------------------------------------- -// Ensure worker queue consumer groups exist on startup -// --------------------------------------------------------------------------- -async function ensureWorkerGroups() { - const groups = { - 'queue:onnx': 'onnx-workers', - 'queue:bie': 'bie-workers', - 'queue:nef': 'nef-workers', - }; - for (const [queue, group] of Object.entries(groups)) { - await ensureConsumerGroup(queue, group); - } -} - -// --------------------------------------------------------------------------- -// API Routes -// --------------------------------------------------------------------------- - -// Health check -app.get('/health', async (req, res) => { - try { - await redis.ping(); - res.json({ - service: 'task-scheduler', - status: 'healthy', - timestamp: new Date().toISOString(), - redis: 'connected', - }); - } catch { - res.status(503).json({ - service: 'task-scheduler', - status: 'unhealthy', - redis: 'disconnected', - }); - } -}); - -// POST /jobs — Create a new job -app.post('/jobs', upload.fields([ - { name: 'model', maxCount: 1 }, - { name: 'ref_images', maxCount: 100 }, -]), async (req, res) => { - try { - // Validate required fields - const { model_id, version, platform } = req.body; - if (!model_id || !version || !platform) { - return res.status(400).json({ error: 'model_id, version, platform are required' }); - } - if (!req.files || !req.files.model || req.files.model.length === 0) { - return res.status(400).json({ error: 'model file is required' }); - } - - const jobId = uuidv4(); - - if (minio) { - // S3 mode: upload files to MinIO - const modelFile = req.files.model[0]; - const s3Prefix = `jobs/${jobId}`; - await uploadToMinIO( - `${s3Prefix}/input/${modelFile.originalname}`, - modelFile.buffer, - modelFile.mimetype || 
'application/octet-stream', - ); - - if (req.files.ref_images) { - for (const img of req.files.ref_images) { - await uploadToMinIO( - `${s3Prefix}/input/ref_images/${img.originalname}`, - img.buffer, - img.mimetype || 'image/jpeg', - ); - } - } - - console.log(`[Scheduler] Uploaded job ${jobId} files to MinIO`); - } else { - // Local mode: write to shared volume - const jobDir = path.join(JOB_DATA_DIR, jobId); - const inputDir = path.join(jobDir, 'input'); - const refImagesDir = path.join(inputDir, 'ref_images'); - const logsDir = path.join(jobDir, 'logs'); - - fs.mkdirSync(inputDir, { recursive: true }); - fs.mkdirSync(refImagesDir, { recursive: true }); - fs.mkdirSync(logsDir, { recursive: true }); - - const modelFile = req.files.model[0]; - const modelPath = path.join(inputDir, modelFile.originalname); - fs.writeFileSync(modelPath, modelFile.buffer); - - if (req.files.ref_images) { - for (const img of req.files.ref_images) { - const imgPath = path.join(refImagesDir, img.originalname); - fs.writeFileSync(imgPath, img.buffer); - } - } - } - - // Optional flags - const parameters = { - model_id: parseInt(model_id, 10), - version, - platform, - enable_evaluate: req.body.enable_evaluate === 'true', - enable_sim_fp: req.body.enable_sim_fp === 'true', - enable_sim_fixed: req.body.enable_sim_fixed === 'true', - enable_sim_hw: req.body.enable_sim_hw === 'true', - }; - - // Create job record - const job = { - job_id: jobId, - created_at: new Date().toISOString(), - status: 'ONNX', - stage: 'onnx', - progress: 0, - updated_at: new Date().toISOString(), - parameters, - output: { bie_path: null, nef_path: null }, - error: null, - }; - - await setJob(jobId, job); - - // Enqueue to first stage - await enqueueStage('onnx', job); - - res.status(201).json({ - job_id: jobId, - status: 'ONNX', - message: 'Job created and queued', - }); - } catch (err) { - console.error('[Scheduler] POST /jobs error:', err); - res.status(500).json({ error: err.message }); - } -}); - -// GET 
/jobs/:jobId — Query job status -app.get('/jobs/:jobId', async (req, res) => { - const job = await getJob(req.params.jobId); - if (!job) { - return res.status(404).json({ error: 'JOB_NOT_FOUND' }); - } - res.json(job); -}); - -// GET /jobs — List all jobs -app.get('/jobs', async (req, res) => { - try { - const keys = await redis.keys('job:*'); - const jobs = []; - for (const key of keys) { - const raw = await redis.get(key); - if (raw) jobs.push(JSON.parse(raw)); - } - // Sort by created_at descending - jobs.sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); - res.json(jobs); - } catch (err) { - res.status(500).json({ error: err.message }); - } -}); - -// GET /jobs/:jobId/events — SSE stream -app.get('/jobs/:jobId/events', async (req, res) => { - const jobId = req.params.jobId; - - const job = await getJob(jobId); - if (!job) { - return res.status(404).json({ error: 'JOB_NOT_FOUND' }); - } - - // Set SSE headers - res.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - }); - - // Send current state immediately - res.write(`data: ${JSON.stringify(job)}\n\n`); - - // Register client - if (!sseClients.has(jobId)) { - sseClients.set(jobId, new Set()); - } - sseClients.get(jobId).add(res); - - // Heartbeat to keep connection alive - const heartbeat = setInterval(() => { - res.write(': heartbeat\n\n'); - }, 15000); - - // Cleanup on disconnect - req.on('close', () => { - clearInterval(heartbeat); - const clients = sseClients.get(jobId); - if (clients) { - clients.delete(res); - if (clients.size === 0) sseClients.delete(jobId); - } - }); -}); - -// GET /jobs/:jobId/download/:filename — Download result file -app.get('/jobs/:jobId/download/:filename', async (req, res) => { - const { jobId, filename } = req.params; - - const job = await getJob(jobId); - if (!job) { - return res.status(404).json({ error: 'JOB_NOT_FOUND' }); - } - - if (minio) { - // MinIO mode: fetch from MinIO and send - const 
minioKey = `jobs/${jobId}/${filename}`; - try { - const result = await getFromMinIO(minioKey); - if (!result) { - return res.status(404).json({ error: 'FILE_NOT_FOUND' }); - } - res.setHeader('Content-Disposition', `attachment; filename="${filename}"`); - res.setHeader('Content-Length', result.body.length); - res.send(result.body); - } catch (err) { - if (err.name === 'NoSuchKey') { - return res.status(404).json({ error: 'FILE_NOT_FOUND' }); - } - console.error('[Scheduler] Download error:', err); - res.status(500).json({ error: 'Download failed' }); - } - } else { - // Local mode: serve from filesystem - const filePath = path.join(JOB_DATA_DIR, jobId, filename); - if (!fs.existsSync(filePath)) { - return res.status(404).json({ error: 'FILE_NOT_FOUND' }); - } - res.download(filePath); - } -}); - -// GET /queues/stats — Queue monitoring stats -app.get('/queues/stats', async (req, res) => { - try { - const queues = ['queue:onnx', 'queue:bie', 'queue:nef', 'queue:done']; - const groupNames = { - 'queue:onnx': 'onnx-workers', - 'queue:bie': 'bie-workers', - 'queue:nef': 'nef-workers', - 'queue:done': 'scheduler', - }; - - const stats = {}; - - for (const queue of queues) { - const length = await redis.xlen(queue); - let consumers = []; - let pending = 0; - let lag = 0; - - const group = groupNames[queue]; - if (group) { - try { - const groups = await redis.xinfo('GROUPS', queue); - // xinfo GROUPS returns flat array: [name, val, name, val, ...] 
- for (let i = 0; i < groups.length; i++) { - const g = groups[i]; - // Each group is a flat array of key-value pairs - const info = {}; - for (let j = 0; j < g.length; j += 2) { - info[g[j]] = g[j + 1]; - } - if (info.name === group) { - pending = parseInt(info.pending || '0', 10); - lag = parseInt(info.lag || '0', 10); - - // Get consumers in this group - try { - const consumerList = await redis.xinfo('CONSUMERS', queue, group); - consumers = consumerList.map((c) => { - const ci = {}; - for (let j = 0; j < c.length; j += 2) { - ci[c[j]] = c[j + 1]; - } - return { - name: ci.name, - pending: parseInt(ci.pending || '0', 10), - idle: parseInt(ci.idle || '0', 10), - }; - }); - } catch { /* no consumers yet */ } - break; - } - } - } catch { /* group may not exist yet */ } - } - - stats[queue] = { length, pending, lag, consumers }; - } - - // Also get job summary - const keys = await redis.keys('job:*'); - const jobSummary = { total: keys.length, ONNX: 0, BIE: 0, NEF: 0, COMPLETED: 0, FAILED: 0 }; - for (const key of keys) { - const raw = await redis.get(key); - if (raw) { - const job = JSON.parse(raw); - if (jobSummary[job.status] !== undefined) { - jobSummary[job.status]++; - } - } - } - - res.json({ - timestamp: new Date().toISOString(), - queues: stats, - jobs: jobSummary, - }); - } catch (err) { - console.error('[Scheduler] GET /queues/stats error:', err); - res.status(500).json({ error: err.message }); - } -}); - -// Error handling -app.use((err, req, res, next) => { - console.error('[Scheduler] Server error:', err); - res.status(500).json({ error: 'Internal server error' }); -}); - -// 404 -app.use('*', (req, res) => { - res.status(404).json({ error: 'Endpoint not found' }); -}); - -// --------------------------------------------------------------------------- -// Start -// --------------------------------------------------------------------------- async function start() { - // Ensure all consumer groups exist - await ensureWorkerGroups(); + await 
ensureWorkerGroups(redis); - // Start listening for done events (background) - listenDoneQueue().catch((err) => { - console.error('[Scheduler] Done listener fatal error:', err); - process.exit(1); - }); + // done queue listener(背景) + startListenDone({ redis, redisSub, jobService }) + .start() + .catch((err) => { + console.error('[Scheduler] Done listener fatal error:', err); + process.exit(1); + }); + + // T8:啟動 health background polling(30s 一次,第一次立即觸發) + healthService.start(); + + // T8:graceful shutdown — 收到 SIGTERM/SIGINT 時停 polling,避免 process 卡住 + const onShutdown = (signal) => { + console.log(`[Scheduler] Received ${signal}, stopping health polling`); + try { + healthService.stop(); + } catch (err) { + console.error('[Scheduler] healthService.stop error:', err); + } + // 不在此 process.exit;交由 Node 自然結束(unref 過的 timer 不會擋 exit) + }; + process.once('SIGTERM', () => onShutdown('SIGTERM')); + process.once('SIGINT', () => onShutdown('SIGINT')); app.listen(PORT, () => { console.log(`[Scheduler] Running on port ${PORT}`); console.log(`[Scheduler] Redis: ${REDIS_URL}`); console.log(`[Scheduler] Job data dir: ${JOB_DATA_DIR}`); - console.log(`[Scheduler] Storage: ${STORAGE_BACKEND}${minio ? ` (${MINIO_ENDPOINT}/${MINIO_BUCKET})` : ''}`); + console.log( + `[Scheduler] Storage: ${STORAGE_BACKEND}${minio.client ? 
` (${minio.endpoint}/${minio.bucket})` : ''}` + ); console.log(`[Scheduler] Stages: ${STAGES.join(' -> ')}`); + console.log( + `[Scheduler] Auth config OK: issuer=${config.memberCenter.issuer}, audience=${config.converter.audience}` + ); + // T10:印出 multipart / concurrency 配置,方便 ops 確認生效值(不含 secret) + console.log( + `[Scheduler] Multipart limits: model=${config.multipart.modelMaxBytes}B, ` + + `ref_image=${config.multipart.refImageMaxBytes}B, ` + + `ref_images_count=${config.multipart.refImagesMaxCount}` + ); + console.log( + `[Scheduler] Upload concurrency: max=${config.uploadConcurrency.maxConcurrent} ` + + `(503 retry-after=${config.uploadConcurrency.retryAfterSeconds}s when full)` + ); }); } diff --git a/apps/task-scheduler/src/__tests__/config.test.js b/apps/task-scheduler/src/__tests__/config.test.js new file mode 100644 index 0000000..21ce740 --- /dev/null +++ b/apps/task-scheduler/src/__tests__/config.test.js @@ -0,0 +1,205 @@ +/** + * config.js 單元測試(T10 修 D5:multipart / uploadConcurrency env 串接)。 + * + * 重點: + * 1. 預設值正確(不傳 env 時 multipart / concurrency fallback 到合理預設) + * 2. env override 真的會被讀取(用 stub process.env,重新 require module) + * 3. 非法值(0 / 負數)會 throw + * 4. 
既有必填 env 缺漏 fail-fast 不被本任務破壞 + * + * 測試策略: + * - 每個 case 進來前 backup process.env、設好需要的變數,呼叫 jest.resetModules() + * 讓 require('../config') 重新讀取 env;結束後 restore env + * - 不依賴 .env 檔(避免 dotenv 副作用干擾) + */ + +'use strict'; + +const ENV_KEYS_TO_BACKUP = [ + // 必填(缺漏 throw)— 測試前必須補齊 + 'MEMBER_CENTER_ISSUER', + 'MEMBER_CENTER_JWKS_URL', + 'MEMBER_CENTER_TOKEN_URL', + 'KNERON_CONVERTER_AUDIENCE', + 'KNERON_CONVERTER_CLIENT_ID', + 'KNERON_CONVERTER_CLIENT_SECRET', + 'FILE_ACCESS_AGENT_BASE_URL', + 'FILE_ACCESS_AGENT_AUDIENCE', + // T10 新增 + 'MULTIPART_MODEL_MAX_BYTES', + 'MULTIPART_REF_IMAGE_MAX_BYTES', + 'MULTIPART_REF_IMAGES_MAX_COUNT', + 'MAX_CONCURRENT_UPLOADS', + 'UPLOAD_RETRY_AFTER_SECONDS', + // 其他 optional + 'CONVERTER_TENANT_ID', + 'CONVERTER_SCOPE_WRITE', + 'CONVERTER_SCOPE_READ', + 'PROMOTE_TIMEOUT_MS', + 'NODE_ENV', +]; + +let backedUpEnv = {}; + +function backupEnv() { + backedUpEnv = {}; + for (const k of ENV_KEYS_TO_BACKUP) { + backedUpEnv[k] = process.env[k]; + delete process.env[k]; + } +} + +function restoreEnv() { + for (const k of ENV_KEYS_TO_BACKUP) { + if (backedUpEnv[k] === undefined) { + delete process.env[k]; + } else { + process.env[k] = backedUpEnv[k]; + } + } +} + +function setMinimumValidEnv() { + // 滿足必填 — 用 .invalid placeholder(DNS 不解析,安全) + process.env.MEMBER_CENTER_ISSUER = 'https://auth.test.invalid'; + process.env.MEMBER_CENTER_JWKS_URL = 'https://auth.test.invalid/.well-known/jwks'; + process.env.MEMBER_CENTER_TOKEN_URL = 'https://auth.test.invalid/oauth/token'; + process.env.KNERON_CONVERTER_AUDIENCE = 'kneron_converter_api'; + process.env.KNERON_CONVERTER_CLIENT_ID = 'kneron_converter_test'; + process.env.KNERON_CONVERTER_CLIENT_SECRET = 'test-secret'; + process.env.FILE_ACCESS_AGENT_BASE_URL = 'https://files.test.invalid'; + process.env.FILE_ACCESS_AGENT_AUDIENCE = 'file_access_api'; +} + +function loadConfigFresh() { + // 確保拿到的是新 module(不被 require cache 污染) + jest.resetModules(); + // 不要讓 dotenv 蓋掉我們刻意設好的 env + const path = 
require.resolve('../config'); + delete require.cache[path]; + // dotenv 的 cache:在 reset 後 require config 會再 require dotenv(無 cache 影響) + return require('../config').loadConfig(); +} + +beforeEach(() => { + backupEnv(); + setMinimumValidEnv(); +}); + +afterEach(() => { + restoreEnv(); +}); + +describe('config — multipart defaults', () => { + it('uses sane defaults when MULTIPART_* env not set', () => { + const cfg = loadConfigFresh(); + expect(cfg.multipart.modelMaxBytes).toBe(500 * 1024 * 1024); + expect(cfg.multipart.refImageMaxBytes).toBe(10 * 1024 * 1024); + expect(cfg.multipart.refImagesMaxCount).toBe(100); + }); +}); + +describe('config — multipart env overrides', () => { + it('reads MULTIPART_MODEL_MAX_BYTES from env', () => { + process.env.MULTIPART_MODEL_MAX_BYTES = String(200 * 1024 * 1024); + const cfg = loadConfigFresh(); + expect(cfg.multipart.modelMaxBytes).toBe(200 * 1024 * 1024); + }); + + it('reads MULTIPART_REF_IMAGE_MAX_BYTES from env', () => { + process.env.MULTIPART_REF_IMAGE_MAX_BYTES = String(5 * 1024 * 1024); + const cfg = loadConfigFresh(); + expect(cfg.multipart.refImageMaxBytes).toBe(5 * 1024 * 1024); + }); + + it('reads MULTIPART_REF_IMAGES_MAX_COUNT from env', () => { + process.env.MULTIPART_REF_IMAGES_MAX_COUNT = '50'; + const cfg = loadConfigFresh(); + expect(cfg.multipart.refImagesMaxCount).toBe(50); + }); + + it('throws when MULTIPART_MODEL_MAX_BYTES <= 0', () => { + process.env.MULTIPART_MODEL_MAX_BYTES = '0'; + expect(() => loadConfigFresh()).toThrow(/MULTIPART_MODEL_MAX_BYTES/); + }); + + it('throws when MULTIPART_REF_IMAGE_MAX_BYTES <= 0', () => { + process.env.MULTIPART_REF_IMAGE_MAX_BYTES = '-1'; + expect(() => loadConfigFresh()).toThrow(/MULTIPART_REF_IMAGE_MAX_BYTES/); + }); + + it('throws when MULTIPART_REF_IMAGES_MAX_COUNT <= 0', () => { + process.env.MULTIPART_REF_IMAGES_MAX_COUNT = '0'; + expect(() => loadConfigFresh()).toThrow(/MULTIPART_REF_IMAGES_MAX_COUNT/); + }); + + it('throws when MULTIPART_MODEL_MAX_BYTES is not 
an integer', () => { + process.env.MULTIPART_MODEL_MAX_BYTES = 'not-a-number'; + expect(() => loadConfigFresh()).toThrow(/integer/); + }); +}); + +describe('config — uploadConcurrency defaults', () => { + it('uses sane defaults when MAX_CONCURRENT_UPLOADS env not set', () => { + const cfg = loadConfigFresh(); + expect(cfg.uploadConcurrency.maxConcurrent).toBe(5); + expect(cfg.uploadConcurrency.retryAfterSeconds).toBe(30); + }); +}); + +describe('config — uploadConcurrency env overrides', () => { + it('reads MAX_CONCURRENT_UPLOADS from env', () => { + process.env.MAX_CONCURRENT_UPLOADS = '10'; + const cfg = loadConfigFresh(); + expect(cfg.uploadConcurrency.maxConcurrent).toBe(10); + }); + + it('reads UPLOAD_RETRY_AFTER_SECONDS from env', () => { + process.env.UPLOAD_RETRY_AFTER_SECONDS = '60'; + const cfg = loadConfigFresh(); + expect(cfg.uploadConcurrency.retryAfterSeconds).toBe(60); + }); + + it('throws when MAX_CONCURRENT_UPLOADS <= 0', () => { + process.env.MAX_CONCURRENT_UPLOADS = '0'; + expect(() => loadConfigFresh()).toThrow(/MAX_CONCURRENT_UPLOADS/); + }); + + it('throws when UPLOAD_RETRY_AFTER_SECONDS <= 0', () => { + process.env.UPLOAD_RETRY_AFTER_SECONDS = '-30'; + expect(() => loadConfigFresh()).toThrow(/UPLOAD_RETRY_AFTER_SECONDS/); + }); +}); + +describe('config — multipart object is frozen', () => { + it('does not allow mutation of multipart sub-object', () => { + const cfg = loadConfigFresh(); + expect(() => { + cfg.multipart.modelMaxBytes = 999; + }).toThrow(TypeError); + }); + + it('does not allow mutation of uploadConcurrency sub-object', () => { + const cfg = loadConfigFresh(); + expect(() => { + cfg.uploadConcurrency.maxConcurrent = 999; + }).toThrow(TypeError); + }); +}); + +describe('config — fail fast on missing required env (regression check)', () => { + it('throws when MEMBER_CENTER_ISSUER missing', () => { + delete process.env.MEMBER_CENTER_ISSUER; + expect(() => loadConfigFresh()).toThrow(/MEMBER_CENTER_ISSUER/); + }); + + it('throws when 
KNERON_CONVERTER_CLIENT_SECRET missing', () => { + delete process.env.KNERON_CONVERTER_CLIENT_SECRET; + expect(() => loadConfigFresh()).toThrow(/KNERON_CONVERTER_CLIENT_SECRET/); + }); + + it('throws when FILE_ACCESS_AGENT_BASE_URL missing', () => { + delete process.env.FILE_ACCESS_AGENT_BASE_URL; + expect(() => loadConfigFresh()).toThrow(/FILE_ACCESS_AGENT_BASE_URL/); + }); +}); diff --git a/apps/task-scheduler/src/__tests__/health.integration.test.js b/apps/task-scheduler/src/__tests__/health.integration.test.js new file mode 100644 index 0000000..f015279 --- /dev/null +++ b/apps/task-scheduler/src/__tests__/health.integration.test.js @@ -0,0 +1,458 @@ +/** + * Integration tests — /health 升級(T8)。 + * + * 涵蓋場景: + * 1. 預設 healthy(Redis ready + MC/FAA reachable)→ 200 + * 2. Redis disconnected(status='connecting')→ unhealthy + 503 + * 3. MC 不可達(fetch reject)→ degraded + 200 + * 4. FAA 不可達(fetch 5xx)→ degraded + 200 + * 5. 第一次啟動 cache 未填 → MC/FAA = pending + degraded + 200 + * (部署 readiness probe 仍視為可用) + * 6. /health 永遠不阻塞:fetch 卡住 30s 時 /health 立即回 cached 結果 + * 7. 向後相容:response 仍含 service / timestamp / 頂層 redis 欄位 + * 8. 
不洩漏內部 endpoint URL + * + * 此測試起 app.listen(0) 用 fetch 真打 HTTP,與 legacy.integration.test.js 風格一致。 + * + * 命名約定: + * - `httpFetch`(= globalThis.fetch)— 用來打 testing server + * - `probeMock`(jest.fn)— 注入給 healthService 探測 MC / FAA 用 + * 兩者刻意分開,避免 mock 把真實 HTTP 也攔截掉。 + */ + +'use strict'; + +const httpFetch = globalThis.fetch; + +const { createSseService } = require('../services/sseService'); +const { createJobService } = require('../services/jobService'); +const { createUploader } = require('../middleware/upload'); +const { createHealthService } = require('../services/healthService'); +const { createApp } = require('../app'); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeFakeRedis(status = 'ready') { + const store = new Map(); + return { + status, + store, + ping: jest.fn(async () => 'PONG'), + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + keys: jest.fn(async () => []), + xadd: jest.fn(async () => '1-0'), + xlen: jest.fn(async () => 0), + xinfo: jest.fn(async () => { + throw new Error('NOGROUP'); + }), + }; +} + +function makeFakeMinio() { + return { + client: { _fake: true }, + bucket: 'test-bucket', + endpoint: 'http://nope', + uploadToMinIO: jest.fn(async () => undefined), + getFromMinIO: jest.fn(async () => null), + }; +} + +/** + * 建立 healthService 用的探測 fetch mock。 + * 注意:這個只給 healthService 用,不影響真實的 httpFetch。 + */ +function makeProbeMock(handlers = {}) { + return jest.fn(async (url, opts) => { + const handler = handlers[url]; + if (!handler) return { status: 200, ok: true }; + if (handler instanceof Error) throw handler; + if (typeof handler === 'function') return handler(url, opts); + return handler; + }); +} + +async function startApp(deps, opts) { + const app = createApp(deps, opts); + return new 
Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + server, + baseUrl: `http://127.0.0.1:${port}`, + close: () => new Promise((r) => server.close(() => r())), + }); + }); + }); +} + +// 抑制 console 噪音 +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('GET /health — T8 upgrade', () => { + it('returns 200 healthy when all deps OK (Redis ready + MC/FAA reachable)', async () => { + const MC_URL = 'https://mc-test/.well-known/jwks'; + const FAA_URL = 'https://faa-test/health'; + const probeMock = makeProbeMock({ + [MC_URL]: { status: 200, ok: true }, + [FAA_URL]: { status: 200, ok: true }, + }); + + const redis = makeFakeRedis('ready'); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: MC_URL, + fileAccessAgentProbeUrl: FAA_URL, + fetch: probeMock, + probeTimeoutMs: 200, + }); + await healthService._runOnce(); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const res = await httpFetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body).toEqual( + expect.objectContaining({ + service: 'task-scheduler', // 向後相容 + status: 'healthy', + timestamp: expect.any(String), + redis: 'connected', // 向後相容(頂層) + version: '1.0.0', + dependencies: { + redis: 'connected', + member_center: 'reachable', + 
file_access_agent: 'reachable', + }, + }) + ); + } finally { + await ctx.close(); + healthService.stop(); + } + }); + + it('returns 503 unhealthy when Redis status is not ready', async () => { + const probeMock = makeProbeMock(); + const redis = makeFakeRedis('connecting'); // 模擬連線中 + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: 'https://mc/jwks', + fileAccessAgentProbeUrl: 'https://faa/health', + fetch: probeMock, + probeTimeoutMs: 200, + }); + await healthService._runOnce(); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const res = await httpFetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(503); + const body = await res.json(); + expect(body.status).toBe('unhealthy'); + expect(body.dependencies.redis).toBe('disconnected'); + expect(body.redis).toBe('disconnected'); // 向後相容 + } finally { + await ctx.close(); + healthService.stop(); + } + }); + + it('returns 200 degraded when Member Center fetch rejects', async () => { + const MC_URL = 'https://mc-bad/.well-known/jwks'; + const FAA_URL = 'https://faa-good/health'; + const probeMock = makeProbeMock({ + [MC_URL]: new Error('ECONNREFUSED'), + [FAA_URL]: { status: 200, ok: true }, + }); + + const redis = makeFakeRedis('ready'); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: MC_URL, + fileAccessAgentProbeUrl: FAA_URL, + fetch: probeMock, + probeTimeoutMs: 200, + }); + await healthService._runOnce(); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const res = await 
httpFetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(200); // degraded 是 200 + const body = await res.json(); + expect(body.status).toBe('degraded'); + expect(body.dependencies.member_center).toBe('unreachable'); + expect(body.dependencies.file_access_agent).toBe('reachable'); + expect(body.dependencies.redis).toBe('connected'); + } finally { + await ctx.close(); + healthService.stop(); + } + }); + + it('returns 200 degraded when File Access Agent returns 5xx', async () => { + const MC_URL = 'https://mc-good/.well-known/jwks'; + const FAA_URL = 'https://faa-bad/health'; + const probeMock = makeProbeMock({ + [MC_URL]: { status: 200, ok: true }, + [FAA_URL]: { status: 503, ok: false }, + }); + + const redis = makeFakeRedis('ready'); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: MC_URL, + fileAccessAgentProbeUrl: FAA_URL, + fetch: probeMock, + probeTimeoutMs: 200, + }); + await healthService._runOnce(); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const res = await httpFetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.status).toBe('degraded'); + expect(body.dependencies.file_access_agent).toBe('unreachable'); + expect(body.dependencies.member_center).toBe('reachable'); + } finally { + await ctx.close(); + healthService.stop(); + } + }); + + it('returns degraded with pending deps before first poll completes', async () => { + // 不呼叫 _runOnce,模擬 process 剛啟動還沒拿到 first poll 結果 + const redis = makeFakeRedis('ready'); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: 
'https://mc/jwks', + fileAccessAgentProbeUrl: 'https://faa/health', + fetch: makeProbeMock(), // 沒人會用 + probeTimeoutMs: 200, + }); + // ★ 故意不呼叫 healthService.start() / _runOnce(),模擬第一個 polling 完成前的狀態 + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const res = await httpFetch(`${ctx.baseUrl}/health`); + // 部署 readiness:仍回 200,避免 Kubernetes 在啟動初期就把 pod 標 not ready + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.status).toBe('degraded'); + expect(body.dependencies.member_center).toBe('pending'); + expect(body.dependencies.file_access_agent).toBe('pending'); + expect(body.dependencies.redis).toBe('connected'); + } finally { + await ctx.close(); + healthService.stop(); + } + }); + + it('does NOT block: even when probes hang for seconds, /health responds immediately', async () => { + // 模擬 probe fetch 永遠不 resolve(除非被 abort) + const probeMock = jest.fn( + (_url, opts) => + new Promise((_resolve, reject) => { + if (opts && opts.signal) { + opts.signal.addEventListener('abort', () => { + const err = new Error('aborted'); + err.name = 'AbortError'; + reject(err); + }); + } + }) + ); + + const redis = makeFakeRedis('ready'); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: 'https://mc-hang/jwks', + fileAccessAgentProbeUrl: 'https://faa-hang/health', + fetch: probeMock, + probeTimeoutMs: 30 * 1000, // 30s — 模擬「卡很久」 + }); + // ★ 啟動 polling 但不等它完成 + healthService.start(); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const start = Date.now(); + const res = await httpFetch(`${ctx.baseUrl}/health`); + const elapsed = Date.now() - start; + // 即使 background polling hang 住 30s,/health 仍應 < 200ms 回應 + 
expect(elapsed).toBeLessThan(200); + expect(res.status).toBe(200); // pending → degraded → 200 + const body = await res.json(); + expect(['pending', 'reachable', 'unreachable']).toContain(body.dependencies.member_center); + expect(['pending', 'reachable', 'unreachable']).toContain( + body.dependencies.file_access_agent + ); + } finally { + await ctx.close(); + // ★ 必須先 stop,否則 background fetch 永不結束、process 就退不出去 + healthService.stop(); + } + }); + + it('does not leak internal endpoint URLs in response or logs', async () => { + const SECRET_MC = 'https://internal-mc-secret-host.example/.well-known/jwks'; + const SECRET_FAA = 'https://internal-faa-secret-host.example:9876/health'; + const probeMock = makeProbeMock({ + [SECRET_MC]: { status: 503, ok: false }, + [SECRET_FAA]: new Error('Connection refused to internal-faa-secret-host.example:9876'), + }); + + const redis = makeFakeRedis('ready'); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const healthService = createHealthService({ + redis, + memberCenterProbeUrl: SECRET_MC, + fileAccessAgentProbeUrl: SECRET_FAA, + fetch: probeMock, + probeTimeoutMs: 200, + }); + await healthService._runOnce(); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + healthService, + }); + + try { + const res = await httpFetch(`${ctx.baseUrl}/health`); + const text = await res.text(); + expect(text).not.toContain('internal-mc-secret-host'); + expect(text).not.toContain('internal-faa-secret-host'); + expect(text).not.toContain('9876'); + } finally { + await ctx.close(); + healthService.stop(); + } + }); + + it('falls back to legacy Redis ping when healthService is not provided', async () => { + // 確保 backwards compatibility:deps 沒帶 healthService 時,行為 = 既有 server.js + const redis = makeFakeRedis('ready'); + redis.ping = jest.fn(async () => 'PONG'); + const minio = makeFakeMinio(); + const 
sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + // ★ 故意不傳 healthService + }); + + try { + const res = await httpFetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(200); + const body = await res.json(); + // 舊格式(不含 dependencies / version 鍵) + expect(body).toMatchObject({ + service: 'task-scheduler', + status: 'healthy', + redis: 'connected', + }); + expect(redis.ping).toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); diff --git a/apps/task-scheduler/src/__tests__/legacy.integration.test.js b/apps/task-scheduler/src/__tests__/legacy.integration.test.js new file mode 100644 index 0000000..44ac15e --- /dev/null +++ b/apps/task-scheduler/src/__tests__/legacy.integration.test.js @@ -0,0 +1,650 @@ +/** + * Legacy 路由整合測試(T4 — smoke test)。 + * + * 目標:用 mock Redis + mock MinIO 啟動實際的 Express app,逐一打 7 個 legacy + * 端點,驗證行為與 server.js 既有版本對齊: + * - GET /health(healthy + Redis fail 時的 503) + * - POST /jobs(multipart,driver 在 MinIO mode) + * - GET /jobs/:jobId(找到 / 不存在) + * - GET /jobs(list) + * - GET /jobs/:jobId/events(SSE — 觀察 headers + initial payload) + * - GET /jobs/:jobId/download/:filename(MinIO mode) + * - GET /queues/stats + * + * 不打真 Redis / MinIO;用 fake objects 注入。 + * + * 此測試的設計風格與 T1 middleware 的 Integration 區塊一致:起 app.listen(0), + * 用 fetch() 真打 HTTP。 + */ + +'use strict'; + +const http = require('http'); + +const { createSseService } = require('../services/sseService'); +const { createJobService } = require('../services/jobService'); +const { createUploader } = require('../middleware/upload'); +const { createApp } = require('../app'); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeFakeRedis() { + const store = new Map(); + const xaddCalls = 
[]; + return { + store, + xaddCalls, + pingFails: false, + keysImpl: null, // optional override + + ping: jest.fn(async function () { + if (this.pingFails) throw new Error('ping failed'); + return 'PONG'; + }), + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + keys: jest.fn(async (pattern) => { + const re = new RegExp('^' + pattern.replace(/\*/g, '.*') + '$'); + return [...store.keys()].filter((k) => re.test(k)); + }), + xadd: jest.fn(async (queue, _id, _f, value) => { + xaddCalls.push([queue, value]); + return '1-0'; + }), + xlen: jest.fn(async () => 0), + xinfo: jest.fn(async () => { + // 模擬 group 不存在 — 拋錯讓 legacy 走 catch + throw new Error('NOGROUP'); + }), + }; +} + +function makeFakeMinio({ mode = 'minio', getObjectImpl } = {}) { + const uploaded = []; + if (mode !== 'minio') { + return { + client: null, + bucket: 'test-bucket', + endpoint: 'http://nope', + uploadToMinIO: jest.fn(async () => undefined), + getFromMinIO: jest.fn(async () => null), + _uploaded: uploaded, + }; + } + return { + client: { _fake: true }, // truthy + bucket: 'test-bucket', + endpoint: 'http://localhost:9999', + uploadToMinIO: jest.fn(async (key, body, contentType) => { + uploaded.push({ key, size: body.length, contentType }); + }), + getFromMinIO: jest.fn(getObjectImpl || (async () => null)), + _uploaded: uploaded, + }; +} + +async function startApp(deps, opts) { + const app = createApp(deps, opts); + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + server, + baseUrl: `http://127.0.0.1:${port}`, + close: () => + new Promise((r) => { + server.close(() => r()); + }), + }); + }); + }); +} + +// 抑制 console.log 雜訊 +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + jest.spyOn(console, 
'warn').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +// --------------------------------------------------------------------------- +// Test cases +// --------------------------------------------------------------------------- + +describe('legacy /health', () => { + it('returns 200 healthy when Redis ping succeeds', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + const uploader = createUploader(); + + const ctx = await startApp( + { redis, jobService, sseService, minio, uploader }, + { frontendUrl: 'http://localhost:3000' } + ); + + try { + const res = await fetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body).toMatchObject({ + service: 'task-scheduler', + status: 'healthy', + redis: 'connected', + }); + expect(typeof body.timestamp).toBe('string'); + } finally { + await ctx.close(); + } + }); + + it('returns 503 unhealthy when Redis ping throws', async () => { + const redis = makeFakeRedis(); + redis.pingFails = true; + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/health`); + expect(res.status).toBe(503); + const body = await res.json(); + expect(body).toMatchObject({ + service: 'task-scheduler', + status: 'unhealthy', + redis: 'disconnected', + }); + } finally { + await ctx.close(); + } + }); +}); + +describe('legacy POST /jobs (MinIO mode)', () => { + it('rejects when required fields are missing', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ 
redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + // 缺 model_id, version, platform — 但仍要 multipart,否則會直接被 + // multer 跳過進到 handler + const fd = new FormData(); + fd.append('model', new Blob(['fake-onnx'], { type: 'application/octet-stream' }), 'm.onnx'); + const res = await fetch(`${ctx.baseUrl}/jobs`, { + method: 'POST', + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error).toMatch(/model_id, version, platform/); + } finally { + await ctx.close(); + } + }); + + it('rejects when model file is missing', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const fd = new FormData(); + fd.append('model_id', '1001'); + fd.append('version', '0001'); + fd.append('platform', '520'); + const res = await fetch(`${ctx.baseUrl}/jobs`, { method: 'POST', body: fd }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error).toMatch(/model file is required/); + } finally { + await ctx.close(); + } + }); + + it('returns 201 + writes job to Redis + uploads to MinIO + enqueues onnx', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio({ mode: 'minio' }); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const fd = new FormData(); + fd.append( + 'model', + new Blob([new Uint8Array([1, 2, 3, 4])], { type: 'application/octet-stream' }), + 'mymodel.onnx' + ); + fd.append('model_id', '1001'); + fd.append('version', '0001'); + 
fd.append('platform', '520'); + fd.append('enable_evaluate', 'true'); + // 一張 ref image + fd.append( + 'ref_images', + new Blob([new Uint8Array([9, 9])], { type: 'image/jpeg' }), + 'ref0.jpg' + ); + + const res = await fetch(`${ctx.baseUrl}/jobs`, { method: 'POST', body: fd }); + expect(res.status).toBe(201); + const body = await res.json(); + expect(body).toMatchObject({ + status: 'ONNX', + message: 'Job created and queued', + }); + expect(typeof body.job_id).toBe('string'); + expect(body.job_id).toMatch(/^[0-9a-f-]{36}$/i); + + // Redis 上應該有 job 記錄 + const stored = JSON.parse(redis.store.get(`job:${body.job_id}`)); + expect(stored).toMatchObject({ + job_id: body.job_id, + status: 'ONNX', + stage: 'onnx', + progress: 0, + parameters: { + model_id: 1001, + version: '0001', + platform: '520', + enable_evaluate: true, + enable_sim_fp: false, + enable_sim_fixed: false, + enable_sim_hw: false, + }, + output: { bie_path: null, nef_path: null }, + error: null, + }); + expect(stored.created_at).toMatch(/^\d{4}-\d{2}-\d{2}T/); + expect(stored.updated_at).toMatch(/^\d{4}-\d{2}-\d{2}T/); + + // MinIO 應有 2 次上傳:model + ref0 + expect(minio._uploaded.length).toBe(2); + const keys = minio._uploaded.map((u) => u.key); + expect(keys).toContain(`jobs/${body.job_id}/input/mymodel.onnx`); + expect(keys).toContain(`jobs/${body.job_id}/input/ref_images/ref0.jpg`); + + // 已 enqueue 到 queue:onnx + expect(redis.xaddCalls.length).toBe(1); + expect(redis.xaddCalls[0][0]).toBe('queue:onnx'); + const msg = JSON.parse(redis.xaddCalls[0][1]); + expect(msg.job_id).toBe(body.job_id); + expect(msg.parameters).toEqual(stored.parameters); + } finally { + await ctx.close(); + } + }); +}); + +describe('legacy GET /jobs/:jobId', () => { + it('returns the job when it exists', async () => { + const redis = makeFakeRedis(); + redis.store.set( + 'job:abc', + JSON.stringify({ job_id: 'abc', status: 'ONNX', stage: 'onnx' }) + ); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const 
jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs/abc`); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body).toEqual({ job_id: 'abc', status: 'ONNX', stage: 'onnx' }); + } finally { + await ctx.close(); + } + }); + + it('returns 404 JOB_NOT_FOUND when missing', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs/nonexistent`); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body).toEqual({ error: 'JOB_NOT_FOUND' }); + } finally { + await ctx.close(); + } + }); +}); + +describe('legacy GET /jobs (list)', () => { + it('returns all jobs sorted by created_at desc', async () => { + const redis = makeFakeRedis(); + redis.store.set( + 'job:a', + JSON.stringify({ job_id: 'a', created_at: '2026-04-25T00:00:00Z' }) + ); + redis.store.set( + 'job:b', + JSON.stringify({ job_id: 'b', created_at: '2026-04-26T00:00:00Z' }) + ); + redis.store.set( + 'job:c', + JSON.stringify({ job_id: 'c', created_at: '2026-04-24T00:00:00Z' }) + ); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs`); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.map((j) => j.job_id)).toEqual(['b', 'a', 'c']); + } finally { + await ctx.close(); + } + }); +}); + +describe('legacy GET /jobs/:jobId/events (SSE)', () 
=> { + it('returns 404 when job does not exist', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs/missing/events`); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body).toEqual({ error: 'JOB_NOT_FOUND' }); + } finally { + await ctx.close(); + } + }); + + it('streams SSE headers and initial state on existing job', async () => { + const redis = makeFakeRedis(); + redis.store.set( + 'job:s', + JSON.stringify({ job_id: 's', status: 'ONNX', stage: 'onnx' }) + ); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + // 用低階 http 比 fetch 易於控制(abort 後關閉連線觸發 close) + const headers = await new Promise((resolve, reject) => { + const url = new URL(`${ctx.baseUrl}/jobs/s/events`); + const req = http.request( + { + hostname: url.hostname, + port: url.port, + path: url.pathname, + method: 'GET', + }, + (res) => { + let firstChunk = ''; + res.on('data', (chunk) => { + firstChunk += chunk.toString(); + if (firstChunk.includes('\n\n')) { + resolve({ statusCode: res.statusCode, headers: res.headers, firstChunk }); + req.destroy(); + } + }); + res.on('error', reject); + } + ); + req.on('error', reject); + req.end(); + }); + + expect(headers.statusCode).toBe(200); + expect(headers.headers['content-type']).toMatch(/text\/event-stream/); + expect(headers.headers['cache-control']).toMatch(/no-cache/); + expect(headers.firstChunk.startsWith('data: ')).toBe(true); + const json = JSON.parse(headers.firstChunk.slice(6, headers.firstChunk.indexOf('\n\n'))); + 
expect(json).toMatchObject({ job_id: 's', status: 'ONNX' }); + } finally { + await ctx.close(); + } + }); +}); + +describe('legacy GET /jobs/:jobId/download/:filename (MinIO mode)', () => { + it('returns 404 when job is missing', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs/missing/download/file.bin`); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body).toEqual({ error: 'JOB_NOT_FOUND' }); + } finally { + await ctx.close(); + } + }); + + it('serves file body when MinIO returns content', async () => { + const redis = makeFakeRedis(); + redis.store.set('job:x', JSON.stringify({ job_id: 'x', status: 'COMPLETED' })); + // legacy code 把 minioKey 拼成 `jobs/${jobId}/${filename}`(單段 filename) + // 因 Express path pattern :filename 不允許斜線。本測試對齊此既有限制。 + const minio = makeFakeMinio({ + mode: 'minio', + getObjectImpl: async (key) => { + if (key === 'jobs/x/out.nef') { + return { body: Buffer.from('FAKE_NEF_BYTES'), contentLength: 14 }; + } + return null; + }, + }); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs/x/download/out.nef`); + expect(res.status).toBe(200); + expect(res.headers.get('content-disposition')).toMatch(/attachment/); + expect(res.headers.get('content-length')).toBe('14'); + const buf = Buffer.from(await res.arrayBuffer()); + expect(buf.toString()).toBe('FAKE_NEF_BYTES'); + } finally { + await ctx.close(); + } + }); + + it('returns 404 FILE_NOT_FOUND when MinIO returns null', async () => { + const redis = 
makeFakeRedis(); + redis.store.set('job:y', JSON.stringify({ job_id: 'y' })); + const minio = makeFakeMinio({ + mode: 'minio', + getObjectImpl: async () => null, + }); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/jobs/y/download/missing.bin`); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body).toEqual({ error: 'FILE_NOT_FOUND' }); + } finally { + await ctx.close(); + } + }); +}); + +describe('legacy GET /queues/stats', () => { + it('returns shape with timestamp / queues / jobs summary', async () => { + const redis = makeFakeRedis(); + redis.store.set( + 'job:x', + JSON.stringify({ job_id: 'x', status: 'ONNX' }) + ); + redis.store.set( + 'job:y', + JSON.stringify({ job_id: 'y', status: 'COMPLETED' }) + ); + redis.store.set( + 'job:z', + JSON.stringify({ job_id: 'z', status: 'FAILED' }) + ); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/queues/stats`); + expect(res.status).toBe(200); + const body = await res.json(); + expect(typeof body.timestamp).toBe('string'); + expect(body.queues).toEqual({ + 'queue:onnx': { length: 0, pending: 0, lag: 0, consumers: [] }, + 'queue:bie': { length: 0, pending: 0, lag: 0, consumers: [] }, + 'queue:nef': { length: 0, pending: 0, lag: 0, consumers: [] }, + 'queue:done': { length: 0, pending: 0, lag: 0, consumers: [] }, + }); + expect(body.jobs).toEqual({ + total: 3, + ONNX: 1, + BIE: 0, + NEF: 0, + COMPLETED: 1, + FAILED: 1, + }); + } finally { + await ctx.close(); + } + }); +}); + +describe('app — 404 handling', () => { + it('returns 
404 with legacy error shape', async () => { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService }); + const ctx = await startApp({ + redis, + jobService, + sseService, + minio, + uploader: createUploader(), + }); + + try { + const res = await fetch(`${ctx.baseUrl}/no/such/path`); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body).toEqual({ error: 'Endpoint not found' }); + } finally { + await ctx.close(); + } + }); +}); diff --git a/apps/task-scheduler/src/__tests__/redis.test.js b/apps/task-scheduler/src/__tests__/redis.test.js new file mode 100644 index 0000000..5a4d75e --- /dev/null +++ b/apps/task-scheduler/src/__tests__/redis.test.js @@ -0,0 +1,83 @@ +/** + * src/redis.js 單元測試(T4)。 + * + * 重點: + * 1. ensureConsumerGroup:BUSYGROUP 視為正常、其他 error 會 rethrow + * 2. getDefaultRedisUrl:從 process.env.REDIS_URL 讀,缺時 fallback + * 3. attachErrorLogger:error event 會印 console.error(含 label) + * + * 不測 createClients,因為它真的會嘗試連線 ioredis;其行為簡單,依賴測試會 + * 在整合層或啟動時驗證。 + */ + +'use strict'; + +const { ensureConsumerGroup, _internals } = require('../redis'); + +describe('redis._internals.getDefaultRedisUrl', () => { + const orig = process.env.REDIS_URL; + afterEach(() => { + if (orig === undefined) delete process.env.REDIS_URL; + else process.env.REDIS_URL = orig; + }); + + it('uses process.env.REDIS_URL when set', () => { + process.env.REDIS_URL = 'redis://example:6379/0'; + expect(_internals.getDefaultRedisUrl()).toBe('redis://example:6379/0'); + }); + + it('falls back to redis://localhost:6379 when env is missing', () => { + delete process.env.REDIS_URL; + expect(_internals.getDefaultRedisUrl()).toBe('redis://localhost:6379'); + }); +}); + +describe('redis._internals.attachErrorLogger', () => { + it('logs error event via console.error with label prefix', () => { + const spy = jest.spyOn(console, 'error').mockImplementation(() => {}); + 
const fakeClient = { + handlers: {}, + on(event, fn) { + this.handlers[event] = fn; + }, + }; + _internals.attachErrorLogger(fakeClient, 'TEST'); + + const err = new Error('boom'); + fakeClient.handlers.error(err); + + expect(spy).toHaveBeenCalledTimes(1); + expect(spy.mock.calls[0][0]).toBe('TEST:'); + expect(spy.mock.calls[0][1]).toBe(err); + + spy.mockRestore(); + }); +}); + +describe('ensureConsumerGroup', () => { + it('calls XGROUP CREATE with MKSTREAM', async () => { + const xgroup = jest.fn(async () => 'OK'); + await ensureConsumerGroup({ xgroup }, 'queue:test', 'group-A'); + expect(xgroup).toHaveBeenCalledWith('CREATE', 'queue:test', 'group-A', '0', 'MKSTREAM'); + }); + + it('swallows BUSYGROUP error (group already exists)', async () => { + const xgroup = jest.fn(async () => { + const e = new Error('BUSYGROUP Consumer Group name already exists'); + throw e; + }); + await expect( + ensureConsumerGroup({ xgroup }, 'q', 'g') + ).resolves.toBeUndefined(); + expect(xgroup).toHaveBeenCalledTimes(1); + }); + + it('rethrows other errors', async () => { + const xgroup = jest.fn(async () => { + throw new Error('connection refused'); + }); + await expect( + ensureConsumerGroup({ xgroup }, 'q', 'g') + ).rejects.toThrow(/connection refused/); + }); +}); diff --git a/apps/task-scheduler/src/app.js b/apps/task-scheduler/src/app.js new file mode 100644 index 0000000..5378fa8 --- /dev/null +++ b/apps/task-scheduler/src/app.js @@ -0,0 +1,173 @@ +/** + * Express app 組裝(T4 重構自 server.js L105-126、L609-618)。 + * + * 職責: + * 1. 套用 middleware(helmet / requestId / compression / morgan / cors / json) + * 2. 套用 rate limiter(與 legacy 相同,作用於 `/api`) + * 3. mount /api/v1/* 路由(T3 起) + * 4. mount legacy 路由 + * 5. 
全域 error handler 與 404 + * + * 行為對齊(重構不改行為): + * - middleware 順序與 server.js L107-120 完全一致(除新增 requestId) + * - rate limiter 配置(windowMs: 15min, max: 200, message: ...)對齊 L112-117 + * - cors origin 仍從 process.env.FRONTEND_URL 讀(fallback `http://localhost:3000`) + * - express.json / urlencoded 上限 10mb(L119-120) + * + * T3 新增: + * - requestId middleware **全域掛**:legacy + v1 都會有 req.requestId(對 D4 修復必要) + * - v1 router 掛在 `/api/v1`:含內部 errorHandler 提供 v1 錯誤格式(不影響 legacy) + * + * 設計取捨: + * - 採 factory `createApp(deps)`:deps 帶入 redis / jobService / sseService / + * minio / uploader 等,本檔不直接 require 任何 service module + * - v1 router 在 T3 階段是純骨架(501 端點);認證 / rate limit 留到 T5/T6/T7 加 + */ + +'use strict'; + +const express = require('express'); +const cors = require('cors'); +const helmet = require('helmet'); +const rateLimit = require('express-rate-limit'); +const morgan = require('morgan'); +const compression = require('compression'); + +const { createLegacyRouter } = require('./routes/legacy'); +const { createV1Router } = require('./routes/v1'); +const { requestIdMiddleware } = require('./middleware/requestId'); +const { + createUploadConcurrencyLimiter, +} = require('./middleware/uploadConcurrency'); +const { createFaaClient } = require('./fileAccessAgent/client'); +const oauthClient = require('./auth/oauthClient'); + +/** + * @param {object} deps + * @param {import('ioredis').Redis} deps.redis + * @param {ReturnType} deps.jobService + * @param {{ sendSSE: Function, registerSseClient: Function }} deps.sseService + * @param {ReturnType} deps.minio + * @param {import('multer').Multer} deps.uploader + * @param {ReturnType} [deps.healthService] + * T8:選填,若提供則 /health 走 background-cached snapshot;若缺漏則退回 Redis ping。 + * @param {object} [opts] + * @param {string} [opts.frontendUrl] - CORS origin + * @returns {import('express').Express} + */ +function createApp(deps, opts) { + const frontendUrl = (opts && opts.frontendUrl) || process.env.FRONTEND_URL || 'http://localhost:3000'; + 
+ const app = express(); + + app.use(helmet()); + // T3:requestId 必須早於所有需要 log 或回 error response 的 middleware, + // 確保 morgan / errorHandler / requireAuth 都能拿到 req.requestId。 + app.use(requestIdMiddleware); + app.use(compression()); + app.use(morgan('short')); + app.use(cors({ origin: frontendUrl, credentials: true })); + + // 既有 rate limiter — 與 server.js L112-117 完全一致(作用於 /api 前綴) + const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, + max: 200, + message: 'Too many requests, please try again later.', + }); + app.use('/api', limiter); + + app.use(express.json({ limit: '10mb' })); + app.use(express.urlencoded({ extended: true, limit: '10mb' })); + + // T5:v1 router 需要 jobService / uploader / config 等 dep;若 deps 缺漏(單元 + // 測試常見情境),handler 會自動 fallback 到 501,不會 Crash。 + // 為什麼從 deps 透傳: + // - 保持 server entry → app → router 的注入鏈,避免 router 內部直接 require + // config(會在測試中需要設環境變數) + // - opts.config / opts.rateLimit / opts.storageBackend 給整合測試覆寫用 + // + // T7:promote 需要 faaClient;若呼叫端傳了 deps.faaClient(測試 mock)就用, + // 否則只有當 opts.config 存在時才 lazy-build singleton(避免測試/缺config時 Crash)。 + // 透傳 timeoutMs:優先用 config.fileAccessAgent.promoteTimeoutMs(loadConfig 已從 + // env PROMOTE_TIMEOUT_MS 讀取,預設 300s);缺漏時 fallback 讀 process.env 再 client 的 default。 + let faaClient = deps.faaClient; + if (!faaClient && opts && opts.config && opts.config.fileAccessAgent && opts.config.fileAccessAgent.baseUrl) { + const cfgTimeoutMs = + typeof opts.config.fileAccessAgent.promoteTimeoutMs === 'number' + ? opts.config.fileAccessAgent.promoteTimeoutMs + : null; + const envTimeoutRaw = process.env.PROMOTE_TIMEOUT_MS; + const envTimeoutMs = + envTimeoutRaw && /^\d+$/.test(envTimeoutRaw) && Number.parseInt(envTimeoutRaw, 10) > 0 + ? Number.parseInt(envTimeoutRaw, 10) + : null; + const effectiveTimeoutMs = cfgTimeoutMs || envTimeoutMs || undefined; + faaClient = createFaaClient({ + oauthClient, + config: { baseUrl: opts.config.fileAccessAgent.baseUrl }, + ...(effectiveTimeoutMs ? 
{ timeoutMs: effectiveTimeoutMs } : {}), + }); + } + + // T10:建立 upload concurrency limiter(per-process semaphore,防 OOM) + // 從 opts.config.uploadConcurrency 取上限;缺漏時用 limiter 的內建預設 + // 為什麼建在這裡:app 是 instance scope,不該把 limiter 的 in-process state + // 拉到模組 top-level(測試會互相污染) + let uploadConcurrencyLimiter = null; + if (opts && opts.config && opts.config.uploadConcurrency) { + const ucCfg = opts.config.uploadConcurrency; + const lim = createUploadConcurrencyLimiter({ + maxConcurrent: ucCfg.maxConcurrent, + retryAfterSeconds: ucCfg.retryAfterSeconds, + }); + uploadConcurrencyLimiter = lim.middleware; + } else if ( + opts && + typeof opts.uploadConcurrency === 'object' && + opts.uploadConcurrency + ) { + // 測試友善:允許 opts 直接覆寫 concurrency 設定(不需完整 config) + const lim = createUploadConcurrencyLimiter(opts.uploadConcurrency); + uploadConcurrencyLimiter = lim.middleware; + } + + const v1Deps = { + jobService: deps.jobService, + uploader: deps.uploader, + minio: deps.minio, + faaClient: faaClient || null, + config: opts && opts.config ? opts.config : undefined, + rateLimit: opts && opts.rateLimit ? opts.rateLimit : undefined, + storageBackend: + opts && opts.storageBackend + ? 
opts.storageBackend + : process.env.STORAGE_BACKEND || 'local', + uploadConcurrencyLimiter, + }; + // T3:mount /api/v1 路由 — **必須**在 legacy `/` 之前,避免被 legacy 的 + // 全域 catch-all(雖然 legacy 沒有 catch-all,但保持「specific before generic」原則) + const v1Router = createV1Router(v1Deps); + app.use('/api/v1', v1Router); + + // mount legacy 路由(含 /health, /jobs, /queues/stats) + // T8:deps.healthService 經由 createLegacyRouter 透傳給 /health handler + const legacyRouter = createLegacyRouter(deps); + app.use('/', legacyRouter); + + // 全域 error handler — 對齊 server.js L610-613 + // eslint-disable-next-line no-unused-vars + app.use((err, req, res, next) => { + // eslint-disable-next-line no-console + console.error('[Scheduler] Server error:', err); + res.status(500).json({ error: 'Internal server error' }); + }); + + // 404 — 對齊 server.js L615-618 + app.use('*', (req, res) => { + res.status(404).json({ error: 'Endpoint not found' }); + }); + + return app; +} + +module.exports = { createApp }; diff --git a/apps/task-scheduler/src/auth/__tests__/jwks.test.js b/apps/task-scheduler/src/auth/__tests__/jwks.test.js new file mode 100644 index 0000000..00754cf --- /dev/null +++ b/apps/task-scheduler/src/auth/__tests__/jwks.test.js @@ -0,0 +1,285 @@ +/** + * Unit + Integration tests for src/auth/jwks.js + * + * 測試策略: + * - jose 在 Node CJS 環境下用 node:http / node:https 直接抓 JWKS(不走 global.fetch), + * 所以這份測試啟動一個本機 http server 提供 JWKS endpoint,再讓 jose 真實抓取 + * - 涵蓋正常驗證、過期、issuer 錯、audience 錯、簽章錯、缺 token、alg=none 等情境 + * - 驗證 RemoteJWKSet 的模組層級 cache 命中(_resetForTests) + */ + +'use strict'; + +const http = require('http'); +const { generateKeyPair, exportJWK, SignJWT } = require('jose'); + +const jwksModule = require('../jwks'); + +const TEST_ISSUER = 'https://auth.test.local'; +const TEST_AUDIENCE = 'kneron_converter_api'; + +/** + * 啟動一個本機 http server,提供 GET /.well-known/jwks 回 JWK Set。 + * + * @param {Array} jwks - JWK 陣列(含 kid / alg / use) + * @returns {Promise<{server: import('http').Server, url: 
string}>} + */ +async function startJwksServer(jwks) { + const server = http.createServer((req, res) => { + if (req.url === '/.well-known/jwks') { + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ keys: jwks })); + return; + } + res.writeHead(404); + res.end(); + }); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const addr = server.address(); + return { server, url: `http://127.0.0.1:${addr.port}/.well-known/jwks` }; +} + +async function signTestJwt(privateKey, kid, payload, expirationTime) { + const now = Math.floor(Date.now() / 1000); + const exp = expirationTime !== undefined ? expirationTime : now + 300; + return new SignJWT(payload) + .setProtectedHeader({ alg: 'RS256', kid }) + .setIssuedAt(now) + .setExpirationTime(exp) + .setIssuer(TEST_ISSUER) + .setAudience(TEST_AUDIENCE) + .sign(privateKey); +} + +describe('src/auth/jwks', () => { + let privateKey; + let publicJwk; + const KID = 'test-key-1'; + let jwksServer; + let jwksUrl; + + beforeAll(async () => { + const { privateKey: priv, publicKey: pub } = await generateKeyPair('RS256', { + modulusLength: 2048, + }); + privateKey = priv; + publicJwk = await exportJWK(pub); + + const started = await startJwksServer([ + { ...publicJwk, kid: KID, use: 'sig', alg: 'RS256' }, + ]); + jwksServer = started.server; + jwksUrl = started.url; + }); + + afterAll(async () => { + if (jwksServer) { + await new Promise((resolve) => jwksServer.close(resolve)); + } + }); + + beforeEach(() => { + // 每次測試重置模組層級 cache,避免互相影響 + jwksModule._resetForTests(); + }); + + describe('getJWKS', () => { + it('should require jwksUrl', () => { + expect(() => jwksModule.getJWKS('')).toThrow(/jwksUrl is required/); + expect(() => jwksModule.getJWKS(null)).toThrow(/jwksUrl is required/); + }); + + it('should throw on invalid URL', () => { + expect(() => jwksModule.getJWKS('not-a-url')).toThrow(/Invalid JWKS URL/); + }); + + it('should return the same instance for the same URL 
(module-level cache)', () => { + const a = jwksModule.getJWKS(jwksUrl); + const b = jwksModule.getJWKS(jwksUrl); + expect(a).toBe(b); + }); + + it('should return different instances for different URLs', () => { + const a = jwksModule.getJWKS(jwksUrl); + const b = jwksModule.getJWKS('http://127.0.0.1:1/other-jwks'); + expect(a).not.toBe(b); + }); + }); + + describe('verifyToken', () => { + let baseOpts; + beforeAll(() => { + baseOpts = { + jwksUrl, + issuer: TEST_ISSUER, + audience: TEST_AUDIENCE, + clockToleranceSec: 60, + }; + }); + + it('should verify a valid token', async () => { + const token = await signTestJwt(privateKey, KID, { + sub: 'user-1', + client_id: 'client-1', + scope: 'converter:job.write', + }); + + const result = await jwksModule.verifyToken(token, baseOpts); + expect(result).toBeDefined(); + expect(result.payload.sub).toBe('user-1'); + expect(result.payload.client_id).toBe('client-1'); + expect(result.payload.scope).toBe('converter:job.write'); + }); + + it('should throw ERR_JWT_EXPIRED for expired token', async () => { + // 過期 1 小時,超過 clockTolerance(60 秒) + const expired = Math.floor(Date.now() / 1000) - 3600; + const token = await signTestJwt( + privateKey, + KID, + { sub: 'user-1', scope: 'converter:job.write' }, + expired + ); + + await expect(jwksModule.verifyToken(token, baseOpts)).rejects.toMatchObject({ + code: 'ERR_JWT_EXPIRED', + }); + }); + + it('should throw on wrong issuer', async () => { + const token = await new SignJWT({ sub: 'user-1', scope: 'converter:job.write' }) + .setProtectedHeader({ alg: 'RS256', kid: KID }) + .setIssuedAt() + .setExpirationTime('5m') + .setIssuer('https://wrong.issuer.example') + .setAudience(TEST_AUDIENCE) + .sign(privateKey); + + await expect(jwksModule.verifyToken(token, baseOpts)).rejects.toThrow(); + }); + + it('should throw on wrong audience', async () => { + const token = await new SignJWT({ sub: 'user-1', scope: 'converter:job.write' }) + .setProtectedHeader({ alg: 'RS256', kid: KID }) + 
.setIssuedAt() + .setExpirationTime('5m') + .setIssuer(TEST_ISSUER) + .setAudience('wrong-audience') + .sign(privateKey); + + await expect(jwksModule.verifyToken(token, baseOpts)).rejects.toThrow(); + }); + + it('should throw on signature mismatch (different signing key, same kid)', async () => { + const { privateKey: otherPriv } = await generateKeyPair('RS256', { + modulusLength: 2048, + }); + const token = await signTestJwt(otherPriv, KID, { + sub: 'user-1', + scope: 'converter:job.write', + }); + + await expect(jwksModule.verifyToken(token, baseOpts)).rejects.toThrow(); + }); + + it('should throw on missing kid (no matching key in JWKS)', async () => { + const token = await signTestJwt(privateKey, 'unknown-kid', { + sub: 'user-1', + scope: 'converter:job.write', + }); + + await expect(jwksModule.verifyToken(token, baseOpts)).rejects.toThrow(); + }); + + it('should reject empty token', async () => { + await expect(jwksModule.verifyToken('', baseOpts)).rejects.toMatchObject({ + code: 'ERR_JWS_INVALID', + }); + }); + + it('should reject malformed token (not a JWT)', async () => { + await expect( + jwksModule.verifyToken('not-a-real-jwt', baseOpts) + ).rejects.toThrow(); + }); + + it('should require options.issuer', async () => { + await expect( + jwksModule.verifyToken('x.y.z', { jwksUrl, audience: TEST_AUDIENCE }) + ).rejects.toThrow(/issuer is required/); + }); + + it('should require options.audience', async () => { + await expect( + jwksModule.verifyToken('x.y.z', { jwksUrl, issuer: TEST_ISSUER }) + ).rejects.toThrow(/audience is required/); + }); + + it('should reject alg=none token', async () => { + const header = Buffer.from(JSON.stringify({ alg: 'none', kid: KID })).toString( + 'base64url' + ); + const payload = Buffer.from( + JSON.stringify({ + sub: 'user-1', + iss: TEST_ISSUER, + aud: TEST_AUDIENCE, + exp: Math.floor(Date.now() / 1000) + 300, + scope: 'converter:job.write', + }) + ).toString('base64url'); + const unsignedToken = `${header}.${payload}.`; + 
+ await expect(jwksModule.verifyToken(unsignedToken, baseOpts)).rejects.toThrow(); + }); + + // Sec m3:HMAC 演算法應被拒絕(混淆攻擊防禦) + it('should reject HMAC alg=HS256 token (Sec m3 algorithms pin)', async () => { + // 即便 attacker 用 JWKS 的 RSA public key 當 HMAC secret 簽 token, + // 因為 algorithms pin 為 RSA/ECDSA,jose 會直接 reject 拋錯。 + const fakeSecret = new TextEncoder().encode('fake-hmac-secret-32-bytes-long-x'); + const token = await new SignJWT({ + sub: 'user-1', + scope: 'converter:job.write', + }) + .setProtectedHeader({ alg: 'HS256', kid: KID }) + .setIssuedAt() + .setExpirationTime('5m') + .setIssuer(TEST_ISSUER) + .setAudience(TEST_AUDIENCE) + .sign(fakeSecret); + + await expect( + jwksModule.verifyToken(token, baseOpts) + ).rejects.toThrow(); + }); + + it('should expose ALLOWED_JWT_ALGS list (Sec m3)', () => { + const algs = jwksModule.ALLOWED_JWT_ALGS; + expect(Array.isArray(algs)).toBe(true); + expect(algs).toContain('RS256'); + expect(algs).toContain('ES256'); + expect(algs).toContain('PS256'); + expect(algs).not.toContain('HS256'); + expect(algs).not.toContain('none'); + }); + + it('should accept token within clock skew tolerance', async () => { + // 設一個剛過期 30 秒的 token,但 clockTolerance = 60 秒應該還能通過 + const justExpired = Math.floor(Date.now() / 1000) - 30; + const token = await new SignJWT({ + sub: 'user-1', + scope: 'converter:job.write', + }) + .setProtectedHeader({ alg: 'RS256', kid: KID }) + .setIssuedAt(justExpired - 600) + .setExpirationTime(justExpired) + .setIssuer(TEST_ISSUER) + .setAudience(TEST_AUDIENCE) + .sign(privateKey); + + const result = await jwksModule.verifyToken(token, baseOpts); + expect(result.payload.sub).toBe('user-1'); + }); + }); +}); diff --git a/apps/task-scheduler/src/auth/__tests__/middleware.test.js b/apps/task-scheduler/src/auth/__tests__/middleware.test.js new file mode 100644 index 0000000..46ff6ac --- /dev/null +++ b/apps/task-scheduler/src/auth/__tests__/middleware.test.js @@ -0,0 +1,763 @@ +/** + * Unit + Integration tests for 
src/auth/middleware.js + * + * 測試重點: + * 1. 各種驗證失敗路徑(缺 header / 簽章錯 / issuer 錯 / audience 錯 / 過期 / scope 不夠 / tenant 不符) + * 2. M2:每次失敗都必須 + * - 設 `Connection: close` header + * - 在 res 'finish' 後 destroy req.socket + * 3. 成功路徑:req.auth 設好 + next() 被呼叫 + * 4. Integration:用 supertest + Express 真打一次,確認 socket 真的被斷 + */ + +'use strict'; + +const express = require('express'); +const http = require('http'); +const { generateKeyPair, exportJWK, SignJWT } = require('jose'); + +// 注意:這份 test 用 jest.resetModules + 注入版 verify,不依賴真實 config +const middlewareModule = require('../middleware'); + +// ---------------------------------------------------------------------------- +// 共用 fixture +// ---------------------------------------------------------------------------- + +const TEST_CONFIG = { + memberCenter: { + issuer: 'https://auth.test.local', + jwksUrl: 'https://auth.test.local/.well-known/jwks', + tokenUrl: '', + }, + converter: { + audience: 'kneron_converter_api', + clientId: '', + clientSecret: '', + tenantId: '', + scopeWrite: 'converter:job.write', + scopeRead: 'converter:job.read', + }, + fileAccessAgent: { baseUrl: '', audience: 'file_access_api' }, + jwks: { cacheMaxAgeMs: 600000, cooldownMs: 30000, clockToleranceSec: 60 }, +}; + +const TEST_CONFIG_WITH_TENANT = { + ...TEST_CONFIG, + converter: { ...TEST_CONFIG.converter, tenantId: 'tenant-A' }, +}; + +let privateKey; +let publicJwk; +const KID = 'test-key-1'; + +beforeAll(async () => { + const { privateKey: priv, publicKey: pub } = await generateKeyPair('RS256', { + modulusLength: 2048, + }); + privateKey = priv; + publicJwk = await exportJWK(pub); +}); + +// 抑制驗證失敗時 middleware 的 warn log(避免測試輸出被結構化 log 蓋掉) +// 這些 warn 是「驗證失敗時必輸出」的正常行為,已由斷言驗證 status / code, +// log 內容不是斷言對象。 +let _origWarn; +beforeAll(() => { + _origWarn = console.warn; + console.warn = () => {}; +}); +afterAll(() => { + console.warn = _origWarn; +}); + +/** + * 簽一個測試 JWT。 + */ +async function makeToken(overrides = {}, opts = {}) { + const now = 
Math.floor(Date.now() / 1000); + const payload = { + sub: 'user-1', + client_id: 'client-1', + scope: 'converter:job.write', + ...overrides, + }; + const expirationTime = + opts.expirationTime !== undefined ? opts.expirationTime : now + 300; + const signKey = opts.signKey || privateKey; + const kid = opts.kid || KID; + const issuer = opts.issuer || TEST_CONFIG.memberCenter.issuer; + const audience = opts.audience || TEST_CONFIG.converter.audience; + + return new SignJWT(payload) + .setProtectedHeader({ alg: 'RS256', kid }) + .setIssuedAt(now) + .setExpirationTime(expirationTime) + .setIssuer(issuer) + .setAudience(audience) + .sign(signKey); +} + +/** + * 假的 verify function(注入版)— 直接用 jose.jwtVerify 但不打網路。 + * 用內建的 JWKSet (從 publicJwk 建)。 + */ +function makeInjectedVerify() { + // 動態 import jwtVerify 與 createLocalJWKSet(jose v5+) + const { jwtVerify, createLocalJWKSet } = require('jose'); + const localJwks = createLocalJWKSet({ + keys: [{ ...publicJwk, kid: KID, use: 'sig', alg: 'RS256' }], + }); + + return async function injectedVerify(token, options) { + return jwtVerify(token, localJwks, { + issuer: options.issuer, + audience: options.audience, + clockTolerance: options.clockToleranceSec, + }); + }; +} + +/** + * 建立一組假的 req / res / next,內含 spy 給 socket.destroy。 + */ +function makeReqResNext(authHeader) { + const socket = { + destroyed: false, + destroy: jest.fn(function destroyImpl() { + socket.destroyed = true; + }), + }; + const req = { + headers: authHeader === undefined ? 
{} : { authorization: authHeader }, + socket, + requestId: 'req-test-001', + }; + + // 簡化版 res:只關心 setHeader / status / json / on('finish') / headersSent + const headers = {}; + const finishListeners = []; + const res = { + headersSent: false, + statusCode: 200, + body: null, + setHeader: jest.fn((k, v) => { + headers[k] = v; + }), + getHeader: (k) => headers[k], + status: jest.fn(function statusImpl(code) { + res.statusCode = code; + return res; + }), + json: jest.fn(function jsonImpl(body) { + res.body = body; + res.headersSent = true; + // 模擬 'finish' 事件(async,下個 microtask 觸發) + Promise.resolve().then(() => { + for (const l of finishListeners.splice(0)) { + try { + l(); + } catch (_) { + /* noop */ + } + } + }); + return res; + }), + once: jest.fn((evt, cb) => { + if (evt === 'finish') finishListeners.push(cb); + }), + on: jest.fn((evt, cb) => { + if (evt === 'finish') finishListeners.push(cb); + }), + _flush: () => + new Promise((resolve) => + // 等下個 microtask + setImmediate(resolve) + ), + }; + + const next = jest.fn(); + return { req, res, next, socket, headers }; +} + +// ---------------------------------------------------------------------------- +// Tests +// ---------------------------------------------------------------------------- + +describe('requireAuth — 驗證失敗路徑', () => { + let verify; + beforeAll(() => { + verify = makeInjectedVerify(); + }); + + it('should 401 + destroy when Authorization header missing', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket, headers } = makeReqResNext(undefined); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('invalid_token'); + expect(res.body.error.request_id).toBe('req-test-001'); + expect(headers['Connection']).toBe('close'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); 
+ + it('should 401 + destroy when Authorization header malformed (not Bearer)', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket, headers } = makeReqResNext('Basic abc123'); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('invalid_token'); + expect(headers['Connection']).toBe('close'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 401 + destroy when token is empty after Bearer', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket } = makeReqResNext('Bearer '); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('invalid_token'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 401 token_expired + destroy when token is expired', async () => { + const expiredToken = await makeToken({}, { expirationTime: 100 }); // 1970 早就過期 + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket, headers } = makeReqResNext(`Bearer ${expiredToken}`); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('token_expired'); + expect(headers['Connection']).toBe('close'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 401 invalid_token + destroy when issuer is wrong', async () => { + const token = await makeToken({}, { issuer: 'https://evil.example.com' }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + 
verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('invalid_token'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 401 invalid_token + destroy when audience is wrong', async () => { + const token = await makeToken({}, { audience: 'wrong-audience' }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('invalid_token'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 401 invalid_token + destroy when signature is wrong', async () => { + const { privateKey: otherPriv } = await generateKeyPair('RS256', { + modulusLength: 2048, + }); + // 用 KID 對得上但簽章對不上 + const token = await makeToken({}, { signKey: otherPriv }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(401); + expect(res.body.error.code).toBe('invalid_token'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 403 insufficient_scope + destroy when scope is missing', async () => { + const token = await makeToken({ scope: 'converter:job.read' }); // 沒有 .write + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket, headers } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, 
res, next); + await res._flush(); + + expect(res.statusCode).toBe(403); + expect(res.body.error.code).toBe('insufficient_scope'); + expect(res.body.error.details).toEqual({ + required_scope: 'converter:job.write', + provided_scopes: ['converter:job.read'], + }); + expect(headers['Connection']).toBe('close'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 403 insufficient_scope when scope claim is empty', async () => { + const token = await makeToken({ scope: '' }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(403); + expect(res.body.error.code).toBe('insufficient_scope'); + expect(res.body.error.details.provided_scopes).toEqual([]); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should 403 tenant_mismatch + destroy when tenant_id differs', async () => { + const token = await makeToken({ tenant_id: 'tenant-B' }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG_WITH_TENANT, + verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + await res._flush(); + + expect(res.statusCode).toBe(403); + expect(res.body.error.code).toBe('tenant_mismatch'); + expect(res.body.error.details.expected_tenant).toBe('tenant-A'); + // 不洩漏 token 的 tenant_id + expect(res.body.error.details).not.toHaveProperty('actual_tenant'); + expect(socket.destroy).toHaveBeenCalledTimes(1); + expect(next).not.toHaveBeenCalled(); + }); + + it('should not check tenant when config.tenantId is empty', async () => { + const token = await makeToken({ tenant_id: 'any-tenant' }); + const middleware = 
middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, // tenantId = '' + verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + await res._flush(); + + expect(next).toHaveBeenCalledTimes(1); + expect(socket.destroy).not.toHaveBeenCalled(); + }); +}); + +describe('requireAuth — 驗證成功路徑', () => { + let verify; + beforeAll(() => { + verify = makeInjectedVerify(); + }); + + it('should call next() and set req.auth on valid token with correct scope', async () => { + const token = await makeToken({ + sub: 'user-99', + client_id: 'visionA-backend', + scope: 'converter:job.write converter:job.read', + tenant_id: 'tenant-A', + }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + + expect(next).toHaveBeenCalledTimes(1); + expect(socket.destroy).not.toHaveBeenCalled(); + expect(res.status).not.toHaveBeenCalled(); + expect(req.auth).toBeDefined(); + expect(req.auth.sub).toBe('user-99'); + expect(req.auth.clientId).toBe('visionA-backend'); + expect(req.auth.tenantId).toBe('tenant-A'); + expect(req.auth.scopes).toEqual(['converter:job.write', 'converter:job.read']); + expect(req.auth.raw).toBeDefined(); + expect(req.auth.raw.sub).toBe('user-99'); + }); + + it('should support scp array claim (instead of scope string)', async () => { + const token = await makeToken({ + sub: 'user-1', + scope: undefined, + scp: ['converter:job.write', 'converter:job.read'], + }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + + expect(next).toHaveBeenCalledTimes(1); + expect(req.auth.scopes).toEqual(['converter:job.write', 'converter:job.read']); + }); + + it('should 
fall back clientId to sub when client_id is absent', async () => { + const token = await makeToken({ + sub: 'user-only', + client_id: undefined, + scope: 'converter:job.write', + }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next } = makeReqResNext(`Bearer ${token}`); + + await middleware(req, res, next); + + expect(next).toHaveBeenCalledTimes(1); + expect(req.auth.clientId).toBe('user-only'); + }); + + it('should accept lowercase "bearer" prefix', async () => { + const token = await makeToken({ scope: 'converter:job.write' }); + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next } = makeReqResNext(`bearer ${token}`); + + await middleware(req, res, next); + expect(next).toHaveBeenCalledTimes(1); + }); +}); + +describe('requireAuth — M2 destroy 連線行為(單元層)', () => { + let verify; + beforeAll(() => { + verify = makeInjectedVerify(); + }); + + it('should set Connection: close header BEFORE writing body', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, headers } = makeReqResNext(undefined); + + await middleware(req, res, next); + await res._flush(); + + // 確認 setHeader 在 res.status 之前被呼叫 + const setHeaderOrder = res.setHeader.mock.invocationCallOrder[0]; + const statusOrder = res.status.mock.invocationCallOrder[0]; + expect(setHeaderOrder).toBeLessThan(statusOrder); + expect(headers['Connection']).toBe('close'); + }); + + it('should destroy socket only AFTER res finish event (not before)', async () => { + // 自製一個「不會自動觸發 finish」的 res,讓我們能精確控制觸發時機 + const socket = { destroyed: false, destroy: jest.fn(() => { socket.destroyed = true; }) }; + const finishListeners = []; + const headers = {}; + const res = { + headersSent: false, + statusCode: 200, + body: null, + setHeader: jest.fn((k, v) => { 
headers[k] = v; }), + status: jest.fn(function s(code) { this.statusCode = code; return this; }), + json: jest.fn(function j(b) { this.body = b; this.headersSent = true; return this; }), + // 注意:這個 once 只把 listener 推進陣列,不自動觸發 finish + once: jest.fn((evt, cb) => { if (evt === 'finish') finishListeners.push(cb); }), + on: jest.fn((evt, cb) => { if (evt === 'finish') finishListeners.push(cb); }), + }; + const req = { headers: {}, socket, requestId: 'req-test-001' }; + const next = jest.fn(); + + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + + await middleware(req, res, next); + + // 此時 res.status / res.json 都已執行,但 'finish' 事件還沒被觸發 + expect(res.json).toHaveBeenCalledTimes(1); + expect(socket.destroy).not.toHaveBeenCalled(); + + // 手動觸發 finish 事件(模擬 Node 真實行為:response 寫入完畢後才會觸發) + for (const cb of finishListeners.splice(0)) cb(); + + expect(socket.destroy).toHaveBeenCalledTimes(1); + }); + + it('should use res.once not res.on (to avoid duplicate destroy on retries)', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next } = makeReqResNext(undefined); + + await middleware(req, res, next); + + expect(res.once).toHaveBeenCalledWith('finish', expect.any(Function)); + }); + + it('should not throw if socket is already destroyed', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + const { req, res, next, socket } = makeReqResNext(undefined); + // 預先把 socket 設為 destroyed + socket.destroyed = true; + + await middleware(req, res, next); + await res._flush(); + + // 因為 destroyed=true,不應該再呼叫 destroy() + expect(socket.destroy).not.toHaveBeenCalled(); + }); + + it('should handle missing req.socket gracefully', async () => { + const middleware = middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }); + 
const { req, res, next } = makeReqResNext(undefined); + delete req.socket; + + // 不應 throw + await expect(middleware(req, res, next)).resolves.not.toThrow(); + await res._flush(); + }); +}); + +// ---------------------------------------------------------------------------- +// Integration test:用 supertest(內建 http server)驗證真連線被斷 +// ---------------------------------------------------------------------------- + +describe('requireAuth — Integration(真實 Express + http server)', () => { + let verify; + let app; + let server; + let baseUrl; + + beforeAll(async () => { + verify = makeInjectedVerify(); + }); + + beforeEach(async () => { + app = express(); + // 一個極簡的 requestId middleware,模擬 T3 行為 + app.use((req, _res, n) => { + req.requestId = req.headers['x-request-id'] || 'req-int-001'; + n(); + }); + + app.get( + '/protected', + middlewareModule.requireAuth('converter:job.write', { + config: TEST_CONFIG, + verify, + }), + (req, res) => { + res.status(200).json({ ok: true, sub: req.auth.sub }); + } + ); + + await new Promise((resolve) => { + server = app.listen(0, '127.0.0.1', resolve); + }); + const addr = server.address(); + baseUrl = `http://127.0.0.1:${addr.port}`; + }); + + afterEach(async () => { + if (server) { + await new Promise((resolve) => server.close(resolve)); + server = null; + } + }); + + it('should return 401 with Connection: close and close the connection on missing token', async () => { + const res = await fetch(`${baseUrl}/protected`); + const body = await res.json(); + + expect(res.status).toBe(401); + expect(res.headers.get('connection')).toBe('close'); + expect(body.error.code).toBe('invalid_token'); + expect(body.error.request_id).toBe('req-int-001'); + }); + + it('should return 200 + payload on valid token', async () => { + const token = await makeToken({ sub: 'user-int-1', scope: 'converter:job.write' }); + const res = await fetch(`${baseUrl}/protected`, { + headers: { Authorization: `Bearer ${token}` }, + }); + const body = await res.json(); + + 
expect(res.status).toBe(200); + expect(body.ok).toBe(true); + expect(body.sub).toBe('user-int-1'); + }); + + it('should return 403 insufficient_scope with correct details on integration path', async () => { + const token = await makeToken({ scope: 'converter:job.read' }); + const res = await fetch(`${baseUrl}/protected`, { + headers: { Authorization: `Bearer ${token}` }, + }); + const body = await res.json(); + + expect(res.status).toBe(403); + expect(res.headers.get('connection')).toBe('close'); + expect(body.error.code).toBe('insufficient_scope'); + expect(body.error.details.required_scope).toBe('converter:job.write'); + }); + + it('should detect socket close from client side after 401', async () => { + // 用低階 http 模組實際觀察 socket close 事件 + await new Promise((resolve, reject) => { + const url = new URL(`${baseUrl}/protected`); + const req = http.request( + { + hostname: url.hostname, + port: url.port, + path: url.pathname, + method: 'GET', + }, + (res) => { + let raw = ''; + res.on('data', (c) => { + raw += c.toString(); + }); + res.on('end', () => { + try { + expect(res.statusCode).toBe(401); + expect(res.headers.connection).toBe('close'); + const body = JSON.parse(raw); + expect(body.error.code).toBe('invalid_token'); + resolve(); + } catch (e) { + reject(e); + } + }); + } + ); + req.on('error', reject); + req.end(); + }); + }); +}); + +// ---------------------------------------------------------------------------- +// Helper / internals tests +// ---------------------------------------------------------------------------- + +describe('internals.extractBearerToken', () => { + const { extractBearerToken } = middlewareModule._internals; + + it('returns null on undefined / empty', () => { + expect(extractBearerToken(undefined)).toBeNull(); + expect(extractBearerToken('')).toBeNull(); + expect(extractBearerToken(null)).toBeNull(); + }); + + it('returns null on non-Bearer scheme', () => { + expect(extractBearerToken('Basic abc')).toBeNull(); + 
expect(extractBearerToken('Token abc')).toBeNull(); + }); + + it('returns trimmed token on valid Bearer', () => { + expect(extractBearerToken('Bearer xyz123')).toBe('xyz123'); + expect(extractBearerToken('Bearer xyz123 ')).toBe('xyz123'); + expect(extractBearerToken('bearer xyz123')).toBe('xyz123'); + }); + + it('returns null when token portion is empty', () => { + expect(extractBearerToken('Bearer ')).toBeNull(); + expect(extractBearerToken('Bearer ')).toBeNull(); + }); +}); + +describe('internals.extractScopes', () => { + const { extractScopes } = middlewareModule._internals; + + it('parses space-separated scope string', () => { + expect(extractScopes({ scope: 'a b c' })).toEqual(['a', 'b', 'c']); + }); + + it('parses scp array', () => { + expect(extractScopes({ scp: ['a', 'b'] })).toEqual(['a', 'b']); + }); + + it('handles array scope claim', () => { + expect(extractScopes({ scope: ['a', 'b'] })).toEqual(['a', 'b']); + }); + + it('returns empty array when neither present', () => { + expect(extractScopes({})).toEqual([]); + }); + + it('strips empty string entries', () => { + expect(extractScopes({ scope: 'a b' })).toEqual(['a', 'b']); + expect(extractScopes({ scp: ['', 'a'] })).toEqual(['a']); + }); +}); + +describe('internals.sendAuthError — edge cases', () => { + const { sendAuthError } = middlewareModule._internals; + + it('does not double-write when headersSent already', () => { + const { req, res, socket } = makeReqResNext(undefined); + res.headersSent = true; + + sendAuthError(req, res, 401, 'invalid_token', 'msg'); + + // 不該再 setHeader 或 status / json + expect(res.setHeader).not.toHaveBeenCalled(); + expect(res.status).not.toHaveBeenCalled(); + // 但仍嘗試 destroy(保險) + expect(socket.destroy).toHaveBeenCalledTimes(1); + }); +}); diff --git a/apps/task-scheduler/src/auth/__tests__/oauthClient.test.js b/apps/task-scheduler/src/auth/__tests__/oauthClient.test.js new file mode 100644 index 0000000..35564ab --- /dev/null +++ 
b/apps/task-scheduler/src/auth/__tests__/oauthClient.test.js
@@ -0,0 +1,952 @@
+/**
+ * Unit + Integration tests for src/auth/oauthClient.js
+ *
+ * What is covered:
+ *   1. cache hit / miss / refresh on expiry
+ *   2. re-fetch after invalidate
+ *   3. in-flight Promise dedup (concurrent calls for the same scope issue a single request)
+ *   4. different scopes each issue their own request
+ *   5. 4xx → OAuthClientError; 5xx → OAuthServerError; timeout → OAuthTimeoutError
+ *   6. response with missing fields → OAuthServerError
+ *   7. **no secret leakage**: spy on console and verify the logs contain neither the client_secret string nor the access_token
+ *   8. Integration: a real http server stands in for the Member Center token endpoint
+ */
+
+'use strict';
+
+const http = require('http');
+const oauthModule = require('../oauthClient');
+const {
+  OAuthClient,
+  OAuthClientError,
+  OAuthServerError,
+  OAuthTimeoutError,
+  _internals,
+} = oauthModule;
+
+// ----------------------------------------------------------------------------
+// Shared fixtures / helpers
+// ----------------------------------------------------------------------------
+
+const TEST_CLIENT_ID = 'kneron_converter';
+// A deliberately distinctive secret, so every captured log line can be grepped to confirm it never leaks
+const TEST_CLIENT_SECRET = 'super-secret-XYZ-123-must-not-appear-in-logs';
+const TEST_FAA_AUDIENCE = 'file_access_api';
+const TEST_TOKEN_URL = 'http://127.0.0.1:0/oauth/token'; // port 0 is never dialed: these tests inject their own fetch
+
+function makeTestConfig(overrides = {}) { // overrides are shallow-spread last, so overriding e.g. oauthClient replaces that whole sub-object
+  return {
+    memberCenter: {
+      issuer: 'https://auth.test.local',
+      jwksUrl: 'https://auth.test.local/.well-known/jwks',
+      tokenUrl: TEST_TOKEN_URL,
+    },
+    converter: {
+      audience: 'kneron_converter_api',
+      clientId: TEST_CLIENT_ID,
+      clientSecret: TEST_CLIENT_SECRET,
+      tenantId: '',
+      scopeWrite: 'converter:job.write',
+      scopeRead: 'converter:job.read',
+    },
+    fileAccessAgent: {
+      baseUrl: '',
+      audience: TEST_FAA_AUDIENCE,
+    },
+    jwks: { cacheMaxAgeMs: 600000, cooldownMs: 30000, clockToleranceSec: 60 },
+    oauthClient: {
+      refreshSkewMs: 60 * 1000,
+      timeoutMs: 10 * 1000,
+    },
+    ...overrides,
+  };
+}
+
+/**
+ * Build a fake fetch (jest.fn). `handlers` is either one function used for every call or an array consumed in order (the last entry repeats); every call is recorded in `fn.calls`.
+ */
+function makeMockFetch(handlers) {
+  const calls = [];
+  let idx = 0;
+  const fn = jest.fn(async (url, init) => {
+    calls.push({ url, init, at: Date.now() });
+    let handler;
+    if (typeof handlers === 'function') {
+      handler = handlers;
+    } else if (Array.isArray(handlers)) {
+      handler = handlers[Math.min(idx, handlers.length - 1)];
+      idx += 1;
+    } else {
+      throw new Error('handlers must be array or function');
+    }
+    return handler(url, init, idx);
+  });
+  fn.calls = calls;
+  return fn;
+}
+
+/**
+ * Build a successful token response body (access_token / token_type / expires_in); `overrides` are spread last. The default access_token is random per call.
+ */
+function tokenSuccessBody(overrides = {}) {
+  return {
+    access_token: 'mock-access-token-' + Math.random().toString(36).slice(2),
+    token_type: 'Bearer',
+    expires_in: 3600,
+    ...overrides,
+  };
+}
+
+function makeJsonResponse(status, body) { // Response with the given status and a JSON-serialised body
+  return new Response(JSON.stringify(body), {
+    status,
+    headers: { 'Content-Type': 'application/json' },
+  });
+}
+
+function makeTextResponse(status, text) { // Response with the given status and a text/plain body
+  return new Response(text, {
+    status,
+    headers: { 'Content-Type': 'text/plain' },
+  });
+}
+
+/**
+ * Build a controllable now(): it returns the fake time in ms; `advance(ms)` moves it forward and `set(ms)` overwrites it.
+ */
+function makeFakeClock(initialMs = 1_700_000_000_000) {
+  let cur = initialMs;
+  const now = () => cur;
+  now.advance = (ms) => {
+    cur += ms;
+  };
+  now.set = (ms) => {
+    cur = ms;
+  };
+  return now;
+}
+
+// ----------------------------------------------------------------------------
+// Silence console.log / warn / error globally (so structured logs do not swamp the Jest output),
+// but keep the spy objects so the "secret must not leak" tests can inspect them.
+// ----------------------------------------------------------------------------
+let logSpy;
+let warnSpy;
+let errorSpy;
+beforeEach(() => {
+  logSpy = jest.spyOn(console, 'log').mockImplementation(() => {});
+  warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => {});
+  errorSpy = jest.spyOn(console, 'error').mockImplementation(() => {});
+});
+afterEach(() => {
+  logSpy.mockRestore();
+  warnSpy.mockRestore();
+  errorSpy.mockRestore();
+});
+
+/** Flatten every argument captured by the log / warn / error spies into one array of strings (spy, call, argument order) for substring search. */
+function collectAllLoggedStrings() {
+  // Error#message / #stack are non-enumerable, so a bare JSON.stringify(err) is '{}' and would
+  // hide a secret that was logged through an Error object; expand Errors (nested ones too).
+  const expandErrors = (_key, value) =>
+    value instanceof Error ? { name: value.name, message: value.message, stack: value.stack } : value;
+  // String(...) keeps every entry a string even when JSON.stringify returns undefined (e.g. for
+  // an undefined argument), so the callers' expect(line).not.toContain(...) never receives undefined.
+  return [logSpy, warnSpy, errorSpy]
+    .flatMap((spy) => spy.mock.calls)
+    .flat()
+    .map((arg) => (typeof arg === 'string' ? arg : String(JSON.stringify(arg, expandErrors))));
+}
+
+// ----------------------------------------------------------------------------
+// 1. Basic happy path + cache behaviour
+// ----------------------------------------------------------------------------
+
+describe('getServiceToken — happy path & cache', () => {
+  it('first call fetches token and caches it', async () => {
+    const body = tokenSuccessBody();
+    const fetch = makeMockFetch(() => makeJsonResponse(200, body));
+    const clock = makeFakeClock();
+    const client = new OAuthClient({
+      fetch,
+      now: clock,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    const t1 = await client.getServiceToken('files:upload.write');
+    expect(t1).toBe(body.access_token);
+    expect(fetch).toHaveBeenCalledTimes(1);
+
+    // Second call — cache hit, the endpoint is not contacted
+    const t2 = await client.getServiceToken('files:upload.write');
+    expect(t2).toBe(body.access_token);
+    expect(fetch).toHaveBeenCalledTimes(1);
+  });
+
+  it('uses HTTP Basic auth header (not body) for client credentials', async () => {
+    const fetch = makeMockFetch(() => makeJsonResponse(200, tokenSuccessBody()));
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await client.getServiceToken('files:upload.write');
+    const init = fetch.calls[0].init;
+    expect(init.method).toBe('POST');
+    expect(init.headers['Content-Type']).toBe('application/x-www-form-urlencoded');
+    expect(init.headers.Authorization).toMatch(/^Basic /);
+
+    const expected = Buffer.from(
+      `${TEST_CLIENT_ID}:${TEST_CLIENT_SECRET}`,
+      'utf8'
+    ).toString('base64');
+    expect(init.headers.Authorization).toBe(`Basic ${expected}`);
+
+    // The body must not contain client_secret
+    expect(typeof init.body).toBe('string');
+    expect(init.body).not.toContain(TEST_CLIENT_SECRET);
+    expect(init.body).toContain('grant_type=client_credentials');
+    expect(init.body).toContain('scope=files%3Aupload.write');
+    expect(init.body).toContain(`audience=${TEST_FAA_AUDIENCE}`);
+  });
+
+  it('refreshes when cached token is within refreshSkewMs of expiry', async () => {
+    const body1 = tokenSuccessBody({ access_token: 'token-1', expires_in: 100 });
+    const body2 = tokenSuccessBody({ access_token: 'token-2', expires_in: 100 });
+    const fetch = makeMockFetch([
+      () => makeJsonResponse(200, body1),
+      () => makeJsonResponse(200, body2),
+    ]);
+    const clock = makeFakeClock();
+    const client = new OAuthClient({
+      fetch,
+      now: clock,
+      // refreshSkewMs = 60s, token expires_in = 100s → the cached entry counts as expired after (100 - 60)s
+      loadConfig: () => makeTestConfig({ oauthClient: { refreshSkewMs: 60_000, timeoutMs: 10_000 } }),
+    });
+
+    const t1 = await client.getServiceToken('files:upload.write');
+    expect(t1).toBe('token-1');
+
+    // Simulate 41 seconds passing: 100 - 60 = 40, so we are now inside the refresh window
+    clock.advance(41_000);
+    const t2 = await client.getServiceToken('files:upload.write');
+    expect(t2).toBe('token-2');
+    expect(fetch).toHaveBeenCalledTimes(2);
+  });
+
+  it('keeps cached token within freshness window', async () => {
+    const body = tokenSuccessBody({ access_token: 'token-A', expires_in: 200 });
+    const fetch = makeMockFetch(() => makeJsonResponse(200, body));
+    const clock = makeFakeClock();
+    const client = new OAuthClient({
+      fetch,
+      now: clock,
+      loadConfig: () => makeTestConfig({ oauthClient: { refreshSkewMs: 60_000, timeoutMs: 10_000 } }),
+    });
+
+    await client.getServiceToken('files:upload.write');
+    // After 100s there are still 100s left before the 200s expiry, which exceeds the 60s skew → still a cache hit
+    clock.advance(100_000);
+    const t = await client.getServiceToken('files:upload.write');
+    expect(t).toBe('token-A');
+    expect(fetch).toHaveBeenCalledTimes(1);
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 2. 
invalidate
+// ----------------------------------------------------------------------------
+
+describe('invalidate', () => {
+  it('forces next call to fetch a new token', async () => {
+    const tokenEndpoint = makeMockFetch([
+      () => makeJsonResponse(200, tokenSuccessBody({ access_token: 'before' })),
+      () => makeJsonResponse(200, tokenSuccessBody({ access_token: 'after' })),
+    ]);
+    const oauth = new OAuthClient({
+      fetch: tokenEndpoint,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(oauth.getServiceToken('files:upload.write')).resolves.toBe('before');
+    oauth.invalidate('files:upload.write'); // drop the cached entry so the next call must hit the endpoint again
+    await expect(oauth.getServiceToken('files:upload.write')).resolves.toBe('after');
+    expect(tokenEndpoint).toHaveBeenCalledTimes(2);
+  });
+
+  it('is a noop for unknown scope', () => {
+    const oauth = new OAuthClient({
+      fetch() {
+        throw new Error('should not be called');
+      },
+      loadConfig: () => makeTestConfig(),
+    });
+    for (const scope of ['not-cached', '', undefined]) {
+      expect(() => oauth.invalidate(scope)).not.toThrow();
+    }
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 3. 
並發保護(in-flight Promise dedup)
+// ----------------------------------------------------------------------------
+
+describe('in-flight dedup', () => {
+  it('coalesces concurrent calls for same scope into single request', async () => {
+    let resolveOnce;
+    const pending = new Promise((r) => {
+      resolveOnce = r;
+    });
+    const body = tokenSuccessBody({ access_token: 'shared-token' });
+    const fetch = makeMockFetch(async () => {
+      await pending; // hold the first request open until the test releases it
+      return makeJsonResponse(200, body);
+    });
+
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    // Start 10 callers at once
+    const promises = Array.from({ length: 10 }, () =>
+      client.getServiceToken('files:upload.write')
+    );
+
+    // At this point fetch should have been called exactly once (in-flight dedup); checked synchronously, before any await
+    expect(fetch).toHaveBeenCalledTimes(1);
+
+    // Release the fetch
+    resolveOnce(true);
+    const tokens = await Promise.all(promises);
+
+    expect(tokens).toEqual(Array(10).fill('shared-token'));
+    expect(fetch).toHaveBeenCalledTimes(1);
+  });
+
+  it('issues separate requests for different scopes concurrently', async () => {
+    const fetch = makeMockFetch((url, init) => {
+      // Recover the scope from the form-encoded request body
+      const params = new URLSearchParams(init.body);
+      const scope = params.get('scope');
+      return makeJsonResponse(
+        200,
+        tokenSuccessBody({ access_token: `token-for-${scope}` })
+      );
+    });
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    const [a, b] = await Promise.all([
+      client.getServiceToken('files:upload.write'),
+      client.getServiceToken('something:else.read'),
+    ]);
+    expect(a).toBe('token-for-files:upload.write');
+    expect(b).toBe('token-for-something:else.read');
+    expect(fetch).toHaveBeenCalledTimes(2);
+  });
+
+  it('clears in-flight on failure so next call can retry', async () => {
+    let attempt = 0;
+    const fetch = makeMockFetch(async () => {
+      attempt += 1;
+      if (attempt === 1) return makeJsonResponse(500, { error: 'server_error' });
+      return makeJsonResponse(200, tokenSuccessBody({ access_token: 'recovered' }));
+    });
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toBeInstanceOf(OAuthServerError);
+
+    // Even though the first attempt failed, the second call must be able to send a request (in-flight entry cleared)
+    const t = await client.getServiceToken('files:upload.write');
+    expect(t).toBe('recovered');
+    expect(fetch).toHaveBeenCalledTimes(2);
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 4. Error classification
+// ----------------------------------------------------------------------------
+
+describe('error classification', () => {
+  it('throws OAuthClientError on 400 invalid_client', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(400, {
+        error: 'invalid_client',
+        error_description: 'Client authentication failed',
+      })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    let caught;
+    try {
+      await client.getServiceToken('files:upload.write');
+    } catch (e) {
+      caught = e;
+    }
+    expect(caught).toBeInstanceOf(OAuthClientError);
+    expect(caught.status).toBe(400);
+    expect(caught.errorCode).toBe('invalid_client');
+    expect(caught.retryable).toBe(false);
+    // The message should mention the status and must not mention client_secret
+    expect(caught.message).toContain('400');
+    expect(caught.message).not.toContain(TEST_CLIENT_SECRET);
+  });
+
+  it('throws OAuthClientError on 401 invalid_grant', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(401, { error: 'invalid_grant' })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthClientError',
+      status: 401,
+      errorCode: 'invalid_grant',
+      retryable: false,
+    });
+  });
+
+  it('throws OAuthClientError on 4xx with non-JSON body', async () => {
+    const fetch = makeMockFetch(() => makeTextResponse(403, 'Forbidden'));
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthClientError',
+      status: 403,
+      retryable: false,
+    });
+  });
+
+  it('throws OAuthServerError on 500', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(500, { error: 'server_error' })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+      status: 500,
+      retryable: true,
+    });
+  });
+
+  it('throws OAuthServerError on 503', async () => {
+    const fetch = makeMockFetch(() => makeTextResponse(503, 'Service Unavailable'));
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+      status: 503,
+    });
+  });
+
+  it('throws OAuthTimeoutError when fetch is aborted by AbortController', async () => {
+    // Simulate a fetch that never answers on its own → it only rejects once the abort signal fires
+    const fetch = jest.fn(
+      (url, init) =>
+        new Promise((_, reject) => {
+          init.signal.addEventListener('abort', () => {
+            const err = new Error('The operation was aborted.');
+            err.name = 'AbortError';
+            reject(err);
+          });
+        })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig({ oauthClient: { refreshSkewMs: 60_000, timeoutMs: 50 } }),
+    });
+
+    let caught;
+    try {
+      await client.getServiceToken('files:upload.write');
+    } catch (e) {
+      caught = e;
+    }
+    expect(caught).toBeInstanceOf(OAuthTimeoutError);
+    expect(caught.retryable).toBe(true);
+    expect(caught.message).toContain('50ms');
+  });
+
+  it('throws OAuthTimeoutError on generic network error', async () => {
+    const fetch = jest.fn(async () => {
+      const err = new Error('ECONNREFUSED 127.0.0.1:8080');
+      err.code = 'ECONNREFUSED';
+      throw err;
+    });
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthTimeoutError',
+      retryable: true,
+    });
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 5. Response shape validation
+// ----------------------------------------------------------------------------
+
+describe('response shape validation', () => {
+  it('throws OAuthServerError when access_token is missing', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(200, { token_type: 'Bearer', expires_in: 3600 })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+    });
+  });
+
+  it('throws OAuthServerError when token_type is missing', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(200, { access_token: 'x', expires_in: 3600 })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+    });
+  });
+
+  it('throws OAuthServerError when expires_in is missing', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(200, { access_token: 'x', token_type: 'Bearer' })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+    });
+  });
+
+  it('throws OAuthServerError when expires_in is negative', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(200, { access_token: 'x', token_type: 'Bearer', expires_in: -1 })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+    });
+  });
+
+  it('throws OAuthServerError when JSON parse fails', async () => {
+    const fetch = makeMockFetch(
+      () =>
+        new Response('not json {', {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(
+      client.getServiceToken('files:upload.write')
+    ).rejects.toMatchObject({
+      name: 'OAuthServerError',
+    });
+  });
+
+  it('accepts expires_in as numeric string', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(200, { access_token: 'tok', token_type: 'Bearer', expires_in: '3600' })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    const t = await client.getServiceToken('files:upload.write');
+    expect(t).toBe('tok');
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 6. Input validation
+// ----------------------------------------------------------------------------
+
+describe('input validation', () => {
+  it('rejects non-string scope', async () => {
+    const client = new OAuthClient({
+      fetch: () => {
+        throw new Error('should not be reached');
+      },
+      loadConfig: () => makeTestConfig(),
+    });
+    await expect(client.getServiceToken(undefined)).rejects.toThrow(/scope is required/);
+    await expect(client.getServiceToken(null)).rejects.toThrow(/scope is required/);
+    await expect(client.getServiceToken('')).rejects.toThrow(/scope is required/);
+    await expect(client.getServiceToken(' ')).rejects.toThrow(/scope is required/);
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 7. 
**CRITICAL: secret 不洩漏到 log**
+// ----------------------------------------------------------------------------
+
+describe('SECURITY: client_secret never appears in any log', () => {
+  /**
+   * Run the client through every code path that logs, then grep all captured log strings to confirm nothing leaked.
+   *
+   * Log paths meant to be exercised (event names as listed by the original author):
+   *   - oauth.token_obtained (success)
+   *   - oauth.token_invalidated (success)
+   *   - oauth.token_endpoint_error (4xx / 5xx)
+   *   - oauth.token_fetch_failed (timeout / network)
+   *   - oauth.token_response_parse_failed (JSON parse failure)
+   */
+  it('does not log client_secret on success path', async () => {
+    const fetch = makeMockFetch(() => makeJsonResponse(200, tokenSuccessBody()));
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await client.getServiceToken('files:upload.write');
+    client.invalidate('files:upload.write');
+
+    const allLogs = collectAllLoggedStrings();
+    for (const line of allLogs) {
+      expect(line).not.toContain(TEST_CLIENT_SECRET);
+      // Extra safety: a complete Basic auth header must not appear either
+      expect(line).not.toMatch(/Basic [A-Za-z0-9+/=]{20,}/);
+    }
+  });
+
+  it('does not log client_secret on 4xx error path', async () => {
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(400, { error: 'invalid_client', error_description: 'auth failed' })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(client.getServiceToken('files:upload.write')).rejects.toBeDefined();
+
+    const allLogs = collectAllLoggedStrings();
+    for (const line of allLogs) {
+      expect(line).not.toContain(TEST_CLIENT_SECRET);
+    }
+  });
+
+  it('does not log client_secret on 5xx error path', async () => {
+    const fetch = makeMockFetch(() => makeTextResponse(503, 'unavailable'));
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(client.getServiceToken('files:upload.write')).rejects.toBeDefined();
+
+    const allLogs = collectAllLoggedStrings();
+    for (const line of allLogs) {
+      expect(line).not.toContain(TEST_CLIENT_SECRET);
+    }
+  });
+
+  it('does not log client_secret on timeout path', async () => {
+    const fetch = jest.fn(
+      (url, init) =>
+        new Promise((_, reject) => {
+          init.signal.addEventListener('abort', () => {
+            const err = new Error('aborted');
+            err.name = 'AbortError';
+            reject(err);
+          });
+        })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () =>
+        makeTestConfig({ oauthClient: { refreshSkewMs: 60_000, timeoutMs: 30 } }),
+    });
+
+    await expect(client.getServiceToken('files:upload.write')).rejects.toBeDefined();
+
+    const allLogs = collectAllLoggedStrings();
+    for (const line of allLogs) {
+      expect(line).not.toContain(TEST_CLIENT_SECRET);
+    }
+  });
+
+  it('does not log access_token contents on success', async () => {
+    const SECRET_TOKEN = 'do-not-log-me-' + Math.random().toString(36).slice(2);
+    const fetch = makeMockFetch(() =>
+      makeJsonResponse(200, tokenSuccessBody({ access_token: SECRET_TOKEN }))
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await client.getServiceToken('files:upload.write');
+
+    const allLogs = collectAllLoggedStrings();
+    for (const line of allLogs) {
+      expect(line).not.toContain(SECRET_TOKEN);
+    }
+  });
+
+  it('does not log token even when JSON parse fails (bad body)', async () => {
+    const SECRET = 'leaky-token-' + Math.random().toString(36).slice(2);
+    // Although a body parse failure throws OAuthServerError, the implementation's catch should only log error.message,
+    // never the res.text() content.
+    const fetch = makeMockFetch(
+      () =>
+        new Response(`{"access_token":"${SECRET}",bad json`, {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        })
+    );
+    const client = new OAuthClient({
+      fetch,
+      loadConfig: () => makeTestConfig(),
+    });
+
+    await expect(client.getServiceToken('files:upload.write')).rejects.toBeDefined();
+
+    const allLogs = collectAllLoggedStrings();
+    for (const line of allLogs) {
+      expect(line).not.toContain(SECRET);
+    }
+  });
+});
+
+// 
----------------------------------------------------------------------------
+// 8. _internals helpers
+// ----------------------------------------------------------------------------
+
+describe('_internals helpers', () => {
+  it('buildBasicAuthHeader produces RFC 7617 base64 form', () => {
+    const expectedHeader = 'Basic YWxpY2U6b3BlbiBzZXNhbWU=';
+    // "YWxpY2U6b3BlbiBzZXNhbWU=" is the base64 encoding of "alice:open sesame"
+    expect(_internals.buildBasicAuthHeader('alice', 'open sesame')).toBe(expectedHeader);
+  });
+
+  it('parseTokenResponse handles minimal valid payload', () => {
+    const minimalPayload = {
+      access_token: 'a',
+      token_type: 'Bearer',
+      expires_in: 60,
+    };
+    expect(_internals.parseTokenResponse(minimalPayload)).toEqual({ accessToken: 'a', tokenType: 'Bearer', expiresInSec: 60 });
+  });
+
+  it('parseTokenResponse rejects non-object', () => {
+    for (const notAnObject of [null, 'str', 123]) {
+      expect(() => _internals.parseTokenResponse(notAnObject)).toThrow();
+    }
+  });
+
+  it('parseTokenResponse floors fractional expires_in', () => {
+    const fractionalPayload = {
+      access_token: 'a',
+      token_type: 'Bearer',
+      expires_in: 60.7,
+    };
+    expect(_internals.parseTokenResponse(fractionalPayload).expiresInSec).toBe(60);
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 9. 
Integration: 真 http server 模擬 Member Center token endpoint
+// ----------------------------------------------------------------------------
+
+describe('integration with real HTTP server', () => {
+  let server;
+  let serverUrl;
+  /** @type {(req: import('http').IncomingMessage, body: string) => { status: number, body: any }} */
+  let handler;
+
+  beforeAll(async () => {
+    server = http.createServer((req, res) => {
+      let raw = '';
+      req.on('data', (c) => {
+        raw += c.toString('utf8');
+      });
+      req.on('end', () => {
+        try {
+          const result = handler(req, raw);
+          res.writeHead(result.status, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify(result.body));
+        } catch (err) {
+          res.writeHead(500, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({ error: 'test_handler_error', message: err.message }));
+        }
+      });
+    });
+    await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve));
+    const addr = server.address();
+    serverUrl = `http://127.0.0.1:${addr.port}/oauth/token`;
+  });
+
+  afterAll(async () => {
+    if (server) {
+      await new Promise((resolve) => server.close(resolve)); // NOTE(review): close() waits for open connections; confirm keep-alive sockets from fetch cannot stall teardown
+    }
+  });
+
+  function makeIntegrationClient(extraConfig = {}) {
+    return new OAuthClient({
+      // No fetch is injected: uses globalThis.fetch (built into Node 20). NOTE(review): the Dockerfile in this patch pins node:18-alpine — confirm the test runtime
+      loadConfig: () =>
+        makeTestConfig({
+          memberCenter: {
+            issuer: 'https://auth.test.local',
+            jwksUrl: 'https://auth.test.local/.well-known/jwks',
+            tokenUrl: serverUrl,
+          },
+          ...extraConfig,
+        }),
+    });
+  }
+
+  it('sends correct request and parses response (real fetch + http server)', async () => {
+    /** @type {{ headers: any, body: string } | null} */
+    let captured = null;
+    handler = (req, body) => {
+      captured = { headers: req.headers, body };
+      return {
+        status: 200,
+        body: { access_token: 'integration-token', token_type: 'Bearer', expires_in: 3600 },
+      };
+    };
+
+    const client = makeIntegrationClient();
+    const tok = await client.getServiceToken('files:upload.write');
+
+    expect(tok).toBe('integration-token');
+    expect(captured).not.toBeNull();
+    expect(captured.headers['content-type']).toBe('application/x-www-form-urlencoded');
+    expect(captured.headers.authorization).toMatch(/^Basic /);
+
+    const expectedBasic = Buffer.from(
+      `${TEST_CLIENT_ID}:${TEST_CLIENT_SECRET}`,
+      'utf8'
+    ).toString('base64');
+    expect(captured.headers.authorization).toBe(`Basic ${expectedBasic}`);
+
+    // The request body must not contain client_secret
+    expect(captured.body).not.toContain(TEST_CLIENT_SECRET);
+
+    const params = new URLSearchParams(captured.body);
+    expect(params.get('grant_type')).toBe('client_credentials');
+    expect(params.get('scope')).toBe('files:upload.write');
+    expect(params.get('audience')).toBe(TEST_FAA_AUDIENCE);
+  });
+
+  it('handles real 4xx response', async () => {
+    handler = () => ({ status: 401, body: { error: 'invalid_client' } });
+    const client = makeIntegrationClient();
+    await expect(client.getServiceToken('files:upload.write')).rejects.toMatchObject({
+      name: 'OAuthClientError',
+      status: 401,
+      errorCode: 'invalid_client',
+    });
+  });
+
+  it('handles real timeout (small server delay > timeoutMs)', async () => {
+    handler = (req, body) => {
+      // Deliberately delay the reply by 200ms
+      const start = Date.now();
+      while (Date.now() - start < 200) {
+        // busy wait — simulates a stuck server. NOTE(review): this also blocks the event loop shared with the client under test, so its 50ms timer cannot fire until the loop is released; consider an async delay
+      }
+      return {
+        status: 200,
+        body: { access_token: 'should-not-receive', token_type: 'Bearer', expires_in: 3600 },
+      };
+    };
+
+    const client = makeIntegrationClient({
+      oauthClient: { refreshSkewMs: 60_000, timeoutMs: 50 },
+    });
+    await expect(client.getServiceToken('files:upload.write')).rejects.toBeInstanceOf(
+      OAuthTimeoutError
+    );
+  });
+});
+
+// ----------------------------------------------------------------------------
+// 10. 
Singleton wrappers (對外 export 的便利介面) +// ---------------------------------------------------------------------------- + +describe('module-level singleton wrappers', () => { + afterEach(() => { + _internals.singleton._resetForTests(); + }); + + it('exports getServiceToken / invalidate as functions', () => { + expect(typeof oauthModule.getServiceToken).toBe('function'); + expect(typeof oauthModule.invalidate).toBe('function'); + }); + + it('error classes are exposed', () => { + expect(oauthModule.OAuthClientError).toBe(OAuthClientError); + expect(oauthModule.OAuthServerError).toBe(OAuthServerError); + expect(oauthModule.OAuthTimeoutError).toBe(OAuthTimeoutError); + }); +}); diff --git a/apps/task-scheduler/src/auth/jwks.js b/apps/task-scheduler/src/auth/jwks.js new file mode 100644 index 0000000..2c3be19 --- /dev/null +++ b/apps/task-scheduler/src/auth/jwks.js @@ -0,0 +1,155 @@ +/** + * JWKS cache 與 JWT 驗證封裝。 + * + * 採用 `jose` 套件的 `createRemoteJWKSet`,內建: + * - TTL cache(cacheMaxAge,預設 10 分鐘) + * - 失敗冷卻(cooldownDuration,預設 30 秒,避免 thundering herd) + * - 自動 stale-while-revalidate + * - 拒絕 alg=none(jose 預設) + * - cache 大小有上限(jose 預設) + * + * 範圍(T1): + * - 暴露 `getJWKS()` 給 middleware 用 + * - 暴露 `verifyToken(token, opts)` 一站式驗證 + * - 不負責 scope / tenant 檢查(middleware 處理) + * + * 安全注意: + * - 絕對不在 log 中印出 token 內容或 payload + * - 不接受 alg=none(jose 預設) + * - 不允許自帶的 key set(防止「JWKS poisoning」) + */ + +'use strict'; + +const { createRemoteJWKSet, jwtVerify } = require('jose'); + +/** + * 模組層級 cache:以 jwksUrl 為 key 共用一個 RemoteJWKSet 實例。 + * + * 為什麼用模組層級 cache 而非每次 new: + * - `createRemoteJWKSet` 內建 TTL cache 與 cooldown,重複 new 會破壞 cache 命中率 + * - 同一個 process 內所有 middleware 共用同一個 JWKSet + * + * 暴露 `_resetForTests()` 讓測試重置。 + */ +const _jwksByUrl = new Map(); + +/** + * 允許的 JWT 簽章演算法白名單(Sec m3 修正)。 + * + * 為什麼明確 pin: + * - 雖然 jose 預設拒絕 alg=none,但保留了 HMAC(`HS256`/`HS384`/`HS512`)作為 + * 合法選項;HMAC 簽章用對稱金鑰,attacker 拿到 JWKS 公鑰後可能用同一個 key + * 做 HMAC 偽造(演算法混淆攻擊) + * - 明確 pin 為非對稱演算法,攻擊面收窄 
/**
 * Return the remote JWK Set for `jwksUrl`, creating and memoizing it on first use.
 *
 * The memo is keyed by the exact `jwksUrl` string, so `options` only take
 * effect on the call that creates the entry; later calls for the same URL get
 * the already-built set back unchanged.
 *
 * @param {string} jwksUrl - JWKS endpoint URL
 * @param {{ cacheMaxAgeMs?: number, cooldownMs?: number }} [options]
 *   cache lifetime (default 10 minutes) and refetch cooldown (default 30 seconds)
 * @returns {Function} key resolver usable as the second argument of `jwtVerify`
 * @throws {Error} when `jwksUrl` is not a non-blank string or is not a parseable URL
 */
function getJWKS(jwksUrl, options = {}) {
  const isUsableString = typeof jwksUrl === 'string' && jwksUrl.trim() !== '';
  if (!isUsableString) {
    throw new Error('[jwks] jwksUrl is required');
  }

  const existingSet = _jwksByUrl.get(jwksUrl);
  if (existingSet) {
    return existingSet;
  }

  // Read the tuning knobs before parsing the URL (same order as before).
  const remoteOptions = {
    cacheMaxAge: options.cacheMaxAgeMs ?? 10 * 60 * 1000,
    cooldownDuration: options.cooldownMs ?? 30 * 1000,
  };

  let endpoint;
  try {
    endpoint = new URL(jwksUrl);
  } catch (err) {
    throw new Error(`[jwks] Invalid JWKS URL: ${jwksUrl} (${err.message})`);
  }

  const createdSet = createRemoteJWKSet(endpoint, remoteOptions);
  _jwksByUrl.set(jwksUrl, createdSet);
  return createdSet;
}
Error('Token is empty'); + err.code = 'ERR_JWS_INVALID'; + throw err; + } + if (!options || typeof options !== 'object') { + throw new Error('[jwks] verifyToken requires options'); + } + + const { jwksUrl, issuer, audience, clockToleranceSec = 60 } = options; + + if (!issuer) throw new Error('[jwks] options.issuer is required'); + if (!audience) throw new Error('[jwks] options.audience is required'); + + const jwks = getJWKS(jwksUrl, { + cacheMaxAgeMs: options.cacheMaxAgeMs, + cooldownMs: options.cooldownMs, + }); + + // jose.jwtVerify 預設拒絕 alg=none、會驗 signature、exp、nbf。 + // Sec m3:明確 pin algorithms 白名單,避免 HMAC 演算法混淆攻擊。 + return jwtVerify(token, jwks, { + issuer, + audience, + clockTolerance: clockToleranceSec, + algorithms: ALLOWED_JWT_ALGS, + }); +} + +/** + * 測試用:清空模組層級 cache。 + * 生產環境不應呼叫。 + */ +function _resetForTests() { + _jwksByUrl.clear(); +} + +module.exports = { + getJWKS, + verifyToken, + ALLOWED_JWT_ALGS, + _resetForTests, +}; diff --git a/apps/task-scheduler/src/auth/middleware.js b/apps/task-scheduler/src/auth/middleware.js new file mode 100644 index 0000000..6def9db --- /dev/null +++ b/apps/task-scheduler/src/auth/middleware.js @@ -0,0 +1,286 @@ +/** + * `requireAuth(scope)` Express middleware。 + * + * 職責: + * 1. 驗證 `Authorization: Bearer ` + * 2. 透過 jose(JWKS)驗 issuer / audience / 簽章 / 過期 + * 3. 檢查 token 是否含 requiredScope + * 4. 若 config 有設 tenantId,檢查 token 的 tenant_id 是否吻合 + * 5. 驗證成功 → 把解析好的 auth 資訊掛到 req.auth,呼叫 next() + * 6. 
驗證失敗 → 統一錯誤格式回 401/403,並**主動斷線**(M2) + * + * M2(Review m2 落實): + * Express 的 `res.status(401).json(...)` 不會主動關閉底層 socket;攻擊者若已 + * 開始上傳 500MB body,Node 會繼續往 socket buffer 灌資料,吃記憶體與頻寬。 + * 為此 sendAuthError 在 response 完整送出後(`res.on('finish')`)才 destroy + * socket,確保: + * (a) client 收得到 401/403 JSON + * (b) 後續的 body bytes 不會繼續被 Node 接收 + * + * **這只是「盡力而為」**。實際大檔護欄靠 Nginx `client_max_body_size 600M` + * (TDD §7.1,DevOps 任務),這層只是減輕應用層的負擔。 + * + * 已知限制: + * - 在 `res.on('finish')` 之前,Node 的 read buffer 仍可能累積一些 bytes + * (通常為 `highWaterMark`,預設 16KB) + * - 若用戶端用 HTTP/2 或 keep-alive,destroy socket 也會中斷該連線上的其他 + * pipelined request;T1 範圍內可接受(v1 端點目前只有 jobs/promote) + */ + +'use strict'; + +const { verifyToken } = require('./jwks'); + +/** + * 統一的錯誤回應 helper(M2 — 含 destroy 連線)。 + * + * 嚴格順序(**勿改**): + * 1. 設 `Connection: close` header — 告訴 client 不要 reuse 連線 + * 2. 用 `res.status().json()` 把 401/403 JSON 寫出 + * 3. 監聽 `res.on('finish')` —— 在 response 已寫完且發送完畢後 —— + * destroy underlying socket,讓 client 沒辦法繼續灌 body + * + * 為什麼不能直接 `req.socket.destroy()` 在 send response 之前: + * 會在 response 還沒寫完就斷線,client 收不到 401 訊息,看到的是 + * ECONNRESET,無法判斷是被 reject 還是 server 異常。 + * + * 為什麼用 `req.socket` 而非 `res.socket`: + * 兩者通常是同一個 underlying socket;用 req.socket 可避免 res 在某些 + * 狀況下已被釋放的情境(例如 res 已 detach)。 + * + * @param {import('express').Request} req + * @param {import('express').Response} res + * @param {number} status - HTTP status code(401 / 403) + * @param {string} code - error.code(如 'invalid_token') + * @param {string} message - 對外訊息(zh-TW) + * @param {object} [details] - error.details(可選) + */ +function sendAuthError(req, res, status, code, message, details) { + // 雙重保險:若 response 已送過(不該發生但保險),不要 double send + if (res.headersSent) { + // 仍嘗試 destroy,避免 client 繼續灌 body + try { + if (req.socket && !req.socket.destroyed) { + req.socket.destroy(); + } + } catch (_) { + /* noop */ + } + return; + } + + res.setHeader('Connection', 'close'); + + const body = { + error: { + code, + message, + 
/**
 * Extract the list of granted scopes from verified token claims.
 *
 * Supported claim shapes, checked in this order:
 *   1. `scp` as an array of strings
 *   2. `scope` as a whitespace-delimited string (the OAuth 2 convention)
 *   3. `scope` as an array of strings
 *   4. `scp` as a whitespace-delimited string (emitted by some authorization
 *      servers; only consulted when none of the shapes above is present, so
 *      results for tokens handled by 1-3 are unchanged)
 *
 * Non-string and empty entries are dropped. Missing or non-object claims yield
 * an empty list instead of throwing, so the caller's scope check simply fails.
 *
 * @param {object|null|undefined} claims - decoded JWT payload
 * @returns {string[]} granted scopes (possibly empty)
 */
function extractScopes(claims) {
  if (claims === null || typeof claims !== 'object') {
    return [];
  }

  const keepNonEmptyStrings = (list) =>
    list.filter((s) => typeof s === 'string' && s.length > 0);
  const splitOnWhitespace = (text) => text.split(/\s+/).filter(Boolean);

  if (Array.isArray(claims.scp)) {
    return keepNonEmptyStrings(claims.scp);
  }
  if (typeof claims.scope === 'string') {
    return splitOnWhitespace(claims.scope);
  }
  if (Array.isArray(claims.scope)) {
    return keepNonEmptyStrings(claims.scope);
  }
  if (typeof claims.scp === 'string') {
    return splitOnWhitespace(claims.scp);
  }
  return [];
}
/**
 * Build an Express middleware that admits only requests carrying a valid
 * Bearer token that includes `requiredScope`.
 *
 * Usage:
 *   const auth = require('./middleware');
 *   app.post('/api/v1/jobs', auth.requireAuth(config.converter.scopeWrite), handler);
 *
 * Per request, in order:
 *   1. extract the Bearer token from the Authorization header (401 if absent/malformed)
 *   2. verify it via `verify` (401 `token_expired` or `invalid_token` on failure)
 *   3. require `requiredScope` among the token's scopes (403 `insufficient_scope`)
 *   4. if `config.converter.tenantId` is set, require a matching `tenant_id` claim
 *      (403 `tenant_mismatch`)
 *   5. attach the parsed identity to `req.auth` and call `next()`
 *
 * @param {string} requiredScope - scope this endpoint demands (e.g. 'converter:job.write')
 * @param {object} [deps] - dependency injection (for tests)
 * @param {object} [deps.config] - full config object; loaded lazily when omitted
 * @param {Function} [deps.verify] - replacement for verifyToken
 * @returns {import('express').RequestHandler}
 * @throws {Error} at construction time when `requiredScope` is not a non-empty string
 */
function requireAuth(requiredScope, deps = {}) {
  if (typeof requiredScope !== 'string' || requiredScope === '') {
    throw new Error('[requireAuth] requiredScope is required and must be a string');
  }

  // Config is resolved lazily so that merely requiring this module does not
  // demand environment variables to be set.
  let config = deps.config;
  const verify = deps.verify || verifyToken;

  return async function authMiddleware(req, res, next) {
    try {
      if (!config) {
        // Loaded on the first request only, then reused via the closure.
        config = require('../config').loadConfig();
      }

      // 1. Extract the Bearer token
      const token = extractBearerToken(req.headers && req.headers.authorization);
      if (!token) {
        return sendAuthError(
          req,
          res,
          401,
          'invalid_token',
          '缺少或格式錯誤的 Authorization header(需為 Bearer )'
        );
      }

      // 2. Verify the token with the configured issuer, audience and JWKS settings
      let result;
      try {
        result = await verify(token, {
          jwksUrl: config.memberCenter.jwksUrl,
          issuer: config.memberCenter.issuer,
          audience: config.converter.audience,
          clockToleranceSec: config.jwks.clockToleranceSec,
          cacheMaxAgeMs: config.jwks.cacheMaxAgeMs,
          cooldownMs: config.jwks.cooldownMs,
        });
      } catch (err) {
        // Map the verifier's error.code onto the public error code
        const errCode = err && err.code ? String(err.code) : '';

        if (errCode === 'ERR_JWT_EXPIRED') {
          return sendAuthError(req, res, 401, 'token_expired', 'Token 已過期');
        }

        // Every other verification failure is reported as a generic
        // invalid_token, so the response does not reveal which check failed
        // (for example "issuer matched but audience did not"). Details go to
        // the log for operators; the token itself is never logged.
        // eslint-disable-next-line no-console
        console.warn(
          JSON.stringify({
            level: 'WARN',
            action: 'auth.verify_failed',
            error_code: errCode || 'unknown',
            message: err && err.message ? err.message : 'verify failed',
            timestamp: new Date().toISOString(),
          })
        );
        return sendAuthError(req, res, 401, 'invalid_token', 'Token 驗證失敗');
      }

      const claims = result.payload;

      // 3. Scope check
      const scopes = extractScopes(claims);
      if (!scopes.includes(requiredScope)) {
        return sendAuthError(req, res, 403, 'insufficient_scope', 'Token 缺少必要權限', {
          required_scope: requiredScope,
          provided_scopes: scopes,
        });
      }

      // 4. Tenant check — skipped when config.converter.tenantId is empty.
      //    Strict policy: once a tenant is configured the claim must match exactly.
      if (config.converter.tenantId) {
        const claimTenant = claims.tenant_id;
        if (claimTenant !== config.converter.tenantId) {
          return sendAuthError(req, res, 403, 'tenant_mismatch', '租戶不符', {
            expected_tenant: config.converter.tenantId,
            // The token's own tenant_id is deliberately not echoed back
          });
        }
      }

      // 5. Expose the identity to downstream handlers
      req.auth = {
        sub: claims.sub || null,
        clientId: claims.client_id || claims.sub || null,
        tenantId: claims.tenant_id || null,
        scopes,
        // Full claims object for handlers that need it; the raw token string is not exposed
        raw: claims,
      };

      return next();
    } catch (err) {
      // Safety net for anything unexpected above, including a failing
      // loadConfig(). The request is rejected with 401.
      // NOTE(review): a server-side failure is reported to the caller as
      // invalid_token here — confirm whether a 5xx would be more appropriate.
      // eslint-disable-next-line no-console
      console.error(
        JSON.stringify({
          level: 'ERROR',
          action: 'auth.middleware_unexpected_error',
          message: err && err.message ? err.message : 'unknown',
          timestamp: new Date().toISOString(),
        })
      );
      return sendAuthError(req, res, 401, 'invalid_token', 'Token 驗證失敗');
    }
  };
}
**絕不**將 client_secret / token 內容寫入 log + * + * 通信規格(對齊 TDD §2.4 / §5.2 / RFC 6749 §4.4 + §2.3.1): + * - 使用 HTTP Basic auth header `Authorization: Basic base64(client_id:client_secret)` + * (RFC 6749 §2.3.1 推薦,比 body 傳 secret 安全;token endpoint 通常都接受) + * - body: `application/x-www-form-urlencoded`,含 `grant_type=client_credentials`、 + * `scope=`、`audience=`(Auth0 / 多數 IdP 慣例) + * - 預期回應 JSON:`{ access_token, token_type, expires_in }` + * + * 安全注意: + * - 任何 log 都不得包含 `client_secret`、Authorization header 內容、access_token + * - 錯誤訊息只揭露 status + 標準 error_code(如 `invalid_client`),不揭露 server 端細節 + */ + +'use strict'; + +/* eslint-disable no-console */ + +// ---------------------------------------------------------------------------- +// 錯誤類別 +// ---------------------------------------------------------------------------- + +/** + * OAuth client 共用基類。子類用 `name` 區分。 + * + * 注意:constructor 不接受任何含 secret 的欄位,message 也不該帶 secret。 + */ +class OAuthError extends Error { + /** + * @param {string} name + * @param {string} message + * @param {{ status?: number, errorCode?: string, retryable?: boolean }} [meta] + */ + constructor(name, message, meta = {}) { + super(message); + this.name = name; + this.status = meta.status ?? null; + this.errorCode = meta.errorCode ?? null; // OAuth 標準 error code,如 'invalid_client' + this.retryable = meta.retryable ?? 
/**
 * Produce the value of an HTTP Basic `Authorization` header for the given
 * client credentials: the literal `Basic ` followed by the Base64 encoding of
 * the UTF-8 bytes of `clientId:clientSecret`.
 *
 * NOTE(review): RFC 6749 §2.3.1 asks for the id and secret to be
 * form-urlencoded before Base64; the values are used verbatim here — confirm
 * the credentials never contain characters where that matters.
 *
 * @param {string} clientId
 * @param {string} clientSecret
 * @returns {string} header value of the form `Basic <base64>`
 */
function buildBasicAuthHeader(clientId, clientSecret) {
  const userPass = ''.concat(clientId, ':', clientSecret);
  // Buffer is used for the Base64 step rather than `btoa`.
  const encoded = Buffer.from(userPass, 'utf8').toString('base64');
  return 'Basic ' + encoded;
}
/**
 * Emit one structured (single-line JSON) log record for an OAuth client event.
 *
 * Callers must never pass secrets, tokens or Authorization header values in
 * `fields`; this function does not scrub them.
 *
 * The envelope keys `level`, `service`, `action` and `timestamp` are reserved:
 * if `fields` contains any of them, the envelope value wins, so a caller cannot
 * accidentally relabel a record. Key order in the output is envelope first,
 * then the extra fields.
 *
 * Routing: ERROR goes to console.error, WARN to console.warn, anything else
 * (INFO) to console.log.
 *
 * @param {'INFO'|'WARN'|'ERROR'} level
 * @param {string} action - event name, e.g. 'oauth.token_obtained'
 * @param {object} [fields] - extra structured fields (no secrets / tokens)
 * @returns {void}
 */
function logEvent(level, action, fields = {}) {
  const envelope = {
    level,
    service: 'oauth-client',
    action,
    timestamp: new Date().toISOString(),
  };
  // Spread the caller's fields, then re-apply the envelope: re-assigning an
  // existing key keeps its position, so reserved keys stay first and cannot
  // be overridden.
  const record = Object.assign({ ...envelope, ...fields }, envelope);
  const line = JSON.stringify(record);

  if (level === 'ERROR') {
    console.error(line);
  } else if (level === 'WARN') {
    console.warn(line);
  } else {
    console.log(line);
  }
}
/**
 * Minimal OAuth client for the client_credentials grant with a per-scope
 * token cache.
 *
 * Intended use is through the module-level singleton; tests may construct
 * their own instance with injected dependencies.
 *
 * @typedef {Object} CacheEntry
 * @property {string} accessToken - access token string
 * @property {number} expiresAtMs - epoch milliseconds at which the token expires
 *
 * @typedef {Object} OAuthClientDeps
 * @property {Function} [fetch] - fetch implementation (tests inject a mock)
 * @property {Function} [now] - clock returning epoch milliseconds
 * @property {Function} [loadConfig] - config loader (avoids reading real env vars in tests)
 *
 * @typedef {Object} OAuthClientConfig
 * @property {string} tokenUrl
 * @property {string} clientId
 * @property {string} clientSecret
 * @property {string} faaAudience
 * @property {number} refreshSkewMs
 * @property {number} timeoutMs
 */
class OAuthClient {
  /**
   * @param {OAuthClientDeps} [deps]
   */
  constructor(deps = {}) {
    /** @type {Map<string, CacheEntry>} scope -> cached token */
    this._cache = new Map();
    /** @type {Map<string, Promise<string>>} scope -> request currently in flight */
    this._inflight = new Map();
    this._fetch = deps.fetch || globalThis.fetch;
    this._now = deps.now || (() => Date.now());
    this._loadConfig = deps.loadConfig || null;
    /** @type {OAuthClientConfig|null} resolved lazily by _getConfig() */
    this._config = null;
  }

  /**
   * Resolve (once) and return the subset of configuration this client needs.
   * The full config is loaded on first use, not at construction.
   *
   * @returns {OAuthClientConfig}
   */
  _getConfig() {
    if (this._config) return this._config;

    const fullConfig = this._loadConfig
      ? this._loadConfig()
      : require('../config').loadConfig();

    this._config = {
      tokenUrl: fullConfig.memberCenter.tokenUrl,
      clientId: fullConfig.converter.clientId,
      clientSecret: fullConfig.converter.clientSecret,
      faaAudience: fullConfig.fileAccessAgent.audience,
      refreshSkewMs: fullConfig.oauthClient.refreshSkewMs,
      timeoutMs: fullConfig.oauthClient.timeoutMs,
    };
    return this._config;
  }

  /**
   * Get a service token for `scope`.
   *
   *   1. A cached token with more than `refreshSkewMs` of lifetime left is
   *      returned immediately.
   *   2. Otherwise a new token is requested.
   *   3. Concurrent callers for the same scope share one in-flight request;
   *      different scopes are requested independently.
   *
   * @param {string} scope - e.g. 'files:upload.write'
   * @returns {Promise<string>} access token
   * @throws {TypeError} when `scope` is not a non-blank string
   * @throws {OAuthClientError|OAuthServerError|OAuthTimeoutError}
   */
  async getServiceToken(scope) {
    if (typeof scope !== 'string' || scope.trim() === '') {
      throw new TypeError('[oauthClient] scope is required (non-empty string)');
    }

    // 1. Fresh cache hit
    const cached = this._cache.get(scope);
    const config = this._getConfig();
    const nowMs = this._now();
    if (cached && cached.expiresAtMs - config.refreshSkewMs > nowMs) {
      return cached.accessToken;
    }

    // 2. Join a request that is already running for this scope
    const existing = this._inflight.get(scope);
    if (existing) {
      return existing;
    }

    // 3. Start a new request
    const promise = this._fetchToken(scope, config).finally(() => {
      // Success or failure, clear the marker so later callers can try again.
      this._inflight.delete(scope);
    });
    this._inflight.set(scope, promise);
    return promise;
  }

  /**
   * Request a new token from the token endpoint and cache it on success.
   *
   * The `timeoutMs` budget covers the whole exchange — connecting, waiting for
   * headers AND reading the response body. The abort timer therefore stays
   * armed until the body has been consumed; otherwise an endpoint that sends
   * headers and then stalls would hang the caller.
   *
   * @param {string} scope
   * @param {OAuthClientConfig} config
   * @returns {Promise<string>} access token
   * @throws {OAuthClientError} token endpoint answered 4xx
   * @throws {OAuthServerError} other non-OK status, or an unusable response body
   * @throws {OAuthTimeoutError} timeout or network-level failure
   */
  async _fetchToken(scope, config) {
    const body = new URLSearchParams({
      grant_type: 'client_credentials',
      scope,
      audience: config.faaAudience,
    }).toString();

    const headers = {
      'Content-Type': 'application/x-www-form-urlencoded',
      Accept: 'application/json',
      Authorization: buildBasicAuthHeader(config.clientId, config.clientSecret),
    };

    const controller = new AbortController();
    const timeoutHandle = setTimeout(() => controller.abort(), config.timeoutMs);

    try {
      let res;
      try {
        res = await this._fetch(config.tokenUrl, {
          method: 'POST',
          headers,
          body,
          signal: controller.signal,
        });
      } catch (err) {
        // An abort means our timer fired; anything else is a network failure.
        const isAbort =
          (err && (err.name === 'AbortError' || err.code === 'ABORT_ERR')) ||
          controller.signal.aborted;
        logEvent('WARN', 'oauth.token_fetch_failed', {
          scope,
          reason: isAbort ? 'timeout' : 'network_error',
          // Only the start of the message is logged, as a precaution.
          error_message: (err && err.message ? String(err.message) : 'unknown').slice(0, 200),
        });
        if (isAbort) {
          throw new OAuthTimeoutError(
            `Token endpoint timed out after ${config.timeoutMs}ms`,
            { retryable: true }
          );
        }
        throw new OAuthTimeoutError(
          `Network error contacting token endpoint: ${err && err.message ? err.message.slice(0, 100) : 'unknown'}`,
          { retryable: true }
        );
      }

      if (!res.ok) {
        const status = res.status;
        // Best effort: returns null if the error body cannot be read or parsed.
        const errBody = await tryParseOauthErrorBody(res);
        const errorCode = errBody && typeof errBody.error === 'string' ? errBody.error : null;
        // error_description is deliberately NOT logged (it is free-form server text).
        logEvent('WARN', 'oauth.token_endpoint_error', {
          scope,
          status,
          error_code: errorCode || 'unknown',
        });
        if (status >= 400 && status < 500) {
          throw new OAuthClientError(
            `Token endpoint returned ${status}${errorCode ? ` (${errorCode})` : ''}`,
            { status, errorCode }
          );
        }
        // 5xx or any other non-OK status
        throw new OAuthServerError(
          `Token endpoint returned ${status}${errorCode ? ` (${errorCode})` : ''}`,
          { status, errorCode }
        );
      }

      let data;
      try {
        data = await res.json();
      } catch (err) {
        const errorMessage = err && err.message ? String(err.message) : 'unknown';
        if (controller.signal.aborted) {
          // The body read was cut short by our own timer: a timeout, not a
          // malformed response.
          logEvent('WARN', 'oauth.token_fetch_failed', {
            scope,
            reason: 'timeout',
            error_message: errorMessage.slice(0, 200),
          });
          throw new OAuthTimeoutError(
            `Token endpoint timed out after ${config.timeoutMs}ms`,
            { retryable: true }
          );
        }
        logEvent('ERROR', 'oauth.token_response_parse_failed', {
          scope,
          // Never log the raw body (it may contain a token) — only the parser message.
          error_message: errorMessage.slice(0, 100),
        });
        throw new OAuthServerError('Failed to parse token response as JSON');
      }

      const parsed = parseTokenResponse(data); // throws OAuthServerError on shape mismatch

      const expiresAtMs = this._now() + parsed.expiresInSec * 1000;
      /** @type {CacheEntry} */
      const entry = {
        accessToken: parsed.accessToken,
        expiresAtMs,
      };
      this._cache.set(scope, entry);

      logEvent('INFO', 'oauth.token_obtained', {
        scope,
        token_type: parsed.tokenType,
        expires_in_sec: parsed.expiresInSec,
        // Only the token's length is logged (for debugging), never its content.
        access_token_length: parsed.accessToken.length,
      });

      return parsed.accessToken;
    } finally {
      clearTimeout(timeoutHandle);
    }
  }

  /**
   * Drop the cached token for `scope` so the next `getServiceToken(scope)`
   * requests a new one.
   *
   * Typical use: the downstream service answered 401 (token revoked, server
   * restarted); the caller invalidates and retries once.
   *
   * @param {string} scope
   * @returns {void}
   */
  invalidate(scope) {
    if (typeof scope !== 'string' || scope === '') return;
    const had = this._cache.delete(scope);
    if (had) {
      logEvent('INFO', 'oauth.token_invalidated', { scope });
    }
  }

  /**
   * Test helper: clear all state (cache, in-flight markers, resolved config).
   * Not meant to be called in production.
   *
   * @returns {void}
   */
  _resetForTests() {
    this._cache.clear();
    this._inflight.clear();
    this._config = null;
  }
}
/**
 * Read an optional integer environment variable.
 *
 * - Unset, empty or whitespace-only: returns `defaultValue` (the same notion
 *   of "unset" that `optionalEnv` uses).
 * - Otherwise the trimmed value must be a plain base-10 integer (optional
 *   sign, digits only). Values such as "1e4", "10.5" or "12px" are rejected
 *   rather than silently truncated to their numeric prefix, which is what
 *   `Number.parseInt` alone would do.
 *
 * @param {string} name - environment variable name
 * @param {number} defaultValue - value returned when the variable is unset/blank
 * @returns {number}
 * @throws {Error} when the value is present but not a safe base-10 integer
 */
function optionalIntEnv(name, defaultValue) {
  const raw = process.env[name];
  if (typeof raw !== 'string' || raw.trim() === '') {
    return defaultValue;
  }

  const text = raw.trim();
  const parsed = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  // isSafeInteger also rejects NaN and magnitudes that lose precision.
  if (!Number.isSafeInteger(parsed)) {
    throw new Error(
      `[config] Environment variable ${name} must be an integer, got: ${JSON.stringify(raw)}`
    );
  }
  return parsed;
}
+ const mcTokenUrl = requireEnv('MEMBER_CENTER_TOKEN_URL'); + + // === Converter as Resource Server(接收他人 token) === + const audience = requireEnv('KNERON_CONVERTER_AUDIENCE'); + + // === Converter as OAuth Client(呼叫 File Access Agent,僅 promote 用) === + // T2:對齊 TDD §9 將 client_id / client_secret 收緊為必填。兩者必須成對出現。 + const clientId = requireEnv('KNERON_CONVERTER_CLIENT_ID'); + const clientSecret = requireEnv('KNERON_CONVERTER_CLIENT_SECRET'); + + // === Tenant 隔離(可選) === + const tenantId = optionalEnv('CONVERTER_TENANT_ID', ''); + + // === Scope 命名(可覆寫,預設值對齊 TDD §8) === + const scopeWrite = optionalEnv('CONVERTER_SCOPE_WRITE', 'converter:job.write'); + const scopeRead = optionalEnv('CONVERTER_SCOPE_READ', 'converter:job.read'); + + // === File Access Agent(T7 起為必填)=== + // T7:promote 流程已上線,FAA URL / audience 必須在啟動時驗證;少了就 fail-fast。 + // - URL 必須是合法 http(s) URL;NODE_ENV=production 強制 https(傳輸保護) + // - dev 用 placeholder(如 https://REPLACE-ME.invalid)也是合法 URL,不影響本地啟動 + const faaBaseUrl = requireEnv('FILE_ACCESS_AGENT_BASE_URL'); + const faaAudience = requireEnv('FILE_ACCESS_AGENT_AUDIENCE'); + let faaParsedUrl; + try { + faaParsedUrl = new URL(faaBaseUrl); + } catch (_err) { + throw new Error( + `[config] FILE_ACCESS_AGENT_BASE_URL must be a valid URL, got: ${JSON.stringify(faaBaseUrl)}` + ); + } + if (faaParsedUrl.protocol !== 'http:' && faaParsedUrl.protocol !== 'https:') { + throw new Error( + `[config] FILE_ACCESS_AGENT_BASE_URL must use http(s) scheme, got protocol: ${faaParsedUrl.protocol}` + ); + } + if (process.env.NODE_ENV === 'production' && faaParsedUrl.protocol !== 'https:') { + throw new Error( + '[config] FILE_ACCESS_AGENT_BASE_URL must use HTTPS in production (NODE_ENV=production)' + ); + } + + // === Promote 行為(T7 用) === + // 單檔 PUT timeout,預設 300s(500MB @ 5MB/s 下界),對齊 TDD §6.4。 + const promoteTimeoutMs = optionalIntEnv('PROMOTE_TIMEOUT_MS', 300 * 1000); + + // === JWKS cache 行為 === + const jwksCacheMaxAgeMs = optionalIntEnv('JWKS_CACHE_MAX_AGE_MS', 10 * 
60 * 1000); // 10 分鐘 + const jwksCooldownMs = optionalIntEnv('JWKS_COOLDOWN_MS', 30 * 1000); // 30 秒 + const jwtClockToleranceSec = optionalIntEnv('JWT_CLOCK_TOLERANCE_SEC', 60); // 60 秒 + + // === OAuth Client(取 token 用,T2)=== + // refresh skew:cache 內 token 距離 expiresAt 還有多少 ms 時就主動 refresh。 + // 預設 60s,避免 race condition(取 token 時剛好過期)。 + const oauthRefreshSkewMs = optionalIntEnv('OAUTH_TOKEN_REFRESH_SKEW_MS', 60 * 1000); + // 取 token 的 timeout(含網路 RTT + Member Center 處理時間)。 + // 預設 10s,避免 promote 流程因 token endpoint 慢回應而 hang。 + const oauthTimeoutMs = optionalIntEnv('OAUTH_TOKEN_TIMEOUT_MS', 10 * 1000); + + // === Multipart 上傳上限(T10 修 D5)=== + // 為什麼用 env:不同部署環境記憶體配額差異大(dev 容器 2GB / 8 vCPU prod + // 可能 16GB),固定的 500MB 不夠彈性。dev / staging 可調降避免 OOM。 + // + // - MULTIPART_MODEL_MAX_BYTES:multer 的 per-file fileSize 上限(也作用在 model + // 檔案大小檢查)。預設 500MB(對齊 TDD §1.4.2 與 PRD F-01 上限)。 + // - MULTIPART_REF_IMAGE_MAX_BYTES:單張 ref_image 上限(validator 邏輯,multer + // 的 fileSize 是「per-file」整體上限,無法只限 ref_images)。預設 10MB。 + // - MULTIPART_REF_IMAGES_MAX_COUNT:ref_images 張數上限(multer fields maxCount + // 參數)。預設 100。 + // + // 安全:所有值都做下限檢查(必須 > 0),避免 0 / 負數造成 multer reject 全部請求。 + const modelMaxBytes = optionalIntEnv( + 'MULTIPART_MODEL_MAX_BYTES', + 500 * 1024 * 1024 + ); + if (modelMaxBytes <= 0) { + throw new Error( + `[config] MULTIPART_MODEL_MAX_BYTES must be > 0, got: ${modelMaxBytes}` + ); + } + const refImageMaxBytes = optionalIntEnv( + 'MULTIPART_REF_IMAGE_MAX_BYTES', + 10 * 1024 * 1024 + ); + if (refImageMaxBytes <= 0) { + throw new Error( + `[config] MULTIPART_REF_IMAGE_MAX_BYTES must be > 0, got: ${refImageMaxBytes}` + ); + } + const refImagesMaxCount = optionalIntEnv( + 'MULTIPART_REF_IMAGES_MAX_COUNT', + 100 + ); + if (refImagesMaxCount <= 0) { + throw new Error( + `[config] MULTIPART_REF_IMAGES_MAX_COUNT must be > 0, got: ${refImagesMaxCount}` + ); + } + + // === Upload concurrency(T10 修 D5 second part)=== + // 為什麼需要 per-process semaphore: + // multer 用 
memoryStorage,每個並發 upload 都會吃 model size 的記憶體; + // 若 5 個並發 × 500MB = 2.5GB heap,容器若只有 4GB 立刻 OOM kill。 + // per-process counter 限制同時間進行中的 upload 數量。 + // + // - MAX_CONCURRENT_UPLOADS:同時間最多進行幾個 upload。預設 5(保守值,覆蓋 + // 2.5GB / 5 並發 = 500MB peak heap,容器 ≥ 4GB 安全)。 + // - UPLOAD_RETRY_AFTER_SECONDS:超過時 503 response 帶的 Retry-After 秒數。 + // 預設 30s(給 client 一個合理的 backoff 起點)。 + // + // 為什麼選 503 + Retry-After 而非 queue: + // queue 會 hold connection 不確定多久(可能秒級也可能分鐘級),對 client 來說 + // timeout 行為不可預期。直接 503 + Retry-After 讓 client 主動 retry,符合 12-Factor + // stateless 原則,也更友善。 + const maxConcurrentUploads = optionalIntEnv('MAX_CONCURRENT_UPLOADS', 5); + if (maxConcurrentUploads <= 0) { + throw new Error( + `[config] MAX_CONCURRENT_UPLOADS must be > 0, got: ${maxConcurrentUploads}` + ); + } + const uploadRetryAfterSeconds = optionalIntEnv( + 'UPLOAD_RETRY_AFTER_SECONDS', + 30 + ); + if (uploadRetryAfterSeconds <= 0) { + throw new Error( + `[config] UPLOAD_RETRY_AFTER_SECONDS must be > 0, got: ${uploadRetryAfterSeconds}` + ); + } + + return Object.freeze({ + memberCenter: Object.freeze({ + issuer: mcIssuer, + jwksUrl: mcJwksUrl, + tokenUrl: mcTokenUrl, + }), + converter: Object.freeze({ + audience, + clientId, + clientSecret, + tenantId, + scopeWrite, + scopeRead, + }), + fileAccessAgent: Object.freeze({ + baseUrl: faaBaseUrl, + audience: faaAudience, + promoteTimeoutMs, + }), + jwks: Object.freeze({ + cacheMaxAgeMs: jwksCacheMaxAgeMs, + cooldownMs: jwksCooldownMs, + clockToleranceSec: jwtClockToleranceSec, + }), + oauthClient: Object.freeze({ + refreshSkewMs: oauthRefreshSkewMs, + timeoutMs: oauthTimeoutMs, + }), + multipart: Object.freeze({ + modelMaxBytes, + refImageMaxBytes, + refImagesMaxCount, + }), + uploadConcurrency: Object.freeze({ + maxConcurrent: maxConcurrentUploads, + retryAfterSeconds: uploadRetryAfterSeconds, + }), + }); +} + +module.exports = { + loadConfig, + // 暴露 helpers 供其他 module 重用 / 測試 + _internals: { requireEnv, optionalEnv, optionalIntEnv }, +}; diff 
--git a/apps/task-scheduler/src/fileAccessAgent/__tests__/client.test.js b/apps/task-scheduler/src/fileAccessAgent/__tests__/client.test.js new file mode 100644 index 0000000..cc29c74 --- /dev/null +++ b/apps/task-scheduler/src/fileAccessAgent/__tests__/client.test.js @@ -0,0 +1,879 @@ +/** + * File Access Agent client (T7) 單元測試。 + * + * 範圍(對齊 tasks-phase1.md §3.7 驗收): + * - 200 happy path(單次 PUT 成功 + 解析 etag / size_bytes) + * - 4xx(非 401)→ FAAClientError,不重試 + * - 401 → invalidate + 重取 token + 重試一次 + * - 401 重試後仍 401 → FAAUnauthorizedError + * - 5xx → 指數退避 500ms / 2000ms 重試最多 2 次 + * - timeout → AbortError → 視同 5xx 重試 + * - network error → 同 timeout 路徑 + * - streamFactory 每次 attempt 都呼叫(重試時拿新 stream) + * - URL 組合(base + /files/{key})+ encodeURI 行為 + * - SECURITY:log 不洩 token / Authorization;error message 不含 FAA 內部細節 + * - 不同 contentType / contentLength header 設置正確 + * + * 測試風格與 oauthClient.test.js 一致: + * - 依賴注入(fetch / setTimeout / oauthClient) + * - 不依賴環境變數(透過 deps.config 直接傳) + */ + +'use strict'; + +const { Readable } = require('stream'); + +const { + createFaaClient, + DEFAULT_SCOPE, + DEFAULT_TIMEOUT_MS, + RETRY_BACKOFFS_MS, + _internals, +} = require('../client'); +const { + FAAClientError, + FAAUnauthorizedError, + FAAServerError, + FAATimeoutError, +} = require('../errors'); + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +const TEST_BASE_URL = 'https://files.test.local'; +const TEST_TOKEN_1 = 'test-bearer-token-VERY-FIRST-must-stay-private'; +const TEST_TOKEN_2 = 'test-bearer-token-SECOND-after-invalidate-private'; + +/** + * Mock OAuth client:可控制每次 getServiceToken 回什麼,並紀錄呼叫。 + */ +function makeMockOauthClient(tokens = [TEST_TOKEN_1]) { + let callCount = 0; + return { + getServiceToken: jest.fn(async () => { + const t = tokens[Math.min(callCount, tokens.length - 1)]; + callCount += 1; + return t; + }), + invalidate: jest.fn(), + 
_callCount: () => callCount, + }; +} + +/** + * Mock fetch:handlers 是 fn(url, init) → Response | { status, body } | throw。 + * + * 維持與 oauthClient.test.js 相同風格,便於跨 testfile 比對。 + */ +function makeMockFetch(handlers) { + let i = 0; + const calls = []; + const fn = jest.fn(async (url, init) => { + calls.push({ url, init }); + const handler = Array.isArray(handlers) ? handlers[i] : handlers; + if (Array.isArray(handlers)) i += 1; + + if (typeof handler === 'function') { + return handler(url, init); + } + if (handler instanceof Error) throw handler; + if (handler && typeof handler === 'object' && 'status' in handler) { + return makeMockResponse(handler); + } + throw new Error(`No handler at index ${i - 1}`); + }); + fn._calls = calls; + return fn; +} + +/** + * 產生一個 fetch 回的 Response-like 物件。 + * + * @param {{ status: number, body?: object|string|null, headers?: Record }} opts + */ +function makeMockResponse({ status, body = null, headers = {} }) { + const lowerHeaders = {}; + for (const [k, v] of Object.entries(headers)) { + lowerHeaders[k.toLowerCase()] = String(v); + } + // 預設 content-type + if (body && typeof body === 'object' && !lowerHeaders['content-type']) { + lowerHeaders['content-type'] = 'application/json'; + } else if (typeof body === 'string' && !lowerHeaders['content-type']) { + lowerHeaders['content-type'] = 'text/plain'; + } + + let bodyConsumed = false; + return { + ok: status >= 200 && status < 300, + status, + headers: { + get(name) { + return lowerHeaders[name.toLowerCase()] || null; + }, + }, + async json() { + if (bodyConsumed) throw new Error('body already consumed'); + bodyConsumed = true; + if (body && typeof body === 'object') return body; + throw new Error('not json'); + }, + async text() { + if (bodyConsumed) throw new Error('body already consumed'); + bodyConsumed = true; + if (typeof body === 'string') return body; + if (body && typeof body === 'object') return JSON.stringify(body); + return ''; + }, + }; +} + +/** + * 立即執行的 fake 
setTimeout — 不真實等待,但會記錄延遲時間以驗證 backoff。 + * + * 為什麼自寫而非用 jest.useFakeTimers: + * - 我們的 client 內部有 sleep 也有 fetch timeout 兩種 setTimeout 用法 + * - jest fake timer 與 async 容易打結;自寫立即執行 + delay 紀錄較單純 + * + * 注意:fake timer 的 fn 立即執行,所以不會真等 500ms。測試比對 delays array。 + * + * @returns {{ fn: Function, delays: number[] }} + */ +function makeFakeSetTimeout() { + const delays = []; + const fn = jest.fn((cb, ms) => { + delays.push(ms); + // 不立即執行 cb(避免 abort 立即被觸發);返回一個 dummy handle + return { _fake: true, _ms: ms, _cb: cb }; + }); + return { fn, delays }; +} + +/** + * sleep-only 用的 fake setTimeout(立即執行 cb)— 給「不需要等真實時間」的測試。 + */ +function makeImmediateSetTimeout() { + const delays = []; + const fn = jest.fn((cb, ms) => { + delays.push(ms); + // 立即觸發(同步)— 對於 sleep cb 是 resolve(),對 fetch abort 不會觸發因為在 finally 已 clear + Promise.resolve().then(cb); + return { _fake: true }; + }); + return { fn, delays }; +} + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); + +afterAll(() => { + jest.restoreAllMocks(); +}); + +// =========================================================================== +// 1. 
Happy path & URL 組合 +// =========================================================================== + +describe('faaClient.putFile — happy path', () => { + it('PUTs to {baseUrl}/files/{key} with Authorization Bearer header', async () => { + const fetchMock = makeMockFetch([ + { status: 200, body: { etag: 'mock-etag', size_bytes: 1234 } }, + ]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + const stream = Readable.from(['chunk1', 'chunk2']); + const result = await client.putFile( + 'visionA/models/u1/m1/v1/out.nef', + async () => stream, + { contentLength: 1234, contentType: 'application/octet-stream' } + ); + + expect(result).toEqual({ etag: 'mock-etag', sizeBytes: 1234 }); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const [url, init] = fetchMock.mock.calls[0]; + expect(url).toBe(`${TEST_BASE_URL}/files/visionA/models/u1/m1/v1/out.nef`); + expect(init.method).toBe('PUT'); + expect(init.headers.Authorization).toBe(`Bearer ${TEST_TOKEN_1}`); + expect(init.headers['Content-Type']).toBe('application/octet-stream'); + expect(init.headers['Content-Length']).toBe('1234'); + expect(init.duplex).toBe('half'); + expect(oauth.getServiceToken).toHaveBeenCalledWith(DEFAULT_SCOPE); + }); + + it('encodes URI special chars in object key but preserves slashes', async () => { + const fetchMock = makeMockFetch([{ status: 200, body: {} }]); + const oauth = makeMockOauthClient(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + await client.putFile( + 'foo/bar/檔名 含空白.bin', + async () => Readable.from(['x']), + { contentLength: 1 } + ); + + const url = fetchMock.mock.calls[0][0]; + // / 應保留;空白應 encode 為 %20;中文應 encode + expect(url).toContain('/foo/bar/'); + expect(url).toContain('%20'); + // .. 
字元在 caller 端應已擋(這裡只測 encodeURI 不處理) + }); + + it('handles trailing slash in baseUrl correctly', async () => { + const fetchMock = makeMockFetch([{ status: 200, body: {} }]); + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: 'https://files.test.local/' }, // 多斜線 + fetch: fetchMock, + }); + + await client.putFile('a/b.bin', async () => Readable.from(['x']), { + contentLength: 1, + }); + + expect(fetchMock.mock.calls[0][0]).toBe('https://files.test.local/files/a/b.bin'); + }); + + it('falls back to ETag header + Content-Length when JSON body missing', async () => { + const fetchMock = makeMockFetch([ + { + status: 200, + body: '', + headers: { etag: '"hdr-etag"', 'content-length': '5678' }, + }, + ]); + const oauth = makeMockOauthClient(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + const result = await client.putFile('a.bin', async () => Readable.from(['x']), { + contentLength: 5678, + }); + expect(result.etag).toBe('hdr-etag'); // quote stripped + expect(result.sizeBytes).toBe(5678); + }); + + it('uses provided contentType, defaults to octet-stream', async () => { + const fetchMock = makeMockFetch([ + { status: 200, body: {} }, + { status: 200, body: {} }, + ]); + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + await client.putFile('a.bin', async () => Readable.from(['x']), { + contentLength: 1, + contentType: 'application/x-binary', + }); + expect(fetchMock.mock.calls[0][1].headers['Content-Type']).toBe('application/x-binary'); + + await client.putFile('b.bin', async () => Readable.from(['x']), { contentLength: 1 }); + expect(fetchMock.mock.calls[1][1].headers['Content-Type']).toBe('application/octet-stream'); + }); +}); + +// =========================================================================== +// 
2. 4xx (非 401) — 不重試 +// =========================================================================== + +describe('faaClient.putFile — 4xx non-401', () => { + it('throws FAAClientError on 400 without retry', async () => { + const fetchMock = makeMockFetch([ + { status: 400, body: { error: 'invalid_object_key' } }, + ]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + ).rejects.toBeInstanceOf(FAAClientError); + + expect(fetchMock).toHaveBeenCalledTimes(1); // 不重試 + // 沒有 backoff sleep + expect(setTimeout.delays.filter((d) => RETRY_BACKOFFS_MS.includes(d))).toHaveLength(0); + }); + + it('throws FAAClientError on 403 (insufficient_scope) without retry', async () => { + const fetchMock = makeMockFetch([{ status: 403, body: { error: 'insufficient_scope' } }]); + const oauth = makeMockOauthClient(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + const error = await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch((e) => e); + + expect(error).toBeInstanceOf(FAAClientError); + expect(error.status).toBe(403); + expect(error.errorCode).toBe('insufficient_scope'); + expect(error.retryable).toBe(false); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it('throws FAAClientError on 422 invalid_object_key', async () => { + const fetchMock = makeMockFetch([{ status: 422, body: { error: 'invalid_object_key' } }]); + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 
}) + ).rejects.toBeInstanceOf(FAAClientError); + }); +}); + +// =========================================================================== +// 3. 401 → invalidate + 重試一次 +// =========================================================================== + +describe('faaClient.putFile — 401 unauthorized', () => { + it('invalidates token and retries once on 401, then succeeds', async () => { + const fetchMock = makeMockFetch([ + { status: 401, body: { error: 'invalid_token' } }, + { status: 200, body: { etag: 'after-invalidate', size_bytes: 100 } }, + ]); + const oauth = makeMockOauthClient([TEST_TOKEN_1, TEST_TOKEN_2]); + const setTimeout = makeImmediateSetTimeout(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + const result = await client.putFile( + 'a.bin', + async () => Readable.from(['x']), + { contentLength: 1 } + ); + expect(result.etag).toBe('after-invalidate'); + + // 第一個 attempt: TEST_TOKEN_1 + expect(fetchMock.mock.calls[0][1].headers.Authorization).toBe(`Bearer ${TEST_TOKEN_1}`); + // 第二個 attempt: TEST_TOKEN_2(已 invalidate + 重取) + expect(fetchMock.mock.calls[1][1].headers.Authorization).toBe(`Bearer ${TEST_TOKEN_2}`); + + expect(oauth.invalidate).toHaveBeenCalledTimes(1); + expect(oauth.invalidate).toHaveBeenCalledWith(DEFAULT_SCOPE); + expect(oauth.getServiceToken).toHaveBeenCalledTimes(2); + }); + + it('throws FAAUnauthorizedError when 401 retry also returns 401', async () => { + const fetchMock = makeMockFetch([ + { status: 401, body: { error: 'invalid_token' } }, + { status: 401, body: { error: 'invalid_token' } }, + ]); + const oauth = makeMockOauthClient([TEST_TOKEN_1, TEST_TOKEN_2]); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + const error = await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch((e) => e); + + 
expect(error).toBeInstanceOf(FAAUnauthorizedError); + expect(error.status).toBe(401); + expect(error.retryable).toBe(true); // class-level flag (caller 不再重試) + expect(fetchMock).toHaveBeenCalledTimes(2); + expect(oauth.invalidate).toHaveBeenCalledTimes(1); + }); + + it('streamFactory called twice for 401 retry (new stream each attempt)', async () => { + const fetchMock = makeMockFetch([ + { status: 401, body: {} }, + { status: 200, body: {} }, + ]); + const oauth = makeMockOauthClient([TEST_TOKEN_1, TEST_TOKEN_2]); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + + const factory = jest.fn(async () => Readable.from(['data'])); + await client.putFile('a.bin', factory, { contentLength: 4 }); + + expect(factory).toHaveBeenCalledTimes(2); + }); +}); + +// =========================================================================== +// 4. 5xx — 指數退避重試最多 2 次 +// =========================================================================== + +describe('faaClient.putFile — 5xx server error', () => { + it('retries 5xx twice with backoffs 500ms / 2000ms then succeeds', async () => { + const fetchMock = makeMockFetch([ + { status: 503, body: 'maintenance' }, + { status: 502, body: 'bad gateway' }, + { status: 200, body: { etag: 'ok' } }, + ]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + const result = await client.putFile('a.bin', async () => Readable.from(['x']), { + contentLength: 1, + }); + expect(result.etag).toBe('ok'); + + expect(fetchMock).toHaveBeenCalledTimes(3); + // setTimeout 被呼叫 3 種用途:retry sleeps + per-attempt timeout + // 過濾出剛好對應 RETRY_BACKOFFS_MS 的 delay 值(500、2000) + const backoffs = setTimeout.delays.filter((d) => + RETRY_BACKOFFS_MS.includes(d) + ); + expect(backoffs).toEqual([500, 2000]); + 
}); + + it('throws FAAServerError after all 5xx retries exhausted', async () => { + const fetchMock = makeMockFetch([ + { status: 500 }, + { status: 502 }, + { status: 503 }, + ]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + const error = await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch((e) => e); + + expect(error).toBeInstanceOf(FAAServerError); + expect(fetchMock).toHaveBeenCalledTimes(3); // 1 + 2 retries + }); + + it('streamFactory called for each retry (3 times for 2x retry + initial)', async () => { + const fetchMock = makeMockFetch([ + { status: 500 }, + { status: 500 }, + { status: 200, body: {} }, + ]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + const factory = jest.fn(async () => Readable.from(['data'])); + await client.putFile('a.bin', factory, { contentLength: 4 }); + + expect(factory).toHaveBeenCalledTimes(3); + }); +}); + +// =========================================================================== +// 5. 
timeout / network +// =========================================================================== + +describe('faaClient.putFile — timeout / network', () => { + it('throws FAATimeoutError when fetch is aborted (timeout)', async () => { + // 自製一個會 throw AbortError 的 fetch + const fetchMock = jest.fn(async () => { + const err = new Error('aborted'); + err.name = 'AbortError'; + throw err; + }); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + // 為避免 retry 導致 promise stuck,明確設只跑一次(後續測試處理 retry) + retryBackoffsMs: [], + }); + + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + ).rejects.toBeInstanceOf(FAATimeoutError); + }); + + it('retries network errors (treated as timeout) up to 2 times', async () => { + const networkErr = new Error('ECONNREFUSED'); + networkErr.code = 'ECONNREFUSED'; + + const fetchMock = jest.fn(); + fetchMock + .mockImplementationOnce(async () => { + throw networkErr; + }) + .mockImplementationOnce(async () => { + throw networkErr; + }) + .mockImplementationOnce(async () => makeMockResponse({ status: 200, body: {} })); + + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + + const result = await client.putFile('a.bin', async () => Readable.from(['x']), { + contentLength: 1, + }); + expect(result).toBeDefined(); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it('does not leak token / hostname in error message', async () => { + const networkErr = new Error('connect ECONNREFUSED 192.168.99.99:443'); + networkErr.code = 'ECONNREFUSED'; + + const fetchMock = jest.fn(async () => { + throw networkErr; + }); + const oauth = 
makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + retryBackoffsMs: [], // 不重試,加速測試 + }); + + const error = await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch((e) => e); + + expect(error.message).not.toContain('192.168.99.99'); + expect(error.message).not.toContain('ECONNREFUSED'); + expect(error.message).not.toContain(TEST_TOKEN_1); + }); +}); + +// =========================================================================== +// 6. Input validation +// =========================================================================== + +describe('faaClient.putFile — input validation', () => { + it('throws TypeError when objectKey is empty', async () => { + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: jest.fn(), + }); + + await expect( + client.putFile('', async () => Readable.from(['x']), { contentLength: 1 }) + ).rejects.toBeInstanceOf(TypeError); + }); + + it('throws TypeError when streamFactory not a function', async () => { + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: jest.fn(), + }); + await expect( + client.putFile('a.bin', null, { contentLength: 1 }) + ).rejects.toBeInstanceOf(TypeError); + }); + + it('throws TypeError when contentLength is missing or invalid', async () => { + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: jest.fn(), + }); + + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), {}) + ).rejects.toBeInstanceOf(TypeError); + + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: -1 }) + 
).rejects.toBeInstanceOf(TypeError); + + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: 'big' }) + ).rejects.toBeInstanceOf(TypeError); + }); + + it('throws when oauthClient missing in createFaaClient', () => { + expect(() => createFaaClient({})).toThrow(/oauthClient is required/); + }); + + it('throws when baseUrl missing at first call', async () => { + const oauth = makeMockOauthClient(); + const fetchMock = jest.fn(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: '' }, // 空字串 + fetch: fetchMock, + }); + await expect( + client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + ).rejects.toThrow(/FILE_ACCESS_AGENT_BASE_URL not configured/); + }); +}); + +// =========================================================================== +// 7. SECURITY — token / Authorization header 不洩漏 +// =========================================================================== + +describe('faaClient.putFile — SECURITY (no secret leak)', () => { + /** + * 收集所有 spy 的 log strings,用於 grep 是否含 token。 + */ + function collectAllLoggedStrings() { + const allCalls = [ + ...console.log.mock.calls, + ...console.warn.mock.calls, + ...console.error.mock.calls, + ]; + return allCalls.flatMap((args) => args.map((a) => (typeof a === 'string' ? 
a : JSON.stringify(a)))); + } + + beforeEach(() => { + console.log.mockClear(); + console.warn.mockClear(); + console.error.mockClear(); + }); + + it('does not log Authorization header / token on success', async () => { + const fetchMock = makeMockFetch([{ status: 200, body: { etag: 'x' } }]); + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + await client.putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }); + + const allLogs = collectAllLoggedStrings(); + for (const line of allLogs) { + expect(line).not.toContain(TEST_TOKEN_1); + expect(line).not.toContain('Bearer'); + expect(line).not.toContain('Authorization'); + } + }); + + it('does not log token on 4xx error path', async () => { + const fetchMock = makeMockFetch([{ status: 403, body: { error: 'forbidden' } }]); + const oauth = makeMockOauthClient(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + }); + await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch(() => {}); + + const allLogs = collectAllLoggedStrings(); + for (const line of allLogs) { + expect(line).not.toContain(TEST_TOKEN_1); + } + }); + + it('does not log token on 5xx error path', async () => { + const fetchMock = makeMockFetch([{ status: 500 }, { status: 500 }, { status: 500 }]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch(() => {}); + + const allLogs = collectAllLoggedStrings(); + for (const line of allLogs) { + expect(line).not.toContain(TEST_TOKEN_1); + } + }); + + it('does not log target_object_key on success or 
error (even though it is not a secret)', async () => { + // Phase 1 不 log key 內容(避免大量 PII / 內部 path 進 log);只 log 長度 + const fetchMock = makeMockFetch([{ status: 500 }, { status: 500 }, { status: 500 }]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + }); + const sensitiveKey = 'visionA/internal-secret-path/file.bin'; + await client + .putFile(sensitiveKey, async () => Readable.from(['x']), { contentLength: 1 }) + .catch(() => {}); + + const allLogs = collectAllLoggedStrings(); + for (const line of allLogs) { + expect(line).not.toContain(sensitiveKey); + } + }); + + it('error message does not include FAA response body content', async () => { + const sensitiveErrorBody = 'INTERNAL: connect to db at internal-db.faa.local:5432 failed'; + const fetchMock = makeMockFetch([{ status: 500, body: sensitiveErrorBody }]); + const oauth = makeMockOauthClient(); + const setTimeout = makeImmediateSetTimeout(); + const client = createFaaClient({ + oauthClient: oauth, + config: { baseUrl: TEST_BASE_URL }, + fetch: fetchMock, + setTimeoutFn: setTimeout.fn, + retryBackoffsMs: [], + }); + + const error = await client + .putFile('a.bin', async () => Readable.from(['x']), { contentLength: 1 }) + .catch((e) => e); + + expect(error.message).not.toContain('internal-db.faa.local'); + expect(error.message).not.toContain('5432'); + // message 應該只含 status code(如 'FAA returned 500') + expect(error.message).toMatch(/FAA returned 500/); + }); +}); + +// =========================================================================== +// 8. 
_internals helpers +// =========================================================================== + +describe('faaClient._internals', () => { + describe('readSuccessMeta', () => { + it('parses JSON etag + size_bytes', async () => { + const res = makeMockResponse({ + status: 200, + body: { etag: 'json-etag', size_bytes: 100 }, + }); + const meta = await _internals.readSuccessMeta(res); + expect(meta).toEqual({ etag: 'json-etag', sizeBytes: 100 }); + }); + + it('falls back to ETag header when JSON parse fails', async () => { + const res = makeMockResponse({ + status: 200, + body: 'not json', + headers: { etag: '"hdr-etag"', 'content-length': '42' }, + }); + const meta = await _internals.readSuccessMeta(res); + expect(meta.etag).toBe('hdr-etag'); + expect(meta.sizeBytes).toBe(42); + }); + + it('returns nulls when no metadata available', async () => { + const res = makeMockResponse({ status: 200, body: '' }); + const meta = await _internals.readSuccessMeta(res); + expect(meta).toEqual({ etag: null, sizeBytes: null }); + }); + }); + + describe('isAbortLike', () => { + it('detects AbortError name', () => { + const err = new Error('abort'); + err.name = 'AbortError'; + const signal = { aborted: false }; + expect(_internals.isAbortLike(err, signal)).toBe(true); + }); + it('detects ABORT_ERR code', () => { + const err = new Error(); + err.code = 'ABORT_ERR'; + expect(_internals.isAbortLike(err, { aborted: false })).toBe(true); + }); + it('detects via signal.aborted when err lacks markers', () => { + expect(_internals.isAbortLike(new Error('x'), { aborted: true })).toBe(true); + expect(_internals.isAbortLike(new Error('x'), { aborted: false })).toBe(false); + }); + it('returns false for plain network err with aborted signal=false', () => { + const err = new Error('net'); + err.code = 'ECONNREFUSED'; + expect(_internals.isAbortLike(err, { aborted: false })).toBe(false); + }); + }); + + describe('normalizeStreamBody', () => { + it('returns Node Readable converted to web 
ReadableStream', () => { + const node = Readable.from(['x']); + const result = _internals.normalizeStreamBody(node); + // Node 18+ Readable.toWeb 回 web ReadableStream + expect(result).toBeDefined(); + expect(typeof result.getReader).toBe('function'); + }); + + it('passes-through web ReadableStream', () => { + const web = new ReadableStream({ + start(controller) { + controller.enqueue(new Uint8Array([1, 2, 3])); + controller.close(); + }, + }); + const result = _internals.normalizeStreamBody(web); + expect(result).toBe(web); + }); + }); +}); diff --git a/apps/task-scheduler/src/fileAccessAgent/client.js b/apps/task-scheduler/src/fileAccessAgent/client.js new file mode 100644 index 0000000..79c81ec --- /dev/null +++ b/apps/task-scheduler/src/fileAccessAgent/client.js @@ -0,0 +1,563 @@ +/** + * File Access Agent (FAA) HTTP client — Phase 1 僅 PUT /files/{key}(promote 用)。 + * + * 對外介面: + * const faa = createFaaClient({ config, oauthClient }); + * const meta = await faa.putFile(targetObjectKey, stream, { contentLength, contentType }); + * + * 設計原則(對齊 TDD §2.5 / §6.3 / §6.5): + * + * 1. **Stream-based body**: + * - body 是 stream 而非 buffer,避免 1GB 大檔吃光記憶體 + * - Node 18+ 原生 fetch 接受 stream body 但需要 `duplex: 'half'` + * - 若 stream 是 Node Readable,先 `Readable.toWeb()` 轉 web stream(fetch 需要) + * + * 2. **重試矩陣**(嚴格對齊 TDD §6.3 + tasks-phase1.md §2 T7): + * + * | 觸發 | 行為 | + * |-----------------|----------------------------------------------------| + * | 4xx 非 401 | 不重試 → throw FAAClientError | + * | 401 | invalidate(scope) + 重取 token + 重試 1 次;仍 401 → throw FAAUnauthorizedError | + * | 5xx | 指數退避 500ms / 2000ms 重試最多 2 次;全失敗 → throw FAAServerError | + * | timeout / network| 同 5xx 處理 → 最後 throw FAATimeoutError | + * + * 3. 
**Stream bodies cannot be replayed**:
+ *    - once an HTTP body has been consumed it cannot be replayed; if the first PUT fails
+ *      (5xx / network), the caller must **fetch a fresh stream from MinIO** before retrying
+ *    - to make retries actually workable, the client accepts `streamFactory: () => Promise`
+ *      instead of the stream itself; the factory is called once per attempt for a new stream
+ *    - the 401 retry is handled the same way
+ *
+ * 4. **Token injection**:
+ *    - the client does not read config.faaScope directly; the caller controls the scope via oauthClient
+ *    - default scope = 'files:upload.write' (the only one in Phase 1)
+ *
+ * 5. **Timeout**:
+ *    - the per-file PUT timeout uses AbortController; default 300s (500MB at a worst-case
+ *      5MB/s; TDD §6.4 + tasks §2 T7 mandate `PROMOTE_TIMEOUT_MS=300000`)
+ *    - a timeout is retried the same way as a 5xx
+ *    - the caller injects / overrides it through `deps.timeoutMs`; server.js reads the env var
+ *      `PROMOTE_TIMEOUT_MS` and passes it through, keeping configuration separate from code
+ *
+ * 6. **SSRF protection**:
+ *    - the FAA URL only ever comes from config (KNERON FILE_ACCESS_AGENT_BASE_URL); it is never
+ *      accepted from the client
+ *    - target_object_key is sanity-checked by the caller (promote handler), rejecting `..`, `\\`, etc.
+ *
+ * 7. **No leakage**:
+ *    - logs never contain the token / Authorization header / response body
+ *    - error messages carry no FAA-internal error details (the caller maps them to
+ *      502 file_gateway_unavailable for external clients)
+ *
+ * Test-friendly design, mirroring the OAuth client (T2):
+ *    - dependency injection (`fetch` / `oauthClient` / `config` / `now` / `setTimeout`)
+ *    - config is lazy-loaded so that requiring this module never fails on missing env vars
+ */
+
+'use strict';
+
+/* eslint-disable no-console */
+
+const { Readable } = require('stream');
+
+const {
+  FAAClientError,
+  FAAUnauthorizedError,
+  FAAServerError,
+  FAATimeoutError,
+} = require('./errors');
+
+// ----------------------------------------------------------------------------
+// Constants
+// ----------------------------------------------------------------------------
+
+/** The only scope used in Phase 1 (TDD §8.2). */
+const DEFAULT_SCOPE = 'files:upload.write';
+
+/**
+ * Default PUT timeout (300s) — matches TDD §6.4 ("PUT /files/{key}: scales with file size,
+ * default 300s, i.e. 500MB at a worst-case 5MB/s") and tasks-phase1.md §2 T7
+ * ("PROMOTE_TIMEOUT_MS=300000").
+ *
+ * The upper layer (server.js) is expected to read `PROMOTE_TIMEOUT_MS` from the environment
+ * and override this default through `deps.timeoutMs`.
+ */
+const DEFAULT_TIMEOUT_MS = 300 * 1000;
+
+/** Retry backoff (ms) for 5xx / timeout — TDD §6.3: 500ms / 2000ms. */
+const
RETRY_BACKOFFS_MS = [500, 2000]; + +// ---------------------------------------------------------------------------- +// 內部 helpers +// ---------------------------------------------------------------------------- + +/** + * 結構化 log(不洩露 token / body)。 + * + * @param {'INFO'|'WARN'|'ERROR'} level + * @param {string} action + * @param {object} fields + */ +function logEvent(level, action, fields = {}) { + const line = JSON.stringify({ + level, + service: 'faa-client', + action, + timestamp: new Date().toISOString(), + ...fields, + }); + if (level === 'ERROR') { + console.error(line); + } else if (level === 'WARN') { + console.warn(line); + } else { + console.log(line); + } +} + +/** + * 簡易 sleep(測試可注入 setTimeout)。 + * + * @param {number} ms + * @param {Function} [setTimeoutFn] + */ +function sleep(ms, setTimeoutFn) { + const setTimeoutImpl = setTimeoutFn || globalThis.setTimeout; + return new Promise((resolve) => setTimeoutImpl(resolve, ms)); +} + +/** + * 把 Node Readable 或 web ReadableStream 統一成「fetch 能接受的 body 型別」。 + * + * Node 18+ 原生 fetch 可接受: + * - web ReadableStream(首選) + * - Node Readable + `duplex: 'half'`(部分版本) + * - Buffer / string / FormData 等 + * + * 為了相容性,遇到 Node Readable 就轉 web stream(Readable.toWeb)。 + * + * @param {NodeJS.ReadableStream | ReadableStream | unknown} input + * @returns {ReadableStream | NodeJS.ReadableStream | unknown} + */ +function normalizeStreamBody(input) { + if (!input) return input; + // 已經是 web ReadableStream → 直接用 + if (typeof input === 'object' && typeof input.getReader === 'function') { + return input; + } + // Node Readable → 轉 web + if (input instanceof Readable) { + return Readable.toWeb(input); + } + // 其他(Buffer / string)— fetch 自己處理 + return input; +} + +/** + * 嘗試從 FAA 回 200 response 取 metadata(etag / size)。 + * + * FAA 規格 TDD §1.4.5 期望回 `file_access_agent_etag` 與 size,因此優先讀 JSON; + * 若 FAA 不回 JSON 或 parse 失敗,fallback 到 ETag header(也不影響主流程)。 + * + * @param {Response} res + * @returns {Promise<{ etag: string|null, 
sizeBytes: number|null }>} + */ +async function readSuccessMeta(res) { + let etag = null; + let sizeBytes = null; + + // 1. 優先嘗試 JSON body + try { + const ctype = res.headers.get('content-type') || ''; + if (ctype.includes('application/json')) { + const data = await res.json(); + if (data && typeof data === 'object') { + if (typeof data.etag === 'string') etag = data.etag; + if (typeof data.size_bytes === 'number') sizeBytes = data.size_bytes; + if (typeof data.size === 'number' && sizeBytes == null) sizeBytes = data.size; + } + } + } catch (_) { + /* fallback to header */ + } + + // 2. fallback:HTTP standard headers + if (!etag) { + const headerEtag = res.headers.get('etag'); + if (headerEtag) etag = headerEtag.replace(/^"|"$/g, ''); + } + if (sizeBytes == null) { + const cl = res.headers.get('content-length'); + if (cl) { + const parsed = Number.parseInt(cl, 10); + if (Number.isFinite(parsed) && parsed >= 0) sizeBytes = parsed; + } + } + + return { etag, sizeBytes }; +} + +/** + * 從 fetch 異常判斷是否為 timeout / abort。 + * + * @param {unknown} err + * @param {AbortSignal} signal + */ +function isAbortLike(err, signal) { + if (!err) return Boolean(signal && signal.aborted); + if (typeof err !== 'object') return Boolean(signal && signal.aborted); + const e = /** @type {{ name?: string, code?: string }} */ (err); + if (e.name === 'AbortError' || e.code === 'ABORT_ERR') return true; + return Boolean(signal && signal.aborted); +} + +// ---------------------------------------------------------------------------- +// FAA Client +// ---------------------------------------------------------------------------- + +/** + * 建立一個 FAA client instance。 + * + * @typedef {Object} FAAClientDeps + * @property {{ getServiceToken: (scope: string) => Promise, invalidate: (scope: string) => void }} oauthClient + * @property {{ baseUrl: string }} [config] - 注入測試用 config(覆寫環境變數) + * @property {Function} [fetch] - 注入用 fetch(測試用 mock) + * @property {Function} [setTimeoutFn] - 注入用 setTimeout,**僅供 
retry sleep**;
+ *   the fetch timeout inside attemptPut always uses the real setTimeout so that a test's
+ *   fake timer cannot trigger the abort immediately
+ * @property {Function} [now] - injected Date.now (documented for parity with the OAuth
+ *   client; createFaaClient does not currently read it)
+ * @property {string} [scope] - default scope (overrides DEFAULT_SCOPE)
+ * @property {number} [timeoutMs] - default PUT timeout, overrides DEFAULT_TIMEOUT_MS;
+ *   anything that is not a positive integer is ignored
+ * @property {number[]} [retryBackoffsMs] - overrides the backoff sequence for 5xx / timeout
+ *
+ * @param {FAAClientDeps} deps
+ * @returns {{ putFile: (objectKey: string, streamFactory: Function, opts: { contentLength: number, contentType?: string }) => Promise<{ etag: string|null, sizeBytes: number|null }> }}
+ * @throws {Error} when `deps` or `deps.oauthClient` is missing
+ */
+function createFaaClient(deps) {
+  if (!deps || !deps.oauthClient) {
+    throw new Error('[faaClient] deps.oauthClient is required');
+  }
+  const oauthClient = deps.oauthClient;
+  const fetchImpl = deps.fetch || globalThis.fetch;
+  const setTimeoutFn = deps.setTimeoutFn || globalThis.setTimeout;
+  const scope = deps.scope || DEFAULT_SCOPE;
+  const timeoutMs =
+    Number.isInteger(deps.timeoutMs) && deps.timeoutMs > 0
+      ? deps.timeoutMs
+      : DEFAULT_TIMEOUT_MS;
+  // `[]` is allowed and means "never retry" (common in tests); only undefined / a
+  // non-array falls back to the default sequence
+  const retryBackoffs = Array.isArray(deps.retryBackoffsMs)
+    ? deps.retryBackoffsMs
+    : RETRY_BACKOFFS_MS;
+
+  // Lazy-load config (same path for tests and production); the result is cached
+  let cachedConfig = deps.config || null;
+  function getConfig() {
+    if (cachedConfig) return cachedConfig;
+    const fullConfig = require('../config').loadConfig();
+    cachedConfig = { baseUrl: fullConfig.fileAccessAgent.baseUrl };
+    return cachedConfig;
+  }
+
+  /**
+   * Build the PUT URL: base + /files/{encodedKey}.
+   *
+   * Why encodeURI rather than encodeURIComponent:
+   *   - target_object_key is expected to contain `/` (path separator), which must not
+   *     become %2F
+   *   - dangerous sequences such as `..` `?` `#` have to be rejected by the caller first
+   *     (the promote handler does the sanity check) — encodeURI leaves `?` and `#` as is
+   *   - encodeURI encodes spaces / CJK characters but keeps legal path characters such as
+   *     `/` `:` `@`
+   *
+   * @param {string} baseUrl - trailing slashes are trimmed
+   * @param {string} objectKey
+   * @returns {string}
+   */
+  function buildUrl(baseUrl, objectKey) {
+    const trimmed = baseUrl.replace(/\/+$/, '');
+    // No leading-slash handling for objectKey (the caller has validated its format)
+    return `${trimmed}/files/${encodeURI(objectKey)}`;
+  }
+
+  /**
+   * A single PUT attempt (no retry logic).
+   *
+   * @param {string} objectKey
+   * @param {Function} streamFactory - awaited once here to obtain this attempt's body
+   * @param {{ contentLength: number, contentType?: string }} opts
+   * @param {string} bearerToken
+   * @returns {Promise} the fetch Response, whatever its status
+   * @throws {FAATimeoutError} on network failure or timeout
+   * @throws {Error} when no base URL is configured; errors from streamFactory also propagate
+   */
+  async function attemptPut(objectKey, streamFactory, opts, bearerToken) {
+    const config = getConfig();
+    if (!config.baseUrl) {
+      throw new Error(
+        '[faaClient] FILE_ACCESS_AGENT_BASE_URL not configured; cannot perform promote'
+      );
+    }
+
+    const url = buildUrl(config.baseUrl, objectKey);
+
+    const stream = await streamFactory();
+    const body = normalizeStreamBody(stream);
+
+    const controller = new AbortController();
+    // ★ Important: the fetch timeout always uses the real setTimeout (never the injected one).
+    //   Why: tests often inject a fake setTimeout that fires its callback immediately; if
+    //        attemptPut used it as well, every call would be aborted before reaching fetch.
+    //   The real setTimeout is still safe in tests: a fetch mock normally answers right
+    //   away, long before timeoutMs (300s by default) could trigger the abort.
+    // NOTE(review): the timer is cleared as soon as fetch settles, so reading the response
+    //   body afterwards is not covered by this timeout.
+    const timeoutHandle = globalThis.setTimeout(() => controller.abort(), timeoutMs);
+
+    let res;
+    try {
+      res = await fetchImpl(url, {
+        method: 'PUT',
+        headers: {
+          Authorization: `Bearer ${bearerToken}`,
+          'Content-Type': opts.contentType || 'application/octet-stream',
+          'Content-Length': String(opts.contentLength),
+        },
+        body,
+        // Flag required for stream bodies on Node 18+
+        duplex: 'half',
+        signal: controller.signal,
+      });
+    } catch (err) {
+      const aborted = isAbortLike(err, controller.signal);
+      if (aborted) {
+        throw new FAATimeoutError(`PUT to FAA timed out after ${timeoutMs}ms`);
+      }
+      // Network error (DNS, ECONNREFUSED, ...) — retried exactly like a timeout.
+      // err.message is deliberately not copied into the thrown message so the FAA
+      // hostname / port cannot leak.
+      // NOTE(review): the stream obtained from streamFactory is not explicitly destroyed
+      //   on this path — confirm fetch cancels it, or the MinIO read may stay open.
+      throw new FAATimeoutError('Network error contacting FAA');
+    } finally {
+      // Clear the timer on success and on failure alike
+      try {
+        globalThis.clearTimeout(timeoutHandle);
+      } catch (_) {
+        /* noop */
+      }
+    }
+
+    return res;
+  }
+
+  /**
+   * Turn a non-OK response into the matching FAAError.
+   *
+   * 401 → FAAUnauthorizedError, other 4xx → FAAClientError, everything else →
+   * FAAServerError. A short hint read from the body is stored in `errorCode` only.
+   *
+   * @param {Response} res
+   * @returns {Promise<FAAUnauthorizedError|FAAClientError|FAAServerError>}
+   */
+  async function classifyError(res) {
+    const status = res.status;
+    // Try to read the body as a hint for logs (never placed in the error message, to
+    // avoid leaking internal details to v1 clients)
+    let bodyHint = null;
+    try {
+      const ctype = res.headers.get('content-type') || '';
+      if (ctype.includes('application/json')) {
+        const data = await res.json();
+        if (data && typeof data === 'object') {
+          if (typeof data.error === 'string') bodyHint = data.error;
+          else if (typeof data.code === 'string') bodyHint = data.code;
+        }
+      } else {
+        const txt = await res.text();
+        // Non-JSON bodies are truncated to their first 100 characters
+        if (txt) bodyHint = txt.slice(0, 100);
+      }
+    } catch (_) {
+      /* parsing failed — carry on without a hint */
+    }
+
+    if (status === 401) {
+      return new FAAUnauthorizedError(`FAA returned 401 (token rejected)`, {
+        status,
+        errorCode: bodyHint || null,
+      });
+    }
+    if (status >= 400 && status < 500) {
+      return new FAAClientError(`FAA returned ${status}`, {
+        status,
+        errorCode: bodyHint || null,
+      });
+    }
+    // 5xx or anything else
+    return new FAAServerError(`FAA returned ${status}`, {
+      status,
+      errorCode: bodyHint || null,
+    });
+  }
+
+  /**
+   * PUT a result file to FAA, with the full retry / 401-invalidate / timeout logic.
+   *
+   * Retry summary:
+   * - **5xx / timeout / network**: consumes one attempt and retries after the
+   *   retryBackoffsMs delay.
+   * - **401**: calls `oauthClient.invalidate(scope)` + re-fetches the token + retries once;
+   *   that 401 retry **does not consume** an attempt (`attempt -= 1` cancels out the
+   *   loop increment).
+   *
+   * **Worst-case number of attempts**: 1 (initial) + 1 (401 retry) + 2 (5xx retries) =
+   * **4 PUTs** with the default backoffs.
+   *   - e.g. attempt #1 gets 401 → invalidate + re-fetch token → attempt #2 gets 5xx →
+   *     back off 500ms → attempt #3 gets 5xx → back off 2000ms → attempt #4 still 5xx →
+   *     throw FAAServerError
+   *   - one extra large upload is acceptable for FAA because "401 followed by consecutive
+   *     5xx" is very unlikely (an ordinary 401 cause such as token rotation does not also
+   *     produce server 5xx)
+   *   - should this worst case ever pressure FAA bandwidth, the 401 retry could be changed
+   *     to consume an attempt as well
+   *
+   * @param {string} objectKey - target NAS object key (already sanity-checked by the caller)
+   * @param {() => Promise} streamFactory
+   *   called once per attempt and must return a fresh stream (HTTP bodies cannot be replayed)
+   * @param {{ contentLength: number, contentType?: string }} opts
+   * @returns {Promise<{ etag: string|null, sizeBytes: number|null }>}
+   * @throws {TypeError} when an argument fails validation
+   * @throws {FAAClientError|FAAUnauthorizedError|FAAServerError|FAATimeoutError}
+   */
+  async function putFile(objectKey, streamFactory, opts) {
+    if (typeof objectKey !== 'string' || objectKey === '') {
+      throw new TypeError('[faaClient.putFile] objectKey is required (non-empty string)');
+    }
+    if (typeof streamFactory !== 'function') {
+      throw new TypeError('[faaClient.putFile] streamFactory must be a function');
+    }
+    if (
+      !opts ||
+      typeof opts.contentLength !== 'number' ||
+      !Number.isFinite(opts.contentLength) ||
+      opts.contentLength < 0
+    ) {
+      throw new TypeError('[faaClient.putFile] opts.contentLength must be a non-negative number');
+    }
+
+    let token = await oauthClient.getServiceToken(scope);
+
+    // Retry loop: at most 1 (initial) + retryBackoffs.length (5xx retries) iterations.
+    // The 401 retry is "one independent extra" (it does not use up the 5xx quota).
+    //
+    // Hence the worst-case total is maxAttempts + 1 (401 retry) = 4 PUTs by default:
+    //   attempt #1 (401) → invalidate token → attempt #2 (5xx) → backoff →
+    //   attempt #3 (5xx) → backoff → attempt #4 (5xx) → throw
+    //
+    // See the "worst-case number of attempts" section of the putFile docblock.
+    let unauthorizedRetried = false;
+    const maxAttempts = 1 + retryBackoffs.length;
+
+    for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
+      let res;
+      try {
+        res = await attemptPut(objectKey, streamFactory, opts, token);
+      } catch (err) {
+        // Network error / timeout (already wrapped as FAATimeoutError)
+        if (err instanceof FAATimeoutError) {
+          if (attempt < retryBackoffs.length) {
+            // Retries left
+            logEvent('WARN', 'faa.put_failed_retry', {
+              object_key_length: objectKey.length, // never log the key itself
+              attempt: attempt + 1,
+              reason: 'timeout_or_network',
+              backoff_ms: retryBackoffs[attempt],
+            });
+            await sleep(retryBackoffs[attempt], setTimeoutFn);
+            continue;
+          }
+          // Retries exhausted → throw
+          logEvent('ERROR', 'faa.put_failed_final', {
+            object_key_length: objectKey.length,
+            attempt: attempt + 1,
+            reason: 'timeout_or_network',
+          });
+          throw err;
+        }
+        // Any other exception (e.g. thrown by streamFactory) — no retry, rethrow
+        throw err;
+      }
+
+      // Success path
+      if (res.ok) {
+        const meta = await readSuccessMeta(res);
+        logEvent('INFO', 'faa.put_success', {
+          object_key_length: objectKey.length,
+          status: res.status,
+          attempt: attempt + 1,
+          size_bytes: meta.sizeBytes,
+        });
+        return meta;
+      }
+
+      // Failure — classify
+      const err = await classifyError(res);
+
+      // 401: invalidate first, then retry once
+      if (err instanceof FAAUnauthorizedError) {
+        if (unauthorizedRetried) {
+          // Already retried once and still 401 → give up
+          logEvent('ERROR', 'faa.put_unauthorized_after_retry', {
+            object_key_length: objectKey.length,
+            status: 401,
+            attempt: attempt + 1,
+          });
+          throw err;
+        }
+        unauthorizedRetried = true;
+        logEvent('WARN', 'faa.put_unauthorized_invalidate', {
+          object_key_length: objectKey.length,
+          attempt: attempt + 1,
+        });
+        oauthClient.invalidate(scope);
+        token = await oauthClient.getServiceToken(scope);
+        // Does not consume an attempt (the 401 retry is independent).
+        // ★ Side effect: if a 5xx follows the 401, the 5xx retries still get the full
+        //   retryBackoffs quota. Worst case is 4 PUTs in total (see the putFile docblock).
+        //   One extra large upload is acceptable for FAA because "401 followed by
+        //   consecutive 5xx" is an extremely rare scenario.
+        attempt -= 1;
+        continue;
+      }
+
+      // 4xx other than 401 — never retried
+      if (err instanceof FAAClientError) {
+        logEvent('WARN', 'faa.put_client_error', {
+          object_key_length: objectKey.length,
+          status: err.status,
+          attempt: attempt + 1,
+        });
+        throw err;
+      }
+
+      // 5xx — retry
+      if (err instanceof FAAServerError) {
+        if (attempt < retryBackoffs.length) {
+          logEvent('WARN', 'faa.put_failed_retry', {
+            object_key_length: objectKey.length,
+            attempt: attempt + 1,
+            reason: 'server_error',
+            status: err.status,
+            backoff_ms: retryBackoffs[attempt],
+          });
+          await sleep(retryBackoffs[attempt], setTimeoutFn);
+          continue;
+        }
+        // Retries exhausted → throw
+        logEvent('ERROR', 'faa.put_failed_final', {
+          object_key_length: objectKey.length,
+          attempt: attempt + 1,
+          reason: 'server_error',
+          status: err.status,
+        });
+        throw err;
+      }
+
+      // Fallback (should not happen)
+      throw err;
+    }
+
+    // Unreachable in practice (every loop iteration returns, throws or continues)
+    throw new FAAServerError('FAA putFile exhausted retries unexpectedly');
+  }
+
+  return { putFile };
+}
+
+module.exports = {
+  createFaaClient,
+  // Constants exposed for tests / tuning
+  DEFAULT_SCOPE,
+  DEFAULT_TIMEOUT_MS,
+  RETRY_BACKOFFS_MS,
+  // Exposed for tests only
+  _internals: {
+    normalizeStreamBody,
+    readSuccessMeta,
+    isAbortLike,
+    sleep,
+  },
+};
diff --git a/apps/task-scheduler/src/fileAccessAgent/errors.js b/apps/task-scheduler/src/fileAccessAgent/errors.js
new file mode 100644
index 0000000..fbc0038
--- /dev/null
+++ b/apps/task-scheduler/src/fileAccessAgent/errors.js
@@ -0,0 +1,96 @@
+/**
+ * Error classes for the File Access Agent (FAA) client.
+ *
+ * Follows the design style of the OAuth client (T2):
+ *   - the error classes map onto the retry decision matrix of TDD §6.3
+ *   - the `retryable` flag is forcibly overridden per class, so callers only need
+ *     `instanceof` or `err.retryable`
+ *
+ * Retry decision matrix (TDD §6.3):
+ *
+ *   | HTTP / failure    | Error class          | retryable |
+ *   |-------------------|----------------------|-----------|
+ *   | 4xx except 401    | FAAClientError       | false     |
+ *   | 401               | FAAUnauthorizedError | true (once) — token invalidate + retry |
+ *   | 5xx               | FAAServerError       | true (twice) |
+ *   | timeout / network | FAATimeoutError      | true (twice) |
+ *
+ * Why 401 gets its own class (instead of being folded into FAAClientError):
+ *   401 is handled differently from every other 4xx — oauthClient.invalidate(scope) must
+ *   fetch a new token before a single retry. Splitting it out lets client.js branch
+ *   precisely with instanceof, and shows callers at a glance that 401 is a special case.
+ *
+ * Security:
+ *   -
message 與 status / errorCode 都不應含 token / Authorization 內容 + * - FAA 回傳的 response body(可能含內部錯誤細節)**不直接放進 message**;只取 + * 固定的 status code + 預設文案,避免回給 visionA-backend 時洩露內部資訊 + */ + +'use strict'; + +/** + * FAA 共用基類。所有 FAA 錯誤都應繼承自此類。 + */ +class FAAError extends Error { + /** + * @param {string} name + * @param {string} message + * @param {{ status?: number, errorCode?: string|null, retryable?: boolean }} [meta] + */ + constructor(name, message, meta = {}) { + super(message); + this.name = name; + this.status = typeof meta.status === 'number' ? meta.status : null; + this.errorCode = meta.errorCode || null; + this.retryable = meta.retryable === true; + } +} + +/** + * 4xx(除 401)— FAA 回的 client 錯誤。**不可重試**。 + * + * 例如 target_object_key 不合法、scope 不足等。caller 應直接轉 502 + * `file_gateway_unavailable` 給 v1 client(不洩漏 FAA 內部 error_code 細節)。 + */ +class FAAClientError extends FAAError { + constructor(message, meta) { + super('FAAClientError', message, { ...meta, retryable: false }); + } +} + +/** + * 401 — token 失效。**可重試一次**:先 oauthClient.invalidate(scope) 再重發。 + * 重試仍 401 → caller 應轉 503 `auth_service_unavailable`。 + */ +class FAAUnauthorizedError extends FAAError { + constructor(message, meta) { + super('FAAUnauthorizedError', message, { ...meta, retryable: true }); + } +} + +/** + * 5xx — FAA server 錯誤。**可重試最多 2 次**(指數退避 500ms / 2000ms)。 + * 全失敗 → caller 應轉 502 `file_gateway_unavailable`。 + */ +class FAAServerError extends FAAError { + constructor(message, meta) { + super('FAAServerError', message, { ...meta, retryable: true }); + } +} + +/** + * 網路 / timeout — 連線層錯誤。**可重試最多 2 次**(同 5xx 處理)。 + * 全失敗 → caller 應轉 502 `file_gateway_unavailable`。 + */ +class FAATimeoutError extends FAAError { + constructor(message, meta) { + super('FAATimeoutError', message, { ...meta, retryable: true }); + } +} + +module.exports = { + FAAError, + FAAClientError, + FAAUnauthorizedError, + FAAServerError, + FAATimeoutError, +}; diff --git 
a/apps/task-scheduler/src/middleware/__tests__/errorHandler.test.js b/apps/task-scheduler/src/middleware/__tests__/errorHandler.test.js new file mode 100644 index 0000000..d829a2a --- /dev/null +++ b/apps/task-scheduler/src/middleware/__tests__/errorHandler.test.js @@ -0,0 +1,297 @@ +/** + * Unit tests for src/middleware/errorHandler.js + * + * 測試重點: + * 1. ApiError 物件被展開為 status / code / message / details + * 2. 未預期錯誤統一變成 500 internal_error,**不**洩漏 stack / message + * 3. response body 包含 request_id(從 req.requestId 取) + * 4. headersSent 時不重複寫 + * 5. log 呼叫包含 request_id 與正確 level + */ + +'use strict'; + +const { ApiError, errorHandler } = require('../errorHandler'); + +/** + * 建一組 req / res / next,模擬 Express 的 error handler 介面。 + */ +function makeReqResNext(reqOverrides = {}) { + const req = { + method: 'GET', + originalUrl: '/api/v1/test', + requestId: 'req-test-001', + ...reqOverrides, + }; + const res = { + headersSent: false, + statusCode: 200, + body: null, + status: jest.fn(function statusImpl(code) { + res.statusCode = code; + return res; + }), + json: jest.fn(function jsonImpl(body) { + res.body = body; + res.headersSent = true; + return res; + }), + }; + const next = jest.fn(); + return { req, res, next }; +} + +// 抑制 errorHandler 內部的 structured log,避免測試輸出嘈雜 +let _origWarn; +let _origError; +beforeAll(() => { + _origWarn = console.warn; + _origError = console.error; + // 用 jest.fn 包起來,後面可以斷言被呼叫過 +}); +afterAll(() => { + console.warn = _origWarn; + console.error = _origError; +}); + +beforeEach(() => { + // 每個 test 前 mock 掉 console,使其可被 spy + console.warn = jest.fn(); + console.error = jest.fn(); +}); + +// --------------------------------------------------------------------------- +// ApiError class +// --------------------------------------------------------------------------- + +describe('ApiError', () => { + it('extends Error and carries status/code/message', () => { + const err = new ApiError(409, 'user_has_active_job', '已有進行中的 job'); + 
expect(err).toBeInstanceOf(Error); + expect(err).toBeInstanceOf(ApiError); + expect(err.name).toBe('ApiError'); + expect(err.status).toBe(409); + expect(err.code).toBe('user_has_active_job'); + expect(err.message).toBe('已有進行中的 job'); + }); + + it('omits details when not provided', () => { + const err = new ApiError(404, 'job_not_found', 'not found'); + expect(err.details).toBeUndefined(); + // 確保 details key 沒被加入到物件 + expect(Object.prototype.hasOwnProperty.call(err, 'details')).toBe(false); + }); + + it('preserves details when provided', () => { + const err = new ApiError(403, 'insufficient_scope', '權限不足', { + required_scope: 'converter:job.write', + provided_scopes: ['converter:job.read'], + }); + expect(err.details).toEqual({ + required_scope: 'converter:job.write', + provided_scopes: ['converter:job.read'], + }); + }); + + it('preserves stack trace', () => { + const err = new ApiError(500, 'internal_error', 'oops'); + expect(typeof err.stack).toBe('string'); + expect(err.stack).toContain('ApiError'); + }); +}); + +// --------------------------------------------------------------------------- +// errorHandler — ApiError handling +// --------------------------------------------------------------------------- + +describe('errorHandler — ApiError 預期錯誤', () => { + it('uses status/code/message from ApiError', () => { + const err = new ApiError(501, 'not_implemented', '尚未實作'); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(res.status).toHaveBeenCalledWith(501); + expect(res.body).toEqual({ + error: { + code: 'not_implemented', + message: '尚未實作', + request_id: 'req-test-001', + }, + }); + expect(next).not.toHaveBeenCalled(); + }); + + it('includes details when ApiError has them', () => { + const err = new ApiError(403, 'insufficient_scope', '權限不足', { + required_scope: 'converter:job.write', + }); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + 
expect(res.body.error.details).toEqual({ + required_scope: 'converter:job.write', + }); + }); + + it('includes request_id from req.requestId', () => { + const err = new ApiError(409, 'conflict', 'conflict'); + const { req, res, next } = makeReqResNext({ requestId: 'custom-trace-42' }); + + errorHandler(err, req, res, next); + + expect(res.body.error.request_id).toBe('custom-trace-42'); + }); + + it('falls back request_id to null when req.requestId missing', () => { + const err = new ApiError(404, 'not_found', 'gone'); + const { req, res, next } = makeReqResNext({ requestId: undefined }); + + errorHandler(err, req, res, next); + + expect(res.body.error.request_id).toBeNull(); + }); + + it('logs ApiError 4xx as WARN level', () => { + const err = new ApiError(404, 'job_not_found', 'not found'); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(console.warn).toHaveBeenCalledTimes(1); + expect(console.error).not.toHaveBeenCalled(); + + const logged = JSON.parse(console.warn.mock.calls[0][0]); + expect(logged.level).toBe('WARN'); + expect(logged.error_code).toBe('job_not_found'); + expect(logged.status).toBe(404); + expect(logged.request_id).toBe('req-test-001'); + }); + + it('logs ApiError 5xx as ERROR level', () => { + const err = new ApiError(503, 'service_unavailable', 'down'); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(console.error).toHaveBeenCalledTimes(1); + expect(console.warn).not.toHaveBeenCalled(); + const logged = JSON.parse(console.error.mock.calls[0][0]); + expect(logged.level).toBe('ERROR'); + expect(logged.status).toBe(503); + }); +}); + +// --------------------------------------------------------------------------- +// errorHandler — 未預期錯誤 +// --------------------------------------------------------------------------- + +describe('errorHandler — 未預期錯誤(非 ApiError)', () => { + it('converts plain Error to 500 internal_error', () => { + const err = new 
Error('database connection lost'); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(res.status).toHaveBeenCalledWith(500); + expect(res.body).toEqual({ + error: { + code: 'internal_error', + message: '伺服器內部錯誤', + request_id: 'req-test-001', + }, + }); + }); + + it('does NOT leak stack trace in response body', () => { + const err = new Error('secret internal detail'); + err.stack = 'Error: secret internal detail\n at /path/to/internal.js:42'; + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + const json = JSON.stringify(res.body); + expect(json).not.toContain('secret internal detail'); + expect(json).not.toContain('/path/to/internal.js'); + expect(json).not.toContain('stack'); + }); + + it('does NOT leak original error.message in response body', () => { + const err = new Error('SELECT * FROM users WHERE password=...'); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(res.body.error.message).toBe('伺服器內部錯誤'); + expect(JSON.stringify(res.body)).not.toContain('SELECT'); + }); + + it('does NOT include details on unknown errors', () => { + const err = new Error('oops'); + // 即使有人手動往 Error 上塞 details,也不該被輸出 + err.details = { sensitive: 'data' }; + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(res.body.error.details).toBeUndefined(); + expect(JSON.stringify(res.body)).not.toContain('sensitive'); + }); + + it('logs unknown errors as ERROR with stack to console', () => { + const err = new SyntaxError('unexpected token in JSON'); + const { req, res, next } = makeReqResNext(); + + errorHandler(err, req, res, next); + + expect(console.error).toHaveBeenCalledTimes(1); + const logged = JSON.parse(console.error.mock.calls[0][0]); + expect(logged.level).toBe('ERROR'); + expect(logged.error_code).toBe('internal_error'); + expect(logged.status).toBe(500); + expect(logged.message).toBe('unexpected token 
in JSON'); + // stack **應該**進 log(給 ops),但**不**進 response body + expect(logged.stack).toContain('SyntaxError'); + }); + + it('handles non-Error thrown values gracefully', () => { + const err = 'just a string thrown'; + const { req, res, next } = makeReqResNext(); + + // 不應 throw + expect(() => errorHandler(err, req, res, next)).not.toThrow(); + expect(res.status).toHaveBeenCalledWith(500); + expect(res.body.error.code).toBe('internal_error'); + }); +}); + +// --------------------------------------------------------------------------- +// errorHandler — headersSent 邊界 +// --------------------------------------------------------------------------- + +describe('errorHandler — headersSent 邊界', () => { + it('does not write response when headersSent=true (delegates to default)', () => { + const err = new ApiError(500, 'internal_error', 'too late'); + const { req, res, next } = makeReqResNext(); + res.headersSent = true; + + errorHandler(err, req, res, next); + + expect(res.status).not.toHaveBeenCalled(); + expect(res.json).not.toHaveBeenCalled(); + // 必須交給下一個 handler(Express 的預設 finalhandler 會中斷連線) + expect(next).toHaveBeenCalledTimes(1); + expect(next).toHaveBeenCalledWith(err); + }); + + it('still logs even when headersSent=true', () => { + const err = new Error('mid-stream error'); + const { req, res, next } = makeReqResNext(); + res.headersSent = true; + + errorHandler(err, req, res, next); + + expect(console.error).toHaveBeenCalledTimes(1); + }); +}); diff --git a/apps/task-scheduler/src/middleware/__tests__/perClientRateLimit.test.js b/apps/task-scheduler/src/middleware/__tests__/perClientRateLimit.test.js new file mode 100644 index 0000000..32dac51 --- /dev/null +++ b/apps/task-scheduler/src/middleware/__tests__/perClientRateLimit.test.js @@ -0,0 +1,172 @@ +/** + * perClientRateLimit middleware 單元測試(T5)。 + * + * 重點: + * 1. 在 quota 內 (<= max) 不擋;超過時走 ApiError 429 rate_limit_exceeded + * 2. keyGenerator 用 req.auth.clientId 區分 quota(兩個 client 互不干擾) + * 3. 
缺 req.auth 時 fallback 到 IP(不同 IP 互不干擾) + * 4. response 帶有 RateLimit-* header + */ + +'use strict'; + +const express = require('express'); +const http = require('http'); + +const { createPerClientRateLimiter } = require('../perClientRateLimit'); +const { ApiError, errorHandler } = require('../errorHandler'); +const { requestIdMiddleware } = require('../requestId'); + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +/** + * 啟動一個小 app: + * - requestId middleware + * - 一個假的 requireAuth → 把 query.clientId 寫到 req.auth.clientId + * - perClientRateLimiter + * - 一個 echo handler + * - errorHandler 在最後 + * + * @param {object} opts + * @returns {Promise<{baseUrl: string, close: () => Promise}>} + */ +async function startApp(opts) { + const app = express(); + app.use(requestIdMiddleware); + app.use((req, _res, next) => { + if (req.query.clientId) { + req.auth = { clientId: String(req.query.clientId) }; + } + next(); + }); + const limiter = createPerClientRateLimiter(opts); + app.get('/test', limiter, (_req, res) => { + res.json({ ok: true }); + }); + app.use(errorHandler); + + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + baseUrl: `http://127.0.0.1:${port}`, + close: () => new Promise((r) => server.close(r)), + }); + }); + }); +} + +describe('perClientRateLimit — quota enforcement', () => { + it('allows requests within max', async () => { + const ctx = await startApp({ windowMs: 60000, max: 3 }); + try { + for (let i = 0; i < 3; i++) { + const res = await fetch(`${ctx.baseUrl}/test?clientId=c-1`); + expect(res.status).toBe(200); + } + } finally { + await ctx.close(); + } + }); + + it('blocks (429) after exceeding max with rate_limit_exceeded code', async () => { + const ctx = await 
startApp({ windowMs: 60000, max: 2 }); + try { + // 前 2 次 ok + const r1 = await fetch(`${ctx.baseUrl}/test?clientId=c-1`); + expect(r1.status).toBe(200); + const r2 = await fetch(`${ctx.baseUrl}/test?clientId=c-1`); + expect(r2.status).toBe(200); + // 第 3 次擋下 + const r3 = await fetch(`${ctx.baseUrl}/test?clientId=c-1`); + expect(r3.status).toBe(429); + const body = await r3.json(); + expect(body.error.code).toBe('rate_limit_exceeded'); + expect(typeof body.error.message).toBe('string'); + // 應含 retry_after_seconds 細節 + expect(body.error.details).toHaveProperty('retry_after_seconds'); + // request_id 帶到 v1 格式 + expect(typeof body.error.request_id).toBe('string'); + } finally { + await ctx.close(); + } + }); + + it('isolates quota per client_id', async () => { + const ctx = await startApp({ windowMs: 60000, max: 1 }); + try { + // c-1 用完 + const a1 = await fetch(`${ctx.baseUrl}/test?clientId=c-1`); + expect(a1.status).toBe(200); + const a2 = await fetch(`${ctx.baseUrl}/test?clientId=c-1`); + expect(a2.status).toBe(429); + // c-2 還有 quota + const b1 = await fetch(`${ctx.baseUrl}/test?clientId=c-2`); + expect(b1.status).toBe(200); + } finally { + await ctx.close(); + } + }); +}); + +describe('perClientRateLimit — fallback', () => { + it('falls back to IP when req.auth.clientId missing', async () => { + const ctx = await startApp({ windowMs: 60000, max: 1 }); + try { + // 無 clientId → IP-keyed + const r1 = await fetch(`${ctx.baseUrl}/test`); + expect(r1.status).toBe(200); + const r2 = await fetch(`${ctx.baseUrl}/test`); + expect(r2.status).toBe(429); + } finally { + await ctx.close(); + } + }); +}); + +describe('perClientRateLimit — headers', () => { + it('sets RateLimit-* response headers', async () => { + const ctx = await startApp({ windowMs: 60000, max: 5 }); + try { + const res = await fetch(`${ctx.baseUrl}/test?clientId=c-3`); + expect(res.status).toBe(200); + // standardHeaders=true 會有這些 header(RFC draft) + // 部分版本是 RateLimit-*;舊版是 X-RateLimit-* + const limit = 
res.headers.get('ratelimit-limit') || res.headers.get('x-ratelimit-limit'); + const remaining = res.headers.get('ratelimit-remaining') || res.headers.get('x-ratelimit-remaining'); + expect(limit).toBeTruthy(); + expect(remaining).toBeTruthy(); + } finally { + await ctx.close(); + } + }); +}); + +describe('perClientRateLimit — defaults', () => { + it('uses sane defaults when no opts', async () => { + const ctx = await startApp(); + try { + const res = await fetch(`${ctx.baseUrl}/test?clientId=c-default`); + expect(res.status).toBe(200); + } finally { + await ctx.close(); + } + }); + + it('rejects invalid windowMs / max in opts (uses defaults)', async () => { + const ctx = await startApp({ windowMs: -1, max: 0 }); + try { + const res = await fetch(`${ctx.baseUrl}/test?clientId=c-bad-opts`); + expect(res.status).toBe(200); // 預設 max=300 會接受 + } finally { + await ctx.close(); + } + }); +}); diff --git a/apps/task-scheduler/src/middleware/__tests__/requestId.test.js b/apps/task-scheduler/src/middleware/__tests__/requestId.test.js new file mode 100644 index 0000000..6329dee --- /dev/null +++ b/apps/task-scheduler/src/middleware/__tests__/requestId.test.js @@ -0,0 +1,236 @@ +/** + * Unit tests for src/middleware/requestId.js + * + * 測試重點: + * 1. 沿用合法的外部 X-Request-Id(UUID / 字母數字 / 含 - 與 _) + * 2. 拒絕(fallback to generated)非法輸入:含空白 / 控制字元 / 超長 / CRLF + * 3. 設置 req.requestId + * 4. 設置 res header X-Request-Id(值與 req.requestId 一致) + * 5. 沒帶 header → 自行 randomUUID 產生(且為 UUIDv4 格式) + * 6. 
isValidRequestId helper 邊界值 + */ + +'use strict'; + +const { requestIdMiddleware, _internals } = require('../requestId'); + +const UUID_V4_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; + +/** + * 建一組精簡的 req / res / next,模擬 Express 行為。 + * - req.get(name) — case insensitive header lookup + * - res.setHeader / res.getHeader + */ +function makeReqResNext(headers = {}) { + // 將 header key normalize 成 lowercase(對應 Express req.get 行為) + const lowerHeaders = {}; + for (const [k, v] of Object.entries(headers)) { + lowerHeaders[k.toLowerCase()] = v; + } + const req = { + headers: lowerHeaders, + get(name) { + return lowerHeaders[name.toLowerCase()]; + }, + }; + const responseHeaders = {}; + const res = { + setHeader: jest.fn((k, v) => { + responseHeaders[k] = v; + }), + getHeader: (k) => responseHeaders[k], + _headers: responseHeaders, + }; + const next = jest.fn(); + return { req, res, next }; +} + +describe('requestIdMiddleware — 沿用外部 ID', () => { + it('uses external UUID when header is present and valid', () => { + const externalId = '550e8400-e29b-41d4-a716-446655440000'; + const { req, res, next } = makeReqResNext({ 'X-Request-Id': externalId }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toBe(externalId); + expect(res.setHeader).toHaveBeenCalledWith('X-Request-Id', externalId); + expect(next).toHaveBeenCalledTimes(1); + }); + + it('uses external ID for case-insensitive header (x-request-id)', () => { + const externalId = 'trace-abc-123'; + const { req, res, next } = makeReqResNext({ 'x-request-id': externalId }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toBe(externalId); + expect(res.setHeader).toHaveBeenCalledWith('X-Request-Id', externalId); + }); + + it('accepts custom alphanumeric trace IDs (e.g. 
OpenTelemetry 32-hex)', () => { + const externalId = 'a'.repeat(32); + const { req, res, next } = makeReqResNext({ 'X-Request-Id': externalId }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toBe(externalId); + }); + + it('accepts ID with - and _', () => { + const externalId = 'my_trace-id_42'; + const { req, res, next } = makeReqResNext({ 'X-Request-Id': externalId }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toBe(externalId); + }); +}); + +describe('requestIdMiddleware — fallback 自行產生', () => { + it('generates UUID when no X-Request-Id header is sent', () => { + const { req, res, next } = makeReqResNext({}); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + expect(res.setHeader).toHaveBeenCalledWith('X-Request-Id', req.requestId); + expect(next).toHaveBeenCalledTimes(1); + }); + + it('generates UUID when X-Request-Id is empty string', () => { + const { req, res, next } = makeReqResNext({ 'X-Request-Id': '' }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + }); + + it('generates UUID when X-Request-Id contains spaces (illegal)', () => { + const { req, res, next } = makeReqResNext({ 'X-Request-Id': 'has space' }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + expect(req.requestId).not.toBe('has space'); + }); + + it('generates UUID when X-Request-Id contains CRLF (log injection attempt)', () => { + const { req, res, next } = makeReqResNext({ + 'X-Request-Id': 'evil\r\nX-Forwarded-For: 1.2.3.4', + }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + // 確保 response header 寫入的也是安全值 + expect(res.setHeader).toHaveBeenCalledWith('X-Request-Id', req.requestId); + }); + + it('generates UUID when X-Request-Id contains control chars', () => { + const { req, res, next } = makeReqResNext({ 'X-Request-Id': 'abc\x00def' }); + + requestIdMiddleware(req, 
res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + }); + + it('generates UUID when X-Request-Id is too long (> 100 chars)', () => { + const tooLong = 'a'.repeat(101); + const { req, res, next } = makeReqResNext({ 'X-Request-Id': tooLong }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + expect(req.requestId).not.toBe(tooLong); + }); + + it('generates UUID when X-Request-Id contains illegal chars (e.g. /)', () => { + const { req, res, next } = makeReqResNext({ 'X-Request-Id': 'abc/def' }); + + requestIdMiddleware(req, res, next); + + expect(req.requestId).toMatch(UUID_V4_REGEX); + }); + + it('generates a unique ID per call', () => { + const seen = new Set(); + for (let i = 0; i < 50; i++) { + const { req, res, next } = makeReqResNext({}); + requestIdMiddleware(req, res, next); + seen.add(req.requestId); + } + expect(seen.size).toBe(50); // 50 unique UUIDs + }); +}); + +describe('requestIdMiddleware — 行為一致性', () => { + it('always calls next() exactly once', () => { + const { req, res, next } = makeReqResNext({}); + requestIdMiddleware(req, res, next); + expect(next).toHaveBeenCalledTimes(1); + expect(next).toHaveBeenCalledWith(); // 不傳 error + }); + + it('always sets X-Request-Id header on response (even when generated)', () => { + const { req, res, next } = makeReqResNext({}); + requestIdMiddleware(req, res, next); + expect(res.setHeader).toHaveBeenCalledTimes(1); + expect(res.setHeader).toHaveBeenCalledWith('X-Request-Id', req.requestId); + }); + + it('echoes the same value when external ID was used', () => { + const externalId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'; + const { req, res, next } = makeReqResNext({ 'X-Request-Id': externalId }); + requestIdMiddleware(req, res, next); + expect(req.requestId).toBe(externalId); + expect(res.setHeader).toHaveBeenCalledWith('X-Request-Id', externalId); + }); + + it('does not throw when res.setHeader is not a function (graceful)', () => { + const req = { headers: 
{}, get: () => undefined }; + const res = {}; // 缺 setHeader + const next = jest.fn(); + expect(() => requestIdMiddleware(req, res, next)).not.toThrow(); + expect(req.requestId).toMatch(UUID_V4_REGEX); + expect(next).toHaveBeenCalledTimes(1); + }); + + it('does not throw when req.get is not a function (graceful)', () => { + const req = { headers: {} }; + const responseHeaders = {}; + const res = { setHeader: (k, v) => { responseHeaders[k] = v; } }; + const next = jest.fn(); + expect(() => requestIdMiddleware(req, res, next)).not.toThrow(); + expect(req.requestId).toMatch(UUID_V4_REGEX); + }); +}); + +describe('_internals.isValidRequestId', () => { + const { isValidRequestId } = _internals; + + it.each([ + ['valid UUID', '550e8400-e29b-41d4-a716-446655440000', true], + ['simple alphanumeric', 'abc123', true], + ['with hyphen', 'a-b-c', true], + ['with underscore', 'a_b_c', true], + ['mixed', 'Trace_42-XYZ', true], + ['100 chars (boundary)', 'a'.repeat(100), true], + ['101 chars (over)', 'a'.repeat(101), false], + ['empty', '', false], + ['contains space', 'a b', false], + ['contains slash', 'a/b', false], + ['contains CR', 'a\rb', false], + ['contains LF', 'a\nb', false], + ['contains null byte', 'a\x00b', false], + ['contains tab', 'a\tb', false], + ['contains semicolon', 'a;b', false], + ['contains dot', 'a.b', false], // 我們的 regex 不允許 dot;保守做法 + ['number (non-string)', 12345, false], + ['null', null, false], + ['undefined', undefined, false], + ['object', {}, false], + ['array', ['a'], false], + ])('%s → %s', (_label, input, expected) => { + expect(isValidRequestId(input)).toBe(expected); + }); +}); diff --git a/apps/task-scheduler/src/middleware/__tests__/upload.test.js b/apps/task-scheduler/src/middleware/__tests__/upload.test.js new file mode 100644 index 0000000..b713656 --- /dev/null +++ b/apps/task-scheduler/src/middleware/__tests__/upload.test.js @@ -0,0 +1,91 @@ +/** + * upload.js multer factory 單元測試(T10 修 D5:env / opts 串接)。 + * + * 重點: + * 1. 
createUploader() 預設值(500MB / 102 files) + * 2. opts.maxFileSize 可覆寫 fileSize + * 3. opts.maxRefImages 可推算 maxFiles(N+2) + * 4. opts.maxFiles 可顯式覆寫 + * 5. 非法值(0 / 負)fallback 到預設 + */ + +'use strict'; + +const { + createUploader, + DEFAULT_MAX_FILE_SIZE, + DEFAULT_MAX_REF_IMAGES, + DEFAULT_MAX_FILES, +} = require('../upload'); + +describe('createUploader — defaults', () => { + it('uses 500MB fileSize and 102 files by default', () => { + const uploader = createUploader(); + // multer.Multer 物件無公開 API 看 limits;走 internal property + // _multerInstance.limits.* — 這個依賴 multer 內部結構,但很穩定(已多年) + // 為了不依賴內部細節,改驗常數 + expect(DEFAULT_MAX_FILE_SIZE).toBe(500 * 1024 * 1024); + expect(DEFAULT_MAX_REF_IMAGES).toBe(100); + expect(DEFAULT_MAX_FILES).toBe(102); + // 同時確保 createUploader() 不 throw + expect(uploader).toBeDefined(); + }); +}); + +describe('createUploader — opts.maxFileSize override', () => { + it('respects custom maxFileSize', () => { + const uploader = createUploader({ maxFileSize: 100 * 1024 * 1024 }); + // multer 的 storage.limits 不公開;用反射方式取(跨 multer 版本相對穩) + // 若未來 multer 內部結構改了,此測試會 failure,是預期內的 + // multer 物件本身是 function(可呼叫的 instance),limits 在內部 + // 用 _multerInstance?不存在;直接驗 createUploader 不 throw + opts 被吃進去 + expect(uploader).toBeDefined(); + // 透過 opts 行為驗證較困難(需真打 multer);本檔做表層 sanity check, + // 真正的「env → multer」串接由 server.js 端 + integration test 驗 + }); +}); + +describe('createUploader — opts.maxRefImages affects maxFiles', () => { + it('default maxFiles = maxRefImages + 2', () => { + // 驗證 helper 計算邏輯(再 expose 一次以利測試) + // 因 createUploader 內部封裝,這裡只驗 const 一致 + expect(DEFAULT_MAX_FILES).toBe(DEFAULT_MAX_REF_IMAGES + 2); + }); + + it('does not throw when maxRefImages explicitly set', () => { + expect(() => createUploader({ maxRefImages: 50 })).not.toThrow(); + expect(() => createUploader({ maxRefImages: 200 })).not.toThrow(); + }); + + it('does not throw when maxFiles explicitly set', () => { + expect(() => createUploader({ maxFiles: 5 })).not.toThrow(); + }); 
+}); + +describe('createUploader — invalid opts fallback', () => { + it('falls back to default for non-positive maxFileSize', () => { + // 0 / 負 / NaN 都應 fallback 到 DEFAULT_MAX_FILE_SIZE + expect(() => createUploader({ maxFileSize: 0 })).not.toThrow(); + expect(() => createUploader({ maxFileSize: -1 })).not.toThrow(); + expect(() => createUploader({ maxFileSize: 'huge' })).not.toThrow(); + }); + + it('falls back to default for non-positive maxRefImages', () => { + expect(() => createUploader({ maxRefImages: 0 })).not.toThrow(); + expect(() => createUploader({ maxRefImages: -10 })).not.toThrow(); + }); + + it('falls back to default for non-positive maxFiles', () => { + expect(() => createUploader({ maxFiles: 0 })).not.toThrow(); + expect(() => createUploader({ maxFiles: -1 })).not.toThrow(); + }); +}); + +describe('createUploader — multer integration smoke', () => { + it('returned uploader has fields() method (multer.Multer interface)', () => { + const uploader = createUploader({ maxFileSize: 1024 }); + expect(typeof uploader.fields).toBe('function'); + expect(typeof uploader.single).toBe('function'); + expect(typeof uploader.array).toBe('function'); + }); +}); diff --git a/apps/task-scheduler/src/middleware/__tests__/uploadConcurrency.test.js b/apps/task-scheduler/src/middleware/__tests__/uploadConcurrency.test.js new file mode 100644 index 0000000..6da26ae --- /dev/null +++ b/apps/task-scheduler/src/middleware/__tests__/uploadConcurrency.test.js @@ -0,0 +1,349 @@ +/** + * uploadConcurrency middleware 單元測試(T10 修 D5)。 + * + * 重點: + * 1. 不超 max 時 next() 通過(counter 增減正確) + * 2. 超過 max 時下個 request 拿 503 + Retry-After + service_busy code + * 3. response close 時 release(counter 回到正確值) + * 4. 同一個 res 'close' 多次觸發只 release 一次(idempotent) + * 5. fallback 預設值(不傳 opts) + * 6. 
Log hook 被呼叫(acquire / rejected / released) + * + * 測試策略: + * - 「行為對 HTTP」走 supertest 風格的 express+fetch;但避免 client abort 這種 + * 不可控情境(不同平台 fetch 行為差異大)。abort/release 改用「直接呼叫 + * middleware function」用 fake req/res 觀察 counter 變化,更可控。 + */ + +'use strict'; + +const express = require('express'); +const { EventEmitter } = require('events'); + +const { + createUploadConcurrencyLimiter, + DEFAULT_MAX_CONCURRENT, + DEFAULT_RETRY_AFTER_SECONDS, +} = require('../uploadConcurrency'); +const { ApiError, errorHandler } = require('../errorHandler'); +const { requestIdMiddleware } = require('../requestId'); + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +/** + * 建構一個 fake request / response — 模擬 Express 行為足以測 limiter middleware。 + * 不依賴真實 HTTP server,避免 abort 測試在不同平台不穩。 + */ +function makeFakeReqRes(opts = {}) { + const req = { + requestId: opts.requestId || 'req-fake', + auth: opts.clientId ? 
{ clientId: opts.clientId } : undefined, + ip: opts.ip || '127.0.0.1', + }; + // res 必須是 EventEmitter(middleware 內 res.once('close', ...)) + const res = new EventEmitter(); + res.statusCode = 200; + res.headers = {}; + res.headersSent = false; + res.setHeader = (k, v) => { + res.headers[k.toLowerCase()] = v; + }; + res.getHeader = (k) => res.headers[k.toLowerCase()]; + res.status = (code) => { + res.statusCode = code; + return res; + }; + res.json = (body) => { + res.body = body; + res.headersSent = true; + return res; + }; + res.end = () => { + res.headersSent = true; + return res; + }; + res.simulateClose = () => res.emit('close'); + return { req, res }; +} + +/** + * 啟動一個小 app(HTTP 行為測試用)。 + * - requestId + * - concurrency limiter + * - 一個 fast handler(立即回 200) + * - errorHandler 在最後 + */ +async function startApp(opts) { + const app = express(); + app.use(requestIdMiddleware); + const lim = createUploadConcurrencyLimiter(opts.limiterOpts || {}); + app.locals.limiter = lim; + + app.get('/fast', lim.middleware, (_req, res) => { + res.json({ ok: true }); + }); + + app.use(errorHandler); + + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + baseUrl: `http://127.0.0.1:${port}`, + limiter: lim, + close: () => new Promise((r) => server.close(r)), + }); + }); + }); +} + +describe('uploadConcurrency — basic flow (HTTP)', () => { + it('allows requests within max', async () => { + const ctx = await startApp({ limiterOpts: { maxConcurrent: 3 } }); + try { + for (let i = 0; i < 3; i += 1) { + const res = await fetch(`${ctx.baseUrl}/fast`); + expect(res.status).toBe(200); + await res.text(); + } + // 等 'close' 事件全跑完 + await new Promise((r) => setTimeout(r, 30)); + expect(ctx.limiter.getInFlight()).toBe(0); + } finally { + await ctx.close(); + } + }); + + it('exposes max and inFlight via getters', async () => { + const ctx = await startApp({ limiterOpts: { maxConcurrent: 4 } }); + try { + 
expect(ctx.limiter.getMax()).toBe(4); + expect(ctx.limiter.getInFlight()).toBe(0); + } finally { + await ctx.close(); + } + }); +}); + +describe('uploadConcurrency — limit enforcement (synthetic)', () => { + it('rejects with 503 service_busy when in-flight reaches max', async () => { + const lim = createUploadConcurrencyLimiter({ + maxConcurrent: 2, + retryAfterSeconds: 7, + }); + + // 先 acquire 2 個(不 release)— 用 fake req/res + const { req: r1, res: s1 } = makeFakeReqRes({ requestId: 'req-1' }); + const { req: r2, res: s2 } = makeFakeReqRes({ requestId: 'req-2' }); + let next1Called = false; + let next2Called = false; + lim.middleware(r1, s1, () => { + next1Called = true; + }); + lim.middleware(r2, s2, () => { + next2Called = true; + }); + expect(next1Called).toBe(true); + expect(next2Called).toBe(true); + expect(lim.getInFlight()).toBe(2); + + // 第三個應觸發 503 ApiError + const { req: r3, res: s3 } = makeFakeReqRes({ requestId: 'req-3' }); + let nextErr = null; + lim.middleware(r3, s3, (err) => { + nextErr = err; + }); + expect(nextErr).toBeInstanceOf(ApiError); + expect(nextErr.status).toBe(503); + expect(nextErr.code).toBe('service_busy'); + expect(nextErr.details).toEqual( + expect.objectContaining({ + retry_after_seconds: 7, + max_concurrent: 2, + }) + ); + // Retry-After header 必須有 + expect(s3.getHeader('Retry-After')).toBe('7'); + // 被 reject 的請求不增加 in-flight + expect(lim.getInFlight()).toBe(2); + + // 釋放第一個 → in-flight 回到 1,可以再接受新請求 + s1.simulateClose(); + expect(lim.getInFlight()).toBe(1); + + const { req: r4, res: s4 } = makeFakeReqRes({ requestId: 'req-4' }); + let next4Called = false; + lim.middleware(r4, s4, () => { + next4Called = true; + }); + expect(next4Called).toBe(true); + expect(lim.getInFlight()).toBe(2); + + // 收尾:釋放剩餘 + s2.simulateClose(); + s4.simulateClose(); + expect(lim.getInFlight()).toBe(0); + }); + + it('release is idempotent (multiple close events)', async () => { + const lim = createUploadConcurrencyLimiter({ maxConcurrent: 2 }); + const 
{ req, res } = makeFakeReqRes(); + lim.middleware(req, res, () => {}); + expect(lim.getInFlight()).toBe(1); + + // 觸發兩次 close(理論上 'close' 是 once,但 simulateClose 我們手動觸發) + res.simulateClose(); + res.simulateClose(); + // 即使觸發兩次,counter 也只會 -1(once + idempotent flag 雙重保險) + expect(lim.getInFlight()).toBe(0); + }); + + it('release counter never goes negative even if release called more than acquire', async () => { + const lim = createUploadConcurrencyLimiter({ maxConcurrent: 5 }); + const { req, res } = makeFakeReqRes(); + lim.middleware(req, res, () => {}); + res.simulateClose(); + res.simulateClose(); + res.simulateClose(); + expect(lim.getInFlight()).toBe(0); + }); +}); + +describe('uploadConcurrency — release on real HTTP close', () => { + it('releases counter when response finishes normally', async () => { + const ctx = await startApp({ limiterOpts: { maxConcurrent: 2 } }); + try { + const res = await fetch(`${ctx.baseUrl}/fast`); + expect(res.status).toBe(200); + await res.text(); + // 'close' 是 next-tick;給 event loop 一點時間 + await new Promise((r) => setTimeout(r, 50)); + expect(ctx.limiter.getInFlight()).toBe(0); + } finally { + await ctx.close(); + } + }); +}); + +describe('uploadConcurrency — defaults', () => { + it('uses sane defaults when no opts', async () => { + const lim = createUploadConcurrencyLimiter(); + expect(lim.getMax()).toBe(DEFAULT_MAX_CONCURRENT); + expect(DEFAULT_RETRY_AFTER_SECONDS).toBe(30); + }); + + it('falls back to defaults for invalid maxConcurrent / retryAfterSeconds', async () => { + const lim = createUploadConcurrencyLimiter({ + maxConcurrent: 0, + retryAfterSeconds: -5, + }); + expect(lim.getMax()).toBe(DEFAULT_MAX_CONCURRENT); + // 用 fake reject 觀察 retry-after 是否走預設 + // 把 max acquire 滿 + const filled = []; + for (let i = 0; i < DEFAULT_MAX_CONCURRENT; i += 1) { + const { req, res } = makeFakeReqRes({ requestId: `r-${i}` }); + lim.middleware(req, res, () => {}); + filled.push({ req, res }); + } + const { req: rN, res: sN } = 
makeFakeReqRes({ requestId: 'r-N' }); + let nextErr = null; + lim.middleware(rN, sN, (err) => { + nextErr = err; + }); + expect(nextErr).toBeInstanceOf(ApiError); + expect(sN.getHeader('Retry-After')).toBe(String(DEFAULT_RETRY_AFTER_SECONDS)); + // cleanup + filled.forEach(({ res }) => res.simulateClose()); + }); +}); + +describe('uploadConcurrency — log hook', () => { + it('invokes onLog with acquire / rejected / released events', async () => { + const logs = []; + const onLog = (fields) => { + logs.push(fields); + }; + const lim = createUploadConcurrencyLimiter({ + maxConcurrent: 1, + retryAfterSeconds: 10, + onLog, + }); + + // 1. acquire + const { req: r1, res: s1 } = makeFakeReqRes({ requestId: 'req-1' }); + lim.middleware(r1, s1, () => {}); + expect(logs.some((l) => l.action === 'upload.concurrency.acquired')).toBe(true); + + // 2. rejected + const { req: r2, res: s2 } = makeFakeReqRes({ requestId: 'req-2' }); + let err = null; + lim.middleware(r2, s2, (e) => { + err = e; + }); + expect(err).toBeInstanceOf(ApiError); + const rejectedLog = logs.find( + (l) => l.action === 'upload.concurrency.rejected' + ); + expect(rejectedLog).toBeTruthy(); + expect(rejectedLog.in_flight).toBe(1); + expect(rejectedLog.max_concurrent).toBe(1); + expect(rejectedLog.retry_after_seconds).toBe(10); + + // 3. 
release + s1.simulateClose(); + expect(logs.some((l) => l.action === 'upload.concurrency.released')).toBe(true); + }); + + it('falls back to console.log when onLog not provided', async () => { + // 不指定 onLog;console.log 已被 spy + const lim = createUploadConcurrencyLimiter({ maxConcurrent: 1 }); + const { req, res } = makeFakeReqRes(); + lim.middleware(req, res, () => {}); + // 至少有一次 console.log 被呼叫(acquire log) + // jest.spyOn console.log 已開 + // 用較寬鬆的斷言:counter +1 即可(細節 log 內容不在此驗) + expect(lim.getInFlight()).toBe(1); + res.simulateClose(); + expect(lim.getInFlight()).toBe(0); + }); +}); + +describe('uploadConcurrency — auth context in log', () => { + it('records client_id in rejected log when req.auth.clientId present', async () => { + const logs = []; + const lim = createUploadConcurrencyLimiter({ + maxConcurrent: 1, + onLog: (f) => logs.push(f), + }); + + // 先 acquire 滿 + const { req: r1, res: s1 } = makeFakeReqRes({ + requestId: 'r-1', + clientId: 'client-A', + }); + lim.middleware(r1, s1, () => {}); + + // 第二個被 reject + const { req: r2, res: s2 } = makeFakeReqRes({ + requestId: 'r-2', + clientId: 'client-B', + }); + lim.middleware(r2, s2, () => {}); + + const rejected = logs.find( + (l) => l.action === 'upload.concurrency.rejected' + ); + expect(rejected).toBeTruthy(); + expect(rejected.client_id).toBe('client-B'); + + s1.simulateClose(); + }); +}); diff --git a/apps/task-scheduler/src/middleware/errorHandler.js b/apps/task-scheduler/src/middleware/errorHandler.js new file mode 100644 index 0000000..9ca6793 --- /dev/null +++ b/apps/task-scheduler/src/middleware/errorHandler.js @@ -0,0 +1,152 @@ +/** + * /api/v1 統一錯誤處理 middleware(T3)。 + * + * 職責: + * 1. 提供 `ApiError` class,handler 可 `next(new ApiError(status, code, message, details?))` + * 2. 接住下游所有 error,輸出統一的 v1 錯誤格式(TDD §1.2): + * { + * "error": { + * "code": "string", + * "message": "human readable", + * "details": { ... } // 可選 + * "request_id": "uuid" + * } + * } + * 3. 
**不洩漏 stack trace / 內部訊息**給 client(log 給 ops 用) + * + * 為什麼要獨立一支 errorHandler 而非用 app.js 既有的: + * - 既有 handler 回的是 `{ error: 'Internal server error' }`(純字串) + * - 既有 404 回的是 `{ error: 'Endpoint not found' }` + * - 兩者格式都不符合 v1 規格(缺 code / request_id) + * - 為了不破壞 legacy 行為,v1 errorHandler 只**掛在 v1 router 內部**, + * legacy 路由依然走既有 handler + * + * 使用範例(在 v1 router 中): + * const { errorHandler, ApiError } = require('../../middleware/errorHandler'); + * router.post('/jobs', (req, res, next) => { + * return next(new ApiError(501, 'not_implemented', 'Phase 2 only')); + * }); + * router.use(errorHandler); // **必須**最後才掛(4-arg 簽名) + */ + +'use strict'; + +/** + * v1 API 標準錯誤類別。 + * + * 用法: + * throw new ApiError(409, 'user_has_active_job', '使用者已有進行中的 job', { + * active_job_id: '...', + * }); + * + * 為什麼用 class 而非 plain object: + * - 透過 `instanceof` 在 errorHandler 中可靠地識別「預期錯誤 vs 未預期錯誤」 + * - 預期錯誤 → 用其 status/code/message + * - 未預期錯誤 → 統一 500 internal_error,**不**洩漏內部訊息 + * + * 為什麼繼承 Error: + * - 保留 stack trace 給 server log(不回給 client) + * - Express 的 next(err) 對 Error 物件做特殊處理 + */ +class ApiError extends Error { + /** + * @param {number} status - HTTP status code(4xx / 5xx) + * @param {string} code - 錯誤代碼,必須對齊 TDD §14 表格 + * @param {string} message - 給 client 的訊息(zh-TW,避免敏感資訊) + * @param {object} [details] - 補充欄位(如 required_scope, active_job_id) + */ + constructor(status, code, message, details) { + super(message); + this.name = 'ApiError'; + this.status = status; + this.code = code; + // details 為 undefined 時不掛屬性(後續 JSON.stringify 會 omit) + if (details !== undefined) { + this.details = details; + } + } +} + +/** + * 結構化 log(**不**洩漏 stack 給 client,但會記錄到 stderr 給 ops)。 + * + * @param {Error} err + * @param {import('express').Request} req + */ +function logError(err, req) { + const isApi = err instanceof ApiError; + const level = !isApi || (typeof err.status === 'number' && err.status >= 500) ? 
'ERROR' : 'WARN'; + + const fields = { + level, + service: 'task-scheduler', + action: 'api.v1.error', + request_id: req && req.requestId ? req.requestId : null, + method: req && req.method ? req.method : null, + path: req && req.originalUrl ? req.originalUrl : null, + error_code: isApi ? err.code : 'internal_error', + status: isApi ? err.status : 500, + message: err && err.message ? err.message : 'unknown', + timestamp: new Date().toISOString(), + }; + // 只有真正未預期錯誤才印 stack(避免噪音) + if (!isApi && err && err.stack) { + fields.stack = err.stack; + } + + const line = JSON.stringify(fields); + if (level === 'ERROR') { + // eslint-disable-next-line no-console + console.error(line); + } else { + // eslint-disable-next-line no-console + console.warn(line); + } +} + +/** + * 4-arg Express error handler。**必須**4 個參數才會被 Express 認為是 error + * handler(這是 Express 4 的官方 contract)。 + * + * @type {import('express').ErrorRequestHandler} + */ +// eslint-disable-next-line no-unused-vars +function errorHandler(err, req, res, next) { + // log 永遠先做(即使 headersSent 也要留紀錄) + logError(err, req); + + // 若 response header 已發出(罕見但可能:例如 streaming 中途出錯), + // Express 4 規範:不要嘗試再寫,直接交給預設 handler 中斷連線。 + if (res.headersSent) { + return next(err); + } + + // 預期錯誤 → 用其 status/code/message + // 未預期錯誤 → 500 internal_error,message 用通用文案 + const isApi = err instanceof ApiError; + const status = isApi ? err.status : 500; + const code = isApi ? err.code : 'internal_error'; + const message = isApi ? err.message : '伺服器內部錯誤'; + + const body = { + error: { + code, + message, + request_id: req && req.requestId ? 
req.requestId : null, + }, + }; + + // details 只有在 ApiError 上有設才帶;不洩漏未預期錯誤的內部資料 + if (isApi && err.details !== undefined) { + body.error.details = err.details; + } + + res.status(status).json(body); +} + +module.exports = { + ApiError, + errorHandler, + // 內部 helper 暴露供測試 + _internals: { logError }, +}; diff --git a/apps/task-scheduler/src/middleware/perClientRateLimit.js b/apps/task-scheduler/src/middleware/perClientRateLimit.js new file mode 100644 index 0000000..9960865 --- /dev/null +++ b/apps/task-scheduler/src/middleware/perClientRateLimit.js @@ -0,0 +1,95 @@ +/** + * per-client_id rate limiter for /api/v1/*(T5)。 + * + * 為什麼新建一支 limiter,而非沿用 server.js L117 的 IP-based limiter: + * - 既有 IP-based 是「外層護欄」(200 req / 15 min)—— 防止單一 IP 暴量 + * - 新 API 用 client_credentials grant,多個 user 共用同一個 visionA-backend 的 + * IP,IP-based 會把所有 user 的 request 計成同一個 quota,誤殺正常流量 + * - per-client_id 則對齊 TDD §1.1:300 req / 5 min per client_id,是商務層的 + * 合約上限(vendor SLA) + * + * 為什麼必須掛在 requireAuth 之後: + * - 要拿 `req.auth.clientId` 當 keyGenerator 的 key + * - 沒驗證的 request 會在 requireAuth 階段就被 401 擋掉,不會走到 limiter + * - 結果:未驗證流量先被 IP-based limiter(外層)+ requireAuth 擋; + * 驗證過的流量再被 per-client_id limiter(內層)擋 + * + * 為什麼必須掛在 multer 之前: + * - multer 會把 multipart body 全部讀進 memoryStorage(最大 500MB) + * - 若 limiter 在 multer 之後,超過 quota 的 client 仍會把 500MB 灌進 server 才拒 + * - 結論:requireAuth → perClientRateLimit → multer → handler 是唯一正確順序 + * + * 安全: + * - express-rate-limit 預設用 memory store,是「per Node process」計數 + * - 多 process / 多 instance 時 quota 會被「乘以 instance 數」放鬆 + * - Phase 1 部署是單 instance,可接受;Phase 2 多 instance 時應改 Redis store + * - keyGenerator 失敗時 fallback 到 IP,避免 429 變成 NaN-keyed bucket + * + * 對應錯誤格式: + * handler 在超過 quota 時應回 v1 標準格式 `{ error: { code: 'rate_limit_exceeded', ... 
} }` + * 並設 `Retry-After` header(秒),同時保留 `X-RateLimit-*` 標頭。 + */ + +'use strict'; + +const rateLimit = require('express-rate-limit'); + +const { ApiError } = require('./errorHandler'); + +/** + * 預設參數,對齊 TDD §1.1(per client_id 300 req / 5 min)。 + */ +const DEFAULT_WINDOW_MS = 5 * 60 * 1000; // 5 分鐘 +const DEFAULT_MAX = 300; + +/** + * 建立一個 per-client_id express-rate-limit middleware。 + * + * @param {object} [opts] + * @param {number} [opts.windowMs=300000] + * @param {number} [opts.max=300] + * @returns {import('express').RequestHandler} + */ +function createPerClientRateLimiter(opts = {}) { + const windowMs = Number.isInteger(opts.windowMs) && opts.windowMs > 0 + ? opts.windowMs + : DEFAULT_WINDOW_MS; + const max = Number.isInteger(opts.max) && opts.max > 0 ? opts.max : DEFAULT_MAX; + + return rateLimit({ + windowMs, + max, + // 開啟標準 RateLimit-* header(RFC draft);同時保留 X-RateLimit-* legacy + standardHeaders: true, + legacyHeaders: true, + keyGenerator(req) { + // requireAuth 已在前面跑過 → req.auth.clientId 必有;保險起見 fallback + // 到 IP,避免 undefined key 把所有 anon 計成同一個 bucket。 + const clientId = + req && req.auth && typeof req.auth.clientId === 'string' + ? req.auth.clientId + : null; + if (clientId) return `cid:${clientId}`; + // fallback 不應該發生(middleware 順序保證),這裡用 IP 防 NaN-keyed bucket + return `ip:${req.ip || 'unknown'}`; + }, + handler(req, res, next /* , options */) { + // 統一走 errorHandler,回 v1 標準格式 + // express-rate-limit 已經設好 Retry-After / RateLimit-* headers;不要 res.json 自己回 + // 透過 next(ApiError) 走 errorHandler 才能含 request_id + const retryAfterSec = res.getHeader('Retry-After'); + return next( + new ApiError(429, 'rate_limit_exceeded', '請求頻率過高,請稍後再試', { + retry_after_seconds: + typeof retryAfterSec === 'string' ? 
/**
 * X-Request-Id middleware (T3).
 *
 * Responsibilities:
 *   1. Accept the client's `X-Request-Id` header and reuse it when it is valid.
 *   2. Otherwise generate a new one with `crypto.randomUUID()`.
 *   3. Expose it as `req.requestId` for downstream middleware / handlers / loggers.
 *   4. Echo it back with `res.setHeader('X-Request-Id', ...)` so clients can correlate.
 *
 * Design choices:
 *   - Invalid IDs are not rejected: a bad X-Request-Id is ignored and replaced,
 *     never answered with a 4xx, so an observability header cannot break the
 *     request flow.
 *   - "Valid" means 1..100 characters, ASCII letters / digits / `-` / `_` only.
 *     This excludes control characters and CR/LF (log injection) and whitespace
 *     (header parsing ambiguity). 100 characters is enough for a UUID (36), a
 *     multi-part trace ID and most custom formats.
 *   - Node 18+: uses the built-in `crypto.randomUUID()` so this middleware has
 *     no third-party dependency.
 *
 * Security:
 *   - The ID appears in logs and in a response header, so control characters
 *     must be filtered out.
 *   - Do not use the raw header as a Redis key or file path; it is for
 *     observability only.
 *
 * Usage:
 *   const { requestIdMiddleware } = require('./middleware/requestId');
 *   app.use(requestIdMiddleware); // mount globally, before every route
 */

'use strict';

const { randomUUID } = require('crypto');

/**
 * Character / length rule for an acceptable X-Request-Id:
 * 1 to 100 characters, ASCII letters, digits, `-` and `_` only.
 *
 * A strict UUID format is deliberately not required, so clients can use their
 * own trace ID scheme (e.g. a 32-hex OpenTelemetry trace_id).
 */
const REQUEST_ID_REGEX = /^[A-Za-z0-9_-]{1,100}$/;

/**
 * Decide whether an incoming X-Request-Id may be reused as-is.
 *
 * Non-strings are rejected up front (RegExp.prototype.test would otherwise
 * coerce them to a string). The empty string needs no separate check because
 * the `{1,100}` quantifier already rejects it.
 *
 * @param {unknown} candidate
 * @returns {boolean}
 */
function isValidRequestId(candidate) {
  return typeof candidate === 'string' && REQUEST_ID_REGEX.test(candidate);
}
/**
 * Express middleware: reuse the caller's X-Request-Id when it passes
 * `isValidRequestId`, otherwise mint a fresh UUID.
 *
 * Side effects:
 *   - sets `req.requestId`
 *   - sets the `X-Request-Id` response header (also when the incoming id was
 *     reused, so the client can always see which id the server used)
 *
 * An invalid incoming id is never rejected; it is simply replaced.
 *
 * @type {import('express').RequestHandler}
 */
function requestIdMiddleware(req, res, next) {
  // Express' req.get() performs a case-insensitive header lookup; tolerate
  // bare request objects that do not provide it.
  let candidate;
  if (typeof req.get === 'function') {
    candidate = req.get('X-Request-Id');
  }

  const effectiveId = isValidRequestId(candidate) ? candidate : randomUUID();
  req.requestId = effectiveId;

  // Echo the id back for end-to-end tracing.
  const canWriteHeader = typeof res.setHeader === 'function';
  if (canWriteHeader) {
    res.setHeader('X-Request-Id', effectiveId);
  }

  next();
}
/**
 * Default per-file size cap (500MB). This is multer's per-file limit; the
 * tighter per-image cap for ref_images is enforced by the validator, not here.
 */
const DEFAULT_MAX_FILE_SIZE = 500 * 1024 * 1024;

/**
 * Default cap on the number of ref_images.
 */
const DEFAULT_MAX_REF_IMAGES = 100;

/**
 * Default cap on the total number of files multer accepts:
 * model (1) + ref_images (N) + 1 safety buffer.
 */
const DEFAULT_MAX_FILES = DEFAULT_MAX_REF_IMAGES + 2;

/**
 * Create a multer uploader backed by memoryStorage.
 *
 * Every limit is injected by the caller; a value that is not a positive
 * integer is ignored and the default is used instead.
 *
 * @param {object} [opts]
 * @param {number} [opts.maxFileSize=500MB] - per-file size cap in bytes
 * @param {number} [opts.maxFiles] - total file cap; when omitted it is derived
 *   as `maxRefImages + 2`
 * @param {number} [opts.maxRefImages=100] - ref_images cap, used only to
 *   derive `maxFiles`
 * @returns {import('multer').Multer}
 */
function createUploader(opts) {
  const settings = opts || {};
  const positiveIntOr = (value, fallback) =>
    Number.isInteger(value) && value > 0 ? value : fallback;

  const fileSizeCap = positiveIntOr(settings.maxFileSize, DEFAULT_MAX_FILE_SIZE);
  const refImageCap = positiveIntOr(settings.maxRefImages, DEFAULT_MAX_REF_IMAGES);
  // An explicit maxFiles wins; otherwise model(1) + ref_images(N) + 1 buffer.
  const fileCountCap = positiveIntOr(settings.maxFiles, refImageCap + 2);

  return multer({
    storage: multer.memoryStorage(),
    limits: {
      fileSize: fileSizeCap,
      files: fileCountCap,
    },
  });
}
/**
 * Default concurrency cap. Chosen conservatively: 5 concurrent uploads at up
 * to 500MB each is roughly 2.5GB of heap.
 */
const DEFAULT_MAX_CONCURRENT = 5;

/**
 * Default Retry-After value (seconds) sent with a 503 rejection.
 */
const DEFAULT_RETRY_AFTER_SECONDS = 30;

/**
 * Create a counter-based concurrency limiter for upload requests.
 *
 * The returned middleware takes a slot on entry and frees it when the
 * response emits 'close'. When every slot is taken it sets `Retry-After` and
 * forwards a 503 `service_busy` ApiError instead of queueing the request.
 * The counter lives in this closure, so each limiter instance is independent.
 *
 * @param {object} [opts]
 * @param {number} [opts.maxConcurrent=5] - max uploads in flight at once
 * @param {number} [opts.retryAfterSeconds=30] - Retry-After for 503 responses
 * @param {(fields: object) => void} [opts.onLog] - structured log hook; falls
 *   back to `console.log(JSON.stringify(...))` when omitted
 * @returns {{
 *   middleware: import('express').RequestHandler,
 *   getInFlight: () => number,
 *   getMax: () => number,
 * }}
 */
function createUploadConcurrencyLimiter(opts) {
  const settings = opts || {};
  const positiveIntOr = (value, fallback) =>
    Number.isInteger(value) && value > 0 ? value : fallback;

  const maxConcurrent = positiveIntOr(settings.maxConcurrent, DEFAULT_MAX_CONCURRENT);
  const retryAfterSeconds = positiveIntOr(
    settings.retryAfterSeconds,
    DEFAULT_RETRY_AFTER_SECONDS
  );

  // Fallback logger: one structured JSON line on stdout.
  function logToStdout(fields) {
    // eslint-disable-next-line no-console
    console.log(
      JSON.stringify({
        service: 'task-scheduler',
        timestamp: new Date().toISOString(),
        ...fields,
      })
    );
  }
  const onLog = typeof settings.onLog === 'function' ? settings.onLog : logToStdout;

  // Number of uploads currently holding a slot. The check and the increment
  // below run synchronously with no await between them.
  let inFlight = 0;

  // All slots taken: advertise Retry-After, log, and hand a 503 to the error handler.
  function rejectBusy(req, res, next) {
    res.setHeader('Retry-After', String(retryAfterSeconds));
    const auth = req && req.auth;
    onLog({
      level: 'WARN',
      action: 'upload.concurrency.rejected',
      request_id: req.requestId,
      in_flight: inFlight,
      max_concurrent: maxConcurrent,
      retry_after_seconds: retryAfterSeconds,
      client_id: auth && auth.clientId ? auth.clientId : null,
    });
    const details = {
      retry_after_seconds: retryAfterSeconds,
      max_concurrent: maxConcurrent,
    };
    return next(new ApiError(503, 'service_busy', '伺服器忙碌中,請稍後重試', details));
  }

  /**
   * Express middleware: acquire a slot, register its release, then continue.
   *
   * @param {import('express').Request} req
   * @param {import('express').Response} res
   * @param {import('express').NextFunction} next
   */
  function middleware(req, res, next) {
    if (inFlight >= maxConcurrent) {
      return rejectBusy(req, res, next);
    }

    inFlight += 1;

    // Register the release BEFORE logging so a throwing onLog cannot leak the slot.
    // 'close' is used rather than 'finish' because 'finish' is skipped when the
    // client aborts mid-response. The flag keeps the release idempotent.
    let released = false;
    res.once('close', () => {
      if (released) return;
      released = true;
      inFlight = Math.max(0, inFlight - 1); // defensive: never go negative
      onLog({
        level: 'DEBUG',
        action: 'upload.concurrency.released',
        request_id: req.requestId,
        in_flight: inFlight,
      });
    });

    onLog({
      level: 'DEBUG',
      action: 'upload.concurrency.acquired',
      request_id: req.requestId,
      in_flight: inFlight,
      max_concurrent: maxConcurrent,
    });

    return next();
  }

  return {
    middleware,
    /**
     * Current number of uploads holding a slot (for tests / health / metrics).
     * @returns {number}
     */
    getInFlight: () => inFlight,
    /**
     * The cap fixed at construction time.
     * @returns {number}
     */
    getMax: () => maxConcurrent,
  };
}
/**
 * Default Redis URL: `REDIS_URL` from the environment, or a local instance.
 *
 * @returns {string}
 */
function getDefaultRedisUrl() {
  return process.env.REDIS_URL || 'redis://localhost:6379';
}

/**
 * Attach an 'error' listener that logs via console.error and never throws.
 *
 * @param {{ on: Function }} client - Redis client (any emitter with `on`)
 * @param {string} label - prefix printed before the error
 */
function attachErrorLogger(client, label) {
  client.on('error', (err) => {
    // eslint-disable-next-line no-console
    console.error(`${label}:`, err);
  });
}

/**
 * Create a pair of Redis clients: one for regular commands and one reserved
 * for blocking reads, each with an error logger attached.
 *
 * @param {string} [redisUrl] - connection string; defaults to getDefaultRedisUrl()
 * @returns {{ redis: Redis, redisSub: Redis }}
 */
function createClients(redisUrl) {
  const url = redisUrl || getDefaultRedisUrl();
  const redis = new Redis(url);
  const redisSub = new Redis(url);
  attachErrorLogger(redis, 'Redis error');
  attachErrorLogger(redisSub, 'Redis subscriber error');
  return { redis, redisSub };
}

/**
 * Ensure a consumer group exists on a stream, creating the stream if needed.
 * A BUSYGROUP error (the group already exists) is treated as success; every
 * other failure is rethrown unchanged.
 *
 * @param {Redis} redis
 * @param {string} queue - stream key
 * @param {string} group - consumer group name
 * @returns {Promise<void>}
 */
async function ensureConsumerGroup(redis, queue, group) {
  try {
    await redis.xgroup('CREATE', queue, group, '0', 'MKSTREAM');
  } catch (err) {
    // Guard the message lookup: a rejection value without a string `message`
    // (null, a bare string, a plain object) must surface as itself rather
    // than be masked by a TypeError raised from `.includes`.
    const message = err && typeof err.message === 'string' ? err.message : '';
    if (!message.includes('BUSYGROUP')) throw err;
  }
}
/**
 * Unit tests for luaScripts.js (T5).
 *
 * Focus:
 *   1. claimActiveJob issues the expected evalsha call (KEYS + ARGV order)
 *   2. On NOSCRIPT it falls back to eval and re-sends
 *   3. Lua replies ['OK'] / ['CONFLICT', id] are parsed correctly
 *   4. Abnormal / unexpected replies → throw
 *   5. Argument validation (missing userId / jobId / non-integer ttl)
 */

'use strict';

const path = require('path');
const fs = require('fs');

const {
  claimActiveJob,
  releaseActiveJob,
  _internals,
} = require('../luaScripts');

// Minimal redis double: only the two methods the script runner calls.
function makeFakeRedis() {
  return {
    evalsha: jest.fn(),
    eval: jest.fn(),
  };
}

beforeEach(() => {
  // Reset the script cache before each test so the script file is re-read.
  _internals.resetCache();
});

describe('claimActiveJob — argument validation', () => {
  it.each([
    [{ userId: '', jobId: 'j', jobJson: '{}', ttlSeconds: 1 }, /userId/],
    [{ userId: 'u', jobId: '', jobJson: '{}', ttlSeconds: 1 }, /jobId/],
    [{ userId: 'u', jobId: 'j', jobJson: 123, ttlSeconds: 1 }, /jobJson/],
    [{ userId: 'u', jobId: 'j', jobJson: '{}', ttlSeconds: 0 }, /ttlSeconds/],
    [{ userId: 'u', jobId: 'j', jobJson: '{}', ttlSeconds: -1 }, /ttlSeconds/],
    [{ userId: 'u', jobId: 'j', jobJson: '{}', ttlSeconds: 1.5 }, /ttlSeconds/],
  ])('throws with descriptive message for invalid args', async (args, regex) => {
    const redis = makeFakeRedis();
    await expect(claimActiveJob(redis, args)).rejects.toThrow(regex);
  });
});

describe('claimActiveJob — happy paths', () => {
  it('returns ok=true on Lua "OK" response', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['OK']);
    const res = await claimActiveJob(redis, {
      userId: 'u-1',
      jobId: 'j-1',
      jobJson: '{"a":1}',
      ttlSeconds: 100,
    });
    expect(res).toEqual({ ok: true });
  });

  it('passes correct keys + args to evalsha', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['OK']);
    await claimActiveJob(redis, {
      userId: 'alice',
      jobId: 'job-xyz',
      jobJson: '{"hello":"world"}',
      ttlSeconds: 604800,
    });
    expect(redis.evalsha).toHaveBeenCalledTimes(1);
    const callArgs = redis.evalsha.mock.calls[0];
    // 1st argument = sha, 2nd = numKeys (3), then the keys, then the args
    const [sha, numKeys, k1, k2, k3, a1, a2, a3] = callArgs;
    expect(typeof sha).toBe('string');
    expect(sha.length).toBe(40); // SHA-1 hex
    expect(numKeys).toBe(3);
    expect(k1).toBe('user:alice:active_job');
    expect(k2).toBe('job:job-xyz');
    expect(k3).toBe('user:alice:jobs');
    expect(a1).toBe('job-xyz');
    expect(a2).toBe('{"hello":"world"}');
    expect(a3).toBe('604800');
  });

  it('returns conflict + activeJobId on Lua "CONFLICT" response', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['CONFLICT', 'old-job-id']);
    const res = await claimActiveJob(redis, {
      userId: 'u',
      jobId: 'j',
      jobJson: '{}',
      ttlSeconds: 100,
    });
    expect(res).toEqual({
      ok: false,
      conflict: true,
      activeJobId: 'old-job-id',
    });
  });

  it('throws on unexpected Lua response', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['UNKNOWN']);
    await expect(
      claimActiveJob(redis, {
        userId: 'u',
        jobId: 'j',
        jobJson: '{}',
        ttlSeconds: 100,
      })
    ).rejects.toThrow(/Unexpected Lua response/);
  });
});

describe('claimActiveJob — NOSCRIPT fallback', () => {
  it('falls back to eval when evalsha NOSCRIPT', async () => {
    const redis = makeFakeRedis();
    const noScriptErr = new Error('NOSCRIPT No matching script.');
    redis.evalsha.mockRejectedValueOnce(noScriptErr);
    redis.eval.mockResolvedValueOnce(['OK']);

    const res = await claimActiveJob(redis, {
      userId: 'u',
      jobId: 'j',
      jobJson: '{}',
      ttlSeconds: 100,
    });

    expect(res).toEqual({ ok: true });
    expect(redis.evalsha).toHaveBeenCalledTimes(1);
    expect(redis.eval).toHaveBeenCalledTimes(1);

    // eval must receive the full script body, not the sha
    const evalArgs = redis.eval.mock.calls[0];
    const [body] = evalArgs;
    expect(typeof body).toBe('string');
    expect(body).toContain('redis.call');
  });

  it('does NOT fallback for non-NOSCRIPT errors', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockRejectedValueOnce(new Error('READONLY'));
    await expect(
      claimActiveJob(redis, {
        userId: 'u',
        jobId: 'j',
        jobJson: '{}',
        ttlSeconds: 100,
      })
    ).rejects.toThrow(/READONLY/);
    expect(redis.eval).not.toHaveBeenCalled();
  });
});

describe('Lua script file integrity (sanity check)', () => {
  it('claim_active_job.lua is loadable and contains expected commands', () => {
    const luaPath = path.join(
      __dirname,
      '..',
      'luaScripts',
      'claim_active_job.lua'
    );
    const body = fs.readFileSync(luaPath, 'utf8');
    // All required operations must be present in the script text
    expect(body).toContain("EXISTS");
    expect(body).toContain("'CONFLICT'");
    expect(body).toContain("'OK'");
    expect(body).toContain("SET");
    expect(body).toContain("EXPIRE");
    expect(body).toContain("SADD");
  });

  // Sec m5: claim_active_job.lua returns an error_reply when the ttl is invalid
  it('claim_active_job.lua has invalid_ttl guard (Sec m5)', () => {
    const luaPath = path.join(
      __dirname,
      '..',
      'luaScripts',
      'claim_active_job.lua'
    );
    const body = fs.readFileSync(luaPath, 'utf8');
    expect(body).toContain('invalid_ttl');
    expect(body).toContain('error_reply');
  });

  // Sec M2: release_active_job.lua is fully implemented
  it('release_active_job.lua is loadable and contains expected commands (Sec M2)', () => {
    const luaPath = path.join(
      __dirname,
      '..',
      'luaScripts',
      'release_active_job.lua'
    );
    const body = fs.readFileSync(luaPath, 'utf8');
    expect(body).toContain('GET');
    expect(body).toContain("'NOOP'");
    expect(body).toContain("'OK'");
    expect(body).toContain('DEL');
    expect(body).toContain('SREM');
  });
});

// ---------------------------------------------------------------------------
// Sec M2 + Reviewer Major-2: releaseActiveJob
// ---------------------------------------------------------------------------

describe('releaseActiveJob — argument validation', () => {
  it.each([
    [{ userId: '', jobId: 'j' }, /userId/],
    [{ userId: 'u', jobId: '' }, /jobId/],
    [{ userId: 123, jobId: 'j' }, /userId/],
    [{ userId: 'u', jobId: null }, /jobId/],
  ])('throws with descriptive message for invalid args', async (args, regex) => {
    const redis = makeFakeRedis();
    await expect(releaseActiveJob(redis, args)).rejects.toThrow(regex);
  });
});

describe('releaseActiveJob — happy paths', () => {
  it('returns released=true on Lua "OK" response', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['OK']);
    const res = await releaseActiveJob(redis, {
      userId: 'u-1',
      jobId: 'j-1',
    });
    expect(res).toEqual({ ok: true, released: true });
  });

  it('returns released=false on Lua "NOOP" response (active_job mismatch)', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['NOOP']);
    const res = await releaseActiveJob(redis, {
      userId: 'u-1',
      jobId: 'orphan-id',
    });
    expect(res).toEqual({ ok: true, released: false });
  });

  it('passes correct keys + args to evalsha', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['OK']);
    await releaseActiveJob(redis, {
      userId: 'alice',
      jobId: 'job-xyz',
    });
    expect(redis.evalsha).toHaveBeenCalledTimes(1);
    const callArgs = redis.evalsha.mock.calls[0];
    const [sha, numKeys, k1, k2, k3, a1] = callArgs;
    expect(typeof sha).toBe('string');
    expect(sha.length).toBe(40);
    expect(numKeys).toBe(3);
    expect(k1).toBe('user:alice:active_job');
    expect(k2).toBe('job:job-xyz');
    expect(k3).toBe('user:alice:jobs');
    expect(a1).toBe('job-xyz');
  });

  it('throws on unexpected Lua response', async () => {
    const redis = makeFakeRedis();
    redis.evalsha.mockResolvedValueOnce(['WAT']);
    await expect(
      releaseActiveJob(redis, { userId: 'u', jobId: 'j' })
    ).rejects.toThrow(/Unexpected Lua response/);
  });
});

describe('releaseActiveJob — NOSCRIPT fallback', () => {
  it('falls back to eval when evalsha NOSCRIPT', async () => {
    const redis = makeFakeRedis();
    const noScriptErr = new Error('NOSCRIPT No matching script.');
    redis.evalsha.mockRejectedValueOnce(noScriptErr);
    redis.eval.mockResolvedValueOnce(['OK']);

    const res = await releaseActiveJob(redis, { userId: 'u', jobId: 'j' });
    expect(res).toEqual({ ok: true, released: true });
    expect(redis.eval).toHaveBeenCalledTimes(1);
  });
});
/**
 * In-memory cache of loaded scripts, keyed by file name. Kept as a Map so the
 * test helper can clear it.
 */
const _scriptCache = new Map();

/**
 * Read a Lua script from the `luaScripts/` directory next to this module and
 * compute its SHA-1. Results are cached per file name.
 *
 * @param {string} fileName - file name under luaScripts/ (no path)
 * @returns {{ body: string, sha1: string }}
 */
function loadScript(fileName) {
  let entry = _scriptCache.get(fileName);
  if (entry === undefined) {
    const body = fs.readFileSync(path.join(__dirname, 'luaScripts', fileName), 'utf8');
    entry = {
      body,
      sha1: crypto.createHash('sha1').update(body).digest('hex'),
    };
    _scriptCache.set(fileName, entry);
  }
  return entry;
}

/**
 * Run a Lua script via EVALSHA. If the server reports NOSCRIPT, retry exactly
 * once with EVAL and the full script body. Any other error is rethrown.
 *
 * @param {import('ioredis').Redis} redis
 * @param {{ body: string, sha1: string }} script
 * @param {string[]} keys
 * @param {string[]} args
 * @returns {Promise<unknown>} the raw script reply
 */
async function evalScript(redis, script, keys, args) {
  try {
    return await redis.evalsha(script.sha1, keys.length, ...keys, ...args);
  } catch (err) {
    // Loose substring match: the NOSCRIPT wording varies between drivers.
    const text = err && err.message ? err.message : '';
    if (!text.includes('NOSCRIPT')) {
      throw err;
    }
    // Send the full body once; the original note says this also repopulates
    // the server-side script cache.
    return await redis.eval(script.body, keys.length, ...keys, ...args);
  }
}

/**
 * The three Redis keys both scripts operate on, in KEYS[1..3] order.
 *
 * @param {string} userId
 * @param {string} jobId
 * @returns {string[]}
 */
function _activeJobKeys(userId, jobId) {
  return [`user:${userId}:active_job`, `job:${jobId}`, `user:${userId}:jobs`];
}

/**
 * Claim the user's active-job slot and write the full job record in one
 * script call (claim_active_job.lua).
 *
 * @param {import('ioredis').Redis} redis
 * @param {object} args
 * @param {string} args.userId - non-empty user id
 * @param {string} args.jobId - non-empty job id
 * @param {string} args.jobJson - job record, already JSON.stringify'd
 * @param {number} args.ttlSeconds - positive integer TTL applied to all three keys
 * @returns {Promise<
 *   | { ok: true }
 *   | { ok: false, conflict: true, activeJobId: string | null }
 * >}
 * @throws {Error} on invalid arguments or an unexpected script reply
 */
async function claimActiveJob(redis, { userId, jobId, jobJson, ttlSeconds }) {
  let problem = null;
  if (!userId || typeof userId !== 'string') {
    problem = '[claimActiveJob] userId is required';
  } else if (!jobId || typeof jobId !== 'string') {
    problem = '[claimActiveJob] jobId is required';
  } else if (typeof jobJson !== 'string') {
    problem = '[claimActiveJob] jobJson must be a string';
  } else if (!Number.isInteger(ttlSeconds) || ttlSeconds <= 0) {
    problem = '[claimActiveJob] ttlSeconds must be a positive integer';
  }
  if (problem !== null) {
    throw new Error(problem);
  }

  const reply = await evalScript(
    redis,
    loadScript('claim_active_job.lua'),
    _activeJobKeys(userId, jobId),
    [jobId, jobJson, String(ttlSeconds)]
  );

  // The reply is expected to be an array whose first element is a status tag.
  const tag = Array.isArray(reply) ? reply[0] : undefined;
  switch (tag) {
    case 'OK':
      return { ok: true };
    case 'CONFLICT':
      return {
        ok: false,
        conflict: true,
        activeJobId: typeof reply[1] === 'string' ? reply[1] : null,
      };
    default:
      // Should be unreachable; surface it to the caller as an error.
      throw new Error(
        `[claimActiveJob] Unexpected Lua response: ${JSON.stringify(reply)}`
      );
  }
}

/**
 * Release the user's active-job slot (release_active_job.lua). Used to
 * compensate when enqueueing fails after a successful claim. The script only
 * deletes when the slot still points at `jobId`.
 *
 * @param {import('ioredis').Redis} redis
 * @param {object} args
 * @param {string} args.userId - non-empty user id
 * @param {string} args.jobId - job id to release
 * @returns {Promise<
 *   | { ok: true, released: true }
 *   | { ok: true, released: false }
 * >} `released: false` means the slot no longer pointed at this job (NOOP)
 * @throws {Error} on invalid arguments or an unexpected script reply
 */
async function releaseActiveJob(redis, { userId, jobId }) {
  let problem = null;
  if (!userId || typeof userId !== 'string') {
    problem = '[releaseActiveJob] userId is required';
  } else if (!jobId || typeof jobId !== 'string') {
    problem = '[releaseActiveJob] jobId is required';
  }
  if (problem !== null) {
    throw new Error(problem);
  }

  const reply = await evalScript(
    redis,
    loadScript('release_active_job.lua'),
    _activeJobKeys(userId, jobId),
    [jobId]
  );

  const tag = Array.isArray(reply) ? reply[0] : undefined;
  switch (tag) {
    case 'OK':
      return { ok: true, released: true };
    case 'NOOP':
      return { ok: true, released: false };
    default:
      throw new Error(
        `[releaseActiveJob] Unexpected Lua response: ${JSON.stringify(reply)}`
      );
  }
}
-- claim_active_job.lua
--
-- Aligned with TDD §2.7.2 and the "M5 plan A" change: the caller writes to
-- MinIO first, and only after that succeeds uses this script to write the
-- complete job record in one go.
--
-- Responsibility: when there is no conflict, write the active_job pointer, the
-- full job record and the user:jobs index (each with a TTL) in a single script
-- invocation. On conflict nothing is written and the current active job id is
-- returned, so the caller can decide how to respond (and clean up any input
-- it already uploaded).
--
-- Why Lua rather than MULTI/EXEC:
--   * The check ("is there already an active job?") and the writes happen
--     inside one script, so no other command can run in between.
--   * The conditional write needs no extra client round-trip.
--
-- KEYS:
--   KEYS[1] = user:{user_id}:active_job  -- job_id currently in progress for the user (String)
--   KEYS[2] = job:{job_id}               -- full job record (JSON String)
--   KEYS[3] = user:{user_id}:jobs        -- all job_ids of the user (Set)
--
-- ARGV:
--   ARGV[1] = job_id            -- the job_id being claimed
--   ARGV[2] = job_record_json   -- full job record, already JSON-encoded
--   ARGV[3] = ttl_seconds       -- TTL applied to all three keys (7d = 604800 suggested)
--
-- Returns:
--   {"OK"}                          -- claimed; all three keys written
--   {"CONFLICT", existing_job_id}   -- user already has an active job; nothing written
--   error reply 'invalid_ttl'       -- ARGV[3] is not a number > 0; nothing written
--
-- Notes:
--   * EXPIRE is issued after every write, so an existing Set gets its TTL
--     refreshed and a newly created Set gets its TTL initialised.
--   * The conflict check runs before the TTL validation, so a conflicting
--     claim returns CONFLICT even when ARGV[3] is invalid.
--   * Nothing is logged from Lua; observability is the caller's job.

if redis.call('EXISTS', KEYS[1]) == 1 then
  return {'CONFLICT', redis.call('GET', KEYS[1])}
end

-- Sec m5: validate the TTL explicitly so a bad value yields a clear
-- 'invalid_ttl' error instead of an opaque EXPIRE failure that the caller
-- cannot tell apart from an infrastructure problem.
local ttl = tonumber(ARGV[3])
if not ttl or ttl <= 0 then
  return redis.error_reply('invalid_ttl')
end

-- Active-job pointer for the user
redis.call('SET', KEYS[1], ARGV[1])
redis.call('EXPIRE', KEYS[1], ttl)

-- Full job record
redis.call('SET', KEYS[2], ARGV[2])
redis.call('EXPIRE', KEYS[2], ttl)

-- Per-user index of job ids
redis.call('SADD', KEYS[3], ARGV[1])
redis.call('EXPIRE', KEYS[3], ttl)

return {'OK'}
-- release_active_job.lua
--
-- Aligned with the Sec M2 + Reviewer Major-2 fix: when enqueueing
-- (xadd queue:onnx) fails, release user:{userId}:active_job as compensation so
-- the user is not locked out until the 7-day TTL expires.
--
-- Why Lua (rather than client-side GET → compare → DEL):
--   1. Guarded release: the slot is only released while active_job still
--      points at the job_id we claimed, so a later claim by another job is
--      never deleted by mistake.
--   2. Single round-trip to Redis.
--   3. Symmetric with claim_active_job.lua: claim writes three keys in one
--      script, release cleans up the same three
--      (active_job DEL + job:{id} DEL + user:{}:jobs SREM).
--
-- KEYS:
--   KEYS[1] = user:{user_id}:active_job  -- job_id currently in progress for the user (String)
--   KEYS[2] = job:{job_id}               -- full job record (JSON String)
--   KEYS[3] = user:{user_id}:jobs        -- all job_ids of the user (Set)
--
-- ARGV:
--   ARGV[1] = job_id   -- job to release; deletion happens only when the value
--                         stored at KEYS[1] equals this job_id
--
-- Returns:
--   {"OK"}     -- released (active_job and job:{id} deleted, SREM executed)
--   {"NOOP"}   -- active_job is missing or holds a different job_id;
--                 nothing was modified (protects the current holder)
--
-- Notes:
--   * A NOOP leaves the user no worse off than before (the slot still expires
--     via its TTL); the caller should log it as WARN rather than ERROR.
--   * Nothing is logged from Lua; observability is the caller's job.

-- GET yields false for a missing key, which never equals ARGV[1] → NOOP.
local current = redis.call('GET', KEYS[1])
if current ~= ARGV[1] then
  return {'NOOP'}
end

redis.call('DEL', KEYS[1])
redis.call('DEL', KEYS[2])
redis.call('SREM', KEYS[3], ARGV[1])

return {'OK'}
推送 job 狀態 + * GET /jobs/:jobId/download/:filename — 下載結果檔 + * GET /queues/stats — Redis Stream / Group 統計 + * + * 設計取捨: + * - 採 factory `createLegacyRouter(deps)`,把 redis / jobService / sseService / + * minio / uploader / healthService 等全部依賴顯式注入,避免再產生新的全域狀態 + * - 所有 helper(`getJob` / `enqueueStage` / `setJob`)都改走 jobService, + * 不再從本檔內定義 + * - multer middleware 由 deps.uploader 提供(共用) + * + * T8 變更(/health): + * - 若 deps.healthService 存在 → 使用其 cached snapshot,加上向後相容欄位 + * (`service: 'task-scheduler'`、頂層 `redis`),避免破壞既有監控 + * - 若 deps.healthService 缺漏 → 退回原本只 ping Redis 的舊行為(單元測試友善) + */ + +'use strict'; + +const express = require('express'); +const fs = require('fs'); +const { v4: uuidv4 } = require('uuid'); + +const { writeJobFilesToLocal, resolveLocalDownloadPath } = require('../storage/local'); +const { STAGE_QUEUES, DONE_QUEUE } = require('../services/jobService'); + +/** + * 建立 legacy router。 + * + * @param {object} deps + * @param {import('ioredis').Redis} deps.redis + * @param {ReturnType} deps.jobService + * @param {{ sendSSE: Function, registerSseClient: Function }} deps.sseService + * @param {ReturnType} deps.minio + * @param {import('multer').Multer} deps.uploader + * @param {ReturnType} [deps.healthService] + * T8:若提供,/health 改用 cached snapshot;若缺漏(單元測試常見),退回 Redis ping 模式。 + * @returns {import('express').Router} + */ +function createLegacyRouter(deps) { + if (!deps || !deps.redis) throw new Error('[legacy] deps.redis required'); + if (!deps.jobService) throw new Error('[legacy] deps.jobService required'); + if (!deps.sseService) throw new Error('[legacy] deps.sseService required'); + if (!deps.minio) throw new Error('[legacy] deps.minio required'); + if (!deps.uploader) throw new Error('[legacy] deps.uploader required'); + + const { redis, jobService, sseService, minio, uploader, healthService } = deps; + const router = express.Router(); + + // ------------------------------------------------------------------------- + // GET /health + // + // T8 升級:使用 
healthService 的 background-cached snapshot,包含 redis / + // member_center / file_access_agent 可達性。永遠不阻塞(snapshot 為 sync 讀取)。 + // + // 向後相容:保留既有監控期待的欄位 + // - 頂層 `service: 'task-scheduler'`(既有)+ snapshot 內也保留新的 + // `service: 'kneron-converter-api'`?答:避免衝突,回應根欄位採既有 + // `service: 'task-scheduler'`,新欄位 `dependencies.*` 並列;TDD §1.4.1 + // 的 service 名稱對齊 v1(未來在 v1 出新 /api/v1/health 時可改名)。 + // - 頂層 `redis: 'connected' | 'disconnected'`(既有) + // - 頂層 `timestamp`(既有) + // - 新增 `dependencies` 物件(含 redis / member_center / file_access_agent) + // - 新增 `version` + // + // 503 行為:snapshot.status === 'unhealthy'(即 Redis disconnected)→ 503; + // degraded(MC / FAA 任一不可達但 Redis OK)→ 200,由監控決定告警等級。 + // ------------------------------------------------------------------------- + router.get('/health', async (req, res) => { + if (healthService && typeof healthService.getHealth === 'function') { + const snapshot = healthService.getHealth(); + const httpStatus = snapshot.status === 'unhealthy' ? 503 : 200; + // 向後相容:頂層保留 service / timestamp / redis 欄位(既有監控可能依賴) + const legacyTopLevelRedis = snapshot.dependencies.redis; // 'connected' | 'disconnected' + res.status(httpStatus).json({ + service: 'task-scheduler', // 既有監控用 + status: snapshot.status, // 'healthy' | 'degraded' | 'unhealthy' + timestamp: snapshot.timestamp, + redis: legacyTopLevelRedis, // 既有欄位 + version: snapshot.version, + dependencies: snapshot.dependencies, + }); + return; + } + + // Fallback:deps 沒提供 healthService(測試 / 啟動失敗時的降級) + // 行為對齊 server.js L303-319 的舊實作 + try { + await redis.ping(); + res.json({ + service: 'task-scheduler', + status: 'healthy', + timestamp: new Date().toISOString(), + redis: 'connected', + }); + } catch { + res.status(503).json({ + service: 'task-scheduler', + status: 'unhealthy', + redis: 'disconnected', + }); + } + }); + + // ------------------------------------------------------------------------- + // POST /jobs (對齊 server.js L322-420) + // 
------------------------------------------------------------------------- + router.post( + '/jobs', + uploader.fields([ + { name: 'model', maxCount: 1 }, + { name: 'ref_images', maxCount: 100 }, + ]), + async (req, res) => { + try { + // 必填欄位(model_id, version, platform) + const { model_id, version, platform } = req.body; + if (!model_id || !version || !platform) { + return res + .status(400) + .json({ error: 'model_id, version, platform are required' }); + } + if (!req.files || !req.files.model || req.files.model.length === 0) { + return res.status(400).json({ error: 'model file is required' }); + } + + const jobId = uuidv4(); + const modelFile = req.files.model[0]; + + if (minio.client) { + // S3 mode:上傳到 MinIO + const s3Prefix = `jobs/${jobId}`; + await minio.uploadToMinIO( + `${s3Prefix}/input/${modelFile.originalname}`, + modelFile.buffer, + modelFile.mimetype || 'application/octet-stream' + ); + + if (req.files.ref_images) { + for (const img of req.files.ref_images) { + await minio.uploadToMinIO( + `${s3Prefix}/input/ref_images/${img.originalname}`, + img.buffer, + img.mimetype || 'image/jpeg' + ); + } + } + + // eslint-disable-next-line no-console + console.log(`[Scheduler] Uploaded job ${jobId} files to MinIO`); + } else { + // Local mode:寫到 shared volume + writeJobFilesToLocal(jobId, modelFile, req.files.ref_images); + } + + // 可選旗標 + const parameters = { + model_id: parseInt(model_id, 10), + version, + platform, + enable_evaluate: req.body.enable_evaluate === 'true', + enable_sim_fp: req.body.enable_sim_fp === 'true', + enable_sim_fixed: req.body.enable_sim_fixed === 'true', + enable_sim_hw: req.body.enable_sim_hw === 'true', + }; + + // Job record(與 legacy 完全一致) + const job = { + job_id: jobId, + created_at: new Date().toISOString(), + status: 'ONNX', + stage: 'onnx', + progress: 0, + updated_at: new Date().toISOString(), + parameters, + output: { bie_path: null, nef_path: null }, + error: null, + }; + + await jobService.setJob(jobId, job); + await 
jobService.enqueueStage('onnx', job); + + res.status(201).json({ + job_id: jobId, + status: 'ONNX', + message: 'Job created and queued', + }); + } catch (err) { + // eslint-disable-next-line no-console + console.error('[Scheduler] POST /jobs error:', err); + res.status(500).json({ error: err.message }); + } + } + ); + + // ------------------------------------------------------------------------- + // GET /jobs/:jobId (對齊 server.js L423-429) + // ------------------------------------------------------------------------- + router.get('/jobs/:jobId', async (req, res) => { + const job = await jobService.getJob(req.params.jobId); + if (!job) { + return res.status(404).json({ error: 'JOB_NOT_FOUND' }); + } + res.json(job); + }); + + // ------------------------------------------------------------------------- + // GET /jobs (對齊 server.js L432-446) + // ------------------------------------------------------------------------- + router.get('/jobs', async (req, res) => { + try { + const keys = await redis.keys('job:*'); + const jobs = []; + for (const key of keys) { + const raw = await redis.get(key); + if (raw) jobs.push(JSON.parse(raw)); + } + jobs.sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); + res.json(jobs); + } catch (err) { + res.status(500).json({ error: err.message }); + } + }); + + // ------------------------------------------------------------------------- + // GET /jobs/:jobId/events — SSE (對齊 server.js L449-487) + // ------------------------------------------------------------------------- + router.get('/jobs/:jobId/events', async (req, res) => { + const jobId = req.params.jobId; + + const job = await jobService.getJob(jobId); + if (!job) { + return res.status(404).json({ error: 'JOB_NOT_FOUND' }); + } + + // 由 sseService 處理 headers / heartbeat / cleanup(行為與 legacy 對齊) + sseService.registerSseClient(jobId, job, res, req); + }); + + // ------------------------------------------------------------------------- + // GET 
/jobs/:jobId/download/:filename (對齊 server.js L490-524) + // ------------------------------------------------------------------------- + router.get('/jobs/:jobId/download/:filename', async (req, res) => { + const { jobId, filename } = req.params; + + const job = await jobService.getJob(jobId); + if (!job) { + return res.status(404).json({ error: 'JOB_NOT_FOUND' }); + } + + if (minio.client) { + // MinIO mode:取出後回傳 + const minioKey = `jobs/${jobId}/${filename}`; + try { + const result = await minio.getFromMinIO(minioKey); + if (!result) { + return res.status(404).json({ error: 'FILE_NOT_FOUND' }); + } + res.setHeader('Content-Disposition', `attachment; filename="${filename}"`); + res.setHeader('Content-Length', result.body.length); + res.send(result.body); + } catch (err) { + if (err.name === 'NoSuchKey') { + return res.status(404).json({ error: 'FILE_NOT_FOUND' }); + } + // eslint-disable-next-line no-console + console.error('[Scheduler] Download error:', err); + res.status(500).json({ error: 'Download failed' }); + } + } else { + // Local mode:從 filesystem 直接回傳 + const filePath = resolveLocalDownloadPath(jobId, filename); + if (!fs.existsSync(filePath)) { + return res.status(404).json({ error: 'FILE_NOT_FOUND' }); + } + res.download(filePath); + } + }); + + // ------------------------------------------------------------------------- + // GET /queues/stats (對齊 server.js L527-607) + // ------------------------------------------------------------------------- + router.get('/queues/stats', async (req, res) => { + try { + const queues = [ + STAGE_QUEUES.onnx, + STAGE_QUEUES.bie, + STAGE_QUEUES.nef, + DONE_QUEUE, + ]; + const groupNames = { + [STAGE_QUEUES.onnx]: 'onnx-workers', + [STAGE_QUEUES.bie]: 'bie-workers', + [STAGE_QUEUES.nef]: 'nef-workers', + [DONE_QUEUE]: 'scheduler', + }; + + const stats = {}; + + for (const queue of queues) { + const length = await redis.xlen(queue); + let consumers = []; + let pending = 0; + let lag = 0; + + const group = 
groupNames[queue]; + if (group) { + try { + const groups = await redis.xinfo('GROUPS', queue); + for (let i = 0; i < groups.length; i++) { + const g = groups[i]; + const info = {}; + for (let j = 0; j < g.length; j += 2) { + info[g[j]] = g[j + 1]; + } + if (info.name === group) { + pending = parseInt(info.pending || '0', 10); + lag = parseInt(info.lag || '0', 10); + + // 取得這個 group 內的 consumers + try { + const consumerList = await redis.xinfo('CONSUMERS', queue, group); + consumers = consumerList.map((c) => { + const ci = {}; + for (let j = 0; j < c.length; j += 2) { + ci[c[j]] = c[j + 1]; + } + return { + name: ci.name, + pending: parseInt(ci.pending || '0', 10), + idle: parseInt(ci.idle || '0', 10), + }; + }); + } catch { + /* no consumers yet */ + } + break; + } + } + } catch { + /* group may not exist yet */ + } + } + + stats[queue] = { length, pending, lag, consumers }; + } + + // Job 摘要 + const keys = await redis.keys('job:*'); + const jobSummary = { + total: keys.length, + ONNX: 0, + BIE: 0, + NEF: 0, + COMPLETED: 0, + FAILED: 0, + }; + for (const key of keys) { + const raw = await redis.get(key); + if (raw) { + const job = JSON.parse(raw); + if (jobSummary[job.status] !== undefined) { + jobSummary[job.status]++; + } + } + } + + res.json({ + timestamp: new Date().toISOString(), + queues: stats, + jobs: jobSummary, + }); + } catch (err) { + // eslint-disable-next-line no-console + console.error('[Scheduler] GET /queues/stats error:', err); + res.status(500).json({ error: err.message }); + } + }); + + return router; +} + +module.exports = { createLegacyRouter }; diff --git a/apps/task-scheduler/src/routes/v1/__tests__/createJob.integration.test.js b/apps/task-scheduler/src/routes/v1/__tests__/createJob.integration.test.js new file mode 100644 index 0000000..0536b15 --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/__tests__/createJob.integration.test.js @@ -0,0 +1,1212 @@ +/** + * POST /api/v1/jobs 整合測試(T5)。 + * + * 測試範圍: + * - 401 invalid_token:缺 
Authorization + * - 403 insufficient_scope:token 缺 converter:job.write + * - 400 validation_error:缺欄位 / 副檔名錯 + * - 413 file_too_large:multer LIMIT_FILE_SIZE + * - 500 misconfiguration:STORAGE_BACKEND !== 'minio' + * - 502 storage_unavailable:MinIO 寫失敗 + * - 409 user_has_active_job:同 user 已有 active job(M5 重點) + * - 201 happy path:完整流程,含 ref_images + * + * 啟動方式:用 createApp + 注入 mock deps(包含 verify 函數注入), + * app.listen(0),用 fetch / FormData 真打 HTTP。 + */ + +'use strict'; + +const express = require('express'); + +const { createApp } = require('../../../app'); +const { createSseService } = require('../../../services/sseService'); +const { createJobService } = require('../../../services/jobService'); +const { createUploader } = require('../../../middleware/upload'); +const { requireAuth } = require('../../../auth/middleware'); + +// Mock luaScripts to control claim / release outcome without real Redis Lua +jest.mock('../../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(async () => ({ ok: true, released: true })), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); +const { claimActiveJob, releaseActiveJob } = require('../../../redis/luaScripts'); + +const FAKE_CONFIG = Object.freeze({ + memberCenter: { + issuer: 'https://auth.test.local', + jwksUrl: 'https://auth.test.local/.well-known/jwks', + tokenUrl: '', + }, + converter: { + audience: 'kneron_converter_api', + clientId: '', + clientSecret: '', + tenantId: '', + scopeWrite: 'converter:job.write', + scopeRead: 'converter:job.read', + }, + fileAccessAgent: { baseUrl: '', audience: 'file_access_api' }, + jwks: { cacheMaxAgeMs: 60000, cooldownMs: 30000, clockToleranceSec: 60 }, +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * 建立 verify 函數:根據 token 字串決定回 claims / throw。 + */ +function 
makeVerifier({ tokens }) { + return async (token) => { + const entry = tokens[token]; + if (!entry) { + const err = new Error('invalid token'); + err.code = 'ERR_JWS_SIGNATURE_VERIFICATION_FAILED'; + throw err; + } + if (entry.expired) { + const err = new Error('expired'); + err.code = 'ERR_JWT_EXPIRED'; + throw err; + } + return { payload: entry.claims }; + }; +} + +function makeFakeRedis() { + const store = new Map(); + return { + store, + pingFails: false, + ping: jest.fn(async function () { + if (this.pingFails) throw new Error('ping failed'); + return 'PONG'; + }), + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + keys: jest.fn(async () => []), + xadd: jest.fn(async () => '1-0'), + xlen: jest.fn(async () => 0), + xinfo: jest.fn(async () => { + throw new Error('NOGROUP'); + }), + }; +} + +function makeFakeMinio({ uploadFails = false } = {}) { + const uploaded = []; + const deleted = []; + return { + client: { _fake: true }, + bucket: 'test-bucket', + endpoint: 'http://nope', + uploadToMinIO: jest.fn(async (key, body, contentType) => { + if (uploadFails) throw new Error('storage down'); + uploaded.push({ key, contentType, size: body.length }); + }), + getFromMinIO: jest.fn(async () => null), + deleteObject: jest.fn(async (key) => { + deleted.push(key); + }), + _uploaded: uploaded, + _deleted: deleted, + }; +} + +/** + * 建立有完整 deps 的 app(含 v1 POST 真實 handler)。 + * + * 為了能注入「假的 verify 函數」(避免去打真的 JWKS),我們在 app 啟動前把 + * `requireAuth` 預先 bind 到 verify mock,再透過 v1 router 的 deps.config 傳遞。 + * + * 但目前 jobs.js 的 buildCreateJobHandler 是直接 require requireAuth;要注入 + * verify 函數需要從 v1Deps 多帶一個 `verify` 給 requireAuth。最小改動:把 + * verify 注入到 requireAuth 的 deps 中。 + */ +async function startApp({ + storageBackend = 'minio', + uploadFails = false, + rateLimit = { windowMs: 60000, max: 1000 }, + tokens, + maxFileSize, // 給 413 測試用(覆寫 multer fileSize limit) +}) { + const 
redis = makeFakeRedis(); + const minio = makeFakeMinio({ uploadFails }); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + const uploader = createUploader(maxFileSize ? { maxFileSize } : undefined); + + // 為了不讓 requireAuth 真的去打 JWKS,我們這裡 monkey-patch jobs.js 的 module + // 太重;改用更直接的方式:寫一個薄層 app 直接 mount jobs.js 的 router 但 + // **預先把 requireAuth 改造**為「使用我們的 verify mock」。 + // + // 實際採用:透過 jobs.js 的 createJobsRouter(deps) 注入 config + verify? + // 目前 createJobsRouter(deps) 內部 requireAuth(scope, { config }) 沒帶 verify。 + // 解法:在 createApp 之外,直接組裝 router,把 verify 注入。 + + // 為了簡化,我們直接在這裡組 app(不用 createApp 的整合 path) + const app = express(); + const helmet = require('helmet'); + const cors = require('cors'); + const compression = require('compression'); + const morgan = require('morgan'); + const rateLimitLib = require('express-rate-limit'); + const { requestIdMiddleware } = require('../../../middleware/requestId'); + const { errorHandler, ApiError } = require('../../../middleware/errorHandler'); + const { createPerClientRateLimiter } = require('../../../middleware/perClientRateLimit'); + const { v4: uuidv4 } = require('uuid'); + const { validateCreateJobRequest } = require('../validators/createJob'); + const { _internals: jobsInternals } = require('../jobs'); + + app.use(helmet()); + app.use(requestIdMiddleware); + app.use(compression()); + app.use(morgan('short')); + app.use(cors()); + app.use(express.json({ limit: '10mb' })); + app.use(express.urlencoded({ extended: true, limit: '10mb' })); + + // v1 router with verify injection + const v1 = express.Router(); + const verify = makeVerifier({ tokens }); + const requireWriteAuth = requireAuth(FAKE_CONFIG.converter.scopeWrite, { + config: FAKE_CONFIG, + verify, + }); + const perClientLimiter = createPerClientRateLimiter(rateLimit); + const handler = jobsInternals.buildCreateJobHandler({ + jobService, + storageBackend, + }); + 
v1.post( + '/jobs', + requireWriteAuth, + perClientLimiter, + uploader.fields([ + { name: 'model', maxCount: 1 }, + { name: 'ref_images', maxCount: 100 }, + ]), + jobsInternals.multerErrorAdapter, + handler + ); + + app.use('/api/v1', v1); + app.use('/api/v1', errorHandler); + + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + server, + baseUrl: `http://127.0.0.1:${port}`, + redis, + minio, + jobService, + close: () => new Promise((r) => server.close(r)), + }); + }); + }); +} + +/** + * 用 Node.js 內建 FormData (Node 18+) 組 multipart body。 + * Node 內建的 FormData 與 fetch 整合可直接送 multipart/form-data。 + */ +function buildFormData({ modelBuffer, modelFilename = 'model.onnx', refImages = [], fields = {} }) { + const fd = new FormData(); + if (modelBuffer) { + fd.set('model', new Blob([modelBuffer], { type: 'application/octet-stream' }), modelFilename); + } + for (const ri of refImages) { + fd.append( + 'ref_images', + new Blob([ri.buffer], { type: ri.type || 'image/jpeg' }), + ri.filename || 'image.jpg' + ); + } + for (const [k, v] of Object.entries(fields)) { + if (v !== undefined) fd.set(k, v); + } + return fd; +} + +const HAPPY_TOKENS = { + 'good-write-token': { + claims: { + sub: 'kneron_converter_client', + client_id: 'visionA-backend-client', + scope: 'converter:job.write converter:job.read', + }, + }, + 'read-only-token': { + claims: { + sub: 'reader', + client_id: 'visionA-backend-client', + scope: 'converter:job.read', // 缺 write + }, + }, + 'expired-token': { + expired: true, + claims: {}, + }, + // 用於 409 衝突情境的另一個 client(避免被前一個測試的 quota 累計影響) + 'good-write-token-alt': { + claims: { + sub: 'kneron_converter_client', + client_id: 'visionA-backend-client-alt', + scope: 'converter:job.write', + }, + }, +}; + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 
'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +beforeEach(() => { + claimActiveJob.mockReset(); + releaseActiveJob.mockReset(); + // 預設 release 回成功(測試需要時 mockImplementationOnce 覆寫) + releaseActiveJob.mockResolvedValue({ ok: true, released: true }); +}); + +const happyFields = () => ({ + user_id: 'visionA-user-12345', + model_id: '1001', + version: '0001', + platform: '520', + enable_evaluate: 'false', + enable_sim_fp: 'false', + enable_sim_fixed: 'false', + enable_sim_hw: 'false', +}); + +// --------------------------------------------------------------------------- +// Auth tests +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — auth', () => { + let ctx; + beforeEach(async () => { + ctx = await startApp({ tokens: HAPPY_TOKENS }); + }); + afterEach(async () => { + await ctx.close(); + }); + + it('returns 401 invalid_token when Authorization header missing', async () => { + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + body: fd, + }); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('invalid_token'); + expect(typeof body.error.request_id).toBe('string'); + }); + + it('returns 401 invalid_token when Bearer token unknown', async () => { + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer unknown-token' }, + body: fd, + }); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('invalid_token'); + }); + + it('returns 401 token_expired with expired token', async () => { + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + 
method: 'POST', + headers: { Authorization: 'Bearer expired-token' }, + body: fd, + }); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('token_expired'); + }); + + it('returns 403 insufficient_scope with read-only token', async () => { + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer read-only-token' }, + body: fd, + }); + expect(res.status).toBe(403); + const body = await res.json(); + expect(body.error.code).toBe('insufficient_scope'); + expect(body.error.details).toMatchObject({ + required_scope: 'converter:job.write', + }); + expect(body.error.details.provided_scopes).toEqual(['converter:job.read']); + }); + + it('sets Connection: close on 401 (M2)', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { + Authorization: 'Bearer unknown-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({}), + }); + // fetch 在 HTTP/1.1 對 'Connection: close' 不一定會 surface header; + // 改驗 status + 連線後續行為(fetch 本身會處理);至少 status 是 401 + expect(res.status).toBe(401); + }); +}); + +// --------------------------------------------------------------------------- +// Validation tests +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — validation', () => { + let ctx; + beforeEach(async () => { + ctx = await startApp({ tokens: HAPPY_TOKENS }); + }); + afterEach(async () => { + await ctx.close(); + }); + + it('returns 400 validation_error when user_id missing', async () => { + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: { ...happyFields(), user_id: undefined }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + 
expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => f.field)).toContain('user_id'); + }); + + it('returns 400 when model file missing', async () => { + const fd = buildFormData({ fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => f.field)).toContain('model'); + }); + + it('returns 400 when model extension is unsupported', async () => { + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + modelFilename: 'evil.exe', + fields: happyFields(), + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect( + body.error.details.fields.find((f) => f.field === 'model').message + ).toMatch(/不支援/); + }); + + it('returns 400 when platform invalid', async () => { + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: { ...happyFields(), platform: 'X9999' }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.details.fields.map((f) => f.field)).toContain('platform'); + }); + + it('returns 400 when user_id contains slash', async () => { + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: { ...happyFields(), user_id: 'evil/user' }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 
'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.details.fields.map((f) => f.field)).toContain('user_id'); + }); +}); + +// --------------------------------------------------------------------------- +// Misconfiguration test +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — misconfiguration', () => { + it('returns 500 misconfiguration when STORAGE_BACKEND !== minio', async () => { + const ctx = await startApp({ storageBackend: 'local', tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); // shouldn't be reached + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(500); + const body = await res.json(); + expect(body.error.code).toBe('misconfiguration'); + // claim 不應該被呼叫到 + expect(claimActiveJob).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Storage failure +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — storage failure (M5 方案 A)', () => { + it('returns 502 storage_unavailable and Redis stays clean', async () => { + const ctx = await startApp({ + uploadFails: true, + tokens: HAPPY_TOKENS, + }); + try { + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(502); + const body = await res.json(); + expect(body.error.code).toBe('storage_unavailable'); + // M5 方案 A 的核心驗證:Redis 完全乾淨(沒呼叫 claimActiveJob) + 
expect(claimActiveJob).not.toHaveBeenCalled(); + expect(ctx.redis.set).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Conflict test +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — 409 user_has_active_job', () => { + // Sec M4:active_job 已存在時,pre-check 在 MinIO 寫入前就 reject,避免寫入放大 + it('returns 409 via pre-check (Sec M4) — no MinIO write when active_job exists', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + // 預先放一個 active job record(M4 pre-check 會先 GET 到) + ctx.redis.store.set('user:visionA-user-12345:active_job', 'existing-job-id'); + ctx.redis.store.set( + 'job:existing-job-id', + JSON.stringify({ + job_id: 'existing-job-id', + status: 'BIE', + stage: 'bie', + progress: 45, + created_at: '2026-04-25T11:00:00Z', + }) + ); + + const fd = buildFormData({ modelBuffer: Buffer.from('mmmm'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + + expect(res.status).toBe(409); + const body = await res.json(); + expect(body.error.code).toBe('user_has_active_job'); + expect(body.error.details.active_job_id).toBe('existing-job-id'); + expect(body.error.details.active_job_status).toBe('BIE'); + expect(body.error.details.active_job_stage).toBe('bie'); + expect(body.error.details.active_job_progress).toBe(45); + + // Sec M4:MinIO 不應該被呼叫(pre-check 已 short-circuit) + expect(ctx.minio.uploadToMinIO).not.toHaveBeenCalled(); + // claim Lua 也不應該被呼叫(pre-check 在 claim 之前) + expect(claimActiveJob).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); + + // Race scenario:pre-check 通過(active_job 不存在)但 Lua claim 回 conflict + // (兩個 client 同時通過 pre-check,最後只有一個能透過 Lua claim) + it('returns 409 via Lua conflict (race) — MinIO uploaded then cleanup 
called', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + // ★ pre-check 不會觸發(active_job 不在 Redis) + // 但 Lua claim 模擬 race 後的 conflict + claimActiveJob.mockResolvedValueOnce({ + ok: false, + conflict: true, + activeJobId: 'existing-job-id', + }); + // Lua 完成後,handler 會用 claimResult.activeJobId 直接讀 job:{id} + // (Reviewer Major-1 修復:不再走 user:{}:active_job → job 兩次 GET) + ctx.redis.store.set( + 'job:existing-job-id', + JSON.stringify({ + job_id: 'existing-job-id', + status: 'BIE', + stage: 'bie', + progress: 45, + created_at: '2026-04-25T11:00:00Z', + }) + ); + + const fd = buildFormData({ modelBuffer: Buffer.from('mmmm'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + + expect(res.status).toBe(409); + const body = await res.json(); + expect(body.error.code).toBe('user_has_active_job'); + expect(body.error.details.active_job_id).toBe('existing-job-id'); + expect(body.error.details.active_job_status).toBe('BIE'); + expect(body.error.details.active_job_stage).toBe('bie'); + expect(body.error.details.active_job_progress).toBe(45); + + // M5 方案 A:MinIO 已寫入(pre-check 通過後寫 MinIO,再走 Lua claim) + expect(ctx.minio.uploadToMinIO).toHaveBeenCalledTimes(1); + // cleanup 已被呼叫 + await new Promise((r) => setImmediate(r)); + expect(ctx.minio.deleteObject).toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); + + // Reviewer Major-1:當 Lua 衝突 + getJob(claimResult.activeJobId) 取不到 record + // (race:另一 worker 同步刪掉了 active job record),fallback 只回 active_job_id + it('falls back to {active_job_id} only when active job record disappeared (Reviewer Major-1)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ + ok: false, + conflict: true, + activeJobId: 'orphan-job-id', + }); + // 不放 job:{orphan-job-id} record(模擬已被別人刪掉的 race) + + const fd = 
buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(409); + const body = await res.json(); + expect(body.error.code).toBe('user_has_active_job'); + // fallback 只有 active_job_id,沒有 status/stage/progress/created_at + expect(body.error.details).toEqual({ active_job_id: 'orphan-job-id' }); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Happy path +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — 201 happy path', () => { + it('creates job successfully with model + ref_images', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + + const fd = buildFormData({ + modelBuffer: Buffer.from('model-content'), + fields: happyFields(), + refImages: [ + { buffer: Buffer.from('img-1'), filename: 'a.jpg', type: 'image/jpeg' }, + { buffer: Buffer.from('img-2'), filename: 'b.png', type: 'image/png' }, + ], + }); + + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + + expect(res.status).toBe(201); + const body = await res.json(); + expect(body).toMatchObject({ + status: 'created', + stage: 'onnx', + progress: 0, + user_id: 'visionA-user-12345', + }); + expect(typeof body.job_id).toBe('string'); + expect(body.job_id).toMatch( + /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i + ); + expect(typeof body.created_at).toBe('string'); + expect(typeof body.expires_at).toBe('string'); + + // MinIO:model + 2 個 ref_images + expect(ctx.minio.uploadToMinIO).toHaveBeenCalledTimes(3); + const keys = ctx.minio._uploaded.map((u) => u.key); + 
expect(keys).toEqual( + expect.arrayContaining([ + expect.stringMatching(/^jobs\/[^/]+\/input\/model\.onnx$/), + expect.stringMatching(/^jobs\/[^/]+\/ref_images\/0_a\.jpg$/), + expect.stringMatching(/^jobs\/[^/]+\/ref_images\/1_b\.png$/), + ]) + ); + + // Lua claim 一次(含完整 jobRecord JSON) + expect(claimActiveJob).toHaveBeenCalledTimes(1); + const claimArgs = claimActiveJob.mock.calls[0][1]; + expect(claimArgs.userId).toBe('visionA-user-12345'); + expect(claimArgs.jobId).toBe(body.job_id); + const stored = JSON.parse(claimArgs.jobJson); + expect(stored.origin).toBe('api'); + expect(stored.user_id).toBe('visionA-user-12345'); + expect(stored.created_by_client_id).toBe('visionA-backend-client'); + expect(stored.input.filename).toBe('model.onnx'); + expect(stored.input.ref_images_count).toBe(2); + expect(stored.input.size_bytes).toBe(Buffer.from('model-content').length); + expect(stored.parameters).toEqual({ + model_id: 1001, + version: '0001', + platform: '520', + enable_evaluate: false, + enable_sim_fp: false, + enable_sim_fixed: false, + enable_sim_hw: false, + }); + + // enqueue 也已呼叫(onnx queue) + expect(ctx.redis.xadd).toHaveBeenCalledTimes(1); + const xaddCall = ctx.redis.xadd.mock.calls[0]; + expect(xaddCall[0]).toBe('queue:onnx'); + + // 沒有 cleanup + expect(ctx.minio.deleteObject).not.toHaveBeenCalled(); + + // request_id 在 response header + const reqId = res.headers.get('x-request-id'); + expect(reqId).toBeTruthy(); + } finally { + await ctx.close(); + } + }); + + it('handles 0 ref_images correctly', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: happyFields(), + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(201); + // 只 upload model + 
expect(ctx.minio.uploadToMinIO).toHaveBeenCalledTimes(1); + } finally { + await ctx.close(); + } + }); + + it('returns 413 file_too_large when model exceeds limit', async () => { + // 使用較小 limit(1KB)避免測試把 500MB buffer 配置進記憶體 + const ctx = await startApp({ tokens: HAPPY_TOKENS, maxFileSize: 1024 }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + const oversized = Buffer.alloc(2048, 0x41); // 2KB > 1KB limit + const fd = buildFormData({ + modelBuffer: oversized, + fields: happyFields(), + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(413); + const body = await res.json(); + expect(body.error.code).toBe('file_too_large'); + expect(body.error.details).toMatchObject({ + field: 'model', + limit_bytes: expect.any(Number), + }); + // claim 不應該被呼叫(multer 在 size limit 時就 throw) + expect(claimActiveJob).not.toHaveBeenCalled(); + // 沒有寫 MinIO 成功(multer throw 前就被打斷) + expect(ctx.minio.uploadToMinIO).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); + + // Sec C2:ref_image 超過 10MB → 413 file_too_large(per-file 限制) + it('returns 413 file_too_large when ref_image exceeds 10MB (Sec C2)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + const oversizedRefImage = Buffer.alloc(10 * 1024 * 1024 + 1024, 0x42); // 10MB + 1KB + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: happyFields(), + refImages: [ + { buffer: oversizedRefImage, filename: 'big.jpg', type: 'image/jpeg' }, + ], + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(413); + const body = await res.json(); + expect(body.error.code).toBe('file_too_large'); + expect(body.error.details).toMatchObject({ + field: 
'ref_images[0]', + size_bytes: expect.any(Number), + limit_bytes: 10 * 1024 * 1024, + }); + expect(body.error.details.size_bytes).toBeGreaterThan(10 * 1024 * 1024); + // 不應該走到 MinIO 寫入(validator 在 MinIO 之前先擋) + expect(ctx.minio.uploadToMinIO).not.toHaveBeenCalled(); + expect(claimActiveJob).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }, 15000); + + // Sec M3:version XSS → 400 validation_error + it('returns 400 validation_error when version contains XSS (Sec M3)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: { ...happyFields(), version: '' }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => f.field)).toContain('version'); + } finally { + await ctx.close(); + } + }); + + // Sec M1:user_id XSS → 400 validation_error + it('returns 400 validation_error when user_id contains XSS (Sec M1)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + fields: { ...happyFields(), user_id: '' }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => f.field)).toContain('user_id'); + } finally { + await ctx.close(); + } + }); + + // Sec M1:user_id wildcard → 400 + it('returns 400 validation_error when user_id contains wildcards (Sec M1)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + const fd = buildFormData({ + 
modelBuffer: Buffer.from('m'), + fields: { ...happyFields(), user_id: 'user*' }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => f.field)).toContain('user_id'); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Sec M2 + Reviewer Major-2: enqueue 失敗時補償釋放 active_job +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — enqueue failure rollback (Sec M2 + Reviewer Major-2)', () => { + it('releases active_job when enqueue throws (best-effort)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + // claim 成功 + claimActiveJob.mockResolvedValueOnce({ ok: true }); + // releaseActiveJob mock 已在檔頭設好,會回 { ok: true, released: true } + + // 但 enqueue (redis.xadd) 拋例外 — 模擬 Stream MAXLEN trim 異常 + ctx.redis.xadd.mockImplementationOnce(async () => { + throw new Error('XADD failed: stream too full'); + }); + + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + + expect(res.status).toBe(500); + const body = await res.json(); + expect(body.error.code).toBe('internal_error'); + + // MinIO 應該已上傳(先寫 MinIO 後 Lua) + expect(ctx.minio.uploadToMinIO).toHaveBeenCalledTimes(1); + // claim 已呼叫 + expect(claimActiveJob).toHaveBeenCalledTimes(1); + // xadd 被呼叫(嘗試 enqueue) + expect(ctx.redis.xadd).toHaveBeenCalledTimes(1); + + // 等 fire-and-forget compensation 執行完 + await new Promise((r) => setImmediate(r)); + + // ★ 核心驗證:releaseActiveJob 已被呼叫(補償釋放 active_job) + 
expect(releaseActiveJob).toHaveBeenCalledTimes(1); + const releaseArgs = releaseActiveJob.mock.calls[0][1]; + expect(releaseArgs.userId).toBe('visionA-user-12345'); + expect(typeof releaseArgs.jobId).toBe('string'); + + // cleanup MinIO 應該被呼叫(補償流程的一部分) + expect(ctx.minio.deleteObject).toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); + + it('still returns 500 when releaseActiveJob also throws (fire-and-forget; no double error)', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + releaseActiveJob.mockImplementationOnce(async () => { + throw new Error('release also failed'); + }); + ctx.redis.xadd.mockImplementationOnce(async () => { + throw new Error('XADD failed'); + }); + + const fd = buildFormData({ modelBuffer: Buffer.from('m'), fields: happyFields() }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + + // 仍是 500 internal_error(release 失敗不該影響 response) + expect(res.status).toBe(500); + const body = await res.json(); + expect(body.error.code).toBe('internal_error'); + + await new Promise((r) => setImmediate(r)); + // release 已嘗試(雖然失敗) + expect(releaseActiveJob).toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Sec M5:mount-time STORAGE_BACKEND 檢查(不掛 multer 避免 misconfig 也吃 body) +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — mount-time STORAGE_BACKEND check (Sec M5)', () => { + it('createJobsRouter mount-time check returns 500 misconfiguration when STORAGE_BACKEND=local', async () => { + // 不走 startApp(旁路),直接測 createJobsRouter mount-time 行為 + const { createJobsRouter } = require('../jobs'); + const { errorHandler } = require('../../../middleware/errorHandler'); + const { requestIdMiddleware } = 
require('../../../middleware/requestId'); + const helmet = require('helmet'); + + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService, minio, jobDataDir: '/tmp/x' }); + const uploader = createUploader(); + + const app = express(); + app.use(helmet()); + app.use(requestIdMiddleware); + app.use(express.json()); + + const router = createJobsRouter({ + jobService, + uploader, + config: FAKE_CONFIG, + rateLimit: { windowMs: 60000, max: 1000 }, + storageBackend: 'local', // ★ 故意設 local 觸發 mount-time misconfig + }); + app.use('/api/v1/jobs', router); + app.use('/api/v1/jobs', errorHandler); + + const server = await new Promise((resolve) => { + const s = app.listen(0, '127.0.0.1', () => resolve(s)); + }); + const { port } = server.address(); + try { + // 用 application/json 也能驗證 — multer 沒掛,body 不會被消化成 multipart + const res = await fetch(`http://127.0.0.1:${port}/api/v1/jobs`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ user_id: 'a', model_id: '1' }), + }); + expect(res.status).toBe(500); + const body = await res.json(); + expect(body.error.code).toBe('misconfiguration'); + expect(body.error.message).toContain('STORAGE_BACKEND=minio'); + + // 關鍵驗證:multer 沒掛 → uploadToMinIO 不會被呼叫(即便有 file 也不會被 parse) + expect(minio.uploadToMinIO).not.toHaveBeenCalled(); + } finally { + await new Promise((r) => server.close(r)); + } + }); + + it('createJobsRouter still mounts GET / DELETE / download-tokens when STORAGE_BACKEND=local', async () => { + const { createJobsRouter } = require('../jobs'); + const { errorHandler } = require('../../../middleware/errorHandler'); + const { requestIdMiddleware } = require('../../../middleware/requestId'); + const helmet = require('helmet'); + + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, 
sseService, minio, jobDataDir: '/tmp/x' }); + const uploader = createUploader(); + + const app = express(); + app.use(helmet()); + app.use(requestIdMiddleware); + app.use(express.json()); + + const router = createJobsRouter({ + jobService, + uploader, + config: FAKE_CONFIG, + rateLimit: { windowMs: 60000, max: 1000 }, + storageBackend: 'local', // misconfig + }); + app.use('/api/v1/jobs', router); + app.use('/api/v1/jobs', errorHandler); + + const server = await new Promise((resolve) => { + const s = app.listen(0, '127.0.0.1', () => resolve(s)); + }); + const { port } = server.address(); + try { + // T6 起 GET 已實作 — 沒帶 token 應回 401(代表 GET 已掛 requireAuth) + // 這比 501 更精確證明 GET 路由被 mount + 認證 middleware 已套用 + const getRes = await fetch(`http://127.0.0.1:${port}/api/v1/jobs`); + expect(getRes.status).toBe(401); + const getBody = await getRes.json(); + expect(getBody.error.code).toBe('invalid_token'); + + // DELETE 仍應 work(Phase 2 預留 → 501) + const delRes = await fetch(`http://127.0.0.1:${port}/api/v1/jobs/abc`, { + method: 'DELETE', + }); + expect(delRes.status).toBe(501); + } finally { + await new Promise((r) => server.close(r)); + } + }); +}); + +// --------------------------------------------------------------------------- +// Filename sanitization (path traversal 端到端驗證) +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — filename sanitization (end-to-end)', () => { + it('sanitizes malicious model filename to safe object key', async () => { + const ctx = await startApp({ tokens: HAPPY_TOKENS }); + try { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + const fd = buildFormData({ + modelBuffer: Buffer.from('m'), + // 攻擊:path traversal + modelFilename: '../../../etc/passwd.onnx', + fields: happyFields(), + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + method: 'POST', + headers: { Authorization: 'Bearer good-write-token' }, + body: fd, + }); + expect(res.status).toBe(201); + const body 
= await res.json(); + // object key 不能含 `..` 或絕對路徑 + const uploadedKey = ctx.minio._uploaded[0].key; + expect(uploadedKey).not.toContain('..'); + expect(uploadedKey).not.toMatch(/\/etc\//); + expect(uploadedKey).toMatch( + new RegExp(`^jobs/${body.job_id}/input/passwd\\.onnx$`) + ); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// createApp wiring smoke test +// 驗證透過 src/app.js 的 createApp 注入路徑(非 startApp 的旁路) +// 也能正確掛載 POST /api/v1/jobs handler。 +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs — createApp wiring smoke test', () => { + it('createApp(deps, opts.config) wires v1 POST handler with auth', async () => { + // 此測試使用真正的 createApp 路徑,驗證 app.js 把 v1Deps 透傳給 v1 router OK。 + // createApp 走的是真實 requireAuth(不帶 verify),所以只驗到「無 token → 401」 + // 即可——這就足以證明 wiring 正確(要 wire 錯就會 404 或 501)。 + claimActiveJob.mockResolvedValue({ ok: true }); + + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + const uploader = createUploader(); + + const app = createApp( + { redis, jobService, sseService, minio, uploader }, + { + frontendUrl: 'http://localhost:3000', + config: FAKE_CONFIG, + rateLimit: { windowMs: 60000, max: 100 }, + storageBackend: 'minio', + } + ); + + const server = await new Promise((resolve) => { + const s = app.listen(0, '127.0.0.1', () => resolve(s)); + }); + const { port } = server.address(); + try { + // 沒帶 token → 應走 requireAuth → 401 + const res = await fetch(`http://127.0.0.1:${port}/api/v1/jobs`, { + method: 'POST', + body: new FormData(), + }); + // 必須是 401(不是 501 - 證明 handler 已注入) + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('invalid_token'); + expect(typeof body.error.request_id).toBe('string'); + 
} finally { + await new Promise((r) => server.close(r)); + } + }); + + it('createApp without opts.config falls back to 501 (not_implemented)', async () => { + // 沒有 opts.config 時,jobs router 會 fallback 到 501(避免 Crash) + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + const uploader = createUploader(); + + const app = createApp( + { redis, jobService, sseService, minio, uploader }, + { frontendUrl: 'http://localhost:3000' /* 故意不傳 config */ } + ); + + const server = await new Promise((resolve) => { + const s = app.listen(0, '127.0.0.1', () => resolve(s)); + }); + const { port } = server.address(); + try { + const res = await fetch(`http://127.0.0.1:${port}/api/v1/jobs`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + }); + expect(res.status).toBe(501); + const body = await res.json(); + expect(body.error.code).toBe('not_implemented'); + expect(body.error.message).toContain('T5'); + } finally { + await new Promise((r) => server.close(r)); + } + }); +}); diff --git a/apps/task-scheduler/src/routes/v1/__tests__/createJob.validator.test.js b/apps/task-scheduler/src/routes/v1/__tests__/createJob.validator.test.js new file mode 100644 index 0000000..07c23fe --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/__tests__/createJob.validator.test.js @@ -0,0 +1,440 @@ +/** + * createJob validator 單元測試(T5)。 + * + * 重點: + * 1. 必填欄位缺漏全部回 400 + details.fields + * 2. 副檔名只允許 .onnx / .tflite(PRD F-01) + * 3. user_id 不允許 / \ : .. control chars + * 4. model_id 必須 1 ≤ x ≤ 65535 + * 5. platform 必須在 enum + * 6. enable_* 缺漏視為 false + * 7. 
metadata JSON parse + 必須是物件(非 array / 非 null) + */ + +'use strict'; + +const { + validateCreateJobRequest, + ALLOWED_MODEL_EXTENSIONS, + ALLOWED_PLATFORMS, +} = require('../validators/createJob'); + +function makeFile(originalname, sizeBytes = 100, mimetype = 'application/octet-stream') { + return { + originalname, + buffer: Buffer.alloc(sizeBytes, 0x7f), + mimetype, + size: sizeBytes, + }; +} + +function happyBody(overrides = {}) { + return { + user_id: 'visionA-user-12345', + model_id: '1001', + version: '0001', + platform: '520', + enable_evaluate: 'false', + enable_sim_fp: 'false', + enable_sim_fixed: 'false', + enable_sim_hw: 'false', + ...overrides, + }; +} + +describe('validateCreateJobRequest — happy path', () => { + it('accepts a valid payload with model.onnx + 0 ref images', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { model: [makeFile('model.onnx')] }, + }); + expect(result.ok).toBe(true); + expect(result.errors).toEqual([]); + expect(result.data.userId).toBe('visionA-user-12345'); + expect(result.data.parameters.model_id).toBe(1001); + expect(result.data.parameters.platform).toBe('520'); + expect(result.data.input.safeFilename).toBe('model.onnx'); + expect(result.data.input.extension).toBe('.onnx'); + expect(result.data.refImages).toHaveLength(0); + }); + + it('accepts model.tflite', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { model: [makeFile('weights.tflite')] }, + }); + expect(result.ok).toBe(true); + expect(result.data.input.extension).toBe('.tflite'); + }); + + it('accepts ref_images[] with sanitization', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('model.onnx')], + ref_images: [ + makeFile('img with space.jpg', 50, 'image/jpeg'), + makeFile('../../traversal.png', 50, 'image/png'), + ], + }, + }); + expect(result.ok).toBe(true); + expect(result.data.refImages).toHaveLength(2); + 
expect(result.data.refImages[0].safeFilename).toBe('img_with_space.jpg'); + expect(result.data.refImages[1].safeFilename).toBe('traversal.png'); + }); + + it('parses enable_* booleans correctly', () => { + const result = validateCreateJobRequest({ + body: happyBody({ + enable_evaluate: 'true', + enable_sim_fp: 'true', + enable_sim_fixed: 'false', + enable_sim_hw: undefined, // 缺漏 → false + }), + files: { model: [makeFile('m.onnx')] }, + }); + expect(result.ok).toBe(true); + expect(result.data.parameters.enable_evaluate).toBe(true); + expect(result.data.parameters.enable_sim_fp).toBe(true); + expect(result.data.parameters.enable_sim_fixed).toBe(false); + expect(result.data.parameters.enable_sim_hw).toBe(false); + }); + + it('parses metadata JSON object', () => { + const result = validateCreateJobRequest({ + body: happyBody({ metadata: '{"source":"visionA"}' }), + files: { model: [makeFile('m.onnx')] }, + }); + expect(result.ok).toBe(true); + expect(result.data.metadata).toEqual({ source: 'visionA' }); + }); + + it('handles ref_images[] alternate key (with brackets) gracefully', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + 'ref_images[]': [makeFile('img.jpg', 10, 'image/jpeg')], + }, + }); + expect(result.ok).toBe(true); + expect(result.data.refImages).toHaveLength(1); + }); +}); + +describe('validateCreateJobRequest — failures', () => { + function expectErrorOnField(result, field) { + expect(result.ok).toBe(false); + expect(result.errors.map((e) => e.field)).toContain(field); + } + + it('fails when user_id missing', () => { + const result = validateCreateJobRequest({ + body: happyBody({ user_id: undefined }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'user_id'); + }); + + it('fails when user_id contains slash / backslash / colon / ..', () => { + for (const bad of ['user/id', 'user\\id', 'user:id', 'user..id']) { + const result = validateCreateJobRequest({ + 
body: happyBody({ user_id: bad }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'user_id'); + } + }); + + it('fails when model_id is not numeric / out of range', () => { + for (const bad of ['', 'abc', '0', '65536', '-5']) { + const result = validateCreateJobRequest({ + body: happyBody({ model_id: bad }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'model_id'); + } + }); + + it('fails when platform not in enum', () => { + const result = validateCreateJobRequest({ + body: happyBody({ platform: '999' }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'platform'); + }); + + it('fails when version is empty / oversize / contains control chars', () => { + for (const bad of ['', 'a'.repeat(33), 'v1\nbad']) { + const result = validateCreateJobRequest({ + body: happyBody({ version: bad }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'version'); + } + }); + + // Sec M3:version 嚴格白名單,拒絕 XSS / 特殊字元 + it('fails when version contains XSS / shell metachars (Sec M3)', () => { + const xssPayloads = [ + '', + '', + 'v1; rm -rf', + 'v1$(id)', + 'v1`whoami`', + 'v1|cat', + 'v1&whoami', + 'v1 with space', + 'v1?', + 'v1*', + 'v1/path', + 'v1\\back', + 'v1:colon', + 'v1@email', + 'v1#hash', + 'v1%encoded', + ]; + for (const bad of xssPayloads) { + const result = validateCreateJobRequest({ + body: happyBody({ version: bad }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'version'); + } + }); + + it('accepts version with whitelist chars (alnum / . 
/ _ / -)', () => { + const goodVersions = [ + 'v1.0.0', + '2026-04-25', + 'build_42', + 'beta.1', + 'v1.0.0-alpha.1', + '1234567890', + 'a', + ]; + for (const good of goodVersions) { + const result = validateCreateJobRequest({ + body: happyBody({ version: good }), + files: { model: [makeFile('m.onnx')] }, + }); + expect(result.ok).toBe(true); + expect(result.data.parameters.version).toBe(good); + } + }); + + // Sec C2:ref_image per-file size 超過 10MB → tooLarge 信號 + it('returns tooLarge signal when ref_image exceeds 10MB (Sec C2)', () => { + const oversizedBuffer = Buffer.alloc(10 * 1024 * 1024 + 1, 0x42); // 10MB + 1 byte + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [ + { + originalname: 'big.jpg', + buffer: oversizedBuffer, + mimetype: 'image/jpeg', + size: oversizedBuffer.length, + }, + ], + }, + }); + expect(result.ok).toBe(false); + expect(result.tooLarge).toBeDefined(); + expect(result.tooLarge.field).toBe('ref_images[0]'); + expect(result.tooLarge.size_bytes).toBe(oversizedBuffer.length); + expect(result.tooLarge.limit_bytes).toBe(10 * 1024 * 1024); + }); + + it('reports first oversized ref_image among many (Sec C2)', () => { + const small = Buffer.from('small'); + const big = Buffer.alloc(10 * 1024 * 1024 + 100, 0x42); + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [ + { originalname: 'a.jpg', buffer: small, mimetype: 'image/jpeg' }, + { originalname: 'b.jpg', buffer: big, mimetype: 'image/jpeg' }, + { originalname: 'c.jpg', buffer: big, mimetype: 'image/jpeg' }, + ], + }, + }); + expect(result.ok).toBe(false); + expect(result.tooLarge).toBeDefined(); + expect(result.tooLarge.field).toBe('ref_images[1]'); + }); + + it('accepts ref_image at exactly 10MB (Sec C2 boundary)', () => { + const exactBuffer = Buffer.alloc(10 * 1024 * 1024, 0x42); // exactly 10MB + const result = validateCreateJobRequest({ + body: 
happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [ + { + originalname: 'ok.jpg', + buffer: exactBuffer, + mimetype: 'image/jpeg', + size: exactBuffer.length, + }, + ], + }, + }); + expect(result.ok).toBe(true); + expect(result.tooLarge).toBeUndefined(); + }); + + it('fails when model file missing', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: {}, + }); + expectErrorOnField(result, 'model'); + }); + + it('fails when model file extension not allowed', () => { + for (const bad of ['model.pt', 'model.h5', 'model.bin', 'model']) { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { model: [makeFile(bad)] }, + }); + expectErrorOnField(result, 'model'); + } + }); + + it('fails when model file is empty', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { model: [makeFile('m.onnx', 0)] }, + }); + expectErrorOnField(result, 'model'); + }); + + it('fails when enable_* is not "true" / "false"', () => { + const result = validateCreateJobRequest({ + body: happyBody({ enable_evaluate: 'yes' }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'enable_evaluate'); + }); + + it('fails when metadata is not valid JSON object', () => { + for (const bad of ['{ broken', '"string"', '[1,2]', 'null']) { + const result = validateCreateJobRequest({ + body: happyBody({ metadata: bad }), + files: { model: [makeFile('m.onnx')] }, + }); + expectErrorOnField(result, 'metadata'); + } + }); + + it('returns multiple errors in one pass', () => { + const result = validateCreateJobRequest({ + body: happyBody({ + user_id: 'bad/id', + model_id: 'abc', + platform: 'XYZ', + }), + files: { model: [makeFile('m.bin')] }, + }); + expect(result.ok).toBe(false); + const fields = result.errors.map((e) => e.field).sort(); + // 應該至少含 user_id / model_id / platform / model + expect(fields).toEqual(expect.arrayContaining(['user_id', 'model_id', 'platform', 'model'])); + }); +}); 
+ +describe('exported constants', () => { + it('ALLOWED_MODEL_EXTENSIONS matches PRD F-01', () => { + expect(ALLOWED_MODEL_EXTENSIONS.has('.onnx')).toBe(true); + expect(ALLOWED_MODEL_EXTENSIONS.has('.tflite')).toBe(true); + expect(ALLOWED_MODEL_EXTENSIONS.has('.pt')).toBe(false); + }); + + it('ALLOWED_PLATFORMS contains all 5 enums', () => { + for (const p of ['520', '720', '530', '630', '730']) { + expect(ALLOWED_PLATFORMS.has(p)).toBe(true); + } + }); +}); + +// === T10:limits 注入測試(D5 修復) === +describe('validateCreateJobRequest — limits.refImageMaxBytes injection (T10)', () => { + const tinyImage = (size) => ({ + originalname: 'img.jpg', + buffer: Buffer.alloc(size, 0x42), + mimetype: 'image/jpeg', + size, + }); + + it('uses default 10MB when limits not provided', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [tinyImage(10 * 1024 * 1024 + 1)], // 10MB + 1 byte + }, + }); + expect(result.ok).toBe(false); + expect(result.tooLarge).toBeDefined(); + expect(result.tooLarge.limit_bytes).toBe(10 * 1024 * 1024); + }); + + it('respects custom refImageMaxBytes (5MB)', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [tinyImage(5 * 1024 * 1024 + 1)], // 5MB + 1 byte + }, + limits: { refImageMaxBytes: 5 * 1024 * 1024 }, + }); + expect(result.ok).toBe(false); + expect(result.tooLarge).toBeDefined(); + expect(result.tooLarge.limit_bytes).toBe(5 * 1024 * 1024); + }); + + it('accepts file equal to custom refImageMaxBytes (boundary)', () => { + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [tinyImage(2 * 1024 * 1024)], // exactly 2MB + }, + limits: { refImageMaxBytes: 2 * 1024 * 1024 }, + }); + expect(result.ok).toBe(true); + expect(result.tooLarge).toBeUndefined(); + }); + + it('falls back to default when limits.refImageMaxBytes is invalid (0 / 
negative)', () => { + // 6MB image,default 10MB OK,但 limits=0 應 fallback 到 default 而非 reject 0-byte + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [tinyImage(6 * 1024 * 1024)], + }, + limits: { refImageMaxBytes: 0 }, + }); + expect(result.ok).toBe(true); + }); + + it('reports fields error message uses injected limit value', () => { + // limit = 1MB;上傳 2MB → tooLarge.limit_bytes 應為 1MB + const result = validateCreateJobRequest({ + body: happyBody(), + files: { + model: [makeFile('m.onnx')], + ref_images: [tinyImage(2 * 1024 * 1024)], + }, + limits: { refImageMaxBytes: 1 * 1024 * 1024 }, + }); + expect(result.ok).toBe(false); + expect(result.tooLarge.limit_bytes).toBe(1 * 1024 * 1024); + expect(result.tooLarge.size_bytes).toBe(2 * 1024 * 1024); + }); +}); diff --git a/apps/task-scheduler/src/routes/v1/__tests__/getJobs.integration.test.js b/apps/task-scheduler/src/routes/v1/__tests__/getJobs.integration.test.js new file mode 100644 index 0000000..ff25038 --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/__tests__/getJobs.integration.test.js @@ -0,0 +1,932 @@ +/** + * GET /api/v1/jobs/:id + GET /api/v1/jobs 整合測試(T6)。 + * + * 測試範圍: + * - 401 invalid_token:缺 Authorization + * - 403 insufficient_scope:token 缺 converter:job.read + * - GET /:id: + * - 404 job_not_found:不存在 + * - 404 job_not_found:跨 client(不洩漏存在性) + * - 200 happy path:完整 record + 對外狀態映射 + * - ETag header 出現 + * - 304 Not Modified:If-None-Match 命中 + * - 200 + 新 ETag:If-None-Match 不命中 + * - 內部 stripping:created_by_client_id 不應洩漏 + * - GET /jobs: + * - 400 validation_error:缺 user_id + * - 400 validation_error:user_id 含禁字(XSS / 路徑穿越) + * - 200 happy path:列表、依 client 過濾 + * - status filter(in_progress / completed / failed / all) + * - limit / cursor 分頁 + * - 跨 client 隔離(同 user_id 不會看到別 client 的 job) + * - limit > 50 → 400 + */ + +'use strict'; + +const express = require('express'); + +const { createSseService } = 
require('../../../services/sseService'); +const { createJobService } = require('../../../services/jobService'); +const { requireAuth } = require('../../../auth/middleware'); + +// Mock luaScripts to avoid real Redis Lua loading +jest.mock('../../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(async () => ({ ok: true, released: true })), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); + +const FAKE_CONFIG = Object.freeze({ + memberCenter: { + issuer: 'https://auth.test.local', + jwksUrl: 'https://auth.test.local/.well-known/jwks', + tokenUrl: '', + }, + converter: { + audience: 'kneron_converter_api', + clientId: '', + clientSecret: '', + tenantId: '', + scopeWrite: 'converter:job.write', + scopeRead: 'converter:job.read', + }, + fileAccessAgent: { baseUrl: '', audience: 'file_access_api' }, + jwks: { cacheMaxAgeMs: 60000, cooldownMs: 30000, clockToleranceSec: 60 }, +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeVerifier({ tokens }) { + return async (token) => { + const entry = tokens[token]; + if (!entry) { + const err = new Error('invalid token'); + err.code = 'ERR_JWS_SIGNATURE_VERIFICATION_FAILED'; + throw err; + } + if (entry.expired) { + const err = new Error('expired'); + err.code = 'ERR_JWT_EXPIRED'; + throw err; + } + return { payload: entry.claims }; + }; +} + +function makeFakeRedis() { + const store = new Map(); + const sets = new Map(); + + function pipeline() { + const ops = []; + const p = { + get(key) { + ops.push({ kind: 'get', key }); + return p; + }, + async exec() { + return ops.map((op) => { + if (op.kind === 'get') { + const val = store.has(op.key) ? 
store.get(op.key) : null; + return [null, val]; + } + return [new Error('unsupported op'), null]; + }); + }, + }; + return p; + } + + return { + store, + sets, + pipeline: jest.fn(pipeline), + smembers: jest.fn(async (key) => { + const s = sets.get(key); + return s ? [...s] : []; + }), + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + sadd: jest.fn(async (key, member) => { + if (!sets.has(key)) sets.set(key, new Set()); + sets.get(key).add(member); + return 1; + }), + keys: jest.fn(async () => []), + xadd: jest.fn(async () => '1-0'), + xlen: jest.fn(async () => 0), + xinfo: jest.fn(async () => { + throw new Error('NOGROUP'); + }), + ping: jest.fn(async () => 'PONG'), + }; +} + +function makeFakeMinio() { + return { + client: { _fake: true }, + bucket: 'test-bucket', + endpoint: 'http://nope', + uploadToMinIO: jest.fn(async () => undefined), + getFromMinIO: jest.fn(async () => null), + deleteObject: jest.fn(async () => undefined), + }; +} + +/** + * 啟動 GET 端點的 app。 + */ +async function startApp({ tokens, rateLimit = { windowMs: 60000, max: 1000 } }) { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + + const app = express(); + const helmet = require('helmet'); + const compression = require('compression'); + const morgan = require('morgan'); + const { requestIdMiddleware } = require('../../../middleware/requestId'); + const { errorHandler } = require('../../../middleware/errorHandler'); + const { createPerClientRateLimiter } = require('../../../middleware/perClientRateLimit'); + const { _internals: jobsInternals } = require('../jobs'); + + app.use(helmet()); + app.use(requestIdMiddleware); + app.use(compression()); + app.use(morgan('short')); + app.use(express.json({ limit: '10mb' })); + + // v1 
router with verify mock injected into requireAuth + const v1 = express.Router(); + const verify = makeVerifier({ tokens }); + const requireReadAuth = requireAuth(FAKE_CONFIG.converter.scopeRead, { + config: FAKE_CONFIG, + verify, + }); + const perClientLimiter = createPerClientRateLimiter(rateLimit); + + const getJobHandler = jobsInternals.buildGetJobHandler({ jobService }); + const listJobsHandler = jobsInternals.buildListJobsHandler({ jobService }); + + v1.get('/jobs', requireReadAuth, perClientLimiter, listJobsHandler); + v1.get('/jobs/:id', requireReadAuth, perClientLimiter, getJobHandler); + + app.use('/api/v1', v1); + app.use('/api/v1', errorHandler); + + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + server, + baseUrl: `http://127.0.0.1:${port}`, + redis, + minio, + jobService, + close: () => new Promise((r) => server.close(r)), + }); + }); + }); +} + +const HAPPY_TOKENS = { + 'good-read-token': { + claims: { + sub: 'visionA-backend', + client_id: 'cid-A', + scope: 'converter:job.read converter:job.write', + }, + }, + 'good-read-token-B': { + claims: { + sub: 'visionA-backend', + client_id: 'cid-B', + scope: 'converter:job.read', + }, + }, + 'write-only-token': { + claims: { + sub: 'someone', + client_id: 'cid-A', + scope: 'converter:job.write', // 缺 read + }, + }, + 'expired-token': { + expired: true, + claims: {}, + }, +}; + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +// --------------------------------------------------------------------------- +// Auth 共用測試(GET /jobs 與 GET /jobs/:id 同樣 require read scope) +// --------------------------------------------------------------------------- + +describe('GET /api/v1/jobs* — auth', () => { + let ctx; + 
beforeEach(async () => { + ctx = await startApp({ tokens: HAPPY_TOKENS }); + }); + afterEach(async () => { + await ctx.close(); + }); + + it('GET /:id returns 401 when Authorization missing', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/some-id`); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('invalid_token'); + expect(typeof body.error.request_id).toBe('string'); + }); + + it('GET /jobs returns 401 when Authorization missing', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1`); + expect(res.status).toBe(401); + }); + + it('GET /:id returns 401 token_expired with expired token', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/some-id`, { + headers: { Authorization: 'Bearer expired-token' }, + }); + expect(res.status).toBe(401); + expect((await res.json()).error.code).toBe('token_expired'); + }); + + it('GET /jobs returns 403 with write-only token (insufficient_scope)', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1`, { + headers: { Authorization: 'Bearer write-only-token' }, + }); + expect(res.status).toBe(403); + const body = await res.json(); + expect(body.error.code).toBe('insufficient_scope'); + expect(body.error.details).toMatchObject({ + required_scope: 'converter:job.read', + }); + }); + + it('GET /:id returns 403 with write-only token', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/x`, { + headers: { Authorization: 'Bearer write-only-token' }, + }); + expect(res.status).toBe(403); + }); +}); + +// --------------------------------------------------------------------------- +// GET /api/v1/jobs/:id +// --------------------------------------------------------------------------- + +describe('GET /api/v1/jobs/:id', () => { + let ctx; + beforeEach(async () => { + ctx = await startApp({ tokens: HAPPY_TOKENS }); + }); + afterEach(async () => { + await ctx.close(); + }); + + function 
seedJob(jobId, overrides = {}) { + const job = { + job_id: jobId, + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'BIE', + stage: 'bie', + progress: 50, + stage_progress: 60, + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:05:30Z', + expires_at: '2026-05-02T12:00:00Z', + stage_timings: { + onnx: { + started_at: '2026-04-25T12:00:05Z', + completed_at: '2026-04-25T12:02:10Z', + }, + bie: { started_at: '2026-04-25T12:02:15Z', completed_at: null }, + nef: null, + }, + input: { + filename: 'model.onnx', + object_key: `jobs/${jobId}/input/model.onnx`, + size_bytes: 1024, + ref_images_count: 0, + }, + parameters: { + model_id: 1001, + version: '0001', + platform: '520', + enable_evaluate: false, + }, + output: { bie_path: null, nef_path: null }, + error: null, + metadata: { source: 'visionA' }, + ...overrides, + }; + ctx.redis.store.set(`job:${jobId}`, JSON.stringify(job)); + return job; + } + + it('returns 404 job_not_found when job does not exist', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/nonexistent`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body.error.code).toBe('job_not_found'); + expect(typeof body.error.request_id).toBe('string'); + }); + + it('returns 404 (not 403) when job belongs to different client (no info leak)', async () => { + seedJob('foreign-job', { created_by_client_id: 'cid-B' }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/foreign-job`, { + headers: { Authorization: 'Bearer good-read-token' }, // cid-A + }); + expect(res.status).toBe(404); + const body = await res.json(); + // 重要:對外 code 與 message 必須與「真不存在」完全一致 + expect(body.error.code).toBe('job_not_found'); + }); + + it('returns 200 with full job shape for owner', async () => { + seedJob('my-job'); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/my-job`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + 
expect(res.status).toBe(200); + const body = await res.json(); + expect(body).toMatchObject({ + job_id: 'my-job', + user_id: 'u1', + status: 'running', // BIE → running + stage: 'bie', + progress: 50, + stage_progress: 60, + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:05:30Z', + }); + // result_object_keys 在非 completed 時應為 null + expect(body.result_object_keys).toBeNull(); + // error 在非 failed 時應為 null + expect(body.error).toBeNull(); + // input / parameters / metadata + expect(body.input.filename).toBe('model.onnx'); + expect(body.parameters.model_id).toBe(1001); + expect(body.metadata).toEqual({ source: 'visionA' }); + }); + + it('strips internal field created_by_client_id from response', async () => { + seedJob('my-job'); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/my-job`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const body = await res.json(); + expect(body).not.toHaveProperty('created_by_client_id'); + }); + + it('maps internal status correctly: ONNX + onnx.started_at == null → created', async () => { + seedJob('newly-created', { + status: 'ONNX', + stage: 'onnx', + stage_timings: { onnx: null, bie: null, nef: null }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/newly-created`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const body = await res.json(); + expect(body.status).toBe('created'); + expect(body.stage).toBe('onnx'); + }); + + it('maps internal status correctly: COMPLETED → completed/null', async () => { + seedJob('done-job', { + status: 'COMPLETED', + stage: null, + progress: 100, + output: { + onnx_path: 'jobs/done-job/output/out.onnx', + bie_path: 'jobs/done-job/output/out.bie', + nef_path: 'jobs/done-job/output/out.nef', + }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/done-job`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const body = await res.json(); + expect(body.status).toBe('completed'); + 
expect(body.stage).toBeNull(); + // result_object_keys 從 output fallback 轉成 v1 格式 + expect(body.result_object_keys).toEqual({ + onnx: 'jobs/done-job/output/out.onnx', + bie: 'jobs/done-job/output/out.bie', + nef: 'jobs/done-job/output/out.nef', + }); + }); + + it('maps internal status correctly: FAILED → failed/', async () => { + seedJob('failed-job', { + status: 'FAILED', + stage: 'bie', + error: { + stage: 'bie', + code: 'quantization_failed', + message: 'BIE 量化失敗', + }, + }); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/failed-job`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const body = await res.json(); + expect(body.status).toBe('failed'); + expect(body.stage).toBe('bie'); + expect(body.error).toMatchObject({ + stage: 'bie', + code: 'quantization_failed', + }); + }); + + it('returns ETag header on 200 response', async () => { + seedJob('etag-job'); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/etag-job`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(200); + const etag = res.headers.get('etag'); + expect(etag).toMatch(/^W\/"[A-Za-z0-9_-]+"$/); + }); + + it('returns 304 Not Modified when If-None-Match matches', async () => { + seedJob('etag-match-job'); + const first = await fetch(`${ctx.baseUrl}/api/v1/jobs/etag-match-job`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const etag = first.headers.get('etag'); + expect(etag).toBeTruthy(); + + const second = await fetch(`${ctx.baseUrl}/api/v1/jobs/etag-match-job`, { + headers: { + Authorization: 'Bearer good-read-token', + 'If-None-Match': etag, + }, + }); + expect(second.status).toBe(304); + // 304 不應該帶 body(或極短) + const text = await second.text(); + expect(text).toBe(''); + }); + + it('returns 200 + new ETag when If-None-Match does not match', async () => { + seedJob('etag-mismatch-job'); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/etag-mismatch-job`, { + headers: { + Authorization: 'Bearer 
good-read-token', + 'If-None-Match': 'W/"stale"', + }, + }); + expect(res.status).toBe(200); + const etag = res.headers.get('etag'); + expect(etag).toMatch(/^W\/"[A-Za-z0-9_-]+"$/); + expect(etag).not.toBe('W/"stale"'); + }); + + it('returns 304 when If-None-Match contains *', async () => { + seedJob('star-job'); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/star-job`, { + headers: { + Authorization: 'Bearer good-read-token', + 'If-None-Match': '*', + }, + }); + expect(res.status).toBe(304); + }); +}); + +// --------------------------------------------------------------------------- +// GET /api/v1/jobs (list) +// --------------------------------------------------------------------------- + +describe('GET /api/v1/jobs (list)', () => { + let ctx; + beforeEach(async () => { + ctx = await startApp({ tokens: HAPPY_TOKENS }); + }); + afterEach(async () => { + await ctx.close(); + }); + + function seedJobs(userId, jobs) { + if (!ctx.redis.sets.has(`user:${userId}:jobs`)) { + ctx.redis.sets.set(`user:${userId}:jobs`, new Set()); + } + for (const j of jobs) { + ctx.redis.sets.get(`user:${userId}:jobs`).add(j.job_id); + ctx.redis.store.set(`job:${j.job_id}`, JSON.stringify(j)); + } + } + + it('returns 400 validation_error when user_id missing', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => f.field)).toContain('user_id'); + }); + + it('returns 400 when user_id contains XSS chars', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=${encodeURIComponent('')}`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + expect(body.error.details.fields.map((f) => 
f.field)).toContain('user_id'); + }); + + it('returns 400 when user_id contains slash (path traversal)', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=${encodeURIComponent('../etc/passwd')}`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + expect(res.status).toBe(400); + }); + + it('returns 400 when user_id contains wildcard (*)', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=${encodeURIComponent('*')}`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(400); + }); + + it('returns 400 when user_id contains colon (Redis key injection)', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=${encodeURIComponent('u1:malicious')}`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + expect(res.status).toBe(400); + }); + + it('returns 400 when user_id is too long', async () => { + const long = 'a'.repeat(129); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=${long}`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(400); + }); + + it('returns empty list when user has no jobs', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u-empty`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body).toEqual({ jobs: [], total: 0, next_cursor: null }); + }); + + it('returns jobs filtered by status=in_progress (default)', async () => { + seedJobs('u1', [ + { + job_id: 'created-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'ONNX', + stage: 'onnx', + progress: 0, + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:00:00Z', + stage_timings: { onnx: null }, + }, + { + job_id: 'running-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'BIE', + stage: 'bie', + progress: 50, + created_at: 
'2026-04-25T11:00:00Z', + updated_at: '2026-04-25T11:00:00Z', + }, + { + job_id: 'completed-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'COMPLETED', + progress: 100, + created_at: '2026-04-25T10:00:00Z', + updated_at: '2026-04-25T10:00:00Z', + }, + ]); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.total).toBe(2); + expect(body.jobs.map((j) => j.job_id).sort()).toEqual(['created-1', 'running-1']); + }); + + it('filters by status=completed', async () => { + seedJobs('u1', [ + { + job_id: 'completed-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'COMPLETED', + progress: 100, + created_at: '2026-04-25T10:00:00Z', + updated_at: '2026-04-25T10:00:00Z', + }, + { + job_id: 'running-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'BIE', + progress: 50, + created_at: '2026-04-25T11:00:00Z', + updated_at: '2026-04-25T11:00:00Z', + }, + ]); + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=completed`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + const body = await res.json(); + expect(body.total).toBe(1); + expect(body.jobs[0].job_id).toBe('completed-1'); + expect(body.jobs[0].status).toBe('completed'); + }); + + it('filters by status=all', async () => { + seedJobs('u1', [ + { + job_id: 'completed-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'COMPLETED', + created_at: '2026-04-25T10:00:00Z', + updated_at: '2026-04-25T10:00:00Z', + }, + { + job_id: 'running-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'BIE', + created_at: '2026-04-25T11:00:00Z', + updated_at: '2026-04-25T11:00:00Z', + }, + { + job_id: 'failed-1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'FAILED', + error: { stage: 'bie' }, + created_at: '2026-04-25T09:00:00Z', + updated_at: 
'2026-04-25T09:00:00Z', + }, + ]); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=all`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const body = await res.json(); + expect(body.total).toBe(3); + }); + + it('returns 400 for invalid status', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=invalid_status`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.details.fields.map((f) => f.field)).toContain('status'); + }); + + it('CRITICAL: cross-client isolation — same user_id different client gets nothing', async () => { + // user u1 在 cid-B 有 job,但 cid-A 不應該看到 + seedJobs('u1', [ + { + job_id: 'B-job-1', + user_id: 'u1', + created_by_client_id: 'cid-B', // 屬 cid-B + status: 'BIE', + created_at: '2026-04-25T11:00:00Z', + updated_at: '2026-04-25T11:00:00Z', + }, + ]); + + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1`, { + headers: { Authorization: 'Bearer good-read-token' }, // cid-A + }); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.total).toBe(0); + expect(body.jobs).toEqual([]); + + // 換成 cid-B 的 token 應能看到 + const resB = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1`, { + headers: { Authorization: 'Bearer good-read-token-B' }, + }); + expect(resB.status).toBe(200); + const bodyB = await resB.json(); + expect(bodyB.total).toBe(1); + expect(bodyB.jobs[0].job_id).toBe('B-job-1'); + }); + + it('strips internal field created_by_client_id from list items', async () => { + seedJobs('u1', [ + { + job_id: 'j1', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'BIE', + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:00:00Z', + }, + ]); + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=all`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + const body = await res.json(); 
+ expect(body.jobs[0]).not.toHaveProperty('created_by_client_id'); + }); + + it('paginates with limit + cursor', async () => { + const jobs = []; + for (let i = 1; i <= 5; i += 1) { + jobs.push({ + job_id: `j${i}`, + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'BIE', + // 排序後最新到最舊:j5 j4 j3 j2 j1 + created_at: `2026-04-25T${10 + i}:00:00Z`, + updated_at: `2026-04-25T${10 + i}:00:00Z`, + }); + } + seedJobs('u1', jobs); + + const page1 = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=all&limit=2`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + const p1Body = await page1.json(); + expect(p1Body.total).toBe(5); + expect(p1Body.jobs.map((j) => j.job_id)).toEqual(['j5', 'j4']); + expect(p1Body.next_cursor).toBeTruthy(); + + const page2 = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=all&limit=2&cursor=${encodeURIComponent(p1Body.next_cursor)}`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + const p2Body = await page2.json(); + expect(p2Body.jobs.map((j) => j.job_id)).toEqual(['j3', 'j2']); + expect(p2Body.next_cursor).toBeTruthy(); + + const page3 = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=u1&status=all&limit=2&cursor=${encodeURIComponent(p2Body.next_cursor)}`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + const p3Body = await page3.json(); + expect(p3Body.jobs.map((j) => j.job_id)).toEqual(['j1']); + expect(p3Body.next_cursor).toBeNull(); + }); + + it('returns 400 when limit > 50', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1&limit=51`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.details.fields.map((f) => f.field)).toContain('limit'); + }); + + it('returns 400 when limit is non-integer', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1&limit=abc`, { + headers: { Authorization: 
'Bearer good-read-token' }, + }); + expect(res.status).toBe(400); + }); + + it('returns 400 when limit is 0', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1&limit=0`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.status).toBe(400); + }); + + it('returns 400 when cursor is malformed', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs?user_id=u1&cursor=not-valid-base64-!!!`, + { + headers: { Authorization: 'Bearer good-read-token' }, + } + ); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.details.fields.map((f) => f.field)).toContain('cursor'); + }); + + it('returns response with X-Request-Id header', async () => { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs?user_id=u1`, { + headers: { Authorization: 'Bearer good-read-token' }, + }); + expect(res.headers.get('x-request-id')).toBeTruthy(); + }); +}); + +// --------------------------------------------------------------------------- +// Cursor encode/decode 邊界 +// --------------------------------------------------------------------------- + +describe('cursor encode/decode roundtrip (via internal helpers)', () => { + const { _internals } = require('../jobs'); + const { encodeCursor, decodeCursor } = _internals; + + it('encodes 0 → decodable to 0', () => { + expect(decodeCursor(encodeCursor(0))).toBe(0); + }); + + it('encodes 100 → decodable to 100', () => { + expect(decodeCursor(encodeCursor(100))).toBe(100); + }); + + it('rejects malformed cursor', () => { + expect(decodeCursor('!!!')).toBeNull(); + expect(decodeCursor('')).toBeNull(); + expect(decodeCursor(null)).toBeNull(); + expect(decodeCursor(undefined)).toBeNull(); + }); + + it('rejects cursor with negative offset (defense)', () => { + const malicious = Buffer.from(JSON.stringify({ offset: -1 }), 'utf8') + .toString('base64') + .replace(/=+$/, ''); + expect(decodeCursor(malicious)).toBeNull(); + }); + + it('rejects cursor with 
absurdly large offset (DoS protection)', () => { + const malicious = Buffer.from(JSON.stringify({ offset: 99999999 }), 'utf8') + .toString('base64') + .replace(/=+$/, ''); + expect(decodeCursor(malicious)).toBeNull(); + }); + + it('rejects cursor that is not JSON', () => { + const notJson = Buffer.from('not json at all', 'utf8') + .toString('base64') + .replace(/=+$/, ''); + expect(decodeCursor(notJson)).toBeNull(); + }); + + it('rejects cursor JSON object missing offset', () => { + const wrong = Buffer.from(JSON.stringify({ foo: 1 }), 'utf8') + .toString('base64') + .replace(/=+$/, ''); + expect(decodeCursor(wrong)).toBeNull(); + }); + + it('rejects cursor longer than 200 chars', () => { + expect(decodeCursor('a'.repeat(201))).toBeNull(); + }); +}); diff --git a/apps/task-scheduler/src/routes/v1/__tests__/promote.integration.test.js b/apps/task-scheduler/src/routes/v1/__tests__/promote.integration.test.js new file mode 100644 index 0000000..9fd0159 --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/__tests__/promote.integration.test.js @@ -0,0 +1,1262 @@ +/** + * POST /api/v1/jobs/:id/promote 整合測試(T7)。 + * + * 測試範圍(對齊 tasks-phase1.md §3.7 驗收): + * - 401 invalid_token:缺 Authorization + * - 403 insufficient_scope:token 缺 converter:job.write + * - 404 job_not_found:job 不存在 + * - 404 job_not_found:job 存在但屬於別的 client(不洩露存在性) + * - 400 validation_error:targets 缺漏 / source 非合法 / 重複 source + * - 422 invalid_object_key:含 .. 
/ 反斜線 / 控制字元 / 開頭斜線 + * - 409 job_not_ready_for_promote:status !== 'COMPLETED' + * - 409 source_not_available:job 沒產該 stage 結果 + * - 200 happy path:completed job + 所有 targets 上傳成功 + 寫回 promoted: true + * - 200 idempotent:第二次 promote 同 job → 不重打 FAA、回既有 promoted_object_keys + * - 502 file_gateway_unavailable:FAA 5xx 重試完仍失敗 + * - 502 file_gateway_unavailable:FAA 4xx(非 401) + * - 503 auth_service_unavailable:FAA 401 重試後仍 401 + * - SECURITY:log 不含 FAA token;錯誤 message 不洩漏 FAA 內部 + * - Stream:不 buffer 整個檔案(用 stream.Readable 模擬大檔,驗證 minio.getObjectStream 被呼叫) + * + * 啟動方式:用 createApp + 注入 mock deps(包含 verify 函數注入)+ mock faaClient + mock minio。 + */ + +'use strict'; + +const express = require('express'); +const { Readable } = require('stream'); + +const { createSseService } = require('../../../services/sseService'); +const { createJobService } = require('../../../services/jobService'); +const { requireAuth } = require('../../../auth/middleware'); + +// Mock luaScripts(同 createJob.integration 模式) +jest.mock('../../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(async () => ({ ok: true, released: true })), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); + +const FAKE_CONFIG = Object.freeze({ + memberCenter: { + issuer: 'https://auth.test.local', + jwksUrl: 'https://auth.test.local/.well-known/jwks', + tokenUrl: '', + }, + converter: { + audience: 'kneron_converter_api', + clientId: '', + clientSecret: '', + tenantId: '', + scopeWrite: 'converter:job.write', + scopeRead: 'converter:job.read', + }, + fileAccessAgent: { baseUrl: 'https://files.test.local', audience: 'file_access_api' }, + jwks: { cacheMaxAgeMs: 60000, cooldownMs: 30000, clockToleranceSec: 60 }, +}); + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +function makeVerifier({ tokens }) { + return 
async (token) => { + const entry = tokens[token]; + if (!entry) { + const err = new Error('invalid token'); + err.code = 'ERR_JWS_SIGNATURE_VERIFICATION_FAILED'; + throw err; + } + if (entry.expired) { + const err = new Error('expired'); + err.code = 'ERR_JWT_EXPIRED'; + throw err; + } + return { payload: entry.claims }; + }; +} + +function makeFakeRedis() { + const store = new Map(); + return { + store, + pingFails: false, + ping: jest.fn(async function () { + if (this.pingFails) throw new Error('ping failed'); + return 'PONG'; + }), + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + keys: jest.fn(async () => []), + xadd: jest.fn(async () => '1-0'), + xlen: jest.fn(async () => 0), + xinfo: jest.fn(async () => { + throw new Error('NOGROUP'); + }), + }; +} + +function makeFakeMinio() { + return { + client: { _fake: true }, + bucket: 'test-bucket', + endpoint: 'http://nope', + headObject: jest.fn(async () => ({ + contentLength: 1024 * 1024, // 1 MB by default + contentType: 'application/octet-stream', + })), + getObjectStream: jest.fn(async (key) => ({ + // 用 Readable.from 模擬一個小 stream(不真的 1MB),但保留 stream API + stream: Readable.from([Buffer.from(`mock-data-for-${key}`)]), + contentLength: 1024 * 1024, + contentType: 'application/octet-stream', + })), + uploadToMinIO: jest.fn(async () => undefined), + getFromMinIO: jest.fn(async () => null), + deleteObject: jest.fn(async () => undefined), + }; +} + +/** + * Mock FAA client:可控制 putFile 的 outcome(success / FAA error class)。 + * + * @param {{ outcomes?: Array<{ ok: true, result?: object } | { ok: false, error: Error }> }} opts + */ +function makeFakeFaaClient(opts = {}) { + const outcomes = opts.outcomes || []; + let i = 0; + const calls = []; + return { + _calls: calls, + putFile: jest.fn(async (objectKey, streamFactory, options) => { + calls.push({ objectKey, options }); + // 確認 streamFactory 可用(呼叫一次測 stream 
拿得到) + const stream = await streamFactory(); + // 把 stream 消費掉避免 leak(在 mock 也要 simulate 真實行為) + // 但對於記憶體 buffer 的 Readable.from,不消費也不會 leak + void stream; + + const outcome = outcomes[Math.min(i, outcomes.length - 1)]; + i += 1; + if (!outcome) { + return { etag: 'default-etag', sizeBytes: options.contentLength }; + } + if (outcome.ok) { + return outcome.result || { etag: 'mock-etag', sizeBytes: options.contentLength }; + } + throw outcome.error; + }), + }; +} + +/** + * 直接組 app(避免 createApp 的 v1Deps 注入鏈在 verify mock 時太間接)。 + */ +async function startApp({ + tokens, + redis, + minio, + faaClient, + rateLimit = { windowMs: 60000, max: 1000 }, +}) { + redis = redis || makeFakeRedis(); + minio = minio || makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + + const app = express(); + const helmet = require('helmet'); + const cors = require('cors'); + const compression = require('compression'); + const morgan = require('morgan'); + const { requestIdMiddleware } = require('../../../middleware/requestId'); + const { errorHandler } = require('../../../middleware/errorHandler'); + const { createPerClientRateLimiter } = require('../../../middleware/perClientRateLimit'); + const promoteModule = require('../promote'); + + app.use(helmet()); + app.use(requestIdMiddleware); + app.use(compression()); + app.use(morgan('short')); + app.use(cors()); + app.use(express.json({ limit: '10mb' })); + app.use(express.urlencoded({ extended: true, limit: '10mb' })); + + // v1 router with verify injection + const v1 = express.Router(); + const verify = makeVerifier({ tokens }); + const requireWriteAuth = requireAuth(FAKE_CONFIG.converter.scopeWrite, { + config: FAKE_CONFIG, + verify, + }); + const perClientLimiter = createPerClientRateLimiter(rateLimit); + const handler = promoteModule._internals.buildPromoteHandler({ + jobService, + minio, + faaClient, + }); + v1.post( + 
'/jobs/:id/promote', + requireWriteAuth, + perClientLimiter, + handler + ); + + app.use('/api/v1', v1); + app.use('/api/v1', errorHandler); + + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + server, + baseUrl: `http://127.0.0.1:${port}`, + redis, + minio, + faaClient, + jobService, + close: () => new Promise((r) => server.close(r)), + }); + }); + }); +} + +const TOKENS = { + 'good-write-token': { + claims: { + sub: 'kneron_converter_client', + client_id: 'visionA-client-A', + scope: 'converter:job.write converter:job.read', + }, + }, + 'good-write-token-other-client': { + claims: { + sub: 'other', + client_id: 'visionA-client-B', // 別的 client + scope: 'converter:job.write', + }, + }, + 'read-only-token': { + claims: { + sub: 'reader', + client_id: 'visionA-client-A', + scope: 'converter:job.read', + }, + }, +}; + +/** + * 建立一個已 completed 的 mock job record。 + */ +function makeCompletedJob(overrides = {}) { + const jobId = overrides.job_id || 'job-completed-001'; + return { + job_id: jobId, + user_id: 'u1', + created_by_client_id: 'visionA-client-A', + origin: 'api', + status: 'COMPLETED', + stage: null, + progress: 100, + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:30:00Z', + expires_at: '2026-05-02T12:00:00Z', + input: { + filename: 'model.onnx', + object_key: `jobs/${jobId}/input/model.onnx`, + size_bytes: 1024, + ref_images_count: 0, + }, + parameters: { model_id: 1001, version: '0001', platform: '520' }, + output: { + onnx_path: `jobs/${jobId}/output/out.onnx`, + bie_path: `jobs/${jobId}/output/out.bie`, + nef_path: `jobs/${jobId}/output/out.nef`, + }, + error: null, + metadata: {}, + stage_timings: { + onnx: { started_at: '2026-04-25T12:00:00Z', completed_at: '2026-04-25T12:10:00Z' }, + bie: { started_at: '2026-04-25T12:10:00Z', completed_at: '2026-04-25T12:20:00Z' }, + nef: { started_at: '2026-04-25T12:20:00Z', completed_at: '2026-04-25T12:30:00Z' }, 
+ }, + ...overrides, + }; +} + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +// --------------------------------------------------------------------------- +// auth tests +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — auth', () => { + it('returns 401 invalid_token without Authorization', async () => { + const ctx = await startApp({ tokens: TOKENS, faaClient: makeFakeFaaClient() }); + try { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/abc/promote`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ targets: [] }), + }); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('invalid_token'); + } finally { + await ctx.close(); + } + }); + + it('returns 403 insufficient_scope with read-only token', async () => { + const ctx = await startApp({ tokens: TOKENS, faaClient: makeFakeFaaClient() }); + try { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/abc/promote`, { + method: 'POST', + headers: { + Authorization: 'Bearer read-only-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ targets: [] }), + }); + expect(res.status).toBe(403); + const body = await res.json(); + expect(body.error.code).toBe('insufficient_scope'); + expect(body.error.details.required_scope).toBe('converter:job.write'); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// 404 / client isolation +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — 404 / client isolation', () => { + it('returns 404 job_not_found when job does not exist', async 
() => { + const ctx = await startApp({ tokens: TOKENS, faaClient: makeFakeFaaClient() }); + try { + const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/nonexistent/promote`, { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + }); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body.error.code).toBe('job_not_found'); + } finally { + await ctx.close(); + } + }); + + it('returns 404 job_not_found when job belongs to different client (no leak)', async () => { + const faa = makeFakeFaaClient(); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob({ + job_id: 'job-foreign', + created_by_client_id: 'visionA-client-B', // 不屬於 client-A + }); + ctx.redis.store.set('job:job-foreign', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-foreign/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', // client-A token + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(404); + const body = await res.json(); + expect(body.error.code).toBe('job_not_found'); + // 不該打 FAA + expect(faa.putFile).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// validation +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — validation', () => { + let ctx; + beforeEach(async () => { + ctx = await startApp({ tokens: TOKENS, faaClient: makeFakeFaaClient() }); + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + }); + afterEach(async () => { + await 
ctx.close(); + }); + + it('returns 400 when targets missing', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({}), + } + ); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + }); + + it('returns 400 when targets is empty array', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ targets: [] }), + } + ); + expect(res.status).toBe(400); + expect((await res.json()).error.code).toBe('validation_error'); + }); + + it('returns 400 when source is not in {onnx, bie, nef}', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'evil', target_object_key: 'a/b.bin' }], + }), + } + ); + expect(res.status).toBe(400); + expect((await res.json()).error.code).toBe('validation_error'); + }); + + it('returns 422 invalid_object_key for path traversal attempt', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: '../../etc/passwd' }], + }), + } + ); + expect(res.status).toBe(422); + const body = await res.json(); + expect(body.error.code).toBe('invalid_object_key'); + }); + + it('returns 422 invalid_object_key for backslash in key', async () => { + const res = await fetch( + 
`${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'visionA\\nef\\out.nef' }], + }), + } + ); + expect(res.status).toBe(422); + expect((await res.json()).error.code).toBe('invalid_object_key'); + }); + + it('returns 422 invalid_object_key for leading slash', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: '/abs/path.nef' }], + }), + } + ); + expect(res.status).toBe(422); + }); + + it('returns 400 when same source appears twice', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [ + { source: 'nef', target_object_key: 'a/1.nef' }, + { source: 'nef', target_object_key: 'a/2.nef' }, + ], + }), + } + ); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error.code).toBe('validation_error'); + }); + + // Security m1 / Reviewer m4:擋 ? # %(URL 結構字元 / 雙重編碼攻擊) + it('returns 422 invalid_object_key when key contains "?" 
(query injection)', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [ + { source: 'nef', target_object_key: 'valid/path?inject=1' }, + ], + }), + } + ); + expect(res.status).toBe(422); + const body = await res.json(); + expect(body.error.code).toBe('invalid_object_key'); + }); + + it('returns 422 invalid_object_key when key contains "#" (URL fragment)', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [ + { source: 'nef', target_object_key: 'valid/path#frag' }, + ], + }), + } + ); + expect(res.status).toBe(422); + expect((await res.json()).error.code).toBe('invalid_object_key'); + }); + + it('returns 422 invalid_object_key when key contains "%" (double-encoding attack)', async () => { + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + // %2E%2E 解碼後為 "..",是經典的雙重編碼路徑穿越 + targets: [ + { source: 'nef', target_object_key: '%2E%2E/etc/passwd' }, + ], + }), + } + ); + expect(res.status).toBe(422); + expect((await res.json()).error.code).toBe('invalid_object_key'); + }); +}); + +// --------------------------------------------------------------------------- +// state checks +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — state checks', () => { + it('returns 409 job_not_ready_for_promote when status is not COMPLETED', async () => { + const faa = makeFakeFaaClient(); + const ctx = await startApp({ tokens: 
TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob({ status: 'BIE', stage: 'bie', progress: 50 }); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(409); + const body = await res.json(); + expect(body.error.code).toBe('job_not_ready_for_promote'); + expect(body.error.details.current_status).toBe('BIE'); + // 不打 FAA + expect(faa.putFile).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); + + it('returns 409 source_not_available when job has no output for source', async () => { + const faa = makeFakeFaaClient(); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob({ + // 故意只留 onnx,沒 bie / nef + output: { onnx_path: 'jobs/job-completed-001/output/out.onnx' }, + }); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(409); + const body = await res.json(); + expect(body.error.code).toBe('source_not_available'); + expect(body.error.details.source).toBe('nef'); + expect(faa.putFile).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// happy path + idempotency +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — 200 
happy path', () => { + it('promotes single target successfully and writes promoted: true to Redis', async () => { + const faa = makeFakeFaaClient({ + outcomes: [ + { ok: true, result: { etag: 'faa-etag-nef', sizeBytes: 1048576 } }, + ], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [ + { source: 'nef', target_object_key: 'visionA/u1/m1001/v1/out.nef' }, + ], + }), + } + ); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.job_id).toBe('job-completed-001'); + expect(body.promoted).toHaveLength(1); + expect(body.promoted[0]).toMatchObject({ + source: 'nef', + target_object_key: 'visionA/u1/m1001/v1/out.nef', + size_bytes: 1048576, + file_access_agent_etag: 'faa-etag-nef', + }); + expect(typeof body.promoted[0].promoted_at).toBe('string'); + + // FAA 被呼叫一次 + expect(faa.putFile).toHaveBeenCalledTimes(1); + const faaCall = faa._calls[0]; + expect(faaCall.objectKey).toBe('visionA/u1/m1001/v1/out.nef'); + expect(faaCall.options.contentLength).toBe(1024 * 1024); + + // MinIO HEAD + getObjectStream 被呼叫 + expect(ctx.minio.headObject).toHaveBeenCalledWith( + 'jobs/job-completed-001/output/out.nef' + ); + expect(ctx.minio.getObjectStream).toHaveBeenCalledWith( + 'jobs/job-completed-001/output/out.nef' + ); + + // Redis 寫回 promoted: true + const updatedRaw = ctx.redis.store.get('job:job-completed-001'); + const updated = JSON.parse(updatedRaw); + expect(updated.promoted).toBe(true); + expect(typeof updated.promoted_at).toBe('string'); + expect(Array.isArray(updated.promoted_object_keys)).toBe(true); + expect(updated.promoted_object_keys).toHaveLength(1); + } finally { + 
await ctx.close(); + } + }); + + it('promotes multiple targets sequentially', async () => { + const faa = makeFakeFaaClient({ + outcomes: [ + { ok: true, result: { etag: 'etag-bie', sizeBytes: 100 } }, + { ok: true, result: { etag: 'etag-nef', sizeBytes: 200 } }, + ], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [ + { source: 'bie', target_object_key: 'visionA/u1/out.bie' }, + { source: 'nef', target_object_key: 'visionA/u1/out.nef' }, + ], + }), + } + ); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.promoted).toHaveLength(2); + expect(body.promoted[0].source).toBe('bie'); + expect(body.promoted[1].source).toBe('nef'); + + expect(faa.putFile).toHaveBeenCalledTimes(2); + // 順序:bie 先、nef 後(序列) + expect(faa._calls[0].objectKey).toBe('visionA/u1/out.bie'); + expect(faa._calls[1].objectKey).toBe('visionA/u1/out.nef'); + } finally { + await ctx.close(); + } + }); + + it('uses streamFactory pattern (each putFile call gets a new stream)', async () => { + const faa = makeFakeFaaClient({ + outcomes: [{ ok: true, result: { etag: 'e', sizeBytes: 1 } }], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + await fetch(`${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + }); + + // mock faaClient.putFile 內部會 await 
streamFactory(),呼叫一次 + expect(ctx.minio.getObjectStream).toHaveBeenCalledTimes(1); + } finally { + await ctx.close(); + } + }); +}); + +describe('POST /api/v1/jobs/:id/promote — idempotency', () => { + it('returns 200 + existing promoted_object_keys without re-calling FAA', async () => { + const faa = makeFakeFaaClient(); // 不應該被呼叫 + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const previouslyPromoted = [ + { + source: 'nef', + target_object_key: 'visionA/u1/out.nef', + size_bytes: 999, + file_access_agent_etag: 'previous-etag', + promoted_at: '2026-04-25T13:00:00Z', + }, + ]; + const job = makeCompletedJob({ + promoted: true, + promoted_at: '2026-04-25T13:00:00Z', + promoted_object_keys: previouslyPromoted, + }); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + // 即使 client 傳新 targets,server 也走冪等回既有 + targets: [ + { source: 'nef', target_object_key: 'NEW/PATH.nef' }, + ], + }), + } + ); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.job_id).toBe('job-completed-001'); + expect(body.promoted).toEqual(previouslyPromoted); + + // 關鍵:不該打 FAA + expect(faa.putFile).not.toHaveBeenCalled(); + // 也不該動 MinIO + expect(ctx.minio.headObject).not.toHaveBeenCalled(); + expect(ctx.minio.getObjectStream).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// FAA failures +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — FAA failures', () => { + const { + FAAClientError, + FAAUnauthorizedError, + FAAServerError, + FAATimeoutError, + } = require('../../../fileAccessAgent/errors'); + + 
it('returns 502 file_gateway_unavailable on FAAServerError (5xx exhausted)', async () => { + const faa = makeFakeFaaClient({ + outcomes: [ + { + ok: false, + error: new FAAServerError('FAA returned 500', { status: 500 }), + }, + ], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(502); + const body = await res.json(); + expect(body.error.code).toBe('file_gateway_unavailable'); + // 不洩 FAA 內部 + expect(body.error.message).not.toContain('500'); + expect(body.error.message).not.toContain('FAA'); + + // Redis 沒被 mark promoted + const updated = JSON.parse(ctx.redis.store.get('job:job-completed-001')); + expect(updated.promoted).toBeFalsy(); + } finally { + await ctx.close(); + } + }); + + it('returns 502 file_gateway_unavailable on FAATimeoutError', async () => { + const faa = makeFakeFaaClient({ + outcomes: [{ ok: false, error: new FAATimeoutError('PUT timeout 300000ms') }], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(502); + expect((await res.json()).error.code).toBe('file_gateway_unavailable'); + } finally { + await ctx.close(); + } + }); 
+ + it('returns 502 file_gateway_unavailable on FAAClientError (4xx non-401)', async () => { + const faa = makeFakeFaaClient({ + outcomes: [ + { + ok: false, + error: new FAAClientError('FAA returned 403', { + status: 403, + errorCode: 'forbidden', + }), + }, + ], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(502); + const body = await res.json(); + expect(body.error.code).toBe('file_gateway_unavailable'); + // 不洩 FAA 內部 errorCode + expect(body.error.message).not.toContain('forbidden'); + expect(body.error.message).not.toContain('403'); + } finally { + await ctx.close(); + } + }); + + it('returns 503 auth_service_unavailable on FAAUnauthorizedError (401 retry exhausted)', async () => { + const faa = makeFakeFaaClient({ + outcomes: [ + { + ok: false, + error: new FAAUnauthorizedError('FAA returned 401', { status: 401 }), + }, + ], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(503); + const body = await res.json(); + expect(body.error.code).toBe('auth_service_unavailable'); + } finally { + await ctx.close(); + } + }); + + it('returns 502 
storage_unavailable when minio.headObject fails', async () => { + const faa = makeFakeFaaClient(); + const minio = makeFakeMinio(); + minio.headObject = jest.fn(async () => { + throw new Error('minio is down'); + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa, minio }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + expect(res.status).toBe(502); + expect((await res.json()).error.code).toBe('storage_unavailable'); + expect(faa.putFile).not.toHaveBeenCalled(); + } finally { + await ctx.close(); + } + }); +}); + +// --------------------------------------------------------------------------- +// SECURITY +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// createApp wiring smoke test +// 驗證透過 src/app.js 的 createApp 注入路徑(非旁路)能正確 wire faaClient 給 +// promote router;以及缺 deps.faaClient + 缺 opts.config 時 fallback 到 501。 +// --------------------------------------------------------------------------- + +describe('POST /api/v1/jobs/:id/promote — createApp wiring smoke test', () => { + it('createApp wires faaClient when opts.config has FAA baseUrl (no token → 401, not 501)', async () => { + const { createApp } = require('../../../app'); + const { createUploader } = require('../../../middleware/upload'); + + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + const uploader = createUploader(); + + const app = createApp( + { 
redis, jobService, sseService, minio, uploader }, + { + frontendUrl: 'http://localhost:3000', + config: FAKE_CONFIG, // 含 fileAccessAgent.baseUrl,會觸發 lazy build faaClient + rateLimit: { windowMs: 60000, max: 100 }, + storageBackend: 'minio', + } + ); + + const server = await new Promise((resolve) => { + const s = app.listen(0, '127.0.0.1', () => resolve(s)); + }); + const { port } = server.address(); + try { + // 沒帶 token → 應走 requireAuth → 401 + // 若 promote 沒被 wire 起來,會回 501(fallback) + const res = await fetch( + `http://127.0.0.1:${port}/api/v1/jobs/abc/promote`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ targets: [] }), + } + ); + expect(res.status).toBe(401); + const body = await res.json(); + expect(body.error.code).toBe('invalid_token'); + } finally { + await new Promise((r) => server.close(r)); + } + }); + + it('createApp without opts.config falls back to 501 (not_implemented) for promote', async () => { + const { createApp } = require('../../../app'); + const { createUploader } = require('../../../middleware/upload'); + + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ + redis, + sseService, + minio, + jobDataDir: '/tmp/x', + }); + const uploader = createUploader(); + + const app = createApp( + { redis, jobService, sseService, minio, uploader }, + { frontendUrl: 'http://localhost:3000' /* 不傳 config */ } + ); + + const server = await new Promise((resolve) => { + const s = app.listen(0, '127.0.0.1', () => resolve(s)); + }); + const { port } = server.address(); + try { + const res = await fetch( + `http://127.0.0.1:${port}/api/v1/jobs/abc/promote`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({}), + } + ); + expect(res.status).toBe(501); + const body = await res.json(); + expect(body.error.code).toBe('not_implemented'); + // 修復後的 message 以「缺漏依賴名稱」為主,不再寫死「T7」 + 
// Without opts.config the message lists both config and faaClient (the latter is null because the lazy-build condition is not met) + expect(body.error.message).toContain('config'); + } finally { + await new Promise((r) => server.close(r)); + } + }); +}); + +describe('POST /api/v1/jobs/:id/promote — SECURITY', () => { + it('does not leak FAA error_code in response body', async () => { + const { FAAClientError } = require('../../../fileAccessAgent/errors'); + const faa = makeFakeFaaClient({ + outcomes: [ + { + ok: false, + error: new FAAClientError('FAA returned 422', { + status: 422, + errorCode: 'internal_db_link_broken_special_secret_xyz', + }), + }, + ], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + const res = await fetch( + `${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, + { + method: 'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: 'a/b.nef' }], + }), + } + ); + const body = await res.json(); + const stringified = JSON.stringify(body); + expect(stringified).not.toContain('internal_db_link_broken_special_secret_xyz'); + expect(stringified).not.toContain('FAA returned 422'); + } finally { + await ctx.close(); + } + }); + + // Only log output is inspected here; the success response echoes target_object_key by contract, so the title must not claim otherwise. + it('does not write target_object_key contents to logs', async () => { + const sensitiveKey = 'visionA/internal-projects/ultra-secret-model-name.nef'; + const faa = makeFakeFaaClient({ + outcomes: [{ ok: true, result: { etag: 'e', sizeBytes: 1 } }], + }); + const ctx = await startApp({ tokens: TOKENS, faaClient: faa }); + try { + const job = makeCompletedJob(); + ctx.redis.store.set('job:job-completed-001', JSON.stringify(job)); + + // Reset the mocked console call history + console.log.mockClear(); + console.warn.mockClear(); + console.error.mockClear(); + + await fetch(`${ctx.baseUrl}/api/v1/jobs/job-completed-001/promote`, { + method:
'POST', + headers: { + Authorization: 'Bearer good-write-token', + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + targets: [{ source: 'nef', target_object_key: sensitiveKey }], + }), + }); + + // 取所有 log 訊息 + const allLogs = [ + ...console.log.mock.calls, + ...console.warn.mock.calls, + ...console.error.mock.calls, + ].flatMap((args) => + args.map((a) => (typeof a === 'string' ? a : JSON.stringify(a))) + ); + + // promote handler 的 logEvent 應只 log 結構化欄位(client_id / job_id / source / target_count), + // 不該 log target_object_key 全文。但 morgan 會 log path(含 jobId 但不含 target_object_key + // 因為 target_object_key 在 body)。 + for (const line of allLogs) { + // 此 key 出現在 body 不應出現在 log + expect(line).not.toContain('ultra-secret-model-name'); + } + } finally { + await ctx.close(); + } + }); +}); diff --git a/apps/task-scheduler/src/routes/v1/__tests__/v1-routes.integration.test.js b/apps/task-scheduler/src/routes/v1/__tests__/v1-routes.integration.test.js new file mode 100644 index 0000000..0b420df --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/__tests__/v1-routes.integration.test.js @@ -0,0 +1,511 @@ +/** + * /api/v1/* 路由整合測試(T3)。 + * + * 測試重點: + * 1. 4 個 v1 端點都回 501 + 統一錯誤格式(含 code / message / request_id) + * 2. response 含 X-Request-Id header,且值與 body.error.request_id 相同 + * 3. 外部送的合法 X-Request-Id 被沿用 + * 4. 外部送的非法 X-Request-Id 被 ignore,server 自行產生 + * 5. legacy 路由不受影響(仍然回原本的格式) + * 6. 
**D4 修復驗證**:requireAuth + requestId middleware 串接,401 response + * 的 body 含真正的 UUID(不是 null) + * + * 啟動方式:用 createApp + 注入 mock deps,app.listen(0),用 fetch() 真打 HTTP。 + * 與 T1 / T4 的整合測試風格一致。 + */ + +'use strict'; + +const express = require('express'); + +const { createApp } = require('../../../app'); +const { createSseService } = require('../../../services/sseService'); +const { createJobService } = require('../../../services/jobService'); +const { createUploader } = require('../../../middleware/upload'); + +const { requireAuth } = require('../../../auth/middleware'); +const { requestIdMiddleware } = require('../../../middleware/requestId'); + +const UUID_V4_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; + +// --------------------------------------------------------------------------- +// Helpers — 仿 legacy.integration.test.js +// --------------------------------------------------------------------------- + +function makeFakeRedis() { + const store = new Map(); + return { + store, + pingFails: false, + ping: jest.fn(async function () { + if (this.pingFails) throw new Error('ping failed'); + return 'PONG'; + }), + get: jest.fn(async (key) => (store.has(key) ? 
store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + keys: jest.fn(async () => []), + xadd: jest.fn(async () => '1-0'), + xlen: jest.fn(async () => 0), + xinfo: jest.fn(async () => { + throw new Error('NOGROUP'); + }), + }; +} + +function makeFakeMinio() { + return { + client: null, + bucket: 'test-bucket', + endpoint: 'http://nope', + uploadToMinIO: jest.fn(async () => undefined), + getFromMinIO: jest.fn(async () => null), + }; +} + +async function startApp() { + const redis = makeFakeRedis(); + const minio = makeFakeMinio(); + const sseService = createSseService(); + const jobService = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + const uploader = createUploader(); + const app = createApp( + { redis, jobService, sseService, minio, uploader }, + { frontendUrl: 'http://localhost:3000' } + ); + return new Promise((resolve) => { + const server = app.listen(0, '127.0.0.1', () => { + const { port } = server.address(); + resolve({ + server, + baseUrl: `http://127.0.0.1:${port}`, + close: () => + new Promise((r) => { + server.close(() => r()); + }), + }); + }); + }); +} + +// 抑制 logs(保持測試輸出乾淨) +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +// --------------------------------------------------------------------------- +// 501 端點骨架 +// --------------------------------------------------------------------------- + +describe('v1 routes — 501 骨架', () => { + let ctx; + + beforeEach(async () => { + ctx = await startApp(); + }); + afterEach(async () => { + await ctx.close(); + }); + + // T7 已完成,promote 的 fallback message 改寫為依賴名稱清單; + // 其他 (T5/T6) 仍處於 501 「規劃於 Tx」的階段,期望 message 含 task code。 + // 對於已完成端點(promote),改驗 message 含「config」字樣(提示缺漏依賴)。 + describe.each([ + ['POST /api/v1/jobs', 'POST', 
'/api/v1/jobs', 'T5'], + ['GET /api/v1/jobs', 'GET', '/api/v1/jobs', 'T6'], + ['GET /api/v1/jobs/:id', 'GET', '/api/v1/jobs/abc-123', 'T6'], + ['POST /api/v1/jobs/:id/promote', 'POST', '/api/v1/jobs/abc-123/promote', 'config'], + ])('%s', (label, method, path, expectedKeyword) => { + it(`returns 501 not_implemented with v1 error format`, async () => { + const res = await fetch(`${ctx.baseUrl}${path}`, { + method, + headers: { 'Content-Type': 'application/json' }, + body: method === 'POST' ? JSON.stringify({}) : undefined, + }); + + expect(res.status).toBe(501); + + const body = await res.json(); + expect(body).toHaveProperty('error'); + expect(body.error.code).toBe('not_implemented'); + expect(typeof body.error.message).toBe('string'); + expect(body.error.message.length).toBeGreaterThan(0); + // message 應含 task code(未實作端點)或關鍵依賴名稱(promote 已實作但缺 config) + expect(body.error.message).toContain(expectedKeyword); + }); + + it(`response.error.request_id matches X-Request-Id response header`, async () => { + const res = await fetch(`${ctx.baseUrl}${path}`, { + method, + headers: { 'Content-Type': 'application/json' }, + body: method === 'POST' ? 
// ---------------------------------------------------------------------------
// X-Request-Id handling
// ---------------------------------------------------------------------------

describe('X-Request-Id 處理', () => {
  let ctx;

  beforeEach(async () => {
    ctx = await startApp();
  });
  afterEach(async () => {
    await ctx.close();
  });

  it('echoes external X-Request-Id when valid', async () => {
    const externalId = 'aaaaaaaa-bbbb-4ccc-9ddd-eeeeeeeeeeee';
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, {
      headers: { 'X-Request-Id': externalId },
    });
    const body = await res.json();

    expect(res.headers.get('x-request-id')).toBe(externalId);
    expect(body.error.request_id).toBe(externalId);
  });

  it('echoes external X-Request-Id with non-UUID format (e.g. trace ID)', async () => {
    const externalId = 'trace-some-system-42';
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, {
      headers: { 'X-Request-Id': externalId },
    });
    const body = await res.json();

    expect(res.headers.get('x-request-id')).toBe(externalId);
    expect(body.error.request_id).toBe(externalId);
  });

  it('ignores invalid external X-Request-Id (with spaces) and generates UUID', async () => {
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, {
      headers: { 'X-Request-Id': 'has invalid spaces' },
    });
    const body = await res.json();

    const headerId = res.headers.get('x-request-id');
    expect(headerId).toMatch(UUID_V4_REGEX);
    expect(headerId).not.toBe('has invalid spaces');
    expect(body.error.request_id).toBe(headerId);
  });

  // The previous title claimed this test covered a rejected CRLF injection and a
  // freshly generated UUID, but the value sent contains no CRLF and the
  // assertions check that it is echoed. The title now states what is verified.
  it('echoes a well-formed X-Request-Id sent via the low-level http module', async () => {
    // Original author's note: fetch refuses header values containing CRLF,
    // which is why this test goes through the low-level http module.
    const http = require('http');
    const url = new URL(`${ctx.baseUrl}/api/v1/jobs`);

    await new Promise((resolve, reject) => {
      const req = http.request(
        {
          hostname: url.hostname,
          port: url.port,
          path: url.pathname,
          method: 'GET',
          headers: { 'X-Request-Id': 'evil-but-no-crlf-allowed-by-fetch' },
        },
        (res) => {
          let raw = '';
          res.on('data', (c) => {
            raw += c.toString();
          });
          res.on('end', () => {
            try {
              const body = JSON.parse(raw);
              const headerId = res.headers['x-request-id'];
              // The id only contains letters and '-', so it is a legal value
              // and is expected to be reused as-is.
              expect(headerId).toBe('evil-but-no-crlf-allowed-by-fetch');
              expect(body.error.request_id).toBe(headerId);
              resolve();
            } catch (e) {
              // Surface assertion failures through the promise so Jest reports them.
              reject(e);
            }
          });
        }
      );
      req.on('error', reject);
      req.end();
    });
  });

  it('ignores too-long X-Request-Id (>100 chars) and generates UUID', async () => {
    const tooLong = 'a'.repeat(101);
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs`, {
      headers: { 'X-Request-Id': tooLong },
    });

    const headerId = res.headers.get('x-request-id');
    expect(headerId).toMatch(UUID_V4_REGEX);
    expect(headerId).not.toBe(tooLong);
  });
});

// ---------------------------------------------------------------------------
// Legacy routes are not affected by v1
// ---------------------------------------------------------------------------

describe('legacy 路由不受 v1 影響', () => {
  let ctx;

  beforeEach(async () => {
    ctx = await startApp();
  });
  afterEach(async () => {
    await ctx.close();
  });

  it('GET /health 仍回原本格式(service / status / redis)', async () => {
    const res = await fetch(`${ctx.baseUrl}/health`);
    expect(res.status).toBe(200);
    const body = await res.json();
    // Legacy shape: no error / code / request_id in the body.
    expect(body).toMatchObject({
      service: 'task-scheduler',
      status: 'healthy',
      redis: 'connected',
    });
    expect(body).not.toHaveProperty('error');
    // The X-Request-Id header is still attached (global middleware).
    expect(res.headers.get('x-request-id')).toMatch(UUID_V4_REGEX);
  });

  it('legacy 404 response format unchanged', async () => {
    const res = await fetch(`${ctx.baseUrl}/no-such-legacy-path`);
    expect(res.status).toBe(404);
    const body = await res.json();
    // Legacy format stays `{ error: 'string' }`, not the v1 `{ error: { code, ... } }`.
    expect(body.error).toBe('Endpoint not found');
  });
});

// ---------------------------------------------------------------------------
// Minor-1: unmatched paths under the v1 prefix return a v1-format 404 not_found
// ---------------------------------------------------------------------------

describe('v1 catch-all(Minor-1 修復):未匹配 v1 路徑回 404 not_found', () => {
  let ctx;

  beforeEach(async () => {
    ctx = await startApp();
  });
  afterEach(async () => {
    await ctx.close();
  });

  it('GET /api/v1/foobar 回 v1 格式 404 not_found(含 request_id)', async () => {
    const res = await fetch(`${ctx.baseUrl}/api/v1/foobar`);
    expect(res.status).toBe(404);

    const body = await res.json();
    // Unified v1 format: `error` is an object, not a string.
    expect(typeof body.error).toBe('object');
    expect(body.error.code).toBe('not_found');
    expect(typeof body.error.message).toBe('string');
    expect(body.error.message.length).toBeGreaterThan(0);

    // No X-Request-Id was sent, so the server-generated id must be a UUID.
    expect(body.error.request_id).toMatch(UUID_V4_REGEX);

    // Header and body must carry the same request id.
    expect(res.headers.get('x-request-id')).toBe(body.error.request_id);
  });

  it('GET /api/v1/jobs/foobar/strange-action 回 v1 格式 404(巢狀未匹配路徑)', async () => {
    // This path does not end with /promote and matches none of the jobs routes.
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/foobar/strange-action`);
    expect(res.status).toBe(404);

    const body = await res.json();
    expect(typeof body.error).toBe('object');
    expect(body.error.code).toBe('not_found');
    expect(body.error.request_id).toMatch(UUID_V4_REGEX);
  });

  it('未匹配 v1 路徑會 echo 外部 X-Request-Id(驗證 request_id 貫穿)', async () => {
    const externalId = 'trace-minor1-fix-123';
    const res = await fetch(`${ctx.baseUrl}/api/v1/no-such-route`, {
      headers: { 'X-Request-Id': externalId },
    });
    expect(res.status).toBe(404);

    const body = await res.json();
    expect(body.error.code).toBe('not_found');
    expect(body.error.request_id).toBe(externalId);
    expect(res.headers.get('x-request-id')).toBe(externalId);
  });
});

// ---------------------------------------------------------------------------
// Minor-2: endpoints reserved for Phase 2 return 501 not_implemented
// ---------------------------------------------------------------------------

describe('Phase 2 預留端點(Minor-2 修復):回 501 not_implemented', () => {
  let ctx;

  beforeEach(async () => {
    ctx = await startApp();
  });
  afterEach(async () => {
    await ctx.close();
  });

  it('POST /api/v1/jobs/:id/download-tokens 回 v1 格式 501 not_implemented', async () => {
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/abc-123/download-tokens`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({}),
    });
    expect(res.status).toBe(501);

    const body = await res.json();
    expect(typeof body.error).toBe('object');
    expect(body.error.code).toBe('not_implemented');
    expect(typeof body.error.message).toBe('string');
    // The message should mention Phase 2 so clients can tell "not yet
    // implemented" apart from "will never be implemented".
    expect(body.error.message).toContain('Phase 2');
    expect(body.error.request_id).toMatch(UUID_V4_REGEX);
    expect(res.headers.get('x-request-id')).toBe(body.error.request_id);
  });

  it('DELETE /api/v1/jobs/:id 回 v1 格式 501 not_implemented', async () => {
    const res = await fetch(`${ctx.baseUrl}/api/v1/jobs/abc-123`, {
      method: 'DELETE',
    });
    expect(res.status).toBe(501);

    const body = await res.json();
    expect(typeof body.error).toBe('object');
    expect(body.error.code).toBe('not_implemented');
    expect(body.error.message).toContain('Phase 2');
    expect(body.error.request_id).toMatch(UUID_V4_REGEX);
    expect(res.headers.get('x-request-id')).toBe(body.error.request_id);
  });

  it('Phase 2 端點不會被 catch-all 吃成 404(驗證掛 501 在 catch-all 之前生效)', async () => {
    // If the Phase 2 endpoints were not mounted, these requests would fall
    // through to the catch-all and become 404 not_found; this guards the 501.
    const downloadRes = await fetch(
      `${ctx.baseUrl}/api/v1/jobs/some-id/download-tokens`,
      { method: 'POST' }
    );
    expect(downloadRes.status).toBe(501);
    expect((await downloadRes.json()).error.code).toBe('not_implemented');

    const deleteRes = await fetch(`${ctx.baseUrl}/api/v1/jobs/some-id`, {
      method: 'DELETE',
    });
    expect(deleteRes.status).toBe(501);
    expect((await deleteRes.json()).error.code).toBe('not_implemented');
  });
});

// ---------------------------------------------------------------------------
// D4 fix verification: requireAuth chained after the requestId middleware
// ---------------------------------------------------------------------------

describe('D4 修復:requireAuth + requestId middleware 串接', () => {
  // Independent of the v1 router: a minimal app is assembled here to verify
  // only the middleware chaining behavior.
  let server;
  let baseUrl;

  beforeAll(async () => {
    const app = express();
    app.use(requestIdMiddleware);
    app.get(
      '/protected',
      requireAuth('converter:job.write', {
        config: {
          memberCenter: {
            issuer: 'https://auth.test.local',
            jwksUrl: 'https://auth.test.local/.well-known/jwks',
            tokenUrl: '',
          },
          converter: {
            audience: 'kneron_converter_api',
            clientId: '',
            clientSecret: '',
            tenantId: '',
            scopeWrite: 'converter:job.write',
            scopeRead: 'converter:job.read',
          },
          fileAccessAgent: { baseUrl: '', audience: 'file_access_api' },
          jwks: { cacheMaxAgeMs: 60000, cooldownMs: 30000, clockToleranceSec: 60 },
        },
        // verify always throws to simulate an invalid token; these tests only
        // care about the request_id carried on the 401 path.
        verify: async () => {
          const e = new Error('signature failed');
          e.code = 'ERR_JWS_SIGNATURE_VERIFICATION_FAILED';
          throw e;
        },
      }),
      (_req, res) => res.status(200).json({ ok: true })
    );

    // Port 0 lets the OS pick a free port; the real port is read back below.
    await new Promise((resolve) => {
      server = app.listen(0, '127.0.0.1', resolve);
    });
    const addr = server.address();
    baseUrl = `http://127.0.0.1:${addr.port}`;
  });

  afterAll(async () => {
    if (server) {
      await new Promise((r) => server.close(r));
    }
  });

  it('401 response.error.request_id is a real UUID (not null) when no X-Request-Id sent', async () => {
    const res = await fetch(`${baseUrl}/protected`, {
      headers: { Authorization: 'Bearer invalid-token' },
    });
    expect(res.status).toBe(401);
    const body = await res.json();

    // Core check of the D4 fix: request_id is no longer null.
    expect(body.error.request_id).not.toBeNull();
    expect(body.error.request_id).toMatch(UUID_V4_REGEX);

    // It must also equal the X-Request-Id response header.
    expect(res.headers.get('x-request-id')).toBe(body.error.request_id);
  });

  it('401 response.error.request_id echoes external X-Request-Id when valid', async () => {
    const externalId = 'trace-d4-fix-verification-42';
    const res = await fetch(`${baseUrl}/protected`, {
      headers: {
        Authorization: 'Bearer invalid-token',
        'X-Request-Id': externalId,
      },
    });
    expect(res.status).toBe(401);
    const body = await res.json();

    expect(body.error.request_id).toBe(externalId);
    expect(res.headers.get('x-request-id')).toBe(externalId);
  });

  it('401 with missing Authorization header still has real request_id (not null)', async () => {
    const res = await fetch(`${baseUrl}/protected`);
    expect(res.status).toBe(401);
    const body = await res.json();

    expect(body.error.code).toBe('invalid_token');
    expect(body.error.request_id).not.toBeNull();
    expect(body.error.request_id).toMatch(UUID_V4_REGEX);
  });
});
掛 v1 專用的 errorHandler(在所有子 router 之後、最末端) + * + * 為什麼 errorHandler 掛在 v1 router 內、而非 app.js 全域: + * - 全域 errorHandler 需處理 legacy 行為(既有的 `{ error: 'string' }` 格式) + * - 若把 v1 errorHandler 全域掛,會改變 legacy 路徑的回應格式(破壞向後相容) + * - 在 v1 router scope 內掛 errorHandler 可保證: + * * v1 路徑用新格式(含 code / request_id / details) + * * legacy 路徑維持既有格式 + * - Express 4 的 router-level error middleware 會「捕捉本 router 內的 next(err)」, + * bubble 到外層需顯式 next(err);本 errorHandler 屬終態(res.json 後不再 next) + * + * 路由結構: + * /api/v1 + * ├── /jobs — POST/GET(jobs router) + * ├── /jobs/:id — GET(jobs router) + * └── /jobs/:id/promote — POST(promote router,mergeParams 取 :id) + * + * 注意: + * T3 不掛 requireAuth;T5/T6/T7 實作各端點時,會在各自 handler 之前加。 + * per-client_id rate limiter(T3 計畫)也尚未掛 — 與 requireAuth 順序強相關, + * 留待 T5 起需要 clientId 時再加,避免提前耦合。 + */ + +'use strict'; + +const express = require('express'); + +const { createJobsRouter } = require('./jobs'); +const { createPromoteRouter } = require('./promote'); +const { errorHandler, ApiError } = require('../../middleware/errorHandler'); + +/** + * 建立 /api/v1 router。 + * + * @param {object} [deps] — 注入給各子 router;T5 起 jobs router 需要 + * @param {object} [deps.jobService] — createJobService(...) 
/**
 * Build the `/api/v1` router.
 *
 * Mount order (Express is first-match-wins):
 *   1. `/jobs/:id/promote` - promote router
 *   2. `/jobs`             - jobs router
 *   3. catch-all           - turns every unmatched v1 path into a 404
 *                            `not_found` ApiError
 *   4. `errorHandler`      - v1 error formatter, mounted last
 *
 * @param {object} [deps] - Dependencies handed to the sub-routers.
 * @param {object} [deps.jobService] - Forwarded to both sub-routers.
 * @param {object} [deps.uploader] - Forwarded to the jobs router (via `deps`).
 * @param {object} [deps.minio] - Forwarded to both sub-routers.
 * @param {object} [deps.faaClient] - Forwarded to the promote router.
 * @param {object} [deps.config] - Forwarded to both sub-routers.
 * @param {object} [deps.rateLimit] - Forwarded to both sub-routers.
 * @param {string} [deps.storageBackend] - Forwarded to the jobs router (via `deps`).
 * @returns {import('express').Router}
 */
function createV1Router(deps = {}) {
  const { jobService, minio, faaClient, config, rateLimit } = deps;
  const v1 = express.Router();

  // The promote router is mounted before the jobs router so the more specific
  // path is considered first. Per the original author's note the order is not
  // strictly required (GET /jobs/:id cannot match a POST), but it makes the
  // intent explicit. The same note states that the promote router falls back
  // to 501 when one of its dependencies is missing.
  v1.use(
    '/jobs/:id/promote',
    createPromoteRouter({ jobService, minio, faaClient, config, rateLimit })
  );

  // /api/v1/jobs/* receives the whole deps bag unchanged.
  v1.use('/jobs', createJobsRouter(deps));

  // Catch-all for anything under the v1 prefix that no route handled. Without
  // it the request would leave this router and (per the original note) be
  // answered by the application-level fallback in the legacy
  // `{"error":"Endpoint not found"}` shape. Going through next(ApiError)
  // instead of res.json keeps the output on the errorHandler path. It has to
  // sit after every router.use(...) above and before the error handler below.
  v1.use((req, res, next) => {
    const notFound = new ApiError(
      404,
      'not_found',
      `路徑不存在:${req.method} ${req.originalUrl}`
    );
    return next(notFound);
  });

  // Error middleware must be registered after all routes.
  v1.use(errorHandler);

  return v1;
}

module.exports = { createV1Router };
multer 最後 → 在 auth + quota 雙重通過後才 parse multipart + * - 寫入順序:先 MinIO 後 Lua(M5 方案 A) + * - GET /:id 與 GET 列表共用 perClientRateLimiter(每個請求都計入該 client_id quota) + * + * 失敗情境對照(TDD §1.4.2 + §14): + * - 401/403 由 requireAuth 處理 + * - 429 由 perClientRateLimiter handler 處理(轉 ApiError) + * - 413 由 multer LIMIT_FILE_SIZE 觸發(→ multerErrorAdapter 轉 v1 格式) + * - 400 由 validator 統一回(含 details.field) + * - 409 由 claim Lua script 衝突 → 此 handler 組 v1 格式 payload + * - 502 由 MinIO upload throw → catch 後轉 v1 格式 + * - 500 misconfiguration(STORAGE_BACKEND !== 'minio') + * - 404 job_not_found(GET /:id 不存在或不屬於 client) + * + * 安全(T6 重點): + * - GET /:id 跨 client 一律回 404(不洩漏存在性,TDD §1.4.3) + * - GET 列表強制 user_id 必填(避免全掃 + admin 級資訊洩漏) + * - user_id 走 Sec M1 白名單(validateUserId),擋 XSS / 路徑穿越 / log injection + * - status / limit / offset 嚴格驗證,不接受非預期 query 參數 + * - 304 Not Modified 流程不洩漏 record 細節(只有 ETag 比對) + */ + +'use strict'; + +const express = require('express'); +const { v4: uuidv4 } = require('uuid'); + +const { ApiError } = require('../../middleware/errorHandler'); +const { requireAuth } = require('../../auth/middleware'); +const { createPerClientRateLimiter } = require('../../middleware/perClientRateLimit'); +const { validateCreateJobRequest } = require('./validators/createJob'); +const { toExternalStatus } = require('../../services/statusMapper'); +const { validateUserId } = require('../../utils/sanitize'); + +/** + * Job record + active_job 的 TTL(與 TDD §2.7.2 / §1.4.3 一致)。 + * 7 天 = 604800 秒,對齊 Converter Bucket lifecycle。 + */ +const DEFAULT_JOB_TTL_SECONDS = 7 * 24 * 60 * 60; + +/** + * Multer error → v1 ApiError 轉換器。 + * + * 為什麼包成獨立 middleware: + * multer 把 file size limit 失敗 throw 為 `MulterError`(非標準 Error), + * 若直接讓全域 errorHandler 接,會走 500 internal_error,不符 TDD §14。 + * 這個 middleware 只攔截 multer 的錯,其他放行(next(err))。 + * + * @param {Error} err + * @param {import('express').Request} req + * @param {import('express').Response} res + * @param {import('express').NextFunction} next + */ 
/**
 * Express error middleware that converts multer failures into v1 `ApiError`s.
 *
 * Only errors whose `name` is `'MulterError'` are translated; everything else
 * is forwarded untouched with `next(err)`.
 *
 *   - `LIMIT_FILE_SIZE` -> 413 `file_too_large`
 *   - any other multer code -> 400 `invalid_multipart`
 *
 * @param {Error} err
 * @param {import('express').Request} req
 * @param {import('express').Response} res
 * @param {import('express').NextFunction} next
 */
// eslint-disable-next-line no-unused-vars
function multerErrorAdapter(err, req, res, next) {
  if (!err || err.name !== 'MulterError') {
    return next(err);
  }

  // Common multer codes: LIMIT_FILE_SIZE / LIMIT_FILE_COUNT / LIMIT_UNEXPECTED_FILE
  if (err.code === 'LIMIT_FILE_SIZE') {
    return next(
      new ApiError(413, 'file_too_large', '上傳檔案超過 500MB 上限', {
        field: err.field || 'model',
        limit_bytes: 500 * 1024 * 1024,
      })
    );
  }

  return next(
    new ApiError(400, 'invalid_multipart', `multipart 解析失敗:${err.code}`, {
      code: err.code,
      field: err.field || null,
    })
  );
}

/**
 * Serialize an internal job record into the trimmed POST 201 response body.
 *
 * The response is intentionally smaller than the full record, and `status` is
 * always reported as `'created'`.
 *
 * @param {object} jobRecord
 * @returns {object}
 */
function buildCreateResponse(jobRecord) {
  return {
    job_id: jobRecord.job_id,
    status: 'created',
    stage: jobRecord.stage,
    progress: jobRecord.progress,
    created_at: jobRecord.created_at,
    expires_at: jobRecord.expires_at,
    user_id: jobRecord.user_id,
  };
}

// ===========================================================================
// T6: helpers for GET /api/v1/jobs/:id and GET /api/v1/jobs
// ===========================================================================

/**
 * Defaults and bounds applied to the query string of the list endpoint.
 */
const GET_LIST_DEFAULTS = Object.freeze({
  defaultStatus: 'in_progress',
  validStatuses: new Set(['in_progress', 'completed', 'failed', 'all']),
  defaultLimit: 10,
  maxLimit: 50,
});

/**
 * Serialize an internal job record into the external v1 job object.
 *
 *   - status / stage come from `toExternalStatus(job)`
 *   - `result_object_keys` is only populated when the external status is
 *     `completed`, otherwise it is null
 *   - `error` is only populated when the external status is `failed`
 *
 * The returned object still carries `created_by_client_id`; callers pass it
 * through `stripInternalFields` before sending it to a client.
 *
 * @param {object} job - Full job record.
 * @returns {object} Job object in the external v1 shape.
 */
function serializeJobForResponse(job) {
  const { status: externalStatus, stage: externalStage } = toExternalStatus(job);

  let resultObjectKeys = null;
  if (externalStatus === 'completed') {
    if (job.result_object_keys && typeof job.result_object_keys === 'object') {
      resultObjectKeys = job.result_object_keys;
    } else if (job.output && typeof job.output === 'object') {
      // Fallback: derive the keys from the older `output.*_path` structure.
      const fromOutput = {};
      if (job.output.onnx_path) fromOutput.onnx = job.output.onnx_path;
      if (job.output.bie_path) fromOutput.bie = job.output.bie_path;
      if (job.output.nef_path) fromOutput.nef = job.output.nef_path;
      // Stay null rather than returning an empty object.
      resultObjectKeys = Object.keys(fromOutput).length > 0 ? fromOutput : null;
    }
  }

  const errorPayload =
    externalStatus === 'failed' && job.error && typeof job.error === 'object'
      ? job.error
      : null;

  // `input` may be missing on a record; report null instead of failing the GET.
  const input =
    job.input && typeof job.input === 'object'
      ? {
          filename: job.input.filename || null,
          object_key: job.input.object_key || null,
          size_bytes:
            typeof job.input.size_bytes === 'number'
              ? job.input.size_bytes
              : null,
          ref_images_count:
            typeof job.input.ref_images_count === 'number'
              ? job.input.ref_images_count
              : 0,
        }
      : null;

  const parameters =
    job.parameters && typeof job.parameters === 'object' ? job.parameters : {};

  const metadata =
    job.metadata && typeof job.metadata === 'object' ? job.metadata : {};

  return {
    job_id: job.job_id,
    user_id: job.user_id || null,
    status: externalStatus,
    stage: externalStage,
    progress: typeof job.progress === 'number' ? job.progress : 0,
    stage_progress:
      typeof job.stage_progress === 'number' ? job.stage_progress : 0,
    created_at: job.created_at || null,
    updated_at: job.updated_at || null,
    expires_at: job.expires_at || null,
    stage_timings:
      job.stage_timings && typeof job.stage_timings === 'object'
        ? job.stage_timings
        : { onnx: null, bie: null, nef: null },
    input,
    result_object_keys: resultObjectKeys,
    error: errorPayload,
    parameters,
    metadata,
    // Internal field, removed by stripInternalFields before leaving the server.
    created_by_client_id: job.created_by_client_id || null,
  };
}

/**
 * Return a copy of a serialized job without the internal-only fields.
 *
 * Non-object input is returned as-is. The argument is never mutated.
 *
 * @param {object} serialized
 * @returns {object}
 */
function stripInternalFields(serialized) {
  if (!serialized || typeof serialized !== 'object') return serialized;
  // Destructuring (rather than delete) keeps this pure and lists the stripped
  // fields in one explicit place.
  // eslint-disable-next-line no-unused-vars
  const { created_by_client_id, ...rest } = serialized;
  return rest;
}

/**
 * Parse and validate the query parameters of GET /jobs.
 *
 * Every problem found is collected, so a single response can report all
 * invalid fields at once.
 *
 * @param {object} query - Express `req.query`.
 * @returns {{
 *   ok: true, data: { userId: string, status: string, limit: number, offset: number }
 * } | {
 *   ok: false, errors: Array<{ field: string, message: string }>
 * }}
 */
function parseListQuery(query) {
  const errors = [];
  const q = query || {};

  // 1. user_id is mandatory and must pass validateUserId.
  const userIdRaw = typeof q.user_id === 'string' ? q.user_id : '';
  const userId = validateUserId(userIdRaw);
  if (!userId) {
    errors.push({
      field: 'user_id',
      message: 'user_id 必填,1-128 字元,僅可包含英數字 / `.` / `_` / `-`',
    });
  }

  // 2. status is optional and defaults to in_progress.
  let status = GET_LIST_DEFAULTS.defaultStatus;
  if (q.status !== undefined && q.status !== null && q.status !== '') {
    if (typeof q.status !== 'string' || !GET_LIST_DEFAULTS.validStatuses.has(q.status)) {
      errors.push({
        field: 'status',
        message: `status 必須為 ${[...GET_LIST_DEFAULTS.validStatuses].join(' / ')} 之一`,
      });
    } else {
      status = q.status;
    }
  }

  // 3. limit is optional: digits only, within 1..maxLimit.
  let limit = GET_LIST_DEFAULTS.defaultLimit;
  if (q.limit !== undefined && q.limit !== null && q.limit !== '') {
    // Non-string values (e.g. a repeated query key parsed as an array)
    // collapse to '' and fail the digit check below.
    const raw = typeof q.limit === 'string' ? q.limit.trim() : '';
    if (!/^\d+$/.test(raw)) {
      errors.push({
        field: 'limit',
        message: 'limit 必須為非負整數',
      });
    } else {
      const parsed = parseInt(raw, 10);
      if (parsed < 1 || parsed > GET_LIST_DEFAULTS.maxLimit) {
        errors.push({
          field: 'limit',
          message: `limit 範圍必須在 1 ~ ${GET_LIST_DEFAULTS.maxLimit}`,
        });
      } else {
        limit = parsed;
      }
    }
  }

  // 4. cursor is optional: an opaque token that decodes to an offset.
  let offset = 0;
  if (q.cursor !== undefined && q.cursor !== null && q.cursor !== '') {
    if (typeof q.cursor !== 'string') {
      errors.push({ field: 'cursor', message: 'cursor 格式錯誤' });
    } else {
      const parsed = decodeCursor(q.cursor);
      if (parsed === null) {
        errors.push({ field: 'cursor', message: 'cursor 格式錯誤' });
      } else {
        offset = parsed;
      }
    }
  }

  if (errors.length > 0) {
    return { ok: false, errors };
  }

  return {
    ok: true,
    data: { userId, status, limit, offset },
  };
}

/**
 * Encode an offset as an opaque cursor: unpadded base64url of `{"offset":N}`.
 *
 * Clients must treat the cursor as opaque so the payload can change later
 * without changing the API.
 *
 * @param {number} offset
 * @returns {string}
 */
function encodeCursor(offset) {
  // Node's 'base64url' encoding emits the URL-safe alphabet without '='
  // padding, which is what the manual character replacement used to produce.
  return Buffer.from(JSON.stringify({ offset }), 'utf8').toString('base64url');
}

/**
 * Decode a cursor back into an offset.
 *
 * @param {string} cursor
 * @returns {number|null} The offset, or null when the cursor is not a string,
 *   is empty, is longer than 200 characters, does not decode to JSON, or its
 *   `offset` is not an integer in 0..1000000.
 */
function decodeCursor(cursor) {
  if (typeof cursor !== 'string' || cursor.length === 0 || cursor.length > 200) {
    return null;
  }
  try {
    // The 'base64url' decoder also accepts regular base64 and optional
    // padding, so no manual alphabet translation or re-padding is needed.
    const json = Buffer.from(cursor, 'base64url').toString('utf8');
    const parsed = JSON.parse(json);
    if (
      !parsed ||
      typeof parsed !== 'object' ||
      typeof parsed.offset !== 'number' ||
      !Number.isInteger(parsed.offset) ||
      parsed.offset < 0 ||
      parsed.offset > 1000000 // upper bound: reject cursors carrying huge offsets
    ) {
      return null;
    }
    return parsed.offset;
  } catch (_) {
    // Malformed base64 / JSON is simply an invalid cursor.
    return null;
  }
}
/**
 * Drop a leading weak indicator (`W/`) from an entity-tag.
 *
 * @param {*} tag
 * @returns {*} The opaque-tag part for strings; non-strings are returned as-is.
 */
function stripWeakPrefix(tag) {
  return typeof tag === 'string' && tag.startsWith('W/') ? tag.slice(2) : tag;
}

/**
 * Evaluate an If-None-Match header value against the current ETag using the
 * weak comparison function: two entity-tags match when their opaque-tags are
 * identical, regardless of either being marked weak. `*` matches anything.
 *
 * @param {string} headerValue - Raw If-None-Match value (comma-separated list).
 * @param {string} currentEtag - ETag of the current representation.
 * @returns {boolean}
 */
function ifNoneMatchMatches(headerValue, currentEtag) {
  const current = stripWeakPrefix(currentEtag);
  return headerValue
    .split(',')
    .map((candidate) => candidate.trim())
    .some((candidate) => candidate === '*' || stripWeakPrefix(candidate) === current);
}

/**
 * Build the GET /api/v1/jobs/:id handler.
 *
 * Flow:
 *   1. Read the job id from the path parameter.
 *   2. `jobService.getJob(jobId)`; a missing job yields 404 `job_not_found`.
 *   3. A job whose `created_by_client_id` differs from the caller's client id
 *      yields the same 404, so existence is not revealed.
 *   4. Set the ETag; a matching If-None-Match yields 304 with no body.
 *   5. Otherwise serialize, strip internal fields, and answer 200.
 *
 * Errors thrown along the way are forwarded with `next(err)`.
 *
 * @param {object} deps
 * @param {object} deps.jobService - Must provide `getJob` and `computeEtag`.
 * @returns {import('express').RequestHandler}
 */
function buildGetJobHandler(deps) {
  const { jobService } = deps;
  return async function getJobHandler(req, res, next) {
    const startedAtMs = Date.now();
    try {
      const jobId = req.params && req.params.id;
      if (typeof jobId !== 'string' || jobId === '') {
        // Should not happen once Express matched the route; defensive only.
        return next(new ApiError(404, 'job_not_found', 'Job 不存在'));
      }

      const job = await jobService.getJob(jobId);

      const clientId =
        req.auth && typeof req.auth.clientId === 'string'
          ? req.auth.clientId
          : null;
      // "Missing" and "owned by another client" deliberately share one 404.
      // NOTE(review): the ownership check is skipped when clientId is null or
      // the record has no created_by_client_id, whereas the list handler
      // rejects a missing clientId with 401 - confirm this fail-open path is
      // intended (e.g. for legacy records).
      if (!job || (clientId && job.created_by_client_id && job.created_by_client_id !== clientId)) {
        logEvent({
          level: 'INFO',
          action: 'jobs.get.not_found',
          request_id: req.requestId,
          job_id: jobId,
          client_id: clientId,
          // The log does not distinguish cross-client from truly missing, so
          // it cannot serve as an oracle either.
          duration_ms: Date.now() - startedAtMs,
        });
        return next(new ApiError(404, 'job_not_found', 'Job 不存在'));
      }

      const etag = jobService.computeEtag(job);
      // The header is set before any status/body so the 304 answer carries it too.
      res.setHeader('ETag', etag);

      const ifNoneMatch = req.headers && req.headers['if-none-match'];
      if (ifNoneMatch && typeof ifNoneMatch === 'string') {
        // Weak comparison: `W/"x"` and `"x"` count as the same entity-tag.
        if (ifNoneMatchMatches(ifNoneMatch, etag)) {
          logEvent({
            level: 'INFO',
            action: 'jobs.get.not_modified',
            request_id: req.requestId,
            job_id: jobId,
            client_id: clientId,
            duration_ms: Date.now() - startedAtMs,
          });
          return res.status(304).end();
        }
      }

      const serialized = serializeJobForResponse(job);
      const body = stripInternalFields(serialized);

      logEvent({
        level: 'INFO',
        action: 'jobs.get.success',
        request_id: req.requestId,
        job_id: jobId,
        client_id: clientId,
        external_status: serialized.status,
        duration_ms: Date.now() - startedAtMs,
      });

      return res.status(200).json(body);
    } catch (err) {
      return next(err);
    }
  };
}

/**
 * Build the GET /api/v1/jobs handler (listing / recovery).
 *
 * Flow:
 *   1. Validate the query with `parseListQuery`; failures yield 400
 *      `validation_error` with the per-field errors in `details.fields`.
 *   2. Require a client id on `req.auth`, otherwise 401 `invalid_token`.
 *   3. `jobService.listJobsByUser({ userId, clientId, status, limit, offset })`.
 *   4. Serialize each job, strip internal fields, and answer
 *      `{ jobs, total, next_cursor }`.
 *
 * @param {object} deps
 * @param {object} deps.jobService - Must provide `listJobsByUser`.
 * @returns {import('express').RequestHandler}
 */
function buildListJobsHandler(deps) {
  const { jobService } = deps;
  return async function listJobsHandler(req, res, next) {
    const startedAtMs = Date.now();
    try {
      const parsed = parseListQuery(req.query);
      if (!parsed.ok) {
        return next(
          new ApiError(400, 'validation_error', '查詢參數驗證失敗', {
            fields: parsed.errors,
          })
        );
      }

      const { userId, status, limit, offset } = parsed.data;
      const clientId =
        req.auth && typeof req.auth.clientId === 'string'
          ? req.auth.clientId
          : null;
      if (!clientId) {
        // The original author expects requireAuth to guarantee a client id;
        // this is a safety net.
        return next(new ApiError(401, 'invalid_token', 'Token 無 client_id'));
      }

      const result = await jobService.listJobsByUser({
        userId,
        clientId,
        status,
        limit,
        offset,
      });

      const jobs = result.jobs.map((j) => stripInternalFields(serializeJobForResponse(j)));
      // A null/undefined nextOffset means there is no further page.
      const nextCursor =
        result.nextOffset != null ? encodeCursor(result.nextOffset) : null;

      logEvent({
        level: 'INFO',
        action: 'jobs.list.success',
        request_id: req.requestId,
        client_id: clientId,
        user_id: userId,
        status,
        total: result.total,
        returned: jobs.length,
        duration_ms: Date.now() - startedAtMs,
      });

      return res.status(200).json({
        jobs,
        total: result.total,
        next_cursor: nextCursor,
      });
    } catch (err) {
      return next(err);
    }
  };
}

/**
 * Build the `details` payload of a 409 conflict response.
 *
 * When the active job record could not be loaded, only its id is reported.
 *
 * NOTE(review): `active_job_status` is the record's raw `status` value, not
 * one mapped through `toExternalStatus` the way `serializeJobForResponse`
 * does - confirm against the API spec which one clients should see.
 *
 * @param {string} activeJobId
 * @param {object|null} activeJob
 * @returns {object}
 */
function buildConflictDetails(activeJobId, activeJob) {
  if (!activeJob) {
    return { active_job_id: activeJobId };
  }
  return {
    active_job_id: activeJobId,
    active_job_status: activeJob.status || null,
    active_job_stage: activeJob.stage || null,
    active_job_progress: activeJob.progress != null ? activeJob.progress : 0,
    active_job_created_at: activeJob.created_at || null,
  };
}

/**
 * Emit one structured log line as JSON on stdout.
 *
 * `service` and `timestamp` are written first; the caller's fields are spread
 * after them and therefore override either key if supplied.
 *
 * @param {object} fields
 */
function logEvent(fields) {
  // eslint-disable-next-line no-console
  console.log(
    JSON.stringify({
      service: 'task-scheduler',
      timestamp: new Date().toISOString(),
      ...fields,
    })
  );
}
啟動條件檢查(TDD §10.3)— defense in depth + // 主要 mount-time check 在 createJobsRouter(Sec M5),但保留此處 runtime check + // 以防 buildCreateJobHandler 被測試或其他路徑直接 mount(例如整合測試的旁路)。 + if (storageBackend !== 'minio') { + return next( + new ApiError( + 500, + 'misconfiguration', + 'POST /api/v1/jobs 需 STORAGE_BACKEND=minio' + ) + ); + } + + // 1. validate fields + files + // T10:limits 由 deps.multipartLimits 注入;validator 內仍會 fallback 到預設值 + const validation = validateCreateJobRequest({ + body: req.body, + files: req.files, + limits: multipartLimits, + }); + if (!validation.ok) { + // Sec C2:ref_image per-file 超標 → 413 file_too_large(語意對齊 TDD §14) + if (validation.tooLarge) { + return next( + new ApiError( + 413, + 'file_too_large', + `ref_image 超過單張 ${validation.tooLarge.limit_bytes} bytes 上限`, + { + field: validation.tooLarge.field, + size_bytes: validation.tooLarge.size_bytes, + limit_bytes: validation.tooLarge.limit_bytes, + } + ) + ); + } + return next( + new ApiError(400, 'validation_error', '欄位驗證失敗', { + fields: validation.errors, + }) + ); + } + const { userId, parameters, metadata, input, refImages } = validation.data; + + // 2. 
生成 job_id + const jobId = uuidv4(); + const now = new Date(); + const createdAt = now.toISOString(); + const expiresAt = new Date( + now.getTime() + DEFAULT_JOB_TTL_SECONDS * 1000 + ).toISOString(); + + // 2.5 ★ Sec M4:寫入放大預檢(廉價 GET,避免 conflict 還是上傳整個 model 才被 reject) + // + // 為什麼還要保留下面的 Lua claim: + // pre-check 與 writeInputToMinIO + claim 之間仍有 race(兩個 request 同時 + // 通過 pre-check 各自完成 MinIO 寫入,最後只有一個能透過 Lua claim)。 + // pre-check 純粹是「optimization」減少寫入放大,最終 atomicity 仍由 Lua 保證。 + // + // 為什麼不在這直接讀 job:{id}(一次省 round-trip): + // 讀 active_job 是 string GET(O(1)、< 100 bytes),快;後續 conflict 流程 + // 才需要 job record。這裡的目的只是「快速 reject」。 + try { + const preCheckJobId = await jobService.getActiveJobId(userId); + if (preCheckJobId) { + const activeJob = await jobService.getJob(preCheckJobId); + logEvent({ + level: 'INFO', + action: 'jobs.create.conflict_precheck', + request_id: req.requestId, + user_id: userId, + active_job_id: preCheckJobId, + }); + return next( + new ApiError( + 409, + 'user_has_active_job', + '使用者目前已有進行中的轉檔任務', + buildConflictDetails(preCheckJobId, activeJob) + ) + ); + } + } catch (preCheckErr) { + // pre-check 失敗不阻擋流程(Redis 短暫故障時讓 Lua claim 那層做最終裁定); + // 只 log WARN。 + logEvent({ + level: 'WARN', + action: 'jobs.create.conflict_precheck_failed', + request_id: req.requestId, + user_id: userId, + error: preCheckErr && preCheckErr.message ? preCheckErr.message : 'unknown', + }); + } + + // 3. ★ 先寫 MinIO(M5 方案 A)。失敗 → 502,Redis 完全乾淨 + let writeResult; + try { + writeResult = await jobService.writeInputToMinIO( + jobId, + input.file, + input.safeFilename, + refImages + ); + } catch (err) { + logEvent({ + level: 'ERROR', + action: 'jobs.create.minio_write_failed', + request_id: req.requestId, + job_id: jobId, + user_id: userId, + client_id: req.auth && req.auth.clientId ? req.auth.clientId : null, + error: err && err.message ? 
err.message : 'unknown', + }); + return next( + new ApiError( + 502, + 'storage_unavailable', + '檔案儲存服務暫時無法使用,請稍後重試' + ) + ); + } + + // 4. 組完整 job record(含 input metadata,對齊 TDD §2.7.1) + const jobRecord = { + job_id: jobId, + // 內部仍保留大寫 status 以兼容 legacy advanceJob / Web UI + // 對外 API 透過 statusMapper(T6)映射為 'created' / 'running' / ... + status: 'ONNX', + stage: 'onnx', + progress: 0, + created_at: createdAt, + updated_at: createdAt, + expires_at: expiresAt, + + // 對外 API 必要欄位(TDD §2.7.1 Phase 1 新增) + origin: 'api', + user_id: userId, + tenant_id: + req.auth && req.auth.tenantId ? req.auth.tenantId : null, + created_by_client_id: + req.auth && req.auth.clientId ? req.auth.clientId : null, + + input: { + filename: input.safeFilename, + object_key: writeResult.inputObjectKey, + size_bytes: input.file.buffer.length, + ref_images_count: refImages.length, + }, + + parameters, + + // 既有 worker 仍會讀 output,保留欄位向後相容 + output: { bie_path: null, nef_path: null }, + + // T9:建 job 同時寫 stage_timings.onnx.started_at(enqueued_at 語意, + // 對齊 §4.1 #3 Phase 1 決策)。因為 Lua claim 寫入 + enqueueStage('onnx') + // 在這個 handler 裡是一氣呵成的(中間沒有「created 但未 enqueue」的狀態), + // 所以建 job 時就把 onnx.started_at 設為 createdAt 與 atomic 寫入並列。 + // + // 為什麼不依賴 advanceJob 的 stage_timings 寫入機制: + // advanceJob 只在 worker 完成 onnx 後才被呼叫;建 job 那一刻沒有 + // advanceJob 觸發點,必須在 jobRecord 初始化時就寫入。 + // + // 對 statusMapper 的影響: + // statusMapper 的 'ONNX + onnx.started_at == null → created' branch + // 在 v1 流程中不會被觸發(因為 onnx.started_at 永遠有值),但仍對 + // legacy job 與防禦性 fallback 場景有用,故 statusMapper 保留現邏輯。 + stage_timings: { + onnx: { started_at: createdAt, completed_at: null }, + bie: { started_at: null, completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + stage_progress: 0, + + error: null, + metadata: metadata || {}, + }; + + // 5. 
★ Lua claim active + 寫完整 job record(M5 方案 A) + let claimResult; + try { + claimResult = await jobService.claimActiveAndCreate({ + userId, + jobId, + jobRecord, + ttlSeconds: DEFAULT_JOB_TTL_SECONDS, + }); + } catch (err) { + // Lua / Redis 異常 — 此時 MinIO 已寫入,需要 cleanup + logEvent({ + level: 'ERROR', + action: 'jobs.create.claim_lua_failed', + request_id: req.requestId, + job_id: jobId, + user_id: userId, + error: err && err.message ? err.message : 'unknown', + }); + // fire-and-forget cleanup(不影響 response) + cleanupPromise = jobService.cleanupInputObjects(writeResult.uploadedKeys); + return next(new ApiError(500, 'internal_error', '伺服器暫時無法處理請求')); + } + + if (!claimResult.ok && claimResult.conflict) { + // 衝突:clean MinIO(fire-and-forget);回 409 + active_job 詳情 + cleanupPromise = jobService.cleanupInputObjects(writeResult.uploadedKeys); + + // Reviewer Major-1 修復:直接用 claimResult.activeJobId 讀 job:{id}, + // 不再走 getActiveJob(userId) → 重讀 user:{}:active_job → 再讀 job:{}。 + // + // 為什麼: + // 舊流程的兩次 GET 之間有 race window — 例如「另一 worker 完成原本的 + // active job + 同 user 另一 client 補上新 claim」可能讓我們讀到 + // 不一致的 record(active_job_id 是 X、但 status/stage 來自 Y)。 + // 直接用 claimResult.activeJobId 做 GET 確保兩個欄位一致: + // - active_job_id:Lua 從 EXISTS 後 GET 拿到,與 active_job key 同 atomic + // - active_job_status/stage/progress/created_at:對應同一個 job_id + // 即便該 job 在 Lua 衝突 → getJob 之間被刪掉,仍會走 + // buildConflictDetails(activeJobId, null) 回 fallback 只帶 active_job_id + // 這是「對 client 安全的退化」,沒有「對不上」的 inconsistency 風險。 + const activeJob = await jobService.getJob(claimResult.activeJobId); + + logEvent({ + level: 'INFO', + action: 'jobs.create.conflict', + request_id: req.requestId, + job_id: jobId, + user_id: userId, + active_job_id: claimResult.activeJobId, + }); + + return next( + new ApiError( + 409, + 'user_has_active_job', + '使用者目前已有進行中的轉檔任務', + buildConflictDetails(claimResult.activeJobId, activeJob) + ) + ); + } + + // 6. 
enqueue 第一階段(onnx) + try { + await jobService.enqueueStage('onnx', jobRecord); + } catch (err) { + // enqueue 失敗 — Redis 已有 job record + active_job 已設。 + // + // Sec M2 + Reviewer Major-2 修復:補償釋放 active_job 與 job record, + // 避免 user 被鎖 7 天 TTL。 + // + // 為什麼之前不做(被 review 反對): + // 原本判斷「xadd 失敗代表 Redis 整體失常 = release 也會失敗」,但這不準: + // - xadd 失敗也可能是 Stream MAXLEN trim 異常、短暫網路抖動等局部問題 + // - 即便 release 也失敗,最差情境也只是回到「等 7d」當前行為,沒有劣化 + // - pre-production audit 標準下,必須有明確的補償方案 + // + // 為什麼用 fire-and-forget(不 await): + // - response 應立即回給 client(500),不該被 release 的延遲拖慢 + // - releaseActiveJob 失敗 log 已記錄,ops 可介入 + // - cleanup MinIO 也用同樣模式(既有設計) + logEvent({ + level: 'ERROR', + action: 'jobs.create.enqueue_failed', + request_id: req.requestId, + job_id: jobId, + user_id: userId, + error: err && err.message ? err.message : 'unknown', + }); + + // 補償:fire-and-forget release Redis state + cleanup MinIO + Promise.all([ + jobService.releaseActiveJob(userId, jobId).catch((rollbackErr) => + logEvent({ + level: 'WARN', + action: 'jobs.create.release_active_job_failed', + request_id: req.requestId, + job_id: jobId, + user_id: userId, + error: + rollbackErr && rollbackErr.message + ? rollbackErr.message + : 'unknown', + }) + ), + jobService.cleanupInputObjects(writeResult.uploadedKeys), + ]).catch(() => { + /* fire-and-forget;個別 catch 已處理 */ + }); + + return next(new ApiError(500, 'internal_error', '工作排程失敗')); + } + + // 7. 成功 → 201 + logEvent({ + level: 'INFO', + action: 'jobs.create.success', + request_id: req.requestId, + job_id: jobId, + user_id: userId, + client_id: req.auth && req.auth.clientId ? 
req.auth.clientId : null, + size_bytes: input.file.buffer.length, + ref_images_count: refImages.length, + duration_ms: Date.now() - startedAtMs, + }); + return res.status(201).json(buildCreateResponse(jobRecord)); + } catch (err) { + return next(err); + } finally { + // 確保 cleanupPromise(fire-and-forget)已 schedule,不卡 response + if (cleanupPromise) { + cleanupPromise.catch(() => { + /* 已在內部 log */ + }); + } + } + }; +} + +/** + * 建立 v1 jobs router。 + * + * @param {object} [deps] + * @returns {import('express').Router} + */ +function createJobsRouter(deps = {}) { + const router = express.Router(); + + const { + jobService, + uploader, + config, + rateLimit, + storageBackend, + uploadConcurrencyLimiter, // T10:可選,由 app.js 注入;缺漏代表無 concurrency 限制 + } = deps; + const effectiveStorageBackend = + storageBackend || process.env.STORAGE_BACKEND || 'local'; + + // T10:multipart limits — 從 config.multipart 拿出來,若無 config 則 undefined(fallback 預設) + const multipartLimits = + config && config.multipart + ? 
{ + refImageMaxBytes: config.multipart.refImageMaxBytes, + } + : undefined; + + // POST /api/v1/jobs — 建 job(T5 實作) + if (jobService && uploader && config) { + // Sec M5:mount 階段就決定 STORAGE_BACKEND 是否合法。 + // 若為非 minio,**不掛 multer**(避免 misconfig 仍吃 500MB body 才 reject), + // 直接掛一個極小 handler 回 500 misconfiguration。 + // GET / DELETE / download-tokens 端點仍正常掛(read-only / Phase 2 預留路由 + // 不依賴 storage backend)。 + if (effectiveStorageBackend !== 'minio') { + router.post('/', (req, res, next) => { + return next( + new ApiError( + 500, + 'misconfiguration', + 'POST /api/v1/jobs 需 STORAGE_BACKEND=minio' + ) + ); + }); + } else { + const requireWriteAuth = requireAuth(config.converter.scopeWrite, { config }); + const perClientLimiter = createPerClientRateLimiter(rateLimit || {}); + const handler = buildCreateJobHandler({ + jobService, + storageBackend: effectiveStorageBackend, + multipartLimits, + }); + + // T10:multer fields 的 ref_images maxCount 也對齊 config(預設 100) + const refImagesMaxCount = + config && + config.multipart && + Number.isInteger(config.multipart.refImagesMaxCount) && + config.multipart.refImagesMaxCount > 0 + ? 
config.multipart.refImagesMaxCount + : 100; + + // T10 D5:concurrency limiter 必須掛在 multer **之前**(避免吃 500MB 才被擋) + // 但要在 requireAuth + rate limit 之後(避免 unauthorized 流量擠占 slot) + // 順序:requireAuth → rate limit → concurrency → multer → multerErrorAdapter → handler + const middlewareChain = [requireWriteAuth, perClientLimiter]; + if (uploadConcurrencyLimiter) { + middlewareChain.push(uploadConcurrencyLimiter); + } + middlewareChain.push( + uploader.fields([ + { name: 'model', maxCount: 1 }, + { name: 'ref_images', maxCount: refImagesMaxCount }, + ]), + multerErrorAdapter, + handler + ); + + router.post('/', ...middlewareChain); + } + } else { + // 缺 deps(單元測試常見情境)→ fallback 501。 + // 為什麼保留這個 fallback: + // - T3 既有 v1-routes integration test 仍會以「無 deps」方式啟動 app; + // 若 router 直接 throw 會破壞既有測試。 + // - production 啟動會由 server entry / createApp 注入完整 deps;fallback + // 不會在 prod 觸發。 + // message 仍含 'T5' 字樣以對齊 v1-routes integration test 的既有期待。 + router.post('/', (req, res, next) => { + return next( + new ApiError( + 501, + 'not_implemented', + 'POST /api/v1/jobs 尚未注入完整依賴(T5 啟動需 jobService / uploader / config)' + ) + ); + }); + } + + // GET /api/v1/jobs — 列出 / Recovery(T6) + // GET /api/v1/jobs/:id — 單一 job 狀態 + ETag(T6) + // + // 注意:T6 GET 端點同樣需要 jobService 才能運作;deps 缺漏(單元測試常見情境) + // → fallback 到 501(與 POST 一致),這樣 v1-routes integration test 仍可在 + // 沒有完整 deps 時用 fallback path 跑過。 + if (jobService && config) { + const requireReadAuth = requireAuth(config.converter.scopeRead, { config }); + const perClientLimiterRead = createPerClientRateLimiter(rateLimit || {}); + const getJobHandler = buildGetJobHandler({ jobService }); + const listJobsHandler = buildListJobsHandler({ jobService }); + + // 順序:requireAuth → rate limit → handler + // 為什麼 GET 也走 perClientRateLimiter: + // 防止單一 client 用 GET /jobs polling 拖慢 Redis(即使每次便宜,10000 req + // 仍可造成 Redis 飽和)。300 req / 5min 對 polling 場景充裕(每秒 1 次)。 + router.get( + '/', + requireReadAuth, + perClientLimiterRead, + listJobsHandler + ); + 
router.get( + '/:id', + requireReadAuth, + perClientLimiterRead, + getJobHandler + ); + } else { + // 缺 deps fallback → 501(保留 v1-routes integration test 既有期待) + router.get('/', (req, res, next) => { + return next( + new ApiError( + 501, + 'not_implemented', + 'GET /api/v1/jobs 尚未注入完整依賴(T6 啟動需 jobService / config)' + ) + ); + }); + router.get('/:id', (req, res, next) => { + return next( + new ApiError( + 501, + 'not_implemented', + 'GET /api/v1/jobs/:id 尚未注入完整依賴(T6 啟動需 jobService / config)' + ) + ); + }); + } + + // Phase 2 預留 + router.post('/:id/download-tokens', (req, res, next) => { + return next( + new ApiError( + 501, + 'not_implemented', + 'POST /api/v1/jobs/:id/download-tokens 預留 Phase 2 實作' + ) + ); + }); + + router.delete('/:id', (req, res, next) => { + return next( + new ApiError( + 501, + 'not_implemented', + 'DELETE /api/v1/jobs/:id 預留 Phase 2 實作' + ) + ); + }); + + return router; +} + +module.exports = { + createJobsRouter, + // 測試暴露(不對外) + _internals: { + buildCreateJobHandler, + buildCreateResponse, + buildConflictDetails, + multerErrorAdapter, + DEFAULT_JOB_TTL_SECONDS, + // T6 + buildGetJobHandler, + buildListJobsHandler, + serializeJobForResponse, + stripInternalFields, + parseListQuery, + encodeCursor, + decodeCursor, + GET_LIST_DEFAULTS, + }, +}; diff --git a/apps/task-scheduler/src/routes/v1/promote.js b/apps/task-scheduler/src/routes/v1/promote.js new file mode 100644 index 0000000..b444115 --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/promote.js @@ -0,0 +1,621 @@ +/** + * /api/v1/jobs/:id/promote 路由 — T7 實作。 + * + * 流程(對齊 TDD §1.4.5、§2.10、§6.1-§6.5、tasks-phase1.md §2 T7): + * + * 1. requireAuth('converter:job.write') + * ├── 401 invalid_token / 403 insufficient_scope(含主動 destroy 連線,T1 M2) + * └── ok → 繼續 + * + * 2. validate body:targets 非空、source ∈ {onnx, bie, nef}、target_object_key 安全 + * ├── 失敗 → 400 validation_error / 422 invalid_object_key + * └── ok → 繼續 + * + * 3. 
讀 job:{id} + * ├── 不存在 / 不屬於 client → 404 job_not_found(不洩漏存在性) + * └── ok → 繼續 + * + * 4. ★ 冪等性(兩個層級): + * a. job.promoted === true:直接回 200 + 既有 promoted_object_keys + * (不重打 FAA、不查 MinIO) + * b. (非冪等命中)狀態檢查:status !== 'completed' → 409 job_not_ready_for_promote + * + * 5. 對每個 target(**序列執行**避免 FAA 端並發壓力): + * a. 驗 job.output[source] / job.result_object_keys[source] 存在 → 否則 409 source_not_available + * b. minio.headObject(sourceKey) 取 size + contentType + * c. faa.putFile(targetKey, () => minio.getObjectStream(sourceKey).stream, { contentLength, contentType }) + * (streamFactory 形式,重試時可拿新 stream) + * d. 收集 { source, target_object_key, size_bytes, file_access_agent_etag, promoted_at } + * + * 6. 全部成功 → jobService.markPromoted(jobId, ...) → 回 200 + { job_id, promoted: [...] } + * (part-failure 在 stream 模式下難以原子化;Phase 1 採「有失敗就 throw 並回 502」) + * + * 重要決策: + * - **流程上不接受 client 指定 NAS 命名格式以外的東西**:caller 傳 target_object_key + * 但會 sanity check(拒 `..` `\\` 絕對路徑)。VisionA 自己決定 key 命名(TDD §6.1)。 + * - **大檔 stream**:用 streamFactory pattern 確保重試時能拿新 stream(HTTP body 不可 replay)。 + * - **不洩露**:FAA 內部錯誤 message 不直接傳給 v1 client;統一轉成 502 / 503 + 文案。 + * + * 認證: + * T7 階段掛 `requireAuth('converter:job.write')`(與 POST /jobs 同 scope)。 + */ + +'use strict'; + +const express = require('express'); + +const { ApiError } = require('../../middleware/errorHandler'); +const { requireAuth } = require('../../auth/middleware'); +const { createPerClientRateLimiter } = require('../../middleware/perClientRateLimit'); +const { createFaaClient } = require('../../fileAccessAgent/client'); +const { + FAAClientError, + FAAUnauthorizedError, + FAAServerError, + FAATimeoutError, +} = require('../../fileAccessAgent/errors'); + +// --------------------------------------------------------------------------- +// 常數 +// --------------------------------------------------------------------------- + +const VALID_SOURCES = Object.freeze(new Set(['onnx', 'bie', 'nef'])); + +/** target_object_key 上限(防 oversized 
/**
 * Decides whether a caller-supplied `target_object_key` is safe to use.
 *
 * A key is rejected when it:
 *   - is not a string, is empty, or is longer than MAX_TARGET_KEY_LENGTH
 *   - starts with `/`
 *   - contains `..`
 *   - contains a backslash, a control character (0x00-0x1F, 0x7F),
 *     or any of `?`, `#`, `%`
 *
 * @param {unknown} key
 * @returns {boolean} true when the key passes every check
 */
function isValidTargetKey(key) {
  if (typeof key !== 'string') {
    return false;
  }

  const isEmpty = key.length === 0;
  const isTooLong = key.length > MAX_TARGET_KEY_LENGTH;
  if (isEmpty || isTooLong) {
    return false;
  }

  const hasLeadingSlash = key.charAt(0) === '/';
  const hasTraversal = key.indexOf('..') !== -1;
  // One character class covers backslash, control chars (NUL included) and
  // the URL-structure characters ? # %.
  // eslint-disable-next-line no-control-regex
  const hasForbiddenChar = /[\x00-\x1F\x7F\\?#%]/.test(key);

  return !(hasLeadingSlash || hasTraversal || hasForbiddenChar);
}
{ + ok: false, + status: 400, + code: 'validation_error', + message: 'request body 必須為 JSON 物件', + }; + } + const targets = body.targets; + if (!Array.isArray(targets)) { + return { + ok: false, + status: 400, + code: 'validation_error', + message: 'targets 欄位必須為陣列', + details: { fields: [{ field: 'targets', message: 'must be an array' }] }, + }; + } + if (targets.length === 0) { + return { + ok: false, + status: 400, + code: 'validation_error', + message: 'targets 不可為空', + details: { fields: [{ field: 'targets', message: 'must contain at least 1 item' }] }, + }; + } + if (targets.length > MAX_TARGETS) { + return { + ok: false, + status: 400, + code: 'validation_error', + message: `targets 數量超過上限 ${MAX_TARGETS}`, + details: { fields: [{ field: 'targets', message: `max ${MAX_TARGETS}` }] }, + }; + } + + const validated = []; + const fieldErrors = []; + // 同時擋重複(同 source 多次 promote 對 client 沒意義,避免處理混亂) + const seenSources = new Set(); + for (let i = 0; i < targets.length; i += 1) { + const t = targets[i]; + if (!t || typeof t !== 'object') { + fieldErrors.push({ + field: `targets[${i}]`, + message: 'must be an object', + }); + continue; + } + const source = t.source; + if (typeof source !== 'string' || !VALID_SOURCES.has(source)) { + fieldErrors.push({ + field: `targets[${i}].source`, + message: `must be one of: ${[...VALID_SOURCES].join(', ')}`, + }); + continue; + } + if (seenSources.has(source)) { + fieldErrors.push({ + field: `targets[${i}].source`, + message: `duplicate source '${source}' in same request`, + }); + continue; + } + seenSources.add(source); + + if (!isValidTargetKey(t.target_object_key)) { + // 422 invalid_object_key(TDD §14) + return { + ok: false, + status: 422, + code: 'invalid_object_key', + message: 'target_object_key 格式不合法', + details: { + field: `targets[${i}].target_object_key`, + reason: + '不可為空、不可含 .. / 反斜線 / 控制字元 / 開頭斜線 / ? 
/**
 * Looks up the object key recorded on a job for the given source stage.
 *
 * Two record layouts are supported, checked in this order:
 *   1. `job.result_object_keys[source]`
 *   2. `job.output[`${source}_path`]` (e.g. `output.bie_path`)
 *
 * Only non-empty strings count as a key. A missing or non-object `job`
 * yields null instead of throwing.
 *
 * @param {object|null|undefined} job
 * @param {string} source - e.g. 'onnx' / 'bie' / 'nef'
 * @returns {string|null} the object key, or null when none is recorded
 */
function getJobOutputKey(job, source) {
  if (!job || typeof job !== 'object') {
    return null;
  }

  const isUsableKey = (value) => typeof value === 'string' && value.length > 0;

  const resultKeys = job.result_object_keys;
  if (resultKeys && typeof resultKeys === 'object' && isUsableKey(resultKeys[source])) {
    return resultKeys[source];
  }

  const output = job.output;
  if (output && typeof output === 'object') {
    const legacyKey = output[`${source}_path`];
    if (isUsableKey(legacyKey)) {
      return legacyKey;
    }
  }

  return null;
}
/**
 * Builds the POST /api/v1/jobs/:id/promote handler.
 *
 * Flow:
 *   1. validate the `:id` path param
 *   2. validate the request body (targets)
 *   3. load the job; a missing job or a job created by another client -> 404
 *   4a. idempotency short-circuit: an already-promoted job returns its stored
 *       promoted_object_keys without contacting FAA or MinIO
 *   4b. the job's internal status must be 'COMPLETED'
 *   5. resolve the source object key of EVERY target first, then upload the
 *      targets one by one (HEAD on MinIO, streamed PUT to FAA)
 *   6. mark the job as promoted and respond 200
 *
 * @param {object} deps
 * @param {object} deps.jobService - must provide getJob / markPromoted
 * @param {object} deps.minio      - must provide headObject / getObjectStream
 * @param {object} deps.faaClient  - must provide putFile
 * @returns {Function} async Express handler `(req, res, next)`
 * @throws {Error} when any of the three dependencies is missing
 */
function buildPromoteHandler(deps) {
  const { jobService, minio, faaClient } = deps;
  if (!jobService) throw new Error('[promote] deps.jobService is required');
  if (!minio) throw new Error('[promote] deps.minio is required');
  if (!faaClient) throw new Error('[promote] deps.faaClient is required');

  return async function promoteHandler(req, res, next) {
    const startedAtMs = Date.now();
    try {
      // 1. Path param.
      const jobId = req.params && req.params.id;
      if (typeof jobId !== 'string' || jobId === '') {
        return next(new ApiError(404, 'job_not_found', 'Job 不存在'));
      }

      // 2. Body.
      const validation = validatePromoteBody(req.body);
      if (!validation.ok) {
        return next(
          new ApiError(
            validation.status,
            validation.code,
            validation.message,
            validation.details
          )
        );
      }
      const { targets } = validation;

      // 3. Load the job and apply client isolation. A job owned by another
      //    client is reported as 404 so its existence is not revealed.
      const job = await jobService.getJob(jobId);
      const clientId =
        req.auth && typeof req.auth.clientId === 'string'
          ? req.auth.clientId
          : null;

      if (
        !job ||
        (clientId && job.created_by_client_id && job.created_by_client_id !== clientId)
      ) {
        logEvent({
          level: 'INFO',
          action: 'promote.not_found',
          request_id: req.requestId,
          job_id: jobId,
          client_id: clientId,
          duration_ms: Date.now() - startedAtMs,
        });
        return next(new ApiError(404, 'job_not_found', 'Job 不存在'));
      }

      // 4a. Idempotency short-circuit.
      if (job.promoted === true && Array.isArray(job.promoted_object_keys)) {
        logEvent({
          level: 'INFO',
          action: 'promote.idempotent_hit',
          request_id: req.requestId,
          job_id: jobId,
          client_id: clientId,
          existing_count: job.promoted_object_keys.length,
          duration_ms: Date.now() - startedAtMs,
        });
        return res.status(200).json({
          job_id: jobId,
          promoted: job.promoted_object_keys,
        });
      }

      // 4b. Status check against the internal (upper-case) status.
      if (job.status !== 'COMPLETED') {
        logEvent({
          level: 'INFO',
          action: 'promote.not_ready',
          request_id: req.requestId,
          job_id: jobId,
          client_id: clientId,
          internal_status: job.status,
          duration_ms: Date.now() - startedAtMs,
        });
        return next(
          new ApiError(
            409,
            'job_not_ready_for_promote',
            'Job 尚未完成,無法 promote',
            { current_status: job.status || null }
          )
        );
      }

      // 5a. Resolve every source key BEFORE uploading anything. This check is
      //     purely local, so a request that names an unavailable source is
      //     rejected with 409 without first pushing earlier targets to FAA.
      const plan = [];
      for (const target of targets) {
        const sourceKey = getJobOutputKey(job, target.source);
        if (!sourceKey) {
          logEvent({
            level: 'INFO',
            action: 'promote.source_not_available',
            request_id: req.requestId,
            job_id: jobId,
            client_id: clientId,
            source: target.source,
          });
          return next(
            new ApiError(
              409,
              'source_not_available',
              `Job 沒有 ${target.source} 階段的結果可 promote`,
              { source: target.source }
            )
          );
        }
        plan.push({ target, sourceKey });
      }

      // 5b-5d. Upload sequentially (one target at a time); the first failure
      //        aborts the whole request.
      const promotedResults = [];
      for (const { target, sourceKey } of plan) {
        // HEAD to obtain size + content type for the PUT.
        let head;
        try {
          head = await minio.headObject(sourceKey);
        } catch (err) {
          // err.message is deliberately not logged (may contain internal
          // endpoint / key details); only name and code are recorded.
          logEvent({
            level: 'ERROR',
            action: 'promote.minio_head_failed',
            request_id: req.requestId,
            job_id: jobId,
            client_id: clientId,
            source: target.source,
            error_name: err && err.name ? err.name : 'unknown',
            error_code: err && err.code ? err.code : null,
          });
          return next(
            new ApiError(
              502,
              'storage_unavailable',
              '無法讀取結果檔 metadata,請稍後重試'
            )
          );
        }
        if (!head || typeof head.contentLength !== 'number') {
          logEvent({
            level: 'ERROR',
            action: 'promote.minio_head_no_size',
            request_id: req.requestId,
            job_id: jobId,
            client_id: clientId,
            source: target.source,
          });
          return next(
            new ApiError(
              502,
              'storage_unavailable',
              '結果檔 metadata 不完整,請稍後重試'
            )
          );
        }

        // A factory (not a stream) is handed to putFile so that each attempt
        // can obtain a fresh stream; an HTTP body cannot be replayed.
        const streamFactory = async () => {
          const got = await minio.getObjectStream(sourceKey);
          if (!got || !got.stream) {
            throw new Error(
              `[promote] minio.getObjectStream(${sourceKey}) returned no stream`
            );
          }
          return got.stream;
        };

        let putMeta;
        try {
          putMeta = await faaClient.putFile(target.target_object_key, streamFactory, {
            contentLength: head.contentLength,
            contentType: head.contentType || 'application/octet-stream',
          });
        } catch (err) {
          logEvent({
            level: 'WARN',
            action: 'promote.faa_put_failed',
            request_id: req.requestId,
            job_id: jobId,
            client_id: clientId,
            source: target.source,
            error_name: err && err.name ? err.name : 'unknown',
            error_status: err && typeof err.status === 'number' ? err.status : null,
          });
          // FAA internals are never forwarded to the client.
          return next(classifyFaaError(err));
        }

        // Tolerate a putFile that resolves without a metadata object: the
        // upload itself succeeded, so fall back to the HEAD size / null etag.
        const meta = putMeta || {};
        promotedResults.push({
          source: target.source,
          target_object_key: target.target_object_key,
          size_bytes: meta.sizeBytes != null ? meta.sizeBytes : head.contentLength,
          file_access_agent_etag: meta.etag || null,
          promoted_at: new Date().toISOString(),
        });
      }

      // 6. Persist the promoted state (enables the idempotent path next time).
      const finalPromotedAt = new Date().toISOString();
      try {
        await jobService.markPromoted(jobId, {
          promotedAt: finalPromotedAt,
          promotedKeys: promotedResults,
        });
      } catch (err) {
        // The files are already uploaded; a failed bookkeeping write is logged
        // but does not change the response. A later promote call will not hit
        // the idempotent path and will upload again (the original note
        // assumes re-PUT to FAA is idempotent — not verifiable from here).
        logEvent({
          level: 'ERROR',
          action: 'promote.mark_failed',
          request_id: req.requestId,
          job_id: jobId,
          client_id: clientId,
          error_name: err && err.name ? err.name : 'unknown',
          error_code: err && err.code ? err.code : null,
        });
      }

      logEvent({
        level: 'INFO',
        action: 'promote.success',
        request_id: req.requestId,
        job_id: jobId,
        client_id: clientId,
        target_count: promotedResults.length,
        duration_ms: Date.now() - startedAtMs,
      });

      return res.status(200).json({
        job_id: jobId,
        promoted: promotedResults,
      });
    } catch (err) {
      return next(err);
    }
  };
}
buildPromoteHandler, + validatePromoteBody, + isValidTargetKey, + getJobOutputKey, + classifyFaaError, + VALID_SOURCES, + MAX_TARGET_KEY_LENGTH, + MAX_TARGETS, + }, + // 為 wiring 簡便:暴露 createFaaClient(保持 promote.js 是 FAA 客戶端的單一接觸點) + createFaaClient, +}; diff --git a/apps/task-scheduler/src/routes/v1/validators/createJob.js b/apps/task-scheduler/src/routes/v1/validators/createJob.js new file mode 100644 index 0000000..9bf4e69 --- /dev/null +++ b/apps/task-scheduler/src/routes/v1/validators/createJob.js @@ -0,0 +1,331 @@ +/** + * POST /api/v1/jobs validator(T5)。 + * + * 對齊 TDD §1.4.2 / Review §4.1 #2 與 doc-review m6/m7。 + * + * 規則摘要: + * - model file 必填,副檔名 ∈ {`.onnx`, `.tflite`}(PRD §4.4,**非** TDD §1.4.2 的 6 種, + * 理由見 doc-review m6——以 PRD F-01 為準,因為 PRD 才是 user-facing) + * - ref_images[] 可選,每張獨立 sanitize(不限副檔名,但 size 由 multer limit 把關) + * - user_id 必填,1-128 chars,不含 `/` `\` `..` `:` 控制字元 + * - model_id 必填,轉 int 後 1 ≤ x ≤ 65535 + * - version 必填,1-32 chars + * - platform 必填,enum: 520 / 720 / 530 / 630 / 730 + * - enable_* 可選,'true' / 'false' 字串轉 boolean;缺漏視為 false(doc-review m7) + * - metadata 可選,若有則必須是合法 JSON 物件字串 + * + * 設計原則: + * - validator 只負責「靜態驗證」(欄位存在 + 格式);對 STORAGE_BACKEND / 衝突等 + * runtime 條件由 handler 處理 + * - 一律回 `{ ok, errors, data }` 而非 throw,方便 handler 統一收集所有錯誤 + * - errors 形狀對齊 TDD §1.5 的 details.field:`[{ field, message }]` + * + * 安全: + * - 所有 string 都先 sanitize(trim / 控制字元檢查) + * - 副檔名比對前先 lowercase,避免 `MODEL.ONNX` 被當作未知格式 + * - parseInt 用 base 10 + 檢查 NaN,避免 `0x` 這類前綴炸進來 + */ + +'use strict'; + +const { + sanitizeFilename, + getExtension, + validateUserId, +} = require('../../../utils/sanitize'); + +/** + * model 副檔名白名單(與 PRD §4.4 對齊)。 + * + * 為什麼不採 TDD §1.4.2 的 6 種: + * doc-review m6 已標明 TDD 與 PRD 不一致,PRD §4.1 F-01 / §4.4 US-08 明確 + * 寫「支援 `.onnx` / `.tflite`」。本實作選 PRD 為準,因為它代表 user 看到的合約。 + */ +const ALLOWED_MODEL_EXTENSIONS = new Set(['.onnx', '.tflite']); + +/** + * platform enum,對齊 TDD §1.4.2。 + */ +const ALLOWED_PLATFORMS = new Set(['520', 
/**
 * Appends one validation error to `errors` in the shared
 * `{ field, message }` shape.
 *
 * @param {{field: string, message: string}[]} errors - list mutated in place
 * @param {string} field
 * @param {string} message
 */
function pushError(errors, field, message) {
  const entry = { field, message };
  errors.push(entry);
}
model_id: number, + * version: string, + * platform: string, + * enable_evaluate: boolean, + * enable_sim_fp: boolean, + * enable_sim_fixed: boolean, + * enable_sim_hw: boolean, + * }, + * metadata: object | null, + * input: { + * file: object, // multer file object(含 buffer) + * safeFilename: string, + * extension: string, + * }, + * refImages: Array<{ + * file: object, + * safeFilename: string, + * }>, + * } + * }} + */ +function validateCreateJobRequest({ body, files, limits } = {}) { + const errors = []; + + // T10:refImage 上限由 opts.limits.refImageMaxBytes 注入;缺漏時 fallback 到 + // DEFAULT_MAX_REF_IMAGE_SIZE_BYTES(保持向後相容)。 + // 不接受非正數(避免 0 / 負數讓所有 ref_images 都被 reject)。 + const refImageMaxBytes = + limits && + Number.isInteger(limits.refImageMaxBytes) && + limits.refImageMaxBytes > 0 + ? limits.refImageMaxBytes + : DEFAULT_MAX_REF_IMAGE_SIZE_BYTES; + + // 1. user_id + const userIdRaw = body && typeof body.user_id === 'string' ? body.user_id : ''; + const userId = validateUserId(userIdRaw); + if (!userId) { + pushError(errors, 'user_id', 'user_id 必填,1-128 字元,不含 / \\ : 或 ..'); + } + + // 2. model_id + let modelIdInt = null; + const modelIdRaw = + body && typeof body.model_id === 'string' ? body.model_id.trim() : ''; + if (modelIdRaw === '') { + pushError(errors, 'model_id', 'model_id 必填'); + } else if (!/^\d+$/.test(modelIdRaw)) { + pushError(errors, 'model_id', 'model_id 必須為非負整數'); + } else { + modelIdInt = parseInt(modelIdRaw, 10); + if (modelIdInt < 1 || modelIdInt > 65535) { + pushError(errors, 'model_id', 'model_id 範圍必須在 1 ~ 65535'); + modelIdInt = null; + } + } + + // 3. version(Sec M3:嚴格白名單) + const versionRaw = + body && typeof body.version === 'string' ? body.version.trim() : ''; + if (versionRaw === '' || versionRaw.length > 32) { + pushError(errors, 'version', 'version 必填,最多 32 字元'); + } else if (!VERSION_WHITELIST.test(versionRaw)) { + pushError( + errors, + 'version', + 'version 僅可包含英數字、`.`、`_`、`-`' + ); + } + + // 4. 
platform + const platformRaw = + body && typeof body.platform === 'string' ? body.platform.trim() : ''; + if (!ALLOWED_PLATFORMS.has(platformRaw)) { + pushError( + errors, + 'platform', + `platform 必須為 ${[...ALLOWED_PLATFORMS].join(' / ')} 之一` + ); + } + + // 5. enable_* booleans —— 缺漏視為 false(doc-review m7) + function parseBoolean(field) { + const raw = body && body[field]; + if (raw === undefined || raw === null || raw === '') return false; + if (raw === 'true') return true; + if (raw === 'false') return false; + pushError(errors, field, `${field} 必須為 'true' 或 'false'(字串)`); + return false; + } + const enableEvaluate = parseBoolean('enable_evaluate'); + const enableSimFp = parseBoolean('enable_sim_fp'); + const enableSimFixed = parseBoolean('enable_sim_fixed'); + const enableSimHw = parseBoolean('enable_sim_hw'); + + // 6. metadata(可選) + let metadata = null; + const metadataRaw = body && body.metadata; + if (metadataRaw !== undefined && metadataRaw !== null && metadataRaw !== '') { + if (typeof metadataRaw !== 'string') { + pushError(errors, 'metadata', 'metadata 必須為合法 JSON 物件字串'); + } else { + try { + const parsed = JSON.parse(metadataRaw); + if ( + parsed === null || + typeof parsed !== 'object' || + Array.isArray(parsed) + ) { + pushError(errors, 'metadata', 'metadata 必須為合法 JSON 物件(非 array / 非 null)'); + } else { + metadata = parsed; + } + } catch (_) { + pushError(errors, 'metadata', 'metadata 必須為合法 JSON 物件字串'); + } + } + } + + // 7. model file(必填) + // multer 在 fields config 是 `{ name: 'model', maxCount: 1 }`,所以 req.files.model 是陣列 + const modelArr = files && files.model; + const modelFile = Array.isArray(modelArr) && modelArr.length > 0 ? 
modelArr[0] : null; + let safeModelFilename = ''; + let modelExt = ''; + if (!modelFile) { + pushError(errors, 'model', 'model 檔案為必填'); + } else { + safeModelFilename = sanitizeFilename(modelFile.originalname || 'model'); + modelExt = getExtension(safeModelFilename); + if (!ALLOWED_MODEL_EXTENSIONS.has(modelExt)) { + pushError( + errors, + 'model', + `不支援的模型副檔名(${modelExt || '無'}),僅接受 ${[...ALLOWED_MODEL_EXTENSIONS].join(' / ')}` + ); + } + if (!modelFile.buffer || modelFile.buffer.length === 0) { + pushError(errors, 'model', 'model 檔案為空'); + } + } + + // 8. ref_images[] (optional) + // multer fields name 是 `ref_images[]`(與 server.js 既有對齊),但 multer 會 + // 把括號吃掉,所以 req.files 的 key 也叫 `ref_images`。同時對齊 legacy.js L82。 + const refImagesArr = + files && (files.ref_images || files['ref_images[]']); + const refImages = Array.isArray(refImagesArr) ? refImagesArr : []; + + // Sec C2:per-file size 檢查(multer fileSize 用 model 上限 500MB,但 ref_images + // 100 張 × 500MB = 50GB 單請求 OOM)。用 per-file 10MB 阻擋(T10:env 可調整)。 + // 任一張超標即視為 413 file_too_large(語意對齊 TDD §14)。 + let oversizedRefImage = null; // { index, size } + for (let idx = 0; idx < refImages.length; idx += 1) { + const file = refImages[idx]; + const size = + file && file.buffer && typeof file.buffer.length === 'number' + ? 
file.buffer.length + : 0; + if (size > refImageMaxBytes) { + oversizedRefImage = { index: idx, size }; + break; // 第一張 oversized 即停(避免遍歷大量 files) + } + } + + const safeRefImages = refImages.map((file, idx) => ({ + file, + safeFilename: sanitizeFilename(file.originalname || `image_${idx}.bin`), + })); + + // 優先回 413 file_too_large(語意比 400 validation_error 更精確) + if (oversizedRefImage) { + return { + ok: false, + errors, + tooLarge: { + field: `ref_images[${oversizedRefImage.index}]`, + size_bytes: oversizedRefImage.size, + limit_bytes: refImageMaxBytes, + }, + }; + } + + if (errors.length > 0) { + return { ok: false, errors }; + } + + return { + ok: true, + errors: [], + data: { + userId, + parameters: { + model_id: modelIdInt, + version: versionRaw, + platform: platformRaw, + enable_evaluate: enableEvaluate, + enable_sim_fp: enableSimFp, + enable_sim_fixed: enableSimFixed, + enable_sim_hw: enableSimHw, + }, + metadata, + input: { + file: modelFile, + safeFilename: safeModelFilename, + extension: modelExt, + }, + refImages: safeRefImages, + }, + }; +} + +module.exports = { + validateCreateJobRequest, + ALLOWED_MODEL_EXTENSIONS, + ALLOWED_PLATFORMS, + VERSION_WHITELIST, + MAX_REF_IMAGE_SIZE_BYTES, // 向後相容(既有測試 import) + DEFAULT_MAX_REF_IMAGE_SIZE_BYTES, +}; diff --git a/apps/task-scheduler/src/services/__tests__/healthService.test.js b/apps/task-scheduler/src/services/__tests__/healthService.test.js new file mode 100644 index 0000000..9e3f352 --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/healthService.test.js @@ -0,0 +1,605 @@ +/** + * Unit tests — healthService(T8)。 + * + * 涵蓋範圍: + * 1. snapshot 形狀(service / status / version / timestamp / dependencies) + * 2. Redis status 判定('ready' → connected;其他 → disconnected) + * 3. 整體狀態判定矩陣(healthy / degraded / unhealthy) + * 4. 第一次啟動 cache 未填 → MC / FAA = 'pending' + * 5. background polling 寫入 cache(runOnce) + * 6. probeHttp 行為(200 / 404 / 5xx / network error / timeout / abort) + * 7. start() 冪等 + * 8. 
/**
 * Unit tests — healthService (T8).
 *
 * Coverage:
 *   1. snapshot shape (service / status / version / timestamp / dependencies)
 *   2. Redis status classification ('ready' → connected; anything else → disconnected)
 *   3. overall status matrix (healthy / degraded / unhealthy)
 *   4. before the first poll the cache is empty → MC / FAA = 'pending'
 *   5. background polling writes the cache (runOnce)
 *   6. probeHttp behaviour (200 / 404 / 5xx / network error / timeout / abort)
 *   7. start() is idempotent
 *   8. stop() clears the interval and aborts in-flight fetches
 *   9. overlapping-poll protection (inFlight → skip)
 *  10. missing probe URL falls back to unreachable
 *  11. error messages / logs do not leak endpoint URLs
 */

'use strict';

const { inspect } = require('util');

const {
  createHealthService,
  DEP_STATE,
  OVERALL_STATE,
  SERVICE_NAME,
  SERVICE_VERSION,
  _internals,
} = require('../healthService');

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Minimal Redis stand-in: the tests only ever set and read `.status`. */
function makeFakeRedis(status = 'ready') {
  return { status };
}

/**
 * Build a controllable fetch mock.
 *   - URLs without a handler resolve to a 200 response
 *   - a handler may be a response object, an Error (thrown), or a function
 *     `(url, opts) => response`
 */
function makeFetchMock(handlers = {}) {
  return jest.fn(async (url, opts) => {
    const handler = handlers[url];
    if (!handler) {
      // default: 200 OK
      return { status: 200, ok: true };
    }
    if (handler instanceof Error) throw handler;
    if (typeof handler === 'function') return handler(url, opts);
    return handler;
  });
}

// Silence console output produced while the tests run.
beforeAll(() => {
  jest.spyOn(console, 'log').mockImplementation(() => {});
  jest.spyOn(console, 'warn').mockImplementation(() => {});
  jest.spyOn(console, 'error').mockImplementation(() => {});
});
afterAll(() => {
  jest.restoreAllMocks();
});

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

describe('createHealthService — basic contract', () => {
  it('throws when deps is missing', () => {
    expect(() => createHealthService()).toThrow(/deps is required/);
  });

  it('throws when deps.redis is missing', () => {
    expect(() => createHealthService({})).toThrow(/deps\.redis is required/);
  });

  it('returns expected interface', () => {
    const svc = createHealthService({ redis: makeFakeRedis() });
    expect(typeof svc.start).toBe('function');
    expect(typeof svc.stop).toBe('function');
    expect(typeof svc.getHealth).toBe('function');
    expect(typeof svc.isUnhealthy).toBe('function');
  });
});

describe('getHealth — snapshot shape and constants', () => {
  it('returns snapshot with correct top-level fields', () => {
    const svc = createHealthService({ redis: makeFakeRedis() });
    const snap = svc.getHealth();
    expect(snap).toEqual(
      expect.objectContaining({
        service: SERVICE_NAME,
        version: SERVICE_VERSION,
        status: expect.any(String),
        timestamp: expect.any(String),
        dependencies: expect.objectContaining({
          redis: expect.any(String),
          member_center: expect.any(String),
          file_access_agent: expect.any(String),
        }),
      })
    );
    // ISO 8601 timestamp (coarse check)
    expect(snap.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/);
  });
});

describe('classifyRedisStatus / Redis dependency', () => {
  it("treats 'ready' as connected", () => {
    expect(_internals.classifyRedisStatus({ status: 'ready' })).toBe(DEP_STATE.CONNECTED);
  });

  it.each([
    'wait',
    'connecting',
    'connect',
    'reconnecting',
    'close',
    'end',
    undefined,
    null,
    '',
  ])('treats %p as disconnected', (status) => {
    expect(_internals.classifyRedisStatus({ status })).toBe(DEP_STATE.DISCONNECTED);
  });

  it('treats null redis as disconnected', () => {
    expect(_internals.classifyRedisStatus(null)).toBe(DEP_STATE.DISCONNECTED);
  });

  it('reflects redis disconnected in snapshot', () => {
    const svc = createHealthService({ redis: makeFakeRedis('connecting') });
    const snap = svc.getHealth();
    expect(snap.dependencies.redis).toBe(DEP_STATE.DISCONNECTED);
    expect(snap.status).toBe(OVERALL_STATE.UNHEALTHY);
    expect(svc.isUnhealthy()).toBe(true);
  });
});

describe('deriveOverallStatus — status matrix', () => {
  const { deriveOverallStatus } = _internals;

  it('healthy when all deps OK', () => {
    expect(
      deriveOverallStatus({
        redis: DEP_STATE.CONNECTED,
        memberCenter: DEP_STATE.REACHABLE,
        fileAccessAgent: DEP_STATE.REACHABLE,
      })
    ).toBe(OVERALL_STATE.HEALTHY);
  });

  it('unhealthy when redis disconnected (regardless of MC/FAA)', () => {
    expect(
      deriveOverallStatus({
        redis: DEP_STATE.DISCONNECTED,
        memberCenter: DEP_STATE.REACHABLE,
        fileAccessAgent: DEP_STATE.REACHABLE,
      })
    ).toBe(OVERALL_STATE.UNHEALTHY);
    expect(
      deriveOverallStatus({
        redis: DEP_STATE.DISCONNECTED,
        memberCenter: DEP_STATE.UNREACHABLE,
        fileAccessAgent: DEP_STATE.UNREACHABLE,
      })
    ).toBe(OVERALL_STATE.UNHEALTHY);
  });

  it('degraded when redis OK but MC unreachable', () => {
    expect(
      deriveOverallStatus({
        redis: DEP_STATE.CONNECTED,
        memberCenter: DEP_STATE.UNREACHABLE,
        fileAccessAgent: DEP_STATE.REACHABLE,
      })
    ).toBe(OVERALL_STATE.DEGRADED);
  });

  it('degraded when redis OK but FAA unreachable', () => {
    expect(
      deriveOverallStatus({
        redis: DEP_STATE.CONNECTED,
        memberCenter: DEP_STATE.REACHABLE,
        fileAccessAgent: DEP_STATE.UNREACHABLE,
      })
    ).toBe(OVERALL_STATE.DEGRADED);
  });

  it('degraded when MC/FAA pending (cache not warmed)', () => {
    expect(
      deriveOverallStatus({
        redis: DEP_STATE.CONNECTED,
        memberCenter: DEP_STATE.PENDING,
        fileAccessAgent: DEP_STATE.PENDING,
      })
    ).toBe(OVERALL_STATE.DEGRADED);
  });
});

describe('initial cache state — pending before first poll', () => {
  it('snapshots return pending before start()', () => {
    const svc = createHealthService({ redis: makeFakeRedis() });
    const snap = svc.getHealth();
    expect(snap.dependencies.member_center).toBe(DEP_STATE.PENDING);
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.PENDING);
    // redis is ready but the other two are pending → degraded, not unhealthy
    expect(snap.status).toBe(OVERALL_STATE.DEGRADED);
    expect(svc.isUnhealthy()).toBe(false);
  });
});

describe('runOnce — background polling fills cache', () => {
  it('writes both deps reachable when both 200', async () => {
    const fetch = makeFetchMock({
      'https://mc/.well-known/jwks': { status: 200, ok: true },
      'https://faa.example/health': { status: 200, ok: true },
    });
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/.well-known/jwks',
      fileAccessAgentProbeUrl: 'https://faa.example/health',
      fetch,
    });

    await svc._runOnce();

    expect(fetch).toHaveBeenCalledTimes(2);
    const snap = svc.getHealth();
    expect(snap.dependencies.member_center).toBe(DEP_STATE.REACHABLE);
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.REACHABLE);
    expect(snap.status).toBe(OVERALL_STATE.HEALTHY);
  });

  it('treats 4xx (e.g. 404) as reachable', async () => {
    const fetch = makeFetchMock({
      'https://mc/.well-known/jwks': { status: 200, ok: true },
      'https://faa.example/health': { status: 404, ok: false }, // FAA does not implement /health
    });
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/.well-known/jwks',
      fileAccessAgentProbeUrl: 'https://faa.example/health',
      fetch,
    });

    await svc._runOnce();

    const snap = svc.getHealth();
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.REACHABLE);
    expect(snap.status).toBe(OVERALL_STATE.HEALTHY);
  });

  it('marks 5xx as unreachable', async () => {
    const fetch = makeFetchMock({
      'https://mc/.well-known/jwks': { status: 503, ok: false },
      'https://faa.example/health': { status: 200, ok: true },
    });
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/.well-known/jwks',
      fileAccessAgentProbeUrl: 'https://faa.example/health',
      fetch,
    });

    await svc._runOnce();

    const snap = svc.getHealth();
    expect(snap.dependencies.member_center).toBe(DEP_STATE.UNREACHABLE);
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.REACHABLE);
    expect(snap.status).toBe(OVERALL_STATE.DEGRADED);
  });

  it('marks network error as unreachable (one bad does not affect the other)', async () => {
    const fetch = jest.fn(async (url) => {
      if (url.includes('mc')) {
        const err = new Error('ECONNREFUSED 1.2.3.4:80');
        err.code = 'ECONNREFUSED';
        throw err;
      }
      return { status: 200, ok: true };
    });
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/.well-known/jwks',
      fileAccessAgentProbeUrl: 'https://faa.example/health',
      fetch,
    });

    await svc._runOnce();

    const snap = svc.getHealth();
    expect(snap.dependencies.member_center).toBe(DEP_STATE.UNREACHABLE);
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.REACHABLE);
  });

  it('treats fetch promise that never resolves as timeout (probeTimeoutMs honored)', async () => {
    // fetch returns a promise that never resolves but rejects on abort
    const fetch = jest.fn((_url, opts) => {
      return new Promise((_resolve, reject) => {
        if (opts && opts.signal) {
          opts.signal.addEventListener('abort', () => {
            const err = new Error('aborted');
            err.name = 'AbortError';
            reject(err);
          });
        }
      });
    });
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/.well-known/jwks',
      fileAccessAgentProbeUrl: 'https://faa.example/health',
      fetch,
      probeTimeoutMs: 30, // keep the test fast
    });

    await svc._runOnce();

    const snap = svc.getHealth();
    expect(snap.dependencies.member_center).toBe(DEP_STATE.UNREACHABLE);
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.UNREACHABLE);
  });

  it('falls back to unreachable when probe URL is missing in config', async () => {
    const fetch = makeFetchMock();
    const svc = createHealthService({
      redis: makeFakeRedis(),
      // no URL supplied at all (neither config nor override)
      fetch,
    });

    await svc._runOnce();

    const snap = svc.getHealth();
    expect(snap.dependencies.member_center).toBe(DEP_STATE.UNREACHABLE);
    expect(snap.dependencies.file_access_agent).toBe(DEP_STATE.UNREACHABLE);
    // with no URL there must be no fetch at all
    expect(fetch).not.toHaveBeenCalled();
  });

  it('uses config.memberCenter.jwksUrl and config.fileAccessAgent.baseUrl/health automatically', async () => {
    const fetch = makeFetchMock();
    const svc = createHealthService({
      redis: makeFakeRedis(),
      config: {
        memberCenter: { jwksUrl: 'https://auth/.well-known/jwks' },
        fileAccessAgent: { baseUrl: 'https://faa.internal/' }, // trailing slash should be trimmed
      },
      fetch,
    });

    await svc._runOnce();

    const calledUrls = fetch.mock.calls.map((c) => c[0]).sort();
    expect(calledUrls).toEqual(
      ['https://auth/.well-known/jwks', 'https://faa.internal/health'].sort()
    );
  });
});

describe('inFlight protection — slow probe does not double-fire', () => {
  it('skips runOnce if previous still running', async () => {
    // Two-phase fetch mock: the first two calls (first wave) hang until
    // released manually; every later call resolves 200 immediately.
    const pending = [];
    let callCount = 0;
    const fetch = jest.fn((_url, opts) => {
      callCount += 1;
      if (callCount <= 2) {
        return new Promise((resolve, reject) => {
          pending.push(resolve);
          if (opts && opts.signal) {
            opts.signal.addEventListener('abort', () => {
              const err = new Error('aborted');
              err.name = 'AbortError';
              reject(err);
            });
          }
        });
      }
      return Promise.resolve({ status: 200, ok: true });
    });
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/jwks',
      fileAccessAgentProbeUrl: 'https://faa/health',
      fetch,
      probeTimeoutMs: 50_000, // large enough that the timeout never aborts first
    });

    // first runOnce → both fetches hang
    const first = svc._runOnce();
    // flush microtasks so both fetches have actually been issued
    await Promise.resolve();
    await Promise.resolve();
    expect(fetch).toHaveBeenCalledTimes(2);

    // a concurrent second call must return without issuing new fetches
    await svc._runOnce();
    expect(fetch).toHaveBeenCalledTimes(2);

    // release the first wave so the first runOnce can finish
    pending.forEach((r) => r({ status: 200, ok: true }));
    pending.length = 0;
    await first;

    // a further runOnce now issues two fresh fetches
    await svc._runOnce();
    expect(fetch).toHaveBeenCalledTimes(4);
  });
});

describe('start / stop lifecycle', () => {
  it('start() is idempotent (no double interval)', () => {
    let intervalCount = 0;
    const fakeSetInterval = jest.fn(() => {
      intervalCount += 1;
      return { unref: jest.fn() };
    });
    const fakeClearInterval = jest.fn();
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/jwks',
      fileAccessAgentProbeUrl: 'https://faa/health',
      fetch: makeFetchMock(),
      setIntervalFn: fakeSetInterval,
      clearIntervalFn: fakeClearInterval,
    });

    svc.start();
    svc.start(); // second call should be noop
    svc.start();
    expect(intervalCount).toBe(1);
  });

  it('stop() clears interval and aborts in-flight fetch', async () => {
    let abortCount = 0;
    const fetch = jest.fn((_url, opts) => {
      return new Promise((_res, reject) => {
        if (opts && opts.signal) {
          opts.signal.addEventListener('abort', () => {
            abortCount += 1;
            const err = new Error('aborted');
            err.name = 'AbortError';
            reject(err);
          });
        }
      });
    });
    const fakeSetInterval = jest.fn(() => ({ unref: jest.fn() }));
    const fakeClearInterval = jest.fn();
    const svc = createHealthService({
      redis: makeFakeRedis(),
      memberCenterProbeUrl: 'https://mc/jwks',
      fileAccessAgentProbeUrl: 'https://faa/health',
      fetch,
      setIntervalFn: fakeSetInterval,
      clearIntervalFn: fakeClearInterval,
      probeTimeoutMs: 5000,
    });

    svc.start();
    // give the initial poll a couple of microtask ticks to fire
    await Promise.resolve();
    await Promise.resolve();

    svc.stop();
    expect(fakeClearInterval).toHaveBeenCalled();

    // let the in-flight rejections settle
    await Promise.resolve();
    await Promise.resolve();
    expect(abortCount).toBeGreaterThanOrEqual(1);
  });

  it('stop() before start() is a noop', () => {
    const svc = createHealthService({ redis: makeFakeRedis() });
    expect(() => svc.stop()).not.toThrow();
  });
});

describe('probeHttp — direct unit', () => {
  const { probeHttp } = _internals;

  it('returns reachable for 200', async () => {
    const fetchImpl = jest.fn(async () => ({ status: 200, ok: true }));
    const result = await probeHttp('https://x', {
      fetchImpl,
      timeoutMs: 100,
      setTimeoutFn: globalThis.setTimeout,
      clearTimeoutFn: globalThis.clearTimeout,
    });
    expect(result).toBe(DEP_STATE.REACHABLE);
  });

  it('returns reachable for 404 (service alive, route missing)', async () => {
    const fetchImpl = jest.fn(async () => ({ status: 404, ok: false }));
    const result = await probeHttp('https://x', {
      fetchImpl,
      timeoutMs: 100,
      setTimeoutFn: globalThis.setTimeout,
      clearTimeoutFn: globalThis.clearTimeout,
    });
    expect(result).toBe(DEP_STATE.REACHABLE);
  });

  it('returns reachable for 401 (auth needed but service alive)', async () => {
    const fetchImpl = jest.fn(async () => ({ status: 401, ok: false }));
    const result = await probeHttp('https://x', {
      fetchImpl,
      timeoutMs: 100,
      setTimeoutFn: globalThis.setTimeout,
      clearTimeoutFn: globalThis.clearTimeout,
    });
    expect(result).toBe(DEP_STATE.REACHABLE);
  });

  it('returns unreachable for 500', async () => {
    const fetchImpl = jest.fn(async () => ({ status: 500, ok: false }));
    const result = await probeHttp('https://x', {
      fetchImpl,
      timeoutMs: 100,
      setTimeoutFn: globalThis.setTimeout,
      clearTimeoutFn: globalThis.clearTimeout,
    });
    expect(result).toBe(DEP_STATE.UNREACHABLE);
  });

  it('returns unreachable for thrown network error', async () => {
    const fetchImpl = jest.fn(async () => {
      const err = new Error('ECONNREFUSED');
      err.code = 'ECONNREFUSED';
      throw err;
    });
    const result = await probeHttp('https://x', {
      fetchImpl,
      timeoutMs: 100,
      setTimeoutFn: globalThis.setTimeout,
      clearTimeoutFn: globalThis.clearTimeout,
    });
    expect(result).toBe(DEP_STATE.UNREACHABLE);
  });

  it('returns unreachable when master signal already aborted', async () => {
    const ac = new AbortController();
    ac.abort();
    const fetchImpl = jest.fn();
    const result = await probeHttp('https://x', {
      fetchImpl,
      timeoutMs: 100,
      setTimeoutFn: globalThis.setTimeout,
      clearTimeoutFn: globalThis.clearTimeout,
      signal: ac.signal,
    });
    expect(result).toBe(DEP_STATE.UNREACHABLE);
    expect(fetchImpl).not.toHaveBeenCalled();
  });
});

describe('security — no sensitive data leakage', () => {
  it('logs and snapshot do not contain probe URLs', async () => {
    const logCalls = [];
    const origLog = console.log;
    const origWarn = console.warn;
    const origError = console.error;
    // Record EVERY argument of each console call. Non-string arguments
    // (objects, Error instances) are rendered with util.inspect so that a URL
    // hidden in a nested field, an error message or a second argument is
    // still visible to the assertions below. Capturing only the first
    // argument would let such leaks pass unnoticed.
    const capture = (...args) => {
      logCalls.push(
        args
          .map((arg) => (typeof arg === 'string' ? arg : inspect(arg, { depth: null })))
          .join(' ')
      );
    };
    console.log = capture;
    console.warn = capture;
    console.error = capture;

    try {
      const SECRET_MC_URL = 'https://internal-secret-mc.example/.well-known/jwks';
      const SECRET_FAA_URL = 'https://nas-internal-files.example/health';
      const fetch = makeFetchMock({
        [SECRET_MC_URL]: { status: 503, ok: false },
        [SECRET_FAA_URL]: new Error('Connection refused to nas-internal-files.example:9999'),
      });
      const svc = createHealthService({
        redis: makeFakeRedis(),
        memberCenterProbeUrl: SECRET_MC_URL,
        fileAccessAgentProbeUrl: SECRET_FAA_URL,
        fetch,
      });

      await svc._runOnce();
      const snap = svc.getHealth();
      const allLogs = logCalls.join('\n');

      // 1) the snapshot must not contain any URL fragment
      const snapStr = JSON.stringify(snap);
      expect(snapStr).not.toContain('internal-secret-mc');
      expect(snapStr).not.toContain('nas-internal-files');
      expect(snapStr).not.toContain('9999');

      // 2) nothing written to the console may contain a URL fragment either
      expect(allLogs).not.toContain('internal-secret-mc');
      expect(allLogs).not.toContain('nas-internal-files');
      expect(allLogs).not.toContain('9999');
    } finally {
      console.log = origLog;
      console.warn = origWarn;
      console.error = origError;
    }
  });
});
jobService T5 介面單元測試(T5)。 + * + * 重點: + * 1. writeInputToMinIO 上傳 model + ref_images 並回 object_keys + * 2. writeInputToMinIO 在 minio.client null 時 throw + * 3. writeInputToMinIO model 寫失敗 throw + * 4. claimActiveAndCreate 成功時呼叫 sseService + * 5. claimActiveAndCreate 衝突時不呼叫 sseService + * 6. cleanupInputObjects fire-and-forget(fail 不 throw) + * 7. getActiveJob 整合 + */ + +'use strict'; + +const { createJobService } = require('../jobService'); + +// Mock luaScripts to control claim/release outcome without spinning up real Redis Lua +jest.mock('../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); + +const { claimActiveJob, releaseActiveJob } = require('../../redis/luaScripts'); + +function makeFakeRedis() { + const store = new Map(); + return { + store, + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + xadd: jest.fn(async () => '1-0'), + }; +} + +function makeFakeSseService() { + return { sendSSE: jest.fn() }; +} + +function makeFakeMinio({ uploadFails = false, deleteFails = false } = {}) { + const uploaded = []; + const deleted = []; + return { + client: { _fake: true }, + bucket: 'test-bucket', + endpoint: 'http://nope', + uploadToMinIO: jest.fn(async (key, body, contentType) => { + if (uploadFails) throw new Error('storage down'); + uploaded.push({ key, contentType, size: body.length }); + }), + getFromMinIO: jest.fn(async () => null), + deleteObject: jest.fn(async (key) => { + if (deleteFails) throw new Error('delete failed'); + deleted.push(key); + }), + _uploaded: uploaded, + _deleted: deleted, + }; +} + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +beforeEach(() => { 
+ claimActiveJob.mockReset(); + releaseActiveJob.mockReset(); +}); + +describe('jobService.writeInputToMinIO', () => { + it('uploads model + ref_images and returns object keys', async () => { + const minio = makeFakeMinio(); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio, + }); + + const modelFile = { + buffer: Buffer.from('model-bytes'), + mimetype: 'application/octet-stream', + }; + const refImages = [ + { file: { buffer: Buffer.from('img1'), mimetype: 'image/jpeg' }, safeFilename: 'a.jpg' }, + { file: { buffer: Buffer.from('img2'), mimetype: 'image/png' }, safeFilename: 'b.png' }, + ]; + + const result = await svc.writeInputToMinIO( + 'job-123', + modelFile, + 'model.onnx', + refImages + ); + + expect(result.inputObjectKey).toBe('jobs/job-123/input/model.onnx'); + expect(result.refImageObjectKeys).toEqual([ + 'jobs/job-123/ref_images/0_a.jpg', + 'jobs/job-123/ref_images/1_b.png', + ]); + expect(result.uploadedKeys).toHaveLength(3); + expect(minio.uploadToMinIO).toHaveBeenCalledTimes(3); + }); + + it('uploads with no ref_images', async () => { + const minio = makeFakeMinio(); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio, + }); + const result = await svc.writeInputToMinIO( + 'j', + { buffer: Buffer.from('x') }, + 'm.onnx', + [] + ); + expect(result.uploadedKeys).toHaveLength(1); + }); + + it('throws when minio dep missing', async () => { + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + }); + await expect( + svc.writeInputToMinIO('j', { buffer: Buffer.from('x') }, 'm.onnx', []) + ).rejects.toThrow(/minio/); + }); + + it('throws when minio.client null', async () => { + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: { client: null, uploadToMinIO: jest.fn() }, + }); + await expect( + svc.writeInputToMinIO('j', { buffer: Buffer.from('x') }, 'm.onnx', []) + 
).rejects.toThrow(/STORAGE_BACKEND/); + }); + + it('propagates upload errors', async () => { + const minio = makeFakeMinio({ uploadFails: true }); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio, + }); + await expect( + svc.writeInputToMinIO('j', { buffer: Buffer.from('x') }, 'm.onnx', []) + ).rejects.toThrow(/storage down/); + }); +}); + +describe('jobService.claimActiveAndCreate', () => { + it('triggers sendSSE on success', async () => { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + const sse = makeFakeSseService(); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: sse, + minio: makeFakeMinio(), + }); + const jobRecord = { job_id: 'j-1', status: 'ONNX' }; + const result = await svc.claimActiveAndCreate({ + userId: 'u', + jobId: 'j-1', + jobRecord, + ttlSeconds: 100, + }); + expect(result).toEqual({ ok: true }); + expect(sse.sendSSE).toHaveBeenCalledWith('j-1', jobRecord); + }); + + it('does NOT trigger sendSSE on conflict', async () => { + claimActiveJob.mockResolvedValueOnce({ + ok: false, + conflict: true, + activeJobId: 'old-id', + }); + const sse = makeFakeSseService(); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: sse, + minio: makeFakeMinio(), + }); + const result = await svc.claimActiveAndCreate({ + userId: 'u', + jobId: 'new-id', + jobRecord: { job_id: 'new-id' }, + ttlSeconds: 100, + }); + expect(result.conflict).toBe(true); + expect(result.activeJobId).toBe('old-id'); + expect(sse.sendSSE).not.toHaveBeenCalled(); + }); + + it('serializes jobRecord to JSON for Lua', async () => { + claimActiveJob.mockResolvedValueOnce({ ok: true }); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + const jobRecord = { job_id: 'j-1', extra: { nested: 1 } }; + await svc.claimActiveAndCreate({ + userId: 'u', + jobId: 'j-1', + jobRecord, + ttlSeconds: 100, + }); + const args = 
claimActiveJob.mock.calls[0][1]; + expect(args.jobJson).toBe(JSON.stringify(jobRecord)); + expect(args.userId).toBe('u'); + expect(args.jobId).toBe('j-1'); + expect(args.ttlSeconds).toBe(100); + }); +}); + +describe('jobService.getActiveJob', () => { + it('returns null both when no active job', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + const res = await svc.getActiveJob('u'); + expect(res).toEqual({ activeJobId: null, job: null }); + }); + + it('returns job when active job exists', async () => { + const redis = makeFakeRedis(); + redis.store.set('user:u:active_job', 'j-1'); + redis.store.set('job:j-1', JSON.stringify({ job_id: 'j-1', stage: 'bie' })); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + const res = await svc.getActiveJob('u'); + expect(res.activeJobId).toBe('j-1'); + expect(res.job).toEqual({ job_id: 'j-1', stage: 'bie' }); + }); + + it('returns activeJobId but null job if Redis stale', async () => { + const redis = makeFakeRedis(); + redis.store.set('user:u:active_job', 'j-orphan'); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + const res = await svc.getActiveJob('u'); + expect(res.activeJobId).toBe('j-orphan'); + expect(res.job).toBeNull(); + }); +}); + +describe('jobService.cleanupInputObjects', () => { + it('does nothing for empty array', async () => { + const minio = makeFakeMinio(); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio, + }); + await svc.cleanupInputObjects([]); + expect(minio.deleteObject).not.toHaveBeenCalled(); + }); + + it('calls deleteObject for each key', async () => { + const minio = makeFakeMinio(); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio, + }); + await 
svc.cleanupInputObjects(['k1', 'k2', 'k3']); + expect(minio.deleteObject).toHaveBeenCalledTimes(3); + expect(minio._deleted).toEqual(['k1', 'k2', 'k3']); + }); + + it('does not throw when deleteObject fails (fire-and-forget)', async () => { + const minio = makeFakeMinio({ deleteFails: true }); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio, + }); + // 不該 throw + await expect(svc.cleanupInputObjects(['k1', 'k2'])).resolves.toBeUndefined(); + }); + + it('skips when minio missing', async () => { + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + }); + await expect(svc.cleanupInputObjects(['k1'])).resolves.toBeUndefined(); + }); +}); + +// Sec M4:getActiveJobId(pre-check 用,純 GET 不讀 record) +describe('jobService.getActiveJobId (Sec M4 pre-check)', () => { + it('returns active job id when set', async () => { + const redis = makeFakeRedis(); + redis.store.set('user:u:active_job', 'j-1'); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + expect(await svc.getActiveJobId('u')).toBe('j-1'); + }); + + it('returns null when no active job', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + expect(await svc.getActiveJobId('u')).toBeNull(); + }); + + it('only reads active_job key (does NOT read job:{} record)', async () => { + const redis = makeFakeRedis(); + redis.store.set('user:u:active_job', 'j-1'); + redis.store.set('job:j-1', JSON.stringify({ a: 1 })); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + await svc.getActiveJobId('u'); + // 應該只 GET 一次(不讀 job record) + expect(redis.get).toHaveBeenCalledTimes(1); + expect(redis.get).toHaveBeenCalledWith('user:u:active_job'); + }); +}); + +// Sec M2 + Reviewer Major-2:releaseActiveJob(補償釋放) 
+describe('jobService.releaseActiveJob (Sec M2)', () => { + it('calls Lua releaseActiveJob with correct args', async () => { + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + const result = await svc.releaseActiveJob('alice', 'job-xyz'); + expect(result).toEqual({ released: true }); + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + const args = releaseActiveJob.mock.calls[0][1]; + expect(args.userId).toBe('alice'); + expect(args.jobId).toBe('job-xyz'); + }); + + it('returns released=false on NOOP (active_job mismatch)', async () => { + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: false }); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + const result = await svc.releaseActiveJob('u', 'orphan'); + expect(result).toEqual({ released: false }); + }); + + it('propagates Lua errors', async () => { + releaseActiveJob.mockRejectedValueOnce(new Error('Redis down')); + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + await expect(svc.releaseActiveJob('u', 'j')).rejects.toThrow(/Redis down/); + }); +}); + +describe('jobService._internals (object key naming)', () => { + it('buildInputObjectKey aligns with TDD §6.1', () => { + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + expect(svc._internals.buildInputObjectKey('j-1', 'model.onnx')).toBe( + 'jobs/j-1/input/model.onnx' + ); + }); + + it('buildRefImageObjectKey prefixes index to avoid name collisions', () => { + const svc = createJobService({ + redis: makeFakeRedis(), + sseService: makeFakeSseService(), + minio: makeFakeMinio(), + }); + expect(svc._internals.buildRefImageObjectKey('j-1', 0, 'a.jpg')).toBe( + 
'jobs/j-1/ref_images/0_a.jpg' + ); + expect(svc._internals.buildRefImageObjectKey('j-1', 1, 'a.jpg')).toBe( + 'jobs/j-1/ref_images/1_a.jpg' + ); + }); +}); diff --git a/apps/task-scheduler/src/services/__tests__/jobService.t6.test.js b/apps/task-scheduler/src/services/__tests__/jobService.t6.test.js new file mode 100644 index 0000000..183616f --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/jobService.t6.test.js @@ -0,0 +1,427 @@ +/** + * jobService T6 介面單元測試(listJobsByUser + computeEtag)。 + * + * 範圍: + * - listJobsByUser:SMEMBERS + pipeline GET + client filter + status filter + + * sort + 分頁 + * - computeEtag:同 updated_at → 同 ETag;不同 updated_at → 不同 ETag + * - 邊界:user 沒 job、record 損壞、cross-client 隔離 + */ + +'use strict'; + +// 阻斷實際 Lua script load(不需要打 Redis) +jest.mock('../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); + +const { createJobService } = require('../jobService'); + +// --------------------------------------------------------------------------- +// Fake Redis with SMEMBERS + pipeline support +// --------------------------------------------------------------------------- + +function makeFakeRedis() { + const store = new Map(); + const sets = new Map(); // key -> Set + + function pipeline() { + const ops = []; + const p = { + get(key) { + ops.push({ kind: 'get', key }); + return p; + }, + // 我們的 pipeline 沒用到別的 op,但保留 fluent + async exec() { + return ops.map((op) => { + if (op.kind === 'get') { + const val = store.has(op.key) ? store.get(op.key) : null; + return [null, val]; + } + return [new Error('unsupported op'), null]; + }); + }, + }; + return p; + } + + return { + store, + sets, + pipeline: jest.fn(pipeline), + smembers: jest.fn(async (key) => { + const s = sets.get(key); + return s ? [...s] : []; + }), + get: jest.fn(async (key) => (store.has(key) ? 
store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + sadd: jest.fn(async (key, member) => { + if (!sets.has(key)) sets.set(key, new Set()); + sets.get(key).add(member); + return 1; + }), + }; +} + +const sseService = { sendSSE: () => {} }; + +function makeJob(overrides = {}) { + return { + job_id: overrides.job_id || 'jid-default', + user_id: 'u1', + created_by_client_id: 'cid-A', + status: 'ONNX', + stage: 'onnx', + progress: 0, + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:00:00Z', + expires_at: '2026-05-02T12:00:00Z', + stage_timings: { onnx: null, bie: null, nef: null }, + ...overrides, + }; +} + +beforeAll(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +// --------------------------------------------------------------------------- +// listJobsByUser +// --------------------------------------------------------------------------- + +describe('listJobsByUser', () => { + let redis; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + svc = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + }); + + it('returns empty when user has no jobs', async () => { + const result = await svc.listJobsByUser({ + userId: 'u-empty', + clientId: 'cid-A', + }); + expect(result).toEqual({ jobs: [], total: 0, nextOffset: null }); + expect(redis.smembers).toHaveBeenCalledWith('user:u-empty:jobs'); + }); + + it('throws when userId missing', async () => { + await expect( + svc.listJobsByUser({ clientId: 'cid-A' }) + ).rejects.toThrow(/userId/); + }); + + it('throws when clientId missing', async () => { + await expect( + svc.listJobsByUser({ userId: 'u1' }) + ).rejects.toThrow(/clientId/); + }); + + it('returns jobs for user, filtering by client_id (security)', async () => { + redis.sets.set('user:u1:jobs', new Set(['j1', 'j2', 'j3'])); + 
redis.store.set( + 'job:j1', + JSON.stringify(makeJob({ job_id: 'j1', created_by_client_id: 'cid-A' })) + ); + redis.store.set( + 'job:j2', + JSON.stringify(makeJob({ job_id: 'j2', created_by_client_id: 'cid-B' })) + ); + redis.store.set( + 'job:j3', + JSON.stringify(makeJob({ job_id: 'j3', created_by_client_id: 'cid-A' })) + ); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + }); + + // 只有 j1 / j3 屬於 cid-A + expect(result.total).toBe(2); + expect(result.jobs.map((j) => j.job_id).sort()).toEqual(['j1', 'j3']); + }); + + it('filters by status=in_progress (created + running)', async () => { + redis.sets.set('user:u1:jobs', new Set(['created-j', 'running-j', 'completed-j', 'failed-j'])); + // created + redis.store.set( + 'job:created-j', + JSON.stringify( + makeJob({ job_id: 'created-j', status: 'ONNX', stage_timings: { onnx: null } }) + ) + ); + // running (BIE) + redis.store.set( + 'job:running-j', + JSON.stringify(makeJob({ job_id: 'running-j', status: 'BIE', stage: 'bie' })) + ); + // completed + redis.store.set( + 'job:completed-j', + JSON.stringify(makeJob({ job_id: 'completed-j', status: 'COMPLETED', stage: null })) + ); + // failed + redis.store.set( + 'job:failed-j', + JSON.stringify(makeJob({ job_id: 'failed-j', status: 'FAILED', error: { stage: 'bie' } })) + ); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'in_progress', + }); + + expect(result.total).toBe(2); + expect(result.jobs.map((j) => j.job_id).sort()).toEqual(['created-j', 'running-j']); + }); + + it('filters by status=completed', async () => { + redis.sets.set('user:u1:jobs', new Set(['j1', 'j2'])); + redis.store.set( + 'job:j1', + JSON.stringify(makeJob({ job_id: 'j1', status: 'COMPLETED' })) + ); + redis.store.set( + 'job:j2', + JSON.stringify(makeJob({ job_id: 'j2', status: 'BIE' })) + ); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'completed', + 
}); + expect(result.total).toBe(1); + expect(result.jobs[0].job_id).toBe('j1'); + }); + + it('filters by status=failed', async () => { + redis.sets.set('user:u1:jobs', new Set(['j1', 'j2'])); + redis.store.set( + 'job:j1', + JSON.stringify(makeJob({ job_id: 'j1', status: 'FAILED', error: { stage: 'bie' } })) + ); + redis.store.set( + 'job:j2', + JSON.stringify(makeJob({ job_id: 'j2', status: 'COMPLETED' })) + ); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'failed', + }); + expect(result.total).toBe(1); + expect(result.jobs[0].job_id).toBe('j1'); + }); + + it('returns all jobs when status=all', async () => { + redis.sets.set('user:u1:jobs', new Set(['j1', 'j2', 'j3'])); + redis.store.set( + 'job:j1', + JSON.stringify(makeJob({ job_id: 'j1', status: 'COMPLETED' })) + ); + redis.store.set( + 'job:j2', + JSON.stringify(makeJob({ job_id: 'j2', status: 'BIE' })) + ); + redis.store.set( + 'job:j3', + JSON.stringify(makeJob({ job_id: 'j3', status: 'FAILED' })) + ); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + }); + expect(result.total).toBe(3); + }); + + it('sorts by created_at descending (newest first)', async () => { + redis.sets.set('user:u1:jobs', new Set(['old', 'mid', 'new'])); + redis.store.set( + 'job:old', + JSON.stringify(makeJob({ job_id: 'old', created_at: '2026-04-25T10:00:00Z' })) + ); + redis.store.set( + 'job:mid', + JSON.stringify(makeJob({ job_id: 'mid', created_at: '2026-04-25T11:00:00Z' })) + ); + redis.store.set( + 'job:new', + JSON.stringify(makeJob({ job_id: 'new', created_at: '2026-04-25T12:00:00Z' })) + ); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + }); + expect(result.jobs.map((j) => j.job_id)).toEqual(['new', 'mid', 'old']); + }); + + it('paginates with limit and offset', async () => { + redis.sets.set('user:u1:jobs', new Set(['j1', 'j2', 'j3', 'j4', 'j5'])); + for (let i = 1; i <= 5; 
i += 1) { + redis.store.set( + `job:j${i}`, + JSON.stringify( + makeJob({ + job_id: `j${i}`, + // 排序後(desc)j5 / j4 / j3 / j2 / j1 + created_at: `2026-04-25T${10 + i}:00:00Z`, + }) + ) + ); + } + + const page1 = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + limit: 2, + offset: 0, + }); + expect(page1.total).toBe(5); + expect(page1.jobs.map((j) => j.job_id)).toEqual(['j5', 'j4']); + expect(page1.nextOffset).toBe(2); + + const page2 = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + limit: 2, + offset: 2, + }); + expect(page2.jobs.map((j) => j.job_id)).toEqual(['j3', 'j2']); + expect(page2.nextOffset).toBe(4); + + const page3 = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + limit: 2, + offset: 4, + }); + expect(page3.jobs.map((j) => j.job_id)).toEqual(['j1']); + expect(page3.nextOffset).toBeNull(); + }); + + it('caps limit at 50', async () => { + redis.sets.set('user:u1:jobs', new Set(['j1'])); + redis.store.set('job:j1', JSON.stringify(makeJob({ job_id: 'j1' }))); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + limit: 100, // 超過 50 + }); + // 不會 fail;max 50(這裡只有 1 個 job) + expect(result.total).toBe(1); + }); + + it('handles missing job records (race: SMEMBER 有但 GET 沒)', async () => { + redis.sets.set('user:u1:jobs', new Set(['ghost', 'real'])); + // ghost 沒對應 job:ghost + redis.store.set('job:real', JSON.stringify(makeJob({ job_id: 'real' }))); + + const result = await svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + }); + expect(result.total).toBe(1); + expect(result.jobs[0].job_id).toBe('real'); + }); + + it('handles corrupt JSON gracefully (logs + skips)', async () => { + redis.sets.set('user:u1:jobs', new Set(['bad', 'good'])); + redis.store.set('job:bad', '{not valid json}'); + redis.store.set('job:good', JSON.stringify(makeJob({ job_id: 'good' }))); + + const result = await 
svc.listJobsByUser({ + userId: 'u1', + clientId: 'cid-A', + status: 'all', + }); + expect(result.total).toBe(1); + expect(result.jobs[0].job_id).toBe('good'); + }); +}); + +// --------------------------------------------------------------------------- +// computeEtag +// --------------------------------------------------------------------------- + +describe('computeEtag', () => { + let svc; + beforeEach(() => { + const redis = makeFakeRedis(); + svc = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + }); + + it('returns weak ETag in W/"..." format', () => { + const etag = svc.computeEtag({ updated_at: '2026-04-25T12:00:00Z' }); + expect(etag).toMatch(/^W\/"[A-Za-z0-9_-]+"$/); + }); + + it('produces stable ETag for same updated_at', () => { + const e1 = svc.computeEtag({ updated_at: '2026-04-25T12:00:00Z' }); + const e2 = svc.computeEtag({ updated_at: '2026-04-25T12:00:00Z' }); + expect(e1).toBe(e2); + }); + + it('produces different ETag for different updated_at', () => { + const e1 = svc.computeEtag({ updated_at: '2026-04-25T12:00:00Z' }); + const e2 = svc.computeEtag({ updated_at: '2026-04-25T12:00:01Z' }); + expect(e1).not.toBe(e2); + }); + + it('handles missing updated_at gracefully', () => { + const etag = svc.computeEtag({}); + // 空 updated_at 仍應回有效 ETag(hash of empty string) + expect(etag).toMatch(/^W\/"[A-Za-z0-9_-]+"$/); + }); + + it('handles null/undefined input', () => { + expect(svc.computeEtag(null)).toMatch(/^W\/"[A-Za-z0-9_-]+"$/); + expect(svc.computeEtag(undefined)).toMatch(/^W\/"[A-Za-z0-9_-]+"$/); + }); + + it('hash portion does not include `+` `/` `=` (base64url)', () => { + // 試多次以增加碰到 + / = 字元的機會 + // 注意:W/"..." 
的 W/ 是 RFC 7232 weak ETag 標示,是合法字元 + // 我們只檢查引號內的 hash 部分 + const ETAG_RE = /^W\/"([^"]+)"$/; + for (let i = 0; i < 10; i += 1) { + const etag = svc.computeEtag({ updated_at: `iter-${i}-${Math.random()}` }); + const match = etag.match(ETAG_RE); + expect(match).not.toBeNull(); + const hash = match[1]; + expect(hash).not.toContain('+'); + expect(hash).not.toContain('/'); + expect(hash).not.toContain('='); + } + }); +}); diff --git a/apps/task-scheduler/src/services/__tests__/jobService.t7.test.js b/apps/task-scheduler/src/services/__tests__/jobService.t7.test.js new file mode 100644 index 0000000..b433afc --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/jobService.t7.test.js @@ -0,0 +1,198 @@ +/** + * jobService T7 介面單元測試(markPromoted)。 + * + * 範圍: + * - markPromoted:寫入 promoted: true / promoted_at / promoted_object_keys + * - markPromoted:job 不存在 → 回 null(不 throw) + * - markPromoted:input validation(jobId / args 必填) + * - markPromoted:自動更新 updated_at(透過 setJob) + * - markPromoted:透過 SSE 廣播(透過 setJob) + */ + +'use strict'; + +// 阻斷實際 Lua script load +jest.mock('../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); + +const { createJobService } = require('../jobService'); + +function makeFakeRedis() { + const store = new Map(); + return { + store, + get: jest.fn(async (key) => (store.has(key) ? 
store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + }; +} + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +describe('markPromoted', () => { + let redis; + let sseSent; + let sseService; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sseSent = []; + sseService = { + sendSSE: jest.fn((jobId, payload) => { + sseSent.push({ jobId, payload }); + }), + }; + svc = createJobService({ redis, sseService, jobDataDir: '/tmp/x' }); + }); + + it('returns null when job does not exist', async () => { + const result = await svc.markPromoted('nonexistent-job', { + promotedAt: '2026-04-25T13:00:00Z', + promotedKeys: [], + }); + expect(result).toBeNull(); + // 沒寫 set + expect(redis.set).not.toHaveBeenCalled(); + }); + + it('writes promoted flags to job record + auto updated_at', async () => { + const baseJob = { + job_id: 'j1', + status: 'COMPLETED', + created_at: '2026-04-25T12:00:00Z', + updated_at: '2026-04-25T12:30:00Z', + }; + redis.store.set('job:j1', JSON.stringify(baseJob)); + + const promotedAt = '2026-04-25T14:00:00Z'; + const promotedKeys = [ + { + source: 'nef', + target_object_key: 'visionA/u1/m1/v1/out.nef', + size_bytes: 1234, + file_access_agent_etag: 'etag', + promoted_at: promotedAt, + }, + ]; + + const updated = await svc.markPromoted('j1', { promotedAt, promotedKeys }); + + expect(updated).not.toBeNull(); + expect(updated.promoted).toBe(true); + expect(updated.promoted_at).toBe(promotedAt); + expect(updated.promoted_object_keys).toEqual(promotedKeys); + + // updated_at 已被 setJob 自動更新(不再等於原本的 12:30:00) + expect(updated.updated_at).not.toBe('2026-04-25T12:30:00Z'); + expect(typeof updated.updated_at).toBe('string'); + + // 已寫回 Redis + const stored = JSON.parse(redis.store.get('job:j1')); 
+ expect(stored.promoted).toBe(true); + expect(stored.promoted_at).toBe(promotedAt); + + // SSE 已廣播 + expect(sseService.sendSSE).toHaveBeenCalledWith('j1', expect.any(Object)); + }); + + it('preserves other fields (status / output / parameters / error)', async () => { + const baseJob = { + job_id: 'j2', + status: 'COMPLETED', + stage: null, + progress: 100, + output: { nef_path: 'jobs/j2/output/out.nef' }, + parameters: { model_id: 1001 }, + error: null, + }; + redis.store.set('job:j2', JSON.stringify(baseJob)); + + await svc.markPromoted('j2', { + promotedAt: '2026-04-25T14:00:00Z', + promotedKeys: [ + { + source: 'nef', + target_object_key: 'a/b.nef', + size_bytes: 1, + file_access_agent_etag: 'e', + promoted_at: '2026-04-25T14:00:00Z', + }, + ], + }); + + const stored = JSON.parse(redis.store.get('job:j2')); + expect(stored.status).toBe('COMPLETED'); // 不該改 + expect(stored.stage).toBeNull(); + expect(stored.progress).toBe(100); + expect(stored.output).toEqual({ nef_path: 'jobs/j2/output/out.nef' }); + expect(stored.parameters).toEqual({ model_id: 1001 }); + expect(stored.error).toBeNull(); + }); + + it('overwrites existing promoted_object_keys atomically (re-promote)', async () => { + // 模擬 job 已有舊 promoted record(雖然 promote handler 走冪等不會走到, + // 但 jobService 介面層仍需支援被多次呼叫的安全性) + const baseJob = { + job_id: 'j3', + status: 'COMPLETED', + promoted: true, + promoted_at: '2026-04-25T12:00:00Z', + promoted_object_keys: [ + { source: 'nef', target_object_key: 'old/path.nef' }, + ], + }; + redis.store.set('job:j3', JSON.stringify(baseJob)); + + const newPromotedKeys = [ + { source: 'nef', target_object_key: 'new/path.nef' }, + { source: 'bie', target_object_key: 'new/path.bie' }, + ]; + await svc.markPromoted('j3', { + promotedAt: '2026-04-25T15:00:00Z', + promotedKeys: newPromotedKeys, + }); + + const stored = JSON.parse(redis.store.get('job:j3')); + expect(stored.promoted_at).toBe('2026-04-25T15:00:00Z'); + 
expect(stored.promoted_object_keys).toEqual(newPromotedKeys); + expect(stored.promoted_object_keys).toHaveLength(2); + }); + + it('throws when jobId missing or empty', async () => { + await expect( + svc.markPromoted('', { promotedAt: 'x', promotedKeys: [] }) + ).rejects.toThrow(/jobId/); + await expect( + svc.markPromoted(null, { promotedAt: 'x', promotedKeys: [] }) + ).rejects.toThrow(/jobId/); + }); + + it('throws when args missing or wrong shape', async () => { + await expect(svc.markPromoted('j1', null)).rejects.toThrow(/args/); + await expect(svc.markPromoted('j1', undefined)).rejects.toThrow(/args/); + await expect(svc.markPromoted('j1', {})).rejects.toThrow(/promotedAt/); + await expect( + svc.markPromoted('j1', { promotedAt: 'x' }) + ).rejects.toThrow(/promotedKeys/); + await expect( + svc.markPromoted('j1', { promotedAt: 'x', promotedKeys: 'not-array' }) + ).rejects.toThrow(/promotedKeys/); + }); +}); diff --git a/apps/task-scheduler/src/services/__tests__/jobService.t9.integration.test.js b/apps/task-scheduler/src/services/__tests__/jobService.t9.integration.test.js new file mode 100644 index 0000000..d9c722c --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/jobService.t9.integration.test.js @@ -0,0 +1,402 @@ +/** + * T9 整合測試:完整生命週期 + release_active_job 釋放確認。 + * + * 範圍: + * 1. 完整 e2e — 建 job → onnx done → bie done → nef done → completed + * → active_job 被 DEL(透過 release Lua 觸發) + * 2. failed 時 active_job 也被 DEL + * 3. 
race condition — user 的 job 完成後下一個 job 立刻能建立 + * + * 與 jobService.t9.test.js 的差異: + * - t9.test.js 是 unit test:各 advanceJob / failJob 行為單獨驗證 + * - 本檔是 integration:模擬完整 worker done event 流程,驗證 + * stage_timings + release 在三階段切換中都正確 + * + * Mock 策略: + * - 用 fake Redis(in-memory Map 模擬 GET/SET) + * - 用 jest.mock('luaScripts'),但 release / claim 都用 stateful mock + * 模擬真實 Lua 行為(不只 mock 回應值) + * + * 為什麼不打真 Redis: + * - Phase 1 測試金字塔:integration 用 mock 也算 integration(covers 多個 + * module 的協作);e2e 真 Redis 留給 Testing Agent 的 E2E 測試 + * - CI 不依賴 Redis container,跑得更快 + */ + +'use strict'; + +// 用 stateful mock 模擬 Lua 行為 +jest.mock('../../redis/luaScripts', () => { + // module-level state,每個測試 reset + const state = { + activeJobMap: new Map(), // userId → jobId(模擬 user:{u}:active_job) + jobMap: new Map(), // jobId → jobJson(模擬 job:{id}) + userJobsMap: new Map(), // userId → Set(模擬 user:{u}:jobs) + }; + + return { + claimActiveJob: jest.fn(async (_redis, { userId, jobId, jobJson }) => { + const existing = state.activeJobMap.get(userId); + if (existing) { + return { ok: false, conflict: true, activeJobId: existing }; + } + state.activeJobMap.set(userId, jobId); + state.jobMap.set(jobId, jobJson); + if (!state.userJobsMap.has(userId)) state.userJobsMap.set(userId, new Set()); + state.userJobsMap.get(userId).add(jobId); + return { ok: true }; + }), + releaseActiveJob: jest.fn(async (_redis, { userId, jobId }) => { + const current = state.activeJobMap.get(userId); + if (current !== jobId) { + // Lua atomic guard:active_job 不等於 ARGV[1] → NOOP + return { ok: true, released: false }; + } + state.activeJobMap.delete(userId); + state.jobMap.delete(jobId); + const set = state.userJobsMap.get(userId); + if (set) set.delete(jobId); + return { ok: true, released: true }; + }), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + _state: state, // 暴露給測試 reset + }, + }; +}); + +const { claimActiveJob, releaseActiveJob, _internals } = 
require('../../redis/luaScripts'); +const { createJobService } = require('../jobService'); + +function makeFakeRedis() { + const store = new Map(); + const xaddCalls = []; + return { + store, + xaddCalls, + get: jest.fn(async (key) => (store.has(key) ? store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + xadd: jest.fn(async (queue, _id, _field, value) => { + xaddCalls.push([queue, value]); + return '1-0'; + }), + }; +} + +function makeFakeSseService() { + return { sendSSE: jest.fn() }; +} + +beforeAll(() => { + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); +}); +afterAll(() => { + jest.restoreAllMocks(); +}); + +beforeEach(() => { + // 清空 stateful mock state + _internals._state.activeJobMap.clear(); + _internals._state.jobMap.clear(); + _internals._state.userJobsMap.clear(); + claimActiveJob.mockClear(); + releaseActiveJob.mockClear(); +}); + +// --------------------------------------------------------------------------- +// 完整 e2e 流程 +// --------------------------------------------------------------------------- + +describe('T9 e2e — 完整生命週期 onnx → bie → nef → COMPLETED', () => { + it('progresses through all stages with stage_timings + releases active_job', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + jobDataDir: '/data/jobs', + }); + + // === 建 job === + // 模擬 v1 POST /api/v1/jobs:claimActiveAndCreate 寫入完整 record(含 onnx.started_at) + const userId = 'alice'; + const jobId = 'job-e2e-001'; + const initialJob = { + job_id: jobId, + user_id: userId, + created_by_client_id: 'visionA', + created_at: '2026-04-25T10:00:00Z', + status: 'ONNX', + stage: 'onnx', + progress: 0, + stage_timings: { + // 對齊 v1 routes/jobs.js 的初始化 + onnx: { started_at: '2026-04-25T10:00:00Z', completed_at: null }, + bie: { started_at: null, completed_at: null }, + nef: { started_at: null, 
completed_at: null }, + }, + output: { bie_path: null, nef_path: null }, + error: null, + }; + // 用 jobService 寫入(同時放入 mock Lua state) + await svc.claimActiveAndCreate({ + userId, + jobId, + jobRecord: initialJob, + ttlSeconds: 604800, + }); + // 寫入 redis store 給 advance 流程讀 + redis.store.set(`job:${jobId}`, JSON.stringify(initialJob)); + + // === 階段 1:onnx 完成 === + await svc.advanceJob(jobId, 'onnx'); + let stored = JSON.parse(redis.store.get(`job:${jobId}`)); + expect(stored.status).toBe('BIE'); + expect(stored.stage).toBe('bie'); + expect(stored.progress).toBe(33); + expect(stored.stage_timings.onnx.completed_at).not.toBeNull(); + expect(stored.stage_timings.bie.started_at).not.toBeNull(); + expect(stored.stage_timings.bie.completed_at).toBeNull(); + expect(stored.stage_timings.nef.started_at).toBeNull(); + // 中間階段不 release + expect(releaseActiveJob).not.toHaveBeenCalled(); + // active_job 仍指向當前 + expect(_internals._state.activeJobMap.get(userId)).toBe(jobId); + + // === 階段 2:bie 完成 === + await svc.advanceJob(jobId, 'bie'); + stored = JSON.parse(redis.store.get(`job:${jobId}`)); + expect(stored.status).toBe('NEF'); + expect(stored.stage).toBe('nef'); + expect(stored.progress).toBe(67); + expect(stored.stage_timings.bie.completed_at).not.toBeNull(); + expect(stored.stage_timings.nef.started_at).not.toBeNull(); + expect(releaseActiveJob).not.toHaveBeenCalled(); + expect(_internals._state.activeJobMap.get(userId)).toBe(jobId); + + // === 階段 3:nef 完成 → COMPLETED === + await svc.advanceJob(jobId, 'nef'); + stored = JSON.parse(redis.store.get(`job:${jobId}`)); + expect(stored.status).toBe('COMPLETED'); + expect(stored.stage).toBeNull(); + expect(stored.progress).toBe(100); + expect(stored.stage_timings.nef.completed_at).not.toBeNull(); + // ★ 終態釋放 active_job + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + expect(releaseActiveJob.mock.calls[0][1]).toEqual({ + userId: 'alice', + jobId, + }); + // active_job 已被 DEL + 
expect(_internals._state.activeJobMap.has(userId)).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// FAILED 終態 +// --------------------------------------------------------------------------- + +describe('T9 e2e — FAILED 終態 release_active_job', () => { + it('releases active_job when worker reports failure', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + }); + + const userId = 'bob'; + const jobId = 'job-fail-001'; + const initialJob = { + job_id: jobId, + user_id: userId, + status: 'BIE', + stage: 'bie', + progress: 33, + stage_timings: { + onnx: { started_at: '2026-04-25T10:00:00Z', completed_at: '2026-04-25T10:05:00Z' }, + bie: { started_at: '2026-04-25T10:05:00Z', completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + }; + await svc.claimActiveAndCreate({ + userId, + jobId, + jobRecord: initialJob, + ttlSeconds: 604800, + }); + redis.store.set(`job:${jobId}`, JSON.stringify(initialJob)); + + await svc.failJob(jobId, 'bie', 'quantization error'); + + const stored = JSON.parse(redis.store.get(`job:${jobId}`)); + expect(stored.status).toBe('FAILED'); + expect(stored.error).toEqual({ step: 'bie', reason: 'quantization error' }); + expect(stored.stage_timings.bie.completed_at).not.toBeNull(); + + // ★ active_job 已被 DEL + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + expect(_internals._state.activeJobMap.has(userId)).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// Race scenario:user 完成 job 後立刻可建新 job +// --------------------------------------------------------------------------- + +describe('T9 e2e — user 完成 job 後可立即建新 job (active_job 已釋放)', () => { + it('allows user to claim new job immediately after previous job completes', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: 
makeFakeSseService(), + }); + + const userId = 'charlie'; + const firstJobId = 'job-001'; + const secondJobId = 'job-002'; + + // === 建第一個 job === + await svc.claimActiveAndCreate({ + userId, + jobId: firstJobId, + jobRecord: { job_id: firstJobId, user_id: userId, status: 'NEF', stage: 'nef' }, + ttlSeconds: 604800, + }); + // 模擬 NEF 完成的 record(有完整 stage_timings 走過所有階段) + redis.store.set( + `job:${firstJobId}`, + JSON.stringify({ + job_id: firstJobId, + user_id: userId, + status: 'NEF', + stage: 'nef', + progress: 67, + stage_timings: { + onnx: { started_at: 'tA', completed_at: 'tA' }, + bie: { started_at: 'tB', completed_at: 'tB' }, + nef: { started_at: 'tC', completed_at: null }, + }, + }) + ); + + // === 第一個 job 完成 === + await svc.advanceJob(firstJobId, 'nef'); + expect(_internals._state.activeJobMap.has(userId)).toBe(false); + + // === 第二個 job 可立即 claim === + const claimResult = await svc.claimActiveAndCreate({ + userId, + jobId: secondJobId, + jobRecord: { job_id: secondJobId, user_id: userId, status: 'ONNX', stage: 'onnx' }, + ttlSeconds: 604800, + }); + expect(claimResult.ok).toBe(true); + expect(_internals._state.activeJobMap.get(userId)).toBe(secondJobId); + }); + + it('blocks user with active_job (race window: complete then re-claim before release runs)', async () => { + // 這個 case 模擬「在 NEF 完成的 race window,假設別人搶到了 active_job」 + // 預期 Lua 的 atomic guard 會 NOOP(不誤刪別人的鎖) + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + }); + + const userId = 'dave'; + const oldJobId = 'job-old'; + const newJobId = 'job-new'; // 另一隻手「搶」進去的 job + + // 起始:active_job 是 oldJobId + await svc.claimActiveAndCreate({ + userId, + jobId: oldJobId, + jobRecord: { job_id: oldJobId, user_id: userId, status: 'NEF', stage: 'nef' }, + ttlSeconds: 604800, + }); + redis.store.set( + `job:${oldJobId}`, + JSON.stringify({ + job_id: oldJobId, + user_id: userId, + status: 'NEF', + stage: 'nef', + }) + ); + + // 模擬:別的 process 已經把 
active_job 改寫為 newJobId(race) + _internals._state.activeJobMap.set(userId, newJobId); + + // oldJobId 完成 → 嘗試 release,但 Lua atomic guard 會發現 active_job 不等於 oldJobId + await svc.advanceJob(oldJobId, 'nef'); + + // active_job 仍是 newJobId(未被誤刪) + expect(_internals._state.activeJobMap.get(userId)).toBe(newJobId); + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + // released=false(NOOP) + const result = await releaseActiveJob.mock.results[0].value; + expect(result.released).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// Legacy backward compat:legacy job 不影響 release(沒對應 active_job key) +// --------------------------------------------------------------------------- + +describe('T9 backward compat — legacy job (no user_id) 終態時不嘗試 release', () => { + it('legacy COMPLETED does not invoke release Lua', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + }); + + // legacy job:沒有 user_id(server.js POST /jobs 建的) + redis.store.set( + 'job:legacy-1', + JSON.stringify({ + job_id: 'legacy-1', + // user_id 缺 + status: 'NEF', + stage: 'nef', + progress: 67, + }) + ); + + await svc.advanceJob('legacy-1', 'nef'); + + expect(releaseActiveJob).not.toHaveBeenCalled(); + const stored = JSON.parse(redis.store.get('job:legacy-1')); + expect(stored.status).toBe('COMPLETED'); + // stage_timings 結構仍正確初始化 + expect(stored.stage_timings.nef.completed_at).not.toBeNull(); + }); + + it('legacy FAILED does not invoke release Lua', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + }); + redis.store.set( + 'job:legacy-2', + JSON.stringify({ + job_id: 'legacy-2', + // 無 user_id + status: 'BIE', + stage: 'bie', + }) + ); + + await svc.failJob('legacy-2', 'bie', 'oom'); + + expect(releaseActiveJob).not.toHaveBeenCalled(); + const stored = JSON.parse(redis.store.get('job:legacy-2')); + 
expect(stored.status).toBe('FAILED'); + }); +}); diff --git a/apps/task-scheduler/src/services/__tests__/jobService.t9.test.js b/apps/task-scheduler/src/services/__tests__/jobService.t9.test.js new file mode 100644 index 0000000..302c780 --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/jobService.t9.test.js @@ -0,0 +1,620 @@ +/** + * jobService T9 介面單元測試 — stage_timings + 終態 release_active_job。 + * + * 範圍: + * 1. advanceJob 寫入 stage_timings.{completedStage}.completed_at + * 2. advanceJob 推進到下一階段時寫 stage_timings.{nextStage}.started_at + * 3. advanceJob 達到 COMPLETED 時呼叫 release_active_job(若有 user_id) + * 4. failJob 寫入 stage_timings.{step}.completed_at + * 5. failJob 呼叫 release_active_job(若有 user_id) + * 6. legacy job(無 user_id)終態時不呼叫 release(避免無效 NOOP) + * 7. release Lua 失敗不阻塞 advance / fail(fire-and-forget + log) + * 8. stage_timings 結構初始化 / fallback(無 stage_timings 時也能寫) + * 9. 失敗時其他 stage 仍維持 null(只標 fail 該 stage 已結束) + * 10. ISO 8601 時間格式正確 + */ + +'use strict'; + +// Mock luaScripts,控制 release 結果而不啟動真 Redis Lua +jest.mock('../../redis/luaScripts', () => ({ + claimActiveJob: jest.fn(), + releaseActiveJob: jest.fn(), + _internals: { + loadScript: jest.fn(), + evalScript: jest.fn(), + resetCache: jest.fn(), + }, +})); + +const { releaseActiveJob } = require('../../redis/luaScripts'); +const { createJobService } = require('../jobService'); + +function makeFakeRedis() { + const store = new Map(); + const xaddCalls = []; + return { + store, + xaddCalls, + get: jest.fn(async (key) => (store.has(key) ? 
store.get(key) : null)), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + xadd: jest.fn(async (queue, _id, _field, value) => { + xaddCalls.push([queue, value]); + return '1-0'; + }), + }; +} + +function makeFakeSseService() { + return { sendSSE: jest.fn() }; +} + +beforeAll(() => { + // 抑制 console 雜訊(jobService.releaseActiveJobOnTerminal 會 log) + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); +}); + +afterAll(() => { + jest.restoreAllMocks(); +}); + +beforeEach(() => { + releaseActiveJob.mockReset(); +}); + +// --------------------------------------------------------------------------- +// advanceJob — stage_timings 寫入 +// --------------------------------------------------------------------------- + +describe('jobService.advanceJob — stage_timings (T9)', () => { + let redis; + let sse; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sse = makeFakeSseService(); + svc = createJobService({ redis, sseService: sse, jobDataDir: '/data/jobs' }); + }); + + it('writes stage_timings.onnx.completed_at + bie.started_at when advancing onnx → bie', async () => { + // v1 場景:建 job 已寫過 onnx.started_at(由 createJobHandler 寫入) + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'ONNX', + stage: 'onnx', + progress: 0, + created_at: '2026-04-25T12:00:00Z', + stage_timings: { + onnx: { started_at: '2026-04-25T12:00:00Z', completed_at: null }, + bie: { started_at: null, completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + }) + ); + + const t0 = Date.now() - 1; + await svc.advanceJob('j', 'onnx'); + const t1 = Date.now() + 1; + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('BIE'); + expect(stored.stage).toBe('bie'); + expect(stored.progress).toBe(33); + + // onnx.completed_at 在 [t0, t1] 區間 + const onnxCompleted = stored.stage_timings.onnx.completed_at; + expect(typeof 
onnxCompleted).toBe('string'); + expect(new Date(onnxCompleted).getTime()).toBeGreaterThanOrEqual(t0); + expect(new Date(onnxCompleted).getTime()).toBeLessThanOrEqual(t1); + // onnx.started_at 保留(沒被覆寫) + expect(stored.stage_timings.onnx.started_at).toBe('2026-04-25T12:00:00Z'); + + // bie.started_at 在 [t0, t1] + const bieStarted = stored.stage_timings.bie.started_at; + expect(typeof bieStarted).toBe('string'); + expect(new Date(bieStarted).getTime()).toBeGreaterThanOrEqual(t0); + expect(new Date(bieStarted).getTime()).toBeLessThanOrEqual(t1); + expect(stored.stage_timings.bie.completed_at).toBeNull(); + + // nef 仍未開工 + expect(stored.stage_timings.nef.started_at).toBeNull(); + expect(stored.stage_timings.nef.completed_at).toBeNull(); + }); + + it('writes stage_timings.bie.completed_at + nef.started_at when advancing bie → nef', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'BIE', + stage: 'bie', + progress: 33, + created_at: 'tA', + stage_timings: { + onnx: { started_at: '2026-04-25T12:00:00Z', completed_at: '2026-04-25T12:05:00Z' }, + bie: { started_at: '2026-04-25T12:05:00Z', completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + }) + ); + + await svc.advanceJob('j', 'bie'); + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('NEF'); + expect(stored.stage).toBe('nef'); + expect(stored.progress).toBe(67); + expect(stored.stage_timings.bie.completed_at).not.toBeNull(); + expect(stored.stage_timings.nef.started_at).not.toBeNull(); + expect(stored.stage_timings.nef.completed_at).toBeNull(); + }); + + it('writes stage_timings.nef.completed_at when reaching COMPLETED', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'NEF', + stage: 'nef', + progress: 67, + created_at: 'tA', + stage_timings: { + onnx: { started_at: 'tA', completed_at: 'tA' }, + bie: { started_at: 'tA', completed_at: 'tA' }, + nef: { 
started_at: 'tA', completed_at: null }, + }, + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + await svc.advanceJob('j', 'nef'); + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('COMPLETED'); + expect(stored.stage).toBeNull(); + expect(stored.progress).toBe(100); + expect(stored.stage_timings.nef.completed_at).not.toBeNull(); + // nef.completed_at 是有效的 ISO 8601 + expect(stored.stage_timings.nef.completed_at).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/); + }); + + it('initializes stage_timings struct when missing (legacy job)', async () => { + // legacy job 沒寫 stage_timings 欄位 + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + // user_id 缺漏(legacy) + status: 'ONNX', + stage: 'onnx', + progress: 0, + created_at: 'tA', + }) + ); + + await svc.advanceJob('j', 'onnx'); + + const stored = JSON.parse(redis.store.get('job:j')); + // stage_timings 結構被初始化 + expect(stored.stage_timings).toBeDefined(); + expect(stored.stage_timings.onnx.completed_at).not.toBeNull(); + expect(stored.stage_timings.bie.started_at).not.toBeNull(); + expect(stored.stage_timings.nef.started_at).toBeNull(); + expect(stored.stage_timings.nef.completed_at).toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// failJob — stage_timings 寫入 +// --------------------------------------------------------------------------- + +describe('jobService.failJob — stage_timings (T9)', () => { + let redis; + let sse; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sse = makeFakeSseService(); + svc = createJobService({ redis, sseService: sse }); + }); + + it('writes stage_timings.{step}.completed_at on failure', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'BIE', + stage: 'bie', + stage_timings: { + onnx: { started_at: 'tA', completed_at: 'tA' }, + bie: { started_at: 'tB', completed_at: null }, + nef: { 
started_at: null, completed_at: null }, + }, + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + const t0 = Date.now() - 1; + await svc.failJob('j', 'bie', 'quantization timeout'); + const t1 = Date.now() + 1; + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('FAILED'); + expect(stored.error).toEqual({ step: 'bie', reason: 'quantization timeout' }); + + // bie.completed_at 在 [t0, t1] 區間 + const bieCompleted = stored.stage_timings.bie.completed_at; + expect(typeof bieCompleted).toBe('string'); + expect(new Date(bieCompleted).getTime()).toBeGreaterThanOrEqual(t0); + expect(new Date(bieCompleted).getTime()).toBeLessThanOrEqual(t1); + }); + + it('keeps other stages null on failure (only marks failed stage as ended)', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'BIE', + stage: 'bie', + stage_timings: { + onnx: { started_at: 'tA', completed_at: 'tA' }, + bie: { started_at: 'tB', completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + await svc.failJob('j', 'bie', 'reason'); + + const stored = JSON.parse(redis.store.get('job:j')); + // 其他 stage 維持 null(不一次填補) + expect(stored.stage_timings.nef.started_at).toBeNull(); + expect(stored.stage_timings.nef.completed_at).toBeNull(); + // onnx 維持原樣 + expect(stored.stage_timings.onnx.started_at).toBe('tA'); + expect(stored.stage_timings.onnx.completed_at).toBe('tA'); + }); + + it('does NOT touch stage_timings if step is unknown', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'BIE', + stage: 'bie', + stage_timings: { + onnx: { started_at: 'tA', completed_at: null }, + bie: null, + nef: null, + }, + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + await svc.failJob('j', 'unknown-step', 'reason'); + + const 
stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('FAILED'); + // unknown step 不寫入任何 stage_timings;onnx 應維持原樣 + expect(stored.stage_timings.onnx.started_at).toBe('tA'); + expect(stored.stage_timings.onnx.completed_at).toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// release_active_job 觸發點(COMPLETED / FAILED + user_id 處理) +// --------------------------------------------------------------------------- + +describe('jobService.advanceJob — release_active_job on COMPLETED (T9)', () => { + let redis; + let sse; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sse = makeFakeSseService(); + svc = createJobService({ redis, sseService: sse }); + }); + + function setupNefJob(overrides = {}) { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'NEF', + stage: 'nef', + progress: 67, + stage_timings: { + onnx: { started_at: 'tA', completed_at: 'tA' }, + bie: { started_at: 'tB', completed_at: 'tB' }, + nef: { started_at: 'tC', completed_at: null }, + }, + ...overrides, + }) + ); + } + + it('calls release_active_job with userId + jobId when v1 job completes', async () => { + setupNefJob({ user_id: 'alice' }); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + await svc.advanceJob('j', 'nef'); + + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + const args = releaseActiveJob.mock.calls[0][1]; + expect(args.userId).toBe('alice'); + expect(args.jobId).toBe('j'); + }); + + it('does NOT call release for legacy job (no user_id)', async () => { + setupNefJob({ user_id: null }); + + await svc.advanceJob('j', 'nef'); + + expect(releaseActiveJob).not.toHaveBeenCalled(); + // 但 job 仍正常被標 COMPLETED + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('COMPLETED'); + }); + + it('does NOT call release when user_id is empty string (web-anonymous fallback)', async () => { + setupNefJob({ user_id: '' 
}); + + await svc.advanceJob('j', 'nef'); + + expect(releaseActiveJob).not.toHaveBeenCalled(); + }); + + it('does NOT call release when user_id is missing (undefined)', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + // user_id 完全缺漏 + status: 'NEF', + stage: 'nef', + progress: 67, + stage_timings: { + onnx: { started_at: 'tA', completed_at: 'tA' }, + bie: { started_at: 'tB', completed_at: 'tB' }, + nef: { started_at: 'tC', completed_at: null }, + }, + }) + ); + + await svc.advanceJob('j', 'nef'); + expect(releaseActiveJob).not.toHaveBeenCalled(); + }); + + it('does NOT call release on intermediate stage advancement (onnx → bie)', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u-1', + status: 'ONNX', + stage: 'onnx', + progress: 0, + stage_timings: { + onnx: { started_at: 'tA', completed_at: null }, + bie: { started_at: null, completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + }) + ); + + await svc.advanceJob('j', 'onnx'); + + expect(releaseActiveJob).not.toHaveBeenCalled(); + }); + + it('does NOT throw when release Lua throws (fire-and-forget)', async () => { + setupNefJob({ user_id: 'alice' }); + releaseActiveJob.mockRejectedValueOnce(new Error('Redis down')); + + // advance 不應 throw,即便 release 失敗 + await expect(svc.advanceJob('j', 'nef')).resolves.toBeUndefined(); + + // job 仍應已標 COMPLETED(advance 邏輯先於 release) + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('COMPLETED'); + }); + + it('does not throw when release returns NOOP (atomic guard hit)', async () => { + setupNefJob({ user_id: 'alice' }); + // active_job 已被別人改寫,Lua atomic guard 回 NOOP + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: false }); + + await expect(svc.advanceJob('j', 'nef')).resolves.toBeUndefined(); + + // 仍呼叫 release(讓 Lua 自己決定 NOOP),但不 throw + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + }); +}); + +describe('jobService.failJob — 
release_active_job on FAILED (T9)', () => { + let redis; + let sse; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sse = makeFakeSseService(); + svc = createJobService({ redis, sseService: sse }); + }); + + it('calls release_active_job when v1 job fails', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'bob', + status: 'BIE', + stage: 'bie', + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + await svc.failJob('j', 'bie', 'oom'); + + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + const args = releaseActiveJob.mock.calls[0][1]; + expect(args.userId).toBe('bob'); + expect(args.jobId).toBe('j'); + }); + + it('does NOT call release for legacy job on failure', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + // 無 user_id + status: 'BIE', + stage: 'bie', + }) + ); + + await svc.failJob('j', 'bie', 'oom'); + + expect(releaseActiveJob).not.toHaveBeenCalled(); + }); + + it('does NOT throw when release Lua throws on failure', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'bob', + status: 'BIE', + stage: 'bie', + }) + ); + releaseActiveJob.mockRejectedValueOnce(new Error('Redis down')); + + await expect(svc.failJob('j', 'bie', 'reason')).resolves.toBeUndefined(); + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('FAILED'); + }); +}); + +// --------------------------------------------------------------------------- +// Race scenario:releaseActiveJob 的 atomic guard 行為 +// --------------------------------------------------------------------------- + +describe('release_active_job atomic guard scenario (T9 + Lua)', () => { + // 為什麼這個測試重要: + // T9 完成 / 失敗時呼叫 release_active_job.lua;該 Lua 內部會 GET → 比對 → + // DEL(如果 active_job 還是當前 jobId)。本 spec 驗證 jobService 把正確的 + // userId / jobId 傳給 Lua,讓 Lua 自己做 atomic 判斷。 + // 實際 atomic 行為由 luaScripts.test.js 的 release_active_job.lua sanity + 
// check 驗證;此測試聚焦 jobService 的 wiring。 + + let redis; + let sse; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sse = makeFakeSseService(); + svc = createJobService({ redis, sseService: sse }); + }); + + it('passes the completing job_id to release Lua (not some other id)', async () => { + redis.store.set( + 'job:j-A', + JSON.stringify({ + job_id: 'j-A', + user_id: 'shared-user', + status: 'NEF', + stage: 'nef', + progress: 67, + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: true }); + + await svc.advanceJob('j-A', 'nef'); + + expect(releaseActiveJob).toHaveBeenCalledTimes(1); + const args = releaseActiveJob.mock.calls[0][1]; + expect(args.userId).toBe('shared-user'); + // 關鍵:傳的是「正在完成的 job_id」,Lua 才能判斷 active_job 是否仍指向自己 + expect(args.jobId).toBe('j-A'); + }); + + it('handles released=false (Lua NOOP) without confusion', async () => { + // 模擬:j-A 完成的瞬間,user 已搶到下一個 job j-B 並寫入 active_job + // → Lua GET active_job 回 'j-B' ≠ 'j-A' → NOOP + redis.store.set( + 'job:j-A', + JSON.stringify({ + job_id: 'j-A', + user_id: 'shared-user', + status: 'NEF', + stage: 'nef', + }) + ); + releaseActiveJob.mockResolvedValueOnce({ ok: true, released: false }); + + await svc.advanceJob('j-A', 'nef'); + + // jobService 不該 throw 也不該嘗試 retry;Lua 的 NOOP 是正確的「保護其他 holder」 + const stored = JSON.parse(redis.store.get('job:j-A')); + expect(stored.status).toBe('COMPLETED'); + }); +}); + +// --------------------------------------------------------------------------- +// stage_timings ISO 8601 格式驗證 +// --------------------------------------------------------------------------- + +describe('stage_timings ISO 8601 format (T9)', () => { + it('written timestamps are valid ISO 8601 strings', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + }); + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + user_id: 'u', + status: 'ONNX', + stage: 'onnx', + progress: 0, + 
stage_timings: { + onnx: { started_at: '2026-04-25T12:00:00.000Z', completed_at: null }, + bie: { started_at: null, completed_at: null }, + nef: { started_at: null, completed_at: null }, + }, + }) + ); + + await svc.advanceJob('j', 'onnx'); + + const stored = JSON.parse(redis.store.get('job:j')); + // onnx.completed_at 與 bie.started_at 必為 ISO 8601 (含 milli + Z) + const ISO_8601_RE = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,3})?Z$/; + expect(stored.stage_timings.onnx.completed_at).toMatch(ISO_8601_RE); + expect(stored.stage_timings.bie.started_at).toMatch(ISO_8601_RE); + // 而且能成功 parse + expect(Number.isFinite(new Date(stored.stage_timings.onnx.completed_at).getTime())).toBe(true); + expect(Number.isFinite(new Date(stored.stage_timings.bie.started_at).getTime())).toBe(true); + }); +}); diff --git a/apps/task-scheduler/src/services/__tests__/jobService.test.js b/apps/task-scheduler/src/services/__tests__/jobService.test.js new file mode 100644 index 0000000..14437ec --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/jobService.test.js @@ -0,0 +1,324 @@ +/** + * jobService 單元測試(T4)。 + * + * 重點: + * 1. getJob / setJob 正確讀寫 Redis + * 2. setJob 自動更新 updated_at + 觸發 sseService.sendSSE + * 3. enqueueStage 把 message JSON 寫入正確 stream + * 4. advanceJob 的階段轉移 / 進度計算 / 完成判定 + * 5. failJob 設 FAILED + error 物件 + * + * 採依賴注入(jest fn mock redis + sseService),不需真 Redis。 + */ + +'use strict'; + +const path = require('path'); +const { + createJobService, + STAGES, + STAGE_QUEUES, + DONE_QUEUE, + DONE_GROUP, +} = require('../jobService'); + +/** 建立一個 in-memory 假 Redis client。 */ +function makeFakeRedis() { + const store = new Map(); + /** @type {Array<[string, string]>} 記錄 xadd 呼叫:[queue, message] */ + const xaddCalls = []; + + return { + store, + xaddCalls, + get: jest.fn(async (key) => { + return store.has(key) ? 
store.get(key) : null; + }), + set: jest.fn(async (key, value) => { + store.set(key, value); + return 'OK'; + }), + xadd: jest.fn(async (queue, _id, _field, value) => { + xaddCalls.push([queue, value]); + return '1-0'; + }), + }; +} + +function makeFakeSseService() { + return { + sendSSE: jest.fn(), + }; +} + +beforeAll(() => { + // 抑制 console.log 雜訊(jobService 對齊 server.js 會 log) + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); +}); + +afterAll(() => { + jest.restoreAllMocks(); +}); + +describe('jobService — exported constants', () => { + it('STAGES order matches legacy server.js', () => { + expect(STAGES).toEqual(['onnx', 'bie', 'nef']); + }); + + it('STAGE_QUEUES uses queue: keys', () => { + expect(STAGE_QUEUES).toEqual({ + onnx: 'queue:onnx', + bie: 'queue:bie', + nef: 'queue:nef', + }); + }); + + it('DONE_QUEUE / DONE_GROUP match legacy', () => { + expect(DONE_QUEUE).toBe('queue:done'); + expect(DONE_GROUP).toBe('scheduler'); + }); +}); + +describe('jobService factory — argument validation', () => { + it('throws if redis is missing', () => { + expect(() => createJobService({ sseService: makeFakeSseService() })).toThrow(/redis/i); + }); + + it('throws if sseService is missing', () => { + expect(() => createJobService({ redis: makeFakeRedis() })).toThrow(/sseService/i); + }); + + it('throws if sseService.sendSSE is not a function', () => { + expect(() => + createJobService({ redis: makeFakeRedis(), sseService: {} }) + ).toThrow(/sendSSE/); + }); +}); + +describe('jobService.getJob / setJob', () => { + it('returns null when key does not exist', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ redis, sseService: makeFakeSseService() }); + const result = await svc.getJob('missing'); + expect(result).toBeNull(); + expect(redis.get).toHaveBeenCalledWith('job:missing'); + }); + + it('parses stored JSON', async () => { + const redis = makeFakeRedis(); + 
redis.store.set('job:abc', JSON.stringify({ job_id: 'abc', status: 'ONNX' })); + const svc = createJobService({ redis, sseService: makeFakeSseService() }); + const result = await svc.getJob('abc'); + expect(result).toEqual({ job_id: 'abc', status: 'ONNX' }); + }); + + it('setJob updates updated_at, writes JSON, and triggers sendSSE', async () => { + const redis = makeFakeRedis(); + const sse = makeFakeSseService(); + const svc = createJobService({ redis, sseService: sse }); + + const before = Date.now() - 1; + const job = { job_id: 'abc', status: 'ONNX' }; + await svc.setJob('abc', job); + const after = Date.now() + 1; + + // updated_at 已被自動寫入 + expect(typeof job.updated_at).toBe('string'); + const updatedAtMs = new Date(job.updated_at).getTime(); + expect(updatedAtMs).toBeGreaterThanOrEqual(before); + expect(updatedAtMs).toBeLessThanOrEqual(after); + + // 寫入正確 key + 內容 + expect(redis.set).toHaveBeenCalledWith('job:abc', JSON.stringify(job)); + expect(redis.store.get('job:abc')).toBe(JSON.stringify(job)); + + // 通知 SSE + expect(sse.sendSSE).toHaveBeenCalledWith('abc', job); + }); +}); + +describe('jobService.enqueueStage', () => { + it('writes message to correct stream with input_dir derived from JOB_DATA_DIR', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + jobDataDir: '/tmp/jobs-test', + }); + const job = { + job_id: 'job-1', + created_at: '2026-04-25T00:00:00Z', + parameters: { model_id: 1001 }, + }; + + await svc.enqueueStage('onnx', job); + + expect(redis.xadd).toHaveBeenCalledTimes(1); + const [queue, , , value] = redis.xadd.mock.calls[0]; + expect(queue).toBe(STAGE_QUEUES.onnx); + const message = JSON.parse(value); + expect(message).toEqual({ + job_id: 'job-1', + created_at: '2026-04-25T00:00:00Z', + input_dir: path.join('/tmp/jobs-test', 'job-1'), + parameters: { model_id: 1001 }, + }); + }); + + it('throws when stage is unknown', async () => { + const svc = createJobService({ + 
redis: makeFakeRedis(), + sseService: makeFakeSseService(), + }); + await expect(svc.enqueueStage('xxx', { job_id: 'a' })).rejects.toThrow(/Unknown stage/); + }); + + it('falls back to empty parameters object', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ + redis, + sseService: makeFakeSseService(), + jobDataDir: '/data/jobs', + }); + const job = { job_id: 'job-2', created_at: 't' }; + await svc.enqueueStage('bie', job); + const [, , , value] = redis.xadd.mock.calls[0]; + expect(JSON.parse(value).parameters).toEqual({}); + }); +}); + +describe('jobService.advanceJob', () => { + let redis; + let sse; + let svc; + + beforeEach(() => { + redis = makeFakeRedis(); + sse = makeFakeSseService(); + svc = createJobService({ redis, sseService: sse, jobDataDir: '/data/jobs' }); + }); + + it('does nothing when job is missing', async () => { + await svc.advanceJob('missing', 'onnx'); + expect(redis.set).not.toHaveBeenCalled(); + expect(redis.xadd).not.toHaveBeenCalled(); + }); + + it('does nothing when stage is unknown', async () => { + redis.store.set( + 'job:abc', + JSON.stringify({ job_id: 'abc', status: 'ONNX', stage: 'onnx' }) + ); + await svc.advanceJob('abc', 'INVALID'); + expect(redis.set).not.toHaveBeenCalled(); + }); + + it('advances onnx → bie with progress=33 and enqueues to queue:bie', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + status: 'ONNX', + stage: 'onnx', + progress: 0, + created_at: 'tA', + }) + ); + + await svc.advanceJob('j', 'onnx'); + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('BIE'); + expect(stored.stage).toBe('bie'); + expect(stored.progress).toBe(33); // round(1/3 * 100) + + expect(redis.xadd).toHaveBeenCalledTimes(1); + expect(redis.xadd.mock.calls[0][0]).toBe(STAGE_QUEUES.bie); + }); + + it('advances bie → nef with progress=67 (matches legacy rounding)', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + 
status: 'BIE', + stage: 'bie', + progress: 33, + created_at: 'tA', + }) + ); + + await svc.advanceJob('j', 'bie'); + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('NEF'); + expect(stored.stage).toBe('nef'); + expect(stored.progress).toBe(67); + expect(redis.xadd.mock.calls[0][0]).toBe(STAGE_QUEUES.nef); + }); + + it('on completing nef, sets COMPLETED + stage=null + progress=100, no enqueue', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + status: 'NEF', + stage: 'nef', + progress: 67, + created_at: 'tA', + }) + ); + + await svc.advanceJob('j', 'nef'); + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('COMPLETED'); + expect(stored.stage).toBeNull(); + expect(stored.progress).toBe(100); + + // 沒有再 enqueue + expect(redis.xadd).not.toHaveBeenCalled(); + }); + + it('triggers SSE on each transition', async () => { + redis.store.set( + 'job:j', + JSON.stringify({ + job_id: 'j', + status: 'ONNX', + stage: 'onnx', + progress: 0, + }) + ); + await svc.advanceJob('j', 'onnx'); + expect(sse.sendSSE).toHaveBeenCalledTimes(1); + expect(sse.sendSSE.mock.calls[0][0]).toBe('j'); + }); +}); + +describe('jobService.failJob', () => { + it('does nothing when job is missing', async () => { + const redis = makeFakeRedis(); + const svc = createJobService({ redis, sseService: makeFakeSseService() }); + await svc.failJob('missing', 'onnx', 'oom'); + expect(redis.set).not.toHaveBeenCalled(); + }); + + it('sets FAILED and error object', async () => { + const redis = makeFakeRedis(); + const sse = makeFakeSseService(); + redis.store.set( + 'job:j', + JSON.stringify({ job_id: 'j', status: 'BIE', error: null }) + ); + const svc = createJobService({ redis, sseService: sse }); + + await svc.failJob('j', 'bie', 'quantization timeout'); + + const stored = JSON.parse(redis.store.get('job:j')); + expect(stored.status).toBe('FAILED'); + expect(stored.error).toEqual({ step: 'bie', reason: 
'quantization timeout' }); + expect(sse.sendSSE).toHaveBeenCalledWith('j', stored); + }); +}); diff --git a/apps/task-scheduler/src/services/__tests__/sseService.test.js b/apps/task-scheduler/src/services/__tests__/sseService.test.js new file mode 100644 index 0000000..d9f502d --- /dev/null +++ b/apps/task-scheduler/src/services/__tests__/sseService.test.js @@ -0,0 +1,169 @@ +/** + * sseService 單元測試(T4)。 + * + * 著重驗證: + * 1. sendSSE 對指定 jobId 的所有 listener 廣播 + * 2. 沒有 listener 時 sendSSE 不會 throw + * 3. registerSseClient 的 headers / 立即推送 / heartbeat / cleanup + */ + +'use strict'; + +const { EventEmitter } = require('events'); +const { createSseService } = require('../sseService'); + +/** 簡易 res mock,記錄 writeHead / write / setHeader 呼叫。 */ +function makeRes() { + const res = new EventEmitter(); + res.headers = {}; + res.writeHead = jest.fn((status, headers) => { + res.statusCode = status; + if (headers) Object.assign(res.headers, headers); + return res; + }); + res.write = jest.fn(); + res.setHeader = jest.fn((k, v) => { + res.headers[k] = v; + }); + return res; +} + +/** 簡易 req mock,可觸發 'close' 事件。 */ +function makeReq() { + return new EventEmitter(); +} + +describe('sseService', () => { + /** 累積測試中註冊的 req 以便 afterEach 統一觸發 'close',避免 setInterval 殘留。 */ + const createdReqs = []; + function trackReq() { + const r = makeReq(); + createdReqs.push(r); + return r; + } + afterEach(() => { + while (createdReqs.length > 0) { + const r = createdReqs.shift(); + r.emit('close'); + } + }); + + describe('sendSSE', () => { + it('does nothing when no clients are registered for jobId', () => { + const svc = createSseService(); + // 不該 throw + expect(() => svc.sendSSE('job-x', { hello: 'world' })).not.toThrow(); + }); + + it('writes JSON SSE payload to all registered listeners', () => { + const svc = createSseService(); + const res1 = makeRes(); + const res2 = makeRes(); + const req1 = trackReq(); + const req2 = trackReq(); + svc.registerSseClient('job-1', { status: 'ONNX' }, res1, 
req1); + svc.registerSseClient('job-1', { status: 'ONNX' }, res2, req2); + + // 清掉 register 時的 initial write 紀錄 + res1.write.mockClear(); + res2.write.mockClear(); + + svc.sendSSE('job-1', { progress: 50 }); + + const expected = `data: ${JSON.stringify({ progress: 50 })}\n\n`; + expect(res1.write).toHaveBeenCalledWith(expected); + expect(res2.write).toHaveBeenCalledWith(expected); + }); + + it('does not broadcast to listeners of other jobs', () => { + const svc = createSseService(); + const resA = makeRes(); + const resB = makeRes(); + svc.registerSseClient('job-A', { s: 1 }, resA, trackReq()); + svc.registerSseClient('job-B', { s: 1 }, resB, trackReq()); + resA.write.mockClear(); + resB.write.mockClear(); + + svc.sendSSE('job-A', { progress: 100 }); + + expect(resA.write).toHaveBeenCalledTimes(1); + expect(resB.write).not.toHaveBeenCalled(); + }); + }); + + describe('registerSseClient', () => { + afterEach(() => { + jest.useRealTimers(); + }); + + it('writes SSE headers and initial state immediately', () => { + const svc = createSseService(); + const res = makeRes(); + const req = trackReq(); + const initial = { job_id: 'j-1', status: 'ONNX' }; + + svc.registerSseClient('j-1', initial, res, req); + + expect(res.writeHead).toHaveBeenCalledWith(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + }); + expect(res.write).toHaveBeenCalledWith(`data: ${JSON.stringify(initial)}\n\n`); + }); + + it('removes listener when req emits close, and removes the jobId entry when last listener leaves', () => { + const svc = createSseService(); + const res = makeRes(); + const req = makeReq(); // 不 track,因為下面就會 close + + svc.registerSseClient('j-2', { x: 1 }, res, req); + expect(svc._getClientsMap().has('j-2')).toBe(true); + expect(svc._getClientsMap().get('j-2').size).toBe(1); + + req.emit('close'); + + expect(svc._getClientsMap().has('j-2')).toBe(false); + }); + + it('keeps the jobId entry when there are remaining listeners', 
/**
 * Unit tests for statusMapper (T6).
 *
 * Covered here:
 *   - toExternalStatus for every legal internal status
 *   - the created-vs-running boundary while the job is in ONNX
 *     (decided by stage_timings.onnx.started_at)
 *   - FAILED stage resolution order: error.stage, then error.step, then job.stage
 *   - defensive fallbacks: null / undefined / non-object input, unknown status
 *   - isInProgress and the exported constant tables
 */

'use strict';

const statusMapper = require('../statusMapper');

const { toExternalStatus, isInProgress, EXTERNAL_STATUS, EXTERNAL_STAGE } = statusMapper;

/** Builds the `{ status, stage }` pair a mapping row expects. */
const ext = (status, stage) => ({ status, stage });

/**
 * Registers one `it` per row. A row is `[title, inputs, expected]`; every
 * element of `inputs` must map to `expected`.
 */
function mappingSuite(rows) {
  rows.forEach(([title, inputs, expected]) => {
    it(title, () => {
      inputs.forEach((input) => {
        expect(toExternalStatus(input)).toEqual(expected);
      });
    });
  });
}

describe('toExternalStatus', () => {
  describe('CREATED 階段(ONNX + onnx.started_at == null)', () => {
    mappingSuite([
      [
        'returns created/onnx when stage_timings is null',
        [{ status: 'ONNX', stage: 'onnx', stage_timings: null }],
        ext('created', 'onnx'),
      ],
      [
        'returns created/onnx when stage_timings.onnx is null',
        [{ status: 'ONNX', stage: 'onnx', stage_timings: { onnx: null, bie: null, nef: null } }],
        ext('created', 'onnx'),
      ],
      [
        'returns created/onnx when stage_timings.onnx.started_at is null',
        [
          {
            status: 'ONNX',
            stage: 'onnx',
            stage_timings: { onnx: { started_at: null, completed_at: null } },
          },
        ],
        ext('created', 'onnx'),
      ],
    ]);
  });

  describe('RUNNING 階段', () => {
    mappingSuite([
      [
        'returns running/onnx when ONNX + onnx.started_at present',
        [
          {
            status: 'ONNX',
            stage: 'onnx',
            stage_timings: {
              onnx: { started_at: '2026-04-25T12:00:05Z', completed_at: null },
            },
          },
        ],
        ext('running', 'onnx'),
      ],
      [
        'returns running/bie when status is BIE',
        [{ status: 'BIE', stage: 'bie' }],
        ext('running', 'bie'),
      ],
      [
        'returns running/nef when status is NEF',
        [{ status: 'NEF', stage: 'nef' }],
        ext('running', 'nef'),
      ],
      // BIE / NEF must not consult stage_timings.onnx at all.
      [
        'returns running/bie regardless of onnx.started_at being null',
        [{ status: 'BIE', stage: 'bie', stage_timings: { onnx: null } }],
        ext('running', 'bie'),
      ],
    ]);
  });

  describe('COMPLETED 階段', () => {
    mappingSuite([
      ['returns completed/null', [{ status: 'COMPLETED', stage: null }], ext('completed', null)],
      // Even if the record is inconsistent, the external contract is stage=null.
      [
        'returns completed/null even if stage somehow has value',
        [{ status: 'COMPLETED', stage: 'nef' }],
        ext('completed', null),
      ],
    ]);
  });

  describe('FAILED 階段', () => {
    mappingSuite([
      [
        'uses error.stage when present',
        [{ status: 'FAILED', stage: 'bie', error: { stage: 'bie', code: 'quantization_failed' } }],
        ext('failed', 'bie'),
      ],
      // The legacy advanceJob path records the failing stage under error.step.
      [
        'falls back to error.step when error.stage missing',
        [{ status: 'FAILED', stage: 'onnx', error: { step: 'onnx', reason: 'oom' } }],
        ext('failed', 'onnx'),
      ],
      [
        'falls back to job.stage when error has neither stage nor step',
        [{ status: 'FAILED', stage: 'nef', error: { reason: 'unknown' } }],
        ext('failed', 'nef'),
      ],
      [
        'returns null stage when no stage info at all',
        [{ status: 'FAILED', error: null }],
        ext('failed', null),
      ],
    ]);
  });

  describe('防禦性 fallback', () => {
    mappingSuite([
      ['returns created/null for null input', [null], ext('created', null)],
      ['returns created/null for undefined input', [undefined], ext('created', null)],
      ['returns created/null for non-object input', ['string', 42], ext('created', null)],
      [
        'returns created/null for unknown internal status',
        [{ status: 'WEIRD_UNKNOWN_STATE', stage: 'foo' }],
        ext('created', null),
      ],
      ['returns created/null when status is missing', [{ stage: 'onnx' }], ext('created', null)],
    ]);
  });
});

describe('isInProgress', () => {
  const rows = [
    ['returns true for created', ['created'], true],
    ['returns true for running', ['running'], true],
    ['returns false for completed', ['completed'], false],
    ['returns false for failed', ['failed'], false],
    ['returns false for unknown status', ['weird', '', null, undefined], false],
  ];

  rows.forEach(([title, inputs, expected]) => {
    it(title, () => {
      inputs.forEach((value) => {
        expect(isInProgress(value)).toBe(expected);
      });
    });
  });
});

describe('exports', () => {
  it('exports EXTERNAL_STATUS constants', () => {
    const expected = {
      CREATED: 'created',
      RUNNING: 'running',
      COMPLETED: 'completed',
      FAILED: 'failed',
    };
    expect(EXTERNAL_STATUS).toEqual(expected);
  });

  it('exports EXTERNAL_STAGE constants', () => {
    const expected = { ONNX: 'onnx', BIE: 'bie', NEF: 'nef' };
    expect(EXTERNAL_STAGE).toEqual(expected);
  });

  it('EXTERNAL_STATUS is frozen', () => {
    expect(Object.isFrozen(EXTERNAL_STATUS)).toBe(true);
  });

  it('EXTERNAL_STAGE is frozen', () => {
    expect(Object.isFrozen(EXTERNAL_STAGE)).toBe(true);
  });
});
/**
 * Make sure a consumer group exists on each worker stream (onnx / bie / nef).
 *
 * The groups are ensured one after another (each call is awaited before the
 * next starts); a rejection from ensureConsumerGroup propagates to the caller.
 *
 * @param {import('ioredis').Redis} redis - client handed through to ensureConsumerGroup
 * @returns {Promise<void>}
 */
async function ensureWorkerGroups(redis) {
  // stream name -> consumer group name
  const groupByStream = {
    [STAGE_QUEUES.onnx]: 'onnx-workers',
    [STAGE_QUEUES.bie]: 'bie-workers',
    [STAGE_QUEUES.nef]: 'nef-workers',
  };
  const streams = Object.keys(groupByStream);
  for (let i = 0; i < streams.length; i += 1) {
    const stream = streams[i];
    await ensureConsumerGroup(redis, stream, groupByStream[stream]);
  }
}
/**
 * Create the done-queue listener: a long-running loop that reads worker
 * completion events from DONE_QUEUE and advances or fails the matching job.
 *
 * Loop behaviour:
 *   1. `start()` first ensures the DONE_GROUP consumer group exists on DONE_QUEUE.
 *   2. It then repeats XREADGROUP (COUNT 10, BLOCK xreadBlockMs) until `stop()`
 *      has been called.
 *   3. For every entry the JSON payload is taken from `fields[1]`
 *      (fields = ['data', '{...}']):
 *        - result === 'ok'  -> jobService.advanceJob(job_id, step)
 *        - anything else    -> jobService.failJob(job_id, step, reason || 'Unknown error')
 *      XACK is sent only after the handler resolved; if parsing or the handler
 *      throws, the error is logged and the entry is left un-ACKed.
 *   4. Errors from XREADGROUP itself are logged and retried after a pause
 *      (3s for "Connection is closed", 1s otherwise); they never escape the loop.
 *
 * NOTE(review): the loop always reads with the `>` ID, which only returns
 * entries never delivered to the group. Un-ACKed entries therefore stay in the
 * pending list; nothing in this function claims or replays them.
 *
 * Shutdown: `stop()` only raises a flag. The loop notices it after the current
 * XREADGROUP returns (at most xreadBlockMs later), after the current batch has
 * been handled, or after the current retry pause; the promise returned by
 * `start()` then resolves. The flag is never reset, so a stopped listener is
 * not restartable — create a new one instead.
 *
 * @param {object} deps
 * @param {import('ioredis').Redis} deps.redis - main client (used for ensureConsumerGroup)
 * @param {import('ioredis').Redis} deps.redisSub - client used for the blocking reads and XACK
 * @param {{ advanceJob: Function, failJob: Function }} deps.jobService
 * @param {object} [opts]
 * @param {number} [opts.xreadBlockMs=5000] - BLOCK timeout; falsy values fall back to 5000
 * @param {string} [opts.consumerName] - defaults to `scheduler-${process.pid}`
 * @returns {{ start: () => Promise<void>, stop: () => void }}
 * @throws {Error} when redis, redisSub or jobService is missing from deps
 */
function startListenDone(deps, opts) {
  if (!deps || !deps.redis || !deps.redisSub) {
    throw new Error('[doneListener] deps.redis and deps.redisSub are required');
  }
  if (!deps.jobService) {
    throw new Error('[doneListener] deps.jobService is required');
  }
  const { redis, redisSub, jobService } = deps;
  const xreadBlockMs = (opts && opts.xreadBlockMs) || 5000;
  const consumerName = (opts && opts.consumerName) || `scheduler-${process.pid}`;

  let stopped = false;

  /** Handle one stream entry: parse, dispatch to jobService, then ACK. */
  async function handleEntry(messageId, fields) {
    const data = JSON.parse(fields[1]);
    const { job_id, step, result, reason } = data;

    // eslint-disable-next-line no-console
    console.log(`[Scheduler] Done event: job=${job_id} step=${step} result=${result}`);

    if (result === 'ok') {
      await jobService.advanceJob(job_id, step);
    } else {
      await jobService.failJob(job_id, step, reason || 'Unknown error');
    }

    // ACK last: a throwing handler must leave the entry un-ACKed.
    await redisSub.xack(DONE_QUEUE, DONE_GROUP, messageId);
  }

  async function start() {
    await ensureConsumerGroup(redis, DONE_QUEUE, DONE_GROUP);
    // eslint-disable-next-line no-console
    console.log(`[Scheduler] Listening on ${DONE_QUEUE} as ${consumerName}`);

    while (!stopped) {
      try {
        const results = await redisSub.xreadgroup(
          'GROUP', DONE_GROUP, consumerName,
          'COUNT', 10,
          'BLOCK', xreadBlockMs,
          'STREAMS', DONE_QUEUE, '>'
        );

        if (!results) continue;

        for (const [, messages] of results) {
          for (const [messageId, fields] of messages) {
            try {
              await handleEntry(messageId, fields);
            } catch (err) {
              // eslint-disable-next-line no-console
              console.error('[Scheduler] Error processing done event:', err);
            }
          }
        }
      } catch (err) {
        // During shutdown the caller usually closes the connection, which
        // surfaces here as an error; leave without logging or sleeping.
        if (stopped) break;

        const connectionLost =
          Boolean(err) &&
          typeof err.message === 'string' &&
          err.message.includes('Connection is closed');
        if (connectionLost) {
          // eslint-disable-next-line no-console
          console.error('[Scheduler] Redis connection lost, retrying in 3s...');
          await new Promise((r) => setTimeout(r, 3000));
        } else {
          // eslint-disable-next-line no-console
          console.error('[Scheduler] Done listener error:', err);
          await new Promise((r) => setTimeout(r, 1000));
        }
      }
    }
  }

  /** Ask the loop to end; see the shutdown notes on startListenDone. */
  function stop() {
    stopped = true;
  }

  return { start, stop };
}
**Redis 不獨立 ping**:用 ioredis 內建 `status` property('ready' / 'connecting' / 'close' + * / 'reconnecting' / 'end' / 'wait')即可判斷;既不消耗連線、也不會被慢 ping + * 阻塞。/health 直接讀 status,不必走 cache(讀 property 是 sync 的)。 + * + * 2. **MC / FAA 走背景 polling cache**:每 30s 一次併行打兩個 endpoint,timeout 3s; + * /health 從不主動觸發 fetch,只讀 cache。第一次 polling 完成前狀態為 'pending'。 + * 'pending' 不致命:整體 status 落到 'degraded',仍回 200,不影響部署初期的 readiness。 + * + * 3. **MC 用 JWKS endpoint 探測**:JWKS URL 是 Member Center 必有的 public endpoint, + * 既不需要憑證、也不會洩露任何敏感資訊;另外 jose 已在內部 cache JWKS,本探測 + * 完全獨立(用獨立 fetch,不靠 jose 的 cache,避免 polling 反而干擾 jose cache)。 + * + * 4. **FAA 探測策略**:先嘗試 `${baseUrl}/health`(多數服務的慣例);如果回 404 + * (表示沒實作此端點)也視為 reachable(至少網路層通),只有連線失敗 / 5xx / + * timeout 才算 unreachable。這比強制 FAA 必須實作 /health 來得寬容。 + * + * 5. **不洩漏內部資訊**:log / error message 都不含 endpoint URL、host、port; + * 對應 T7 修過的「minio_head_failed 洩漏 URL」教訓。snapshot 只回對外抽象狀態 + * (reachable / unreachable),不揭露錯誤原因細節。 + * + * 6. **graceful shutdown**:start() 是冪等的;stop() 清掉 setInterval 與 in-flight + * fetch(用 AbortController),讓 process 能乾淨結束。 + * + * 7. **冪等 start**:多次呼叫 start() 不會疊 setInterval;若已 running 直接 return。 + * + * 8. 
/**
 * Write one structured log line as a single JSON string.
 *
 * The line always carries `level`, `service` ('health-service'), `action` and
 * an ISO `timestamp`; `fields` is spread last, so a key in `fields` with one
 * of those names overrides it. Callers are expected not to put endpoint URLs
 * into `fields`.
 *
 * Routing: 'ERROR' -> console.error, 'WARN' -> console.warn, anything else ->
 * console.log.
 *
 * @param {'INFO'|'WARN'|'ERROR'} level
 * @param {string} action
 * @param {object} [fields]
 */
function logEvent(level, action, fields = {}) {
  const payload = {
    level,
    service: 'health-service',
    action,
    timestamp: new Date().toISOString(),
    ...fields,
  };
  const serialized = JSON.stringify(payload);

  switch (level) {
    case 'ERROR':
      console.error(serialized);
      break;
    case 'WARN':
      console.warn(serialized);
      break;
    default:
      console.log(serialized);
  }
}
/**
 * Run one GET probe against `url` using fetch + AbortController.
 *
 * Result:
 *   - any response with status < 500 (2xx / 3xx / 4xx) -> DEP_STATE.REACHABLE
 *   - status >= 500, or fetch rejecting for any reason (network error,
 *     timeout, abort) -> DEP_STATE.UNREACHABLE
 *
 * 4xx counts as reachable on purpose: a 404 (no such route) or 401/403
 * (anonymous request refused) still shows the service is answering.
 *
 * Cancellation:
 *   - `deps.signal` (optional master signal) aborts the request when it fires;
 *     if it is already aborted the probe returns UNREACHABLE without fetching.
 *   - The per-probe timeout deliberately uses the real global setTimeout, not
 *     an injected one, so fake timers in tests cannot fire the abort
 *     immediately. `deps.setTimeoutFn` / `deps.clearTimeoutFn` are accepted
 *     but not used here.
 *
 * The response body is never read; it is cancelled (fire-and-forget) once the
 * status is known, so an unread body does not keep the connection occupied.
 * Responses without a cancellable body (e.g. test doubles) are fine.
 *
 * The error is swallowed on purpose: no message is returned or logged, so no
 * endpoint detail leaks through this function.
 *
 * @param {string} url
 * @param {{ fetchImpl: Function, setTimeoutFn?: Function, clearTimeoutFn?: Function, timeoutMs: number, signal?: AbortSignal }} deps
 * @returns {Promise<'reachable' | 'unreachable'>}
 */
async function probeHttp(url, deps) {
  const controller = new AbortController();
  // Forward an abort of the master signal (raised by stop()) to this request.
  const onMasterAbort = () => controller.abort();
  if (deps.signal) {
    if (deps.signal.aborted) return DEP_STATE.UNREACHABLE;
    deps.signal.addEventListener('abort', onMasterAbort, { once: true });
  }
  // Real timer on purpose — see the cancellation notes above.
  const timer = globalThis.setTimeout(() => controller.abort(), deps.timeoutMs);
  try {
    const res = await deps.fetchImpl(url, {
      method: 'GET',
      signal: controller.signal,
    });

    // Only the status matters. Release the body without waiting for it, so a
    // slow cancel cannot delay the probe result.
    if (res && res.body && typeof res.body.cancel === 'function') {
      try {
        const cancelling = res.body.cancel();
        if (cancelling && typeof cancelling.catch === 'function') {
          cancelling.catch(() => {
            /* noop */
          });
        }
      } catch (_) {
        /* noop — e.g. the stream is locked */
      }
    }

    if (res.status >= 500) {
      return DEP_STATE.UNREACHABLE;
    }
    return DEP_STATE.REACHABLE;
  } catch (_err) {
    // Network error / abort / DNS failure ... all collapse to unreachable.
    return DEP_STATE.UNREACHABLE;
  } finally {
    try {
      globalThis.clearTimeout(timer);
    } catch (_) {
      /* noop */
    }
    if (deps.signal) {
      try {
        deps.signal.removeEventListener('abort', onMasterAbort);
      } catch (_) {
        /* noop */
      }
    }
  }
}
/**
 * @typedef {Object} HealthServiceDeps
 * @property {{ status?: string }} redis - ioredis client; only its `status` property is read
 * @property {{
 *   memberCenter: { jwksUrl: string },
 *   fileAccessAgent: { baseUrl: string },
 * }} [config]
 * @property {string} [memberCenterProbeUrl] - overrides the Member Center probe URL (tests)
 * @property {string} [fileAccessAgentProbeUrl] - overrides the File Access Agent probe URL (tests)
 * @property {Function} [fetch] - fetch implementation (defaults to globalThis.fetch)
 * @property {Function} [setIntervalFn] - defaults to globalThis.setInterval
 * @property {Function} [clearIntervalFn] - defaults to globalThis.clearInterval
 * @property {Function} [setTimeoutFn] - defaults to globalThis.setTimeout
 * @property {Function} [clearTimeoutFn] - defaults to globalThis.clearTimeout
 * @property {Function} [now] - clock returning epoch ms (defaults to Date.now)
 * @property {number} [pollIntervalMs] - positive integer, else DEFAULT_POLL_INTERVAL_MS
 * @property {number} [probeTimeoutMs] - positive integer, else DEFAULT_PROBE_TIMEOUT_MS
 */

/**
 * Build the health service: a background poller that caches the reachability
 * of Member Center and File Access Agent, plus a synchronous `getHealth()`
 * that combines that cache with the live Redis status.
 *
 * Returned API:
 *   - start()       begin polling (one immediate poll, then every pollIntervalMs); idempotent
 *   - stop()        clear the interval and abort in-flight probes; no-op unless started
 *   - getHealth()   build a snapshot from the cache; never performs I/O
 *   - isUnhealthy() true when the snapshot status is OVERALL_STATE.UNHEALTHY
 *   - _runOnce()    run a single poll (test hook)
 *
 * Until the first poll finishes both cached dependencies are DEP_STATE.PENDING.
 *
 * @param {HealthServiceDeps} deps
 * @throws {Error} when deps is not an object or deps.redis is missing
 */
function createHealthService(deps) {
  if (!deps || typeof deps !== 'object') {
    throw new Error('[healthService] deps is required');
  }
  if (!deps.redis) {
    throw new Error('[healthService] deps.redis is required');
  }

  /** `candidate` if it is a positive integer, otherwise `fallback`. */
  const positiveIntOr = (candidate, fallback) =>
    (Number.isInteger(candidate) && candidate > 0 ? candidate : fallback);
  /** Name of an error for logging (never its message). */
  const nameOf = (err) => (err && err.name ? err.name : 'unknown');
  /** Run `fn`, ignoring anything it throws. */
  const quietly = (fn) => {
    try {
      fn();
    } catch (_) {
      /* noop */
    }
  };

  const fetchImpl = deps.fetch || globalThis.fetch;
  const setIntervalFn = deps.setIntervalFn || globalThis.setInterval;
  const clearIntervalFn = deps.clearIntervalFn || globalThis.clearInterval;
  const setTimeoutFn = deps.setTimeoutFn || globalThis.setTimeout;
  const clearTimeoutFn = deps.clearTimeoutFn || globalThis.clearTimeout;
  const nowFn = typeof deps.now === 'function' ? deps.now : Date.now;
  const pollIntervalMs = positiveIntOr(deps.pollIntervalMs, DEFAULT_POLL_INTERVAL_MS);
  const probeTimeoutMs = positiveIntOr(deps.probeTimeoutMs, DEFAULT_PROBE_TIMEOUT_MS);

  // Probe URLs are resolved on every poll: explicit override first, config second.
  function resolveMemberCenterUrl() {
    const override = deps.memberCenterProbeUrl;
    if (typeof override === 'string' && override !== '') {
      return override;
    }
    const section = deps.config && deps.config.memberCenter;
    return section && section.jwksUrl ? section.jwksUrl : null;
  }

  function resolveFaaUrl() {
    const override = deps.fileAccessAgentProbeUrl;
    if (typeof override === 'string' && override !== '') {
      return override;
    }
    const section = deps.config && deps.config.fileAccessAgent;
    if (!section || !section.baseUrl) {
      return null;
    }
    // Drop trailing slashes so the probe path is not doubled up.
    const base = String(section.baseUrl).replace(/\/+$/, '');
    return `${base}/health`;
  }

  // ----- internal state -----
  // Redis is not cached: its status is read synchronously on every getHealth().
  const cache = {
    memberCenter: DEP_STATE.PENDING,
    fileAccessAgent: DEP_STATE.PENDING,
    lastPollAt: null, // ISO string, written after each completed poll
  };

  let intervalHandle = null;
  let masterAbort = null; // AbortController shared by all in-flight probes
  let inFlight = false; // true while a poll is running; overlapping polls are skipped
  let started = false;

  /**
   * Probe both dependencies in parallel and store the results in the cache.
   * A missing URL counts as unreachable. Returns immediately when a previous
   * poll is still running.
   */
  async function runOnce() {
    if (inFlight) {
      return;
    }
    inFlight = true;

    // One shared signal, so stop() cancels both fetches at once.
    if (!masterAbort) masterAbort = new AbortController();

    const probeDeps = {
      fetchImpl,
      setTimeoutFn,
      clearTimeoutFn,
      timeoutMs: probeTimeoutMs,
      signal: masterAbort.signal,
    };

    const mcUrl = resolveMemberCenterUrl();
    const faaUrl = resolveFaaUrl();

    const probeOrUnreachable = (target) =>
      (target
        ? probeHttp(target, probeDeps).catch(() => DEP_STATE.UNREACHABLE)
        : Promise.resolve(DEP_STATE.UNREACHABLE));

    const pendingProbes = [probeOrUnreachable(mcUrl), probeOrUnreachable(faaUrl)];

    try {
      const [mcResult, faaResult] = await Promise.all(pendingProbes);
      // Results are stored even if stop() aborted the probes meanwhile; they
      // are then 'unreachable'.
      cache.memberCenter = mcResult;
      cache.fileAccessAgent = faaResult;
      cache.lastPollAt = new Date(nowFn()).toISOString();
      logEvent('INFO', 'health.poll_complete', {
        member_center: mcResult,
        file_access_agent: faaResult,
      });
    } catch (err) {
      // Each probe already maps its own failure to a state, so this is only
      // reached when something else in the block above throws.
      logEvent('ERROR', 'health.poll_unexpected_error', { error_name: nameOf(err) });
    } finally {
      inFlight = false;
    }
  }

  /** Start one poll and log (never rethrow) a rejection under `action`. */
  function pollAndReport(action) {
    runOnce().catch((err) => {
      logEvent('ERROR', action, { error_name: nameOf(err) });
    });
  }

  /** Begin background polling. Calling it again while started does nothing. */
  function start() {
    if (started) return;
    started = true;
    masterAbort = new AbortController();

    // Fire the first poll right away, without waiting for it.
    pollAndReport('health.initial_poll_error');

    intervalHandle = setIntervalFn(() => {
      pollAndReport('health.interval_poll_error');
    }, pollIntervalMs);

    // Unref the handle when it supports it, so the poller alone does not keep
    // the process alive.
    if (intervalHandle && typeof intervalHandle.unref === 'function') {
      quietly(() => intervalHandle.unref());
    }

    logEvent('INFO', 'health.start', { poll_interval_ms: pollIntervalMs });
  }

  /** Stop polling and abort any in-flight probe. No-op unless started. */
  function stop() {
    if (!started) return;
    started = false;

    if (intervalHandle != null) {
      quietly(() => clearIntervalFn(intervalHandle));
      intervalHandle = null;
    }
    if (masterAbort) {
      quietly(() => masterAbort.abort());
      masterAbort = null;
    }

    logEvent('INFO', 'health.stop');
  }

  /**
   * Build the health snapshot from the live Redis status and the cached
   * dependency states.
   *
   * @returns {{
   *   service: string,
   *   status: 'healthy' | 'degraded' | 'unhealthy',
   *   version: string,
   *   timestamp: string,
   *   dependencies: { redis: string, member_center: string, file_access_agent: string },
   * }}
   */
  function getHealth() {
    const redisState = classifyRedisStatus(deps.redis);
    const { memberCenter, fileAccessAgent } = cache;
    const status = deriveOverallStatus({
      redis: redisState,
      memberCenter,
      fileAccessAgent,
    });
    return {
      service: SERVICE_NAME,
      status,
      version: SERVICE_VERSION,
      timestamp: new Date(nowFn()).toISOString(),
      dependencies: {
        redis: redisState,
        member_center: memberCenter,
        file_access_agent: fileAccessAgent,
      },
    };
  }

  /**
   * Whether the current snapshot is 'unhealthy' (intended for choosing the
   * HTTP status code of /health).
   *
   * @returns {boolean}
   */
  function isUnhealthy() {
    return getHealth().status === OVERALL_STATE.UNHEALTHY;
  }

  return {
    start,
    stop,
    getHealth,
    isUnhealthy,
    // Test hook: force a single poll. Not meant for production callers.
    _runOnce: runOnce,
  };
}
DEFAULT_POLL_INTERVAL_MS, + DEFAULT_PROBE_TIMEOUT_MS, + DEP_STATE, + OVERALL_STATE, + // 測試用 internal helpers + _internals: { + classifyRedisStatus, + deriveOverallStatus, + probeHttp, + }, +}; diff --git a/apps/task-scheduler/src/services/jobService.js b/apps/task-scheduler/src/services/jobService.js new file mode 100644 index 0000000..2c53b41 --- /dev/null +++ b/apps/task-scheduler/src/services/jobService.js @@ -0,0 +1,774 @@ +/** + * Job CRUD + 階段推進服務(T4 重構自 server.js L84-91、L145-220;T5 擴充)。 + * + * 職責: + * 1. STAGES / STAGE_QUEUES / DONE_QUEUE / DONE_GROUP 等常數 + * 2. `getJob(jobId)` / `setJob(jobId, job)` / `enqueueStage(stage, job)` + * 3. `advanceJob(jobId, completedStage)` / `failJob(jobId, step, reason)` + * 4. T5:`writeInputToMinIO(jobId, modelFile, refImages)`、 + * `claimActiveAndCreate({ userId, jobId, jobRecord, ttlSeconds })`、 + * `cleanupInputObjects(jobId, objectKeys)`、`getActiveJob(userId)` + * + * 行為對齊(重構不改行為): + * - setJob 會自動更新 `updated_at` 並透過 sseService 廣播(server.js L151-156) + * - enqueueStage 的 input_dir 永遠用 `path.join(JOB_DATA_DIR, job.job_id)` + * (server.js L166)—— 注意:這個路徑是給 Worker 看的,**Worker 仍依此格式 + * 讀檔**,所以即使 STORAGE_BACKEND=minio 也保留同樣的字串(Worker 會從 + * MinIO 讀,input_dir 對它而言只是 metadata) + * - advanceJob 的進度計算:`Math.round(((nextIndex) / STAGES.length) * 100)` + * 完全不變(server.js L196) + * - 完成時 status='COMPLETED'、stage=null、progress=100(server.js L201-204) + * - 失敗時 status='FAILED'、加 error 物件(server.js L216-218) + * + * 設計取捨: + * - 採 factory function `createJobService(deps)`,把 redis 與 sseService 注入進來, + * 讓單元測試容易 mock。 + * - jobService 不直接 require redis.js / sseService.js,避免測試時 import 觸發 + * 實體連線。 + * - T5 的 minio 為 optional dep(既有 legacy 路徑沒 minio dep): + * * 若 deps.minio 存在 → 暴露 `writeInputToMinIO` / `cleanupInputObjects` + * * 否則該介面 throw —— 呼叫端應在 mount 階段就確認 storageBackend === 'minio' + */ + +'use strict'; + +const path = require('path'); +const crypto = require('crypto'); + +const { claimActiveJob, releaseActiveJob } = 
require('../redis/luaScripts'); +const { toExternalStatus, isInProgress } = require('./statusMapper'); + +// Pipeline: fixed stage order,對齊 server.js L84-91 +const STAGES = ['onnx', 'bie', 'nef']; +const STAGE_QUEUES = { + onnx: 'queue:onnx', + bie: 'queue:bie', + nef: 'queue:nef', +}; +const DONE_QUEUE = 'queue:done'; +const DONE_GROUP = 'scheduler'; + +/** + * 建立 jobService instance。 + * + * @param {object} deps + * @param {import('ioredis').Redis} deps.redis - 主 Redis client + * @param {{ sendSSE: (jobId: string, data: unknown) => void }} deps.sseService + * @param {string} [deps.jobDataDir] - 覆寫 JOB_DATA_DIR(測試用) + * @returns {object} jobService instance(介面詳見回傳物件) + */ +function createJobService(deps) { + if (!deps || !deps.redis) { + throw new Error('[jobService] deps.redis is required'); + } + if (!deps.sseService || typeof deps.sseService.sendSSE !== 'function') { + throw new Error('[jobService] deps.sseService.sendSSE is required'); + } + const { redis, sseService } = deps; + const minio = deps.minio || null; // T5:可選;缺則只能用 legacy CRUD 介面 + const jobDataDir = deps.jobDataDir || process.env.JOB_DATA_DIR || '/data/jobs'; + + /** + * 取 job record。對齊 server.js L145-149。 + */ + async function getJob(jobId) { + const raw = await redis.get(`job:${jobId}`); + if (!raw) return null; + return JSON.parse(raw); + } + + /** + * 寫 job record。會自動更新 updated_at 並透過 SSE 廣播。 + * 對齊 server.js L151-156。 + */ + async function setJob(jobId, job) { + job.updated_at = new Date().toISOString(); + await redis.set(`job:${jobId}`, JSON.stringify(job)); + sseService.sendSSE(jobId, job); + } + + /** + * 把任務送進對應 stage 的 Redis Stream。 + * 對齊 server.js L161-171。 + */ + async function enqueueStage(stage, job) { + const queue = STAGE_QUEUES[stage]; + if (!queue) { + throw new Error(`[jobService] Unknown stage: ${stage}`); + } + const message = { + job_id: job.job_id, + created_at: job.created_at, + input_dir: path.join(jobDataDir, job.job_id), + parameters: job.parameters || {}, + }; + await 
/**
 * Guarantee that `job.stage_timings` is an object holding a
 * `{ started_at, completed_at }` record for every stage in STAGES.
 *
 * A missing or non-object `stage_timings` is replaced; any per-stage entry
 * that is missing or not an object is replaced by an empty record. Existing
 * object entries are left untouched. Mutates `job` in place.
 *
 * @param {object} job
 */
function ensureStageTimings(job) {
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object';

  if (!isObjectLike(job.stage_timings)) {
    job.stage_timings = { onnx: null, bie: null, nef: null };
  }

  const timings = job.stage_timings;
  STAGES.forEach((stageName) => {
    if (!isObjectLike(timings[stageName])) {
      timings[stageName] = { started_at: null, completed_at: null };
    }
  });
}

/**
 * Stamp `stage_timings[stage].started_at` with the current time (ISO string).
 * Only mutates the in-memory job object; nothing is persisted here.
 *
 * @param {object} job
 * @param {string} stage - expected to be one of STAGES; any other key has no
 *   record to write to and the assignment throws a TypeError
 */
function recordStageStart(job, stage) {
  ensureStageTimings(job);
  const record = job.stage_timings[stage];
  record.started_at = new Date().toISOString();
}

/**
 * Stamp `stage_timings[stage].completed_at` with the current time (ISO string).
 * Only mutates the in-memory job object; nothing is persisted here.
 *
 * @param {object} job
 * @param {string} stage - expected to be one of STAGES (see recordStageStart)
 */
function recordStageComplete(job, stage) {
  ensureStageTimings(job);
  const record = job.stage_timings[stage];
  record.completed_at = new Date().toISOString();
}
來說,release 失敗的 log 已足夠告警;不需阻塞 advance + * + * 為什麼要 guard user_id: + * legacy /jobs 建的 job 沒有 user_id(或 user_id 為 null / 'web-anonymous'), + * 它從來沒寫過 user:{user_id}:active_job,硬呼叫 release Lua 會白做工 + + * 產生不必要的 NOOP log。 + * + * 為什麼還呼叫 releaseActiveJobByUser(即便 user_id 為非空字串): + * release_active_job.lua 的 atomic guard 會自己檢查「active_job 是否真的 + * 等於 jobId」,若不等於就 NOOP;這樣即使 user_id 是非預期值(例如錯誤 + * 寫入),也不會誤刪別人的 active_job。 + * + * @param {object} job + * @returns {Promise} + */ + async function releaseActiveJobOnTerminal(job) { + const userId = job && typeof job.user_id === 'string' ? job.user_id : ''; + const jobId = job && typeof job.job_id === 'string' ? job.job_id : ''; + if (!userId || !jobId) { + // legacy / 缺欄位 → 略過(沒有 active_job key 對應) + return; + } + try { + // 注意:releaseActiveJobByUser 定義在下方(T5 既有 closure 內 function + // declaration,會被 hoist)。本 helper 是 T9 新增,刻意不挪動 T5 既有 + // function 順序避免 diff 干擾 reviewer。 + const result = await releaseActiveJobByUser(userId, jobId); + // eslint-disable-next-line no-console + console.log( + JSON.stringify({ + level: 'INFO', + service: 'task-scheduler', + action: 'jobs.terminal.release_active_job', + job_id: jobId, + user_id: userId, + released: result.released, + timestamp: new Date().toISOString(), + }) + ); + } catch (err) { + // 不阻塞 advance / fail;只 log WARN + // eslint-disable-next-line no-console + console.warn( + JSON.stringify({ + level: 'WARN', + service: 'task-scheduler', + action: 'jobs.terminal.release_active_job_failed', + job_id: jobId, + user_id: userId, + error: err && err.message ? err.message : 'unknown', + timestamp: new Date().toISOString(), + }) + ); + } + } + + /** + * 推進 job 到下一階段或標記為完成。 + * 對齊 server.js L176-207;T9 加入 stage_timings 寫入 + 終態 release。 + * + * T9 行為改動: + * 1. 寫 stage_timings.{completedStage}.completed_at = now + * 2. 推進到下一階段時:寫 stage_timings.{nextStage}.started_at = now(enqueued_at) + * 3. 
達到 COMPLETED 時:呼叫 release_active_job(若有 user_id) + * + * 為什麼 stage_timings 改動跟既有 status / stage / progress 寫入合併在同一次 + * setJob:原子性 — Redis 看到的永遠是「狀態與 timings 同步」的 record。 + */ + async function advanceJob(jobId, completedStage) { + const job = await getJob(jobId); + if (!job) { + // eslint-disable-next-line no-console + console.warn(`[Scheduler] Job ${jobId} not found, ignoring done event`); + return; + } + + const currentIndex = STAGES.indexOf(completedStage); + if (currentIndex < 0) { + // eslint-disable-next-line no-console + console.warn(`[Scheduler] Unknown stage: ${completedStage}`); + return; + } + + // T9:標記當前 stage 完成 + recordStageComplete(job, completedStage); + + const nextIndex = currentIndex + 1; + + if (nextIndex < STAGES.length) { + // 推進到下一階段 + const nextStage = STAGES[nextIndex]; + job.status = nextStage.toUpperCase(); + job.stage = nextStage; + job.progress = Math.round((nextIndex / STAGES.length) * 100); + // T9:標記下一 stage 已 enqueue(started_at 為 enqueued_at 語意) + recordStageStart(job, nextStage); + await setJob(jobId, job); + await enqueueStage(nextStage, job); + } else { + // 全部完成 + job.status = 'COMPLETED'; + job.stage = null; + job.progress = 100; + await setJob(jobId, job); + // T9:終態釋放 active_job(best-effort:仍 await 取得結果用以 log,但內部已 catch 不會 throw) + await releaseActiveJobOnTerminal(job); + // eslint-disable-next-line no-console + console.log(`[Scheduler] Job ${jobId} COMPLETED`); + } + } + + /** + * 標記 job 為失敗。 + * 對齊 server.js L212-220;T9 加入 stage_timings.completed_at + 終態 release。 + * + * T9 行為改動: + * 1. 寫 stage_timings.{step}.completed_at = now(標 fail 該 stage 已結束) + * 2. 
呼叫 release_active_job(若有 user_id) + * + * 注意:其他 stage 維持 null(不一次填補所有後續 stage 的 completed_at), + * 這樣 stage_timings 才能真實反映「該 job 在 哪個 stage 失敗」。 + */ + async function failJob(jobId, step, reason) { + const job = await getJob(jobId); + if (!job) return; + + job.status = 'FAILED'; + job.error = { step, reason }; + // T9:標記失敗 stage 已結束(only 該 stage,其他 stage 維持 null) + if (STAGES.indexOf(step) >= 0) { + recordStageComplete(job, step); + } + await setJob(jobId, job); + // T9:終態釋放 active_job(best-effort:仍 await 取得結果用以 log,但內部已 catch 不會 throw) + await releaseActiveJobOnTerminal(job); + // eslint-disable-next-line no-console + console.log(`[Scheduler] Job ${jobId} FAILED at ${step}: ${reason}`); + } + + // --------------------------------------------------------------------------- + // T5 新增:MinIO 寫入 + Lua claim active job + // --------------------------------------------------------------------------- + + /** + * Object key 命名(對齊 TDD §6.1)。 + * + * 為什麼包成 helper 而非 inline 字串: + * 單元測試可以驗 key 命名,且未來改命名規則時集中修改。 + * + * @param {string} jobId + * @param {string} safeFilename — 已 sanitize 的 model 檔名 + */ + function buildInputObjectKey(jobId, safeFilename) { + return `jobs/${jobId}/input/${safeFilename}`; + } + + /** + * Ref image object key(對齊 TDD §6.1)。 + * 加入 index 前綴避免同名衝突。 + * + * @param {string} jobId + * @param {number} index + * @param {string} safeFilename + */ + function buildRefImageObjectKey(jobId, index, safeFilename) { + return `jobs/${jobId}/ref_images/${index}_${safeFilename}`; + } + + /** + * 寫 model + ref images 到 MinIO。 + * + * 採 fail-fast:任一檔上傳失敗即 throw,呼叫端應回 502 storage_unavailable + * 並**不**寫 Redis(M5 方案 A 的核心:失敗時 Redis 完全乾淨)。 + * + * 並行 ref images 上傳:用 Promise.all 同時送多個 ref image 寫入請求; + * 任一 fail → Promise.all reject。即便其他 chunk 已寫入也沒關係, + * MinIO 7 天 lifecycle 會清掉這些 orphan 檔案(doc-review M5 已論述此 trade-off)。 + * + * @param {string} jobId + * @param {{ buffer: Buffer, mimetype?: string, originalname?: string }} modelFileMeta + * @param {string} 
safeModelFilename + * @param {Array<{ file: { buffer: Buffer, mimetype?: string }, safeFilename: string }>} refImages + * @returns {Promise<{ + * inputObjectKey: string, + * refImageObjectKeys: string[], + * uploadedKeys: string[] // 全部已寫入的 key(用於失敗時 cleanup) + * }>} + */ + async function writeInputToMinIO(jobId, modelFileMeta, safeModelFilename, refImages) { + if (!minio || !minio.client) { + throw new Error( + '[jobService.writeInputToMinIO] minio dep is required and STORAGE_BACKEND must be minio' + ); + } + + const inputObjectKey = buildInputObjectKey(jobId, safeModelFilename); + const refImageObjectKeys = refImages.map((it, idx) => + buildRefImageObjectKey(jobId, idx, it.safeFilename) + ); + + // 先寫 model 檔(最大檔,最有可能 fail;fail 時不需清 ref images) + await minio.uploadToMinIO( + inputObjectKey, + modelFileMeta.buffer, + modelFileMeta.mimetype || 'application/octet-stream' + ); + + // ref images 並行寫入;fail 時上面的 model 檔已寫入,回滾交給呼叫端 + if (refImages.length > 0) { + await Promise.all( + refImages.map((it, idx) => + minio.uploadToMinIO( + refImageObjectKeys[idx], + it.file.buffer, + it.file.mimetype || 'image/jpeg' + ) + ) + ); + } + + return { + inputObjectKey, + refImageObjectKeys, + uploadedKeys: [inputObjectKey, ...refImageObjectKeys], + }; + } + + /** + * 「成功寫 MinIO 後」用 Lua script 一次寫完整 job record + claim active job + + * SADD user:jobs(對應 M5 方案 A)。 + * + * @param {object} args + * @param {string} args.userId + * @param {string} args.jobId + * @param {object} args.jobRecord — 完整 job record;本函式 stringify 後傳給 Lua + * @param {number} args.ttlSeconds — 三把 key 的 TTL + * @returns {Promise< + * | { ok: true } + * | { ok: false, conflict: true, activeJobId: string } + * >} + */ + async function claimActiveAndCreate({ userId, jobId, jobRecord, ttlSeconds }) { + const jobJson = JSON.stringify(jobRecord); + const result = await claimActiveJob(redis, { + userId, + jobId, + jobJson, + ttlSeconds, + }); + + if (result.ok) { + // 廣播給可能存在的 SSE listener(雖然 v1 client 不用 SSE,但為了 + // 與 
legacy /jobs/:id/events 共存:legacy listener 仍能即時看到 + // 新建的 job state) + sseService.sendSSE(jobId, jobRecord); + } + return result; + } + + /** + * 釋放 active_job(Sec M2 + Reviewer Major-2 修復)。 + * + * 用於 enqueue 失敗時補償釋放鎖。底層用 Lua 確保 atomic guard: + * 只在 active_job 仍指向 expectedJobId 時才 DEL,避免誤刪其他 job 的鎖。 + * + * 為什麼不直接 redis.del: + * - race condition:如果 active_job 在我們呼叫 release 之前已被別人改寫 + * (例如 worker 完成 + 新 claim),直接 DEL 會誤刪別人的鎖 + * - Lua 內 GET + 比對 + DEL 是 atomic(單一 EVAL) + * + * @param {string} userId + * @param {string} expectedJobId — 必須等於當前 active_job 的值才會釋放 + * @returns {Promise<{ released: boolean }>} + */ + async function releaseActiveJobByUser(userId, expectedJobId) { + const result = await releaseActiveJob(redis, { + userId, + jobId: expectedJobId, + }); + return { released: result.released }; + } + + /** + * 取得 user 當前 active_job 的 job_id(不讀完整 record)。 + * + * 用於 Sec M4 寫入放大 pre-check:在 MinIO 寫入之前廉價判斷是否有 active job, + * 避免 conflict request 還是上傳完 500MB 才被 Lua reject。 + * + * 為什麼跟 getActiveJob 拆開: + * - pre-check 場景只要知道「有沒有」,不需要 job record(多一次 GET) + * - conflict 流程才需要完整 record 做 conflict payload + * + * @param {string} userId + * @returns {Promise} + */ + async function getActiveJobId(userId) { + return redis.get(`user:${userId}:active_job`); + } + + /** + * 取得 user 當前 active job 的 job record,回給 409 衝突 response。 + * + * 為什麼不直接讀 `user:{userId}:active_job` 後再讀 `job:{id}`: + * 呼叫端拿到 conflict + activeJobId 後,需要 stage / progress / created_at + * 等資訊填到 v1 衝突 payload(TDD §1.5),所以仍要讀 job record。 + * + * @param {string} userId + * @returns {Promise<{ activeJobId: string|null, job: object|null }>} + */ + async function getActiveJob(userId) { + const activeJobId = await getActiveJobId(userId); + if (!activeJobId) return { activeJobId: null, job: null }; + const job = await getJob(activeJobId); + return { activeJobId, job }; + } + + /** + * 衝突 / 失敗時清理已寫入 MinIO 的 input 物件。 + * + * 為什麼採 fire-and-forget(不 throw): + * 呼叫端已決定回 user 409 / 502 等錯誤,刪 MinIO 失敗也不該再覆蓋這個錯誤; + * 
Converter Bucket 7 天 lifecycle 會兜底清掉 orphan 檔案(doc-review M5)。 + * 失敗時只 log,**不**改變 caller 的 response。 + * + * @param {string[]} objectKeys + */ + async function cleanupInputObjects(objectKeys) { + if (!Array.isArray(objectKeys) || objectKeys.length === 0) return; + if (!minio || !minio.client || typeof minio.deleteObject !== 'function') { + // 沒有 deleteObject 介面就靜默 skip(依賴 lifecycle 清) + return; + } + await Promise.allSettled( + objectKeys.map((key) => + minio.deleteObject(key).catch((err) => { + // eslint-disable-next-line no-console + console.warn( + JSON.stringify({ + level: 'WARN', + service: 'task-scheduler', + action: 'minio.cleanup_failed', + object_key: key, + error: err && err.message ? err.message : 'unknown', + timestamp: new Date().toISOString(), + }) + ); + }) + ) + ); + } + + // --------------------------------------------------------------------------- + // T6 新增:列表查詢 + ETag + // --------------------------------------------------------------------------- + + /** + * 用 user 索引列出該 user 的所有 job records,並依 client / status / 時間過濾。 + * + * 為什麼用 SMEMBERS + pipeline GET,而非 KEYS / SCAN: + * - TDD §2.7.3 明確要求避免 KEYS *(O(N) 阻塞 Redis) + * - SMEMBERS 取的是 Set 索引,O(N) 但 N 是該 user 的 job 數(通常 < 100) + * - pipeline GET 把 N 次 GET 合併成一次 round-trip(latency 友善) + * + * Client 隔離:在應用層 filter `created_by_client_id === clientId`。 + * - 不在 Lua 做:Set 沒有「依屬性過濾」的能力 + * - 不在 Redis index 用 client_id 做二級索引:Phase 1 流量不大,省工 + * - 安全考量:即便 user_id 被攻擊者猜中,client_id 不符仍然會被過濾掉 + * + * 為什麼 max limit 50: + * - 任務文件 §3.6 指定 max=50(防大量讀取) + * - 對齊 Recovery 場景:visionA-backend 一次最多需要 50 筆 in_progress jobs, + * 夠用且不會引起 OOM + * + * @param {object} args + * @param {string} args.userId + * @param {string} args.clientId — 從 req.auth.clientId 來,必填 + * @param {string} [args.status='in_progress'] — `in_progress` / `completed` / `failed` / `all` + * @param {number} [args.limit=10] — 1 ≤ limit ≤ 50 + * @param {number} [args.offset=0] — ≥ 0 + * @returns {Promise<{ + * jobs: object[], // 已過濾 + 排序 + 分頁的 
job records + * total: number, // 過濾後(未分頁前)的總數 + * nextOffset: number|null // 還有更多時為下一個 offset,否則 null + * }>} + */ + async function listJobsByUser({ + userId, + clientId, + status = 'in_progress', + limit = 10, + offset = 0, + }) { + if (typeof userId !== 'string' || userId === '') { + throw new Error('[listJobsByUser] userId is required'); + } + if (typeof clientId !== 'string' || clientId === '') { + throw new Error('[listJobsByUser] clientId is required'); + } + + // 取 user 的所有 job_id(Set 索引,避免 KEYS *) + const jobIds = await redis.smembers(`user:${userId}:jobs`); + if (!Array.isArray(jobIds) || jobIds.length === 0) { + return { jobs: [], total: 0, nextOffset: null }; + } + + // pipeline GET:N 次 GET 合併成一次 round-trip + const pipeline = redis.pipeline(); + for (const id of jobIds) { + pipeline.get(`job:${id}`); + } + const results = await pipeline.exec(); + + // pipeline.exec() 回傳 [[err, value], [err, value], ...] + // 任一 err 不該整批 fail,個別 skip 該 record(log warn) + const records = []; + for (let i = 0; i < results.length; i += 1) { + const entry = results[i] || []; + const err = entry[0]; + const raw = entry[1]; + if (err) { + // eslint-disable-next-line no-console + console.warn( + JSON.stringify({ + level: 'WARN', + service: 'task-scheduler', + action: 'jobs.list.pipeline_get_error', + user_id: userId, + job_id: jobIds[i], + error: err && err.message ? 
err.message : 'unknown', + timestamp: new Date().toISOString(), + }) + ); + continue; + } + if (!raw) continue; // job 已過期或被刪除(race) + try { + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object') { + records.push(parsed); + } + } catch (_) { + // 忽略損壞 record(log) + // eslint-disable-next-line no-console + console.warn( + JSON.stringify({ + level: 'WARN', + service: 'task-scheduler', + action: 'jobs.list.parse_error', + user_id: userId, + job_id: jobIds[i], + timestamp: new Date().toISOString(), + }) + ); + } + } + + // Client 隔離(深度防禦:跨 client 不應該共用 user_id,但仍然 filter) + const ownedByClient = records.filter( + (r) => r && r.created_by_client_id === clientId + ); + + // status filter + let filtered; + if (status === 'all') { + filtered = ownedByClient; + } else { + filtered = ownedByClient.filter((r) => { + const ext = toExternalStatus(r); + if (status === 'in_progress') return isInProgress(ext.status); + return ext.status === status; + }); + } + + // 排序:created_at desc(最新在前) + filtered.sort((a, b) => { + const at = a && a.created_at ? a.created_at : ''; + const bt = b && b.created_at ? b.created_at : ''; + // string 比較對 ISO 8601 是正確的(同時區、固定寬度) + if (bt < at) return -1; + if (bt > at) return 1; + return 0; + }); + + const total = filtered.length; + const safeOffset = Math.max(0, offset); + const safeLimit = Math.max(1, Math.min(50, limit)); + const slice = filtered.slice(safeOffset, safeOffset + safeLimit); + + const consumed = safeOffset + slice.length; + const nextOffset = consumed < total ? 
consumed : null; + + return { jobs: slice, total, nextOffset }; + } + + /** + * 計算 job record 的 weak ETag(基於 updated_at)。 + * + * 為什麼用 weak ETag: + * - 同一個 updated_at 對應的 record 內容一致(updated_at 是 setJob 時更新的 + * timestamp,記錄變更才會改),但用 weak 標示避免 byte-by-byte 比對 + * - 對 client 來說只要 If-None-Match 命中就回 304,內容不變 + * + * 為什麼選 sha1 而非完整 record hash: + * - 計算成本小(updated_at 是固定長度字串) + * - 不洩漏 record 內容(避免攻擊者透過 ETag 推測 record 變更頻率) + * + * 為什麼還包 base64 + 截斷 22 chars: + * - sha1 hex 40 chars 太長,base64url 截 22 chars(約 132 bits)已遠超 collision 安全 + * - 對 cache key 比對成本與可讀性更友善 + * + * @param {{ updated_at?: string }} job + * @returns {string} - W/"<22 chars>" 形式 + */ + function computeEtag(job) { + const updatedAt = job && typeof job.updated_at === 'string' ? job.updated_at : ''; + const hash = crypto + .createHash('sha1') + .update(updatedAt) + .digest('base64') + // base64url 兼容 + 去 padding,避免 ETag header 含 `=` 觸發某些 client 解析問題 + .replace(/\+/g, '-') + .replace(/\//g, '_') + .replace(/=+$/, '') + .slice(0, 22); + return `W/"${hash}"`; + } + + // --------------------------------------------------------------------------- + // T7 新增:promote 標記與冪等查詢 + // --------------------------------------------------------------------------- + + /** + * 把 promote 結果寫回 job record,提供冪等支援。 + * + * 為什麼把它放在 jobService(而非 promote handler 內 inline 寫 redis.set): + * - setJob 已封裝 updated_at 更新與 SSE 廣播,重用避免邏輯散落 + * - 未來若要把 promote 結果寫入 user 索引(如 PromotedSet)也能集中改 + * + * 為什麼只改 promote 相關欄位而非整批覆寫 record: + * - 不破壞既有欄位(status / stage / progress / output / stage_timings 等) + * - promoted_object_keys 本身是整批覆寫;同 job 多次 promote 時由 caller 先累加完整清單再傳入 + * + * @param {string} jobId + * @param {{ + * promotedAt: string, + * promotedKeys: Array<{ source: string, target_object_key: string, size_bytes?: number|null, file_access_agent_etag?: string|null, promoted_at: string }> + * }} args + * @returns {Promise<object|null>} 更新後的 job record;job 不存在時回 null + */ + async function markPromoted(jobId, args) { + if (typeof jobId !== 'string' || jobId === '') { + throw
new Error('[markPromoted] jobId is required'); + } + if (!args || typeof args !== 'object') { + throw new Error('[markPromoted] args is required'); + } + const { promotedAt, promotedKeys } = args; + if (typeof promotedAt !== 'string' || promotedAt === '') { + throw new Error('[markPromoted] args.promotedAt is required (ISO string)'); + } + if (!Array.isArray(promotedKeys)) { + throw new Error('[markPromoted] args.promotedKeys must be an array'); + } + + const job = await getJob(jobId); + if (!job) return null; + + job.promoted = true; + job.promoted_at = promotedAt; + // 整批覆寫 promoted_object_keys(caller 已在 handler 累加完整清單) + job.promoted_object_keys = promotedKeys; + + await setJob(jobId, job); + return job; + } + + return { + getJob, + setJob, + enqueueStage, + advanceJob, + failJob, + // T5 介面 + writeInputToMinIO, + claimActiveAndCreate, + getActiveJob, + getActiveJobId, // Sec M4 寫入放大 pre-check + releaseActiveJob: releaseActiveJobByUser, // Sec M2 + Reviewer Major-2 + cleanupInputObjects, + // T6 介面 + listJobsByUser, + computeEtag, + // T7 介面 + markPromoted, + // 暴露 helper(測試 + handler 用) + _internals: { buildInputObjectKey, buildRefImageObjectKey }, + }; +} + +module.exports = { + createJobService, + STAGES, + STAGE_QUEUES, + DONE_QUEUE, + DONE_GROUP, +}; diff --git a/apps/task-scheduler/src/services/sseService.js b/apps/task-scheduler/src/services/sseService.js new file mode 100644 index 0000000..1c22d59 --- /dev/null +++ b/apps/task-scheduler/src/services/sseService.js @@ -0,0 +1,108 @@ +/** + * SSE(Server-Sent Events)client 管理(T4 重構自 server.js L131-140 + L449-487)。 + * + * 職責: + * 1. 維護 `sseClients` Map(job_id → Set),每個 job 可有多個 listener + * 2. `sendSSE(jobId, data)` 廣播訊息給該 job 的所有 listener + * 3. 
`registerSseClient(jobId, currentJob, res, req)` 處理 SSE handshake、heartbeat、cleanup + * + * 行為對齊(重構不改行為): + * - response 格式:`data: ${JSON.stringify(data)}\n\n`(server.js L136) + * - heartbeat:每 15s 發 `: heartbeat\n\n`(server.js L474-476) + * - 連線關閉時自動從 Map 移除,最後一個 listener 離開時刪 Map entry(L479-486) + * - SSE headers 完全對齊 server.js L458-462 + * + * 設計取捨: + * - 每個 createSseService() instance 維護單一 `sseClients` Map;process 內只建立一個 instance 時與 legacy 全域變數行為一致 + * - jobService 的 setJob 會回呼 sendSSE,本模組保持被動,不反向 require jobService + * (避免循環依賴) + */ + +'use strict'; + +/** + * 建立一個 SSE service instance。每個 process 應該只有一個。 + * + * @returns {{ + * sendSSE: (jobId: string, data: unknown) => void, + * registerSseClient: (jobId: string, currentJob: object, res: import('express').Response, req: import('express').Request) => void, + * _getClientsMap: () => Map<string, Set<import('express').Response>>, + * }} + */ +function createSseService() { + /** @type {Map<string, Set<import('express').Response>>} */ + const sseClients = new Map(); + + /** + * 廣播資料給某個 job 的所有 SSE listener。 + * + * 對齊 server.js L133-140。 + */ + function sendSSE(jobId, data) { + const clients = sseClients.get(jobId); + if (!clients) return; + const payload = `data: ${JSON.stringify(data)}\n\n`; + for (const res of clients) { + res.write(payload); + } + } + + /** + * 註冊一個新的 SSE listener。 + * + * 行為對齊 server.js L457-486: + * 1. 寫 SSE headers(200 + text/event-stream + no-cache + keep-alive) + * 2. 立刻把目前 job 狀態送出 + * 3. 加入 sseClients Map + * 4. 啟動 15s heartbeat + * 5.
req.on('close') 時清理 timer + 從 Map 移除 + * + * 呼叫端不需自行把 currentJob 透過 res.write 寫出(為了測試 mock 容易, + * 我們把這個邏輯內部化,由本函式在寫完 headers 後立即送出)。 + * + * @param {string} jobId + * @param {object} currentJob - 立即送出的初始狀態 + * @param {import('express').Response} res + * @param {import('express').Request} req + */ + function registerSseClient(jobId, currentJob, res, req) { + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + }); + + // 立即送目前狀態 + res.write(`data: ${JSON.stringify(currentJob)}\n\n`); + + // 加入 Map + if (!sseClients.has(jobId)) { + sseClients.set(jobId, new Set()); + } + sseClients.get(jobId).add(res); + + // 心跳 + const heartbeat = setInterval(() => { + res.write(': heartbeat\n\n'); + }, 15000); + + // 清理:使用 req.on('close'),與 server.js L479 行為一致 + req.on('close', () => { + clearInterval(heartbeat); + const clients = sseClients.get(jobId); + if (clients) { + clients.delete(res); + if (clients.size === 0) sseClients.delete(jobId); + } + }); + } + + return { + sendSSE, + registerSseClient, + /** @internal 測試用 — 取得 clients map 以驗證內部狀態 */ + _getClientsMap: () => sseClients, + }; +} + +module.exports = { createSseService }; diff --git a/apps/task-scheduler/src/services/statusMapper.js b/apps/task-scheduler/src/services/statusMapper.js new file mode 100644 index 0000000..e816938 --- /dev/null +++ b/apps/task-scheduler/src/services/statusMapper.js @@ -0,0 +1,134 @@ +/** + * 內部 job status / stage → 對外 v1 API 格式映射(T6)。 + * + * 設計背景: + * - 既有 Web UI 仍依賴大寫狀態(`ONNX` / `BIE` / `NEF` / `COMPLETED` / `FAILED`) + * —— 不能改既有語意(向後相容) + * - v1 API 對外規格(TDD §1.4.3)必須回小寫語意化狀態(`created` / `running` / + * `completed` / `failed`)+ stage 欄位 + * - 因此這裡用 pure function 做「**單向** 內部 → 外部」的映射,不雙寫 record + * + * 映射規則(對齊 TDD §2.7.1): + * + * | 內部 status | stage_timings.onnx.started_at | 對外 status | 對外 stage | + * |------------|------------------------------|-----------|----------| + * | `ONNX` | null(剛建尚未開工) | `created` | `onnx` | + * | `ONNX` | 有值(onnx
已開工) | `running` | `onnx` | + * | `BIE` | — | `running` | `bie` | + * | `NEF` | — | `running` | `nef` | + * | `COMPLETED`| — | `completed` | `null` | + * | `FAILED` | — | `failed` | `error.stage` → `error.step` → `stage` → `null`(依序 fallback) | + * + * 為什麼純 function 而非 class: + * - 無狀態(不需 cache) + * - 容易單元測試(input → output) + * - 容易在多個 handler 重用(GET /:id、GET /jobs 列表) + */ + +'use strict'; + +/** + * 對外狀態枚舉(對齊 TDD §1.4.3 的 status 欄位)。 + * 用 const 物件而非 string literal 散落程式碼,避免 typo。 + */ +const EXTERNAL_STATUS = Object.freeze({ + CREATED: 'created', + RUNNING: 'running', + COMPLETED: 'completed', + FAILED: 'failed', +}); + +/** + * 對外 stage 枚舉。 + */ +const EXTERNAL_STAGE = Object.freeze({ + ONNX: 'onnx', + BIE: 'bie', + NEF: 'nef', +}); + +/** + * 把內部 job record 映射為 `{ status, stage }`(對外 API 用)。 + * + * @param {object|null} job - 從 Redis 讀出的 job record(含 status / stage / stage_timings) + * @returns {{ status: string, stage: string|null }} + */ +function toExternalStatus(job) { + if (!job || typeof job !== 'object') { + // 防禦性 fallback:record 異常時回 created/null(會被 caller 包成 404 或 500) + return { status: EXTERNAL_STATUS.CREATED, stage: null }; + } + + const internalStatus = typeof job.status === 'string' ? job.status : ''; + + switch (internalStatus) { + case 'COMPLETED': + return { status: EXTERNAL_STATUS.COMPLETED, stage: null }; + + case 'FAILED': { + // 失敗 stage 的決策(priority): + // 1. job.error.stage(worker 上報的 stage) + // 2. job.error.step(既有 failJob 寫入的 `error = { step, reason }`) + // 3. job.stage(最後一個 stage) + // 4. fallback null + const fromError = + job.error && typeof job.error === 'object' + ? job.error.stage || job.error.step || null + : null; + const fallback = typeof job.stage === 'string' ?
job.stage : null; + return { + status: EXTERNAL_STATUS.FAILED, + stage: fromError || fallback || null, + }; + } + + case 'BIE': + return { status: EXTERNAL_STATUS.RUNNING, stage: EXTERNAL_STAGE.BIE }; + + case 'NEF': + return { status: EXTERNAL_STATUS.RUNNING, stage: EXTERNAL_STAGE.NEF }; + + case 'ONNX': { + // ONNX 階段細分:onnx worker 開工前 = created;開工後 = running + // 判斷:stage_timings.onnx.started_at == null → created + const onnxTiming = + job.stage_timings && typeof job.stage_timings === 'object' + ? job.stage_timings.onnx + : null; + const onnxStartedAt = + onnxTiming && typeof onnxTiming === 'object' + ? onnxTiming.started_at + : null; + if (!onnxStartedAt) { + return { status: EXTERNAL_STATUS.CREATED, stage: EXTERNAL_STAGE.ONNX }; + } + return { status: EXTERNAL_STATUS.RUNNING, stage: EXTERNAL_STAGE.ONNX }; + } + + default: + // 未知狀態 → 視為 created(避免破 API contract);同時 log 給 ops(caller 端做) + return { status: EXTERNAL_STATUS.CREATED, stage: null }; + } +} + +/** + * 判斷對外 status 是否屬於「進行中」(recovery 場景過濾用)。 + * + * `in_progress` = `created` ∪ `running`(對齊 TDD §1.4.4 query 參數)。 + * + * @param {string} externalStatus + * @returns {boolean} + */ +function isInProgress(externalStatus) { + return ( + externalStatus === EXTERNAL_STATUS.CREATED || + externalStatus === EXTERNAL_STATUS.RUNNING + ); +} + +module.exports = { + toExternalStatus, + isInProgress, + EXTERNAL_STATUS, + EXTERNAL_STAGE, +}; diff --git a/apps/task-scheduler/src/storage/local.js b/apps/task-scheduler/src/storage/local.js new file mode 100644 index 0000000..9bbf9aa --- /dev/null +++ b/apps/task-scheduler/src/storage/local.js @@ -0,0 +1,81 @@ +/** + * Local(shared volume)storage helper(T4 重構自 server.js L361-379、L516-523)。 + * + * 職責: + * 提供 `STORAGE_BACKEND=local` 模式下,`POST /jobs` 寫檔案到 shared volume, + * 以及 `GET /jobs/:id/download/:filename` 從 volume 讀取的對應助手。 + * + * 行為對齊(重構不改行為): + * - 目錄結構:`<JOB_DATA_DIR>/<jobId>/{input,input/ref_images,logs}` + * - 檔名直接用 `originalname`(不 sanitize;與 legacy 行為一致) + * - 同步檔案 IO(fs.mkdirSync /
fs.writeFileSync),與 server.js 既有用法一致 + */ + +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +/** + * 取 JOB_DATA_DIR;對齊 server.js L31。 + */ +function getJobDataDir() { + return process.env.JOB_DATA_DIR || '/data/jobs'; +} + +/** + * 建立 job 的工作目錄(input / input/ref_images / logs)並寫入上傳檔案。 + * + * 與 server.js L361-379 行為完全一致: + * - mkdir recursive + * - 用 originalname 作為檔名(不 sanitize,legacy 行為) + * - synchronous fs IO(writeFileSync) + * + * @param {string} jobId + * @param {{ buffer: Buffer, originalname: string }} modelFile + * @param {Array<{ buffer: Buffer, originalname: string }>} [refImages] + * @param {string} [jobDataDir] - 覆寫 JOB_DATA_DIR(測試用) + */ +function writeJobFilesToLocal(jobId, modelFile, refImages, jobDataDir) { + const baseDir = jobDataDir || getJobDataDir(); + const jobDir = path.join(baseDir, jobId); + const inputDir = path.join(jobDir, 'input'); + const refImagesDir = path.join(inputDir, 'ref_images'); + const logsDir = path.join(jobDir, 'logs'); + + fs.mkdirSync(inputDir, { recursive: true }); + fs.mkdirSync(refImagesDir, { recursive: true }); + fs.mkdirSync(logsDir, { recursive: true }); + + const modelPath = path.join(inputDir, modelFile.originalname); + fs.writeFileSync(modelPath, modelFile.buffer); + + if (refImages && refImages.length > 0) { + for (const img of refImages) { + const imgPath = path.join(refImagesDir, img.originalname); + fs.writeFileSync(imgPath, img.buffer); + } + } +} + +/** + * 解析 download 路徑(對齊 server.js L518)。 + * + * 注意:legacy 不 sanitize filename,呼叫端直接拿 req.params.filename 拼路徑; + * 本函式維持同樣行為(不額外 sanitize),避免行為偏移。 + * + * @param {string} jobId + * @param {string} filename + * @param {string} [jobDataDir] + * @returns {string} + */ +function resolveLocalDownloadPath(jobId, filename, jobDataDir) { + const baseDir = jobDataDir || getJobDataDir(); + return path.join(baseDir, jobId, filename); +} + +module.exports = { + getJobDataDir, + writeJobFilesToLocal, + resolveLocalDownloadPath, +}; diff --git 
a/apps/task-scheduler/src/storage/minio.js b/apps/task-scheduler/src/storage/minio.js new file mode 100644 index 0000000..2ebf286 --- /dev/null +++ b/apps/task-scheduler/src/storage/minio.js @@ -0,0 +1,229 @@ +/** + * MinIO(S3-compatible)storage helper(T4 重構自 server.js L34-81)。 + * + * 職責: + * 1. 依照 STORAGE_BACKEND 旗標決定是否建立 S3Client + * 2. 提供 `uploadToMinIO`(buffer-based 上傳)與 `getFromMinIO`(buffer-based 下載) + * + * 行為對齊(重構不改行為): + * - 若 `STORAGE_BACKEND !== 'minio'` → client 為 null,呼叫 helper 時直接回傳 falsy + * (與 server.js L57: `if (!minio) return;` 完全一致) + * - getFromMinIO 採 web-stream → buffer 的 collect 邏輯,與 server.js L73-80 一致 + * - 預設值對齊 server.js L36-40 + */ + +'use strict'; + +const { + S3Client, + PutObjectCommand, + GetObjectCommand, + HeadObjectCommand, + DeleteObjectCommand, +} = require('@aws-sdk/client-s3'); + +/** + * 從 process.env 讀取 MinIO 設定,與 server.js L35-40 行為一致。 + * + * @returns {{ + * backend: string, + * endpoint: string, + * bucket: string, + * accessKey: string, + * secretKey: string, + * region: string, + * }} + */ +function readMinioEnv() { + return { + backend: process.env.STORAGE_BACKEND || 'local', + endpoint: process.env.MINIO_ENDPOINT_URL || 'http://192.168.0.130:9000', + bucket: process.env.MINIO_BUCKET || 'convertet-working-space', + accessKey: process.env.MINIO_ACCESS_KEY || 'convuser', + secretKey: process.env.MINIO_SECRET_KEY || '', + region: process.env.MINIO_REGION || 'us-east-1', + }; +} + +/** + * 建立一個 MinIO storage facade。 + * + * 若 `backend !== 'minio'`,回傳的 facade 中 `client` 為 null,所有 helper 都會 + * 在呼叫端的 `if (minio.client)` 分支前直接回傳 nullish 結果(保留既有行為)。 + * + * @param {object} [overrides] - 覆寫 env 設定(測試用) + * @returns {{ + * client: import('@aws-sdk/client-s3').S3Client | null, + * bucket: string, + * endpoint: string, + * uploadToMinIO: (key: string, body: Buffer | NodeJS.ReadableStream, contentType?: string) => Promise, + * getFromMinIO: (key: string) => Promise<{ body: Buffer, contentLength: number | undefined } | null>, + * 
/**
 * Build a facade over the S3-compatible (MinIO) object store.
 *
 * The effective configuration is `readMinioEnv()` with every key present in
 * `overrides` layered on top. An `S3Client` is only constructed when the
 * resolved `backend` equals `'minio'`; for any other backend `client` stays
 * `null` and each helper returns early (`undefined` or `null`) without
 * throwing.
 *
 * @param {object} [overrides] - Values that replace the ones read by `readMinioEnv()`.
 * @returns {{
 *   client: object|null,
 *   bucket: any,
 *   endpoint: any,
 *   uploadToMinIO: (key: string, body: any, contentType: string) => Promise<void>,
 *   getFromMinIO: (key: string) => Promise<{ body: Buffer, contentLength: number|undefined } | null>,
 *   headObject: (key: string) => Promise<{ contentLength: number|undefined, contentType: string|undefined } | null>,
 *   getObjectStream: (key: string) => Promise<{ stream: any, contentLength: number|undefined, contentType: string|undefined } | null>,
 *   deleteObject: (key: string) => Promise<void>,
 * }}
 */
function createMinioFacade(overrides) {
  const env = { ...readMinioEnv(), ...(overrides || {}) };

  // Only the 'minio' backend gets a real client; everything else runs with null.
  const client =
    env.backend === 'minio'
      ? new S3Client({
          endpoint: env.endpoint,
          region: env.region,
          credentials: {
            accessKeyId: env.accessKey,
            secretAccessKey: env.secretKey,
          },
          // Path-style addressing; the original author notes MinIO requires it.
          forcePathStyle: true,
        })
      : null;

  // Bucket/Key pair shared by every command issued for `key`.
  const target = (key) => ({ Bucket: env.bucket, Key: key });

  /**
   * Upload `body` under `key` with the given content type (buffer-based).
   *
   * Resolves to `undefined` without doing anything when no client exists.
   *
   * @param {string} key
   * @param {*} body - Payload forwarded unchanged as the command's `Body`.
   * @param {string} contentType
   * @returns {Promise<void>}
   */
  async function uploadToMinIO(key, body, contentType) {
    if (!client) return;
    const command = new PutObjectCommand({
      ...target(key),
      Body: body,
      ContentType: contentType,
    });
    await client.send(command);
  }

  /**
   * Download the object at `key` completely into memory.
   *
   * The response body is consumed with `for await` and the chunks are joined
   * with `Buffer.concat`, so the whole object is held in memory at once.
   * NOTE(review): the concrete stream class of `Body` depends on the SDK and
   * runtime and is not visible from this file — confirm before relying on it.
   *
   * @param {string} key
   * @returns {Promise<{ body: Buffer, contentLength: number | undefined } | null>}
   *   `null` when no client exists.
   */
  async function getFromMinIO(key) {
    if (!client) return null;
    const reply = await client.send(new GetObjectCommand(target(key)));
    const pieces = [];
    for await (const piece of reply.Body) {
      pieces.push(piece);
    }
    return {
      body: Buffer.concat(pieces),
      contentLength: reply.ContentLength,
    };
  }

  /**
   * Fetch only the metadata of the object at `key` (HEAD request, no body).
   *
   * Lets a caller learn size and content type before starting a one-shot
   * stream download; per the original notes this serves the promote flow.
   *
   * @param {string} key
   * @returns {Promise<{ contentLength: number|undefined, contentType: string|undefined } | null>}
   *   `null` when no client exists.
   */
  async function headObject(key) {
    if (!client) return null;
    const { ContentLength, ContentType } = await client.send(
      new HeadObjectCommand(target(key))
    );
    return {
      contentLength: ContentLength,
      contentType: ContentType,
    };
  }

  /**
   * Fetch the object at `key` as a stream plus its metadata.
   *
   * Unlike `getFromMinIO`, the body is not buffered: `response.Body` is
   * handed back untouched so the caller can forward it without loading the
   * whole object into memory. The stream can only be consumed once.
   *
   * @param {string} key
   * @returns {Promise<{
   *   stream: any,
   *   contentLength: number|undefined,
   *   contentType: string|undefined,
   * } | null>} `null` when no client exists.
   */
  async function getObjectStream(key) {
    if (!client) return null;
    const { Body, ContentLength, ContentType } = await client.send(
      new GetObjectCommand(target(key))
    );
    return {
      stream: Body,
      contentLength: ContentLength,
      contentType: ContentType,
    };
  }

  /**
   * Delete the object at `key`.
   *
   * Silently does nothing when no client exists. Any error raised by
   * `client.send` propagates to the caller.
   * NOTE(review): the original comment states that deleting a missing key
   * does not throw (DeleteObject being idempotent) — confirm against the SDK.
   *
   * @param {string} key
   * @returns {Promise<void>}
   */
  async function deleteObject(key) {
    if (!client) return;
    await client.send(new DeleteObjectCommand(target(key)));
  }

  return {
    client,
    bucket: env.bucket,
    endpoint: env.endpoint,
    uploadToMinIO,
    getFromMinIO,
    headObject,
    getObjectStream,
    deleteObject,
  };
}
non-allowed chars with underscore', () => { + expect(sanitizeFilename('name with spaces.bin')).toBe('name_with_spaces.bin'); + expect(sanitizeFilename('name;injection.bin')).toBe('name_injection.bin'); + expect(sanitizeFilename('semi:colon.bin')).toBe('semi_colon.bin'); + }); + + it('removes leading dots (avoid hidden files)', () => { + expect(sanitizeFilename('.htaccess')).toBe('htaccess'); + expect(sanitizeFilename('..hidden.bin')).toBe('hidden.bin'); + }); + + it('returns "file" for empty / dot-only inputs', () => { + expect(sanitizeFilename('')).toBe('file'); + expect(sanitizeFilename('.')).toBe('file'); + expect(sanitizeFilename('..')).toBe('file'); + expect(sanitizeFilename(' ')).toBe('file'); // trim 後變空 + }); + + it('truncates names longer than 200 chars while preserving extension', () => { + const longBase = 'a'.repeat(300); + const result = sanitizeFilename(`${longBase}.onnx`); + expect(result.length).toBeLessThanOrEqual(200); + expect(result.endsWith('.onnx')).toBe(true); + }); + + it('truncates very long names without extension to 200 chars', () => { + const long = 'b'.repeat(300); + expect(sanitizeFilename(long).length).toBe(200); + }); +}); + +describe('getExtension', () => { + it('returns lowercase extension with dot', () => { + expect(getExtension('model.ONNX')).toBe('.onnx'); + expect(getExtension('weights.TFLite')).toBe('.tflite'); + }); + + it('returns empty string for files without extension', () => { + expect(getExtension('weights')).toBe(''); + expect(getExtension('weights.')).toBe(''); + expect(getExtension('.hidden')).toBe(''); // dot at start is not ext + }); + + it('returns last extension only', () => { + expect(getExtension('archive.tar.gz')).toBe('.gz'); + }); +}); + +describe('validateUserId (Sec M1 white-list)', () => { + it('accepts valid alnum + dash + dot + underscore', () => { + expect(validateUserId('user-123')).toBe('user-123'); + expect(validateUserId('visionA-user-12345')).toBe('visionA-user-12345'); + 
expect(validateUserId('user.name')).toBe('user.name'); + expect(validateUserId('user_name')).toBe('user_name'); + expect(validateUserId('A1b2C3')).toBe('A1b2C3'); + }); + + it('rejects empty / oversize', () => { + expect(validateUserId('')).toBeNull(); + expect(validateUserId('a'.repeat(129))).toBeNull(); + }); + + it('rejects path traversal chars', () => { + expect(validateUserId('../etc')).toBeNull(); + expect(validateUserId('user..name')).toBeNull(); + expect(validateUserId('foo/bar')).toBeNull(); + expect(validateUserId('foo\\bar')).toBeNull(); + }); + + it('rejects colon (Redis key injection)', () => { + expect(validateUserId('user:active_job')).toBeNull(); + }); + + it('rejects control chars / NUL', () => { + expect(validateUserId('user\x00admin')).toBeNull(); + expect(validateUserId('user\nadmin')).toBeNull(); + expect(validateUserId('user\tadmin')).toBeNull(); + }); + + it('rejects leading / trailing whitespace', () => { + expect(validateUserId(' user')).toBeNull(); + expect(validateUserId('user ')).toBeNull(); + expect(validateUserId('user name')).toBeNull(); // inner space + }); + + // Sec M1:新增白名單測試(黑名單模式漏掉的攻擊向量) + it('rejects XSS payloads', () => { + expect(validateUserId('')).toBeNull(); + expect(validateUserId('')).toBeNull(); + expect(validateUserId('user`、`*`(萬用字元)、空白、unicode + * RTL override、homograph 等 + * - 白名單模式只保留明確安全的字元,所有未列入的字元一律拒絕,深度防禦 + * - user_id 會被用於: + * 1. Redis key(`user:{userId}:active_job` / `user:{userId}:jobs`) + * 2. structured log 欄位(`user_id`) + * 3. 
/**
 * Allow-list for `user_id`: ASCII letters, digits, `.`, `_` and `-` only,
 * one or more characters, anchored at both ends.
 */
const USER_ID_WHITELIST = /^[A-Za-z0-9._-]+$/;

/**
 * Validate a `user_id` against the allow-list.
 *
 * Accepted: a string of 1-128 characters drawn from `A-Z`, `a-z`, `0-9`,
 * `.`, `_`, `-` that does not contain two consecutive dots.
 *
 * Rejected (returns `null`):
 *   - non-string input
 *   - empty strings or strings longer than 128 characters
 *   - any character outside the allow-list (`/`, `\`, `:`, whitespace,
 *     control characters, NUL, `<`, `>`, non-ASCII, ...)
 *   - `..` anywhere in the value; both dots pass the allow-list, but the
 *     sequence looks like path traversal and is refused explicitly
 *
 * @param {unknown} raw
 * @returns {string|null} The original value when valid, otherwise `null`.
 */
function validateUserId(raw) {
  const acceptable =
    typeof raw === 'string' &&
    raw.length >= 1 &&
    raw.length <= 128 &&
    USER_ID_WHITELIST.test(raw) &&
    !raw.includes('..');
  return acceptable ? raw : null;
}

/**
 * Validate the `target_object_key` used when promoting a result file.
 *
 * Returns `null` when the value:
 *   - is not a string
 *   - is empty or longer than 1024 characters
 *   - contains `..` or a backslash
 *   - contains a control character (0x00-0x1f) or DEL (0x7f)
 *   - starts with `/` (only relative keys are accepted)
 *
 * @param {unknown} raw
 * @returns {string|null} The original value when valid, otherwise `null`.
 */
function validateTargetObjectKey(raw) {
  if (typeof raw !== 'string') return null;

  // Each rule answers "is this key unacceptable?"; the first hit rejects it.
  const violations = [
    (key) => key.length === 0 || key.length > 1024,
    (key) => key.includes('..') || key.includes('\\'),
    (key) => /[\x00-\x1f\x7f]/.test(key),
    (key) => key.startsWith('/'),
  ];

  return violations.some((isBroken) => isBroken(raw)) ? null : raw;
}
1 env 透傳清單。所有值都用 ${VAR} 從 .env / shell 讀取, + # 不在 docker-compose.yml hardcode(避免 secret 被 commit)。 + # 必填變數缺漏 → scheduler container 會啟動失敗(fail-fast)。 scheduler: build: ./apps/task-scheduler @@ -47,10 +51,19 @@ services: volumes: - job-data:/data/jobs environment: + # === 應用基本 === - PORT=4000 + - NODE_ENV=${NODE_ENV:-development} + - LOG_LEVEL=${LOG_LEVEL:-info} + + # === Redis === - REDIS_URL=redis://redis:6379 + + # === Job 資料目錄 / CORS === - JOB_DATA_DIR=/data/jobs - - FRONTEND_URL=http://localhost:9500 + - FRONTEND_URL=${FRONTEND_URL:-http://localhost:9500} + + # === Storage backend === - STORAGE_BACKEND=${STORAGE_BACKEND:-local} - MINIO_ENDPOINT_URL=${MINIO_ENDPOINT_URL:-http://192.168.0.130:9000} - MINIO_BUCKET=${MINIO_BUCKET:-convertet-working-space} @@ -58,6 +71,46 @@ services: - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} - MINIO_REGION=${MINIO_REGION:-us-east-1} - MINIO_LIFECYCLE_DAYS=${MINIO_LIFECYCLE_DAYS:-7} + + # === OAuth / Member Center(必填,缺漏 fail-fast)=== + - MEMBER_CENTER_ISSUER=${MEMBER_CENTER_ISSUER} + - MEMBER_CENTER_JWKS_URL=${MEMBER_CENTER_JWKS_URL} + - MEMBER_CENTER_TOKEN_URL=${MEMBER_CENTER_TOKEN_URL} + + # === Converter 身份(必填)=== + - KNERON_CONVERTER_AUDIENCE=${KNERON_CONVERTER_AUDIENCE} + - KNERON_CONVERTER_CLIENT_ID=${KNERON_CONVERTER_CLIENT_ID} + - KNERON_CONVERTER_CLIENT_SECRET=${KNERON_CONVERTER_CLIENT_SECRET} + - CONVERTER_TENANT_ID=${CONVERTER_TENANT_ID:-} + + # === File Access Agent(必填)=== + - FILE_ACCESS_AGENT_BASE_URL=${FILE_ACCESS_AGENT_BASE_URL} + - FILE_ACCESS_AGENT_AUDIENCE=${FILE_ACCESS_AGENT_AUDIENCE} + + # === Scope(可選,預設 TDD §8)=== + - CONVERTER_SCOPE_WRITE=${CONVERTER_SCOPE_WRITE:-converter:job.write} + - CONVERTER_SCOPE_READ=${CONVERTER_SCOPE_READ:-converter:job.read} + + # === JWKS / JWT cache 行為(可選)=== + - JWKS_CACHE_MAX_AGE_MS=${JWKS_CACHE_MAX_AGE_MS:-600000} + - JWKS_COOLDOWN_MS=${JWKS_COOLDOWN_MS:-30000} + - JWT_CLOCK_TOLERANCE_SEC=${JWT_CLOCK_TOLERANCE_SEC:-60} + + # === OAuth Client cache(可選)=== + - 
OAUTH_TOKEN_REFRESH_SKEW_MS=${OAUTH_TOKEN_REFRESH_SKEW_MS:-60000} + - OAUTH_TOKEN_TIMEOUT_MS=${OAUTH_TOKEN_TIMEOUT_MS:-10000} + + # === Promote 行為(可選)=== + - PROMOTE_TIMEOUT_MS=${PROMOTE_TIMEOUT_MS:-300000} + + # === Multipart 上限(T10 修 D5)=== + - MULTIPART_MODEL_MAX_BYTES=${MULTIPART_MODEL_MAX_BYTES:-524288000} + - MULTIPART_REF_IMAGE_MAX_BYTES=${MULTIPART_REF_IMAGE_MAX_BYTES:-10485760} + - MULTIPART_REF_IMAGES_MAX_COUNT=${MULTIPART_REF_IMAGES_MAX_COUNT:-100} + + # === Upload concurrency(T10 修 D5)=== + - MAX_CONCURRENT_UPLOADS=${MAX_CONCURRENT_UPLOADS:-5} + - UPLOAD_RETRY_AFTER_SECONDS=${UPLOAD_RETRY_AFTER_SECONDS:-30} restart: unless-stopped # ---------- Workers (stub mode) ----------