jim800121chen 4d381c0b50 feat(task-scheduler): Phase 1 — modularize server + add OAuth/JWKS + /api/v1/* routes
Refactor server.js (647 → 99 lines) into 30+ modules under src/:
- auth/: JWKS validation, JWT middleware, OAuth client_credentials
- routes/v1/: jobs (POST/GET/:id) + promote with input validation
- routes/legacy.js: existing /jobs multipart path (backward compatible)
- services/: jobService, healthService, sseService, statusMapper,
  doneListener
- middleware/: requestId, errorHandler, perClientRateLimit,
  uploadConcurrency, upload (multer + storage)
- redis/: Lua scripts for atomic claim/release_active_job
- storage/: local + minio adapters; fileAccessAgent/: PUT promote client
- config.js: env var validation with fail-fast

Phase 1 features (T1–T11):
- T1 Auth middleware + JWKS (Member Center OAuth2 resource server)
- T2 OAuth client (Member Center client_credentials, Basic auth)
- T3 /api/v1/* router skeleton
- T4 server.js refactor (legacy endpoints fully preserved, real-Redis
  regression verified — existing worker consumer group untouched)
- T5 POST /api/v1/jobs (multipart, OWASP-audited, 2 Critical / 6 Major
  fixed; Risk-A/B documented as accepted)
- T6 GET /api/v1/jobs + GET /:id (cursor pagination, ETag, IDOR-safe)
- T7 POST /jobs/:id/promote (FAA PUT with own service token, 300s
  timeout, fail-fast on missing FAA URL)
- T8 /health upgrade (healthy/degraded/unhealthy + 30s background cache)
- T9 stage_timings (release_active_job in terminal states)
- T10 env + Docker integration (MULTIPART_* + concurrency limiter)
- T11 README (498 lines) + OpenAPI 3.0 spec (1588 lines)

Tests: 630 pass across 29 suites. Updated Dockerfile + .dockerignore +
docker-compose.yml env passthrough (no hardcoded secrets, fail-fast on
missing required vars).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 10:55:05 +08:00

280 lines
10 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 集中讀取所有環境變數,啟動時 fail fast。
*
* 範圍(T1/T2)— 讀取 OAuth / JWKS / Converter 身份 / OAuth Client 相關欄位。
* 其他既有欄位(PORT, REDIS_URL, MINIO_*, JOB_DATA_DIR 等)暫時沿用 server.js
* 既有讀法,待 T4 重構時再合併進來。
*
* 設計原則:
* - 必填變數缺漏 → 立刻 throw,避免進到 runtime 才爆炸
* - 不在 log 印出任何 secret(這個檔不負責 log)
* - 對外 export 一個凍結 object,避免被改動
*
* 變更歷程:
* - T1:先把 token URL / client id / client secret 設 optional,因 T1 沒呼叫 token endpoint
* - T2(本任務):實作 OAuth client,依 TDD §9 將上述三項收緊為必填(修 D1/D2)
* - T10:新增 multipart 與 uploadConcurrency 段(修 D5)。所有 multipart limit 與
* per-process upload concurrency 上限由 env 控制,避免改原始碼才能調整。
*/
'use strict';
require('dotenv').config();
/**
 * Read a mandatory string environment variable.
 *
 * The value is whitespace-trimmed before it is returned. A variable that is
 * unset, or blank after trimming, counts as missing.
 *
 * @param {string} name - Name of the environment variable to read.
 * @returns {string} The trimmed value.
 * @throws {Error} If the variable is unset or blank.
 */
function requireEnv(name) {
  const raw = process.env[name];
  const trimmed = typeof raw === 'string' ? raw.trim() : '';
  if (trimmed !== '') {
    return trimmed;
  }
  throw new Error(
    `[config] Missing required environment variable: ${name}. ` +
      `Set it in .env or your deployment environment before starting the service.`
  );
}
/**
 * Read an optional string environment variable.
 *
 * The value is whitespace-trimmed before it is returned. When the variable is
 * unset, or blank after trimming, `defaultValue` is returned unchanged.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {string} [defaultValue=''] - Fallback used when the variable is unset or blank.
 * @returns {string} The trimmed value, or the fallback.
 */
function optionalEnv(name, defaultValue = '') {
  const raw = process.env[name];
  const trimmed = typeof raw === 'string' ? raw.trim() : '';
  return trimmed === '' ? defaultValue : trimmed;
}
/**
 * Read an optional integer environment variable.
 *
 * An unset or empty variable yields `defaultValue`. Any other value must be a
 * plain base-10 integer (optional sign, digits only, surrounding whitespace
 * allowed) that fits in the safe-integer range; otherwise an error is thrown.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {number} defaultValue - Fallback used when the variable is unset or empty.
 * @returns {number} The parsed integer, or the fallback.
 * @throws {Error} If the value is set but is not a valid safe integer.
 */
function optionalIntEnv(name, defaultValue) {
  const raw = process.env[name];
  if (raw === undefined || raw === null || raw === '') {
    return defaultValue;
  }
  // Validate the whole string first: Number.parseInt alone stops at the first
  // non-digit, so "500MB" would parse as 500 and "1.5" as 1 instead of failing.
  const text = String(raw).trim();
  const parsed = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  // isSafeInteger rejects NaN as well as digit strings too large to be
  // represented exactly.
  if (!Number.isSafeInteger(parsed)) {
    throw new Error(
      `[config] Environment variable ${name} must be an integer, got: ${JSON.stringify(raw)}`
    );
  }
  return parsed;
}
/**
 * Load and validate the service configuration and return it as a frozen object
 * whose sections are themselves frozen.
 *
 * Throws on the first invalid or missing setting. The caller (the server entry
 * point) is expected to let that error surface during startup so the process
 * exits immediately (fail fast).
 *
 * @returns {Readonly<{
 *   memberCenter: { issuer: string, jwksUrl: string, tokenUrl: string },
 *   converter: {
 *     audience: string,
 *     clientId: string,
 *     clientSecret: string,
 *     tenantId: string,
 *     scopeWrite: string,
 *     scopeRead: string,
 *   },
 *   fileAccessAgent: { baseUrl: string, audience: string, promoteTimeoutMs: number },
 *   jwks: { cacheMaxAgeMs: number, cooldownMs: number, clockToleranceSec: number },
 *   oauthClient: { refreshSkewMs: number, timeoutMs: number },
 *   multipart: { modelMaxBytes: number, refImageMaxBytes: number, refImagesMaxCount: number },
 *   uploadConcurrency: { maxConcurrent: number, retryAfterSeconds: number },
 * }>}
 * @throws {Error} If a required variable is missing, an integer variable is
 *   malformed, the File Access Agent URL is invalid, or a limit is not > 0.
 */
function loadConfig() {
  // Reads an integer setting and enforces a strict lower bound of 1. Per the
  // original design note, a zero or negative limit would make multer reject
  // every request, so such values are refused at startup.
  const positiveIntEnv = (name, defaultValue) => {
    const value = optionalIntEnv(name, defaultValue);
    if (value <= 0) {
      throw new Error(`[config] ${name} must be > 0, got: ${value}`);
    }
    return value;
  };

  // === Member Center (OAuth authorization server) ===
  // The token URL is required since T2 (aligned with TDD §9): the OAuth client
  // needs this endpoint to obtain tokens.
  const memberCenter = Object.freeze({
    issuer: requireEnv('MEMBER_CENTER_ISSUER'),
    jwksUrl: requireEnv('MEMBER_CENTER_JWKS_URL'),
    tokenUrl: requireEnv('MEMBER_CENTER_TOKEN_URL'),
  });

  // === Converter identity ===
  // - audience: the converter acting as a resource server (accepting others' tokens).
  // - clientId / clientSecret: the converter acting as an OAuth client when it
  //   calls the File Access Agent (promote only). Both are required since T2
  //   (TDD §9) and must be supplied together.
  // - tenantId: optional tenant isolation.
  // - scopeWrite / scopeRead: overridable scope names; defaults follow TDD §8.
  const converter = Object.freeze({
    audience: requireEnv('KNERON_CONVERTER_AUDIENCE'),
    clientId: requireEnv('KNERON_CONVERTER_CLIENT_ID'),
    clientSecret: requireEnv('KNERON_CONVERTER_CLIENT_SECRET'),
    tenantId: optionalEnv('CONVERTER_TENANT_ID', ''),
    scopeWrite: optionalEnv('CONVERTER_SCOPE_WRITE', 'converter:job.write'),
    scopeRead: optionalEnv('CONVERTER_SCOPE_READ', 'converter:job.read'),
  });

  // === File Access Agent (required since T7) ===
  // The promote flow is live, so the FAA URL and audience are validated at
  // startup:
  // - the URL must be a well-formed http(s) URL;
  // - with NODE_ENV=production it must be https (transport protection);
  // - a dev placeholder such as https://REPLACE-ME.invalid is still a valid
  //   URL and does not block local startup.
  const faaBaseUrl = requireEnv('FILE_ACCESS_AGENT_BASE_URL');
  const faaAudience = requireEnv('FILE_ACCESS_AGENT_AUDIENCE');
  let faaProtocol;
  try {
    ({ protocol: faaProtocol } = new URL(faaBaseUrl));
  } catch (_err) {
    throw new Error(
      `[config] FILE_ACCESS_AGENT_BASE_URL must be a valid URL, got: ${JSON.stringify(faaBaseUrl)}`
    );
  }
  const faaIsHttps = faaProtocol === 'https:';
  if (!faaIsHttps && faaProtocol !== 'http:') {
    throw new Error(
      `[config] FILE_ACCESS_AGENT_BASE_URL must use http(s) scheme, got protocol: ${faaProtocol}`
    );
  }
  if (!faaIsHttps && process.env.NODE_ENV === 'production') {
    throw new Error(
      '[config] FILE_ACCESS_AGENT_BASE_URL must use HTTPS in production (NODE_ENV=production)'
    );
  }
  const fileAccessAgent = Object.freeze({
    baseUrl: faaBaseUrl,
    audience: faaAudience,
    // Promote behaviour (T7): timeout for a single-file PUT. Default 300 s
    // (lower bound for 500MB at 5MB/s), aligned with TDD §6.4.
    promoteTimeoutMs: optionalIntEnv('PROMOTE_TIMEOUT_MS', 300 * 1000),
  });

  // === JWKS cache behaviour ===
  const jwks = Object.freeze({
    cacheMaxAgeMs: optionalIntEnv('JWKS_CACHE_MAX_AGE_MS', 10 * 60 * 1000), // 10 minutes
    cooldownMs: optionalIntEnv('JWKS_COOLDOWN_MS', 30 * 1000), // 30 seconds
    clockToleranceSec: optionalIntEnv('JWT_CLOCK_TOLERANCE_SEC', 60), // 60 seconds
  });

  // === OAuth client (token acquisition, T2) ===
  const oauthClient = Object.freeze({
    // Refresh skew: how many ms before expiresAt a cached token is proactively
    // refreshed. Default 60 s, to avoid the race where a token expires just as
    // it is being used.
    refreshSkewMs: optionalIntEnv('OAUTH_TOKEN_REFRESH_SKEW_MS', 60 * 1000),
    // Timeout for the token request (network RTT + Member Center processing).
    // Default 10 s, so a slow token endpoint cannot hang the promote flow.
    timeoutMs: optionalIntEnv('OAUTH_TOKEN_TIMEOUT_MS', 10 * 1000),
  });

  // === Multipart upload limits (T10, fixes D5) ===
  // Driven by env because memory quotas differ widely between deployments
  // (e.g. a 2GB dev container versus a possibly 16GB prod host); a fixed 500MB
  // is not flexible enough, and dev / staging can lower it to avoid OOM.
  //
  // - MULTIPART_MODEL_MAX_BYTES: multer's per-file fileSize limit (also used
  //   for the model file size check). Default 500MB (TDD §1.4.2, PRD F-01).
  // - MULTIPART_REF_IMAGE_MAX_BYTES: per-image limit for ref_image, enforced
  //   in validator logic because multer's fileSize is a global per-file limit
  //   and cannot target ref_images only. Default 10MB.
  // - MULTIPART_REF_IMAGES_MAX_COUNT: maximum number of ref_images (multer
  //   fields maxCount). Default 100.
  const multipart = Object.freeze({
    modelMaxBytes: positiveIntEnv('MULTIPART_MODEL_MAX_BYTES', 500 * 1024 * 1024),
    refImageMaxBytes: positiveIntEnv('MULTIPART_REF_IMAGE_MAX_BYTES', 10 * 1024 * 1024),
    refImagesMaxCount: positiveIntEnv('MULTIPART_REF_IMAGES_MAX_COUNT', 100),
  });

  // === Upload concurrency (T10, second part of the D5 fix) ===
  // Why a per-process semaphore: multer uses memoryStorage, so each concurrent
  // upload holds up to one model's worth of memory; 5 concurrent × 500MB is
  // 2.5GB of heap, which would get a 4GB container OOM-killed. A per-process
  // counter caps the number of uploads in flight.
  //
  // - MAX_CONCURRENT_UPLOADS: maximum simultaneous uploads. Default 5, a
  //   conservative value (2.5GB total at 500MB peak each; safe for containers
  //   with at least 4GB).
  // - UPLOAD_RETRY_AFTER_SECONDS: Retry-After value sent with the 503 response
  //   when the cap is exceeded. Default 30 s, a reasonable backoff start.
  //
  // Why 503 + Retry-After rather than a queue: a queue holds the connection
  // for an unknown time (seconds or minutes), which makes client timeouts
  // unpredictable. An immediate 503 lets the client retry on its own, which
  // fits the 12-Factor stateless principle and is friendlier.
  const uploadConcurrency = Object.freeze({
    maxConcurrent: positiveIntEnv('MAX_CONCURRENT_UPLOADS', 5),
    retryAfterSeconds: positiveIntEnv('UPLOAD_RETRY_AFTER_SECONDS', 30),
  });

  return Object.freeze({
    memberCenter,
    converter,
    fileAccessAgent,
    jwks,
    oauthClient,
    multipart,
    uploadConcurrency,
  });
}
module.exports = {
loadConfig,
// Expose the env-reading helpers so other modules and tests can reuse them.
_internals: { requireEnv, optionalEnv, optionalIntEnv },
};