Refactor server.js (647 → 99 lines) into 30+ modules under src/: - auth/: JWKS validation, JWT middleware, OAuth client_credentials - routes/v1/: jobs (POST/GET/:id) + promote with input validation - routes/legacy.js: existing /jobs multipart path (backward compatible) - services/: jobService, healthService, sseService, statusMapper, doneListener - middleware/: requestId, errorHandler, perClientRateLimit, uploadConcurrency, upload (multer + storage) - redis/: Lua scripts for atomic claim/release_active_job - storage/: local + minio adapters; fileAccessAgent/: PUT promote client - config.js: env var validation with fail-fast Phase 1 features (T1–T11): - T1 Auth middleware + JWKS (Member Center OAuth2 resource server) - T2 OAuth client (Member Center client_credentials, Basic auth) - T3 /api/v1/* router skeleton - T4 server.js refactor (legacy endpoints fully preserved, real-Redis regression verified — existing worker consumer group untouched) - T5 POST /api/v1/jobs (multipart, OWASP-audited, 2 Critical / 6 Major fixed; Risk-A/B documented as accepted) - T6 GET /api/v1/jobs + GET /:id (cursor pagination, ETag, IDOR-safe) - T7 POST /jobs/:id/promote (FAA PUT with own service token, 300s timeout, fail-fast on missing FAA URL) - T8 /health upgrade (healthy/degraded/unhealthy + 30s background cache) - T9 stage_timings (release_active_job in terminal states) - T10 env + Docker integration (MULTIPART_* + concurrency limiter) - T11 README (498 lines) + OpenAPI 3.0 spec (1588 lines) Tests: 630 pass across 29 suites. Updated Dockerfile + .dockerignore + docker-compose.yml env passthrough (no hardcoded secrets, fail-fast on missing required vars). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
280 lines
10 KiB
JavaScript
280 lines
10 KiB
JavaScript
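/**
 * Centralized reading of all environment variables; fails fast at startup.
 *
 * Scope: T1/T2 — reads the OAuth / JWKS / Converter identity / OAuth client fields.
 * Other existing fields (PORT, REDIS_URL, MINIO_*, JOB_DATA_DIR, etc.) are for now still
 * read the way server.js already reads them, and will be merged in here during the T4 refactor.
 *
 * Design principles:
 * - A missing required variable throws immediately, instead of blowing up later at runtime
 * - No secret is ever printed to the log (this file does no logging)
 * - A frozen object is exported so that it cannot be modified
 *
 * Change history:
 * - T1: token URL / client id / client secret were optional at first, because T1 never called the token endpoint
 * - T2 (this task): implements the OAuth client; per TDD §9 the three fields above are tightened to required (fixes D1/D2)
 * - T10: adds the multipart and uploadConcurrency sections (fixes D5). Every multipart limit and the
 *   per-process upload concurrency cap is controlled by env, so tuning them does not require a source change.
 */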
|
||
|
||
'use strict';
|
||
|
||
require('dotenv').config();
|
||
|
||
/**
 * Reads a required string environment variable and throws when it is absent.
 *
 * A variable counts as absent when it is unset or contains only whitespace.
 *
 * @param {string} name - Name of the environment variable to read.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When the variable is unset or blank.
 */
function requireEnv(name) {
  const value = process.env[name];
  const isMissing = typeof value !== 'string' || value.trim() === '';
  if (isMissing) {
    throw new Error(
      `[config] Missing required environment variable: ${name}. ` +
        `Set it in .env or your deployment environment before starting the service.`
    );
  }
  return value.trim();
}
|
||
|
||
/**
 * Reads an optional string environment variable, falling back to a default.
 *
 * The default is returned when the variable is unset or contains only
 * whitespace; otherwise the trimmed value is returned.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {string} [defaultValue=''] - Value returned when the variable is unset or blank.
 * @returns {string} The trimmed value, or `defaultValue`.
 */
function optionalEnv(name, defaultValue = '') {
  const value = process.env[name];
  if (typeof value !== 'string') {
    return defaultValue;
  }
  const trimmed = value.trim();
  return trimmed === '' ? defaultValue : trimmed;
}
|
||
|
||
/**
 * Reads an optional integer environment variable, falling back to a default.
 *
 * The default is returned when the variable is unset or contains only
 * whitespace (the same "blank means unset" rule as `optionalEnv`). Any other
 * value must be a whole base-10 integer, optionally signed.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {number} defaultValue - Value returned when the variable is unset or blank.
 * @returns {number} The parsed integer, or `defaultValue`.
 * @throws {Error} When the value is not a base-10 integer or is outside the safe integer range.
 */
function optionalIntEnv(name, defaultValue) {
  const raw = process.env[name];
  if (typeof raw !== 'string' || raw.trim() === '') {
    return defaultValue;
  }

  // Number.parseInt stops at the first non-digit, so "300s" -> 300, "1.5" -> 1
  // and "0x10" -> 0 would be accepted silently. Validate the whole string so
  // that a typo fails at startup instead of becoming a wrong limit.
  const text = raw.trim();
  const parsed = /^[+-]?\d+$/.test(text) ? Number(text) : Number.NaN;

  // The safe-integer check also rejects NaN and digit strings too large to be
  // represented exactly.
  if (!Number.isSafeInteger(parsed)) {
    throw new Error(
      `[config] Environment variable ${name} must be an integer, got: ${JSON.stringify(raw)}`
    );
  }
  return parsed;
}
|
||
|
||
/**
 * Reads an optional integer environment variable that must be strictly positive.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {number} defaultValue - Value used when the variable is unset.
 * @returns {number}
 * @throws {Error} When the resolved value is zero or negative.
 */
function positiveIntEnv(name, defaultValue) {
  const value = optionalIntEnv(name, defaultValue);
  if (value <= 0) {
    throw new Error(`[config] ${name} must be > 0, got: ${value}`);
  }
  return value;
}

/**
 * Reads an optional integer environment variable that must not be negative.
 *
 * Used for durations, where a negative number is never meaningful. Zero is
 * still accepted because this module cannot tell how each consumer treats it.
 *
 * @param {string} name - Name of the environment variable to read.
 * @param {number} defaultValue - Value used when the variable is unset.
 * @returns {number}
 * @throws {Error} When the resolved value is negative.
 */
function nonNegativeIntEnv(name, defaultValue) {
  const value = optionalIntEnv(name, defaultValue);
  if (value < 0) {
    throw new Error(`[config] ${name} must be >= 0, got: ${value}`);
  }
  return value;
}

/**
 * Validates FILE_ACCESS_AGENT_BASE_URL: it must parse as a URL, use http(s),
 * and use https when NODE_ENV is "production".
 *
 * @param {string} baseUrl - The configured base URL.
 * @returns {void}
 * @throws {Error} When any of the rules above is violated.
 */
function assertFileAccessAgentBaseUrl(baseUrl) {
  let parsedUrl;
  try {
    parsedUrl = new URL(baseUrl);
  } catch (_err) {
    throw new Error(
      `[config] FILE_ACCESS_AGENT_BASE_URL must be a valid URL, got: ${JSON.stringify(baseUrl)}`
    );
  }

  const { protocol } = parsedUrl;
  if (protocol !== 'http:' && protocol !== 'https:') {
    throw new Error(
      `[config] FILE_ACCESS_AGENT_BASE_URL must use http(s) scheme, got protocol: ${protocol}`
    );
  }
  if (process.env.NODE_ENV === 'production' && protocol !== 'https:') {
    throw new Error(
      '[config] FILE_ACCESS_AGENT_BASE_URL must use HTTPS in production (NODE_ENV=production)'
    );
  }
}

/**
 * Loads and validates the configuration and returns it as a frozen object.
 *
 * Throws on failure — the caller (server entry) is expected to let the error
 * surface at require time so that the process exits immediately (fail fast).
 *
 * @returns {Readonly<{
 *   memberCenter: { issuer: string, jwksUrl: string, tokenUrl: string },
 *   converter: {
 *     audience: string,
 *     clientId: string,
 *     clientSecret: string,
 *     tenantId: string,
 *     scopeWrite: string,
 *     scopeRead: string,
 *   },
 *   fileAccessAgent: { baseUrl: string, audience: string, promoteTimeoutMs: number },
 *   jwks: { cacheMaxAgeMs: number, cooldownMs: number, clockToleranceSec: number },
 *   oauthClient: { refreshSkewMs: number, timeoutMs: number },
 *   multipart: { modelMaxBytes: number, refImageMaxBytes: number, refImagesMaxCount: number },
 *   uploadConcurrency: { maxConcurrent: number, retryAfterSeconds: number },
 * }>}
 * @throws {Error} When a required variable is missing or any value fails validation.
 */
function loadConfig() {
  // === Member Center (OAuth Authorization Server) ===
  const mcIssuer = requireEnv('MEMBER_CENTER_ISSUER');
  const mcJwksUrl = requireEnv('MEMBER_CENTER_JWKS_URL');
  // T2: required, per TDD §9. The OAuth client needs this endpoint to obtain tokens.
  const mcTokenUrl = requireEnv('MEMBER_CENTER_TOKEN_URL');

  // === Converter as Resource Server (receives tokens issued to others) ===
  const audience = requireEnv('KNERON_CONVERTER_AUDIENCE');

  // === Converter as OAuth Client (calls the File Access Agent, promote only) ===
  // T2: client_id / client_secret tightened to required per TDD §9; both must be present.
  const clientId = requireEnv('KNERON_CONVERTER_CLIENT_ID');
  const clientSecret = requireEnv('KNERON_CONVERTER_CLIENT_SECRET');

  // === Tenant isolation (optional) ===
  const tenantId = optionalEnv('CONVERTER_TENANT_ID', '');

  // === Scope names (overridable; defaults follow TDD §8) ===
  const scopeWrite = optionalEnv('CONVERTER_SCOPE_WRITE', 'converter:job.write');
  const scopeRead = optionalEnv('CONVERTER_SCOPE_READ', 'converter:job.read');

  // === File Access Agent (required since T7) ===
  // T7: the promote flow is live, so the FAA URL / audience are validated at startup.
  //   - The URL must be a valid http(s) URL; NODE_ENV=production enforces https.
  //   - A dev placeholder such as https://REPLACE-ME.invalid is still a valid URL,
  //     so local startup is unaffected.
  // Both variables are read before the URL is inspected, so a missing audience is
  // reported ahead of a malformed URL.
  const faaBaseUrl = requireEnv('FILE_ACCESS_AGENT_BASE_URL');
  const faaAudience = requireEnv('FILE_ACCESS_AGENT_AUDIENCE');
  assertFileAccessAgentBaseUrl(faaBaseUrl);

  // === Promote behaviour (used by T7) ===
  // Per-file PUT timeout; default 300s (lower bound for 500MB at 5MB/s), per TDD §6.4.
  const promoteTimeoutMs = nonNegativeIntEnv('PROMOTE_TIMEOUT_MS', 300 * 1000);

  // === JWKS cache behaviour ===
  const jwksCacheMaxAgeMs = nonNegativeIntEnv('JWKS_CACHE_MAX_AGE_MS', 10 * 60 * 1000); // 10 minutes
  const jwksCooldownMs = nonNegativeIntEnv('JWKS_COOLDOWN_MS', 30 * 1000); // 30 seconds
  const jwtClockToleranceSec = nonNegativeIntEnv('JWT_CLOCK_TOLERANCE_SEC', 60); // 60 seconds

  // === OAuth Client (token acquisition, T2) ===
  // Refresh skew: how many ms before expiresAt a cached token is proactively refreshed.
  // Default 60s, to avoid the race where a token expires just as it is fetched.
  const oauthRefreshSkewMs = nonNegativeIntEnv('OAUTH_TOKEN_REFRESH_SKEW_MS', 60 * 1000);
  // Token request timeout (network RTT + Member Center processing time).
  // Default 10s, so a slow token endpoint cannot hang the promote flow.
  const oauthTimeoutMs = nonNegativeIntEnv('OAUTH_TOKEN_TIMEOUT_MS', 10 * 1000);

  // === Multipart upload limits (T10, fixes D5) ===
  // Why env-driven: memory quotas differ widely between deployments (a 2GB dev
  // container vs. a prod box that may have 16GB), so a fixed 500MB is not flexible
  // enough. dev / staging can lower the limits to avoid OOM.
  //
  //   - MULTIPART_MODEL_MAX_BYTES: multer's per-file fileSize limit (also used for the
  //     model file size check). Default 500MB (TDD §1.4.2 and the PRD F-01 cap).
  //   - MULTIPART_REF_IMAGE_MAX_BYTES: limit for a single ref_image (validator logic;
  //     multer's fileSize applies to every file and cannot target ref_images alone).
  //     Default 10MB.
  //   - MULTIPART_REF_IMAGES_MAX_COUNT: maximum number of ref_images (multer fields
  //     maxCount parameter). Default 100.
  //
  // Safety: every value has a lower-bound check (must be > 0) so that 0 or a negative
  // number cannot make multer reject all requests.
  const modelMaxBytes = positiveIntEnv('MULTIPART_MODEL_MAX_BYTES', 500 * 1024 * 1024);
  const refImageMaxBytes = positiveIntEnv('MULTIPART_REF_IMAGE_MAX_BYTES', 10 * 1024 * 1024);
  const refImagesMaxCount = positiveIntEnv('MULTIPART_REF_IMAGES_MAX_COUNT', 100);

  // === Upload concurrency (T10, second part of the D5 fix) ===
  // Why a per-process semaphore is needed:
  //   multer uses memoryStorage, so each concurrent upload holds a model-sized buffer
  //   in memory; 5 concurrent × 500MB = 2.5GB heap, and a 4GB container would be
  //   OOM-killed. A per-process counter caps the number of in-flight uploads.
  //
  //   - MAX_CONCURRENT_UPLOADS: maximum uploads in flight at once. Default 5
  //     (conservative: 5 × 500MB ≈ 2.5GB peak heap, safe for containers ≥ 4GB).
  //   - UPLOAD_RETRY_AFTER_SECONDS: Retry-After seconds sent with the 503 response when
  //     the cap is exceeded. Default 30s (a reasonable backoff starting point).
  //
  // Why 503 + Retry-After rather than a queue:
  //   a queue holds the connection for an unpredictable time (seconds or minutes),
  //   which makes client timeouts unpredictable. Responding 503 + Retry-After lets the
  //   client retry on its own, fits the 12-Factor stateless principle, and is friendlier.
  const maxConcurrentUploads = positiveIntEnv('MAX_CONCURRENT_UPLOADS', 5);
  const uploadRetryAfterSeconds = positiveIntEnv('UPLOAD_RETRY_AFTER_SECONDS', 30);

  // Freeze every nested section as well as the outer object.
  return Object.freeze({
    memberCenter: Object.freeze({
      issuer: mcIssuer,
      jwksUrl: mcJwksUrl,
      tokenUrl: mcTokenUrl,
    }),
    converter: Object.freeze({
      audience,
      clientId,
      clientSecret,
      tenantId,
      scopeWrite,
      scopeRead,
    }),
    fileAccessAgent: Object.freeze({
      baseUrl: faaBaseUrl,
      audience: faaAudience,
      promoteTimeoutMs,
    }),
    jwks: Object.freeze({
      cacheMaxAgeMs: jwksCacheMaxAgeMs,
      cooldownMs: jwksCooldownMs,
      clockToleranceSec: jwtClockToleranceSec,
    }),
    oauthClient: Object.freeze({
      refreshSkewMs: oauthRefreshSkewMs,
      timeoutMs: oauthTimeoutMs,
    }),
    multipart: Object.freeze({
      modelMaxBytes,
      refImageMaxBytes,
      refImagesMaxCount,
    }),
    uploadConcurrency: Object.freeze({
      maxConcurrent: maxConcurrentUploads,
      retryAfterSeconds: uploadRetryAfterSeconds,
    }),
  });
}
|
||
|
||
module.exports = {
|
||
loadConfig,
|
||
// 暴露 helpers 供其他 module 重用 / 測試
|
||
_internals: { requireEnv, optionalEnv, optionalIntEnv },
|
||
};
|