Add web frontend, MinIO storage, monitoring, and docker-compose deployment
- Frontend: rewrite Home.vue to match backend POST /jobs API (remove single-stage options) - Frontend: add Monitor page (/monitor) for queue and job monitoring - Frontend: add job history with localStorage tracking (per-browser) - Frontend: fix Nginx proxy rewrite (/api -> /) and add 500MB upload limit - Backend: add MinIO storage support (STORAGE_BACKEND=minio) alongside local mode - Backend: add GET /queues/stats API for queue monitoring - Backend: fix download handler for MinIO (buffer mode for Node 18 compat) - Workers: add S3/MinIO download/upload in consumer.py with isolated temp dirs - Workers: add s3_storage.py helper with lifecycle rule (7-day TTL) - Docker: add docker-compose.yml with all services (web, scheduler, redis, workers) - Docker: ports mapped to 9500 (web) and 9501 (scheduler) - Config: add .env to .gitignore to protect secrets Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
fdebf4db5d
commit
efa67d59a4
9
.gitignore
vendored
9
.gitignore
vendored
@ -15,6 +15,9 @@ coverage.xml
|
||||
venv/
|
||||
env/
|
||||
|
||||
# Environment (contains secrets)
|
||||
.env
|
||||
|
||||
# OS / Editor
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
@ -72,3 +75,9 @@ toolchain/prebuild/**/logs/
|
||||
|
||||
# Test outputs
|
||||
tests/fixtures/outputs/
|
||||
|
||||
CLAUDE.md.backup
|
||||
|
||||
# Autoflow Agent(由 autoflow-agent init 自動產生)
|
||||
.claude/
|
||||
.autoflow/CLAUDE.md.backup.*
|
||||
|
||||
@ -2,29 +2,22 @@ FROM node:18-alpine
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 複製package文件
|
||||
COPY package*.json ./
|
||||
RUN apk add --no-cache curl
|
||||
|
||||
# 安裝依賴
|
||||
COPY package*.json ./
|
||||
RUN npm ci --only=production
|
||||
|
||||
# 複製應用代碼
|
||||
COPY . .
|
||||
|
||||
# 創建非root用戶
|
||||
RUN addgroup -g 1001 -S nodejs
|
||||
RUN adduser -S nextjs -u 1001
|
||||
RUN addgroup -g 1001 -S appgroup && \
|
||||
adduser -S appuser -u 1001 -G appgroup
|
||||
|
||||
# 更改文件所有權
|
||||
RUN chown -R nextjs:nodejs /app
|
||||
USER nextjs
|
||||
RUN mkdir -p /data/jobs && chown -R appuser:appgroup /app /data/jobs
|
||||
USER appuser
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 4000
|
||||
|
||||
# 健康檢查
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:4000/health || exit 1
|
||||
|
||||
# 啟動命令
|
||||
CMD ["npm", "start"]
|
||||
|
||||
@ -2,15 +2,22 @@
|
||||
PORT=4000
|
||||
NODE_ENV=development
|
||||
|
||||
# Frontend URL
|
||||
# Redis
|
||||
REDIS_URL=redis://localhost:6379
|
||||
|
||||
# Job data directory (shared volume with workers)
|
||||
JOB_DATA_DIR=/data/jobs
|
||||
|
||||
# Frontend URL (for CORS)
|
||||
FRONTEND_URL=http://localhost:3000
|
||||
|
||||
# API Services Configuration
|
||||
ONNX_SERVICE_URL=http://localhost:5001
|
||||
BIE_SERVICE_URL=http://localhost:5002
|
||||
NEF_SERVICE_URL=http://localhost:5003
|
||||
# Storage backend: "local" (shared volume) or "minio"
|
||||
STORAGE_BACKEND=local
|
||||
|
||||
# API Keys
|
||||
ONNX_API_KEY=onnx-secret-key
|
||||
BIE_API_KEY=bie-secret-key
|
||||
NEF_API_KEY=nef-secret-key
|
||||
# MinIO settings (only used when STORAGE_BACKEND=minio)
|
||||
MINIO_ENDPOINT_URL=http://192.168.0.130:9000
|
||||
MINIO_BUCKET=convertet-working-space
|
||||
MINIO_ACCESS_KEY=convuser
|
||||
MINIO_SECRET_KEY=your-secret-here
|
||||
MINIO_REGION=us-east-1
|
||||
MINIO_LIFECYCLE_DAYS=7
|
||||
|
||||
6866
apps/task-scheduler/package-lock.json
generated
Normal file
6866
apps/task-scheduler/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "kneron-webgui-task-scheduler",
|
||||
"version": "1.0.0",
|
||||
"description": "Kneron Toolchain Web GUI Task Scheduler",
|
||||
"name": "kneron-task-scheduler",
|
||||
"version": "2.0.0",
|
||||
"description": "Kneron Toolchain Task Scheduler - Job management and queue orchestration",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
"start": "node server.js",
|
||||
@ -12,12 +12,14 @@
|
||||
"express": "^4.18.2",
|
||||
"cors": "^2.8.5",
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"axios": "^1.5.0",
|
||||
"ioredis": "^5.3.2",
|
||||
"uuid": "^9.0.0",
|
||||
"dotenv": "^16.3.1",
|
||||
"helmet": "^7.0.0",
|
||||
"express-rate-limit": "^6.10.0",
|
||||
"morgan": "^1.10.0",
|
||||
"compression": "^1.7.4"
|
||||
"compression": "^1.7.4",
|
||||
"@aws-sdk/client-s3": "^3.400.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"nodemon": "^3.0.1",
|
||||
@ -26,8 +28,9 @@
|
||||
"keywords": [
|
||||
"kneron",
|
||||
"toolchain",
|
||||
"api",
|
||||
"proxy"
|
||||
"scheduler",
|
||||
"queue",
|
||||
"redis-stream"
|
||||
],
|
||||
"author": "Kneron Team",
|
||||
"license": "MIT"
|
||||
|
||||
@ -1,379 +1,647 @@
|
||||
/**
|
||||
* Kneron Toolchain Web GUI Task Scheduler
|
||||
* Express.js服務器,代理所有API調用
|
||||
* Kneron Toolchain Task Scheduler
|
||||
*
|
||||
* 職責:
|
||||
* 1. REST API — 建立 job、查詢狀態、上傳檔案、下載結果
|
||||
* 2. Job State — 透過 Redis Hash 管理 job 生命週期
|
||||
* 3. Queue 調度 — 透過 Redis Stream 派工給 Worker
|
||||
* 4. Done 監聽 — 接收 Worker 完成事件,推進到下一階段
|
||||
* 5. SSE — 即時推送 job 狀態給前端
|
||||
*/
|
||||
|
||||
const express = require('express');
|
||||
const cors = require('cors');
|
||||
const multer = require('multer');
|
||||
const axios = require('axios');
|
||||
const helmet = require('helmet');
|
||||
const rateLimit = require('express-rate-limit');
|
||||
const morgan = require('morgan');
|
||||
const compression = require('compression');
|
||||
const { v4: uuidv4 } = require('uuid');
|
||||
const Redis = require('ioredis');
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const { S3Client, PutObjectCommand, GetObjectCommand } = require('@aws-sdk/client-s3');
|
||||
require('dotenv').config();
|
||||
|
||||
const app = express();
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config
|
||||
// ---------------------------------------------------------------------------
|
||||
const PORT = process.env.PORT || 4000;
|
||||
const REDIS_URL = process.env.REDIS_URL || 'redis://localhost:6379';
|
||||
const JOB_DATA_DIR = process.env.JOB_DATA_DIR || '/data/jobs';
|
||||
const FRONTEND_URL = process.env.FRONTEND_URL || 'http://localhost:3000';
|
||||
|
||||
// 配置API服務
|
||||
const API_SERVICES = {
|
||||
onnx: {
|
||||
url: process.env.ONNX_SERVICE_URL || 'http://localhost:5001',
|
||||
apiKey: process.env.ONNX_API_KEY || 'onnx-secret-key'
|
||||
},
|
||||
bie: {
|
||||
url: process.env.BIE_SERVICE_URL || 'http://localhost:5002',
|
||||
apiKey: process.env.BIE_API_KEY || 'bie-secret-key'
|
||||
},
|
||||
nef: {
|
||||
url: process.env.NEF_SERVICE_URL || 'http://localhost:5003',
|
||||
apiKey: process.env.NEF_API_KEY || 'nef-secret-key'
|
||||
// MinIO config
|
||||
const STORAGE_BACKEND = process.env.STORAGE_BACKEND || 'local';
|
||||
const MINIO_ENDPOINT = process.env.MINIO_ENDPOINT_URL || 'http://192.168.0.130:9000';
|
||||
const MINIO_BUCKET = process.env.MINIO_BUCKET || 'convertet-working-space';
|
||||
const MINIO_ACCESS_KEY = process.env.MINIO_ACCESS_KEY || 'convuser';
|
||||
const MINIO_SECRET_KEY = process.env.MINIO_SECRET_KEY || '';
|
||||
const MINIO_REGION = process.env.MINIO_REGION || 'us-east-1';
|
||||
|
||||
let minio = null;
|
||||
if (STORAGE_BACKEND === 'minio') {
|
||||
minio = new S3Client({
|
||||
endpoint: MINIO_ENDPOINT,
|
||||
region: MINIO_REGION,
|
||||
credentials: {
|
||||
accessKeyId: MINIO_ACCESS_KEY,
|
||||
secretAccessKey: MINIO_SECRET_KEY,
|
||||
},
|
||||
forcePathStyle: true, // Required for MinIO
|
||||
});
|
||||
console.log(`[Scheduler] MinIO storage enabled: ${MINIO_ENDPOINT}/${MINIO_BUCKET}`);
|
||||
}
|
||||
|
||||
/**
 * Store one object in the configured MinIO bucket.
 *
 * When the MinIO client was never created (`minio` is null) the call is a
 * no-op and resolves to undefined.
 *
 * @param {string} key - Object key inside MINIO_BUCKET.
 * @param {*} body - Payload forwarded unchanged as the PutObject `Body`.
 * @param {string} contentType - Forwarded as the object's `ContentType`.
 * @returns {Promise<void>} Resolves once the PutObject request has completed.
 */
async function uploadToMinIO(key, body, contentType) {
  if (minio) {
    const putCommand = new PutObjectCommand({
      Bucket: MINIO_BUCKET,
      Key: key,
      Body: body,
      ContentType: contentType,
    });
    await minio.send(putCommand);
  }
}
|
||||
|
||||
async function getFromMinIO(key) {
|
||||
if (!minio) return null;
|
||||
const response = await minio.send(new GetObjectCommand({
|
||||
Bucket: MINIO_BUCKET,
|
||||
Key: key,
|
||||
}));
|
||||
// Convert Body to Buffer (AWS SDK v3 Body is a web stream in Node 18)
|
||||
const chunks = [];
|
||||
for await (const chunk of response.Body) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
};
|
||||
return {
|
||||
body: Buffer.concat(chunks),
|
||||
contentLength: response.ContentLength,
|
||||
};
|
||||
}
|
||||
|
||||
// Pipeline: fixed stage order
|
||||
const STAGES = ['onnx', 'bie', 'nef'];
|
||||
const STAGE_QUEUES = {
|
||||
onnx: 'queue:onnx',
|
||||
bie: 'queue:bie',
|
||||
nef: 'queue:nef',
|
||||
};
|
||||
const DONE_QUEUE = 'queue:done';
|
||||
const DONE_GROUP = 'scheduler';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Redis clients (one for commands, one for blocking reads)
|
||||
// ---------------------------------------------------------------------------
|
||||
const redis = new Redis(REDIS_URL);
|
||||
const redisSub = new Redis(REDIS_URL);
|
||||
|
||||
redis.on('error', (err) => console.error('Redis error:', err));
|
||||
redisSub.on('error', (err) => console.error('Redis subscriber error:', err));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Express setup
|
||||
// ---------------------------------------------------------------------------
|
||||
const app = express();
|
||||
|
||||
// 中間件配置
|
||||
app.use(helmet());
|
||||
app.use(compression());
|
||||
app.use(morgan('combined'));
|
||||
app.use(morgan('short'));
|
||||
app.use(cors({ origin: FRONTEND_URL, credentials: true }));
|
||||
|
||||
// CORS配置 - 只允許前端訪問
|
||||
app.use(cors({
|
||||
origin: process.env.FRONTEND_URL || 'http://localhost:3000',
|
||||
credentials: true
|
||||
}));
|
||||
|
||||
// 請求限流
|
||||
const limiter = rateLimit({
|
||||
windowMs: 15 * 60 * 1000, // 15分鐘
|
||||
max: 100, // 限制每個IP每15分鐘最多100個請求
|
||||
message: 'Too many requests from this IP, please try again later.'
|
||||
windowMs: 15 * 60 * 1000,
|
||||
max: 200,
|
||||
message: 'Too many requests, please try again later.',
|
||||
});
|
||||
app.use('/api', limiter);
|
||||
|
||||
// 解析JSON和URL編碼
|
||||
app.use(express.json({ limit: '50mb' }));
|
||||
app.use(express.urlencoded({ extended: true, limit: '50mb' }));
|
||||
app.use(express.json({ limit: '10mb' }));
|
||||
app.use(express.urlencoded({ extended: true, limit: '10mb' }));
|
||||
|
||||
// 文件上傳配置
|
||||
const storage = multer.memoryStorage();
|
||||
// File upload — store to job directory
|
||||
const upload = multer({
|
||||
storage: storage,
|
||||
limits: {
|
||||
fileSize: 500 * 1024 * 1024 // 500MB
|
||||
}
|
||||
storage: multer.memoryStorage(),
|
||||
limits: { fileSize: 500 * 1024 * 1024 }, // 500 MB
|
||||
});
|
||||
|
||||
// 健康檢查
|
||||
app.get('/health', (req, res) => {
|
||||
res.json({
|
||||
service: 'task-scheduler',
|
||||
status: 'healthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
services: Object.keys(API_SERVICES)
|
||||
});
|
||||
});
|
||||
// ---------------------------------------------------------------------------
|
||||
// SSE: keep track of connected clients per job_id
|
||||
// ---------------------------------------------------------------------------
|
||||
const sseClients = new Map(); // job_id -> Set<res>
|
||||
|
||||
// API代理函數
|
||||
async function proxyRequest(service, endpoint, method = 'GET', data = null, files = null) {
|
||||
try {
|
||||
const serviceConfig = API_SERVICES[service];
|
||||
if (!serviceConfig) {
|
||||
throw new Error(`Service ${service} not found`);
|
||||
}
|
||||
|
||||
const url = `${serviceConfig.url}${endpoint}`;
|
||||
const headers = {
|
||||
'X-API-Key': serviceConfig.apiKey,
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
let response;
|
||||
if (method === 'GET') {
|
||||
response = await axios.get(url, { headers });
|
||||
} else if (method === 'POST') {
|
||||
if (files) {
|
||||
// 處理文件上傳
|
||||
const formData = new FormData();
|
||||
Object.keys(files).forEach(key => {
|
||||
formData.append(key, files[key].buffer, files[key].originalname);
|
||||
});
|
||||
response = await axios.post(url, formData, {
|
||||
headers: {
|
||||
'X-API-Key': serviceConfig.apiKey,
|
||||
'Content-Type': 'multipart/form-data'
|
||||
}
|
||||
});
|
||||
} else {
|
||||
response = await axios.post(url, data, { headers });
|
||||
}
|
||||
} else if (method === 'DELETE') {
|
||||
response = await axios.delete(url, { headers });
|
||||
}
|
||||
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error(`Proxy request failed for ${service}:`, error.message);
|
||||
throw error;
|
||||
/**
 * Push one server-sent event to every response currently registered for a job.
 *
 * Nothing is serialized or written when no client is registered for `jobId`.
 *
 * @param {string} jobId - Key into the `sseClients` map.
 * @param {*} data - Value serialized with JSON.stringify into the `data:` field.
 * @returns {void}
 */
function sendSSE(jobId, data) {
  const subscribers = sseClients.get(jobId);
  if (subscribers) {
    // One SSE frame: a single "data:" line terminated by a blank line.
    const frame = `data: ${JSON.stringify(data)}\n\n`;
    subscribers.forEach((client) => {
      client.write(frame);
    });
  }
}
|
||||
|
||||
// ONNX API代理
|
||||
app.post('/api/onnx/upload', upload.single('file'), async (req, res) => {
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: get / set job record in Redis
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Load a job record from Redis.
 *
 * @param {string} jobId - Job identifier; the record is read from `job:<jobId>`.
 * @returns {Promise<object|null>} The parsed JSON record, or null when the key
 *   holds no (or an empty) value.
 */
async function getJob(jobId) {
  const serialized = await redis.get(`job:${jobId}`);
  return serialized ? JSON.parse(serialized) : null;
}
|
||||
|
||||
/**
 * Persist a job record to Redis and broadcast it to SSE listeners.
 *
 * The passed-in `job` object is mutated: its `updated_at` field is set to the
 * current time (ISO 8601) before it is serialized.
 *
 * @param {string} jobId - Job identifier; the record is written to `job:<jobId>`.
 * @param {object} job - Job record to store.
 * @returns {Promise<void>}
 */
async function setJob(jobId, job) {
  const redisKey = `job:${jobId}`;
  job.updated_at = new Date().toISOString();
  await redis.set(redisKey, JSON.stringify(job));
  // Only after the write succeeded are connected clients told about the change.
  sendSSE(jobId, job);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: enqueue a task to a stage queue
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Append a task message for `job` to the Redis stream of the given stage.
 *
 * The message is stored under the single stream field `data` as a JSON string
 * containing the job id, its creation time, the job's directory below
 * JOB_DATA_DIR and the job parameters (an empty object when the job has none).
 *
 * @param {string} stage - Stage name; must be a key of STAGE_QUEUES.
 * @param {object} job - Job record; `job_id`, `created_at` and `parameters` are read.
 * @returns {Promise<void>}
 * @throws {Error} When `stage` has no queue configured in STAGE_QUEUES.
 */
async function enqueueStage(stage, job) {
  // Fail fast on an unknown stage instead of handing an undefined stream key
  // to Redis. hasOwnProperty avoids matching inherited names like "constructor".
  if (!Object.prototype.hasOwnProperty.call(STAGE_QUEUES, stage)) {
    throw new Error(`Unknown stage: ${stage}`);
  }

  const queue = STAGE_QUEUES[stage];
  const message = {
    job_id: job.job_id,
    created_at: job.created_at,
    input_dir: path.join(JOB_DATA_DIR, job.job_id),
    parameters: job.parameters || {},
  };

  // '*' lets Redis assign the stream entry ID.
  await redis.xadd(queue, '*', 'data', JSON.stringify(message));
  console.log(`[Scheduler] Enqueued job ${job.job_id} to ${queue}`);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: advance job to next stage or mark completed
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * React to a successfully finished stage: move the job to the stage that
 * follows it in STAGES, or mark the job COMPLETED when it was the last one.
 *
 * Unknown jobs and unknown stage names are logged with a warning and ignored.
 *
 * @param {string} jobId - Identifier of the job whose stage finished.
 * @param {string} completedStage - Name of the stage that just finished.
 * @returns {Promise<void>}
 */
async function advanceJob(jobId, completedStage) {
  const job = await getJob(jobId);
  if (!job) {
    console.warn(`[Scheduler] Job ${jobId} not found, ignoring done event`);
    return;
  }

  const finishedIdx = STAGES.indexOf(completedStage);
  if (finishedIdx < 0) {
    console.warn(`[Scheduler] Unknown stage: ${completedStage}`);
    return;
  }

  const upcomingIdx = finishedIdx + 1;

  // The last stage finished: the whole pipeline is done.
  if (upcomingIdx >= STAGES.length) {
    Object.assign(job, { status: 'COMPLETED', stage: null, progress: 100 });
    await setJob(jobId, job);
    console.log(`[Scheduler] Job ${jobId} COMPLETED`);
    return;
  }

  // Otherwise record the next stage first, then hand the job to its queue.
  const upcoming = STAGES[upcomingIdx];
  Object.assign(job, {
    status: upcoming.toUpperCase(),
    stage: upcoming,
    // Progress is the share of stages already finished, as a whole percent.
    progress: Math.round((upcomingIdx / STAGES.length) * 100),
  });
  await setJob(jobId, job);
  await enqueueStage(upcoming, job);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: mark job as failed
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Mark a job as FAILED and record where and why it failed.
 *
 * Does nothing when no record exists for `jobId`.
 *
 * @param {string} jobId - Identifier of the failed job.
 * @param {string} step - Step reported as failed; stored in `job.error.step`.
 * @param {string} reason - Failure description; stored in `job.error.reason`.
 * @returns {Promise<void>}
 */
async function failJob(jobId, step, reason) {
  const job = await getJob(jobId);
  if (job) {
    Object.assign(job, { status: 'FAILED', error: { step, reason } });
    await setJob(jobId, job);
    console.log(`[Scheduler] Job ${jobId} FAILED at ${step}: ${reason}`);
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Done queue listener — runs in background
|
||||
// ---------------------------------------------------------------------------
|
||||
async function ensureConsumerGroup(queue, group) {
|
||||
try {
|
||||
if (!req.file) {
|
||||
return res.status(400).json({ error: 'No file provided' });
|
||||
await redis.xgroup('CREATE', queue, group, '0', 'MKSTREAM');
|
||||
} catch (err) {
|
||||
// Group already exists — OK
|
||||
if (!err.message.includes('BUSYGROUP')) throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async function listenDoneQueue() {
|
||||
const consumerName = `scheduler-${process.pid}`;
|
||||
await ensureConsumerGroup(DONE_QUEUE, DONE_GROUP);
|
||||
|
||||
console.log(`[Scheduler] Listening on ${DONE_QUEUE} as ${consumerName}`);
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
const results = await redisSub.xreadgroup(
|
||||
'GROUP', DONE_GROUP, consumerName,
|
||||
'COUNT', 10,
|
||||
'BLOCK', 5000,
|
||||
'STREAMS', DONE_QUEUE, '>'
|
||||
);
|
||||
|
||||
if (!results) continue;
|
||||
|
||||
for (const [, messages] of results) {
|
||||
for (const [messageId, fields] of messages) {
|
||||
try {
|
||||
const data = JSON.parse(fields[1]); // fields = ['data', '{...}']
|
||||
const { job_id, step, result, reason } = data;
|
||||
|
||||
console.log(`[Scheduler] Done event: job=${job_id} step=${step} result=${result}`);
|
||||
|
||||
if (result === 'ok') {
|
||||
await advanceJob(job_id, step);
|
||||
} else {
|
||||
await failJob(job_id, step, reason || 'Unknown error');
|
||||
}
|
||||
|
||||
// ACK the message
|
||||
await redisSub.xack(DONE_QUEUE, DONE_GROUP, messageId);
|
||||
} catch (err) {
|
||||
console.error('[Scheduler] Error processing done event:', err);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
if (err.message.includes('Connection is closed')) {
|
||||
console.error('[Scheduler] Redis connection lost, retrying in 3s...');
|
||||
await new Promise((r) => setTimeout(r, 3000));
|
||||
} else {
|
||||
console.error('[Scheduler] Done listener error:', err);
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
}
|
||||
|
||||
const result = await proxyRequest('onnx', '/api/onnx/upload', 'POST', null, { file: req.file });
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('ONNX upload error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
app.post('/api/onnx/process', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('onnx', '/api/onnx/process', 'POST', req.body);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('ONNX process error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
// ---------------------------------------------------------------------------
|
||||
// Ensure worker queue consumer groups exist on startup
|
||||
// ---------------------------------------------------------------------------
|
||||
async function ensureWorkerGroups() {
|
||||
const groups = {
|
||||
'queue:onnx': 'onnx-workers',
|
||||
'queue:bie': 'bie-workers',
|
||||
'queue:nef': 'nef-workers',
|
||||
};
|
||||
for (const [queue, group] of Object.entries(groups)) {
|
||||
await ensureConsumerGroup(queue, group);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
app.get('/api/onnx/tasks/:taskId/status', async (req, res) => {
|
||||
// ---------------------------------------------------------------------------
|
||||
// API Routes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Health check
|
||||
app.get('/health', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('onnx', `/api/onnx/tasks/${req.params.taskId}/status`);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('ONNX status error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/onnx/tasks/:taskId/result', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('onnx', `/api/onnx/tasks/${req.params.taskId}/result`);
|
||||
|
||||
// 如果是文件下載,設置適當的響應頭
|
||||
if (result.headers && result.headers['content-type']) {
|
||||
res.set('Content-Type', result.headers['content-type']);
|
||||
res.set('Content-Disposition', result.headers['content-disposition']);
|
||||
res.send(result.data);
|
||||
} else {
|
||||
res.json(result);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('ONNX result error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/onnx/tasks', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('onnx', '/api/onnx/tasks');
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('ONNX tasks list error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.delete('/api/onnx/tasks/:taskId', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('onnx', `/api/onnx/tasks/${req.params.taskId}`, 'DELETE');
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('ONNX cancel error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// BIE API代理
|
||||
app.post('/api/bie/process', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('bie', '/api/bie/process', 'POST', req.body);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('BIE process error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/bie/tasks/:taskId/status', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('bie', `/api/bie/tasks/${req.params.taskId}/status`);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('BIE status error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/bie/tasks/:taskId/result', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('bie', `/api/bie/tasks/${req.params.taskId}/result`);
|
||||
|
||||
if (result.headers && result.headers['content-type']) {
|
||||
res.set('Content-Type', result.headers['content-type']);
|
||||
res.set('Content-Disposition', result.headers['content-disposition']);
|
||||
res.send(result.data);
|
||||
} else {
|
||||
res.json(result);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('BIE result error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/bie/tasks', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('bie', '/api/bie/tasks');
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('BIE tasks list error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.delete('/api/bie/tasks/:taskId', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('bie', `/api/bie/tasks/${req.params.taskId}`, 'DELETE');
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('BIE cancel error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// NEF API代理
|
||||
app.post('/api/nef/process', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('nef', '/api/nef/process', 'POST', req.body);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('NEF process error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/nef/tasks/:taskId/status', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('nef', `/api/nef/tasks/${req.params.taskId}/status`);
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('NEF status error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/nef/tasks/:taskId/result', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('nef', `/api/nef/tasks/${req.params.taskId}/result`);
|
||||
|
||||
if (result.headers && result.headers['content-type']) {
|
||||
res.set('Content-Type', result.headers['content-type']);
|
||||
res.set('Content-Disposition', result.headers['content-disposition']);
|
||||
res.send(result.data);
|
||||
} else {
|
||||
res.json(result);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('NEF result error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.get('/api/nef/tasks', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('nef', '/api/nef/tasks');
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('NEF tasks list error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
app.delete('/api/nef/tasks/:taskId', async (req, res) => {
|
||||
try {
|
||||
const result = await proxyRequest('nef', `/api/nef/tasks/${req.params.taskId}`, 'DELETE');
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('NEF cancel error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
// 完整工作流程
|
||||
app.post('/api/workflow/complete', async (req, res) => {
|
||||
try {
|
||||
const { onnx_file_id, model_id, version, platform, data_dir } = req.body;
|
||||
|
||||
// 1. 處理ONNX優化
|
||||
const onnxResult = await proxyRequest('onnx', '/api/onnx/process', 'POST', {
|
||||
file_id: onnx_file_id,
|
||||
model_id,
|
||||
version,
|
||||
platform
|
||||
await redis.ping();
|
||||
res.json({
|
||||
service: 'task-scheduler',
|
||||
status: 'healthy',
|
||||
timestamp: new Date().toISOString(),
|
||||
redis: 'connected',
|
||||
});
|
||||
} catch {
|
||||
res.status(503).json({
|
||||
service: 'task-scheduler',
|
||||
status: 'unhealthy',
|
||||
redis: 'disconnected',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// 2. 處理BIE分析
|
||||
const bieResult = await proxyRequest('bie', '/api/bie/process', 'POST', {
|
||||
onnx_file_id,
|
||||
model_id,
|
||||
// POST /jobs — Create a new job
|
||||
app.post('/jobs', upload.fields([
|
||||
{ name: 'model', maxCount: 1 },
|
||||
{ name: 'ref_images', maxCount: 100 },
|
||||
]), async (req, res) => {
|
||||
try {
|
||||
// Validate required fields
|
||||
const { model_id, version, platform } = req.body;
|
||||
if (!model_id || !version || !platform) {
|
||||
return res.status(400).json({ error: 'model_id, version, platform are required' });
|
||||
}
|
||||
if (!req.files || !req.files.model || req.files.model.length === 0) {
|
||||
return res.status(400).json({ error: 'model file is required' });
|
||||
}
|
||||
|
||||
const jobId = uuidv4();
|
||||
|
||||
if (minio) {
|
||||
// S3 mode: upload files to MinIO
|
||||
const modelFile = req.files.model[0];
|
||||
const s3Prefix = `jobs/${jobId}`;
|
||||
await uploadToMinIO(
|
||||
`${s3Prefix}/input/${modelFile.originalname}`,
|
||||
modelFile.buffer,
|
||||
modelFile.mimetype || 'application/octet-stream',
|
||||
);
|
||||
|
||||
if (req.files.ref_images) {
|
||||
for (const img of req.files.ref_images) {
|
||||
await uploadToMinIO(
|
||||
`${s3Prefix}/input/ref_images/${img.originalname}`,
|
||||
img.buffer,
|
||||
img.mimetype || 'image/jpeg',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[Scheduler] Uploaded job ${jobId} files to MinIO`);
|
||||
} else {
|
||||
// Local mode: write to shared volume
|
||||
const jobDir = path.join(JOB_DATA_DIR, jobId);
|
||||
const inputDir = path.join(jobDir, 'input');
|
||||
const refImagesDir = path.join(inputDir, 'ref_images');
|
||||
const logsDir = path.join(jobDir, 'logs');
|
||||
|
||||
fs.mkdirSync(inputDir, { recursive: true });
|
||||
fs.mkdirSync(refImagesDir, { recursive: true });
|
||||
fs.mkdirSync(logsDir, { recursive: true });
|
||||
|
||||
const modelFile = req.files.model[0];
|
||||
const modelPath = path.join(inputDir, modelFile.originalname);
|
||||
fs.writeFileSync(modelPath, modelFile.buffer);
|
||||
|
||||
if (req.files.ref_images) {
|
||||
for (const img of req.files.ref_images) {
|
||||
const imgPath = path.join(refImagesDir, img.originalname);
|
||||
fs.writeFileSync(imgPath, img.buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Optional flags
|
||||
const parameters = {
|
||||
model_id: parseInt(model_id, 10),
|
||||
version,
|
||||
platform,
|
||||
data_dir
|
||||
});
|
||||
enable_evaluate: req.body.enable_evaluate === 'true',
|
||||
enable_sim_fp: req.body.enable_sim_fp === 'true',
|
||||
enable_sim_fixed: req.body.enable_sim_fixed === 'true',
|
||||
enable_sim_hw: req.body.enable_sim_hw === 'true',
|
||||
};
|
||||
|
||||
// 3. 處理NEF編譯
|
||||
const nefResult = await proxyRequest('nef', '/api/nef/process', 'POST', {
|
||||
bie_file_id: bieResult.task_id, // 假設BIE結果包含文件ID
|
||||
model_id,
|
||||
version,
|
||||
platform
|
||||
});
|
||||
// Create job record
|
||||
const job = {
|
||||
job_id: jobId,
|
||||
created_at: new Date().toISOString(),
|
||||
status: 'ONNX',
|
||||
stage: 'onnx',
|
||||
progress: 0,
|
||||
updated_at: new Date().toISOString(),
|
||||
parameters,
|
||||
output: { bie_path: null, nef_path: null },
|
||||
error: null,
|
||||
};
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
workflow_id: `workflow_${Date.now()}`,
|
||||
steps: {
|
||||
onnx: onnxResult,
|
||||
bie: bieResult,
|
||||
nef: nefResult
|
||||
},
|
||||
message: 'Complete workflow submitted successfully'
|
||||
await setJob(jobId, job);
|
||||
|
||||
// Enqueue to first stage
|
||||
await enqueueStage('onnx', job);
|
||||
|
||||
res.status(201).json({
|
||||
job_id: jobId,
|
||||
status: 'ONNX',
|
||||
message: 'Job created and queued',
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Workflow error:', error.message);
|
||||
res.status(500).json({ error: error.message });
|
||||
} catch (err) {
|
||||
console.error('[Scheduler] POST /jobs error:', err);
|
||||
res.status(500).json({ error: err.message });
|
||||
}
|
||||
});
|
||||
|
||||
// 錯誤處理中間件
|
||||
app.use((error, req, res, next) => {
|
||||
console.error('Server error:', error);
|
||||
res.status(500).json({
|
||||
error: 'Internal server error',
|
||||
message: process.env.NODE_ENV === 'development' ? error.message : 'Something went wrong'
|
||||
// GET /jobs/:jobId — Query job status
|
||||
/**
 * GET /jobs/:jobId — return the stored record of a single job.
 *
 * Responses:
 *   200 — the job record as JSON
 *   404 — { error: 'JOB_NOT_FOUND' } when no record exists
 *   500 — { error: <message> } when the lookup itself fails
 */
app.get('/jobs/:jobId', async (req, res) => {
  try {
    const job = await getJob(req.params.jobId);
    if (!job) {
      return res.status(404).json({ error: 'JOB_NOT_FOUND' });
    }
    res.json(job);
  } catch (err) {
    // A rejection escaping an async handler is not routed to Express' error
    // handling, so the request would never be answered. Reply explicitly,
    // the same way GET /jobs does.
    console.error('[Scheduler] GET /jobs/:jobId error:', err);
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// GET /jobs — List all jobs
|
||||
/**
 * GET /jobs — list every stored job record, newest first.
 *
 * Responses:
 *   200 — JSON array of job records sorted by `created_at` descending
 *   500 — { error: <message> } when Redis access or JSON parsing fails
 */
app.get('/jobs', async (req, res) => {
  try {
    const jobs = [];
    const seen = new Set();
    let cursor = '0';

    // Walk the keyspace incrementally with SCAN rather than KEYS, which
    // blocks the Redis server while it visits every key in one go.
    do {
      const [nextCursor, batch] = await redis.scan(cursor, 'MATCH', 'job:*', 'COUNT', 100);
      cursor = nextCursor;

      // SCAN may report the same key more than once; fetch each key only once.
      const fresh = [...new Set(batch)].filter((key) => !seen.has(key));
      if (fresh.length > 0) {
        for (const key of fresh) seen.add(key);
        // One MGET round trip per batch instead of one GET per key.
        const values = await redis.mget(fresh);
        for (const raw of values) {
          if (raw) jobs.push(JSON.parse(raw));
        }
      }
    } while (cursor !== '0');

    // Sort by created_at descending
    jobs.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
    res.json(jobs);
  } catch (err) {
    console.error('[Scheduler] GET /jobs error:', err);
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// GET /jobs/:jobId/events — SSE stream
|
||||
/**
 * GET /jobs/:jobId/events — server-sent event stream of a job's state.
 *
 * The current record is sent immediately; afterwards every update pushed
 * through sendSSE for this job id is delivered on the same connection, and a
 * comment line is written every 15 seconds as a heartbeat.
 *
 * Responses:
 *   200 — text/event-stream
 *   404 — { error: 'JOB_NOT_FOUND' } when no record exists
 *   500 — { error: <message> } when the initial lookup fails
 */
app.get('/jobs/:jobId/events', async (req, res) => {
  const jobId = req.params.jobId;

  let job;
  try {
    job = await getJob(jobId);
  } catch (err) {
    // Answer explicitly; a rejection escaping an async handler would leave
    // the request hanging without a response.
    console.error('[Scheduler] GET /jobs/:jobId/events error:', err);
    return res.status(500).json({ error: err.message });
  }
  if (!job) {
    return res.status(404).json({ error: 'JOB_NOT_FOUND' });
  }

  // Set SSE headers. "no-transform" makes the globally installed compression
  // middleware leave this response alone, so events are not held back in a
  // compression buffer.
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache, no-transform',
    'Connection': 'keep-alive',
  });

  // Send current state immediately
  res.write(`data: ${JSON.stringify(job)}\n\n`);

  // Register client
  if (!sseClients.has(jobId)) {
    sseClients.set(jobId, new Set());
  }
  sseClients.get(jobId).add(res);

  // Heartbeat to keep connection alive (lines starting with ':' are SSE comments)
  const heartbeat = setInterval(() => {
    res.write(': heartbeat\n\n');
  }, 15000);

  // Cleanup on disconnect: stop the heartbeat and drop the client, removing
  // the job's entry entirely once its last listener is gone.
  req.on('close', () => {
    clearInterval(heartbeat);
    const clients = sseClients.get(jobId);
    if (clients) {
      clients.delete(res);
      if (clients.size === 0) sseClients.delete(jobId);
    }
  });
});
|
||||
|
||||
// 404處理
|
||||
// GET /jobs/:jobId/download/:filename — Download result file
//
// Serves one result file of a job, either from MinIO (when `minio` is
// configured) or from JOB_DATA_DIR on the local filesystem.
//
// Responses: 400 INVALID_FILENAME, 404 JOB_NOT_FOUND / FILE_NOT_FOUND,
// 500 "Download failed" for any other error.
app.get('/jobs/:jobId/download/:filename', async (req, res) => {
  const { jobId, filename } = req.params;

  // Route params arrive URL-decoded, so "..%2F" reaches this point as "../".
  // Accept only a bare file name so neither the object key nor the local path
  // can escape the job's own directory.
  if (path.basename(filename) !== filename || filename === '.' || filename === '..') {
    return res.status(400).json({ error: 'INVALID_FILENAME' });
  }

  try {
    const job = await getJob(jobId);
    if (!job) {
      return res.status(404).json({ error: 'JOB_NOT_FOUND' });
    }

    if (minio) {
      // MinIO mode: fetch from MinIO and send
      const minioKey = `jobs/${jobId}/${filename}`;
      const result = await getFromMinIO(minioKey);
      if (!result) {
        return res.status(404).json({ error: 'FILE_NOT_FOUND' });
      }
      // Escape quotes/backslashes so the name cannot break out of the
      // quoted-string in the header value.
      const quotedName = filename.replace(/["\\]/g, '\\$&');
      res.setHeader('Content-Disposition', `attachment; filename="${quotedName}"`);
      res.setHeader('Content-Length', result.body.length);
      return res.send(result.body);
    }

    // Local mode: serve from filesystem
    const filePath = path.join(JOB_DATA_DIR, jobId, filename);
    if (!fs.existsSync(filePath)) {
      return res.status(404).json({ error: 'FILE_NOT_FOUND' });
    }
    res.download(filePath);
  } catch (err) {
    if (err.name === 'NoSuchKey') {
      return res.status(404).json({ error: 'FILE_NOT_FOUND' });
    }
    console.error('[Scheduler] Download error:', err);
    res.status(500).json({ error: 'Download failed' });
  }
});
|
||||
|
||||
// GET /queues/stats — Queue monitoring stats

// Turns a flat reply [field, value, field, value, ...] into a plain object.
function flatPairsToObject(flat) {
  const obj = {};
  for (let i = 0; i < flat.length; i += 2) {
    obj[flat[i]] = flat[i + 1];
  }
  return obj;
}

// Best-effort pending/lag/consumer snapshot of one consumer group on a stream.
// Any XINFO failure leaves the corresponding fields at their zero defaults.
async function readQueueGroupStats(queue, group) {
  const snapshot = { pending: 0, lag: 0, consumers: [] };
  try {
    // Each element of the XINFO GROUPS reply is handled as a flat
    // field/value array describing one group.
    const groups = await redis.xinfo('GROUPS', queue);
    const info = groups.map(flatPairsToObject).find((g) => g.name === group);
    if (!info) return snapshot;

    snapshot.pending = parseInt(info.pending || '0', 10);
    snapshot.lag = parseInt(info.lag || '0', 10);

    // Get consumers in this group
    try {
      const consumerList = await redis.xinfo('CONSUMERS', queue, group);
      snapshot.consumers = consumerList.map((entry) => {
        const ci = flatPairsToObject(entry);
        return {
          name: ci.name,
          pending: parseInt(ci.pending || '0', 10),
          idle: parseInt(ci.idle || '0', 10),
        };
      });
    } catch { /* no consumers yet */ }
  } catch { /* group may not exist yet */ }
  return snapshot;
}

// Responds with { timestamp, queues, jobs }:
//   queues — per stream: length, pending, lag and consumer list
//   jobs   — total number of `job:*` keys plus a count per known status
app.get('/queues/stats', async (req, res) => {
  try {
    // Stream name -> consumer group inspected on that stream.
    const groupNames = {
      'queue:onnx': 'onnx-workers',
      'queue:bie': 'bie-workers',
      'queue:nef': 'nef-workers',
      'queue:done': 'scheduler',
    };

    const stats = {};
    for (const [queue, group] of Object.entries(groupNames)) {
      const length = await redis.xlen(queue);
      const { pending, lag, consumers } = await readQueueGroupStats(queue, group);
      stats[queue] = { length, pending, lag, consumers };
    }

    // Also get job summary
    const countedStatuses = ['ONNX', 'BIE', 'NEF', 'COMPLETED', 'FAILED'];
    const keys = await redis.keys('job:*');
    const jobSummary = { total: keys.length, ONNX: 0, BIE: 0, NEF: 0, COMPLETED: 0, FAILED: 0 };

    // Fetch all job records at once rather than one awaited GET per key.
    const payloads = await Promise.all(keys.map((key) => redis.get(key)));
    for (const raw of payloads) {
      if (!raw) continue;
      const job = JSON.parse(raw);
      // Count only the known statuses so an unexpected value (for example
      // "total") can never bump an unrelated field.
      if (countedStatuses.includes(job.status)) {
        jobSummary[job.status]++;
      }
    }

    res.json({
      timestamp: new Date().toISOString(),
      queues: stats,
      jobs: jobSummary,
    });
  } catch (err) {
    console.error('[Scheduler] GET /queues/stats error:', err);
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// Error handling
//
// Logs the error and answers with a generic 500 body. When part of the
// response has already been written (for example an open event stream), a
// status and JSON body can no longer be sent, so the error is passed on to
// the next error handler instead.
app.use((err, req, res, next) => {
  console.error('[Scheduler] Server error:', err);
  if (res.headersSent) {
    return next(err);
  }
  res.status(500).json({ error: 'Internal server error' });
});
|
||||
|
||||
// 404 — answers any request that reaches this '*' handler with a JSON error.
app.use('*', function respondNotFound(req, res) {
  const body = { error: 'Endpoint not found' };
  res.status(404);
  res.json(body);
});
|
||||
|
||||
// 啟動服務器
|
||||
app.listen(PORT, () => {
|
||||
console.log(`🚀 Task scheduler running on port ${PORT}`);
|
||||
console.log(`📡 Proxying to services:`);
|
||||
Object.keys(API_SERVICES).forEach(service => {
|
||||
console.log(` ${service}: ${API_SERVICES[service].url}`);
|
||||
// ---------------------------------------------------------------------------
|
||||
// Start
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Boots the scheduler.
 *
 * Awaits ensureWorkerGroups(), launches listenDoneQueue() in the background
 * (its rejection is logged and terminates the process with exit code 1), and
 * finally starts the HTTP server on PORT, logging the effective configuration
 * once it is listening.
 */
async function start() {
  // Ensure all consumer groups exist
  await ensureWorkerGroups();

  // Start listening for done events (background) — intentionally not awaited.
  const abortOnListenerFailure = (err) => {
    console.error('[Scheduler] Done listener fatal error:', err);
    process.exit(1);
  };
  listenDoneQueue().catch(abortOnListenerFailure);

  function announceConfiguration() {
    const storageTarget = minio ? ` (${MINIO_ENDPOINT}/${MINIO_BUCKET})` : '';
    console.log(`[Scheduler] Running on port ${PORT}`);
    console.log(`[Scheduler] Redis: ${REDIS_URL}`);
    console.log(`[Scheduler] Job data dir: ${JOB_DATA_DIR}`);
    console.log(`[Scheduler] Storage: ${STORAGE_BACKEND}${storageTarget}`);
    console.log(`[Scheduler] Stages: ${STAGES.join(' -> ')}`);
  }

  app.listen(PORT, announceConfiguration);
}
|
||||
|
||||
// Run startup; if it rejects, log the reason and exit with a non-zero code.
const exitOnStartupFailure = (err) => {
  console.error('[Scheduler] Failed to start:', err);
  process.exit(1);
};
start().catch(exitOnStartupFailure);

module.exports = app;
|
||||
|
||||
@ -18,6 +18,8 @@ RUN npm run build
|
||||
# 生產階段
|
||||
FROM nginx:alpine
|
||||
|
||||
RUN apk add --no-cache curl
|
||||
|
||||
# 複製構建結果
|
||||
COPY --from=build /app/dist /usr/share/nginx/html
|
||||
|
||||
|
||||
@ -4,6 +4,9 @@ server {
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
# 上傳大小限制(模型檔最大 500MB)
|
||||
client_max_body_size 500m;
|
||||
|
||||
# 啟用gzip壓縮
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
@ -16,9 +19,24 @@ server {
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
# API代理
|
||||
location /api {
|
||||
proxy_pass http://task-scheduler:4000;
|
||||
# SSE endpoint — response buffering must stay off so events are forwarded as
# soon as the scheduler writes them.
#
# The /api prefix is stripped with `rewrite ... break` instead of putting the
# capture into proxy_pass: a proxy_pass that contains variables is resolved at
# request time (which requires a `resolver` for a hostname such as
# "scheduler") and replaces the request URI as-is, dropping the query string.
location ~ ^/api/jobs/.*/events$ {
    rewrite ^/api/(.*)$ /$1 break;
    proxy_pass http://scheduler:4000;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_buffering off;
    proxy_cache off;
    proxy_read_timeout 86400s;
    proxy_send_timeout 86400s;
    chunked_transfer_encoding on;
    gzip off;
}
|
||||
|
||||
# API 代理(rewrite /api → /)
|
||||
location /api/ {
|
||||
proxy_pass http://scheduler:4000/;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
|
||||
1773
apps/web/package-lock.json
generated
Normal file
1773
apps/web/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,9 @@
|
||||
<el-icon><Cpu /></el-icon>
|
||||
<span>Model Converter Web</span>
|
||||
</div>
|
||||
<nav class="nav-links">
|
||||
<router-link to="/" class="nav-link">轉換</router-link>
|
||||
</nav>
|
||||
<div class="status">
|
||||
<el-icon v-if="isConnected" color="#67c23a"><CircleCheck /></el-icon>
|
||||
<el-icon v-else color="#f56c6c"><CircleClose /></el-icon>
|
||||
@ -75,6 +78,29 @@ const checkConnection = async () => {
|
||||
font-size: 24px;
|
||||
}
|
||||
|
||||
.nav-links {
|
||||
display: flex;
|
||||
gap: 20px;
|
||||
}
|
||||
|
||||
.nav-link {
|
||||
color: rgba(255, 255, 255, 0.8);
|
||||
text-decoration: none;
|
||||
font-size: 15px;
|
||||
padding: 4px 0;
|
||||
border-bottom: 2px solid transparent;
|
||||
transition: all 0.2s;
|
||||
}
|
||||
|
||||
.nav-link:hover {
|
||||
color: white;
|
||||
}
|
||||
|
||||
.nav-link.router-link-exact-active {
|
||||
color: white;
|
||||
border-bottom-color: white;
|
||||
}
|
||||
|
||||
.status {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
172
apps/web/src/composables/useJobStatus.js
Normal file
172
apps/web/src/composables/useJobStatus.js
Normal file
@ -0,0 +1,172 @@
|
||||
import { onUnmounted } from 'vue'
|
||||
|
||||
const TERMINAL_STATES = ['COMPLETED', 'FAILED']
const POLL_INTERVAL = 3000
const SSE_RETRY_BASE = 5000
const SSE_RETRY_MAX = 30000
const SSE_MAX_RETRIES = 5
// How long a recovery probe may stay silent before it is abandoned.
const SSE_PROBE_TIMEOUT = 5000

/**
 * Composable that follows job status through Server-Sent Events, falling back
 * to polling `/api/jobs/:id` when the stream errors or EventSource is missing.
 *
 * While polling, it periodically opens a probe EventSource (exponential
 * back-off from SSE_RETRY_BASE up to SSE_RETRY_MAX, at most SSE_MAX_RETRIES
 * attempts since the last SSE message) and switches back to SSE as soon as
 * the probe delivers a message.
 *
 * All watchers are stopped automatically when the owning component unmounts.
 *
 * @returns {{ watchJob: (jobId: string, onUpdate: (job: object) => void) => (Function|undefined) }}
 *   `watchJob` returns a function that stops the watch, or `undefined` when
 *   the job id is already being watched (the new callback is not registered).
 */
export function useJobStatus() {
  const watchers = new Map() // jobId -> watcher state

  function watchJob(jobId, onUpdate) {
    if (watchers.has(jobId)) return

    const eventsUrl = `/api/jobs/${jobId}/events`
    const state = {
      sse: null, // active EventSource delivering updates
      probe: null, // EventSource opened by a recovery attempt, not yet promoted
      probeTimer: null,
      pollTimer: null,
      sseRetryTimer: null,
      sseRetryCount: 0,
      mode: 'idle',
      stopped: false,
      stop: stopWatch,
    }
    watchers.set(jobId, state)

    function handleUpdate(jobData) {
      // A fetch or event that was already in flight may complete after stop.
      if (state.stopped) return
      onUpdate(jobData)
      if (TERMINAL_STATES.includes(jobData.status)) {
        stopWatch()
      }
    }

    // --- SSE ---
    function onStreamMessage(event) {
      state.sseRetryCount = 0
      try {
        handleUpdate(JSON.parse(event.data))
      } catch (e) {
        console.warn('[useJobStatus] SSE parse error:', e)
      }
    }

    function onStreamError() {
      closeSSE()
      startPolling()
    }

    function startSSE() {
      if (state.stopped) return
      if (typeof EventSource === 'undefined') {
        startPolling()
        return
      }

      const es = new EventSource(eventsUrl)
      state.sse = es
      state.mode = 'sse'
      es.onmessage = onStreamMessage
      es.onerror = onStreamError
    }

    function closeSSE() {
      if (state.sse) {
        state.sse.close()
        state.sse = null
      }
    }

    // --- Polling ---
    function startPolling() {
      if (state.stopped || state.pollTimer) return
      state.mode = 'polling'

      async function poll() {
        if (state.stopped) return
        try {
          const res = await fetch(`/api/jobs/${jobId}`)
          if (!res.ok) throw new Error(`HTTP ${res.status}`)
          const data = await res.json()
          handleUpdate(data)

          if (!state.stopped && state.sseRetryCount < SSE_MAX_RETRIES) {
            attemptSSERecovery()
          }
        } catch (e) {
          console.warn('[useJobStatus] Poll error:', e)
        }
      }

      poll()
      state.pollTimer = setInterval(poll, POLL_INTERVAL)
    }

    function stopPolling() {
      if (state.pollTimer) {
        clearInterval(state.pollTimer)
        state.pollTimer = null
      }
    }

    // --- SSE Recovery ---
    function closeProbe() {
      if (state.probeTimer) {
        clearTimeout(state.probeTimer)
        state.probeTimer = null
      }
      if (state.probe) {
        state.probe.close()
        state.probe = null
      }
    }

    function attemptSSERecovery() {
      if (state.sseRetryTimer || state.sse || state.probe) return
      if (typeof EventSource === 'undefined') return

      const delay = Math.min(
        SSE_RETRY_BASE * Math.pow(2, state.sseRetryCount),
        SSE_RETRY_MAX
      )
      state.sseRetryCount++

      state.sseRetryTimer = setTimeout(() => {
        state.sseRetryTimer = null
        if (state.stopped) return

        // Keep the probe on `state` so stop/unmount can close it; an untracked
        // probe would stay connected after the watcher is gone.
        const probe = new EventSource(eventsUrl)
        state.probe = probe
        state.probeTimer = setTimeout(closeProbe, SSE_PROBE_TIMEOUT)

        probe.onmessage = (event) => {
          // The probe was abandoned (timeout, error or stop) before this fired.
          if (state.probe !== probe) {
            probe.close()
            return
          }
          clearTimeout(state.probeTimer)
          state.probeTimer = null
          state.probe = null

          // Promote the probe to the active stream.
          stopPolling()
          state.sse = probe
          state.mode = 'sse'
          probe.onmessage = onStreamMessage
          probe.onerror = onStreamError
          onStreamMessage(event)
        }

        probe.onerror = closeProbe
      }, delay)
    }

    // --- Cleanup ---
    function stopWatch() {
      if (state.stopped) return
      state.stopped = true
      closeSSE()
      closeProbe()
      stopPolling()
      if (state.sseRetryTimer) {
        clearTimeout(state.sseRetryTimer)
        state.sseRetryTimer = null
      }
      // Only unregister our own entry: a stale stop handle must not remove a
      // newer watcher that was started for the same job id.
      if (watchers.get(jobId) === state) watchers.delete(jobId)
    }

    startSSE()

    return () => stopWatch()
  }

  onUnmounted(() => {
    for (const state of [...watchers.values()]) {
      state.stop()
    }
    watchers.clear()
  })

  return { watchJob }
}
|
||||
@ -1,11 +1,17 @@
|
||||
import { createRouter, createWebHistory } from 'vue-router'
|
||||
import Home from '@/views/Home.vue'
|
||||
import Monitor from '@/views/Monitor.vue'
|
||||
|
||||
const routes = [
|
||||
{
|
||||
path: '/',
|
||||
name: 'Home',
|
||||
component: Home
|
||||
},
|
||||
{
|
||||
path: '/monitor',
|
||||
name: 'Monitor',
|
||||
component: Monitor
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@ -4,11 +4,6 @@ import axios from 'axios'
|
||||
export const useSystemStore = defineStore('system', {
|
||||
state: () => ({
|
||||
isConnected: false,
|
||||
services: {
|
||||
onnx: { status: 'unknown', activeTasks: 0 },
|
||||
bie: { status: 'unknown', activeTasks: 0 },
|
||||
nef: { status: 'unknown', activeTasks: 0 }
|
||||
}
|
||||
}),
|
||||
|
||||
actions: {
|
||||
@ -22,22 +17,5 @@ export const useSystemStore = defineStore('system', {
|
||||
throw error
|
||||
}
|
||||
},
|
||||
|
||||
async checkServiceHealth(service) {
|
||||
try {
|
||||
const response = await axios.get(`http://localhost:500${service === 'onnx' ? '1' : service === 'bie' ? '2' : '3'}/health`)
|
||||
this.services[service] = {
|
||||
status: response.data.status,
|
||||
activeTasks: response.data.active_tasks
|
||||
}
|
||||
return response.data
|
||||
} catch (error) {
|
||||
this.services[service] = {
|
||||
status: 'unreachable',
|
||||
activeTasks: 0
|
||||
}
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
339
apps/web/src/views/Monitor.vue
Normal file
339
apps/web/src/views/Monitor.vue
Normal file
@ -0,0 +1,339 @@
|
||||
<template>
|
||||
<div class="monitor">
|
||||
<!-- 自動刷新控制 -->
|
||||
<div class="monitor-toolbar">
|
||||
<el-switch v-model="autoRefresh" active-text="自動刷新" />
|
||||
<el-select v-model="refreshInterval" size="small" style="width: 100px; margin-left: 12px" :disabled="!autoRefresh">
|
||||
<el-option label="3 秒" :value="3000" />
|
||||
<el-option label="5 秒" :value="5000" />
|
||||
<el-option label="10 秒" :value="10000" />
|
||||
</el-select>
|
||||
<el-button size="small" @click="fetchStats" :loading="loading" style="margin-left: 12px">
|
||||
<el-icon><Refresh /></el-icon> 立即刷新
|
||||
</el-button>
|
||||
<span class="last-updated" v-if="lastUpdated">最後更新: {{ lastUpdated }}</span>
|
||||
</div>
|
||||
|
||||
<!-- Job 統計 -->
|
||||
<el-card shadow="hover" class="stats-card">
|
||||
<template #header>
|
||||
<div class="card-header">
|
||||
<el-icon><DataAnalysis /></el-icon>
|
||||
<span>任務統計</span>
|
||||
</div>
|
||||
</template>
|
||||
<div class="job-stats">
|
||||
<div class="stat-item">
|
||||
<div class="stat-value">{{ jobStats.total }}</div>
|
||||
<div class="stat-label">總任務數</div>
|
||||
</div>
|
||||
<div class="stat-item stat-onnx">
|
||||
<div class="stat-value">{{ jobStats.ONNX }}</div>
|
||||
<div class="stat-label">ONNX 處理中</div>
|
||||
</div>
|
||||
<div class="stat-item stat-bie">
|
||||
<div class="stat-value">{{ jobStats.BIE }}</div>
|
||||
<div class="stat-label">BIE 處理中</div>
|
||||
</div>
|
||||
<div class="stat-item stat-nef">
|
||||
<div class="stat-value">{{ jobStats.NEF }}</div>
|
||||
<div class="stat-label">NEF 處理中</div>
|
||||
</div>
|
||||
<div class="stat-item stat-completed">
|
||||
<div class="stat-value">{{ jobStats.COMPLETED }}</div>
|
||||
<div class="stat-label">已完成</div>
|
||||
</div>
|
||||
<div class="stat-item stat-failed">
|
||||
<div class="stat-value">{{ jobStats.FAILED }}</div>
|
||||
<div class="stat-label">失敗</div>
|
||||
</div>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
<!-- Queue 狀態 -->
|
||||
<el-card shadow="hover" class="stats-card">
|
||||
<template #header>
|
||||
<div class="card-header">
|
||||
<el-icon><List /></el-icon>
|
||||
<span>佇列狀態</span>
|
||||
</div>
|
||||
</template>
|
||||
<el-table :data="queueRows" stripe>
|
||||
<el-table-column prop="name" label="佇列" width="160">
|
||||
<template #default="{ row }">
|
||||
<el-tag :type="queueTagType(row.name)" size="small">{{ row.label }}</el-tag>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="length" label="佇列長度" width="120" align="center" />
|
||||
<el-table-column prop="pending" label="處理中" width="120" align="center">
|
||||
<template #default="{ row }">
|
||||
<span :class="{ 'text-warning': row.pending > 0 }">{{ row.pending }}</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="consumerCount" label="Worker 數" width="120" align="center" />
|
||||
<el-table-column label="Workers" min-width="250">
|
||||
<template #default="{ row }">
|
||||
<div v-if="row.consumers.length > 0" class="consumer-list">
|
||||
<el-tag
|
||||
v-for="c in row.consumers"
|
||||
:key="c.name"
|
||||
size="small"
|
||||
:type="c.idle < 30000 ? 'success' : 'warning'"
|
||||
class="consumer-tag"
|
||||
>
|
||||
{{ c.name }} (pending: {{ c.pending }}, idle: {{ formatIdle(c.idle) }})
|
||||
</el-tag>
|
||||
</div>
|
||||
<span v-else class="text-muted">-</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-card>
|
||||
|
||||
<!-- Job 列表 -->
|
||||
<el-card shadow="hover" class="stats-card">
|
||||
<template #header>
|
||||
<div class="card-header">
|
||||
<el-icon><Document /></el-icon>
|
||||
<span>任務列表</span>
|
||||
</div>
|
||||
</template>
|
||||
<el-table :data="jobs" stripe :default-sort="{ prop: 'created_at', order: 'descending' }">
|
||||
<el-table-column prop="job_id" label="Job ID" width="120">
|
||||
<template #default="{ row }">
|
||||
<span class="job-id">{{ row.job_id.substring(0, 8) }}...</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="status" label="狀態" width="130" align="center">
|
||||
<template #default="{ row }">
|
||||
<el-tag :type="statusTagType(row.status)" size="small">{{ row.status }}</el-tag>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="progress" label="進度" width="160">
|
||||
<template #default="{ row }">
|
||||
<el-progress
|
||||
:percentage="row.progress || 0"
|
||||
:status="row.status === 'FAILED' ? 'exception' : row.status === 'COMPLETED' ? 'success' : undefined"
|
||||
:stroke-width="10"
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="parameters.platform" label="平台" width="100" align="center">
|
||||
<template #default="{ row }">
|
||||
KDP{{ row.parameters?.platform || '-' }}
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="created_at" label="建立時間" width="180">
|
||||
<template #default="{ row }">
|
||||
{{ formatTime(row.created_at) }}
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="error" label="錯誤" min-width="200">
|
||||
<template #default="{ row }">
|
||||
<span v-if="row.error" class="text-danger">
|
||||
[{{ row.error.step }}] {{ row.error.reason }}
|
||||
</span>
|
||||
<span v-else class="text-muted">-</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-card>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { ref, computed, watch, onMounted, onUnmounted } from 'vue'
|
||||
import axios from 'axios'
|
||||
|
||||
const loading = ref(false)
|
||||
const autoRefresh = ref(true)
|
||||
const refreshInterval = ref(5000)
|
||||
const lastUpdated = ref('')
|
||||
let timer = null
|
||||
|
||||
const queueData = ref({})
|
||||
const jobStats = ref({ total: 0, ONNX: 0, BIE: 0, NEF: 0, COMPLETED: 0, FAILED: 0 })
|
||||
const jobs = ref([])
|
||||
|
||||
// Table rows derived from the queue stats object: one row per queue with a
// display label, its counters, and the consumer list plus its size.
const queueRows = computed(() => {
  const displayNames = {
    'queue:onnx': 'ONNX',
    'queue:bie': 'BIE',
    'queue:nef': 'NEF',
    'queue:done': 'Done',
  }

  const rows = []
  for (const [name, data] of Object.entries(queueData.value)) {
    const workers = data.consumers || []
    rows.push({
      name,
      label: displayNames[name] || name,
      length: data.length || 0,
      pending: data.pending || 0,
      consumers: workers,
      consumerCount: workers.length,
    })
  }
  return rows
})
|
||||
|
||||
// Loads queue stats and the job list in parallel and publishes them to the
// reactive state. Failures are only logged; `loading` is always reset.
async function fetchStats() {
  loading.value = true
  try {
    const statsRequest = axios.get('/api/queues/stats')
    const jobsRequest = axios.get('/api/jobs')
    const [statsRes, jobsRes] = await Promise.all([statsRequest, jobsRequest])

    const stats = statsRes.data
    queueData.value = stats.queues || {}
    // Keep the previous summary when the response carries none.
    jobStats.value = stats.jobs || jobStats.value
    jobs.value = jobsRes.data || []

    const now = new Date()
    lastUpdated.value = now.toLocaleTimeString()
  } catch (e) {
    console.warn('Failed to fetch stats:', e)
  } finally {
    loading.value = false
  }
}
|
||||
|
||||
// (Re)arms the refresh interval: any running timer is cleared first, and a
// new one is created only while auto-refresh is switched on.
function startTimer() {
  stopTimer()
  if (!autoRefresh.value) return
  timer = setInterval(fetchStats, refreshInterval.value)
}
|
||||
|
||||
// Cancels the refresh interval, if one is running, and forgets its handle.
function stopTimer() {
  if (!timer) return
  clearInterval(timer)
  timer = null
}
|
||||
|
||||
// Re-arm the timer whenever the auto-refresh switch or the interval changes.
watch([autoRefresh, refreshInterval], () => startTimer())

// Load once on mount, then start the periodic refresh.
onMounted(() => {
  fetchStats()
  startTimer()
})

// Stop refreshing when the page is left.
onUnmounted(() => stopTimer())
|
||||
|
||||
// Renders an idle time given in milliseconds as "<n>ms" below one second,
// as rounded seconds below one minute, and as rounded minutes otherwise.
function formatIdle(ms) {
  if (ms < 1000) return `${ms}ms`

  const underOneMinute = ms < 60000
  const divisor = underOneMinute ? 1000 : 60000
  const unit = underOneMinute ? 's' : 'm'
  return `${Math.round(ms / divisor)}${unit}`
}
|
||||
|
||||
// Formats a timestamp for display in the viewer's locale; '-' when no value is given.
function formatTime(iso) {
  return iso ? new Date(iso).toLocaleString() : '-'
}
|
||||
|
||||
// Maps a queue name to the el-tag `type` used for its label.
// Unknown queue names fall back to 'info'.
function queueTagType(name) {
  const types = {
    'queue:onnx': '',
    'queue:bie': 'warning',
    'queue:nef': 'success',
    'queue:done': 'info',
  }
  // `??` rather than `||`: the empty string is a deliberate entry in the
  // table and must not be replaced by the fallback.
  return types[name] ?? 'info'
}
|
||||
|
||||
// Maps a job status to the el-tag `type` used in the job table.
// Unknown statuses fall back to 'info'.
function statusTagType(status) {
  const types = {
    ONNX: 'primary',
    BIE: '',
    NEF: 'warning',
    COMPLETED: 'success',
    FAILED: 'danger',
  }
  // `??` rather than `||`: the empty string is a deliberate entry in the
  // table and must not be replaced by the fallback.
  return types[status] ?? 'info'
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.monitor {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.monitor-toolbar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
margin-bottom: 20px;
|
||||
padding: 12px 16px;
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08);
|
||||
}
|
||||
|
||||
.last-updated {
|
||||
margin-left: auto;
|
||||
color: #909399;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.stats-card {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.card-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
font-size: 18px;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.card-header .el-icon {
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.job-stats {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(6, 1fr);
|
||||
gap: 16px;
|
||||
}
|
||||
|
||||
.stat-item {
|
||||
text-align: center;
|
||||
padding: 16px;
|
||||
border-radius: 8px;
|
||||
background: #f5f7fa;
|
||||
}
|
||||
|
||||
.stat-value {
|
||||
font-size: 32px;
|
||||
font-weight: 700;
|
||||
color: #303133;
|
||||
}
|
||||
|
||||
.stat-label {
|
||||
font-size: 13px;
|
||||
color: #909399;
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.stat-onnx .stat-value { color: #409eff; }
|
||||
.stat-bie .stat-value { color: #e6a23c; }
|
||||
.stat-nef .stat-value { color: #67c23a; }
|
||||
.stat-completed .stat-value { color: #67c23a; }
|
||||
.stat-failed .stat-value { color: #f56c6c; }
|
||||
|
||||
.consumer-list {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.consumer-tag {
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
.job-id {
|
||||
font-family: monospace;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.text-warning { color: #e6a23c; font-weight: 600; }
|
||||
.text-danger { color: #f56c6c; }
|
||||
.text-muted { color: #c0c4cc; }
|
||||
</style>
|
||||
@ -15,7 +15,8 @@ export default defineConfig({
|
||||
'/api': {
|
||||
target: 'http://localhost:4000',
|
||||
changeOrigin: true,
|
||||
secure: false
|
||||
secure: false,
|
||||
rewrite: (path) => path.replace(/^\/api/, '')
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -23,7 +24,7 @@ export default defineConfig({
|
||||
outDir: 'dist',
|
||||
assetsDir: 'assets',
|
||||
sourcemap: false,
|
||||
minify: 'terser',
|
||||
minify: 'esbuild',
|
||||
rollupOptions: {
|
||||
output: {
|
||||
chunkFileNames: 'js/[name]-[hash].js',
|
||||
|
||||
132
docker-compose.yml
Normal file
132
docker-compose.yml
Normal file
@ -0,0 +1,132 @@
|
||||
##
|
||||
# Kneron Model Converter — Development docker-compose
|
||||
#
|
||||
# Usage:
|
||||
# docker-compose up # local mode (shared volume)
|
||||
# STORAGE_BACKEND=s3 docker-compose up # S3/MinIO mode
|
||||
# docker-compose up --scale bie-worker=3 # scale BIE workers
|
||||
##
|
||||
|
||||
volumes:
|
||||
job-data:
|
||||
|
||||
services:
|
||||
# ---------- Infrastructure ----------
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
expose:
|
||||
- "6379"
|
||||
command: redis-server --save ""
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 3
|
||||
|
||||
# ---------- Web UI ----------
|
||||
|
||||
web:
|
||||
build: ./apps/web
|
||||
ports:
|
||||
- "9500:3000"
|
||||
depends_on:
|
||||
scheduler:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
# ---------- Scheduler ----------
|
||||
|
||||
scheduler:
|
||||
build: ./apps/task-scheduler
|
||||
ports:
|
||||
- "9501:4000"
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- job-data:/data/jobs
|
||||
environment:
|
||||
- PORT=4000
|
||||
- REDIS_URL=redis://redis:6379
|
||||
- JOB_DATA_DIR=/data/jobs
|
||||
- FRONTEND_URL=http://localhost:9500
|
||||
- STORAGE_BACKEND=${STORAGE_BACKEND:-local}
|
||||
- MINIO_ENDPOINT_URL=${MINIO_ENDPOINT_URL:-http://192.168.0.130:9000}
|
||||
- MINIO_BUCKET=${MINIO_BUCKET:-convertet-working-space}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-convuser}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- MINIO_REGION=${MINIO_REGION:-us-east-1}
|
||||
- MINIO_LIFECYCLE_DAYS=${MINIO_LIFECYCLE_DAYS:-7}
|
||||
restart: unless-stopped
|
||||
|
||||
# ---------- Workers (stub mode) ----------
|
||||
|
||||
onnx-worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: services/workers/Dockerfile.stub
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- job-data:/data/jobs
|
||||
environment:
|
||||
- STAGE=onnx
|
||||
- REDIS_URL=redis://redis:6379
|
||||
- JOB_DATA_DIR=/data/jobs
|
||||
- WORKER_MODE=${WORKER_MODE:-stub}
|
||||
- STORAGE_BACKEND=${STORAGE_BACKEND:-local}
|
||||
- MINIO_ENDPOINT_URL=${MINIO_ENDPOINT_URL:-http://192.168.0.130:9000}
|
||||
- MINIO_BUCKET=${MINIO_BUCKET:-convertet-working-space}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-convuser}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- MINIO_REGION=${MINIO_REGION:-us-east-1}
|
||||
- MINIO_LIFECYCLE_DAYS=${MINIO_LIFECYCLE_DAYS:-7}
|
||||
restart: unless-stopped
|
||||
|
||||
bie-worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: services/workers/Dockerfile.stub
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- job-data:/data/jobs
|
||||
environment:
|
||||
- STAGE=bie
|
||||
- REDIS_URL=redis://redis:6379
|
||||
- JOB_DATA_DIR=/data/jobs
|
||||
- WORKER_MODE=${WORKER_MODE:-stub}
|
||||
- STORAGE_BACKEND=${STORAGE_BACKEND:-local}
|
||||
- MINIO_ENDPOINT_URL=${MINIO_ENDPOINT_URL:-http://192.168.0.130:9000}
|
||||
- MINIO_BUCKET=${MINIO_BUCKET:-convertet-working-space}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-convuser}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- MINIO_REGION=${MINIO_REGION:-us-east-1}
|
||||
- MINIO_LIFECYCLE_DAYS=${MINIO_LIFECYCLE_DAYS:-7}
|
||||
restart: unless-stopped
|
||||
|
||||
nef-worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: services/workers/Dockerfile.stub
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- job-data:/data/jobs
|
||||
environment:
|
||||
- STAGE=nef
|
||||
- REDIS_URL=redis://redis:6379
|
||||
- JOB_DATA_DIR=/data/jobs
|
||||
- WORKER_MODE=${WORKER_MODE:-stub}
|
||||
- STORAGE_BACKEND=${STORAGE_BACKEND:-local}
|
||||
- MINIO_ENDPOINT_URL=${MINIO_ENDPOINT_URL:-http://192.168.0.130:9000}
|
||||
- MINIO_BUCKET=${MINIO_BUCKET:-convertet-working-space}
|
||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-convuser}
|
||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||
- MINIO_REGION=${MINIO_REGION:-us-east-1}
|
||||
- MINIO_LIFECYCLE_DAYS=${MINIO_LIFECYCLE_DAYS:-7}
|
||||
restart: unless-stopped
|
||||
24
services/workers/Dockerfile.stub
Normal file
24
services/workers/Dockerfile.stub
Normal file
@ -0,0 +1,24 @@
|
||||
# Stub Worker Dockerfile
|
||||
# Lightweight image for development — no toolchain dependencies
|
||||
#
|
||||
# Build from project root:
|
||||
# docker build -f services/workers/Dockerfile.stub .
|
||||
|
||||
FROM python:3.9-slim

WORKDIR /app

# The requirement specifiers must be quoted: unquoted, the shell treats
# ">=5.0" and ">=1.28" as output redirections, so pip would install
# unconstrained "redis boto3" and write its output to a file named "=1.28".
RUN pip install --no-cache-dir "redis>=5.0" "boto3>=1.28"

COPY services/workers/ /app/services/workers/

RUN mkdir -p /data/jobs

ENV WORKER_MODE=stub
ENV REDIS_URL=redis://redis:6379
ENV JOB_DATA_DIR=/data/jobs
ENV STORAGE_BACKEND=local
# STAGE should be set to: onnx, bie, or nef
ENV STAGE=onnx

# Shell form is required so ${STAGE} is expanded at container start; `exec`
# replaces the shell with the Python process so it receives stop signals
# directly.
CMD exec python -m services.workers.${STAGE}.worker
|
||||
53
services/workers/bie/worker.py
Normal file
53
services/workers/bie/worker.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""
|
||||
BIE Worker — Redis Stream queue consumer entry point.
|
||||
|
||||
Usage:
|
||||
python -m services.workers.bie.worker
|
||||
# or
|
||||
WORKER_MODE=stub python -m services.workers.bie.worker
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
|
||||
from services.workers.consumer import WorkerConsumer
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
|
||||
)
|
||||
logger = logging.getLogger("bie-worker")
|
||||
|
||||
|
||||
def get_process_fn():
    """Return the BIE core function selected by the WORKER_MODE environment variable.

    ``stub`` selects ``process_bie_core_stub``; any other value, including the
    default ``real``, selects ``process_bie_core``. The value is compared
    case-insensitively with surrounding whitespace ignored. An unrecognized
    value still falls back to the real implementation but is logged as a
    warning so that a typo does not go unnoticed.

    The imports are done inside each branch, so only the selected
    implementation module is loaded.
    """
    mode = os.environ.get("WORKER_MODE", "real").strip().lower()

    if mode == "stub":
        from services.workers.stubs import process_bie_core_stub
        logger.info("Running in STUB mode")
        return process_bie_core_stub

    if mode != "real":
        logger.warning("Unrecognized WORKER_MODE %r, falling back to REAL mode", mode)

    from services.workers.bie.core import process_bie_core
    logger.info("Running in REAL mode")
    return process_bie_core
|
||||
|
||||
|
||||
def main():
    """Run the BIE worker: consume ``queue:bie`` as a member of the ``bie-workers`` group."""
    # get_process_fn() is evaluated before the consumer is constructed,
    # exactly as when it was bound to a local variable first.
    WorkerConsumer(
        stage="bie",
        process_fn=get_process_fn(),
        queue_name="queue:bie",
        group_name="bie-workers",
    ).run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
268
services/workers/consumer.py
Normal file
268
services/workers/consumer.py
Normal file
@ -0,0 +1,268 @@
|
||||
"""
Generic Redis Stream queue consumer for workers.

Each worker (ONNX/BIE/NEF) uses this module as its entry point:
  1. Pull a task from the assigned Redis Stream queue (XREADGROUP)
  2. Download the input files from S3/MinIO into a local temporary directory
  3. Call the matching core function to process the job
  4. Upload the result to S3/MinIO
  5. Push the result to queue:done
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import signal
|
||||
import socket
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Any, Callable, Dict
|
||||
|
||||
import redis
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WorkerConsumer:
    """Redis Stream based queue consumer with S3/MinIO storage.

    One instance serves one pipeline stage ("onnx", "bie" or "nef"). For every
    message read from ``queue_name`` it prepares a job directory, calls
    ``process_fn(input_paths, output_path, parameters)``, uploads the stage
    output when the MinIO backend is active, publishes the outcome to
    ``queue:done`` and acknowledges the message.
    """

    # Output artifact produced by each stage, relative to the job directory.
    _OUTPUT_FILES = {
        "onnx": "out.onnx",
        "bie": "out.bie",
        "nef": "out.nef",
    }

    def __init__(
        self,
        stage: str,
        process_fn: Callable[[Dict[str, str], str, Dict[str, Any]], Dict[str, Any]],
        queue_name: str,
        group_name: str,
        redis_url: str = None,
        job_data_dir: str = None,
    ):
        """Connect to Redis and select the storage backend.

        Args:
            stage: Pipeline stage name; selects inputs and the output file name.
            process_fn: Core function called as
                ``process_fn(input_paths, output_path, parameters)``.
            queue_name: Redis Stream to read tasks from.
            group_name: Redis consumer group to join.
            redis_url: Redis URL; defaults to ``$REDIS_URL`` or
                ``redis://localhost:6379``.
            job_data_dir: Root of per-job directories in local mode; defaults
                to ``$JOB_DATA_DIR`` or ``/data/jobs``.
        """
        self.stage = stage
        self.process_fn = process_fn
        self.queue_name = queue_name
        self.group_name = group_name
        self.redis_url = redis_url or os.environ.get("REDIS_URL", "redis://localhost:6379")
        self.job_data_dir = job_data_dir or os.environ.get("JOB_DATA_DIR", "/data/jobs")
        # Host name + PID keeps consumer names distinct across containers/processes.
        self.consumer_name = f"{stage}-worker-{socket.gethostname()}-{os.getpid()}"
        self.running = True

        self.client = redis.Redis.from_url(self.redis_url, decode_responses=True)

        # Initialize MinIO storage (only when STORAGE_BACKEND=minio).
        self.minio = None
        if os.environ.get("STORAGE_BACKEND", "local") == "minio":
            # Imported lazily so local mode does not need the S3 helper's dependencies.
            from services.workers.s3_storage import MinIOStorage
            self.minio = MinIOStorage()
            logger.info("Using MinIO storage backend")
        else:
            logger.info("Using local filesystem storage backend")

    def _ensure_group(self):
        """Create the consumer group (and the stream) if it doesn't exist."""
        try:
            self.client.xgroup_create(self.queue_name, self.group_name, id="0", mkstream=True)
            logger.info(f"Created consumer group '{self.group_name}' on '{self.queue_name}'")
        except redis.ResponseError as e:
            # BUSYGROUP means the group already exists, which is fine.
            if "BUSYGROUP" not in str(e):
                raise

    def _prepare_local_dir(self, job_id: str) -> str:
        """Prepare a local working directory for the job and return its path.

        For S3 mode: downloads the files this stage needs into a temp dir.
        For local mode: returns the existing job dir on the shared volume.
        """
        if not self.minio:
            return os.path.join(self.job_data_dir, job_id)

        # MinIO mode: use a local temp dir (isolated per worker, no shared volume conflict)
        local_dir = os.path.join(tempfile.gettempdir(), "kneron-jobs", f"{job_id}-{self.stage}")
        os.makedirs(local_dir, exist_ok=True)

        s3_prefix = f"jobs/{job_id}"

        if self.stage == "onnx":
            # Download input/ directory (model file + ref_images)
            self.minio.download_prefix(f"{s3_prefix}/input", os.path.join(local_dir, "input"))
            logger.info(f"Downloaded input files from S3 for job {job_id}")

        elif self.stage == "bie":
            # Download out.onnx from previous stage
            self.minio.download_file(f"{s3_prefix}/out.onnx", os.path.join(local_dir, "out.onnx"))
            # Download ref_images for quantization
            self.minio.download_prefix(
                f"{s3_prefix}/input/ref_images",
                os.path.join(local_dir, "input", "ref_images"),
            )
            logger.info(f"Downloaded ONNX + ref_images from S3 for job {job_id}")

        elif self.stage == "nef":
            # Download out.bie from previous stage
            self.minio.download_file(f"{s3_prefix}/out.bie", os.path.join(local_dir, "out.bie"))
            logger.info(f"Downloaded BIE from S3 for job {job_id}")

        return local_dir

    def _upload_output(self, job_id: str, job_dir: str):
        """Upload this stage's output file to S3 after processing.

        No-op in local mode.

        Raises:
            FileNotFoundError: In MinIO mode, if the stage output is missing.
                Without this the job would be reported as "ok" although the
                artifact was never stored.
        """
        if not self.minio:
            return

        output_name = self._OUTPUT_FILES[self.stage]
        local_path = os.path.join(job_dir, output_name)

        if not os.path.exists(local_path):
            raise FileNotFoundError(
                f"Stage '{self.stage}' produced no output file: {local_path}"
            )

        self.minio.upload_file(local_path, f"jobs/{job_id}/{output_name}")
        logger.info(f"Uploaded {output_name} to S3 for job {job_id}")

    def _cleanup_local(self, job_dir: str):
        """Remove the local temp directory (MinIO mode only; local job dirs are kept)."""
        if not self.minio:
            return
        try:
            shutil.rmtree(job_dir, ignore_errors=True)
            logger.debug(f"Cleaned up local dir: {job_dir}")
        except Exception as e:
            logger.warning(f"Failed to clean up {job_dir}: {e}")

    def _build_input_paths(self, job_dir: str, parameters: dict) -> dict:
        """Build the ``input_paths`` dict for this stage from the job directory.

        Raises:
            FileNotFoundError: ONNX stage only, when ``input/`` holds no regular file.
            ValueError: If ``self.stage`` is not a known stage.
        """
        input_dir = os.path.join(job_dir, "input")

        if self.stage == "onnx":
            # Pick the model file in input/ (sub-directories such as ref_images
            # are skipped). Sorting makes the choice deterministic if more
            # than one file is present.
            if os.path.isdir(input_dir):
                for name in sorted(os.listdir(input_dir)):
                    candidate = os.path.join(input_dir, name)
                    if os.path.isfile(candidate):
                        return {"file_path": candidate}
            raise FileNotFoundError(f"No input file found in {input_dir}")

        if self.stage == "bie":
            return {
                "onnx_file_path": os.path.join(job_dir, "out.onnx"),
                "data_dir": os.path.join(input_dir, "ref_images"),
            }

        if self.stage == "nef":
            return {"bie_file_path": os.path.join(job_dir, "out.bie")}

        raise ValueError(f"Unknown stage: {self.stage}")

    def _get_output_path(self, job_dir: str) -> str:
        """Get the expected output file path for this stage."""
        return os.path.join(job_dir, self._OUTPUT_FILES[self.stage])

    def _push_done(self, job_id: str, result: str, reason: str = None):
        """Push a done event (``result`` is "ok" or "fail") to queue:done."""
        message = {
            "job_id": job_id,
            "step": self.stage,
            "result": result,
            "completed_at": time.strftime("%Y-%m-%dT%H:%M:%S%z"),
        }
        if reason:
            message["reason"] = reason
        self.client.xadd("queue:done", {"data": json.dumps(message)})
        logger.info(f"Pushed done: job={job_id} step={self.stage} result={result}")

    def _parse_payload(self, message_id: str, fields: dict):
        """Decode the JSON task stored in a stream entry's ``data`` field.

        Returns:
            The task dict, or ``None`` when the entry is malformed (missing
            ``data``, invalid JSON, not an object, or without ``job_id``).
        """
        try:
            data = json.loads(fields["data"])
            if not isinstance(data, dict) or "job_id" not in data:
                raise ValueError("payload must be a JSON object with a 'job_id'")
        except (KeyError, TypeError, ValueError) as e:
            logger.error(f"Malformed message {message_id} on {self.queue_name}: {e}")
            return None
        return data

    def _process_message(self, message_id: str, data: dict):
        """Process a single task message and report the outcome.

        Any exception raised while preparing, processing or uploading is
        turned into a "fail" done event. The message is acknowledged in every
        case, so a failed job is not redelivered.
        """
        job_id = data["job_id"]
        # A JSON null for "parameters" is treated like a missing key.
        parameters = data.get("parameters") or {}

        logger.info(f"Processing job {job_id} (stage={self.stage})")

        job_dir = None
        try:
            # Prepare local working directory (download from S3 if needed)
            job_dir = self._prepare_local_dir(job_id)

            input_paths = self._build_input_paths(job_dir, parameters)
            output_path = self._get_output_path(job_dir)

            # Add work_dir to parameters so core can set up toolchain paths
            parameters["work_dir"] = job_dir

            result = self.process_fn(input_paths, output_path, parameters)

            # Upload output to S3
            self._upload_output(job_id, job_dir)

            # The result is only used for logging; a non-dict result must not
            # turn a completed job into a failure.
            produced = result.get("file_path", "N/A") if isinstance(result, dict) else "N/A"
            logger.info(f"Job {job_id} completed: {produced}")
            self._push_done(job_id, "ok")

        except Exception as e:
            logger.error(f"Job {job_id} failed: {e}", exc_info=True)
            self._push_done(job_id, "fail", reason=str(e))

        finally:
            # Clean up local temp files in S3 mode
            if job_dir:
                self._cleanup_local(job_dir)

            # ACK the message regardless of success/failure
            self.client.xack(self.queue_name, self.group_name, message_id)

    def run(self):
        """Main loop: pull tasks from the queue and process them until stopped.

        SIGTERM/SIGINT clear ``self.running``; the loop then exits after the
        current message or the current (at most 5 s) blocking read.
        """
        self._ensure_group()

        logger.info(
            f"[{self.consumer_name}] Listening on {self.queue_name} "
            f"(group={self.group_name})"
        )

        # Handle graceful shutdown
        def handle_signal(signum, frame):
            logger.info(f"[{self.consumer_name}] Received signal {signum}, shutting down...")
            self.running = False

        signal.signal(signal.SIGTERM, handle_signal)
        signal.signal(signal.SIGINT, handle_signal)

        while self.running:
            try:
                results = self.client.xreadgroup(
                    self.group_name,
                    self.consumer_name,
                    {self.queue_name: ">"},
                    count=1,
                    block=5000,  # 5 second timeout
                )

                if not results:
                    continue

                for stream_name, messages in results:
                    for message_id, fields in messages:
                        data = self._parse_payload(message_id, fields)
                        if data is None:
                            # Entries are read with ">" and would never be
                            # delivered again, so an un-ACKed malformed entry
                            # would stay in the pending list forever.
                            self.client.xack(self.queue_name, self.group_name, message_id)
                            continue
                        self._process_message(message_id, data)

            except redis.ConnectionError:
                logger.error("Redis connection lost, retrying in 3s...")
                time.sleep(3)
            except Exception as e:
                logger.error(f"Unexpected error: {e}", exc_info=True)
                time.sleep(1)

        logger.info(f"[{self.consumer_name}] Stopped")
|
||||
53
services/workers/nef/worker.py
Normal file
53
services/workers/nef/worker.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""
|
||||
NEF Worker — Redis Stream queue consumer entry point.
|
||||
|
||||
Usage:
|
||||
python -m services.workers.nef.worker
|
||||
# or
|
||||
WORKER_MODE=stub python -m services.workers.nef.worker
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
|
||||
from services.workers.consumer import WorkerConsumer
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
|
||||
)
|
||||
logger = logging.getLogger("nef-worker")
|
||||
|
||||
|
||||
def get_process_fn():
    """Return the NEF core function selected by the WORKER_MODE environment variable.

    ``stub`` selects ``process_nef_core_stub``; any other value, including the
    default ``real``, selects ``process_nef_core``. The value is compared
    case-insensitively with surrounding whitespace ignored. An unrecognized
    value still falls back to the real implementation but is logged as a
    warning so that a typo does not go unnoticed.

    The imports are done inside each branch, so only the selected
    implementation module is loaded.
    """
    mode = os.environ.get("WORKER_MODE", "real").strip().lower()

    if mode == "stub":
        from services.workers.stubs import process_nef_core_stub
        logger.info("Running in STUB mode")
        return process_nef_core_stub

    if mode != "real":
        logger.warning("Unrecognized WORKER_MODE %r, falling back to REAL mode", mode)

    from services.workers.nef.core import process_nef_core
    logger.info("Running in REAL mode")
    return process_nef_core
|
||||
|
||||
|
||||
def main():
    """Run the NEF worker: consume ``queue:nef`` as a member of the ``nef-workers`` group."""
    # get_process_fn() is evaluated before the consumer is constructed,
    # exactly as when it was bound to a local variable first.
    WorkerConsumer(
        stage="nef",
        process_fn=get_process_fn(),
        queue_name="queue:nef",
        group_name="nef-workers",
    ).run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
55
services/workers/onnx/worker.py
Normal file
55
services/workers/onnx/worker.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
ONNX Worker — Redis Stream queue consumer entry point.
|
||||
|
||||
Usage:
|
||||
python -m services.workers.onnx.worker
|
||||
# or
|
||||
WORKER_MODE=stub python -m services.workers.onnx.worker
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Ensure project root is in sys.path
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
|
||||
from services.workers.consumer import WorkerConsumer
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
|
||||
)
|
||||
logger = logging.getLogger("onnx-worker")
|
||||
|
||||
|
||||
def get_process_fn():
    """Return the ONNX core function selected by the WORKER_MODE environment variable.

    ``stub`` selects ``process_onnx_core_stub``; any other value, including
    the default ``real``, selects ``process_onnx_core``. The value is compared
    case-insensitively with surrounding whitespace ignored. An unrecognized
    value still falls back to the real implementation but is logged as a
    warning so that a typo does not go unnoticed.

    The imports are done inside each branch, so only the selected
    implementation module is loaded.
    """
    mode = os.environ.get("WORKER_MODE", "real").strip().lower()

    if mode == "stub":
        from services.workers.stubs import process_onnx_core_stub
        logger.info("Running in STUB mode")
        return process_onnx_core_stub

    if mode != "real":
        logger.warning("Unrecognized WORKER_MODE %r, falling back to REAL mode", mode)

    from services.workers.onnx.core import process_onnx_core
    logger.info("Running in REAL mode")
    return process_onnx_core
|
||||
|
||||
|
||||
def main():
    """Run the ONNX worker: consume ``queue:onnx`` as a member of the ``onnx-workers`` group."""
    # get_process_fn() is evaluated before the consumer is constructed,
    # exactly as when it was bound to a local variable first.
    WorkerConsumer(
        stage="onnx",
        process_fn=get_process_fn(),
        queue_name="queue:onnx",
        group_name="onnx-workers",
    ).run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
145
services/workers/s3_storage.py
Normal file
145
services/workers/s3_storage.py
Normal file
@ -0,0 +1,145 @@
|
||||
"""
|
||||
MinIO storage helper for workers.
|
||||
|
||||
Provides upload/download functionality to replace Docker Shared Volume.
|
||||
Workers download inputs from MinIO to local temp dir, process, then upload results.
|
||||
|
||||
Uses boto3 (S3-compatible API) to communicate with MinIO.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from typing import Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config as BotoConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MinIOStorage:
    """MinIO storage client for job file exchange.

    Thin wrapper around a boto3 S3 client bound to one bucket. Constructing an
    instance verifies that the bucket is reachable and, when
    ``lifecycle_days`` is positive, makes sure an expiration rule for the
    ``jobs/`` prefix is present.
    """

    # Error codes reported for a missing object.
    _NOT_FOUND_CODES = ("404", "NoSuchKey", "NotFound")

    def __init__(
        self,
        endpoint_url: str = None,
        bucket: str = None,
        access_key: str = None,
        secret_key: str = None,
        region: str = None,
        lifecycle_days: int = None,
    ):
        """Create the S3 client; every argument falls back to a ``MINIO_*`` env var.

        Raises:
            Exception: Whatever ``head_bucket`` raises when the bucket is not
                accessible (re-raised by ``_ensure_bucket``).
        """
        self.endpoint_url = endpoint_url or os.environ.get("MINIO_ENDPOINT_URL", "http://192.168.0.130:9000")
        self.bucket = bucket or os.environ.get("MINIO_BUCKET", "convertet-working-space")
        self.access_key = access_key or os.environ.get("MINIO_ACCESS_KEY", "convuser")
        self.secret_key = secret_key or os.environ.get("MINIO_SECRET_KEY", "")
        self.region = region or os.environ.get("MINIO_REGION", "us-east-1")
        # An explicit 0 must be honoured (it disables the rule), hence "is not None".
        self.lifecycle_days = lifecycle_days if lifecycle_days is not None else int(os.environ.get("MINIO_LIFECYCLE_DAYS", "7"))

        self.client = boto3.client(
            "s3",
            endpoint_url=self.endpoint_url,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=self.region,
            config=BotoConfig(signature_version="s3v4"),
        )
        self._ensure_bucket()
        if self.lifecycle_days > 0:
            self._ensure_lifecycle_rule()

    @staticmethod
    def _error_code(exc) -> str:
        """Return the S3 error code carried by a client error, or "" if there is none."""
        response = getattr(exc, "response", None)
        if isinstance(response, dict):
            error = response.get("Error") or {}
            return str(error.get("Code", ""))
        return ""

    def _ensure_bucket(self):
        """Verify bucket exists (do not auto-create — bucket is managed externally)."""
        try:
            self.client.head_bucket(Bucket=self.bucket)
            logger.info(f"MinIO bucket verified: {self.bucket}")
        except Exception as e:
            logger.error(f"MinIO bucket '{self.bucket}' is not accessible: {e}")
            raise

    def _ensure_lifecycle_rule(self):
        """Make sure objects under the jobs/ prefix auto-expire.

        Putting a lifecycle configuration replaces the bucket's whole rule
        set, so the existing rules are read first and only the rule with this
        helper's ID is added or replaced. Best effort: any failure is logged
        as a warning and the existing configuration is left untouched.
        """
        rule_id = "auto-cleanup-jobs"
        wanted = {
            "ID": rule_id,
            "Status": "Enabled",
            "Filter": {"Prefix": "jobs/"},
            "Expiration": {"Days": self.lifecycle_days},
        }
        try:
            try:
                current = self.client.get_bucket_lifecycle_configuration(Bucket=self.bucket)
                rules = list(current.get("Rules") or [])
            except Exception as e:
                # "No configuration yet" is expected on a fresh bucket; any
                # other failure must not lead to overwriting unknown rules.
                if self._error_code(e) != "NoSuchLifecycleConfiguration":
                    raise
                rules = []

            if wanted in rules:
                logger.debug(f"MinIO lifecycle rule already present: {rule_id}")
                return

            merged = [rule for rule in rules if rule.get("ID") != rule_id]
            merged.append(wanted)
            self.client.put_bucket_lifecycle_configuration(
                Bucket=self.bucket,
                LifecycleConfiguration={"Rules": merged},
            )
            logger.info(f"MinIO lifecycle rule set: jobs/* expire after {self.lifecycle_days} days")
        except Exception as e:
            logger.warning(f"Could not set lifecycle rule: {e}")

    def upload_file(self, local_path: str, key: str):
        """Upload a local file to MinIO."""
        self.client.upload_file(local_path, self.bucket, key)
        logger.debug(f"Uploaded {local_path} -> minio://{self.bucket}/{key}")

    def upload_data(self, data: bytes, key: str):
        """Upload raw bytes to MinIO."""
        self.client.put_object(Bucket=self.bucket, Key=key, Body=data)
        logger.debug(f"Uploaded {len(data)} bytes -> minio://{self.bucket}/{key}")

    def download_file(self, key: str, local_path: str):
        """Download a file from MinIO to local path, creating parent directories."""
        parent = os.path.dirname(local_path)
        # A bare file name has no parent component; os.makedirs("") would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.client.download_file(self.bucket, key, local_path)
        logger.debug(f"Downloaded minio://{self.bucket}/{key} -> {local_path}")

    def download_to_stream(self, key: str):
        """Get a streaming body for a MinIO object."""
        response = self.client.get_object(Bucket=self.bucket, Key=key)
        return response["Body"]

    def list_keys(self, prefix: str) -> list:
        """List all keys that start with ``prefix`` (plain string prefix match)."""
        paginator = self.client.get_paginator("list_objects_v2")
        return [
            obj["Key"]
            for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix)
            for obj in page.get("Contents", [])
        ]

    def upload_directory(self, local_dir: str, prefix: str):
        """Upload all files in a local directory to MinIO under a prefix.

        Keys always use "/" separators and exactly one "/" between ``prefix``
        and the relative path, whether or not ``prefix`` ends with "/".
        """
        base = prefix.rstrip("/")
        for root, _, files in os.walk(local_dir):
            for fname in files:
                local_path = os.path.join(root, fname)
                rel_path = os.path.relpath(local_path, local_dir).replace(os.sep, "/")
                key = f"{base}/{rel_path}" if base else rel_path
                self.upload_file(local_path, key)

    def download_prefix(self, prefix: str, local_dir: str):
        """Download all files under a MinIO "directory" prefix to a local directory.

        The prefix is treated as a directory: ``jobs/1/input`` matches
        ``jobs/1/input/...`` but not ``jobs/1/input_old/...``. Directory
        marker keys (ending in "/") are skipped, and keys whose relative path
        would resolve outside ``local_dir`` (e.g. containing "..") are skipped
        with a warning.
        """
        list_prefix = prefix if (not prefix or prefix.endswith("/")) else prefix + "/"
        base_dir = os.path.abspath(local_dir)

        for key in self.list_keys(list_prefix):
            rel_path = key[len(list_prefix):].lstrip("/")
            if not rel_path or rel_path.endswith("/"):
                continue
            local_path = os.path.abspath(os.path.join(base_dir, *rel_path.split("/")))
            if os.path.commonpath([base_dir, local_path]) != base_dir:
                logger.warning(f"Skipping key outside target directory: {key}")
                continue
            self.download_file(key, local_path)

    def exists(self, key: str) -> bool:
        """Check if a MinIO key exists.

        Returns False when the lookup fails for any reason; failures other
        than "not found" are logged so outages are not mistaken for absence.
        """
        try:
            self.client.head_object(Bucket=self.bucket, Key=key)
            return True
        except Exception as e:
            if self._error_code(e) not in self._NOT_FOUND_CODES:
                logger.warning(f"Could not check minio://{self.bucket}/{key}: {e}")
            return False

    def get_size(self, key: str) -> Optional[int]:
        """Get the size of a MinIO object in bytes, or None if it cannot be read.

        Failures other than "not found" are logged.
        """
        try:
            response = self.client.head_object(Bucket=self.bucket, Key=key)
            return response["ContentLength"]
        except Exception as e:
            if self._error_code(e) not in self._NOT_FOUND_CODES:
                logger.warning(f"Could not stat minio://{self.bucket}/{key}: {e}")
            return None
|
||||
122
services/workers/stubs.py
Normal file
122
services/workers/stubs.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""
|
||||
Stub implementations of worker core functions.
|
||||
|
||||
Used when WORKER_MODE=stub, allowing development and testing of
|
||||
Scheduler / Queue / UI without requiring the Kneron Toolchain environment.
|
||||
|
||||
Each stub:
|
||||
- Sleeps to simulate processing time
|
||||
- Creates a minimal output file
|
||||
- Returns a result dict matching the real core function signature
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
def process_onnx_core_stub(
    input_paths: Dict[str, str],
    output_path: str,
    parameters: Dict[str, Any],
) -> Dict[str, Any]:
    """Stub ONNX processing: wait, then create a fake out.onnx.

    Args:
        input_paths: Must contain ``file_path``, the uploaded model file.
        output_path: Where the fake output is written; parent directories are
            created when the path has a directory component.
        parameters: Job parameters. ``model_id``, ``version`` and ``platform``
            are echoed back in ``model_info``. The optional
            ``stub_delay_seconds`` overrides the simulated processing time
            (default 2 seconds; negative values are treated as 0).

    Returns:
        Dict with ``file_path``, ``file_size``, ``eval_report`` and ``model_info``.

    Raises:
        FileNotFoundError: If the input file does not exist.
    """
    file_path = input_paths["file_path"]
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Input file not found: {file_path}")

    # A bare file name has no directory part; os.makedirs("") would raise.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    delay = parameters.get("stub_delay_seconds")
    time.sleep(2 if delay is None else max(0.0, float(delay)))

    # Create minimal valid-looking output
    with open(output_path, "wb") as f:
        f.write(b"STUB_ONNX_OUTPUT_" + os.path.basename(file_path).encode())

    return {
        "file_path": output_path,
        "file_size": os.path.getsize(output_path),
        "eval_report": "",
        "model_info": {
            "model_id": parameters.get("model_id"),
            "version": parameters.get("version"),
            "platform": parameters.get("platform"),
        },
    }
|
||||
|
||||
|
||||
def process_bie_core_stub(
    input_paths: Dict[str, str],
    output_path: str,
    parameters: Dict[str, Any],
) -> Dict[str, Any]:
    """Stub BIE processing: wait, then create a fake out.bie.

    Args:
        input_paths: Must contain ``onnx_file_path`` (output of the ONNX
            stage) and ``data_dir`` (reference image directory, may be absent
            on disk).
        output_path: Where the fake output is written; parent directories are
            created when the path has a directory component.
        parameters: Job parameters. ``model_id``, ``version`` and ``platform``
            are echoed back in ``model_info``. The optional
            ``stub_delay_seconds`` overrides the simulated processing time
            (default 3 seconds; negative values are treated as 0).

    Returns:
        Dict with ``file_path``, ``file_size``, ``model_info``, fixed
        placeholder ``analysis_info`` and ``processed_images`` (number of
        regular files directly inside ``data_dir``).

    Raises:
        FileNotFoundError: If the ONNX file does not exist.
    """
    onnx_file_path = input_paths["onnx_file_path"]
    data_dir = input_paths["data_dir"]

    if not os.path.exists(onnx_file_path):
        raise FileNotFoundError(f"ONNX file not found: {onnx_file_path}")

    # A bare file name has no directory part; os.makedirs("") would raise.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Count ref images (if any)
    img_count = 0
    if os.path.isdir(data_dir):
        img_count = sum(
            1 for name in os.listdir(data_dir)
            if os.path.isfile(os.path.join(data_dir, name))
        )

    delay = parameters.get("stub_delay_seconds")
    time.sleep(3 if delay is None else max(0.0, float(delay)))

    with open(output_path, "wb") as f:
        f.write(b"STUB_BIE_OUTPUT")

    return {
        "file_path": output_path,
        "file_size": os.path.getsize(output_path),
        "model_info": {
            "model_id": parameters.get("model_id"),
            "version": parameters.get("version"),
            "platform": parameters.get("platform"),
        },
        "analysis_info": {
            "input_name": "stub_input",
            "batch_size": 1,
            "channels": 3,
            "height": 224,
            "width": 224,
        },
        "processed_images": img_count,
    }
|
||||
|
||||
|
||||
def process_nef_core_stub(
    input_paths: Dict[str, str],
    output_path: str,
    parameters: Dict[str, Any],
) -> Dict[str, Any]:
    """Stub NEF processing: wait, then create a fake out.nef.

    Args:
        input_paths: Must contain ``bie_file_path`` (output of the BIE stage).
        output_path: Where the fake output is written; parent directories are
            created when the path has a directory component.
        parameters: Job parameters. ``model_id``, ``version`` and ``platform``
            are echoed back in ``model_info``. The optional
            ``stub_delay_seconds`` overrides the simulated processing time
            (default 2 seconds; negative values are treated as 0).

    Returns:
        Dict with ``file_path``, ``file_size``, ``model_info`` and placeholder
        ``compilation_info``.

    Raises:
        FileNotFoundError: If the BIE file does not exist.
    """
    bie_file_path = input_paths["bie_file_path"]

    if not os.path.exists(bie_file_path):
        raise FileNotFoundError(f"BIE file not found: {bie_file_path}")

    # A bare file name has no directory part; os.makedirs("") would raise.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    delay = parameters.get("stub_delay_seconds")
    time.sleep(2 if delay is None else max(0.0, float(delay)))

    with open(output_path, "wb") as f:
        f.write(b"STUB_NEF_OUTPUT")

    return {
        "file_path": output_path,
        "file_size": os.path.getsize(output_path),
        "model_info": {
            "model_id": parameters.get("model_id"),
            "version": parameters.get("version"),
            "platform": parameters.get("platform"),
        },
        "compilation_info": {
            "optimization_level": "stub",
            "memory_usage": "stub",
            "inference_speed": "stub",
        },
    }
|
||||
Loading…
x
Reference in New Issue
Block a user