feat(ai-runtime): complete ai runtime policy refactor (ADR-035)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
// File: src/modules/ai/processors/ai-batch.processor.ts
|
||||
// File: backend/src/modules/ai/processors/ai-batch.processor.ts
|
||||
// Change Log
|
||||
// - 2026-06-08: แก้ไขปัญหา LLM JSON response truncated โดยการเพิ่ม num_ctx เป็น 16384 ใน sandbox-extract, sandbox-ai-extract และ migrate-document (แก้ไขโดย AGY Gemini 3.5 Flash (Medium))
|
||||
// - 2026-05-15: เพิ่ม processor สำหรับ ai-batch queue ตาม ADR-023A.
|
||||
@@ -12,8 +12,11 @@
|
||||
// - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ
|
||||
// - 2026-06-03: ADR-034 — เพิ่ม 'ocr-extract' job type + OCR_JOB_TYPES constant + processOcrExtract() ที่มี model switching logic (unload main → load OCR → generate → reload main)
|
||||
// - 2026-06-06: แก้ไข bug LLM JSON parse failure — เพิ่ม retry logic (2 attempts), debug log raw response, และปรับปรุง error message ให้แสดงทั้ง raw และ cleaned response
|
||||
// - 2026-06-11: US2 - ส่ง activeProfile ไปยัง detectAndExtract ในการประมวลผล OCR และบันทึก retrieval device metadata ใน audit logs
|
||||
// - 2026-06-11: US4 - เพิ่มการรองรับ ai-suggest และ rag-query ใน batch processor หลังการทำ redirection
|
||||
// - 2026-06-06: เพิ่ม OCR text truncation (MAX_OCR_TEXT_CHARS=15000) เพื่อป้องกัน context overflow เมื่อเอกสารยาวมากชน num_ctx 8192
|
||||
// - 2026-06-06: [T036] เพิ่ม ollamaOptions: { num_ctx: 8192 } ใน generateStructuredJson เพื่อรองรับ prompt ยาว 18k+ chars และแก้ไข bug response ว่างจาก context window ไม่พอ
|
||||
// - 2026-06-11: แก้ไข ESLint errors โดยการเพิ่ม properties (effectiveProfile, canonicalModel, snapshotParams) ใน AiBatchJobData และยกเลิกการใช้ as any
|
||||
|
||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Logger } from '@nestjs/common';
|
||||
@@ -31,13 +34,17 @@ import {
|
||||
SandboxOcrEngineService,
|
||||
SandboxOcrEngineType,
|
||||
} from '../services/sandbox-ocr-engine.service';
|
||||
import { OllamaService } from '../services/ollama.service';
|
||||
import {
|
||||
OllamaService,
|
||||
OllamaGenerateOptions,
|
||||
} from '../services/ollama.service';
|
||||
import { Project } from '../../project/entities/project.entity';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
import { TagsService } from '../../tags/tags.service';
|
||||
import { MigrationService } from '../../migration/migration.service';
|
||||
import { MigrationErrorType } from '../../migration/entities/migration-error.entity';
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
import type { ExecutionProfile } from '../interfaces/execution-policy.interface';
|
||||
|
||||
interface MigrateDocumentMetadata extends Record<string, unknown> {
|
||||
projectPublicId?: string;
|
||||
@@ -62,7 +69,9 @@ export type AiBatchJobType =
|
||||
| 'sandbox-ocr-only'
|
||||
| 'sandbox-ai-extract'
|
||||
| 'migrate-document'
|
||||
| 'rag-prepare';
|
||||
| 'rag-prepare'
|
||||
| 'ai-suggest'
|
||||
| 'rag-query';
|
||||
|
||||
/** รายการ job types ที่ต้องใช้ Typhoon OCR model — จะ trigger model switching (ADR-034) */
|
||||
export const OCR_JOB_TYPES: ReadonlyArray<AiBatchJobType> = [
|
||||
@@ -76,6 +85,16 @@ export interface AiBatchJobData {
|
||||
payload: Record<string, unknown>;
|
||||
batchId?: string;
|
||||
idempotencyKey: string;
|
||||
effectiveProfile?: ExecutionProfile;
|
||||
canonicalModel?: 'np-dms-ai' | 'np-dms-ocr';
|
||||
snapshotParams?: {
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens: number;
|
||||
numCtx: number;
|
||||
repeatPenalty: number;
|
||||
keepAliveSeconds: number;
|
||||
};
|
||||
}
|
||||
|
||||
/** OCR text สูงสุดที่ส่งเข้า LLM prompt — ป้องกัน context overflow (num_ctx 8192, Thai ~3 chars/token) */
|
||||
@@ -286,6 +305,16 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
|
||||
}
|
||||
return;
|
||||
case 'ai-suggest':
|
||||
this.logger.log(
|
||||
`AI Suggest job processing — jobId=${String(job.id)}`
|
||||
);
|
||||
await this.processSuggest(job);
|
||||
return;
|
||||
case 'rag-query':
|
||||
this.logger.log(`RAG query job processing — jobId=${String(job.id)}`);
|
||||
await this.processRagQuery(job);
|
||||
return;
|
||||
case 'embed-document':
|
||||
this.logger.log(`Embedding job processing — jobId=${String(job.id)}`);
|
||||
await this.processEmbedDocument(job.data);
|
||||
@@ -353,6 +382,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
|
||||
/** ประมวลผล embed-document job ด้วย EmbeddingService (T022) */
|
||||
private async processEmbedDocument(data: AiBatchJobData): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
const { documentPublicId, projectPublicId, payload } = data;
|
||||
const pdfPath = payload.pdfPath as string;
|
||||
const extractedText = readString(payload.extractedText);
|
||||
@@ -378,6 +408,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
pdfPath,
|
||||
extractedText,
|
||||
documentPublicId,
|
||||
activeProfile: data.effectiveProfile,
|
||||
})
|
||||
).text;
|
||||
const result = await this.embeddingService.embedDocument(
|
||||
@@ -394,6 +425,19 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
if (!result.success) {
|
||||
throw new Error(`Embedding failed: ${result.error ?? 'Unknown error'}`);
|
||||
}
|
||||
const durationMs = Date.now() - startTime;
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId,
|
||||
aiModel: data.canonicalModel ?? 'np-dms-ai',
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
processingTimeMs: durationMs,
|
||||
effectiveProfile: data.effectiveProfile,
|
||||
canonicalModel: data.canonicalModel,
|
||||
snapshotParamsJson: {
|
||||
...(data.snapshotParams ?? {}),
|
||||
retrievalDevice: result.device,
|
||||
},
|
||||
});
|
||||
this.logger.log(
|
||||
`Embedding completed for document ${documentPublicId} — ${result.chunksEmbedded} chunks embedded`
|
||||
);
|
||||
@@ -782,6 +826,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
}
|
||||
|
||||
private async processRagPrepare(data: AiBatchJobData): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
const payload = data.payload || {};
|
||||
const documentPublicId =
|
||||
(payload.documentPublicId as string) || data.documentPublicId;
|
||||
@@ -795,12 +840,9 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
const documentDate = (payload.documentDate as string) || undefined;
|
||||
let cachedOcrText = (payload.cachedOcrText as string) || undefined;
|
||||
const attachmentPath = (payload.attachmentPath as string) || undefined;
|
||||
|
||||
this.logger.log(
|
||||
`processRagPrepare: starting for doc=${documentPublicId}, project=${projectPublicId}`
|
||||
);
|
||||
|
||||
// T020a: Resolve OCR text. Use cached if available; otherwise extract using OcrService
|
||||
if (!cachedOcrText && attachmentPath) {
|
||||
this.logger.log(
|
||||
`processRagPrepare: No cached OCR text. Extracting text from ${attachmentPath}...`
|
||||
@@ -808,6 +850,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
try {
|
||||
const ocrResult = await this.ocrService.detectAndExtract({
|
||||
pdfPath: attachmentPath,
|
||||
activeProfile: data.effectiveProfile,
|
||||
});
|
||||
cachedOcrText = ocrResult.text;
|
||||
} catch (err: unknown) {
|
||||
@@ -816,28 +859,23 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cachedOcrText) {
|
||||
this.logger.warn(
|
||||
`processRagPrepare: ไม่มี OCR text และไม่มี attachment path - skip embedding`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// T020b: skip-guard (< 50 chars)
|
||||
if (cachedOcrText.trim().length < 50) {
|
||||
this.logger.warn(
|
||||
`processRagPrepare: OCR text สั้นเกินไป (${cachedOcrText.trim().length} chars) — skip embedding`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// T020c: embed + upsert pipeline
|
||||
try {
|
||||
this.logger.log(
|
||||
`processRagPrepare: chunking and embedding document ${documentPublicId}...`
|
||||
);
|
||||
await this.embeddingService.embedDocument(
|
||||
const result = await this.embeddingService.embedDocument(
|
||||
projectPublicId,
|
||||
documentPublicId,
|
||||
correspondenceNumber,
|
||||
@@ -848,6 +886,19 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
documentDate,
|
||||
cachedOcrText
|
||||
);
|
||||
const durationMs = Date.now() - startTime;
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId,
|
||||
aiModel: data.canonicalModel ?? 'np-dms-ai',
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
processingTimeMs: durationMs,
|
||||
effectiveProfile: data.effectiveProfile,
|
||||
canonicalModel: data.canonicalModel,
|
||||
snapshotParamsJson: {
|
||||
...(data.snapshotParams ?? {}),
|
||||
retrievalDevice: result.device,
|
||||
},
|
||||
});
|
||||
this.logger.log(
|
||||
`processRagPrepare: successfully processed document ${documentPublicId}`
|
||||
);
|
||||
@@ -864,6 +915,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
const { documentPublicId, projectPublicId, payload, batchId } = job.data;
|
||||
const modelUsed = job.data.canonicalModel;
|
||||
const docNumber = payload.documentNumber as string;
|
||||
const contextOverride =
|
||||
payload.contextOverride &&
|
||||
@@ -888,6 +940,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
try {
|
||||
ocrResult = await this.ocrService.detectAndExtract({
|
||||
pdfPath: attachment.filePath,
|
||||
activeProfile: job.data.effectiveProfile,
|
||||
});
|
||||
} catch (err: unknown) {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
@@ -904,6 +957,9 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
status: AiAuditStatus.FAILED,
|
||||
errorMessage: errMsg,
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
effectiveProfile: job.data.effectiveProfile,
|
||||
canonicalModel: job.data.canonicalModel,
|
||||
snapshotParamsJson: job.data.snapshotParams,
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
@@ -930,11 +986,28 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
|
||||
let aiResponse: string;
|
||||
try {
|
||||
aiResponse = await this.ollamaService.generate(resolvedPrompt, {
|
||||
const snapshotParams = job.data.snapshotParams;
|
||||
const generateOptions: OllamaGenerateOptions = {
|
||||
format: 'json',
|
||||
timeoutMs: 120000,
|
||||
options: { num_ctx: 16384, num_predict: 4096 },
|
||||
});
|
||||
model: modelUsed,
|
||||
};
|
||||
if (snapshotParams) {
|
||||
generateOptions.options = {
|
||||
temperature: snapshotParams.temperature,
|
||||
top_p: snapshotParams.topP,
|
||||
num_predict: snapshotParams.maxTokens,
|
||||
num_ctx: snapshotParams.numCtx,
|
||||
repeat_penalty: snapshotParams.repeatPenalty,
|
||||
};
|
||||
generateOptions.keepAlive = snapshotParams.keepAliveSeconds;
|
||||
} else {
|
||||
generateOptions.options = { num_ctx: 16384, num_predict: 4096 };
|
||||
}
|
||||
aiResponse = await this.ollamaService.generate(
|
||||
resolvedPrompt,
|
||||
generateOptions
|
||||
);
|
||||
} catch (err: unknown) {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
this.logger.error(`การวิเคราะห์ของ AI ล้มเหลว: ${errMsg}`);
|
||||
@@ -946,10 +1019,13 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
});
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId,
|
||||
aiModel: this.ollamaService.getMainModelName(),
|
||||
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
|
||||
status: AiAuditStatus.FAILED,
|
||||
errorMessage: errMsg,
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
effectiveProfile: job.data.effectiveProfile,
|
||||
canonicalModel: job.data.canonicalModel,
|
||||
snapshotParamsJson: job.data.snapshotParams,
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
@@ -972,10 +1048,13 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
});
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId,
|
||||
aiModel: this.ollamaService.getMainModelName(),
|
||||
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
|
||||
status: AiAuditStatus.FAILED,
|
||||
errorMessage: errMsg,
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
effectiveProfile: job.data.effectiveProfile,
|
||||
canonicalModel: job.data.canonicalModel,
|
||||
snapshotParamsJson: job.data.snapshotParams,
|
||||
});
|
||||
throw new Error(errMsg);
|
||||
}
|
||||
@@ -1132,11 +1211,14 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId,
|
||||
aiModel: this.ollamaService.getMainModelName(),
|
||||
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
aiSuggestionJson: extractedMetadata as unknown as Record<string, unknown>,
|
||||
confidenceScore: confidence,
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
effectiveProfile: job.data.effectiveProfile,
|
||||
canonicalModel: job.data.canonicalModel,
|
||||
snapshotParamsJson: job.data.snapshotParams,
|
||||
});
|
||||
this.logger.log(
|
||||
`ประมวลผลเอกสาร ${docNumber} สำเร็จและถูกส่งเข้า Staging Queue แล้ว`
|
||||
@@ -1151,6 +1233,9 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
confidenceScore?: number;
|
||||
processingTimeMs?: number;
|
||||
errorMessage?: string;
|
||||
effectiveProfile?: string;
|
||||
canonicalModel?: string;
|
||||
snapshotParamsJson?: Record<string, unknown>;
|
||||
}): Promise<void> {
|
||||
try {
|
||||
const log = this.aiAuditLogRepo.create({
|
||||
@@ -1162,6 +1247,9 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
confidenceScore: data.confidenceScore,
|
||||
processingTimeMs: data.processingTimeMs,
|
||||
errorMessage: data.errorMessage,
|
||||
effectiveProfile: data.effectiveProfile,
|
||||
canonicalModel: data.canonicalModel,
|
||||
snapshotParamsJson: data.snapshotParamsJson,
|
||||
});
|
||||
await this.aiAuditLogRepo.save(log);
|
||||
} catch (err: unknown) {
|
||||
@@ -1170,4 +1258,149 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private async processRagQuery(job: Job<AiBatchJobData>): Promise<void> {
|
||||
const payload = job.data.payload || {};
|
||||
const query = typeof payload['query'] === 'string' ? payload['query'] : '';
|
||||
if (query.trim().length === 0) {
|
||||
throw new Error('payload.query is required for rag-query jobs');
|
||||
}
|
||||
const requestPublicId =
|
||||
typeof payload['requestPublicId'] === 'string'
|
||||
? payload['requestPublicId']
|
||||
: job.data.idempotencyKey;
|
||||
const userPublicId =
|
||||
typeof payload['userPublicId'] === 'string'
|
||||
? payload['userPublicId']
|
||||
: 'system';
|
||||
await this.ragService.processQuery(
|
||||
requestPublicId,
|
||||
query,
|
||||
job.data.projectPublicId,
|
||||
userPublicId,
|
||||
new AbortController().signal
|
||||
);
|
||||
}
|
||||
|
||||
private async processSuggest(
|
||||
job: Job<AiBatchJobData>
|
||||
): Promise<Record<string, unknown>> {
|
||||
const startTime = Date.now();
|
||||
try {
|
||||
if (job.data.documentPublicId) {
|
||||
await this.setAiProcessingStatus(
|
||||
job.data.documentPublicId,
|
||||
'PROCESSING'
|
||||
);
|
||||
}
|
||||
const payload = job.data.payload || {};
|
||||
const extractedText =
|
||||
typeof payload['extractedText'] === 'string'
|
||||
? payload['extractedText']
|
||||
: '';
|
||||
const pdfPath =
|
||||
typeof payload['pdfPath'] === 'string' ? payload['pdfPath'] : undefined;
|
||||
const extractedChars =
|
||||
typeof payload['extractedChars'] === 'number'
|
||||
? payload['extractedChars']
|
||||
: extractedText.length;
|
||||
const textResult = await this.ocrService.detectAndExtract({
|
||||
extractedText,
|
||||
extractedChars,
|
||||
pdfPath,
|
||||
});
|
||||
const prompt = [
|
||||
'Extract concise DMS metadata from this engineering document.',
|
||||
'Return only JSON with fields: title, documentType, category, confidenceScore.',
|
||||
textResult.text.slice(0, 6000),
|
||||
].join('\n');
|
||||
const rawOutput = await this.ollamaService.generate(prompt);
|
||||
const suggestion = this.parseSuggestion(rawOutput);
|
||||
const masterCategories = Array.isArray(payload['masterDataCategories'])
|
||||
? (payload['masterDataCategories'] as string[])
|
||||
: undefined;
|
||||
const normalizedSuggestion = this.flagUnknownCategories(
|
||||
suggestion,
|
||||
masterCategories
|
||||
);
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId: job.data.documentPublicId,
|
||||
aiModel:
|
||||
job.data.canonicalModel ?? this.ollamaService.getMainModelName(),
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
aiSuggestionJson: normalizedSuggestion,
|
||||
confidenceScore: this.extractConfidence(normalizedSuggestion),
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
effectiveProfile: job.data.effectiveProfile,
|
||||
canonicalModel: job.data.canonicalModel,
|
||||
snapshotParamsJson: job.data.snapshotParams,
|
||||
});
|
||||
if (job.data.documentPublicId) {
|
||||
await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
|
||||
}
|
||||
return {
|
||||
suggestion: normalizedSuggestion,
|
||||
ocrUsed: textResult.ocrUsed,
|
||||
};
|
||||
} catch (err) {
|
||||
if (job.data.documentPublicId) {
|
||||
await this.setAiProcessingStatus(job.data.documentPublicId, 'FAILED');
|
||||
}
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId: job.data.documentPublicId,
|
||||
aiModel:
|
||||
job.data.canonicalModel ?? this.ollamaService.getMainModelName(),
|
||||
status: AiAuditStatus.FAILED,
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
errorMessage: err instanceof Error ? err.message : String(err),
|
||||
effectiveProfile: job.data.effectiveProfile,
|
||||
canonicalModel: job.data.canonicalModel,
|
||||
snapshotParamsJson: job.data.snapshotParams,
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
private parseSuggestion(rawOutput: string): Record<string, unknown> {
|
||||
try {
|
||||
const parsed = JSON.parse(rawOutput) as unknown;
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||
return parsed as Record<string, unknown>;
|
||||
}
|
||||
} catch {
|
||||
this.logger.warn('AI suggestion output was not valid JSON');
|
||||
}
|
||||
return {
|
||||
title: rawOutput.slice(0, 250),
|
||||
confidenceScore: 0,
|
||||
is_unknown: true,
|
||||
};
|
||||
}
|
||||
|
||||
private flagUnknownCategories(
|
||||
suggestion: Record<string, unknown>,
|
||||
masterDataCategories: unknown
|
||||
): Record<string, unknown> {
|
||||
if (!Array.isArray(masterDataCategories)) return suggestion;
|
||||
const knownValues = new Set(
|
||||
masterDataCategories
|
||||
.filter((value): value is string => typeof value === 'string')
|
||||
.map((value) => value.toLowerCase())
|
||||
);
|
||||
const category = suggestion['category'];
|
||||
if (
|
||||
typeof category === 'string' &&
|
||||
!knownValues.has(category.toLowerCase())
|
||||
) {
|
||||
return { ...suggestion, is_unknown: true };
|
||||
}
|
||||
return suggestion;
|
||||
}
|
||||
|
||||
private extractConfidence(
|
||||
suggestion: Record<string, unknown>
|
||||
): number | undefined {
|
||||
const confidence = suggestion['confidenceScore'];
|
||||
return typeof confidence === 'number' ? confidence : undefined;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
// File: src/modules/ai/processors/ai-realtime.processor.ts
|
||||
// File: backend/src/modules/ai/processors/ai-realtime.processor.ts
|
||||
// Change Log
|
||||
// - 2026-05-15: เพิ่ม processor สำหรับ ai-realtime queue และ pause/resume ai-batch ตาม ADR-023A.
|
||||
// - 2026-06-03: ADR-034 — เปลี่ยน aiModel ใน audit log จาก hardcode 'gemma4' เป็น ollamaService.getMainModelName()
|
||||
// - 2026-06-11: ปรับ concurrency และเพิ่ม job classification เพื่อ redirect ไป ai-batch (US4)
|
||||
// - 2026-06-11: แก้ไขปัญหา compile error สำหรับ unreachable check ใน switch-case และลบบรรทัดว่างในฟังก์ชัน process
|
||||
|
||||
import {
|
||||
Processor,
|
||||
@@ -22,7 +24,11 @@ import { Attachment } from '../../../common/file-storage/entities/attachment.ent
|
||||
import { OcrService } from '../services/ocr.service';
|
||||
import { OllamaService } from '../services/ollama.service';
|
||||
|
||||
export type AiRealtimeJobType = 'ai-suggest' | 'rag-query';
|
||||
export type AiRealtimeJobType =
|
||||
| 'ai-suggest'
|
||||
| 'rag-query'
|
||||
| 'intent-classify'
|
||||
| 'tool-suggest';
|
||||
|
||||
export interface AiRealtimeJobData {
|
||||
jobType: AiRealtimeJobType;
|
||||
@@ -34,9 +40,16 @@ export interface AiRealtimeJobData {
|
||||
}
|
||||
|
||||
/** Processor สำหรับงาน AI interactive ที่ต้องกัน batch job ระหว่างใช้ GPU */
|
||||
@Processor(QUEUE_AI_REALTIME, { concurrency: 1 })
|
||||
@Processor(QUEUE_AI_REALTIME, {
|
||||
concurrency: Number(
|
||||
process.env.AI_REALTIME_CONCURRENCY ||
|
||||
process.env.REALTIME_CONCURRENCY ||
|
||||
'2'
|
||||
),
|
||||
})
|
||||
export class AiRealtimeProcessor extends WorkerHost {
|
||||
private readonly logger = new Logger(AiRealtimeProcessor.name);
|
||||
private activeRealtimeJobs = 0;
|
||||
|
||||
constructor(
|
||||
@InjectQueue(QUEUE_AI_BATCH)
|
||||
@@ -53,12 +66,32 @@ export class AiRealtimeProcessor extends WorkerHost {
|
||||
|
||||
/** Dispatch งาน ai-realtime ตาม jobType */
|
||||
async process(job: Job<AiRealtimeJobData>): Promise<unknown> {
|
||||
const LIGHTWEIGHT_REALTIME_JOBS = ['intent-classify', 'tool-suggest'];
|
||||
const isLightweight = LIGHTWEIGHT_REALTIME_JOBS.includes(job.data.jobType);
|
||||
this.logger.log(
|
||||
`Job classification decision — jobId=${String(job.id)}, jobType=${job.data.jobType}, isLightweight=${isLightweight}`
|
||||
);
|
||||
if (!isLightweight) {
|
||||
this.logger.warn(
|
||||
`Redirecting generation-heavy job to ai-batch queue — jobId=${String(job.id)}, jobType=${String(job.data.jobType)}`
|
||||
);
|
||||
await this.aiBatchQueue.add(job.data.jobType, job.data, {
|
||||
jobId: job.id ?? undefined,
|
||||
});
|
||||
return;
|
||||
}
|
||||
switch (job.data.jobType) {
|
||||
case 'intent-classify':
|
||||
this.logger.log(`Processing intent-classify — jobId=${String(job.id)}`);
|
||||
return { success: true, intent: 'GET_RFA' };
|
||||
case 'tool-suggest':
|
||||
this.logger.log(`Processing tool-suggest — jobId=${String(job.id)}`);
|
||||
return { success: true, suggestions: [] };
|
||||
case 'ai-suggest':
|
||||
return this.processSuggest(job);
|
||||
case 'rag-query':
|
||||
this.logger.log(`RAG query queued — jobId=${String(job.id)}`);
|
||||
return;
|
||||
throw new Error(
|
||||
`Job type ${job.data.jobType} should have been redirected to batch queue.`
|
||||
);
|
||||
default: {
|
||||
const unreachable: never = job.data.jobType;
|
||||
throw new Error(
|
||||
@@ -203,27 +236,48 @@ export class AiRealtimeProcessor extends WorkerHost {
|
||||
/** เมื่อ interactive job เริ่ม ให้ pause batch queue เพื่อกัน GPU contention */
|
||||
@OnWorkerEvent('active')
|
||||
async onActive(job: Job<AiRealtimeJobData>): Promise<void> {
|
||||
await this.aiBatchQueue.pause();
|
||||
this.activeRealtimeJobs += 1;
|
||||
if (this.activeRealtimeJobs === 1) {
|
||||
await this.aiBatchQueue.pause();
|
||||
this.logger.warn(
|
||||
`ai-batch paused while ai-realtime job is active — jobId=${String(job.id)}`
|
||||
);
|
||||
return;
|
||||
}
|
||||
this.logger.warn(
|
||||
`ai-batch paused while ai-realtime job is active — jobId=${String(job.id)}`
|
||||
`ai-realtime active jobs=${String(this.activeRealtimeJobs)} — keep ai-batch paused`
|
||||
);
|
||||
}
|
||||
|
||||
/** เมื่อ interactive job เสร็จ ให้ resume batch queue */
|
||||
@OnWorkerEvent('completed')
|
||||
async onCompleted(job: Job<AiRealtimeJobData>): Promise<void> {
|
||||
await this.aiBatchQueue.resume();
|
||||
this.activeRealtimeJobs = Math.max(0, this.activeRealtimeJobs - 1);
|
||||
if (this.activeRealtimeJobs === 0) {
|
||||
await this.aiBatchQueue.resume();
|
||||
this.logger.log(
|
||||
`ai-batch resumed after ai-realtime completion — jobId=${String(job.id)}`
|
||||
);
|
||||
return;
|
||||
}
|
||||
this.logger.log(
|
||||
`ai-batch resumed after ai-realtime completion — jobId=${String(job.id)}`
|
||||
`ai-realtime jobs still active (${String(this.activeRealtimeJobs)}) — ai-batch remains paused`
|
||||
);
|
||||
}
|
||||
|
||||
/** เมื่อ interactive job fail ให้ resume batch queue เช่นกัน */
|
||||
@OnWorkerEvent('failed')
|
||||
async onFailed(job: Job<AiRealtimeJobData> | undefined): Promise<void> {
|
||||
await this.aiBatchQueue.resume();
|
||||
this.activeRealtimeJobs = Math.max(0, this.activeRealtimeJobs - 1);
|
||||
if (this.activeRealtimeJobs === 0) {
|
||||
await this.aiBatchQueue.resume();
|
||||
this.logger.warn(
|
||||
`ai-batch resumed after ai-realtime failure — jobId=${String(job?.id ?? 'unknown')}`
|
||||
);
|
||||
return;
|
||||
}
|
||||
this.logger.warn(
|
||||
`ai-batch resumed after ai-realtime failure — jobId=${String(job?.id ?? 'unknown')}`
|
||||
`ai-realtime jobs still active after failure (${String(this.activeRealtimeJobs)}) — ai-batch remains paused`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user