690530:1121 ADR-030-231-ocr-sandbox-two-step-flow #01
This commit is contained in:
@@ -103,7 +103,11 @@ export class AiQueueService {
|
||||
* @idempotency `jobId = payload.idempotencyKey`
|
||||
*/
|
||||
async enqueueSandboxJob(
|
||||
jobType: 'sandbox-rag' | 'sandbox-extract',
|
||||
jobType:
|
||||
| 'sandbox-rag'
|
||||
| 'sandbox-extract'
|
||||
| 'sandbox-ocr-only'
|
||||
| 'sandbox-ai-extract',
|
||||
payload: {
|
||||
idempotencyKey: string;
|
||||
projectPublicId?: string;
|
||||
@@ -111,6 +115,7 @@ export class AiQueueService {
|
||||
userPublicId?: string;
|
||||
filePublicId?: string;
|
||||
pdfPath?: string;
|
||||
extraPayload?: Record<string, unknown>;
|
||||
}
|
||||
): Promise<string> {
|
||||
const job = await this.batchQueue.add(
|
||||
@@ -124,6 +129,7 @@ export class AiQueueService {
|
||||
userPublicId: payload.userPublicId,
|
||||
filePublicId: payload.filePublicId,
|
||||
pdfPath: payload.pdfPath,
|
||||
...payload.extraPayload,
|
||||
},
|
||||
idempotencyKey: payload.idempotencyKey,
|
||||
},
|
||||
|
||||
@@ -40,6 +40,8 @@ import {
|
||||
ApiHeader,
|
||||
ApiParam,
|
||||
ApiQuery,
|
||||
ApiConsumes,
|
||||
ApiBody,
|
||||
} from '@nestjs/swagger';
|
||||
import { AiService, ExtractionResult, PaginatedResult } from './ai.service';
|
||||
import { AiSettingsService } from './ai-settings.service';
|
||||
@@ -508,6 +510,77 @@ export class AiController {
|
||||
return { requestPublicId, jobId, status: 'queued' };
|
||||
}
|
||||
|
||||
// --- Step 1: OCR only (check OCR quality before testing the AI step) ---

/**
 * Step 1 of the sandbox flow: stores the uploaded PDF and queues a
 * `sandbox-ocr-only` job that points at the stored file.
 *
 * The upload is checked by `ParseFilePipe` (at most 50 MiB, file type `pdf`).
 * A fresh UUIDv7 is generated per call and used as the job's idempotency key;
 * the same value is returned to the caller as `requestPublicId`.
 *
 * NOTE(review): no `@UseInterceptors(FileInterceptor('file'))` is visible on
 * this route — confirm an interceptor populates `@UploadedFile()` here,
 * otherwise the pipe will reject every request as having no file.
 *
 * @param file Uploaded PDF taken from the multipart `file` field
 * @param user Authenticated user; `user.user_id` is passed to the file storage
 * @returns The generated request id, the queue job id and the literal status `queued`
 */
@Post('admin/sandbox/ocr')
@UseGuards(JwtAuthGuard, RbacGuard)
@RequirePermission('system.manage_all')
@ApiOperation({
  summary: 'Step 1: Run OCR Only — สำหรับตรวจคุณภาพ OCR ก่อนทดสอบ AI',
  description:
    'Upload PDF และรัน OCR เท่านั้น ไม่เรียก LLM — ผลลัพธ์ cache ไว้สำหรับ Step 2',
})
@ApiConsumes('multipart/form-data')
@ApiBody({
  schema: {
    type: 'object',
    properties: { file: { type: 'string', format: 'binary' } },
  },
})
async submitSandboxOcr(
  @UploadedFile(
    new ParseFilePipe({
      validators: [
        new MaxFileSizeValidator({ maxSize: 50 * 1024 * 1024 }),
        new FileTypeValidator({ fileType: 'pdf' }),
      ],
    })
  )
  file: Express.Multer.File,
  @CurrentUser() user: User
): Promise<{ requestPublicId: string; jobId: string; status: string }> {
  // Persist the upload first so the worker can read it from disk by path.
  const stored = await this.fileStorageService.upload(file, user.user_id);

  const requestPublicId = uuidv7();
  const jobPayload = {
    idempotencyKey: requestPublicId,
    pdfPath: stored.filePath,
  };
  const jobId = await this.aiQueueService.enqueueSandboxJob(
    'sandbox-ocr-only',
    jobPayload
  );

  return { requestPublicId, jobId, status: 'queued' };
}
|
||||
|
||||
// --- Step 2: AI extraction (uses the OCR text cached by Step 1) ---

/**
 * Step 2 of the sandbox flow: queues a `sandbox-ai-extract` job keyed by the
 * `requestPublicId` obtained from Step 1, optionally pinned to a prompt version.
 *
 * NOTE(review): the Step-1 id is reused as `idempotencyKey`, and the queue
 * service documents `jobId = payload.idempotencyKey`. Confirm the queue does
 * not treat this job as a duplicate of the Step-1 job and silently drop it.
 *
 * NOTE(review): `dto` is an inline object type, so presumably no runtime
 * validation runs on the body — confirm `requestPublicId` cannot arrive empty.
 *
 * @param dto Request body with the Step-1 `requestPublicId` and an optional `promptVersion`
 * @returns The request id, the queue job id and the literal status `queued`
 */
@Post('admin/sandbox/ai-extract')
@UseGuards(JwtAuthGuard, RbacGuard)
@RequirePermission('system.manage_all')
@ApiOperation({
  summary: 'Step 2: Run AI Extraction — ใช้ OCR text ที่ cache จาก Step 1',
  description:
    'รับ requestPublicId จาก Step 1 และ optional promptVersion แล้ว run LLM extraction',
})
async submitSandboxAiExtract(
  @Body() dto: { requestPublicId: string; promptVersion?: number }
): Promise<{ requestPublicId: string; jobId: string; status: string }> {
  const requestPublicId = dto.requestPublicId;
  const promptVersion = dto.promptVersion;

  const jobPayload = {
    idempotencyKey: requestPublicId,
    projectPublicId: 'default', // sandbox runs always use the default project
    extraPayload: { promptVersion },
  };
  const jobId = await this.aiQueueService.enqueueSandboxJob(
    'sandbox-ai-extract',
    jobPayload
  );

  return { requestPublicId, jobId, status: 'queued' };
}
|
||||
|
||||
// --- Webhook Callback จาก n8n (Service Account) ---
|
||||
|
||||
@Post('callback')
|
||||
|
||||
@@ -49,6 +49,8 @@ export type AiBatchJobType =
|
||||
| 'embed-document'
|
||||
| 'sandbox-rag'
|
||||
| 'sandbox-extract'
|
||||
| 'sandbox-ocr-only'
|
||||
| 'sandbox-ai-extract'
|
||||
| 'migrate-document';
|
||||
|
||||
export interface AiBatchJobData {
|
||||
@@ -197,6 +199,18 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
);
|
||||
await this.processSandboxExtract(job.data);
|
||||
return;
|
||||
case 'sandbox-ocr-only':
|
||||
this.logger.log(
|
||||
`Sandbox OCR-Only job processing — jobId=${String(job.id)}`
|
||||
);
|
||||
await this.processSandboxOcrOnly(job.data);
|
||||
return;
|
||||
case 'sandbox-ai-extract':
|
||||
this.logger.log(
|
||||
`Sandbox AI-Extract job processing — jobId=${String(job.id)}`
|
||||
);
|
||||
await this.processSandboxAiExtract(job.data);
|
||||
return;
|
||||
case 'migrate-document':
|
||||
this.logger.log(
|
||||
`Migrate document job processing — jobId=${String(job.id)}`
|
||||
@@ -369,6 +383,186 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Step 1 of the sandbox flow: runs OCR only, so OCR quality can be inspected
 * before the AI step is tested.
 *
 * Redis keys written (each with a 3600 s TTL):
 * - `ai:rag:result:<idempotencyKey>` — job status (`processing`, then
 *   `completed` with the OCR output, or `failed` with the error message).
 * - `ai:sandbox:ocr:<idempotencyKey>` — the OCR text cached for Step 2.
 *
 * Input validation happens inside the reporting `try` block so that a missing
 * or non-string `pdfPath` is recorded as `failed` on the result key instead of
 * failing the job without leaving any status behind.
 *
 * @param data Job data; `payload.pdfPath` must be a non-empty string
 * @throws Error when `pdfPath` is missing or not a string; any error raised by
 *   the OCR service or Redis is rethrown after the failure has been recorded
 */
private async processSandboxOcrOnly(data: AiBatchJobData): Promise<void> {
  const { idempotencyKey, payload } = data;
  const resultKey = `ai:rag:result:${idempotencyKey}`;
  const ttlSeconds = 3600;

  // Single writer for the status record so every state shares key, TTL and shape.
  const publish = async (fields: Record<string, unknown>): Promise<void> => {
    await this.redis.setex(
      resultKey,
      ttlSeconds,
      JSON.stringify({ requestPublicId: idempotencyKey, ...fields })
    );
  };

  await publish({ status: 'processing' });

  try {
    // Narrow with a runtime check instead of a type assertion: the payload
    // comes off the queue and is not guaranteed to carry a string here.
    const pdfPath: unknown = payload.pdfPath;
    if (typeof pdfPath !== 'string' || pdfPath.length === 0) {
      throw new Error('pdfPath is required for sandbox-ocr-only job');
    }

    const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });

    // Cache the OCR text for Step 2.
    await this.redis.setex(
      `ai:sandbox:ocr:${idempotencyKey}`,
      ttlSeconds,
      JSON.stringify({
        ocrText: ocrResult.text,
        ocrUsed: ocrResult.ocrUsed,
        timestamp: new Date().toISOString(),
      })
    );

    await publish({
      status: 'completed',
      ocrText: ocrResult.text,
      ocrUsed: ocrResult.ocrUsed,
      completedAt: new Date().toISOString(),
    });
  } catch (err: unknown) {
    const errMsg = err instanceof Error ? err.message : String(err);
    this.logger.error(`Sandbox OCR-only failed: ${errMsg}`);
    await publish({
      status: 'failed',
      errorMessage: errMsg,
      completedAt: new Date().toISOString(),
    });
    // Rethrow so the queue still marks the job itself as failed.
    throw err;
  }
}
|
||||
|
||||
/**
 * Step 2 of the sandbox flow: runs LLM extraction on the OCR text that Step 1
 * cached under `ai:sandbox:ocr:<idempotencyKey>`.
 *
 * Status is reported on `ai:rag:result:<idempotencyKey>` (3600 s TTL):
 * `processing`, then `completed` with the extracted metadata, or `failed`
 * with the error message.
 *
 * Prompt selection: when `payload.promptVersion` is a truthy number, that
 * version of `ocr_extraction` is loaded; otherwise the active prompt is used.
 * The active prompt is only looked up when no explicit version was requested,
 * so a pinned version can be tested even when no prompt is currently active.
 *
 * Template filling is done in a single pass with a replacer function so that
 * `$`-sequences in the OCR text are inserted literally, every occurrence of a
 * placeholder is filled, and placeholder-looking text inside the OCR output is
 * not expanded a second time.
 *
 * @param data Job data; `payload.promptVersion` optionally pins the prompt version
 * @throws Error when the cached OCR text is missing or expired, the prompt
 *   cannot be found, or the LLM response is not valid JSON; all errors are
 *   rethrown after the failure has been recorded
 */
private async processSandboxAiExtract(data: AiBatchJobData): Promise<void> {
  const { idempotencyKey, payload, projectPublicId } = data;
  const promptVersion = (payload.promptVersion as number) || undefined;
  const resultKey = `ai:rag:result:${idempotencyKey}`;
  const ttlSeconds = 3600;

  // Single writer for the status record so every state shares key, TTL and shape.
  const publish = async (fields: Record<string, unknown>): Promise<void> => {
    await this.redis.setex(
      resultKey,
      ttlSeconds,
      JSON.stringify({ requestPublicId: idempotencyKey, ...fields })
    );
  };

  await publish({ status: 'processing' });

  try {
    // Load the OCR text cached by Step 1.
    const cachedOcr = await this.redis.get(
      `ai:sandbox:ocr:${idempotencyKey}`
    );
    if (!cachedOcr) {
      throw new Error(
        'OCR text not found or expired, please run Step 1 first'
      );
    }
    const parsedOcr = JSON.parse(cachedOcr) as {
      ocrText: string;
      ocrUsed: boolean;
      timestamp: string;
    };
    const { ocrText } = parsedOcr;

    // Use the requested version when one is given, otherwise the active prompt.
    const targetPrompt = promptVersion
      ? await this.aiPromptsService.findByVersion(
          'ocr_extraction',
          promptVersion
        )
      : await this.aiPromptsService.getActive('ocr_extraction');

    if (!targetPrompt) {
      throw new Error(
        promptVersion
          ? `Prompt version ${promptVersion} not found`
          : 'No active ocr_extraction prompt version found'
      );
    }

    // Resolve the master-data context, then fill the template and run the LLM.
    const masterDataContext = await this.aiPromptsService.resolveContext(
      targetPrompt,
      projectPublicId
    );

    const substitutions: Record<string, string> = {
      ocr_text: ocrText,
      master_data_context: JSON.stringify(masterDataContext, null, 2),
    };
    // A replacer function is used on purpose: a plain string replacement
    // would interpret `$&`, `$'`, `$$` etc. found in the OCR text.
    const resolvedPrompt = targetPrompt.template.replace(
      /\{\{(ocr_text|master_data_context)\}\}/g,
      (placeholder: string, name: string) =>
        substitutions[name] ?? placeholder
    );

    const response = await this.ollamaService.generate(resolvedPrompt, {
      timeoutMs: 120000,
    });

    // Strip Markdown code fences the model may wrap around its JSON answer.
    const cleanedResponse = response
      .replace(/```json/g, '')
      .replace(/```/g, '')
      .trim();

    let extractedMetadata: Record<string, unknown>;
    try {
      extractedMetadata = JSON.parse(cleanedResponse) as Record<
        string,
        unknown
      >;
    } catch {
      throw new Error(
        `Failed to parse LLM response as JSON: ${cleanedResponse}`
      );
    }

    await this.aiPromptsService.saveTestResult(
      'ocr_extraction',
      targetPrompt.versionNumber,
      extractedMetadata
    );

    await publish({
      status: 'completed',
      answer: JSON.stringify(extractedMetadata, null, 2),
      ocrText,
      ocrUsed: parsedOcr.ocrUsed,
      promptVersionUsed: targetPrompt.versionNumber,
      completedAt: new Date().toISOString(),
    });
  } catch (err: unknown) {
    const errMsg = err instanceof Error ? err.message : String(err);
    this.logger.error(`Sandbox AI-extract failed: ${errMsg}`);
    await publish({
      status: 'failed',
      errorMessage: errMsg,
      completedAt: new Date().toISOString(),
    });
    // Rethrow so the queue still marks the job itself as failed.
    throw err;
  }
}
|
||||
|
||||
private async processMigrateDocument(
|
||||
job: Job<AiBatchJobData>
|
||||
): Promise<void> {
|
||||
|
||||
@@ -292,6 +292,21 @@ export class AiPromptsService {
|
||||
return prompt;
|
||||
}
|
||||
|
||||
/**
 * Looks up a single prompt by its type and version number.
 *
 * @param promptType Prompt type to search within
 * @param versionNumber Version number to fetch
 * @returns The prompt matching both values, or null when none is found
 */
async findByVersion(
  promptType: string,
  versionNumber: number
): Promise<AiPrompt | null> {
  const match = await this.aiPromptRepo.findOne({
    where: { promptType, versionNumber },
  });
  return match;
}
|
||||
|
||||
/**
|
||||
* ค้นหา prompt ที่มีผลใช้งานจริง และแทนที่ placeholder {{ocr_text}} ด้วยข้อความ OCR
|
||||
* @param promptType ประเภทของ prompt
|
||||
|
||||
Reference in New Issue
Block a user