690530:1121 ADR-030-231-ocr-sandbox-two-step-flow #01
This commit is contained in:
@@ -103,7 +103,11 @@ export class AiQueueService {
|
|||||||
* @idempotency `jobId = payload.idempotencyKey`
|
* @idempotency `jobId = payload.idempotencyKey`
|
||||||
*/
|
*/
|
||||||
async enqueueSandboxJob(
|
async enqueueSandboxJob(
|
||||||
jobType: 'sandbox-rag' | 'sandbox-extract',
|
jobType:
|
||||||
|
| 'sandbox-rag'
|
||||||
|
| 'sandbox-extract'
|
||||||
|
| 'sandbox-ocr-only'
|
||||||
|
| 'sandbox-ai-extract',
|
||||||
payload: {
|
payload: {
|
||||||
idempotencyKey: string;
|
idempotencyKey: string;
|
||||||
projectPublicId?: string;
|
projectPublicId?: string;
|
||||||
@@ -111,6 +115,7 @@ export class AiQueueService {
|
|||||||
userPublicId?: string;
|
userPublicId?: string;
|
||||||
filePublicId?: string;
|
filePublicId?: string;
|
||||||
pdfPath?: string;
|
pdfPath?: string;
|
||||||
|
extraPayload?: Record<string, unknown>;
|
||||||
}
|
}
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const job = await this.batchQueue.add(
|
const job = await this.batchQueue.add(
|
||||||
@@ -124,6 +129,7 @@ export class AiQueueService {
|
|||||||
userPublicId: payload.userPublicId,
|
userPublicId: payload.userPublicId,
|
||||||
filePublicId: payload.filePublicId,
|
filePublicId: payload.filePublicId,
|
||||||
pdfPath: payload.pdfPath,
|
pdfPath: payload.pdfPath,
|
||||||
|
...payload.extraPayload,
|
||||||
},
|
},
|
||||||
idempotencyKey: payload.idempotencyKey,
|
idempotencyKey: payload.idempotencyKey,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ import {
|
|||||||
ApiHeader,
|
ApiHeader,
|
||||||
ApiParam,
|
ApiParam,
|
||||||
ApiQuery,
|
ApiQuery,
|
||||||
|
ApiConsumes,
|
||||||
|
ApiBody,
|
||||||
} from '@nestjs/swagger';
|
} from '@nestjs/swagger';
|
||||||
import { AiService, ExtractionResult, PaginatedResult } from './ai.service';
|
import { AiService, ExtractionResult, PaginatedResult } from './ai.service';
|
||||||
import { AiSettingsService } from './ai-settings.service';
|
import { AiSettingsService } from './ai-settings.service';
|
||||||
@@ -508,6 +510,77 @@ export class AiController {
|
|||||||
return { requestPublicId, jobId, status: 'queued' };
|
return { requestPublicId, jobId, status: 'queued' };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Step 1: OCR only (lets an admin check OCR quality before testing the AI prompt) ---

/**
 * Stores an uploaded PDF and queues a `sandbox-ocr-only` job for it.
 *
 * The upload arrives as the multipart field `file`; it must be a PDF of at
 * most 50 MiB. A fresh UUIDv7 is generated as the request id and is also
 * passed to the queue as the idempotency key.
 *
 * `FileInterceptor('file')` is required for `@UploadedFile()` to receive
 * anything: without it the multipart body is never parsed and `ParseFilePipe`
 * rejects every request.
 * NOTE(review): assumes `UseInterceptors` and `FileInterceptor` are already
 * imported by this controller for its existing upload endpoint — confirm.
 *
 * @param file Uploaded PDF, validated for size and type.
 * @param user Authenticated caller; `user.user_id` is passed to the file storage upload.
 * @returns The request id, the queue job id and the literal status `'queued'`.
 */
@Post('admin/sandbox/ocr')
@UseGuards(JwtAuthGuard, RbacGuard)
@RequirePermission('system.manage_all')
@UseInterceptors(FileInterceptor('file'))
@ApiOperation({
  summary: 'Step 1: Run OCR Only — สำหรับตรวจคุณภาพ OCR ก่อนทดสอบ AI',
  description:
    'Upload PDF และรัน OCR เท่านั้น ไม่เรียก LLM — ผลลัพธ์ cache ไว้สำหรับ Step 2',
})
@ApiConsumes('multipart/form-data')
@ApiBody({
  schema: {
    type: 'object',
    properties: {
      file: {
        type: 'string',
        format: 'binary',
      },
    },
  },
})
async submitSandboxOcr(
  @UploadedFile(
    new ParseFilePipe({
      validators: [
        new MaxFileSizeValidator({ maxSize: 50 * 1024 * 1024 }),
        new FileTypeValidator({ fileType: 'pdf' }),
      ],
    })
  )
  file: Express.Multer.File,
  @CurrentUser() user: User
): Promise<{ requestPublicId: string; jobId: string; status: string }> {
  const attachment = await this.fileStorageService.upload(file, user.user_id);
  const requestPublicId = uuidv7();
  const jobId = await this.aiQueueService.enqueueSandboxJob(
    'sandbox-ocr-only',
    {
      idempotencyKey: requestPublicId,
      pdfPath: attachment.filePath,
    }
  );
  return { requestPublicId, jobId, status: 'queued' };
}
|
||||||
|
|
||||||
|
// --- Step 2: AI extraction (reuses the OCR text cached by Step 1) ---

/**
 * Queues a `sandbox-ai-extract` job for a request previously created by Step 1.
 *
 * The Step 1 `requestPublicId` is forwarded as the job's idempotency key, the
 * project is fixed to `'default'`, and the optional `promptVersion` travels in
 * `extraPayload`.
 *
 * NOTE(review): `dto` is declared as an inline type, so presumably no
 * class-based validation runs on the body — confirm that a missing or
 * non-string `requestPublicId` is rejected somewhere.
 * NOTE(review): the queue service documents `jobId = payload.idempotencyKey`,
 * and this call reuses the Step 1 id. Verify the queue does not treat the
 * Step 2 job (or a re-run with another prompt version) as a duplicate.
 *
 * @param dto Body carrying the Step 1 request id and an optional prompt version.
 * @returns The same request id, the queue job id and the literal status `'queued'`.
 */
@Post('admin/sandbox/ai-extract')
@UseGuards(JwtAuthGuard, RbacGuard)
@RequirePermission('system.manage_all')
@ApiOperation({
  summary: 'Step 2: Run AI Extraction — ใช้ OCR text ที่ cache จาก Step 1',
  description:
    'รับ requestPublicId จาก Step 1 และ optional promptVersion แล้ว run LLM extraction',
})
async submitSandboxAiExtract(
  @Body() dto: { requestPublicId: string; promptVersion?: number }
): Promise<{ requestPublicId: string; jobId: string; status: string }> {
  const { requestPublicId } = dto;
  const queuedJobId = await this.aiQueueService.enqueueSandboxJob(
    'sandbox-ai-extract',
    {
      idempotencyKey: requestPublicId,
      // The sandbox always runs against the default project.
      projectPublicId: 'default',
      extraPayload: { promptVersion: dto.promptVersion },
    }
  );
  return { requestPublicId, jobId: queuedJobId, status: 'queued' };
}
|
||||||
|
|
||||||
// --- Webhook Callback จาก n8n (Service Account) ---
|
// --- Webhook Callback จาก n8n (Service Account) ---
|
||||||
|
|
||||||
@Post('callback')
|
@Post('callback')
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ export type AiBatchJobType =
|
|||||||
| 'embed-document'
|
| 'embed-document'
|
||||||
| 'sandbox-rag'
|
| 'sandbox-rag'
|
||||||
| 'sandbox-extract'
|
| 'sandbox-extract'
|
||||||
|
| 'sandbox-ocr-only'
|
||||||
|
| 'sandbox-ai-extract'
|
||||||
| 'migrate-document';
|
| 'migrate-document';
|
||||||
|
|
||||||
export interface AiBatchJobData {
|
export interface AiBatchJobData {
|
||||||
@@ -197,6 +199,18 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
);
|
);
|
||||||
await this.processSandboxExtract(job.data);
|
await this.processSandboxExtract(job.data);
|
||||||
return;
|
return;
|
||||||
|
case 'sandbox-ocr-only':
|
||||||
|
this.logger.log(
|
||||||
|
`Sandbox OCR-Only job processing — jobId=${String(job.id)}`
|
||||||
|
);
|
||||||
|
await this.processSandboxOcrOnly(job.data);
|
||||||
|
return;
|
||||||
|
case 'sandbox-ai-extract':
|
||||||
|
this.logger.log(
|
||||||
|
`Sandbox AI-Extract job processing — jobId=${String(job.id)}`
|
||||||
|
);
|
||||||
|
await this.processSandboxAiExtract(job.data);
|
||||||
|
return;
|
||||||
case 'migrate-document':
|
case 'migrate-document':
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Migrate document job processing — jobId=${String(job.id)}`
|
`Migrate document job processing — jobId=${String(job.id)}`
|
||||||
@@ -369,6 +383,186 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Step 1 of the sandbox flow: runs OCR only, so OCR quality can be checked
 * before any AI prompt is tested.
 *
 * Writes job status to `ai:rag:result:<idempotencyKey>` (processing, then
 * completed or failed) and caches the OCR output under
 * `ai:sandbox:ocr:<idempotencyKey>` for Step 2. Both keys are written with a
 * 3600 second expiry.
 *
 * @param data Job data; `payload.pdfPath` must be set.
 * @throws Error when `pdfPath` is missing (thrown before any status is written).
 * @throws Rethrows any OCR or Redis failure after recording the `failed` status.
 */
private async processSandboxOcrOnly(data: AiBatchJobData): Promise<void> {
  const { idempotencyKey, payload } = data;
  const pdfPath = payload.pdfPath as string;

  if (!pdfPath) {
    throw new Error('pdfPath is required for sandbox-ocr-only job');
  }

  const statusKey = `ai:rag:result:${idempotencyKey}`;
  // Every status record starts with the request id, followed by the given fields.
  const writeStatus = (fields: Record<string, unknown>) =>
    this.redis.setex(
      statusKey,
      3600,
      JSON.stringify({ requestPublicId: idempotencyKey, ...fields })
    );

  await writeStatus({ status: 'processing' });

  try {
    const extraction = await this.ocrService.detectAndExtract({ pdfPath });

    // Cache the OCR output so Step 2 can reuse it.
    const cachedForStep2 = JSON.stringify({
      ocrText: extraction.text,
      ocrUsed: extraction.ocrUsed,
      timestamp: new Date().toISOString(),
    });
    await this.redis.setex(
      `ai:sandbox:ocr:${idempotencyKey}`,
      3600,
      cachedForStep2
    );

    await writeStatus({
      status: 'completed',
      ocrText: extraction.text,
      ocrUsed: extraction.ocrUsed,
      completedAt: new Date().toISOString(),
    });
  } catch (err: unknown) {
    const errMsg = err instanceof Error ? err.message : String(err);
    this.logger.error(`Sandbox OCR-only failed: ${errMsg}`);
    await writeStatus({
      status: 'failed',
      errorMessage: errMsg,
      completedAt: new Date().toISOString(),
    });
    throw err;
  }
}
|
||||||
|
|
||||||
|
/**
 * Step 2 of the sandbox flow: runs LLM extraction on the OCR text that Step 1
 * cached under `ai:sandbox:ocr:<idempotencyKey>`.
 *
 * Status is written to `ai:rag:result:<idempotencyKey>` with a 3600 second
 * expiry: `processing` first, then `completed` (with the extracted JSON, the
 * OCR text and the prompt version used) or `failed` (with the error message).
 * The parsed result is also stored through `aiPromptsService.saveTestResult`.
 *
 * @param data Job data; `payload.promptVersion` optionally selects a specific
 *   `ocr_extraction` prompt version, otherwise the active one is used.
 * @throws Error when the cached OCR text is missing, the requested or active
 *   prompt cannot be found, or the LLM response is not valid JSON. Every
 *   failure is recorded as `failed` and then rethrown.
 */
private async processSandboxAiExtract(data: AiBatchJobData): Promise<void> {
  const { idempotencyKey, payload, projectPublicId } = data;
  const promptVersion = (payload.promptVersion as number) || undefined;
  const statusKey = `ai:rag:result:${idempotencyKey}`;

  await this.redis.setex(
    statusKey,
    3600,
    JSON.stringify({
      requestPublicId: idempotencyKey,
      status: 'processing',
    })
  );

  try {
    // Load the OCR text cached by Step 1.
    const cachedOcr = await this.redis.get(
      `ai:sandbox:ocr:${idempotencyKey}`
    );
    if (!cachedOcr) {
      throw new Error(
        'OCR text not found or expired, please run Step 1 first'
      );
    }
    const parsedOcr = JSON.parse(cachedOcr) as {
      ocrText: string;
      ocrUsed: boolean;
      timestamp: string;
    };
    const { ocrText } = parsedOcr;

    // Only look up the active prompt when no explicit version was requested,
    // so a pinned version still works while no version is active.
    const targetPrompt = promptVersion
      ? await this.aiPromptsService.findByVersion(
          'ocr_extraction',
          promptVersion
        )
      : await this.aiPromptsService.getActive('ocr_extraction');

    if (!targetPrompt) {
      throw new Error(
        promptVersion
          ? `Prompt version ${promptVersion} not found`
          : 'No active ocr_extraction prompt version found'
      );
    }

    // Resolve the master-data context, then fill the template.
    const masterDataContext = await this.aiPromptsService.resolveContext(
      targetPrompt,
      projectPublicId
    );
    const masterDataJson = JSON.stringify(masterDataContext, null, 2);

    // A replacer function is used on purpose: a plain string replacement would
    // interpret `$&`, `$1`, `$$`, ... found in OCR text or JSON. The single
    // global pass fills every occurrence and never re-scans inserted text.
    const resolvedPrompt = targetPrompt.template.replace(
      /\{\{(ocr_text|master_data_context)\}\}/g,
      (_match: string, placeholder: string) =>
        placeholder === 'ocr_text' ? ocrText : masterDataJson
    );

    const response = await this.ollamaService.generate(resolvedPrompt, {
      timeoutMs: 120000,
    });

    // Strip Markdown code fences the model may wrap around its JSON.
    const cleanedResponse = response
      .replace(/```json/g, '')
      .replace(/```/g, '')
      .trim();

    let extractedMetadata: Record<string, unknown>;
    try {
      extractedMetadata = JSON.parse(cleanedResponse) as Record<
        string,
        unknown
      >;
    } catch {
      throw new Error(
        `Failed to parse LLM response as JSON: ${cleanedResponse}`
      );
    }

    await this.aiPromptsService.saveTestResult(
      'ocr_extraction',
      targetPrompt.versionNumber,
      extractedMetadata
    );

    await this.redis.setex(
      statusKey,
      3600,
      JSON.stringify({
        requestPublicId: idempotencyKey,
        status: 'completed',
        answer: JSON.stringify(extractedMetadata, null, 2),
        ocrText,
        ocrUsed: parsedOcr.ocrUsed,
        promptVersionUsed: targetPrompt.versionNumber,
        completedAt: new Date().toISOString(),
      })
    );
  } catch (err: unknown) {
    const errMsg = err instanceof Error ? err.message : String(err);
    this.logger.error(`Sandbox AI-extract failed: ${errMsg}`);
    await this.redis.setex(
      statusKey,
      3600,
      JSON.stringify({
        requestPublicId: idempotencyKey,
        status: 'failed',
        errorMessage: errMsg,
        completedAt: new Date().toISOString(),
      })
    );
    throw err;
  }
}
|
||||||
|
|
||||||
private async processMigrateDocument(
|
private async processMigrateDocument(
|
||||||
job: Job<AiBatchJobData>
|
job: Job<AiBatchJobData>
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
|
|||||||
@@ -292,6 +292,21 @@ export class AiPromptsService {
|
|||||||
return prompt;
|
return prompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Looks up a single prompt by its type and version number.
 *
 * @param promptType Prompt type to search within.
 * @param versionNumber Version number to fetch.
 * @returns The matching prompt, or `null` when none is found.
 */
async findByVersion(
  promptType: string,
  versionNumber: number
): Promise<AiPrompt | null> {
  const match = await this.aiPromptRepo.findOne({
    where: { promptType, versionNumber },
  });
  return match;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ค้นหา prompt ที่มีผลใช้งานจริง และแทนที่ placeholder {{ocr_text}} ด้วยข้อความ OCR
|
* ค้นหา prompt ที่มีผลใช้งานจริง และแทนที่ placeholder {{ocr_text}} ด้วยข้อความ OCR
|
||||||
* @param promptType ประเภทของ prompt
|
* @param promptType ประเภทของ prompt
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
// - 2026-05-26: เพิ่มการตรวจสอบ versionsQuery.data แบบทนทานเพื่อป้องกัน Error N.find is not a function ในกรณีที่ API ส่งข้อมูลแบบ wrapped object มา
|
// - 2026-05-26: เพิ่มการตรวจสอบ versionsQuery.data แบบทนทานเพื่อป้องกัน Error N.find is not a function ในกรณีที่ API ส่งข้อมูลแบบ wrapped object มา
|
||||||
// - 2026-05-29: เพิ่ม OCR Raw Text section ในผล sandbox
|
// - 2026-05-29: เพิ่ม OCR Raw Text section ในผล sandbox
|
||||||
// - 2026-05-29: ปรับปรุงการโหลด Active Prompt ให้ทนทานต่อ race conditions และรูปแบบประเภทข้อมูลที่ส่งมาจาก API (boolean, number, string)
|
// - 2026-05-29: ปรับปรุงการโหลด Active Prompt ให้ทนทานต่อ race conditions และรูปแบบประเภทข้อมูลที่ส่งมาจาก API (boolean, number, string)
|
||||||
|
// - 2026-05-30: Refactor เป็น 2-step flow (Step 1: OCR-only → Step 2: AI Extraction) ตาม spec 231
|
||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import React, { useState, useEffect } from 'react';
|
import React, { useState, useEffect } from 'react';
|
||||||
@@ -32,6 +33,7 @@ import { useTranslations } from '@/hooks/use-translations';
|
|||||||
import PromptVersionHistory from './PromptVersionHistory';
|
import PromptVersionHistory from './PromptVersionHistory';
|
||||||
import { cn } from '@/lib/utils';
|
import { cn } from '@/lib/utils';
|
||||||
import { AiPrompt } from '@/types/ai-prompts';
|
import { AiPrompt } from '@/types/ai-prompts';
|
||||||
|
import { adminAiService } from '@/lib/services/admin-ai.service';
|
||||||
|
|
||||||
const DEFAULT_OCR_TEMPLATE = `คุณคือเอนจิ้นสกัดข้อมูลอัจฉริยะ (Document Intelligence Engine)
|
const DEFAULT_OCR_TEMPLATE = `คุณคือเอนจิ้นสกัดข้อมูลอัจฉริยะ (Document Intelligence Engine)
|
||||||
วิเคราะห์ข้อความ OCR ที่ได้รับจากเอกสารของโครงการ Laem Chabang Port Phase 3 และสกัดข้อมูลเมตาดาต้าให้ออกมาเป็น JSON object ที่ถูกต้องตามโครงสร้างที่กำหนด
|
วิเคราะห์ข้อความ OCR ที่ได้รับจากเอกสารของโครงการ Laem Chabang Port Phase 3 และสกัดข้อมูลเมตาดาต้าให้ออกมาเป็น JSON object ที่ถูกต้องตามโครงสร้างที่กำหนด
|
||||||
@@ -103,7 +105,15 @@ export default function OcrSandboxPromptManager() {
|
|||||||
const [ocrFile, setOcrFile] = useState<File | null>(null);
|
const [ocrFile, setOcrFile] = useState<File | null>(null);
|
||||||
const [manualNote, setManualNote] = useState<string>('');
|
const [manualNote, setManualNote] = useState<string>('');
|
||||||
const [activeTab, setActiveTab] = useState<'editor' | 'sandbox'>('editor');
|
const [activeTab, setActiveTab] = useState<'editor' | 'sandbox'>('editor');
|
||||||
const { state: sandboxState, jobId: sandboxJobId, submit: submitSandbox, reset: resetSandbox } =
|
// 2-step flow states
|
||||||
|
const [sandboxStep, setSandboxStep] = useState<'ocr' | 'ai'>('ocr');
|
||||||
|
const [ocrResult, setOcrResult] = useState<{
|
||||||
|
requestPublicId: string;
|
||||||
|
ocrText: string;
|
||||||
|
ocrUsed: boolean;
|
||||||
|
} | null>(null);
|
||||||
|
const [selectedPromptVersion, setSelectedPromptVersion] = useState<number | undefined>(undefined);
|
||||||
|
const { state: sandboxState, jobId: sandboxJobId, reset: resetSandbox } =
|
||||||
useSandboxRun(() => {
|
useSandboxRun(() => {
|
||||||
// เมื่อ sandbox เสร็จสิ้น: รีเฟรชรายการเวอร์ชัน
|
// เมื่อ sandbox เสร็จสิ้น: รีเฟรชรายการเวอร์ชัน
|
||||||
versionsQuery.refetch();
|
versionsQuery.refetch();
|
||||||
@@ -175,27 +185,95 @@ export default function OcrSandboxPromptManager() {
|
|||||||
toast.error(error.response?.data?.message || t('ai.prompt.saveNoteError'));
|
toast.error(error.response?.data?.message || t('ai.prompt.saveNoteError'));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// Step 1: OCR-only handler
/**
 * Uploads the selected PDF for OCR only, then polls the job status until it
 * completes or fails.
 *
 * On completion the OCR text is stored in `ocrResult` and the flow advances to
 * the `'ai'` step. No active prompt is needed for this step.
 *
 * Polling is sequential: the next status request starts one second after the
 * previous one has finished. Overlapping requests from a fixed-rate timer could
 * each see `completed` and handle it more than once.
 */
const handleStep1Ocr = async (e: React.FormEvent) => {
  e.preventDefault();
  if (!ocrFile) {
    toast.error(t('ai.prompt.noFile'));
    return;
  }
  try {
    resetSandbox();
    setSandboxStep('ocr');
    const { requestPublicId } = await adminAiService.submitSandboxOcr(ocrFile);
    toast.success(t('ai.prompt.uploadSuccess'));

    // Upper bound so polling cannot run forever.
    // NOTE(review): 3600 polls (at least one hour) presumably matches the
    // backend's 3600 s result expiry — confirm.
    const maxPolls = 3600;
    for (let attempt = 0; attempt < maxPolls; attempt += 1) {
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
      try {
        const result = await adminAiService.getSandboxJobStatus(requestPublicId);
        if (result.status === 'completed') {
          setOcrResult({
            requestPublicId,
            ocrText: result.ocrText || '',
            ocrUsed: result.ocrUsed || false,
          });
          setSandboxStep('ai');
          toast.success('OCR completed successfully');
          return;
        }
        if (result.status === 'failed') {
          toast.error(result.errorMessage || 'OCR failed');
          return;
        }
      } catch (_err) {
        toast.error('Poll error occurred');
        return;
      }
    }
    toast.error('OCR polling timed out');
  } catch (err: unknown) {
    const error = err as { response?: { data?: { message?: string } } };
    toast.error(error.response?.data?.message || t('ai.prompt.uploadError'));
  }
};
|
};
|
||||||
|
// Step 2: AI Extraction handler
/**
 * Starts AI extraction for the OCR result of Step 1 and polls the job status
 * until it completes or fails.
 *
 * Requires a Step 1 result and an active prompt. The selected prompt version
 * (if any) is sent along with the Step 1 request id. On completion the version
 * list is refetched.
 *
 * Polling is sequential: the next status request starts one second after the
 * previous one has finished. Overlapping requests from a fixed-rate timer could
 * each see `completed` and handle it more than once.
 *
 * NOTE(review): nothing here feeds the completed result into `sandboxState`,
 * while the result card renders from `sandboxState.result` — confirm how the
 * Step 2 result is meant to reach the UI.
 */
const handleStep2AiExtract = async (e: React.FormEvent) => {
  e.preventDefault();
  if (!ocrResult) {
    toast.error('Please run Step 1 (OCR) first');
    return;
  }
  if (!activePrompt) {
    toast.error(t('ai.prompt.noActivePrompt'));
    return;
  }
  try {
    resetSandbox();
    const { requestPublicId } = await adminAiService.submitSandboxAiExtract(
      ocrResult.requestPublicId,
      selectedPromptVersion
    );
    toast.success('AI Extraction started');

    // Upper bound so polling cannot run forever.
    // NOTE(review): 3600 polls (at least one hour) presumably matches the
    // backend's 3600 s result expiry — confirm.
    const maxPolls = 3600;
    for (let attempt = 0; attempt < maxPolls; attempt += 1) {
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
      try {
        const result = await adminAiService.getSandboxJobStatus(requestPublicId);
        if (result.status === 'completed') {
          toast.success(t('ai.prompt.sandboxSuccess'));
          versionsQuery.refetch();
          return;
        }
        if (result.status === 'failed') {
          toast.error(result.errorMessage || 'AI Extraction failed');
          return;
        }
      } catch (_err) {
        toast.error('Poll error occurred');
        return;
      }
    }
    toast.error('AI Extraction polling timed out');
  } catch (err: unknown) {
    const error = err as { response?: { data?: { message?: string } } };
    toast.error(error.response?.data?.message || 'AI Extraction failed');
  }
};
|
||||||
|
// Reset the two-step flow
/**
 * Returns the sandbox to its initial state: clears the chosen file, the Step 1
 * OCR result and the selected prompt version, moves back to the `'ocr'` step
 * and resets the sandbox run hook.
 */
const handleResetSandbox = () => {
  setOcrFile(null);
  setOcrResult(null);
  setSelectedPromptVersion(undefined);
  setSandboxStep('ocr');
  resetSandbox();
};
|
||||||
// แปล status key เป็นข้อความตาม locale ปัจจุบัน
|
// แปล status key เป็นข้อความตาม locale ปัจจุบัน
|
||||||
const statusLabel = sandboxState.statusText ? t(sandboxState.statusText) : '';
|
|
||||||
return (
|
return (
|
||||||
<div className="grid gap-6 lg:grid-cols-12 items-start">
|
<div className="grid gap-6 lg:grid-cols-12 items-start">
|
||||||
<div className="lg:col-span-8 space-y-6">
|
<div className="lg:col-span-8 space-y-6">
|
||||||
@@ -282,11 +360,14 @@ export default function OcrSandboxPromptManager() {
|
|||||||
{t('ai.prompt.sandboxCardTitle')}
|
{t('ai.prompt.sandboxCardTitle')}
|
||||||
</CardTitle>
|
</CardTitle>
|
||||||
<p className="text-xs text-muted-foreground">
|
<p className="text-xs text-muted-foreground">
|
||||||
{t('ai.prompt.sandboxCardDesc')}
|
{sandboxStep === 'ocr'
|
||||||
|
? 'Step 1: Upload PDF and run OCR to check quality'
|
||||||
|
: 'Step 2: Test AI prompt with OCR text'}
|
||||||
</p>
|
</p>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent>
|
<CardContent>
|
||||||
<form onSubmit={handleSubmitOcr} className="space-y-4">
|
{sandboxStep === 'ocr' ? (
|
||||||
|
<form onSubmit={handleStep1Ocr} className="space-y-4">
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<div
|
<div
|
||||||
className={cn(
|
className={cn(
|
||||||
@@ -352,32 +433,100 @@ export default function OcrSandboxPromptManager() {
|
|||||||
<div className="flex justify-end gap-3 pt-2">
|
<div className="flex justify-end gap-3 pt-2">
|
||||||
<Button
|
<Button
|
||||||
type="submit"
|
type="submit"
|
||||||
disabled={sandboxState.isRunning || !ocrFile || !activePrompt}
|
disabled={sandboxState.isRunning || !ocrFile}
|
||||||
className="flex items-center gap-2"
|
className="flex items-center gap-2"
|
||||||
>
|
>
|
||||||
{sandboxState.isRunning ? (
|
{sandboxState.isRunning ? (
|
||||||
<>
|
<>
|
||||||
<Loader2 className="h-4 w-4 animate-spin" />
|
<Loader2 className="h-4 w-4 animate-spin" />
|
||||||
{t('ai.prompt.running')}
|
Running OCR...
|
||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
<>
|
<>
|
||||||
<Play className="h-4 w-4" />
|
<Play className="h-4 w-4" />
|
||||||
{t('ai.prompt.runSandbox')}
|
Step 1: Run OCR Only
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</Button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
) : (
|
||||||
|
<form onSubmit={handleStep2AiExtract} className="space-y-4">
|
||||||
|
<div className="space-y-3">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<span className="text-xs font-medium">Prompt Version:</span>
|
||||||
|
<select
|
||||||
|
value={selectedPromptVersion ?? (activePrompt?.versionNumber ?? '')}
|
||||||
|
onChange={(e) => setSelectedPromptVersion(e.target.value ? Number(e.target.value) : undefined)}
|
||||||
|
className="text-xs bg-background border border-input rounded px-2 py-1"
|
||||||
|
>
|
||||||
|
{versions.map((v) => (
|
||||||
|
<option key={v.versionNumber} value={v.versionNumber}>
|
||||||
|
Version {v.versionNumber} {v.isActive ? '(Active)' : ''}
|
||||||
|
</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div className="flex justify-end gap-3 pt-2">
|
||||||
|
<Button
|
||||||
|
type="button"
|
||||||
|
variant="outline"
|
||||||
|
size="sm"
|
||||||
|
onClick={handleResetSandbox}
|
||||||
|
className="text-xs"
|
||||||
|
>
|
||||||
|
Reset
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
type="submit"
|
||||||
|
disabled={sandboxState.isRunning || !activePrompt}
|
||||||
|
className="flex items-center gap-2"
|
||||||
|
>
|
||||||
|
{sandboxState.isRunning ? (
|
||||||
|
<>
|
||||||
|
<Loader2 className="h-4 w-4 animate-spin" />
|
||||||
|
Running AI...
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<Play className="h-4 w-4" />
|
||||||
|
Step 2: Run AI Extraction
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
)}
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
|
{sandboxStep === 'ai' && ocrResult && (
|
||||||
|
<Card className="border border-blue-500/20 bg-background/50 backdrop-blur-md">
|
||||||
|
<CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
|
||||||
|
<CardTitle className="text-base text-blue-600 dark:text-blue-400 flex items-center gap-2">
|
||||||
|
<ScanText className="h-4 w-4" />
|
||||||
|
OCR Raw Text (Step 1 Result)
|
||||||
|
</CardTitle>
|
||||||
|
<Badge variant="outline" className="text-xs">
|
||||||
|
{ocrResult.ocrUsed ? 'PaddleOCR' : 'Fast Path (Text Layer)'}
|
||||||
|
</Badge>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="pt-4">
|
||||||
|
<div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[200px] border border-border/10">
|
||||||
|
<pre className="text-blue-600 dark:text-blue-400 select-text leading-relaxed whitespace-pre-wrap">
|
||||||
|
{ocrResult.ocrText || '(ไม่มีข้อความ)'}
|
||||||
|
</pre>
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
{sandboxState.isRunning && (
|
{sandboxState.isRunning && (
|
||||||
<Card className="border border-amber-500/20 bg-amber-500/5">
|
<Card className="border border-amber-500/20 bg-amber-500/5">
|
||||||
<CardContent className="pt-6 space-y-4">
|
<CardContent className="pt-6 space-y-4">
|
||||||
<div className="flex items-center justify-between text-xs font-medium">
|
<div className="flex items-center justify-between text-xs font-medium">
|
||||||
<span className="flex items-center gap-1.5">
|
<span className="flex items-center gap-1.5">
|
||||||
<Loader2 className="h-3.5 w-3.5 animate-spin text-amber-500" />
|
<Loader2 className="h-3.5 w-3.5 animate-spin text-amber-500" />
|
||||||
{statusLabel}
|
{sandboxStep === 'ocr' ? 'Running OCR...' : 'Running AI Extraction...'}
|
||||||
</span>
|
</span>
|
||||||
<span>{sandboxState.progress}%</span>
|
<span>{sandboxState.progress}%</span>
|
||||||
</div>
|
</div>
|
||||||
@@ -388,37 +537,17 @@ export default function OcrSandboxPromptManager() {
|
|||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
)}
|
)}
|
||||||
{sandboxState.result && sandboxState.result.status === 'completed' && (
|
{sandboxState.result && sandboxState.result.status === 'completed' && sandboxStep === 'ai' && (
|
||||||
<div className="space-y-6">
|
<div className="space-y-6">
|
||||||
<Card className="border border-blue-500/20 bg-background/50 backdrop-blur-md">
|
|
||||||
<CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
|
|
||||||
<CardTitle className="text-base text-blue-600 dark:text-blue-400 flex items-center gap-2">
|
|
||||||
<ScanText className="h-4 w-4" />
|
|
||||||
OCR Raw Text
|
|
||||||
</CardTitle>
|
|
||||||
<Badge variant="outline" className="text-xs">
|
|
||||||
{sandboxState.result.ocrUsed ? 'PaddleOCR' : 'Fast Path (Text Layer)'}
|
|
||||||
</Badge>
|
|
||||||
</CardHeader>
|
|
||||||
<CardContent className="pt-4">
|
|
||||||
<div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[200px] border border-border/10">
|
|
||||||
<pre className="text-blue-600 dark:text-blue-400 select-text leading-relaxed whitespace-pre-wrap">
|
|
||||||
{sandboxState.result.ocrText || '(ไม่มีข้อความ)'}
|
|
||||||
</pre>
|
|
||||||
</div>
|
|
||||||
</CardContent>
|
|
||||||
</Card>
|
|
||||||
<Card className="border border-emerald-500/20 bg-background/50 backdrop-blur-md">
|
<Card className="border border-emerald-500/20 bg-background/50 backdrop-blur-md">
|
||||||
<CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
|
<CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
|
||||||
<CardTitle className="text-base text-emerald-600 dark:text-emerald-400 flex items-center gap-2">
|
<CardTitle className="text-base text-emerald-600 dark:text-emerald-400 flex items-center gap-2">
|
||||||
<FileJson className="h-4 w-4" />
|
<FileJson className="h-4 w-4" />
|
||||||
{t('ai.prompt.resultTitle')}
|
{t('ai.prompt.resultTitle')}
|
||||||
</CardTitle>
|
</CardTitle>
|
||||||
{activePrompt && (
|
|
||||||
<Badge variant="outline" className="text-xs text-emerald-500 border-emerald-500/20 bg-emerald-500/5">
|
<Badge variant="outline" className="text-xs text-emerald-500 border-emerald-500/20 bg-emerald-500/5">
|
||||||
{t('ai.prompt.resultVersionBadge', { version: String(activePrompt.versionNumber) })}
|
Version {sandboxState.result.promptVersionUsed || (activePrompt?.versionNumber ?? '?')}
|
||||||
</Badge>
|
</Badge>
|
||||||
)}
|
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent className="pt-4 space-y-4">
|
<CardContent className="pt-4 space-y-4">
|
||||||
<div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[300px] border border-border/10">
|
<div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[300px] border border-border/10">
|
||||||
|
|||||||
@@ -144,6 +144,34 @@ export const adminAiService = {
|
|||||||
return extractData<{ requestPublicId: string; jobId: string; status: string }>(data);
|
return extractData<{ requestPublicId: string; jobId: string; status: string }>(data);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// --- Step 1: OCR only (check OCR quality before testing the AI prompt) ---

/**
 * Uploads a file as the multipart field `file` to `/ai/admin/sandbox/ocr`.
 *
 * @param file File to upload.
 * @returns The request id, job id and status extracted from the response.
 */
submitSandboxOcr: async (
  file: File
): Promise<{ requestPublicId: string; jobId: string; status: string }> => {
  const body = new FormData();
  body.append('file', file);
  const response = await api.post('/ai/admin/sandbox/ocr', body, {
    headers: { 'Content-Type': 'multipart/form-data' },
  });
  return extractData<{ requestPublicId: string; jobId: string; status: string }>(
    response.data
  );
},
|
||||||
|
|
||||||
|
// --- Step 2: AI extraction (reuses the OCR text cached by Step 1) ---

/**
 * Posts the Step 1 request id and an optional prompt version to
 * `/ai/admin/sandbox/ai-extract`.
 *
 * @param requestPublicId Request id returned by Step 1.
 * @param promptVersion Optional prompt version to run.
 * @returns The request id, job id and status extracted from the response.
 */
submitSandboxAiExtract: async (
  requestPublicId: string,
  promptVersion?: number
): Promise<{ requestPublicId: string; jobId: string; status: string }> => {
  const body = { requestPublicId, promptVersion };
  const response = await api.post('/ai/admin/sandbox/ai-extract', body);
  return extractData<{ requestPublicId: string; jobId: string; status: string }>(
    response.data
  );
},
|
||||||
|
|
||||||
// --- AI Model Management (ADR-027) ---
|
// --- AI Model Management (ADR-027) ---
|
||||||
|
|
||||||
getAvailableModels: async (): Promise<AiModelsResponse> => {
|
getAvailableModels: async (): Promise<AiModelsResponse> => {
|
||||||
|
|||||||
@@ -0,0 +1,364 @@
|
|||||||
|
# Feature Specification: OCR Sandbox Two-Step Flow (OCR-First → AI-Second)
|
||||||
|
|
||||||
|
**Feature Branch**: `main`
|
||||||
|
**Created**: 2026-05-30
|
||||||
|
**Status**: Draft
|
||||||
|
**Input**: User requirement: แยก OCR Sandbox เป็น 2 step — Step 1 OCR เท่านั้นเพื่อตรวจคุณภาพ OCR ก่อน → Step 2 AI Extraction เพื่อทดสอบ prompt
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## User Scenarios & Testing _(mandatory)_
|
||||||
|
|
||||||
|
### User Story 1 - OCR Quality Check Before AI Testing (Priority: P1)
|
||||||
|
|
||||||
|
ในฐานะ **ผู้ดูแลระบบ (Superadmin)**
|
||||||
|
ข้าพเจ้าต้องการรัน OCR บน PDF เพื่อตรวจสอบคุณภาพข้อความที่สกัดได้ก่อน
|
||||||
|
เพื่อยืนยันว่า OCR ทำงานถูกต้องและข้อความสมบูรณ์
|
||||||
|
ก่อนที่จะใช้ข้อความนั้นทดสอบ AI prompt template
|
||||||
|
|
||||||
|
**Why this priority**:
|
||||||
|
การแยก step ช่วยให้ admin แยกปัญหาได้ชัดเจน — ถ้า OCR แย่/ไม่สมบูรณ์ ไม่ต้องเสียเวลาทดสอบ prompt ให้เสียทรัพยากร AI
|
||||||
|
|
||||||
|
**Independent Test**:
|
||||||
|
upload PDF → กด "Step 1: Run OCR" → เห็น OCR Raw Text → ตรวจคุณภาพ → ถ้าพอใจ → กด "Step 2: Run AI Extraction" → เห็น LLM Result
|
||||||
|
|
||||||
|
**Acceptance Scenarios**:
|
||||||
|
|
||||||
|
1. **Given** admin upload PDF ใน OCR Sandbox, **When** กด "Step 1: Run OCR", **Then** ระบบรัน OCR (PaddleOCR/Fast Path) และแสดง OCR Raw Text เท่านั้น ยังไม่เรียก LLM
|
||||||
|
2. **Given** OCR Raw Text ปรากฏแล้ว, **When** admin ตรวจและพอใจกับคุณภาพ, **Then** admin สามารถกด "Step 2: Run AI Extraction" เพื่อส่ง OCR text ไป LLM ต่อ
|
||||||
|
3. **Given** OCR Raw Text แย่/ไม่สมบูรณ์, **When** admin ไม่พอใจ, **Then** admin สามารถ upload PDF ใหม่และรัน OCR ใหม่โดยไม่เสียทรัพยากร AI
|
||||||
|
4. **Given** admin อยู่ใน Step 2, **When** admin เปลี่ยนใจต้องการแก้ prompt version, **Then** admin สามารถเลือก prompt version อื่นจาก dropdown และรัน AI Extraction ใหม่ด้วย OCR text เดิม
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### User Story 2 - Prompt Version Testing with Same OCR Text (Priority: P2)
|
||||||
|
|
||||||
|
ในฐานะ **ผู้ดูแลระบบ (Superadmin)**
|
||||||
|
ข้าพเจ้าต้องการทดสอบ prompt version หลาย version ด้วย OCR text เดียวกัน
|
||||||
|
เพื่อเปรียบเทียบคุณภาพของ prompt versions ที่ต่างกัน
|
||||||
|
|
||||||
|
**Why this priority**:
|
||||||
|
ช่วยให้ admin evaluate prompt versions ได้รวดเร็วโดยไม่ต้องรัน OCR ซ้ำหลายครั้ง
|
||||||
|
|
||||||
|
**Independent Test**:
|
||||||
|
run OCR ครั้งเดียว → เลือก prompt v1 → run AI → เลือก prompt v2 → run AI → เปรียบเทียบผลลัพธ์
|
||||||
|
|
||||||
|
**Acceptance Scenarios**:
|
||||||
|
|
||||||
|
1. **Given** OCR Raw Text ถูกสกัดแล้ว, **When** admin เลือก prompt version v1 และกด "Run AI Extraction", **Then** ระบบใช้ prompt v1 กับ OCR text เดิม
|
||||||
|
2. **Given** ผลลัพธ์จาก v1 ปรากฏ, **When** admin เลือก prompt version v2 และกด "Run AI Extraction" อีกครั้ง, **Then** ระบบใช้ prompt v2 กับ OCR text เดิม (ไม่รัน OCR ซ้ำ)
|
||||||
|
3. **Given** admin อยากเปลี่ยน OCR text, **When** admin upload PDF ใหม่และกด "Step 1: Run OCR", **Then** OCR text ใหม่แทนที่เดิมและ step 2 ถูก reset
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Requirements _(mandatory)_
|
||||||
|
|
||||||
|
### Functional Requirements
|
||||||
|
|
||||||
|
- **FR-001**: ระบบ MUST มี job type ใหม่ `sandbox-ocr-only` ที่ทำ OCR เท่านั้น ไม่เรียก LLM
|
||||||
|
- **FR-002**: ระบบ MUST มี job type ใหม่ `sandbox-ai-extract` ที่รับ OCR text + prompt version แล้ว run LLM
|
||||||
|
- **FR-003**: ระบบ MUST เก็บ OCR text ใน Redis (TTL 3600s) หลังจาก Step 1 เสร็จ เพื่อใช้ใน Step 2
|
||||||
|
- **FR-004**: Frontend MUST แสดง UI แบบ 2 step แยกกัน — Step 1: OCR, Step 2: AI Extraction
|
||||||
|
- **FR-005**: Step 2 MUST มี dropdown เลือก prompt version (default = active version)
|
||||||
|
- **FR-006**: ระบบ MUST อนุญาตให้รัน Step 2 ซ้ำด้วย prompt version ต่างกันโดยใช้ OCR text เดิม
|
||||||
|
- **FR-007**: ระบบ MUST invalidate OCR text cache เมื่อ admin upload PDF ใหม่และรัน Step 1 ใหม่
|
||||||
|
|
||||||
|
### Key Entities
|
||||||
|
|
||||||
|
- **OCR Cache**: Redis key `ai:sandbox:ocr:{requestPublicId}` TTL 3600s — เก็บ OCR text และ metadata (ocrUsed, timestamp)
|
||||||
|
- **Sandbox OCR Job**: BullMQ job type `sandbox-ocr-only` — รัน OCR เท่านั้น
|
||||||
|
- **Sandbox AI Job**: BullMQ job type `sandbox-ai-extract` — รัน LLM ด้วย OCR text + prompt version
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Success Criteria _(mandatory)_
|
||||||
|
|
||||||
|
### Measurable Outcomes
|
||||||
|
|
||||||
|
- **SC-001**: Step 1 (OCR) ใช้เวลา < 10 วินาทีสำหรับ PDF ทั่วไป
|
||||||
|
- **SC-002**: Step 2 (AI) ใช้เวลา < 120 วินาที (เหมือน sandbox-extract เดิม)
|
||||||
|
- **SC-003**: Admin สามารถทดสอบ prompt version 3 version ด้วย OCR text เดิมภายใน 5 นาที
|
||||||
|
- **SC-004**: OCR text cache ถูก invalidate อัตโนมัติเมื่อ upload PDF ใหม่
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Design
|
||||||
|
|
||||||
|
### POST /ai/admin/sandbox/ocr (Step 1)
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
- `file`: PDF (multipart/form-data)
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"requestPublicId": "uuid",
|
||||||
|
"jobId": "uuid",
|
||||||
|
"status": "queued"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Behavior:**
|
||||||
|
- Upload PDF → storage temp
|
||||||
|
- Enqueue job `sandbox-ocr-only`
|
||||||
|
- Return requestPublicId สำหรับ polling
|
||||||
|
|
||||||
|
### POST /ai/admin/sandbox/ai-extract (Step 2)
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```jsonc
|
||||||
|
{
|
||||||
|
"requestPublicId": "uuid",
|
||||||
|
"promptVersion": 2 // optional, default = active
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"requestPublicId": "uuid",
|
||||||
|
"jobId": "uuid",
|
||||||
|
"status": "queued"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Behavior:**
|
||||||
|
- ดึง OCR text จาก Redis cache (`ai:sandbox:ocr:{requestPublicId}`)
|
||||||
|
- ถ้าไม่มี → throw 404 "OCR text not found or expired, please run Step 1 first"
|
||||||
|
- ดึง prompt version (default = active)
|
||||||
|
- Enqueue job `sandbox-ai-extract`
|
||||||
|
- Return requestPublicId สำหรับ polling
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Backend Implementation
|
||||||
|
|
||||||
|
### New Job Types
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export type AiBatchJobType =
|
||||||
|
| 'ocr'
|
||||||
|
| 'extract-metadata'
|
||||||
|
| 'embed-document'
|
||||||
|
| 'sandbox-rag'
|
||||||
|
| 'sandbox-extract' // legacy (OCR + AI in one job)
|
||||||
|
| 'sandbox-ocr-only' // NEW: Step 1 - OCR only
|
||||||
|
| 'sandbox-ai-extract' // NEW: Step 2 - AI extraction with cached OCR
|
||||||
|
| 'migrate-document';
|
||||||
|
```
|
||||||
|
|
||||||
|
### processSandboxOcrOnly()
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
private async processSandboxOcrOnly(data: AiBatchJobData): Promise<void> {
|
||||||
|
const { idempotencyKey, payload } = data;
|
||||||
|
const pdfPath = payload.pdfPath as string;
|
||||||
|
|
||||||
|
const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
|
||||||
|
|
||||||
|
// Cache OCR text for Step 2
|
||||||
|
await this.redis.setex(
|
||||||
|
`ai:sandbox:ocr:${idempotencyKey}`,
|
||||||
|
3600,
|
||||||
|
JSON.stringify({
|
||||||
|
ocrText: ocrResult.text,
|
||||||
|
ocrUsed: ocrResult.ocrUsed,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
await this.redis.setex(
|
||||||
|
`ai:rag:result:${idempotencyKey}`,
|
||||||
|
3600,
|
||||||
|
JSON.stringify({
|
||||||
|
requestPublicId: idempotencyKey,
|
||||||
|
status: 'completed',
|
||||||
|
ocrText: ocrResult.text,
|
||||||
|
ocrUsed: ocrResult.ocrUsed,
|
||||||
|
completedAt: new Date().toISOString(),
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### processSandboxAiExtract()
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
private async processSandboxAiExtract(data: AiBatchJobData): Promise<void> {
|
||||||
|
const { idempotencyKey, payload, projectPublicId } = data;
|
||||||
|
const promptVersion = (payload.promptVersion as number) || undefined;
|
||||||
|
|
||||||
|
// ดึง OCR text จาก cache
|
||||||
|
const cachedOcr = await this.redis.get(`ai:sandbox:ocr:${idempotencyKey}`);
|
||||||
|
if (!cachedOcr) {
|
||||||
|
throw new Error('OCR text not found or expired, please run Step 1 first');
|
||||||
|
}
|
||||||
|
const { ocrText } = JSON.parse(cachedOcr);
|
||||||
|
|
||||||
|
// ดึง prompt version
|
||||||
|
const activePrompt = await this.aiPromptsService.getActive('ocr_extraction');
|
||||||
|
if (!activePrompt) {
|
||||||
|
throw new Error('No active ocr_extraction prompt version found');
|
||||||
|
}
|
||||||
|
|
||||||
|
// ถ้าระบุ promptVersion ให้ใช้ version นั้น (แต่ต้อง validate ว่ามีอยู่)
|
||||||
|
const targetPrompt = promptVersion
|
||||||
|
? await this.aiPromptsService.findByVersion('ocr_extraction', promptVersion)
|
||||||
|
: activePrompt;
|
||||||
|
|
||||||
|
if (!targetPrompt) {
|
||||||
|
throw new Error(`Prompt version ${promptVersion} not found`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve context และ run LLM (เหมือน processSandboxExtract เดิม)
|
||||||
|
const masterDataContext = await this.aiPromptsService.resolveContext(
|
||||||
|
targetPrompt,
|
||||||
|
projectPublicId
|
||||||
|
);
|
||||||
|
|
||||||
|
const resolvedPrompt = targetPrompt.template
|
||||||
|
.replace('{{ocr_text}}', ocrText)
|
||||||
|
.replace('{{master_data_context}}', JSON.stringify(masterDataContext, null, 2));
|
||||||
|
|
||||||
|
const response = await this.ollamaService.generate(resolvedPrompt, {
|
||||||
|
timeoutMs: 120000,
|
||||||
|
});
|
||||||
|
|
||||||
|
const cleanedResponse = response
|
||||||
|
.replace(/```json/g, '')
|
||||||
|
.replace(/```/g, '')
|
||||||
|
.trim();
|
||||||
|
|
||||||
|
let extractedMetadata: Record<string, unknown>;
|
||||||
|
try {
|
||||||
|
extractedMetadata = JSON.parse(cleanedResponse) as Record<string, unknown>;
|
||||||
|
} catch {
|
||||||
|
throw new Error(`Failed to parse LLM response as JSON: ${cleanedResponse}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.redis.setex(
|
||||||
|
`ai:rag:result:${idempotencyKey}`,
|
||||||
|
3600,
|
||||||
|
JSON.stringify({
|
||||||
|
requestPublicId: idempotencyKey,
|
||||||
|
status: 'completed',
|
||||||
|
answer: JSON.stringify(extractedMetadata, null, 2),
|
||||||
|
ocrText,
|
||||||
|
ocrUsed: JSON.parse(cachedOcr).ocrUsed,
|
||||||
|
promptVersionUsed: targetPrompt.versionNumber,
|
||||||
|
completedAt: new Date().toISOString(),
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Frontend Implementation
|
||||||
|
|
||||||
|
### UI Layout
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────┐
|
||||||
|
│ OCR Sandbox Playground │
|
||||||
|
├──────────────────────┬──────────────────────────────┤
|
||||||
|
│ Prompt Editor │ Version History │
|
||||||
|
│ ┌────────────────┐ │ ┌────────────────────────┐ │
|
||||||
|
│ │ textarea │ │ │ v3 (active) ✅ │ │
|
||||||
|
│ │ {{ocr_text}} │ │ │ v2 - 2026-05-24 │ │
|
||||||
|
│ │ ... │ │ │ v1 - 2026-05-22 │ │
|
||||||
|
│ └────────────────┘ │ └────────────────────────┘ │
|
||||||
|
│ [บันทึก Version ใหม่]│ [Load] [Activate] [Delete] │
|
||||||
|
├──────────────────────┴──────────────────────────────┤
|
||||||
|
│ Step 1: OCR Quality Check │
|
||||||
|
│ ┌──────────────────────────────────────────────┐ │
|
||||||
|
│ │ File Upload: [เลือก PDF] │ │
|
||||||
|
│ │ [Step 1: Run OCR] │ │
|
||||||
|
│ └──────────────────────────────────────────────┘ │
|
||||||
|
│ [OCR Raw Text Display] │
|
||||||
|
├─────────────────────────────────────────────────────┤
|
||||||
|
│ Step 2: AI Extraction (disabled until Step 1) │
|
||||||
|
│ ┌──────────────────────────────────────────────┐ │
|
||||||
|
│ │ Prompt Version: [v3 (active) ▼] │ │
|
||||||
|
│ │ [Step 2: Run AI Extraction] │ │
|
||||||
|
│ └──────────────────────────────────────────────┘ │
|
||||||
|
│ [LLM Result Display] │
|
||||||
|
└─────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### State Management
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const [ocrRequestPublicId, setOcrRequestPublicId] = useState<string | null>(null);
|
||||||
|
const [ocrText, setOcrText] = useState<string>('');
|
||||||
|
const [ocrUsed, setOcrUsed] = useState<boolean>(false);
|
||||||
|
const [aiRequestPublicId, setAiRequestPublicId] = useState<string | null>(null);
|
||||||
|
const [selectedPromptVersion, setSelectedPromptVersion] = useState<number | undefined>(undefined);
|
||||||
|
const [step, setStep] = useState<'upload' | 'ocr-done' | 'ai-done'>('upload');
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 1: Run OCR
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const handleRunOcr = async () => {
|
||||||
|
const response = await adminAiService.submitSandboxOcr(file);
|
||||||
|
setOcrRequestPublicId(response.requestPublicId);
|
||||||
|
// Poll for result...
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: Run AI Extraction
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const handleRunAi = async () => {
|
||||||
|
const response = await adminAiService.submitSandboxAiExtract(
|
||||||
|
ocrRequestPublicId,
|
||||||
|
selectedPromptVersion
|
||||||
|
);
|
||||||
|
setAiRequestPublicId(response.requestPublicId);
|
||||||
|
// Poll for result...
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ADR Impact
|
||||||
|
|
||||||
|
- **ADR-029**: เพิ่ม job types ใหม่ แต่ไม่เปลี่ยน architecture หลักของ `ai_prompts` table
|
||||||
|
- **ADR-030**: ไม่กระทบ context resolution logic ยังใช้ `resolveContext()` เหมือนเดิม
|
||||||
|
- **ADR-023A**: ไม่กระทบ AI boundary ยังใช้ Ollama ผ่าน BullMQ เหมือนเดิม
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Migration Plan
|
||||||
|
|
||||||
|
### Phase 1: Backend
|
||||||
|
1. เพิ่ม job types ใหม่ใน `AiBatchJobType`
|
||||||
|
2. Implement `processSandboxOcrOnly()` ใน `AiBatchProcessor`
|
||||||
|
3. Implement `processSandboxAiExtract()` ใน `AiBatchProcessor`
|
||||||
|
4. เพิ่ม endpoint `POST /ai/admin/sandbox/ocr` ใน `AiController`
|
||||||
|
5. เพิ่ม endpoint `POST /ai/admin/sandbox/ai-extract` ใน `AiController`
|
||||||
|
6. เพิ่ม method `findByVersion()` ใน `AiPromptsService` (ถ้ายังไม่มี)
|
||||||
|
|
||||||
|
### Phase 2: Frontend
|
||||||
|
1. เพิ่ม methods ใหม่ใน `adminAiService`:
|
||||||
|
- `submitSandboxOcr(file)`
|
||||||
|
- `submitSandboxAiExtract(requestPublicId, promptVersion?)`
|
||||||
|
2. Refactor `OcrSandboxPromptManager.tsx`:
|
||||||
|
- เพิ่ม state สำหรับ step management
|
||||||
|
- เพิ่ม UI Step 1 + Step 2 แยกกัน
|
||||||
|
- เพิ่ม dropdown prompt version ใน Step 2
|
||||||
|
3. Update polling logic ให้รองรับ 2 requestPublicId แยกกัน
|
||||||
|
|
||||||
|
### Phase 3: Testing
|
||||||
|
1. Unit tests สำหรับ `processSandboxOcrOnly()` และ `processSandboxAiExtract()`
|
||||||
|
2. Integration tests สำหรับ OCR cache invalidation
|
||||||
|
3. E2E tests สำหรับ 2-step flow
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Rollback Plan
|
||||||
|
|
||||||
|
ถ้า feature นี้มีปัญหา:
|
||||||
|
- สามารถ rollback โดยใช้ legacy endpoint `POST /ai/admin/sandbox/extract` (sandbox-extract) ที่ยังคงอยู่
|
||||||
|
- หรือ comment out new endpoints และ UI changes
|
||||||
Reference in New Issue
Block a user