690530:1121 ADR-030-231-ocr-sandbox-two-step-flow #01

2026-05-30 11:21:37 +07:00
parent 1ba563aa70
commit b0b7d12d5a
7 changed files with 926 additions and 117 deletions
@@ -103,7 +103,11 @@ export class AiQueueService {
   * @idempotency `jobId = payload.idempotencyKey`
   */
  async enqueueSandboxJob(
-    jobType: 'sandbox-rag' | 'sandbox-extract',
+    jobType:
+      | 'sandbox-rag'
+      | 'sandbox-extract'
+      | 'sandbox-ocr-only'
+      | 'sandbox-ai-extract',
    payload: {
      idempotencyKey: string;
      projectPublicId?: string;
@@ -111,6 +115,7 @@ export class AiQueueService {
      userPublicId?: string;
      filePublicId?: string;
      pdfPath?: string;
+      extraPayload?: Record<string, unknown>;
    }
  ): Promise<string> {
    const job = await this.batchQueue.add(
@@ -124,6 +129,7 @@ export class AiQueueService {
          userPublicId: payload.userPublicId,
          filePublicId: payload.filePublicId,
          pdfPath: payload.pdfPath,
+          ...payload.extraPayload,
        },
        idempotencyKey: payload.idempotencyKey,
      },
@@ -40,6 +40,8 @@ import {
  ApiHeader,
  ApiParam,
  ApiQuery,
+  ApiConsumes,
+  ApiBody,
 } from '@nestjs/swagger';
 import { AiService, ExtractionResult, PaginatedResult } from './ai.service';
 import { AiSettingsService } from './ai-settings.service';
@@ -508,6 +510,77 @@ export class AiController {
    return { requestPublicId, jobId, status: 'queued' };
  }

+  // --- Step 1: OCR only (check OCR quality before testing the AI prompt) ---
+
+  /**
+   * Accepts a PDF upload (multipart field `file`, at most 50 MiB, file type
+   * `pdf`), stores it through `fileStorageService.upload`, and enqueues a
+   * `sandbox-ocr-only` job pointing at the stored file path.
+   *
+   * A fresh UUIDv7 is generated as `requestPublicId` and used as the job's
+   * idempotency key; the same value is returned to the caller.
+   *
+   * NOTE(review): no `@UseInterceptors(FileInterceptor('file'))` is visible on
+   * this handler. `@UploadedFile()` presumably relies on a multer interceptor
+   * having populated the request — confirm one is applied at class or global
+   * level, otherwise the file validation will always reject the request.
+   *
+   * @param file Uploaded PDF, validated by `ParseFilePipe`.
+   * @param user Authenticated user; `user.user_id` is passed to the storage service.
+   * @returns The generated request id, the queue job id, and status `'queued'`.
+   */
+  @Post('admin/sandbox/ocr')
+  @UseGuards(JwtAuthGuard, RbacGuard)
+  @RequirePermission('system.manage_all')
+  @ApiOperation({
+    summary: 'Step 1: Run OCR Only — สำหรับตรวจคุณภาพ OCR ก่อนทดสอบ AI',
+    description:
+      'Upload PDF และรัน OCR เท่านั้น ไม่เรียก LLM — ผลลัพธ์ cache ไว้สำหรับ Step 2',
+  })
+  @ApiConsumes('multipart/form-data')
+  @ApiBody({
+    schema: {
+      type: 'object',
+      properties: {
+        file: {
+          type: 'string',
+          format: 'binary',
+        },
+      },
+    },
+  })
+  async submitSandboxOcr(
+    @UploadedFile(
+      new ParseFilePipe({
+        validators: [
+          // 50 MiB upper bound on the uploaded PDF.
+          new MaxFileSizeValidator({ maxSize: 50 * 1024 * 1024 }),
+          new FileTypeValidator({ fileType: 'pdf' }),
+        ],
+      })
+    )
+    file: Express.Multer.File,
+    @CurrentUser() user: User
+  ): Promise<{ requestPublicId: string; jobId: string; status: string }> {
+    const attachment = await this.fileStorageService.upload(file, user.user_id);
+    // The request id doubles as the idempotency key of the queued job.
+    const requestPublicId = uuidv7();
+    const jobId = await this.aiQueueService.enqueueSandboxJob(
+      'sandbox-ocr-only',
+      {
+        idempotencyKey: requestPublicId,
+        pdfPath: attachment.filePath,
+      }
+    );
+    return { requestPublicId, jobId, status: 'queued' };
+  }
+
+  // --- Step 2: AI extraction (reuses the OCR text cached by Step 1) ---
+
+  /**
+   * Enqueues a `sandbox-ai-extract` job for a request whose OCR text was
+   * produced by Step 1.
+   *
+   * The Step 1 `requestPublicId` is reused as the idempotency key, and the
+   * optional `promptVersion` is forwarded to the job through `extraPayload`.
+   *
+   * NOTE(review): the body is typed with an inline type literal, so no
+   * class-based validation runs on it — `requestPublicId` may be missing or
+   * malformed at runtime. Consider a validated DTO class.
+   *
+   * NOTE(review): `enqueueSandboxJob` documents `jobId = idempotencyKey`.
+   * Reusing the Step 1 id means Step 2 asks for a job id that Step 1 already
+   * used; confirm the queue does not drop the second add as a duplicate, and
+   * that pollers cannot read the still-`completed` Step 1 status before the
+   * worker overwrites it.
+   *
+   * @param dto `requestPublicId` from Step 1 and an optional prompt version.
+   * @returns The same request id, the queue job id, and status `'queued'`.
+   */
+  @Post('admin/sandbox/ai-extract')
+  @UseGuards(JwtAuthGuard, RbacGuard)
+  @RequirePermission('system.manage_all')
+  @ApiOperation({
+    summary: 'Step 2: Run AI Extraction — ใช้ OCR text ที่ cache จาก Step 1',
+    description:
+      'รับ requestPublicId จาก Step 1 และ optional promptVersion แล้ว run LLM extraction',
+  })
+  // The inline body type is erased at runtime, so the schema must be spelled
+  // out explicitly for the generated OpenAPI document.
+  @ApiBody({
+    schema: {
+      type: 'object',
+      required: ['requestPublicId'],
+      properties: {
+        requestPublicId: { type: 'string' },
+        promptVersion: { type: 'integer' },
+      },
+    },
+  })
+  async submitSandboxAiExtract(
+    @Body() dto: { requestPublicId: string; promptVersion?: number }
+  ): Promise<{ requestPublicId: string; jobId: string; status: string }> {
+    const { requestPublicId, promptVersion } = dto;
+    const jobId = await this.aiQueueService.enqueueSandboxJob(
+      'sandbox-ai-extract',
+      {
+        idempotencyKey: requestPublicId,
+        projectPublicId: 'default', // sandbox runs use the default project
+        extraPayload: { promptVersion },
+      }
+    );
+    return { requestPublicId, jobId, status: 'queued' };
+  }
+
  // --- Webhook Callback จาก n8n (Service Account) ---

  @Post('callback')
@@ -49,6 +49,8 @@ export type AiBatchJobType =
  | 'embed-document'
  | 'sandbox-rag'
  | 'sandbox-extract'
+  | 'sandbox-ocr-only'
+  | 'sandbox-ai-extract'
  | 'migrate-document';

 export interface AiBatchJobData {
@@ -197,6 +199,18 @@ export class AiBatchProcessor extends WorkerHost {
          );
          await this.processSandboxExtract(job.data);
          return;
+        case 'sandbox-ocr-only':
+          this.logger.log(
+            `Sandbox OCR-Only job processing — jobId=${String(job.id)}`
+          );
+          await this.processSandboxOcrOnly(job.data);
+          return;
+        case 'sandbox-ai-extract':
+          this.logger.log(
+            `Sandbox AI-Extract job processing — jobId=${String(job.id)}`
+          );
+          await this.processSandboxAiExtract(job.data);
+          return;
        case 'migrate-document':
          this.logger.log(
            `Migrate document job processing — jobId=${String(job.id)}`
@@ -369,6 +383,186 @@ export class AiBatchProcessor extends WorkerHost {
    }
  }

+  /**
+   * Step 1: OCR only — lets an admin inspect OCR quality before testing the AI prompt.
+   *
+   * Writes a `processing` status to `ai:rag:result:<idempotencyKey>`, runs
+   * `ocrService.detectAndExtract` on `payload.pdfPath`, caches the OCR text
+   * under `ai:sandbox:ocr:<idempotencyKey>` for Step 2, and finally writes a
+   * `completed` status that includes the OCR text. All keys are written with
+   * a 3600-second TTL.
+   *
+   * Any failure — including a missing `pdfPath` — is logged, recorded as a
+   * `failed` status so pollers reach a terminal state, and then rethrown.
+   *
+   * @param data Job data; `payload.pdfPath` must be a non-empty string.
+   * @throws Error when `pdfPath` is missing, or whatever the OCR/Redis calls throw.
+   */
+  private async processSandboxOcrOnly(data: AiBatchJobData): Promise<void> {
+    const { idempotencyKey, payload } = data;
+    const resultKey = `ai:rag:result:${idempotencyKey}`;
+
+    await this.redis.setex(
+      resultKey,
+      3600,
+      JSON.stringify({
+        requestPublicId: idempotencyKey,
+        status: 'processing',
+      })
+    );
+
+    try {
+      // Validate inside the try block so a bad payload is reported as
+      // `failed` instead of leaving the request without a terminal status.
+      const pdfPath = payload.pdfPath;
+      if (typeof pdfPath !== 'string' || !pdfPath) {
+        throw new Error('pdfPath is required for sandbox-ocr-only job');
+      }
+
+      const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
+
+      // Cache the OCR text for Step 2.
+      await this.redis.setex(
+        `ai:sandbox:ocr:${idempotencyKey}`,
+        3600,
+        JSON.stringify({
+          ocrText: ocrResult.text,
+          ocrUsed: ocrResult.ocrUsed,
+          timestamp: new Date().toISOString(),
+        })
+      );
+
+      await this.redis.setex(
+        resultKey,
+        3600,
+        JSON.stringify({
+          requestPublicId: idempotencyKey,
+          status: 'completed',
+          ocrText: ocrResult.text,
+          ocrUsed: ocrResult.ocrUsed,
+          completedAt: new Date().toISOString(),
+        })
+      );
+    } catch (err: unknown) {
+      const errMsg = err instanceof Error ? err.message : String(err);
+      this.logger.error(`Sandbox OCR-only failed: ${errMsg}`);
+      await this.redis.setex(
+        resultKey,
+        3600,
+        JSON.stringify({
+          requestPublicId: idempotencyKey,
+          status: 'failed',
+          errorMessage: errMsg,
+          completedAt: new Date().toISOString(),
+        })
+      );
+      throw err;
+    }
+  }
+
+  /**
+   * Step 2: AI extraction — runs the LLM against the OCR text cached by Step 1.
+   *
+   * Flow:
+   * 1. Write a `processing` status to `ai:rag:result:<idempotencyKey>`.
+   * 2. Load the cached OCR text from `ai:sandbox:ocr:<idempotencyKey>`.
+   * 3. Pick the prompt: the requested `payload.promptVersion` if given,
+   *    otherwise the active `ocr_extraction` prompt.
+   * 4. Fill the `{{ocr_text}}` and `{{master_data_context}}` placeholders,
+   *    call the LLM, strip Markdown code fences and parse the reply as JSON.
+   * 5. Save the test result and write a `completed` status (3600-second TTL).
+   *
+   * Any failure is logged, recorded as a `failed` status, and rethrown.
+   *
+   * @param data Job data; `payload.promptVersion` is optional.
+   * @throws Error when the OCR cache is missing, the prompt cannot be found,
+   *   or the LLM reply is not valid JSON.
+   */
+  private async processSandboxAiExtract(data: AiBatchJobData): Promise<void> {
+    const { idempotencyKey, payload, projectPublicId } = data;
+    const promptVersion = (payload.promptVersion as number) || undefined;
+    const resultKey = `ai:rag:result:${idempotencyKey}`;
+
+    await this.redis.setex(
+      resultKey,
+      3600,
+      JSON.stringify({
+        requestPublicId: idempotencyKey,
+        status: 'processing',
+      })
+    );
+
+    try {
+      // Load the OCR text cached by Step 1.
+      const cachedOcr = await this.redis.get(
+        `ai:sandbox:ocr:${idempotencyKey}`
+      );
+      if (!cachedOcr) {
+        throw new Error(
+          'OCR text not found or expired, please run Step 1 first'
+        );
+      }
+      const parsedOcr = JSON.parse(cachedOcr) as {
+        ocrText: string;
+        ocrUsed: boolean;
+        timestamp: string;
+      };
+      const { ocrText } = parsedOcr;
+
+      // Only look up the prompt that will actually be used: an explicitly
+      // requested version must not fail just because no version is active.
+      const targetPrompt = promptVersion
+        ? await this.aiPromptsService.findByVersion(
+            'ocr_extraction',
+            promptVersion
+          )
+        : await this.aiPromptsService.getActive('ocr_extraction');
+
+      if (!targetPrompt) {
+        throw new Error(
+          promptVersion
+            ? `Prompt version ${promptVersion} not found`
+            : 'No active ocr_extraction prompt version found'
+        );
+      }
+
+      // Resolve the master-data context and run the LLM.
+      const masterDataContext = await this.aiPromptsService.resolveContext(
+        targetPrompt,
+        projectPublicId
+      );
+
+      const placeholderValues: Record<string, string> = {
+        ocr_text: ocrText,
+        master_data_context: JSON.stringify(masterDataContext, null, 2),
+      };
+      // Single-pass substitution with a function replacer:
+      // - `$&`, `$1`, `$$` etc. inside the OCR text are inserted literally
+      //   (a string replacement would interpret them as patterns);
+      // - placeholder-looking text inside the OCR text is never re-expanded;
+      // - every occurrence of a placeholder in the template is filled.
+      const resolvedPrompt = targetPrompt.template.replace(
+        /\{\{(ocr_text|master_data_context)\}\}/g,
+        (placeholder: string, name: string) =>
+          placeholderValues[name] ?? placeholder
+      );
+
+      const response = await this.ollamaService.generate(resolvedPrompt, {
+        timeoutMs: 120000,
+      });
+
+      // Strip Markdown code fences the model may wrap around its JSON.
+      const cleanedResponse = response
+        .replace(/```json/g, '')
+        .replace(/```/g, '')
+        .trim();
+
+      let extractedMetadata: Record<string, unknown>;
+      try {
+        extractedMetadata = JSON.parse(cleanedResponse) as Record<
+          string,
+          unknown
+        >;
+      } catch {
+        throw new Error(
+          `Failed to parse LLM response as JSON: ${cleanedResponse}`
+        );
+      }
+
+      await this.aiPromptsService.saveTestResult(
+        'ocr_extraction',
+        targetPrompt.versionNumber,
+        extractedMetadata
+      );
+
+      await this.redis.setex(
+        resultKey,
+        3600,
+        JSON.stringify({
+          requestPublicId: idempotencyKey,
+          status: 'completed',
+          answer: JSON.stringify(extractedMetadata, null, 2),
+          ocrText,
+          ocrUsed: parsedOcr.ocrUsed,
+          promptVersionUsed: targetPrompt.versionNumber,
+          completedAt: new Date().toISOString(),
+        })
+      );
+    } catch (err: unknown) {
+      const errMsg = err instanceof Error ? err.message : String(err);
+      this.logger.error(`Sandbox AI-extract failed: ${errMsg}`);
+      await this.redis.setex(
+        resultKey,
+        3600,
+        JSON.stringify({
+          requestPublicId: idempotencyKey,
+          status: 'failed',
+          errorMessage: errMsg,
+          completedAt: new Date().toISOString(),
+        })
+      );
+      throw err;
+    }
+  }
+
  private async processMigrateDocument(
    job: Job<AiBatchJobData>
  ): Promise<void> {
@@ -292,6 +292,21 @@ export class AiPromptsService {
    return prompt;
  }

+  /**
+   * Looks up a single prompt version by its version number.
+   * @param promptType Prompt type to search within
+   * @param versionNumber Version number to fetch
+   * @returns The matching prompt version, or null when none exists
+   */
+  async findByVersion(
+    promptType: string,
+    versionNumber: number
+  ): Promise<AiPrompt | null> {
+    const where = { promptType, versionNumber };
+    return this.aiPromptRepo.findOne({ where });
+  }
+
  /**
   * ค้นหา prompt ที่มีผลใช้งานจริง และแทนที่ placeholder {{ocr_text}} ด้วยข้อความ OCR
   * @param promptType ประเภทของ prompt
@@ -6,6 +6,7 @@
 // - 2026-05-26: เพิ่มการตรวจสอบ versionsQuery.data แบบทนทานเพื่อป้องกัน Error N.find is not a function ในกรณีที่ API ส่งข้อมูลแบบ wrapped object มา
 // - 2026-05-29: เพิ่ม OCR Raw Text section ในผล sandbox
 // - 2026-05-29: ปรับปรุงการโหลด Active Prompt ให้ทนทานต่อ race conditions และรูปแบบประเภทข้อมูลที่ส่งมาจาก API (boolean, number, string)
+// - 2026-05-30: Refactor เป็น 2-step flow (Step 1: OCR-only → Step 2: AI Extraction) ตาม spec 231
 'use client';

 import React, { useState, useEffect } from 'react';
@@ -32,6 +33,7 @@ import { useTranslations } from '@/hooks/use-translations';
 import PromptVersionHistory from './PromptVersionHistory';
 import { cn } from '@/lib/utils';
 import { AiPrompt } from '@/types/ai-prompts';
+import { adminAiService } from '@/lib/services/admin-ai.service';

 const DEFAULT_OCR_TEMPLATE = `คุณคือเอนจิ้นสกัดข้อมูลอัจฉริยะ (Document Intelligence Engine)
 วิเคราะห์ข้อความ OCR ที่ได้รับจากเอกสารของโครงการ Laem Chabang Port Phase 3 และสกัดข้อมูลเมตาดาต้าให้ออกมาเป็น JSON object ที่ถูกต้องตามโครงสร้างที่กำหนด
@@ -103,7 +105,15 @@ export default function OcrSandboxPromptManager() {
  const [ocrFile, setOcrFile] = useState<File | null>(null);
  const [manualNote, setManualNote] = useState<string>('');
  const [activeTab, setActiveTab] = useState<'editor' | 'sandbox'>('editor');
-  const { state: sandboxState, jobId: sandboxJobId, submit: submitSandbox, reset: resetSandbox } =
+  // 2-step flow states
+  const [sandboxStep, setSandboxStep] = useState<'ocr' | 'ai'>('ocr');
+  const [ocrResult, setOcrResult] = useState<{
+    requestPublicId: string;
+    ocrText: string;
+    ocrUsed: boolean;
+  } | null>(null);
+  const [selectedPromptVersion, setSelectedPromptVersion] = useState<number | undefined>(undefined);
+  const { state: sandboxState, jobId: sandboxJobId, reset: resetSandbox } =
    useSandboxRun(() => {
      // เมื่อ sandbox เสร็จสิ้น: รีเฟรชรายการเวอร์ชัน
      versionsQuery.refetch();
@@ -175,27 +185,95 @@ export default function OcrSandboxPromptManager() {
      toast.error(error.response?.data?.message || t('ai.prompt.saveNoteError'));
    }
  };
-  const handleSubmitOcr = async (e: React.FormEvent) => {
+  // Step 1: OCR-only handler.
+  // Uploads the selected PDF, then polls the job status once per second until
+  // it reports `completed` (stores the OCR result and moves to Step 2) or
+  // `failed` (shows the error). Polling is sequential — one request in flight
+  // at a time — and bounded, so a slow status call cannot stack requests or
+  // fire duplicate completions, and a stuck job cannot poll forever.
+  const handleStep1Ocr = async (e: React.FormEvent) => {
    e.preventDefault();
-    if (!activePrompt) {
-      toast.error(t('ai.prompt.noActivePrompt'));
-      return;
-    }
    if (!ocrFile) {
      toast.error(t('ai.prompt.noFile'));
      return;
    }
    try {
      resetSandbox();
-      await submitSandbox(ocrFile);
+      setSandboxStep('ocr');
+      const { requestPublicId } = await adminAiService.submitSandboxOcr(ocrFile);
      toast.success(t('ai.prompt.uploadSuccess'));
+      // Poll for the OCR result: up to 600 attempts, 1 second apart.
+      const maxPollAttempts = 600;
+      for (let attempt = 0; attempt < maxPollAttempts; attempt += 1) {
+        await new Promise((resolve) => setTimeout(resolve, 1000));
+        try {
+          const result = await adminAiService.getSandboxJobStatus(requestPublicId);
+          if (result.status === 'completed') {
+            setOcrResult({
+              requestPublicId,
+              ocrText: result.ocrText ?? '',
+              ocrUsed: result.ocrUsed ?? false,
+            });
+            setSandboxStep('ai');
+            toast.success('OCR completed successfully');
+            return;
+          }
+          if (result.status === 'failed') {
+            toast.error(result.errorMessage || 'OCR failed');
+            return;
+          }
+        } catch (_err) {
+          // A failed status request ends polling, as before.
+          toast.error('Poll error occurred');
+          return;
+        }
+      }
+      toast.error('OCR timed out');
    } catch (err: unknown) {
      const error = err as { response?: { data?: { message?: string } } };
      toast.error(error.response?.data?.message || t('ai.prompt.uploadError'));
    }
  };
+  // Step 2: AI extraction handler.
+  // Submits the Step 1 request id (plus the selected prompt version, if any)
+  // and polls the job status once per second until it is `completed` or
+  // `failed`. Polling is sequential and bounded, so slow status calls cannot
+  // overlap and a stuck job cannot poll forever.
+  //
+  // NOTE(review): the status is polled under the same request id as Step 1.
+  // Confirm the backend cannot still return Step 1's `completed` status right
+  // after submission, which would report success before the AI job has run.
+  const handleStep2AiExtract = async (e: React.FormEvent) => {
+    e.preventDefault();
+    if (!ocrResult) {
+      toast.error('Please run Step 1 (OCR) first');
+      return;
+    }
+    if (!activePrompt) {
+      toast.error(t('ai.prompt.noActivePrompt'));
+      return;
+    }
+    try {
+      resetSandbox();
+      const { requestPublicId } = await adminAiService.submitSandboxAiExtract(
+        ocrResult.requestPublicId,
+        selectedPromptVersion
+      );
+      toast.success('AI Extraction started');
+      // Poll for the AI result: up to 600 attempts, 1 second apart.
+      const maxPollAttempts = 600;
+      for (let attempt = 0; attempt < maxPollAttempts; attempt += 1) {
+        await new Promise((resolve) => setTimeout(resolve, 1000));
+        try {
+          const result = await adminAiService.getSandboxJobStatus(requestPublicId);
+          if (result.status === 'completed') {
+            // NOTE(review): the extraction result is not stored in component
+            // state here; verify the result card, which reads
+            // `sandboxState.result`, actually receives it.
+            toast.success(t('ai.prompt.sandboxSuccess'));
+            versionsQuery.refetch();
+            return;
+          }
+          if (result.status === 'failed') {
+            toast.error(result.errorMessage || 'AI Extraction failed');
+            return;
+          }
+        } catch (_err) {
+          // A failed status request ends polling, as before.
+          toast.error('Poll error occurred');
+          return;
+        }
+      }
+      toast.error('AI Extraction timed out');
+    } catch (err: unknown) {
+      const error = err as { response?: { data?: { message?: string } } };
+      toast.error(error.response?.data?.message || 'AI Extraction failed');
+    }
+  };
+  // Return the two-step flow to its initial state: clear the sandbox run,
+  // drop the selected file, OCR result and prompt version, and show Step 1.
+  const handleResetSandbox = () => {
+    resetSandbox();
+    setOcrFile(null);
+    setOcrResult(null);
+    setSelectedPromptVersion(undefined);
+    setSandboxStep('ocr');
+  };
  // แปล status key เป็นข้อความตาม locale ปัจจุบัน
-  const statusLabel = sandboxState.statusText ? t(sandboxState.statusText) : '';
  return (
    <div className="grid gap-6 lg:grid-cols-12 items-start">
      <div className="lg:col-span-8 space-y-6">
@@ -282,102 +360,173 @@ export default function OcrSandboxPromptManager() {
                  {t('ai.prompt.sandboxCardTitle')}
                </CardTitle>
                <p className="text-xs text-muted-foreground">
-                  {t('ai.prompt.sandboxCardDesc')}
+                  {sandboxStep === 'ocr'
+                    ? 'Step 1: Upload PDF and run OCR to check quality'
+                    : 'Step 2: Test AI prompt with OCR text'}
                </p>
              </CardHeader>
              <CardContent>
-                <form onSubmit={handleSubmitOcr} className="space-y-4">
-                  <div className="space-y-2">
-                    <div
-                      className={cn(
-                        'flex flex-col items-center justify-center rounded-lg border border-dashed p-8 transition-all',
-                        ocrFile ? 'border-primary/50 bg-primary/5' : 'border-muted-foreground/20 hover:bg-muted/10'
-                      )}
-                      onDragOver={(e) => e.preventDefault()}
-                      onDrop={(e) => {
-                        e.preventDefault();
-                        if (sandboxState.isRunning) return;
-                        const file = e.dataTransfer.files?.[0];
-                        if (file?.name.toLowerCase().endsWith('.pdf')) {
-                          setOcrFile(file);
-                        } else {
-                          toast.error(t('ai.prompt.dropzonePdfOnly'));
-                        }
-                      }}
-                    >
-                      <Brain className="h-9 w-9 text-muted-foreground/50 mb-2 animate-bounce" />
-                      {ocrFile ? (
-                        <div className="text-center space-y-1">
-                          <p className="text-sm font-semibold">{ocrFile.name}</p>
-                          <p className="text-xs text-muted-foreground">
-                            ({(ocrFile.size / 1024 / 1024).toFixed(2)} MB)
-                          </p>
-                          <Button
-                            type="button"
-                            variant="ghost"
-                            size="sm"
-                            disabled={sandboxState.isRunning}
-                            onClick={() => setOcrFile(null)}
-                            className="mt-2 text-xs text-destructive hover:bg-destructive/10"
-                          >
-                            {t('ai.prompt.removeFile')}
-                          </Button>
-                        </div>
-                      ) : (
-                        <div className="text-center space-y-1">
-                          <p className="text-xs text-muted-foreground">
-                            {t('ai.prompt.dropzoneDrag')}
-                          </p>
-                          <input
-                            type="file"
-                            accept=".pdf"
-                            disabled={sandboxState.isRunning}
-                            onChange={(e) => {
-                              const file = e.target.files?.[0];
-                              if (file) setOcrFile(file);
-                            }}
-                            className="hidden"
-                            id="ocr-sandbox-file"
-                          />
-                          <label
-                            htmlFor="ocr-sandbox-file"
-                            className="mt-2.5 inline-flex h-8 items-center justify-center rounded-md bg-secondary px-3.5 text-xs font-semibold cursor-pointer hover:bg-secondary/85 transition-colors"
-                          >
-                            {t('ai.prompt.dropzoneChoose')}
-                          </label>
-                        </div>
-                      )}
+                {sandboxStep === 'ocr' ? (
+                  <form onSubmit={handleStep1Ocr} className="space-y-4">
+                    <div className="space-y-2">
+                      <div
+                        className={cn(
+                          'flex flex-col items-center justify-center rounded-lg border border-dashed p-8 transition-all',
+                          ocrFile ? 'border-primary/50 bg-primary/5' : 'border-muted-foreground/20 hover:bg-muted/10'
+                        )}
+                        onDragOver={(e) => e.preventDefault()}
+                        onDrop={(e) => {
+                          e.preventDefault();
+                          if (sandboxState.isRunning) return;
+                          const file = e.dataTransfer.files?.[0];
+                          if (file?.name.toLowerCase().endsWith('.pdf')) {
+                            setOcrFile(file);
+                          } else {
+                            toast.error(t('ai.prompt.dropzonePdfOnly'));
+                          }
+                        }}
+                      >
+                        <Brain className="h-9 w-9 text-muted-foreground/50 mb-2 animate-bounce" />
+                        {ocrFile ? (
+                          <div className="text-center space-y-1">
+                            <p className="text-sm font-semibold">{ocrFile.name}</p>
+                            <p className="text-xs text-muted-foreground">
+                              ({(ocrFile.size / 1024 / 1024).toFixed(2)} MB)
+                            </p>
+                            <Button
+                              type="button"
+                              variant="ghost"
+                              size="sm"
+                              disabled={sandboxState.isRunning}
+                              onClick={() => setOcrFile(null)}
+                              className="mt-2 text-xs text-destructive hover:bg-destructive/10"
+                            >
+                              {t('ai.prompt.removeFile')}
+                            </Button>
+                          </div>
+                        ) : (
+                          <div className="text-center space-y-1">
+                            <p className="text-xs text-muted-foreground">
+                              {t('ai.prompt.dropzoneDrag')}
+                            </p>
+                            <input
+                              type="file"
+                              accept=".pdf"
+                              disabled={sandboxState.isRunning}
+                              onChange={(e) => {
+                                const file = e.target.files?.[0];
+                                if (file) setOcrFile(file);
+                              }}
+                              className="hidden"
+                              id="ocr-sandbox-file"
+                            />
+                            <label
+                              htmlFor="ocr-sandbox-file"
+                              className="mt-2.5 inline-flex h-8 items-center justify-center rounded-md bg-secondary px-3.5 text-xs font-semibold cursor-pointer hover:bg-secondary/85 transition-colors"
+                            >
+                              {t('ai.prompt.dropzoneChoose')}
+                            </label>
+                          </div>
+                        )}
+                      </div>
                    </div>
-                  </div>
-                  <div className="flex justify-end gap-3 pt-2">
-                    <Button
-                      type="submit"
-                      disabled={sandboxState.isRunning || !ocrFile || !activePrompt}
-                      className="flex items-center gap-2"
-                    >
-                      {sandboxState.isRunning ? (
-                        <>
-                          <Loader2 className="h-4 w-4 animate-spin" />
-                          {t('ai.prompt.running')}
-                        </>
-                      ) : (
-                        <>
-                          <Play className="h-4 w-4" />
-                          {t('ai.prompt.runSandbox')}
-                        </>
-                      )}
-                    </Button>
-                  </div>
-                </form>
+                    <div className="flex justify-end gap-3 pt-2">
+                      <Button
+                        type="submit"
+                        disabled={sandboxState.isRunning || !ocrFile}
+                        className="flex items-center gap-2"
+                      >
+                        {sandboxState.isRunning ? (
+                          <>
+                            <Loader2 className="h-4 w-4 animate-spin" />
+                            Running OCR...
+                          </>
+                        ) : (
+                          <>
+                            <Play className="h-4 w-4" />
+                            Step 1: Run OCR Only
+                          </>
+                        )}
+                      </Button>
+                    </div>
+                  </form>
+                ) : (
+                  <form onSubmit={handleStep2AiExtract} className="space-y-4">
+                    <div className="space-y-3">
+                      <div className="flex items-center justify-between">
+                        <span className="text-xs font-medium">Prompt Version:</span>
+                        <select
+                          value={selectedPromptVersion ?? (activePrompt?.versionNumber ?? '')}
+                          onChange={(e) => setSelectedPromptVersion(e.target.value ? Number(e.target.value) : undefined)}
+                          className="text-xs bg-background border border-input rounded px-2 py-1"
+                        >
+                          {versions.map((v) => (
+                            <option key={v.versionNumber} value={v.versionNumber}>
+                              Version {v.versionNumber} {v.isActive ? '(Active)' : ''}
+                            </option>
+                          ))}
+                        </select>
+                      </div>
+                      <div className="flex justify-end gap-3 pt-2">
+                        <Button
+                          type="button"
+                          variant="outline"
+                          size="sm"
+                          onClick={handleResetSandbox}
+                          className="text-xs"
+                        >
+                          Reset
+                        </Button>
+                        <Button
+                          type="submit"
+                          disabled={sandboxState.isRunning || !activePrompt}
+                          className="flex items-center gap-2"
+                        >
+                          {sandboxState.isRunning ? (
+                            <>
+                              <Loader2 className="h-4 w-4 animate-spin" />
+                              Running AI...
+                            </>
+                          ) : (
+                            <>
+                              <Play className="h-4 w-4" />
+                              Step 2: Run AI Extraction
+                            </>
+                          )}
+                        </Button>
+                      </div>
+                    </div>
+                  </form>
+                )}
              </CardContent>
            </Card>
+            {sandboxStep === 'ai' && ocrResult && (
+              <Card className="border border-blue-500/20 bg-background/50 backdrop-blur-md">
+                <CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
+                  <CardTitle className="text-base text-blue-600 dark:text-blue-400 flex items-center gap-2">
+                    <ScanText className="h-4 w-4" />
+                    OCR Raw Text (Step 1 Result)
+                  </CardTitle>
+                  <Badge variant="outline" className="text-xs">
+                    {ocrResult.ocrUsed ? 'PaddleOCR' : 'Fast Path (Text Layer)'}
+                  </Badge>
+                </CardHeader>
+                <CardContent className="pt-4">
+                  <div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[200px] border border-border/10">
+                    <pre className="text-blue-600 dark:text-blue-400 select-text leading-relaxed whitespace-pre-wrap">
+                      {ocrResult.ocrText || '(ไม่มีข้อความ)'}
+                    </pre>
+                  </div>
+                </CardContent>
+              </Card>
+            )}
            {sandboxState.isRunning && (
              <Card className="border border-amber-500/20 bg-amber-500/5">
                <CardContent className="pt-6 space-y-4">
                  <div className="flex items-center justify-between text-xs font-medium">
                    <span className="flex items-center gap-1.5">
                      <Loader2 className="h-3.5 w-3.5 animate-spin text-amber-500" />
-                      {statusLabel}
+                      {sandboxStep === 'ocr' ? 'Running OCR...' : 'Running AI Extraction...'}
                    </span>
                    <span>{sandboxState.progress}%</span>
                  </div>
@@ -388,37 +537,17 @@ export default function OcrSandboxPromptManager() {
                </CardContent>
              </Card>
            )}
-            {sandboxState.result && sandboxState.result.status === 'completed' && (
+            {sandboxState.result && sandboxState.result.status === 'completed' && sandboxStep === 'ai' && (
              <div className="space-y-6">
-                <Card className="border border-blue-500/20 bg-background/50 backdrop-blur-md">
-                  <CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
-                    <CardTitle className="text-base text-blue-600 dark:text-blue-400 flex items-center gap-2">
-                      <ScanText className="h-4 w-4" />
-                      OCR Raw Text
-                    </CardTitle>
-                    <Badge variant="outline" className="text-xs">
-                      {sandboxState.result.ocrUsed ? 'PaddleOCR' : 'Fast Path (Text Layer)'}
-                    </Badge>
-                  </CardHeader>
-                  <CardContent className="pt-4">
-                    <div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[200px] border border-border/10">
-                      <pre className="text-blue-600 dark:text-blue-400 select-text leading-relaxed whitespace-pre-wrap">
-                        {sandboxState.result.ocrText || '(ไม่มีข้อความ)'}
-                      </pre>
-                    </div>
-                  </CardContent>
-                </Card>
                <Card className="border border-emerald-500/20 bg-background/50 backdrop-blur-md">
                  <CardHeader className="border-b border-border/30 pb-3 flex flex-row items-center justify-between">
                    <CardTitle className="text-base text-emerald-600 dark:text-emerald-400 flex items-center gap-2">
                      <FileJson className="h-4 w-4" />
                      {t('ai.prompt.resultTitle')}
                    </CardTitle>
-                    {activePrompt && (
-                      <Badge variant="outline" className="text-xs text-emerald-500 border-emerald-500/20 bg-emerald-500/5">
-                        {t('ai.prompt.resultVersionBadge', { version: String(activePrompt.versionNumber) })}
-                      </Badge>
-                    )}
+                    <Badge variant="outline" className="text-xs text-emerald-500 border-emerald-500/20 bg-emerald-500/5">
+                      Version {sandboxState.result.promptVersionUsed || (activePrompt?.versionNumber ?? '?')}
+                    </Badge>
                  </CardHeader>
                  <CardContent className="pt-4 space-y-4">
                    <div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[300px] border border-border/10">
@@ -144,6 +144,34 @@ export const adminAiService = {
    return extractData<{ requestPublicId: string; jobId: string; status: string }>(data);
  },

+  // --- Step 1: OCR only (inspect OCR quality before testing the AI prompt) ---
+
+  submitSandboxOcr: async (
+    file: File
+  ): Promise<{ requestPublicId: string; jobId: string; status: string }> => {
+    const formData = new FormData();
+    formData.append('file', file); // multipart field name is 'file'
+    const { data } = await api.post('/ai/admin/sandbox/ocr', formData, {
+      headers: {
+        'Content-Type': 'multipart/form-data',
+      },
+    });
+    return extractData<{ requestPublicId: string; jobId: string; status: string }>(data);
+  },
+
+  // --- Step 2: AI extraction (reuses the OCR text cached by Step 1) ---
+
+  submitSandboxAiExtract: async (
+    requestPublicId: string,
+    promptVersion?: number
+  ): Promise<{ requestPublicId: string; jobId: string; status: string }> => {
+    const { data } = await api.post('/ai/admin/sandbox/ai-extract', {
+      requestPublicId,
+      promptVersion, // optional; NOTE(review): presumably the backend falls back to the active prompt version when omitted — confirm
+    });
+    return extractData<{ requestPublicId: string; jobId: string; status: string }>(data);
+  },
+
  // --- AI Model Management (ADR-027) ---

  getAvailableModels: async (): Promise<AiModelsResponse> => {
@@ -0,0 +1,364 @@
+# Feature Specification: OCR Sandbox Two-Step Flow (OCR-First → AI-Second)
+
+**Feature Branch**: `main`
+**Created**: 2026-05-30
+**Status**: Draft
+**Input**: User requirement: แยก OCR Sandbox เป็น 2 step — Step 1 OCR เท่านั้นเพื่อตรวจคุณภาพ OCR ก่อน → Step 2 AI Extraction เพื่อทดสอบ prompt
+
+---
+
+## User Scenarios & Testing _(mandatory)_
+
+### User Story 1 - OCR Quality Check Before AI Testing (Priority: P1)
+
+ในฐานะ **ผู้ดูแลระบบ (Superadmin)**
+ข้าพเจ้าต้องการรัน OCR บน PDF เพื่อตรวจสอบคุณภาพข้อความที่สกัดได้ก่อน
+เพื่อยืนยันว่า OCR ทำงานถูกต้องและข้อความสมบูรณ์
+ก่อนที่จะใช้ข้อความนั้นทดสอบ AI prompt template
+
+**Why this priority**:
+การแยก step ช่วยให้ admin แยกปัญหาได้ชัดเจน — ถ้า OCR แย่/ไม่สมบูรณ์ ไม่ต้องเสียเวลาทดสอบ prompt ให้เสียทรัพยากร AI
+
+**Independent Test**:
+upload PDF → กด "Step 1: Run OCR" → เห็น OCR Raw Text → ตรวจคุณภาพ → ถ้าพอใจ → กด "Step 2: Run AI Extraction" → เห็น LLM Result
+
+**Acceptance Scenarios**:
+
+1. **Given** admin upload PDF ใน OCR Sandbox, **When** กด "Step 1: Run OCR", **Then** ระบบรัน OCR (PaddleOCR/Fast Path) และแสดง OCR Raw Text เท่านั้น ยังไม่เรียก LLM
+2. **Given** OCR Raw Text ปรากฏแล้ว, **When** admin ตรวจและพอใจกับคุณภาพ, **Then** admin สามารถกด "Step 2: Run AI Extraction" เพื่อส่ง OCR text ไป LLM ต่อ
+3. **Given** OCR Raw Text แย่/ไม่สมบูรณ์, **When** admin ไม่พอใจ, **Then** admin สามารถ upload PDF ใหม่และรัน OCR ใหม่โดยไม่เสียทรัพยากร AI
+4. **Given** admin อยู่ใน Step 2, **When** admin เปลี่ยนใจต้องการแก้ prompt version, **Then** admin สามารถเลือก prompt version อื่นจาก dropdown และรัน AI Extraction ใหม่ด้วย OCR text เดิม
+
+---
+
+### User Story 2 - Prompt Version Testing with Same OCR Text (Priority: P2)
+
+ในฐานะ **ผู้ดูแลระบบ (Superadmin)**
+ข้าพเจ้าต้องการทดสอบ prompt version หลาย version ด้วย OCR text เดียวกัน
+เพื่อเปรียบเทียบคุณภาพของ prompt versions ที่ต่างกัน
+
+**Why this priority**:
+ช่วยให้ admin evaluate prompt versions ได้รวดเร็วโดยไม่ต้องรัน OCR ซ้ำหลายครั้ง
+
+**Independent Test**:
+run OCR ครั้งเดียว → เลือก prompt v1 → run AI → เลือก prompt v2 → run AI → เปรียบเทียบผลลัพธ์
+
+**Acceptance Scenarios**:
+
+1. **Given** OCR Raw Text ถูกสกัดแล้ว, **When** admin เลือก prompt version v1 และกด "Run AI Extraction", **Then** ระบบใช้ prompt v1 กับ OCR text เดิม
+2. **Given** ผลลัพธ์จาก v1 ปรากฏ, **When** admin เลือก prompt version v2 และกด "Run AI Extraction" อีกครั้ง, **Then** ระบบใช้ prompt v2 กับ OCR text เดิม (ไม่รัน OCR ซ้ำ)
+3. **Given** admin อยากเปลี่ยน OCR text, **When** admin upload PDF ใหม่และกด "Step 1: Run OCR", **Then** OCR text ใหม่แทนที่เดิมและ step 2 ถูก reset
+
+---
+
+## Requirements _(mandatory)_
+
+### Functional Requirements
+
+- **FR-001**: ระบบ MUST มี job type ใหม่ `sandbox-ocr-only` ที่ทำ OCR เท่านั้น ไม่เรียก LLM
+- **FR-002**: ระบบ MUST มี job type ใหม่ `sandbox-ai-extract` ที่รับ OCR text + prompt version แล้ว run LLM
+- **FR-003**: ระบบ MUST เก็บ OCR text ใน Redis (TTL 3600s) หลังจาก Step 1 เสร็จ เพื่อใช้ใน Step 2
+- **FR-004**: Frontend MUST แสดง UI แบบ 2 step แยกกัน — Step 1: OCR, Step 2: AI Extraction
+- **FR-005**: Step 2 MUST มี dropdown เลือก prompt version (default = active version)
+- **FR-006**: ระบบ MUST อนุญาตให้รัน Step 2 ซ้ำด้วย prompt version ต่างกันโดยใช้ OCR text เดิม
+- **FR-007**: ระบบ MUST invalidate OCR text cache เมื่อ admin upload PDF ใหม่และรัน Step 1 ใหม่
+
+### Key Entities
+
+- **OCR Cache**: Redis key `ai:sandbox:ocr:{requestPublicId}` TTL 3600s — เก็บ OCR text และ metadata (ocrUsed, timestamp)
+- **Sandbox OCR Job**: BullMQ job type `sandbox-ocr-only` — รัน OCR เท่านั้น
+- **Sandbox AI Job**: BullMQ job type `sandbox-ai-extract` — รัน LLM ด้วย OCR text + prompt version
+
+---
+
+## Success Criteria _(mandatory)_
+
+### Measurable Outcomes
+
+- **SC-001**: Step 1 (OCR) ใช้เวลา < 10 วินาทีสำหรับ PDF ทั่วไป
+- **SC-002**: Step 2 (AI) ใช้เวลา < 120 วินาที (เหมือน sandbox-extract เดิม)
+- **SC-003**: Admin สามารถทดสอบ prompt version 3 version ด้วย OCR text เดิมภายใน 5 นาที
+- **SC-004**: OCR text cache ถูก invalidate อัตโนมัติเมื่อ upload PDF ใหม่
+
+---
+
+## API Design
+
+### POST /ai/admin/sandbox/ocr (Step 1)
+
+**Request:**
+- `file`: PDF (multipart/form-data)
+
+**Response:**
+```json
+{
+  "requestPublicId": "uuid",
+  "jobId": "uuid",
+  "status": "queued"
+}
+```
+
+**Behavior:**
+- Upload PDF → storage temp
+- Enqueue job `sandbox-ocr-only`
+- Return requestPublicId สำหรับ polling
+
+### POST /ai/admin/sandbox/ai-extract (Step 2)
+
+**Request:**
+```jsonc
+{
+  "requestPublicId": "uuid",
+  "promptVersion": 2  // optional, default = active
+}
+```
+
+**Response:**
+```json
+{
+  "requestPublicId": "uuid",
+  "jobId": "uuid",
+  "status": "queued"
+}
+```
+
+**Behavior:**
+- ดึง OCR text จาก Redis cache (`ai:sandbox:ocr:{requestPublicId}`)
+- ถ้าไม่มี → throw 404 "OCR text not found or expired, please run Step 1 first"
+- ดึง prompt version (default = active)
+- Enqueue job `sandbox-ai-extract`
+- Return requestPublicId สำหรับ polling
+
+---
+
+## Backend Implementation
+
+### New Job Types
+
+```typescript
+export type AiBatchJobType =
+  | 'ocr'
+  | 'extract-metadata'
+  | 'embed-document'
+  | 'sandbox-rag'
+  | 'sandbox-extract'      // legacy (OCR + AI in one job)
+  | 'sandbox-ocr-only'     // NEW: Step 1 - OCR only
+  | 'sandbox-ai-extract'   // NEW: Step 2 - AI extraction with cached OCR
+  | 'migrate-document';
+```
+
+### processSandboxOcrOnly()
+
+```typescript
+private async processSandboxOcrOnly(data: AiBatchJobData): Promise<void> {
+  const { idempotencyKey, payload } = data;
+  const pdfPath = payload.pdfPath as string;
+
+  const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
+
+  // Cache OCR text for Step 2
+  await this.redis.setex(
+    `ai:sandbox:ocr:${idempotencyKey}`,
+    3600,
+    JSON.stringify({
+      ocrText: ocrResult.text,
+      ocrUsed: ocrResult.ocrUsed,
+      timestamp: new Date().toISOString(),
+    })
+  );
+
+  await this.redis.setex(
+    `ai:rag:result:${idempotencyKey}`,
+    3600,
+    JSON.stringify({
+      requestPublicId: idempotencyKey,
+      status: 'completed',
+      ocrText: ocrResult.text,
+      ocrUsed: ocrResult.ocrUsed,
+      completedAt: new Date().toISOString(),
+    })
+  );
+}
+```
+
+### processSandboxAiExtract()
+
+```typescript
+private async processSandboxAiExtract(data: AiBatchJobData): Promise<void> {
+  const { idempotencyKey, payload, projectPublicId } = data;
+  const promptVersion = (payload.promptVersion as number) || undefined;
+
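+  // ดึง OCR text จาก cache — key is Step 1's requestPublicId (forwarded in the job payload), not this job's own idempotencyKey
+  const cachedOcr = await this.redis.get(`ai:sandbox:ocr:${payload.ocrRequestPublicId as string}`);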
+  if (!cachedOcr) {
+    throw new Error('OCR text not found or expired, please run Step 1 first');
+  }
+  const { ocrText } = JSON.parse(cachedOcr);
+
+  // ดึง prompt version
+  const activePrompt = await this.aiPromptsService.getActive('ocr_extraction');
+  if (!activePrompt) {
+    throw new Error('No active ocr_extraction prompt version found');
+  }
+
+  // ถ้าระบุ promptVersion ให้ใช้ version นั้น (แต่ต้อง validate ว่ามีอยู่)
+  const targetPrompt = promptVersion
+    ? await this.aiPromptsService.findByVersion('ocr_extraction', promptVersion)
+    : activePrompt;
+
+  if (!targetPrompt) {
+    throw new Error(`Prompt version ${promptVersion} not found`);
+  }
+
+  // Resolve context และ run LLM (เหมือน processSandboxExtract เดิม)
+  const masterDataContext = await this.aiPromptsService.resolveContext(
+    targetPrompt,
+    projectPublicId
+  );
+
+  const resolvedPrompt = targetPrompt.template
+    .replace('{{ocr_text}}', () => ocrText) // replacer fn: OCR text may contain `$&`, `$1`, `$$`
+    .replace('{{master_data_context}}', () => JSON.stringify(masterDataContext, null, 2));
+
+  const response = await this.ollamaService.generate(resolvedPrompt, {
+    timeoutMs: 120000,
+  });
+
+  const cleanedResponse = response
+    .replace(/```json/g, '')
+    .replace(/```/g, '')
+    .trim();
+
+  let extractedMetadata: Record<string, unknown>;
+  try {
+    extractedMetadata = JSON.parse(cleanedResponse) as Record<string, unknown>;
+  } catch {
+    throw new Error(`Failed to parse LLM response as JSON: ${cleanedResponse}`);
+  }
+
+  await this.redis.setex(
+    `ai:rag:result:${idempotencyKey}`,
+    3600,
+    JSON.stringify({
+      requestPublicId: idempotencyKey,
+      status: 'completed',
+      answer: JSON.stringify(extractedMetadata, null, 2),
+      ocrText,
+      ocrUsed: JSON.parse(cachedOcr).ocrUsed,
+      promptVersionUsed: targetPrompt.versionNumber,
+      completedAt: new Date().toISOString(),
+    })
+  );
+}
+```
+
+---
+
+## Frontend Implementation
+
+### UI Layout
+
+```
+┌─────────────────────────────────────────────────────┐
+│ OCR Sandbox Playground                              │
+├──────────────────────┬──────────────────────────────┤
+│  Prompt Editor       │  Version History             │
+│  ┌────────────────┐  │  ┌────────────────────────┐  │
+│  │ textarea       │  │  │ v3 (active) ✅          │  │
+│  │ {{ocr_text}}   │  │  │ v2 - 2026-05-24        │  │
+│  │ ...            │  │  │ v1 - 2026-05-22        │  │
+│  └────────────────┘  │  └────────────────────────┘  │
+│  [บันทึก Version ใหม่]│  [Load] [Activate] [Delete] │
+├──────────────────────┴──────────────────────────────┤
+│  Step 1: OCR Quality Check                         │
+│  ┌──────────────────────────────────────────────┐  │
+│  │ File Upload: [เลือก PDF]                     │  │
+│  │ [Step 1: Run OCR]                              │  │
+│  └──────────────────────────────────────────────┘  │
+│  [OCR Raw Text Display]                            │
+├─────────────────────────────────────────────────────┤
+│  Step 2: AI Extraction (disabled until Step 1)     │
+│  ┌──────────────────────────────────────────────┐  │
+│  │ Prompt Version: [v3 (active) ▼]              │  │
+│  │ [Step 2: Run AI Extraction]                   │  │
+│  └──────────────────────────────────────────────┘  │
+│  [LLM Result Display]                              │
+└─────────────────────────────────────────────────────┘
+```
+
+### State Management
+
+```typescript
+const [ocrRequestPublicId, setOcrRequestPublicId] = useState<string | null>(null);
+const [ocrText, setOcrText] = useState<string>('');
+const [ocrUsed, setOcrUsed] = useState<boolean>(false);
+const [aiRequestPublicId, setAiRequestPublicId] = useState<string | null>(null);
+const [selectedPromptVersion, setSelectedPromptVersion] = useState<number | undefined>(undefined);
+const [step, setStep] = useState<'upload' | 'ocr-done' | 'ai-done'>('upload');
+```
+
+### Step 1: Run OCR
+
+```typescript
+const handleRunOcr = async () => {
+  const response = await adminAiService.submitSandboxOcr(file);
+  setOcrRequestPublicId(response.requestPublicId);
+  // Poll for result...
+};
+```
+
+### Step 2: Run AI Extraction
+
+```typescript
+const handleRunAi = async () => {
+  const response = await adminAiService.submitSandboxAiExtract(
+    ocrRequestPublicId,
+    selectedPromptVersion
+  );
+  setAiRequestPublicId(response.requestPublicId);
+  // Poll for result...
+};
+```
+
+---
+
+## ADR Impact
+
+- **ADR-029**: เพิ่ม job types ใหม่ แต่ไม่เปลี่ยน architecture หลักของ `ai_prompts` table
+- **ADR-030**: ไม่กระทบ context resolution logic ยังใช้ `resolveContext()` เหมือนเดิม
+- **ADR-023A**: ไม่กระทบ AI boundary ยังใช้ Ollama ผ่าน BullMQ เหมือนเดิม
+
+---
+
+## Migration Plan
+
+### Phase 1: Backend
+1. เพิ่ม job types ใหม่ใน `AiBatchJobType`
+2. Implement `processSandboxOcrOnly()` ใน `AiBatchProcessor`
+3. Implement `processSandboxAiExtract()` ใน `AiBatchProcessor`
+4. เพิ่ม endpoint `POST /ai/admin/sandbox/ocr` ใน `AiController`
+5. เพิ่ม endpoint `POST /ai/admin/sandbox/ai-extract` ใน `AiController`
+6. เพิ่ม method `findByVersion()` ใน `AiPromptsService` (ถ้ายังไม่มี)
+
+### Phase 2: Frontend
+1. เพิ่ม methods ใหม่ใน `adminAiService`:
+   - `submitSandboxOcr(file)`
+   - `submitSandboxAiExtract(requestPublicId, promptVersion?)`
+2. Refactor `OcrSandboxPromptManager.tsx`:
+   - เพิ่ม state สำหรับ step management
+   - เพิ่ม UI Step 1 + Step 2 แยกกัน
+   - เพิ่ม dropdown prompt version ใน Step 2
+3. Update polling logic ให้รองรับ 2 requestPublicId แยกกัน
+
+### Phase 3: Testing
+1. Unit tests สำหรับ `processSandboxOcrOnly()` และ `processSandboxAiExtract()`
+2. Integration tests สำหรับ OCR cache invalidation
+3. E2E tests สำหรับ 2-step flow
+
+---
+
+## Rollback Plan
+
+ถ้า feature นี้มีปัญหา:
+- สามารถ rollback โดยใช้ legacy endpoint `POST /ai/admin/sandbox/extract` (sandbox-extract) ที่ยังคงอยู่
+- หรือ comment out new endpoints และ UI changes