refactor(ai): OCR sidecar canonical naming cleanup — typhoon→np-dms, remove hardcoded keys, asyncio.to_thread, ADR-040/041
This commit is contained in:
@@ -51,7 +51,7 @@ QDRANT_URL=http://localhost:6333
|
||||
|
||||
# Ollama (Admin Desktop Desk-5439 — ADR-034 Thai-Optimized Model Stack)
|
||||
OLLAMA_MODEL_MAIN=typhoon2.5-np-dms:latest
|
||||
OLLAMA_MODEL_OCR=typhoon-np-dms-ocr:latest
|
||||
OLLAMA_MODEL_OCR=np-dms-ocr:latest
|
||||
OLLAMA_MODEL_EMBED=nomic-embed-text
|
||||
OLLAMA_EMBED_MODEL=nomic-embed-text
|
||||
OLLAMA_RAG_MODEL=typhoon2.5-np-dms:latest
|
||||
@@ -67,12 +67,10 @@ AI_REALTIME_CONCURRENCY=2
|
||||
QDRANT_HOST=http://192.168.10.8:6333
|
||||
QDRANT_COLLECTION=lcbp3_documents
|
||||
|
||||
# OCR sidecar (PaddleOCR on Desk-5439)
|
||||
# OCR sidecar (np-dms-ocr on Desk-5439)
|
||||
OCR_CHAR_THRESHOLD=100
|
||||
OCR_API_URL=http://192.168.10.8:8765
|
||||
|
||||
# Thai preprocessing microservice (PyThaiNLP — Admin Desktop)
|
||||
THAI_PREPROCESS_URL=http://192.168.10.8:8765
|
||||
OCR_API_URL=http://192.168.10.100:8765
|
||||
OCR_SIDECAR_API_KEY=change-me-sidecar-api-key
|
||||
|
||||
# ADR-023 forbids cloud AI fallback for project documents.
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ export class AiQueueService {
|
||||
filePublicId?: string;
|
||||
pdfPath?: string;
|
||||
engineType?: string;
|
||||
typhoonOptions?: {
|
||||
ocrOptions?: {
|
||||
temperature?: number;
|
||||
topP?: number;
|
||||
repeatPenalty?: number;
|
||||
@@ -154,7 +154,7 @@ export class AiQueueService {
|
||||
filePublicId: payload.filePublicId,
|
||||
pdfPath: payload.pdfPath,
|
||||
engineType: payload.engineType,
|
||||
typhoonOptions: payload.typhoonOptions,
|
||||
ocrOptions: payload.ocrOptions,
|
||||
contractPublicId: payload.contractPublicId,
|
||||
...payload.extraPayload,
|
||||
},
|
||||
|
||||
@@ -567,7 +567,7 @@ export class AiController {
|
||||
},
|
||||
engineType: {
|
||||
type: 'string',
|
||||
enum: ['auto', 'tesseract', 'np-dms-ocr', 'typhoon-np-dms-ocr'],
|
||||
enum: ['auto', 'np-dms-ocr'],
|
||||
description: 'OCR engine ที่ต้องการใช้ (default: auto)',
|
||||
},
|
||||
temperature: {
|
||||
@@ -607,19 +607,14 @@ export class AiController {
|
||||
const attachment = await this.fileStorageService.upload(file, user.user_id);
|
||||
const requestPublicId = uuidv7();
|
||||
// ตรวจสอบและ normalize engineType ให้เป็นค่าที่ valid
|
||||
const validEngineTypes = [
|
||||
'auto',
|
||||
'tesseract',
|
||||
'np-dms-ocr',
|
||||
'typhoon-np-dms-ocr',
|
||||
] as const;
|
||||
const validEngineTypes = ['auto', 'np-dms-ocr'] as const;
|
||||
const resolvedEngineType: SandboxOcrEngineType = validEngineTypes.includes(
|
||||
engineType as SandboxOcrEngineType
|
||||
)
|
||||
? (engineType as SandboxOcrEngineType)
|
||||
: 'auto';
|
||||
// แปลง string จาก multipart form เป็น number (optional override)
|
||||
const typhoonOptions = {
|
||||
const ocrOptions = {
|
||||
...(temperature !== undefined && {
|
||||
temperature: parseFloat(temperature),
|
||||
}),
|
||||
@@ -634,7 +629,7 @@ export class AiController {
|
||||
idempotencyKey: requestPublicId,
|
||||
pdfPath: attachment.filePath,
|
||||
engineType: resolvedEngineType,
|
||||
...(Object.keys(typhoonOptions).length > 0 && { typhoonOptions }),
|
||||
...(Object.keys(ocrOptions).length > 0 && { ocrOptions }),
|
||||
}
|
||||
);
|
||||
return { requestPublicId, jobId, status: 'queued' };
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
// - 2026-05-22: นำเข้าและลงทะเบียน CleanupTempFilesWorker (T016) เพื่อลบไฟล์แนบชั่วคราวหมดอายุ
|
||||
// - 2026-05-23: ลงทะเบียน MigrationProgress + AiMigrationCheckpointService (ADR-023A)
|
||||
// - 2026-05-25: ลงทะเบียน AiAvailableModel สำหรับ AI Model Management (ADR-027).
|
||||
// - 2026-05-30: ลงทะเบียน VramMonitorService, OcrCacheService, TyphoonOcrProcessor, TyphoonLlmProcessor (ADR-032).
|
||||
// - 2026-05-30: ลงทะเบียน VramMonitorService, OcrCacheService, NpDmsOcrProcessor, NpDmsAiProcessor (ADR-032).
|
||||
// - 2026-06-13: ลงทะเบียน AiSandboxProfile สำหรับ ADR-036 sandbox-production parity
|
||||
// Module สำหรับ AI Gateway — ลงทะเบียน Services และ Controllers (ADR-023)
|
||||
|
||||
@@ -75,13 +75,13 @@ import {
|
||||
QUEUE_AI_VECTOR_DELETION,
|
||||
} from '../common/constants/queue.constants';
|
||||
import {
|
||||
TyphoonOcrProcessor,
|
||||
QUEUE_TYPHOON_OCR,
|
||||
} from './processors/typhoon-ocr.processor';
|
||||
NpDmsOcrProcessor,
|
||||
QUEUE_NP_DMS_OCR,
|
||||
} from './processors/np-dms-ocr-processor';
|
||||
import {
|
||||
TyphoonLlmProcessor,
|
||||
QUEUE_TYPHOON_LLM,
|
||||
} from './processors/typhoon-llm.processor';
|
||||
NpDmsAiProcessor,
|
||||
QUEUE_NP_DMS_AI,
|
||||
} from './processors/np-dms-ai.processor';
|
||||
|
||||
@Module({
|
||||
imports: [
|
||||
@@ -129,7 +129,7 @@ import {
|
||||
{ name: QUEUE_AI_VECTOR_DELETION },
|
||||
// np-dms-ocr + np-dms-ai queues: concurrency=1 เพื่อป้องกัน VRAM overflow (ADR-032)
|
||||
{
|
||||
name: QUEUE_TYPHOON_OCR,
|
||||
name: QUEUE_NP_DMS_OCR,
|
||||
defaultJobOptions: {
|
||||
attempts: 2,
|
||||
backoff: { type: 'exponential', delay: 5000 },
|
||||
@@ -138,7 +138,7 @@ import {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: QUEUE_TYPHOON_LLM,
|
||||
name: QUEUE_NP_DMS_AI,
|
||||
defaultJobOptions: {
|
||||
attempts: 2,
|
||||
backoff: { type: 'exponential', delay: 5000 },
|
||||
@@ -198,9 +198,9 @@ import {
|
||||
AiRagProcessor,
|
||||
// Phase 5: Vector Deletion async processor (ADR-023 FR-008)
|
||||
AiVectorDeletionProcessor,
|
||||
// ADR-032: Typhoon OCR + LLM sequential processors (concurrency=1)
|
||||
TyphoonOcrProcessor,
|
||||
TyphoonLlmProcessor,
|
||||
// ADR-032: np-dms-ocr + np-dms-ai sequential processors (concurrency=1)
|
||||
NpDmsOcrProcessor,
|
||||
NpDmsAiProcessor,
|
||||
// US4: Execution Profiles Service (T044)
|
||||
AiExecutionProfilesService,
|
||||
// RbacGuard ต้องการ UserService จาก UserModule
|
||||
|
||||
@@ -80,7 +80,7 @@ describe('AiService', () => {
|
||||
|
||||
const mockOllamaService = {
|
||||
getMainModelName: jest.fn().mockReturnValue('typhoon2.5-np-dms:latest'),
|
||||
getOcrModelName: jest.fn().mockReturnValue('typhoon-np-dms-ocr:latest'),
|
||||
getOcrModelName: jest.fn().mockReturnValue('np-dms-ocr:latest'),
|
||||
checkHealth: jest.fn().mockResolvedValue({
|
||||
status: 'HEALTHY',
|
||||
latencyMs: 120,
|
||||
|
||||
@@ -41,7 +41,7 @@ export class AiAuditLog extends UuidBaseEntity {
|
||||
@Column({ name: 'model_name', type: 'varchar', length: 100, nullable: true })
|
||||
modelName?: string;
|
||||
|
||||
// ประเภท OCR/LLM model ที่ใช้ เช่น tesseract, typhoon-ocr-3b, typhoon2.1-gemma3-4b (ADR-032)
|
||||
// ประเภท OCR/LLM model ที่ใช้ เช่น fast-path, np-dms-ocr, np-dms-ai (ADR-032)
|
||||
@Index('idx_ai_audit_model_type')
|
||||
@Column({ name: 'model_type', type: 'varchar', length: 50, nullable: true })
|
||||
modelType?: string;
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
// File: src/modules/ai/entities/ocr-engine-configuration.entity.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง OcrEngineConfiguration class สำหรับเก็บข้อมูลการตั้งค่า OCR Engine (T010, US1)
|
||||
// - 2026-06-20: เปลี่ยน TESSERACT → FAST_PATH, TYPHOON_OCR → NP_DMS_OCR ตามการทำความสะอาด legacy references
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
|
||||
export enum OcrEngineType {
|
||||
TESSERACT = 'tesseract',
|
||||
TYPHOON_OCR = 'typhoon_ocr',
|
||||
FAST_PATH = 'fast_path',
|
||||
NP_DMS_OCR = 'np_dms_ocr',
|
||||
}
|
||||
|
||||
/** คลาสสำหรับเก็บข้อมูลการตั้งค่า OCR Engine (ไม่ผูกกับตาราง SQL ตาม data-model.md) */
|
||||
|
||||
@@ -738,7 +738,7 @@ describe('AiBatchProcessor', () => {
|
||||
expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
|
||||
pdfPath: '/files/test.pdf',
|
||||
activeProfile: 'quality',
|
||||
typhoonOptions: {
|
||||
ocrOptions: {
|
||||
temperature: 0.15,
|
||||
topP: 0.65,
|
||||
repeatPenalty: 1.15,
|
||||
|
||||
@@ -34,7 +34,7 @@ import { OcrService } from '../services/ocr.service';
|
||||
import {
|
||||
SandboxOcrEngineService,
|
||||
SandboxOcrEngineType,
|
||||
OcrTyphoonOptions,
|
||||
OcrNpDmsOptions,
|
||||
} from '../services/sandbox-ocr-engine.service';
|
||||
import {
|
||||
OllamaService,
|
||||
@@ -562,7 +562,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
})
|
||||
);
|
||||
try {
|
||||
let ocrParams: OcrTyphoonOptions | undefined = undefined;
|
||||
let ocrParams: OcrNpDmsOptions | undefined = undefined;
|
||||
if (engineType === 'np-dms-ocr') {
|
||||
try {
|
||||
const ocrDraft =
|
||||
@@ -705,7 +705,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
const { idempotencyKey, payload } = data;
|
||||
const pdfPath = payload.pdfPath as string;
|
||||
const engineType = (payload.engineType as SandboxOcrEngineType) || 'auto';
|
||||
const typhoonOptions = payload.typhoonOptions as
|
||||
const ocrOptions = payload.ocrOptions as
|
||||
| { temperature?: number; topP?: number; repeatPenalty?: number }
|
||||
| undefined;
|
||||
|
||||
@@ -722,7 +722,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
})
|
||||
);
|
||||
|
||||
let ocrParams = typhoonOptions;
|
||||
let ocrParams = ocrOptions;
|
||||
if (!ocrParams && engineType === 'np-dms-ocr') {
|
||||
try {
|
||||
const ocrDraft =
|
||||
@@ -1078,7 +1078,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
ocrResult = await this.ocrService.detectAndExtract({
|
||||
pdfPath: attachment.filePath,
|
||||
activeProfile: job.data.effectiveProfile,
|
||||
typhoonOptions: job.data.ocrSnapshotParams,
|
||||
ocrOptions: job.data.ocrSnapshotParams,
|
||||
});
|
||||
} catch (err: unknown) {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
|
||||
+29
-28
@@ -1,8 +1,9 @@
|
||||
// File: src/modules/ai/processors/typhoon-llm.processor.ts
|
||||
// File: src/modules/ai/processors/np-dms-ai.processor.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: Initial processor สำหรับ Typhoon LLM sequential jobs (T009d, ADR-032)
|
||||
// - 2026-05-30: Initial processor สำหรับ np-dms-ai sequential jobs (T009d, ADR-032)
|
||||
// รันด้วย concurrency=1 เพื่อป้องกัน VRAM overflow บน RTX 2060 Super (8GB)
|
||||
// ใช้ keep_alive=0 ผ่าน Ollama API เพื่อ unload model หลังประมวลผล
|
||||
// - 2026-06-20: เปลี่ยนชื่อจาก typhoon-llm.processor.ts เป็น np-dms-ai.processor.ts
|
||||
|
||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Logger } from '@nestjs/common';
|
||||
@@ -16,14 +17,14 @@ import axios from 'axios';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
import { VramMonitorService } from '../services/vram-monitor.service';
|
||||
|
||||
/** ชื่อ queue สำหรับ Typhoon LLM jobs */
|
||||
export const QUEUE_TYPHOON_LLM = 'typhoon-llm';
|
||||
/** ชื่อ queue สำหรับ np-dms-ai LLM jobs */
|
||||
export const QUEUE_NP_DMS_AI = 'np-dms-ai';
|
||||
|
||||
/** รูปแบบข้อมูล job ใน Typhoon LLM queue */
|
||||
export interface TyphoonLlmJobData {
|
||||
/** prompt ที่จะส่งให้ Typhoon LLM */
|
||||
/** รูปแบบข้อมูล job ใน np-dms-ai LLM queue */
|
||||
export interface NpDmsAiJobData {
|
||||
/** prompt ที่จะส่งให้ np-dms-ai LLM */
|
||||
prompt: string;
|
||||
/** ชื่อ model เช่น scb10x/typhoon2.1-gemma3-4b */
|
||||
/** ชื่อ model เช่น typhoon2.5-np-dms:latest */
|
||||
model?: string;
|
||||
/** idempotencyKey สำหรับ Redis result key */
|
||||
idempotencyKey: string;
|
||||
@@ -39,19 +40,19 @@ interface OllamaGenerateResponse {
|
||||
done: boolean;
|
||||
}
|
||||
|
||||
// VRAM ที่ Typhoon 2.1 Gemma3 4B ต้องการ (MB) — ตาม ADR-032
|
||||
const TYPHOON_LLM_REQUIRED_VRAM_MB = 4500;
|
||||
// VRAM ที่ np-dms-ai ต้องการ (MB) — ตาม ADR-032
|
||||
const NP_DMS_AI_REQUIRED_VRAM_MB = 4500;
|
||||
// Timeout 120 วินาทีสำหรับ LLM generation
|
||||
const TYPHOON_LLM_TIMEOUT_MS = 120000;
|
||||
const NP_DMS_AI_TIMEOUT_MS = 120000;
|
||||
|
||||
/**
|
||||
* Processor สำหรับ Typhoon LLM jobs ที่รันแบบ sequential (concurrency=1)
|
||||
* Processor สำหรับ np-dms-ai LLM jobs ที่รันแบบ sequential (concurrency=1)
|
||||
* เพื่อป้องกัน VRAM overflow เมื่อรัน LLM หลายงานพร้อมกันบน RTX 2060 Super
|
||||
* ตาม ADR-032: lockDuration=180000ms รองรับ 120s timeout + buffer
|
||||
*/
|
||||
@Processor(QUEUE_TYPHOON_LLM, { concurrency: 1, lockDuration: 180000 })
|
||||
export class TyphoonLlmProcessor extends WorkerHost {
|
||||
private readonly logger = new Logger(TyphoonLlmProcessor.name);
|
||||
@Processor(QUEUE_NP_DMS_AI, { concurrency: 1, lockDuration: 180000 })
|
||||
export class NpDmsAiProcessor extends WorkerHost {
|
||||
private readonly logger = new Logger(NpDmsAiProcessor.name);
|
||||
private readonly ollamaUrl: string;
|
||||
private readonly defaultModel: string;
|
||||
|
||||
@@ -68,25 +69,25 @@ export class TyphoonLlmProcessor extends WorkerHost {
|
||||
this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
|
||||
);
|
||||
this.defaultModel = this.configService.get<string>(
|
||||
'OLLAMA_MODEL_TYPHOON',
|
||||
'scb10x/typhoon2.1-gemma3-4b'
|
||||
'OLLAMA_MODEL_MAIN',
|
||||
'typhoon2.5-np-dms:latest'
|
||||
);
|
||||
}
|
||||
|
||||
/** ประมวลผล Typhoon LLM job ทีละงาน */
|
||||
async process(job: Job<TyphoonLlmJobData>): Promise<void> {
|
||||
/** ประมวลผล np-dms-ai LLM job ทีละงาน */
|
||||
async process(job: Job<NpDmsAiJobData>): Promise<void> {
|
||||
const { prompt, model, idempotencyKey, documentPublicId } = job.data;
|
||||
const startTime = Date.now();
|
||||
const targetModel = model ?? this.defaultModel;
|
||||
this.logger.log(
|
||||
`Typhoon LLM job started — idempotencyKey=${idempotencyKey}, model=${targetModel}`
|
||||
`np-dms-ai LLM job started — idempotencyKey=${idempotencyKey}, model=${targetModel}`
|
||||
);
|
||||
// ตรวจสอบ VRAM ก่อนโหลด model
|
||||
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
||||
TYPHOON_LLM_REQUIRED_VRAM_MB
|
||||
NP_DMS_AI_REQUIRED_VRAM_MB
|
||||
);
|
||||
if (!hasCapacity) {
|
||||
const errMsg = `VRAM ไม่เพียงพอสำหรับ ${targetModel} (ต้องการ ${TYPHOON_LLM_REQUIRED_VRAM_MB}MB) — retry ภายหลัง`;
|
||||
const errMsg = `VRAM ไม่เพียงพอสำหรับ ${targetModel} (ต้องการ ${NP_DMS_AI_REQUIRED_VRAM_MB}MB) — retry ภายหลัง`;
|
||||
this.logger.warn(errMsg);
|
||||
await this.saveResult(idempotencyKey, {
|
||||
status: 'failed',
|
||||
@@ -117,7 +118,7 @@ export class TyphoonLlmProcessor extends WorkerHost {
|
||||
},
|
||||
keep_alive: 0,
|
||||
},
|
||||
{ timeout: TYPHOON_LLM_TIMEOUT_MS }
|
||||
{ timeout: NP_DMS_AI_TIMEOUT_MS }
|
||||
);
|
||||
const processingTimeMs = Date.now() - startTime;
|
||||
const generatedText = response.data.response ?? '';
|
||||
@@ -136,11 +137,11 @@ export class TyphoonLlmProcessor extends WorkerHost {
|
||||
processingTimeMs,
|
||||
});
|
||||
this.logger.log(
|
||||
`Typhoon LLM completed — ${generatedText.length} chars, ${processingTimeMs}ms`
|
||||
`np-dms-ai LLM completed — ${generatedText.length} chars, ${processingTimeMs}ms`
|
||||
);
|
||||
} catch (err: unknown) {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
this.logger.error(`Typhoon LLM job failed: ${errMsg}`);
|
||||
this.logger.error(`np-dms-ai LLM job failed: ${errMsg}`);
|
||||
await this.saveResult(idempotencyKey, {
|
||||
status: 'failed',
|
||||
errorMessage: errMsg,
|
||||
@@ -169,7 +170,7 @@ export class TyphoonLlmProcessor extends WorkerHost {
|
||||
}
|
||||
): Promise<void> {
|
||||
await this.redis.setex(
|
||||
`ai:typhoon:llm:${idempotencyKey}`,
|
||||
`ai:np-dms-ai:llm:${idempotencyKey}`,
|
||||
3600,
|
||||
JSON.stringify({
|
||||
idempotencyKey,
|
||||
@@ -179,7 +180,7 @@ export class TyphoonLlmProcessor extends WorkerHost {
|
||||
);
|
||||
}
|
||||
|
||||
/** บันทึก audit log สำหรับ Typhoon LLM interaction */
|
||||
/** บันทึก audit log สำหรับ np-dms-ai LLM interaction */
|
||||
private async writeAuditLog(params: {
|
||||
documentPublicId?: string;
|
||||
model: string;
|
||||
@@ -189,7 +190,7 @@ export class TyphoonLlmProcessor extends WorkerHost {
|
||||
}): Promise<void> {
|
||||
const log = this.auditLogRepo.create({
|
||||
documentPublicId: params.documentPublicId,
|
||||
aiModel: 'typhoon-llm',
|
||||
aiModel: 'np-dms-ai',
|
||||
modelName: params.model,
|
||||
modelType: 'llm',
|
||||
status: params.status,
|
||||
+19
-18
@@ -1,8 +1,9 @@
|
||||
// File: src/modules/ai/processors/typhoon-ocr.processor.ts
|
||||
// File: src/modules/ai/processors/np-dms-ocr-processor.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: Initial processor สำหรับ Typhoon OCR sequential jobs (T009c, ADR-032)
|
||||
// รันด้วย concurrency=1 เพื่อป้องกัน VRAM overflow บน RTX 2060 Super (8GB)
|
||||
// ใช้ keep_alive=0 ผ่าน sidecar Ollama API เพื่อ unload model หลังประมวลผล
|
||||
// - 2026-06-20: เปลี่ยนชื่อไฟล์จาก typhoon-ocr.processor.ts → np-dms-ocr-processor.ts
|
||||
|
||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Logger } from '@nestjs/common';
|
||||
@@ -17,24 +18,24 @@ import { VramMonitorService } from '../services/vram-monitor.service';
|
||||
import {
|
||||
SandboxOcrEngineService,
|
||||
SandboxOcrEngineType,
|
||||
OcrTyphoonOptions,
|
||||
OcrNpDmsOptions,
|
||||
} from '../services/sandbox-ocr-engine.service';
|
||||
|
||||
/** ชื่อ queue สำหรับ Typhoon OCR jobs */
|
||||
export const QUEUE_TYPHOON_OCR = 'typhoon-ocr';
|
||||
/** ชื่อ queue สำหรับ np-dms-ocr jobs */
|
||||
export const QUEUE_NP_DMS_OCR = 'np-dms-ocr';
|
||||
|
||||
/** รูปแบบข้อมูล job ใน Typhoon OCR queue */
|
||||
export interface TyphoonOcrJobData {
|
||||
/** รูปแบบข้อมูล job ใน np-dms-ocr queue */
|
||||
export interface NpDmsOcrJobData {
|
||||
/** public path ของไฟล์ PDF ที่ต้องการ OCR */
|
||||
pdfPath: string;
|
||||
/** engineType: 'typhoon-np-dms-ocr' สำหรับ queue นี้ */
|
||||
/** engineType: 'np-dms-ocr' สำหรับ queue นี้ */
|
||||
engineType: SandboxOcrEngineType;
|
||||
/** idempotencyKey สำหรับ Redis result key */
|
||||
idempotencyKey: string;
|
||||
/** documentPublicId สำหรับ audit log (optional) */
|
||||
documentPublicId?: string;
|
||||
/** Typhoon OCR options จาก sandbox UI เพื่อ override Modelfile defaults (optional) */
|
||||
typhoonOptions?: OcrTyphoonOptions;
|
||||
/** np-dms-ocr options จาก sandbox UI เพื่อ override Modelfile defaults (optional) */
|
||||
ocrOptions?: OcrNpDmsOptions;
|
||||
}
|
||||
|
||||
// VRAM ที่ np-dms-ocr ต้องการ (MB) — ตาม ADR-032
|
||||
@@ -45,9 +46,9 @@ const TYPHOON_OCR_REQUIRED_VRAM_MB = 4000;
|
||||
* เพื่อป้องกัน VRAM overflow เมื่อทำ OCR หลายงานพร้อมกันบน RTX 2060 Super
|
||||
* ตาม ADR-032: lockDuration=180000ms รองรับ 120s timeout + buffer
|
||||
*/
|
||||
@Processor(QUEUE_TYPHOON_OCR, { concurrency: 1, lockDuration: 180000 })
|
||||
export class TyphoonOcrProcessor extends WorkerHost {
|
||||
private readonly logger = new Logger(TyphoonOcrProcessor.name);
|
||||
@Processor(QUEUE_NP_DMS_OCR, { concurrency: 1, lockDuration: 180000 })
|
||||
export class NpDmsOcrProcessor extends WorkerHost {
|
||||
private readonly logger = new Logger(NpDmsOcrProcessor.name);
|
||||
|
||||
constructor(
|
||||
@InjectRedis() private readonly redis: Redis,
|
||||
@@ -61,13 +62,13 @@ export class TyphoonOcrProcessor extends WorkerHost {
|
||||
}
|
||||
|
||||
/** ประมวลผล np-dms-ocr job ทีละงาน */
|
||||
async process(job: Job<TyphoonOcrJobData>): Promise<void> {
|
||||
async process(job: Job<NpDmsOcrJobData>): Promise<void> {
|
||||
const {
|
||||
pdfPath,
|
||||
engineType,
|
||||
idempotencyKey,
|
||||
documentPublicId,
|
||||
typhoonOptions,
|
||||
ocrOptions,
|
||||
} = job.data;
|
||||
const startTime = Date.now();
|
||||
this.logger.log(
|
||||
@@ -116,7 +117,7 @@ export class TyphoonOcrProcessor extends WorkerHost {
|
||||
const result = await this.sandboxOcrEngineService.detectAndExtract(
|
||||
pdfPath,
|
||||
engineType,
|
||||
typhoonOptions
|
||||
ocrOptions
|
||||
);
|
||||
const processingTimeMs = Date.now() - startTime;
|
||||
// บันทึกผลลัพธ์ใน Redis cache (24h TTL)
|
||||
@@ -171,7 +172,7 @@ export class TyphoonOcrProcessor extends WorkerHost {
|
||||
}
|
||||
): Promise<void> {
|
||||
await this.redis.setex(
|
||||
`ai:typhoon:ocr:${idempotencyKey}`,
|
||||
`ai:np-dms-ocr:${idempotencyKey}`,
|
||||
3600,
|
||||
JSON.stringify({
|
||||
idempotencyKey,
|
||||
@@ -193,8 +194,8 @@ export class TyphoonOcrProcessor extends WorkerHost {
|
||||
}): Promise<void> {
|
||||
const log = this.auditLogRepo.create({
|
||||
documentPublicId: params.documentPublicId,
|
||||
aiModel: 'typhoon-ocr',
|
||||
modelName: 'typhoon-np-dms-ocr:latest',
|
||||
aiModel: 'np-dms-ocr',
|
||||
modelName: 'np-dms-ocr:latest',
|
||||
modelType: params.engineType,
|
||||
status: params.status,
|
||||
processingTimeMs: params.processingTimeMs,
|
||||
@@ -97,7 +97,7 @@ export class AiPolicyService {
|
||||
*/
|
||||
getCanonicalModelName(modelName: string): 'np-dms-ai' | 'np-dms-ocr' {
|
||||
const name = modelName.toLowerCase();
|
||||
if (name.includes('ocr') || name.includes('typhoon-np-dms-ocr')) {
|
||||
if (name.includes('ocr')) {
|
||||
return 'np-dms-ocr';
|
||||
}
|
||||
return 'np-dms-ai';
|
||||
|
||||
@@ -4,13 +4,13 @@
|
||||
// - 2026-05-25: แก้ไข AggregateError (empty message) จาก axios โดย wrap เป็น Error พร้อม context ที่ชัดเจน.
|
||||
// - 2026-05-25: เพิ่ม path remapping (OCR_UPLOAD_BASE_PATH) เพื่อแปลง local upload path เป็น path ที่ sidecar เห็นผ่าน CIFS.
|
||||
// - 2026-05-29: เพิ่ม checkHealth() เพื่อตรวจสอบสุขภาพของ OCR sidecar สำหรับ getSystemHealth() (ADR-027)
|
||||
// - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อความเข้ากันได้กับ CPU เก่า
|
||||
// - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น fast-path (PyMuPDF text layer) เพื่อความเข้ากันได้กับ CPU เก่า
|
||||
// - 2026-05-30: เพิ่ม VRAM insufficiency guard สำหรับ Typhoon OCR engine (T016a, ADR-032)
|
||||
// - 2026-05-30: ปรับปรุงสำหรับ Dynamic OCR Engine selection, Caching, และ Graceful Fallback (T013, T014, T016, T022, T023, US1)
|
||||
// - 2026-06-01: ปรับปรุง remapPath ให้รองรับ Windows absolute และ relative path ได้แม่นยำ 100%
|
||||
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart ไปยัง /ocr-upload แทนการส่ง path
|
||||
// - 2026-06-01: เปลี่ยน processWithFastPath/processWithNpDmsOcr ให้ส่ง file content ผ่าน multipart ไปยัง /ocr-upload แทนการส่ง path
|
||||
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
|
||||
// - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_ENGINE.engineName เป็น typhoon-np-dms-ocr:latest ตรงกับชื่อโมเดลใน Ollama
|
||||
// - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_ENGINE.engineName เป็น np-dms-ocr:latest ตรงกับชื่อโมเดลใน Ollama
|
||||
// - 2026-06-11: US2 - คำนวณ OCR residency keep_alive แบบ dynamic ตาม VRAM headroom และ active profile
|
||||
// - 2026-06-13: US5 - เพิ่มการส่ง temperature, topP และ repeatPenalty ไปยัง OCR sidecar ผ่าน multipart form (T070)
|
||||
|
||||
@@ -28,6 +28,9 @@ import {
|
||||
} from '../entities/ocr-engine-configuration.entity';
|
||||
import { OcrEngineResponseDto } from '../dto/ocr-engine-response.dto';
|
||||
import { SystemSetting } from '../entities/system-setting.entity';
|
||||
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
import { BusinessException } from '../../../common/exceptions';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
import { OcrCacheService } from './ocr-cache.service';
|
||||
import { VramMonitorService } from './vram-monitor.service';
|
||||
@@ -41,7 +44,7 @@ export interface OcrDetectionInput {
|
||||
pdfPath?: string;
|
||||
documentPublicId?: string; // เพิ่มเพื่อการทำ audit logs
|
||||
activeProfile?: ExecutionProfile;
|
||||
typhoonOptions?: {
|
||||
ocrOptions?: {
|
||||
temperature?: number;
|
||||
topP?: number;
|
||||
repeatPenalty?: number;
|
||||
@@ -68,16 +71,16 @@ const OCR_ACTIVE_ENGINE_KEY = 'OCR_ACTIVE_ENGINE';
|
||||
const OCR_ACTIVE_ENGINE_CACHE_KEY = 'system_settings:OCR_ACTIVE_ENGINE';
|
||||
const OCR_ACTIVE_ENGINE_TTL_SECONDS = 30;
|
||||
|
||||
const TESSERACT_ENGINE_ID = '019505a1-7c3e-7000-8000-abc123def001';
|
||||
const TYPHOON_ENGINE_ID = '019505a1-7c3e-7000-8000-abc123def002';
|
||||
const FAST_PATH_ENGINE_ID = '019505a1-7c3e-7000-8000-abc123def001';
|
||||
const OCR_ENGINE_ID = '019505a1-7c3e-7000-8000-abc123def002';
|
||||
|
||||
// VRAM ที่ Typhoon OCR-3B ต้องการ (MB)
|
||||
const TYPHOON_OCR_REQUIRED_VRAM_MB = 4000;
|
||||
// VRAM ที่ np-dms-ocr ต้องการ (MB)
|
||||
const OCR_REQUIRED_VRAM_MB = 4000;
|
||||
|
||||
const TESSERACT_ENGINE: OcrEngineConfiguration = {
|
||||
engineId: TESSERACT_ENGINE_ID,
|
||||
engineName: 'Tesseract OCR',
|
||||
engineType: OcrEngineType.TESSERACT,
|
||||
const FAST_PATH_ENGINE: OcrEngineConfiguration = {
|
||||
engineId: FAST_PATH_ENGINE_ID,
|
||||
engineName: 'Fast Path (PyMuPDF)',
|
||||
engineType: OcrEngineType.FAST_PATH,
|
||||
isActive: true,
|
||||
vramRequirementMB: 0,
|
||||
processingTimeLimitSeconds: 30,
|
||||
@@ -87,25 +90,25 @@ const TESSERACT_ENGINE: OcrEngineConfiguration = {
|
||||
updatedAt: new Date('2026-05-30T00:00:00Z'),
|
||||
};
|
||||
|
||||
const TYPHOON_ENGINE: OcrEngineConfiguration = {
|
||||
engineId: TYPHOON_ENGINE_ID,
|
||||
engineName: 'typhoon-np-dms-ocr:latest',
|
||||
engineType: OcrEngineType.TYPHOON_OCR,
|
||||
const OCR_ENGINE: OcrEngineConfiguration = {
|
||||
engineId: OCR_ENGINE_ID,
|
||||
engineName: 'np-dms-ocr:latest',
|
||||
engineType: OcrEngineType.NP_DMS_OCR,
|
||||
isActive: true,
|
||||
vramRequirementMB: TYPHOON_OCR_REQUIRED_VRAM_MB,
|
||||
vramRequirementMB: OCR_REQUIRED_VRAM_MB,
|
||||
processingTimeLimitSeconds: 60,
|
||||
concurrentLimit: 1,
|
||||
fallbackEngineId: TESSERACT_ENGINE_ID,
|
||||
fallbackEngineId: FAST_PATH_ENGINE_ID,
|
||||
createdAt: new Date('2026-05-30T00:00:00Z'),
|
||||
updatedAt: new Date('2026-05-30T00:00:00Z'),
|
||||
};
|
||||
|
||||
const ENGINES_MAP = new Map<string, OcrEngineConfiguration>([
|
||||
[TESSERACT_ENGINE_ID, TESSERACT_ENGINE],
|
||||
[TYPHOON_ENGINE_ID, TYPHOON_ENGINE],
|
||||
[FAST_PATH_ENGINE_ID, FAST_PATH_ENGINE],
|
||||
[OCR_ENGINE_ID, OCR_ENGINE],
|
||||
]);
|
||||
|
||||
/** บริการเลือก fast path หรือ OCR sidecar (Tesseract/Typhoon) พร้อมความสามารถในสลับ Engine และ Caching */
|
||||
/** บริการเลือก fast path หรือ OCR sidecar (np-dms-ocr) พร้อมความสามารถในสลับ Engine และ Caching */
|
||||
@Injectable()
|
||||
export class OcrService {
|
||||
private readonly logger = new Logger(OcrService.name);
|
||||
@@ -121,6 +124,9 @@ export class OcrService {
|
||||
private readonly settingRepo: Repository<SystemSetting>,
|
||||
@InjectRepository(AiAuditLog)
|
||||
private readonly auditLogRepo: Repository<AiAuditLog>,
|
||||
@InjectRepository(AiExecutionProfile)
|
||||
private readonly profileRepo: Repository<AiExecutionProfile>,
|
||||
private readonly aiPromptsService: AiPromptsService,
|
||||
private readonly ocrCacheService: OcrCacheService,
|
||||
private readonly vramMonitorService: VramMonitorService,
|
||||
private readonly aiPolicyService: AiPolicyService,
|
||||
@@ -131,10 +137,15 @@ export class OcrService {
|
||||
'OCR_API_URL',
|
||||
'http://localhost:8765'
|
||||
);
|
||||
this.ocrSidecarApiKey = this.configService.get<string>(
|
||||
'OCR_SIDECAR_API_KEY',
|
||||
'lcbp3-dms-ocr-sidecar-secure-token-2026'
|
||||
const ocrSidecarApiKey = this.configService.get<string>(
|
||||
'OCR_SIDECAR_API_KEY'
|
||||
);
|
||||
if (!ocrSidecarApiKey) {
|
||||
throw new Error(
|
||||
'OCR_SIDECAR_API_KEY is required — กรุณาตั้งค่า environment variable'
|
||||
);
|
||||
}
|
||||
this.ocrSidecarApiKey = ocrSidecarApiKey;
|
||||
this.vramHeadroomThresholdMb = this.configService.get<number>(
|
||||
'VRAM_HEADROOM_THRESHOLD_MB',
|
||||
this.configService.get<number>('AI_VRAM_HEADROOM_THRESHOLD_MB', 3000)
|
||||
@@ -272,7 +283,7 @@ export class OcrService {
|
||||
where: { settingKey: OCR_ACTIVE_ENGINE_KEY },
|
||||
});
|
||||
|
||||
const activeEngine = setting?.settingValue ?? TESSERACT_ENGINE_ID;
|
||||
const activeEngine = setting?.settingValue ?? FAST_PATH_ENGINE_ID;
|
||||
await this.redis.set(
|
||||
OCR_ACTIVE_ENGINE_CACHE_KEY,
|
||||
activeEngine,
|
||||
@@ -284,7 +295,7 @@ export class OcrService {
|
||||
this.logger.error(
|
||||
`Failed to get active OCR engine: ${error instanceof Error ? error.message : String(error)}`
|
||||
);
|
||||
return TESSERACT_ENGINE_ID;
|
||||
return FAST_PATH_ENGINE_ID;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -330,20 +341,20 @@ export class OcrService {
|
||||
|
||||
const activeEngineId = await this.getActiveEngineId();
|
||||
|
||||
if (activeEngineId === TYPHOON_ENGINE_ID) {
|
||||
return this.processWithTyphoon(input);
|
||||
if (activeEngineId === OCR_ENGINE_ID) {
|
||||
return this.processWithNpDmsOcr(input);
|
||||
} else {
|
||||
return this.processWithTesseract(input);
|
||||
return this.processWithFastPath(input);
|
||||
}
|
||||
}
|
||||
|
||||
/** ประมวลผลผ่าน Tesseract OCR โดยส่ง file content ผ่าน multipart */
|
||||
private async processWithTesseract(
|
||||
/** ประมวลผลผ่าน Fast Path (PyMuPDF text layer) โดยส่ง file content ผ่าน multipart */
|
||||
private async processWithFastPath(
|
||||
input: OcrDetectionInput
|
||||
): Promise<OcrDetectionResult> {
|
||||
const startTime = Date.now();
|
||||
try {
|
||||
this.logger.debug(`Tesseract OCR processing: ${input.pdfPath}`);
|
||||
this.logger.debug(`Fast Path processing: ${input.pdfPath}`);
|
||||
const fileBuffer = fs.readFileSync(input.pdfPath!);
|
||||
const form = new FormData();
|
||||
form.append(
|
||||
@@ -364,9 +375,9 @@ export class OcrService {
|
||||
const durationMs = Date.now() - startTime;
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'tesseract',
|
||||
modelName: 'tesseract-ocr',
|
||||
modelType: 'tesseract',
|
||||
aiModel: 'fast-path',
|
||||
modelName: 'pymupdf',
|
||||
modelType: 'fast-path',
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
processingTimeMs: durationMs,
|
||||
cacheHit: false,
|
||||
@@ -384,36 +395,70 @@ export class OcrService {
|
||||
: String(err);
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'tesseract',
|
||||
modelName: 'tesseract-ocr',
|
||||
modelType: 'tesseract',
|
||||
aiModel: 'fast-path',
|
||||
modelName: 'pymupdf',
|
||||
modelType: 'fast-path',
|
||||
status: AiAuditStatus.FAILED,
|
||||
processingTimeMs: durationMs,
|
||||
errorMessage: cause,
|
||||
cacheHit: false,
|
||||
});
|
||||
throw new Error(`Tesseract OCR Sidecar failed: ${cause}`);
|
||||
throw new Error(`Fast Path OCR Sidecar failed: ${cause}`);
|
||||
}
|
||||
}
|
||||
|
||||
/** ประมวลผลผ่าน Typhoon OCR */
|
||||
private async processWithTyphoon(
|
||||
/** ประมวลผลผ่าน np-dms-ocr (Ollama) */
|
||||
private async processWithNpDmsOcr(
|
||||
input: OcrDetectionInput
|
||||
): Promise<OcrDetectionResult> {
|
||||
const startTime = Date.now();
|
||||
try {
|
||||
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
||||
TYPHOON_OCR_REQUIRED_VRAM_MB
|
||||
);
|
||||
const hasCapacity =
|
||||
await this.vramMonitorService.hasVramCapacity(OCR_REQUIRED_VRAM_MB);
|
||||
if (!hasCapacity) {
|
||||
this.logger.warn(
|
||||
`VRAM insufficient for Typhoon OCR. Falling back to Tesseract baseline.`
|
||||
`VRAM insufficient for np-dms-ocr. Falling back to fast-path.`
|
||||
);
|
||||
return this.processWithTesseract(input);
|
||||
return this.processWithFastPath(input);
|
||||
}
|
||||
const residency = await this.calculateOcrResidency(input.activeProfile);
|
||||
const keepAlive = residency.keepAliveSeconds;
|
||||
this.logger.debug(`Typhoon OCR processing: ${input.pdfPath}`);
|
||||
await this.calculateOcrResidency(input.activeProfile);
|
||||
|
||||
// Resolve runtime parameters from DB (ocr-extract profile)
|
||||
const profile = await this.profileRepo.findOne({
|
||||
where: { profileName: 'ocr-extract' },
|
||||
});
|
||||
const runtimeParams = {
|
||||
temperature: profile ? Number(profile.temperature) : 0.1,
|
||||
top_p: profile ? Number(profile.topP) : 0.5,
|
||||
repeat_penalty: profile ? Number(profile.repeatPenalty) : 1.0,
|
||||
max_tokens: profile?.maxTokens ?? 16000,
|
||||
};
|
||||
|
||||
// Override with input ocrOptions if provided
|
||||
if (input.ocrOptions?.temperature !== undefined) {
|
||||
runtimeParams.temperature = input.ocrOptions.temperature;
|
||||
}
|
||||
if (input.ocrOptions?.topP !== undefined) {
|
||||
runtimeParams.top_p = input.ocrOptions.topP;
|
||||
}
|
||||
if (input.ocrOptions?.repeatPenalty !== undefined) {
|
||||
runtimeParams.repeat_penalty = input.ocrOptions.repeatPenalty;
|
||||
}
|
||||
|
||||
// Resolve Active Prompt from DB (ocr_extraction)
|
||||
const activePrompt =
|
||||
await this.aiPromptsService.getActive('ocr_extraction');
|
||||
if (!activePrompt) {
|
||||
throw new BusinessException(
|
||||
'NO_ACTIVE_PROMPT',
|
||||
'No active ocr_extraction prompt found',
|
||||
'ไม่พบ Prompt OCR สำหรับดึงข้อมูลที่เปิดใช้งาน'
|
||||
);
|
||||
}
|
||||
const systemPrompt = activePrompt.template;
|
||||
const dmsTags = activePrompt.contextConfig?.dmsTags;
|
||||
|
||||
this.logger.debug(`np-dms-ocr processing: ${input.pdfPath}`);
|
||||
const fileBuffer = fs.readFileSync(input.pdfPath!);
|
||||
const form = new FormData();
|
||||
form.append(
|
||||
@@ -421,20 +466,18 @@ export class OcrService {
|
||||
new Blob([fileBuffer], { type: 'application/pdf' }),
|
||||
'upload.pdf'
|
||||
);
|
||||
form.append('engine', 'typhoon-np-dms-ocr');
|
||||
form.append('keep_alive', String(keepAlive));
|
||||
if (input.typhoonOptions?.temperature !== undefined) {
|
||||
form.append('temperature', String(input.typhoonOptions.temperature));
|
||||
}
|
||||
if (input.typhoonOptions?.topP !== undefined) {
|
||||
form.append('topP', String(input.typhoonOptions.topP));
|
||||
}
|
||||
if (input.typhoonOptions?.repeatPenalty !== undefined) {
|
||||
form.append(
|
||||
'repeatPenalty',
|
||||
String(input.typhoonOptions.repeatPenalty)
|
||||
);
|
||||
form.append('engine', 'np-dms-ocr');
|
||||
form.append('systemPrompt', systemPrompt);
|
||||
if (dmsTags) {
|
||||
form.append('dmsTags', JSON.stringify(dmsTags));
|
||||
}
|
||||
form.append('runtimeParams', JSON.stringify(runtimeParams));
|
||||
|
||||
// Append individual overrides for backward compatibility
|
||||
form.append('temperature', String(runtimeParams.temperature));
|
||||
form.append('topP', String(runtimeParams.top_p));
|
||||
form.append('repeatPenalty', String(runtimeParams.repeat_penalty));
|
||||
|
||||
const response = await axios.post<OcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
@@ -447,9 +490,9 @@ export class OcrService {
|
||||
const durationMs = Date.now() - startTime;
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'typhoon-ocr',
|
||||
modelName: 'typhoon-np-dms-ocr:latest',
|
||||
modelType: 'typhoon-ocr',
|
||||
aiModel: 'np-dms-ocr',
|
||||
modelName: 'np-dms-ocr:latest',
|
||||
modelType: 'np-dms-ocr',
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
processingTimeMs: durationMs,
|
||||
cacheHit: false,
|
||||
@@ -460,9 +503,9 @@ export class OcrService {
|
||||
};
|
||||
} catch (err: unknown) {
|
||||
this.logger.warn(
|
||||
`Typhoon OCR failed, trying fallback baseline (Tesseract): ${err instanceof Error ? err.message : String(err)}`
|
||||
`np-dms-ocr failed, trying fallback to fast-path: ${err instanceof Error ? err.message : String(err)}`
|
||||
);
|
||||
return this.processWithTesseract(input);
|
||||
return this.processWithFastPath(input);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
// File: src/modules/ai/services/sandbox-ocr-engine.service.spec.ts
|
||||
// Change Log:
|
||||
// - 2026-06-14: สร้าง unit tests สำหรับ SandboxOcrEngineService ครอบคลุม detectAndExtract ทุก engine
|
||||
// - 2026-06-20: เพิ่ม mock getRepositoryToken(AiExecutionProfile) สำหรับทดสอบ parameter governance
|
||||
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||
import axios from 'axios';
|
||||
import * as fs from 'fs';
|
||||
import { SandboxOcrEngineService } from './sandbox-ocr-engine.service';
|
||||
import { OcrService } from './ocr.service';
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||
|
||||
jest.mock('axios');
|
||||
jest.mock('fs');
|
||||
@@ -16,14 +19,31 @@ jest.mock('fs');
|
||||
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||
const mockedFs = fs as jest.Mocked<typeof fs>;
|
||||
|
||||
/** OcrService mock สำหรับ tesseract/fast-path */
|
||||
/** OcrService mock สำหรับ fast-path */
|
||||
const mockOcrService = {
|
||||
detectAndExtract: jest.fn(),
|
||||
};
|
||||
|
||||
/** AiPromptsService mock สำหรับ ocr_system prompt */
|
||||
const mockAiPromptsService = {
|
||||
getActive: jest.fn(),
|
||||
getActive: jest.fn().mockResolvedValue({
|
||||
template: 'mock active system prompt',
|
||||
contextConfig: {
|
||||
dmsTags: ['tag1', 'tag2'],
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
/** AiExecutionProfile mock repository */
|
||||
const mockProfile = {
|
||||
profileName: 'ocr-extract',
|
||||
temperature: 0.1,
|
||||
topP: 0.5,
|
||||
repeatPenalty: 1.0,
|
||||
maxTokens: 16000,
|
||||
};
|
||||
const mockProfileRepository = {
|
||||
findOne: jest.fn().mockResolvedValue(mockProfile),
|
||||
};
|
||||
|
||||
/** ConfigService mock */
|
||||
@@ -48,6 +68,10 @@ describe('SandboxOcrEngineService', () => {
|
||||
{ provide: ConfigService, useValue: mockConfigService },
|
||||
{ provide: OcrService, useValue: mockOcrService },
|
||||
{ provide: AiPromptsService, useValue: mockAiPromptsService },
|
||||
{
|
||||
provide: getRepositoryToken(AiExecutionProfile),
|
||||
useValue: mockProfileRepository,
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
service = module.get<SandboxOcrEngineService>(SandboxOcrEngineService);
|
||||
@@ -65,7 +89,7 @@ describe('SandboxOcrEngineService', () => {
|
||||
});
|
||||
const result = await service.detectAndExtract('/tmp/file.pdf', 'auto');
|
||||
expect(result.text).toBe('auto extracted text');
|
||||
expect(result.engineUsed).toBe('tesseract');
|
||||
expect(result.engineUsed).toBe('fast-path');
|
||||
expect(result.fallbackUsed).toBe(false);
|
||||
expect(mockOcrService.detectAndExtract).toHaveBeenCalledWith({
|
||||
pdfPath: '/tmp/file.pdf',
|
||||
@@ -83,42 +107,6 @@ describe('SandboxOcrEngineService', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('detectAndExtract() — engine=tesseract', () => {
|
||||
it('ควร route ไปยัง OcrService เมื่อ engine=tesseract', async () => {
|
||||
mockOcrService.detectAndExtract.mockResolvedValueOnce({
|
||||
text: 'tesseract text',
|
||||
ocrUsed: true,
|
||||
});
|
||||
const result = await service.detectAndExtract(
|
||||
'/tmp/file.pdf',
|
||||
'tesseract'
|
||||
);
|
||||
expect(result.engineUsed).toBe('tesseract');
|
||||
expect(result.fallbackUsed).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('detectAndExtract() — engine=typhoon-np-dms-ocr (legacy alias)', () => {
|
||||
it('ควรแปลง typhoon-np-dms-ocr เป็น np-dms-ocr และส่งไปยัง sidecar', async () => {
|
||||
const mockBuffer = Buffer.from('pdf content');
|
||||
(mockedFs.readFileSync as jest.Mock).mockReturnValueOnce(mockBuffer);
|
||||
mockedAxios.post = jest.fn().mockResolvedValueOnce({
|
||||
data: {
|
||||
text: 'ocr text via alias',
|
||||
ocrUsed: true,
|
||||
engineUsed: 'np-dms-ocr',
|
||||
},
|
||||
});
|
||||
const result = await service.detectAndExtract(
|
||||
'/tmp/file.pdf',
|
||||
'typhoon-np-dms-ocr'
|
||||
);
|
||||
expect(result.text).toBe('ocr text via alias');
|
||||
expect(result.engineUsed).toBe('np-dms-ocr');
|
||||
expect(result.fallbackUsed).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('detectAndExtract() — engine=np-dms-ocr (sidecar path)', () => {
|
||||
it('ควรส่ง file ไปยัง sidecar /ocr-upload สำเร็จ', async () => {
|
||||
const mockBuffer = Buffer.from('pdf binary data');
|
||||
@@ -149,7 +137,7 @@ describe('SandboxOcrEngineService', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('ควรส่ง typhoonOptions (temperature, topP, repeatPenalty) ไปใน form data', async () => {
|
||||
it('ควรส่ง ocrOptions (temperature, topP, repeatPenalty) ไปใน form data', async () => {
|
||||
const mockBuffer = Buffer.from('pdf data');
|
||||
(mockedFs.readFileSync as jest.Mock).mockReturnValueOnce(mockBuffer);
|
||||
mockedAxios.post = jest.fn().mockResolvedValueOnce({
|
||||
@@ -178,13 +166,13 @@ describe('SandboxOcrEngineService', () => {
|
||||
expect(result.engineUsed).toBe('np-dms-ocr'); // resolvedEngineType fallback
|
||||
});
|
||||
|
||||
it('ควร fallback ไปยัง Tesseract เมื่อ fs.readFileSync ล้มเหลว (outer catch fallback)', async () => {
|
||||
it('ควร fallback ไปยัง fast-path เมื่อ fs.readFileSync ล้มเหลว (outer catch fallback)', async () => {
|
||||
(mockedFs.readFileSync as jest.Mock).mockImplementationOnce(() => {
|
||||
throw new Error('ENOENT: file not found');
|
||||
});
|
||||
// service จะ catch error และ fallback ไปยัง Tesseract
|
||||
// service จะ catch error และ fallback ไปยัง fast-path
|
||||
mockOcrService.detectAndExtract.mockResolvedValueOnce({
|
||||
text: 'tesseract fallback text',
|
||||
text: 'fast-path fallback text',
|
||||
ocrUsed: true,
|
||||
});
|
||||
const result = await service.detectAndExtract(
|
||||
@@ -192,10 +180,10 @@ describe('SandboxOcrEngineService', () => {
|
||||
'np-dms-ocr'
|
||||
);
|
||||
expect(result.fallbackUsed).toBe(true);
|
||||
expect(result.engineUsed).toBe('tesseract');
|
||||
expect(result.engineUsed).toBe('fast-path');
|
||||
});
|
||||
|
||||
it('ควร fallback ไปยัง Tesseract เมื่อ sidecar HTTP error เกิดขึ้น', async () => {
|
||||
it('ควร fallback ไปยัง fast-path เมื่อ sidecar HTTP error เกิดขึ้น', async () => {
|
||||
const mockBuffer = Buffer.from('pdf data');
|
||||
(mockedFs.readFileSync as jest.Mock).mockReturnValueOnce(mockBuffer);
|
||||
mockedAxios.post = jest.fn().mockRejectedValueOnce(
|
||||
@@ -204,16 +192,16 @@ describe('SandboxOcrEngineService', () => {
|
||||
})
|
||||
);
|
||||
mockOcrService.detectAndExtract.mockResolvedValueOnce({
|
||||
text: 'tesseract fallback result',
|
||||
text: 'fast-path fallback result',
|
||||
ocrUsed: true,
|
||||
});
|
||||
const result = await service.detectAndExtract(
|
||||
'/tmp/doc.pdf',
|
||||
'np-dms-ocr'
|
||||
);
|
||||
expect(result.text).toBe('tesseract fallback result');
|
||||
expect(result.text).toBe('fast-path fallback result');
|
||||
expect(result.fallbackUsed).toBe(true);
|
||||
expect(result.engineUsed).toBe('tesseract');
|
||||
expect(result.engineUsed).toBe('fast-path');
|
||||
});
|
||||
|
||||
it('ควร fallback ไปยัง fast-path เมื่อ sidecar error และ OcrService ส่ง ocrUsed=false', async () => {
|
||||
|
||||
@@ -3,26 +3,26 @@
|
||||
// - 2026-05-30: แยก SandboxOcrEngineService ออกจาก OcrService เพื่อรองรับการเลือก Typhoon OCR เฉพาะ sandbox โดยไม่กระทบ core OCR flow
|
||||
// - 2026-06-01: เปลี่ยนจาก remapPath + pdfPath ไปเป็น multipart file upload ไปยัง /ocr-upload (แก้ปัญหา Docker WSL2 mount)
|
||||
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
|
||||
// - 2026-06-04: ADR-034 — เพิ่ม 'typhoon-np-dms-ocr' เป็น canonical SandboxOcrEngineType; legacy aliases ยังรองรับ
|
||||
// - 2026-06-04: เพิ่ม OcrTyphoonOptions interface; รับ temperature/topP/repeatPenalty จาก frontend sandbox เพื่อ override Modelfile defaults
|
||||
// - 2026-06-13: ADR-036 — เปลี่ยน canonical SandboxOcrEngineType เป็น np-dms-ocr และคง legacy alias
|
||||
// - 2026-06-04: ADR-034 — เพิ่ม 'np-dms-ocr' เป็น canonical SandboxOcrEngineType
|
||||
// - 2026-06-04: เพิ่ม OcrNpDmsOptions interface; รับ temperature/topP/repeatPenalty จาก frontend sandbox เพื่อ override Modelfile defaults
|
||||
// - 2026-06-13: ADR-036 — เปลี่ยน canonical SandboxOcrEngineType เป็น np-dms-ocr
|
||||
// - 2026-06-17: เพิ่ม AiPromptsService injection และส่ง systemPrompt form field จาก active ocr_system prompt (T028)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import axios from 'axios';
|
||||
import * as fs from 'fs';
|
||||
import { OcrService } from './ocr.service';
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||
import { BusinessException } from '../../../common/exceptions';
|
||||
|
||||
export type SandboxOcrEngineType =
|
||||
| 'auto'
|
||||
| 'tesseract'
|
||||
| 'np-dms-ocr'
|
||||
| 'typhoon-np-dms-ocr';
|
||||
export type SandboxOcrEngineType = 'auto' | 'np-dms-ocr';
|
||||
|
||||
/** ค่า parameter สำหรับ Typhoon OCR ที่ override Modelfile defaults ได้จาก sandbox UI */
|
||||
export interface OcrTyphoonOptions {
|
||||
/** ค่า parameter สำหรับ np-dms-ocr ที่ override Modelfile defaults ได้จาก sandbox UI */
|
||||
export interface OcrNpDmsOptions {
|
||||
temperature?: number;
|
||||
topP?: number;
|
||||
repeatPenalty?: number;
|
||||
@@ -50,7 +50,9 @@ export class SandboxOcrEngineService {
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
private readonly ocrService: OcrService,
|
||||
private readonly aiPromptsService: AiPromptsService
|
||||
private readonly aiPromptsService: AiPromptsService,
|
||||
@InjectRepository(AiExecutionProfile)
|
||||
private readonly profileRepo: Repository<AiExecutionProfile>
|
||||
) {
|
||||
this.ocrApiUrl = this.configService.get<string>(
|
||||
'OCR_API_URL',
|
||||
@@ -62,26 +64,23 @@ export class SandboxOcrEngineService {
|
||||
);
|
||||
}
|
||||
|
||||
/** รัน OCR ตาม engine ที่เลือก โดย fallback กลับไป Tesseract baseline เมื่อ Typhoon ล้มเหลว */
|
||||
/** รัน OCR ตาม engine ที่เลือก โดย fallback กลับไป fast-path เมื่อ np-dms-ocr ล้มเหลว */
|
||||
async detectAndExtract(
|
||||
pdfPath: string,
|
||||
engineType: SandboxOcrEngineType = 'auto',
|
||||
typhoonOptions?: OcrTyphoonOptions
|
||||
ocrOptions?: OcrNpDmsOptions
|
||||
): Promise<SandboxOcrResult> {
|
||||
const resolvedEngineType =
|
||||
engineType === 'typhoon-np-dms-ocr' ? 'np-dms-ocr' : engineType;
|
||||
const resolvedEngineType = engineType;
|
||||
this.logger.log(
|
||||
`detectAndExtract called — engine="${resolvedEngineType}" pdfPath="${pdfPath}" typhoonOptions=${JSON.stringify(typhoonOptions ?? null)}`
|
||||
`detectAndExtract called — engine="${resolvedEngineType}" pdfPath="${pdfPath}" ocrOptions=${JSON.stringify(ocrOptions ?? null)}`
|
||||
);
|
||||
if (resolvedEngineType === 'auto' || resolvedEngineType === 'tesseract') {
|
||||
this.logger.log(
|
||||
`engine="${resolvedEngineType}" → routing to Tesseract/fast-path`
|
||||
);
|
||||
if (resolvedEngineType === 'auto') {
|
||||
this.logger.log(`engine="${resolvedEngineType}" → routing to fast-path`);
|
||||
const result = await this.ocrService.detectAndExtract({ pdfPath });
|
||||
return {
|
||||
text: result.text,
|
||||
ocrUsed: result.ocrUsed,
|
||||
engineUsed: result.ocrUsed ? 'tesseract' : 'fast-path',
|
||||
engineUsed: result.ocrUsed ? 'fast-path' : 'fast-path',
|
||||
fallbackUsed: false,
|
||||
};
|
||||
}
|
||||
@@ -103,6 +102,42 @@ export class SandboxOcrEngineService {
|
||||
);
|
||||
throw fsErr;
|
||||
}
|
||||
|
||||
// Resolve runtime parameters from DB (ocr-extract profile)
|
||||
const profile = await this.profileRepo.findOne({
|
||||
where: { profileName: 'ocr-extract' },
|
||||
});
|
||||
const runtimeParams = {
|
||||
temperature: profile ? Number(profile.temperature) : 0.1,
|
||||
top_p: profile ? Number(profile.topP) : 0.5,
|
||||
repeat_penalty: profile ? Number(profile.repeatPenalty) : 1.0,
|
||||
max_tokens: profile?.maxTokens ?? 16000,
|
||||
};
|
||||
|
||||
// Override with sandbox options if provided
|
||||
if (ocrOptions?.temperature !== undefined) {
|
||||
runtimeParams.temperature = ocrOptions.temperature;
|
||||
}
|
||||
if (ocrOptions?.topP !== undefined) {
|
||||
runtimeParams.top_p = ocrOptions.topP;
|
||||
}
|
||||
if (ocrOptions?.repeatPenalty !== undefined) {
|
||||
runtimeParams.repeat_penalty = ocrOptions.repeatPenalty;
|
||||
}
|
||||
|
||||
// Resolve Active Prompt from DB (ocr_extraction)
|
||||
const activePrompt =
|
||||
await this.aiPromptsService.getActive('ocr_extraction');
|
||||
if (!activePrompt) {
|
||||
throw new BusinessException(
|
||||
'NO_ACTIVE_PROMPT',
|
||||
'No active ocr_extraction prompt found',
|
||||
'ไม่พบ Prompt OCR สำหรับดึงข้อมูลที่เปิดใช้งาน'
|
||||
);
|
||||
}
|
||||
const systemPrompt = activePrompt.template;
|
||||
const dmsTags = activePrompt.contextConfig?.dmsTags;
|
||||
|
||||
const form = new FormData();
|
||||
form.append(
|
||||
'file',
|
||||
@@ -110,32 +145,19 @@ export class SandboxOcrEngineService {
|
||||
'upload.pdf'
|
||||
);
|
||||
form.append('engine', resolvedEngineType);
|
||||
if (typhoonOptions?.temperature !== undefined) {
|
||||
form.append('temperature', String(typhoonOptions.temperature));
|
||||
}
|
||||
if (typhoonOptions?.topP !== undefined) {
|
||||
form.append('topP', String(typhoonOptions.topP));
|
||||
}
|
||||
if (typhoonOptions?.repeatPenalty !== undefined) {
|
||||
form.append('repeatPenalty', String(typhoonOptions.repeatPenalty));
|
||||
}
|
||||
// ดึง active ocr_system prompt และส่งไป sidecar
|
||||
try {
|
||||
const activeOcrSystemPrompt =
|
||||
await this.aiPromptsService.getActive('ocr_system');
|
||||
if (activeOcrSystemPrompt && activeOcrSystemPrompt.template) {
|
||||
form.append('systemPrompt', activeOcrSystemPrompt.template);
|
||||
this.logger.log(
|
||||
`Injected active ocr_system prompt (version ${activeOcrSystemPrompt.versionNumber})`
|
||||
);
|
||||
}
|
||||
} catch (promptErr: unknown) {
|
||||
this.logger.warn(
|
||||
`Failed to retrieve active ocr_system prompt, proceeding without: ${promptErr instanceof Error ? promptErr.message : String(promptErr)}`
|
||||
);
|
||||
form.append('systemPrompt', systemPrompt);
|
||||
if (dmsTags) {
|
||||
form.append('dmsTags', JSON.stringify(dmsTags));
|
||||
}
|
||||
form.append('runtimeParams', JSON.stringify(runtimeParams));
|
||||
|
||||
// Append individual overrides for backward compatibility
|
||||
form.append('temperature', String(runtimeParams.temperature));
|
||||
form.append('topP', String(runtimeParams.top_p));
|
||||
form.append('repeatPenalty', String(runtimeParams.repeat_penalty));
|
||||
|
||||
this.logger.log(
|
||||
`Sending to sidecar — engine=${engineType} options=${JSON.stringify(typhoonOptions ?? {})}`
|
||||
`Sending to sidecar — engine=${engineType} options=${JSON.stringify(ocrOptions ?? {})}`
|
||||
);
|
||||
const response = await axios.post<SandboxOcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
@@ -183,9 +205,9 @@ export class SandboxOcrEngineService {
|
||||
? `HTTP ${axiosStatus} — ${cause} — sidecar detail: ${axiosDetail}`
|
||||
: `HTTP ${axiosStatus} — ${cause}`;
|
||||
this.logger.error(
|
||||
`[DIAG] Typhoon OCR FAILED — engine="${engineType}" url="${this.ocrApiUrl}/ocr-upload" error: ${fullCause}`
|
||||
`[DIAG] np-dms-ocr FAILED — engine="${engineType}" url="${this.ocrApiUrl}/ocr-upload" error: ${fullCause}`
|
||||
);
|
||||
this.logger.warn(`Falling back to Tesseract due to: ${fullCause}`);
|
||||
this.logger.warn(`Falling back to fast-path due to: ${fullCause}`);
|
||||
|
||||
const fallbackResult = await this.ocrService.detectAndExtract({
|
||||
pdfPath,
|
||||
@@ -193,7 +215,7 @@ export class SandboxOcrEngineService {
|
||||
return {
|
||||
text: fallbackResult.text,
|
||||
ocrUsed: fallbackResult.ocrUsed,
|
||||
engineUsed: fallbackResult.ocrUsed ? 'tesseract' : 'fast-path',
|
||||
engineUsed: fallbackResult.ocrUsed ? 'fast-path' : 'fast-path',
|
||||
fallbackUsed: true,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -54,7 +54,7 @@ describe('AiPolicyService', () => {
|
||||
|
||||
describe('getCanonicalModelName', () => {
|
||||
it('ควรคืนค่า np-dms-ocr สำหรับชื่อโมเดลที่มีคำว่า ocr', () => {
|
||||
expect(service.getCanonicalModelName('typhoon-np-dms-ocr:latest')).toBe(
|
||||
expect(service.getCanonicalModelName('np-dms-ocr:latest')).toBe(
|
||||
'np-dms-ocr'
|
||||
);
|
||||
expect(service.getCanonicalModelName('my-ocr-model')).toBe('np-dms-ocr');
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// File: backend/src/modules/ai/tests/ocr-residency.spec.ts
|
||||
// Change Log:
|
||||
// - 2026-06-11: Initial unit tests for adaptive OCR residency
|
||||
// - 2026-06-20: เพิ่ม mock สำหรับ AiExecutionProfile repository และ AiPromptsService เพื่อรองรับ parameter governance
|
||||
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
@@ -11,6 +12,8 @@ import { AiPolicyService } from '../services/ai-policy.service';
|
||||
import { OcrCacheService } from '../services/ocr-cache.service';
|
||||
import { SystemSetting } from '../entities/system-setting.entity';
|
||||
import { AiAuditLog } from '../entities/ai-audit-log.entity';
|
||||
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
|
||||
describe('OcrService Adaptive Residency (US2)', () => {
|
||||
let service: OcrService;
|
||||
@@ -36,6 +39,23 @@ describe('OcrService Adaptive Residency (US2)', () => {
|
||||
create: jest.fn().mockReturnValue({}),
|
||||
save: jest.fn().mockResolvedValue({}),
|
||||
};
|
||||
const mockProfileRepo = {
|
||||
findOne: jest.fn().mockResolvedValue({
|
||||
profileName: 'ocr-extract',
|
||||
temperature: 0.1,
|
||||
topP: 0.5,
|
||||
repeatPenalty: 1.0,
|
||||
maxTokens: 16000,
|
||||
}),
|
||||
};
|
||||
const mockAiPromptsService = {
|
||||
getActive: jest.fn().mockResolvedValue({
|
||||
template: 'mock active system prompt',
|
||||
contextConfig: {
|
||||
dmsTags: ['tag1', 'tag2'],
|
||||
},
|
||||
}),
|
||||
};
|
||||
const mockOcrCacheService = {};
|
||||
const mockVramMonitorService = {
|
||||
getVramHeadroom: jest.fn(),
|
||||
@@ -61,6 +81,11 @@ describe('OcrService Adaptive Residency (US2)', () => {
|
||||
provide: getRepositoryToken(AiAuditLog),
|
||||
useValue: mockAiAuditLogRepo,
|
||||
},
|
||||
{
|
||||
provide: getRepositoryToken(AiExecutionProfile),
|
||||
useValue: mockProfileRepo,
|
||||
},
|
||||
{ provide: AiPromptsService, useValue: mockAiPromptsService },
|
||||
{ provide: OcrCacheService, useValue: mockOcrCacheService },
|
||||
{ provide: VramMonitorService, useValue: mockVramMonitorService },
|
||||
{ provide: AiPolicyService, useValue: mockAiPolicyService },
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
// File: backend/src/modules/ai/tests/ocr.service.spec.ts
|
||||
// Change Log:
|
||||
// - 2026-06-13: Initial unit tests for OCR parameter wiring (T066)
|
||||
// - 2026-06-20: เพิ่ม mock สำหรับ AiExecutionProfile repository และ AiPromptsService เพื่อรองรับ parameter governance
|
||||
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||
@@ -10,12 +12,17 @@ import { AiPolicyService } from '../services/ai-policy.service';
|
||||
import { OcrCacheService } from '../services/ocr-cache.service';
|
||||
import { SystemSetting } from '../entities/system-setting.entity';
|
||||
import { AiAuditLog } from '../entities/ai-audit-log.entity';
|
||||
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
import axios from 'axios';
|
||||
import * as fs from 'fs';
|
||||
|
||||
jest.mock('axios');
|
||||
jest.mock('fs');
|
||||
|
||||
describe('OcrService Parameter Wiring (T066)', () => {
|
||||
let service: OcrService;
|
||||
|
||||
const mockConfigService = {
|
||||
get: jest.fn((key: string, defaultValue?: unknown): unknown => {
|
||||
const config: Record<string, unknown> = {
|
||||
@@ -29,16 +36,39 @@ describe('OcrService Parameter Wiring (T066)', () => {
|
||||
return config[key] ?? defaultValue;
|
||||
}),
|
||||
};
|
||||
|
||||
const mockSystemSettingRepo = {
|
||||
findOne: jest.fn().mockResolvedValue({
|
||||
settingValue: '019505a1-7c3e-7000-8000-abc123def002',
|
||||
}),
|
||||
};
|
||||
|
||||
const mockAiAuditLogRepo = {
|
||||
create: jest.fn().mockReturnValue({}),
|
||||
save: jest.fn().mockResolvedValue({}),
|
||||
};
|
||||
|
||||
const mockProfileRepo = {
|
||||
findOne: jest.fn().mockResolvedValue({
|
||||
profileName: 'ocr-extract',
|
||||
temperature: 0.1,
|
||||
topP: 0.5,
|
||||
repeatPenalty: 1.0,
|
||||
maxTokens: 16000,
|
||||
}),
|
||||
};
|
||||
|
||||
const mockAiPromptsService = {
|
||||
getActive: jest.fn().mockResolvedValue({
|
||||
template: 'mock active system prompt',
|
||||
contextConfig: {
|
||||
dmsTags: ['tag1', 'tag2'],
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
const mockOcrCacheService = {};
|
||||
|
||||
const mockVramMonitorService = {
|
||||
getVramHeadroom: jest.fn().mockResolvedValue({
|
||||
totalMb: 16384,
|
||||
@@ -49,12 +79,15 @@ describe('OcrService Parameter Wiring (T066)', () => {
|
||||
}),
|
||||
hasVramCapacity: jest.fn().mockResolvedValue(true),
|
||||
};
|
||||
|
||||
const mockAiPolicyService = {};
|
||||
|
||||
const mockRedis = {
|
||||
get: jest.fn().mockResolvedValue(null),
|
||||
set: jest.fn().mockResolvedValue('OK'),
|
||||
del: jest.fn().mockResolvedValue(1),
|
||||
};
|
||||
|
||||
beforeEach(async () => {
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
@@ -68,6 +101,11 @@ describe('OcrService Parameter Wiring (T066)', () => {
|
||||
provide: getRepositoryToken(AiAuditLog),
|
||||
useValue: mockAiAuditLogRepo,
|
||||
},
|
||||
{
|
||||
provide: getRepositoryToken(AiExecutionProfile),
|
||||
useValue: mockProfileRepo,
|
||||
},
|
||||
{ provide: AiPromptsService, useValue: mockAiPromptsService },
|
||||
{ provide: OcrCacheService, useValue: mockOcrCacheService },
|
||||
{ provide: VramMonitorService, useValue: mockVramMonitorService },
|
||||
{ provide: AiPolicyService, useValue: mockAiPolicyService },
|
||||
@@ -88,7 +126,7 @@ describe('OcrService Parameter Wiring (T066)', () => {
|
||||
await service.detectAndExtract({
|
||||
pdfPath: '/path/to/test.pdf',
|
||||
documentPublicId: 'doc-123',
|
||||
typhoonOptions: {
|
||||
ocrOptions: {
|
||||
temperature: 0.15,
|
||||
topP: 0.65,
|
||||
repeatPenalty: 1.15,
|
||||
@@ -104,7 +142,7 @@ describe('OcrService Parameter Wiring (T066)', () => {
|
||||
const formData = postCallArgs[1];
|
||||
expect(url).toBe('http://localhost:8765/ocr-upload');
|
||||
expect(formData).toBeInstanceOf(FormData);
|
||||
expect(formData.get('engine')).toBe('typhoon-np-dms-ocr');
|
||||
expect(formData.get('engine')).toBe('np-dms-ocr');
|
||||
expect(formData.get('temperature')).toBe('0.15');
|
||||
expect(formData.get('topP')).toBe('0.65');
|
||||
expect(formData.get('repeatPenalty')).toBe('1.15');
|
||||
|
||||
Reference in New Issue
Block a user