feat(ai): add ADR-036 unified OCR architecture and frontend test coverage
CI / CD Pipeline / build (push) Failing after 6m24s
CI / CD Pipeline / deploy (push) Has been skipped

- Add ADR-036 unified OCR architecture (typhoon-ocr via Ollama)
- Extend AI execution profiles for OCR sandbox configuration
- Add comprehensive frontend test coverage (components, hooks, services)
- Add backend test coverage for document-numbering services
- Update OCR sidecar with typhoon-ocr integration
- Add AI policy service and execution profile management
- Update AGENTS.md and architecture documentation
This commit is contained in:
2026-06-14 06:34:07 +07:00
parent e3503b6a77
commit 7e8f4859cd
108 changed files with 33914 additions and 339 deletions
@@ -9,6 +9,8 @@
// - 2026-05-28: เพิ่ม test สำหรับ EC-001 (NEW_TAG_SUGGESTED) และ EC-002 (UNRESOLVED_SENDER/RECIPIENT_UUID)
// - 2026-05-29: แก้ไข mockAttachmentRepo เพิ่ม property manager เพื่อรองรับ jest.spyOn ใน EC-001, EC-002, และ migrate-document tests
// - 2026-06-03: ADR-034 — เพิ่ม OCR_JOB_TYPES import, mock unloadModel/loadModel/getOcrModelName, อัปเดต getMainModelName เป็น typhoon2.5, เพิ่ม test ocr-extract model switching
// - 2026-06-13: ADR-036 — อัปเดต model switching tests เป็น np-dms-ai/np-dms-ocr
// - 2026-06-13: US5 — Mock AiPolicyService เพื่อให้ผ่านการทดสอบและรองรับ sandbox parameter injection
import { Test, TestingModule } from '@nestjs/testing';
import { getRepositoryToken } from '@nestjs/typeorm';
@@ -30,6 +32,7 @@ import { AiAuditLog } from '../entities/ai-audit-log.entity';
import { TagsService } from '../../tags/tags.service';
import { MigrationService } from '../../migration/migration.service';
import { AiPromptsService } from '../prompts/ai-prompts.service';
import { AiPolicyService } from '../services/ai-policy.service';
describe('AiBatchProcessor', () => {
let processor: AiBatchProcessor;
@@ -61,13 +64,13 @@ describe('AiBatchProcessor', () => {
detectAndExtract: jest.fn().mockResolvedValue({
text: 'OCR text LCBP3-CIV-001 Civil',
ocrUsed: true,
engineUsed: 'typhoon-np-dms-ocr',
engineUsed: 'np-dms-ocr',
fallbackUsed: false,
}),
};
const mockOllamaService = {
getMainModelName: jest.fn().mockReturnValue('typhoon2.5-np-dms:latest'),
getOcrModelName: jest.fn().mockReturnValue('typhoon-np-dms-ocr:latest'),
getMainModelName: jest.fn().mockReturnValue('np-dms-ai:latest'),
getOcrModelName: jest.fn().mockReturnValue('np-dms-ocr:latest'),
loadModel: jest.fn().mockResolvedValue(true),
unloadModel: jest.fn().mockResolvedValue(true),
generate: jest.fn().mockResolvedValue(
@@ -148,6 +151,17 @@ describe('AiBatchProcessor', () => {
findByVersion: jest.fn().mockResolvedValue(null),
saveTestResult: jest.fn().mockResolvedValue(undefined),
};
// Test double for AiPolicyService. getSandboxParameters() always resolves to
// the same fixed parameter set, regardless of the profile name it is called with.
const mockAiPolicyService = {
getSandboxParameters: jest.fn().mockResolvedValue({
temperature: 0.1,
topP: 0.6,
maxTokens: 4096,
numCtx: 8192,
repeatPenalty: 1.1,
keepAliveSeconds: 0,
canonicalModel: 'np-dms-ai',
}),
};
beforeEach(async () => {
const module: TestingModule = await Test.createTestingModule({
providers: [
@@ -176,6 +190,7 @@ describe('AiBatchProcessor', () => {
{ provide: TagsService, useValue: mockTagsService },
{ provide: MigrationService, useValue: mockMigrationService },
{ provide: AiPromptsService, useValue: mockAiPromptsService },
{ provide: AiPolicyService, useValue: mockAiPolicyService },
],
}).compile();
processor = module.get<AiBatchProcessor>(AiBatchProcessor);
@@ -204,27 +219,27 @@ describe('AiBatchProcessor', () => {
} as unknown as Job<AiBatchJobData>;
await processor.process(job);
expect(mockOllamaService.unloadModel).toHaveBeenCalledWith(
'typhoon2.5-np-dms:latest'
'np-dms-ai:latest'
);
expect(mockOllamaService.loadModel).toHaveBeenCalledWith(
'typhoon-np-dms-ocr:latest',
'np-dms-ocr:latest',
0
);
expect(mockOllamaService.generate).toHaveBeenCalledWith(
'Extract OCR text from this document.',
expect.objectContaining({
model: 'typhoon-np-dms-ocr:latest',
model: 'np-dms-ocr:latest',
timeoutMs: 120000,
})
);
expect(mockOllamaService.loadModel).toHaveBeenCalledWith(
'typhoon2.5-np-dms:latest',
'np-dms-ai:latest',
-1
);
expect(mockRedis.setex).toHaveBeenCalledWith(
'ai:ocr:result:doc-ocr-uuid-001',
3600,
expect.stringContaining('typhoon-np-dms-ocr:latest')
expect.stringContaining('np-dms-ocr:latest')
);
expect(attachmentRepo.update).toHaveBeenCalledWith(
{ publicId: 'doc-ocr-uuid-001' },
@@ -308,7 +323,8 @@ describe('AiBatchProcessor', () => {
await processor.process(job);
expect(sandboxOcrEngineService.detectAndExtract).toHaveBeenCalledWith(
'/files/test.pdf',
'auto'
'auto',
undefined
);
expect(ollamaService.generate).toHaveBeenCalledWith(
expect.any(String),
@@ -328,7 +344,7 @@ describe('AiBatchProcessor', () => {
const cachedOcrPayload = {
ocrText: 'OCR text for retry test\u0002\u0000',
ocrUsed: true,
engineUsed: 'typhoon-np-dms-ocr',
engineUsed: 'np-dms-ocr',
fallbackUsed: false,
timestamp: '2026-06-06T15:00:00.000Z',
};
@@ -518,9 +534,9 @@ describe('AiBatchProcessor', () => {
expect(attachmentRepo.findOne).toHaveBeenCalledWith({
where: { publicId: 'doc-uuid-123' },
});
expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
pdfPath: '/files/test.pdf',
});
expect(ocrService.detectAndExtract).toHaveBeenCalledWith(
expect.objectContaining({ pdfPath: '/files/test.pdf' })
);
expect(ollamaService.generate).toHaveBeenCalledWith(
expect.any(String),
expect.objectContaining({
@@ -605,9 +621,9 @@ describe('AiBatchProcessor', () => {
},
} as unknown as Job<AiBatchJobData>;
await processor.process(job);
expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
pdfPath: '/files/test-ocr.pdf',
});
expect(ocrService.detectAndExtract).toHaveBeenCalledWith(
expect.objectContaining({ pdfPath: '/files/test-ocr.pdf' })
);
expect(embeddingService.embedDocument).toHaveBeenCalledWith(
'proj-uuid-456',
'doc-uuid-123',
@@ -621,4 +637,108 @@ describe('AiBatchProcessor', () => {
);
});
});
// US4: both sandbox job types must prefer the project/contract ids carried in
// the job payload over the top-level job value ('default' in these fixtures)
// when calling AiPromptsService.resolveContext.
describe('Sandbox Context Parity (US4)', () => {
// Test title (Thai): "should take projectPublicId and contractPublicId from the
// payload and forward them to resolveContext in sandbox-extract".
it('ควรดึง projectPublicId และ contractPublicId จาก payload และส่งต่อให้ resolveContext ใน sandbox-extract', async () => {
const job = {
id: 'job-extract-context',
data: {
jobType: 'sandbox-extract',
documentPublicId: 'idem-extract-context-123',
projectPublicId: 'default',
payload: {
pdfPath: '/files/test.pdf',
projectPublicId: 'proj-uuid-override',
contractPublicId: 'contract-uuid-override',
},
idempotencyKey: 'idem-extract-context-123',
},
} as unknown as Job<AiBatchJobData>;
await processor.process(job);
// The payload overrides, not 'default', must reach resolveContext.
expect(mockAiPromptsService.resolveContext).toHaveBeenCalledWith(
expect.any(Object),
'proj-uuid-override',
'contract-uuid-override'
);
});
// Test title (Thai): same expectation as above, for sandbox-ai-extract.
it('ควรดึง projectPublicId และ contractPublicId จาก payload และส่งต่อให้ resolveContext ใน sandbox-ai-extract', async () => {
// Serialized OCR payload that the mocked redis.get returns once below.
const cachedOcrPayload = {
ocrText: 'OCR text for retry test',
ocrUsed: true,
engineUsed: 'np-dms-ocr',
fallbackUsed: false,
timestamp: '2026-06-06T15:00:00.000Z',
};
// NOTE(review): this replaces `get` on the shared mockRedis object rather than
// queuing a value on the existing mock; confirm it is reset between tests.
mockRedis.get = jest
.fn()
.mockResolvedValueOnce(JSON.stringify(cachedOcrPayload));
const job = {
id: 'job-ai-extract-context',
data: {
jobType: 'sandbox-ai-extract',
documentPublicId: 'idem-ai-extract-context-123',
projectPublicId: 'default',
payload: {
promptVersion: 2,
projectPublicId: 'proj-uuid-override',
contractPublicId: 'contract-uuid-override',
},
idempotencyKey: 'idem-ai-extract-context-123',
},
} as unknown as Job<AiBatchJobData>;
await processor.process(job);
expect(mockAiPromptsService.resolveContext).toHaveBeenCalledWith(
expect.any(Object),
'proj-uuid-override',
'contract-uuid-override'
);
});
});
// US5 / Phase 8: the OCR sampling snapshot attached to a job must be handed to
// the OCR service together with the job's effective profile.
describe('Dual-Model Snapshot (US5/Phase 8)', () => {
// Test title (Thai): "should take ocrSnapshotParams from the job data and
// forward them to detectAndExtract in migrate-document".
it('ควรดึง ocrSnapshotParams จาก job data และส่งต่อให้ detectAndExtract ใน migrate-document', async () => {
// Chainable query-builder stub: every builder method returns the stub itself
// and getRawOne() resolves to a row with id 10.
const mockManager = {
createQueryBuilder: jest.fn().mockReturnThis(),
select: jest.fn().mockReturnThis(),
from: jest.fn().mockReturnThis(),
where: jest.fn().mockReturnThis(),
getRawOne: jest.fn().mockResolvedValue({ id: 10 }),
};
// Install the stub as the repository's `manager` property.
(mockAttachmentRepo as unknown as { manager: unknown }).manager =
mockManager;
const job = {
id: 'job-migrate-snapshot',
data: {
jobType: 'migrate-document',
documentPublicId: 'doc-uuid-123',
projectPublicId: 'proj-uuid-456',
payload: {
documentNumber: 'LEGACY-001',
title: 'Legacy Title',
senderOrgId: 1,
receiverOrgId: 2,
},
idempotencyKey: 'idem-migrate-snapshot',
batchId: 'batch-999',
effectiveProfile: 'quality',
ocrSnapshotParams: {
temperature: 0.15,
topP: 0.65,
repeatPenalty: 1.15,
},
},
} as unknown as Job<AiBatchJobData>;
await processor.process(job);
// Exact-match assertion: effectiveProfile becomes activeProfile and
// ocrSnapshotParams becomes typhoonOptions.
// NOTE(review): pdfPath presumably comes from the attachment returned by the
// repository mock defined outside this view; confirm.
expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
pdfPath: '/files/test.pdf',
activeProfile: 'quality',
typhoonOptions: {
temperature: 0.15,
topP: 0.65,
repeatPenalty: 1.15,
},
});
});
});
});
@@ -33,6 +33,7 @@ import { OcrService } from '../services/ocr.service';
import {
SandboxOcrEngineService,
SandboxOcrEngineType,
OcrTyphoonOptions,
} from '../services/sandbox-ocr-engine.service';
import {
OllamaService,
@@ -44,6 +45,7 @@ import { TagsService } from '../../tags/tags.service';
import { MigrationService } from '../../migration/migration.service';
import { MigrationErrorType } from '../../migration/entities/migration-error.entity';
import { AiPromptsService } from '../prompts/ai-prompts.service';
import { AiPolicyService } from '../services/ai-policy.service';
import type { ExecutionProfile } from '../interfaces/execution-policy.interface';
interface MigrateDocumentMetadata extends Record<string, unknown> {
@@ -90,11 +92,16 @@ export interface AiBatchJobData {
snapshotParams?: {
temperature: number;
topP: number;
maxTokens: number;
numCtx: number;
maxTokens: number | null;
numCtx: number | null;
repeatPenalty: number;
keepAliveSeconds: number;
};
/**
 * Optional OCR sampling parameters carried on the job. The processor forwards
 * this object as `typhoonOptions` when it calls `ocrService.detectAndExtract`.
 */
ocrSnapshotParams?: {
temperature: number;
topP: number;
repeatPenalty: number;
};
}
/** OCR text สูงสุดที่ส่งเข้า LLM prompt — ป้องกัน context overflow (num_ctx 8192, Thai ~3 chars/token) */
@@ -213,6 +220,7 @@ export class AiBatchProcessor extends WorkerHost {
private readonly tagsService: TagsService,
private readonly migrationService: MigrationService,
private readonly aiPromptsService: AiPromptsService,
private readonly aiPolicyService: AiPolicyService,
@InjectRedis() private readonly redis: Redis
) {
super();
@@ -228,7 +236,14 @@ export class AiBatchProcessor extends WorkerHost {
model?: string;
system?: string;
format?: 'json';
ollamaOptions?: { num_ctx?: number; num_predict?: number };
ollamaOptions?: {
num_ctx?: number;
num_predict?: number;
temperature?: number;
top_p?: number;
repeat_penalty?: number;
};
keepAlive?: number;
}
): Promise<{
extractedMetadata: Record<string, unknown>;
@@ -241,6 +256,7 @@ export class AiBatchProcessor extends WorkerHost {
const rawResponse = await this.ollamaService.generate(prompt, {
...options,
options: options.ollamaOptions,
keepAlive: options.keepAlive,
});
const cleanedResponse = sanitizeLlmJsonResponse(rawResponse);
lastRawResponse = rawResponse;
@@ -492,6 +508,7 @@ export class AiBatchProcessor extends WorkerHost {
ocrText = await this.ollamaService.generate(prompt, {
model: ocrModel,
timeoutMs: 120000,
keepAlive: 0,
});
} finally {
this.logger.log(`[ModelSwitch] Reloading ${mainModel} (keep_alive:-1)`);
@@ -519,6 +536,9 @@ export class AiBatchProcessor extends WorkerHost {
const engineType = (payload.engineType as SandboxOcrEngineType) || 'auto';
const overrideProjPublicId =
(payload.projectPublicId as string) || projectPublicId;
const overrideContractPublicId = payload.contractPublicId as
| string
| undefined;
if (!pdfPath) {
throw new Error('pdfPath is required for sandbox-extract job');
}
@@ -531,9 +551,26 @@ export class AiBatchProcessor extends WorkerHost {
})
);
try {
// Resolve OCR sampling options. Only the 'np-dms-ocr' engine gets parameters
// from the 'ocr-extract' sandbox profile; otherwise ocrParams stays undefined.
let ocrParams: OcrTyphoonOptions | undefined = undefined;
if (engineType === 'np-dms-ocr') {
try {
const ocrDraft =
await this.aiPolicyService.getSandboxParameters('ocr-extract');
// Copy only the three sampling fields passed on as OCR options.
ocrParams = {
temperature: ocrDraft.temperature,
topP: ocrDraft.topP,
repeatPenalty: ocrDraft.repeatPenalty,
};
} catch (err) {
// Best effort: a policy lookup failure is logged and OCR proceeds with
// ocrParams left undefined.
this.logger.warn(
`Failed to fetch sandbox parameters for ocr-extract: ${String(err)}`
);
}
}
const ocrResult = await this.sandboxOcrEngineService.detectAndExtract(
pdfPath,
engineType
engineType,
ocrParams
);
const sanitizedOcrText = sanitizeOcrText(ocrResult.text);
if (sanitizedOcrText.length !== ocrResult.text.length) {
@@ -553,7 +590,8 @@ export class AiBatchProcessor extends WorkerHost {
// ดังนั้นส่ง undefined เพื่อ skip project lookup
const masterDataContext = await this.aiPromptsService.resolveContext(
activePrompt,
overrideProjPublicId === 'default' ? undefined : overrideProjPublicId
overrideProjPublicId === 'default' ? undefined : overrideProjPublicId,
overrideContractPublicId
);
const compactMasterDataContext = JSON.stringify(masterDataContext);
@@ -573,13 +611,45 @@ export class AiBatchProcessor extends WorkerHost {
`Prompt stats: OCR=${ocrTextSafe.length} chars, MasterData=${compactMasterDataContext.length} chars, Total=${resolvedPrompt.length} chars`
);
// Fetch the 'standard' sandbox parameters for the LLM call. A lookup failure is
// logged and leaves sandboxParams undefined, so the fallbacks below apply.
let sandboxParams;
try {
sandboxParams =
await this.aiPolicyService.getSandboxParameters('standard');
} catch (err) {
this.logger.warn(
`Failed to fetch sandbox parameters for standard: ${String(err)}`
);
}
// Options passed to generateStructuredJson: JSON output, 120 s timeout, and
// Ollama options taken from the sandbox parameters.
const generateOptions: {
format: 'json';
timeoutMs: number;
ollamaOptions?: {
num_ctx?: number;
num_predict?: number;
temperature?: number;
top_p?: number;
repeat_penalty?: number;
};
keepAlive?: number;
} = {
format: 'json',
timeoutMs: 120000,
ollamaOptions: {
// `??` falls back to 16384 / 4096 when the value is null or undefined.
num_ctx: sandboxParams?.numCtx ?? 16384,
num_predict: sandboxParams?.maxTokens ?? 4096,
// These three stay undefined when no sandbox parameters were fetched.
temperature: sandboxParams?.temperature,
top_p: sandboxParams?.topP,
repeat_penalty: sandboxParams?.repeatPenalty,
},
};
// keepAlive is set only when the policy provides a value; 0 is still applied.
if (sandboxParams?.keepAliveSeconds !== undefined) {
generateOptions.keepAlive = sandboxParams.keepAliveSeconds;
}
const { extractedMetadata } = await this.generateStructuredJson(
resolvedPrompt,
{
format: 'json',
timeoutMs: 120000,
ollamaOptions: { num_ctx: 16384, num_predict: 4096 }, // num_predict ป้องกัน output ถูก truncate
}
generateOptions
);
await this.aiPromptsService.saveTestResult(
'ocr_extraction',
@@ -641,11 +711,28 @@ export class AiBatchProcessor extends WorkerHost {
})
);
// Start from the supplied typhoonOptions. The 'ocr-extract' sandbox profile is
// consulted only when none were supplied and the engine is 'np-dms-ocr'.
let ocrParams = typhoonOptions;
if (!ocrParams && engineType === 'np-dms-ocr') {
try {
const ocrDraft =
await this.aiPolicyService.getSandboxParameters('ocr-extract');
// Copy only the three sampling fields passed on as OCR options.
ocrParams = {
temperature: ocrDraft.temperature,
topP: ocrDraft.topP,
repeatPenalty: ocrDraft.repeatPenalty,
};
} catch (err) {
// Best effort: a policy lookup failure is logged and ocrParams keeps its
// previous (falsy) value.
this.logger.warn(
`Failed to fetch sandbox parameters for ocr-extract: ${String(err)}`
);
}
}
try {
const ocrResult = await this.sandboxOcrEngineService.detectAndExtract(
pdfPath,
engineType,
typhoonOptions
ocrParams
);
const sanitizedOcrText = sanitizeOcrText(ocrResult.text);
if (sanitizedOcrText.length !== ocrResult.text.length) {
@@ -757,9 +844,15 @@ export class AiBatchProcessor extends WorkerHost {
// Resolve context และ run LLM
// Sandbox ใช้ 'default' projectPublicId แต่ไม่ต้องการ override context
// ดังนั้นส่ง undefined เพื่อ skip project lookup
const overrideProjPublicId =
(payload.projectPublicId as string) || projectPublicId;
const overrideContractPublicId = payload.contractPublicId as
| string
| undefined;
const masterDataContext = await this.aiPromptsService.resolveContext(
targetPrompt,
projectPublicId === 'default' ? undefined : projectPublicId
overrideProjPublicId === 'default' ? undefined : overrideProjPublicId,
overrideContractPublicId
);
const compactMasterDataContext = JSON.stringify(masterDataContext);
@@ -777,13 +870,46 @@ export class AiBatchProcessor extends WorkerHost {
this.logger.debug(
`Prompt stats: OCR=${ocrTextSafe.length} chars, MasterData=${compactMasterDataContext.length} chars, Total=${resolvedPrompt.length} chars`
);
// Fetch the 'standard' sandbox parameters for the LLM call. A lookup failure is
// logged and leaves sandboxParams undefined, so the fallbacks below apply.
let sandboxParams;
try {
sandboxParams =
await this.aiPolicyService.getSandboxParameters('standard');
} catch (err) {
this.logger.warn(
`Failed to fetch sandbox parameters for standard: ${String(err)}`
);
}
// Options passed to generateStructuredJson: JSON output, 120 s timeout, and
// Ollama options taken from the sandbox parameters.
const generateOptions: {
format: 'json';
timeoutMs: number;
ollamaOptions?: {
num_ctx?: number;
num_predict?: number;
temperature?: number;
top_p?: number;
repeat_penalty?: number;
};
keepAlive?: number;
} = {
format: 'json',
timeoutMs: 120000,
ollamaOptions: {
// `??` falls back to 16384 / 4096 when the value is null or undefined.
num_ctx: sandboxParams?.numCtx ?? 16384,
num_predict: sandboxParams?.maxTokens ?? 4096,
// These three stay undefined when no sandbox parameters were fetched.
temperature: sandboxParams?.temperature,
top_p: sandboxParams?.topP,
repeat_penalty: sandboxParams?.repeatPenalty,
},
};
// keepAlive is set only when the policy provides a value; 0 is still applied.
if (sandboxParams?.keepAliveSeconds !== undefined) {
generateOptions.keepAlive = sandboxParams.keepAliveSeconds;
}
const { extractedMetadata } = await this.generateStructuredJson(
resolvedPrompt,
{
format: 'json',
timeoutMs: 120000,
ollamaOptions: { num_ctx: 16384, num_predict: 4096 }, // num_predict ป้องกัน output ถูก truncate
}
generateOptions
);
await this.aiPromptsService.saveTestResult(
@@ -941,6 +1067,7 @@ export class AiBatchProcessor extends WorkerHost {
ocrResult = await this.ocrService.detectAndExtract({
pdfPath: attachment.filePath,
activeProfile: job.data.effectiveProfile,
typhoonOptions: job.data.ocrSnapshotParams,
});
} catch (err: unknown) {
const errMsg = err instanceof Error ? err.message : String(err);
@@ -996,8 +1123,8 @@ export class AiBatchProcessor extends WorkerHost {
generateOptions.options = {
temperature: snapshotParams.temperature,
top_p: snapshotParams.topP,
num_predict: snapshotParams.maxTokens,
num_ctx: snapshotParams.numCtx,
num_predict: snapshotParams.maxTokens ?? undefined,
num_ctx: snapshotParams.numCtx ?? undefined,
repeat_penalty: snapshotParams.repeatPenalty,
};
generateOptions.keepAlive = snapshotParams.keepAliveSeconds;