Files
lcbp3/backend/src/modules/ai/processors/ai-batch.processor.spec.ts
T
admin 16aab2279c
CI / CD Pipeline / build (push) Successful in 5m19s
CI / CD Pipeline / deploy (push) Successful in 3m11s
690606:1705 ADR-035-135 #06
2026-06-06 17:05:51 +07:00

617 lines
23 KiB
TypeScript

// File: src/modules/ai/processors/ai-batch.processor.spec.ts
// Change Log
// - 2026-05-21: สร้าง Unit Test สำหรับ AiBatchProcessor ครอบคลุม embed-document และ sandbox-rag (T032).
// - 2026-05-21: เพิ่มการทดสอบ sandbox-extract พร้อม mock OcrService, OllamaService และ Redis (T039).
// - 2026-05-21: แก้ไข ESLint unexpected any และ unsafe member access โดยกำหนด type ให้ redis เป็น Record<string, jest.Mock>
// - 2026-05-22: เพิ่ม Mock dependencies (ProjectRepository, AiAuditLogRepository, TagsService, MigrationService) เพื่อแก้ปัญหา Nest resolve dependency ใน unit test และปรับโครงสร้างฟังก์ชันไม่มีบรรทัดว่าง (Zero Blank Lines) ตามกฎเหล็ก
// - 2026-05-27: เพิ่ม Mock สำหรับ getActive และ resolveContext ของ AiPromptsService เพื่อรองรับ Context-Aware Prompt (T017)
// - 2026-05-28: เพิ่ม test สำหรับ EC-001 (NEW_TAG_SUGGESTED) และ EC-002 (UNRESOLVED_SENDER/RECIPIENT_UUID)
// - 2026-05-29: แก้ไข mockAttachmentRepo เพิ่ม property manager เพื่อรองรับ jest.spyOn ใน EC-001, EC-002, และ migrate-document tests
// - 2026-06-03: ADR-034 — เพิ่ม OCR_JOB_TYPES import, mock unloadModel/loadModel/getOcrModelName, อัปเดต getMainModelName เป็น typhoon2.5, เพิ่ม test ocr-extract model switching
import { Test, TestingModule } from '@nestjs/testing';
import { getRepositoryToken } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { Job } from 'bullmq';
import {
AiBatchProcessor,
AiBatchJobData,
OCR_JOB_TYPES,
} from './ai-batch.processor';
import { EmbeddingService } from '../services/embedding.service';
import { AiRagService } from '../ai-rag.service';
import { Attachment } from '../../../common/file-storage/entities/attachment.entity';
import { OcrService } from '../services/ocr.service';
import { SandboxOcrEngineService } from '../services/sandbox-ocr-engine.service';
import { OllamaService } from '../services/ollama.service';
import { Project } from '../../project/entities/project.entity';
import { AiAuditLog } from '../entities/ai-audit-log.entity';
import { TagsService } from '../../tags/tags.service';
import { MigrationService } from '../../migration/migration.service';
import { AiPromptsService } from '../prompts/ai-prompts.service';
describe('AiBatchProcessor', () => {
// Processor under test; re-resolved from a freshly compiled TestingModule in beforeEach.
let processor: AiBatchProcessor;
// Handles to the injected test doubles, re-fetched from the module in beforeEach.
let embeddingService: jest.Mocked<EmbeddingService>;
let ragService: jest.Mocked<AiRagService>;
let ocrService: jest.Mocked<OcrService>;
let sandboxOcrEngineService: jest.Mocked<SandboxOcrEngineService>;
let ollamaService: jest.Mocked<OllamaService>;
// Typed as a plain record of jest mocks (per the change log: avoids ESLint no-explicit-any / unsafe member access).
let redis: Record<string, jest.Mock>;
let attachmentRepo: jest.Mocked<Repository<Attachment>>;
// Injection token under which the Redis double is registered in the testing module.
// NOTE(review): presumably the default connection token of the Nest ioredis module — confirm against the processor's @InjectRedis usage.
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';
// EmbeddingService double: every embedDocument call resolves to a successful result reporting five embedded chunks.
const defaultEmbedResult = { success: true, chunksEmbedded: 5 };
const mockEmbeddingService = {
  embedDocument: jest.fn().mockResolvedValue(defaultEmbedResult),
};
// AiRagService double: processQuery resolves with no value; tests only inspect its call arguments.
const processQueryStub = jest.fn();
processQueryStub.mockResolvedValue(undefined);
const mockRagService = { processQuery: processQueryStub };
// OcrService double with canned text for both extraction entry points.
const defaultDetectAndExtractResult = { text: 'OCR text LCBP3-CIV-001 Civil' };
const defaultAutoDetectResult = {
  text: 'extracted ocr text from document that is long enough to bypass character length check',
};
const mockOcrService = {
  detectAndExtract: jest.fn().mockResolvedValue(defaultDetectAndExtractResult),
  processWithAutoDetect: jest.fn().mockResolvedValue(defaultAutoDetectResult),
};
// SandboxOcrEngineService double: reports a successful OCR pass by the typhoon OCR engine with no fallback.
const defaultSandboxOcrResult = {
  text: 'OCR text LCBP3-CIV-001 Civil',
  ocrUsed: true,
  engineUsed: 'typhoon-np-dms-ocr',
  fallbackUsed: false,
};
const mockSandboxOcrEngineService = {
  detectAndExtract: jest.fn().mockResolvedValue(defaultSandboxOcrResult),
};
// OllamaService double: fixed model names, load/unload always succeed, and generate resolves
// to a JSON string describing one extracted document unless a test queues another response.
const defaultExtractionJson = JSON.stringify({
  documentNumber: 'LCBP3-CIV-001',
  subject: 'Foundation Inspection Report',
  discipline: 'Civil',
  category: 'Correspondence',
  date: '2026-05-20',
  confidence: 0.95,
  tags: ['foundation'],
  summary: 'summary text',
});
const mockOllamaService = {
  getMainModelName: jest.fn().mockReturnValue('typhoon2.5-np-dms:latest'),
  getOcrModelName: jest.fn().mockReturnValue('typhoon-np-dms-ocr:latest'),
  loadModel: jest.fn().mockResolvedValue(true),
  unloadModel: jest.fn().mockResolvedValue(true),
  generate: jest.fn().mockResolvedValue(defaultExtractionJson),
};
// Redis double: writes acknowledge with 'OK', reads miss (null) by default.
const redisSetexStub = jest.fn().mockResolvedValue('OK');
const redisGetStub = jest.fn().mockResolvedValue(null);
const mockRedis = {
  setex: redisSetexStub,
  get: redisGetStub,
};
// Attachment repository double: findOne returns one stored attachment, update reports one affected row.
// `manager` starts empty; individual tests replace it with a query-builder stub.
const storedAttachment = {
  id: 1,
  publicId: 'doc-uuid-123',
  filePath: '/files/test.pdf',
  uploadedByUserId: 10,
};
const mockAttachmentRepo = {
  findOne: jest.fn().mockResolvedValue(storedAttachment),
  update: jest.fn().mockResolvedValue({ affected: 1 }),
  manager: {},
};
// Project repository double: findOne always resolves to the same project row.
const storedProject = { id: 2, publicId: 'proj-uuid-456' };
const mockProjectRepo = {
  findOne: jest.fn().mockResolvedValue(storedProject),
};
// AiAuditLog repository double: create returns an empty entity synchronously, save resolves to an empty entity.
const auditLogCreateStub = jest.fn().mockReturnValue({});
const auditLogSaveStub = jest.fn().mockResolvedValue({});
const mockAiAuditLogRepo = {
  create: auditLogCreateStub,
  save: auditLogSaveStub,
};
// TagsService double: both lookups answer with the already-existing 'foundation' tag.
// A factory keeps the two default results as separate object instances.
const makeFoundationTag = () => ({
  id: 5,
  publicId: 'tag-uuid-999',
  tagName: 'foundation',
});
const mockTagsService = {
  findOrCreateTags: jest.fn().mockResolvedValue([makeFoundationTag()]),
  findOrSuggestTags: jest
    .fn()
    .mockResolvedValue([{ tag: makeFoundationTag(), isNew: false }]),
};
// MigrationService double: both methods resolve with no value; tests assert on the arguments they receive.
const createErrorStub = jest.fn().mockResolvedValue(undefined);
const enqueueRecordStub = jest.fn().mockResolvedValue(undefined);
const mockMigrationService = {
  createError: createErrorStub,
  enqueueRecord: enqueueRecordStub,
};
// AiPromptsService double: an active ocr_extraction prompt (version 2), an empty master-data context,
// no prompt found by explicit version, and a no-op saveTestResult.
const activeOcrExtractionPrompt = {
  id: 1,
  promptType: 'ocr_extraction',
  versionNumber: 2,
  template:
    'Resolved test prompt with OCR text {{ocr_text}} and context {{master_data_context}}',
  isActive: true,
  contextConfig: { filter: {} },
};
const emptyMasterDataContext = {
  availableProjects: [],
  availableOrganizations: [],
  availableDisciplines: [],
  availableCorrespondenceTypes: [],
  availableTags: [],
};
const resolvedActivePrompt = {
  resolvedPrompt: 'Resolved test prompt with OCR text',
  versionNumber: 2,
};
const mockAiPromptsService = {
  getActive: jest.fn().mockResolvedValue(activeOcrExtractionPrompt),
  resolveContext: jest.fn().mockResolvedValue(emptyMasterDataContext),
  resolveActive: jest.fn().mockResolvedValue(resolvedActivePrompt),
  findByVersion: jest.fn().mockResolvedValue(null),
  saveTestResult: jest.fn().mockResolvedValue(undefined),
};
beforeEach(async () => {
  // Compile a fresh testing module per test so the processor is re-instantiated with the shared doubles.
  const stub = <T>(provide: T, useValue: unknown) => ({ provide, useValue });
  const moduleRef: TestingModule = await Test.createTestingModule({
    providers: [
      AiBatchProcessor,
      stub(EmbeddingService, mockEmbeddingService),
      stub(AiRagService, mockRagService),
      stub(OcrService, mockOcrService),
      stub(SandboxOcrEngineService, mockSandboxOcrEngineService),
      stub(OllamaService, mockOllamaService),
      stub(DEFAULT_REDIS_TOKEN, mockRedis),
      stub(getRepositoryToken(Attachment), mockAttachmentRepo),
      stub(getRepositoryToken(Project), mockProjectRepo),
      stub(getRepositoryToken(AiAuditLog), mockAiAuditLogRepo),
      stub(TagsService, mockTagsService),
      stub(MigrationService, mockMigrationService),
      stub(AiPromptsService, mockAiPromptsService),
    ],
  }).compile();
  processor = moduleRef.get<AiBatchProcessor>(AiBatchProcessor);
  embeddingService = moduleRef.get(EmbeddingService);
  ragService = moduleRef.get(AiRagService);
  ocrService = moduleRef.get(OcrService);
  sandboxOcrEngineService = moduleRef.get(SandboxOcrEngineService);
  ollamaService = moduleRef.get(OllamaService);
  redis = moduleRef.get(DEFAULT_REDIS_TOKEN);
  attachmentRepo = moduleRef.get(getRepositoryToken(Attachment));
  // Wipes recorded calls only; the default implementations configured on the doubles stay in place.
  jest.clearAllMocks();
});
it('OCR_JOB_TYPES ควรมี ocr-extract เป็นสมาชิก (ADR-034)', () => {
  // The exported OCR job-type collection must include the ocr-extract job.
  const requiredJobType = 'ocr-extract';
  expect(OCR_JOB_TYPES).toContain(requiredJobType);
});
it('ocr-extract: ควร unload main → load OCR (keep_alive:0) → generate → reload main (ADR-034)', async () => {
  // Verifies the model hand-over of an ocr-extract job: every expected call is made with the
  // right arguments AND the calls occur in the sequence promised by the test title.
  const mainModel = 'typhoon2.5-np-dms:latest';
  const ocrModel = 'typhoon-np-dms-ocr:latest';
  const prompt = 'Extract OCR text from this document.';
  // Returns jest's global invocation sequence number of the first call whose leading arguments
  // strictly equal `expected`; NaN when no such call exists, which makes any ordering comparison fail.
  const firstCallOrder = (fn: jest.Mock, ...expected: unknown[]): number => {
    const index = fn.mock.calls.findIndex((args: unknown[]) =>
      expected.every((value, position) => args[position] === value)
    );
    return fn.mock.invocationCallOrder[index] ?? Number.NaN;
  };
  const job = {
    id: 'job-ocr-extract',
    data: {
      jobType: 'ocr-extract',
      documentPublicId: 'doc-ocr-uuid-001',
      projectPublicId: 'proj-uuid-456',
      payload: { prompt },
      idempotencyKey: 'idem-ocr-001',
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(job);
  expect(mockOllamaService.unloadModel).toHaveBeenCalledWith(mainModel);
  expect(mockOllamaService.loadModel).toHaveBeenCalledWith(ocrModel, 0);
  expect(mockOllamaService.generate).toHaveBeenCalledWith(
    prompt,
    expect.objectContaining({
      model: ocrModel,
      timeoutMs: 120000,
    })
  );
  expect(mockOllamaService.loadModel).toHaveBeenCalledWith(mainModel, -1);
  // Ordering: unload main → load OCR → generate → reload main.
  const unloadMainAt = firstCallOrder(mockOllamaService.unloadModel, mainModel);
  const loadOcrAt = firstCallOrder(mockOllamaService.loadModel, ocrModel, 0);
  const generateAt = firstCallOrder(mockOllamaService.generate, prompt);
  const reloadMainAt = firstCallOrder(mockOllamaService.loadModel, mainModel, -1);
  expect(unloadMainAt).toBeLessThan(loadOcrAt);
  expect(loadOcrAt).toBeLessThan(generateAt);
  expect(generateAt).toBeLessThan(reloadMainAt);
  // The result is cached in Redis for one hour and the attachment is marked DONE.
  expect(mockRedis.setex).toHaveBeenCalledWith(
    'ai:ocr:result:doc-ocr-uuid-001',
    3600,
    expect.stringContaining('typhoon-np-dms-ocr:latest')
  );
  expect(attachmentRepo.update).toHaveBeenCalledWith(
    { publicId: 'doc-ocr-uuid-001' },
    { aiProcessingStatus: 'DONE' }
  );
});
it('ควรสามารถเรียก process embed-document และอัปเดตสถานะใน database', async () => {
  // embed-document: OCR text is extracted from the PDF, embedded once, and the attachment
  // status is written as PROCESSING and as DONE.
  const documentPublicId = 'doc-uuid-123';
  const projectPublicId = 'proj-uuid-456';
  const pdfPath = '/files/test.pdf';
  const embedJob = {
    id: 'job-embed',
    data: {
      jobType: 'embed-document',
      documentPublicId,
      projectPublicId,
      payload: { pdfPath },
      idempotencyKey: 'idem-123',
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(embedJob);
  expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
    pdfPath,
    extractedText: undefined,
    documentPublicId,
  });
  expect(embeddingService.embedDocument).toHaveBeenCalledTimes(1);
  expect(embeddingService.embedDocument).toHaveBeenCalledWith(
    projectPublicId,
    documentPublicId,
    documentPublicId,
    'ATTACHMENT',
    'ACTIVE',
    1,
    documentPublicId,
    undefined,
    'OCR text LCBP3-CIV-001 Civil'
  );
  for (const aiProcessingStatus of ['PROCESSING', 'DONE']) {
    expect(attachmentRepo.update).toHaveBeenCalledWith(
      { publicId: documentPublicId },
      { aiProcessingStatus }
    );
  }
});
it('ควรประมวลผล sandbox-rag โดยการเรียก ragService.processQuery และข้ามการอัปเดต database', async () => {
  // sandbox-rag: the query is forwarded to the RAG service together with an AbortSignal,
  // and no attachment row is updated.
  const idempotencyKey = 'idem-sandbox-123';
  const projectPublicId = 'proj-uuid-456';
  const query = 'ทดสอบคำถาม sandbox RAG';
  const userPublicId = 'user-uuid-789';
  const sandboxJob = {
    id: 'job-sandbox',
    data: {
      jobType: 'sandbox-rag',
      documentPublicId: idempotencyKey,
      projectPublicId,
      payload: { query, userPublicId },
      idempotencyKey,
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(sandboxJob);
  expect(ragService.processQuery).toHaveBeenCalledTimes(1);
  expect(ragService.processQuery).toHaveBeenCalledWith(
    idempotencyKey,
    query,
    projectPublicId,
    userPublicId,
    expect.any(AbortSignal)
  );
  expect(attachmentRepo.update).not.toHaveBeenCalled();
});
it('ควรประมวลผล sandbox-extract โดยใช้ OcrService, OllamaService และเก็บค่าลง Redis', async () => {
  // sandbox-extract: the sandbox OCR engine runs in 'auto' mode, the LLM is asked for JSON output,
  // and Redis receives two writes, the last one being the completed result.
  const idempotencyKey = 'idem-extract-123';
  const pdfPath = '/files/test.pdf';
  const extractJob = {
    id: 'job-extract',
    data: {
      jobType: 'sandbox-extract',
      documentPublicId: idempotencyKey,
      projectPublicId: 'proj-uuid-456',
      payload: { pdfPath },
      idempotencyKey,
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(extractJob);
  expect(sandboxOcrEngineService.detectAndExtract).toHaveBeenCalledWith(
    pdfPath,
    'auto'
  );
  const jsonGenerateOptions = expect.objectContaining({
    format: 'json',
    timeoutMs: 120000,
  });
  expect(ollamaService.generate).toHaveBeenCalledWith(
    expect.any(String),
    jsonGenerateOptions
  );
  expect(redis.setex).toHaveBeenCalledTimes(2);
  expect(redis.setex).toHaveBeenLastCalledWith(
    'ai:rag:result:idem-extract-123',
    3600,
    expect.stringContaining('completed')
  );
});
it('sandbox-ai-extract ควร regenerate response ใหม่เมื่อ parse JSON ครั้งแรกล้มเหลว', async () => {
  // sandbox-ai-extract retry path: the cached OCR text (containing control characters) is read from
  // Redis, the first LLM answer is unparseable JSON, and the second answer is valid. The test checks
  // that generate ran twice, that the first prompt carries no \u0002 character, that the recovered
  // result is saved as a prompt test result, and that the final Redis write contains "llmPrompt".
  // The shared doubles are replaced for this test only; jest.clearAllMocks() does not restore
  // implementations, so the originals are put back in `finally` to keep later tests isolated.
  const originalRedisGet = mockRedis.get;
  const originalFindByVersion = mockAiPromptsService.findByVersion;
  try {
    const cachedOcrPayload = {
      ocrText: 'OCR text for retry test\u0002\u0000',
      ocrUsed: true,
      engineUsed: 'typhoon-np-dms-ocr',
      fallbackUsed: false,
      timestamp: '2026-06-06T15:00:00.000Z',
    };
    mockRedis.get = jest
      .fn()
      .mockResolvedValueOnce(JSON.stringify(cachedOcrPayload));
    mockAiPromptsService.findByVersion = jest.fn().mockResolvedValue({
      id: 1,
      promptType: 'ocr_extraction',
      versionNumber: 2,
      template:
        'Resolved test prompt with OCR text {{ocr_text}} and context {{master_data_context}}',
      isActive: true,
      contextConfig: { filter: {} },
    });
    // First response is malformed JSON, second one parses.
    mockOllamaService.generate
      .mockResolvedValueOnce('{\u0002\u0000')
      .mockResolvedValueOnce(
        JSON.stringify({
          subject: 'Recovered after retry',
          confidence: 0.91,
          tags: ['retry'],
        })
      );
    const job = {
      id: 'job-ai-extract-retry',
      data: {
        jobType: 'sandbox-ai-extract',
        documentPublicId: 'idem-ai-extract-123',
        projectPublicId: 'default',
        payload: { promptVersion: 2 },
        idempotencyKey: 'idem-ai-extract-123',
      },
    } as unknown as Job<AiBatchJobData>;
    await processor.process(job);
    expect(mockOllamaService.generate).toHaveBeenCalledTimes(2);
    expect(mockOllamaService.generate).toHaveBeenNthCalledWith(
      1,
      expect.not.stringContaining('\u0002'),
      expect.objectContaining({
        format: 'json',
        timeoutMs: 120000,
      })
    );
    expect(mockAiPromptsService.saveTestResult).toHaveBeenCalledWith(
      'ocr_extraction',
      2,
      expect.objectContaining({
        subject: 'Recovered after retry',
        confidence: 0.91,
      })
    );
    expect(mockRedis.setex).toHaveBeenLastCalledWith(
      'ai:rag:result:idem-ai-extract-123',
      3600,
      expect.stringContaining('"llmPrompt"')
    );
  } finally {
    mockRedis.get = originalRedisGet;
    mockAiPromptsService.findByVersion = originalFindByVersion;
  }
});
it('EC-001: ควรบันทึก aiIssues เมื่อ AI สกัด Tag ใหม่ที่ไม่มีในระบบ', async () => {
  // EC-001: when the tag lookup reports a tag flagged isNew, the enqueued migration record must
  // carry a NEW_TAG_SUGGESTED issue naming that tag.
  const knownTag = {
    tag: { id: 5, publicId: 'tag-uuid-999', tagName: 'foundation' },
    isNew: false,
  };
  const suggestedTag = {
    tag: { id: 99, publicId: 'tag-uuid-new', tagName: 'newlytag' },
    isNew: true,
  };
  mockTagsService.findOrSuggestTags.mockResolvedValueOnce([
    knownTag,
    suggestedTag,
  ]);
  // Chainable query-builder stub: each builder method returns the stub itself, getRawOne finds a row.
  const queryBuilderStub = {
    createQueryBuilder: jest.fn().mockReturnThis(),
    select: jest.fn().mockReturnThis(),
    from: jest.fn().mockReturnThis(),
    where: jest.fn().mockReturnThis(),
    getRawOne: jest.fn().mockResolvedValue({ id: 10 }),
  };
  Object.assign(mockAttachmentRepo, { manager: queryBuilderStub });
  mockProjectRepo.findOne.mockResolvedValue({
    id: 2,
    publicId: 'proj-uuid-456',
  });
  const migrateJob = {
    id: 'job-ec001',
    data: {
      jobType: 'migrate-document',
      documentPublicId: 'doc-uuid-123',
      projectPublicId: 'proj-uuid-456',
      payload: { documentNumber: 'LEGACY-EC001', title: 'EC001 Title' },
      idempotencyKey: 'idem-ec001',
      batchId: 'batch-ec001',
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(migrateJob);
  const newTagIssue = expect.objectContaining({
    type: 'NEW_TAG_SUGGESTED',
    tagName: 'newlytag',
  });
  expect(mockMigrationService.enqueueRecord).toHaveBeenCalledWith(
    expect.objectContaining({
      aiIssues: expect.arrayContaining([newTagIssue]),
    })
  );
});
it('EC-002: ควรตั้ง isValid=false และบันทึก aiIssues เมื่อ UUID ผู้ส่งไม่พบใน Master Data', async () => {
  // EC-002: the LLM names an originator organization UUID and the query-builder lookup finds no row,
  // so the enqueued record must be invalid and carry an UNRESOLVED_SENDER_UUID issue for that UUID.
  const unknownOrgUuid = 'unknown-org-uuid';
  mockTagsService.findOrSuggestTags.mockResolvedValueOnce([]);
  const llmAnswer = JSON.stringify({
    documentNumber: 'LEGACY-EC002',
    subject: 'EC002 Subject',
    discipline: 'Civil',
    category: 'Correspondence',
    originatorOrganizationPublicId: unknownOrgUuid,
    confidence: 0.95,
    tags: [],
    summary: 'summary',
  });
  mockOllamaService.generate.mockResolvedValueOnce(llmAnswer);
  // Chainable query-builder stub whose terminal getRawOne resolves to null (nothing found).
  const queryBuilderStub = {
    createQueryBuilder: jest.fn().mockReturnThis(),
    select: jest.fn().mockReturnThis(),
    from: jest.fn().mockReturnThis(),
    where: jest.fn().mockReturnThis(),
    getRawOne: jest.fn().mockResolvedValue(null),
  };
  Object.assign(mockAttachmentRepo, { manager: queryBuilderStub });
  mockProjectRepo.findOne.mockResolvedValue({
    id: 2,
    publicId: 'proj-uuid-456',
  });
  const migrateJob = {
    id: 'job-ec002',
    data: {
      jobType: 'migrate-document',
      documentPublicId: 'doc-uuid-123',
      projectPublicId: 'proj-uuid-456',
      payload: { documentNumber: 'LEGACY-EC002', title: 'EC002 Title' },
      idempotencyKey: 'idem-ec002',
      batchId: 'batch-ec002',
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(migrateJob);
  const unresolvedSenderIssue = expect.objectContaining({
    type: 'UNRESOLVED_SENDER_UUID',
    uuid: unknownOrgUuid,
  });
  expect(mockMigrationService.enqueueRecord).toHaveBeenCalledWith(
    expect.objectContaining({
      isValid: false,
      aiIssues: expect.arrayContaining([unresolvedSenderIssue]),
    })
  );
});
it('ควรประมวลผล migrate-document โดยจำลอง OCR, AI และเรียก migrationService.enqueueRecord', async () => {
  // migrate-document happy path: attachment lookup → OCR → one LLM call → tag lookup →
  // enqueue of a valid record, with the prompt context resolved for the overridden contract
  // and exactly one audit-log entry created and saved.
  const documentPublicId = 'doc-uuid-123';
  const projectPublicId = 'proj-uuid-456';
  const contractPublicId = 'contract-uuid-789';
  // Chainable query-builder stub: each builder method returns the stub itself, getRawOne finds a row.
  const queryBuilderStub = {
    createQueryBuilder: jest.fn().mockReturnThis(),
    select: jest.fn().mockReturnThis(),
    from: jest.fn().mockReturnThis(),
    where: jest.fn().mockReturnThis(),
    getRawOne: jest.fn().mockResolvedValue({ id: 10 }),
  };
  Object.assign(mockAttachmentRepo, { manager: queryBuilderStub });
  const payload = {
    documentNumber: 'LEGACY-001',
    title: 'Legacy Title',
    senderOrgId: 1,
    receiverOrgId: 2,
    contextOverride: { contractPublicId },
  };
  const migrateJob = {
    id: 'job-migrate',
    data: {
      jobType: 'migrate-document',
      documentPublicId,
      projectPublicId,
      payload,
      idempotencyKey: 'idem-migrate-123',
      batchId: 'batch-999',
    },
  } as unknown as Job<AiBatchJobData>;
  await processor.process(migrateJob);
  expect(attachmentRepo.findOne).toHaveBeenCalledWith({
    where: { publicId: documentPublicId },
  });
  expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
    pdfPath: '/files/test.pdf',
  });
  expect(ollamaService.generate).toHaveBeenCalledTimes(1);
  expect(mockTagsService.findOrSuggestTags).toHaveBeenCalledTimes(1);
  const expectedRecord = expect.objectContaining({
    documentNumber: 'LEGACY-001',
    subject: 'Foundation Inspection Report',
    category: 'Correspondence',
    isValid: true,
    confidence: 0.95,
  });
  expect(mockMigrationService.enqueueRecord).toHaveBeenCalledWith(
    expectedRecord
  );
  expect(mockAiPromptsService.resolveContext).toHaveBeenCalledWith(
    expect.anything(),
    projectPublicId,
    contractPublicId
  );
  expect(mockAiAuditLogRepo.create).toHaveBeenCalledTimes(1);
  expect(mockAiAuditLogRepo.save).toHaveBeenCalledTimes(1);
});
describe('rag-prepare', () => {
  // Both cases check that embedDocument receives the correspondence metadata from the payload
  // plus the text to embed; they differ only in where that text comes from.
  it('ควรประมวลผล rag-prepare สำเร็จเมื่อส่ง cachedOcrText มาโดยตรง', async () => {
    // Text supplied directly in the payload as cachedOcrText.
    const cachedOcrText =
      'some cached ocr text that is long enough to pass the 50 character limit check';
    const documentPublicId = 'doc-uuid-123';
    const projectPublicId = 'proj-uuid-456';
    const cachedJob = {
      id: 'job-rag-prepare-cached',
      data: {
        jobType: 'rag-prepare',
        documentPublicId,
        projectPublicId,
        payload: {
          documentPublicId,
          projectPublicId,
          correspondenceNumber: 'CORR-001',
          docType: 'LETTER',
          statusCode: 'IN_REVIEW',
          revisionNumber: 1,
          subject: 'Test Subject',
          cachedOcrText,
        },
      },
    } as unknown as Job<AiBatchJobData>;
    await processor.process(cachedJob);
    expect(embeddingService.embedDocument).toHaveBeenCalledWith(
      projectPublicId,
      documentPublicId,
      'CORR-001',
      'LETTER',
      'IN_REVIEW',
      1,
      'Test Subject',
      undefined,
      cachedOcrText
    );
  });
  it('ควรประมวลผล rag-prepare สำเร็จเมื่อดึงข้อความจากไฟล์แนบผ่าน OCR Service', async () => {
    // Text obtained by running the OCR service on the attachment path.
    const ocrText =
      'extracted ocr text from document that is long enough to bypass character length check';
    const attachmentPath = '/files/test-ocr.pdf';
    const documentPublicId = 'doc-uuid-123';
    const projectPublicId = 'proj-uuid-456';
    ocrService.detectAndExtract.mockResolvedValueOnce({
      text: ocrText,
      ocrUsed: true,
    });
    const ocrJob = {
      id: 'job-rag-prepare-ocr',
      data: {
        jobType: 'rag-prepare',
        documentPublicId,
        projectPublicId,
        payload: {
          documentPublicId,
          projectPublicId,
          correspondenceNumber: 'CORR-002',
          docType: 'LETTER',
          statusCode: 'IN_REVIEW',
          revisionNumber: 2,
          subject: 'Test OCR Subject',
          attachmentPath,
        },
      },
    } as unknown as Job<AiBatchJobData>;
    await processor.process(ocrJob);
    expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
      pdfPath: attachmentPath,
    });
    expect(embeddingService.embedDocument).toHaveBeenCalledWith(
      projectPublicId,
      documentPublicId,
      'CORR-002',
      'LETTER',
      'IN_REVIEW',
      2,
      'Test OCR Subject',
      undefined,
      ocrText
    );
  });
});
});