// File: backend/src/modules/ai/ai-rag-pipeline.integration.spec.ts // Change Log: // - 2026-06-05: สร้าง integration test สำหรับ RAG Pipeline end-to-end (SC-002, Gap fix) // ครอบคลุม: enqueueRagPrepare jobId dedup, EmbeddingService pipeline, project isolation import { Test, TestingModule } from '@nestjs/testing'; import { ConfigService } from '@nestjs/config'; import { getQueueToken } from '@nestjs/bullmq'; import { AiQueueService, RagPrepareJobPayload } from './ai-queue.service'; import { EmbeddingService } from './services/embedding.service'; import { OllamaService } from './services/ollama.service'; import { OcrService } from './services/ocr.service'; import { AiQdrantService } from './qdrant.service'; import { AiPromptsService } from './prompts/ai-prompts.service'; import { QUEUE_AI_INGEST, QUEUE_AI_RAG, QUEUE_AI_VECTOR_DELETION, QUEUE_AI_BATCH, } from '../common/constants/queue.constants'; // ──────────────────────────────────────────────────────────────────────────────── // Mock helpers // ──────────────────────────────────────────────────────────────────────────────── /** สร้าง mock BullMQ Queue ที่ track jobId เพื่อ verify deduplication */ const createMockQueue = () => { return { add: jest .fn() .mockImplementation( (name: string, data: unknown, opts: { jobId?: string } = {}) => Promise.resolve({ id: opts.jobId ?? 'auto-id' }) ), }; }; /** สร้าง mock EmbeddingService dependencies */ const buildEmbeddingModule = async ( ollamaGenerateResponse: string, chunkSize = 512, chunkOverlap = 64 ) => { const mockOllamaService = { generate: jest.fn().mockResolvedValue(ollamaGenerateResponse), }; const mockAiPromptsService = { resolveActive: jest.fn().mockResolvedValue({ resolvedPrompt: 'แบ่ง OCR text ออกเป็น chunks', versionNumber: 1, }), }; const mockConfigService = { get: jest.fn((key: string, def?: unknown) => { const vals: Record = { EMBEDDING_CHUNK_SIZE: chunkSize, EMBEDDING_CHUNK_OVERLAP: chunkOverlap, }; return vals[key] ?? def; }), }; const mockEmbedViaSidecar = jest.fn().mockResolvedValue({ dense: Array(1024).fill(0.1), sparse: { indices: [10, 20], values: [0.8, 0.4] }, }); const mockDeleteByDocumentPublicId = jest.fn().mockResolvedValue(undefined); const mockUpsert = jest.fn().mockResolvedValue(undefined); const module: TestingModule = await Test.createTestingModule({ providers: [ EmbeddingService, { provide: ConfigService, useValue: mockConfigService }, { provide: OllamaService, useValue: mockOllamaService }, { provide: AiQdrantService, useValue: { deleteByDocumentPublicId: mockDeleteByDocumentPublicId, upsert: mockUpsert, }, }, { provide: OcrService, useValue: { embedViaSidecar: mockEmbedViaSidecar }, }, { provide: AiPromptsService, useValue: mockAiPromptsService }, ], }).compile(); return { service: module.get(EmbeddingService), mockEmbedViaSidecar, mockDeleteByDocumentPublicId, mockUpsert, mockOllamaService, }; }; // ──────────────────────────────────────────────────────────────────────────────── describe('RAG Pipeline — Integration (SC-002 / Gap fixes)', () => { // ────────────────────────────────────────────────────────────────────────────── // Test Group 1: BullMQ Job Deduplication (Gap 1 verify) // ────────────────────────────────────────────────────────────────────────────── describe('enqueueRagPrepare — jobId deduplication', () => { let queueService: AiQueueService; let mockBatchQueue: ReturnType; beforeEach(async () => { mockBatchQueue = createMockQueue(); const module: TestingModule = await Test.createTestingModule({ providers: [ AiQueueService, { provide: getQueueToken(QUEUE_AI_INGEST), useValue: { add: jest.fn() }, }, { provide: getQueueToken(QUEUE_AI_RAG), useValue: { add: jest.fn() }, }, { provide: getQueueToken(QUEUE_AI_VECTOR_DELETION), useValue: { add: jest.fn() }, }, { provide: getQueueToken(QUEUE_AI_BATCH), useValue: mockBatchQueue }, ], }).compile(); queueService = module.get(AiQueueService); }); it('ควรสร้าง jobId = rag-prepare:{documentPublicId}:{revisionNumber} (SC-004 dedup)', async () => { const payload: RagPrepareJobPayload = { documentPublicId: 'doc-uuid-001', projectPublicId: 'proj-uuid-abc', correspondenceNumber: 'CORR-2026-001', docType: 'LETTER', statusCode: 'SUBOWN', revisionNumber: 1, subject: 'เอกสารทดสอบ Dedup', }; await queueService.enqueueRagPrepare(payload); const calls = mockBatchQueue.add.mock.calls as [ string, unknown, { jobId?: string }, ][]; expect(calls[0][2]?.jobId).toBe('rag-prepare:doc-uuid-001:1'); }); it('ควร enqueue ด้วยชื่อ job rag-prepare และ payload ครบ', async () => { const payload: RagPrepareJobPayload = { documentPublicId: 'doc-uuid-002', projectPublicId: 'proj-uuid-xyz', correspondenceNumber: 'CORR-2026-002', docType: 'RFA', statusCode: 'CLBOWN', revisionNumber: 0, subject: 'RFA Test', documentDate: '2026-06-05', attachmentPath: '/files/rfa.pdf', }; await queueService.enqueueRagPrepare(payload); expect(mockBatchQueue.add).toHaveBeenCalledWith( 'rag-prepare', expect.objectContaining({ jobType: 'rag-prepare', documentPublicId: 'doc-uuid-002', revisionNumber: 0, }), expect.objectContaining({ jobId: 'rag-prepare:doc-uuid-002:0', attempts: 3, }) ); }); it('ควรคืน jobId เดิมเมื่อ enqueue revision เดียวกัน 2 ครั้ง (idempotency)', async () => { const payload: RagPrepareJobPayload = { documentPublicId: 'doc-same', projectPublicId: 'proj-same', correspondenceNumber: 'CORR-SAME', docType: 'LETTER', statusCode: 'SUBOWN', revisionNumber: 3, subject: 'Idempotency Test', }; const id1 = await queueService.enqueueRagPrepare(payload); const id2 = await queueService.enqueueRagPrepare(payload); // jobId เหมือนกัน — BullMQ จะ deduplicate ที่ server side expect(id1).toBe(id2); const calls = mockBatchQueue.add.mock.calls as [ string, unknown, { jobId?: string }, ][]; expect(calls[0][2]?.jobId).toBe(calls[1][2]?.jobId); }); }); // ────────────────────────────────────────────────────────────────────────────── // Test Group 2: processRagPrepare → EmbeddingService pipeline (SC-002) // ────────────────────────────────────────────────────────────────────────────── describe('EmbeddingService.embedDocument — full pipeline (SC-002)', () => { const semanticLlmResponse = 'เนื้อหาบทนำของเอกสารที่มีความยาวเพียงพอสำหรับการทดสอบ' + 'เนื้อหารายละเอียดของเอกสารฉบับนี้ครอบคลุมหัวข้อสำคัญ'; const ocrText = 'เนื้อหาเอกสารที่มีความยาวเกิน 50 ตัวอักษร สำหรับทดสอบ RAG pipeline integration test ครบ pipeline'; it('SC-002: ควรเรียก Sidecar /embed และ Qdrant upsert สำหรับ semantic chunks', async () => { const { service, mockEmbedViaSidecar, mockDeleteByDocumentPublicId, mockUpsert, } = await buildEmbeddingModule(semanticLlmResponse); const result = await service.embedDocument( 'proj-uuid-123', 'doc-uuid-456', 'CORR-2026-001', 'LETTER', 'SUBOWN', 1, 'Test Subject', '2026-06-05', ocrText ); // ตรวจสอบว่า Sidecar /embed ถูกเรียกสำหรับแต่ละ semantic chunk (2 chunks) expect(mockEmbedViaSidecar).toHaveBeenCalledTimes(2); // ตรวจสอบว่าลบ points เก่าก่อน upsert (delete-before-upsert) expect(mockDeleteByDocumentPublicId).toHaveBeenCalledWith( 'proj-uuid-123', 'doc-uuid-456' ); // ตรวจสอบ upsert payload ครบ 11 fields expect(mockUpsert).toHaveBeenCalledWith( 'proj-uuid-123', expect.arrayContaining([ expect.objectContaining({ payload: expect.objectContaining({ doc_public_id: 'doc-uuid-456', project_public_id: 'proj-uuid-123', doc_number: 'CORR-2026-001', doc_type: 'LETTER', status_code: 'SUBOWN', revision_number: 1, subject: 'Test Subject', document_date: '2026-06-05', }), }), ]) ); expect(result.success).toBe(true); expect(result.chunksEmbedded).toBe(2); }); it('SC-003: project isolation — upsert และ delete ต้องใช้ projectPublicId ที่ถูกต้อง', async () => { const { service, mockDeleteByDocumentPublicId, mockUpsert } = await buildEmbeddingModule(semanticLlmResponse); await service.embedDocument( 'proj-ISOLATED-999', 'doc-iso', 'CORR-ISO', 'LETTER', 'SUBOWN', 0, 'Subject', undefined, ocrText ); // deleteByDocumentPublicId ต้องใช้ projectPublicId ที่ถูกต้อง expect(mockDeleteByDocumentPublicId).toHaveBeenCalledWith( 'proj-ISOLATED-999', 'doc-iso' ); // upsert ต้องส่ง projectPublicId ที่ถูกต้องเป็น arg แรก const upsertCalls = mockUpsert.mock.calls as [string, unknown][]; expect(upsertCalls[0][0]).toBe('proj-ISOLATED-999'); }); it('SC-006: ลำดับ delete → upsert ต้องถูกต้องเสมอ (ป้องกัน stale chunks)', async () => { const callOrder: string[] = []; const { service, mockDeleteByDocumentPublicId, mockUpsert } = await buildEmbeddingModule(semanticLlmResponse); mockDeleteByDocumentPublicId.mockImplementationOnce(() => { callOrder.push('delete'); }); mockUpsert.mockImplementationOnce(() => { callOrder.push('upsert'); }); await service.embedDocument( 'proj-x', 'doc-stale', 'CORR-X', 'LETTER', 'SUBOWN', 2, 'Sub', undefined, ocrText ); // ตรวจสอบลำดับ: delete ต้องเกิดก่อน upsert เสมอ (SC-006) expect(callOrder).toEqual(['delete', 'upsert']); }); it('ควรคืน success=false เมื่อ ocrText ว่าง (edge case — skip guard)', async () => { const { service } = await buildEmbeddingModule(semanticLlmResponse); const result = await service.embedDocument( 'proj-x', 'doc-empty', 'CORR-X', 'LETTER', 'SUBOWN', 1, 'Sub', undefined, '' ); expect(result.success).toBe(false); expect(result.error).toContain('No OCR text'); }); }); // ────────────────────────────────────────────────────────────────────────────── // Test Group 3: Semantic Chunking fallback → fixed-size (FR-005) // ────────────────────────────────────────────────────────────────────────────── describe('Semantic Chunking fallback (FR-005)', () => { it('ควร fallback เป็น fixed-size และยังคง embed ได้ เมื่อ LLM output ไม่มี tag', async () => { const { service, mockEmbedViaSidecar, mockUpsert } = await buildEmbeddingModule( 'ไม่มี tag chunk เลย — plain text output', 60, 0 ); const ocrText = 'ก'.repeat(80); // 80 chars → 2 chunks (60 + 20 chars) const result = await service.embedDocument( 'proj-fallback', 'doc-fallback', 'CORR-FB', 'LETTER', 'SUBOWN', 1, 'Fallback', undefined, ocrText ); // fallback ยังต้อง embed ได้ expect(result.success).toBe(true); expect(result.chunksEmbedded).toBeGreaterThan(0); expect(mockEmbedViaSidecar).toHaveBeenCalled(); // ตรวจสอบว่า chunk_topic มาจาก fixed-size (ขึ้นต้นด้วย "ส่วนที่") // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access const upsertPoints = mockUpsert.mock.calls[0]?.[1] as Array<{ payload: { chunk_topic: string }; }>; expect(upsertPoints[0]?.payload.chunk_topic).toMatch(/ส่วนที่/); }); it('ควร fallback ทันทีเมื่อ LLM throw error', async () => { const { service, mockUpsert, mockOllamaService } = await buildEmbeddingModule('', 60, 0); mockOllamaService.generate.mockRejectedValueOnce( new Error('Ollama timeout') ); const ocrText = 'ก'.repeat(80); const result = await service.embedDocument( 'proj-err', 'doc-err', 'CORR-ERR', 'LETTER', 'SUBOWN', 1, 'Sub', undefined, ocrText ); // ถึงแม้ LLM throw แต่ fallback ยังทำงาน expect(result.success).toBe(true); expect(mockUpsert).toHaveBeenCalled(); }); }); });