feat(rfa-ai): Complete RFA Approval Refactor and AI Model Revision
CI / CD Pipeline / build (push) Successful in 4m54s
CI / CD Pipeline / deploy (push) Failing after 12m9s

This commit is contained in:
2026-05-16 10:59:53 +07:00
parent 6cb3ae10ee
commit 1a162bf320
105 changed files with 5088 additions and 1083 deletions
@@ -0,0 +1,166 @@
// File: src/modules/ai/services/embedding.service.ts
// Change Log
// - 2026-05-15: เพิ่ม EmbeddingService สำหรับ full-document chunked embedding ตาม ADR-023A T021.
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { OllamaService } from './ollama.service';
import { AiQdrantService } from '../qdrant.service';
import { OcrService } from './ocr.service';
/** One slice of a document's text, as produced by EmbeddingService.chunkText. */
export interface EmbeddingChunk {
// Zero-based position of this chunk in the document's chunk sequence.
chunkIndex: number;
// The chunk's text content (whitespace-normalised by chunkText before slicing).
text: string;
// Source page of the chunk; chunkText currently always leaves this undefined.
pageNumber?: number;
}
/** Outcome of EmbeddingService.embedDocument. */
export interface EmbeddingResult {
// True when at least one chunk was embedded and the upsert call completed.
success: boolean;
// Number of chunks whose embeddings were sent to the vector store (0 on failure).
chunksEmbedded: number;
// Human-readable failure reason; only set when success is false.
error?: string;
}
/**
 * Embedding service for full-document RAG (ADR-023A).
 *
 * Splits a document's text into overlapping character windows, asks Ollama
 * for one embedding per window and upserts the resulting points through
 * AiQdrantService under the given project.
 */
@Injectable()
export class EmbeddingService {
  private readonly logger = new Logger(EmbeddingService.name);
  /** Window length in characters (validated integer >= 1). */
  private readonly chunkSize: number;
  /** Characters shared by consecutive windows (always < chunkSize). */
  private readonly overlap: number;

  constructor(
    private readonly configService: ConfigService,
    private readonly ollamaService: OllamaService,
    private readonly qdrantService: AiQdrantService,
    private readonly ocrService: OcrService
  ) {
    // Values that originate from process.env arrive as strings unless a
    // validation schema converts them, so coerce explicitly: otherwise
    // `startIndex + chunkSize` would concatenate instead of add.
    const rawChunkSize = Number(
      this.configService.get<number>('EMBEDDING_CHUNK_SIZE', 512)
    );
    const rawOverlap = Number(
      this.configService.get<number>('EMBEDDING_CHUNK_OVERLAP', 64)
    );

    this.chunkSize =
      Number.isFinite(rawChunkSize) && rawChunkSize >= 1
        ? Math.floor(rawChunkSize)
        : 512;

    const requestedOverlap =
      Number.isFinite(rawOverlap) && rawOverlap >= 0
        ? Math.floor(rawOverlap)
        : 64;

    // An overlap >= chunkSize would make the chunking window stand still
    // (or move backwards) and loop forever, so clamp it below chunkSize.
    this.overlap = Math.min(requestedOverlap, this.chunkSize - 1);
    if (this.overlap !== requestedOverlap) {
      this.logger.warn(
        `EMBEDDING_CHUNK_OVERLAP (${requestedOverlap}) must be smaller than EMBEDDING_CHUNK_SIZE (${this.chunkSize}); using ${this.overlap}`
      );
    }
  }

  /**
   * Embeds a whole document:
   * 1. Obtain the full text (use `extractedText`, otherwise call the OCR service).
   * 2. Split the text into overlapping chunks.
   * 3. Generate one embedding per chunk via Ollama.
   * 4. Upsert all points to Qdrant under the given project.
   *
   * Chunks whose embedding call fails are logged and skipped; the document is
   * still reported as successful when at least one chunk was embedded.
   *
   * @param pdfPath Path handed to the OCR service when no text is supplied.
   * @param documentPublicId Public id of the document; used in point ids and payloads.
   * @param projectPublicId Public id of the project, passed to the Qdrant upsert.
   * @param extractedText Optional pre-extracted text; skips the OCR call when non-empty.
   * @returns The embedding outcome. This method does not reject: every error is
   *          caught, logged and reported through `success: false` and `error`.
   */
  async embedDocument(
    pdfPath: string,
    documentPublicId: string,
    projectPublicId: string,
    extractedText?: string
  ): Promise<EmbeddingResult> {
    try {
      // 1. Resolve the document text (caller-supplied text wins over OCR).
      let fullText = extractedText;
      if (!fullText) {
        const ocrResult = await this.ocrService.detectAndExtract({
          pdfPath,
          extractedText: '',
          extractedChars: 0,
        });
        fullText = ocrResult.text;
      }
      if (!fullText || fullText.trim().length === 0) {
        this.logger.warn(`No text extracted from document ${documentPublicId}`);
        return {
          success: false,
          chunksEmbedded: 0,
          error: 'No text extracted',
        };
      }

      // 2. Chunk the text.
      const chunks = this.chunkText(fullText);
      this.logger.log(
        `Document ${documentPublicId} split into ${chunks.length} chunks`
      );

      // 3. Generate one embedding per chunk, sequentially.
      const points = [];
      for (const chunk of chunks) {
        try {
          const embedding = await this.ollamaService.generateEmbedding(
            chunk.text
          );
          // A missing/empty vector would make the whole batch upsert fail,
          // so treat it like any other per-chunk failure.
          if (!Array.isArray(embedding) || embedding.length === 0) {
            throw new Error('Embedding model returned an empty vector');
          }
          points.push({
            // NOTE(review): Qdrant itself only accepts unsigned integers or
            // UUIDs as point ids. This composite string presumably gets
            // mapped inside AiQdrantService.upsert — confirm.
            id: `${documentPublicId}-${chunk.chunkIndex}`,
            vector: embedding,
            payload: {
              document_public_id: documentPublicId,
              chunk_index: chunk.chunkIndex,
              page_number: chunk.pageNumber,
              chunk_text: chunk.text,
              embedded_at: new Date().toISOString(),
            },
          });
        } catch (err) {
          this.logger.error(
            `Failed to embed chunk ${chunk.chunkIndex} for document ${documentPublicId}`,
            err instanceof Error ? err.message : String(err)
          );
        }
      }

      if (points.length === 0) {
        return {
          success: false,
          chunksEmbedded: 0,
          error: 'All chunks failed to embed',
        };
      }
      if (points.length < chunks.length) {
        this.logger.warn(
          `Only ${points.length} of ${chunks.length} chunks were embedded for document ${documentPublicId}`
        );
      }

      // 4. Upsert to Qdrant under the owning project.
      await this.qdrantService.upsert(projectPublicId, points);
      this.logger.log(
        `Successfully embedded ${points.length} chunks for document ${documentPublicId} in project ${projectPublicId}`
      );
      return { success: true, chunksEmbedded: points.length };
    } catch (err) {
      const errorMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(
        `Embedding failed for document ${documentPublicId}: ${errorMsg}`
      );
      return { success: false, chunksEmbedded: 0, error: errorMsg };
    }
  }

  /**
   * Splits text into overlapping windows.
   *
   * Whitespace runs are collapsed to single spaces first. Windows are
   * `chunkSize` characters long (characters, used as an approximation of
   * tokens) and consecutive windows share `overlap` characters.
   *
   * @param text Raw document text.
   * @returns Chunks in document order with zero-based `chunkIndex`; an empty
   *          array when the text contains only whitespace.
   */
  private chunkText(text: string): EmbeddingChunk[] {
    const chunks: EmbeddingChunk[] = [];
    const cleanText = text.replace(/\s+/g, ' ').trim();
    const textLength = cleanText.length;
    // Defensive: the window must always advance, whatever the configuration.
    const step = Math.max(1, this.chunkSize - this.overlap);

    let startIndex = 0;
    let chunkIndex = 0;
    while (startIndex < textLength) {
      const endIndex = Math.min(startIndex + this.chunkSize, textLength);
      chunks.push({
        chunkIndex,
        text: cleanText.substring(startIndex, endIndex),
        pageNumber: undefined, // TODO: Extract page numbers if available
      });
      // Once a window reaches the end of the text, any further window would
      // lie entirely inside this one's overlap region — stop here.
      if (endIndex >= textLength) {
        break;
      }
      startIndex += step;
      chunkIndex += 1;
    }
    return chunks;
  }
}
@@ -0,0 +1,130 @@
// File: backend/src/modules/ai/services/migration.service.ts
// บันทึกการแก้ไข: สร้าง MigrationService สำหรับ Legacy Migration (T030) ตาม ADR-023A
import {
Injectable,
Logger,
BadRequestException,
NotFoundException,
} from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository, DataSource } from 'typeorm';
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import {
MigrationReviewRecord,
MigrationReviewRecordStatus,
} from '../entities/migration-review.entity';
import { MigrationQueueItemDto } from '../dto/migration-queue-item.dto';
import { User } from '../../user/entities/user.entity';
/**
 * Manages the legacy-document migration review queue: creating review
 * records, listing them, and recording approve/reject decisions.
 */
@Injectable()
export class MigrationService {
  private readonly logger = new Logger(MigrationService.name);

  constructor(
    @InjectRepository(MigrationReviewRecord)
    private readonly migrationRepo: Repository<MigrationReviewRecord>,
    @InjectQueue('ai-batch')
    private readonly aiBatchQueue: Queue,
    private readonly dataSource: DataSource
  ) {}

  /**
   * Queue a legacy document for human review and AI extraction.
   *
   * Idempotent on `idempotencyKey`: when a record with that key already
   * exists it is returned as-is and no new job is queued.
   *
   * @param dto Batch id, filename, temp path and project of the legacy file.
   * @param idempotencyKey Caller-supplied key identifying this request.
   * @returns The existing or newly saved review record.
   */
  async queueForReview(dto: MigrationQueueItemDto, idempotencyKey: string) {
    this.logger.log(
      `📥 Queuing legacy document for review: ${dto.filename} (Batch: ${dto.batchId})`
    );

    // 1. Check idempotency
    // NOTE(review): this is check-then-insert; two concurrent requests with
    // the same key can both pass this check unless the column carries a
    // unique constraint — confirm on the entity.
    const existing = await this.migrationRepo.findOne({
      where: { idempotencyKey },
    });
    if (existing) {
      return existing;
    }

    // 2. Create pending record
    const record = this.migrationRepo.create({
      batchId: dto.batchId,
      idempotencyKey: idempotencyKey,
      originalFilename: dto.filename,
      storageTempPath: dto.tempPath,
      status: MigrationReviewRecordStatus.PENDING,
      aiMetadataJson: {}, // Will be updated by AI processor
      confidenceScore: 0,
    });
    const saved = await this.migrationRepo.save(record);

    // 3. Queue AI processing (OCR + Metadata Extraction)
    try {
      await this.aiBatchQueue.add('extract-metadata', {
        migrationQueuePublicId: saved.publicId,
        tempPath: dto.tempPath,
        filename: dto.filename,
        projectPublicId: dto.projectPublicId,
      });
    } catch (err) {
      // The record is already persisted, so a retry with the same key will
      // short-circuit above without re-queuing. Make the orphan visible.
      this.logger.error(
        `Failed to queue AI extraction for migration item ${saved.publicId}`,
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
    return saved;
  }

  /**
   * Get migration queue items, newest first, with pagination.
   *
   * @param page 1-based page number; invalid values fall back to 1.
   * @param limit Page size; invalid values fall back to 20.
   * @param status Optional status filter.
   * @returns The page of items, the total match count and the effective
   *          page/limit that were applied.
   */
  async findAll(page = 1, limit = 20, status?: string) {
    // Query-string inputs can arrive as 0, negative or NaN; any of those
    // would produce an invalid OFFSET/LIMIT.
    const safePage = Number.isFinite(page) && page >= 1 ? Math.floor(page) : 1;
    const safeLimit =
      Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;

    const query = this.migrationRepo
      .createQueryBuilder('q')
      .orderBy('q.createdAt', 'DESC')
      .skip((safePage - 1) * safeLimit)
      .take(safeLimit);
    if (status) {
      query.andWhere('q.status = :status', { status });
    }
    const [items, total] = await query.getManyAndCount();
    return { items, total, page: safePage, limit: safeLimit };
  }

  /**
   * Approve a migration item and mark it as imported.
   *
   * @param publicId Public id of the migration item.
   * @param user Reviewer performing the approval.
   * @throws NotFoundException when no item has this public id.
   * @throws BadRequestException when the item is not PENDING.
   */
  async approve(publicId: string, user: User) {
    const item = await this.findPendingItem(publicId, 'approve');
    this.logger.log(
      `✅ Approving migration item: ${item.originalFilename} (uuid: ${publicId})`
    );
    // TODO: Implement actual document import logic here in US3 Phase 5
    // This will involve calling FileStorageService, CorrespondenceService, etc.
    item.status = MigrationReviewRecordStatus.IMPORTED;
    item.reviewedBy = user.user_id;
    item.reviewedAt = new Date();
    return this.migrationRepo.save(item);
  }

  /**
   * Reject a migration item.
   *
   * Only PENDING items can be rejected, mirroring `approve`; this prevents an
   * already imported (or already rejected) item from being overwritten.
   *
   * @param publicId Public id of the migration item.
   * @param user Reviewer performing the rejection.
   * @param reason Free-text rejection reason stored on the record.
   * @throws NotFoundException when no item has this public id.
   * @throws BadRequestException when the item is not PENDING.
   */
  async reject(publicId: string, user: User, reason: string) {
    const item = await this.findPendingItem(publicId, 'reject');
    item.status = MigrationReviewRecordStatus.REJECTED;
    item.reviewedBy = user.user_id;
    item.reviewedAt = new Date();
    item.rejectionReason = reason;
    return this.migrationRepo.save(item);
  }

  /** Loads an item by public id and ensures it is still awaiting review. */
  private async findPendingItem(
    publicId: string,
    action: 'approve' | 'reject'
  ): Promise<MigrationReviewRecord> {
    const item = await this.migrationRepo.findOne({ where: { publicId } });
    if (!item) throw new NotFoundException('Migration item not found');
    if (item.status !== MigrationReviewRecordStatus.PENDING) {
      throw new BadRequestException(
        `Cannot ${action} item in status ${item.status}`
      );
    }
    return item;
  }
}
@@ -0,0 +1,66 @@
// File: src/modules/ai/services/ocr.service.ts
// Change Log
// - 2026-05-15: เพิ่ม OCR auto-detection service สำหรับ ADR-023A.
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import axios from 'axios';
/** Input for OcrService.detectAndExtract. */
export interface OcrDetectionInput {
// Text already extracted from the document; treated as '' when omitted.
extractedText?: string;
// Character count compared against the OCR threshold; defaults to extractedText.length.
extractedChars?: number;
// Path sent to the OCR API; when missing, the OCR call is skipped.
pdfPath?: string;
}
/** Result of OcrService.detectAndExtract. */
export interface OcrDetectionResult {
// The text to use: either the supplied extracted text or the OCR API's output.
text: string;
// True only when the OCR API was actually called.
ocrUsed: boolean;
}
/** Response body read from the OCR API's /ocr endpoint. */
interface PaddleOcrResponse {
// Recognised text; callers treat a missing value as ''.
text?: string;
}
/**
 * Chooses between the fast path (reuse already-extracted text) and the
 * PaddleOCR sidecar, based on how many characters were extracted.
 */
@Injectable()
export class OcrService {
  private readonly logger = new Logger(OcrService.name);
  /** Character count that must be exceeded to skip OCR. */
  private readonly threshold: number;
  /** Base URL of the OCR API. */
  private readonly ocrApiUrl: string;

  constructor(private readonly configService: ConfigService) {
    this.threshold = this.configService.get<number>('OCR_CHAR_THRESHOLD', 100);
    this.ocrApiUrl = this.configService.get<string>(
      'OCR_API_URL',
      'http://localhost:8765'
    );
  }

  /**
   * Inspects the existing text layer before falling back to the OCR slow path.
   *
   * @param input Extracted text and/or character count, plus the PDF path.
   * @returns The supplied text with `ocrUsed: false` when the character count
   *          exceeds the threshold or no `pdfPath` is given; otherwise the
   *          OCR API's text with `ocrUsed: true`.
   * @throws Whatever axios throws when the OCR request fails or times out.
   */
  async detectAndExtract(
    input: OcrDetectionInput
  ): Promise<OcrDetectionResult> {
    const text = input.extractedText ?? '';
    const charCount = input.extractedChars ?? text.length;
    const hasEnoughText = charCount > this.threshold;

    // Fast path: either the text layer is sufficient, or OCR is impossible.
    if (hasEnoughText || !input.pdfPath) {
      if (!hasEnoughText) {
        this.logger.warn('OCR slow path skipped because pdfPath is missing');
      }
      return { text, ocrUsed: false };
    }

    // Slow path: delegate to the OCR sidecar.
    const endpoint = `${this.ocrApiUrl}/ocr`;
    const { data } = await axios.post<PaddleOcrResponse>(
      endpoint,
      { pdfPath: input.pdfPath },
      { timeout: 90000 }
    );
    return { text: data.text ?? '', ocrUsed: true };
  }
}
@@ -0,0 +1,94 @@
// File: src/modules/ai/services/ollama.service.ts
// Change Log
// - 2026-05-15: เพิ่ม Ollama service สำหรับ ADR-023A 2-model stack.
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import axios from 'axios';
/** Per-call options for OllamaService.generate. */
export interface OllamaGenerateOptions {
// Request timeout in milliseconds; falls back to the service-wide timeout when omitted.
timeoutMs?: number;
// Abort signal forwarded to the HTTP request.
signal?: AbortSignal;
}
/**
 * Client for the Ollama HTTP API (ADR-023A two-model stack): one model for
 * text generation and one for embeddings.
 */
@Injectable()
export class OllamaService {
  private readonly logger = new Logger(OllamaService.name);
  /** Base URL of the Ollama server, without a trailing slash. */
  private readonly ollamaUrl: string;
  private readonly mainModel: string;
  private readonly embedModel: string;
  /** Default request timeout in milliseconds. */
  private readonly timeoutMs: number;

  constructor(private readonly configService: ConfigService) {
    const configuredUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
    );
    // Strip trailing slashes so `${ollamaUrl}/api/...` never becomes `//api/...`.
    this.ollamaUrl = configuredUrl.replace(/\/+$/, '');
    this.mainModel = this.configService.get<string>(
      'OLLAMA_MODEL_MAIN',
      'gemma4:e4b'
    );
    this.embedModel = this.configService.get<string>(
      'OLLAMA_MODEL_EMBED',
      this.configService.get<string>('OLLAMA_EMBED_MODEL', 'nomic-embed-text')
    );
    this.timeoutMs = this.configService.get<number>('AI_TIMEOUT_MS', 30000);
  }

  /**
   * Generates a completion with the main model (non-streaming).
   *
   * @param prompt Prompt text sent to the model.
   * @param options Optional per-call timeout and abort signal.
   * @returns The model's response text, or '' when the field is absent.
   * @throws Rethrows the underlying request error after logging it.
   */
  async generate(
    prompt: string,
    options: OllamaGenerateOptions = {}
  ): Promise<string> {
    try {
      const response = await axios.post<{ response: string }>(
        `${this.ollamaUrl}/api/generate`,
        {
          model: this.mainModel,
          prompt,
          stream: false,
        },
        {
          timeout: options.timeoutMs ?? this.timeoutMs,
          signal: options.signal,
        }
      );
      return response.data.response ?? '';
    } catch (err) {
      this.logger.error(
        'Ollama generate failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /**
   * Generates an embedding vector with the embedding model.
   *
   * @param text Text to embed.
   * @returns A non-empty embedding vector.
   * @throws Rethrows the underlying request error after logging it; also
   *         throws when the response carries no (or an empty) vector, so
   *         callers never receive `undefined` typed as `number[]`.
   */
  async generateEmbedding(text: string): Promise<number[]> {
    try {
      const response = await axios.post<{ embedding?: number[] }>(
        `${this.ollamaUrl}/api/embeddings`,
        { model: this.embedModel, prompt: text },
        { timeout: this.timeoutMs }
      );
      const embedding = response.data?.embedding;
      if (!Array.isArray(embedding) || embedding.length === 0) {
        throw new Error(
          `Ollama returned no embedding vector for model ${this.embedModel}`
        );
      }
      return embedding;
    } catch (err) {
      this.logger.error(
        'Ollama embedding failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /** Returns the main model name (for audit logging). */
  getMainModelName(): string {
    return this.mainModel;
  }

  /** Returns the embedding model name (for audit logging). */
  getEmbeddingModelName(): string {
    return this.embedModel;
  }
}