feat(rfa-ai): Complete RFA Approval Refactor and AI Model Revision
This commit is contained in:
@@ -0,0 +1,166 @@
|
||||
// File: src/modules/ai/services/embedding.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-15: เพิ่ม EmbeddingService สำหรับ full-document chunked embedding ตาม ADR-023A T021.
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { OllamaService } from './ollama.service';
|
||||
import { AiQdrantService } from '../qdrant.service';
|
||||
import { OcrService } from './ocr.service';
|
||||
|
||||
/** One slice of a document's text, queued for embedding. */
export interface EmbeddingChunk {
  /** Zero-based position of the slice within the document. */
  chunkIndex: number;
  /** Text content of the slice. */
  text: string;
  /** Source page of the slice, when it is known. */
  pageNumber?: number;
}
|
||||
|
||||
/** Outcome reported after attempting to embed a document. */
export interface EmbeddingResult {
  /** Whether the embedding run completed and stored at least one chunk. */
  success: boolean;
  /** Number of chunks that were embedded and stored. */
  chunksEmbedded: number;
  /** Failure description; present only when the run did not succeed. */
  error?: string;
}
|
||||
|
||||
/**
 * Builds chunked embeddings for a whole document and stores them in Qdrant
 * for full-document RAG (ADR-023A).
 */
@Injectable()
export class EmbeddingService {
  private readonly logger = new Logger(EmbeddingService.name);
  /** Maximum chunk length, in characters. Always a positive integer. */
  private readonly chunkSize: number;
  /** Characters shared by consecutive chunks. Always smaller than chunkSize. */
  private readonly overlap: number;

  constructor(
    private readonly configService: ConfigService,
    private readonly ollamaService: OllamaService,
    private readonly qdrantService: AiQdrantService,
    private readonly ocrService: OcrService
  ) {
    this.chunkSize = this.readIntSetting('EMBEDDING_CHUNK_SIZE', 512, 1);

    const requestedOverlap = this.readIntSetting(
      'EMBEDDING_CHUNK_OVERLAP',
      64,
      0
    );
    // The chunk window advances by (chunkSize - overlap); an overlap that is
    // not strictly smaller than the chunk size would never make progress.
    if (requestedOverlap >= this.chunkSize) {
      this.logger.warn(
        `EMBEDDING_CHUNK_OVERLAP (${requestedOverlap}) must be smaller than EMBEDDING_CHUNK_SIZE (${this.chunkSize}); clamping to ${this.chunkSize - 1}`
      );
    }
    this.overlap = Math.min(requestedOverlap, this.chunkSize - 1);
  }

  /**
   * Reads an integer setting and coerces it to a real number.
   *
   * Values that originate from environment variables arrive as strings even
   * though the generic parameter says `number`; without coercion the `+`
   * operator in the chunking arithmetic would concatenate instead of add.
   *
   * @param key - Configuration key to read.
   * @param fallback - Value used when the setting is absent or invalid.
   * @param min - Smallest accepted value (inclusive).
   * @returns A validated integer that is at least `min`, or `fallback`.
   */
  private readIntSetting(key: string, fallback: number, min: number): number {
    const raw: unknown = this.configService.get<number>(key, fallback);
    const parsed = typeof raw === 'number' ? raw : Number(raw);
    if (Number.isInteger(parsed) && parsed >= min) {
      return parsed;
    }
    this.logger.warn(
      `Invalid value for ${key} (${String(raw)}); falling back to ${fallback}`
    );
    return fallback;
  }

  /**
   * Embeds an entire document:
   * 1. Obtain the full text (use `extractedText` when given, otherwise OCR).
   * 2. Split the text into overlapping chunks.
   * 3. Generate one embedding per chunk via the Ollama service.
   * 4. Upsert all resulting points into Qdrant under the given project.
   *
   * Chunks that fail to embed are logged and skipped; the run still succeeds
   * when at least one chunk was embedded. This method never throws: every
   * failure is reported through the returned result.
   *
   * @param pdfPath - Path of the PDF, used only when OCR is needed.
   * @param documentPublicId - Public id of the document being embedded.
   * @param projectPublicId - Public id of the owning project, passed to Qdrant.
   * @param extractedText - Text that was already extracted, if any.
   * @returns Success flag, number of chunks stored, and an error message on failure.
   */
  async embedDocument(
    pdfPath: string,
    documentPublicId: string,
    projectPublicId: string,
    extractedText?: string
  ): Promise<EmbeddingResult> {
    try {
      // 1. Obtain the document text (prefer the supplied text, else call OCR).
      let fullText = extractedText;
      if (!fullText) {
        const ocrResult = await this.ocrService.detectAndExtract({
          pdfPath,
          extractedText: '',
          extractedChars: 0,
        });
        fullText = ocrResult.text;
      }

      if (!fullText || fullText.trim().length === 0) {
        this.logger.warn(`No text extracted from document ${documentPublicId}`);
        return {
          success: false,
          chunksEmbedded: 0,
          error: 'No text extracted',
        };
      }

      // 2. Split into chunks.
      const chunks = this.chunkText(fullText);
      this.logger.log(
        `Document ${documentPublicId} split into ${chunks.length} chunks`
      );

      // 3. Embed each chunk and collect the points to upsert.
      const points = [];
      for (const chunk of chunks) {
        try {
          const embedding = await this.ollamaService.generateEmbedding(
            chunk.text
          );
          // A missing or empty vector would make the whole batch upsert fail,
          // so treat it like any other per-chunk embedding failure.
          if (!Array.isArray(embedding) || embedding.length === 0) {
            throw new Error('Embedding model returned an empty vector');
          }
          points.push({
            // NOTE(review): Qdrant itself only accepts unsigned integers or
            // UUIDs as point ids; this composite string relies on
            // AiQdrantService.upsert mapping it to a valid id. Confirm.
            id: `${documentPublicId}-${chunk.chunkIndex}`,
            vector: embedding,
            payload: {
              document_public_id: documentPublicId,
              chunk_index: chunk.chunkIndex,
              page_number: chunk.pageNumber,
              chunk_text: chunk.text,
              embedded_at: new Date().toISOString(),
            },
          });
        } catch (err) {
          this.logger.error(
            `Failed to embed chunk ${chunk.chunkIndex} for document ${documentPublicId}`,
            err instanceof Error ? err.message : String(err)
          );
        }
      }

      if (points.length === 0) {
        return {
          success: false,
          chunksEmbedded: 0,
          error: 'All chunks failed to embed',
        };
      }

      if (points.length < chunks.length) {
        // Best-effort: keep going, but make the partial coverage visible.
        this.logger.warn(
          `Only ${points.length} of ${chunks.length} chunks were embedded for document ${documentPublicId}`
        );
      }

      // 4. Upsert into Qdrant, scoped to the project.
      await this.qdrantService.upsert(projectPublicId, points);

      this.logger.log(
        `Successfully embedded ${points.length} chunks for document ${documentPublicId} in project ${projectPublicId}`
      );

      return { success: true, chunksEmbedded: points.length };
    } catch (err) {
      const errorMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(
        `Embedding failed for document ${documentPublicId}: ${errorMsg}`
      );
      return { success: false, chunksEmbedded: 0, error: errorMsg };
    }
  }

  /**
   * Splits text into fixed-size character windows that overlap.
   *
   * Whitespace runs are collapsed to single spaces first. Sizes are measured
   * in characters (an approximation of tokens). Chunking stops as soon as a
   * chunk reaches the end of the text, so no trailing chunk is emitted that
   * would only repeat the tail of the previous one.
   *
   * @param text - Raw document text.
   * @returns Chunks in document order; empty when the text is blank.
   */
  private chunkText(text: string): EmbeddingChunk[] {
    const chunks: EmbeddingChunk[] = [];
    const cleanText = text.replace(/\s+/g, ' ').trim();
    const textLength = cleanText.length;
    // The constructor guarantees overlap < chunkSize, hence step >= 1.
    const step = this.chunkSize - this.overlap;

    let startIndex = 0;
    let chunkIndex = 0;

    while (startIndex < textLength) {
      const endIndex = Math.min(startIndex + this.chunkSize, textLength);

      chunks.push({
        chunkIndex,
        text: cleanText.substring(startIndex, endIndex),
        pageNumber: undefined, // TODO: Extract page numbers if available
      });

      // The text is fully covered; another window would lie entirely inside
      // the overlap of this chunk.
      if (endIndex >= textLength) {
        break;
      }

      startIndex += step;
      chunkIndex += 1;
    }

    return chunks;
  }
}
|
||||
@@ -0,0 +1,130 @@
|
||||
// File: backend/src/modules/ai/services/migration.service.ts
|
||||
// บันทึกการแก้ไข: สร้าง MigrationService สำหรับ Legacy Migration (T030) ตาม ADR-023A
|
||||
|
||||
import {
|
||||
Injectable,
|
||||
Logger,
|
||||
BadRequestException,
|
||||
NotFoundException,
|
||||
} from '@nestjs/common';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository, DataSource } from 'typeorm';
|
||||
import { InjectQueue } from '@nestjs/bullmq';
|
||||
import { Queue } from 'bullmq';
|
||||
import {
|
||||
MigrationReviewRecord,
|
||||
MigrationReviewRecordStatus,
|
||||
} from '../entities/migration-review.entity';
|
||||
import { MigrationQueueItemDto } from '../dto/migration-queue-item.dto';
|
||||
import { User } from '../../user/entities/user.entity';
|
||||
|
||||
/**
 * Manages the review queue for legacy documents (Legacy Migration T030,
 * ADR-023A): queuing items for AI extraction, listing them, and recording
 * the reviewer's approve/reject decision.
 */
@Injectable()
export class MigrationService {
  private readonly logger = new Logger(MigrationService.name);

  constructor(
    @InjectRepository(MigrationReviewRecord)
    private readonly migrationRepo: Repository<MigrationReviewRecord>,
    @InjectQueue('ai-batch')
    private readonly aiBatchQueue: Queue,
    private readonly dataSource: DataSource
  ) {}

  /**
   * Queue a legacy document for human review and AI extraction.
   *
   * The call is idempotent on `idempotencyKey`: when a record with that key
   * already exists it is returned unchanged and nothing new is queued.
   *
   * @param dto - Batch id, filename, temp path and project of the document.
   * @param idempotencyKey - Caller-supplied key identifying this request.
   * @returns The existing or newly created review record.
   * @throws Re-throws the queue error when the AI job cannot be enqueued.
   */
  async queueForReview(
    dto: MigrationQueueItemDto,
    idempotencyKey: string
  ): Promise<MigrationReviewRecord> {
    this.logger.log(
      `📥 Queuing legacy document for review: ${dto.filename} (Batch: ${dto.batchId})`
    );

    // 1. Check idempotency
    const existing = await this.migrationRepo.findOne({
      where: { idempotencyKey },
    });
    if (existing) {
      return existing;
    }

    // 2. Create pending record
    const record = this.migrationRepo.create({
      batchId: dto.batchId,
      idempotencyKey: idempotencyKey,
      originalFilename: dto.filename,
      storageTempPath: dto.tempPath,
      status: MigrationReviewRecordStatus.PENDING,
      aiMetadataJson: {}, // Will be updated by AI processor
      confidenceScore: 0,
    });

    const saved = await this.migrationRepo.save(record);

    // 3. Queue AI processing (OCR + Metadata Extraction)
    try {
      await this.aiBatchQueue.add('extract-metadata', {
        migrationQueuePublicId: saved.publicId,
        tempPath: dto.tempPath,
        filename: dto.filename,
        projectPublicId: dto.projectPublicId,
      });
    } catch (err) {
      // Without a job the record would stay PENDING forever, and the
      // idempotency check above would stop a retry from re-queuing it.
      // Roll the record back so the caller can retry with the same key.
      this.logger.error(
        `Failed to enqueue AI extraction for ${dto.filename}; rolling back review record`,
        err instanceof Error ? err.stack : String(err)
      );
      try {
        await this.migrationRepo.remove(saved);
      } catch (cleanupErr) {
        this.logger.error(
          `Failed to roll back review record for ${dto.filename}`,
          cleanupErr instanceof Error ? cleanupErr.stack : String(cleanupErr)
        );
      }
      throw err;
    }

    return saved;
  }

  /**
   * Get migration queue items, newest first, with pagination.
   *
   * `page` and `limit` are coerced to integers; values that are not at least
   * 1 fall back to the defaults (1 and 20) so the query never receives a
   * negative or NaN offset.
   *
   * @param page - 1-based page number.
   * @param limit - Page size.
   * @param status - Optional status to filter by.
   * @returns The page of items, the total match count, and the effective page/limit.
   */
  async findAll(page = 1, limit = 20, status?: string) {
    const requestedPage = Math.trunc(Number(page));
    const requestedLimit = Math.trunc(Number(limit));
    const safePage =
      Number.isFinite(requestedPage) && requestedPage >= 1 ? requestedPage : 1;
    const safeLimit =
      Number.isFinite(requestedLimit) && requestedLimit >= 1
        ? requestedLimit
        : 20;

    const query = this.migrationRepo
      .createQueryBuilder('q')
      .orderBy('q.createdAt', 'DESC')
      .skip((safePage - 1) * safeLimit)
      .take(safeLimit);

    if (status) {
      query.andWhere('q.status = :status', { status });
    }

    const [items, total] = await query.getManyAndCount();
    return { items, total, page: safePage, limit: safeLimit };
  }

  /**
   * Approve a migration item and mark it as imported.
   *
   * @param publicId - Public id of the migration item.
   * @param user - Reviewer performing the approval.
   * @returns The updated record.
   * @throws NotFoundException when no item has the given public id.
   * @throws BadRequestException when the item is not in PENDING status.
   */
  async approve(
    publicId: string,
    user: User
  ): Promise<MigrationReviewRecord> {
    const item = await this.migrationRepo.findOne({ where: { publicId } });
    if (!item) throw new NotFoundException('Migration item not found');
    if (item.status !== MigrationReviewRecordStatus.PENDING)
      throw new BadRequestException(
        `Cannot approve item in status ${item.status}`
      );

    this.logger.log(
      `✅ Approving migration item: ${item.originalFilename} (uuid: ${publicId})`
    );

    // TODO: Implement actual document import logic here in US3 Phase 5
    // This will involve calling FileStorageService, CorrespondenceService, etc.
    // Until then the item is only flagged as IMPORTED; nothing is imported yet.

    item.status = MigrationReviewRecordStatus.IMPORTED;
    item.reviewedBy = user.user_id;
    item.reviewedAt = new Date();

    return this.migrationRepo.save(item);
  }

  /**
   * Reject a migration item.
   *
   * Only PENDING items can be rejected, mirroring `approve`, so an item that
   * was already imported or rejected cannot have its review overwritten.
   *
   * @param publicId - Public id of the migration item.
   * @param user - Reviewer performing the rejection.
   * @param reason - Explanation stored with the record.
   * @returns The updated record.
   * @throws NotFoundException when no item has the given public id.
   * @throws BadRequestException when the item is not in PENDING status.
   */
  async reject(
    publicId: string,
    user: User,
    reason: string
  ): Promise<MigrationReviewRecord> {
    const item = await this.migrationRepo.findOne({ where: { publicId } });
    if (!item) throw new NotFoundException('Migration item not found');
    if (item.status !== MigrationReviewRecordStatus.PENDING)
      throw new BadRequestException(
        `Cannot reject item in status ${item.status}`
      );

    item.status = MigrationReviewRecordStatus.REJECTED;
    item.reviewedBy = user.user_id;
    item.reviewedAt = new Date();
    item.rejectionReason = reason;

    return this.migrationRepo.save(item);
  }
}
|
||||
@@ -0,0 +1,66 @@
|
||||
// File: src/modules/ai/services/ocr.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-15: เพิ่ม OCR auto-detection service สำหรับ ADR-023A.
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
|
||||
/** Input for deciding between the existing text layer and the OCR sidecar. */
export interface OcrDetectionInput {
  /** Text already extracted from the document, if any. */
  extractedText?: string;
  /** Character count of the extracted text; defaults to the text's length. */
  extractedChars?: number;
  /** Path of the PDF to send to OCR when the text layer is insufficient. */
  pdfPath?: string;
}
|
||||
|
||||
/** Text chosen for a document and how it was obtained. */
export interface OcrDetectionResult {
  /** Resulting document text (may be empty). */
  text: string;
  /** True when the text came from the OCR sidecar rather than the text layer. */
  ocrUsed: boolean;
}
|
||||
|
||||
/** Response body read from the OCR sidecar's `/ocr` endpoint. */
interface PaddleOcrResponse {
  /** Recognised text; treated as empty when absent. */
  text?: string;
}
|
||||
|
||||
/**
 * Chooses between the fast path (reuse the existing text layer) and the
 * PaddleOCR sidecar, based on how many characters were already extracted.
 */
@Injectable()
export class OcrService {
  private readonly logger = new Logger(OcrService.name);
  /** Character count above which the existing text is used as-is. */
  private readonly threshold: number;
  /** Base URL of the OCR sidecar. */
  private readonly ocrApiUrl: string;

  constructor(private readonly configService: ConfigService) {
    this.threshold = this.configService.get<number>('OCR_CHAR_THRESHOLD', 100);
    this.ocrApiUrl = this.configService.get<string>(
      'OCR_API_URL',
      'http://localhost:8765'
    );
  }

  /**
   * Inspects the text layer first and only falls back to the OCR slow path
   * when too little text is available.
   *
   * @param input - Already-extracted text, its character count, and the PDF path.
   * @returns The existing text with `ocrUsed: false` when the character count
   *   exceeds the threshold or no PDF path is given; otherwise the sidecar's
   *   text with `ocrUsed: true`.
   * @throws Propagates any error raised by the HTTP call to the sidecar.
   */
  async detectAndExtract(
    input: OcrDetectionInput
  ): Promise<OcrDetectionResult> {
    const layerText = input.extractedText ?? '';
    const charCount = input.extractedChars ?? layerText.length;
    // Both non-OCR exits return the same payload.
    const fastPathResult: OcrDetectionResult = {
      text: layerText,
      ocrUsed: false,
    };

    if (charCount > this.threshold) {
      return fastPathResult;
    }

    const { pdfPath } = input;
    if (!pdfPath) {
      this.logger.warn('OCR slow path skipped because pdfPath is missing');
      return fastPathResult;
    }

    const endpoint = `${this.ocrApiUrl}/ocr`;
    const { data } = await axios.post<PaddleOcrResponse>(
      endpoint,
      { pdfPath },
      { timeout: 90000 }
    );

    return { text: data.text ?? '', ocrUsed: true };
  }
}
|
||||
@@ -0,0 +1,94 @@
|
||||
// File: src/modules/ai/services/ollama.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-15: เพิ่ม Ollama service สำหรับ ADR-023A 2-model stack.
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
|
||||
/** Per-call overrides for a text generation request. */
export interface OllamaGenerateOptions {
  /** Request timeout in milliseconds; the service default applies when omitted. */
  timeoutMs?: number;
  /** Signal forwarded to the HTTP request so the caller can cancel it. */
  signal?: AbortSignal;
}
|
||||
|
||||
/**
 * HTTP client for the Ollama API used by the ADR-023A two-model stack:
 * one main model for text generation and one model for embeddings.
 */
@Injectable()
export class OllamaService {
  private readonly logger = new Logger(OllamaService.name);
  /** Base URL of the Ollama server, without a trailing slash. */
  private readonly ollamaUrl: string;
  private readonly mainModel: string;
  private readonly embedModel: string;
  /** Default request timeout in milliseconds. */
  private readonly timeoutMs: number;

  constructor(private readonly configService: ConfigService) {
    // OLLAMA_URL wins; AI_HOST_URL is the fallback key, then the local default.
    const configuredUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
    );
    // Strip trailing slashes so the endpoint paths never become `//api/...`.
    this.ollamaUrl = configuredUrl.replace(/\/+$/, '');

    this.mainModel = this.configService.get<string>(
      'OLLAMA_MODEL_MAIN',
      'gemma4:e4b'
    );
    this.embedModel = this.configService.get<string>(
      'OLLAMA_MODEL_EMBED',
      this.configService.get<string>('OLLAMA_EMBED_MODEL', 'nomic-embed-text')
    );

    // Environment values arrive as strings; coerce and validate so the field
    // really holds a number. Zero stays allowed (axios treats it as no timeout).
    const configuredTimeout = Number(
      this.configService.get<number>('AI_TIMEOUT_MS', 30000)
    );
    this.timeoutMs =
      Number.isFinite(configuredTimeout) && configuredTimeout >= 0
        ? configuredTimeout
        : 30000;
  }

  /**
   * Generates a completion with the main model (gemma4:e4b unless overridden
   * through configuration).
   *
   * @param prompt - Prompt text sent to the model.
   * @param options - Optional timeout override and abort signal.
   * @returns The model's response text, or an empty string when absent.
   * @throws Re-throws the underlying request error after logging it.
   */
  async generate(
    prompt: string,
    options: OllamaGenerateOptions = {}
  ): Promise<string> {
    try {
      const response = await axios.post<{ response: string }>(
        `${this.ollamaUrl}/api/generate`,
        {
          model: this.mainModel,
          prompt,
          stream: false,
        },
        {
          timeout: options.timeoutMs ?? this.timeoutMs,
          signal: options.signal,
        }
      );
      return response.data.response ?? '';
    } catch (err) {
      this.logger.error(
        'Ollama generate failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /**
   * Creates an embedding vector with the embedding model (nomic-embed-text
   * unless overridden through configuration).
   *
   * @param text - Text to embed.
   * @returns A non-empty embedding vector.
   * @throws When the request fails or the response carries no vector; the
   *   error is logged before being re-thrown.
   */
  async generateEmbedding(text: string): Promise<number[]> {
    try {
      const response = await axios.post<{ embedding: number[] }>(
        `${this.ollamaUrl}/api/embeddings`,
        { model: this.embedModel, prompt: text },
        { timeout: this.timeoutMs }
      );

      const embedding = response.data?.embedding;
      // Fail loudly instead of returning undefined or [] to callers that
      // would otherwise store an unusable vector.
      if (!Array.isArray(embedding) || embedding.length === 0) {
        throw new Error(
          `Ollama returned no embedding vector for model ${this.embedModel}`
        );
      }
      return embedding;
    } catch (err) {
      this.logger.error(
        'Ollama embedding failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /** Returns the main model's name, for audit logging. */
  getMainModelName(): string {
    return this.mainModel;
  }

  /** Returns the embedding model's name, for audit logging. */
  getEmbeddingModelName(): string {
    return this.embedModel;
  }
}
|
||||
Reference in New Issue
Block a user