feat(rfa-ai): Complete RFA Approval Refactor and AI Model Revision
This commit is contained in:
@@ -6,7 +6,7 @@ import { getQueueToken } from '@nestjs/bullmq';
|
||||
import { RagService } from '../rag.service';
|
||||
import { QdrantService } from '../qdrant.service';
|
||||
import { EmbeddingService } from '../embedding.service';
|
||||
import { TyphoonService } from '../typhoon.service';
|
||||
import { LocalLlmService } from '../local-llm.service';
|
||||
import { IngestionService } from '../ingestion.service';
|
||||
import { DocumentChunk } from '../entities/document-chunk.entity';
|
||||
import { QUEUE_AI_VECTOR_DELETION } from '../../common/constants/queue.constants';
|
||||
@@ -23,7 +23,7 @@ const mockEmbedding = {
|
||||
embed: jest.fn(),
|
||||
};
|
||||
|
||||
const mockTyphoon = {
|
||||
const mockLocalLlm = {
|
||||
generate: jest.fn(),
|
||||
sanitizeInput: jest.fn((t: string) => t),
|
||||
};
|
||||
@@ -56,7 +56,7 @@ describe('RagService', () => {
|
||||
RagService,
|
||||
{ provide: QdrantService, useValue: mockQdrant },
|
||||
{ provide: EmbeddingService, useValue: mockEmbedding },
|
||||
{ provide: TyphoonService, useValue: mockTyphoon },
|
||||
{ provide: LocalLlmService, useValue: mockLocalLlm },
|
||||
{ provide: IngestionService, useValue: mockIngestion },
|
||||
{ provide: getRepositoryToken(DocumentChunk), useValue: mockChunkRepo },
|
||||
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
|
||||
@@ -95,7 +95,7 @@ describe('RagService', () => {
|
||||
score: 0.92,
|
||||
},
|
||||
]);
|
||||
mockTyphoon.generate.mockResolvedValue({
|
||||
mockLocalLlm.generate.mockResolvedValue({
|
||||
answer: 'คำตอบ',
|
||||
usedFallbackModel: false,
|
||||
});
|
||||
@@ -129,20 +129,17 @@ describe('RagService', () => {
|
||||
mockQdrant.isReady.mockReturnValue(true);
|
||||
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
|
||||
mockQdrant.hybridSearch.mockResolvedValue([]);
|
||||
mockTyphoon.generate.mockResolvedValue({
|
||||
mockLocalLlm.generate.mockResolvedValue({
|
||||
answer: 'ลับมาก',
|
||||
usedFallbackModel: true,
|
||||
usedFallbackModel: false,
|
||||
});
|
||||
|
||||
const result = await service.query(dto, adminPerms);
|
||||
|
||||
expect(mockRedis.get).not.toHaveBeenCalled();
|
||||
expect(mockRedis.setex).not.toHaveBeenCalled();
|
||||
expect(mockTyphoon.generate).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
true
|
||||
);
|
||||
expect(result.usedFallbackModel).toBe(true);
|
||||
expect(mockLocalLlm.generate).toHaveBeenCalledWith(expect.any(String));
|
||||
expect(result.usedFallbackModel).toBe(false);
|
||||
});
|
||||
|
||||
it('collectionReady=false → throw ServiceUnavailableException RAG_NOT_READY', async () => {
|
||||
@@ -158,7 +155,7 @@ describe('RagService', () => {
|
||||
mockRedis.get.mockResolvedValue(null);
|
||||
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
|
||||
mockQdrant.hybridSearch.mockResolvedValue([]);
|
||||
mockTyphoon.generate.mockResolvedValue({
|
||||
mockLocalLlm.generate.mockResolvedValue({
|
||||
answer: 'A',
|
||||
usedFallbackModel: false,
|
||||
});
|
||||
@@ -181,7 +178,7 @@ describe('RagService', () => {
|
||||
mockRedis.get.mockResolvedValue(null);
|
||||
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
|
||||
mockQdrant.hybridSearch.mockResolvedValue([]);
|
||||
mockTyphoon.generate.mockResolvedValue({
|
||||
// The mocked result must expose `answer` (not the misspelled `anwer`), otherwise the
// value destructured from `localLlm.generate()` is undefined and the mock no longer
// matches the LlmGenerateResult shape used by the other test cases.
mockLocalLlm.generate.mockResolvedValue({
  answer: 'ok',
  usedFallbackModel: false,
});
|
||||
@@ -199,9 +196,9 @@ describe('RagService', () => {
|
||||
mockRedis.get.mockResolvedValue(null);
|
||||
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
|
||||
mockQdrant.hybridSearch.mockResolvedValue([]);
|
||||
mockTyphoon.generate.mockResolvedValue({
|
||||
mockLocalLlm.generate.mockResolvedValue({
|
||||
answer: 'ok',
|
||||
usedFallbackModel: true,
|
||||
usedFallbackModel: false,
|
||||
});
|
||||
|
||||
await service.query(dto, adminPerms);
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
// File: src/modules/rag/local-llm.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-15: แทนที่ cloud LLM API ด้วย Ollama local-only ตาม ADR-023A.
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
|
||||
/** Result object returned by an LLM `generate()` call. */
export interface LlmGenerateResult {
|
||||
/** Text produced by the model. */
answer: string;
|
||||
/** Fallback-model flag; `LocalLlmService.generate` in this file always sets it to `false`. */
usedFallbackModel: boolean;
|
||||
}
|
||||
|
||||
/** Service that calls the in-house LLM exclusively through an Ollama endpoint. */
@Injectable()
export class LocalLlmService {
  /** Timeout used when `RAG_TIMEOUT_MS` is missing or not a usable number. */
  private static readonly DEFAULT_TIMEOUT_MS = 30000;

  /** Maximum number of characters kept by `sanitizeInput`. */
  private static readonly MAX_INPUT_LENGTH = 1000;

  /** Patterns stripped from user text, applied in this order on every pass. */
  private static readonly INJECTION_PATTERNS: readonly RegExp[] = [
    /<CONTEXT_START>|<CONTEXT_END>/gi,
    /ignore previous instructions/gi,
    /system:/gi,
  ];

  private readonly logger = new Logger(LocalLlmService.name);
  private readonly ollamaUrl: string;
  private readonly ollamaModel: string;
  private readonly timeoutMs: number;

  /**
   * Reads the Ollama endpoint, model name and request timeout from configuration.
   *
   * Lookup order: `OLLAMA_URL` → `AI_HOST_URL` → `http://localhost:11434` for the URL,
   * and `OLLAMA_MODEL_MAIN` → `OLLAMA_RAG_MODEL` → `gemma4:e4b` for the model.
   *
   * @param configService configuration source for the keys above and `RAG_TIMEOUT_MS`
   */
  constructor(private readonly configService: ConfigService) {
    const configuredUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
    );
    // Drop trailing slashes so the request path never becomes `//api/generate`.
    this.ollamaUrl = configuredUrl.replace(/\/+$/, '');

    this.ollamaModel = this.configService.get<string>(
      'OLLAMA_MODEL_MAIN',
      this.configService.get<string>('OLLAMA_RAG_MODEL', 'gemma4:e4b')
    );

    // The generic on `get<T>()` is only a type hint: a value that comes from an
    // environment variable may still be a string, so coerce and validate it here.
    this.timeoutMs = LocalLlmService.parseTimeoutMs(
      this.configService.get<number | string>(
        'RAG_TIMEOUT_MS',
        LocalLlmService.DEFAULT_TIMEOUT_MS
      ),
      LocalLlmService.DEFAULT_TIMEOUT_MS
    );
  }

  /**
   * Generates an answer with the local model only; there is no cloud fallback.
   *
   * @param prompt full prompt text sent to the model
   * @returns the model output (empty string when the response carries none) with
   *          `usedFallbackModel` always `false`
   * @throws rethrows whatever the HTTP call throws, after logging it
   */
  async generate(prompt: string): Promise<LlmGenerateResult> {
    try {
      const response = await axios.post<{ response: string }>(
        `${this.ollamaUrl}/api/generate`,
        {
          model: this.ollamaModel,
          prompt,
          stream: false,
        },
        { timeout: this.timeoutMs }
      );
      return {
        answer: response.data.response ?? '',
        usedFallbackModel: false,
      };
    } catch (err) {
      this.logger.error(
        'Local Ollama generation failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /**
   * Strips basic prompt-injection patterns from user text before it reaches the model.
   *
   * The patterns are removed repeatedly until the text stops changing, so that input
   * such as `sysystem:stem:` cannot reassemble a blocked token once its inner
   * occurrence has been deleted. The result is then capped at 1000 characters.
   *
   * @param text raw user input
   * @returns sanitized text, at most 1000 characters long
   */
  sanitizeInput(text: string): string {
    let current = text;
    let previous: string;
    // Every effective pass shortens the string, so the loop always terminates.
    do {
      previous = current;
      for (const pattern of LocalLlmService.INJECTION_PATTERNS) {
        current = current.replace(pattern, '');
      }
    } while (current !== previous);
    return current.slice(0, LocalLlmService.MAX_INPUT_LENGTH);
  }

  /** Converts a raw config value into a non-negative millisecond count, or returns `fallback`. */
  private static parseTimeoutMs(raw: unknown, fallback: number): number {
    const candidate = typeof raw === 'string' ? raw.trim() : raw;
    if (candidate === '' || candidate === null || candidate === undefined) {
      return fallback;
    }
    const parsed = Number(candidate);
    return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
  }
}
|
||||
@@ -7,7 +7,7 @@ import { DocumentChunk } from './entities/document-chunk.entity';
|
||||
import { QUEUE_AI_VECTOR_DELETION } from '../common/constants/queue.constants';
|
||||
import { EmbeddingService } from './embedding.service';
|
||||
import { QdrantService } from './qdrant.service';
|
||||
import { TyphoonService } from './typhoon.service';
|
||||
import { LocalLlmService } from './local-llm.service';
|
||||
import { RagService } from './rag.service';
|
||||
import { RagController } from './rag.controller';
|
||||
import { IngestionService } from './ingestion.service';
|
||||
@@ -40,7 +40,7 @@ const DLQ_DEFAULTS = {
|
||||
providers: [
|
||||
EmbeddingService,
|
||||
QdrantService,
|
||||
TyphoonService,
|
||||
LocalLlmService,
|
||||
RagService,
|
||||
IngestionService,
|
||||
OcrProcessor,
|
||||
@@ -50,7 +50,7 @@ const DLQ_DEFAULTS = {
|
||||
exports: [
|
||||
EmbeddingService,
|
||||
QdrantService,
|
||||
TyphoonService,
|
||||
LocalLlmService,
|
||||
RagService,
|
||||
IngestionService,
|
||||
],
|
||||
|
||||
@@ -16,7 +16,7 @@ import { createHash } from 'crypto';
|
||||
|
||||
import { QdrantService } from './qdrant.service';
|
||||
import { EmbeddingService } from './embedding.service';
|
||||
import { TyphoonService } from './typhoon.service';
|
||||
import { LocalLlmService } from './local-llm.service';
|
||||
import { IngestionService } from './ingestion.service';
|
||||
import { DocumentChunk } from './entities/document-chunk.entity';
|
||||
import { RagQueryDto } from './dto/rag-query.dto';
|
||||
@@ -32,7 +32,7 @@ export class RagService {
|
||||
constructor(
|
||||
private readonly qdrant: QdrantService,
|
||||
private readonly embedding: EmbeddingService,
|
||||
private readonly typhoon: TyphoonService,
|
||||
private readonly localLlm: LocalLlmService,
|
||||
private readonly ingestionService: IngestionService,
|
||||
@InjectRepository(DocumentChunk)
|
||||
private readonly chunkRepo: Repository<DocumentChunk>,
|
||||
@@ -84,13 +84,10 @@ export class RagService {
|
||||
|
||||
const context = this.buildContext(reranked);
|
||||
|
||||
const safeQuestion = this.typhoon.sanitizeInput(question);
|
||||
const safeQuestion = this.localLlm.sanitizeInput(question);
|
||||
const prompt = this.buildPrompt(safeQuestion, context);
|
||||
|
||||
const { answer, usedFallbackModel } = await this.typhoon.generate(
|
||||
prompt,
|
||||
isConfidential
|
||||
);
|
||||
const { answer, usedFallbackModel } = await this.localLlm.generate(prompt);
|
||||
|
||||
const citations: RagCitation[] = reranked.map((r) => ({
|
||||
chunkId: r.chunkId,
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
|
||||
/** Result object returned by `TyphoonService.generate`. */
export interface LlmGenerateResult {
|
||||
/** Text produced by the model. */
answer: string;
|
||||
/** `true` when the answer was produced by the Ollama path rather than the Typhoon API. */
usedFallbackModel: boolean;
|
||||
}
|
||||
|
||||
/** Subset of the Typhoon `/chat/completions` response body that this file reads. */
interface TyphoonChatResponse {
|
||||
/** Completion candidates; only the first entry's `message.content` is used. */
choices: Array<{ message: { content: string } }>;
|
||||
}
|
||||
|
||||
/**
 * LLM client that prefers the Typhoon chat-completions API and falls back to a
 * local Ollama model when Typhoon fails, times out, or the caller forces local use.
 */
@Injectable()
export class TyphoonService {
  /** Request timeout for the Ollama call, in milliseconds. */
  private static readonly OLLAMA_TIMEOUT_MS = 30000;

  /** Maximum number of characters kept by `sanitizeInput`. */
  private static readonly MAX_INPUT_LENGTH = 1000;

  /** Patterns stripped from user text, applied in this order on every pass. */
  private static readonly INJECTION_PATTERNS: readonly RegExp[] = [
    /<CONTEXT_START>|<CONTEXT_END>/gi,
    /ignore previous instructions/gi,
    /system:/gi,
  ];

  private readonly logger = new Logger(TyphoonService.name);
  private readonly typhoonUrl: string;
  private readonly typhoonKey: string;
  private readonly ollamaUrl: string;
  private readonly ollamaModel: string;
  private readonly timeoutMs: number;

  /**
   * Reads endpoint, credential, model and timeout settings from configuration.
   *
   * @param configService source for `TYPHOON_API_URL`, `TYPHOON_API_KEY`,
   *        `OLLAMA_URL`, `OLLAMA_RAG_MODEL` and `RAG_TIMEOUT_MS`
   */
  constructor(private readonly configService: ConfigService) {
    this.typhoonUrl = this.configService.get<string>(
      'TYPHOON_API_URL',
      'https://api.opentyphoon.ai/v1'
    );
    this.typhoonKey = this.configService.get<string>('TYPHOON_API_KEY', '');
    this.ollamaUrl = this.configService.get<string>(
      'OLLAMA_URL',
      'http://localhost:11434'
    );
    this.ollamaModel = this.configService.get<string>(
      'OLLAMA_RAG_MODEL',
      'gemma3:12b'
    );
    this.timeoutMs = this.configService.get<number>('RAG_TIMEOUT_MS', 5000);
  }

  /**
   * Generates an answer for `prompt`.
   *
   * @param prompt full prompt text
   * @param forceLocal when `true`, skip Typhoon entirely and use Ollama
   * @returns the answer plus `usedFallbackModel`, which is `true` whenever Ollama
   *          produced the answer
   * @throws rethrows the Ollama error when the Ollama call itself fails
   */
  async generate(
    prompt: string,
    forceLocal: boolean
  ): Promise<LlmGenerateResult> {
    if (forceLocal) {
      const answer = await this.generateOllama(prompt);
      return { answer, usedFallbackModel: true };
    }

    try {
      const answer = await this.withTimeout(
        this.generateTyphoon(prompt),
        this.timeoutMs,
        'Typhoon timeout'
      );
      return { answer, usedFallbackModel: false };
    } catch (err) {
      this.logger.warn(
        `Typhoon failed, falling back to Ollama: ${err instanceof Error ? err.message : String(err)}`
      );
      const answer = await this.generateOllama(prompt);
      return { answer, usedFallbackModel: true };
    }
  }

  /**
   * Strips basic prompt-injection patterns from user text.
   *
   * Removal is repeated until the text stops changing, so nested input such as
   * `sysystem:stem:` cannot reassemble a blocked token. The result is capped at
   * 1000 characters.
   *
   * @param text raw user input
   * @returns sanitized text, at most 1000 characters long
   */
  sanitizeInput(text: string): string {
    let current = text;
    let previous: string;
    // Every effective pass shortens the string, so the loop always terminates.
    do {
      previous = current;
      for (const pattern of TyphoonService.INJECTION_PATTERNS) {
        current = current.replace(pattern, '');
      }
    } while (current !== previous);
    return current.slice(0, TyphoonService.MAX_INPUT_LENGTH);
  }

  /** Calls the Typhoon chat-completions endpoint and returns the first choice's text. */
  private async generateTyphoon(prompt: string): Promise<string> {
    const response = await axios.post<TyphoonChatResponse>(
      `${this.typhoonUrl}/chat/completions`,
      {
        model: 'typhoon-v2.1-12b-instruct',
        messages: [
          {
            role: 'user',
            content: `<CONTEXT_START>\n${prompt}\n<CONTEXT_END>`,
          },
        ],
        max_tokens: 1024,
        temperature: 0.1,
      },
      {
        headers: {
          Authorization: `Bearer ${this.typhoonKey}`,
          'Content-Type': 'application/json',
        },
        timeout: this.timeoutMs,
      }
    );
    // `choices` may be absent on an unexpected body; treat that as an empty answer.
    return response.data.choices?.[0]?.message?.content ?? '';
  }

  /** Calls the Ollama `/api/generate` endpoint with `stream: false` and returns its text. */
  private async generateOllama(prompt: string): Promise<string> {
    const response = await axios.post<{ response: string }>(
      `${this.ollamaUrl}/api/generate`,
      {
        model: this.ollamaModel,
        prompt,
        stream: false,
      },
      { timeout: TyphoonService.OLLAMA_TIMEOUT_MS }
    );
    return response.data.response ?? '';
  }

  /**
   * Races `work` against a timer that rejects with `message` after `ms` milliseconds.
   * The timer is always cleared once the race settles, so no timer outlives the call.
   */
  private withTimeout<T>(work: Promise<T>, ms: number, message: string): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => reject(new Error(message)), ms);
    });
    return Promise.race([work, expiry]).finally(() => {
      if (timer !== undefined) {
        clearTimeout(timer);
      }
    });
  }
}
|
||||
Reference in New Issue
Block a user