feat(rfa-ai): Complete RFA Approval Refactor and AI Model Revision
CI / CD Pipeline / build (push) Successful in 4m54s
CI / CD Pipeline / deploy (push) Failing after 12m9s

This commit is contained in:
2026-05-16 10:59:53 +07:00
parent 6cb3ae10ee
commit 1a162bf320
105 changed files with 5088 additions and 1083 deletions
@@ -6,7 +6,7 @@ import { getQueueToken } from '@nestjs/bullmq';
import { RagService } from '../rag.service';
import { QdrantService } from '../qdrant.service';
import { EmbeddingService } from '../embedding.service';
import { TyphoonService } from '../typhoon.service';
import { LocalLlmService } from '../local-llm.service';
import { IngestionService } from '../ingestion.service';
import { DocumentChunk } from '../entities/document-chunk.entity';
import { QUEUE_AI_VECTOR_DELETION } from '../../common/constants/queue.constants';
@@ -23,7 +23,7 @@ const mockEmbedding = {
embed: jest.fn(),
};
const mockTyphoon = {
const mockLocalLlm = {
generate: jest.fn(),
sanitizeInput: jest.fn((t: string) => t),
};
@@ -56,7 +56,7 @@ describe('RagService', () => {
RagService,
{ provide: QdrantService, useValue: mockQdrant },
{ provide: EmbeddingService, useValue: mockEmbedding },
{ provide: TyphoonService, useValue: mockTyphoon },
{ provide: LocalLlmService, useValue: mockLocalLlm },
{ provide: IngestionService, useValue: mockIngestion },
{ provide: getRepositoryToken(DocumentChunk), useValue: mockChunkRepo },
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
@@ -95,7 +95,7 @@ describe('RagService', () => {
score: 0.92,
},
]);
mockTyphoon.generate.mockResolvedValue({
mockLocalLlm.generate.mockResolvedValue({
answer: 'คำตอบ',
usedFallbackModel: false,
});
@@ -129,20 +129,17 @@ describe('RagService', () => {
mockQdrant.isReady.mockReturnValue(true);
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
mockQdrant.hybridSearch.mockResolvedValue([]);
mockTyphoon.generate.mockResolvedValue({
mockLocalLlm.generate.mockResolvedValue({
answer: 'ลับมาก',
usedFallbackModel: true,
usedFallbackModel: false,
});
const result = await service.query(dto, adminPerms);
expect(mockRedis.get).not.toHaveBeenCalled();
expect(mockRedis.setex).not.toHaveBeenCalled();
expect(mockTyphoon.generate).toHaveBeenCalledWith(
expect.any(String),
true
);
expect(result.usedFallbackModel).toBe(true);
expect(mockLocalLlm.generate).toHaveBeenCalledWith(expect.any(String));
expect(result.usedFallbackModel).toBe(false);
});
it('collectionReady=false → throw ServiceUnavailableException RAG_NOT_READY', async () => {
@@ -158,7 +155,7 @@ describe('RagService', () => {
mockRedis.get.mockResolvedValue(null);
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
mockQdrant.hybridSearch.mockResolvedValue([]);
mockTyphoon.generate.mockResolvedValue({
mockLocalLlm.generate.mockResolvedValue({
answer: 'A',
usedFallbackModel: false,
});
@@ -181,7 +178,7 @@ describe('RagService', () => {
mockRedis.get.mockResolvedValue(null);
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
mockQdrant.hybridSearch.mockResolvedValue([]);
mockTyphoon.generate.mockResolvedValue({
mockLocalLlm.generate.mockResolvedValue({
anwer: 'ok',
usedFallbackModel: false,
});
@@ -199,9 +196,9 @@ describe('RagService', () => {
mockRedis.get.mockResolvedValue(null);
mockEmbedding.embed.mockResolvedValue(new Array(768).fill(0.1));
mockQdrant.hybridSearch.mockResolvedValue([]);
mockTyphoon.generate.mockResolvedValue({
mockLocalLlm.generate.mockResolvedValue({
answer: 'ok',
usedFallbackModel: true,
usedFallbackModel: false,
});
await service.query(dto, adminPerms);
@@ -0,0 +1,67 @@
// File: src/modules/rag/local-llm.service.ts
// Change Log
// - 2026-05-15: แทนที่ cloud LLM API ด้วย Ollama local-only ตาม ADR-023A.
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import axios from 'axios';
export interface LlmGenerateResult {
answer: string;
usedFallbackModel: boolean;
}
/** บริการเรียก LLM ภายในองค์กรผ่าน Ollama เท่านั้น */
@Injectable()
export class LocalLlmService {
private readonly logger = new Logger(LocalLlmService.name);
private readonly ollamaUrl: string;
private readonly ollamaModel: string;
private readonly timeoutMs: number;
constructor(private readonly configService: ConfigService) {
this.ollamaUrl = this.configService.get<string>(
'OLLAMA_URL',
this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
);
this.ollamaModel = this.configService.get<string>(
'OLLAMA_MODEL_MAIN',
this.configService.get<string>('OLLAMA_RAG_MODEL', 'gemma4:e4b')
);
this.timeoutMs = this.configService.get<number>('RAG_TIMEOUT_MS', 30000);
}
/** สร้างคำตอบจากโมเดล local-only โดยไม่มี cloud fallback */
async generate(prompt: string): Promise<LlmGenerateResult> {
try {
const response = await axios.post<{ response: string }>(
`${this.ollamaUrl}/api/generate`,
{
model: this.ollamaModel,
prompt,
stream: false,
},
{ timeout: this.timeoutMs }
);
return {
answer: response.data.response ?? '',
usedFallbackModel: false,
};
} catch (err) {
this.logger.error(
'Local Ollama generation failed',
err instanceof Error ? err.stack : String(err)
);
throw err;
}
}
/** ทำความสะอาด prompt injection pattern พื้นฐานก่อนส่งเข้าโมเดล */
sanitizeInput(text: string): string {
return text
.replace(/<CONTEXT_START>|<CONTEXT_END>/gi, '')
.replace(/ignore previous instructions/gi, '')
.replace(/system:/gi, '')
.slice(0, 1000);
}
}
+3 -3
View File
@@ -7,7 +7,7 @@ import { DocumentChunk } from './entities/document-chunk.entity';
import { QUEUE_AI_VECTOR_DELETION } from '../common/constants/queue.constants';
import { EmbeddingService } from './embedding.service';
import { QdrantService } from './qdrant.service';
import { TyphoonService } from './typhoon.service';
import { LocalLlmService } from './local-llm.service';
import { RagService } from './rag.service';
import { RagController } from './rag.controller';
import { IngestionService } from './ingestion.service';
@@ -40,7 +40,7 @@ const DLQ_DEFAULTS = {
providers: [
EmbeddingService,
QdrantService,
TyphoonService,
LocalLlmService,
RagService,
IngestionService,
OcrProcessor,
@@ -50,7 +50,7 @@ const DLQ_DEFAULTS = {
exports: [
EmbeddingService,
QdrantService,
TyphoonService,
LocalLlmService,
RagService,
IngestionService,
],
+4 -7
View File
@@ -16,7 +16,7 @@ import { createHash } from 'crypto';
import { QdrantService } from './qdrant.service';
import { EmbeddingService } from './embedding.service';
import { TyphoonService } from './typhoon.service';
import { LocalLlmService } from './local-llm.service';
import { IngestionService } from './ingestion.service';
import { DocumentChunk } from './entities/document-chunk.entity';
import { RagQueryDto } from './dto/rag-query.dto';
@@ -32,7 +32,7 @@ export class RagService {
constructor(
private readonly qdrant: QdrantService,
private readonly embedding: EmbeddingService,
private readonly typhoon: TyphoonService,
private readonly localLlm: LocalLlmService,
private readonly ingestionService: IngestionService,
@InjectRepository(DocumentChunk)
private readonly chunkRepo: Repository<DocumentChunk>,
@@ -84,13 +84,10 @@ export class RagService {
const context = this.buildContext(reranked);
const safeQuestion = this.typhoon.sanitizeInput(question);
const safeQuestion = this.localLlm.sanitizeInput(question);
const prompt = this.buildPrompt(safeQuestion, context);
const { answer, usedFallbackModel } = await this.typhoon.generate(
prompt,
isConfidential
);
const { answer, usedFallbackModel } = await this.localLlm.generate(prompt);
const citations: RagCitation[] = reranked.map((r) => ({
chunkId: r.chunkId,
-115
View File
@@ -1,115 +0,0 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import axios from 'axios';
export interface LlmGenerateResult {
answer: string;
usedFallbackModel: boolean;
}
interface TyphoonChatResponse {
choices: Array<{ message: { content: string } }>;
}
@Injectable()
export class TyphoonService {
private readonly logger = new Logger(TyphoonService.name);
private readonly typhoonUrl: string;
private readonly typhoonKey: string;
private readonly ollamaUrl: string;
private readonly ollamaModel: string;
private readonly timeoutMs: number;
constructor(private readonly configService: ConfigService) {
this.typhoonUrl = this.configService.get<string>(
'TYPHOON_API_URL',
'https://api.opentyphoon.ai/v1'
);
this.typhoonKey = this.configService.get<string>('TYPHOON_API_KEY', '');
this.ollamaUrl = this.configService.get<string>(
'OLLAMA_URL',
'http://localhost:11434'
);
this.ollamaModel = this.configService.get<string>(
'OLLAMA_RAG_MODEL',
'gemma3:12b'
);
this.timeoutMs = this.configService.get<number>('RAG_TIMEOUT_MS', 5000);
}
async generate(
prompt: string,
forceLocal: boolean
): Promise<LlmGenerateResult> {
if (forceLocal) {
const answer = await this.generateOllama(prompt);
return { answer, usedFallbackModel: true };
}
try {
const answer = await Promise.race([
this.generateTyphoon(prompt),
this.delay(this.timeoutMs).then(() => {
throw new Error('Typhoon timeout');
}),
]);
return { answer, usedFallbackModel: false };
} catch (err) {
this.logger.warn(
`Typhoon failed, falling back to Ollama: ${err instanceof Error ? err.message : String(err)}`
);
const answer = await this.generateOllama(prompt);
return { answer, usedFallbackModel: true };
}
}
sanitizeInput(text: string): string {
return text
.replace(/<CONTEXT_START>|<CONTEXT_END>/gi, '')
.replace(/ignore previous instructions/gi, '')
.replace(/system:/gi, '')
.slice(0, 1000);
}
private async generateTyphoon(prompt: string): Promise<string> {
const response = await axios.post<TyphoonChatResponse>(
`${this.typhoonUrl}/chat/completions`,
{
model: 'typhoon-v2.1-12b-instruct',
messages: [
{
role: 'user',
content: `<CONTEXT_START>\n${prompt}\n<CONTEXT_END>`,
},
],
max_tokens: 1024,
temperature: 0.1,
},
{
headers: {
Authorization: `Bearer ${this.typhoonKey}`,
'Content-Type': 'application/json',
},
timeout: this.timeoutMs,
}
);
return response.data.choices[0]?.message?.content ?? '';
}
private async generateOllama(prompt: string): Promise<string> {
const response = await axios.post<{ response: string }>(
`${this.ollamaUrl}/api/generate`,
{
model: this.ollamaModel,
prompt,
stream: false,
},
{ timeout: 30000 }
);
return response.data.response ?? '';
}
private delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
}