feat(ai): ADR-032 Typhoon OCR integration - models, processors, cache, VRAM monitor, sandbox UI
This commit is contained in:
@@ -115,6 +115,7 @@ export class AiQueueService {
|
||||
userPublicId?: string;
|
||||
filePublicId?: string;
|
||||
pdfPath?: string;
|
||||
engineType?: string;
|
||||
extraPayload?: Record<string, unknown>;
|
||||
}
|
||||
): Promise<string> {
|
||||
@@ -129,6 +130,7 @@ export class AiQueueService {
|
||||
userPublicId: payload.userPublicId,
|
||||
filePublicId: payload.filePublicId,
|
||||
pdfPath: payload.pdfPath,
|
||||
engineType: payload.engineType,
|
||||
...payload.extraPayload,
|
||||
},
|
||||
idempotencyKey: payload.idempotencyKey,
|
||||
|
||||
@@ -98,4 +98,17 @@ describe('AiSettingsService', () => {
|
||||
'system_settings:AI_FEATURES_ENABLED'
|
||||
);
|
||||
});
|
||||
|
||||
// With neither a cached value nor a stored system setting, getActiveModel()
// must resolve to 'gemma4:e4b' and write that default to Redis via SET ... 'EX', 30.
it('ควรใช้ gemma4:e4b เป็นค่า active model เริ่มต้นเมื่อยังไม่มี system setting', async () => {
  // Arrange: both lookups come back empty.
  mockSettingRepo.findOne.mockResolvedValue(null);
  mockRedis.get.mockResolvedValue(null);

  // Act
  const resolvedModel = await service.getActiveModel();

  // Assert: the default is returned and cached.
  expect(resolvedModel).toBe('gemma4:e4b');
  const expectedCacheWrite = [
    'system_settings:AI_ACTIVE_MODEL',
    'gemma4:e4b',
    'EX',
    30,
  ];
  expect(mockRedis.set).toHaveBeenCalledWith(...expectedCacheWrite);
});
|
||||
});
|
||||
|
||||
@@ -150,7 +150,7 @@ export class AiSettingsService {
|
||||
where: { settingKey: AI_ACTIVE_MODEL_KEY },
|
||||
});
|
||||
|
||||
const activeModel = setting?.settingValue ?? 'gemma4:e2b';
|
||||
const activeModel = setting?.settingValue ?? 'gemma4:e4b';
|
||||
await this.redis.set(
|
||||
AI_ACTIVE_MODEL_CACHE_KEY,
|
||||
activeModel,
|
||||
@@ -160,7 +160,7 @@ export class AiSettingsService {
|
||||
return activeModel;
|
||||
} catch (error: unknown) {
|
||||
this.logger.error(`Failed to get active model: ${this.toMessage(error)}`);
|
||||
return 'gemma4:e2b';
|
||||
return 'gemma4:e4b';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
// - 2026-05-21: แก้ไขข้อห้ามใช้ parseInt โดยการใช้ Number แทนตามกฎ Tier 1
|
||||
// - 2026-05-23: เพิ่ม Migration Checkpoint API endpoints แทน MySQL direct access (ADR-023A)
|
||||
// - 2026-05-30: เพิ่ม @UseInterceptors(FileInterceptor('file')) ใน submitSandboxOcr เพื่อแก้ไขปัญหา BadRequestException (File is required)
|
||||
// - 2026-05-30: เพิ่ม endpoints GET/POST/PATCH models และ GET vram/status สำหรับ dynamic AI model management และ VRAM monitoring (T031-T034, US2)
|
||||
// Controller สำหรับ AI Gateway Endpoints (ADR-023)
|
||||
|
||||
import {
|
||||
@@ -78,6 +79,7 @@ import { v7 as uuidv7 } from 'uuid';
|
||||
import { DeleteAuditLogsQueryDto } from './dto/delete-audit-logs.dto';
|
||||
import { AiToolRegistryService } from './tool/ai-tool-registry.service';
|
||||
import { AiIntentRequestDto } from './dto/ai-intent-request.dto';
|
||||
import { AddAiModelDto } from './dto/add-ai-model.dto';
|
||||
import { ToggleAiFeaturesDto } from './dto/ai-admin-settings.dto';
|
||||
import { AiEnabledGuard } from './guards/ai-enabled.guard';
|
||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||
@@ -922,4 +924,84 @@ export class AiController {
|
||||
async logMigrationError(@Body() dto: MigrationErrorLogDto) {
|
||||
return this.migrationCheckpointService.logError(dto);
|
||||
}
|
||||
|
||||
// ─── AI Model Management & VRAM Monitoring Endpoints (T031-T034, US2) ───
|
||||
|
||||
/**
 * Lists the AI models known to the system together with the active model name.
 *
 * Served on both `models` and `ai-models`. The same two fields are returned
 * under `data` and again at the top level of the response.
 * NOTE(review): the duplicated shape presumably exists for clients that read
 * either form — confirm before removing one.
 */
@Get(['models', 'ai-models'])
@UseGuards(JwtAuthGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('system.manage_all')
@ApiOperation({
  summary:
    'AI Models List — ดึงรายการโมเดล AI ทั้งหมดพร้อม VRAM requirement (T031, US2)',
  description:
    'ดึงรายการโมเดล AI ทั้งหมดที่ใช้งานได้ รวมถึงสถานะการทำงานและทรัพยากร VRAM ที่ต้องการ',
})
async getAiModels() {
  const { models, activeModel } = await this.aiService.getAiModels();
  const listing = { models, activeModel };

  // `data` first, then the flat copies, matching the established key order.
  return { data: listing, ...listing };
}
|
||||
|
||||
/**
 * Registers a new AI model.
 *
 * @param dto  validated model definition from the request body
 * @param user authenticated caller; its `user_id` is forwarded to the service
 * @returns the created model wrapped in `{ data }`, with HTTP 201
 */
@Post(['models', 'ai-models'])
@UseGuards(JwtAuthGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('system.manage_all')
@HttpCode(HttpStatus.CREATED)
@ApiOperation({
  summary:
    'AI Add Model — เพิ่มโมเดล AI ใหม่เข้าระบบพร้อมระบุ VRAM requirement (T032, US2)',
  description:
    'เพิ่มโมเดล AI ใหม่เข้าสู่ระบบเพื่อใช้สำหรับคิวงาน หรือ OCR processing',
})
async addAiModel(@Body() dto: AddAiModelDto, @CurrentUser() user: User) {
  const { user_id: actorId } = user;
  const created = await this.aiService.addAiModel(dto, actorId);
  return { data: created };
}
|
||||
|
||||
/**
 * Makes the model identified by the `modelId` path parameter the active one.
 *
 * @param modelId identifier taken from the route
 * @param _dto    request body; accepted but not read by this handler
 * @param user    authenticated caller; its `user_id` is forwarded to the service
 * @returns `{ data: { id, isActive: true, activeModel } }` where `activeModel`
 *          is the value returned by the service
 */
@Patch(['models/:modelId/activate', 'ai-models/:modelId/activate'])
@UseGuards(JwtAuthGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('system.manage_all')
@HttpCode(HttpStatus.OK)
@ApiOperation({
  summary:
    'AI Activate Model — สลับโมเดล AI หลักพร้อมตรวจสอบ VRAM (T033, US2)',
  description:
    'เปิดใช้งานโมเดล AI สำหรับระบบหลัก โดยจะมีการตรวจสอบ capacity ของ VRAM GPU ป้องกัน OOM',
})
async activateAiModel(
  @Param('modelId') modelId: string,
  @Body() _dto: { isActive?: boolean },
  @CurrentUser() user: User
) {
  const activeModel = await this.aiService.activateAiModel(
    { modelId },
    user.user_id
  );

  const activation = { id: modelId, isActive: true, activeModel };
  return { data: activation };
}
|
||||
|
||||
/**
 * Returns the VRAM status reported by `AiService.getVramStatus()`,
 * wrapped in `{ data }`.
 */
@Get('vram/status')
@UseGuards(JwtAuthGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('system.manage_all')
@ApiOperation({
  summary:
    'AI VRAM Status — ดึงสถานะ VRAM และโมเดลที่โหลดอยู่บน Ollama (T034, US2)',
  description:
    'ตรวจสอบปริมาณ VRAM ที่เหลืออยู่ และรายการโมเดลทั้งหมดที่โหลดอยู่ใน GPU แบบเรียลไทม์',
})
async getVramStatus() {
  return { data: await this.aiService.getVramStatus() };
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
// - 2026-05-22: นำเข้าและลงทะเบียน CleanupTempFilesWorker (T016) เพื่อลบไฟล์แนบชั่วคราวหมดอายุ
|
||||
// - 2026-05-23: ลงทะเบียน MigrationProgress + AiMigrationCheckpointService (ADR-023A)
|
||||
// - 2026-05-25: ลงทะเบียน AiAvailableModel สำหรับ AI Model Management (ADR-027).
|
||||
// - 2026-05-30: ลงทะเบียน VramMonitorService, OcrCacheService, TyphoonOcrProcessor, TyphoonLlmProcessor (ADR-032).
|
||||
// Module สำหรับ AI Gateway — ลงทะเบียน Services และ Controllers (ADR-023)
|
||||
|
||||
import { Logger, Module, OnModuleInit } from '@nestjs/common';
|
||||
@@ -31,7 +32,10 @@ import { AiBatchProcessor } from './processors/ai-batch.processor';
|
||||
import { AiVectorDeletionProcessor } from './processors/vector-deletion.processor';
|
||||
import { OllamaService } from './services/ollama.service';
|
||||
import { OcrService } from './services/ocr.service';
|
||||
import { SandboxOcrEngineService } from './services/sandbox-ocr-engine.service';
|
||||
import { EmbeddingService } from './services/embedding.service';
|
||||
import { VramMonitorService } from './services/vram-monitor.service';
|
||||
import { OcrCacheService } from './services/ocr-cache.service';
|
||||
import { MigrationLog } from './entities/migration-log.entity';
|
||||
import { AiAuditLog } from './entities/ai-audit-log.entity';
|
||||
import { MigrationReviewRecord } from './entities/migration-review.entity';
|
||||
@@ -65,6 +69,14 @@ import {
|
||||
QUEUE_AI_REALTIME,
|
||||
QUEUE_AI_VECTOR_DELETION,
|
||||
} from '../common/constants/queue.constants';
|
||||
import {
|
||||
TyphoonOcrProcessor,
|
||||
QUEUE_TYPHOON_OCR,
|
||||
} from './processors/typhoon-ocr.processor';
|
||||
import {
|
||||
TyphoonLlmProcessor,
|
||||
QUEUE_TYPHOON_LLM,
|
||||
} from './processors/typhoon-llm.processor';
|
||||
|
||||
@Module({
|
||||
imports: [
|
||||
@@ -107,7 +119,26 @@ import {
|
||||
},
|
||||
},
|
||||
{ name: QUEUE_AI_RAG },
|
||||
{ name: QUEUE_AI_VECTOR_DELETION }
|
||||
{ name: QUEUE_AI_VECTOR_DELETION },
|
||||
// Typhoon OCR + LLM queues: concurrency=1 เพื่อป้องกัน VRAM overflow (ADR-032)
|
||||
{
|
||||
name: QUEUE_TYPHOON_OCR,
|
||||
defaultJobOptions: {
|
||||
attempts: 2,
|
||||
backoff: { type: 'exponential', delay: 5000 },
|
||||
removeOnComplete: 50,
|
||||
removeOnFail: 100,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: QUEUE_TYPHOON_LLM,
|
||||
defaultJobOptions: {
|
||||
attempts: 2,
|
||||
backoff: { type: 'exponential', delay: 5000 },
|
||||
removeOnComplete: 50,
|
||||
removeOnFail: 100,
|
||||
},
|
||||
}
|
||||
),
|
||||
|
||||
// HTTP Client สำหรับเรียก n8n Webhook (ADR-018: AI สื่อสารผ่าน API)
|
||||
@@ -147,7 +178,11 @@ import {
|
||||
AiValidationService,
|
||||
OllamaService,
|
||||
OcrService,
|
||||
SandboxOcrEngineService,
|
||||
EmbeddingService,
|
||||
// ADR-032: Typhoon OCR VRAM monitoring + result caching
|
||||
VramMonitorService,
|
||||
OcrCacheService,
|
||||
AiRealtimeProcessor,
|
||||
AiBatchProcessor,
|
||||
// Phase 4: RAG BullMQ pipeline (ADR-023)
|
||||
@@ -155,6 +190,9 @@ import {
|
||||
AiRagProcessor,
|
||||
// Phase 5: Vector Deletion async processor (ADR-023 FR-008)
|
||||
AiVectorDeletionProcessor,
|
||||
// ADR-032: Typhoon OCR + LLM sequential processors (concurrency=1)
|
||||
TyphoonOcrProcessor,
|
||||
TyphoonLlmProcessor,
|
||||
// RbacGuard ต้องการ UserService จาก UserModule
|
||||
RbacGuard,
|
||||
AiEnabledGuard,
|
||||
@@ -170,6 +208,10 @@ import {
|
||||
AiValidationService,
|
||||
OllamaService,
|
||||
OcrService,
|
||||
SandboxOcrEngineService,
|
||||
// ADR-032: Export สำหรับใช้งานใน controller
|
||||
VramMonitorService,
|
||||
OcrCacheService,
|
||||
AiRagService,
|
||||
],
|
||||
})
|
||||
|
||||
@@ -44,9 +44,21 @@ import {
|
||||
import { AiRealtimeJobData } from './processors/ai-realtime.processor';
|
||||
import { AiBatchJobData } from './processors/ai-batch.processor';
|
||||
import { AuditLog } from '../../common/entities/audit-log.entity';
|
||||
import { OllamaService } from './services/ollama.service';
|
||||
import { AiQdrantService } from './qdrant.service';
|
||||
import { OcrService, OcrHealthResult } from './services/ocr.service';
|
||||
import { AiSettingsService } from './ai-settings.service';
|
||||
import {
|
||||
VramMonitorService,
|
||||
VramStatus,
|
||||
} from './services/vram-monitor.service';
|
||||
import {
|
||||
AiModelConfiguration,
|
||||
AiModelType,
|
||||
} from './entities/ai-model-configuration.entity';
|
||||
import { AddAiModelDto } from './dto/add-ai-model.dto';
|
||||
import { ActivateAiModelDto } from './dto/activate-ai-model.dto';
|
||||
import { AiAvailableModel } from './entities/ai-available-model.entity';
|
||||
import { AiQdrantService } from './qdrant.service';
|
||||
import { OllamaService } from './services/ollama.service';
|
||||
|
||||
// ผลลัพธ์ของ Real-time Extraction
|
||||
export interface ExtractionResult {
|
||||
@@ -181,6 +193,10 @@ export class AiService {
|
||||
@Optional()
|
||||
private readonly ocrService?: OcrService,
|
||||
@Optional()
|
||||
private readonly aiSettingsService?: AiSettingsService,
|
||||
@Optional()
|
||||
private readonly vramMonitorService?: VramMonitorService,
|
||||
@Optional()
|
||||
@InjectRedis()
|
||||
private readonly redis?: Redis
|
||||
) {
|
||||
@@ -900,4 +916,172 @@ export class AiService {
|
||||
failedReason: job.failedReason,
|
||||
};
|
||||
}
|
||||
|
||||
// --- AI Model Management with VRAM Monitoring (T027 - T030, T038, US2) ---
|
||||
|
||||
/**
 * Lists all available AI models and names the currently active one (T027).
 *
 * Each stored model row is projected onto the plain `AiModelConfiguration`
 * shape. `modelId` is not stored: it comes from a fixed name-to-UUID table,
 * or is synthesised from the numeric row id for names outside that table.
 *
 * @returns the projected models plus the active model name
 * @throws SystemException when `AiSettingsService` was not injected
 */
async getAiModels(): Promise<{
  models: AiModelConfiguration[];
  activeModel: string;
}> {
  const settings = this.aiSettingsService;
  if (!settings) {
    throw new SystemException('AiSettingsService not injected in AiService');
  }

  const rows = await settings.getAvailableModels();
  const activeModel = await settings.getActiveModel();

  // Fixed identifiers for the built-in models.
  const uuidByModelName: Record<string, string> = {
    'gemma4:e2b': '019505a1-7c3e-7000-8000-abc123def201',
    'gemma4:e4b': '019505a1-7c3e-7000-8000-abc123def202',
    'typhoon2.1-gemma3-4b': '019505a1-7c3e-7000-8000-abc123def203',
  };

  const models: AiModelConfiguration[] = [];
  for (const row of rows) {
    // Unknown names get a two-digit suffix (10..99) derived from the row id.
    const modelId =
      uuidByModelName[row.modelName] ??
      `019505a1-7c3e-7000-8000-abc123def2${(row.id % 90) + 10}`;

    // Rows without a VRAM figure are treated as 4.0 GB.
    const vramRequirementMB = Math.round((row.vramGb ?? 4.0) * 1024);

    // Quantization is inferred from the model name only.
    const quantization = row.modelName.includes('e2b') ? 'Q2_K' : 'Q4_K_M';

    models.push({
      modelId,
      modelName: row.modelName,
      modelType: AiModelType.LLM, // every row is reported as an LLM
      ollamaModelName: row.modelName,
      vramRequirementMB,
      isActive: row.isActive,
      useCases: ['document_analysis', 'ocr_extraction'],
      quantization,
      createdAt: row.createdAt,
      updatedAt: row.updatedAt,
    } as AiModelConfiguration);
  }

  return { models, activeModel };
}
|
||||
|
||||
/**
 * Returns the VRAM status from the VRAM monitor (T034).
 *
 * @throws SystemException when `VramMonitorService` was not injected
 */
async getVramStatus(): Promise<VramStatus> {
  const monitor = this.vramMonitorService;
  if (monitor) {
    return monitor.getVramStatus();
  }
  throw new SystemException('VramMonitorService not injected in AiService');
}
|
||||
|
||||
/**
 * Adds a new AI model to the system (superadmin only, T028) and writes an
 * audit log entry for it (T038).
 *
 * @param dto    model definition; the VRAM requirement is given in MB
 * @param userId id of the acting user, forwarded to the settings service
 * @returns the model returned by `AiSettingsService.addModel`
 * @throws SystemException when `AiSettingsService` was not injected
 */
async addAiModel(
  dto: AddAiModelDto,
  userId: number
): Promise<AiAvailableModel> {
  if (!this.aiSettingsService) {
    throw new SystemException('AiSettingsService not injected in AiService');
  }

  const {
    modelName,
    ollamaModelName,
    vramRequirementMB,
    quantization,
    useCases,
  } = dto;

  // MB -> GB, rounded to two decimals.
  const vramGb = Number((vramRequirementMB / 1024).toFixed(2));

  // The version is the part after ':' in the Ollama name; 'latest' when absent or empty.
  const [, versionTag] = ollamaModelName.split(':');
  const modelVersion = versionTag || 'latest';

  const description = `Added via API. Quantization: ${quantization || 'N/A'}. Use Cases: ${useCases.join(', ')}`;

  const model = await this.aiSettingsService.addModel(
    { modelName, modelVersion, description, vramGb },
    userId
  );

  // Audit entry for the addition; the all-zero id is used because no document is involved.
  await this.saveAuditLog({
    documentPublicId: '00000000-0000-0000-0000-000000000000',
    aiModel: 'system',
    status: AiAuditStatus.SUCCESS,
    errorMessage: `Model ${dto.modelName} added by user ${userId}. VRAM requirement: ${dto.vramRequirementMB}MB`,
  });

  return model;
}
|
||||
|
||||
/**
 * Switches the active AI model after verifying VRAM capacity (T029, T030, T038).
 *
 * `dto.modelId` is resolved against the available models in this order:
 *   1. one of the hard-coded UUIDs (mapped to its model name), the model
 *      name itself, or the numeric row id rendered as a string;
 *   2. the synthetic fallback UUID that `getAiModels()` reports for models
 *      outside the hard-coded table. Without this step, a model listed by
 *      `getAiModels()` could not be activated by the `modelId` it was listed
 *      under. Fallback UUIDs repeat for row ids that differ by a multiple of
 *      90; the first matching row wins.
 *
 * @param dto    identifier of the model to activate
 * @param userId id of the acting user; forwarded to `setActiveModel` and
 *               included in the audit text
 * @returns the value returned by `AiSettingsService.setActiveModel`
 * @throws SystemException   when a required service was not injected
 * @throws NotFoundException when no available model matches `dto.modelId`
 * @throws BusinessException `MODEL_INACTIVE` when the model is not active,
 *                           `INSUFFICIENT_VRAM` when the capacity check fails
 */
async activateAiModel(
  dto: ActivateAiModelDto,
  userId: number
): Promise<string> {
  if (!this.aiSettingsService || !this.vramMonitorService) {
    throw new SystemException(
      'AiSettingsService or VramMonitorService not injected in AiService'
    );
  }

  // 1. Load the candidate models.
  const availableModels = await this.aiSettingsService.getAvailableModels();

  // Hard-coded UUID -> model name table (the inverse of the one in getAiModels()).
  const modelNameByUuid = new Map<string, string>([
    ['019505a1-7c3e-7000-8000-abc123def201', 'gemma4:e2b'],
    ['019505a1-7c3e-7000-8000-abc123def202', 'gemma4:e4b'],
    ['019505a1-7c3e-7000-8000-abc123def203', 'typhoon2.1-gemma3-4b'],
  ]);
  const builtInModelNames = new Set(modelNameByUuid.values());

  // Same formula getAiModels() uses for models outside the table.
  const fallbackUuidFor = (id: number): string =>
    `019505a1-7c3e-7000-8000-abc123def2${(id % 90) + 10}`;

  const targetModelName = modelNameByUuid.get(dto.modelId) ?? dto.modelId;

  // First pass keeps the original matching rules; the fallback UUID is only
  // tried when nothing else matched, so existing callers see no change.
  const model =
    availableModels.find(
      (m) => m.modelName === targetModelName || String(m.id) === dto.modelId
    ) ??
    availableModels.find(
      (m) =>
        !builtInModelNames.has(m.modelName) &&
        fallbackUuidFor(m.id) === dto.modelId
    );

  if (!model) {
    throw new NotFoundException(
      `AI Model with identifier ${dto.modelId} not found`
    );
  }

  if (!model.isActive) {
    throw new BusinessException(
      'MODEL_INACTIVE',
      `AI Model ${model.modelName} is not active`,
      'โมเดล AI นี้ยังไม่ได้เปิดใช้งาน กรุณาตั้งค่าสถานะโมเดลเป็น Active ก่อน'
    );
  }

  // 2. Check VRAM before allowing the switch (T030).
  //    Rows without a VRAM figure are treated as 4.0 GB.
  const vramRequirementMB = Math.round((model.vramGb ?? 4.0) * 1024);
  const hasCapacity =
    await this.vramMonitorService.hasVramCapacity(vramRequirementMB);

  if (!hasCapacity) {
    const vramStatus = await this.vramMonitorService.getVramStatus();
    const errMsg = `VRAM ไม่เพียงพอสำหรับการโหลดโมเดล ${model.modelName} (ต้องการ ${vramRequirementMB}MB, เหลือ ${vramStatus.freeVramMb}MB) — กรุณา unload โมเดลอื่น หรือเว้นระยะห่างในการโหลด`;

    // Record the rejected attempt before surfacing the error.
    await this.saveAuditLog({
      documentPublicId: '00000000-0000-0000-0000-000000000000',
      aiModel: 'system',
      status: AiAuditStatus.FAILED,
      errorMessage: `Failed to activate model ${model.modelName} due to insufficient VRAM: ${errMsg}`,
    });

    throw new BusinessException(
      'INSUFFICIENT_VRAM',
      errMsg,
      `พื้นที่หน่วยความจำ GPU (VRAM) ไม่เพียงพอสำหรับการโหลดโมเดล ${model.modelName}`
    );
  }

  // 3. Switch the active model.
  const activeModel = await this.aiSettingsService.setActiveModel(
    model.modelName,
    userId
  );

  // Audit entry for the successful activation (T038).
  await this.saveAuditLog({
    documentPublicId: '00000000-0000-0000-0000-000000000000',
    aiModel: 'system',
    status: AiAuditStatus.SUCCESS,
    errorMessage: `Model ${model.modelName} activated by user ${userId}. VRAM Capacity verified successfully.`,
  });

  return activeModel;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
// File: src/modules/ai/dto/activate-ai-model.dto.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง ActivateAiModelDto สำหรับตั้งค่าโมเดล AI หลักที่ต้องการเปิดใช้งาน (T026, US2)
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
import { IsString } from 'class-validator';
|
||||
|
||||
/** Request DTO carrying the identifier of the AI model to activate. */
export class ActivateAiModelDto {
  /** Model identifier (UUIDv7) or model name, per the API description. */
  @ApiProperty({ description: 'รหัสโมเดล AI (UUIDv7) หรือชื่อโมเดล AI ที่ต้องการเปิดใช้งาน' })
  @IsString()
  modelId!: string;
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// File: src/modules/ai/dto/add-ai-model.dto.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง AddAiModelDto สำหรับเพิ่มโมเดล AI ใหม่เข้าระบบ (T025, US2)
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
import {
  IsString,
  IsEnum,
  IsNumber,
  IsArray,
  IsOptional,
  Min,
} from 'class-validator';
|
||||
import { AiModelType } from '../entities/ai-model-configuration.entity';
|
||||
|
||||
/** Request DTO for registering a new AI model. */
export class AddAiModelDto {
  /** Model name, e.g. `gemma4:e4b` or `typhoon2.1-gemma3-4b`. */
  @ApiProperty({
    description: 'ชื่อของโมเดล AI (เช่น gemma4:e4b, typhoon2.1-gemma3-4b)',
  })
  @IsString()
  modelName!: string;

  /** Kind of model; must be one of the `AiModelType` values. */
  @ApiProperty({ description: 'ประเภทของโมเดล AI', enum: AiModelType })
  @IsEnum(AiModelType)
  modelType!: AiModelType;

  /** Name of the model in the Ollama registry. */
  @ApiProperty({ description: 'ชื่อโมเดลใน Ollama Registry' })
  @IsString()
  ollamaModelName!: string;

  /**
   * VRAM required to run the model, in MB.
   * Must be at least 1: a zero or negative value would be stored as the
   * model's VRAM size and make any later capacity check meaningless.
   */
  @ApiProperty({ description: 'ความต้องการ VRAM ในการประมวลผล (MB)' })
  @IsNumber()
  @Min(1)
  vramRequirementMB!: number;

  /** Supported use cases, as a list of strings. */
  @ApiProperty({
    description: 'กรณีการใช้งานที่รองรับ (Use Cases)',
    type: [String],
  })
  @IsArray()
  @IsString({ each: true })
  useCases!: string[];

  /** Optional quantization label, e.g. `Q3_K_M`. */
  @ApiProperty({
    description: 'ประเภท Quantization (เช่น Q3_K_M)',
    required: false,
  })
  @IsString()
  @IsOptional()
  quantization?: string;
}
|
||||
@@ -0,0 +1,47 @@
|
||||
// File: src/modules/ai/dto/ocr-engine-response.dto.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง OcrEngineResponseDto สำหรับส่งข้อมูลผลลัพธ์ OCR Engine (T012, US1)
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
import { OcrEngineType } from '../entities/ocr-engine-configuration.entity';
|
||||
|
||||
/** Response DTO describing one OCR engine as returned to clients. */
export class OcrEngineResponseDto {
  /** Engine identifier (UUIDv7). */
  @ApiProperty({ description: 'รหัสประจำตัว OCR Engine (UUIDv7)' })
  engineId!: string;

  /** Engine name. */
  @ApiProperty({ description: 'ชื่อของ OCR Engine' })
  engineName!: string;

  /** Engine kind; one of the `OcrEngineType` values. */
  @ApiProperty({ description: 'ประเภทของ OCR Engine', enum: OcrEngineType })
  engineType!: OcrEngineType;

  /** Whether the engine is enabled. */
  @ApiProperty({ description: 'สถานะเปิดใช้งาน' })
  isActive!: boolean;

  /** Whether this engine is the one currently in use. */
  @ApiProperty({ description: 'ระบุว่าเป็น Engine ที่ใช้งานอยู่ปัจจุบันหรือไม่' })
  isCurrentActive!: boolean;

  /** VRAM required for processing, in MB. */
  @ApiProperty({ description: 'ความต้องการ VRAM ในการประมวลผล (MB)' })
  vramRequirementMB!: number;

  /** Maximum processing time per page, in seconds. */
  @ApiProperty({ description: 'จำกัดเวลาในการประมวลผลสูงสุดต่อหน้า (วินาที)' })
  processingTimeLimitSeconds!: number;

  /** Limit on concurrent processing. */
  @ApiProperty({ description: 'จำกัดการประมวลผลพร้อมกัน' })
  concurrentLimit!: number;

  /** Identifier of the fallback engine used on failure, if any. */
  @ApiProperty({
    description: 'รหัสประจำตัว OCR Engine สำรองกรณีขัดข้อง',
    nullable: true,
  })
  fallbackEngineId?: string | null;

  /** Creation timestamp. */
  @ApiProperty({ description: 'เวลาที่สร้างข้อมูล' })
  createdAt!: Date;

  /** Last-update timestamp. */
  @ApiProperty({ description: 'เวลาที่อัปเดตข้อมูลล่าสุด' })
  updatedAt!: Date;
}
|
||||
@@ -0,0 +1,17 @@
|
||||
// File: src/modules/ai/dto/ocr-engine-selection.dto.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง OcrEngineSelectionDto สำหรับการเลือก OCR Engine (T011, US1)
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
import { IsBoolean, IsOptional } from 'class-validator';
|
||||
|
||||
/** Request DTO for selecting an OCR engine or changing its enabled state. */
export class OcrEngineSelectionDto {
  /** Optional flag that enables or disables the engine. */
  @ApiProperty({ description: 'เปิดใช้งานหรือปิดใช้งาน Engine นี้', required: false })
  @IsBoolean()
  @IsOptional()
  isActive?: boolean;
}
|
||||
@@ -1,6 +1,7 @@
|
||||
// File: src/modules/ai/entities/ai-audit-log.entity.ts
|
||||
// Change Log
|
||||
// - 2026-05-14: เพิ่ม ADR-023 feedback fields โดยคง legacy audit fields ไว้ช่วงเปลี่ยนผ่าน.
|
||||
// - 2026-05-30: เพิ่ม modelType, vramUsageMb, cacheHit สำหรับ Typhoon OCR integration (T008, ADR-032).
|
||||
// Entity สำหรับตาราง ai_audit_logs — บันทึก AI Interaction และ feedback ตาม ADR-023
|
||||
|
||||
import {
|
||||
@@ -39,6 +40,19 @@ export class AiAuditLog extends UuidBaseEntity {
|
||||
@Column({ name: 'model_name', type: 'varchar', length: 100, nullable: true })
|
||||
modelName?: string;
|
||||
|
||||
// ประเภท OCR/LLM model ที่ใช้ เช่น tesseract, typhoon-ocr-3b, typhoon2.1-gemma3-4b (ADR-032)
|
||||
@Index('idx_ai_audit_model_type')
|
||||
@Column({ name: 'model_type', type: 'varchar', length: 50, nullable: true })
|
||||
modelType?: string;
|
||||
|
||||
// VRAM ที่ใช้จริง (MB) ณ เวลาประมวลผล (ADR-032)
|
||||
@Column({ name: 'vram_usage_mb', type: 'int', nullable: true })
|
||||
vramUsageMb?: number;
|
||||
|
||||
// ระบุว่าผลลัพธ์มาจาก Redis cache (true) หรือ OCR จริง (false) (ADR-032)
|
||||
@Column({ name: 'cache_hit', type: 'tinyint', width: 1, default: 0 })
|
||||
cacheHit!: boolean;
|
||||
|
||||
// JSON ที่ AI แนะนำก่อนมนุษย์ตรวจสอบ
|
||||
@Column({ name: 'ai_suggestion_json', type: 'json', nullable: true })
|
||||
aiSuggestionJson?: Record<string, unknown>;
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
// File: src/modules/ai/entities/ai-model-configuration.entity.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง AiModelConfiguration class สำหรับเก็บข้อมูลการตั้งค่า AI Model (T024, US2)
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
|
||||
/** Kinds of AI model; the string values are what is exposed through the API enum. */
export enum AiModelType {
  /** Large language model. */
  LLM = 'llm',
  /** Embedding model. */
  EMBEDDING = 'embedding',
  /** OCR model. */
  OCR = 'ocr',
}
|
||||
|
||||
/**
 * Plain class holding the configuration of one AI model.
 * Not mapped directly to a SQL table (per data-model.md).
 */
export class AiModelConfiguration {
  /** Model identifier (UUIDv7). */
  @ApiProperty({ description: 'รหัสประจำตัวโมเดล AI (UUIDv7)' })
  modelId!: string;

  /** Model name, e.g. `gemma4:e4b` or `typhoon2.1-gemma3-4b`. */
  @ApiProperty({ description: 'ชื่อของโมเดล AI (เช่น gemma4:e4b, typhoon2.1-gemma3-4b)' })
  modelName!: string;

  /** Kind of model; one of the `AiModelType` values. */
  @ApiProperty({ description: 'ประเภทของโมเดล AI', enum: AiModelType })
  modelType!: AiModelType;

  /** Name of the model in the Ollama registry. */
  @ApiProperty({ description: 'ชื่อโมเดลใน Ollama Registry' })
  ollamaModelName!: string;

  /** VRAM required for processing, in MB. */
  @ApiProperty({ description: 'ความต้องการ VRAM ในการประมวลผล (MB)' })
  vramRequirementMB!: number;

  /** Whether the model is enabled. */
  @ApiProperty({ description: 'สถานะเปิดใช้งานโมเดล' })
  isActive!: boolean;

  /** Supported use cases, as a list of strings. */
  @ApiProperty({ description: 'กรณีการใช้งานที่รองรับ (Use Cases)', type: [String] })
  useCases!: string[];

  /** Quantization label, e.g. `Q3_K_M`; may be null or absent. */
  @ApiProperty({ description: 'ประเภท Quantization (เช่น Q3_K_M)', nullable: true })
  quantization?: string | null;

  /** Creation timestamp. */
  @ApiProperty({ description: 'เวลาที่สร้างข้อมูล' })
  createdAt!: Date;

  /** Last-update timestamp. */
  @ApiProperty({ description: 'เวลาที่อัปเดตข้อมูลล่าสุด' })
  updatedAt!: Date;
}
|
||||
@@ -0,0 +1,46 @@
|
||||
// File: src/modules/ai/entities/ocr-engine-configuration.entity.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: สร้าง OcrEngineConfiguration class สำหรับเก็บข้อมูลการตั้งค่า OCR Engine (T010, US1)
|
||||
|
||||
import { ApiProperty } from '@nestjs/swagger';
|
||||
|
||||
/** Supported OCR engine kinds; the string values are what is exposed through the API enum. */
export enum OcrEngineType {
  /** Tesseract engine. */
  TESSERACT = 'tesseract',
  /** Typhoon OCR engine. */
  TYPHOON_OCR = 'typhoon_ocr',
}
|
||||
|
||||
/**
 * Plain class holding the configuration of one OCR engine.
 * Not mapped to a SQL table (per data-model.md).
 */
export class OcrEngineConfiguration {
  /** Engine identifier (UUIDv7). */
  @ApiProperty({ description: 'รหัสประจำตัว OCR Engine (UUIDv7)' })
  engineId!: string;

  /** Engine name. */
  @ApiProperty({ description: 'ชื่อของ OCR Engine' })
  engineName!: string;

  /** Engine kind; one of the `OcrEngineType` values. */
  @ApiProperty({ description: 'ประเภทของ OCR Engine', enum: OcrEngineType })
  engineType!: OcrEngineType;

  /** Whether the engine is enabled. */
  @ApiProperty({ description: 'สถานะเปิดใช้งาน' })
  isActive!: boolean;

  /** VRAM required for processing, in MB. */
  @ApiProperty({ description: 'ความต้องการ VRAM ในการประมวลผล (MB)' })
  vramRequirementMB!: number;

  /** Maximum processing time per page, in seconds. */
  @ApiProperty({ description: 'จำกัดเวลาในการประมวลผลสูงสุดต่อหน้า (วินาที)' })
  processingTimeLimitSeconds!: number;

  /** Limit on concurrent processing. */
  @ApiProperty({ description: 'จำกัดการประมวลผลพร้อมกัน' })
  concurrentLimit!: number;

  /** Identifier of the fallback engine used on failure, if any. */
  @ApiProperty({
    description: 'รหัสประจำตัว OCR Engine สำรองกรณีขัดข้อง',
    nullable: true,
  })
  fallbackEngineId?: string | null;

  /** Timestamp at which the record was saved. */
  @ApiProperty({ description: 'เวลาที่บันทึกข้อมูล' })
  createdAt!: Date;

  /** Last-update timestamp. */
  @ApiProperty({ description: 'เวลาที่อัปเดตข้อมูลล่าสุด' })
  updatedAt!: Date;
}
|
||||
@@ -17,6 +17,7 @@ import { EmbeddingService } from '../services/embedding.service';
|
||||
import { AiRagService } from '../ai-rag.service';
|
||||
import { Attachment } from '../../../common/file-storage/entities/attachment.entity';
|
||||
import { OcrService } from '../services/ocr.service';
|
||||
import { SandboxOcrEngineService } from '../services/sandbox-ocr-engine.service';
|
||||
import { OllamaService } from '../services/ollama.service';
|
||||
import { Project } from '../../project/entities/project.entity';
|
||||
import { AiAuditLog } from '../entities/ai-audit-log.entity';
|
||||
@@ -29,6 +30,7 @@ describe('AiBatchProcessor', () => {
|
||||
let embeddingService: jest.Mocked<EmbeddingService>;
|
||||
let ragService: jest.Mocked<AiRagService>;
|
||||
let ocrService: jest.Mocked<OcrService>;
|
||||
let sandboxOcrEngineService: jest.Mocked<SandboxOcrEngineService>;
|
||||
let ollamaService: jest.Mocked<OllamaService>;
|
||||
let redis: Record<string, jest.Mock>;
|
||||
let attachmentRepo: jest.Mocked<Repository<Attachment>>;
|
||||
@@ -46,6 +48,14 @@ describe('AiBatchProcessor', () => {
|
||||
.fn()
|
||||
.mockResolvedValue({ text: 'OCR text LCBP3-CIV-001 Civil' }),
|
||||
};
|
||||
const mockSandboxOcrEngineService = {
|
||||
detectAndExtract: jest.fn().mockResolvedValue({
|
||||
text: 'OCR text LCBP3-CIV-001 Civil',
|
||||
ocrUsed: true,
|
||||
engineUsed: 'typhoon-ocr-3b',
|
||||
fallbackUsed: false,
|
||||
}),
|
||||
};
|
||||
const mockOllamaService = {
|
||||
getMainModelName: jest.fn().mockReturnValue('gemma4:e4b'),
|
||||
generate: jest.fn().mockResolvedValue(
|
||||
@@ -131,6 +141,10 @@ describe('AiBatchProcessor', () => {
|
||||
{ provide: EmbeddingService, useValue: mockEmbeddingService },
|
||||
{ provide: AiRagService, useValue: mockRagService },
|
||||
{ provide: OcrService, useValue: mockOcrService },
|
||||
{
|
||||
provide: SandboxOcrEngineService,
|
||||
useValue: mockSandboxOcrEngineService,
|
||||
},
|
||||
{ provide: OllamaService, useValue: mockOllamaService },
|
||||
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
|
||||
{
|
||||
@@ -154,6 +168,7 @@ describe('AiBatchProcessor', () => {
|
||||
embeddingService = module.get(EmbeddingService);
|
||||
ragService = module.get(AiRagService);
|
||||
ocrService = module.get(OcrService);
|
||||
sandboxOcrEngineService = module.get(SandboxOcrEngineService);
|
||||
ollamaService = module.get(OllamaService);
|
||||
redis = module.get(DEFAULT_REDIS_TOKEN);
|
||||
attachmentRepo = module.get(getRepositoryToken(Attachment));
|
||||
@@ -218,9 +233,10 @@ describe('AiBatchProcessor', () => {
|
||||
},
|
||||
} as unknown as Job<AiBatchJobData>;
|
||||
await processor.process(job);
|
||||
expect(ocrService.detectAndExtract).toHaveBeenCalledWith({
|
||||
pdfPath: '/files/test.pdf',
|
||||
});
|
||||
expect(sandboxOcrEngineService.detectAndExtract).toHaveBeenCalledWith(
|
||||
'/files/test.pdf',
|
||||
'auto'
|
||||
);
|
||||
expect(ollamaService.generate).toHaveBeenCalledTimes(1);
|
||||
expect(redis.setex).toHaveBeenCalledTimes(2);
|
||||
expect(redis.setex).toHaveBeenLastCalledWith(
|
||||
|
||||
@@ -22,6 +22,10 @@ import { QUEUE_AI_BATCH } from '../../common/constants/queue.constants';
|
||||
import { EmbeddingService } from '../services/embedding.service';
|
||||
import { AiRagService } from '../ai-rag.service';
|
||||
import { OcrService } from '../services/ocr.service';
|
||||
import {
|
||||
SandboxOcrEngineService,
|
||||
SandboxOcrEngineType,
|
||||
} from '../services/sandbox-ocr-engine.service';
|
||||
import { OllamaService } from '../services/ollama.service';
|
||||
import { Project } from '../../project/entities/project.entity';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
@@ -147,6 +151,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
private readonly embeddingService: EmbeddingService,
|
||||
private readonly ragService: AiRagService,
|
||||
private readonly ocrService: OcrService,
|
||||
private readonly sandboxOcrEngineService: SandboxOcrEngineService,
|
||||
private readonly ollamaService: OllamaService,
|
||||
private readonly tagsService: TagsService,
|
||||
private readonly migrationService: MigrationService,
|
||||
@@ -295,6 +300,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
private async processSandboxExtract(data: AiBatchJobData): Promise<void> {
|
||||
const { idempotencyKey, payload, projectPublicId } = data;
|
||||
const pdfPath = payload.pdfPath as string;
|
||||
const engineType = (payload.engineType as SandboxOcrEngineType) || 'auto';
|
||||
const overrideProjPublicId =
|
||||
(payload.projectPublicId as string) || projectPublicId;
|
||||
if (!pdfPath) {
|
||||
@@ -309,7 +315,10 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
})
|
||||
);
|
||||
try {
|
||||
const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
|
||||
const ocrResult = await this.sandboxOcrEngineService.detectAndExtract(
|
||||
pdfPath,
|
||||
engineType
|
||||
);
|
||||
|
||||
const activePrompt =
|
||||
await this.aiPromptsService.getActive('ocr_extraction');
|
||||
@@ -362,6 +371,8 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
answer: JSON.stringify(extractedMetadata, null, 2),
|
||||
ocrText: ocrResult.text,
|
||||
ocrUsed: ocrResult.ocrUsed,
|
||||
engineUsed: ocrResult.engineUsed,
|
||||
fallbackUsed: ocrResult.fallbackUsed,
|
||||
promptVersionUsed: activePrompt.versionNumber,
|
||||
completedAt: new Date().toISOString(),
|
||||
})
|
||||
@@ -387,6 +398,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
private async processSandboxOcrOnly(data: AiBatchJobData): Promise<void> {
|
||||
const { idempotencyKey, payload } = data;
|
||||
const pdfPath = payload.pdfPath as string;
|
||||
const engineType = (payload.engineType as SandboxOcrEngineType) || 'auto';
|
||||
|
||||
if (!pdfPath) {
|
||||
throw new Error('pdfPath is required for sandbox-ocr-only job');
|
||||
@@ -402,7 +414,10 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
);
|
||||
|
||||
try {
|
||||
const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
|
||||
const ocrResult = await this.sandboxOcrEngineService.detectAndExtract(
|
||||
pdfPath,
|
||||
engineType
|
||||
);
|
||||
|
||||
// Cache OCR text สำหรับ Step 2
|
||||
await this.redis.setex(
|
||||
@@ -411,6 +426,8 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
JSON.stringify({
|
||||
ocrText: ocrResult.text,
|
||||
ocrUsed: ocrResult.ocrUsed,
|
||||
engineUsed: ocrResult.engineUsed,
|
||||
fallbackUsed: ocrResult.fallbackUsed,
|
||||
timestamp: new Date().toISOString(),
|
||||
})
|
||||
);
|
||||
@@ -423,6 +440,8 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
status: 'completed',
|
||||
ocrText: ocrResult.text,
|
||||
ocrUsed: ocrResult.ocrUsed,
|
||||
engineUsed: ocrResult.engineUsed,
|
||||
fallbackUsed: ocrResult.fallbackUsed,
|
||||
completedAt: new Date().toISOString(),
|
||||
})
|
||||
);
|
||||
@@ -470,6 +489,8 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
const parsedOcr = JSON.parse(cachedOcr) as {
|
||||
ocrText: string;
|
||||
ocrUsed: boolean;
|
||||
engineUsed?: string;
|
||||
fallbackUsed?: boolean;
|
||||
timestamp: string;
|
||||
};
|
||||
const { ocrText } = parsedOcr;
|
||||
@@ -542,6 +563,8 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
answer: JSON.stringify(extractedMetadata, null, 2),
|
||||
ocrText,
|
||||
ocrUsed: parsedOcr.ocrUsed,
|
||||
engineUsed: parsedOcr.engineUsed,
|
||||
fallbackUsed: parsedOcr.fallbackUsed,
|
||||
promptVersionUsed: targetPrompt.versionNumber,
|
||||
completedAt: new Date().toISOString(),
|
||||
})
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
// File: src/modules/ai/processors/typhoon-llm.processor.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: Initial processor สำหรับ Typhoon LLM sequential jobs (T009d, ADR-032)
|
||||
// รันด้วย concurrency=1 เพื่อป้องกัน VRAM overflow บน RTX 2060 Super (8GB)
|
||||
// ใช้ keep_alive=0 ผ่าน Ollama API เพื่อ unload model หลังประมวลผล
|
||||
|
||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { Job } from 'bullmq';
|
||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||
import Redis from 'ioredis';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
import { VramMonitorService } from '../services/vram-monitor.service';
|
||||
|
||||
/** Name of the BullMQ queue that carries Typhoon LLM jobs. */
|
||||
export const QUEUE_TYPHOON_LLM = 'typhoon-llm';
|
||||
|
||||
/** Payload shape of a job placed on the Typhoon LLM queue. */
|
||||
export interface TyphoonLlmJobData {
|
||||
/** Prompt text sent to the Typhoon LLM. */
|
||||
prompt: string;
|
||||
/** Model name, e.g. scb10x/typhoon2.1-gemma3-4b; the processor's configured default is used when omitted. */
|
||||
model?: string;
|
||||
/** Idempotency key; used as the suffix of the Redis key that stores the job result. */
|
||||
idempotencyKey: string;
|
||||
/** Public id of the related document, recorded in the audit log (optional). */
|
||||
documentPublicId?: string;
|
||||
/** Public id of the project, intended for data isolation. NOTE(review): not read by the processor in this file — confirm where it is consumed. */
|
||||
projectPublicId?: string;
|
||||
}
|
||||
|
||||
/** Subset of the Ollama generate API response body that this file declares. */
|
||||
interface OllamaGenerateResponse {
|
||||
// Generated text; this is the only field the processor reads.
|
||||
response: string;
|
||||
|
||||
// Declared for completeness; not read by the processor in this file.
|
||||
done: boolean;
|
||||
}
|
||||
|
||||
// VRAM (in MB) that Typhoon 2.1 Gemma3 4B is assumed to need, per ADR-032; passed to the VRAM capacity check.
|
||||
const TYPHOON_LLM_REQUIRED_VRAM_MB = 4500;
|
||||
// HTTP timeout for one LLM generation request: 120 seconds, expressed in milliseconds.
|
||||
const TYPHOON_LLM_TIMEOUT_MS = 120000;
|
||||
|
||||
/**
 * Worker for the Typhoon LLM queue.
 *
 * The worker is registered with `concurrency: 1`, so jobs are handled one at a
 * time; the stated purpose (ADR-032) is to avoid VRAM overflow on an 8 GB
 * RTX 2060 Super. `lockDuration` is 180 s, which covers the 120 s HTTP timeout
 * plus a buffer.
 *
 * For each job the processor:
 *  1. asks {@link VramMonitorService} whether the required VRAM is available,
 *  2. calls the Ollama `/api/generate` endpoint with `keep_alive: 0`,
 *  3. stores the outcome in Redis under `ai:typhoon:llm:<idempotencyKey>` (1 h TTL),
 *  4. writes an {@link AiAuditLog} row.
 *
 * Failure bookkeeping (steps 3 and 4 on the error paths) is best-effort: if
 * Redis or the database is unavailable the problem is logged and the ORIGINAL
 * error is still the one that fails the job.
 */
@Processor(QUEUE_TYPHOON_LLM, { concurrency: 1, lockDuration: 180000 })
export class TyphoonLlmProcessor extends WorkerHost {
  private readonly logger = new Logger(TyphoonLlmProcessor.name);
  private readonly ollamaUrl: string;
  private readonly defaultModel: string;

  constructor(
    private readonly configService: ConfigService,
    @InjectRedis() private readonly redis: Redis,
    @InjectRepository(AiAuditLog)
    private readonly auditLogRepo: Repository<AiAuditLog>,
    private readonly vramMonitorService: VramMonitorService
  ) {
    super();
    // OLLAMA_URL wins; otherwise AI_HOST_URL; otherwise the local Ollama default port.
    this.ollamaUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
    );
    this.defaultModel = this.configService.get<string>(
      'OLLAMA_MODEL_TYPHOON',
      'scb10x/typhoon2.1-gemma3-4b'
    );
  }

  /**
   * Processes a single Typhoon LLM job.
   *
   * @param job BullMQ job whose data carries the prompt, optional model name,
   *            idempotency key and optional document id.
   * @throws Error when the VRAM capacity check fails, or rethrows whatever the
   *         generation / success bookkeeping threw, so BullMQ marks the job failed.
   */
  async process(job: Job<TyphoonLlmJobData>): Promise<void> {
    const { prompt, model, idempotencyKey, documentPublicId } = job.data;
    const startTime = Date.now();
    const targetModel = model ?? this.defaultModel;

    this.logger.log(
      `Typhoon LLM job started — idempotencyKey=${idempotencyKey}, model=${targetModel}`
    );

    // Check VRAM before asking Ollama to load the model.
    const hasCapacity = await this.vramMonitorService.hasVramCapacity(
      TYPHOON_LLM_REQUIRED_VRAM_MB
    );
    if (!hasCapacity) {
      const errMsg = `VRAM ไม่เพียงพอสำหรับ ${targetModel} (ต้องการ ${TYPHOON_LLM_REQUIRED_VRAM_MB}MB) — retry ภายหลัง`;
      this.logger.warn(errMsg);
      await this.recordFailure({
        idempotencyKey,
        documentPublicId,
        model: targetModel,
        errorMessage: errMsg,
        processingTimeMs: Date.now() - startTime,
      });
      throw new Error(errMsg);
    }

    try {
      const generatedText = await this.generate(targetModel, prompt);
      const processingTimeMs = Date.now() - startTime;

      await this.saveResult(idempotencyKey, {
        status: 'completed',
        response: generatedText,
        model: targetModel,
        processingTimeMs,
      });
      await this.writeAuditLog({
        documentPublicId,
        model: targetModel,
        status: AiAuditStatus.SUCCESS,
        processingTimeMs,
      });
      this.logger.log(
        `Typhoon LLM completed — ${generatedText.length} chars, ${processingTimeMs}ms`
      );
    } catch (err: unknown) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(`Typhoon LLM job failed: ${errMsg}`);
      await this.recordFailure({
        idempotencyKey,
        documentPublicId,
        model: targetModel,
        errorMessage: errMsg,
        processingTimeMs: Date.now() - startTime,
      });
      // Rethrow the original error so the queue sees the real cause.
      throw err;
    }
  }

  /**
   * Calls the Ollama generate API (non-streaming) and returns the generated text.
   *
   * `keep_alive: 0` is sent with the request; per the original author's note the
   * intent is that Ollama unloads the model after the request. The VRAM monitor
   * cache is therefore invalidated whether the request succeeded or failed, so
   * the next capacity check does not rely on a reading taken before this call.
   */
  private async generate(model: string, prompt: string): Promise<string> {
    try {
      const response = await axios.post<OllamaGenerateResponse>(
        `${this.ollamaUrl}/api/generate`,
        {
          model,
          prompt,
          stream: false,
          options: {
            temperature: 0.0,
            top_p: 0.9,
            repeat_penalty: 1.0,
          },
          keep_alive: 0,
        },
        { timeout: TYPHOON_LLM_TIMEOUT_MS }
      );
      return response.data.response ?? '';
    } finally {
      await this.invalidateVramCacheSafely();
    }
  }

  /** Invalidates the VRAM monitor cache; a failure here is logged, never thrown. */
  private async invalidateVramCacheSafely(): Promise<void> {
    try {
      await this.vramMonitorService.invalidateCache();
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`Failed to invalidate VRAM cache: ${msg}`);
    }
  }

  /**
   * Publishes a `failed` result to Redis and writes a FAILED audit row.
   * Both writes are best-effort so that a Redis/database outage cannot replace
   * the error that actually caused the job to fail.
   */
  private async recordFailure(params: {
    idempotencyKey: string;
    documentPublicId?: string;
    model: string;
    errorMessage: string;
    processingTimeMs: number;
  }): Promise<void> {
    try {
      await this.saveResult(params.idempotencyKey, {
        status: 'failed',
        errorMessage: params.errorMessage,
        processingTimeMs: params.processingTimeMs,
      });
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.error(`Failed to store Typhoon LLM failure result: ${msg}`);
    }

    try {
      await this.writeAuditLog({
        documentPublicId: params.documentPublicId,
        model: params.model,
        status: AiAuditStatus.FAILED,
        errorMessage: params.errorMessage,
        processingTimeMs: params.processingTimeMs,
      });
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.error(`Failed to write Typhoon LLM failure audit log: ${msg}`);
    }
  }

  /**
   * Stores the job outcome in Redis for polling clients.
   * Key: `ai:typhoon:llm:<idempotencyKey>`, TTL: 3600 seconds.
   */
  private async saveResult(
    idempotencyKey: string,
    result: {
      status: 'completed' | 'failed';
      response?: string;
      model?: string;
      processingTimeMs: number;
      errorMessage?: string;
    }
  ): Promise<void> {
    await this.redis.setex(
      `ai:typhoon:llm:${idempotencyKey}`,
      3600,
      JSON.stringify({
        idempotencyKey,
        ...result,
        completedAt: new Date().toISOString(),
      })
    );
  }

  /** Persists one audit-log row describing this Typhoon LLM interaction. */
  private async writeAuditLog(params: {
    documentPublicId?: string;
    model: string;
    status: AiAuditStatus;
    processingTimeMs: number;
    errorMessage?: string;
  }): Promise<void> {
    const log = this.auditLogRepo.create({
      documentPublicId: params.documentPublicId,
      aiModel: 'typhoon-llm',
      modelName: params.model,
      modelType: 'llm',
      status: params.status,
      processingTimeMs: params.processingTimeMs,
      cacheHit: false,
      errorMessage: params.errorMessage,
    });
    await this.auditLogRepo.save(log);
  }
}
|
||||
@@ -0,0 +1,196 @@
|
||||
// File: src/modules/ai/processors/typhoon-ocr.processor.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: Initial processor สำหรับ Typhoon OCR sequential jobs (T009c, ADR-032)
|
||||
// รันด้วย concurrency=1 เพื่อป้องกัน VRAM overflow บน RTX 2060 Super (8GB)
|
||||
// ใช้ keep_alive=0 ผ่าน sidecar Ollama API เพื่อ unload model หลังประมวลผล
|
||||
|
||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { Job } from 'bullmq';
|
||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||
import Redis from 'ioredis';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
import { OcrCacheService } from '../services/ocr-cache.service';
|
||||
import { VramMonitorService } from '../services/vram-monitor.service';
|
||||
import {
|
||||
SandboxOcrEngineService,
|
||||
SandboxOcrEngineType,
|
||||
} from '../services/sandbox-ocr-engine.service';
|
||||
|
||||
/** Name of the BullMQ queue that carries Typhoon OCR jobs. */
|
||||
export const QUEUE_TYPHOON_OCR = 'typhoon-ocr';
|
||||
|
||||
/** Payload shape of a job placed on the Typhoon OCR queue. */
|
||||
export interface TyphoonOcrJobData {
|
||||
/** Path of the PDF file to OCR; also used, together with engineType, to derive the OCR cache key. */
|
||||
pdfPath: string;
|
||||
/** OCR engine to use; per the original author it is always 'typhoon-ocr-3b' on this queue (not enforced by the type). */
|
||||
engineType: SandboxOcrEngineType;
|
||||
/** Idempotency key; used as the suffix of the Redis key that stores the job result. */
|
||||
idempotencyKey: string;
|
||||
/** Public id of the related document, recorded in the audit log (optional). */
|
||||
documentPublicId?: string;
|
||||
}
|
||||
|
||||
// VRAM (in MB) that Typhoon OCR-3B is assumed to need, per ADR-032; passed to the VRAM capacity check.
|
||||
const TYPHOON_OCR_REQUIRED_VRAM_MB = 4000;
|
||||
|
||||
/**
 * Worker for the Typhoon OCR queue.
 *
 * The worker is registered with `concurrency: 1`, so jobs are handled one at a
 * time; the stated purpose (ADR-032) is to avoid VRAM overflow on an 8 GB
 * RTX 2060 Super. `lockDuration` is 180 s (120 s timeout plus buffer, per the
 * original author's note).
 *
 * For each job the processor:
 *  1. returns the cached OCR text when {@link OcrCacheService} has it,
 *  2. otherwise checks VRAM capacity and runs OCR through
 *     {@link SandboxOcrEngineService},
 *  3. stores the outcome in Redis under `ai:typhoon:ocr:<idempotencyKey>` (1 h TTL),
 *  4. writes an {@link AiAuditLog} row.
 *
 * Failures are also published to the result key (status `failed`), mirroring
 * what the Typhoon LLM processor does, so a polling client can stop waiting.
 * NOTE(review): the consumer of `ai:typhoon:ocr:*` is not visible from this
 * file — confirm it branches on `status` before reading `text`.
 */
@Processor(QUEUE_TYPHOON_OCR, { concurrency: 1, lockDuration: 180000 })
export class TyphoonOcrProcessor extends WorkerHost {
  private readonly logger = new Logger(TyphoonOcrProcessor.name);

  constructor(
    @InjectRedis() private readonly redis: Redis,
    @InjectRepository(AiAuditLog)
    private readonly auditLogRepo: Repository<AiAuditLog>,
    private readonly ocrCacheService: OcrCacheService,
    private readonly vramMonitorService: VramMonitorService,
    private readonly sandboxOcrEngineService: SandboxOcrEngineService
  ) {
    super();
  }

  /**
   * Processes a single Typhoon OCR job.
   *
   * @param job BullMQ job carrying the PDF path, engine type, idempotency key
   *            and optional document id.
   * @throws Error when the VRAM capacity check fails, or rethrows whatever the
   *         OCR call / success bookkeeping threw, so BullMQ marks the job failed.
   */
  async process(job: Job<TyphoonOcrJobData>): Promise<void> {
    const { pdfPath, engineType, idempotencyKey, documentPublicId } = job.data;
    const startTime = Date.now();

    this.logger.log(
      `Typhoon OCR job started — idempotencyKey=${idempotencyKey}, engine=${engineType}`
    );

    // Cache first: when a result already exists there is no need to run OCR again.
    const cached = await this.ocrCacheService.get(pdfPath, engineType);
    if (cached) {
      this.logger.log(
        `OCR cache hit: ${idempotencyKey} (engine=${engineType})`
      );
      await this.saveResult(idempotencyKey, {
        text: cached.text,
        engineUsed: cached.engineUsed,
        cacheHit: true,
        processingTimeMs: Date.now() - startTime,
      });
      await this.writeAuditLog({
        documentPublicId,
        engineType,
        status: AiAuditStatus.SUCCESS,
        processingTimeMs: Date.now() - startTime,
        cacheHit: true,
      });
      return;
    }

    // Check VRAM before the model gets loaded.
    const hasCapacity = await this.vramMonitorService.hasVramCapacity(
      TYPHOON_OCR_REQUIRED_VRAM_MB
    );
    if (!hasCapacity) {
      const errMsg = `VRAM ไม่เพียงพอสำหรับ Typhoon OCR-3B (ต้องการ ${TYPHOON_OCR_REQUIRED_VRAM_MB}MB) — retry ภายหลัง`;
      this.logger.warn(errMsg);
      await this.recordFailure({
        idempotencyKey,
        documentPublicId,
        engineType,
        errorMessage: errMsg,
        processingTimeMs: Date.now() - startTime,
      });
      throw new Error(errMsg);
    }

    try {
      const result = await this.extractAndReleaseVram(pdfPath, engineType);
      const processingTimeMs = Date.now() - startTime;

      // Store the text in the OCR cache (24 h TTL is applied by OcrCacheService).
      // NOTE(review): the entry is written under the REQUESTED engineType even
      // when result.fallbackUsed is set — confirm that caching a fallback
      // engine's output under this key is intended.
      await this.ocrCacheService.set(pdfPath, engineType, {
        text: result.text,
        engineUsed: result.engineUsed,
        charCount: result.text.length,
      });

      await this.saveResult(idempotencyKey, {
        text: result.text,
        engineUsed: result.engineUsed,
        fallbackUsed: result.fallbackUsed,
        cacheHit: false,
        processingTimeMs,
      });
      await this.writeAuditLog({
        documentPublicId,
        engineType,
        status: AiAuditStatus.SUCCESS,
        processingTimeMs,
        cacheHit: false,
      });
      this.logger.log(
        `Typhoon OCR completed — ${result.text.length} chars, ${processingTimeMs}ms`
      );
    } catch (err: unknown) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(`Typhoon OCR job failed: ${errMsg}`);
      await this.recordFailure({
        idempotencyKey,
        documentPublicId,
        engineType,
        errorMessage: errMsg,
        processingTimeMs: Date.now() - startTime,
      });
      // Rethrow the original error so the queue sees the real cause.
      throw err;
    }
  }

  /**
   * Runs OCR through SandboxOcrEngineService and then invalidates the VRAM
   * monitor cache, whether the call succeeded or failed. Per the original
   * author's note the model is unloaded after the request (keep_alive=0), so a
   * VRAM reading taken before the call should not be reused.
   */
  private async extractAndReleaseVram(
    pdfPath: string,
    engineType: SandboxOcrEngineType
  ) {
    try {
      return await this.sandboxOcrEngineService.detectAndExtract(
        pdfPath,
        engineType
      );
    } finally {
      await this.invalidateVramCacheSafely();
    }
  }

  /** Invalidates the VRAM monitor cache; a failure here is logged, never thrown. */
  private async invalidateVramCacheSafely(): Promise<void> {
    try {
      await this.vramMonitorService.invalidateCache();
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`Failed to invalidate VRAM cache: ${msg}`);
    }
  }

  /**
   * Publishes a `failed` result to Redis and writes a FAILED audit row.
   * Both writes are best-effort so that a Redis/database outage cannot replace
   * the error that actually caused the job to fail.
   */
  private async recordFailure(params: {
    idempotencyKey: string;
    documentPublicId?: string;
    engineType: string;
    errorMessage: string;
    processingTimeMs: number;
  }): Promise<void> {
    try {
      await this.redis.setex(
        `ai:typhoon:ocr:${params.idempotencyKey}`,
        3600,
        JSON.stringify({
          idempotencyKey: params.idempotencyKey,
          status: 'failed',
          errorMessage: params.errorMessage,
          cacheHit: false,
          processingTimeMs: params.processingTimeMs,
          completedAt: new Date().toISOString(),
        })
      );
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.error(`Failed to store Typhoon OCR failure result: ${msg}`);
    }

    try {
      await this.writeAuditLog({
        documentPublicId: params.documentPublicId,
        engineType: params.engineType,
        status: AiAuditStatus.FAILED,
        errorMessage: params.errorMessage,
        processingTimeMs: params.processingTimeMs,
        cacheHit: false,
      });
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.error(`Failed to write Typhoon OCR failure audit log: ${msg}`);
    }
  }

  /**
   * Stores a successful OCR outcome in Redis for polling clients.
   * Key: `ai:typhoon:ocr:<idempotencyKey>`, TTL: 3600 seconds.
   */
  private async saveResult(
    idempotencyKey: string,
    result: {
      text: string;
      engineUsed: string;
      fallbackUsed?: boolean;
      cacheHit: boolean;
      processingTimeMs: number;
    }
  ): Promise<void> {
    await this.redis.setex(
      `ai:typhoon:ocr:${idempotencyKey}`,
      3600,
      JSON.stringify({
        idempotencyKey,
        status: 'completed',
        ...result,
        completedAt: new Date().toISOString(),
      })
    );
  }

  /** Persists one audit-log row describing this Typhoon OCR interaction. */
  private async writeAuditLog(params: {
    documentPublicId?: string;
    engineType: string;
    status: AiAuditStatus;
    processingTimeMs: number;
    cacheHit: boolean;
    errorMessage?: string;
  }): Promise<void> {
    const log = this.auditLogRepo.create({
      documentPublicId: params.documentPublicId,
      aiModel: 'typhoon-ocr',
      modelName: 'scb10x/typhoon-ocr-3b',
      modelType: params.engineType,
      status: params.status,
      processingTimeMs: params.processingTimeMs,
      cacheHit: params.cacheHit,
      errorMessage: params.errorMessage,
    });
    await this.auditLogRepo.save(log);
  }
}
|
||||
@@ -0,0 +1,110 @@
|
||||
// File: src/modules/ai/services/ocr-cache.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: Initial implementation สำหรับ Typhoon OCR 24-hour result caching (T007, ADR-032)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||
import Redis from 'ioredis';
|
||||
import { createHash } from 'crypto';
|
||||
|
||||
/** Shape of one OCR result as it is stored (JSON-serialised) in Redis. */
|
||||
export interface CachedOcrResult {
|
||||
// Extracted OCR text.
|
||||
text: string;
|
||||
// Name of the engine that produced the text, as reported by the caller of set().
|
||||
engineUsed: string;
|
||||
// Character count supplied by the caller of set().
|
||||
charCount: number;
|
||||
cachedAt: string; // ISO-8601 timestamp taken when the entry was written
|
||||
}
|
||||
|
||||
// Cache TTL: 24 hours, expressed in seconds (as specified in ADR-032).
|
||||
const OCR_CACHE_TTL_SECONDS = 24 * 60 * 60;
|
||||
// Prefix of every OCR cache key in Redis.
|
||||
const OCR_CACHE_PREFIX = 'ai:ocr:result:';
|
||||
|
||||
/**
 * Redis-backed cache for OCR results (introduced for Typhoon OCR).
 *
 * Key: `ai:ocr:result:` + SHA-256 of `"<pdfPath>::<engineType>"`, so results
 * from different engines for the same path never share an entry. The key
 * depends only on the path string and the engine type, not on file contents.
 * TTL: 24 hours (ADR-032).
 *
 * Every operation is best-effort: a Redis failure is logged and reported as a
 * cache miss / no-op rather than thrown, so caching can never block OCR.
 */
@Injectable()
export class OcrCacheService {
  private readonly logger = new Logger(OcrCacheService.name);

  constructor(@InjectRedis() private readonly redis: Redis) {}

  /**
   * Builds the Redis key for a (pdfPath, engineType) pair.
   * Hashing keeps the key at a fixed length regardless of path length.
   */
  private buildKey(pdfPath: string, engineType: string): string {
    const hash = createHash('sha256')
      .update(`${pdfPath}::${engineType}`)
      .digest('hex');
    return `${OCR_CACHE_PREFIX}${hash}`;
  }

  /** Runtime check that a parsed JSON value has the CachedOcrResult shape. */
  private static isCachedOcrResult(value: unknown): value is CachedOcrResult {
    if (typeof value !== 'object' || value === null) return false;
    const candidate = value as Record<string, unknown>;
    return (
      typeof candidate['text'] === 'string' &&
      typeof candidate['engineUsed'] === 'string' &&
      typeof candidate['charCount'] === 'number' &&
      typeof candidate['cachedAt'] === 'string'
    );
  }

  /**
   * Reads a cached OCR result.
   *
   * @returns the cached entry, or `null` when there is no entry, the entry has
   *          expired, the stored value is not valid JSON of the expected shape,
   *          or Redis could not be reached.
   */
  async get(
    pdfPath: string,
    engineType: string
  ): Promise<CachedOcrResult | null> {
    const key = this.buildKey(pdfPath, engineType);
    try {
      const raw = await this.redis.get(key);
      if (!raw) return null;

      // Validate instead of trusting the stored JSON: a malformed entry is
      // treated as a miss so the caller falls back to running OCR.
      const parsed: unknown = JSON.parse(raw);
      if (!OcrCacheService.isCachedOcrResult(parsed)) {
        this.logger.warn(
          `OCR cache entry for ${engineType} has an unexpected shape; ignoring it`
        );
        return null;
      }
      return parsed;
    } catch (err: unknown) {
      // Redis or parse error: do not throw, report a miss so real OCR runs.
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`OCR cache get failed for ${engineType}: ${msg}`);
      return null;
    }
  }

  /**
   * Writes an OCR result to the cache with a 24-hour TTL.
   * `cachedAt` is stamped here; a write failure is logged and swallowed.
   */
  async set(
    pdfPath: string,
    engineType: string,
    result: Omit<CachedOcrResult, 'cachedAt'>
  ): Promise<void> {
    const key = this.buildKey(pdfPath, engineType);
    const value: CachedOcrResult = {
      ...result,
      cachedAt: new Date().toISOString(),
    };
    try {
      await this.redis.setex(key, OCR_CACHE_TTL_SECONDS, JSON.stringify(value));
      this.logger.debug(
        `OCR cache set: ${engineType} for ${pdfPath} (TTL 24h)`
      );
    } catch (err: unknown) {
      // A cache write failure must not block the OCR flow.
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`OCR cache set failed: ${msg}`);
    }
  }

  /**
   * Deletes the cache entry for the given file and engine (for example after
   * the file has been modified). A failure is logged and swallowed.
   */
  async invalidate(pdfPath: string, engineType: string): Promise<void> {
    const key = this.buildKey(pdfPath, engineType);
    try {
      await this.redis.del(key);
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`OCR cache invalidate failed: ${msg}`);
    }
  }

  /**
   * Reports whether a cache entry exists, without fetching its payload.
   * Like the other operations this is best-effort: when Redis cannot be
   * reached the answer is `false` (treated as a miss) instead of an exception.
   */
  async exists(pdfPath: string, engineType: string): Promise<boolean> {
    const key = this.buildKey(pdfPath, engineType);
    try {
      const count = await this.redis.exists(key);
      return count > 0;
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`OCR cache exists check failed: ${msg}`);
      return false;
    }
  }
}
|
||||
@@ -5,15 +5,31 @@
|
||||
// - 2026-05-25: เพิ่ม path remapping (OCR_UPLOAD_BASE_PATH) เพื่อแปลง local upload path เป็น path ที่ sidecar เห็นผ่าน CIFS.
|
||||
// - 2026-05-29: เพิ่ม checkHealth() เพื่อตรวจสอบสุขภาพของ OCR sidecar สำหรับ getSystemHealth() (ADR-027)
|
||||
// - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อความเข้ากันได้กับ CPU เก่า
|
||||
// - 2026-05-30: เพิ่ม VRAM insufficiency guard สำหรับ Typhoon OCR engine (T016a, ADR-032)
|
||||
// - 2026-05-30: ปรับปรุงสำหรับ Dynamic OCR Engine selection, Caching, และ Graceful Fallback (T013, T014, T016, T022, T023, US1)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||
import Redis from 'ioredis';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository, EntityManager } from 'typeorm';
|
||||
import axios from 'axios';
|
||||
import {
|
||||
OcrEngineConfiguration,
|
||||
OcrEngineType,
|
||||
} from '../entities/ocr-engine-configuration.entity';
|
||||
import { OcrEngineResponseDto } from '../dto/ocr-engine-response.dto';
|
||||
import { SystemSetting } from '../entities/system-setting.entity';
|
||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||
import { OcrCacheService } from './ocr-cache.service';
|
||||
import { VramMonitorService } from './vram-monitor.service';
|
||||
|
||||
/** Input accepted by the OCR service's detect-and-extract flow. */
export interface OcrDetectionInput {
|
||||
// Text already extracted from the document, if any; returned as-is on the fast path.
|
||||
extractedText?: string;
|
||||
// Presumably the number of characters already extracted — TODO confirm against the threshold check, which is outside this view.
|
||||
extractedChars?: number;
|
||||
// Local path of the PDF; it is remapped to the sidecar's view of the path before being sent for OCR.
|
||||
pdfPath?: string;
|
||||
documentPublicId?: string; // added so audit-log rows can reference the document
|
||||
}
|
||||
|
||||
export interface OcrDetectionResult {
|
||||
@@ -32,7 +48,48 @@ export interface OcrHealthResult {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/** บริการเลือก fast path หรือ OCR sidecar (Tesseract) ตามจำนวนตัวอักษรที่ extract ได้ */
|
||||
// settingKey of the SystemSetting row that stores the id of the active OCR engine.
const OCR_ACTIVE_ENGINE_KEY = 'OCR_ACTIVE_ENGINE';
|
||||
// Redis key under which the active engine id is cached.
const OCR_ACTIVE_ENGINE_CACHE_KEY = 'system_settings:OCR_ACTIVE_ENGINE';
|
||||
// Lifetime of that Redis cache entry, in seconds.
const OCR_ACTIVE_ENGINE_TTL_SECONDS = 30;
|
||||
|
||||
// Fixed ids of the two built-in OCR engine configurations.
const TESSERACT_ENGINE_ID = '019505a1-7c3e-7000-8000-abc123def001';
|
||||
const TYPHOON_ENGINE_ID = '019505a1-7c3e-7000-8000-abc123def002';
|
||||
|
||||
// VRAM (in MB) that Typhoon OCR-3B is assumed to need; used for the capacity check and the engine config.
|
||||
const TYPHOON_OCR_REQUIRED_VRAM_MB = 4000;
|
||||
|
||||
// Hard-coded configuration of the Tesseract engine: needs no VRAM and has no fallback engine.
// Its id is also the default returned when no active-engine setting can be read.
const TESSERACT_ENGINE: OcrEngineConfiguration = {
|
||||
engineId: TESSERACT_ENGINE_ID,
|
||||
engineName: 'Tesseract OCR',
|
||||
engineType: OcrEngineType.TESSERACT,
|
||||
isActive: true,
|
||||
vramRequirementMB: 0,
|
||||
processingTimeLimitSeconds: 30,
|
||||
concurrentLimit: 10,
|
||||
fallbackEngineId: null,
|
||||
createdAt: new Date('2026-05-30T00:00:00Z'),
|
||||
updatedAt: new Date('2026-05-30T00:00:00Z'),
|
||||
};
|
||||
|
||||
// Hard-coded configuration of the Typhoon OCR-3B engine: requires VRAM, is limited to one
// concurrent job, and names Tesseract as its fallback engine.
const TYPHOON_ENGINE: OcrEngineConfiguration = {
|
||||
engineId: TYPHOON_ENGINE_ID,
|
||||
engineName: 'Typhoon OCR-3B',
|
||||
engineType: OcrEngineType.TYPHOON_OCR,
|
||||
isActive: true,
|
||||
vramRequirementMB: TYPHOON_OCR_REQUIRED_VRAM_MB,
|
||||
processingTimeLimitSeconds: 60,
|
||||
concurrentLimit: 1,
|
||||
fallbackEngineId: TESSERACT_ENGINE_ID,
|
||||
createdAt: new Date('2026-05-30T00:00:00Z'),
|
||||
updatedAt: new Date('2026-05-30T00:00:00Z'),
|
||||
};
|
||||
|
||||
// In-memory registry of the known OCR engines, keyed by engine id.
// Engine listing and engine selection both look engines up here.
const ENGINES_MAP = new Map<string, OcrEngineConfiguration>([
|
||||
[TESSERACT_ENGINE_ID, TESSERACT_ENGINE],
|
||||
[TYPHOON_ENGINE_ID, TYPHOON_ENGINE],
|
||||
]);
|
||||
|
||||
/** บริการเลือก fast path หรือ OCR sidecar (Tesseract/Typhoon) พร้อมความสามารถในสลับ Engine และ Caching */
|
||||
@Injectable()
|
||||
export class OcrService {
|
||||
private readonly logger = new Logger(OcrService.name);
|
||||
@@ -41,13 +98,21 @@ export class OcrService {
|
||||
private readonly localUploadBase: string;
|
||||
private readonly sidecarUploadBase: string;
|
||||
|
||||
constructor(private readonly configService: ConfigService) {
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
@InjectRepository(SystemSetting)
|
||||
private readonly settingRepo: Repository<SystemSetting>,
|
||||
@InjectRepository(AiAuditLog)
|
||||
private readonly auditLogRepo: Repository<AiAuditLog>,
|
||||
private readonly ocrCacheService: OcrCacheService,
|
||||
private readonly vramMonitorService: VramMonitorService,
|
||||
@InjectRedis() private readonly redis: Redis
|
||||
) {
|
||||
this.threshold = this.configService.get<number>('OCR_CHAR_THRESHOLD', 100);
|
||||
this.ocrApiUrl = this.configService.get<string>(
|
||||
'OCR_API_URL',
|
||||
'http://localhost:8765'
|
||||
);
|
||||
// path ที่ backend เห็น → path ที่ sidecar เห็น (ผ่าน CIFS mount)
|
||||
this.localUploadBase = this.configService
|
||||
.get<string>('UPLOAD_PERMANENT_DIR', '/app/uploads/permanent')
|
||||
.replace(/\/permanent$/, '');
|
||||
@@ -57,6 +122,81 @@ export class OcrService {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Lists every known OCR engine configuration, each tagged with
 * `isCurrentActive` to show whether it is the engine currently selected.
 */
async getOcrEngines(): Promise<OcrEngineResponseDto[]> {
  const currentEngineId = await this.getActiveEngineId();

  // Array.from with a mapper builds the DTO list in a single pass over the registry.
  return Array.from(ENGINES_MAP.values(), (config) => ({
    ...config,
    isCurrentActive: config.engineId === currentEngineId,
  }));
}
|
||||
|
||||
/**
 * Persists the choice of the primary OCR engine.
 *
 * The engine id is written to the OCR_ACTIVE_ENGINE system setting inside a
 * transaction (the row is created when it does not exist yet), after which the
 * Redis copy of the setting is deleted so the next read goes to the database.
 *
 * @param engineId id of the engine to activate; must be a key of ENGINES_MAP
 * @param userId   id of the user making the change, stored as `updatedBy`
 * @returns the configuration of the engine that is now active
 * @throws NotFoundException when `engineId` is not a known engine
 */
async selectOcrEngine(
  engineId: string,
  userId: number
): Promise<OcrEngineConfiguration> {
  const engine = ENGINES_MAP.get(engineId);
  if (engine === undefined) {
    throw new NotFoundException(`OCR Engine with ID ${engineId} not found`);
  }

  await this.settingRepo.manager.transaction(
    async (manager: EntityManager): Promise<void> => {
      const settings = manager.getRepository(SystemSetting);

      let row = await settings.findOne({
        where: { settingKey: OCR_ACTIVE_ENGINE_KEY },
      });
      if (row == null) {
        // First selection ever: create the setting row with its metadata.
        row = settings.create({
          settingKey: OCR_ACTIVE_ENGINE_KEY,
          dataType: 'string',
          category: 'ai',
          description: 'เอนจิน OCR หลักที่ใช้งานในระบบ (global)',
          isPublic: true,
        });
      }

      row.settingValue = engineId;
      row.updatedBy = userId;
      await settings.save(row);
    }
  );

  // Drop the cached value so readers pick up the new engine.
  await this.redis.del(OCR_ACTIVE_ENGINE_CACHE_KEY);

  this.logger.log(
    `Active OCR Engine changed to ${engine.engineName} (ID: ${engineId}) by user ${userId}`
  );
  return engine;
}
|
||||
|
||||
/**
 * Returns the id of the OCR engine that is currently active.
 *
 * Lookup order: Redis cache, then the OCR_ACTIVE_ENGINE system setting, then
 * the Tesseract engine id as the default. A value read from the database (or
 * the default) is cached in Redis for OCR_ACTIVE_ENGINE_TTL_SECONDS. Any error
 * is logged and answered with the Tesseract engine id.
 */
async getActiveEngineId(): Promise<string> {
  try {
    const fromCache = await this.redis.get(OCR_ACTIVE_ENGINE_CACHE_KEY);
    if (fromCache) {
      return fromCache;
    }

    const row = await this.settingRepo.findOne({
      where: { settingKey: OCR_ACTIVE_ENGINE_KEY },
    });
    const engineId = row?.settingValue ?? TESSERACT_ENGINE_ID;

    await this.redis.set(
      OCR_ACTIVE_ENGINE_CACHE_KEY,
      engineId,
      'EX',
      OCR_ACTIVE_ENGINE_TTL_SECONDS
    );
    return engineId;
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    this.logger.error(`Failed to get active OCR engine: ${reason}`);
    return TESSERACT_ENGINE_ID;
  }
}
|
||||
|
||||
/** แปลง local upload path เป็น path ที่ sidecar เห็นผ่าน CIFS mount */
|
||||
private remapPath(localPath: string): string {
|
||||
if (this.localUploadBase && localPath.startsWith(this.localUploadBase)) {
|
||||
@@ -103,19 +243,51 @@ export class OcrService {
|
||||
return { text: extractedText, ocrUsed: false };
|
||||
}
|
||||
|
||||
const activeEngineId = await this.getActiveEngineId();
|
||||
|
||||
if (activeEngineId === TYPHOON_ENGINE_ID) {
|
||||
return this.processWithTyphoon(input);
|
||||
} else {
|
||||
return this.processWithTesseract(input);
|
||||
}
|
||||
}
|
||||
|
||||
/** ประมวลผลผ่าน Tesseract OCR */
|
||||
private async processWithTesseract(
|
||||
input: OcrDetectionInput
|
||||
): Promise<OcrDetectionResult> {
|
||||
const startTime = Date.now();
|
||||
const sidecarPath = this.remapPath(input.pdfPath!);
|
||||
|
||||
try {
|
||||
const sidecarPath = this.remapPath(input.pdfPath);
|
||||
this.logger.debug(`OCR path remap: ${input.pdfPath} → ${sidecarPath}`);
|
||||
this.logger.debug(
|
||||
`Tesseract OCR processing: ${input.pdfPath} → ${sidecarPath}`
|
||||
);
|
||||
const response = await axios.post<OcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr`,
|
||||
{ pdfPath: sidecarPath },
|
||||
{ timeout: 90000 }
|
||||
);
|
||||
|
||||
const text = response.data.text ?? '';
|
||||
const durationMs = Date.now() - startTime;
|
||||
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'tesseract',
|
||||
modelName: 'tesseract-ocr',
|
||||
modelType: 'tesseract',
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
processingTimeMs: durationMs,
|
||||
cacheHit: false,
|
||||
});
|
||||
|
||||
return {
|
||||
text: response.data.text ?? '',
|
||||
text,
|
||||
ocrUsed: true,
|
||||
};
|
||||
} catch (err: unknown) {
|
||||
const durationMs = Date.now() - startTime;
|
||||
const cause =
|
||||
err instanceof AggregateError && err.errors?.length
|
||||
? err.errors
|
||||
@@ -124,9 +296,214 @@ export class OcrService {
|
||||
: err instanceof Error
|
||||
? err.message
|
||||
: String(err);
|
||||
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'tesseract',
|
||||
modelName: 'tesseract-ocr',
|
||||
modelType: 'tesseract',
|
||||
status: AiAuditStatus.FAILED,
|
||||
processingTimeMs: durationMs,
|
||||
cacheHit: false,
|
||||
errorMessage: cause,
|
||||
});
|
||||
|
||||
throw new Error(
|
||||
`OCR sidecar (Tesseract) unreachable at ${this.ocrApiUrl} — ${cause}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs OCR through Typhoon OCR-3B with result caching and automatic fallback to Tesseract.
 *
 * Steps:
 *  1. Return the cached result for this PDF/engine pair when one exists (T022).
 *  2. Check VRAM capacity; fall back to Tesseract when it is insufficient or
 *     when the capacity check itself fails (T016a).
 *  3. Call the OCR sidecar with the Typhoon engine (60s timeout per ADR-032).
 *  4. On any sidecar failure, fall back to Tesseract (T016).
 *
 * Cache writes and VRAM-cache invalidation are best-effort: a failure there is
 * logged but never discards an OCR result that was already produced.
 *
 * @param input Detection input; `pdfPath` must be set.
 * @returns The extracted text with `ocrUsed` set to true.
 * @throws Error when `pdfPath` is missing, or (via `fallbackToTesseract`) when
 *   both Typhoon and the Tesseract fallback fail.
 */
private async processWithTyphoon(
  input: OcrDetectionInput
): Promise<OcrDetectionResult> {
  const startTime = Date.now();
  const pdfPath = input.pdfPath;
  if (pdfPath == null) {
    throw new Error('Typhoon OCR requires input.pdfPath to be set');
  }
  const engineType = 'typhoon-ocr-3b';

  // 1. Check the Redis cache (T022). A cache failure only costs a re-run.
  try {
    const cached = await this.ocrCacheService.get(pdfPath, engineType);
    if (cached) {
      this.logger.log(`OCR Cache Hit for Typhoon OCR: ${pdfPath}`);
      const durationMs = Date.now() - startTime;

      await this.writeAuditLog({
        documentPublicId: input.documentPublicId,
        aiModel: 'typhoon-ocr',
        modelName: 'scb10x/typhoon-ocr-3b',
        modelType: engineType,
        status: AiAuditStatus.SUCCESS,
        processingTimeMs: durationMs,
        cacheHit: true,
      });

      return {
        text: cached.text,
        ocrUsed: true,
      };
    }
  } catch (err: unknown) {
    this.logger.warn(
      `Cache checking failed: ${err instanceof Error ? err.message : String(err)}`
    );
  }

  // 2. Check VRAM before processing (T016a). If the probe itself throws,
  //    treat it as "no capacity" so the request still degrades to Tesseract.
  let hasCapacity = false;
  let vramCheckError: string | undefined;
  try {
    hasCapacity = await this.vramMonitorService.hasVramCapacity(
      TYPHOON_OCR_REQUIRED_VRAM_MB
    );
  } catch (err: unknown) {
    vramCheckError = err instanceof Error ? err.message : String(err);
  }
  if (!hasCapacity) {
    const errorMsg =
      vramCheckError === undefined
        ? `VRAM capacity (< 4GB) insufficient for Typhoon OCR-3B. Fallback to Tesseract.`
        : `VRAM capacity check failed: ${vramCheckError}. Fallback to Tesseract.`;
    return this.fallbackToTesseract(
      pdfPath,
      errorMsg,
      input.documentPublicId
    );
  }

  // 3. Call Typhoon OCR. Only the sidecar call itself may trigger the fallback.
  const sidecarPath = this.remapPath(pdfPath);
  let text: string;
  try {
    this.logger.log(`Calling Typhoon OCR-3B for: ${sidecarPath}`);
    const response = await axios.post<OcrSidecarResponse>(
      `${this.ocrApiUrl}/ocr`,
      { pdfPath: sidecarPath, engine: engineType },
      { timeout: 60000 } // 60s timeout per ADR-032
    );
    text = response.data.text ?? '';
  } catch (err: unknown) {
    const cause = err instanceof Error ? err.message : String(err);
    const errorMsg = `Typhoon OCR API call failed: ${cause}`;

    // 4. Graceful fallback to Tesseract (T016).
    return this.fallbackToTesseract(
      pdfPath,
      errorMsg,
      input.documentPublicId
    );
  }

  const durationMs = Date.now() - startTime;

  // Store the result in the OCR cache (T022). Best-effort: a cache outage must
  // not throw away text that Typhoon already produced.
  try {
    await this.ocrCacheService.set(pdfPath, engineType, {
      text,
      engineUsed: engineType,
      charCount: text.length,
    });
  } catch (err: unknown) {
    this.logger.warn(
      `Failed to cache Typhoon OCR result: ${err instanceof Error ? err.message : String(err)}`
    );
  }

  // Drop the cached VRAM status so the next check re-queries Ollama. Per the
  // original author's note the model runs with keep_alive = 0 and is unloaded
  // right after the call. Best-effort for the same reason as the cache write.
  try {
    await this.vramMonitorService.invalidateCache();
  } catch (err: unknown) {
    this.logger.warn(
      `Failed to invalidate VRAM status cache: ${err instanceof Error ? err.message : String(err)}`
    );
  }

  // Record the audit log entry (T023).
  await this.writeAuditLog({
    documentPublicId: input.documentPublicId,
    aiModel: 'typhoon-ocr',
    modelName: 'scb10x/typhoon-ocr-3b',
    modelType: engineType,
    status: AiAuditStatus.SUCCESS,
    processingTimeMs: durationMs,
    cacheHit: false,
    vramUsageMb: TYPHOON_OCR_REQUIRED_VRAM_MB,
  });

  return {
    text,
    ocrUsed: true,
  };
}
|
||||
|
||||
/**
 * Gracefully falls back to Tesseract OCR after Typhoon OCR could not be used (T016).
 *
 * Calls the OCR sidecar without an `engine` field so it uses Tesseract, then
 * writes an audit log entry that keeps the original Typhoon error for traceability.
 *
 * @param pdfPath Local path of the PDF; remapped to the sidecar's view before sending.
 * @param originalError Description of why Typhoon OCR was not used.
 * @param documentPublicId Optional document identifier recorded in the audit log.
 * @returns The text extracted by Tesseract with `ocrUsed` set to true.
 * @throws Error when the Tesseract call fails as well; the message carries both causes.
 */
private async fallbackToTesseract(
  pdfPath: string,
  originalError: string,
  documentPublicId?: string
): Promise<OcrDetectionResult> {
  this.logger.warn(
    `Typhoon OCR processing failed, initiating graceful fallback to Tesseract: ${originalError}`
  );
  const startTime = Date.now();
  const sidecarPath = this.remapPath(pdfPath);

  try {
    const response = await axios.post<OcrSidecarResponse>(
      `${this.ocrApiUrl}/ocr`,
      { pdfPath: sidecarPath }, // no engine parameter, so the sidecar uses Tesseract
      { timeout: 30000 } // 30s timeout for the fallback
    );

    const text = response.data.text ?? '';
    const durationMs = Date.now() - startTime;

    // Logged as SUCCESS for Tesseract, with the Typhoon error kept in errorMessage.
    await this.writeAuditLog({
      documentPublicId,
      aiModel: 'tesseract',
      modelName: 'tesseract-ocr',
      modelType: 'tesseract',
      status: AiAuditStatus.SUCCESS,
      processingTimeMs: durationMs,
      cacheHit: false,
      errorMessage: `Graceful fallback from Typhoon OCR. Original error: ${originalError}`,
    });

    return {
      text,
      ocrUsed: true,
    };
  } catch (err: unknown) {
    const durationMs = Date.now() - startTime;
    // An AggregateError (e.g. several failed connection attempts) usually has an
    // empty message, so join the messages of its inner errors instead. This
    // matches how the primary Tesseract path inspects AggregateError.
    const cause =
      err instanceof AggregateError && err.errors?.length
        ? err.errors
            .map((inner: unknown) =>
              inner instanceof Error ? inner.message : String(inner)
            )
            .join('; ')
        : err instanceof Error
          ? err.message
          : String(err);
    this.logger.error(`OCR fallback to Tesseract failed: ${cause}`);

    await this.writeAuditLog({
      documentPublicId,
      aiModel: 'tesseract',
      modelName: 'tesseract-ocr',
      modelType: 'tesseract',
      status: AiAuditStatus.FAILED,
      processingTimeMs: durationMs,
      cacheHit: false,
      errorMessage: `Fallback failed: ${cause}. Original Typhoon error: ${originalError}`,
    });

    throw new Error(
      `OCR processing failed entirely. Typhoon error: ${originalError}. Fallback error: ${cause}`
    );
  }
}
|
||||
|
||||
/**
 * Persists one AI audit log entry (T023).
 *
 * Never throws: if creating or saving the entry fails, the failure is logged
 * as a warning and swallowed so that auditing cannot break OCR processing.
 *
 * @param params Field values copied one-to-one onto the audit log entity.
 */
private async writeAuditLog(params: {
  documentPublicId?: string;
  aiModel: string;
  modelName: string;
  modelType: string;
  status: AiAuditStatus;
  processingTimeMs: number;
  cacheHit: boolean;
  vramUsageMb?: number;
  errorMessage?: string;
}): Promise<void> {
  const {
    documentPublicId,
    aiModel,
    modelName,
    modelType,
    status,
    processingTimeMs,
    cacheHit,
    vramUsageMb,
    errorMessage,
  } = params;

  try {
    const entry = this.auditLogRepo.create({
      documentPublicId,
      aiModel,
      modelName,
      modelType,
      status,
      processingTimeMs,
      cacheHit,
      vramUsageMb,
      errorMessage,
    });
    await this.auditLogRepo.save(entry);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.warn(`Failed to write AI audit log: ${reason}`);
  }
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
// File: src/modules/ai/services/sandbox-ocr-engine.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: แยก SandboxOcrEngineService ออกจาก OcrService เพื่อรองรับการเลือก Typhoon OCR เฉพาะ sandbox โดยไม่กระทบ core OCR flow
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
import { OcrService } from './ocr.service';
|
||||
|
||||
/**
 * OCR engine choices accepted by the sandbox.
 * `'auto'` and `'tesseract'` are both delegated to the core `OcrService`;
 * `'typhoon-ocr-3b'` is sent to the OCR sidecar as its `engine` value.
 */
export type SandboxOcrEngineType = 'auto' | 'tesseract' | 'typhoon-ocr-3b';
|
||||
|
||||
/**
 * Response body of the OCR sidecar's `/ocr` endpoint as read by the sandbox.
 * Every field is optional; the consumer supplies defaults for missing ones.
 */
interface SandboxOcrSidecarResponse {
  /** Extracted text. */
  text?: string;
  /** Whether OCR was used, as reported by the sidecar. */
  ocrUsed?: boolean;
  /** Engine name reported by the sidecar. */
  engineUsed?: string;
}
|
||||
|
||||
/** Result returned by the sandbox OCR service. */
export interface SandboxOcrResult {
  /** Extracted text (empty string when the sidecar returned none). */
  text: string;
  /** Whether an OCR engine was used to obtain the text. */
  ocrUsed: boolean;
  /** Name of the engine that produced the text. */
  engineUsed: string;
  /** True when the requested engine failed and the baseline path was used instead. */
  fallbackUsed: boolean;
}
|
||||
|
||||
/**
 * OCR service used only by the sandbox, kept separate from the core
 * `OcrService` to limit the blast radius of engine experiments.
 */
@Injectable()
export class SandboxOcrEngineService {
  private readonly logger = new Logger(SandboxOcrEngineService.name);
  private readonly ocrApiUrl: string;
  private readonly localUploadBase: string;
  private readonly sidecarUploadBase: string;

  constructor(
    private readonly configService: ConfigService,
    private readonly ocrService: OcrService
  ) {
    this.ocrApiUrl = this.configService.get<string>(
      'OCR_API_URL',
      'http://localhost:8765'
    );
    // The upload base is the permanent-upload directory without its
    // trailing "/permanent" segment.
    this.localUploadBase = this.configService
      .get<string>('UPLOAD_PERMANENT_DIR', '/app/uploads/permanent')
      .replace(/\/permanent$/, '');
    this.sidecarUploadBase = this.configService.get<string>(
      'OCR_SIDECAR_UPLOAD_BASE',
      '/mnt/uploads'
    );
  }

  /**
   * Converts a local upload path into the path the sidecar sees through its
   * CIFS mount.
   *
   * The prefix is only rewritten on a path-segment boundary, so a sibling
   * directory such as `<base>-tmp` is left alone. The new prefix is
   * concatenated rather than passed to `String.replace`, which would
   * interpret `$` sequences in the replacement string.
   *
   * @param localPath Path as seen by this process.
   * @returns The remapped path, or `localPath` unchanged when it is not under the local upload base.
   */
  private remapPath(localPath: string): string {
    const base = this.localUploadBase;
    if (!base) {
      return localPath;
    }
    const basePrefix = base.endsWith('/') ? base : `${base}/`;
    const isUnderBase = localPath === base || localPath.startsWith(basePrefix);
    if (!isUnderBase) {
      return localPath;
    }
    return `${this.sidecarUploadBase}${localPath.slice(base.length)}`;
  }

  /**
   * Runs OCR with the selected engine.
   *
   * `'auto'` and `'tesseract'` go straight to the core `OcrService`. Any other
   * engine is sent to the sidecar; if that call fails, the core `OcrService`
   * is used as a fallback and `fallbackUsed` is set to true.
   *
   * @param pdfPath Local path of the PDF to process.
   * @param engineType Engine to use; defaults to `'auto'`.
   * @returns The extracted text plus which engine produced it.
   * @throws Whatever `OcrService.detectAndExtract` throws when the baseline path fails.
   */
  async detectAndExtract(
    pdfPath: string,
    engineType: SandboxOcrEngineType = 'auto'
  ): Promise<SandboxOcrResult> {
    if (engineType === 'auto' || engineType === 'tesseract') {
      return this.runBaseline(pdfPath, false);
    }

    try {
      const response = await axios.post<SandboxOcrSidecarResponse>(
        `${this.ocrApiUrl}/ocr`,
        {
          pdfPath: this.remapPath(pdfPath),
          engine: engineType,
        },
        { timeout: 120000 }
      );

      return {
        text: response.data.text ?? '',
        ocrUsed: response.data.ocrUsed ?? true,
        engineUsed: response.data.engineUsed ?? engineType,
        fallbackUsed: false,
      };
    } catch (error: unknown) {
      const cause = error instanceof Error ? error.message : String(error);
      this.logger.warn(
        `Typhoon OCR failed in sandbox, falling back to Tesseract: ${cause}`
      );
      return this.runBaseline(pdfPath, true);
    }
  }

  /** Runs the core OcrService and maps its result to the sandbox result shape. */
  private async runBaseline(
    pdfPath: string,
    fallbackUsed: boolean
  ): Promise<SandboxOcrResult> {
    const result = await this.ocrService.detectAndExtract({ pdfPath });
    return {
      text: result.text,
      ocrUsed: result.ocrUsed,
      // When the core service did not need OCR, the text came from its fast path.
      engineUsed: result.ocrUsed ? 'tesseract' : 'fast-path',
      fallbackUsed,
    };
  }
}
|
||||
@@ -0,0 +1,134 @@
|
||||
// File: src/modules/ai/services/vram-monitor.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: Initial implementation สำหรับ Typhoon OCR VRAM monitoring (T006, ADR-032)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||
import Redis from 'ioredis';
|
||||
|
||||
/** VRAM information for one model as reported by the Ollama PS API. */
export interface OllamaModelInfo {
  /** Model name. */
  name: string;
  /** VRAM size, in bytes. */
  size_vram: number;
}
|
||||
|
||||
/** VRAM status result. */
export interface VramStatus {
  /** Total VRAM, in MB. */
  totalVramMb: number;
  /** VRAM currently in use, in MB. */
  usedVramMb: number;
  /** VRAM still available, in MB. */
  freeVramMb: number;
  /** Names of the currently loaded models. */
  loadedModels: string[];
  /** True when free VRAM >= the caller's minimum required MB. */
  hasCapacity: boolean;
}
|
||||
|
||||
/** Internal shape of the Ollama `/api/ps` response body. */
interface OllamaProcessStatus {
  /** Reported models; may be absent from the response. */
  models?: OllamaModelInfo[];
}
|
||||
|
||||
// Redis key under which the VRAM status is cached.
const VRAM_STATUS_CACHE_KEY = 'ai:vram:status';

// Cache TTL of 10 seconds — refreshed often enough for real-time monitoring.
const VRAM_STATUS_TTL_SECONDS = 10;

// Total VRAM of the RTX 2060 Super (8192 MB).
const GPU_TOTAL_VRAM_MB = 8 * 1024;

// Threshold: do not load a model if usage would exceed 90%.
const VRAM_USAGE_LIMIT_PERCENT = 0.9;
|
||||
|
||||
/**
 * Monitors GPU VRAM through the Ollama API (ADR-032).
 *
 * Usage is the sum of `size_vram` over the models Ollama reports, measured
 * against a fixed GPU size and usage limit. The status is cached in Redis for
 * a few seconds. Redis is treated as an optimisation only: a failing cache
 * read or write never changes the reported status.
 */
@Injectable()
export class VramMonitorService {
  private readonly logger = new Logger(VramMonitorService.name);
  private readonly ollamaUrl: string;

  constructor(
    private readonly configService: ConfigService,
    @InjectRedis() private readonly redis: Redis
  ) {
    // OLLAMA_URL wins; otherwise AI_HOST_URL; otherwise the local default.
    this.ollamaUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
    );
  }

  /**
   * Returns the current VRAM status from Ollama `/api/ps`, using the Redis
   * cache (10 second TTL) to reduce overhead.
   *
   * `hasCapacity` is always recomputed for the caller's `minRequiredMb`,
   * because the cached entry may have been written for a different threshold.
   *
   * @param minRequiredMb Free VRAM, in MB, needed for `hasCapacity` to be true.
   * @returns The VRAM status; a conservative "no capacity" status when Ollama cannot be queried.
   */
  async getVramStatus(minRequiredMb = 4000): Promise<VramStatus> {
    const cached = await this.readCachedStatus();
    if (cached) {
      return { ...cached, hasCapacity: cached.freeVramMb >= minRequiredMb };
    }
    return this.fetchAndCacheVramStatus(minRequiredMb);
  }

  /**
   * Checks whether enough VRAM is free to load a model.
   *
   * @param requiredMb VRAM the model needs, in MB.
   * @returns True when the free VRAM is at least `requiredMb`.
   */
  async hasVramCapacity(requiredMb: number): Promise<boolean> {
    const status = await this.getVramStatus(requiredMb);
    return status.hasCapacity;
  }

  /**
   * Reads the cached status from Redis.
   * Returns null on a cache miss, a Redis error, or a malformed entry, so the
   * caller falls through to a live query instead of throwing.
   */
  private async readCachedStatus(): Promise<VramStatus | null> {
    try {
      const raw = await this.redis.get(VRAM_STATUS_CACHE_KEY);
      if (!raw) {
        return null;
      }
      const parsed: unknown = JSON.parse(raw);
      if (
        typeof parsed === 'object' &&
        parsed !== null &&
        typeof (parsed as Partial<VramStatus>).freeVramMb === 'number'
      ) {
        return parsed as VramStatus;
      }
      this.logger.warn('Ignoring malformed VRAM status cache entry');
      return null;
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`VRAM status cache read failed: ${msg}`);
      return null;
    }
  }

  /**
   * Queries Ollama for the loaded models, derives the VRAM status and caches
   * it in Redis.
   *
   * @param minRequiredMb Free VRAM, in MB, needed for `hasCapacity` to be true.
   * @returns The derived status, or a conservative "no capacity" status when Ollama cannot be queried.
   */
  private async fetchAndCacheVramStatus(
    minRequiredMb: number
  ): Promise<VramStatus> {
    let status: VramStatus;
    try {
      const response = await axios.get<OllamaProcessStatus>(
        `${this.ollamaUrl}/api/ps`,
        { timeout: 5000 }
      );
      const models = response.data.models ?? [];
      const loadedModels = models.map((m) => m.name);
      // VRAM in use is the sum over all models Ollama reports.
      const usedVramBytes = models.reduce(
        (sum, m) => sum + (m.size_vram ?? 0),
        0
      );
      const usedVramMb = Math.round(usedVramBytes / 1024 / 1024);
      // Usable VRAM is capped at the usage limit (90%) of the whole GPU.
      const maxAllowedMb = Math.floor(
        GPU_TOTAL_VRAM_MB * VRAM_USAGE_LIMIT_PERCENT
      );
      const freeVramMb = Math.max(0, maxAllowedMb - usedVramMb);
      status = {
        totalVramMb: GPU_TOTAL_VRAM_MB,
        usedVramMb,
        freeVramMb,
        loadedModels,
        hasCapacity: freeVramMb >= minRequiredMb,
      };
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(
        `VRAM status fetch failed: ${msg} — ใช้ค่า conservative fallback`
      );
      // Fallback: assume there is no free VRAM when Ollama does not respond.
      return {
        totalVramMb: GPU_TOTAL_VRAM_MB,
        usedVramMb: GPU_TOTAL_VRAM_MB,
        freeVramMb: 0,
        loadedModels: [],
        hasCapacity: false,
      };
    }

    // The cache write is deliberately outside the try above: a Redis failure
    // must not turn a successful probe into the "no capacity" fallback.
    try {
      await this.redis.setex(
        VRAM_STATUS_CACHE_KEY,
        VRAM_STATUS_TTL_SECONDS,
        JSON.stringify(status)
      );
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      this.logger.warn(`VRAM status cache write failed: ${msg}`);
    }
    return status;
  }

  /**
   * Clears the cached VRAM status (call after a model is unloaded with
   * keep_alive=0) so that the next status check queries Ollama again.
   */
  async invalidateCache(): Promise<void> {
    await this.redis.del(VRAM_STATUS_CACHE_KEY);
  }
}
|
||||
Reference in New Issue
Block a user