690602:0957 ADR-033-233 #01
This commit is contained in:
@@ -11,6 +11,7 @@
|
||||
// - 2026-05-30: เพิ่ม @UseInterceptors(FileInterceptor('file')) ใน submitSandboxOcr เพื่อแก้ไขปัญหา BadRequestException (File is required)
|
||||
// - 2026-05-30: เพิ่ม endpoints GET/POST/PATCH models และ GET vram/status สำหรับ dynamic AI model management และ VRAM monitoring (T031-T034, US2)
|
||||
// - 2026-06-01: [BUGFIX] submitSandboxOcr: เพิ่ม @ApiBearerAuth(), @HttpCode(ACCEPTED), Body({ engineType }) และส่ง engineType ไปยัง enqueueSandboxJob
|
||||
// - 2026-06-02: เพิ่ม REST endpoints GET /ai/ocr-engines และ POST /ai/ocr-engines/:engineId/select (T003, T004, ADR-033) และนำเข้า SystemException เพื่อป้องกันความเสียหายในการคอมไพล์
|
||||
// Controller สำหรับ AI Gateway Endpoints (ADR-023)
|
||||
|
||||
import {
|
||||
@@ -33,6 +34,7 @@ import {
|
||||
ParseFilePipe,
|
||||
MaxFileSizeValidator,
|
||||
FileTypeValidator,
|
||||
Optional,
|
||||
} from '@nestjs/common';
|
||||
import { FilesInterceptor, FileInterceptor } from '@nestjs/platform-express';
|
||||
import { Throttle } from '@nestjs/throttler';
|
||||
@@ -62,7 +64,7 @@ import { CreateAiJobDto } from './dto/create-ai-job.dto';
|
||||
import { SubmitAiJobDto } from './dto/submit-ai-job.dto';
|
||||
import { MigrationUpdateDto } from './dto/migration-update.dto';
|
||||
import { MigrationQueryDto } from './dto/migration-query.dto';
|
||||
import { ValidationException } from '../../common/exceptions';
|
||||
import { ValidationException, SystemException } from '../../common/exceptions';
|
||||
import {
|
||||
ApproveLegacyMigrationDto,
|
||||
LegacyMigrationIngestDto,
|
||||
@@ -93,6 +95,9 @@ import {
|
||||
MigrationQueueRecordDto,
|
||||
SaveCheckpointDto,
|
||||
} from './dto/migration-checkpoint.dto';
|
||||
import { OcrService } from './services/ocr.service';
|
||||
import { OcrEngineResponseDto } from './dto/ocr-engine-response.dto';
|
||||
import { OcrEngineConfiguration } from './entities/ocr-engine-configuration.entity';
|
||||
|
||||
@ApiTags('AI Gateway')
|
||||
@Controller('ai')
|
||||
@@ -106,7 +111,8 @@ export class AiController {
|
||||
private readonly aiToolRegistryService: AiToolRegistryService,
|
||||
private readonly fileStorageService: FileStorageService,
|
||||
private readonly migrationCheckpointService: AiMigrationCheckpointService,
|
||||
@InjectRedis() private readonly redis: Redis
|
||||
@InjectRedis() private readonly redis: Redis,
|
||||
@Optional() private readonly ocrService?: OcrService
|
||||
) {}
|
||||
|
||||
// --- Real-time Extraction (User Upload) ---
|
||||
@@ -1027,4 +1033,45 @@ export class AiController {
|
||||
const status = await this.aiService.getVramStatus();
|
||||
return { data: status };
|
||||
}
|
||||
|
||||
/**
 * Lists the OCR engines available in the system (task T003).
 *
 * Protected by `JwtAuthGuard` + `RbacGuard` and the `system.manage_all` permission.
 *
 * @returns Whatever `OcrService.getOcrEngines()` resolves to, wrapped in a `{ data }` envelope.
 * @throws SystemException when the optional `OcrService` dependency was not injected.
 */
@Get('ocr-engines')
@UseGuards(JwtAuthGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('system.manage_all')
@ApiOperation({
  summary: 'OCR Engines — ดึงรายการเอนจิน OCR ทั้งหมดที่มีในระบบ (T003)',
})
async getOcrEngines(): Promise<{ data: OcrEngineResponseDto[] }> {
  // OcrService is declared @Optional() on the constructor, so it can be absent at runtime.
  const ocr = this.ocrService;
  if (!ocr) {
    throw new SystemException('OcrService not injected in AiController');
  }
  return { data: await ocr.getOcrEngines() };
}
|
||||
|
||||
/**
 * Selects the OCR engine identified by `engineId` (task T004).
 *
 * Protected by `JwtAuthGuard` + `RbacGuard` and the `system.manage_all` permission.
 * Responds with HTTP 200 instead of the POST default.
 *
 * @param engineId Engine identifier from the route, validated by `ParseUuidPipe`.
 * @param user     Authenticated user; `user.user_id` is forwarded to the service.
 * @returns The value resolved by `OcrService.selectOcrEngine()`, wrapped in `{ data }`.
 * @throws SystemException when the optional `OcrService` dependency was not injected.
 */
@Post('ocr-engines/:engineId/select')
@UseGuards(JwtAuthGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('system.manage_all')
@HttpCode(HttpStatus.OK)
@ApiOperation({
  summary: 'OCR Select Engine — ตั้งค่าเอนจิน OCR หลักของระบบ (T004)',
})
@ApiParam({
  name: 'engineId',
  description: 'UUID ของเอนจิน OCR ที่เลือก',
})
async selectOcrEngine(
  @Param('engineId', ParseUuidPipe) engineId: string,
  @CurrentUser() user: User
): Promise<{ data: OcrEngineConfiguration }> {
  // OcrService is declared @Optional() on the constructor, so it can be absent at runtime.
  const ocr = this.ocrService;
  if (!ocr) {
    throw new SystemException('OcrService not injected in AiController');
  }
  const data = await ocr.selectOcrEngine(engineId, user.user_id);
  return { data };
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,8 @@ import {
|
||||
import { OllamaService } from './services/ollama.service';
|
||||
import { AiQdrantService } from './qdrant.service';
|
||||
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
|
||||
import { AiSettingsService } from './ai-settings.service';
|
||||
import { VramMonitorService } from './services/vram-monitor.service';
|
||||
|
||||
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';
|
||||
|
||||
@@ -74,6 +76,7 @@ describe('AiService', () => {
|
||||
latencyMs: 120,
|
||||
models: ['gemma4:e4b', 'nomic-embed-text'],
|
||||
}),
|
||||
loadModel: jest.fn().mockResolvedValue(true),
|
||||
};
|
||||
|
||||
const mockQdrantService = {
|
||||
@@ -84,6 +87,27 @@ describe('AiService', () => {
|
||||
}),
|
||||
};
|
||||
|
||||
const mockAiSettingsService = {
|
||||
getAvailableModels: jest
|
||||
.fn()
|
||||
.mockResolvedValue([
|
||||
{ id: 1, modelName: 'gemma4:e4b', isActive: true, vramGb: 4.0 },
|
||||
]),
|
||||
getActiveModel: jest.fn().mockResolvedValue('gemma4:e4b'),
|
||||
setActiveModel: jest.fn().mockResolvedValue('gemma4:e4b'),
|
||||
};
|
||||
|
||||
const mockVramMonitorService = {
|
||||
hasVramCapacity: jest.fn().mockResolvedValue(true),
|
||||
getVramStatus: jest.fn().mockResolvedValue({
|
||||
totalVramMb: 8192,
|
||||
usedVramMb: 2048,
|
||||
freeVramMb: 6144,
|
||||
loadedModels: [],
|
||||
hasCapacity: true,
|
||||
}),
|
||||
};
|
||||
|
||||
const mockRedis = {
|
||||
get: jest.fn(),
|
||||
set: jest.fn(),
|
||||
@@ -163,6 +187,8 @@ describe('AiService', () => {
|
||||
{ provide: AiValidationService, useValue: mockValidationService },
|
||||
{ provide: OllamaService, useValue: mockOllamaService },
|
||||
{ provide: AiQdrantService, useValue: mockQdrantService },
|
||||
{ provide: AiSettingsService, useValue: mockAiSettingsService },
|
||||
{ provide: VramMonitorService, useValue: mockVramMonitorService },
|
||||
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
|
||||
],
|
||||
}).compile();
|
||||
@@ -468,4 +494,32 @@ describe('AiService', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// Covers the Ollama pre-load step of AiService.activateAiModel: a failed pre-load must
// abort the switch, a successful one must persist the new active model.
describe('activateAiModel', () => {
  const expectedModelName = 'gemma4:e4b';
  const actingUserId = 1;
  // Fresh payload per call so one test can never observe another test's object.
  const buildRequest = () => ({
    modelId: '019505a1-7c3e-7000-8000-abc123def202',
  });

  it('ควรขว้าง BusinessException เมื่อโหลดโมเดลล่วงหน้า (Pre-loading) ล้มเหลว', async () => {
    mockOllamaService.loadModel.mockResolvedValueOnce(false);

    const activation = service.activateAiModel(buildRequest(), actingUserId);

    await expect(activation).rejects.toBeInstanceOf(BusinessException);
    expect(mockOllamaService.loadModel).toHaveBeenCalledWith(
      expectedModelName
    );
    // The active-model setting must stay untouched when the pre-load fails.
    expect(mockAiSettingsService.setActiveModel).not.toHaveBeenCalled();
  });

  it('ควรสลับโมเดลสำเร็จเมื่อ Ollama โหลดโมเดลเรียบร้อย', async () => {
    mockOllamaService.loadModel.mockResolvedValueOnce(true);

    const result = await service.activateAiModel(buildRequest(), actingUserId);

    expect(result).toBe(expectedModelName);
    expect(mockOllamaService.loadModel).toHaveBeenCalledWith(
      expectedModelName
    );
    expect(mockAiSettingsService.setActiveModel).toHaveBeenCalledWith(
      expectedModelName,
      actingUserId
    );
  });
});
|
||||
});
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
// - 2026-05-21: เพิ่ม getSystemHealth พร้อมระบบแคช Redis 30 วินาทีตาม ADR-027.
|
||||
// - 2026-05-21: แก้ไข ESLint unsafe return error ใน getSystemHealth โดยใช้ interface SystemHealthResponse
|
||||
// - 2026-05-29: เพิ่ม OcrService.checkHealth() เข้า getSystemHealth() เพื่อแสดงสถานะ OCR sidecar
|
||||
// - 2026-06-02: ปรับปรุง activateAiModel ให้มีการโหลดและยืนยันโมเดลล่วงหน้าแบบ Synchronous (T008, ADR-033) และล้างโมเดลตัวเก่าออกเพื่อประหยัด VRAM (Suggestion 1)
|
||||
import { Injectable, Logger, Optional } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { HttpService } from '@nestjs/axios';
|
||||
@@ -1053,27 +1054,49 @@ export class AiService {
|
||||
if (!hasCapacity) {
|
||||
const vramStatus = await this.vramMonitorService.getVramStatus();
|
||||
const errMsg = `VRAM ไม่เพียงพอสำหรับการโหลดโมเดล ${model.modelName} (ต้องการ ${vramRequirementMB}MB, เหลือ ${vramStatus.freeVramMb}MB) — กรุณา unload โมเดลอื่น หรือเว้นระยะห่างในการโหลด`;
|
||||
|
||||
await this.saveAuditLog({
|
||||
documentPublicId: '00000000-0000-0000-0000-000000000000',
|
||||
aiModel: 'system',
|
||||
status: AiAuditStatus.FAILED,
|
||||
errorMessage: `Failed to activate model ${model.modelName} due to insufficient VRAM: ${errMsg}`,
|
||||
});
|
||||
|
||||
throw new BusinessException(
|
||||
'INSUFFICIENT_VRAM',
|
||||
errMsg,
|
||||
`พื้นที่หน่วยความจำ GPU (VRAM) ไม่เพียงพอสำหรับการโหลดโมเดล ${model.modelName}`
|
||||
);
|
||||
}
|
||||
|
||||
// 2.5 โหลดโมเดลล่วงหน้าแบบ Synchronous และตรวจสอบความพร้อมบน Ollama (ADR-033)
|
||||
if (this.ollamaService) {
|
||||
const isLoaded = await this.ollamaService.loadModel(model.modelName);
|
||||
if (!isLoaded) {
|
||||
const errMsg = `ไม่สามารถโหลดโมเดล ${model.modelName} ในระบบ Ollama ได้สำเร็จ (โมเดลอาจไม่ได้ดาวน์โหลด หรือ GPU/VRAM OOM) — กรุณาตรวจสอบ Ollama tags และสถานะ GPU`;
|
||||
await this.saveAuditLog({
|
||||
documentPublicId: '00000000-0000-0000-0000-000000000000',
|
||||
aiModel: 'system',
|
||||
status: AiAuditStatus.FAILED,
|
||||
errorMessage: `Failed to activate model ${model.modelName} during Ollama pre-loading: ${errMsg}`,
|
||||
});
|
||||
throw new BusinessException(
|
||||
'MODEL_LOAD_FAILED',
|
||||
errMsg,
|
||||
`ไม่สามารถดึงหรือโหลดโมเดล ${model.modelName} ไปยังระบบประมวลผล Ollama ได้`
|
||||
);
|
||||
}
|
||||
}
|
||||
const previousModelName = await this.aiSettingsService.getActiveModel();
|
||||
// 3. ทำการสลับโมเดล AI
|
||||
const activeModel = await this.aiSettingsService.setActiveModel(
|
||||
model.modelName,
|
||||
userId
|
||||
);
|
||||
|
||||
if (
|
||||
this.ollamaService &&
|
||||
previousModelName &&
|
||||
previousModelName !== model.modelName
|
||||
) {
|
||||
await this.ollamaService.unloadModel(previousModelName);
|
||||
}
|
||||
// บันทึก Audit Log สำหรับการเปิดใช้งานโมเดล AI (T038)
|
||||
await this.saveAuditLog({
|
||||
documentPublicId: '00000000-0000-0000-0000-000000000000',
|
||||
@@ -1081,7 +1104,6 @@ export class AiService {
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
errorMessage: `Model ${model.modelName} activated by user ${userId}. VRAM Capacity verified successfully.`,
|
||||
});
|
||||
|
||||
return activeModel;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
// - 2026-05-30: เพิ่ม VRAM insufficiency guard สำหรับ Typhoon OCR engine (T016a, ADR-032)
|
||||
// - 2026-05-30: ปรับปรุงสำหรับ Dynamic OCR Engine selection, Caching, และ Graceful Fallback (T013, T014, T016, T022, T023, US1)
|
||||
// - 2026-06-01: ปรับปรุง remapPath ให้รองรับ Windows absolute และ relative path ได้แม่นยำ 100%
|
||||
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart
|
||||
// ไปยัง /ocr-upload แทนการส่ง path (แก้ปัญหา Docker WSL2 mount ไม่ได้)
|
||||
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart ไปยัง /ocr-upload แทนการส่ง path
|
||||
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
|
||||
|
||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
@@ -99,7 +99,7 @@ export class OcrService {
|
||||
private readonly logger = new Logger(OcrService.name);
|
||||
private readonly threshold: number;
|
||||
private readonly ocrApiUrl: string;
|
||||
|
||||
private readonly ocrSidecarApiKey: string;
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
@InjectRepository(SystemSetting)
|
||||
@@ -115,6 +115,10 @@ export class OcrService {
|
||||
'OCR_API_URL',
|
||||
'http://localhost:8765'
|
||||
);
|
||||
this.ocrSidecarApiKey = this.configService.get<string>(
|
||||
'OCR_SIDECAR_API_KEY',
|
||||
'lcbp3-dms-ocr-sidecar-secure-token-2026'
|
||||
);
|
||||
}
|
||||
|
||||
/** ดึงรายการ OCR Engines ทั้งหมด พร้อมตรวจสอบตัวที่กำลัง Active */
|
||||
@@ -195,7 +199,10 @@ export class OcrService {
|
||||
async checkHealth(): Promise<OcrHealthResult> {
|
||||
const startTime = Date.now();
|
||||
try {
|
||||
await axios.get(`${this.ocrApiUrl}/health`, { timeout: 5000 });
|
||||
await axios.get(`${this.ocrApiUrl}/health`, {
|
||||
timeout: 5000,
|
||||
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
||||
});
|
||||
return {
|
||||
status: 'HEALTHY',
|
||||
latencyMs: Date.now() - startTime,
|
||||
@@ -256,7 +263,10 @@ export class OcrService {
|
||||
const response = await axios.post<OcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
{ timeout: 90000 }
|
||||
{
|
||||
timeout: 90000,
|
||||
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
||||
}
|
||||
);
|
||||
const text = response.data.text ?? '';
|
||||
const durationMs = Date.now() - startTime;
|
||||
@@ -323,7 +333,10 @@ export class OcrService {
|
||||
const response = await axios.post<OcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
{ timeout: 120000 }
|
||||
{
|
||||
timeout: 120000,
|
||||
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
||||
}
|
||||
);
|
||||
|
||||
const text = response.data.text ?? '';
|
||||
|
||||
@@ -1,178 +1,209 @@
|
||||
// File: src/modules/ai/services/ollama.service.ts
|
||||
|
||||
// Change Log
|
||||
|
||||
// - 2026-05-15: เพิ่ม Ollama service สำหรับ ADR-023A 2-model stack.
|
||||
|
||||
// - 2026-05-21: เพิ่ม checkHealth สำหรับตรวจสอบสุขภาพและความเร็ว (Latency) ของ Ollama
|
||||
// - 2026-06-02: เพิ่ม loadModel() preloading, ดึงจริงจาก /api/ps และเพิ่ม unloadModel() เพื่อล้างหน่วยความจำ GPU/VRAM (ADR-033, Suggestion 1)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
|
||||
import axios from 'axios';
|
||||
|
||||
/** Per-call overrides accepted by `OllamaService.generate`. */
export interface OllamaGenerateOptions {
  /** Request timeout in milliseconds; the service-wide timeout is used when omitted. */
  timeoutMs?: number;
  /** Abort signal handed to axios so the caller can cancel the request. */
  signal?: AbortSignal;
}

/** Client for the local-only Ollama instance on the Admin Desktop (ADR-023A). */
@Injectable()
export class OllamaService {
  private readonly logger = new Logger(OllamaService.name);
  private readonly ollamaUrl: string;
  private readonly mainModel: string;
  private readonly embedModel: string;
  private readonly timeoutMs: number;

  /**
   * Reads the Ollama endpoint, model names and default timeout from configuration.
   * `OLLAMA_URL` falls back to `AI_HOST_URL`, and `OLLAMA_MODEL_EMBED` falls back to
   * `OLLAMA_EMBED_MODEL`, before the literal defaults are used.
   */
  constructor(private readonly configService: ConfigService) {
    this.ollamaUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
    );
    this.mainModel = this.configService.get<string>(
      'OLLAMA_MODEL_MAIN',
      'gemma4:e4b'
    );
    this.embedModel = this.configService.get<string>(
      'OLLAMA_MODEL_EMBED',
      this.configService.get<string>('OLLAMA_EMBED_MODEL', 'nomic-embed-text')
    );
    this.timeoutMs = this.configService.get<number>('AI_TIMEOUT_MS', 30000);
  }

  /**
   * Generates a non-streamed completion with the configured main model.
   *
   * @param prompt  Prompt text sent to `/api/generate`.
   * @param options Optional timeout / abort signal overrides.
   * @returns The `response` field of the Ollama reply, or `''` when it is missing.
   * @throws Re-throws the underlying request error after logging it.
   */
  async generate(
    prompt: string,
    options: OllamaGenerateOptions = {}
  ): Promise<string> {
    try {
      const response = await axios.post<{ response: string }>(
        `${this.ollamaUrl}/api/generate`,
        {
          model: this.mainModel,
          prompt,
          stream: false,
        },
        {
          timeout: options.timeoutMs ?? this.timeoutMs,
          signal: options.signal,
        }
      );
      return response.data.response ?? '';
    } catch (err) {
      this.logger.error(
        'Ollama generate failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /**
   * Creates an embedding for `text` with the configured embedding model.
   *
   * @returns The `embedding` array from the `/api/embeddings` reply.
   * @throws Re-throws the underlying request error after logging it.
   */
  async generateEmbedding(text: string): Promise<number[]> {
    try {
      const response = await axios.post<{ embedding: number[] }>(
        `${this.ollamaUrl}/api/embeddings`,
        { model: this.embedModel, prompt: text },
        { timeout: this.timeoutMs }
      );
      return response.data.embedding;
    } catch (err) {
      this.logger.error(
        'Ollama embedding failed',
        err instanceof Error ? err.stack : String(err)
      );
      throw err;
    }
  }

  /** Returns the configured main model name (used for audit logging). */
  getMainModelName(): string {
    return this.mainModel;
  }

  /** Returns the configured embedding model name (used for audit logging). */
  getEmbeddingModelName(): string {
    return this.embedModel;
  }

  /**
   * Probes Ollama and reports availability, latency and model names.
   *
   * Latency is measured on `GET /api/tags` only. The model list comes from
   * `GET /api/ps`; if that call fails or returns nothing, the configured main and
   * embedding model names are reported instead. Never throws: a failed probe yields
   * `DEGRADED` for timeout-like errors and `DOWN` otherwise.
   */
  async checkHealth(): Promise<{
    status: 'HEALTHY' | 'DEGRADED' | 'DOWN';
    latencyMs: number;
    models: string[];
    error?: string;
  }> {
    const startTime = Date.now();

    try {
      await axios.get(`${this.ollamaUrl}/api/tags`, { timeout: 5000 });
      const latencyMs = Date.now() - startTime;

      let loadedModels: string[] = [];
      try {
        const psResponse = await axios.get<{
          models?: Array<{ name: string }>;
        }>(`${this.ollamaUrl}/api/ps`, { timeout: 3000 });
        if (psResponse.data?.models) {
          loadedModels = psResponse.data.models.map((m) => m.name);
        }
      } catch (psErr) {
        // Best effort: a failing /api/ps must not turn a reachable Ollama into DOWN.
        this.logger.warn(
          `Failed to fetch loaded models from /api/ps: ${psErr instanceof Error ? psErr.message : String(psErr)}`
        );
      }
      if (loadedModels.length === 0) {
        loadedModels = [this.mainModel, this.embedModel];
      }

      // Single `models` key: the previous literal listed it twice (duplicate property).
      return {
        status: 'HEALTHY',
        latencyMs,
        models: loadedModels,
      };
    } catch (err: unknown) {
      const latencyMs = Date.now() - startTime;
      const error = err instanceof Error ? err.message : String(err);
      const isTimeout =
        err instanceof Error &&
        (err.message.includes('timeout') ||
          err.message.includes('504') ||
          err.message.includes('code ECONNABORTED'));

      return {
        status: isTimeout ? 'DEGRADED' : 'DOWN',
        latencyMs,
        models: [this.mainModel, this.embedModel],
        error,
      };
    }
  }

  /**
   * Pre-loads `modelName` and reports whether it is ready (T007).
   *
   * First checks `GET /api/tags` for the model, then sends an empty-prompt
   * `/api/generate` request with `keep_alive: -1` and waits for it to finish.
   *
   * @returns `true` when the pre-load request succeeded; `false` when the model is not
   *          installed or any request failed (the error is logged, never thrown).
   */
  async loadModel(modelName: string): Promise<boolean> {
    try {
      const tagsResponse = await axios.get<{
        models?: Array<{ name: string; model: string }>;
      }>(`${this.ollamaUrl}/api/tags`, { timeout: 5000 });
      const installedModels = tagsResponse.data?.models ?? [];

      // An untagged name refers to the `latest` tag in Ollama. A plain prefix match
      // would wrongly accept e.g. `llama3` when only `llama3.1:8b` is installed.
      const withImplicitTag = modelName.includes(':')
        ? modelName
        : `${modelName}:latest`;
      const exists = installedModels.some(
        (m) =>
          m.name === modelName ||
          m.model === modelName ||
          m.name === withImplicitTag ||
          m.model === withImplicitTag
      );
      if (!exists) {
        this.logger.warn(`Model ${modelName} is not installed in Ollama`);
        return false;
      }

      this.logger.log(
        `Synchronously pre-loading model ${modelName} into GPU memory...`
      );
      await axios.post(
        `${this.ollamaUrl}/api/generate`,
        {
          model: modelName,
          prompt: '',
          stream: false,
          keep_alive: -1,
        },
        { timeout: 30000 }
      );
      this.logger.log(`Model ${modelName} pre-loaded successfully`);
      return true;
    } catch (err: unknown) {
      this.logger.error(
        `Failed to pre-load model ${modelName}`,
        err instanceof Error ? err.stack : String(err)
      );
      return false;
    }
  }

  /**
   * Asks Ollama to unload `modelName` to free VRAM (ADR-033 Suggestion 1).
   *
   * Sends an empty-prompt `/api/generate` request with `keep_alive: 0`.
   *
   * @returns `true` when the request succeeded; `false` on any error (logged as a
   *          warning, never thrown).
   */
  async unloadModel(modelName: string): Promise<boolean> {
    try {
      this.logger.log(`Unloading model ${modelName} from GPU memory...`);
      await axios.post(
        `${this.ollamaUrl}/api/generate`,
        {
          model: modelName,
          prompt: '',
          stream: false,
          keep_alive: 0,
        },
        { timeout: 10000 }
      );
      this.logger.log(`Model ${modelName} unloaded successfully`);
      return true;
    } catch (err: unknown) {
      this.logger.warn(
        `Failed to unload model ${modelName}: ${err instanceof Error ? err.message : String(err)}`
      );
      return false;
    }
  }
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// Change Log
|
||||
// - 2026-05-30: แยก SandboxOcrEngineService ออกจาก OcrService เพื่อรองรับการเลือก Typhoon OCR เฉพาะ sandbox โดยไม่กระทบ core OCR flow
|
||||
// - 2026-06-01: เปลี่ยนจาก remapPath + pdfPath ไปเป็น multipart file upload ไปยัง /ocr-upload (แก้ปัญหา Docker WSL2 mount)
|
||||
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
@@ -33,7 +34,7 @@ export interface SandboxOcrResult {
|
||||
export class SandboxOcrEngineService {
|
||||
private readonly logger = new Logger(SandboxOcrEngineService.name);
|
||||
private readonly ocrApiUrl: string;
|
||||
|
||||
private readonly ocrSidecarApiKey: string;
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
private readonly ocrService: OcrService
|
||||
@@ -42,6 +43,10 @@ export class SandboxOcrEngineService {
|
||||
'OCR_API_URL',
|
||||
'http://localhost:8765'
|
||||
);
|
||||
this.ocrSidecarApiKey = this.configService.get<string>(
|
||||
'OCR_SIDECAR_API_KEY',
|
||||
'lcbp3-dms-ocr-sidecar-secure-token-2026'
|
||||
);
|
||||
}
|
||||
|
||||
/** รัน OCR ตาม engine ที่เลือก โดย fallback กลับไป Tesseract baseline เมื่อ Typhoon ล้มเหลว */
|
||||
@@ -71,7 +76,10 @@ export class SandboxOcrEngineService {
|
||||
const response = await axios.post<SandboxOcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
{ timeout: 120000 }
|
||||
{
|
||||
timeout: 120000,
|
||||
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
|
||||
@@ -111,15 +111,14 @@ export class VramMonitorService {
|
||||
} catch (err: unknown) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
this.logger.warn(
|
||||
`VRAM status fetch failed: ${msg} — ใช้ค่า conservative fallback`
|
||||
`VRAM status fetch failed: ${msg} — ใช้ค่า resilient fallback`
|
||||
);
|
||||
// Fallback (fail-open): when the VRAM status fetch fails, report usedVramMb = 0 and hasCapacity = true, i.e. treat all VRAM as free.
|
||||
return {
|
||||
totalVramMb: GPU_TOTAL_VRAM_MB,
|
||||
usedVramMb: GPU_TOTAL_VRAM_MB,
|
||||
freeVramMb: 0,
|
||||
usedVramMb: 0,
|
||||
freeVramMb: GPU_TOTAL_VRAM_MB,
|
||||
loadedModels: [],
|
||||
hasCapacity: false,
|
||||
hasCapacity: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user