feat(ai-runtime): complete ai runtime policy refactor (ADR-035)
CI / CD Pipeline / build (push) Successful in 4m16s
CI / CD Pipeline / deploy (push) Successful in 11m51s

This commit is contained in:
2026-06-12 08:07:15 +07:00
parent 71c5e88181
commit 0227b7b982
63 changed files with 3566 additions and 451 deletions
+6 -1
View File
@@ -2,6 +2,7 @@
// Change Log:
// - 2026-05-13: Add BullMQ config registry for reminder and distribution queues.
// - 2026-05-15: เพิ่ม config สำหรับ ai-realtime และ ai-batch ตาม ADR-023A.
// - 2026-06-11: ปรับ aiRealtimeQueue.concurrency ให้รองรับ AI_REALTIME_CONCURRENCY / REALTIME_CONCURRENCY
import { registerAs } from '@nestjs/config';
@@ -12,7 +13,11 @@ export default registerAs('bullmq', () => ({
process.env.BULLMQ_DISTRIBUTION_QUEUE || 'rfa-distribution',
aiRealtimeQueue: {
name: process.env.BULLMQ_AI_REALTIME_QUEUE || 'ai-realtime',
concurrency: 1,
concurrency: Number(
process.env.AI_REALTIME_CONCURRENCY ||
process.env.REALTIME_CONCURRENCY ||
'2'
),
defaultJobOptions: {
attempts: 3,
backoff: { type: 'exponential', delay: 2000 },
+11 -14
View File
@@ -1,4 +1,4 @@
// File: src/modules/ai/ai.controller.ts
// File: backend/src/modules/ai/ai.controller.ts
// Change Log
// - 2026-05-14: เพิ่ม Legacy Migration staging endpoints ตาม ADR-023.
// - 2026-05-14: ย้าย DeleteAuditLogsQueryDto ไป dto/ folder; ลบ authHeader passthrough (🟢 LOW-1/LOW-2).
@@ -13,6 +13,7 @@
// - 2026-06-01: [BUGFIX] submitSandboxOcr: เพิ่ม @ApiBearerAuth(), @HttpCode(ACCEPTED), Body({ engineType }) และส่ง engineType ไปยัง enqueueSandboxJob
// - 2026-06-02: เพิ่ม REST endpoints GET /ai/ocr-engines และ POST /ai/ocr-engines/:engineId/select (T003, T004, ADR-033) และนำเข้า SystemException เพื่อป้องกันความเสียหายในการคอมไพล์
// - 2026-06-06: [BUGFIX] เพิ่ม @Throttle({ default: { limit: 300, ttl: 60000 } }) บน GET admin/sandbox/job/:id เพื่อแก้ ThrottlerException spam จาก frontend polling
// - 2026-06-11: แก้ไขการส่งพารามิเตอร์ให้กับ queueSuggestJob ใน suggestDocumentMetadata
// Controller สำหรับ AI Gateway Endpoints (ADR-023)
import {
@@ -62,7 +63,7 @@ import { AiRagQueryDto } from './dto/ai-rag-query.dto';
import { ExtractDocumentDto } from './dto/extract-document.dto';
import { AiCallbackDto } from './dto/ai-callback.dto';
import { CreateAiJobDto } from './dto/create-ai-job.dto';
import { SubmitAiJobDto } from './dto/submit-ai-job.dto';
import { AiJobResponseDto } from './dto/ai-job-response.dto';
import { MigrationUpdateDto } from './dto/migration-update.dto';
import { MigrationQueryDto } from './dto/migration-query.dto';
import { ValidationException, SystemException } from '../../common/exceptions';
@@ -171,11 +172,7 @@ export class AiController {
@Body() dto: CreateAiJobDto,
@Headers('idempotency-key') idempotencyKey: string
): Promise<{ success: boolean; jobId?: string; status: string }> {
const result = await this.aiService.queueSuggestJob({
...dto,
jobType: 'ai-suggest',
idempotencyKey: idempotencyKey || dto.idempotencyKey,
});
const result = await this.aiService.queueSuggestJob(dto, idempotencyKey);
return {
success: result.success,
jobId: result.jobId,
@@ -199,25 +196,25 @@ export class AiController {
@UseGuards(JwtAuthGuard, AiEnabledGuard, RbacGuard)
@ApiBearerAuth()
@RequirePermission('ai.suggest')
@HttpCode(HttpStatus.ACCEPTED)
@HttpCode(HttpStatus.CREATED)
@ApiOperation({
summary: 'Submit AI migration job — ส่งงานย้ายเอกสารให้ AI ประมวลผล',
summary: 'Submit unified AI job — ส่งงานประมวลผล AI แบบรวมศูนย์',
description:
'รับ tempAttachmentId/documentNumber แล้วส่งงานย้ายเอกสารเข้า BullMQ เพื่อรอการประมวลผล',
'รับชนิดงานและข้อมูลอ้างอิง เพื่อส่งงานประมวลผล AI เข้าคิว BullMQ',
})
@ApiHeader({
name: 'Idempotency-Key',
description: 'Unique key เพื่อป้องกัน duplicate AI job',
required: true,
})
async submitMigrationJob(
@Body() dto: SubmitAiJobDto,
async submitUnifiedJob(
@Body() dto: CreateAiJobDto,
@Headers('idempotency-key') idempotencyKey: string
) {
): Promise<AiJobResponseDto> {
if (!idempotencyKey) {
throw new ValidationException('Idempotency-Key header is required');
}
return this.aiService.submitMigrationJob(dto, idempotencyKey);
return this.aiService.submitUnifiedJob(dto, idempotencyKey);
}
@Get('jobs/:jobId')
+5
View File
@@ -36,12 +36,14 @@ import { SandboxOcrEngineService } from './services/sandbox-ocr-engine.service';
import { EmbeddingService } from './services/embedding.service';
import { VramMonitorService } from './services/vram-monitor.service';
import { OcrCacheService } from './services/ocr-cache.service';
import { AiPolicyService } from './services/ai-policy.service';
import { MigrationLog } from './entities/migration-log.entity';
import { AiAuditLog } from './entities/ai-audit-log.entity';
import { MigrationReviewRecord } from './entities/migration-review.entity';
import { MigrationProgress } from './entities/migration-progress.entity';
import { SystemSetting } from './entities/system-setting.entity';
import { AiAvailableModel } from './entities/ai-available-model.entity';
import { AiExecutionProfile } from './entities/ai-execution-profile.entity';
import { AiMigrationCheckpointService } from './ai-migration-checkpoint.service';
import { AiEnabledGuard } from './guards/ai-enabled.guard';
import { UserModule } from '../user/user.module';
@@ -96,6 +98,7 @@ import {
ImportTransaction,
MigrationReviewQueue,
AiPrompt,
AiExecutionProfile,
]),
BullModule.registerQueue(
@@ -171,6 +174,7 @@ import {
providers: [
AiService,
AiSettingsService,
AiPolicyService,
AiIngestService,
AiMigrationCheckpointService,
AiQueueService,
@@ -201,6 +205,7 @@ import {
exports: [
AiService,
AiSettingsService,
AiPolicyService,
AiIngestService,
AiMigrationCheckpointService,
AiQueueService,
+132 -1
View File
@@ -2,6 +2,7 @@
// Unit Tests สำหรับ AiService — ทดสอบ Business Logic สำคัญ: Callback, Update, Status Transitions
// Change Log
// - 2026-05-21: เพิ่ม unit tests สำหรับ getSystemHealth (T026) ทั้งกรณี cache hit/miss และ queue metrics.
// - 2026-06-11: เพิ่ม mock สำหรับ AiPolicyService เพื่อแก้ไข test regression
import { Test, TestingModule } from '@nestjs/testing';
import { getRepositoryToken } from '@nestjs/typeorm';
@@ -17,7 +18,11 @@ import {
import { AiAuditLog, AiAuditStatus } from './entities/ai-audit-log.entity';
import { AiCallbackDto } from './dto/ai-callback.dto';
import { MigrationUpdateDto } from './dto/migration-update.dto';
import { NotFoundException, BusinessException } from '../../common/exceptions';
import {
NotFoundException,
BusinessException,
ValidationException,
} from '../../common/exceptions';
import { AuditLog } from '../../common/entities/audit-log.entity';
import {
QUEUE_AI_BATCH,
@@ -28,6 +33,9 @@ import { AiQdrantService } from './qdrant.service';
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
import { AiSettingsService } from './ai-settings.service';
import { VramMonitorService } from './services/vram-monitor.service';
import { AiPolicyService } from './services/ai-policy.service';
import { Attachment } from '../../common/file-storage/entities/attachment.entity';
import { Project } from '../project/entities/project.entity';
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';
@@ -110,6 +118,44 @@ describe('AiService', () => {
}),
};
// Stub AiPolicyService: every job resolves to the 'standard' profile on 'np-dms-ai';
// only model names containing "ocr" map to 'np-dms-ocr'.
const standardProfileParams = {
  temperature: 0.5,
  topP: 0.8,
  maxTokens: 4096,
  numCtx: 8192,
  repeatPenalty: 1.15,
  keepAliveSeconds: 600,
};
const mockAiPolicyService = {
  getCanonicalModelName: jest
    .fn()
    .mockImplementation((name: string) =>
      name.includes('ocr') ? 'np-dms-ocr' : 'np-dms-ai'
    ),
  getProfileForJobType: jest.fn().mockReturnValue('standard'),
  getProfileParameters: jest.fn().mockResolvedValue({
    canonicalModel: 'np-dms-ai',
    ...standardProfileParams,
  }),
  // Echoes the job type and reference IDs back with a fixed policy snapshot.
  createJobPayload: jest
    .fn()
    .mockImplementation(async (jobType, docId, attachId) => {
      await Promise.resolve();
      return {
        jobType,
        documentPublicId: docId,
        attachmentPublicId: attachId,
        effectiveProfile: 'standard',
        canonicalModel: 'np-dms-ai',
        snapshotParams: { ...standardProfileParams },
      };
    }),
};
const mockRedis = {
get: jest.fn(),
set: jest.fn(),
@@ -191,6 +237,7 @@ describe('AiService', () => {
{ provide: AiQdrantService, useValue: mockQdrantService },
{ provide: AiSettingsService, useValue: mockAiSettingsService },
{ provide: VramMonitorService, useValue: mockVramMonitorService },
{ provide: AiPolicyService, useValue: mockAiPolicyService },
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
],
}).compile();
@@ -241,6 +288,90 @@ describe('AiService', () => {
});
});
// Unit tests for AiService.submitUnifiedJob: enqueue response shape and request validation.
describe('submitUnifiedJob', () => {
// Title (Thai): "should not record ai_audit_logs as SUCCESS at enqueue time".
// The project lookup succeeds, no job exists for the key, and the queue accepts the job.
it('ไม่ควรบันทึก ai_audit_logs เป็น SUCCESS ตั้งแต่ตอน enqueue', async () => {
mockImportTransactionRepo.manager.findOne.mockResolvedValueOnce({
publicId: '019505a1-7c3e-7000-8000-abc123def777',
});
mockQueue.getJob.mockResolvedValue(null);
mockQueue.add.mockResolvedValue({ id: 'job-enqueued' });
const result = await service.submitUnifiedJob(
{
type: 'rag-query',
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
payload: { query: 'test' },
},
'job-enqueued'
);
// Model and profile come from the mocked policy service.
expect(result).toEqual({
jobId: 'job-enqueued',
status: 'queued',
modelUsed: 'np-dms-ai',
effectiveProfile: 'standard',
queueName: 'ai-batch',
});
// Enqueueing alone must not write an audit record.
expect(mockAuditLogRepo.save).not.toHaveBeenCalled();
});
// Title (Thai): "should reject a rag-query that has no payload.query".
it('ควร reject rag-query ที่ไม่มี payload.query', async () => {
await expect(
service.submitUnifiedJob(
{
type: 'rag-query',
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
payload: {},
},
'job-no-query'
)
).rejects.toBeInstanceOf(ValidationException);
});
// Title (Thai): "should reject a projectPublicId that is not found in the system with 422".
// Only the exception class and the lookup arguments are asserted here, not the HTTP status.
it('ควร reject projectPublicId ที่ไม่พบในระบบด้วย 422', async () => {
mockImportTransactionRepo.manager.findOne.mockResolvedValueOnce(null);
await expect(
service.submitUnifiedJob(
{
type: 'rag-query',
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
payload: { query: 'test' },
},
'job-missing-project'
)
).rejects.toBeInstanceOf(BusinessException);
expect(mockImportTransactionRepo.manager.findOne).toHaveBeenCalledWith(
Project,
{
where: { publicId: '019505a1-7c3e-7000-8000-abc123def777' },
}
);
});
// Title (Thai): "should reject an attachment reference that is not found in the system with 422".
// The first findOne call (project) resolves; the second (attachment) resolves to null.
it('ควร reject attachment reference ที่ไม่พบในระบบด้วย 422', async () => {
mockImportTransactionRepo.manager.findOne
.mockResolvedValueOnce({
publicId: '019505a1-7c3e-7000-8000-abc123def777',
})
.mockResolvedValueOnce(null);
await expect(
service.submitUnifiedJob(
{
type: 'rag-query',
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
documentPublicId: '019505a1-7c3e-7000-8000-abc123def456',
payload: { query: 'test' },
},
'job-missing-attachment'
)
).rejects.toBeInstanceOf(BusinessException);
// documentPublicId is resolved against the Attachment entity.
expect(mockImportTransactionRepo.manager.findOne).toHaveBeenCalledWith(
Attachment,
{
where: { publicId: '019505a1-7c3e-7000-8000-abc123def456' },
}
);
});
});
// --- handleWebhookCallback ---
describe('handleWebhookCallback', () => {
+160 -13
View File
@@ -1,11 +1,14 @@
// File: src/modules/ai/ai.service.ts
// File: backend/src/modules/ai/ai.service.ts
// Service หลักของ AI Gateway — เชื่อมต่อระหว่าง DMS กับ n8n/Ollama Pipeline (ADR-018, ADR-020)
// Change Log
// - 2026-05-21: เพิ่ม getSystemHealth พร้อมระบบแคช Redis 30 วินาทีตาม ADR-027.
// - 2026-05-21: แก้ไข ESLint unsafe return error ใน getSystemHealth โดยใช้ interface SystemHealthResponse
// - 2026-05-29: เพิ่ม OcrService.checkHealth() เข้า getSystemHealth() เพื่อแสดงสถานะ OCR sidecar
// - 2026-06-02: ปรับปรุง activateAiModel ให้มีการโหลดและยืนยันโมเดลล่วงหน้าแบบ Synchronous (T008, ADR-033) และล้างโมเดลตัวเก่าออกเพื่อประหยัด VRAM (Suggestion 1)
// - 2026-06-03: ADR-034 — เพิ่ม activeModels field (เอา mainModel+ocrModel) ใน SystemHealthResponse
// - 2026-06-03: ADR-034 — เพิ่ม active models ใน SystemHealthResponse
// - 2026-06-11: US2 - เพิ่มการผูก execution profile ใน submitMigrationJob ของ ai.service.ts
// - 2026-06-11: US4 - เพิ่ม explicit assertion สำหรับการ dispatch RAG query ไปยัง ai-batch queue
// - 2026-06-11: แก้ไข compile errors (SystemException arguments, idempotencyKey signature, type mapping) และลบบรรทัดว่างในฟังก์ชันที่แก้ไข
import { Injectable, Logger, Optional } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { HttpService } from '@nestjs/axios';
@@ -37,8 +40,11 @@ import { MigrationQueryDto } from './dto/migration-query.dto';
import { AiValidationService } from './ai-validation.service';
import { CreateAiJobDto } from './dto/create-ai-job.dto';
import { SubmitAiJobDto } from './dto/submit-ai-job.dto';
import { AiJobResponseDto } from './dto/ai-job-response.dto';
import { AiPolicyService } from './services/ai-policy.service';
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
import { Project } from '../project/entities/project.entity';
import { Attachment } from '../../common/file-storage/entities/attachment.entity';
import {
QUEUE_AI_BATCH,
QUEUE_AI_REALTIME,
@@ -52,6 +58,7 @@ import {
VramMonitorService,
VramStatus,
} from './services/vram-monitor.service';
import type { AiJobPayload } from './interfaces/execution-policy.interface';
import {
AiModelConfiguration,
AiModelType,
@@ -178,6 +185,7 @@ export class AiService {
private readonly configService: ConfigService,
private readonly httpService: HttpService,
private readonly aiValidationService: AiValidationService,
private readonly aiPolicyService: AiPolicyService,
@InjectRepository(MigrationLog)
private readonly migrationLogRepo: Repository<MigrationLog>,
@InjectRepository(AiAuditLog)
@@ -220,7 +228,16 @@ export class AiService {
// --- ADR-023A BullMQ Job Queueing ---
/** ส่งงาน AI Suggest เข้า ai-realtime queue แบบไม่ block request thread */
async queueSuggestJob(dto: CreateAiJobDto): Promise<AiQueueResult> {
async queueSuggestJob(
dto: CreateAiJobDto,
idempotencyKey: string
): Promise<AiQueueResult> {
if (dto.type === 'rag-query') {
throw new SystemException(
'RAG query cannot be queued in AI realtime queue',
{ errorCode: 'AI_QUEUE_ERROR' }
);
}
if (!this.aiRealtimeQueue) {
const error = new Error('AI realtime queue is not registered');
this.logger.error('AI job queue failed', {
@@ -229,18 +246,17 @@ export class AiService {
});
return { success: false, error };
}
try {
const job = await this.aiRealtimeQueue.add(
'ai-suggest',
{
jobType: 'ai-suggest',
documentPublicId: dto.documentPublicId,
projectPublicId: dto.projectPublicId,
projectPublicId: dto.projectPublicId || '',
payload: dto.payload ?? {},
idempotencyKey: dto.idempotencyKey,
idempotencyKey,
},
{ jobId: dto.idempotencyKey }
{ jobId: idempotencyKey }
);
return { success: true, jobId: String(job.id) };
} catch (err: unknown) {
@@ -254,7 +270,10 @@ export class AiService {
}
/** ส่งงาน embedding เข้า ai-batch queue แบบ best-effort */
async queueEmbedJob(dto: CreateAiJobDto): Promise<AiQueueResult> {
async queueEmbedJob(
dto: CreateAiJobDto,
idempotencyKey: string
): Promise<AiQueueResult> {
if (!this.aiBatchQueue) {
const error = new Error('AI batch queue is not registered');
this.logger.error('AI job queue failed', {
@@ -263,18 +282,17 @@ export class AiService {
});
return { success: false, error };
}
try {
const job = await this.aiBatchQueue.add(
'embed-document',
{
jobType: 'embed-document',
documentPublicId: dto.documentPublicId,
projectPublicId: dto.projectPublicId,
documentPublicId: dto.documentPublicId || '',
projectPublicId: dto.projectPublicId || '',
payload: dto.payload ?? {},
idempotencyKey: dto.idempotencyKey,
idempotencyKey,
},
{ jobId: dto.idempotencyKey }
{ jobId: idempotencyKey }
);
return { success: true, jobId: String(job.id) };
} catch (err: unknown) {
@@ -287,6 +305,124 @@ export class AiService {
}
}
/**
 * Submits a unified AI job to the ai-batch BullMQ queue (ADR-023A).
 *
 * The request is validated first. The idempotency key doubles as the BullMQ job id:
 * if a job with that id already exists, its stored model/profile are returned and
 * nothing new is enqueued.
 *
 * @param dto Job type, optional reference IDs and free-form payload.
 * @param idempotencyKey Caller-supplied key, used as the BullMQ job id.
 * @returns Job id, status 'queued', and the model/profile chosen by AiPolicyService.
 * @throws SystemException when the batch queue is not registered.
 * @throws ValidationException | BusinessException from validateUnifiedJobRequest.
 */
async submitUnifiedJob(
  dto: CreateAiJobDto,
  idempotencyKey: string
): Promise<AiJobResponseDto> {
  const queueName = 'ai-batch';
  const batchQueue = this.aiBatchQueue;
  // Explicit routing assertion: RAG queries may only go to the batch queue.
  if (dto.type === 'rag-query' && queueName !== 'ai-batch') {
    throw new SystemException(
      'RAG query must be dispatched to ai-batch queue',
      { errorCode: 'AI_QUEUE_ERROR' }
    );
  }
  if (!batchQueue) {
    throw new SystemException('AI batch queue is not registered', {
      errorCode: 'AI_QUEUE_ERROR',
    });
  }
  await this.validateUnifiedJobRequest(dto);

  // Idempotent replay: report the job that already owns this key.
  const existingJob = await batchQueue.getJob(idempotencyKey);
  if (existingJob) {
    const { canonicalModel, effectiveProfile } =
      existingJob.data as unknown as AiJobPayload;
    return {
      jobId: String(existingJob.id),
      status: 'queued',
      modelUsed: canonicalModel,
      effectiveProfile,
      queueName: 'ai-batch',
    };
  }

  // Backend-owned policy snapshot; the attachment id stands in when no document id is given.
  const { documentPublicId, attachmentPublicId } = dto;
  const policyPayload = await this.aiPolicyService.createJobPayload(
    dto.type,
    documentPublicId || attachmentPublicId,
    attachmentPublicId
  );
  const jobData = {
    ...policyPayload,
    documentPublicId: policyPayload.documentPublicId || '',
    projectPublicId: dto.projectPublicId || '',
    payload: dto.payload || {},
    idempotencyKey,
  };
  const enqueuedJob = await batchQueue.add(
    dto.type,
    jobData as unknown as AiBatchJobData,
    { jobId: idempotencyKey }
  );
  return {
    jobId: String(enqueuedJob.id),
    status: 'queued',
    modelUsed: policyPayload.canonicalModel,
    effectiveProfile: policyPayload.effectiveProfile,
    queueName: 'ai-batch',
  };
}
/**
 * Validates a unified AI job request before it is enqueued.
 *
 * Checks, in order:
 *  1. rag-query jobs carry a non-blank string `payload.query` and a `projectPublicId`;
 *  2. auto-fill-document / migrate-document jobs carry a document or attachment id;
 *  3. the referenced project exists (when `projectPublicId` is given);
 *  4. every referenced document/attachment id resolves to an Attachment row.
 *
 * @param dto Incoming job request.
 * @throws ValidationException when a required field is missing or blank.
 * @throws BusinessException PROJECT_NOT_FOUND / ATTACHMENT_NOT_FOUND when a reference does not exist.
 */
private async validateUnifiedJobRequest(dto: CreateAiJobDto): Promise<void> {
  if (dto.type === 'rag-query') {
    const query = dto.payload?.['query'];
    if (typeof query !== 'string' || query.trim().length === 0) {
      throw new ValidationException(
        'payload.query is required for rag-query jobs'
      );
    }
    if (!dto.projectPublicId) {
      throw new ValidationException(
        'projectPublicId is required for rag-query jobs'
      );
    }
  }
  const isDocumentJob =
    dto.type === 'auto-fill-document' || dto.type === 'migrate-document';
  if (isDocumentJob && !dto.documentPublicId && !dto.attachmentPublicId) {
    throw new ValidationException(
      'documentPublicId or attachmentPublicId is required for document AI jobs'
    );
  }
  const entityManager = this.importTransactionRepo.manager;
  if (dto.projectPublicId) {
    const project = await entityManager.findOne(Project, {
      where: { publicId: dto.projectPublicId },
    });
    if (!project) {
      throw new BusinessException(
        'PROJECT_NOT_FOUND',
        `Project with publicId ${dto.projectPublicId} was not found`,
        'ไม่พบโครงการที่อ้างอิงสำหรับงาน AI'
      );
    }
  }
  // Both ids are resolved against Attachment; de-duplicate so that a request whose
  // documentPublicId equals attachmentPublicId costs one lookup instead of two.
  const referenceIds = Array.from(
    new Set(
      [dto.documentPublicId, dto.attachmentPublicId].filter(
        (value): value is string => typeof value === 'string'
      )
    )
  );
  // Sequential on purpose: the first missing reference decides the reported error.
  for (const publicId of referenceIds) {
    const attachment = await entityManager.findOne(Attachment, {
      where: { publicId },
    });
    if (!attachment) {
      throw new BusinessException(
        'ATTACHMENT_NOT_FOUND',
        `Attachment with publicId ${publicId} was not found`,
        'ไม่พบไฟล์อ้างอิงสำหรับงาน AI'
      );
    }
  }
}
/** ส่งคำขอเปิดงานประมวลผลการย้ายเอกสารของ AI (migrate-document) เข้า BullMQ */
async submitMigrationJob(
dto: SubmitAiJobDto,
@@ -327,9 +463,14 @@ export class AiService {
defaultProject?.publicId ?? '00000000-0000-0000-0000-000000000000';
}
try {
const payload = await this.aiPolicyService.createJobPayload(
'migrate-document',
dto.payload.tempAttachmentId
);
const job = await this.aiBatchQueue.add(
'migrate-document',
{
...payload,
jobType: 'migrate-document',
documentPublicId: dto.payload.tempAttachmentId,
projectPublicId,
@@ -691,6 +832,9 @@ export class AiService {
inputHash?: string;
outputHash?: string;
errorMessage?: string;
effectiveProfile?: string;
canonicalModel?: string;
snapshotParamsJson?: Record<string, unknown>;
}): Promise<void> {
try {
const auditLog = this.aiAuditLogRepo.create({
@@ -702,6 +846,9 @@ export class AiService {
inputHash: data.inputHash,
outputHash: data.outputHash,
errorMessage: data.errorMessage,
effectiveProfile: data.effectiveProfile,
canonicalModel: data.canonicalModel,
snapshotParamsJson: data.snapshotParamsJson,
});
await this.aiAuditLogRepo.save(auditLog);
} catch (auditError: unknown) {
@@ -0,0 +1,42 @@
// File: backend/src/modules/ai/dto/ai-job-response.dto.ts
// Change Log:
// - 2026-06-11: Initial creation of AiJobResponseDto for unified AI jobs response
// - 2026-06-11: ใช้ import type สำหรับ ExecutionProfile เพื่อแก้ปัญหา TS1272
import { ApiProperty } from '@nestjs/swagger';
import { IsEnum, IsString } from 'class-validator';
import type { ExecutionProfile } from '../interfaces/execution-policy.interface';
// Allowed values per field, declared once and shared by the Swagger and validator decorators.
const JOB_STATUS_VALUES = ['queued', 'completed', 'failed'];
const CANONICAL_MODEL_VALUES = ['np-dms-ai', 'np-dms-ocr'];
const EXECUTION_PROFILE_VALUES = [
  'interactive',
  'standard',
  'quality',
  'deep-analysis',
];
const QUEUE_NAME_VALUES = ['ai-realtime', 'ai-batch'];

/** Response body returned after a unified AI job has been submitted to BullMQ. */
export class AiJobResponseDto {
  /** BullMQ job id. */
  @ApiProperty({ description: 'ID ของงานในคิว BullMQ' })
  @IsString()
  jobId!: string;

  /** Job status as reported to the caller. */
  @ApiProperty({
    enum: JOB_STATUS_VALUES,
    description: 'สถานะของงานในคิว',
  })
  @IsEnum(JOB_STATUS_VALUES)
  status!: 'queued' | 'completed' | 'failed';

  /** Canonical name of the model assigned to the job. */
  @ApiProperty({
    enum: CANONICAL_MODEL_VALUES,
    description: 'ชื่อโมเดลมาตรฐาน (Canonical Name) ที่ใช้งาน',
  })
  @IsEnum(CANONICAL_MODEL_VALUES)
  modelUsed!: 'np-dms-ai' | 'np-dms-ocr';

  /** Execution profile the backend actually assigned. */
  @ApiProperty({
    enum: EXECUTION_PROFILE_VALUES,
    description: 'โปรไฟล์การประมวลผลจริงที่ระบบกำหนดให้',
  })
  @IsEnum(EXECUTION_PROFILE_VALUES)
  effectiveProfile!: ExecutionProfile;

  /** Name of the queue that processes the job. */
  @ApiProperty({
    enum: QUEUE_NAME_VALUES,
    description: 'ชื่อคิวที่ใช้ประมวลผล',
  })
  @IsEnum(QUEUE_NAME_VALUES)
  queueName!: 'ai-realtime' | 'ai-batch';
}
+74 -34
View File
@@ -1,53 +1,93 @@
// File: src/modules/ai/dto/create-ai-job.dto.ts
// Change Log
// - 2026-05-15: เพิ่ม DTO สำหรับ enqueue AI jobs ตาม ADR-023A US1.
// File: backend/src/modules/ai/dto/create-ai-job.dto.ts
// Change Log:
// - 2026-06-11: Refactored CreateAiJobDto to support new AI runtime policy contract (Option B)
// - 2026-06-11: เพิ่ม IsObject ใน class-validator import
// - 2026-06-11: ใช้ import type สำหรับ PublicJobType เพื่อแก้ปัญหา TS1272
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
import {
IsIn,
IsNotEmpty,
IsObject,
IsEnum,
IsOptional,
IsString,
IsUUID,
IsObject,
registerDecorator,
ValidationOptions,
ValidationArguments,
} from 'class-validator';
import type { PublicJobType } from '../interfaces/execution-policy.interface';
export const AI_JOB_TYPES = [
'ai-suggest',
'rag-query',
'ocr',
'extract-metadata',
'embed-document',
] as const;
/**
 * Property decorator that forbids callers from supplying the decorated field.
 *
 * Validation passes only when the property is `undefined`; any other value,
 * including `null`, is rejected with a message stating that the backend
 * determines the execution policy.
 *
 * @param validationOptions Standard class-validator options, passed through unchanged.
 */
export function IsForbidden(validationOptions?: ValidationOptions) {
  return (object: object, propertyName: string): void => {
    const isAbsent = (value: unknown): boolean => value === undefined;
    const describeViolation = ({ property }: ValidationArguments): string =>
      `${property} is forbidden in payload. Backend determines execution policy.`;

    registerDecorator({
      name: 'isForbidden',
      target: object.constructor,
      propertyName,
      options: validationOptions,
      validator: {
        validate: isAbsent,
        defaultMessage: describeViolation,
      },
    });
  };
}
export type CreateAiJobType = (typeof AI_JOB_TYPES)[number];
/** DTO สำหรับส่งงาน AI เข้า BullMQ โดยใช้ publicId เท่านั้นตาม ADR-019 */
export class CreateAiJobDto {
@ApiProperty({ description: 'Attachment/document publicId สำหรับงาน AI' })
@IsUUID()
documentPublicId!: string;
@ApiProperty({ description: 'Project publicId สำหรับ project isolation' })
@IsUUID()
projectPublicId!: string;
@ApiProperty({
enum: AI_JOB_TYPES,
enum: ['auto-fill-document', 'migrate-document', 'rag-query'],
description: 'ชนิดงาน AI ที่ต้อง enqueue',
})
@IsIn(AI_JOB_TYPES)
jobType!: CreateAiJobType;
@ApiProperty({ description: 'Idempotency key จาก request header/body' })
@IsString()
@IsNotEmpty()
idempotencyKey!: string;
@IsEnum(['auto-fill-document', 'migrate-document', 'rag-query'])
type!: PublicJobType;
@ApiPropertyOptional({
description: 'Payload เพิ่มเติม เช่น pdfPath, extractedText, question',
description: 'Document publicId (UUIDv7) สำหรับงาน AI',
})
@IsOptional()
@IsUUID('all')
documentPublicId?: string;
@ApiPropertyOptional({
description: 'Attachment publicId (UUIDv7) สำหรับงาน AI',
})
@IsOptional()
@IsUUID('all')
attachmentPublicId?: string;
@ApiPropertyOptional({
description: 'Payload ข้อมูลเพิ่มเติมสำหรับงานแต่ละประเภท',
})
@IsOptional()
@IsObject()
payload?: Record<string, unknown>;
@ApiPropertyOptional({
description: 'Project publicId สำหรับ project isolation',
})
@IsOptional()
@IsUUID('all')
projectPublicId?: string;
// ฟิลด์ต้องห้ามตามข้อกำหนด FR-A01 เพื่อป้องกันการแทรกแซง policy จาก caller
@IsForbidden()
executionProfile?: unknown;
@IsForbidden()
model?: unknown;
@IsForbidden()
temperature?: unknown;
@IsForbidden()
top_p?: unknown;
@IsForbidden()
maxTokens?: unknown;
}
@@ -1,7 +1,8 @@
// File: src/modules/ai/entities/ai-audit-log.entity.ts
// File: backend/src/modules/ai/entities/ai-audit-log.entity.ts
// Change Log
// - 2026-05-14: เพิ่ม ADR-023 feedback fields โดยคง legacy audit fields ไว้ช่วงเปลี่ยนผ่าน.
// - 2026-05-30: เพิ่ม modelType, vramUsageMB, cacheHit สำหรับ Typhoon OCR integration (T008, ADR-032).
// - 2026-06-11: เปลี่ยน Record<string, any> เป็น Record<string, unknown> เพื่อแก้ปัญหา ESLint
// Entity สำหรับตาราง ai_audit_logs — บันทึก AI Interaction และ feedback ตาม ADR-023
import {
@@ -100,6 +101,25 @@ export class AiAuditLog extends UuidBaseEntity {
@Column({ name: 'error_message', type: 'text', nullable: true })
errorMessage?: string;
@Column({
name: 'effective_profile',
type: 'varchar',
length: 50,
nullable: true,
})
effectiveProfile?: string;
@Column({
name: 'canonical_model',
type: 'varchar',
length: 50,
nullable: true,
})
canonicalModel?: string;
@Column({ name: 'snapshot_params_json', type: 'json', nullable: true })
snapshotParamsJson?: Record<string, unknown>;
@CreateDateColumn({ name: 'created_at' })
createdAt!: Date;
}
@@ -0,0 +1,51 @@
// File: backend/src/modules/ai/entities/ai-execution-profile.entity.ts
// Change Log:
// - 2026-06-11: Initial creation of AiExecutionProfile entity for AI execution profiles
import {
Column,
CreateDateColumn,
Entity,
PrimaryGeneratedColumn,
UpdateDateColumn,
} from 'typeorm';
/**
 * Read-side conversion for DECIMAL columns.
 * MySQL/PostgreSQL drivers hand DECIMAL values back as strings by default, which would
 * contradict the `number` typing below. Writes pass through untouched; null/undefined
 * are preserved.
 */
const decimalToNumberTransformer = {
  to: (value?: number | null): number | null | undefined => value,
  from: (value?: string | number | null): number | null | undefined =>
    value === null || value === undefined ? value : Number(value),
};

/** Entity for the `ai_execution_profiles` table: one row of runtime parameters per named profile. */
@Entity('ai_execution_profiles')
export class AiExecutionProfile {
  @PrimaryGeneratedColumn()
  id!: number;

  /** Unique profile name. */
  @Column({ name: 'profile_name', unique: true, length: 50 })
  profileName!: string;

  @Column({
    type: 'decimal',
    precision: 4,
    scale: 3,
    transformer: decimalToNumberTransformer,
  })
  temperature!: number;

  @Column({
    name: 'top_p',
    type: 'decimal',
    precision: 4,
    scale: 3,
    transformer: decimalToNumberTransformer,
  })
  topP!: number;

  @Column({ name: 'max_tokens', type: 'int' })
  maxTokens!: number;

  @Column({ name: 'num_ctx', type: 'int' })
  numCtx!: number;

  @Column({
    name: 'repeat_penalty',
    type: 'decimal',
    precision: 5,
    scale: 3,
    transformer: decimalToNumberTransformer,
  })
  repeatPenalty!: number;

  @Column({ name: 'keep_alive_seconds', type: 'int' })
  keepAliveSeconds!: number;

  @Column({ name: 'is_active', type: 'boolean', default: true })
  isActive!: boolean;

  /** NOTE(review): presumably the internal id of the user who last edited the profile — confirm. */
  @Column({ name: 'updated_by', type: 'int', nullable: true })
  updatedBy?: number;

  @CreateDateColumn({ name: 'created_at' })
  createdAt!: Date;

  @UpdateDateColumn({ name: 'updated_at' })
  updatedAt!: Date;
}
@@ -0,0 +1,79 @@
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
// Change Log:
// - 2026-06-11: Initial creation of execution policy interfaces for AI runtime policy refactor
/** Job types that external callers may request through the API. */
export type PublicJobType = 'auto-fill-document' | 'migrate-document' | 'rag-query';

/** Every job type used inside the system: the public ones plus internal-only ones. */
export type InternalJobType =
  | PublicJobType
  | 'intent-classify'
  | 'tool-suggest'
  | 'ocr-extract'
  | 'sandbox-analysis';

/** Named execution profiles that select runtime resources and parameters. */
export type ExecutionProfile = 'interactive' | 'standard' | 'quality' | 'deep-analysis';

/** Runtime configuration parameters for one model invocation. */
export interface RuntimePolicy {
  canonicalModel: 'np-dms-ai' | 'np-dms-ocr';
  temperature: number;
  topP: number;
  maxTokens: number;
  numCtx: number;
  repeatPenalty: number;
  keepAliveSeconds: number;
}

/** GPU VRAM usage statistics. */
export interface VramHeadroom {
  totalMb: number;
  usedMb: number;
  availableMb: number;
  querySuccess: boolean;
  mainModelVramMb?: number;
}

/**
 * Job data placed on a BullMQ queue.
 * `snapshotParams` carries the same fields as RuntimePolicy, minus the model name,
 * which sits at the top level as `canonicalModel`.
 */
export interface AiJobPayload {
  jobType: InternalJobType;
  documentPublicId?: string;
  attachmentPublicId?: string;
  effectiveProfile: ExecutionProfile;
  canonicalModel: RuntimePolicy['canonicalModel'];
  snapshotParams: Omit<RuntimePolicy, 'canonicalModel'>;
}
@@ -0,0 +1,34 @@
// File: backend/src/modules/ai/interfaces/ocr-residency.interface.ts
// Change Log:
// - 2026-06-11: Initial creation of OCR residency interfaces for AI runtime policy refactor
import { ExecutionProfile } from './execution-policy.interface';
/**
 * Runtime parameters for the OCR model (per the original note: SCB10X Typhoon OCR).
 * Every field except `keepAliveSeconds` is pinned to a single literal value by the type.
 */
export interface OcrRuntimePolicy {
  /** Always the canonical OCR model name. */
  canonicalModel: 'np-dms-ocr';

  // Sampling parameters (fixed).
  temperature: 0.1;
  topP: 0.1;
  repeatPenalty: 1.1;

  // Context and output sizes (fixed).
  numCtx: 8192;
  numPredict: 4096;

  /** The only tunable value in this policy. */
  keepAliveSeconds: number;
}
/** Reason codes attached to an OCR residency decision. */
type OcrResidencyReason =
  | 'deep-analysis-active'
  | 'high-pressure'
  | 'headroom-sufficient'
  | 'query-failed';

/**
 * Outcome of the adaptive OCR residency decision, i.e. whether the OCR model
 * should stay loaded in VRAM.
 */
export interface OcrResidencyDecision {
  keepAliveSeconds: number;
  vramHeadroomMb: number;
  /** Execution profile active when the decision was made, or null when none. */
  activeProfile: ExecutionProfile | null;
  reason: OcrResidencyReason;
}
@@ -1,4 +1,4 @@
// File: src/modules/ai/processors/ai-batch.processor.ts
// File: backend/src/modules/ai/processors/ai-batch.processor.ts
// Change Log
// - 2026-06-08: แก้ไขปัญหา LLM JSON response truncated โดยการเพิ่ม num_ctx เป็น 16384 ใน sandbox-extract, sandbox-ai-extract และ migrate-document (แก้ไขโดย AGY Gemini 3.5 Flash (Medium))
// - 2026-05-15: เพิ่ม processor สำหรับ ai-batch queue ตาม ADR-023A.
@@ -12,8 +12,11 @@
// - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ
// - 2026-06-03: ADR-034 — เพิ่ม 'ocr-extract' job type + OCR_JOB_TYPES constant + processOcrExtract() ที่มี model switching logic (unload main → load OCR → generate → reload main)
// - 2026-06-06: แก้ไข bug LLM JSON parse failure — เพิ่ม retry logic (2 attempts), debug log raw response, และปรับปรุง error message ให้แสดงทั้ง raw และ cleaned response
// - 2026-06-11: US2 - ส่ง activeProfile ไปยัง detectAndExtract ในการประมวลผล OCR และบันทึก retrieval device metadata ใน audit logs
// - 2026-06-11: US4 - เพิ่มการรองรับ ai-suggest และ rag-query ใน batch processor หลังการทำ redirection
// - 2026-06-06: เพิ่ม OCR text truncation (MAX_OCR_TEXT_CHARS=15000) เพื่อป้องกัน context overflow เมื่อเอกสารยาวมากชน num_ctx 8192
// - 2026-06-06: [T036] เพิ่ม ollamaOptions: { num_ctx: 8192 } ใน generateStructuredJson เพื่อรองรับ prompt ยาว 18k+ chars และแก้ไข bug response ว่างจาก context window ไม่พอ
// - 2026-06-11: แก้ไข ESLint errors โดยการเพิ่ม properties (effectiveProfile, canonicalModel, snapshotParams) ใน AiBatchJobData และยกเลิกการใช้ as any
import { Processor, WorkerHost } from '@nestjs/bullmq';
import { Logger } from '@nestjs/common';
@@ -31,13 +34,17 @@ import {
SandboxOcrEngineService,
SandboxOcrEngineType,
} from '../services/sandbox-ocr-engine.service';
import { OllamaService } from '../services/ollama.service';
import {
OllamaService,
OllamaGenerateOptions,
} from '../services/ollama.service';
import { Project } from '../../project/entities/project.entity';
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
import { TagsService } from '../../tags/tags.service';
import { MigrationService } from '../../migration/migration.service';
import { MigrationErrorType } from '../../migration/entities/migration-error.entity';
import { AiPromptsService } from '../prompts/ai-prompts.service';
import type { ExecutionProfile } from '../interfaces/execution-policy.interface';
interface MigrateDocumentMetadata extends Record<string, unknown> {
projectPublicId?: string;
@@ -62,7 +69,9 @@ export type AiBatchJobType =
| 'sandbox-ocr-only'
| 'sandbox-ai-extract'
| 'migrate-document'
| 'rag-prepare';
| 'rag-prepare'
| 'ai-suggest'
| 'rag-query';
/** รายการ job types ที่ต้องใช้ Typhoon OCR model — จะ trigger model switching (ADR-034) */
export const OCR_JOB_TYPES: ReadonlyArray<AiBatchJobType> = [
@@ -76,6 +85,16 @@ export interface AiBatchJobData {
payload: Record<string, unknown>;
batchId?: string;
idempotencyKey: string;
effectiveProfile?: ExecutionProfile;
canonicalModel?: 'np-dms-ai' | 'np-dms-ocr';
snapshotParams?: {
temperature: number;
topP: number;
maxTokens: number;
numCtx: number;
repeatPenalty: number;
keepAliveSeconds: number;
};
}
/** OCR text สูงสุดที่ส่งเข้า LLM prompt — ป้องกัน context overflow (num_ctx 8192, Thai ~3 chars/token) */
@@ -286,6 +305,16 @@ export class AiBatchProcessor extends WorkerHost {
await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
}
return;
case 'ai-suggest':
this.logger.log(
`AI Suggest job processing — jobId=${String(job.id)}`
);
await this.processSuggest(job);
return;
case 'rag-query':
this.logger.log(`RAG query job processing — jobId=${String(job.id)}`);
await this.processRagQuery(job);
return;
case 'embed-document':
this.logger.log(`Embedding job processing — jobId=${String(job.id)}`);
await this.processEmbedDocument(job.data);
@@ -353,6 +382,7 @@ export class AiBatchProcessor extends WorkerHost {
/** ประมวลผล embed-document job ด้วย EmbeddingService (T022) */
private async processEmbedDocument(data: AiBatchJobData): Promise<void> {
const startTime = Date.now();
const { documentPublicId, projectPublicId, payload } = data;
const pdfPath = payload.pdfPath as string;
const extractedText = readString(payload.extractedText);
@@ -378,6 +408,7 @@ export class AiBatchProcessor extends WorkerHost {
pdfPath,
extractedText,
documentPublicId,
activeProfile: data.effectiveProfile,
})
).text;
const result = await this.embeddingService.embedDocument(
@@ -394,6 +425,19 @@ export class AiBatchProcessor extends WorkerHost {
if (!result.success) {
throw new Error(`Embedding failed: ${result.error ?? 'Unknown error'}`);
}
const durationMs = Date.now() - startTime;
await this.saveAiAuditLog({
documentPublicId,
aiModel: data.canonicalModel ?? 'np-dms-ai',
status: AiAuditStatus.SUCCESS,
processingTimeMs: durationMs,
effectiveProfile: data.effectiveProfile,
canonicalModel: data.canonicalModel,
snapshotParamsJson: {
...(data.snapshotParams ?? {}),
retrievalDevice: result.device,
},
});
this.logger.log(
`Embedding completed for document ${documentPublicId}${result.chunksEmbedded} chunks embedded`
);
@@ -782,6 +826,7 @@ export class AiBatchProcessor extends WorkerHost {
}
private async processRagPrepare(data: AiBatchJobData): Promise<void> {
const startTime = Date.now();
const payload = data.payload || {};
const documentPublicId =
(payload.documentPublicId as string) || data.documentPublicId;
@@ -795,12 +840,9 @@ export class AiBatchProcessor extends WorkerHost {
const documentDate = (payload.documentDate as string) || undefined;
let cachedOcrText = (payload.cachedOcrText as string) || undefined;
const attachmentPath = (payload.attachmentPath as string) || undefined;
this.logger.log(
`processRagPrepare: starting for doc=${documentPublicId}, project=${projectPublicId}`
);
// T020a: Resolve OCR text. Use cached if available; otherwise extract using OcrService
if (!cachedOcrText && attachmentPath) {
this.logger.log(
`processRagPrepare: No cached OCR text. Extracting text from ${attachmentPath}...`
@@ -808,6 +850,7 @@ export class AiBatchProcessor extends WorkerHost {
try {
const ocrResult = await this.ocrService.detectAndExtract({
pdfPath: attachmentPath,
activeProfile: data.effectiveProfile,
});
cachedOcrText = ocrResult.text;
} catch (err: unknown) {
@@ -816,28 +859,23 @@ export class AiBatchProcessor extends WorkerHost {
throw err;
}
}
if (!cachedOcrText) {
this.logger.warn(
`processRagPrepare: ไม่มี OCR text และไม่มี attachment path - skip embedding`
);
return;
}
// T020b: skip-guard (< 50 chars)
if (cachedOcrText.trim().length < 50) {
this.logger.warn(
`processRagPrepare: OCR text สั้นเกินไป (${cachedOcrText.trim().length} chars) — skip embedding`
);
return;
}
// T020c: embed + upsert pipeline
try {
this.logger.log(
`processRagPrepare: chunking and embedding document ${documentPublicId}...`
);
await this.embeddingService.embedDocument(
const result = await this.embeddingService.embedDocument(
projectPublicId,
documentPublicId,
correspondenceNumber,
@@ -848,6 +886,19 @@ export class AiBatchProcessor extends WorkerHost {
documentDate,
cachedOcrText
);
const durationMs = Date.now() - startTime;
await this.saveAiAuditLog({
documentPublicId,
aiModel: data.canonicalModel ?? 'np-dms-ai',
status: AiAuditStatus.SUCCESS,
processingTimeMs: durationMs,
effectiveProfile: data.effectiveProfile,
canonicalModel: data.canonicalModel,
snapshotParamsJson: {
...(data.snapshotParams ?? {}),
retrievalDevice: result.device,
},
});
this.logger.log(
`processRagPrepare: successfully processed document ${documentPublicId}`
);
@@ -864,6 +915,7 @@ export class AiBatchProcessor extends WorkerHost {
): Promise<void> {
const startTime = Date.now();
const { documentPublicId, projectPublicId, payload, batchId } = job.data;
const modelUsed = job.data.canonicalModel;
const docNumber = payload.documentNumber as string;
const contextOverride =
payload.contextOverride &&
@@ -888,6 +940,7 @@ export class AiBatchProcessor extends WorkerHost {
try {
ocrResult = await this.ocrService.detectAndExtract({
pdfPath: attachment.filePath,
activeProfile: job.data.effectiveProfile,
});
} catch (err: unknown) {
const errMsg = err instanceof Error ? err.message : String(err);
@@ -904,6 +957,9 @@ export class AiBatchProcessor extends WorkerHost {
status: AiAuditStatus.FAILED,
errorMessage: errMsg,
processingTimeMs: Date.now() - startTime,
effectiveProfile: job.data.effectiveProfile,
canonicalModel: job.data.canonicalModel,
snapshotParamsJson: job.data.snapshotParams,
});
throw err;
}
@@ -930,11 +986,28 @@ export class AiBatchProcessor extends WorkerHost {
let aiResponse: string;
try {
aiResponse = await this.ollamaService.generate(resolvedPrompt, {
const snapshotParams = job.data.snapshotParams;
const generateOptions: OllamaGenerateOptions = {
format: 'json',
timeoutMs: 120000,
options: { num_ctx: 16384, num_predict: 4096 },
});
model: modelUsed,
};
if (snapshotParams) {
generateOptions.options = {
temperature: snapshotParams.temperature,
top_p: snapshotParams.topP,
num_predict: snapshotParams.maxTokens,
num_ctx: snapshotParams.numCtx,
repeat_penalty: snapshotParams.repeatPenalty,
};
generateOptions.keepAlive = snapshotParams.keepAliveSeconds;
} else {
generateOptions.options = { num_ctx: 16384, num_predict: 4096 };
}
aiResponse = await this.ollamaService.generate(
resolvedPrompt,
generateOptions
);
} catch (err: unknown) {
const errMsg = err instanceof Error ? err.message : String(err);
this.logger.error(`การวิเคราะห์ของ AI ล้มเหลว: ${errMsg}`);
@@ -946,10 +1019,13 @@ export class AiBatchProcessor extends WorkerHost {
});
await this.saveAiAuditLog({
documentPublicId,
aiModel: this.ollamaService.getMainModelName(),
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
status: AiAuditStatus.FAILED,
errorMessage: errMsg,
processingTimeMs: Date.now() - startTime,
effectiveProfile: job.data.effectiveProfile,
canonicalModel: job.data.canonicalModel,
snapshotParamsJson: job.data.snapshotParams,
});
throw err;
}
@@ -972,10 +1048,13 @@ export class AiBatchProcessor extends WorkerHost {
});
await this.saveAiAuditLog({
documentPublicId,
aiModel: this.ollamaService.getMainModelName(),
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
status: AiAuditStatus.FAILED,
errorMessage: errMsg,
processingTimeMs: Date.now() - startTime,
effectiveProfile: job.data.effectiveProfile,
canonicalModel: job.data.canonicalModel,
snapshotParamsJson: job.data.snapshotParams,
});
throw new Error(errMsg);
}
@@ -1132,11 +1211,14 @@ export class AiBatchProcessor extends WorkerHost {
await this.saveAiAuditLog({
documentPublicId,
aiModel: this.ollamaService.getMainModelName(),
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
status: AiAuditStatus.SUCCESS,
aiSuggestionJson: extractedMetadata as unknown as Record<string, unknown>,
confidenceScore: confidence,
processingTimeMs: Date.now() - startTime,
effectiveProfile: job.data.effectiveProfile,
canonicalModel: job.data.canonicalModel,
snapshotParamsJson: job.data.snapshotParams,
});
this.logger.log(
`ประมวลผลเอกสาร ${docNumber} สำเร็จและถูกส่งเข้า Staging Queue แล้ว`
@@ -1151,6 +1233,9 @@ export class AiBatchProcessor extends WorkerHost {
confidenceScore?: number;
processingTimeMs?: number;
errorMessage?: string;
effectiveProfile?: string;
canonicalModel?: string;
snapshotParamsJson?: Record<string, unknown>;
}): Promise<void> {
try {
const log = this.aiAuditLogRepo.create({
@@ -1162,6 +1247,9 @@ export class AiBatchProcessor extends WorkerHost {
confidenceScore: data.confidenceScore,
processingTimeMs: data.processingTimeMs,
errorMessage: data.errorMessage,
effectiveProfile: data.effectiveProfile,
canonicalModel: data.canonicalModel,
snapshotParamsJson: data.snapshotParamsJson,
});
await this.aiAuditLogRepo.save(log);
} catch (err: unknown) {
@@ -1170,4 +1258,149 @@ export class AiBatchProcessor extends WorkerHost {
);
}
}
/**
 * Runs a `rag-query` job through the RAG service.
 *
 * Reads `query`, `requestPublicId` and `userPublicId` from the job payload.
 * Only string values are accepted; a missing or non-string `requestPublicId`
 * falls back to the job's idempotency key and a missing or non-string
 * `userPublicId` falls back to `'system'`.
 *
 * @param job BullMQ job carrying the query payload.
 * @throws Error when `payload.query` is missing, not a string, or blank.
 */
private async processRagQuery(job: Job<AiBatchJobData>): Promise<void> {
  const { data } = job;
  const fields = data.payload || {};

  // Yields the payload entry only when it is a string (empty strings included).
  const textField = (key: string): string | undefined => {
    const candidate = fields[key];
    return typeof candidate === 'string' ? candidate : undefined;
  };

  const question = textField('query') ?? '';
  if (!question.trim()) {
    throw new Error('payload.query is required for rag-query jobs');
  }

  const requestId = textField('requestPublicId') ?? data.idempotencyKey;
  const requester = textField('userPublicId') ?? 'system';

  // A fresh, never-aborted signal is passed because the call requires one.
  await this.ragService.processQuery(
    requestId,
    question,
    data.projectPublicId,
    requester,
    new AbortController().signal
  );
}
/**
 * Handles an `ai-suggest` job: resolves the document text (running OCR when
 * needed), asks the LLM for DMS metadata, flags categories that are missing
 * from the supplied master data, and records the outcome in the AI audit log.
 *
 * The dispatch-time snapshot on the job (`snapshotParams`, `canonicalModel`)
 * is applied to the generation call so the audit log describes the parameters
 * that were actually used, and `effectiveProfile` is forwarded to OCR. Jobs
 * without a snapshot keep the previous behaviour (service defaults).
 *
 * @param job BullMQ job; reads `payload.extractedText`, `payload.pdfPath`,
 *   `payload.extractedChars` and `payload.masterDataCategories`.
 * @returns An object with the normalized `suggestion` and the `ocrUsed` flag
 *   reported by the OCR service.
 * @throws Re-throws any failure after marking the document `FAILED` (when a
 *   document id is present) and writing a FAILED audit entry.
 */
private async processSuggest(
  job: Job<AiBatchJobData>
): Promise<Record<string, unknown>> {
  const startTime = Date.now();
  try {
    if (job.data.documentPublicId) {
      await this.setAiProcessingStatus(
        job.data.documentPublicId,
        'PROCESSING'
      );
    }
    const payload = job.data.payload || {};
    const extractedText =
      typeof payload['extractedText'] === 'string'
        ? payload['extractedText']
        : '';
    const pdfPath =
      typeof payload['pdfPath'] === 'string' ? payload['pdfPath'] : undefined;
    const extractedChars =
      typeof payload['extractedChars'] === 'number'
        ? payload['extractedChars']
        : extractedText.length;
    const textResult = await this.ocrService.detectAndExtract({
      extractedText,
      extractedChars,
      pdfPath,
      // Forward the profile so OCR can take it into account.
      activeProfile: job.data.effectiveProfile,
    });
    const prompt = [
      'Extract concise DMS metadata from this engineering document.',
      'Return only JSON with fields: title, documentType, category, confidenceScore.',
      textResult.text.slice(0, 6000),
    ].join('\n');

    // Apply the policy snapshot captured at dispatch time; without one, fall
    // back to the service defaults exactly as before.
    const snapshotParams = job.data.snapshotParams;
    let rawOutput: string;
    if (snapshotParams) {
      const generateOptions: OllamaGenerateOptions = {
        model: job.data.canonicalModel,
        options: {
          temperature: snapshotParams.temperature,
          top_p: snapshotParams.topP,
          num_predict: snapshotParams.maxTokens,
          num_ctx: snapshotParams.numCtx,
          repeat_penalty: snapshotParams.repeatPenalty,
        },
        keepAlive: snapshotParams.keepAliveSeconds,
      };
      rawOutput = await this.ollamaService.generate(prompt, generateOptions);
    } else {
      rawOutput = await this.ollamaService.generate(prompt);
    }

    const suggestion = this.parseSuggestion(rawOutput);
    // flagUnknownCategories validates the value itself, so no cast is needed.
    const normalizedSuggestion = this.flagUnknownCategories(
      suggestion,
      payload['masterDataCategories']
    );
    await this.saveAiAuditLog({
      documentPublicId: job.data.documentPublicId,
      aiModel:
        job.data.canonicalModel ?? this.ollamaService.getMainModelName(),
      status: AiAuditStatus.SUCCESS,
      aiSuggestionJson: normalizedSuggestion,
      confidenceScore: this.extractConfidence(normalizedSuggestion),
      processingTimeMs: Date.now() - startTime,
      effectiveProfile: job.data.effectiveProfile,
      canonicalModel: job.data.canonicalModel,
      snapshotParamsJson: job.data.snapshotParams,
    });
    if (job.data.documentPublicId) {
      await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
    }
    return {
      suggestion: normalizedSuggestion,
      ocrUsed: textResult.ocrUsed,
    };
  } catch (err: unknown) {
    if (job.data.documentPublicId) {
      await this.setAiProcessingStatus(job.data.documentPublicId, 'FAILED');
    }
    await this.saveAiAuditLog({
      documentPublicId: job.data.documentPublicId,
      aiModel:
        job.data.canonicalModel ?? this.ollamaService.getMainModelName(),
      status: AiAuditStatus.FAILED,
      processingTimeMs: Date.now() - startTime,
      errorMessage: err instanceof Error ? err.message : String(err),
      effectiveProfile: job.data.effectiveProfile,
      canonicalModel: job.data.canonicalModel,
      snapshotParamsJson: job.data.snapshotParams,
    });
    throw err;
  }
}
/**
 * Parses the raw LLM output of an `ai-suggest` job into a plain object.
 *
 * Surrounding whitespace and a wrapping Markdown code fence (three backticks,
 * optionally tagged `json`) are removed before parsing, because the model is
 * not called in JSON mode and may decorate its answer.
 *
 * @param rawOutput Text returned by the model.
 * @returns The parsed JSON object, or a fallback
 *   `{ title, confidenceScore: 0, is_unknown: true }` built from the first
 *   250 characters of `rawOutput` when the output is not a JSON object.
 */
private parseSuggestion(rawOutput: string): Record<string, unknown> {
  const unfenced = rawOutput
    .trim()
    .replace(/^```(?:json)?\s*/i, '')
    .replace(/\s*```$/, '');
  try {
    const parsed: unknown = JSON.parse(unfenced);
    // Arrays and primitives are valid JSON but not a usable suggestion.
    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    this.logger.warn('AI suggestion output was not valid JSON');
  }
  return {
    title: rawOutput.slice(0, 250),
    confidenceScore: 0,
    is_unknown: true,
  };
}
/**
 * Marks a suggestion as unknown when its `category` is not part of the
 * supplied master data (case-insensitive comparison).
 *
 * @param suggestion Parsed suggestion object.
 * @param masterDataCategories Expected to be an array of category names;
 *   any non-array value disables the check and non-string entries are ignored.
 * @returns The same suggestion when no flag is needed, otherwise a shallow
 *   copy with `is_unknown: true`.
 */
private flagUnknownCategories(
  suggestion: Record<string, unknown>,
  masterDataCategories: unknown
): Record<string, unknown> {
  if (!Array.isArray(masterDataCategories)) {
    return suggestion;
  }

  const suggested = suggestion['category'];
  if (typeof suggested !== 'string') {
    return suggestion;
  }

  const wanted = suggested.toLowerCase();
  const isKnown = masterDataCategories.some(
    (entry: unknown) =>
      typeof entry === 'string' && entry.toLowerCase() === wanted
  );
  return isKnown ? suggestion : { ...suggestion, is_unknown: true };
}
/**
 * Reads `confidenceScore` from a suggestion.
 *
 * @param suggestion Parsed suggestion object.
 * @returns The score when it is a number, otherwise `undefined`.
 */
private extractConfidence(
  suggestion: Record<string, unknown>
): number | undefined {
  const score = suggestion['confidenceScore'];
  if (typeof score !== 'number') {
    return undefined;
  }
  return score;
}
}
@@ -1,7 +1,9 @@
// File: src/modules/ai/processors/ai-realtime.processor.ts
// File: backend/src/modules/ai/processors/ai-realtime.processor.ts
// Change Log
// - 2026-05-15: เพิ่ม processor สำหรับ ai-realtime queue และ pause/resume ai-batch ตาม ADR-023A.
// - 2026-06-03: ADR-034 — เปลี่ยน aiModel ใน audit log จาก hardcode 'gemma4' เป็น ollamaService.getMainModelName()
// - 2026-06-11: ปรับ concurrency และเพิ่ม job classification เพื่อ redirect ไป ai-batch (US4)
// - 2026-06-11: แก้ไขปัญหา compile error สำหรับ unreachable check ใน switch-case และลบบรรทัดว่างในฟังก์ชัน process
import {
Processor,
@@ -22,7 +24,11 @@ import { Attachment } from '../../../common/file-storage/entities/attachment.ent
import { OcrService } from '../services/ocr.service';
import { OllamaService } from '../services/ollama.service';
export type AiRealtimeJobType = 'ai-suggest' | 'rag-query';
export type AiRealtimeJobType =
| 'ai-suggest'
| 'rag-query'
| 'intent-classify'
| 'tool-suggest';
export interface AiRealtimeJobData {
jobType: AiRealtimeJobType;
@@ -34,9 +40,16 @@ export interface AiRealtimeJobData {
}
/** Processor สำหรับงาน AI interactive ที่ต้องกัน batch job ระหว่างใช้ GPU */
@Processor(QUEUE_AI_REALTIME, { concurrency: 1 })
@Processor(QUEUE_AI_REALTIME, {
concurrency: Number(
process.env.AI_REALTIME_CONCURRENCY ||
process.env.REALTIME_CONCURRENCY ||
'2'
),
})
export class AiRealtimeProcessor extends WorkerHost {
private readonly logger = new Logger(AiRealtimeProcessor.name);
private activeRealtimeJobs = 0;
constructor(
@InjectQueue(QUEUE_AI_BATCH)
@@ -53,12 +66,32 @@ export class AiRealtimeProcessor extends WorkerHost {
/** Dispatch งาน ai-realtime ตาม jobType */
async process(job: Job<AiRealtimeJobData>): Promise<unknown> {
const LIGHTWEIGHT_REALTIME_JOBS = ['intent-classify', 'tool-suggest'];
const isLightweight = LIGHTWEIGHT_REALTIME_JOBS.includes(job.data.jobType);
this.logger.log(
`Job classification decision — jobId=${String(job.id)}, jobType=${job.data.jobType}, isLightweight=${isLightweight}`
);
if (!isLightweight) {
this.logger.warn(
`Redirecting generation-heavy job to ai-batch queue — jobId=${String(job.id)}, jobType=${String(job.data.jobType)}`
);
await this.aiBatchQueue.add(job.data.jobType, job.data, {
jobId: job.id ?? undefined,
});
return;
}
switch (job.data.jobType) {
case 'intent-classify':
this.logger.log(`Processing intent-classify — jobId=${String(job.id)}`);
return { success: true, intent: 'GET_RFA' };
case 'tool-suggest':
this.logger.log(`Processing tool-suggest — jobId=${String(job.id)}`);
return { success: true, suggestions: [] };
case 'ai-suggest':
return this.processSuggest(job);
case 'rag-query':
this.logger.log(`RAG query queued — jobId=${String(job.id)}`);
return;
throw new Error(
`Job type ${job.data.jobType} should have been redirected to batch queue.`
);
default: {
const unreachable: never = job.data.jobType;
throw new Error(
@@ -203,27 +236,48 @@ export class AiRealtimeProcessor extends WorkerHost {
/** เมื่อ interactive job เริ่ม ให้ pause batch queue เพื่อกัน GPU contention */
@OnWorkerEvent('active')
async onActive(job: Job<AiRealtimeJobData>): Promise<void> {
await this.aiBatchQueue.pause();
this.activeRealtimeJobs += 1;
if (this.activeRealtimeJobs === 1) {
await this.aiBatchQueue.pause();
this.logger.warn(
`ai-batch paused while ai-realtime job is active — jobId=${String(job.id)}`
);
return;
}
this.logger.warn(
`ai-batch paused while ai-realtime job is active jobId=${String(job.id)}`
`ai-realtime active jobs=${String(this.activeRealtimeJobs)} — keep ai-batch paused`
);
}
/** เมื่อ interactive job เสร็จ ให้ resume batch queue */
@OnWorkerEvent('completed')
async onCompleted(job: Job<AiRealtimeJobData>): Promise<void> {
await this.aiBatchQueue.resume();
this.activeRealtimeJobs = Math.max(0, this.activeRealtimeJobs - 1);
if (this.activeRealtimeJobs === 0) {
await this.aiBatchQueue.resume();
this.logger.log(
`ai-batch resumed after ai-realtime completion — jobId=${String(job.id)}`
);
return;
}
this.logger.log(
`ai-batch resumed after ai-realtime completion — jobId=${String(job.id)}`
`ai-realtime jobs still active (${String(this.activeRealtimeJobs)}) — ai-batch remains paused`
);
}
/** เมื่อ interactive job fail ให้ resume batch queue เช่นกัน */
@OnWorkerEvent('failed')
async onFailed(job: Job<AiRealtimeJobData> | undefined): Promise<void> {
await this.aiBatchQueue.resume();
this.activeRealtimeJobs = Math.max(0, this.activeRealtimeJobs - 1);
if (this.activeRealtimeJobs === 0) {
await this.aiBatchQueue.resume();
this.logger.warn(
`ai-batch resumed after ai-realtime failure — jobId=${String(job?.id ?? 'unknown')}`
);
return;
}
this.logger.warn(
`ai-batch resumed after ai-realtime failure — jobId=${String(job?.id ?? 'unknown')}`
`ai-realtime jobs still active after failure (${String(this.activeRealtimeJobs)}) — ai-batch remains paused`
);
}
}
@@ -0,0 +1,183 @@
// File: backend/src/modules/ai/services/ai-policy.service.ts
// Change Log:
// - 2026-06-11: Initial creation of AiPolicyService for managing execution profiles and policies
// - 2026-06-11: แก้ไขข้อผิดพลาด TS2367 (เทียบ profile กับ ocr-extract) และลบบรรทัดว่างในฟังก์ชัน getProfileParameters
import { Injectable, Logger } from '@nestjs/common';
import { InjectRedis } from '@nestjs-modules/ioredis';
import { InjectRepository } from '@nestjs/typeorm';
import type Redis from 'ioredis';
import { Repository } from 'typeorm';
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
import {
ExecutionProfile,
InternalJobType,
RuntimePolicy,
AiJobPayload,
} from '../interfaces/execution-policy.interface';
/**
 * Resolves AI execution profiles and their runtime parameters.
 *
 * Parameters are looked up in Redis first, then in the database, and finally
 * in the built-in defaults below. Database results are cached in Redis for
 * `cacheTtlSeconds` seconds.
 */
@Injectable()
export class AiPolicyService {
  private readonly logger = new Logger(AiPolicyService.name);
  private readonly cachePrefix = 'ai_execution_profiles:';
  private readonly cacheTtlSeconds = 60;

  /** Built-in fallback policies used when neither cache nor database yields one. */
  private readonly defaultProfiles: Record<ExecutionProfile, RuntimePolicy> = {
    interactive: {
      canonicalModel: 'np-dms-ai',
      temperature: 0.7,
      topP: 0.9,
      maxTokens: 2048,
      numCtx: 4096,
      repeatPenalty: 1.15,
      keepAliveSeconds: 300,
    },
    standard: {
      canonicalModel: 'np-dms-ai',
      temperature: 0.5,
      topP: 0.8,
      maxTokens: 4096,
      numCtx: 8192,
      repeatPenalty: 1.15,
      keepAliveSeconds: 600,
    },
    quality: {
      canonicalModel: 'np-dms-ai',
      temperature: 0.1,
      topP: 0.95,
      maxTokens: 8192,
      numCtx: 8192,
      repeatPenalty: 1.15,
      keepAliveSeconds: 600,
    },
    'deep-analysis': {
      canonicalModel: 'np-dms-ai',
      temperature: 0.3,
      topP: 0.85,
      maxTokens: 8192,
      numCtx: 32768,
      repeatPenalty: 1.15,
      keepAliveSeconds: 0,
    },
  };

  constructor(
    @InjectRepository(AiExecutionProfile)
    private readonly profileRepo: Repository<AiExecutionProfile>,
    @InjectRedis() private readonly redis: Redis
  ) {}

  /**
   * Maps any Ollama model name or tag to its canonical name.
   *
   * @param modelName Model name or tag as reported/configured.
   * @returns `'np-dms-ocr'` when the name contains `ocr` (case-insensitive),
   *   otherwise `'np-dms-ai'`.
   */
  getCanonicalModelName(modelName: string): 'np-dms-ai' | 'np-dms-ocr' {
    // 'typhoon-np-dms-ocr' also contains 'ocr', so one check covers both.
    return modelName.toLowerCase().includes('ocr') ? 'np-dms-ocr' : 'np-dms-ai';
  }

  /**
   * Maps an internal job type to the execution profile it runs under.
   *
   * @param jobType Internal job type.
   * @returns The matching profile; unlisted job types use `'standard'`.
   */
  getProfileForJobType(jobType: InternalJobType): ExecutionProfile {
    switch (jobType) {
      case 'auto-fill-document':
      case 'migrate-document':
        return 'quality';
      case 'rag-query':
        return 'standard';
      case 'intent-classify':
      case 'tool-suggest':
        return 'interactive';
      case 'sandbox-analysis':
        return 'deep-analysis';
      case 'ocr-extract':
      default:
        return 'standard';
    }
  }

  /**
   * Returns the runtime parameters for an execution profile.
   *
   * Lookup order: Redis cache, active database row, built-in default. Cache
   * and database failures are logged and never thrown; the next source is
   * tried instead.
   *
   * @param profile Execution profile to resolve.
   * @returns The resolved policy. When the built-in default is used, a copy
   *   is returned so callers cannot mutate the shared defaults.
   */
  async getProfileParameters(
    profile: ExecutionProfile
  ): Promise<RuntimePolicy> {
    const cacheKey = `${this.cachePrefix}${profile}`;
    try {
      const cached = await this.redis.get(cacheKey);
      if (cached) {
        const parsed: unknown = JSON.parse(cached);
        // Valid JSON such as `null` or an array is not a policy; ignore it
        // and fall through to the database instead of returning it.
        if (
          parsed !== null &&
          typeof parsed === 'object' &&
          !Array.isArray(parsed)
        ) {
          return parsed as RuntimePolicy;
        }
        this.logger.warn(
          `Ignoring malformed execution profile cache entry: ${cacheKey}`
        );
      }
    } catch (cacheErr) {
      this.logger.warn(
        `Failed to read execution profile cache: ${cacheErr instanceof Error ? cacheErr.message : String(cacheErr)}`
      );
    }
    try {
      const dbProfile = await this.profileRepo.findOne({
        where: { profileName: profile, isActive: true },
      });
      if (dbProfile) {
        // The model is fixed here; only tuning parameters come from the row.
        const policy: RuntimePolicy = {
          canonicalModel: 'np-dms-ai',
          temperature: Number(dbProfile.temperature),
          topP: Number(dbProfile.topP),
          maxTokens: dbProfile.maxTokens,
          numCtx: dbProfile.numCtx,
          repeatPenalty: Number(dbProfile.repeatPenalty),
          keepAliveSeconds: dbProfile.keepAliveSeconds,
        };
        try {
          await this.redis.set(
            cacheKey,
            JSON.stringify(policy),
            'EX',
            this.cacheTtlSeconds
          );
        } catch (cacheSetErr) {
          this.logger.warn(
            `Failed to write execution profile cache: ${cacheSetErr instanceof Error ? cacheSetErr.message : String(cacheSetErr)}`
          );
        }
        return policy;
      }
    } catch (dbErr) {
      this.logger.error(
        `Failed to read execution profile from DB: ${dbErr instanceof Error ? dbErr.message : String(dbErr)}`
      );
    }
    return { ...this.defaultProfiles[profile] };
  }

  /**
   * Builds a BullMQ job payload that carries a snapshot of the runtime
   * parameters resolved at dispatch time.
   *
   * @param jobType Internal job type; selects the profile and the model
   *   (`'np-dms-ocr'` for `'ocr-extract'`, `'np-dms-ai'` otherwise).
   * @param documentPublicId Optional public id of the document.
   * @param attachmentPublicId Optional public id of the attachment.
   * @returns The job payload including `effectiveProfile`, `canonicalModel`
   *   and `snapshotParams`.
   */
  async createJobPayload(
    jobType: InternalJobType,
    documentPublicId?: string,
    attachmentPublicId?: string
  ): Promise<AiJobPayload> {
    const effectiveProfile = this.getProfileForJobType(jobType);
    const canonicalModel =
      jobType === 'ocr-extract' ? 'np-dms-ocr' : 'np-dms-ai';
    const policy = await this.getProfileParameters(effectiveProfile);
    return {
      jobType,
      documentPublicId,
      attachmentPublicId,
      effectiveProfile,
      canonicalModel,
      snapshotParams: {
        temperature: policy.temperature,
        topP: policy.topP,
        maxTokens: policy.maxTokens,
        numCtx: policy.numCtx,
        repeatPenalty: policy.repeatPenalty,
        keepAliveSeconds: policy.keepAliveSeconds,
      },
    };
  }
}
@@ -2,6 +2,7 @@
// Change Log
// - 2026-05-15: เพิ่ม EmbeddingService สำหรับ full-document chunked embedding ตาม ADR-023A T021.
// - 2026-06-05: ปรับปรุงเป็น Hybrid Embedding และเพิ่ม Semantic Chunking ผ่าน typhoon2.5 (T025-T027)
// - 2026-06-11: US3 - เพิ่มการคืนค่า device (cpu/gpu) จาก embedding
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
@@ -20,6 +21,7 @@ export interface EmbeddingResult {
success: boolean;
chunksEmbedded: number;
error?: string;
device?: string;
}
/** บริการสร้าง embedding สำหรับ full-document RAG (ADR-023A) */
@@ -75,19 +77,18 @@ export class EmbeddingService {
error: 'No OCR text provided',
};
}
// 1. แบ่งข้อความออกเป็น Chunk ด้วย Semantic Chunking
const chunks = await this.semanticChunkTextWithFallback(ocrText);
this.logger.log(
`Document ${documentPublicId} split into ${chunks.length} chunks`
);
// 2. แปลงแต่ละ chunk เป็น Hybrid Vector และเตรียม points
const points = [];
let usedDevice = 'gpu';
for (const [idx, chunk] of chunks.entries()) {
try {
// เรียก Sidecar /embed เพื่อแปลงข้อความของ chunk
const embedResult = await this.ocrService.embedViaSidecar(chunk.text);
if (embedResult.device === 'cpu') {
usedDevice = 'cpu';
}
points.push({
id: `${documentPublicId}-${idx}`,
vector: {
@@ -116,7 +117,6 @@ export class EmbeddingService {
);
}
}
if (points.length === 0) {
return {
success: false,
@@ -124,21 +124,19 @@ export class EmbeddingService {
error: 'All chunks failed to embed',
};
}
// 3. ลบ points เก่าของเอกสาร (เพื่อความ idempotent และรองรับ revision ใหม่)
await this.qdrantService.deleteByDocumentPublicId(
projectPublicId,
documentPublicId
);
// 4. บันทึก points ใหม่ลง Qdrant
await this.qdrantService.upsert(projectPublicId, points);
this.logger.log(
`Successfully embedded ${points.length} chunks for document ${documentPublicId} in project ${projectPublicId}`
);
return { success: true, chunksEmbedded: points.length };
return {
success: true,
chunksEmbedded: points.length,
device: usedDevice,
};
} catch (err) {
const errorMsg = err instanceof Error ? err.message : String(err);
this.logger.error(
+97 -8
View File
@@ -1,4 +1,4 @@
// File: src/modules/ai/services/ocr.service.ts
// File: backend/src/modules/ai/services/ocr.service.ts
// Change Log
// - 2026-05-15: เพิ่ม OCR auto-detection service สำหรับ ADR-023A.
// - 2026-05-25: แก้ไข AggregateError (empty message) จาก axios โดย wrap เป็น Error พร้อม context ที่ชัดเจน.
@@ -11,6 +11,7 @@
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart ไปยัง /ocr-upload แทนการส่ง path
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
// - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_ENGINE.engineName เป็น typhoon-np-dms-ocr:latest ตรงกับชื่อโมเดลใน Ollama
// - 2026-06-11: US2 - คำนวณ OCR residency keep_alive แบบ dynamic ตาม VRAM headroom และ active profile
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
@@ -29,12 +30,16 @@ import { SystemSetting } from '../entities/system-setting.entity';
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
import { OcrCacheService } from './ocr-cache.service';
import { VramMonitorService } from './vram-monitor.service';
import { AiPolicyService } from './ai-policy.service';
import { ExecutionProfile } from '../interfaces/execution-policy.interface';
import { OcrResidencyDecision } from '../interfaces/ocr-residency.interface';
export interface OcrDetectionInput {
extractedText?: string;
extractedChars?: number;
pdfPath?: string;
documentPublicId?: string; // เพิ่มเพื่อการทำ audit logs
activeProfile?: ExecutionProfile;
}
export interface OcrDetectionResult {
@@ -101,6 +106,9 @@ export class OcrService {
private readonly threshold: number;
private readonly ocrApiUrl: string;
private readonly ocrSidecarApiKey: string;
private readonly vramHeadroomThresholdMb: number;
private readonly ocrResidencyWindowSeconds: number;
private readonly mainModelPressureThresholdMb: number;
constructor(
private readonly configService: ConfigService,
@InjectRepository(SystemSetting)
@@ -109,6 +117,7 @@ export class OcrService {
private readonly auditLogRepo: Repository<AiAuditLog>,
private readonly ocrCacheService: OcrCacheService,
private readonly vramMonitorService: VramMonitorService,
private readonly aiPolicyService: AiPolicyService,
@InjectRedis() private readonly redis: Redis
) {
this.threshold = this.configService.get<number>('OCR_CHAR_THRESHOLD', 100);
@@ -120,6 +129,82 @@ export class OcrService {
'OCR_SIDECAR_API_KEY',
'lcbp3-dms-ocr-sidecar-secure-token-2026'
);
this.vramHeadroomThresholdMb = this.configService.get<number>(
'VRAM_HEADROOM_THRESHOLD_MB',
this.configService.get<number>('AI_VRAM_HEADROOM_THRESHOLD_MB', 3000)
);
this.ocrResidencyWindowSeconds = this.configService.get<number>(
'OCR_RESIDENCY_WINDOW_SECONDS',
this.configService.get<number>('AI_OCR_RESIDENCY_WINDOW_SECONDS', 120)
);
this.mainModelPressureThresholdMb = this.configService.get<number>(
'GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB',
this.configService.get<number>(
'AI_GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB',
12000
)
);
}
/**
 * Decides how long the OCR model should stay resident (keep_alive) based on
 * the current VRAM headroom and the active execution profile.
 *
 * keep_alive is 0 when the VRAM query fails, when the `deep-analysis` profile
 * is active, or when VRAM pressure is high (main model above its threshold or
 * available VRAM below the headroom threshold). Otherwise the configured
 * residency window is used.
 *
 * @param activeProfile Profile of the job requesting OCR, if any.
 * @returns The decision with keep_alive seconds, observed headroom and reason.
 *   Never throws: any error is logged and reported as `query-failed`.
 */
async calculateOcrResidency(
  activeProfile?: ExecutionProfile | null
): Promise<OcrResidencyDecision> {
  const profile = activeProfile ?? null;

  // Single place that assembles the decision object.
  const decide = (
    keepAliveSeconds: number,
    vramHeadroomMb: number,
    reason: OcrResidencyDecision['reason']
  ): OcrResidencyDecision => ({
    keepAliveSeconds,
    vramHeadroomMb,
    activeProfile: profile,
    reason,
  });

  try {
    const headroom = await this.vramMonitorService.getVramHeadroom();
    if (!headroom.querySuccess) {
      return decide(0, 0, 'query-failed');
    }

    const { availableMb, mainModelVramMb } = headroom;

    if (profile === 'deep-analysis') {
      this.logger.log(`OCR Residency: deep-analysis active, keep_alive = 0`);
      return decide(0, availableMb, 'deep-analysis-active');
    }

    const mainModelOverThreshold =
      (mainModelVramMb ?? 0) > this.mainModelPressureThresholdMb;
    const headroomBelowThreshold = availableMb < this.vramHeadroomThresholdMb;
    if (mainModelOverThreshold || headroomBelowThreshold) {
      this.logger.log(
        `OCR Residency: VRAM pressure is high (main: ${mainModelVramMb}MB, avail: ${availableMb}MB), keep_alive = 0`
      );
      return decide(0, availableMb, 'high-pressure');
    }

    this.logger.log(
      `OCR Residency: VRAM headroom sufficient (${availableMb} MB), keep_alive = ${this.ocrResidencyWindowSeconds}`
    );
    return decide(
      this.ocrResidencyWindowSeconds,
      availableMb,
      'headroom-sufficient'
    );
  } catch (err: unknown) {
    const detail = err instanceof Error ? err.message : String(err);
    this.logger.warn(`Failed to calculate OCR residency: ${detail}`);
    return decide(0, 0, 'query-failed');
  }
}
/** ดึงรายการ OCR Engines ทั้งหมด พร้อมตรวจสอบตัวที่กำลัง Active */
@@ -311,7 +396,6 @@ export class OcrService {
): Promise<OcrDetectionResult> {
const startTime = Date.now();
try {
// 1. ตรวจสอบ VRAM insufficiency guard
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
TYPHOON_OCR_REQUIRED_VRAM_MB
);
@@ -321,7 +405,8 @@ export class OcrService {
);
return this.processWithTesseract(input);
}
const residency = await this.calculateOcrResidency(input.activeProfile);
const keepAlive = residency.keepAliveSeconds;
this.logger.debug(`Typhoon OCR processing: ${input.pdfPath}`);
const fileBuffer = fs.readFileSync(input.pdfPath!);
const form = new FormData();
@@ -331,6 +416,7 @@ export class OcrService {
'upload.pdf'
);
form.append('engine', 'typhoon-np-dms-ocr');
form.append('keep_alive', String(keepAlive));
const response = await axios.post<OcrSidecarResponse>(
`${this.ocrApiUrl}/ocr-upload`,
form,
@@ -339,10 +425,8 @@ export class OcrService {
headers: { 'X-API-Key': this.ocrSidecarApiKey },
}
);
const text = response.data.text ?? '';
const durationMs = Date.now() - startTime;
await this.writeAuditLog({
documentPublicId: input.documentPublicId,
aiModel: 'typhoon-ocr',
@@ -352,7 +436,6 @@ export class OcrService {
processingTimeMs: durationMs,
cacheHit: false,
});
return {
text,
ocrUsed: true,
@@ -398,6 +481,7 @@ export class OcrService {
async embedViaSidecar(text: string): Promise<{
dense: number[];
sparse: { indices: number[]; values: number[] };
device?: string;
}> {
try {
const response = await axios.post(
@@ -412,6 +496,7 @@ export class OcrService {
return response.data as {
dense: number[];
sparse: { indices: number[]; values: number[] };
device?: string;
};
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
@@ -424,7 +509,7 @@ export class OcrService {
async rerankViaSidecar(
query: string,
chunks: string[]
): Promise<{ scores: number[]; ranked_indices: number[] }> {
): Promise<{ scores: number[]; ranked_indices: number[]; device?: string }> {
try {
const response = await axios.post(
`${this.ocrApiUrl}/rerank`,
@@ -435,7 +520,11 @@ export class OcrService {
},
}
);
return response.data as { scores: number[]; ranked_indices: number[] };
return response.data as {
scores: number[];
ranked_indices: number[];
device?: string;
};
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
this.logger.error(`Failed to rerank via Sidecar: ${msg}`);
@@ -1,133 +1,143 @@
// File: src/modules/ai/services/vram-monitor.service.ts
// Change Log
// - 2026-05-30: Initial implementation สำหรับ Typhoon OCR VRAM monitoring (T006, ADR-032)
// File: backend/src/modules/ai/services/vram-monitor.service.ts
// Change Log:
// - 2026-06-11: Initial creation of VramMonitorService to monitor VRAM headroom from Ollama /api/ps
// - 2026-06-11: เพิ่มการคำนวณ mainModelVramMb ใน getVramHeadroom
// - 2026-06-11: เพิ่ม getVramStatus และ invalidateCache เพื่อความเข้ากันได้กับส่วนอื่น
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import axios from 'axios';
import { InjectRedis } from '@nestjs-modules/ioredis';
import Redis from 'ioredis';
import { VramHeadroom } from '../interfaces/execution-policy.interface';
/** VRAM information for a single model entry from the Ollama PS API (`/api/ps`). */
export interface OllamaModelInfo {
// Model name as listed in the response.
name: string;
size_vram: number; // bytes
}
/**
 * VRAM status result exposed to other services.
 *
 * This shape is retained for backward compatibility with existing consumers.
 */
export interface VramStatus {
  /** Total VRAM, in MB. */
  totalVramMb: number;
  /** VRAM currently in use, in MB. */
  usedVramMb: number;
  /** VRAM still free, in MB. */
  freeVramMb: number;
  /** Names of the models currently loaded. */
  loadedModels: string[];
  /** True when the free VRAM is at least the caller's required minimum (MB). */
  hasCapacity: boolean;
}
/** Internal shape of the response body returned by Ollama `/api/ps`. */
interface OllamaProcessStatus {
// Model entries in the response; the field may be absent.
models?: OllamaModelInfo[];
}
// Redis key used to cache the VRAM status
const VRAM_STATUS_CACHE_KEY = 'ai:vram:status';
// TTL of 10 seconds — refreshed often enough for real-time monitoring
const VRAM_STATUS_TTL_SECONDS = 10;
// VRAM limit for the RTX 2060 Super (8192 MB)
const GPU_TOTAL_VRAM_MB = 8192;
// Threshold: do not load a model when usage is above 90%
const VRAM_USAGE_LIMIT_PERCENT = 0.9;
/**
 * Monitors GPU VRAM usage by querying the Ollama `/api/ps` endpoint (ADR-032).
 *
 * Every query fails safe: when Ollama cannot be reached, or its reply cannot be
 * processed, the service reports zero available VRAM instead of throwing.
 * Nothing is cached; each call reflects a fresh `/api/ps` response.
 */
@Injectable()
export class VramMonitorService {
  /** Timeout, in milliseconds, for a single `/api/ps` request. */
  private static readonly PS_TIMEOUT_MS = 3000;
  /** Total VRAM (MB) used when GPU_TOTAL_VRAM_MB is unset or invalid: 16GB (RTX 5060 Ti). */
  private static readonly DEFAULT_TOTAL_VRAM_MB = 16384;
  /** Model-name fragments whose VRAM is counted towards `mainModelVramMb`. */
  private static readonly MAIN_MODEL_MARKERS: readonly string[] = [
    'np-dms-ai',
    'typhoon2.5-np-dms',
  ];
  private static readonly BYTES_PER_MB = 1024 * 1024;

  private readonly logger = new Logger(VramMonitorService.name);
  private readonly ollamaUrl: string;
  private readonly totalVramMb: number;

  /**
   * Resolves the Ollama base URL (OLLAMA_URL, then AI_HOST_URL, then a built-in
   * default) and the total VRAM budget (GPU_TOTAL_VRAM_MB).
   */
  constructor(private readonly configService: ConfigService) {
    this.ollamaUrl = this.configService.get<string>(
      'OLLAMA_URL',
      this.configService.get<string>(
        'AI_HOST_URL',
        'http://192.168.10.100:11434'
      )
    );
    // Configuration that originates from environment variables can arrive as a
    // string; coerce it so the reported totals are always real numbers.
    const configuredTotalMb = Number(
      this.configService.get<number | string>(
        'GPU_TOTAL_VRAM_MB',
        VramMonitorService.DEFAULT_TOTAL_VRAM_MB
      )
    );
    this.totalVramMb =
      Number.isFinite(configuredTotalMb) && configuredTotalMb > 0
        ? configuredTotalMb
        : VramMonitorService.DEFAULT_TOTAL_VRAM_MB;
  }

  /**
   * Reads the VRAM headroom from Ollama `/api/ps`.
   *
   * @returns Usage totals with `querySuccess: true`; on any failure a safe
   *   default with `availableMb = 0` and `querySuccess: false`.
   */
  async getVramHeadroom(): Promise<VramHeadroom> {
    try {
      const models = await this.fetchLoadedModels();
      return this.summarizeUsage(models);
    } catch (err: unknown) {
      this.logger.warn(
        `Failed to query Ollama /api/ps: ${VramMonitorService.describeError(err)}`
      );
      return {
        totalMb: this.totalVramMb,
        usedMb: this.totalVramMb, // force used = total so that available = 0
        availableMb: 0,
        querySuccess: false,
        mainModelVramMb: 0,
      };
    }
  }

  /**
   * Returns the current VRAM status in the legacy `VramStatus` shape, kept for
   * backward compatibility with the vram/status endpoint.
   *
   * The model list and the usage figures come from the same `/api/ps` response,
   * so they always describe one consistent snapshot.
   *
   * @param minRequiredMb Free VRAM (MB) needed for `hasCapacity` to be true.
   * @returns The status; on failure a safe default with no free VRAM.
   */
  async getVramStatus(minRequiredMb = 4000): Promise<VramStatus> {
    try {
      const models = await this.fetchLoadedModels();
      const usage = this.summarizeUsage(models);
      return {
        totalVramMb: usage.totalMb,
        usedVramMb: usage.usedMb,
        freeVramMb: usage.availableMb,
        loadedModels: models.map((m) => m.name),
        hasCapacity: usage.availableMb >= minRequiredMb,
      };
    } catch (err: unknown) {
      this.logger.warn(
        `Failed to get VRAM status: ${VramMonitorService.describeError(err)}`
      );
      return {
        totalVramMb: this.totalVramMb,
        usedVramMb: this.totalVramMb,
        freeVramMb: 0,
        loadedModels: [],
        hasCapacity: false,
      };
    }
  }

  /**
   * Checks whether enough VRAM is available to load a model.
   *
   * @param requiredMb VRAM the caller needs, in MB.
   * @returns True when the available headroom is at least `requiredMb`.
   */
  async hasVramCapacity(requiredMb: number): Promise<boolean> {
    const headroom = await this.getVramHeadroom();
    return headroom.availableMb >= requiredMb;
  }

  /**
   * Retained for existing callers. This service no longer keeps a cache, so the
   * call only records that an invalidation was requested.
   */
  async invalidateCache(): Promise<void> {
    // Awaiting a resolved promise keeps the method asynchronous for callers.
    await Promise.resolve();
    this.logger.log('VRAM cache invalidation requested (no-op in new policy)');
  }

  /** Fetches the model entries from Ollama `/api/ps`; rejects on request failure. */
  private async fetchLoadedModels(): Promise<
    Array<{ name: string; size_vram: number }>
  > {
    const response = await axios.get<{
      models?: Array<{
        name: string;
        size_vram: number;
      }>;
    }>(`${this.ollamaUrl}/api/ps`, {
      timeout: VramMonitorService.PS_TIMEOUT_MS,
    });
    return response.data?.models ?? [];
  }

  /** Folds the per-model VRAM sizes (bytes) into MB totals for a successful query. */
  private summarizeUsage(
    models: ReadonlyArray<{ name: string; size_vram: number }>
  ): VramHeadroom {
    let totalUsedBytes = 0;
    let mainModelUsedBytes = 0;
    for (const model of models) {
      // `|| 0` treats a missing or NaN size as zero.
      const bytes = model.size_vram || 0;
      totalUsedBytes += bytes;
      if (
        VramMonitorService.MAIN_MODEL_MARKERS.some((marker) =>
          model.name.includes(marker)
        )
      ) {
        mainModelUsedBytes += bytes;
      }
    }
    const usedMb = Math.round(totalUsedBytes / VramMonitorService.BYTES_PER_MB);
    return {
      totalMb: this.totalVramMb,
      usedMb,
      availableMb: Math.max(0, this.totalVramMb - usedMb),
      querySuccess: true,
      mainModelVramMb: Math.round(
        mainModelUsedBytes / VramMonitorService.BYTES_PER_MB
      ),
    };
  }

  /** Renders an unknown thrown value as a log-friendly message. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
@@ -0,0 +1,138 @@
// File: backend/src/modules/ai/tests/ai-policy.service.spec.ts
// Change Log:
// - 2026-06-11: สร้าง unit tests สำหรับ AiPolicyService (US5)
// - 2026-06-11: แก้ไข DEFAULT_REDIS_TOKEN import เป็นค่าคงที่ string
import { Test, TestingModule } from '@nestjs/testing';
import { getRepositoryToken } from '@nestjs/typeorm';
import { AiPolicyService } from '../services/ai-policy.service';
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';
// Unit tests for AiPolicyService: model-name canonicalisation, job-type to
// profile mapping, profile parameter lookup and job payload creation.
describe('AiPolicyService', () => {
  let service: AiPolicyService;

  // Stand-ins for the execution-profile repository and the Redis client.
  const mockProfileRepo = { findOne: jest.fn() };
  const mockRedis = { get: jest.fn(), set: jest.fn() };

  beforeEach(async () => {
    jest.clearAllMocks();
    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        AiPolicyService,
        {
          provide: getRepositoryToken(AiExecutionProfile),
          useValue: mockProfileRepo,
        },
        { provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
      ],
    }).compile();
    service = testingModule.get<AiPolicyService>(AiPolicyService);
  });

  describe('getCanonicalModelName', () => {
    it('ควรคืนค่า np-dms-ocr สำหรับชื่อโมเดลที่มีคำว่า ocr', () => {
      const ocrNames = ['typhoon-np-dms-ocr:latest', 'my-ocr-model'] as const;
      for (const modelName of ocrNames) {
        expect(service.getCanonicalModelName(modelName)).toBe('np-dms-ocr');
      }
    });

    it('ควรคืนค่า np-dms-ai สำหรับโมเดลอื่นๆ', () => {
      const otherNames = ['typhoon2.5-np-dms:latest', 'gemma'] as const;
      for (const modelName of otherNames) {
        expect(service.getCanonicalModelName(modelName)).toBe('np-dms-ai');
      }
    });
  });

  describe('getProfileForJobType', () => {
    it('ควร map job type ต่างๆ เป็น profile ที่ถูกต้อง', () => {
      // [job type, expected profile], checked in this order.
      const expectedProfiles = [
        ['auto-fill-document', 'quality'],
        ['migrate-document', 'quality'],
        ['rag-query', 'standard'],
        ['intent-classify', 'interactive'],
        ['tool-suggest', 'interactive'],
        ['sandbox-analysis', 'deep-analysis'],
        ['ocr-extract', 'standard'],
      ] as const;
      for (const [jobType, profile] of expectedProfiles) {
        expect(service.getProfileForJobType(jobType)).toBe(profile);
      }
    });
  });

  describe('getProfileParameters', () => {
    it('ควรดึงพารามิเตอร์จาก Redis cache เมื่อมี cache hit', async () => {
      const cachedPolicy = {
        canonicalModel: 'np-dms-ai' as const,
        temperature: 0.2,
        topP: 0.9,
        maxTokens: 1000,
        numCtx: 4000,
        repeatPenalty: 1.1,
        keepAliveSeconds: 120,
      };
      mockRedis.get.mockResolvedValue(JSON.stringify(cachedPolicy));

      const params = await service.getProfileParameters('standard');

      expect(params).toEqual(cachedPolicy);
      expect(mockRedis.get).toHaveBeenCalledWith(
        'ai_execution_profiles:standard'
      );
      expect(mockProfileRepo.findOne).not.toHaveBeenCalled();
    });

    it('ควรดึงพารามิเตอร์จาก DB เมื่อ cache miss และบันทึกลง cache', async () => {
      mockRedis.get.mockResolvedValue(null);
      const storedProfile = {
        profileName: 'standard',
        isActive: true,
        temperature: 0.4,
        topP: 0.85,
        maxTokens: 3000,
        numCtx: 6000,
        repeatPenalty: 1.2,
        keepAliveSeconds: 400,
      };
      mockProfileRepo.findOne.mockResolvedValue(storedProfile);

      const params = await service.getProfileParameters('standard');

      expect(params.temperature).toBe(0.4);
      expect(params.maxTokens).toBe(3000);
      expect(mockRedis.set).toHaveBeenCalled();
    });

    it('ควร fallback ไปยัง Default parameters เมื่อดึงจาก DB หรือ Redis ล้มเหลว', async () => {
      mockRedis.get.mockRejectedValue(new Error('Redis down'));
      mockProfileRepo.findOne.mockRejectedValue(new Error('DB down'));

      const params = await service.getProfileParameters('deep-analysis');

      expect(params.canonicalModel).toBe('np-dms-ai');
      expect(params.keepAliveSeconds).toBe(0);
    });
  });

  describe('createJobPayload', () => {
    it('ควรสร้าง payload ของ BullMQ job ที่มี snapshot parameters ครบถ้วน', async () => {
      // No cache entry and no DB row.
      mockRedis.get.mockResolvedValue(null);
      mockProfileRepo.findOne.mockResolvedValue(null);

      const jobPayload = await service.createJobPayload(
        'rag-query',
        'doc-1',
        'attach-1'
      );

      expect(jobPayload.jobType).toBe('rag-query');
      expect(jobPayload.documentPublicId).toBe('doc-1');
      expect(jobPayload.attachmentPublicId).toBe('attach-1');
      expect(jobPayload.effectiveProfile).toBe('standard');
      expect(jobPayload.canonicalModel).toBe('np-dms-ai');
      expect(jobPayload.snapshotParams).toBeDefined();
      expect(jobPayload.snapshotParams.temperature).toBe(0.5);
    });
  });
});
@@ -0,0 +1,171 @@
// File: backend/src/modules/ai/tests/ai.controller.spec.ts
// Change Log:
// - 2026-06-11: สร้าง integration tests สำหรับ AiController forbidden fields (US5)
// - 2026-06-11: เพิ่ม ConfigService mock และ override ServiceAccountGuard เพื่อแก้ DI error
// - 2026-06-11: แก้ไขการ import supertest ให้ถูกต้อง เพื่อป้องกัน TypeError: request is not a function
// - 2026-06-11: แก้ไขการตรวจสอบ message array ในการทดสอบ validation ให้ถูกต้อง
// - 2026-06-11: แก้ไข ESLint unsafe argument/member access errors ใน integration tests
// - 2026-06-11: เพิ่ม mock 'default_IORedisModuleConnectionToken' เพื่อแก้ปัญหา NestJS DI และลบบรรทัดว่างในฟังก์ชัน
import { Test, TestingModule } from '@nestjs/testing';
import { INestApplication, ValidationPipe } from '@nestjs/common';
import request from 'supertest';
import { AiController } from '../ai.controller';
import { AiService } from '../ai.service';
import { AiIngestService } from '../ai-ingest.service';
import { AiRagService } from '../ai-rag.service';
import { AiQueueService } from '../ai-queue.service';
import { AiSettingsService } from '../ai-settings.service';
import { AiToolRegistryService } from '../tool/ai-tool-registry.service';
import { FileStorageService } from '../../../common/file-storage/file-storage.service';
import { AiMigrationCheckpointService } from '../ai-migration-checkpoint.service';
import { OcrService } from '../services/ocr.service';
import { JwtAuthGuard } from '../../../common/guards/jwt-auth.guard';
import { RbacGuard } from '../../../common/guards/rbac.guard';
import { AiEnabledGuard } from '../guards/ai-enabled.guard';
import { ServiceAccountGuard } from '../guards/service-account.guard';
import { ConfigService } from '@nestjs/config';
// Integration tests for AiController: POST /ai/jobs must accept a plain job
// request and reject requests that carry forbidden execution-policy fields.
describe('AiController (Integration)', () => {
  let app: INestApplication;

  // Guard replacement that lets every request through.
  const allowAllGuard = { canActivate: () => true };
  const mockAiService = {
    submitUnifiedJob: jest.fn().mockResolvedValue({
      jobId: 'job-123',
      status: 'queued',
      effectiveProfile: 'standard',
      modelUsed: 'np-dms-ai',
    }),
  };
  const DOCUMENT_ID = '019505a1-7c3e-7000-8000-abc123def456';

  // Posts `body` to /ai/jobs with the idempotency header used by every test.
  const postJob = (body: Record<string, unknown>) =>
    request(app.getHttpServer() as () => void)
      .post('/ai/jobs')
      .set('idempotency-key', 'key-123')
      .send(body);

  // Sends a rag-query job with extra fields and expects a 400 whose first
  // validation message contains `expectedMessage`.
  const expectForbiddenField = async (
    extraFields: Record<string, unknown>,
    expectedMessage: string
  ): Promise<void> => {
    const response = await postJob({
      type: 'rag-query',
      documentPublicId: DOCUMENT_ID,
      ...extraFields,
    });
    expect(response.status).toBe(400);
    const body = response.body as { message: string[] };
    expect(body.message[0]).toContain(expectedMessage);
  };

  beforeEach(async () => {
    jest.clearAllMocks();
    const redisStub = {
      get: jest.fn().mockResolvedValue(null),
      set: jest.fn().mockResolvedValue('OK'),
      del: jest.fn().mockResolvedValue(1),
    };
    const configStub = {
      get: jest
        .fn()
        .mockImplementation((key: string) =>
          key === 'AI_ENABLED' ? 'true' : null
        ),
    };
    const moduleFixture: TestingModule = await Test.createTestingModule({
      controllers: [AiController],
      providers: [
        { provide: AiService, useValue: mockAiService },
        // The remaining collaborators are provided as empty objects.
        { provide: AiIngestService, useValue: {} },
        { provide: AiRagService, useValue: {} },
        { provide: AiQueueService, useValue: {} },
        { provide: AiSettingsService, useValue: {} },
        { provide: AiToolRegistryService, useValue: {} },
        { provide: FileStorageService, useValue: {} },
        { provide: AiMigrationCheckpointService, useValue: {} },
        { provide: OcrService, useValue: {} },
        {
          provide: 'default_IORedisModuleConnectionToken',
          useValue: redisStub,
        },
        { provide: ConfigService, useValue: configStub },
      ],
    })
      .overrideGuard(JwtAuthGuard)
      .useValue(allowAllGuard)
      .overrideGuard(RbacGuard)
      .useValue(allowAllGuard)
      .overrideGuard(AiEnabledGuard)
      .useValue(allowAllGuard)
      .overrideGuard(ServiceAccountGuard)
      .useValue(allowAllGuard)
      .compile();

    app = moduleFixture.createNestApplication();
    const validationPipe = new ValidationPipe({
      whitelist: true,
      transform: true,
      forbidNonWhitelisted: true,
    });
    app.useGlobalPipes(validationPipe);
    await app.init();
  });

  afterEach(async () => {
    await app.close();
  });

  describe('POST /ai/jobs - Validation', () => {
    it('ควรส่งผ่านเมื่อส่ง payload ที่ถูกต้อง (ไม่มี executionProfile, model, temperature ฯลฯ)', async () => {
      const response = await postJob({
        type: 'rag-query',
        documentPublicId: DOCUMENT_ID,
        payload: { query: 'test' },
      });

      expect(response.status).toBe(201);
      expect(response.body).toEqual({
        jobId: 'job-123',
        status: 'queued',
        effectiveProfile: 'standard',
        modelUsed: 'np-dms-ai',
      });
      expect(mockAiService.submitUnifiedJob).toHaveBeenCalled();
    });

    it('ควรคืนสถานะ 400 Bad Request เมื่อส่ง executionProfile มาใน payload', async () => {
      await expectForbiddenField(
        { executionProfile: 'quality' },
        'executionProfile is forbidden in payload'
      );
    });

    it('ควรคืนสถานะ 400 Bad Request เมื่อส่ง model มาใน payload', async () => {
      await expectForbiddenField(
        { model: { key: 'custom' } },
        'model is forbidden in payload'
      );
    });

    it('ควรคืนสถานะ 400 Bad Request เมื่อส่ง temperature มาใน payload', async () => {
      await expectForbiddenField(
        { temperature: 0.7 },
        'temperature is forbidden in payload'
      );
    });
  });
});
@@ -0,0 +1,141 @@
// File: backend/src/modules/ai/tests/ocr-residency.spec.ts
// Change Log:
// - 2026-06-11: Initial unit tests for adaptive OCR residency
import { Test, TestingModule } from '@nestjs/testing';
import { ConfigService } from '@nestjs/config';
import { getRepositoryToken } from '@nestjs/typeorm';
import { OcrService } from '../services/ocr.service';
import { VramMonitorService } from '../services/vram-monitor.service';
import { AiPolicyService } from '../services/ai-policy.service';
import { OcrCacheService } from '../services/ocr-cache.service';
import { SystemSetting } from '../entities/system-setting.entity';
import { AiAuditLog } from '../entities/ai-audit-log.entity';
// Unit tests for OcrService.calculateOcrResidency: the keep-alive decision for
// each combination of active profile and reported VRAM headroom.
describe('OcrService Adaptive Residency (US2)', () => {
  let service: OcrService;

  // Configuration values served to OcrService; unknown keys fall back to the
  // caller-supplied default.
  const mockConfigService = {
    get: jest.fn((key: string, defaultValue?: unknown): unknown => {
      const config: Record<string, unknown> = {
        OCR_CHAR_THRESHOLD: 100,
        OCR_API_URL: 'http://localhost:8765',
        OCR_SIDECAR_API_KEY: 'test-key',
        VRAM_HEADROOM_THRESHOLD_MB: 3000,
        OCR_RESIDENCY_WINDOW_SECONDS: 120,
        GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB: 12000,
      };
      return config[key] ?? defaultValue;
    }),
  };
  const mockSystemSettingRepo = {
    findOne: jest.fn().mockResolvedValue({
      settingValue: '019505a1-7c3e-7000-8000-abc123def002',
    }),
  };
  const mockAiAuditLogRepo = {
    create: jest.fn().mockReturnValue({}),
    save: jest.fn().mockResolvedValue({}),
  };
  const mockOcrCacheService = {};
  const mockVramMonitorService = {
    getVramHeadroom: jest.fn(),
    hasVramCapacity: jest.fn().mockResolvedValue(true),
  };
  const mockAiPolicyService = {};
  const mockRedis = {
    get: jest.fn().mockResolvedValue(null),
    set: jest.fn().mockResolvedValue('OK'),
    del: jest.fn().mockResolvedValue(1),
  };

  const TOTAL_VRAM_MB = 16384;

  // Queues one headroom reading for the next getVramHeadroom() call.
  // availableMb is the remainder of the 16384 MB total after `usedMb`.
  const stubHeadroomOnce = (
    usedMb: number,
    mainModelVramMb: number,
    querySuccess = true
  ): void => {
    mockVramMonitorService.getVramHeadroom.mockResolvedValueOnce({
      totalMb: TOTAL_VRAM_MB,
      usedMb,
      availableMb: TOTAL_VRAM_MB - usedMb,
      querySuccess,
      mainModelVramMb,
    });
  };

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        OcrService,
        { provide: ConfigService, useValue: mockConfigService },
        {
          provide: getRepositoryToken(SystemSetting),
          useValue: mockSystemSettingRepo,
        },
        {
          provide: getRepositoryToken(AiAuditLog),
          useValue: mockAiAuditLogRepo,
        },
        { provide: OcrCacheService, useValue: mockOcrCacheService },
        { provide: VramMonitorService, useValue: mockVramMonitorService },
        { provide: AiPolicyService, useValue: mockAiPolicyService },
        {
          provide: 'default_IORedisModuleConnectionToken',
          useValue: mockRedis,
        },
      ],
    }).compile();
    service = module.get<OcrService>(OcrService);
    jest.clearAllMocks();
    // clearAllMocks() only wipes call history. A queued mockResolvedValueOnce
    // that a previous test never consumed would otherwise be served to the
    // next test, so drop any leftover queue explicitly.
    mockVramMonitorService.getVramHeadroom.mockReset();
  });

  it('ควรคืน keepAliveSeconds=0 เมื่อ activeProfile เป็น deep-analysis (FR-B03)', async () => {
    stubHeadroomOnce(4000, 4000);
    const decision = await service.calculateOcrResidency('deep-analysis');
    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('deep-analysis-active');
  });

  it('ควรคืน keepAliveSeconds=0 เมื่อ VRAM ของโมเดลหลักเกิน pressure threshold (FR-B03)', async () => {
    stubHeadroomOnce(13000, 13000);
    const decision = await service.calculateOcrResidency('standard');
    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('high-pressure');
  });

  it('ควรคืน keepAliveSeconds=0 เมื่อ VRAM headroom ต่ำกว่า headroom threshold (FR-B03)', async () => {
    stubHeadroomOnce(14000, 8000);
    const decision = await service.calculateOcrResidency('standard');
    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('high-pressure');
  });

  it('ควรคืน keepAliveSeconds > 0 (residency window) เมื่อ VRAM เพียงพอและไม่มี pressure (FR-B04)', async () => {
    stubHeadroomOnce(4000, 4000);
    const decision = await service.calculateOcrResidency('standard');
    expect(decision.keepAliveSeconds).toBe(120);
    expect(decision.reason).toBe('headroom-sufficient');
  });

  it('ควรคืน keepAliveSeconds=0 และ reason=query-failed เมื่อ query VRAM ล้มเหลว (FR-B05)', async () => {
    stubHeadroomOnce(16384, 0, false);
    const decision = await service.calculateOcrResidency('standard');
    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('query-failed');
  });
});
@@ -0,0 +1,153 @@
// File: backend/src/modules/ai/tests/queue-policy.spec.ts
// Change Log:
// - 2026-06-11: สร้าง unit tests สำหรับทดสอบ Queue Policy & Selective Realtime Concurrency (US4)
// - 2026-06-11: แก้ไข relative import ของ Attachment ให้ถูกต้อง (3 ระดับ)
// - 2026-06-11: นำเข้า Job และ AiRealtimeJobData เพื่อแก้ไข compile/lint errors
import { Test, TestingModule } from '@nestjs/testing';
import { getQueueToken } from '@nestjs/bullmq';
import { getRepositoryToken } from '@nestjs/typeorm';
import type { Job } from 'bullmq';
import { QUEUE_AI_BATCH } from '../../common/constants/queue.constants';
import {
AiRealtimeProcessor,
AiRealtimeJobData,
} from '../processors/ai-realtime.processor';
import { OcrService } from '../services/ocr.service';
import { OllamaService } from '../services/ollama.service';
import { AiAuditLog } from '../entities/ai-audit-log.entity';
import { Attachment } from '../../../common/file-storage/entities/attachment.entity';
// Unit tests for AiRealtimeProcessor queue policy: which job types run on the
// realtime queue, which are redirected to ai-batch, and when ai-batch is
// paused and resumed.
describe('Queue Policy (US4)', () => {
  let processor: AiRealtimeProcessor;

  const mockBatchQueue = {
    add: jest.fn().mockResolvedValue({ id: 'redirected-job-id' }),
    pause: jest.fn().mockResolvedValue(undefined),
    resume: jest.fn().mockResolvedValue(undefined),
  };
  const mockOcrService = { detectAndExtract: jest.fn() };
  const mockOllamaService = {
    getMainModelName: jest.fn().mockReturnValue('np-dms-ai'),
    generate: jest.fn(),
  };
  const mockAiAuditLogRepo = { create: jest.fn(), save: jest.fn() };
  const mockAttachmentRepo = { update: jest.fn() };

  // Job carrying a project id and a query payload, as passed to process().
  const queryJob = (id: string, jobType: string): Job<AiRealtimeJobData> =>
    ({
      id,
      data: {
        jobType,
        projectPublicId: 'project-1',
        payload: { query: 'test' },
      },
    }) as unknown as Job<AiRealtimeJobData>;

  // Minimal job (id and job type only) for the lifecycle hooks.
  const lifecycleJob = (id: string, jobType: string): Job<AiRealtimeJobData> =>
    ({ id, data: { jobType } }) as unknown as Job<AiRealtimeJobData>;

  beforeEach(async () => {
    jest.clearAllMocks();
    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        AiRealtimeProcessor,
        { provide: getQueueToken(QUEUE_AI_BATCH), useValue: mockBatchQueue },
        { provide: OcrService, useValue: mockOcrService },
        { provide: OllamaService, useValue: mockOllamaService },
        {
          provide: getRepositoryToken(AiAuditLog),
          useValue: mockAiAuditLogRepo,
        },
        {
          provide: getRepositoryToken(Attachment),
          useValue: mockAttachmentRepo,
        },
      ],
    }).compile();
    processor = testingModule.get<AiRealtimeProcessor>(AiRealtimeProcessor);
  });

  it('ควรอนุญาตให้ lightweight jobs รันได้โดยไม่ redirect', async () => {
    const classifyResult = await processor.process(
      queryJob('1', 'intent-classify')
    );
    expect(classifyResult).toEqual({ success: true, intent: 'GET_RFA' });
    expect(mockBatchQueue.add).not.toHaveBeenCalled();

    const toolResult = await processor.process(queryJob('2', 'tool-suggest'));
    expect(toolResult).toEqual({ success: true, suggestions: [] });
    expect(mockBatchQueue.add).not.toHaveBeenCalled();
  });

  it('ควร redirect generation-heavy jobs ไปยัง ai-batch queue', async () => {
    const suggestJob = queryJob('3', 'ai-suggest');
    await processor.process(suggestJob);
    expect(mockBatchQueue.add).toHaveBeenCalledWith(
      'ai-suggest',
      suggestJob.data,
      { jobId: '3' }
    );

    const ragJob = queryJob('4', 'rag-query');
    await processor.process(ragJob);
    expect(mockBatchQueue.add).toHaveBeenCalledWith('rag-query', ragJob.data, {
      jobId: '4',
    });
  });

  it('ควร resume ai-batch เมื่อ realtime jobs ทั้งหมดเสร็จแล้วเท่านั้น', async () => {
    const first = lifecycleJob('10', 'intent-classify');
    const second = lifecycleJob('11', 'tool-suggest');

    await processor.onActive(first);
    await processor.onActive(second);
    expect(mockBatchQueue.pause).toHaveBeenCalledTimes(1);

    await processor.onCompleted(first);
    expect(mockBatchQueue.resume).not.toHaveBeenCalled();

    await processor.onCompleted(second);
    expect(mockBatchQueue.resume).toHaveBeenCalledTimes(1);
  });

  it('ควรยัง pause ai-batch ต่อเมื่อมี realtime job อื่น active อยู่แม้มี job หนึ่ง fail', async () => {
    const first = lifecycleJob('12', 'intent-classify');
    const second = lifecycleJob('13', 'tool-suggest');

    await processor.onActive(first);
    await processor.onActive(second);
    expect(mockBatchQueue.pause).toHaveBeenCalledTimes(1);

    await processor.onFailed(first);
    expect(mockBatchQueue.resume).not.toHaveBeenCalled();

    await processor.onCompleted(second);
    expect(mockBatchQueue.resume).toHaveBeenCalledTimes(1);
  });
});
@@ -0,0 +1,102 @@
// File: backend/src/modules/ai/tests/vram-monitor.service.spec.ts
// Change Log:
// - 2026-06-11: สร้าง unit tests สำหรับ VramMonitorService (US5)
import { Test, TestingModule } from '@nestjs/testing';
import { ConfigService } from '@nestjs/config';
import { VramMonitorService } from '../services/vram-monitor.service';
import axios from 'axios';
jest.mock('axios');
const mockedAxios = axios as jest.Mocked<typeof axios>;
// Unit tests for VramMonitorService against a mocked axios and an 8GB VRAM
// budget supplied through the mocked ConfigService.
describe('VramMonitorService', () => {
  let service: VramMonitorService;

  const GIB = 1024 * 1024 * 1024;

  const mockConfigService = {
    get: jest.fn((key: string, defaultValue?: unknown): unknown => {
      const config: Record<string, unknown> = {
        OLLAMA_URL: 'http://localhost:11434',
        GPU_TOTAL_VRAM_MB: 8192, // mock total 8GB
      };
      const configured = config[key];
      return configured === undefined ? defaultValue : configured;
    }),
  };

  // Makes the mocked axios.get resolve with the given model entries.
  const stubLoadedModels = (
    models: Array<{ name: string; size_vram: number }>
  ): void => {
    mockedAxios.get.mockResolvedValue({ data: { models } });
  };

  beforeEach(async () => {
    jest.clearAllMocks();
    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        VramMonitorService,
        { provide: ConfigService, useValue: mockConfigService },
      ],
    }).compile();
    service = testingModule.get<VramMonitorService>(VramMonitorService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });

  describe('getVramHeadroom', () => {
    it('ควรคำนวณ headroom ถูกต้องเมื่อ Ollama คืนข้อมูลโมเดลปกติ', async () => {
      stubLoadedModels([
        { name: 'typhoon2.5-np-dms:latest', size_vram: 4 * GIB }, // 4GB
        { name: 'other-model', size_vram: 2 * GIB }, // 2GB
      ]);

      const headroom = await service.getVramHeadroom();

      expect(headroom.querySuccess).toBe(true);
      expect(headroom.totalMb).toBe(8192);
      expect(headroom.usedMb).toBe(6144); // 4GB + 2GB = 6GB (6144MB)
      expect(headroom.availableMb).toBe(2048); // 8GB - 6GB = 2GB (2048MB)
      expect(headroom.mainModelVramMb).toBe(4096); // 4GB main model (4096MB)
    });

    it('ควรคำนวณ headroom เป็น safe default (0 available) เมื่อ Ollama query ล้มเหลว', async () => {
      mockedAxios.get.mockRejectedValue(new Error('Connection timeout'));

      const headroom = await service.getVramHeadroom();

      expect(headroom.querySuccess).toBe(false);
      expect(headroom.availableMb).toBe(0);
      expect(headroom.usedMb).toBe(8192);
      expect(headroom.mainModelVramMb).toBe(0);
    });
  });

  describe('hasVramCapacity', () => {
    it('ควรคืน true เมื่อ headroom พอตามค่าที่ขอ', async () => {
      // 4GB used of 8GB leaves 4096MB available.
      stubLoadedModels([
        { name: 'typhoon2.5-np-dms:latest', size_vram: 4 * GIB },
      ]);

      const hasCapacity = await service.hasVramCapacity(3000);

      expect(hasCapacity).toBe(true);
    });

    it('ควรคืน false เมื่อ headroom ไม่พอตามค่าที่ขอ', async () => {
      // 6GB used of 8GB leaves 2048MB available, below the 3000MB requested.
      stubLoadedModels([
        { name: 'typhoon2.5-np-dms:latest', size_vram: 6 * GIB },
      ]);

      const hasCapacity = await service.hasVramCapacity(3000);

      expect(hasCapacity).toBe(false);
    });
  });
});