690528:1524 ADR-030-230 context aware #02
CI / CD Pipeline / build (push) Failing after 4m14s
CI / CD Pipeline / deploy (push) Has been skipped

This commit is contained in:
2026-05-28 15:24:41 +07:00
parent 960cd78b8a
commit 4391bbe61d
29 changed files with 4001 additions and 44 deletions
@@ -8,6 +8,7 @@
// - 2026-05-22: แก้ไข type compilation error ใน processMigrateDocument และนำช่องว่างภายในฟังก์ชันออก
// - 2026-05-25: เพิ่ม AiPromptsService เพื่อดึง Dynamic Prompt สำหรับ OCR extraction ใน sandbox และ migration pipeline
// - 2026-05-26: แก้ไข bug lockDuration=30000ms ทำให้ sandbox-extract job stall เมื่อ Ollama ใช้เวลา >30s — เพิ่ม lockDuration: 150000
// - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ
import { Processor, WorkerHost } from '@nestjs/bullmq';
import { Logger } from '@nestjs/common';
@@ -30,14 +31,16 @@ import { MigrationErrorType } from '../../migration/entities/migration-error.ent
import { AiPromptsService } from '../prompts/ai-prompts.service';
/**
 * Metadata extracted from the AI response for a migrated document.
 *
 * Every field is optional because the values come from parsed model output
 * and may be missing. The index signature inherited from
 * `Record<string, unknown>` allows additional, unlisted keys.
 */
interface MigrateDocumentMetadata extends Record<string, unknown> {
  /** Document number as read from the document content. */
  documentNumber?: string;
  /** Public identifier of the project, as reported by the model. */
  projectPublicId?: string;
  /** Code looked up against `correspondence_types.type_code` to resolve the category name. */
  correspondenceTypeCode?: string;
  /** Code looked up against `disciplines.discipline_code` to resolve the discipline id. */
  disciplineCode?: string;
  /** UUID of the sending organization; looked up against `organizations.uuid`. */
  originatorOrganizationPublicId?: string;
  /** Recipient organizations; the first entry with `recipientType === 'TO'` is treated as the primary receiver. */
  recipients?: Array<{ organizationPublicId: string; recipientType: string }>;
  /** Subject line of the document. */
  subject?: string;
  /** NOTE(review): appears to be superseded by `correspondenceTypeCode` — confirm before removing. */
  category?: string;
  /** NOTE(review): appears to be superseded by `disciplineCode` — confirm before removing. */
  discipline?: string;
  /** NOTE(review): appears to be superseded by `documentDate` — confirm before removing. */
  date?: string;
  /** Document date; used for both the issued and the received date of the queued record. */
  documentDate?: string;
  /** Tag names suggested by the model. */
  tags?: string[];
  /** Summary text; used as the body and AI summary of the queued record. */
  summary?: string;
  /** Model confidence score; consumers fall back to 0.5 when it is not a number. */
  confidence?: number;
}
export type AiBatchJobType =
@@ -72,6 +75,32 @@ const toStringList = (value: unknown): string[] =>
? value.filter((item): item is string => typeof item === 'string')
: [];
/**
 * Coerces an untrusted value (for example a field of parsed AI JSON output)
 * into a list of recipient descriptors.
 *
 * Non-array input yields an empty list. An array entry is kept only when it
 * is an object for which `readString` returns a truthy `organizationPublicId`
 * and a truthy `recipientType`; every other entry is silently dropped.
 *
 * The recipient type is trimmed, so a value such as `'CC '` becomes `'CC'`.
 * Entries whose type is blank after trimming are dropped as well, so callers
 * never receive an empty `recipientType`.
 *
 * @param value - Raw value to coerce.
 * @returns The well-formed recipients, in input order.
 */
const toRecipientsList = (
  value: unknown
): Array<{ organizationPublicId: string; recipientType: string }> => {
  if (!Array.isArray(value)) {
    return [];
  }
  const result: Array<{ organizationPublicId: string; recipientType: string }> =
    [];
  for (const item of value) {
    if (!item || typeof item !== 'object') {
      continue;
    }
    const obj = item as Record<string, unknown>;
    const orgId = readString(obj.organizationPublicId);
    const rawType = readString(obj.recipientType);
    if (!orgId || !rawType) {
      continue;
    }
    // Trim once: this normalizes whitespace typos like 'CC ' to 'CC'.
    const recipientType = rawType.trim();
    if (!recipientType) {
      // A whitespace-only type carries no information; skip the entry.
      continue;
    }
    result.push({ organizationPublicId: orgId, recipientType });
  }
  return result;
};
const parseMigrateDocumentMetadata = (
cleanedResponse: string
): MigrateDocumentMetadata => {
@@ -81,11 +110,15 @@ const parseMigrateDocumentMetadata = (
}
const source = parsed as Record<string, unknown>;
return {
documentNumber: readString(source.documentNumber),
projectPublicId: readString(source.projectPublicId),
correspondenceTypeCode: readString(source.correspondenceTypeCode),
disciplineCode: readString(source.disciplineCode),
originatorOrganizationPublicId: readString(
source.originatorOrganizationPublicId
),
recipients: toRecipientsList(source.recipients),
subject: readString(source.subject),
category: readString(source.category),
discipline: readString(source.discipline),
date: readString(source.date),
documentDate: readString(source.documentDate),
confidence:
typeof source.confidence === 'number' ? source.confidence : undefined,
tags: toStringList(source.tags),
@@ -246,8 +279,10 @@ export class AiBatchProcessor extends WorkerHost {
/** ประมวลผล sandbox OCR + Metadata extraction โดยไม่บันทึกลง database */
private async processSandboxExtract(data: AiBatchJobData): Promise<void> {
const { idempotencyKey, payload } = data;
const { idempotencyKey, payload, projectPublicId } = data;
const pdfPath = payload.pdfPath as string;
const overrideProjPublicId =
(payload.projectPublicId as string) || projectPublicId;
if (!pdfPath) {
throw new Error('pdfPath is required for sandbox-extract job');
}
@@ -261,11 +296,26 @@ export class AiBatchProcessor extends WorkerHost {
);
try {
const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
const { resolvedPrompt, versionNumber } =
await this.aiPromptsService.resolveActive(
'ocr_extraction',
ocrResult.text
const activePrompt =
await this.aiPromptsService.getActive('ocr_extraction');
if (!activePrompt) {
throw new Error('No active ocr_extraction prompt version found');
}
// ดึงบริบท Master data
const masterDataContext = await this.aiPromptsService.resolveContext(
activePrompt,
overrideProjPublicId
);
const resolvedPrompt = activePrompt.template
.replace('{{ocr_text}}', ocrResult.text)
.replace(
'{{master_data_context}}',
JSON.stringify(masterDataContext, null, 2)
);
const response = await this.ollamaService.generate(resolvedPrompt, {
timeoutMs: 120000,
});
@@ -286,7 +336,7 @@ export class AiBatchProcessor extends WorkerHost {
}
await this.aiPromptsService.saveTestResult(
'ocr_extraction',
versionNumber,
activePrompt.versionNumber,
extractedMetadata
);
await this.redis.setex(
@@ -296,7 +346,7 @@ export class AiBatchProcessor extends WorkerHost {
requestPublicId: idempotencyKey,
status: 'completed',
answer: JSON.stringify(extractedMetadata, null, 2),
promptVersionUsed: versionNumber,
promptVersionUsed: activePrompt.versionNumber,
completedAt: new Date().toISOString(),
})
);
@@ -323,6 +373,13 @@ export class AiBatchProcessor extends WorkerHost {
const startTime = Date.now();
const { documentPublicId, projectPublicId, payload, batchId } = job.data;
const docNumber = payload.documentNumber as string;
const contextOverride =
payload.contextOverride &&
typeof payload.contextOverride === 'object' &&
!Array.isArray(payload.contextOverride)
? (payload.contextOverride as Record<string, unknown>)
: {};
const contractPublicId = readString(contextOverride.contractPublicId);
const attachment = await this.attachmentRepo.findOne({
where: { publicId: documentPublicId },
});
@@ -358,10 +415,27 @@ export class AiBatchProcessor extends WorkerHost {
});
throw err;
}
const { resolvedPrompt } = await this.aiPromptsService.resolveActive(
'ocr_extraction',
ocrResult.text
const activePrompt =
await this.aiPromptsService.getActive('ocr_extraction');
if (!activePrompt) {
throw new Error('No active prompt found for ocr_extraction');
}
// ดึงบริบทอ้างอิงโครงการที่กรองแล้ว (Data Isolation)
const masterDataContext = await this.aiPromptsService.resolveContext(
activePrompt,
projectPublicId,
contractPublicId
);
const resolvedPrompt = activePrompt.template
.replace('{{ocr_text}}', ocrResult.text)
.replace(
'{{master_data_context}}',
JSON.stringify(masterDataContext, null, 2)
);
let aiResponse: string;
try {
aiResponse = await this.ollamaService.generate(resolvedPrompt, {
@@ -411,50 +485,162 @@ export class AiBatchProcessor extends WorkerHost {
});
throw new Error(errMsg);
}
// 3. ตรวจสอบและค้นหา Tags Suggestion ร่วมกับ Auto-Diff (EC-001)
const aiIssues: Record<string, unknown>[] = [];
let mappedTags: Record<string, string>[] = [];
if (extractedMetadata.tags && extractedMetadata.tags.length > 0) {
const tags = await this.tagsService.findOrCreateTags(
const tagResults = await this.tagsService.findOrSuggestTags(
project.id,
extractedMetadata.tags,
attachment.uploadedByUserId
);
mappedTags = tags.map((t) => ({
publicId: t.publicId,
tagName: t.tagName,
mappedTags = tagResults.map(({ tag }) => ({
publicId: tag.publicId,
tagName: tag.tagName,
}));
// บันทึก Tag ใหม่ที่ไม่มีในระบบเป็น aiIssues เพื่อให้มนุษย์ตรวจสอบ
for (const { tag, isNew } of tagResults) {
if (isNew) {
aiIssues.push({
type: 'NEW_TAG_SUGGESTED',
tagPublicId: tag.publicId,
tagName: tag.tagName,
message: `Tag '${tag.tagName}' ถูกสร้างใหม่โดย AI — ต้องการการตรวจสอบจากมนุษย์`,
});
}
}
}
const confidence =
typeof extractedMetadata.confidence === 'number'
? extractedMetadata.confidence
: 0.5;
const isValid = confidence >= 0.6 && !!extractedMetadata.documentNumber;
// 4. Resolve UUIDs of Sender/Recipient Organizations to Database IDs (ADR-019)
// EC-002: UUID ที่หาไม่พบใน Master Data จะถูก flag ใน aiIssues และ isValid = false
let senderOrgId: number | undefined = undefined;
if (extractedMetadata.originatorOrganizationPublicId) {
const foundOrg = await this.attachmentRepo.manager
.createQueryBuilder()
.select('org.id', 'id')
.from('organizations', 'org')
.where('org.uuid = :uuid', {
uuid: extractedMetadata.originatorOrganizationPublicId,
})
.getRawOne<{ id: number }>();
if (foundOrg) {
senderOrgId = Number(foundOrg.id);
} else {
// EC-002: UUID ของผู้ส่งไม่มีใน Master Data — flag เพื่อ human review
aiIssues.push({
type: 'UNRESOLVED_SENDER_UUID',
uuid: extractedMetadata.originatorOrganizationPublicId,
message: `UUID ผู้ส่ง '${extractedMetadata.originatorOrganizationPublicId}' ไม่พบใน Master Data — ต้องการการตรวจสอบจากมนุษย์`,
});
}
}
let primaryReceiverOrgId: number | undefined = undefined;
if (
extractedMetadata.recipients &&
extractedMetadata.recipients.length > 0
) {
// ดึงผู้รับที่เป็นประเภท TO รายแรกเป็นผู้รับหลัก (Primary Receiver)
const primaryReceiverObj =
extractedMetadata.recipients.find((r) => r.recipientType === 'TO') ||
extractedMetadata.recipients[0];
const foundOrg = await this.attachmentRepo.manager
.createQueryBuilder()
.select('org.id', 'id')
.from('organizations', 'org')
.where('org.uuid = :uuid', {
uuid: primaryReceiverObj.organizationPublicId,
})
.getRawOne<{ id: number }>();
if (foundOrg) {
primaryReceiverOrgId = Number(foundOrg.id);
} else {
// EC-002: UUID ของผู้รับไม่มีใน Master Data — flag เพื่อ human review
aiIssues.push({
type: 'UNRESOLVED_RECIPIENT_UUID',
uuid: primaryReceiverObj.organizationPublicId,
message: `UUID ผู้รับ '${primaryReceiverObj.organizationPublicId}' ไม่พบใน Master Data — ต้องการการตรวจสอบจากมนุษย์`,
});
}
}
// 5. ดึงประเภทเอกสารโต้ตอบ (Category Type) และสาขางาน (Discipline)
let matchedCategory = 'Correspondence';
if (extractedMetadata.correspondenceTypeCode) {
const foundType = await this.attachmentRepo.manager
.createQueryBuilder()
.select('t.type_name', 'name')
.from('correspondence_types', 't')
.where('t.type_code = :code', {
code: extractedMetadata.correspondenceTypeCode,
})
.getRawOne<{ name: string }>();
if (foundType) {
matchedCategory = foundType.name;
}
}
let matchedDisciplineId: number | undefined = undefined;
if (extractedMetadata.disciplineCode) {
const foundDisp = await this.attachmentRepo.manager
.createQueryBuilder()
.select('d.id', 'id')
.from('disciplines', 'd')
.where('d.discipline_code = :code', {
code: extractedMetadata.disciplineCode,
})
.getRawOne<{ id: number }>();
if (foundDisp) {
matchedDisciplineId = Number(foundDisp.id);
}
}
// 6. ส่งบันทึกเข้าสู่ Review Queue พร้อมคืนค่าผู้รับ Object Array ใน JSON metadata details
// EC-002: หากมี UUID ที่ไม่สามารถ resolve ได้ ให้ isValid = false เพื่อส่งเข้า review เสมอ
const hasUnresolvedUuids = aiIssues.some(
(issue) =>
issue.type === 'UNRESOLVED_SENDER_UUID' ||
issue.type === 'UNRESOLVED_RECIPIENT_UUID'
);
const isValid = confidence >= 0.6 && !!docNumber && !hasUnresolvedUuids;
const payloadTitle = readString(payload.title);
await this.migrationService.enqueueRecord({
documentNumber: extractedMetadata.documentNumber || docNumber,
documentNumber: docNumber,
subject: extractedMetadata.subject || payloadTitle,
originalSubject: payloadTitle,
body: extractedMetadata.summary || '',
category: extractedMetadata.category || 'Correspondence',
category: matchedCategory,
aiSummary: extractedMetadata.summary || '',
projectId: project.id,
senderOrgId: readNumberId(payload.senderOrgId),
receiverOrgId: readNumberId(payload.receiverOrgId),
issuedDate: extractedMetadata.date || undefined,
receivedDate: extractedMetadata.date || undefined,
senderOrgId: senderOrgId || readNumberId(payload.senderOrgId),
receiverOrgId:
primaryReceiverOrgId || readNumberId(payload.receiverOrgId),
issuedDate: extractedMetadata.documentDate || undefined,
receivedDate: extractedMetadata.documentDate || undefined,
extractedTags: mappedTags,
tempAttachmentId: attachment.id,
isValid,
confidence,
aiJobId: String(job.id),
aiIssues: aiIssues.length > 0 ? aiIssues : undefined,
details: {
discipline: extractedMetadata.discipline,
disciplineCode: extractedMetadata.disciplineCode,
disciplineId: matchedDisciplineId,
recipientsList: extractedMetadata.recipients, // บันทึก Object Array สกัดใหม่
},
});
await this.saveAiAuditLog({
documentPublicId,
aiModel: this.ollamaService.getMainModelName(),
status: AiAuditStatus.SUCCESS,
aiSuggestionJson: extractedMetadata,
aiSuggestionJson: extractedMetadata as unknown as Record<string, unknown>,
confidenceScore: confidence,
processingTimeMs: Date.now() - startTime,
});