690528:1524 ADR-030-230 context aware #02
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
// - 2026-05-22: แก้ไข type compilation error ใน processMigrateDocument และนำช่องว่างภายในฟังก์ชันออก
|
||||
// - 2026-05-25: เพิ่ม AiPromptsService เพื่อดึง Dynamic Prompt สำหรับ OCR extraction ใน sandbox และ migration pipeline
|
||||
// - 2026-05-26: แก้ไข bug lockDuration=30000ms ทำให้ sandbox-extract job stall เมื่อ Ollama ใช้เวลา >30s — เพิ่ม lockDuration: 150000
|
||||
// - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ
|
||||
|
||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||
import { Logger } from '@nestjs/common';
|
||||
@@ -30,14 +31,16 @@ import { MigrationErrorType } from '../../migration/entities/migration-error.ent
|
||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||
|
||||
interface MigrateDocumentMetadata extends Record<string, unknown> {
|
||||
documentNumber?: string;
|
||||
projectPublicId?: string;
|
||||
correspondenceTypeCode?: string;
|
||||
disciplineCode?: string;
|
||||
originatorOrganizationPublicId?: string;
|
||||
recipients?: Array<{ organizationPublicId: string; recipientType: string }>;
|
||||
subject?: string;
|
||||
category?: string;
|
||||
discipline?: string;
|
||||
date?: string;
|
||||
confidence?: number;
|
||||
documentDate?: string;
|
||||
tags?: string[];
|
||||
summary?: string;
|
||||
confidence?: number;
|
||||
}
|
||||
|
||||
export type AiBatchJobType =
|
||||
@@ -72,6 +75,32 @@ const toStringList = (value: unknown): string[] =>
|
||||
? value.filter((item): item is string => typeof item === 'string')
|
||||
: [];
|
||||
|
||||
const toRecipientsList = (
|
||||
value: unknown
|
||||
): Array<{ organizationPublicId: string; recipientType: string }> => {
|
||||
if (!Array.isArray(value)) {
|
||||
return [];
|
||||
}
|
||||
const result: Array<{ organizationPublicId: string; recipientType: string }> =
|
||||
[];
|
||||
for (const item of value) {
|
||||
if (item && typeof item === 'object') {
|
||||
const obj = item as Record<string, unknown>;
|
||||
const orgId = readString(obj.organizationPublicId);
|
||||
const type = readString(obj.recipientType);
|
||||
if (orgId && type) {
|
||||
// Normalize 'CC ' whitespace typo to 'CC'
|
||||
const normalizedType = type.trim() === 'CC' ? 'CC' : type.trim();
|
||||
result.push({
|
||||
organizationPublicId: orgId,
|
||||
recipientType: normalizedType,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
const parseMigrateDocumentMetadata = (
|
||||
cleanedResponse: string
|
||||
): MigrateDocumentMetadata => {
|
||||
@@ -81,11 +110,15 @@ const parseMigrateDocumentMetadata = (
|
||||
}
|
||||
const source = parsed as Record<string, unknown>;
|
||||
return {
|
||||
documentNumber: readString(source.documentNumber),
|
||||
projectPublicId: readString(source.projectPublicId),
|
||||
correspondenceTypeCode: readString(source.correspondenceTypeCode),
|
||||
disciplineCode: readString(source.disciplineCode),
|
||||
originatorOrganizationPublicId: readString(
|
||||
source.originatorOrganizationPublicId
|
||||
),
|
||||
recipients: toRecipientsList(source.recipients),
|
||||
subject: readString(source.subject),
|
||||
category: readString(source.category),
|
||||
discipline: readString(source.discipline),
|
||||
date: readString(source.date),
|
||||
documentDate: readString(source.documentDate),
|
||||
confidence:
|
||||
typeof source.confidence === 'number' ? source.confidence : undefined,
|
||||
tags: toStringList(source.tags),
|
||||
@@ -246,8 +279,10 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
|
||||
/** ประมวลผล sandbox OCR + Metadata extraction โดยไม่บันทึกลง database */
|
||||
private async processSandboxExtract(data: AiBatchJobData): Promise<void> {
|
||||
const { idempotencyKey, payload } = data;
|
||||
const { idempotencyKey, payload, projectPublicId } = data;
|
||||
const pdfPath = payload.pdfPath as string;
|
||||
const overrideProjPublicId =
|
||||
(payload.projectPublicId as string) || projectPublicId;
|
||||
if (!pdfPath) {
|
||||
throw new Error('pdfPath is required for sandbox-extract job');
|
||||
}
|
||||
@@ -261,11 +296,26 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
);
|
||||
try {
|
||||
const ocrResult = await this.ocrService.detectAndExtract({ pdfPath });
|
||||
const { resolvedPrompt, versionNumber } =
|
||||
await this.aiPromptsService.resolveActive(
|
||||
'ocr_extraction',
|
||||
ocrResult.text
|
||||
|
||||
const activePrompt =
|
||||
await this.aiPromptsService.getActive('ocr_extraction');
|
||||
if (!activePrompt) {
|
||||
throw new Error('No active ocr_extraction prompt version found');
|
||||
}
|
||||
|
||||
// ดึงบริบท Master data
|
||||
const masterDataContext = await this.aiPromptsService.resolveContext(
|
||||
activePrompt,
|
||||
overrideProjPublicId
|
||||
);
|
||||
|
||||
const resolvedPrompt = activePrompt.template
|
||||
.replace('{{ocr_text}}', ocrResult.text)
|
||||
.replace(
|
||||
'{{master_data_context}}',
|
||||
JSON.stringify(masterDataContext, null, 2)
|
||||
);
|
||||
|
||||
const response = await this.ollamaService.generate(resolvedPrompt, {
|
||||
timeoutMs: 120000,
|
||||
});
|
||||
@@ -286,7 +336,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
}
|
||||
await this.aiPromptsService.saveTestResult(
|
||||
'ocr_extraction',
|
||||
versionNumber,
|
||||
activePrompt.versionNumber,
|
||||
extractedMetadata
|
||||
);
|
||||
await this.redis.setex(
|
||||
@@ -296,7 +346,7 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
requestPublicId: idempotencyKey,
|
||||
status: 'completed',
|
||||
answer: JSON.stringify(extractedMetadata, null, 2),
|
||||
promptVersionUsed: versionNumber,
|
||||
promptVersionUsed: activePrompt.versionNumber,
|
||||
completedAt: new Date().toISOString(),
|
||||
})
|
||||
);
|
||||
@@ -323,6 +373,13 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
const startTime = Date.now();
|
||||
const { documentPublicId, projectPublicId, payload, batchId } = job.data;
|
||||
const docNumber = payload.documentNumber as string;
|
||||
const contextOverride =
|
||||
payload.contextOverride &&
|
||||
typeof payload.contextOverride === 'object' &&
|
||||
!Array.isArray(payload.contextOverride)
|
||||
? (payload.contextOverride as Record<string, unknown>)
|
||||
: {};
|
||||
const contractPublicId = readString(contextOverride.contractPublicId);
|
||||
const attachment = await this.attachmentRepo.findOne({
|
||||
where: { publicId: documentPublicId },
|
||||
});
|
||||
@@ -358,10 +415,27 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
const { resolvedPrompt } = await this.aiPromptsService.resolveActive(
|
||||
'ocr_extraction',
|
||||
ocrResult.text
|
||||
|
||||
const activePrompt =
|
||||
await this.aiPromptsService.getActive('ocr_extraction');
|
||||
if (!activePrompt) {
|
||||
throw new Error('No active prompt found for ocr_extraction');
|
||||
}
|
||||
|
||||
// ดึงบริบทอ้างอิงโครงการที่กรองแล้ว (Data Isolation)
|
||||
const masterDataContext = await this.aiPromptsService.resolveContext(
|
||||
activePrompt,
|
||||
projectPublicId,
|
||||
contractPublicId
|
||||
);
|
||||
|
||||
const resolvedPrompt = activePrompt.template
|
||||
.replace('{{ocr_text}}', ocrResult.text)
|
||||
.replace(
|
||||
'{{master_data_context}}',
|
||||
JSON.stringify(masterDataContext, null, 2)
|
||||
);
|
||||
|
||||
let aiResponse: string;
|
||||
try {
|
||||
aiResponse = await this.ollamaService.generate(resolvedPrompt, {
|
||||
@@ -411,50 +485,162 @@ export class AiBatchProcessor extends WorkerHost {
|
||||
});
|
||||
throw new Error(errMsg);
|
||||
}
|
||||
|
||||
// 3. ตรวจสอบและค้นหา Tags Suggestion ร่วมกับ Auto-Diff (EC-001)
|
||||
const aiIssues: Record<string, unknown>[] = [];
|
||||
let mappedTags: Record<string, string>[] = [];
|
||||
if (extractedMetadata.tags && extractedMetadata.tags.length > 0) {
|
||||
const tags = await this.tagsService.findOrCreateTags(
|
||||
const tagResults = await this.tagsService.findOrSuggestTags(
|
||||
project.id,
|
||||
extractedMetadata.tags,
|
||||
attachment.uploadedByUserId
|
||||
);
|
||||
mappedTags = tags.map((t) => ({
|
||||
publicId: t.publicId,
|
||||
tagName: t.tagName,
|
||||
mappedTags = tagResults.map(({ tag }) => ({
|
||||
publicId: tag.publicId,
|
||||
tagName: tag.tagName,
|
||||
}));
|
||||
// บันทึก Tag ใหม่ที่ไม่มีในระบบเป็น aiIssues เพื่อให้มนุษย์ตรวจสอบ
|
||||
for (const { tag, isNew } of tagResults) {
|
||||
if (isNew) {
|
||||
aiIssues.push({
|
||||
type: 'NEW_TAG_SUGGESTED',
|
||||
tagPublicId: tag.publicId,
|
||||
tagName: tag.tagName,
|
||||
message: `Tag '${tag.tagName}' ถูกสร้างใหม่โดย AI — ต้องการการตรวจสอบจากมนุษย์`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
const confidence =
|
||||
typeof extractedMetadata.confidence === 'number'
|
||||
? extractedMetadata.confidence
|
||||
: 0.5;
|
||||
const isValid = confidence >= 0.6 && !!extractedMetadata.documentNumber;
|
||||
|
||||
// 4. Resolve UUIDs of Sender/Recipient Organizations to Database IDs (ADR-019)
|
||||
// EC-002: UUID ที่หาไม่พบใน Master Data จะถูก flag ใน aiIssues และ isValid = false
|
||||
let senderOrgId: number | undefined = undefined;
|
||||
if (extractedMetadata.originatorOrganizationPublicId) {
|
||||
const foundOrg = await this.attachmentRepo.manager
|
||||
.createQueryBuilder()
|
||||
.select('org.id', 'id')
|
||||
.from('organizations', 'org')
|
||||
.where('org.uuid = :uuid', {
|
||||
uuid: extractedMetadata.originatorOrganizationPublicId,
|
||||
})
|
||||
.getRawOne<{ id: number }>();
|
||||
if (foundOrg) {
|
||||
senderOrgId = Number(foundOrg.id);
|
||||
} else {
|
||||
// EC-002: UUID ของผู้ส่งไม่มีใน Master Data — flag เพื่อ human review
|
||||
aiIssues.push({
|
||||
type: 'UNRESOLVED_SENDER_UUID',
|
||||
uuid: extractedMetadata.originatorOrganizationPublicId,
|
||||
message: `UUID ผู้ส่ง '${extractedMetadata.originatorOrganizationPublicId}' ไม่พบใน Master Data — ต้องการการตรวจสอบจากมนุษย์`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let primaryReceiverOrgId: number | undefined = undefined;
|
||||
if (
|
||||
extractedMetadata.recipients &&
|
||||
extractedMetadata.recipients.length > 0
|
||||
) {
|
||||
// ดึงผู้รับที่เป็นประเภท TO รายแรกเป็นผู้รับหลัก (Primary Receiver)
|
||||
const primaryReceiverObj =
|
||||
extractedMetadata.recipients.find((r) => r.recipientType === 'TO') ||
|
||||
extractedMetadata.recipients[0];
|
||||
const foundOrg = await this.attachmentRepo.manager
|
||||
.createQueryBuilder()
|
||||
.select('org.id', 'id')
|
||||
.from('organizations', 'org')
|
||||
.where('org.uuid = :uuid', {
|
||||
uuid: primaryReceiverObj.organizationPublicId,
|
||||
})
|
||||
.getRawOne<{ id: number }>();
|
||||
if (foundOrg) {
|
||||
primaryReceiverOrgId = Number(foundOrg.id);
|
||||
} else {
|
||||
// EC-002: UUID ของผู้รับไม่มีใน Master Data — flag เพื่อ human review
|
||||
aiIssues.push({
|
||||
type: 'UNRESOLVED_RECIPIENT_UUID',
|
||||
uuid: primaryReceiverObj.organizationPublicId,
|
||||
message: `UUID ผู้รับ '${primaryReceiverObj.organizationPublicId}' ไม่พบใน Master Data — ต้องการการตรวจสอบจากมนุษย์`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// 5. ดึงประเภทเอกสารโต้ตอบ (Category Type) และสาขางาน (Discipline)
|
||||
let matchedCategory = 'Correspondence';
|
||||
if (extractedMetadata.correspondenceTypeCode) {
|
||||
const foundType = await this.attachmentRepo.manager
|
||||
.createQueryBuilder()
|
||||
.select('t.type_name', 'name')
|
||||
.from('correspondence_types', 't')
|
||||
.where('t.type_code = :code', {
|
||||
code: extractedMetadata.correspondenceTypeCode,
|
||||
})
|
||||
.getRawOne<{ name: string }>();
|
||||
if (foundType) {
|
||||
matchedCategory = foundType.name;
|
||||
}
|
||||
}
|
||||
|
||||
let matchedDisciplineId: number | undefined = undefined;
|
||||
if (extractedMetadata.disciplineCode) {
|
||||
const foundDisp = await this.attachmentRepo.manager
|
||||
.createQueryBuilder()
|
||||
.select('d.id', 'id')
|
||||
.from('disciplines', 'd')
|
||||
.where('d.discipline_code = :code', {
|
||||
code: extractedMetadata.disciplineCode,
|
||||
})
|
||||
.getRawOne<{ id: number }>();
|
||||
if (foundDisp) {
|
||||
matchedDisciplineId = Number(foundDisp.id);
|
||||
}
|
||||
}
|
||||
|
||||
// 6. ส่งบันทึกเข้าสู่ Review Queue พร้อมคืนค่าผู้รับ Object Array ใน JSON metadata details
|
||||
// EC-002: หากมี UUID ที่ไม่สามารถ resolve ได้ ให้ isValid = false เพื่อส่งเข้า review เสมอ
|
||||
const hasUnresolvedUuids = aiIssues.some(
|
||||
(issue) =>
|
||||
issue.type === 'UNRESOLVED_SENDER_UUID' ||
|
||||
issue.type === 'UNRESOLVED_RECIPIENT_UUID'
|
||||
);
|
||||
const isValid = confidence >= 0.6 && !!docNumber && !hasUnresolvedUuids;
|
||||
const payloadTitle = readString(payload.title);
|
||||
|
||||
await this.migrationService.enqueueRecord({
|
||||
documentNumber: extractedMetadata.documentNumber || docNumber,
|
||||
documentNumber: docNumber,
|
||||
subject: extractedMetadata.subject || payloadTitle,
|
||||
originalSubject: payloadTitle,
|
||||
body: extractedMetadata.summary || '',
|
||||
category: extractedMetadata.category || 'Correspondence',
|
||||
category: matchedCategory,
|
||||
aiSummary: extractedMetadata.summary || '',
|
||||
projectId: project.id,
|
||||
senderOrgId: readNumberId(payload.senderOrgId),
|
||||
receiverOrgId: readNumberId(payload.receiverOrgId),
|
||||
issuedDate: extractedMetadata.date || undefined,
|
||||
receivedDate: extractedMetadata.date || undefined,
|
||||
senderOrgId: senderOrgId || readNumberId(payload.senderOrgId),
|
||||
receiverOrgId:
|
||||
primaryReceiverOrgId || readNumberId(payload.receiverOrgId),
|
||||
issuedDate: extractedMetadata.documentDate || undefined,
|
||||
receivedDate: extractedMetadata.documentDate || undefined,
|
||||
extractedTags: mappedTags,
|
||||
tempAttachmentId: attachment.id,
|
||||
isValid,
|
||||
confidence,
|
||||
aiJobId: String(job.id),
|
||||
aiIssues: aiIssues.length > 0 ? aiIssues : undefined,
|
||||
details: {
|
||||
discipline: extractedMetadata.discipline,
|
||||
disciplineCode: extractedMetadata.disciplineCode,
|
||||
disciplineId: matchedDisciplineId,
|
||||
recipientsList: extractedMetadata.recipients, // บันทึก Object Array สกัดใหม่
|
||||
},
|
||||
});
|
||||
|
||||
await this.saveAiAuditLog({
|
||||
documentPublicId,
|
||||
aiModel: this.ollamaService.getMainModelName(),
|
||||
status: AiAuditStatus.SUCCESS,
|
||||
aiSuggestionJson: extractedMetadata,
|
||||
aiSuggestionJson: extractedMetadata as unknown as Record<string, unknown>,
|
||||
confidenceScore: confidence,
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user