Files
lcbp3/specs/03-Data-and-Storage/AI Prompt.js
T
admin 11984bfa29
CI Pipeline / build (push) Failing after 12m41s
Build and Deploy / deploy (push) Failing after 2m44s
260322:1648 Correct Correspondence / Doing RFA / Correct CI
2026-03-22 16:48:12 +07:00

157 lines
6.9 KiB
JavaScript

// Workflow inputs for the AI-prompt builder. `$('<node name>')` is supplied by the host
// runtime (this script reads like an n8n Code node) and exposes another node's output
// items through `.first()` / `.all()`.

// Model selection: the fallback model is used while the fallback flag is active.
const config = $('Set Configuration').first().json.config;
const fallbackState = $('Check Fallback State').first()?.json || { is_fallback_active: false, recent_error_count: 0 };
const isFallback = fallbackState.is_fallback_active || false;
const model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;

// Read DB Context: one flat list of rows, each tagged with `type` and carrying its
// values in the generic columns `id`, `text1`, `text2`.
const dbContext = $('Fetch DB Context')
  .all()
  .map((i) => i.json);

/**
 * Selects the rows of one reference table and reshapes them.
 * @param {string} type - Value of the row's `type` field to keep.
 * @param {(row: object) => object} shape - Maps a raw row to its named-field form.
 * @returns {object[]} Reshaped rows, in their original order.
 */
const rowsOfType = (type, shape) => dbContext.filter((d) => d.type === type).map(shape);

const dbProjects = rowsOfType('projects', (d) => ({ id: d.id, code: d.text1, name: d.text2 }));
const dbDisciplines = rowsOfType('disciplines', (d) => ({ id: d.id, th: d.text1, en: d.text2 }));
// Note: organizations store the name in text1 and the code in text2 (reverse of projects).
const dbOrgs = rowsOfType('organizations', (d) => ({ id: d.id, name: d.text1, code: d.text2 }));
const dbTags = rowsOfType('tags', (d) => ({ id: d.id, name: d.text1 }));
const dbCorrTypes = rowsOfType('correspondence_types', (d) => ({ id: d.id, code: d.text1, name: d.text2 }));

// Document categories offered to the model. The built-in list is used unless the
// 'File Mount Check' node supplies a usable override.
const DEFAULT_SYSTEM_CATEGORIES = ['Correspondence', 'RFA', 'Drawing', 'Transmittal', 'Report', 'Other'];
let systemCategories = DEFAULT_SYSTEM_CATEGORIES;
try {
  const categoryOverride = $('File Mount Check').first().json.system_categories;
  // Only a non-empty array is a usable override: an empty list would leave the model
  // with no valid category to choose from.
  if (Array.isArray(categoryOverride) && categoryOverride.length > 0) {
    systemCategories = categoryOverride;
  }
} catch {
  // Deliberate best-effort: if the node's output cannot be read, keep the defaults.
}

const pdfItems = $('Extract PDF Text').all();
// File Validator passes all original Excel JSON fields through (sender, receiver, project_code, etc.)
// Read PDF File overwrites the JSON with binary data, so we must go back one step
const metaItems = $('File Validator').all();
return pdfItems.map((pdfItem, i) => {
const item = metaItems[i] || pdfItem;
const docNum = String(item.json.document_number || '');
const title = String(item.json.title || '');
const legacyNum = String(item.json.legacy_number || '');
const issuedDate = String(item.json.issued_date || '');
const receivedDate = String(item.json.received_date || '');
const corrType = String(item.json.correspondence_type || '');
const senderCode = String(item.json.sender || '');
const receiverCode = String(item.json.receiver || '');
const projectCode = String(item.json.project_code || '');
// JavaScript pre-mapping
const findOrgId = (code) => {
if (!code) return null;
const match = dbOrgs.find((o) => o.code === code || o.name === code);
return match ? match.id : null;
};
const findProjectId = (code) => {
if (!code) return config.PROJECT_ID; // Fallback to config
const match = dbProjects.find((p) => p.code === code || p.name === code);
return match ? match.id : config.PROJECT_ID;
};
const senderId = findOrgId(senderCode);
const receiverId = findOrgId(receiverCode);
const projectId = findProjectId(projectCode);
// Excel corrType is likely already the ID based on requirements, but fallback matching to ID if needed
const corrMatch = dbCorrTypes.find((c) => String(c.id) === corrType || c.code === corrType || c.name === corrType);
const corrTypeId = corrMatch ? corrMatch.id : isNaN(parseInt(corrType)) ? null : parseInt(corrType);
const isRFA = docNum.includes('-RFA-') || title.toLowerCase().includes('rfa');
const systemPrompt = `You are an expert Document Controller for a construction project (LCBP3) in Thailand.
The documents are primarily in THAI and ENGLISH.
Your task is to classify documents and extract metadata from OCR text.
Respond ONLY with valid JSON.`;
// Use pdfItem for the OCR extracted data, NOT the metaItem
const pdfText = String(pdfItem.json.data || '')
.substring(0, 3500)
.replace(/[^a-zA-Z0-9ก-๙\s\.\/\-:\[\]\(\)]/g, ' ');
const userPrompt = `Analyze this document:
[EXCEL METADATA]
Document Number: ${docNum || 'Not provided'}
Title: ${title || 'Not provided'}
Issued Date: ${issuedDate || 'Not provided'}
Received Date: ${receivedDate || 'Not provided'}
[DATABASE REFERENCES]
Disciplines: ${JSON.stringify(dbDisciplines)}
Tags: ${JSON.stringify(dbTags)}
[OCR TEXT EXTRACTION]
${pdfText}
Rules:
1. Category: Must be one of ${JSON.stringify(systemCategories)}. If Document Number contains "-RFA-", category MUST be "RFA".
2. Respond with EXACTLY 8 fields in JSON format:
- "discipline_id": Find 'id' from Disciplines array analyzing text to match 'th' or 'en'. If no match, use ID=64 (from contract LCBP3-C2).
- "subject": Document subject. If OCR is close to EXCEL METADATA Title, use EXCEL METADATA.
- "issued_date": Verify from OCR text if it matches ${issuedDate}, format YYYY-MM-DD.
- "received_date": Verify from OCR text. If empty, default to issued_date.
- "status": Extract status (e.g., For Information, Approve, Reject, Resubmit). This will be exported as "remark".
- "summary": 4-5 lines of Thai summary from OCR. This will be exported as "body".
- "tags": REQUIRED. Identify 2-5 main topics/themes from the document (from Title, subject matter, and OCR text). For each topic, return an object with:
* "tag_name": short topic name in Thai (2-5 words), e.g. "คอนกรีตผสม", "ทดสอบวัสดุ"
* "description": one sentence in Thai describing this topic (use key point details). e.g. "การทดสอบค่า slump ของคอนกรีตผสมที่หน้างาน"
Return as: [{"tag_name": "...", "description": "..."}, ...]
- "key_points": Array of 3-5 string key points extracted from the document (in Thai).
3. IMPORTANT: You MUST REPLACE the 'null' values in the template below with the actual Integer IDs or text you found. DO NOT reply with literal 'null' if you found a match!
Respond ONLY with this EXACT JSON structure:
{
"discipline_id": 64,
"subject": "${title}",
"issued_date": "${issuedDate}",
"received_date": "${receivedDate || issuedDate}",
"status": null,
"summary": "สรุปเนื้อหา 4-5 บรรทัด...",
"tags": [{"tag_name": "ชื่อหัวข้อ", "description": "คำอธิบาย key point ของหัวข้อนี้"}],
"key_points": ["จุดสำคัญที่ 1", "จุดสำคัญที่ 2", "จุดสำคัญที่ 3"],
"category": "${isRFA ? 'RFA' : 'Correspondence'}",
"confidence": 0.95
}`;
return {
json: {
...item.json,
active_model: model,
is_fallback: isFallback,
system_categories: systemCategories,
pre_mapped: {
project_id: projectId,
sender_id: senderId,
receiver_id: receiverId,
correspondence_type_id: corrTypeId,
},
_debug_mapping: {
excel_project_code: projectCode,
excel_sender: senderCode,
excel_receiver: receiverCode,
excel_corr_type: corrType,
matched_project: dbProjects.find((p) => p.code === projectCode || p.name === projectCode) || null,
first_org_sample: dbOrgs[0] || null,
},
ollama_payload: {
model: model,
prompt: `${systemPrompt}\n\n${userPrompt}`,
stream: false,
format: 'json',
options: {
temperature: 0.1,
num_ctx: 8192,
},
},
},
};
});