Files
lcbp3/specs/03-Data-and-Storage/AI Prompt.js
T
admin 15b447ceeb
Build and Deploy / deploy (push) Successful in 5m42s
260310:1705 20260310:1700 Refactor rfas
2026-03-10 17:05:30 +07:00

143 lines
6.8 KiB
JavaScript

// ---------------------------------------------------------------------------
// Inputs gathered from upstream n8n nodes. `$('<node name>')` is provided by
// the n8n Code-node runtime.
// ---------------------------------------------------------------------------
const { config } = $('Set Configuration').first().json;

// Model selection: the fallback model is used only while the fallback-state
// node reports it as active. A missing item is treated as "not in fallback".
const fallbackState = $('Check Fallback State').first()?.json || {
  is_fallback_active: false,
  recent_error_count: 0,
};
const isFallback = fallbackState.is_fallback_active || false;
const model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;

// Reference rows from the database. Every row carries a `type` discriminator
// plus generic `id` / `text1` / `text2` columns whose meaning depends on `type`.
const dbContext = $('Fetch DB Context').all().map(({ json }) => json);

/** Selects the rows of one `type` and reshapes each with `toRecord`. */
const rowsOfType = (type, toRecord) =>
  dbContext.filter((row) => row.type === type).map((row) => toRecord(row));

const dbProjects = rowsOfType('projects', (row) => ({ id: row.id, code: row.text1, name: row.text2 }));
const dbDisciplines = rowsOfType('disciplines', (row) => ({ id: row.id, th: row.text1, en: row.text2 }));
const dbOrgs = rowsOfType('organizations', (row) => ({ id: row.id, name: row.text1, code: row.text2 }));
const dbTags = rowsOfType('tags', (row) => ({ id: row.id, name: row.text1 }));
const dbCorrTypes = rowsOfType('correspondence_types', (row) => ({ id: row.id, code: row.text1, name: row.text2 }));

// Category list: prefer the one published by "File Mount Check". Reading it is
// best-effort on purpose - any failure while reading that node's output simply
// leaves the built-in defaults in place.
const DEFAULT_SYSTEM_CATEGORIES = ['Correspondence', 'RFA', 'Drawing', 'Transmittal', 'Report', 'Other'];
const readSystemCategories = () => {
  try {
    return $('File Mount Check').first().json.system_categories || DEFAULT_SYSTEM_CATEGORIES;
  } catch (e) {
    return DEFAULT_SYSTEM_CATEGORIES;
  }
};
const systemCategories = readSystemCategories();

// OCR text comes from "Extract PDF Text". The Excel metadata (sender, receiver,
// project_code, ...) is read from "File Validator" instead, because the later
// "Read PDF File" step overwrites the item JSON with binary data.
const pdfItems = $('Extract PDF Text').all();
const metaItems = $('File Validator').all();
return pdfItems.map((pdfItem, i) => {
const item = metaItems[i] || pdfItem;
const docNum = String(item.json.document_number || '');
const title = String(item.json.title || '');
const legacyNum = String(item.json.legacy_number || '');
const issuedDate = String(item.json.issued_date || '');
const receivedDate = String(item.json.received_date || '');
const corrType = String(item.json.correspondence_type || '');
const senderCode = String(item.json.sender || '');
const receiverCode = String(item.json.receiver || '');
const projectCode = String(item.json.project_code || '');
// JavaScript pre-mapping
const findOrgId = (code) => {
if (!code) return null;
const match = dbOrgs.find(o => o.code === code || o.name === code);
return match ? match.id : null;
};
const findProjectId = (code) => {
if (!code) return config.PROJECT_ID; // Fallback to config
const match = dbProjects.find(p => p.code === code || p.name === code);
return match ? match.id : config.PROJECT_ID;
};
const senderId = findOrgId(senderCode);
const receiverId = findOrgId(receiverCode);
const projectId = findProjectId(projectCode);
// Excel corrType is likely already the ID based on requirements, but fallback matching to ID if needed
const corrMatch = dbCorrTypes.find(c => String(c.id) === corrType || c.code === corrType || c.name === corrType);
const corrTypeId = corrMatch ? corrMatch.id : (isNaN(parseInt(corrType)) ? null : parseInt(corrType));
const isRFA = docNum.includes('-RFA-') || title.toLowerCase().includes('rfa');
const systemPrompt = `You are an expert Document Controller for a construction project (LCBP3) in Thailand.
The documents are primarily in THAI and ENGLISH.
Your task is to classify documents and extract metadata from OCR text.
Respond ONLY with valid JSON.`;
// Use pdfItem for the OCR extracted data, NOT the metaItem
const pdfText = String(pdfItem.json.data || '').substring(0, 3500).replace(/[^a-zA-Z0-9ก-๙\s\.\/\-:\[\]\(\)]/g, ' ');
const userPrompt = `Analyze this document:
[EXCEL METADATA]
Document Number: ${docNum || 'Not provided'}
Title: ${title || 'Not provided'}
Issued Date: ${issuedDate || 'Not provided'}
Received Date: ${receivedDate || 'Not provided'}
[DATABASE REFERENCES]
Disciplines: ${JSON.stringify(dbDisciplines)}
Tags: ${JSON.stringify(dbTags)}
[OCR TEXT EXTRACTION]
${pdfText}
Rules:
1. Category: Must be one of ${JSON.stringify(systemCategories)}. If Document Number contains "-RFA-", category MUST be "RFA".
2. Respond with EXACTLY 8 fields in JSON format:
- "discipline_id": Find 'id' from Disciplines array analyzing text to match 'th' or 'en'. If no match, use ID=64 (from contract LCBP3-C2).
- "subject": Document subject. If OCR is close to EXCEL METADATA Title, use EXCEL METADATA.
- "issued_date": Verify from OCR text if it matches ${issuedDate}, format YYYY-MM-DD.
- "received_date": Verify from OCR text. If empty, default to issued_date.
- "status": Extract status (e.g., For Information, Approve, Reject, Resubmit). This will be exported as "remark".
- "summary": 4-5 lines of Thai summary from OCR. This will be exported as "body".
- "tags": REQUIRED. Identify 2-5 main topics/themes from the document (from Title, subject matter, and OCR text). For each topic, return an object with:
* "tag_name": short topic name in Thai (2-5 words), e.g. "คอนกรีตผสม", "ทดสอบวัสดุ"
* "description": one sentence in Thai describing this topic (use key point details). e.g. "การทดสอบค่า slump ของคอนกรีตผสมที่หน้างาน"
Return as: [{"tag_name": "...", "description": "..."}, ...]
- "key_points": Array of 3-5 string key points extracted from the document (in Thai).
3. IMPORTANT: You MUST REPLACE the 'null' values in the template below with the actual Integer IDs or text you found. DO NOT reply with literal 'null' if you found a match!
Respond ONLY with this EXACT JSON structure:
{
"discipline_id": 64,
"subject": "${title}",
"issued_date": "${issuedDate}",
"received_date": "${receivedDate || issuedDate}",
"status": null,
"summary": "สรุปเนื้อหา 4-5 บรรทัด...",
"tags": [{"tag_name": "ชื่อหัวข้อ", "description": "คำอธิบาย key point ของหัวข้อนี้"}],
"key_points": ["จุดสำคัญที่ 1", "จุดสำคัญที่ 2", "จุดสำคัญที่ 3"],
"category": "${isRFA ? 'RFA' : 'Correspondence'}",
"confidence": 0.95
}`;
return {
json: {
...item.json,
active_model: model,
is_fallback: isFallback,
system_categories: systemCategories,
pre_mapped: {
project_id: projectId,
sender_id: senderId,
receiver_id: receiverId,
correspondence_type_id: corrTypeId
},
_debug_mapping: {
excel_project_code: projectCode,
excel_sender: senderCode,
excel_receiver: receiverCode,
excel_corr_type: corrType,
matched_project: dbProjects.find(p => p.code === projectCode || p.name === projectCode) || null,
first_org_sample: dbOrgs[0] || null
},
ollama_payload: {
model: model,
prompt: `${systemPrompt}\n\n${userPrompt}`,
stream: false,
format: 'json',
options: {
temperature: 0.1,
num_ctx: 8192
}
}
}
};
});