143 lines
6.8 KiB
JavaScript
143 lines
6.8 KiB
JavaScript
const config = $('Set Configuration').first().json.config;
|
|
const fallbackState = $('Check Fallback State').first()?.json || { is_fallback_active: false, recent_error_count: 0 };
|
|
const isFallback = fallbackState.is_fallback_active || false;
|
|
const model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;
|
|
|
|
// Read DB Context
|
|
const dbContext = $('Fetch DB Context').all().map(i => i.json);
|
|
const dbProjects = dbContext.filter(d => d.type === 'projects').map(d => ({id: d.id, code: d.text1, name: d.text2}));
|
|
const dbDisciplines = dbContext.filter(d => d.type === 'disciplines').map(d => ({id: d.id, th: d.text1, en: d.text2}));
|
|
const dbOrgs = dbContext.filter(d => d.type === 'organizations').map(d => ({id: d.id, name: d.text1, code: d.text2}));
|
|
const dbTags = dbContext.filter(d => d.type === 'tags').map(d => ({id: d.id, name: d.text1}));
|
|
const dbCorrTypes = dbContext.filter(d => d.type === 'correspondence_types').map(d => ({id: d.id, code: d.text1, name: d.text2}));
|
|
|
|
let systemCategories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];
|
|
try { systemCategories = $('File Mount Check').first().json.system_categories || systemCategories; } catch (e) {}
|
|
|
|
const pdfItems = $('Extract PDF Text').all();
|
|
// File Validator passes all original Excel JSON fields through (sender, receiver, project_code, etc.)
|
|
// Read PDF File overwrites the JSON with binary data, so we must go back one step
|
|
const metaItems = $('File Validator').all();
|
|
|
|
return pdfItems.map((pdfItem, i) => {
|
|
const item = metaItems[i] || pdfItem;
|
|
|
|
const docNum = String(item.json.document_number || '');
|
|
const title = String(item.json.title || '');
|
|
const legacyNum = String(item.json.legacy_number || '');
|
|
const issuedDate = String(item.json.issued_date || '');
|
|
const receivedDate = String(item.json.received_date || '');
|
|
const corrType = String(item.json.correspondence_type || '');
|
|
const senderCode = String(item.json.sender || '');
|
|
const receiverCode = String(item.json.receiver || '');
|
|
const projectCode = String(item.json.project_code || '');
|
|
|
|
// JavaScript pre-mapping
|
|
const findOrgId = (code) => {
|
|
if (!code) return null;
|
|
const match = dbOrgs.find(o => o.code === code || o.name === code);
|
|
return match ? match.id : null;
|
|
};
|
|
|
|
const findProjectId = (code) => {
|
|
if (!code) return config.PROJECT_ID; // Fallback to config
|
|
const match = dbProjects.find(p => p.code === code || p.name === code);
|
|
return match ? match.id : config.PROJECT_ID;
|
|
};
|
|
|
|
const senderId = findOrgId(senderCode);
|
|
const receiverId = findOrgId(receiverCode);
|
|
const projectId = findProjectId(projectCode);
|
|
// Excel corrType is likely already the ID based on requirements, but fallback matching to ID if needed
|
|
const corrMatch = dbCorrTypes.find(c => String(c.id) === corrType || c.code === corrType || c.name === corrType);
|
|
const corrTypeId = corrMatch ? corrMatch.id : (isNaN(parseInt(corrType)) ? null : parseInt(corrType));
|
|
|
|
const isRFA = docNum.includes('-RFA-') || title.toLowerCase().includes('rfa');
|
|
|
|
const systemPrompt = `You are an expert Document Controller for a construction project (LCBP3) in Thailand.
|
|
The documents are primarily in THAI and ENGLISH.
|
|
Your task is to classify documents and extract metadata from OCR text.
|
|
Respond ONLY with valid JSON.`;
|
|
|
|
// Use pdfItem for the OCR extracted data, NOT the metaItem
|
|
const pdfText = String(pdfItem.json.data || '').substring(0, 3500).replace(/[^a-zA-Z0-9ก-๙\s\.\/\-:\[\]\(\)]/g, ' ');
|
|
|
|
const userPrompt = `Analyze this document:
|
|
[EXCEL METADATA]
|
|
Document Number: ${docNum || 'Not provided'}
|
|
Title: ${title || 'Not provided'}
|
|
Issued Date: ${issuedDate || 'Not provided'}
|
|
Received Date: ${receivedDate || 'Not provided'}
|
|
|
|
[DATABASE REFERENCES]
|
|
Disciplines: ${JSON.stringify(dbDisciplines)}
|
|
Tags: ${JSON.stringify(dbTags)}
|
|
|
|
[OCR TEXT EXTRACTION]
|
|
${pdfText}
|
|
|
|
Rules:
|
|
1. Category: Must be one of ${JSON.stringify(systemCategories)}. If Document Number contains "-RFA-", category MUST be "RFA".
|
|
2. Respond with EXACTLY 8 fields in JSON format:
|
|
- "discipline_id": Find 'id' from Disciplines array analyzing text to match 'th' or 'en'. If no match, use ID=64 (from contract LCBP3-C2).
|
|
- "subject": Document subject. If OCR is close to EXCEL METADATA Title, use EXCEL METADATA.
|
|
- "issued_date": Verify from OCR text if it matches ${issuedDate}, format YYYY-MM-DD.
|
|
- "received_date": Verify from OCR text. If empty, default to issued_date.
|
|
- "status": Extract status (e.g., For Information, Approve, Reject, Resubmit). This will be exported as "remark".
|
|
- "summary": 4-5 lines of Thai summary from OCR. This will be exported as "body".
|
|
- "tags": REQUIRED. Identify 2-5 main topics/themes from the document (from Title, subject matter, and OCR text). For each topic, return an object with:
|
|
* "tag_name": short topic name in Thai (2-5 words), e.g. "คอนกรีตผสม", "ทดสอบวัสดุ"
|
|
* "description": one sentence in Thai describing this topic (use key point details). e.g. "การทดสอบค่า slump ของคอนกรีตผสมที่หน้างาน"
|
|
Return as: [{"tag_name": "...", "description": "..."}, ...]
|
|
- "key_points": Array of 3-5 string key points extracted from the document (in Thai).
|
|
|
|
3. IMPORTANT: You MUST REPLACE the 'null' values in the template below with the actual Integer IDs or text you found. DO NOT reply with literal 'null' if you found a match!
|
|
|
|
Respond ONLY with this EXACT JSON structure:
|
|
{
|
|
"discipline_id": 64,
|
|
"subject": "${title}",
|
|
"issued_date": "${issuedDate}",
|
|
"received_date": "${receivedDate || issuedDate}",
|
|
"status": null,
|
|
"summary": "สรุปเนื้อหา 4-5 บรรทัด...",
|
|
"tags": [{"tag_name": "ชื่อหัวข้อ", "description": "คำอธิบาย key point ของหัวข้อนี้"}],
|
|
"key_points": ["จุดสำคัญที่ 1", "จุดสำคัญที่ 2", "จุดสำคัญที่ 3"],
|
|
"category": "${isRFA ? 'RFA' : 'Correspondence'}",
|
|
"confidence": 0.95
|
|
}`;
|
|
|
|
return {
|
|
json: {
|
|
...item.json,
|
|
active_model: model,
|
|
is_fallback: isFallback,
|
|
system_categories: systemCategories,
|
|
pre_mapped: {
|
|
project_id: projectId,
|
|
sender_id: senderId,
|
|
receiver_id: receiverId,
|
|
correspondence_type_id: corrTypeId
|
|
},
|
|
_debug_mapping: {
|
|
excel_project_code: projectCode,
|
|
excel_sender: senderCode,
|
|
excel_receiver: receiverCode,
|
|
excel_corr_type: corrType,
|
|
matched_project: dbProjects.find(p => p.code === projectCode || p.name === projectCode) || null,
|
|
first_org_sample: dbOrgs[0] || null
|
|
},
|
|
ollama_payload: {
|
|
model: model,
|
|
prompt: `${systemPrompt}\n\n${userPrompt}`,
|
|
stream: false,
|
|
format: 'json',
|
|
options: {
|
|
temperature: 0.1,
|
|
num_ctx: 8192
|
|
}
|
|
}
|
|
}
|
|
};
|
|
});
|