260306:1600 20260306:1500 refactor tags #2
Some checks failed
Build and Deploy / deploy (push) Failing after 4m9s

This commit is contained in:
admin
2026-03-06 16:00:43 +07:00
parent 752df1fe59
commit ab2f1ea344
8 changed files with 194 additions and 21 deletions

View File

@@ -1 +1,3 @@
{
"editor.fontSize": 18
}

3
backend/.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"editor.fontSize": 18
}

View File

@@ -1,3 +1,4 @@
{
"jest.rootPath": "/"
"jest.rootPath": "/",
"editor.fontSize": 18
}

View File

@@ -22,7 +22,7 @@
// EDITOR SETTINGS
// ========================================
"editor.fontSize": 16,
"editor.fontSize": 18,
"editor.tabSize": 2,
"editor.lineHeight": 1.6,
"editor.rulers": [80, 120],

3
specs/.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"editor.fontSize": 18
}

View File

@@ -4,7 +4,7 @@
| ------------------------------------------------------------------ | ------- |
| legacy PDF document migration to system v1.8.0 uses n8n and Ollama | 1.8.0 |
> **Note:** Category Enum system-driven, Idempotency Contract, Duplicate Handling Clarification, Storage Enforcement, Audit Log Enhancement, Review Queue Integration, Revision Drift Protection, Execution Time, Encoding Normalization, Security Hardening, Orchestrator on QNAP, AI Physical Isolation (Desktop Desk-5439), Folder Standard (/share/np-dms/n8n)
> **Note:** Category Enum system-driven, Idempotency Contract, Duplicate Handling Clarification, Storage Enforcement, Audit Log Enhancement, Review Queue Integration, Revision Drift Protection, Execution Time, Encoding Normalization, Security Hardening, Orchestrator on QNAP, AI Physical Isolation (Desktop Desk-5439), Folder Standard (/share/np-dms/n8n), **AI Tag Extraction & Auto-Tagging**
---
@@ -12,6 +12,7 @@
- นำเข้าเอกสาร PDF 20,000 ฉบับ พร้อม Metadata จาก Excel (Legacy system export) เข้าสู่ระบบ LCBP3-DMS
- ใช้ AI (Ollama Local Model) เพื่อตรวจสอบความถูกต้องของลักษณะข้อมูล (Data format, Title consistency) ก่อนการนำเข้า
- **AI Tag Extraction:** ใช้ Ollama วิเคราะห์เอกสารและสกัด Tags ที่เกี่ยวข้อง (เช่น สาขางาน, ประเภทเอกสาร, องค์กร) อัตโนมัติ
- รักษาโครงสร้างความสัมพันธ์ (Project / Contract / Ref No.) และระบบการทำ Revision ตาม Business Rules
- **Checkpoint Support:** รองรับการหยุดและเริ่มงานต่อ (Resume) จากจุดที่ค้างอยู่ได้กรณีเกิดเหตุขัดข้อง
@@ -84,6 +85,36 @@ CREATE TABLE IF NOT EXISTS migration_progress (
);
```
**Tags Table (สำหรับ AI Tag Extraction):**
```sql
-- Master table of tags. A tag is either global (project_id IS NULL) or
-- scoped to a single project (project_id set).
CREATE TABLE tags (
id INT PRIMARY KEY AUTO_INCREMENT,
project_id INT NULL COMMENT 'NULL = Global Tag',
tag_name VARCHAR(100) NOT NULL,
color_code VARCHAR(30) DEFAULT 'default',
description TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
created_by INT,
-- NOTE(review): presumably a soft-delete marker (NULL = active) — confirm against the backend.
deleted_at DATETIME NULL,
-- One tag_name per project.
-- NOTE(review): MySQL/MariaDB unique indexes allow any number of NULLs, so this key
-- does NOT prevent two global tags (project_id IS NULL) with the same tag_name.
-- Writers must dedupe global tags themselves.
UNIQUE KEY ux_tag_project (project_id, tag_name),
INDEX idx_tags_deleted_at (deleted_at),
-- Deleting a project deletes its project-scoped tags.
FOREIGN KEY (project_id) REFERENCES projects (id) ON DELETE CASCADE,
-- Deleting the creating user keeps the tag and clears created_by.
FOREIGN KEY (created_by) REFERENCES users (user_id) ON DELETE SET NULL
);
-- Junction table implementing the many-to-many link between
-- correspondences and tags. Rows disappear automatically when either
-- side of the link is deleted.
CREATE TABLE correspondence_tags (
    correspondence_id INT,
    tag_id            INT,
    PRIMARY KEY (correspondence_id, tag_id),
    FOREIGN KEY (correspondence_id)
        REFERENCES correspondences (id)
        ON DELETE CASCADE,
    FOREIGN KEY (tag_id)
        REFERENCES tags (id)
        ON DELETE CASCADE,
    -- Supports "which documents carry this tag" lookups; the primary key
    -- only covers lookups that start from correspondence_id.
    INDEX idx_tag_lookup (tag_id)
);
```
**Idempotency Table:**
```sql
CREATE TABLE IF NOT EXISTS import_transactions (
@@ -185,7 +216,7 @@ return items.map(item => ({
**System Prompt:**
```text
You are a Document Controller for a large construction project.
Your task is to validate document metadata.
Your task is to validate document metadata and suggest relevant tags.
You MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.
If there are no issues, "detected_issues" must be an empty array [].
```
@@ -199,22 +230,42 @@ Title: {{$json.title}}
Expected Pattern: [ORG]-[TYPE]-[SEQ] e.g. "TCC-COR-0001"
Category List (MUST match system enum exactly): {{$workflow.variables.system_categories}}
Analyze the document and suggest relevant tags based on:
1. Document content/title keywords (e.g., "Foundation", "Structure", "Electrical", "Safety")
2. Document type indicators (e.g., "Drawing", "Report", "Inspection")
3. Organization codes present in document number
4. Any discipline or phase indicators
Respond ONLY with this exact JSON structure:
{
"is_valid": true | false,
"confidence": 0.0 to 1.0,
"suggested_category": "<one from Category List>",
"detected_issues": ["<issue1>"],
"suggested_title": "<corrected title or null>"
"suggested_title": "<corrected title or null>",
"suggested_tags": ["<tag1>", "<tag2>"],
"tag_confidence": 0.0 to 1.0
}
```
**JSON Validation (ตรวจ Category ตรง Enum):**
**JSON Validation (ตรวจ Category ตรง Enum + Tag Normalization):**
```javascript
// Enum validation: the AI-suggested category must be one of the system categories.
const systemCategories = $workflow.variables.system_categories;
if (!systemCategories.includes(result.suggested_category)) {
  throw new Error(`Category "${result.suggested_category}" not in system enum: ${systemCategories.join(', ')}`);
}

// Tag validation: a missing or non-array value is treated as "no tags", not as an error.
if (!Array.isArray(result.suggested_tags)) {
  result.suggested_tags = [];
}
// Normalize: trim, lowercase, drop empty entries, remove duplicates.
// Lowercasing must happen before the Set so that "Safety" and "safety" collapse into one tag.
result.suggested_tags = [...new Set(
  result.suggested_tags
    .map(t => String(t).trim().toLowerCase())
    .filter(t => t.length > 0)
)];

// Tag confidence validation: fall back to 0.5 when missing, non-numeric, or outside [0, 1].
if (typeof result.tag_confidence !== 'number' || result.tag_confidence < 0 || result.tag_confidence > 1) {
  result.tag_confidence = 0.5;
}
```
#### Node 3.5: Fallback Model Manager
@@ -257,6 +308,24 @@ Idempotency-Key: <document_number>:<batch_id>
Content-Type: application/json
```
**Backend Tag Handling Logic:**
เมื่อ Backend รับ Payload พร้อม `ai_tags` ระบบจะ:
1. **Validate Tags:** ตรวจสอบว่า tag name อยู่ในรูปแบบที่ถูกต้อง (ไม่ว่าง, ไม่มีอักขระพิเศษ)
2. **Create Missing Tags:** ถ้า Tag ไม่มีอยู่ใน `tags` table → สร้างใหม่โดยอัตโนมัติ
```sql
-- Create the global tag (project_id IS NULL) only when it does not exist yet.
-- ON DUPLICATE KEY UPDATE cannot be relied on here: the unique key is
-- (project_id, tag_name), and unique indexes treat NULL project_id values as
-- distinct, so a repeated global tag never raises a duplicate-key conflict.
-- NOTE(review): two concurrent writers could both pass the NOT EXISTS check —
-- confirm that imports run serially, or serialize this step.
INSERT INTO tags (tag_name, created_by, created_at)
SELECT
    '<tag_name>',
    (SELECT user_id FROM users WHERE username = 'migration_bot'),
    NOW()
FROM DUAL
WHERE NOT EXISTS (
    SELECT 1
    FROM tags
    WHERE project_id IS NULL
      AND tag_name = '<tag_name>'
);
```
3. **Link Document Tags:** บันทึกความสัมพันธ์ใน `correspondence_tags`
```sql
-- Link the imported document to its tags.
-- <correspondence_id> is the id of the correspondence row created by this import.
-- It must be passed explicitly: LAST_INSERT_ID() returns the most recent
-- AUTO_INCREMENT value on the connection, which after a tag insert is a tag id,
-- not the correspondence id. The primary key column of tags is `id`.
INSERT INTO correspondence_tags (correspondence_id, tag_id)
SELECT <correspondence_id>, id
FROM tags
WHERE tag_name IN (<ai_tags>);
```
4. **Tag Confidence Logging:** บันทึก `tag_confidence` ลงใน `details` JSON ของ Revision
Payload:
```json
{
@@ -266,6 +335,8 @@ Payload:
"source_file_path": "{{file_path}}",
"ai_confidence": "{{ai_result.confidence}}",
"ai_issues": "{{ai_result.detected_issues}}",
"ai_tags": "{{ai_result.suggested_tags}}",
"tag_confidence": "{{ai_result.tag_confidence}}",
"migrated_by": "SYSTEM_IMPORT",
"batch_id": "{{$env.MIGRATION_BATCH_ID}}"
}
@@ -400,7 +471,8 @@ WHERE batch_id = 'migration_20260226';
| 7 | GPU VRAM Overflow | ใช้เฉพาะ Quantized Model (q4_K_M) |
| 8 | ดิสก์ NAS เต็ม | ปิด "Save Successful Executions" ใน n8n |
| 9 | Migration Token ถูกขโมย | Token 7 วัน, IP Whitelist `<NAS_IP>` เท่านั้น |
| 10 | ไฟดับ/ล่มกลางคัน | Checkpoint Table → Resume จากจุดที่ค้าง |
| 11 | AI Tag Extraction ผิดพลาด | Tag confidence < 0.6 → ส่งไป Review Queue / บันทึกใน metadata |
| 12 | Tag ซ้ำ/คล้ายกัน | Normalization ก่อนบันทึก (lowercase, trim, deduplicate) |
---
@@ -422,6 +494,25 @@ WHERE created_by = 'SYSTEM_IMPORT' AND action = 'IMPORT';
-- 4. ตรวจ Idempotency ไม่มีซ้ำ
SELECT idempotency_key, COUNT(*) FROM import_transactions
GROUP BY idempotency_key HAVING COUNT(*) > 1;
-- 5. Count the tags created by the migration account
SELECT COUNT(*) AS total_tags
FROM tags
WHERE created_by = (
    SELECT user_id FROM users WHERE username = 'migration_bot'
);

-- 6. Count migrated documents that have at least one tag linked
SELECT COUNT(DISTINCT correspondence_id) AS docs_with_tags
FROM correspondence_tags AS ct
INNER JOIN correspondences AS c
    ON ct.correspondence_id = c.id
WHERE c.created_by = (
    SELECT user_id FROM users WHERE username = 'migration_bot'
);

-- 7. Tag distribution: the 20 tags linked to the most migrated documents
SELECT
    t.tag_name,
    COUNT(ct.correspondence_id) AS doc_count
FROM tags AS t
INNER JOIN correspondence_tags AS ct
    ON t.id = ct.tag_id
INNER JOIN correspondences AS c
    ON ct.correspondence_id = c.id
WHERE c.created_by = (
    SELECT user_id FROM users WHERE username = 'migration_bot'
)
GROUP BY t.id, t.tag_name
ORDER BY doc_count DESC
LIMIT 20;
```
---

View File

@@ -230,8 +230,9 @@ mysql -h <DB_HOST> -u migration_bot -p lcbp3_production < lcbp3-v1.8.0-migration
### Node 1-2: Pre-flight Checks
- ตรวจสอบ Backend Health
- ดึง Categories จาก `/api/master/correspondence-types`
- ดึง Tags ที่มีอยู่แล้วจาก `/api/tags` (สำหรับ AI Tag Extraction)
- ตรวจ File Mount (Read-only)
- เก็บ Categories ใน `$workflow.staticData.systemCategories`
- เก็บ Categories และ Existing Tags ใน `$workflow.staticData.systemCategories`
### Node 3: Read Checkpoint
- อ่าน `last_processed_index` จาก `migration_progress`
@@ -251,16 +252,19 @@ mysql -h <DB_HOST> -u migration_bot -p lcbp3_production < lcbp3-v1.8.0-migration
### Node 6: Build AI Prompt
- ดึง Categories จาก `staticData` (ไม่ hardcode)
- เลือก Model ตาม Fallback State
- สร้าง Prompt ตาม Template
- สร้าง Prompt ตาม Template พร้อม **Tag Extraction Instructions**
- AI จะวิเคราะห์ Title และ Document Number เพื่อสกัด Tags ที่เกี่ยวข้อง
### Node 7: Ollama AI Analysis
- เรียก `POST /api/generate`
- Timeout 30 วินาที
- Retry 3 ครั้ง (n8n built-in)
- AI Response รวม `suggested_tags` และ `tag_confidence`
### Node 8: Parse & Validate
- Parse JSON Response
- Schema Validation (is_valid, confidence, detected_issues)
- **Tag Validation**: Normalize tags (trim, lowercase, deduplicate)
- Enum Validation (ตรวจ Category ว่าอยู่ใน List หรือไม่)
- **Output 2 ทาง**: Success → Router, Error → Fallback
@@ -274,6 +278,8 @@ mysql -h <DB_HOST> -u migration_bot -p lcbp3_production < lcbp3-v1.8.0-migration
### Node 10A: Auto Ingest
- POST `/api/migration/import`
- Header: `Idempotency-Key: {doc_num}:{batch_id}`
- Payload รวม **ai_tags** และ **tag_confidence**
- Backend จะสร้าง Tags ที่ยังไม่มี และผูกกับเอกสารอัตโนมัติ
- บันทึก Checkpoint ทุก 10 records
### Node 10B: Review Queue
@@ -397,9 +403,11 @@ mysql -h <DB_IP> -u root -p \
| 5 | File Mount RO ถูกต้อง | `docker exec n8n ls /home/node/.n8n-files/staging_ai` |
| 6 | Log Mount RW ถูกต้อง | `docker exec n8n touch /home/node/.n8n-files/migration_logs/test` |
| 7 | Categories ไม่ hardcode | ดูผลลัพธ์ Node Fetch Categories |
| 8 | Idempotency Key ถูกต้อง | ตรวจ Header ใน Node Import |
| 9 | Checkpoint บันทึก | ตรวจสอบ `migration_progress` หลังรัน |
| 10 | Error Log สร้างไฟล์ | ตรวจสอบ `error_log.csv` |
| 8 | Tags โหลดถูกต้อง | ดูผลลัพธ์ Node Fetch Tags (ควรแสดงรายการ Tags ที่มีอยู่) |
| 9 | AI Tag Extraction ทำงาน | ตรวจ `suggested_tags` ใน Response จาก Parse & Validate Node |
| 10 | Idempotency Key ถูกต้อง | ตรวจ Header ใน Node Import |
| 11 | Checkpoint บันทึก | ตรวจสอบ `migration_progress` หลังรัน |
| 12 | Error Log สร้างไฟล์ | ตรวจสอบ `error_log.csv` |
---
@@ -421,11 +429,33 @@ return [{ json: {
}}];
```
### ปัญหา: Ollama Timeout
### ปัญหา: AI Tag Extraction ไม่ทำงาน
**ตรวจสอบ:**
1. ดู Response ใน Node "Parse & Validate" ว่ามี field `suggested_tags` หรือไม่
2. ถ้าไม่มี → ตรวจสอบ Prompt ใน "Build AI Prompt" ว่ารวม Tag Extraction Instructions แล้ว
3. ถ้า AI ตอบแต่ Tags ไม่ถูกต้อง → ปรับ Threshold หรือส่งไป Review Queue
```javascript
// Debug Code Node ชั่วคราว
return [{
json: {
has_suggested_tags: !!$json.ai_result?.suggested_tags,
tag_count: $json.ai_result?.suggested_tags?.length || 0,
suggested_tags: $json.ai_result?.suggested_tags,
tag_confidence: $json.ai_result?.tag_confidence
}
}];
```
### ปัญหา: Tags ซ้ำหรือผิดพลาด
**แก้ไข:**
- เพิ่ม `DELAY_MS` เป็น 3000 หรือ 5000
- ลด `BATCH_SIZE` เหลือ 5
- ตรวจสอบ GPU/CPU ของ Ollama Server
- ใช้ SQL ตรวจสอบ Tags ที่ซ้ำ:
```sql
-- List tag names that the migration account created more than once.
SELECT
    tag_name,
    COUNT(*) AS cnt
FROM tags
WHERE created_by = (
    SELECT user_id FROM users WHERE username = 'migration_bot'
)
GROUP BY tag_name
HAVING cnt > 1;
```
- ถ้าพบซ้ำ → ใช้ Node Normalize ก่อนบันทึก (มีแล้วใน Parse & Validate)
---
@@ -442,6 +472,12 @@ docker exec n8n sh -c "tail -10 /home/node/.n8n-files/migration_logs/error_log.c
# ดู Checkpoint ใน DB
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT * FROM migration_progress WHERE batch_id = 'migration_20260226'"
# ดู Tags ที่สร้างจาก Migration
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT tag_name, created_at FROM tags WHERE created_by = (SELECT user_id FROM users WHERE username = 'migration_bot') ORDER BY created_at DESC LIMIT 20"
# ดูสถิติการผูก Tag กับเอกสาร
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT COUNT(DISTINCT ct.correspondence_id) as docs_with_tags, COUNT(DISTINCT ct.tag_id) as unique_tags_used FROM correspondence_tags ct JOIN correspondences c ON ct.correspondence_id = c.id WHERE c.created_by = (SELECT user_id FROM users WHERE username = 'migration_bot')"
```
---

View File

@@ -53,6 +53,32 @@
],
"notes": "ดึง Categories จาก Backend"
},
{
"parameters": {
"url": "={{$('Set Configuration').first().json.config.BACKEND_URL}}/api/tags",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "={{$('Set Configuration').first().json.config.MIGRATION_TOKEN}}"
}
]
},
"options": {
"timeout": 10000
}
},
"id": "f1a2b3c4-d5e6-7f8g-9h0i-j1k2l3m4n5o6",
"name": "Fetch Tags",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.1,
"position": [
5040,
3856
],
"notes": "ดึง Tags ที่มีอยู่แล้วจาก Backend"
},
{
"parameters": {
"url": "={{$('Set Configuration').first().json.config.BACKEND_URL}}/health",
@@ -73,7 +99,7 @@
},
{
"parameters": {
"jsCode": "const fs = require('fs');\nconst config = $('Set Configuration').first().json.config;\n\n// Check file mount and inputs\ntry {\n if (!fs.existsSync(config.EXCEL_FILE)) {\n throw new Error(`Excel file not found at: ${config.EXCEL_FILE}`);\n }\n if (!fs.existsSync(config.SOURCE_PDF_DIR)) {\n throw new Error(`PDF Source directory not found at: ${config.SOURCE_PDF_DIR}`);\n }\n \n const files = fs.readdirSync(config.SOURCE_PDF_DIR);\n \n // Check write permission to log path\n fs.writeFileSync(`${config.LOG_PATH}/.preflight_ok`, new Date().toISOString());\n \n // Grab categories out of the previous node (Fetch Categories) if available\n // otherwise use fallback array\n let categories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\n try {\n const upstreamData = $('Fetch Categories').first()?.json?.data;\n if (upstreamData && Array.isArray(upstreamData)) {\n categories = upstreamData.map(c => c.name || c.type || c); // very loose mapping depending on API response\n }\n } catch(e) {}\n \n return [{ json: { \n preflight_ok: true, \n pdf_count_in_source: files.length,\n excel_target: config.EXCEL_FILE,\n system_categories: categories,\n timestamp: new Date().toISOString()\n }}];\n} catch (err) {\n throw new Error(`Pre-flight check failed: ${err.message}`);\n}"
"jsCode": "const fs = require('fs');\nconst config = $('Set Configuration').first().json.config;\n\n// Check file mount and inputs\ntry {\n if (!fs.existsSync(config.EXCEL_FILE)) {\n throw new Error(`Excel file not found at: ${config.EXCEL_FILE}`);\n }\n if (!fs.existsSync(config.SOURCE_PDF_DIR)) {\n throw new Error(`PDF Source directory not found at: ${config.SOURCE_PDF_DIR}`);\n }\n \n const files = fs.readdirSync(config.SOURCE_PDF_DIR);\n \n // Check write permission to log path\n fs.writeFileSync(`${config.LOG_PATH}/.preflight_ok`, new Date().toISOString());\n \n // Grab categories out of the previous node (Fetch Categories) if available\n // otherwise use fallback array\n let categories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\n try {\n const upstreamData = $('Fetch Categories').first()?.json?.data;\n if (upstreamData && Array.isArray(upstreamData)) {\n categories = upstreamData.map(c => c.name || c.type || c); // very loose mapping depending on API response\n }\n } catch(e) {}\n \n // Grab existing tags from Fetch Tags node\n let existingTags = [];\n try {\n const tagData = $('Fetch Tags').first()?.json?.data || [];\n existingTags = Array.isArray(tagData) ? tagData.map(t => t.tag_name || t.name || '').filter(Boolean) : [];\n } catch(e) {}\n \n return [{ json: { \n preflight_ok: true, \n pdf_count_in_source: files.length,\n excel_target: config.EXCEL_FILE,\n system_categories: categories,\n existing_tags: existingTags,\n timestamp: new Date().toISOString()\n }}];\n} catch (err) {\n throw new Error(`Pre-flight check failed: ${err.message}`);\n}\n}"
},
"id": "5bdb31ca-9588-404d-92ce-3438bdd9835b",
"name": "File Mount Check",
@@ -193,7 +219,7 @@
},
{
"parameters": {
"jsCode": "const config = $('Set Configuration').first().json.config;\nconst fallbackState = $input.first().json[0] || { is_fallback_active: false, recent_error_count: 0 };\n\nconst isFallback = fallbackState.is_fallback_active || false;\nconst model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;\n\n// Safely pull categories from the first Check node\nlet systemCategories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\ntry { systemCategories = $('File Mount Check').first().json.system_categories || systemCategories; } catch (e) {}\n\nconst items = $('File Validator').all();\n\nreturn items.map(item => {\n const systemPrompt = `You are a Document Controller for a large construction project.\nYour task is to validate document metadata.\nYou MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.\nIf there are no issues, \"detected_issues\" must be an empty array [].`;\n\n const userPrompt = `Validate this document metadata and respond in JSON:\n\nDocument Number: ${item.json.document_number}\nTitle: ${item.json.title}\nExpected Pattern: [ORG]-[TYPE]-[SEQ] e.g. \"TCC-COR-0001\"\nCategory List (MUST match system enum exactly): ${JSON.stringify(systemCategories)}\n\nRespond ONLY with this exact JSON structure:\n{\n \"is_valid\": true | false,\n \"confidence\": 0.0 to 1.0,\n \"suggested_category\": \"<one from Category List>\",\n \"detected_issues\": [\"<issue1>\"],\n \"suggested_title\": \"<corrected title or null>\"\n}`;\n\n return {\n json: {\n ...item.json,\n active_model: model,\n is_fallback: isFallback,\n system_categories: systemCategories,\n ollama_payload: {\n model: model,\n prompt: `${systemPrompt}\\n\\n${userPrompt}`,\n stream: false,\n format: 'json'\n }\n }\n };\n});"
"jsCode": "const config = $('Set Configuration').first().json.config;\nconst fallbackState = $input.first().json[0] || { is_fallback_active: false, recent_error_count: 0 };\n\nconst isFallback = fallbackState.is_fallback_active || false;\nconst model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;\n\n// Safely pull categories from the first Check node\nlet systemCategories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\ntry { systemCategories = $('File Mount Check').first().json.system_categories || systemCategories; } catch (e) {}\n\nconst items = $('File Validator').all();\n\nreturn items.map(item => {\n const systemPrompt = `You are a Document Controller for a large construction project.\nYour task is to validate document metadata and suggest relevant tags.\nYou MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.\nIf there are no issues, \"detected_issues\" must be an empty array [].`;\n\n const userPrompt = `Validate this document metadata and respond in JSON:\n\nDocument Number: ${item.json.document_number}\nTitle: ${item.json.title}\nExpected Pattern: [ORG]-[TYPE]-[SEQ] e.g. \"TCC-COR-0001\"\nCategory List (MUST match system enum exactly): ${JSON.stringify(systemCategories)}\n\nAnalyze the document and suggest relevant tags based on:\n1. Document content/title keywords (e.g., \"Foundation\", \"Structure\", \"Electrical\", \"Safety\")\n2. Document type indicators (e.g., \"Drawing\", \"Report\", \"Inspection\")\n3. Organization codes present in document number\n4. 
Any discipline or phase indicators\n\nRespond ONLY with this exact JSON structure:\n{\n \"is_valid\": true | false,\n \"confidence\": 0.0 to 1.0,\n \"suggested_category\": \"<one from Category List>\",\n \"detected_issues\": [\"<issue1>\"],\n \"suggested_title\": \"<corrected title or null>\",\n \"suggested_tags\": [\"<tag1>\", \"<tag2>\"],\n \"tag_confidence\": 0.0 to 1.0\n}`;\n\n return {\n json: {\n ...item.json,\n active_model: model,\n is_fallback: isFallback,\n system_categories: systemCategories,\n ollama_payload: {\n model: model,\n prompt: `${systemPrompt}\\n\\n${userPrompt}`,\n stream: false,\n format: 'json'\n }\n }\n };\n});"
},
"id": "9f82950f-7533-4cbd-8e1e-8e441c1cb2a5",
"name": "Build AI Prompt",
@@ -228,7 +254,7 @@
},
{
"parameters": {
"jsCode": "const items = $input.all();\nconst parsed = [];\nconst parseErrors = [];\n\nfor (const item of items) {\n try {\n let raw = item.json.response || '';\n \n // Clean markdown\n raw = raw.replace(/```json/gi, '').replace(/```/g, '').trim();\n const result = JSON.parse(raw);\n \n // Schema Validation\n if (typeof result.is_valid !== 'boolean') throw new Error('is_valid must be boolean');\n if (typeof result.confidence !== 'number' || result.confidence < 0 || result.confidence > 1) {\n throw new Error('confidence must be float 0.0-1.0');\n }\n if (!Array.isArray(result.detected_issues)) throw new Error('detected_issues must be array');\n \n // Enum Validation\n const systemCategories = item.json.system_categories || [];\n if (!systemCategories.includes(result.suggested_category)) {\n throw new Error(`Category \"${result.suggested_category}\" not in system enum`);\n }\n \n parsed.push({\n ...item,\n json: { ...item.json, ai_result: result, parse_error: null }\n });\n } catch (err) {\n parseErrors.push({\n ...item,\n json: {\n ...item.json,\n ai_result: null,\n parse_error: err.message,\n raw_ai_response: item.json.response,\n error_type: 'AI_PARSE_ERROR'\n }\n });\n }\n}\n\nreturn [parsed, parseErrors];"
"jsCode": "const items = $input.all();\nconst parsed = [];\nconst parseErrors = [];\n\nfor (const item of items) {\n try {\n let raw = item.json.response || '';\n \n // Clean markdown\n raw = raw.replace(/```json/gi, '').replace(/```/g, '').trim();\n const result = JSON.parse(raw);\n \n // Schema Validation\n if (typeof result.is_valid !== 'boolean') throw new Error('is_valid must be boolean');\n if (typeof result.confidence !== 'number' || result.confidence < 0 || result.confidence > 1) {\n throw new Error('confidence must be float 0.0-1.0');\n }\n if (!Array.isArray(result.detected_issues)) throw new Error('detected_issues must be array');\n \n // Tag Validation - ensure suggested_tags is an array\n if (!Array.isArray(result.suggested_tags)) {\n result.suggested_tags = [];\n }\n // Normalize tags: trim, lowercase, remove duplicates\n result.suggested_tags = [...new Set(result.suggested_tags.map(t => String(t).trim()).filter(t => t.length > 0))];\n \n // Tag confidence validation\n if (typeof result.tag_confidence !== 'number' || result.tag_confidence < 0 || result.tag_confidence > 1) {\n result.tag_confidence = 0.5; // default if missing or invalid\n }\n \n // Enum Validation\n const systemCategories = item.json.system_categories || [];\n if (!systemCategories.includes(result.suggested_category)) {\n throw new Error(`Category \"${result.suggested_category}\" not in system enum`);\n }\n \n parsed.push({\n ...item,\n json: { ...item.json, ai_result: result, parse_error: null }\n });\n } catch (err) {\n parseErrors.push({\n ...item,\n json: {\n ...item.json,\n ai_result: null,\n parse_error: err.message,\n raw_ai_response: item.json.response,\n error_type: 'AI_PARSE_ERROR'\n }\n });\n }\n}\n\nreturn [parsed, parseErrors];"
},
"id": "281dc950-a3b6-4412-a0b4-76663b8c37ea",
"name": "Parse & Validate AI Response",
@@ -295,7 +321,7 @@
},
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"document_number\": \"{{$json.document_number}}\",\n \"title\": \"{{$json.ai_result.suggested_title || $json.title}}\",\n \"category\": \"{{$json.ai_result.suggested_category}}\",\n \"source_file_path\": \"{{$json.file_path}}\",\n \"ai_confidence\": {{$json.ai_result.confidence}},\n \"ai_issues\": {{JSON.stringify($json.ai_result.detected_issues)}},\n \"migrated_by\": \"SYSTEM_IMPORT\",\n \"batch_id\": \"{{$('Set Configuration').first().json.config.BATCH_ID}}\",\n \"details\": {\n \"legacy_number\": \"{{$json.legacy_number}}\"\n }\n}",
"jsonBody": "={\n \"document_number\": \"{{$json.document_number}}\",\n \"title\": \"{{$json.ai_result.suggested_title || $json.title}}\",\n \"category\": \"{{$json.ai_result.suggested_category}}\",\n \"source_file_path\": \"{{$json.file_path}}\",\n \"ai_confidence\": {{$json.ai_result.confidence}},\n \"ai_issues\": {{JSON.stringify($json.ai_result.detected_issues)}},\n \"ai_tags\": {{JSON.stringify($json.ai_result.suggested_tags || [])}},\n \"tag_confidence\": {{$json.ai_result.tag_confidence || 0}},\n \"migrated_by\": \"SYSTEM_IMPORT\",\n \"batch_id\": \"{{$('Set Configuration').first().json.config.BATCH_ID}}\",\n \"details\": {\n \"legacy_number\": \"{{$json.legacy_number}}\"\n }\n}",
"options": {
"timeout": 30000
}
@@ -471,6 +497,17 @@
]
},
"Fetch Categories": {
"main": [
[
{
"node": "Fetch Tags",
"type": "main",
"index": 0
}
]
]
},
"Fetch Tags": {
"main": [
[
{