260306:1600 20260306:1500 refactor tags #2
Some checks failed
Build and Deploy / deploy (push) Failing after 4m9s
Some checks failed
Build and Deploy / deploy (push) Failing after 4m9s
This commit is contained in:
4
.vscode/settings.json
vendored
4
.vscode/settings.json
vendored
@@ -1 +1,3 @@
|
|||||||
|
{
|
||||||
|
"editor.fontSize": 18
|
||||||
|
}
|
||||||
|
|||||||
3
backend/.vscode/settings.json
vendored
Normal file
3
backend/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"editor.fontSize": 18
|
||||||
|
}
|
||||||
3
frontend/.vscode/settings.json
vendored
3
frontend/.vscode/settings.json
vendored
@@ -1,3 +1,4 @@
|
|||||||
{
|
{
|
||||||
"jest.rootPath": "/"
|
"jest.rootPath": "/",
|
||||||
|
"editor.fontSize": 18
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
// EDITOR SETTINGS
|
// EDITOR SETTINGS
|
||||||
// ========================================
|
// ========================================
|
||||||
|
|
||||||
"editor.fontSize": 16,
|
"editor.fontSize": 18,
|
||||||
"editor.tabSize": 2,
|
"editor.tabSize": 2,
|
||||||
"editor.lineHeight": 1.6,
|
"editor.lineHeight": 1.6,
|
||||||
"editor.rulers": [80, 120],
|
"editor.rulers": [80, 120],
|
||||||
|
|||||||
3
specs/.vscode/settings.json
vendored
Normal file
3
specs/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"editor.fontSize": 18
|
||||||
|
}
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
| ------------------------------------------------------------------ | ------- |
|
| ------------------------------------------------------------------ | ------- |
|
||||||
| legacy PDF document migration to system v1.8.0 uses n8n and Ollama | 1.8.0 |
|
| legacy PDF document migration to system v1.8.0 uses n8n and Ollama | 1.8.0 |
|
||||||
|
|
||||||
> **Note:** Category Enum system-driven, Idempotency Contract, Duplicate Handling Clarification, Storage Enforcement, Audit Log Enhancement, Review Queue Integration, Revision Drift Protection, Execution Time, Encoding Normalization, Security Hardening, Orchestrator on QNAP, AI Physical Isolation (Desktop Desk-5439), Folder Standard (/share/np-dms/n8n)
|
> **Note:** Category Enum system-driven, Idempotency Contract, Duplicate Handling Clarification, Storage Enforcement, Audit Log Enhancement, Review Queue Integration, Revision Drift Protection, Execution Time, Encoding Normalization, Security Hardening, Orchestrator on QNAP, AI Physical Isolation (Desktop Desk-5439), Folder Standard (/share/np-dms/n8n), **AI Tag Extraction & Auto-Tagging**
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
- นำเข้าเอกสาร PDF 20,000 ฉบับ พร้อม Metadata จาก Excel (Legacy system export) เข้าสู่ระบบ LCBP3-DMS
|
- นำเข้าเอกสาร PDF 20,000 ฉบับ พร้อม Metadata จาก Excel (Legacy system export) เข้าสู่ระบบ LCBP3-DMS
|
||||||
- ใช้ AI (Ollama Local Model) เพื่อตรวจสอบความถูกต้องของลักษณะข้อมูล (Data format, Title consistency) ก่อนการนำเข้า
|
- ใช้ AI (Ollama Local Model) เพื่อตรวจสอบความถูกต้องของลักษณะข้อมูล (Data format, Title consistency) ก่อนการนำเข้า
|
||||||
|
- **AI Tag Extraction:** ใช้ Ollama วิเคราะห์เอกสารและสกัด Tags ที่เกี่ยวข้อง (เช่น สาขางาน, ประเภทเอกสาร, องค์กร) อัตโนมัติ
|
||||||
- รักษาโครงสร้างความสัมพันธ์ (Project / Contract / Ref No.) และระบบการทำ Revision ตาม Business Rules
|
- รักษาโครงสร้างความสัมพันธ์ (Project / Contract / Ref No.) และระบบการทำ Revision ตาม Business Rules
|
||||||
- **Checkpoint Support:** รองรับการหยุดและเริ่มงานต่อ (Resume) จากจุดที่ค้างอยู่ได้กรณีเกิดเหตุขัดข้อง
|
- **Checkpoint Support:** รองรับการหยุดและเริ่มงานต่อ (Resume) จากจุดที่ค้างอยู่ได้กรณีเกิดเหตุขัดข้อง
|
||||||
|
|
||||||
@@ -84,6 +85,36 @@ CREATE TABLE IF NOT EXISTS migration_progress (
|
|||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Tags Table (สำหรับ AI Tag Extraction):**
|
||||||
|
```sql
|
||||||
|
-- ตาราง Master เก็บ Tags (Global หรือ Project-specific)
|
||||||
|
-- Master list of tags. project_id = NULL marks a global tag (see the column
-- COMMENT); any other value scopes the tag to a single project.
-- IF NOT EXISTS keeps this script re-runnable, matching the migration_progress
-- and import_transactions definitions elsewhere in this document.
-- NOTE(review): MySQL UNIQUE indexes permit multiple NULL values, so
-- ux_tag_project does not prevent duplicate global tags (project_id IS NULL),
-- and an INSERT ... ON DUPLICATE KEY UPDATE that omits project_id will never
-- hit the duplicate branch. Confirm whether a NULL-free key is required.
CREATE TABLE IF NOT EXISTS tags (
    id INT PRIMARY KEY AUTO_INCREMENT,
    project_id INT NULL COMMENT 'NULL = Global Tag',
    tag_name VARCHAR(100) NOT NULL,
    color_code VARCHAR(30) DEFAULT 'default',
    description TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    -- Nullable on purpose: the foreign key below uses ON DELETE SET NULL.
    created_by INT,
    -- presumably a soft-delete marker (NULL = active); verify against the backend
    deleted_at DATETIME NULL,
    UNIQUE KEY ux_tag_project (project_id, tag_name),
    INDEX idx_tags_deleted_at (deleted_at),
    FOREIGN KEY (project_id) REFERENCES projects (id) ON DELETE CASCADE,
    FOREIGN KEY (created_by) REFERENCES users (user_id) ON DELETE SET NULL
);
|
||||||
|
|
||||||
|
-- ตารางเชื่อมระหว่าง correspondences และ tags (M:N)
|
||||||
|
-- Junction table for the many-to-many link between correspondences and tags.
-- IF NOT EXISTS keeps this script re-runnable, matching the migration_progress
-- and import_transactions definitions elsewhere in this document.
CREATE TABLE IF NOT EXISTS correspondence_tags (
    -- Both columns form the primary key, so NOT NULL is stated explicitly.
    correspondence_id INT NOT NULL,
    tag_id INT NOT NULL,
    PRIMARY KEY (correspondence_id, tag_id),
    FOREIGN KEY (correspondence_id) REFERENCES correspondences (id) ON DELETE CASCADE,
    FOREIGN KEY (tag_id) REFERENCES tags (id) ON DELETE CASCADE,
    -- The primary key leads with correspondence_id; this index serves lookups by tag.
    INDEX idx_tag_lookup (tag_id)
);
|
||||||
|
```
|
||||||
|
|
||||||
**Idempotency Table :**
|
**Idempotency Table :**
|
||||||
```sql
|
```sql
|
||||||
CREATE TABLE IF NOT EXISTS import_transactions (
|
CREATE TABLE IF NOT EXISTS import_transactions (
|
||||||
@@ -185,7 +216,7 @@ return items.map(item => ({
|
|||||||
**System Prompt:**
|
**System Prompt:**
|
||||||
```text
|
```text
|
||||||
You are a Document Controller for a large construction project.
|
You are a Document Controller for a large construction project.
|
||||||
Your task is to validate document metadata.
|
Your task is to validate document metadata and suggest relevant tags.
|
||||||
You MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.
|
You MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.
|
||||||
If there are no issues, "detected_issues" must be an empty array [].
|
If there are no issues, "detected_issues" must be an empty array [].
|
||||||
```
|
```
|
||||||
@@ -199,22 +230,42 @@ Title: {{$json.title}}
|
|||||||
Expected Pattern: [ORG]-[TYPE]-[SEQ] e.g. "TCC-COR-0001"
|
Expected Pattern: [ORG]-[TYPE]-[SEQ] e.g. "TCC-COR-0001"
|
||||||
Category List (MUST match system enum exactly): {{$workflow.variables.system_categories}}
|
Category List (MUST match system enum exactly): {{$workflow.variables.system_categories}}
|
||||||
|
|
||||||
|
Analyze the document and suggest relevant tags based on:
|
||||||
|
1. Document content/title keywords (e.g., "Foundation", "Structure", "Electrical", "Safety")
|
||||||
|
2. Document type indicators (e.g., "Drawing", "Report", "Inspection")
|
||||||
|
3. Organization codes present in document number
|
||||||
|
4. Any discipline or phase indicators
|
||||||
|
|
||||||
Respond ONLY with this exact JSON structure:
|
Respond ONLY with this exact JSON structure:
|
||||||
{
|
{
|
||||||
"is_valid": true | false,
|
"is_valid": true | false,
|
||||||
"confidence": 0.0 to 1.0,
|
"confidence": 0.0 to 1.0,
|
||||||
"suggested_category": "<one from Category List>",
|
"suggested_category": "<one from Category List>",
|
||||||
"detected_issues": ["<issue1>"],
|
"detected_issues": ["<issue1>"],
|
||||||
"suggested_title": "<corrected title or null>"
|
"suggested_title": "<corrected title or null>",
|
||||||
|
"suggested_tags": ["<tag1>", "<tag2>"],
|
||||||
|
"tag_confidence": 0.0 to 1.0
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**JSON Validation (ตรวจ Category ตรง Enum):**
|
**JSON Validation (ตรวจ Category ตรง Enum + Tag Normalization):**
|
||||||
```javascript
|
```javascript
|
||||||
const systemCategories = $workflow.variables.system_categories;
|
const systemCategories = $workflow.variables.system_categories;
|
||||||
if (!systemCategories.includes(result.suggested_category)) {
|
if (!systemCategories.includes(result.suggested_category)) {
|
||||||
throw new Error(`Category "${result.suggested_category}" not in system enum: ${systemCategories.join(', ')}`);
|
throw new Error(`Category "${result.suggested_category}" not in system enum: ${systemCategories.join(', ')}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tag Validation
|
||||||
|
if (!Array.isArray(result.suggested_tags)) {
|
||||||
|
result.suggested_tags = [];
|
||||||
|
}
|
||||||
|
// Normalize: trim, lowercase, remove duplicates
|
||||||
|
// Lowercase before de-duplicating so that "Safety" and "safety" collapse into one tag.
result.suggested_tags = [...new Set(result.suggested_tags.map(t => String(t).trim().toLowerCase()).filter(t => t.length > 0))];
|
||||||
|
|
||||||
|
// Tag confidence validation
|
||||||
|
if (typeof result.tag_confidence !== 'number' || result.tag_confidence < 0 || result.tag_confidence > 1) {
|
||||||
|
result.tag_confidence = 0.5;
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Node 3.5: Fallback Model Manager
|
#### Node 3.5: Fallback Model Manager
|
||||||
@@ -257,6 +308,24 @@ Idempotency-Key: <document_number>:<batch_id>
|
|||||||
Content-Type: application/json
|
Content-Type: application/json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Backend Tag Handling Logic:**
|
||||||
|
|
||||||
|
เมื่อ Backend รับ Payload พร้อม `ai_tags` ระบบจะ:
|
||||||
|
|
||||||
|
1. **Validate Tags:** ตรวจสอบว่า tag name อยู่ในรูปแบบที่ถูกต้อง (ไม่ว่าง, ไม่มีอักขระพิเศษ)
|
||||||
|
2. **Create Missing Tags:** ถ้า Tag ไม่มีอยู่ใน `tags` table → สร้างใหม่โดยอัตโนมัติ
|
||||||
|
```sql
|
||||||
|
INSERT INTO tags (tag_name, created_by, created_at)
|
||||||
|
VALUES ('<tag_name>', (SELECT user_id FROM users WHERE username = 'migration_bot'), NOW())
|
||||||
|
ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id);
|
||||||
|
```
|
||||||
|
3. **Link Document Tags:** บันทึกความสัมพันธ์ใน `correspondence_tags`
|
||||||
|
```sql
|
||||||
|
-- Link the imported document to its tags.
-- The key column of tags is `id` (correspondence_tags.tag_id references it).
-- After the tag upsert in step 2, LAST_INSERT_ID() holds a tag id, so the
-- correspondence id has to be passed in explicitly.
INSERT INTO correspondence_tags (correspondence_id, tag_id)
SELECT <correspondence_id>, id
FROM tags
WHERE tag_name IN (<ai_tags>);
|
||||||
|
```
|
||||||
|
4. **Tag Confidence Logging:** บันทึก `tag_confidence` ลงใน `details` JSON ของ Revision
|
||||||
|
|
||||||
Payload:
|
Payload:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
@@ -266,6 +335,8 @@ Payload:
|
|||||||
"source_file_path": "{{file_path}}",
|
"source_file_path": "{{file_path}}",
|
||||||
"ai_confidence": "{{ai_result.confidence}}",
|
"ai_confidence": "{{ai_result.confidence}}",
|
||||||
"ai_issues": "{{ai_result.detected_issues}}",
|
"ai_issues": "{{ai_result.detected_issues}}",
|
||||||
|
"ai_tags": "{{ai_result.suggested_tags}}",
|
||||||
|
"tag_confidence": "{{ai_result.tag_confidence}}",
|
||||||
"migrated_by": "SYSTEM_IMPORT",
|
"migrated_by": "SYSTEM_IMPORT",
|
||||||
"batch_id": "{{$env.MIGRATION_BATCH_ID}}"
|
"batch_id": "{{$env.MIGRATION_BATCH_ID}}"
|
||||||
}
|
}
|
||||||
@@ -400,7 +471,8 @@ WHERE batch_id = 'migration_20260226';
|
|||||||
| 7 | GPU VRAM Overflow | ใช้เฉพาะ Quantized Model (q4_K_M) |
|
| 7 | GPU VRAM Overflow | ใช้เฉพาะ Quantized Model (q4_K_M) |
|
||||||
| 8 | ดิสก์ NAS เต็ม | ปิด "Save Successful Executions" ใน n8n |
|
| 8 | ดิสก์ NAS เต็ม | ปิด "Save Successful Executions" ใน n8n |
|
||||||
| 9 | Migration Token ถูกขโมย | Token 7 วัน, IP Whitelist `<NAS_IP>` เท่านั้น |
|
| 9 | Migration Token ถูกขโมย | Token 7 วัน, IP Whitelist `<NAS_IP>` เท่านั้น |
|
||||||
| 10 | ไฟดับ/ล่มกลางคัน | Checkpoint Table → Resume จากจุดที่ค้าง |
|
| 10 | ไฟดับ/ล่มกลางคัน | Checkpoint Table → Resume จากจุดที่ค้าง |
| 11 | AI Tag Extraction ผิดพลาด | Tag confidence < 0.6 → ส่งไป Review Queue / บันทึกใน metadata |
|
||||||
|
| 12 | Tag ซ้ำ/คล้ายกัน | Normalization ก่อนบันทึก (lowercase, trim, deduplicate) |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -422,6 +494,25 @@ WHERE created_by = 'SYSTEM_IMPORT' AND action = 'IMPORT';
|
|||||||
-- 4. ตรวจ Idempotency ไม่มีซ้ำ
|
-- 4. ตรวจ Idempotency ไม่มีซ้ำ
|
||||||
SELECT idempotency_key, COUNT(*) FROM import_transactions
|
SELECT idempotency_key, COUNT(*) FROM import_transactions
|
||||||
GROUP BY idempotency_key HAVING COUNT(*) > 1;
|
GROUP BY idempotency_key HAVING COUNT(*) > 1;
|
||||||
|
|
||||||
|
-- 5. ตรวจ Tags ที่สร้างจาก Migration
|
||||||
|
SELECT COUNT(*) as total_tags FROM tags WHERE created_by = (SELECT user_id FROM users WHERE username = 'migration_bot');
|
||||||
|
|
||||||
|
-- 6. ตรวจเอกสารที่มี Tag ผูกอยู่
|
||||||
|
SELECT COUNT(DISTINCT correspondence_id) as docs_with_tags
|
||||||
|
FROM correspondence_tags ct
|
||||||
|
JOIN correspondences c ON ct.correspondence_id = c.id
|
||||||
|
WHERE c.created_by = (SELECT user_id FROM users WHERE username = 'migration_bot');
|
||||||
|
|
||||||
|
-- 7. ตรวจ Tag Distribution
|
||||||
|
SELECT t.tag_name, COUNT(ct.correspondence_id) as doc_count
|
||||||
|
FROM tags t
|
||||||
|
JOIN correspondence_tags ct ON t.id = ct.tag_id
|
||||||
|
JOIN correspondences c ON ct.correspondence_id = c.id
|
||||||
|
WHERE c.created_by = (SELECT user_id FROM users WHERE username = 'migration_bot')
|
||||||
|
GROUP BY t.id, t.tag_name
|
||||||
|
ORDER BY doc_count DESC
|
||||||
|
LIMIT 20;
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -230,8 +230,9 @@ mysql -h <DB_HOST> -u migration_bot -p lcbp3_production < lcbp3-v1.8.0-migration
|
|||||||
### Node 1-2: Pre-flight Checks
|
### Node 1-2: Pre-flight Checks
|
||||||
- ตรวจสอบ Backend Health
|
- ตรวจสอบ Backend Health
|
||||||
- ดึง Categories จาก `/api/master/correspondence-types`
|
- ดึง Categories จาก `/api/master/correspondence-types`
|
||||||
|
- ดึง Tags ที่มีอยู่แล้วจาก `/api/tags` (สำหรับ AI Tag Extraction)
|
||||||
- ตรวจ File Mount (Read-only)
|
- ตรวจ File Mount (Read-only)
|
||||||
- เก็บ Categories ใน `$workflow.staticData.systemCategories`
|
- เก็บ Categories และ Existing Tags ใน `$workflow.staticData.systemCategories`
|
||||||
|
|
||||||
### Node 3: Read Checkpoint
|
### Node 3: Read Checkpoint
|
||||||
- อ่าน `last_processed_index` จาก `migration_progress`
|
- อ่าน `last_processed_index` จาก `migration_progress`
|
||||||
@@ -251,16 +252,19 @@ mysql -h <DB_HOST> -u migration_bot -p lcbp3_production < lcbp3-v1.8.0-migration
|
|||||||
### Node 6: Build AI Prompt
|
### Node 6: Build AI Prompt
|
||||||
- ดึง Categories จาก `staticData` (ไม่ hardcode)
|
- ดึง Categories จาก `staticData` (ไม่ hardcode)
|
||||||
- เลือก Model ตาม Fallback State
|
- เลือก Model ตาม Fallback State
|
||||||
- สร้าง Prompt ตาม Template
|
- สร้าง Prompt ตาม Template พร้อม **Tag Extraction Instructions**
|
||||||
|
- AI จะวิเคราะห์ Title และ Document Number เพื่อสกัด Tags ที่เกี่ยวข้อง
|
||||||
|
|
||||||
### Node 7: Ollama AI Analysis
|
### Node 7: Ollama AI Analysis
|
||||||
- เรียก `POST /api/generate`
|
- เรียก `POST /api/generate`
|
||||||
- Timeout 30 วินาที
|
- Timeout 30 วินาที
|
||||||
- Retry 3 ครั้ง (n8n built-in)
|
- Retry 3 ครั้ง (n8n built-in)
|
||||||
|
- AI Response รวม `suggested_tags` และ `tag_confidence`
|
||||||
|
|
||||||
### Node 8: Parse & Validate
|
### Node 8: Parse & Validate
|
||||||
- Parse JSON Response
|
- Parse JSON Response
|
||||||
- Schema Validation (is_valid, confidence, detected_issues)
|
- Schema Validation (is_valid, confidence, detected_issues)
|
||||||
|
- **Tag Validation**: Normalize tags (trim, lowercase, deduplicate)
|
||||||
- Enum Validation (ตรวจ Category ว่าอยู่ใน List หรือไม่)
|
- Enum Validation (ตรวจ Category ว่าอยู่ใน List หรือไม่)
|
||||||
- **Output 2 ทาง**: Success → Router, Error → Fallback
|
- **Output 2 ทาง**: Success → Router, Error → Fallback
|
||||||
|
|
||||||
@@ -274,6 +278,8 @@ mysql -h <DB_HOST> -u migration_bot -p lcbp3_production < lcbp3-v1.8.0-migration
|
|||||||
### Node 10A: Auto Ingest
|
### Node 10A: Auto Ingest
|
||||||
- POST `/api/migration/import`
|
- POST `/api/migration/import`
|
||||||
- Header: `Idempotency-Key: {doc_num}:{batch_id}`
|
- Header: `Idempotency-Key: {doc_num}:{batch_id}`
|
||||||
|
- Payload รวม **ai_tags** และ **tag_confidence**
|
||||||
|
- Backend จะสร้าง Tags ที่ยังไม่มี และผูกกับเอกสารอัตโนมัติ
|
||||||
- บันทึก Checkpoint ทุก 10 records
|
- บันทึก Checkpoint ทุก 10 records
|
||||||
|
|
||||||
### Node 10B: Review Queue
|
### Node 10B: Review Queue
|
||||||
@@ -397,9 +403,11 @@ mysql -h <DB_IP> -u root -p \
|
|||||||
| 5 | File Mount RO ถูกต้อง | `docker exec n8n ls /home/node/.n8n-files/staging_ai` |
|
| 5 | File Mount RO ถูกต้อง | `docker exec n8n ls /home/node/.n8n-files/staging_ai` |
|
||||||
| 6 | Log Mount RW ถูกต้อง | `docker exec n8n touch /home/node/.n8n-files/migration_logs/test` |
|
| 6 | Log Mount RW ถูกต้อง | `docker exec n8n touch /home/node/.n8n-files/migration_logs/test` |
|
||||||
| 7 | Categories ไม่ hardcode | ดูผลลัพธ์ Node Fetch Categories |
|
| 7 | Categories ไม่ hardcode | ดูผลลัพธ์ Node Fetch Categories |
|
||||||
| 8 | Idempotency Key ถูกต้อง | ตรวจ Header ใน Node Import |
|
| 8 | Tags โหลดถูกต้อง | ดูผลลัพธ์ Node Fetch Tags (ควรแสดงรายการ Tags ที่มีอยู่) |
|
||||||
| 9 | Checkpoint บันทึก | ตรวจสอบ `migration_progress` หลังรัน |
|
| 9 | AI Tag Extraction ทำงาน | ตรวจ `suggested_tags` ใน Response จาก Parse & Validate Node |
|
||||||
| 10 | Error Log สร้างไฟล์ | ตรวจสอบ `error_log.csv` |
|
| 10 | Idempotency Key ถูกต้อง | ตรวจ Header ใน Node Import |
|
||||||
|
| 11 | Checkpoint บันทึก | ตรวจสอบ `migration_progress` หลังรัน |
|
||||||
|
| 12 | Error Log สร้างไฟล์ | ตรวจสอบ `error_log.csv` |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -421,11 +429,33 @@ return [{ json: {
|
|||||||
}}];
|
}}];
|
||||||
```
|
```
|
||||||
|
|
||||||
### ปัญหา: Ollama Timeout
|
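### ปัญหา: Ollama Timeout
**แก้ไข:**
- เพิ่ม `DELAY_MS` เป็น 3000 หรือ 5000
- ลด `BATCH_SIZE` เหลือ 5
- ตรวจสอบ GPU/CPU ของ Ollama Server

### ปัญหา: AI Tag Extraction ไม่ทำงาน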
|
||||||
|
**ตรวจสอบ:**
|
||||||
|
1. ดู Response ใน Node "Parse & Validate" ว่ามี field `suggested_tags` หรือไม่
|
||||||
|
2. ถ้าไม่มี → ตรวจสอบ Prompt ใน "Build AI Prompt" ว่ารวม Tag Extraction Instructions แล้ว
|
||||||
|
3. ถ้า AI ตอบแต่ Tags ไม่ถูกต้อง → ปรับ Threshold หรือส่งไป Review Queue
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Debug Code Node ชั่วคราว
|
||||||
|
return [{
|
||||||
|
json: {
|
||||||
|
has_suggested_tags: !!$json.ai_result?.suggested_tags,
|
||||||
|
tag_count: $json.ai_result?.suggested_tags?.length || 0,
|
||||||
|
suggested_tags: $json.ai_result?.suggested_tags,
|
||||||
|
tag_confidence: $json.ai_result?.tag_confidence
|
||||||
|
}
|
||||||
|
}];
|
||||||
|
```
|
||||||
|
|
||||||
|
### ปัญหา: Tags ซ้ำหรือผิดพลาด
|
||||||
**แก้ไข:**
|
**แก้ไข:**
|
||||||
- เพิ่ม `DELAY_MS` เป็น 3000 หรือ 5000
|
- ใช้ SQL ตรวจสอบ Tags ที่ซ้ำ:
|
||||||
- ลด `BATCH_SIZE` เหลือ 5
|
```sql
|
||||||
- ตรวจสอบ GPU/CPU ของ Ollama Server
|
SELECT tag_name, COUNT(*) as cnt FROM tags
|
||||||
|
WHERE created_by = (SELECT user_id FROM users WHERE username = 'migration_bot')
|
||||||
|
GROUP BY tag_name HAVING cnt > 1;
|
||||||
|
```
|
||||||
|
- ถ้าพบซ้ำ → ใช้ Node Normalize ก่อนบันทึก (มีแล้วใน Parse & Validate)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -442,6 +472,12 @@ docker exec n8n sh -c "tail -10 /home/node/.n8n-files/migration_logs/error_log.c
|
|||||||
|
|
||||||
# ดู Checkpoint ใน DB
|
# ดู Checkpoint ใน DB
|
||||||
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT * FROM migration_progress WHERE batch_id = 'migration_20260226'"
|
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT * FROM migration_progress WHERE batch_id = 'migration_20260226'"
|
||||||
|
|
||||||
|
# ดู Tags ที่สร้างจาก Migration
|
||||||
|
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT tag_name, created_at FROM tags WHERE created_by = (SELECT user_id FROM users WHERE username = 'migration_bot') ORDER BY created_at DESC LIMIT 20"
|
||||||
|
|
||||||
|
# ดูสถิติการผูก Tag กับเอกสาร
|
||||||
|
mysql -h <DB_HOST> -u migration_bot -p -e "SELECT COUNT(DISTINCT ct.correspondence_id) as docs_with_tags, COUNT(DISTINCT ct.tag_id) as unique_tags_used FROM correspondence_tags ct JOIN correspondences c ON ct.correspondence_id = c.id WHERE c.created_by = (SELECT user_id FROM users WHERE username = 'migration_bot')"
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -53,6 +53,32 @@
|
|||||||
],
|
],
|
||||||
"notes": "ดึง Categories จาก Backend"
|
"notes": "ดึง Categories จาก Backend"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"url": "={{$('Set Configuration').first().json.config.BACKEND_URL}}/api/tags",
|
||||||
|
"sendHeaders": true,
|
||||||
|
"headerParameters": {
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "Authorization",
|
||||||
|
"value": "={{$('Set Configuration').first().json.config.MIGRATION_TOKEN}}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"timeout": 10000
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "f1a2b3c4-d5e6-7f8g-9h0i-j1k2l3m4n5o6",
|
||||||
|
"name": "Fetch Tags",
|
||||||
|
"type": "n8n-nodes-base.httpRequest",
|
||||||
|
"typeVersion": 4.1,
|
||||||
|
"position": [
|
||||||
|
5040,
|
||||||
|
3856
|
||||||
|
],
|
||||||
|
"notes": "ดึง Tags ที่มีอยู่แล้วจาก Backend"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"url": "={{$('Set Configuration').first().json.config.BACKEND_URL}}/health",
|
"url": "={{$('Set Configuration').first().json.config.BACKEND_URL}}/health",
|
||||||
@@ -73,7 +99,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"jsCode": "const fs = require('fs');\nconst config = $('Set Configuration').first().json.config;\n\n// Check file mount and inputs\ntry {\n if (!fs.existsSync(config.EXCEL_FILE)) {\n throw new Error(`Excel file not found at: ${config.EXCEL_FILE}`);\n }\n if (!fs.existsSync(config.SOURCE_PDF_DIR)) {\n throw new Error(`PDF Source directory not found at: ${config.SOURCE_PDF_DIR}`);\n }\n \n const files = fs.readdirSync(config.SOURCE_PDF_DIR);\n \n // Check write permission to log path\n fs.writeFileSync(`${config.LOG_PATH}/.preflight_ok`, new Date().toISOString());\n \n // Grab categories out of the previous node (Fetch Categories) if available\n // otherwise use fallback array\n let categories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\n try {\n const upstreamData = $('Fetch Categories').first()?.json?.data;\n if (upstreamData && Array.isArray(upstreamData)) {\n categories = upstreamData.map(c => c.name || c.type || c); // very loose mapping depending on API response\n }\n } catch(e) {}\n \n return [{ json: { \n preflight_ok: true, \n pdf_count_in_source: files.length,\n excel_target: config.EXCEL_FILE,\n system_categories: categories,\n timestamp: new Date().toISOString()\n }}];\n} catch (err) {\n throw new Error(`Pre-flight check failed: ${err.message}`);\n}"
|
"jsCode": "const fs = require('fs');\nconst config = $('Set Configuration').first().json.config;\n\n// Check file mount and inputs\ntry {\n if (!fs.existsSync(config.EXCEL_FILE)) {\n throw new Error(`Excel file not found at: ${config.EXCEL_FILE}`);\n }\n if (!fs.existsSync(config.SOURCE_PDF_DIR)) {\n throw new Error(`PDF Source directory not found at: ${config.SOURCE_PDF_DIR}`);\n }\n \n const files = fs.readdirSync(config.SOURCE_PDF_DIR);\n \n // Check write permission to log path\n fs.writeFileSync(`${config.LOG_PATH}/.preflight_ok`, new Date().toISOString());\n \n // Grab categories out of the previous node (Fetch Categories) if available\n // otherwise use fallback array\n let categories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\n try {\n const upstreamData = $('Fetch Categories').first()?.json?.data;\n if (upstreamData && Array.isArray(upstreamData)) {\n categories = upstreamData.map(c => c.name || c.type || c); // very loose mapping depending on API response\n }\n } catch(e) {}\n \n // Grab existing tags from Fetch Tags node\n let existingTags = [];\n try {\n const tagData = $('Fetch Tags').first()?.json?.data || [];\n existingTags = Array.isArray(tagData) ? tagData.map(t => t.tag_name || t.name || '').filter(Boolean) : [];\n } catch(e) {}\n \n return [{ json: { \n preflight_ok: true, \n pdf_count_in_source: files.length,\n excel_target: config.EXCEL_FILE,\n system_categories: categories,\n existing_tags: existingTags,\n timestamp: new Date().toISOString()\n }}];\n} catch (err) {\n throw new Error(`Pre-flight check failed: ${err.message}`);\n}"
|
||||||
},
|
},
|
||||||
"id": "5bdb31ca-9588-404d-92ce-3438bdd9835b",
|
"id": "5bdb31ca-9588-404d-92ce-3438bdd9835b",
|
||||||
"name": "File Mount Check",
|
"name": "File Mount Check",
|
||||||
@@ -193,7 +219,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"jsCode": "const config = $('Set Configuration').first().json.config;\nconst fallbackState = $input.first().json[0] || { is_fallback_active: false, recent_error_count: 0 };\n\nconst isFallback = fallbackState.is_fallback_active || false;\nconst model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;\n\n// Safely pull categories from the first Check node\nlet systemCategories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\ntry { systemCategories = $('File Mount Check').first().json.system_categories || systemCategories; } catch (e) {}\n\nconst items = $('File Validator').all();\n\nreturn items.map(item => {\n const systemPrompt = `You are a Document Controller for a large construction project.\nYour task is to validate document metadata.\nYou MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.\nIf there are no issues, \"detected_issues\" must be an empty array [].`;\n\n const userPrompt = `Validate this document metadata and respond in JSON:\n\nDocument Number: ${item.json.document_number}\nTitle: ${item.json.title}\nExpected Pattern: [ORG]-[TYPE]-[SEQ] e.g. \"TCC-COR-0001\"\nCategory List (MUST match system enum exactly): ${JSON.stringify(systemCategories)}\n\nRespond ONLY with this exact JSON structure:\n{\n \"is_valid\": true | false,\n \"confidence\": 0.0 to 1.0,\n \"suggested_category\": \"<one from Category List>\",\n \"detected_issues\": [\"<issue1>\"],\n \"suggested_title\": \"<corrected title or null>\"\n}`;\n\n return {\n json: {\n ...item.json,\n active_model: model,\n is_fallback: isFallback,\n system_categories: systemCategories,\n ollama_payload: {\n model: model,\n prompt: `${systemPrompt}\\n\\n${userPrompt}`,\n stream: false,\n format: 'json'\n }\n }\n };\n});"
|
"jsCode": "const config = $('Set Configuration').first().json.config;\nconst fallbackState = $input.first().json[0] || { is_fallback_active: false, recent_error_count: 0 };\n\nconst isFallback = fallbackState.is_fallback_active || false;\nconst model = isFallback ? config.OLLAMA_MODEL_FALLBACK : config.OLLAMA_MODEL_PRIMARY;\n\n// Safely pull categories from the first Check node\nlet systemCategories = ['Correspondence','RFA','Drawing','Transmittal','Report','Other'];\ntry { systemCategories = $('File Mount Check').first().json.system_categories || systemCategories; } catch (e) {}\n\nconst items = $('File Validator').all();\n\nreturn items.map(item => {\n const systemPrompt = `You are a Document Controller for a large construction project.\nYour task is to validate document metadata and suggest relevant tags.\nYou MUST respond ONLY with valid JSON. No explanation, no markdown, no extra text.\nIf there are no issues, \"detected_issues\" must be an empty array [].`;\n\n const userPrompt = `Validate this document metadata and respond in JSON:\n\nDocument Number: ${item.json.document_number}\nTitle: ${item.json.title}\nExpected Pattern: [ORG]-[TYPE]-[SEQ] e.g. \"TCC-COR-0001\"\nCategory List (MUST match system enum exactly): ${JSON.stringify(systemCategories)}\n\nAnalyze the document and suggest relevant tags based on:\n1. Document content/title keywords (e.g., \"Foundation\", \"Structure\", \"Electrical\", \"Safety\")\n2. Document type indicators (e.g., \"Drawing\", \"Report\", \"Inspection\")\n3. Organization codes present in document number\n4. 
Any discipline or phase indicators\n\nRespond ONLY with this exact JSON structure:\n{\n \"is_valid\": true | false,\n \"confidence\": 0.0 to 1.0,\n \"suggested_category\": \"<one from Category List>\",\n \"detected_issues\": [\"<issue1>\"],\n \"suggested_title\": \"<corrected title or null>\",\n \"suggested_tags\": [\"<tag1>\", \"<tag2>\"],\n \"tag_confidence\": 0.0 to 1.0\n}`;\n\n return {\n json: {\n ...item.json,\n active_model: model,\n is_fallback: isFallback,\n system_categories: systemCategories,\n ollama_payload: {\n model: model,\n prompt: `${systemPrompt}\\n\\n${userPrompt}`,\n stream: false,\n format: 'json'\n }\n }\n };\n});"
|
||||||
},
|
},
|
||||||
"id": "9f82950f-7533-4cbd-8e1e-8e441c1cb2a5",
|
"id": "9f82950f-7533-4cbd-8e1e-8e441c1cb2a5",
|
||||||
"name": "Build AI Prompt",
|
"name": "Build AI Prompt",
|
||||||
@@ -228,7 +254,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"jsCode": "const items = $input.all();\nconst parsed = [];\nconst parseErrors = [];\n\nfor (const item of items) {\n try {\n let raw = item.json.response || '';\n \n // Clean markdown\n raw = raw.replace(/```json/gi, '').replace(/```/g, '').trim();\n const result = JSON.parse(raw);\n \n // Schema Validation\n if (typeof result.is_valid !== 'boolean') throw new Error('is_valid must be boolean');\n if (typeof result.confidence !== 'number' || result.confidence < 0 || result.confidence > 1) {\n throw new Error('confidence must be float 0.0-1.0');\n }\n if (!Array.isArray(result.detected_issues)) throw new Error('detected_issues must be array');\n \n // Enum Validation\n const systemCategories = item.json.system_categories || [];\n if (!systemCategories.includes(result.suggested_category)) {\n throw new Error(`Category \"${result.suggested_category}\" not in system enum`);\n }\n \n parsed.push({\n ...item,\n json: { ...item.json, ai_result: result, parse_error: null }\n });\n } catch (err) {\n parseErrors.push({\n ...item,\n json: {\n ...item.json,\n ai_result: null,\n parse_error: err.message,\n raw_ai_response: item.json.response,\n error_type: 'AI_PARSE_ERROR'\n }\n });\n }\n}\n\nreturn [parsed, parseErrors];"
|
"jsCode": "const items = $input.all();\nconst parsed = [];\nconst parseErrors = [];\n\nfor (const item of items) {\n try {\n let raw = item.json.response || '';\n \n // Clean markdown\n raw = raw.replace(/```json/gi, '').replace(/```/g, '').trim();\n const result = JSON.parse(raw);\n \n // Schema Validation\n if (typeof result.is_valid !== 'boolean') throw new Error('is_valid must be boolean');\n if (typeof result.confidence !== 'number' || result.confidence < 0 || result.confidence > 1) {\n throw new Error('confidence must be float 0.0-1.0');\n }\n if (!Array.isArray(result.detected_issues)) throw new Error('detected_issues must be array');\n \n // Tag Validation - ensure suggested_tags is an array\n if (!Array.isArray(result.suggested_tags)) {\n result.suggested_tags = [];\n }\n // Normalize tags: trim, lowercase, remove duplicates\n result.suggested_tags = [...new Set(result.suggested_tags.map(t => String(t).trim().toLowerCase()).filter(t => t.length > 0))];\n \n // Tag confidence validation\n if (typeof result.tag_confidence !== 'number' || result.tag_confidence < 0 || result.tag_confidence > 1) {\n result.tag_confidence = 0.5; // default if missing or invalid\n }\n \n // Enum Validation\n const systemCategories = item.json.system_categories || [];\n if (!systemCategories.includes(result.suggested_category)) {\n throw new Error(`Category \"${result.suggested_category}\" not in system enum`);\n }\n \n parsed.push({\n ...item,\n json: { ...item.json, ai_result: result, parse_error: null }\n });\n } catch (err) {\n parseErrors.push({\n ...item,\n json: {\n ...item.json,\n ai_result: null,\n parse_error: err.message,\n raw_ai_response: item.json.response,\n error_type: 'AI_PARSE_ERROR'\n }\n });\n }\n}\n\nreturn [parsed, parseErrors];"
|
||||||
},
|
},
|
||||||
"id": "281dc950-a3b6-4412-a0b4-76663b8c37ea",
|
"id": "281dc950-a3b6-4412-a0b4-76663b8c37ea",
|
||||||
"name": "Parse & Validate AI Response",
|
"name": "Parse & Validate AI Response",
|
||||||
@@ -295,7 +321,7 @@
|
|||||||
},
|
},
|
||||||
"sendBody": true,
|
"sendBody": true,
|
||||||
"specifyBody": "json",
|
"specifyBody": "json",
|
||||||
"jsonBody": "={\n \"document_number\": \"{{$json.document_number}}\",\n \"title\": \"{{$json.ai_result.suggested_title || $json.title}}\",\n \"category\": \"{{$json.ai_result.suggested_category}}\",\n \"source_file_path\": \"{{$json.file_path}}\",\n \"ai_confidence\": {{$json.ai_result.confidence}},\n \"ai_issues\": {{JSON.stringify($json.ai_result.detected_issues)}},\n \"migrated_by\": \"SYSTEM_IMPORT\",\n \"batch_id\": \"{{$('Set Configuration').first().json.config.BATCH_ID}}\",\n \"details\": {\n \"legacy_number\": \"{{$json.legacy_number}}\"\n }\n}",
|
"jsonBody": "={\n \"document_number\": \"{{$json.document_number}}\",\n \"title\": \"{{$json.ai_result.suggested_title || $json.title}}\",\n \"category\": \"{{$json.ai_result.suggested_category}}\",\n \"source_file_path\": \"{{$json.file_path}}\",\n \"ai_confidence\": {{$json.ai_result.confidence}},\n \"ai_issues\": {{JSON.stringify($json.ai_result.detected_issues)}},\n \"ai_tags\": {{JSON.stringify($json.ai_result.suggested_tags || [])}},\n \"tag_confidence\": {{$json.ai_result.tag_confidence || 0}},\n \"migrated_by\": \"SYSTEM_IMPORT\",\n \"batch_id\": \"{{$('Set Configuration').first().json.config.BATCH_ID}}\",\n \"details\": {\n \"legacy_number\": \"{{$json.legacy_number}}\"\n }\n}",
|
||||||
"options": {
|
"options": {
|
||||||
"timeout": 30000
|
"timeout": 30000
|
||||||
}
|
}
|
||||||
@@ -471,6 +497,17 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"Fetch Categories": {
|
"Fetch Categories": {
|
||||||
|
"main": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"node": "Fetch Tags",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Fetch Tags": {
|
||||||
"main": [
|
"main": [
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user