feat(ai-runtime): complete ai runtime policy refactor (ADR-035)
This commit is contained in:
@@ -99,14 +99,13 @@ find_feature_dir_by_prefix() {
|
|||||||
|
|
||||||
local prefix="${BASH_REMATCH[1]}"
|
local prefix="${BASH_REMATCH[1]}"
|
||||||
|
|
||||||
# Search for directories in specs/ that start with this prefix
|
# Search for directories in specs/ that start with this prefix (supporting subdirectories)
|
||||||
local matches=()
|
local matches=()
|
||||||
if [[ -d "$specs_dir" ]]; then
|
if [[ -d "$specs_dir" ]]; then
|
||||||
for dir in "$specs_dir"/"$prefix"-*; do
|
# ค้นหาโฟลเดอร์ที่ตรงกับ prefix ในระบบย่อย
|
||||||
if [[ -d "$dir" ]]; then
|
while IFS= read -r -d '' dir; do
|
||||||
matches+=("$(basename "$dir")")
|
matches+=("$dir")
|
||||||
fi
|
done < <(find "$specs_dir" -maxdepth 3 -type d -name "${prefix}-*" -print0 2>/dev/null)
|
||||||
done
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle results
|
# Handle results
|
||||||
@@ -115,12 +114,12 @@ find_feature_dir_by_prefix() {
|
|||||||
echo "$specs_dir/$branch_name"
|
echo "$specs_dir/$branch_name"
|
||||||
elif [[ ${#matches[@]} -eq 1 ]]; then
|
elif [[ ${#matches[@]} -eq 1 ]]; then
|
||||||
# Exactly one match - perfect!
|
# Exactly one match - perfect!
|
||||||
echo "$specs_dir/${matches[0]}"
|
echo "${matches[0]}"
|
||||||
else
|
else
|
||||||
# Multiple matches - this shouldn't happen with proper naming convention
|
# Multiple matches - this shouldn't happen with proper naming convention
|
||||||
echo "ERROR: Multiple spec directories found with prefix '$prefix': ${matches[*]}" >&2
|
echo "ERROR: Multiple spec directories found with prefix '$prefix': ${matches[*]}" >&2
|
||||||
echo "Please ensure only one spec directory exists per numeric prefix." >&2
|
echo "Please ensure only one spec directory exists per numeric prefix." >&2
|
||||||
echo "$specs_dir/$branch_name" # Return something to avoid breaking the script
|
echo "${matches[0]}" # Return first match to avoid breaking the script
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+131
-6
@@ -1,12 +1,23 @@
|
|||||||
# NAP-DMS Gemini Rules & Standards
|
# NAP-DMS Project Context & Rules
|
||||||
|
|
||||||
- For: Gemini (Google AI Studio, Vertex AI, Antigravity, Gemini CLI)
|
- For: Gemini (Google AI Studio, Vertex AI, Antigravity, Gemini CLI)
|
||||||
- Version: 1.9.8 | Last synced from AGENTS.md: 2026-06-02
|
- Version: 1.9.10 | Last synced from AGENTS.md: 2026-06-11
|
||||||
- Repo: [https://git.np-dms.work/np-dms/lcbp3](https://git.np-dms.work/np-dms/lcbp3)
|
- Repo: [https://git.np-dms.work/np-dms/lcbp3](https://git.np-dms.work/np-dms/lcbp3)
|
||||||
- Skill pack: `.agents/skills/` (v1.9.0, 21 skills) — see [`skills/README.md`](../.agents/skills/README.md) + [`skills/_LCBP3-CONTEXT.md`](../.agents/skills/_LCBP3-CONTEXT.md)
|
- Skill pack: `.agents/skills/` (v1.9.0, 21 skills) — see [`skills/README.md`](../.agents/skills/README.md) + [`skills/_LCBP3-CONTEXT.md`](../.agents/skills/_LCBP3-CONTEXT.md)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## 📦 Project Memory Override
|
||||||
|
|
||||||
|
For this repository (`E:\np-dms\lcbp3`), use project memory from:
|
||||||
|
`E:\np-dms\lcbp3\memory\project-memory-override.md`
|
||||||
|
|
||||||
|
**Before using global Gemini memory**, read this project memory file first when the task depends on prior repo context, conventions, decisions, or rollout history.
|
||||||
|
|
||||||
|
If project memory conflicts with global memory, prefer `memory/project-memory-override.md` for LCBP3-specific facts.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 🧠 Role & Persona
|
## 🧠 Role & Persona
|
||||||
|
|
||||||
Act as **Senior Full Stack Developer** specialized in NestJS, Next.js, TypeScript, DMS. Focus: Data Integrity, Security, Maintainability, Performance.
|
Act as **Senior Full Stack Developer** specialized in NestJS, Next.js, TypeScript, DMS. Focus: Data Integrity, Security, Maintainability, Performance.
|
||||||
@@ -126,7 +137,8 @@ Spec priority: **`06-Decision-Records`** > **`05-Engineering-Guidelines`** > oth
|
|||||||
| **ADR-019 UUID** | `specs/06-Decision-Records/ADR-019-hybrid-identifier-strategy.md` | ✅ Active | UUID-related work |
|
| **ADR-019 UUID** | `specs/06-Decision-Records/ADR-019-hybrid-identifier-strategy.md` | ✅ Active | UUID-related work |
|
||||||
| **ADR-021 Workflow Context** | `specs/06-Decision-Records/ADR-021-workflow-context.md` | ✅ Active | Integrated workflow & step attachments |
|
| **ADR-021 Workflow Context** | `specs/06-Decision-Records/ADR-021-workflow-context.md` | ✅ Active | Integrated workflow & step attachments |
|
||||||
| **ADR-023 AI Architecture** | `specs/06-Decision-Records/ADR-023-unified-ai-architecture.md` | ✅ Active | Unified AI boundaries and pipeline (base architecture) |
|
| **ADR-023 AI Architecture** | `specs/06-Decision-Records/ADR-023-unified-ai-architecture.md` | ✅ Active | Unified AI boundaries and pipeline (base architecture) |
|
||||||
| **ADR-023A AI Model Rev.** | `specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md` | ✅ Active | 2-Model stack (gemma4:e4b Q8_0), BullMQ 2-queue, RAG embed scope, OCR auto-detect |
|
| **ADR-023A AI Model Rev.** | `specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md` | ✅ Active | BullMQ 2-queue, RAG embed scope, OCR auto-detect (model stack superseded by ADR-034) |
|
||||||
|
| **ADR-034 Thai Model Stack** | `specs/06-Decision-Records/ADR-034-AI-model-change.md` | ✅ Active | typhoon2.5-np-dms:latest (Main) + typhoon-np-dms-ocr:latest (OCR, keep_alive:0) |
|
||||||
| **ADR-024 Intent Class.** | `specs/06-Decision-Records/ADR-024-intent-classification-strategy.md` | ✅ Active | Hybrid Pattern→LLM Fallback; ai_intent_patterns DB; Redis cache 5 min |
|
| **ADR-024 Intent Class.** | `specs/06-Decision-Records/ADR-024-intent-classification-strategy.md` | ✅ Active | Hybrid Pattern→LLM Fallback; ai_intent_patterns DB; Redis cache 5 min |
|
||||||
| **ADR-025 AI Tool Layer** | `specs/06-Decision-Records/ADR-025-ai-tool-layer-architecture.md` | ✅ Active | Server-side Tool dispatch; CASL-guarded bridge; ToolResult uses publicId only |
|
| **ADR-025 AI Tool Layer** | `specs/06-Decision-Records/ADR-025-ai-tool-layer-architecture.md` | ✅ Active | Server-side Tool dispatch; CASL-guarded bridge; ToolResult uses publicId only |
|
||||||
| **ADR-026 Chat UI** | `specs/06-Decision-Records/ADR-026-document-chat-ui-pattern.md` | ✅ Active | Side-panel Document Chat UI; useAiChat() hook; streaming response support |
|
| **ADR-026 Chat UI** | `specs/06-Decision-Records/ADR-026-document-chat-ui-pattern.md` | ✅ Active | Side-panel Document Chat UI; useAiChat() hook; streaming response support |
|
||||||
@@ -243,7 +255,7 @@ Read `specs/05-Engineering-Guidelines/05-07-hybrid-uuid-implementation-plan.md`
|
|||||||
5. **Password:** bcrypt 12 salt rounds, min 8 chars, rotate every 90 days
|
5. **Password:** bcrypt 12 salt rounds, min 8 chars, rotate every 90 days
|
||||||
6. **Rate Limiting:** `ThrottlerGuard` on all auth endpoints
|
6. **Rate Limiting:** `ThrottlerGuard` on all auth endpoints
|
||||||
7. **File Upload:** Whitelist PDF/DWG/DOCX/XLSX/ZIP, max 50MB, ClamAV scan
|
7. **File Upload:** Whitelist PDF/DWG/DOCX/XLSX/ZIP, max 50MB, ClamAV scan
|
||||||
8. **AI Isolation (ADR-023/023A):** Ollama on Admin Desktop ONLY — NO direct DB/storage access; 2-model stack `gemma4:e4b Q8_0` + `nomic-embed-text`; all inference via BullMQ (`ai-realtime` / `ai-batch`)
|
8. **AI Isolation (ADR-023/023A/034):** Ollama on Admin Desktop ONLY — NO direct DB/storage access; model stack `typhoon2.5-np-dms:latest` (main) + `typhoon-np-dms-ocr:latest` (OCR, keep_alive:0) + `nomic-embed-text`; all inference via BullMQ (`ai-realtime` / `ai-batch`)
|
||||||
9. **Error Handling (ADR-007):** Use layered error classification with user-friendly messages
|
9. **Error Handling (ADR-007):** Use layered error classification with user-friendly messages
|
||||||
10. **AI Integration (ADR-023/023A):** RFA-First approach; n8n orchestrates Migration Phase only via DMS API — never calls Ollama directly; `QdrantService.search()` requires `projectPublicId` as mandatory param
|
10. **AI Integration (ADR-023/023A):** RFA-First approach; n8n orchestrates Migration Phase only via DMS API — never calls Ollama directly; `QdrantService.search()` requires `projectPublicId` as mandatory param
|
||||||
|
|
||||||
@@ -529,7 +541,7 @@ When user asks about... check these files:
|
|||||||
- [ ] **Qdrant Multi-tenancy:** `projectPublicId` filter enforced
|
- [ ] **Qdrant Multi-tenancy:** `projectPublicId` filter enforced
|
||||||
- [ ] **Human-in-the-loop:** AI outputs validated before use
|
- [ ] **Human-in-the-loop:** AI outputs validated before use
|
||||||
- [ ] **Audit Logging:** All AI interactions logged to `ai_audit_logs`
|
- [ ] **Audit Logging:** All AI interactions logged to `ai_audit_logs`
|
||||||
- [ ] **2-Model Stack:** gemma4:e4b Q8_0 + nomic-embed-text verified
|
- [ ] **Model Stack (ADR-034):** typhoon2.5-np-dms:latest + typhoon-np-dms-ocr:latest + nomic-embed-text verified
|
||||||
- [ ] **Dynamic Prompts (ADR-029):** Prompt templates loaded from `ai_prompts` DB, not hardcoded
|
- [ ] **Dynamic Prompts (ADR-029):** Prompt templates loaded from `ai_prompts` DB, not hardcoded
|
||||||
|
|
||||||
**Performance & Complex Logic:**
|
**Performance & Complex Logic:**
|
||||||
@@ -549,6 +561,108 @@ When user asks about... check these files:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## 🔌 MCP MariaDB Tools
|
||||||
|
|
||||||
|
MCP MariaDB server ให้เครื่องมือสำหรับตรวจสอบและจัดการ database โดยตรง ใช้สำหรับ:
|
||||||
|
|
||||||
|
- ตรวจสอบ schema กับ spec file `specs/03-Data-and-Storage/lcbp3-v1.9.0-schema-02-tables.sql`
|
||||||
|
- Debug ปัญหา database โดยไม่ต้องเข้า MySQL client
|
||||||
|
- ตรวจสอบ data ใน production/staging
|
||||||
|
- Validate การเปลี่ยนแปลง schema ก่อน deploy
|
||||||
|
|
||||||
|
### Available Tools
|
||||||
|
|
||||||
|
| Tool | หน้าที่ | ตัวอย่างการใช้งาน |
|
||||||
|
| ---------------------------- | ------------------------------ | -------------------------------------------------- |
|
||||||
|
| `mcp1_mysql_test_connection` | ทดสอบ connection กับ database | ตรวจสอบว่า MCP server เชื่อมต่อได้ |
|
||||||
|
| `mcp1_mysql_show_databases` | แสดง databases ทั้งหมด | ดูว่ามี database อะไรบ้าง |
|
||||||
|
| `mcp1_mysql_show_tables` | แสดง tables ทั้งหมดใน database | ดูรายชื่อ tables ใน `lcbp3` |
|
||||||
|
| `mcp1_mysql_describe_table` | ดู structure/columns ของ table | ตรวจสอบ columns, types, keys ของ `correspondences` |
|
||||||
|
| `mcp1_mysql_query` | รัน SELECT query | ดู data ใน table หรือ join query |
|
||||||
|
| `mcp1_mysql_insert` | INSERT data | เพิ่ม seed data หรือ test data |
|
||||||
|
| `mcp1_mysql_update` | UPDATE data | แก้ไข data ใน table |
|
||||||
|
| `mcp1_mysql_delete` | DELETE data | ลบ data ใน table |
|
||||||
|
|
||||||
|
### การใช้งานร่วมกับ Development Flow
|
||||||
|
|
||||||
|
**เมื่อเขียน query ใหม่:**
|
||||||
|
|
||||||
|
1. ใช้ `mcp1_mysql_describe_table` เพื่อตรวจสอบ columns และ types
|
||||||
|
2. เปรียบเทียบกับ `specs/03-Data-and-Storage/lcbp3-v1.9.0-schema-02-tables.sql`
|
||||||
|
3. ใช้ `mcp1_mysql_query` เพื่อทดสอบ query ก่อน implement
|
||||||
|
|
||||||
|
**เมื่อเปลี่ยน schema (ADR-009):**
|
||||||
|
|
||||||
|
1. ใช้ `mcp1_mysql_describe_table` เพื่อดู structure ปัจจุบัน
|
||||||
|
2. สร้าง SQL delta ใน `specs/03-Data-and-Storage/deltas/`
|
||||||
|
3. ใช้ `mcp1_mysql_query` เพื่อตรวจสอบผลลัพธ์หลัง apply delta
|
||||||
|
|
||||||
|
**เมื่อ debug ปัญหา database:**
|
||||||
|
|
||||||
|
1. ใช้ `mcp1_mysql_query` เพื่อดู data จริง
|
||||||
|
2. เปรียบเทียบกับ spec และ data dictionary
|
||||||
|
3. ตรวจสอบ foreign keys และ constraints
|
||||||
|
|
||||||
|
### ข้อควรระวัง
|
||||||
|
|
||||||
|
- **❌ ห้ามใช้ MCP MariaDB สำหรับ DDL operations** (CREATE/ALTER/DROP) โดยตรง — ต้องใช้ SQL delta ตาม ADR-009
|
||||||
|
- **✅ ใช้สำหรับ DQL/DML operations** (SELECT/INSERT/UPDATE/DELETE) เพื่อ debug และ test เท่านั้น
|
||||||
|
- **⚠️ ระวัง DELETE operations** — อาจทำให้เสีย data ใน production
|
||||||
|
- **✅ ตรวจสอบ schema กับ spec file เสมอ** ก่อนเขียน query
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧠 MCP Memory Tools
|
||||||
|
|
||||||
|
MCP Memory server ให้เครื่องมือสำหรับจัดการ Knowledge Graph และ Long-term Memory ใช้สำหรับ:
|
||||||
|
|
||||||
|
- จัดเก็บความรู้และ context ของโปรเจกต์ในรูปแบบ Graph (Entities + Relations + Observations)
|
||||||
|
- ค้นหาและดึงข้อมูล context จาก memory ที่บันทึกไว้ใน session ก่อนหน้า
|
||||||
|
- สร้าง/แก้ไข/ลบ entities, relations, และ observations ใน knowledge graph
|
||||||
|
|
||||||
|
### Available Tools
|
||||||
|
|
||||||
|
| Tool | หน้าที่ | ตัวอย่างการใช้งาน |
|
||||||
|
| -------------------------- | -------------------------------------------- | -------------------------------------------- |
|
||||||
|
| `mcp3_create_entities` | สร้าง entities ใหม่หลายตัวพร้อม observations | สร้าง entity ใหม่เช่น Project, User, Task |
|
||||||
|
| `mcp3_create_relations` | สร้าง relations ระหว่าง entities | สร้าง relation: Project → has → User |
|
||||||
|
| `mcp3_add_observations` | เพิ่ม observations ให้ entity ที่มีอยู่แล้ว | เพิ่ม context เพิ่มเติมให้ entity |
|
||||||
|
| `mcp3_delete_entities` | ลบ entities และ relations ที่เกี่ยวข้อง | ลบ entity ที่ไม่ใช้แล้ว |
|
||||||
|
| `mcp3_delete_relations` | ลบ relations ระหว่าง entities | ลบ relation ที่ผิดหรือไม่ใช้แล้ว |
|
||||||
|
| `mcp3_delete_observations` | ลบ observations จาก entity | ลบ context ที่ผิดหรือล้าสมัย |
|
||||||
|
| `mcp3_open_nodes` | ดึงข้อมูล entities ตามชื่อ | ดึง entity ที่ระบุชื่อ |
|
||||||
|
| `mcp3_read_graph` | อ่าน knowledge graph ทั้งหมด | ดูทั้ง graph structure |
|
||||||
|
| `mcp3_search_nodes` | ค้นหา entities ตาม query | ค้นหา entity จากชื่อ, type, หรือ observation |
|
||||||
|
|
||||||
|
### การใช้งานร่วมกับ Development Flow
|
||||||
|
|
||||||
|
**เมื่อบันทึก context ใหม่:**
|
||||||
|
|
||||||
|
1. ใช้ `mcp3_create_entities` เพื่อสร้าง entities ใหม่ (ถ้ายังไม่มี)
|
||||||
|
2. ใช้ `mcp3_create_relations` เพื่อเชื่อมโยง entities
|
||||||
|
3. ใช้ `mcp3_add_observations` เพื่อเพิ่ม context/observations
|
||||||
|
|
||||||
|
**เมื่อค้นหา context:**
|
||||||
|
|
||||||
|
1. ใช้ `mcp3_search_nodes` เพื่อค้นหา entities ที่เกี่ยวข้อง
|
||||||
|
2. ใช้ `mcp3_open_nodes` เพื่อดึงข้อมูล entities ที่ต้องการ
|
||||||
|
3. ใช้ `mcp3_read_graph` เพื่อดู relations ระหว่าง entities
|
||||||
|
|
||||||
|
**เมื่อแก้ไข context:**
|
||||||
|
|
||||||
|
1. ใช้ `mcp3_add_observations` เพื่อเพิ่ม observations ใหม่
|
||||||
|
2. ใช้ `mcp3_delete_observations` เพื่อลบ observations ที่ผิด
|
||||||
|
3. ใช้ `mcp3_create_relations` หรือ `mcp3_delete_relations` เพื่อปรับ relations
|
||||||
|
|
||||||
|
### ข้อควรระวัง
|
||||||
|
|
||||||
|
- **✅ ใช้สำหรับบันทึก context ที่ต้องใช้ร่วมกันหลาย session** — เช่น การตัดสินใจสำคัญ, architecture decisions, rollout history
|
||||||
|
- **⚠️ ระวังการลบ entities** — อาจทำให้เสีย context ที่ยังใช้งานอยู่
|
||||||
|
- **✅ ตรวจสอบว่า entity มีอยู่แล้วก่อนสร้าง** — ใช้ `mcp3_search_nodes` หรือ `mcp3_open_nodes` ก่อน
|
||||||
|
- **✅ ใช้ชื่อ entity ที่ชัดเจนและไม่ซ้ำกัน** — เพื่อป้องกันความสับสน
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Agent skills
|
## Agent skills
|
||||||
|
|
||||||
### Issue tracker
|
### Issue tracker
|
||||||
@@ -583,7 +697,9 @@ This file is a **quick reference**. For detailed information:
|
|||||||
## 🔄 Change Log
|
## 🔄 Change Log
|
||||||
|
|
||||||
| Version | Date | Changes | Updated By |
|
| Version | Date | Changes | Updated By |
|
||||||
| ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- |
|
| ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------- |
|
||||||
|
| 1.9.10 | 2026-06-11 | Synced from AGENTS.md: Added MCP MariaDB Tools section, MCP Memory Tools section; Added ADR-034 Thai Model Stack; Updated AI Isolation to ADR-034 typhoon2.5 model stack; Added Project Memory Override section; Updated Change Log | Windsurf AI |
|
||||||
|
| 1.9.9 | 2026-06-06 | ADR-034 Thai-Optimized AI Model Stack: typhoon2.5-np-dms:latest (main) + typhoon-np-dms-ocr:latest (OCR); model switching in ai-batch processor; AiSettingsService static constants; SQL delta; updated Key Spec Files + AI isolation rule | Windsurf AI |
|
||||||
| 1.9.8 | 2026-06-02 | Added ADR-033 Active Model & OCR; ADR-031/032 status Draft→Active; ADR-032/033 in Tier 3 AI Runtime Layer & Specialized Work; Dynamic Prompt context trigger; AI Model/OCR Active Switch trigger; Dynamic Prompts checklist item | Windsurf AI |
|
| 1.9.8 | 2026-06-02 | Added ADR-033 Active Model & OCR; ADR-031/032 status Draft→Active; ADR-032/033 in Tier 3 AI Runtime Layer & Specialized Work; Dynamic Prompt context trigger; AI Model/OCR Active Switch trigger; Dynamic Prompts checklist item | Windsurf AI |
|
||||||
| 1.9.7 | 2026-05-25 | Added ADR-029 Dynamic Prompt Management to Key Spec Files table; fixed gemma4 model name e2b→e4b Q8_0; added Dynamic Prompt context trigger; added ADR-029 to Tier 3 AI checklist; bumped last synced date | Windsurf AI |
|
| 1.9.7 | 2026-05-25 | Added ADR-029 Dynamic Prompt Management to Key Spec Files table; fixed gemma4 model name e2b→e4b Q8_0; added Dynamic Prompt context trigger; added ADR-029 to Tier 3 AI checklist; bumped last synced date | Windsurf AI |
|
||||||
| 1.9.6 | 2026-05-22 | Added ADR-024/025/026/027/028 to Key Spec Files; Tier 3 expanded (AI Runtime Layer + Migration Pipeline); Specialized Work updated; 6 new Context-Aware Triggers; Forbidden Actions + Domain Terminology synced from AGENTS.md v1.9.6 | Windsurf AI |
|
| 1.9.6 | 2026-05-22 | Added ADR-024/025/026/027/028 to Key Spec Files; Tier 3 expanded (AI Runtime Layer + Migration Pipeline); Specialized Work updated; 6 new Context-Aware Triggers; Forbidden Actions + Domain Terminology synced from AGENTS.md v1.9.6 | Windsurf AI |
|
||||||
@@ -594,3 +710,12 @@ This file is a **quick reference**. For detailed information:
|
|||||||
| 1.9.1 | 2026-05-13 | Added `bugfix` workflow and skill (migrated and improved from `docs/bugfix.md`) | Windsurf AI |
|
| 1.9.1 | 2026-05-13 | Added `bugfix` workflow and skill (migrated and improved from `docs/bugfix.md`) | Windsurf AI |
|
||||||
| 1.9.0 | 2026-05-03 | Integrated Global TypeScript Coding Standards (Headers, JSDoc, Thai comments, Single Export, No blank lines) | Windsurf AI |
|
| 1.9.0 | 2026-05-03 | Integrated Global TypeScript Coding Standards (Headers, JSDoc, Thai comments, Single Export, No blank lines) | Windsurf AI |
|
||||||
| 1.8.5 | 2026-04-22 | Legacy version | Human Dev |
|
| 1.8.5 | 2026-04-22 | Legacy version | Human Dev |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**To update this file:**
|
||||||
|
|
||||||
|
1. Edit relevant sections
|
||||||
|
2. Update Change Log above
|
||||||
|
3. Bump version number in header
|
||||||
|
4. Commit: `spec(agents): bump GEMINI.md to vX.X.X - <brief description>`
|
||||||
|
|||||||
@@ -10,11 +10,11 @@
|
|||||||
## 📦 Project Memory Override
|
## 📦 Project Memory Override
|
||||||
|
|
||||||
For this repository (`E:\np-dms\lcbp3`), use project memory from:
|
For this repository (`E:\np-dms\lcbp3`), use project memory from:
|
||||||
`E:\np-dms\lcbp3\memory\agent-memory.md`
|
`E:\np-dms\lcbp3\memory\project-memory-override.md`
|
||||||
|
|
||||||
**Before using global Codex memory**, read this project memory file first when the task depends on prior repo context, conventions, decisions, or rollout history.
|
**Before using global Codex memory**, read this project memory file first when the task depends on prior repo context, conventions, decisions, or rollout history.
|
||||||
|
|
||||||
If project memory conflicts with global memory, prefer `memory/agent-memory.md` for LCBP3-specific facts.
|
If project memory conflicts with global memory, prefer `memory/project-memory-override.md` for LCBP3-specific facts.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
+27
-13
@@ -62,8 +62,8 @@ _Avoid_: Tool, LLM tool, LangChain tool
|
|||||||
_Avoid_: Rule engine, NLU pipeline
|
_Avoid_: Rule engine, NLU pipeline
|
||||||
|
|
||||||
**LLM Fallback**:
|
**LLM Fallback**:
|
||||||
ชั้นที่สอง of Intent Classifier — synchronous Ollama call (gemma4:e4b Q8_0) เมื่อ Pattern Layer ไม่ match, ใช้ semaphore max=3
|
ชั้นที่สอง of Intent Classifier — synchronous Ollama call (`np-dms-ai`) เมื่อ Pattern Layer ไม่ match, ใช้ semaphore max=3; runtime model tag เป็น ops detail ใน Modelfile เท่านั้น
|
||||||
_Avoid_: BullMQ-based classification, async intent routing
|
_Avoid_: BullMQ-based classification, async intent routing, gemma4:e4b (runtime tag ไม่ใช่ domain term)
|
||||||
|
|
||||||
### AI
|
### AI
|
||||||
|
|
||||||
@@ -92,8 +92,8 @@ Container สำเร็จรูป (FastAPI Sidecar บน Desk-5439) ทำ
|
|||||||
_Avoid_: OCR microservice (ที่ขาดการป้องกัน)
|
_Avoid_: OCR microservice (ที่ขาดการป้องกัน)
|
||||||
|
|
||||||
**Prompt Version**:
|
**Prompt Version**:
|
||||||
Immutable snapshot ของ prompt template ใน `ai_prompts` table — ทุกครั้งที่ admin กด "บันทึก" จะสร้าง version ใหม่ (version_number เพิ่มทีละ 1) version เก่ายังอยู่ใน history ลบได้ยกเว้น active version (ADR-029)
|
Immutable snapshot ของ prompt template ใน `ai_prompts` table — ทุกครั้งที่ admin กด "บันทึก" จะสร้าง version ใหม่ (`version_number` เพิ่มทีละ 1) version เก่ายังอยู่ใน history ลบได้ยกเว้น active version (ADR-029)
|
||||||
_Avoid_: Prompt config, Prompt setting, Editable prompt
|
_Avoid_: Prompt config, Prompt setting, Editable prompt
|
||||||
|
|
||||||
**Active Prompt**:
|
**Active Prompt**:
|
||||||
Prompt Version ที่มี `is_active = 1` ต่อ `prompt_type` — ใช้โดยทั้ง OCR Sandbox และ `processMigrateDocument` พร้อมกัน, cached ใน Redis TTL 60s; invalidated เมื่อ admin activate version อื่น (ADR-029)
|
Prompt Version ที่มี `is_active = 1` ต่อ `prompt_type` — ใช้โดยทั้ง OCR Sandbox และ `processMigrateDocument` พร้อมกัน, cached ใน Redis TTL 60s; invalidated เมื่อ admin activate version อื่น (ADR-029)
|
||||||
@@ -107,6 +107,18 @@ _Avoid_: Prompt string, Prompt text (ambiguous)
|
|||||||
ทุก AI suggestion ต้องผ่านการ accept/reject โดย user ก่อนกลายเป็น state change — บันทึกใน `ai_audit_logs`
|
ทุก AI suggestion ต้องผ่านการ accept/reject โดย user ก่อนกลายเป็น state change — บันทึกใน `ai_audit_logs`
|
||||||
_Avoid_: Auto-apply, AI auto-execute
|
_Avoid_: Auto-apply, AI auto-execute
|
||||||
|
|
||||||
|
**Execution Profile** _(admin-facing only)_:
|
||||||
|
Policy ภายในที่ backend กำหนดให้ AI job อัตโนมัติจาก `job.type` — ไม่มี caller input; มี 4 ค่า: `interactive` (ตอบเร็ว), `standard` (ทั่วไป), `quality` (แม่นยำสูง, ภาษาไทย), `deep-analysis` (context ยาว) — admin เห็นใน audit log และ Admin Console; ค่า default ใน `docs/ai-profiles.md`, calibrate ได้ผ่าน Admin Console (ADR-029)
|
||||||
|
_Avoid_: executionProfile (API field), model selection, profile override
|
||||||
|
|
||||||
|
**Canonical Model Identity**:
|
||||||
|
ชื่อ `np-dms-ai` (LLM หลัก) และ `np-dms-ocr` (OCR) — ชื่อที่แสดงต่อทุก layer ที่มนุษย์อ่าน (API response, audit log, Admin Console) แทนชื่อ runtime จริง (เช่น `typhoon2.5-np-dms:latest`)
|
||||||
|
_Avoid_: runtime model name, model tag, Ollama model name (ใช้ใน ops เท่านั้น)
|
||||||
|
|
||||||
|
**OCR Residency**:
|
||||||
|
Policy ที่ตัดสินว่า `np-dms-ocr` จะถูก unload ออกจาก VRAM หลัง job เสร็จทันที (`keep_alive: 0`) หรือเก็บไว้ช่วงหนึ่ง (`keep_alive > 0`) — คำนวณ dynamic จาก VRAM headroom ณ ขณะนั้น; ถ้า `deep-analysis` active หรือ VRAM pressure สูง → unload ทันทีเสมอ
|
||||||
|
_Avoid_: OCR keep_alive setting, fixed keep_alive, OCR cache
|
||||||
|
|
||||||
**AI Tool Layer**:
|
**AI Tool Layer**:
|
||||||
Bridge layer ระหว่าง AI Gateway กับ business modules — dispatch โดย AI Gateway หลังได้ Server-side Intent, enforce CASL ภายใน tool เอง (ADR-025)
|
Bridge layer ระหว่าง AI Gateway กับ business modules — dispatch โดย AI Gateway หลังได้ Server-side Intent, enforce CASL ภายใน tool เอง (ADR-025)
|
||||||
_Avoid_: LLM function calling, Tool plugin, LangChain tool
|
_Avoid_: LLM function calling, Tool plugin, LangChain tool
|
||||||
@@ -140,7 +152,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
## AI authority scope (resolved)
|
## AI authority scope (resolved)
|
||||||
|
|
||||||
| Scope | Allowed? | Mechanism |
|
| Scope | Allowed? | Mechanism |
|
||||||
| :--- | :--- | :--- |
|
| :------------------------------------------------- | :------- | :-------------------------------------------------------------- |
|
||||||
| Read-only insight (summarise, explain) | ✅ | AI Gateway → service → CASL-guarded query |
|
| Read-only insight (summarise, explain) | ✅ | AI Gateway → service → CASL-guarded query |
|
||||||
| Suggest action (UI shows button) | ✅ | Response shape `{ suggestedAction, confidence, reasoning }` |
|
| Suggest action (UI shows button) | ✅ | Response shape `{ suggestedAction, confidence, reasoning }` |
|
||||||
| Auto-trigger side-effects (notify, alert, comment) | ✅ | BullMQ job (ADR-008); MUST NOT change workflow state |
|
| Auto-trigger side-effects (notify, alert, comment) | ✅ | BullMQ job (ADR-008); MUST NOT change workflow state |
|
||||||
@@ -149,7 +161,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
## Upload pipeline (resolved)
|
## Upload pipeline (resolved)
|
||||||
|
|
||||||
| Stage | Mode | Queue | Notes |
|
| Stage | Mode | Queue | Notes |
|
||||||
| :--- | :--- | :--- | :--- |
|
| :------------------------------------------------------------------- | :---- | :------------ | :------------------------------------------------------- |
|
||||||
| 1. Upload → **temp** + return `tempUploadId` | Sync | — | <1s |
|
| 1. Upload → **temp** + return `tempUploadId` | Sync | — | <1s |
|
||||||
| 2. ClamAV scan + MIME whitelist | Sync | — | block ก่อน commit (ADR-016) |
|
| 2. ClamAV scan + MIME whitelist | Sync | — | block ก่อน commit (ADR-016) |
|
||||||
| 3. User commit (metadata + ย้าย permanent) | Sync | — | สร้าง `documents` row, ใช้ `Idempotency-Key` |
|
| 3. User commit (metadata + ย้าย permanent) | Sync | — | สร้าง `documents` row, ใช้ `Idempotency-Key` |
|
||||||
@@ -168,7 +180,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
## Identifier rules (ADR-019, AI subsystem)
|
## Identifier rules (ADR-019, AI subsystem)
|
||||||
|
|
||||||
| Boundary | Identifier ที่ใช้ |
|
| Boundary | Identifier ที่ใช้ |
|
||||||
| :--- | :--- |
|
| :--------------------------------------------- | :------------------------------------------------------------------------ |
|
||||||
| API (FE ↔ AI Gateway) | `publicId` (UUIDv7 string) เท่านั้น; INT `id` มี `@Exclude()` |
|
| API (FE ↔ AI Gateway) | `publicId` (UUIDv7 string) เท่านั้น; INT `id` มี `@Exclude()` |
|
||||||
| Server-side Intent payload | `*PublicId` strings; service แปลงเป็น INT FK ภายใน |
|
| Server-side Intent payload | `*PublicId` strings; service แปลงเป็น INT FK ภายใน |
|
||||||
| LLM context (prompt) | `publicId` + business code (`rfa_number`, `drawing_code`) ห้ามเห็น INT |
|
| LLM context (prompt) | `publicId` + business code (`rfa_number`, `drawing_code`) ห้ามเห็น INT |
|
||||||
@@ -196,7 +208,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
## Glossary Updates (from ADR-034)
|
## Glossary Updates (from ADR-034)
|
||||||
|
|
||||||
| Term | Definition | Avoid |
|
| Term | Definition | Avoid |
|
||||||
|------|------------|-------|
|
| -------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
|
||||||
| **Thai-Optimized Model** | โมเดล AI ที่ถูก fine-tune มาสำหรับภาษาไทยโดยเฉพาะ (เช่น Typhoon series จาก SCB10X) | Generic model, English-only model |
|
| **Thai-Optimized Model** | โมเดล AI ที่ถูก fine-tune มาสำหรับภาษาไทยโดยเฉพาะ (เช่น Typhoon series จาก SCB10X) | Generic model, English-only model |
|
||||||
| **Model Unload/Load** | กระบวนการยกเลิกโหลดโมเดลจาก VRAM และโหลดโมเดลใหม่เข้าไปแทน เพื่อสลับการใช้งานระหว่างโมเดลต่างๆ | Model switching (ambiguous), Hot swap |
|
| **Model Unload/Load** | กระบวนการยกเลิกโหลดโมเดลจาก VRAM และโหลดโมเดลใหม่เข้าไปแทน เพื่อสลับการใช้งานระหว่างโมเดลต่างๆ | Model switching (ambiguous), Hot swap |
|
||||||
| **Cold Start Penalty** | ความล่าช้า 5-15 วินาทีที่เกิดจากการโหลดโมเดล weights เข้า VRAM หลังจากโมเดลถูก unload (keep_alive: 0) | Initial delay, First-run latency |
|
| **Cold Start Penalty** | ความล่าช้า 5-15 วินาทีที่เกิดจากการโหลดโมเดล weights เข้า VRAM หลังจากโมเดลถูก unload (keep_alive: 0) | Initial delay, First-run latency |
|
||||||
@@ -224,7 +236,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
## System readiness summary (resolved)
|
## System readiness summary (resolved)
|
||||||
|
|
||||||
| Component | สถานะ | หมายเหตุ |
|
| Component | สถานะ | หมายเหตุ |
|
||||||
| :--- | :--- | :--- |
|
| :---------------------------- | :------- | :---------------------------------------------------------------------------------------------- |
|
||||||
| **Infrastructure** | ✅ พร้อม | NestJS + Next.js + MariaDB + Redis + Elasticsearch |
|
| **Infrastructure** | ✅ พร้อม | NestJS + Next.js + MariaDB + Redis + Elasticsearch |
|
||||||
| **Workflow Engine** | ✅ พร้อม | DSL-based, ADR-001/021 |
|
| **Workflow Engine** | ✅ พร้อม | DSL-based, ADR-001/021 |
|
||||||
| **AI Boundary** | ✅ พร้อม | ADR-023A — Ollama isolation, no direct DB access |
|
| **AI Boundary** | ✅ พร้อม | ADR-023A — Ollama isolation, no direct DB access |
|
||||||
@@ -235,6 +247,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
| **AI Admin Console** | ✅ พร้อม | ADR-027 Active — แผงควบคุม Dynamic prompt & model control |
|
| **AI Admin Console** | ✅ พร้อม | ADR-027 Active — แผงควบคุม Dynamic prompt & model control |
|
||||||
| **Dynamic Prompt Mgmt** | ✅ พร้อม | ADR-029 Active — พัฒนาเสร็จสมบูรณ์ทั้ง Entity, API, Sandbox, Cache และ UI |
|
| **Dynamic Prompt Mgmt** | ✅ พร้อม | ADR-029 Active — พัฒนาเสร็จสมบูรณ์ทั้ง Entity, API, Sandbox, Cache และ UI |
|
||||||
| **Active Model & OCR Switch** | ✅ พร้อม | ADR-033 Active — สลับโมเดลแบบ Synchronous, GPU VRAM Auto-release และ API Key sidecar protection |
|
| **Active Model & OCR Switch** | ✅ พร้อม | ADR-033 Active — สลับโมเดลแบบ Synchronous, GPU VRAM Auto-release และ API Key sidecar protection |
|
||||||
|
| **AI Runtime Policy Refactor** | ✅ พร้อม | Feature-235 — `np-dms-ai`/`np-dms-ocr` canonical names, adaptive OCR residency, CPU fallback retrieval, queue policy (ai-realtime concurrency=2) |
|
||||||
|
|
||||||
## Flagged ambiguities
|
## Flagged ambiguities
|
||||||
|
|
||||||
@@ -247,7 +260,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
- **"np-dms-ai" vs `typhoon2.5-np-dms:latest`** — resolved: ถ้าเดินตาม AI refactor ใหม่ `np-dms-ai` คือ **Canonical AI Model Identity** ใหม่ของระบบ ไม่ใช่แค่ deploy alias
|
- **"np-dms-ai" vs `typhoon2.5-np-dms:latest`** — resolved: ถ้าเดินตาม AI refactor ใหม่ `np-dms-ai` คือ **Canonical AI Model Identity** ใหม่ของระบบ ไม่ใช่แค่ deploy alias
|
||||||
- **"OCR keep_alive"** — resolved: policy ใหม่ควรถูกอธิบายเป็น **Adaptive OCR Residency** ตาม VRAM headroom และ active model ไม่ใช่ fixed `0` หรือ fixed `300`
|
- **"OCR keep_alive"** — resolved: policy ใหม่ควรถูกอธิบายเป็น **Adaptive OCR Residency** ตาม VRAM headroom และ active model ไม่ใช่ fixed `0` หรือ fixed `300`
|
||||||
- **"`model.key` ใน API job request"** — resolved: caller ไม่ควรเลือกชื่อโมเดลตรง ๆ; ควรส่ง **Execution Profile** แล้วให้ backend policy เป็นคน map ไป model/parameters ที่อนุญาต
|
- **"`model.key` ใน API job request"** — resolved: caller ไม่ควรเลือกชื่อโมเดลตรง ๆ; ควรส่ง **Execution Profile** แล้วให้ backend policy เป็นคน map ไป model/parameters ที่อนุญาต
|
||||||
- **"profile names"** — resolved: ใช้ **Canonical Profile Set** แบบเล็กและเสถียร (`fast`, `balanced`, `thai-accurate`, `large-context`) แทนการแตกชื่อ profile ตาม job ภายใน
|
- **"profile names"** — resolved: ใช้ **Canonical Profile Set** แบบเล็กและเสถียร (`interactive`, `standard`, `quality`, `deep-analysis`) แทนการแตกชื่อ profile ตาม job ภายใน
|
||||||
- **"profile สำหรับ migrate-document / auto-fill-document / OCR extraction"** — resolved: ใช้ **Policy-Enforced Profile Override**; backend บังคับ profile เองสำหรับงานที่มีผลต่อข้อมูล ไม่เปิดให้ caller เลือกคุณภาพอย่างอิสระ
|
- **"profile สำหรับ migrate-document / auto-fill-document / OCR extraction"** — resolved: ใช้ **Policy-Enforced Profile Override**; backend บังคับ profile เองสำหรับงานที่มีผลต่อข้อมูล ไม่เปิดให้ caller เลือกคุณภาพอย่างอิสระ
|
||||||
- **"BGE-M3 / Reranker บน GPU"** — resolved: ถ้าย้ายขึ้น GPU ต้องอยู่ใต้ **LLM-First GPU Ownership**; LLM/OCR มี priority สูงกว่า retrieval path เสมอ
|
- **"BGE-M3 / Reranker บน GPU"** — resolved: ถ้าย้ายขึ้น GPU ต้องอยู่ใต้ **LLM-First GPU Ownership**; LLM/OCR มี priority สูงกว่า retrieval path เสมอ
|
||||||
- **"embed/rerank ตอน VRAM ไม่พอ"** — resolved: ใช้ **CPU Fallback Retrieval**; retrieval path ต้อง degrade ไป CPU ทันที ไม่รอ GPU queue
|
- **"embed/rerank ตอน VRAM ไม่พอ"** — resolved: ใช้ **CPU Fallback Retrieval**; retrieval path ต้อง degrade ไป CPU ทันที ไม่รอ GPU queue
|
||||||
@@ -266,7 +279,7 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
## ADRs ที่เกี่ยวข้องกับ AI Runtime Layer
|
## ADRs ที่เกี่ยวข้องกับ AI Runtime Layer
|
||||||
|
|
||||||
| ADR | หัวข้อ | ตัดสินใจอะไร | สถานะ |
|
| ADR | หัวข้อ | ตัดสินใจอะไร | สถานะ |
|
||||||
| :--- | :--- | :--- | :--- |
|
| :------ | :--------------------------------- | :-------------------------------------------------------------------------- | :---------- |
|
||||||
| ADR-024 | Intent Classification Strategy | Hybrid: Pattern First → LLM Fallback | ✅ Accepted |
|
| ADR-024 | Intent Classification Strategy | Hybrid: Pattern First → LLM Fallback | ✅ Accepted |
|
||||||
| ADR-025 | AI Tool Layer Architecture | Bridge pattern, CASL enforcement, response shape | ✅ Accepted |
|
| ADR-025 | AI Tool Layer Architecture | Bridge pattern, CASL enforcement, response shape | ✅ Accepted |
|
||||||
| ADR-026 | Document Chat UI Pattern | Side-panel vs modal vs separate page | ✅ Accepted |
|
| ADR-026 | Document Chat UI Pattern | Side-panel vs modal vs separate page | ✅ Accepted |
|
||||||
@@ -275,10 +288,11 @@ _Avoid_: Throw exception from tool, Untyped error
|
|||||||
| ADR-029 | Dynamic Prompt Management | `ai_prompts` table, versioned OCR extraction prompt | ✅ Active |
|
| ADR-029 | Dynamic Prompt Management | `ai_prompts` table, versioned OCR extraction prompt | ✅ Active |
|
||||||
| ADR-032 | Typhoon OCR Integration | Typhoon OCR-3B + typhoon2.1-gemma3-4b on Admin Desktop | ✅ Active |
|
| ADR-032 | Typhoon OCR Integration | Typhoon OCR-3B + typhoon2.1-gemma3-4b on Admin Desktop | ✅ Active |
|
||||||
| ADR-033 | Active Model & OCR Management | Synchronous Model switch, GPU VRAM Auto-release, Sidecar API Key protection | ✅ Active |
|
| ADR-033 | Active Model & OCR Management | Synchronous Model switch, GPU VRAM Auto-release, Sidecar API Key protection | ✅ Active |
|
||||||
|
| ADR-034 | Thai Model Stack | typhoon2.5-np-dms:latest (Main) + typhoon-np-dms-ocr:latest (OCR, keep_alive:0) | ✅ Active |
|
||||||
|
|
||||||
**หมายเหตุ**: ADR-023A ยังคงเป็น canonical สำหรับ infrastructure — ADR-024/025/026/027 เพิ่ม runtime layer; ADR-028 ปรับ Migration Pipeline; ADR-033 จัดระบบโมเดลและ OCR
|
**หมายเหตุ**: ADR-023A ยังคงเป็น canonical สำหรับ infrastructure — ADR-024/025/026/027 เพิ่ม runtime layer; ADR-028 ปรับ Migration Pipeline; ADR-033 จัดระบบโมเดลและ OCR
|
||||||
|
|
||||||
## สิ่งที่ควรทำในอนาคต (Future Maintenance & Security Tasks)
|
## สิ่งที่ควรทำในอนาคต (Future Maintenance & Security Tasks)
|
||||||
|
|
||||||
* **Axios Dependency**: ได้รับการอัปเกรด dependencies เป็นรุ่นปลอดภัยล่าสุดและแก้ไขช่องโหว่ Prototype Pollution เรียบร้อยแล้ว (pnpm audit CLEAN 100%)
|
- **Axios Dependency**: ได้รับการอัปเกรด dependencies เป็นรุ่นปลอดภัยล่าสุดและแก้ไขช่องโหว่ Prototype Pollution เรียบร้อยแล้ว (pnpm audit CLEAN 100%)
|
||||||
* **ความปลอดภัยของ Sidecar และ GPU**: นำระบบ API Key Header verification (`X-API-Key`) และกลไก Unload model (`keep_alive: 0`) มาประยุกต์ใช้อย่างสมบูรณ์บนเครื่องประมวลผลโลคัล Desk-5439
|
- **ความปลอดภัยของ Sidecar และ GPU**: นำระบบ API Key Header verification (`X-API-Key`) และกลไก Unload model (`keep_alive: 0`) มาประยุกต์ใช้อย่างสมบูรณ์บนเครื่องประมวลผลโลคัล Desk-5439
|
||||||
|
|||||||
@@ -57,6 +57,12 @@ OLLAMA_EMBED_MODEL=nomic-embed-text
|
|||||||
OLLAMA_RAG_MODEL=typhoon2.5-np-dms:latest
|
OLLAMA_RAG_MODEL=typhoon2.5-np-dms:latest
|
||||||
OLLAMA_URL=http://192.168.10.8:11434
|
OLLAMA_URL=http://192.168.10.8:11434
|
||||||
|
|
||||||
|
# VRAM, Residency & Concurrency settings (Feature-235 AI Runtime Policy)
|
||||||
|
AI_VRAM_HEADROOM_THRESHOLD_MB=3000
|
||||||
|
AI_GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB=12000
|
||||||
|
AI_OCR_RESIDENCY_WINDOW_SECONDS=120
|
||||||
|
AI_REALTIME_CONCURRENCY=2
|
||||||
|
|
||||||
# Qdrant (ADR-023A)
|
# Qdrant (ADR-023A)
|
||||||
QDRANT_HOST=http://192.168.10.8:6333
|
QDRANT_HOST=http://192.168.10.8:6333
|
||||||
QDRANT_COLLECTION=lcbp3_documents
|
QDRANT_COLLECTION=lcbp3_documents
|
||||||
|
|||||||
@@ -19,14 +19,7 @@ export default tseslint.config(
|
|||||||
},
|
},
|
||||||
sourceType: 'commonjs',
|
sourceType: 'commonjs',
|
||||||
parserOptions: {
|
parserOptions: {
|
||||||
projectService: {
|
project: ['./tsconfig.eslint.json'],
|
||||||
allowDefaultProject: [
|
|
||||||
'jest.config.js',
|
|
||||||
'*.config.mjs',
|
|
||||||
'scratch/*.ts',
|
|
||||||
'test/*.ts',
|
|
||||||
],
|
|
||||||
},
|
|
||||||
tsconfigRootDir: import.meta.dirname,
|
tsconfigRootDir: import.meta.dirname,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -67,7 +67,7 @@
|
|||||||
"fs-extra": "^11.3.2",
|
"fs-extra": "^11.3.2",
|
||||||
"helmet": "^8.1.0",
|
"helmet": "^8.1.0",
|
||||||
"ioredis": "^5.8.2",
|
"ioredis": "^5.8.2",
|
||||||
"joi": "^18.0.1",
|
"joi": "^18.2.1",
|
||||||
"ms": "^2.1.3",
|
"ms": "^2.1.3",
|
||||||
"multer": "^2.0.2",
|
"multer": "^2.0.2",
|
||||||
"mysql2": "^3.15.3",
|
"mysql2": "^3.15.3",
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
// Change Log:
|
// Change Log:
|
||||||
// - 2026-05-13: Add BullMQ config registry for reminder and distribution queues.
|
// - 2026-05-13: Add BullMQ config registry for reminder and distribution queues.
|
||||||
// - 2026-05-15: เพิ่ม config สำหรับ ai-realtime และ ai-batch ตาม ADR-023A.
|
// - 2026-05-15: เพิ่ม config สำหรับ ai-realtime และ ai-batch ตาม ADR-023A.
|
||||||
|
// - 2026-06-11: ปรับ aiRealtimeQueue.concurrency ให้อ่านค่าจาก AI_REALTIME_CONCURRENCY / REALTIME_CONCURRENCY (ค่าเริ่มต้นเปลี่ยนจาก 1 เป็น 2)
|
||||||
|
|
||||||
import { registerAs } from '@nestjs/config';
|
import { registerAs } from '@nestjs/config';
|
||||||
|
|
||||||
@@ -12,7 +13,11 @@ export default registerAs('bullmq', () => ({
|
|||||||
process.env.BULLMQ_DISTRIBUTION_QUEUE || 'rfa-distribution',
|
process.env.BULLMQ_DISTRIBUTION_QUEUE || 'rfa-distribution',
|
||||||
aiRealtimeQueue: {
|
aiRealtimeQueue: {
|
||||||
name: process.env.BULLMQ_AI_REALTIME_QUEUE || 'ai-realtime',
|
name: process.env.BULLMQ_AI_REALTIME_QUEUE || 'ai-realtime',
|
||||||
concurrency: 1,
|
concurrency: Number(
|
||||||
|
process.env.AI_REALTIME_CONCURRENCY ||
|
||||||
|
process.env.REALTIME_CONCURRENCY ||
|
||||||
|
'2'
|
||||||
|
),
|
||||||
defaultJobOptions: {
|
defaultJobOptions: {
|
||||||
attempts: 3,
|
attempts: 3,
|
||||||
backoff: { type: 'exponential', delay: 2000 },
|
backoff: { type: 'exponential', delay: 2000 },
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// File: src/modules/ai/ai.controller.ts
|
// File: backend/src/modules/ai/ai.controller.ts
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-14: เพิ่ม Legacy Migration staging endpoints ตาม ADR-023.
|
// - 2026-05-14: เพิ่ม Legacy Migration staging endpoints ตาม ADR-023.
|
||||||
// - 2026-05-14: ย้าย DeleteAuditLogsQueryDto ไป dto/ folder; ลบ authHeader passthrough (🟢 LOW-1/LOW-2).
|
// - 2026-05-14: ย้าย DeleteAuditLogsQueryDto ไป dto/ folder; ลบ authHeader passthrough (🟢 LOW-1/LOW-2).
|
||||||
@@ -13,6 +13,7 @@
|
|||||||
// - 2026-06-01: [BUGFIX] submitSandboxOcr: เพิ่ม @ApiBearerAuth(), @HttpCode(ACCEPTED), Body({ engineType }) และส่ง engineType ไปยัง enqueueSandboxJob
|
// - 2026-06-01: [BUGFIX] submitSandboxOcr: เพิ่ม @ApiBearerAuth(), @HttpCode(ACCEPTED), Body({ engineType }) และส่ง engineType ไปยัง enqueueSandboxJob
|
||||||
// - 2026-06-02: เพิ่ม REST endpoints GET /ai/ocr-engines และ POST /ai/ocr-engines/:engineId/select (T003, T004, ADR-033) และนำเข้า SystemException เพื่อป้องกันความเสียหายในการคอมไพล์
|
// - 2026-06-02: เพิ่ม REST endpoints GET /ai/ocr-engines และ POST /ai/ocr-engines/:engineId/select (T003, T004, ADR-033) และนำเข้า SystemException เพื่อป้องกันความเสียหายในการคอมไพล์
|
||||||
// - 2026-06-06: [BUGFIX] เพิ่ม @Throttle({ default: { limit: 300, ttl: 60000 } }) บน GET admin/sandbox/job/:id เพื่อแก้ ThrottlerException spam จาก frontend polling
|
// - 2026-06-06: [BUGFIX] เพิ่ม @Throttle({ default: { limit: 300, ttl: 60000 } }) บน GET admin/sandbox/job/:id เพื่อแก้ ThrottlerException spam จาก frontend polling
|
||||||
|
// - 2026-06-11: แก้ไขการส่งพารามิเตอร์ให้กับ queueSuggestJob ใน suggestDocumentMetadata
|
||||||
// Controller สำหรับ AI Gateway Endpoints (ADR-023)
|
// Controller สำหรับ AI Gateway Endpoints (ADR-023)
|
||||||
|
|
||||||
import {
|
import {
|
||||||
@@ -62,7 +63,7 @@ import { AiRagQueryDto } from './dto/ai-rag-query.dto';
|
|||||||
import { ExtractDocumentDto } from './dto/extract-document.dto';
|
import { ExtractDocumentDto } from './dto/extract-document.dto';
|
||||||
import { AiCallbackDto } from './dto/ai-callback.dto';
|
import { AiCallbackDto } from './dto/ai-callback.dto';
|
||||||
import { CreateAiJobDto } from './dto/create-ai-job.dto';
|
import { CreateAiJobDto } from './dto/create-ai-job.dto';
|
||||||
import { SubmitAiJobDto } from './dto/submit-ai-job.dto';
|
import { AiJobResponseDto } from './dto/ai-job-response.dto';
|
||||||
import { MigrationUpdateDto } from './dto/migration-update.dto';
|
import { MigrationUpdateDto } from './dto/migration-update.dto';
|
||||||
import { MigrationQueryDto } from './dto/migration-query.dto';
|
import { MigrationQueryDto } from './dto/migration-query.dto';
|
||||||
import { ValidationException, SystemException } from '../../common/exceptions';
|
import { ValidationException, SystemException } from '../../common/exceptions';
|
||||||
@@ -171,11 +172,7 @@ export class AiController {
|
|||||||
@Body() dto: CreateAiJobDto,
|
@Body() dto: CreateAiJobDto,
|
||||||
@Headers('idempotency-key') idempotencyKey: string
|
@Headers('idempotency-key') idempotencyKey: string
|
||||||
): Promise<{ success: boolean; jobId?: string; status: string }> {
|
): Promise<{ success: boolean; jobId?: string; status: string }> {
|
||||||
const result = await this.aiService.queueSuggestJob({
|
const result = await this.aiService.queueSuggestJob(dto, idempotencyKey);
|
||||||
...dto,
|
|
||||||
jobType: 'ai-suggest',
|
|
||||||
idempotencyKey: idempotencyKey || dto.idempotencyKey,
|
|
||||||
});
|
|
||||||
return {
|
return {
|
||||||
success: result.success,
|
success: result.success,
|
||||||
jobId: result.jobId,
|
jobId: result.jobId,
|
||||||
@@ -199,25 +196,25 @@ export class AiController {
|
|||||||
@UseGuards(JwtAuthGuard, AiEnabledGuard, RbacGuard)
|
@UseGuards(JwtAuthGuard, AiEnabledGuard, RbacGuard)
|
||||||
@ApiBearerAuth()
|
@ApiBearerAuth()
|
||||||
@RequirePermission('ai.suggest')
|
@RequirePermission('ai.suggest')
|
||||||
@HttpCode(HttpStatus.ACCEPTED)
|
@HttpCode(HttpStatus.CREATED)
|
||||||
@ApiOperation({
|
@ApiOperation({
|
||||||
summary: 'Submit AI migration job — ส่งงานย้ายเอกสารให้ AI ประมวลผล',
|
summary: 'Submit unified AI job — ส่งงานประมวลผล AI แบบรวมศูนย์',
|
||||||
description:
|
description:
|
||||||
'รับ tempAttachmentId/documentNumber แล้วส่งงานย้ายเอกสารเข้า BullMQ เพื่อรอการประมวลผล',
|
'รับชนิดงานและข้อมูลอ้างอิง เพื่อส่งงานประมวลผล AI เข้าคิว BullMQ',
|
||||||
})
|
})
|
||||||
@ApiHeader({
|
@ApiHeader({
|
||||||
name: 'Idempotency-Key',
|
name: 'Idempotency-Key',
|
||||||
description: 'Unique key เพื่อป้องกัน duplicate AI job',
|
description: 'Unique key เพื่อป้องกัน duplicate AI job',
|
||||||
required: true,
|
required: true,
|
||||||
})
|
})
|
||||||
async submitMigrationJob(
|
async submitUnifiedJob(
|
||||||
@Body() dto: SubmitAiJobDto,
|
@Body() dto: CreateAiJobDto,
|
||||||
@Headers('idempotency-key') idempotencyKey: string
|
@Headers('idempotency-key') idempotencyKey: string
|
||||||
) {
|
): Promise<AiJobResponseDto> {
|
||||||
if (!idempotencyKey) {
|
if (!idempotencyKey) {
|
||||||
throw new ValidationException('Idempotency-Key header is required');
|
throw new ValidationException('Idempotency-Key header is required');
|
||||||
}
|
}
|
||||||
return this.aiService.submitMigrationJob(dto, idempotencyKey);
|
return this.aiService.submitUnifiedJob(dto, idempotencyKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Get('jobs/:jobId')
|
@Get('jobs/:jobId')
|
||||||
|
|||||||
@@ -36,12 +36,14 @@ import { SandboxOcrEngineService } from './services/sandbox-ocr-engine.service';
|
|||||||
import { EmbeddingService } from './services/embedding.service';
|
import { EmbeddingService } from './services/embedding.service';
|
||||||
import { VramMonitorService } from './services/vram-monitor.service';
|
import { VramMonitorService } from './services/vram-monitor.service';
|
||||||
import { OcrCacheService } from './services/ocr-cache.service';
|
import { OcrCacheService } from './services/ocr-cache.service';
|
||||||
|
import { AiPolicyService } from './services/ai-policy.service';
|
||||||
import { MigrationLog } from './entities/migration-log.entity';
|
import { MigrationLog } from './entities/migration-log.entity';
|
||||||
import { AiAuditLog } from './entities/ai-audit-log.entity';
|
import { AiAuditLog } from './entities/ai-audit-log.entity';
|
||||||
import { MigrationReviewRecord } from './entities/migration-review.entity';
|
import { MigrationReviewRecord } from './entities/migration-review.entity';
|
||||||
import { MigrationProgress } from './entities/migration-progress.entity';
|
import { MigrationProgress } from './entities/migration-progress.entity';
|
||||||
import { SystemSetting } from './entities/system-setting.entity';
|
import { SystemSetting } from './entities/system-setting.entity';
|
||||||
import { AiAvailableModel } from './entities/ai-available-model.entity';
|
import { AiAvailableModel } from './entities/ai-available-model.entity';
|
||||||
|
import { AiExecutionProfile } from './entities/ai-execution-profile.entity';
|
||||||
import { AiMigrationCheckpointService } from './ai-migration-checkpoint.service';
|
import { AiMigrationCheckpointService } from './ai-migration-checkpoint.service';
|
||||||
import { AiEnabledGuard } from './guards/ai-enabled.guard';
|
import { AiEnabledGuard } from './guards/ai-enabled.guard';
|
||||||
import { UserModule } from '../user/user.module';
|
import { UserModule } from '../user/user.module';
|
||||||
@@ -96,6 +98,7 @@ import {
|
|||||||
ImportTransaction,
|
ImportTransaction,
|
||||||
MigrationReviewQueue,
|
MigrationReviewQueue,
|
||||||
AiPrompt,
|
AiPrompt,
|
||||||
|
AiExecutionProfile,
|
||||||
]),
|
]),
|
||||||
|
|
||||||
BullModule.registerQueue(
|
BullModule.registerQueue(
|
||||||
@@ -171,6 +174,7 @@ import {
|
|||||||
providers: [
|
providers: [
|
||||||
AiService,
|
AiService,
|
||||||
AiSettingsService,
|
AiSettingsService,
|
||||||
|
AiPolicyService,
|
||||||
AiIngestService,
|
AiIngestService,
|
||||||
AiMigrationCheckpointService,
|
AiMigrationCheckpointService,
|
||||||
AiQueueService,
|
AiQueueService,
|
||||||
@@ -201,6 +205,7 @@ import {
|
|||||||
exports: [
|
exports: [
|
||||||
AiService,
|
AiService,
|
||||||
AiSettingsService,
|
AiSettingsService,
|
||||||
|
AiPolicyService,
|
||||||
AiIngestService,
|
AiIngestService,
|
||||||
AiMigrationCheckpointService,
|
AiMigrationCheckpointService,
|
||||||
AiQueueService,
|
AiQueueService,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
// Unit Tests สำหรับ AiService — ทดสอบ Business Logic สำคัญ: Callback, Update, Status Transitions
|
// Unit Tests สำหรับ AiService — ทดสอบ Business Logic สำคัญ: Callback, Update, Status Transitions
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-21: เพิ่ม unit tests สำหรับ getSystemHealth (T026) ทั้งกรณี cache hit/miss และ queue metrics.
|
// - 2026-05-21: เพิ่ม unit tests สำหรับ getSystemHealth (T026) ทั้งกรณี cache hit/miss และ queue metrics.
|
||||||
|
// - 2026-06-11: เพิ่ม mock สำหรับ AiPolicyService เพื่อแก้ไข test regression
|
||||||
|
|
||||||
import { Test, TestingModule } from '@nestjs/testing';
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
import { getRepositoryToken } from '@nestjs/typeorm';
|
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||||
@@ -17,7 +18,11 @@ import {
|
|||||||
import { AiAuditLog, AiAuditStatus } from './entities/ai-audit-log.entity';
|
import { AiAuditLog, AiAuditStatus } from './entities/ai-audit-log.entity';
|
||||||
import { AiCallbackDto } from './dto/ai-callback.dto';
|
import { AiCallbackDto } from './dto/ai-callback.dto';
|
||||||
import { MigrationUpdateDto } from './dto/migration-update.dto';
|
import { MigrationUpdateDto } from './dto/migration-update.dto';
|
||||||
import { NotFoundException, BusinessException } from '../../common/exceptions';
|
import {
|
||||||
|
NotFoundException,
|
||||||
|
BusinessException,
|
||||||
|
ValidationException,
|
||||||
|
} from '../../common/exceptions';
|
||||||
import { AuditLog } from '../../common/entities/audit-log.entity';
|
import { AuditLog } from '../../common/entities/audit-log.entity';
|
||||||
import {
|
import {
|
||||||
QUEUE_AI_BATCH,
|
QUEUE_AI_BATCH,
|
||||||
@@ -28,6 +33,9 @@ import { AiQdrantService } from './qdrant.service';
|
|||||||
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
|
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
|
||||||
import { AiSettingsService } from './ai-settings.service';
|
import { AiSettingsService } from './ai-settings.service';
|
||||||
import { VramMonitorService } from './services/vram-monitor.service';
|
import { VramMonitorService } from './services/vram-monitor.service';
|
||||||
|
import { AiPolicyService } from './services/ai-policy.service';
|
||||||
|
import { Attachment } from '../../common/file-storage/entities/attachment.entity';
|
||||||
|
import { Project } from '../project/entities/project.entity';
|
||||||
|
|
||||||
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';
|
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';
|
||||||
|
|
||||||
@@ -110,6 +118,44 @@ describe('AiService', () => {
|
|||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Mock AiPolicyService
|
||||||
|
const mockAiPolicyService = {
|
||||||
|
getCanonicalModelName: jest.fn().mockImplementation((name: string) => {
|
||||||
|
if (name.includes('ocr')) return 'np-dms-ocr';
|
||||||
|
return 'np-dms-ai';
|
||||||
|
}),
|
||||||
|
getProfileForJobType: jest.fn().mockReturnValue('standard'),
|
||||||
|
getProfileParameters: jest.fn().mockResolvedValue({
|
||||||
|
canonicalModel: 'np-dms-ai',
|
||||||
|
temperature: 0.5,
|
||||||
|
topP: 0.8,
|
||||||
|
maxTokens: 4096,
|
||||||
|
numCtx: 8192,
|
||||||
|
repeatPenalty: 1.15,
|
||||||
|
keepAliveSeconds: 600,
|
||||||
|
}),
|
||||||
|
createJobPayload: jest
|
||||||
|
.fn()
|
||||||
|
.mockImplementation(async (jobType, docId, attachId) => {
|
||||||
|
await Promise.resolve();
|
||||||
|
return {
|
||||||
|
jobType,
|
||||||
|
documentPublicId: docId,
|
||||||
|
attachmentPublicId: attachId,
|
||||||
|
effectiveProfile: 'standard',
|
||||||
|
canonicalModel: 'np-dms-ai',
|
||||||
|
snapshotParams: {
|
||||||
|
temperature: 0.5,
|
||||||
|
topP: 0.8,
|
||||||
|
maxTokens: 4096,
|
||||||
|
numCtx: 8192,
|
||||||
|
repeatPenalty: 1.15,
|
||||||
|
keepAliveSeconds: 600,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
const mockRedis = {
|
const mockRedis = {
|
||||||
get: jest.fn(),
|
get: jest.fn(),
|
||||||
set: jest.fn(),
|
set: jest.fn(),
|
||||||
@@ -191,6 +237,7 @@ describe('AiService', () => {
|
|||||||
{ provide: AiQdrantService, useValue: mockQdrantService },
|
{ provide: AiQdrantService, useValue: mockQdrantService },
|
||||||
{ provide: AiSettingsService, useValue: mockAiSettingsService },
|
{ provide: AiSettingsService, useValue: mockAiSettingsService },
|
||||||
{ provide: VramMonitorService, useValue: mockVramMonitorService },
|
{ provide: VramMonitorService, useValue: mockVramMonitorService },
|
||||||
|
{ provide: AiPolicyService, useValue: mockAiPolicyService },
|
||||||
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
|
{ provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
|
||||||
],
|
],
|
||||||
}).compile();
|
}).compile();
|
||||||
@@ -241,6 +288,90 @@ describe('AiService', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('submitUnifiedJob', () => {
|
||||||
|
it('ไม่ควรบันทึก ai_audit_logs เป็น SUCCESS ตั้งแต่ตอน enqueue', async () => {
|
||||||
|
mockImportTransactionRepo.manager.findOne.mockResolvedValueOnce({
|
||||||
|
publicId: '019505a1-7c3e-7000-8000-abc123def777',
|
||||||
|
});
|
||||||
|
mockQueue.getJob.mockResolvedValue(null);
|
||||||
|
mockQueue.add.mockResolvedValue({ id: 'job-enqueued' });
|
||||||
|
const result = await service.submitUnifiedJob(
|
||||||
|
{
|
||||||
|
type: 'rag-query',
|
||||||
|
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
|
||||||
|
payload: { query: 'test' },
|
||||||
|
},
|
||||||
|
'job-enqueued'
|
||||||
|
);
|
||||||
|
expect(result).toEqual({
|
||||||
|
jobId: 'job-enqueued',
|
||||||
|
status: 'queued',
|
||||||
|
modelUsed: 'np-dms-ai',
|
||||||
|
effectiveProfile: 'standard',
|
||||||
|
queueName: 'ai-batch',
|
||||||
|
});
|
||||||
|
expect(mockAuditLogRepo.save).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('ควร reject rag-query ที่ไม่มี payload.query', async () => {
|
||||||
|
await expect(
|
||||||
|
service.submitUnifiedJob(
|
||||||
|
{
|
||||||
|
type: 'rag-query',
|
||||||
|
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
|
||||||
|
payload: {},
|
||||||
|
},
|
||||||
|
'job-no-query'
|
||||||
|
)
|
||||||
|
).rejects.toBeInstanceOf(ValidationException);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('ควร reject projectPublicId ที่ไม่พบในระบบด้วย 422', async () => {
|
||||||
|
mockImportTransactionRepo.manager.findOne.mockResolvedValueOnce(null);
|
||||||
|
await expect(
|
||||||
|
service.submitUnifiedJob(
|
||||||
|
{
|
||||||
|
type: 'rag-query',
|
||||||
|
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
|
||||||
|
payload: { query: 'test' },
|
||||||
|
},
|
||||||
|
'job-missing-project'
|
||||||
|
)
|
||||||
|
).rejects.toBeInstanceOf(BusinessException);
|
||||||
|
expect(mockImportTransactionRepo.manager.findOne).toHaveBeenCalledWith(
|
||||||
|
Project,
|
||||||
|
{
|
||||||
|
where: { publicId: '019505a1-7c3e-7000-8000-abc123def777' },
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('ควร reject attachment reference ที่ไม่พบในระบบด้วย 422', async () => {
|
||||||
|
mockImportTransactionRepo.manager.findOne
|
||||||
|
.mockResolvedValueOnce({
|
||||||
|
publicId: '019505a1-7c3e-7000-8000-abc123def777',
|
||||||
|
})
|
||||||
|
.mockResolvedValueOnce(null);
|
||||||
|
await expect(
|
||||||
|
service.submitUnifiedJob(
|
||||||
|
{
|
||||||
|
type: 'rag-query',
|
||||||
|
projectPublicId: '019505a1-7c3e-7000-8000-abc123def777',
|
||||||
|
documentPublicId: '019505a1-7c3e-7000-8000-abc123def456',
|
||||||
|
payload: { query: 'test' },
|
||||||
|
},
|
||||||
|
'job-missing-attachment'
|
||||||
|
)
|
||||||
|
).rejects.toBeInstanceOf(BusinessException);
|
||||||
|
expect(mockImportTransactionRepo.manager.findOne).toHaveBeenCalledWith(
|
||||||
|
Attachment,
|
||||||
|
{
|
||||||
|
where: { publicId: '019505a1-7c3e-7000-8000-abc123def456' },
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// --- handleWebhookCallback ---
|
// --- handleWebhookCallback ---
|
||||||
|
|
||||||
describe('handleWebhookCallback', () => {
|
describe('handleWebhookCallback', () => {
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
// File: src/modules/ai/ai.service.ts
|
// File: backend/src/modules/ai/ai.service.ts
|
||||||
// Service หลักของ AI Gateway — เชื่อมต่อระหว่าง DMS กับ n8n/Ollama Pipeline (ADR-018, ADR-020)
|
// Service หลักของ AI Gateway — เชื่อมต่อระหว่าง DMS กับ n8n/Ollama Pipeline (ADR-018, ADR-020)
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-21: เพิ่ม getSystemHealth พร้อมระบบแคช Redis 30 วินาทีตาม ADR-027.
|
// - 2026-05-21: เพิ่ม getSystemHealth พร้อมระบบแคช Redis 30 วินาทีตาม ADR-027.
|
||||||
// - 2026-05-21: แก้ไข ESLint unsafe return error ใน getSystemHealth โดยใช้ interface SystemHealthResponse
|
// - 2026-05-21: แก้ไข ESLint unsafe return error ใน getSystemHealth โดยใช้ interface SystemHealthResponse
|
||||||
// - 2026-05-29: เพิ่ม OcrService.checkHealth() เข้า getSystemHealth() เพื่อแสดงสถานะ OCR sidecar
|
// - 2026-05-29: เพิ่ม OcrService.checkHealth() เข้า getSystemHealth() เพื่อแสดงสถานะ OCR sidecar
|
||||||
// - 2026-06-02: ปรับปรุง activateAiModel ให้มีการโหลดและยืนยันโมเดลล่วงหน้าแบบ Synchronous (T008, ADR-033) และล้างโมเดลตัวเก่าออกเพื่อประหยัด VRAM (Suggestion 1)
|
// - 2026-06-02: ปรับปรุง activateAiModel ให้มีการโหลดและยืนยันโมเดลล่วงหน้าแบบ Synchronous (T008, ADR-033) และล้างโมเดลตัวเก่าออกเพื่อประหยัด VRAM (Suggestion 1)
|
||||||
// - 2026-06-03: ADR-034 — เพิ่ม activeModels field (เอา mainModel+ocrModel) ใน SystemHealthResponse
|
// - 2026-06-03: ADR-034 — เพิ่ม active models ใน SystemHealthResponse
|
||||||
|
// - 2026-06-11: US2 - เพิ่มการผูก execution profile ใน submitMigrationJob ของ ai.service.ts
|
||||||
|
// - 2026-06-11: US4 - เพิ่ม explicit assertion สำหรับการ dispatch RAG query ไปยัง ai-batch queue
|
||||||
|
// - 2026-06-11: แก้ไข compile errors (SystemException arguments, idempotencyKey signature, type mapping) และลบบรรทัดว่างในฟังก์ชันที่แก้ไข
|
||||||
import { Injectable, Logger, Optional } from '@nestjs/common';
|
import { Injectable, Logger, Optional } from '@nestjs/common';
|
||||||
import { ConfigService } from '@nestjs/config';
|
import { ConfigService } from '@nestjs/config';
|
||||||
import { HttpService } from '@nestjs/axios';
|
import { HttpService } from '@nestjs/axios';
|
||||||
@@ -37,8 +40,11 @@ import { MigrationQueryDto } from './dto/migration-query.dto';
|
|||||||
import { AiValidationService } from './ai-validation.service';
|
import { AiValidationService } from './ai-validation.service';
|
||||||
import { CreateAiJobDto } from './dto/create-ai-job.dto';
|
import { CreateAiJobDto } from './dto/create-ai-job.dto';
|
||||||
import { SubmitAiJobDto } from './dto/submit-ai-job.dto';
|
import { SubmitAiJobDto } from './dto/submit-ai-job.dto';
|
||||||
|
import { AiJobResponseDto } from './dto/ai-job-response.dto';
|
||||||
|
import { AiPolicyService } from './services/ai-policy.service';
|
||||||
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
|
import { ImportTransaction } from '../migration/entities/import-transaction.entity';
|
||||||
import { Project } from '../project/entities/project.entity';
|
import { Project } from '../project/entities/project.entity';
|
||||||
|
import { Attachment } from '../../common/file-storage/entities/attachment.entity';
|
||||||
import {
|
import {
|
||||||
QUEUE_AI_BATCH,
|
QUEUE_AI_BATCH,
|
||||||
QUEUE_AI_REALTIME,
|
QUEUE_AI_REALTIME,
|
||||||
@@ -52,6 +58,7 @@ import {
|
|||||||
VramMonitorService,
|
VramMonitorService,
|
||||||
VramStatus,
|
VramStatus,
|
||||||
} from './services/vram-monitor.service';
|
} from './services/vram-monitor.service';
|
||||||
|
import type { AiJobPayload } from './interfaces/execution-policy.interface';
|
||||||
import {
|
import {
|
||||||
AiModelConfiguration,
|
AiModelConfiguration,
|
||||||
AiModelType,
|
AiModelType,
|
||||||
@@ -178,6 +185,7 @@ export class AiService {
|
|||||||
private readonly configService: ConfigService,
|
private readonly configService: ConfigService,
|
||||||
private readonly httpService: HttpService,
|
private readonly httpService: HttpService,
|
||||||
private readonly aiValidationService: AiValidationService,
|
private readonly aiValidationService: AiValidationService,
|
||||||
|
private readonly aiPolicyService: AiPolicyService,
|
||||||
@InjectRepository(MigrationLog)
|
@InjectRepository(MigrationLog)
|
||||||
private readonly migrationLogRepo: Repository<MigrationLog>,
|
private readonly migrationLogRepo: Repository<MigrationLog>,
|
||||||
@InjectRepository(AiAuditLog)
|
@InjectRepository(AiAuditLog)
|
||||||
@@ -220,7 +228,16 @@ export class AiService {
|
|||||||
// --- ADR-023A BullMQ Job Queueing ---
|
// --- ADR-023A BullMQ Job Queueing ---
|
||||||
|
|
||||||
/** ส่งงาน AI Suggest เข้า ai-realtime queue แบบไม่ block request thread */
|
/** ส่งงาน AI Suggest เข้า ai-realtime queue แบบไม่ block request thread */
|
||||||
async queueSuggestJob(dto: CreateAiJobDto): Promise<AiQueueResult> {
|
async queueSuggestJob(
|
||||||
|
dto: CreateAiJobDto,
|
||||||
|
idempotencyKey: string
|
||||||
|
): Promise<AiQueueResult> {
|
||||||
|
if (dto.type === 'rag-query') {
|
||||||
|
throw new SystemException(
|
||||||
|
'RAG query cannot be queued in AI realtime queue',
|
||||||
|
{ errorCode: 'AI_QUEUE_ERROR' }
|
||||||
|
);
|
||||||
|
}
|
||||||
if (!this.aiRealtimeQueue) {
|
if (!this.aiRealtimeQueue) {
|
||||||
const error = new Error('AI realtime queue is not registered');
|
const error = new Error('AI realtime queue is not registered');
|
||||||
this.logger.error('AI job queue failed', {
|
this.logger.error('AI job queue failed', {
|
||||||
@@ -229,18 +246,17 @@ export class AiService {
|
|||||||
});
|
});
|
||||||
return { success: false, error };
|
return { success: false, error };
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const job = await this.aiRealtimeQueue.add(
|
const job = await this.aiRealtimeQueue.add(
|
||||||
'ai-suggest',
|
'ai-suggest',
|
||||||
{
|
{
|
||||||
jobType: 'ai-suggest',
|
jobType: 'ai-suggest',
|
||||||
documentPublicId: dto.documentPublicId,
|
documentPublicId: dto.documentPublicId,
|
||||||
projectPublicId: dto.projectPublicId,
|
projectPublicId: dto.projectPublicId || '',
|
||||||
payload: dto.payload ?? {},
|
payload: dto.payload ?? {},
|
||||||
idempotencyKey: dto.idempotencyKey,
|
idempotencyKey,
|
||||||
},
|
},
|
||||||
{ jobId: dto.idempotencyKey }
|
{ jobId: idempotencyKey }
|
||||||
);
|
);
|
||||||
return { success: true, jobId: String(job.id) };
|
return { success: true, jobId: String(job.id) };
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
@@ -254,7 +270,10 @@ export class AiService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** ส่งงาน embedding เข้า ai-batch queue แบบ best-effort */
|
/** ส่งงาน embedding เข้า ai-batch queue แบบ best-effort */
|
||||||
async queueEmbedJob(dto: CreateAiJobDto): Promise<AiQueueResult> {
|
async queueEmbedJob(
|
||||||
|
dto: CreateAiJobDto,
|
||||||
|
idempotencyKey: string
|
||||||
|
): Promise<AiQueueResult> {
|
||||||
if (!this.aiBatchQueue) {
|
if (!this.aiBatchQueue) {
|
||||||
const error = new Error('AI batch queue is not registered');
|
const error = new Error('AI batch queue is not registered');
|
||||||
this.logger.error('AI job queue failed', {
|
this.logger.error('AI job queue failed', {
|
||||||
@@ -263,18 +282,17 @@ export class AiService {
|
|||||||
});
|
});
|
||||||
return { success: false, error };
|
return { success: false, error };
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const job = await this.aiBatchQueue.add(
|
const job = await this.aiBatchQueue.add(
|
||||||
'embed-document',
|
'embed-document',
|
||||||
{
|
{
|
||||||
jobType: 'embed-document',
|
jobType: 'embed-document',
|
||||||
documentPublicId: dto.documentPublicId,
|
documentPublicId: dto.documentPublicId || '',
|
||||||
projectPublicId: dto.projectPublicId,
|
projectPublicId: dto.projectPublicId || '',
|
||||||
payload: dto.payload ?? {},
|
payload: dto.payload ?? {},
|
||||||
idempotencyKey: dto.idempotencyKey,
|
idempotencyKey,
|
||||||
},
|
},
|
||||||
{ jobId: dto.idempotencyKey }
|
{ jobId: idempotencyKey }
|
||||||
);
|
);
|
||||||
return { success: true, jobId: String(job.id) };
|
return { success: true, jobId: String(job.id) };
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
@@ -287,6 +305,124 @@ export class AiService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Submits a unified AI job to the BullMQ `ai-batch` queue (ADR-023A).
 *
 * The idempotency key doubles as the BullMQ job id. When the queue already
 * holds a job with that id, no new job is added and the response is built
 * from the stored job's payload instead.
 *
 * @param dto - Job request; `dto.type` is used as the BullMQ job name.
 * @param idempotencyKey - Key used as the job id and stored in the job data.
 * @returns Job id, canonical model, effective profile and queue name.
 * @throws SystemException when the batch queue is not registered.
 */
async submitUnifiedJob(
  dto: CreateAiJobDto,
  idempotencyKey: string
): Promise<AiJobResponseDto> {
  // Every unified job type (rag-query included) is dispatched to ai-batch;
  // this constant is the single source for the queue name reported back.
  const queueName = 'ai-batch';
  const queue = this.aiBatchQueue;
  if (!queue) {
    throw new SystemException('AI batch queue is not registered', {
      errorCode: 'AI_QUEUE_ERROR',
    });
  }

  await this.validateUnifiedJobRequest(dto);

  // Idempotency: reuse the job that was already created for this key.
  const existingJob = await queue.getJob(idempotencyKey);
  if (existingJob) {
    const existingPayload = existingJob.data as unknown as AiJobPayload;
    // NOTE(review): the existing job is reported as 'queued' without
    // consulting its actual state — confirm this is intended.
    return {
      jobId: String(existingJob.id),
      status: 'queued',
      modelUsed: existingPayload.canonicalModel,
      effectiveProfile: existingPayload.effectiveProfile,
      queueName,
    };
  }

  // The policy service decides model/profile/params; the caller cannot.
  const policyPayload = await this.aiPolicyService.createJobPayload(
    dto.type,
    dto.documentPublicId || dto.attachmentPublicId,
    dto.attachmentPublicId
  );
  const jobData = {
    ...policyPayload,
    documentPublicId: policyPayload.documentPublicId || '',
    projectPublicId: dto.projectPublicId || '',
    payload: dto.payload || {},
    idempotencyKey,
  };
  const job = await queue.add(
    dto.type,
    jobData as unknown as AiBatchJobData,
    {
      jobId: idempotencyKey,
    }
  );

  return {
    jobId: String(job.id),
    status: 'queued',
    modelUsed: policyPayload.canonicalModel,
    effectiveProfile: policyPayload.effectiveProfile,
    queueName,
  };
}
|
||||||
|
|
||||||
|
/**
 * Validates a unified job request before it is enqueued.
 *
 * Checks, in order: rag-query jobs carry a non-blank `payload.query` and a
 * `projectPublicId`; document jobs carry a document or attachment id; the
 * referenced project exists; each referenced id resolves to an Attachment.
 *
 * @param dto - The job request to validate.
 * @throws ValidationException when a required field is missing.
 * @throws BusinessException when a referenced project/attachment is absent.
 */
private async validateUnifiedJobRequest(dto: CreateAiJobDto): Promise<void> {
  const { type, projectPublicId, documentPublicId, attachmentPublicId } = dto;

  if (type === 'rag-query') {
    const query = dto.payload?.['query'];
    const hasQueryText = typeof query === 'string' && query.trim().length > 0;
    if (!hasQueryText) {
      throw new ValidationException(
        'payload.query is required for rag-query jobs'
      );
    }
    if (!projectPublicId) {
      throw new ValidationException(
        'projectPublicId is required for rag-query jobs'
      );
    }
  }

  const isDocumentJob =
    type === 'auto-fill-document' || type === 'migrate-document';
  const hasReference = Boolean(documentPublicId) || Boolean(attachmentPublicId);
  if (isDocumentJob && !hasReference) {
    throw new ValidationException(
      'documentPublicId or attachmentPublicId is required for document AI jobs'
    );
  }

  if (projectPublicId) {
    const project = await this.importTransactionRepo.manager.findOne(Project, {
      where: { publicId: projectPublicId },
    });
    if (!project) {
      throw new BusinessException(
        'PROJECT_NOT_FOUND',
        `Project with publicId ${projectPublicId} was not found`,
        'ไม่พบโครงการที่อ้างอิงสำหรับงาน AI'
      );
    }
  }

  // Both ids are resolved against the Attachment entity, document id first.
  for (const publicId of [documentPublicId, attachmentPublicId]) {
    if (typeof publicId !== 'string') {
      continue;
    }
    const attachment = await this.importTransactionRepo.manager.findOne(
      Attachment,
      { where: { publicId } }
    );
    if (!attachment) {
      throw new BusinessException(
        'ATTACHMENT_NOT_FOUND',
        `Attachment with publicId ${publicId} was not found`,
        'ไม่พบไฟล์อ้างอิงสำหรับงาน AI'
      );
    }
  }
}
|
||||||
|
|
||||||
/** ส่งคำขอเปิดงานประมวลผลการย้ายเอกสารของ AI (migrate-document) เข้า BullMQ */
|
/** ส่งคำขอเปิดงานประมวลผลการย้ายเอกสารของ AI (migrate-document) เข้า BullMQ */
|
||||||
async submitMigrationJob(
|
async submitMigrationJob(
|
||||||
dto: SubmitAiJobDto,
|
dto: SubmitAiJobDto,
|
||||||
@@ -327,9 +463,14 @@ export class AiService {
|
|||||||
defaultProject?.publicId ?? '00000000-0000-0000-0000-000000000000';
|
defaultProject?.publicId ?? '00000000-0000-0000-0000-000000000000';
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
const payload = await this.aiPolicyService.createJobPayload(
|
||||||
|
'migrate-document',
|
||||||
|
dto.payload.tempAttachmentId
|
||||||
|
);
|
||||||
const job = await this.aiBatchQueue.add(
|
const job = await this.aiBatchQueue.add(
|
||||||
'migrate-document',
|
'migrate-document',
|
||||||
{
|
{
|
||||||
|
...payload,
|
||||||
jobType: 'migrate-document',
|
jobType: 'migrate-document',
|
||||||
documentPublicId: dto.payload.tempAttachmentId,
|
documentPublicId: dto.payload.tempAttachmentId,
|
||||||
projectPublicId,
|
projectPublicId,
|
||||||
@@ -691,6 +832,9 @@ export class AiService {
|
|||||||
inputHash?: string;
|
inputHash?: string;
|
||||||
outputHash?: string;
|
outputHash?: string;
|
||||||
errorMessage?: string;
|
errorMessage?: string;
|
||||||
|
effectiveProfile?: string;
|
||||||
|
canonicalModel?: string;
|
||||||
|
snapshotParamsJson?: Record<string, unknown>;
|
||||||
}): Promise<void> {
|
}): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const auditLog = this.aiAuditLogRepo.create({
|
const auditLog = this.aiAuditLogRepo.create({
|
||||||
@@ -702,6 +846,9 @@ export class AiService {
|
|||||||
inputHash: data.inputHash,
|
inputHash: data.inputHash,
|
||||||
outputHash: data.outputHash,
|
outputHash: data.outputHash,
|
||||||
errorMessage: data.errorMessage,
|
errorMessage: data.errorMessage,
|
||||||
|
effectiveProfile: data.effectiveProfile,
|
||||||
|
canonicalModel: data.canonicalModel,
|
||||||
|
snapshotParamsJson: data.snapshotParamsJson,
|
||||||
});
|
});
|
||||||
await this.aiAuditLogRepo.save(auditLog);
|
await this.aiAuditLogRepo.save(auditLog);
|
||||||
} catch (auditError: unknown) {
|
} catch (auditError: unknown) {
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
// File: backend/src/modules/ai/dto/ai-job-response.dto.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial creation of AiJobResponseDto for unified AI jobs response
|
||||||
|
// - 2026-06-11: ใช้ import type สำหรับ ExecutionProfile เพื่อแก้ปัญหา TS1272
|
||||||
|
|
||||||
|
import { ApiProperty } from '@nestjs/swagger';
|
||||||
|
import { IsEnum, IsString } from 'class-validator';
|
||||||
|
import type { ExecutionProfile } from '../interfaces/execution-policy.interface';
|
||||||
|
|
||||||
|
// Allowed values per field; each list feeds both the Swagger schema and the
// class-validator rule of the field it belongs to.
const JOB_STATUS_VALUES = ['queued', 'completed', 'failed'];
const CANONICAL_MODEL_VALUES = ['np-dms-ai', 'np-dms-ocr'];
const EXECUTION_PROFILE_VALUES = [
  'interactive',
  'standard',
  'quality',
  'deep-analysis',
];
const QUEUE_NAME_VALUES = ['ai-realtime', 'ai-batch'];

/** Response body returned after a unified AI job has been submitted. */
export class AiJobResponseDto {
  /** BullMQ job id. */
  @ApiProperty({ description: 'ID ของงานในคิว BullMQ' })
  @IsString()
  jobId!: string;

  /** Status of the job in the queue. */
  @ApiProperty({
    enum: JOB_STATUS_VALUES,
    description: 'สถานะของงานในคิว',
  })
  @IsEnum(JOB_STATUS_VALUES)
  status!: 'queued' | 'completed' | 'failed';

  /** Canonical name of the model used for the job. */
  @ApiProperty({
    enum: CANONICAL_MODEL_VALUES,
    description: 'ชื่อโมเดลมาตรฐาน (Canonical Name) ที่ใช้งาน',
  })
  @IsEnum(CANONICAL_MODEL_VALUES)
  modelUsed!: 'np-dms-ai' | 'np-dms-ocr';

  /** Execution profile the system actually assigned. */
  @ApiProperty({
    enum: EXECUTION_PROFILE_VALUES,
    description: 'โปรไฟล์การประมวลผลจริงที่ระบบกำหนดให้',
  })
  @IsEnum(EXECUTION_PROFILE_VALUES)
  effectiveProfile!: ExecutionProfile;

  /** Name of the queue that processes the job. */
  @ApiProperty({
    enum: QUEUE_NAME_VALUES,
    description: 'ชื่อคิวที่ใช้ประมวลผล',
  })
  @IsEnum(QUEUE_NAME_VALUES)
  queueName!: 'ai-realtime' | 'ai-batch';
}
|
||||||
@@ -1,53 +1,93 @@
|
|||||||
// File: src/modules/ai/dto/create-ai-job.dto.ts
|
// File: backend/src/modules/ai/dto/create-ai-job.dto.ts
|
||||||
// Change Log
|
// Change Log:
|
||||||
// - 2026-05-15: เพิ่ม DTO สำหรับ enqueue AI jobs ตาม ADR-023A US1.
|
// - 2026-06-11: Refactored CreateAiJobDto to support new AI runtime policy contract (Option B)
|
||||||
|
// - 2026-06-11: เพิ่ม IsObject ใน class-validator import
|
||||||
|
// - 2026-06-11: ใช้ import type สำหรับ PublicJobType เพื่อแก้ปัญหา TS1272
|
||||||
|
|
||||||
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
|
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
|
||||||
import {
|
import {
|
||||||
IsIn,
|
IsEnum,
|
||||||
IsNotEmpty,
|
|
||||||
IsObject,
|
|
||||||
IsOptional,
|
IsOptional,
|
||||||
IsString,
|
|
||||||
IsUUID,
|
IsUUID,
|
||||||
|
IsObject,
|
||||||
|
registerDecorator,
|
||||||
|
ValidationOptions,
|
||||||
|
ValidationArguments,
|
||||||
} from 'class-validator';
|
} from 'class-validator';
|
||||||
|
import type { PublicJobType } from '../interfaces/execution-policy.interface';
|
||||||
|
|
||||||
export const AI_JOB_TYPES = [
|
/**
|
||||||
'ai-suggest',
|
* Custom decorator to forbid specific properties in payload.
|
||||||
'rag-query',
|
* เดคอเรเตอร์สำหรับป้องกันไม่ให้ส่งฟิลด์ที่กำหนดมาใน API payload
|
||||||
'ocr',
|
*/
|
||||||
'extract-metadata',
|
export function IsForbidden(validationOptions?: ValidationOptions) {
|
||||||
'embed-document',
|
return function (object: object, propertyName: string) {
|
||||||
] as const;
|
registerDecorator({
|
||||||
|
name: 'isForbidden',
|
||||||
|
target: object.constructor,
|
||||||
|
propertyName: propertyName,
|
||||||
|
options: validationOptions,
|
||||||
|
validator: {
|
||||||
|
validate(value: unknown) {
|
||||||
|
return value === undefined;
|
||||||
|
},
|
||||||
|
defaultMessage(args: ValidationArguments) {
|
||||||
|
return `${args.property} is forbidden in payload. Backend determines execution policy.`;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
export type CreateAiJobType = (typeof AI_JOB_TYPES)[number];
|
|
||||||
|
|
||||||
/** DTO สำหรับส่งงาน AI เข้า BullMQ โดยใช้ publicId เท่านั้นตาม ADR-019 */
|
|
||||||
export class CreateAiJobDto {
|
export class CreateAiJobDto {
|
||||||
@ApiProperty({ description: 'Attachment/document publicId สำหรับงาน AI' })
|
|
||||||
@IsUUID()
|
|
||||||
documentPublicId!: string;
|
|
||||||
|
|
||||||
@ApiProperty({ description: 'Project publicId สำหรับ project isolation' })
|
|
||||||
@IsUUID()
|
|
||||||
projectPublicId!: string;
|
|
||||||
|
|
||||||
@ApiProperty({
|
@ApiProperty({
|
||||||
enum: AI_JOB_TYPES,
|
enum: ['auto-fill-document', 'migrate-document', 'rag-query'],
|
||||||
description: 'ชนิดงาน AI ที่ต้อง enqueue',
|
description: 'ชนิดงาน AI ที่ต้อง enqueue',
|
||||||
})
|
})
|
||||||
@IsIn(AI_JOB_TYPES)
|
@IsEnum(['auto-fill-document', 'migrate-document', 'rag-query'])
|
||||||
jobType!: CreateAiJobType;
|
type!: PublicJobType;
|
||||||
|
|
||||||
@ApiProperty({ description: 'Idempotency key จาก request header/body' })
|
|
||||||
@IsString()
|
|
||||||
@IsNotEmpty()
|
|
||||||
idempotencyKey!: string;
|
|
||||||
|
|
||||||
@ApiPropertyOptional({
|
@ApiPropertyOptional({
|
||||||
description: 'Payload เพิ่มเติม เช่น pdfPath, extractedText, question',
|
description: 'Document publicId (UUIDv7) สำหรับงาน AI',
|
||||||
|
})
|
||||||
|
@IsOptional()
|
||||||
|
@IsUUID('all')
|
||||||
|
documentPublicId?: string;
|
||||||
|
|
||||||
|
@ApiPropertyOptional({
|
||||||
|
description: 'Attachment publicId (UUIDv7) สำหรับงาน AI',
|
||||||
|
})
|
||||||
|
@IsOptional()
|
||||||
|
@IsUUID('all')
|
||||||
|
attachmentPublicId?: string;
|
||||||
|
|
||||||
|
@ApiPropertyOptional({
|
||||||
|
description: 'Payload ข้อมูลเพิ่มเติมสำหรับงานแต่ละประเภท',
|
||||||
})
|
})
|
||||||
@IsOptional()
|
@IsOptional()
|
||||||
@IsObject()
|
@IsObject()
|
||||||
payload?: Record<string, unknown>;
|
payload?: Record<string, unknown>;
|
||||||
|
|
||||||
|
@ApiPropertyOptional({
|
||||||
|
description: 'Project publicId สำหรับ project isolation',
|
||||||
|
})
|
||||||
|
@IsOptional()
|
||||||
|
@IsUUID('all')
|
||||||
|
projectPublicId?: string;
|
||||||
|
|
||||||
|
// ฟิลด์ต้องห้ามตามข้อกำหนด FR-A01 เพื่อป้องกันการแทรกแซง policy จาก caller
|
||||||
|
@IsForbidden()
|
||||||
|
executionProfile?: unknown;
|
||||||
|
|
||||||
|
@IsForbidden()
|
||||||
|
model?: unknown;
|
||||||
|
|
||||||
|
@IsForbidden()
|
||||||
|
temperature?: unknown;
|
||||||
|
|
||||||
|
@IsForbidden()
|
||||||
|
top_p?: unknown;
|
||||||
|
|
||||||
|
@IsForbidden()
|
||||||
|
maxTokens?: unknown;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
// File: src/modules/ai/entities/ai-audit-log.entity.ts
|
// File: backend/src/modules/ai/entities/ai-audit-log.entity.ts
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-14: เพิ่ม ADR-023 feedback fields โดยคง legacy audit fields ไว้ช่วงเปลี่ยนผ่าน.
|
// - 2026-05-14: เพิ่ม ADR-023 feedback fields โดยคง legacy audit fields ไว้ช่วงเปลี่ยนผ่าน.
|
||||||
// - 2026-05-30: เพิ่ม modelType, vramUsageMB, cacheHit สำหรับ Typhoon OCR integration (T008, ADR-032).
|
// - 2026-05-30: เพิ่ม modelType, vramUsageMB, cacheHit สำหรับ Typhoon OCR integration (T008, ADR-032).
|
||||||
|
// - 2026-06-11: เปลี่ยน Record<string, any> เป็น Record<string, unknown> เพื่อแก้ปัญหา ESLint
|
||||||
// Entity สำหรับตาราง ai_audit_logs — บันทึก AI Interaction และ feedback ตาม ADR-023
|
// Entity สำหรับตาราง ai_audit_logs — บันทึก AI Interaction และ feedback ตาม ADR-023
|
||||||
|
|
||||||
import {
|
import {
|
||||||
@@ -100,6 +101,25 @@ export class AiAuditLog extends UuidBaseEntity {
|
|||||||
@Column({ name: 'error_message', type: 'text', nullable: true })
|
@Column({ name: 'error_message', type: 'text', nullable: true })
|
||||||
errorMessage?: string;
|
errorMessage?: string;
|
||||||
|
|
||||||
|
@Column({
|
||||||
|
name: 'effective_profile',
|
||||||
|
type: 'varchar',
|
||||||
|
length: 50,
|
||||||
|
nullable: true,
|
||||||
|
})
|
||||||
|
effectiveProfile?: string;
|
||||||
|
|
||||||
|
@Column({
|
||||||
|
name: 'canonical_model',
|
||||||
|
type: 'varchar',
|
||||||
|
length: 50,
|
||||||
|
nullable: true,
|
||||||
|
})
|
||||||
|
canonicalModel?: string;
|
||||||
|
|
||||||
|
@Column({ name: 'snapshot_params_json', type: 'json', nullable: true })
|
||||||
|
snapshotParamsJson?: Record<string, unknown>;
|
||||||
|
|
||||||
@CreateDateColumn({ name: 'created_at' })
|
@CreateDateColumn({ name: 'created_at' })
|
||||||
createdAt!: Date;
|
createdAt!: Date;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,51 @@
|
|||||||
|
// File: backend/src/modules/ai/entities/ai-execution-profile.entity.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial creation of AiExecutionProfile entity for AI execution profiles
|
||||||
|
|
||||||
|
import {
|
||||||
|
Column,
|
||||||
|
CreateDateColumn,
|
||||||
|
Entity,
|
||||||
|
PrimaryGeneratedColumn,
|
||||||
|
UpdateDateColumn,
|
||||||
|
} from 'typeorm';
|
||||||
|
|
||||||
|
/**
 * Hydrates DECIMAL columns as JavaScript numbers.
 *
 * MySQL/PostgreSQL drivers hand DECIMAL values back as strings by default,
 * which would contradict the `number` typing of the properties below.
 * `null`/`undefined` pass through untouched; writes are not altered.
 */
const decimalColumnTransformer = {
  to: (value?: number | null): number | null | undefined => value,
  from: (value?: string | number | null): number | null | undefined =>
    value === null || value === undefined ? value : Number(value),
};

/** Entity that stores AI model execution profiles (table `ai_execution_profiles`). */
@Entity('ai_execution_profiles')
export class AiExecutionProfile {
  /** Auto-generated numeric primary key. */
  @PrimaryGeneratedColumn()
  id!: number;

  /** Unique profile name (max 50 characters). */
  @Column({ name: 'profile_name', unique: true, length: 50 })
  profileName!: string;

  @Column({
    type: 'decimal',
    precision: 4,
    scale: 3,
    transformer: decimalColumnTransformer,
  })
  temperature!: number;

  @Column({
    name: 'top_p',
    type: 'decimal',
    precision: 4,
    scale: 3,
    transformer: decimalColumnTransformer,
  })
  topP!: number;

  @Column({ name: 'max_tokens', type: 'int' })
  maxTokens!: number;

  @Column({ name: 'num_ctx', type: 'int' })
  numCtx!: number;

  @Column({
    name: 'repeat_penalty',
    type: 'decimal',
    precision: 5,
    scale: 3,
    transformer: decimalColumnTransformer,
  })
  repeatPenalty!: number;

  @Column({ name: 'keep_alive_seconds', type: 'int' })
  keepAliveSeconds!: number;

  /** Defaults to `true` for new rows. */
  @Column({ name: 'is_active', type: 'boolean', default: true })
  isActive!: boolean;

  /** Nullable integer; presumably the id of the last editing user — TODO confirm. */
  @Column({ name: 'updated_by', type: 'int', nullable: true })
  updatedBy?: number;

  @CreateDateColumn({ name: 'created_at' })
  createdAt!: Date;

  @UpdateDateColumn({ name: 'updated_at' })
  updatedAt!: Date;
}
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial creation of execution policy interfaces for AI runtime policy refactor
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public job types exposed in API.
|
||||||
|
* Job types that external callers are allowed to request through the API.
|
||||||
|
*/
|
||||||
|
export type PublicJobType =
|
||||||
|
| 'auto-fill-document'
|
||||||
|
| 'migrate-document'
|
||||||
|
| 'rag-query';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal job types used within the system.
|
||||||
|
* Superset of PublicJobType: every public type plus the internal-only ones listed below.
|
||||||
|
*/
|
||||||
|
export type InternalJobType =
|
||||||
|
| PublicJobType
|
||||||
|
| 'intent-classify'
|
||||||
|
| 'tool-suggest'
|
||||||
|
| 'ocr-extract'
|
||||||
|
| 'sandbox-analysis';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execution profiles for runtime resources.
|
||||||
|
* A profile names the resources and parameters that a job will run with.
|
||||||
|
*/
|
||||||
|
export type ExecutionProfile =
|
||||||
|
| 'interactive'
|
||||||
|
| 'standard'
|
||||||
|
| 'quality'
|
||||||
|
| 'deep-analysis';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface representing the runtime configuration parameters.
|
||||||
|
* Describes the parameters applied at run time, together with the canonical model they apply to.
|
||||||
|
*/
|
||||||
|
export interface RuntimePolicy {
|
||||||
|
canonicalModel: 'np-dms-ai' | 'np-dms-ocr';
|
||||||
|
temperature: number;
|
||||||
|
topP: number;
|
||||||
|
maxTokens: number;
|
||||||
|
numCtx: number;
|
||||||
|
repeatPenalty: number;
|
||||||
|
keepAliveSeconds: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* VRAM usage statistics.
|
||||||
|
* GPU VRAM usage figures; the *Mb field names indicate megabytes.
|
||||||
|
*/
|
||||||
|
export interface VramHeadroom {
|
||||||
|
totalMb: number;
|
||||||
|
usedMb: number;
|
||||||
|
availableMb: number;
|
||||||
|
querySuccess: boolean;
|
||||||
|
mainModelVramMb?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* BullMQ job data payload.
|
||||||
|
* Job data (payload) placed on the BullMQ queue; snapshotParams lists the same six numeric fields as RuntimePolicy.
|
||||||
|
*/
|
||||||
|
export interface AiJobPayload {
|
||||||
|
jobType: InternalJobType;
|
||||||
|
documentPublicId?: string;
|
||||||
|
attachmentPublicId?: string;
|
||||||
|
effectiveProfile: ExecutionProfile;
|
||||||
|
canonicalModel: 'np-dms-ai' | 'np-dms-ocr';
|
||||||
|
snapshotParams: {
|
||||||
|
temperature: number;
|
||||||
|
topP: number;
|
||||||
|
maxTokens: number;
|
||||||
|
numCtx: number;
|
||||||
|
repeatPenalty: number;
|
||||||
|
keepAliveSeconds: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
// File: backend/src/modules/ai/interfaces/ocr-residency.interface.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial creation of OCR residency interfaces for AI runtime policy refactor
|
||||||
|
|
||||||
|
import { ExecutionProfile } from './execution-policy.interface';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OCR runtime parameters based on SCB10X Typhoon OCR model.
|
||||||
|
* Every field except keepAliveSeconds is pinned to a literal type, so only that field can vary.
|
||||||
|
*/
|
||||||
|
export interface OcrRuntimePolicy {
|
||||||
|
canonicalModel: 'np-dms-ocr';
|
||||||
|
numCtx: 8192;
|
||||||
|
numPredict: 4096;
|
||||||
|
temperature: 0.1;
|
||||||
|
topP: 0.1;
|
||||||
|
repeatPenalty: 1.1;
|
||||||
|
keepAliveSeconds: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decision output for adaptive OCR residency.
|
||||||
|
* Result of deciding whether the OCR model should stay loaded in VRAM, with the reason for the decision.
|
||||||
|
*/
|
||||||
|
export interface OcrResidencyDecision {
|
||||||
|
keepAliveSeconds: number;
|
||||||
|
vramHeadroomMb: number;
|
||||||
|
activeProfile: ExecutionProfile | null;
|
||||||
|
reason:
|
||||||
|
| 'deep-analysis-active'
|
||||||
|
| 'high-pressure'
|
||||||
|
| 'headroom-sufficient'
|
||||||
|
| 'query-failed';
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
// File: src/modules/ai/processors/ai-batch.processor.ts
|
// File: backend/src/modules/ai/processors/ai-batch.processor.ts
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-06-08: แก้ไขปัญหา LLM JSON response truncated โดยการเพิ่ม num_ctx เป็น 16384 ใน sandbox-extract, sandbox-ai-extract และ migrate-document (แก้ไขโดย AGY Gemini 3.5 Flash (Medium))
|
// - 2026-06-08: แก้ไขปัญหา LLM JSON response truncated โดยการเพิ่ม num_ctx เป็น 16384 ใน sandbox-extract, sandbox-ai-extract และ migrate-document (แก้ไขโดย AGY Gemini 3.5 Flash (Medium))
|
||||||
// - 2026-05-15: เพิ่ม processor สำหรับ ai-batch queue ตาม ADR-023A.
|
// - 2026-05-15: เพิ่ม processor สำหรับ ai-batch queue ตาม ADR-023A.
|
||||||
@@ -12,8 +12,11 @@
|
|||||||
// - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ
|
// - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ
|
||||||
// - 2026-06-03: ADR-034 — เพิ่ม 'ocr-extract' job type + OCR_JOB_TYPES constant + processOcrExtract() ที่มี model switching logic (unload main → load OCR → generate → reload main)
|
// - 2026-06-03: ADR-034 — เพิ่ม 'ocr-extract' job type + OCR_JOB_TYPES constant + processOcrExtract() ที่มี model switching logic (unload main → load OCR → generate → reload main)
|
||||||
// - 2026-06-06: แก้ไข bug LLM JSON parse failure — เพิ่ม retry logic (2 attempts), debug log raw response, และปรับปรุง error message ให้แสดงทั้ง raw และ cleaned response
|
// - 2026-06-06: แก้ไข bug LLM JSON parse failure — เพิ่ม retry logic (2 attempts), debug log raw response, และปรับปรุง error message ให้แสดงทั้ง raw และ cleaned response
|
||||||
|
// - 2026-06-11: US2 - ส่ง activeProfile ไปยัง detectAndExtract ในการประมวลผล OCR และบันทึก retrieval device metadata ใน audit logs
|
||||||
|
// - 2026-06-11: US4 - เพิ่มการรองรับ ai-suggest และ rag-query ใน batch processor หลังการทำ redirection
|
||||||
// - 2026-06-06: เพิ่ม OCR text truncation (MAX_OCR_TEXT_CHARS=15000) เพื่อป้องกัน context overflow เมื่อเอกสารยาวมากชน num_ctx 8192
|
// - 2026-06-06: เพิ่ม OCR text truncation (MAX_OCR_TEXT_CHARS=15000) เพื่อป้องกัน context overflow เมื่อเอกสารยาวมากชน num_ctx 8192
|
||||||
// - 2026-06-06: [T036] เพิ่ม ollamaOptions: { num_ctx: 8192 } ใน generateStructuredJson เพื่อรองรับ prompt ยาว 18k+ chars และแก้ไข bug response ว่างจาก context window ไม่พอ
|
// - 2026-06-06: [T036] เพิ่ม ollamaOptions: { num_ctx: 8192 } ใน generateStructuredJson เพื่อรองรับ prompt ยาว 18k+ chars และแก้ไข bug response ว่างจาก context window ไม่พอ
|
||||||
|
// - 2026-06-11: แก้ไข ESLint errors โดยการเพิ่ม properties (effectiveProfile, canonicalModel, snapshotParams) ใน AiBatchJobData และยกเลิกการใช้ as any
|
||||||
|
|
||||||
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
import { Processor, WorkerHost } from '@nestjs/bullmq';
|
||||||
import { Logger } from '@nestjs/common';
|
import { Logger } from '@nestjs/common';
|
||||||
@@ -31,13 +34,17 @@ import {
|
|||||||
SandboxOcrEngineService,
|
SandboxOcrEngineService,
|
||||||
SandboxOcrEngineType,
|
SandboxOcrEngineType,
|
||||||
} from '../services/sandbox-ocr-engine.service';
|
} from '../services/sandbox-ocr-engine.service';
|
||||||
import { OllamaService } from '../services/ollama.service';
|
import {
|
||||||
|
OllamaService,
|
||||||
|
OllamaGenerateOptions,
|
||||||
|
} from '../services/ollama.service';
|
||||||
import { Project } from '../../project/entities/project.entity';
|
import { Project } from '../../project/entities/project.entity';
|
||||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||||
import { TagsService } from '../../tags/tags.service';
|
import { TagsService } from '../../tags/tags.service';
|
||||||
import { MigrationService } from '../../migration/migration.service';
|
import { MigrationService } from '../../migration/migration.service';
|
||||||
import { MigrationErrorType } from '../../migration/entities/migration-error.entity';
|
import { MigrationErrorType } from '../../migration/entities/migration-error.entity';
|
||||||
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
import { AiPromptsService } from '../prompts/ai-prompts.service';
|
||||||
|
import type { ExecutionProfile } from '../interfaces/execution-policy.interface';
|
||||||
|
|
||||||
interface MigrateDocumentMetadata extends Record<string, unknown> {
|
interface MigrateDocumentMetadata extends Record<string, unknown> {
|
||||||
projectPublicId?: string;
|
projectPublicId?: string;
|
||||||
@@ -62,7 +69,9 @@ export type AiBatchJobType =
|
|||||||
| 'sandbox-ocr-only'
|
| 'sandbox-ocr-only'
|
||||||
| 'sandbox-ai-extract'
|
| 'sandbox-ai-extract'
|
||||||
| 'migrate-document'
|
| 'migrate-document'
|
||||||
| 'rag-prepare';
|
| 'rag-prepare'
|
||||||
|
| 'ai-suggest'
|
||||||
|
| 'rag-query';
|
||||||
|
|
||||||
/** รายการ job types ที่ต้องใช้ Typhoon OCR model — จะ trigger model switching (ADR-034) */
|
/** รายการ job types ที่ต้องใช้ Typhoon OCR model — จะ trigger model switching (ADR-034) */
|
||||||
export const OCR_JOB_TYPES: ReadonlyArray<AiBatchJobType> = [
|
export const OCR_JOB_TYPES: ReadonlyArray<AiBatchJobType> = [
|
||||||
@@ -76,6 +85,16 @@ export interface AiBatchJobData {
|
|||||||
payload: Record<string, unknown>;
|
payload: Record<string, unknown>;
|
||||||
batchId?: string;
|
batchId?: string;
|
||||||
idempotencyKey: string;
|
idempotencyKey: string;
|
||||||
|
effectiveProfile?: ExecutionProfile;
|
||||||
|
canonicalModel?: 'np-dms-ai' | 'np-dms-ocr';
|
||||||
|
snapshotParams?: {
|
||||||
|
temperature: number;
|
||||||
|
topP: number;
|
||||||
|
maxTokens: number;
|
||||||
|
numCtx: number;
|
||||||
|
repeatPenalty: number;
|
||||||
|
keepAliveSeconds: number;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/** OCR text สูงสุดที่ส่งเข้า LLM prompt — ป้องกัน context overflow (num_ctx 8192, Thai ~3 chars/token) */
|
/** OCR text สูงสุดที่ส่งเข้า LLM prompt — ป้องกัน context overflow (num_ctx 8192, Thai ~3 chars/token) */
|
||||||
@@ -286,6 +305,16 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
|
await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
case 'ai-suggest':
|
||||||
|
this.logger.log(
|
||||||
|
`AI Suggest job processing — jobId=${String(job.id)}`
|
||||||
|
);
|
||||||
|
await this.processSuggest(job);
|
||||||
|
return;
|
||||||
|
case 'rag-query':
|
||||||
|
this.logger.log(`RAG query job processing — jobId=${String(job.id)}`);
|
||||||
|
await this.processRagQuery(job);
|
||||||
|
return;
|
||||||
case 'embed-document':
|
case 'embed-document':
|
||||||
this.logger.log(`Embedding job processing — jobId=${String(job.id)}`);
|
this.logger.log(`Embedding job processing — jobId=${String(job.id)}`);
|
||||||
await this.processEmbedDocument(job.data);
|
await this.processEmbedDocument(job.data);
|
||||||
@@ -353,6 +382,7 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
|
|
||||||
/** ประมวลผล embed-document job ด้วย EmbeddingService (T022) */
|
/** ประมวลผล embed-document job ด้วย EmbeddingService (T022) */
|
||||||
private async processEmbedDocument(data: AiBatchJobData): Promise<void> {
|
private async processEmbedDocument(data: AiBatchJobData): Promise<void> {
|
||||||
|
const startTime = Date.now();
|
||||||
const { documentPublicId, projectPublicId, payload } = data;
|
const { documentPublicId, projectPublicId, payload } = data;
|
||||||
const pdfPath = payload.pdfPath as string;
|
const pdfPath = payload.pdfPath as string;
|
||||||
const extractedText = readString(payload.extractedText);
|
const extractedText = readString(payload.extractedText);
|
||||||
@@ -378,6 +408,7 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
pdfPath,
|
pdfPath,
|
||||||
extractedText,
|
extractedText,
|
||||||
documentPublicId,
|
documentPublicId,
|
||||||
|
activeProfile: data.effectiveProfile,
|
||||||
})
|
})
|
||||||
).text;
|
).text;
|
||||||
const result = await this.embeddingService.embedDocument(
|
const result = await this.embeddingService.embedDocument(
|
||||||
@@ -394,6 +425,19 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
if (!result.success) {
|
if (!result.success) {
|
||||||
throw new Error(`Embedding failed: ${result.error ?? 'Unknown error'}`);
|
throw new Error(`Embedding failed: ${result.error ?? 'Unknown error'}`);
|
||||||
}
|
}
|
||||||
|
const durationMs = Date.now() - startTime;
|
||||||
|
await this.saveAiAuditLog({
|
||||||
|
documentPublicId,
|
||||||
|
aiModel: data.canonicalModel ?? 'np-dms-ai',
|
||||||
|
status: AiAuditStatus.SUCCESS,
|
||||||
|
processingTimeMs: durationMs,
|
||||||
|
effectiveProfile: data.effectiveProfile,
|
||||||
|
canonicalModel: data.canonicalModel,
|
||||||
|
snapshotParamsJson: {
|
||||||
|
...(data.snapshotParams ?? {}),
|
||||||
|
retrievalDevice: result.device,
|
||||||
|
},
|
||||||
|
});
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Embedding completed for document ${documentPublicId} — ${result.chunksEmbedded} chunks embedded`
|
`Embedding completed for document ${documentPublicId} — ${result.chunksEmbedded} chunks embedded`
|
||||||
);
|
);
|
||||||
@@ -782,6 +826,7 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private async processRagPrepare(data: AiBatchJobData): Promise<void> {
|
private async processRagPrepare(data: AiBatchJobData): Promise<void> {
|
||||||
|
const startTime = Date.now();
|
||||||
const payload = data.payload || {};
|
const payload = data.payload || {};
|
||||||
const documentPublicId =
|
const documentPublicId =
|
||||||
(payload.documentPublicId as string) || data.documentPublicId;
|
(payload.documentPublicId as string) || data.documentPublicId;
|
||||||
@@ -795,12 +840,9 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
const documentDate = (payload.documentDate as string) || undefined;
|
const documentDate = (payload.documentDate as string) || undefined;
|
||||||
let cachedOcrText = (payload.cachedOcrText as string) || undefined;
|
let cachedOcrText = (payload.cachedOcrText as string) || undefined;
|
||||||
const attachmentPath = (payload.attachmentPath as string) || undefined;
|
const attachmentPath = (payload.attachmentPath as string) || undefined;
|
||||||
|
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`processRagPrepare: starting for doc=${documentPublicId}, project=${projectPublicId}`
|
`processRagPrepare: starting for doc=${documentPublicId}, project=${projectPublicId}`
|
||||||
);
|
);
|
||||||
|
|
||||||
// T020a: Resolve OCR text. Use cached if available; otherwise extract using OcrService
|
|
||||||
if (!cachedOcrText && attachmentPath) {
|
if (!cachedOcrText && attachmentPath) {
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`processRagPrepare: No cached OCR text. Extracting text from ${attachmentPath}...`
|
`processRagPrepare: No cached OCR text. Extracting text from ${attachmentPath}...`
|
||||||
@@ -808,6 +850,7 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
try {
|
try {
|
||||||
const ocrResult = await this.ocrService.detectAndExtract({
|
const ocrResult = await this.ocrService.detectAndExtract({
|
||||||
pdfPath: attachmentPath,
|
pdfPath: attachmentPath,
|
||||||
|
activeProfile: data.effectiveProfile,
|
||||||
});
|
});
|
||||||
cachedOcrText = ocrResult.text;
|
cachedOcrText = ocrResult.text;
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
@@ -816,28 +859,23 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cachedOcrText) {
|
if (!cachedOcrText) {
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
`processRagPrepare: ไม่มี OCR text และไม่มี attachment path - skip embedding`
|
`processRagPrepare: ไม่มี OCR text และไม่มี attachment path - skip embedding`
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// T020b: skip-guard (< 50 chars)
|
|
||||||
if (cachedOcrText.trim().length < 50) {
|
if (cachedOcrText.trim().length < 50) {
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
`processRagPrepare: OCR text สั้นเกินไป (${cachedOcrText.trim().length} chars) — skip embedding`
|
`processRagPrepare: OCR text สั้นเกินไป (${cachedOcrText.trim().length} chars) — skip embedding`
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// T020c: embed + upsert pipeline
|
|
||||||
try {
|
try {
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`processRagPrepare: chunking and embedding document ${documentPublicId}...`
|
`processRagPrepare: chunking and embedding document ${documentPublicId}...`
|
||||||
);
|
);
|
||||||
await this.embeddingService.embedDocument(
|
const result = await this.embeddingService.embedDocument(
|
||||||
projectPublicId,
|
projectPublicId,
|
||||||
documentPublicId,
|
documentPublicId,
|
||||||
correspondenceNumber,
|
correspondenceNumber,
|
||||||
@@ -848,6 +886,19 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
documentDate,
|
documentDate,
|
||||||
cachedOcrText
|
cachedOcrText
|
||||||
);
|
);
|
||||||
|
const durationMs = Date.now() - startTime;
|
||||||
|
await this.saveAiAuditLog({
|
||||||
|
documentPublicId,
|
||||||
|
aiModel: data.canonicalModel ?? 'np-dms-ai',
|
||||||
|
status: AiAuditStatus.SUCCESS,
|
||||||
|
processingTimeMs: durationMs,
|
||||||
|
effectiveProfile: data.effectiveProfile,
|
||||||
|
canonicalModel: data.canonicalModel,
|
||||||
|
snapshotParamsJson: {
|
||||||
|
...(data.snapshotParams ?? {}),
|
||||||
|
retrievalDevice: result.device,
|
||||||
|
},
|
||||||
|
});
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`processRagPrepare: successfully processed document ${documentPublicId}`
|
`processRagPrepare: successfully processed document ${documentPublicId}`
|
||||||
);
|
);
|
||||||
@@ -864,6 +915,7 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
const { documentPublicId, projectPublicId, payload, batchId } = job.data;
|
const { documentPublicId, projectPublicId, payload, batchId } = job.data;
|
||||||
|
const modelUsed = job.data.canonicalModel;
|
||||||
const docNumber = payload.documentNumber as string;
|
const docNumber = payload.documentNumber as string;
|
||||||
const contextOverride =
|
const contextOverride =
|
||||||
payload.contextOverride &&
|
payload.contextOverride &&
|
||||||
@@ -888,6 +940,7 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
try {
|
try {
|
||||||
ocrResult = await this.ocrService.detectAndExtract({
|
ocrResult = await this.ocrService.detectAndExtract({
|
||||||
pdfPath: attachment.filePath,
|
pdfPath: attachment.filePath,
|
||||||
|
activeProfile: job.data.effectiveProfile,
|
||||||
});
|
});
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const errMsg = err instanceof Error ? err.message : String(err);
|
const errMsg = err instanceof Error ? err.message : String(err);
|
||||||
@@ -904,6 +957,9 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
status: AiAuditStatus.FAILED,
|
status: AiAuditStatus.FAILED,
|
||||||
errorMessage: errMsg,
|
errorMessage: errMsg,
|
||||||
processingTimeMs: Date.now() - startTime,
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
effectiveProfile: job.data.effectiveProfile,
|
||||||
|
canonicalModel: job.data.canonicalModel,
|
||||||
|
snapshotParamsJson: job.data.snapshotParams,
|
||||||
});
|
});
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
@@ -930,11 +986,28 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
|
|
||||||
let aiResponse: string;
|
let aiResponse: string;
|
||||||
try {
|
try {
|
||||||
aiResponse = await this.ollamaService.generate(resolvedPrompt, {
|
const snapshotParams = job.data.snapshotParams;
|
||||||
|
const generateOptions: OllamaGenerateOptions = {
|
||||||
format: 'json',
|
format: 'json',
|
||||||
timeoutMs: 120000,
|
timeoutMs: 120000,
|
||||||
options: { num_ctx: 16384, num_predict: 4096 },
|
model: modelUsed,
|
||||||
});
|
};
|
||||||
|
if (snapshotParams) {
|
||||||
|
generateOptions.options = {
|
||||||
|
temperature: snapshotParams.temperature,
|
||||||
|
top_p: snapshotParams.topP,
|
||||||
|
num_predict: snapshotParams.maxTokens,
|
||||||
|
num_ctx: snapshotParams.numCtx,
|
||||||
|
repeat_penalty: snapshotParams.repeatPenalty,
|
||||||
|
};
|
||||||
|
generateOptions.keepAlive = snapshotParams.keepAliveSeconds;
|
||||||
|
} else {
|
||||||
|
generateOptions.options = { num_ctx: 16384, num_predict: 4096 };
|
||||||
|
}
|
||||||
|
aiResponse = await this.ollamaService.generate(
|
||||||
|
resolvedPrompt,
|
||||||
|
generateOptions
|
||||||
|
);
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const errMsg = err instanceof Error ? err.message : String(err);
|
const errMsg = err instanceof Error ? err.message : String(err);
|
||||||
this.logger.error(`การวิเคราะห์ของ AI ล้มเหลว: ${errMsg}`);
|
this.logger.error(`การวิเคราะห์ของ AI ล้มเหลว: ${errMsg}`);
|
||||||
@@ -946,10 +1019,13 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
});
|
});
|
||||||
await this.saveAiAuditLog({
|
await this.saveAiAuditLog({
|
||||||
documentPublicId,
|
documentPublicId,
|
||||||
aiModel: this.ollamaService.getMainModelName(),
|
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
|
||||||
status: AiAuditStatus.FAILED,
|
status: AiAuditStatus.FAILED,
|
||||||
errorMessage: errMsg,
|
errorMessage: errMsg,
|
||||||
processingTimeMs: Date.now() - startTime,
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
effectiveProfile: job.data.effectiveProfile,
|
||||||
|
canonicalModel: job.data.canonicalModel,
|
||||||
|
snapshotParamsJson: job.data.snapshotParams,
|
||||||
});
|
});
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
@@ -972,10 +1048,13 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
});
|
});
|
||||||
await this.saveAiAuditLog({
|
await this.saveAiAuditLog({
|
||||||
documentPublicId,
|
documentPublicId,
|
||||||
aiModel: this.ollamaService.getMainModelName(),
|
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
|
||||||
status: AiAuditStatus.FAILED,
|
status: AiAuditStatus.FAILED,
|
||||||
errorMessage: errMsg,
|
errorMessage: errMsg,
|
||||||
processingTimeMs: Date.now() - startTime,
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
effectiveProfile: job.data.effectiveProfile,
|
||||||
|
canonicalModel: job.data.canonicalModel,
|
||||||
|
snapshotParamsJson: job.data.snapshotParams,
|
||||||
});
|
});
|
||||||
throw new Error(errMsg);
|
throw new Error(errMsg);
|
||||||
}
|
}
|
||||||
@@ -1132,11 +1211,14 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
|
|
||||||
await this.saveAiAuditLog({
|
await this.saveAiAuditLog({
|
||||||
documentPublicId,
|
documentPublicId,
|
||||||
aiModel: this.ollamaService.getMainModelName(),
|
aiModel: modelUsed ?? this.ollamaService.getMainModelName(),
|
||||||
status: AiAuditStatus.SUCCESS,
|
status: AiAuditStatus.SUCCESS,
|
||||||
aiSuggestionJson: extractedMetadata as unknown as Record<string, unknown>,
|
aiSuggestionJson: extractedMetadata as unknown as Record<string, unknown>,
|
||||||
confidenceScore: confidence,
|
confidenceScore: confidence,
|
||||||
processingTimeMs: Date.now() - startTime,
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
effectiveProfile: job.data.effectiveProfile,
|
||||||
|
canonicalModel: job.data.canonicalModel,
|
||||||
|
snapshotParamsJson: job.data.snapshotParams,
|
||||||
});
|
});
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`ประมวลผลเอกสาร ${docNumber} สำเร็จและถูกส่งเข้า Staging Queue แล้ว`
|
`ประมวลผลเอกสาร ${docNumber} สำเร็จและถูกส่งเข้า Staging Queue แล้ว`
|
||||||
@@ -1151,6 +1233,9 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
confidenceScore?: number;
|
confidenceScore?: number;
|
||||||
processingTimeMs?: number;
|
processingTimeMs?: number;
|
||||||
errorMessage?: string;
|
errorMessage?: string;
|
||||||
|
effectiveProfile?: string;
|
||||||
|
canonicalModel?: string;
|
||||||
|
snapshotParamsJson?: Record<string, unknown>;
|
||||||
}): Promise<void> {
|
}): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const log = this.aiAuditLogRepo.create({
|
const log = this.aiAuditLogRepo.create({
|
||||||
@@ -1162,6 +1247,9 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
confidenceScore: data.confidenceScore,
|
confidenceScore: data.confidenceScore,
|
||||||
processingTimeMs: data.processingTimeMs,
|
processingTimeMs: data.processingTimeMs,
|
||||||
errorMessage: data.errorMessage,
|
errorMessage: data.errorMessage,
|
||||||
|
effectiveProfile: data.effectiveProfile,
|
||||||
|
canonicalModel: data.canonicalModel,
|
||||||
|
snapshotParamsJson: data.snapshotParamsJson,
|
||||||
});
|
});
|
||||||
await this.aiAuditLogRepo.save(log);
|
await this.aiAuditLogRepo.save(log);
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
@@ -1170,4 +1258,149 @@ export class AiBatchProcessor extends WorkerHost {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Handles a `rag-query` job by forwarding the question to the RAG service.
 *
 * Reads `query`, `requestPublicId` and `userPublicId` from the job payload.
 * A missing or blank `query` is rejected. A non-string `requestPublicId`
 * falls back to the job's `idempotencyKey`, and a non-string `userPublicId`
 * falls back to the literal `'system'`.
 *
 * @param job BullMQ job carrying the batch job data.
 * @throws Error when `payload.query` is absent, not a string, or only whitespace.
 */
private async processRagQuery(job: Job<AiBatchJobData>): Promise<void> {
  const { data } = job;
  const fields = data.payload || {};

  const rawQuery = fields['query'];
  const query = typeof rawQuery === 'string' ? rawQuery : '';
  if (!query.trim()) {
    throw new Error('payload.query is required for rag-query jobs');
  }

  const rawRequestId = fields['requestPublicId'];
  const requestPublicId =
    typeof rawRequestId === 'string' ? rawRequestId : data.idempotencyKey;

  const rawUserId = fields['userPublicId'];
  const userPublicId = typeof rawUserId === 'string' ? rawUserId : 'system';

  // A fresh controller is created per call and never aborted here, so the
  // signal only satisfies the processQuery parameter list.
  const { signal } = new AbortController();
  await this.ragService.processQuery(
    requestPublicId,
    query,
    data.projectPublicId,
    userPublicId,
    signal
  );
}
|
||||||
|
|
||||||
|
/**
 * Produces a metadata suggestion (title, documentType, category,
 * confidenceScore) for a document and records the outcome in the AI audit log.
 *
 * Flow: mark the document PROCESSING (when a document id is present), resolve
 * the text through the OCR service, prompt the LLM, parse and normalise the
 * answer, write a SUCCESS audit row and mark the document DONE. On any error
 * the document is marked FAILED, a FAILED audit row is written and the error
 * is rethrown.
 *
 * The job's runtime policy snapshot (`effectiveProfile`, `canonicalModel`,
 * `snapshotParams`) is applied to the OCR and generate calls, so the values
 * written to the audit log are the ones actually used. Jobs without a
 * snapshot keep the service defaults.
 *
 * @param job BullMQ job carrying the batch job data.
 * @returns The normalised suggestion and whether OCR was used.
 * @throws Rethrows whatever the OCR, LLM or status-update calls throw.
 */
private async processSuggest(
  job: Job<AiBatchJobData>
): Promise<Record<string, unknown>> {
  const startTime = Date.now();
  try {
    if (job.data.documentPublicId) {
      await this.setAiProcessingStatus(
        job.data.documentPublicId,
        'PROCESSING'
      );
    }
    const payload = job.data.payload || {};
    const extractedText =
      typeof payload['extractedText'] === 'string'
        ? payload['extractedText']
        : '';
    const pdfPath =
      typeof payload['pdfPath'] === 'string' ? payload['pdfPath'] : undefined;
    const extractedChars =
      typeof payload['extractedChars'] === 'number'
        ? payload['extractedChars']
        : extractedText.length;
    const textResult = await this.ocrService.detectAndExtract({
      extractedText,
      extractedChars,
      pdfPath,
      // Same profile hand-off as the other detectAndExtract call sites.
      activeProfile: job.data.effectiveProfile,
    });
    const prompt = [
      'Extract concise DMS metadata from this engineering document.',
      'Return only JSON with fields: title, documentType, category, confidenceScore.',
      // Only the first 6000 characters of the text are sent to the model.
      textResult.text.slice(0, 6000),
    ].join('\n');

    // Apply the policy snapshot captured at dispatch time; without one the
    // generate call runs on the service defaults, as before.
    const snapshotParams = job.data.snapshotParams;
    const generateOptions: OllamaGenerateOptions = {
      model: job.data.canonicalModel,
    };
    if (snapshotParams) {
      generateOptions.options = {
        temperature: snapshotParams.temperature,
        top_p: snapshotParams.topP,
        num_predict: snapshotParams.maxTokens,
        num_ctx: snapshotParams.numCtx,
        repeat_penalty: snapshotParams.repeatPenalty,
      };
      generateOptions.keepAlive = snapshotParams.keepAliveSeconds;
    }
    const rawOutput = await this.ollamaService.generate(
      prompt,
      generateOptions
    );

    const suggestion = this.parseSuggestion(rawOutput);
    const masterCategories = Array.isArray(payload['masterDataCategories'])
      ? (payload['masterDataCategories'] as string[])
      : undefined;
    const normalizedSuggestion = this.flagUnknownCategories(
      suggestion,
      masterCategories
    );
    await this.saveAiAuditLog({
      documentPublicId: job.data.documentPublicId,
      aiModel:
        job.data.canonicalModel ?? this.ollamaService.getMainModelName(),
      status: AiAuditStatus.SUCCESS,
      aiSuggestionJson: normalizedSuggestion,
      confidenceScore: this.extractConfidence(normalizedSuggestion),
      processingTimeMs: Date.now() - startTime,
      effectiveProfile: job.data.effectiveProfile,
      canonicalModel: job.data.canonicalModel,
      snapshotParamsJson: job.data.snapshotParams,
    });
    if (job.data.documentPublicId) {
      await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE');
    }
    return {
      suggestion: normalizedSuggestion,
      ocrUsed: textResult.ocrUsed,
    };
  } catch (err) {
    if (job.data.documentPublicId) {
      await this.setAiProcessingStatus(job.data.documentPublicId, 'FAILED');
    }
    await this.saveAiAuditLog({
      documentPublicId: job.data.documentPublicId,
      aiModel:
        job.data.canonicalModel ?? this.ollamaService.getMainModelName(),
      status: AiAuditStatus.FAILED,
      processingTimeMs: Date.now() - startTime,
      errorMessage: err instanceof Error ? err.message : String(err),
      effectiveProfile: job.data.effectiveProfile,
      canonicalModel: job.data.canonicalModel,
      snapshotParamsJson: job.data.snapshotParams,
    });
    throw err;
  }
}
|
||||||
|
|
||||||
|
/**
 * Converts raw model output into a suggestion record.
 *
 * When the output parses as a JSON object (not an array, not null) that
 * object is returned as-is. Otherwise a placeholder is returned whose title
 * is the first 250 characters of the raw output, with `confidenceScore` 0 and
 * `is_unknown` set. A warning is logged only when the text is not valid JSON.
 *
 * @param rawOutput Text returned by the model.
 * @returns The parsed object or the placeholder suggestion.
 */
private parseSuggestion(rawOutput: string): Record<string, unknown> {
  let decoded: unknown = null;
  try {
    decoded = JSON.parse(rawOutput) as unknown;
  } catch {
    this.logger.warn('AI suggestion output was not valid JSON');
  }

  const isJsonObject =
    typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
  if (isJsonObject) {
    return decoded as Record<string, unknown>;
  }

  return {
    title: rawOutput.slice(0, 250),
    confidenceScore: 0,
    is_unknown: true,
  };
}
|
||||||
|
|
||||||
|
/**
 * Marks a suggestion as unknown when its category is not in the master list.
 *
 * The comparison is case-insensitive and ignores non-string entries in the
 * master list. The suggestion is returned unchanged when no master list array
 * is supplied, when the suggestion's `category` is not a string, or when the
 * category is known.
 *
 * @param suggestion Parsed suggestion record.
 * @param masterDataCategories Candidate list of known category names.
 * @returns The same suggestion, or a copy with `is_unknown: true`.
 */
private flagUnknownCategories(
  suggestion: Record<string, unknown>,
  masterDataCategories: unknown
): Record<string, unknown> {
  if (!Array.isArray(masterDataCategories)) {
    return suggestion;
  }

  const category = suggestion['category'];
  if (typeof category !== 'string') {
    return suggestion;
  }

  const allowed = new Set<string>();
  for (const entry of masterDataCategories as unknown[]) {
    if (typeof entry === 'string') {
      allowed.add(entry.toLowerCase());
    }
  }

  return allowed.has(category.toLowerCase())
    ? suggestion
    : { ...suggestion, is_unknown: true };
}
|
||||||
|
|
||||||
|
/**
 * Reads the numeric `confidenceScore` from a suggestion.
 *
 * @param suggestion Parsed suggestion record.
 * @returns The score when it is a number, otherwise `undefined`.
 */
private extractConfidence(
  suggestion: Record<string, unknown>
): number | undefined {
  const score: unknown = suggestion['confidenceScore'];
  if (typeof score !== 'number') {
    return undefined;
  }
  return score;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
// File: src/modules/ai/processors/ai-realtime.processor.ts
|
// File: backend/src/modules/ai/processors/ai-realtime.processor.ts
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-15: เพิ่ม processor สำหรับ ai-realtime queue และ pause/resume ai-batch ตาม ADR-023A.
|
// - 2026-05-15: เพิ่ม processor สำหรับ ai-realtime queue และ pause/resume ai-batch ตาม ADR-023A.
|
||||||
// - 2026-06-03: ADR-034 — เปลี่ยน aiModel ใน audit log จาก hardcode 'gemma4' เป็น ollamaService.getMainModelName()
|
// - 2026-06-03: ADR-034 — เปลี่ยน aiModel ใน audit log จาก hardcode 'gemma4' เป็น ollamaService.getMainModelName()
|
||||||
|
// - 2026-06-11: ปรับ concurrency และเพิ่ม job classification เพื่อ redirect ไป ai-batch (US4)
|
||||||
|
// - 2026-06-11: แก้ไขปัญหา compile error สำหรับ unreachable check ใน switch-case และลบบรรทัดว่างในฟังก์ชัน process
|
||||||
|
|
||||||
import {
|
import {
|
||||||
Processor,
|
Processor,
|
||||||
@@ -22,7 +24,11 @@ import { Attachment } from '../../../common/file-storage/entities/attachment.ent
|
|||||||
import { OcrService } from '../services/ocr.service';
|
import { OcrService } from '../services/ocr.service';
|
||||||
import { OllamaService } from '../services/ollama.service';
|
import { OllamaService } from '../services/ollama.service';
|
||||||
|
|
||||||
/**
 * Job types that may arrive on the ai-realtime queue.
 *
 * The processor treats `intent-classify` and `tool-suggest` as lightweight
 * and handles them in place; the other types are re-queued onto ai-batch.
 */
export type AiRealtimeJobType =
  // Generation-heavy: redirected to the ai-batch queue.
  | 'ai-suggest'
  | 'rag-query'
  // Lightweight: processed directly by the realtime worker.
  | 'intent-classify'
  | 'tool-suggest';
|
||||||
|
|
||||||
export interface AiRealtimeJobData {
|
export interface AiRealtimeJobData {
|
||||||
jobType: AiRealtimeJobType;
|
jobType: AiRealtimeJobType;
|
||||||
@@ -34,9 +40,16 @@ export interface AiRealtimeJobData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Processor สำหรับงาน AI interactive ที่ต้องกัน batch job ระหว่างใช้ GPU */
|
/** Processor สำหรับงาน AI interactive ที่ต้องกัน batch job ระหว่างใช้ GPU */
|
||||||
@Processor(QUEUE_AI_REALTIME, { concurrency: 1 })
|
@Processor(QUEUE_AI_REALTIME, {
|
||||||
|
concurrency: Number(
|
||||||
|
process.env.AI_REALTIME_CONCURRENCY ||
|
||||||
|
process.env.REALTIME_CONCURRENCY ||
|
||||||
|
'2'
|
||||||
|
),
|
||||||
|
})
|
||||||
export class AiRealtimeProcessor extends WorkerHost {
|
export class AiRealtimeProcessor extends WorkerHost {
|
||||||
private readonly logger = new Logger(AiRealtimeProcessor.name);
|
private readonly logger = new Logger(AiRealtimeProcessor.name);
|
||||||
|
private activeRealtimeJobs = 0;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
@InjectQueue(QUEUE_AI_BATCH)
|
@InjectQueue(QUEUE_AI_BATCH)
|
||||||
@@ -53,12 +66,32 @@ export class AiRealtimeProcessor extends WorkerHost {
|
|||||||
|
|
||||||
/** Dispatch งาน ai-realtime ตาม jobType */
|
/** Dispatch งาน ai-realtime ตาม jobType */
|
||||||
async process(job: Job<AiRealtimeJobData>): Promise<unknown> {
|
async process(job: Job<AiRealtimeJobData>): Promise<unknown> {
|
||||||
switch (job.data.jobType) {
|
const LIGHTWEIGHT_REALTIME_JOBS = ['intent-classify', 'tool-suggest'];
|
||||||
case 'ai-suggest':
|
const isLightweight = LIGHTWEIGHT_REALTIME_JOBS.includes(job.data.jobType);
|
||||||
return this.processSuggest(job);
|
this.logger.log(
|
||||||
case 'rag-query':
|
`Job classification decision — jobId=${String(job.id)}, jobType=${job.data.jobType}, isLightweight=${isLightweight}`
|
||||||
this.logger.log(`RAG query queued — jobId=${String(job.id)}`);
|
);
|
||||||
|
if (!isLightweight) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Redirecting generation-heavy job to ai-batch queue — jobId=${String(job.id)}, jobType=${String(job.data.jobType)}`
|
||||||
|
);
|
||||||
|
await this.aiBatchQueue.add(job.data.jobType, job.data, {
|
||||||
|
jobId: job.id ?? undefined,
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
switch (job.data.jobType) {
|
||||||
|
case 'intent-classify':
|
||||||
|
this.logger.log(`Processing intent-classify — jobId=${String(job.id)}`);
|
||||||
|
return { success: true, intent: 'GET_RFA' };
|
||||||
|
case 'tool-suggest':
|
||||||
|
this.logger.log(`Processing tool-suggest — jobId=${String(job.id)}`);
|
||||||
|
return { success: true, suggestions: [] };
|
||||||
|
case 'ai-suggest':
|
||||||
|
case 'rag-query':
|
||||||
|
throw new Error(
|
||||||
|
`Job type ${job.data.jobType} should have been redirected to batch queue.`
|
||||||
|
);
|
||||||
default: {
|
default: {
|
||||||
const unreachable: never = job.data.jobType;
|
const unreachable: never = job.data.jobType;
|
||||||
throw new Error(
|
throw new Error(
|
||||||
@@ -203,27 +236,48 @@ export class AiRealtimeProcessor extends WorkerHost {
|
|||||||
/**
 * Worker `active` hook: when an interactive job starts, pause the ai-batch
 * queue to avoid GPU contention.
 *
 * The in-process counter of active realtime jobs is incremented first. Only
 * the transition to exactly one active job issues the pause; further jobs
 * just log that the batch queue stays paused.
 *
 * A failed pause is logged instead of propagated: this runs as an event
 * listener, so a rejected promise here would have no caller to handle it.
 *
 * @param job The realtime job that became active.
 */
@OnWorkerEvent('active')
async onActive(job: Job<AiRealtimeJobData>): Promise<void> {
  this.activeRealtimeJobs += 1;
  if (this.activeRealtimeJobs === 1) {
    try {
      await this.aiBatchQueue.pause();
      this.logger.warn(
        `ai-batch paused while ai-realtime job is active — jobId=${String(job.id)}`
      );
    } catch (err: unknown) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(
        `Failed to pause ai-batch for ai-realtime job — jobId=${String(job.id)}: ${errMsg}`
      );
    }
    return;
  }
  this.logger.warn(
    `ai-realtime active jobs=${String(this.activeRealtimeJobs)} — keep ai-batch paused`
  );
}
|
||||||
|
|
||||||
/**
 * Worker `completed` hook: when an interactive job finishes, resume the
 * ai-batch queue once no realtime job is active any more.
 *
 * The counter is decremented and clamped at zero. The resume is issued only
 * when it reaches zero; otherwise the batch queue is left paused.
 *
 * A failed resume is logged instead of propagated: this runs as an event
 * listener, so a rejected promise here would have no caller to handle it.
 *
 * @param job The realtime job that completed.
 */
@OnWorkerEvent('completed')
async onCompleted(job: Job<AiRealtimeJobData>): Promise<void> {
  this.activeRealtimeJobs = Math.max(0, this.activeRealtimeJobs - 1);
  if (this.activeRealtimeJobs === 0) {
    try {
      await this.aiBatchQueue.resume();
      this.logger.log(
        `ai-batch resumed after ai-realtime completion — jobId=${String(job.id)}`
      );
    } catch (err: unknown) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(
        `Failed to resume ai-batch after ai-realtime completion — jobId=${String(job.id)}: ${errMsg}`
      );
    }
    return;
  }
  this.logger.log(
    `ai-realtime jobs still active (${String(this.activeRealtimeJobs)}) — ai-batch remains paused`
  );
}
|
||||||
|
|
||||||
/**
 * Worker `failed` hook: when an interactive job fails, resume the ai-batch
 * queue as well, once no realtime job is active any more.
 *
 * The counter is decremented and clamped at zero. The resume is issued only
 * when it reaches zero; otherwise the batch queue is left paused.
 *
 * A failed resume is logged instead of propagated: this runs as an event
 * listener, so a rejected promise here would have no caller to handle it.
 *
 * @param job The realtime job that failed; may be undefined.
 */
@OnWorkerEvent('failed')
async onFailed(job: Job<AiRealtimeJobData> | undefined): Promise<void> {
  this.activeRealtimeJobs = Math.max(0, this.activeRealtimeJobs - 1);
  if (this.activeRealtimeJobs === 0) {
    try {
      await this.aiBatchQueue.resume();
      this.logger.warn(
        `ai-batch resumed after ai-realtime failure — jobId=${String(job?.id ?? 'unknown')}`
      );
    } catch (err: unknown) {
      const errMsg = err instanceof Error ? err.message : String(err);
      this.logger.error(
        `Failed to resume ai-batch after ai-realtime failure — jobId=${String(job?.id ?? 'unknown')}: ${errMsg}`
      );
    }
    return;
  }
  this.logger.warn(
    `ai-realtime jobs still active after failure (${String(this.activeRealtimeJobs)}) — ai-batch remains paused`
  );
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,183 @@
|
|||||||
|
// File: backend/src/modules/ai/services/ai-policy.service.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial creation of AiPolicyService for managing execution profiles and policies
|
||||||
|
// - 2026-06-11: แก้ไขข้อผิดพลาด TS2367 (เทียบ profile กับ ocr-extract) และลบบรรทัดว่างในฟังก์ชัน getProfileParameters
|
||||||
|
|
||||||
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
|
import { InjectRedis } from '@nestjs-modules/ioredis';
|
||||||
|
import { InjectRepository } from '@nestjs/typeorm';
|
||||||
|
import type Redis from 'ioredis';
|
||||||
|
import { Repository } from 'typeorm';
|
||||||
|
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||||
|
import {
|
||||||
|
ExecutionProfile,
|
||||||
|
InternalJobType,
|
||||||
|
RuntimePolicy,
|
||||||
|
AiJobPayload,
|
||||||
|
} from '../interfaces/execution-policy.interface';
|
||||||
|
|
||||||
|
/**
 * Resolves AI execution profiles and builds job payloads that carry a
 * snapshot of the runtime parameters taken at dispatch time.
 *
 * Profile parameters are looked up in Redis first, then in the
 * `AiExecutionProfile` table, and finally in the built-in defaults below.
 */
@Injectable()
export class AiPolicyService {
  private readonly logger = new Logger(AiPolicyService.name);
  /** Redis key prefix; the profile name is appended to form the key. */
  private readonly cachePrefix = 'ai_execution_profiles:';
  /** Expiry, in seconds, applied to profile entries written to Redis. */
  private readonly cacheTtlSeconds = 60;

  /** Built-in parameters used when neither the cache nor the DB has a profile. */
  private readonly defaultProfiles: Record<ExecutionProfile, RuntimePolicy> = {
    interactive: {
      canonicalModel: 'np-dms-ai',
      temperature: 0.7,
      topP: 0.9,
      maxTokens: 2048,
      numCtx: 4096,
      repeatPenalty: 1.15,
      keepAliveSeconds: 300,
    },
    standard: {
      canonicalModel: 'np-dms-ai',
      temperature: 0.5,
      topP: 0.8,
      maxTokens: 4096,
      numCtx: 8192,
      repeatPenalty: 1.15,
      keepAliveSeconds: 600,
    },
    quality: {
      canonicalModel: 'np-dms-ai',
      temperature: 0.1,
      topP: 0.95,
      maxTokens: 8192,
      numCtx: 8192,
      repeatPenalty: 1.15,
      keepAliveSeconds: 600,
    },
    'deep-analysis': {
      canonicalModel: 'np-dms-ai',
      temperature: 0.3,
      topP: 0.85,
      maxTokens: 8192,
      numCtx: 32768,
      repeatPenalty: 1.15,
      keepAliveSeconds: 0,
    },
  };

  constructor(
    @InjectRepository(AiExecutionProfile)
    private readonly profileRepo: Repository<AiExecutionProfile>,
    @InjectRedis() private readonly redis: Redis
  ) {}

  /**
   * Maps any model name or Ollama tag to one of the two canonical names.
   *
   * @param modelName Model name or tag; matched case-insensitively.
   * @returns `'np-dms-ocr'` when the name contains `ocr`, else `'np-dms-ai'`.
   */
  getCanonicalModelName(modelName: string): 'np-dms-ai' | 'np-dms-ocr' {
    // 'typhoon-np-dms-ocr' itself contains 'ocr', so one substring test
    // covers every name the previous two-clause check matched.
    return modelName.toLowerCase().includes('ocr') ? 'np-dms-ocr' : 'np-dms-ai';
  }

  /**
   * Maps an internal job type to the execution profile it runs under.
   *
   * @param jobType Internal job type.
   * @returns The profile for that job type; `'ocr-extract'` and any
   *   unlisted value map to `'standard'`.
   */
  getProfileForJobType(jobType: InternalJobType): ExecutionProfile {
    switch (jobType) {
      case 'auto-fill-document':
      case 'migrate-document':
        return 'quality';
      case 'rag-query':
        return 'standard';
      case 'intent-classify':
      case 'tool-suggest':
        return 'interactive';
      case 'sandbox-analysis':
        return 'deep-analysis';
      case 'ocr-extract':
      default:
        return 'standard';
    }
  }

  /**
   * Returns the runtime parameters for an execution profile.
   *
   * Lookup order: Redis cache, then the active DB row for the profile (which
   * is written back to the cache), then the built-in defaults. Cache and DB
   * failures are logged and the lookup continues with the next source.
   *
   * @param profile Execution profile name.
   * @returns A policy object the caller may modify freely; the built-in
   *   defaults are never handed out by reference.
   */
  async getProfileParameters(
    profile: ExecutionProfile
  ): Promise<RuntimePolicy> {
    const cacheKey = `${this.cachePrefix}${profile}`;
    try {
      const cached = await this.redis.get(cacheKey);
      if (cached) {
        // A corrupt cache entry makes JSON.parse throw; that lands in the
        // catch below and the lookup falls through to the DB.
        return JSON.parse(cached) as RuntimePolicy;
      }
    } catch (cacheErr) {
      this.logger.warn(
        `Failed to read execution profile cache: ${cacheErr instanceof Error ? cacheErr.message : String(cacheErr)}`
      );
    }
    try {
      const dbProfile = await this.profileRepo.findOne({
        where: { profileName: profile, isActive: true },
      });
      if (dbProfile) {
        const policy: RuntimePolicy = {
          canonicalModel: 'np-dms-ai',
          // NOTE(review): Number() suggests these columns may arrive as
          // strings (e.g. decimal columns) — confirm against the entity.
          temperature: Number(dbProfile.temperature),
          topP: Number(dbProfile.topP),
          maxTokens: dbProfile.maxTokens,
          numCtx: dbProfile.numCtx,
          repeatPenalty: Number(dbProfile.repeatPenalty),
          keepAliveSeconds: dbProfile.keepAliveSeconds,
        };
        try {
          await this.redis.set(
            cacheKey,
            JSON.stringify(policy),
            'EX',
            this.cacheTtlSeconds
          );
        } catch (cacheSetErr) {
          this.logger.warn(
            `Failed to write execution profile cache: ${cacheSetErr instanceof Error ? cacheSetErr.message : String(cacheSetErr)}`
          );
        }
        return policy;
      }
    } catch (dbErr) {
      this.logger.error(
        `Failed to read execution profile from DB: ${dbErr instanceof Error ? dbErr.message : String(dbErr)}`
      );
    }
    // Hand out a copy so callers cannot mutate the shared defaults. A profile
    // name missing from the table at runtime falls back to 'standard',
    // matching the default branch of getProfileForJobType.
    const fallback =
      this.defaultProfiles[profile] ?? this.defaultProfiles.standard;
    return { ...fallback };
  }

  /**
   * Builds a BullMQ job payload carrying a snapshot of the profile
   * parameters as resolved at dispatch time.
   *
   * @param jobType Internal job type; selects the profile and the model.
   * @param documentPublicId Optional public id of the document.
   * @param attachmentPublicId Optional public id of the attachment.
   * @returns Payload with the effective profile, the canonical model
   *   (`'np-dms-ocr'` for `'ocr-extract'`, otherwise `'np-dms-ai'`) and the
   *   copied parameters.
   */
  async createJobPayload(
    jobType: InternalJobType,
    documentPublicId?: string,
    attachmentPublicId?: string
  ): Promise<AiJobPayload> {
    const effectiveProfile = this.getProfileForJobType(jobType);
    const canonicalModel =
      jobType === 'ocr-extract' ? 'np-dms-ocr' : 'np-dms-ai';
    const policy = await this.getProfileParameters(effectiveProfile);
    return {
      jobType,
      documentPublicId,
      attachmentPublicId,
      effectiveProfile,
      canonicalModel,
      snapshotParams: {
        temperature: policy.temperature,
        topP: policy.topP,
        maxTokens: policy.maxTokens,
        numCtx: policy.numCtx,
        repeatPenalty: policy.repeatPenalty,
        keepAliveSeconds: policy.keepAliveSeconds,
      },
    };
  }
}
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-15: เพิ่ม EmbeddingService สำหรับ full-document chunked embedding ตาม ADR-023A T021.
|
// - 2026-05-15: เพิ่ม EmbeddingService สำหรับ full-document chunked embedding ตาม ADR-023A T021.
|
||||||
// - 2026-06-05: ปรับปรุงเป็น Hybrid Embedding และเพิ่ม Semantic Chunking ผ่าน typhoon2.5 (T025-T027)
|
// - 2026-06-05: ปรับปรุงเป็น Hybrid Embedding และเพิ่ม Semantic Chunking ผ่าน typhoon2.5 (T025-T027)
|
||||||
|
// - 2026-06-11: US3 - เพิ่มการคืนค่า device (cpu/gpu) จาก embedding
|
||||||
|
|
||||||
import { Injectable, Logger } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import { ConfigService } from '@nestjs/config';
|
import { ConfigService } from '@nestjs/config';
|
||||||
@@ -20,6 +21,7 @@ export interface EmbeddingResult {
|
|||||||
success: boolean;
|
success: boolean;
|
||||||
chunksEmbedded: number;
|
chunksEmbedded: number;
|
||||||
error?: string;
|
error?: string;
|
||||||
|
device?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** บริการสร้าง embedding สำหรับ full-document RAG (ADR-023A) */
|
/** บริการสร้าง embedding สำหรับ full-document RAG (ADR-023A) */
|
||||||
@@ -75,19 +77,18 @@ export class EmbeddingService {
|
|||||||
error: 'No OCR text provided',
|
error: 'No OCR text provided',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. แบ่งข้อความออกเป็น Chunk ด้วย Semantic Chunking
|
|
||||||
const chunks = await this.semanticChunkTextWithFallback(ocrText);
|
const chunks = await this.semanticChunkTextWithFallback(ocrText);
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Document ${documentPublicId} split into ${chunks.length} chunks`
|
`Document ${documentPublicId} split into ${chunks.length} chunks`
|
||||||
);
|
);
|
||||||
|
|
||||||
// 2. แปลงแต่ละ chunk เป็น Hybrid Vector และเตรียม points
|
|
||||||
const points = [];
|
const points = [];
|
||||||
|
let usedDevice = 'gpu';
|
||||||
for (const [idx, chunk] of chunks.entries()) {
|
for (const [idx, chunk] of chunks.entries()) {
|
||||||
try {
|
try {
|
||||||
// เรียก Sidecar /embed เพื่อแปลงข้อความของ chunk
|
|
||||||
const embedResult = await this.ocrService.embedViaSidecar(chunk.text);
|
const embedResult = await this.ocrService.embedViaSidecar(chunk.text);
|
||||||
|
if (embedResult.device === 'cpu') {
|
||||||
|
usedDevice = 'cpu';
|
||||||
|
}
|
||||||
points.push({
|
points.push({
|
||||||
id: `${documentPublicId}-${idx}`,
|
id: `${documentPublicId}-${idx}`,
|
||||||
vector: {
|
vector: {
|
||||||
@@ -116,7 +117,6 @@ export class EmbeddingService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (points.length === 0) {
|
if (points.length === 0) {
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
@@ -124,21 +124,19 @@ export class EmbeddingService {
|
|||||||
error: 'All chunks failed to embed',
|
error: 'All chunks failed to embed',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. ลบ points เก่าของเอกสาร (เพื่อความ idempotent และรองรับ revision ใหม่)
|
|
||||||
await this.qdrantService.deleteByDocumentPublicId(
|
await this.qdrantService.deleteByDocumentPublicId(
|
||||||
projectPublicId,
|
projectPublicId,
|
||||||
documentPublicId
|
documentPublicId
|
||||||
);
|
);
|
||||||
|
|
||||||
// 4. บันทึก points ใหม่ลง Qdrant
|
|
||||||
await this.qdrantService.upsert(projectPublicId, points);
|
await this.qdrantService.upsert(projectPublicId, points);
|
||||||
|
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Successfully embedded ${points.length} chunks for document ${documentPublicId} in project ${projectPublicId}`
|
`Successfully embedded ${points.length} chunks for document ${documentPublicId} in project ${projectPublicId}`
|
||||||
);
|
);
|
||||||
|
return {
|
||||||
return { success: true, chunksEmbedded: points.length };
|
success: true,
|
||||||
|
chunksEmbedded: points.length,
|
||||||
|
device: usedDevice,
|
||||||
|
};
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const errorMsg = err instanceof Error ? err.message : String(err);
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// File: src/modules/ai/services/ocr.service.ts
|
// File: backend/src/modules/ai/services/ocr.service.ts
|
||||||
// Change Log
|
// Change Log
|
||||||
// - 2026-05-15: เพิ่ม OCR auto-detection service สำหรับ ADR-023A.
|
// - 2026-05-15: เพิ่ม OCR auto-detection service สำหรับ ADR-023A.
|
||||||
// - 2026-05-25: แก้ไข AggregateError (empty message) จาก axios โดย wrap เป็น Error พร้อม context ที่ชัดเจน.
|
// - 2026-05-25: แก้ไข AggregateError (empty message) จาก axios โดย wrap เป็น Error พร้อม context ที่ชัดเจน.
|
||||||
@@ -11,6 +11,7 @@
|
|||||||
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart ไปยัง /ocr-upload แทนการส่ง path
|
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart ไปยัง /ocr-upload แทนการส่ง path
|
||||||
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
|
// - 2026-06-02: ส่งค่า X-API-Key ใน request headers ไปยัง ocr-sidecar เพื่อความมั่นคงปลอดภัยสูงสุด (ADR-033, Suggestion 2)
|
||||||
// - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_ENGINE.engineName เป็น typhoon-np-dms-ocr:latest ตรงกับชื่อโมเดลใน Ollama
|
// - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_ENGINE.engineName เป็น typhoon-np-dms-ocr:latest ตรงกับชื่อโมเดลใน Ollama
|
||||||
|
// - 2026-06-11: US2 - คำนวณ OCR residency keep_alive แบบ dynamic ตาม VRAM headroom และ active profile
|
||||||
|
|
||||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||||
import { ConfigService } from '@nestjs/config';
|
import { ConfigService } from '@nestjs/config';
|
||||||
@@ -29,12 +30,16 @@ import { SystemSetting } from '../entities/system-setting.entity';
|
|||||||
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
import { AiAuditLog, AiAuditStatus } from '../entities/ai-audit-log.entity';
|
||||||
import { OcrCacheService } from './ocr-cache.service';
|
import { OcrCacheService } from './ocr-cache.service';
|
||||||
import { VramMonitorService } from './vram-monitor.service';
|
import { VramMonitorService } from './vram-monitor.service';
|
||||||
|
import { AiPolicyService } from './ai-policy.service';
|
||||||
|
import { ExecutionProfile } from '../interfaces/execution-policy.interface';
|
||||||
|
import { OcrResidencyDecision } from '../interfaces/ocr-residency.interface';
|
||||||
|
|
||||||
export interface OcrDetectionInput {
|
export interface OcrDetectionInput {
|
||||||
extractedText?: string;
|
extractedText?: string;
|
||||||
extractedChars?: number;
|
extractedChars?: number;
|
||||||
pdfPath?: string;
|
pdfPath?: string;
|
||||||
documentPublicId?: string; // เพิ่มเพื่อการทำ audit logs
|
documentPublicId?: string; // เพิ่มเพื่อการทำ audit logs
|
||||||
|
activeProfile?: ExecutionProfile;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface OcrDetectionResult {
|
export interface OcrDetectionResult {
|
||||||
@@ -101,6 +106,9 @@ export class OcrService {
|
|||||||
private readonly threshold: number;
|
private readonly threshold: number;
|
||||||
private readonly ocrApiUrl: string;
|
private readonly ocrApiUrl: string;
|
||||||
private readonly ocrSidecarApiKey: string;
|
private readonly ocrSidecarApiKey: string;
|
||||||
|
private readonly vramHeadroomThresholdMb: number;
|
||||||
|
private readonly ocrResidencyWindowSeconds: number;
|
||||||
|
private readonly mainModelPressureThresholdMb: number;
|
||||||
constructor(
|
constructor(
|
||||||
private readonly configService: ConfigService,
|
private readonly configService: ConfigService,
|
||||||
@InjectRepository(SystemSetting)
|
@InjectRepository(SystemSetting)
|
||||||
@@ -109,6 +117,7 @@ export class OcrService {
|
|||||||
private readonly auditLogRepo: Repository<AiAuditLog>,
|
private readonly auditLogRepo: Repository<AiAuditLog>,
|
||||||
private readonly ocrCacheService: OcrCacheService,
|
private readonly ocrCacheService: OcrCacheService,
|
||||||
private readonly vramMonitorService: VramMonitorService,
|
private readonly vramMonitorService: VramMonitorService,
|
||||||
|
private readonly aiPolicyService: AiPolicyService,
|
||||||
@InjectRedis() private readonly redis: Redis
|
@InjectRedis() private readonly redis: Redis
|
||||||
) {
|
) {
|
||||||
this.threshold = this.configService.get<number>('OCR_CHAR_THRESHOLD', 100);
|
this.threshold = this.configService.get<number>('OCR_CHAR_THRESHOLD', 100);
|
||||||
@@ -120,6 +129,82 @@ export class OcrService {
|
|||||||
'OCR_SIDECAR_API_KEY',
|
'OCR_SIDECAR_API_KEY',
|
||||||
'lcbp3-dms-ocr-sidecar-secure-token-2026'
|
'lcbp3-dms-ocr-sidecar-secure-token-2026'
|
||||||
);
|
);
|
||||||
|
this.vramHeadroomThresholdMb = this.configService.get<number>(
|
||||||
|
'VRAM_HEADROOM_THRESHOLD_MB',
|
||||||
|
this.configService.get<number>('AI_VRAM_HEADROOM_THRESHOLD_MB', 3000)
|
||||||
|
);
|
||||||
|
this.ocrResidencyWindowSeconds = this.configService.get<number>(
|
||||||
|
'OCR_RESIDENCY_WINDOW_SECONDS',
|
||||||
|
this.configService.get<number>('AI_OCR_RESIDENCY_WINDOW_SECONDS', 120)
|
||||||
|
);
|
||||||
|
this.mainModelPressureThresholdMb = this.configService.get<number>(
|
||||||
|
'GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB',
|
||||||
|
this.configService.get<number>(
|
||||||
|
'AI_GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB',
|
||||||
|
12000
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decides the keep_alive value (in seconds) for the OCR model from the current
 * VRAM headroom and the active execution profile.
 *
 * Decision order:
 *  1. the VRAM query reports failure            -> 0, reason 'query-failed'
 *  2. the active profile is 'deep-analysis'     -> 0, reason 'deep-analysis-active'
 *  3. main-model VRAM is above mainModelPressureThresholdMb, or available VRAM
 *     is below vramHeadroomThresholdMb          -> 0, reason 'high-pressure'
 *  4. otherwise -> ocrResidencyWindowSeconds, reason 'headroom-sufficient'
 *
 * Any exception raised while deciding is logged as a warning and reported as
 * 'query-failed' with keep_alive 0, so this method does not reject.
 *
 * @param activeProfile profile currently running, if known; null/undefined is reported as null
 * @returns the residency decision, including the headroom figure it was based on
 */
async calculateOcrResidency(
  activeProfile?: ExecutionProfile | null
): Promise<OcrResidencyDecision> {
  try {
    const headroom = await this.vramMonitorService.getVramHeadroom();
    if (!headroom.querySuccess) {
      return {
        keepAliveSeconds: 0,
        vramHeadroomMb: 0,
        activeProfile: activeProfile ?? null,
        reason: 'query-failed',
      };
    }
    if (activeProfile === 'deep-analysis') {
      this.logger.log(`OCR Residency: deep-analysis active, keep_alive = 0`);
      return {
        keepAliveSeconds: 0,
        vramHeadroomMb: headroom.availableMb,
        activeProfile,
        reason: 'deep-analysis-active',
      };
    }
    // Normalise once so the comparison and the log line agree; interpolating the
    // raw field would print "undefinedMB" when the monitor omits it.
    const mainModelVramMb = headroom.mainModelVramMb ?? 0;
    const isHighPressure =
      mainModelVramMb > this.mainModelPressureThresholdMb ||
      headroom.availableMb < this.vramHeadroomThresholdMb;
    if (isHighPressure) {
      this.logger.log(
        `OCR Residency: VRAM pressure is high (main: ${mainModelVramMb}MB, avail: ${headroom.availableMb}MB), keep_alive = 0`
      );
      return {
        keepAliveSeconds: 0,
        vramHeadroomMb: headroom.availableMb,
        activeProfile: activeProfile ?? null,
        reason: 'high-pressure',
      };
    }
    this.logger.log(
      `OCR Residency: VRAM headroom sufficient (${headroom.availableMb} MB), keep_alive = ${this.ocrResidencyWindowSeconds}`
    );
    return {
      keepAliveSeconds: this.ocrResidencyWindowSeconds,
      vramHeadroomMb: headroom.availableMb,
      activeProfile: activeProfile ?? null,
      reason: 'headroom-sufficient',
    };
  } catch (err: unknown) {
    // Fail closed: without a trustworthy reading the OCR model is not kept resident.
    this.logger.warn(
      `Failed to calculate OCR residency: ${err instanceof Error ? err.message : String(err)}`
    );
    return {
      keepAliveSeconds: 0,
      vramHeadroomMb: 0,
      activeProfile: activeProfile ?? null,
      reason: 'query-failed',
    };
  }
}
|
||||||
|
|
||||||
/** ดึงรายการ OCR Engines ทั้งหมด พร้อมตรวจสอบตัวที่กำลัง Active */
|
/** ดึงรายการ OCR Engines ทั้งหมด พร้อมตรวจสอบตัวที่กำลัง Active */
|
||||||
@@ -311,7 +396,6 @@ export class OcrService {
|
|||||||
): Promise<OcrDetectionResult> {
|
): Promise<OcrDetectionResult> {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
try {
|
try {
|
||||||
// 1. ตรวจสอบ VRAM insufficiency guard
|
|
||||||
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
||||||
TYPHOON_OCR_REQUIRED_VRAM_MB
|
TYPHOON_OCR_REQUIRED_VRAM_MB
|
||||||
);
|
);
|
||||||
@@ -321,7 +405,8 @@ export class OcrService {
|
|||||||
);
|
);
|
||||||
return this.processWithTesseract(input);
|
return this.processWithTesseract(input);
|
||||||
}
|
}
|
||||||
|
const residency = await this.calculateOcrResidency(input.activeProfile);
|
||||||
|
const keepAlive = residency.keepAliveSeconds;
|
||||||
this.logger.debug(`Typhoon OCR processing: ${input.pdfPath}`);
|
this.logger.debug(`Typhoon OCR processing: ${input.pdfPath}`);
|
||||||
const fileBuffer = fs.readFileSync(input.pdfPath!);
|
const fileBuffer = fs.readFileSync(input.pdfPath!);
|
||||||
const form = new FormData();
|
const form = new FormData();
|
||||||
@@ -331,6 +416,7 @@ export class OcrService {
|
|||||||
'upload.pdf'
|
'upload.pdf'
|
||||||
);
|
);
|
||||||
form.append('engine', 'typhoon-np-dms-ocr');
|
form.append('engine', 'typhoon-np-dms-ocr');
|
||||||
|
form.append('keep_alive', String(keepAlive));
|
||||||
const response = await axios.post<OcrSidecarResponse>(
|
const response = await axios.post<OcrSidecarResponse>(
|
||||||
`${this.ocrApiUrl}/ocr-upload`,
|
`${this.ocrApiUrl}/ocr-upload`,
|
||||||
form,
|
form,
|
||||||
@@ -339,10 +425,8 @@ export class OcrService {
|
|||||||
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
const text = response.data.text ?? '';
|
const text = response.data.text ?? '';
|
||||||
const durationMs = Date.now() - startTime;
|
const durationMs = Date.now() - startTime;
|
||||||
|
|
||||||
await this.writeAuditLog({
|
await this.writeAuditLog({
|
||||||
documentPublicId: input.documentPublicId,
|
documentPublicId: input.documentPublicId,
|
||||||
aiModel: 'typhoon-ocr',
|
aiModel: 'typhoon-ocr',
|
||||||
@@ -352,7 +436,6 @@ export class OcrService {
|
|||||||
processingTimeMs: durationMs,
|
processingTimeMs: durationMs,
|
||||||
cacheHit: false,
|
cacheHit: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
text,
|
text,
|
||||||
ocrUsed: true,
|
ocrUsed: true,
|
||||||
@@ -398,6 +481,7 @@ export class OcrService {
|
|||||||
async embedViaSidecar(text: string): Promise<{
|
async embedViaSidecar(text: string): Promise<{
|
||||||
dense: number[];
|
dense: number[];
|
||||||
sparse: { indices: number[]; values: number[] };
|
sparse: { indices: number[]; values: number[] };
|
||||||
|
device?: string;
|
||||||
}> {
|
}> {
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(
|
const response = await axios.post(
|
||||||
@@ -412,6 +496,7 @@ export class OcrService {
|
|||||||
return response.data as {
|
return response.data as {
|
||||||
dense: number[];
|
dense: number[];
|
||||||
sparse: { indices: number[]; values: number[] };
|
sparse: { indices: number[]; values: number[] };
|
||||||
|
device?: string;
|
||||||
};
|
};
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
@@ -424,7 +509,7 @@ export class OcrService {
|
|||||||
async rerankViaSidecar(
|
async rerankViaSidecar(
|
||||||
query: string,
|
query: string,
|
||||||
chunks: string[]
|
chunks: string[]
|
||||||
): Promise<{ scores: number[]; ranked_indices: number[] }> {
|
): Promise<{ scores: number[]; ranked_indices: number[]; device?: string }> {
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(
|
const response = await axios.post(
|
||||||
`${this.ocrApiUrl}/rerank`,
|
`${this.ocrApiUrl}/rerank`,
|
||||||
@@ -435,7 +520,11 @@ export class OcrService {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
return response.data as { scores: number[]; ranked_indices: number[] };
|
return response.data as {
|
||||||
|
scores: number[];
|
||||||
|
ranked_indices: number[];
|
||||||
|
device?: string;
|
||||||
|
};
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
this.logger.error(`Failed to rerank via Sidecar: ${msg}`);
|
this.logger.error(`Failed to rerank via Sidecar: ${msg}`);
|
||||||
|
|||||||
@@ -1,133 +1,143 @@
|
|||||||
// File: src/modules/ai/services/vram-monitor.service.ts
|
// File: backend/src/modules/ai/services/vram-monitor.service.ts
|
||||||
// Change Log
|
// Change Log:
|
||||||
// - 2026-05-30: Initial implementation สำหรับ Typhoon OCR VRAM monitoring (T006, ADR-032)
|
// - 2026-06-11: Initial creation of VramMonitorService to monitor VRAM headroom from Ollama /api/ps
|
||||||
|
// - 2026-06-11: เพิ่มการคำนวณ mainModelVramMb ใน getVramHeadroom
|
||||||
|
// - 2026-06-11: เพิ่ม getVramStatus และ invalidateCache เพื่อความเข้ากันได้กับส่วนอื่น
|
||||||
|
|
||||||
import { Injectable, Logger } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import { ConfigService } from '@nestjs/config';
|
import { ConfigService } from '@nestjs/config';
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import { InjectRedis } from '@nestjs-modules/ioredis';
|
import { VramHeadroom } from '../interfaces/execution-policy.interface';
|
||||||
import Redis from 'ioredis';
|
|
||||||
|
|
||||||
/** ข้อมูล VRAM จาก Ollama PS API */
|
/**
|
||||||
export interface OllamaModelInfo {
|
* ผลลัพธ์ VRAM status สำหรับส่วนบริการภายนอก
|
||||||
name: string;
|
* ผลลัพธ์นี้มีวัตถุประสงค์เพื่อรักษาความเข้ากันได้ย้อนหลัง (Backward Compatibility)
|
||||||
size_vram: number; // bytes
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
/** ผลลัพธ์ VRAM status */
|
|
||||||
export interface VramStatus {
|
export interface VramStatus {
|
||||||
totalVramMb: number;
|
totalVramMb: number;
|
||||||
usedVramMb: number;
|
usedVramMb: number;
|
||||||
freeVramMb: number;
|
freeVramMb: number;
|
||||||
loadedModels: string[];
|
loadedModels: string[];
|
||||||
hasCapacity: boolean; // true ถ้า free VRAM >= minRequiredMb
|
hasCapacity: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ผลลัพธ์ภายในจาก Ollama /api/ps */
|
|
||||||
interface OllamaProcessStatus {
|
|
||||||
models?: OllamaModelInfo[];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Redis key สำหรับ cache VRAM status
|
|
||||||
const VRAM_STATUS_CACHE_KEY = 'ai:vram:status';
|
|
||||||
// TTL 10 วินาที — refresh บ่อยพอสำหรับ real-time monitoring
|
|
||||||
const VRAM_STATUS_TTL_SECONDS = 10;
|
|
||||||
// VRAM limit สำหรับ RTX 2060 Super (8192 MB)
|
|
||||||
const GPU_TOTAL_VRAM_MB = 8192;
|
|
||||||
// Threshold: ไม่โหลด model ถ้า usage > 90%
|
|
||||||
const VRAM_USAGE_LIMIT_PERCENT = 0.9;
|
|
||||||
|
|
||||||
/** บริการตรวจสอบ VRAM GPU ผ่าน Ollama API ตาม ADR-032 */
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class VramMonitorService {
|
export class VramMonitorService {
|
||||||
private readonly logger = new Logger(VramMonitorService.name);
|
private readonly logger = new Logger(VramMonitorService.name);
|
||||||
private readonly ollamaUrl: string;
|
private readonly ollamaUrl: string;
|
||||||
|
private readonly totalVramMb: number;
|
||||||
|
|
||||||
constructor(
|
constructor(private readonly configService: ConfigService) {
|
||||||
private readonly configService: ConfigService,
|
|
||||||
@InjectRedis() private readonly redis: Redis
|
|
||||||
) {
|
|
||||||
this.ollamaUrl = this.configService.get<string>(
|
this.ollamaUrl = this.configService.get<string>(
|
||||||
'OLLAMA_URL',
|
'OLLAMA_URL',
|
||||||
this.configService.get<string>('AI_HOST_URL', 'http://localhost:11434')
|
this.configService.get<string>(
|
||||||
|
'AI_HOST_URL',
|
||||||
|
'http://192.168.10.100:11434'
|
||||||
|
)
|
||||||
|
);
|
||||||
|
this.totalVramMb = this.configService.get<number>(
|
||||||
|
'GPU_TOTAL_VRAM_MB',
|
||||||
|
16384 // Default to 16GB (RTX 5060 Ti)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ดึงสถานะ VRAM ปัจจุบันจาก Ollama /api/ps
|
* ดึงสถานะ VRAM headroom จาก Ollama /api/ps
|
||||||
* ใช้ Redis cache TTL 10 วินาทีเพื่อลด overhead
|
* ถ้าล้มเหลวจะคืนค่าด้วย safe default (available = 0)
|
||||||
*/
|
*/
|
||||||
async getVramStatus(minRequiredMb = 4000): Promise<VramStatus> {
|
async getVramHeadroom(): Promise<VramHeadroom> {
|
||||||
const cached = await this.redis.get(VRAM_STATUS_CACHE_KEY);
|
|
||||||
if (cached) {
|
|
||||||
const parsed = JSON.parse(cached) as VramStatus;
|
|
||||||
parsed.hasCapacity = parsed.freeVramMb >= minRequiredMb;
|
|
||||||
return parsed;
|
|
||||||
}
|
|
||||||
return this.fetchAndCacheVramStatus(minRequiredMb);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** ตรวจสอบว่า VRAM เพียงพอสำหรับโหลด model ที่ต้องการ */
|
|
||||||
async hasVramCapacity(requiredMb: number): Promise<boolean> {
|
|
||||||
const status = await this.getVramStatus(requiredMb);
|
|
||||||
return status.hasCapacity;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** ดึงข้อมูล VRAM จาก Ollama และ cache ใน Redis */
|
|
||||||
private async fetchAndCacheVramStatus(
|
|
||||||
minRequiredMb: number
|
|
||||||
): Promise<VramStatus> {
|
|
||||||
try {
|
try {
|
||||||
const response = await axios.get<OllamaProcessStatus>(
|
const response = await axios.get<{
|
||||||
`${this.ollamaUrl}/api/ps`,
|
models?: Array<{
|
||||||
{ timeout: 5000 }
|
name: string;
|
||||||
);
|
size_vram: number;
|
||||||
const models = response.data.models ?? [];
|
}>;
|
||||||
const loadedModels = models.map((m) => m.name);
|
}>(`${this.ollamaUrl}/api/ps`, { timeout: 3000 });
|
||||||
// คำนวณ VRAM ที่ใช้จาก models ที่โหลดอยู่
|
const models = response.data?.models ?? [];
|
||||||
const usedVramBytes = models.reduce(
|
let totalUsedBytes = 0;
|
||||||
(sum, m) => sum + (m.size_vram ?? 0),
|
let mainModelUsedBytes = 0;
|
||||||
0
|
for (const model of models) {
|
||||||
);
|
totalUsedBytes += model.size_vram || 0;
|
||||||
const usedVramMb = Math.round(usedVramBytes / 1024 / 1024);
|
if (
|
||||||
// จำกัด VRAM ไม่เกิน limit 90% ของ GPU ทั้งหมด
|
model.name.includes('np-dms-ai') ||
|
||||||
const maxAllowedMb = Math.floor(
|
model.name.includes('typhoon2.5-np-dms')
|
||||||
GPU_TOTAL_VRAM_MB * VRAM_USAGE_LIMIT_PERCENT
|
) {
|
||||||
);
|
mainModelUsedBytes += model.size_vram || 0;
|
||||||
const freeVramMb = Math.max(0, maxAllowedMb - usedVramMb);
|
}
|
||||||
const status: VramStatus = {
|
}
|
||||||
totalVramMb: GPU_TOTAL_VRAM_MB,
|
const usedMb = Math.round(totalUsedBytes / (1024 * 1024));
|
||||||
usedVramMb,
|
const availableMb = Math.max(0, this.totalVramMb - usedMb);
|
||||||
freeVramMb,
|
const mainModelVramMb = Math.round(mainModelUsedBytes / (1024 * 1024));
|
||||||
loadedModels,
|
return {
|
||||||
hasCapacity: freeVramMb >= minRequiredMb,
|
totalMb: this.totalVramMb,
|
||||||
|
usedMb,
|
||||||
|
availableMb,
|
||||||
|
querySuccess: true,
|
||||||
|
mainModelVramMb,
|
||||||
};
|
};
|
||||||
await this.redis.setex(
|
|
||||||
VRAM_STATUS_CACHE_KEY,
|
|
||||||
VRAM_STATUS_TTL_SECONDS,
|
|
||||||
JSON.stringify(status)
|
|
||||||
);
|
|
||||||
return status;
|
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
`VRAM status fetch failed: ${msg} — ใช้ค่า resilient fallback`
|
`Failed to query Ollama /api/ps: ${err instanceof Error ? err.message : String(err)}`
|
||||||
);
|
);
|
||||||
return {
|
return {
|
||||||
totalVramMb: GPU_TOTAL_VRAM_MB,
|
totalMb: this.totalVramMb,
|
||||||
usedVramMb: 0,
|
usedMb: this.totalVramMb, // บังคับให้ used = total เพื่อให้ available = 0
|
||||||
freeVramMb: GPU_TOTAL_VRAM_MB,
|
availableMb: 0,
|
||||||
loadedModels: [],
|
querySuccess: false,
|
||||||
hasCapacity: true,
|
mainModelVramMb: 0,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ล้าง VRAM cache (เรียกหลังจาก model unload ด้วย keep_alive=0)
|
* ดึงสถานะ VRAM ปัจจุบันของระบบ
|
||||||
* เพื่อให้ status check ครั้งต่อไปดึงข้อมูลใหม่จาก Ollama
|
* เพื่อความเข้ากันได้ย้อนหลังกับ endpoint vram/status
|
||||||
|
*/
|
||||||
|
async getVramStatus(minRequiredMb = 4000): Promise<VramStatus> {
  // Returns the legacy VramStatus shape from a single GET to Ollama /api/ps.
  // The model names and the VRAM totals are taken from the same response, so
  // they always describe the same snapshot and only one request is sent.
  // minRequiredMb: free VRAM (MB) needed for hasCapacity to be true.
  // On any failure this logs a warning and reports the GPU as full
  // (freeVramMb = 0, hasCapacity = false) instead of throwing.
  try {
    const response = await axios.get<{
      models?: Array<{
        name: string;
        size_vram: number;
      }>;
    }>(`${this.ollamaUrl}/api/ps`, { timeout: 3000 });
    const models = response.data?.models ?? [];
    // size_vram is divided by 1024 * 1024 below, i.e. treated as bytes;
    // a missing or zero value contributes nothing to the total.
    const usedBytes = models.reduce(
      (sum, model) => sum + (model.size_vram || 0),
      0
    );
    const usedVramMb = Math.round(usedBytes / (1024 * 1024));
    // Clamp at 0 in case the reported usage exceeds the configured total.
    const freeVramMb = Math.max(0, this.totalVramMb - usedVramMb);
    return {
      totalVramMb: this.totalVramMb,
      usedVramMb,
      freeVramMb,
      loadedModels: models.map((model) => model.name),
      hasCapacity: freeVramMb >= minRequiredMb,
    };
  } catch (err: unknown) {
    this.logger.warn(
      `Failed to get VRAM status: ${err instanceof Error ? err.message : String(err)}`
    );
    return {
      totalVramMb: this.totalVramMb,
      usedVramMb: this.totalVramMb,
      freeVramMb: 0,
      loadedModels: [],
      hasCapacity: false,
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the available VRAM covers what a model load requires.
 *
 * @param requiredMb amount of VRAM needed, in MB
 * @returns true when the availableMb reported by getVramHeadroom() is at least requiredMb
 */
async hasVramCapacity(requiredMb: number): Promise<boolean> {
  const { availableMb } = await this.getVramHeadroom();
  const fits = availableMb >= requiredMb;
  return fits;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ล้าง cache VRAM (ไม่มี cache แล้วในระบบใหม่ แต่เก็บไว้เพื่อรองรับการเรียกใช้เดิม)
|
||||||
*/
|
*/
|
||||||
async invalidateCache(): Promise<void> {
|
async invalidateCache(): Promise<void> {
|
||||||
await this.redis.del(VRAM_STATUS_CACHE_KEY);
|
await Promise.resolve();
|
||||||
|
this.logger.log('VRAM cache invalidation requested (no-op in new policy)');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,138 @@
|
|||||||
|
// File: backend/src/modules/ai/tests/ai-policy.service.spec.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: สร้าง unit tests สำหรับ AiPolicyService (US5)
|
||||||
|
// - 2026-06-11: แก้ไข DEFAULT_REDIS_TOKEN import เป็นค่าคงที่ string
|
||||||
|
|
||||||
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
|
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||||
|
import { AiPolicyService } from '../services/ai-policy.service';
|
||||||
|
import { AiExecutionProfile } from '../entities/ai-execution-profile.entity';
|
||||||
|
|
||||||
|
// Injection token under which the mocked Redis client is provided to the
// service; kept as a plain string constant instead of an imported symbol.
const DEFAULT_REDIS_TOKEN = 'default_IORedisModuleConnectionToken';

/**
 * Unit tests for AiPolicyService: model-name canonicalisation, job-type to
 * profile mapping, the Redis -> DB -> defaults lookup chain, and job payload
 * creation. The repository and Redis client are replaced by jest mocks.
 */
describe('AiPolicyService', () => {
  let service: AiPolicyService;
  const mockProfileRepo = {
    findOne: jest.fn(),
  };
  const mockRedis = {
    get: jest.fn(),
    set: jest.fn(),
  };

  beforeEach(async () => {
    // resetAllMocks (not clearAllMocks) also drops mockResolvedValue /
    // mockRejectedValue implementations, so no test inherits the previous
    // test's Redis or DB behaviour.
    jest.resetAllMocks();
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        AiPolicyService,
        {
          provide: getRepositoryToken(AiExecutionProfile),
          useValue: mockProfileRepo,
        },
        { provide: DEFAULT_REDIS_TOKEN, useValue: mockRedis },
      ],
    }).compile();
    service = module.get<AiPolicyService>(AiPolicyService);
  });

  describe('getCanonicalModelName', () => {
    it('ควรคืนค่า np-dms-ocr สำหรับชื่อโมเดลที่มีคำว่า ocr', () => {
      expect(service.getCanonicalModelName('typhoon-np-dms-ocr:latest')).toBe(
        'np-dms-ocr'
      );
      expect(service.getCanonicalModelName('my-ocr-model')).toBe('np-dms-ocr');
    });

    it('ควรคืนค่า np-dms-ai สำหรับโมเดลอื่นๆ', () => {
      expect(service.getCanonicalModelName('typhoon2.5-np-dms:latest')).toBe(
        'np-dms-ai'
      );
      expect(service.getCanonicalModelName('gemma')).toBe('np-dms-ai');
    });
  });

  describe('getProfileForJobType', () => {
    it('ควร map job type ต่างๆ เป็น profile ที่ถูกต้อง', () => {
      expect(service.getProfileForJobType('auto-fill-document')).toBe(
        'quality'
      );
      expect(service.getProfileForJobType('migrate-document')).toBe('quality');
      expect(service.getProfileForJobType('rag-query')).toBe('standard');
      expect(service.getProfileForJobType('intent-classify')).toBe(
        'interactive'
      );
      expect(service.getProfileForJobType('tool-suggest')).toBe('interactive');
      expect(service.getProfileForJobType('sandbox-analysis')).toBe(
        'deep-analysis'
      );
      expect(service.getProfileForJobType('ocr-extract')).toBe('standard');
    });
  });

  describe('getProfileParameters', () => {
    // DB row shared by the cache-miss tests below.
    const mockDbProfile = {
      profileName: 'standard',
      isActive: true,
      temperature: 0.4,
      topP: 0.85,
      maxTokens: 3000,
      numCtx: 6000,
      repeatPenalty: 1.2,
      keepAliveSeconds: 400,
    };

    it('ควรดึงพารามิเตอร์จาก Redis cache เมื่อมี cache hit', async () => {
      const mockPolicy = {
        canonicalModel: 'np-dms-ai' as const,
        temperature: 0.2,
        topP: 0.9,
        maxTokens: 1000,
        numCtx: 4000,
        repeatPenalty: 1.1,
        keepAliveSeconds: 120,
      };
      mockRedis.get.mockResolvedValue(JSON.stringify(mockPolicy));
      const result = await service.getProfileParameters('standard');
      expect(result).toEqual(mockPolicy);
      expect(mockRedis.get).toHaveBeenCalledWith(
        'ai_execution_profiles:standard'
      );
      expect(mockProfileRepo.findOne).not.toHaveBeenCalled();
    });

    it('ควรดึงพารามิเตอร์จาก DB เมื่อ cache miss และบันทึกลง cache', async () => {
      mockRedis.get.mockResolvedValue(null);
      mockProfileRepo.findOne.mockResolvedValue(mockDbProfile);
      const result = await service.getProfileParameters('standard');
      expect(result.temperature).toBe(0.4);
      expect(result.maxTokens).toBe(3000);
      // Pin what is written back: same key as the read, a serialised policy,
      // and an expiry passed through the 'EX' option.
      expect(mockRedis.set).toHaveBeenCalledWith(
        'ai_execution_profiles:standard',
        expect.any(String),
        'EX',
        expect.anything()
      );
    });

    // A failed cache write is best-effort: the policy read from the DB must
    // still be returned to the caller.
    it('ควรคืนค่าจาก DB แม้การเขียน cache ลง Redis จะล้มเหลว', async () => {
      mockRedis.get.mockResolvedValue(null);
      mockRedis.set.mockRejectedValue(new Error('Redis write failed'));
      mockProfileRepo.findOne.mockResolvedValue(mockDbProfile);
      const result = await service.getProfileParameters('standard');
      expect(result.temperature).toBe(0.4);
      expect(result.maxTokens).toBe(3000);
    });

    it('ควร fallback ไปยัง Default parameters เมื่อดึงจาก DB หรือ Redis ล้มเหลว', async () => {
      mockRedis.get.mockRejectedValue(new Error('Redis down'));
      mockProfileRepo.findOne.mockRejectedValue(new Error('DB down'));
      const result = await service.getProfileParameters('deep-analysis');
      expect(result.canonicalModel).toBe('np-dms-ai');
      expect(result.keepAliveSeconds).toBe(0);
    });
  });

  describe('createJobPayload', () => {
    it('ควรสร้าง payload ของ BullMQ job ที่มี snapshot parameters ครบถ้วน', async () => {
      mockRedis.get.mockResolvedValue(null);
      // No DB row, so the service falls back to its built-in defaults.
      mockProfileRepo.findOne.mockResolvedValue(null);
      const payload = await service.createJobPayload(
        'rag-query',
        'doc-1',
        'attach-1'
      );
      expect(payload.jobType).toBe('rag-query');
      expect(payload.documentPublicId).toBe('doc-1');
      expect(payload.attachmentPublicId).toBe('attach-1');
      expect(payload.effectiveProfile).toBe('standard');
      expect(payload.canonicalModel).toBe('np-dms-ai');
      expect(payload.snapshotParams).toBeDefined();
      expect(payload.snapshotParams.temperature).toBe(0.5);
    });
  });
});
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
// File: backend/src/modules/ai/tests/ai.controller.spec.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: สร้าง integration tests สำหรับ AiController forbidden fields (US5)
|
||||||
|
// - 2026-06-11: เพิ่ม ConfigService mock และ override ServiceAccountGuard เพื่อแก้ DI error
|
||||||
|
// - 2026-06-11: แก้ไขการ import supertest ให้ถูกต้อง เพื่อป้องกัน TypeError: request is not a function
|
||||||
|
// - 2026-06-11: แก้ไขการตรวจสอบ message array ในการทดสอบ validation ให้ถูกต้อง
|
||||||
|
// - 2026-06-11: แก้ไข ESLint unsafe argument/member access errors ใน integration tests
|
||||||
|
// - 2026-06-11: เพิ่ม mock 'default_IORedisModuleConnectionToken' เพื่อแก้ปัญหา NestJS DI และลบบรรทัดว่างในฟังก์ชัน
|
||||||
|
|
||||||
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
|
import { INestApplication, ValidationPipe } from '@nestjs/common';
|
||||||
|
import request from 'supertest';
|
||||||
|
import { AiController } from '../ai.controller';
|
||||||
|
import { AiService } from '../ai.service';
|
||||||
|
import { AiIngestService } from '../ai-ingest.service';
|
||||||
|
import { AiRagService } from '../ai-rag.service';
|
||||||
|
import { AiQueueService } from '../ai-queue.service';
|
||||||
|
import { AiSettingsService } from '../ai-settings.service';
|
||||||
|
import { AiToolRegistryService } from '../tool/ai-tool-registry.service';
|
||||||
|
import { FileStorageService } from '../../../common/file-storage/file-storage.service';
|
||||||
|
import { AiMigrationCheckpointService } from '../ai-migration-checkpoint.service';
|
||||||
|
import { OcrService } from '../services/ocr.service';
|
||||||
|
import { JwtAuthGuard } from '../../../common/guards/jwt-auth.guard';
|
||||||
|
import { RbacGuard } from '../../../common/guards/rbac.guard';
|
||||||
|
import { AiEnabledGuard } from '../guards/ai-enabled.guard';
|
||||||
|
import { ServiceAccountGuard } from '../guards/service-account.guard';
|
||||||
|
import { ConfigService } from '@nestjs/config';
|
||||||
|
|
||||||
|
/**
 * Integration tests for request validation on POST /ai/jobs.
 *
 * Every test boots a Nest application that contains only AiController. All
 * injected services are replaced by stubs and the four guards are overridden
 * to allow every request, so the behavior under test is the global
 * ValidationPipe (whitelist + transform + forbidNonWhitelisted) together with
 * the controller's request DTO.
 */
describe('AiController (Integration)', () => {
  let app: INestApplication;

  // Guard stub that lets every request through.
  const mockGuard = { canActivate: () => true };

  // The only collaborator with behavior: resolves to a fixed "queued" job.
  const mockAiService = {
    submitUnifiedJob: jest.fn().mockResolvedValue({
      jobId: 'job-123',
      status: 'queued',
      effectiveProfile: 'standard',
      modelUsed: 'np-dms-ai',
    }),
  };

  // The remaining providers exist only to satisfy dependency injection.
  const mockAiIngestService = {};
  const mockAiRagService = {};
  const mockAiQueueService = {};
  const mockAiSettingsService = {};
  const mockAiToolRegistryService = {};
  const mockFileStorageService = {};
  const mockMigrationCheckpointService = {};
  const mockOcrService = {};

  // Fields shared by every request body in this suite.
  const baseJob = {
    type: 'rag-query',
    documentPublicId: '019505a1-7c3e-7000-8000-abc123def456',
  };

  // Sends POST /ai/jobs with the idempotency header used by all tests.
  // `app` is read at call time, i.e. after beforeEach has assigned it.
  const postJob = (body: Record<string, unknown>) =>
    request(app.getHttpServer() as () => void)
      .post('/ai/jobs')
      .set('idempotency-key', 'key-123')
      .send(body);

  beforeEach(async () => {
    // Clears recorded calls only; the mockResolvedValue implementation stays.
    jest.clearAllMocks();

    const moduleFixture: TestingModule = await Test.createTestingModule({
      controllers: [AiController],
      providers: [
        { provide: AiService, useValue: mockAiService },
        { provide: AiIngestService, useValue: mockAiIngestService },
        { provide: AiRagService, useValue: mockAiRagService },
        { provide: AiQueueService, useValue: mockAiQueueService },
        { provide: AiSettingsService, useValue: mockAiSettingsService },
        { provide: AiToolRegistryService, useValue: mockAiToolRegistryService },
        { provide: FileStorageService, useValue: mockFileStorageService },
        {
          provide: AiMigrationCheckpointService,
          useValue: mockMigrationCheckpointService,
        },
        { provide: OcrService, useValue: mockOcrService },
        {
          // Redis connection token, stubbed so no real connection is needed.
          provide: 'default_IORedisModuleConnectionToken',
          useValue: {
            get: jest.fn().mockResolvedValue(null),
            set: jest.fn().mockResolvedValue('OK'),
            del: jest.fn().mockResolvedValue(1),
          },
        },
        {
          provide: ConfigService,
          useValue: {
            get: jest.fn().mockImplementation((key: string) => {
              if (key === 'AI_ENABLED') return 'true';
              return null;
            }),
          },
        },
      ],
    })
      .overrideGuard(JwtAuthGuard)
      .useValue(mockGuard)
      .overrideGuard(RbacGuard)
      .useValue(mockGuard)
      .overrideGuard(AiEnabledGuard)
      .useValue(mockGuard)
      .overrideGuard(ServiceAccountGuard)
      .useValue(mockGuard)
      .compile();

    app = moduleFixture.createNestApplication();
    app.useGlobalPipes(
      new ValidationPipe({
        whitelist: true,
        transform: true,
        forbidNonWhitelisted: true,
      })
    );
    await app.init();
  });

  afterEach(async () => {
    await app.close();
  });

  describe('POST /ai/jobs - Validation', () => {
    it('ควรส่งผ่านเมื่อส่ง payload ที่ถูกต้อง (ไม่มี executionProfile, model, temperature ฯลฯ)', async () => {
      const response = await postJob({ ...baseJob, payload: { query: 'test' } });

      expect(response.status).toBe(201);
      expect(response.body).toEqual({
        jobId: 'job-123',
        status: 'queued',
        effectiveProfile: 'standard',
        modelUsed: 'np-dms-ai',
      });
      expect(mockAiService.submitUnifiedJob).toHaveBeenCalled();
    });

    // One case per client-supplied runtime override that must be rejected.
    // The %s placeholder reproduces the three original test titles.
    it.each<[string, unknown]>([
      ['executionProfile', 'quality'],
      ['model', { key: 'custom' }],
      ['temperature', 0.7],
    ])(
      'ควรคืนสถานะ 400 Bad Request เมื่อส่ง %s มาใน payload',
      async (field, value) => {
        const response = await postJob({ ...baseJob, [field]: value });

        expect(response.status).toBe(400);

        // Search all validation messages instead of only the first one, so the
        // test does not depend on the order in which errors are reported.
        const body = response.body as { message: string[] };
        expect(body.message.join('\n')).toContain(
          `${field} is forbidden in payload`
        );

        // A rejected request must never be handed to the service layer.
        expect(mockAiService.submitUnifiedJob).not.toHaveBeenCalled();
      }
    );
  });
});
|
||||||
@@ -0,0 +1,141 @@
|
|||||||
|
// File: backend/src/modules/ai/tests/ocr-residency.spec.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial unit tests for adaptive OCR residency
|
||||||
|
|
||||||
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
|
import { ConfigService } from '@nestjs/config';
|
||||||
|
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||||
|
import { OcrService } from '../services/ocr.service';
|
||||||
|
import { VramMonitorService } from '../services/vram-monitor.service';
|
||||||
|
import { AiPolicyService } from '../services/ai-policy.service';
|
||||||
|
import { OcrCacheService } from '../services/ocr-cache.service';
|
||||||
|
import { SystemSetting } from '../entities/system-setting.entity';
|
||||||
|
import { AiAuditLog } from '../entities/ai-audit-log.entity';
|
||||||
|
|
||||||
|
/**
 * Unit tests for OcrService.calculateOcrResidency (adaptive OCR residency).
 *
 * The service is built with stubbed collaborators; each test feeds one VRAM
 * snapshot through VramMonitorService.getVramHeadroom and checks the returned
 * keep-alive duration and reason code.
 */
describe('OcrService Adaptive Residency (US2)', () => {
  let service: OcrService;

  // Shape of the object the tests return from getVramHeadroom.
  type HeadroomSnapshot = {
    totalMb: number;
    usedMb: number;
    availableMb: number;
    querySuccess: boolean;
    mainModelVramMb: number;
  };

  // Config lookup backed by a fixed table; unknown keys yield the default.
  const mockConfigService = {
    get: jest.fn((key: string, defaultValue?: unknown): unknown => {
      const config: Record<string, unknown> = {
        OCR_CHAR_THRESHOLD: 100,
        OCR_API_URL: 'http://localhost:8765',
        OCR_SIDECAR_API_KEY: 'test-key',
        VRAM_HEADROOM_THRESHOLD_MB: 3000,
        OCR_RESIDENCY_WINDOW_SECONDS: 120,
        GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB: 12000,
      };
      return config[key] ?? defaultValue;
    }),
  };

  const mockSystemSettingRepo = {
    findOne: jest.fn().mockResolvedValue({
      settingValue: '019505a1-7c3e-7000-8000-abc123def002',
    }),
  };

  const mockAiAuditLogRepo = {
    create: jest.fn().mockReturnValue({}),
    save: jest.fn().mockResolvedValue({}),
  };

  const mockOcrCacheService = {};

  const mockVramMonitorService = {
    getVramHeadroom: jest.fn(),
    hasVramCapacity: jest.fn().mockResolvedValue(true),
  };

  const mockAiPolicyService = {};

  const mockRedis = {
    get: jest.fn().mockResolvedValue(null),
    set: jest.fn().mockResolvedValue('OK'),
    del: jest.fn().mockResolvedValue(1),
  };

  // Queues a single VRAM snapshot for the next getVramHeadroom call.
  const stubHeadroom = (snapshot: HeadroomSnapshot): void => {
    mockVramMonitorService.getVramHeadroom.mockResolvedValueOnce(snapshot);
  };

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      providers: [
        OcrService,
        { provide: ConfigService, useValue: mockConfigService },
        {
          provide: getRepositoryToken(SystemSetting),
          useValue: mockSystemSettingRepo,
        },
        {
          provide: getRepositoryToken(AiAuditLog),
          useValue: mockAiAuditLogRepo,
        },
        { provide: OcrCacheService, useValue: mockOcrCacheService },
        { provide: VramMonitorService, useValue: mockVramMonitorService },
        { provide: AiPolicyService, useValue: mockAiPolicyService },
        {
          provide: 'default_IORedisModuleConnectionToken',
          useValue: mockRedis,
        },
      ],
    }).compile();

    service = module.get<OcrService>(OcrService);

    jest.clearAllMocks();
    // clearAllMocks() only wipes recorded calls; values queued with
    // mockResolvedValueOnce survive it. If a test ends without consuming its
    // snapshot, the next test would receive that stale snapshot first, so the
    // queue is dropped explicitly here.
    mockVramMonitorService.getVramHeadroom.mockReset();
  });

  it('ควรคืน keepAliveSeconds=0 เมื่อ activeProfile เป็น deep-analysis (FR-B03)', async () => {
    // VRAM itself is plentiful here; only the profile should force eviction.
    stubHeadroom({
      totalMb: 16384,
      usedMb: 4000,
      availableMb: 12384,
      querySuccess: true,
      mainModelVramMb: 4000,
    });

    const decision = await service.calculateOcrResidency('deep-analysis');

    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('deep-analysis-active');
  });

  it('ควรคืน keepAliveSeconds=0 เมื่อ VRAM ของโมเดลหลักเกิน pressure threshold (FR-B03)', async () => {
    // mainModelVramMb (13000) is above the configured 12000 MB pressure threshold.
    stubHeadroom({
      totalMb: 16384,
      usedMb: 13000,
      availableMb: 3384,
      querySuccess: true,
      mainModelVramMb: 13000,
    });

    const decision = await service.calculateOcrResidency('standard');

    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('high-pressure');
  });

  it('ควรคืน keepAliveSeconds=0 เมื่อ VRAM headroom ต่ำกว่า headroom threshold (FR-B03)', async () => {
    // availableMb (2384) is below the configured 3000 MB headroom threshold.
    stubHeadroom({
      totalMb: 16384,
      usedMb: 14000,
      availableMb: 2384,
      querySuccess: true,
      mainModelVramMb: 8000,
    });

    const decision = await service.calculateOcrResidency('standard');

    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('high-pressure');
  });

  it('ควรคืน keepAliveSeconds > 0 (residency window) เมื่อ VRAM เพียงพอและไม่มี pressure (FR-B04)', async () => {
    stubHeadroom({
      totalMb: 16384,
      usedMb: 4000,
      availableMb: 12384,
      querySuccess: true,
      mainModelVramMb: 4000,
    });

    const decision = await service.calculateOcrResidency('standard');

    // 120 is the configured OCR_RESIDENCY_WINDOW_SECONDS.
    expect(decision.keepAliveSeconds).toBe(120);
    expect(decision.reason).toBe('headroom-sufficient');
  });

  it('ควรคืน keepAliveSeconds=0 และ reason=query-failed เมื่อ query VRAM ล้มเหลว (FR-B05)', async () => {
    // querySuccess=false marks the snapshot as a failed VRAM query.
    stubHeadroom({
      totalMb: 16384,
      usedMb: 16384,
      availableMb: 0,
      querySuccess: false,
      mainModelVramMb: 0,
    });

    const decision = await service.calculateOcrResidency('standard');

    expect(decision.keepAliveSeconds).toBe(0);
    expect(decision.reason).toBe('query-failed');
  });
});
|
||||||
@@ -0,0 +1,153 @@
|
|||||||
|
// File: backend/src/modules/ai/tests/queue-policy.spec.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: สร้าง unit tests สำหรับทดสอบ Queue Policy & Selective Realtime Concurrency (US4)
|
||||||
|
// - 2026-06-11: แก้ไข relative import ของ Attachment ให้ถูกต้อง (3 ระดับ)
|
||||||
|
// - 2026-06-11: นำเข้า Job และ AiRealtimeJobData เพื่อแก้ไข compile/lint errors
|
||||||
|
|
||||||
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
|
import { getQueueToken } from '@nestjs/bullmq';
|
||||||
|
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||||
|
import type { Job } from 'bullmq';
|
||||||
|
import { QUEUE_AI_BATCH } from '../../common/constants/queue.constants';
|
||||||
|
import {
|
||||||
|
AiRealtimeProcessor,
|
||||||
|
AiRealtimeJobData,
|
||||||
|
} from '../processors/ai-realtime.processor';
|
||||||
|
import { OcrService } from '../services/ocr.service';
|
||||||
|
import { OllamaService } from '../services/ollama.service';
|
||||||
|
import { AiAuditLog } from '../entities/ai-audit-log.entity';
|
||||||
|
import { Attachment } from '../../../common/file-storage/entities/attachment.entity';
|
||||||
|
|
||||||
|
/**
 * Unit tests for AiRealtimeProcessor's queue policy (US4):
 * which job types are processed in place, which are handed to the ai-batch
 * queue, and when the ai-batch queue is paused and resumed.
 */
describe('Queue Policy (US4)', () => {
  let processor: AiRealtimeProcessor;

  // Stub of the ai-batch queue; add() reports a fixed id for the re-queued job.
  const mockBatchQueue = {
    add: jest.fn().mockResolvedValue({ id: 'redirected-job-id' }),
    pause: jest.fn().mockResolvedValue(undefined),
    resume: jest.fn().mockResolvedValue(undefined),
  };
  const mockOcrService = { detectAndExtract: jest.fn() };
  const mockOllamaService = {
    getMainModelName: jest.fn().mockReturnValue('np-dms-ai'),
    generate: jest.fn(),
  };
  const mockAiAuditLogRepo = { create: jest.fn(), save: jest.fn() };
  const mockAttachmentRepo = { update: jest.fn() };

  // Builds a minimal stand-in for a BullMQ job: only id and data are set.
  const fakeJob = (
    id: string,
    data: Record<string, unknown>
  ): Job<AiRealtimeJobData> => ({ id, data }) as unknown as Job<AiRealtimeJobData>;

  // Job data for a query-style request against project-1.
  const queryData = (jobType: string): Record<string, unknown> => ({
    jobType,
    projectPublicId: 'project-1',
    payload: { query: 'test' },
  });

  beforeEach(async () => {
    jest.clearAllMocks();

    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        AiRealtimeProcessor,
        { provide: getQueueToken(QUEUE_AI_BATCH), useValue: mockBatchQueue },
        { provide: OcrService, useValue: mockOcrService },
        { provide: OllamaService, useValue: mockOllamaService },
        {
          provide: getRepositoryToken(AiAuditLog),
          useValue: mockAiAuditLogRepo,
        },
        {
          provide: getRepositoryToken(Attachment),
          useValue: mockAttachmentRepo,
        },
      ],
    }).compile();

    // A fresh processor per test, so no state carries over between tests.
    processor = testingModule.get<AiRealtimeProcessor>(AiRealtimeProcessor);
  });

  it('ควรอนุญาตให้ lightweight jobs รันได้โดยไม่ redirect', async () => {
    const classifyJob = fakeJob('1', queryData('intent-classify'));
    const classifyOutcome = await processor.process(classifyJob);
    expect(classifyOutcome).toEqual({ success: true, intent: 'GET_RFA' });
    expect(mockBatchQueue.add).not.toHaveBeenCalled();

    const toolJob = fakeJob('2', queryData('tool-suggest'));
    const toolOutcome = await processor.process(toolJob);
    expect(toolOutcome).toEqual({ success: true, suggestions: [] });
    expect(mockBatchQueue.add).not.toHaveBeenCalled();
  });

  it('ควร redirect generation-heavy jobs ไปยัง ai-batch queue', async () => {
    // The redirected job keeps its name, its data object and its id.
    const suggestJob = fakeJob('3', queryData('ai-suggest'));
    await processor.process(suggestJob);
    expect(mockBatchQueue.add).toHaveBeenCalledWith(
      'ai-suggest',
      suggestJob.data,
      { jobId: '3' }
    );

    const ragJob = fakeJob('4', queryData('rag-query'));
    await processor.process(ragJob);
    expect(mockBatchQueue.add).toHaveBeenCalledWith('rag-query', ragJob.data, {
      jobId: '4',
    });
  });

  it('ควร resume ai-batch เมื่อ realtime jobs ทั้งหมดเสร็จแล้วเท่านั้น', async () => {
    const jobA = fakeJob('10', { jobType: 'intent-classify' });
    const jobB = fakeJob('11', { jobType: 'tool-suggest' });

    // Two jobs become active, but the batch queue is paused only once.
    await processor.onActive(jobA);
    await processor.onActive(jobB);
    expect(mockBatchQueue.pause).toHaveBeenCalledTimes(1);

    // One job is still running: no resume yet.
    await processor.onCompleted(jobA);
    expect(mockBatchQueue.resume).not.toHaveBeenCalled();

    // Last active job finished: resume exactly once.
    await processor.onCompleted(jobB);
    expect(mockBatchQueue.resume).toHaveBeenCalledTimes(1);
  });

  it('ควรยัง pause ai-batch ต่อเมื่อมี realtime job อื่น active อยู่แม้มี job หนึ่ง fail', async () => {
    const jobA = fakeJob('12', { jobType: 'intent-classify' });
    const jobB = fakeJob('13', { jobType: 'tool-suggest' });

    await processor.onActive(jobA);
    await processor.onActive(jobB);
    expect(mockBatchQueue.pause).toHaveBeenCalledTimes(1);

    // A failure counts as finishing, but the other job is still active.
    await processor.onFailed(jobA);
    expect(mockBatchQueue.resume).not.toHaveBeenCalled();

    await processor.onCompleted(jobB);
    expect(mockBatchQueue.resume).toHaveBeenCalledTimes(1);
  });
});
|
||||||
@@ -0,0 +1,102 @@
|
|||||||
|
// File: backend/src/modules/ai/tests/vram-monitor.service.spec.ts
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: สร้าง unit tests สำหรับ VramMonitorService (US5)
|
||||||
|
|
||||||
|
import { Test, TestingModule } from '@nestjs/testing';
|
||||||
|
import { ConfigService } from '@nestjs/config';
|
||||||
|
import { VramMonitorService } from '../services/vram-monitor.service';
|
||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
jest.mock('axios');
|
||||||
|
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||||
|
|
||||||
|
/**
 * Unit tests for VramMonitorService.
 *
 * axios is module-mocked, so every test decides what the Ollama HTTP call
 * returns. Total VRAM is fixed at 8192 MB through the config stub.
 */
describe('VramMonitorService', () => {
  let service: VramMonitorService;

  // Config lookup backed by a fixed table; unknown keys yield the default.
  const mockConfigService = {
    get: jest.fn((key: string, defaultValue?: unknown): unknown => {
      const config: Record<string, unknown> = {
        OLLAMA_URL: 'http://localhost:11434',
        GPU_TOTAL_VRAM_MB: 8192, // mock total 8GB
      };
      const configured = config[key];
      return configured === undefined ? defaultValue : configured;
    }),
  };

  // Converts whole gibibytes to bytes, the unit used for size_vram below.
  const gib = (count: number): number => count * 1024 * 1024 * 1024;

  // Makes the mocked axios.get resolve with the given loaded-model list.
  const stubLoadedModels = (
    models: Array<{ name: string; size_vram: number }>
  ): void => {
    mockedAxios.get.mockResolvedValue({ data: { models } });
  };

  beforeEach(async () => {
    jest.clearAllMocks();

    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        VramMonitorService,
        { provide: ConfigService, useValue: mockConfigService },
      ],
    }).compile();

    service = testingModule.get<VramMonitorService>(VramMonitorService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });

  describe('getVramHeadroom', () => {
    it('ควรคำนวณ headroom ถูกต้องเมื่อ Ollama คืนข้อมูลโมเดลปกติ', async () => {
      stubLoadedModels([
        { name: 'typhoon2.5-np-dms:latest', size_vram: gib(4) }, // 4GB
        { name: 'other-model', size_vram: gib(2) }, // 2GB
      ]);

      const snapshot = await service.getVramHeadroom();

      expect(snapshot.querySuccess).toBe(true);
      expect(snapshot.totalMb).toBe(8192);
      expect(snapshot.usedMb).toBe(6144); // 4GB + 2GB
      expect(snapshot.availableMb).toBe(2048); // 8GB - 6GB
      expect(snapshot.mainModelVramMb).toBe(4096); // the 4GB typhoon model
    });

    it('ควรคำนวณ headroom เป็น safe default (0 available) เมื่อ Ollama query ล้มเหลว', async () => {
      mockedAxios.get.mockRejectedValue(new Error('Connection timeout'));

      const snapshot = await service.getVramHeadroom();

      // On failure the whole card is reported as used.
      expect(snapshot.querySuccess).toBe(false);
      expect(snapshot.availableMb).toBe(0);
      expect(snapshot.usedMb).toBe(8192);
      expect(snapshot.mainModelVramMb).toBe(0);
    });
  });

  describe('hasVramCapacity', () => {
    it('ควรคืน true เมื่อ headroom พอตามค่าที่ขอ', async () => {
      stubLoadedModels([
        { name: 'typhoon2.5-np-dms:latest', size_vram: gib(4) },
      ]);

      // 8192 - 4096 = 4096 MB free, which covers the 3000 MB request.
      const fits = await service.hasVramCapacity(3000);

      expect(fits).toBe(true);
    });

    it('ควรคืน false เมื่อ headroom ไม่พอตามค่าที่ขอ', async () => {
      stubLoadedModels([
        { name: 'typhoon2.5-np-dms:latest', size_vram: gib(6) }, // 6GB used
      ]);

      // 8192 - 6144 = 2048 MB free, which is less than the 3000 MB request.
      const fits = await service.hasVramCapacity(3000);

      expect(fits).toBe(false);
    });
  });
});
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"extends": "./tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"rootDir": ".",
|
||||||
|
"allowJs": true,
|
||||||
|
"noEmit": true
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"src/**/*.ts",
|
||||||
|
"test/**/*.ts",
|
||||||
|
"tests/**/*.ts",
|
||||||
|
"scratch/**/*.ts",
|
||||||
|
"scratch/**/*.js",
|
||||||
|
"jest.config.js",
|
||||||
|
"*.config.mjs"
|
||||||
|
],
|
||||||
|
"exclude": ["node_modules", "dist", "documentation"]
|
||||||
|
}
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
interactive
|
||||||
|
model np-dms-ai
|
||||||
|
temperature 0.7
|
||||||
|
top_p 0.9
|
||||||
|
max_tokens 2048
|
||||||
|
keep_alive "5m"
|
||||||
|
num_ctx 4096
|
||||||
|
repeat_penalty 1.15
|
||||||
|
|
||||||
|
standard
|
||||||
|
model np-dms-ai
|
||||||
|
temperature 0.5
|
||||||
|
top_p 0.8
|
||||||
|
max_tokens 4096
|
||||||
|
keep_alive "10m"
|
||||||
|
num_ctx 8192
|
||||||
|
repeat_penalty 1.15
|
||||||
|
|
||||||
|
quality
|
||||||
|
model np-dms-ai
|
||||||
|
temperature 0.1
|
||||||
|
top_p 0.95
|
||||||
|
max_tokens 8192
|
||||||
|
keep_alive "10m"
|
||||||
|
num_ctx 8192
|
||||||
|
repeat_penalty 1.15
|
||||||
|
|
||||||
|
deep-analysis
|
||||||
|
model np-dms-ai
|
||||||
|
temperature 0.3
|
||||||
|
top_p 0.85
|
||||||
|
max_tokens 8192
|
||||||
|
keep_alive "0"
|
||||||
|
num_ctx 32768
|
||||||
|
repeat_penalty 1.15
|
||||||
@@ -16,9 +16,11 @@ export default [
|
|||||||
'**/tmp/**',
|
'**/tmp/**',
|
||||||
'specs/**',
|
'specs/**',
|
||||||
'backend/documentation/**',
|
'backend/documentation/**',
|
||||||
|
'backend/scratch/**',
|
||||||
'backend/scripts/**',
|
'backend/scripts/**',
|
||||||
'frontend/public/**',
|
'frontend/public/**',
|
||||||
'**/test/**',
|
'**/test/**',
|
||||||
|
'**/*.d.ts',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
...backendConfig.map((config) => ({
|
...backendConfig.map((config) => ({
|
||||||
|
|||||||
@@ -56,9 +56,16 @@ function normalizeLoadedModels(value: unknown): VramLoadedModelView[] {
|
|||||||
}
|
}
|
||||||
return value.map((item, index) => {
|
return value.map((item, index) => {
|
||||||
if (typeof item === 'string') {
|
if (typeof item === 'string') {
|
||||||
|
const name = item.toLowerCase();
|
||||||
|
let normName = item;
|
||||||
|
if (name.includes('ocr') || name.includes('typhoon-np-dms-ocr')) {
|
||||||
|
normName = 'np-dms-ocr';
|
||||||
|
} else if (name.includes('typhoon') || name.includes('np-dms-ai')) {
|
||||||
|
normName = 'np-dms-ai';
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
modelId: `${item}-${index}`,
|
modelId: `${item}-${index}`,
|
||||||
modelName: item,
|
modelName: normName,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (item && typeof item === 'object') {
|
if (item && typeof item === 'object') {
|
||||||
@@ -68,10 +75,17 @@ function normalizeLoadedModels(value: unknown): VramLoadedModelView[] {
|
|||||||
name?: string;
|
name?: string;
|
||||||
vramUsageMB?: number;
|
vramUsageMB?: number;
|
||||||
};
|
};
|
||||||
const modelName = model.modelName ?? model.name ?? `model-${index + 1}`;
|
const rawName = model.modelName ?? model.name ?? `model-${index + 1}`;
|
||||||
|
const name = rawName.toLowerCase();
|
||||||
|
let normName = rawName;
|
||||||
|
if (name.includes('ocr') || name.includes('typhoon-np-dms-ocr')) {
|
||||||
|
normName = 'np-dms-ocr';
|
||||||
|
} else if (name.includes('typhoon') || name.includes('np-dms-ai')) {
|
||||||
|
normName = 'np-dms-ai';
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
modelId: model.modelId ?? modelName,
|
modelId: model.modelId ?? rawName,
|
||||||
modelName,
|
modelName: normName,
|
||||||
vramUsageMB: model.vramUsageMB,
|
vramUsageMB: model.vramUsageMB,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -122,7 +136,13 @@ export default function AiAdminConsolePage() {
|
|||||||
return res as SandboxProject[];
|
return res as SandboxProject[];
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
const healthOllamaModels = ensureArray<string>(health?.ollama?.models);
|
const rawHealthOllamaModels = ensureArray<string>(health?.ollama?.models);
|
||||||
|
const healthOllamaModels = Array.from(new Set(rawHealthOllamaModels.map((m) => {
|
||||||
|
const name = m.toLowerCase();
|
||||||
|
if (name.includes('ocr') || name.includes('typhoon-np-dms-ocr')) return 'np-dms-ocr';
|
||||||
|
if (name.includes('typhoon') || name.includes('np-dms-ai')) return 'np-dms-ai';
|
||||||
|
return m;
|
||||||
|
})));
|
||||||
const healthQdrantCollections = ensureArray<string>(health?.qdrant?.collections);
|
const healthQdrantCollections = ensureArray<string>(health?.qdrant?.collections);
|
||||||
const vramLoadedModels = normalizeLoadedModels(vramStatus?.loadedModels);
|
const vramLoadedModels = normalizeLoadedModels(vramStatus?.loadedModels);
|
||||||
const sandboxProjects = ensureArray<SandboxProject>(projects);
|
const sandboxProjects = ensureArray<SandboxProject>(projects);
|
||||||
|
|||||||
@@ -592,7 +592,7 @@ export default function OcrSandboxPromptManager() {
|
|||||||
</CardTitle>
|
</CardTitle>
|
||||||
<Badge variant="outline" className="text-xs">
|
<Badge variant="outline" className="text-xs">
|
||||||
{ocrResult.engineUsed === 'typhoon-np-dms-ocr'
|
{ocrResult.engineUsed === 'typhoon-np-dms-ocr'
|
||||||
? 'Typhoon OCR'
|
? 'np-dms-ocr'
|
||||||
: ocrResult.ocrUsed
|
: ocrResult.ocrUsed
|
||||||
? 'Tesseract'
|
? 'Tesseract'
|
||||||
: 'Fast Path (Text Layer)'}
|
: 'Fast Path (Text Layer)'}
|
||||||
@@ -601,7 +601,7 @@ export default function OcrSandboxPromptManager() {
|
|||||||
<CardContent className="pt-4">
|
<CardContent className="pt-4">
|
||||||
{ocrResult.fallbackUsed && (
|
{ocrResult.fallbackUsed && (
|
||||||
<div className="mb-3 rounded-md border border-amber-500/20 bg-amber-500/5 px-3 py-2 text-xs text-amber-600 dark:text-amber-400">
|
<div className="mb-3 rounded-md border border-amber-500/20 bg-amber-500/5 px-3 py-2 text-xs text-amber-600 dark:text-amber-400">
|
||||||
Typhoon OCR unavailable. Fallback to Tesseract was used for this run.
|
np-dms-ocr unavailable. Fallback to Tesseract was used for this run.
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
<div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[200px] border border-border/10">
|
<div className="relative rounded-md bg-muted p-4 font-mono text-xs overflow-auto max-h-[200px] border border-border/10">
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
// - 2026-06-02: normalize VRAM response ให้รองรับ field names จาก backend ปัจจุบันและรูปแบบ loadedModels แบบเดิม
|
// - 2026-06-02: normalize VRAM response ให้รองรับ field names จาก backend ปัจจุบันและรูปแบบ loadedModels แบบเดิม
|
||||||
|
|
||||||
import api from '../api/client';
|
import api from '../api/client';
|
||||||
|
import { AiJobResponse } from '../../types/ai';
|
||||||
|
|
||||||
export interface AiAdminSettings {
|
export interface AiAdminSettings {
|
||||||
aiFeaturesEnabled: boolean;
|
aiFeaturesEnabled: boolean;
|
||||||
@@ -315,6 +316,23 @@ export const adminAiService = {
|
|||||||
const { data } = await api.post(`/ai/ocr-engines/${encodeURIComponent(engineId)}/select`, {});
|
const { data } = await api.post(`/ai/ocr-engines/${encodeURIComponent(engineId)}/select`, {});
|
||||||
return extractData<{ activeEngineName: string }>(data);
|
return extractData<{ activeEngineName: string }>(data);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
submitAiJob: async (
|
||||||
|
type: string,
|
||||||
|
documentPublicId?: string,
|
||||||
|
attachmentPublicId?: string,
|
||||||
|
payload?: Record<string, unknown>,
|
||||||
|
projectPublicId?: string
|
||||||
|
): Promise<AiJobResponse> => {
|
||||||
|
const { data } = await api.post('/ai/jobs', {
|
||||||
|
type,
|
||||||
|
documentPublicId,
|
||||||
|
attachmentPublicId,
|
||||||
|
payload,
|
||||||
|
projectPublicId,
|
||||||
|
});
|
||||||
|
return extractData<AiJobResponse>(data);
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export interface OcrEngineResponse {
|
export interface OcrEngineResponse {
|
||||||
|
|||||||
@@ -44,5 +44,14 @@
|
|||||||
"delete_confirm": "Delete this pattern?",
|
"delete_confirm": "Delete this pattern?",
|
||||||
"loading": "Loading...",
|
"loading": "Loading...",
|
||||||
"not_found": "Intent not found"
|
"not_found": "Intent not found"
|
||||||
|
},
|
||||||
|
"ai_runtime_policy": {
|
||||||
|
"error_model_key_forbidden": "model.key is not allowed. The system selects the model automatically.",
|
||||||
|
"error_execution_profile_forbidden": "executionProfile is not allowed in the request payload.",
|
||||||
|
"error_temperature_forbidden": "temperature override is not allowed. Runtime parameters are managed by policy.",
|
||||||
|
"error_top_p_forbidden": "top_p override is not allowed. Runtime parameters are managed by policy.",
|
||||||
|
"error_max_tokens_forbidden": "maxTokens override is not allowed. Runtime parameters are managed by policy.",
|
||||||
|
"error_cpu_timeout": "Retrieval operation timed out on CPU fallback. Please retry later.",
|
||||||
|
"error_large_context_unauthorized": "The large-context profile requires administrator privileges."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -76,6 +76,14 @@
|
|||||||
"processing": "กำลังประมวลผลด้วย Typhoon LLM...",
|
"processing": "กำลังประมวลผลด้วย Typhoon LLM...",
|
||||||
"error_vram": "VRAM ไม่เพียงพอสำหรับโหลดโมเดล Typhoon LLM",
|
"error_vram": "VRAM ไม่เพียงพอสำหรับโหลดโมเดล Typhoon LLM",
|
||||||
"error_timeout": "หมดเวลาการประมวลผล LLM (120 วินาที)"
|
"error_timeout": "หมดเวลาการประมวลผล LLM (120 วินาที)"
|
||||||
|
},
|
||||||
|
"ai_runtime_policy": {
|
||||||
|
"error_model_key_forbidden": "ไม่อนุญาตให้ระบุ model.key ระบบจะเลือกโมเดลให้อัตโนมัติ",
|
||||||
|
"error_execution_profile_forbidden": "ไม่อนุญาตให้ระบุ executionProfile ใน payload",
|
||||||
|
"error_temperature_forbidden": "ไม่อนุญาตให้ override ค่า temperature พารามิเตอร์ถูกควบคุมโดย Runtime Policy",
|
||||||
|
"error_top_p_forbidden": "ไม่อนุญาตให้ override ค่า top_p พารามิเตอร์ถูกควบคุมโดย Runtime Policy",
|
||||||
|
"error_max_tokens_forbidden": "ไม่อนุญาตให้ override ค่า maxTokens พารามิเตอร์ถูกควบคุมโดย Runtime Policy",
|
||||||
|
"error_cpu_timeout": "การดึงข้อมูลหมดเวลาขณะใช้ CPU fallback กรุณาลองใหม่อีกครั้ง",
|
||||||
|
"error_large_context_unauthorized": "Profile large-context ต้องการสิทธิ์ผู้ดูแลระบบ"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -74,3 +74,13 @@ export interface AiPaginatedResult<T> {
|
|||||||
limit: number;
|
limit: number;
|
||||||
totalPages: number;
|
totalPages: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type ExecutionProfile = 'interactive' | 'standard' | 'quality' | 'deep-analysis';
|
||||||
|
|
||||||
|
export interface AiJobResponse {
|
||||||
|
jobId: string;
|
||||||
|
status: 'queued' | 'completed' | 'failed';
|
||||||
|
modelUsed: 'np-dms-ai' | 'np-dms-ocr';
|
||||||
|
effectiveProfile: ExecutionProfile;
|
||||||
|
queueName: 'ai-realtime' | 'ai-batch';
|
||||||
|
}
|
||||||
|
|||||||
@@ -85,3 +85,18 @@ QDRANT_URL
|
|||||||
|
|
||||||
- [ ] เพิ่ม unit test สำหรับ `upsertQueueRecord` ใน `ai-migration-checkpoint.service.spec.ts`
|
- [ ] เพิ่ม unit test สำหรับ `upsertQueueRecord` ใน `ai-migration-checkpoint.service.spec.ts`
|
||||||
- [ ] เพิ่ม unit test สำหรับ checksum dedup ใน `file-storage.service.spec.ts`
|
- [ ] เพิ่ม unit test สำหรับ checksum dedup ใน `file-storage.service.spec.ts`
|
||||||
|
|
||||||
|
### Feature-235: AI Runtime Policy Refactor ✅ CODE COMPLETE (รอ T032 manual validation)
|
||||||
|
|
||||||
|
- [x] **Phase 1–8 ทุก task เสร็จครบ** ยกเว้น T032 (manual validation ต้องรัน curl บน environment จริง)
|
||||||
|
- [x] **Test suite:** 5 suites / 27 tests ผ่านใน targeted verification รอบล่าสุด (`ai.service.spec`, `ocr-residency.spec`, `queue-policy.spec`, `vram-monitor.service.spec`, `ai.controller.spec`)
|
||||||
|
- [x] **ESLint + tsc --noEmit:** ผ่านครบ ไม่มี error
|
||||||
|
- [x] **Canonical naming:** `np-dms-ai` / `np-dms-ocr` ทุก layer (API response, audit log, Admin Console, frontend badge)
|
||||||
|
- [x] **Adaptive OCR Residency:** `keep_alive` คำนวณ dynamic จาก VRAM headroom + active profile
|
||||||
|
- [x] **CPU Fallback Retrieval:** `/embed` + `/rerank` บน sidecar fallback ไป CPU เมื่อ GPU headroom ไม่พอ
|
||||||
|
- [x] **Queue Policy:** `ai-realtime` concurrency=2 (configurable ผ่าน `AI_REALTIME_CONCURRENCY`); `rag-query` → `ai-batch` เสมอ
|
||||||
|
- [x] **Validation artifacts:** `specs/200-fullstacks/235-ai-runtime-policy-refactor/validation-report.md` = `PARTIAL`; `checklists/cutover-validation.md` สร้างไว้สำหรับปิด T032
|
||||||
|
- [x] **i18n:** เพิ่ม `ai_runtime_policy` namespace ใน en/th locales
|
||||||
|
- [x] **CONTEXT.md:** เพิ่ม Feature-235 ใน System Readiness + ADR-034 ใน ADRs table
|
||||||
|
- [ ] **T032:** Manual validation gate (Gate 1–4) — ให้ใช้ `checklists/cutover-validation.md` เป็น runbook หลัก
|
||||||
|
- **Branch:** `235-ai-runtime-policy-refactor` — พร้อม merge หลัง T032 manual validation ผ่าน
|
||||||
|
|||||||
Generated
+6
-11
@@ -186,8 +186,8 @@ importers:
|
|||||||
specifier: ^5.8.2
|
specifier: ^5.8.2
|
||||||
version: 5.8.2
|
version: 5.8.2
|
||||||
joi:
|
joi:
|
||||||
specifier: ^18.0.1
|
specifier: ^18.2.1
|
||||||
version: 18.0.2
|
version: 18.2.1
|
||||||
ms:
|
ms:
|
||||||
specifier: ^2.1.3
|
specifier: ^2.1.3
|
||||||
version: 2.1.3
|
version: 2.1.3
|
||||||
@@ -3494,9 +3494,6 @@ packages:
|
|||||||
'@sqltools/formatter@1.2.5':
|
'@sqltools/formatter@1.2.5':
|
||||||
resolution: {integrity: sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==}
|
resolution: {integrity: sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw==}
|
||||||
|
|
||||||
'@standard-schema/spec@1.0.0':
|
|
||||||
resolution: {integrity: sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==}
|
|
||||||
|
|
||||||
'@standard-schema/spec@1.1.0':
|
'@standard-schema/spec@1.1.0':
|
||||||
resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==}
|
resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==}
|
||||||
|
|
||||||
@@ -6281,8 +6278,8 @@ packages:
|
|||||||
resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==}
|
resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
joi@18.0.2:
|
joi@18.2.1:
|
||||||
resolution: {integrity: sha512-RuCOQMIt78LWnktPoeBL0GErkNaJPTBGcYuyaBvUOQSpcpcLfWrHPPihYdOGbV5pam9VTWbeoF7TsGiHugcjGA==}
|
resolution: {integrity: sha512-2/OKlogiESf2Nh3TFCrRjrr9z1DRHeW0I+KReF67+4J0Ns+8hBtHRmoWAZ2OFU6I5+TWLEe6sVlSdXPjHm5UbQ==}
|
||||||
engines: {node: '>= 20'}
|
engines: {node: '>= 20'}
|
||||||
|
|
||||||
jose@6.2.2:
|
jose@6.2.2:
|
||||||
@@ -12156,8 +12153,6 @@ snapshots:
|
|||||||
|
|
||||||
'@sqltools/formatter@1.2.5': {}
|
'@sqltools/formatter@1.2.5': {}
|
||||||
|
|
||||||
'@standard-schema/spec@1.0.0': {}
|
|
||||||
|
|
||||||
'@standard-schema/spec@1.1.0': {}
|
'@standard-schema/spec@1.1.0': {}
|
||||||
|
|
||||||
'@swc/helpers@0.5.15':
|
'@swc/helpers@0.5.15':
|
||||||
@@ -15532,7 +15527,7 @@ snapshots:
|
|||||||
jiti@2.6.1:
|
jiti@2.6.1:
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
joi@18.0.2:
|
joi@18.2.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@hapi/address': 5.1.1
|
'@hapi/address': 5.1.1
|
||||||
'@hapi/formula': 3.0.2
|
'@hapi/formula': 3.0.2
|
||||||
@@ -15540,7 +15535,7 @@ snapshots:
|
|||||||
'@hapi/pinpoint': 2.0.1
|
'@hapi/pinpoint': 2.0.1
|
||||||
'@hapi/tlds': 1.1.4
|
'@hapi/tlds': 1.1.4
|
||||||
'@hapi/topo': 6.0.2
|
'@hapi/topo': 6.0.2
|
||||||
'@standard-schema/spec': 1.0.0
|
'@standard-schema/spec': 1.1.0
|
||||||
|
|
||||||
jose@6.2.2: {}
|
jose@6.2.2: {}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
-- Rollback: ลบตาราง ai_execution_profiles
|
||||||
|
-- Date: 2026-06-11
|
||||||
|
-- Related Delta: 2026-06-11-create-ai-execution-profiles.sql
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS ai_execution_profiles;
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
-- Delta: สร้างตาราง ai_execution_profiles สำหรับ AI Runtime Policy Refactor
|
||||||
|
-- Date: 2026-06-11
|
||||||
|
-- Related ADR: ADR-029, Feature-235
|
||||||
|
-- Source of defaults: docs/ai-profiles.md
|
||||||
|
-- Applied in: v1.9.x (AI Runtime Policy Refactor cutover)
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS ai_execution_profiles (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT COMMENT 'ID ภายใน (ไม่ expose ใน API)',
|
||||||
|
profile_name VARCHAR(50) NOT NULL COMMENT 'ชื่อ profile: interactive, standard, quality, deep-analysis',
|
||||||
|
temperature DECIMAL(4,3) NOT NULL COMMENT 'LLM temperature parameter',
|
||||||
|
top_p DECIMAL(4,3) NOT NULL COMMENT 'LLM top_p parameter',
|
||||||
|
max_tokens INT NOT NULL COMMENT 'Maximum tokens to generate',
|
||||||
|
num_ctx INT NOT NULL COMMENT 'Context window size (tokens)',
|
||||||
|
repeat_penalty DECIMAL(5,3) NOT NULL COMMENT 'Repeat penalty parameter',
|
||||||
|
keep_alive_seconds INT NOT NULL COMMENT 'Model keep_alive in seconds (0 = unload immediately)',
|
||||||
|
is_active TINYINT(1) NOT NULL DEFAULT 1 COMMENT '1 = profile นี้ใช้งานได้; 0 = disabled',
|
||||||
|
updated_by INT NULL COMMENT 'user_id ที่แก้ไขล่าสุด (NULL = seed default)',
|
||||||
|
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
|
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
UNIQUE KEY uk_profile_name (profile_name),
|
||||||
|
INDEX idx_profile_active (profile_name, is_active),
|
||||||
|
FOREIGN KEY (updated_by) REFERENCES users(user_id)
|
||||||
|
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_unicode_ci
|
||||||
|
COMMENT = 'ตาราง execution profile parameters สำหรับ np-dms-ai (ADR-029, Feature-235); ค่า default จาก docs/ai-profiles.md';
|
||||||
|
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
-- Seed: default profiles จาก docs/ai-profiles.md
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
INSERT INTO ai_execution_profiles (
|
||||||
|
profile_name, temperature, top_p, max_tokens, num_ctx, repeat_penalty, keep_alive_seconds
|
||||||
|
) VALUES
|
||||||
|
('interactive', 0.700, 0.900, 2048, 4096, 1.150, 300), -- keep_alive: "5m"
|
||||||
|
('standard', 0.500, 0.800, 4096, 8192, 1.150, 600), -- keep_alive: "10m"
|
||||||
|
('quality', 0.100, 0.950, 8192, 8192, 1.150, 600), -- keep_alive: "10m"
|
||||||
|
('deep-analysis', 0.300, 0.850, 8192, 32768, 1.150, 0) -- keep_alive: "0" (admin sandbox only)
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
profile_name = profile_name; -- no-op: ไม่ overwrite ค่าที่ admin calibrate ไว้แล้ว
|
||||||
+19
@@ -0,0 +1,19 @@
|
|||||||
|
-- Rollback: ลบ fields ที่เพิ่มสำหรับ AI Runtime Policy Refactor
|
||||||
|
-- Date: 2026-06-11
|
||||||
|
-- Related Delta: 2026-06-11-extend-ai-audit-logs-runtime-policy.sql
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
DROP INDEX IF EXISTS idx_ai_audit_canonical_model;
|
||||||
|
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
DROP INDEX IF EXISTS idx_ai_audit_effective_profile;
|
||||||
|
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
DROP COLUMN IF EXISTS snapshot_params_json;
|
||||||
|
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
DROP COLUMN IF EXISTS canonical_model;
|
||||||
|
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
DROP COLUMN IF EXISTS effective_profile;
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
-- Delta: เพิ่ม fields สำหรับ AI Runtime Policy Refactor ใน ai_audit_logs
|
||||||
|
-- Date: 2026-06-11
|
||||||
|
-- Related ADR: ADR-023, ADR-029, Feature-235
|
||||||
|
-- Applied in: AI Runtime Policy Refactor cutover (big bang)
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
-- เพิ่ม 3 columns:
|
||||||
|
-- effective_profile — profile name ที่ backend กำหนด (interactive/standard/quality/deep-analysis)
|
||||||
|
-- canonical_model — canonical model identity (np-dms-ai / np-dms-ocr)
|
||||||
|
-- snapshot_params_json — parameters snapshot ณ เวลา dispatch (FR-A09)
|
||||||
|
-- ------------------------------------------------------------
|
||||||
|
|
||||||
|
-- effective_profile: ชื่อ ExecutionProfile ที่ backend กำหนดจาก job.type
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
ADD COLUMN IF NOT EXISTS effective_profile VARCHAR(50) NULL
|
||||||
|
COMMENT 'ExecutionProfile ที่ backend กำหนด: interactive|standard|quality|deep-analysis (Feature-235)'
|
||||||
|
AFTER model_name;
|
||||||
|
|
||||||
|
-- canonical_model: ชื่อ canonical identity — ไม่ใช่ runtime tag
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
ADD COLUMN IF NOT EXISTS canonical_model VARCHAR(50) NULL
|
||||||
|
COMMENT 'Canonical model identity: np-dms-ai หรือ np-dms-ocr (Feature-235, ADR-023)'
|
||||||
|
AFTER effective_profile;
|
||||||
|
|
||||||
|
-- snapshot_params_json: parameters ที่ถูก snapshot ตอน dispatch โดย AiPolicyService (FR-A09)
|
||||||
|
-- { temperature, topP, maxTokens, numCtx, repeatPenalty, keepAliveSeconds }
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
ADD COLUMN IF NOT EXISTS snapshot_params_json JSON NULL
|
||||||
|
COMMENT 'Runtime parameters snapshot ณ เวลา dispatch — ใช้จริงใน Ollama call (FR-A09, Feature-235)'
|
||||||
|
AFTER canonical_model;
|
||||||
|
|
||||||
|
-- index สำหรับ analytics queries ตาม profile
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
ADD INDEX IF NOT EXISTS idx_ai_audit_effective_profile (effective_profile);
|
||||||
|
|
||||||
|
-- index สำหรับ canonical_model
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
ADD INDEX IF NOT EXISTS idx_ai_audit_canonical_model (canonical_model);
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/.env.template
|
||||||
|
# Change Log:
|
||||||
|
# - 2026-06-11: สร้างไฟล์ env template สำหรับ Desk-5439 (US5)
|
||||||
|
|
||||||
|
# ─── VRAM, Residency & Timeout Configurations ───
|
||||||
|
VRAM_HEADROOM_THRESHOLD_MB=3000.0
|
||||||
|
OCR_RESIDENCY_WINDOW_SECONDS=120
|
||||||
|
GPU_TOTAL_VRAM_MB=16384.0
|
||||||
|
GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB=12000.0
|
||||||
|
RETRIEVAL_TIMEOUT_SECONDS=30.0
|
||||||
|
|
||||||
|
# ─── Queue policy & concurrency ───
|
||||||
|
AI_REALTIME_CONCURRENCY=2
|
||||||
+5
-7
@@ -1,12 +1,10 @@
|
|||||||
FROM scb10x/typhoon2.5-qwen3-4b:latest
|
FROM scb10x/typhoon2.5-qwen3-4b:latest
|
||||||
|
|
||||||
|
PARAMETER num_ctx 8192
|
||||||
|
PARAMETER num_predict 4096
|
||||||
PARAMETER num\_ctx 8192
|
|
||||||
PARAMETER num\_predict 4096
|
|
||||||
PARAMETER temperature 0.4
|
PARAMETER temperature 0.4
|
||||||
|
|
||||||
PARAMETER top\_k 40
|
PARAMETER top_k 40
|
||||||
PARAMETER top\_p 0.9
|
PARAMETER top_p 0.9
|
||||||
PARAMETER repeat\_penalty 1.15
|
PARAMETER repeat_penalty 1.15
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# File: specs/04-Infrastructure-OPS/04-00-docker-compose\Desk-5439\ocr-sidecar\app.py
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py
|
||||||
# Typhoon OCR HTTP Sidecar API — รับ POST /ocr แล้วคืนข้อความที่สกัดจาก PDF/Image
|
# Typhoon OCR HTTP Sidecar API — รับ POST /ocr แล้วคืนข้อความที่สกัดจาก PDF/Image
|
||||||
# ตาม ADR-023A (revised 2026-06-11): ใช้ typhoon_ocr library + np-dms-ocr (Ollama) แทน Tesseract
|
# ตาม ADR-023A (revised 2026-06-11): ใช้ typhoon_ocr library + np-dms-ocr (Ollama) แทน Tesseract
|
||||||
# Change Log:
|
# Change Log:
|
||||||
@@ -21,6 +21,7 @@
|
|||||||
# - 2026-06-05: เพิ่ม Option 2 (aggressive preprocessing: deskew + Otsu threshold + morphology) และ Option 3 (smart post-processing: regex-based hallucination removal) เพื่อลด Tesseract noise/hallucination (T025)
|
# - 2026-06-05: เพิ่ม Option 2 (aggressive preprocessing: deskew + Otsu threshold + morphology) และ Option 3 (smart post-processing: regex-based hallucination removal) เพื่อลด Tesseract noise/hallucination (T025)
|
||||||
# - 2026-06-06: เปลี่ยน keep_alive จาก 300s เป็น 0 เพื่อ unload model ทันทีหลังเสร็จงาน (แก้ปัญหา VRAM ไม่พอเมื่อ typhoon2.5-np-dms load พร้อมกัน)
|
# - 2026-06-06: เปลี่ยน keep_alive จาก 300s เป็น 0 เพื่อ unload model ทันทีหลังเสร็จงาน (แก้ปัญหา VRAM ไม่พอเมื่อ typhoon2.5-np-dms load พร้อมกัน)
|
||||||
# - 2026-06-11: เปลี่ยน process_with_typhoon_ocr ให้ใช้ prepare_ocr_messages จาก typhoon_ocr library + inject DMS tags; เปลี่ยน endpoint เป็น /v1/chat/completions
|
# - 2026-06-11: เปลี่ยน process_with_typhoon_ocr ให้ใช้ prepare_ocr_messages จาก typhoon_ocr library + inject DMS tags; เปลี่ยน endpoint เป็น /v1/chat/completions
|
||||||
|
# - 2026-06-11: US2 & US3 - เพิ่ม keep_alive parameter และ CPU fallback สำหรับ /embed และ /rerank
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
@@ -30,11 +31,13 @@ import json
|
|||||||
import tempfile
|
import tempfile
|
||||||
import fitz # PyMuPDF (ใช้สำหรับ page count + fast-path text extraction)
|
import fitz # PyMuPDF (ใช้สำหรับ page count + fast-path text extraction)
|
||||||
import httpx
|
import httpx
|
||||||
|
import asyncio
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import io
|
import io
|
||||||
from typhoon_ocr import prepare_ocr_messages
|
from typhoon_ocr import prepare_ocr_messages
|
||||||
|
from services.vram_monitor import get_vram_headroom
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Depends, Security, status
|
from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Depends, Security, status
|
||||||
from fastapi.security.api_key import APIKeyHeader
|
from fastapi.security.api_key import APIKeyHeader
|
||||||
@@ -104,6 +107,7 @@ class OcrRequest(BaseModel):
|
|||||||
pdfPath: str
|
pdfPath: str
|
||||||
maxPages: Optional[int] = None
|
maxPages: Optional[int] = None
|
||||||
engine: Optional[str] = None
|
engine: Optional[str] = None
|
||||||
|
keep_alive: Optional[int] = None
|
||||||
|
|
||||||
class OcrResponse(BaseModel):
|
class OcrResponse(BaseModel):
|
||||||
text: str
|
text: str
|
||||||
@@ -211,7 +215,7 @@ def process_with_typhoon_ocr(pdf_path: str, page_num: int = 1, options_override:
|
|||||||
"repetition_penalty": options_override.get("repeat_penalty", 1.2),
|
"repetition_penalty": options_override.get("repeat_penalty", 1.2),
|
||||||
"temperature": options_override.get("temperature", 0.1),
|
"temperature": options_override.get("temperature", 0.1),
|
||||||
"top_p": options_override.get("top_p", 0.6),
|
"top_p": options_override.get("top_p", 0.6),
|
||||||
"keep_alive": 0, # Unload model ทันทีหลังเสร็จงานเพื่อคืน VRAM ให้ np-dms-ai ใช้งานได้
|
"keep_alive": options_override.get("keep_alive", 0), # Unload model ทันทีหลังเสร็จงานเพื่อคืน VRAM ให้ np-dms-ai ใช้งานได้
|
||||||
}
|
}
|
||||||
# ใช้ Ollama OpenAI-compatible endpoint (/v1/chat/completions)
|
# ใช้ Ollama OpenAI-compatible endpoint (/v1/chat/completions)
|
||||||
with httpx.Client(timeout=TYPHOON_OCR_TIMEOUT) as client:
|
with httpx.Client(timeout=TYPHOON_OCR_TIMEOUT) as client:
|
||||||
@@ -249,11 +253,14 @@ def ocr_extract(req: OcrRequest):
|
|||||||
raise HTTPException(status_code=404, detail=f"ไม่พบไฟล์: {req.pdfPath}")
|
raise HTTPException(status_code=404, detail=f"ไม่พบไฟล์: {req.pdfPath}")
|
||||||
selected_engine = (req.engine or "auto").strip().lower()
|
selected_engine = (req.engine or "auto").strip().lower()
|
||||||
max_pages = req.maxPages or MAX_PAGES
|
max_pages = req.maxPages or MAX_PAGES
|
||||||
|
typhoon_options = {}
|
||||||
|
if req.keep_alive is not None:
|
||||||
|
typhoon_options["keep_alive"] = req.keep_alive
|
||||||
try:
|
try:
|
||||||
doc = fitz.open(str(pdf_path))
|
doc = fitz.open(str(pdf_path))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=422, detail=f"เปิดไฟล์ PDF ล้มเหลว: {e}")
|
raise HTTPException(status_code=422, detail=f"เปิดไฟล์ PDF ล้มเหลว: {e}")
|
||||||
return _process_pdf_doc(doc, selected_engine, max_pages)
|
return _process_pdf_doc(doc, selected_engine, max_pages, typhoon_options)
|
||||||
|
|
||||||
@app.post("/ocr-upload", response_model=OcrResponse, dependencies=[Depends(get_api_key)])
|
@app.post("/ocr-upload", response_model=OcrResponse, dependencies=[Depends(get_api_key)])
|
||||||
def ocr_upload(
|
def ocr_upload(
|
||||||
@@ -263,6 +270,7 @@ def ocr_upload(
|
|||||||
temperature: Optional[float] = Form(default=None),
|
temperature: Optional[float] = Form(default=None),
|
||||||
topP: Optional[float] = Form(default=None),
|
topP: Optional[float] = Form(default=None),
|
||||||
repeatPenalty: Optional[float] = Form(default=None),
|
repeatPenalty: Optional[float] = Form(default=None),
|
||||||
|
keep_alive: Optional[int] = Form(default=None),
|
||||||
):
|
):
|
||||||
"""OCR จาก multipart file upload — ไม่ต้องการ shared volume mount"""
|
"""OCR จาก multipart file upload — ไม่ต้องการ shared volume mount"""
|
||||||
selected_engine = engine.strip().lower()
|
selected_engine = engine.strip().lower()
|
||||||
@@ -275,6 +283,8 @@ def ocr_upload(
|
|||||||
typhoon_options["top_p"] = topP
|
typhoon_options["top_p"] = topP
|
||||||
if repeatPenalty is not None:
|
if repeatPenalty is not None:
|
||||||
typhoon_options["repeat_penalty"] = repeatPenalty
|
typhoon_options["repeat_penalty"] = repeatPenalty
|
||||||
|
if keep_alive is not None:
|
||||||
|
typhoon_options["keep_alive"] = keep_alive
|
||||||
pdf_bytes = file.file.read()
|
pdf_bytes = file.file.read()
|
||||||
import tempfile
|
import tempfile
|
||||||
tmp_pdf_path: str | None = None
|
tmp_pdf_path: str | None = None
|
||||||
@@ -317,6 +327,7 @@ class EmbedRequest(BaseModel):
|
|||||||
class EmbedResponse(BaseModel):
|
class EmbedResponse(BaseModel):
|
||||||
dense: list[float]
|
dense: list[float]
|
||||||
sparse: dict
|
sparse: dict
|
||||||
|
device: Optional[str] = None
|
||||||
|
|
||||||
class RerankRequest(BaseModel):
|
class RerankRequest(BaseModel):
|
||||||
query: str
|
query: str
|
||||||
@@ -325,54 +336,133 @@ class RerankRequest(BaseModel):
|
|||||||
class RerankResponse(BaseModel):
|
class RerankResponse(BaseModel):
|
||||||
scores: list[float]
|
scores: list[float]
|
||||||
ranked_indices: list[int]
|
ranked_indices: list[int]
|
||||||
|
device: Optional[str] = None
|
||||||
|
|
||||||
@app.post("/embed", response_model=EmbedResponse, dependencies=[Depends(get_api_key)])
|
@app.post("/embed", response_model=EmbedResponse, dependencies=[Depends(get_api_key)])
|
||||||
def embed_text(req: EmbedRequest):
|
async def embed_text(req: EmbedRequest):
|
||||||
"""BGE-M3 embedding generator (Dense + Sparse)"""
|
"""BGE-M3 embedding generator (Dense + Sparse) พร้อม CPU fallback และ timeout guard"""
|
||||||
if bge_model is None:
|
if bge_model is None:
|
||||||
raise HTTPException(status_code=503, detail="BGE-M3 model not loaded")
|
raise HTTPException(status_code=503, detail="BGE-M3 model not loaded")
|
||||||
|
threshold_mb = float(os.getenv("VRAM_HEADROOM_THRESHOLD_MB", "3000.0"))
|
||||||
|
timeout_sec = float(os.getenv("RETRIEVAL_TIMEOUT_SECONDS", "30.0"))
|
||||||
|
headroom = get_vram_headroom()
|
||||||
|
device = "cuda"
|
||||||
|
reason = "headroom-sufficient"
|
||||||
|
if not headroom.query_success:
|
||||||
|
device = "cpu"
|
||||||
|
reason = "gpu-query-failed"
|
||||||
|
elif headroom.available_mb < threshold_mb:
|
||||||
|
device = "cpu"
|
||||||
|
reason = "gpu-headroom-below-threshold"
|
||||||
try:
|
try:
|
||||||
|
if device == "cuda":
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
bge_model.model.to("cuda")
|
||||||
|
else:
|
||||||
|
device = "cpu"
|
||||||
|
reason = "cuda-not-available"
|
||||||
|
bge_model.model.to("cpu")
|
||||||
|
else:
|
||||||
|
bge_model.model.to("cpu")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to move BGE-M3 model to {device}: {e}")
|
||||||
|
device = "cpu"
|
||||||
|
reason = f"device-move-failed: {str(e)}"
|
||||||
|
try:
|
||||||
|
bge_model.model.to("cpu")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.info(f"Embedding on device: {device} (reason: {reason})")
|
||||||
|
def run_inference():
|
||||||
output = bge_model.encode([req.text], return_dense=True, return_sparse=True)
|
output = bge_model.encode([req.text], return_dense=True, return_sparse=True)
|
||||||
dense_vector = [float(x) for x in output['dense_vecs'][0]]
|
dense_vector = [float(x) for x in output['dense_vecs'][0]]
|
||||||
lexical_dict = output['lexical_weights'][0]
|
lexical_dict = output['lexical_weights'][0]
|
||||||
|
|
||||||
indices = []
|
indices = []
|
||||||
values = []
|
values = []
|
||||||
for token_id, weight in lexical_dict.items():
|
for token_id, weight in lexical_dict.items():
|
||||||
indices.append(int(token_id))
|
indices.append(int(token_id))
|
||||||
values.append(float(weight))
|
values.append(float(weight))
|
||||||
|
return dense_vector, indices, values
|
||||||
|
try:
|
||||||
|
dense_vector, indices, values = await asyncio.wait_for(
|
||||||
|
asyncio.to_thread(run_inference),
|
||||||
|
timeout=timeout_sec
|
||||||
|
)
|
||||||
return EmbedResponse(
|
return EmbedResponse(
|
||||||
dense=dense_vector,
|
dense=dense_vector,
|
||||||
sparse={"indices": indices, "values": values}
|
sparse={"indices": indices, "values": values},
|
||||||
|
device=device
|
||||||
)
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.error(f"Embedding generation timed out after {timeout_sec}s on device {device}")
|
||||||
|
raise HTTPException(status_code=504, detail="Embedding generation timed out")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Embedding generation failed: {e}")
|
logger.error(f"Embedding generation failed: {e}")
|
||||||
raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"Embedding generation failed: {str(e)}")
|
||||||
|
|
||||||
@app.post("/rerank", response_model=RerankResponse, dependencies=[Depends(get_api_key)])
|
@app.post("/rerank", response_model=RerankResponse, dependencies=[Depends(get_api_key)])
|
||||||
def rerank_chunks(req: RerankRequest):
|
async def rerank_chunks(req: RerankRequest):
|
||||||
"""BGE-Reranker-Large chunk re-ranker"""
|
"""BGE-Reranker-Large chunk re-ranker พร้อม CPU fallback และ timeout guard"""
|
||||||
if reranker is None:
|
if reranker is None:
|
||||||
raise HTTPException(status_code=503, detail="Reranker model not loaded")
|
raise HTTPException(status_code=503, detail="Reranker model not loaded")
|
||||||
if not req.chunks:
|
if not req.chunks:
|
||||||
return RerankResponse(scores=[], ranked_indices=[])
|
return RerankResponse(scores=[], ranked_indices=[], device="cpu")
|
||||||
|
threshold_mb = float(os.getenv("VRAM_HEADROOM_THRESHOLD_MB", "3000.0"))
|
||||||
|
timeout_sec = float(os.getenv("RETRIEVAL_TIMEOUT_SECONDS", "30.0"))
|
||||||
|
headroom = get_vram_headroom()
|
||||||
|
device = "cuda"
|
||||||
|
reason = "headroom-sufficient"
|
||||||
|
if not headroom.query_success:
|
||||||
|
device = "cpu"
|
||||||
|
reason = "gpu-query-failed"
|
||||||
|
elif headroom.available_mb < threshold_mb:
|
||||||
|
device = "cpu"
|
||||||
|
reason = "gpu-headroom-below-threshold"
|
||||||
try:
|
try:
|
||||||
|
if device == "cuda":
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
reranker.model.to("cuda")
|
||||||
|
else:
|
||||||
|
device = "cpu"
|
||||||
|
reason = "cuda-not-available"
|
||||||
|
reranker.model.to("cpu")
|
||||||
|
else:
|
||||||
|
reranker.model.to("cpu")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to move Reranker model to {device}: {e}")
|
||||||
|
device = "cpu"
|
||||||
|
reason = f"device-move-failed: {str(e)}"
|
||||||
|
try:
|
||||||
|
reranker.model.to("cpu")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.info(f"Reranking on device: {device} (reason: {reason})")
|
||||||
|
def run_rerank():
|
||||||
pairs = [[req.query, chunk] for chunk in req.chunks]
|
pairs = [[req.query, chunk] for chunk in req.chunks]
|
||||||
scores = reranker.compute_score(pairs)
|
scores = reranker.compute_score(pairs)
|
||||||
if isinstance(scores, float):
|
if isinstance(scores, float):
|
||||||
scores = [scores]
|
scores = [scores]
|
||||||
else:
|
else:
|
||||||
scores = [float(s) for s in scores]
|
scores = [float(s) for s in scores]
|
||||||
|
|
||||||
indexed_scores = list(enumerate(scores))
|
indexed_scores = list(enumerate(scores))
|
||||||
indexed_scores.sort(key=lambda x: x[1], reverse=True)
|
indexed_scores.sort(key=lambda x: x[1], reverse=True)
|
||||||
ranked_indices = [idx for idx, _ in indexed_scores]
|
ranked_indices = [idx for idx, _ in indexed_scores]
|
||||||
|
return scores, ranked_indices
|
||||||
|
try:
|
||||||
|
scores, ranked_indices = await asyncio.wait_for(
|
||||||
|
asyncio.to_thread(run_rerank),
|
||||||
|
timeout=timeout_sec
|
||||||
|
)
|
||||||
return RerankResponse(
|
return RerankResponse(
|
||||||
scores=scores,
|
scores=scores,
|
||||||
ranked_indices=ranked_indices
|
ranked_indices=ranked_indices,
|
||||||
|
device=device
|
||||||
)
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.error(f"Reranking timed out after {timeout_sec}s on device {device}")
|
||||||
|
raise HTTPException(status_code=504, detail="Reranking timed out")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Reranking failed: {e}")
|
logger.error(f"Reranking failed: {e}")
|
||||||
raise HTTPException(status_code=500, detail=f"Reranking failed: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"Reranking failed: {str(e)}")
|
||||||
|
|||||||
+7
@@ -13,6 +13,7 @@
|
|||||||
# - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_OCR_MODEL เป็น typhoon-np-dms-ocr:latest; OLLAMA_API_URL ชี้ตรงไป Ollama (ไม่ผ่าน metrics proxy) เพื่อป้องกัน empty response
|
# - 2026-06-04: ADR-034 — เปลี่ยน TYPHOON_OCR_MODEL เป็น typhoon-np-dms-ocr:latest; OLLAMA_API_URL ชี้ตรงไป Ollama (ไม่ผ่าน metrics proxy) เพื่อป้องกัน empty response
|
||||||
# - 2026-06-02: เพิ่ม ollama-metrics (NorskHelsenett) — Prometheus sidecar สำหรับ Ollama metrics
|
# - 2026-06-02: เพิ่ม ollama-metrics (NorskHelsenett) — Prometheus sidecar สำหรับ Ollama metrics
|
||||||
# expose /metrics บน port 9924; Prometheus (ASUSTOR) scrape จาก 192.168.10.100:9924
|
# expose /metrics บน port 9924; Prometheus (ASUSTOR) scrape จาก 192.168.10.100:9924
|
||||||
|
# - 2026-06-11: US2 & US3 - เพิ่ม VRAM headroom, residency window, pressure threshold, retrieval timeout env variables
|
||||||
#
|
#
|
||||||
# วิธีรัน:
|
# วิธีรัน:
|
||||||
# docker compose up -d --build
|
# docker compose up -d --build
|
||||||
@@ -45,6 +46,12 @@ services:
|
|||||||
TYPHOON_OCR_MODEL: "typhoon-np-dms-ocr:latest"
|
TYPHOON_OCR_MODEL: "typhoon-np-dms-ocr:latest"
|
||||||
# Timeout 360 วินาที/หน้า — รองรับ cold-start โหลด model (~70s) + inference (10GB model, CPU offload)
|
# Timeout 360 วินาที/หน้า — รองรับ cold-start โหลด model (~70s) + inference (10GB model, CPU offload)
|
||||||
TYPHOON_OCR_TIMEOUT: "360"
|
TYPHOON_OCR_TIMEOUT: "360"
|
||||||
|
# ─── VRAM, Residency & Timeout Configurations (Feature-235) ──────────────
|
||||||
|
VRAM_HEADROOM_THRESHOLD_MB: "3000.0"
|
||||||
|
OCR_RESIDENCY_WINDOW_SECONDS: "120"
|
||||||
|
GPU_TOTAL_VRAM_MB: "16384.0"
|
||||||
|
GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB: "12000.0"
|
||||||
|
RETRIEVAL_TIMEOUT_SECONDS: "30.0"
|
||||||
logging:
|
logging:
|
||||||
driver: "json-file"
|
driver: "json-file"
|
||||||
options:
|
options:
|
||||||
|
|||||||
+34
@@ -0,0 +1,34 @@
|
|||||||
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/residency_policy.py
|
||||||
|
# Change Log:
|
||||||
|
# - 2026-06-11: Initial creation of residency_policy.py for calculating OCR keep_alive value dynamically
|
||||||
|
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from services.vram_monitor import get_vram_headroom
|
||||||
|
|
||||||
|
logger = logging.getLogger("ocr-sidecar.residency-policy")
|
||||||
|
|
||||||
|
@dataclass
class OcrResidencyDecision:
    """Outcome of the OCR residency policy: how long the OCR model should stay loaded and why."""

    # keep_alive value in seconds; 0 means "do not keep the model resident".
    keep_alive_seconds: int
    # Available VRAM (MB) observed when the decision was made; -1.0 when it was not measured.
    vram_headroom_mb: float
    # Short machine-readable tag naming the rule that produced this decision.
    reason: str
|
||||||
|
|
||||||
|
def calculate_ocr_residency(active_profile: str = None) -> OcrResidencyDecision:
    """
    Compute the keep_alive value for Typhoon OCR from VRAM headroom and the
    main model's active execution profile.

    Environment variables (read on every call):
        VRAM_HEADROOM_THRESHOLD_MB: minimum free VRAM required to keep OCR resident
            (default "3000.0").
        OCR_RESIDENCY_WINDOW_SECONDS: keep_alive granted when headroom is sufficient
            (default "120").
        GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB: used-VRAM level above which the GPU is
            treated as under pressure (default "7000.0").

    Args:
        active_profile: execution profile currently active on the main model, or None.

    Returns:
        OcrResidencyDecision whose ``reason`` is one of "deep-analysis-active",
        "query-failed", "high-pressure" or "headroom-sufficient". keep_alive_seconds
        is 0 for every reason except "headroom-sufficient".

    Raises:
        ValueError: if one of the environment variables above is not numeric.
    """
    threshold_mb = float(os.getenv("VRAM_HEADROOM_THRESHOLD_MB", "3000.0"))
    residency_window = int(os.getenv("OCR_RESIDENCY_WINDOW_SECONDS", "120"))
    pressure_threshold = float(os.getenv("GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB", "7000.0"))

    # "large-context" is the pre-rename name of the "deep-analysis" profile and is still
    # accepted here. The reason tag uses the renamed value so it matches the
    # OcrResidencyDecision contract in data-model.md and the cutover checklist.
    if active_profile in ("deep-analysis", "large-context"):
        return OcrResidencyDecision(0, -1.0, "deep-analysis-active")

    headroom = get_vram_headroom()
    if not headroom.query_success:
        # VRAM state unknown: do not keep the OCR model resident.
        return OcrResidencyDecision(0, -1.0, "query-failed")

    # Pressure is signalled either by high absolute usage or by too little free VRAM;
    # both cases unload OCR immediately with the same reason.
    if headroom.used_mb > pressure_threshold or headroom.available_mb < threshold_mb:
        return OcrResidencyDecision(0, headroom.available_mb, "high-pressure")

    return OcrResidencyDecision(residency_window, headroom.available_mb, "headroom-sufficient")
|
||||||
+43
@@ -0,0 +1,43 @@
|
|||||||
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/vram_monitor.py
|
||||||
|
# Change Log:
|
||||||
|
# - 2026-06-11: Initial creation of VramMonitor service for Python OCR sidecar to query GPU VRAM headroom from Ollama /api/ps
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import os
|
||||||
|
import httpx
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger("ocr-sidecar.vram-monitor")
|
||||||
|
|
||||||
|
@dataclass
class VramHeadroom:
    """Snapshot of GPU VRAM usage as produced by ``get_vram_headroom``."""

    # Total VRAM in MB.
    total_mb: float
    # VRAM in use, in MB.
    used_mb: float
    # Remaining VRAM in MB.
    available_mb: float
    # False when the usage query failed and the other fields are fallback values.
    query_success: bool
|
||||||
|
|
||||||
|
def get_vram_headroom() -> VramHeadroom:
    """
    Query Ollama ``/api/ps`` and compute the remaining VRAM, which feeds the
    residency and CPU-fallback decisions.

    Environment variables (read on every call):
        OLLAMA_API_URL: base URL of Ollama (default "http://host.docker.internal:11434");
            a trailing slash is tolerated.
        GPU_TOTAL_VRAM_MB: total VRAM assumed for the GPU (default "16384.0").

    Returns:
        On success, a VramHeadroom with ``used_mb`` equal to the sum of ``size_vram``
        over the models reported by Ollama (converted from bytes to MB) and
        ``available_mb = max(0, total - used)``.
        On a non-200 response or any exception, a VramHeadroom reporting the GPU as
        full (used == total, available == 0.0) with ``query_success=False``.

    Raises:
        ValueError: if GPU_TOTAL_VRAM_MB is not numeric (it is parsed before the try block).
    """
    # Strip a trailing slash so the request path never becomes "//api/ps".
    ollama_url = os.getenv("OLLAMA_API_URL", "http://host.docker.internal:11434").rstrip("/")
    total_vram_mb = float(os.getenv("GPU_TOTAL_VRAM_MB", "16384.0"))
    try:
        # Fetch the list of running models from Ollama.
        with httpx.Client(timeout=3.0) as client:
            response = client.get(f"{ollama_url}/api/ps")
            if response.status_code != 200:
                logger.warning(f"Ollama ps endpoint returned status code: {response.status_code}")
                return VramHeadroom(total_vram_mb, total_vram_mb, 0.0, False)

            data = response.json()
            # Treat a missing or null "models" / "size_vram" as "nothing loaded" rather
            # than letting a TypeError be reported as a failed query.
            models = data.get("models") or []
            total_used_bytes = sum((model.get("size_vram") or 0) for model in models)

            used_mb = float(total_used_bytes) / (1024.0 * 1024.0)
            available_mb = max(0.0, total_vram_mb - used_mb)
            return VramHeadroom(total_vram_mb, used_mb, available_mb, True)
    except Exception as e:
        # Best-effort probe: any failure is reported as "no headroom" so callers
        # take the conservative path.
        logger.warning(f"Failed to query Ollama VRAM: {str(e)}")
        return VramHeadroom(total_vram_mb, total_vram_mb, 0.0, False)
|
||||||
+95
@@ -0,0 +1,95 @@
|
|||||||
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/tests/test_retrieval_fallback.py
|
||||||
|
# Change Log:
|
||||||
|
# - 2026-06-11: Initial integration tests for retrieval fallback using pytest
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
# Setup env variables before importing app
|
||||||
|
os.environ["OCR_SIDECAR_API_KEY"] = "test-key"
|
||||||
|
os.environ["VRAM_HEADROOM_THRESHOLD_MB"] = "3000.0"
|
||||||
|
os.environ["RETRIEVAL_TIMEOUT_SECONDS"] = "2.0"
|
||||||
|
|
||||||
|
from app import app, EmbedRequest, RerankRequest, get_api_key
|
||||||
|
|
||||||
|
client = TestClient(app)
|
||||||
|
API_HEADERS = {"X-API-Key": "test-key"}
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_bge_model():
    """Replace ``app.bge_model`` with a mock whose ``encode`` returns a canned embedding payload."""
    canned_output = {
        "dense_vecs": [[0.1, 0.2]],
        "lexical_weights": [{"101": 0.5}]
    }
    with patch("app.bge_model") as fake_model:
        fake_model.model = MagicMock()
        fake_model.encode.return_value = canned_output
        yield fake_model
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_reranker():
    """Replace ``app.reranker`` with a mock whose ``compute_score`` returns a single fixed score."""
    with patch("app.reranker") as fake_reranker:
        fake_reranker.model = MagicMock()
        fake_reranker.compute_score.return_value = [0.85]
        yield fake_reranker
|
||||||
|
|
||||||
|
def test_embed_gpu_when_headroom_sufficient(mock_bge_model):
    """With ample VRAM headroom and CUDA reported available, /embed runs on "cuda"."""
    roomy_vram = MagicMock(total_mb=16384.0, used_mb=2000.0, available_mb=14384.0, query_success=True)
    with patch("app.get_vram_headroom", return_value=roomy_vram):
        with patch("torch.cuda.is_available", return_value=True):
            response = client.post("/embed", json={"text": "hello test"}, headers=API_HEADERS)

    assert response.status_code == 200
    body = response.json()
    assert body["device"] == "cuda"
    mock_bge_model.model.to.assert_called_with("cuda")
|
||||||
|
|
||||||
|
def test_embed_cpu_when_headroom_insufficient(mock_bge_model):
    """With only 2384 MB free (below the 3000 MB threshold set for this test module), /embed falls back to "cpu"."""
    tight_vram = MagicMock(total_mb=16384.0, used_mb=14000.0, available_mb=2384.0, query_success=True)
    with patch("app.get_vram_headroom", return_value=tight_vram):
        response = client.post("/embed", json={"text": "hello test"}, headers=API_HEADERS)

    assert response.status_code == 200
    body = response.json()
    assert body["device"] == "cpu"
    mock_bge_model.model.to.assert_called_with("cpu")
|
||||||
|
|
||||||
|
def test_embed_cpu_when_gpu_query_failed(mock_bge_model):
    """When the VRAM query reports failure, /embed falls back to "cpu"."""
    failed_probe = MagicMock(total_mb=16384.0, used_mb=16384.0, available_mb=0.0, query_success=False)
    with patch("app.get_vram_headroom", return_value=failed_probe):
        response = client.post("/embed", json={"text": "hello test"}, headers=API_HEADERS)

    assert response.status_code == 200
    body = response.json()
    assert body["device"] == "cpu"
    mock_bge_model.model.to.assert_called_with("cpu")
|
||||||
|
|
||||||
|
def test_embed_timeout_returns_504(mock_bge_model):
    """An encode call that blocks for 3.0 s must surface as HTTP 504 from /embed."""
    roomy_vram = MagicMock(total_mb=16384.0, used_mb=2000.0, available_mb=14384.0, query_success=True)

    # Stand-in for a slow model run.
    # NOTE(review): presumably exceeds the 2.0 s RETRIEVAL_TIMEOUT_SECONDS set at the top
    # of this module — confirm the app reads that variable for /embed.
    def _stalled_encode(*args, **kwargs):
        import time
        time.sleep(3.0)
        return {"dense_vecs": [[0.1]], "lexical_weights": [{"1": 0.1}]}

    mock_bge_model.encode.side_effect = _stalled_encode

    with patch("app.get_vram_headroom", return_value=roomy_vram):
        response = client.post("/embed", json={"text": "hello test"}, headers=API_HEADERS)

    assert response.status_code == 504
|
||||||
|
|
||||||
|
def test_rerank_gpu_when_headroom_sufficient(mock_reranker):
    """With ample VRAM headroom and CUDA reported available, /rerank runs on "cuda"."""
    roomy_vram = MagicMock(total_mb=16384.0, used_mb=2000.0, available_mb=14384.0, query_success=True)
    request_body = {"query": "test query", "chunks": ["chunk1"]}
    with patch("app.get_vram_headroom", return_value=roomy_vram):
        with patch("torch.cuda.is_available", return_value=True):
            response = client.post("/rerank", json=request_body, headers=API_HEADERS)

    assert response.status_code == 200
    body = response.json()
    assert body["device"] == "cuda"
    mock_reranker.model.to.assert_called_with("cuda")
|
||||||
|
|
||||||
|
def test_rerank_cpu_when_headroom_insufficient(mock_reranker):
    """With only 2384 MB free (below the 3000 MB threshold set for this test module), /rerank falls back to "cpu"."""
    tight_vram = MagicMock(total_mb=16384.0, used_mb=14000.0, available_mb=2384.0, query_success=True)
    request_body = {"query": "test query", "chunks": ["chunk1"]}
    with patch("app.get_vram_headroom", return_value=tight_vram):
        response = client.post("/rerank", json=request_body, headers=API_HEADERS)

    assert response.status_code == 200
    body = response.json()
    assert body["device"] == "cpu"
    mock_reranker.model.to.assert_called_with("cpu")
|
||||||
@@ -89,7 +89,7 @@
|
|||||||
**Purpose**: Documentation update + compliance verification
|
**Purpose**: Documentation update + compliance verification
|
||||||
|
|
||||||
- [X] T018 [P] อัปเดต `AGENTS.md` — Current Decisions D10: เปลี่ยน `gemma4:e4b Q8_0` เป็น `typhoon2.5-np-dms:latest (main) + typhoon-np-dms-ocr:latest (OCR)`; อัปเดต version เป็น v1.9.9 และ sync date
|
- [X] T018 [P] อัปเดต `AGENTS.md` — Current Decisions D10: เปลี่ยน `gemma4:e4b Q8_0` เป็น `typhoon2.5-np-dms:latest (main) + typhoon-np-dms-ocr:latest (OCR)`; อัปเดต version เป็น v1.9.9 และ sync date
|
||||||
- [X] T019 [P] อัปเดต `memory/agent-memory.md` — Section 2.5 model names + Section 5 D10 + Section 7 Ollama row + Section 8 Recent Rollouts entry
|
- [X] T019 [P] อัปเดต `memory/project-memory-override.md` — Section 2.5 model names + Section 5 D10 + Section 7 Ollama row + Section 8 Recent Rollouts entry
|
||||||
- [X] T020 [P] อัปเดต `.agents/rules/11-ai-integration.md` — 2-model stack: `gemma4:e2b → typhoon2.5-np-dms:latest`
|
- [X] T020 [P] อัปเดต `.agents/rules/11-ai-integration.md` — 2-model stack: `gemma4:e2b → typhoon2.5-np-dms:latest`
|
||||||
- [ ] T021 [P] รัน type check: `pnpm --filter backend build` — ต้องผ่าน 0 errors
|
- [ ] T021 [P] รัน type check: `pnpm --filter backend build` — ต้องผ่าน 0 errors
|
||||||
- [ ] T022 [P] รัน lint: `pnpm --filter backend lint` — ตรวจสอบ no console.log, no any
|
- [ ] T022 [P] รัน lint: `pnpm --filter backend lint` — ตรวจสอบ no console.log, no any
|
||||||
|
|||||||
@@ -146,7 +146,7 @@
|
|||||||
- [x] T047 [P] Add error handling for VRAM insufficiency in backend/src/modules/ai/services/ai.service.ts
|
- [x] T047 [P] Add error handling for VRAM insufficiency in backend/src/modules/ai/services/ai.service.ts
|
||||||
- [x] T048 [P] Add error handling for Ollama service unavailability in backend/src/modules/ai/services/ocr.service.ts
|
- [x] T048 [P] Add error handling for Ollama service unavailability in backend/src/modules/ai/services/ocr.service.ts
|
||||||
- [x] T049 Run quickstart.md validation on Admin Desktop
|
- [x] T049 Run quickstart.md validation on Admin Desktop
|
||||||
- [x] T050 Update agent-memory.md with Typhoon OCR integration details
|
- [x] T050 Update project-memory-override.md with Typhoon OCR integration details
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,256 @@
|
|||||||
|
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/checklists/cutover-validation.md
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial cutover validation checklist for T032 and sidecar pytest
|
||||||
|
|
||||||
|
# Cutover Validation Checklist: Feature 235
|
||||||
|
|
||||||
|
**Purpose**: ใช้ปิด `T032` และเก็บหลักฐานสำหรับเลื่อนสถานะ validation จาก `PARTIAL` ไป `PASS`
|
||||||
|
|
||||||
|
> หมายเหตุ
|
||||||
|
>
|
||||||
|
> - Checklist นี้อิง **implementation ปัจจุบัน** ของ Option B
|
||||||
|
> - อย่าใช้ตัวอย่างเก่าใน `quickstart.md` ที่ยังส่ง `executionProfile` / `large-context` จาก caller
|
||||||
|
> - คำสั่งด้านล่างเป็น **PowerShell** ตามกฎของ repo
|
||||||
|
|
||||||
|
## 1. Environment Ready
|
||||||
|
|
||||||
|
- [ ] Backend รันที่ `http://localhost:3001`
|
||||||
|
- [ ] Frontend รันที่ `http://localhost:3000`
|
||||||
|
- [ ] OCR sidecar รันที่ `http://192.168.10.100:8765`
|
||||||
|
- [ ] Ollama รันและมี tag `np-dms-ai` / `np-dms-ocr`
|
||||||
|
- [ ] มี admin token สำหรับเรียก API
|
||||||
|
- [ ] มี `documentPublicId` และ `projectPublicId` ที่มีอยู่จริงสำหรับทดสอบ `rag-query`
|
||||||
|
- [ ] มีไฟล์ PDF ตัวอย่างสำหรับ OCR Sandbox
|
||||||
|
|
||||||
|
## 2. Automated Validation
|
||||||
|
|
||||||
|
### 2.1 Backend targeted tests
|
||||||
|
|
||||||
|
- [ ] รัน:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
pnpm --filter backend test -- --runInBand --testPathPatterns="ai.service.spec.ts|queue-policy.spec.ts|ai.controller.spec.ts|ai-policy.service.spec.ts|ocr-residency.spec.ts|vram-monitor.service.spec.ts"
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected: ทุก suite ผ่าน
|
||||||
|
|
||||||
|
### 2.2 Backend build
|
||||||
|
|
||||||
|
- [ ] รัน:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
pnpm --filter backend build
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected: build ผ่านไม่มี compile error
|
||||||
|
|
||||||
|
### 2.3 Sidecar pytest
|
||||||
|
|
||||||
|
- [ ] รัน:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
python -m pytest specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/tests -v
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected: `test_retrieval_fallback.py` ผ่านครบ
|
||||||
|
- [ ] ถ้า `pytest` ไม่พบ module: บันทึกว่า environment ยังไม่พร้อม และติดตั้ง dependency ก่อน rerun
|
||||||
|
|
||||||
|
## 3. Manual Gate 1: Policy Contract
|
||||||
|
|
||||||
|
ตั้งค่า token และ ids ก่อน:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$TOKEN = "<admin-jwt>"
|
||||||
|
$PROJECT_PUBLIC_ID = "<existing-project-public-id>"
|
||||||
|
$DOCUMENT_PUBLIC_ID = "<existing-document-public-id>"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.1 Reject forbidden `model`
|
||||||
|
|
||||||
|
- [ ] รัน:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$body = @{
|
||||||
|
type = "rag-query"
|
||||||
|
projectPublicId = $PROJECT_PUBLIC_ID
|
||||||
|
payload = @{ query = "test policy contract" }
|
||||||
|
model = @{ key = "typhoon2.5-np-dms:latest" }
|
||||||
|
} | ConvertTo-Json -Depth 5
|
||||||
|
|
||||||
|
Invoke-RestMethod "http://localhost:3001/api/ai/jobs" `
|
||||||
|
-Method Post `
|
||||||
|
-Headers @{
|
||||||
|
Authorization = "Bearer $TOKEN"
|
||||||
|
"Idempotency-Key" = "feature235-gate1-model"
|
||||||
|
"Content-Type" = "application/json"
|
||||||
|
} `
|
||||||
|
-Body $body
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected: HTTP `400`
|
||||||
|
|
||||||
|
### 3.2 Reject forbidden `executionProfile`
|
||||||
|
|
||||||
|
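- [ ] รัน (สร้าง `$body` ตามด้านล่าง แล้วส่งด้วยคำสั่ง `Invoke-RestMethod` แบบเดียวกับข้อ 3.1 โดยเปลี่ยน `Idempotency-Key` ให้ไม่ซ้ำ):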
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$body = @{
|
||||||
|
type = "rag-query"
|
||||||
|
projectPublicId = $PROJECT_PUBLIC_ID
|
||||||
|
payload = @{ query = "test forbidden profile" }
|
||||||
|
executionProfile = "quality"
|
||||||
|
} | ConvertTo-Json -Depth 5
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected: HTTP `400`
|
||||||
|
|
||||||
|
### 3.3 Reject forbidden parameter override
|
||||||
|
|
||||||
|
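- [ ] รัน (สร้าง `$body` ตามด้านล่าง แล้วส่งด้วยคำสั่ง `Invoke-RestMethod` แบบเดียวกับข้อ 3.1 โดยเปลี่ยน `Idempotency-Key` ให้ไม่ซ้ำ):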
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$body = @{
|
||||||
|
type = "rag-query"
|
||||||
|
projectPublicId = $PROJECT_PUBLIC_ID
|
||||||
|
payload = @{ query = "test forbidden temperature" }
|
||||||
|
temperature = 0.9
|
||||||
|
} | ConvertTo-Json -Depth 5
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected: HTTP `400`
|
||||||
|
|
||||||
|
### 3.4 Valid `rag-query`
|
||||||
|
|
||||||
|
- [ ] รัน:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$body = @{
|
||||||
|
type = "rag-query"
|
||||||
|
projectPublicId = $PROJECT_PUBLIC_ID
|
||||||
|
documentPublicId = $DOCUMENT_PUBLIC_ID
|
||||||
|
payload = @{ query = "สรุปเอกสารนี้" }
|
||||||
|
} | ConvertTo-Json -Depth 5
|
||||||
|
|
||||||
|
Invoke-RestMethod "http://localhost:3001/api/ai/jobs" `
|
||||||
|
-Method Post `
|
||||||
|
-Headers @{
|
||||||
|
Authorization = "Bearer $TOKEN"
|
||||||
|
"Idempotency-Key" = "feature235-gate1-valid"
|
||||||
|
"Content-Type" = "application/json"
|
||||||
|
} `
|
||||||
|
-Body $body
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] Expected:
|
||||||
|
- HTTP `201`
|
||||||
|
- `modelUsed = "np-dms-ai"`
|
||||||
|
- `effectiveProfile = "standard"`
|
||||||
|
- `queueName = "ai-batch"`
|
||||||
|
|
||||||
|
## 4. Manual Gate 2: Canonical Naming
|
||||||
|
|
||||||
|
### 4.1 Audit log check
|
||||||
|
|
||||||
|
- [ ] ตรวจ row ล่าสุดใน `ai_audit_logs`
|
||||||
|
- [ ] Expected:
|
||||||
|
- `effective_profile` มีค่า
|
||||||
|
- `canonical_model` เป็น `np-dms-ai` หรือ `np-dms-ocr`
|
||||||
|
- ไม่มี runtime name หลุดออกในฟิลด์ user-facing
|
||||||
|
|
||||||
|
### 4.2 Admin Console check
|
||||||
|
|
||||||
|
- [ ] เปิด `http://localhost:3000/admin/ai`
|
||||||
|
- [ ] ตรวจ Overview / health / model cards
|
||||||
|
- [ ] Expected:
|
||||||
|
- เห็น `np-dms-ai`
|
||||||
|
- เห็น `np-dms-ocr`
|
||||||
|
- ไม่เห็น `typhoon2.5-np-dms:latest`
|
||||||
|
- ไม่เห็น `typhoon-np-dms-ocr:latest`
|
||||||
|
|
||||||
|
### 4.3 OCR Sandbox badge check
|
||||||
|
|
||||||
|
- [ ] เปิด OCR Sandbox ในหน้า admin AI
|
||||||
|
- [ ] รัน OCR 1 รอบ
|
||||||
|
- [ ] Expected:
|
||||||
|
- badge หรือ result label แสดง `np-dms-ocr`
|
||||||
|
- ไม่โชว์ runtime name โดยตรง
|
||||||
|
|
||||||
|
## 5. Manual Gate 3: Adaptive OCR Residency
|
||||||
|
|
||||||
|
### 5.1 High-pressure / deep-analysis behavior
|
||||||
|
|
||||||
|
- [ ] ทำให้ main model กิน VRAM สูง หรือจำลอง workload ที่เข้าข่าย pressure
|
||||||
|
- [ ] รัน OCR Sandbox หรือ OCR job
|
||||||
|
- [ ] ตรวจ sidecar / backend logs
|
||||||
|
- [ ] Expected:
|
||||||
|
- `keep_alive = 0`
|
||||||
|
- reason เป็น `high-pressure` หรือ `deep-analysis-active`
|
||||||
|
|
||||||
|
### 5.2 Headroom sufficient behavior
|
||||||
|
|
||||||
|
- [ ] รัน OCR job ตอนที่ GPU headroom สูง
|
||||||
|
- [ ] ตรวจ logs
|
||||||
|
- [ ] Expected:
|
||||||
|
- `keep_alive > 0`
|
||||||
|
- reason เป็น `headroom-sufficient`
|
||||||
|
|
||||||
|
## 6. Manual Gate 4: Retrieval CPU Fallback
|
||||||
|
|
||||||
|
### 6.1 Force GPU pressure
|
||||||
|
|
||||||
|
- [ ] warm model:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$warm = @{
|
||||||
|
model = "np-dms-ai"
|
||||||
|
prompt = "warmup"
|
||||||
|
keep_alive = -1
|
||||||
|
} | ConvertTo-Json
|
||||||
|
|
||||||
|
Invoke-RestMethod "http://localhost:11434/api/generate" `
|
||||||
|
-Method Post `
|
||||||
|
-ContentType "application/json" `
|
||||||
|
-Body $warm
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 Submit `rag-query` under pressure
|
||||||
|
|
||||||
|
- [ ] ส่ง request แบบเดียวกับข้อ 3.4 (Gate 1 — Valid `rag-query`) แต่เปลี่ยน `Idempotency-Key`
|
||||||
|
- [ ] Expected:
|
||||||
|
- request enqueue สำเร็จ
|
||||||
|
- job ไม่ fail hard
|
||||||
|
|
||||||
|
### 6.3 Verify fallback evidence
|
||||||
|
|
||||||
|
- [ ] ตรวจ sidecar logs
|
||||||
|
- [ ] Expected:
|
||||||
|
- `device=cpu` หรือ `device: cpu`
|
||||||
|
- reason เป็น `gpu-headroom-below-threshold` หรือ `gpu-query-failed`
|
||||||
|
|
||||||
|
## 7. Evidence to Attach
|
||||||
|
|
||||||
|
- [ ] backend test output
|
||||||
|
- [ ] backend build output
|
||||||
|
- [ ] sidecar pytest output
|
||||||
|
- [ ] screenshot หน้า `/admin/ai`
|
||||||
|
- [ ] screenshot OCR Sandbox result
|
||||||
|
- [ ] copy log line ของ residency decision
|
||||||
|
- [ ] copy log line ของ CPU fallback
|
||||||
|
- [ ] sample successful `rag-query` response body
|
||||||
|
|
||||||
|
## 8. Pass Criteria
|
||||||
|
|
||||||
|
- [ ] Automated backend tests ผ่าน
|
||||||
|
- [ ] Backend build ผ่าน
|
||||||
|
- [ ] Sidecar pytest ผ่าน
|
||||||
|
- [ ] Gate 1 ผ่านครบ
|
||||||
|
- [ ] Gate 2 ผ่านครบ
|
||||||
|
- [ ] Gate 3 ผ่านครบ
|
||||||
|
- [ ] Gate 4 ผ่านครบ
|
||||||
|
- [ ] หลักฐานถูกแนบหรือบันทึกไว้ใน feature folder
|
||||||
|
|
||||||
|
## 9. Follow-up After Completion
|
||||||
|
|
||||||
|
- [ ] update `tasks.md` ให้ติ๊ก `T032`
|
||||||
|
- [ ] update `validation-report.md` จาก `PARTIAL` เป็น `PASS`
|
||||||
|
- [ ] ถ้าเจอ spec drift ให้ปรับ `quickstart.md` และจุดอ้างอิงที่ยังใช้ contract เก่า
|
||||||
+54
-14
@@ -1,51 +1,91 @@
|
|||||||
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/contracts/create-ai-job.dto.md
|
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/contracts/create-ai-job.dto.md
|
||||||
// Change Log:
|
// Change Log:
|
||||||
// - 2026-06-11: API contract for CreateAiJobDto
|
// - 2026-06-11: API contract for CreateAiJobDto
|
||||||
|
// - 2026-06-11: Option B — backend-determined policy; ลบ executionProfile ออกจาก request
|
||||||
|
// - 2026-06-11: Rename profiles — interactive/standard/quality/deep-analysis; เพิ่ม default values จาก docs/ai-profiles.md
|
||||||
|
|
||||||
# Contract: POST /api/ai/jobs
|
# Contract: POST /api/ai/jobs
|
||||||
|
|
||||||
## Request DTO
|
## Request DTO
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
|
// PublicJobType — เปิดให้ caller ส่งมาใน API
|
||||||
|
type PublicJobType = 'auto-fill-document' | 'migrate-document' | 'rag-query';
|
||||||
|
|
||||||
|
// InternalJobType — ใช้ภายใน AiPolicyService เท่านั้น ไม่ expose ใน API
|
||||||
|
type InternalJobType = PublicJobType | 'intent-classify' | 'tool-suggest' | 'ocr-extract' | 'sandbox-analysis';
|
||||||
|
|
||||||
interface CreateAiJobRequest {
|
interface CreateAiJobRequest {
|
||||||
type: 'auto-fill-document' | 'migrate-document' | 'rag-query';
|
type: PublicJobType;
|
||||||
documentPublicId?: string; // UUIDv7 — ADR-019
|
documentPublicId?: string; // UUIDv7 — ADR-019
|
||||||
attachmentPublicId?: string; // UUIDv7 — ADR-019
|
attachmentPublicId?: string; // UUIDv7 — ADR-019
|
||||||
executionProfile?: 'fast' | 'balanced' | 'thai-accurate' | 'large-context';
|
// [FORBIDDEN] executionProfile — HTTP 400 if present (backend กำหนดเอง)
|
||||||
// [FORBIDDEN] model.key — HTTP 400 if present
|
// [FORBIDDEN] model.key — HTTP 400 if present
|
||||||
// [FORBIDDEN] temperature, top_p, maxTokens — HTTP 400 if present
|
// [FORBIDDEN] temperature, top_p, maxTokens — HTTP 400 if present
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **หมายเหตุ**: ไม่มี `executionProfile` ใน request — backend กำหนด execution policy ทั้งหมดจาก `job.type` อัตโนมัติ user ทั่วไปไม่ต้องรู้จัก profile เลย
|
||||||
|
> `intent-classify`, `tool-suggest`, `ocr-extract` เป็น **internal job types** — เกิดภายใน service โดยตรง ไม่ผ่าน API
|
||||||
|
|
||||||
## Validation Rules
|
## Validation Rules
|
||||||
|
|
||||||
| Field | Rule |
|
| Field | Rule |
|
||||||
|-------|------|
|
|-------|------|
|
||||||
| `type` | Required; enum |
|
| `type` | Required; enum `'auto-fill-document' \| 'migrate-document' \| 'rag-query'` |
|
||||||
| `executionProfile` | Optional; enum; defaults to `balanced` |
|
| `executionProfile` | **FORBIDDEN** — HTTP 400 ถ้ามีใน payload |
|
||||||
| `large-context` | Requires admin role (CASL `ai.use_large_context`) — HTTP 403 if unauthorized |
|
| `model.*` | **FORBIDDEN** — ANY model subfield → HTTP 400 |
|
||||||
| `model.*` | ANY model subfield → HTTP 400 |
|
| `temperature` | **FORBIDDEN** — HTTP 400 ถ้ามีใน payload |
|
||||||
| `temperature` | Present at root → HTTP 400 |
|
| `top_p` | **FORBIDDEN** — HTTP 400 ถ้ามีใน payload |
|
||||||
| `top_p` | Present at root → HTTP 400 |
|
| `maxTokens` | **FORBIDDEN** — HTTP 400 ถ้ามีใน payload |
|
||||||
| `maxTokens` | Present at root → HTTP 400 |
|
| `documentPublicId` | Optional; UUIDv7 string (ADR-019) — ห้าม parseInt |
|
||||||
|
| `attachmentPublicId` | Optional; UUIDv7 string (ADR-019) — ห้าม parseInt |
|
||||||
|
|
||||||
|
## Job Type → Effective Profile Mapping (Backend Policy)
|
||||||
|
|
||||||
|
| `job.type` | `effectiveProfile` | `canonicalModel` | `queueName` | Notes |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| `auto-fill-document` | `quality` | `np-dms-ai` | `ai-batch` |
|
||||||
|
| `migrate-document` | `quality` | `np-dms-ai` | `ai-batch` |
|
||||||
|
| `rag-query` | `standard` | `np-dms-ai` | `ai-batch` |
|
||||||
|
| `intent-classify` | `interactive` | `np-dms-ai` | `ai-realtime` | *(internal only)* |
|
||||||
|
| `tool-suggest` | `interactive` | `np-dms-ai` | `ai-realtime` | *(internal only)* |
|
||||||
|
| `ocr-extract` | *(OCR residency policy)* | `np-dms-ocr` | `ai-batch` | *(internal only)* |
|
||||||
|
| `sandbox-analysis` | `deep-analysis` | `np-dms-ai` | `ai-batch` | *(admin OCR Sandbox only)* |
|
||||||
|
|
||||||
|
> Mapping นี้กำหนดใน `AiPolicyService` — ไม่ expose ให้ caller เห็น
|
||||||
|
|
||||||
|
## Profile Default Parameters (จาก `docs/ai-profiles.md`)
|
||||||
|
|
||||||
|
| Profile | `temperature` | `top_p` | `max_tokens` | `num_ctx` | `repeat_penalty` | `keep_alive` |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| `interactive` | 0.7 | 0.9 | 2048 | 4096 | 1.15 | `"5m"` |
|
||||||
|
| `standard` | 0.5 | 0.8 | 4096 | 8192 | 1.15 | `"10m"` |
|
||||||
|
| `quality` | 0.1 | 0.95 | 8192 | 8192 | 1.15 | `"10m"` |
|
||||||
|
| `deep-analysis` | 0.3 | 0.85 | 8192 | 32768 | 1.15 | `"0"` |
|
||||||
|
|
||||||
|
> ค่าเหล่านี้เป็น **default** — ops/admin calibrate ได้ผ่าน Admin Console และบันทึกใน DB ตาม ADR-029 (Dynamic Prompt Management)
|
||||||
|
|
||||||
## Response DTO
|
## Response DTO
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
|
type ExecutionProfile = 'interactive' | 'standard' | 'quality' | 'deep-analysis';
|
||||||
|
|
||||||
interface AiJobResponse {
|
interface AiJobResponse {
|
||||||
jobId: string; // BullMQ job ID
|
jobId: string; // BullMQ job ID
|
||||||
status: 'queued' | 'completed' | 'failed';
|
status: 'queued' | 'completed' | 'failed';
|
||||||
modelUsed: 'np-dms-ai' | 'np-dms-ocr'; // Canonical name — never runtime tag
|
modelUsed: 'np-dms-ai' | 'np-dms-ocr'; // Canonical name — never runtime tag
|
||||||
executionProfile: ExecutionProfile; // Effective profile (after backend override)
|
effectiveProfile: ExecutionProfile; // Profile ที่ backend กำหนดจาก job.type
|
||||||
queueName: 'ai-realtime' | 'ai-batch';
|
queueName: 'ai-realtime' | 'ai-batch';
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> `effectiveProfile` ใน response คือ **read-only informational field** สำหรับ admin/developer ดู — ไม่ใช่ input
|
||||||
|
|
||||||
## Error Responses
|
## Error Responses
|
||||||
|
|
||||||
| Status | When |
|
| Status | When |
|
||||||
|--------|------|
|
|--------|------|
|
||||||
| 400 | `model.key` present, or parameter overrides present, or invalid `executionProfile` |
|
| 400 | `executionProfile`, `model.key`, หรือ parameter overrides มีใน payload |
|
||||||
| 403 | `large-context` by non-admin |
|
| 422 | `documentPublicId` หรือ `attachmentPublicId` ไม่พบใน DB |
|
||||||
| 422 | `documentPublicId` not found |
|
| 504 | CPU fallback retrieval timeout (`/embed` หรือ `/rerank`) |
|
||||||
| 504 | CPU fallback retrieval timeout |
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/data-model.md
|
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/data-model.md
|
||||||
// Change Log:
|
// Change Log:
|
||||||
// - 2026-06-11: Data model for AI Runtime Policy Refactor
|
// - 2026-06-11: Data model for AI Runtime Policy Refactor
|
||||||
|
// - 2026-06-11: Rename ExecutionProfile — interactive/standard/quality/deep-analysis; เพิ่ม numCtx, repeatPenalty ใน RuntimePolicy
|
||||||
|
// - 2026-06-11: เพิ่ม OcrRuntimePolicy จาก np-dms-ocr.model.md (fixed parameters, keep_alive dynamic)
|
||||||
|
|
||||||
# Data Model: AI Runtime Policy Refactor
|
# Data Model: AI Runtime Policy Refactor
|
||||||
|
|
||||||
@@ -10,11 +12,30 @@
|
|||||||
|
|
||||||
## TypeScript Types (Backend)
|
## TypeScript Types (Backend)
|
||||||
|
|
||||||
|
### JobType (types)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
||||||
|
|
||||||
|
// PublicJobType — รับจาก caller ผ่าน POST /api/ai/jobs เท่านั้น
|
||||||
|
export type PublicJobType = 'auto-fill-document' | 'migrate-document' | 'rag-query';
|
||||||
|
|
||||||
|
// InternalJobType — ใช้ภายใน AiPolicyService; ครอบคลุมทุก job type รวม internal
|
||||||
|
// sandbox-analysis — admin trigger ผ่าน OCR Sandbox โดยตรง (deep-analysis profile)
|
||||||
|
export type InternalJobType = PublicJobType | 'intent-classify' | 'tool-suggest' | 'ocr-extract' | 'sandbox-analysis';
|
||||||
|
```
|
||||||
|
|
||||||
|
> `intent-classify`, `tool-suggest`, `ocr-extract` — internal เท่านั้น; ถ้า caller ส่ง type เหล่านี้มา → HTTP 400
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
### ExecutionProfile (enum)
|
### ExecutionProfile (enum)
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
||||||
export type ExecutionProfile = 'fast' | 'balanced' | 'thai-accurate' | 'large-context';
|
// ค่า default ของแต่ละ profile ดูได้ที่ docs/ai-profiles.md
|
||||||
|
// ops/admin calibrate ได้ผ่าน Admin Console และบันทึกใน DB ตาม ADR-029
|
||||||
|
export type ExecutionProfile = 'interactive' | 'standard' | 'quality' | 'deep-analysis';
|
||||||
```
|
```
|
||||||
|
|
||||||
### RuntimePolicy (interface)
|
### RuntimePolicy (interface)
|
||||||
@@ -23,13 +44,34 @@ export type ExecutionProfile = 'fast' | 'balanced' | 'thai-accurate' | 'large-co
|
|||||||
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
||||||
export interface RuntimePolicy {
|
export interface RuntimePolicy {
|
||||||
canonicalModel: 'np-dms-ai' | 'np-dms-ocr'; // ชื่อ canonical เท่านั้น
|
canonicalModel: 'np-dms-ai' | 'np-dms-ocr'; // ชื่อ canonical เท่านั้น
|
||||||
temperature: number;
|
temperature: number; // default: interactive=0.7, standard=0.5, quality=0.1, deep-analysis=0.3
|
||||||
topP: number;
|
topP: number; // default: interactive=0.9, standard=0.8, quality=0.95, deep-analysis=0.85
|
||||||
maxTokens: number;
|
maxTokens: number; // default: interactive=2048, standard=4096, quality=8192, deep-analysis=8192
|
||||||
keepAliveSeconds: number; // สำหรับ main model
|
numCtx: number; // default: interactive=4096, standard=8192, quality=8192, deep-analysis=32768
|
||||||
|
repeatPenalty: number; // default: 1.15 ทุก profile
|
||||||
|
keepAliveSeconds: number; // default: interactive=300, standard=600, quality=600, deep-analysis=0
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### OcrRuntimePolicy (interface)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// File: backend/src/modules/ai/interfaces/ocr-residency.interface.ts
|
||||||
|
// Parameters จาก specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/np-dms-ocr.model.md
|
||||||
|
// ไม่ calibrate ผ่าน Admin Console — ค่า fixed ตาม Modelfile
|
||||||
|
export interface OcrRuntimePolicy {
|
||||||
|
canonicalModel: 'np-dms-ocr'; // FROM scb10x/typhoon-ocr1.5-3b:latest
|
||||||
|
numCtx: 8192; // PARAMETER num_ctx 8192
|
||||||
|
numPredict: 4096; // PARAMETER num_predict 4096
|
||||||
|
temperature: 0.1; // PARAMETER temperature 0.1
|
||||||
|
topP: 0.1; // PARAMETER top_p 0.1
|
||||||
|
repeatPenalty: 1.1; // PARAMETER repeat_penalty 1.1
|
||||||
|
keepAliveSeconds: number; // dynamic — คำนวณจาก OcrResidencyDecision
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
> `np-dms-ocr` ใช้ parameters คงที่ตาม Modelfile — **มีแค่ `keep_alive` เท่านั้นที่ dynamic** ตาม VRAM headroom
|
||||||
|
|
||||||
### OcrResidencyDecision (interface)
|
### OcrResidencyDecision (interface)
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
@@ -38,7 +80,7 @@ export interface OcrResidencyDecision {
|
|||||||
keepAliveSeconds: number; // 0 = unload; > 0 = residency window
|
keepAliveSeconds: number; // 0 = unload; > 0 = residency window
|
||||||
vramHeadroomMb: number; // หรือ -1 ถ้า query ล้มเหลว
|
vramHeadroomMb: number; // หรือ -1 ถ้า query ล้มเหลว
|
||||||
activeProfile: ExecutionProfile | null;
|
activeProfile: ExecutionProfile | null;
|
||||||
reason: 'large-context-active' | 'high-pressure' | 'headroom-sufficient' | 'query-failed';
|
reason: 'deep-analysis-active' | 'high-pressure' | 'headroom-sufficient' | 'query-failed';
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -54,14 +96,42 @@ export interface VramHeadroom {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### AiJobPayload (BullMQ job data)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// File: backend/src/modules/ai/interfaces/execution-policy.interface.ts
|
||||||
|
// BullMQ job payload — parameters ถูก snapshot ณ เวลา dispatch (FR-A09)
|
||||||
|
// worker ใช้ค่าจาก payload โดยตรง ไม่อ่าน DB/Redis อีกรอบ
|
||||||
|
export interface AiJobPayload {
|
||||||
|
jobType: InternalJobType;
|
||||||
|
documentPublicId?: string;
|
||||||
|
attachmentPublicId?: string;
|
||||||
|
// snapshot ณ เวลา dispatch โดย AiPolicyService
|
||||||
|
effectiveProfile: ExecutionProfile;
|
||||||
|
canonicalModel: 'np-dms-ai' | 'np-dms-ocr';
|
||||||
|
snapshotParams: {
|
||||||
|
temperature: number;
|
||||||
|
topP: number;
|
||||||
|
maxTokens: number;
|
||||||
|
numCtx: number;
|
||||||
|
repeatPenalty: number;
|
||||||
|
keepAliveSeconds: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
> `snapshotParams` ทำให้ทุก job predictable — แม้ admin calibrate ค่าใหม่ระหว่าง job queue อยู่ ค่าเดิมที่ snapshot ไว้จะถูกใช้; audit log บันทึก `snapshotParams` ด้วยเพื่อ traceability
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
### CreateAiJobDto (updated)
|
### CreateAiJobDto (updated)
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
// File: backend/src/modules/ai/dto/create-ai-job.dto.ts
|
// File: backend/src/modules/ai/dto/create-ai-job.dto.ts
|
||||||
// [CHANGE] ลบ model field และ parameter overrides ออก
|
// [CHANGE] ลบ executionProfile, model fields ออกทั้งหมด — backend กำหนดจาก job.type
|
||||||
export class CreateAiJobDto {
|
export class CreateAiJobDto {
|
||||||
@IsEnum(['auto-fill-document', 'migrate-document', 'rag-query'])
|
@IsEnum(['auto-fill-document', 'migrate-document', 'rag-query'])
|
||||||
type: 'auto-fill-document' | 'migrate-document' | 'rag-query';
|
type: PublicJobType;
|
||||||
|
|
||||||
@IsOptional()
|
@IsOptional()
|
||||||
@IsUUID('all')
|
@IsUUID('all')
|
||||||
@@ -71,16 +141,56 @@ export class CreateAiJobDto {
|
|||||||
@IsUUID('all')
|
@IsUUID('all')
|
||||||
attachmentPublicId?: string;
|
attachmentPublicId?: string;
|
||||||
|
|
||||||
@IsOptional()
|
// [REMOVED] executionProfile — backend กำหนดอัตโนมัติจาก job.type (Option B)
|
||||||
@IsEnum(['fast', 'balanced', 'thai-accurate', 'large-context'])
|
|
||||||
executionProfile?: ExecutionProfile;
|
|
||||||
|
|
||||||
// [REMOVED] model: { key, parameters } — ไม่อนุญาตแล้ว
|
// [REMOVED] model: { key, parameters } — ไม่อนุญาตแล้ว
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## DB Schema Extensions
|
||||||
|
|
||||||
|
### ai_execution_profiles (new table)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Delta: specs/03-Data-and-Storage/deltas/2026-06-11-create-ai-execution-profiles.sql
|
||||||
|
CREATE TABLE ai_execution_profiles (
|
||||||
|
id INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
profile_name VARCHAR(50) NOT NULL UNIQUE, -- 'interactive'|'standard'|'quality'|'deep-analysis'
|
||||||
|
temperature DECIMAL(4,3) NOT NULL,
|
||||||
|
top_p DECIMAL(4,3) NOT NULL,
|
||||||
|
max_tokens INT NOT NULL,
|
||||||
|
num_ctx INT NOT NULL,
|
||||||
|
repeat_penalty DECIMAL(5,3) NOT NULL,
|
||||||
|
keep_alive_seconds INT NOT NULL, -- 0 = unload immediately
|
||||||
|
is_active TINYINT(1) NOT NULL DEFAULT 1,
|
||||||
|
updated_by INT NULL, -- NULL = seed default
|
||||||
|
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
|
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
> - ค่า default seed จาก `docs/ai-profiles.md` ผ่าน delta SQL
|
||||||
|
> - Admin calibrate ผ่าน Admin Console → `UPDATE ai_execution_profiles SET ... WHERE profile_name = ?`
|
||||||
|
> - `AiPolicyService` อ่านค่าจาก table นี้ (Redis cache TTL 60s ตาม ADR-029 pattern)
|
||||||
|
> - Seed `INSERT` ใน delta SQL ใช้ `ON DUPLICATE KEY UPDATE profile_name = profile_name` (no-op) — ป้องกัน overwrite ค่าที่ admin calibrate ไว้เมื่อ apply delta ซ้ำ
|
||||||
|
|
||||||
|
### ai_audit_logs (extended columns)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Delta: specs/03-Data-and-Storage/deltas/2026-06-11-extend-ai-audit-logs-runtime-policy.sql
|
||||||
|
ALTER TABLE ai_audit_logs
|
||||||
|
ADD COLUMN effective_profile VARCHAR(50) NULL, -- 'interactive'|'standard'|'quality'|'deep-analysis'
|
||||||
|
ADD COLUMN canonical_model VARCHAR(50) NULL, -- 'np-dms-ai' | 'np-dms-ocr'
|
||||||
|
ADD COLUMN snapshot_params_json JSON NULL; -- { temperature, topP, maxTokens, numCtx, repeatPenalty, keepAliveSeconds }
|
||||||
|
```
|
||||||
|
|
||||||
|
> - `effective_profile` + `canonical_model` แทน legacy `ai_model` / `model_name` ที่มีชื่อ runtime tag
|
||||||
|
> - `snapshot_params_json` บันทึก parameters จริงที่ใช้ใน Ollama call (FR-A09) — ทำให้ audit traceability สมบูรณ์
|
||||||
|
> - columns เดิม (`ai_model`, `model_name`) ยังคงอยู่ (backward compat) — Feature-235 เขียน columns ใหม่เพิ่มเติม
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Python Types (OCR Sidecar)
|
## Python Types (OCR Sidecar)
|
||||||
|
|
||||||
### VramHeadroom (dataclass)
|
### VramHeadroom (dataclass)
|
||||||
|
|||||||
@@ -22,19 +22,19 @@
|
|||||||
|
|
||||||
### User Story 1 — Policy Contract & Canonical Naming (Priority: P1)
|
### User Story 1 — Policy Contract & Canonical Naming (Priority: P1)
|
||||||
|
|
||||||
นักพัฒนาและ admin ที่ส่ง AI job request ผ่าน AI Gateway จะส่งได้แค่ `executionProfile` (`fast | balanced | thai-accurate | large-context`) โดยไม่สามารถระบุชื่อ model หรือ override runtime parameters ได้เอง — system แสดงและบันทึก model ในทุก layer ด้วยชื่อ canonical `np-dms-ai` และ `np-dms-ocr` แทนชื่อ runtime เดิม
|
User ทั่วไปส่ง AI job request ผ่าน AI Gateway โดยระบุแค่ `job type` — ระบบ backend กำหนด execution policy (model, parameters) ทั้งหมดอัตโนมัติตาม job type โดยไม่มี caller input ใดๆ เกี่ยวกับ model หรือ profile — system แสดงและบันทึก model ในทุก layer ด้วยชื่อ canonical `np-dms-ai` และ `np-dms-ocr` แทนชื่อ runtime เดิม Admin/Superadmin สามารถดูและทดสอบ policy behavior ผ่าน Admin Console และ OCR Sandbox เท่านั้น
|
||||||
|
|
||||||
**Why this priority**: เป็นรากฐานของทุก workstream — ถ้า contract ยังเป็น caller-driven อยู่ workstream อื่นไม่มีความหมาย
|
**Why this priority**: เป็นรากฐานของทุก workstream — ถ้า contract ยังเป็น caller-driven อยู่ workstream อื่นไม่มีความหมาย
|
||||||
|
|
||||||
**Independent Test**: ยิง POST ไปยัง AI Gateway endpoint ด้วย payload ที่มี `model.key` หรือ `temperature` แล้วตรวจว่า API reject 400 พร้อม error message; ยิงด้วย `executionProfile: "balanced"` แล้วตรวจว่าผ่านและ log/response แสดง `np-dms-ai`
|
**Independent Test**: ยิง POST ไปยัง AI Gateway endpoint ด้วย `job type` เท่านั้น แล้วตรวจว่า response แสดง `modelUsed: "np-dms-ai"` และ audit log มี `effectiveProfile` ที่ถูกต้องตาม job type
|
||||||
|
|
||||||
**Acceptance Scenarios**:
|
**Acceptance Scenarios**:
|
||||||
|
|
||||||
1. **Given** AI job request ที่มี `model: { key: "typhoon2.5-np-dms:latest" }`, **When** ส่งไปยัง `POST /api/ai/jobs`, **Then** system ตอบ HTTP 400 พร้อมข้อความว่า field `model.key` ไม่อนุญาต
|
1. **Given** AI job request ที่มี `model: { key: "typhoon2.5-np-dms:latest" }` หรือ `executionProfile` field ใดๆ, **When** ส่งไปยัง `POST /api/ai/jobs`, **Then** system ตอบ HTTP 400 เพราะ fields เหล่านั้นไม่อนุญาต
|
||||||
2. **Given** AI job request ที่มี `executionProfile: "balanced"`, **When** job ถูก dispatch ไปยัง `ai-batch` queue, **Then** job payload บันทึก `modelUsed: "np-dms-ai"` ใน audit log
|
2. **Given** AI job request ที่มีแค่ `type: "rag-query"`, **When** job ถูก dispatch ไปยัง `ai-batch` queue, **Then** job payload บันทึก `modelUsed: "np-dms-ai"` และ `effectiveProfile` ที่ backend กำหนดให้ใน audit log
|
||||||
3. **Given** admin เปิด AI Admin Console, **When** ดู model information panel, **Then** แสดงชื่อ `np-dms-ai` และ `np-dms-ocr` ไม่ใช่ชื่อ runtime จริง (เช่น `typhoon2.5-np-dms:latest`)
|
3. **Given** admin เปิด AI Admin Console, **When** ดู model information panel, **Then** แสดงชื่อ `np-dms-ai` และ `np-dms-ocr` ไม่ใช่ชื่อ runtime จริง (เช่น `typhoon2.5-np-dms:latest`)
|
||||||
4. **Given** `auto-fill-document` job ถูกส่งมาพร้อม `executionProfile: "fast"`, **When** backend process job, **Then** backend override เป็น deterministic profile โดยไม่ใช้ค่า `fast` ที่ caller ส่งมา
|
4. **Given** `auto-fill-document` job ถูกส่งมา, **When** backend process job, **Then** backend กำหนด `effectiveProfile: "quality"` อัตโนมัติตาม job type โดยไม่รับ input จาก caller
|
||||||
5. **Given** `large-context` profile ถูกส่งโดย non-admin user, **When** backend validate, **Then** ตอบ HTTP 403 เพราะ profile นั้น restrict เฉพาะ admin/special workflows
|
5. **Given** admin เปิด OCR Sandbox, **When** ทดสอบ OCR job, **Then** สามารถดู `effectiveProfile` และ `modelUsed` ที่ระบบกำหนดให้ในผลลัพธ์
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -107,10 +107,12 @@ BullMQ queue ปรับให้ `ai-realtime` รองรับ concurrency
|
|||||||
### Edge Cases
|
### Edge Cases
|
||||||
|
|
||||||
- ถ้า VRAM headroom calculation service ล้มเหลว (timeout หรือ error) → ต้อง fallback เป็น `keep_alive: 0` เสมอ (safe default)
|
- ถ้า VRAM headroom calculation service ล้มเหลว (timeout หรือ error) → ต้อง fallback เป็น `keep_alive: 0` เสมอ (safe default)
|
||||||
- ถ้า caller ส่ง `executionProfile` ที่ไม่อยู่ใน canonical set → ตอบ 400 validation error
|
- ถ้า caller ส่ง `executionProfile` หรือ `model.*` fields มาใน payload → ตอบ 400 validation error ทันที (FR-A01)
|
||||||
- ถ้า `large-context` profile ถูก whitelist ให้ admin แต่ VRAM ไม่พอ → backend ต้อง reject พร้อม error ชัดเจน ไม่ใช่ silent fallback
|
- ถ้า job ที่ map ไป `deep-analysis` profile (เช่น `sandbox-analysis` จาก OCR Sandbox) ถูก dispatch แต่ VRAM ไม่พอ → backend ต้อง reject พร้อม error ชัดเจน ไม่ใช่ silent fallback
|
||||||
- ถ้า OCR job เข้ามาพร้อมกับ main model generation job → LLM-First rule บังคับ: OCR ต้องรอหรือใช้ `keep_alive: 0`
|
- ถ้า OCR job เข้ามาพร้อมกับ main model generation job → LLM-First rule บังคับ: OCR ต้องรอหรือใช้ `keep_alive: 0`
|
||||||
- ถ้า `/embed` fallback ไป CPU แล้ว job ใช้เวลานานเกิน timeout → ต้อง return partial result หรือ error ที่ชัดเจน ไม่ใช่ hang
|
- ถ้า `/embed` fallback ไป CPU แล้ว job ใช้เวลานานเกิน timeout → ต้อง return partial result หรือ error ที่ชัดเจน ไม่ใช่ hang
|
||||||
|
- ถ้า `VramMonitorService` ทำงานผิดพลาดหลัง cutover (เช่น Ollama `/api/ps` schema เปลี่ยน) → ระบบยัง operate ได้ด้วย safe default (`keep_alive: 0`) — **ไม่มี rollback plan; policy คือ fix-forward เท่านั้น** ต้องแก้ไขจนสำเร็จ
|
||||||
|
- VRAM race condition ระหว่าง headroom snapshot กับ Ollama request arrival ถือว่ายอมรับได้ เนื่องจาก `np-dms-ai` VRAM usage ใน production ถูก manual test จนมั่นใจก่อน cutover แล้ว
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -120,19 +122,21 @@ BullMQ queue ปรับให้ `ai-realtime` รองรับ concurrency
|
|||||||
|
|
||||||
**Workstream A: Contract & Canonical Naming**
|
**Workstream A: Contract & Canonical Naming**
|
||||||
|
|
||||||
- **FR-A01**: System MUST reject AI job requests ที่มี `model.key` field ใน payload (HTTP 400)
|
- **FR-A01**: System MUST reject AI job requests ที่มี `model.key`, `executionProfile`, `temperature`, `top_p`, หรือ `maxTokens` field ใน payload (HTTP 400) — ไม่มี caller input ใดๆ เกี่ยวกับ model หรือ profile
|
||||||
- **FR-A02**: System MUST reject AI job requests ที่มี direct `temperature`, `top_p`, หรือ `maxTokens` overrides (HTTP 400)
|
- **FR-A02**: `CreateAiJobDto` MUST รับเฉพาะ `type`, `documentPublicId`, `attachmentPublicId` — ไม่มี profile หรือ model fields
|
||||||
- **FR-A03**: `executionProfile` MUST รับค่าได้เฉพาะ `fast | balanced | thai-accurate | large-context`
|
- **FR-A03**: Backend MUST กำหนด `effectiveProfile` อัตโนมัติจาก `job.type` ตาม policy mapping ใน `AiPolicyService`
|
||||||
- **FR-A04**: `large-context` profile MUST ถูก authorize เฉพาะ admin role หรือ backend-whitelisted workflows
|
- **FR-A04**: Admin/Superadmin ดูและทดสอบ policy behavior ได้ผ่าน Admin Console และ OCR Sandbox เท่านั้น — ไม่ผ่าน API payload; OCR Sandbox ใช้ `sandbox-analysis` job type ภายใน ซึ่ง map ไป `deep-analysis` profile สำหรับ long-context document testing
|
||||||
- **FR-A05**: System MUST map `executionProfile` → canonical model name และ runtime parameters ใน backend policy layer
|
- **FR-A05**: System MUST map `job.type` → `{ effectiveProfile, canonicalModel, runtimeParameters }` ใน backend policy layer
|
||||||
- **FR-A06**: งาน data-affecting (`migrate-document`, `auto-fill-document`) MUST ถูก backend override profile โดยไม่ใช้ค่าที่ caller ส่งมา
|
- **FR-A06**: ทุก job type MUST มี deterministic policy mapping — ไม่มี job type ใดที่ไม่มี default policy
|
||||||
- **FR-A07**: ทุก layer (API response, audit log, Admin Console, OCR Sandbox) MUST แสดงชื่อ `np-dms-ai` และ `np-dms-ocr` แทนชื่อ runtime จริง
|
- **FR-A07**: ทุก layer (API response, audit log, Admin Console, OCR Sandbox) MUST แสดงชื่อ `np-dms-ai` และ `np-dms-ocr` แทนชื่อ runtime จริง
|
||||||
|
- **FR-A08**: audit log MUST บันทึก `effectiveProfile` (ค่าที่ backend กำหนด) และ `modelUsed` (canonical name) — `requestedProfile` เป็น `null` เสมอ เพราะไม่มี caller input
|
||||||
|
- **FR-A09**: `AiPolicyService` MUST snapshot `{ temperature, topP, maxTokens, numCtx, repeatPenalty, keepAliveSeconds }` จาก `ai_execution_profiles` (DB/Redis) ณ เวลา dispatch แล้วฝังใน BullMQ job payload — worker ใช้ค่าจาก payload โดยตรง ไม่อ่าน DB อีกรอบ; ทำให้ทุก job predictable และ audit log ตรงกับ parameters ที่ใช้จริง
|
||||||
|
|
||||||
**Workstream B: Runtime Policy**
|
**Workstream B: Runtime Policy**
|
||||||
|
|
||||||
- **FR-B01**: Backend MUST มี policy mapping: `executionProfile` → `{ canonicalModel, keep_alive, temperature, top_p, maxTokens }`
|
- **FR-B01**: Backend MUST มี policy mapping: `executionProfile` → `{ canonicalModel, keep_alive, temperature, top_p, max_tokens, num_ctx, repeat_penalty }`; ค่า default ตาม `docs/ai-profiles.md`; ค่าจริง calibrate ได้ผ่าน Admin Console และบันทึกใน DB ตาม ADR-029
|
||||||
- **FR-B02**: OCR residency MUST คำนวณ `keep_alive` แบบ dynamic จาก VRAM headroom และ active profile
|
- **FR-B02**: OCR residency MUST คำนวณ `keep_alive` แบบ dynamic จาก VRAM headroom และ active profile
|
||||||
- **FR-B03**: ถ้า active profile = `large-context` หรือ main model pressure = high → OCR `keep_alive` MUST = `0`
|
- **FR-B03**: ถ้า active profile = `deep-analysis` หรือ main model pressure = high → OCR `keep_alive` MUST = `0` โดย "main model pressure สูง" นิยามว่า `np-dms-ai.size_vram` ใน Ollama `/api/ps` response > `GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB` (configurable env)
|
||||||
- **FR-B04**: ถ้า VRAM headroom ≥ policy threshold → OCR สามารถใช้ residency window > 0
|
- **FR-B04**: ถ้า VRAM headroom ≥ policy threshold → OCR สามารถใช้ residency window > 0
|
||||||
- **FR-B05**: VRAM headroom calculation ล้มเหลว → MUST fallback เป็น `keep_alive: 0` (safe default)
|
- **FR-B05**: VRAM headroom calculation ล้มเหลว → MUST fallback เป็น `keep_alive: 0` (safe default)
|
||||||
- **FR-B06**: OCR residency decision MUST ถูก log พร้อม headroom value ที่ใช้ตัดสิน
|
- **FR-B06**: OCR residency decision MUST ถูก log พร้อม headroom value ที่ใช้ตัดสิน
|
||||||
@@ -155,7 +159,7 @@ BullMQ queue ปรับให้ `ai-realtime` รองรับ concurrency
|
|||||||
|
|
||||||
### Key Entities
|
### Key Entities
|
||||||
|
|
||||||
- **ExecutionProfile**: Enum value ที่ caller ส่งมา (`fast | balanced | thai-accurate | large-context`) — contract ระดับ API
|
- **ExecutionProfile**: Enum value ที่ backend กำหนดภายใน (`interactive | standard | quality | deep-analysis`) — **ไม่ expose ใน public API** ใช้ภายใน policy layer และ audit log เท่านั้น; ค่า default กำหนดใน `docs/ai-profiles.md` และ calibrate ได้ผ่าน Admin Console (ADR-029)
|
||||||
- **RuntimePolicy**: Backend mapping จาก `ExecutionProfile` → `{ canonicalModel, keep_alive, temperature, top_p, maxTokens }` — ไม่ expose ใน API
|
- **RuntimePolicy**: Backend mapping จาก `ExecutionProfile` → `{ canonicalModel, keep_alive, temperature, top_p, maxTokens, num_ctx, repeat_penalty }` — ไม่ expose ใน API
|
||||||
- **VramHeadroom**: ค่า computed ณ เวลา request ที่ใช้ตัดสิน OCR residency และ retrieval acceleration — บันทึกใน log
|
- **VramHeadroom**: ค่า computed ณ เวลา request ที่ใช้ตัดสิน OCR residency และ retrieval acceleration — บันทึกใน log
|
||||||
- **CanonicalModelIdentity**: ชื่อ `np-dms-ai` หรือ `np-dms-ocr` — ใช้ทุกชั้นที่ผู้ใช้เห็น
|
- **CanonicalModelIdentity**: ชื่อ `np-dms-ai` หรือ `np-dms-ocr` — ใช้ทุกชั้นที่ผู้ใช้เห็น
|
||||||
@@ -182,6 +186,10 @@ BullMQ queue ปรับให้ `ai-realtime` รองรับ concurrency
|
|||||||
|
|
||||||
- Q: ถ้า `/embed` fallback ไป CPU แล้ว job ใช้เวลานานเกิน timeout → ควร return partial result หรือ return error ที่ชัดเจน? → A: Return error ที่ชัดเจนพร้อม HTTP 504 timeout message — ไม่ return partial result เพราะ downstream LLM context จะ incomplete และทำให้ผลลัพธ์ผิดพลาดโดยไม่รู้ตัว
|
- Q: ถ้า `/embed` fallback ไป CPU แล้ว job ใช้เวลานานเกิน timeout → ควร return partial result หรือ return error ที่ชัดเจน? → A: Return error ที่ชัดเจนพร้อม HTTP 504 timeout message — ไม่ return partial result เพราะ downstream LLM context จะ incomplete และทำให้ผลลัพธ์ผิดพลาดโดยไม่รู้ตัว
|
||||||
- Q: VRAM headroom threshold ระดับ spec ควรกำหนด default value ไหม? → A: ไม่กำหนดใน spec — threshold เป็น operational config (env variable `VRAM_HEADROOM_THRESHOLD_MB`) ที่ ops/admin ปรับได้ runtime; spec ระบุแค่ว่า "ต้องมี threshold ที่ configurable" และ "ต้องใช้ safe default = 0 (unload) เมื่อ query ล้มเหลว"
|
- Q: VRAM headroom threshold ระดับ spec ควรกำหนด default value ไหม? → A: ไม่กำหนดใน spec — threshold เป็น operational config (env variable `VRAM_HEADROOM_THRESHOLD_MB`) ที่ ops/admin ปรับได้ runtime; spec ระบุแค่ว่า "ต้องมี threshold ที่ configurable" และ "ต้องใช้ safe default = 0 (unload) เมื่อ query ล้มเหลว"
|
||||||
|
- Q: "main model pressure สูง" วัดอย่างไรในทางปฏิบัติ? → A: วัดจาก `np-dms-ai.size_vram` ใน Ollama `/api/ps` response เทียบกับ `GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB` (configurable env) — ไม่ใช้ Redis flag หรือ shared state ใหม่
|
||||||
|
- Q: Rollback plan สำหรับ big bang cutover คืออะไร? → A: ไม่มี rollback — policy คือ fix-forward เท่านั้น; ถ้า cutover มีปัญหาต้องแก้ไขจนสำเร็จ
|
||||||
|
- Q: audit log ควรบันทึก profile ที่ caller ส่งมา หรือ profile ที่ใช้จริงหลัง override? → A: บันทึกแค่ `effectiveProfile` และ `modelUsed` — `requestedProfile` เป็น `null` เสมอ เพราะ user ไม่ได้ส่ง profile มาเลย (backend กำหนดทั้งหมดจาก job type)
|
||||||
|
- Q: `executionProfile` ควรรับจาก caller ไหม? → A: ไม่ — backend กำหนดทั้งหมดจาก job type; user ทั่วไปไม่รู้จัก profile เลย; admin ทดสอบผ่าน Admin Console/OCR Sandbox เท่านั้น
|
||||||
|
|
||||||
## Assumptions
|
## Assumptions
|
||||||
|
|
||||||
@@ -189,5 +197,5 @@ BullMQ queue ปรับให้ `ai-realtime` รองรับ concurrency
|
|||||||
- VRAM headroom threshold ค่าเริ่มต้นจะถูกกำหนดใน config/env และปรับได้โดยไม่ต้อง redeploy
|
- VRAM headroom threshold ค่าเริ่มต้นจะถูกกำหนดใน config/env และปรับได้โดยไม่ต้อง redeploy
|
||||||
- Canonical model names (`np-dms-ai`, `np-dms-ocr`) ถูก tag ใน Ollama registry บน Desk-5439 ก่อน cutover
|
- Canonical model names (`np-dms-ai`, `np-dms-ocr`) ถูก tag ใน Ollama registry บน Desk-5439 ก่อน cutover
|
||||||
- OCR sidecar (`app.py`) บน Desk-5439 จะถูก update เป็นส่วนหนึ่งของ cutover
|
- OCR sidecar (`app.py`) บน Desk-5439 จะถูก update เป็นส่วนหนึ่งของ cutover
|
||||||
- Big bang rollout: ไม่มี parallel legacy path — ทุก change deploy พร้อมกันในรอบเดียว
|
- Big bang rollout: ไม่มี parallel legacy path — ทุก change deploy พร้อมกันในรอบเดียว; **ไม่มี rollback plan — fix-forward เท่านั้น**
|
||||||
- `ai-realtime` concurrency uplift เป็น configuration change ไม่ใช่ architectural change ใหม่
|
- `ai-realtime` concurrency uplift เป็น configuration change ไม่ใช่ architectural change ใหม่
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/tasks.md
|
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/tasks.md
|
||||||
// Change Log:
|
// Change Log:
|
||||||
// - 2026-06-11: Initial task list for AI Runtime Policy Refactor
|
// - 2026-06-11: Initial task list for AI Runtime Policy Refactor
|
||||||
|
// - 2026-06-11: เพิ่ม T040/T041 สำหรับ delta SQL (ai_execution_profiles, ai_audit_logs extension)
|
||||||
|
// - 2026-06-11: อัปเดต T001 (AiJobPayload, JobType), T005 (snapshot), T010 (snapshotParams)
|
||||||
|
|
||||||
# Tasks: AI Runtime Policy Refactor
|
# Tasks: AI Runtime Policy Refactor
|
||||||
|
|
||||||
@@ -18,10 +20,10 @@
|
|||||||
|
|
||||||
**Purpose**: สร้าง foundational types และ interfaces ก่อน workstream ทุกอัน
|
**Purpose**: สร้าง foundational types และ interfaces ก่อน workstream ทุกอัน
|
||||||
|
|
||||||
- [ ] T001 สร้าง interface file `backend/src/modules/ai/interfaces/execution-policy.interface.ts` (ExecutionProfile type, RuntimePolicy interface, VramHeadroom interface)
|
- [x] T001 สร้าง interface file `backend/src/modules/ai/interfaces/execution-policy.interface.ts` — `ExecutionProfile` type (`interactive|standard|quality|deep-analysis`), `PublicJobType`, `InternalJobType`, `RuntimePolicy`, `OcrRuntimePolicy`, `AiJobPayload` (snapshot params), `VramHeadroom`
|
||||||
- [ ] T002 สร้าง interface file `backend/src/modules/ai/interfaces/ocr-residency.interface.ts` (OcrResidencyDecision interface)
|
- [x] T002 สร้าง interface file `backend/src/modules/ai/interfaces/ocr-residency.interface.ts` (OcrResidencyDecision interface)
|
||||||
- [ ] T003 [P] สร้าง `backend/src/modules/ai/services/vram-monitor.service.ts` — query Ollama `/api/ps` เพื่อคำนวณ VRAM headroom
|
- [x] T003 [P] สร้าง `backend/src/modules/ai/services/vram-monitor.service.ts` — query Ollama `/api/ps` เพื่อคำนวณ VRAM headroom
|
||||||
- [ ] T004 [P] สร้าง `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/vram_monitor.py` — Python VRAM headroom query via Ollama `/api/ps`
|
- [x] T004 [P] สร้าง `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/vram_monitor.py` — Python VRAM headroom query via Ollama `/api/ps`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -31,13 +33,15 @@
|
|||||||
|
|
||||||
**⚠️ CRITICAL**: No user story work can begin until this phase is complete
|
**⚠️ CRITICAL**: No user story work can begin until this phase is complete
|
||||||
|
|
||||||
- [ ] T005 สร้าง `backend/src/modules/ai/services/ai-policy.service.ts` — ExecutionProfile → RuntimePolicy mapping, canonical model name mapping, data-affecting job override logic
|
- [x] T040 [P] Apply delta SQL `specs/03-Data-and-Storage/deltas/2026-06-11-create-ai-execution-profiles.sql` — สร้าง table `ai_execution_profiles` + seed 4 profiles; ตรวจว่ามี row `interactive`, `standard`, `quality`, `deep-analysis` ใน DB (**MUST apply ก่อน** T005 อ่าน table นี้)
|
||||||
- [ ] T006 สร้าง `backend/src/modules/ai/guards/execution-profile.guard.ts` — CASL check: `large-context` เฉพาะ admin role
|
- [x] T041 [P] Apply delta SQL `specs/03-Data-and-Storage/deltas/2026-06-11-extend-ai-audit-logs-runtime-policy.sql` — เพิ่ม columns `effective_profile`, `canonical_model`, `snapshot_params_json` ใน `ai_audit_logs`; ตรวจด้วย `SHOW COLUMNS` (**MUST apply ก่อน** T010 เขียนลง columns เหล่านี้)
|
||||||
- [ ] T007 [P] แก้ `backend/src/modules/ai/dto/create-ai-job.dto.ts` — เอา `model.key` และ parameter override fields ออก, เพิ่ม `executionProfile?: ExecutionProfile` พร้อม class-validator
|
- [x] T005 สร้าง `backend/src/modules/ai/services/ai-policy.service.ts` — `InternalJobType` → `ExecutionProfile` mapping, อ่าน `ai_execution_profiles` จาก DB (Redis cache TTL 60s), snapshot `RuntimePolicy` parameters ลง `AiJobPayload` ตอน dispatch (FR-A09)
|
||||||
- [ ] T008 สร้าง `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/residency_policy.py` — OCR keep_alive calculation function
|
- [x] T006 ~~ลบออก~~ ExecutionProfileGuard ไม่จำเป็นแล้ว — ไม่มี caller input เลย (Option B) *skip task นี้*
|
||||||
- [ ] T009 แก้ `backend/src/modules/ai/ai.module.ts` — register `AiPolicyService`, `VramMonitorService`, `ExecutionProfileGuard`
|
- [x] T007 [P] แก้ `backend/src/modules/ai/dto/create-ai-job.dto.ts` — เอา `model.key`, `executionProfile`, `temperature`, `top_p`, `maxTokens` ออกทั้งหมด; เหลือเฉพาะ `type`, `documentPublicId`, `attachmentPublicId`; เพิ่ม `@IsForbidden()` validator หรือ forbidden field check ใน pipe
|
||||||
|
- [x] T008 สร้าง `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/residency_policy.py` — OCR keep_alive calculation function
|
||||||
|
- [x] T009 แก้ `backend/src/modules/ai/ai.module.ts` — register `AiPolicyService`, `VramMonitorService` (ลบ `ExecutionProfileGuard` ออก)
|
||||||
|
|
||||||
**Checkpoint**: Foundation ready — policy services, guard, and updated DTO available
|
**Checkpoint**: Foundation ready — delta SQL applied, policy services + updated DTO available
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -49,13 +53,13 @@
|
|||||||
|
|
||||||
### Implementation for User Story 1
|
### Implementation for User Story 1
|
||||||
|
|
||||||
- [ ] T010 [US1] แก้ `backend/src/modules/ai/ai.service.ts` — inject `AiPolicyService`, validate `executionProfile`, apply backend override สำหรับ `migrate-document` และ `auto-fill-document`, set `modelUsed` canonical name ใน audit log
|
- [x] T010 [US1] แก้ `backend/src/modules/ai/ai.service.ts` — inject `AiPolicyService`, กำหนด `effectiveProfile` อัตโนมัติจาก `job.type`, บันทึก `effectiveProfile` + `modelUsed` + `snapshotParams` ลง `ai_audit_logs` (FR-A08, FR-A09) — ไม่มี `requestedProfile` แล้ว
|
||||||
- [ ] T011 [P] [US1] แก้ `backend/src/modules/ai/dto/ai-job-response.dto.ts` — เพิ่ม `modelUsed: 'np-dms-ai' | 'np-dms-ocr'` field, เพิ่ม `executionProfile` field (effective profile หลัง override)
|
- [x] T011 [P] [US1] แก้ `backend/src/modules/ai/dto/ai-job-response.dto.ts` — เพิ่ม `modelUsed: 'np-dms-ai' | 'np-dms-ocr'` field, เพิ่ม `executionProfile` field (effective profile หลัง override)
|
||||||
- [ ] T012 [P] [US1] แก้ `backend/src/modules/ai/ai.controller.ts` — ใช้ `ExecutionProfileGuard` บน create-job endpoint, validate forbidden fields ใน pipe
|
- [x] T012 [P] [US1] แก้ `backend/src/modules/ai/ai.controller.ts` — validate forbidden fields (`model.*`, `executionProfile`, `temperature` ฯลฯ) ใน pipe — ไม่ต้อง guard แล้ว เพราะ DTO ทำไว้แล้ว
|
||||||
- [ ] T013 [P] [US1] แก้ `frontend/types/ai.ts` — เอา `model` field ออก, เพิ่ม `executionProfile?: ExecutionProfile`, เพิ่ม `modelUsed?: string`
|
- [x] T013 [P] [US1] แก้ `frontend/types/ai.ts` — เอา `model` field ออก, เพิ่ม `executionProfile?: ExecutionProfile`, เพิ่ม `modelUsed?: string`
|
||||||
- [ ] T014 [US1] แก้ `frontend/lib/services/admin-ai.service.ts` — update request/response types ให้สอดคล้องกับ DTO ใหม่
|
- [x] T014 [US1] แก้ `frontend/lib/services/admin-ai.service.ts` — update request/response types ให้สอดคล้องกับ DTO ใหม่
|
||||||
- [ ] T015 [P] [US1] แก้ `frontend/components/admin/ai/OcrSandboxPromptManager.tsx` — แสดง `np-dms-ai` / `np-dms-ocr` แทนชื่อ runtime ใน result cards และ model info
|
- [x] T015 [P] [US1] แก้ `frontend/components/admin/ai/OcrSandboxPromptManager.tsx` — แสดง `np-dms-ai` / `np-dms-ocr` แทนชื่อ runtime ใน result cards และ model info
|
||||||
- [ ] T016 [US1] แก้ `frontend/app/(admin)/admin/ai/page.tsx` — แสดง canonical names ใน System Health panel และ model status cards
|
- [x] T016 [US1] แก้ `frontend/app/(admin)/admin/ai/page.tsx` — แสดง canonical names ใน System Health panel และ model status cards
|
||||||
|
|
||||||
**Checkpoint**: US1 fully functional — policy contract enforced, canonical naming in all layers
|
**Checkpoint**: US1 fully functional — policy contract enforced, canonical naming in all layers
|
||||||
|
|
||||||
@@ -69,10 +73,10 @@
|
|||||||
|
|
||||||
### Implementation for User Story 2
|
### Implementation for User Story 2
|
||||||
|
|
||||||
- [ ] T017 [US2] แก้ `backend/src/modules/ai/services/ocr.service.ts` — inject `VramMonitorService` และ `AiPolicyService`, เพิ่ม `calculateOcrResidency()` method, ส่ง `keep_alive` ที่คำนวณได้ไปใน OCR sidecar request, log `OcrResidencyDecision`
|
- [x] T017 [US2] แก้ `backend/src/modules/ai/services/ocr.service.ts` — inject `VramMonitorService` และ `AiPolicyService`, เพิ่ม `calculateOcrResidency()` method, ส่ง `keep_alive` ที่คำนวณได้ไปใน OCR sidecar request, log `OcrResidencyDecision`
|
||||||
- [ ] T018 [P] [US2] แก้ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` — รับ `keep_alive` parameter จาก request body แทน hardcode `keep_alive=0`, ส่ง `keep_alive` ค่านั้นไปใน Ollama `/v1/chat/completions` call
|
- [x] T018 [P] [US2] แก้ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` — รับ `keep_alive` parameter จาก request body แทน hardcode `keep_alive=0`, ส่ง `keep_alive` ค่านั้นไปใน Ollama `/v1/chat/completions` call
|
||||||
- [ ] T019 [P] [US2] เพิ่ม env variables ใน docker-compose ของ Desk-5439 OCR sidecar — `VRAM_HEADROOM_THRESHOLD_MB`, `OCR_RESIDENCY_WINDOW_SECONDS`, `GPU_TOTAL_VRAM_MB`
|
- [x] T019 [P] [US2] เพิ่ม env variables ใน docker-compose ของ Desk-5439 OCR sidecar — `VRAM_HEADROOM_THRESHOLD_MB`, `OCR_RESIDENCY_WINDOW_SECONDS`, `GPU_TOTAL_VRAM_MB`
|
||||||
- [ ] T020 [US2] เพิ่ม unit tests `backend/src/modules/ai/tests/ocr-residency.spec.ts` — scenarios: large-context-active, high-pressure, headroom-sufficient, query-failed fallback
|
- [x] T020 [US2] เพิ่ม unit tests `backend/src/modules/ai/tests/ocr-residency.spec.ts` — scenarios: deep-analysis-active, high-pressure, headroom-sufficient, query-failed fallback
|
||||||
|
|
||||||
**Checkpoint**: US2 functional — OCR keep_alive computed dynamically per policy
|
**Checkpoint**: US2 functional — OCR keep_alive computed dynamically per policy
|
||||||
|
|
||||||
@@ -86,10 +90,10 @@
|
|||||||
|
|
||||||
### Implementation for User Story 3
|
### Implementation for User Story 3
|
||||||
|
|
||||||
- [ ] T021 [P] [US3] แก้ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` — เพิ่ม VRAM headroom check ใน `POST /embed` endpoint; ถ้าผ่าน threshold ใช้ GPU, ถ้าไม่ผ่านหรือ query ล้มเหลว ใช้ CPU; log `device` และ `reason`
|
- [x] T021 [P] [US3] แก้ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` — เพิ่ม VRAM headroom check ใน `POST /embed` endpoint; ถ้าผ่าน threshold ใช้ GPU, ถ้าไม่ผ่านหรือ query ล้มเหลว ใช้ CPU; log `device` และ `reason`
|
||||||
- [ ] T022 [P] [US3] แก้ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` — เพิ่ม VRAM headroom check ใน `POST /rerank` endpoint; CPU fallback logic เหมือน `/embed`; เพิ่ม timeout guard (504 response ถ้า CPU timeout)
|
- [x] T022 [P] [US3] แก้ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` — เพิ่ม VRAM headroom check ใน `POST /rerank` endpoint; CPU fallback logic เหมือน `/embed`; เพิ่ม timeout guard (504 response ถ้า CPU timeout)
|
||||||
- [ ] T023 [US3] แก้ `backend/src/modules/ai/processors/ai-batch.processor.ts` — รอง handle กรณีที่ `/embed` หรือ `/rerank` ตอบ `device: "cpu"` ใน response; log `retrievalDevice` ลง ai_audit_logs metadata
|
- [x] T023 [US3] แก้ `backend/src/modules/ai/processors/ai-batch.processor.ts` — รองรับ (handle) กรณีที่ `/embed` หรือ `/rerank` ตอบ `device: "cpu"` ใน response; log `retrievalDevice` ลง ai_audit_logs metadata
|
||||||
- [ ] T024 [P] [US3] สร้าง `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/tests/test_retrieval_fallback.py` — pytest tests สำหรับ CPU fallback behavior ของ `/embed` และ `/rerank`
|
- [x] T024 [P] [US3] สร้าง `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/tests/test_retrieval_fallback.py` — pytest tests สำหรับ CPU fallback behavior ของ `/embed` และ `/rerank`
|
||||||
|
|
||||||
**Checkpoint**: US3 functional — retrieval never hard-fails due to GPU pressure
|
**Checkpoint**: US3 functional — retrieval never hard-fails due to GPU pressure
|
||||||
|
|
||||||
@@ -103,10 +107,10 @@
|
|||||||
|
|
||||||
### Implementation for User Story 4
|
### Implementation for User Story 4
|
||||||
|
|
||||||
- [ ] T025 [US4] แก้ `backend/src/config/bullmq.config.ts` — เพิ่ม `REALTIME_CONCURRENCY` env variable (default: 2); ปรับ `ai-realtime` worker concurrency ให้ configurable
|
- [x] T025 [US4] แก้ `backend/src/config/bullmq.config.ts` — เพิ่ม `REALTIME_CONCURRENCY` env variable (default: 2); ปรับ `ai-realtime` worker concurrency ให้ configurable
|
||||||
- [ ] T026 [US4] แก้ `backend/src/modules/ai/processors/ai-realtime.processor.ts` — เพิ่ม job type classification: `LIGHTWEIGHT_REALTIME_JOBS = ['intent-classify', 'tool-suggest']`; generation-heavy jobs ถูก redirect ไป `ai-batch` ถ้าเข้ามาผิด queue; เพิ่ม log สำหรับ classification decision
|
- [x] T026 [US4] แก้ `backend/src/modules/ai/processors/ai-realtime.processor.ts` — เพิ่ม job type classification: `LIGHTWEIGHT_REALTIME_JOBS = ['intent-classify', 'tool-suggest']`; generation-heavy jobs ถูก redirect ไป `ai-batch` ถ้าเข้ามาผิด queue; เพิ่ม log สำหรับ classification decision
|
||||||
- [ ] T027 [P] [US4] ตรวจสอบ `backend/src/modules/ai/ai.service.ts` — ยืนยันว่า `rag-query` ถูก dispatch ไป `ai-batch` เสมอ (ไม่ใช่ `ai-realtime`); เพิ่ม explicit assertion ใน dispatch logic
|
- [x] T027 [P] [US4] ตรวจสอบ `backend/src/modules/ai/ai.service.ts` — ยืนยันว่า `rag-query` ถูก dispatch ไป `ai-batch` เสมอ (ไม่ใช่ `ai-realtime`); เพิ่ม explicit assertion ใน dispatch logic
|
||||||
- [ ] T028 [P] [US4] เพิ่ม unit tests `backend/src/modules/ai/tests/queue-policy.spec.ts` — ทดสอบ job classification, rag-query routing, lightweight job concurrency
|
- [x] T028 [P] [US4] เพิ่ม unit tests `backend/src/modules/ai/tests/queue-policy.spec.ts` — ทดสอบ job classification, rag-query routing, lightweight job concurrency
|
||||||
|
|
||||||
**Checkpoint**: US4 functional — selective concurrency active, rag-query always in ai-batch
|
**Checkpoint**: US4 functional — selective concurrency active, rag-query always in ai-batch
|
||||||
|
|
||||||
@@ -120,12 +124,12 @@
|
|||||||
|
|
||||||
### Implementation for User Story 5
|
### Implementation for User Story 5
|
||||||
|
|
||||||
- [ ] T029 [US5] สร้าง `backend/src/modules/ai/tests/ai-policy.service.spec.ts` — unit tests ครอบ: profile mapping ทุก 4 values, canonical name mapping, data-affecting override, `large-context` guard validation
|
- [x] T029 [US5] สร้าง `backend/src/modules/ai/tests/ai-policy.service.spec.ts` — unit tests ครอบ: `job.type` → `effectiveProfile` mapping ทุก job type, canonical name mapping, forbidden fields rejection (400), audit log มี `effectiveProfile` + `modelUsed` และไม่มี `requestedProfile` (FR-A08)
|
||||||
- [ ] T030 [P] [US5] สร้าง `backend/src/modules/ai/tests/execution-profile.guard.spec.ts` — unit tests: admin passes, non-admin blocked, missing token blocked
|
- [x] T030 [US5] ~~ExecutionProfileGuard tests — skip~~ แทนที่: เพิ่ม integration test สำหรับ forbidden fields validation ใน `ai.controller.spec.ts` — ตรวจว่า `model.*` และ `executionProfile` ใน payload → 400
|
||||||
- [ ] T031 [P] [US5] สร้าง `backend/src/modules/ai/tests/vram-monitor.service.spec.ts` — unit tests: successful query, Ollama timeout fallback, empty models response
|
- [x] T031 [P] [US5] สร้าง `backend/src/modules/ai/tests/vram-monitor.service.spec.ts` — unit tests: successful query, Ollama timeout fallback, empty models response
|
||||||
- [ ] T032 [US5] ทดสอบ manual validation ตาม `quickstart.md` — รัน curl commands ทั้ง Gate 1–4, ตรวจ Admin Console labels, ตรวจ OCR Sandbox behavior; บันทึกผลใน checklist
|
- [ ] T032 [US5] ทดสอบ manual validation ตาม `quickstart.md` — รัน curl commands ทั้ง Gate 1–4, ตรวจ Admin Console labels, ตรวจ OCR Sandbox behavior; บันทึกผลใน checklist
|
||||||
- [ ] T033 [P] [US5] อัปเดต env template ไฟล์ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/.env.template` — เพิ่ม `VRAM_HEADROOM_THRESHOLD_MB`, `OCR_RESIDENCY_WINDOW_SECONDS`, `GPU_TOTAL_VRAM_MB`, `REALTIME_CONCURRENCY`
|
- [x] T033 [P] [US5] อัปเดต env template ไฟล์ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/.env.template` — เพิ่ม `VRAM_HEADROOM_THRESHOLD_MB`, `OCR_RESIDENCY_WINDOW_SECONDS`, `GPU_TOTAL_VRAM_MB`, `GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB`, `REALTIME_CONCURRENCY`
|
||||||
- [ ] T034 [P] [US5] อัปเดต `backend/.env.example` — เพิ่ม `AI_VRAM_HEADROOM_THRESHOLD_MB`, `AI_REALTIME_CONCURRENCY`
|
- [x] T034 [P] [US5] อัปเดต `backend/.env.example` — เพิ่ม `AI_VRAM_HEADROOM_THRESHOLD_MB`, `AI_GPU_MAIN_MODEL_PRESSURE_THRESHOLD_MB`, `AI_OCR_RESIDENCY_WINDOW_SECONDS`, `AI_REALTIME_CONCURRENCY`
|
||||||
|
|
||||||
**Checkpoint**: All 5 user stories complete — big bang cutover gate ready for validation
|
**Checkpoint**: All 5 user stories complete — big bang cutover gate ready for validation
|
||||||
|
|
||||||
@@ -133,11 +137,11 @@
|
|||||||
|
|
||||||
## Phase 8: Polish & Cross-Cutting Concerns
|
## Phase 8: Polish & Cross-Cutting Concerns
|
||||||
|
|
||||||
- [ ] T039 [US1] แก้ `backend/src/modules/ai/processors/ai-batch.processor.ts` — เปลี่ยน `ocrUsed` label value จาก `"Typhoon OCR"` / `"PaddleOCR"` เป็น `"np-dms-ocr"` ใน Redis completed result (ครอบคลุม FR-A07: canonical names ทุก layer รวมถึง OCR Sandbox badge)
|
- [x] T039 [US1] แก้ `backend/src/modules/ai/processors/ai-batch.processor.ts` — เปลี่ยน `ocrUsed` label value จาก `"Typhoon OCR"` / `"PaddleOCR"` เป็น `"np-dms-ocr"` ใน Redis completed result (ครอบคลุม FR-A07: canonical names ทุก layer รวมถึง OCR Sandbox badge) — verified: `engineUsed` ใช้ค่า canonical แล้ว (`typhoon-np-dms-ocr`, `tesseract`, `fast-path`); frontend badge แสดง `np-dms-ocr` ถูกต้อง
|
||||||
- [ ] T035 [P] ตรวจสอบ i18n keys ที่ต้องเพิ่มใน `frontend/public/locales/` สำหรับ error messages ใหม่ (400 model.key, 403 large-context, 504 CPU timeout)
|
- [x] T035 [P] ตรวจสอบ i18n keys ที่ต้องเพิ่มใน `frontend/public/locales/` สำหรับ error messages ใหม่ (400 model.key, 403 large-context, 504 CPU timeout) — เพิ่ม `ai_runtime_policy` namespace ใน en/ai.json และ th/ai.json
|
||||||
- [ ] T036 อัปเดต CONTEXT.md และ AGENTS.md — เพิ่ม `np-dms-ai` / `np-dms-ocr` เป็น canonical identity ใน System readiness summary; แก้ references เดิมที่ยังใช้ชื่อ runtime
|
- [x] T036 อัปเดต CONTEXT.md และ AGENTS.md — เพิ่ม `np-dms-ai` / `np-dms-ocr` เป็น canonical identity ใน System readiness summary; เพิ่ม ADR-034 ใน ADRs table
|
||||||
- [ ] T037 [P] ตรวจสอบ ADR-034 references ทั้งหมดใน codebase ด้วย search — ไฟล์ไหนยังใช้ `typhoon2.5-np-dms:latest` หรือ `typhoon-np-dms-ocr:latest` ใน user-facing surfaces (ไม่ใช่ Modelfile/ops internals)
|
- [x] T037 [P] ตรวจสอบ ADR-034 references ทั้งหมดใน codebase ด้วย search — ไม่พบ `typhoon*:latest` ใน user-facing surfaces (frontend TS/TSX); พบใน ops internals (ollama.service.ts, ai-settings.service.ts, test files) ซึ่งถูกต้องตามนโยบาย
|
||||||
- [ ] T038 รัน `pnpm lint` และ `pnpm type-check` สำหรับ backend และ frontend — แก้ทุก error ก่อน cutover
|
- [x] T038 รัน `pnpm lint` และ `pnpm type-check` สำหรับ backend และ frontend — แก้ทุก error ก่อน cutover — ESLint + tsc --noEmit ผ่านครบ ไม่มี error
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -146,7 +150,7 @@
|
|||||||
### Phase Dependencies
|
### Phase Dependencies
|
||||||
|
|
||||||
- **Setup (Phase 1)**: ไม่มี dependency — เริ่มได้ทันที
|
- **Setup (Phase 1)**: ไม่มี dependency — เริ่มได้ทันที
|
||||||
- **Foundational (Phase 2)**: ต้องรอ Phase 1 (T001, T002) — BLOCKS ทุก user story
|
- **Foundational (Phase 2)**: ต้องรอ Phase 1 (T001, T002) — BLOCKS ทุก user story; **T040/T041 (delta SQL) MUST apply ก่อน** T005 และ T010
|
||||||
- **US1 (Phase 3)**: ต้องรอ Phase 2 complete — สำคัญสุด, ทำก่อน
|
- **US1 (Phase 3)**: ต้องรอ Phase 2 complete — สำคัญสุด, ทำก่อน
|
||||||
- **US2 (Phase 4)**: ต้องรอ Phase 2 complete — ขึ้นกับ `VramMonitorService` จาก T003
|
- **US2 (Phase 4)**: ต้องรอ Phase 2 complete — ขึ้นกับ `VramMonitorService` จาก T003
|
||||||
- **US3 (Phase 5)**: ต้องรอ Phase 2 complete — ขึ้นกับ `vram_monitor.py` จาก T004
|
- **US3 (Phase 5)**: ต้องรอ Phase 2 complete — ขึ้นกับ `vram_monitor.py` จาก T004
|
||||||
@@ -166,7 +170,8 @@
|
|||||||
|
|
||||||
- T001 + T002: parallel (different files)
|
- T001 + T002: parallel (different files)
|
||||||
- T003 + T004: parallel (different stacks)
|
- T003 + T004: parallel (different stacks)
|
||||||
- T005, T006, T007: T005 ทำก่อน (T006, T007 ขึ้นกับ types จาก T005)
|
- T040 + T041: parallel (different tables) — ต้องรอ Phase 1 และ MUST apply ก่อน T005/T010
|
||||||
|
- T005, T006, T007: T005 ทำก่อน (T006, T007 ขึ้นกับ types จาก T005); T040 ต้อง complete ก่อน T005
|
||||||
- US1 + US2 + US3 + US4: parallel หลัง Phase 2 complete (ถ้ามีทีม)
|
- US1 + US2 + US3 + US4: parallel หลัง Phase 2 complete (ถ้ามีทีม)
|
||||||
- T029, T030, T031, T033, T034: parallel (different test files / env files)
|
- T029, T030, T031, T033, T034: parallel (different test files / env files)
|
||||||
|
|
||||||
@@ -193,12 +198,12 @@
|
|||||||
|
|
||||||
### Total Task Count
|
### Total Task Count
|
||||||
|
|
||||||
- **Total**: 39 tasks
|
- **Total**: 41 tasks
|
||||||
- **US1**: 7 tasks (T010–T016)
|
- **US1**: 7 tasks (T010–T016)
|
||||||
- **US2**: 4 tasks (T017–T020)
|
- **US2**: 4 tasks (T017–T020)
|
||||||
- **US3**: 4 tasks (T021–T024)
|
- **US3**: 4 tasks (T021–T024)
|
||||||
- **US4**: 4 tasks (T025–T028)
|
- **US4**: 4 tasks (T025–T028)
|
||||||
- **US5**: 6 tasks (T029–T034)
|
- **US5**: 6 tasks (T029–T034)
|
||||||
- **Setup**: 4 tasks (T001–T004)
|
- **Setup**: 4 tasks (T001–T004)
|
||||||
- **Foundational**: 5 tasks (T005–T009)
|
- **Foundational**: 7 tasks (T040, T041, T005–T009)
|
||||||
- **Polish**: 4 tasks (T035–T038)
|
- **Polish**: 5 tasks (T035–T039; T039 ติด tag [US1] แต่อยู่ใน Phase 8)
|
||||||
|
|||||||
@@ -0,0 +1,126 @@
|
|||||||
|
// File: specs/200-fullstacks/235-ai-runtime-policy-refactor/validation-report.md
|
||||||
|
// Change Log:
|
||||||
|
// - 2026-06-11: Initial validation report for feature 235
|
||||||
|
|
||||||
|
# Validation Report: AI Runtime Policy Refactor
|
||||||
|
|
||||||
|
**Date**: 2026-06-11
|
||||||
|
**Feature**: `235-ai-runtime-policy-refactor`
|
||||||
|
**Status**: PARTIAL
|
||||||
|
|
||||||
|
## Coverage Summary
|
||||||
|
|
||||||
|
| Metric | Count | Percentage |
|
||||||
|
| --- | ---: | ---: |
|
||||||
|
| Requirements Covered | 22/25 | 88% |
|
||||||
|
| Acceptance Criteria Met | 14/19 | 74% |
|
||||||
|
| Edge Cases Handled | 6/7 | 86% |
|
||||||
|
| Tests Present | 18/25 | 72% |
|
||||||
|
|
||||||
|
## What Was Validated
|
||||||
|
|
||||||
|
- Workstream A evidence found in backend DTO/service/response contract and tests:
|
||||||
|
[create-ai-job.dto.ts](./backend/src/modules/ai/dto/create-ai-job.dto.ts),
|
||||||
|
[ai-job-response.dto.ts](./backend/src/modules/ai/dto/ai-job-response.dto.ts),
|
||||||
|
[ai.service.ts](./backend/src/modules/ai/ai.service.ts),
|
||||||
|
[ai.controller.spec.ts](./backend/src/modules/ai/tests/ai.controller.spec.ts),
|
||||||
|
[ai-policy.service.spec.ts](./backend/src/modules/ai/tests/ai-policy.service.spec.ts),
|
||||||
|
[ai.service.spec.ts](./backend/src/modules/ai/ai.service.spec.ts)
|
||||||
|
- Workstream B evidence found in:
|
||||||
|
[ocr.service.ts](./backend/src/modules/ai/services/ocr.service.ts),
|
||||||
|
[vram-monitor.service.ts](./backend/src/modules/ai/services/vram-monitor.service.ts),
|
||||||
|
[ocr-residency.spec.ts](./backend/src/modules/ai/tests/ocr-residency.spec.ts),
|
||||||
|
[vram-monitor.service.spec.ts](./backend/src/modules/ai/tests/vram-monitor.service.spec.ts),
|
||||||
|
[residency_policy.py](./specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/residency_policy.py)
|
||||||
|
- Workstream C evidence found in:
|
||||||
|
[app.py](./specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py),
|
||||||
|
[ai-batch.processor.ts](./backend/src/modules/ai/processors/ai-batch.processor.ts),
|
||||||
|
[test_retrieval_fallback.py](./specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/tests/test_retrieval_fallback.py)
|
||||||
|
- Workstream D evidence found in:
|
||||||
|
[bullmq.config.ts](./backend/src/config/bullmq.config.ts),
|
||||||
|
[ai-realtime.processor.ts](./backend/src/modules/ai/processors/ai-realtime.processor.ts),
|
||||||
|
[queue-policy.spec.ts](./backend/src/modules/ai/tests/queue-policy.spec.ts)
|
||||||
|
- User-facing canonical naming evidence found in:
|
||||||
|
[page.tsx](./frontend/app/(admin)/admin/ai/page.tsx),
|
||||||
|
[OcrSandboxPromptManager.tsx](./frontend/components/admin/ai/OcrSandboxPromptManager.tsx),
|
||||||
|
[admin-ai.service.ts](./frontend/lib/services/admin-ai.service.ts)
|
||||||
|
|
||||||
|
## Requirement Matrix
|
||||||
|
|
||||||
|
| Requirement | Status | Evidence | Notes |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| FR-A01 | Covered | DTO forbidden fields + controller integration tests | HTTP 400 path implemented |
|
||||||
|
| FR-A02 | Partial | DTO still accepts `payload` and `projectPublicId` | Spec text conflicts with rag-query/query + tenant isolation contract |
|
||||||
|
| FR-A03 | Covered | `AiPolicyService.getProfileForJobType()` + `AiService.submitUnifiedJob()` | Backend assigns profile from job type |
|
||||||
|
| FR-A04 | Covered | Admin Console + OCR Sandbox UI | Visibility exists in UI; enforcement is by contract removal, not separate guard |
|
||||||
|
| FR-A05 | Covered | `AiPolicyService.createJobPayload()` | Mapping includes profile, canonical model, snapshot params |
|
||||||
|
| FR-A06 | Covered | deterministic switch in `getProfileForJobType()` | No unmapped internal job type found |
|
||||||
|
| FR-A07 | Covered | backend DTOs, frontend normalization, sandbox badge mapping | Canonical labels present across layers inspected |
|
||||||
|
| FR-A08 | Covered | worker audit writes `effectiveProfile`, `canonicalModel`, `snapshotParamsJson` | enqueue-time false success log removed |
|
||||||
|
| FR-A09 | Covered | `createJobPayload()` snapshot + worker uses payload snapshot | Predictable per-dispatch parameters |
|
||||||
|
| FR-B01 | Covered | `AiPolicyService` default policy map + DB/cache lookup | Runtime policy layer exists |
|
||||||
|
| FR-B02 | Covered | `OcrService.calculateOcrResidency()` | Dynamic keep_alive decision implemented |
|
||||||
|
| FR-B03 | Covered | deep-analysis/high-pressure branches + residency tests | Safe OCR unload path exists |
|
||||||
|
| FR-B04 | Covered | residency window branch + tests | Positive keep_alive path exists |
|
||||||
|
| FR-B05 | Covered | VRAM query failure fallback + tests | Safe default `keep_alive=0` exists |
|
||||||
|
| FR-B06 | Covered | `OcrService` logs decision context | Log behavior implemented, not live-verified |
|
||||||
|
| FR-C01 | Covered | `/embed` headroom check + CPU fallback | Sidecar code present |
|
||||||
|
| FR-C02 | Covered | `/rerank` headroom check + CPU fallback | Sidecar code present |
|
||||||
|
| FR-C03 | Covered | `/embed` + `/rerank` timeout -> HTTP 504 | No partial result path found |
|
||||||
|
| FR-C04 | Covered | device/reason logging in sidecar | Log behavior implemented |
|
||||||
|
| FR-C05 | Partial | `rag-query` backend path exists | No executed integration/manual proof that fallback path completes end-to-end |
|
||||||
|
| FR-C06 | Covered | env threshold usage + safe default in VRAM query failure | Configurable threshold present |
|
||||||
|
| FR-D01 | Partial | config default=2 + processor logic + unit tests | No live worker concurrency proof beyond unit tests |
|
||||||
|
| FR-D02 | Covered | lightweight job classification list | Matches spec set |
|
||||||
|
| FR-D03 | Covered | `AiService.submitUnifiedJob()` + realtime redirect tests | `rag-query` stays in `ai-batch` |
|
||||||
|
| FR-D04 | Covered | active-job counter + queue policy tests | Resume now waits for all realtime jobs |
|
||||||
|
|
||||||
|
## Acceptance Criteria Gaps
|
||||||
|
|
||||||
|
| Scenario | Status | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| US1-3 Admin Console shows canonical names only | Partial | Code supports it, but no manual browser validation recorded |
|
||||||
|
| US1-5 OCR Sandbox reveals effective profile/modelUsed | Partial | UI/service evidence exists, but no executed sandbox validation record |
|
||||||
|
| US2-4 OCR logs residency decision with headroom | Partial | Logging code exists; no captured runtime log artifact |
|
||||||
|
| US3-4 RAG still answers under CPU fallback | Partial | Code path exists; no completed end-to-end run |
|
||||||
|
| US5-1 executable cutover gate | Partial | backend targeted tests passed, but sidecar pytest was not executed in this validation pass |
|
||||||
|
| US5-2 Admin Console labels manual check | Missing | T032 still unchecked |
|
||||||
|
| US5-3 OCR Sandbox behavior across headroom scenarios | Missing | T032 still unchecked |
|
||||||
|
|
||||||
|
## Edge Case Review
|
||||||
|
|
||||||
|
| Edge Case | Status | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| VRAM query failure -> `keep_alive: 0` | Handled | explicit safe default in backend + sidecar |
|
||||||
|
| caller sends forbidden profile/model fields | Handled | DTO/controller tests cover this |
|
||||||
|
| admin-only large-context when VRAM insufficient | Partial | spec branch is stale after contract removal; no current caller path exists |
|
||||||
|
| OCR job races with main model generation | Handled | high-pressure/deep-analysis path forces unload |
|
||||||
|
| CPU fallback timeout must fail clearly | Handled | 504 implemented |
|
||||||
|
| Ollama `/api/ps` schema drift after cutover | Handled | safe default `available=0` path exists |
|
||||||
|
| headroom snapshot/request race acceptable | Handled | implementation follows spec assumption; no stronger synchronization introduced |
|
||||||
|
|
||||||
|
## Success Criteria Notes
|
||||||
|
|
||||||
|
| Success Criterion | Status | Notes |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| SC-001 | Likely Met | automated rejection tests exist |
|
||||||
|
| SC-002 | Partial | code normalization exists; no full manual surface sweep attached |
|
||||||
|
| SC-003 | Not Validated | no latency measurement artifact |
|
||||||
|
| SC-004 | Partial | fallback code exists; no executed end-to-end proof |
|
||||||
|
| SC-005 | Partial | backend tests executed, sidecar pytest/manual cutover not completed |
|
||||||
|
| SC-006 | Partial | concurrency config + unit tests exist, no throughput measurement |
|
||||||
|
|
||||||
|
## Key Findings
|
||||||
|
|
||||||
|
1. Implementation is broadly aligned with the runtime-policy refactor design, especially on policy mapping, canonical naming, adaptive OCR residency, retrieval CPU fallback, and queue pause/resume correctness.
|
||||||
|
2. Validation cannot be promoted to `PASS` yet because the feature still lacks the manual Gate 1–4 evidence from [quickstart.md](./quickstart.md) and this pass did not execute the Python sidecar pytest suite.
|
||||||
|
3. The spec artifact set contains one material inconsistency: FR-A02 says `CreateAiJobDto` should only expose `type`, `documentPublicId`, and `attachmentPublicId`, but the same spec and implemented contract require `payload.query` and `projectPublicId` for `rag-query`. The code follows the richer contract, not the literal FR-A02 text.
|
||||||
|
4. [quickstart.md](./quickstart.md) is stale against the implemented Option B contract in at least Gate 1C, 1D, and 4A because it still sends `executionProfile` / `large-context` style caller input that the new DTO now forbids.
|
||||||
|
|
||||||
|
## Recommendations
|
||||||
|
|
||||||
|
1. Complete T032 by running the manual Gate 1–4 flow on a real backend + OCR sidecar environment and append the captured results to this feature folder.
|
||||||
|
2. Run `pytest specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/tests -v` once the sidecar environment is ready, then update this report with the result.
|
||||||
|
3. Reconcile FR-A02 and `quickstart.md` with the actual Option B contract so the validation target and operator guide no longer contradict the implementation.
|
||||||
|
4. Add one end-to-end proof for FR-C05/SC-004: force GPU pressure, submit `rag-query`, and capture both successful response and sidecar `device=cpu` log.
|
||||||
|
5. Add one concurrency-focused execution proof for FR-D01/SC-006 if the team wants `PASS` to include runtime throughput evidence rather than unit-level proof only.
|
||||||
@@ -16,4 +16,5 @@
|
|||||||
| 2026-06-05 | v1.9.8 | RAG Pipeline Enhancements (Spec 234 / ADR-035) — BGE-M3 + BGE-Reranker + Hybrid Qdrant (Session 14/15) | ✅ Complete |
|
| 2026-06-05 | v1.9.8 | RAG Pipeline Enhancements (Spec 234 / ADR-035) — BGE-M3 + BGE-Reranker + Hybrid Qdrant (Session 14/15) | ✅ Complete |
|
||||||
| 2026-06-06 | v1.9.9 | LLM JSON Parse Failure & VRAM Fix (ADR-035-135) — retry logic + keep_alive=0 + ESLint heap fix | ✅ Complete |
|
| 2026-06-06 | v1.9.9 | LLM JSON Parse Failure & VRAM Fix (ADR-035-135) — retry logic + keep_alive=0 + ESLint heap fix | ✅ Complete |
|
||||||
| 2026-06-08 | v1.9.10 | LLM JSON Response Truncation Fix — ขยาย num_ctx: 16384 (Session 16 โดย AGY Gemini 3.5 Flash (Medium)) | ✅ Complete |
|
| 2026-06-08 | v1.9.10 | LLM JSON Response Truncation Fix — ขยาย num_ctx: 16384 (Session 16 โดย AGY Gemini 3.5 Flash (Medium)) | ✅ Complete |
|
||||||
|
| 2026-06-11 | v1.9.10 | AI Runtime Policy Refactor (Feature-235) — Canonical names (`np-dms-ai`/`np-dms-ocr`), Adaptive OCR Residency, CPU Fallback Retrieval, Queue Policy (ai-realtime concurrency=2) — targeted verification 27/27 tests ✅ ESLint + tsc clean | ⏳ Pending T032 Manual Gate + Merge |
|
||||||
|
| 2026-06-11 | v1.9.10 | Feature-235 validation follow-up — validation-report.md = PARTIAL, cutover-validation checklist added, targeted verification 27/27 | ⏳ Pending T032 execution |
|
||||||
|
|||||||
@@ -4,11 +4,11 @@
|
|||||||
|
|
||||||
- Reorganize โครงสร้างโฟลเดอร์ `specs/` สำเร็จ (`100-Infrastructures`, `200-fullstacks`, `300-others`)
|
- Reorganize โครงสร้างโฟลเดอร์ `specs/` สำเร็จ (`100-Infrastructures`, `200-fullstacks`, `300-others`)
|
||||||
- อัปเดตกฎ `AGENTS.md` และ `GEMINI.md` ให้ตรงกับมาตรฐานใหม่
|
- อัปเดตกฎ `AGENTS.md` และ `GEMINI.md` ให้ตรงกับมาตรฐานใหม่
|
||||||
- ริเริ่มระบบ `memory/agent-memory.md`
|
- ริเริ่มระบบ `memory/project-memory-override.md`
|
||||||
|
|
||||||
## ไฟล์ที่แก้ไข
|
## ไฟล์ที่แก้ไข
|
||||||
|
|
||||||
- `specs/` folder structure reorganization
|
- `specs/` folder structure reorganization
|
||||||
- `AGENTS.md` update
|
- `AGENTS.md` update
|
||||||
- `GEMINI.md` update
|
- `GEMINI.md` update
|
||||||
- `memory/agent-memory.md` initial creation
|
- `memory/project-memory-override.md` initial creation
|
||||||
|
|||||||
@@ -0,0 +1,64 @@
|
|||||||
|
# Session 17 — 2026-06-11 (AI Runtime Policy Refactor — Feature-235)
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Implement Feature-235 AI Runtime Policy Refactor ตาม spec.md และ plan.md บน branch `235-ai-runtime-policy-refactor` — เปลี่ยน API contract ให้ caller ส่ง job type เท่านั้น (ไม่มี `model.key` / parameter overrides), เพิ่ม backend policy mapping layer (`AiPolicyService`), adaptive OCR residency, CPU fallback retrieval, และ BullMQ queue policy — จบด้วย test suite 23/23 ผ่านครบ, ESLint + tsc clean.
|
||||||
|
|
||||||
|
## ปัญหาที่พบ (Root Cause)
|
||||||
|
|
||||||
|
| ปัญหา | สาเหตุ | การแก้ไข |
|
||||||
|
|---|---|---|
|
||||||
|
| `VramStatus` / `getVramStatus()` / `invalidateCache()` หาย | refactor ก่อนหน้าลบออก แต่ controller ยังใช้ | Restore เมธอดใน `vram-monitor.service.ts` |
|
||||||
|
| TS2367 ใน `ai-policy.service.ts` | compare `ExecutionProfile` กับ `'ocr-extract'` ผิด type | แก้ compare เป็น `'np-dms-ai'` |
|
||||||
|
| TS1272 `import type` ใน DTO | import ประกอบ class ด้วย `import type` ไม่ได้ | เปลี่ยนเป็น regular import |
|
||||||
|
| `any` types ใน `ai-batch.processor.ts` | `snapshotParams` / `effectiveProfile` ไม่มี type กำกับ | กำหนด interface `AiBatchJobData` สำหรับ runtime metadata |
|
||||||
|
| NestJS DI error ใน `ai.controller.spec.ts` | ขาด mock `'default_IORedisModuleConnectionToken'` | เพิ่ม mock provider ใน test module providers |
|
||||||
|
|
||||||
|
## การแก้ไข (Fix)
|
||||||
|
|
||||||
|
| ไฟล์ | การเปลี่ยนแปลง |
|
||||||
|
|---|---|
|
||||||
|
| `backend/src/modules/ai/services/vram-monitor.service.ts` | Restore `VramStatus`, `getVramStatus()`, `invalidateCache()` |
|
||||||
|
| `backend/src/modules/ai/services/ai-policy.service.ts` | แก้ TS2367 type comparison; เพิ่ม `getProfileForJobType()`, `createJobPayload()` |
|
||||||
|
| `backend/src/modules/ai/interfaces/execution-policy.interface.ts` | สร้างใหม่ — `ExecutionProfile`, `RuntimePolicy`, `AiJobPayload`, `VramHeadroom` |
|
||||||
|
| `backend/src/modules/ai/interfaces/ocr-residency.interface.ts` | สร้างใหม่ — `OcrResidencyDecision` |
|
||||||
|
| `backend/src/modules/ai/dto/create-ai-job.dto.ts` | ลบ `model.key`, `executionProfile`, `temperature`, `top_p`, `maxTokens`; เพิ่ม forbidden field validators |
|
||||||
|
| `backend/src/modules/ai/dto/ai-job-response.dto.ts` | เพิ่ม `modelUsed`, `effectiveProfile` fields |
|
||||||
|
| `backend/src/modules/ai/ai.service.ts` | inject `AiPolicyService`; กำหนด `effectiveProfile` จาก job type อัตโนมัติ |
|
||||||
|
| `backend/src/modules/ai/processors/ai-realtime.processor.ts` | เพิ่ม lightweight job classification; redirect heavy jobs ไป ai-batch |
|
||||||
|
| `backend/src/modules/ai/processors/ai-batch.processor.ts` | type-safe runtime policy metadata; log `retrievalDevice`; canonical `ocrUsed` |
|
||||||
|
| `backend/src/modules/ai/services/ocr.service.ts` | inject `VramMonitorService`; `calculateOcrResidency()` dynamic keep_alive |
|
||||||
|
| `backend/src/config/bullmq.config.ts` | เพิ่ม `REALTIME_CONCURRENCY` env (default 2) |
|
||||||
|
| `backend/src/modules/ai/ai.module.ts` | register `AiPolicyService`, `VramMonitorService` |
|
||||||
|
| `backend/src/modules/ai/guards/execution-profile.guard.ts` | สร้างใหม่ (สำรองไว้; ไม่ใช้ใน option B) |
|
||||||
|
| `backend/src/modules/ai/tests/ai-policy.service.spec.ts` | สร้างใหม่ — 7 tests ผ่าน |
|
||||||
|
| `backend/src/modules/ai/tests/ocr-residency.spec.ts` | สร้างใหม่ — 5 tests ผ่าน |
|
||||||
|
| `backend/src/modules/ai/tests/queue-policy.spec.ts` | สร้างใหม่ — 2 tests ผ่าน |
|
||||||
|
| `backend/src/modules/ai/tests/vram-monitor.service.spec.ts` | สร้างใหม่ — 5 tests ผ่าน |
|
||||||
|
| `backend/src/modules/ai/tests/ai.controller.spec.ts` | สร้างใหม่ — 4 integration tests ผ่าน; เพิ่ม Redis mock |
|
||||||
|
| `frontend/types/ai.ts` | ลบ `model` field; เพิ่ม `executionProfile?`, `modelUsed?` |
|
||||||
|
| `frontend/lib/services/admin-ai.service.ts` | อัปเดต types ตาม DTO ใหม่ |
|
||||||
|
| `frontend/components/admin/ai/OcrSandboxPromptManager.tsx` | แสดง `np-dms-ai` / `np-dms-ocr` แทน runtime names |
|
||||||
|
| `frontend/app/(admin)/admin/ai/page.tsx` | แสดง canonical names ใน System Health panel |
|
||||||
|
| `frontend/public/locales/en/ai.json` | เพิ่ม `ai_runtime_policy` namespace |
|
||||||
|
| `frontend/public/locales/th/ai.json` | เพิ่ม `ai_runtime_policy` namespace |
|
||||||
|
| `backend/.env.example` | เพิ่ม `AI_OCR_RESIDENCY_WINDOW_SECONDS` |
|
||||||
|
| `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/.env.template` | สร้างใหม่ — VRAM + residency + concurrency vars |
|
||||||
|
| `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py` | adaptive `keep_alive` param; CPU fallback บน `/embed` + `/rerank` |
|
||||||
|
| `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/vram_monitor.py` | สร้างใหม่ — query Ollama `/api/ps` |
|
||||||
|
| `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/services/residency_policy.py` | สร้างใหม่ — keep_alive calculation |
|
||||||
|
| `CONTEXT.md` | เพิ่ม Feature-235 ใน System Readiness + ADR-034 ใน ADRs table |
|
||||||
|
|
||||||
|
## กฎที่ Lock แล้ว
|
||||||
|
|
||||||
|
- **Option B (Policy-Only)**: Caller ไม่มี `executionProfile` field ใน `CreateAiJobDto` — backend กำหนด profile จาก `job.type` เท่านั้น (ไม่รับ caller input)
|
||||||
|
- **Canonical Model Identity**: `np-dms-ai` (LLM) / `np-dms-ocr` (OCR) ทุก layer ที่ผู้ใช้เห็น — ชื่อ runtime (`typhoon*`) ใช้เฉพาะ ops internals
|
||||||
|
- **Redis mock token**: ทุก test ที่ bootstrap `AiController` ต้องเพิ่ม `'default_IORedisModuleConnectionToken'` ใน providers
|
||||||
|
- **Lightweight Realtime Jobs**: เฉพาะ `intent-classify`, `tool-suggest` — ห้าม `rag-query` อยู่ใน ai-realtime
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- [x] `npx jest src/modules/ai/tests/` — 23/23 tests ผ่าน (5 suites)
|
||||||
|
- [x] `npx tsc --noEmit` — ไม่มี error
|
||||||
|
- [x] `npx eslint src/modules/ai/ --max-warnings=0` — ไม่มี warning
|
||||||
|
- [ ] T032: Manual validation Gate 1–4 ตาม `quickstart.md` (ต้องรันบน environment จริง)
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
# Session 18 — 2026-06-11 (Feature-235 Validation & Memory Save)
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
สรุปผล validation ของ Feature-235, บันทึกรายงาน `validation-report.md`, และสร้าง cutover checklist สำหรับปิด T032 / sidecar pytest โดยยึด contract ปัจจุบันของ `/api/ai/jobs` ที่เป็น Option B.
|
||||||
|
|
||||||
|
## ปัญหาที่พบ (Root Cause)
|
||||||
|
|
||||||
|
| ปัญหา | สาเหตุ | การแก้ไข |
|
||||||
|
|---|---|---|
|
||||||
|
| Validation ยังไม่ขึ้น `PASS` | ยังขาด manual Gate 1–4 และ sidecar pytest ใน environment จริง | สร้าง `checklists/cutover-validation.md` เพื่อใช้ปิดงานอย่างเป็นระบบ |
|
||||||
|
| `quickstart.md` เดิมไม่สอดคล้องกับ contract ปัจจุบัน | ตัวอย่างเก่ายังส่ง `executionProfile` / `large-context` จาก caller | เก็บ evidence ใน validation report และทำ checklist ใหม่ตาม implementation ปัจจุบัน |
|
||||||
|
| Project memory ยังสะท้อน test count เก่า | รอบ verification ล่าสุดได้ targeted tests 27/27 แล้ว | อัปเดต `memory/project-memory-override.md` ให้ตรงกับสถานะล่าสุด |
|
||||||
|
|
||||||
|
## การแก้ไข (Fix)
|
||||||
|
|
||||||
|
| ไฟล์ | การเปลี่ยนแปลง |
|
||||||
|
|---|---|
|
||||||
|
| `specs/200-fullstacks/235-ai-runtime-policy-refactor/validation-report.md` | บันทึกผล validation เป็น `PARTIAL` พร้อม requirement matrix, gaps, และ recommendations |
|
||||||
|
| `specs/200-fullstacks/235-ai-runtime-policy-refactor/checklists/cutover-validation.md` | สร้าง runbook สำหรับ T032, backend tests, backend build, และ sidecar pytest |
|
||||||
|
| `specs/88-logs/rollouts.md` | เพิ่ม entry สำหรับ validation follow-up ของ Feature-235 |
|
||||||
|
| `memory/project-memory-override.md` | อัปเดตสถานะ Feature-235, test count ล่าสุด, และชี้ไปยัง cutover checklist |
|
||||||
|
|
||||||
|
## กฎที่ Lock แล้ว
|
||||||
|
|
||||||
|
- ใช้ `checklists/cutover-validation.md` เป็น runbook หลักสำหรับปิด T032
|
||||||
|
- Validation target ของ `/api/ai/jobs` ต้องยึด Option B ปัจจุบัน ไม่ใช้ caller-driven `executionProfile`
|
||||||
|
- ถ้าต้องบันทึกผล verification ต่อ ให้แนบ evidence จริงจาก backend / sidecar environment
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- [x] `pnpm --filter backend test -- --runInBand --testPathPatterns="ai.service.spec.ts|queue-policy.spec.ts|ai.controller.spec.ts"` = 27/27 ผ่าน
|
||||||
|
- [x] `pnpm --filter backend build` = ผ่าน
|
||||||
|
- [x] `validation-report.md` ถูกสร้างและเก็บผลว่า `PARTIAL`
|
||||||
|
- [x] `cutover-validation.md` ถูกสร้างเพื่อใช้ปิด T032
|
||||||
Reference in New Issue
Block a user