diff --git a/.agents/rules/11-ai-integration.md b/.agents/rules/11-ai-integration.md index 864849ad..6e1b307e 100644 --- a/.agents/rules/11-ai-integration.md +++ b/.agents/rules/11-ai-integration.md @@ -3,7 +3,7 @@ ## CRITICAL RULES - **ALWAYS** follow ADR-023 AI boundary policy (isolation on Admin Desktop) -- **ALWAYS** use ADR-023A 2-model stack (gemma4:e2b + nomic-embed-text) +- **ALWAYS** use ADR-034 model stack (typhoon2.5-np-dms:latest + typhoon-np-dms-ocr:latest + nomic-embed-text) - **ALWAYS** use BullMQ 2-queue (ai-realtime + ai-batch) for GPU overload prevention - **NEVER** allow AI direct database/storage access - **ALWAYS** implement human-in-the-loop validation @@ -26,7 +26,7 @@ n8n (Migration) → DMS API → BullMQ → Admin Desktop (Ollama) → Backend Va | ----------------- | ------------------------- | ------------------------------------------------------------------------ | | **AI Gateway** | Backend (NestJS) | API endpoints, validation, audit logging | | **BullMQ Queues** | Backend (NestJS) | ai-realtime (RAG/Suggest), ai-batch (OCR/Extract/Embed) | -| **Ollama Engine** | Admin Desktop (Desk-5439) | gemma4:e2b (LLM) + nomic-embed-text (Embedding) | +| **Ollama Engine** | Admin Desktop (Desk-5439) | typhoon2.5-np-dms:latest (Main LLM) + typhoon-np-dms-ocr:latest (OCR, keep_alive:0) + nomic-embed-text (Embedding) | | **OCR Engine** | Admin Desktop (Desk-5439) | PaddleOCR + PyThaiNLP (Thai/English text extraction) | | **Orchestrator** | QNAP NAS (n8n) | Migration Phase orchestrator only (calls DMS API, never Ollama directly) | @@ -76,7 +76,7 @@ export class AiService { async extractMetadata(documentId: string): Promise { // 1. Validate permissions // 2. Queue job to BullMQ (ai-batch or ai-realtime) - // 3. Worker sends to Admin Desktop AI (gemma4:e2b) + // 3. Worker sends to Admin Desktop AI (typhoon2.5-np-dms:latest) // 4. Validate AI response // 5. Log audit trail to ai_audit_logs // 6. 
Return validated results @@ -113,9 +113,9 @@ const DocumentReviewForm = ({ document, aiSuggestions }) => { - **n8n Boundary:** n8n MUST call DMS API → BullMQ, NEVER Ollama/Qdrant directly - **GPU Overload Prevention:** BullMQ 2-queue (ai-realtime + ai-batch) with concurrency=1 -## ADR-023A Specific Rules +## ADR-034 Model Stack (supersedes ADR-023A §2.1) -- **2-Model Stack:** gemma4:e2b + nomic-embed-text +- **3-Model Config:** typhoon2.5-np-dms:latest (Main) + typhoon-np-dms-ocr:latest (OCR, keep_alive:0) + nomic-embed-text (Embedding) - **PDF 3-Page Limit:** Classification/Tagging uses first 3 pages only (NOT RAG embedding) - **RAG Embedding:** Full document chunked at 512 tokens/64 tokens overlap - **OCR Auto-Detect:** PyMuPDF chars > 100 → Fast path, else PaddleOCR @@ -129,7 +129,7 @@ const DocumentReviewForm = ({ document, aiSuggestions }) => { - [ ] BullMQ 2-queue setup (ai-realtime + ai-batch) - [ ] QdrantService with projectPublicId enforcement - [ ] DocumentReviewForm reusable component -- [ ] Admin Desktop Ollama (gemma4:e2b + nomic-embed-text) + PaddleOCR setup +- [ ] Admin Desktop Ollama (typhoon2.5-np-dms:latest + typhoon-np-dms-ocr:latest + nomic-embed-text) setup - [ ] n8n workflow orchestration (Migration Phase only) - [ ] AI audit logging and monitoring (ai_audit_logs) - [ ] Human-in-the-loop validation workflows diff --git a/.windsurf/plans/specs-reorganization-05bb1b.md b/.devin/plans/specs-reorganization-05bb1b.md similarity index 100% rename from .windsurf/plans/specs-reorganization-05bb1b.md rename to .devin/plans/specs-reorganization-05bb1b.md diff --git a/.windsurf/rules/00-project-context.md b/.devin/rules/00-project-context.md similarity index 100% rename from .windsurf/rules/00-project-context.md rename to .devin/rules/00-project-context.md diff --git a/.windsurf/rules/01-adr-019-uuid.md b/.devin/rules/01-adr-019-uuid.md similarity index 100% rename from .windsurf/rules/01-adr-019-uuid.md rename to .devin/rules/01-adr-019-uuid.md diff 
--git a/.windsurf/rules/02-security.md b/.devin/rules/02-security.md similarity index 100% rename from .windsurf/rules/02-security.md rename to .devin/rules/02-security.md diff --git a/.windsurf/rules/03-typescript.md b/.devin/rules/03-typescript.md similarity index 100% rename from .windsurf/rules/03-typescript.md rename to .devin/rules/03-typescript.md diff --git a/.windsurf/rules/04-domain-terminology.md b/.devin/rules/04-domain-terminology.md similarity index 100% rename from .windsurf/rules/04-domain-terminology.md rename to .devin/rules/04-domain-terminology.md diff --git a/.windsurf/rules/05-forbidden-actions.md b/.devin/rules/05-forbidden-actions.md similarity index 100% rename from .windsurf/rules/05-forbidden-actions.md rename to .devin/rules/05-forbidden-actions.md diff --git a/.windsurf/rules/06-backend-patterns.md b/.devin/rules/06-backend-patterns.md similarity index 100% rename from .windsurf/rules/06-backend-patterns.md rename to .devin/rules/06-backend-patterns.md diff --git a/.windsurf/rules/07-frontend-patterns.md b/.devin/rules/07-frontend-patterns.md similarity index 100% rename from .windsurf/rules/07-frontend-patterns.md rename to .devin/rules/07-frontend-patterns.md diff --git a/.windsurf/rules/08-development-flow.md b/.devin/rules/08-development-flow.md similarity index 100% rename from .windsurf/rules/08-development-flow.md rename to .devin/rules/08-development-flow.md diff --git a/.windsurf/rules/09-commit-checklist.md b/.devin/rules/09-commit-checklist.md similarity index 100% rename from .windsurf/rules/09-commit-checklist.md rename to .devin/rules/09-commit-checklist.md diff --git a/.windsurf/rules/10-error-handling.md b/.devin/rules/10-error-handling.md similarity index 100% rename from .windsurf/rules/10-error-handling.md rename to .devin/rules/10-error-handling.md diff --git a/.windsurf/rules/11-ai-integration.md b/.devin/rules/11-ai-integration.md similarity index 100% rename from .windsurf/rules/11-ai-integration.md rename to 
.devin/rules/11-ai-integration.md diff --git a/.windsurf/rules/README.md b/.devin/rules/README.md similarity index 100% rename from .windsurf/rules/README.md rename to .devin/rules/README.md diff --git a/.windsurf/rules/specify-rules.md b/.devin/rules/specify-rules.md similarity index 100% rename from .windsurf/rules/specify-rules.md rename to .devin/rules/specify-rules.md diff --git a/.windsurf/skills/README.md b/.devin/skills/README.md similarity index 100% rename from .windsurf/skills/README.md rename to .devin/skills/README.md diff --git a/.windsurf/skills/VERSION b/.devin/skills/VERSION similarity index 100% rename from .windsurf/skills/VERSION rename to .devin/skills/VERSION diff --git a/.windsurf/skills/_LCBP3-CONTEXT.md b/.devin/skills/_LCBP3-CONTEXT.md similarity index 100% rename from .windsurf/skills/_LCBP3-CONTEXT.md rename to .devin/skills/_LCBP3-CONTEXT.md diff --git a/.windsurf/skills/bugfix/SKILL.md b/.devin/skills/bugfix/SKILL.md similarity index 100% rename from .windsurf/skills/bugfix/SKILL.md rename to .devin/skills/bugfix/SKILL.md diff --git a/.windsurf/skills/diagnose/SKILL.md b/.devin/skills/diagnose/SKILL.md similarity index 100% rename from .windsurf/skills/diagnose/SKILL.md rename to .devin/skills/diagnose/SKILL.md diff --git a/.windsurf/skills/diagnose/scripts/hitl-loop.template.sh b/.devin/skills/diagnose/scripts/hitl-loop.template.sh similarity index 100% rename from .windsurf/skills/diagnose/scripts/hitl-loop.template.sh rename to .devin/skills/diagnose/scripts/hitl-loop.template.sh diff --git a/.windsurf/skills/e2e-testing/SKILL.md b/.devin/skills/e2e-testing/SKILL.md similarity index 100% rename from .windsurf/skills/e2e-testing/SKILL.md rename to .devin/skills/e2e-testing/SKILL.md diff --git a/.windsurf/skills/grill-with-docs/ADR-FORMAT.md b/.devin/skills/grill-with-docs/ADR-FORMAT.md similarity index 100% rename from .windsurf/skills/grill-with-docs/ADR-FORMAT.md rename to .devin/skills/grill-with-docs/ADR-FORMAT.md diff --git 
a/.windsurf/skills/grill-with-docs/CONTEXT-FORMAT.md b/.devin/skills/grill-with-docs/CONTEXT-FORMAT.md similarity index 100% rename from .windsurf/skills/grill-with-docs/CONTEXT-FORMAT.md rename to .devin/skills/grill-with-docs/CONTEXT-FORMAT.md diff --git a/.windsurf/skills/grill-with-docs/SKILL.md b/.devin/skills/grill-with-docs/SKILL.md similarity index 100% rename from .windsurf/skills/grill-with-docs/SKILL.md rename to .devin/skills/grill-with-docs/SKILL.md diff --git a/.windsurf/skills/nestjs-best-practices/.github/workflows/branch-protection.yml b/.devin/skills/nestjs-best-practices/.github/workflows/branch-protection.yml similarity index 100% rename from .windsurf/skills/nestjs-best-practices/.github/workflows/branch-protection.yml rename to .devin/skills/nestjs-best-practices/.github/workflows/branch-protection.yml diff --git a/.windsurf/skills/nestjs-best-practices/.github/workflows/deploy.yml b/.devin/skills/nestjs-best-practices/.github/workflows/deploy.yml similarity index 100% rename from .windsurf/skills/nestjs-best-practices/.github/workflows/deploy.yml rename to .devin/skills/nestjs-best-practices/.github/workflows/deploy.yml diff --git a/.windsurf/skills/nestjs-best-practices/.gitignore b/.devin/skills/nestjs-best-practices/.gitignore similarity index 100% rename from .windsurf/skills/nestjs-best-practices/.gitignore rename to .devin/skills/nestjs-best-practices/.gitignore diff --git a/.windsurf/skills/nestjs-best-practices/AGENTS.md b/.devin/skills/nestjs-best-practices/AGENTS.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/AGENTS.md rename to .devin/skills/nestjs-best-practices/AGENTS.md diff --git a/.windsurf/skills/nestjs-best-practices/AGENTS.md.v1.1.0.bak b/.devin/skills/nestjs-best-practices/AGENTS.md.v1.1.0.bak similarity index 100% rename from .windsurf/skills/nestjs-best-practices/AGENTS.md.v1.1.0.bak rename to .devin/skills/nestjs-best-practices/AGENTS.md.v1.1.0.bak diff --git 
a/.windsurf/skills/nestjs-best-practices/README.md b/.devin/skills/nestjs-best-practices/README.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/README.md rename to .devin/skills/nestjs-best-practices/README.md diff --git a/.windsurf/skills/nestjs-best-practices/SKILL.md b/.devin/skills/nestjs-best-practices/SKILL.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/SKILL.md rename to .devin/skills/nestjs-best-practices/SKILL.md diff --git a/.windsurf/skills/nestjs-best-practices/metadata.json b/.devin/skills/nestjs-best-practices/metadata.json similarity index 100% rename from .windsurf/skills/nestjs-best-practices/metadata.json rename to .devin/skills/nestjs-best-practices/metadata.json diff --git a/.windsurf/skills/nestjs-best-practices/rules/api-use-dto-serialization.md b/.devin/skills/nestjs-best-practices/rules/api-use-dto-serialization.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/api-use-dto-serialization.md rename to .devin/skills/nestjs-best-practices/rules/api-use-dto-serialization.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/api-use-interceptors.md b/.devin/skills/nestjs-best-practices/rules/api-use-interceptors.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/api-use-interceptors.md rename to .devin/skills/nestjs-best-practices/rules/api-use-interceptors.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/api-use-pipes.md b/.devin/skills/nestjs-best-practices/rules/api-use-pipes.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/api-use-pipes.md rename to .devin/skills/nestjs-best-practices/rules/api-use-pipes.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/api-versioning.md b/.devin/skills/nestjs-best-practices/rules/api-versioning.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/api-versioning.md rename to 
.devin/skills/nestjs-best-practices/rules/api-versioning.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/arch-avoid-circular-deps.md b/.devin/skills/nestjs-best-practices/rules/arch-avoid-circular-deps.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/arch-avoid-circular-deps.md rename to .devin/skills/nestjs-best-practices/rules/arch-avoid-circular-deps.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/arch-feature-modules.md b/.devin/skills/nestjs-best-practices/rules/arch-feature-modules.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/arch-feature-modules.md rename to .devin/skills/nestjs-best-practices/rules/arch-feature-modules.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/arch-module-sharing.md b/.devin/skills/nestjs-best-practices/rules/arch-module-sharing.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/arch-module-sharing.md rename to .devin/skills/nestjs-best-practices/rules/arch-module-sharing.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/arch-single-responsibility.md b/.devin/skills/nestjs-best-practices/rules/arch-single-responsibility.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/arch-single-responsibility.md rename to .devin/skills/nestjs-best-practices/rules/arch-single-responsibility.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/arch-use-events.md b/.devin/skills/nestjs-best-practices/rules/arch-use-events.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/arch-use-events.md rename to .devin/skills/nestjs-best-practices/rules/arch-use-events.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/arch-use-repository-pattern.md b/.devin/skills/nestjs-best-practices/rules/arch-use-repository-pattern.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/arch-use-repository-pattern.md 
rename to .devin/skills/nestjs-best-practices/rules/arch-use-repository-pattern.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/db-avoid-n-plus-one.md b/.devin/skills/nestjs-best-practices/rules/db-avoid-n-plus-one.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/db-avoid-n-plus-one.md rename to .devin/skills/nestjs-best-practices/rules/db-avoid-n-plus-one.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/db-hybrid-identifier.md b/.devin/skills/nestjs-best-practices/rules/db-hybrid-identifier.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/db-hybrid-identifier.md rename to .devin/skills/nestjs-best-practices/rules/db-hybrid-identifier.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/db-no-typeorm-migrations.md b/.devin/skills/nestjs-best-practices/rules/db-no-typeorm-migrations.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/db-no-typeorm-migrations.md rename to .devin/skills/nestjs-best-practices/rules/db-no-typeorm-migrations.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/db-use-migrations.md b/.devin/skills/nestjs-best-practices/rules/db-use-migrations.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/db-use-migrations.md rename to .devin/skills/nestjs-best-practices/rules/db-use-migrations.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/db-use-transactions.md b/.devin/skills/nestjs-best-practices/rules/db-use-transactions.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/db-use-transactions.md rename to .devin/skills/nestjs-best-practices/rules/db-use-transactions.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/devops-graceful-shutdown.md b/.devin/skills/nestjs-best-practices/rules/devops-graceful-shutdown.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/devops-graceful-shutdown.md rename to 
.devin/skills/nestjs-best-practices/rules/devops-graceful-shutdown.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/devops-use-config-module.md b/.devin/skills/nestjs-best-practices/rules/devops-use-config-module.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/devops-use-config-module.md rename to .devin/skills/nestjs-best-practices/rules/devops-use-config-module.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/devops-use-logging.md b/.devin/skills/nestjs-best-practices/rules/devops-use-logging.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/devops-use-logging.md rename to .devin/skills/nestjs-best-practices/rules/devops-use-logging.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/di-avoid-service-locator.md b/.devin/skills/nestjs-best-practices/rules/di-avoid-service-locator.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/di-avoid-service-locator.md rename to .devin/skills/nestjs-best-practices/rules/di-avoid-service-locator.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/di-interface-segregation.md b/.devin/skills/nestjs-best-practices/rules/di-interface-segregation.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/di-interface-segregation.md rename to .devin/skills/nestjs-best-practices/rules/di-interface-segregation.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/di-liskov-substitution.md b/.devin/skills/nestjs-best-practices/rules/di-liskov-substitution.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/di-liskov-substitution.md rename to .devin/skills/nestjs-best-practices/rules/di-liskov-substitution.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/di-prefer-constructor-injection.md b/.devin/skills/nestjs-best-practices/rules/di-prefer-constructor-injection.md similarity index 100% rename from 
.windsurf/skills/nestjs-best-practices/rules/di-prefer-constructor-injection.md rename to .devin/skills/nestjs-best-practices/rules/di-prefer-constructor-injection.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/di-scope-awareness.md b/.devin/skills/nestjs-best-practices/rules/di-scope-awareness.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/di-scope-awareness.md rename to .devin/skills/nestjs-best-practices/rules/di-scope-awareness.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/di-use-interfaces-tokens.md b/.devin/skills/nestjs-best-practices/rules/di-use-interfaces-tokens.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/di-use-interfaces-tokens.md rename to .devin/skills/nestjs-best-practices/rules/di-use-interfaces-tokens.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/error-handle-async-errors.md b/.devin/skills/nestjs-best-practices/rules/error-handle-async-errors.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/error-handle-async-errors.md rename to .devin/skills/nestjs-best-practices/rules/error-handle-async-errors.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/error-throw-http-exceptions.md b/.devin/skills/nestjs-best-practices/rules/error-throw-http-exceptions.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/error-throw-http-exceptions.md rename to .devin/skills/nestjs-best-practices/rules/error-throw-http-exceptions.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/error-use-exception-filters.md b/.devin/skills/nestjs-best-practices/rules/error-use-exception-filters.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/error-use-exception-filters.md rename to .devin/skills/nestjs-best-practices/rules/error-use-exception-filters.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/lcbp3-ai-boundary.md 
b/.devin/skills/nestjs-best-practices/rules/lcbp3-ai-boundary.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/lcbp3-ai-boundary.md rename to .devin/skills/nestjs-best-practices/rules/lcbp3-ai-boundary.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/lcbp3-workflow-engine.md b/.devin/skills/nestjs-best-practices/rules/lcbp3-workflow-engine.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/lcbp3-workflow-engine.md rename to .devin/skills/nestjs-best-practices/rules/lcbp3-workflow-engine.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/micro-use-health-checks.md b/.devin/skills/nestjs-best-practices/rules/micro-use-health-checks.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/micro-use-health-checks.md rename to .devin/skills/nestjs-best-practices/rules/micro-use-health-checks.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/micro-use-patterns.md b/.devin/skills/nestjs-best-practices/rules/micro-use-patterns.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/micro-use-patterns.md rename to .devin/skills/nestjs-best-practices/rules/micro-use-patterns.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/micro-use-queues.md b/.devin/skills/nestjs-best-practices/rules/micro-use-queues.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/micro-use-queues.md rename to .devin/skills/nestjs-best-practices/rules/micro-use-queues.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/perf-async-hooks.md b/.devin/skills/nestjs-best-practices/rules/perf-async-hooks.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/perf-async-hooks.md rename to .devin/skills/nestjs-best-practices/rules/perf-async-hooks.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/perf-lazy-loading.md 
b/.devin/skills/nestjs-best-practices/rules/perf-lazy-loading.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/perf-lazy-loading.md rename to .devin/skills/nestjs-best-practices/rules/perf-lazy-loading.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/perf-optimize-database.md b/.devin/skills/nestjs-best-practices/rules/perf-optimize-database.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/perf-optimize-database.md rename to .devin/skills/nestjs-best-practices/rules/perf-optimize-database.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/perf-use-caching.md b/.devin/skills/nestjs-best-practices/rules/perf-use-caching.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/perf-use-caching.md rename to .devin/skills/nestjs-best-practices/rules/perf-use-caching.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/security-auth-jwt.md b/.devin/skills/nestjs-best-practices/rules/security-auth-jwt.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/security-auth-jwt.md rename to .devin/skills/nestjs-best-practices/rules/security-auth-jwt.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/security-file-two-phase-upload.md b/.devin/skills/nestjs-best-practices/rules/security-file-two-phase-upload.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/security-file-two-phase-upload.md rename to .devin/skills/nestjs-best-practices/rules/security-file-two-phase-upload.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/security-rate-limiting.md b/.devin/skills/nestjs-best-practices/rules/security-rate-limiting.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/security-rate-limiting.md rename to .devin/skills/nestjs-best-practices/rules/security-rate-limiting.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/security-sanitize-output.md 
b/.devin/skills/nestjs-best-practices/rules/security-sanitize-output.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/security-sanitize-output.md rename to .devin/skills/nestjs-best-practices/rules/security-sanitize-output.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/security-use-guards.md b/.devin/skills/nestjs-best-practices/rules/security-use-guards.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/security-use-guards.md rename to .devin/skills/nestjs-best-practices/rules/security-use-guards.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/security-validate-all-input.md b/.devin/skills/nestjs-best-practices/rules/security-validate-all-input.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/security-validate-all-input.md rename to .devin/skills/nestjs-best-practices/rules/security-validate-all-input.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/test-e2e-supertest.md b/.devin/skills/nestjs-best-practices/rules/test-e2e-supertest.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/test-e2e-supertest.md rename to .devin/skills/nestjs-best-practices/rules/test-e2e-supertest.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/test-mock-external-services.md b/.devin/skills/nestjs-best-practices/rules/test-mock-external-services.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/test-mock-external-services.md rename to .devin/skills/nestjs-best-practices/rules/test-mock-external-services.md diff --git a/.windsurf/skills/nestjs-best-practices/rules/test-use-testing-module.md b/.devin/skills/nestjs-best-practices/rules/test-use-testing-module.md similarity index 100% rename from .windsurf/skills/nestjs-best-practices/rules/test-use-testing-module.md rename to .devin/skills/nestjs-best-practices/rules/test-use-testing-module.md diff --git 
a/.windsurf/skills/nestjs-best-practices/scripts/build-agents.ts b/.devin/skills/nestjs-best-practices/scripts/build-agents.ts similarity index 100% rename from .windsurf/skills/nestjs-best-practices/scripts/build-agents.ts rename to .devin/skills/nestjs-best-practices/scripts/build-agents.ts diff --git a/.windsurf/skills/nestjs-best-practices/scripts/build.sh b/.devin/skills/nestjs-best-practices/scripts/build.sh similarity index 100% rename from .windsurf/skills/nestjs-best-practices/scripts/build.sh rename to .devin/skills/nestjs-best-practices/scripts/build.sh diff --git a/.windsurf/skills/nestjs-best-practices/scripts/package.json b/.devin/skills/nestjs-best-practices/scripts/package.json similarity index 100% rename from .windsurf/skills/nestjs-best-practices/scripts/package.json rename to .devin/skills/nestjs-best-practices/scripts/package.json diff --git a/.windsurf/skills/next-best-practices/SKILL.md b/.devin/skills/next-best-practices/SKILL.md similarity index 100% rename from .windsurf/skills/next-best-practices/SKILL.md rename to .devin/skills/next-best-practices/SKILL.md diff --git a/.windsurf/skills/next-best-practices/async-patterns.md b/.devin/skills/next-best-practices/async-patterns.md similarity index 100% rename from .windsurf/skills/next-best-practices/async-patterns.md rename to .devin/skills/next-best-practices/async-patterns.md diff --git a/.windsurf/skills/next-best-practices/bundling.md b/.devin/skills/next-best-practices/bundling.md similarity index 100% rename from .windsurf/skills/next-best-practices/bundling.md rename to .devin/skills/next-best-practices/bundling.md diff --git a/.windsurf/skills/next-best-practices/data-patterns.md b/.devin/skills/next-best-practices/data-patterns.md similarity index 100% rename from .windsurf/skills/next-best-practices/data-patterns.md rename to .devin/skills/next-best-practices/data-patterns.md diff --git a/.windsurf/skills/next-best-practices/debug-tricks.md 
b/.devin/skills/next-best-practices/debug-tricks.md similarity index 100% rename from .windsurf/skills/next-best-practices/debug-tricks.md rename to .devin/skills/next-best-practices/debug-tricks.md diff --git a/.windsurf/skills/next-best-practices/directives.md b/.devin/skills/next-best-practices/directives.md similarity index 100% rename from .windsurf/skills/next-best-practices/directives.md rename to .devin/skills/next-best-practices/directives.md diff --git a/.windsurf/skills/next-best-practices/error-handling.md b/.devin/skills/next-best-practices/error-handling.md similarity index 100% rename from .windsurf/skills/next-best-practices/error-handling.md rename to .devin/skills/next-best-practices/error-handling.md diff --git a/.windsurf/skills/next-best-practices/file-conventions.md b/.devin/skills/next-best-practices/file-conventions.md similarity index 100% rename from .windsurf/skills/next-best-practices/file-conventions.md rename to .devin/skills/next-best-practices/file-conventions.md diff --git a/.windsurf/skills/next-best-practices/font.md b/.devin/skills/next-best-practices/font.md similarity index 100% rename from .windsurf/skills/next-best-practices/font.md rename to .devin/skills/next-best-practices/font.md diff --git a/.windsurf/skills/next-best-practices/functions.md b/.devin/skills/next-best-practices/functions.md similarity index 100% rename from .windsurf/skills/next-best-practices/functions.md rename to .devin/skills/next-best-practices/functions.md diff --git a/.windsurf/skills/next-best-practices/hydration-error.md b/.devin/skills/next-best-practices/hydration-error.md similarity index 100% rename from .windsurf/skills/next-best-practices/hydration-error.md rename to .devin/skills/next-best-practices/hydration-error.md diff --git a/.windsurf/skills/next-best-practices/i18n.md b/.devin/skills/next-best-practices/i18n.md similarity index 100% rename from .windsurf/skills/next-best-practices/i18n.md rename to 
.devin/skills/next-best-practices/i18n.md diff --git a/.windsurf/skills/next-best-practices/image.md b/.devin/skills/next-best-practices/image.md similarity index 100% rename from .windsurf/skills/next-best-practices/image.md rename to .devin/skills/next-best-practices/image.md diff --git a/.windsurf/skills/next-best-practices/metadata.md b/.devin/skills/next-best-practices/metadata.md similarity index 100% rename from .windsurf/skills/next-best-practices/metadata.md rename to .devin/skills/next-best-practices/metadata.md diff --git a/.windsurf/skills/next-best-practices/parallel-routes.md b/.devin/skills/next-best-practices/parallel-routes.md similarity index 100% rename from .windsurf/skills/next-best-practices/parallel-routes.md rename to .devin/skills/next-best-practices/parallel-routes.md diff --git a/.windsurf/skills/next-best-practices/route-handlers.md b/.devin/skills/next-best-practices/route-handlers.md similarity index 100% rename from .windsurf/skills/next-best-practices/route-handlers.md rename to .devin/skills/next-best-practices/route-handlers.md diff --git a/.windsurf/skills/next-best-practices/rsc-boundaries.md b/.devin/skills/next-best-practices/rsc-boundaries.md similarity index 100% rename from .windsurf/skills/next-best-practices/rsc-boundaries.md rename to .devin/skills/next-best-practices/rsc-boundaries.md diff --git a/.windsurf/skills/next-best-practices/runtime-selection.md b/.devin/skills/next-best-practices/runtime-selection.md similarity index 100% rename from .windsurf/skills/next-best-practices/runtime-selection.md rename to .devin/skills/next-best-practices/runtime-selection.md diff --git a/.windsurf/skills/next-best-practices/scripts.md b/.devin/skills/next-best-practices/scripts.md similarity index 100% rename from .windsurf/skills/next-best-practices/scripts.md rename to .devin/skills/next-best-practices/scripts.md diff --git a/.windsurf/skills/next-best-practices/self-hosting.md b/.devin/skills/next-best-practices/self-hosting.md 
similarity index 100% rename from .windsurf/skills/next-best-practices/self-hosting.md rename to .devin/skills/next-best-practices/self-hosting.md diff --git a/.windsurf/skills/next-best-practices/suspense-boundaries.md b/.devin/skills/next-best-practices/suspense-boundaries.md similarity index 100% rename from .windsurf/skills/next-best-practices/suspense-boundaries.md rename to .devin/skills/next-best-practices/suspense-boundaries.md diff --git a/.windsurf/skills/next-best-practices/two-phase-upload.md b/.devin/skills/next-best-practices/two-phase-upload.md similarity index 100% rename from .windsurf/skills/next-best-practices/two-phase-upload.md rename to .devin/skills/next-best-practices/two-phase-upload.md diff --git a/.windsurf/skills/next-best-practices/uuid-handling.md b/.devin/skills/next-best-practices/uuid-handling.md similarity index 100% rename from .windsurf/skills/next-best-practices/uuid-handling.md rename to .devin/skills/next-best-practices/uuid-handling.md diff --git a/.windsurf/skills/security-review/SKILL.md b/.devin/skills/security-review/SKILL.md similarity index 100% rename from .windsurf/skills/security-review/SKILL.md rename to .devin/skills/security-review/SKILL.md diff --git a/.windsurf/skills/skills.md b/.devin/skills/skills.md similarity index 100% rename from .windsurf/skills/skills.md rename to .devin/skills/skills.md diff --git a/.windsurf/skills/speckit-analyze/SKILL.md b/.devin/skills/speckit-analyze/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-analyze/SKILL.md rename to .devin/skills/speckit-analyze/SKILL.md diff --git a/.windsurf/skills/speckit-checker/SKILL.md b/.devin/skills/speckit-checker/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-checker/SKILL.md rename to .devin/skills/speckit-checker/SKILL.md diff --git a/.windsurf/skills/speckit-checklist/SKILL.md b/.devin/skills/speckit-checklist/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-checklist/SKILL.md rename 
to .devin/skills/speckit-checklist/SKILL.md diff --git a/.windsurf/skills/speckit-checklist/templates/checklist-template.md b/.devin/skills/speckit-checklist/templates/checklist-template.md similarity index 100% rename from .windsurf/skills/speckit-checklist/templates/checklist-template.md rename to .devin/skills/speckit-checklist/templates/checklist-template.md diff --git a/.windsurf/skills/speckit-clarify/SKILL.md b/.devin/skills/speckit-clarify/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-clarify/SKILL.md rename to .devin/skills/speckit-clarify/SKILL.md diff --git a/.windsurf/skills/speckit-constitution/SKILL.md b/.devin/skills/speckit-constitution/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-constitution/SKILL.md rename to .devin/skills/speckit-constitution/SKILL.md diff --git a/.windsurf/skills/speckit-diff/SKILL.md b/.devin/skills/speckit-diff/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-diff/SKILL.md rename to .devin/skills/speckit-diff/SKILL.md diff --git a/.windsurf/skills/speckit-implement/SKILL.md b/.devin/skills/speckit-implement/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-implement/SKILL.md rename to .devin/skills/speckit-implement/SKILL.md diff --git a/.windsurf/skills/speckit-migrate/SKILL.md b/.devin/skills/speckit-migrate/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-migrate/SKILL.md rename to .devin/skills/speckit-migrate/SKILL.md diff --git a/.windsurf/skills/speckit-plan/SKILL.md b/.devin/skills/speckit-plan/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-plan/SKILL.md rename to .devin/skills/speckit-plan/SKILL.md diff --git a/.windsurf/skills/speckit-plan/templates/agent-file-template.md b/.devin/skills/speckit-plan/templates/agent-file-template.md similarity index 100% rename from .windsurf/skills/speckit-plan/templates/agent-file-template.md rename to .devin/skills/speckit-plan/templates/agent-file-template.md 
diff --git a/.windsurf/skills/speckit-plan/templates/plan-template.md b/.devin/skills/speckit-plan/templates/plan-template.md similarity index 100% rename from .windsurf/skills/speckit-plan/templates/plan-template.md rename to .devin/skills/speckit-plan/templates/plan-template.md diff --git a/.windsurf/skills/speckit-quizme/SKILL.md b/.devin/skills/speckit-quizme/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-quizme/SKILL.md rename to .devin/skills/speckit-quizme/SKILL.md diff --git a/.windsurf/skills/speckit-reviewer/SKILL.md b/.devin/skills/speckit-reviewer/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-reviewer/SKILL.md rename to .devin/skills/speckit-reviewer/SKILL.md diff --git a/.windsurf/skills/speckit-security-audit/SKILL.md b/.devin/skills/speckit-security-audit/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-security-audit/SKILL.md rename to .devin/skills/speckit-security-audit/SKILL.md diff --git a/.windsurf/skills/speckit-specify/SKILL.md b/.devin/skills/speckit-specify/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-specify/SKILL.md rename to .devin/skills/speckit-specify/SKILL.md diff --git a/.windsurf/skills/speckit-specify/templates/spec-template.md b/.devin/skills/speckit-specify/templates/spec-template.md similarity index 100% rename from .windsurf/skills/speckit-specify/templates/spec-template.md rename to .devin/skills/speckit-specify/templates/spec-template.md diff --git a/.windsurf/skills/speckit-status/SKILL.md b/.devin/skills/speckit-status/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-status/SKILL.md rename to .devin/skills/speckit-status/SKILL.md diff --git a/.windsurf/skills/speckit-tasks/SKILL.md b/.devin/skills/speckit-tasks/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-tasks/SKILL.md rename to .devin/skills/speckit-tasks/SKILL.md diff --git a/.windsurf/skills/speckit-tasks/templates/tasks-template.md 
b/.devin/skills/speckit-tasks/templates/tasks-template.md similarity index 100% rename from .windsurf/skills/speckit-tasks/templates/tasks-template.md rename to .devin/skills/speckit-tasks/templates/tasks-template.md diff --git a/.windsurf/skills/speckit-taskstoissues/SKILL.md b/.devin/skills/speckit-taskstoissues/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-taskstoissues/SKILL.md rename to .devin/skills/speckit-taskstoissues/SKILL.md diff --git a/.windsurf/skills/speckit-tester/SKILL.md b/.devin/skills/speckit-tester/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-tester/SKILL.md rename to .devin/skills/speckit-tester/SKILL.md diff --git a/.windsurf/skills/speckit-validate/SKILL.md b/.devin/skills/speckit-validate/SKILL.md similarity index 100% rename from .windsurf/skills/speckit-validate/SKILL.md rename to .devin/skills/speckit-validate/SKILL.md diff --git a/.windsurf/skills/verification-loop/SKILL.md b/.devin/skills/verification-loop/SKILL.md similarity index 100% rename from .windsurf/skills/verification-loop/SKILL.md rename to .devin/skills/verification-loop/SKILL.md diff --git a/.windsurf/workflows/00-speckit.all.md b/.devin/workflows/00-speckit.all.md similarity index 100% rename from .windsurf/workflows/00-speckit.all.md rename to .devin/workflows/00-speckit.all.md diff --git a/.windsurf/workflows/01-speckit.prepare.md b/.devin/workflows/01-speckit.prepare.md similarity index 100% rename from .windsurf/workflows/01-speckit.prepare.md rename to .devin/workflows/01-speckit.prepare.md diff --git a/.windsurf/workflows/101-speckit.constitution.md b/.devin/workflows/101-speckit.constitution.md similarity index 100% rename from .windsurf/workflows/101-speckit.constitution.md rename to .devin/workflows/101-speckit.constitution.md diff --git a/.windsurf/workflows/102-speckit.specify.md b/.devin/workflows/102-speckit.specify.md similarity index 100% rename from .windsurf/workflows/102-speckit.specify.md rename to 
.devin/workflows/102-speckit.specify.md diff --git a/.windsurf/workflows/103-speckit.clarify.md b/.devin/workflows/103-speckit.clarify.md similarity index 100% rename from .windsurf/workflows/103-speckit.clarify.md rename to .devin/workflows/103-speckit.clarify.md diff --git a/.windsurf/workflows/104-speckit.plan.md b/.devin/workflows/104-speckit.plan.md similarity index 100% rename from .windsurf/workflows/104-speckit.plan.md rename to .devin/workflows/104-speckit.plan.md diff --git a/.windsurf/workflows/105-speckit.tasks.md b/.devin/workflows/105-speckit.tasks.md similarity index 100% rename from .windsurf/workflows/105-speckit.tasks.md rename to .devin/workflows/105-speckit.tasks.md diff --git a/.windsurf/workflows/106-speckit.analyze.md b/.devin/workflows/106-speckit.analyze.md similarity index 100% rename from .windsurf/workflows/106-speckit.analyze.md rename to .devin/workflows/106-speckit.analyze.md diff --git a/.windsurf/workflows/107-speckit.implement.md b/.devin/workflows/107-speckit.implement.md similarity index 100% rename from .windsurf/workflows/107-speckit.implement.md rename to .devin/workflows/107-speckit.implement.md diff --git a/.windsurf/workflows/108-speckit.checker.md b/.devin/workflows/108-speckit.checker.md similarity index 100% rename from .windsurf/workflows/108-speckit.checker.md rename to .devin/workflows/108-speckit.checker.md diff --git a/.windsurf/workflows/109-speckit.tester.md b/.devin/workflows/109-speckit.tester.md similarity index 100% rename from .windsurf/workflows/109-speckit.tester.md rename to .devin/workflows/109-speckit.tester.md diff --git a/.windsurf/workflows/110-speckit.reviewer.md b/.devin/workflows/110-speckit.reviewer.md similarity index 100% rename from .windsurf/workflows/110-speckit.reviewer.md rename to .devin/workflows/110-speckit.reviewer.md diff --git a/.windsurf/workflows/111-speckit.validate.md b/.devin/workflows/111-speckit.validate.md similarity index 100% rename from 
.windsurf/workflows/111-speckit.validate.md rename to .devin/workflows/111-speckit.validate.md diff --git a/.windsurf/workflows/112-speckit.security-audit.md b/.devin/workflows/112-speckit.security-audit.md similarity index 100% rename from .windsurf/workflows/112-speckit.security-audit.md rename to .devin/workflows/112-speckit.security-audit.md diff --git a/.windsurf/workflows/bugfix.md b/.devin/workflows/bugfix.md similarity index 100% rename from .windsurf/workflows/bugfix.md rename to .devin/workflows/bugfix.md diff --git a/.windsurf/workflows/check-real-app.md b/.devin/workflows/check-real-app.md similarity index 100% rename from .windsurf/workflows/check-real-app.md rename to .devin/workflows/check-real-app.md diff --git a/.windsurf/workflows/create-backend-module.md b/.devin/workflows/create-backend-module.md similarity index 100% rename from .windsurf/workflows/create-backend-module.md rename to .devin/workflows/create-backend-module.md diff --git a/.windsurf/workflows/create-frontend-page.md b/.devin/workflows/create-frontend-page.md similarity index 100% rename from .windsurf/workflows/create-frontend-page.md rename to .devin/workflows/create-frontend-page.md diff --git a/.windsurf/workflows/deploy.md b/.devin/workflows/deploy.md similarity index 100% rename from .windsurf/workflows/deploy.md rename to .devin/workflows/deploy.md diff --git a/.windsurf/workflows/diagnose.md b/.devin/workflows/diagnose.md similarity index 100% rename from .windsurf/workflows/diagnose.md rename to .devin/workflows/diagnose.md diff --git a/.windsurf/workflows/e2e-testing.md b/.devin/workflows/e2e-testing.md similarity index 100% rename from .windsurf/workflows/e2e-testing.md rename to .devin/workflows/e2e-testing.md diff --git a/.windsurf/workflows/grill-with-docs.md b/.devin/workflows/grill-with-docs.md similarity index 100% rename from .windsurf/workflows/grill-with-docs.md rename to .devin/workflows/grill-with-docs.md diff --git a/.windsurf/workflows/resume-pending-work.md 
b/.devin/workflows/resume-pending-work.md similarity index 100% rename from .windsurf/workflows/resume-pending-work.md rename to .devin/workflows/resume-pending-work.md diff --git a/.windsurf/workflows/review.md b/.devin/workflows/review.md similarity index 100% rename from .windsurf/workflows/review.md rename to .devin/workflows/review.md diff --git a/.windsurf/workflows/schema-change.md b/.devin/workflows/schema-change.md similarity index 100% rename from .windsurf/workflows/schema-change.md rename to .devin/workflows/schema-change.md diff --git a/.windsurf/workflows/security-review.md b/.devin/workflows/security-review.md similarity index 100% rename from .windsurf/workflows/security-review.md rename to .devin/workflows/security-review.md diff --git a/.windsurf/workflows/tdd.md b/.devin/workflows/tdd.md similarity index 100% rename from .windsurf/workflows/tdd.md rename to .devin/workflows/tdd.md diff --git a/.windsurf/workflows/to-issues.md b/.devin/workflows/to-issues.md similarity index 100% rename from .windsurf/workflows/to-issues.md rename to .devin/workflows/to-issues.md diff --git a/.windsurf/workflows/to-prd.md b/.devin/workflows/to-prd.md similarity index 100% rename from .windsurf/workflows/to-prd.md rename to .devin/workflows/to-prd.md diff --git a/.windsurf/workflows/triage.md b/.devin/workflows/triage.md similarity index 100% rename from .windsurf/workflows/triage.md rename to .devin/workflows/triage.md diff --git a/.windsurf/workflows/util-speckit.checklist.md b/.devin/workflows/util-speckit.checklist.md similarity index 100% rename from .windsurf/workflows/util-speckit.checklist.md rename to .devin/workflows/util-speckit.checklist.md diff --git a/.windsurf/workflows/util-speckit.diff.md b/.devin/workflows/util-speckit.diff.md similarity index 100% rename from .windsurf/workflows/util-speckit.diff.md rename to .devin/workflows/util-speckit.diff.md diff --git a/.windsurf/workflows/util-speckit.migrate.md b/.devin/workflows/util-speckit.migrate.md 
similarity index 100% rename from .windsurf/workflows/util-speckit.migrate.md rename to .devin/workflows/util-speckit.migrate.md diff --git a/.windsurf/workflows/util-speckit.quizme.md b/.devin/workflows/util-speckit.quizme.md similarity index 100% rename from .windsurf/workflows/util-speckit.quizme.md rename to .devin/workflows/util-speckit.quizme.md diff --git a/.windsurf/workflows/util-speckit.status.md b/.devin/workflows/util-speckit.status.md similarity index 100% rename from .windsurf/workflows/util-speckit.status.md rename to .devin/workflows/util-speckit.status.md diff --git a/.windsurf/workflows/util-speckit.taskstoissues.md b/.devin/workflows/util-speckit.taskstoissues.md similarity index 100% rename from .windsurf/workflows/util-speckit.taskstoissues.md rename to .devin/workflows/util-speckit.taskstoissues.md diff --git a/.windsurf/workflows/verification-loop.md b/.devin/workflows/verification-loop.md similarity index 100% rename from .windsurf/workflows/verification-loop.md rename to .devin/workflows/verification-loop.md diff --git a/.windsurf/workflows/zoom-out.md b/.devin/workflows/zoom-out.md similarity index 100% rename from .windsurf/workflows/zoom-out.md rename to .devin/workflows/zoom-out.md diff --git a/AGENTS.md b/AGENTS.md index 2ef95a03..58352a73 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,7 +1,7 @@ # NAP-DMS Project Context & Rules - For: Windsurf Cascade (and compatible: Codex CLI, opencode, Amp, Antigravity, AGENTS.md tools) -- Version: 1.9.8 | Last synced from repo: 2026-06-02 +- Version: 1.9.9 | Last synced from repo: 2026-06-03 - Repo: [https://git.np-dms.work/np-dms/lcbp3](https://git.np-dms.work/np-dms/lcbp3) - Skill pack: `.agents/skills/` (v1.9.0, 21 skills) — see [`skills/README.md`](./.agents/skills/README.md) + [`skills/_LCBP3-CONTEXT.md`](./.agents/skills/_LCBP3-CONTEXT.md) @@ -137,7 +137,8 @@ Spec priority: **`06-Decision-Records`** > **`05-Engineering-Guidelines`** > oth | **ADR-019 UUID** | 
`specs/06-Decision-Records/ADR-019-hybrid-identifier-strategy.md` | ✅ Active | UUID-related work | | **ADR-021 Workflow Context** | `specs/06-Decision-Records/ADR-021-workflow-context.md` | ✅ Active | Integrated workflow & step attachments | | **ADR-023 AI Architecture** | `specs/06-Decision-Records/ADR-023-unified-ai-architecture.md` | ✅ Active | Unified AI boundaries and pipeline (base architecture) | -| **ADR-023A AI Model Rev.** | `specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md` | ✅ Active | 2-Model stack (gemma4:e4b Q8_0), BullMQ 2-queue, RAG embed scope, OCR auto-detect | +| **ADR-023A AI Model Rev.** | `specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md` | ✅ Active | 2-queue, RAG embed scope, OCR auto-detect (model stack superseded by ADR-034) | +| **ADR-034 Thai Model Stack** | `specs/06-Decision-Records/ADR-034-AI-model-change.md` | ✅ Active | typhoon2.5-np-dms:latest (Main) + typhoon-np-dms-ocr:latest (OCR, keep_alive:0) | | **ADR-024 Intent Class.** | `specs/06-Decision-Records/ADR-024-intent-classification-strategy.md` | ✅ Active | Hybrid Pattern→LLM Fallback; ai_intent_patterns DB; Redis cache 5 min | | **ADR-025 AI Tool Layer** | `specs/06-Decision-Records/ADR-025-ai-tool-layer-architecture.md` | ✅ Active | Server-side Tool dispatch; CASL-guarded bridge; ToolResult uses publicId only | | **ADR-026 Chat UI** | `specs/06-Decision-Records/ADR-026-document-chat-ui-pattern.md` | ✅ Active | Side-panel Document Chat UI; useAiChat() hook; streaming response support | @@ -269,7 +270,7 @@ Read `specs/05-Engineering-Guidelines/05-07-hybrid-uuid-implementation-plan.md` 5. **Password:** bcrypt 12 salt rounds, min 8 chars, rotate every 90 days 6. **Rate Limiting:** `ThrottlerGuard` on all auth endpoints 7. **File Upload:** Whitelist PDF/DWG/DOCX/XLSX/ZIP, max 50MB, ClamAV scan -8. 
**AI Isolation (ADR-023/023A):** Ollama on Admin Desktop ONLY — NO direct DB/storage access; 2-model stack `gemma4:e4b Q8_0` + `nomic-embed-text`; all inference via BullMQ (`ai-realtime` / `ai-batch`) +8. **AI Isolation (ADR-023/023A/034):** Ollama on Admin Desktop ONLY — NO direct DB/storage access; model stack `typhoon2.5-np-dms:latest` (main) + `typhoon-np-dms-ocr:latest` (OCR, keep_alive:0) + `nomic-embed-text`; all inference via BullMQ (`ai-realtime` / `ai-batch`) 9. **Error Handling (ADR-007):** Use layered error classification with user-friendly messages 10. **AI Integration (ADR-023/023A):** RFA-First approach; n8n orchestrates Migration Phase only via DMS API — never calls Ollama directly; `QdrantService.search()` requires `projectPublicId` as mandatory param @@ -431,7 +432,7 @@ Full glossary: `specs/00-overview/00-02-glossary.md` **For AI Runtime Layer (ADR-024/025/026/027):** -- ADR-024: Pattern Layer first (ai_intent_patterns DB + Redis cache 5 min) → LLM Fallback (gemma4:e4b Q8_0, semaphore max=3) +- ADR-024: Pattern Layer first (ai_intent_patterns DB + Redis cache 5 min) → LLM Fallback (typhoon2.5-np-dms:latest, semaphore max=3) - ADR-025: Tool Registry dispatch — AI Gateway → Tool → Business Service; ToolResult DTO must use publicId only - ADR-026: useAiChat() hook + side-panel UI; streaming response via SSE; TanStack Query cache - ADR-027: Admin Console — dynamic model/prompt/intent control; CASL-guarded admin-only endpoints @@ -557,7 +558,7 @@ When user asks about... 
check these files: - [ ] **Qdrant Multi-tenancy:** `projectPublicId` filter enforced - [ ] **Human-in-the-loop:** AI outputs validated before use - [ ] **Audit Logging:** All AI interactions logged to `ai_audit_logs` -- [ ] **2-Model Stack:** gemma4:e4b Q8_0 + nomic-embed-text verified +- [ ] **Model Stack (ADR-034):** typhoon2.5-np-dms:latest + typhoon-np-dms-ocr:latest + nomic-embed-text verified - [ ] **Dynamic Prompts (ADR-029):** Prompt templates loaded from `ai_prompts` DB, not hardcoded **Performance & Complex Logic:** @@ -612,6 +613,7 @@ This file is a **quick reference**. For detailed information: | Version | Date | Changes | Updated By | | ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------- | +| 1.9.9 | 2026-06-03 | ADR-034 Thai-Optimized AI Model Stack: typhoon2.5-np-dms:latest (main) + typhoon-np-dms-ocr:latest (OCR); model switching in ai-batch processor; AiSettingsService static constants; SQL delta; updated Key Spec Files + AI isolation rule | Windsurf AI | | 1.9.8 | 2026-06-02 | Added ADR-033 Active Model & OCR Runner Management; implemented Synchronous LLM switches, GPU Memory Auto-release, sidecar `X-API-Key` headers protection; updated Key Spec Files & Specialized Work AI runtime sections | Windsurf AI | | 1.9.7 | 2026-05-25 | Added ADR-029 Dynamic Prompt Management to Key Spec Files table; fixed gemma4 model name e2b→e4b Q8_0; added Dynamic Prompt context trigger; added ADR-029 to Tier 3 AI checklist; bumped last synced date | Windsurf AI | | 1.9.6 | 2026-05-22 | Added ADR-024/025/026/027/028 to Key Spec Files table; Tier 3 expanded with AI Runtime Layer + Migration Pipeline tiers; Specialized Work section updated with ADR-024~028 patterns; 6 new Context-Aware Triggers; bumped Last synced date | Windsurf AI | diff --git 
a/CONTEXT.md b/CONTEXT.md index c956d3fb..a519b110 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -188,11 +188,21 @@ _Avoid_: Throw exception from tool, Untyped error - **AI Gateway** — NestJS module, CASL-guarded, enqueue jobs ไป BullMQ - **n8n** — Workflow orchestrator บน QNAP (Migration Phase + simple routing) -- **Ollama** — Local LLM inference บน Admin Desktop (gemma4:e4b Q8_0 + nomic-embed-text) +- **Ollama** — Local LLM inference บน Admin Desktop (ADR-034: typhoon2.5-np-dms + typhoon-np-dms-ocr + nomic-embed-text) - **QdrantService** — Vector search แบบ project-isolated - **AiRagService** — RAG pipeline (embed query → Qdrant → LLM context) - **OcrService / sidecar** — ระบบประมวลผล OCR ปลอดภัยด้วย API Key และ dynamic model swapping (ADR-033) +## Glossary Updates (from ADR-034) + +| Term | Definition | Avoid | +|------|------------|-------| +| **Thai-Optimized Model** | โมเดล AI ที่ถูก fine-tune มาสำหรับภาษาไทยโดยเฉพาะ (เช่น Typhoon series จาก SCB10X) | Generic model, English-only model | +| **Model Unload/Load** | กระบวนการยกเลิกโหลดโมเดลจาก VRAM และโหลดโมเดลใหม่เข้าไปแทน เพื่อสลับการใช้งานระหว่างโมเดลต่างๆ | Model switching (ambiguous), Hot swap | +| **Cold Start Penalty** | ความล่าช้า 5-15 วินาทีที่เกิดจากการโหลดโมเดล weights เข้า VRAM หลังจากโมเดลถูก unload (keep_alive: 0) | Initial delay, First-run latency | + +--- + ## System readiness summary (resolved) | Component | สถานะ | หมายเหตุ | diff --git a/backend/.env.example b/backend/.env.example index 91fac5f9..17039204 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -49,11 +49,12 @@ AI_N8N_AUTH_TOKEN=change-me-service-token # Qdrant vector store (local docker-compose or QNAP) QDRANT_URL=http://localhost:6333 -# Ollama (Admin Desktop Desk-5439 — ADR-018 AI boundary) -OLLAMA_MODEL_MAIN=gemma4:e2b +# Ollama (Admin Desktop Desk-5439 — ADR-034 Thai-Optimized Model Stack) +OLLAMA_MODEL_MAIN=typhoon2.5-np-dms:latest +OLLAMA_MODEL_OCR=typhoon-np-dms-ocr:latest OLLAMA_MODEL_EMBED=nomic-embed-text 
OLLAMA_EMBED_MODEL=nomic-embed-text -OLLAMA_RAG_MODEL=gemma4:e2b +OLLAMA_RAG_MODEL=typhoon2.5-np-dms:latest OLLAMA_URL=http://192.168.10.8:11434 # Qdrant (ADR-023A) diff --git a/backend/src/modules/ai/ai-settings.service.spec.ts b/backend/src/modules/ai/ai-settings.service.spec.ts index 014d9594..3a7aa6dd 100644 --- a/backend/src/modules/ai/ai-settings.service.spec.ts +++ b/backend/src/modules/ai/ai-settings.service.spec.ts @@ -99,14 +99,15 @@ describe('AiSettingsService', () => { ); }); - it('ควรใช้ gemma4:e4b เป็นค่า active model เริ่มต้นเมื่อยังไม่มี system setting', async () => { + it('ควรใช้ typhoon2.5-np-dms:latest (DEFAULT_MODEL) เป็นค่า active model เริ่มต้นเมื่อยังไม่มี system setting (ADR-034)', async () => { mockRedis.get.mockResolvedValue(null); mockSettingRepo.findOne.mockResolvedValue(null); - - await expect(service.getActiveModel()).resolves.toBe('gemma4:e4b'); + await expect(service.getActiveModel()).resolves.toBe( + 'typhoon2.5-np-dms:latest' + ); expect(mockRedis.set).toHaveBeenCalledWith( 'system_settings:AI_ACTIVE_MODEL', - 'gemma4:e4b', + 'typhoon2.5-np-dms:latest', 'EX', 30 ); diff --git a/backend/src/modules/ai/ai-settings.service.ts b/backend/src/modules/ai/ai-settings.service.ts index f17be050..237dccfe 100644 --- a/backend/src/modules/ai/ai-settings.service.ts +++ b/backend/src/modules/ai/ai-settings.service.ts @@ -3,6 +3,7 @@ // - 2026-05-21: เพิ่ม service สำหรับอ่าน/เขียน AI feature toggle พร้อม Redis cache. 
// - 2026-05-22: เพิ่ม try-catch ใน getAiFeaturesEnabled() เพื่อความยืดหยุ่นในกรณีที่ฐานข้อมูลยังไม่ได้อัปเกรดตาราง system_settings // - 2026-05-25: เพิ่ม methods สำหรับจัดการรายการโมเดล AI แบบไดนามิก (ADR-027) +// - 2026-06-03: เพิ่ม DEFAULT_MODEL และ OCR_MODEL static constants ตาม ADR-034 (เปลี่ยนจาก gemma4:e4b เป็น typhoon2.5-np-dms) import { Injectable, Logger } from '@nestjs/common'; import { InjectRedis } from '@nestjs-modules/ioredis'; @@ -24,6 +25,11 @@ const AI_ACTIVE_MODEL_TTL_SECONDS = 30; /** Service สำหรับจัดการ system_settings ที่เกี่ยวข้องกับ AI Admin Console */ @Injectable() export class AiSettingsService { + /** โมเดล AI หลักสำหรับ Extraction, RAG Q&A, AI Suggestion (ADR-034) */ + static readonly DEFAULT_MODEL = 'typhoon2.5-np-dms:latest'; + + /** โมเดล OCR ภาษาไทย — unload หลังใช้งาน (keep_alive=0) (ADR-034) */ + static readonly OCR_MODEL = 'typhoon-np-dms-ocr:latest'; private readonly logger = new Logger(AiSettingsService.name); constructor( @@ -150,7 +156,8 @@ export class AiSettingsService { where: { settingKey: AI_ACTIVE_MODEL_KEY }, }); - const activeModel = setting?.settingValue ?? 'gemma4:e4b'; + const activeModel = + setting?.settingValue ?? 
AiSettingsService.DEFAULT_MODEL; await this.redis.set( AI_ACTIVE_MODEL_CACHE_KEY, activeModel, @@ -160,7 +167,7 @@ export class AiSettingsService { return activeModel; } catch (error: unknown) { this.logger.error(`Failed to get active model: ${this.toMessage(error)}`); - return 'gemma4:e4b'; + return AiSettingsService.DEFAULT_MODEL; } } diff --git a/backend/src/modules/ai/ai.service.ts b/backend/src/modules/ai/ai.service.ts index 528f4146..f3ab0dfd 100644 --- a/backend/src/modules/ai/ai.service.ts +++ b/backend/src/modules/ai/ai.service.ts @@ -5,6 +5,7 @@ // - 2026-05-21: แก้ไข ESLint unsafe return error ใน getSystemHealth โดยใช้ interface SystemHealthResponse // - 2026-05-29: เพิ่ม OcrService.checkHealth() เข้า getSystemHealth() เพื่อแสดงสถานะ OCR sidecar // - 2026-06-02: ปรับปรุง activateAiModel ให้มีการโหลดและยืนยันโมเดลล่วงหน้าแบบ Synchronous (T008, ADR-033) และล้างโมเดลตัวเก่าออกเพื่อประหยัด VRAM (Suggestion 1) +// - 2026-06-03: ADR-034 — เพิ่ม activeModels field (เอา mainModel+ocrModel) ใน SystemHealthResponse import { Injectable, Logger, Optional } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { HttpService } from '@nestjs/axios'; @@ -123,6 +124,10 @@ export interface AiJobStatusResult { } export interface SystemHealthResponse { + activeModels: { + main: string; + ocr: string; + }; ollama: { status: string; latencyMs: number; @@ -867,6 +872,14 @@ export class AiService { this.getQueueMetrics(this.aiBatchQueue), ]); const health = { + activeModels: { + main: this.ollamaService + ? this.ollamaService.getMainModelName() + : AiSettingsService.DEFAULT_MODEL, + ocr: this.ollamaService + ? 
this.ollamaService.getOcrModelName() + : AiSettingsService.OCR_MODEL, + }, ollama, qdrant, ocr, diff --git a/backend/src/modules/ai/processors/ai-batch.processor.spec.ts b/backend/src/modules/ai/processors/ai-batch.processor.spec.ts index e0e01634..460390e8 100644 --- a/backend/src/modules/ai/processors/ai-batch.processor.spec.ts +++ b/backend/src/modules/ai/processors/ai-batch.processor.spec.ts @@ -7,12 +7,17 @@ // - 2026-05-27: เพิ่ม Mock สำหรับ getActive และ resolveContext ของ AiPromptsService เพื่อรองรับ Context-Aware Prompt (T017) // - 2026-05-28: เพิ่ม test สำหรับ EC-001 (NEW_TAG_SUGGESTED) และ EC-002 (UNRESOLVED_SENDER/RECIPIENT_UUID) // - 2026-05-29: แก้ไข mockAttachmentRepo เพิ่ม property manager เพื่อรองรับ jest.spyOn ใน EC-001, EC-002, และ migrate-document tests +// - 2026-06-03: ADR-034 — เพิ่ม OCR_JOB_TYPES import, mock unloadModel/loadModel/getOcrModelName, อัปเดต getMainModelName เป็น typhoon2.5, เพิ่ม test ocr-extract model switching import { Test, TestingModule } from '@nestjs/testing'; import { getRepositoryToken } from '@nestjs/typeorm'; import { Repository } from 'typeorm'; import { Job } from 'bullmq'; -import { AiBatchProcessor, AiBatchJobData } from './ai-batch.processor'; +import { + AiBatchProcessor, + AiBatchJobData, + OCR_JOB_TYPES, +} from './ai-batch.processor'; import { EmbeddingService } from '../services/embedding.service'; import { AiRagService } from '../ai-rag.service'; import { Attachment } from '../../../common/file-storage/entities/attachment.entity'; @@ -57,7 +62,10 @@ describe('AiBatchProcessor', () => { }), }; const mockOllamaService = { - getMainModelName: jest.fn().mockReturnValue('gemma4:e4b'), + getMainModelName: jest.fn().mockReturnValue('typhoon2.5-np-dms:latest'), + getOcrModelName: jest.fn().mockReturnValue('typhoon-np-dms-ocr:latest'), + loadModel: jest.fn().mockResolvedValue(true), + unloadModel: jest.fn().mockResolvedValue(true), generate: jest.fn().mockResolvedValue( JSON.stringify({ documentNumber: 
'LCBP3-CIV-001', @@ -174,6 +182,49 @@ describe('AiBatchProcessor', () => { attachmentRepo = module.get(getRepositoryToken(Attachment)); jest.clearAllMocks(); }); + it('OCR_JOB_TYPES ควรมี ocr-extract เป็นสมาชิก (ADR-034)', () => { + expect(OCR_JOB_TYPES).toContain('ocr-extract'); + }); + it('ocr-extract: ควร unload main → load OCR (keep_alive:0) → generate → reload main (ADR-034)', async () => { + const job = { + id: 'job-ocr-extract', + data: { + jobType: 'ocr-extract', + documentPublicId: 'doc-ocr-uuid-001', + projectPublicId: 'proj-uuid-456', + payload: { prompt: 'Extract OCR text from this document.' }, + idempotencyKey: 'idem-ocr-001', + }, + } as unknown as Job; + await processor.process(job); + expect(mockOllamaService.unloadModel).toHaveBeenCalledWith( + 'typhoon2.5-np-dms:latest' + ); + expect(mockOllamaService.loadModel).toHaveBeenCalledWith( + 'typhoon-np-dms-ocr:latest', + 0 + ); + expect(mockOllamaService.generate).toHaveBeenCalledWith( + 'Extract OCR text from this document.', + expect.objectContaining({ + model: 'typhoon-np-dms-ocr:latest', + timeoutMs: 120000, + }) + ); + expect(mockOllamaService.loadModel).toHaveBeenCalledWith( + 'typhoon2.5-np-dms:latest', + -1 + ); + expect(mockRedis.setex).toHaveBeenCalledWith( + 'ai:ocr:result:doc-ocr-uuid-001', + 3600, + expect.stringContaining('typhoon-np-dms-ocr:latest') + ); + expect(attachmentRepo.update).toHaveBeenCalledWith( + { publicId: 'doc-ocr-uuid-001' }, + { aiProcessingStatus: 'DONE' } + ); + }); it('ควรสามารถเรียก process embed-document และอัปเดตสถานะใน database', async () => { const job = { id: 'job-embed', diff --git a/backend/src/modules/ai/processors/ai-batch.processor.ts b/backend/src/modules/ai/processors/ai-batch.processor.ts index b672ca07..c7fe5338 100644 --- a/backend/src/modules/ai/processors/ai-batch.processor.ts +++ b/backend/src/modules/ai/processors/ai-batch.processor.ts @@ -9,6 +9,7 @@ // - 2026-05-25: เพิ่ม AiPromptsService เพื่อดึง Dynamic Prompt สำหรับ OCR extraction ใน sandbox 
และ migration pipeline // - 2026-05-26: แก้ไข bug lockDuration=30000ms ทำให้ sandbox-extract job stall เมื่อ Ollama ใช้เวลา >30s — เพิ่ม lockDuration: 150000 // - 2026-05-28: EC-001 ใช้ findOrSuggestTags เพื่อตรวจจับ Tag ใหม่และบันทึก aiIssues; EC-002 ตรวจสอบ UUID ของผู้ส่ง/ผู้รับ และ Flag เมื่อหาไม่พบ +// - 2026-06-03: ADR-034 — เพิ่ม 'ocr-extract' job type + OCR_JOB_TYPES constant + processOcrExtract() ที่มี model switching logic (unload main → load OCR → generate → reload main) import { Processor, WorkerHost } from '@nestjs/bullmq'; import { Logger } from '@nestjs/common'; @@ -49,6 +50,7 @@ interface MigrateDocumentMetadata extends Record { export type AiBatchJobType = | 'ocr' + | 'ocr-extract' | 'extract-metadata' | 'embed-document' | 'sandbox-rag' @@ -57,6 +59,11 @@ export type AiBatchJobType = | 'sandbox-ai-extract' | 'migrate-document'; +/** รายการ job types ที่ต้องใช้ Typhoon OCR model — จะ trigger model switching (ADR-034) */ +export const OCR_JOB_TYPES: ReadonlyArray = [ + 'ocr-extract', +] as const; + export interface AiBatchJobData { jobType: AiBatchJobType; documentPublicId: string; @@ -177,6 +184,13 @@ export class AiBatchProcessor extends WorkerHost { await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE'); } return; + case 'ocr-extract': + this.logger.log( + `OCR-extract (Typhoon OCR) job processing — jobId=${String(job.id)}` + ); + await this.processOcrExtract(job.data); + await this.setAiProcessingStatus(job.data.documentPublicId, 'DONE'); + return; case 'extract-metadata': this.logger.log( `Metadata extraction job processing — jobId=${String(job.id)}` @@ -296,6 +310,45 @@ export class AiBatchProcessor extends WorkerHost { ); } + /** ประมวลผล ocr-extract job ด้วย Typhoon OCR model — model switching ตาม ADR-034: + * unload main → load OCR (keep_alive:0) → generate OCR → OCR auto-unloads → reload main */ + private async processOcrExtract(data: AiBatchJobData): Promise { + const { documentPublicId, payload } = data; + const mainModel = 
this.ollamaService.getMainModelName(); + const ocrModel = this.ollamaService.getOcrModelName(); + const prompt = (payload.prompt as string) || ''; + this.logger.log( + `[ModelSwitch] Unloading ${mainModel} — documentPublicId=${documentPublicId}` + ); + await this.ollamaService.unloadModel(mainModel); + this.logger.log(`[ModelSwitch] Loading ${ocrModel} (keep_alive:0)`); + await this.ollamaService.loadModel(ocrModel, 0); + let ocrText = ''; + try { + this.logger.log(`[ModelSwitch] Running OCR extraction with ${ocrModel}`); + ocrText = await this.ollamaService.generate(prompt, { + model: ocrModel, + timeoutMs: 120000, + }); + } finally { + this.logger.log(`[ModelSwitch] Reloading ${mainModel} (keep_alive:-1)`); + await this.ollamaService.loadModel(mainModel, -1); + } + await this.redis.setex( + `ai:ocr:result:${documentPublicId}`, + 3600, + JSON.stringify({ + documentPublicId, + ocrText, + model: ocrModel, + completedAt: new Date().toISOString(), + }) + ); + this.logger.log( + `[ModelSwitch] OCR-extract complete — documentPublicId=${documentPublicId}` + ); + } + /** ประมวลผล sandbox OCR + Metadata extraction โดยไม่บันทึกลง database */ private async processSandboxExtract(data: AiBatchJobData): Promise { const { idempotencyKey, payload, projectPublicId } = data; diff --git a/backend/src/modules/ai/processors/ai-realtime.processor.ts b/backend/src/modules/ai/processors/ai-realtime.processor.ts index dd343a93..3adfd18e 100644 --- a/backend/src/modules/ai/processors/ai-realtime.processor.ts +++ b/backend/src/modules/ai/processors/ai-realtime.processor.ts @@ -1,6 +1,7 @@ // File: src/modules/ai/processors/ai-realtime.processor.ts // Change Log // - 2026-05-15: เพิ่ม processor สำหรับ ai-realtime queue และ pause/resume ai-batch ตาม ADR-023A. 
+// - 2026-06-03: ADR-034 — เปลี่ยน aiModel ใน audit log จาก hardcode 'gemma4' เป็น ollamaService.getMainModelName() import { Processor, @@ -113,7 +114,7 @@ export class AiRealtimeProcessor extends WorkerHost { await this.aiAuditLogRepo.save( this.aiAuditLogRepo.create({ documentPublicId: job.data.documentPublicId, - aiModel: 'gemma4', + aiModel: this.ollamaService.getMainModelName(), modelName: this.ollamaService.getMainModelName(), aiSuggestionJson: normalizedSuggestion, confidenceScore: this.extractConfidence(normalizedSuggestion), @@ -135,7 +136,7 @@ export class AiRealtimeProcessor extends WorkerHost { await this.aiAuditLogRepo.save( this.aiAuditLogRepo.create({ documentPublicId: job.data.documentPublicId, - aiModel: 'gemma4', + aiModel: this.ollamaService.getMainModelName(), modelName: this.ollamaService.getMainModelName(), processingTimeMs: Date.now() - startTime, status: AiAuditStatus.FAILED, diff --git a/backend/src/modules/ai/services/ollama.service.spec.ts b/backend/src/modules/ai/services/ollama.service.spec.ts new file mode 100644 index 00000000..f29d28c4 --- /dev/null +++ b/backend/src/modules/ai/services/ollama.service.spec.ts @@ -0,0 +1,122 @@ +// File: src/modules/ai/services/ollama.service.spec.ts +// Change Log: +// - 2026-06-03: สร้าง unit test สำหรับ OllamaService ครอบคลุม generate() model option, +// getOcrModelName(), และ loadModel() keepAlive param ตาม ADR-034 + +import { Test, TestingModule } from '@nestjs/testing'; +import { ConfigService } from '@nestjs/config'; +import axios from 'axios'; +import { OllamaService } from './ollama.service'; + +jest.mock('axios'); +const mockedAxios = axios as jest.Mocked; + +describe('OllamaService (ADR-034)', () => { + let service: OllamaService; + const configValues: Record = { + OLLAMA_URL: 'http://localhost:11434', + OLLAMA_MODEL_MAIN: 'typhoon2.5-np-dms:latest', + OLLAMA_MODEL_OCR: 'typhoon-np-dms-ocr:latest', + OLLAMA_MODEL_EMBED: 'nomic-embed-text', + AI_TIMEOUT_MS: 30000, + }; + const 
mockConfigService = { + get: jest.fn((key: string, defaultValue?: T): T | undefined => { + return (configValues[key] as T | undefined) ?? defaultValue; + }), + }; + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + providers: [ + OllamaService, + { provide: ConfigService, useValue: mockConfigService }, + ], + }).compile(); + service = module.get(OllamaService); + jest.clearAllMocks(); + }); + describe('getMainModelName()', () => { + it('ควรคืน typhoon2.5-np-dms:latest เป็น main model (ADR-034)', () => { + expect(service.getMainModelName()).toBe('typhoon2.5-np-dms:latest'); + }); + }); + describe('getOcrModelName()', () => { + it('ควรคืน typhoon-np-dms-ocr:latest เป็น OCR model (ADR-034)', () => { + expect(service.getOcrModelName()).toBe('typhoon-np-dms-ocr:latest'); + }); + }); + describe('generate()', () => { + it('ควรใช้ mainModel เมื่อ options.model ไม่ได้ระบุ', async () => { + mockedAxios.post = jest + .fn() + .mockResolvedValueOnce({ data: { response: 'test response' } }); + await service.generate('test prompt'); + expect(mockedAxios.post).toHaveBeenCalledWith( + expect.stringContaining('/api/generate'), + expect.objectContaining({ model: 'typhoon2.5-np-dms:latest' }), + expect.anything() + ); + }); + it('ควรใช้ options.model เมื่อระบุ model อื่น (ADR-034 model switching)', async () => { + mockedAxios.post = jest + .fn() + .mockResolvedValueOnce({ data: { response: 'ocr result' } }); + await service.generate('ocr prompt', { + model: 'typhoon-np-dms-ocr:latest', + }); + expect(mockedAxios.post).toHaveBeenCalledWith( + expect.stringContaining('/api/generate'), + expect.objectContaining({ model: 'typhoon-np-dms-ocr:latest' }), + expect.anything() + ); + }); + }); + describe('loadModel()', () => { + it('ควรส่ง keep_alive: -1 เป็น default เมื่อไม่ระบุ keepAlive', async () => { + mockedAxios.get = jest.fn().mockResolvedValueOnce({ + data: { + models: [ + { + name: 'typhoon2.5-np-dms:latest', + model: 
'typhoon2.5-np-dms:latest', + }, + ], + }, + }); + mockedAxios.post = jest.fn().mockResolvedValueOnce({ data: {} }); + await service.loadModel('typhoon2.5-np-dms:latest'); + expect(mockedAxios.post).toHaveBeenCalledWith( + expect.stringContaining('/api/generate'), + expect.objectContaining({ keep_alive: -1 }), + expect.anything() + ); + }); + it('ควรส่ง keep_alive: 0 เมื่อ keepAlive=0 (OCR model switching, ADR-034)', async () => { + mockedAxios.get = jest.fn().mockResolvedValueOnce({ + data: { + models: [ + { + name: 'typhoon-np-dms-ocr:latest', + model: 'typhoon-np-dms-ocr:latest', + }, + ], + }, + }); + mockedAxios.post = jest.fn().mockResolvedValueOnce({ data: {} }); + await service.loadModel('typhoon-np-dms-ocr:latest', 0); + expect(mockedAxios.post).toHaveBeenCalledWith( + expect.stringContaining('/api/generate'), + expect.objectContaining({ keep_alive: 0 }), + expect.anything() + ); + }); + it('ควรคืน false เมื่อ model ไม่ได้ติดตั้งใน Ollama', async () => { + mockedAxios.get = jest.fn().mockResolvedValueOnce({ + data: { models: [{ name: 'other-model', model: 'other-model' }] }, + }); + const result = await service.loadModel('typhoon-np-dms-ocr:latest', 0); + expect(result).toBe(false); + expect(mockedAxios.post).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/backend/src/modules/ai/services/ollama.service.ts b/backend/src/modules/ai/services/ollama.service.ts index 2ee21f0d..5593b498 100644 --- a/backend/src/modules/ai/services/ollama.service.ts +++ b/backend/src/modules/ai/services/ollama.service.ts @@ -3,6 +3,7 @@ // - 2026-05-15: เพิ่ม Ollama service สำหรับ ADR-023A 2-model stack. 
// - 2026-05-21: เพิ่ม checkHealth สำหรับตรวจสอบสุขภาพและความเร็ว (Latency) ของ Ollama // - 2026-06-02: เพิ่ม loadModel() preloading, ดึงจริงจาก /api/ps และเพิ่ม unloadModel() เพื่อล้างหน่วยความจำ GPU/VRAM (ADR-033, Suggestion 1) +// - 2026-06-03: ADR-034 — เปลี่ยน default model เป็น typhoon2.5-np-dms; เพิ่ม ocrModel field, keepAlive param ใน loadModel(), model option ใน OllamaGenerateOptions, getOcrModelName() import { Injectable, Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; @@ -11,6 +12,8 @@ import axios from 'axios'; export interface OllamaGenerateOptions { timeoutMs?: number; signal?: AbortSignal; + /** ชื่อ model ที่ต้องการใช้ — ถ้าไม่ระบุ จะใช้ mainModel เป็นค่าเริ่มต้น (ADR-034) */ + model?: string; } /** บริการเรียก Ollama local-only บน Admin Desktop ตาม ADR-023A */ @@ -19,6 +22,7 @@ export class OllamaService { private readonly logger = new Logger(OllamaService.name); private readonly ollamaUrl: string; private readonly mainModel: string; + private readonly ocrModel: string; private readonly embedModel: string; private readonly timeoutMs: number; @@ -29,7 +33,11 @@ export class OllamaService { ); this.mainModel = this.configService.get( 'OLLAMA_MODEL_MAIN', - 'gemma4:e4b' + 'typhoon2.5-np-dms:latest' + ); + this.ocrModel = this.configService.get( + 'OLLAMA_MODEL_OCR', + 'typhoon-np-dms-ocr:latest' ); this.embedModel = this.configService.get( 'OLLAMA_MODEL_EMBED', @@ -38,7 +46,7 @@ export class OllamaService { this.timeoutMs = this.configService.get('AI_TIMEOUT_MS', 30000); } - /** สร้างข้อความตอบกลับจาก gemma4:e4b หรือค่า ENV ที่กำหนด */ + /** สร้างข้อความตอบกลับด้วย typhoon2.5-np-dms:latest หรือโมเดลที่ระบุใน options.model / ENV */ async generate( prompt: string, options: OllamaGenerateOptions = {} @@ -47,7 +55,7 @@ export class OllamaService { const response = await axios.post<{ response: string }>( `${this.ollamaUrl}/api/generate`, { - model: this.mainModel, + model: options.model ?? 
this.mainModel, prompt, stream: false, }, @@ -89,6 +97,11 @@ export class OllamaService { return this.mainModel; } + /** คืนชื่อ OCR model สำหรับ model switching ใน BullMQ processor (ADR-034) */ + getOcrModelName(): string { + return this.ocrModel; + } + /** คืนชื่อ embedding model สำหรับ audit log */ getEmbeddingModelName(): string { return this.embedModel; @@ -143,8 +156,13 @@ export class OllamaService { } } - /** โหลดโมเดลล่วงหน้าแบบ Synchronous และตรวจสอบความพร้อมบน Ollama (T007) */ - async loadModel(modelName: string): Promise { + /** โหลดโมเดลเข้า VRAM — ใช้สำหรับ preload และ model switching (ADR-033, ADR-034) + * @param keepAlive ค่า keep_alive: -1 = ค้างใน VRAM ตลอด (main), 0 = unload หลังจบ (OCR) + */ + async loadModel( + modelName: string, + keepAlive?: number | string + ): Promise { try { const tagsResponse = await axios.get<{ models?: Array<{ name: string; model: string }>; @@ -161,7 +179,7 @@ export class OllamaService { return false; } this.logger.log( - `Synchronously pre-loading model ${modelName} into GPU memory...` + `Synchronously pre-loading model ${modelName} into GPU memory (keep_alive=${String(keepAlive ?? -1)})...` ); await axios.post( `${this.ollamaUrl}/api/generate`, @@ -169,9 +187,9 @@ export class OllamaService { model: modelName, prompt: '', stream: false, - keep_alive: -1, + keep_alive: keepAlive ?? -1, }, - { timeout: 30000 } + { timeout: 60000 } ); this.logger.log(`Model ${modelName} pre-loaded successfully`); return true; diff --git a/backend/src/modules/rag/local-llm.service.ts b/backend/src/modules/rag/local-llm.service.ts index b710ae34..1cc9ce14 100644 --- a/backend/src/modules/rag/local-llm.service.ts +++ b/backend/src/modules/rag/local-llm.service.ts @@ -1,6 +1,7 @@ // File: src/modules/rag/local-llm.service.ts // Change Log // - 2026-05-15: แทนที่ cloud LLM API ด้วย Ollama local-only ตาม ADR-023A. 
+// - 2026-06-03: ADR-034 — เปลี่ยน default fallback จาก gemma4:e4b เป็น typhoon2.5-np-dms:latest import { Injectable, Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; @@ -26,7 +27,10 @@ export class LocalLlmService { ); this.ollamaModel = this.configService.get( 'OLLAMA_MODEL_MAIN', - this.configService.get('OLLAMA_RAG_MODEL', 'gemma4:e4b') + this.configService.get( + 'OLLAMA_RAG_MODEL', + 'typhoon2.5-np-dms:latest' + ) ); this.timeoutMs = this.configService.get('RAG_TIMEOUT_MS', 30000); } diff --git a/frontend/lib/services/admin-ai.service.ts b/frontend/lib/services/admin-ai.service.ts index 9be09d2b..87f6d9c3 100644 --- a/frontend/lib/services/admin-ai.service.ts +++ b/frontend/lib/services/admin-ai.service.ts @@ -9,6 +9,7 @@ // - 2026-05-29: เพิ่ม ocrText, ocrUsed, promptVersionUsed ใน AiSandboxJobResult // - 2026-05-30: เพิ่มเมธอด getOcrEngines และ selectOcrEngine สำหรับจัดการ OCR engines (T017, T018, US1) // - 2026-05-30: เพิ่ม getVramStatus และปรับปรุง getAvailableModels/setActiveModel/addModel ให้เรียกใช้ endpoints ใหม่ที่มี VRAM capacity check (T031-T034, US2) +// - 2026-06-03: ADR-034 — เพิ่ม activeModels field (หลัก+OCR) ใน AiSystemHealth interface // - 2026-06-02: แก้ endpoint getAvailableModels ให้ตรงกับ backend admin route (/ai/admin/models) // - 2026-06-02: normalize VRAM response ให้รองรับ field names จาก backend ปัจจุบันและรูปแบบ loadedModels แบบเดิม @@ -28,6 +29,10 @@ export interface QueueMetrics { } export interface AiSystemHealth { + activeModels?: { + main: string; + ocr: string; + }; ollama: { status: 'HEALTHY' | 'DEGRADED' | 'DOWN'; latencyMs: number; diff --git a/memory/agent-memory.md b/memory/agent-memory.md index 6db37e8a..e6645cb6 100644 --- a/memory/agent-memory.md +++ b/memory/agent-memory.md @@ -13,12 +13,13 @@ - 2026-05-30 (Session 8): OCR Engine Migration — เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อแก้ปัญหา SIGILL (Illegal Instruction) บน CPU เก่าที่ไม่รองรับ AVX: อัปเดต requirements.txt (ลบ 
paddlepaddle/paddleocr, เพิ่ม pytesseract), app.py (เปลี่ยนใช้ pytesseract, OCR_LANG=tha+eng), Dockerfile (ติดตั้ง tesseract-ocr + ภาษาไทย/อังกฤษ), docker-compose.yml (OCR_LANG=tha+eng, ลบ paddleocr_models volume), backend ocr.service.ts (เปลี่ยน comment/error message), frontend OcrSandboxPromptManager.tsx (เปลี่ยน Badge text) - 2026-05-30 (Session 10): OCR Sandbox Two-Step Flow (ADR-030/231) — แยก OCR Sandbox เป็น 2 steps: Step 1 OCR-only → Step 2 AI Extraction. Backend: เพิ่ม job types sandbox-ocr-only และ sandbox-ai-extract, processors processSandboxOcrOnly/processSandboxAiExtract, endpoints POST /ai/admin/sandbox/ocr และ /ai/admin/sandbox/ai-extract, method findByVersion ใน AiPromptsService. Frontend: เพิ่ม methods submitSandboxOcr/submitSandboxAiExtract ใน adminAiService, refactor OcrSandboxPromptManager.tsx ให้มี 2-step UI พร้อม states sandboxStep/ocrResult/selectedPromptVersion, handlers handleStep1Ocr/handleStep2AiExtract/handleResetSandbox. Schema Fix: สร้าง delta SQL 2026-05-30-add-ai-prompts-publicId.sql เพื่อเพิ่ม publicId column ใน ai_prompts table (ADR-019 compliance). 
- 2026-05-30 (Session 11): Typhoon OCR & LLM Integration (ADR-032) — พัฒนาการใช้งานโมเดลภาษาไทยผสมอังกฤษ Typhoon OCR-3B ร่วมกับ Tesseract OCR แบบ Dynamic พร้อมระบบ caching 24 ชม., VRAM Monitor ป้องกัน GPU OOM และระบบ fallback 5s เมื่อโมเดลมีปัญหา และการสลับและบริหารจัดการ LLM โมเดลหลักแบบ Dynamic ในระบบ AI Model Management ของ Next.js frontend +- 2026-06-03: Thai-Optimized AI Model Stack (ADR-034) — เปลี่ยนโมเดลหลักเป็น `typhoon2.5-np-dms:latest` + `typhoon-np-dms-ocr:latest` (สำหรับ OCR, keep_alive:0); เพิ่ม model switching logic ใน ai-batch processor; เพิ่ม static constants ใน AiSettingsService; สร้าง SQL delta สำหรับ ai_available_models --> # 🧠 Agent Long-term Project Memory > **Project:** NAP-DMS (LCBP3) — Laem Chabang Port Phase 3 Document Management System -> **Version:** 1.9.8 (Last Synced: 2026-05-30) +> **Version:** 1.9.9 (Last Synced: 2026-06-03) > **Stack:** NestJS 11 + Next.js 16 + TypeScript + MariaDB 11.8 + Redis + BullMQ + Elasticsearch + Ollama (on-prem AI) > [!IMPORTANT] @@ -71,7 +72,7 @@ - **Ollama (AI Inference) ต้องทำงานบน Admin Desktop เท่านั้น** ห้ามรันบน Server หรือ Docker ใน Production - AI ห้ามเชื่อมต่อและเข้าถึง Database หรือ Storage โดยตรง (ต้องผ่าน DMS API เท่านั้น) -- โมเดลที่ใช้: `gemma4:e4b Q8_0` (LLM) และ `nomic-embed-text` (Embeddings) +- โมเดลที่ใช้: `typhoon2.5-np-dms:latest` (Main LLM, ADR-034) + `typhoon-np-dms-ocr:latest` (OCR, keep_alive:0) + `nomic-embed-text` (Embeddings) - การทำงานแบบ Background Job หรือ Inference ที่ใช้เวลานานต้องสั่งงานผ่าน **BullMQ** (คิว `ai-realtime` และ `ai-batch`) - ข้อมูลผลลัพธ์จาก AI ทั้งหมดต้องผ่านการตรวจสอบความถูกต้องโดยมนุษย์ (Human-in-the-loop) เสมอ @@ -162,7 +163,7 @@ docker compose ps # Check status | D7 | UUID Strategy: `publicId` (UUIDv7) เท่านั้นสำหรับ Public API — INT PK ต้อง `@Exclude()` | ADR-019 | | D8 | Schema changes: แก้ SQL โดยตรง + เพิ่ม `deltas/*.sql` — ห้ามใช้ TypeORM migration files | ADR-009 | | D9 | Qdrant search ต้องส่ง `projectPublicId` เป็น mandatory parameter ทุกครั้ง 
(compile-time) | ADR-023A | -| D10 | AI model stack: `gemma4:e4b Q8_0` (LLM) + `nomic-embed-text` (Embeddings) on Admin Desktop | ADR-023A | +| D10 | AI model stack: `typhoon2.5-np-dms:latest` (Main LLM) + `typhoon-np-dms-ocr:latest` (OCR, keep_alive:0) + `nomic-embed-text` (Embeddings) on Admin Desktop (ADR-034, supersedes ADR-023A §2.1) | ADR-034 | --- @@ -193,7 +194,7 @@ docker compose ps # Check status | **Frontend** | `http://localhost:3000` | QNAP `192.168.10.8` | Next.js | | **MariaDB** | `localhost:3307` | QNAP internal | DB: `lcbp3`, root via docker | | **Redis** | `localhost:6379` | QNAP internal | BullMQ + session store | -| **Ollama** | `http://192.168.10.100:11434` | Admin Desktop (Desk-5439) | gemma4:e2b/e4b, typhoon2.1-gemma3-4b + nomic-embed-text | +| **Ollama** | `http://192.168.10.100:11434` | Admin Desktop (Desk-5439) | typhoon2.5-np-dms:latest (main) + typhoon-np-dms-ocr:latest (OCR) + nomic-embed-text | | **Qdrant** | `http://localhost:6333` | Admin Desktop (Desk-5439) | Vector DB — requires projectPublicId | | **OCR Sidecar** | `http://192.168.10.100:8765` | Admin Desktop (Desk-5439) | Dynamic (Tesseract tha+eng / Typhoon OCR-3B) | | **Gitea** | `https://git.np-dms.work` | QNAP `192.168.10.8` | Source + CI/CD | diff --git a/specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.rollback.sql b/specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.rollback.sql new file mode 100644 index 00000000..def17572 --- /dev/null +++ b/specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.rollback.sql @@ -0,0 +1,18 @@ +-- Rollback: Revert ai_available_models to gemma4 stack (undo ADR-034 delta) +-- Date: 2026-06-03 +-- Pair: 2026-06-03-update-ai-available-models-typhoon.sql + +-- 1. Remove Typhoon models +DELETE FROM ai_available_models +WHERE model_name IN ('typhoon2.5-np-dms:latest', 'typhoon-np-dms-ocr:latest'); + +-- 2. 
Restore gemma4:e2b as default +UPDATE ai_available_models +SET is_default = TRUE, updated_at = NOW() +WHERE model_name = 'gemma4:e2b'; + +-- 3. Revert system_settings active model +UPDATE system_settings +SET setting_value = 'gemma4:e2b', + updated_at = NOW() +WHERE setting_key = 'AI_ACTIVE_MODEL'; diff --git a/specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.sql b/specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.sql new file mode 100644 index 00000000..5fe84c63 --- /dev/null +++ b/specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.sql @@ -0,0 +1,49 @@ +-- Delta: Update ai_available_models for Thai-Optimized Model Stack (ADR-034) +-- Date: 2026-06-03 +-- Author: AI Assistant +-- Related: ADR-034 — Thai-Optimized AI Model Stack, supersedes ADR-023A Section 2.1 +-- Rollback: 2026-06-03-update-ai-available-models-typhoon.rollback.sql + +-- 1. Insert new main model (typhoon2.5-np-dms) as default, demote old defaults +INSERT INTO ai_available_models (model_name, model_version, description, vram_gb, is_active, is_default) +VALUES ( + 'typhoon2.5-np-dms:latest', + 'latest', + 'Thai-optimized main AI model based on typhoon2.5-qwen3-4b (~2.5GB VRAM, standby mode) — ADR-034', + 2.50, + TRUE, + TRUE +) +ON DUPLICATE KEY UPDATE + description = VALUES(description), + vram_gb = VALUES(vram_gb), + is_active = TRUE, + is_default = TRUE, + updated_at = NOW(); + +-- Demote old gemma4 models from default status +UPDATE ai_available_models +SET is_default = FALSE, updated_at = NOW() +WHERE model_name IN ('gemma4:e2b', 'gemma4:e4b', 'typhoon2.1-gemma3-4b'); + +-- 2. 
Insert OCR model (typhoon-np-dms-ocr) — not default, keep_alive=0 (unload after each job) +INSERT INTO ai_available_models (model_name, model_version, description, vram_gb, is_active, is_default) +VALUES ( + 'typhoon-np-dms-ocr:latest', + 'latest', + 'Thai OCR model based on typhoon-ocr1.5-3b (~3.2GB VRAM, unloads after each job) — ADR-034', + 3.20, + TRUE, + FALSE +) +ON DUPLICATE KEY UPDATE + description = VALUES(description), + vram_gb = VALUES(vram_gb), + is_active = TRUE, + updated_at = NOW(); + +-- 3. Update active model in system_settings to typhoon2.5-np-dms:latest +UPDATE system_settings +SET setting_value = 'typhoon2.5-np-dms:latest', + updated_at = NOW() +WHERE setting_key = 'AI_ACTIVE_MODEL'; diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon-np-dms-ocr.model.md b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon-np-dms-ocr.model.md new file mode 100644 index 00000000..6ff1202e --- /dev/null +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon-np-dms-ocr.model.md @@ -0,0 +1,28 @@ +FROM scb10x/typhoon-ocr1.5-3b:latest + +PARAMETER num_ctx 8192 +PARAMETER num_predict 4096 +PARAMETER temperature 0.1 +PARAMETER top_p 0.1 +PARAMETER repeat_penalty 1.1 + +SYSTEM """You are an expert in structuring Thai documents. +Extract the information from the image in the most correct and organized format. + +Instructions: +- Return ONLY clean Markdown output. +- Include ALL information visible on the page. +- Preserve document structure and hierarchy. +- Do NOT add explanations or interpretations. + +Formatting Rules: +- Tables: Render tables using <table>...</table>
in clean HTML format. +- Equations: Render equations using LaTeX syntax with inline ($...$) and block ($$...$$). +- Images/Charts/Diagrams: Wrap any clearly defined visual areas in: +<figure>
+Describe the image's main elements, note contextual clues, mention visible text and meaning. Describe in Thai. +</figure>
+- Page Numbers: Wrap page numbers in <page_number>...</page_number>. +- Checkboxes: Use ☐ for unchecked and ☑ for checked boxes. +- Signatures/Stamps: Describe location and context +- Unclear text: [unclear: context description]""" diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon2.5-np-dms.model.md b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon2.5-np-dms.model.md new file mode 100644 index 00000000..2604d628 --- /dev/null +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon2.5-np-dms.model.md @@ -0,0 +1,19 @@ +FROM scb10x/typhoon2.5-qwen3-4b:latest + +PARAMETER num_ctx 8192 +PARAMETER num_predict 2048 +PARAMETER temperature 0.1 +PARAMETER top_p 0.85 +PARAMETER repeat_penalty 1.15 +PARAMETER stop "\n\n" + +SYSTEM """คุณคือระบบ AI ผู้เชี่ยวชาญด้านการวิเคราะห์และจัดการเอกสารโครงการ (Document Management System) +หน้าที่ของคุณคืออ่านข้อความภาษาไทยที่ได้มาจากระบบ OCR อย่างละเอียด แล้วทำตามคำสั่งต่อไปนี้อย่างเคร่งครัด: +Guidelines: +1. ข้อมูลเข้าคือข้อความดิบจาก OCR ซึ่งอาจมีคำผิด บรรทัดขาดหาย หรือสัญลักษณ์รบกวน +2. ค้นหาและสกัด 'เลขที่เอกสาร' (Document Number) และ 'วันที่ของเอกสาร' ออกมาให้ถูกต้อง หากไม่พบให้ระบุว่า 'ไม่ระบุ' +3. สรุปเนื้อหาสำคัญของเอกสารนี้อย่างกระชับ เข้าใจง่าย โดยใช้บริบทโดยรวมในการตีความ หากไม่แน่ใจให้ระบุสถานะ "ไม่ชัดเจน" +4. ห้ามสร้างข้อมูล (hallucinate) ที่ไม่มีอยู่ในข้อความต้นฉบับ +5. ห้ามเดาตัวเลข วันที่ หรือเนื้อหาใดๆ ที่ไม่ได้ปรากฏอยู่ในข้อความดิบเด็ดขาด +6. 
+6. หากข้อมูลไม่ครบ ให้เติม null พร้อมระบุ reason ในฟิลด์ _missing_fields +ตอบกลับเฉพาะ JSON ที่กำหนดเท่านั้น ห้ามเพิ่มข้อความนอกโครงสร้าง""" diff --git a/specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md b/specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md index a3f289f0..d4b9e215 100644 --- a/specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md +++ b/specs/06-Decision-Records/ADR-023A-unified-ai-architecture.md @@ -150,7 +150,8 @@ graph TB | ADR | Version | Dependency Type | Affected Version(s) | Implementation Status | |-----|---------|-----------------|---------------------|----------------------| -| **ADR-023A** | 1.2 | Model Revision | v1.9.0+ | ✅ Active | +| **ADR-034** | 1.0 | Model Stack Revision | v1.9.0+ | ✅ Active (supersedes 023A Section 2.1) | +| **ADR-023A** | 1.2 | Model Revision | v1.9.0+ | ✅ Active (Section 2.1 superseded by ADR-034) | | **ADR-023** | 1.1 | Base Architecture | v1.9.0+ | ✅ Active (superseded by 023A for model config) | | **ADR-016** | 2.0 | Governs | v1.8.0+ | ✅ Active | | **ADR-019** | 1.5 | Governs | v1.8.0+ | ✅ Active | @@ -181,14 +182,16 @@ graph TB #### 2.1 Model Stack & Dynamic Thai-Specialized Models (T041, US2, US3) +> ⚠️ **Update 2026-06-03:** Section นี้ถูก **superseded โดย [ADR-034](./ADR-034-AI-model-change.md)** — โมเดลหลักเปลี่ยนจาก `gemma4:e2b` เป็น `typhoon2.5-np-dms:latest` (Thai-optimized) พร้อม OCR model `typhoon-np-dms-ocr:latest` + ระบบประมวลผลพื้นฐานจะรันด้วยชุด 2-Model Stack ที่ประหยัด VRAM เป็นหลัก และเปิดให้โหลดสลับไปประมวลผลด้วยโมเดลภาษาไทยเฉพาะทางประสิทธิภาพสูง (High-Performance Thai Specialized Models) ได้แบบ Dynamic ภายใต้การควบคุมของ VRAM Monitor เพื่อไม่ให้เกิด VRAM OOM: -##### ชุดประมวลผลหลัก (Baseline 2-Model Stack): +##### ชุดประมวลผลหลัก (Baseline 2-Model Stack) — Superseded by ADR-034: | โมเดล | Role | VRAM (โดยประมาณ) | หมายเหตุ | |-------|------|-----------------|---------| -| `gemma4:e2b` | General Inference + OCR Post-processing + Extraction + RAG Q&A | ~2GB (Q4) + 
~0.2GB (KV Cache) | Q4 quantization; Context window 8K tokens; Parameters 2.1B | -| `nomic-embed-text` | Embedding 768-dim → Qdrant | ~0.3GB | สร้าง Semantic Vector สำหรับ Hybrid Search | +| `gemma4:e2b` | ~~General Inference + OCR Post-processing + Extraction + RAG Q&A~~ | ~2GB (Q4) | ❌ ถูกแทนที่โดย `typhoon2.5-np-dms` (ADR-034) | +| `nomic-embed-text` | Embedding 768-dim → Qdrant | ~0.3GB | ✅ ยังใช้อยู่ | | **รวม (peak)** | | **~2.5GB** | **เผื่อ headroom ~5.5GB — มั่นใจสูง เพราะ context window ขนาดใหญ่ (8K tokens)** | ##### โมเดลภาษาไทยเฉพาะทางที่เป็นทางเลือก (Dynamic Thai Specialized Models): diff --git a/specs/06-Decision-Records/ADR-034-AI-model-change.md b/specs/06-Decision-Records/ADR-034-AI-model-change.md new file mode 100644 index 00000000..7b7d7576 --- /dev/null +++ b/specs/06-Decision-Records/ADR-034-AI-model-change.md @@ -0,0 +1,236 @@ +# ADR-034: AI Model Change — Thai-Optimized Model Stack + +**Status:** Accepted +**Date:** 2026-06-03 +**Decision Makers:** Development Team, AI Integration Lead +**Supersedes:** ADR-023A Section 2.1 (Model Stack & Configuration) +**Related Documents:** +- [ADR-023A: Unified AI Architecture — Model Revision](./ADR-023A-unified-ai-architecture.md) +- [ADR-033: Active Model & OCR Management](./ADR-033-active-model-and-ocr-management.md) +- [CONTEXT.md](../../../CONTEXT.md) + +--- + +## Context and Problem Statement + +การใช้งาน `gemma4:e2b` (~2GB) เป็นโมเดลหลักในสภาพแวดล้อมภาษาไทย พบว่าประสิทธิภาพด้าน OCR และการสกัดข้อมูลจากเอกสารภาษาไทยยังไม่เพียงพอ จึงต้องเปลี่ยนเป็นโมเดลที่ถูก fine-tune มาสำหรับภาษาไทยโดยเฉพาะ + +**ข้อจำกัด:** +- VRAM Budget: RTX 2060 Super 8GB +- Main Model + OCR Model ไม่สามารถโหลดพร้อมกันได้ (รวม ~5.7GB ขณะประมวลผล แต่ peak อาจเกิน 8GB) +- ต้องรักษา mechanism `keep_alive` และ VRAM monitoring ตาม ADR-033 + +--- + +## Decision Drivers + +- **Thai Language Optimization:** โมเดลต้องรองรับ OCR และการสกัดข้อมูลภาษาไทยได้ดีกว่า gemma4 +- **VRAM Safety:** ไม่เกิน 8GB ในทุกสถานการณ์ +- **Model Switching:** 
ใช้ BullMQ processor ควบคุมการสลับโมเดลเท่านั้น +- **No Direct n8n Access:** n8n ห้ามเรียก Ollama โดยตรง ต้องผ่าน DMS API → BullMQ + +--- + +## Decision Outcome + +### Selected Models + +| Model | Role | Base Model | Size | Keep-Alive | +|-------|------|------------|------|------------| +| `typhoon2.5-np-dms:latest` | Main AI (General + OCR Post-processing + Extraction + RAG Q&A) | `scb10x/typhoon2.5-qwen3-4b:latest` | ~2.5GB | Stand by ตลอด (ไม่ใช่ 0) | +| `typhoon-np-dms-ocr:latest` | OCR ภาษาไทย | `scb10x/typhoon-ocr1.5-3b:latest` | ~3.2GB | `0` (unload ทันที) | + +### Key Parameters (Main Model) + +``` +PARAMETER num_ctx 8192 +PARAMETER num_predict 2048 +PARAMETER temperature 0.1 +PARAMETER top_p 0.85 +PARAMETER repeat_penalty 1.15 +``` + +--- + +## Implementation Details + +### 1. Model Files (Desk-5439) + +--- + +file: E:\np-dms\lcbp3\specs\04-Infrastructure-OPS\04-00-docker-compose\Desk-5439\typhoon2.5-np-dms.model.md +```t +# ollama create typhoon2.5-np-dms -f ./typhoon2.5-np-dms.model.md + +FROM scb10x/typhoon2.5-qwen3-4b:latest + +# 1. ปรับขนาดพื้นที่ประมวลผลข้อความ (Context Window) +# ตั้งไว้ที่ 16K ถึง 32K ถือว่าเหลือเฟือมากสำหรับเอกสารข้อความ OCR หลายสิบหน้า +PARAMETER num_ctx 8192 +PARAMETER num_predict 2048 +# 2. ปรับความนิ่งของคำตอบ (Determinism) +# บีบให้เป็น 0 เพื่อป้องกันโมเดล "คิดแทน" หรือเดาตัวเลข/วันที่ขึ้นมาเอง (สำคัญมากสำหรับเลขที่เอกสาร) +PARAMETER temperature 0.1 +PARAMETER top_p 0.85 +PARAMETER repeat_penalty 1.15 +PARAMETER stop "\n\n" + +# 3. ล็อกบทบาทและโครงสร้างผลลัพธ์ที่ต้องการ (System Prompt) +SYSTEM """คุณคือระบบ AI ผู้เชี่ยวชาญด้านการวิเคราะห์และจัดการเอกสารโครงการ (Document Management System) +หน้าที่ของคุณคืออ่านข้อความภาษาไทยที่ได้มาจากระบบ OCR อย่างละเอียด แล้วทำตามคำสั่งต่อไปนี้อย่างเคร่งครัด: +Guidelines: +1. ข้อมูลเข้าคือข้อความดิบจาก OCR ซึ่งอาจมีคำผิด บรรทัดขาดหาย หรือสัญลักษณ์รบกวน +2. ค้นหาและสกัด 'เลขที่เอกสาร' (Document Number) และ 'วันที่ของเอกสาร' ออกมาให้ถูกต้อง หากไม่พบให้ระบุว่า 'ไม่ระบุ' +3. 
สรุปเนื้อหาสำคัญของเอกสารนี้อย่างกระชับ เข้าใจง่าย โดยใช้บริบทโดยรวมในการตีความ หากไม่แน่ใจให้ระบุสถานะ "ไม่ชัดเจน" +4. ห้ามสร้างข้อมูล (hallucinate) ที่ไม่มีอยู่ในข้อความต้นฉบับ +5. ห้ามเดาตัวเลข วันที่ หรือเนื้อหาใดๆ ที่ไม่ได้ปรากฏอยู่ในข้อความดิบเด็ดขาด +6. หากข้อมูลไม่ครบ ให้เติม null พร้อมระบุ reason ในฟิลด์ _missing_fields +ตอบกลับเฉพาะ JSON ที่กำหนดเท่านั้น ห้ามเพิ่มข้อความนอกโครงสร้าง +""" +``` + +--- +file: E:\np-dms\lcbp3\specs\04-Infrastructure-OPS\04-00-docker-compose\Desk-5439\typhoon-np-dms-ocr.model.md +```t +# ollama create typhoon-np-dms-ocr -f ./typhoon-np-dms-ocr.model.md + +# ใส่ชื่อ tag โมเดล 3B ที่คุณต้องการจูนตรงนี้ได้เลย +FROM scb10x/typhoon-ocr1.5-3b:latest + +# ลดจาก 125k → 8k เพื่อประหยัด และ ล็อกให้ตัวโมเดล + KV Cache กิน VRAM ไม่เกิน 5GB (เหลือโควตาให้ Windows อีก 3GB) +PARAMETER num_ctx 8192 +PARAMETER num_predict 4096 + +# งานดึงข้อความจากภาพสแกน สามารถปรับค่า temperature เป็น 0 เพื่อลดการเดา/มโนคำภาษาไทย +PARAMETER temperature 0.1 +PARAMETER top_p 0.1 + +# ป้องกันไม่ให้โมเดลพิมพ์อักษรซ้ำซากเวลาเจอจุดที่ภาพเบลอหรือรอยเปื้อนบนกระดาษ +PARAMETER repeat_penalty 1.1 + +# ใส่คำสั่งหลักเพื่อให้โมเดลเข้าใจบทบาทและรูปแบบผลลัพธ์ที่ต้องการ (ตัวอย่าง) +SYSTEM """You are an expert in structuring Thai documents. +Extract the information from the image in the most correct and organized format. + +Instructions: +- Return ONLY clean Markdown output. +- Include ALL information visible on the page. +- Preserve document structure and hierarchy. +- Do NOT add explanations or interpretations. + +Formatting Rules: +- Tables: Render tables using <table>...</table>
in clean HTML format. +- Equations: Render equations using LaTeX syntax with inline ($...$) and block ($$...$$). +- Images/Charts/Diagrams: Wrap any clearly defined visual areas in: +<figure>
+Describe the image's main elements, note contextual clues, mention visible text and meaning. Describe in Thai. +</figure>
+- Page Numbers: Wrap page numbers in .... +- Checkboxes: Use ☐ for unchecked and ☑ for checked boxes. +- Signatures/Stamps: Describe location and context +- Unclear text: [unclear: context description] +""" +``` + +--- + +### 2. Model Switching Logic (BullMQ Processor) + +```typescript +// Pseudo-code for BullMQ processor (ai-batch queue) +async function processJob(job: Job) { + const { jobType, documentId } = job.data; + + if (jobType === 'ocr-extract') { + // OCR job: unload main, load OCR, process, unload OCR + await ollama.unloadModel('typhoon2.5-np-dms'); + await ollama.loadModel('typhoon-np-dms-ocr', { keep_alive: 0 }); + const result = await ollama.generate('typhoon-np-dms-ocr', prompt); + // keep_alive: 0 จะ unload อัตโนมัติหลังเสร็จ + + // โหลด main model กลับเข้า VRAM สำหรับงานถัดไป + await ollama.loadModel('typhoon2.5-np-dms'); + return result; + } + + // Main model jobs: extraction, rag-query, ai-suggest + const result = await ollama.generate('typhoon2.5-np-dms', prompt); + return result; +} +``` + +**กฎ:** +- **n8n ห้ามเรียก Ollama โดยตรง** — ต้องผ่าน `POST /api/ai/jobs` → BullMQ เท่านั้น +- **BullMQ concurrency = 1** — ป้องกัน VRAM overflow +- **Cold start OCR:** 30-60 วินาทีต่อ job ยอมรับได้ +- **OCR job ซ้อนกัน 3-5 งาน:** รวม 2-5 นาที ยอมรับได้ + +--- + +### 3. Code Changes + +**ไฟล์ที่ต้องแก้ไข:** + +| File | Change | +|------|--------| +| `backend/src/modules/ai/services/ai-settings.service.ts` | Hardcode `DEFAULT_MODEL = 'typhoon2.5-np-dms:latest'` | +| `backend/src/modules/ai/services/ollama.service.ts` | เพิ่ม method `unloadModel()` และ `loadModel()` สำหรับ switching | +| `backend/src/modules/ai/processors/ai-batch.processor.ts` | Implement switching logic ตาม pseudo-code ด้านบน | + +**Note:** ไม่ต้อง update `ai_settings` table — ใช้ hardcode value เพื่อความเร็วในการ deploy + +--- + +### 4. Migration Plan + +**ขั้นตอนการ deploy:** + +1. 
**Desk-5439:** สร้าง custom models บน Ollama + ```bash + cd /path/to/model/files + ollama create typhoon2.5-np-dms -f ./typhoon2.5-np-dms.model.md + ollama create typhoon-np-dms-ocr -f ./typhoon-np-dms-ocr.model.md + ``` + +2. **QNAP Backend:** Deploy ด้วย code changes (ADR-033 mechanism ยังคงใช้ได้) + +3. **Verification:** + - Test OCR job → ตรวจสอบว่า unload/load ทำงานถูกต้อง + - Test main model job → ตรวจสอบว่า main model พร้อมใช้หลัง OCR + +--- + +### 5. Rollback Strategy + +**ไม่มี automatic rollback mechanism** + +หากพบปัญหา: +1. สร้าง custom model ใหม่จาก base model ตัวอื่น (เช่น กลับไป `gemma4:e2b`) +2. หรือแก้ไข `typhoon2.5-np-dms.model.md` แล้วสร้าง version ใหม่ (`:v2`) +3. Update code ให้ชี้ไป model ใหม่ แล้ว redeploy + +--- + +## Impact on Related ADRs + +| ADR | Section | Impact | +|-----|---------|--------| +| **ADR-023A** | Section 2.1 Model Stack | Superseded by ADR-034 — model config ใช้ค่าจากนี้ | +| **ADR-033** | VRAM Monitor + Model Switching | ยังใช้ได้ — mechanism เดิม เปลี่ยนแค่ชื่อ model | +| **ADR-032** | Typhoon OCR Integration | OCR model ถูกแทนที่โดย `typhoon-np-dms-ocr` | + +--- + +## Glossary Updates (CONTEXT.md) + +เพิ่มคำศัพท์ใหม่: + +| Term | Definition | +|------|------------| +| **Thai-Optimized Model** | โมเดล AI ที่ถูก fine-tune มาสำหรับภาษาไทย (เช่น Typhoon series) | +| **Model Unload/Load** | กระบวนการยกเลิกโหลดโมเดลจาก VRAM และโหลดโมเดลใหม่เข้าไป | +| **Cold Start Penalty** | ความล่าช้า 5-15 วินาทีจากการโหลดโมเดล weights เข้า VRAM | + +--- + +**สำหรับ Implementation:** ดูไฟล์ใน `specs/100-Infrastructures/134-AI-model-change` (สร้างเมื่อเริ่ม implement) diff --git a/specs/100-Infrastructures/134-ai-model-change/checklists/requirements.md b/specs/100-Infrastructures/134-ai-model-change/checklists/requirements.md new file mode 100644 index 00000000..bcf3395f --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/checklists/requirements.md @@ -0,0 +1,36 @@ +# Specification Quality Checklist: Thai-Optimized AI Model 
Stack + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-06-03 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded (nomic-embed-text excluded; n8n boundary explicit) +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows (OCR, main AI, health check) +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- ADR-034 is the authoritative source; spec captures user-value perspective +- SC-001 OCR quality metric (>90% readable) is aspirational; verify with real docs post-deploy +- A1 (internet access on Desk-5439) should be confirmed before deploy diff --git a/specs/100-Infrastructures/134-ai-model-change/contracts/ollama-service-methods.md b/specs/100-Infrastructures/134-ai-model-change/contracts/ollama-service-methods.md new file mode 100644 index 00000000..b8ed6790 --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/contracts/ollama-service-methods.md @@ -0,0 +1,142 @@ +// File: specs/100-Infrastructures/134-ai-model-change/contracts/ollama-service-methods.md +// Change Log: +// - 2026-06-03: API contracts for OllamaService model management methods + +# Contract: OllamaService Model Management Methods + +--- + +## Method: unloadModel + 
+**Signature**: `async unloadModel(modelName: string): Promise<void>` +**Location**: `backend/src/modules/ai/services/ollama.service.ts` +**Purpose**: Force unload a model from Ollama VRAM + +### Ollama API Call + +```http +POST http://{OLLAMA_BASE_URL}/api/generate +Content-Type: application/json + +{ + "model": "{modelName}", + "prompt": "", + "keep_alive": 0 +} +``` + +### Success Behavior + +- HTTP 200 → model unloaded from VRAM +- Model already unloaded / not found → log warn; no-op (not fatal) + +### Error Handling + +| Error | Behavior | +|-------|----------| +| HTTP timeout (> 5s) | Throw `Error('Failed to unload model {modelName}: timeout')` | +| Network error | Throw `Error('Failed to unload model {modelName}: {axiosError.message}')` | +| HTTP 404 (model not found) | Log warn; resolve without throwing | + +--- + +## Method: loadModel + +**Signature**: `async loadModel(modelName: string, keepAlive?: number | string): Promise<void>` +**Location**: `backend/src/modules/ai/services/ollama.service.ts` +**Purpose**: Load / warm a model into Ollama VRAM + +### Ollama API Call + +```http +POST http://{OLLAMA_BASE_URL}/api/generate +Content-Type: application/json + +{ + "model": "{modelName}", + "prompt": "", + "keep_alive": {keepAlive ?? -1} +} +``` + +### keepAlive Values + +| Value | Behavior | +|-------|----------| +| `-1` | Model stays in VRAM indefinitely (main model) | +| `0` | Model unloads after request completes (OCR model) | +| `"5m"` | Model unloads after 5 minutes (alternative) | + +### Success Behavior + +- HTTP 200 → model loaded into VRAM + +### Error Handling + +| Error | Behavior | +|-------|----------| +| HTTP timeout (> 60s, cold start) | Throw `Error('Failed to load model {modelName}: timeout (cold start may take up to 60s)')` | +| HTTP 404 (model not found) | Throw `Error('Model {modelName} not found on Ollama. 
Run: ollama create {modelName} -f ./{modelName}.model.md')` | +| Network error | Throw `Error('Failed to load model {modelName}: {axiosError.message}')` | + +--- + +## BullMQ Processor: Model Switching Contract + +**Location**: `backend/src/modules/ai/processors/ai-batch.processor.ts` + +### OCR Job Types Constant + +```typescript +const OCR_JOB_TYPES: string[] = ['ocr-extract', 'sandbox-ocr-only']; +``` + +### Switching Sequence (Pseudocode) + +``` +processJob(job: Job): + if job.data.jobType ∈ OCR_JOB_TYPES: + log.log(`[ModelSwitch] Unloading ${DEFAULT_MODEL}`) + await ollamaService.unloadModel(DEFAULT_MODEL) // free ~2.5GB + + log.log(`[ModelSwitch] Loading ${OCR_MODEL} (keep_alive: 0)`) + await ollamaService.loadModel(OCR_MODEL, 0) // load ~3.2GB + + result = await runOcrJob(job) + // OCR model auto-unloads via keep_alive: 0 + + log.log(`[ModelSwitch] Reloading ${DEFAULT_MODEL} (keep_alive: -1)`) + await ollamaService.loadModel(DEFAULT_MODEL, -1) // restore main model + + return result + else: + return await runMainModelJob(job) +``` + +### Concurrency Constraint + +- BullMQ concurrency = 1 (ป้องกัน VRAM overflow) +- ไม่มี parallel OCR + main jobs +- ถ้า job fail ระหว่าง switching → BullMQ retry จาก ต้นใหม่ทั้ง sequence + +--- + +## Health Response Contract Update + +**Endpoint**: `GET /api/ai/health` +**Location**: `backend/src/modules/ai/ai.service.ts` + +### Updated Response Shape + +```typescript +interface SystemHealthResponse { + ollama: { + status: 'HEALTHY' | 'DOWN'; + mainModel: string; // 'typhoon2.5-np-dms:latest' + ocrModel: string; // 'typhoon-np-dms-ocr:latest' + latencyMs?: number; + }; + ocr: OcrHealthResult; + // ... 
existing fields +} +``` diff --git a/specs/100-Infrastructures/134-ai-model-change/data-model.md b/specs/100-Infrastructures/134-ai-model-change/data-model.md new file mode 100644 index 00000000..d8e45ab3 --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/data-model.md @@ -0,0 +1,116 @@ +// File: specs/100-Infrastructures/134-ai-model-change/data-model.md +// Change Log: +// - 2026-06-03: Data model for Thai-Optimized AI Model Stack + +# Data Model: Thai-Optimized AI Model Stack + +--- + +## Database Schema Changes + +**ไม่มี new tables หรือ column changes** — ADR-009 compliant; ไม่มี TypeORM migration + +### Optional: ai_available_models Seed Update + +ตาราง `ai_available_models` (จาก ADR-027 Session 6) ควร update เพื่อความสอดคล้อง: + +**Update existing main model record:** + +```sql +UPDATE ai_available_models +SET model_name = 'typhoon2.5-np-dms:latest', + display_name = 'Typhoon 2.5 NP-DMS (Thai)', + updated_at = NOW() +WHERE model_type = 'main' AND is_active = 1; +``` + +**Insert OCR model record (if column model_type supports 'ocr'):** + +```sql +INSERT INTO ai_available_models (model_name, display_name, model_type, is_active, created_at, updated_at) +VALUES ('typhoon-np-dms-ocr:latest', 'Typhoon OCR 3B (Thai)', 'ocr', 1, NOW(), NOW()) +ON DUPLICATE KEY UPDATE display_name = VALUES(display_name), updated_at = NOW(); +``` + +> **Note**: ตรวจสอบ schema จริงใน `lcbp3-v1.9.0-schema-02-tables.sql` ก่อน run delta — column names และ model_type ENUM อาจแตกต่าง + +--- + +## Code Configuration Model + +### AiSettingsService Constants + +| Constant | เดิม | ใหม่ | +|----------|------|------| +| `DEFAULT_MODEL` | `'gemma4:e2b'` | `'typhoon2.5-np-dms:latest'` | +| `OCR_MODEL` (เพิ่มใหม่) | — | `'typhoon-np-dms-ocr:latest'` | + +**File**: `backend/src/modules/ai/services/ai-settings.service.ts` + +--- + +## Custom Ollama Model Configurations + +### typhoon2.5-np-dms (Main Model) + +| Property | Value | +|----------|-------| +| Custom Name | 
`typhoon2.5-np-dms:latest` | +| Base Model | `scb10x/typhoon2.5-qwen3-4b:latest` | +| Size | ~2.5GB VRAM | +| keep_alive | Indefinite (`-1`) — standby ตลอด | +| num_ctx | 8192 | +| num_predict | 2048 | +| temperature | 0.1 | +| top_p | 0.85 | +| repeat_penalty | 1.15 | +| Role | Extraction, RAG Q&A, AI Suggestion, OCR Post-processing | +| Modelfile Path | `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon2.5-np-dms.model.md` | + +### typhoon-np-dms-ocr (OCR Model) + +| Property | Value | +|----------|-------| +| Custom Name | `typhoon-np-dms-ocr:latest` | +| Base Model | `scb10x/typhoon-ocr1.5-3b:latest` | +| Size | ~3.2GB VRAM | +| keep_alive | `0` — auto-unload immediately after job | +| num_ctx | 8192 | +| num_predict | 4096 | +| temperature | 0.1 | +| top_p | 0.1 | +| repeat_penalty | 1.1 | +| Role | Thai OCR extraction from PDF images | +| Modelfile Path | `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon-np-dms-ocr.model.md` | + +--- + +## VRAM Budget Analysis + +| State | Models in VRAM | Estimated VRAM Usage | +|-------|---------------|---------------------| +| Idle | typhoon2.5-np-dms (standby) | ~2.5GB | +| Main AI job | typhoon2.5-np-dms (active) | ~3.5–4GB peak | +| OCR transition | unloading main → loading OCR | ~3.2GB (OCR only) | +| OCR processing | typhoon-np-dms-ocr (active) | ~4–5GB peak | +| Post-OCR reload | loading main back | ~2.5GB | +| **Max peak** | OCR model active | **~5GB** (safe under 8GB limit) | + +--- + +## OllamaService New Methods Signatures + +```typescript +/** + * unloadModel: Force unload model จาก VRAM + * @param modelName - ชื่อ Ollama model ที่ต้องการ unload + */ +async unloadModel(modelName: string): Promise<void> + +/** + * loadModel: Load / warm model เข้า VRAM + * @param modelName - ชื่อ Ollama model ที่ต้องการ load + * @param keepAlive - -1 = indefinite (default สำหรับ main model); 0 = auto-unload + */ +async loadModel(modelName: string, keepAlive?: number | string): Promise<void> +``` diff --git 
a/specs/100-Infrastructures/134-ai-model-change/plan.md b/specs/100-Infrastructures/134-ai-model-change/plan.md new file mode 100644 index 00000000..9b312c5a --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/plan.md @@ -0,0 +1,133 @@ +// File: specs/100-Infrastructures/134-ai-model-change/plan.md +// Change Log: +// - 2026-06-03: Initial implementation plan for Thai-Optimized AI Model Stack (ADR-034) + +# Implementation Plan: Thai-Optimized AI Model Stack + +**Branch**: `134-ai-model-change` | **Date**: 2026-06-03 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `specs/100-Infrastructures/134-ai-model-change/spec.md` + +--- + +## Summary + +เปลี่ยน AI model stack จาก `gemma4:e2b` เป็น custom Typhoon models: `typhoon2.5-np-dms:latest` (main AI) + `typhoon-np-dms-ocr:latest` (Thai OCR) ตาม ADR-034. ต้อง implement: +1. Custom Modelfiles บน Desk-5439 (มีอยู่แล้วใน `specs/04-Infrastructure-OPS/...`) +2. `unloadModel()` + `loadModel()` ใน `OllamaService` +3. Model switching logic ใน `ai-batch.processor.ts` +4. อัปเดต `AiSettingsService.DEFAULT_MODEL` +5. SQL delta สำหรับ `ai_available_models` table + +--- + +## Technical Context + +**Language/Version**: TypeScript 5.x, NestJS 11, Node.js 20 +**Primary Dependencies**: `@nestjs/bull`, `bullmq`, `axios` (Ollama HTTP client), Redis +**Storage**: ไม่มี schema changes — optional SQL delta สำหรับ `ai_available_models` seed +**Testing**: Jest (unit tests: OllamaService methods + ai-batch.processor switching) +**Target Platform**: QNAP NAS backend (Docker) + Desk-5439 (Ollama runtime, RTX 2060 Super 8GB) +**Performance Goals**: OCR cold start ≤ 60s; main model warm ≤ 5s; zero VRAM OOM +**Constraints**: VRAM ≤ 8GB; BullMQ concurrency=1; ADR-033 VRAM monitor preserved; ADR-023A AI boundary +**Scale/Scope**: 1 Desk-5439 Ollama instance; 1 concurrent AI job at a time (BullMQ) + +--- + +## Constitution Check + +_GATE: Must pass before Phase 0 research. 
Re-checked after Phase 1 design._ + +| Rule | Status | Notes | +|------|--------|-------| +| ADR-019 UUID: ไม่มี parseInt บน UUID | ✅ PASS | ไม่มี UUID changes ใน feature นี้ | +| ADR-009 Schema: ไม่มี TypeORM migration | ✅ PASS | ไม่มี table ใหม่; SQL delta สำหรับ seed update เท่านั้น | +| ADR-023/023A AI Boundary: Ollama บน Desk-5439 | ✅ PASS | ยังคง pattern เดิม; ไม่มี direct DB/storage access | +| ADR-033 VRAM Monitor: mechanism ยังใช้งาน | ✅ PASS | ชื่อ model เปลี่ยน; mechanism เดิม | +| ADR-008 BullMQ: Background jobs ผ่าน queue | ✅ PASS | ไม่มี inline AI call ใหม่ | +| ADR-007 Error Handling: Error classification | ✅ PASS | unload/load errors ต้องมี descriptive messages | +| ADR-016 Security: CASL Guard | ✅ PASS | ไม่มี new public endpoints | +| Forbidden: console.log, any type | ✅ PASS | ต้องตรวจสอบขณะ implement (ESLint enforce) | + +--- + +## Project Structure + +### Documentation (this feature) + +```text +specs/100-Infrastructures/134-ai-model-change/ +├── spec.md # Feature specification +├── plan.md # This file +├── research.md # Phase 0: decisions +├── data-model.md # Phase 1: config/model data +├── quickstart.md # Phase 1: verification guide +├── contracts/ +│ └── ollama-service-methods.md # Method signatures + Ollama API +├── checklists/ +│ └── requirements.md # Spec quality checklist +└── tasks.md # Phase 2: task list +``` + +### Infrastructure Files (already exist) + +```text +specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ +├── typhoon2.5-np-dms.model.md # Main model Modelfile ✅ exists +└── typhoon-np-dms-ocr.model.md # OCR model Modelfile ✅ exists +``` + +### Source Code Changes + +```text +backend/src/modules/ai/ +├── services/ +│ ├── ollama.service.ts # เพิ่ม unloadModel() + loadModel() +│ └── ai-settings.service.ts # อัปเดต DEFAULT_MODEL + เพิ่ม OCR_MODEL +└── processors/ + └── ai-batch.processor.ts # เพิ่ม OCR model switching logic +``` + +### Optional Delta + +```text +specs/03-Data-and-Storage/deltas/ +└── 
2026-06-03-update-ai-available-models-typhoon.sql +``` + +**Structure Decision**: Web application (backend only) — ไม่มี frontend changes ที่จำเป็น; health UI อาจ update model names เองจาก dynamic data + +--- + +## Phase 0: Research + +ผลการ research ดู [research.md](./research.md) — decisions ถูก resolve จาก ADR-034 แล้ว + +--- + +## Phase 1: Design & Contracts + +### Data Model + +ดู [data-model.md](./data-model.md) + +- ไม่มี new DB entities +- Code constants ใน `AiSettingsService` อัปเดต +- Optional: `ai_available_models` seed delta + +### API Contracts + +ดู [contracts/ollama-service-methods.md](./contracts/ollama-service-methods.md) + +- `OllamaService.unloadModel(modelName: string): Promise<void>` +- `OllamaService.loadModel(modelName: string, keepAlive?: number | string): Promise<void>` +- BullMQ processor switching pseudocode + +### Quickstart Verification + +ดู [quickstart.md](./quickstart.md) + +--- + +## Complexity Tracking + +ไม่มี Constitution Check violations ที่ต้องจัดการ diff --git a/specs/100-Infrastructures/134-ai-model-change/quickstart.md b/specs/100-Infrastructures/134-ai-model-change/quickstart.md new file mode 100644 index 00000000..5d5288b1 --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/quickstart.md @@ -0,0 +1,111 @@ +// File: specs/100-Infrastructures/134-ai-model-change/quickstart.md +// Change Log: +// - 2026-06-03: Verification guide for Thai-Optimized AI Model Stack + +# Quickstart: Thai-Optimized AI Model Stack Verification + +--- + +## Prerequisites + +- Desk-5439 รัน Ollama service (port 11434) +- Internet access บน Desk-5439 (สำหรับ pull base models จาก registry) +- QNAP backend container running (port 3001) +- Model files อยู่ที่ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/` + +--- + +## Step 1: สร้าง Custom Models บน Desk-5439 + +```powershell +# บน Desk-5439 Windows — เปิด PowerShell ใน directory ที่มี Modelfiles +# Path: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ + +ollama create 
typhoon2.5-np-dms -f .\typhoon2.5-np-dms.model.md +# คาดว่าใช้เวลา: 5-15 นาที (download base model ~2.5GB) + +ollama create typhoon-np-dms-ocr -f .\typhoon-np-dms-ocr.model.md +# คาดว่าใช้เวลา: 5-15 นาที (download base model ~3.2GB) + +# ตรวจสอบ +ollama list +# ต้องเห็น: +# typhoon2.5-np-dms:latest +# typhoon-np-dms-ocr:latest +# nomic-embed-text:latest (ยังคงอยู่ — embedding model ไม่เปลี่ยน) +``` + +--- + +## Step 2: Apply SQL Delta (ถ้า ai_available_models table มีอยู่) + +```powershell +# รัน delta ผ่าน DB admin tool หรือ mysql client +# File: specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.sql +``` + +--- + +## Step 3: Deploy Backend + +ปฏิบัติตาม `/deploy` workflow ปกติ (QNAP Container Station) + +--- + +## Step 4: ตรวจสอบ Health Endpoint + +```powershell +Invoke-RestMethod -Uri "http://localhost:3001/api/ai/health" -Method GET | + ConvertTo-Json -Depth 5 +# ตรวจสอบ: +# ollama.mainModel = "typhoon2.5-np-dms:latest" +# ollama.status = "HEALTHY" +``` + +--- + +## Step 5: ทดสอบ OCR Job + +```powershell +# ส่ง OCR job ผ่าน AI Admin Console → OCR Sandbox +# หรือ POST /api/ai/admin/sandbox/ocr ด้วย PDF ภาษาไทย +# ตรวจสอบ result: +# - ocrText มีภาษาไทยที่อ่านออกได้ +# - ไม่มี VRAM OOM error ใน logs +``` + +--- + +## Step 6: ตรวจสอบ BullMQ Model Switching Logs + +```powershell +# ดู backend logs ขณะ OCR job กำลังทำงาน +docker compose logs -f backend | Select-String "ModelSwitch" +# ต้องเห็น: +# [ModelSwitch] Unloading typhoon2.5-np-dms:latest +# [ModelSwitch] Loading typhoon-np-dms-ocr:latest (keep_alive: 0) +# [ModelSwitch] Reloading typhoon2.5-np-dms:latest (keep_alive: -1) +``` + +--- + +## Step 7: ตรวจสอบ VRAM ไม่เกิน 8GB + +```powershell +# บน Desk-5439 ขณะ OCR job กำลังทำงาน +nvidia-smi --query-gpu=memory.used,memory.total --format=csv +# ต้องไม่เกิน 8192 MiB +``` + +--- + +## Rollback + +หากพบปัญหาหลัง deploy: + +1. 
สร้าง custom model ใหม่จาก `gemma4:e2b`: + ```powershell + ollama create typhoon2.5-np-dms -f .\gemma4-fallback.model.md + ``` +2. หรือ revert `AiSettingsService.DEFAULT_MODEL` กลับเป็น `'gemma4:e2b'` แล้ว redeploy +3. ดูรายละเอียดใน ADR-034 Section 5 Rollback Strategy diff --git a/specs/100-Infrastructures/134-ai-model-change/research.md b/specs/100-Infrastructures/134-ai-model-change/research.md new file mode 100644 index 00000000..e4281a0a --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/research.md @@ -0,0 +1,70 @@ +// File: specs/100-Infrastructures/134-ai-model-change/research.md +// Change Log: +// - 2026-06-03: Phase 0 research for Thai-Optimized AI Model Stack + +# Research: Thai-Optimized AI Model Stack + +**Status**: Complete — all decisions resolved from ADR-034 + +--- + +## Decision 1: Model Unloading via Ollama API + +**Decision**: ใช้ `POST /api/generate` พร้อม `keep_alive: 0` และ `prompt: ""` เพื่อ unload model จาก VRAM +**Rationale**: Ollama ไม่มี dedicated unload endpoint; การส่ง `keep_alive: 0` จะทำให้ model unload ทันทีหลัง request complete; ส่ง empty prompt เพื่อเรียก endpoint โดยไม่ generate output +**Alternatives considered**: +- Restart Ollama service — disruptive; ใช้เวลานาน; ทำลาย in-flight jobs +- `/api/delete` — ลบ model ออกจาก registry ไม่ใช่แค่ unload จาก VRAM + +--- + +## Decision 2: Model Loading via Ollama API + +**Decision**: ใช้ `POST /api/generate` พร้อม `keep_alive: -1` สำหรับ main model; `keep_alive: 0` สำหรับ OCR model +**Rationale**: การส่ง request ไปยัง model ที่ยังไม่ได้ load จะ trigger Ollama auto-load; `keep_alive: -1` = infinite standby (ตาม ADR-034 "Stand by ตลอด"); `keep_alive: 0` = auto-unload หลัง job +**Alternatives considered**: +- `POST /api/chat` — ใช้ได้เช่นกัน แต่ `/api/generate` มี API surface เล็กกว่า เหมาะสำหรับ keepalive ping + +--- + +## Decision 3: OCR Job Type List + +**Decision**: OCR job types = `['ocr-extract', 'sandbox-ocr-only']` +**Rationale**: ทั้งสอง job types ต้องการ OCR model; 
`ocr-extract` = production; `sandbox-ocr-only` = admin sandbox; types อื่นทั้งหมดใช้ main model +**Alternatives considered**: +- Config-driven job types — over-engineering; ADR-034 กำหนดไว้แล้ว +- DB-driven routing — เพิ่ม complexity โดยไม่จำเป็น + +--- + +## Decision 4: DEFAULT_MODEL Strategy — Hardcode + +**Decision**: Hardcode `DEFAULT_MODEL = 'typhoon2.5-np-dms:latest'` ใน `AiSettingsService` +**Rationale**: ADR-034 ระบุชัดเจน "ไม่ต้อง update ai_settings table — ใช้ hardcode value เพื่อความเร็วในการ deploy"; การ hardcode ป้องกัน misconfiguration จาก DB +**Alternatives considered**: +- อ่านจาก `ai_settings` DB table — ถูก reject ใน ADR-034 เพื่อความง่ายใน deployment +- อ่านจาก ENV variable — อาจ conflict กับ ADR-027 model management; hardcode ชัดกว่า + +--- + +## Decision 5: ai_available_models Table Update + +**Decision**: สร้าง SQL delta `2026-06-03-update-ai-available-models-typhoon.sql` อัปเดต seed +**Rationale**: ADR-027 Admin Console อ่านจาก `ai_available_models` table; ถ้าไม่อัปเดตจะแสดงชื่อ model เก่า ทำให้ admin เห็น inconsistency +**Alternatives considered**: +- ไม่อัปเดต table — admin console แสดงข้อมูลเก่า; เสี่ยง confusion + +--- + +## Decision 6: Error Handling for Model Switching Failure + +**Decision**: Throw `Error` พร้อม descriptive message ที่ระบุ model name; NestJS Logger บันทึก context; ไม่ retry ภายใน switching logic (ปล่อยให้ BullMQ retry ตาม job policy) +**Rationale**: Model load failure ควร fail fast; BullMQ ที่มี retry config จะ re-attempt job ทั้งหมด รวมถึง switching sequence ด้วย; การ retry ภายใน switching เสี่ยง VRAM state inconsistency +**Alternatives considered**: +- Retry unload/load ภายใน service — อาจทำให้ VRAM state confused ถ้า partial load + +--- + +## Unresolved Items + +ไม่มี — decisions ทั้งหมดถูก resolve แล้ว diff --git a/specs/100-Infrastructures/134-ai-model-change/spec.md b/specs/100-Infrastructures/134-ai-model-change/spec.md new file mode 100644 index 00000000..12bca0c9 --- /dev/null +++ 
b/specs/100-Infrastructures/134-ai-model-change/spec.md @@ -0,0 +1,127 @@ +// File: specs/100-Infrastructures/134-ai-model-change/spec.md +// Change Log: +// - 2026-06-03: Initial specification for Thai-Optimized AI Model Stack (ADR-034) + +# Feature Specification: Thai-Optimized AI Model Stack + +**Feature Branch**: `134-ai-model-change` +**Created**: 2026-06-03 +**Status**: Draft +**Category**: 100-Infrastructures +**ADR Reference**: ADR-034 (Supersedes ADR-023A Section 2.1) +**Input**: User description: "Implement ADR-034 — Switch AI model stack from gemma4:e2b to Thai-optimized Typhoon series: typhoon2.5-np-dms:latest (main AI) + typhoon-np-dms-ocr:latest (Thai OCR). Requires model switching logic in BullMQ processor, new unload/load methods in OllamaService, updated default model config. VRAM budget: RTX 2060 Super 8GB." + +--- + +## User Scenarios & Testing _(mandatory)_ + +### User Story 1 — Thai OCR Extraction With New OCR Model (Priority: P1) + +ในฐานะ DevOps Engineer ฉันต้องการให้งาน OCR ภาษาไทยใช้ Typhoon OCR-3B ที่ถูก fine-tune มาสำหรับภาษาไทย เพื่อให้ข้อความที่สกัดจาก PDF ภาษาไทยมีความแม่นยำสูงกว่า gemma4:e2b + +**Why this priority**: งาน OCR ภาษาไทยคือจุดอ่อนหลักของระบบปัจจุบัน การเปลี่ยนเป็น Typhoon OCR model จะส่งผลโดยตรงต่อคุณภาพการ extract metadata และ text จากเอกสาร + +**Independent Test**: ส่ง OCR job (`jobType: 'ocr-extract'`) เข้า BullMQ ด้วย PDF ภาษาไทย และตรวจสอบว่า (1) job สำเร็จโดยไม่มี VRAM OOM error (2) ข้อความ OCR อ่านออกได้ (3) หลัง OCR เสร็จ main model ยังพร้อมใช้งาน + +**Acceptance Scenarios**: + +1. **Given** `typhoon-np-dms-ocr:latest` ถูก create บน Desk-5439, **When** BullMQ ได้รับ job `ocr-extract` หรือ `sandbox-ocr-only`, **Then** processor unload main model → load OCR model (keep_alive:0) → ประมวลผล → OCR model unload อัตโนมัติ +2. **Given** OCR job เสร็จแล้ว, **When** main model job เข้ามาต่อ, **Then** processor reload main model และประมวลผลสำเร็จภายใน 60 วินาที +3. 
**Given** VRAM ถูกใช้งานสูงขณะมี OCR job, **When** job เข้าคิว, **Then** BullMQ concurrency=1 ป้องกัน VRAM overflow — ไม่เกิด OOM error + +--- + +### User Story 2 — General AI Tasks Use Thai-Optimized Main Model (Priority: P2) + +ในฐานะ DevOps Engineer ฉันต้องการให้ระบบ AI ใช้ `typhoon2.5-np-dms:latest` เป็น main model สำหรับงาน extraction, RAG Q&A, และ AI suggestion แทน gemma4:e2b เพื่อประสิทธิภาพภาษาไทยที่ดีขึ้น + +**Why this priority**: เมื่อ OCR ทำงานได้แล้ว ต้องมั่นใจว่า AI extraction/RAG ที่ใช้ main model ทำงานได้กับโมเดลใหม่ + +**Independent Test**: ส่ง `ai-extract` job เข้า BullMQ → ตรวจสอบว่า backend เรียก `typhoon2.5-np-dms:latest` (ไม่ใช่ `gemma4:e2b`) และได้ผลลัพธ์ที่ถูกต้อง + +**Acceptance Scenarios**: + +1. **Given** `typhoon2.5-np-dms:latest` ถูก create บน Desk-5439, **When** backend start up, **Then** `AiSettingsService.DEFAULT_MODEL = 'typhoon2.5-np-dms:latest'` +2. **Given** main model พร้อม, **When** ส่ง extraction/RAG/suggestion job, **Then** ผลลัพธ์ return โดยไม่มี "model not found" error +3. **Given** การ reload main model หลัง OCR job, **When** warm reload, **Then** ใช้เวลา ≤ 60 วินาที (cold start acceptable) + +--- + +### User Story 3 — System Health Reflects New Model Stack (Priority: P3) + +ในฐานะ AI Admin ฉันต้องการเห็นสถานะของ model stack ใหม่ใน AI Admin Console เพื่อยืนยันว่าระบบทำงานถูกต้องหลัง deploy + +**Why this priority**: Operations team ต้องการ observability เพื่อตรวจสอบหลัง deployment + +**Independent Test**: เปิด AI Admin Console → System Health → เห็นชื่อ `typhoon2.5-np-dms` และ `typhoon-np-dms-ocr` แทน gemma4 + +**Acceptance Scenarios**: + +1. **Given** backend รันกับ model stack ใหม่, **When** GET `/api/ai/health`, **Then** response แสดง main model = `typhoon2.5-np-dms:latest` และ ocr model = `typhoon-np-dms-ocr:latest` +2. 
**Given** มี model switching เกิดขึ้น, **When** ดู BullMQ job logs, **Then** เห็น log บันทึก unload/load พร้อม model name และ timestamp + +--- + +### Edge Cases + +- VRAM เต็ม (>8GB) ขณะ load OCR model → BullMQ retry; ไม่ crash; error log ชัดเจน +- Ollama API timeout ขณะ `unloadModel`/`loadModel` → throw descriptive error; ไม่ swallow silently +- โมเดล `typhoon2.5-np-dms` ไม่ถูก create บน Desk-5439 → backend start ได้; job fail พร้อม clear "model not found" error +- OCR job และ main job เข้าคิวพร้อมกัน → BullMQ concurrency=1 handle อยู่แล้ว; ไม่ต้องแก้ +- Cold start OCR (30–60s) ขณะ user รอ → ยอมรับได้; job polling แสดงสถานะ + +--- + +## Requirements _(mandatory)_ + +### Functional Requirements + +- **FR-001**: ระบบ MUST ใช้ `typhoon2.5-np-dms:latest` เป็น default main model สำหรับทุก AI job ที่ไม่ใช่ OCR +- **FR-002**: ระบบ MUST ใช้ `typhoon-np-dms-ocr:latest` เฉพาะ job type `ocr-extract` และ `sandbox-ocr-only` +- **FR-003**: BullMQ processor MUST unload main model ก่อน load OCR model ทุกครั้งที่มี OCR job +- **FR-004**: OCR model MUST unload อัตโนมัติหลัง job เสร็จ (keep_alive: 0) +- **FR-005**: หลัง OCR job เสร็จ processor MUST reload main model (keep_alive: -1 หรือ indefinite) +- **FR-006**: OllamaService MUST expose `unloadModel(modelName)` และ `loadModel(modelName, keepAlive?)` methods +- **FR-007**: `AiSettingsService.DEFAULT_MODEL` MUST hardcoded เป็น `'typhoon2.5-np-dms:latest'` (ตาม ADR-034) +- **FR-008**: Embedding model `nomic-embed-text` MUST ไม่เปลี่ยนแปลง (นอก scope) +- **FR-009**: n8n MUST ไม่เรียก Ollama โดยตรง — ยังคง pattern `POST /api/ai/jobs` → BullMQ (ADR-023A) +- **FR-010**: BullMQ concurrency MUST = 1 เพื่อป้องกัน VRAM overflow + +### Key Entities + +- **Custom Ollama Model `typhoon2.5-np-dms`**: Configuration ของ main AI model — base: typhoon2.5-qwen3-4b, keep_alive: indefinite, size ~2.5GB +- **Custom Ollama Model `typhoon-np-dms-ocr`**: Configuration ของ OCR model — base: typhoon-ocr1.5-3b, keep_alive: 0, size ~3.2GB +- 
**OllamaService.unloadModel / loadModel**: Methods สำหรับ explicit model management ผ่าน Ollama API +- **AiSettingsService.DEFAULT_MODEL**: Hardcoded constant ชี้ไปยัง main model ใหม่ + +--- + +## Success Criteria _(mandatory)_ + +### Measurable Outcomes + +- **SC-001**: OCR job ประมวลผล PDF ภาษาไทยได้ข้อความที่อ่านออกได้ (ไม่มี garbled Thai characters) > 90% ของ pages +- **SC-002**: ไม่มี VRAM OOM error ตลอดการ process OCR + main jobs ต่อเนื่อง 10 jobs +- **SC-003**: OCR cold start ≤ 60 วินาที; main model warm response ≤ 5 วินาที +- **SC-004**: Deploy custom models บน Desk-5439 (`ollama create`) ใช้เวลา ≤ 15 นาทีต่อ model (download ขึ้นกับ network) +- **SC-005**: ไม่มี regression ใน AI extraction/RAG functionality ที่เดิมทำงานได้ + +--- + +## Assumptions + +- **A1**: Desk-5439 มี internet access สำหรับ pull base models (`scb10x/typhoon*`) จาก Ollama registry +- **A2**: ADR-033 VRAM monitoring mechanism ใช้งานได้กับ Typhoon models +- **A3**: Custom Modelfiles ถูก define ใน `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/` แล้ว +- **A4**: Embedding model `nomic-embed-text` ไม่เปลี่ยนแปลง +- **A5**: `ai_available_models` table (ADR-027) จำเป็นต้อง update seed เพื่อแสดง model ใหม่ใน admin console + +--- + +## Clarifications + +### Session 2026-06-03 + +- Q: nomic-embed-text embedding model เปลี่ยนหรือไม่? → A: ไม่เปลี่ยน — ADR-034 supersedes LLM stack เท่านั้น +- Q: Keep-alive value สำหรับ main model หลัง reload คือเท่าไร? → A: -1 (indefinite) ตาม ADR-034 "Stand by ตลอด (ไม่ใช่ 0)" +- Q: `sandbox-ocr-only` ต้องใช้ OCR model switching ด้วยหรือไม่? 
→ A: ใช่ — เป็น OCR job type เหมือน `ocr-extract` diff --git a/specs/100-Infrastructures/134-ai-model-change/tasks.md b/specs/100-Infrastructures/134-ai-model-change/tasks.md new file mode 100644 index 00000000..0fe57200 --- /dev/null +++ b/specs/100-Infrastructures/134-ai-model-change/tasks.md @@ -0,0 +1,156 @@ +// File: specs/100-Infrastructures/134-ai-model-change/tasks.md +// Change Log: +// - 2026-06-03: Initial task list for Thai-Optimized AI Model Stack + +# Tasks: Thai-Optimized AI Model Stack + +**Input**: Design documents from `specs/100-Infrastructures/134-ai-model-change/` +**ADR Reference**: ADR-034 (Supersedes ADR-023A Section 2.1) +**Prerequisites**: plan.md ✅ spec.md ✅ research.md ✅ data-model.md ✅ contracts/ ✅ + +## Format: `[ID] [P?] [Story?] Description` + +- **[P]**: Can run in parallel (different files, no shared state) +- **[Story]**: User story label (US1/US2/US3) + +--- + +## Phase 1: Setup (Infrastructure Verification) + +**Purpose**: ยืนยัน Model files และ directory structure พร้อม; ไม่ต้องสร้างใหม่เพราะมีอยู่แล้ว + +- [X] T001 [P] ตรวจสอบ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon2.5-np-dms.model.md` ว่า content ตรงกับ ADR-034 Section 1 +- [X] T002 [P] ตรวจสอบ `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/typhoon-np-dms-ocr.model.md` ว่า content ตรงกับ ADR-034 Section 1 +- [X] T003 [P] สร้าง SQL delta `specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.sql` — UPDATE main model + INSERT ocr model ใน `ai_available_models` table (ตรวจสอบ schema ก่อน run) +- [X] T003b [P] สร้าง rollback `specs/03-Data-and-Storage/deltas/2026-06-03-update-ai-available-models-typhoon.rollback.sql` — revert model_name กลับเป็น gemma4:e2b + DELETE ocr model record (ADR-009: ทุก delta ต้องมี rollback) + +--- + +## Phase 2: Foundational (OllamaService + AiSettingsService) + +**Purpose**: Backend services support model ใหม่ — BLOCKS Phases 3, 4, 5 + +**⚠️ CRITICAL**: ต้องเสร็จก่อนเริ่ม US phases + +- 
[X] T004 เพิ่ม method `unloadModel(modelName: string): Promise<void>` ใน `backend/src/modules/ai/services/ollama.service.ts` — POST /api/generate keep_alive:0 + error handling ตาม contract +- [X] T005 เพิ่ม method `loadModel(modelName: string, keepAlive?: number | string): Promise<void>` ใน `backend/src/modules/ai/services/ollama.service.ts` — POST /api/generate keep_alive:{keepAlive ?? -1} + timeout 60s + "model not found" error +- [X] T006 [P] อัปเดต `DEFAULT_MODEL = 'typhoon2.5-np-dms:latest'` และเพิ่ม `OCR_MODEL = 'typhoon-np-dms-ocr:latest'` ใน `backend/src/modules/ai/services/ai-settings.service.ts` +- [X] T007 [P] เขียน unit tests สำหรับ `unloadModel()` และ `loadModel()` ใน `backend/src/modules/ai/services/ollama.service.spec.ts` — mock axios; test timeout, 404, และ success cases + +**Checkpoint**: Foundation พร้อม — processor switching สามารถ implement ได้ + +--- + +## Phase 3: User Story 1 — Thai OCR Processing (Priority: P1) 🎯 MVP + +**Goal**: BullMQ processor switch models สำหรับ OCR jobs; ไม่มี VRAM OOM + +**Independent Test**: ส่ง `ocr-extract` job → ตรวจสอบ log มี ModelSwitch entries + job สำเร็จ + +- [X] T008 [US1] เพิ่ม constant `const OCR_JOB_TYPES = ['ocr-extract', 'sandbox-ocr-only'] as const` ใน `backend/src/modules/ai/processors/ai-batch.processor.ts` +- [X] T009 [US1] Implement model switching block ใน `processJob()`: unload main → load OCR (keep_alive:0) → process → reload main (keep_alive:-1) ใน `backend/src/modules/ai/processors/ai-batch.processor.ts` +- [X] T010 [US1] เพิ่ม NestJS `Logger` log สำหรับ model switch events (model name + timestamp) ใน `backend/src/modules/ai/processors/ai-batch.processor.ts` +- [X] T011 [P] [US1] เขียน unit tests สำหรับ OCR model switching logic ใน `backend/src/modules/ai/processors/ai-batch.processor.spec.ts` — mock OllamaService methods; test switching sequence + non-OCR bypass + +**Checkpoint**: OCR job ทำงานกับ Typhoon OCR model; main model reload หลัง OCR สำเร็จ + +--- + +## Phase 4: User Story 2 — Main AI Tasks With 
New Model (Priority: P2) + +**Goal**: ทุก non-OCR AI job ใช้ `typhoon2.5-np-dms:latest` + +**Independent Test**: ส่ง `ai-extract` job → ตรวจสอบ Ollama call ใช้ model ใหม่ (ไม่ใช่ gemma4) + +- [X] T012 [US2] ตรวจสอบและอัปเดต `OllamaService.generate()` ให้อ้างอิง `AiSettingsService.DEFAULT_MODEL` แทน hardcoded model name ใน `backend/src/modules/ai/services/ollama.service.ts` +- [X] T013 [P] [US2] grep codebase ด้วย pattern `gemma4|OLLAMA_MODEL_MAIN|OLLAMA_RAG_MODEL` ใน `backend/src/` หา hardcoded model references เก่าทั้งหมด → อัปเดตให้ใช้ `AiSettingsService.DEFAULT_MODEL` หรือ `AiSettingsService.OCR_MODEL` +- [X] T014 [US2] อัปเดต comment/note ใน `backend/.env.example` — อธิบายว่า model names ถูก hardcode ใน `AiSettingsService` ตาม ADR-034 + +**Checkpoint**: ทุก non-OCR AI job ใช้ model ใหม่ + +--- + +## Phase 5: User Story 3 — System Health Visibility (Priority: P3) + +**Goal**: Admin Console แสดงชื่อ model ที่ถูกต้อง + +**Independent Test**: GET /api/ai/health → JSON มี `mainModel` = `typhoon2.5-np-dms:latest`, `ocrModel` = `typhoon-np-dms-ocr:latest` + +- [X] T015 [P] [US3] อัปเดต health response structure ใน `backend/src/modules/ai/ai.service.ts` — เพิ่ม `mainModel` และ `ocrModel` fields ที่อ่านจาก `AiSettingsService` +- [X] T016 [P] [US3] ตรวจสอบ `frontend/app/(admin)/admin/ai/page.tsx` ว่า health card แสดง model names ใหม่ (ถ้า dynamic จาก API ไม่ต้องแก้; ถ้า hardcoded ให้แก้) +- [ ] T017 [US3] รัน SQL delta `2026-06-03-update-ai-available-models-typhoon.sql` บน production DB (manual step — ผ่าน DB admin tool) + +**Checkpoint**: Admin Console แสดง model stack ใหม่ถูกต้อง + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Documentation update + compliance verification + +- [X] T018 [P] อัปเดต `AGENTS.md` — Current Decisions D10: เปลี่ยน `gemma4:e4b Q8_0` เป็น `typhoon2.5-np-dms:latest (main) + typhoon-np-dms-ocr:latest (OCR)`; อัปเดต version เป็น v1.9.9 และ sync date +- [X] T019 [P] อัปเดต `memory/agent-memory.md` — Section 2.5 model names + Section 5 D10 +
Section 7 Ollama row + Section 8 Recent Rollouts entry +- [X] T020 [P] อัปเดต `.agents/rules/11-ai-integration.md` — แทนที่ 2-model stack เดิม (`gemma4:e2b` + `nomic-embed-text`) ด้วย ADR-034 3-model config (`typhoon2.5-np-dms:latest` + `typhoon-np-dms-ocr:latest` + `nomic-embed-text`) +- [ ] T021 [P] รัน type check: `pnpm --filter backend build` — ต้องผ่าน 0 errors +- [ ] T022 [P] รัน lint: `pnpm --filter backend lint` — ตรวจสอบ no console.log, no any +- [ ] T023 [P] รัน unit tests ที่เพิ่มใหม่: `pnpm --filter backend test -- --testPathPattern="ollama.service|ai-batch.processor"` +- [ ] T024 รัน quickstart.md verification บน Desk-5439 + QNAP ตามขั้นตอนใน `quickstart.md` — รวมถึงตรวจสอบ BullMQ concurrency=1 ใน `ai-batch.processor.ts` (FR-010) + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: ไม่มี dependency — เริ่มทันที +- **Phase 2 (Foundational)**: ไม่มี dependency — เริ่มทันที; **BLOCKS Phase 3, 4, 5** +- **Phase 3 (US1)**: ต้องรอ Phase 2 สมบูรณ์ +- **Phase 4 (US2)**: ต้องรอ Phase 2; สามารถ parallel กับ Phase 3 +- **Phase 5 (US3)**: ต้องรอ Phase 3 + Phase 4 +- **Phase 6 (Polish)**: ต้องรอทุก phase + +### Within Phase 2 + +- T004 → T005 (sequential — เป็น paired methods) +- T006, T007 → parallel ได้ + +### Parallel Opportunities Per Phase + +``` +Phase 1: T001 ∥ T002 ∥ T003 +Phase 2: (T004→T005) ∥ T006 ∥ T007 +Phase 3: (T008→T009→T010) + T011 parallel กับ T010 +Phase 4: T012 → T013 → T014 +Phase 5: T015 ∥ T016; T017 manual last +Phase 6: T018 ∥ T019 ∥ T020 ∥ T021 ∥ T022 ∥ T023 → T024 last +``` + +--- + +## Implementation Strategy + +### MVP First (US1 — OCR Model Switching) + +1. Phase 1: Verify Modelfiles (T001-T002) +2. Phase 2: OllamaService + AiSettings (T004-T007) +3. Phase 3: OCR switching (T008-T011) +4. **Validate**: Test OCR job + confirm main reload + +### Incremental Delivery + +1. MVP (Phase 1-3) → OCR ทำงานกับ Typhoon +2. US2 (Phase 4) → Main model jobs ใช้ model ใหม่ +3. US3 (Phase 5) → Admin health visibility +4.
Polish (Phase 6) → Docs + type check + lint + unit tests + real-app verification + +--- + +## Notes + +- T001-T002: Model files มีอยู่แล้วใน `specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/` — เป็น verification task ไม่ใช่ creation +- FR-008 (nomic-embed-text) + FR-009 (n8n boundary): Preserved by existing architecture — ไม่มี task เพราะ "do nothing" คือ correct action +- T018: ตรวจสอบ AGENTS.md version จริงก่อน bump เป็น v1.9.9 +- T003: ตรวจสอบ `lcbp3-v1.9.0-schema-02-tables.sql` ก่อน write delta — ai_available_models schema อาจต่างจากที่คาด +- T013: ใช้ grep/code_search เพื่อหา references ทั้งหมด — อย่า hardcode path ไปเอง +- T017: Manual step; ต้องทำผ่าน DBA หรือ migration pipeline +- T024: Real-app verification ตาม `/check-real-app` workflow