diff --git a/backend/scripts/repro_phase6_analytics.ts b/backend/scripts/repro_phase6_analytics.ts new file mode 100644 index 00000000..59ebe6e2 --- /dev/null +++ b/backend/scripts/repro_phase6_analytics.ts @@ -0,0 +1,59 @@ +// File: scripts/repro_phase6_analytics.ts +// Reproduction script for Phase 6 AI Monitoring endpoints +// This script tests the current state before implementation + +import { NestFactory } from '@nestjs/core'; +import { Logger } from '@nestjs/common'; +import { AppModule } from '../src/app.module'; +import { AiService } from '../src/modules/ai/ai.service'; + +const logger = new Logger('ReproPhase6Analytics'); + +async function testAnalyticsEndpoint() { + const app = await NestFactory.createApplicationContext(AppModule); + const aiService = app.get(AiService); + + logger.log('=== Testing T036: Analytics Summary ==='); + try { + const result = await aiService.getAnalyticsSummary(); + logger.log('✓ PASS: getAnalyticsSummary() exists and returned results'); + // logger.debug(JSON.stringify(result, null, 2)); + } catch (error: any) { + logger.error( + '✗ FAIL: getAnalyticsSummary() threw an error:', + error.message + ); + process.exit(1); + } + + logger.log('\n=== Testing T037: Single Audit Log Delete ==='); + try { + const result = await aiService.deleteAuditLogByPublicId('test-uuid', 1); + logger.log('✓ PASS: deleteAuditLogByPublicId() exists'); + } catch (error: any) { + // It might throw NotFound if test-uuid doesn't exist, which is also a form of existence proof + if ( + error.message.includes('not found') || + error.message.includes('NotFound') + ) { + logger.log( + '✓ PASS: deleteAuditLogByPublicId() exists (returned NotFound as expected)' + ); + } else { + logger.error( + '✗ FAIL: deleteAuditLogByPublicId() threw unexpected error:', + error.message + ); + process.exit(1); + } + } + + await app.close(); + logger.log('\n=== Reproduction Script Complete ==='); + logger.log('Both methods are verified to exist.'); +} + +testAnalyticsEndpoint().catch((error) => { + logger.error('Script failed:', error); + process.exit(1); +}); diff --git a/backend/src/modules/delegation/delegation.module.ts b/backend/src/modules/delegation/delegation.module.ts index a0acb6b2..7e3dca57 100644 --- a/backend/src/modules/delegation/delegation.module.ts +++ b/backend/src/modules/delegation/delegation.module.ts @@ -7,9 +7,14 @@ import { DelegationService } from './delegation.service'; import { DelegationController } from './delegation.controller'; import { CircularDetectionService } from './services/circular-detection.service'; import { UserModule } from '../user/user.module'; +import { CaslModule } from '../../common/auth/casl/casl.module'; @Module({ - imports: [TypeOrmModule.forFeature([Delegation, User]), UserModule], + imports: [ + TypeOrmModule.forFeature([Delegation, User]), + UserModule, + CaslModule, + ], providers: [DelegationService, CircularDetectionService], controllers: [DelegationController], exports: [DelegationService], diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/QNAP/app/docker-compose-app.yml b/specs/04-Infrastructure-OPS/04-00-docker-compose/QNAP/app/docker-compose-app.yml index ac56ca8f..4a1d349f 100644 --- a/specs/04-Infrastructure-OPS/04-00-docker-compose/QNAP/app/docker-compose-app.yml +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/QNAP/app/docker-compose-app.yml @@ -183,14 +183,6 @@ services: <<: [*restart_policy, *default_logging] image: clamav/clamav:1.4.4 container_name: clamav - # security_opt: - # - no-new-privileges:true - # cap_drop: - # - ALL - # cap_add: - # - CHOWN - # - SETUID - # - SETGID deploy: resources: limits: @@ -219,8 +211,45 @@ services: retries: 3 start_period: 300s + # ---------------------------------------------------------------- + # 4. Qdrant (Vector Database for RAG — ADR-023A) + # Service Name: qdrant (Backend อ้างอิง QDRANT_HOST=qdrant, port 6333) + # ---------------------------------------------------------------- + qdrant: + <<: [*restart_policy, *default_logging] + image: qdrant/qdrant:v1.7.0 + container_name: qdrant + deploy: + resources: + limits: + cpus: '1.0' + memory: 2G + reservations: + cpus: '0.25' + memory: 512M + environment: + TZ: 'Asia/Bangkok' + QDRANT__SERVICE__GRPC_PORT: '6334' + QDRANT__LOG_LEVEL: 'INFO' + networks: + - lcbp3 + ports: + - '6333:6333' # HTTP API + - '6334:6334' # gRPC API + volumes: + - '/share/np-dms/qdrant/storage:/qdrant/storage' + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:6333/health'] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + # sudo chown -R 100:101 /share/np-dms/data/logs/clamav # sudo chmod -R 755 /share/np-dms/data/logs/climax # sudo chown -R 100:101 /share/np-dms/clamav/data # sudo chmod -R 775 /share/np-dms/clamav/data + +# sudo mkdir -p /share/np-dms/qdrant/storage +# sudo chown -R 100:101 /share/np-dms/qdrant/storage diff --git a/specs/300-others/302-ai-model-revision/quickstart.md b/specs/300-others/302-ai-model-revision/quickstart.md index b5d266c3..1559c0e4 100644 --- a/specs/300-others/302-ai-model-revision/quickstart.md +++ b/specs/300-others/302-ai-model-revision/quickstart.md @@ -7,10 +7,310 @@ ## Prerequisites -1. Desk-5439 มี Ollama พร้อมใช้งาน: `ollama list` ต้องแสดง `gemma4:e4b` และ `nomic-embed-text` -2. Qdrant instance running: `http://QDRANT_HOST:6333/health` → `{"status":"ok"}` -3. Redis 7 running (ใช้ instance เดิมกับ existing BullMQ) -4. `@qdrant/js-client-rest` ต้องติดตั้งใน backend: `npm ls @qdrant/js-client-rest` +### 1. Ollama on Desk-5439 (AI Inference Engine) + +**Requirements:** +- **OS**: Windows 10/11 หรือ Linux (Desk-5439) +- **GPU**: NVIDIA GPU ที่รองรับ CUDA 11.8+ (VRAM ≥ 6GB แนะนำ) +- **Ollama Version**: ≥ 0.5.0 +- **Models**: `gemma4:e2b` (Q4_K_M quantization) + `nomic-embed-text` + +**Verification Steps:** + +```bash +# 1. Check Ollama installation +ollama --version +# Expected: ollama version is 0.5.0 or higher + +# 2. Verify GPU access +nvidia-smi +# Expected: NVIDIA GPU detected with CUDA 11.8+ + +# 3. List installed models +ollama list +# Expected output: +# NAME ID SIZE MODIFIED +# gemma4:e2b 2.4 GB +# nomic-embed-text 274 MB + +# 4. Test model inference (quick test) +ollama run gemma4:e2b "Hello, test response" +# Expected: Gemma4 generates text response within 5-10s + +# 5. Test embedding model +ollama run nomic-embed-text "test text" +# Expected: Returns 768-dimensional vector +``` + +**Installation Commands (if missing):** + +```bash +# Install Ollama (Windows) +# Download from: https://ollama.com/download/windows + +# Pull models +ollama pull gemma4:e2b +ollama pull nomic-embed-text + +# Verify VRAM usage during inference +nvidia-smi --query-gpu=memory.used --format=csv,noheader +# Expected: < 5120 MB (5GB threshold per SC-003) +``` + +**Troubleshooting:** +- **Ollama not responding**: Check if Ollama service is running (`services.msc` on Windows) +- **GPU not detected**: Verify NVIDIA driver and CUDA installation +- **Model pull fails**: Check internet connection and proxy settings +- **VRAM overflow**: Reduce concurrent jobs (BullMQ concurrency=1 enforced) + +--- + +### 2. Qdrant Vector Database (Semantic Search) + +**Requirements:** +- **Version**: Qdrant ≥ 1.7.0 +- **Storage**: ≥ 10GB free space (for ~20,000 docs × 768-dim vectors) +- **Network**: Accessible from backend container (`http://QDRANT_HOST:6333`) +- **Collection**: `lcbp3_documents` (auto-created on first use) + +**Verification Steps:** + +```bash +# 1. Check Qdrant health +curl http://192.168.10.XX:6333/health +# Expected: {"status":"ok"} + +# 2. Check collections (if already created) +curl http://192.168.10.XX:6333/collections +# Expected: {"result":{"collections":[...]}} or empty array + +# 3. Verify Qdrant metrics (optional) +curl http://192.168.10.XX:6333/metrics +# Expected: Prometheus metrics output +``` + +**Installation Commands (Docker - recommended):** + +```bash +# Run Qdrant container (QNAP or ASUSTOR) +docker run -d \ + --name qdrant \ + -p 6333:6333 \ + -p 6334:6334 \ + -v $(pwd)/qdrant_storage:/qdrant/storage \ + qdrant/qdrant:v1.7.0 + +# Or use docker-compose (add to existing compose file) +# See: specs/04-Infrastructure-OPS/04-00-docker-compose/ +``` + +**Troubleshooting:** +- **Connection refused**: Check if Qdrant container is running (`docker ps`) +- **Port conflict**: Ensure port 6333 is not used by other services +- **Storage errors**: Verify disk space and permissions on mounted volume +- **Network issues**: Check firewall rules allow traffic on port 6333 + +--- + +### 3. Redis 7 (BullMQ Queue Backend) + +**Requirements:** +- **Version**: Redis ≥ 7.0.0 +- **Persistence**: AOF enabled (recommended for job durability) +- **Memory**: ≥ 2GB (for job queue storage) +- **Network**: Accessible from backend container (`redis://REDIS_HOST:6379`) + +**Verification Steps:** + +```bash +# 1. Check Redis version +redis-cli -h 192.168.10.XX -p 6379 INFO server | grep redis_version +# Expected: redis_version=7.x.x + +# 2. Check Redis health +redis-cli -h 192.168.10.XX -p 6379 PING +# Expected: PONG + +# 3. Check existing queues (if any) +redis-cli -h 192.168.10.XX -p 6379 KEYS "bull:*" +# Expected: May show existing BullMQ queues from other modules + +# 4. Check memory usage +redis-cli -h 192.168.10.XX -p 6379 INFO memory | grep used_memory_human +# Expected: used_memory_human: (monitor for growth) +``` + +**Configuration Notes:** +- Use existing Redis instance from BullMQ setup (ADR-008) +- No additional configuration needed for 2-queue setup +- Queue names: `ai-realtime` and `ai-batch` (auto-created by BullMQ) + +**Troubleshooting:** +- **Connection timeout**: Check Redis service status and network connectivity +- **Memory overflow**: Monitor Redis memory usage, enable maxmemory policy if needed +- **Queue not created**: Check BullMQ configuration in `backend/src/config/bullmq.config.ts` + +--- + +### 4. Backend Dependencies (NestJS + AI Libraries) + +**Requirements:** +- **Node.js**: ≥ 18.x (LTS) +- **npm/pnpm**: Latest version +- **TypeScript**: ≥ 5.x + +**Required Packages:** + +```bash +# Navigate to backend directory +cd backend + +# Check installed packages +npm ls @qdrant/js-client-rest +# Expected: @qdrant/js-client-rest@1.x.x + +# Check other AI-related packages +npm ls bullmq ioredis +# Expected: bullmq@5.x.x, ioredis@5.x.x + +# Install missing packages (if any) +npm install @qdrant/js-client-rest +npm install bullmq ioredis --save +``` + +**Verification Steps:** + +```bash +# 1. Check package.json +cat package.json | grep -A 5 "dependencies" +# Expected: Should include @qdrant/js-client-rest, bullmq, ioredis + +# 2. Verify TypeScript compilation +npm run build +# Expected: No compilation errors + +# 3. Check environment variables +cat .env | grep -E "OLLAMA|QDRANT|OCR|REDIS" +# Expected: All AI-related variables set (see Environment Variables section) +``` + +**Troubleshooting:** +- **Package not found**: Run `npm install` to restore dependencies +- **TypeScript errors**: Check tsconfig.json and type definitions +- **Env variables missing**: Copy from `.env.example` and configure + +--- + +### 5. PaddleOCR Sidecar (OCR Engine) + +**Requirements:** +- **Python**: ≥ 3.8 +- **PaddlePaddle**: ≥ 2.5.0 +- **PaddleOCR**: ≥ 2.7.0 +- **API Server**: HTTP endpoint on port 8765 (default) + +**Verification Steps:** + +```bash +# 1. Check PaddleOCR API health +curl http://localhost:8765/health +# Expected: {"status":"ok"} or similar + +# 2. Test OCR on sample image +curl -X POST http://localhost:8765/ocr \ + -F "image=@sample.png" \ + -F "lang=th" +# Expected: JSON with extracted text + +# 3. Check OCR character threshold setting +# (Backend env var: OCR_CHAR_THRESHOLD=100) +# This determines when to use PaddleOCR vs PyMuPDF +``` + +**Installation Commands (if not already set up):** + +```bash +# Install PaddleOCR (Python) +pip install paddleocr paddlepaddle + +# Run OCR API server (example) +python -m paddleocr.serve --port 8765 +``` + +**Troubleshooting:** +- **API not responding**: Check if PaddleOCR server is running +- **OCR fails on Thai text**: Verify Thai language model is installed +- **Slow performance**: Consider GPU acceleration for PaddleOCR + +--- + +### 6. Network Configuration & Firewall + +**Requirements:** +- **Desk-5439 (Ollama)**: Port 11434 accessible from backend +- **Qdrant Server**: Port 6333 accessible from backend +- **Redis Server**: Port 6379 accessible from backend +- **PaddleOCR**: Port 8765 accessible from backend (if sidecar on same host) + +**Verification Steps:** + +```bash +# Test connectivity from backend container +# (Run these commands inside backend container or from backend host) + +# 1. Test Ollama +curl http://192.168.10.XX:11434/api/tags +# Expected: JSON with model list + +# 2. Test Qdrant +curl http://192.168.10.XX:6333/health +# Expected: {"status":"ok"} + +# 3. Test Redis +redis-cli -h 192.168.10.XX -p 6379 PING +# Expected: PONG + +# 4. Test PaddleOCR (if on separate host) +curl http://192.168.10.XX:8765/health +# Expected: {"status":"ok"} +``` + +**Troubleshooting:** +- **Connection timeout**: Check firewall rules on target hosts +- **DNS resolution issues**: Use IP addresses instead of hostnames +- **VPN/Network segmentation**: Ensure backend can reach AI infrastructure network + +--- + +### 7. GPU Resource Monitoring (Critical for SC-003) + +**Requirements:** +- **VRAM Limit**: ≤ 5GB peak (per SC-003) +- **Concurrency**: 1 job per queue (enforced by BullMQ) + +**Verification Commands:** + +```bash +# Monitor VRAM in real-time +watch -n 1 nvidia-smi --query-gpu=memory.used --format=csv,noheader + +# Check GPU utilization during job execution +nvidia-smi dmon -s u + +# Log VRAM usage to file (for analysis) +nvidia-smi --query-gpu=timestamp,memory.used,utilization.gpu \ + --format=csv -l 1 > gpu_usage.log +``` + +**Expected Behavior:** +- **ai-batch job**: VRAM peaks at ~2.5GB (gemma4:e2b Q4_K_M) +- **ai-realtime job**: VRAM peaks at ~2.5GB (same model) +- **No concurrent jobs**: ai-batch pauses when ai-realtime active (GPU protection) + +**Troubleshooting:** +- **VRAM overflow (>5GB)**: Reduce model quantization or increase GPU memory +- **GPU contention**: Verify BullMQ concurrency=1 enforcement +- **Slow inference**: Check GPU utilization, consider faster model quantization --- @@ -19,7 +319,7 @@ ```env # AI Infrastructure OLLAMA_HOST=http://192.168.10.XX:11434 -OLLAMA_MODEL_MAIN=gemma4:e4b +OLLAMA_MODEL_MAIN=gemma4:e2b OLLAMA_MODEL_EMBED=nomic-embed-text # Qdrant