From cb9ecb2de647a357412c6c3a945799d300e44358 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 2 Jun 2026 13:34:22 +0700 Subject: [PATCH] 690602:1334 ADR-033-233 #03 --- frontend/app/(admin)/admin/ai/page.tsx | 83 +++++- frontend/lib/services/admin-ai.service.ts | 57 ++++- .../ASUSTOR/monitoring/o.yml | 240 ++++++++++++++++++ .../prometheus/config/prometheus.bak.yml | 60 +++++ .../prometheus/config/prometheus.yml | 140 +++++----- 5 files changed, 498 insertions(+), 82 deletions(-) create mode 100644 specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/o.yml create mode 100644 specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.bak.yml diff --git a/frontend/app/(admin)/admin/ai/page.tsx b/frontend/app/(admin)/admin/ai/page.tsx index cd8e6293..e3f1a158 100644 --- a/frontend/app/(admin)/admin/ai/page.tsx +++ b/frontend/app/(admin)/admin/ai/page.tsx @@ -24,7 +24,12 @@ import { Textarea } from '@/components/ui/textarea'; import { Progress } from '@/components/ui/progress'; import { useAiStatus, useToggleAiFeatures, useAiHealth } from '@/hooks/use-ai-status'; import { projectService } from '@/lib/services/project.service'; -import { adminAiService, AiSandboxJobResult, AiAvailableModel } from '@/lib/services/admin-ai.service'; +import { + adminAiService, + AiSandboxJobResult, + AiAvailableModel, + AiRagCitation, +} from '@/lib/services/admin-ai.service'; import { toast } from 'sonner'; import OcrSandboxPromptManager from '@/components/admin/ai/OcrSandboxPromptManager'; import OcrEngineSelector from '@/components/admin/ai/OcrEngineSelector'; @@ -35,6 +40,48 @@ interface SandboxProject { projectCode: string; } +interface VramLoadedModelView { + modelId: string; + modelName: string; + vramUsageMB?: number; +} + +function ensureArray(value: unknown): T[] { + return Array.isArray(value) ? value : []; +} + +function normalizeLoadedModels(value: unknown): VramLoadedModelView[] { + if (!Array.isArray(value)) { + return []; + } + return value.map((item, index) => { + if (typeof item === 'string') { + return { + modelId: `${item}-${index}`, + modelName: item, + }; + } + if (item && typeof item === 'object') { + const model = item as { + modelId?: string; + modelName?: string; + name?: string; + vramUsageMB?: number; + }; + const modelName = model.modelName ?? model.name ?? `model-${index + 1}`; + return { + modelId: model.modelId ?? modelName, + modelName, + vramUsageMB: model.vramUsageMB, + }; + } + return { + modelId: `unknown-${index}`, + modelName: `Unknown Model ${index + 1}`, + }; + }); +} + export default function AiAdminConsolePage() { const { data, isLoading, isError, refetch, isFetching } = useAiStatus(); const { data: health, isLoading: isHealthLoading, refetch: refetchHealth } = useAiHealth(); @@ -56,7 +103,7 @@ export default function AiAdminConsolePage() { return await adminAiService.getAvailableModels(); }, }); - const availableModels = aiModelsData?.models ?? []; + const availableModels = ensureArray(aiModelsData?.models); const activeModel = aiModelsData?.activeModel ?? ''; // VRAM Monitoring State (T034, T036, US2) @@ -75,6 +122,13 @@ export default function AiAdminConsolePage() { return res as SandboxProject[]; }, }); + const healthOllamaModels = ensureArray(health?.ollama?.models); + const healthQdrantCollections = ensureArray(health?.qdrant?.collections); + const vramLoadedModels = normalizeLoadedModels(vramStatus?.loadedModels); + const sandboxProjects = ensureArray(projects); + const sandboxCitations = ensureArray( + sandboxJobResult?.citations + ); const handleToggle = async (enabled: boolean): Promise => { await toggleMutation.mutateAsync(enabled); @@ -242,8 +296,8 @@ export default function AiAdminConsolePage() {
โมเดลที่โหลดอยู่:
- {health?.ollama?.models && health.ollama.models.length > 0 ? ( - health.ollama.models.map((m) => ( + {healthOllamaModels.length > 0 ? ( + healthOllamaModels.map((m) => ( {m} @@ -274,8 +328,8 @@ export default function AiAdminConsolePage() {
คอลเลกชัน:
- {health?.qdrant?.collections && health.qdrant.collections.length > 0 ? ( - health.qdrant.collections.map((c) => ( + {healthQdrantCollections.length > 0 ? ( + healthQdrantCollections.map((c) => ( {c} @@ -394,10 +448,13 @@ export default function AiAdminConsolePage() {
โมเดลที่โหลดบน GPU ในปัจจุบัน:
- {vramStatus.loadedModels && vramStatus.loadedModels.length > 0 ? ( - vramStatus.loadedModels.map((m) => ( - - {m.modelName} ({m.vramUsageMB} MB) + {vramLoadedModels.length > 0 ? ( + vramLoadedModels.map((m) => ( + + {m.modelName} + {typeof m.vramUsageMB === 'number' + ? ` (${m.vramUsageMB} MB)` + : ''} )) ) : ( @@ -627,7 +684,7 @@ export default function AiAdminConsolePage() { - {projects.map((proj) => ( + {sandboxProjects.map((proj) => ( {proj.projectName} ({proj.projectCode}) @@ -728,9 +785,9 @@ export default function AiAdminConsolePage() { - {sandboxJobResult.citations && sandboxJobResult.citations.length > 0 ? ( + {sandboxCitations.length > 0 ? (
- {sandboxJobResult.citations.map((cite, index) => ( + {sandboxCitations.map((cite, index) => (
; + canLoadModel?: boolean; + lastUpdated?: string; + totalVramMb?: number; + usedVramMb?: number; + freeVramMb?: number; + hasCapacity?: boolean; +} + export interface AiAvailableModel { id?: number; modelId?: string; @@ -121,6 +137,43 @@ const extractData = (value: unknown): T => { return value as T; }; +const normalizeLoadedModels = ( + models: Array | undefined +): LoadedModelInfo[] => { + if (!Array.isArray(models)) { + return []; + } + return models.map((model, index) => { + if (typeof model === 'string') { + return { + modelId: `${model}-${index}`, + modelName: model, + vramUsageMB: 0, + }; + } + return model; + }); +}; + +const normalizeVramStatus = (value: unknown): VramStatusResponse => { + const raw = extractData(value); + const totalVRAMMB = raw.totalVRAMMB ?? raw.totalVramMb ?? 0; + const usedVRAMMB = raw.usedVRAMMB ?? raw.usedVramMb ?? 0; + const usagePercent = + raw.usagePercent ?? + (totalVRAMMB > 0 ? Math.round((usedVRAMMB / totalVRAMMB) * 100) : 0); + + return { + totalVRAMMB, + usedVRAMMB, + usagePercent, + thresholdPercent: raw.thresholdPercent ?? 90, + loadedModels: normalizeLoadedModels(raw.loadedModels), + canLoadModel: raw.canLoadModel ?? raw.hasCapacity ?? false, + lastUpdated: raw.lastUpdated ?? new Date().toISOString(), + }; +}; + /** Service สำหรับเรียก AI Admin Console API ผ่าน DMS Backend เท่านั้น */ export const adminAiService = { getStatus: async (): Promise => { @@ -199,7 +252,7 @@ export const adminAiService = { // --- AI Model Management (ADR-027, US2) --- getAvailableModels: async (): Promise => { - const { data } = await api.get('/ai/models'); + const { data } = await api.get('/ai/admin/models'); return extractData(data); }, @@ -215,7 +268,7 @@ export const adminAiService = { getVramStatus: async (): Promise => { const { data } = await api.get('/ai/vram/status'); - return extractData(data); + return normalizeVramStatus(data); }, addModel: async ( diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/o.yml b/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/o.yml new file mode 100644 index 00000000..d4d1d92d --- /dev/null +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/o.yml @@ -0,0 +1,240 @@ +# File: /volume1/np-dms/monitoring/docker-compose.yml +# DMS Container v1.8.6: Application name: lcbp3-monitoring +# Deploy on: ASUSTOR AS5403T +# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail + +x-restart: &restart_policy + restart: unless-stopped + +x-logging: &default_logging + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "5" + +name: lcbp3-monitoring + +networks: + lcbp3: + external: true + +services: + # ---------------------------------------------------------------- + # 1. Prometheus (Metrics Collection & Storage) + # ---------------------------------------------------------------- + prometheus: + <<: [*restart_policy, *default_logging] + image: prom/prometheus:v2.48.0 + container_name: prometheus + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.25" + memory: 256M + environment: + TZ: "Asia/Bangkok" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention.time=30d" + - "--web.enable-lifecycle" + ports: + - "9090:9090" + networks: + - lcbp3 + volumes: + - "/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro" + - "/volume1/np-dms/monitoring/prometheus/data:/prometheus" + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"] + interval: 30s + timeout: 10s + retries: 3 + + # ---------------------------------------------------------------- + # 2. Grafana (Dashboard & Visualization) + # ---------------------------------------------------------------- + grafana: + <<: [*restart_policy, *default_logging] + image: grafana/grafana:10.2.2 + container_name: grafana + deploy: + resources: + limits: + cpus: "1.0" + memory: 512M + reservations: + cpus: "0.25" + memory: 128M + env_file: + - .env + environment: + TZ: "Asia/Bangkok" + GF_SECURITY_ADMIN_USER: admin + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD required} + GF_SERVER_ROOT_URL: "https://grafana.np-dms.work" + GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel + ports: + - "3003:3000" + networks: + - lcbp3 + volumes: + - "/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana" + depends_on: + - prometheus + healthcheck: + test: + [ + "CMD-SHELL", + "wget --spider -q http://localhost:3000/api/health || exit 1", + ] + interval: 30s + timeout: 10s + retries: 3 + + # ---------------------------------------------------------------- + # 3. Uptime Kuma (Service Availability Monitoring) + # ---------------------------------------------------------------- + uptime-kuma: + <<: [*restart_policy, *default_logging] + image: louislam/uptime-kuma:1 + container_name: uptime-kuma + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + environment: + TZ: "Asia/Bangkok" + ports: + - "3001:3001" + networks: + - lcbp3 + volumes: + - "/volume1/np-dms/monitoring/uptime-kuma/data:/app/data" + healthcheck: + test: + ["CMD-SHELL", "curl -f http://localhost:3001/api/entry-page || exit 1"] + interval: 30s + timeout: 10s + retries: 3 + + # ---------------------------------------------------------------- + # 4. Node Exporter (Host Metrics - ASUSTOR) + # ---------------------------------------------------------------- + node-exporter: + <<: [*restart_policy, *default_logging] + image: prom/node-exporter:v1.7.0 + container_name: node-exporter + deploy: + resources: + limits: + cpus: "0.5" + memory: 128M + environment: + TZ: "Asia/Bangkok" + command: + - "--path.procfs=/host/proc" + - "--path.sysfs=/host/sys" + - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" + ports: + - "9100:9100" + networks: + - lcbp3 + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:9100/metrics"] + interval: 30s + timeout: 10s + retries: 3 + + # ---------------------------------------------------------------- + # 5. cAdvisor (Container Metrics - ASUSTOR) + # ---------------------------------------------------------------- + cadvisor: + <<: [*restart_policy, *default_logging] + image: gcr.io/cadvisor/cadvisor:v0.47.2 + container_name: cadvisor + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + environment: + TZ: "Asia/Bangkok" + # H4: cAdvisor binds 8080 container map 8088 host + ports: + - "8088:8080" + networks: + - lcbp3 + volumes: + - /:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"] + interval: 30s + timeout: 10s + retries: 3 + + # ---------------------------------------------------------------- + # 6. Loki (Log Aggregation) + # ---------------------------------------------------------------- + loki: + <<: [*restart_policy, *default_logging] + image: grafana/loki:2.9.0 + container_name: loki + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + environment: + TZ: "Asia/Bangkok" + command: -config.file=/etc/loki/local-config.yaml + ports: + - "3100:3100" + networks: + - lcbp3 + volumes: + - "/volume1/np-dms/monitoring/loki/data:/loki" + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"] + interval: 30s + timeout: 10s + retries: 3 + + # ---------------------------------------------------------------- + # 7. Promtail (Log Shipper) + # ---------------------------------------------------------------- + promtail: + <<: [*restart_policy, *default_logging] + image: grafana/promtail:2.9.0 + container_name: promtail + # L5: root /var/lib/docker/containers + # mount read-only + user: "0:0" + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + environment: + TZ: "Asia/Bangkok" + command: -config.file=/etc/promtail/promtail-config.yml + networks: + - lcbp3 + volumes: + - "/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro" + - "/var/run/docker.sock:/var/run/docker.sock:ro" + - "/var/lib/docker/containers:/var/lib/docker/containers:ro" + depends_on: + - loki diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.bak.yml b/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.bak.yml new file mode 100644 index 00000000..23fa9114 --- /dev/null +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.bak.yml @@ -0,0 +1,60 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + # Prometheus self-monitoring (ASUSTOR) + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + + # ============================================ + # ASUSTOR Metrics (Local) + # ============================================ + + # Host metrics from Node Exporter (ASUSTOR) + - job_name: "asustor-node" + static_configs: + - targets: ["node-exporter:9100"] + labels: + host: "asustor" + + # Container metrics from cAdvisor (ASUSTOR) + - job_name: "asustor-cadvisor" + static_configs: + - targets: ["cadvisor:8080"] + labels: + host: "asustor" + + # ============================================ + # QNAP Metrics (Remote - 192.168.10.8) + # ============================================ + + # Host metrics from Node Exporter (QNAP) + - job_name: "qnap-node" + static_configs: + - targets: ["192.168.10.8:9100"] + labels: + host: "qnap" + + # Container metrics from cAdvisor (QNAP) + - job_name: "qnap-cadvisor" + static_configs: + - targets: ["192.168.10.8:8088"] + labels: + host: "qnap" + + # Backend NestJS application (QNAP) + - job_name: "backend" + static_configs: + - targets: ["192.168.10.8:3000"] + labels: + host: "qnap" + metrics_path: "/metrics" + + # MariaDB Exporter (optional - QNAP) + - job_name: "mariadb" + static_configs: + - targets: ["192.168.10.8:9104"] + labels: + host: "qnap" diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml b/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml index c3e54243..788d7847 100644 --- a/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml @@ -1,92 +1,98 @@ # File: specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml # Prometheus Configuration — รัน บน ASUSTOR AS5403T (lcbp3-monitoring stack) # Change Log: -# - 2026-06-02: Initial config — scrape jobs สำหรับ ASUSTOR local + Desk-5439 remote +# - 2026-06-02: Initial config — merge จาก 0.yml (existing) + เพิ่ม ollama-metrics job # # Deploy path: /volume1/np-dms/monitoring/prometheus/config/prometheus.yml # Mount (read-only): docker-compose volume → /etc/prometheus/prometheus.yml +# +# NOTE: ไฟล์นี้รวม 0.yml (config เดิมบน ASUSTOR) + job ollama-metrics ใหม่ +# เมื่อ deploy แล้วให้ลบ 0.yml ออก หรือ rename เป็น 0.yml.bak global: - scrape_interval: 15s # ดึง metrics ทุก 15 วินาที (default) - evaluation_interval: 15s # ประเมิน rules ทุก 15 วินาที - scrape_timeout: 10s - - # Labels ที่ติดไปกับทุก time series ที่ scrape ได้ - external_labels: - environment: 'production' - cluster: 'lcbp3' - -# ─── Alerting (optional — เชื่อม Alertmanager เมื่อต้องการ) ────────────────── -# alerting: -# alertmanagers: -# - static_configs: -# - targets: ['alertmanager:9093'] - -# ─── Rules (optional) ──────────────────────────────────────────────────────── -# rule_files: -# - /etc/prometheus/rules/*.yml + scrape_interval: 15s + evaluation_interval: 15s # ─── Scrape Jobs ───────────────────────────────────────────────────────────── scrape_configs: # ---------------------------------------------------------------- # 1. Prometheus self-monitoring (ASUSTOR) # ---------------------------------------------------------------- - - job_name: 'prometheus' + - job_name: "prometheus" static_configs: - - targets: ['localhost:9090'] - labels: - host: 'asustor' - service: 'prometheus' + - targets: ["localhost:9090"] - # ---------------------------------------------------------------- - # 2. Node Exporter — Host metrics ของ ASUSTOR - # ---------------------------------------------------------------- - - job_name: 'node-exporter-asustor' + # ============================================ + # ASUSTOR Metrics (Local) + # ============================================ + + # Host metrics from Node Exporter (ASUSTOR) + - job_name: "asustor-node" static_configs: - - targets: ['node-exporter:9100'] + - targets: ["node-exporter:9100"] labels: - host: 'asustor' - service: 'node-exporter' + host: "asustor" - # ---------------------------------------------------------------- - # 3. cAdvisor — Container metrics ของ ASUSTOR - # ---------------------------------------------------------------- - - job_name: 'cadvisor-asustor' + # Container metrics from cAdvisor (ASUSTOR) + - job_name: "asustor-cadvisor" static_configs: - - targets: ['cadvisor:8080'] + - targets: ["cadvisor:8080"] labels: - host: 'asustor' - service: 'cadvisor' + host: "asustor" - # ---------------------------------------------------------------- - # 4. ollama-metrics (NorskHelsenett) — Ollama LLM metrics - # รัน บน Desk-5439 (192.168.10.100) ตาม ADR-023A - # sidecar expose /metrics บน port 9924 + # ============================================ + # QNAP Metrics (Remote - 192.168.10.8) + # ============================================ + + # Host metrics from Node Exporter (QNAP) + - job_name: "qnap-node" + static_configs: + - targets: ["192.168.10.8:9100"] + labels: + host: "qnap" + + # Container metrics from cAdvisor (QNAP) + - job_name: "qnap-cadvisor" + static_configs: + - targets: ["192.168.10.8:8088"] + labels: + host: "qnap" + + # Backend NestJS application (QNAP) + - job_name: "backend" + static_configs: + - targets: ["192.168.10.8:3000"] + labels: + host: "qnap" + metrics_path: "/metrics" + + # MariaDB Exporter (optional - QNAP) + - job_name: "mariadb" + static_configs: + - targets: ["192.168.10.8:9104"] + labels: + host: "qnap" + + # ============================================ + # Desk-5439 Metrics (Remote - 192.168.10.100) + # ============================================ + + # ollama-metrics (NorskHelsenett) — Ollama LLM metrics + # sidecar รันบน Desk-5439 ตาม ADR-023A, expose /metrics บน port 9924 # - # Metrics ที่ collect: - # ollama_prompt_tokens_total — prompt tokens รวม - # ollama_generated_tokens_total — generated tokens รวม - # ollama_request_duration_seconds — latency histogram - # ollama_time_per_token_seconds — inference speed - # ollama_loaded_models — จำนวน model ใน VRAM - # ollama_model_loaded — 1/0 per model - # ollama_model_ram_mb — VRAM usage (MB) per model - # ---------------------------------------------------------------- - - job_name: 'ollama-metrics' - scrape_interval: 30s # Ollama metrics ไม่เปลี่ยนเร็ว — 30s เพียงพอ + # Metrics ที่ collect: + # ollama_prompt_tokens_total — prompt tokens รวม + # ollama_generated_tokens_total — generated tokens รวม + # ollama_request_duration_seconds — latency histogram + # ollama_time_per_token_seconds — inference speed (tok/s) + # ollama_loaded_models — จำนวน model ใน VRAM + # ollama_model_loaded — 1/0 per model + # ollama_model_ram_mb — VRAM usage (MB) per model + - job_name: "ollama-metrics" + scrape_interval: 30s static_configs: - - targets: ['192.168.10.100:9924'] + - targets: ["192.168.10.100:9924"] labels: - host: 'desk-5439' - service: 'ollama' - role: 'ai-inference' - - # ---------------------------------------------------------------- - # 5. Loki — Log aggregation health (ASUSTOR) - # ---------------------------------------------------------------- - - job_name: 'loki' - static_configs: - - targets: ['loki:3100'] - labels: - host: 'asustor' - service: 'loki' + host: "desk-5439" + service: "ollama" + role: "ai-inference"