690602:1334 ADR-033-233 #03

2026-06-02 13:34:22 +07:00
parent b939a25456
commit cb9ecb2de6
5 changed files with 498 additions and 82 deletions
@@ -0,0 +1,60 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  # Prometheus self-monitoring (ASUSTOR)
+  - job_name: "prometheus"
+    static_configs:
+      - targets: ["localhost:9090"]
+
+  # ============================================
+  # ASUSTOR Metrics (Local)
+  # ============================================
+
+  # Host metrics from Node Exporter (ASUSTOR)
+  - job_name: "asustor-node"
+    static_configs:
+      - targets: ["node-exporter:9100"]
+        labels:
+          host: "asustor"
+
+  # Container metrics from cAdvisor (ASUSTOR)
+  - job_name: "asustor-cadvisor"
+    static_configs:
+      - targets: ["cadvisor:8080"]
+        labels:
+          host: "asustor"
+
+  # ============================================
+  # QNAP Metrics (Remote - 192.168.10.8)
+  # ============================================
+
+  # Host metrics from Node Exporter (QNAP)
+  - job_name: "qnap-node"
+    static_configs:
+      - targets: ["192.168.10.8:9100"]
+        labels:
+          host: "qnap"
+
+  # Container metrics from cAdvisor (QNAP)
+  - job_name: "qnap-cadvisor"
+    static_configs:
+      - targets: ["192.168.10.8:8088"]
+        labels:
+          host: "qnap"
+
+  # Backend NestJS application (QNAP)
+  - job_name: "backend"
+    static_configs:
+      - targets: ["192.168.10.8:3000"]
+        labels:
+          host: "qnap"
+    metrics_path: "/metrics"
+
+  # MariaDB Exporter (optional - QNAP)
+  - job_name: "mariadb"
+    static_configs:
+      - targets: ["192.168.10.8:9104"]
+        labels:
+          host: "qnap"
@@ -1,92 +1,98 @@
 # File: specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml
 # Prometheus Configuration — รัน บน ASUSTOR AS5403T (lcbp3-monitoring stack)
 # Change Log:
-# - 2026-06-02: Initial config — scrape jobs สำหรับ ASUSTOR local + Desk-5439 remote
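+# - 2026-06-02: Initial config — merge จาก 0.yml (existing) + เพิ่ม ollama-metrics job (removes scrape_timeout, external_labels, ASUSTOR service labels, loki job; renames node-exporter-asustor → asustor-node, cadvisor-asustor → asustor-cadvisor)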
 #
 # Deploy path: /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
 # Mount (read-only): docker-compose volume → /etc/prometheus/prometheus.yml
+#
+# NOTE: ไฟล์นี้รวม 0.yml (config เดิมบน ASUSTOR) + job ollama-metrics ใหม่
+#       เมื่อ deploy แล้วให้ลบ 0.yml ออก หรือ rename เป็น 0.yml.bak

 global:
-  scrape_interval: 15s       # ดึง metrics ทุก 15 วินาที (default)
-  evaluation_interval: 15s   # ประเมิน rules ทุก 15 วินาที
-  scrape_timeout: 10s
-
-  # Labels ที่ติดไปกับทุก time series ที่ scrape ได้
-  external_labels:
-    environment: 'production'
-    cluster: 'lcbp3'
-
-# ─── Alerting (optional — เชื่อม Alertmanager เมื่อต้องการ) ──────────────────
-# alerting:
-#   alertmanagers:
-#     - static_configs:
-#         - targets: ['alertmanager:9093']
-
-# ─── Rules (optional) ────────────────────────────────────────────────────────
-# rule_files:
-#   - /etc/prometheus/rules/*.yml
+  scrape_interval: 15s
+  evaluation_interval: 15s

 # ─── Scrape Jobs ─────────────────────────────────────────────────────────────
 scrape_configs:
  # ----------------------------------------------------------------
  # 1. Prometheus self-monitoring (ASUSTOR)
  # ----------------------------------------------------------------
-  - job_name: 'prometheus'
+  - job_name: "prometheus"
    static_configs:
-      - targets: ['localhost:9090']
-        labels:
-          host: 'asustor'
-          service: 'prometheus'
+      - targets: ["localhost:9090"]

-  # ----------------------------------------------------------------
-  # 2. Node Exporter — Host metrics ของ ASUSTOR
-  # ----------------------------------------------------------------
-  - job_name: 'node-exporter-asustor'
+  # ============================================
+  # ASUSTOR Metrics (Local)
+  # ============================================
+
+  # Host metrics from Node Exporter (ASUSTOR)
+  - job_name: "asustor-node"
    static_configs:
-      - targets: ['node-exporter:9100']
+      - targets: ["node-exporter:9100"]
        labels:
-          host: 'asustor'
-          service: 'node-exporter'
+          host: "asustor"

-  # ----------------------------------------------------------------
-  # 3. cAdvisor — Container metrics ของ ASUSTOR
-  # ----------------------------------------------------------------
-  - job_name: 'cadvisor-asustor'
+  # Container metrics from cAdvisor (ASUSTOR)
+  - job_name: "asustor-cadvisor"
    static_configs:
-      - targets: ['cadvisor:8080']
+      - targets: ["cadvisor:8080"]
        labels:
-          host: 'asustor'
-          service: 'cadvisor'
+          host: "asustor"

-  # ----------------------------------------------------------------
-  # 4. ollama-metrics (NorskHelsenett) — Ollama LLM metrics
-  #    รัน บน Desk-5439 (192.168.10.100) ตาม ADR-023A
-  #    sidecar expose /metrics บน port 9924
+  # ============================================
+  # QNAP Metrics (Remote - 192.168.10.8)
+  # ============================================
+
+  # Host metrics from Node Exporter (QNAP)
+  - job_name: "qnap-node"
+    static_configs:
+      - targets: ["192.168.10.8:9100"]
+        labels:
+          host: "qnap"
+
+  # Container metrics from cAdvisor (QNAP)
+  - job_name: "qnap-cadvisor"
+    static_configs:
+      - targets: ["192.168.10.8:8088"]
+        labels:
+          host: "qnap"
+
+  # Backend NestJS application (QNAP)
+  - job_name: "backend"
+    static_configs:
+      - targets: ["192.168.10.8:3000"]
+        labels:
+          host: "qnap"
+    metrics_path: "/metrics"
+
+  # MariaDB Exporter (optional - QNAP)
+  - job_name: "mariadb"
+    static_configs:
+      - targets: ["192.168.10.8:9104"]
+        labels:
+          host: "qnap"
+
+  # ============================================
+  # Desk-5439 Metrics (Remote - 192.168.10.100)
+  # ============================================
+
+  # ollama-metrics (NorskHelsenett) — Ollama LLM metrics
+  # sidecar รันบน Desk-5439 ตาม ADR-023A, expose /metrics บน port 9924
  #
-  #    Metrics ที่ collect:
-  #      ollama_prompt_tokens_total       — prompt tokens รวม
-  #      ollama_generated_tokens_total    — generated tokens รวม
-  #      ollama_request_duration_seconds  — latency histogram
-  #      ollama_time_per_token_seconds    — inference speed
-  #      ollama_loaded_models             — จำนวน model ใน VRAM
-  #      ollama_model_loaded              — 1/0 per model
-  #      ollama_model_ram_mb              — VRAM usage (MB) per model
-  # ----------------------------------------------------------------
-  - job_name: 'ollama-metrics'
-    scrape_interval: 30s   # Ollama metrics ไม่เปลี่ยนเร็ว — 30s เพียงพอ
+  # Metrics ที่ collect:
+  #   ollama_prompt_tokens_total       — prompt tokens รวม
+  #   ollama_generated_tokens_total    — generated tokens รวม
+  #   ollama_request_duration_seconds  — latency histogram
+  #   ollama_time_per_token_seconds    — inference speed (seconds per token; lower = faster)
+  #   ollama_loaded_models             — จำนวน model ใน VRAM
+  #   ollama_model_loaded              — 1/0 per model
+  #   ollama_model_ram_mb              — VRAM usage (MB) per model
+  - job_name: "ollama-metrics"
+    scrape_interval: 30s
    static_configs:
-      - targets: ['192.168.10.100:9924']
+      - targets: ["192.168.10.100:9924"]
        labels:
-          host: 'desk-5439'
-          service: 'ollama'
-          role: 'ai-inference'
-
-  # ----------------------------------------------------------------
-  # 5. Loki — Log aggregation health (ASUSTOR)
-  # ----------------------------------------------------------------
-  - job_name: 'loki'
-    static_configs:
-      - targets: ['loki:3100']
-        labels:
-          host: 'asustor'
-          service: 'loki'
+          host: "desk-5439"
+          service: "ollama"
+          role: "ai-inference"