690602:1334 ADR-033-233 #03
CI / CD Pipeline / build (push) Successful in 5m8s
CI / CD Pipeline / deploy (push) Successful in 7m57s

This commit is contained in:
2026-06-02 13:34:22 +07:00
parent b939a25456
commit cb9ecb2de6
5 changed files with 498 additions and 82 deletions
@@ -0,0 +1,60 @@
# Prometheus configuration for the ASUSTOR-hosted monitoring stack.
# Scrapes the local ASUSTOR exporters (addressed by service name) and the
# remote QNAP exporters at 192.168.10.8.
#
# Every target carries a `host` label naming the machine it runs on, except
# the Prometheus self-scrape job, which is left unlabeled as in the original.

global:
  # Default scrape interval for every job that does not set its own.
  scrape_interval: 15s
  # How often rules are evaluated.
  evaluation_interval: 15s

scrape_configs:
  # Prometheus self-monitoring (ASUSTOR)
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # ============================================
  # ASUSTOR Metrics (Local)
  # ============================================

  # Host metrics from Node Exporter (ASUSTOR)
  - job_name: "asustor-node"
    static_configs:
      - targets: ["node-exporter:9100"]
        labels:
          host: "asustor"

  # Container metrics from cAdvisor (ASUSTOR)
  - job_name: "asustor-cadvisor"
    static_configs:
      - targets: ["cadvisor:8080"]
        labels:
          host: "asustor"

  # ============================================
  # QNAP Metrics (Remote - 192.168.10.8)
  # ============================================

  # Host metrics from Node Exporter (QNAP)
  - job_name: "qnap-node"
    static_configs:
      - targets: ["192.168.10.8:9100"]
        labels:
          host: "qnap"

  # Container metrics from cAdvisor (QNAP)
  - job_name: "qnap-cadvisor"
    static_configs:
      - targets: ["192.168.10.8:8088"]
        labels:
          host: "qnap"

  # Backend NestJS application (QNAP)
  - job_name: "backend"
    # Job-level setting (not a target label); spelled out explicitly even
    # though "/metrics" is also the path Prometheus uses by default.
    metrics_path: "/metrics"
    static_configs:
      - targets: ["192.168.10.8:3000"]
        labels:
          host: "qnap"

  # MariaDB Exporter (optional - QNAP)
  - job_name: "mariadb"
    static_configs:
      - targets: ["192.168.10.8:9104"]
        labels:
          host: "qnap"
@@ -1,92 +1,98 @@
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml
# Prometheus Configuration — runs on ASUSTOR AS5403T (lcbp3-monitoring stack)
# Change Log:
# - 2026-06-02: Initial config — scrape jobs for ASUSTOR local + Desk-5439 remote
# - 2026-06-02: Initial config — merged from 0.yml (existing) + added the ollama-metrics job
#
# Deploy path: /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
# Mount (read-only): docker-compose volume → /etc/prometheus/prometheus.yml
#
# NOTE: this file combines 0.yml (the previous config on ASUSTOR) with the new ollama-metrics job
# After deploying, delete 0.yml or rename it to 0.yml.bak
#
# NOTE(review): this section appears to be a diff hunk with removed and added
# lines interleaved (no +/- markers) and indentation stripped, which is why
# scrape_interval / evaluation_interval occur twice below — confirm against
# the committed file before reusing it as a config.
global:
scrape_interval: 15s # scrape metrics every 15 seconds (default for jobs that do not override it)
evaluation_interval: 15s # evaluate rules every 15 seconds
scrape_timeout: 10s
# Labels attached to every scraped time series
# NOTE(review): Prometheus documents external_labels as applied when talking
# to external systems (federation, remote storage, Alertmanager) — confirm
# that this matches the intent described above.
external_labels:
environment: 'production'
cluster: 'lcbp3'
# ─── Alerting (optional — connect Alertmanager when needed) ──────────────────
# alerting:
# alertmanagers:
# - static_configs:
# - targets: ['alertmanager:9093']
# ─── Rules (optional) ────────────────────────────────────────────────────────
# rule_files:
# - /etc/prometheus/rules/*.yml
scrape_interval: 15s
evaluation_interval: 15s
# NOTE(review): the lines below appear to interleave the removed version of
# each job (single-quoted values, numbered "# ---" section banners) with the
# added version (double-quoted values, "# ===" banners), with no +/- markers
# and with indentation stripped — confirm against the committed file.
# ─── Scrape Jobs ─────────────────────────────────────────────────────────────
scrape_configs:
# ----------------------------------------------------------------
# 1. Prometheus self-monitoring (ASUSTOR)
# ----------------------------------------------------------------
- job_name: 'prometheus'
- job_name: "prometheus"
static_configs:
- targets: ['localhost:9090']
labels:
host: 'asustor'
service: 'prometheus'
- targets: ["localhost:9090"]
# ----------------------------------------------------------------
# 2. Node Exporter — host metrics of ASUSTOR
# ----------------------------------------------------------------
- job_name: 'node-exporter-asustor'
# ============================================
# ASUSTOR Metrics (Local)
# ============================================
# Host metrics from Node Exporter (ASUSTOR)
- job_name: "asustor-node"
static_configs:
- targets: ['node-exporter:9100']
- targets: ["node-exporter:9100"]
labels:
host: 'asustor'
service: 'node-exporter'
host: "asustor"
# ----------------------------------------------------------------
# 3. cAdvisor — container metrics of ASUSTOR
# ----------------------------------------------------------------
- job_name: 'cadvisor-asustor'
# Container metrics from cAdvisor (ASUSTOR)
- job_name: "asustor-cadvisor"
static_configs:
- targets: ['cadvisor:8080']
- targets: ["cadvisor:8080"]
labels:
host: 'asustor'
service: 'cadvisor'
host: "asustor"
# ----------------------------------------------------------------
# 4. ollama-metrics (NorskHelsenett) — Ollama LLM metrics
# runs on Desk-5439 (192.168.10.100) per ADR-023A
# sidecar exposes /metrics on port 9924
# ============================================
# QNAP Metrics (Remote - 192.168.10.8)
# ============================================
# Host metrics from Node Exporter (QNAP)
- job_name: "qnap-node"
static_configs:
- targets: ["192.168.10.8:9100"]
labels:
host: "qnap"
# Container metrics from cAdvisor (QNAP)
- job_name: "qnap-cadvisor"
static_configs:
- targets: ["192.168.10.8:8088"]
labels:
host: "qnap"
# Backend NestJS application (QNAP)
- job_name: "backend"
static_configs:
- targets: ["192.168.10.8:3000"]
labels:
host: "qnap"
metrics_path: "/metrics"
# MariaDB Exporter (optional - QNAP)
- job_name: "mariadb"
static_configs:
- targets: ["192.168.10.8:9104"]
labels:
host: "qnap"
# ============================================
# Desk-5439 Metrics (Remote - 192.168.10.100)
# ============================================
# ollama-metrics (NorskHelsenett) — Ollama LLM metrics
# sidecar runs on Desk-5439 per ADR-023A, exposes /metrics on port 9924
#
# Metrics collected:
# ollama_prompt_tokens_total — total prompt tokens
# ollama_generated_tokens_total — total generated tokens
# ollama_request_duration_seconds — latency histogram
# ollama_time_per_token_seconds — inference speed
# ollama_loaded_models — number of models in VRAM
# ollama_model_loaded — 1/0 per model
# ollama_model_ram_mb — VRAM usage (MB) per model
# ----------------------------------------------------------------
- job_name: 'ollama-metrics'
scrape_interval: 30s # Ollama metrics change slowly — 30s is enough
# Metrics collected:
# ollama_prompt_tokens_total — total prompt tokens
# ollama_generated_tokens_total — total generated tokens
# ollama_request_duration_seconds — latency histogram
# ollama_time_per_token_seconds — inference speed (tok/s)
# ollama_loaded_models — number of models in VRAM
# ollama_model_loaded — 1/0 per model
# ollama_model_ram_mb — VRAM usage (MB) per model
- job_name: "ollama-metrics"
scrape_interval: 30s
static_configs:
- targets: ['192.168.10.100:9924']
- targets: ["192.168.10.100:9924"]
labels:
host: 'desk-5439'
service: 'ollama'
role: 'ai-inference'
# ----------------------------------------------------------------
# 5. Loki — Log aggregation health (ASUSTOR)
# ----------------------------------------------------------------
- job_name: 'loki'
static_configs:
- targets: ['loki:3100']
labels:
host: 'asustor'
service: 'loki'
host: "desk-5439"
service: "ollama"
role: "ai-inference"