690602:1254 ADR-033-233 #02.1 [skip CI]
This commit is contained in:
@@ -2,6 +2,11 @@
|
|||||||
# DMS Container v1.8.6: Application name: lcbp3-monitoring
|
# DMS Container v1.8.6: Application name: lcbp3-monitoring
|
||||||
# Deploy on: ASUSTOR AS5403T
|
# Deploy on: ASUSTOR AS5403T
|
||||||
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
|
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
|
||||||
|
#
|
||||||
|
# Remote Scrape Targets (ไม่ใช่ service ใน stack นี้ — scrape ผ่าน prometheus.yml):
|
||||||
|
# ollama-metrics — http://192.168.10.100:9924/metrics (Desk-5439, NorskHelsenett sidecar)
|
||||||
|
# ดู: specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/docker-compose.yml
|
||||||
|
# config: prometheus/config/prometheus.yml → job_name: ollama-metrics
|
||||||
|
|
||||||
x-restart: &restart_policy
|
x-restart: &restart_policy
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
+92
@@ -0,0 +1,92 @@
|
|||||||
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml
|
||||||
|
# Prometheus Configuration — รันบน ASUSTOR AS5403T (lcbp3-monitoring stack)
|
||||||
|
# Change Log:
|
||||||
|
# - 2026-06-02: Initial config — scrape jobs สำหรับ ASUSTOR local + Desk-5439 remote
|
||||||
|
#
|
||||||
|
# Deploy path: /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
|
||||||
|
# Mount (read-only): docker-compose volume → /etc/prometheus/prometheus.yml
|
||||||
|
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s # Scrape targets every 15 seconds; applies to every job that does not set its own scrape_interval
|
||||||
|
evaluation_interval: 15s # Evaluate rules every 15 seconds
|
||||||
|
scrape_timeout: 10s # Give up on a single scrape after 10 seconds
|
||||||
|
|
||||||
|
# Labels added to time series and alerts when this Prometheus talks to external systems (federation, remote storage, Alertmanager)
|
||||||
|
external_labels:
|
||||||
|
environment: 'production'
|
||||||
|
cluster: 'lcbp3'
|
||||||
|
|
||||||
|
# ─── Alerting (optional — เชื่อม Alertmanager เมื่อต้องการ) ──────────────────
|
||||||
|
# alerting:
|
||||||
|
# alertmanagers:
|
||||||
|
# - static_configs:
|
||||||
|
# - targets: ['alertmanager:9093']
|
||||||
|
|
||||||
|
# ─── Rules (optional) ────────────────────────────────────────────────────────
|
||||||
|
# rule_files:
|
||||||
|
# - /etc/prometheus/rules/*.yml
|
||||||
|
|
||||||
|
# ─── Scrape Jobs ─────────────────────────────────────────────────────────────
|
||||||
|
scrape_configs:
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 1. Prometheus self-monitoring (ASUSTOR) — Prometheus scrapes its own endpoint
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9090'] # localhost = the Prometheus process itself
|
||||||
|
labels:
|
||||||
|
host: 'asustor'
|
||||||
|
service: 'prometheus'
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 2. Node Exporter — host-level metrics of the ASUSTOR machine
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
- job_name: 'node-exporter-asustor'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['node-exporter:9100'] # node-exporter is listed as a service of the lcbp3-monitoring stack
|
||||||
|
labels:
|
||||||
|
host: 'asustor'
|
||||||
|
service: 'node-exporter'
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 3. cAdvisor — container-level metrics of the ASUSTOR machine
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
- job_name: 'cadvisor-asustor'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['cadvisor:8080'] # cadvisor is listed as a service of the lcbp3-monitoring stack
|
||||||
|
labels:
|
||||||
|
host: 'asustor'
|
||||||
|
service: 'cadvisor'
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 4. ollama-metrics (NorskHelsenett) — Ollama LLM metrics
|
||||||
|
# Runs on Desk-5439 (192.168.10.100) per ADR-023A
|
||||||
|
# The sidecar exposes /metrics on port 9924
|
||||||
|
#
|
||||||
|
# Metrics collected:
|
||||||
|
# ollama_prompt_tokens_total — total prompt tokens
|
||||||
|
# ollama_generated_tokens_total — total generated tokens
|
||||||
|
# ollama_request_duration_seconds — latency histogram
|
||||||
|
# ollama_time_per_token_seconds — inference speed
|
||||||
|
# ollama_loaded_models — number of models in VRAM
|
||||||
|
# ollama_model_loaded — 1/0 per model
|
||||||
|
# ollama_model_ram_mb — VRAM usage (MB) per model
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
- job_name: 'ollama-metrics'
|
||||||
|
scrape_interval: 30s # Overrides the global 15s: Ollama metrics change slowly, so 30s is sufficient
|
||||||
|
static_configs:
|
||||||
|
- targets: ['192.168.10.100:9924'] # Remote target on the LAN, not a service of this stack
|
||||||
|
labels:
|
||||||
|
host: 'desk-5439'
|
||||||
|
service: 'ollama'
|
||||||
|
role: 'ai-inference'
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 5. Loki — health of the log-aggregation service (ASUSTOR)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
- job_name: 'loki'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['loki:3100'] # loki is listed as a service of the lcbp3-monitoring stack
|
||||||
|
labels:
|
||||||
|
host: 'asustor'
|
||||||
|
service: 'loki'
|
||||||
+45
-1
@@ -10,12 +10,15 @@
|
|||||||
# - 2026-06-01: ลบ volumes ออกทั้งหมด — backend ส่ง file content ผ่าน multipart /ocr-upload แทน
|
# - 2026-06-01: ลบ volumes ออกทั้งหมด — backend ส่ง file content ผ่าน multipart /ocr-upload แทน
|
||||||
# ไม่ต้องการ shared storage อีกต่อไป
|
# ไม่ต้องการ shared storage อีกต่อไป
|
||||||
# - 2026-06-01: เปลี่ยน TYPHOON_OCR_MODEL เป็น scb10x/typhoon-ocr1.5-3b
|
# - 2026-06-01: เปลี่ยน TYPHOON_OCR_MODEL เป็น scb10x/typhoon-ocr1.5-3b
|
||||||
|
# - 2026-06-02: เพิ่ม ollama-metrics (NorskHelsenett) — Prometheus sidecar สำหรับ Ollama metrics
|
||||||
|
# expose /metrics บน port 9924; Prometheus (ASUSTOR) scrape จาก 192.168.10.100:9924
|
||||||
#
|
#
|
||||||
# วิธีรัน:
|
# วิธีรัน:
|
||||||
# docker compose up -d --build
|
# docker compose up -d --build
|
||||||
#
|
#
|
||||||
# ทดสอบ:
|
# ทดสอบ:
|
||||||
# curl http://192.168.10.100:8765/health
|
# curl http://192.168.10.100:8765/health
|
||||||
|
# curl http://192.168.10.100:9924/metrics
|
||||||
|
|
||||||
name: lcbp3-ocr
|
name: lcbp3-ocr
|
||||||
|
|
||||||
@@ -52,4 +55,45 @@ services:
|
|||||||
retries: 3
|
retries: 3
|
||||||
start_period: 60s
|
start_period: 60s
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# ollama-metrics (NorskHelsenett) — Prometheus sidecar for Ollama
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Transparent proxy that sits between clients and Ollama (port 11434)
|
||||||
|
# Collects metrics and exposes them at /metrics on port 9924 (Prometheus format)
|
||||||
|
#
|
||||||
|
# Metrics provided:
|
||||||
|
# ollama_prompt_tokens_total — total number of prompt tokens
|
||||||
|
# ollama_generated_tokens_total — total number of generated tokens
|
||||||
|
# ollama_request_duration_seconds — request duration (histogram)
|
||||||
|
# ollama_time_per_token_seconds — time per token (inference speed)
|
||||||
|
# ollama_loaded_models — number of models currently loaded in VRAM
|
||||||
|
# ollama_model_loaded — 1/0 indicator per model
|
||||||
|
# ollama_model_ram_mb — VRAM usage (MB) per model
|
||||||
|
#
|
||||||
|
# Prometheus (ASUSTOR 192.168.10.8) scrapes from:
|
||||||
|
# http://192.168.10.100:9924/metrics
|
||||||
|
ollama-metrics:
|
||||||
|
image: ghcr.io/norskhelsenett/ollama-metrics:latest # NOTE(review): ':latest' is a floating tag — consider pinning a version or digest for reproducible deploys
|
||||||
|
container_name: ollama-metrics
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
# Points at the Ollama instance running on Desk-5439 (host network or LAN IP)
|
||||||
|
# If Ollama runs on the host (not in a container), use host.docker.internal
|
||||||
|
OLLAMA_HOST: "http://host.docker.internal:11434"
|
||||||
|
PORT: "9924"
|
||||||
|
ports:
|
||||||
|
- "9924:9924"
|
||||||
|
extra_hosts:
|
||||||
|
# Maps host.docker.internal to the host's gateway IP. NOTE(review): Docker Desktop presumably resolves this name already; the explicit mapping mainly matters on a plain Linux Docker Engine — confirm
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--spider", "-q", "http://localhost:9924/metrics"] # NOTE(review): needs wget inside the image — confirm it is present, otherwise the container is always reported unhealthy
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 15s
|
||||||
|
|||||||
Reference in New Issue
Block a user