Files
lcbp3/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/docker-compose.yml
T
admin 13745e5874
CI / CD Pipeline / build (push) Failing after 4m57s
CI / CD Pipeline / deploy (push) Has been skipped
690419:1831 feat: update CI/CD to use SSH key authentication #05
2026-04-19 18:31:30 +07:00

234 lines
6.7 KiB
YAML

# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.6: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
x-restart: &restart_policy
restart: unless-stopped
x-logging: &default_logging
logging:
driver: 'json-file'
options:
max-size: '10m'
max-file: '5'
networks:
lcbp3:
external: true
services:
# ----------------------------------------------------------------
# 1. Prometheus (Metrics Collection & Storage)
# ----------------------------------------------------------------
prometheus:
<<: [*restart_policy, *default_logging]
image: prom/prometheus:v2.48.0
container_name: prometheus
deploy:
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.25'
memory: 256M
environment:
TZ: 'Asia/Bangkok'
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.enable-lifecycle'
ports:
- '9090:9090'
networks:
- lcbp3
volumes:
- '/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro'
- '/volume1/np-dms/monitoring/prometheus/data:/prometheus'
healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:9090/-/healthy']
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 2. Grafana (Dashboard & Visualization)
# ----------------------------------------------------------------
grafana:
<<: [*restart_policy, *default_logging]
image: grafana/grafana:10.2.2
container_name: grafana
deploy:
resources:
limits:
cpus: '1.0'
memory: 512M
reservations:
cpus: '0.25'
memory: 128M
env_file:
- .env
environment:
TZ: 'Asia/Bangkok'
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD required}
GF_SERVER_ROOT_URL: 'https://grafana.np-dms.work'
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
ports:
- '3003:3000'
networks:
- lcbp3
volumes:
- '/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana'
depends_on:
- prometheus
healthcheck:
test: ['CMD-SHELL', 'wget --spider -q http://localhost:3000/api/health || exit 1']
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 3. Uptime Kuma (Service Availability Monitoring)
# ----------------------------------------------------------------
uptime-kuma:
<<: [*restart_policy, *default_logging]
image: louislam/uptime-kuma:1
container_name: uptime-kuma
deploy:
resources:
limits:
cpus: '0.5'
memory: 256M
environment:
TZ: 'Asia/Bangkok'
ports:
- '3001:3001'
networks:
- lcbp3
volumes:
- '/volume1/np-dms/monitoring/uptime-kuma/data:/app/data'
healthcheck:
test: ['CMD-SHELL', 'curl -f http://localhost:3001/api/entry-page || exit 1']
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 4. Node Exporter (Host Metrics - ASUSTOR)
# ----------------------------------------------------------------
node-exporter:
<<: [*restart_policy, *default_logging]
image: prom/node-exporter:v1.7.0
container_name: node-exporter
deploy:
resources:
limits:
cpus: '0.5'
memory: 128M
environment:
TZ: 'Asia/Bangkok'
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
ports:
- '9100:9100'
networks:
- lcbp3
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:9100/metrics']
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 5. cAdvisor (Container Metrics - ASUSTOR)
# ----------------------------------------------------------------
cadvisor:
<<: [*restart_policy, *default_logging]
image: gcr.io/cadvisor/cadvisor:v0.47.2
container_name: cadvisor
deploy:
resources:
limits:
cpus: '0.5'
memory: 256M
environment:
TZ: 'Asia/Bangkok'
# H4: cAdvisor binds 8080 ภายใน container — map เป็น 8088 บน host
ports:
- '8088:8080'
networks:
- lcbp3
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:8080/healthz']
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 6. Loki (Log Aggregation)
# ----------------------------------------------------------------
loki:
<<: [*restart_policy, *default_logging]
image: grafana/loki:2.9.0
container_name: loki
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
environment:
TZ: 'Asia/Bangkok'
command: -config.file=/etc/loki/local-config.yaml
ports:
- '3100:3100'
networks:
- lcbp3
volumes:
- '/volume1/np-dms/monitoring/loki/data:/loki'
healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3100/ready']
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 7. Promtail (Log Shipper)
# ----------------------------------------------------------------
promtail:
<<: [*restart_policy, *default_logging]
image: grafana/promtail:2.9.0
container_name: promtail
# L5: รันในฐานะ root เพราะต้องอ่าน /var/lib/docker/containers
# ที่ mount เข้ามาแบบ read-only
user: '0:0'
deploy:
resources:
limits:
cpus: '0.5'
memory: 256M
environment:
TZ: 'Asia/Bangkok'
command: -config.file=/etc/promtail/promtail-config.yml
networks:
- lcbp3
volumes:
- '/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro'
- '/var/run/docker.sock:/var/run/docker.sock:ro'
- '/var/lib/docker/containers:/var/lib/docker/containers:ro'
depends_on:
- loki