# lcbp3/specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/docker-compose.yml

# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.6: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail

# Shared restart policy; services pull it in through the `<<` merge key.
# `unless-stopped` restarts a container unless it was stopped explicitly.
x-restart: &restart_policy
  restart: 'unless-stopped'

# Shared logging settings; services pull them in through the `<<` merge key.
# json-file driver with rotation: files are capped at 10m and at most 5 are
# kept. `max-file` stays quoted so the value is passed as a string.
x-logging: &default_logging
  logging:
    driver: json-file
    options:
      max-file: '5'
      max-size: 10m

# `lcbp3` is declared external: Compose expects the network to exist already
# and does not create it.
networks:
  lcbp3: { external: true }

services:
  # ----------------------------------------------------------------
  # 1. Prometheus (Metrics Collection & Storage)
  # ----------------------------------------------------------------
  prometheus:
    <<: [*restart_policy, *default_logging]
    container_name: prometheus
    image: prom/prometheus:v2.48.0
    environment:
      TZ: Asia/Bangkok
    networks:
      - lcbp3
    # Web UI / HTTP API, published on the same port number on the host.
    ports:
      - '9090:9090'
    volumes:
      # Configuration directory, mounted read-only.
      - /volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro
      # TSDB data directory (matches --storage.tsdb.path below).
      - /volume1/np-dms/monitoring/prometheus/data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Keep 30 days of metrics.
      - '--storage.tsdb.retention.time=30d'
      # Enables the HTTP lifecycle endpoints (e.g. /-/reload).
      # NOTE(review): port 9090 is published on the host, so these endpoints
      # are reachable by anything that can reach that port - confirm access
      # is restricted.
      - '--web.enable-lifecycle'
    # CPU / memory ceiling and guaranteed minimum for this container.
    deploy:
      resources:
        reservations:
          cpus: '0.25'
          memory: 256M
        limits:
          cpus: '1.0'
          memory: 1G
    # Probes Prometheus' own /-/healthy endpoint from inside the container.
    healthcheck:
      test:
        - CMD
        - wget
        - '--spider'
        - '-q'
        - 'http://localhost:9090/-/healthy'
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 2. Grafana (Dashboard & Visualization)
  # ----------------------------------------------------------------
  grafana:
    <<: [*restart_policy, *default_logging]
    container_name: grafana
    image: grafana/grafana:10.2.2
    # Short-form dependency: Compose starts prometheus before grafana.
    depends_on:
      - prometheus
    networks:
      - lcbp3
    # Grafana listens on 3000 inside the container; exposed as 3003 on the host.
    ports:
      - '3003:3000'
    volumes:
      - /volume1/np-dms/monitoring/grafana/data:/var/lib/grafana
    # Variables from .env are also passed into the container.
    env_file:
      - .env
    environment:
      TZ: Asia/Bangkok
      GF_SERVER_ROOT_URL: 'https://grafana.np-dms.work'
      GF_SECURITY_ADMIN_USER: admin
      # `:?` makes Compose abort with the given message when
      # GRAFANA_ADMIN_PASSWORD is unset or empty, so no default password is used.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD required}
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
    # CPU / memory ceiling and guaranteed minimum for this container.
    deploy:
      resources:
        reservations:
          cpus: '0.25'
          memory: 128M
        limits:
          cpus: '1.0'
          memory: 512M
    # Probes Grafana's /api/health endpoint on the in-container port.
    healthcheck:
      test:
        - CMD-SHELL
        - 'wget --spider -q http://localhost:3000/api/health || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 3. Uptime Kuma (Service Availability Monitoring)
  # ----------------------------------------------------------------
  uptime-kuma:
    <<: [*restart_policy, *default_logging]
    container_name: uptime-kuma
    # NOTE(review): `:1` is a moving major-version tag, unlike the pinned
    # versions used by the other services - confirm this is intentional.
    image: louislam/uptime-kuma:1
    environment:
      TZ: Asia/Bangkok
    networks:
      - lcbp3
    ports:
      - '3001:3001'
    # Application data kept on the NAS volume.
    volumes:
      - /volume1/np-dms/monitoring/uptime-kuma/data:/app/data
    # Only limits are set for this service (no reservations).
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    # Fails the check when /api/entry-page does not answer successfully.
    healthcheck:
      test:
        - CMD-SHELL
        - 'curl -f http://localhost:3001/api/entry-page || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 4. Node Exporter (Host Metrics - ASUSTOR)
  # ----------------------------------------------------------------
  node-exporter:
    <<: [*restart_policy, *default_logging]
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    # Only limits are set for this service (no reservations).
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
    environment:
      TZ: 'Asia/Bangkok'
    command:
      # Read host /proc and /sys from the read-only bind mounts below.
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Point the exporter at the host root filesystem mounted at /rootfs.
      # Without this flag the /rootfs mount is unused and the filesystem
      # collector stats host mount points inside the container's own
      # filesystem, giving wrong or missing disk metrics.
      - '--path.rootfs=/rootfs'
      # Skip pseudo/system mount points. `$$` is the Compose escape for a
      # literal `$` in the regular expression.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    ports:
      - '9100:9100'
    networks:
      - lcbp3
    volumes:
      # Host filesystems, all mounted read-only.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    # Treats a successful fetch of /metrics as healthy.
    healthcheck:
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:9100/metrics']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 5. cAdvisor (Container Metrics - ASUSTOR)
  # ----------------------------------------------------------------
  cadvisor:
    <<: [*restart_policy, *default_logging]
    container_name: cadvisor
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    environment:
      TZ: Asia/Bangkok
    networks:
      - lcbp3
    # H4: cAdvisor binds 8080 inside the container - mapped to 8088 on the host.
    ports:
      - '8088:8080'
    # Host paths cAdvisor inspects, all mounted read-only.
    volumes:
      - /:/rootfs:ro
      - /sys:/sys:ro
      - /var/run:/var/run:ro
      - /var/lib/docker/:/var/lib/docker:ro
    # Only limits are set for this service (no reservations).
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    # Probes /healthz on the in-container port (8080, not the host's 8088).
    healthcheck:
      test:
        - CMD
        - wget
        - '--spider'
        - '-q'
        - 'http://localhost:8080/healthz'
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 6. Loki (Log Aggregation)
  # ----------------------------------------------------------------
  loki:
    <<: [*restart_policy, *default_logging]
    container_name: loki
    image: grafana/loki:2.9.0
    environment:
      TZ: Asia/Bangkok
    # NOTE(review): no config volume is mounted for this service, so this
    # path presumably refers to the config file shipped in the image - confirm.
    command: '-config.file=/etc/loki/local-config.yaml'
    networks:
      - lcbp3
    ports:
      - '3100:3100'
    # Loki data kept on the NAS volume.
    volumes:
      - /volume1/np-dms/monitoring/loki/data:/loki
    # Only limits are set for this service (no reservations).
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M
    # Probes Loki's /ready endpoint from inside the container.
    healthcheck:
      test:
        - CMD
        - wget
        - '--spider'
        - '-q'
        - 'http://localhost:3100/ready'
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 7. Promtail (Log Shipper)
  # ----------------------------------------------------------------
  promtail:
    <<: [*restart_policy, *default_logging]
    container_name: promtail
    image: grafana/promtail:2.9.0
    # L5: runs as root because it has to read /var/lib/docker/containers,
    # which is mounted into the container read-only.
    user: '0:0'
    # Short-form dependency: Compose starts loki before promtail.
    depends_on:
      - loki
    environment:
      TZ: Asia/Bangkok
    command: '-config.file=/etc/promtail/promtail-config.yml'
    networks:
      - lcbp3
    volumes:
      # Promtail configuration directory (holds promtail-config.yml).
      - /volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro
      # Docker socket and container log directory, both read-only.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    # Only limits are set for this service; no ports are published and no
    # healthcheck is defined.
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M