# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.6: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail

# Shared restart policy; merged into every service through `<<:`.
x-restart: &restart_policy
  restart: unless-stopped

# Shared logging settings (json-file driver, max-size 10m, max-file 5);
# merged into every service through `<<:`.
x-logging: &default_logging
  logging:
    driver: 'json-file'
    options:
      max-size: '10m'
      max-file: '5'

# `lcbp3` is declared external: it is not created by this file and must
# already exist before the stack is started.
networks:
  lcbp3:
    external: true

services:
  # ----------------------------------------------------------------
  # 1. Prometheus (Metrics Collection & Storage)
  # ----------------------------------------------------------------
  prometheus:
    <<: [*restart_policy, *default_logging]
    image: prom/prometheus:v2.48.0
    container_name: prometheus
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1G
        reservations:
          cpus: '0.25'
          memory: 256M
    environment:
      TZ: 'Asia/Bangkok'
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--web.enable-lifecycle'
    ports:
      - '9090:9090'
    networks:
      - lcbp3
    volumes:
      # Configuration is mounted read-only; the TSDB directory is writable.
      - '/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro'
      - '/volume1/np-dms/monitoring/prometheus/data:/prometheus'
    healthcheck:
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:9090/-/healthy']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 2. Grafana (Dashboard & Visualization)
  # ----------------------------------------------------------------
  grafana:
    <<: [*restart_policy, *default_logging]
    image: grafana/grafana:10.2.2
    container_name: grafana
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 128M
    env_file:
      - .env
    environment:
      TZ: 'Asia/Bangkok'
      GF_SECURITY_ADMIN_USER: admin
      # `:?` makes variable substitution fail with the given message when
      # GRAFANA_ADMIN_PASSWORD is unset or empty.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD required}
      GF_SERVER_ROOT_URL: 'https://grafana.np-dms.work'
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
    ports:
      - '3000:3000'
    networks:
      - lcbp3
    volumes:
      - '/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana'
    depends_on:
      - prometheus
    healthcheck:
      test: ['CMD-SHELL', 'wget --spider -q http://localhost:3000/api/health || exit 1']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 3. Uptime Kuma (Service Availability Monitoring)
  # ----------------------------------------------------------------
  uptime-kuma:
    <<: [*restart_policy, *default_logging]
    image: louislam/uptime-kuma:1
    container_name: uptime-kuma
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    environment:
      TZ: 'Asia/Bangkok'
    ports:
      - '3001:3001'
    networks:
      - lcbp3
    volumes:
      - '/volume1/np-dms/monitoring/uptime-kuma/data:/app/data'
    healthcheck:
      test: ['CMD-SHELL', 'curl -f http://localhost:3001/api/entry-page || exit 1']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 4. Node Exporter (Host Metrics - ASUSTOR)
  # ----------------------------------------------------------------
  node-exporter:
    <<: [*restart_policy, *default_logging]
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
    environment:
      TZ: 'Asia/Bangkok'
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Use the host root that is bind-mounted at /rootfs below; without
      # this flag that mount is never consulted by the exporter.
      - '--path.rootfs=/rootfs'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    ports:
      - '9100:9100'
    networks:
      - lcbp3
    volumes:
      # Host filesystems, all read-only.
      - '/proc:/host/proc:ro'
      - '/sys:/host/sys:ro'
      - '/:/rootfs:ro'
    healthcheck:
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:9100/metrics']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 5. cAdvisor (Container Metrics - ASUSTOR)
  # ----------------------------------------------------------------
  cadvisor:
    <<: [*restart_policy, *default_logging]
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    environment:
      TZ: 'Asia/Bangkok'
    # H4: cAdvisor binds 8080 inside the container; it is mapped to 8088
    # on the host.
    ports:
      - '8088:8080'
    networks:
      - lcbp3
    volumes:
      # Host paths, all read-only.
      - '/:/rootfs:ro'
      - '/var/run:/var/run:ro'
      - '/sys:/sys:ro'
      - '/var/lib/docker/:/var/lib/docker:ro'
    healthcheck:
      # Probes the in-container port (8080), not the published one.
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:8080/healthz']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 6. Loki (Log Aggregation)
  # ----------------------------------------------------------------
  loki:
    <<: [*restart_policy, *default_logging]
    image: grafana/loki:2.9.0
    container_name: loki
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M
    environment:
      TZ: 'Asia/Bangkok'
    # No config directory is mounted for this service, so this path is
    # resolved inside the image.
    command: '-config.file=/etc/loki/local-config.yaml'
    ports:
      - '3100:3100'
    networks:
      - lcbp3
    volumes:
      - '/volume1/np-dms/monitoring/loki/data:/loki'
    healthcheck:
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3100/ready']
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 7. Promtail (Log Shipper)
  # ----------------------------------------------------------------
  promtail:
    <<: [*restart_policy, *default_logging]
    image: grafana/promtail:2.9.0
    container_name: promtail
    # L5: runs as root because it has to read /var/lib/docker/containers,
    # which is mounted read-only.
    user: '0:0'
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    environment:
      TZ: 'Asia/Bangkok'
    command: '-config.file=/etc/promtail/promtail-config.yml'
    # No ports are published and no healthcheck is defined for this service.
    networks:
      - lcbp3
    volumes:
      - '/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro'
      - '/var/run/docker.sock:/var/run/docker.sock:ro'
      - '/var/lib/docker/containers:/var/lib/docker/containers:ro'
    depends_on:
      - loki