# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.0: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail

# Shared restart policy, merged into every service via the YAML merge key.
x-restart: &restart_policy
  restart: unless-stopped

# Shared json-file log rotation (max 5 files x 10 MB per container).
x-logging: &default_logging
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "5"

networks:
  lcbp3:
    external: true

services:
  # ----------------------------------------------------------------
  # 1. Prometheus (Metrics Collection & Storage)
  # ----------------------------------------------------------------
  prometheus:
    <<: [*restart_policy, *default_logging]
    image: prom/prometheus:v2.48.0
    container_name: prometheus
    stdin_open: true
    tty: true
    deploy:
      resources:
        limits:
          cpus: "1.0"
          memory: 1G
        reservations:
          cpus: "0.25"
          memory: 256M
    environment:
      TZ: "Asia/Bangkok"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # --web.enable-lifecycle allows config reload via POST /-/reload.
      - '--web.enable-lifecycle'
    ports:
      - "9090:9090"
    networks:
      - lcbp3
    volumes:
      - "/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro"
      - "/volume1/np-dms/monitoring/prometheus/data:/prometheus"
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 2. Grafana (Dashboard & Visualization)
  # ----------------------------------------------------------------
  grafana:
    <<: [*restart_policy, *default_logging]
    image: grafana/grafana:10.2.2
    container_name: grafana
    stdin_open: true
    tty: true
    deploy:
      resources:
        limits:
          cpus: "1.0"
          memory: 512M
        reservations:
          cpus: "0.25"
          memory: 128M
    environment:
      TZ: "Asia/Bangkok"
      GF_SECURITY_ADMIN_USER: admin
      # NOTE(review): plaintext admin password committed in this file — consider
      # moving to an env_file or secret store; value left unchanged here so
      # existing deployments keep working.
      GF_SECURITY_ADMIN_PASSWORD: "Center#2025"
      GF_SERVER_ROOT_URL: "https://grafana.np-dms.work"
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
    ports:
      - "3000:3000"
    networks:
      - lcbp3
    volumes:
      - "/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana"
    depends_on:
      - prometheus
    healthcheck:
      test: ["CMD-SHELL", "wget --spider -q http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 3. Uptime Kuma (Service Availability Monitoring)
  # ----------------------------------------------------------------
  uptime-kuma:
    <<: [*restart_policy, *default_logging]
    image: louislam/uptime-kuma:1
    container_name: uptime-kuma
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
    environment:
      TZ: "Asia/Bangkok"
    ports:
      - "3001:3001"
    networks:
      - lcbp3
    volumes:
      - "/volume1/np-dms/monitoring/uptime-kuma/data:/app/data"
    healthcheck:
      # NOTE(review): relies on curl being present in the image — verify, or
      # switch to the image's bundled extra/healthcheck helper if it is not.
      test: ["CMD-SHELL", "curl -f http://localhost:3001/api/entry-page || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 4. Node Exporter (Host Metrics - ASUSTOR)
  # ----------------------------------------------------------------
  node-exporter:
    <<: [*restart_policy, *default_logging]
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 128M
    environment:
      TZ: "Asia/Bangkok"
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # FIX: the host root is mounted at /rootfs below but was never wired up;
      # without this flag the filesystem collector reads the container's own
      # rootfs instead of the host's.
      - '--path.rootfs=/rootfs'
      # $$ is Compose's escape for a literal $ (regex end-of-string anchor).
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    ports:
      - "9100:9100"
    networks:
      - lcbp3
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9100/metrics"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 5. cAdvisor (Container Metrics - ASUSTOR)
  # ----------------------------------------------------------------
  cadvisor:
    <<: [*restart_policy, *default_logging]
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
    environment:
      TZ: "Asia/Bangkok"
    ports:
      # FIX: was "8088:8088" — cAdvisor listens on 8080 inside the container
      # (its healthcheck below targets localhost:8080), so the old mapping
      # published a port nothing was listening on. Host port 8088 is kept.
      - "8088:8080"
    networks:
      - lcbp3
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 6. Loki (Log Aggregation)
  # ----------------------------------------------------------------
  loki:
    <<: [*restart_policy, *default_logging]
    image: grafana/loki:2.9.0
    container_name: loki
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 512M
    environment:
      TZ: "Asia/Bangkok"
    command: -config.file=/etc/loki/local-config.yaml
    ports:
      - "3100:3100"
    networks:
      - lcbp3
    volumes:
      - "/volume1/np-dms/monitoring/loki/data:/loki"
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------------
  # 7. Promtail (Log Shipper)
  # ----------------------------------------------------------------
  promtail:
    <<: [*restart_policy, *default_logging]
    image: grafana/promtail:2.9.0
    container_name: promtail
    # Runs as root so it can read other containers' log directories and the
    # Docker socket mounted below.
    user: "0:0"
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
    environment:
      TZ: "Asia/Bangkok"
    command: -config.file=/etc/promtail/promtail-config.yml
    networks:
      - lcbp3
    volumes:
      - "/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro"
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
      - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
    # NOTE(review): no healthcheck defined for promtail (it exposes /ready on
    # :9080 by default) — consider adding one for parity with the other services.
    depends_on:
      - loki