260213:1649 3rd Deploy (Next Update MariaDB)
Some checks failed
Spec Validation / validate-markdown (push) Has been cancelled
Spec Validation / validate-diagrams (push) Has been cancelled
Spec Validation / check-todos (push) Has been cancelled

This commit is contained in:
admin
2026-02-13 16:49:19 +07:00
parent 833e2897d1
commit 4d900d0930
3 changed files with 1188 additions and 22 deletions

View File

@@ -15,6 +15,7 @@ Stack สำหรับ Monitoring ประกอบด้วย:
| **cAdvisor** | 8080 (ASUSTOR) / 8088 (QNAP) | เก็บ Metrics ของ Docker containers | Both |
| **Uptime Kuma** | 3001 | Service Availability Monitoring | ASUSTOR |
| **Loki** | 3100 | Log aggregation | ASUSTOR |
| **Promtail** | - | Log shipper (Sender) | ASUSTOR |
---
@@ -31,11 +32,11 @@ Stack สำหรับ Monitoring ประกอบด้วย:
│ │ │
│ │ Scrape Metrics │
│ ▼ │
│ ┌─────────────┐ ┌─────────────┐
│ │node-exporter│ │ cAdvisor │
│ │ :9100 │ │ :8080 │
│ │ (Local) │ │ (Local) │
│ └─────────────┘ └─────────────┘
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ │node-exporter│ │ cAdvisor │ │ Promtail
│ │ :9100 │ │ :8080 │ │ (Log Ship) │
│ │ (Local) │ │ (Local) │ │ (Local)
│ └─────────────┘ └─────────────┘ └─────────────┘
└─────────────────────────────────────────────────────────────────────────┘
│ Remote Scrape
@@ -63,6 +64,7 @@ mkdir -p /volume1/np-dms/monitoring/prometheus/config
mkdir -p /volume1/np-dms/monitoring/grafana/data
mkdir -p /volume1/np-dms/monitoring/uptime-kuma/data
mkdir -p /volume1/np-dms/monitoring/loki/data
mkdir -p /volume1/np-dms/monitoring/promtail/config
# กำหนดสิทธิ์ให้ตรงกับ User ID ใน Container
# Prometheus (UID 65534 - nobody)
@@ -80,6 +82,10 @@ chmod -R 750 /volume1/np-dms/monitoring/uptime-kuma/data
# Loki (UID 10001)
chown -R 10001:10001 /volume1/np-dms/monitoring/loki/data
chmod -R 750 /volume1/np-dms/monitoring/loki/data
# Promtail (Runs as root to read docker logs - no specific chown needed for config dir if created by admin)
# But ensure config file is readable
chmod -R 755 /volume1/np-dms/monitoring/promtail/config
```
---
@@ -135,7 +141,7 @@ docker network inspect lcbp3
# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.0: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
x-restart: &restart_policy
restart: unless-stopped
@@ -339,6 +345,31 @@ services:
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 7. Promtail (Log Shipper)
# ----------------------------------------------------------------
promtail:
<<: [*restart_policy, *default_logging]
image: grafana/promtail:2.9.0
container_name: promtail
user: "0:0"
deploy:
resources:
limits:
cpus: "0.5"
memory: 256M
environment:
TZ: "Asia/Bangkok"
command: -config.file=/etc/promtail/promtail-config.yml
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro"
- "/var/run/docker.sock:/var/run/docker.sock:ro"
- "/var/lib/docker/containers:/var/lib/docker/containers:ro"
depends_on:
- loki
```
---
@@ -489,8 +520,8 @@ scrape_configs:
| Dashboard ID | Name | Purpose |
| :----------- | :--------------------------- | :------------------ |
| 1860 | Node Exporter Full | Host system metrics |
| 14282 | cAdvisor exporter | Container metrics |
| 1860 | Node Exporter Full | Host system metrics | ป |
| 14282 | cAdvisor exporter | Container metrics | ป |
| 11074 | Node Exporter for Prometheus | Node overview |
| 893 | Docker and Container | Docker overview |
| 7362 | MySQL | MySQL view |
@@ -513,10 +544,11 @@ scrape_configs:
| # | ขั้นตอน | Status |
| :--- | :------------------------------------------------------------------------------------------------- | :----- |
| 1 | SSH เข้า ASUSTOR ได้ (`ssh admin@192.168.10.9`) | |
| 2 | Docker Network `lcbp3` สร้างแล้ว (ดูหัวข้อ [สร้าง Docker Network](#-สร้าง-docker-network-ทำครั้งแรกครั้งเดียว)) | |
| 3 | สร้าง Directories และกำหนดสิทธิ์แล้ว (ดูหัวข้อ [กำหนดสิทธิ](#กำหนดสิทธิ-บน-asustor)) | |
| 4 | สร้าง `prometheus.yml` แล้ว (ดูหัวข้อ [Prometheus Configuration](#prometheus-configuration)) | |
| 1 | SSH เข้า ASUSTOR ได้ (`ssh admin@192.168.10.9`) | |
| 2 | Docker Network `lcbp3` สร้างแล้ว (ดูหัวข้อ [สร้าง Docker Network](#-สร้าง-docker-network-ทำครั้งแรกครั้งเดียว)) | |
| 3 | สร้าง Directories และกำหนดสิทธิ์แล้ว (ดูหัวข้อ [กำหนดสิทธิ](#กำหนดสิทธิ-บน-asustor)) | |
| 4 | สร้าง `prometheus.yml` แล้ว (ดูหัวข้อ [Prometheus Configuration](#prometheus-configuration)) | |
| 5 | สร้าง `promtail-config.yml` แล้ว (ดูหัวข้อ [Step 1.2](#step-12-สร้าง-promtail-configyml)) | ✅ |
---
@@ -573,6 +605,40 @@ EOF
cat /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
```
### Step 1.2: สร้าง promtail-config.yml
ต้องสร้าง Config ให้ Promtail อ่าน logs จาก Docker containers และส่งไป Loki:
```bash
# สร้างไฟล์ promtail-config.yml
cat > /volume1/np-dms/monitoring/promtail/config/promtail-config.yml << 'EOF'
server:
http_listen_port: 9080
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
clients:
- url: http://loki:3100/loki/api/v1/push
scrape_configs:
- job_name: docker
docker_sd_configs:
- host: unix:///var/run/docker.sock
refresh_interval: 5s
relabel_configs:
- source_labels: ['__meta_docker_container_name']
regex: '/(.*)'
target_label: 'container'
- source_labels: ['__meta_docker_container_log_stream']
target_label: 'stream'
EOF
# ตรวจสอบ
cat /volume1/np-dms/monitoring/promtail/config/promtail-config.yml
```
---
### Step 2: Deploy ผ่าน Portainer (แนะนำ)
@@ -612,17 +678,18 @@ docker compose ps
# ตรวจสอบ containers ทั้งหมด
docker ps --filter "name=prometheus" --filter "name=grafana" \
--filter "name=uptime-kuma" --filter "name=node-exporter" \
--filter "name=cadvisor" --filter "name=loki"
--filter "name=cadvisor" --filter "name=loki" --filter "name=promtail"
```
| Service | วิธีตรวจสอบ | Expected Result |
| :---------------- | :----------------------------------------------------------------- | :----------------------------- |
| ✅ **Prometheus** | `curl http://192.168.10.9:9090/-/healthy` | `Prometheus Server is Healthy` |
| ✅ **Grafana** | เปิด `https://grafana.np-dms.work` (หรือ `http://192.168.10.9:3000`) | หน้า Login |
| ✅ **Uptime Kuma** | เปิด `https://uptime.np-dms.work` (หรือ `http://192.168.10.9:3001`) | หน้า Setup |
| ✅ **Node Exp.** | `curl http://192.168.10.9:9100/metrics \| head` | Metrics output |
| ✅ **cAdvisor** | `curl http://192.168.10.9:8080/healthz` | `ok` |
| ✅ **Loki** | `curl http://192.168.10.9:3100/ready` | `ready` |
| Service | วิธีตรวจสอบ | Expected Result |
| :---------------- | :----------------------------------------------------------------- | :------------------------------------ |
| ✅ **Prometheus** | `curl http://192.168.10.9:9090/-/healthy` | `Prometheus Server is Healthy` |
| ✅ **Grafana** | เปิด `https://grafana.np-dms.work` (หรือ `http://192.168.10.9:3000`) | หน้า Login |
| ✅ **Uptime Kuma** | เปิด `https://uptime.np-dms.work` (หรือ `http://192.168.10.9:3001`) | หน้า Setup |
| ✅ **Node Exp.** | `curl http://192.168.10.9:9100/metrics \| head` | Metrics output |
| ✅ **cAdvisor** | `curl http://192.168.10.9:8080/healthz` | `ok` |
| ✅ **Loki** | `curl http://192.168.10.9:3100/ready` | `ready` |
| ✅ **Promtail** | เช็ค Logs: `docker logs promtail` | ไม่ควรมี Error + เห็น connection success |
---
@@ -661,7 +728,7 @@ curl -s http://localhost:9090/api/v1/targets | grep -E '"qnap-(node|cadvisor)"'
4. เลือก **Prometheus**
- URL: `http://prometheus:9090`
- กด **Save & Test** → ต้องขึ้น ✅
5. Import Dashboards (ดูหัวข้อ [Grafana Dashboards](#grafana-dashboards))
5. Import Dashboards (ดูรายละเอียดในหัวข้อ [6. Grafana Dashboards Setup](#6-grafana-dashboards-setup))
#### Uptime Kuma — First Setup
@@ -671,5 +738,62 @@ curl -s http://localhost:9090/api/v1/targets | grep -E '"qnap-(node|cadvisor)"'
---
### 6. Grafana Dashboards Setup
เพื่อการ Monitor ที่สมบูรณ์ แนะนำให้ Import Dashboards ต่อไปนี้:
#### 6.1 Host Monitoring (Node Exporter)
* **Concept:** ดู resource ของเครื่อง Host (CPU, RAM, Disk, Network)
* **Dashboard ID:** `1860` (Node Exporter Full)
* **วิธี Import:**
1. ไปที่ **Dashboards****New****Import**
2. ช่อง **Import via grafana.com** ใส่เลข `1860` กด **Load**
3. เลือก Data source: **Prometheus**
4. กด **Import**
#### 6.2 Container Monitoring (cAdvisor)
* **Concept:** ดู resource ของแต่ละ Container (เชื่อม Logs ด้วย)
* **Dashboard ID:** `14282` (Cadvisor exporter)
* **วิธี Import:**
1. ใส่เลข `14282` กด **Load**
2. เลือก Data source: **Prometheus**
3. กด **Import**
#### 6.3 Logs Monitoring (Loki Integration)
เพื่อให้ Dashboard ของ Container แสดง Logs จาก Loki ได้ด้วย:
1. เปิด Dashboard **Cadvisor exporter** ที่เพิ่ง Import มา
2. กดปุ่ม **Add visualization** (หรือ Edit dashboard)
3. เลือก Data source: **Loki**
4. ในช่อง Query ใส่: `{container="$name"}`
* *(Note: `$name` มาจาก Variable ของ Dashboard 14282)*
5. ปรับ Visualization type เป็น **Logs**
6. ตั้งชื่อ Panel ว่า **"Container Logs"**
7. กด **Apply** และ **Save Dashboard**
ตอนนี้เราจะเห็นทั้ง **กราฟการกินทรัพยากร** และ **Logs** ของ Container นั้นๆ ในหน้าเดียวกันครับ
#### 6.4 Integrated Dashboard (Recommended)
ผมได้เตรียม JSON file ที่รวม Metrics และ Logs ไว้ให้แล้วครับ:
1. ไปที่ **Dashboards****New****Import**
2. ลากไฟล์ หรือ Copy เนื้อหาจากไฟล์:
`specs/08-infrastructure/grafana/dashboards/lcbp3-docker-monitoring.json`
3. กด **Load** และ **Import**
## 7.3 Backup / Export Dashboards
เมื่อปรับแต่ง Dashboard จนพอใจแล้ว ควร Export เก็บเป็นไฟล์ JSON ไว้ backup หรือ version control:
1. เปิด Dashboard ที่ต้องการ backup
2. ไปที่ปุ่ม **Share Dashboard** (ไอคอน 🔗 หรือ Share มุมซ้ายบน)
3. เลือกTab **Export**
4. เปิดตัวเลือก **Export for sharing externally** (เพื่อให้ลบ hardcoded value)
5. กด **Save to file**
6. นำไฟล์ JSON มาเก็บไว้ที่ path: `specs/08-infrastructure/grafana/dashboards/`
---
> 📝 **หมายเหตุ**: เอกสารนี้อ้างอิงจาก Architecture Document **v1.8.0** - Monitoring Stack deploy บน ASUSTOR AS5403T

View File

@@ -0,0 +1,810 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 18,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 18,
"panels": [],
"title": "Logs",
"type": "row"
},
{
"datasource": {
"type": "loki",
"uid": "a78950eb-fe7b-48c5-bbb5-7ef22a250c29"
},
"gridPos": {
"h": 41,
"w": 24,
"x": 0,
"y": 1
},
"id": 6,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "a78950eb-fe7b-48c5-bbb5-7ef22a250c29"
},
"editorMode": "code",
"expr": "{container=~\"$container\"} |= ``",
"queryType": "range",
"refId": "A"
}
],
"title": "Container Logs",
"type": "logs"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 42
},
"id": 16,
"panels": [],
"title": "CPU",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 43
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"editorMode": "code",
"expr": "rate(container_cpu_usage_seconds_total{image!=\"\",name=~\"$container\",instance=~\"$host\"}[5m]) * 100",
"legendFormat": "{{name}}",
"range": true,
"refId": "A"
}
],
"title": "CPU Usage (%)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 51
},
"id": 22,
"panels": [],
"title": "Memory",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 52
},
"hiddenSeries": false,
"id": 23,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"expr": "sum(container_memory_rss{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Memory Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:606",
"format": "bytes",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:607",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 52
},
"hiddenSeries": false,
"id": 24,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"expr": "sum(container_memory_cache{instance=~\"$host\",name=~\"$container\",name=~\".+\"}) by (name)",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Memory Cached",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:606",
"format": "bytes",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:607",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 60
},
"id": 21,
"panels": [],
"title": "Network",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 61
},
"hiddenSeries": false,
"id": 25,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"hideEmpty": false,
"hideZero": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)",
"hide": false,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Received Network Traffic",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:674",
"format": "Bps",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:675",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 61
},
"hiddenSeries": false,
"id": 26,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.2.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$host\",name=~\"$container\",name=~\".+\"}[5m])) by (name)",
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Sent Network Traffic",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:832",
"format": "Bps",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:833",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 69
},
"id": 19,
"panels": [],
"title": "Misc",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": false,
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "id"
},
"properties": [
{
"id": "custom.width",
"value": 260
}
]
},
{
"matcher": {
"id": "byName",
"options": "Running"
},
"properties": [
{
"id": "unit",
"value": "d"
},
{
"id": "decimals",
"value": 1
},
{
"id": "custom.cellOptions",
"value": {
"type": "color-text"
}
},
{
"id": "color",
"value": {
"fixedColor": "dark-green",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 70
},
"id": 20,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": []
},
"pluginVersion": "10.2.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"expr": "(time() - container_start_time_seconds{instance=~\"$host\",name=~\"$container\",name=~\".+\"})/86400",
"format": "table",
"instant": true,
"interval": "",
"legendFormat": "{{name}}",
"refId": "A"
}
],
"title": "Containers Info",
"transformations": [
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": [
"container_label_com_docker_compose_project",
"container_label_com_docker_compose_project_working_dir",
"image",
"instance",
"name",
"Value",
"container_label_com_docker_compose_service"
]
}
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"indexByName": {},
"renameByName": {
"Value": "Running",
"container_label_com_docker_compose_project": "Label",
"container_label_com_docker_compose_project_working_dir": "Working dir",
"container_label_com_docker_compose_service": "Service",
"image": "Registry Image",
"instance": "Instance",
"name": "Name"
}
}
}
],
"type": "table"
}
],
"refresh": "10s",
"schemaVersion": 38,
"tags": [
"docker",
"monitoring",
"lcbp3"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"definition": "label_values(container_cpu_usage_seconds_total{image!=\"\"}, instance)",
"hide": 0,
"includeAll": true,
"label": "Host",
"multi": false,
"name": "host",
"options": [],
"query": {
"query": "label_values(container_cpu_usage_seconds_total{image!=\"\"}, instance)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "/(.*)/",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"allValue": ".+",
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "bd6668d2-9dc4-40a7-8cd2-8b8c9719b1fb"
},
"definition": "label_values(container_cpu_usage_seconds_total{instance=~\"$host\", image!=\"\"}, name)",
"hide": 0,
"includeAll": true,
"label": "Container",
"multi": true,
"name": "container",
"options": [],
"query": {
"query": "label_values(container_cpu_usage_seconds_total{instance=~\"$host\", image!=\"\"}, name)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "/^\\/?(.*)$/",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Data Center Overview (Loki + Prometheus)",
"uid": "lcbp3-docker-metrics-logs",
"version": 5,
"weekStart": ""
}

View File

@@ -0,0 +1,232 @@
# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.0: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
x-restart: &restart_policy
restart: unless-stopped
x-logging: &default_logging
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
networks:
lcbp3:
external: true
services:
# ----------------------------------------------------------------
# 1. Prometheus (Metrics Collection & Storage)
# ----------------------------------------------------------------
prometheus:
<<: [*restart_policy, *default_logging]
image: prom/prometheus:v2.48.0
container_name: prometheus
stdin_open: true
tty: true
deploy:
resources:
limits:
cpus: "1.0"
memory: 1G
reservations:
cpus: "0.25"
memory: 256M
environment:
TZ: "Asia/Bangkok"
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.enable-lifecycle'
ports:
- "9090:9090"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro"
- "/volume1/np-dms/monitoring/prometheus/data:/prometheus"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 2. Grafana (Dashboard & Visualization)
# ----------------------------------------------------------------
grafana:
<<: [*restart_policy, *default_logging]
image: grafana/grafana:10.2.2
container_name: grafana
stdin_open: true
tty: true
deploy:
resources:
limits:
cpus: "1.0"
memory: 512M
reservations:
cpus: "0.25"
memory: 128M
environment:
TZ: "Asia/Bangkok"
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: "Center#2025"
GF_SERVER_ROOT_URL: "https://grafana.np-dms.work"
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
ports:
- "3000:3000"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana"
depends_on:
- prometheus
healthcheck:
test: ["CMD-SHELL", "wget --spider -q http://localhost:3000/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 3. Uptime Kuma (Service Availability Monitoring)
# ----------------------------------------------------------------
uptime-kuma:
<<: [*restart_policy, *default_logging]
image: louislam/uptime-kuma:1
container_name: uptime-kuma
deploy:
resources:
limits:
cpus: "0.5"
memory: 256M
environment:
TZ: "Asia/Bangkok"
ports:
- "3001:3001"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/uptime-kuma/data:/app/data"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:3001/api/entry-page || exit 1"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 4. Node Exporter (Host Metrics - ASUSTOR)
# ----------------------------------------------------------------
node-exporter:
<<: [*restart_policy, *default_logging]
image: prom/node-exporter:v1.7.0
container_name: node-exporter
deploy:
resources:
limits:
cpus: "0.5"
memory: 128M
environment:
TZ: "Asia/Bangkok"
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
ports:
- "9100:9100"
networks:
- lcbp3
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:9100/metrics"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 5. cAdvisor (Container Metrics - ASUSTOR)
# ----------------------------------------------------------------
cadvisor:
<<: [*restart_policy, *default_logging]
image: gcr.io/cadvisor/cadvisor:v0.47.2
container_name: cadvisor
deploy:
resources:
limits:
cpus: "0.5"
memory: 256M
environment:
TZ: "Asia/Bangkok"
ports:
- "8088:8088"
networks:
- lcbp3
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 6. Loki (Log Aggregation)
# ----------------------------------------------------------------
loki:
<<: [*restart_policy, *default_logging]
image: grafana/loki:2.9.0
container_name: loki
deploy:
resources:
limits:
cpus: "0.5"
memory: 512M
environment:
TZ: "Asia/Bangkok"
command: -config.file=/etc/loki/local-config.yaml
ports:
- "3100:3100"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/loki/data:/loki"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 7. Promtail (Log Shipper)
# ----------------------------------------------------------------
promtail:
<<: [*restart_policy, *default_logging]
image: grafana/promtail:2.9.0
container_name: promtail
user: "0:0"
deploy:
resources:
limits:
cpus: "0.5"
memory: 256M
environment:
TZ: "Asia/Bangkok"
command: -config.file=/etc/promtail/promtail-config.yml
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro"
- "/var/run/docker.sock:/var/run/docker.sock:ro"
- "/var/lib/docker/containers:/var/lib/docker/containers:ro"
depends_on:
- loki