260223:1415 20260223 nextJS & nestJS Best practices
All checks were successful
Build and Deploy / deploy (push) Successful in 4m44s

This commit is contained in:
admin
2026-02-23 14:15:06 +07:00
parent c90a664f53
commit ef16817f38
164 changed files with 24815 additions and 311 deletions

View File

@@ -1,904 +0,0 @@
# Infrastructure Setup
> 📍 **Document Version:** v1.8.0
> 🖥️ **Primary Server:** QNAP TS-473A (Application & Database)
> 💾 **Backup Server:** ASUSTOR AS5403T (Infrastructure & Backup)
---
## Overview
> 📖 **ดูรายละเอียด Server Roles และ Service Distribution ได้ที่:** [README.md](README.md#-hardware-infrastructure)
>
> เอกสารนี้มุ่งเน้นการตั้งค่า Technical Configuration สำหรับแต่ละ Service
---
## 1. Redis Configuration (Standalone + Persistence)
### 1.1 Docker Compose Setup
```yaml
# docker-compose-redis.yml
version: '3.8'
services:
redis:
image: 'redis:7.2-alpine'
container_name: lcbp3-redis
restart: unless-stopped
# AOF: Enabled for durability
# Maxmemory: Prevent OOM
command: redis-server --appendonly yes --requirepass ${REDIS_PASSWORD} --maxmemory 1gb --maxmemory-policy noeviction
volumes:
- ./redis/data:/data
ports:
- '6379:6379'
networks:
- lcbp3
deploy:
resources:
limits:
cpus: '2.0'
memory: 1.5G
networks:
lcbp3:
external: true
```
## 2. Database Configuration
### 2.1 MariaDB Optimization for Numbering
```sql
-- /etc/mysql/mariadb.conf.d/50-numbering.cnf
[mysqld]
# Connection pool
max_connections = 200
thread_cache_size = 50
# Query cache (disabled for InnoDB)
query_cache_type = 0
query_cache_size = 0
# InnoDB settings
innodb_buffer_pool_size = 4G
innodb_log_file_size = 512M
innodb_flush_log_at_trx_commit = 1
innodb_lock_wait_timeout = 50
# Performance Schema
performance_schema = ON
performance_schema_instrument = 'wait/lock/%=ON'
# Binary logging
log_bin = /var/log/mysql/mysql-bin.log
expire_logs_days = 7
max_binlog_size = 100M
# Slow query log
slow_query_log = 1
slow_query_log_file = /var/log/mysql/slow-query.log
long_query_time = 1
```
### 2.2 Monitoring Locks
```sql
-- Check for lock contention
SELECT
r.trx_id waiting_trx_id,
r.trx_mysql_thread_id waiting_thread,
r.trx_query waiting_query,
b.trx_id blocking_trx_id,
b.trx_mysql_thread_id blocking_thread,
b.trx_query blocking_query
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx b ON b.trx_id = w.blocking_trx_id
INNER JOIN information_schema.innodb_trx r ON r.trx_id = w.requesting_trx_id;
-- Check active transactions
SELECT * FROM information_schema.innodb_trx;
-- Kill long-running transaction (if needed)
KILL <thread_id>;
```
---
## 3. Backend Service Configuration
### 3.1 Backend Service Deployment
#### Docker Compose
```yaml
# docker-compose-backend.yml
version: '3.8'
services:
backend-1:
image: lcbp3-backend:latest
container_name: lcbp3-backend-1
environment:
- NODE_ENV=production
- DB_HOST=mariadb
- REDIS_HOST=cache
- REDIS_PORT=6379
- NUMBERING_LOCK_TIMEOUT=5000
- NUMBERING_RESERVATION_TTL=300
ports:
- "3001:3000"
depends_on:
- mariadb
- cache
networks:
- lcbp3
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
interval: 30s
timeout: 10s
retries: 3
backend-2:
image: lcbp3-backend:latest
container_name: lcbp3-backend-2
environment:
- NODE_ENV=production
- DB_HOST=mariadb
- REDIS_HOST=cache
- REDIS_PORT=6379
ports:
- "3002:3000"
depends_on:
- mariadb
- cache
networks:
- lcbp3
restart: unless-stopped
networks:
lcbp3:
external: true
```
#### Health Check Endpoint
```typescript
// health/numbering.health.ts
import { Injectable } from '@nestjs/common';
import { HealthIndicator, HealthIndicatorResult } from '@nestjs/terminus';
import { Redis } from 'ioredis';
import { DataSource } from 'typeorm';
@Injectable()
export class NumberingHealthIndicator extends HealthIndicator {
  constructor(
    private redis: Redis,
    private dataSource: DataSource,
  ) {
    super();
  }

  /**
   * Runs all numbering-subsystem checks (Redis, database, sequence
   * integrity) in parallel and reports healthy only when every check
   * returns status 'up' — 'degraded' counts as unhealthy here.
   */
  async isHealthy(key: string): Promise<HealthIndicatorResult> {
    const results = await Promise.all([
      this.checkRedis(),
      this.checkDatabase(),
      this.checkSequenceIntegrity(),
    ]);
    const allUp = results.every((check) => check.status === 'up');
    return this.getStatus(key, allUp, { checks: results });
  }

  // Ping Redis; any error means the cache layer is unreachable.
  private async checkRedis(): Promise<any> {
    try {
      await this.redis.ping();
      return { name: 'redis', status: 'up' };
    } catch (error) {
      return { name: 'redis', status: 'down', error: error.message };
    }
  }

  // Cheapest round-trip that proves the DB connection actually works.
  private async checkDatabase(): Promise<any> {
    try {
      await this.dataSource.query('SELECT 1');
      return { name: 'database', status: 'up' };
    } catch (error) {
      return { name: 'database', status: 'down', error: error.message };
    }
  }

  // Flags sequences whose counter has overrun the configured max_value.
  private async checkSequenceIntegrity(): Promise<any> {
    try {
      const rows = await this.dataSource.query(`
        SELECT COUNT(*) as count
        FROM document_numbering_sequences
        WHERE current_value > (
          SELECT max_value FROM document_numbering_configs
          WHERE id = config_id
        )
      `);
      const hasIssue = rows[0].count > 0;
      return {
        name: 'sequence_integrity',
        status: hasIssue ? 'degraded' : 'up',
        exceeded_sequences: rows[0].count,
      };
    } catch (error) {
      return { name: 'sequence_integrity', status: 'down', error: error.message };
    }
  }
}
```
---
## 4. Monitoring & Alerting
### 4.1 Prometheus Configuration
```yaml
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
rule_files:
- "/etc/prometheus/alerts/numbering.yml"
scrape_configs:
- job_name: 'backend'
static_configs:
- targets:
- 'backend-1:3000'
- 'backend-2:3000'
metrics_path: '/metrics'
- job_name: 'redis-numbering'
static_configs:
- targets:
- 'redis-1:6379'
- 'redis-2:6379'
- 'redis-3:6379'
metrics_path: '/metrics'
- job_name: 'mariadb'
static_configs:
- targets:
- 'mariadb-exporter:9104'
```
### 4.2 Alert Manager Configuration
```yaml
# alertmanager.yml
global:
resolve_timeout: 5m
route:
receiver: 'default'
group_by: ['alertname', 'severity']
group_wait: 10s
group_interval: 10s
repeat_interval: 12h
routes:
- match:
severity: critical
receiver: 'critical'
continue: true
- match:
severity: warning
receiver: 'warning'
receivers:
- name: 'default'
slack_configs:
- api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
channel: '#lcbp3-alerts'
title: '{{ .GroupLabels.alertname }}'
text: '{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}'
- name: 'critical'
email_configs:
- to: 'devops@lcbp3.com'
from: 'alerts@lcbp3.com'
smarthost: 'smtp.gmail.com:587'
auth_username: 'alerts@lcbp3.com'
auth_password: 'your-password'
headers:
Subject: '🚨 CRITICAL: {{ .GroupLabels.alertname }}'
pagerduty_configs:
- service_key: 'YOUR_PAGERDUTY_KEY'
- name: 'warning'
slack_configs:
- api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
channel: '#lcbp3-warnings'
```
### 4.3 Grafana Dashboards
#### Import Dashboard JSON
```bash
# Download dashboard template
curl -o numbering-dashboard.json \
https://raw.githubusercontent.com/lcbp3/grafana-dashboards/main/numbering.json
# Import to Grafana
curl -X POST http://admin:admin@localhost:3000/api/dashboards/db \
-H "Content-Type: application/json" \
-d @numbering-dashboard.json
```
#### Key Panels to Monitor
1. **Numbers Generated per Minute** - Rate of number creation
2. **Sequence Utilization** - Current usage vs max (alert >90%)
3. **Lock Wait Time (p95)** - Performance indicator
4. **Lock Failures** - System health indicator
5. **Redis Health (Single instance)** - Node status
6. **Database Connection Pool** - Resource usage
---
## 5. Backup & Recovery
### 5.1 Database Backup Strategy
#### Automated Backup Script
```bash
#!/bin/bash
# scripts/backup-numbering-db.sh
#
# Dump the three document-numbering tables from the MariaDB container,
# compress the dump, and prune backups older than 30 days.
# Abort on any error so a failed dump never triggers the prune step.
set -euo pipefail

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/backups/numbering"
DB_NAME="lcbp3_production"

echo "🔄 Starting backup at $DATE"

# Create backup directory
mkdir -p "$BACKUP_DIR"

# --single-transaction gives a consistent InnoDB snapshot without
# blocking concurrent writers.
docker exec lcbp3-mariadb mysqldump \
  --single-transaction \
  --routines \
  --triggers \
  "$DB_NAME" \
  document_numbering_configs \
  document_numbering_sequences \
  document_numbering_audit_logs \
  > "$BACKUP_DIR/numbering_$DATE.sql"

# Compress backup
gzip "$BACKUP_DIR/numbering_$DATE.sql"

# Keep only last 30 days — runs only after a successful dump
# because set -e aborts the script earlier on failure.
find "$BACKUP_DIR" -name "numbering_*.sql.gz" -mtime +30 -delete

echo "✅ Backup complete: numbering_$DATE.sql.gz"
```
#### Cron Schedule
```cron
# Run backup daily at 2 AM
0 2 * * * /opt/lcbp3/scripts/backup-numbering-db.sh >> /var/log/numbering-backup.log 2>&1
# Run integrity check weekly on Sunday at 3 AM
0 3 * * 0 /opt/lcbp3/scripts/check-sequence-integrity.sh >> /var/log/numbering-integrity.log 2>&1
```
### 5.2 Redis Backup
#### Enable RDB Persistence
```conf
# redis.conf
save 900 1 # Save if 1 key changed after 900 seconds
save 300 10 # Save if 10 keys changed after 300 seconds
save 60 10000 # Save if 10000 keys changed after 60 seconds
dbfilename dump.rdb
dir /data
# Enable AOF for durability
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
```
#### Backup Script
```bash
#!/bin/bash
# scripts/backup-redis.sh
#
# Snapshot Redis (RDB + AOF) from the `cache` container into a single
# compressed tarball under /backups/redis.
# NOTE(review): if requirepass is enabled (see docker-compose-redis.yml),
# the redis-cli calls below need `-a "$REDIS_PASSWORD"` — confirm.
set -euo pipefail

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/backups/redis"
mkdir -p "$BACKUP_DIR"

echo "Backing up Redis..."

# Trigger BGSAVE, then poll until the background save actually finishes
# instead of hoping a fixed 10-second sleep is long enough.
docker exec cache redis-cli BGSAVE
while docker exec cache redis-cli INFO persistence | grep -q 'rdb_bgsave_in_progress:1'; do
  sleep 1
done

# Copy RDB file
docker cp cache:/data/dump.rdb "$BACKUP_DIR/redis_${DATE}.rdb"

# Copy AOF file
docker cp cache:/data/appendonly.aof "$BACKUP_DIR/redis_${DATE}.aof"

# Compress; -C stores bare file names instead of absolute paths.
tar -czf "$BACKUP_DIR/redis_${DATE}.tar.gz" \
  -C "$BACKUP_DIR" \
  "redis_${DATE}.rdb" \
  "redis_${DATE}.aof"

# Cleanup
rm "$BACKUP_DIR/redis_${DATE}.rdb" "$BACKUP_DIR/redis_${DATE}.aof"

echo "✅ Redis backup complete: redis_${DATE}.tar.gz"
```
### 5.3 Recovery Procedures
#### Scenario 1: Restore from Database Backup
```bash
#!/bin/bash
# scripts/restore-numbering-db.sh
#
# Restore the numbering tables from a (possibly gzipped) mysqldump file.
# Prompts for confirmation because this overwrites live data.
set -euo pipefail

BACKUP_FILE="${1:-}"

if [ -z "$BACKUP_FILE" ]; then
  echo "Usage: ./restore-numbering-db.sh <backup_file>"
  exit 1
fi

echo "⚠️ WARNING: This will overwrite current numbering data!"
read -p "Continue? (yes/no): " confirm
if [ "$confirm" != "yes" ]; then
  echo "Aborted"
  exit 0
fi

# Decompress gzipped dumps into a temp file that is removed on exit;
# plain .sql files are restored in place.
if [[ "$BACKUP_FILE" == *.gz ]]; then
  RESTORE_FILE=$(mktemp /tmp/restore.XXXXXX.sql)
  trap 'rm -f "$RESTORE_FILE"' EXIT
  gunzip -c "$BACKUP_FILE" > "$RESTORE_FILE"
else
  RESTORE_FILE="$BACKUP_FILE"
fi

# Restore
docker exec -i lcbp3-mariadb mysql lcbp3_production < "$RESTORE_FILE"

echo "✅ Restore complete"
echo "🔄 Please verify sequence integrity"
```
#### Scenario 2: Redis Failure
```bash
# Check Redis status
docker exec cache redis-cli ping
# If Redis is down, restart container
docker restart cache
# Verify Redis is running
docker exec cache redis-cli ping
# If restart fails, restore from backup
./scripts/restore-redis.sh /backups/redis/latest.tar.gz
```
---
## 6. Maintenance Procedures
### 6.1 Sequence Adjustment
#### Increase Max Value
```sql
-- Check current utilization
SELECT
dc.document_type,
ds.current_value,
dc.max_value,
ROUND((ds.current_value * 100.0 / dc.max_value), 2) as utilization
FROM document_numbering_sequences ds
JOIN document_numbering_configs dc ON ds.config_id = dc.id
WHERE ds.current_value > dc.max_value * 0.8;
-- Increase max_value for type approaching limit
UPDATE document_numbering_configs
SET max_value = max_value * 10,
updated_at = CURRENT_TIMESTAMP
WHERE document_type = 'COR'
AND max_value < 9999999;
-- Audit log
INSERT INTO document_numbering_audit_logs (
operation, document_type, old_value, new_value,
user_id, metadata
) VALUES (
'ADJUST_MAX_VALUE', 'COR', '999999', '9999999',
1, '{"reason": "Approaching limit", "automated": false}'
);
```
#### Reset Yearly Sequence
```sql
-- For document types with yearly reset
-- Run on January 1st
START TRANSACTION;
-- Create new sequence for new year
INSERT INTO document_numbering_sequences (
config_id,
scope_value,
current_value,
last_used_at
)
SELECT
id as config_id,
YEAR(CURDATE()) as scope_value,
0 as current_value,
NULL as last_used_at
FROM document_numbering_configs
WHERE scope = 'YEARLY';
-- Verify
SELECT * FROM document_numbering_sequences
WHERE scope_value = YEAR(CURDATE());
COMMIT;
```
### 6.2 Cleanup Old Audit Logs
```sql
-- Archive logs older than 2 years
-- Run monthly
START TRANSACTION;
-- Create archive table (if not exists)
CREATE TABLE IF NOT EXISTS document_numbering_audit_logs_archive
LIKE document_numbering_audit_logs;
-- Move old logs to archive
INSERT INTO document_numbering_audit_logs_archive
SELECT * FROM document_numbering_audit_logs
WHERE timestamp < DATE_SUB(CURDATE(), INTERVAL 2 YEAR);
-- Delete from main table
DELETE FROM document_numbering_audit_logs
WHERE timestamp < DATE_SUB(CURDATE(), INTERVAL 2 YEAR);
-- Optimize table
OPTIMIZE TABLE document_numbering_audit_logs;
COMMIT;
-- Export archive to file (optional)
SELECT * FROM document_numbering_audit_logs_archive
INTO OUTFILE '/tmp/audit_archive_2023.csv'
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n';
```
### 6.3 Redis Maintenance
#### Flush Expired Reservations
```bash
#!/bin/bash
# scripts/cleanup-expired-reservations.sh
#
# Delete leaked reservation keys — keys that exist but carry no TTL.
# Keys with a TTL expire on their own, so only TTL == -1 needs cleanup
# (the original `-lt 0` also matched -2, i.e. already-deleted keys).
# NOTE(review): if requirepass is enabled (see docker-compose-redis.yml),
# add `-a "$REDIS_PASSWORD"` to the redis-cli calls — confirm.
set -euo pipefail

# Standalone instance from §1 (container lcbp3-redis) — the previous
# `--cluster call 172.20.0.2:6379` form applied to a cluster deployment.
REDIS_CONTAINER="lcbp3-redis"

echo "🧹 Cleaning up expired reservations..."

# SCAN instead of KEYS: KEYS blocks the server on large keyspaces.
KEYS=$(docker exec "$REDIS_CONTAINER" redis-cli --scan --pattern "reservation:*")

COUNT=0
for KEY in $KEYS; do
  TTL=$(docker exec "$REDIS_CONTAINER" redis-cli TTL "$KEY")
  # TTL == -1: key exists with no expiry — a leaked reservation.
  if [ "$TTL" -eq -1 ]; then
    docker exec "$REDIS_CONTAINER" redis-cli DEL "$KEY" > /dev/null
    COUNT=$((COUNT + 1))
  fi
done

echo "✅ Cleaned up $COUNT expired reservations"
```
---
## 7. Disaster Recovery
### 7.1 Total System Failure
#### Recovery Steps
```bash
#!/bin/bash
# scripts/disaster-recovery.sh
#
# Full-stack recovery order: Redis -> restore Redis -> MariaDB ->
# restore DB -> integrity check -> backends -> health checks.
# Abort on the first failed step so later steps never run on a
# half-recovered system.
set -euo pipefail

echo "🚨 Starting disaster recovery..."

# 1. Start Redis (standalone instance, see docker-compose-redis.yml)
echo "1⃣ Starting Redis..."
docker-compose -f docker-compose-redis.yml up -d
sleep 30

# 2. Restore Redis backups
echo "2⃣ Restoring Redis backups..."
./scripts/restore-redis.sh /backups/redis/latest.tar.gz

# 3. Start database
echo "3⃣ Starting MariaDB..."
docker-compose -f docker-compose-db.yml up -d
sleep 30

# 4. Restore database
echo "4⃣ Restoring database..."
./scripts/restore-numbering-db.sh /backups/db/latest.sql.gz

# 5. Verify sequence integrity
echo "5⃣ Verifying sequence integrity..."
./scripts/check-sequence-integrity.sh

# 6. Start backend services
echo "6⃣ Starting backend services..."
docker-compose -f docker-compose-backend.yml up -d

# 7. Health-check each backend on its published port (backend-1 -> 3001,
#    backend-2 -> 3002 per docker-compose-backend.yml). The old loop
#    curled port 3001 five times while labelling them "Backend 1..5".
echo "7⃣ Running health checks..."
sleep 60
for PORT in 3001 3002; do
  curl -f "http://localhost:$PORT/health" || echo "Backend on port $PORT not healthy"
done

echo "✅ Disaster recovery complete"
echo "⚠️ Please verify system functionality manually"
```
### 7.2 RTO/RPO Targets
| Scenario | RTO | RPO | Priority |
| ---------------------------- | ------- | ------ | -------- |
| Single backend node failure | 0 min | 0 | P0 |
| Single Redis node failure | 0 min | 0 | P0 |
| Database primary failure | 5 min | 0 | P0 |
| Complete data center failure | 1 hour | 15 min | P1 |
| Data corruption | 4 hours | 1 day | P2 |
---
## 8. Runbooks
### 8.1 High Sequence Utilization (>90%)
**Alert**: `SequenceWarning` or `SequenceCritical`
**Steps**:
1. Check current utilization
```sql
SELECT document_type, current_value, max_value,
ROUND((current_value * 100.0 / max_value), 2) as pct
FROM document_numbering_sequences s
JOIN document_numbering_configs c ON s.config_id = c.id
WHERE current_value > max_value * 0.9;
```
2. Assess impact
- How many numbers left?
- Daily usage rate?
- Days until exhaustion?
3. Take action
```sql
-- Option A: Increase max_value
UPDATE document_numbering_configs
SET max_value = max_value * 10
WHERE document_type = 'COR';
-- Option B: Reset sequence (yearly types only)
-- Schedule for next year/month
```
4. Notify stakeholders
5. Update monitoring thresholds if needed
---
### 8.2 High Lock Wait Time
**Alert**: `HighLockWaitTime`
**Steps**:
1. Check Redis cluster health
```bash
docker exec lcbp3-redis-1 redis-cli cluster info
docker exec lcbp3-redis-1 redis-cli cluster nodes
```
2. Check database locks
```sql
SELECT * FROM information_schema.innodb_lock_waits;
SELECT * FROM information_schema.innodb_trx
WHERE trx_started < NOW() - INTERVAL 30 SECOND;
```
3. Identify bottleneck
- Redis slow?
- Database slow?
- High concurrent load?
4. Take action based on cause:
- **Redis**: Add more nodes, check network latency
- **Database**: Optimize queries, increase connection pool
- **High load**: Scale horizontally (add backend nodes)
5. Monitor improvements
---
### 8.3 Redis Down
**Alert**: `RedisUnavailable`
**Steps**:
1. Verify Redis is down
```bash
docker exec cache redis-cli ping || echo "Redis DOWN"
```
2. Check system falls back to DB-only mode
```bash
curl http://localhost:3001/health/numbering
# Should show: fallback_mode: true
```
3. Restart Redis container
```bash
docker restart cache
sleep 10
docker exec cache redis-cli ping
```
4. If restart fails, restore from backup
```bash
./scripts/restore-redis.sh /backups/redis/latest.tar.gz
```
5. Verify numbering system back to normal
```bash
curl http://localhost:3001/health/numbering
# Should show: fallback_mode: false
```
6. Review logs for root cause
```bash
docker logs cache --tail 100
```
---
## 9. Performance Tuning
### 9.1 Slow Number Generation
**Diagnosis**:
```sql
-- Check slow queries
SELECT * FROM mysql.slow_log
WHERE sql_text LIKE '%document_numbering%'
ORDER BY query_time DESC
LIMIT 10;
-- Check index usage
EXPLAIN SELECT * FROM document_numbering_sequences
WHERE config_id = 1 AND scope_value = '2025'
FOR UPDATE;
```
**Optimizations**:
```sql
-- Add missing indexes
CREATE INDEX idx_sequence_lookup
ON document_numbering_sequences(config_id, scope_value);
-- Optimize table
OPTIMIZE TABLE document_numbering_sequences;
-- Update statistics
ANALYZE TABLE document_numbering_sequences;
```
### 9.2 Redis Memory Optimization
```bash
# Check memory usage
docker exec cache redis-cli INFO memory
# If memory high, check keys
docker exec cache redis-cli --bigkeys
# Set maxmemory policy
docker exec cache redis-cli CONFIG SET maxmemory 2gb
docker exec cache redis-cli CONFIG SET maxmemory-policy allkeys-lru
```
---
## 10. Security Hardening
### 10.1 Redis Security
```conf
# redis.conf
requirepass your-strong-redis-password
bind 0.0.0.0
protected-mode yes
rename-command FLUSHDB ""
rename-command FLUSHALL ""
rename-command CONFIG "CONFIG_abc123"
```
### 10.2 Database Security
```sql
-- Create dedicated numbering user
CREATE USER 'numbering'@'%' IDENTIFIED BY 'strong-password';
-- Grant minimal permissions
GRANT SELECT, INSERT, UPDATE ON lcbp3_production.document_numbering_* TO 'numbering'@'%';
GRANT SELECT ON lcbp3_production.users TO 'numbering'@'%';
FLUSH PRIVILEGES;
```
### 10.3 Network Security
```yaml
# docker-compose-network.yml
networks:
lcbp3:
driver: bridge
ipam:
config:
- subnet: 172.20.0.0/16
driver_opts:
com.docker.network.bridge.name: lcbp3-br
com.docker.network.bridge.enable_icc: "true"
com.docker.network.bridge.enable_ip_masquerade: "true"
```
---
## 11. Compliance & Audit
### 11.1 Audit Log Retention
```sql
-- Export audit logs for compliance
SELECT *
FROM document_numbering

View File

@@ -1,290 +0,0 @@
# 🗺️ แผนผัง Network Architecture & Container Services (LCBP3-DMS)
แผนผังนี้แสดงการแบ่งส่วนเครือข่าย (VLANs), การเชื่อมต่อ Firewall (ACLs) และบทบาทของ Server ทั้งสองตัว (QNAP: Application, ASUSTOR: Infrastructure)
> 📖 **ดูรายละเอียด Server Roles และ Service Distribution ได้ที่:** [README.md](README.md#-hardware-infrastructure)
---
## 1. Data Flow Diagram
```mermaid
flowchart TB
subgraph Internet["🌐 Internet"]
User[("👤 User")]
end
subgraph QNAP["💾 QNAP TS-473A (App Server)"]
NPM["🔲 NPM<br/>(Reverse Proxy)"]
Frontend["📱 Next.js<br/>(Frontend)"]
Backend["⚙️ NestJS<br/>(Backend API)"]
DB["🗄️ MariaDB"]
Redis["📦 Redis"]
ES["🔍 Elasticsearch"]
end
subgraph ASUSTOR["💾 ASUSTOR AS5403T (Infra Server)"]
Portainer["🐳 Portainer"]
Registry["📦 Registry"]
Prometheus["📊 Prometheus"]
Grafana["📈 Grafana"]
Uptime["⏱️ Uptime Kuma"]
Backup["💾 Restic/Borg"]
NFS["📁 NFS Storage"]
end
User -->|HTTPS 443| NPM
NPM --> Frontend
NPM --> Backend
Frontend --> Backend
Backend --> DB
Backend --> Redis
Backend --> ES
DB -.->|Scheduled Backup| Backup
Backup --> NFS
Portainer -.->|Manage| QNAP
Prometheus -.->|Collect Metrics| Backend
Prometheus -.->|Collect Metrics| DB
Uptime -.->|Health Check| NPM
```
---
## 2. Docker Management View
```mermaid
flowchart TB
subgraph Portainer["🐳 Portainer (ASUSTOR - Central Management)"]
direction TB
subgraph LocalStack["📦 Local Infra Stack"]
Registry["Docker Registry"]
Prometheus["Prometheus"]
Grafana["Grafana"]
Uptime["Uptime Kuma"]
Backup["Restic/Borg"]
Loki["Loki (Logs)"]
ClamAV["ClamAV"]
end
subgraph RemoteStack["🔗 Remote: QNAP App Stack"]
Frontend["Next.js"]
Backend["NestJS"]
MariaDB["MariaDB"]
Redis["Redis"]
ES["Elasticsearch"]
NPM["NPM"]
Gitea["Gitea"]
N8N["n8n"]
PMA["phpMyAdmin"]
end
end
```
---
## 3. Security Zones Diagram
```mermaid
flowchart TB
subgraph PublicZone["🌐 PUBLIC ZONE"]
direction LR
NPM["NPM (Reverse Proxy)"]
SSL["SSL/TLS Termination"]
end
subgraph AppZone["📱 APPLICATION ZONE (QNAP)"]
direction LR
Frontend["Next.js"]
Backend["NestJS"]
N8N["n8n"]
Gitea["Gitea"]
end
subgraph DataZone["💾 DATA ZONE (QNAP - Internal Only)"]
direction LR
MariaDB["MariaDB"]
Redis["Redis"]
ES["Elasticsearch"]
end
subgraph InfraZone["🛠️ INFRASTRUCTURE ZONE (ASUSTOR)"]
direction LR
Backup["Backup Services"]
Registry["Docker Registry"]
Monitoring["Prometheus + Grafana"]
Logs["Loki / Syslog"]
end
PublicZone -->|HTTPS Only| AppZone
AppZone -->|Internal API| DataZone
DataZone -.->|Backup| InfraZone
AppZone -.->|Metrics| InfraZone
```
---
## 4. แผนผังการเชื่อมต่อเครือข่าย (Network Flow)
```mermaid
graph TD
direction TB
subgraph Flow1["การเชื่อมต่อจากภายนอก (Public WAN)"]
User["ผู้ใช้งานภายนอก (Internet)"]
end
subgraph Router["Router (ER7206) - Gateway"]
User -- "Port 80/443 (HTTPS/HTTP)" --> ER7206
ER7206["Port Forwarding<br/>TCP 80 → 192.168.10.8:80<br/>TCP 443 → 192.168.10.8:443"]
end
subgraph VLANs["เครือข่ายภายใน (VLANs & Firewall Rules)"]
direction LR
subgraph VLAN10["VLAN 10: Servers<br/>192.168.10.x"]
QNAP["QNAP NAS<br/>(192.168.10.8)"]
ASUSTOR["ASUSTOR NAS<br/>(192.168.10.9)"]
end
subgraph VLAN20["VLAN 20: MGMT<br/>192.168.20.x"]
AdminPC["Admin PC / Switches"]
end
subgraph VLAN30["VLAN 30: USER<br/>192.168.30.x"]
OfficePC["PC พนักงาน/Wi-Fi"]
end
subgraph VLAN70["VLAN 70: GUEST<br/>192.168.70.x"]
GuestPC["Guest Wi-Fi"]
end
subgraph Firewall["Firewall ACLs (OC200/ER7206)"]
direction TB
rule1["Rule 1: DENY<br/>Guest (VLAN 70) → All VLANs"]
rule2["Rule 2: DENY<br/>Server (VLAN 10) → User (VLAN 30)"]
rule3["Rule 3: ALLOW<br/>User (VLAN 30) → QNAP<br/>Ports: 443, 80"]
rule4["Rule 4: ALLOW<br/>MGMT (VLAN 20) → All"]
end
GuestPC -.x|rule1| QNAP
QNAP -.x|rule2| OfficePC
OfficePC -- "https://lcbp3.np-dms.work" -->|rule3| QNAP
AdminPC -->|rule4| QNAP
AdminPC -->|rule4| ASUSTOR
end
ER7206 --> QNAP
subgraph DockerQNAP["Docker 'lcbp3' (QNAP - Applications)"]
direction TB
subgraph PublicServices["Services ที่ NPM เปิดสู่ภายนอก"]
direction LR
NPM["NPM (Nginx Proxy Manager)"]
FrontendC["frontend:3000"]
BackendC["backend:3000"]
GiteaC["gitea:3000"]
PMAC["pma:80"]
N8NC["n8n:5678"]
end
subgraph InternalServices["Internal Services (Backend Only)"]
direction LR
DBC["mariadb:3306"]
CacheC["cache:6379"]
SearchC["search:9200"]
end
NPM -- "lcbp3.np-dms.work" --> FrontendC
NPM -- "backend.np-dms.work" --> BackendC
NPM -- "git.np-dms.work" --> GiteaC
NPM -- "pma.np-dms.work" --> PMAC
NPM -- "n8n.np-dms.work" --> N8NC
BackendC -- "lcbp3 Network" --> DBC
BackendC -- "lcbp3 Network" --> CacheC
BackendC -- "lcbp3 Network" --> SearchC
end
subgraph DockerASUSTOR["Docker 'lcbp3' (ASUSTOR - Infrastructure)"]
direction TB
subgraph InfraServices["Infrastructure Services"]
direction LR
PortainerC["portainer:9443"]
RegistryC["registry:5000"]
PrometheusC["prometheus:9090"]
GrafanaC["grafana:3000"]
UptimeC["uptime-kuma:3001"]
end
subgraph BackupServices["Backup & Storage"]
direction LR
ResticC["restic/borg"]
NFSC["NFS Share"]
end
PortainerC -.->|"Remote Endpoint"| NPM
PrometheusC -.->|"Scrape Metrics"| BackendC
ResticC --> NFSC
end
QNAP --> NPM
ASUSTOR --> PortainerC
DBC -.->|"Scheduled Backup"| ResticC
```
---
## 5. Firewall & Security Configuration
> 📖 **ดูรายละเอียด Firewall ACLs และ Port Forwarding ได้ที่:** [03_Securities.md](03_Securities.md)
ไฟล์ `03_Securities.md` ประกอบด้วย:
- 🌐 VLAN Segmentation
- 🔥 Firewall Rules (IP Groups, Port Groups, Switch ACL, Gateway ACL)
- 🚪 Port Forwarding Configuration
---
## 6. Container Service Distribution
> 📖 **ดูรายละเอียด Container Services, Ports, และ Domain Mapping ได้ที่:** [README.md](README.md#-domain-mapping-npm-proxy)
---
## 7. Backup Flow
```mermaid
flowchart LR
subgraph QNAP["💾 QNAP TS-473A (Source)"]
direction TB
DB["🗄️ MariaDB<br/>(mysqldump)"]
Redis["📦 Redis<br/>(RDB + AOF)"]
Config["⚙️ App Config<br/>+ Volumes"]
end
subgraph ASUSTOR["💾 ASUSTOR AS5403T (Target)"]
direction TB
BackupDB["📁 /volume1/backup/db/<br/>(Restic Repository)"]
BackupRedis["📁 /volume1/backup/redis/"]
BackupConfig["📁 /volume1/backup/config/"]
end
DB -->|"Daily 2AM"| BackupDB
Redis -->|"Daily 3AM"| BackupRedis
Config -->|"Weekly Sun 4AM"| BackupConfig
subgraph Retention["📋 Retention Policy"]
R1["Daily: 7 days"]
R2["Weekly: 4 weeks"]
R3["Monthly: 6 months"]
end
```
---
> 📝 **หมายเหตุ**: เอกสารนี้อ้างอิงจาก Architecture Document **v1.8.0** - Last updated: 2026-01-28

View File

@@ -1,853 +0,0 @@
# การติดตั้ง Monitoring Stack บน ASUSTOR
## **📝 คำอธิบายและข้อควรพิจารณา**
> ⚠️ **หมายเหตุ**: Monitoring Stack ทั้งหมดติดตั้งบน **ASUSTOR AS5403T** ไม่ใช่ QNAP
> เพื่อแยก Application workload ออกจาก Infrastructure/Monitoring workload
Stack สำหรับ Monitoring ประกอบด้วย:
| Service | Port | Purpose | Host |
| :---------------- | :--------------------------- | :-------------------------------- | :------ |
| **Prometheus** | 9090 | เก็บ Metrics และ Time-series data | ASUSTOR |
| **Grafana** | 3000 | Dashboard สำหรับแสดงผล Metrics | ASUSTOR |
| **Node Exporter** | 9100 | เก็บ Metrics ของ Host system | Both |
| **cAdvisor** | 8080 (ASUSTOR) / 8088 (QNAP) | เก็บ Metrics ของ Docker containers | Both |
| **Uptime Kuma** | 3001 | Service Availability Monitoring | ASUSTOR |
| **Loki** | 3100 | Log aggregation | ASUSTOR |
| **Promtail** | - | Log shipper (Sender) | ASUSTOR |
---
## 🏗️ Architecture Overview
```
┌─────────────────────────────────────────────────────────────────────────┐
│ ASUSTOR AS5403T (Monitoring Hub) │
├─────────────────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Prometheus │───▶│ Grafana │ │ Uptime Kuma │ │
│ │ :9090 │ │ :3000 │ │ :3001 │ │
│ └──────┬──────┘ └─────────────┘ └─────────────┘ │
│ │ │
│ │ Scrape Metrics │
│ ▼ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │node-exporter│ │ cAdvisor │ │ Promtail │ │
│ │ :9100 │ │ :8080 │ │ (Log Ship) │ │
│ │ (Local) │ │ (Local) │ │ (Local) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────────────────┘
│ Remote Scrape
┌─────────────────────────────────────────────────────────────────────────┐
│ QNAP TS-473A (App Server) │
├─────────────────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │node-exporter│ │ cAdvisor │ │ Backend │ │
│ │ :9100 │ │ :8080 │ │ /metrics │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────────────────┘
```
---
## กำหนดสิทธิ (บน ASUSTOR)
```bash
# SSH เข้า ASUSTOR
ssh admin@192.168.10.9
# สร้าง Directory
mkdir -p /volume1/np-dms/monitoring/prometheus/data
mkdir -p /volume1/np-dms/monitoring/prometheus/config
mkdir -p /volume1/np-dms/monitoring/grafana/data
mkdir -p /volume1/np-dms/monitoring/uptime-kuma/data
mkdir -p /volume1/np-dms/monitoring/loki/data
mkdir -p /volume1/np-dms/monitoring/promtail/config
# กำหนดสิทธิ์ให้ตรงกับ User ID ใน Container
# Prometheus (UID 65534 - nobody)
chown -R 65534:65534 /volume1/np-dms/monitoring/prometheus
chmod -R 750 /volume1/np-dms/monitoring/prometheus
# Grafana (UID 472)
chown -R 472:472 /volume1/np-dms/monitoring/grafana/data
chmod -R 750 /volume1/np-dms/monitoring/grafana/data
# Uptime Kuma (UID 1000)
chown -R 1000:1000 /volume1/np-dms/monitoring/uptime-kuma/data
chmod -R 750 /volume1/np-dms/monitoring/uptime-kuma/data
# Loki (UID 10001)
chown -R 10001:10001 /volume1/np-dms/monitoring/loki/data
chmod -R 750 /volume1/np-dms/monitoring/loki/data
# Promtail (Runs as root to read docker logs - no specific chown needed for config dir if created by admin)
# But ensure config file is readable
chmod -R 755 /volume1/np-dms/monitoring/promtail/config
```
---
## 🔗 สร้าง Docker Network (ทำครั้งแรกครั้งเดียว)
> ⚠️ **ต้องสร้าง network ก่อน deploy docker-compose ทุกตัว** เพราะทุก service ใช้ `lcbp3` เป็น external network
### สร้างผ่าน Portainer (แนะนำ)
1. เปิด **Portainer** → เลือก Environment ของ ASUSTOR
2. ไปที่ **Networks****Add network**
3. กรอกข้อมูล:
- **Name:** `lcbp3`
- **Driver:** `bridge`
4. กด **Create the network**
### สร้างผ่าน SSH
```bash
# SSH เข้า ASUSTOR
ssh admin@192.168.10.9
# สร้าง external network
docker network create lcbp3
# ตรวจสอบ
docker network ls | grep lcbp3
docker network inspect lcbp3
```
> 📖 **QNAP** ก็ต้องมี network ชื่อ `lcbp3` เช่นกัน (สร้างผ่าน Container Station หรือ SSH)
> ดู [README.md Quick Reference](README.md#-quick-reference) สำหรับคำสั่งบน QNAP
---
## Note: NPM Proxy Configuration (NPM รันบน QNAP → Forward ไป ASUSTOR)
> ⚠️ เนื่องจาก NPM อยู่บน **QNAP** แต่ Monitoring services อยู่บน **ASUSTOR**
> ต้องใช้ **IP Address** (`192.168.10.9`) แทนชื่อ container (resolve ข้ามเครื่องไม่ได้)
| Domain Names | Scheme | Forward Hostname | Forward Port | Block Common Exploits | Websockets | Force SSL | HTTP/2 |
| :--------------------- | :----- | :--------------- | :----------- | :-------------------- | :--------- | :-------- | :----- |
| grafana.np-dms.work | `http` | `192.168.10.9` | 3000 | [x] | [x] | [x] | [x] |
| prometheus.np-dms.work | `http` | `192.168.10.9` | 9090 | [x] | [ ] | [x] | [x] |
| uptime.np-dms.work | `http` | `192.168.10.9` | 3001 | [x] | [x] | [x] | [x] |
---
## Docker Compose File (ASUSTOR)
```yaml
# File: /volume1/np-dms/monitoring/docker-compose.yml
# DMS Container v1.8.0: Application name: lcbp3-monitoring
# Deploy on: ASUSTOR AS5403T
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
x-restart: &restart_policy
restart: unless-stopped
x-logging: &default_logging
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "5"
networks:
lcbp3:
external: true
services:
# ----------------------------------------------------------------
# 1. Prometheus (Metrics Collection & Storage)
# ----------------------------------------------------------------
prometheus:
<<: [*restart_policy, *default_logging]
image: prom/prometheus:v2.48.0
container_name: prometheus
stdin_open: true
tty: true
deploy:
resources:
limits:
cpus: "1.0"
memory: 1G
reservations:
cpus: "0.25"
memory: 256M
environment:
TZ: "Asia/Bangkok"
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.enable-lifecycle'
ports:
- "9090:9090"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro"
- "/volume1/np-dms/monitoring/prometheus/data:/prometheus"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 2. Grafana (Dashboard & Visualization)
# ----------------------------------------------------------------
grafana:
<<: [*restart_policy, *default_logging]
image: grafana/grafana:10.2.2
container_name: grafana
stdin_open: true
tty: true
deploy:
resources:
limits:
cpus: "1.0"
memory: 512M
reservations:
cpus: "0.25"
memory: 128M
environment:
TZ: "Asia/Bangkok"
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: "Center#2025"
GF_SERVER_ROOT_URL: "https://grafana.np-dms.work"
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
ports:
- "3000:3000"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana"
depends_on:
- prometheus
healthcheck:
test: ["CMD-SHELL", "wget --spider -q http://localhost:3000/api/health || exit 1"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 3. Uptime Kuma (Service Availability Monitoring)
# ----------------------------------------------------------------
uptime-kuma:
<<: [*restart_policy, *default_logging]
image: louislam/uptime-kuma:1
container_name: uptime-kuma
deploy:
resources:
limits:
cpus: "0.5"
memory: 256M
environment:
TZ: "Asia/Bangkok"
ports:
- "3001:3001"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/uptime-kuma/data:/app/data"
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:3001/api/entry-page || exit 1"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 4. Node Exporter (Host Metrics - ASUSTOR)
# ----------------------------------------------------------------
node-exporter:
<<: [*restart_policy, *default_logging]
image: prom/node-exporter:v1.7.0
container_name: node-exporter
deploy:
resources:
limits:
cpus: "0.5"
memory: 128M
environment:
TZ: "Asia/Bangkok"
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
ports:
- "9100:9100"
networks:
- lcbp3
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:9100/metrics"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 5. cAdvisor (Container Metrics - ASUSTOR)
# ----------------------------------------------------------------
cadvisor:
  <<: [*restart_policy, *default_logging]
  image: gcr.io/cadvisor/cadvisor:v0.47.2
  container_name: cadvisor
  # cAdvisor needs privileged access + /dev/kmsg to read container stats
  privileged: true
  devices:
    - /dev/kmsg
  deploy:
    resources:
      limits:
        cpus: "0.5"
        memory: 256M
  environment:
    TZ: "Asia/Bangkok"
  # cAdvisor listens on 8080 inside the container. Publish host 8080 to match
  # the service table (8080 on ASUSTOR), the verify step
  # (curl http://192.168.10.9:8080/healthz) and the healthcheck below.
  # (The previous "8088:8088" mapped to a container port nothing listens on.)
  ports:
    - "8080:8080"
  networks:
    - lcbp3
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker/:/var/lib/docker:ro
    - /dev/disk/:/dev/disk:ro
  healthcheck:
    test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
    interval: 30s
    timeout: 10s
    retries: 3
# ----------------------------------------------------------------
# 6. Loki (Log Aggregation)
# ----------------------------------------------------------------
loki:
<<: [*restart_policy, *default_logging]
image: grafana/loki:2.9.0
container_name: loki
deploy:
resources:
limits:
cpus: "0.5"
memory: 512M
environment:
TZ: "Asia/Bangkok"
command: -config.file=/etc/loki/local-config.yaml
ports:
- "3100:3100"
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/loki/data:/loki"
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
interval: 30s
timeout: 10s
retries: 3
# ----------------------------------------------------------------
# 7. Promtail (Log Shipper)
# ----------------------------------------------------------------
promtail:
<<: [*restart_policy, *default_logging]
image: grafana/promtail:2.9.0
container_name: promtail
user: "0:0"
deploy:
resources:
limits:
cpus: "0.5"
memory: 256M
environment:
TZ: "Asia/Bangkok"
command: -config.file=/etc/promtail/promtail-config.yml
networks:
- lcbp3
volumes:
- "/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro"
- "/var/run/docker.sock:/var/run/docker.sock:ro"
- "/var/lib/docker/containers:/var/lib/docker/containers:ro"
depends_on:
- loki
```
---
## QNAP Node Exporter & cAdvisor
ติดตั้ง node-exporter และ cAdvisor บน QNAP เพื่อให้ Prometheus บน ASUSTOR scrape metrics ได้:
```yaml
# File: /share/np-dms/monitoring/docker-compose.yml (QNAP)
# เฉพาะ exporters เท่านั้น - metrics ถูก scrape โดย Prometheus บน ASUSTOR
version: '3.8'
networks:
lcbp3:
external: true
services:
node-exporter:
image: prom/node-exporter:v1.7.0
container_name: node-exporter
restart: unless-stopped
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
ports:
- "9100:9100"
networks:
- lcbp3
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.47.2
container_name: cadvisor
restart: unless-stopped
privileged: true
ports:
- "8088:8080"
networks:
- lcbp3
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
- /sys/fs/cgroup:/sys/fs/cgroup:ro
mysqld-exporter:
image: prom/mysqld-exporter:v0.15.0
container_name: mysqld-exporter
restart: unless-stopped
user: root
command:
- '--config.my-cnf=/etc/mysql/my.cnf'
ports:
- "9104:9104"
networks:
- lcbp3
volumes:
- "/share/np-dms/monitoring/mysqld-exporter/.my.cnf:/etc/mysql/my.cnf:ro"
```
---
## Prometheus Configuration
สร้างไฟล์ `/volume1/np-dms/monitoring/prometheus/config/prometheus.yml` บน ASUSTOR:
```yaml
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
# Prometheus self-monitoring (ASUSTOR)
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# ============================================
# ASUSTOR Metrics (Local)
# ============================================
# Host metrics from Node Exporter (ASUSTOR)
- job_name: 'asustor-node'
static_configs:
- targets: ['node-exporter:9100']
labels:
host: 'asustor'
# Container metrics from cAdvisor (ASUSTOR)
- job_name: 'asustor-cadvisor'
static_configs:
- targets: ['cadvisor:8080']
labels:
host: 'asustor'
# ============================================
# QNAP Metrics (Remote - 192.168.10.8)
# ============================================
# Host metrics from Node Exporter (QNAP)
- job_name: 'qnap-node'
static_configs:
- targets: ['192.168.10.8:9100']
labels:
host: 'qnap'
# Container metrics from cAdvisor (QNAP)
- job_name: 'qnap-cadvisor'
static_configs:
- targets: ['192.168.10.8:8088']
labels:
host: 'qnap'
# Backend NestJS application (QNAP)
- job_name: 'backend'
static_configs:
- targets: ['192.168.10.8:3000']
labels:
host: 'qnap'
metrics_path: '/metrics'
# MariaDB Exporter (QNAP)
- job_name: 'mariadb'
static_configs:
- targets: ['192.168.10.8:9104']
labels:
host: 'qnap'
```
---
## Uptime Kuma Monitors
เมื่อ Uptime Kuma พร้อมใช้งาน ให้เพิ่ม monitors ต่อไปนี้:
| Monitor Name | Type | URL / Host | Interval |
| :------------ | :--- | :--------------------------------- | :------- |
| QNAP NPM | HTTP | https://npm.np-dms.work | 60s |
| Frontend | HTTP | https://lcbp3.np-dms.work | 60s |
| Backend API | HTTP | https://backend.np-dms.work/health | 60s |
| MariaDB | TCP | 192.168.10.8:3306 | 60s |
| Redis | TCP | 192.168.10.8:6379 | 60s |
| Elasticsearch | HTTP | http://192.168.10.8:9200 | 60s |
| Gitea | HTTP | https://git.np-dms.work | 60s |
| n8n | HTTP | https://n8n.np-dms.work | 60s |
| Grafana | HTTP | https://grafana.np-dms.work | 60s |
| QNAP Host | Ping | 192.168.10.8 | 60s |
| ASUSTOR Host | Ping | 192.168.10.9 | 60s |
---
## Grafana Dashboards
### Recommended Dashboards to Import
| Dashboard ID | Name | Purpose |
| :----------- | :--------------------------- | :----------------------------- |
| 1860 | Node Exporter Full | Host system metrics |
| 14282 | cAdvisor exporter | Container metrics |
| 11074 | Node Exporter for Prometheus | Node overview |
| 893 | Docker and Container | Docker overview |
| 7362 | MySQL | MySQL view |
| 1214 | Redis | Redis view |
| 14204 | Elasticsearch | Elasticsearch view |
| 13106 | MySQL/MariaDB Overview | Detailed MySQL/MariaDB metrics |
### Import Dashboard via Grafana UI
1. Go to **Dashboards → Import**
2. Enter Dashboard ID (e.g., `1860`)
3. Select Prometheus data source
4. Click **Import**
---
## 🚀 Deploy lcbp3-monitoring บน ASUSTOR
### 📋 Prerequisites Checklist
| # | ขั้นตอน | Status |
| :--- | :------------------------------------------------------------------------------------------------- | :----- |
| 1 | SSH เข้า ASUSTOR ได้ (`ssh admin@192.168.10.9`) | ✅ |
| 2 | Docker Network `lcbp3` สร้างแล้ว (ดูหัวข้อ [สร้าง Docker Network](#-สร้าง-docker-network-ทำครั้งแรกครั้งเดียว)) | ✅ |
| 3 | สร้าง Directories และกำหนดสิทธิ์แล้ว (ดูหัวข้อ [กำหนดสิทธิ](#กำหนดสิทธิ-บน-asustor)) | ✅ |
| 4 | สร้าง `prometheus.yml` แล้ว (ดูหัวข้อ [Prometheus Configuration](#prometheus-configuration)) | ✅ |
| 5 | สร้าง `promtail-config.yml` แล้ว (ดูหัวข้อ [Step 1.2](#step-12-สร้าง-promtail-configyml)) | ✅ |
---
### Step 1: สร้าง prometheus.yml
```bash
# SSH เข้า ASUSTOR
ssh admin@192.168.10.9
# สร้างไฟล์ prometheus.yml
cat > /volume1/np-dms/monitoring/prometheus/config/prometheus.yml << 'EOF'
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'asustor-node'
static_configs:
- targets: ['node-exporter:9100']
labels:
host: 'asustor'
- job_name: 'asustor-cadvisor'
static_configs:
- targets: ['cadvisor:8080']
labels:
host: 'asustor'
- job_name: 'qnap-node'
static_configs:
- targets: ['192.168.10.8:9100']
labels:
host: 'qnap'
- job_name: 'qnap-cadvisor'
static_configs:
- targets: ['192.168.10.8:8088']
labels:
host: 'qnap'
- job_name: 'backend'
static_configs:
- targets: ['192.168.10.8:3000']
labels:
host: 'qnap'
metrics_path: '/metrics'
EOF
# ตรวจสอบ
cat /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
```
### Step 1.2: สร้าง promtail-config.yml
ต้องสร้าง Config ให้ Promtail อ่าน logs จาก Docker containers และส่งไป Loki:
```bash
# สร้างไฟล์ promtail-config.yml
cat > /volume1/np-dms/monitoring/promtail/config/promtail-config.yml << 'EOF'
server:
http_listen_port: 9080
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
clients:
- url: http://loki:3100/loki/api/v1/push
scrape_configs:
- job_name: docker
docker_sd_configs:
- host: unix:///var/run/docker.sock
refresh_interval: 5s
relabel_configs:
- source_labels: ['__meta_docker_container_name']
regex: '/(.*)'
target_label: 'container'
- source_labels: ['__meta_docker_container_log_stream']
target_label: 'stream'
EOF

# ตรวจสอบ
cat /volume1/np-dms/monitoring/promtail/config/promtail-config.yml
```

---

## ขั้นตอนการเตรียมระบบที่ QNAP (ก่อน Deploy Stack)

### 1. สร้าง Monitoring User ใน MariaDB

รันคำสั่ง SQL นี้ผ่าน **phpMyAdmin** หรือ `docker exec`:

```sql
CREATE USER 'exporter'@'%' IDENTIFIED BY 'Center2025' WITH MAX_USER_CONNECTIONS 3;
GRANT PROCESS, REPLICATION CLIENT, SELECT, SLAVE MONITOR ON *.* TO 'exporter'@'%';
FLUSH PRIVILEGES;
```

### 2. สร้างไฟล์คอนฟิก .my.cnf บน QNAP

เพื่อให้ `mysqld-exporter` อ่านรหัสผ่านที่มีตัวอักษรพิเศษได้ถูกต้อง:

1. **SSH เข้า QNAP** (หรือใช้ File Station สร้าง Folder):

```bash
ssh admin@192.168.10.8
```

2. **สร้าง Directory สำหรับเก็บ Config**:

```bash
mkdir -p /share/np-dms/monitoring/mysqld-exporter
```

3. **สร้างไฟล์ .my.cnf**:

```bash
cat > /share/np-dms/monitoring/mysqld-exporter/.my.cnf << 'EOF'
[client]
user=exporter
password=Center2025
host=mariadb
EOF
```

4. **กำหนดสิทธิ์ไฟล์** (เพื่อให้ Container อ่านไฟล์ได้):

```bash
chmod 644 /share/np-dms/monitoring/mysqld-exporter/.my.cnf
```
---
### Step 2: Deploy ผ่าน Portainer (แนะนำ)
1. เปิด **Portainer** → เลือก Environment ของ **ASUSTOR**
2. ไปที่ **Stacks** → **Add stack**
3. กรอกข้อมูล:
- **Name:** `lcbp3-monitoring`
- **Build method:** เลือก **Web editor**
4. วาง (Paste) เนื้อหาจาก [Docker Compose File (ASUSTOR)](#docker-compose-file-asustor) ด้านบน
5. กด **Deploy the stack**
> ⚠️ **สำคัญ:** ตรวจสอบ Password ของ Grafana (`GF_SECURITY_ADMIN_PASSWORD`) ใน docker-compose ก่อน deploy
### Deploy ผ่าน SSH (วิธีสำรอง)
```bash
# SSH เข้า ASUSTOR
ssh admin@192.168.10.9
# คัดลอก docker-compose.yml ไปยัง path
# (วางไฟล์ที่ /volume1/np-dms/monitoring/docker-compose.yml)
# Deploy
cd /volume1/np-dms/monitoring
docker compose up -d
# ตรวจสอบ container status
docker compose ps
```
---
### Step 3: Verify Services
```bash
# ตรวจสอบ containers ทั้งหมด
docker ps --filter "name=prometheus" --filter "name=grafana" \
--filter "name=uptime-kuma" --filter "name=node-exporter" \
--filter "name=cadvisor" --filter "name=loki" --filter "name=promtail"
```
| Service | วิธีตรวจสอบ | Expected Result |
| :---------------- | :----------------------------------------------------------------- | :------------------------------------ |
| ✅ **Prometheus** | `curl http://192.168.10.9:9090/-/healthy` | `Prometheus Server is Healthy` |
| ✅ **Grafana** | เปิด `https://grafana.np-dms.work` (หรือ `http://192.168.10.9:3000`) | หน้า Login |
| ✅ **Uptime Kuma** | เปิด `https://uptime.np-dms.work` (หรือ `http://192.168.10.9:3001`) | หน้า Setup |
| ✅ **Node Exp.** | `curl http://192.168.10.9:9100/metrics \| head` | Metrics output |
| ✅ **cAdvisor** | `curl http://192.168.10.9:8080/healthz` | `ok` |
| ✅ **Loki** | `curl http://192.168.10.9:3100/ready` | `ready` |
| ✅ **Promtail** | เช็ค Logs: `docker logs promtail` | ไม่ควรมี Error + เห็น connection success |
---
### Step 4: Deploy QNAP Exporters
ติดตั้ง node-exporter และ cAdvisor บน QNAP เพื่อให้ Prometheus scrape ข้ามเครื่องได้:
#### ผ่าน Container Station (QNAP)
1. เปิด **Container Station** บน QNAP Web UI
2. ไปที่ **Applications** → **Create**
3. ตั้งชื่อ Application: `lcbp3-exporters`
4. วาง (Paste) เนื้อหาจาก [QNAP Node Exporter & cAdvisor](#qnap-node-exporter--cadvisor)
5. กด **Create**
#### ตรวจสอบจาก ASUSTOR
```bash
# ตรวจว่า Prometheus scrape QNAP ได้
curl -s http://localhost:9090/api/v1/targets | grep -E '"qnap-(node|cadvisor)"'
# หรือเปิด Prometheus UI → Targets
# URL: http://192.168.10.9:9090/targets
# ดูว่า qnap-node, qnap-cadvisor เป็น State: UP
```
---
### Step 5: ตั้งค่า Grafana & Uptime Kuma
#### Grafana — First Login
1. เปิด `https://grafana.np-dms.work`
2. Login: `admin` / `Center#2025` (หรือ password ที่ตั้งไว้)
3. ไปที่ **Connections** → **Data sources** → **Add data source**
4. เลือก **Prometheus**
- URL: `http://prometheus:9090`
- กด **Save & Test** → ต้องขึ้น ✅
5. Import Dashboards (ดูรายละเอียดในหัวข้อ [6. Grafana Dashboards Setup](#6-grafana-dashboards-setup))
#### Uptime Kuma — First Setup
1. เปิด `https://uptime.np-dms.work`
2. สร้าง Admin account
3. เพิ่ม Monitors ตาม [ตาราง Uptime Kuma Monitors](#uptime-kuma-monitors)
---
### 6. Grafana Dashboards Setup
เพื่อการ Monitor ที่สมบูรณ์ แนะนำให้ Import Dashboards ต่อไปนี้:
#### 6.1 Host Monitoring (Node Exporter)
* **Concept:** ดู resource ของเครื่อง Host (CPU, RAM, Disk, Network)
* **Dashboard ID:** `1860` (Node Exporter Full)
* **วิธี Import:**
1. ไปที่ **Dashboards** → **New** → **Import**
2. ช่อง **Import via grafana.com** ใส่เลข `1860` กด **Load**
3. เลือก Data source: **Prometheus**
4. กด **Import**
#### 6.2 Container Monitoring (cAdvisor)
* **Concept:** ดู resource ของแต่ละ Container (เชื่อม Logs ด้วย)
* **Dashboard ID:** `14282` (Cadvisor exporter)
* **วิธี Import:**
1. ใส่เลข `14282` กด **Load**
2. เลือก Data source: **Prometheus**
3. กด **Import**
#### 6.3 Logs Monitoring (Loki Integration)
เพื่อให้ Dashboard ของ Container แสดง Logs จาก Loki ได้ด้วย:
1. เปิด Dashboard **Cadvisor exporter** ที่เพิ่ง Import มา
2. กดปุ่ม **Add visualization** (หรือ Edit dashboard)
3. เลือก Data source: **Loki**
4. ในช่อง Query ใส่: `{container="$name"}`
* *(Note: `$name` มาจาก Variable ของ Dashboard 14282)*
5. ปรับ Visualization type เป็น **Logs**
6. ตั้งชื่อ Panel ว่า **"Container Logs"**
7. กด **Apply** และ **Save Dashboard**
ตอนนี้เราจะเห็นทั้ง **กราฟการกินทรัพยากร** และ **Logs** ของ Container นั้นๆ ในหน้าเดียวกันครับ
#### 6.4 Integrated Dashboard (Recommended)
ผมได้เตรียม JSON file ที่รวม Metrics และ Logs ไว้ให้แล้วครับ:
1. ไปที่ **Dashboards** → **New** → **Import**
2. ลากไฟล์ หรือ Copy เนื้อหาจากไฟล์:
`specs/08-infrastructure/grafana/dashboards/lcbp3-docker-monitoring.json`
3. กด **Load** และ **Import**
## 7.3 Backup / Export Dashboards
เมื่อปรับแต่ง Dashboard จนพอใจแล้ว ควร Export เก็บเป็นไฟล์ JSON ไว้ backup หรือ version control:
1. เปิด Dashboard ที่ต้องการ backup
2. ไปที่ปุ่ม **Share Dashboard** (ไอคอน 🔗 หรือ Share มุมซ้ายบน)
3. เลือก Tab **Export**
4. เปิดตัวเลือก **Export for sharing externally** (เพื่อลบค่า hardcoded เช่น datasource UID ออกจากไฟล์)
5. กด **Save to file**
6. นำไฟล์ JSON มาเก็บไว้ที่ path: `specs/08-infrastructure/grafana/dashboards/`
---
> 📝 **หมายเหตุ**: เอกสารนี้อ้างอิงจาก Architecture Document **v1.8.0** - Monitoring Stack deploy บน ASUSTOR AS5403T

View File

@@ -1,247 +0,0 @@
# Backup Strategy สำหรับ LCBP3-DMS
> 📍 **Deploy on:** ASUSTOR AS5403T (Infrastructure Server)
> 🎯 **Backup Target:** QNAP TS-473A (Application & Database)
> 📄 **Version:** v1.8.0
---
## Overview
ระบบ Backup แบบ Pull-based: ASUSTOR ดึงข้อมูลจาก QNAP เพื่อความปลอดภัย
หาก QNAP ถูกโจมตี ผู้โจมตีจะไม่สามารถลบ Backup บน ASUSTOR ได้
```
┌─────────────────────────────────────────────────────────────────┐
│ BACKUP ARCHITECTURE │
├─────────────────────────────────────────────────────────────────┤
│ │
│ QNAP (Source) ASUSTOR (Backup Target) │
│ 192.168.10.8 192.168.10.9 │
│ │
│ ┌──────────────┐ SSH/Rsync ┌──────────────────────┐ │
│ │ MariaDB │ ─────────────▶ │ /volume1/backup/db/ │ │
│ │ (mysqldump) │ Daily 2AM │ (Restic Repository) │ │
│ └──────────────┘ └──────────────────────┘ │
│ │
│ ┌──────────────┐ ┌──────────────────────┐ │
│ │ Redis RDB │ ─────────────▶ │ /volume1/backup/ │ │
│ │ + AOF │ Daily 3AM │ redis/ │ │
│ └──────────────┘ └──────────────────────┘ │
│ │
│ ┌──────────────┐ ┌──────────────────────┐ │
│ │ App Config │ ─────────────▶ │ /volume1/backup/ │ │
│ │ + Volumes │ Weekly Sun │ config/ │ │
│ └──────────────┘ └──────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
```
---
## 1. MariaDB Backup
### 1.1 Daily Database Backup Script
```bash
#!/bin/bash
# File: /volume1/np-dms/scripts/backup-mariadb.sh
# Run on: ASUSTOR (Pull from QNAP)
#
# Dumps the lcbp3_db database from the MariaDB container on QNAP over SSH,
# compresses the dump, records it in the shared Restic repository, and
# prunes raw dump files older than 30 days.
#
# Requires: MARIADB_ROOT_PASSWORD and RESTIC_PASSWORD in the environment,
# plus passwordless SSH from ASUSTOR to admin@QNAP.
set -euo pipefail

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/volume1/backup/db"
# Single shared Restic repository — must match sections 5 and 6
# (restic init / verify-backup.sh use /volume1/backup/restic-repo).
RESTIC_REPO="/volume1/backup/restic-repo"
QNAP_IP="192.168.10.8"
DB_NAME="lcbp3_db"
DB_USER="root"
DB_PASSWORD="${MARIADB_ROOT_PASSWORD}"

echo "🔄 Starting MariaDB backup at $DATE"

# Create backup directory
mkdir -p "$BACKUP_DIR"

# Remote mysqldump via SSH.
# NOTE(review): the password still appears on the remote command line
# (visible in `ps` on QNAP); consider a root ~/.my.cnf inside the
# mariadb container instead.
ssh admin@"$QNAP_IP" "docker exec mariadb mysqldump \
  --single-transaction \
  --routines \
  --triggers \
  -u $DB_USER -p$DB_PASSWORD $DB_NAME" > "$BACKUP_DIR/lcbp3_$DATE.sql"

# Compress
gzip "$BACKUP_DIR/lcbp3_$DATE.sql"

# Add to the shared Restic repository (deduplicated history)
restic -r "$RESTIC_REPO" backup "$BACKUP_DIR/lcbp3_$DATE.sql.gz"

# Keep only last 30 days of raw files (Restic keeps the long-term history)
find "$BACKUP_DIR" -name "lcbp3_*.sql.gz" -mtime +30 -delete

echo "✅ MariaDB backup complete: lcbp3_$DATE.sql.gz"
```
### 1.2 Cron Schedule (ASUSTOR)
```cron
# MariaDB daily backup at 2 AM
0 2 * * * /volume1/np-dms/scripts/backup-mariadb.sh >> /var/log/backup-mariadb.log 2>&1
```
---
## 2. Redis Backup
### 2.1 Redis Backup Script
```bash
#!/bin/bash
# File: /volume1/np-dms/scripts/backup-redis.sh
# Run on: ASUSTOR (Pull from QNAP)
#
# Triggers a BGSAVE on the QNAP Redis container, waits for the background
# save to finish, then pulls the RDB snapshot and AOF data and archives them.
set -euo pipefail

DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/volume1/backup/redis"
QNAP_IP="192.168.10.8"
REMOTE_DATA="/share/np-dms/services/cache/data"

echo "🔄 Starting Redis backup at $DATE"

mkdir -p "$BACKUP_DIR"

# Trigger BGSAVE on QNAP Redis, then poll until the background save
# completes (a fixed sleep can race with a slow save on a large dataset).
ssh admin@"$QNAP_IP" "docker exec cache redis-cli BGSAVE"
for _ in $(seq 1 60); do
  ssh admin@"$QNAP_IP" "docker exec cache redis-cli INFO persistence" \
    | grep -q 'rdb_bgsave_in_progress:0' && break
  sleep 2
done

STAGE="$BACKUP_DIR/redis_$DATE"
mkdir -p "$STAGE"

# Copy RDB snapshot and AOF data.
# NOTE(review): redis:7.x stores the AOF as a directory (appendonlydir/),
# not a single appendonly.aof — copy recursively and tolerate either layout.
scp admin@"$QNAP_IP":"$REMOTE_DATA/dump.rdb" "$STAGE/" || true
scp -r admin@"$QNAP_IP":"$REMOTE_DATA/appendonlydir" "$STAGE/" 2>/dev/null \
  || scp admin@"$QNAP_IP":"$REMOTE_DATA/appendonly.aof" "$STAGE/" || true

# Compress (-C keeps relative paths inside the archive)
tar -czf "$BACKUP_DIR/redis_$DATE.tar.gz" -C "$BACKUP_DIR" "redis_$DATE"

# Cleanup staging files
rm -rf "$STAGE"

echo "✅ Redis backup complete: redis_$DATE.tar.gz"
```
### 2.2 Cron Schedule
```cron
# Redis daily backup at 3 AM
0 3 * * * /volume1/np-dms/scripts/backup-redis.sh >> /var/log/backup-redis.log 2>&1
```
---
## 3. Application Config Backup
### 3.1 Weekly Config Backup Script
```bash
#!/bin/bash
# File: /volume1/np-dms/scripts/backup-config.sh
# Run on: ASUSTOR (Pull from QNAP)
#
# Syncs compose files and configs from QNAP (excluding data/log volumes
# and node_modules), then archives the snapshot as a dated tarball.
set -euo pipefail

DATE=$(date +%Y%m%d)
BACKUP_DIR="/volume1/backup/config"
QNAP_IP="192.168.10.8"

echo "🔄 Starting config backup at $DATE"

mkdir -p "$BACKUP_DIR"

# Sync Docker compose files and configs.
# Options come before the source/destination operands so the excludes
# are unambiguously applied to this transfer.
rsync -avz --delete \
  --exclude='*/data/*' \
  --exclude='*/logs/*' \
  --exclude='node_modules' \
  admin@"$QNAP_IP":/share/np-dms/ \
  "$BACKUP_DIR/np-dms_$DATE/"

# Compress (-C keeps relative paths inside the archive)
tar -czf "$BACKUP_DIR/config_$DATE.tar.gz" -C "$BACKUP_DIR" "np-dms_$DATE"

# Cleanup the uncompressed snapshot
rm -rf "$BACKUP_DIR/np-dms_$DATE"

echo "✅ Config backup complete: config_$DATE.tar.gz"
```
### 3.2 Cron Schedule
```cron
# Config weekly backup on Sunday at 4 AM
0 4 * * 0 /volume1/np-dms/scripts/backup-config.sh >> /var/log/backup-config.log 2>&1
```
---
## 4. Retention Policy
| Backup Type | Frequency | Retention | Storage Est. |
| :---------- | :-------- | :-------- | :----------- |
| MariaDB | Daily | 30 days | ~5GB/month |
| Redis | Daily | 7 days | ~500MB |
| Config | Weekly | 4 weeks | ~200MB |
| Restic | Daily | 6 months | Deduplicated |
---
## 5. Restic Repository Setup
```bash
# Initialize Restic repository (one-time)
restic init -r /volume1/backup/restic-repo
# Set password in environment
export RESTIC_PASSWORD="your-secure-backup-password"
# Check repository status
restic -r /volume1/backup/restic-repo snapshots
# Prune old snapshots (keep 30 daily, 4 weekly, 6 monthly)
restic -r /volume1/backup/restic-repo forget \
--keep-daily 30 \
--keep-weekly 4 \
--keep-monthly 6 \
--prune
```
---
## 6. Verification Script
```bash
#!/bin/bash
# File: /volume1/np-dms/scripts/verify-backup.sh
#
# Prints a quick verification report: newest MariaDB and Redis archives
# plus the latest Restic snapshots.
# Requires RESTIC_PASSWORD in the environment for the snapshot listing.
set -u

# Shared Restic repository (same path as the backup scripts / section 5)
RESTIC_REPO="/volume1/backup/restic-repo"

echo "📋 Backup Verification Report"
echo "=============================="
echo ""

# Check latest MariaDB backup (newest .sql.gz by mtime)
LATEST_DB=$(ls -t /volume1/backup/db/*.sql.gz 2>/dev/null | head -1)
if [ -n "$LATEST_DB" ]; then
  echo "✅ Latest DB backup: $LATEST_DB"
  # Quote the path so filenames with spaces do not break du
  echo "   Size: $(du -h "$LATEST_DB" | cut -f1)"
else
  echo "❌ No DB backup found!"
fi

# Check latest Redis backup
LATEST_REDIS=$(ls -t /volume1/backup/redis/*.tar.gz 2>/dev/null | head -1)
if [ -n "$LATEST_REDIS" ]; then
  echo "✅ Latest Redis backup: $LATEST_REDIS"
else
  echo "❌ No Redis backup found!"
fi

# Check Restic repository
echo ""
echo "📦 Restic Snapshots:"
restic -r "$RESTIC_REPO" snapshots --latest 5
```
---
> 📝 **หมายเหตุ**: เอกสารนี้อ้างอิงจาก Architecture Document **v1.8.0**

View File

@@ -1,209 +0,0 @@
# Disaster Recovery Plan สำหรับ LCBP3-DMS
> 📍 **Version:** v1.8.0
> 🖥️ **Primary Server:** QNAP TS-473A (Application & Database)
> 💾 **Backup Server:** ASUSTOR AS5403T (Infrastructure & Backup)
---
## RTO/RPO Targets
| Scenario | RTO | RPO | Priority |
| :-------------------------- | :------ | :----- | :------- |
| Single backend node failure | 0 min   | 0 min  | P0       |
| Redis failure               | 5 min   | 0 min  | P0       |
| MariaDB failure             | 10 min  | 0 min  | P0       |
| QNAP total failure | 2 hours | 15 min | P1 |
| Data corruption | 4 hours | 1 day | P2 |
---
## 1. Quick Recovery Procedures
### 1.1 Service Not Responding
```bash
# Check container status (look for Exited / Restarting states)
docker ps -a | grep <service-name>
# Restart specific service
docker restart <container-name>
# Check recent logs for errors after the restart
docker logs <container-name> --tail 100
```
### 1.2 Redis Failure
```bash
# Check status — this Redis is started with --requirepass (see the
# docker-compose config), so redis-cli must authenticate or PING returns
# NOAUTH instead of PONG
docker exec cache redis-cli -a "$REDIS_PASSWORD" ping
# Restart
docker restart cache
# Verify it answers PONG again after the restart
docker exec cache redis-cli -a "$REDIS_PASSWORD" ping
```
### 1.3 MariaDB Failure
```bash
# Check status (-it attaches stdin + a TTY so mysql's interactive -p
# password prompt works; plain `docker exec` has no stdin and the prompt fails)
docker exec -it mariadb mysql -u root -p -e "SELECT 1"
# Restart
docker restart mariadb
# Wait for startup (InnoDB recovery may take time)
sleep 30
# Verify
docker exec -it mariadb mysql -u root -p -e "SHOW DATABASES"
```
---
## 2. Full System Recovery
### 2.1 Recovery Prerequisites (ASUSTOR)
ตรวจสอบว่า Backup files พร้อมใช้งาน:
```bash
# SSH to ASUSTOR (backup server)
ssh admin@192.168.10.9
# List available backups (database dumps, Redis archives, config archives)
ls -la /volume1/backup/db/
ls -la /volume1/backup/redis/
ls -la /volume1/backup/config/
# Check Restic snapshots (requires RESTIC_PASSWORD in the environment)
restic -r /volume1/backup/restic-repo snapshots
```
### 2.2 QNAP Recovery Script
```bash
#!/bin/bash
# File: /volume1/np-dms/scripts/disaster-recovery.sh
# Run on: ASUSTOR (Push to QNAP)
# Purpose: rebuild the QNAP application stack from the latest backups held on
# this host, in order: network -> config -> MariaDB -> DB dump -> Redis -> apps.
QNAP_IP="192.168.10.8"
BACKUP_DIR="/volume1/backup"
echo "🚨 Starting Disaster Recovery..."
echo "================================"
# 1. Restore Docker Network ("|| true" keeps the script going if it already exists)
echo "1⃣ Creating Docker network..."
ssh admin@$QNAP_IP "docker network create lcbp3 || true"
# 2. Restore config files (newest config archive by mtime); quote expansions
# so paths with spaces do not break tar
echo "2⃣ Restoring configuration files..."
LATEST_CONFIG=$(ls -t "$BACKUP_DIR"/config/*.tar.gz | head -1)
tar -xzf "$LATEST_CONFIG" -C /tmp/
rsync -avz /tmp/np-dms/ admin@$QNAP_IP:/share/np-dms/
# 3. Start infrastructure services
echo "3⃣ Starting MariaDB..."
ssh admin@$QNAP_IP "cd /share/np-dms/mariadb && docker-compose up -d"
sleep 30
# 4. Restore database (\$MYSQL_ROOT_PASSWORD is escaped so it expands on the
# QNAP side, not locally)
echo "4⃣ Restoring database..."
LATEST_DB=$(ls -t "$BACKUP_DIR"/db/*.sql.gz | head -1)
gunzip -c "$LATEST_DB" | ssh admin@$QNAP_IP "docker exec -i mariadb mysql -u root -p\$MYSQL_ROOT_PASSWORD lcbp3_db"
# 5. Start Redis
echo "5⃣ Starting Redis..."
ssh admin@$QNAP_IP "cd /share/np-dms/services && docker-compose up -d cache"
# 6. Restore Redis data (if needed)
# NOTE(review): Redis is configured with --appendonly yes; with AOF enabled a
# restored dump.rdb is ignored at startup unless the AOF files are removed
# first — confirm this restore path actually loads the data.
echo "6⃣ Restoring Redis data..."
LATEST_REDIS=$(ls -t "$BACKUP_DIR"/redis/*.tar.gz | head -1)
tar -xzf "$LATEST_REDIS" -C /tmp/
scp /tmp/redis_*.rdb admin@$QNAP_IP:/share/np-dms/services/cache/data/dump.rdb
ssh admin@$QNAP_IP "docker restart cache"
# 7. Start remaining services
echo "7⃣ Starting application services..."
ssh admin@$QNAP_IP "cd /share/np-dms/services && docker-compose up -d"
ssh admin@$QNAP_IP "cd /share/np-dms/npm && docker-compose up -d"
# 8. Health check ("|| echo" so one failing probe does not abort the report)
echo "8⃣ Running health checks..."
sleep 60
curl -f https://lcbp3.np-dms.work/health || echo "⚠️ Frontend not ready"
curl -f https://backend.np-dms.work/health || echo "⚠️ Backend not ready"
echo ""
echo "✅ Disaster Recovery Complete"
echo "⚠️ Please verify system functionality manually"
```
---
## 3. Data Corruption Recovery
### 3.1 Point-in-Time Recovery (Database)
```bash
# List available Restic snapshots (requires RESTIC_PASSWORD in the environment)
restic -r /volume1/backup/restic-repo snapshots
# Restore a specific snapshot into a scratch directory
restic -r /volume1/backup/restic-repo restore <snapshot-id> --target /tmp/restore/
# Apply restored backup (\$MYSQL_ROOT_PASSWORD is escaped so it expands on the
# QNAP side, not locally)
gunzip -c /tmp/restore/lcbp3_*.sql.gz | \
ssh admin@192.168.10.8 "docker exec -i mariadb mysql -u root -p\$MYSQL_ROOT_PASSWORD lcbp3_db"
```
### 3.2 Selective Table Recovery
```bash
# Extract one table's dump (DDL + data) from the compressed full backup.
# sed prints the inclusive range from the table's CREATE TABLE statement to
# the next UNLOCK TABLES ending its data section — unlike the grep -A/-B
# approach, this does not depend on the table fitting in a fixed 1000-line
# window, and it stops at the first UNLOCK TABLES instead of matching later ones.
gunzip -c /volume1/backup/db/lcbp3_YYYYMMDD.sql.gz | \
sed -n '/CREATE TABLE `documents`/,/UNLOCK TABLES/p' > /tmp/documents_table.sql
# Restore specific table
ssh admin@192.168.10.8 "docker exec -i mariadb mysql -u root -p\$MYSQL_ROOT_PASSWORD lcbp3_db" < /tmp/documents_table.sql
```
---
## 4. Communication & Escalation
### 4.1 Incident Response
| Severity | Response Time | Notify |
| :------- | :------------ | :----------------------------- |
| P0 | Immediate | Admin Team + Management |
| P1 | 30 minutes | Admin Team |
| P2 | 2 hours | Admin Team (next business day) |
### 4.2 Post-Incident Checklist
- [ ] Identify root cause
- [ ] Document timeline of events
- [ ] Verify all services restored
- [ ] Check data integrity
- [ ] Update monitoring alerts if needed
- [ ] Create incident report
---
## 5. Testing Schedule
| Test Type | Frequency | Last Tested | Next Due |
| :---------------------- | :-------- | :---------- | :------- |
| Backup Verification | Weekly | - | - |
| Single Service Recovery | Monthly | - | - |
| Full DR Test | Quarterly | - | - |
---
> 📝 **หมายเหตุ**: เอกสารนี้อ้างอิงจาก Architecture Document **v1.8.0**