690602:1334 ADR-033-233 #03
This commit is contained in:
@@ -24,7 +24,12 @@ import { Textarea } from '@/components/ui/textarea';
|
|||||||
import { Progress } from '@/components/ui/progress';
|
import { Progress } from '@/components/ui/progress';
|
||||||
import { useAiStatus, useToggleAiFeatures, useAiHealth } from '@/hooks/use-ai-status';
|
import { useAiStatus, useToggleAiFeatures, useAiHealth } from '@/hooks/use-ai-status';
|
||||||
import { projectService } from '@/lib/services/project.service';
|
import { projectService } from '@/lib/services/project.service';
|
||||||
import { adminAiService, AiSandboxJobResult, AiAvailableModel } from '@/lib/services/admin-ai.service';
|
import {
|
||||||
|
adminAiService,
|
||||||
|
AiSandboxJobResult,
|
||||||
|
AiAvailableModel,
|
||||||
|
AiRagCitation,
|
||||||
|
} from '@/lib/services/admin-ai.service';
|
||||||
import { toast } from 'sonner';
|
import { toast } from 'sonner';
|
||||||
import OcrSandboxPromptManager from '@/components/admin/ai/OcrSandboxPromptManager';
|
import OcrSandboxPromptManager from '@/components/admin/ai/OcrSandboxPromptManager';
|
||||||
import OcrEngineSelector from '@/components/admin/ai/OcrEngineSelector';
|
import OcrEngineSelector from '@/components/admin/ai/OcrEngineSelector';
|
||||||
@@ -35,6 +40,48 @@ interface SandboxProject {
|
|||||||
projectCode: string;
|
projectCode: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * View model for one entry of the "models currently loaded on the GPU" badge list.
 */
interface VramLoadedModelView {
  /** Identifier of the entry; the page uses it as the React key of the badge. */
  modelId: string;
  /** Name rendered on the badge. */
  modelName: string;
  /** VRAM usage in MB; the badge only prints the figure when this is a number. */
  vramUsageMB?: number;
}
|
||||||
|
|
||||||
|
/**
 * Coerces an untrusted value to an array.
 *
 * @param value - Any value, typically a field read from an API response.
 * @returns `value` itself (same reference) when it is an array, otherwise a new
 *   empty array. Elements are not inspected, so `T` is an assertion made by the
 *   caller rather than a checked type.
 */
function ensureArray<T>(value: unknown): T[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value;
}
|
||||||
|
|
||||||
|
/**
 * Converts the `loadedModels` value of a VRAM status payload into badge view models.
 *
 * Accepted element shapes:
 * - a plain string, used as the model name;
 * - an object carrying any of `modelId`, `modelName`, `name`, `vramUsageMB`;
 * - anything else, which becomes an "Unknown Model N" placeholder.
 *
 * Field values are validated at runtime instead of being trusted through a cast:
 * a text field that is not a non-empty string is treated as absent, and
 * `vramUsageMB` is kept only when it is a finite number.
 *
 * @param value - Untrusted value; anything that is not an array yields `[]`.
 * @returns One view model per input element, in input order.
 */
function normalizeLoadedModels(value: unknown): VramLoadedModelView[] {
  if (!Array.isArray(value)) {
    return [];
  }

  // Anything other than a non-empty string counts as "field not provided".
  const readText = (field: unknown): string | undefined =>
    typeof field === 'string' && field.length > 0 ? field : undefined;

  return value.map((item: unknown, index: number): VramLoadedModelView => {
    if (typeof item === 'string') {
      return {
        modelId: `${item}-${index}`,
        modelName: item,
      };
    }

    if (item !== null && typeof item === 'object') {
      const record = item as Record<string, unknown>;
      const modelName =
        readText(record.modelName) ?? readText(record.name) ?? `model-${index + 1}`;
      const usage = record.vramUsageMB;
      return {
        // The index suffix keeps generated ids distinct when two entries share a
        // name; modelId is used as a React key, so duplicates must be avoided.
        modelId: readText(record.modelId) ?? `${modelName}-${index}`,
        modelName,
        vramUsageMB:
          typeof usage === 'number' && Number.isFinite(usage) ? usage : undefined,
      };
    }

    return {
      modelId: `unknown-${index}`,
      modelName: `Unknown Model ${index + 1}`,
    };
  });
}
|
||||||
|
|
||||||
export default function AiAdminConsolePage() {
|
export default function AiAdminConsolePage() {
|
||||||
const { data, isLoading, isError, refetch, isFetching } = useAiStatus();
|
const { data, isLoading, isError, refetch, isFetching } = useAiStatus();
|
||||||
const { data: health, isLoading: isHealthLoading, refetch: refetchHealth } = useAiHealth();
|
const { data: health, isLoading: isHealthLoading, refetch: refetchHealth } = useAiHealth();
|
||||||
@@ -56,7 +103,7 @@ export default function AiAdminConsolePage() {
|
|||||||
return await adminAiService.getAvailableModels();
|
return await adminAiService.getAvailableModels();
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
const availableModels = aiModelsData?.models ?? [];
|
const availableModels = ensureArray<AiAvailableModel>(aiModelsData?.models);
|
||||||
const activeModel = aiModelsData?.activeModel ?? '';
|
const activeModel = aiModelsData?.activeModel ?? '';
|
||||||
|
|
||||||
// VRAM Monitoring State (T034, T036, US2)
|
// VRAM Monitoring State (T034, T036, US2)
|
||||||
@@ -75,6 +122,13 @@ export default function AiAdminConsolePage() {
|
|||||||
return res as SandboxProject[];
|
return res as SandboxProject[];
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
const healthOllamaModels = ensureArray<string>(health?.ollama?.models);
|
||||||
|
const healthQdrantCollections = ensureArray<string>(health?.qdrant?.collections);
|
||||||
|
const vramLoadedModels = normalizeLoadedModels(vramStatus?.loadedModels);
|
||||||
|
const sandboxProjects = ensureArray<SandboxProject>(projects);
|
||||||
|
const sandboxCitations = ensureArray<AiRagCitation>(
|
||||||
|
sandboxJobResult?.citations
|
||||||
|
);
|
||||||
|
|
||||||
const handleToggle = async (enabled: boolean): Promise<void> => {
|
const handleToggle = async (enabled: boolean): Promise<void> => {
|
||||||
await toggleMutation.mutateAsync(enabled);
|
await toggleMutation.mutateAsync(enabled);
|
||||||
@@ -242,8 +296,8 @@ export default function AiAdminConsolePage() {
|
|||||||
<div className="space-y-1">
|
<div className="space-y-1">
|
||||||
<span className="text-xs text-muted-foreground">โมเดลที่โหลดอยู่:</span>
|
<span className="text-xs text-muted-foreground">โมเดลที่โหลดอยู่:</span>
|
||||||
<div className="flex flex-wrap gap-1">
|
<div className="flex flex-wrap gap-1">
|
||||||
{health?.ollama?.models && health.ollama.models.length > 0 ? (
|
{healthOllamaModels.length > 0 ? (
|
||||||
health.ollama.models.map((m) => (
|
healthOllamaModels.map((m) => (
|
||||||
<Badge key={m} variant="secondary" className="text-[10px] py-0 px-1">
|
<Badge key={m} variant="secondary" className="text-[10px] py-0 px-1">
|
||||||
{m}
|
{m}
|
||||||
</Badge>
|
</Badge>
|
||||||
@@ -274,8 +328,8 @@ export default function AiAdminConsolePage() {
|
|||||||
<div className="space-y-1">
|
<div className="space-y-1">
|
||||||
<span className="text-xs text-muted-foreground">คอลเลกชัน:</span>
|
<span className="text-xs text-muted-foreground">คอลเลกชัน:</span>
|
||||||
<div className="flex flex-wrap gap-1">
|
<div className="flex flex-wrap gap-1">
|
||||||
{health?.qdrant?.collections && health.qdrant.collections.length > 0 ? (
|
{healthQdrantCollections.length > 0 ? (
|
||||||
health.qdrant.collections.map((c) => (
|
healthQdrantCollections.map((c) => (
|
||||||
<Badge key={c} variant="outline" className="text-[10px] py-0 px-1 bg-background/30">
|
<Badge key={c} variant="outline" className="text-[10px] py-0 px-1 bg-background/30">
|
||||||
{c}
|
{c}
|
||||||
</Badge>
|
</Badge>
|
||||||
@@ -394,10 +448,13 @@ export default function AiAdminConsolePage() {
|
|||||||
<div className="space-y-1 text-xs">
|
<div className="space-y-1 text-xs">
|
||||||
<span className="text-muted-foreground block">โมเดลที่โหลดบน GPU ในปัจจุบัน:</span>
|
<span className="text-muted-foreground block">โมเดลที่โหลดบน GPU ในปัจจุบัน:</span>
|
||||||
<div className="flex flex-wrap gap-1 mt-1">
|
<div className="flex flex-wrap gap-1 mt-1">
|
||||||
{vramStatus.loadedModels && vramStatus.loadedModels.length > 0 ? (
|
{vramLoadedModels.length > 0 ? (
|
||||||
vramStatus.loadedModels.map((m) => (
|
vramLoadedModels.map((m) => (
|
||||||
<Badge key={m.modelId || m.modelName} className="bg-primary/10 text-primary border-none hover:bg-primary/20 text-[10px]">
|
<Badge key={m.modelId} className="bg-primary/10 text-primary border-none hover:bg-primary/20 text-[10px]">
|
||||||
{m.modelName} ({m.vramUsageMB} MB)
|
{m.modelName}
|
||||||
|
{typeof m.vramUsageMB === 'number'
|
||||||
|
? ` (${m.vramUsageMB} MB)`
|
||||||
|
: ''}
|
||||||
</Badge>
|
</Badge>
|
||||||
))
|
))
|
||||||
) : (
|
) : (
|
||||||
@@ -627,7 +684,7 @@ export default function AiAdminConsolePage() {
|
|||||||
<SelectValue placeholder="-- กรุณาเลือกโครงการ --" />
|
<SelectValue placeholder="-- กรุณาเลือกโครงการ --" />
|
||||||
</SelectTrigger>
|
</SelectTrigger>
|
||||||
<SelectContent>
|
<SelectContent>
|
||||||
{projects.map((proj) => (
|
{sandboxProjects.map((proj) => (
|
||||||
<SelectItem key={proj.publicId} value={proj.publicId}>
|
<SelectItem key={proj.publicId} value={proj.publicId}>
|
||||||
{proj.projectName} ({proj.projectCode})
|
{proj.projectName} ({proj.projectCode})
|
||||||
</SelectItem>
|
</SelectItem>
|
||||||
@@ -728,9 +785,9 @@ export default function AiAdminConsolePage() {
|
|||||||
</CardTitle>
|
</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardContent>
|
<CardContent>
|
||||||
{sandboxJobResult.citations && sandboxJobResult.citations.length > 0 ? (
|
{sandboxCitations.length > 0 ? (
|
||||||
<div className="grid gap-3 sm:grid-cols-1">
|
<div className="grid gap-3 sm:grid-cols-1">
|
||||||
{sandboxJobResult.citations.map((cite, index) => (
|
{sandboxCitations.map((cite, index) => (
|
||||||
<div
|
<div
|
||||||
key={cite.pointId || index}
|
key={cite.pointId || index}
|
||||||
className="rounded-lg border border-border/40 bg-background/30 p-3 hover:bg-background/60 transition-colors space-y-2"
|
className="rounded-lg border border-border/40 bg-background/30 p-3 hover:bg-background/60 transition-colors space-y-2"
|
||||||
|
|||||||
@@ -9,6 +9,8 @@
|
|||||||
// - 2026-05-29: เพิ่ม ocrText, ocrUsed, promptVersionUsed ใน AiSandboxJobResult
|
// - 2026-05-29: เพิ่ม ocrText, ocrUsed, promptVersionUsed ใน AiSandboxJobResult
|
||||||
// - 2026-05-30: เพิ่มเมธอด getOcrEngines และ selectOcrEngine สำหรับจัดการ OCR engines (T017, T018, US1)
|
// - 2026-05-30: เพิ่มเมธอด getOcrEngines และ selectOcrEngine สำหรับจัดการ OCR engines (T017, T018, US1)
|
||||||
// - 2026-05-30: เพิ่ม getVramStatus และปรับปรุง getAvailableModels/setActiveModel/addModel ให้เรียกใช้ endpoints ใหม่ที่มี VRAM capacity check (T031-T034, US2)
|
// - 2026-05-30: เพิ่ม getVramStatus และปรับปรุง getAvailableModels/setActiveModel/addModel ให้เรียกใช้ endpoints ใหม่ที่มี VRAM capacity check (T031-T034, US2)
|
||||||
|
// - 2026-06-02: แก้ endpoint getAvailableModels ให้ตรงกับ backend admin route (/ai/admin/models)
|
||||||
|
// - 2026-06-02: normalize VRAM response ให้รองรับ field names จาก backend ปัจจุบันและรูปแบบ loadedModels แบบเดิม
|
||||||
|
|
||||||
import api from '../api/client';
|
import api from '../api/client';
|
||||||
|
|
||||||
@@ -91,6 +93,20 @@ export interface VramStatusResponse {
|
|||||||
lastUpdated: string;
|
lastUpdated: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Loose shape of the VRAM status payload before normalization.
 *
 * Every field is optional because the payload may use either of two spelling
 * families for the same quantities (`...VRAMMB` or `...VramMb`).
 */
interface RawVramStatusResponse {
  // --- `VRAMMB` spelling family ---
  totalVRAMMB?: number;
  usedVRAMMB?: number;
  usagePercent?: number;
  thresholdPercent?: number;
  /** Entries may be bare model names or full model objects. */
  loadedModels?: Array<string | LoadedModelInfo>;
  canLoadModel?: boolean;
  lastUpdated?: string;

  // --- `VramMb` spelling family ---
  totalVramMb?: number;
  usedVramMb?: number;
  freeVramMb?: number;
  hasCapacity?: boolean;
}
|
||||||
|
|
||||||
export interface AiAvailableModel {
|
export interface AiAvailableModel {
|
||||||
id?: number;
|
id?: number;
|
||||||
modelId?: string;
|
modelId?: string;
|
||||||
@@ -121,6 +137,43 @@ const extractData = <T>(value: unknown): T => {
|
|||||||
return value as T;
|
return value as T;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Normalizes the `loadedModels` list of a VRAM status payload.
 *
 * @param models - List whose entries are either bare model names or model objects.
 * @returns `[]` when `models` is not an array. Otherwise a new array in which each
 *   string entry is expanded to `{ modelId: "<name>-<index>", modelName, vramUsageMB: 0 }`
 *   and every other entry is passed through unchanged (same object reference).
 */
const normalizeLoadedModels = (
  models: Array<string | LoadedModelInfo> | undefined
): LoadedModelInfo[] => {
  if (Array.isArray(models)) {
    return models.map((entry, position) =>
      typeof entry !== 'string'
        ? entry
        : {
            modelId: `${entry}-${position}`,
            modelName: entry,
            vramUsageMB: 0,
          }
    );
  }
  return [];
};
|
||||||
|
|
||||||
|
/**
 * Builds a complete `VramStatusResponse` from whatever the VRAM status endpoint returned.
 *
 * For each quantity the `...VRAMMB` spelling is preferred and the `...VramMb`
 * spelling is used as a fallback. Missing values are defaulted as follows:
 * - `totalVRAMMB`: 0.
 * - `usedVRAMMB`: derived as `total - freeVramMb` (never negative) when only a
 *   free figure is present, otherwise 0.
 * - `usagePercent`: rounded `used / total * 100`, or 0 when the total is 0.
 * - `thresholdPercent`: 90.
 * - `canLoadModel`: `hasCapacity`, otherwise `false`.
 * - `lastUpdated`: the current time as an ISO string.
 *
 * @param value - Response body; passed through `extractData` first.
 * @returns A fully populated status object. A null or non-object payload yields
 *   the all-defaults object instead of throwing.
 */
const normalizeVramStatus = (value: unknown): VramStatusResponse => {
  const extracted = extractData<RawVramStatusResponse | null | undefined>(value);
  // An empty or non-object body would otherwise throw on the first property read.
  const raw: RawVramStatusResponse =
    extracted !== null && typeof extracted === 'object' ? extracted : {};

  const totalVRAMMB = raw.totalVRAMMB ?? raw.totalVramMb ?? 0;
  // When the payload reports free memory but no used figure, derive used from it
  // rather than reporting 0 MB in use.
  const usedFromFree =
    typeof raw.freeVramMb === 'number'
      ? Math.max(totalVRAMMB - raw.freeVramMb, 0)
      : undefined;
  const usedVRAMMB = raw.usedVRAMMB ?? raw.usedVramMb ?? usedFromFree ?? 0;
  const usagePercent =
    raw.usagePercent ??
    (totalVRAMMB > 0 ? Math.round((usedVRAMMB / totalVRAMMB) * 100) : 0);

  return {
    totalVRAMMB,
    usedVRAMMB,
    usagePercent,
    thresholdPercent: raw.thresholdPercent ?? 90,
    loadedModels: normalizeLoadedModels(raw.loadedModels),
    canLoadModel: raw.canLoadModel ?? raw.hasCapacity ?? false,
    lastUpdated: raw.lastUpdated ?? new Date().toISOString(),
  };
};
|
||||||
|
|
||||||
/** Service สำหรับเรียก AI Admin Console API ผ่าน DMS Backend เท่านั้น */
|
/** Service สำหรับเรียก AI Admin Console API ผ่าน DMS Backend เท่านั้น */
|
||||||
export const adminAiService = {
|
export const adminAiService = {
|
||||||
getStatus: async (): Promise<AiAdminSettings> => {
|
getStatus: async (): Promise<AiAdminSettings> => {
|
||||||
@@ -199,7 +252,7 @@ export const adminAiService = {
|
|||||||
// --- AI Model Management (ADR-027, US2) ---
|
// --- AI Model Management (ADR-027, US2) ---
|
||||||
|
|
||||||
getAvailableModels: async (): Promise<AiModelsResponse> => {
|
getAvailableModels: async (): Promise<AiModelsResponse> => {
|
||||||
const { data } = await api.get('/ai/models');
|
const { data } = await api.get('/ai/admin/models');
|
||||||
return extractData<AiModelsResponse>(data);
|
return extractData<AiModelsResponse>(data);
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -215,7 +268,7 @@ export const adminAiService = {
|
|||||||
|
|
||||||
getVramStatus: async (): Promise<VramStatusResponse> => {
|
getVramStatus: async (): Promise<VramStatusResponse> => {
|
||||||
const { data } = await api.get('/ai/vram/status');
|
const { data } = await api.get('/ai/vram/status');
|
||||||
return extractData<VramStatusResponse>(data);
|
return normalizeVramStatus(data);
|
||||||
},
|
},
|
||||||
|
|
||||||
addModel: async (
|
addModel: async (
|
||||||
|
|||||||
@@ -0,0 +1,240 @@
|
|||||||
|
# File: /volume1/np-dms/monitoring/docker-compose.yml
|
||||||
|
# DMS Container v1.8.6: Application name: lcbp3-monitoring
|
||||||
|
# Deploy on: ASUSTOR AS5403T
|
||||||
|
# Services: prometheus, grafana, node-exporter, cadvisor, uptime-kuma, loki, promtail
|
||||||
|
|
||||||
|
x-restart: &restart_policy
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
x-logging: &default_logging
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "5"
|
||||||
|
|
||||||
|
name: lcbp3-monitoring
|
||||||
|
|
||||||
|
networks:
|
||||||
|
lcbp3:
|
||||||
|
external: true
|
||||||
|
|
||||||
|
services:
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 1. Prometheus (Metrics Collection & Storage)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
prometheus:
|
||||||
|
<<: [*restart_policy, *default_logging]
|
||||||
|
image: prom/prometheus:v2.48.0
|
||||||
|
container_name: prometheus
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "1.0"
|
||||||
|
memory: 1G
|
||||||
|
reservations:
|
||||||
|
cpus: "0.25"
|
||||||
|
memory: 256M
|
||||||
|
environment:
|
||||||
|
TZ: "Asia/Bangkok"
|
||||||
|
command:
|
||||||
|
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||||
|
- "--storage.tsdb.path=/prometheus"
|
||||||
|
- "--storage.tsdb.retention.time=30d"
|
||||||
|
- "--web.enable-lifecycle"
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
networks:
|
||||||
|
- lcbp3
|
||||||
|
volumes:
|
||||||
|
- "/volume1/np-dms/monitoring/prometheus/config:/etc/prometheus:ro"
|
||||||
|
- "/volume1/np-dms/monitoring/prometheus/data:/prometheus"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 2. Grafana (Dashboard & Visualization)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
grafana:
|
||||||
|
<<: [*restart_policy, *default_logging]
|
||||||
|
image: grafana/grafana:10.2.2
|
||||||
|
container_name: grafana
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "1.0"
|
||||||
|
memory: 512M
|
||||||
|
reservations:
|
||||||
|
cpus: "0.25"
|
||||||
|
memory: 128M
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
TZ: "Asia/Bangkok"
|
||||||
|
GF_SECURITY_ADMIN_USER: admin
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD required}
|
||||||
|
GF_SERVER_ROOT_URL: "https://grafana.np-dms.work"
|
||||||
|
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-piechart-panel
|
||||||
|
ports:
|
||||||
|
- "3003:3000"
|
||||||
|
networks:
|
||||||
|
- lcbp3
|
||||||
|
volumes:
|
||||||
|
- "/volume1/np-dms/monitoring/grafana/data:/var/lib/grafana"
|
||||||
|
depends_on:
|
||||||
|
- prometheus
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
[
|
||||||
|
"CMD-SHELL",
|
||||||
|
"wget --spider -q http://localhost:3000/api/health || exit 1",
|
||||||
|
]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 3. Uptime Kuma (Service Availability Monitoring)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
uptime-kuma:
|
||||||
|
<<: [*restart_policy, *default_logging]
|
||||||
|
image: louislam/uptime-kuma:1
|
||||||
|
container_name: uptime-kuma
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "0.5"
|
||||||
|
memory: 256M
|
||||||
|
environment:
|
||||||
|
TZ: "Asia/Bangkok"
|
||||||
|
ports:
|
||||||
|
- "3001:3001"
|
||||||
|
networks:
|
||||||
|
- lcbp3
|
||||||
|
volumes:
|
||||||
|
- "/volume1/np-dms/monitoring/uptime-kuma/data:/app/data"
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
["CMD-SHELL", "curl -f http://localhost:3001/api/entry-page || exit 1"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 4. Node Exporter (Host Metrics - ASUSTOR)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
node-exporter:
  <<: [*restart_policy, *default_logging]
  image: prom/node-exporter:v1.7.0
  container_name: node-exporter
  deploy:
    resources:
      limits:
        cpus: "0.5"
        memory: 128M
  environment:
    TZ: "Asia/Bangkok"
  command:
    # Point the collectors at the host views bind-mounted under `volumes` below.
    - "--path.procfs=/host/proc"
    - "--path.sysfs=/host/sys"
    # Required for the /rootfs mount to have any effect: without it the
    # filesystem collector resolves mount points inside the container.
    - "--path.rootfs=/rootfs"
    # `$$` is Compose escaping for a literal `$` in the regex.
    - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
  ports:
    - "9100:9100"
  networks:
    - lcbp3
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
  healthcheck:
    test: ["CMD", "wget", "--spider", "-q", "http://localhost:9100/metrics"]
    interval: 30s
    timeout: 10s
    retries: 3
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 5. cAdvisor (Container Metrics - ASUSTOR)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
cadvisor:
|
||||||
|
<<: [*restart_policy, *default_logging]
|
||||||
|
image: gcr.io/cadvisor/cadvisor:v0.47.2
|
||||||
|
container_name: cadvisor
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "0.5"
|
||||||
|
memory: 256M
|
||||||
|
environment:
|
||||||
|
TZ: "Asia/Bangkok"
|
||||||
|
# H4: cAdvisor listens on port 8080 inside the container; it is published on host port 8088
|
||||||
|
ports:
|
||||||
|
- "8088:8080"
|
||||||
|
networks:
|
||||||
|
- lcbp3
|
||||||
|
volumes:
|
||||||
|
- /:/rootfs:ro
|
||||||
|
- /var/run:/var/run:ro
|
||||||
|
- /sys:/sys:ro
|
||||||
|
- /var/lib/docker/:/var/lib/docker:ro
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/healthz"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 6. Loki (Log Aggregation)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
loki:
|
||||||
|
<<: [*restart_policy, *default_logging]
|
||||||
|
image: grafana/loki:2.9.0
|
||||||
|
container_name: loki
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "0.5"
|
||||||
|
memory: 512M
|
||||||
|
environment:
|
||||||
|
TZ: "Asia/Bangkok"
|
||||||
|
command: -config.file=/etc/loki/local-config.yaml
|
||||||
|
ports:
|
||||||
|
- "3100:3100"
|
||||||
|
networks:
|
||||||
|
- lcbp3
|
||||||
|
volumes:
|
||||||
|
- "/volume1/np-dms/monitoring/loki/data:/loki"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/ready"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# 7. Promtail (Log Shipper)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
promtail:
|
||||||
|
<<: [*restart_policy, *default_logging]
|
||||||
|
image: grafana/promtail:2.9.0
|
||||||
|
container_name: promtail
|
||||||
|
# L5: runs as root (user 0:0), presumably so it can read /var/lib/docker/containers
|
||||||
|
# (the host paths below are mounted read-only)
|
||||||
|
user: "0:0"
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "0.5"
|
||||||
|
memory: 256M
|
||||||
|
environment:
|
||||||
|
TZ: "Asia/Bangkok"
|
||||||
|
command: -config.file=/etc/promtail/promtail-config.yml
|
||||||
|
networks:
|
||||||
|
- lcbp3
|
||||||
|
volumes:
|
||||||
|
- "/volume1/np-dms/monitoring/promtail/config:/etc/promtail:ro"
|
||||||
|
- "/var/run/docker.sock:/var/run/docker.sock:ro"
|
||||||
|
- "/var/lib/docker/containers:/var/lib/docker/containers:ro"
|
||||||
|
depends_on:
|
||||||
|
- loki
|
||||||
+60
@@ -0,0 +1,60 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
# Prometheus self-monitoring (ASUSTOR)
|
||||||
|
- job_name: "prometheus"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:9090"]
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# ASUSTOR Metrics (Local)
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# Host metrics from Node Exporter (ASUSTOR)
|
||||||
|
- job_name: "asustor-node"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["node-exporter:9100"]
|
||||||
|
labels:
|
||||||
|
host: "asustor"
|
||||||
|
|
||||||
|
# Container metrics from cAdvisor (ASUSTOR)
|
||||||
|
- job_name: "asustor-cadvisor"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["cadvisor:8080"]
|
||||||
|
labels:
|
||||||
|
host: "asustor"
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# QNAP Metrics (Remote - 192.168.10.8)
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# Host metrics from Node Exporter (QNAP)
|
||||||
|
- job_name: "qnap-node"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:9100"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
|
||||||
|
# Container metrics from cAdvisor (QNAP)
|
||||||
|
- job_name: "qnap-cadvisor"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:8088"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
|
||||||
|
# Backend NestJS application (QNAP)
|
||||||
|
- job_name: "backend"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:3000"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
metrics_path: "/metrics"
|
||||||
|
|
||||||
|
# MariaDB Exporter (optional - QNAP)
|
||||||
|
- job_name: "mariadb"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:9104"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
+66
-60
@@ -1,92 +1,98 @@
|
|||||||
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml
|
# File: specs/04-Infrastructure-OPS/04-00-docker-compose/ASUSTOR/monitoring/prometheus/config/prometheus.yml
|
||||||
# Prometheus Configuration — รัน บน ASUSTOR AS5403T (lcbp3-monitoring stack)
|
# Prometheus Configuration — รัน บน ASUSTOR AS5403T (lcbp3-monitoring stack)
|
||||||
# Change Log:
|
# Change Log:
|
||||||
# - 2026-06-02: Initial config — scrape jobs สำหรับ ASUSTOR local + Desk-5439 remote
|
# - 2026-06-02: Initial config — merge จาก 0.yml (existing) + เพิ่ม ollama-metrics job
|
||||||
#
|
#
|
||||||
# Deploy path: /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
|
# Deploy path: /volume1/np-dms/monitoring/prometheus/config/prometheus.yml
|
||||||
# Mount (read-only): docker-compose volume → /etc/prometheus/prometheus.yml
|
# Mount (read-only): docker-compose volume → /etc/prometheus/prometheus.yml
|
||||||
|
#
|
||||||
|
# NOTE: ไฟล์นี้รวม 0.yml (config เดิมบน ASUSTOR) + job ollama-metrics ใหม่
|
||||||
|
# เมื่อ deploy แล้วให้ลบ 0.yml ออก หรือ rename เป็น 0.yml.bak
|
||||||
|
|
||||||
global:
|
global:
|
||||||
scrape_interval: 15s # ดึง metrics ทุก 15 วินาที (default)
|
scrape_interval: 15s
|
||||||
evaluation_interval: 15s # ประเมิน rules ทุก 15 วินาที
|
evaluation_interval: 15s
|
||||||
scrape_timeout: 10s
|
|
||||||
|
|
||||||
# Labels ที่ติดไปกับทุก time series ที่ scrape ได้
|
|
||||||
external_labels:
|
|
||||||
environment: 'production'
|
|
||||||
cluster: 'lcbp3'
|
|
||||||
|
|
||||||
# ─── Alerting (optional — เชื่อม Alertmanager เมื่อต้องการ) ──────────────────
|
|
||||||
# alerting:
|
|
||||||
# alertmanagers:
|
|
||||||
# - static_configs:
|
|
||||||
# - targets: ['alertmanager:9093']
|
|
||||||
|
|
||||||
# ─── Rules (optional) ────────────────────────────────────────────────────────
|
|
||||||
# rule_files:
|
|
||||||
# - /etc/prometheus/rules/*.yml
|
|
||||||
|
|
||||||
# ─── Scrape Jobs ─────────────────────────────────────────────────────────────
|
# ─── Scrape Jobs ─────────────────────────────────────────────────────────────
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
# 1. Prometheus self-monitoring (ASUSTOR)
|
# 1. Prometheus self-monitoring (ASUSTOR)
|
||||||
# ----------------------------------------------------------------
|
# ----------------------------------------------------------------
|
||||||
- job_name: 'prometheus'
|
- job_name: "prometheus"
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['localhost:9090']
|
- targets: ["localhost:9090"]
|
||||||
labels:
|
|
||||||
host: 'asustor'
|
|
||||||
service: 'prometheus'
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# ============================================
|
||||||
# 2. Node Exporter — Host metrics ของ ASUSTOR
|
# ASUSTOR Metrics (Local)
|
||||||
# ----------------------------------------------------------------
|
# ============================================
|
||||||
- job_name: 'node-exporter-asustor'
|
|
||||||
|
# Host metrics from Node Exporter (ASUSTOR)
|
||||||
|
- job_name: "asustor-node"
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['node-exporter:9100']
|
- targets: ["node-exporter:9100"]
|
||||||
labels:
|
labels:
|
||||||
host: 'asustor'
|
host: "asustor"
|
||||||
service: 'node-exporter'
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# Container metrics from cAdvisor (ASUSTOR)
|
||||||
# 3. cAdvisor — Container metrics ของ ASUSTOR
|
- job_name: "asustor-cadvisor"
|
||||||
# ----------------------------------------------------------------
|
|
||||||
- job_name: 'cadvisor-asustor'
|
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['cadvisor:8080']
|
- targets: ["cadvisor:8080"]
|
||||||
labels:
|
labels:
|
||||||
host: 'asustor'
|
host: "asustor"
|
||||||
service: 'cadvisor'
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
# ============================================
|
||||||
# 4. ollama-metrics (NorskHelsenett) — Ollama LLM metrics
|
# QNAP Metrics (Remote - 192.168.10.8)
|
||||||
# รัน บน Desk-5439 (192.168.10.100) ตาม ADR-023A
|
# ============================================
|
||||||
# sidecar expose /metrics บน port 9924
|
|
||||||
|
# Host metrics from Node Exporter (QNAP)
|
||||||
|
- job_name: "qnap-node"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:9100"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
|
||||||
|
# Container metrics from cAdvisor (QNAP)
|
||||||
|
- job_name: "qnap-cadvisor"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:8088"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
|
||||||
|
# Backend NestJS application (QNAP)
|
||||||
|
- job_name: "backend"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:3000"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
metrics_path: "/metrics"
|
||||||
|
|
||||||
|
# MariaDB Exporter (optional - QNAP)
|
||||||
|
- job_name: "mariadb"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["192.168.10.8:9104"]
|
||||||
|
labels:
|
||||||
|
host: "qnap"
|
||||||
|
|
||||||
|
# ============================================
|
||||||
|
# Desk-5439 Metrics (Remote - 192.168.10.100)
|
||||||
|
# ============================================
|
||||||
|
|
||||||
|
# ollama-metrics (NorskHelsenett) — Ollama LLM metrics
|
||||||
|
# sidecar รันบน Desk-5439 ตาม ADR-023A, expose /metrics บน port 9924
|
||||||
#
|
#
|
||||||
# Metrics ที่ collect:
|
# Metrics ที่ collect:
|
||||||
# ollama_prompt_tokens_total — prompt tokens รวม
|
# ollama_prompt_tokens_total — prompt tokens รวม
|
||||||
# ollama_generated_tokens_total — generated tokens รวม
|
# ollama_generated_tokens_total — generated tokens รวม
|
||||||
# ollama_request_duration_seconds — latency histogram
|
# ollama_request_duration_seconds — latency histogram
|
||||||
# ollama_time_per_token_seconds — inference speed
|
# ollama_time_per_token_seconds — inference speed (tok/s)
|
||||||
# ollama_loaded_models — จำนวน model ใน VRAM
|
# ollama_loaded_models — จำนวน model ใน VRAM
|
||||||
# ollama_model_loaded — 1/0 per model
|
# ollama_model_loaded — 1/0 per model
|
||||||
# ollama_model_ram_mb — VRAM usage (MB) per model
|
# ollama_model_ram_mb — VRAM usage (MB) per model
|
||||||
# ----------------------------------------------------------------
|
- job_name: "ollama-metrics"
|
||||||
- job_name: 'ollama-metrics'
|
scrape_interval: 30s
|
||||||
scrape_interval: 30s # Ollama metrics ไม่เปลี่ยนเร็ว — 30s เพียงพอ
|
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['192.168.10.100:9924']
|
- targets: ["192.168.10.100:9924"]
|
||||||
labels:
|
labels:
|
||||||
host: 'desk-5439'
|
host: "desk-5439"
|
||||||
service: 'ollama'
|
service: "ollama"
|
||||||
role: 'ai-inference'
|
role: "ai-inference"
|
||||||
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
# 5. Loki — Log aggregation health (ASUSTOR)
|
|
||||||
# ----------------------------------------------------------------
|
|
||||||
- job_name: 'loki'
|
|
||||||
static_configs:
|
|
||||||
- targets: ['loki:3100']
|
|
||||||
labels:
|
|
||||||
host: 'asustor'
|
|
||||||
service: 'loki'
|
|
||||||
|
|||||||
Reference in New Issue
Block a user