add the ansible playbooks for the monitoring stack

This commit is contained in:
tsvetkov
2026-02-27 01:28:08 +00:00
commit d45bbef509
21 changed files with 1017 additions and 0 deletions

View File

@@ -0,0 +1,187 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 1,
"title": "Infrastructure Overview",
"type": "row"
},
{
  "datasource": { "type": "prometheus", "uid": "prometheus-infra" },
  "description": "Number of scrape targets currently reporting up == 1. Falls back to 0 (shown in red) instead of 'No data' when no target is up.",
  "fieldConfig": {
    "defaults": {
      "mappings": [
        { "options": { "0": { "color": "red", "text": "0" } }, "type": "value" }
      ],
      "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }
    }
  },
  "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
  "id": 2,
  "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
  "title": "Targets Up",
  "type": "stat",
  "targets": [{ "expr": "count(up == 1) or vector(0)", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "green", "text": "0" } }, "type": "value" }
],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }
}
},
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
"id": 3,
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Targets Down",
"type": "stat",
"targets": [{ "expr": "count(up == 0) or vector(0)", "refId": "A" }]
},
{
  "datasource": { "type": "loki", "uid": "loki" },
  "description": "Log lines ingested across all jobs during the trailing hour. Each sample of the query is already a rolling 1h count, so the headline value is the latest sample (not a sum of samples); the sparkline shows how that rolling count changed over the dashboard range.",
  "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
  "id": 4,
  "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
  "title": "Log Lines (1h)",
  "type": "stat",
  "targets": [{ "expr": "sum(count_over_time({job=~\".+\"}[1h])) or vector(0)", "refId": "A" }]
},
{
  "datasource": { "type": "loki", "uid": "loki" },
  "description": "Log lines with severity err, error, crit, alert or emerg during the trailing hour. Each sample of the query is already a rolling 1h count, so the headline value and the 10/50 thresholds use the latest sample rather than a sum of overlapping windows.",
  "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
  "id": 5,
  "fieldConfig": {
    "defaults": {
      "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 50 }] }
    }
  },
  "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
  "title": "Error Logs (1h)",
  "type": "stat",
  "targets": [{ "expr": "sum(count_over_time({severity=~\"err|error|crit|alert|emerg\"}[1h])) or vector(0)", "refId": "A" }]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
"id": 10,
"title": "Network Devices",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"custom": { "lineWidth": 1, "fillOpacity": 10 },
"unit": "bps"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
"id": 11,
"options": { "legend": { "displayMode": "list", "placement": "bottom" } },
"title": "Network Interface Traffic",
"type": "timeseries",
"targets": [
{ "expr": "rate(ifHCInOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} In", "refId": "A" },
{ "expr": "rate(ifHCOutOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} Out", "refId": "B" }
]
},
{
  "datasource": { "type": "prometheus", "uid": "prometheus-infra" },
  "description": "Current ifOperStatus of every interface reported by the snmp job. Numeric states are mapped to their IF-MIB names (1 up, 2 down, 3 testing, 4 unknown, 5 dormant, 6 notPresent, 7 lowerLayerDown).",
  "fieldConfig": {
    "defaults": {
      "mappings": [
        {
          "options": {
            "1": { "color": "green", "text": "Up" },
            "2": { "color": "red", "text": "Down" },
            "3": { "color": "blue", "text": "Testing" },
            "4": { "color": "purple", "text": "Unknown" },
            "5": { "color": "yellow", "text": "Dormant" },
            "6": { "color": "orange", "text": "Not Present" },
            "7": { "color": "red", "text": "Lower Layer Down" }
          },
          "type": "value"
        }
      ]
    }
  },
  "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
  "id": 12,
  "options": { "showHeader": true },
  "title": "Interface Status",
  "type": "table",
  "targets": [{ "expr": "ifOperStatus{job=\"snmp\"}", "format": "table", "instant": true, "refId": "A" }],
  "transformations": [
    { "id": "organize", "options": { "excludeByName": { "Time": true, "__name__": true, "job": true }, "renameByName": { "device": "Device", "ifDescr": "Interface", "Value": "Status" } } }
  ]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
"id": 20,
"title": "Proxmox / VMs",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 15 },
"id": 21,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "CPU Usage",
"type": "gauge",
"targets": [{ "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"proxmox.*|nfs.*\"}[5m])) by (instance)", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 15 },
"id": 22,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "Memory Usage",
"type": "gauge",
"targets": [{ "expr": "1 - (node_memory_MemAvailable_bytes{instance=~\"proxmox.*|nfs.*\"} / node_memory_MemTotal_bytes{instance=~\"proxmox.*|nfs.*\"})", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 15 },
"id": 23,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "Disk Usage",
"type": "gauge",
"targets": [{ "expr": "1 - (node_filesystem_avail_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"} / node_filesystem_size_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"})", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
"id": 30,
"title": "Recent Logs",
"type": "row"
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 22 },
"id": 31,
"options": { "showTime": true, "showLabels": true, "showCommonLabels": false, "wrapLogMessage": true, "prettifyLogMessage": false, "enableLogDetails": true, "sortOrder": "Descending" },
"title": "All Logs",
"type": "logs",
"targets": [{ "expr": "{job=~\".+\"}", "refId": "A" }]
}
],
"refresh": "30s",
"schemaVersion": 38,
"style": "dark",
"tags": ["home-infra"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Home Infrastructure Overview",
"uid": "home-infra-overview",
"version": 1
}