add the ansible playbooks for the monitoring stack
This commit is contained in:
29
ansible/roles/common/tasks/main.yml
Normal file
29
ansible/roles/common/tasks/main.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
# Base packages the monitoring roles rely on (downloads, archive handling, TLS roots).
- name: Install common dependencies
  ansible.builtin.apt:
    name:
      - curl
      - tar
      - gzip
      - ca-certificates
    state: present
    # Refresh the package index so the install also works on a freshly
    # provisioned host; skip the refresh when it ran within the last hour.
    update_cache: true
    cache_valid_time: 3600
|
||||
|
||||
# System account without a login shell. Its home is only recorded here
# (create_home is off); /var/lib/monitoring is created by the directory task
# of this file.
- name: Create monitoring user
  ansible.builtin.user:
    name: monitoring
    system: true
    home: /var/lib/monitoring
    create_home: false
    shell: /usr/sbin/nologin
|
||||
|
||||
# Config, state and log directories, all owned by the monitoring account.
- name: Create common directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: monitoring
    group: monitoring
  loop:
    - /etc/monitoring
    - /var/lib/monitoring
    - /var/log/monitoring
|
||||
187
ansible/roles/grafana/files/dashboards/home-infra-overview.json
Normal file
187
ansible/roles/grafana/files/dashboards/home-infra-overview.json
Normal file
@@ -0,0 +1,187 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
||||
"id": 1,
|
||||
"title": "Infrastructure Overview",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{ "options": { "0": { "color": "red", "text": "DOWN" }, "1": { "color": "green", "text": "UP" } }, "type": "value" }
|
||||
],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
|
||||
"id": 2,
|
||||
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
|
||||
"title": "Targets Up",
|
||||
"type": "stat",
|
||||
"targets": [{ "expr": "count(up == 1)", "refId": "A" }]
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{ "options": { "0": { "color": "green", "text": "0" } }, "type": "value" }
|
||||
],
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
|
||||
"id": 3,
|
||||
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
|
||||
"title": "Targets Down",
|
||||
"type": "stat",
|
||||
"targets": [{ "expr": "count(up == 0) or vector(0)", "refId": "A" }]
|
||||
},
|
||||
{
  "datasource": { "type": "loki", "uid": "loki" },
  "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
  "id": 4,
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
    "textMode": "auto"
  },
  "title": "Log Lines (1h)",
  "type": "stat",
  "targets": [{ "expr": "sum(count_over_time({job=~\".+\"}[1h]))", "refId": "A" }]
},
|
||||
{
  "datasource": { "type": "loki", "uid": "loki" },
  "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
  "id": 5,
  "fieldConfig": {
    "defaults": {
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 10 },
          { "color": "red", "value": 50 }
        ]
      }
    }
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
    "textMode": "auto"
  },
  "title": "Error Logs (1h)",
  "type": "stat",
  "targets": [{ "expr": "sum(count_over_time({severity=~\"err|error|crit|critical|alert|emerg|emergency\"}[1h])) or vector(0)", "refId": "A" }]
},
|
||||
{
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
|
||||
"id": 10,
|
||||
"title": "Network Devices",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": { "lineWidth": 1, "fillOpacity": 10 },
|
||||
"unit": "bps"
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
|
||||
"id": 11,
|
||||
"options": { "legend": { "displayMode": "list", "placement": "bottom" } },
|
||||
"title": "Network Interface Traffic",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{ "expr": "rate(ifHCInOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} In", "refId": "A" },
|
||||
{ "expr": "rate(ifHCOutOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} Out", "refId": "B" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{ "options": { "1": { "color": "green", "text": "Up" }, "2": { "color": "red", "text": "Down" } }, "type": "value" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
|
||||
"id": 12,
|
||||
"options": { "showHeader": true },
|
||||
"title": "Interface Status",
|
||||
"type": "table",
|
||||
"targets": [{ "expr": "ifOperStatus{job=\"snmp\"}", "format": "table", "instant": true, "refId": "A" }],
|
||||
"transformations": [
|
||||
{ "id": "organize", "options": { "excludeByName": { "Time": true, "__name__": true, "job": true }, "renameByName": { "device": "Device", "ifDescr": "Interface", "Value": "Status" } } }
|
||||
]
|
||||
},
|
||||
{
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
|
||||
"id": 20,
|
||||
"title": "Proxmox / VMs",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 15 },
|
||||
"id": 21,
|
||||
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
|
||||
"title": "CPU Usage",
|
||||
"type": "gauge",
|
||||
"targets": [{ "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"proxmox.*|nfs.*\"}[5m])) by (instance)", "legendFormat": "{{instance}}", "refId": "A" }]
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 15 },
|
||||
"id": 22,
|
||||
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
|
||||
"title": "Memory Usage",
|
||||
"type": "gauge",
|
||||
"targets": [{ "expr": "1 - (node_memory_MemAvailable_bytes{instance=~\"proxmox.*|nfs.*\"} / node_memory_MemTotal_bytes{instance=~\"proxmox.*|nfs.*\"})", "legendFormat": "{{instance}}", "refId": "A" }]
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 15 },
|
||||
"id": 23,
|
||||
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
|
||||
"title": "Disk Usage",
|
||||
"type": "gauge",
|
||||
"targets": [{ "expr": "1 - (node_filesystem_avail_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"} / node_filesystem_size_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"})", "legendFormat": "{{instance}}", "refId": "A" }]
|
||||
},
|
||||
{
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
|
||||
"id": 30,
|
||||
"title": "Recent Logs",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 22 },
|
||||
"id": 31,
|
||||
"options": { "showTime": true, "showLabels": true, "showCommonLabels": false, "wrapLogMessage": true, "prettifyLogMessage": false, "enableLogDetails": true, "sortOrder": "Descending" },
|
||||
"title": "All Logs",
|
||||
"type": "logs",
|
||||
"targets": [{ "expr": "{job=~\".+\"}", "refId": "A" }]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["home-infra"],
|
||||
"templating": { "list": [] },
|
||||
"time": { "from": "now-1h", "to": "now" },
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Home Infrastructure Overview",
|
||||
"uid": "home-infra-overview",
|
||||
"version": 1
|
||||
}
|
||||
5
ansible/roles/grafana/handlers/main.yml
Normal file
5
ansible/roles/grafana/handlers/main.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
# Handler: restarts the grafana-server unit when a task notifies it.
- name: Restart grafana
  ansible.builtin.systemd:
    state: restarted
    name: grafana-server
|
||||
77
ansible/roles/grafana/tasks/main.yml
Normal file
77
ansible/roles/grafana/tasks/main.yml
Normal file
@@ -0,0 +1,77 @@
|
||||
---
|
||||
# /etc/apt/keyrings is not present on every Debian/Ubuntu release, so create
# it before downloading the key into it.
- name: Ensure APT keyrings directory exists
  ansible.builtin.file:
    path: /etc/apt/keyrings
    state: directory
    mode: "0755"

# Repository signing key, referenced by the signed-by option of the APT source.
- name: Add Grafana APT key
  ansible.builtin.get_url:
    url: https://apt.grafana.com/gpg.key
    dest: /etc/apt/keyrings/grafana.asc
    mode: "0644"
|
||||
|
||||
# Registers the Grafana stable repository under the source file name
# "grafana", pinned to the downloaded signing key.
- name: Add Grafana APT repository
  ansible.builtin.apt_repository:
    filename: grafana
    state: present
    repo: "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main"
|
||||
|
||||
# Installs the grafana package after refreshing the package index.
- name: Install Grafana
  ansible.builtin.apt:
    update_cache: true
    name: grafana
    state: present
  notify: Restart grafana
|
||||
|
||||
# Directories for provisioning files and for the dashboard JSON files.
- name: Create Grafana provisioning directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: grafana
    group: grafana
  loop:
    - /etc/grafana/provisioning/datasources
    - /etc/grafana/provisioning/dashboards
    - /var/lib/grafana/dashboards
|
||||
|
||||
# Renders the datasource provisioning file; Grafana is restarted on change.
- name: Deploy Grafana datasources
  ansible.builtin.template:
    src: datasources.yml.j2
    dest: /etc/grafana/provisioning/datasources/datasources.yml
    mode: "0640"
    owner: grafana
    group: grafana
  notify: Restart grafana
|
||||
|
||||
# Renders the dashboard provider definition; Grafana is restarted on change.
- name: Deploy Grafana dashboard provisioning
  ansible.builtin.template:
    src: dashboards.yml.j2
    dest: /etc/grafana/provisioning/dashboards/dashboards.yml
    mode: "0640"
    owner: grafana
    group: grafana
  notify: Restart grafana
|
||||
|
||||
# Copies every dashboard JSON shipped with the role. When the glob matches
# nothing the loop is empty and the task is skipped, so no error suppression
# is needed; genuine copy failures now stop the play instead of being hidden.
- name: Deploy default dashboards
  ansible.builtin.copy:
    src: "{{ item }}"
    dest: /var/lib/grafana/dashboards/
    owner: grafana
    group: grafana
    mode: "0644"
  # query() always returns a list, even for zero or one match.
  loop: "{{ query('fileglob', 'files/dashboards/*.json') }}"
  notify: Restart grafana
|
||||
|
||||
# Sets the listen address and port in grafana.ini. An existing setting,
# commented out or not, is replaced in place. If no such line exists the new
# one is inserted right after the [server] header rather than at end of file,
# where it would fall under an unrelated section.
- name: Configure Grafana
  ansible.builtin.lineinfile:
    path: /etc/grafana/grafana.ini
    regexp: "{{ item.regexp }}"
    line: "{{ item.line }}"
    insertafter: '^\[server\]'
    state: present
  loop:
    - regexp: '^;?http_port'
      line: 'http_port = 3000'
    - regexp: '^;?http_addr'
      line: 'http_addr = 0.0.0.0'
  notify: Restart grafana
|
||||
|
||||
# Enables grafana-server at boot and makes sure it is running now.
- name: Enable and start Grafana
  ansible.builtin.systemd:
    name: grafana-server
    state: started
    enabled: true
|
||||
13
ansible/roles/grafana/templates/dashboards.yml.j2
Normal file
13
ansible/roles/grafana/templates/dashboards.yml.j2
Normal file
@@ -0,0 +1,13 @@
|
||||
# Grafana dashboard provider: loads dashboard JSON files from disk into the
# "Home Infra" folder and re-scans the directory every 30 seconds.
apiVersion: 1

providers:
  - name: "Home Infrastructure"
    type: file
    orgId: 1
    folder: "Home Infra"
    folderUid: "home-infra"
    updateIntervalSeconds: 30
    allowUiUpdates: true
    disableDeletion: false
    options:
      path: /var/lib/grafana/dashboards
|
||||
34
ansible/roles/grafana/templates/datasources.yml.j2
Normal file
34
ansible/roles/grafana/templates/datasources.yml.j2
Normal file
@@ -0,0 +1,34 @@
|
||||
# Grafana datasource provisioning.
#
# Each datasource gets a fixed uid. The provisioned dashboard refers to its
# datasources by uid ("prometheus-infra", "loki"); without an explicit uid
# Grafana generates one and those panels cannot resolve their datasource.
# NOTE(review): if these datasources already exist under generated uids,
# confirm how Grafana handles the uid change on the next provisioning run.
apiVersion: 1

datasources:
  # Local Prometheus (RPi - infrastructure metrics)
  - name: Prometheus-Infra
    uid: prometheus-infra
    type: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    editable: false
    jsonData:
      timeInterval: "30s"
      httpMethod: POST

  # Cluster Prometheus (Talos - Kubernetes metrics)
  - name: Prometheus-Cluster
    uid: prometheus-cluster
    type: prometheus
    access: proxy
    # Quoted so an unusual or empty value still renders as a YAML string.
    url: "{{ prometheus_cluster_url }}"
    isDefault: false
    editable: false
    jsonData:
      timeInterval: "30s"
      httpMethod: POST

  # Loki (Talos cluster - centralized logs)
  # NOTE(review): Promtail pushes with tenant_id "home-infra"; if Loki runs
  # with multi-tenancy enabled this datasource presumably needs a matching
  # X-Scope-OrgID header - confirm against the Loki configuration.
  - name: Loki
    uid: loki
    type: loki
    access: proxy
    url: "{{ loki_url }}"
    isDefault: false
    editable: false
    jsonData:
      maxLines: 1000
|
||||
9
ansible/roles/prometheus/handlers/main.yml
Normal file
9
ansible/roles/prometheus/handlers/main.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
|
||||
# Handler: makes systemd re-read its unit files.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true
|
||||
|
||||
# Handler: restarts the prometheus unit when a task notifies it.
- name: Restart prometheus
  ansible.builtin.systemd:
    state: restarted
    name: prometheus
|
||||
82
ansible/roles/prometheus/tasks/main.yml
Normal file
82
ansible/roles/prometheus/tasks/main.yml
Normal file
@@ -0,0 +1,82 @@
|
||||
---
|
||||
# Configuration and TSDB directories, owned by the monitoring account.
- name: Create Prometheus directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: monitoring
    group: monitoring
  loop:
    - /etc/prometheus
    - /var/lib/prometheus
|
||||
|
||||
# --- Install or upgrade the Prometheus binaries ---------------------------
# The release is downloaded only when no binary exists or when the installed
# binary does not report the requested version.

- name: Check if Prometheus is installed
  ansible.builtin.stat:
    path: /usr/local/bin/prometheus
  register: prometheus_binary

# Read-only probe: never reports a change, never fails the play.
- name: Get installed Prometheus version
  ansible.builtin.command:
    argv:
      - /usr/local/bin/prometheus
      - --version
  register: prometheus_installed_version
  changed_when: false
  failed_when: false
  when: prometheus_binary.stat.exists

# Computed once and reused by the three tasks below. The match is on
# "version <x> " rather than on the bare version string, so that e.g. 2.5 is
# not considered present in 2.50.1. Both stdout and stderr are inspected
# because the stream used for --version is not guaranteed.
# NOTE(review): assumes the usual "<name>, version <x> (branch: ...)" banner -
# confirm for the releases in use.
- name: Decide whether Prometheus needs to be installed or upgraded
  ansible.builtin.set_fact:
    prometheus_needs_install: >-
      {{
        (not prometheus_binary.stat.exists)
        or (('version ' ~ prometheus_version ~ ' ') not in
            ((prometheus_installed_version.stdout | default(''))
             ~ ' '
             ~ (prometheus_installed_version.stderr | default(''))))
      }}

- name: Download Prometheus
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz"
    dest: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    mode: "0644"
  when: prometheus_needs_install | bool

- name: Extract Prometheus
  ansible.builtin.unarchive:
    src: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    dest: /tmp
    remote_src: true
  when: prometheus_needs_install | bool

- name: Install Prometheus binaries
  ansible.builtin.copy:
    src: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    mode: "0755"
    remote_src: true
  loop:
    - prometheus
    - promtool
  notify: Restart prometheus
  when: prometheus_needs_install | bool
|
||||
|
||||
# Renders prometheus.yml and checks it with promtool before it replaces the
# live file, so a broken render fails this task instead of taking the service
# down on the next restart. promtool is installed earlier in this file next
# to the prometheus binary.
- name: Deploy Prometheus configuration
  ansible.builtin.template:
    src: prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
    validate: /usr/local/bin/promtool check config %s
  notify: Restart prometheus
|
||||
|
||||
# Installs the unit file; a change triggers a daemon reload and a restart.
- name: Deploy Prometheus systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/prometheus.service
    src: prometheus.service.j2
    mode: "0644"
  notify:
    - Reload systemd
    - Restart prometheus
|
||||
|
||||
# Enables the unit at boot and starts it. The daemon reload is requested here
# as well as in the handler.
# NOTE(review): presumably because handlers run later than this task - confirm.
- name: Enable and start Prometheus
  ansible.builtin.systemd:
    daemon_reload: true
    name: prometheus
    state: started
    enabled: true
|
||||
|
||||
# Removes the release archive and the extracted tree from /tmp.
- name: Clean up downloaded files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    - "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}"
|
||||
30
ansible/roles/prometheus/templates/prometheus.service.j2
Normal file
30
ansible/roles/prometheus/templates/prometheus.service.j2
Normal file
@@ -0,0 +1,30 @@
|
||||
# systemd unit for the Prometheus server in /usr/local/bin, run as the
# unprivileged "monitoring" account.
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=monitoring
Group=monitoring
# NOTE(review): the lifecycle HTTP endpoints are enabled while the server
# listens on all interfaces - confirm this exposure is intended.
ExecStart=/usr/local/bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus \
    --storage.tsdb.retention.time=15d \
    --web.listen-address=0.0.0.0:9090 \
    --web.enable-lifecycle \
    --log.level=info
# Reload is implemented by sending SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID

Restart=always
RestartSec=5
SyslogIdentifier=prometheus

# Hardening
NoNewPrivileges=true
ProtectHome=true
ProtectSystem=full

[Install]
WantedBy=multi-user.target
|
||||
68
ansible/roles/prometheus/templates/prometheus.yml.j2
Normal file
68
ansible/roles/prometheus/templates/prometheus.yml.j2
Normal file
@@ -0,0 +1,68 @@
|
||||
# Prometheus server configuration (Jinja2 template).
# The optional jobs are rendered only when their target list variable is
# defined and non-empty. Template tags are kept at column 0 so they add no
# indentation to the rendered YAML.
global:
  scrape_interval: 30s
  evaluation_interval: 30s
  external_labels:
    monitor: 'home-infra'
    source: 'rpi'

scrape_configs:
  # Self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 'localhost:9090'
        labels:
          instance: 'rpi-prometheus'

  # SNMP targets (network devices)
  # Each device address becomes the "target" query parameter and the instance
  # label, while the scrape itself is sent to localhost:9116.
{% if snmp_targets | default([]) | length > 0 %}
  - job_name: 'snmp'
    scrape_interval: 60s
    scrape_timeout: 30s
    metrics_path: /snmp
    params:
      # Default module, can be overridden per-target
      module:
        - if_mib
    static_configs:
{% for target in snmp_targets %}
      - targets:
          - '{{ target.ip }}'
        labels:
          device: '{{ target.name }}'
{% endfor %}
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9116
{% endif %}

  # Node exporter targets (VMs with node_exporter)
{% if node_exporter_targets | default([]) | length > 0 %}
  - job_name: 'node'
    static_configs:
{% for target in node_exporter_targets %}
      - targets:
          - '{{ target.ip }}:{{ target.port | default(9100) }}'
        labels:
          instance: '{{ target.name }}'
{% endfor %}
{% endif %}

  # Proxmox PVE exporter
{% if proxmox_targets | default([]) | length > 0 %}
  - job_name: 'proxmox'
    scrape_interval: 60s
    metrics_path: /pve
    params:
      module:
        - default
    static_configs:
{% for target in proxmox_targets %}
      - targets:
          - '{{ target.ip }}:{{ target.port | default(9221) }}'
        labels:
          instance: '{{ target.name }}'
{% endfor %}
{% endif %}

  # SNMP exporter self-metrics
  - job_name: 'snmp-exporter'
    static_configs:
      - targets:
          - 'localhost:9116'
|
||||
9
ansible/roles/promtail/handlers/main.yml
Normal file
9
ansible/roles/promtail/handlers/main.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
|
||||
# Handler: makes systemd re-read its unit files.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true
|
||||
|
||||
# Handler: restarts the promtail unit when a task notifies it.
- name: Restart promtail
  ansible.builtin.systemd:
    state: restarted
    name: promtail
|
||||
78
ansible/roles/promtail/tasks/main.yml
Normal file
78
ansible/roles/promtail/tasks/main.yml
Normal file
@@ -0,0 +1,78 @@
|
||||
---
|
||||
# Configuration directory and the state directory that holds positions.yaml.
- name: Create Promtail directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: monitoring
    group: monitoring
  loop:
    - /etc/promtail
    - /var/lib/promtail
|
||||
|
||||
# --- Install or upgrade the Promtail binary -------------------------------
# Promtail is now version-aware like the Prometheus role: changing
# promtail_version upgrades an existing install instead of being ignored
# once a binary is present.

- name: Check if Promtail is installed
  ansible.builtin.stat:
    path: /usr/local/bin/promtail
  register: promtail_binary

# Read-only probe: never reports a change, never fails the play.
- name: Get installed Promtail version
  ansible.builtin.command:
    argv:
      - /usr/local/bin/promtail
      - -version
  register: promtail_installed_version
  changed_when: false
  failed_when: false
  when: promtail_binary.stat.exists

# Install when the binary is missing, or when the probe succeeded and reports
# a different version. A failed probe is treated as "leave the existing
# binary alone", which is what happened before this check existed.
# NOTE(review): assumes the "<name>, version <x> (branch: ...)" banner -
# confirm for the releases in use.
- name: Decide whether Promtail needs to be installed or upgraded
  ansible.builtin.set_fact:
    promtail_needs_install: >-
      {{
        (not promtail_binary.stat.exists)
        or ((promtail_installed_version.rc | default(1)) == 0
            and ('version ' ~ promtail_version ~ ' ') not in
                ((promtail_installed_version.stdout | default(''))
                 ~ ' '
                 ~ (promtail_installed_version.stderr | default(''))))
      }}

# The release is a zip archive; unzip must be present before extraction.
- name: Install unzip
  ansible.builtin.apt:
    name: unzip
    state: present
  when: ansible_os_family == "Debian"

- name: Download Promtail
  ansible.builtin.get_url:
    url: "https://github.com/grafana/loki/releases/download/v{{ promtail_version }}/promtail-linux-{{ go_arch }}.zip"
    dest: "/tmp/promtail-{{ promtail_version }}.zip"
    mode: "0644"
  when: promtail_needs_install | bool

- name: Extract Promtail
  ansible.builtin.unarchive:
    src: "/tmp/promtail-{{ promtail_version }}.zip"
    dest: /tmp
    remote_src: true
  when: promtail_needs_install | bool

- name: Install Promtail binary
  ansible.builtin.copy:
    src: "/tmp/promtail-linux-{{ go_arch }}"
    dest: /usr/local/bin/promtail
    mode: "0755"
    remote_src: true
  notify: Restart promtail
  when: promtail_needs_install | bool
|
||||
|
||||
# Renders the Promtail configuration; the service is restarted on change.
- name: Deploy Promtail configuration
  ansible.builtin.template:
    dest: /etc/promtail/promtail.yml
    src: promtail.yml.j2
    mode: "0644"
    owner: monitoring
    group: monitoring
  notify: Restart promtail
|
||||
|
||||
# Installs the unit file; a change triggers a daemon reload and a restart.
- name: Deploy Promtail systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/promtail.service
    src: promtail.service.j2
    mode: "0644"
  notify:
    - Reload systemd
    - Restart promtail
|
||||
|
||||
# Enables the unit at boot and starts it, reloading systemd first.
- name: Enable and start Promtail
  ansible.builtin.systemd:
    daemon_reload: true
    name: promtail
    state: started
    enabled: true
|
||||
|
||||
# Removes the downloaded archive and the extracted binary from /tmp.
- name: Clean up downloaded files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - "/tmp/promtail-{{ promtail_version }}.zip"
    - "/tmp/promtail-linux-{{ go_arch }}"
|
||||
24
ansible/roles/promtail/templates/promtail.service.j2
Normal file
24
ansible/roles/promtail/templates/promtail.service.j2
Normal file
@@ -0,0 +1,24 @@
|
||||
# systemd unit for the Promtail log shipper installed in /usr/local/bin.
[Unit]
Description=Promtail Log Collector
Documentation=https://grafana.com/docs/loki/latest/clients/promtail/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs as root so it can bind the privileged syslog port (514) and read the
# journal.
User=root
Group=root
ExecStart=/usr/local/bin/promtail \
    -config.file=/etc/promtail/promtail.yml \
    -config.expand-env=true

Restart=always
RestartSec=5
SyslogIdentifier=promtail

# CAP_NET_BIND_SERVICE is the capability a non-root setup could use for the
# port instead of running as root.
AmbientCapabilities=CAP_NET_BIND_SERVICE

[Install]
WantedBy=multi-user.target
|
||||
56
ansible/roles/promtail/templates/promtail.yml.j2
Normal file
56
ansible/roles/promtail/templates/promtail.yml.j2
Normal file
@@ -0,0 +1,56 @@
|
||||
# Promtail configuration (Jinja2 template): receives syslog from network
# devices, tails the local systemd journal, and pushes both to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /var/lib/promtail/positions.yaml

clients:
  # Quoted so the rendered value is always read as a single YAML string.
  - url: "{{ loki_url }}/loki/api/v1/push"
    tenant_id: home-infra
    batchwait: 1s
    batchsize: 1048576
    timeout: 10s

scrape_configs:
  # Syslog listener for network devices
  - job_name: syslog
    syslog:
      listen_address: 0.0.0.0:514
      listen_protocol: udp
      idle_timeout: 60s
      label_structured_data: true
      labels:
        job: syslog
        source: network-devices
    # Promote parsed syslog header fields to Loki labels.
    relabel_configs:
      - source_labels: ['__syslog_message_hostname']
        target_label: 'host'
      - source_labels: ['__syslog_message_severity']
        target_label: 'severity'
      - source_labels: ['__syslog_message_facility']
        target_label: 'facility'
      - source_labels: ['__syslog_message_app_name']
        target_label: 'app'
    pipeline_stages:
      - match:
          selector: '{job="syslog"}'
          stages:
            # Extract common patterns from network device logs
            # NOTE(review): the first IPv4 address found in each message
            # becomes the src_ip label; label values are unbounded here, so
            # confirm the resulting stream cardinality is acceptable.
            - regex:
                expression: '(?P<src_ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
            - labels:
                src_ip:

  # Local system journal (RPi logs)
  - job_name: journal
    journal:
      max_age: 12h
      labels:
        job: systemd-journal
        host: rpi
    relabel_configs:
      - source_labels: ['__journal__systemd_unit']
        target_label: 'unit'
      - source_labels: ['__journal_priority_keyword']
        target_label: 'severity'
|
||||
9
ansible/roles/snmp_exporter/handlers/main.yml
Normal file
9
ansible/roles/snmp_exporter/handlers/main.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
|
||||
# Handler: makes systemd re-read its unit files.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true
|
||||
|
||||
# Handler: restarts the snmp_exporter unit when a task notifies it.
- name: Restart snmp_exporter
  ansible.builtin.systemd:
    state: restarted
    name: snmp_exporter
|
||||
71
ansible/roles/snmp_exporter/tasks/main.yml
Normal file
71
ansible/roles/snmp_exporter/tasks/main.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
---
|
||||
# Configuration directory for snmp.yml, owned by the monitoring account.
- name: Create SNMP exporter directory
  ansible.builtin.file:
    state: directory
    path: /etc/snmp_exporter
    mode: "0755"
    owner: monitoring
    group: monitoring
|
||||
|
||||
# --- Install or upgrade the SNMP exporter ----------------------------------
# Version-aware like the Prometheus role: changing snmp_exporter_version
# upgrades an existing install instead of being ignored once a binary exists.

- name: Check if SNMP exporter is installed
  ansible.builtin.stat:
    path: /usr/local/bin/snmp_exporter
  register: snmp_exporter_binary

# Read-only probe: never reports a change, never fails the play.
- name: Get installed SNMP exporter version
  ansible.builtin.command:
    argv:
      - /usr/local/bin/snmp_exporter
      - --version
  register: snmp_exporter_installed_version
  changed_when: false
  failed_when: false
  when: snmp_exporter_binary.stat.exists

# Install when the binary is missing, or when the probe succeeded and reports
# a different version. A failed probe leaves the existing binary alone, as
# before. stdout and stderr are both inspected because the stream used for
# --version is not guaranteed.
# NOTE(review): assumes the "<name>, version <x> (branch: ...)" banner -
# confirm for the releases in use.
- name: Decide whether SNMP exporter needs to be installed or upgraded
  ansible.builtin.set_fact:
    snmp_exporter_needs_install: >-
      {{
        (not snmp_exporter_binary.stat.exists)
        or ((snmp_exporter_installed_version.rc | default(1)) == 0
            and ('version ' ~ snmp_exporter_version ~ ' ') not in
                ((snmp_exporter_installed_version.stdout | default(''))
                 ~ ' '
                 ~ (snmp_exporter_installed_version.stderr | default(''))))
      }}

- name: Download SNMP exporter
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/snmp_exporter/releases/download/v{{ snmp_exporter_version }}/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}.tar.gz"
    dest: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    mode: "0644"
  when: snmp_exporter_needs_install | bool

- name: Extract SNMP exporter
  ansible.builtin.unarchive:
    src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    dest: /tmp
    remote_src: true
  when: snmp_exporter_needs_install | bool

- name: Install SNMP exporter binary
  ansible.builtin.copy:
    src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}/snmp_exporter"
    dest: /usr/local/bin/snmp_exporter
    mode: "0755"
    remote_src: true
  notify: Restart snmp_exporter
  when: snmp_exporter_needs_install | bool

# The snmp.yml bundled with the release is installed together with the
# binary, so it is replaced on upgrade as well.
# NOTE(review): this overwrites local edits to /etc/snmp_exporter/snmp.yml
# whenever the binary is (re)installed - confirm that is acceptable.
- name: Install default SNMP exporter config
  ansible.builtin.copy:
    src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}/snmp.yml"
    dest: /etc/snmp_exporter/snmp.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
    remote_src: true
  notify: Restart snmp_exporter
  when: snmp_exporter_needs_install | bool
|
||||
|
||||
# Installs the unit file; a change triggers a daemon reload and a restart.
- name: Deploy SNMP exporter systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/snmp_exporter.service
    src: snmp_exporter.service.j2
    mode: "0644"
  notify:
    - Reload systemd
    - Restart snmp_exporter
|
||||
|
||||
# Enables the unit at boot and starts it, reloading systemd first.
- name: Enable and start SNMP exporter
  ansible.builtin.systemd:
    daemon_reload: true
    name: snmp_exporter
    state: started
    enabled: true
|
||||
|
||||
# Removes the release archive and the extracted tree from /tmp.
- name: Clean up downloaded files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    - "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}"
|
||||
@@ -0,0 +1,25 @@
|
||||
# systemd unit for the Prometheus SNMP exporter in /usr/local/bin, run as the
# unprivileged "monitoring" account.
[Unit]
Description=SNMP Exporter for Prometheus
Documentation=https://github.com/prometheus/snmp_exporter
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=monitoring
Group=monitoring
ExecStart=/usr/local/bin/snmp_exporter \
    --config.file=/etc/snmp_exporter/snmp.yml \
    --web.listen-address=0.0.0.0:9116 \
    --log.level=info

Restart=always
RestartSec=5
SyslogIdentifier=snmp_exporter

# Hardening
NoNewPrivileges=true
ProtectHome=true
ProtectSystem=full

[Install]
WantedBy=multi-user.target
|
||||
Reference in New Issue
Block a user