add the ansible playbooks for the monitoring stack

This commit is contained in:
tsvetkov
2026-02-27 01:28:08 +00:00
commit d45bbef509
21 changed files with 1017 additions and 0 deletions

13
README.md Normal file
View File

@@ -0,0 +1,13 @@
The ansible playbook found here is used to deploy a monitoring stack for a homelab.
Detailed instructions about the project can be found in its [own repository](https://git.96-fromsofia.net/k8s/monitoring-stack)
- playbook.yml is deploying the monitoring stack outside of the talos cluster
- kubernetes-playbook.yml is deploying the monitoring stack inside of the talos cluster
To run a playbook:
```
ansible-playbook -i inventory.yml playbook.yml
```
Both playbooks can be run and both stacks are designed to coexist. This is so the monitoring of the network layer and physical hosts is still active and visible even when the talos cluster itself is not running.

44
ansible/inventory.yml Normal file
View File

@@ -0,0 +1,44 @@
# Inventory for the host-level monitoring stack: one Raspberry Pi in the
# "monitoring" group, carrying both connection settings and stack settings.
all:
  children:
    monitoring:
      hosts:
        rpi:
          # --- Connection ---
          ansible_host: "192.168.1.100"  # Change to your RPi IP
          ansible_user: "pi"  # Change if different
          ansible_become: true

          # --- Component versions (quoted so they stay strings) ---
          prometheus_version: "2.48.0"
          promtail_version: "2.9.2"
          grafana_version: "10.2.2"
          snmp_exporter_version: "0.24.1"

          # --- Endpoints exposed by the Talos cluster ---
          # Loki push/query endpoint
          loki_url: "http://192.168.1.200:30100"  # Change to your Talos node IP
          # Cluster Prometheus, used as a Grafana datasource
          prometheus_cluster_url: "http://192.168.1.200:30090"  # Change to your Talos node IP

          # --- Scrape targets ---
          # Network devices polled over SNMP
          snmp_targets:
            - name: "router"
              ip: "192.168.1.1"
              module: "if_mib"
            - name: "modem"
              ip: "192.168.1.2"
              module: "if_mib"

          # Hosts running node_exporter
          node_exporter_targets:
            - name: "proxmox"
              ip: "192.168.1.10"
              port: 9100
            - name: "nfs"
              ip: "192.168.1.11"
              port: 9100

          # Hosts running the Proxmox PVE exporter
          proxmox_targets:
            - name: "proxmox"
              ip: "192.168.1.10"
              port: 9221

View File

@@ -0,0 +1,117 @@
---
# Applies the in-cluster monitoring manifests (Prometheus, Loki, Promtail,
# node-exporter, kube-state-metrics) from ./kubernetes to the Talos cluster.
#
# Requirements:
#   - kubectl configured with access to your Talos cluster
#   - kubernetes.core collection: ansible-galaxy collection install kubernetes.core
#
# Run:
#   ansible-playbook -i inventory.yml kubernetes-playbook.yml
#
# Run against a specific kubeconfig:
#   ansible-playbook -i inventory.yml kubernetes-playbook.yml -e kubeconfig_path=~/.kube/talos-config
- name: Deploy monitoring stack to Kubernetes
  hosts: localhost
  connection: local
  gather_facts: false

  vars:
    # $KUBECONFIG is used when set and non-empty, otherwise ~/.kube/config.
    kubeconfig_path: "{{ lookup('env', 'KUBECONFIG') | default('~/.kube/config', true) }}"
    manifests_dir: "{{ playbook_dir }}/kubernetes"

  tasks:
    - name: Create monitoring namespace
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/namespace.yaml"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"

    # Manifests are applied in list order.
    - name: Deploy Prometheus
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/prometheus/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - rbac.yaml
        - configmap.yaml
        - deployment.yaml
        - service.yaml

    # Polls the Deployment up to 30 times, 10 seconds apart, until at least
    # one replica reports ready.
    - name: Wait for Prometheus to be ready
      kubernetes.core.k8s_info:
        kubeconfig: "{{ kubeconfig_path }}"
        namespace: monitoring
        kind: Deployment
        name: prometheus
      register: prometheus_deployment
      retries: 30
      delay: 10
      until: (prometheus_deployment.resources[0].status.readyReplicas | default(0)) >= 1

    - name: Deploy Loki
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/loki/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - configmap.yaml
        - deployment.yaml
        - service.yaml

    # Same polling scheme as for Prometheus.
    - name: Wait for Loki to be ready
      kubernetes.core.k8s_info:
        kubeconfig: "{{ kubeconfig_path }}"
        namespace: monitoring
        kind: Deployment
        name: loki
      register: loki_deployment
      retries: 30
      delay: 10
      until: (loki_deployment.resources[0].status.readyReplicas | default(0)) >= 1

    - name: Deploy Promtail
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/promtail/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - rbac.yaml
        - configmap.yaml
        - daemonset.yaml

    - name: Deploy Node Exporter
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/node-exporter/daemonset.yaml"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"

    - name: Deploy Kube State Metrics
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/kube-state-metrics/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - rbac.yaml
        - deployment.yaml

    # Node objects are fetched only to print their InternalIP addresses below.
    - name: Get cluster node IPs
      kubernetes.core.k8s_info:
        kubeconfig: "{{ kubeconfig_path }}"
        kind: Node
      register: cluster_nodes

    - name: Display access information
      ansible.builtin.debug:
        msg:
          - "Monitoring stack deployed successfully!"
          - ""
          - "Prometheus: http://<node-ip>:30090"
          - "Loki: http://<node-ip>:30100"
          - ""
          - "Node IPs:"
          - "{{ cluster_nodes.resources | map(attribute='status.addresses') | flatten | selectattr('type', 'equalto', 'InternalIP') | map(attribute='address') | list }}"
          - ""
          - "Update your RPi inventory.yml with one of these IPs for:"
          - " loki_url: http://<node-ip>:30100"
          - " prometheus_cluster_url: http://<node-ip>:30090"

37
ansible/playbook.yml Normal file
View File

@@ -0,0 +1,37 @@
---
# Installs the host-level monitoring stack on every host in the "monitoring"
# group by running the common, prometheus, snmp_exporter, promtail and
# grafana roles in that order.
- name: Deploy monitoring stack on RPi
  hosts: monitoring
  become: true

  vars:
    # Maps the gathered machine architecture to the suffix used in the
    # release archive names downloaded by the roles.
    arch_map:
      x86_64: "amd64"
      aarch64: "arm64"
      armv7l: "armv7"

  pre_tasks:
    - name: Gather architecture
      ansible.builtin.set_fact:
        # Architectures missing from arch_map fall back to arm64.
        go_arch: "{{ arch_map.get(ansible_architecture, 'arm64') }}"

    # The cache is refreshed only when it is older than one hour.
    - name: Update apt cache
      when: ansible_os_family == 'Debian'
      ansible.builtin.apt:
        cache_valid_time: 3600
        update_cache: true

  roles:
    - common
    - prometheus
    - snmp_exporter
    - promtail
    - grafana

  post_tasks:
    - name: Display access information
      ansible.builtin.debug:
        msg:
          - "Grafana: http://{{ ansible_host }}:3000 (admin/admin)"
          - "Prometheus: http://{{ ansible_host }}:9090"
          - "Syslog listener: {{ ansible_host }}:514 (UDP)"

View File

@@ -0,0 +1,29 @@
---
# Shared prerequisites for the other roles: base packages, the "monitoring"
# system account, and the directories it owns.
- name: Install common dependencies
  ansible.builtin.apt:
    state: present
    name: [curl, tar, gzip, ca-certificates]

# System account without a login shell; its home directory is not created
# here but by the directory task below.
- name: Create monitoring user
  ansible.builtin.user:
    name: monitoring
    system: true
    create_home: false
    home: /var/lib/monitoring
    shell: /usr/sbin/nologin

- name: Create common directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: monitoring
    group: monitoring
  loop:
    - /etc/monitoring
    - /var/lib/monitoring
    - /var/log/monitoring

View File

@@ -0,0 +1,187 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 1,
"title": "Infrastructure Overview",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "red", "text": "DOWN" }, "1": { "color": "green", "text": "UP" } }, "type": "value" }
],
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }
}
},
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
"id": 2,
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Targets Up",
"type": "stat",
"targets": [{ "expr": "count(up == 1)", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "green", "text": "0" } }, "type": "value" }
],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }
}
},
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
"id": 3,
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Targets Down",
"type": "stat",
"targets": [{ "expr": "count(up == 0) or vector(0)", "refId": "A" }]
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
"id": 4,
"options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Log Lines (1h)",
"type": "stat",
"targets": [{ "expr": "sum(count_over_time({job=~\".+\"}[1h]))", "refId": "A" }]
},
{
  "datasource": { "type": "loki", "uid": "loki" },
  "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
  "id": 5,
  "fieldConfig": {
    "defaults": {
      "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 50 }] }
    }
  },
  "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "textMode": "auto" },
  "title": "Error Logs (1h)",
  "type": "stat",
  "targets": [{ "expr": "sum(count_over_time({severity=~\"emerg|emergency|alert|crit|critical|err|error\"}[1h])) or vector(0)", "refId": "A" }]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
"id": 10,
"title": "Network Devices",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"custom": { "lineWidth": 1, "fillOpacity": 10 },
"unit": "bps"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
"id": 11,
"options": { "legend": { "displayMode": "list", "placement": "bottom" } },
"title": "Network Interface Traffic",
"type": "timeseries",
"targets": [
{ "expr": "rate(ifHCInOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} In", "refId": "A" },
{ "expr": "rate(ifHCOutOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} Out", "refId": "B" }
]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "1": { "color": "green", "text": "Up" }, "2": { "color": "red", "text": "Down" } }, "type": "value" }
]
}
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
"id": 12,
"options": { "showHeader": true },
"title": "Interface Status",
"type": "table",
"targets": [{ "expr": "ifOperStatus{job=\"snmp\"}", "format": "table", "instant": true, "refId": "A" }],
"transformations": [
{ "id": "organize", "options": { "excludeByName": { "Time": true, "__name__": true, "job": true }, "renameByName": { "device": "Device", "ifDescr": "Interface", "Value": "Status" } } }
]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
"id": 20,
"title": "Proxmox / VMs",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 15 },
"id": 21,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "CPU Usage",
"type": "gauge",
"targets": [{ "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"proxmox.*|nfs.*\"}[5m])) by (instance)", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 15 },
"id": 22,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "Memory Usage",
"type": "gauge",
"targets": [{ "expr": "1 - (node_memory_MemAvailable_bytes{instance=~\"proxmox.*|nfs.*\"} / node_memory_MemTotal_bytes{instance=~\"proxmox.*|nfs.*\"})", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 15 },
"id": 23,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "Disk Usage",
"type": "gauge",
"targets": [{ "expr": "1 - (node_filesystem_avail_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"} / node_filesystem_size_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"})", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
"id": 30,
"title": "Recent Logs",
"type": "row"
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 22 },
"id": 31,
"options": { "showTime": true, "showLabels": true, "showCommonLabels": false, "wrapLogMessage": true, "prettifyLogMessage": false, "enableLogDetails": true, "sortOrder": "Descending" },
"title": "All Logs",
"type": "logs",
"targets": [{ "expr": "{job=~\".+\"}", "refId": "A" }]
}
],
"refresh": "30s",
"schemaVersion": 38,
"style": "dark",
"tags": ["home-infra"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Home Infrastructure Overview",
"uid": "home-infra-overview",
"version": 1
}

View File

@@ -0,0 +1,5 @@
---
# Handlers for the grafana role.

# Notified by the tasks that install or reconfigure Grafana.
- name: Restart grafana
  ansible.builtin.systemd:
    state: restarted
    name: grafana-server

View File

@@ -0,0 +1,77 @@
---
# Installs Grafana from the upstream APT repository and provisions its
# datasources and dashboards from this role's templates and files.

# The keyring directory is not present on every Debian-based release, and
# get_url does not create missing parent directories.
- name: Ensure APT keyring directory exists
  ansible.builtin.file:
    path: /etc/apt/keyrings
    state: directory
    owner: root
    group: root
    mode: "0755"

- name: Add Grafana APT key
  ansible.builtin.get_url:
    url: https://apt.grafana.com/gpg.key
    dest: /etc/apt/keyrings/grafana.asc
    mode: "0644"

- name: Add Grafana APT repository
  ansible.builtin.apt_repository:
    repo: "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main"
    state: present
    filename: grafana

- name: Install Grafana
  ansible.builtin.apt:
    name: grafana
    state: present
    update_cache: true
  notify: Restart grafana

- name: Create Grafana provisioning directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: grafana
    group: grafana
    mode: "0755"
  loop:
    - /etc/grafana/provisioning/datasources
    - /etc/grafana/provisioning/dashboards
    - /var/lib/grafana/dashboards

- name: Deploy Grafana datasources
  ansible.builtin.template:
    src: datasources.yml.j2
    dest: /etc/grafana/provisioning/datasources/datasources.yml
    owner: grafana
    group: grafana
    mode: "0640"
  notify: Restart grafana

- name: Deploy Grafana dashboard provisioning
  ansible.builtin.template:
    src: dashboards.yml.j2
    dest: /etc/grafana/provisioning/dashboards/dashboards.yml
    owner: grafana
    group: grafana
    mode: "0640"
  notify: Restart grafana

# fileglob yields an empty list when no dashboard files exist, so the loop is
# simply skipped in that case. Errors are deliberately not ignored here, so a
# genuine copy failure (permissions, disk space) stops the play instead of
# being hidden.
- name: Deploy default dashboards
  ansible.builtin.copy:
    src: "{{ item }}"
    dest: /var/lib/grafana/dashboards/
    owner: grafana
    group: grafana
    mode: "0644"
  loop: "{{ lookup('fileglob', 'files/dashboards/*.json', wantlist=True) }}"
  notify: Restart grafana

# Each entry replaces the matching (possibly commented-out) setting in
# grafana.ini.
- name: Configure Grafana
  ansible.builtin.lineinfile:
    path: /etc/grafana/grafana.ini
    regexp: "{{ item.regexp }}"
    line: "{{ item.line }}"
    state: present
  loop:
    - { regexp: '^;?http_port', line: 'http_port = 3000' }
    - { regexp: '^;?http_addr', line: 'http_addr = 0.0.0.0' }
  notify: Restart grafana

- name: Enable and start Grafana
  ansible.builtin.systemd:
    name: grafana-server
    enabled: true
    state: started

View File

@@ -0,0 +1,13 @@
# Grafana dashboard provisioning: a single file-based provider that loads
# dashboards from /var/lib/grafana/dashboards into the "Home Infra" folder.
apiVersion: 1

providers:
  - name: "Home Infrastructure"
    type: file
    orgId: 1
    folder: "Home Infra"
    folderUid: "home-infra"
    options:
      path: /var/lib/grafana/dashboards
    updateIntervalSeconds: 30
    allowUiUpdates: true
    disableDeletion: false

View File

@@ -0,0 +1,34 @@
# Grafana datasource provisioning, rendered by Ansible.
#
# Each datasource has an explicit, stable uid. The provisioned dashboard
# refers to datasources by uid ("prometheus-infra" and "loki"); without a
# fixed uid Grafana generates a random one and those panels cannot find
# their datasource.
apiVersion: 1

datasources:
  # Local Prometheus (RPi - infrastructure metrics)
  - name: Prometheus-Infra
    uid: prometheus-infra
    type: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    editable: false
    jsonData:
      timeInterval: "30s"
      httpMethod: POST

  # Cluster Prometheus (Talos - Kubernetes metrics)
  - name: Prometheus-Cluster
    uid: prometheus-cluster
    type: prometheus
    access: proxy
    # Quoted so the rendered value is always read as a string.
    url: "{{ prometheus_cluster_url }}"
    isDefault: false
    editable: false
    jsonData:
      timeInterval: "30s"
      httpMethod: POST

  # Loki (Talos cluster - centralized logs)
  - name: Loki
    uid: loki
    type: loki
    access: proxy
    url: "{{ loki_url }}"
    isDefault: false
    editable: false
    jsonData:
      maxLines: 1000

View File

@@ -0,0 +1,9 @@
---
# Handlers for the prometheus role. "Reload systemd" is listed first so a
# changed unit file is picked up before the service is restarted.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart prometheus
  ansible.builtin.systemd:
    state: restarted
    name: prometheus

View File

@@ -0,0 +1,82 @@
---
# Installs the Prometheus release binaries, renders the configuration and the
# systemd unit, and makes sure the service is enabled and running.
# Expects prometheus_version and go_arch to be defined by the caller.
- name: Create Prometheus directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: monitoring
    group: monitoring
    mode: "0755"
  loop:
    - /etc/prometheus
    - /var/lib/prometheus

- name: Check if Prometheus is installed
  ansible.builtin.stat:
    path: /usr/local/bin/prometheus
  register: prometheus_binary

# Read-only probe: never reports a change and never fails the play.
- name: Get installed Prometheus version
  ansible.builtin.command: /usr/local/bin/prometheus --version
  register: prometheus_installed_version
  changed_when: false
  failed_when: false
  when: prometheus_binary.stat.exists

# Evaluate the install condition once instead of repeating it on every task.
# Both stdout and stderr are searched because the stream used for --version
# output is not the same across builds; checking only stdout could force a
# reinstall on every run.
- name: Decide whether Prometheus must be installed or upgraded
  ansible.builtin.set_fact:
    prometheus_needs_install: >-
      {{ (not prometheus_binary.stat.exists)
         or (prometheus_version not in
             ((prometheus_installed_version.stdout | default(''))
              ~ (prometheus_installed_version.stderr | default('')))) }}

- name: Download Prometheus
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz"
    dest: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    mode: "0644"
  when: prometheus_needs_install | bool

- name: Extract Prometheus
  ansible.builtin.unarchive:
    src: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    dest: /tmp
    remote_src: true
  when: prometheus_needs_install | bool

- name: Install Prometheus binaries
  ansible.builtin.copy:
    src: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    mode: "0755"
    remote_src: true
  loop:
    - prometheus
    - promtool
  notify: Restart prometheus
  when: prometheus_needs_install | bool

# promtool (installed above) checks the rendered file before it replaces the
# live configuration, so a broken template cannot take the service down on
# the next restart.
- name: Deploy Prometheus configuration
  ansible.builtin.template:
    src: prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
    validate: /usr/local/bin/promtool check config %s
  notify: Restart prometheus

- name: Deploy Prometheus systemd service
  ansible.builtin.template:
    src: prometheus.service.j2
    dest: /etc/systemd/system/prometheus.service
    mode: "0644"
  notify:
    - Reload systemd
    - Restart prometheus

- name: Enable and start Prometheus
  ansible.builtin.systemd:
    name: prometheus
    enabled: true
    state: started
    daemon_reload: true

# Runs on every play; removing paths that do not exist is a no-op.
- name: Clean up downloaded files
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  loop:
    - "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    - "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}"

View File

@@ -0,0 +1,30 @@
# systemd unit for the Prometheus server binary in /usr/local/bin.
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/
# Ordered after, and pulling in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs under the dedicated "monitoring" account rather than root.
User=monitoring
Group=monitoring
SyslogIdentifier=prometheus
ExecStart=/usr/local/bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus \
    --storage.tsdb.retention.time=15d \
    --web.listen-address=0.0.0.0:9090 \
    --web.enable-lifecycle \
    --log.level=info
# "systemctl reload" sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID
# Always restart, waiting 5 seconds between attempts.
Restart=always
RestartSec=5

# Hardening
ProtectHome=true
ProtectSystem=full
NoNewPrivileges=true

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,68 @@
# Prometheus configuration for the RPi instance, rendered by Ansible from
# the snmp_targets, node_exporter_targets and proxmox_targets variables.
# Each optional job is emitted only when its target list is defined and
# non-empty.
global:
  scrape_interval: 30s
  evaluation_interval: 30s
  external_labels:
    monitor: 'home-infra'
    source: 'rpi'

scrape_configs:
  # Self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: 'rpi-prometheus'

  # SNMP targets (network devices), scraped through the exporter on localhost:9116
{% if snmp_targets is defined and snmp_targets | length > 0 %}
  - job_name: 'snmp'
    scrape_interval: 60s
    scrape_timeout: 30s
    metrics_path: /snmp
    params:
      module: [if_mib]  # Job-wide default; replaced per target by the relabel rule below
    static_configs:
{% for target in snmp_targets %}
      - targets: ['{{ target.ip }}']
        labels:
          device: '{{ target.name }}'
          # Carries the target's "module" setting into relabeling. Labels
          # starting with "__" are removed once relabeling has finished.
          __tmp_snmp_module: '{{ target.module | default("if_mib") }}'
{% endfor %}
    relabel_configs:
      # Apply the per-target SNMP module as the "module" URL parameter.
      - source_labels: [__tmp_snmp_module]
        regex: (.+)
        target_label: __param_module
      # The device address becomes the "target" URL parameter...
      - source_labels: [__address__]
        target_label: __param_target
      # ...and the instance label, so series are named after the device.
      - source_labels: [__param_target]
        target_label: instance
      # The scrape itself goes to the SNMP exporter.
      - target_label: __address__
        replacement: localhost:9116
{% endif %}

  # Node exporter targets (VMs with node_exporter)
{% if node_exporter_targets is defined and node_exporter_targets | length > 0 %}
  - job_name: 'node'
    static_configs:
{% for target in node_exporter_targets %}
      - targets: ['{{ target.ip }}:{{ target.port | default(9100) }}']
        labels:
          instance: '{{ target.name }}'
{% endfor %}
{% endif %}

  # Proxmox PVE exporter
{% if proxmox_targets is defined and proxmox_targets | length > 0 %}
  - job_name: 'proxmox'
    scrape_interval: 60s
    metrics_path: /pve
    params:
      module: [default]
    static_configs:
{% for target in proxmox_targets %}
      - targets: ['{{ target.ip }}:{{ target.port | default(9221) }}']
        labels:
          instance: '{{ target.name }}'
{% endfor %}
{% endif %}

  # SNMP exporter self-metrics
  - job_name: 'snmp-exporter'
    static_configs:
      - targets: ['localhost:9116']

View File

@@ -0,0 +1,9 @@
---
# Handlers for the promtail role. "Reload systemd" is listed first so a
# changed unit file is picked up before the service is restarted.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart promtail
  ansible.builtin.systemd:
    state: restarted
    name: promtail

View File

@@ -0,0 +1,78 @@
---
# Installs the Promtail release binary, renders its configuration and systemd
# unit, and makes sure the service is enabled and running.
# Expects promtail_version and go_arch to be defined by the caller.
- name: Create Promtail directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: monitoring
    group: monitoring
    mode: "0755"
  loop:
    - /etc/promtail
    - /var/lib/promtail

- name: Check if Promtail is installed
  ansible.builtin.stat:
    path: /usr/local/bin/promtail
  register: promtail_binary

# Loki publishes the 32-bit ARM build as "promtail-linux-arm.zip", whereas
# go_arch uses "armv7" for that platform. Translate it here so the download
# URL and the extracted file name are correct; other values pass through.
- name: Resolve Promtail release architecture
  ansible.builtin.set_fact:
    promtail_arch: "{{ 'arm' if go_arch == 'armv7' else go_arch }}"

# unzip must be present before the archive is extracted.
- name: Install unzip
  ansible.builtin.apt:
    name: unzip
    state: present
  when: ansible_os_family == "Debian"

- name: Download Promtail
  ansible.builtin.get_url:
    url: "https://github.com/grafana/loki/releases/download/v{{ promtail_version }}/promtail-linux-{{ promtail_arch }}.zip"
    dest: "/tmp/promtail-{{ promtail_version }}.zip"
    mode: "0644"
  when: not promtail_binary.stat.exists

- name: Extract Promtail
  ansible.builtin.unarchive:
    src: "/tmp/promtail-{{ promtail_version }}.zip"
    dest: /tmp
    remote_src: true
  when: not promtail_binary.stat.exists

- name: Install Promtail binary
  ansible.builtin.copy:
    src: "/tmp/promtail-linux-{{ promtail_arch }}"
    dest: /usr/local/bin/promtail
    mode: "0755"
    remote_src: true
  notify: Restart promtail
  when: not promtail_binary.stat.exists

- name: Deploy Promtail configuration
  ansible.builtin.template:
    src: promtail.yml.j2
    dest: /etc/promtail/promtail.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
  notify: Restart promtail

- name: Deploy Promtail systemd service
  ansible.builtin.template:
    src: promtail.service.j2
    dest: /etc/systemd/system/promtail.service
    mode: "0644"
  notify:
    - Reload systemd
    - Restart promtail

- name: Enable and start Promtail
  ansible.builtin.systemd:
    name: promtail
    enabled: true
    state: started
    daemon_reload: true

# Runs on every play; removing paths that do not exist is a no-op.
- name: Clean up downloaded files
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  loop:
    - "/tmp/promtail-{{ promtail_version }}.zip"
    - "/tmp/promtail-linux-{{ promtail_arch }}"

View File

@@ -0,0 +1,24 @@
# systemd unit for the Promtail log collector binary in /usr/local/bin.
[Unit]
Description=Promtail Log Collector
Documentation=https://grafana.com/docs/loki/latest/clients/promtail/
# Ordered after, and pulling in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Need root for syslog port 514 and journal access
# Can use CAP_NET_BIND_SERVICE instead if preferred
User=root
Group=root
AmbientCapabilities=CAP_NET_BIND_SERVICE
SyslogIdentifier=promtail
ExecStart=/usr/local/bin/promtail \
    -config.file=/etc/promtail/promtail.yml \
    -config.expand-env=true
# Always restart, waiting 5 seconds between attempts.
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,56 @@
# Promtail configuration, rendered by Ansible. Receives syslog over UDP 514,
# reads the local systemd journal, and pushes both to Loki at loki_url.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /var/lib/promtail/positions.yaml

clients:
  - url: "{{ loki_url }}/loki/api/v1/push"
    tenant_id: home-infra
    timeout: 10s
    batchwait: 1s
    batchsize: 1048576

scrape_configs:
  # Syslog listener for network devices
  - job_name: syslog
    syslog:
      listen_protocol: udp
      listen_address: "0.0.0.0:514"
      idle_timeout: 60s
      label_structured_data: true
      labels:
        job: syslog
        source: network-devices
    # Copy selected syslog header fields into regular labels.
    relabel_configs:
      - target_label: "host"
        source_labels: ["__syslog_message_hostname"]
      - target_label: "severity"
        source_labels: ["__syslog_message_severity"]
      - target_label: "facility"
        source_labels: ["__syslog_message_facility"]
      - target_label: "app"
        source_labels: ["__syslog_message_app_name"]
    pipeline_stages:
      - match:
          selector: '{job="syslog"}'
          stages:
            # Capture an IPv4-looking token from the message and expose it
            # as the src_ip label.
            - regex:
                expression: '(?P<src_ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
            - labels:
                src_ip:

  # Local system journal (RPi logs)
  - job_name: journal
    journal:
      max_age: 12h
      labels:
        job: systemd-journal
        host: rpi
    relabel_configs:
      - target_label: "unit"
        source_labels: ["__journal__systemd_unit"]
      - target_label: "severity"
        source_labels: ["__journal_priority_keyword"]

View File

@@ -0,0 +1,9 @@
---
# Handlers for the snmp_exporter role. "Reload systemd" is listed first so a
# changed unit file is picked up before the service is restarted.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart snmp_exporter
  ansible.builtin.systemd:
    state: restarted
    name: snmp_exporter

View File

@@ -0,0 +1,71 @@
---
# Installs the SNMP exporter release binary and its bundled snmp.yml, renders
# the systemd unit, and makes sure the service is enabled and running.
# Expects snmp_exporter_version and go_arch to be defined by the caller.
- name: Create SNMP exporter directory
  ansible.builtin.file:
    state: directory
    path: /etc/snmp_exporter
    mode: "0755"
    owner: monitoring
    group: monitoring

- name: Check if SNMP exporter is installed
  ansible.builtin.stat:
    path: /usr/local/bin/snmp_exporter
  register: snmp_exporter_binary

# Everything in this block runs only when the binary is not installed yet.
- name: Install SNMP exporter from the release archive
  when: not snmp_exporter_binary.stat.exists
  block:
    - name: Download SNMP exporter
      ansible.builtin.get_url:
        url: "https://github.com/prometheus/snmp_exporter/releases/download/v{{ snmp_exporter_version }}/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}.tar.gz"
        dest: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
        mode: "0644"

    - name: Extract SNMP exporter
      ansible.builtin.unarchive:
        remote_src: true
        src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
        dest: /tmp

    - name: Install SNMP exporter binary
      ansible.builtin.copy:
        remote_src: true
        src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}/snmp_exporter"
        dest: /usr/local/bin/snmp_exporter
        mode: "0755"
      notify: Restart snmp_exporter

    # The snmp.yml shipped inside the archive is used as the exporter config.
    - name: Install default SNMP exporter config
      ansible.builtin.copy:
        remote_src: true
        src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}/snmp.yml"
        dest: /etc/snmp_exporter/snmp.yml
        mode: "0644"
        owner: monitoring
        group: monitoring
      notify: Restart snmp_exporter

- name: Deploy SNMP exporter systemd service
  ansible.builtin.template:
    src: snmp_exporter.service.j2
    dest: /etc/systemd/system/snmp_exporter.service
    mode: "0644"
  notify:
    - Reload systemd
    - Restart snmp_exporter

- name: Enable and start SNMP exporter
  ansible.builtin.systemd:
    name: snmp_exporter
    state: started
    enabled: true
    daemon_reload: true

# Runs on every play; removing paths that do not exist is a no-op.
- name: Clean up downloaded files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    - "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}"

View File

@@ -0,0 +1,25 @@
# systemd unit for the Prometheus SNMP exporter binary in /usr/local/bin.
[Unit]
Description=SNMP Exporter for Prometheus
Documentation=https://github.com/prometheus/snmp_exporter
# Ordered after, and pulling in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs under the dedicated "monitoring" account rather than root.
User=monitoring
Group=monitoring
SyslogIdentifier=snmp_exporter
ExecStart=/usr/local/bin/snmp_exporter \
    --config.file=/etc/snmp_exporter/snmp.yml \
    --web.listen-address=0.0.0.0:9116 \
    --log.level=info
# Always restart, waiting 5 seconds between attempts.
Restart=always
RestartSec=5

# Hardening
ProtectHome=true
ProtectSystem=full
NoNewPrivileges=true

[Install]
WantedBy=multi-user.target