add the ansible playbooks for the monitoring stack

This commit is contained in:
tsvetkov
2026-02-27 01:28:08 +00:00
commit d45bbef509
21 changed files with 1017 additions and 0 deletions

44
ansible/inventory.yml Normal file
View File

@@ -0,0 +1,44 @@
# Inventory for the Raspberry Pi monitoring host.
# Values marked "adjust" are site-specific and must be edited before use.
all:
  children:
    monitoring:
      hosts:
        rpi:
          # Connection settings
          ansible_host: 192.168.1.100  # adjust: IP address of the RPi
          ansible_user: pi  # adjust: SSH login user
          ansible_become: true

          # Component versions (quoted so they are always read as strings)
          grafana_version: '10.2.2'
          prometheus_version: '2.48.0'
          promtail_version: '2.9.2'
          snmp_exporter_version: '0.24.1'

          # Endpoints exposed by the Talos cluster
          # adjust: replace 192.168.1.200 with a Talos node IP
          loki_url: 'http://192.168.1.200:30100'
          # Used as the "cluster" Prometheus datasource in Grafana
          prometheus_cluster_url: 'http://192.168.1.200:30090'

          # Network devices polled over SNMP
          snmp_targets:
            - { name: 'router', ip: '192.168.1.1', module: 'if_mib' }
            - { name: 'modem', ip: '192.168.1.2', module: 'if_mib' }

          # Hosts that run node_exporter
          node_exporter_targets:
            - { name: 'proxmox', ip: '192.168.1.10', port: 9100 }
            - { name: 'nfs', ip: '192.168.1.11', port: 9100 }

          # Hosts that run the Proxmox PVE exporter
          proxmox_targets:
            - { name: 'proxmox', ip: '192.168.1.10', port: 9221 }

View File

@@ -0,0 +1,117 @@
---
# Applies the monitoring manifests (Prometheus, Loki, Promtail, Node Exporter,
# Kube State Metrics) to the Talos cluster from the control machine.
#
# Requirements:
#   - a kubeconfig (as used by kubectl) with access to the Talos cluster
#   - the kubernetes.core collection:
#       ansible-galaxy collection install kubernetes.core
#
# Run with the default kubeconfig ($KUBECONFIG, falling back to ~/.kube/config):
#   ansible-playbook -i inventory.yml kubernetes-playbook.yml
#
# Run with an explicit kubeconfig:
#   ansible-playbook -i inventory.yml kubernetes-playbook.yml -e kubeconfig_path=~/.kube/talos-config
- name: Deploy monitoring stack to Kubernetes
  hosts: localhost
  connection: local
  gather_facts: false

  vars:
    # Manifests live next to this playbook, one sub-directory per component.
    manifests_dir: "{{ playbook_dir }}/kubernetes"
    # An empty/unset KUBECONFIG falls back to the kubectl default location.
    kubeconfig_path: "{{ lookup('env', 'KUBECONFIG') | default('~/.kube/config', true) }}"

  tasks:
    - name: Create monitoring namespace
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/namespace.yaml"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"

    # Manifests are applied in list order: RBAC and config before the workload.
    - name: Deploy Prometheus
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/prometheus/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - rbac.yaml
        - configmap.yaml
        - deployment.yaml
        - service.yaml

    # Polls every 10 s, up to 30 times, until one replica reports ready.
    - name: Wait for Prometheus to be ready
      kubernetes.core.k8s_info:
        namespace: monitoring
        kind: Deployment
        name: prometheus
        kubeconfig: "{{ kubeconfig_path }}"
      register: prometheus_deployment
      until: (prometheus_deployment.resources[0].status.readyReplicas | default(0)) >= 1
      delay: 10
      retries: 30

    - name: Deploy Loki
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/loki/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - configmap.yaml
        - deployment.yaml
        - service.yaml

    # Same polling scheme as for Prometheus.
    - name: Wait for Loki to be ready
      kubernetes.core.k8s_info:
        namespace: monitoring
        kind: Deployment
        name: loki
        kubeconfig: "{{ kubeconfig_path }}"
      register: loki_deployment
      until: (loki_deployment.resources[0].status.readyReplicas | default(0)) >= 1
      delay: 10
      retries: 30

    - name: Deploy Promtail
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/promtail/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - rbac.yaml
        - configmap.yaml
        - daemonset.yaml

    - name: Deploy Node Exporter
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/node-exporter/daemonset.yaml"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"

    - name: Deploy Kube State Metrics
      kubernetes.core.k8s:
        src: "{{ manifests_dir }}/kube-state-metrics/{{ item }}"
        state: present
        kubeconfig: "{{ kubeconfig_path }}"
      loop:
        - rbac.yaml
        - deployment.yaml

    # Node objects are fetched only to print their InternalIP addresses below.
    - name: Get cluster node IPs
      kubernetes.core.k8s_info:
        kind: Node
        kubeconfig: "{{ kubeconfig_path }}"
      register: cluster_nodes

    - name: Display access information
      ansible.builtin.debug:
        msg:
          - "Monitoring stack deployed successfully!"
          - ""
          - "Prometheus: http://<node-ip>:30090"
          - "Loki: http://<node-ip>:30100"
          - ""
          - "Node IPs:"
          - "{{ cluster_nodes.resources | map(attribute='status.addresses') | flatten | selectattr('type', 'equalto', 'InternalIP') | map(attribute='address') | list }}"
          - ""
          - "Update your RPi inventory.yml with one of these IPs for:"
          - " loki_url: http://<node-ip>:30100"
          - " prometheus_cluster_url: http://<node-ip>:30090"

37
ansible/playbook.yml Normal file
View File

@@ -0,0 +1,37 @@
---
# Installs the RPi side of the monitoring stack: Prometheus, the SNMP
# exporter, Promtail and Grafana, each through its own role.
- name: Deploy monitoring stack on RPi
  hosts: monitoring
  become: true

  vars:
    # Translates the kernel architecture reported by Ansible into the
    # architecture suffix used in release archive names.
    arch_map: { armv7l: "armv7", aarch64: "arm64", x86_64: "amd64" }

  pre_tasks:
    # go_arch is consumed by the roles below when building download URLs;
    # an architecture missing from arch_map falls back to arm64.
    - name: Gather architecture
      ansible.builtin.set_fact:
        go_arch: "{{ arch_map[ansible_architecture] | default('arm64') }}"

    # Refreshes the package index at most once per hour.
    - name: Update apt cache
      ansible.builtin.apt:
        cache_valid_time: 3600
        update_cache: true
      when: ansible_os_family == "Debian"

  roles:
    - common
    - prometheus
    - snmp_exporter
    - promtail
    - grafana

  post_tasks:
    # Summary of the endpoints that should be reachable after the run.
    - name: Display access information
      ansible.builtin.debug:
        msg:
          - "Grafana: http://{{ ansible_host }}:3000 (admin/admin)"
          - "Prometheus: http://{{ ansible_host }}:9090"
          - "Syslog listener: {{ ansible_host }}:514 (UDP)"

View File

@@ -0,0 +1,29 @@
---
# Shared groundwork for the monitoring roles: base packages, the
# unprivileged "monitoring" account, and the directories it owns.

- name: Install common dependencies
  ansible.builtin.apt:
    state: present
    name: [curl, tar, gzip, ca-certificates]

# System account without a login shell. Its home is not created here
# because the directory task below creates it with explicit ownership.
- name: Create monitoring user
  ansible.builtin.user:
    name: monitoring
    system: true
    create_home: false
    home: /var/lib/monitoring
    shell: /usr/sbin/nologin

- name: Create common directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: monitoring
    group: monitoring
  loop:
    - /etc/monitoring
    - /var/lib/monitoring
    - /var/log/monitoring

View File

@@ -0,0 +1,187 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 1,
"title": "Infrastructure Overview",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "red", "text": "DOWN" }, "1": { "color": "green", "text": "UP" } }, "type": "value" }
],
"thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] }
}
},
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 },
"id": 2,
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Targets Up",
"type": "stat",
"targets": [{ "expr": "count(up == 1)", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "green", "text": "0" } }, "type": "value" }
],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }
}
},
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 },
"id": 3,
"options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Targets Down",
"type": "stat",
"targets": [{ "expr": "count(up == 0) or vector(0)", "refId": "A" }]
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 },
"id": 4,
"options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "textMode": "auto" },
"title": "Log Lines (1h)",
"type": "stat",
"targets": [{ "expr": "sum(count_over_time({job=~\".+\"}[1h]))", "refId": "A" }]
},
{
  "datasource": { "type": "loki", "uid": "loki" },
  "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 },
  "id": 5,
  "fieldConfig": {
    "defaults": {
      "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 50 }] }
    }
  },
  "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["sum"], "fields": "", "values": false }, "textMode": "auto" },
  "title": "Error Logs (1h)",
  "type": "stat",
  "targets": [{ "expr": "sum(count_over_time({severity=~\"emerg|emergency|alert|crit|critical|err|error\"}[1h])) or vector(0)", "refId": "A" }]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
"id": 10,
"title": "Network Devices",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"custom": { "lineWidth": 1, "fillOpacity": 10 },
"unit": "bps"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
"id": 11,
"options": { "legend": { "displayMode": "list", "placement": "bottom" } },
"title": "Network Interface Traffic",
"type": "timeseries",
"targets": [
{ "expr": "rate(ifHCInOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} In", "refId": "A" },
{ "expr": "rate(ifHCOutOctets{job=\"snmp\"}[5m]) * 8", "legendFormat": "{{device}} - {{ifDescr}} Out", "refId": "B" }
]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "1": { "color": "green", "text": "Up" }, "2": { "color": "red", "text": "Down" } }, "type": "value" }
]
}
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
"id": 12,
"options": { "showHeader": true },
"title": "Interface Status",
"type": "table",
"targets": [{ "expr": "ifOperStatus{job=\"snmp\"}", "format": "table", "instant": true, "refId": "A" }],
"transformations": [
{ "id": "organize", "options": { "excludeByName": { "Time": true, "__name__": true, "job": true }, "renameByName": { "device": "Device", "ifDescr": "Interface", "Value": "Status" } } }
]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
"id": 20,
"title": "Proxmox / VMs",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 15 },
"id": 21,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "CPU Usage",
"type": "gauge",
"targets": [{ "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\",instance=~\"proxmox.*|nfs.*\"}[5m])) by (instance)", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 15 },
"id": 22,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "Memory Usage",
"type": "gauge",
"targets": [{ "expr": "1 - (node_memory_MemAvailable_bytes{instance=~\"proxmox.*|nfs.*\"} / node_memory_MemTotal_bytes{instance=~\"proxmox.*|nfs.*\"})", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"datasource": { "type": "prometheus", "uid": "prometheus-infra" },
"fieldConfig": {
"defaults": { "unit": "percentunit", "max": 1, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.7 }, { "color": "red", "value": 0.9 }] } }
},
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 15 },
"id": 23,
"options": { "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true },
"title": "Disk Usage",
"type": "gauge",
"targets": [{ "expr": "1 - (node_filesystem_avail_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"} / node_filesystem_size_bytes{instance=~\"proxmox.*|nfs.*\",mountpoint=\"/\"})", "legendFormat": "{{instance}}", "refId": "A" }]
},
{
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
"id": 30,
"title": "Recent Logs",
"type": "row"
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 22 },
"id": 31,
"options": { "showTime": true, "showLabels": true, "showCommonLabels": false, "wrapLogMessage": true, "prettifyLogMessage": false, "enableLogDetails": true, "sortOrder": "Descending" },
"title": "All Logs",
"type": "logs",
"targets": [{ "expr": "{job=~\".+\"}", "refId": "A" }]
}
],
"refresh": "30s",
"schemaVersion": 38,
"style": "dark",
"tags": ["home-infra"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Home Infrastructure Overview",
"uid": "home-infra-overview",
"version": 1
}

View File

@@ -0,0 +1,5 @@
---
# Handler notified by the Grafana tasks whenever the package, its
# provisioning files, dashboards or grafana.ini change.
- name: Restart grafana
  ansible.builtin.systemd:
    state: restarted
    name: grafana-server

View File

@@ -0,0 +1,77 @@
---
# Installs Grafana from the upstream APT repository and provisions its
# datasources and dashboards. Every change that needs a restart notifies
# the "Restart grafana" handler.

# get_url does not create parent directories; /etc/apt/keyrings may be
# absent on older Debian / Raspberry Pi OS releases.
- name: Ensure APT keyrings directory exists
  ansible.builtin.file:
    path: /etc/apt/keyrings
    state: directory
    mode: "0755"

- name: Add Grafana APT key
  ansible.builtin.get_url:
    url: https://apt.grafana.com/gpg.key
    dest: /etc/apt/keyrings/grafana.asc
    mode: "0644"

# The repository entry is pinned to the key downloaded above via signed-by.
- name: Add Grafana APT repository
  ansible.builtin.apt_repository:
    repo: "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main"
    state: present
    filename: grafana

- name: Install Grafana
  ansible.builtin.apt:
    name: grafana
    state: present
    update_cache: true
  notify: Restart grafana

- name: Create Grafana provisioning directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: grafana
    group: grafana
    mode: "0755"
  loop:
    - /etc/grafana/provisioning/datasources
    - /etc/grafana/provisioning/dashboards
    - /var/lib/grafana/dashboards

- name: Deploy Grafana datasources
  ansible.builtin.template:
    src: datasources.yml.j2
    dest: /etc/grafana/provisioning/datasources/datasources.yml
    owner: grafana
    group: grafana
    mode: "0640"
  notify: Restart grafana

- name: Deploy Grafana dashboard provisioning
  ansible.builtin.template:
    src: dashboards.yml.j2
    dest: /etc/grafana/provisioning/dashboards/dashboards.yml
    owner: grafana
    group: grafana
    mode: "0640"
  notify: Restart grafana

# When no dashboard files exist yet the fileglob result is an empty list and
# the loop simply runs zero times, so no error suppression is needed; genuine
# copy failures (permissions, disk space) are allowed to fail the play.
- name: Deploy default dashboards
  ansible.builtin.copy:
    src: "{{ item }}"
    dest: /var/lib/grafana/dashboards/
    owner: grafana
    group: grafana
    mode: "0644"
  loop: "{{ lookup('fileglob', 'files/dashboards/*.json', wantlist=True) }}"
  notify: Restart grafana

# Each regexp also matches the commented-out (";"-prefixed) default line, so
# the shipped default is replaced in place rather than duplicated.
- name: Configure Grafana
  ansible.builtin.lineinfile:
    path: /etc/grafana/grafana.ini
    regexp: "{{ item.regexp }}"
    line: "{{ item.line }}"
    state: present
  loop:
    - { regexp: '^;?http_port', line: 'http_port = 3000' }
    - { regexp: '^;?http_addr', line: 'http_addr = 0.0.0.0' }
  notify: Restart grafana

- name: Enable and start Grafana
  ansible.builtin.systemd:
    name: grafana-server
    enabled: true
    state: started

View File

@@ -0,0 +1,13 @@
# Grafana dashboard provider: a file-based provider that points at
# /var/lib/grafana/dashboards and places its dashboards in the
# "Home Infra" folder.
apiVersion: 1

providers:
  - name: "Home Infrastructure"
    type: file
    orgId: 1
    folder: "Home Infra"
    folderUid: "home-infra"
    # Dashboards stay editable from the UI and removable from disk.
    allowUiUpdates: true
    disableDeletion: false
    updateIntervalSeconds: 30
    options:
      path: /var/lib/grafana/dashboards

View File

@@ -0,0 +1,34 @@
# Grafana datasource provisioning (rendered by Ansible).
#
# Each datasource has a fixed uid so that dashboards can reference it
# reliably; without one Grafana generates a random uid. The bundled
# dashboard expects "prometheus-infra" and "loki".
apiVersion: 1

datasources:
  # Local Prometheus (RPi - infrastructure metrics)
  - name: Prometheus-Infra
    uid: prometheus-infra
    type: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    editable: false
    jsonData:
      timeInterval: "30s"
      httpMethod: POST

  # Cluster Prometheus (Talos - Kubernetes metrics)
  - name: Prometheus-Cluster
    uid: prometheus-cluster
    type: prometheus
    access: proxy
    # Quoted so an empty or unusual value cannot change the YAML type.
    url: "{{ prometheus_cluster_url }}"
    isDefault: false
    editable: false
    jsonData:
      timeInterval: "30s"
      httpMethod: POST

  # Loki (Talos cluster - centralized logs)
  - name: Loki
    uid: loki
    type: loki
    access: proxy
    url: "{{ loki_url }}"
    isDefault: false
    editable: false
    jsonData:
      maxLines: 1000

View File

@@ -0,0 +1,9 @@
---
# Handlers for the Prometheus role. "Reload systemd" is listed first so that,
# when both are notified, the unit file is re-read before the restart.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart prometheus
  ansible.builtin.systemd:
    state: restarted
    name: prometheus

View File

@@ -0,0 +1,82 @@
---
# Installs Prometheus from the upstream release archive, renders its
# configuration and systemd unit, and makes sure the service is running.
# Expects `prometheus_version` and `go_arch` to be defined by the caller.

- name: Create Prometheus directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: monitoring
    group: monitoring
    mode: "0755"
  loop:
    - /etc/prometheus
    - /var/lib/prometheus

- name: Check if Prometheus is installed
  ansible.builtin.stat:
    path: /usr/local/bin/prometheus
  register: prometheus_binary

# Read-only probe: never reports "changed" and never fails the play.
- name: Get installed Prometheus version
  ansible.builtin.command: /usr/local/bin/prometheus --version
  register: prometheus_installed_version
  changed_when: false
  failed_when: false
  when: prometheus_binary.stat.exists

# The download/extract/install trio runs when the binary is missing or when
# the requested version string does not appear in the `--version` output.
- name: Download Prometheus
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz"
    dest: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    mode: "0644"
  when: not prometheus_binary.stat.exists or prometheus_version not in (prometheus_installed_version.stdout | default(''))

- name: Extract Prometheus
  ansible.builtin.unarchive:
    src: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    dest: /tmp
    remote_src: true
  when: not prometheus_binary.stat.exists or prometheus_version not in (prometheus_installed_version.stdout | default(''))

- name: Install Prometheus binaries
  ansible.builtin.copy:
    src: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}/{{ item }}"
    dest: "/usr/local/bin/{{ item }}"
    mode: "0755"
    remote_src: true
  loop:
    - prometheus
    - promtool
  notify: Restart prometheus
  when: not prometheus_binary.stat.exists or prometheus_version not in (prometheus_installed_version.stdout | default(''))

# The rendered file is checked with promtool (installed above) before it
# replaces the live config, so a bad template or inventory value fails the
# play instead of sending the Restart=always service into a crash loop.
- name: Deploy Prometheus configuration
  ansible.builtin.template:
    src: prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
    validate: /usr/local/bin/promtool check config %s
  notify: Restart prometheus

- name: Deploy Prometheus systemd service
  ansible.builtin.template:
    src: prometheus.service.j2
    dest: /etc/systemd/system/prometheus.service
    mode: "0644"
  notify:
    - Reload systemd
    - Restart prometheus

# daemon_reload here makes a freshly written unit visible on the first run,
# before any handler has fired.
- name: Enable and start Prometheus
  ansible.builtin.systemd:
    name: prometheus
    enabled: true
    state: started
    daemon_reload: true

- name: Clean up downloaded files
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  loop:
    - "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    - "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}"

View File

@@ -0,0 +1,30 @@
# systemd unit for the Prometheus server installed in /usr/local/bin.
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/
# Ordered after, and pulling in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs under the unprivileged monitoring account.
User=monitoring
Group=monitoring
SyslogIdentifier=prometheus
ExecStart=/usr/local/bin/prometheus \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/var/lib/prometheus \
  --storage.tsdb.retention.time=15d \
  --web.listen-address=0.0.0.0:9090 \
  --web.enable-lifecycle \
  --log.level=info
# A reload is delivered to the main process as SIGHUP.
ExecReload=/bin/kill -HUP $MAINPID
# Always restart, waiting 5 seconds between attempts.
RestartSec=5
Restart=always
# Hardening
ProtectHome=true
ProtectSystem=full
NoNewPrivileges=true

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,68 @@
# Prometheus configuration for the RPi instance (rendered by Ansible).
# Optional jobs are emitted only when the matching inventory list
# (snmp_targets, node_exporter_targets, proxmox_targets) is non-empty.
global:
  scrape_interval: 30s
  evaluation_interval: 30s
  external_labels:
    monitor: 'home-infra'
    source: 'rpi'

scrape_configs:
  # Self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: 'rpi-prometheus'

  # SNMP targets (network devices), scraped through the local snmp_exporter
{% if snmp_targets is defined and snmp_targets | length > 0 %}
  - job_name: 'snmp'
    scrape_interval: 60s
    scrape_timeout: 30s
    metrics_path: /snmp
    params:
      module: [if_mib]  # Job-wide default; overridden per target below
    static_configs:
{% for target in snmp_targets %}
      - targets: ['{{ target.ip }}']
        labels:
          device: '{{ target.name }}'
          # Temporary carrier for the per-target module; labels starting
          # with "__" are dropped after relabeling.
          __tmp_snmp_module: '{{ target.module | default("if_mib") }}'
{% endfor %}
    relabel_configs:
      # Per-target module -> ?module= query parameter. This has to be done
      # by relabeling because the job-level params are applied to the
      # target's __param_* labels before relabeling runs.
      - source_labels: [__tmp_snmp_module]
        regex: '(.+)'
        target_label: __param_module
      # Device address -> ?target= query parameter
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the device address as the instance label
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual scrape to the snmp_exporter on this host
      - target_label: __address__
        replacement: localhost:9116
{% endif %}

  # Node exporter targets (VMs with node_exporter)
{% if node_exporter_targets is defined and node_exporter_targets | length > 0 %}
  - job_name: 'node'
    static_configs:
{% for target in node_exporter_targets %}
      - targets: ['{{ target.ip }}:{{ target.port | default(9100) }}']
        labels:
          instance: '{{ target.name }}'
{% endfor %}
{% endif %}

  # Proxmox PVE exporter
{% if proxmox_targets is defined and proxmox_targets | length > 0 %}
  - job_name: 'proxmox'
    scrape_interval: 60s
    metrics_path: /pve
    params:
      module: [default]
    static_configs:
{% for target in proxmox_targets %}
      - targets: ['{{ target.ip }}:{{ target.port | default(9221) }}']
        labels:
          instance: '{{ target.name }}'
{% endfor %}
{% endif %}

  # SNMP exporter self-metrics
  - job_name: 'snmp-exporter'
    static_configs:
      - targets: ['localhost:9116']

View File

@@ -0,0 +1,9 @@
---
# Handlers for the Promtail role. "Reload systemd" is listed first so that,
# when both are notified, the unit file is re-read before the restart.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart promtail
  ansible.builtin.systemd:
    state: restarted
    name: promtail

View File

@@ -0,0 +1,78 @@
---
# Installs Promtail from the Loki release assets, renders its configuration
# and systemd unit, and makes sure the service is running.
# Expects `promtail_version` and `go_arch` to be defined by the caller.

# Loki publishes the 32-bit ARM build as "promtail-linux-arm.zip", whereas
# go_arch uses "armv7" for that platform; every other value is passed through.
- name: Resolve Promtail release architecture
  ansible.builtin.set_fact:
    promtail_arch: "{{ 'arm' if go_arch == 'armv7' else go_arch }}"

- name: Create Promtail directories
  ansible.builtin.file:
    path: "{{ item }}"
    state: directory
    owner: monitoring
    group: monitoring
    mode: "0755"
  loop:
    - /etc/promtail
    - /var/lib/promtail

# NOTE(review): installation is keyed on the binary merely existing, so a
# changed promtail_version does not trigger an upgrade.
- name: Check if Promtail is installed
  ansible.builtin.stat:
    path: /usr/local/bin/promtail
  register: promtail_binary

- name: Download Promtail
  ansible.builtin.get_url:
    url: "https://github.com/grafana/loki/releases/download/v{{ promtail_version }}/promtail-linux-{{ promtail_arch }}.zip"
    dest: "/tmp/promtail-{{ promtail_version }}.zip"
    mode: "0644"
  when: not promtail_binary.stat.exists

# unarchive needs unzip on the target to unpack .zip archives.
- name: Install unzip
  ansible.builtin.apt:
    name: unzip
    state: present
  when: ansible_os_family == "Debian"

- name: Extract Promtail
  ansible.builtin.unarchive:
    src: "/tmp/promtail-{{ promtail_version }}.zip"
    dest: /tmp
    remote_src: true
  when: not promtail_binary.stat.exists

- name: Install Promtail binary
  ansible.builtin.copy:
    src: "/tmp/promtail-linux-{{ promtail_arch }}"
    dest: /usr/local/bin/promtail
    mode: "0755"
    remote_src: true
  notify: Restart promtail
  when: not promtail_binary.stat.exists

- name: Deploy Promtail configuration
  ansible.builtin.template:
    src: promtail.yml.j2
    dest: /etc/promtail/promtail.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
  notify: Restart promtail

- name: Deploy Promtail systemd service
  ansible.builtin.template:
    src: promtail.service.j2
    dest: /etc/systemd/system/promtail.service
    mode: "0644"
  notify:
    - Reload systemd
    - Restart promtail

# daemon_reload here makes a freshly written unit visible on the first run,
# before any handler has fired.
- name: Enable and start Promtail
  ansible.builtin.systemd:
    name: promtail
    enabled: true
    state: started
    daemon_reload: true

- name: Clean up downloaded files
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  loop:
    - "/tmp/promtail-{{ promtail_version }}.zip"
    - "/tmp/promtail-linux-{{ promtail_arch }}"

View File

@@ -0,0 +1,24 @@
# systemd unit for the Promtail log collector installed in /usr/local/bin.
[Unit]
Description=Promtail Log Collector
Documentation=https://grafana.com/docs/loki/latest/clients/promtail/
# Ordered after, and pulling in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs as root so it can bind syslog port 514 and read the journal.
# CAP_NET_BIND_SERVICE is also granted below, which covers the port-514
# bind if the service is later switched to a non-root user.
User=root
Group=root
AmbientCapabilities=CAP_NET_BIND_SERVICE
SyslogIdentifier=promtail
ExecStart=/usr/local/bin/promtail \
  -config.file=/etc/promtail/promtail.yml \
  -config.expand-env=true
# Always restart, waiting 5 seconds between attempts.
RestartSec=5
Restart=always

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,56 @@
# Promtail configuration for the RPi (rendered by Ansible): receives syslog
# from network devices, reads the local journal, and pushes both to Loki.
server:
  grpc_listen_port: 0
  http_listen_port: 9080

positions:
  filename: /var/lib/promtail/positions.yaml

clients:
  - url: "{{ loki_url }}/loki/api/v1/push"
    tenant_id: home-infra
    timeout: 10s
    batchsize: 1048576
    batchwait: 1s

scrape_configs:
  # Syslog listener for network devices (UDP 514 on all interfaces)
  - job_name: syslog
    syslog:
      listen_protocol: udp
      listen_address: 0.0.0.0:514
      idle_timeout: 60s
      label_structured_data: true
      labels:
        source: network-devices
        job: syslog
    # Promote the parsed syslog header fields to queryable labels.
    relabel_configs:
      - target_label: 'host'
        source_labels: ['__syslog_message_hostname']
      - target_label: 'severity'
        source_labels: ['__syslog_message_severity']
      - target_label: 'facility'
        source_labels: ['__syslog_message_facility']
      - target_label: 'app'
        source_labels: ['__syslog_message_app_name']
    pipeline_stages:
      - match:
          selector: '{job="syslog"}'
          stages:
            # Capture the first IPv4-looking token of the line as src_ip
            - regex:
                expression: '(?P<src_ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
            # NOTE(review): src_ip becomes a stream label; confirm the
            # number of distinct addresses stays small.
            - labels:
                src_ip:

  # Local system journal (RPi logs), limited to the last 12 hours
  - job_name: journal
    journal:
      max_age: 12h
      labels:
        host: rpi
        job: systemd-journal
    relabel_configs:
      - target_label: 'unit'
        source_labels: ['__journal__systemd_unit']
      - target_label: 'severity'
        source_labels: ['__journal_priority_keyword']

View File

@@ -0,0 +1,9 @@
---
# Handlers for the SNMP exporter role. "Reload systemd" is listed first so
# that, when both are notified, the unit file is re-read before the restart.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

- name: Restart snmp_exporter
  ansible.builtin.systemd:
    state: restarted
    name: snmp_exporter

View File

@@ -0,0 +1,71 @@
---
# Installs the Prometheus SNMP exporter from its release archive together
# with the snmp.yml shipped in that archive, then enables the service.
# Expects `snmp_exporter_version` and `go_arch` to be defined by the caller.

- name: Create SNMP exporter directory
  ansible.builtin.file:
    state: directory
    path: /etc/snmp_exporter
    mode: "0755"
    owner: monitoring
    group: monitoring

# Everything up to and including the default config is skipped once the
# binary is present.
- name: Check if SNMP exporter is installed
  ansible.builtin.stat:
    path: /usr/local/bin/snmp_exporter
  register: snmp_exporter_binary

- name: Download SNMP exporter
  when: not snmp_exporter_binary.stat.exists
  ansible.builtin.get_url:
    dest: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    url: "https://github.com/prometheus/snmp_exporter/releases/download/v{{ snmp_exporter_version }}/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}.tar.gz"
    mode: "0644"

- name: Extract SNMP exporter
  when: not snmp_exporter_binary.stat.exists
  ansible.builtin.unarchive:
    remote_src: true
    src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    dest: /tmp

- name: Install SNMP exporter binary
  when: not snmp_exporter_binary.stat.exists
  ansible.builtin.copy:
    remote_src: true
    src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}/snmp_exporter"
    dest: /usr/local/bin/snmp_exporter
    mode: "0755"
  notify: Restart snmp_exporter

# The archive's own snmp.yml is used as the exporter configuration.
- name: Install default SNMP exporter config
  when: not snmp_exporter_binary.stat.exists
  ansible.builtin.copy:
    remote_src: true
    src: "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}/snmp.yml"
    dest: /etc/snmp_exporter/snmp.yml
    mode: "0644"
    owner: monitoring
    group: monitoring
  notify: Restart snmp_exporter

- name: Deploy SNMP exporter systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/snmp_exporter.service
    src: snmp_exporter.service.j2
    mode: "0644"
  notify:
    - Reload systemd
    - Restart snmp_exporter

# daemon_reload here makes a freshly written unit visible on the first run,
# before any handler has fired.
- name: Enable and start SNMP exporter
  ansible.builtin.systemd:
    daemon_reload: true
    name: snmp_exporter
    state: started
    enabled: true

- name: Clean up downloaded files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - "/tmp/snmp_exporter-{{ snmp_exporter_version }}.tar.gz"
    - "/tmp/snmp_exporter-{{ snmp_exporter_version }}.linux-{{ go_arch }}"

View File

@@ -0,0 +1,25 @@
# systemd unit for the Prometheus SNMP exporter installed in /usr/local/bin.
[Unit]
Description=SNMP Exporter for Prometheus
Documentation=https://github.com/prometheus/snmp_exporter
# Ordered after, and pulling in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs under the unprivileged monitoring account.
User=monitoring
Group=monitoring
SyslogIdentifier=snmp_exporter
ExecStart=/usr/local/bin/snmp_exporter \
  --config.file=/etc/snmp_exporter/snmp.yml \
  --web.listen-address=0.0.0.0:9116 \
  --log.level=info
# Always restart, waiting 5 seconds between attempts.
RestartSec=5
Restart=always
# Hardening
ProtectHome=true
ProtectSystem=full
NoNewPrivileges=true

[Install]
WantedBy=multi-user.target