add the ansible playbooks for the monitoring stack

This commit is contained in:
tsvetkov
2026-02-27 01:28:08 +00:00
commit d45bbef509
21 changed files with 1017 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
---
# Handlers for the Prometheus service. Tasks trigger them by name through
# `notify`, so the `name` values below must stay exactly as written.

# Ask systemd to re-read its unit files.
# NOTE(review): defined ahead of the restart handler; Ansible is documented to
# run notified handlers in definition order, so keep this one first.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

# Restart the prometheus unit.
- name: Restart prometheus
  ansible.builtin.systemd:
    state: restarted
    name: prometheus

View File

@@ -0,0 +1,82 @@
---
# Make sure the configuration and data directories exist and are owned by
# the monitoring account.
- name: Create Prometheus directories
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: "0755"
    owner: monitoring
    group: monitoring
  loop:
    - /etc/prometheus
    - /var/lib/prometheus
# Probe for an existing binary; the result drives the install/upgrade
# decision made by later tasks.
- name: Check if Prometheus is installed
  ansible.builtin.stat:
    path: /usr/local/bin/prometheus
  register: prometheus_binary

# Read-only query of the installed binary. It never reports a change, never
# fails the play, and is skipped when the binary does not exist.
- name: Get installed Prometheus version
  when: prometheus_binary.stat.exists
  ansible.builtin.command:
    cmd: /usr/local/bin/prometheus --version
  register: prometheus_installed_version
  failed_when: false
  changed_when: false
# Fetch, unpack and install the release binaries.
#
# The three steps share a single guard on the enclosing block instead of
# repeating it per task: they run only when the binary is missing or the
# version it reports is not the requested prometheus_version.
#
# The guard looks for "version <X> " rather than a bare substring. A bare
# `prometheus_version in stdout` also matches pre-release builds (requesting
# 3.0.0 matches "version 3.0.0-rc.1"), which would silently skip the upgrade.
# NOTE(review): assumes `prometheus --version` prints
# "prometheus, version <X> (branch: ...)" on stdout - confirm for the
# releases you deploy.
- name: Install or upgrade Prometheus binaries
  when: >-
    not prometheus_binary.stat.exists
    or ('version ' ~ prometheus_version ~ ' ')
    not in (prometheus_installed_version.stdout | default(''))
  block:
    - name: Download Prometheus
      ansible.builtin.get_url:
        url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}.tar.gz"
        dest: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
        mode: "0644"

    # The archive is already on the managed host, hence remote_src.
    - name: Extract Prometheus
      ansible.builtin.unarchive:
        src: "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
        dest: /tmp
        remote_src: true

    # Copy the server and its companion tool out of the unpacked release
    # directory; a changed binary triggers a service restart.
    - name: Install Prometheus binaries
      ansible.builtin.copy:
        src: "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}/{{ item }}"
        dest: "/usr/local/bin/{{ item }}"
        mode: "0755"
        remote_src: true
      loop:
        - prometheus
        - promtool
      notify: Restart prometheus
# Render the scrape configuration and restart Prometheus when it changes.
#
# The rendered file is checked with promtool before it replaces the live
# config, so a template or variable mistake fails this task instead of
# restarting the service with a configuration it cannot load. promtool is
# installed to /usr/local/bin by the preceding binary-install task.
# NOTE(review): promtool also resolves files referenced from the config
# (e.g. rule_files); if such references are added later, use absolute paths
# so validation of the temporary copy still succeeds.
- name: Deploy Prometheus configuration
  ansible.builtin.template:
    src: prometheus.yml.j2
    dest: /etc/prometheus/prometheus.yml
    owner: monitoring
    group: monitoring
    mode: "0644"
    validate: /usr/local/bin/promtool check config %s
  notify: Restart prometheus
# Install the unit file. A change requires systemd to re-read its units and
# the service to be restarted, so both handlers are notified.
- name: Deploy Prometheus systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/prometheus.service
    src: prometheus.service.j2
    mode: "0644"
  notify:
    - Reload systemd
    - Restart prometheus
# Enable the unit at boot and make sure it is running now.
# NOTE(review): daemon_reload is requested here as well, presumably so a
# freshly written unit file is known to systemd before the first start,
# since notified handlers may not have run yet at this point - confirm.
- name: Enable and start Prometheus
  ansible.builtin.systemd:
    daemon_reload: true
    name: prometheus
    state: started
    enabled: true
# Remove the downloaded archive and the unpacked release directory from
# /tmp. Runs on every play; absent paths are simply left absent.
- name: Clean up downloaded files
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - "/tmp/prometheus-{{ prometheus_version }}.tar.gz"
    - "/tmp/prometheus-{{ prometheus_version }}.linux-{{ go_arch }}"

View File

@@ -0,0 +1,30 @@
# systemd unit for the Prometheus server.
[Unit]
Description=Prometheus Monitoring System
Documentation=https://prometheus.io/docs/
# Order after, and pull in, network-online.target.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Run as the unprivileged monitoring account.
User=monitoring
Group=monitoring
SyslogIdentifier=prometheus

# Server command line: config file, TSDB location and 15-day retention,
# listen address, lifecycle API and log level.
# NOTE(review): --web.enable-lifecycle combined with a 0.0.0.0 listen address
# appears to expose the HTTP lifecycle endpoints on every interface, while
# reloads are already covered by ExecReload below - confirm this is intended.
ExecStart=/usr/local/bin/prometheus \
    --config.file=/etc/prometheus/prometheus.yml \
    --storage.tsdb.path=/var/lib/prometheus \
    --storage.tsdb.retention.time=15d \
    --web.listen-address=0.0.0.0:9090 \
    --web.enable-lifecycle \
    --log.level=info
# `systemctl reload` sends SIGHUP to the main process.
ExecReload=/bin/kill -HUP $MAINPID

# Always bring the service back, waiting 5 seconds between attempts.
RestartSec=5
Restart=always

# Hardening
NoNewPrivileges=true
ProtectHome=true
ProtectSystem=full

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,68 @@
# Global defaults: scrape and rule-evaluation cadence, plus the static
# labels that identify this Prometheus instance.
global:
  evaluation_interval: 30s
  scrape_interval: 30s
  external_labels:
    source: 'rpi'
    monitor: 'home-infra'
scrape_configs:
# Self-monitoring: Prometheus scrapes its own endpoint on localhost:9090.
- job_name: 'prometheus'
  static_configs:
  - labels:
      instance: 'rpi-prometheus'
    targets:
    - 'localhost:9090'
# SNMP targets (network devices)
# Rendered only when snmp_targets is defined and non-empty. Each entry
# supplies `ip` and `name`. Jinja block tags stay at column 0 so they add no
# stray indentation to the rendered YAML.
{% if snmp_targets | default([]) | length > 0 %}
- job_name: 'snmp'
  scrape_interval: 60s
  scrape_timeout: 30s
  metrics_path: /snmp
  params:
    # Module requested for every target in this job; this template defines
    # no per-target override.
    module:
    - if_mib
  static_configs:
{% for device in snmp_targets %}
  - targets:
    - '{{ device.ip }}'
    labels:
      device: '{{ device.name }}'
{% endfor %}
  # Order matters: pass the device address as the `target` URL parameter,
  # copy it into `instance`, then point the scrape at localhost:9116.
  relabel_configs:
  - source_labels:
    - __address__
    target_label: __param_target
  - source_labels:
    - __param_target
    target_label: instance
  - replacement: localhost:9116
    target_label: __address__
{% endif %}
# Node exporter targets (VMs with node_exporter)
# Rendered only when node_exporter_targets is defined and non-empty. Each
# entry supplies `ip`, `name` and optionally `port` (9100 when omitted).
{% if node_exporter_targets | default([]) | length > 0 %}
- job_name: 'node'
  static_configs:
{% for host in node_exporter_targets %}
  - labels:
      instance: '{{ host.name }}'
    targets:
    - '{{ host.ip }}:{{ host.port | default(9100) }}'
{% endfor %}
{% endif %}
# Proxmox PVE exporter
# Rendered only when proxmox_targets is defined and non-empty. Each entry
# supplies `ip`, `name` and optionally `port` (9221 when omitted).
{% if proxmox_targets | default([]) | length > 0 %}
- job_name: 'proxmox'
  scrape_interval: 60s
  metrics_path: /pve
  params:
    module:
    - default
  static_configs:
{% for pve in proxmox_targets %}
  - labels:
      instance: '{{ pve.name }}'
    targets:
    - '{{ pve.ip }}:{{ pve.port | default(9221) }}'
{% endfor %}
{% endif %}
# SNMP exporter self-metrics
# NOTE(review): this job is rendered unconditionally, even when no SNMP
# targets are configured - confirm the exporter is always deployed.
- job_name: 'snmp-exporter'
  static_configs:
  - targets:
    - 'localhost:9116'