Add the monitoring stack
This commit is contained in:
81
kubernetes/kube-state-metrics/deployment.yaml
Normal file
81
kubernetes/kube-state-metrics/deployment.yaml
Normal file
@@ -0,0 +1,81 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: kube-state-metrics
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: kube-state-metrics
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: kube-state-metrics
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
spec:
|
||||
serviceAccountName: kube-state-metrics
|
||||
securityContext:
|
||||
fsGroup: 65534
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
containers:
|
||||
- name: kube-state-metrics
|
||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1
|
||||
args:
|
||||
- "--port=8080"
|
||||
- "--telemetry-port=8081"
|
||||
ports:
|
||||
- name: http-metrics
|
||||
containerPort: 8080
|
||||
protocol: TCP
|
||||
- name: telemetry
|
||||
containerPort: 8081
|
||||
protocol: TCP
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi"
|
||||
cpu: "50m"
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "100m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: kube-state-metrics
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- name: http-metrics
|
||||
port: 8080
|
||||
targetPort: http-metrics
|
||||
protocol: TCP
|
||||
- name: telemetry
|
||||
port: 8081
|
||||
targetPort: telemetry
|
||||
protocol: TCP
|
||||
selector:
|
||||
app: kube-state-metrics
|
||||
92
kubernetes/kube-state-metrics/rbac.yaml
Normal file
92
kubernetes/kube-state-metrics/rbac.yaml
Normal file
@@ -0,0 +1,92 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
- secrets
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- serviceaccounts
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
- persistentvolumeclaims
|
||||
- persistentvolumes
|
||||
- namespaces
|
||||
- endpoints
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources:
|
||||
- statefulsets
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["batch"]
|
||||
resources:
|
||||
- cronjobs
|
||||
- jobs
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["autoscaling"]
|
||||
resources:
|
||||
- horizontalpodautoscalers
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["authentication.k8s.io"]
|
||||
resources:
|
||||
- tokenreviews
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["authorization.k8s.io"]
|
||||
resources:
|
||||
- subjectaccessreviews
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["policy"]
|
||||
resources:
|
||||
- poddisruptionbudgets
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["certificates.k8s.io"]
|
||||
resources:
|
||||
- certificatesigningrequests
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["storage.k8s.io"]
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["admissionregistration.k8s.io"]
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["networking.k8s.io"]
|
||||
resources:
|
||||
- networkpolicies
|
||||
- ingresses
|
||||
verbs: ["list", "watch"]
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources:
|
||||
- leases
|
||||
verbs: ["list", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: kube-state-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: kube-state-metrics
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
67
kubernetes/loki/configmap.yaml
Normal file
67
kubernetes/loki/configmap.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: loki-config
|
||||
namespace: monitoring
|
||||
data:
|
||||
loki.yaml: |
|
||||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
grpc_listen_port: 9096
|
||||
log_level: info
|
||||
|
||||
common:
|
||||
instance_addr: 127.0.0.1
|
||||
path_prefix: /loki
|
||||
storage:
|
||||
filesystem:
|
||||
chunks_directory: /loki/chunks
|
||||
rules_directory: /loki/rules
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
|
||||
query_range:
|
||||
results_cache:
|
||||
cache:
|
||||
embedded_cache:
|
||||
enabled: true
|
||||
max_size_mb: 100
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2020-10-24
|
||||
store: tsdb
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
ruler:
|
||||
alertmanager_url: http://localhost:9093
|
||||
|
||||
limits_config:
|
||||
reject_old_samples: true
|
||||
reject_old_samples_max_age: 168h
|
||||
ingestion_rate_mb: 4
|
||||
ingestion_burst_size_mb: 6
|
||||
max_streams_per_user: 10000
|
||||
max_line_size: 256kb
|
||||
|
||||
# Compactor for retention
|
||||
compactor:
|
||||
working_directory: /loki/compactor
|
||||
compaction_interval: 10m
|
||||
retention_enabled: true
|
||||
retention_delete_delay: 2h
|
||||
retention_delete_worker_count: 150
|
||||
delete_request_store: filesystem
|
||||
|
||||
# 7 day retention
|
||||
table_manager:
|
||||
retention_deletes_enabled: true
|
||||
retention_period: 168h
|
||||
68
kubernetes/loki/deployment.yaml
Normal file
68
kubernetes/loki/deployment.yaml
Normal file
@@ -0,0 +1,68 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: loki
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: loki
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: loki
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: loki
|
||||
spec:
|
||||
securityContext:
|
||||
fsGroup: 10001
|
||||
runAsGroup: 10001
|
||||
runAsNonRoot: true
|
||||
runAsUser: 10001
|
||||
containers:
|
||||
- name: loki
|
||||
image: grafana/loki:2.9.2
|
||||
args:
|
||||
- "-config.file=/etc/loki/loki.yaml"
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 3100
|
||||
protocol: TCP
|
||||
- name: grpc
|
||||
containerPort: 9096
|
||||
protocol: TCP
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "300m"
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/loki
|
||||
- name: storage
|
||||
mountPath: /loki
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: http
|
||||
initialDelaySeconds: 45
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: http
|
||||
initialDelaySeconds: 45
|
||||
periodSeconds: 10
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: loki-config
|
||||
- name: storage
|
||||
emptyDir: {}
|
||||
# For production, replace emptyDir with PVC:
|
||||
# - name: storage
|
||||
# persistentVolumeClaim:
|
||||
# claimName: loki-storage
|
||||
17
kubernetes/loki/service.yaml
Normal file
17
kubernetes/loki/service.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: loki
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: loki
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- name: http
|
||||
port: 3100
|
||||
targetPort: 3100
|
||||
nodePort: 30100
|
||||
protocol: TCP
|
||||
selector:
|
||||
app: loki
|
||||
6
kubernetes/namespace.yaml
Normal file
6
kubernetes/namespace.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: monitoring
|
||||
labels:
|
||||
name: monitoring
|
||||
115
kubernetes/node-exporter/daemonset.yaml
Normal file
115
kubernetes/node-exporter/daemonset.yaml
Normal file
@@ -0,0 +1,115 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: node-exporter
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: node-exporter
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: node-exporter
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9100"
|
||||
spec:
|
||||
serviceAccountName: node-exporter
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: node-exporter
|
||||
image: prom/node-exporter:v1.7.0
|
||||
args:
|
||||
- "--path.procfs=/host/proc"
|
||||
- "--path.sysfs=/host/sys"
|
||||
- "--path.rootfs=/host/root"
|
||||
- "--web.listen-address=:9100"
|
||||
- "--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)"
|
||||
- "--collector.netclass.ignored-devices=^(veth.*|cali.*|docker.*|flannel.*)$"
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 9100
|
||||
hostPort: 9100
|
||||
protocol: TCP
|
||||
resources:
|
||||
requests:
|
||||
memory: "20Mi"
|
||||
cpu: "10m"
|
||||
limits:
|
||||
memory: "50Mi"
|
||||
cpu: "100m"
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
volumeMounts:
|
||||
- name: proc
|
||||
mountPath: /host/proc
|
||||
readOnly: true
|
||||
- name: sys
|
||||
mountPath: /host/sys
|
||||
readOnly: true
|
||||
- name: root
|
||||
mountPath: /host/root
|
||||
readOnly: true
|
||||
mountPropagation: HostToContainer
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: metrics
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 15
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: metrics
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 15
|
||||
volumes:
|
||||
- name: proc
|
||||
hostPath:
|
||||
path: /proc
|
||||
- name: sys
|
||||
hostPath:
|
||||
path: /sys
|
||||
- name: root
|
||||
hostPath:
|
||||
path: /
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: node-exporter
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9100"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: metrics
|
||||
port: 9100
|
||||
targetPort: 9100
|
||||
protocol: TCP
|
||||
selector:
|
||||
app: node-exporter
|
||||
143
kubernetes/prometheus/configmap.yaml
Normal file
143
kubernetes/prometheus/configmap.yaml
Normal file
@@ -0,0 +1,143 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: prometheus-config
|
||||
namespace: monitoring
|
||||
data:
|
||||
prometheus.yml: |
|
||||
global:
|
||||
scrape_interval: 30s
|
||||
evaluation_interval: 30s
|
||||
external_labels:
|
||||
monitor: 'talos-cluster'
|
||||
cluster: 'home'
|
||||
|
||||
scrape_configs:
|
||||
# Prometheus self-monitoring
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
# Kubernetes API server
|
||||
- job_name: 'kubernetes-apiservers'
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
|
||||
# Kubernetes nodes (kubelet)
|
||||
- job_name: 'kubernetes-nodes'
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/${1}/proxy/metrics
|
||||
|
||||
# Kubernetes nodes (cAdvisor)
|
||||
- job_name: 'kubernetes-cadvisor'
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
|
||||
|
||||
# Kubernetes service endpoints
|
||||
- job_name: 'kubernetes-service-endpoints'
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: kubernetes_name
|
||||
|
||||
# Kubernetes pods
|
||||
- job_name: 'kubernetes-pods'
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: kubernetes_pod_name
|
||||
|
||||
# Node exporter (DaemonSet)
|
||||
- job_name: 'node-exporter'
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_endpoints_name]
|
||||
action: keep
|
||||
regex: node-exporter
|
||||
- source_labels: [__meta_kubernetes_endpoint_node_name]
|
||||
target_label: node
|
||||
|
||||
# Kube State Metrics
|
||||
- job_name: 'kube-state-metrics'
|
||||
static_configs:
|
||||
- targets: ['kube-state-metrics.monitoring.svc.cluster.local:8080']
|
||||
73
kubernetes/prometheus/deployment.yaml
Normal file
73
kubernetes/prometheus/deployment.yaml
Normal file
@@ -0,0 +1,73 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: prometheus
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: prometheus
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9090"
|
||||
spec:
|
||||
serviceAccountName: prometheus
|
||||
securityContext:
|
||||
fsGroup: 65534
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
containers:
|
||||
- name: prometheus
|
||||
image: prom/prometheus:v2.48.0
|
||||
args:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.path=/prometheus"
|
||||
- "--storage.tsdb.retention.time=15d"
|
||||
- "--web.listen-address=0.0.0.0:9090"
|
||||
- "--web.enable-lifecycle"
|
||||
- "--web.enable-admin-api"
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 9090
|
||||
protocol: TCP
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "200m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "500m"
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/prometheus
|
||||
- name: storage
|
||||
mountPath: /prometheus
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /-/healthy
|
||||
port: http
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 15
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /-/ready
|
||||
port: http
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: prometheus-config
|
||||
- name: storage
|
||||
emptyDir: {}
|
||||
# For production, replace emptyDir with PVC:
|
||||
# - name: storage
|
||||
# persistentVolumeClaim:
|
||||
# claimName: prometheus-storage
|
||||
40
kubernetes/prometheus/rbac.yaml
Normal file
40
kubernetes/prometheus/rbac.yaml
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/proxy
|
||||
- nodes/metrics
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["extensions", "networking.k8s.io"]
|
||||
resources:
|
||||
- ingresses
|
||||
verbs: ["get", "list", "watch"]
|
||||
- nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
20
kubernetes/prometheus/service.yaml
Normal file
20
kubernetes/prometheus/service.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: prometheus
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9090"
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- name: http
|
||||
port: 9090
|
||||
targetPort: 9090
|
||||
nodePort: 30090
|
||||
protocol: TCP
|
||||
selector:
|
||||
app: prometheus
|
||||
85
kubernetes/promtail/configmap.yaml
Normal file
85
kubernetes/promtail/configmap.yaml
Normal file
@@ -0,0 +1,85 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: promtail-config
|
||||
namespace: monitoring
|
||||
data:
|
||||
promtail.yaml: |
|
||||
server:
|
||||
http_listen_port: 3101
|
||||
grpc_listen_port: 0
|
||||
|
||||
positions:
|
||||
filename: /run/promtail/positions.yaml
|
||||
|
||||
clients:
|
||||
- url: http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push
|
||||
tenant_id: talos-cluster
|
||||
|
||||
scrape_configs:
|
||||
# Container logs from /var/log/pods
|
||||
- job_name: kubernetes-pods
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
pipeline_stages:
|
||||
- cri: {}
|
||||
relabel_configs:
|
||||
# Only scrape pods with promtail.io/scrape annotation (or all by default)
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_annotation_promtail_io_scrape
|
||||
action: drop
|
||||
regex: false
|
||||
|
||||
# Use pod name as instance
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_name
|
||||
target_label: instance
|
||||
|
||||
# Namespace label
|
||||
- source_labels:
|
||||
- __meta_kubernetes_namespace
|
||||
target_label: namespace
|
||||
|
||||
# Pod name label
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_name
|
||||
target_label: pod
|
||||
|
||||
# Container name label
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_container_name
|
||||
target_label: container
|
||||
|
||||
# Node name label
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_node_name
|
||||
target_label: node
|
||||
|
||||
# App label (from pod labels)
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_label_app
|
||||
target_label: app
|
||||
|
||||
# App.kubernetes.io/name label
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_label_app_kubernetes_io_name
|
||||
target_label: app
|
||||
regex: (.+)
|
||||
action: replace
|
||||
|
||||
# Set path to container log file
|
||||
- source_labels:
|
||||
- __meta_kubernetes_pod_uid
|
||||
- __meta_kubernetes_pod_container_name
|
||||
target_label: __path__
|
||||
separator: /
|
||||
replacement: /var/log/pods/*$1/*.log
|
||||
|
||||
# Talos system logs (if mounted)
|
||||
- job_name: talos-system
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost
|
||||
labels:
|
||||
job: talos-system
|
||||
__path__: /var/log/containers/*.log
|
||||
93
kubernetes/promtail/daemonset.yaml
Normal file
93
kubernetes/promtail/daemonset.yaml
Normal file
@@ -0,0 +1,93 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: promtail
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: promtail
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: promtail
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: promtail
|
||||
spec:
|
||||
serviceAccountName: promtail
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: promtail
|
||||
image: grafana/promtail:2.9.2
|
||||
args:
|
||||
- "-config.file=/etc/promtail/promtail.yaml"
|
||||
env:
|
||||
- name: HOSTNAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 3101
|
||||
protocol: TCP
|
||||
resources:
|
||||
requests:
|
||||
memory: "50Mi"
|
||||
cpu: "50m"
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "100m"
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/promtail
|
||||
- name: run
|
||||
mountPath: /run/promtail
|
||||
# Mount pod logs
|
||||
- name: pods
|
||||
mountPath: /var/log/pods
|
||||
readOnly: true
|
||||
# Mount container logs (for CRI-O / containerd)
|
||||
- name: containers
|
||||
mountPath: /var/log/containers
|
||||
readOnly: true
|
||||
# Machine-id for consistent instance identification
|
||||
- name: machine-id
|
||||
mountPath: /etc/machine-id
|
||||
readOnly: true
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: http
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: http
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: promtail-config
|
||||
- name: run
|
||||
emptyDir: {}
|
||||
- name: pods
|
||||
hostPath:
|
||||
path: /var/log/pods
|
||||
- name: containers
|
||||
hostPath:
|
||||
path: /var/log/containers
|
||||
- name: machine-id
|
||||
hostPath:
|
||||
path: /etc/machine-id
|
||||
33
kubernetes/promtail/rbac.yaml
Normal file
33
kubernetes/promtail/rbac.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: promtail
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: promtail
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- nodes/proxy
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: promtail
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: promtail
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: promtail
|
||||
namespace: monitoring
|
||||
Reference in New Issue
Block a user