ready for runners

This commit is contained in:
plasmagoat 2025-06-06 23:32:17 +02:00
parent fc9971ddc9
commit 7dd5043b5d
49 changed files with 2569 additions and 1085 deletions

View file

@ -0,0 +1,30 @@
# Prometheus alerting rules for Traefik.
# Every expression aggregates `by (service)`, so `service` is the only label
# available to the annotation templates below.
groups:
  - name: Traefik
    rules:
      # Fires as soon as every backend server of a service reports down.
      # sum() is used instead of count(): count() over series that exist is
      # always >= 1 (and absent series yield an empty result), so a
      # `count(...) == 0` comparison can never match.
      # NOTE(review): assumes traefik_service_server_up is a 0/1 gauge per
      # backend server - confirm against the Traefik version in use.
      - alert: TraefikServiceDown
        expr: "sum(traefik_service_server_up) by (service) == 0"
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: "Traefik service down (service {{ $labels.service }})"
          description: "All servers of this Traefik service are down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when more than 5% of a service's requests over the last 3m
      # returned a status code starting with "4", sustained for 1m.
      - alert: TraefikHighHttp4xxErrorRateService
        expr: >-
          sum(rate(traefik_service_requests_total{code=~"4.*"}[3m])) by (service)
          / sum(rate(traefik_service_requests_total[3m])) by (service)
          * 100 > 5
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Traefik high HTTP 4xx error rate service (service {{ $labels.service }})"
          description: "Traefik service 4xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when more than 5% of a service's requests over the last 3m
      # returned a status code starting with "5", sustained for 1m.
      - alert: TraefikHighHttp5xxErrorRateService
        expr: >-
          sum(rate(traefik_service_requests_total{code=~"5.*"}[3m])) by (service)
          / sum(rate(traefik_service_requests_total[3m])) by (service)
          * 100 > 5
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Traefik high HTTP 5xx error rate service (service {{ $labels.service }})"
          description: "Traefik service 5xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"