homelab/machines/monitor/prometheus.nix
plasmagoat 032072374b
Some checks failed
Test / tests (push) Has been cancelled
moved mail
2025-07-17 00:35:44 +02:00

187 lines
5.3 KiB
Nix

{pkgs, ...}: let
# Host names of the lab machines referenced by the scrape jobs in this module.
# Every name is the short machine name followed by the `.lab` suffix.
labHost = name: "${name}.lab";
monitor_hostname = labHost "monitor";
traefik_hostname = labHost "traefik";
sandbox_hostname = labHost "sandbox";
forgejo_hostname = labHost "forgejo";
runner01_hostname = labHost "forgejo-runner-01";
dnsmasq_hostname = labHost "dns";
media_hostname = labHost "media";
mail_hostname = labHost "mail";
keycloak_hostname = labHost "keycloak";
# Hosts that receive the per-host scrape targets built by generateTargets.
# The order here is the order of the generated target lists.
monitored_hosts = [
  monitor_hostname
  traefik_hostname
  sandbox_hostname
  forgejo_hostname
  runner01_hostname
  dnsmasq_hostname
  media_hostname
  mail_hostname
  keycloak_hostname
];
# TODO (original note): integrate colmena names and targetHost to generate
# the node exporter targets.
#
# generateTargets port -> list of "host:port" strings, one per entry of
# monitored_hosts, in the same order.
generateTargets = port: let
  portSuffix = ":${toString port}";
in
  map (host: host + portSuffix) monitored_hosts;
# Relabel rule shared by all scrape jobs: sets the `instance` label to the
# host part of `__address__`, dropping the ":port" suffix.
instance_relabel_config = let
  stripPort = {
    target_label = "instance";
    replacement = "$1";
    source_labels = ["__address__"];
    # Group 1 is the run of non-colon characters in front of ":<digits>".
    # Because `[^:]+` cannot span a colon, only plain `host:port` addresses
    # match (Prometheus anchors relabel regexes at both ends).
    regex = "([^:]+):\\d+";
  };
in [stripPort];
# Scrape job "node": one target per monitored host on node_exporter_port.
node_exporter_port = 9100;
node_exporter_job = let
  targets = generateTargets node_exporter_port;
in {
  job_name = "node";
  relabel_configs = instance_relabel_config;
  static_configs = [{inherit targets;}];
};
# Scrape job "promtail": one target per monitored host on promtail_port.
promtail_port = 9080;
promtail_job = let
  targets = generateTargets promtail_port;
in {
  job_name = "promtail";
  relabel_configs = instance_relabel_config;
  static_configs = [{inherit targets;}];
};
# Scrape job "prometheus": port 9090 on the monitor host.
prometheus_target = monitor_hostname + ":9090";
prometheus_job = {
  relabel_configs = instance_relabel_config;
  static_configs = [
    {targets = [prometheus_target];}
  ];
  job_name = "prometheus";
};
# Scrape job "alertmanager": port 9093 on the monitor host.
# alertmanager_target is also used for the alertmanagers setting in the
# module body.
alertmanager_target = monitor_hostname + ":9093";
alertmanager_job = {
  relabel_configs = instance_relabel_config;
  static_configs = [
    {targets = [alertmanager_target];}
  ];
  job_name = "alertmanager";
};
# Scrape job "grafana": port 3000 on the monitor host.
grafana_target = monitor_hostname + ":3000";
grafana_job = {
  relabel_configs = instance_relabel_config;
  static_configs = [
    {targets = [grafana_target];}
  ];
  job_name = "grafana";
};
# Scrape job "gatus": port 8080 on the monitor host.
gatus_target = monitor_hostname + ":8080";
gatus_job = {
  relabel_configs = instance_relabel_config;
  static_configs = [
    {targets = [gatus_target];}
  ];
  job_name = "gatus";
};
# Scrape job "traefik": traefik_monitor_port on the traefik host.
traefik_monitor_port = 8082;
traefik_job = let
  target = traefik_hostname + ":" + toString traefik_monitor_port;
in {
  job_name = "traefik";
  relabel_configs = instance_relabel_config;
  static_configs = [{targets = [target];}];
};
# Scrape job "forgejo": forgejo_monitor_port on the forgejo host.
forgejo_monitor_port = 3000;
forgejo_job = let
  target = forgejo_hostname + ":" + toString forgejo_monitor_port;
in {
  job_name = "forgejo";
  relabel_configs = instance_relabel_config;
  static_configs = [{targets = [target];}];
};
# Scrape job "postgres": postgres_exporter_port on the forgejo host.
postgres_exporter_port = 9187;
postgres_job = let
  target = forgejo_hostname + ":" + toString postgres_exporter_port;
in {
  job_name = "postgres";
  relabel_configs = instance_relabel_config;
  static_configs = [{targets = [target];}];
};
# Scrape job "dnsmasq": dnsmasq_exporter_port on the DNS host.
dnsmasq_exporter_port = 9153;
dnsmasq_job = let
  target = dnsmasq_hostname + ":" + toString dnsmasq_exporter_port;
in {
  job_name = "dnsmasq";
  relabel_configs = instance_relabel_config;
  static_configs = [{targets = [target];}];
};
# --- Media Stack Scrape Job ---
# Scrapes the per-application exporters that all listen on the media host,
# each on its own port.
#
# Each exporter gets its own static_config carrying an `app` label.  The
# shared relabel rule rewrites `instance` to the bare host name, so without
# an additional label every target of this job would carry the identical
# label set {job="media_stack", instance=<media host>} and series such as
# `up` and `scrape_duration_seconds` from different exporters would collide.
media_stack_job = let
  # Application name and exporter port on the media host.
  exporters = [
    {
      app = "sonarr";
      port = 9707;
    }
    {
      app = "radarr";
      port = 9708;
    }
    {
      app = "lidarr";
      port = 9709;
    }
    {
      app = "readarr";
      port = 9710;
    }
    {
      app = "prowlarr";
      port = 9711;
    }
    # {
    #   app = "bazarr";
    #   port = 9712;
    # }
  ];
  # One static_config per exporter so that the `app` label applies to
  # exactly one target.
  toStaticConfig = {
    app,
    port,
  }: {
    targets = ["${media_hostname}:${toString port}"];
    labels = {inherit app;};
  };
in {
  job_name = "media_stack";
  static_configs = map toStaticConfig exporters;
  relabel_configs = instance_relabel_config;
};
# Scrape job "jellyfin" with two targets: jellyfin_port on the media host and
# jellyfin_exporter_port on the monitor host.
jellyfin_port = 8096;
jellyfin_exporter_port = 9594;
jellyfin_job = let
  endpoint = host: port: "${host}:${toString port}";
in {
  job_name = "jellyfin";
  relabel_configs = instance_relabel_config;
  static_configs = [
    {
      targets = [
        (endpoint media_hostname jellyfin_port)
        (endpoint monitor_hostname jellyfin_exporter_port)
      ];
    }
  ];
};
in {
  # Open TCP port 9090 in the host firewall.
  networking.firewall = {
    allowedTCPPorts = [9090];
  };

  services.prometheus = let
    # Copies an alert rule file into the store under the given name.
    ruleFile = name: src: pkgs.writeText name (builtins.readFile src);
  in {
    enable = true;
    retentionTime = "7d";

    globalConfig = {
      scrape_interval = "30s";
      scrape_timeout = "10s";
      # Alerting rules are checked once per evaluation_interval; a short
      # interval can become costly with a large number (100+) of alerts.
      evaluation_interval = "20s";
    };

    # NOTE(review): this flag enables the admin HTTP API while port 9090 is
    # opened in the firewall above -- confirm that this exposure is intended.
    extraFlags = ["--web.enable-admin-api"];

    scrapeConfigs = [
      node_exporter_job
      promtail_job
      prometheus_job
      alertmanager_job
      grafana_job
      gatus_job
      traefik_job
      forgejo_job
      postgres_job
      dnsmasq_job
      media_stack_job
      jellyfin_job
    ];

    # Alerts are sent to the Alertmanager target over plain HTTP.
    alertmanagers = [
      {
        static_configs = [{targets = [alertmanager_target];}];
        scheme = "http";
      }
    ];

    ruleFiles = [
      (ruleFile "prometheus-alerts.yml" ./provisioning/alerts/prometheus-alerts.yml)
      (ruleFile "loki-alerts.yml" ./provisioning/alerts/loki-alerts.yml)
      (ruleFile "promtail-alerts.yml" ./provisioning/alerts/promtail-alerts.yml)
      (ruleFile "postgres-alerts.yml" ./provisioning/alerts/postgres-alerts.yml)
      (ruleFile "traefik-alerts.yml" ./provisioning/alerts/traefik-alerts.yml)
      (ruleFile "node-exporter-alerts.yml" ./provisioning/alerts/node-exporter-alerts.yml)
    ];
  };
}