# modules/services/prometheus.nix
#
# Homelab wrapper around the NixOS Prometheus service.
#
# Declares `homelab.services.prometheus.*` options and, when enabled:
#   * registers the service, a monitoring endpoint, a reverse-proxy entry and
#     a backup job under `homelab.global.*`;
#   * configures `services.prometheus` (scrape jobs, Alertmanager, alert rules);
#   * opens the firewall port and declares tmpfiles rules.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.homelab.services.prometheus;
  globalCfg = config.homelab.global;

  # Prometheus expects Alertmanager targets as "host:port"; the scheme and path
  # are separate fields of the alertmanager entry. The `alertmanager.url`
  # option is a full URL, so split it here.
  # Result is [ scheme hostPort pathOrNull ], or null when the value carries no
  # http(s) scheme (e.g. it is already "host:port").
  alertmanagerUrlParts =
    builtins.match "(https?)://([^/]+)(/.*)?" cfg.alertmanager.url;

  alertmanagerScheme =
    if alertmanagerUrlParts != null
    then builtins.elemAt alertmanagerUrlParts 0
    else "http";

  # Fall back to the raw option value so a plain "host:port" still works.
  alertmanagerTarget =
    if alertmanagerUrlParts != null
    then builtins.elemAt alertmanagerUrlParts 1
    else cfg.alertmanager.url;

  alertmanagerPathPrefix =
    if alertmanagerUrlParts != null && builtins.elemAt alertmanagerUrlParts 2 != null
    then builtins.elemAt alertmanagerUrlParts 2
    else "/";
in {
  options.homelab.services.prometheus = {
    enable = mkEnableOption "Prometheus monitoring server";

    port = mkOption {
      type = types.port;
      default = 9090;
      description = "Prometheus server port";
    };

    webExternalUrl = mkOption {
      type = types.str;
      default = "http://${globalCfg.hostname}:${toString cfg.port}";
      description = "External URL for Prometheus";
    };

    retention = mkOption {
      type = types.str;
      default = "30d";
      description = "Data retention period";
    };

    scrapeConfigs = mkOption {
      type = types.listOf types.attrs;
      default = [];
      description = "Additional scrape configurations";
    };

    alertmanager = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Enable Alertmanager integration";
      };

      url = mkOption {
        type = types.str;
        default = "http://localhost:9093";
        description = "Alertmanager URL";
      };
    };
  };

  config = mkIf cfg.enable {
    # Register service with global homelab config
    homelab.global.services.prometheus = {
      enable = true;
      description = "Metrics collection and monitoring server";
      category = "monitoring";
      ports = [cfg.port];
      tags = ["metrics" "monitoring" "alerting"];
      priority = 20;
      dependencies = ["node-exporter"];
    };

    # Configure the actual Prometheus service
    services.prometheus = {
      enable = true;
      port = cfg.port;
      webExternalUrl = cfg.webExternalUrl;
      retentionTime = cfg.retention;

      scrapeConfigs =
        [
          # Auto-discover monitoring endpoints from global config.
          # Only `endpoint.port` is read here; every target is scraped on
          # "/metrics" every 30s under the single job "homelab-auto".
          {
            job_name = "homelab-auto";
            static_configs = [
              {
                targets =
                  map (
                    endpoint: "${globalCfg.hostname}:${toString endpoint.port}"
                  )
                  globalCfg.monitoring.endpoints;
              }
            ];
            scrape_interval = "30s";
            metrics_path = "/metrics";
          }
        ]
        ++ cfg.scrapeConfigs;

      # Alertmanager configuration.
      # The target is "host:port"; scheme and path prefix are passed separately.
      alertmanagers = mkIf cfg.alertmanager.enable [
        {
          scheme = alertmanagerScheme;
          path_prefix = alertmanagerPathPrefix;
          static_configs = [
            {
              targets = [alertmanagerTarget];
            }
          ];
        }
      ];

      # Basic homelab alerting rules.
      # `ruleFiles` takes paths to rule files; `rules` takes inline rule text
      # as strings, so a writeText derivation belongs here.
      ruleFiles = [
        (pkgs.writeText "homelab-alerts.yml" ''
          groups:
            - name: homelab
              rules:
                - alert: ServiceDown
                  expr: up == 0
                  for: 5m
                  labels:
                    severity: critical
                  annotations:
                    summary: "Service {{ $labels.instance }} is down"
                    description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes."

                - alert: HighMemoryUsage
                  expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
                  for: 10m
                  labels:
                    severity: warning
                  annotations:
                    summary: "High memory usage on {{ $labels.instance }}"
                    description: "Memory usage is above 90% on {{ $labels.instance }}"

                - alert: HighDiskUsage
                  expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85
                  for: 5m
                  labels:
                    severity: warning
                  annotations:
                    summary: "High disk usage on {{ $labels.instance }}"
                    description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}"
        '')
      ];
    };

    # Add monitoring endpoint to global config
    homelab.global.monitoring.endpoints = [
      {
        name = "prometheus";
        port = cfg.port;
        path = "/metrics";
        jobName = "prometheus";
        scrapeInterval = "30s";
        labels = {
          service = "prometheus";
          role = "monitoring";
        };
      }
    ];

    # Add reverse proxy entry if a global domain is configured
    homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [
      {
        subdomain = "prometheus";
        port = cfg.port;
        path = "/";
        enableAuth = true;
        enableSSL = true;
        customHeaders = {
          "X-Frame-Options" = "DENY";
          "X-Content-Type-Options" = "nosniff";
        };
      }
    ];

    # Add backup job for Prometheus data
    homelab.global.backups.jobs = [
      {
        name = "prometheus-data";
        backend = "restic";
        paths = ["/var/lib/prometheus2"];
        schedule = "daily";
        retention = {
          daily = "7";
          weekly = "4";
          monthly = "3";
          yearly = "1";
        };
        excludePatterns = [
          "*.tmp"
          "*/wal/*"
        ];
        # NOTE(review): whether postHook also runs after a failed backup depends
        # on the backup module, which is not visible here; confirm that
        # Prometheus cannot be left stopped.
        preHook = ''
          # Stop prometheus temporarily for consistent backup
          systemctl stop prometheus
        '';
        postHook = ''
          # Restart prometheus after backup
          systemctl start prometheus
        '';
      }
    ];

    # Open firewall port
    networking.firewall.allowedTCPPorts = [cfg.port];

    # Create prometheus data and configuration directories
    systemd.tmpfiles.rules = [
      "d /var/lib/prometheus2 0755 prometheus prometheus -"
      "d /etc/prometheus 0755 root root -"
    ];
  };
}