From 3362c4721167ddba1c8d98c694641d12cf500bc5 Mon Sep 17 00:00:00 2001 From: plasmagoat Date: Mon, 28 Jul 2025 12:08:32 +0200 Subject: [PATCH 1/3] cleanup --- hosts/monitor/default.nix | 41 ++ modules/lib/helpers.nix | 126 ----- modules/nixos/backup-manager.nix | 187 ------- modules/nixos/default.nix | 5 - modules/nixos/global-config.nix | 462 ------------------ modules/nixos/motd/default.nix | 304 ------------ modules/nixos/services/default.nix | 4 - modules/nixos/services/forgejo-runner.nix | 0 modules/nixos/services/forgejo.nix | 1 - modules/nixos/services/grafana.nix | 72 --- modules/nixos/services/jellyfin.nix | 125 ----- modules/nixos/services/postgres.nix | 0 modules/nixos/services/prometheus.nix | 208 -------- modules/nixos/system/backups/backrest.nix | 4 - .../nixos/system/backups/backups-option.nix | 95 ---- modules/nixos/system/backups/default.nix | 6 - modules/nixos/system/backups/restic.nix | 234 --------- modules/nixos/system/backups/root.nix | 66 --- proxmox-infra/.gitignore | 7 - proxmox-infra/.terraform.lock.hcl | 24 - proxmox-infra/main.tf | 52 -- proxmox-infra/outputs.tf | 0 proxmox-infra/provider.tf | 9 - proxmox-infra/sandbox.tf | 106 ---- proxmox-infra/terraform.tfstate.backup | 1 - proxmox-infra/variables.tf | 30 -- proxmox-infra/versions.tf | 9 - users/default.nix | 3 + users/plasmagoat.nix | 1 - 29 files changed, 44 insertions(+), 2138 deletions(-) create mode 100644 hosts/monitor/default.nix delete mode 100644 modules/lib/helpers.nix delete mode 100644 modules/nixos/backup-manager.nix delete mode 100644 modules/nixos/global-config.nix delete mode 100644 modules/nixos/motd/default.nix delete mode 100644 modules/nixos/services/default.nix delete mode 100644 modules/nixos/services/forgejo-runner.nix delete mode 100644 modules/nixos/services/forgejo.nix delete mode 100644 modules/nixos/services/grafana.nix delete mode 100644 modules/nixos/services/jellyfin.nix delete mode 100644 modules/nixos/services/postgres.nix delete mode 100644 
modules/nixos/services/prometheus.nix delete mode 100644 modules/nixos/system/backups/backrest.nix delete mode 100644 modules/nixos/system/backups/backups-option.nix delete mode 100644 modules/nixos/system/backups/default.nix delete mode 100644 modules/nixos/system/backups/restic.nix delete mode 100644 modules/nixos/system/backups/root.nix delete mode 100644 proxmox-infra/.gitignore delete mode 100644 proxmox-infra/.terraform.lock.hcl delete mode 100644 proxmox-infra/main.tf delete mode 100644 proxmox-infra/outputs.tf delete mode 100644 proxmox-infra/provider.tf delete mode 100644 proxmox-infra/sandbox.tf delete mode 100644 proxmox-infra/terraform.tfstate.backup delete mode 100644 proxmox-infra/variables.tf delete mode 100644 proxmox-infra/versions.tf create mode 100644 users/default.nix diff --git a/hosts/monitor/default.nix b/hosts/monitor/default.nix new file mode 100644 index 0000000..bd72a58 --- /dev/null +++ b/hosts/monitor/default.nix @@ -0,0 +1,41 @@ +{ + config, + name, + ... +}: { + sops.secrets."restic/default-password" = {}; + + homelab = { + enable = true; + hostname = name; + tags = [name]; + + monitoring.enable = true; + motd.enable = true; + + backups = { + enable = true; + backends = { + restic = { + enable = true; + repository = "/srv/restic-repo"; + passwordFile = config.sops.secrets."restic/default-password".path; + }; + }; + }; + + services.prometheus = { + enable = true; + }; + + services.gatus = { + enable = true; + ui = { + title = "Homelab Status Dashboard"; + header = "My Homelab Services"; + }; + }; + }; + + system.stateVersion = "25.05"; +} diff --git a/modules/lib/helpers.nix b/modules/lib/helpers.nix deleted file mode 100644 index 34201dc..0000000 --- a/modules/lib/helpers.nix +++ /dev/null @@ -1,126 +0,0 @@ -# modules/lib/helpers.nix -{lib, ...}: -with lib; rec { - # Helper to merge global configurations from multiple sources - mergeGlobalConfigs = configs: let - mergeEndpoints = foldl' (acc: cfg: acc ++ cfg.monitoring.endpoints) []; 
- mergeBackups = foldl' (acc: cfg: acc ++ cfg.backups.jobs) []; - mergeProxyEntries = foldl' (acc: cfg: acc ++ cfg.reverseProxy.entries) []; - in { - monitoring.endpoints = mergeEndpoints configs; - backups.jobs = mergeBackups configs; - reverseProxy.entries = mergeProxyEntries configs; - }; - - # Helper to create a service module template - createServiceModule = { - name, - port, - hasMetrics ? true, - hasWebUI ? true, - dataDir ? "/var/lib/${name}", - }: { - config, - lib, - pkgs, - ... - }: - with lib; let - cfg = config.services.${name}; - in { - options.services.${name} = { - enable = mkEnableOption "${name} service"; - port = mkOption { - type = types.port; - default = port; - description = "Port for ${name}"; - }; - dataDir = mkOption { - type = types.str; - default = dataDir; - description = "Data directory for ${name}"; - }; - enableMetrics = mkOption { - type = types.bool; - default = hasMetrics; - description = "Enable metrics endpoint"; - }; - exposeWeb = mkOption { - type = types.bool; - default = hasWebUI; - description = "Expose web interface"; - }; - }; - - config = mkIf cfg.enable { - homelab.global = { - backups.jobs = [ - { - name = "${name}-data"; - backend = "restic"; - paths = [cfg.dataDir]; - schedule = "daily"; - } - ]; - - reverseProxy.entries = mkIf cfg.exposeWeb [ - { - subdomain = name; - port = cfg.port; - } - ]; - - monitoring.endpoints = mkIf cfg.enableMetrics [ - { - name = name; - port = cfg.port; - path = "/metrics"; - jobName = name; - } - ]; - }; - }; - }; - - # Helper to generate nginx configuration from proxy entries - generateNginxConfig = proxyEntries: domain: let - createVHost = entry: { - "${entry.subdomain}.${domain}" = { - enableACME = entry.enableSSL; - forceSSL = entry.enableSSL; - locations."${entry.path}" = { - proxyPass = "http://${entry.targetHost}:${toString entry.port}"; - proxyWebsockets = entry.websockets; - extraConfig = '' - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - 
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - ${concatStringsSep "\n" (mapAttrsToList ( - name: value: "proxy_set_header ${name} ${value};" - ) - entry.customHeaders)} - ''; - }; - }; - }; - in - foldl' (acc: entry: acc // (createVHost entry)) {} proxyEntries; - - # Helper to generate Prometheus scrape configs - generatePrometheusConfig = endpoints: let - endpointsByJob = groupBy (e: e.jobName) endpoints; - createJobConfig = jobName: jobEndpoints: { - job_name = jobName; - scrape_interval = (head jobEndpoints).scrapeInterval; - metrics_path = (head jobEndpoints).path; - static_configs = [ - { - targets = map (e: "${e.targetHost}:${toString e.port}") jobEndpoints; - labels = foldl' (acc: e: acc // e.labels) {} jobEndpoints; - } - ]; - }; - in - mapAttrsToList createJobConfig endpointsByJob; -} diff --git a/modules/nixos/backup-manager.nix b/modules/nixos/backup-manager.nix deleted file mode 100644 index cd06883..0000000 --- a/modules/nixos/backup-manager.nix +++ /dev/null @@ -1,187 +0,0 @@ -# modules/backup-manager.nix -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - cfg = config.homelab.backups; - globalCfg = config.homelab.global; - - # Create systemd services for backup jobs - createBackupService = job: let - serviceName = "backup-${job.name}"; - allExcludes = globalCfg.backups.globalExcludes ++ job.excludePatterns; - excludeArgs = map (pattern: "--exclude '${pattern}'") allExcludes; - - backupScript = - if job.backend == "restic" - then '' - #!/bin/bash - set -euo pipefail - - ${optionalString (job.preHook != null) job.preHook} - - # Restic backup - ${pkgs.restic}/bin/restic backup \ - ${concatStringsSep " " (map (path: "'${path}'") job.paths)} \ - ${concatStringsSep " " excludeArgs} \ - --tag "host:${globalCfg.hostname}" \ - --tag "job:${job.name}" \ - --tag "env:${globalCfg.environment}" - - # Apply retention policy - ${pkgs.restic}/bin/restic forget \ - --keep-daily ${job.retention.daily} \ - --keep-weekly ${job.retention.weekly} \ - --keep-monthly ${job.retention.monthly} \ - --keep-yearly ${job.retention.yearly} \ - --prune - - ${optionalString (job.postHook != null) job.postHook} - '' - else if job.backend == "borg" - then '' - #!/bin/bash - set -euo pipefail - - ${optionalString (job.preHook != null) job.preHook} - - # Borg backup - ${pkgs.borgbackup}/bin/borg create \ - --stats --progress \ - ${concatStringsSep " " excludeArgs} \ - "::${globalCfg.hostname}-${job.name}-{now}" \ - ${concatStringsSep " " (map (path: "'${path}'") job.paths)} - - # Apply retention policy - ${pkgs.borgbackup}/bin/borg prune \ - --keep-daily ${job.retention.daily} \ - --keep-weekly ${job.retention.weekly} \ - --keep-monthly ${job.retention.monthly} \ - --keep-yearly ${job.retention.yearly} - - ${optionalString (job.postHook != null) job.postHook} - '' - else throw "Unsupported backup backend: ${job.backend}"; - in { - ${serviceName} = { - description = "Backup job: ${job.name}"; - after = ["network-online.target"]; - wants = ["network-online.target"]; - serviceConfig = { - Type = "oneshot"; - User = "backup"; - Group = 
"backup"; - ExecStart = pkgs.writeScript "backup-${job.name}" backupScript; - EnvironmentFile = "/etc/backup/environment"; - }; - }; - }; - - # Create systemd timers for backup jobs - createBackupTimer = job: let - serviceName = "backup-${job.name}"; - timerName = "${serviceName}.timer"; - in { - ${timerName} = { - description = "Timer for backup job: ${job.name}"; - wantedBy = ["timers.target"]; - timerConfig = { - OnCalendar = - if job.schedule == "daily" - then "daily" - else if job.schedule == "weekly" - then "weekly" - else if job.schedule == "hourly" - then "hourly" - else job.schedule; # Assume it's a cron expression - Persistent = true; - RandomizedDelaySec = "15min"; - }; - }; - }; -in { - options.homelab.backups = { - enable = mkEnableOption "Backup management"; - - restic = { - repository = mkOption { - type = types.str; - description = "Restic repository URL"; - }; - passwordFile = mkOption { - type = types.str; - default = "/etc/backup/restic-password"; - description = "Path to file containing restic password"; - }; - }; - - borg = { - repository = mkOption { - type = types.str; - description = "Borg repository path"; - }; - sshKey = mkOption { - type = types.str; - default = "/etc/backup/borg-ssh-key"; - description = "Path to SSH key for borg repository"; - }; - }; - }; - - config = mkIf (cfg.enable && globalCfg.enable && (length globalCfg.backups.jobs) > 0) { - # Create backup user - users.users.backup = { - isSystemUser = true; - group = "backup"; - home = "/var/lib/backup"; - createHome = true; - }; - - users.groups.backup = {}; - - # Install backup tools - environment.systemPackages = with pkgs; [ - restic - borgbackup - rclone - - (pkgs.writeScriptBin "backup-status" '' - #!/bin/bash - echo "=== Backup Status ===" - echo - ${concatStringsSep "\n" (map (job: '' - echo "Job: ${job.name}" - systemctl is-active backup-${job.name}.timer || echo "Timer inactive" - systemctl status backup-${job.name}.timer --no-pager -l | grep -E "(Active|Trigger)" || 
true - echo - '') - globalCfg.backups.jobs)} - '') - ]; - - # Create systemd services and timers - systemd.services = lib.foldl' (acc: job: acc // (createBackupService job)) {} globalCfg.backups.jobs; - systemd.timers = lib.foldl' (acc: job: acc // (createBackupTimer job)) {} globalCfg.backups.jobs; - - # Environment file template - environment.etc."backup/environment.example".text = '' - # Restic configuration - RESTIC_REPOSITORY=${cfg.restic.repository} - RESTIC_PASSWORD_FILE=${cfg.restic.passwordFile} - - # AWS S3 credentials (if using S3 backend) - AWS_ACCESS_KEY_ID=your-access-key - AWS_SECRET_ACCESS_KEY=your-secret-key - - # Borg configuration - BORG_REPO=${cfg.borg.repository} - BORG_RSH="ssh -i ${cfg.borg.sshKey}" - - # Notification settings - NOTIFICATION_URL=your-webhook-url - ''; - }; -} diff --git a/modules/nixos/default.nix b/modules/nixos/default.nix index af472eb..a0250d5 100644 --- a/modules/nixos/default.nix +++ b/modules/nixos/default.nix @@ -1,8 +1,3 @@ { ente = import ./ente.nix; - global-config = import ./global-config.nix; - backup-manager = import ./backup-manager.nix; - - # Service modules - services = import ./services; } diff --git a/modules/nixos/global-config.nix b/modules/nixos/global-config.nix deleted file mode 100644 index 3443eca..0000000 --- a/modules/nixos/global-config.nix +++ /dev/null @@ -1,462 +0,0 @@ -# modules/global-config.nix -{ - config, - lib, - outputs, - ... 
-}: -with lib; let - cfg = config.homelab.global; - - # Service type definition - serviceType = types.submodule { - options = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable this service"; - }; - - description = mkOption { - type = types.str; - description = "Human-readable description of the service"; - }; - - category = mkOption { - type = types.enum ["monitoring" "networking" "storage" "security" "media" "development" "backup" "other"]; - default = "other"; - description = "Service category for organization"; - }; - - dependencies = mkOption { - type = types.listOf types.str; - default = []; - description = "List of other homelab services this depends on"; - }; - - ports = mkOption { - type = types.listOf types.port; - default = []; - description = "Ports this service uses"; - }; - - tags = mkOption { - type = types.listOf types.str; - default = []; - description = "Additional tags for this service"; - }; - - priority = mkOption { - type = types.int; - default = 100; - description = "Service priority (lower numbers start first)"; - }; - }; - }; - - # Type definitions - monitoringEndpointType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the monitoring endpoint"; - }; - port = mkOption { - type = types.port; - description = "Port number for the endpoint"; - }; - path = mkOption { - type = types.str; - default = "/metrics"; - description = "Path for the metrics endpoint"; - }; - jobName = mkOption { - type = types.str; - description = "Prometheus job name"; - }; - scrapeInterval = mkOption { - type = types.str; - default = "30s"; - description = "Prometheus scrape interval"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for this endpoint"; - }; - }; - }; - - backupJobType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the backup job"; - }; - backend = 
mkOption { - type = types.enum ["restic" "borg" "rclone"]; - description = "Backup backend to use"; - }; - paths = mkOption { - type = types.listOf types.str; - description = "List of paths to backup"; - }; - schedule = mkOption { - type = types.str; - default = "daily"; - description = "Backup schedule (cron format or preset)"; - }; - retention = mkOption { - type = types.attrsOf types.str; - default = { - daily = "7"; - weekly = "4"; - monthly = "6"; - yearly = "2"; - }; - description = "Retention policy"; - }; - excludePatterns = mkOption { - type = types.listOf types.str; - default = []; - description = "Patterns to exclude from backup"; - }; - preHook = mkOption { - type = types.nullOr types.str; - default = null; - description = "Script to run before backup"; - }; - postHook = mkOption { - type = types.nullOr types.str; - default = null; - description = "Script to run after backup"; - }; - }; - }; - - reverseProxyEntryType = types.submodule { - options = { - subdomain = mkOption { - type = types.str; - description = "Subdomain for the service"; - }; - port = mkOption { - type = types.port; - description = "Internal port to proxy to"; - }; - path = mkOption { - type = types.str; - default = "/"; - description = "Path prefix for the service"; - }; - enableAuth = mkOption { - type = types.bool; - default = false; - description = "Enable authentication for this service"; - }; - enableSSL = mkOption { - type = types.bool; - default = true; - description = "Enable SSL for this service"; - }; - customHeaders = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Custom headers to add"; - }; - websockets = mkOption { - type = types.bool; - default = false; - description = "Enable websocket support"; - }; - }; - }; - - # Helper functions for services - enabledServices = filterAttrs (name: service: service.enable) cfg.services; - servicesByCategory = category: filterAttrs (name: service: service.enable && service.category == category) 
cfg.services; -in { - imports = [ - ./motd - ]; - - options.homelab.global = { - enable = mkEnableOption "Global homelab configuration"; - - hostname = mkOption { - type = types.str; - description = "Hostname for this system"; - }; - - domain = mkOption { - type = types.str; - default = "procopius.dk"; - description = "Base domain for the homelab"; - }; - - environment = mkOption { - type = types.enum ["production" "staging" "development"]; - default = "production"; - description = "Environment type"; - }; - - location = mkOption { - type = types.str; - default = "homelab"; - description = "Physical location identifier"; - }; - - tags = mkOption { - type = types.listOf types.str; - default = []; - description = "Tags for this system"; - }; - - services = mkOption { - type = types.attrsOf serviceType; - default = {}; - description = "Homelab services configuration"; - example = literalExpression '' - { - prometheus = { - enable = true; - description = "Metrics collection and monitoring"; - category = "monitoring"; - ports = [ 9090 ]; - tags = [ "metrics" "alerting" ]; - }; - - traefik = { - enable = true; - description = "Reverse proxy and load balancer"; - category = "networking"; - ports = [ 80 443 8080 ]; - tags = [ "proxy" "loadbalancer" ]; - priority = 10; - }; - } - ''; - }; - - monitoring = { - endpoints = mkOption { - type = types.listOf monitoringEndpointType; - default = []; - description = "Monitoring endpoints exposed by this system"; - }; - - nodeExporter = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable node exporter"; - }; - port = mkOption { - type = types.port; - default = 9100; - description = "Node exporter port"; - }; - }; - }; - - backups = { - jobs = mkOption { - type = types.listOf backupJobType; - default = []; - description = "Backup jobs for this system"; - }; - - globalExcludes = mkOption { - type = types.listOf types.str; - default = [ - "*.tmp" - "*.cache" - "*/.git" - "*/node_modules" - "*/target" 
- ]; - description = "Global exclude patterns for all backup jobs"; - }; - }; - - reverseProxy = { - entries = mkOption { - type = types.listOf reverseProxyEntryType; - default = []; - description = "Reverse proxy entries for this system"; - }; - }; - - # Helper function to add monitoring endpoint - addMonitoringEndpoint = mkOption { - type = types.functionTo (types.functionTo types.anything); - default = name: endpoint: { - homelab.global.monitoring.endpoints = [ - (endpoint // {inherit name;}) - ]; - }; - description = "Helper function to add monitoring endpoints"; - }; - - # Helper function to add backup job - addBackupJob = mkOption { - type = types.functionTo (types.functionTo types.anything); - default = name: job: { - homelab.global.backups.jobs = [ - (job // {inherit name;}) - ]; - }; - description = "Helper function to add backup jobs"; - }; - - # Helper function to add reverse proxy entry - addReverseProxyEntry = mkOption { - type = types.functionTo (types.functionTo types.anything); - default = subdomain: entry: { - homelab.global.reverseProxy.entries = [ - (entry // {inherit subdomain;}) - ]; - }; - description = "Helper function to add reverse proxy entries"; - }; - - # Helper functions - enabledServicesList = mkOption { - type = types.listOf types.str; - default = attrNames enabledServices; - description = "List of enabled service names"; - readOnly = true; - }; - - servicesByPriority = mkOption { - type = types.listOf types.str; - default = - map (x: x.name) (sort (a: b: a.priority < b.priority) - (mapAttrsToList (name: service: service // {inherit name;}) enabledServices)); - description = "Services sorted by priority"; - readOnly = true; - }; - }; - - config = mkIf cfg.enable { - # Set hostname - networking.hostName = cfg.hostname; - - # Configure node exporter if enabled - services.prometheus.exporters.node = mkIf cfg.monitoring.nodeExporter.enable { - enable = true; - port = cfg.monitoring.nodeExporter.port; - enabledCollectors = [ - "systemd" - 
"textfile" - "filesystem" - "loadavg" - "meminfo" - "netdev" - "stat" - ]; - }; - - # Automatically add node exporter to monitoring endpoints - homelab.global.monitoring.endpoints = mkIf cfg.monitoring.nodeExporter.enable [ - { - name = "node-exporter"; - port = cfg.monitoring.nodeExporter.port; - path = "/metrics"; - jobName = "node"; - labels = { - instance = cfg.hostname; - environment = cfg.environment; - location = cfg.location; - }; - } - ]; - - # Export configuration for external consumption - environment.etc."homelab/config.json".text = builtins.toJSON { - inherit (cfg) hostname domain environment location tags; - - services = - mapAttrs (name: service: { - inherit (service) enable description category dependencies ports tags priority; - }) - cfg.services; - - enabledServices = enabledServices; - - servicesByCategory = { - monitoring = servicesByCategory "monitoring"; - networking = servicesByCategory "networking"; - storage = servicesByCategory "storage"; - security = servicesByCategory "security"; - media = servicesByCategory "media"; - development = servicesByCategory "development"; - backup = servicesByCategory "backup"; - other = servicesByCategory "other"; - }; - - monitoring = { - endpoints = - map (endpoint: { - name = endpoint.name; - url = "http://${cfg.hostname}:${toString endpoint.port}${endpoint.path}"; - port = endpoint.port; - path = endpoint.path; - jobName = endpoint.jobName; - scrapeInterval = endpoint.scrapeInterval; - labels = - endpoint.labels - // { - hostname = cfg.hostname; - environment = cfg.environment; - }; - }) - cfg.monitoring.endpoints; - }; - - backups = { - jobs = cfg.backups.jobs; - }; - - reverseProxy = { - entries = - map (entry: { - subdomain = entry.subdomain; - url = "http://${cfg.hostname}:${toString entry.port}"; - port = entry.port; - path = entry.path; - domain = "${entry.subdomain}.${cfg.domain}"; - enableAuth = entry.enableAuth; - enableSSL = entry.enableSSL; - customHeaders = entry.customHeaders; - websockets = 
entry.websockets; - }) - cfg.reverseProxy.entries; - }; - }; - - # Create a status command that shows service information - environment.systemPackages = [ - # (pkgs.writeScriptBin "homelab-services" '' - # #!/bin/bash - # echo "🏠 Homelab Services Status" - # echo "==========================" - # echo - - # ${concatStringsSep "\n" (mapAttrsToList (name: service: '' - # echo "${name}: ${service.description}" - # echo " Category: ${service.category}" - # echo " Status: $(systemctl is-active ${name} 2>/dev/null || echo "not found")" - # ${optionalString (service.ports != []) '' - # echo " Ports: ${concatStringsSep ", " (map toString service.ports)}" - # ''} - # ${optionalString (service.tags != []) '' - # echo " Tags: ${concatStringsSep ", " service.tags}" - # ''} - # echo - # '') - # enabledServices)} - # '') - ]; - }; -} diff --git a/modules/nixos/motd/default.nix b/modules/nixos/motd/default.nix deleted file mode 100644 index 3c56198..0000000 --- a/modules/nixos/motd/default.nix +++ /dev/null @@ -1,304 +0,0 @@ -# modules/motd/default.nix -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.homelab.motd; - globalCfg = config.homelab.global; - enabledServices = filterAttrs (name: service: service.enable) globalCfg.services; - - homelab-motd = pkgs.writeShellScriptBin "homelab-motd" '' - #! 
/usr/bin/env bash - source /etc/os-release - - # Colors for output - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[1;33m' - BLUE='\033[0;34m' - PURPLE='\033[0;35m' - CYAN='\033[0;36m' - WHITE='\033[1;37m' - NC='\033[0m' # No Color - BOLD='\033[1m' - - # Helper functions - print_header() { - echo -e "''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}" - echo -e "''${BOLD}''${BLUE}║''${NC}''${WHITE} 🏠 HOMELAB STATUS ''${NC}''${BOLD}''${BLUE}║''${NC}" - echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}" - } - - print_section() { - echo -e "\n''${BOLD}''${CYAN}▶ $1''${NC}" - echo -e "''${CYAN}─────────────────────────────────────────────────────────────''${NC}" - } - - get_service_status() { - local service="$1" - if ${pkgs.systemd}/bin/systemctl is-active --quiet "$service" 2>/dev/null; then - echo -e "''${GREEN}●''${NC} Active" - elif ${pkgs.systemd}/bin/systemctl is-enabled --quiet "$service" 2>/dev/null; then - echo -e "''${YELLOW}●''${NC} Inactive" - else - echo -e "''${RED}●''${NC} Disabled" - fi - } - - get_timer_status() { - local timer="$1" - if ${pkgs.systemd}/bin/systemctl is-active --quiet "$timer" 2>/dev/null; then - local next_run=$(${pkgs.systemd}/bin/systemctl show "$timer" --property=NextElapseUSecRealtime --value 2>/dev/null || echo "0") - if [[ "$next_run" != "0" && "$next_run" != "n/a" ]]; then - local next_readable=$(${pkgs.systemd}/bin/systemctl list-timers --no-pager "$timer" 2>/dev/null | tail -n +2 | head -n 1 | awk '{print $1, $2}' || echo "Unknown") - echo -e "''${GREEN}●''${NC} Next: ''${next_readable}" - else - echo -e "''${GREEN}●''${NC} Active" - fi - else - echo -e "''${RED}●''${NC} Inactive" - fi - } - - # Main script - ${optionalString cfg.clearScreen "clear"} - print_header - - # Check if global config exists - CONFIG_FILE="/etc/homelab/config.json" - if [[ ! 
-f "$CONFIG_FILE" ]]; then - echo -e "''${RED}❌ Global homelab configuration not found at $CONFIG_FILE''${NC}" - exit 1 - fi - - # Parse global configuration - HOSTNAME=$(${pkgs.jq}/bin/jq -r '.hostname' "$CONFIG_FILE" 2>/dev/null || hostname) - DOMAIN=$(${pkgs.jq}/bin/jq -r '.domain' "$CONFIG_FILE" 2>/dev/null || echo "unknown") - ENVIRONMENT=$(${pkgs.jq}/bin/jq -r '.environment' "$CONFIG_FILE" 2>/dev/null || echo "unknown") - LOCATION=$(${pkgs.jq}/bin/jq -r '.location' "$CONFIG_FILE" 2>/dev/null || echo "unknown") - TAGS=$(${pkgs.jq}/bin/jq -r '.tags[]?' "$CONFIG_FILE" 2>/dev/null | tr '\n' ' ' || echo "none") - - print_section "SYSTEM INFO" - echo -e " ''${BOLD}Hostname:''${NC} $HOSTNAME" - echo -e " ''${BOLD}Domain:''${NC} $DOMAIN" - echo -e " ''${BOLD}Environment:''${NC} $ENVIRONMENT" - echo -e " ''${BOLD}Location:''${NC} $LOCATION" - echo -e " ''${BOLD}Tags:''${NC} ''${TAGS:-none}" - echo -e " ''${BOLD}Uptime:''${NC} $(${pkgs.procps}/bin/uptime -p)" - echo -e " ''${BOLD}Load:''${NC} $(${pkgs.procps}/bin/uptime | awk -F'load average:' '{print $2}' | xargs)" - - ${optionalString cfg.showServices '' - # Enabled services from homelab config - print_section "HOMELAB SERVICES" - ${concatStringsSep "\n" (mapAttrsToList (name: service: '' - status=$(get_service_status "${service.systemdService}") - printf " %-25s %s\n" "${name}" "$status" - '') - cfg.services)} - ''} - - ${optionalString cfg.showMonitoring '' - # Monitoring endpoints - print_section "MONITORING ENDPOINTS" - ENDPOINTS=$(${pkgs.jq}/bin/jq -c '.monitoring.endpoints[]?' 
"$CONFIG_FILE" 2>/dev/null || echo "") - if [[ -n "$ENDPOINTS" ]]; then - while IFS= read -r endpoint; do - name=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.name') - port=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.port') - path=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.path') - job=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.jobName') - - # Check if port is accessible - if ${pkgs.netcat}/bin/nc -z localhost "$port" 2>/dev/null; then - status="''${GREEN}●''${NC}" - else - status="''${RED}●''${NC}" - fi - - printf " %-20s %s %s:%s%s (job: %s)\n" "$name" "$status" "$HOSTNAME" "$port" "$path" "$job" - done <<< "$ENDPOINTS" - else - echo -e " ''${YELLOW}No monitoring endpoints configured''${NC}" - fi - ''} - - ${optionalString cfg.showBackups '' - # Backup jobs status - print_section "BACKUP JOBS" - BACKUP_JOBS=$(${pkgs.jq}/bin/jq -c '.backups.jobs[]?' "$CONFIG_FILE" 2>/dev/null || echo "") - if [[ -n "$BACKUP_JOBS" ]]; then - while IFS= read -r job; do - name=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.name') - backend=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.backend') - schedule=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.schedule') - - service_name="backup-''${name}" - timer_name="''${service_name}.timer" - - timer_status=$(get_timer_status "$timer_name") - - # Get last backup info - last_run="Unknown" - if ${pkgs.systemd}/bin/systemctl show "$service_name" --property=ExecMainStartTimestamp --value 2>/dev/null | grep -q "^[^n]"; then - last_run=$(${pkgs.systemd}/bin/systemctl show "$service_name" --property=ExecMainStartTimestamp --value 2>/dev/null | head -1) - if [[ "$last_run" != "n/a" && -n "$last_run" ]]; then - last_run=$(${pkgs.coreutils}/bin/date -d "$last_run" "+%Y-%m-%d %H:%M" 2>/dev/null || echo "Unknown") - fi - fi - - printf " %-20s %s (%s, %s) Last: %s\n" "$name" "$timer_status" "$backend" "$schedule" "$last_run" - done <<< "$BACKUP_JOBS" - - # Show backup-status command output if available - if command -v backup-status >/dev/null 2>&1; then - echo -e "\n 
''${BOLD}Quick Status:''${NC}" - backup-status 2>/dev/null | tail -n +3 | head -10 | sed 's/^/ /' - fi - else - echo -e " ''${YELLOW}No backup jobs configured''${NC}" - fi - ''} - - ${optionalString cfg.showReverseProxy '' - # Reverse proxy entries - print_section "REVERSE PROXY ENTRIES" - PROXY_ENTRIES=$(${pkgs.jq}/bin/jq -c '.reverseProxy.entries[]?' "$CONFIG_FILE" 2>/dev/null || echo "") - if [[ -n "$PROXY_ENTRIES" ]]; then - while IFS= read -r entry; do - subdomain=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.subdomain') - port=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.port') - domain=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.domain') - auth=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.enableAuth') - ssl=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.enableSSL') - - # Check if service is running on the port - if ${pkgs.netcat}/bin/nc -z localhost "$port" 2>/dev/null; then - status="''${GREEN}●''${NC}" - else - status="''${RED}●''${NC}" - fi - - auth_indicator="" - [[ "$auth" == "true" ]] && auth_indicator=" 🔐" - - ssl_indicator="" - [[ "$ssl" == "true" ]] && ssl_indicator=" 🔒" - - printf " %-25s %s :%s → %s%s%s\n" "''${domain}" "$status" "$port" "$domain" "$auth_indicator" "$ssl_indicator" - done <<< "$PROXY_ENTRIES" - else - echo -e " ''${YELLOW}No reverse proxy entries configured''${NC}" - fi - ''} - - ${optionalString cfg.showResources '' - # Resource usage - print_section "RESOURCE USAGE" - echo -e " ''${BOLD}Memory:''${NC} $(${pkgs.procps}/bin/free -h | awk '/^Mem:/ {printf "%s/%s (%.1f%%)", $3, $2, ($3/$2)*100}')" - echo -e " ''${BOLD}Disk (root):''${NC} $(${pkgs.coreutils}/bin/df -h / | awk 'NR==2 {printf "%s/%s (%s)", $3, $2, $5}')" - echo -e " ''${BOLD}CPU Usage:''${NC} $(${pkgs.procps}/bin/top -bn1 | grep "Cpu(s)" | awk '{printf "%.1f%%", $2+$4}' | sed 's/%us,//')%" - ''} - - ${optionalString cfg.showRecentIssues '' - # Recent logs (errors only) - print_section "RECENT ISSUES" - error_count=$(${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err 
--no-pager -q | wc -l) - if [[ "$error_count" -gt 0 ]]; then - echo -e " ''${RED}⚠ $error_count errors in last 24h''${NC}" - ${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err --no-pager -q | tail -3 | sed 's/^/ /' - else - echo -e " ''${GREEN}✓ No critical errors in last 24h''${NC}" - fi - ''} - - echo -e "\n''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}" - echo -e "''${BOLD}''${BLUE}║''${NC} ''${WHITE}Run 'backup-status' for detailed backup info ''${NC}''${BOLD}''${BLUE}║''${NC}" - echo -e "''${BOLD}''${BLUE}║''${NC} ''${WHITE}Config: /etc/homelab/config.json ''${NC}''${BOLD}''${BLUE}║''${NC}" - echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}" - echo - ''; -in { - options.homelab.motd = { - enable = mkEnableOption "Dynamic homelab MOTD"; - - clearScreen = mkOption { - type = types.bool; - default = true; - description = "Clear screen before showing MOTD"; - }; - - showServices = mkOption { - type = types.bool; - default = true; - description = "Show enabled homelab services"; - }; - - showMonitoring = mkOption { - type = types.bool; - default = true; - description = "Show monitoring endpoints"; - }; - - showBackups = mkOption { - type = types.bool; - default = true; - description = "Show backup jobs status"; - }; - - showReverseProxy = mkOption { - type = types.bool; - default = true; - description = "Show reverse proxy entries"; - }; - - showResources = mkOption { - type = types.bool; - default = true; - description = "Show system resource usage"; - }; - - showRecentIssues = mkOption { - type = types.bool; - default = true; - description = "Show recent system issues"; - }; - - services = mkOption { - type = types.attrsOf (types.submodule { - options = { - systemdService = mkOption { - type = types.str; - description = "Name of the systemd service to monitor"; - }; - description = mkOption { - type = types.str; - default = ""; - description = 
"Human-readable description of the service"; - }; - }; - }); - default = {}; - description = "Homelab services to monitor in MOTD"; - }; - }; - - config = mkIf (cfg.enable && globalCfg.enable) { - # Register services with MOTD - homelab.motd.services = - mapAttrs (name: service: { - systemdService = name; - description = service.description; - }) - enabledServices; - - # Create a command to manually run the MOTD - environment.systemPackages = with pkgs; [ - jq - netcat - homelab-motd - ]; - }; -} diff --git a/modules/nixos/services/default.nix b/modules/nixos/services/default.nix deleted file mode 100644 index c5ac354..0000000 --- a/modules/nixos/services/default.nix +++ /dev/null @@ -1,4 +0,0 @@ -{ - jellyfin = import ./jellyfin.nix; - grafana = import ./grafana.nix; -} diff --git a/modules/nixos/services/forgejo-runner.nix b/modules/nixos/services/forgejo-runner.nix deleted file mode 100644 index e69de29..0000000 diff --git a/modules/nixos/services/forgejo.nix b/modules/nixos/services/forgejo.nix deleted file mode 100644 index 8b13789..0000000 --- a/modules/nixos/services/forgejo.nix +++ /dev/null @@ -1 +0,0 @@ - diff --git a/modules/nixos/services/grafana.nix b/modules/nixos/services/grafana.nix deleted file mode 100644 index f76edf7..0000000 --- a/modules/nixos/services/grafana.nix +++ /dev/null @@ -1,72 +0,0 @@ -# modules/services/grafana.nix -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - cfg = config.services.grafana; - helpers = import ../lib/helpers.nix {inherit lib;}; -in { - options.services.grafana = { - enable = mkEnableOption "Grafana monitoring dashboard"; - port = mkOption { - type = types.port; - default = 3000; - description = "Grafana web interface port"; - }; - adminPassword = mkOption { - type = types.str; - description = "Admin password for Grafana"; - }; - }; - - config = mkIf cfg.enable { - services.grafana = { - enable = true; - settings = { - server = { - http_port = cfg.port; - domain = "${config.homelab.global.hostname}.${config.homelab.global.domain}"; - }; - security = { - admin_password = cfg.adminPassword; - }; - }; - }; - - homelab.global = { - backups.jobs = [ - { - name = "grafana-data"; - backend = "restic"; - paths = ["/var/lib/grafana"]; - schedule = "daily"; - excludePatterns = ["*/plugins/*" "*/png/*"]; - } - ]; - - reverseProxy.entries = [ - { - subdomain = "grafana"; - port = cfg.port; - enableAuth = false; # Grafana handles its own auth - } - ]; - - monitoring.endpoints = [ - { - name = "grafana"; - port = cfg.port; - path = "/metrics"; - jobName = "grafana"; - labels = { - service = "grafana"; - type = "monitoring"; - }; - } - ]; - }; - }; -} diff --git a/modules/nixos/services/jellyfin.nix b/modules/nixos/services/jellyfin.nix deleted file mode 100644 index 1aac7e5..0000000 --- a/modules/nixos/services/jellyfin.nix +++ /dev/null @@ -1,125 +0,0 @@ -# modules/services/jellyfin.nix -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - cfg = config.services.jellyfin; -in { - options.services.jellyfin = { - enable = mkEnableOption "Jellyfin media server"; - - port = mkOption { - type = types.port; - default = 8096; - description = "Port for Jellyfin web interface"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/jellyfin"; - description = "Directory to store Jellyfin data"; - }; - - mediaDir = mkOption { - type = types.str; - default = "/media"; - description = "Directory containing media files"; - }; - - enableMetrics = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus metrics"; - }; - - exposeWeb = mkOption { - type = types.bool; - default = true; - description = "Expose web interface through reverse proxy"; - }; - }; - - config = mkIf cfg.enable { - # Enable the service - services.jellyfin = { - enable = true; - dataDir = cfg.dataDir; - }; - - # Configure global settings - homelab.global = { - # Add backup job for Jellyfin data - backups.jobs = [ - { - name = "jellyfin-config"; - backend = "restic"; - paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"]; - schedule = "0 2 * * *"; # Daily at 2 AM - excludePatterns = [ - "*/cache/*" - "*/transcodes/*" - "*/logs/*" - ]; - preHook = '' - # Stop jellyfin for consistent backup - systemctl stop jellyfin - ''; - postHook = '' - # Restart jellyfin after backup - systemctl start jellyfin - ''; - } - { - name = "jellyfin-media"; - backend = "restic"; - paths = [cfg.mediaDir]; - schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM - excludePatterns = [ - "*.tmp" - "*/.@__thumb/*" # Synology thumbnails - ]; - } - ]; - - # Add reverse proxy entry if enabled - reverseProxy.entries = mkIf cfg.exposeWeb [ - { - subdomain = "jellyfin"; - port = cfg.port; - enableAuth = false; # Jellyfin has its own auth - websockets = true; - customHeaders = { - "X-Forwarded-Proto" = "$scheme"; - "X-Forwarded-Host" = "$host"; - }; - } - ]; - - # Add monitoring endpoint if metrics enabled - 
monitoring.endpoints = mkIf cfg.enableMetrics [ - { - name = "jellyfin"; - port = cfg.port; - path = "/metrics"; # Assuming you have a metrics plugin - jobName = "jellyfin"; - scrapeInterval = "60s"; - labels = { - service = "jellyfin"; - type = "media-server"; - }; - } - ]; - }; - - # Open firewall - networking.firewall.allowedTCPPorts = [cfg.port]; - - # Create media directory - systemd.tmpfiles.rules = [ - "d ${cfg.mediaDir} 0755 jellyfin jellyfin -" - ]; - }; -} diff --git a/modules/nixos/services/postgres.nix b/modules/nixos/services/postgres.nix deleted file mode 100644 index e69de29..0000000 diff --git a/modules/nixos/services/prometheus.nix b/modules/nixos/services/prometheus.nix deleted file mode 100644 index 9485b3a..0000000 --- a/modules/nixos/services/prometheus.nix +++ /dev/null @@ -1,208 +0,0 @@ -# modules/services/prometheus.nix -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.homelab.services.prometheus; - globalCfg = config.homelab.global; -in { - options.homelab.services.prometheus = { - enable = mkEnableOption "Prometheus monitoring server"; - - port = mkOption { - type = types.port; - default = 9090; - description = "Prometheus server port"; - }; - - webExternalUrl = mkOption { - type = types.str; - default = "http://${globalCfg.hostname}:${toString cfg.port}"; - description = "External URL for Prometheus"; - }; - - retention = mkOption { - type = types.str; - default = "30d"; - description = "Data retention period"; - }; - - scrapeConfigs = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional scrape configurations"; - }; - - alertmanager = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable Alertmanager integration"; - }; - - url = mkOption { - type = types.str; - default = "http://localhost:9093"; - description = "Alertmanager URL"; - }; - }; - }; - - config = mkIf cfg.enable { - # Register service with global homelab config - 
homelab.global.services.prometheus = { - enable = true; - description = "Metrics collection and monitoring server"; - category = "monitoring"; - ports = [cfg.port]; - tags = ["metrics" "monitoring" "alerting"]; - priority = 20; - dependencies = ["node-exporter"]; - }; - - # Configure the actual Prometheus service - services.prometheus = { - enable = true; - port = cfg.port; - webExternalUrl = cfg.webExternalUrl; - - retentionTime = cfg.retention; - - scrapeConfigs = - [ - # Auto-discover monitoring endpoints from global config - { - job_name = "homelab-auto"; - static_configs = [ - { - targets = - map ( - endpoint: "${globalCfg.hostname}:${toString endpoint.port}" - ) - globalCfg.monitoring.endpoints; - } - ]; - scrape_interval = "30s"; - metrics_path = "/metrics"; - } - ] - ++ cfg.scrapeConfigs; - - # Alertmanager configuration - alertmanagers = mkIf cfg.alertmanager.enable [ - { - static_configs = [ - { - targets = [cfg.alertmanager.url]; - } - ]; - } - ]; - - rules = [ - # Basic homelab alerting rules - (pkgs.writeText "homelab-alerts.yml" '' - groups: - - name: homelab - rules: - - alert: ServiceDown - expr: up == 0 - for: 5m - labels: - severity: critical - annotations: - summary: "Service {{ $labels.instance }} is down" - description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes." 
- - - alert: HighMemoryUsage - expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9 - for: 10m - labels: - severity: warning - annotations: - summary: "High memory usage on {{ $labels.instance }}" - description: "Memory usage is above 90% on {{ $labels.instance }}" - - - alert: HighDiskUsage - expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85 - for: 5m - labels: - severity: warning - annotations: - summary: "High disk usage on {{ $labels.instance }}" - description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}" - '') - ]; - }; - - # Add monitoring endpoint to global config - homelab.global.monitoring.endpoints = [ - { - name = "prometheus"; - port = cfg.port; - path = "/metrics"; - jobName = "prometheus"; - scrapeInterval = "30s"; - labels = { - service = "prometheus"; - role = "monitoring"; - }; - } - ]; - - # Add reverse proxy entry if configured - homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [ - { - subdomain = "prometheus"; - port = cfg.port; - path = "/"; - enableAuth = true; - enableSSL = true; - customHeaders = { - "X-Frame-Options" = "DENY"; - "X-Content-Type-Options" = "nosniff"; - }; - } - ]; - - # Add backup job for Prometheus data - homelab.global.backups.jobs = [ - { - name = "prometheus-data"; - backend = "restic"; - paths = ["/var/lib/prometheus2"]; - schedule = "daily"; - retention = { - daily = "7"; - weekly = "4"; - monthly = "3"; - yearly = "1"; - }; - excludePatterns = [ - "*.tmp" - "*/wal/*" - ]; - preHook = '' - # Stop prometheus temporarily for consistent backup - systemctl stop prometheus - ''; - postHook = '' - # Restart prometheus after backup - systemctl start prometheus - ''; - } - ]; - - # Open firewall port - networking.firewall.allowedTCPPorts = [cfg.port]; - - # Create prometheus configuration directory - systemd.tmpfiles.rules = [ - "d /var/lib/prometheus2 0755 
prometheus prometheus -" - "d /etc/prometheus 0755 root root -" - ]; - }; -} diff --git a/modules/nixos/system/backups/backrest.nix b/modules/nixos/system/backups/backrest.nix deleted file mode 100644 index e230402..0000000 --- a/modules/nixos/system/backups/backrest.nix +++ /dev/null @@ -1,4 +0,0 @@ -{ - # TODO - # https://github.com/L-Trump/nixos-configs/blob/ab3fb16e330b8a2904b9967e46af8c061b56266e/modules/nixos/server/backrest.nix#L7 -} diff --git a/modules/nixos/system/backups/backups-option.nix b/modules/nixos/system/backups/backups-option.nix deleted file mode 100644 index 137f73f..0000000 --- a/modules/nixos/system/backups/backups-option.nix +++ /dev/null @@ -1,95 +0,0 @@ -# backups-option.nix -cfg: let - inherit (cfg.lib) mkOption types mkEnableOption attrNames; -in - mkOption { - type = types.attrsOf ( - types.submodule ( - { - name, - config, - ... - } @ args: { - options = { - backend = mkOption { - type = types.enum (attrNames cfg.backends); - description = "The backup backend to use"; - }; - - paths = mkOption { - type = types.listOf types.str; - default = []; - description = "Paths to backup"; - }; - - enable = mkOption { - type = types.bool; - default = true; - description = "Whether to enable this backup job"; - }; - - timerConfig = mkOption { - type = with types; nullOr attrs; - default = null; - example = { - OnCalendar = "00:05"; - Persistent = true; - RandomizedDelaySec = "5h"; - }; - description = '' - When to run the backup. If null, inherits from backend's default timerConfig. - Set to null to disable automatic scheduling. 
- ''; - }; - - backendOptions = mkOption { - type = let - backupConfig = config; - backupName = name; - in - types.submodule ( - {config, ...} @ args'': - cfg.backends.${args.config.backend} (args'' // {inherit backupConfig backupName;}) - ); - default = {}; - description = "Backend-specific options"; - }; - - preBackupScript = mkOption { - type = types.lines; - default = ""; - description = "Script to run before backing up"; - }; - - postBackupScript = mkOption { - type = types.lines; - default = ""; - description = '' - Script to run after backing up. Runs even if the backup fails. - ''; - }; - - notifications = { - failure = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable failure notifications"; - }; - }; - - success = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable success notifications"; - }; - }; - }; - }; - } - ) - ); - default = {}; - description = "Backup job definitions"; - } diff --git a/modules/nixos/system/backups/default.nix b/modules/nixos/system/backups/default.nix deleted file mode 100644 index d29d46e..0000000 --- a/modules/nixos/system/backups/default.nix +++ /dev/null @@ -1,6 +0,0 @@ -{ - imports = [ - ./root.nix - ./restic.nix - ]; -} diff --git a/modules/nixos/system/backups/restic.nix b/modules/nixos/system/backups/restic.nix deleted file mode 100644 index 58bfb1b..0000000 --- a/modules/nixos/system/backups/restic.nix +++ /dev/null @@ -1,234 +0,0 @@ -# restic.nix - Restic backend implementation -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - cfg = config.system.backups; - resticCfg = cfg.restic; - - # Get only restic backups that are enabled - resticBackups = filterAttrs (_: backup: backup.backend == "restic" && backup.enable) cfg.backups; - - # Create restic service configurations - createResticServices = - mapAttrs ( - name: backup: let - # Merge global defaults with backup-specific options - serviceConfig = - recursiveUpdate resticCfg.defaultBackendOptions backup.backendOptions - // { - inherit (backup) paths; - - # Use backup-specific timer or fall back to global default - timerConfig = - if backup.timerConfig != null - then backup.timerConfig - else resticCfg.timerConfig; - }; - in - serviceConfig - ) - resticBackups; -in { - options.system.backups.restic = { - enable = mkEnableOption "restic backup backend"; - - timerConfig = mkOption { - type = types.attrs; - default = { - OnCalendar = "*-*-* 05:00:00"; - Persistent = true; - }; - description = "Default systemd timer configuration for restic backups"; - }; - - defaultBackendOptions = mkOption { - type = types.attrs; - default = {}; - example = { - repository = "/backup/restic"; - passwordFile = "/etc/nixos/secrets/restic-password"; - initialize = true; - pruneOpts = [ - "--keep-daily 7" - "--keep-weekly 5" - "--keep-monthly 12" - "--keep-yearly 75" - ]; - }; - description = "Default backend options applied to all restic backup jobs"; - }; - - # Advanced options - runMaintenance = mkOption { - type = types.bool; - default = true; - description = "Whether to run repository maintenance after backups"; - }; - - maintenanceTimer = mkOption { - type = types.attrs; - default = { - OnCalendar = "*-*-* 06:00:00"; - Persistent = true; - }; - description = "Timer configuration for maintenance tasks"; - }; - - pruneOpts = mkOption { - type = types.listOf types.str; - default = [ - "--keep-daily 7" - "--keep-weekly 4" - "--keep-monthly 6" - "--keep-yearly 3" - ]; - description = "Default pruning options for maintenance"; - }; - }; - - 
config = mkIf resticCfg.enable { - # Register restic backend - system.backups.backends.restic = { - backupConfig, - backupName, - ... - }: { - # Define the proper options schema for restic backendOptions - options = { - repository = mkOption { - type = types.str; - description = "Restic repository path or URL"; - }; - - passwordFile = mkOption { - type = types.str; - description = "Path to file containing the repository password"; - }; - - initialize = mkOption { - type = types.bool; - default = true; - description = "Whether to initialize the repository if it doesn't exist"; - }; - - exclude = mkOption { - type = types.listOf types.str; - default = []; - description = "Patterns to exclude from backup"; - }; - - extraBackupArgs = mkOption { - type = types.listOf types.str; - default = []; - description = "Additional arguments passed to restic backup command"; - }; - - user = mkOption { - type = types.str; - default = "root"; - description = "User to run the backup as"; - }; - - pruneOpts = mkOption { - type = types.listOf types.str; - default = resticCfg.pruneOpts; - description = "Pruning options for this backup"; - }; - }; - - # Default config merged with global defaults - config = { - extraBackupArgs = - [ - "--tag ${backupName}" - "--verbose" - ] - ++ (resticCfg.defaultBackendOptions.extraBackupArgs or []); - }; - }; - - # Create actual restic backup services - services.restic.backups = createResticServices; - - # Add restic package - environment.systemPackages = [pkgs.restic]; - - # Systemd service customizations for restic backups - systemd.services = - (mapAttrs' ( - name: backup: - nameValuePair "restic-backups-${name}" { - # Custom pre/post scripts - preStart = mkBefore backup.preBackupScript; - postStop = mkAfter backup.postBackupScript; - - # Enhanced service configuration - serviceConfig = { - # Restart configuration - Restart = "on-failure"; - RestartSec = "5m"; - RestartMaxDelaySec = "30m"; - RestartSteps = 3; - - # Rate limiting - StartLimitBurst = 
4; - StartLimitIntervalSec = "2h"; - }; - - # Failure handling could be extended here for notifications - # onFailure = optional backup.notifications.failure.enable "restic-backup-${name}-failure-notify.service"; - } - ) - resticBackups) - // optionalAttrs resticCfg.runMaintenance { - # Repository maintenance service - restic-maintenance = { - description = "Restic repository maintenance"; - after = map (name: "restic-backups-${name}.service") (attrNames resticBackups); - - environment = - resticCfg.defaultBackendOptions - // { - RESTIC_CACHE_DIR = "/var/cache/restic-maintenance"; - }; - - serviceConfig = { - Type = "oneshot"; - ExecStart = [ - "${pkgs.restic}/bin/restic forget --prune ${concatStringsSep " " resticCfg.pruneOpts}" - "${pkgs.restic}/bin/restic check --read-data-subset=500M" - ]; - - User = "root"; - CacheDirectory = "restic-maintenance"; - CacheDirectoryMode = "0700"; - }; - }; - }; - - # Maintenance timer - systemd.timers = mkIf resticCfg.runMaintenance { - restic-maintenance = { - description = "Timer for restic repository maintenance"; - wantedBy = ["timers.target"]; - timerConfig = resticCfg.maintenanceTimer; - }; - }; - - # Helpful shell aliases - programs.zsh.shellAliases = - { - restic-snapshots = "restic snapshots --compact --group-by tags"; - restic-repo-size = "restic stats --mode raw-data"; - } - // (mapAttrs' ( - name: _: - nameValuePair "backup-${name}" "systemctl start restic-backups-${name}" - ) - resticBackups); - }; -} diff --git a/modules/nixos/system/backups/root.nix b/modules/nixos/system/backups/root.nix deleted file mode 100644 index 5656f72..0000000 --- a/modules/nixos/system/backups/root.nix +++ /dev/null @@ -1,66 +0,0 @@ -# root.nix - Main backup system module -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - cfg = config.system.backups; - - # Filter backups by backend - getBackupsByBackend = backend: - filterAttrs (_: backup: backup.backend == backend && backup.enable) cfg.backups; -in { - options.system.backups = { - # Backend registration system - backends register themselves here - backends = mkOption { - type = with types; attrsOf (functionTo attrs); - internal = true; - default = {}; - description = '' - Attribute set of backends where the value is a function that accepts - backend-specific arguments and returns an attribute set for the backend's options. - ''; - }; - - # Import the backups option from separate file, passing cfg for backend inference - backups = import ./backups-option.nix cfg; - - # Pass lib to the backups-option for access to mkOption, types, etc. - lib = mkOption { - type = types.attrs; - internal = true; - default = lib; - }; - }; - - config = { - # Re-export backups at root level for convenience - # backups = cfg.backups; - - # Common backup packages - environment.systemPackages = with pkgs; [ - # Add common backup utilities here - ]; - - # Common systemd service modifications for all backup services - systemd.services = let - allBackupServices = flatten ( - mapAttrsToList ( - backendName: backups: - mapAttrsToList (name: backup: "${backendName}-backups-${name}") backups - ) (genAttrs (attrNames cfg.backends) (backend: getBackupsByBackend backend)) - ); - in - genAttrs allBackupServices (serviceName: { - serviceConfig = { - # Common hardening for all backup services - ProtectSystem = "strict"; - ProtectHome = "read-only"; - PrivateTmp = true; - NoNewPrivileges = true; - }; - }); - }; -} diff --git a/proxmox-infra/.gitignore b/proxmox-infra/.gitignore deleted file mode 100644 index 9b8ce00..0000000 --- a/proxmox-infra/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -# proxmox-infra/.gitignore -.terraform/ - -*.tfstate -.tfstate. 
-crash.log -*.tfvars diff --git a/proxmox-infra/.terraform.lock.hcl b/proxmox-infra/.terraform.lock.hcl deleted file mode 100644 index 978a610..0000000 --- a/proxmox-infra/.terraform.lock.hcl +++ /dev/null @@ -1,24 +0,0 @@ -# This file is maintained automatically by "tofu init". -# Manual edits may be lost in future updates. - -provider "registry.opentofu.org/telmate/proxmox" { - version = "3.0.2-rc01" - constraints = "3.0.2-rc01" - hashes = [ - "h1:571ROPuTMC0w5lr9hbUXi7NVLsG3SpmZxXXZx8cAT+Q=", - "zh:34d264243a4513f4e30c01fb37cc6a3e592d7823dfd182c5edfb170ac7b7de3a", - "zh:544428311ad20fbb3ad2cd854e893bbf036023cb57c3acc5093d141976dac670", - "zh:5c2396b328edee8de7ac144c15a6b7e668e81063699bc8c110d7c39fb8da70e9", - "zh:5ca8e33476ad06a0259071120a59477e8f107f30c1178ea7b9f6cafe1a461ade", - "zh:5ea56eb8275edc754a01a0180750e9c939cd997d3a50659617770211f4337da9", - "zh:9dd3482df6bbe00a4a6152be3567b6c08d35c3644a327a1f5ac30fd95ccd449f", - "zh:a76075fafadcc94a825151aff169bae4e0c05e3c7717e16dcdcf16ffa61a0780", - "zh:b1d95f97b22f671db762f7adf428b409e6736c078bcf267d8391985b8847d6e3", - "zh:cc94255cd1b18e6a341c15089015c457c8c639c25c426b07f278d5ea9850b3b5", - "zh:ce991103cb69b0b3e275127e3ab92c88bb3b6b0f4e5a2cb082aeaef70a7f7d61", - "zh:d24838bce87b38e12544a1329f5ad30e2be045968e639a3f4ddd5c84aa648e04", - "zh:e106ebd4eea8d62d62e62f261a262febc615e17466b54ac18f7e65c7e79e0008", - "zh:e254ca76c95e6e92da973b7bddc36bfa0a1e31d7c7e758ef4b01315db969388b", - "zh:f1d1d5f4c39267cacebe0ab7e9e06caf9692707f3b5369685541b65bc8b840ce", - ] -} diff --git a/proxmox-infra/main.tf b/proxmox-infra/main.tf deleted file mode 100644 index e9ef2ce..0000000 --- a/proxmox-infra/main.tf +++ /dev/null @@ -1,52 +0,0 @@ -# # This calls the module to define a new VM (e.g., if you were creating one) -# resource "proxmox_vm_qemu" "sandbox" { -# name = "sandbox" -# desc = "OpenTofu testing" -# target_nodes = [var.proxmox_node] -# vmid= 100 -# full_clone = true -# clone_id = 9100 -# agent = 1 -# scsihw = 
"virtio-scsi-single" -# ciuser = "root" -# ipconfig0 = "ip=dhcp" -# cpu { -# cores = 2 -# } -# memory = 2048 -# disks { -# virtio { -# virtio0 { -# disk { -# size = "9452M" -# storage = "local-lvm" -# } -# } -# } -# ide { -# ide2 { -# cloudinit { -# storage = "local-lvm" -# } -# } -# } -# } -# network { -# id = 0 -# bridge = "vmbr0" -# model = "virtio" -# } -# serial { -# id = 0 -# } -# } - -# output "sandbox_vmid" { -# description = "sandbox VM ID" -# value = proxmox_vm_qemu.sandbox.id -# } - -# output "sandbox_ipv4" { -# description = "sandbox public IPv4 address" -# value = proxmox_vm_qemu.sandbox.default_ipv4_address -# } diff --git a/proxmox-infra/outputs.tf b/proxmox-infra/outputs.tf deleted file mode 100644 index e69de29..0000000 diff --git a/proxmox-infra/provider.tf b/proxmox-infra/provider.tf deleted file mode 100644 index 0d04a2a..0000000 --- a/proxmox-infra/provider.tf +++ /dev/null @@ -1,9 +0,0 @@ -provider "proxmox" { - pm_tls_insecure = true - pm_api_url = var.proxmox_api_url - pm_user = var.proxmox_user - pm_password = var.proxmox_password - # Or use API token for better security: - # pm_api_token_id = var.proxmox_api_token_id - # pm_api_token_secret = var.proxmox_api_token_secret -} diff --git a/proxmox-infra/sandbox.tf b/proxmox-infra/sandbox.tf deleted file mode 100644 index 7932732..0000000 --- a/proxmox-infra/sandbox.tf +++ /dev/null @@ -1,106 +0,0 @@ -# proxmox_vm_qemu.sandbox: -resource "proxmox_vm_qemu" "sandbox" { - agent = 1 - bios = "seabios" - boot = " " - ciuser = "root" - cores = 0 - current_node = "proxmox-01" - define_connection_info = false - desc = " generated by NixOS" - force_create = false - full_clone = false - hotplug = "network,disk,usb" - id = "proxmox-01/qemu/100" - ipconfig0 = "ip=dhcp" - kvm = true - linked_vmid = 0 - memory = 2048 - name = "sandbox" - numa = false - onboot = true - protection = false - qemu_os = "l26" - reboot_required = false - scsihw = "virtio-scsi-single" - sockets = 0 - sshkeys = <<-EOT - ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQCljEOf8Lv7Ptgsc1+CYzXpnrctPy7LFXXOyVZTI9uN7R4HY5aEdZTKEGSsU/+p+JtXWzzI65fnrZU8pTMG/wvCK+gYyNZcEM4g/TXMVa+CWZR3y13zGky88R7dKiBl5L00U4BePDD1ci3EU3/Mjr/GVTQHtkbJfLtvhR9zkCNZzxbu+rySWDroUPWPvE3y60/iLjBsh5ZmHo59CW67lh1jgbAlZjKWZzLWo0Bc5wgbxoQPWcO4BCh17N4g8llrRxGOwJzHeaipBnXn9J1AGIm9Zls6pxT9j6MKltcCOb7tQZwc3hlPOW2ku6f7OHTrziKw37drIDM0UDublAOcnIfBjE+XuWsp5t6ojdIzIDMrzaYW2MyMA3PHuf7VESUQdP4TZ1XUwtRRzOjn5AZJi9DPoowPaxKL92apRpFG+ovaFpWZsG7s8NWXHAC79IpgMUzscEmM15OMQ36RQ5xeytGDVCmVT8DbHGrMT9HUfR5fBSWD3aDQiOOiIIhrbY35m+U65Sz/GpZMk6HlaiV3tKNB0m+xE+84MUEmm4fFzt3B/0N4kscMArnLAm/OMUblihPwbKAUAUWErGRBfP+u+zjRCi1D9/pffpl2OQ2QIuVM82g6/EPa1ZsXZP+4iHooQoJbrqVGzkfiA1EKLfcdGfkP/O4nRl+D5UgkGdqqvm20NQ== root@proxmox-01 - EOT - tablet = true - target_nodes = [ - "proxmox-01", - ] - unused_disk = [] - vcpus = 0 - vm_state = "running" - vmid = 100 - - cpu { - cores = 2 - limit = 0 - numa = false - sockets = 1 - type = "host" - units = 0 - vcores = 0 - } - - disks { - ide { - ide2 { - cloudinit { - storage = "local-lvm" - } - } - } - virtio { - virtio0 { - disk { - backup = true - discard = false - format = "raw" - id = 0 - iops_r_burst = 0 - iops_r_burst_length = 0 - iops_r_concurrent = 0 - iops_wr_burst = 0 - iops_wr_burst_length = 0 - iops_wr_concurrent = 0 - iothread = false - linked_disk_id = -1 - mbps_r_burst = 0 - mbps_r_concurrent = 0 - mbps_wr_burst = 0 - mbps_wr_concurrent = 0 - readonly = false - replicate = true - size = "9452M" - storage = "local-lvm" - } - } - } - } - - network { - bridge = "vmbr0" - firewall = true - id = 0 - link_down = false - macaddr = "bc:24:11:a7:e8:2a" - model = "virtio" - mtu = 0 - queues = 0 - rate = 0 - tag = 0 - } - - serial { - id = 0 - type = "socket" - } - - smbios { - uuid = "37cd09d5-29a5-42e2-baba-f21b691130e8" - } -} diff --git a/proxmox-infra/terraform.tfstate.backup b/proxmox-infra/terraform.tfstate.backup deleted file mode 100644 index 8a8181b..0000000 --- a/proxmox-infra/terraform.tfstate.backup +++ /dev/null @@ 
-1 +0,0 @@ -{"version":4,"terraform_version":"1.9.1","serial":2,"lineage":"ecd6c5f8-5352-bf30-6117-d55763366399","outputs":{"sandbox_ipv4":{"value":"192.168.1.206","type":"string"},"sandbox_vmid":{"value":"proxmox-01/qemu/999","type":"string"}},"resources":[{"mode":"managed","type":"proxmox_vm_qemu","name":"sandbox","provider":"provider[\"registry.opentofu.org/telmate/proxmox\"]","instances":[{"schema_version":0,"attributes":{"additional_wait":5,"agent":1,"agent_timeout":90,"args":"","automatic_reboot":true,"balloon":0,"bios":"seabios","boot":" ","bootdisk":"","ci_wait":null,"cicustom":null,"cipassword":"","ciupgrade":false,"ciuser":"root","clone":null,"clone_id":9100,"clone_wait":10,"cores":0,"cpu":[{"affinity":"","cores":2,"flags":[],"limit":0,"numa":false,"sockets":1,"type":"host","units":0,"vcores":0}],"cpu_type":"","current_node":"proxmox-01","default_ipv4_address":"192.168.1.206","default_ipv6_address":"2a05:f6c7:2030:0:be24:11ff:feb9:919f","define_connection_info":true,"desc":"OpenTofu 
testing","disk":[],"disks":[{"ide":[{"ide0":[],"ide1":[],"ide2":[{"cdrom":[],"cloudinit":[{"storage":"local-lvm"}],"disk":[],"ignore":false,"passthrough":[]}],"ide3":[]}],"sata":[],"scsi":[],"virtio":[{"virtio0":[{"cdrom":[],"disk":[{"asyncio":"","backup":true,"cache":"","discard":false,"format":"raw","id":0,"iops_r_burst":0,"iops_r_burst_length":0,"iops_r_concurrent":0,"iops_wr_burst":0,"iops_wr_burst_length":0,"iops_wr_concurrent":0,"iothread":false,"linked_disk_id":-1,"mbps_r_burst":0,"mbps_r_concurrent":0,"mbps_wr_burst":0,"mbps_wr_concurrent":0,"readonly":false,"replicate":false,"serial":"","size":"9452M","storage":"local-lvm","wwn":""}],"ignore":false,"passthrough":[]}],"virtio1":[],"virtio10":[],"virtio11":[],"virtio12":[],"virtio13":[],"virtio14":[],"virtio15":[],"virtio2":[],"virtio3":[],"virtio4":[],"virtio5":[],"virtio6":[],"virtio7":[],"virtio8":[],"virtio9":[]}]}],"efidisk":[],"force_create":false,"force_recreate_on_change_of":null,"full_clone":true,"hagroup":"","hastate":"","hostpci":[],"hotplug":"network,disk,usb","id":"proxmox-01/qemu/999","ipconfig0":"ip=dhcp","ipconfig1":null,"ipconfig10":null,"ipconfig11":null,"ipconfig12":null,"ipconfig13":null,"ipconfig14":null,"ipconfig15":null,"ipconfig2":null,"ipconfig3":null,"ipconfig4":null,"ipconfig5":null,"ipconfig6":null,"ipconfig7":null,"ipconfig8":null,"ipconfig9":null,"kvm":true,"linked_vmid":0,"machine":"","memory":2048,"name":"sandbox2","nameserver":null,"network":[{"bridge":"vmbr0","firewall":false,"id":0,"link_down":false,"macaddr":"bc:24:11:b9:91:9f","model":"virtio","mtu":0,"queues":0,"rate":0,"tag":0}],"numa":false,"onboot":false,"os_network_config":null,"os_type":null,"pci":[],"pcis":[],"pool":"","protection":false,"pxe":null,"qemu_os":"l26","reboot_required":false,"scsihw":"virtio-scsi-single","searchdomain":null,"serial":[{"id":0,"type":"socket"}],"skip_ipv4":false,"skip_ipv6":false,"smbios":[{"family":"","manufacturer":"","product":"","serial":"","sku":"","uuid":"51a93ec4-4afa-428b-911a-daa
b70390a8c","version":""}],"sockets":0,"ssh_forward_ip":null,"ssh_host":"192.168.1.206","ssh_port":"22","ssh_private_key":null,"ssh_user":null,"sshkeys":null,"startup":"","tablet":true,"tags":"v0.0.2","target_node":null,"target_nodes":["proxmox-01"],"timeouts":null,"tpm_state":[],"unused_disk":[],"usb":[],"usbs":[],"vcpus":0,"vga":[],"vm_state":"running","vmid":999},"sensitive_attributes":[[{"type":"get_attr","value":"cipassword"}],[{"type":"get_attr","value":"ssh_private_key"}]],"private":"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWZhdWx0IjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInJlYWQiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19"}]}],"check_results":null} diff --git a/proxmox-infra/variables.tf b/proxmox-infra/variables.tf deleted file mode 100644 index 71653f0..0000000 --- a/proxmox-infra/variables.tf +++ /dev/null @@ -1,30 +0,0 @@ -# proxmox-infra/variables.tf - -variable "proxmox_api_url" { - description = "The URL of the Proxmox API (e.g., https://192.168.1.10:8006/api2/json)" - type = string - # No default here, so OpenTofu will prompt or expect a .tfvars file/env var -} - -variable "proxmox_user" { - description = "Proxmox user (e.g., root@pam or user@pve)" - type = string -} - -variable "proxmox_password" { - description = "Proxmox user password" - type = string - sensitive = true # Mark as sensitive to hide in logs -} - -variable "proxmox_node" { - description = "The Proxmox node name where VMs will be deployed (e.g., 'pve')" - type = string -} - -# Example for templates - you might have different templates -variable "nixos_template_id" { - description = "VMID of the nixos cloud-init template" - type = number - # Example: default = 100 -} diff --git a/proxmox-infra/versions.tf b/proxmox-infra/versions.tf deleted file mode 100644 index 3ca35cc..0000000 --- a/proxmox-infra/versions.tf +++ /dev/null @@ -1,9 +0,0 @@ -# versions.tf -terraform { - required_providers { - proxmox = { - source = 
"Telmate/proxmox" - version = "3.0.2-rc01" - } - } -} diff --git a/users/default.nix b/users/default.nix new file mode 100644 index 0000000..b4edc6d --- /dev/null +++ b/users/default.nix @@ -0,0 +1,3 @@ +{ + defaultUser = import ./plasmagoat.nix; +} diff --git a/users/plasmagoat.nix b/users/plasmagoat.nix index 9e5a96b..037b202 100644 --- a/users/plasmagoat.nix +++ b/users/plasmagoat.nix @@ -1,4 +1,3 @@ -# users/plasmagoat.nix - Your user configuration { config, lib, From a955528e449efec30deab9af6d530f96e5ed418c Mon Sep 17 00:00:00 2001 From: plasmagoat Date: Tue, 29 Jul 2025 02:18:19 +0200 Subject: [PATCH 2/3] another refactor partly done --- hosts/sandbox/default.nix | 16 +- modules/homelab/backup-config.nix | 116 --- modules/homelab/default.nix | 138 ++- modules/homelab/lib/aggregators/base.nix | 55 ++ modules/homelab/lib/features/logging.nix | 87 ++ modules/homelab/lib/features/monitoring.nix | 108 +++ modules/homelab/lib/features/proxy.nix | 64 ++ modules/homelab/lib/node-aggregation.nix | 226 ----- modules/homelab/lib/service-interface.nix | 295 ------ modules/homelab/lib/systems/backups.nix | 163 ++++ modules/homelab/lib/systems/logging.nix | 209 ++++ modules/homelab/lib/systems/monitoring.nix | 222 +++++ modules/homelab/lib/systems/proxy.nix | 98 ++ modules/homelab/monitoring-config.nix | 214 ----- modules/homelab/proxy-config.nix | 53 -- modules/homelab/services/default.nix | 6 +- modules/homelab/services/example-service.nix | 161 ---- modules/homelab/services/gatus.nix | 267 ++++++ modules/homelab/services/grafana.nix | 86 ++ modules/homelab/services/jellyfin.nix | 125 --- modules/homelab/services/monitoring/gatus.nix | 269 +++--- .../homelab/services/monitoring/grafana.nix | 900 ++++++++++++------ .../homelab/services/monitoring/grafana_1.nix | 198 ++++ .../services/monitoring/grafana_gg.nix | 416 ++++++++ .../homelab/services/monitoring/influxdb.nix | 399 ++++++++ modules/homelab/services/monitoring/loki.nix | 356 +++++++ 
.../services/monitoring/prometheus.nix | 13 +- .../homelab/services/monitoring/promtail.nix | 0 modules/homelab/services/postgres.nix | 0 modules/homelab/services/prometheus.nix | 252 +++++ modules/homelab/services/prometheus_old.nix | 208 ---- 31 files changed, 3790 insertions(+), 1930 deletions(-) delete mode 100644 modules/homelab/backup-config.nix create mode 100644 modules/homelab/lib/aggregators/base.nix create mode 100644 modules/homelab/lib/features/logging.nix create mode 100644 modules/homelab/lib/features/monitoring.nix create mode 100644 modules/homelab/lib/features/proxy.nix delete mode 100644 modules/homelab/lib/node-aggregation.nix delete mode 100644 modules/homelab/lib/service-interface.nix create mode 100644 modules/homelab/lib/systems/backups.nix create mode 100644 modules/homelab/lib/systems/logging.nix create mode 100644 modules/homelab/lib/systems/monitoring.nix create mode 100644 modules/homelab/lib/systems/proxy.nix delete mode 100644 modules/homelab/monitoring-config.nix delete mode 100644 modules/homelab/proxy-config.nix delete mode 100644 modules/homelab/services/example-service.nix create mode 100644 modules/homelab/services/gatus.nix create mode 100644 modules/homelab/services/grafana.nix delete mode 100644 modules/homelab/services/jellyfin.nix create mode 100644 modules/homelab/services/monitoring/grafana_1.nix create mode 100644 modules/homelab/services/monitoring/grafana_gg.nix delete mode 100644 modules/homelab/services/monitoring/promtail.nix delete mode 100644 modules/homelab/services/postgres.nix create mode 100644 modules/homelab/services/prometheus.nix delete mode 100644 modules/homelab/services/prometheus_old.nix diff --git a/hosts/sandbox/default.nix b/hosts/sandbox/default.nix index 2782e30..ebf4475 100644 --- a/hosts/sandbox/default.nix +++ b/hosts/sandbox/default.nix @@ -11,6 +11,7 @@ tags = [name]; monitoring.enable = true; + logging.enable = true; motd.enable = true; backups = { @@ -40,17 +41,10 @@ ]; }; - 
services.prometheus = { - enable = true; - }; - - services.gatus = { - enable = true; - ui = { - title = "Homelab Status Dashboard"; - header = "My Homelab Services"; - }; - }; + # services.loki.enable = true; + services.prometheus.enable = true; + services.grafana.enable = true; + services.gatus.enable = true; }; system.stateVersion = "25.05"; diff --git a/modules/homelab/backup-config.nix b/modules/homelab/backup-config.nix deleted file mode 100644 index e26dcb2..0000000 --- a/modules/homelab/backup-config.nix +++ /dev/null @@ -1,116 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.backups; - homelabCfg = config.homelab; - - # Get all defined backend names dynamically - backendNames = attrNames cfg.backends or {}; - - backupJobType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the backup job"; - }; - backend = mkOption { - type = types.enum backendNames; - description = "Backend to use for this backup job"; - }; - backendOptions = mkOption { - type = types.attrs; - default = {}; - description = "Backend-specific options to override or extend the backend configuration"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for this backup job"; - }; - }; - }; -in { - imports = [ - ./backup/restic.nix - # ./backup/borgbackup.nix - ]; - - options.homelab.backups = { - enable = mkEnableOption "Homelab backup system"; - - jobs = mkOption { - type = types.listOf backupJobType; - default = []; - description = "Backup jobs to execute on this system"; - }; - - defaultLabels = mkOption { - type = types.attrsOf types.str; - default = { - hostname = homelabCfg.hostname; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - description = "Default labels applied to all backup jobs"; - }; - - monitoring = mkOption { - type = types.bool; - default = true; - description = "Enable backup monitoring and metrics"; - }; - 
}; - - config = mkIf cfg.enable { - # Validate that all job backends exist - assertions = [ - { - assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs; - message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends"; - } - ]; - - # Add backup jobs to monitoring endpoints if monitoring is enabled - # homelab.monitoring.endpoints = - # mkIf (cfg.monitoring && config.homelab.monitoring.enable) - # (map (job: { - # name = "backup-${job.name}"; - # port = 9100; # Assuming node exporter collects backup metrics - # path = "/metrics"; - # jobName = "backup"; - # labels = - # cfg.defaultLabels - # // job.labels - # // { - # backup_job = job.name; - # backup_backend = job.backend; - # }; - # }) - # cfg.jobs); - - # Export backup configuration for external consumption - environment.etc."homelab/backup-config.json".text = builtins.toJSON { - backends = - mapAttrs (name: config: { - inherit name; - enabled = config.enable or false; - }) - cfg.backends; - - jobs = - map (job: { - inherit (job) name backend labels; - allLabels = cfg.defaultLabels // job.labels; - paths = job.backendOptions.paths or []; - schedule = job.backendOptions.timerConfig.OnCalendar or job.backendOptions.startAt or "unknown"; - node = homelabCfg.hostname; - environment = homelabCfg.environment; - location = homelabCfg.location; - }) - cfg.jobs; - }; - }; -} diff --git a/modules/homelab/default.nix b/modules/homelab/default.nix index e56aae9..ce19d59 100644 --- a/modules/homelab/default.nix +++ b/modules/homelab/default.nix @@ -1,6 +1,7 @@ { config, lib, + nodes, ... 
}: with lib; let @@ -9,18 +10,13 @@ with lib; let nodeAgg = import ./lib/node-aggregation.nix {inherit lib;}; in { imports = [ - ./monitoring-config.nix - ./proxy-config.nix - ./backup-config.nix - ./motd + ./lib/systems/monitoring.nix + ./lib/systems/logging.nix + ./lib/systems/proxy.nix + ./lib/systems/backups.nix ./services - - # Global aggregation modules - (nodeAgg.mkGlobalModule "monitoring" nodeAgg.aggregators.monitoring) - # (nodeAgg.mkGlobalModule "logs" nodeAgg.aggregators.logs) - (nodeAgg.mkGlobalModule "reverseProxy" nodeAgg.aggregators.reverseProxy) - (nodeAgg.mkGlobalModule "backups" nodeAgg.aggregators.backups) + ./motd ]; options.homelab = { @@ -61,73 +57,73 @@ in { networking.hostName = cfg.hostname; # Export configuration for external consumption - environment.etc."homelab/config.json".text = builtins.toJSON { - inherit (cfg) hostname domain environment location tags; + # environment.etc."homelab/config.json".text = builtins.toJSON { + # inherit (cfg) hostname domain environment location tags; - monitoring = { - # Metrics endpoints (Prometheus, etc.) - metrics = - map (endpoint: { - inherit (endpoint) name host port path jobName scrapeInterval labels; - url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; - }) - cfg.global.monitoring.allMetrics or []; + # monitoring = { + # # Metrics endpoints (Prometheus, etc.) 
+ # metrics = + # map (endpoint: { + # inherit (endpoint) name host port path jobName scrapeInterval labels; + # url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; + # }) + # cfg.global.monitoring.allMetrics or []; - # Health check endpoints - healthChecks = - map (check: let - # Determine the host based on useExternalDomain - actualHost = - if check.useExternalDomain - then "${check.subdomain}.${cfg.externalDomain}" - else check.host; + # # Health check endpoints + # healthChecks = + # map (check: let + # # Determine the host based on useExternalDomain + # actualHost = + # if check.useExternalDomain + # then "${check.subdomain}.${cfg.externalDomain}" + # else check.host; - # Build the URL - portPart = - if check.port != null - then ":${toString check.port}" - else ""; - url = "${check.protocol}://${actualHost}${portPart}${check.path}"; - in { - inherit (check) name protocol method interval timeout conditions alerts group labels enabled; - host = actualHost; - port = check.port; - path = check.path; - url = url; - useExternalDomain = check.useExternalDomain; - subdomain = check.subdomain; - sourceNode = cfg.hostname; - }) - cfg.global.monitoring.allHealthChecks or []; - }; + # # Build the URL + # portPart = + # if check.port != null + # then ":${toString check.port}" + # else ""; + # url = "${check.protocol}://${actualHost}${portPart}${check.path}"; + # in { + # inherit (check) name protocol method interval timeout conditions alerts group labels enabled; + # host = actualHost; + # port = check.port; + # path = check.path; + # url = url; + # useExternalDomain = check.useExternalDomain; + # subdomain = check.subdomain; + # sourceNode = cfg.hostname; + # }) + # cfg.global.monitoring.allHealthChecks or []; + # }; - reverseProxy = { - entries = - map (entry: { - inherit (entry) subdomain host port path enableAuth enableSSL; - internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}"; - externalHost = 
"${entry.subdomain}.${cfg.externalDomain}"; - }) - cfg.global.reverseProxy.all; - }; + # reverseProxy = { + # entries = + # map (entry: { + # inherit (entry) subdomain host port path enableAuth enableSSL; + # internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}"; + # externalHost = "${entry.subdomain}.${cfg.externalDomain}"; + # }) + # cfg.global.reverseProxy.all; + # }; - backups = { - jobs = - map (job: { - inherit (job) name backend labels; - backupId = job._backupId; - sourceNode = job._sourceNode; - }) - cfg.global.backups.all; + # backups = { + # jobs = + # map (job: { + # inherit (job) name backend labels; + # backupId = job._backupId; + # sourceNode = job._sourceNode; + # }) + # cfg.global.backups.all; - backends = cfg.global.backups.allBackends; + # backends = cfg.global.backups.allBackends; - summary = { - totalJobs = length cfg.global.backups.all; - jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend; - jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode; - }; - }; - }; + # summary = { + # totalJobs = length cfg.global.backups.all; + # jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend; + # jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode; + # }; + # }; + # }; }; } diff --git a/modules/homelab/lib/aggregators/base.nix b/modules/homelab/lib/aggregators/base.nix new file mode 100644 index 0000000..e32228c --- /dev/null +++ b/modules/homelab/lib/aggregators/base.nix @@ -0,0 +1,55 @@ +{lib}: let + inherit (lib) flatten mapAttrs attrValues filterAttrs mapAttrsToList filter groupBy length unique attrByPath splitString; + + # Generic function to aggregate any attribute across nodes + aggregateFromNodes = { + nodes, + attributePath, # e.g. "homelab.monitoring.metrics" or "homelab.backups.jobs" + enhancer ? 
null, # optional function to enhance each item with node context + }: let + # Extract the attribute from each node using the path + getNestedAttr = path: config: let + pathList = splitString "." path; + in + attrByPath pathList [] config; + + # Get all items from all nodes + allItems = flatten (mapAttrsToList + (nodeName: nodeConfig: let + items = getNestedAttr attributePath nodeConfig.config; + baseEnhancer = item: + item + // { + _nodeName = nodeName; + _nodeConfig = nodeConfig; + _nodeAddress = nodeConfig.config.networking.hostName or nodeName; + }; + finalEnhancer = + if enhancer != null + then (item: enhancer (baseEnhancer item)) + else baseEnhancer; + in + map finalEnhancer items) + nodes); + in { + # Raw aggregated data + all = allItems; + + # Common grouping patterns + byNode = groupBy (item: item._nodeName) allItems; + byType = groupBy (item: item.type or "unknown") allItems; + byService = groupBy (item: item.service or "unknown") allItems; + + # Utility functions for filtering + filterBy = predicate: filter predicate allItems; + ofType = type: filter (item: (item.type or "") == type) allItems; + ofNode = nodeName: filter (item: item._nodeName == nodeName) allItems; + enabled = filter (item: item.enabled or true) allItems; + + # Counting utilities + count = length allItems; + countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems); + }; +in { + inherit aggregateFromNodes; +} diff --git a/modules/homelab/lib/features/logging.nix b/modules/homelab/lib/features/logging.nix new file mode 100644 index 0000000..010b766 --- /dev/null +++ b/modules/homelab/lib/features/logging.nix @@ -0,0 +1,87 @@ +serviceName: { + config, + lib, + ... 
+}: +with lib; let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; +in { + options.homelab.services.${serviceName}.logging = { + enable = mkEnableOption "logging for ${serviceName}"; + + files = mkOption { + type = types.listOf types.str; + default = []; + }; + + parsing = { + regex = mkOption { + type = types.nullOr types.str; + default = null; + }; + + extractFields = mkOption { + type = types.listOf types.str; + default = []; + }; + }; + + multiline = mkOption { + type = types.nullOr (types.submodule { + options = { + firstLineRegex = mkOption {type = types.str;}; + maxWaitTime = mkOption { + type = types.str; + default = "3s"; + }; + }; + }); + default = null; + }; + + extraLabels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + + extraSources = mkOption { + type = types.listOf types.attrs; + default = []; + }; + }; + + config = mkIf (cfg.enable && cfg.logging.enable) { + homelab.logging.sources = + [ + { + name = "${serviceName}-logs"; + type = "file"; + files = { + paths = cfg.logging.files; + multiline = cfg.logging.multiline; + }; + labels = + cfg.logging.extraLabels + // { + service = serviceName; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + }; + pipelineStages = + mkIf (cfg.logging.parsing.regex != null) [ + { + regex.expression = cfg.logging.parsing.regex; + } + ] + ++ [ + { + labels = listToAttrs (map (field: nameValuePair field null) cfg.logging.parsing.extractFields); + } + ]; + enabled = true; + } + ] + ++ cfg.logging.extraSources; + }; +} diff --git a/modules/homelab/lib/features/monitoring.nix b/modules/homelab/lib/features/monitoring.nix new file mode 100644 index 0000000..90b36f9 --- /dev/null +++ b/modules/homelab/lib/features/monitoring.nix @@ -0,0 +1,108 @@ +serviceName: { + config, + lib, + ... 
+}: +with lib; let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; +in { + # Define the service-specific monitoring options + options.homelab.services.${serviceName}.monitoring = { + enable = mkEnableOption "monitoring for ${serviceName}"; + + metrics = { + enable = mkOption { + type = types.bool; + default = true; + }; + + path = mkOption { + type = types.str; + default = "/metrics"; + }; + + extraEndpoints = mkOption { + type = types.listOf types.attrs; + default = []; + }; + }; + + healthCheck = { + enable = mkOption { + type = types.bool; + default = true; + }; + + path = mkOption { + type = types.str; + default = "/health"; + }; + + conditions = mkOption { + type = types.listOf types.str; + default = ["[STATUS] == 200"]; + }; + + extraChecks = mkOption { + type = types.listOf types.attrs; + default = []; + }; + }; + + extraLabels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + }; + + # Generate the homelab config automatically when service is enabled + config = mkIf (cfg.enable && cfg.monitoring.enable) { + homelab.monitoring = { + metrics = + [ + { + name = "${serviceName}-main"; + host = homelabCfg.hostname; + port = cfg.port; + path = cfg.monitoring.metrics.path; + jobName = serviceName; + scrapeInterval = "30s"; + labels = + cfg.monitoring.extraLabels + // { + service = serviceName; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + }; + } + ] + ++ cfg.monitoring.metrics.extraEndpoints; + + healthChecks = + [ + { + name = "${serviceName}-health"; + host = homelabCfg.hostname; + port = cfg.port; + path = cfg.monitoring.healthCheck.path; + protocol = "http"; + method = "GET"; + interval = "30s"; + timeout = "10s"; + conditions = cfg.monitoring.healthCheck.conditions; + group = "services"; + labels = + cfg.monitoring.extraLabels + // { + service = serviceName; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + }; + enabled = true; + } + ] + ++ 
cfg.monitoring.healthCheck.extraChecks; + }; + }; +} diff --git a/modules/homelab/lib/features/proxy.nix b/modules/homelab/lib/features/proxy.nix new file mode 100644 index 0000000..2658c7a --- /dev/null +++ b/modules/homelab/lib/features/proxy.nix @@ -0,0 +1,64 @@ +serviceName: { + config, + lib, + ... +}: +with lib; let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; +in { + options.homelab.services.${serviceName}.proxy = { + enable = mkEnableOption "reverse proxy for ${serviceName}"; + + subdomain = mkOption { + type = types.str; + default = serviceName; + }; + + enableAuth = mkOption { + type = types.bool; + default = false; + }; + + additionalSubdomains = mkOption { + type = types.listOf (types.submodule { + options = { + subdomain = mkOption {type = types.str;}; + port = mkOption {type = types.port;}; + path = mkOption { + type = types.str; + default = "/"; + }; + enableAuth = mkOption { + type = types.bool; + default = false; + }; + }; + }); + default = []; + }; + }; + + config = mkIf (cfg.enable && cfg.proxy.enable) { + homelab.reverseProxy.entries = + [ + { + subdomain = cfg.proxy.subdomain; + host = homelabCfg.hostname; + port = cfg.port; + path = "/"; + enableAuth = cfg.proxy.enableAuth; + enableSSL = true; + } + ] + ++ map (sub: { + subdomain = sub.subdomain; + host = homelabCfg.hostname; + port = sub.port; + path = sub.path; + enableAuth = sub.enableAuth; + enableSSL = true; + }) + cfg.proxy.additionalSubdomains; + }; +} diff --git a/modules/homelab/lib/node-aggregation.nix b/modules/homelab/lib/node-aggregation.nix deleted file mode 100644 index 1719012..0000000 --- a/modules/homelab/lib/node-aggregation.nix +++ /dev/null @@ -1,226 +0,0 @@ -{lib}: let - inherit (lib) flatten mapAttrs mapAttrsToList filter groupBy length unique attrByPath splitString; - - # Generic function to aggregate any attribute across nodes - aggregateFromNodes = { - nodes, - attributePath, # e.g. 
"homelab.monitoring.endpoints" or "homelab.backups.jobs" - enhancer ? null, # optional function to enhance each item with node context - }: let - # Extract the attribute from each node using the path - getNestedAttr = path: config: let - pathList = splitString "." path; - in - attrByPath pathList [] config; - - # Get all items from all nodes - allItems = flatten (mapAttrsToList - (nodeName: nodeConfig: let - items = getNestedAttr attributePath nodeConfig.config; - baseEnhancer = item: - item - // { - _nodeName = nodeName; - _nodeConfig = nodeConfig; - _nodeAddress = nodeConfig.config.networking.hostName or nodeName; - }; - finalEnhancer = - if enhancer != null - then (item: enhancer (baseEnhancer item)) - else baseEnhancer; - in - map finalEnhancer items) - nodes); - in { - # Raw aggregated data - all = allItems; - - # Common grouping patterns - byNode = groupBy (item: item._nodeName) allItems; - byType = groupBy (item: item.type or "unknown") allItems; - byService = groupBy (item: item.service or "unknown") allItems; - - # Utility functions for filtering - filterBy = predicate: filter predicate allItems; - ofType = type: filter (item: (item.type or "") == type) allItems; - - count = length allItems; - countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems); - }; - - # Specialized aggregators for common use cases - aggregators = { - monitoring = nodes: let - # Aggregate metrics endpoints - metricsAgg = aggregateFromNodes { - inherit nodes; - attributePath = "homelab.monitoring.metrics"; - enhancer = endpoint: - endpoint - // { - _fullAddress = "${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}"; - _metricsUrl = "http://${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}${endpoint.path or "/metrics"}"; - _type = "metrics"; - }; - }; - # Aggregate health checks - healthChecksAgg = aggregateFromNodes { - inherit nodes; - attributePath = "homelab.monitoring.healthChecks"; - enhancer = check: let - # Compute the actual 
host and URL - actualHost = - if check.useExternalDomain or false - then "${check.subdomain}.${check._nodeConfig.config.homelab.externalDomain or "example.com"}" - else check.host or check._nodeAddress; - portPart = - if check.port != null - then ":${toString check.port}" - else ""; - url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path or "/"}"; - in - check - // { - _actualHost = actualHost; - _url = url; - _type = "health-check"; - # Merge default labels with node context - labels = - (check.labels or {}) - // { - node = check._nodeName; - environment = check._nodeConfig.config.homelab.environment or "unknown"; - }; - }; - }; - in - metricsAgg - // healthChecksAgg - // { - # Metrics-specific aggregations - allMetrics = metricsAgg.all; - metricsByNode = metricsAgg.byNode; - metricsByJobName = groupBy (m: m.jobName or "unknown") metricsAgg.all; - - # Health checks-specific aggregations - allHealthChecks = healthChecksAgg.all; - healthChecksByNode = healthChecksAgg.byNode; - healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all; - healthChecksByProtocol = groupBy (hc: hc.protocol or "http") healthChecksAgg.all; - - # Filtered health checks - externalHealthChecks = filter (hc: hc.useExternalDomain or false) healthChecksAgg.all; - internalHealthChecks = filter (hc: !(hc.useExternalDomain or false)) healthChecksAgg.all; - enabledHealthChecks = filter (hc: hc.enabled or true) healthChecksAgg.all; - - # Summary statistics - summary = { - totalMetrics = length metricsAgg.all; - totalHealthChecks = length healthChecksAgg.all; - healthChecksByGroup = - mapAttrs (group: checks: length checks) - (groupBy (hc: hc.group or "default") healthChecksAgg.all); - healthChecksByProtocol = - mapAttrs (protocol: checks: length checks) - (groupBy (hc: hc.protocol or "http") healthChecksAgg.all); - externalChecksCount = length (filter (hc: hc.useExternalDomain or false) healthChecksAgg.all); - internalChecksCount = length (filter (hc: 
!(hc.useExternalDomain or false)) healthChecksAgg.all); - }; - }; - - # Promtail log configurations - # logs = nodes: - # aggregateFromNodes { - # inherit nodes; - # attributePath = "homelab.logging.sources"; - # enhancer = logSource: - # logSource - # // { - # # Add log-specific computed fields - # _logPath = logSource.path or "/var/log/${logSource.service}.log"; - # _labels = - # (logSource.labels or {}) - # // { - # node = logSource._nodeName; - # service = logSource.service or "unknown"; - # }; - # }; - # }; - - # Reverse proxy configurations - reverseProxy = nodes: - aggregateFromNodes { - inherit nodes; - attributePath = "homelab.reverseProxy.entries"; - enhancer = entry: - entry - // { - # Add proxy-specific computed fields - _upstream = "http://${entry.host or entry._nodeAddress}:${toString entry.port}"; - _fqdn = "${entry.subdomain or entry.service}.${entry.domain or "local"}"; - }; - }; - - # Backup jobs with enhanced aggregation - backups = nodes: let - baseAgg = aggregateFromNodes { - inherit nodes; - attributePath = "homelab.backups.jobs"; - enhancer = backup: - backup - // { - _sourceNode = backup._nodeName; - _backupId = "${backup._nodeName}-${backup.name}"; - _jobFqdn = "${backup.name}.${backup._nodeName}"; - }; - }; - - # Get all unique backends across all nodes - allBackends = let - allBackendConfigs = - mapAttrsToList - (nodeName: nodeConfig: - attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config) - nodes; - enabledBackends = flatten (map (backends: - filter (name: backends.${name} != null) (lib.attrNames backends)) - allBackendConfigs); - in - unique enabledBackends; - in - baseAgg - // { - # Backup-specific aggregations - byBackend = groupBy (job: job.backend) baseAgg.all; - allBackends = allBackends; - - # Enhanced summary - summary = { - totalJobs = length baseAgg.all; - jobsByBackend = - mapAttrs (backend: jobs: length jobs) - (groupBy (job: job.backend) baseAgg.all); - jobsByNode = baseAgg.countBy (job: job._nodeName); - 
availableBackends = allBackends; - backendsInUse = unique (map (job: job.backend) baseAgg.all); - }; - }; - }; -in { - inherit aggregateFromNodes aggregators; - - # Convenience function to create a module that provides global aggregations - mkGlobalModule = attributeName: aggregatorFn: { - lib, - nodes, - ... - }: { - options.homelab.global.${attributeName} = lib.mkOption { - type = lib.types.attrs; - readOnly = true; - description = "Globally aggregated ${attributeName} from all nodes"; - }; - - config.homelab.global.${attributeName} = aggregatorFn nodes; - }; -} diff --git a/modules/homelab/lib/service-interface.nix b/modules/homelab/lib/service-interface.nix deleted file mode 100644 index 2bc7ed8..0000000 --- a/modules/homelab/lib/service-interface.nix +++ /dev/null @@ -1,295 +0,0 @@ -# Standard service interface for homelab services -# This provides a consistent contract that all services should follow -{lib}: let - inherit (lib) mkOption mkEnableOption types; - - # Define the standard service interface - mkServiceInterface = { - serviceName, - defaultPort ? null, - defaultSubdomain ? serviceName, - defaultDescription ? "Homelab ${serviceName} service", - monitoringPath ? "/metrics", - healthCheckPath ? "/health", - healthCheckConditions ? ["[STATUS] == 200"], - # Custom options that the service wants to expose - serviceOptions ? 
{}, - }: - { - # Standard interface options that all services must have - enable = mkEnableOption defaultDescription; - - port = mkOption { - type = types.port; - default = - if defaultPort != null - then defaultPort - else throw "Service ${serviceName} must specify a default port"; - description = "Port for ${serviceName} service"; - }; - - openFirewall = mkOption { - type = types.bool; - default = true; - description = "Whether to automatically open firewall ports"; - }; - - proxy = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable reverse proxy for this service"; - }; - - subdomain = mkOption { - type = types.str; - default = defaultSubdomain; - description = "Subdomain for reverse proxy (${defaultSubdomain}.yourdomain.com)"; - }; - - enableAuth = mkOption { - type = types.bool; - default = false; - description = "Enable authentication for reverse proxy"; - }; - - enableSSL = mkOption { - type = types.bool; - default = true; - description = "Enable SSL for reverse proxy"; - }; - }; - - monitoring = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable monitoring (metrics and health checks)"; - }; - - metricsPath = mkOption { - type = types.str; - default = monitoringPath; - description = "Path for metrics endpoint"; - }; - - jobName = mkOption { - type = types.str; - default = serviceName; - description = "Prometheus job name"; - }; - - scrapeInterval = mkOption { - type = types.str; - default = "30s"; - description = "Prometheus scrape interval"; - }; - - healthCheck = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable health check monitoring"; - }; - - path = mkOption { - type = types.str; - default = healthCheckPath; - description = "Path for health check endpoint"; - }; - - interval = mkOption { - type = types.str; - default = "30s"; - description = "Health check interval"; - }; - - timeout = mkOption { - type = types.str; - default = "10s"; - 
description = "Health check timeout"; - }; - - conditions = mkOption { - type = types.listOf types.str; - default = healthCheckConditions; - description = "Health check conditions"; - }; - - group = mkOption { - type = types.str; - default = "services"; - description = "Health check group name"; - }; - }; - - extraLabels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for monitoring"; - }; - }; - - description = mkOption { - type = types.str; - default = defaultDescription; - description = "Service description"; - }; - - extraOptions = mkOption { - type = types.attrs; - default = {}; - description = "Additional service-specific configuration options"; - }; - - # Merge in service-specific options - } - // serviceOptions; - - # Helper function to implement the standard service behavior - mkServiceConfig = { - config, - cfg, - homelabCfg, - serviceName, - # Function that returns the actual service configuration - serviceConfig, - # Optional: custom monitoring labels - extraMonitoringLabels ? {}, - # Optional: custom health check configuration - customHealthChecks ? [], - # Optional: custom reverse proxy configuration - customProxyConfig ? 
{}, - }: let - # Standard monitoring labels - standardLabels = - { - service = serviceName; - component = "main"; - instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; - } - // extraMonitoringLabels // cfg.monitoring.extraLabels; - - # Standard reverse proxy entry - standardProxyEntry = - { - subdomain = cfg.proxy.subdomain; - host = homelabCfg.hostname; - port = cfg.port; - enableAuth = cfg.proxy.enableAuth; - enableSSL = cfg.proxy.enableSSL; - } - // customProxyConfig; - - # Standard metrics configuration - standardMetrics = lib.optional cfg.monitoring.enable { - name = "${serviceName}-metrics"; - port = cfg.port; - path = cfg.monitoring.metricsPath; - jobName = cfg.monitoring.jobName; - scrapeInterval = cfg.monitoring.scrapeInterval; - labels = standardLabels; - }; - - # Standard health check configuration - standardHealthCheck = lib.optional (cfg.monitoring.enable && cfg.monitoring.healthCheck.enable) { - name = "${serviceName}-health"; - port = cfg.port; - path = cfg.monitoring.healthCheck.path; - interval = cfg.monitoring.healthCheck.interval; - timeout = cfg.monitoring.healthCheck.timeout; - conditions = cfg.monitoring.healthCheck.conditions; - group = cfg.monitoring.healthCheck.group; - labels = standardLabels; - }; - - # Merge service config with standard behaviors - baseConfig = lib.mkMerge [ - # Service-specific configuration - serviceConfig - - # Standard firewall configuration - (lib.mkIf cfg.openFirewall { - networking.firewall.allowedTCPPorts = [cfg.port]; - }) - - # Standard monitoring configuration - (lib.mkIf cfg.monitoring.enable { - homelab.monitoring.metrics = standardMetrics; - homelab.monitoring.healthChecks = standardHealthCheck ++ customHealthChecks; - }) - - # Standard reverse proxy configuration - (lib.mkIf cfg.proxy.enable { - homelab.reverseProxy.entries = [standardProxyEntry]; - }) - ]; - in - lib.mkIf cfg.enable baseConfig; - - # Validation helper to ensure required options are set - validateServiceConfig = cfg: serviceName: [ - 
# Validate that if proxy is enabled, subdomain is set - (lib.mkIf (cfg.proxy.enable && cfg.proxy.subdomain == "") - (throw "Service ${serviceName}: proxy.subdomain is required when proxy.enable is true")) - - # Validate that if monitoring is enabled, required paths are set - (lib.mkIf (cfg.monitoring.enable && cfg.monitoring.metricsPath == "") - (throw "Service ${serviceName}: monitoring.metricsPath cannot be empty when monitoring is enabled")) - ]; -in { - inherit mkServiceInterface mkServiceConfig validateServiceConfig; - - # Common service option patterns - commonOptions = { - # Log level option - logLevel = mkOption { - type = types.enum ["debug" "info" "warn" "error"]; - default = "info"; - description = "Log level"; - }; - - # Environment file option (for secrets) - environmentFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Environment file for secrets"; - }; - - # External URL option - externalUrl = serviceName: homelabCfg: - mkOption { - type = types.str; - default = "https://${serviceName}.${homelabCfg.externalDomain}"; - description = "External URL for ${serviceName}"; - }; - }; - - # Helper for creating service modules with the interface - mkServiceModule = { - serviceName, - defaultPort, - defaultSubdomain ? serviceName, - serviceOptions ? {}, - ... - } @ args: { - config, - lib, - ... 
- }: let - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; - - serviceInterface = mkServiceInterface { - inherit serviceName defaultPort defaultSubdomain serviceOptions; - }; - in { - options.homelab.services.${serviceName} = serviceInterface; - - config = mkServiceConfig { - inherit config cfg homelabCfg serviceName; - # Service implementor must provide this function - serviceConfig = args.serviceConfig or (throw "mkServiceModule requires serviceConfig function"); - }; - }; -} diff --git a/modules/homelab/lib/systems/backups.nix b/modules/homelab/lib/systems/backups.nix new file mode 100644 index 0000000..a39d1f9 --- /dev/null +++ b/modules/homelab/lib/systems/backups.nix @@ -0,0 +1,163 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.backups; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Get all defined backend names dynamically + backendNames = attrNames cfg.backends or {}; + + backupJobType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the backup job"; + }; + backend = mkOption { + type = types.enum backendNames; + description = "Backend to use for this backup job"; + }; + backendOptions = mkOption { + type = types.attrs; + default = {}; + description = "Backend-specific options to override or extend the backend configuration"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for this backup job"; + }; + }; + }; + + # Local aggregation + localAggregation = { + allJobs = cfg.jobs; + allBackends = backendNames; + }; + + # Global aggregation + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + jobsAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.backups.allJobs"; + enhancer = job: + job + // { + _sourceNode = job._nodeName; + _backupId = "${job._nodeName}-${job.name}"; + _jobFqdn = 
"${job.name}.${job._nodeName}"; + }; + }; + + # Get all backends from all nodes + allBackendsFromNodes = let + backendConfigs = + mapAttrsToList ( + nodeName: nodeConfig: + attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config + ) + nodes; + enabledBackends = flatten (map ( + backends: + filter (name: backends.${name} != null) (attrNames backends) + ) + backendConfigs); + in + unique enabledBackends; + in { + allJobs = jobsAgg.all; + allBackends = allBackendsFromNodes; + jobsByBackend = groupBy (j: j.backend) jobsAgg.all; + summary = { + total = length jobsAgg.all; + byBackend = jobsAgg.countBy (j: j.backend); + byNode = jobsAgg.countBy (j: j._nodeName); + uniqueBackends = unique (map (j: j.backend) jobsAgg.all); + }; + }; +in { + imports = [ + ../../backup/restic.nix + # ./backup/borgbackup.nix + ]; + + options.homelab.backups = { + enable = mkEnableOption "backup system"; + + jobs = mkOption { + type = types.listOf backupJobType; + default = []; + description = "Backup jobs to execute on this system"; + }; + + # Backend configurations (like your existing setup) + # backends = mkOption { + # type = types.attrs; + # default = {}; + # description = "Backup backend configurations"; + # }; + + defaultLabels = mkOption { + type = types.attrsOf types.str; + default = { + hostname = homelabCfg.hostname; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + description = "Default labels applied to all backup jobs"; + }; + + monitoring = mkOption { + type = types.bool; + default = true; + description = "Enable backup monitoring and metrics"; + }; + + # Always exposed aggregated data + allJobs = mkOption { + type = types.listOf types.attrs; + default = []; + readOnly = true; + }; + + allBackends = mkOption { + type = types.listOf types.str; + default = []; + readOnly = true; + }; + + global = mkOption { + type = types.attrs; + default = {}; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # Validate that all job backends 
exist + assertions = [ + { + assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs; + message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends"; + } + ]; + + # Always expose both local and global + homelab.backups = { + allJobs = localAggregation.allJobs; + allBackends = localAggregation.allBackends; + global = + if hasNodes + then globalAggregation + else {}; + }; + }; +} diff --git a/modules/homelab/lib/systems/logging.nix b/modules/homelab/lib/systems/logging.nix new file mode 100644 index 0000000..d760ee3 --- /dev/null +++ b/modules/homelab/lib/systems/logging.nix @@ -0,0 +1,209 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.logging; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Local aggregation + localAggregation = { + allSources = + cfg.sources + ++ (optional cfg.promtail.enable { + name = "system-journal"; + type = "journal"; + journal.path = "/var/log/journal"; + labels = + cfg.defaultLabels + // { + component = "system"; + log_source = "journald"; + }; + enabled = true; + }); + }; + + # Global aggregation + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + sourcesAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.logging.allSources"; + enhancer = source: + source + // { + _sourceNode = source._nodeName; + _logId = "${source._nodeName}-${source.name}"; + }; + }; + in { + allSources = sourcesAgg.all; + sourcesByType = groupBy (s: s.type) sourcesAgg.all; + summary = { + total = length sourcesAgg.all; + byType = sourcesAgg.countBy (s: s.type); + byNode = sourcesAgg.countBy (s: s._nodeName); + }; + }; +in { + options.homelab.logging = { + enable = mkEnableOption "logging system"; + + promtail = { + enable = mkOption { + type = types.bool; + default = true; + }; + port = mkOption { + type = types.port; + default = 9080; + }; + clients = mkOption { + type = 
types.listOf (types.submodule { + options = { + url = mkOption {type = types.str;}; + tenant_id = mkOption { + type = types.nullOr types.str; + default = null; + }; + }; + }); + default = [{url = "http://monitor.${homelabCfg.domain}:3100/loki/api/v1/push";}]; + }; + }; + + sources = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + type = mkOption { + type = types.enum ["journal" "file" "syslog" "docker"]; + default = "file"; + }; + files = mkOption { + type = types.submodule { + options = { + paths = mkOption { + type = types.listOf types.str; + default = []; + }; + multiline = mkOption { + type = types.nullOr types.attrs; + default = null; + }; + }; + }; + default = {}; + }; + journal = mkOption { + type = types.submodule { + options = { + path = mkOption { + type = types.str; + default = "/var/log/journal"; + }; + }; + }; + default = {}; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + pipelineStages = mkOption { + type = types.listOf types.attrs; + default = []; + }; + enabled = mkOption { + type = types.bool; + default = true; + }; + }; + }); + default = []; + }; + + defaultLabels = mkOption { + type = types.attrsOf types.str; + default = { + hostname = homelabCfg.hostname; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + }; + + # Always exposed aggregated data + allSources = mkOption { + type = types.listOf types.attrs; + default = []; + readOnly = true; + }; + + global = mkOption { + type = types.attrs; + default = {}; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # Local setup + services.promtail = mkIf cfg.promtail.enable { + enable = true; + configuration = { + server = { + http_listen_port = cfg.promtail.port; + grpc_listen_port = 0; + }; + positions.filename = "/var/lib/promtail/positions.yaml"; + clients = cfg.promtail.clients; + scrape_configs = map (source: + { + job_name = source.name; + static_configs = [ + { + 
targets = ["localhost"]; + labels = + cfg.defaultLabels + // source.labels + // ( + if source.type == "file" + then { + __path__ = concatStringsSep "," source.files.paths; + } + else {} + ); + } + ]; + # pipeline_stages = source.pipelineStages; + } + // ( + if source.type == "journal" + then { + journal = { + path = source.journal.path; + labels = cfg.defaultLabels // source.labels; + }; + } + else {} + )) + localAggregation.allSources; + }; + }; + + networking.firewall.allowedTCPPorts = optionals cfg.promtail.enable [cfg.promtail.port]; + + homelab.logging = { + allSources = localAggregation.allSources; + global = + if hasNodes + then globalAggregation + else {}; + }; + }; +} diff --git a/modules/homelab/lib/systems/monitoring.nix b/modules/homelab/lib/systems/monitoring.nix new file mode 100644 index 0000000..a44df40 --- /dev/null +++ b/modules/homelab/lib/systems/monitoring.nix @@ -0,0 +1,222 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.monitoring; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Local aggregation from this instance + localAggregation = { + # Metrics from manually configured + automatic node exporter + allMetrics = + cfg.metrics + ++ (optional cfg.nodeExporter.enable { + name = "node-exporter"; + host = homelabCfg.hostname; + port = cfg.nodeExporter.port; + path = "/metrics"; + jobName = "node"; + scrapeInterval = "30s"; + labels = { + instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + }); + + allHealthChecks = cfg.healthChecks; + }; + + # Global aggregation from all nodes (when nodes available) + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + # Aggregate metrics from all nodes + metricsAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.monitoring.allMetrics"; + enhancer = endpoint: + endpoint + // { + _fullAddress = 
"${endpoint.host}:${toString endpoint.port}"; + _metricsUrl = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; + }; + }; + + # Aggregate health checks from all nodes + healthChecksAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.monitoring.allHealthChecks"; + enhancer = check: let + actualHost = check.host; + portPart = + if check.port != null + then ":${toString check.port}" + else ""; + url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path}"; + in + check + // { + _actualHost = actualHost; + _url = url; + }; + }; + in { + allMetrics = metricsAgg.all; + allHealthChecks = healthChecksAgg.all; + + # Useful groupings for services + metricsByJobName = groupBy (m: m.jobName) metricsAgg.all; + healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all; + + summary = { + totalMetrics = length metricsAgg.all; + totalHealthChecks = length healthChecksAgg.all; + nodesCovered = unique (map (m: m._nodeName or m.host) metricsAgg.all); + }; + }; +in { + # Instance-level monitoring options + options.homelab.monitoring = { + enable = mkEnableOption "monitoring system"; + + # Node exporter (automatically enabled) + nodeExporter = { + enable = mkOption { + type = types.bool; + default = true; + }; + port = mkOption { + type = types.port; + default = 9100; + }; + }; + + # Manual metrics (in addition to service auto-registration) + metrics = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + host = mkOption { + type = types.str; + default = homelabCfg.hostname; + }; + port = mkOption {type = types.port;}; + path = mkOption { + type = types.str; + default = "/metrics"; + }; + jobName = mkOption {type = types.str;}; + scrapeInterval = mkOption { + type = types.str; + default = "30s"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + }; + }); + default = []; + }; + + # Manual health checks (in addition to 
service auto-registration) + healthChecks = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + host = mkOption { + type = types.str; + default = homelabCfg.hostname; + }; + port = mkOption { + type = types.nullOr types.port; + default = null; + }; + path = mkOption { + type = types.str; + default = "/"; + }; + protocol = mkOption { + type = types.enum ["http" "https" "tcp" "icmp"]; + default = "http"; + }; + method = mkOption { + type = types.str; + default = "GET"; + }; + interval = mkOption { + type = types.str; + default = "30s"; + }; + timeout = mkOption { + type = types.str; + default = "10s"; + }; + conditions = mkOption { + type = types.listOf types.str; + default = ["[STATUS] == 200"]; + }; + group = mkOption { + type = types.str; + default = "manual"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + enabled = mkOption { + type = types.bool; + default = true; + }; + }; + }); + default = []; + }; + + # Read-only aggregated data (always exposed) + allMetrics = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allMetrics; + readOnly = true; + }; + + allHealthChecks = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allHealthChecks; + readOnly = true; + }; + + # Global aggregation (always available, empty if no nodes) + global = mkOption { + type = types.attrs; + default = globalAggregation; + readOnly = true; + }; + }; + + # Configuration - always includes both local and global + config = mkIf cfg.enable { + # Basic instance setup + services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable { + enable = true; + port = cfg.nodeExporter.port; + enabledCollectors = ["systemd" "textfile" "filesystem" "loadavg" "meminfo" "netdev" "stat"]; + }; + + networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [cfg.nodeExporter.port]; + + # homelab.monitoring = { + # allMetrics = localAggregation.allMetrics; + 
# allHealthChecks = localAggregation.allHealthChecks; + # global = + # if hasNodes + # then globalAggregation + # else {}; + # }; + }; +} diff --git a/modules/homelab/lib/systems/proxy.nix b/modules/homelab/lib/systems/proxy.nix new file mode 100644 index 0000000..a16694d --- /dev/null +++ b/modules/homelab/lib/systems/proxy.nix @@ -0,0 +1,98 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.reverseProxy; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Local aggregation + localAggregation = { + allEntries = cfg.entries; + }; + + # Global aggregation + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + entriesAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.reverseProxy.allEntries"; + enhancer = entry: + entry + // { + _upstream = "http://${entry.host}:${toString entry.port}${entry.path or ""}"; + _fqdn = "${entry.subdomain}.${entry._nodeConfig.config.homelab.externalDomain or homelabCfg.externalDomain}"; + _internal = "${entry.host}:${toString entry.port}"; + }; + }; + in { + allEntries = entriesAgg.all; + entriesBySubdomain = groupBy (e: e.subdomain) entriesAgg.all; + entriesWithAuth = entriesAgg.filterBy (e: e.enableAuth or false); + entriesWithoutAuth = entriesAgg.filterBy (e: !(e.enableAuth or false)); + summary = { + total = length entriesAgg.all; + byNode = entriesAgg.countBy (e: e._nodeName); + withAuth = length (entriesAgg.filterBy (e: e.enableAuth or false)); + withoutAuth = length (entriesAgg.filterBy (e: !(e.enableAuth or false))); + }; + }; +in { + options.homelab.reverseProxy = { + enable = mkEnableOption "reverse proxy system"; + + entries = mkOption { + type = types.listOf (types.submodule { + options = { + subdomain = mkOption {type = types.str;}; + host = mkOption { + type = types.str; + default = homelabCfg.hostname; + }; + port = mkOption {type = types.port;}; + path = mkOption { + type = types.str; + default = "/"; + }; 
+ enableAuth = mkOption { + type = types.bool; + default = false; + }; + enableSSL = mkOption { + type = types.bool; + default = true; + }; + }; + }); + default = []; + }; + + # Always exposed aggregated data + allEntries = mkOption { + type = types.listOf types.attrs; + default = []; + readOnly = true; + }; + + global = mkOption { + type = types.attrs; + default = {}; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # Always expose both local and global + homelab.reverseProxy = { + allEntries = localAggregation.allEntries; + global = + if hasNodes + then globalAggregation + else {}; + }; + }; +} diff --git a/modules/homelab/monitoring-config.nix b/modules/homelab/monitoring-config.nix deleted file mode 100644 index 2490467..0000000 --- a/modules/homelab/monitoring-config.nix +++ /dev/null @@ -1,214 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.monitoring; - homelabCfg = config.homelab; - - metricsEndpointType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the metrics endpoint"; - }; - host = mkOption { - type = types.str; - description = "Domain name of the host (default: hostname.domain)"; - default = "${homelabCfg.hostname}.${homelabCfg.domain}"; - }; - port = mkOption { - type = types.port; - description = "Port number for the endpoint"; - }; - path = mkOption { - type = types.str; - default = "/metrics"; - description = "Path for the metrics endpoint"; - }; - jobName = mkOption { - type = types.str; - description = "Prometheus job name"; - }; - scrapeInterval = mkOption { - type = types.str; - default = "30s"; - description = "Prometheus scrape interval"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for this endpoint"; - }; - }; - }; - - healthCheckEndpointType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the health check endpoint"; - }; - host = 
mkOption { - type = types.str; - description = "Domain name of the host"; - default = "${homelabCfg.hostname}.${homelabCfg.domain}"; - }; - port = mkOption { - type = types.nullOr types.port; - default = null; - description = "Port number for the endpoint (null for standard HTTP/HTTPS)"; - }; - path = mkOption { - type = types.str; - default = "/"; - description = "Path for the health check endpoint"; - }; - protocol = mkOption { - type = types.enum ["http" "https" "tcp" "icmp"]; - default = "http"; - description = "Protocol to use for health checks"; - }; - method = mkOption { - type = types.str; - default = "GET"; - description = "HTTP method for health checks (only applies to http/https)"; - }; - interval = mkOption { - type = types.str; - default = "30s"; - description = "Health check interval"; - }; - timeout = mkOption { - type = types.str; - default = "10s"; - description = "Health check timeout"; - }; - conditions = mkOption { - type = types.listOf types.str; - default = ["[STATUS] == 200"]; - description = "Health check conditions (Gatus format)"; - example = ["[STATUS] == 200" "[BODY].status == UP" "[RESPONSE_TIME] < 500"]; - }; - alerts = mkOption { - type = types.listOf (types.submodule { - options = { - type = mkOption { - type = types.str; - description = "Alert type"; - example = "discord"; - }; - enabled = mkOption { - type = types.bool; - default = true; - description = "Whether this alert is enabled"; - }; - failure-threshold = mkOption { - type = types.int; - default = 3; - description = "Number of failures before alerting"; - }; - success-threshold = mkOption { - type = types.int; - default = 2; - description = "Number of successes before resolving alert"; - }; - }; - }); - default = []; - description = "Alert configurations"; - }; - group = mkOption { - type = types.str; - default = "default"; - description = "Group name for organizing health checks"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = 
"Additional labels for this health check"; - }; - enabled = mkOption { - type = types.bool; - default = true; - description = "Whether this health check is enabled"; - }; - # External domain support - useExternalDomain = mkOption { - type = types.bool; - default = false; - description = "Use external domain instead of internal"; - }; - subdomain = mkOption { - type = types.nullOr types.str; - default = null; - description = "Subdomain for external domain (required if useExternalDomain is true)"; - }; - }; - }; -in { - options.homelab.monitoring = { - enable = mkEnableOption "Homelab monitoring"; - metrics = mkOption { - type = types.listOf metricsEndpointType; - default = []; - description = "Metric endpoints exposed by this system"; - }; - - healthChecks = mkOption { - type = types.listOf healthCheckEndpointType; - default = []; - description = "Health check endpoints for uptime monitoring"; - }; - - nodeExporter = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable node exporter"; - }; - port = mkOption { - type = types.port; - default = 9100; - description = "Node exporter port"; - }; - }; - }; - - config = mkIf cfg.enable { - # Configure node exporter if enabled - services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable { - enable = true; - port = cfg.nodeExporter.port; - enabledCollectors = [ - "systemd" - "textfile" - "filesystem" - "loadavg" - "meminfo" - "netdev" - "stat" - ]; - }; - - # Automatically add node exporter to monitoring endpoints - homelab.monitoring.metrics = mkIf cfg.nodeExporter.enable [ - { - name = "node-exporter"; - port = cfg.nodeExporter.port; - path = "/metrics"; - jobName = "node"; - labels = { - instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - } - ]; - - networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [ - cfg.nodeExporter.port - ]; - }; -} diff --git 
a/modules/homelab/proxy-config.nix b/modules/homelab/proxy-config.nix deleted file mode 100644 index e7236d8..0000000 --- a/modules/homelab/proxy-config.nix +++ /dev/null @@ -1,53 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.reverseProxy; - homelabCfg = config.homelab; - - reverseProxyEntryType = types.submodule { - options = { - subdomain = mkOption { - type = types.str; - description = "Subdomain for the service"; - }; - host = mkOption { - type = types.str; - description = "Host to proxy to"; - default = "${homelabCfg.hostname}.${homelabCfg.domain}"; - }; - port = mkOption { - type = types.port; - description = "Port to proxy to"; - }; - path = mkOption { - type = types.str; - default = "/"; - description = "Path prefix for the service"; - }; - enableAuth = mkOption { - type = types.bool; - default = false; - description = "Enable authentication for this service"; - }; - enableSSL = mkOption { - type = types.bool; - default = true; - description = "Enable SSL for this service"; - }; - }; - }; -in { - options.homelab.reverseProxy = { - entries = mkOption { - type = types.listOf reverseProxyEntryType; - default = []; - description = "Reverse proxy entries for this system"; - }; - }; - - config = { - }; -} diff --git a/modules/homelab/services/default.nix b/modules/homelab/services/default.nix index 2847a3c..2071dd6 100644 --- a/modules/homelab/services/default.nix +++ b/modules/homelab/services/default.nix @@ -1,7 +1,9 @@ { imports = [ ./minio.nix - ./monitoring/gatus.nix - ./monitoring/prometheus.nix + ./gatus.nix + ./prometheus.nix + ./grafana.nix + # ./monitoring/loki.nix ]; } diff --git a/modules/homelab/services/example-service.nix b/modules/homelab/services/example-service.nix deleted file mode 100644 index df59348..0000000 --- a/modules/homelab/services/example-service.nix +++ /dev/null @@ -1,161 +0,0 @@ -# Example showing how to create a service using the standard interface -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - serviceInterface = import ../lib/service-interface.nix {inherit lib;}; - - cfg = config.homelab.services.grafana; - homelabCfg = config.homelab; - - # Service-specific options beyond the standard interface - grafanaServiceOptions = { - domain = mkOption { - type = types.str; - default = "grafana.${homelabCfg.externalDomain}"; - description = "Domain for Grafana"; - }; - - rootUrl = mkOption { - type = types.str; - default = "https://grafana.${homelabCfg.externalDomain}"; - description = "Root URL for Grafana"; - }; - - dataDir = serviceInterface.commonOptions.dataDir "grafana"; - - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; - }; - - password = mkOption { - type = types.str; - default = "admin"; - description = "Admin password"; - }; - }; - - datasources = { - prometheus = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://localhost:9090"; - description = "Prometheus URL"; - }; - }; - }; - - plugins = mkOption { - type = types.listOf types.package; - default = []; - description = "Grafana plugins to install"; - }; - }; -in { - options.homelab.services.grafana = serviceInterface.mkServiceInterface { - serviceName = "grafana"; - defaultPort = 3000; - defaultSubdomain = "grafana"; - monitoringPath = "/metrics"; - healthCheckPath = "/api/health"; - healthCheckConditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" - ]; - serviceOptions = grafanaServiceOptions; - }; - - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "grafana"; - - extraMonitoringLabels = { - component = "dashboard"; - }; - - customHealthChecks = [ - { - name = "grafana-login"; - port = cfg.port; - path = "/login"; - interval = "60s"; - conditions = [ - "[STATUS] == 200" - "[RESPONSE_TIME] < 3000" - ]; - group = 
"monitoring"; - labels = { - service = "grafana"; - component = "login"; - }; - } - ]; - - serviceConfig = { - services.grafana = { - enable = true; - dataDir = cfg.dataDir; - declarativePlugins = cfg.plugins; - - settings = { - server = { - http_port = cfg.port; - http_addr = "0.0.0.0"; - domain = cfg.domain; - root_url = cfg.rootUrl; - }; - - security = { - admin_user = cfg.admin.user; - admin_password = cfg.admin.password; - }; - }; - - provision = { - enable = true; - datasources.settings.datasources = mkIf cfg.datasources.prometheus.enable [ - { - name = "Prometheus"; - type = "prometheus"; - url = cfg.datasources.prometheus.url; - isDefault = true; - } - ]; - }; - }; - }; - }; -} -# Usage example in your configuration: -/* -{ - homelab.services.grafana = { - enable = true; - # Standard interface options: - port = 3000; # Optional: defaults to 3000 - openFirewall = true; # Optional: defaults to true - proxy.subdomain = "grafana"; # Optional: defaults to "grafana" - proxy.enableAuth = false; # Optional: defaults to false - monitoring.enable = true; # Optional: defaults to true - - # Service-specific options: - admin.password = "secure-password"; - datasources.prometheus.url = "http://prometheus.lab:9090"; - plugins = with pkgs.grafanaPlugins; [ grafana-piechart-panel ]; - }; -} -*/ - diff --git a/modules/homelab/services/gatus.nix b/modules/homelab/services/gatus.nix new file mode 100644 index 0000000..da907c4 --- /dev/null +++ b/modules/homelab/services/gatus.nix @@ -0,0 +1,267 @@ +{ + config, + lib, + ... 
+}: +with lib; let + serviceName = "gatus"; + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; + + # Convert homelab health checks to Gatus format + formatHealthCheck = check: let + # Build the URL based on the health check configuration + url = check._url or "http://${check.host}:${toString (check.port or 80)}${check.path}"; + + # Convert conditions to Gatus format (they should already be compatible) + conditions = check.conditions or ["[STATUS] == 200"]; + + # Convert alerts to Gatus format + alerts = map (alert: { + inherit (alert) type enabled; + failure-threshold = alert.failure-threshold or 3; + success-threshold = alert.success-threshold or 2; + description = "Health check alert for ${check.name}"; + }) (check.alerts or []); + in { + name = check.name; + group = check.group or "default"; + url = url; + interval = check.interval or "30s"; + + # Add method and headers for HTTP/HTTPS checks + method = + if (check.protocol == "http" || check.protocol == "https") + then check.method or "GET" + else null; + + conditions = conditions; + + # Add timeout + client = { + timeout = check.timeout or "10s"; + }; + + # Add alerts if configured + alerts = + if alerts != [] + then alerts + else []; + + # Add labels for UI organization + ui = { + hide-hostname = false; + hide-url = false; + description = "Health check for ${check.name} on ${check.host or check._actualHost or "unknown"}"; + }; + }; + + # Generate Gatus configuration from aggregated health checks + gatusConfig = + recursiveUpdate { + # Global Gatus settings + alerting = mkIf (cfg.alerting != {}) cfg.alerting; + + web = { + address = cfg.web.address; + port = cfg.port; + }; + + # Enable metrics + metrics = cfg.monitoring.enable; + + ui = { + title = cfg.ui.title; + header = cfg.ui.header; + link = cfg.ui.link; + buttons = cfg.ui.buttons; + }; + + storage = cfg.storage; + + # Convert all enabled health checks from the fleet to Gatus endpoints + endpoints = let + # Get all health checks 
- try global first, fallback to local + allHealthChecks = homelabCfg.monitoring.global.allHealthChecks + or homelabCfg.monitoring.allHealthChecks + or []; + + # Filter only enabled health checks + enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks; + + # Convert to Gatus format + gatusEndpoints = map formatHealthCheck enabledHealthChecks; + in + gatusEndpoints; + } + cfg.extraConfig; +in { + imports = [ + (import ../lib/features/monitoring.nix serviceName) + (import ../lib/features/logging.nix serviceName) + (import ../lib/features/proxy.nix serviceName) + ]; + + # Core service options + options.homelab.services.${serviceName} = { + enable = mkEnableOption "Gatus Status Page"; + + port = mkOption { + type = types.port; + default = 8080; + }; + + description = mkOption { + type = types.str; + default = "Gatus Status Page"; + }; + + # Gatus-specific options + ui = { + title = mkOption { + type = types.str; + default = "Homelab Status"; + description = "Title for the Gatus web interface"; + }; + + header = mkOption { + type = types.str; + default = "Homelab Services Status"; + description = "Header text for the Gatus interface"; + }; + + link = mkOption { + type = types.str; + default = "https://status.${homelabCfg.externalDomain}"; + description = "Link in the Gatus header"; + }; + + buttons = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + link = mkOption {type = types.str;}; + }; + }); + default = [ + { + name = "Grafana"; + link = "https://grafana.${homelabCfg.externalDomain}"; + } + { + name = "Prometheus"; + link = "https://prometheus.${homelabCfg.externalDomain}"; + } + ]; + description = "Navigation buttons in the Gatus interface"; + }; + }; + + alerting = mkOption { + type = types.attrs; + default = {}; + description = "Gatus alerting configuration"; + example = literalExpression '' + { + discord = { + webhook-url = "https://discord.com/api/webhooks/..."; + default-alert = { + 
enabled = true; + description = "Health check failed"; + failure-threshold = 3; + success-threshold = 2; + }; + }; + } + ''; + }; + + storage = mkOption { + type = types.attrs; + default = { + type = "memory"; + }; + description = "Gatus storage configuration"; + example = literalExpression '' + { + type = "postgres"; + path = "postgres://user:password@localhost/gatus?sslmode=disable"; + } + ''; + }; + + web = { + address = mkOption { + type = types.str; + default = "0.0.0.0"; + description = "Web interface bind address"; + }; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + description = "Additional Gatus configuration options"; + }; + }; + + # Service configuration with smart defaults + config = mkIf cfg.enable (mkMerge [ + # Core Gatus service + { + services.gatus = { + enable = true; + settings = gatusConfig; + }; + + networking.firewall.allowedTCPPorts = [cfg.port]; + + homelab.services.${serviceName}.monitoring.enable = mkDefault true; + } + + # Smart defaults for Gatus + (mkIf cfg.monitoring.enable { + homelab.services.${serviceName}.monitoring = mkDefault { + metrics = { + path = "/metrics"; + extraEndpoints = []; + }; + healthCheck = { + path = "/health"; + conditions = [ + "[STATUS] == 200" + "[BODY].status == UP" + "[RESPONSE_TIME] < 1000" + ]; + extraChecks = []; + }; + extraLabels = { + component = "status-monitoring"; + tier = "monitoring"; + }; + }; + }) + + (mkIf cfg.logging.enable { + homelab.services.${serviceName}.logging = mkDefault { + files = ["/var/log/gatus/gatus.log"]; + parsing = { + # Gatus log format: 2024-01-01T12:00:00Z [INFO] message + regex = "^(?P<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z) \\[(?P<level>\\w+)\\] (?P<message>.*)"; + extractFields = ["level"]; + }; + extraLabels = { + component = "status-monitoring"; + application = "gatus"; + }; + }; + }) + + (mkIf cfg.proxy.enable { + homelab.services.${serviceName}.proxy = mkDefault { + subdomain = "status"; + enableAuth = false; # Status page should be public + }; + }) +
]); +} diff --git a/modules/homelab/services/grafana.nix b/modules/homelab/services/grafana.nix new file mode 100644 index 0000000..5f5aad9 --- /dev/null +++ b/modules/homelab/services/grafana.nix @@ -0,0 +1,86 @@ +{ + config, + lib, + pkgs, + ... +}: +with lib; let + serviceName = "grafana"; + cfg = config.homelab.services.${serviceName}; +in { + imports = [ + (import ../lib/features/monitoring.nix serviceName) + (import ../lib/features/logging.nix serviceName) + (import ../lib/features/proxy.nix serviceName) + ]; + + options.homelab.services.${serviceName} = { + enable = mkEnableOption "Grafana Dashboard"; + + port = mkOption { + type = types.port; + default = 3000; + }; + + description = mkOption { + type = types.str; + default = "Grafana Metrics Dashboard"; + }; + }; + + config = mkIf cfg.enable (mkMerge [ + # Core Grafana service + { + services.grafana = { + enable = true; + settings.server = { + http_port = cfg.port; + http_addr = "0.0.0.0"; + }; + }; + + networking.firewall.allowedTCPPorts = [cfg.port]; + + homelab.services.${serviceName}.monitoring.enable = mkDefault true; + } + + # Smart defaults for Grafana + (mkIf cfg.logging.enable { + # Grafana-specific log setup + homelab.services.${serviceName}.logging = mkDefault { + files = ["/var/log/grafana/grafana.log"]; + parsing = { + # Grafana log format: t=2024-01-01T12:00:00Z lvl=info msg="message" + regex = "^t=(?P[^ ]+) lvl=(?P\\w+) msg=\"(?P[^\"]*)\""; + extractFields = ["level"]; + }; + extraLabels = { + application = "grafana"; + component = "dashboard"; + }; + }; + }) + + (mkIf cfg.monitoring.enable { + homelab.services.${serviceName}.monitoring = mkDefault { + metrics.path = "/metrics"; + healthCheck = { + path = "/api/health"; + conditions = ["[STATUS] == 200" "[BODY].database == ok"]; + }; + extraLabels = { + component = "dashboard"; + tier = "monitoring"; + }; + }; + }) + + (mkIf cfg.proxy.enable { + # Grafana needs auth by default (admin interface) + homelab.services.${serviceName}.proxy = 
mkDefault { + subdomain = "grafana"; + # enableAuth = true; + }; + }) + ]); +} diff --git a/modules/homelab/services/jellyfin.nix b/modules/homelab/services/jellyfin.nix deleted file mode 100644 index 1aac7e5..0000000 --- a/modules/homelab/services/jellyfin.nix +++ /dev/null @@ -1,125 +0,0 @@ -# modules/services/jellyfin.nix -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.services.jellyfin; -in { - options.services.jellyfin = { - enable = mkEnableOption "Jellyfin media server"; - - port = mkOption { - type = types.port; - default = 8096; - description = "Port for Jellyfin web interface"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/jellyfin"; - description = "Directory to store Jellyfin data"; - }; - - mediaDir = mkOption { - type = types.str; - default = "/media"; - description = "Directory containing media files"; - }; - - enableMetrics = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus metrics"; - }; - - exposeWeb = mkOption { - type = types.bool; - default = true; - description = "Expose web interface through reverse proxy"; - }; - }; - - config = mkIf cfg.enable { - # Enable the service - services.jellyfin = { - enable = true; - dataDir = cfg.dataDir; - }; - - # Configure global settings - homelab.global = { - # Add backup job for Jellyfin data - backups.jobs = [ - { - name = "jellyfin-config"; - backend = "restic"; - paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"]; - schedule = "0 2 * * *"; # Daily at 2 AM - excludePatterns = [ - "*/cache/*" - "*/transcodes/*" - "*/logs/*" - ]; - preHook = '' - # Stop jellyfin for consistent backup - systemctl stop jellyfin - ''; - postHook = '' - # Restart jellyfin after backup - systemctl start jellyfin - ''; - } - { - name = "jellyfin-media"; - backend = "restic"; - paths = [cfg.mediaDir]; - schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM - excludePatterns = [ - "*.tmp" - "*/.@__thumb/*" # Synology thumbnails - ]; - } - ]; - - # 
Add reverse proxy entry if enabled - reverseProxy.entries = mkIf cfg.exposeWeb [ - { - subdomain = "jellyfin"; - port = cfg.port; - enableAuth = false; # Jellyfin has its own auth - websockets = true; - customHeaders = { - "X-Forwarded-Proto" = "$scheme"; - "X-Forwarded-Host" = "$host"; - }; - } - ]; - - # Add monitoring endpoint if metrics enabled - monitoring.endpoints = mkIf cfg.enableMetrics [ - { - name = "jellyfin"; - port = cfg.port; - path = "/metrics"; # Assuming you have a metrics plugin - jobName = "jellyfin"; - scrapeInterval = "60s"; - labels = { - service = "jellyfin"; - type = "media-server"; - }; - } - ]; - }; - - # Open firewall - networking.firewall.allowedTCPPorts = [cfg.port]; - - # Create media directory - systemd.tmpfiles.rules = [ - "d ${cfg.mediaDir} 0755 jellyfin jellyfin -" - ]; - }; -} diff --git a/modules/homelab/services/monitoring/gatus.nix b/modules/homelab/services/monitoring/gatus.nix index 8d1f20f..60f0700 100644 --- a/modules/homelab/services/monitoring/gatus.nix +++ b/modules/homelab/services/monitoring/gatus.nix @@ -4,110 +4,13 @@ ... 
}: with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + cfg = config.homelab.services.gatus; homelabCfg = config.homelab; - # Convert our health check format to Gatus format - formatHealthCheck = check: let - # Build the URL - url = check._url; - - # Convert conditions to Gatus format (they should already be compatible) - conditions = check.conditions or ["[STATUS] == 200"]; - - # Convert alerts to Gatus format - alerts = map (alert: { - inherit (alert) type enabled; - failure-threshold = alert.failure-threshold or 3; - success-threshold = alert.success-threshold or 2; - description = "Health check alert for ${check.name}"; - }) (check.alerts or []); - in { - name = check.name; - group = check.group or "default"; - url = url; - interval = check.interval or "30s"; - - # Add method and headers for HTTP/HTTPS checks - method = - if (check.protocol == "http" || check.protocol == "https") - then check.method or "GET" - else null; - - conditions = conditions; - - # Add timeout - client = { - timeout = check.timeout or "10s"; - }; - - # Add alerts if configured - alerts = - if alerts != [] - then alerts - else []; - - # Add labels for UI organization - ui = { - hide-hostname = false; - hide-url = false; - description = "Health check for ${check.name} on ${check._nodeName}"; - }; - }; - - # Generate Gatus configuration - gatusConfig = { - # Global Gatus settings - alerting = mkIf (cfg.alerting != {}) cfg.alerting; - - web = { - address = "0.0.0.0"; - port = cfg.port; - }; - - # TODO: Introduce monitor option to toggle monitoring - metrics = true; - - ui = { - title = cfg.ui.title; - header = cfg.ui.header; - link = cfg.ui.link; - buttons = cfg.ui.buttons; - }; - - storage = mkIf (cfg.storage != {}) cfg.storage; - - # Convert all enabled health checks to Gatus endpoints - endpoints = let - # Get all health checks from global config - allHealthChecks = homelabCfg.global.monitoring.enabledHealthChecks or []; - - # Group by group name for 
better organization - # groupedChecks = homelabCfg.global.monitoring.healthChecksByGroup or {}; - - # Convert to Gatus format - gatusEndpoints = map formatHealthCheck allHealthChecks; - in - gatusEndpoints; - }; -in { - options.homelab.services.gatus = { - enable = mkEnableOption "Gatus uptime monitoring service"; - - port = mkOption { - type = types.port; - default = 8080; - description = "Port for Gatus web interface"; - }; - - openFirewall = lib.mkOption { - type = lib.types.bool; - default = true; - description = '' - Whether to automatically open the specified ports in the firewall. - ''; - }; - + # Service-specific options beyond the standard interface + gatusServiceOptions = { ui = { title = mkOption { type = types.str; @@ -123,7 +26,7 @@ in { link = mkOption { type = types.str; - default = "https://gatus.${homelabCfg.externalDomain}"; + default = "https://status.${homelabCfg.externalDomain}"; description = "Link in the Gatus header"; }; @@ -186,59 +89,129 @@ in { default = {}; description = "Additional Gatus configuration options"; }; + + web = { + address = mkOption { + type = types.str; + default = "0.0.0.0"; + description = "Web interface bind address"; + }; + }; }; - config = mkIf cfg.enable { - services.gatus = { - enable = true; - openFirewall = cfg.openFirewall; - settings = gatusConfig; + # Convert our health check format to Gatus format + formatHealthCheck = check: let + # Build the URL based on the health check configuration + url = check._url; + + # Convert conditions to Gatus format (they should already be compatible) + conditions = check.conditions or ["[STATUS] == 200"]; + + # Convert alerts to Gatus format + alerts = map (alert: { + inherit (alert) type enabled; + failure-threshold = alert.failure-threshold or 3; + success-threshold = alert.success-threshold or 2; + description = "Health check alert for ${check.name}"; + }) (check.alerts or []); + in { + name = check.name; + group = check.group or "default"; + url = url; + interval = 
check.interval or "30s"; + + # Add method and headers for HTTP/HTTPS checks + method = + if (check.protocol == "http" || check.protocol == "https") + then check.method or "GET" + else null; + + conditions = conditions; + + # Add timeout + client = { + timeout = check.timeout or "10s"; }; - # Add to monitoring endpoints - homelab.monitoring.metrics = [ - { - name = "gatus"; - port = cfg.port; - path = "/metrics"; - jobName = "gatus"; - labels = { - service = "gatus"; - component = "monitoring"; - }; - } - ]; + # Add alerts if configured + alerts = + if alerts != [] + then alerts + else []; - # Add health check for Gatus itself - homelab.monitoring.healthChecks = [ - { - name = "gatus-web-interface"; - port = cfg.port; - path = "/health"; - interval = "30s"; - conditions = [ - "[STATUS] == 200" - "[BODY].status == UP" - "[RESPONSE_TIME] < 1000" - ]; - group = "monitoring"; - labels = { - service = "gatus"; - component = "web-interface"; - }; - } - ]; + # Add labels for UI organization + ui = { + hide-hostname = false; + hide-url = false; + description = "Health check for ${check.name} on ${check.host}"; + }; + }; - # Add reverse proxy entry if needed - homelab.reverseProxy.entries = [ - { - subdomain = "status"; - host = homelabCfg.hostname; + # Generate Gatus configuration + gatusConfig = + recursiveUpdate { + # Global Gatus settings + alerting = mkIf (cfg.alerting != {}) cfg.alerting; + + web = { + address = cfg.web.address; port = cfg.port; - # path = "/"; - # enableAuth = false; # Status page should be publicly accessible - # enableSSL = true; - } + }; + + # Enable metrics + metrics = cfg.monitoring.enable; + + ui = { + title = cfg.ui.title; + header = cfg.ui.header; + link = cfg.ui.link; + buttons = cfg.ui.buttons; + }; + + storage = cfg.storage; + + # Convert all enabled health checks to Gatus endpoints + endpoints = let + # Get all health checks from global config + allHealthChecks = homelabCfg.global.monitoring.allHealthChecks or []; + + # Filter only enabled 
health checks + enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks; + + # Convert to Gatus format + gatusEndpoints = map formatHealthCheck enabledHealthChecks; + in + gatusEndpoints; + } + cfg.extraConfig; +in { + options.homelab.services.gatus = serviceInterface.mkServiceInterface { + serviceName = "gatus"; + defaultPort = 8080; + defaultSubdomain = "status"; + monitoringPath = "/metrics"; + healthCheckPath = "/health"; + healthCheckConditions = [ + "[STATUS] == 200" + "[BODY].status == UP" + "[RESPONSE_TIME] < 1000" ]; + serviceOptions = gatusServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "gatus"; + + extraMonitoringLabels = { + component = "status-monitoring"; + }; + + serviceConfig = { + services.gatus = { + enable = true; + settings = gatusConfig; + }; + }; }; } diff --git a/modules/homelab/services/monitoring/grafana.nix b/modules/homelab/services/monitoring/grafana.nix index 64650cf..8ecb14c 100644 --- a/modules/homelab/services/monitoring/grafana.nix +++ b/modules/homelab/services/monitoring/grafana.nix @@ -5,169 +5,389 @@ ... 
}: with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + cfg = config.homelab.services.grafana; homelabCfg = config.homelab; - # Default dashboards for homelab monitoring + # Default community dashboards with proper configuration defaultDashboards = { - "node-exporter" = pkgs.fetchurl { - url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; - sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + "node-exporter-full" = { + name = "Node Exporter Full"; + id = 12486; + revision = 2; + # url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; + sha256 = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM="; + + url = "https://grafana.com/api/dashboards/12486/revisions/2/download"; }; - "prometheus-stats" = pkgs.fetchurl { + "prometheus-2-0-stats" = { + name = "Prometheus 2.0 Stats"; + id = 2; + revision = 2; url = "https://grafana.com/api/dashboards/2/revisions/2/download"; - sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + sha256 = "sha256-Ydk4LPwfX4qJN8tiWPLWQdtAqzj8CKi6HYsuE+kWcXw="; }; }; - # Grafana provisioning configuration - provisioningConfig = { - # Data sources - datasources = - [ - { - name = "Prometheus"; - type = "prometheus"; - access = "proxy"; - url = cfg.datasources.prometheus.url; - isDefault = true; - editable = false; - jsonData = { - timeInterval = "5s"; - queryTimeout = "60s"; - httpMethod = "POST"; - }; - } - ] - ++ cfg.datasources.extra; + # Function to fetch a dashboard from Grafana.com + fetchGrafanaDashboard = name: config: + pkgs.fetchurl { + inherit (config) url sha256; + name = "${name}-dashboard.json"; + }; - # Dashboard providers - dashboards = [ - { - name = "homelab"; - type = "file"; - disableDeletion = false; - updateIntervalSeconds = 10; - allowUiUpdates = true; - options = { - path = "/var/lib/grafana/dashboards"; + # Git repository management for custom dashboards + 
gitDashboardsRepo = mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") ( + pkgs.fetchgit { + url = cfg.dashboards.git.url; + rev = cfg.dashboards.git.rev; + sha256 = cfg.dashboards.git.sha256; + } + ); + + # Dashboard provisioning configuration + provisionDashboard = name: source: { + "grafana-dashboards/${name}.json" = { + inherit source; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + }; + + # Generate dashboard files from various sources + dashboardFiles = + # Default community dashboards + (foldl' ( + acc: name: + acc // (provisionDashboard name (fetchGrafanaDashboard name defaultDashboards.${name})) + ) {} (attrNames (filterAttrs (n: v: cfg.dashboards.defaults.${n}.enable) cfg.dashboards.defaults))) + # Custom file-based dashboards + // (foldl' ( + acc: dashboard: + acc // (provisionDashboard dashboard.name dashboard.source) + ) {} + cfg.dashboards.files) + # Git-synced dashboards + // (optionalAttrs (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") ( + let + gitDashboards = + if pathExists "${gitDashboardsRepo}/${cfg.dashboards.git.path}" + then builtins.readDir "${gitDashboardsRepo}/${cfg.dashboards.git.path}" + else {}; + in + mapAttrs' ( + filename: type: let + name = removeSuffix ".json" filename; + source = "${gitDashboardsRepo}/${cfg.dashboards.git.path}/${filename}"; + in + nameValuePair "grafana-dashboards/${name}.json" { + inherit source; + user = "grafana"; + group = "grafana"; + mode = "0644"; + } + ) (filterAttrs (name: type: type == "regular" && hasSuffix ".json" name) gitDashboards) + )); + + # Service-specific options beyond the standard interface + grafanaServiceOptions = { + # Authentication settings + auth = { + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; }; - } - ]; - # Notification channels - notifiers = cfg.notifications; - }; -in { - options.homelab.services.grafana = { - enable = mkEnableOption "Grafana dashboard service"; + 
passwordFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to admin password file"; + }; - port = mkOption { - type = types.port; - default = 3000; - description = "Port for Grafana web interface"; - }; - - openFirewall = mkOption { - type = types.bool; - default = true; - description = "Whether to open firewall ports"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/grafana"; - description = "Directory to store Grafana data"; - }; - - domain = mkOption { - type = types.str; - default = "grafana.${homelabCfg.externalDomain}"; - description = "Domain for Grafana"; - }; - - rootUrl = mkOption { - type = types.str; - default = "https://grafana.${homelabCfg.externalDomain}"; - description = "Root URL for Grafana"; - }; - - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; + email = mkOption { + type = types.str; + default = "admin@${homelabCfg.externalDomain}"; + description = "Admin email address"; + }; }; - password = mkOption { - type = types.str; - default = "admin"; - description = "Admin password (change this!)"; + disableLoginForm = mkOption { + type = types.bool; + default = false; + description = "Disable the login form"; }; - email = mkOption { - type = types.str; - default = "admin@${homelabCfg.externalDomain}"; - description = "Admin email"; + oauthAutoLogin = mkOption { + type = types.bool; + default = false; + description = "Enable OAuth auto-login"; + }; + + anonymousAccess = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable anonymous access"; + }; + + orgName = mkOption { + type = types.str; + default = "Homelab"; + description = "Organization name for anonymous users"; + }; + + orgRole = mkOption { + type = types.enum ["Viewer" "Editor" "Admin"]; + default = "Viewer"; + description = "Role for anonymous users"; + }; + }; + + genericOauth = { + enabled = mkOption { + type = types.bool; + default = 
false; + description = "Enable generic OAuth"; + }; + + configFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to OAuth configuration file"; + }; }; }; + # Enhanced datasource configuration datasources = { prometheus = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable Prometheus datasource"; + }; + url = mkOption { type = types.str; - default = "http://localhost:9090"; + default = "http://127.0.0.1:9090"; description = "Prometheus URL"; }; + + uid = mkOption { + type = types.str; + default = "prometheus"; + description = "Unique identifier for Prometheus datasource"; + }; + + scrapeInterval = mkOption { + type = types.str; + default = "15s"; + description = "Default scrape interval for Prometheus"; + }; + + manageAlerts = mkOption { + type = types.bool; + default = true; + description = "Manage alerts in Grafana"; + }; + + exemplarTraceIdDestinations = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Exemplar trace ID destinations"; + }; + }; + + loki = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable Loki datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:3100"; + description = "Loki URL"; + }; + + uid = mkOption { + type = types.str; + default = "loki"; + description = "Unique identifier for Loki datasource"; + }; + + maxLines = mkOption { + type = types.int; + default = 1000; + description = "Maximum lines to return from Loki"; + }; + + derivedFields = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Derived fields configuration for Loki"; + }; + }; + + influxdb = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable InfluxDB datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:8086"; + description = "InfluxDB URL"; + }; + + database = mkOption { + type = types.str; + default = 
"homelab"; + description = "InfluxDB database name"; + }; + + tokenFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to InfluxDB token file"; + }; + + uid = mkOption { + type = types.str; + default = "influxdb"; + description = "Unique identifier for InfluxDB datasource"; + }; + + version = mkOption { + type = types.enum ["1.x" "2.x"]; + default = "2.x"; + description = "InfluxDB version"; + }; + + organization = mkOption { + type = types.str; + default = "homelab"; + description = "InfluxDB organization (for v2.x)"; + }; + + bucket = mkOption { + type = types.str; + default = "homelab"; + description = "InfluxDB bucket (for v2.x)"; + }; }; extra = mkOption { type = types.listOf types.attrs; default = []; description = "Additional data sources"; - example = literalExpression '' - [ - { - name = "Loki"; - type = "loki"; - url = "http://localhost:3100"; - } - ] - ''; }; }; - notifications = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Notification channels configuration"; - example = literalExpression '' - [ - { - name = "discord-webhook"; - type = "discord"; - settings = { - url = "https://discord.com/api/webhooks/..."; - username = "Grafana"; + # Enhanced dashboard configuration + dashboards = { + # Default community dashboards + defaults = mkOption { + type = types.attrsOf (types.submodule { + options = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable this default dashboard"; }; + }; + }); + default = mapAttrs (name: config: {enable = false;}) defaultDashboards; + description = "Enable default community dashboards"; + example = literalExpression '' + { + "node-exporter-full".enable = true; + "prometheus-2-0-stats".enable = true; } - ] - ''; + ''; + }; + + # File-based dashboards + files = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Dashboard name (without .json extension)"; 
+ }; + source = mkOption { + type = types.path; + description = "Path to dashboard JSON file"; + }; + }; + }); + default = []; + description = "Dashboard files to provision"; + }; + + # Git-based dashboard sync + git = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable git-based dashboard synchronization"; + }; + + url = mkOption { + type = types.str; + default = ""; + description = "Git repository URL for dashboards"; + }; + + rev = mkOption { + type = types.str; + default = "HEAD"; + description = "Git revision to use"; + }; + + sha256 = mkOption { + type = types.str; + default = ""; + description = "SHA256 hash of the git repository content"; + }; + + path = mkOption { + type = types.str; + default = "."; + description = "Path within the git repository containing dashboards"; + }; + + updateInterval = mkOption { + type = types.str; + default = "1h"; + description = "How often to check for dashboard updates"; + }; + }; + + path = mkOption { + type = types.str; + default = "/etc/grafana-dashboards"; + description = "Path where dashboard files are stored"; + }; }; + # Plugin configuration plugins = mkOption { - type = types.listOf types.str; - default = [ - "grafana-piechart-panel" - "grafana-worldmap-panel" - "grafana-clock-panel" - "grafana-simple-json-datasource" - ]; + type = types.listOf types.package; + default = []; description = "Grafana plugins to install"; }; + # SMTP configuration smtp = { - enabled = mkOption { + enable = mkOption { type = types.bool; default = false; description = "Enable SMTP for email notifications"; @@ -185,10 +405,10 @@ in { description = "SMTP username"; }; - password = mkOption { - type = types.str; - default = ""; - description = "SMTP password"; + passwordFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to SMTP password file"; }; fromAddress = mkOption { @@ -202,9 +422,22 @@ in { default = "Homelab Grafana"; description = "From name"; }; + + 
skipVerify = mkOption { + type = types.bool; + default = false; + description = "Skip SSL certificate verification"; + }; }; + # Security settings security = { + secretKeyFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to secret key file for signing"; + }; + allowEmbedding = mkOption { type = types.bool; default = false; @@ -217,200 +450,279 @@ in { description = "Set secure flag on cookies"; }; - secretKey = mkOption { - type = types.str; - default = "change-this-secret-key"; - description = "Secret key for signing (change this!)"; + contentSecurityPolicy = mkOption { + type = types.bool; + default = true; + description = "Enable Content Security Policy header"; + }; + + strictTransportSecurity = mkOption { + type = types.bool; + default = true; + description = "Enable Strict Transport Security header"; }; }; - auth = { - anonymousEnabled = mkOption { - type = types.bool; - default = false; - description = "Enable anonymous access"; - }; - - disableLoginForm = mkOption { - type = types.bool; - default = false; - description = "Disable login form"; - }; + # Data directory + dataDir = mkOption { + type = types.str; + default = "/var/lib/grafana"; + description = "Directory to store Grafana data"; }; - extraConfig = mkOption { + # Extra Grafana settings + extraSettings = mkOption { type = types.attrs; default = {}; - description = "Additional Grafana configuration"; + description = "Additional Grafana settings"; }; }; - config = mkIf cfg.enable { - services.grafana = { - enable = true; - settings = - recursiveUpdate { - server = { - http_addr = "0.0.0.0"; - http_port = cfg.port; - domain = cfg.domain; - root_url = cfg.rootUrl; - serve_from_sub_path = false; - }; - - database = { - type = "sqlite3"; - path = "${cfg.dataDir}/grafana.db"; - }; - - security = { - admin_user = cfg.admin.user; - admin_password = cfg.admin.password; - admin_email = cfg.admin.email; - allow_embedding = cfg.security.allowEmbedding; - cookie_secure = 
cfg.security.cookieSecure; - secret_key = cfg.security.secretKey; - }; - - users = { - allow_sign_up = false; - auto_assign_org = true; - auto_assign_org_role = "Viewer"; - }; - - auth.anonymous = { - enabled = cfg.auth.anonymousEnabled; - org_name = "Homelab"; - org_role = "Viewer"; - }; - - auth.basic = { - enabled = !cfg.auth.disableLoginForm; - }; - - smtp = mkIf cfg.smtp.enabled { - enabled = true; - host = cfg.smtp.host; - user = cfg.smtp.user; - password = cfg.smtp.password; - from_address = cfg.smtp.fromAddress; - from_name = cfg.smtp.fromName; - }; - - analytics = { - reporting_enabled = false; - check_for_updates = false; - }; - - log = { - mode = "console"; - level = "info"; - }; - - paths = { - data = cfg.dataDir; - logs = "${cfg.dataDir}/log"; - plugins = "${cfg.dataDir}/plugins"; - provisioning = "/etc/grafana/provisioning"; - }; - } - cfg.extraConfig; - - dataDir = cfg.dataDir; + # Enhanced datasource configuration + buildDatasources = let + # Build prometheus datasource + prometheusDatasource = optional cfg.datasources.prometheus.enable { + uid = cfg.datasources.prometheus.uid; + name = "Prometheus"; + type = "prometheus"; + url = cfg.datasources.prometheus.url; + access = "proxy"; + isDefault = true; + editable = false; + jsonData = { + timeInterval = cfg.datasources.prometheus.scrapeInterval; + queryTimeout = "60s"; + httpMethod = "POST"; + manageAlerts = cfg.datasources.prometheus.manageAlerts; + exemplarTraceIdDestinations = cfg.datasources.prometheus.exemplarTraceIdDestinations; + }; }; - # Install plugins - systemd.services.grafana.preStart = mkIf (cfg.plugins != []) ( - concatStringsSep "\n" (map ( - plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true" - ) - cfg.plugins) - ); + # Build loki datasource + lokiDatasource = optional cfg.datasources.loki.enable { + uid = cfg.datasources.loki.uid; + name = "Loki"; + type = "loki"; + url = cfg.datasources.loki.url; + access = "proxy"; + 
editable = false; + jsonData = { + maxLines = cfg.datasources.loki.maxLines; + derivedFields = cfg.datasources.loki.derivedFields; + }; + }; - # Provisioning configuration - environment.etc = - { - "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON { - apiVersion = 1; - datasources = provisioningConfig.datasources; - }; + # Build influxdb datasource + influxdbDatasource = optional cfg.datasources.influxdb.enable { + uid = cfg.datasources.influxdb.uid; + name = "InfluxDB"; + type = "influxdb"; + url = cfg.datasources.influxdb.url; + access = "proxy"; + database = cfg.datasources.influxdb.database; + editable = false; + jsonData = { + dbName = cfg.datasources.influxdb.database; + httpHeaderName1 = "Authorization"; + version = cfg.datasources.influxdb.version; + organization = cfg.datasources.influxdb.organization; + defaultBucket = cfg.datasources.influxdb.bucket; + }; + secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) { + httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}"; + }; + }; - "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON { - apiVersion = 1; - providers = provisioningConfig.dashboards; - }; - } - // (mkIf (cfg.notifications != []) { - "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON { - apiVersion = 1; - notifiers = provisioningConfig.notifiers; - }; - }); - - # Create dashboard directory - systemd.tmpfiles.rules = [ - "d ${cfg.dataDir}/dashboards 0755 grafana grafana -" + # Build extra datasources + extraDatasources = cfg.datasources.extra; + in + prometheusDatasource ++ lokiDatasource ++ influxdbDatasource ++ extraDatasources; +in { + options.homelab.services.grafana = serviceInterface.mkServiceInterface { + serviceName = "grafana"; + defaultPort = 3000; + defaultSubdomain = "grafana"; + monitoringPath = "/metrics"; + healthCheckPath = "/api/health"; + healthCheckConditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" 
]; + serviceOptions = grafanaServiceOptions; + }; - # Open firewall if requested - networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port]; + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "grafana"; - # Add to monitoring endpoints - homelab.monitoring.metrics = [ + extraMonitoringLabels = { + component = "dashboard"; + }; + + # Additional health checks specific to Grafana + customHealthChecks = []; + + serviceConfig = mkMerge [ { - name = "grafana"; - port = cfg.port; - path = "/metrics"; - jobName = "grafana"; - labels = { - service = "grafana"; - component = "monitoring"; + services.grafana = { + enable = true; + dataDir = cfg.dataDir; + # declarativePlugins = + # cfg.plugins + # ++ (with pkgs.grafanaPlugins; [ + # grafana-exploretraces-app + # grafana-metricsdrilldown-app + # grafana-pyroscope-app + # grafana-lokiexplore-app + # grafana-worldmap-panel + # grafana-piechart-panel + # ]); + + settings = + recursiveUpdate { + server = { + http_port = cfg.port; + http_addr = "0.0.0.0"; + domain = "${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; + root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; + serve_from_sub_path = false; + }; + + database = { + type = "sqlite3"; + path = "${cfg.dataDir}/grafana.db"; + }; + + security = + { + admin_user = cfg.auth.admin.user; + admin_email = cfg.auth.admin.email; + # allow_embedding = cfg.security.allowEmbedding; + # cookie_secure = cfg.security.cookieSecure; + # content_security_policy = cfg.security.contentSecurityPolicy; + # strict_transport_security = cfg.security.strictTransportSecurity; + } + // (optionalAttrs (cfg.auth.admin.passwordFile != null) { + admin_password = "$__file{${cfg.auth.admin.passwordFile}}"; + }) + // (optionalAttrs (cfg.security.secretKeyFile != null) { + secret_key = "$__file{${cfg.security.secretKeyFile}}"; + }); + + users = { + allow_sign_up = false; + auto_assign_org = true; + auto_assign_org_role = "Viewer"; 
+ }; + + "auth.anonymous" = { + enabled = cfg.auth.anonymousAccess.enable; + org_name = cfg.auth.anonymousAccess.orgName; + org_role = cfg.auth.anonymousAccess.orgRole; + }; + + "auth.basic" = { + enabled = !cfg.auth.disableLoginForm; + }; + + "auth.generic_oauth" = + mkIf cfg.auth.genericOauth.enabled ({ # parenthesised so the // merge happens inside mkIf; otherwise client_id is attached to the mkIf wrapper and dropped + enabled = true; + } + // (optionalAttrs (cfg.auth.genericOauth.configFile != null) { + client_id = "$__file{${cfg.auth.genericOauth.configFile}}"; + })); + + smtp = mkIf cfg.smtp.enable ({ + enabled = true; + host = cfg.smtp.host; + user = cfg.smtp.user; + from_address = cfg.smtp.fromAddress; + from_name = cfg.smtp.fromName; + skip_verify = cfg.smtp.skipVerify; + } + // (optionalAttrs (cfg.smtp.passwordFile != null) { + password = "$__file{${cfg.smtp.passwordFile}}"; + })); + + analytics = { + reporting_enabled = false; + check_for_updates = false; + }; + news.news_feed_enabled = false; + + feature_toggles = { + provisioning = true; + kubernetesDashboards = true; + }; + # paths = { + # plugins = "${cfg.dataDir}/plugins"; + # provisioning = "/etc/grafana/provisioning"; + # }; + } + cfg.extraSettings; + + provision = { + enable = true; + + datasources.settings.datasources = buildDatasources; + + dashboards.settings.providers = [ + { + name = "homelab-dashboards"; + type = "file"; + disableDeletion = false; + updateIntervalSeconds = 10; + allowUiUpdates = true; + options = { + path = cfg.dashboards.path; + }; + } + ]; + }; }; - } - ]; - # Add health checks - homelab.monitoring.healthChecks = [ - { - name = "grafana-web-interface"; - port = cfg.port; - path = "/api/health"; - interval = "30s"; - conditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" + # Provision dashboard files + environment.etc = dashboardFiles; + + # Ensure dashboard directory exists + systemd.tmpfiles.rules = [ + "d ${cfg.dashboards.path} 0755 grafana grafana -" ]; - group = "monitoring"; - labels = { - service = "grafana"; - component = "web-interface"; - }; } - { 
- name = "grafana-login-page"; - port = cfg.port; - path = "/login"; - interval = "60s"; - conditions = [ - "[STATUS] == 200" - "[RESPONSE_TIME] < 3000" - ]; - group = "monitoring"; - labels = { - service = "grafana"; - component = "login"; - }; - } - ]; - # Add reverse proxy entry - homelab.reverseProxy.entries = [ - { - subdomain = "grafana"; - host = homelabCfg.hostname; - port = cfg.port; - } + # Git dashboard sync service (if enabled) + (mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") { + systemd.services.grafana-dashboard-sync = { + description = "Sync Grafana dashboards from git"; + after = ["grafana.service"]; + wantedBy = ["multi-user.target"]; + + serviceConfig = { + Type = "oneshot"; + User = "grafana"; + Group = "grafana"; + }; + + script = '' + echo "Syncing dashboards from git repository..." + # Dashboard files are already provisioned via Nix + # This service can be extended for runtime updates if needed + systemctl reload grafana.service + ''; + }; + + systemd.timers.grafana-dashboard-sync = { + description = "Timer for Grafana dashboard sync"; + wantedBy = ["timers.target"]; + + timerConfig = { + OnCalendar = cfg.dashboards.git.updateInterval; + Persistent = true; + }; + }; + }) ]; }; } diff --git a/modules/homelab/services/monitoring/grafana_1.nix b/modules/homelab/services/monitoring/grafana_1.nix new file mode 100644 index 0000000..c5ae73f --- /dev/null +++ b/modules/homelab/services/monitoring/grafana_1.nix @@ -0,0 +1,198 @@ +# Example showing how to create a service using the standard interface +{ + config, + lib, + pkgs, + ... 
+}: +with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + + cfg = config.homelab.services.grafana; + homelabCfg = config.homelab; + + # Service-specific options beyond the standard interface + grafanaServiceOptions = { + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; + }; + + passwordFile = mkOption { + type = types.str; + default = "admin"; + description = "Path to the Admin password file"; + }; + }; + + datasources = { + prometheus = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable Prometheus datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:9090"; + description = "Prometheus URL"; + }; + + uid = mkOption { + type = types.str; + default = "prometheus"; + description = "Unique identifier for Prometheus datasource"; + }; + }; + + loki = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable Loki datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:3100"; + description = "Loki URL"; + }; + + uid = mkOption { + type = types.str; + default = "loki"; + description = "Unique identifier for Loki datasource"; + }; + }; + + influxdb = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable InfluxDB datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:8086"; + description = "InfluxDB URL"; + }; + + database = mkOption { + type = types.str; + default = "homelab"; + description = "InfluxDB database name"; + }; + + tokenFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to InfluxDB token file"; + }; + + uid = mkOption { + type = types.str; + default = "influxdb"; + description = "Unique identifier for InfluxDB datasource"; + }; + }; + + extra = mkOption { + type = types.listOf types.attrs; + default = []; + description = 
"Additional data sources"; + }; + }; + + plugins = mkOption { + type = types.listOf types.package; + default = []; + description = "Grafana plugins to install"; + }; + }; +in { + options.homelab.services.grafana = serviceInterface.mkServiceInterface { + serviceName = "grafana"; + defaultPort = 3000; + defaultSubdomain = "grafana"; + monitoringPath = "/metrics"; + healthCheckPath = "/api/health"; + healthCheckConditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" + ]; + serviceOptions = grafanaServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "grafana"; + + extraMonitoringLabels = { + component = "dashboard"; + }; + + serviceConfig = { + services.grafana = { + enable = true; + declarativePlugins = cfg.plugins; + + settings = { + server = { + http_port = cfg.port; + http_addr = "0.0.0.0"; + root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; + }; + + security = { + admin_user = cfg.admin.user; + admin_password = "$__file{${cfg.admin.passwordFile}}"; + }; + }; + + provision = { + enable = true; + datasources.settings = { + datasources = let + # Build datasource list + datasources = + [] + ++ optional cfg.datasources.prometheus.enable { + uid = cfg.datasources.prometheus.uid; + name = "Prometheus"; + type = "prometheus"; + url = cfg.datasources.prometheus.url; + } + ++ optional cfg.datasources.loki.enable { + uid = cfg.datasources.loki.uid; + name = "Loki"; + type = "loki"; + url = cfg.datasources.loki.url; + } + ++ optional cfg.datasources.influxdb.enable { + uid = cfg.datasources.influxdb.uid; + name = "InfluxDB"; + type = "influxdb"; + url = cfg.datasources.influxdb.url; + access = "proxy"; + jsonData = { + dbName = cfg.datasources.influxdb.database; + httpHeaderName1 = "Authorization"; + }; + secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) { # the option is declared as tokenFile; tokenPath does not exist and fails evaluation + httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}"; + }; + } + ++ 
cfg.datasources.extra; + in + datasources; + }; + }; + }; + }; + }; +} diff --git a/modules/homelab/services/monitoring/grafana_gg.nix b/modules/homelab/services/monitoring/grafana_gg.nix new file mode 100644 index 0000000..64650cf --- /dev/null +++ b/modules/homelab/services/monitoring/grafana_gg.nix @@ -0,0 +1,416 @@ +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.homelab.services.grafana; + homelabCfg = config.homelab; + + # Default dashboards for homelab monitoring + defaultDashboards = { + "node-exporter" = pkgs.fetchurl { + url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; + sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + }; + "prometheus-stats" = pkgs.fetchurl { + url = "https://grafana.com/api/dashboards/2/revisions/2/download"; + sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + }; + }; + + # Grafana provisioning configuration + provisioningConfig = { + # Data sources + datasources = + [ + { + name = "Prometheus"; + type = "prometheus"; + access = "proxy"; + url = cfg.datasources.prometheus.url; + isDefault = true; + editable = false; + jsonData = { + timeInterval = "5s"; + queryTimeout = "60s"; + httpMethod = "POST"; + }; + } + ] + ++ cfg.datasources.extra; + + # Dashboard providers + dashboards = [ + { + name = "homelab"; + type = "file"; + disableDeletion = false; + updateIntervalSeconds = 10; + allowUiUpdates = true; + options = { + path = "/var/lib/grafana/dashboards"; + }; + } + ]; + + # Notification channels + notifiers = cfg.notifications; + }; +in { + options.homelab.services.grafana = { + enable = mkEnableOption "Grafana dashboard service"; + + port = mkOption { + type = types.port; + default = 3000; + description = "Port for Grafana web interface"; + }; + + openFirewall = mkOption { + type = types.bool; + default = true; + description = "Whether to open firewall ports"; + }; + + dataDir = mkOption { + type = 
types.str; + default = "/var/lib/grafana"; + description = "Directory to store Grafana data"; + }; + + domain = mkOption { + type = types.str; + default = "grafana.${homelabCfg.externalDomain}"; + description = "Domain for Grafana"; + }; + + rootUrl = mkOption { + type = types.str; + default = "https://grafana.${homelabCfg.externalDomain}"; + description = "Root URL for Grafana"; + }; + + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; + }; + + password = mkOption { + type = types.str; + default = "admin"; + description = "Admin password (change this!)"; + }; + + email = mkOption { + type = types.str; + default = "admin@${homelabCfg.externalDomain}"; + description = "Admin email"; + }; + }; + + datasources = { + prometheus = { + url = mkOption { + type = types.str; + default = "http://localhost:9090"; + description = "Prometheus URL"; + }; + }; + + extra = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Additional data sources"; + example = literalExpression '' + [ + { + name = "Loki"; + type = "loki"; + url = "http://localhost:3100"; + } + ] + ''; + }; + }; + + notifications = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Notification channels configuration"; + example = literalExpression '' + [ + { + name = "discord-webhook"; + type = "discord"; + settings = { + url = "https://discord.com/api/webhooks/..."; + username = "Grafana"; + }; + } + ] + ''; + }; + + plugins = mkOption { + type = types.listOf types.str; + default = [ + "grafana-piechart-panel" + "grafana-worldmap-panel" + "grafana-clock-panel" + "grafana-simple-json-datasource" + ]; + description = "Grafana plugins to install"; + }; + + smtp = { + enabled = mkOption { + type = types.bool; + default = false; + description = "Enable SMTP for email notifications"; + }; + + host = mkOption { + type = types.str; + default = "localhost:587"; + description = "SMTP server host:port"; + }; + + 
user = mkOption { + type = types.str; + default = ""; + description = "SMTP username"; + }; + + password = mkOption { + type = types.str; + default = ""; + description = "SMTP password"; + }; + + fromAddress = mkOption { + type = types.str; + default = "grafana@${homelabCfg.externalDomain}"; + description = "From email address"; + }; + + fromName = mkOption { + type = types.str; + default = "Homelab Grafana"; + description = "From name"; + }; + }; + + security = { + allowEmbedding = mkOption { + type = types.bool; + default = false; + description = "Allow embedding Grafana in iframes"; + }; + + cookieSecure = mkOption { + type = types.bool; + default = true; + description = "Set secure flag on cookies"; + }; + + secretKey = mkOption { + type = types.str; + default = "change-this-secret-key"; + description = "Secret key for signing (change this!)"; + }; + }; + + auth = { + anonymousEnabled = mkOption { + type = types.bool; + default = false; + description = "Enable anonymous access"; + }; + + disableLoginForm = mkOption { + type = types.bool; + default = false; + description = "Disable login form"; + }; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + description = "Additional Grafana configuration"; + }; + }; + + config = mkIf cfg.enable { + services.grafana = { + enable = true; + settings = + recursiveUpdate { + server = { + http_addr = "0.0.0.0"; + http_port = cfg.port; + domain = cfg.domain; + root_url = cfg.rootUrl; + serve_from_sub_path = false; + }; + + database = { + type = "sqlite3"; + path = "${cfg.dataDir}/grafana.db"; + }; + + security = { + admin_user = cfg.admin.user; + admin_password = cfg.admin.password; + admin_email = cfg.admin.email; + allow_embedding = cfg.security.allowEmbedding; + cookie_secure = cfg.security.cookieSecure; + secret_key = cfg.security.secretKey; + }; + + users = { + allow_sign_up = false; + auto_assign_org = true; + auto_assign_org_role = "Viewer"; + }; + + auth.anonymous = { + enabled = 
cfg.auth.anonymousEnabled; + org_name = "Homelab"; + org_role = "Viewer"; + }; + + auth.basic = { + enabled = !cfg.auth.disableLoginForm; + }; + + smtp = mkIf cfg.smtp.enabled { + enabled = true; + host = cfg.smtp.host; + user = cfg.smtp.user; + password = cfg.smtp.password; + from_address = cfg.smtp.fromAddress; + from_name = cfg.smtp.fromName; + }; + + analytics = { + reporting_enabled = false; + check_for_updates = false; + }; + + log = { + mode = "console"; + level = "info"; + }; + + paths = { + data = cfg.dataDir; + logs = "${cfg.dataDir}/log"; + plugins = "${cfg.dataDir}/plugins"; + provisioning = "/etc/grafana/provisioning"; + }; + } + cfg.extraConfig; + + dataDir = cfg.dataDir; + }; + + # Install plugins + systemd.services.grafana.preStart = mkIf (cfg.plugins != []) ( + concatStringsSep "\n" (map ( + plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true" + ) + cfg.plugins) + ); + + # Provisioning configuration + environment.etc = + { + "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON { + apiVersion = 1; + datasources = provisioningConfig.datasources; + }; + + "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON { + apiVersion = 1; + providers = provisioningConfig.dashboards; + }; + } + // (optionalAttrs (cfg.notifications != []) { # optionalAttrs, not mkIf: merging a mkIf wrapper with // makes the whole value conditional and drops the entries above + "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON { + apiVersion = 1; + notifiers = provisioningConfig.notifiers; + }; + }); + + # Create dashboard directory + systemd.tmpfiles.rules = [ + "d ${cfg.dataDir}/dashboards 0755 grafana grafana -" + ]; + + # Open firewall if requested + networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port]; + + # Add to monitoring endpoints + homelab.monitoring.metrics = [ + { + name = "grafana"; + port = cfg.port; + path = "/metrics"; + jobName = "grafana"; + labels = { + service = "grafana"; + component = "monitoring"; + }; + } + ]; + + # Add health checks + 
homelab.monitoring.healthChecks = [ + { + name = "grafana-web-interface"; + port = cfg.port; + path = "/api/health"; + interval = "30s"; + conditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "web-interface"; + }; + } + { + name = "grafana-login-page"; + port = cfg.port; + path = "/login"; + interval = "60s"; + conditions = [ + "[STATUS] == 200" + "[RESPONSE_TIME] < 3000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "login"; + }; + } + ]; + + # Add reverse proxy entry + homelab.reverseProxy.entries = [ + { + subdomain = "grafana"; + host = homelabCfg.hostname; + port = cfg.port; + } + ]; + }; +} diff --git a/modules/homelab/services/monitoring/influxdb.nix b/modules/homelab/services/monitoring/influxdb.nix index e69de29..75bd525 100644 --- a/modules/homelab/services/monitoring/influxdb.nix +++ b/modules/homelab/services/monitoring/influxdb.nix @@ -0,0 +1,399 @@ +{ + config, + lib, + pkgs, + ... 
+}: +with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + + cfg = config.homelab.services.influxdb; + homelabCfg = config.homelab; + + # Service-specific options beyond the standard interface + influxdbServiceOptions = { + version = mkOption { + type = types.enum ["1" "2"]; + default = "2"; + description = "InfluxDB version to use"; + }; + + dataDir = mkOption { + type = types.str; + default = "/var/lib/influxdb"; + description = "Directory to store InfluxDB data"; + }; + + # InfluxDB 2.x options + v2 = { + org = mkOption { + type = types.str; + default = "homelab"; + description = "Initial organization name"; + }; + + bucket = mkOption { + type = types.str; + default = "homelab"; + description = "Initial bucket name"; + }; + + username = mkOption { + type = types.str; + default = "admin"; + description = "Initial admin username"; + }; + + password = mkOption { + type = types.str; + default = "changeme"; + description = "Initial admin password"; + }; + + retention = mkOption { + type = types.str; + default = "30d"; + description = "Default retention period"; + }; + + tokenFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "File containing the admin token"; + }; + }; + + # InfluxDB 1.x options + v1 = { + database = mkOption { + type = types.str; + default = "homelab"; + description = "Default database name"; + }; + + retention = mkOption { + type = types.str; + default = "30d"; + description = "Default retention period"; + }; + + adminUser = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; + }; + + adminPassword = mkOption { + type = types.str; + default = "changeme"; + description = "Admin password"; + }; + + httpAuth = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable HTTP authentication"; + }; + }; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + description = "Additional InfluxDB 
configuration"; + }; + + backup = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable automatic backups"; + }; + + schedule = mkOption { + type = types.str; + default = "daily"; + description = "Backup schedule"; + }; + + retention = mkOption { + type = types.str; + default = "7d"; + description = "Backup retention period"; + }; + }; + }; + + # Generate configuration based on version + influxdbConfig = + if cfg.version == "2" + then + recursiveUpdate { + bolt-path = "${cfg.dataDir}/influxd.bolt"; + engine-path = "${cfg.dataDir}/engine"; + http-bind-address = "0.0.0.0:${toString cfg.port}"; + reporting-disabled = true; + log-level = "info"; + } + cfg.extraConfig + else + recursiveUpdate { + meta = { + dir = "${cfg.dataDir}/meta"; + }; + data = { + dir = "${cfg.dataDir}/data"; + wal-dir = "${cfg.dataDir}/wal"; + }; + http = { + bind-address = "0.0.0.0:${toString cfg.port}"; + auth-enabled = cfg.v1.httpAuth.enable; + }; + logging = { + level = "info"; + }; + reporting-disabled = true; + } + cfg.extraConfig; +in { + options.homelab.services.influxdb = serviceInterface.mkServiceInterface { + serviceName = "influxdb"; + defaultPort = 8086; + defaultSubdomain = "influxdb"; + monitoringPath = "/metrics"; + healthCheckPath = + if cfg.version == "2" + then "/health" + else "/ping"; + healthCheckConditions = + if cfg.version == "2" + then ["[STATUS] == 200" "[BODY].status == pass"] + else ["[STATUS] == 204" "[RESPONSE_TIME] < 1000"]; + serviceOptions = influxdbServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "influxdb"; + + extraMonitoringLabels = { + component = "timeseries-database"; + version = cfg.version; + }; + + customHealthChecks = + [ + { + name = "influxdb-query"; + port = cfg.port; + path = + if cfg.version == "2" + then "/api/v2/query" + else "/query"; + interval = "60s"; + method = "POST"; + conditions = [ + "[STATUS] < 500" + "[RESPONSE_TIME] < 3000" + ]; + 
group = "monitoring"; + labels = { + service = "influxdb"; + component = "query-engine"; + }; + } + ] + ++ optional (cfg.version == "2") { + name = "influxdb-write"; + port = cfg.port; + path = "/api/v2/write"; + interval = "60s"; + method = "POST"; + conditions = [ + "[STATUS] < 500" + "[RESPONSE_TIME] < 2000" + ]; + group = "monitoring"; + labels = { + service = "influxdb"; + component = "write-engine"; + }; + }; + + serviceConfig = mkMerge [ + # Common configuration + { + # Create data directories + systemd.tmpfiles.rules = + [ + "d ${cfg.dataDir} 0755 influxdb influxdb -" + ] + ++ optionals (cfg.version == "1") [ + "d ${cfg.dataDir}/meta 0755 influxdb influxdb -" + "d ${cfg.dataDir}/data 0755 influxdb influxdb -" + "d ${cfg.dataDir}/wal 0755 influxdb influxdb -" + ]; + + # Ensure influxdb user exists + users.users.influxdb = { + isSystemUser = true; + group = "influxdb"; + home = cfg.dataDir; + createHome = true; + }; + + users.groups.influxdb = {}; + } + + # InfluxDB 2.x configuration + (mkIf (cfg.version == "2") { + services.influxdb2 = { + enable = true; + dataDir = cfg.dataDir; + settings = influxdbConfig; + }; + + # Initial setup for InfluxDB 2.x + systemd.services.influxdb2-setup = { + description = "InfluxDB 2.x initial setup"; + after = ["influxdb2.service"]; + wants = ["influxdb2.service"]; + wantedBy = ["multi-user.target"]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + User = "influxdb"; + Group = "influxdb"; + }; + script = let + setupScript = pkgs.writeShellScript "influxdb2-setup" '' + # Wait for InfluxDB to be ready + timeout=60 + while [ $timeout -gt 0 ]; do + if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/health > /dev/null 2>&1; then + break + fi + sleep 1 + timeout=$((timeout - 1)) + done + + # Check if setup is already done + if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/api/v2/setup > /dev/null 2>&1; then + # Setup InfluxDB if not already done + ${pkgs.influxdb2}/bin/influx setup 
\ + --host http://localhost:${toString cfg.port} \ + --org "${cfg.v2.org}" \ + --bucket "${cfg.v2.bucket}" \ + --username "${cfg.v2.username}" \ + --password "${cfg.v2.password}" \ + --retention "${cfg.v2.retention}" \ + --force + fi + ''; + in "${setupScript}"; + }; + }) + + # InfluxDB 1.x configuration + (mkIf (cfg.version == "1") { + services.influxdb = { + enable = true; + dataDir = cfg.dataDir; + extraConfig = influxdbConfig; + }; + + # Initial setup for InfluxDB 1.x + systemd.services.influxdb-setup = mkIf cfg.v1.httpAuth.enable { + description = "InfluxDB 1.x initial setup"; + after = ["influxdb.service"]; + wants = ["influxdb.service"]; + wantedBy = ["multi-user.target"]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + User = "influxdb"; + Group = "influxdb"; + }; + script = let + setupScript = pkgs.writeShellScript "influxdb-setup" '' + # Wait for InfluxDB to be ready + timeout=60 + while [ $timeout -gt 0 ]; do + if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/ping > /dev/null 2>&1; then + break + fi + sleep 1 + timeout=$((timeout - 1)) + done + + # Create admin user + ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -execute "CREATE USER \"${cfg.v1.adminUser}\" WITH PASSWORD '${cfg.v1.adminPassword}' WITH ALL PRIVILEGES" || true + + # Create database + ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -username "${cfg.v1.adminUser}" -password "${cfg.v1.adminPassword}" -execute "CREATE DATABASE \"${cfg.v1.database}\"" || true + + # Set retention policy + ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -username "${cfg.v1.adminUser}" -password "${cfg.v1.adminPassword}" -database "${cfg.v1.database}" -execute "CREATE RETENTION POLICY \"default\" ON \"${cfg.v1.database}\" DURATION ${cfg.v1.retention} REPLICATION 1 DEFAULT" || true + ''; + in "${setupScript}"; + }; + }) + + # Backup configuration + (mkIf cfg.backup.enable { + systemd.services.influxdb-backup 
= { + description = "InfluxDB backup"; + serviceConfig = { + Type = "oneshot"; + User = "influxdb"; + Group = "influxdb"; + }; + script = let + backupScript = + if cfg.version == "2" + then + pkgs.writeShellScript "influxdb2-backup" '' + backup_dir="${cfg.dataDir}/backups/$(date +%Y%m%d_%H%M%S)" + mkdir -p "$backup_dir" + ${pkgs.influxdb2}/bin/influx backup \ + --host http://localhost:${toString cfg.port} \ + --org "${cfg.v2.org}" \ + "$backup_dir" + + # Clean old backups: find -mtime needs a bare day count, so the "d" suffix of the retention is stripped; depth limits keep the backups root itself out of the match + find "${cfg.dataDir}/backups" -mindepth 1 -maxdepth 1 -type d -mtime +${removeSuffix "d" cfg.backup.retention} -exec rm -rf {} + || true + '' + else + pkgs.writeShellScript "influxdb-backup" '' + backup_dir="${cfg.dataDir}/backups/$(date +%Y%m%d_%H%M%S)" + mkdir -p "$backup_dir" + ${pkgs.influxdb}/bin/influxd backup \ + -host localhost:${toString cfg.port} \ + -database "${cfg.v1.database}" \ + "$backup_dir" + + # Clean old backups: find -mtime needs a bare day count, so the "d" suffix of the retention is stripped; depth limits keep the backups root itself out of the match + find "${cfg.dataDir}/backups" -mindepth 1 -maxdepth 1 -type d -mtime +${removeSuffix "d" cfg.backup.retention} -exec rm -rf {} + || true + ''; + in "${backupScript}"; + }; + + systemd.timers.influxdb-backup = { + description = "InfluxDB backup timer"; + wantedBy = ["timers.target"]; + timerConfig = { + OnCalendar = cfg.backup.schedule; + Persistent = true; + RandomizedDelaySec = "5m"; + }; + }; + + # Create backup directory + systemd.tmpfiles.rules = [ + "d ${cfg.dataDir}/backups 0755 influxdb influxdb -" + ]; + }) + ]; + }; +} diff --git a/modules/homelab/services/monitoring/loki.nix b/modules/homelab/services/monitoring/loki.nix index e69de29..4467b2a 100644 --- a/modules/homelab/services/monitoring/loki.nix +++ b/modules/homelab/services/monitoring/loki.nix @@ -0,0 +1,356 @@ +{ + config, + lib, + pkgs, + ... 
+}: +with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + + cfg = config.homelab.services.loki; + homelabCfg = config.homelab; + + # Service-specific options beyond the standard interface + lokiServiceOptions = { + # Storage configuration + storage = { + type = mkOption { + type = types.enum ["filesystem" "s3" "gcs"]; + default = "filesystem"; + description = "Storage backend type"; + }; + + filesystem = { + directory = mkOption { + type = types.str; + default = "/var/lib/loki"; + description = "Directory for filesystem storage"; + }; + }; + + s3 = { + endpoint = mkOption { + type = types.nullOr types.str; + default = null; + description = "S3 endpoint URL"; + }; + + bucket = mkOption { + type = types.nullOr types.str; + default = null; + description = "S3 bucket name"; + }; + + region = mkOption { + type = types.nullOr types.str; + default = null; + description = "S3 region"; + }; + + accessKeyId = mkOption { + type = types.nullOr types.str; + default = null; + description = "S3 access key ID"; + }; + + secretAccessKey = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to file containing S3 secret access key"; + }; + }; + }; + + # Retention configuration + retention = { + period = mkOption { + type = types.str; + default = "168h"; # 7 days + description = "Log retention period"; + }; + + streamRetention = mkOption { + type = types.listOf (types.submodule { + options = { + selector = mkOption { + type = types.str; + description = "Log stream selector"; + example = "{environment=\"development\"}"; + }; + priority = mkOption { + type = types.int; + description = "Rule priority (higher = more important)"; + default = 1; + }; + period = mkOption { + type = types.str; + description = "Retention period for this stream"; + example = "24h"; + }; + }; + }); + default = []; + description = "Per-stream retention rules"; + }; + }; + + # Performance tuning + limits = { + rejectOldSamples = mkOption { + 
type = types.bool; + default = true; + description = "Reject samples older than max age"; + }; + + rejectOldSamplesMaxAge = mkOption { + type = types.str; + default = "168h"; + description = "Maximum age for samples"; + }; + + ingestionRateMB = mkOption { + type = types.int; + default = 4; + description = "Ingestion rate limit in MB/s per tenant"; + }; + + ingestionBurstSizeMB = mkOption { + type = types.int; + default = 6; + description = "Ingestion burst size in MB per tenant"; + }; + + maxStreamsPerUser = mkOption { + type = types.int; + default = 10000; + description = "Maximum number of streams per user"; + }; + + maxLineSize = mkOption { + type = types.str; + default = "256KB"; + description = "Maximum line size"; + }; + }; + + # Authentication + auth = { + enabled = mkOption { + type = types.bool; + default = false; + description = "Enable authentication"; + }; + }; + + # Extra configuration options + extraConfig = mkOption { + type = types.attrs; + default = {}; + description = "Additional Loki configuration options"; + }; + + # Data directory + dataDir = mkOption { + type = types.str; + default = "/var/lib/loki"; + description = "Directory to store Loki data"; + }; + }; + + # Build the Loki configuration + lokiConfig = + recursiveUpdate { + # Server configuration + server = { + http_listen_port = cfg.port; + grpc_listen_port = cfg.port + 1000; # e.g., 3100 -> 4100 + http_listen_address = "0.0.0.0"; + grpc_listen_address = "0.0.0.0"; + log_level = cfg.monitoring.extraLabels.log_level or "info"; + }; + + # Authentication + auth_enabled = cfg.auth.enabled; + + # Analytics + analytics.reporting_enabled = false; + + # Common configuration for single-binary mode + common = { + ring = { + instance_addr = "127.0.0.1"; + kvstore.store = "inmemory"; + }; + replication_factor = 1; + path_prefix = cfg.dataDir; + }; + + # Schema configuration + schema_config = { + configs = [ + { + from = "2020-05-15"; + store = "tsdb"; + object_store = cfg.storage.type; + schema = 
"v13"; + index = { + prefix = "index_"; + period = "24h"; + }; + } + ]; + }; + + # Storage configuration + storage_config = mkMerge [ + # Filesystem storage + (mkIf (cfg.storage.type == "filesystem") { + filesystem.directory = "${cfg.storage.filesystem.directory}/chunks"; + }) + + # S3 storage + (mkIf (cfg.storage.type == "s3") { + aws = + { + s3 = cfg.storage.s3.endpoint; + bucketnames = cfg.storage.s3.bucket; + region = cfg.storage.s3.region; + access_key_id = cfg.storage.s3.accessKeyId; + } + // (optionalAttrs (cfg.storage.s3.secretAccessKey != null) { + secret_access_key = "$__file{${cfg.storage.s3.secretAccessKey}}"; + }); + }) + ]; + + # Limits configuration + limits_config = + { + reject_old_samples = cfg.limits.rejectOldSamples; + reject_old_samples_max_age = cfg.limits.rejectOldSamplesMaxAge; + ingestion_rate_mb = cfg.limits.ingestionRateMB; + ingestion_burst_size_mb = cfg.limits.ingestionBurstSizeMB; + max_streams_per_user = cfg.limits.maxStreamsPerUser; + max_line_size = cfg.limits.maxLineSize; + + # Retention configuration + retention_period = cfg.retention.period; + } + // (optionalAttrs (cfg.retention.streamRetention != []) { + retention_stream = + map (rule: { + selector = rule.selector; + priority = rule.priority; + period = rule.period; + }) + cfg.retention.streamRetention; + }); + + # Table manager for retention + table_manager = { + retention_deletes_enabled = true; + retention_period = cfg.retention.period; + }; + + # Compactor configuration + compactor = { + working_directory = "${cfg.dataDir}/compactor"; + # shared_store = cfg.storage.type; + compaction_interval = "10m"; + # retention_enabled = true; + # retention_delete_delay = "2h"; + # retention_delete_worker_count = 150; + }; + + # Query range configuration + query_range = { + results_cache = { + cache = { + embedded_cache = { + enabled = true; + max_size_mb = 100; + }; + }; + }; + }; + + # Frontend configuration + frontend = { + max_outstanding_per_tenant = 256; + compress_responses = 
true; + }; + + # Query scheduler + query_scheduler = { + max_outstanding_requests_per_tenant = 256; + }; + + # Runtime configuration + runtime_config = { + file = "/etc/loki/runtime.yml"; + }; + } + cfg.extraConfig; +in { + options.homelab.services.loki = serviceInterface.mkServiceInterface { + serviceName = "loki"; + defaultPort = 3100; + defaultSubdomain = "loki"; + monitoringPath = "/metrics"; + healthCheckPath = "/ready"; + healthCheckConditions = [ + "[STATUS] == 200" + "[RESPONSE_TIME] < 2000" + ]; + serviceOptions = lokiServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "loki"; + + extraMonitoringLabels = { + component = "log-aggregation"; + log_level = "info"; + }; + + customHealthChecks = [ + { + name = "loki-health"; + port = cfg.port; + # https://grafana.com/docs/loki/latest/reference/loki-http-api/#status-endpoints + path = "/loki/api/v1/status/buildinfo"; + interval = "30s"; + conditions = ["[STATUS] == 200"]; + group = "logging"; + labels = { + service = "loki"; + component = "api"; + }; + } + ]; + + serviceConfig = mkMerge [ + { + services.loki = { + enable = true; + dataDir = cfg.dataDir; + configuration = lokiConfig; + }; + + # Ensure data directories exist + systemd.tmpfiles.rules = [ + "d ${cfg.dataDir} 0755 loki loki -" + "d ${cfg.dataDir}/chunks 0755 loki loki -" + "d ${cfg.dataDir}/compactor 0755 loki loki -" + ]; + + # Runtime configuration file for dynamic updates + environment.etc."loki/runtime.yml".text = '' + # Runtime configuration for Loki + # This file can be updated without restarting Loki + ''; + } + ]; + }; +} diff --git a/modules/homelab/services/monitoring/prometheus.nix b/modules/homelab/services/monitoring/prometheus.nix index 76c30ff..b4ac904 100644 --- a/modules/homelab/services/monitoring/prometheus.nix +++ b/modules/homelab/services/monitoring/prometheus.nix @@ -19,12 +19,13 @@ with lib; let mapAttrsToList (jobName: endpoints: { job_name = jobName; 
scrape_interval = head endpoints.scrapeInterval or ["30s"]; - static_configs = [ - { - targets = map (endpoint: "${endpoint.host}:${toString endpoint.port}") endpoints; - labels = fold (endpoint: acc: acc // endpoint.labels) {} endpoints; - } - ]; + static_configs = + map + (endpoint: { + targets = ["${endpoint.host}:${toString endpoint.port}"]; + labels = endpoint.labels; + }) + endpoints; metrics_path = head endpoints.path or [null]; }) jobGroups; diff --git a/modules/homelab/services/monitoring/promtail.nix b/modules/homelab/services/monitoring/promtail.nix deleted file mode 100644 index e69de29..0000000 diff --git a/modules/homelab/services/postgres.nix b/modules/homelab/services/postgres.nix deleted file mode 100644 index e69de29..0000000 diff --git a/modules/homelab/services/prometheus.nix b/modules/homelab/services/prometheus.nix new file mode 100644 index 0000000..7457568 --- /dev/null +++ b/modules/homelab/services/prometheus.nix @@ -0,0 +1,252 @@ +{ + config, + lib, + pkgs, + ... 
+}: +with lib; let + serviceName = "prometheus"; + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; + + # Generate Prometheus scrape configs from global monitoring data + prometheusScrapeConfigs = let + # Get all metrics - try global first, fallback to local + allMetrics = homelabCfg.monitoring.global.allMetrics + or homelabCfg.monitoring.allMetrics + or []; + + jobGroups = groupBy (m: m.jobName) allMetrics; + + scrapeConfigs = + mapAttrsToList (jobName: endpoints: { + job_name = jobName; + scrape_interval = head endpoints.scrapeInterval or ["30s"]; + static_configs = + map + (endpoint: { + targets = ["${endpoint.host}:${toString endpoint.port}"]; + labels = endpoint.labels; + }) + endpoints; + metrics_path = head endpoints.path or ["/metrics"]; + }) + jobGroups; + in + scrapeConfigs; + + # Standard alerting rules for homelab + alertingRules = [ + { + name = "homelab.rules"; + rules = [ + { + alert = "InstanceDown"; + expr = "up == 0"; + for = "5m"; + labels = {severity = "critical";}; + annotations = { + summary = "Instance {{ $labels.instance }} down"; + description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."; + }; + } + { + alert = "HighCPUUsage"; + expr = "100 - (avg by(instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80"; + for = "10m"; + labels = {severity = "warning";}; + annotations = { + summary = "High CPU usage on {{ $labels.instance }}"; + description = "CPU usage is above 80% for more than 10 minutes on {{ $labels.instance }}."; + }; + } + { + alert = "HighMemoryUsage"; + expr = "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85"; + for = "10m"; + labels = {severity = "warning";}; + annotations = { + summary = "High memory usage on {{ $labels.instance }}"; + description = "Memory usage is above 85% for more than 10 minutes on {{ $labels.instance }}."; + }; + } + { + alert = "DiskSpaceLow"; + expr = "((node_filesystem_size_bytes - 
node_filesystem_avail_bytes) / node_filesystem_size_bytes) * 100 > 90"; + for = "5m"; + labels = {severity = "critical";}; + annotations = { + summary = "Disk space low on {{ $labels.instance }}"; + description = "Disk usage is above 90% on {{ $labels.instance }} {{ $labels.mountpoint }}."; + }; + } + ]; + } + ]; +in { + imports = [ + (import ../lib/features/monitoring.nix serviceName) + (import ../lib/features/logging.nix serviceName) + (import ../lib/features/proxy.nix serviceName) + ]; + + # Core service options + options.homelab.services.${serviceName} = { + enable = mkEnableOption "Prometheus Monitoring Server"; + + port = mkOption { + type = types.port; + default = 9090; + }; + + description = mkOption { + type = types.str; + default = "Prometheus Monitoring Server"; + }; + + # Prometheus-specific options + retention = mkOption { + type = types.str; + default = "15d"; + description = "How long to retain metrics data"; + }; + + alertmanager = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable integration with Alertmanager"; + }; + + url = mkOption { + type = types.str; + default = "alertmanager.${homelabCfg.domain}:9093"; + description = "Alertmanager URL"; + }; + }; + + extraScrapeConfigs = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Additional scrape configurations"; + }; + + extraAlertingRules = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Additional alerting rules"; + }; + + globalConfig = mkOption { + type = types.attrs; + default = { + scrape_interval = "15s"; + evaluation_interval = "15s"; + }; + description = "Global Prometheus configuration"; + }; + + extraFlags = mkOption { + type = types.listOf types.str; + default = []; + description = "Extra command line flags"; + }; + + ruleFiles = mkOption { + type = types.listOf types.path; + default = []; + description = "Additional rule files to load"; + }; + }; + + # Service configuration with smart 
defaults + config = mkIf cfg.enable (mkMerge [ + # Core Prometheus service + { + services.prometheus = { + enable = true; + port = cfg.port; + listenAddress = "0.0.0.0"; + retentionTime = cfg.retention; + + globalConfig = cfg.globalConfig; + extraFlags = cfg.extraFlags; + + # Automatically aggregate all metrics from the fleet + scrapeConfigs = prometheusScrapeConfigs ++ cfg.extraScrapeConfigs; + + # Include standard + custom alerting rules + ruleFiles = + map (ruleGroup: + pkgs.writeText "${ruleGroup.name}.yml" (builtins.toJSON { + groups = [ruleGroup]; + })) (alertingRules ++ cfg.extraAlertingRules) + ++ cfg.ruleFiles; + + # Connect to Alertmanager if enabled + alertmanagers = mkIf cfg.alertmanager.enable [ + { + static_configs = [ + { + targets = [cfg.alertmanager.url]; + } + ]; + } + ]; + }; + + networking.firewall.allowedTCPPorts = [cfg.port]; + + homelab.services.${serviceName}.monitoring.enable = mkDefault true; + } + + # Smart defaults for Prometheus + (mkIf cfg.monitoring.enable { + homelab.services.${serviceName}.monitoring = mkDefault { + metrics = { + path = "/metrics"; + extraEndpoints = []; + }; + healthCheck = { + path = "/-/healthy"; + conditions = ["[STATUS] == 200" "[RESPONSE_TIME] < 1000"]; + extraChecks = [ + { + name = "prometheus-ready"; + port = cfg.port; + path = "/-/ready"; + conditions = ["[STATUS] == 200"]; + group = "monitoring"; + } + ]; + }; + extraLabels = { + component = "monitoring-server"; + tier = "monitoring"; + }; + }; + }) + + (mkIf cfg.logging.enable { + homelab.services.${serviceName}.logging = mkDefault { + files = ["/var/log/prometheus/prometheus.log"]; + parsing = { + # Prometheus log format: ts=2024-01-01T12:00:00.000Z caller=main.go:123 level=info msg="message" + regex = "^ts=(?P[^ ]+) caller=(?P[^ ]+) level=(?P\\w+) msg=\"(?P[^\"]*)\""; + extractFields = ["level" "caller"]; + }; + extraLabels = { + component = "monitoring-server"; + application = "prometheus"; + }; + }; + }) + + (mkIf cfg.proxy.enable { + 
homelab.services.${serviceName}.proxy = mkDefault { + subdomain = "prometheus"; + enableAuth = true; # Admin interface needs protection + }; + }) + ]); +} diff --git a/modules/homelab/services/prometheus_old.nix b/modules/homelab/services/prometheus_old.nix deleted file mode 100644 index 9485b3a..0000000 --- a/modules/homelab/services/prometheus_old.nix +++ /dev/null @@ -1,208 +0,0 @@ -# modules/services/prometheus.nix -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.homelab.services.prometheus; - globalCfg = config.homelab.global; -in { - options.homelab.services.prometheus = { - enable = mkEnableOption "Prometheus monitoring server"; - - port = mkOption { - type = types.port; - default = 9090; - description = "Prometheus server port"; - }; - - webExternalUrl = mkOption { - type = types.str; - default = "http://${globalCfg.hostname}:${toString cfg.port}"; - description = "External URL for Prometheus"; - }; - - retention = mkOption { - type = types.str; - default = "30d"; - description = "Data retention period"; - }; - - scrapeConfigs = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional scrape configurations"; - }; - - alertmanager = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable Alertmanager integration"; - }; - - url = mkOption { - type = types.str; - default = "http://localhost:9093"; - description = "Alertmanager URL"; - }; - }; - }; - - config = mkIf cfg.enable { - # Register service with global homelab config - homelab.global.services.prometheus = { - enable = true; - description = "Metrics collection and monitoring server"; - category = "monitoring"; - ports = [cfg.port]; - tags = ["metrics" "monitoring" "alerting"]; - priority = 20; - dependencies = ["node-exporter"]; - }; - - # Configure the actual Prometheus service - services.prometheus = { - enable = true; - port = cfg.port; - webExternalUrl = cfg.webExternalUrl; - - retentionTime = cfg.retention; - - 
scrapeConfigs = - [ - # Auto-discover monitoring endpoints from global config - { - job_name = "homelab-auto"; - static_configs = [ - { - targets = - map ( - endpoint: "${globalCfg.hostname}:${toString endpoint.port}" - ) - globalCfg.monitoring.endpoints; - } - ]; - scrape_interval = "30s"; - metrics_path = "/metrics"; - } - ] - ++ cfg.scrapeConfigs; - - # Alertmanager configuration - alertmanagers = mkIf cfg.alertmanager.enable [ - { - static_configs = [ - { - targets = [cfg.alertmanager.url]; - } - ]; - } - ]; - - rules = [ - # Basic homelab alerting rules - (pkgs.writeText "homelab-alerts.yml" '' - groups: - - name: homelab - rules: - - alert: ServiceDown - expr: up == 0 - for: 5m - labels: - severity: critical - annotations: - summary: "Service {{ $labels.instance }} is down" - description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes." - - - alert: HighMemoryUsage - expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9 - for: 10m - labels: - severity: warning - annotations: - summary: "High memory usage on {{ $labels.instance }}" - description: "Memory usage is above 90% on {{ $labels.instance }}" - - - alert: HighDiskUsage - expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85 - for: 5m - labels: - severity: warning - annotations: - summary: "High disk usage on {{ $labels.instance }}" - description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}" - '') - ]; - }; - - # Add monitoring endpoint to global config - homelab.global.monitoring.endpoints = [ - { - name = "prometheus"; - port = cfg.port; - path = "/metrics"; - jobName = "prometheus"; - scrapeInterval = "30s"; - labels = { - service = "prometheus"; - role = "monitoring"; - }; - } - ]; - - # Add reverse proxy entry if configured - homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [ - { - subdomain = 
"prometheus"; - port = cfg.port; - path = "/"; - enableAuth = true; - enableSSL = true; - customHeaders = { - "X-Frame-Options" = "DENY"; - "X-Content-Type-Options" = "nosniff"; - }; - } - ]; - - # Add backup job for Prometheus data - homelab.global.backups.jobs = [ - { - name = "prometheus-data"; - backend = "restic"; - paths = ["/var/lib/prometheus2"]; - schedule = "daily"; - retention = { - daily = "7"; - weekly = "4"; - monthly = "3"; - yearly = "1"; - }; - excludePatterns = [ - "*.tmp" - "*/wal/*" - ]; - preHook = '' - # Stop prometheus temporarily for consistent backup - systemctl stop prometheus - ''; - postHook = '' - # Restart prometheus after backup - systemctl start prometheus - ''; - } - ]; - - # Open firewall port - networking.firewall.allowedTCPPorts = [cfg.port]; - - # Create prometheus configuration directory - systemd.tmpfiles.rules = [ - "d /var/lib/prometheus2 0755 prometheus prometheus -" - "d /etc/prometheus 0755 root root -" - ]; - }; -} From ac59749e9f2594f19e82e2aeb0b587ac44f979fd Mon Sep 17 00:00:00 2001 From: Forgejo Bot Date: Tue, 29 Jul 2025 06:01:02 +0000 Subject: [PATCH 3/3] feat: automated changes --- flake.lock | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/flake.lock b/flake.lock index 61e80ac..0fa9b9e 100644 --- a/flake.lock +++ b/flake.lock @@ -25,11 +25,11 @@ "stable": "stable" }, "locked": { - "lastModified": 1752287590, - "narHash": "sha256-U1IqFnxlgCRrPaeT5IGCdH0j9CNLPFcI/fRAidi0aDQ=", + "lastModified": 1753701727, + "narHash": "sha256-tgiPAFXoSGIm3wUAuKwjk2fgTgZ0rpT90RNfhU5QKJA=", "owner": "zhaofengli", "repo": "colmena", - "rev": "d2beb694d54db653399b8597c0f6e15e20b26405", + "rev": "342054695f53c4a27c8dce0a8c9f35ade6d963d6", "type": "github" }, "original": { @@ -156,11 +156,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1753429684, - "narHash": "sha256-9h7+4/53cSfQ/uA3pSvCaBepmZaz/dLlLVJnbQ+SJjk=", + "lastModified": 1750134718, + "narHash": 
"sha256-v263g4GbxXv87hMXMCpjkIxd/viIF7p3JpJrwgKdNiI=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "7fd36ee82c0275fb545775cc5e4d30542899511d", + "rev": "9e83b64f727c88a7711a2c463a7b16eedb69a84c", "type": "github" }, "original": { @@ -188,11 +188,11 @@ }, "nixpkgs-unstable": { "locked": { - "lastModified": 1752480373, - "narHash": "sha256-JHQbm+OcGp32wAsXTE/FLYGNpb+4GLi5oTvCxwSoBOA=", + "lastModified": 1753694789, + "narHash": "sha256-cKgvtz6fKuK1Xr5LQW/zOUiAC0oSQoA9nOISB0pJZqM=", "owner": "nixos", "repo": "nixpkgs", - "rev": "62e0f05ede1da0d54515d4ea8ce9c733f12d9f08", + "rev": "dc9637876d0dcc8c9e5e22986b857632effeb727", "type": "github" }, "original": { @@ -204,11 +204,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1752624097, - "narHash": "sha256-mQCof2VccFzF7cmXy43n3GCwSN2+m8TVhZpGLx9sxVc=", + "lastModified": 1753766937, + "narHash": "sha256-L4kx7TQsmD7GnNaGGXlLhqZVPwilP7QIwDPw88Wzzrs=", "owner": "nixos", "repo": "nixpkgs", - "rev": "d7c8095791ce3aafe97d9c16c1dc2f4e3d69a3ba", + "rev": "23296a5649db3ae810cdc41ecd5055d131a82b11", "type": "github" }, "original": { @@ -237,11 +237,11 @@ "nixpkgs-25_05": "nixpkgs-25_05" }, "locked": { - "lastModified": 1752060039, - "narHash": "sha256-MqcbN/PgfXOv8S4q6GcmlORd6kJZ3UlFNhzCvLOEe4I=", + "lastModified": 1753285640, + "narHash": "sha256-ofa021NeHDXAxg5J8mSnn8rHa393PAlD85ZCetP4Qa0=", "owner": "simple-nixos-mailserver", "repo": "nixos-mailserver", - "rev": "80d21ed7a1ab8007597f7cd9adc26ebc98b9611f", + "rev": "ce87c8a9771d1a20c3fa3b60113b9b0821627dcb", "type": "gitlab" }, "original": {