diff --git a/hosts/sandbox/default.nix b/hosts/sandbox/default.nix index 2782e30..ebf4475 100644 --- a/hosts/sandbox/default.nix +++ b/hosts/sandbox/default.nix @@ -11,6 +11,7 @@ tags = [name]; monitoring.enable = true; + logging.enable = true; motd.enable = true; backups = { @@ -40,17 +41,10 @@ ]; }; - services.prometheus = { - enable = true; - }; - - services.gatus = { - enable = true; - ui = { - title = "Homelab Status Dashboard"; - header = "My Homelab Services"; - }; - }; + # services.loki.enable = true; + services.prometheus.enable = true; + services.grafana.enable = true; + services.gatus.enable = true; }; system.stateVersion = "25.05"; diff --git a/modules/homelab/backup-config.nix b/modules/homelab/backup-config.nix deleted file mode 100644 index e26dcb2..0000000 --- a/modules/homelab/backup-config.nix +++ /dev/null @@ -1,116 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.backups; - homelabCfg = config.homelab; - - # Get all defined backend names dynamically - backendNames = attrNames cfg.backends or {}; - - backupJobType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the backup job"; - }; - backend = mkOption { - type = types.enum backendNames; - description = "Backend to use for this backup job"; - }; - backendOptions = mkOption { - type = types.attrs; - default = {}; - description = "Backend-specific options to override or extend the backend configuration"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for this backup job"; - }; - }; - }; -in { - imports = [ - ./backup/restic.nix - # ./backup/borgbackup.nix - ]; - - options.homelab.backups = { - enable = mkEnableOption "Homelab backup system"; - - jobs = mkOption { - type = types.listOf backupJobType; - default = []; - description = "Backup jobs to execute on this system"; - }; - - defaultLabels = mkOption { - type = types.attrsOf types.str; - default = { - 
hostname = homelabCfg.hostname; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - description = "Default labels applied to all backup jobs"; - }; - - monitoring = mkOption { - type = types.bool; - default = true; - description = "Enable backup monitoring and metrics"; - }; - }; - - config = mkIf cfg.enable { - # Validate that all job backends exist - assertions = [ - { - assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs; - message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends"; - } - ]; - - # Add backup jobs to monitoring endpoints if monitoring is enabled - # homelab.monitoring.endpoints = - # mkIf (cfg.monitoring && config.homelab.monitoring.enable) - # (map (job: { - # name = "backup-${job.name}"; - # port = 9100; # Assuming node exporter collects backup metrics - # path = "/metrics"; - # jobName = "backup"; - # labels = - # cfg.defaultLabels - # // job.labels - # // { - # backup_job = job.name; - # backup_backend = job.backend; - # }; - # }) - # cfg.jobs); - - # Export backup configuration for external consumption - environment.etc."homelab/backup-config.json".text = builtins.toJSON { - backends = - mapAttrs (name: config: { - inherit name; - enabled = config.enable or false; - }) - cfg.backends; - - jobs = - map (job: { - inherit (job) name backend labels; - allLabels = cfg.defaultLabels // job.labels; - paths = job.backendOptions.paths or []; - schedule = job.backendOptions.timerConfig.OnCalendar or job.backendOptions.startAt or "unknown"; - node = homelabCfg.hostname; - environment = homelabCfg.environment; - location = homelabCfg.location; - }) - cfg.jobs; - }; - }; -} diff --git a/modules/homelab/default.nix b/modules/homelab/default.nix index e56aae9..ce19d59 100644 --- a/modules/homelab/default.nix +++ b/modules/homelab/default.nix @@ -1,6 +1,7 @@ { config, lib, + nodes, ... 
}: with lib; let @@ -9,18 +10,13 @@ with lib; let nodeAgg = import ./lib/node-aggregation.nix {inherit lib;}; in { imports = [ - ./monitoring-config.nix - ./proxy-config.nix - ./backup-config.nix - ./motd + ./lib/systems/monitoring.nix + ./lib/systems/logging.nix + ./lib/systems/proxy.nix + ./lib/systems/backups.nix ./services - - # Global aggregation modules - (nodeAgg.mkGlobalModule "monitoring" nodeAgg.aggregators.monitoring) - # (nodeAgg.mkGlobalModule "logs" nodeAgg.aggregators.logs) - (nodeAgg.mkGlobalModule "reverseProxy" nodeAgg.aggregators.reverseProxy) - (nodeAgg.mkGlobalModule "backups" nodeAgg.aggregators.backups) + ./motd ]; options.homelab = { @@ -61,73 +57,73 @@ in { networking.hostName = cfg.hostname; # Export configuration for external consumption - environment.etc."homelab/config.json".text = builtins.toJSON { - inherit (cfg) hostname domain environment location tags; + # environment.etc."homelab/config.json".text = builtins.toJSON { + # inherit (cfg) hostname domain environment location tags; - monitoring = { - # Metrics endpoints (Prometheus, etc.) - metrics = - map (endpoint: { - inherit (endpoint) name host port path jobName scrapeInterval labels; - url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; - }) - cfg.global.monitoring.allMetrics or []; + # monitoring = { + # # Metrics endpoints (Prometheus, etc.) 
+ # metrics = + # map (endpoint: { + # inherit (endpoint) name host port path jobName scrapeInterval labels; + # url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; + # }) + # cfg.global.monitoring.allMetrics or []; - # Health check endpoints - healthChecks = - map (check: let - # Determine the host based on useExternalDomain - actualHost = - if check.useExternalDomain - then "${check.subdomain}.${cfg.externalDomain}" - else check.host; + # # Health check endpoints + # healthChecks = + # map (check: let + # # Determine the host based on useExternalDomain + # actualHost = + # if check.useExternalDomain + # then "${check.subdomain}.${cfg.externalDomain}" + # else check.host; - # Build the URL - portPart = - if check.port != null - then ":${toString check.port}" - else ""; - url = "${check.protocol}://${actualHost}${portPart}${check.path}"; - in { - inherit (check) name protocol method interval timeout conditions alerts group labels enabled; - host = actualHost; - port = check.port; - path = check.path; - url = url; - useExternalDomain = check.useExternalDomain; - subdomain = check.subdomain; - sourceNode = cfg.hostname; - }) - cfg.global.monitoring.allHealthChecks or []; - }; + # # Build the URL + # portPart = + # if check.port != null + # then ":${toString check.port}" + # else ""; + # url = "${check.protocol}://${actualHost}${portPart}${check.path}"; + # in { + # inherit (check) name protocol method interval timeout conditions alerts group labels enabled; + # host = actualHost; + # port = check.port; + # path = check.path; + # url = url; + # useExternalDomain = check.useExternalDomain; + # subdomain = check.subdomain; + # sourceNode = cfg.hostname; + # }) + # cfg.global.monitoring.allHealthChecks or []; + # }; - reverseProxy = { - entries = - map (entry: { - inherit (entry) subdomain host port path enableAuth enableSSL; - internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}"; - externalHost = 
"${entry.subdomain}.${cfg.externalDomain}"; - }) - cfg.global.reverseProxy.all; - }; + # reverseProxy = { + # entries = + # map (entry: { + # inherit (entry) subdomain host port path enableAuth enableSSL; + # internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}"; + # externalHost = "${entry.subdomain}.${cfg.externalDomain}"; + # }) + # cfg.global.reverseProxy.all; + # }; - backups = { - jobs = - map (job: { - inherit (job) name backend labels; - backupId = job._backupId; - sourceNode = job._sourceNode; - }) - cfg.global.backups.all; + # backups = { + # jobs = + # map (job: { + # inherit (job) name backend labels; + # backupId = job._backupId; + # sourceNode = job._sourceNode; + # }) + # cfg.global.backups.all; - backends = cfg.global.backups.allBackends; + # backends = cfg.global.backups.allBackends; - summary = { - totalJobs = length cfg.global.backups.all; - jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend; - jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode; - }; - }; - }; + # summary = { + # totalJobs = length cfg.global.backups.all; + # jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend; + # jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode; + # }; + # }; + # }; }; } diff --git a/modules/homelab/lib/aggregators/base.nix b/modules/homelab/lib/aggregators/base.nix new file mode 100644 index 0000000..e32228c --- /dev/null +++ b/modules/homelab/lib/aggregators/base.nix @@ -0,0 +1,55 @@ +{lib}: let + inherit (lib) flatten mapAttrs attrValues filterAttrs mapAttrsToList filter groupBy length unique attrByPath splitString; + + # Generic function to aggregate any attribute across nodes + aggregateFromNodes = { + nodes, + attributePath, # e.g. "homelab.monitoring.metrics" or "homelab.backups.jobs" + enhancer ? 
null, # optional function to enhance each item with node context + }: let + # Extract the attribute from each node using the path + getNestedAttr = path: config: let + pathList = splitString "." path; + in + attrByPath pathList [] config; + + # Get all items from all nodes + allItems = flatten (mapAttrsToList + (nodeName: nodeConfig: let + items = getNestedAttr attributePath nodeConfig.config; + baseEnhancer = item: + item + // { + _nodeName = nodeName; + _nodeConfig = nodeConfig; + _nodeAddress = nodeConfig.config.networking.hostName or nodeName; + }; + finalEnhancer = + if enhancer != null + then (item: enhancer (baseEnhancer item)) + else baseEnhancer; + in + map finalEnhancer items) + nodes); + in { + # Raw aggregated data + all = allItems; + + # Common grouping patterns + byNode = groupBy (item: item._nodeName) allItems; + byType = groupBy (item: item.type or "unknown") allItems; + byService = groupBy (item: item.service or "unknown") allItems; + + # Utility functions for filtering + filterBy = predicate: filter predicate allItems; + ofType = type: filter (item: (item.type or "") == type) allItems; + ofNode = nodeName: filter (item: item._nodeName == nodeName) allItems; + enabled = filter (item: item.enabled or true) allItems; + + # Counting utilities + count = length allItems; + countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems); + }; +in { + inherit aggregateFromNodes; +} diff --git a/modules/homelab/lib/features/logging.nix b/modules/homelab/lib/features/logging.nix new file mode 100644 index 0000000..010b766 --- /dev/null +++ b/modules/homelab/lib/features/logging.nix @@ -0,0 +1,87 @@ +serviceName: { + config, + lib, + ... 
+}: +with lib; let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; +in { + options.homelab.services.${serviceName}.logging = { + enable = mkEnableOption "logging for ${serviceName}"; + + files = mkOption { + type = types.listOf types.str; + default = []; + }; + + parsing = { + regex = mkOption { + type = types.nullOr types.str; + default = null; + }; + + extractFields = mkOption { + type = types.listOf types.str; + default = []; + }; + }; + + multiline = mkOption { + type = types.nullOr (types.submodule { + options = { + firstLineRegex = mkOption {type = types.str;}; + maxWaitTime = mkOption { + type = types.str; + default = "3s"; + }; + }; + }); + default = null; + }; + + extraLabels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + + extraSources = mkOption { + type = types.listOf types.attrs; + default = []; + }; + }; + + config = mkIf (cfg.enable && cfg.logging.enable) { + homelab.logging.sources = + [ + { + name = "${serviceName}-logs"; + type = "file"; + files = { + paths = cfg.logging.files; + multiline = cfg.logging.multiline; + }; + labels = + cfg.logging.extraLabels + // { + service = serviceName; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + }; + pipelineStages = # plain list value, so use optionals: mkIf returns an attrset, which ++ cannot concatenate + optionals (cfg.logging.parsing.regex != null) [ + { + regex.expression = cfg.logging.parsing.regex; + } + ] + ++ [ + { + labels = listToAttrs (map (field: nameValuePair field null) cfg.logging.parsing.extractFields); + } + ]; + enabled = true; + } + ] + ++ cfg.logging.extraSources; + }; +} diff --git a/modules/homelab/lib/features/monitoring.nix b/modules/homelab/lib/features/monitoring.nix new file mode 100644 index 0000000..90b36f9 --- /dev/null +++ b/modules/homelab/lib/features/monitoring.nix @@ -0,0 +1,108 @@ +serviceName: { + config, + lib, + ... 
+}: +with lib; let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; +in { + # Define the service-specific monitoring options + options.homelab.services.${serviceName}.monitoring = { + enable = mkEnableOption "monitoring for ${serviceName}"; + + metrics = { + enable = mkOption { + type = types.bool; + default = true; + }; + + path = mkOption { + type = types.str; + default = "/metrics"; + }; + + extraEndpoints = mkOption { + type = types.listOf types.attrs; + default = []; + }; + }; + + healthCheck = { + enable = mkOption { + type = types.bool; + default = true; + }; + + path = mkOption { + type = types.str; + default = "/health"; + }; + + conditions = mkOption { + type = types.listOf types.str; + default = ["[STATUS] == 200"]; + }; + + extraChecks = mkOption { + type = types.listOf types.attrs; + default = []; + }; + }; + + extraLabels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + }; + + # Generate the homelab config automatically when service is enabled + config = mkIf (cfg.enable && cfg.monitoring.enable) { + homelab.monitoring = { + metrics = # main endpoint is emitted only when metrics.enable is set; extraEndpoints are always appended + optionals cfg.monitoring.metrics.enable [ + { + name = "${serviceName}-main"; + host = homelabCfg.hostname; + port = cfg.port; + path = cfg.monitoring.metrics.path; + jobName = serviceName; + scrapeInterval = "30s"; + labels = + cfg.monitoring.extraLabels + // { + service = serviceName; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + }; + } + ] + ++ cfg.monitoring.metrics.extraEndpoints; + + healthChecks = # default check is emitted only when healthCheck.enable is set; extraChecks are always appended + optionals cfg.monitoring.healthCheck.enable [ + { + name = "${serviceName}-health"; + host = homelabCfg.hostname; + port = cfg.port; + path = cfg.monitoring.healthCheck.path; + protocol = "http"; + method = "GET"; + interval = "30s"; + timeout = "10s"; + conditions = cfg.monitoring.healthCheck.conditions; + group = "services"; + labels = + cfg.monitoring.extraLabels + // { + service = serviceName; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + }; + enabled = true; + } + ] + ++ 
cfg.monitoring.healthCheck.extraChecks; + }; + }; +} diff --git a/modules/homelab/lib/features/proxy.nix b/modules/homelab/lib/features/proxy.nix new file mode 100644 index 0000000..2658c7a --- /dev/null +++ b/modules/homelab/lib/features/proxy.nix @@ -0,0 +1,64 @@ +serviceName: { + config, + lib, + ... +}: +with lib; let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; +in { + options.homelab.services.${serviceName}.proxy = { + enable = mkEnableOption "reverse proxy for ${serviceName}"; + + subdomain = mkOption { + type = types.str; + default = serviceName; + }; + + enableAuth = mkOption { + type = types.bool; + default = false; + }; + + additionalSubdomains = mkOption { + type = types.listOf (types.submodule { + options = { + subdomain = mkOption {type = types.str;}; + port = mkOption {type = types.port;}; + path = mkOption { + type = types.str; + default = "/"; + }; + enableAuth = mkOption { + type = types.bool; + default = false; + }; + }; + }); + default = []; + }; + }; + + config = mkIf (cfg.enable && cfg.proxy.enable) { + homelab.reverseProxy.entries = + [ + { + subdomain = cfg.proxy.subdomain; + host = homelabCfg.hostname; + port = cfg.port; + path = "/"; + enableAuth = cfg.proxy.enableAuth; + enableSSL = true; + } + ] + ++ map (sub: { + subdomain = sub.subdomain; + host = homelabCfg.hostname; + port = sub.port; + path = sub.path; + enableAuth = sub.enableAuth; + enableSSL = true; + }) + cfg.proxy.additionalSubdomains; + }; +} diff --git a/modules/homelab/lib/node-aggregation.nix b/modules/homelab/lib/node-aggregation.nix deleted file mode 100644 index 1719012..0000000 --- a/modules/homelab/lib/node-aggregation.nix +++ /dev/null @@ -1,226 +0,0 @@ -{lib}: let - inherit (lib) flatten mapAttrs mapAttrsToList filter groupBy length unique attrByPath splitString; - - # Generic function to aggregate any attribute across nodes - aggregateFromNodes = { - nodes, - attributePath, # e.g. 
"homelab.monitoring.endpoints" or "homelab.backups.jobs" - enhancer ? null, # optional function to enhance each item with node context - }: let - # Extract the attribute from each node using the path - getNestedAttr = path: config: let - pathList = splitString "." path; - in - attrByPath pathList [] config; - - # Get all items from all nodes - allItems = flatten (mapAttrsToList - (nodeName: nodeConfig: let - items = getNestedAttr attributePath nodeConfig.config; - baseEnhancer = item: - item - // { - _nodeName = nodeName; - _nodeConfig = nodeConfig; - _nodeAddress = nodeConfig.config.networking.hostName or nodeName; - }; - finalEnhancer = - if enhancer != null - then (item: enhancer (baseEnhancer item)) - else baseEnhancer; - in - map finalEnhancer items) - nodes); - in { - # Raw aggregated data - all = allItems; - - # Common grouping patterns - byNode = groupBy (item: item._nodeName) allItems; - byType = groupBy (item: item.type or "unknown") allItems; - byService = groupBy (item: item.service or "unknown") allItems; - - # Utility functions for filtering - filterBy = predicate: filter predicate allItems; - ofType = type: filter (item: (item.type or "") == type) allItems; - - count = length allItems; - countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems); - }; - - # Specialized aggregators for common use cases - aggregators = { - monitoring = nodes: let - # Aggregate metrics endpoints - metricsAgg = aggregateFromNodes { - inherit nodes; - attributePath = "homelab.monitoring.metrics"; - enhancer = endpoint: - endpoint - // { - _fullAddress = "${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}"; - _metricsUrl = "http://${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}${endpoint.path or "/metrics"}"; - _type = "metrics"; - }; - }; - # Aggregate health checks - healthChecksAgg = aggregateFromNodes { - inherit nodes; - attributePath = "homelab.monitoring.healthChecks"; - enhancer = check: let - # Compute the actual 
host and URL - actualHost = - if check.useExternalDomain or false - then "${check.subdomain}.${check._nodeConfig.config.homelab.externalDomain or "example.com"}" - else check.host or check._nodeAddress; - portPart = - if check.port != null - then ":${toString check.port}" - else ""; - url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path or "/"}"; - in - check - // { - _actualHost = actualHost; - _url = url; - _type = "health-check"; - # Merge default labels with node context - labels = - (check.labels or {}) - // { - node = check._nodeName; - environment = check._nodeConfig.config.homelab.environment or "unknown"; - }; - }; - }; - in - metricsAgg - // healthChecksAgg - // { - # Metrics-specific aggregations - allMetrics = metricsAgg.all; - metricsByNode = metricsAgg.byNode; - metricsByJobName = groupBy (m: m.jobName or "unknown") metricsAgg.all; - - # Health checks-specific aggregations - allHealthChecks = healthChecksAgg.all; - healthChecksByNode = healthChecksAgg.byNode; - healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all; - healthChecksByProtocol = groupBy (hc: hc.protocol or "http") healthChecksAgg.all; - - # Filtered health checks - externalHealthChecks = filter (hc: hc.useExternalDomain or false) healthChecksAgg.all; - internalHealthChecks = filter (hc: !(hc.useExternalDomain or false)) healthChecksAgg.all; - enabledHealthChecks = filter (hc: hc.enabled or true) healthChecksAgg.all; - - # Summary statistics - summary = { - totalMetrics = length metricsAgg.all; - totalHealthChecks = length healthChecksAgg.all; - healthChecksByGroup = - mapAttrs (group: checks: length checks) - (groupBy (hc: hc.group or "default") healthChecksAgg.all); - healthChecksByProtocol = - mapAttrs (protocol: checks: length checks) - (groupBy (hc: hc.protocol or "http") healthChecksAgg.all); - externalChecksCount = length (filter (hc: hc.useExternalDomain or false) healthChecksAgg.all); - internalChecksCount = length (filter (hc: 
!(hc.useExternalDomain or false)) healthChecksAgg.all); - }; - }; - - # Promtail log configurations - # logs = nodes: - # aggregateFromNodes { - # inherit nodes; - # attributePath = "homelab.logging.sources"; - # enhancer = logSource: - # logSource - # // { - # # Add log-specific computed fields - # _logPath = logSource.path or "/var/log/${logSource.service}.log"; - # _labels = - # (logSource.labels or {}) - # // { - # node = logSource._nodeName; - # service = logSource.service or "unknown"; - # }; - # }; - # }; - - # Reverse proxy configurations - reverseProxy = nodes: - aggregateFromNodes { - inherit nodes; - attributePath = "homelab.reverseProxy.entries"; - enhancer = entry: - entry - // { - # Add proxy-specific computed fields - _upstream = "http://${entry.host or entry._nodeAddress}:${toString entry.port}"; - _fqdn = "${entry.subdomain or entry.service}.${entry.domain or "local"}"; - }; - }; - - # Backup jobs with enhanced aggregation - backups = nodes: let - baseAgg = aggregateFromNodes { - inherit nodes; - attributePath = "homelab.backups.jobs"; - enhancer = backup: - backup - // { - _sourceNode = backup._nodeName; - _backupId = "${backup._nodeName}-${backup.name}"; - _jobFqdn = "${backup.name}.${backup._nodeName}"; - }; - }; - - # Get all unique backends across all nodes - allBackends = let - allBackendConfigs = - mapAttrsToList - (nodeName: nodeConfig: - attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config) - nodes; - enabledBackends = flatten (map (backends: - filter (name: backends.${name} != null) (lib.attrNames backends)) - allBackendConfigs); - in - unique enabledBackends; - in - baseAgg - // { - # Backup-specific aggregations - byBackend = groupBy (job: job.backend) baseAgg.all; - allBackends = allBackends; - - # Enhanced summary - summary = { - totalJobs = length baseAgg.all; - jobsByBackend = - mapAttrs (backend: jobs: length jobs) - (groupBy (job: job.backend) baseAgg.all); - jobsByNode = baseAgg.countBy (job: job._nodeName); - 
availableBackends = allBackends; - backendsInUse = unique (map (job: job.backend) baseAgg.all); - }; - }; - }; -in { - inherit aggregateFromNodes aggregators; - - # Convenience function to create a module that provides global aggregations - mkGlobalModule = attributeName: aggregatorFn: { - lib, - nodes, - ... - }: { - options.homelab.global.${attributeName} = lib.mkOption { - type = lib.types.attrs; - readOnly = true; - description = "Globally aggregated ${attributeName} from all nodes"; - }; - - config.homelab.global.${attributeName} = aggregatorFn nodes; - }; -} diff --git a/modules/homelab/lib/service-interface.nix b/modules/homelab/lib/service-interface.nix deleted file mode 100644 index 2bc7ed8..0000000 --- a/modules/homelab/lib/service-interface.nix +++ /dev/null @@ -1,295 +0,0 @@ -# Standard service interface for homelab services -# This provides a consistent contract that all services should follow -{lib}: let - inherit (lib) mkOption mkEnableOption types; - - # Define the standard service interface - mkServiceInterface = { - serviceName, - defaultPort ? null, - defaultSubdomain ? serviceName, - defaultDescription ? "Homelab ${serviceName} service", - monitoringPath ? "/metrics", - healthCheckPath ? "/health", - healthCheckConditions ? ["[STATUS] == 200"], - # Custom options that the service wants to expose - serviceOptions ? 
{}, - }: - { - # Standard interface options that all services must have - enable = mkEnableOption defaultDescription; - - port = mkOption { - type = types.port; - default = - if defaultPort != null - then defaultPort - else throw "Service ${serviceName} must specify a default port"; - description = "Port for ${serviceName} service"; - }; - - openFirewall = mkOption { - type = types.bool; - default = true; - description = "Whether to automatically open firewall ports"; - }; - - proxy = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable reverse proxy for this service"; - }; - - subdomain = mkOption { - type = types.str; - default = defaultSubdomain; - description = "Subdomain for reverse proxy (${defaultSubdomain}.yourdomain.com)"; - }; - - enableAuth = mkOption { - type = types.bool; - default = false; - description = "Enable authentication for reverse proxy"; - }; - - enableSSL = mkOption { - type = types.bool; - default = true; - description = "Enable SSL for reverse proxy"; - }; - }; - - monitoring = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable monitoring (metrics and health checks)"; - }; - - metricsPath = mkOption { - type = types.str; - default = monitoringPath; - description = "Path for metrics endpoint"; - }; - - jobName = mkOption { - type = types.str; - default = serviceName; - description = "Prometheus job name"; - }; - - scrapeInterval = mkOption { - type = types.str; - default = "30s"; - description = "Prometheus scrape interval"; - }; - - healthCheck = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable health check monitoring"; - }; - - path = mkOption { - type = types.str; - default = healthCheckPath; - description = "Path for health check endpoint"; - }; - - interval = mkOption { - type = types.str; - default = "30s"; - description = "Health check interval"; - }; - - timeout = mkOption { - type = types.str; - default = "10s"; - 
description = "Health check timeout"; - }; - - conditions = mkOption { - type = types.listOf types.str; - default = healthCheckConditions; - description = "Health check conditions"; - }; - - group = mkOption { - type = types.str; - default = "services"; - description = "Health check group name"; - }; - }; - - extraLabels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for monitoring"; - }; - }; - - description = mkOption { - type = types.str; - default = defaultDescription; - description = "Service description"; - }; - - extraOptions = mkOption { - type = types.attrs; - default = {}; - description = "Additional service-specific configuration options"; - }; - - # Merge in service-specific options - } - // serviceOptions; - - # Helper function to implement the standard service behavior - mkServiceConfig = { - config, - cfg, - homelabCfg, - serviceName, - # Function that returns the actual service configuration - serviceConfig, - # Optional: custom monitoring labels - extraMonitoringLabels ? {}, - # Optional: custom health check configuration - customHealthChecks ? [], - # Optional: custom reverse proxy configuration - customProxyConfig ? 
{}, - }: let - # Standard monitoring labels - standardLabels = - { - service = serviceName; - component = "main"; - instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; - } - // extraMonitoringLabels // cfg.monitoring.extraLabels; - - # Standard reverse proxy entry - standardProxyEntry = - { - subdomain = cfg.proxy.subdomain; - host = homelabCfg.hostname; - port = cfg.port; - enableAuth = cfg.proxy.enableAuth; - enableSSL = cfg.proxy.enableSSL; - } - // customProxyConfig; - - # Standard metrics configuration - standardMetrics = lib.optional cfg.monitoring.enable { - name = "${serviceName}-metrics"; - port = cfg.port; - path = cfg.monitoring.metricsPath; - jobName = cfg.monitoring.jobName; - scrapeInterval = cfg.monitoring.scrapeInterval; - labels = standardLabels; - }; - - # Standard health check configuration - standardHealthCheck = lib.optional (cfg.monitoring.enable && cfg.monitoring.healthCheck.enable) { - name = "${serviceName}-health"; - port = cfg.port; - path = cfg.monitoring.healthCheck.path; - interval = cfg.monitoring.healthCheck.interval; - timeout = cfg.monitoring.healthCheck.timeout; - conditions = cfg.monitoring.healthCheck.conditions; - group = cfg.monitoring.healthCheck.group; - labels = standardLabels; - }; - - # Merge service config with standard behaviors - baseConfig = lib.mkMerge [ - # Service-specific configuration - serviceConfig - - # Standard firewall configuration - (lib.mkIf cfg.openFirewall { - networking.firewall.allowedTCPPorts = [cfg.port]; - }) - - # Standard monitoring configuration - (lib.mkIf cfg.monitoring.enable { - homelab.monitoring.metrics = standardMetrics; - homelab.monitoring.healthChecks = standardHealthCheck ++ customHealthChecks; - }) - - # Standard reverse proxy configuration - (lib.mkIf cfg.proxy.enable { - homelab.reverseProxy.entries = [standardProxyEntry]; - }) - ]; - in - lib.mkIf cfg.enable baseConfig; - - # Validation helper to ensure required options are set - validateServiceConfig = cfg: serviceName: [ - 
# Validate that if proxy is enabled, subdomain is set - (lib.mkIf (cfg.proxy.enable && cfg.proxy.subdomain == "") - (throw "Service ${serviceName}: proxy.subdomain is required when proxy.enable is true")) - - # Validate that if monitoring is enabled, required paths are set - (lib.mkIf (cfg.monitoring.enable && cfg.monitoring.metricsPath == "") - (throw "Service ${serviceName}: monitoring.metricsPath cannot be empty when monitoring is enabled")) - ]; -in { - inherit mkServiceInterface mkServiceConfig validateServiceConfig; - - # Common service option patterns - commonOptions = { - # Log level option - logLevel = mkOption { - type = types.enum ["debug" "info" "warn" "error"]; - default = "info"; - description = "Log level"; - }; - - # Environment file option (for secrets) - environmentFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Environment file for secrets"; - }; - - # External URL option - externalUrl = serviceName: homelabCfg: - mkOption { - type = types.str; - default = "https://${serviceName}.${homelabCfg.externalDomain}"; - description = "External URL for ${serviceName}"; - }; - }; - - # Helper for creating service modules with the interface - mkServiceModule = { - serviceName, - defaultPort, - defaultSubdomain ? serviceName, - serviceOptions ? {}, - ... - } @ args: { - config, - lib, - ... 
- }: let - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; - - serviceInterface = mkServiceInterface { - inherit serviceName defaultPort defaultSubdomain serviceOptions; - }; - in { - options.homelab.services.${serviceName} = serviceInterface; - - config = mkServiceConfig { - inherit config cfg homelabCfg serviceName; - # Service implementor must provide this function - serviceConfig = args.serviceConfig or (throw "mkServiceModule requires serviceConfig function"); - }; - }; -} diff --git a/modules/homelab/lib/systems/backups.nix b/modules/homelab/lib/systems/backups.nix new file mode 100644 index 0000000..a39d1f9 --- /dev/null +++ b/modules/homelab/lib/systems/backups.nix @@ -0,0 +1,163 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.backups; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Get all defined backend names dynamically + backendNames = attrNames cfg.backends or {}; + + backupJobType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the backup job"; + }; + backend = mkOption { + type = types.enum backendNames; + description = "Backend to use for this backup job"; + }; + backendOptions = mkOption { + type = types.attrs; + default = {}; + description = "Backend-specific options to override or extend the backend configuration"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for this backup job"; + }; + }; + }; + + # Local aggregation + localAggregation = { + allJobs = cfg.jobs; + allBackends = backendNames; + }; + + # Global aggregation + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + jobsAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.backups.allJobs"; + enhancer = job: + job + // { + _sourceNode = job._nodeName; + _backupId = "${job._nodeName}-${job.name}"; + _jobFqdn = 
"${job.name}.${job._nodeName}"; + }; + }; + + # Get all backends from all nodes + allBackendsFromNodes = let + backendConfigs = + mapAttrsToList ( + nodeName: nodeConfig: + attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config + ) + nodes; + enabledBackends = flatten (map ( + backends: + filter (name: backends.${name} != null) (attrNames backends) + ) + backendConfigs); + in + unique enabledBackends; + in { + allJobs = jobsAgg.all; + allBackends = allBackendsFromNodes; + jobsByBackend = groupBy (j: j.backend) jobsAgg.all; + summary = { + total = length jobsAgg.all; + byBackend = jobsAgg.countBy (j: j.backend); + byNode = jobsAgg.countBy (j: j._nodeName); + uniqueBackends = unique (map (j: j.backend) jobsAgg.all); + }; + }; +in { + imports = [ + ../../backup/restic.nix + # ./backup/borgbackup.nix + ]; + + options.homelab.backups = { + enable = mkEnableOption "backup system"; + + jobs = mkOption { + type = types.listOf backupJobType; + default = []; + description = "Backup jobs to execute on this system"; + }; + + # Backend configurations (like your existing setup) + # backends = mkOption { + # type = types.attrs; + # default = {}; + # description = "Backup backend configurations"; + # }; + + defaultLabels = mkOption { + type = types.attrsOf types.str; + default = { + hostname = homelabCfg.hostname; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + description = "Default labels applied to all backup jobs"; + }; + + monitoring = mkOption { + type = types.bool; + default = true; + description = "Enable backup monitoring and metrics"; + }; + + # Read-only aggregated data, always exposed through the option defaults + allJobs = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allJobs; + readOnly = true; + }; + + allBackends = mkOption { + type = types.listOf types.str; + default = localAggregation.allBackends; + readOnly = true; + }; + + global = mkOption { + type = types.attrs; + default = if hasNodes then globalAggregation else {}; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # Validate that all job backends
exist + assertions = [ + { + assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs; + message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends"; + } + ]; + + # The aggregated views (allJobs, allBackends, global) are intentionally NOT + # defined here. They are read-only options, and the module system counts + # an option's default as a definition: declaring a default and also + # assigning the option in config aborts evaluation with "is read-only, + # but it's set multiple times". Their values are therefore supplied by + # the defaults in the option declarations above (the same approach + # lib/systems/monitoring.nix uses), which also keeps them readable by + # other nodes' aggregators when backups are disabled on this host. + # + }; +} diff --git a/modules/homelab/lib/systems/logging.nix b/modules/homelab/lib/systems/logging.nix new file mode 100644 index 0000000..d760ee3 --- /dev/null +++ b/modules/homelab/lib/systems/logging.nix @@ -0,0 +1,209 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.logging; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Local aggregation + localAggregation = { + allSources = + cfg.sources + ++ (optional cfg.promtail.enable { + name = "system-journal"; + type = "journal"; + journal.path = "/var/log/journal"; + labels = + cfg.defaultLabels + // { + component = "system"; + log_source = "journald"; + }; + enabled = true; + }); + }; + + # Global aggregation + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + sourcesAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.logging.allSources"; + enhancer = source: + source + // { + _sourceNode = source._nodeName; + _logId = "${source._nodeName}-${source.name}"; + }; + }; + in { + allSources = sourcesAgg.all; + sourcesByType = groupBy (s: s.type) sourcesAgg.all; + summary = { + total = length sourcesAgg.all; + byType = sourcesAgg.countBy (s: s.type); + byNode = sourcesAgg.countBy (s: s._nodeName); + }; + }; +in { + options.homelab.logging = { + enable = mkEnableOption "logging system"; + + promtail = { + enable = mkOption { + type = types.bool; + default = true; + }; + port = mkOption { + type = types.port; + default = 9080; + }; + clients = mkOption { + type = 
types.listOf (types.submodule { + options = { + url = mkOption {type = types.str;}; + tenant_id = mkOption { + type = types.nullOr types.str; + default = null; + }; + }; + }); + default = [{url = "http://monitor.${homelabCfg.domain}:3100/loki/api/v1/push";}]; + }; + }; + + sources = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + type = mkOption { + type = types.enum ["journal" "file" "syslog" "docker"]; + default = "file"; + }; + files = mkOption { + type = types.submodule { + options = { + paths = mkOption { + type = types.listOf types.str; + default = []; + }; + multiline = mkOption { + type = types.nullOr types.attrs; + default = null; + }; + }; + }; + default = {}; + }; + journal = mkOption { + type = types.submodule { + options = { + path = mkOption { + type = types.str; + default = "/var/log/journal"; + }; + }; + }; + default = {}; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + pipelineStages = mkOption { + type = types.listOf types.attrs; + default = []; + }; + enabled = mkOption { + type = types.bool; + default = true; + }; + }; + }); + default = []; + }; + + defaultLabels = mkOption { + type = types.attrsOf types.str; + default = { + hostname = homelabCfg.hostname; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + }; + + # Read-only aggregated data, always exposed through the option defaults + allSources = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allSources; + readOnly = true; + }; + + global = mkOption { + type = types.attrs; + default = if hasNodes then globalAggregation else {}; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # Local setup + services.promtail = mkIf cfg.promtail.enable { + enable = true; + configuration = { + server = { + http_listen_port = cfg.promtail.port; + grpc_listen_port = 0; + }; + positions.filename = "/var/lib/promtail/positions.yaml"; + clients = cfg.promtail.clients; + scrape_configs = map (source: + { + job_name = source.name; + static_configs = [ + { + 
targets = ["localhost"]; + labels = + cfg.defaultLabels + // source.labels + // ( + if source.type == "file" + then { + __path__ = concatStringsSep "," source.files.paths; + } + else {} + ); + } + ]; + # pipeline_stages = source.pipelineStages; + } + // ( + if source.type == "journal" + then { + journal = { + path = source.journal.path; + labels = cfg.defaultLabels // source.labels; + }; + } + else {} + )) + localAggregation.allSources; + }; + }; + + networking.firewall.allowedTCPPorts = optionals cfg.promtail.enable [cfg.promtail.port]; + + # allSources and global are intentionally NOT defined here. They are + # read-only options, and the module system counts an option's default + # as a definition: declaring a default and also assigning the option in + # config aborts evaluation with "is read-only, but it's set multiple + # times". Their values are supplied by the defaults in the option + # declarations above (the same approach lib/systems/monitoring.nix + # uses), so they stay readable even when logging is disabled here. + }; +} diff --git a/modules/homelab/lib/systems/monitoring.nix b/modules/homelab/lib/systems/monitoring.nix new file mode 100644 index 0000000..a44df40 --- /dev/null +++ b/modules/homelab/lib/systems/monitoring.nix @@ -0,0 +1,222 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.monitoring; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Local aggregation from this instance + localAggregation = { + # Metrics from manually configured + automatic node exporter + allMetrics = + cfg.metrics + ++ (optional cfg.nodeExporter.enable { + name = "node-exporter"; + host = homelabCfg.hostname; + port = cfg.nodeExporter.port; + path = "/metrics"; + jobName = "node"; + scrapeInterval = "30s"; + labels = { + instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + }); + + allHealthChecks = cfg.healthChecks; + }; + + # Global aggregation from all nodes (when nodes available) + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + # Aggregate metrics from all nodes + metricsAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.monitoring.allMetrics"; + enhancer = endpoint: + endpoint + // { + _fullAddress = 
"${endpoint.host}:${toString endpoint.port}"; + _metricsUrl = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; + }; + }; + + # Aggregate health checks from all nodes + healthChecksAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.monitoring.allHealthChecks"; + enhancer = check: let + actualHost = check.host; + portPart = + if check.port != null + then ":${toString check.port}" + else ""; + url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path}"; + in + check + // { + _actualHost = actualHost; + _url = url; + }; + }; + in { + allMetrics = metricsAgg.all; + allHealthChecks = healthChecksAgg.all; + + # Useful groupings for services + metricsByJobName = groupBy (m: m.jobName) metricsAgg.all; + healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all; + + summary = { + totalMetrics = length metricsAgg.all; + totalHealthChecks = length healthChecksAgg.all; + nodesCovered = unique (map (m: m._nodeName or m.host) metricsAgg.all); + }; + }; +in { + # Instance-level monitoring options + options.homelab.monitoring = { + enable = mkEnableOption "monitoring system"; + + # Node exporter (automatically enabled) + nodeExporter = { + enable = mkOption { + type = types.bool; + default = true; + }; + port = mkOption { + type = types.port; + default = 9100; + }; + }; + + # Manual metrics (in addition to service auto-registration) + metrics = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + host = mkOption { + type = types.str; + default = homelabCfg.hostname; + }; + port = mkOption {type = types.port;}; + path = mkOption { + type = types.str; + default = "/metrics"; + }; + jobName = mkOption {type = types.str;}; + scrapeInterval = mkOption { + type = types.str; + default = "30s"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + }; + }); + default = []; + }; + + # Manual health checks (in addition to 
service auto-registration) + healthChecks = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + host = mkOption { + type = types.str; + default = homelabCfg.hostname; + }; + port = mkOption { + type = types.nullOr types.port; + default = null; + }; + path = mkOption { + type = types.str; + default = "/"; + }; + protocol = mkOption { + type = types.enum ["http" "https" "tcp" "icmp"]; + default = "http"; + }; + method = mkOption { + type = types.str; + default = "GET"; + }; + interval = mkOption { + type = types.str; + default = "30s"; + }; + timeout = mkOption { + type = types.str; + default = "10s"; + }; + conditions = mkOption { + type = types.listOf types.str; + default = ["[STATUS] == 200"]; + }; + group = mkOption { + type = types.str; + default = "manual"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + }; + enabled = mkOption { + type = types.bool; + default = true; + }; + }; + }); + default = []; + }; + + # Read-only aggregated data (always exposed) + allMetrics = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allMetrics; + readOnly = true; + }; + + allHealthChecks = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allHealthChecks; + readOnly = true; + }; + + # Global aggregation (always available, empty if no nodes) + global = mkOption { + type = types.attrs; + default = globalAggregation; + readOnly = true; + }; + }; + + # Configuration - always includes both local and global + config = mkIf cfg.enable { + # Basic instance setup + services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable { + enable = true; + port = cfg.nodeExporter.port; + enabledCollectors = ["systemd" "textfile" "filesystem" "loadavg" "meminfo" "netdev" "stat"]; + }; + + networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [cfg.nodeExporter.port]; + + # homelab.monitoring = { + # allMetrics = localAggregation.allMetrics; + 
# allHealthChecks = localAggregation.allHealthChecks; + # global = + # if hasNodes + # then globalAggregation + # else {}; + # }; + }; +} diff --git a/modules/homelab/lib/systems/proxy.nix b/modules/homelab/lib/systems/proxy.nix new file mode 100644 index 0000000..a16694d --- /dev/null +++ b/modules/homelab/lib/systems/proxy.nix @@ -0,0 +1,98 @@ +{ + config, + lib, + nodes, + ... +}: +with lib; let + cfg = config.homelab.reverseProxy; + homelabCfg = config.homelab; + hasNodes = length (attrNames nodes) > 0; + + # Local aggregation + localAggregation = { + allEntries = cfg.entries; + }; + + # Global aggregation + globalAggregation = let + baseAgg = import ../aggregators/base.nix {inherit lib;}; + + entriesAgg = baseAgg.aggregateFromNodes { + inherit nodes; + attributePath = "homelab.reverseProxy.allEntries"; + enhancer = entry: + entry + // { + _upstream = "http://${entry.host}:${toString entry.port}${entry.path or ""}"; + _fqdn = "${entry.subdomain}.${entry._nodeConfig.config.homelab.externalDomain or homelabCfg.externalDomain}"; + _internal = "${entry.host}:${toString entry.port}"; + }; + }; + in { + allEntries = entriesAgg.all; + entriesBySubdomain = groupBy (e: e.subdomain) entriesAgg.all; + entriesWithAuth = entriesAgg.filterBy (e: e.enableAuth or false); + entriesWithoutAuth = entriesAgg.filterBy (e: !(e.enableAuth or false)); + summary = { + total = length entriesAgg.all; + byNode = entriesAgg.countBy (e: e._nodeName); + withAuth = length (entriesAgg.filterBy (e: e.enableAuth or false)); + withoutAuth = length (entriesAgg.filterBy (e: !(e.enableAuth or false))); + }; + }; +in { + options.homelab.reverseProxy = { + enable = mkEnableOption "reverse proxy system"; + + entries = mkOption { + type = types.listOf (types.submodule { + options = { + subdomain = mkOption {type = types.str;}; + host = mkOption { + type = types.str; + default = homelabCfg.hostname; + }; + port = mkOption {type = types.port;}; + path = mkOption { + type = types.str; + default = "/"; + }; 
+ enableAuth = mkOption { + type = types.bool; + default = false; + }; + enableSSL = mkOption { + type = types.bool; + default = true; + }; + }; + }); + default = []; + }; + + # Read-only aggregated data, always exposed through the option defaults + allEntries = mkOption { + type = types.listOf types.attrs; + default = localAggregation.allEntries; + readOnly = true; + }; + + global = mkOption { + type = types.attrs; + default = if hasNodes then globalAggregation else {}; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # allEntries and global are intentionally NOT defined here. They are + # read-only options, and the module system counts an option's default + # as a definition: declaring a default and also assigning the option in + # config aborts evaluation with "is read-only, but it's set multiple + # times". Their values are supplied by the defaults in the option + # declarations above (the same approach lib/systems/monitoring.nix + # uses), so they stay readable by other nodes' aggregators even when + # the reverse proxy system is disabled on this host. + }; +} diff --git a/modules/homelab/monitoring-config.nix b/modules/homelab/monitoring-config.nix deleted file mode 100644 index 2490467..0000000 --- a/modules/homelab/monitoring-config.nix +++ /dev/null @@ -1,214 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.monitoring; - homelabCfg = config.homelab; - - metricsEndpointType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the metrics endpoint"; - }; - host = mkOption { - type = types.str; - description = "Domain name of the host (default: hostname.domain)"; - default = "${homelabCfg.hostname}.${homelabCfg.domain}"; - }; - port = mkOption { - type = types.port; - description = "Port number for the endpoint"; - }; - path = mkOption { - type = types.str; - default = "/metrics"; - description = "Path for the metrics endpoint"; - }; - jobName = mkOption { - type = types.str; - description = "Prometheus job name"; - }; - scrapeInterval = mkOption { - type = types.str; - default = "30s"; - description = "Prometheus scrape interval"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for this endpoint"; - }; - }; - }; - - healthCheckEndpointType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the health check endpoint"; - }; - host = 
mkOption { - type = types.str; - description = "Domain name of the host"; - default = "${homelabCfg.hostname}.${homelabCfg.domain}"; - }; - port = mkOption { - type = types.nullOr types.port; - default = null; - description = "Port number for the endpoint (null for standard HTTP/HTTPS)"; - }; - path = mkOption { - type = types.str; - default = "/"; - description = "Path for the health check endpoint"; - }; - protocol = mkOption { - type = types.enum ["http" "https" "tcp" "icmp"]; - default = "http"; - description = "Protocol to use for health checks"; - }; - method = mkOption { - type = types.str; - default = "GET"; - description = "HTTP method for health checks (only applies to http/https)"; - }; - interval = mkOption { - type = types.str; - default = "30s"; - description = "Health check interval"; - }; - timeout = mkOption { - type = types.str; - default = "10s"; - description = "Health check timeout"; - }; - conditions = mkOption { - type = types.listOf types.str; - default = ["[STATUS] == 200"]; - description = "Health check conditions (Gatus format)"; - example = ["[STATUS] == 200" "[BODY].status == UP" "[RESPONSE_TIME] < 500"]; - }; - alerts = mkOption { - type = types.listOf (types.submodule { - options = { - type = mkOption { - type = types.str; - description = "Alert type"; - example = "discord"; - }; - enabled = mkOption { - type = types.bool; - default = true; - description = "Whether this alert is enabled"; - }; - failure-threshold = mkOption { - type = types.int; - default = 3; - description = "Number of failures before alerting"; - }; - success-threshold = mkOption { - type = types.int; - default = 2; - description = "Number of successes before resolving alert"; - }; - }; - }); - default = []; - description = "Alert configurations"; - }; - group = mkOption { - type = types.str; - default = "default"; - description = "Group name for organizing health checks"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = 
"Additional labels for this health check"; - }; - enabled = mkOption { - type = types.bool; - default = true; - description = "Whether this health check is enabled"; - }; - # External domain support - useExternalDomain = mkOption { - type = types.bool; - default = false; - description = "Use external domain instead of internal"; - }; - subdomain = mkOption { - type = types.nullOr types.str; - default = null; - description = "Subdomain for external domain (required if useExternalDomain is true)"; - }; - }; - }; -in { - options.homelab.monitoring = { - enable = mkEnableOption "Homelab monitoring"; - metrics = mkOption { - type = types.listOf metricsEndpointType; - default = []; - description = "Metric endpoints exposed by this system"; - }; - - healthChecks = mkOption { - type = types.listOf healthCheckEndpointType; - default = []; - description = "Health check endpoints for uptime monitoring"; - }; - - nodeExporter = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable node exporter"; - }; - port = mkOption { - type = types.port; - default = 9100; - description = "Node exporter port"; - }; - }; - }; - - config = mkIf cfg.enable { - # Configure node exporter if enabled - services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable { - enable = true; - port = cfg.nodeExporter.port; - enabledCollectors = [ - "systemd" - "textfile" - "filesystem" - "loadavg" - "meminfo" - "netdev" - "stat" - ]; - }; - - # Automatically add node exporter to monitoring endpoints - homelab.monitoring.metrics = mkIf cfg.nodeExporter.enable [ - { - name = "node-exporter"; - port = cfg.nodeExporter.port; - path = "/metrics"; - jobName = "node"; - labels = { - instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - } - ]; - - networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [ - cfg.nodeExporter.port - ]; - }; -} diff --git 
a/modules/homelab/proxy-config.nix b/modules/homelab/proxy-config.nix deleted file mode 100644 index e7236d8..0000000 --- a/modules/homelab/proxy-config.nix +++ /dev/null @@ -1,53 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.reverseProxy; - homelabCfg = config.homelab; - - reverseProxyEntryType = types.submodule { - options = { - subdomain = mkOption { - type = types.str; - description = "Subdomain for the service"; - }; - host = mkOption { - type = types.str; - description = "Host to proxy to"; - default = "${homelabCfg.hostname}.${homelabCfg.domain}"; - }; - port = mkOption { - type = types.port; - description = "Port to proxy to"; - }; - path = mkOption { - type = types.str; - default = "/"; - description = "Path prefix for the service"; - }; - enableAuth = mkOption { - type = types.bool; - default = false; - description = "Enable authentication for this service"; - }; - enableSSL = mkOption { - type = types.bool; - default = true; - description = "Enable SSL for this service"; - }; - }; - }; -in { - options.homelab.reverseProxy = { - entries = mkOption { - type = types.listOf reverseProxyEntryType; - default = []; - description = "Reverse proxy entries for this system"; - }; - }; - - config = { - }; -} diff --git a/modules/homelab/services/default.nix b/modules/homelab/services/default.nix index 2847a3c..2071dd6 100644 --- a/modules/homelab/services/default.nix +++ b/modules/homelab/services/default.nix @@ -1,7 +1,9 @@ { imports = [ ./minio.nix - ./monitoring/gatus.nix - ./monitoring/prometheus.nix + ./gatus.nix + ./prometheus.nix + ./grafana.nix + # ./monitoring/loki.nix ]; } diff --git a/modules/homelab/services/example-service.nix b/modules/homelab/services/example-service.nix deleted file mode 100644 index df59348..0000000 --- a/modules/homelab/services/example-service.nix +++ /dev/null @@ -1,161 +0,0 @@ -# Example showing how to create a service using the standard interface -{ - config, - lib, - pkgs, - ... 
-}: -with lib; let - serviceInterface = import ../lib/service-interface.nix {inherit lib;}; - - cfg = config.homelab.services.grafana; - homelabCfg = config.homelab; - - # Service-specific options beyond the standard interface - grafanaServiceOptions = { - domain = mkOption { - type = types.str; - default = "grafana.${homelabCfg.externalDomain}"; - description = "Domain for Grafana"; - }; - - rootUrl = mkOption { - type = types.str; - default = "https://grafana.${homelabCfg.externalDomain}"; - description = "Root URL for Grafana"; - }; - - dataDir = serviceInterface.commonOptions.dataDir "grafana"; - - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; - }; - - password = mkOption { - type = types.str; - default = "admin"; - description = "Admin password"; - }; - }; - - datasources = { - prometheus = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://localhost:9090"; - description = "Prometheus URL"; - }; - }; - }; - - plugins = mkOption { - type = types.listOf types.package; - default = []; - description = "Grafana plugins to install"; - }; - }; -in { - options.homelab.services.grafana = serviceInterface.mkServiceInterface { - serviceName = "grafana"; - defaultPort = 3000; - defaultSubdomain = "grafana"; - monitoringPath = "/metrics"; - healthCheckPath = "/api/health"; - healthCheckConditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" - ]; - serviceOptions = grafanaServiceOptions; - }; - - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "grafana"; - - extraMonitoringLabels = { - component = "dashboard"; - }; - - customHealthChecks = [ - { - name = "grafana-login"; - port = cfg.port; - path = "/login"; - interval = "60s"; - conditions = [ - "[STATUS] == 200" - "[RESPONSE_TIME] < 3000" - ]; - group = 
"monitoring"; - labels = { - service = "grafana"; - component = "login"; - }; - } - ]; - - serviceConfig = { - services.grafana = { - enable = true; - dataDir = cfg.dataDir; - declarativePlugins = cfg.plugins; - - settings = { - server = { - http_port = cfg.port; - http_addr = "0.0.0.0"; - domain = cfg.domain; - root_url = cfg.rootUrl; - }; - - security = { - admin_user = cfg.admin.user; - admin_password = cfg.admin.password; - }; - }; - - provision = { - enable = true; - datasources.settings.datasources = mkIf cfg.datasources.prometheus.enable [ - { - name = "Prometheus"; - type = "prometheus"; - url = cfg.datasources.prometheus.url; - isDefault = true; - } - ]; - }; - }; - }; - }; -} -# Usage example in your configuration: -/* -{ - homelab.services.grafana = { - enable = true; - # Standard interface options: - port = 3000; # Optional: defaults to 3000 - openFirewall = true; # Optional: defaults to true - proxy.subdomain = "grafana"; # Optional: defaults to "grafana" - proxy.enableAuth = false; # Optional: defaults to false - monitoring.enable = true; # Optional: defaults to true - - # Service-specific options: - admin.password = "secure-password"; - datasources.prometheus.url = "http://prometheus.lab:9090"; - plugins = with pkgs.grafanaPlugins; [ grafana-piechart-panel ]; - }; -} -*/ - diff --git a/modules/homelab/services/gatus.nix b/modules/homelab/services/gatus.nix new file mode 100644 index 0000000..da907c4 --- /dev/null +++ b/modules/homelab/services/gatus.nix @@ -0,0 +1,267 @@ +{ + config, + lib, + ... 
+}: +with lib; let + serviceName = "gatus"; + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; + + # Convert homelab health checks to Gatus format + formatHealthCheck = check: let + # Build the URL based on the health check configuration + url = check._url or "http://${check.host}:${toString (check.port or 80)}${check.path}"; + + # Convert conditions to Gatus format (they should already be compatible) + conditions = check.conditions or ["[STATUS] == 200"]; + + # Convert alerts to Gatus format + alerts = map (alert: { + inherit (alert) type enabled; + failure-threshold = alert.failure-threshold or 3; + success-threshold = alert.success-threshold or 2; + description = "Health check alert for ${check.name}"; + }) (check.alerts or []); + in { + name = check.name; + group = check.group or "default"; + url = url; + interval = check.interval or "30s"; + + # Add method and headers for HTTP/HTTPS checks + method = + if (check.protocol == "http" || check.protocol == "https") + then check.method or "GET" + else null; + + conditions = conditions; + + # Add timeout + client = { + timeout = check.timeout or "10s"; + }; + + # Add alerts if configured + alerts = + if alerts != [] + then alerts + else []; + + # Add labels for UI organization + ui = { + hide-hostname = false; + hide-url = false; + description = "Health check for ${check.name} on ${check.host or check._actualHost or "unknown"}"; + }; + }; + + # Generate Gatus configuration from aggregated health checks + gatusConfig = + recursiveUpdate { + # Global Gatus settings + alerting = mkIf (cfg.alerting != {}) cfg.alerting; + + web = { + address = cfg.web.address; + port = cfg.port; + }; + + # Enable metrics + metrics = cfg.monitoring.enable; + + ui = { + title = cfg.ui.title; + header = cfg.ui.header; + link = cfg.ui.link; + buttons = cfg.ui.buttons; + }; + + storage = cfg.storage; + + # Convert all enabled health checks from the fleet to Gatus endpoints + endpoints = let + # Get all health checks 
- try global first, fallback to local + allHealthChecks = homelabCfg.monitoring.global.allHealthChecks + or homelabCfg.monitoring.allHealthChecks + or []; + + # Filter only enabled health checks + enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks; + + # Convert to Gatus format + gatusEndpoints = map formatHealthCheck enabledHealthChecks; + in + gatusEndpoints; + } + cfg.extraConfig; +in { + imports = [ + (import ../lib/features/monitoring.nix serviceName) + (import ../lib/features/logging.nix serviceName) + (import ../lib/features/proxy.nix serviceName) + ]; + + # Core service options + options.homelab.services.${serviceName} = { + enable = mkEnableOption "Gatus Status Page"; + + port = mkOption { + type = types.port; + default = 8080; + }; + + description = mkOption { + type = types.str; + default = "Gatus Status Page"; + }; + + # Gatus-specific options + ui = { + title = mkOption { + type = types.str; + default = "Homelab Status"; + description = "Title for the Gatus web interface"; + }; + + header = mkOption { + type = types.str; + default = "Homelab Services Status"; + description = "Header text for the Gatus interface"; + }; + + link = mkOption { + type = types.str; + default = "https://status.${homelabCfg.externalDomain}"; + description = "Link in the Gatus header"; + }; + + buttons = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption {type = types.str;}; + link = mkOption {type = types.str;}; + }; + }); + default = [ + { + name = "Grafana"; + link = "https://grafana.${homelabCfg.externalDomain}"; + } + { + name = "Prometheus"; + link = "https://prometheus.${homelabCfg.externalDomain}"; + } + ]; + description = "Navigation buttons in the Gatus interface"; + }; + }; + + alerting = mkOption { + type = types.attrs; + default = {}; + description = "Gatus alerting configuration"; + example = literalExpression '' + { + discord = { + webhook-url = "https://discord.com/api/webhooks/..."; + default-alert = { + 
enabled = true; + description = "Health check failed"; + failure-threshold = 3; + success-threshold = 2; + }; + }; + } + ''; + }; + + storage = mkOption { + type = types.attrs; + default = { + type = "memory"; + }; + description = "Gatus storage configuration"; + example = literalExpression '' + { + type = "postgres"; + path = "postgres://user:password@localhost/gatus?sslmode=disable"; + } + ''; + }; + + web = { + address = mkOption { + type = types.str; + default = "0.0.0.0"; + description = "Web interface bind address"; + }; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + description = "Additional Gatus configuration options"; + }; + }; + + # Service configuration with smart defaults + config = mkIf cfg.enable (mkMerge [ + # Core Gatus service + { + services.gatus = { + enable = true; + settings = gatusConfig; + }; + + networking.firewall.allowedTCPPorts = [cfg.port]; + + homelab.services.${serviceName}.monitoring.enable = mkDefault true; + } + + # Smart defaults for Gatus + (mkIf cfg.monitoring.enable { + homelab.services.${serviceName}.monitoring = mkDefault { + metrics = { + path = "/metrics"; + extraEndpoints = []; + }; + healthCheck = { + path = "/health"; + conditions = [ + "[STATUS] == 200" + "[BODY].status == UP" + "[RESPONSE_TIME] < 1000" + ]; + extraChecks = []; + }; + extraLabels = { + component = "status-monitoring"; + tier = "monitoring"; + }; + }; + }) + + (mkIf cfg.logging.enable { + homelab.services.${serviceName}.logging = mkDefault { + files = ["/var/log/gatus/gatus.log"]; + parsing = { + # Gatus log format: 2024-01-01T12:00:00Z [INFO] message (named groups: timestamp, level, message; extractFields refers to them by name) + regex = "^(?P<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z) \\[(?P<level>\\w+)\\] (?P<message>.*)"; + extractFields = ["level"]; + }; + extraLabels = { + component = "status-monitoring"; + application = "gatus"; + }; + }; + }) + + (mkIf cfg.proxy.enable { + homelab.services.${serviceName}.proxy = mkDefault { + subdomain = "status"; + enableAuth = false; # Status page should be public + }; + }) + 
]); +} diff --git a/modules/homelab/services/grafana.nix b/modules/homelab/services/grafana.nix new file mode 100644 index 0000000..5f5aad9 --- /dev/null +++ b/modules/homelab/services/grafana.nix @@ -0,0 +1,86 @@ +{ + config, + lib, + pkgs, + ... +}: +with lib; let + serviceName = "grafana"; + cfg = config.homelab.services.${serviceName}; +in { + imports = [ + (import ../lib/features/monitoring.nix serviceName) + (import ../lib/features/logging.nix serviceName) + (import ../lib/features/proxy.nix serviceName) + ]; + + options.homelab.services.${serviceName} = { + enable = mkEnableOption "Grafana Dashboard"; + + port = mkOption { + type = types.port; + default = 3000; + }; + + description = mkOption { + type = types.str; + default = "Grafana Metrics Dashboard"; + }; + }; + + config = mkIf cfg.enable (mkMerge [ + # Core Grafana service + { + services.grafana = { + enable = true; + settings.server = { + http_port = cfg.port; + http_addr = "0.0.0.0"; + }; + }; + + networking.firewall.allowedTCPPorts = [cfg.port]; + + homelab.services.${serviceName}.monitoring.enable = mkDefault true; + } + + # Smart defaults for Grafana + (mkIf cfg.logging.enable { + # Grafana-specific log setup + homelab.services.${serviceName}.logging = mkDefault { + files = ["/var/log/grafana/grafana.log"]; + parsing = { + # Grafana log format: t=2024-01-01T12:00:00Z lvl=info msg="message" (named groups: timestamp, level, message; extractFields refers to them by name) + regex = "^t=(?P<timestamp>[^ ]+) lvl=(?P<level>\\w+) msg=\"(?P<message>[^\"]*)\""; + extractFields = ["level"]; + }; + extraLabels = { + application = "grafana"; + component = "dashboard"; + }; + }; + }) + + (mkIf cfg.monitoring.enable { + homelab.services.${serviceName}.monitoring = mkDefault { + metrics.path = "/metrics"; + healthCheck = { + path = "/api/health"; + conditions = ["[STATUS] == 200" "[BODY].database == ok"]; + }; + extraLabels = { + component = "dashboard"; + tier = "monitoring"; + }; + }; + }) + + (mkIf cfg.proxy.enable { + # Grafana needs auth by default (admin interface) + homelab.services.${serviceName}.proxy = 
mkDefault { + subdomain = "grafana"; + # enableAuth = true; + }; + }) + ]); +} diff --git a/modules/homelab/services/jellyfin.nix b/modules/homelab/services/jellyfin.nix deleted file mode 100644 index 1aac7e5..0000000 --- a/modules/homelab/services/jellyfin.nix +++ /dev/null @@ -1,125 +0,0 @@ -# modules/services/jellyfin.nix -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.services.jellyfin; -in { - options.services.jellyfin = { - enable = mkEnableOption "Jellyfin media server"; - - port = mkOption { - type = types.port; - default = 8096; - description = "Port for Jellyfin web interface"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/jellyfin"; - description = "Directory to store Jellyfin data"; - }; - - mediaDir = mkOption { - type = types.str; - default = "/media"; - description = "Directory containing media files"; - }; - - enableMetrics = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus metrics"; - }; - - exposeWeb = mkOption { - type = types.bool; - default = true; - description = "Expose web interface through reverse proxy"; - }; - }; - - config = mkIf cfg.enable { - # Enable the service - services.jellyfin = { - enable = true; - dataDir = cfg.dataDir; - }; - - # Configure global settings - homelab.global = { - # Add backup job for Jellyfin data - backups.jobs = [ - { - name = "jellyfin-config"; - backend = "restic"; - paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"]; - schedule = "0 2 * * *"; # Daily at 2 AM - excludePatterns = [ - "*/cache/*" - "*/transcodes/*" - "*/logs/*" - ]; - preHook = '' - # Stop jellyfin for consistent backup - systemctl stop jellyfin - ''; - postHook = '' - # Restart jellyfin after backup - systemctl start jellyfin - ''; - } - { - name = "jellyfin-media"; - backend = "restic"; - paths = [cfg.mediaDir]; - schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM - excludePatterns = [ - "*.tmp" - "*/.@__thumb/*" # Synology thumbnails - ]; - } - ]; - - # 
Add reverse proxy entry if enabled - reverseProxy.entries = mkIf cfg.exposeWeb [ - { - subdomain = "jellyfin"; - port = cfg.port; - enableAuth = false; # Jellyfin has its own auth - websockets = true; - customHeaders = { - "X-Forwarded-Proto" = "$scheme"; - "X-Forwarded-Host" = "$host"; - }; - } - ]; - - # Add monitoring endpoint if metrics enabled - monitoring.endpoints = mkIf cfg.enableMetrics [ - { - name = "jellyfin"; - port = cfg.port; - path = "/metrics"; # Assuming you have a metrics plugin - jobName = "jellyfin"; - scrapeInterval = "60s"; - labels = { - service = "jellyfin"; - type = "media-server"; - }; - } - ]; - }; - - # Open firewall - networking.firewall.allowedTCPPorts = [cfg.port]; - - # Create media directory - systemd.tmpfiles.rules = [ - "d ${cfg.mediaDir} 0755 jellyfin jellyfin -" - ]; - }; -} diff --git a/modules/homelab/services/monitoring/gatus.nix b/modules/homelab/services/monitoring/gatus.nix index 8d1f20f..60f0700 100644 --- a/modules/homelab/services/monitoring/gatus.nix +++ b/modules/homelab/services/monitoring/gatus.nix @@ -4,110 +4,13 @@ ... 
}: with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + cfg = config.homelab.services.gatus; homelabCfg = config.homelab; - # Convert our health check format to Gatus format - formatHealthCheck = check: let - # Build the URL - url = check._url; - - # Convert conditions to Gatus format (they should already be compatible) - conditions = check.conditions or ["[STATUS] == 200"]; - - # Convert alerts to Gatus format - alerts = map (alert: { - inherit (alert) type enabled; - failure-threshold = alert.failure-threshold or 3; - success-threshold = alert.success-threshold or 2; - description = "Health check alert for ${check.name}"; - }) (check.alerts or []); - in { - name = check.name; - group = check.group or "default"; - url = url; - interval = check.interval or "30s"; - - # Add method and headers for HTTP/HTTPS checks - method = - if (check.protocol == "http" || check.protocol == "https") - then check.method or "GET" - else null; - - conditions = conditions; - - # Add timeout - client = { - timeout = check.timeout or "10s"; - }; - - # Add alerts if configured - alerts = - if alerts != [] - then alerts - else []; - - # Add labels for UI organization - ui = { - hide-hostname = false; - hide-url = false; - description = "Health check for ${check.name} on ${check._nodeName}"; - }; - }; - - # Generate Gatus configuration - gatusConfig = { - # Global Gatus settings - alerting = mkIf (cfg.alerting != {}) cfg.alerting; - - web = { - address = "0.0.0.0"; - port = cfg.port; - }; - - # TODO: Introduce monitor option to toggle monitoring - metrics = true; - - ui = { - title = cfg.ui.title; - header = cfg.ui.header; - link = cfg.ui.link; - buttons = cfg.ui.buttons; - }; - - storage = mkIf (cfg.storage != {}) cfg.storage; - - # Convert all enabled health checks to Gatus endpoints - endpoints = let - # Get all health checks from global config - allHealthChecks = homelabCfg.global.monitoring.enabledHealthChecks or []; - - # Group by group name for 
better organization - # groupedChecks = homelabCfg.global.monitoring.healthChecksByGroup or {}; - - # Convert to Gatus format - gatusEndpoints = map formatHealthCheck allHealthChecks; - in - gatusEndpoints; - }; -in { - options.homelab.services.gatus = { - enable = mkEnableOption "Gatus uptime monitoring service"; - - port = mkOption { - type = types.port; - default = 8080; - description = "Port for Gatus web interface"; - }; - - openFirewall = lib.mkOption { - type = lib.types.bool; - default = true; - description = '' - Whether to automatically open the specified ports in the firewall. - ''; - }; - + # Service-specific options beyond the standard interface + gatusServiceOptions = { ui = { title = mkOption { type = types.str; @@ -123,7 +26,7 @@ in { link = mkOption { type = types.str; - default = "https://gatus.${homelabCfg.externalDomain}"; + default = "https://status.${homelabCfg.externalDomain}"; description = "Link in the Gatus header"; }; @@ -186,59 +89,129 @@ in { default = {}; description = "Additional Gatus configuration options"; }; + + web = { + address = mkOption { + type = types.str; + default = "0.0.0.0"; + description = "Web interface bind address"; + }; + }; }; - config = mkIf cfg.enable { - services.gatus = { - enable = true; - openFirewall = cfg.openFirewall; - settings = gatusConfig; + # Convert our health check format to Gatus format + formatHealthCheck = check: let + # Build the URL based on the health check configuration + url = check._url; + + # Convert conditions to Gatus format (they should already be compatible) + conditions = check.conditions or ["[STATUS] == 200"]; + + # Convert alerts to Gatus format + alerts = map (alert: { + inherit (alert) type enabled; + failure-threshold = alert.failure-threshold or 3; + success-threshold = alert.success-threshold or 2; + description = "Health check alert for ${check.name}"; + }) (check.alerts or []); + in { + name = check.name; + group = check.group or "default"; + url = url; + interval = 
check.interval or "30s"; + + # Add method and headers for HTTP/HTTPS checks + method = + if (check.protocol == "http" || check.protocol == "https") + then check.method or "GET" + else null; + + conditions = conditions; + + # Add timeout + client = { + timeout = check.timeout or "10s"; }; - # Add to monitoring endpoints - homelab.monitoring.metrics = [ - { - name = "gatus"; - port = cfg.port; - path = "/metrics"; - jobName = "gatus"; - labels = { - service = "gatus"; - component = "monitoring"; - }; - } - ]; + # Add alerts if configured + alerts = + if alerts != [] + then alerts + else []; - # Add health check for Gatus itself - homelab.monitoring.healthChecks = [ - { - name = "gatus-web-interface"; - port = cfg.port; - path = "/health"; - interval = "30s"; - conditions = [ - "[STATUS] == 200" - "[BODY].status == UP" - "[RESPONSE_TIME] < 1000" - ]; - group = "monitoring"; - labels = { - service = "gatus"; - component = "web-interface"; - }; - } - ]; + # Add labels for UI organization + ui = { + hide-hostname = false; + hide-url = false; + description = "Health check for ${check.name} on ${check.host}"; + }; + }; - # Add reverse proxy entry if needed - homelab.reverseProxy.entries = [ - { - subdomain = "status"; - host = homelabCfg.hostname; + # Generate Gatus configuration + gatusConfig = + recursiveUpdate { + # Global Gatus settings + alerting = mkIf (cfg.alerting != {}) cfg.alerting; + + web = { + address = cfg.web.address; port = cfg.port; - # path = "/"; - # enableAuth = false; # Status page should be publicly accessible - # enableSSL = true; - } + }; + + # Enable metrics + metrics = cfg.monitoring.enable; + + ui = { + title = cfg.ui.title; + header = cfg.ui.header; + link = cfg.ui.link; + buttons = cfg.ui.buttons; + }; + + storage = cfg.storage; + + # Convert all enabled health checks to Gatus endpoints + endpoints = let + # Get all health checks from global config + allHealthChecks = homelabCfg.global.monitoring.allHealthChecks or []; + + # Filter only enabled 
health checks + enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks; + + # Convert to Gatus format + gatusEndpoints = map formatHealthCheck enabledHealthChecks; + in + gatusEndpoints; + } + cfg.extraConfig; +in { + options.homelab.services.gatus = serviceInterface.mkServiceInterface { + serviceName = "gatus"; + defaultPort = 8080; + defaultSubdomain = "status"; + monitoringPath = "/metrics"; + healthCheckPath = "/health"; + healthCheckConditions = [ + "[STATUS] == 200" + "[BODY].status == UP" + "[RESPONSE_TIME] < 1000" ]; + serviceOptions = gatusServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "gatus"; + + extraMonitoringLabels = { + component = "status-monitoring"; + }; + + serviceConfig = { + services.gatus = { + enable = true; + settings = gatusConfig; + }; + }; }; } diff --git a/modules/homelab/services/monitoring/grafana.nix b/modules/homelab/services/monitoring/grafana.nix index 64650cf..8ecb14c 100644 --- a/modules/homelab/services/monitoring/grafana.nix +++ b/modules/homelab/services/monitoring/grafana.nix @@ -5,169 +5,389 @@ ... 
}: with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + cfg = config.homelab.services.grafana; homelabCfg = config.homelab; - # Default dashboards for homelab monitoring + # Default community dashboards with proper configuration defaultDashboards = { - "node-exporter" = pkgs.fetchurl { - url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; - sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + "node-exporter-full" = { + name = "Node Exporter Full"; + id = 12486; + revision = 2; + # url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; + sha256 = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM="; + + url = "https://grafana.com/api/dashboards/12486/revisions/2/download"; }; - "prometheus-stats" = pkgs.fetchurl { + "prometheus-2-0-stats" = { + name = "Prometheus 2.0 Stats"; + id = 2; + revision = 2; url = "https://grafana.com/api/dashboards/2/revisions/2/download"; - sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + sha256 = "sha256-Ydk4LPwfX4qJN8tiWPLWQdtAqzj8CKi6HYsuE+kWcXw="; }; }; - # Grafana provisioning configuration - provisioningConfig = { - # Data sources - datasources = - [ - { - name = "Prometheus"; - type = "prometheus"; - access = "proxy"; - url = cfg.datasources.prometheus.url; - isDefault = true; - editable = false; - jsonData = { - timeInterval = "5s"; - queryTimeout = "60s"; - httpMethod = "POST"; - }; - } - ] - ++ cfg.datasources.extra; + # Function to fetch a dashboard from Grafana.com + fetchGrafanaDashboard = name: config: + pkgs.fetchurl { + inherit (config) url sha256; + name = "${name}-dashboard.json"; + }; - # Dashboard providers - dashboards = [ - { - name = "homelab"; - type = "file"; - disableDeletion = false; - updateIntervalSeconds = 10; - allowUiUpdates = true; - options = { - path = "/var/lib/grafana/dashboards"; + # Git repository management for custom dashboards + 
gitDashboardsRepo = mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") ( + pkgs.fetchgit { + url = cfg.dashboards.git.url; + rev = cfg.dashboards.git.rev; + sha256 = cfg.dashboards.git.sha256; + } + ); + + # Dashboard provisioning configuration + provisionDashboard = name: source: { + "grafana-dashboards/${name}.json" = { + inherit source; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + }; + + # Generate dashboard files from various sources + dashboardFiles = + # Default community dashboards + (foldl' ( + acc: name: + acc // (provisionDashboard name (fetchGrafanaDashboard name defaultDashboards.${name})) + ) {} (attrNames (filterAttrs (n: v: cfg.dashboards.defaults.${n}.enable) cfg.dashboards.defaults))) + # Custom file-based dashboards + // (foldl' ( + acc: dashboard: + acc // (provisionDashboard dashboard.name dashboard.source) + ) {} + cfg.dashboards.files) + # Git-synced dashboards + // (optionalAttrs (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") ( + let + gitDashboards = + if pathExists "${gitDashboardsRepo}/${cfg.dashboards.git.path}" + then builtins.readDir "${gitDashboardsRepo}/${cfg.dashboards.git.path}" + else {}; + in + mapAttrs' ( + filename: type: let + name = removeSuffix ".json" filename; + source = "${gitDashboardsRepo}/${cfg.dashboards.git.path}/${filename}"; + in + nameValuePair "grafana-dashboards/${name}.json" { + inherit source; + user = "grafana"; + group = "grafana"; + mode = "0644"; + } + ) (filterAttrs (name: type: type == "regular" && hasSuffix ".json" name) gitDashboards) + )); + + # Service-specific options beyond the standard interface + grafanaServiceOptions = { + # Authentication settings + auth = { + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; }; - } - ]; - # Notification channels - notifiers = cfg.notifications; - }; -in { - options.homelab.services.grafana = { - enable = mkEnableOption "Grafana dashboard service"; + 
passwordFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to admin password file"; + }; - port = mkOption { - type = types.port; - default = 3000; - description = "Port for Grafana web interface"; - }; - - openFirewall = mkOption { - type = types.bool; - default = true; - description = "Whether to open firewall ports"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/grafana"; - description = "Directory to store Grafana data"; - }; - - domain = mkOption { - type = types.str; - default = "grafana.${homelabCfg.externalDomain}"; - description = "Domain for Grafana"; - }; - - rootUrl = mkOption { - type = types.str; - default = "https://grafana.${homelabCfg.externalDomain}"; - description = "Root URL for Grafana"; - }; - - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; + email = mkOption { + type = types.str; + default = "admin@${homelabCfg.externalDomain}"; + description = "Admin email address"; + }; }; - password = mkOption { - type = types.str; - default = "admin"; - description = "Admin password (change this!)"; + disableLoginForm = mkOption { + type = types.bool; + default = false; + description = "Disable the login form"; }; - email = mkOption { - type = types.str; - default = "admin@${homelabCfg.externalDomain}"; - description = "Admin email"; + oauthAutoLogin = mkOption { + type = types.bool; + default = false; + description = "Enable OAuth auto-login"; + }; + + anonymousAccess = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable anonymous access"; + }; + + orgName = mkOption { + type = types.str; + default = "Homelab"; + description = "Organization name for anonymous users"; + }; + + orgRole = mkOption { + type = types.enum ["Viewer" "Editor" "Admin"]; + default = "Viewer"; + description = "Role for anonymous users"; + }; + }; + + genericOauth = { + enabled = mkOption { + type = types.bool; + default = 
false; + description = "Enable generic OAuth"; + }; + + configFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to OAuth configuration file"; + }; }; }; + # Enhanced datasource configuration datasources = { prometheus = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable Prometheus datasource"; + }; + url = mkOption { type = types.str; - default = "http://localhost:9090"; + default = "http://127.0.0.1:9090"; description = "Prometheus URL"; }; + + uid = mkOption { + type = types.str; + default = "prometheus"; + description = "Unique identifier for Prometheus datasource"; + }; + + scrapeInterval = mkOption { + type = types.str; + default = "15s"; + description = "Default scrape interval for Prometheus"; + }; + + manageAlerts = mkOption { + type = types.bool; + default = true; + description = "Manage alerts in Grafana"; + }; + + exemplarTraceIdDestinations = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Exemplar trace ID destinations"; + }; + }; + + loki = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable Loki datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:3100"; + description = "Loki URL"; + }; + + uid = mkOption { + type = types.str; + default = "loki"; + description = "Unique identifier for Loki datasource"; + }; + + maxLines = mkOption { + type = types.int; + default = 1000; + description = "Maximum lines to return from Loki"; + }; + + derivedFields = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Derived fields configuration for Loki"; + }; + }; + + influxdb = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable InfluxDB datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://127.0.0.1:8086"; + description = "InfluxDB URL"; + }; + + database = mkOption { + type = types.str; + default = 
"homelab"; + description = "InfluxDB database name"; + }; + + tokenFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to InfluxDB token file"; + }; + + uid = mkOption { + type = types.str; + default = "influxdb"; + description = "Unique identifier for InfluxDB datasource"; + }; + + version = mkOption { + type = types.enum ["1.x" "2.x"]; + default = "2.x"; + description = "InfluxDB version"; + }; + + organization = mkOption { + type = types.str; + default = "homelab"; + description = "InfluxDB organization (for v2.x)"; + }; + + bucket = mkOption { + type = types.str; + default = "homelab"; + description = "InfluxDB bucket (for v2.x)"; + }; }; extra = mkOption { type = types.listOf types.attrs; default = []; description = "Additional data sources"; - example = literalExpression '' - [ - { - name = "Loki"; - type = "loki"; - url = "http://localhost:3100"; - } - ] - ''; }; }; - notifications = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Notification channels configuration"; - example = literalExpression '' - [ - { - name = "discord-webhook"; - type = "discord"; - settings = { - url = "https://discord.com/api/webhooks/..."; - username = "Grafana"; + # Enhanced dashboard configuration + dashboards = { + # Default community dashboards + defaults = mkOption { + type = types.attrsOf (types.submodule { + options = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable this default dashboard"; }; + }; + }); + default = mapAttrs (name: config: {enable = false;}) defaultDashboards; + description = "Enable default community dashboards"; + example = literalExpression '' + { + "node-exporter-full".enable = true; + "prometheus-2-0-stats".enable = true; } - ] - ''; + ''; + }; + + # File-based dashboards + files = mkOption { + type = types.listOf (types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Dashboard name (without .json extension)"; 
+ }; + source = mkOption { + type = types.path; + description = "Path to dashboard JSON file"; + }; + }; + }); + default = []; + description = "Dashboard files to provision"; + }; + + # Git-based dashboard sync + git = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable git-based dashboard synchronization"; + }; + + url = mkOption { + type = types.str; + default = ""; + description = "Git repository URL for dashboards"; + }; + + rev = mkOption { + type = types.str; + default = "HEAD"; + description = "Git revision to use"; + }; + + sha256 = mkOption { + type = types.str; + default = ""; + description = "SHA256 hash of the git repository content"; + }; + + path = mkOption { + type = types.str; + default = "."; + description = "Path within the git repository containing dashboards"; + }; + + updateInterval = mkOption { + type = types.str; + default = "1h"; + description = "How often to check for dashboard updates"; + }; + }; + + path = mkOption { + type = types.str; + default = "/etc/grafana-dashboards"; + description = "Path where dashboard files are stored"; + }; }; + # Plugin configuration plugins = mkOption { - type = types.listOf types.str; - default = [ - "grafana-piechart-panel" - "grafana-worldmap-panel" - "grafana-clock-panel" - "grafana-simple-json-datasource" - ]; + type = types.listOf types.package; + default = []; description = "Grafana plugins to install"; }; + # SMTP configuration smtp = { - enabled = mkOption { + enable = mkOption { type = types.bool; default = false; description = "Enable SMTP for email notifications"; @@ -185,10 +405,10 @@ in { description = "SMTP username"; }; - password = mkOption { - type = types.str; - default = ""; - description = "SMTP password"; + passwordFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to SMTP password file"; }; fromAddress = mkOption { @@ -202,9 +422,22 @@ in { default = "Homelab Grafana"; description = "From name"; }; + + 
skipVerify = mkOption { + type = types.bool; + default = false; + description = "Skip SSL certificate verification"; + }; }; + # Security settings security = { + secretKeyFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to secret key file for signing"; + }; + allowEmbedding = mkOption { type = types.bool; default = false; @@ -217,200 +450,279 @@ in { description = "Set secure flag on cookies"; }; - secretKey = mkOption { - type = types.str; - default = "change-this-secret-key"; - description = "Secret key for signing (change this!)"; + contentSecurityPolicy = mkOption { + type = types.bool; + default = true; + description = "Enable Content Security Policy header"; + }; + + strictTransportSecurity = mkOption { + type = types.bool; + default = true; + description = "Enable Strict Transport Security header"; }; }; - auth = { - anonymousEnabled = mkOption { - type = types.bool; - default = false; - description = "Enable anonymous access"; - }; - - disableLoginForm = mkOption { - type = types.bool; - default = false; - description = "Disable login form"; - }; + # Data directory + dataDir = mkOption { + type = types.str; + default = "/var/lib/grafana"; + description = "Directory to store Grafana data"; }; - extraConfig = mkOption { + # Extra Grafana settings + extraSettings = mkOption { type = types.attrs; default = {}; - description = "Additional Grafana configuration"; + description = "Additional Grafana settings"; }; }; - config = mkIf cfg.enable { - services.grafana = { - enable = true; - settings = - recursiveUpdate { - server = { - http_addr = "0.0.0.0"; - http_port = cfg.port; - domain = cfg.domain; - root_url = cfg.rootUrl; - serve_from_sub_path = false; - }; - - database = { - type = "sqlite3"; - path = "${cfg.dataDir}/grafana.db"; - }; - - security = { - admin_user = cfg.admin.user; - admin_password = cfg.admin.password; - admin_email = cfg.admin.email; - allow_embedding = cfg.security.allowEmbedding; - cookie_secure = 
cfg.security.cookieSecure; - secret_key = cfg.security.secretKey; - }; - - users = { - allow_sign_up = false; - auto_assign_org = true; - auto_assign_org_role = "Viewer"; - }; - - auth.anonymous = { - enabled = cfg.auth.anonymousEnabled; - org_name = "Homelab"; - org_role = "Viewer"; - }; - - auth.basic = { - enabled = !cfg.auth.disableLoginForm; - }; - - smtp = mkIf cfg.smtp.enabled { - enabled = true; - host = cfg.smtp.host; - user = cfg.smtp.user; - password = cfg.smtp.password; - from_address = cfg.smtp.fromAddress; - from_name = cfg.smtp.fromName; - }; - - analytics = { - reporting_enabled = false; - check_for_updates = false; - }; - - log = { - mode = "console"; - level = "info"; - }; - - paths = { - data = cfg.dataDir; - logs = "${cfg.dataDir}/log"; - plugins = "${cfg.dataDir}/plugins"; - provisioning = "/etc/grafana/provisioning"; - }; - } - cfg.extraConfig; - - dataDir = cfg.dataDir; + # Enhanced datasource configuration + buildDatasources = let + # Build prometheus datasource + prometheusDatasource = optional cfg.datasources.prometheus.enable { + uid = cfg.datasources.prometheus.uid; + name = "Prometheus"; + type = "prometheus"; + url = cfg.datasources.prometheus.url; + access = "proxy"; + isDefault = true; + editable = false; + jsonData = { + timeInterval = cfg.datasources.prometheus.scrapeInterval; + queryTimeout = "60s"; + httpMethod = "POST"; + manageAlerts = cfg.datasources.prometheus.manageAlerts; + exemplarTraceIdDestinations = cfg.datasources.prometheus.exemplarTraceIdDestinations; + }; }; - # Install plugins - systemd.services.grafana.preStart = mkIf (cfg.plugins != []) ( - concatStringsSep "\n" (map ( - plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true" - ) - cfg.plugins) - ); + # Build loki datasource + lokiDatasource = optional cfg.datasources.loki.enable { + uid = cfg.datasources.loki.uid; + name = "Loki"; + type = "loki"; + url = cfg.datasources.loki.url; + access = "proxy"; + 
editable = false; + jsonData = { + maxLines = cfg.datasources.loki.maxLines; + derivedFields = cfg.datasources.loki.derivedFields; + }; + }; - # Provisioning configuration - environment.etc = - { - "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON { - apiVersion = 1; - datasources = provisioningConfig.datasources; - }; + # Build influxdb datasource + influxdbDatasource = optional cfg.datasources.influxdb.enable { + uid = cfg.datasources.influxdb.uid; + name = "InfluxDB"; + type = "influxdb"; + url = cfg.datasources.influxdb.url; + access = "proxy"; + database = cfg.datasources.influxdb.database; + editable = false; + jsonData = { + dbName = cfg.datasources.influxdb.database; + httpHeaderName1 = "Authorization"; + version = cfg.datasources.influxdb.version; + organization = cfg.datasources.influxdb.organization; + defaultBucket = cfg.datasources.influxdb.bucket; + }; + secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) { + httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}"; + }; + }; - "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON { - apiVersion = 1; - providers = provisioningConfig.dashboards; - }; - } - // (mkIf (cfg.notifications != []) { - "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON { - apiVersion = 1; - notifiers = provisioningConfig.notifiers; - }; - }); - - # Create dashboard directory - systemd.tmpfiles.rules = [ - "d ${cfg.dataDir}/dashboards 0755 grafana grafana -" + # Build extra datasources + extraDatasources = cfg.datasources.extra; + in + prometheusDatasource ++ lokiDatasource ++ influxdbDatasource ++ extraDatasources; +in { + options.homelab.services.grafana = serviceInterface.mkServiceInterface { + serviceName = "grafana"; + defaultPort = 3000; + defaultSubdomain = "grafana"; + monitoringPath = "/metrics"; + healthCheckPath = "/api/health"; + healthCheckConditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" 
]; + serviceOptions = grafanaServiceOptions; + }; - # Open firewall if requested - networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port]; + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "grafana"; - # Add to monitoring endpoints - homelab.monitoring.metrics = [ + extraMonitoringLabels = { + component = "dashboard"; + }; + + # Additional health checks specific to Grafana + customHealthChecks = []; + + serviceConfig = mkMerge [ { - name = "grafana"; - port = cfg.port; - path = "/metrics"; - jobName = "grafana"; - labels = { - service = "grafana"; - component = "monitoring"; + services.grafana = { + enable = true; + dataDir = cfg.dataDir; + # declarativePlugins = + # cfg.plugins + # ++ (with pkgs.grafanaPlugins; [ + # grafana-exploretraces-app + # grafana-metricsdrilldown-app + # grafana-pyroscope-app + # grafana-lokiexplore-app + # grafana-worldmap-panel + # grafana-piechart-panel + # ]); + + settings = + recursiveUpdate { + server = { + http_port = cfg.port; + http_addr = "0.0.0.0"; + domain = "${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; + root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; + serve_from_sub_path = false; + }; + + database = { + type = "sqlite3"; + path = "${cfg.dataDir}/grafana.db"; + }; + + security = + { + admin_user = cfg.auth.admin.user; + admin_email = cfg.auth.admin.email; + # allow_embedding = cfg.security.allowEmbedding; + # cookie_secure = cfg.security.cookieSecure; + # content_security_policy = cfg.security.contentSecurityPolicy; + # strict_transport_security = cfg.security.strictTransportSecurity; + } + // (optionalAttrs (cfg.auth.admin.passwordFile != null) { + admin_password = "$__file{${cfg.auth.admin.passwordFile}}"; + }) + // (optionalAttrs (cfg.security.secretKeyFile != null) { + secret_key = "$__file{${cfg.security.secretKeyFile}}"; + }); + + users = { + allow_sign_up = false; + auto_assign_org = true; + auto_assign_org_role = "Viewer"; 
+ }; + + "auth.anonymous" = { + enabled = cfg.auth.anonymousAccess.enable; + org_name = cfg.auth.anonymousAccess.orgName; + org_role = cfg.auth.anonymousAccess.orgRole; + }; + + "auth.basic" = { + enabled = !cfg.auth.disableLoginForm; + }; + + "auth.generic_oauth" = + mkIf cfg.auth.genericOauth.enabled { + enabled = true; + } + // (optionalAttrs (cfg.auth.genericOauth.configFile != null) { + client_id = "$__file{${cfg.auth.genericOauth.configFile}}"; + }); + + smtp = mkIf cfg.smtp.enable ({ + enabled = true; + host = cfg.smtp.host; + user = cfg.smtp.user; + from_address = cfg.smtp.fromAddress; + from_name = cfg.smtp.fromName; + skip_verify = cfg.smtp.skipVerify; + } + // (optionalAttrs (cfg.smtp.passwordFile != null) { + password = "$__file{${cfg.smtp.passwordFile}}"; + })); + + analytics = { + reporting_enabled = false; + check_for_updates = false; + }; + news.news_feed_enabled = false; + + feature_toggles = { + provisioning = true; + kubernetesDashboards = true; + }; + # paths = { + # plugins = "${cfg.dataDir}/plugins"; + # provisioning = "/etc/grafana/provisioning"; + # }; + } + cfg.extraSettings; + + provision = { + enable = true; + + datasources.settings.datasources = buildDatasources; + + dashboards.settings.providers = [ + { + name = "homelab-dashboards"; + type = "file"; + disableDeletion = false; + updateIntervalSeconds = 10; + allowUiUpdates = true; + options = { + path = cfg.dashboards.path; + }; + } + ]; + }; }; - } - ]; - # Add health checks - homelab.monitoring.healthChecks = [ - { - name = "grafana-web-interface"; - port = cfg.port; - path = "/api/health"; - interval = "30s"; - conditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" + # Provision dashboard files + environment.etc = dashboardFiles; + + # Ensure dashboard directory exists + systemd.tmpfiles.rules = [ + "d ${cfg.dashboards.path} 0755 grafana grafana -" ]; - group = "monitoring"; - labels = { - service = "grafana"; - component = "web-interface"; - }; } - { 
- name = "grafana-login-page"; - port = cfg.port; - path = "/login"; - interval = "60s"; - conditions = [ - "[STATUS] == 200" - "[RESPONSE_TIME] < 3000" - ]; - group = "monitoring"; - labels = { - service = "grafana"; - component = "login"; - }; - } - ]; - # Add reverse proxy entry - homelab.reverseProxy.entries = [ - { - subdomain = "grafana"; - host = homelabCfg.hostname; - port = cfg.port; - } + # Git dashboard sync service (if enabled) + (mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") { + systemd.services.grafana-dashboard-sync = { + description = "Sync Grafana dashboards from git"; + after = ["grafana.service"]; + wantedBy = ["multi-user.target"]; + + serviceConfig = { + Type = "oneshot"; + User = "grafana"; + Group = "grafana"; + }; + + script = '' + echo "Syncing dashboards from git repository..." + # Dashboard files are already provisioned via Nix + # This service can be extended for runtime updates if needed + systemctl reload grafana.service + ''; + }; + + systemd.timers.grafana-dashboard-sync = { + description = "Timer for Grafana dashboard sync"; + wantedBy = ["timers.target"]; + + timerConfig = { + OnCalendar = cfg.dashboards.git.updateInterval; + Persistent = true; + }; + }; + }) ]; }; } diff --git a/modules/homelab/services/monitoring/grafana_1.nix b/modules/homelab/services/monitoring/grafana_1.nix new file mode 100644 index 0000000..c5ae73f --- /dev/null +++ b/modules/homelab/services/monitoring/grafana_1.nix @@ -0,0 +1,198 @@ +# Example showing how to create a service using the standard interface +{ + config, + lib, + pkgs, + ... 
+}:
+with lib; let
+  serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
+
+  cfg = config.homelab.services.grafana;
+  homelabCfg = config.homelab;
+
+  # Grafana-specific options layered on top of the standard service interface (passed as serviceOptions below)
+  grafanaServiceOptions = {
+    admin = {
+      user = mkOption {
+        type = types.str;
+        default = "admin";
+        description = "Admin username";
+      };
+
+      passwordFile = mkOption {
+        type = types.str;
+        default = "admin"; # NOTE(review): used as "$__file{...}" below, so this default names a file called "admin" — confirm that is intended
+        description = "Path to the Admin password file";
+      };
+    };
+
+    datasources = {
+      prometheus = {
+        enable = mkOption {
+          type = types.bool;
+          default = true;
+          description = "Enable Prometheus datasource";
+        };
+
+        url = mkOption {
+          type = types.str;
+          default = "http://127.0.0.1:9090";
+          description = "Prometheus URL";
+        };
+
+        uid = mkOption {
+          type = types.str;
+          default = "prometheus";
+          description = "Unique identifier for Prometheus datasource";
+        };
+      };
+
+      loki = {
+        enable = mkOption {
+          type = types.bool;
+          default = false;
+          description = "Enable Loki datasource";
+        };
+
+        url = mkOption {
+          type = types.str;
+          default = "http://127.0.0.1:3100";
+          description = "Loki URL";
+        };
+
+        uid = mkOption {
+          type = types.str;
+          default = "loki";
+          description = "Unique identifier for Loki datasource";
+        };
+      };
+
+      influxdb = {
+        enable = mkOption {
+          type = types.bool;
+          default = false;
+          description = "Enable InfluxDB datasource";
+        };
+
+        url = mkOption {
+          type = types.str;
+          default = "http://127.0.0.1:8086";
+          description = "InfluxDB URL";
+        };
+
+        database = mkOption {
+          type = types.str;
+          default = "homelab";
+          description = "InfluxDB database name";
+        };
+
+        tokenFile = mkOption {
+          type = types.nullOr types.path;
+          default = null;
+          description = "Path to InfluxDB token file";
+        };
+
+        uid = mkOption {
+          type = types.str;
+          default = "influxdb";
+          description = "Unique identifier for InfluxDB datasource";
+        };
+      };
+
+      extra = mkOption {
+        type = types.listOf types.attrs;
+        default = [];
+        description = "Additional data sources";
+      };
+    };
+
+    plugins = mkOption {
+      type = types.listOf types.package;
+      default = [];
+      description = "Grafana plugins to install";
+    };
+  };
+in {
+  options.homelab.services.grafana = serviceInterface.mkServiceInterface {
+    serviceName = "grafana";
+    defaultPort = 3000;
+    defaultSubdomain = "grafana";
+    monitoringPath = "/metrics";
+    healthCheckPath = "/api/health";
+    healthCheckConditions = [
+      "[STATUS] == 200"
+      "[BODY].database == ok"
+      "[RESPONSE_TIME] < 2000"
+    ];
+    serviceOptions = grafanaServiceOptions;
+  };
+
+  config = serviceInterface.mkServiceConfig {
+    inherit config cfg homelabCfg;
+    serviceName = "grafana";
+
+    extraMonitoringLabels = {
+      component = "dashboard";
+    };
+
+    serviceConfig = {
+      services.grafana = {
+        enable = true;
+        declarativePlugins = cfg.plugins;
+
+        settings = {
+          server = {
+            http_port = cfg.port;
+            http_addr = "0.0.0.0";
+            root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}";
+          };
+
+          security = {
+            admin_user = cfg.admin.user;
+            admin_password = "$__file{${cfg.admin.passwordFile}}"; # Grafana reads the password from this file at startup
+          };
+        };
+
+        provision = {
+          enable = true;
+          datasources.settings = {
+            datasources = let
+              # Build datasource list: one entry per enabled backend, followed by cfg.datasources.extra
+              datasources =
+                []
+                ++ optional cfg.datasources.prometheus.enable {
+                  uid = cfg.datasources.prometheus.uid;
+                  name = "Prometheus";
+                  type = "prometheus";
+                  url = cfg.datasources.prometheus.url;
+                }
+                ++ optional cfg.datasources.loki.enable {
+                  uid = cfg.datasources.loki.uid;
+                  name = "Loki";
+                  type = "loki";
+                  url = cfg.datasources.loki.url;
+                }
+                ++ optional cfg.datasources.influxdb.enable {
+                  uid = cfg.datasources.influxdb.uid;
+                  name = "InfluxDB";
+                  type = "influxdb";
+                  url = cfg.datasources.influxdb.url;
+                  access = "proxy";
+                  jsonData = {
+                    dbName = cfg.datasources.influxdb.database;
+                    httpHeaderName1 = "Authorization";
+                  };
+                  secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) { # option is declared as tokenFile; tokenPath does not exist
+                    httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}";
+                  };
+                }
+                ++
cfg.datasources.extra; + in + datasources; + }; + }; + }; + }; + }; +} diff --git a/modules/homelab/services/monitoring/grafana_gg.nix b/modules/homelab/services/monitoring/grafana_gg.nix new file mode 100644 index 0000000..64650cf --- /dev/null +++ b/modules/homelab/services/monitoring/grafana_gg.nix @@ -0,0 +1,416 @@ +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.homelab.services.grafana; + homelabCfg = config.homelab; + + # Default dashboards for homelab monitoring + defaultDashboards = { + "node-exporter" = pkgs.fetchurl { + url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; + sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + }; + "prometheus-stats" = pkgs.fetchurl { + url = "https://grafana.com/api/dashboards/2/revisions/2/download"; + sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this + }; + }; + + # Grafana provisioning configuration + provisioningConfig = { + # Data sources + datasources = + [ + { + name = "Prometheus"; + type = "prometheus"; + access = "proxy"; + url = cfg.datasources.prometheus.url; + isDefault = true; + editable = false; + jsonData = { + timeInterval = "5s"; + queryTimeout = "60s"; + httpMethod = "POST"; + }; + } + ] + ++ cfg.datasources.extra; + + # Dashboard providers + dashboards = [ + { + name = "homelab"; + type = "file"; + disableDeletion = false; + updateIntervalSeconds = 10; + allowUiUpdates = true; + options = { + path = "/var/lib/grafana/dashboards"; + }; + } + ]; + + # Notification channels + notifiers = cfg.notifications; + }; +in { + options.homelab.services.grafana = { + enable = mkEnableOption "Grafana dashboard service"; + + port = mkOption { + type = types.port; + default = 3000; + description = "Port for Grafana web interface"; + }; + + openFirewall = mkOption { + type = types.bool; + default = true; + description = "Whether to open firewall ports"; + }; + + dataDir = mkOption { + type = 
types.str; + default = "/var/lib/grafana"; + description = "Directory to store Grafana data"; + }; + + domain = mkOption { + type = types.str; + default = "grafana.${homelabCfg.externalDomain}"; + description = "Domain for Grafana"; + }; + + rootUrl = mkOption { + type = types.str; + default = "https://grafana.${homelabCfg.externalDomain}"; + description = "Root URL for Grafana"; + }; + + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; + }; + + password = mkOption { + type = types.str; + default = "admin"; + description = "Admin password (change this!)"; + }; + + email = mkOption { + type = types.str; + default = "admin@${homelabCfg.externalDomain}"; + description = "Admin email"; + }; + }; + + datasources = { + prometheus = { + url = mkOption { + type = types.str; + default = "http://localhost:9090"; + description = "Prometheus URL"; + }; + }; + + extra = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Additional data sources"; + example = literalExpression '' + [ + { + name = "Loki"; + type = "loki"; + url = "http://localhost:3100"; + } + ] + ''; + }; + }; + + notifications = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Notification channels configuration"; + example = literalExpression '' + [ + { + name = "discord-webhook"; + type = "discord"; + settings = { + url = "https://discord.com/api/webhooks/..."; + username = "Grafana"; + }; + } + ] + ''; + }; + + plugins = mkOption { + type = types.listOf types.str; + default = [ + "grafana-piechart-panel" + "grafana-worldmap-panel" + "grafana-clock-panel" + "grafana-simple-json-datasource" + ]; + description = "Grafana plugins to install"; + }; + + smtp = { + enabled = mkOption { + type = types.bool; + default = false; + description = "Enable SMTP for email notifications"; + }; + + host = mkOption { + type = types.str; + default = "localhost:587"; + description = "SMTP server host:port"; + }; + + 
user = mkOption {
+        type = types.str;
+        default = "";
+        description = "SMTP username";
+      };
+
+      password = mkOption {
+        type = types.str;
+        default = "";
+        description = "SMTP password";
+      };
+
+      fromAddress = mkOption {
+        type = types.str;
+        default = "grafana@${homelabCfg.externalDomain}";
+        description = "From email address";
+      };
+
+      fromName = mkOption {
+        type = types.str;
+        default = "Homelab Grafana";
+        description = "From name";
+      };
+    };
+
+    security = {
+      allowEmbedding = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Allow embedding Grafana in iframes";
+      };
+
+      cookieSecure = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Set secure flag on cookies";
+      };
+
+      secretKey = mkOption {
+        type = types.str;
+        default = "change-this-secret-key";
+        description = "Secret key for signing (change this!)";
+      };
+    };
+
+    auth = {
+      anonymousEnabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Enable anonymous access";
+      };
+
+      disableLoginForm = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Disable login form";
+      };
+    };
+
+    extraConfig = mkOption {
+      type = types.attrs;
+      default = {};
+      description = "Additional Grafana configuration";
+    };
+  };
+
+  config = mkIf cfg.enable {
+    services.grafana = {
+      enable = true;
+      settings =
+        recursiveUpdate {
+          server = {
+            http_addr = "0.0.0.0";
+            http_port = cfg.port;
+            domain = cfg.domain;
+            root_url = cfg.rootUrl;
+            serve_from_sub_path = false;
+          };
+
+          database = {
+            type = "sqlite3";
+            path = "${cfg.dataDir}/grafana.db";
+          };
+
+          security = {
+            admin_user = cfg.admin.user;
+            admin_password = cfg.admin.password;
+            admin_email = cfg.admin.email;
+            allow_embedding = cfg.security.allowEmbedding;
+            cookie_secure = cfg.security.cookieSecure;
+            secret_key = cfg.security.secretKey;
+          };
+
+          users = {
+            allow_sign_up = false;
+            auto_assign_org = true;
+            auto_assign_org_role = "Viewer";
+          };
+
+          "auth.anonymous" = { # quoted: the INI section name contains a dot; unquoted it would nest under `auth`
+            enabled = cfg.auth.anonymousEnabled;
+            org_name = "Homelab";
+            org_role = "Viewer";
+          };
+
+          "auth.basic" = { # quoted for the same reason as "auth.anonymous"
+            enabled = !cfg.auth.disableLoginForm;
+          };
+
+          smtp = mkIf cfg.smtp.enabled {
+            enabled = true;
+            host = cfg.smtp.host;
+            user = cfg.smtp.user;
+            password = cfg.smtp.password;
+            from_address = cfg.smtp.fromAddress;
+            from_name = cfg.smtp.fromName;
+          };
+
+          analytics = {
+            reporting_enabled = false;
+            check_for_updates = false;
+          };
+
+          log = {
+            mode = "console";
+            level = "info";
+          };
+
+          paths = {
+            data = cfg.dataDir;
+            logs = "${cfg.dataDir}/log";
+            plugins = "${cfg.dataDir}/plugins";
+            provisioning = "/etc/grafana/provisioning";
+          };
+        }
+        cfg.extraConfig;
+
+      dataDir = cfg.dataDir;
+    };
+
+    # Install plugins (best effort: a failed install does not block Grafana from starting)
+    systemd.services.grafana.preStart = mkIf (cfg.plugins != []) (
+      concatStringsSep "\n" (map (
+          plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true"
+        )
+        cfg.plugins)
+    );
+
+    # Provisioning configuration (JSON is emitted into the .yaml files via builtins.toJSON)
+    environment.etc =
+      {
+        "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON {
+          apiVersion = 1;
+          datasources = provisioningConfig.datasources;
+        };
+
+        "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON {
+          apiVersion = 1;
+          providers = provisioningConfig.dashboards;
+        };
+      }
+      // (optionalAttrs (cfg.notifications != []) { # plain attrset so `//` merges it; mkIf's wrapper here would discard the two files above
+        "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON {
+          apiVersion = 1;
+          notifiers = provisioningConfig.notifiers;
+        };
+      });
+
+    # Create dashboard directory
+    systemd.tmpfiles.rules = [
+      "d ${cfg.dataDir}/dashboards 0755 grafana grafana -"
+    ];
+
+    # Open firewall if requested
+    networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port];
+
+    # Add to monitoring endpoints
+    homelab.monitoring.metrics = [
+      {
+        name = "grafana";
+        port = cfg.port;
+        path = "/metrics";
+        jobName = "grafana";
+        labels = {
+          service = "grafana";
+          component = "monitoring";
+        };
+      }
+    ];
+
+    # Add health checks
+    
homelab.monitoring.healthChecks = [ + { + name = "grafana-web-interface"; + port = cfg.port; + path = "/api/health"; + interval = "30s"; + conditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "web-interface"; + }; + } + { + name = "grafana-login-page"; + port = cfg.port; + path = "/login"; + interval = "60s"; + conditions = [ + "[STATUS] == 200" + "[RESPONSE_TIME] < 3000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "login"; + }; + } + ]; + + # Add reverse proxy entry + homelab.reverseProxy.entries = [ + { + subdomain = "grafana"; + host = homelabCfg.hostname; + port = cfg.port; + } + ]; + }; +} diff --git a/modules/homelab/services/monitoring/influxdb.nix b/modules/homelab/services/monitoring/influxdb.nix index e69de29..75bd525 100644 --- a/modules/homelab/services/monitoring/influxdb.nix +++ b/modules/homelab/services/monitoring/influxdb.nix @@ -0,0 +1,399 @@ +{ + config, + lib, + pkgs, + ... 
+}: +with lib; let + serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; + + cfg = config.homelab.services.influxdb; + homelabCfg = config.homelab; + + # Service-specific options beyond the standard interface + influxdbServiceOptions = { + version = mkOption { + type = types.enum ["1" "2"]; + default = "2"; + description = "InfluxDB version to use"; + }; + + dataDir = mkOption { + type = types.str; + default = "/var/lib/influxdb"; + description = "Directory to store InfluxDB data"; + }; + + # InfluxDB 2.x options + v2 = { + org = mkOption { + type = types.str; + default = "homelab"; + description = "Initial organization name"; + }; + + bucket = mkOption { + type = types.str; + default = "homelab"; + description = "Initial bucket name"; + }; + + username = mkOption { + type = types.str; + default = "admin"; + description = "Initial admin username"; + }; + + password = mkOption { + type = types.str; + default = "changeme"; + description = "Initial admin password"; + }; + + retention = mkOption { + type = types.str; + default = "30d"; + description = "Default retention period"; + }; + + tokenFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "File containing the admin token"; + }; + }; + + # InfluxDB 1.x options + v1 = { + database = mkOption { + type = types.str; + default = "homelab"; + description = "Default database name"; + }; + + retention = mkOption { + type = types.str; + default = "30d"; + description = "Default retention period"; + }; + + adminUser = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; + }; + + adminPassword = mkOption { + type = types.str; + default = "changeme"; + description = "Admin password"; + }; + + httpAuth = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable HTTP authentication"; + }; + }; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + description = "Additional InfluxDB 
configuration"; + }; + + backup = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable automatic backups"; + }; + + schedule = mkOption { + type = types.str; + default = "daily"; + description = "Backup schedule"; + }; + + retention = mkOption { + type = types.str; + default = "7d"; + description = "Backup retention period"; + }; + }; + }; + + # Generate configuration based on version + influxdbConfig = + if cfg.version == "2" + then + recursiveUpdate { + bolt-path = "${cfg.dataDir}/influxd.bolt"; + engine-path = "${cfg.dataDir}/engine"; + http-bind-address = "0.0.0.0:${toString cfg.port}"; + reporting-disabled = true; + log-level = "info"; + } + cfg.extraConfig + else + recursiveUpdate { + meta = { + dir = "${cfg.dataDir}/meta"; + }; + data = { + dir = "${cfg.dataDir}/data"; + wal-dir = "${cfg.dataDir}/wal"; + }; + http = { + bind-address = "0.0.0.0:${toString cfg.port}"; + auth-enabled = cfg.v1.httpAuth.enable; + }; + logging = { + level = "info"; + }; + reporting-disabled = true; + } + cfg.extraConfig; +in { + options.homelab.services.influxdb = serviceInterface.mkServiceInterface { + serviceName = "influxdb"; + defaultPort = 8086; + defaultSubdomain = "influxdb"; + monitoringPath = "/metrics"; + healthCheckPath = + if cfg.version == "2" + then "/health" + else "/ping"; + healthCheckConditions = + if cfg.version == "2" + then ["[STATUS] == 200" "[BODY].status == pass"] + else ["[STATUS] == 204" "[RESPONSE_TIME] < 1000"]; + serviceOptions = influxdbServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "influxdb"; + + extraMonitoringLabels = { + component = "timeseries-database"; + version = cfg.version; + }; + + customHealthChecks = + [ + { + name = "influxdb-query"; + port = cfg.port; + path = + if cfg.version == "2" + then "/api/v2/query" + else "/query"; + interval = "60s"; + method = "POST"; + conditions = [ + "[STATUS] < 500" + "[RESPONSE_TIME] < 3000" + ]; + 
group = "monitoring";
+          labels = {
+            service = "influxdb";
+            component = "query-engine";
+          };
+        }
+      ]
+      ++ optional (cfg.version == "2") {
+        name = "influxdb-write";
+        port = cfg.port;
+        path = "/api/v2/write";
+        interval = "60s";
+        method = "POST";
+        conditions = [
+          "[STATUS] < 500"
+          "[RESPONSE_TIME] < 2000"
+        ];
+        group = "monitoring";
+        labels = {
+          service = "influxdb";
+          component = "write-engine";
+        };
+      };
+
+    serviceConfig = mkMerge [
+      # Common configuration
+      {
+        # Create data directories
+        systemd.tmpfiles.rules =
+          [
+            "d ${cfg.dataDir} 0755 influxdb influxdb -"
+          ]
+          ++ optionals (cfg.version == "1") [
+            "d ${cfg.dataDir}/meta 0755 influxdb influxdb -"
+            "d ${cfg.dataDir}/data 0755 influxdb influxdb -"
+            "d ${cfg.dataDir}/wal 0755 influxdb influxdb -"
+          ];
+
+        # Ensure influxdb user exists
+        users.users.influxdb = {
+          isSystemUser = true;
+          group = "influxdb";
+          home = cfg.dataDir;
+          createHome = true;
+        };
+
+        users.groups.influxdb = {};
+      }
+
+      # InfluxDB 2.x configuration
+      (mkIf (cfg.version == "2") {
+        services.influxdb2 = {
+          enable = true;
+          dataDir = cfg.dataDir; # NOTE(review): confirm the NixOS influxdb2 module declares a dataDir option
+          settings = influxdbConfig;
+        };
+
+        # Initial setup for InfluxDB 2.x (oneshot, ordered after influxdb2.service)
+        systemd.services.influxdb2-setup = {
+          description = "InfluxDB 2.x initial setup";
+          after = ["influxdb2.service"];
+          wants = ["influxdb2.service"];
+          wantedBy = ["multi-user.target"];
+          serviceConfig = {
+            Type = "oneshot";
+            RemainAfterExit = true;
+            User = "influxdb";
+            Group = "influxdb";
+          };
+          script = let
+            setupScript = pkgs.writeShellScript "influxdb2-setup" ''
+              # Wait for InfluxDB to be ready (poll /health once per second, up to 60 tries)
+              timeout=60
+              while [ $timeout -gt 0 ]; do
+                if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/health > /dev/null 2>&1; then
+                  break
+                fi
+                sleep 1
+                timeout=$((timeout - 1))
+              done
+
+              # Run setup only while the API reports "allowed": true; the endpoint answers 200 either way, so the body must be inspected
+              if ${pkgs.curl}/bin/curl -sf http://localhost:${toString cfg.port}/api/v2/setup 2>/dev/null | ${pkgs.gnugrep}/bin/grep -Eq '"allowed":[[:space:]]*true'; then
+                # Setup InfluxDB if not already done
+                ${pkgs.influxdb2}/bin/influx setup \
+                  --host http://localhost:${toString cfg.port} \
+                  --org "${cfg.v2.org}" \
+                  --bucket "${cfg.v2.bucket}" \
+                  --username "${cfg.v2.username}" \
+                  --password "${cfg.v2.password}" \
+                  --retention "${cfg.v2.retention}" \
+                  --force
+              fi
+            '';
+          in "${setupScript}";
+        };
+      })
+
+      # InfluxDB 1.x configuration
+      (mkIf (cfg.version == "1") {
+        services.influxdb = {
+          enable = true;
+          dataDir = cfg.dataDir;
+          extraConfig = influxdbConfig;
+        };
+
+        # Initial setup for InfluxDB 1.x (only when HTTP auth is enabled)
+        systemd.services.influxdb-setup = mkIf cfg.v1.httpAuth.enable {
+          description = "InfluxDB 1.x initial setup";
+          after = ["influxdb.service"];
+          wants = ["influxdb.service"];
+          wantedBy = ["multi-user.target"];
+          serviceConfig = {
+            Type = "oneshot";
+            RemainAfterExit = true;
+            User = "influxdb";
+            Group = "influxdb";
+          };
+          script = let
+            setupScript = pkgs.writeShellScript "influxdb-setup" ''
+              # Wait for InfluxDB to be ready
+              timeout=60
+              while [ $timeout -gt 0 ]; do
+                if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/ping > /dev/null 2>&1; then
+                  break
+                fi
+                sleep 1
+                timeout=$((timeout - 1))
+              done
+
+              # Create admin user
+              ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -execute "CREATE USER \"${cfg.v1.adminUser}\" WITH PASSWORD '${cfg.v1.adminPassword}' WITH ALL PRIVILEGES" || true
+
+              # Create database
+              ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -username "${cfg.v1.adminUser}" -password "${cfg.v1.adminPassword}" -execute "CREATE DATABASE \"${cfg.v1.database}\"" || true
+
+              # Set retention policy
+              ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -username "${cfg.v1.adminUser}" -password "${cfg.v1.adminPassword}" -database "${cfg.v1.database}" -execute "CREATE RETENTION POLICY \"default\" ON \"${cfg.v1.database}\" DURATION ${cfg.v1.retention} REPLICATION 1 DEFAULT" || true
+            '';
+          in "${setupScript}";
+        };
+      })
+
+      # Backup configuration
+      (mkIf cfg.backup.enable {
+        systemd.services.influxdb-backup = {
+          description = "InfluxDB backup";
+          serviceConfig = {
+            Type = "oneshot";
+            User = "influxdb";
+            Group = "influxdb";
+          };
+          script = let
+            backupScript =
+              if cfg.version == "2"
+              then
+                pkgs.writeShellScript "influxdb2-backup" ''
+                  backup_dir="${cfg.dataDir}/backups/$(date +%Y%m%d_%H%M%S)"
+                  mkdir -p "$backup_dir"
+                  ${pkgs.influxdb2}/bin/influx backup \
+                    --host http://localhost:${toString cfg.port} \
+                    --org "${cfg.v2.org}" \
+                    "$backup_dir"
+
+                  # Clean old backups: find -mtime takes a bare day count, so the "d" suffix of the retention value is stripped
+                  find "${cfg.dataDir}/backups" -mindepth 1 -maxdepth 1 -type d -mtime +${removeSuffix "d" cfg.backup.retention} -exec rm -rf {} + || true
+                ''
+              else
+                pkgs.writeShellScript "influxdb-backup" ''
+                  backup_dir="${cfg.dataDir}/backups/$(date +%Y%m%d_%H%M%S)"
+                  mkdir -p "$backup_dir"
+                  ${pkgs.influxdb}/bin/influxd backup \
+                    -host localhost:${toString cfg.port} \
+                    -database "${cfg.v1.database}" \
+                    "$backup_dir"
+
+                  # Clean old backups: find -mtime takes a bare day count, so the "d" suffix of the retention value is stripped
+                  find "${cfg.dataDir}/backups" -mindepth 1 -maxdepth 1 -type d -mtime +${removeSuffix "d" cfg.backup.retention} -exec rm -rf {} + || true
+                '';
+          in "${backupScript}";
+        };
+
+        systemd.timers.influxdb-backup = {
+          description = "InfluxDB backup timer";
+          wantedBy = ["timers.target"];
+          timerConfig = {
+            OnCalendar = cfg.backup.schedule;
+            Persistent = true;
+            RandomizedDelaySec = "5m";
+          };
+        };
+
+        # Create backup directory
+        systemd.tmpfiles.rules = [
+          "d ${cfg.dataDir}/backups 0755 influxdb influxdb -"
+        ];
+      })
+    ];
+  };
+}
diff --git a/modules/homelab/services/monitoring/loki.nix b/modules/homelab/services/monitoring/loki.nix
index e69de29..4467b2a 100644
--- a/modules/homelab/services/monitoring/loki.nix
+++ b/modules/homelab/services/monitoring/loki.nix
@@ -0,0 +1,356 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+with lib; let
+  serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
+
+  cfg = config.homelab.services.loki;
+  homelabCfg = config.homelab;
+
+  # Loki-specific options, passed to mkServiceInterface as serviceOptions alongside the standard interface
+  lokiServiceOptions = {
+    # Storage configuration
+    storage = {
+      type = mkOption {
+        type = types.enum ["filesystem" "s3" "gcs"]; # NOTE(review): "gcs" is accepted but no storage_config is generated for it below
+        default = "filesystem";
+        description = "Storage backend type";
+      };
+
+      filesystem = {
+        directory = mkOption {
+          type = types.str;
+          default = "/var/lib/loki";
+          description = "Directory for filesystem storage";
+        };
+      };
+
+      s3 = {
+        endpoint = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "S3 endpoint URL";
+        };
+
+        bucket = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "S3 bucket name";
+        };
+
+        region = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "S3 region";
+        };
+
+        accessKeyId = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "S3 access key ID";
+        };
+
+        secretAccessKey = mkOption {
+          type = types.nullOr types.path;
+          default = null;
+          description = "Path to file containing S3 secret access key";
+        };
+      };
+    };
+
+    # Retention configuration
+    retention = {
+      period = mkOption {
+        type = types.str;
+        default = "168h"; # 7 days
+        description = "Log retention period";
+      };
+
+      streamRetention = mkOption {
+        type = types.listOf (types.submodule {
+          options = {
+            selector = mkOption {
+              type = types.str;
+              description = "Log stream selector";
+              example = "{environment=\"development\"}";
+            };
+            priority = mkOption {
+              type = types.int;
+              description = "Rule priority (higher = more important)";
+              default = 1;
+            };
+            period = mkOption {
+              type = types.str;
+              description = "Retention period for this stream";
+              example = "24h";
+            };
+          };
+        });
+        default = [];
+        description = "Per-stream retention rules";
+      };
+    };
+
+    # Performance tuning
+    limits = {
+      rejectOldSamples = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Reject samples older than max age";
+      };
+
+      rejectOldSamplesMaxAge = mkOption {
+        type = types.str;
+        default = "168h";
+        description = "Maximum age for samples";
+      };
+
+      ingestionRateMB = mkOption {
+        type = types.int;
+        default = 4;
+        description = "Ingestion rate limit in MB/s per tenant";
+      };
+
+      ingestionBurstSizeMB = mkOption {
+        type = types.int;
+        default = 6;
+        description = "Ingestion burst size in MB per tenant";
+      };
+
+      maxStreamsPerUser = mkOption {
+        type = types.int;
+        default = 10000;
+        description = "Maximum number of streams per user";
+      };
+
+      maxLineSize = mkOption {
+        type = types.str;
+        default = "256KB";
+        description = "Maximum line size";
+      };
+    };
+
+    # Authentication
+    auth = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Enable authentication";
+      };
+    };
+
+    # Extra configuration options
+    extraConfig = mkOption {
+      type = types.attrs;
+      default = {};
+      description = "Additional Loki configuration options";
+    };
+
+    # Data directory
+    dataDir = mkOption {
+      type = types.str;
+      default = "/var/lib/loki";
+      description = "Directory to store Loki data";
+    };
+  };
+
+  # Build the Loki configuration: the generated defaults below, deep-merged with cfg.extraConfig (extraConfig wins)
+  lokiConfig =
+    recursiveUpdate {
+      # Server configuration
+      server = {
+        http_listen_port = cfg.port;
+        grpc_listen_port = cfg.port + 1000; # e.g., 3100 -> 4100
+        http_listen_address = "0.0.0.0";
+        grpc_listen_address = "0.0.0.0";
+        log_level = cfg.monitoring.extraLabels.log_level or "info";
+      };
+
+      # Authentication
+      auth_enabled = cfg.auth.enabled;
+
+      # Analytics
+      analytics.reporting_enabled = false;
+
+      # Common configuration for single-binary mode
+      common = {
+        ring = {
+          instance_addr = "127.0.0.1";
+          kvstore.store = "inmemory";
+        };
+        replication_factor = 1;
+        path_prefix = cfg.dataDir;
+      };
+
+      # Schema configuration
+      schema_config = {
+        configs = [
+          {
+            from = "2020-05-15";
+            store = "tsdb";
+            object_store = cfg.storage.type;
+            schema = "v13";
+            index = {
+              prefix = "index_";
+              period = "24h";
+            };
+          }
+        ];
+      };
+
+      # Storage configuration (only the branch matching cfg.storage.type is emitted)
+      storage_config = mkMerge [
+        # Filesystem storage
+        (mkIf (cfg.storage.type == "filesystem") {
+          filesystem.directory = "${cfg.storage.filesystem.directory}/chunks";
+        })
+
+        # S3 storage
+        (mkIf (cfg.storage.type == "s3") {
+          aws =
+            {
+              s3 = cfg.storage.s3.endpoint;
+              bucketnames = cfg.storage.s3.bucket;
+              region = cfg.storage.s3.region;
+              access_key_id = cfg.storage.s3.accessKeyId;
+            }
+            // (optionalAttrs (cfg.storage.s3.secretAccessKey != null) {
+              secret_access_key = "$__file{${cfg.storage.s3.secretAccessKey}}"; # NOTE(review): `$__file{}` is Grafana's interpolation syntax; confirm Loki expands it (Loki documents `${VAR}` with -config.expand-env=true)
+            });
+        })
+      ];
+
+      # Limits configuration
+      limits_config =
+        {
+          reject_old_samples = cfg.limits.rejectOldSamples;
+          reject_old_samples_max_age = cfg.limits.rejectOldSamplesMaxAge;
+          ingestion_rate_mb = cfg.limits.ingestionRateMB;
+          ingestion_burst_size_mb = cfg.limits.ingestionBurstSizeMB;
+          max_streams_per_user = cfg.limits.maxStreamsPerUser;
+          max_line_size = cfg.limits.maxLineSize;
+
+          # Retention configuration
+          retention_period = cfg.retention.period;
+        }
+        // (optionalAttrs (cfg.retention.streamRetention != []) {
+          retention_stream =
+            map (rule: {
+              selector = rule.selector;
+              priority = rule.priority;
+              period = rule.period;
+            })
+            cfg.retention.streamRetention;
+        });
+
+      # Table manager for retention (NOTE(review): presumably has no effect with the tsdb store above - confirm)
+      table_manager = {
+        retention_deletes_enabled = true;
+        retention_period = cfg.retention.period;
+      };
+
+      # Compactor configuration
+      compactor = {
+        working_directory = "${cfg.dataDir}/compactor";
+        # shared_store = cfg.storage.type;
+        compaction_interval = "10m";
+        # retention_enabled = true; # NOTE(review): while this stays disabled, the retention_period limits above are presumably not enforced - confirm
+        # retention_delete_delay = "2h";
+        # retention_delete_worker_count = 150;
+      };
+
+      # Query range configuration
+      query_range = {
+        results_cache = {
+          cache = {
+            embedded_cache = {
+              enabled = true;
+              max_size_mb = 100;
+            };
+          };
+        };
+      };
+
+      # Frontend configuration
+      frontend = {
+        max_outstanding_per_tenant = 256;
+        compress_responses = true;
+      };
+
+      # Query scheduler
+      query_scheduler = {
+        max_outstanding_requests_per_tenant = 256;
+      };
+
+      # Runtime configuration
+      runtime_config = {
+        file = "/etc/loki/runtime.yml";
+      };
+    }
+    cfg.extraConfig;
+in {
+  options.homelab.services.loki = serviceInterface.mkServiceInterface {
+    serviceName = "loki";
+    defaultPort = 3100;
+    defaultSubdomain = "loki";
+    monitoringPath = "/metrics";
+    healthCheckPath = "/ready";
+    healthCheckConditions = [
+      "[STATUS] == 200"
+      "[RESPONSE_TIME] < 2000"
+    ];
+    serviceOptions = lokiServiceOptions;
+  };
+
+  config = serviceInterface.mkServiceConfig {
+    inherit config cfg homelabCfg;
+    serviceName = "loki";
+
+    extraMonitoringLabels = {
+      component = "log-aggregation";
+      log_level = "info";
+    };
+
+    customHealthChecks = [
+      {
+        name = "loki-health";
+        port = cfg.port;
+        # https://grafana.com/docs/loki/latest/reference/loki-http-api/#status-endpoints
+        path = "/loki/api/v1/status/buildinfo";
+        interval = "30s";
+        conditions = ["[STATUS] == 200"];
+        group = "logging";
+        labels = {
+          service = "loki";
+          component = "api";
+        };
+      }
+    ];
+
+    serviceConfig = mkMerge [
+      {
+        services.loki = {
+          enable = true;
+          dataDir = cfg.dataDir;
+          configuration = lokiConfig;
+        };
+
+        # Ensure data directories exist; the chunks path must be the one storage_config.filesystem.directory points at
+        systemd.tmpfiles.rules = [
+          "d ${cfg.dataDir} 0755 loki loki -"
+          "d ${cfg.storage.filesystem.directory}/chunks 0755 loki loki -"
+          "d ${cfg.dataDir}/compactor 0755 loki loki -"
+        ];
+
+        # Runtime configuration file for dynamic updates
+        environment.etc."loki/runtime.yml".text = ''
+          # Runtime configuration for Loki
+          # This file can be updated without restarting Loki
+        '';
+      }
+    ];
+  };
+}
diff --git a/modules/homelab/services/monitoring/prometheus.nix b/modules/homelab/services/monitoring/prometheus.nix
index 76c30ff..b4ac904 100644
--- a/modules/homelab/services/monitoring/prometheus.nix
+++ b/modules/homelab/services/monitoring/prometheus.nix
@@ -19,12 +19,13 @@ with lib; let
       mapAttrsToList (jobName: endpoints: {
         job_name = jobName;
scrape_interval = head endpoints.scrapeInterval or ["30s"]; - static_configs = [ - { - targets = map (endpoint: "${endpoint.host}:${toString endpoint.port}") endpoints; - labels = fold (endpoint: acc: acc // endpoint.labels) {} endpoints; - } - ]; + static_configs = + map + (endpoint: { + targets = ["${endpoint.host}:${toString endpoint.port}"]; + labels = endpoint.labels; + }) + endpoints; metrics_path = head endpoints.path or [null]; }) jobGroups; diff --git a/modules/homelab/services/monitoring/promtail.nix b/modules/homelab/services/monitoring/promtail.nix deleted file mode 100644 index e69de29..0000000 diff --git a/modules/homelab/services/postgres.nix b/modules/homelab/services/postgres.nix deleted file mode 100644 index e69de29..0000000 diff --git a/modules/homelab/services/prometheus.nix b/modules/homelab/services/prometheus.nix new file mode 100644 index 0000000..7457568 --- /dev/null +++ b/modules/homelab/services/prometheus.nix @@ -0,0 +1,252 @@ +{ + config, + lib, + pkgs, + ... 
+}:
+with lib; let
+  serviceName = "prometheus";
+  cfg = config.homelab.services.${serviceName};
+  homelabCfg = config.homelab;
+
+  # Generate one Prometheus scrape job per distinct jobName found in the homelab monitoring data
+  prometheusScrapeConfigs = let
+    # Get all metrics - try global first, fall back to local, then to an empty list
+    allMetrics = homelabCfg.monitoring.global.allMetrics
+      or homelabCfg.monitoring.allMetrics
+      or [];
+
+    jobGroups = groupBy (m: m.jobName) allMetrics;
+
+    scrapeConfigs =
+      mapAttrsToList (jobName: endpoints: {
+        job_name = jobName;
+        scrape_interval = (head endpoints).scrapeInterval or "30s"; # `endpoints` is a list: read the setting from the job's first endpoint (groupBy never yields an empty group)
+        static_configs =
+          map
+          (endpoint: {
+            targets = ["${endpoint.host}:${toString endpoint.port}"];
+            labels = endpoint.labels;
+          })
+          endpoints;
+        metrics_path = (head endpoints).path or "/metrics"; # same as scrape_interval: taken from the first endpoint, with a default when absent
+      })
+      jobGroups;
+  in
+    scrapeConfigs;
+
+  # Standard alerting rules for homelab
+  alertingRules = [
+    {
+      name = "homelab.rules";
+      rules = [
+        {
+          alert = "InstanceDown";
+          expr = "up == 0";
+          for = "5m";
+          labels = {severity = "critical";};
+          annotations = {
+            summary = "Instance {{ $labels.instance }} down";
+            description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
+          };
+        }
+        {
+          alert = "HighCPUUsage";
+          expr = "100 - (avg by(instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80";
+          for = "10m";
+          labels = {severity = "warning";};
+          annotations = {
+            summary = "High CPU usage on {{ $labels.instance }}";
+            description = "CPU usage is above 80% for more than 10 minutes on {{ $labels.instance }}.";
+          };
+        }
+        {
+          alert = "HighMemoryUsage";
+          expr = "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85";
+          for = "10m";
+          labels = {severity = "warning";};
+          annotations = {
+            summary = "High memory usage on {{ $labels.instance }}";
+            description = "Memory usage is above 85% for more than 10 minutes on {{ $labels.instance }}.";
+          };
+        }
+        {
+          alert = "DiskSpaceLow";
+          expr = "((node_filesystem_size_bytes - node_filesystem_avail_bytes) / node_filesystem_size_bytes) * 100 > 90";
+          for = "5m";
+          labels = {severity = "critical";};
+          annotations = {
+            summary = "Disk space low on {{ $labels.instance }}";
+            description = "Disk usage is above 90% on {{ $labels.instance }} {{ $labels.mountpoint }}.";
+          };
+        }
+      ];
+    }
+  ];
+in {
+  imports = [
+    (import ../lib/features/monitoring.nix serviceName)
+    (import ../lib/features/logging.nix serviceName)
+    (import ../lib/features/proxy.nix serviceName)
+  ];
+
+  # Core service options
+  options.homelab.services.${serviceName} = {
+    enable = mkEnableOption "Prometheus Monitoring Server";
+
+    port = mkOption {
+      type = types.port;
+      default = 9090;
+    };
+
+    description = mkOption {
+      type = types.str;
+      default = "Prometheus Monitoring Server";
+    };
+
+    # Prometheus-specific options
+    retention = mkOption {
+      type = types.str;
+      default = "15d";
+      description = "How long to retain metrics data";
+    };
+
+    alertmanager = {
+      enable = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Enable integration with Alertmanager";
+      };
+
+      url = mkOption {
+        type = types.str;
+        default = "alertmanager.${homelabCfg.domain}:9093";
+        description = "Alertmanager URL";
+      };
+    };
+
+    extraScrapeConfigs = mkOption {
+      type = types.listOf types.attrs;
+      default = [];
+      description = "Additional scrape configurations";
+    };
+
+    extraAlertingRules = mkOption {
+      type = types.listOf types.attrs;
+      default = [];
+      description = "Additional alerting rules";
+    };
+
+    globalConfig = mkOption {
+      type = types.attrs;
+      default = {
+        scrape_interval = "15s";
+        evaluation_interval = "15s";
+      };
+      description = "Global Prometheus configuration";
+    };
+
+    extraFlags = mkOption {
+      type = types.listOf types.str;
+      default = [];
+      description = "Extra command line flags";
+    };
+
+    ruleFiles = mkOption {
+      type = types.listOf types.path;
+      default = [];
+      description = "Additional rule files to load";
+    };
+  };
+
+  # Service configuration with smart defaults
+  config = mkIf cfg.enable (mkMerge [
+    # Core Prometheus service
+    {
+      services.prometheus = {
+        enable = true;
+        port = cfg.port;
+        listenAddress = "0.0.0.0";
+        retentionTime = cfg.retention;
+
+        globalConfig = cfg.globalConfig;
+        extraFlags = cfg.extraFlags;
+
+        # Automatically aggregate all metrics from the fleet
+        scrapeConfigs = prometheusScrapeConfigs ++ cfg.extraScrapeConfigs;
+
+        # Include standard + custom alerting rules (one generated file per rule group, then user-supplied files)
+        ruleFiles =
+          map (ruleGroup:
+            pkgs.writeText "${ruleGroup.name}.yml" (builtins.toJSON {
+              groups = [ruleGroup];
+            })) (alertingRules ++ cfg.extraAlertingRules)
+          ++ cfg.ruleFiles;
+
+        # Connect to Alertmanager if enabled
+        alertmanagers = mkIf cfg.alertmanager.enable [
+          {
+            static_configs = [
+              {
+                targets = [cfg.alertmanager.url];
+              }
+            ];
+          }
+        ];
+      };
+
+      networking.firewall.allowedTCPPorts = [cfg.port];
+
+      homelab.services.${serviceName}.monitoring.enable = mkDefault true;
+    }
+
+    # Smart defaults for Prometheus
+    (mkIf cfg.monitoring.enable {
+      homelab.services.${serviceName}.monitoring = mkDefault {
+        metrics = {
+          path = "/metrics";
+          extraEndpoints = [];
+        };
+        healthCheck = {
+          path = "/-/healthy";
+          conditions = ["[STATUS] == 200" "[RESPONSE_TIME] < 1000"];
+          extraChecks = [
+            {
+              name = "prometheus-ready";
+              port = cfg.port;
+              path = "/-/ready";
+              conditions = ["[STATUS] == 200"];
+              group = "monitoring";
+            }
+          ];
+        };
+        extraLabels = {
+          component = "monitoring-server";
+          tier = "monitoring";
+        };
+      };
+    })
+
+    (mkIf cfg.logging.enable {
+      homelab.services.${serviceName}.logging = mkDefault {
+        files = ["/var/log/prometheus/prometheus.log"];
+        parsing = {
+          # Prometheus log format: ts=2024-01-01T12:00:00.000Z caller=main.go:123 level=info msg="message"
+          regex = "^ts=(?P<timestamp>[^ ]+) caller=(?P<caller>[^ ]+) level=(?P<level>\\w+) msg=\"(?P<message>[^\"]*)\""; # group names must cover every entry of extractFields
+          extractFields = ["level" "caller"];
+        };
+        extraLabels = {
+          component = "monitoring-server";
+          application = "prometheus";
+        };
+      };
+    })
+
+    (mkIf cfg.proxy.enable { +
homelab.services.${serviceName}.proxy = mkDefault { + subdomain = "prometheus"; + enableAuth = true; # Admin interface needs protection + }; + }) + ]); +} diff --git a/modules/homelab/services/prometheus_old.nix b/modules/homelab/services/prometheus_old.nix deleted file mode 100644 index 9485b3a..0000000 --- a/modules/homelab/services/prometheus_old.nix +++ /dev/null @@ -1,208 +0,0 @@ -# modules/services/prometheus.nix -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.homelab.services.prometheus; - globalCfg = config.homelab.global; -in { - options.homelab.services.prometheus = { - enable = mkEnableOption "Prometheus monitoring server"; - - port = mkOption { - type = types.port; - default = 9090; - description = "Prometheus server port"; - }; - - webExternalUrl = mkOption { - type = types.str; - default = "http://${globalCfg.hostname}:${toString cfg.port}"; - description = "External URL for Prometheus"; - }; - - retention = mkOption { - type = types.str; - default = "30d"; - description = "Data retention period"; - }; - - scrapeConfigs = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional scrape configurations"; - }; - - alertmanager = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable Alertmanager integration"; - }; - - url = mkOption { - type = types.str; - default = "http://localhost:9093"; - description = "Alertmanager URL"; - }; - }; - }; - - config = mkIf cfg.enable { - # Register service with global homelab config - homelab.global.services.prometheus = { - enable = true; - description = "Metrics collection and monitoring server"; - category = "monitoring"; - ports = [cfg.port]; - tags = ["metrics" "monitoring" "alerting"]; - priority = 20; - dependencies = ["node-exporter"]; - }; - - # Configure the actual Prometheus service - services.prometheus = { - enable = true; - port = cfg.port; - webExternalUrl = cfg.webExternalUrl; - - retentionTime = cfg.retention; - - 
scrapeConfigs = - [ - # Auto-discover monitoring endpoints from global config - { - job_name = "homelab-auto"; - static_configs = [ - { - targets = - map ( - endpoint: "${globalCfg.hostname}:${toString endpoint.port}" - ) - globalCfg.monitoring.endpoints; - } - ]; - scrape_interval = "30s"; - metrics_path = "/metrics"; - } - ] - ++ cfg.scrapeConfigs; - - # Alertmanager configuration - alertmanagers = mkIf cfg.alertmanager.enable [ - { - static_configs = [ - { - targets = [cfg.alertmanager.url]; - } - ]; - } - ]; - - rules = [ - # Basic homelab alerting rules - (pkgs.writeText "homelab-alerts.yml" '' - groups: - - name: homelab - rules: - - alert: ServiceDown - expr: up == 0 - for: 5m - labels: - severity: critical - annotations: - summary: "Service {{ $labels.instance }} is down" - description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes." - - - alert: HighMemoryUsage - expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9 - for: 10m - labels: - severity: warning - annotations: - summary: "High memory usage on {{ $labels.instance }}" - description: "Memory usage is above 90% on {{ $labels.instance }}" - - - alert: HighDiskUsage - expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85 - for: 5m - labels: - severity: warning - annotations: - summary: "High disk usage on {{ $labels.instance }}" - description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}" - '') - ]; - }; - - # Add monitoring endpoint to global config - homelab.global.monitoring.endpoints = [ - { - name = "prometheus"; - port = cfg.port; - path = "/metrics"; - jobName = "prometheus"; - scrapeInterval = "30s"; - labels = { - service = "prometheus"; - role = "monitoring"; - }; - } - ]; - - # Add reverse proxy entry if configured - homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [ - { - subdomain = 
"prometheus"; - port = cfg.port; - path = "/"; - enableAuth = true; - enableSSL = true; - customHeaders = { - "X-Frame-Options" = "DENY"; - "X-Content-Type-Options" = "nosniff"; - }; - } - ]; - - # Add backup job for Prometheus data - homelab.global.backups.jobs = [ - { - name = "prometheus-data"; - backend = "restic"; - paths = ["/var/lib/prometheus2"]; - schedule = "daily"; - retention = { - daily = "7"; - weekly = "4"; - monthly = "3"; - yearly = "1"; - }; - excludePatterns = [ - "*.tmp" - "*/wal/*" - ]; - preHook = '' - # Stop prometheus temporarily for consistent backup - systemctl stop prometheus - ''; - postHook = '' - # Restart prometheus after backup - systemctl start prometheus - ''; - } - ]; - - # Open firewall port - networking.firewall.allowedTCPPorts = [cfg.port]; - - # Create prometheus configuration directory - systemd.tmpfiles.rules = [ - "d /var/lib/prometheus2 0755 prometheus prometheus -" - "d /etc/prometheus 0755 root root -" - ]; - }; -}