diff --git a/flake.lock b/flake.lock index 0fa9b9e..ed23ee6 100644 --- a/flake.lock +++ b/flake.lock @@ -25,11 +25,11 @@ "stable": "stable" }, "locked": { - "lastModified": 1753701727, - "narHash": "sha256-tgiPAFXoSGIm3wUAuKwjk2fgTgZ0rpT90RNfhU5QKJA=", + "lastModified": 1752287590, + "narHash": "sha256-U1IqFnxlgCRrPaeT5IGCdH0j9CNLPFcI/fRAidi0aDQ=", "owner": "zhaofengli", "repo": "colmena", - "rev": "342054695f53c4a27c8dce0a8c9f35ade6d963d6", + "rev": "d2beb694d54db653399b8597c0f6e15e20b26405", "type": "github" }, "original": { @@ -188,11 +188,11 @@ }, "nixpkgs-unstable": { "locked": { - "lastModified": 1753694789, - "narHash": "sha256-cKgvtz6fKuK1Xr5LQW/zOUiAC0oSQoA9nOISB0pJZqM=", + "lastModified": 1753429684, + "narHash": "sha256-9h7+4/53cSfQ/uA3pSvCaBepmZaz/dLlLVJnbQ+SJjk=", "owner": "nixos", "repo": "nixpkgs", - "rev": "dc9637876d0dcc8c9e5e22986b857632effeb727", + "rev": "7fd36ee82c0275fb545775cc5e4d30542899511d", "type": "github" }, "original": { @@ -204,11 +204,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1753766937, - "narHash": "sha256-L4kx7TQsmD7GnNaGGXlLhqZVPwilP7QIwDPw88Wzzrs=", + "lastModified": 1753679156, + "narHash": "sha256-CiYhgWDUG6TF1gHo7hf309KnMNzlU5Y8m6pU/4PPFMI=", "owner": "nixos", "repo": "nixpkgs", - "rev": "23296a5649db3ae810cdc41ecd5055d131a82b11", + "rev": "1e95fd75ac8ec3a9ce1f9cb45e8a8e849ad32aba", "type": "github" }, "original": { diff --git a/hosts/monitor/default.nix b/hosts/monitor/default.nix deleted file mode 100644 index bd72a58..0000000 --- a/hosts/monitor/default.nix +++ /dev/null @@ -1,41 +0,0 @@ -{ - config, - name, - ... -}: { - sops.secrets."restic/default-password" = {}; - - homelab = { - enable = true; - hostname = name; - tags = [name]; - - monitoring.enable = true; - motd.enable = true; - - backups = { - enable = true; - backends = { - restic = { - enable = true; - repository = "/srv/restic-repo"; - passwordFile = config.sops.secrets."restic/default-password".path; - }; - }; - }; - - services.prometheus = { - enable = true; - }; - - services.gatus = { - enable = true; - ui = { - title = "Homelab Status Dashboard"; - header = "My Homelab Services"; - }; - }; - }; - - system.stateVersion = "25.05"; -} diff --git a/hosts/sandbox/default.nix b/hosts/sandbox/default.nix index ebf4475..2782e30 100644 --- a/hosts/sandbox/default.nix +++ b/hosts/sandbox/default.nix @@ -11,7 +11,6 @@ tags = [name]; monitoring.enable = true; - logging.enable = true; motd.enable = true; backups = { @@ -41,10 +40,17 @@ ]; }; - # services.loki.enable = true; - services.prometheus.enable = true; - services.grafana.enable = true; - services.gatus.enable = true; + services.prometheus = { + enable = true; + }; + + services.gatus = { + enable = true; + ui = { + title = "Homelab Status Dashboard"; + header = "My Homelab Services"; + }; + }; }; system.stateVersion = "25.05"; diff --git a/modules/homelab/backup-config.nix b/modules/homelab/backup-config.nix new file mode 100644 index 0000000..e26dcb2 --- /dev/null +++ b/modules/homelab/backup-config.nix @@ -0,0 +1,116 @@ +{ + config, + lib, + ... +}: +with lib; let + cfg = config.homelab.backups; + homelabCfg = config.homelab; + + # Get all defined backend names dynamically + backendNames = attrNames cfg.backends or {}; + + backupJobType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the backup job"; + }; + backend = mkOption { + type = types.enum backendNames; + description = "Backend to use for this backup job"; + }; + backendOptions = mkOption { + type = types.attrs; + default = {}; + description = "Backend-specific options to override or extend the backend configuration"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for this backup job"; + }; + }; + }; +in { + imports = [ + ./backup/restic.nix + # ./backup/borgbackup.nix + ]; + + options.homelab.backups = { + enable = mkEnableOption "Homelab backup system"; + + jobs = mkOption { + type = types.listOf backupJobType; + default = []; + description = "Backup jobs to execute on this system"; + }; + + defaultLabels = mkOption { + type = types.attrsOf types.str; + default = { + hostname = homelabCfg.hostname; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + description = "Default labels applied to all backup jobs"; + }; + + monitoring = mkOption { + type = types.bool; + default = true; + description = "Enable backup monitoring and metrics"; + }; + }; + + config = mkIf cfg.enable { + # Validate that all job backends exist + assertions = [ + { + assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs; + message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends"; + } + ]; + + # Add backup jobs to monitoring endpoints if monitoring is enabled + # homelab.monitoring.endpoints = + # mkIf (cfg.monitoring && config.homelab.monitoring.enable) + # (map (job: { + # name = "backup-${job.name}"; + # port = 9100; # Assuming node exporter collects backup metrics + # path = "/metrics"; + # jobName = "backup"; + # labels = + # cfg.defaultLabels + # // job.labels + # // { + # backup_job = job.name; + # backup_backend = job.backend; + # }; + # }) + # cfg.jobs); + + # Export backup configuration for external consumption + environment.etc."homelab/backup-config.json".text = builtins.toJSON { + backends = + mapAttrs (name: config: { + inherit name; + enabled = config.enable or false; + }) + cfg.backends; + + jobs = + map (job: { + inherit (job) name backend labels; + allLabels = cfg.defaultLabels // job.labels; + paths = job.backendOptions.paths or []; + schedule = job.backendOptions.timerConfig.OnCalendar or job.backendOptions.startAt or "unknown"; + node = homelabCfg.hostname; + environment = homelabCfg.environment; + location = homelabCfg.location; + }) + cfg.jobs; + }; + }; +} diff --git a/modules/homelab/default.nix b/modules/homelab/default.nix index ce19d59..e56aae9 100644 --- a/modules/homelab/default.nix +++ b/modules/homelab/default.nix @@ -1,7 +1,6 @@ { config, lib, - nodes, ... }: with lib; let @@ -10,13 +9,18 @@ with lib; let nodeAgg = import ./lib/node-aggregation.nix {inherit lib;}; in { imports = [ - ./lib/systems/monitoring.nix - ./lib/systems/logging.nix - ./lib/systems/proxy.nix - ./lib/systems/backups.nix + ./monitoring-config.nix + ./proxy-config.nix + ./backup-config.nix + ./motd ./services - ./motd + + # Global aggregation modules + (nodeAgg.mkGlobalModule "monitoring" nodeAgg.aggregators.monitoring) + # (nodeAgg.mkGlobalModule "logs" nodeAgg.aggregators.logs) + (nodeAgg.mkGlobalModule "reverseProxy" nodeAgg.aggregators.reverseProxy) + (nodeAgg.mkGlobalModule "backups" nodeAgg.aggregators.backups) ]; options.homelab = { @@ -57,73 +61,73 @@ in { networking.hostName = cfg.hostname; # Export configuration for external consumption - # environment.etc."homelab/config.json".text = builtins.toJSON { - # inherit (cfg) hostname domain environment location tags; + environment.etc."homelab/config.json".text = builtins.toJSON { + inherit (cfg) hostname domain environment location tags; - # monitoring = { - # # Metrics endpoints (Prometheus, etc.) - # metrics = - # map (endpoint: { - # inherit (endpoint) name host port path jobName scrapeInterval labels; - # url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; - # }) - # cfg.global.monitoring.allMetrics or []; + monitoring = { + # Metrics endpoints (Prometheus, etc.) + metrics = + map (endpoint: { + inherit (endpoint) name host port path jobName scrapeInterval labels; + url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; + }) + cfg.global.monitoring.allMetrics or []; - # # Health check endpoints - # healthChecks = - # map (check: let - # # Determine the host based on useExternalDomain - # actualHost = - # if check.useExternalDomain - # then "${check.subdomain}.${cfg.externalDomain}" - # else check.host; + # Health check endpoints + healthChecks = + map (check: let + # Determine the host based on useExternalDomain + actualHost = + if check.useExternalDomain + then "${check.subdomain}.${cfg.externalDomain}" + else check.host; - # # Build the URL - # portPart = - # if check.port != null - # then ":${toString check.port}" - # else ""; - # url = "${check.protocol}://${actualHost}${portPart}${check.path}"; - # in { - # inherit (check) name protocol method interval timeout conditions alerts group labels enabled; - # host = actualHost; - # port = check.port; - # path = check.path; - # url = url; - # useExternalDomain = check.useExternalDomain; - # subdomain = check.subdomain; - # sourceNode = cfg.hostname; - # }) - # cfg.global.monitoring.allHealthChecks or []; - # }; + # Build the URL + portPart = + if check.port != null + then ":${toString check.port}" + else ""; + url = "${check.protocol}://${actualHost}${portPart}${check.path}"; + in { + inherit (check) name protocol method interval timeout conditions alerts group labels enabled; + host = actualHost; + port = check.port; + path = check.path; + url = url; + useExternalDomain = check.useExternalDomain; + subdomain = check.subdomain; + sourceNode = cfg.hostname; + }) + cfg.global.monitoring.allHealthChecks or []; + }; - # reverseProxy = { - # entries = - # map (entry: { - # inherit (entry) subdomain host port path enableAuth enableSSL; - # internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}"; - # externalHost = "${entry.subdomain}.${cfg.externalDomain}"; - # }) - # cfg.global.reverseProxy.all; - # }; + reverseProxy = { + entries = + map (entry: { + inherit (entry) subdomain host port path enableAuth enableSSL; + internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}"; + externalHost = "${entry.subdomain}.${cfg.externalDomain}"; + }) + cfg.global.reverseProxy.all; + }; - # backups = { - # jobs = - # map (job: { - # inherit (job) name backend labels; - # backupId = job._backupId; - # sourceNode = job._sourceNode; - # }) - # cfg.global.backups.all; + backups = { + jobs = + map (job: { + inherit (job) name backend labels; + backupId = job._backupId; + sourceNode = job._sourceNode; + }) + cfg.global.backups.all; - # backends = cfg.global.backups.allBackends; + backends = cfg.global.backups.allBackends; - # summary = { - # totalJobs = length cfg.global.backups.all; - # jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend; - # jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode; - # }; - # }; - # }; + summary = { + totalJobs = length cfg.global.backups.all; + jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend; + jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode; + }; + }; + }; }; } diff --git a/modules/homelab/lib/aggregators/base.nix b/modules/homelab/lib/aggregators/base.nix deleted file mode 100644 index e32228c..0000000 --- a/modules/homelab/lib/aggregators/base.nix +++ /dev/null @@ -1,55 +0,0 @@ -{lib}: let - inherit (lib) flatten mapAttrs attrValues filterAttrs mapAttrsToList filter groupBy length unique attrByPath splitString; - - # Generic function to aggregate any attribute across nodes - aggregateFromNodes = { - nodes, - attributePath, # e.g. "homelab.monitoring.metrics" or "homelab.backups.jobs" - enhancer ? null, # optional function to enhance each item with node context - }: let - # Extract the attribute from each node using the path - getNestedAttr = path: config: let - pathList = splitString "." path; - in - attrByPath pathList [] config; - - # Get all items from all nodes - allItems = flatten (mapAttrsToList - (nodeName: nodeConfig: let - items = getNestedAttr attributePath nodeConfig.config; - baseEnhancer = item: - item - // { - _nodeName = nodeName; - _nodeConfig = nodeConfig; - _nodeAddress = nodeConfig.config.networking.hostName or nodeName; - }; - finalEnhancer = - if enhancer != null - then (item: enhancer (baseEnhancer item)) - else baseEnhancer; - in - map finalEnhancer items) - nodes); - in { - # Raw aggregated data - all = allItems; - - # Common grouping patterns - byNode = groupBy (item: item._nodeName) allItems; - byType = groupBy (item: item.type or "unknown") allItems; - byService = groupBy (item: item.service or "unknown") allItems; - - # Utility functions for filtering - filterBy = predicate: filter predicate allItems; - ofType = type: filter (item: (item.type or "") == type) allItems; - ofNode = nodeName: filter (item: item._nodeName == nodeName) allItems; - enabled = filter (item: item.enabled or true) allItems; - - # Counting utilities - count = length allItems; - countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems); - }; -in { - inherit aggregateFromNodes; -} diff --git a/modules/homelab/lib/features/logging.nix b/modules/homelab/lib/features/logging.nix deleted file mode 100644 index 010b766..0000000 --- a/modules/homelab/lib/features/logging.nix +++ /dev/null @@ -1,87 +0,0 @@ -serviceName: { - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; -in { - options.homelab.services.${serviceName}.logging = { - enable = mkEnableOption "logging for ${serviceName}"; - - files = mkOption { - type = types.listOf types.str; - default = []; - }; - - parsing = { - regex = mkOption { - type = types.nullOr types.str; - default = null; - }; - - extractFields = mkOption { - type = types.listOf types.str; - default = []; - }; - }; - - multiline = mkOption { - type = types.nullOr (types.submodule { - options = { - firstLineRegex = mkOption {type = types.str;}; - maxWaitTime = mkOption { - type = types.str; - default = "3s"; - }; - }; - }); - default = null; - }; - - extraLabels = mkOption { - type = types.attrsOf types.str; - default = {}; - }; - - extraSources = mkOption { - type = types.listOf types.attrs; - default = []; - }; - }; - - config = mkIf (cfg.enable && cfg.logging.enable) { - homelab.logging.sources = - [ - { - name = "${serviceName}-logs"; - type = "file"; - files = { - paths = cfg.logging.files; - multiline = cfg.logging.multiline; - }; - labels = - cfg.logging.extraLabels - // { - service = serviceName; - node = homelabCfg.hostname; - environment = homelabCfg.environment; - }; - pipelineStages = - mkIf (cfg.logging.parsing.regex != null) [ - { - regex.expression = cfg.logging.parsing.regex; - } - ] - ++ [ - { - labels = listToAttrs (map (field: nameValuePair field null) cfg.logging.parsing.extractFields); - } - ]; - enabled = true; - } - ] - ++ cfg.logging.extraSources; - }; -} diff --git a/modules/homelab/lib/features/monitoring.nix b/modules/homelab/lib/features/monitoring.nix deleted file mode 100644 index 90b36f9..0000000 --- a/modules/homelab/lib/features/monitoring.nix +++ /dev/null @@ -1,108 +0,0 @@ -serviceName: { - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; -in { - # Define the service-specific monitoring options - options.homelab.services.${serviceName}.monitoring = { - enable = mkEnableOption "monitoring for ${serviceName}"; - - metrics = { - enable = mkOption { - type = types.bool; - default = true; - }; - - path = mkOption { - type = types.str; - default = "/metrics"; - }; - - extraEndpoints = mkOption { - type = types.listOf types.attrs; - default = []; - }; - }; - - healthCheck = { - enable = mkOption { - type = types.bool; - default = true; - }; - - path = mkOption { - type = types.str; - default = "/health"; - }; - - conditions = mkOption { - type = types.listOf types.str; - default = ["[STATUS] == 200"]; - }; - - extraChecks = mkOption { - type = types.listOf types.attrs; - default = []; - }; - }; - - extraLabels = mkOption { - type = types.attrsOf types.str; - default = {}; - }; - }; - - # Generate the homelab config automatically when service is enabled - config = mkIf (cfg.enable && cfg.monitoring.enable) { - homelab.monitoring = { - metrics = - [ - { - name = "${serviceName}-main"; - host = homelabCfg.hostname; - port = cfg.port; - path = cfg.monitoring.metrics.path; - jobName = serviceName; - scrapeInterval = "30s"; - labels = - cfg.monitoring.extraLabels - // { - service = serviceName; - node = homelabCfg.hostname; - environment = homelabCfg.environment; - }; - } - ] - ++ cfg.monitoring.metrics.extraEndpoints; - - healthChecks = - [ - { - name = "${serviceName}-health"; - host = homelabCfg.hostname; - port = cfg.port; - path = cfg.monitoring.healthCheck.path; - protocol = "http"; - method = "GET"; - interval = "30s"; - timeout = "10s"; - conditions = cfg.monitoring.healthCheck.conditions; - group = "services"; - labels = - cfg.monitoring.extraLabels - // { - service = serviceName; - node = homelabCfg.hostname; - environment = homelabCfg.environment; - }; - enabled = true; - } - ] - ++ cfg.monitoring.healthCheck.extraChecks; - }; - }; -} diff --git a/modules/homelab/lib/features/proxy.nix b/modules/homelab/lib/features/proxy.nix deleted file mode 100644 index 2658c7a..0000000 --- a/modules/homelab/lib/features/proxy.nix +++ /dev/null @@ -1,64 +0,0 @@ -serviceName: { - config, - lib, - ... -}: -with lib; let - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; -in { - options.homelab.services.${serviceName}.proxy = { - enable = mkEnableOption "reverse proxy for ${serviceName}"; - - subdomain = mkOption { - type = types.str; - default = serviceName; - }; - - enableAuth = mkOption { - type = types.bool; - default = false; - }; - - additionalSubdomains = mkOption { - type = types.listOf (types.submodule { - options = { - subdomain = mkOption {type = types.str;}; - port = mkOption {type = types.port;}; - path = mkOption { - type = types.str; - default = "/"; - }; - enableAuth = mkOption { - type = types.bool; - default = false; - }; - }; - }); - default = []; - }; - }; - - config = mkIf (cfg.enable && cfg.proxy.enable) { - homelab.reverseProxy.entries = - [ - { - subdomain = cfg.proxy.subdomain; - host = homelabCfg.hostname; - port = cfg.port; - path = "/"; - enableAuth = cfg.proxy.enableAuth; - enableSSL = true; - } - ] - ++ map (sub: { - subdomain = sub.subdomain; - host = homelabCfg.hostname; - port = sub.port; - path = sub.path; - enableAuth = sub.enableAuth; - enableSSL = true; - }) - cfg.proxy.additionalSubdomains; - }; -} diff --git a/modules/homelab/lib/node-aggregation.nix b/modules/homelab/lib/node-aggregation.nix new file mode 100644 index 0000000..1719012 --- /dev/null +++ b/modules/homelab/lib/node-aggregation.nix @@ -0,0 +1,226 @@ +{lib}: let + inherit (lib) flatten mapAttrs mapAttrsToList filter groupBy length unique attrByPath splitString; + + # Generic function to aggregate any attribute across nodes + aggregateFromNodes = { + nodes, + attributePath, # e.g. "homelab.monitoring.endpoints" or "homelab.backups.jobs" + enhancer ? null, # optional function to enhance each item with node context + }: let + # Extract the attribute from each node using the path + getNestedAttr = path: config: let + pathList = splitString "." path; + in + attrByPath pathList [] config; + + # Get all items from all nodes + allItems = flatten (mapAttrsToList + (nodeName: nodeConfig: let + items = getNestedAttr attributePath nodeConfig.config; + baseEnhancer = item: + item + // { + _nodeName = nodeName; + _nodeConfig = nodeConfig; + _nodeAddress = nodeConfig.config.networking.hostName or nodeName; + }; + finalEnhancer = + if enhancer != null + then (item: enhancer (baseEnhancer item)) + else baseEnhancer; + in + map finalEnhancer items) + nodes); + in { + # Raw aggregated data + all = allItems; + + # Common grouping patterns + byNode = groupBy (item: item._nodeName) allItems; + byType = groupBy (item: item.type or "unknown") allItems; + byService = groupBy (item: item.service or "unknown") allItems; + + # Utility functions for filtering + filterBy = predicate: filter predicate allItems; + ofType = type: filter (item: (item.type or "") == type) allItems; + + count = length allItems; + countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems); + }; + + # Specialized aggregators for common use cases + aggregators = { + monitoring = nodes: let + # Aggregate metrics endpoints + metricsAgg = aggregateFromNodes { + inherit nodes; + attributePath = "homelab.monitoring.metrics"; + enhancer = endpoint: + endpoint + // { + _fullAddress = "${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}"; + _metricsUrl = "http://${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}${endpoint.path or "/metrics"}"; + _type = "metrics"; + }; + }; + # Aggregate health checks + healthChecksAgg = aggregateFromNodes { + inherit nodes; + attributePath = "homelab.monitoring.healthChecks"; + enhancer = check: let + # Compute the actual host and URL + actualHost = + if check.useExternalDomain or false + then "${check.subdomain}.${check._nodeConfig.config.homelab.externalDomain or "example.com"}" + else check.host or check._nodeAddress; + portPart = + if check.port != null + then ":${toString check.port}" + else ""; + url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path or "/"}"; + in + check + // { + _actualHost = actualHost; + _url = url; + _type = "health-check"; + # Merge default labels with node context + labels = + (check.labels or {}) + // { + node = check._nodeName; + environment = check._nodeConfig.config.homelab.environment or "unknown"; + }; + }; + }; + in + metricsAgg + // healthChecksAgg + // { + # Metrics-specific aggregations + allMetrics = metricsAgg.all; + metricsByNode = metricsAgg.byNode; + metricsByJobName = groupBy (m: m.jobName or "unknown") metricsAgg.all; + + # Health checks-specific aggregations + allHealthChecks = healthChecksAgg.all; + healthChecksByNode = healthChecksAgg.byNode; + healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all; + healthChecksByProtocol = groupBy (hc: hc.protocol or "http") healthChecksAgg.all; + + # Filtered health checks + externalHealthChecks = filter (hc: hc.useExternalDomain or false) healthChecksAgg.all; + internalHealthChecks = filter (hc: !(hc.useExternalDomain or false)) healthChecksAgg.all; + enabledHealthChecks = filter (hc: hc.enabled or true) healthChecksAgg.all; + + # Summary statistics + summary = { + totalMetrics = length metricsAgg.all; + totalHealthChecks = length healthChecksAgg.all; + healthChecksByGroup = + mapAttrs (group: checks: length checks) + (groupBy (hc: hc.group or "default") healthChecksAgg.all); + healthChecksByProtocol = + mapAttrs (protocol: checks: length checks) + (groupBy (hc: hc.protocol or "http") healthChecksAgg.all); + externalChecksCount = length (filter (hc: hc.useExternalDomain or false) healthChecksAgg.all); + internalChecksCount = length (filter (hc: !(hc.useExternalDomain or false)) healthChecksAgg.all); + }; + }; + + # Promtail log configurations + # logs = nodes: + # aggregateFromNodes { + # inherit nodes; + # attributePath = "homelab.logging.sources"; + # enhancer = logSource: + # logSource + # // { + # # Add log-specific computed fields + # _logPath = logSource.path or "/var/log/${logSource.service}.log"; + # _labels = + # (logSource.labels or {}) + # // { + # node = logSource._nodeName; + # service = logSource.service or "unknown"; + # }; + # }; + # }; + + # Reverse proxy configurations + reverseProxy = nodes: + aggregateFromNodes { + inherit nodes; + attributePath = "homelab.reverseProxy.entries"; + enhancer = entry: + entry + // { + # Add proxy-specific computed fields + _upstream = "http://${entry.host or entry._nodeAddress}:${toString entry.port}"; + _fqdn = "${entry.subdomain or entry.service}.${entry.domain or "local"}"; + }; + }; + + # Backup jobs with enhanced aggregation + backups = nodes: let + baseAgg = aggregateFromNodes { + inherit nodes; + attributePath = "homelab.backups.jobs"; + enhancer = backup: + backup + // { + _sourceNode = backup._nodeName; + _backupId = "${backup._nodeName}-${backup.name}"; + _jobFqdn = "${backup.name}.${backup._nodeName}"; + }; + }; + + # Get all unique backends across all nodes + allBackends = let + allBackendConfigs = + mapAttrsToList + (nodeName: nodeConfig: + attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config) + nodes; + enabledBackends = flatten (map (backends: + filter (name: backends.${name} != null) (lib.attrNames backends)) + allBackendConfigs); + in + unique enabledBackends; + in + baseAgg + // { + # Backup-specific aggregations + byBackend = groupBy (job: job.backend) baseAgg.all; + allBackends = allBackends; + + # Enhanced summary + summary = { + totalJobs = length baseAgg.all; + jobsByBackend = + mapAttrs (backend: jobs: length jobs) + (groupBy (job: job.backend) baseAgg.all); + jobsByNode = baseAgg.countBy (job: job._nodeName); + availableBackends = allBackends; + backendsInUse = unique (map (job: job.backend) baseAgg.all); + }; + }; + }; +in { + inherit aggregateFromNodes aggregators; + + # Convenience function to create a module that provides global aggregations + mkGlobalModule = attributeName: aggregatorFn: { + lib, + nodes, + ... + }: { + options.homelab.global.${attributeName} = lib.mkOption { + type = lib.types.attrs; + readOnly = true; + description = "Globally aggregated ${attributeName} from all nodes"; + }; + + config.homelab.global.${attributeName} = aggregatorFn nodes; + }; +} diff --git a/modules/homelab/lib/service-interface.nix b/modules/homelab/lib/service-interface.nix new file mode 100644 index 0000000..2bc7ed8 --- /dev/null +++ b/modules/homelab/lib/service-interface.nix @@ -0,0 +1,295 @@ +# Standard service interface for homelab services +# This provides a consistent contract that all services should follow +{lib}: let + inherit (lib) mkOption mkEnableOption types; + + # Define the standard service interface + mkServiceInterface = { + serviceName, + defaultPort ? null, + defaultSubdomain ? serviceName, + defaultDescription ? "Homelab ${serviceName} service", + monitoringPath ? "/metrics", + healthCheckPath ? "/health", + healthCheckConditions ? ["[STATUS] == 200"], + # Custom options that the service wants to expose + serviceOptions ? {}, + }: + { + # Standard interface options that all services must have + enable = mkEnableOption defaultDescription; + + port = mkOption { + type = types.port; + default = + if defaultPort != null + then defaultPort + else throw "Service ${serviceName} must specify a default port"; + description = "Port for ${serviceName} service"; + }; + + openFirewall = mkOption { + type = types.bool; + default = true; + description = "Whether to automatically open firewall ports"; + }; + + proxy = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable reverse proxy for this service"; + }; + + subdomain = mkOption { + type = types.str; + default = defaultSubdomain; + description = "Subdomain for reverse proxy (${defaultSubdomain}.yourdomain.com)"; + }; + + enableAuth = mkOption { + type = types.bool; + default = false; + description = "Enable authentication for reverse proxy"; + }; + + enableSSL = mkOption { + type = types.bool; + default = true; + description = "Enable SSL for reverse proxy"; + }; + }; + + monitoring = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable monitoring (metrics and health checks)"; + }; + + metricsPath = mkOption { + type = types.str; + default = monitoringPath; + description = "Path for metrics endpoint"; + }; + + jobName = mkOption { + type = types.str; + default = serviceName; + description = "Prometheus job name"; + }; + + scrapeInterval = mkOption { + type = types.str; + default = "30s"; + description = "Prometheus scrape interval"; + }; + + healthCheck = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable health check monitoring"; + }; + + path = mkOption { + type = types.str; + default = healthCheckPath; + description = "Path for health check endpoint"; + }; + + interval = mkOption { + type = types.str; + default = "30s"; + description = "Health check interval"; + }; + + timeout = mkOption { + type = types.str; + default = "10s"; + description = "Health check timeout"; + }; + + conditions = mkOption { + type = types.listOf types.str; + default = healthCheckConditions; + description = "Health check conditions"; + }; + + group = mkOption { + type = types.str; + default = "services"; + description = "Health check group name"; + }; + }; + + extraLabels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for monitoring"; + }; + }; + + description = mkOption { + type = types.str; + default = defaultDescription; + description = "Service description"; + }; + + extraOptions = mkOption { + type = types.attrs; + default = {}; + description = "Additional service-specific configuration options"; + }; + + # Merge in service-specific options + } + // serviceOptions; + + # Helper function to implement the standard service behavior + mkServiceConfig = { + config, + cfg, + homelabCfg, + serviceName, + # Function that returns the actual service configuration + serviceConfig, + # Optional: custom monitoring labels + extraMonitoringLabels ? {}, + # Optional: custom health check configuration + customHealthChecks ? [], + # Optional: custom reverse proxy configuration + customProxyConfig ? {}, + }: let + # Standard monitoring labels + standardLabels = + { + service = serviceName; + component = "main"; + instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; + } + // extraMonitoringLabels // cfg.monitoring.extraLabels; + + # Standard reverse proxy entry + standardProxyEntry = + { + subdomain = cfg.proxy.subdomain; + host = homelabCfg.hostname; + port = cfg.port; + enableAuth = cfg.proxy.enableAuth; + enableSSL = cfg.proxy.enableSSL; + } + // customProxyConfig; + + # Standard metrics configuration + standardMetrics = lib.optional cfg.monitoring.enable { + name = "${serviceName}-metrics"; + port = cfg.port; + path = cfg.monitoring.metricsPath; + jobName = cfg.monitoring.jobName; + scrapeInterval = cfg.monitoring.scrapeInterval; + labels = standardLabels; + }; + + # Standard health check configuration + standardHealthCheck = lib.optional (cfg.monitoring.enable && cfg.monitoring.healthCheck.enable) { + name = "${serviceName}-health"; + port = cfg.port; + path = cfg.monitoring.healthCheck.path; + interval = cfg.monitoring.healthCheck.interval; + timeout = cfg.monitoring.healthCheck.timeout; + conditions = cfg.monitoring.healthCheck.conditions; + group = cfg.monitoring.healthCheck.group; + labels = standardLabels; + }; + + # Merge service config with standard behaviors + baseConfig = lib.mkMerge [ + # Service-specific configuration + serviceConfig + + # Standard firewall configuration + (lib.mkIf cfg.openFirewall { + networking.firewall.allowedTCPPorts = [cfg.port]; + }) + + # Standard monitoring configuration + (lib.mkIf cfg.monitoring.enable { + homelab.monitoring.metrics = standardMetrics; + homelab.monitoring.healthChecks = standardHealthCheck ++ customHealthChecks; + }) + + # Standard reverse proxy configuration + (lib.mkIf cfg.proxy.enable { + homelab.reverseProxy.entries = [standardProxyEntry]; + }) + ]; + in + lib.mkIf cfg.enable baseConfig; + + # Validation helper to ensure required options are set + validateServiceConfig = cfg: serviceName: [ + # Validate that if proxy is enabled, subdomain is set + (lib.mkIf (cfg.proxy.enable && cfg.proxy.subdomain == "") + (throw "Service ${serviceName}: proxy.subdomain is required when proxy.enable is true")) + + # Validate that if monitoring is enabled, required paths are set + (lib.mkIf (cfg.monitoring.enable && cfg.monitoring.metricsPath == "") + (throw "Service ${serviceName}: monitoring.metricsPath cannot be empty when monitoring is enabled")) + ]; +in { + inherit mkServiceInterface mkServiceConfig validateServiceConfig; + + # Common service option patterns + commonOptions = { + # Log level option + logLevel = mkOption { + type = types.enum ["debug" "info" "warn" "error"]; + default = "info"; + description = "Log level"; + }; + + # Environment file option (for secrets) + environmentFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Environment file for secrets"; + }; + + # External URL option + externalUrl = serviceName: homelabCfg: + mkOption { + type = types.str; + default = "https://${serviceName}.${homelabCfg.externalDomain}"; + description = "External URL for ${serviceName}"; + }; + }; + + # Helper for creating service modules with the interface + mkServiceModule = { + serviceName, + defaultPort, + defaultSubdomain ? serviceName, + serviceOptions ? {}, + ... + } @ args: { + config, + lib, + ... + }: let + cfg = config.homelab.services.${serviceName}; + homelabCfg = config.homelab; + + serviceInterface = mkServiceInterface { + inherit serviceName defaultPort defaultSubdomain serviceOptions; + }; + in { + options.homelab.services.${serviceName} = serviceInterface; + + config = mkServiceConfig { + inherit config cfg homelabCfg serviceName; + # Service implementor must provide this function + serviceConfig = args.serviceConfig or (throw "mkServiceModule requires serviceConfig function"); + }; + }; +} diff --git a/modules/homelab/lib/systems/backups.nix b/modules/homelab/lib/systems/backups.nix deleted file mode 100644 index a39d1f9..0000000 --- a/modules/homelab/lib/systems/backups.nix +++ /dev/null @@ -1,163 +0,0 @@ -{ - config, - lib, - nodes, - ... -}: -with lib; let - cfg = config.homelab.backups; - homelabCfg = config.homelab; - hasNodes = length (attrNames nodes) > 0; - - # Get all defined backend names dynamically - backendNames = attrNames cfg.backends or {}; - - backupJobType = types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Name of the backup job"; - }; - backend = mkOption { - type = types.enum backendNames; - description = "Backend to use for this backup job"; - }; - backendOptions = mkOption { - type = types.attrs; - default = {}; - description = "Backend-specific options to override or extend the backend configuration"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - description = "Additional labels for this backup job"; - }; - }; - }; - - # Local aggregation - localAggregation = { - allJobs = cfg.jobs; - allBackends = backendNames; - }; - - # Global aggregation - globalAggregation = let - baseAgg = import ../aggregators/base.nix {inherit lib;}; - - jobsAgg = baseAgg.aggregateFromNodes { - inherit nodes; - attributePath = "homelab.backups.allJobs"; - enhancer = job: - job - // { - _sourceNode = job._nodeName; - _backupId = "${job._nodeName}-${job.name}"; - _jobFqdn = "${job.name}.${job._nodeName}"; - }; - }; - - # Get all backends from all nodes - allBackendsFromNodes = let - backendConfigs = - mapAttrsToList ( - nodeName: nodeConfig: - attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config - ) - nodes; - enabledBackends = flatten (map ( - backends: - filter (name: backends.${name} != null) (attrNames backends) - ) - backendConfigs); - in - unique enabledBackends; - in { - allJobs = jobsAgg.all; - allBackends = allBackendsFromNodes; - jobsByBackend = groupBy (j: j.backend) jobsAgg.all; - summary = { - total = length jobsAgg.all; - byBackend = jobsAgg.countBy (j: j.backend); - byNode = jobsAgg.countBy (j: j._nodeName); - uniqueBackends = unique (map (j: j.backend) jobsAgg.all); - }; - }; -in { - imports = [ - ../../backup/restic.nix - # ./backup/borgbackup.nix - ]; - - options.homelab.backups = { - enable = mkEnableOption "backup system"; - - jobs = mkOption { - type = types.listOf backupJobType; - default = []; - description = "Backup jobs to execute on this system"; - }; - - # Backend configurations (like your existing setup) - # backends = mkOption { - # type = types.attrs; - # default = {}; - # description = "Backup backend configurations"; - # }; - - defaultLabels = mkOption { - type = types.attrsOf types.str; - default = { - hostname = homelabCfg.hostname; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - description = "Default labels applied to all backup jobs"; - }; - - monitoring = mkOption { - type = types.bool; - default = true; - description = "Enable backup monitoring and metrics"; - }; - - # Always exposed aggregated data - allJobs = mkOption { - type = types.listOf types.attrs; - default = []; - readOnly = true; - }; - - allBackends = mkOption { - type = types.listOf types.str; - default = []; - readOnly = true; - }; - - global = mkOption { - type = types.attrs; - default = {}; - readOnly = true; - }; - }; - - config = mkIf cfg.enable { - # Validate that all job backends exist - assertions = [ - { - assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs; - message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends"; - } - ]; - - # Always expose both local and global - homelab.backups = { - allJobs = localAggregation.allJobs; - allBackends = localAggregation.allBackends; - global = - if hasNodes - then globalAggregation - else {}; - }; - }; -} diff --git a/modules/homelab/lib/systems/logging.nix b/modules/homelab/lib/systems/logging.nix deleted file mode 100644 index d760ee3..0000000 --- a/modules/homelab/lib/systems/logging.nix +++ /dev/null @@ -1,209 +0,0 @@ -{ - config, - lib, - nodes, - ... -}: -with lib; let - cfg = config.homelab.logging; - homelabCfg = config.homelab; - hasNodes = length (attrNames nodes) > 0; - - # Local aggregation - localAggregation = { - allSources = - cfg.sources - ++ (optional cfg.promtail.enable { - name = "system-journal"; - type = "journal"; - journal.path = "/var/log/journal"; - labels = - cfg.defaultLabels - // { - component = "system"; - log_source = "journald"; - }; - enabled = true; - }); - }; - - # Global aggregation - globalAggregation = let - baseAgg = import ../aggregators/base.nix {inherit lib;}; - - sourcesAgg = baseAgg.aggregateFromNodes { - inherit nodes; - attributePath = "homelab.logging.allSources"; - enhancer = source: - source - // { - _sourceNode = source._nodeName; - _logId = "${source._nodeName}-${source.name}"; - }; - }; - in { - allSources = sourcesAgg.all; - sourcesByType = groupBy (s: s.type) sourcesAgg.all; - summary = { - total = length sourcesAgg.all; - byType = sourcesAgg.countBy (s: s.type); - byNode = sourcesAgg.countBy (s: s._nodeName); - }; - }; -in { - options.homelab.logging = { - enable = mkEnableOption "logging system"; - - promtail = { - enable = mkOption { - type = types.bool; - default = true; - }; - port = mkOption { - type = types.port; - default = 9080; - }; - clients = mkOption { - type = types.listOf (types.submodule { - options = { - url = mkOption {type = types.str;}; - tenant_id = mkOption { - type = types.nullOr types.str; - default = null; - }; - }; - }); - default = [{url = "http://monitor.${homelabCfg.domain}:3100/loki/api/v1/push";}]; - }; - }; - - sources = mkOption { - type = types.listOf (types.submodule { - options = { - name = mkOption {type = types.str;}; - type = mkOption { - type = types.enum ["journal" "file" "syslog" "docker"]; - default = "file"; - }; - files = mkOption { - type = types.submodule { - options = { - paths = mkOption { - type = types.listOf types.str; - default = []; - }; - multiline = mkOption { - type = types.nullOr types.attrs; - default = null; - }; - }; - }; - default = {}; - }; - journal = mkOption { - type = types.submodule { - options = { - path = mkOption { - type = types.str; - default = "/var/log/journal"; - }; - }; - }; - default = {}; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - }; - pipelineStages = mkOption { - type = types.listOf types.attrs; - default = []; - }; - enabled = mkOption { - type = types.bool; - default = true; - }; - }; - }); - default = []; - }; - - defaultLabels = mkOption { - type = types.attrsOf types.str; - default = { - hostname = homelabCfg.hostname; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - }; - - # Always exposed aggregated data - allSources = mkOption { - type = types.listOf types.attrs; - default = []; - readOnly = true; - }; - - global = mkOption { - type = types.attrs; - default = {}; - readOnly = true; - }; - }; - - config = mkIf cfg.enable { - # Local setup - services.promtail = mkIf cfg.promtail.enable { - enable = true; - configuration = { - server = { - http_listen_port = cfg.promtail.port; - grpc_listen_port = 0; - }; - positions.filename = "/var/lib/promtail/positions.yaml"; - clients = cfg.promtail.clients; - scrape_configs = map (source: - { - job_name = source.name; - static_configs = [ - { - targets = ["localhost"]; - labels = - cfg.defaultLabels - // source.labels - // ( - if source.type == "file" - then { - __path__ = concatStringsSep "," source.files.paths; - } - else {} - ); - } - ]; - # pipeline_stages = source.pipelineStages; - } - // ( - if source.type == "journal" - then { - journal = { - path = source.journal.path; - labels = cfg.defaultLabels // source.labels; - }; - } - else {} - )) - localAggregation.allSources; - }; - }; - - networking.firewall.allowedTCPPorts = optionals cfg.promtail.enable [cfg.promtail.port]; - - homelab.logging = { - allSources = localAggregation.allSources; - global = - if hasNodes - then globalAggregation - else {}; - }; - }; -} diff --git a/modules/homelab/lib/systems/monitoring.nix b/modules/homelab/lib/systems/monitoring.nix deleted file mode 100644 index a44df40..0000000 --- a/modules/homelab/lib/systems/monitoring.nix +++ /dev/null @@ -1,222 +0,0 @@ -{ - config, - lib, - nodes, - ... -}: -with lib; let - cfg = config.homelab.monitoring; - homelabCfg = config.homelab; - hasNodes = length (attrNames nodes) > 0; - - # Local aggregation from this instance - localAggregation = { - # Metrics from manually configured + automatic node exporter - allMetrics = - cfg.metrics - ++ (optional cfg.nodeExporter.enable { - name = "node-exporter"; - host = homelabCfg.hostname; - port = cfg.nodeExporter.port; - path = "/metrics"; - jobName = "node"; - scrapeInterval = "30s"; - labels = { - instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; - environment = homelabCfg.environment; - location = homelabCfg.location; - }; - }); - - allHealthChecks = cfg.healthChecks; - }; - - # Global aggregation from all nodes (when nodes available) - globalAggregation = let - baseAgg = import ../aggregators/base.nix {inherit lib;}; - - # Aggregate metrics from all nodes - metricsAgg = baseAgg.aggregateFromNodes { - inherit nodes; - attributePath = "homelab.monitoring.allMetrics"; - enhancer = endpoint: - endpoint - // { - _fullAddress = "${endpoint.host}:${toString endpoint.port}"; - _metricsUrl = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}"; - }; - }; - - # Aggregate health checks from all nodes - healthChecksAgg = baseAgg.aggregateFromNodes { - inherit nodes; - attributePath = "homelab.monitoring.allHealthChecks"; - enhancer = check: let - actualHost = check.host; - portPart = - if check.port != null - then ":${toString check.port}" - else ""; - url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path}"; - in - check - // { - _actualHost = actualHost; - _url = url; - }; - }; - in { - allMetrics = metricsAgg.all; - allHealthChecks = healthChecksAgg.all; - - # Useful groupings for services - metricsByJobName = groupBy (m: m.jobName) metricsAgg.all; - healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all; - - summary = { - totalMetrics = length metricsAgg.all; - totalHealthChecks = length healthChecksAgg.all; - nodesCovered = unique (map (m: m._nodeName or m.host) metricsAgg.all); - }; - }; -in { - # Instance-level monitoring options - options.homelab.monitoring = { - enable = mkEnableOption "monitoring system"; - - # Node exporter (automatically enabled) - nodeExporter = { - enable = mkOption { - type = types.bool; - default = true; - }; - port = mkOption { - type = types.port; - default = 9100; - }; - }; - - # Manual metrics (in addition to service auto-registration) - metrics = mkOption { - type = types.listOf (types.submodule { - options = { - name = mkOption {type = types.str;}; - host = mkOption { - type = types.str; - default = homelabCfg.hostname; - }; - port = mkOption {type = types.port;}; - path = mkOption { - type = types.str; - default = "/metrics"; - }; - jobName = mkOption {type = types.str;}; - scrapeInterval = mkOption { - type = types.str; - default = "30s"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - }; - }; - }); - default = []; - }; - - # Manual health checks (in addition to service auto-registration) - healthChecks = mkOption { - type = types.listOf (types.submodule { - options = { - name = mkOption {type = types.str;}; - host = mkOption { - type = types.str; - default = homelabCfg.hostname; - }; - port = mkOption { - type = types.nullOr types.port; - default = null; - }; - path = mkOption { - type = types.str; - default = "/"; - }; - protocol = mkOption { - type = types.enum ["http" "https" "tcp" "icmp"]; - default = "http"; - }; - method = mkOption { - type = types.str; - default = "GET"; - }; - interval = mkOption { - type = types.str; - default = "30s"; - }; - timeout = mkOption { - type = types.str; - default = "10s"; - }; - conditions = mkOption { - type = types.listOf types.str; - default = ["[STATUS] == 200"]; - }; - group = mkOption { - type = types.str; - default = "manual"; - }; - labels = mkOption { - type = types.attrsOf types.str; - default = {}; - }; - enabled = mkOption { - type = types.bool; - default = true; - }; - }; - }); - default = []; - }; - - # Read-only aggregated data (always exposed) - allMetrics = mkOption { - type = types.listOf types.attrs; - default = localAggregation.allMetrics; - readOnly = true; - }; - - allHealthChecks = mkOption { - type = types.listOf types.attrs; - default = localAggregation.allHealthChecks; - readOnly = true; - }; - - # Global aggregation (always available, empty if no nodes) - global = mkOption { - type = types.attrs; - default = globalAggregation; - readOnly = true; - }; - }; - - # Configuration - always includes both local and global - config = mkIf cfg.enable { - # Basic instance setup - services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable { - enable = true; - port = cfg.nodeExporter.port; - enabledCollectors = ["systemd" "textfile" "filesystem" "loadavg" "meminfo" "netdev" "stat"]; - }; - - networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [cfg.nodeExporter.port]; - - # homelab.monitoring = { - # allMetrics = localAggregation.allMetrics; - # allHealthChecks = localAggregation.allHealthChecks; - # global = - # if hasNodes - # then globalAggregation - # else {}; - # }; - }; -} diff --git a/modules/homelab/lib/systems/proxy.nix b/modules/homelab/lib/systems/proxy.nix deleted file mode 100644 index a16694d..0000000 --- a/modules/homelab/lib/systems/proxy.nix +++ /dev/null @@ -1,98 +0,0 @@ -{ - config, - lib, - nodes, - ... -}: -with lib; let - cfg = config.homelab.reverseProxy; - homelabCfg = config.homelab; - hasNodes = length (attrNames nodes) > 0; - - # Local aggregation - localAggregation = { - allEntries = cfg.entries; - }; - - # Global aggregation - globalAggregation = let - baseAgg = import ../aggregators/base.nix {inherit lib;}; - - entriesAgg = baseAgg.aggregateFromNodes { - inherit nodes; - attributePath = "homelab.reverseProxy.allEntries"; - enhancer = entry: - entry - // { - _upstream = "http://${entry.host}:${toString entry.port}${entry.path or ""}"; - _fqdn = "${entry.subdomain}.${entry._nodeConfig.config.homelab.externalDomain or homelabCfg.externalDomain}"; - _internal = "${entry.host}:${toString entry.port}"; - }; - }; - in { - allEntries = entriesAgg.all; - entriesBySubdomain = groupBy (e: e.subdomain) entriesAgg.all; - entriesWithAuth = entriesAgg.filterBy (e: e.enableAuth or false); - entriesWithoutAuth = entriesAgg.filterBy (e: !(e.enableAuth or false)); - summary = { - total = length entriesAgg.all; - byNode = entriesAgg.countBy (e: e._nodeName); - withAuth = length (entriesAgg.filterBy (e: e.enableAuth or false)); - withoutAuth = length (entriesAgg.filterBy (e: !(e.enableAuth or false))); - }; - }; -in { - options.homelab.reverseProxy = { - enable = mkEnableOption "reverse proxy system"; - - entries = mkOption { - type = types.listOf (types.submodule { - options = { - subdomain = mkOption {type = types.str;}; - host = mkOption { - type = types.str; - default = homelabCfg.hostname; - }; - port = mkOption {type = types.port;}; - path = mkOption { - type = types.str; - default = "/"; - }; - enableAuth = mkOption { - type = types.bool; - default = false; - }; - enableSSL = mkOption { - type = types.bool; - default = true; - }; - }; - }); - default = []; - }; - - # Always exposed aggregated data - allEntries = mkOption { - type = types.listOf types.attrs; - default = []; - readOnly = true; - }; - - global = mkOption { - type = types.attrs; - default = {}; - readOnly = true; - }; - }; - - config = mkIf cfg.enable { - # Always expose both local and global - homelab.reverseProxy = { - allEntries = localAggregation.allEntries; - global = - if hasNodes - then globalAggregation - else {}; - }; - }; -} diff --git a/modules/homelab/monitoring-config.nix b/modules/homelab/monitoring-config.nix new file mode 100644 index 0000000..2490467 --- /dev/null +++ b/modules/homelab/monitoring-config.nix @@ -0,0 +1,214 @@ +{ + config, + lib, + ... +}: +with lib; let + cfg = config.homelab.monitoring; + homelabCfg = config.homelab; + + metricsEndpointType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the metrics endpoint"; + }; + host = mkOption { + type = types.str; + description = "Domain name of the host (default: hostname.domain)"; + default = "${homelabCfg.hostname}.${homelabCfg.domain}"; + }; + port = mkOption { + type = types.port; + description = "Port number for the endpoint"; + }; + path = mkOption { + type = types.str; + default = "/metrics"; + description = "Path for the metrics endpoint"; + }; + jobName = mkOption { + type = types.str; + description = "Prometheus job name"; + }; + scrapeInterval = mkOption { + type = types.str; + default = "30s"; + description = "Prometheus scrape interval"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for this endpoint"; + }; + }; + }; + + healthCheckEndpointType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the health check endpoint"; + }; + host = mkOption { + type = types.str; + description = "Domain name of the host"; + default = "${homelabCfg.hostname}.${homelabCfg.domain}"; + }; + port = mkOption { + type = types.nullOr types.port; + default = null; + description = "Port number for the endpoint (null for standard HTTP/HTTPS)"; + }; + path = mkOption { + type = types.str; + default = "/"; + description = "Path for the health check endpoint"; + }; + protocol = mkOption { + type = types.enum ["http" "https" "tcp" "icmp"]; + default = "http"; + description = "Protocol to use for health checks"; + }; + method = mkOption { + type = types.str; + default = "GET"; + description = "HTTP method for health checks (only applies to http/https)"; + }; + interval = mkOption { + type = types.str; + default = "30s"; + description = "Health check interval"; + }; + timeout = mkOption { + type = types.str; + default = "10s"; + description = "Health check timeout"; + }; + conditions = mkOption { + type = types.listOf types.str; + default = ["[STATUS] == 200"]; + description = "Health check conditions (Gatus format)"; + example = ["[STATUS] == 200" "[BODY].status == UP" "[RESPONSE_TIME] < 500"]; + }; + alerts = mkOption { + type = types.listOf (types.submodule { + options = { + type = mkOption { + type = types.str; + description = "Alert type"; + example = "discord"; + }; + enabled = mkOption { + type = types.bool; + default = true; + description = "Whether this alert is enabled"; + }; + failure-threshold = mkOption { + type = types.int; + default = 3; + description = "Number of failures before alerting"; + }; + success-threshold = mkOption { + type = types.int; + default = 2; + description = "Number of successes before resolving alert"; + }; + }; + }); + default = []; + description = "Alert configurations"; + }; + group = mkOption { + type = types.str; + default = "default"; + description = "Group name for organizing health checks"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for this health check"; + }; + enabled = mkOption { + type = types.bool; + default = true; + description = "Whether this health check is enabled"; + }; + # External domain support + useExternalDomain = mkOption { + type = types.bool; + default = false; + description = "Use external domain instead of internal"; + }; + subdomain = mkOption { + type = types.nullOr types.str; + default = null; + description = "Subdomain for external domain (required if useExternalDomain is true)"; + }; + }; + }; +in { + options.homelab.monitoring = { + enable = mkEnableOption "Homelab monitoring"; + metrics = mkOption { + type = types.listOf metricsEndpointType; + default = []; + description = "Metric endpoints exposed by this system"; + }; + + healthChecks = mkOption { + type = types.listOf healthCheckEndpointType; + default = []; + description = "Health check endpoints for uptime monitoring"; + }; + + nodeExporter = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable node exporter"; + }; + port = mkOption { + type = types.port; + default = 9100; + description = "Node exporter port"; + }; + }; + }; + + config = mkIf cfg.enable { + # Configure node exporter if enabled + services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable { + enable = true; + port = cfg.nodeExporter.port; + enabledCollectors = [ + "systemd" + "textfile" + "filesystem" + "loadavg" + "meminfo" + "netdev" + "stat" + ]; + }; + + # Automatically add node exporter to monitoring endpoints + homelab.monitoring.metrics = mkIf cfg.nodeExporter.enable [ + { + name = "node-exporter"; + port = cfg.nodeExporter.port; + path = "/metrics"; + jobName = "node"; + labels = { + instance = "${homelabCfg.hostname}.${homelabCfg.domain}"; + environment = homelabCfg.environment; + location = homelabCfg.location; + }; + } + ]; + + networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [ + cfg.nodeExporter.port + ]; + }; +} diff --git a/modules/homelab/proxy-config.nix b/modules/homelab/proxy-config.nix new file mode 100644 index 0000000..e7236d8 --- /dev/null +++ b/modules/homelab/proxy-config.nix @@ -0,0 +1,53 @@ +{ + config, + lib, + ... +}: +with lib; let + cfg = config.homelab.reverseProxy; + homelabCfg = config.homelab; + + reverseProxyEntryType = types.submodule { + options = { + subdomain = mkOption { + type = types.str; + description = "Subdomain for the service"; + }; + host = mkOption { + type = types.str; + description = "Host to proxy to"; + default = "${homelabCfg.hostname}.${homelabCfg.domain}"; + }; + port = mkOption { + type = types.port; + description = "Port to proxy to"; + }; + path = mkOption { + type = types.str; + default = "/"; + description = "Path prefix for the service"; + }; + enableAuth = mkOption { + type = types.bool; + default = false; + description = "Enable authentication for this service"; + }; + enableSSL = mkOption { + type = types.bool; + default = true; + description = "Enable SSL for this service"; + }; + }; + }; +in { + options.homelab.reverseProxy = { + entries = mkOption { + type = types.listOf reverseProxyEntryType; + default = []; + description = "Reverse proxy entries for this system"; + }; + }; + + config = { + }; +} diff --git a/modules/homelab/services/default.nix b/modules/homelab/services/default.nix index 2071dd6..2847a3c 100644 --- a/modules/homelab/services/default.nix +++ b/modules/homelab/services/default.nix @@ -1,9 +1,7 @@ { imports = [ ./minio.nix - ./gatus.nix - ./prometheus.nix - ./grafana.nix - # ./monitoring/loki.nix + ./monitoring/gatus.nix + ./monitoring/prometheus.nix ]; } diff --git a/modules/homelab/services/example-service.nix b/modules/homelab/services/example-service.nix new file mode 100644 index 0000000..df59348 --- /dev/null +++ b/modules/homelab/services/example-service.nix @@ -0,0 +1,161 @@ +# Example showing how to create a service using the standard interface +{ + config, + lib, + pkgs, + ... +}: +with lib; let + serviceInterface = import ../lib/service-interface.nix {inherit lib;}; + + cfg = config.homelab.services.grafana; + homelabCfg = config.homelab; + + # Service-specific options beyond the standard interface + grafanaServiceOptions = { + domain = mkOption { + type = types.str; + default = "grafana.${homelabCfg.externalDomain}"; + description = "Domain for Grafana"; + }; + + rootUrl = mkOption { + type = types.str; + default = "https://grafana.${homelabCfg.externalDomain}"; + description = "Root URL for Grafana"; + }; + + dataDir = serviceInterface.commonOptions.dataDir "grafana"; + + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; + }; + + password = mkOption { + type = types.str; + default = "admin"; + description = "Admin password"; + }; + }; + + datasources = { + prometheus = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable Prometheus datasource"; + }; + + url = mkOption { + type = types.str; + default = "http://localhost:9090"; + description = "Prometheus URL"; + }; + }; + }; + + plugins = mkOption { + type = types.listOf types.package; + default = []; + description = "Grafana plugins to install"; + }; + }; +in { + options.homelab.services.grafana = serviceInterface.mkServiceInterface { + serviceName = "grafana"; + defaultPort = 3000; + defaultSubdomain = "grafana"; + monitoringPath = "/metrics"; + healthCheckPath = "/api/health"; + healthCheckConditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" + ]; + serviceOptions = grafanaServiceOptions; + }; + + config = serviceInterface.mkServiceConfig { + inherit config cfg homelabCfg; + serviceName = "grafana"; + + extraMonitoringLabels = { + component = "dashboard"; + }; + + customHealthChecks = [ + { + name = "grafana-login"; + port = cfg.port; + path = "/login"; + interval = "60s"; + conditions = [ + "[STATUS] == 200" + "[RESPONSE_TIME] < 3000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "login"; + }; + } + ]; + + serviceConfig = { + services.grafana = { + enable = true; + dataDir = cfg.dataDir; + declarativePlugins = cfg.plugins; + + settings = { + server = { + http_port = cfg.port; + http_addr = "0.0.0.0"; + domain = cfg.domain; + root_url = cfg.rootUrl; + }; + + security = { + admin_user = cfg.admin.user; + admin_password = cfg.admin.password; + }; + }; + + provision = { + enable = true; + datasources.settings.datasources = mkIf cfg.datasources.prometheus.enable [ + { + name = "Prometheus"; + type = "prometheus"; + url = cfg.datasources.prometheus.url; + isDefault = true; + } + ]; + }; + }; + }; + }; +} +# Usage example in your configuration: +/* +{ + homelab.services.grafana = { + enable = true; + # Standard interface options: + port = 3000; # Optional: defaults to 3000 + openFirewall = true; # Optional: defaults to true + proxy.subdomain = "grafana"; # Optional: defaults to "grafana" + proxy.enableAuth = false; # Optional: defaults to false + monitoring.enable = true; # Optional: defaults to true + + # Service-specific options: + admin.password = "secure-password"; + datasources.prometheus.url = "http://prometheus.lab:9090"; + plugins = with pkgs.grafanaPlugins; [ grafana-piechart-panel ]; + }; +} +*/ + diff --git a/modules/homelab/services/gatus.nix b/modules/homelab/services/gatus.nix deleted file mode 100644 index da907c4..0000000 --- a/modules/homelab/services/gatus.nix +++ /dev/null @@ -1,267 +0,0 @@ -{ - config, - lib, - ... -}: -with lib; let - serviceName = "gatus"; - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; - - # Convert homelab health checks to Gatus format - formatHealthCheck = check: let - # Build the URL based on the health check configuration - url = check._url or "http://${check.host}:${toString (check.port or 80)}${check.path}"; - - # Convert conditions to Gatus format (they should already be compatible) - conditions = check.conditions or ["[STATUS] == 200"]; - - # Convert alerts to Gatus format - alerts = map (alert: { - inherit (alert) type enabled; - failure-threshold = alert.failure-threshold or 3; - success-threshold = alert.success-threshold or 2; - description = "Health check alert for ${check.name}"; - }) (check.alerts or []); - in { - name = check.name; - group = check.group or "default"; - url = url; - interval = check.interval or "30s"; - - # Add method and headers for HTTP/HTTPS checks - method = - if (check.protocol == "http" || check.protocol == "https") - then check.method or "GET" - else null; - - conditions = conditions; - - # Add timeout - client = { - timeout = check.timeout or "10s"; - }; - - # Add alerts if configured - alerts = - if alerts != [] - then alerts - else []; - - # Add labels for UI organization - ui = { - hide-hostname = false; - hide-url = false; - description = "Health check for ${check.name} on ${check.host or check._actualHost or "unknown"}"; - }; - }; - - # Generate Gatus configuration from aggregated health checks - gatusConfig = - recursiveUpdate { - # Global Gatus settings - alerting = mkIf (cfg.alerting != {}) cfg.alerting; - - web = { - address = cfg.web.address; - port = cfg.port; - }; - - # Enable metrics - metrics = cfg.monitoring.enable; - - ui = { - title = cfg.ui.title; - header = cfg.ui.header; - link = cfg.ui.link; - buttons = cfg.ui.buttons; - }; - - storage = cfg.storage; - - # Convert all enabled health checks from the fleet to Gatus endpoints - endpoints = let - # Get all health checks - try global first, fallback to local - allHealthChecks = homelabCfg.monitoring.global.allHealthChecks - or homelabCfg.monitoring.allHealthChecks - or []; - - # Filter only enabled health checks - enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks; - - # Convert to Gatus format - gatusEndpoints = map formatHealthCheck enabledHealthChecks; - in - gatusEndpoints; - } - cfg.extraConfig; -in { - imports = [ - (import ../lib/features/monitoring.nix serviceName) - (import ../lib/features/logging.nix serviceName) - (import ../lib/features/proxy.nix serviceName) - ]; - - # Core service options - options.homelab.services.${serviceName} = { - enable = mkEnableOption "Gatus Status Page"; - - port = mkOption { - type = types.port; - default = 8080; - }; - - description = mkOption { - type = types.str; - default = "Gatus Status Page"; - }; - - # Gatus-specific options - ui = { - title = mkOption { - type = types.str; - default = "Homelab Status"; - description = "Title for the Gatus web interface"; - }; - - header = mkOption { - type = types.str; - default = "Homelab Services Status"; - description = "Header text for the Gatus interface"; - }; - - link = mkOption { - type = types.str; - default = "https://status.${homelabCfg.externalDomain}"; - description = "Link in the Gatus header"; - }; - - buttons = mkOption { - type = types.listOf (types.submodule { - options = { - name = mkOption {type = types.str;}; - link = mkOption {type = types.str;}; - }; - }); - default = [ - { - name = "Grafana"; - link = "https://grafana.${homelabCfg.externalDomain}"; - } - { - name = "Prometheus"; - link = "https://prometheus.${homelabCfg.externalDomain}"; - } - ]; - description = "Navigation buttons in the Gatus interface"; - }; - }; - - alerting = mkOption { - type = types.attrs; - default = {}; - description = "Gatus alerting configuration"; - example = literalExpression '' - { - discord = { - webhook-url = "https://discord.com/api/webhooks/..."; - default-alert = { - enabled = true; - description = "Health check failed"; - failure-threshold = 3; - success-threshold = 2; - }; - }; - } - ''; - }; - - storage = mkOption { - type = types.attrs; - default = { - type = "memory"; - }; - description = "Gatus storage configuration"; - example = literalExpression '' - { - type = "postgres"; - path = "postgres://user:password@localhost/gatus?sslmode=disable"; - } - ''; - }; - - web = { - address = mkOption { - type = types.str; - default = "0.0.0.0"; - description = "Web interface bind address"; - }; - }; - - extraConfig = mkOption { - type = types.attrs; - default = {}; - description = "Additional Gatus configuration options"; - }; - }; - - # Service configuration with smart defaults - config = mkIf cfg.enable (mkMerge [ - # Core Gatus service - { - services.gatus = { - enable = true; - settings = gatusConfig; - }; - - networking.firewall.allowedTCPPorts = [cfg.port]; - - homelab.services.${serviceName}.monitoring.enable = mkDefault true; - } - - # Smart defaults for Gatus - (mkIf cfg.monitoring.enable { - homelab.services.${serviceName}.monitoring = mkDefault { - metrics = { - path = "/metrics"; - extraEndpoints = []; - }; - healthCheck = { - path = "/health"; - conditions = [ - "[STATUS] == 200" - "[BODY].status == UP" - "[RESPONSE_TIME] < 1000" - ]; - extraChecks = []; - }; - extraLabels = { - component = "status-monitoring"; - tier = "monitoring"; - }; - }; - }) - - (mkIf cfg.logging.enable { - homelab.services.${serviceName}.logging = mkDefault { - files = ["/var/log/gatus/gatus.log"]; - parsing = { - # Gatus log format: 2024-01-01T12:00:00Z [INFO] message - regex = "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z) \\[(?P\\w+)\\] (?P.*)"; - extractFields = ["level"]; - }; - extraLabels = { - component = "status-monitoring"; - application = "gatus"; - }; - }; - }) - - (mkIf cfg.proxy.enable { - homelab.services.${serviceName}.proxy = mkDefault { - subdomain = "status"; - enableAuth = false; # Status page should be public - }; - }) - ]); -} diff --git a/modules/homelab/services/grafana.nix b/modules/homelab/services/grafana.nix deleted file mode 100644 index 5f5aad9..0000000 --- a/modules/homelab/services/grafana.nix +++ /dev/null @@ -1,86 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: -with lib; let - serviceName = "grafana"; - cfg = config.homelab.services.${serviceName}; -in { - imports = [ - (import ../lib/features/monitoring.nix serviceName) - (import ../lib/features/logging.nix serviceName) - (import ../lib/features/proxy.nix serviceName) - ]; - - options.homelab.services.${serviceName} = { - enable = mkEnableOption "Grafana Dashboard"; - - port = mkOption { - type = types.port; - default = 3000; - }; - - description = mkOption { - type = types.str; - default = "Grafana Metrics Dashboard"; - }; - }; - - config = mkIf cfg.enable (mkMerge [ - # Core Grafana service - { - services.grafana = { - enable = true; - settings.server = { - http_port = cfg.port; - http_addr = "0.0.0.0"; - }; - }; - - networking.firewall.allowedTCPPorts = [cfg.port]; - - homelab.services.${serviceName}.monitoring.enable = mkDefault true; - } - - # Smart defaults for Grafana - (mkIf cfg.logging.enable { - # Grafana-specific log setup - homelab.services.${serviceName}.logging = mkDefault { - files = ["/var/log/grafana/grafana.log"]; - parsing = { - # Grafana log format: t=2024-01-01T12:00:00Z lvl=info msg="message" - regex = "^t=(?P[^ ]+) lvl=(?P\\w+) msg=\"(?P[^\"]*)\""; - extractFields = ["level"]; - }; - extraLabels = { - application = "grafana"; - component = "dashboard"; - }; - }; - }) - - (mkIf cfg.monitoring.enable { - homelab.services.${serviceName}.monitoring = mkDefault { - metrics.path = "/metrics"; - healthCheck = { - path = "/api/health"; - conditions = ["[STATUS] == 200" "[BODY].database == ok"]; - }; - extraLabels = { - component = "dashboard"; - tier = "monitoring"; - }; - }; - }) - - (mkIf cfg.proxy.enable { - # Grafana needs auth by default (admin interface) - homelab.services.${serviceName}.proxy = mkDefault { - subdomain = "grafana"; - # enableAuth = true; - }; - }) - ]); -} diff --git a/modules/homelab/services/jellyfin.nix b/modules/homelab/services/jellyfin.nix new file mode 100644 index 0000000..1aac7e5 --- /dev/null +++ b/modules/homelab/services/jellyfin.nix @@ -0,0 +1,125 @@ +# modules/services/jellyfin.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.services.jellyfin; +in { + options.services.jellyfin = { + enable = mkEnableOption "Jellyfin media server"; + + port = mkOption { + type = types.port; + default = 8096; + description = "Port for Jellyfin web interface"; + }; + + dataDir = mkOption { + type = types.str; + default = "/var/lib/jellyfin"; + description = "Directory to store Jellyfin data"; + }; + + mediaDir = mkOption { + type = types.str; + default = "/media"; + description = "Directory containing media files"; + }; + + enableMetrics = mkOption { + type = types.bool; + default = true; + description = "Enable Prometheus metrics"; + }; + + exposeWeb = mkOption { + type = types.bool; + default = true; + description = "Expose web interface through reverse proxy"; + }; + }; + + config = mkIf cfg.enable { + # Enable the service + services.jellyfin = { + enable = true; + dataDir = cfg.dataDir; + }; + + # Configure global settings + homelab.global = { + # Add backup job for Jellyfin data + backups.jobs = [ + { + name = "jellyfin-config"; + backend = "restic"; + paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"]; + schedule = "0 2 * * *"; # Daily at 2 AM + excludePatterns = [ + "*/cache/*" + "*/transcodes/*" + "*/logs/*" + ]; + preHook = '' + # Stop jellyfin for consistent backup + systemctl stop jellyfin + ''; + postHook = '' + # Restart jellyfin after backup + systemctl start jellyfin + ''; + } + { + name = "jellyfin-media"; + backend = "restic"; + paths = [cfg.mediaDir]; + schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM + excludePatterns = [ + "*.tmp" + "*/.@__thumb/*" # Synology thumbnails + ]; + } + ]; + + # Add reverse proxy entry if enabled + reverseProxy.entries = mkIf cfg.exposeWeb [ + { + subdomain = "jellyfin"; + port = cfg.port; + enableAuth = false; # Jellyfin has its own auth + websockets = true; + customHeaders = { + "X-Forwarded-Proto" = "$scheme"; + "X-Forwarded-Host" = "$host"; + }; + } + ]; + + # Add monitoring endpoint if metrics enabled + monitoring.endpoints = mkIf cfg.enableMetrics [ + { + name = "jellyfin"; + port = cfg.port; + path = "/metrics"; # Assuming you have a metrics plugin + jobName = "jellyfin"; + scrapeInterval = "60s"; + labels = { + service = "jellyfin"; + type = "media-server"; + }; + } + ]; + }; + + # Open firewall + networking.firewall.allowedTCPPorts = [cfg.port]; + + # Create media directory + systemd.tmpfiles.rules = [ + "d ${cfg.mediaDir} 0755 jellyfin jellyfin -" + ]; + }; +} diff --git a/modules/homelab/services/monitoring/gatus.nix b/modules/homelab/services/monitoring/gatus.nix index 60f0700..8d1f20f 100644 --- a/modules/homelab/services/monitoring/gatus.nix +++ b/modules/homelab/services/monitoring/gatus.nix @@ -4,13 +4,110 @@ ... }: with lib; let - serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; - cfg = config.homelab.services.gatus; homelabCfg = config.homelab; - # Service-specific options beyond the standard interface - gatusServiceOptions = { + # Convert our health check format to Gatus format + formatHealthCheck = check: let + # Build the URL + url = check._url; + + # Convert conditions to Gatus format (they should already be compatible) + conditions = check.conditions or ["[STATUS] == 200"]; + + # Convert alerts to Gatus format + alerts = map (alert: { + inherit (alert) type enabled; + failure-threshold = alert.failure-threshold or 3; + success-threshold = alert.success-threshold or 2; + description = "Health check alert for ${check.name}"; + }) (check.alerts or []); + in { + name = check.name; + group = check.group or "default"; + url = url; + interval = check.interval or "30s"; + + # Add method and headers for HTTP/HTTPS checks + method = + if (check.protocol == "http" || check.protocol == "https") + then check.method or "GET" + else null; + + conditions = conditions; + + # Add timeout + client = { + timeout = check.timeout or "10s"; + }; + + # Add alerts if configured + alerts = + if alerts != [] + then alerts + else []; + + # Add labels for UI organization + ui = { + hide-hostname = false; + hide-url = false; + description = "Health check for ${check.name} on ${check._nodeName}"; + }; + }; + + # Generate Gatus configuration + gatusConfig = { + # Global Gatus settings + alerting = mkIf (cfg.alerting != {}) cfg.alerting; + + web = { + address = "0.0.0.0"; + port = cfg.port; + }; + + # TODO: Introduce monitor option to toggle monitoring + metrics = true; + + ui = { + title = cfg.ui.title; + header = cfg.ui.header; + link = cfg.ui.link; + buttons = cfg.ui.buttons; + }; + + storage = mkIf (cfg.storage != {}) cfg.storage; + + # Convert all enabled health checks to Gatus endpoints + endpoints = let + # Get all health checks from global config + allHealthChecks = homelabCfg.global.monitoring.enabledHealthChecks or []; + + # Group by group name for better organization + # groupedChecks = homelabCfg.global.monitoring.healthChecksByGroup or {}; + + # Convert to Gatus format + gatusEndpoints = map formatHealthCheck allHealthChecks; + in + gatusEndpoints; + }; +in { + options.homelab.services.gatus = { + enable = mkEnableOption "Gatus uptime monitoring service"; + + port = mkOption { + type = types.port; + default = 8080; + description = "Port for Gatus web interface"; + }; + + openFirewall = lib.mkOption { + type = lib.types.bool; + default = true; + description = '' + Whether to automatically open the specified ports in the firewall. + ''; + }; + ui = { title = mkOption { type = types.str; @@ -26,7 +123,7 @@ with lib; let link = mkOption { type = types.str; - default = "https://status.${homelabCfg.externalDomain}"; + default = "https://gatus.${homelabCfg.externalDomain}"; description = "Link in the Gatus header"; }; @@ -89,129 +186,59 @@ with lib; let default = {}; description = "Additional Gatus configuration options"; }; - - web = { - address = mkOption { - type = types.str; - default = "0.0.0.0"; - description = "Web interface bind address"; - }; - }; }; - # Convert our health check format to Gatus format - formatHealthCheck = check: let - # Build the URL based on the health check configuration - url = check._url; - - # Convert conditions to Gatus format (they should already be compatible) - conditions = check.conditions or ["[STATUS] == 200"]; - - # Convert alerts to Gatus format - alerts = map (alert: { - inherit (alert) type enabled; - failure-threshold = alert.failure-threshold or 3; - success-threshold = alert.success-threshold or 2; - description = "Health check alert for ${check.name}"; - }) (check.alerts or []); - in { - name = check.name; - group = check.group or "default"; - url = url; - interval = check.interval or "30s"; - - # Add method and headers for HTTP/HTTPS checks - method = - if (check.protocol == "http" || check.protocol == "https") - then check.method or "GET" - else null; - - conditions = conditions; - - # Add timeout - client = { - timeout = check.timeout or "10s"; + config = mkIf cfg.enable { + services.gatus = { + enable = true; + openFirewall = cfg.openFirewall; + settings = gatusConfig; }; - # Add alerts if configured - alerts = - if alerts != [] - then alerts - else []; - - # Add labels for UI organization - ui = { - hide-hostname = false; - hide-url = false; - description = "Health check for ${check.name} on ${check.host}"; - }; - }; - - # Generate Gatus configuration - gatusConfig = - recursiveUpdate { - # Global Gatus settings - alerting = mkIf (cfg.alerting != {}) cfg.alerting; - - web = { - address = cfg.web.address; + # Add to monitoring endpoints + homelab.monitoring.metrics = [ + { + name = "gatus"; port = cfg.port; - }; - - # Enable metrics - metrics = cfg.monitoring.enable; - - ui = { - title = cfg.ui.title; - header = cfg.ui.header; - link = cfg.ui.link; - buttons = cfg.ui.buttons; - }; - - storage = cfg.storage; - - # Convert all enabled health checks to Gatus endpoints - endpoints = let - # Get all health checks from global config - allHealthChecks = homelabCfg.global.monitoring.allHealthChecks or []; - - # Filter only enabled health checks - enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks; - - # Convert to Gatus format - gatusEndpoints = map formatHealthCheck enabledHealthChecks; - in - gatusEndpoints; - } - cfg.extraConfig; -in { - options.homelab.services.gatus = serviceInterface.mkServiceInterface { - serviceName = "gatus"; - defaultPort = 8080; - defaultSubdomain = "status"; - monitoringPath = "/metrics"; - healthCheckPath = "/health"; - healthCheckConditions = [ - "[STATUS] == 200" - "[BODY].status == UP" - "[RESPONSE_TIME] < 1000" + path = "/metrics"; + jobName = "gatus"; + labels = { + service = "gatus"; + component = "monitoring"; + }; + } ]; - serviceOptions = gatusServiceOptions; - }; - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "gatus"; + # Add health check for Gatus itself + homelab.monitoring.healthChecks = [ + { + name = "gatus-web-interface"; + port = cfg.port; + path = "/health"; + interval = "30s"; + conditions = [ + "[STATUS] == 200" + "[BODY].status == UP" + "[RESPONSE_TIME] < 1000" + ]; + group = "monitoring"; + labels = { + service = "gatus"; + component = "web-interface"; + }; + } + ]; - extraMonitoringLabels = { - component = "status-monitoring"; - }; - - serviceConfig = { - services.gatus = { - enable = true; - settings = gatusConfig; - }; - }; + # Add reverse proxy entry if needed + homelab.reverseProxy.entries = [ + { + subdomain = "status"; + host = homelabCfg.hostname; + port = cfg.port; + # path = "/"; + # enableAuth = false; # Status page should be publicly accessible + # enableSSL = true; + } + ]; }; } diff --git a/modules/homelab/services/monitoring/grafana.nix b/modules/homelab/services/monitoring/grafana.nix index 8ecb14c..64650cf 100644 --- a/modules/homelab/services/monitoring/grafana.nix +++ b/modules/homelab/services/monitoring/grafana.nix @@ -5,389 +5,169 @@ ... }: with lib; let - serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; - cfg = config.homelab.services.grafana; homelabCfg = config.homelab; - # Default community dashboards with proper configuration + # Default dashboards for homelab monitoring defaultDashboards = { - "node-exporter-full" = { - name = "Node Exporter Full"; - id = 12486; - revision = 2; - # url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; - sha256 = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM="; - - url = "https://grafana.com/api/dashboards/12486/revisions/2/download"; + "node-exporter" = pkgs.fetchurl { + url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; + sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this }; - "prometheus-2-0-stats" = { - name = "Prometheus 2.0 Stats"; - id = 2; - revision = 2; + "prometheus-stats" = pkgs.fetchurl { url = "https://grafana.com/api/dashboards/2/revisions/2/download"; - sha256 = "sha256-Ydk4LPwfX4qJN8tiWPLWQdtAqzj8CKi6HYsuE+kWcXw="; + sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this }; }; - # Function to fetch a dashboard from Grafana.com - fetchGrafanaDashboard = name: config: - pkgs.fetchurl { - inherit (config) url sha256; - name = "${name}-dashboard.json"; - }; + # Grafana provisioning configuration + provisioningConfig = { + # Data sources + datasources = + [ + { + name = "Prometheus"; + type = "prometheus"; + access = "proxy"; + url = cfg.datasources.prometheus.url; + isDefault = true; + editable = false; + jsonData = { + timeInterval = "5s"; + queryTimeout = "60s"; + httpMethod = "POST"; + }; + } + ] + ++ cfg.datasources.extra; - # Git repository management for custom dashboards - gitDashboardsRepo = mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") ( - pkgs.fetchgit { - url = cfg.dashboards.git.url; - rev = cfg.dashboards.git.rev; - sha256 = cfg.dashboards.git.sha256; - } - ); + # Dashboard providers + dashboards = [ + { + name = "homelab"; + type = "file"; + disableDeletion = false; + updateIntervalSeconds = 10; + allowUiUpdates = true; + options = { + path = "/var/lib/grafana/dashboards"; + }; + } + ]; - # Dashboard provisioning configuration - provisionDashboard = name: source: { - "grafana-dashboards/${name}.json" = { - inherit source; - user = "grafana"; - group = "grafana"; - mode = "0644"; - }; + # Notification channels + notifiers = cfg.notifications; }; +in { + options.homelab.services.grafana = { + enable = mkEnableOption "Grafana dashboard service"; - # Generate dashboard files from various sources - dashboardFiles = - # Default community dashboards - (foldl' ( - acc: name: - acc // (provisionDashboard name (fetchGrafanaDashboard name defaultDashboards.${name})) - ) {} (attrNames (filterAttrs (n: v: cfg.dashboards.defaults.${n}.enable) cfg.dashboards.defaults))) - # Custom file-based dashboards - // (foldl' ( - acc: dashboard: - acc // (provisionDashboard dashboard.name dashboard.source) - ) {} - cfg.dashboards.files) - # Git-synced dashboards - // (optionalAttrs (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") ( - let - gitDashboards = - if pathExists "${gitDashboardsRepo}/${cfg.dashboards.git.path}" - then builtins.readDir "${gitDashboardsRepo}/${cfg.dashboards.git.path}" - else {}; - in - mapAttrs' ( - filename: type: let - name = removeSuffix ".json" filename; - source = "${gitDashboardsRepo}/${cfg.dashboards.git.path}/${filename}"; - in - nameValuePair "grafana-dashboards/${name}.json" { - inherit source; - user = "grafana"; - group = "grafana"; - mode = "0644"; - } - ) (filterAttrs (name: type: type == "regular" && hasSuffix ".json" name) gitDashboards) - )); + port = mkOption { + type = types.port; + default = 3000; + description = "Port for Grafana web interface"; + }; - # Service-specific options beyond the standard interface - grafanaServiceOptions = { - # Authentication settings - auth = { - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; - }; + openFirewall = mkOption { + type = types.bool; + default = true; + description = "Whether to open firewall ports"; + }; - passwordFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to admin password file"; - }; + dataDir = mkOption { + type = types.str; + default = "/var/lib/grafana"; + description = "Directory to store Grafana data"; + }; - email = mkOption { - type = types.str; - default = "admin@${homelabCfg.externalDomain}"; - description = "Admin email address"; - }; + domain = mkOption { + type = types.str; + default = "grafana.${homelabCfg.externalDomain}"; + description = "Domain for Grafana"; + }; + + rootUrl = mkOption { + type = types.str; + default = "https://grafana.${homelabCfg.externalDomain}"; + description = "Root URL for Grafana"; + }; + + admin = { + user = mkOption { + type = types.str; + default = "admin"; + description = "Admin username"; }; - disableLoginForm = mkOption { - type = types.bool; - default = false; - description = "Disable the login form"; + password = mkOption { + type = types.str; + default = "admin"; + description = "Admin password (change this!)"; }; - oauthAutoLogin = mkOption { - type = types.bool; - default = false; - description = "Enable OAuth auto-login"; - }; - - anonymousAccess = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable anonymous access"; - }; - - orgName = mkOption { - type = types.str; - default = "Homelab"; - description = "Organization name for anonymous users"; - }; - - orgRole = mkOption { - type = types.enum ["Viewer" "Editor" "Admin"]; - default = "Viewer"; - description = "Role for anonymous users"; - }; - }; - - genericOauth = { - enabled = mkOption { - type = types.bool; - default = false; - description = "Enable generic OAuth"; - }; - - configFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to OAuth configuration file"; - }; + email = mkOption { + type = types.str; + default = "admin@${homelabCfg.externalDomain}"; + description = "Admin email"; }; }; - # Enhanced datasource configuration datasources = { prometheus = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus datasource"; - }; - url = mkOption { type = types.str; - default = "http://127.0.0.1:9090"; + default = "http://localhost:9090"; description = "Prometheus URL"; }; - - uid = mkOption { - type = types.str; - default = "prometheus"; - description = "Unique identifier for Prometheus datasource"; - }; - - scrapeInterval = mkOption { - type = types.str; - default = "15s"; - description = "Default scrape interval for Prometheus"; - }; - - manageAlerts = mkOption { - type = types.bool; - default = true; - description = "Manage alerts in Grafana"; - }; - - exemplarTraceIdDestinations = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Exemplar trace ID destinations"; - }; - }; - - loki = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable Loki datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://127.0.0.1:3100"; - description = "Loki URL"; - }; - - uid = mkOption { - type = types.str; - default = "loki"; - description = "Unique identifier for Loki datasource"; - }; - - maxLines = mkOption { - type = types.int; - default = 1000; - description = "Maximum lines to return from Loki"; - }; - - derivedFields = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Derived fields configuration for Loki"; - }; - }; - - influxdb = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable InfluxDB datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://127.0.0.1:8086"; - description = "InfluxDB URL"; - }; - - database = mkOption { - type = types.str; - default = "homelab"; - description = "InfluxDB database name"; - }; - - tokenFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to InfluxDB token file"; - }; - - uid = mkOption { - type = types.str; - default = "influxdb"; - description = "Unique identifier for InfluxDB datasource"; - }; - - version = mkOption { - type = types.enum ["1.x" "2.x"]; - default = "2.x"; - description = "InfluxDB version"; - }; - - organization = mkOption { - type = types.str; - default = "homelab"; - description = "InfluxDB organization (for v2.x)"; - }; - - bucket = mkOption { - type = types.str; - default = "homelab"; - description = "InfluxDB bucket (for v2.x)"; - }; }; extra = mkOption { type = types.listOf types.attrs; default = []; description = "Additional data sources"; - }; - }; - - # Enhanced dashboard configuration - dashboards = { - # Default community dashboards - defaults = mkOption { - type = types.attrsOf (types.submodule { - options = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable this default dashboard"; - }; - }; - }); - default = mapAttrs (name: config: {enable = false;}) defaultDashboards; - description = "Enable default community dashboards"; example = literalExpression '' - { - "node-exporter-full".enable = true; - "prometheus-2-0-stats".enable = true; - } + [ + { + name = "Loki"; + type = "loki"; + url = "http://localhost:3100"; + } + ] ''; }; - - # File-based dashboards - files = mkOption { - type = types.listOf (types.submodule { - options = { - name = mkOption { - type = types.str; - description = "Dashboard name (without .json extension)"; - }; - source = mkOption { - type = types.path; - description = "Path to dashboard JSON file"; - }; - }; - }); - default = []; - description = "Dashboard files to provision"; - }; - - # Git-based dashboard sync - git = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable git-based dashboard synchronization"; - }; - - url = mkOption { - type = types.str; - default = ""; - description = "Git repository URL for dashboards"; - }; - - rev = mkOption { - type = types.str; - default = "HEAD"; - description = "Git revision to use"; - }; - - sha256 = mkOption { - type = types.str; - default = ""; - description = "SHA256 hash of the git repository content"; - }; - - path = mkOption { - type = types.str; - default = "."; - description = "Path within the git repository containing dashboards"; - }; - - updateInterval = mkOption { - type = types.str; - default = "1h"; - description = "How often to check for dashboard updates"; - }; - }; - - path = mkOption { - type = types.str; - default = "/etc/grafana-dashboards"; - description = "Path where dashboard files are stored"; - }; }; - # Plugin configuration - plugins = mkOption { - type = types.listOf types.package; + notifications = mkOption { + type = types.listOf types.attrs; default = []; + description = "Notification channels configuration"; + example = literalExpression '' + [ + { + name = "discord-webhook"; + type = "discord"; + settings = { + url = "https://discord.com/api/webhooks/..."; + username = "Grafana"; + }; + } + ] + ''; + }; + + plugins = mkOption { + type = types.listOf types.str; + default = [ + "grafana-piechart-panel" + "grafana-worldmap-panel" + "grafana-clock-panel" + "grafana-simple-json-datasource" + ]; description = "Grafana plugins to install"; }; - # SMTP configuration smtp = { - enable = mkOption { + enabled = mkOption { type = types.bool; default = false; description = "Enable SMTP for email notifications"; @@ -405,10 +185,10 @@ with lib; let description = "SMTP username"; }; - passwordFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to SMTP password file"; + password = mkOption { + type = types.str; + default = ""; + description = "SMTP password"; }; fromAddress = mkOption { @@ -422,22 +202,9 @@ with lib; let default = "Homelab Grafana"; description = "From name"; }; - - skipVerify = mkOption { - type = types.bool; - default = false; - description = "Skip SSL certificate verification"; - }; }; - # Security settings security = { - secretKeyFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to secret key file for signing"; - }; - allowEmbedding = mkOption { type = types.bool; default = false; @@ -450,279 +217,200 @@ with lib; let description = "Set secure flag on cookies"; }; - contentSecurityPolicy = mkOption { - type = types.bool; - default = true; - description = "Enable Content Security Policy header"; - }; - - strictTransportSecurity = mkOption { - type = types.bool; - default = true; - description = "Enable Strict Transport Security header"; + secretKey = mkOption { + type = types.str; + default = "change-this-secret-key"; + description = "Secret key for signing (change this!)"; }; }; - # Data directory - dataDir = mkOption { - type = types.str; - default = "/var/lib/grafana"; - description = "Directory to store Grafana data"; + auth = { + anonymousEnabled = mkOption { + type = types.bool; + default = false; + description = "Enable anonymous access"; + }; + + disableLoginForm = mkOption { + type = types.bool; + default = false; + description = "Disable login form"; + }; }; - # Extra Grafana settings - extraSettings = mkOption { + extraConfig = mkOption { type = types.attrs; default = {}; - description = "Additional Grafana settings"; + description = "Additional Grafana configuration"; }; }; - # Enhanced datasource configuration - buildDatasources = let - # Build prometheus datasource - prometheusDatasource = optional cfg.datasources.prometheus.enable { - uid = cfg.datasources.prometheus.uid; - name = "Prometheus"; - type = "prometheus"; - url = cfg.datasources.prometheus.url; - access = "proxy"; - isDefault = true; - editable = false; - jsonData = { - timeInterval = cfg.datasources.prometheus.scrapeInterval; - queryTimeout = "60s"; - httpMethod = "POST"; - manageAlerts = cfg.datasources.prometheus.manageAlerts; - exemplarTraceIdDestinations = cfg.datasources.prometheus.exemplarTraceIdDestinations; - }; + config = mkIf cfg.enable { + services.grafana = { + enable = true; + settings = + recursiveUpdate { + server = { + http_addr = "0.0.0.0"; + http_port = cfg.port; + domain = cfg.domain; + root_url = cfg.rootUrl; + serve_from_sub_path = false; + }; + + database = { + type = "sqlite3"; + path = "${cfg.dataDir}/grafana.db"; + }; + + security = { + admin_user = cfg.admin.user; + admin_password = cfg.admin.password; + admin_email = cfg.admin.email; + allow_embedding = cfg.security.allowEmbedding; + cookie_secure = cfg.security.cookieSecure; + secret_key = cfg.security.secretKey; + }; + + users = { + allow_sign_up = false; + auto_assign_org = true; + auto_assign_org_role = "Viewer"; + }; + + auth.anonymous = { + enabled = cfg.auth.anonymousEnabled; + org_name = "Homelab"; + org_role = "Viewer"; + }; + + auth.basic = { + enabled = !cfg.auth.disableLoginForm; + }; + + smtp = mkIf cfg.smtp.enabled { + enabled = true; + host = cfg.smtp.host; + user = cfg.smtp.user; + password = cfg.smtp.password; + from_address = cfg.smtp.fromAddress; + from_name = cfg.smtp.fromName; + }; + + analytics = { + reporting_enabled = false; + check_for_updates = false; + }; + + log = { + mode = "console"; + level = "info"; + }; + + paths = { + data = cfg.dataDir; + logs = "${cfg.dataDir}/log"; + plugins = "${cfg.dataDir}/plugins"; + provisioning = "/etc/grafana/provisioning"; + }; + } + cfg.extraConfig; + + dataDir = cfg.dataDir; }; - # Build loki datasource - lokiDatasource = optional cfg.datasources.loki.enable { - uid = cfg.datasources.loki.uid; - name = "Loki"; - type = "loki"; - url = cfg.datasources.loki.url; - access = "proxy"; - editable = false; - jsonData = { - maxLines = cfg.datasources.loki.maxLines; - derivedFields = cfg.datasources.loki.derivedFields; - }; - }; + # Install plugins + systemd.services.grafana.preStart = mkIf (cfg.plugins != []) ( + concatStringsSep "\n" (map ( + plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true" + ) + cfg.plugins) + ); - # Build influxdb datasource - influxdbDatasource = optional cfg.datasources.influxdb.enable { - uid = cfg.datasources.influxdb.uid; - name = "InfluxDB"; - type = "influxdb"; - url = cfg.datasources.influxdb.url; - access = "proxy"; - database = cfg.datasources.influxdb.database; - editable = false; - jsonData = { - dbName = cfg.datasources.influxdb.database; - httpHeaderName1 = "Authorization"; - version = cfg.datasources.influxdb.version; - organization = cfg.datasources.influxdb.organization; - defaultBucket = cfg.datasources.influxdb.bucket; - }; - secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) { - httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}"; - }; - }; - - # Build extra datasources - extraDatasources = cfg.datasources.extra; - in - prometheusDatasource ++ lokiDatasource ++ influxdbDatasource ++ extraDatasources; -in { - options.homelab.services.grafana = serviceInterface.mkServiceInterface { - serviceName = "grafana"; - defaultPort = 3000; - defaultSubdomain = "grafana"; - monitoringPath = "/metrics"; - healthCheckPath = "/api/health"; - healthCheckConditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" - ]; - serviceOptions = grafanaServiceOptions; - }; - - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "grafana"; - - extraMonitoringLabels = { - component = "dashboard"; - }; - - # Additional health checks specific to Grafana - customHealthChecks = []; - - serviceConfig = mkMerge [ + # Provisioning configuration + environment.etc = { - services.grafana = { - enable = true; - dataDir = cfg.dataDir; - # declarativePlugins = - # cfg.plugins - # ++ (with pkgs.grafanaPlugins; [ - # grafana-exploretraces-app - # grafana-metricsdrilldown-app - # grafana-pyroscope-app - # grafana-lokiexplore-app - # grafana-worldmap-panel - # grafana-piechart-panel - # ]); - - settings = - recursiveUpdate { - server = { - http_port = cfg.port; - http_addr = "0.0.0.0"; - domain = "${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; - root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; - serve_from_sub_path = false; - }; - - database = { - type = "sqlite3"; - path = "${cfg.dataDir}/grafana.db"; - }; - - security = - { - admin_user = cfg.auth.admin.user; - admin_email = cfg.auth.admin.email; - # allow_embedding = cfg.security.allowEmbedding; - # cookie_secure = cfg.security.cookieSecure; - # content_security_policy = cfg.security.contentSecurityPolicy; - # strict_transport_security = cfg.security.strictTransportSecurity; - } - // (optionalAttrs (cfg.auth.admin.passwordFile != null) { - admin_password = "$__file{${cfg.auth.admin.passwordFile}}"; - }) - // (optionalAttrs (cfg.security.secretKeyFile != null) { - secret_key = "$__file{${cfg.security.secretKeyFile}}"; - }); - - users = { - allow_sign_up = false; - auto_assign_org = true; - auto_assign_org_role = "Viewer"; - }; - - "auth.anonymous" = { - enabled = cfg.auth.anonymousAccess.enable; - org_name = cfg.auth.anonymousAccess.orgName; - org_role = cfg.auth.anonymousAccess.orgRole; - }; - - "auth.basic" = { - enabled = !cfg.auth.disableLoginForm; - }; - - "auth.generic_oauth" = - mkIf cfg.auth.genericOauth.enabled { - enabled = true; - } - // (optionalAttrs (cfg.auth.genericOauth.configFile != null) { - client_id = "$__file{${cfg.auth.genericOauth.configFile}}"; - }); - - smtp = mkIf cfg.smtp.enable ({ - enabled = true; - host = cfg.smtp.host; - user = cfg.smtp.user; - from_address = cfg.smtp.fromAddress; - from_name = cfg.smtp.fromName; - skip_verify = cfg.smtp.skipVerify; - } - // (optionalAttrs (cfg.smtp.passwordFile != null) { - password = "$__file{${cfg.smtp.passwordFile}}"; - })); - - analytics = { - reporting_enabled = false; - check_for_updates = false; - }; - news.news_feed_enabled = false; - - feature_toggles = { - provisioning = true; - kubernetesDashboards = true; - }; - # paths = { - # plugins = "${cfg.dataDir}/plugins"; - # provisioning = "/etc/grafana/provisioning"; - # }; - } - cfg.extraSettings; - - provision = { - enable = true; - - datasources.settings.datasources = buildDatasources; - - dashboards.settings.providers = [ - { - name = "homelab-dashboards"; - type = "file"; - disableDeletion = false; - updateIntervalSeconds = 10; - allowUiUpdates = true; - options = { - path = cfg.dashboards.path; - }; - } - ]; - }; + "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON { + apiVersion = 1; + datasources = provisioningConfig.datasources; }; - # Provision dashboard files - environment.etc = dashboardFiles; - - # Ensure dashboard directory exists - systemd.tmpfiles.rules = [ - "d ${cfg.dashboards.path} 0755 grafana grafana -" - ]; + "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON { + apiVersion = 1; + providers = provisioningConfig.dashboards; + }; } - - # Git dashboard sync service (if enabled) - (mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") { - systemd.services.grafana-dashboard-sync = { - description = "Sync Grafana dashboards from git"; - after = ["grafana.service"]; - wantedBy = ["multi-user.target"]; - - serviceConfig = { - Type = "oneshot"; - User = "grafana"; - Group = "grafana"; - }; - - script = '' - echo "Syncing dashboards from git repository..." - # Dashboard files are already provisioned via Nix - # This service can be extended for runtime updates if needed - systemctl reload grafana.service - ''; + // (mkIf (cfg.notifications != []) { + "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON { + apiVersion = 1; + notifiers = provisioningConfig.notifiers; }; + }); - systemd.timers.grafana-dashboard-sync = { - description = "Timer for Grafana dashboard sync"; - wantedBy = ["timers.target"]; + # Create dashboard directory + systemd.tmpfiles.rules = [ + "d ${cfg.dataDir}/dashboards 0755 grafana grafana -" + ]; - timerConfig = { - OnCalendar = cfg.dashboards.git.updateInterval; - Persistent = true; - }; + # Open firewall if requested + networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port]; + + # Add to monitoring endpoints + homelab.monitoring.metrics = [ + { + name = "grafana"; + port = cfg.port; + path = "/metrics"; + jobName = "grafana"; + labels = { + service = "grafana"; + component = "monitoring"; }; - }) + } + ]; + + # Add health checks + homelab.monitoring.healthChecks = [ + { + name = "grafana-web-interface"; + port = cfg.port; + path = "/api/health"; + interval = "30s"; + conditions = [ + "[STATUS] == 200" + "[BODY].database == ok" + "[RESPONSE_TIME] < 2000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "web-interface"; + }; + } + { + name = "grafana-login-page"; + port = cfg.port; + path = "/login"; + interval = "60s"; + conditions = [ + "[STATUS] == 200" + "[RESPONSE_TIME] < 3000" + ]; + group = "monitoring"; + labels = { + service = "grafana"; + component = "login"; + }; + } + ]; + + # Add reverse proxy entry + homelab.reverseProxy.entries = [ + { + subdomain = "grafana"; + host = homelabCfg.hostname; + port = cfg.port; + } ]; }; } diff --git a/modules/homelab/services/monitoring/grafana_1.nix b/modules/homelab/services/monitoring/grafana_1.nix deleted file mode 100644 index c5ae73f..0000000 --- a/modules/homelab/services/monitoring/grafana_1.nix +++ /dev/null @@ -1,198 +0,0 @@ -# Example showing how to create a service using the standard interface -{ - config, - lib, - pkgs, - ... -}: -with lib; let - serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; - - cfg = config.homelab.services.grafana; - homelabCfg = config.homelab; - - # Service-specific options beyond the standard interface - grafanaServiceOptions = { - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; - }; - - passwordFile = mkOption { - type = types.str; - default = "admin"; - description = "Path to the Admin password file"; - }; - }; - - datasources = { - prometheus = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable Prometheus datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://127.0.0.1:9090"; - description = "Prometheus URL"; - }; - - uid = mkOption { - type = types.str; - default = "prometheus"; - description = "Unique identifier for Prometheus datasource"; - }; - }; - - loki = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable Loki datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://127.0.0.1:3100"; - description = "Loki URL"; - }; - - uid = mkOption { - type = types.str; - default = "loki"; - description = "Unique identifier for Loki datasource"; - }; - }; - - influxdb = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable InfluxDB datasource"; - }; - - url = mkOption { - type = types.str; - default = "http://127.0.0.1:8086"; - description = "InfluxDB URL"; - }; - - database = mkOption { - type = types.str; - default = "homelab"; - description = "InfluxDB database name"; - }; - - tokenFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to InfluxDB token file"; - }; - - uid = mkOption { - type = types.str; - default = "influxdb"; - description = "Unique identifier for InfluxDB datasource"; - }; - }; - - extra = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional data sources"; - }; - }; - - plugins = mkOption { - type = types.listOf types.package; - default = []; - description = "Grafana plugins to install"; - }; - }; -in { - options.homelab.services.grafana = serviceInterface.mkServiceInterface { - serviceName = "grafana"; - defaultPort = 3000; - defaultSubdomain = "grafana"; - monitoringPath = "/metrics"; - healthCheckPath = "/api/health"; - healthCheckConditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" - ]; - serviceOptions = grafanaServiceOptions; - }; - - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "grafana"; - - extraMonitoringLabels = { - component = "dashboard"; - }; - - serviceConfig = { - services.grafana = { - enable = true; - declarativePlugins = cfg.plugins; - - settings = { - server = { - http_port = cfg.port; - http_addr = "0.0.0.0"; - root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}"; - }; - - security = { - admin_user = cfg.admin.user; - admin_password = "$__file{${cfg.admin.passwordFile}}"; - }; - }; - - provision = { - enable = true; - datasources.settings = { - datasources = let - # Build datasource list - datasources = - [] - ++ optional cfg.datasources.prometheus.enable { - uid = cfg.datasources.prometheus.uid; - name = "Prometheus"; - type = "prometheus"; - url = cfg.datasources.prometheus.url; - } - ++ optional cfg.datasources.loki.enable { - uid = cfg.datasources.loki.uid; - name = "Loki"; - type = "loki"; - url = cfg.datasources.loki.url; - } - ++ optional cfg.datasources.influxdb.enable { - uid = cfg.datasources.influxdb.uid; - name = "InfluxDB"; - type = "influxdb"; - url = cfg.datasources.influxdb.url; - access = "proxy"; - jsonData = { - dbName = cfg.datasources.influxdb.database; - httpHeaderName1 = "Authorization"; - }; - secureJsonData = mkIf (cfg.datasources.influxdb.tokenPath != null) { - httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenPath}}"; - }; - } - ++ cfg.datasources.extra; - in - datasources; - }; - }; - }; - }; - }; -} diff --git a/modules/homelab/services/monitoring/grafana_gg.nix b/modules/homelab/services/monitoring/grafana_gg.nix deleted file mode 100644 index 64650cf..0000000 --- a/modules/homelab/services/monitoring/grafana_gg.nix +++ /dev/null @@ -1,416 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: -with lib; let - cfg = config.homelab.services.grafana; - homelabCfg = config.homelab; - - # Default dashboards for homelab monitoring - defaultDashboards = { - "node-exporter" = pkgs.fetchurl { - url = "https://grafana.com/api/dashboards/1860/revisions/37/download"; - sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this - }; - "prometheus-stats" = pkgs.fetchurl { - url = "https://grafana.com/api/dashboards/2/revisions/2/download"; - sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this - }; - }; - - # Grafana provisioning configuration - provisioningConfig = { - # Data sources - datasources = - [ - { - name = "Prometheus"; - type = "prometheus"; - access = "proxy"; - url = cfg.datasources.prometheus.url; - isDefault = true; - editable = false; - jsonData = { - timeInterval = "5s"; - queryTimeout = "60s"; - httpMethod = "POST"; - }; - } - ] - ++ cfg.datasources.extra; - - # Dashboard providers - dashboards = [ - { - name = "homelab"; - type = "file"; - disableDeletion = false; - updateIntervalSeconds = 10; - allowUiUpdates = true; - options = { - path = "/var/lib/grafana/dashboards"; - }; - } - ]; - - # Notification channels - notifiers = cfg.notifications; - }; -in { - options.homelab.services.grafana = { - enable = mkEnableOption "Grafana dashboard service"; - - port = mkOption { - type = types.port; - default = 3000; - description = "Port for Grafana web interface"; - }; - - openFirewall = mkOption { - type = types.bool; - default = true; - description = "Whether to open firewall ports"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/grafana"; - description = "Directory to store Grafana data"; - }; - - domain = mkOption { - type = types.str; - default = "grafana.${homelabCfg.externalDomain}"; - description = "Domain for Grafana"; - }; - - rootUrl = mkOption { - type = types.str; - default = "https://grafana.${homelabCfg.externalDomain}"; - description = "Root URL for Grafana"; - }; - - admin = { - user = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; - }; - - password = mkOption { - type = types.str; - default = "admin"; - description = "Admin password (change this!)"; - }; - - email = mkOption { - type = types.str; - default = "admin@${homelabCfg.externalDomain}"; - description = "Admin email"; - }; - }; - - datasources = { - prometheus = { - url = mkOption { - type = types.str; - default = "http://localhost:9090"; - description = "Prometheus URL"; - }; - }; - - extra = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional data sources"; - example = literalExpression '' - [ - { - name = "Loki"; - type = "loki"; - url = "http://localhost:3100"; - } - ] - ''; - }; - }; - - notifications = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Notification channels configuration"; - example = literalExpression '' - [ - { - name = "discord-webhook"; - type = "discord"; - settings = { - url = "https://discord.com/api/webhooks/..."; - username = "Grafana"; - }; - } - ] - ''; - }; - - plugins = mkOption { - type = types.listOf types.str; - default = [ - "grafana-piechart-panel" - "grafana-worldmap-panel" - "grafana-clock-panel" - "grafana-simple-json-datasource" - ]; - description = "Grafana plugins to install"; - }; - - smtp = { - enabled = mkOption { - type = types.bool; - default = false; - description = "Enable SMTP for email notifications"; - }; - - host = mkOption { - type = types.str; - default = "localhost:587"; - description = "SMTP server host:port"; - }; - - user = mkOption { - type = types.str; - default = ""; - description = "SMTP username"; - }; - - password = mkOption { - type = types.str; - default = ""; - description = "SMTP password"; - }; - - fromAddress = mkOption { - type = types.str; - default = "grafana@${homelabCfg.externalDomain}"; - description = "From email address"; - }; - - fromName = mkOption { - type = types.str; - default = "Homelab Grafana"; - description = "From name"; - }; - }; - - security = { - allowEmbedding = mkOption { - type = types.bool; - default = false; - description = "Allow embedding Grafana in iframes"; - }; - - cookieSecure = mkOption { - type = types.bool; - default = true; - description = "Set secure flag on cookies"; - }; - - secretKey = mkOption { - type = types.str; - default = "change-this-secret-key"; - description = "Secret key for signing (change this!)"; - }; - }; - - auth = { - anonymousEnabled = mkOption { - type = types.bool; - default = false; - description = "Enable anonymous access"; - }; - - disableLoginForm = mkOption { - type = types.bool; - default = false; - description = "Disable login form"; - }; - }; - - extraConfig = mkOption { - type = types.attrs; - default = {}; - description = "Additional Grafana configuration"; - }; - }; - - config = mkIf cfg.enable { - services.grafana = { - enable = true; - settings = - recursiveUpdate { - server = { - http_addr = "0.0.0.0"; - http_port = cfg.port; - domain = cfg.domain; - root_url = cfg.rootUrl; - serve_from_sub_path = false; - }; - - database = { - type = "sqlite3"; - path = "${cfg.dataDir}/grafana.db"; - }; - - security = { - admin_user = cfg.admin.user; - admin_password = cfg.admin.password; - admin_email = cfg.admin.email; - allow_embedding = cfg.security.allowEmbedding; - cookie_secure = cfg.security.cookieSecure; - secret_key = cfg.security.secretKey; - }; - - users = { - allow_sign_up = false; - auto_assign_org = true; - auto_assign_org_role = "Viewer"; - }; - - auth.anonymous = { - enabled = cfg.auth.anonymousEnabled; - org_name = "Homelab"; - org_role = "Viewer"; - }; - - auth.basic = { - enabled = !cfg.auth.disableLoginForm; - }; - - smtp = mkIf cfg.smtp.enabled { - enabled = true; - host = cfg.smtp.host; - user = cfg.smtp.user; - password = cfg.smtp.password; - from_address = cfg.smtp.fromAddress; - from_name = cfg.smtp.fromName; - }; - - analytics = { - reporting_enabled = false; - check_for_updates = false; - }; - - log = { - mode = "console"; - level = "info"; - }; - - paths = { - data = cfg.dataDir; - logs = "${cfg.dataDir}/log"; - plugins = "${cfg.dataDir}/plugins"; - provisioning = "/etc/grafana/provisioning"; - }; - } - cfg.extraConfig; - - dataDir = cfg.dataDir; - }; - - # Install plugins - systemd.services.grafana.preStart = mkIf (cfg.plugins != []) ( - concatStringsSep "\n" (map ( - plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true" - ) - cfg.plugins) - ); - - # Provisioning configuration - environment.etc = - { - "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON { - apiVersion = 1; - datasources = provisioningConfig.datasources; - }; - - "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON { - apiVersion = 1; - providers = provisioningConfig.dashboards; - }; - } - // (mkIf (cfg.notifications != []) { - "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON { - apiVersion = 1; - notifiers = provisioningConfig.notifiers; - }; - }); - - # Create dashboard directory - systemd.tmpfiles.rules = [ - "d ${cfg.dataDir}/dashboards 0755 grafana grafana -" - ]; - - # Open firewall if requested - networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port]; - - # Add to monitoring endpoints - homelab.monitoring.metrics = [ - { - name = "grafana"; - port = cfg.port; - path = "/metrics"; - jobName = "grafana"; - labels = { - service = "grafana"; - component = "monitoring"; - }; - } - ]; - - # Add health checks - homelab.monitoring.healthChecks = [ - { - name = "grafana-web-interface"; - port = cfg.port; - path = "/api/health"; - interval = "30s"; - conditions = [ - "[STATUS] == 200" - "[BODY].database == ok" - "[RESPONSE_TIME] < 2000" - ]; - group = "monitoring"; - labels = { - service = "grafana"; - component = "web-interface"; - }; - } - { - name = "grafana-login-page"; - port = cfg.port; - path = "/login"; - interval = "60s"; - conditions = [ - "[STATUS] == 200" - "[RESPONSE_TIME] < 3000" - ]; - group = "monitoring"; - labels = { - service = "grafana"; - component = "login"; - }; - } - ]; - - # Add reverse proxy entry - homelab.reverseProxy.entries = [ - { - subdomain = "grafana"; - host = homelabCfg.hostname; - port = cfg.port; - } - ]; - }; -} diff --git a/modules/homelab/services/monitoring/influxdb.nix b/modules/homelab/services/monitoring/influxdb.nix index 75bd525..e69de29 100644 --- a/modules/homelab/services/monitoring/influxdb.nix +++ b/modules/homelab/services/monitoring/influxdb.nix @@ -1,399 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: -with lib; let - serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; - - cfg = config.homelab.services.influxdb; - homelabCfg = config.homelab; - - # Service-specific options beyond the standard interface - influxdbServiceOptions = { - version = mkOption { - type = types.enum ["1" "2"]; - default = "2"; - description = "InfluxDB version to use"; - }; - - dataDir = mkOption { - type = types.str; - default = "/var/lib/influxdb"; - description = "Directory to store InfluxDB data"; - }; - - # InfluxDB 2.x options - v2 = { - org = mkOption { - type = types.str; - default = "homelab"; - description = "Initial organization name"; - }; - - bucket = mkOption { - type = types.str; - default = "homelab"; - description = "Initial bucket name"; - }; - - username = mkOption { - type = types.str; - default = "admin"; - description = "Initial admin username"; - }; - - password = mkOption { - type = types.str; - default = "changeme"; - description = "Initial admin password"; - }; - - retention = mkOption { - type = types.str; - default = "30d"; - description = "Default retention period"; - }; - - tokenFile = mkOption { - type = types.nullOr types.path; - default = null; - description = "File containing the admin token"; - }; - }; - - # InfluxDB 1.x options - v1 = { - database = mkOption { - type = types.str; - default = "homelab"; - description = "Default database name"; - }; - - retention = mkOption { - type = types.str; - default = "30d"; - description = "Default retention period"; - }; - - adminUser = mkOption { - type = types.str; - default = "admin"; - description = "Admin username"; - }; - - adminPassword = mkOption { - type = types.str; - default = "changeme"; - description = "Admin password"; - }; - - httpAuth = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable HTTP authentication"; - }; - }; - }; - - extraConfig = mkOption { - type = types.attrs; - default = {}; - description = "Additional InfluxDB configuration"; - }; - - backup = { - enable = mkOption { - type = types.bool; - default = false; - description = "Enable automatic backups"; - }; - - schedule = mkOption { - type = types.str; - default = "daily"; - description = "Backup schedule"; - }; - - retention = mkOption { - type = types.str; - default = "7d"; - description = "Backup retention period"; - }; - }; - }; - - # Generate configuration based on version - influxdbConfig = - if cfg.version == "2" - then - recursiveUpdate { - bolt-path = "${cfg.dataDir}/influxd.bolt"; - engine-path = "${cfg.dataDir}/engine"; - http-bind-address = "0.0.0.0:${toString cfg.port}"; - reporting-disabled = true; - log-level = "info"; - } - cfg.extraConfig - else - recursiveUpdate { - meta = { - dir = "${cfg.dataDir}/meta"; - }; - data = { - dir = "${cfg.dataDir}/data"; - wal-dir = "${cfg.dataDir}/wal"; - }; - http = { - bind-address = "0.0.0.0:${toString cfg.port}"; - auth-enabled = cfg.v1.httpAuth.enable; - }; - logging = { - level = "info"; - }; - reporting-disabled = true; - } - cfg.extraConfig; -in { - options.homelab.services.influxdb = serviceInterface.mkServiceInterface { - serviceName = "influxdb"; - defaultPort = 8086; - defaultSubdomain = "influxdb"; - monitoringPath = "/metrics"; - healthCheckPath = - if cfg.version == "2" - then "/health" - else "/ping"; - healthCheckConditions = - if cfg.version == "2" - then ["[STATUS] == 200" "[BODY].status == pass"] - else ["[STATUS] == 204" "[RESPONSE_TIME] < 1000"]; - serviceOptions = influxdbServiceOptions; - }; - - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "influxdb"; - - extraMonitoringLabels = { - component = "timeseries-database"; - version = cfg.version; - }; - - customHealthChecks = - [ - { - name = "influxdb-query"; - port = cfg.port; - path = - if cfg.version == "2" - then "/api/v2/query" - else "/query"; - interval = "60s"; - method = "POST"; - conditions = [ - "[STATUS] < 500" - "[RESPONSE_TIME] < 3000" - ]; - group = "monitoring"; - labels = { - service = "influxdb"; - component = "query-engine"; - }; - } - ] - ++ optional (cfg.version == "2") { - name = "influxdb-write"; - port = cfg.port; - path = "/api/v2/write"; - interval = "60s"; - method = "POST"; - conditions = [ - "[STATUS] < 500" - "[RESPONSE_TIME] < 2000" - ]; - group = "monitoring"; - labels = { - service = "influxdb"; - component = "write-engine"; - }; - }; - - serviceConfig = mkMerge [ - # Common configuration - { - # Create data directories - systemd.tmpfiles.rules = - [ - "d ${cfg.dataDir} 0755 influxdb influxdb -" - ] - ++ optionals (cfg.version == "1") [ - "d ${cfg.dataDir}/meta 0755 influxdb influxdb -" - "d ${cfg.dataDir}/data 0755 influxdb influxdb -" - "d ${cfg.dataDir}/wal 0755 influxdb influxdb -" - ]; - - # Ensure influxdb user exists - users.users.influxdb = { - isSystemUser = true; - group = "influxdb"; - home = cfg.dataDir; - createHome = true; - }; - - users.groups.influxdb = {}; - } - - # InfluxDB 2.x configuration - (mkIf (cfg.version == "2") { - services.influxdb2 = { - enable = true; - dataDir = cfg.dataDir; - settings = influxdbConfig; - }; - - # Initial setup for InfluxDB 2.x - systemd.services.influxdb2-setup = { - description = "InfluxDB 2.x initial setup"; - after = ["influxdb2.service"]; - wants = ["influxdb2.service"]; - wantedBy = ["multi-user.target"]; - serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; - User = "influxdb"; - Group = "influxdb"; - }; - script = let - setupScript = pkgs.writeShellScript "influxdb2-setup" '' - # Wait for InfluxDB to be ready - timeout=60 - while [ $timeout -gt 0 ]; do - if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/health > /dev/null 2>&1; then - break - fi - sleep 1 - timeout=$((timeout - 1)) - done - - # Check if setup is already done - if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/api/v2/setup > /dev/null 2>&1; then - # Setup InfluxDB if not already done - ${pkgs.influxdb2}/bin/influx setup \ - --host http://localhost:${toString cfg.port} \ - --org "${cfg.v2.org}" \ - --bucket "${cfg.v2.bucket}" \ - --username "${cfg.v2.username}" \ - --password "${cfg.v2.password}" \ - --retention "${cfg.v2.retention}" \ - --force - fi - ''; - in "${setupScript}"; - }; - }) - - # InfluxDB 1.x configuration - (mkIf (cfg.version == "1") { - services.influxdb = { - enable = true; - dataDir = cfg.dataDir; - extraConfig = influxdbConfig; - }; - - # Initial setup for InfluxDB 1.x - systemd.services.influxdb-setup = mkIf cfg.v1.httpAuth.enable { - description = "InfluxDB 1.x initial setup"; - after = ["influxdb.service"]; - wants = ["influxdb.service"]; - wantedBy = ["multi-user.target"]; - serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; - User = "influxdb"; - Group = "influxdb"; - }; - script = let - setupScript = pkgs.writeShellScript "influxdb-setup" '' - # Wait for InfluxDB to be ready - timeout=60 - while [ $timeout -gt 0 ]; do - if ${pkgs.curl}/bin/curl -f http://localhost:${toString cfg.port}/ping > /dev/null 2>&1; then - break - fi - sleep 1 - timeout=$((timeout - 1)) - done - - # Create admin user - ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -execute "CREATE USER \"${cfg.v1.adminUser}\" WITH PASSWORD '${cfg.v1.adminPassword}' WITH ALL PRIVILEGES" || true - - # Create database - ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -username "${cfg.v1.adminUser}" -password "${cfg.v1.adminPassword}" -execute "CREATE DATABASE \"${cfg.v1.database}\"" || true - - # Set retention policy - ${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port} -username "${cfg.v1.adminUser}" -password "${cfg.v1.adminPassword}" -database "${cfg.v1.database}" -execute "CREATE RETENTION POLICY \"default\" ON \"${cfg.v1.database}\" DURATION ${cfg.v1.retention} REPLICATION 1 DEFAULT" || true - ''; - in "${setupScript}"; - }; - }) - - # Backup configuration - (mkIf cfg.backup.enable { - systemd.services.influxdb-backup = { - description = "InfluxDB backup"; - serviceConfig = { - Type = "oneshot"; - User = "influxdb"; - Group = "influxdb"; - }; - script = let - backupScript = - if cfg.version == "2" - then - pkgs.writeShellScript "influxdb2-backup" '' - backup_dir="${cfg.dataDir}/backups/$(date +%Y%m%d_%H%M%S)" - mkdir -p "$backup_dir" - ${pkgs.influxdb2}/bin/influx backup \ - --host http://localhost:${toString cfg.port} \ - --org "${cfg.v2.org}" \ - "$backup_dir" - - # Clean old backups - find "${cfg.dataDir}/backups" -type d -mtime +${cfg.backup.retention} -exec rm -rf {} + || true - '' - else - pkgs.writeShellScript "influxdb-backup" '' - backup_dir="${cfg.dataDir}/backups/$(date +%Y%m%d_%H%M%S)" - mkdir -p "$backup_dir" - ${pkgs.influxdb}/bin/influxd backup \ - -host localhost:${toString cfg.port} \ - -database "${cfg.v1.database}" \ - "$backup_dir" - - # Clean old backups - find "${cfg.dataDir}/backups" -type d -mtime +${cfg.backup.retention} -exec rm -rf {} + || true - ''; - in "${backupScript}"; - }; - - systemd.timers.influxdb-backup = { - description = "InfluxDB backup timer"; - wantedBy = ["timers.target"]; - timerConfig = { - OnCalendar = cfg.backup.schedule; - Persistent = true; - RandomizedDelaySec = "5m"; - }; - }; - - # Create backup directory - systemd.tmpfiles.rules = [ - "d ${cfg.dataDir}/backups 0755 influxdb influxdb -" - ]; - }) - ]; - }; -} diff --git a/modules/homelab/services/monitoring/loki.nix b/modules/homelab/services/monitoring/loki.nix index 4467b2a..e69de29 100644 --- a/modules/homelab/services/monitoring/loki.nix +++ b/modules/homelab/services/monitoring/loki.nix @@ -1,356 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: -with lib; let - serviceInterface = import ../../lib/service-interface.nix {inherit lib;}; - - cfg = config.homelab.services.loki; - homelabCfg = config.homelab; - - # Service-specific options beyond the standard interface - lokiServiceOptions = { - # Storage configuration - storage = { - type = mkOption { - type = types.enum ["filesystem" "s3" "gcs"]; - default = "filesystem"; - description = "Storage backend type"; - }; - - filesystem = { - directory = mkOption { - type = types.str; - default = "/var/lib/loki"; - description = "Directory for filesystem storage"; - }; - }; - - s3 = { - endpoint = mkOption { - type = types.nullOr types.str; - default = null; - description = "S3 endpoint URL"; - }; - - bucket = mkOption { - type = types.nullOr types.str; - default = null; - description = "S3 bucket name"; - }; - - region = mkOption { - type = types.nullOr types.str; - default = null; - description = "S3 region"; - }; - - accessKeyId = mkOption { - type = types.nullOr types.str; - default = null; - description = "S3 access key ID"; - }; - - secretAccessKey = mkOption { - type = types.nullOr types.path; - default = null; - description = "Path to file containing S3 secret access key"; - }; - }; - }; - - # Retention configuration - retention = { - period = mkOption { - type = types.str; - default = "168h"; # 7 days - description = "Log retention period"; - }; - - streamRetention = mkOption { - type = types.listOf (types.submodule { - options = { - selector = mkOption { - type = types.str; - description = "Log stream selector"; - example = "{environment=\"development\"}"; - }; - priority = mkOption { - type = types.int; - description = "Rule priority (higher = more important)"; - default = 1; - }; - period = mkOption { - type = types.str; - description = "Retention period for this stream"; - example = "24h"; - }; - }; - }); - default = []; - description = "Per-stream retention rules"; - }; - }; - - # Performance tuning - limits = { - rejectOldSamples = mkOption { - type = types.bool; - default = true; - description = "Reject samples older than max age"; - }; - - rejectOldSamplesMaxAge = mkOption { - type = types.str; - default = "168h"; - description = "Maximum age for samples"; - }; - - ingestionRateMB = mkOption { - type = types.int; - default = 4; - description = "Ingestion rate limit in MB/s per tenant"; - }; - - ingestionBurstSizeMB = mkOption { - type = types.int; - default = 6; - description = "Ingestion burst size in MB per tenant"; - }; - - maxStreamsPerUser = mkOption { - type = types.int; - default = 10000; - description = "Maximum number of streams per user"; - }; - - maxLineSize = mkOption { - type = types.str; - default = "256KB"; - description = "Maximum line size"; - }; - }; - - # Authentication - auth = { - enabled = mkOption { - type = types.bool; - default = false; - description = "Enable authentication"; - }; - }; - - # Extra configuration options - extraConfig = mkOption { - type = types.attrs; - default = {}; - description = "Additional Loki configuration options"; - }; - - # Data directory - dataDir = mkOption { - type = types.str; - default = "/var/lib/loki"; - description = "Directory to store Loki data"; - }; - }; - - # Build the Loki configuration - lokiConfig = - recursiveUpdate { - # Server configuration - server = { - http_listen_port = cfg.port; - grpc_listen_port = cfg.port + 1000; # e.g., 3100 -> 4100 - http_listen_address = "0.0.0.0"; - grpc_listen_address = "0.0.0.0"; - log_level = cfg.monitoring.extraLabels.log_level or "info"; - }; - - # Authentication - auth_enabled = cfg.auth.enabled; - - # Analytics - analytics.reporting_enabled = false; - - # Common configuration for single-binary mode - common = { - ring = { - instance_addr = "127.0.0.1"; - kvstore.store = "inmemory"; - }; - replication_factor = 1; - path_prefix = cfg.dataDir; - }; - - # Schema configuration - schema_config = { - configs = [ - { - from = "2020-05-15"; - store = "tsdb"; - object_store = cfg.storage.type; - schema = "v13"; - index = { - prefix = "index_"; - period = "24h"; - }; - } - ]; - }; - - # Storage configuration - storage_config = mkMerge [ - # Filesystem storage - (mkIf (cfg.storage.type == "filesystem") { - filesystem.directory = "${cfg.storage.filesystem.directory}/chunks"; - }) - - # S3 storage - (mkIf (cfg.storage.type == "s3") { - aws = - { - s3 = cfg.storage.s3.endpoint; - bucketnames = cfg.storage.s3.bucket; - region = cfg.storage.s3.region; - access_key_id = cfg.storage.s3.accessKeyId; - } - // (optionalAttrs (cfg.storage.s3.secretAccessKey != null) { - secret_access_key = "$__file{${cfg.storage.s3.secretAccessKey}}"; - }); - }) - ]; - - # Limits configuration - limits_config = - { - reject_old_samples = cfg.limits.rejectOldSamples; - reject_old_samples_max_age = cfg.limits.rejectOldSamplesMaxAge; - ingestion_rate_mb = cfg.limits.ingestionRateMB; - ingestion_burst_size_mb = cfg.limits.ingestionBurstSizeMB; - max_streams_per_user = cfg.limits.maxStreamsPerUser; - max_line_size = cfg.limits.maxLineSize; - - # Retention configuration - retention_period = cfg.retention.period; - } - // (optionalAttrs (cfg.retention.streamRetention != []) { - retention_stream = - map (rule: { - selector = rule.selector; - priority = rule.priority; - period = rule.period; - }) - cfg.retention.streamRetention; - }); - - # Table manager for retention - table_manager = { - retention_deletes_enabled = true; - retention_period = cfg.retention.period; - }; - - # Compactor configuration - compactor = { - working_directory = "${cfg.dataDir}/compactor"; - # shared_store = cfg.storage.type; - compaction_interval = "10m"; - # retention_enabled = true; - # retention_delete_delay = "2h"; - # retention_delete_worker_count = 150; - }; - - # Query range configuration - query_range = { - results_cache = { - cache = { - embedded_cache = { - enabled = true; - max_size_mb = 100; - }; - }; - }; - }; - - # Frontend configuration - frontend = { - max_outstanding_per_tenant = 256; - compress_responses = true; - }; - - # Query scheduler - query_scheduler = { - max_outstanding_requests_per_tenant = 256; - }; - - # Runtime configuration - runtime_config = { - file = "/etc/loki/runtime.yml"; - }; - } - cfg.extraConfig; -in { - options.homelab.services.loki = serviceInterface.mkServiceInterface { - serviceName = "loki"; - defaultPort = 3100; - defaultSubdomain = "loki"; - monitoringPath = "/metrics"; - healthCheckPath = "/ready"; - healthCheckConditions = [ - "[STATUS] == 200" - "[RESPONSE_TIME] < 2000" - ]; - serviceOptions = lokiServiceOptions; - }; - - config = serviceInterface.mkServiceConfig { - inherit config cfg homelabCfg; - serviceName = "loki"; - - extraMonitoringLabels = { - component = "log-aggregation"; - log_level = "info"; - }; - - customHealthChecks = [ - { - name = "loki-health"; - port = cfg.port; - # https://grafana.com/docs/loki/latest/reference/loki-http-api/#status-endpoints - path = "/loki/api/v1/status/buildinfo"; - interval = "30s"; - conditions = ["[STATUS] == 200"]; - group = "logging"; - labels = { - service = "loki"; - component = "api"; - }; - } - ]; - - serviceConfig = mkMerge [ - { - services.loki = { - enable = true; - dataDir = cfg.dataDir; - configuration = lokiConfig; - }; - - # Ensure data directories exist - systemd.tmpfiles.rules = [ - "d ${cfg.dataDir} 0755 loki loki -" - "d ${cfg.dataDir}/chunks 0755 loki loki -" - "d ${cfg.dataDir}/compactor 0755 loki loki -" - ]; - - # Runtime configuration file for dynamic updates - environment.etc."loki/runtime.yml".text = '' - # Runtime configuration for Loki - # This file can be updated without restarting Loki - ''; - } - ]; - }; -} diff --git a/modules/homelab/services/monitoring/prometheus.nix b/modules/homelab/services/monitoring/prometheus.nix index b4ac904..76c30ff 100644 --- a/modules/homelab/services/monitoring/prometheus.nix +++ b/modules/homelab/services/monitoring/prometheus.nix @@ -19,13 +19,12 @@ with lib; let mapAttrsToList (jobName: endpoints: { job_name = jobName; scrape_interval = head endpoints.scrapeInterval or ["30s"]; - static_configs = - map - (endpoint: { - targets = ["${endpoint.host}:${toString endpoint.port}"]; - labels = endpoint.labels; - }) - endpoints; + static_configs = [ + { + targets = map (endpoint: "${endpoint.host}:${toString endpoint.port}") endpoints; + labels = fold (endpoint: acc: acc // endpoint.labels) {} endpoints; + } + ]; metrics_path = head endpoints.path or [null]; }) jobGroups; diff --git a/modules/homelab/services/monitoring/promtail.nix b/modules/homelab/services/monitoring/promtail.nix new file mode 100644 index 0000000..e69de29 diff --git a/modules/homelab/services/postgres.nix b/modules/homelab/services/postgres.nix new file mode 100644 index 0000000..e69de29 diff --git a/modules/homelab/services/prometheus.nix b/modules/homelab/services/prometheus.nix deleted file mode 100644 index 7457568..0000000 --- a/modules/homelab/services/prometheus.nix +++ /dev/null @@ -1,252 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: -with lib; let - serviceName = "prometheus"; - cfg = config.homelab.services.${serviceName}; - homelabCfg = config.homelab; - - # Generate Prometheus scrape configs from global monitoring data - prometheusScrapeConfigs = let - # Get all metrics - try global first, fallback to local - allMetrics = homelabCfg.monitoring.global.allMetrics - or homelabCfg.monitoring.allMetrics - or []; - - jobGroups = groupBy (m: m.jobName) allMetrics; - - scrapeConfigs = - mapAttrsToList (jobName: endpoints: { - job_name = jobName; - scrape_interval = head endpoints.scrapeInterval or ["30s"]; - static_configs = - map - (endpoint: { - targets = ["${endpoint.host}:${toString endpoint.port}"]; - labels = endpoint.labels; - }) - endpoints; - metrics_path = head endpoints.path or ["/metrics"]; - }) - jobGroups; - in - scrapeConfigs; - - # Standard alerting rules for homelab - alertingRules = [ - { - name = "homelab.rules"; - rules = [ - { - alert = "InstanceDown"; - expr = "up == 0"; - for = "5m"; - labels = {severity = "critical";}; - annotations = { - summary = "Instance {{ $labels.instance }} down"; - description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."; - }; - } - { - alert = "HighCPUUsage"; - expr = "100 - (avg by(instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80"; - for = "10m"; - labels = {severity = "warning";}; - annotations = { - summary = "High CPU usage on {{ $labels.instance }}"; - description = "CPU usage is above 80% for more than 10 minutes on {{ $labels.instance }}."; - }; - } - { - alert = "HighMemoryUsage"; - expr = "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85"; - for = "10m"; - labels = {severity = "warning";}; - annotations = { - summary = "High memory usage on {{ $labels.instance }}"; - description = "Memory usage is above 85% for more than 10 minutes on {{ $labels.instance }}."; - }; - } - { - alert = "DiskSpaceLow"; - expr = "((node_filesystem_size_bytes - node_filesystem_avail_bytes) / node_filesystem_size_bytes) * 100 > 90"; - for = "5m"; - labels = {severity = "critical";}; - annotations = { - summary = "Disk space low on {{ $labels.instance }}"; - description = "Disk usage is above 90% on {{ $labels.instance }} {{ $labels.mountpoint }}."; - }; - } - ]; - } - ]; -in { - imports = [ - (import ../lib/features/monitoring.nix serviceName) - (import ../lib/features/logging.nix serviceName) - (import ../lib/features/proxy.nix serviceName) - ]; - - # Core service options - options.homelab.services.${serviceName} = { - enable = mkEnableOption "Prometheus Monitoring Server"; - - port = mkOption { - type = types.port; - default = 9090; - }; - - description = mkOption { - type = types.str; - default = "Prometheus Monitoring Server"; - }; - - # Prometheus-specific options - retention = mkOption { - type = types.str; - default = "15d"; - description = "How long to retain metrics data"; - }; - - alertmanager = { - enable = mkOption { - type = types.bool; - default = true; - description = "Enable integration with Alertmanager"; - }; - - url = mkOption { - type = types.str; - default = "alertmanager.${homelabCfg.domain}:9093"; - description = "Alertmanager URL"; - }; - }; - - extraScrapeConfigs = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional scrape configurations"; - }; - - extraAlertingRules = mkOption { - type = types.listOf types.attrs; - default = []; - description = "Additional alerting rules"; - }; - - globalConfig = mkOption { - type = types.attrs; - default = { - scrape_interval = "15s"; - evaluation_interval = "15s"; - }; - description = "Global Prometheus configuration"; - }; - - extraFlags = mkOption { - type = types.listOf types.str; - default = []; - description = "Extra command line flags"; - }; - - ruleFiles = mkOption { - type = types.listOf types.path; - default = []; - description = "Additional rule files to load"; - }; - }; - - # Service configuration with smart defaults - config = mkIf cfg.enable (mkMerge [ - # Core Prometheus service - { - services.prometheus = { - enable = true; - port = cfg.port; - listenAddress = "0.0.0.0"; - retentionTime = cfg.retention; - - globalConfig = cfg.globalConfig; - extraFlags = cfg.extraFlags; - - # Automatically aggregate all metrics from the fleet - scrapeConfigs = prometheusScrapeConfigs ++ cfg.extraScrapeConfigs; - - # Include standard + custom alerting rules - ruleFiles = - map (ruleGroup: - pkgs.writeText "${ruleGroup.name}.yml" (builtins.toJSON { - groups = [ruleGroup]; - })) (alertingRules ++ cfg.extraAlertingRules) - ++ cfg.ruleFiles; - - # Connect to Alertmanager if enabled - alertmanagers = mkIf cfg.alertmanager.enable [ - { - static_configs = [ - { - targets = [cfg.alertmanager.url]; - } - ]; - } - ]; - }; - - networking.firewall.allowedTCPPorts = [cfg.port]; - - homelab.services.${serviceName}.monitoring.enable = mkDefault true; - } - - # Smart defaults for Prometheus - (mkIf cfg.monitoring.enable { - homelab.services.${serviceName}.monitoring = mkDefault { - metrics = { - path = "/metrics"; - extraEndpoints = []; - }; - healthCheck = { - path = "/-/healthy"; - conditions = ["[STATUS] == 200" "[RESPONSE_TIME] < 1000"]; - extraChecks = [ - { - name = "prometheus-ready"; - port = cfg.port; - path = "/-/ready"; - conditions = ["[STATUS] == 200"]; - group = "monitoring"; - } - ]; - }; - extraLabels = { - component = "monitoring-server"; - tier = "monitoring"; - }; - }; - }) - - (mkIf cfg.logging.enable { - homelab.services.${serviceName}.logging = mkDefault { - files = ["/var/log/prometheus/prometheus.log"]; - parsing = { - # Prometheus log format: ts=2024-01-01T12:00:00.000Z caller=main.go:123 level=info msg="message" - regex = "^ts=(?P[^ ]+) caller=(?P[^ ]+) level=(?P\\w+) msg=\"(?P[^\"]*)\""; - extractFields = ["level" "caller"]; - }; - extraLabels = { - component = "monitoring-server"; - application = "prometheus"; - }; - }; - }) - - (mkIf cfg.proxy.enable { - homelab.services.${serviceName}.proxy = mkDefault { - subdomain = "prometheus"; - enableAuth = true; # Admin interface needs protection - }; - }) - ]); -} diff --git a/modules/homelab/services/prometheus_old.nix b/modules/homelab/services/prometheus_old.nix new file mode 100644 index 0000000..9485b3a --- /dev/null +++ b/modules/homelab/services/prometheus_old.nix @@ -0,0 +1,208 @@ +# modules/services/prometheus.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.homelab.services.prometheus; + globalCfg = config.homelab.global; +in { + options.homelab.services.prometheus = { + enable = mkEnableOption "Prometheus monitoring server"; + + port = mkOption { + type = types.port; + default = 9090; + description = "Prometheus server port"; + }; + + webExternalUrl = mkOption { + type = types.str; + default = "http://${globalCfg.hostname}:${toString cfg.port}"; + description = "External URL for Prometheus"; + }; + + retention = mkOption { + type = types.str; + default = "30d"; + description = "Data retention period"; + }; + + scrapeConfigs = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Additional scrape configurations"; + }; + + alertmanager = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable Alertmanager integration"; + }; + + url = mkOption { + type = types.str; + default = "http://localhost:9093"; + description = "Alertmanager URL"; + }; + }; + }; + + config = mkIf cfg.enable { + # Register service with global homelab config + homelab.global.services.prometheus = { + enable = true; + description = "Metrics collection and monitoring server"; + category = "monitoring"; + ports = [cfg.port]; + tags = ["metrics" "monitoring" "alerting"]; + priority = 20; + dependencies = ["node-exporter"]; + }; + + # Configure the actual Prometheus service + services.prometheus = { + enable = true; + port = cfg.port; + webExternalUrl = cfg.webExternalUrl; + + retentionTime = cfg.retention; + + scrapeConfigs = + [ + # Auto-discover monitoring endpoints from global config + { + job_name = "homelab-auto"; + static_configs = [ + { + targets = + map ( + endpoint: "${globalCfg.hostname}:${toString endpoint.port}" + ) + globalCfg.monitoring.endpoints; + } + ]; + scrape_interval = "30s"; + metrics_path = "/metrics"; + } + ] + ++ cfg.scrapeConfigs; + + # Alertmanager configuration + alertmanagers = mkIf cfg.alertmanager.enable [ + { + static_configs = [ + { + targets = [cfg.alertmanager.url]; + } + ]; + } + ]; + + rules = [ + # Basic homelab alerting rules + (pkgs.writeText "homelab-alerts.yml" '' + groups: + - name: homelab + rules: + - alert: ServiceDown + expr: up == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Service {{ $labels.instance }} is down" + description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes." + + - alert: HighMemoryUsage + expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9 + for: 10m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.instance }}" + description: "Memory usage is above 90% on {{ $labels.instance }}" + + - alert: HighDiskUsage + expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85 + for: 5m + labels: + severity: warning + annotations: + summary: "High disk usage on {{ $labels.instance }}" + description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}" + '') + ]; + }; + + # Add monitoring endpoint to global config + homelab.global.monitoring.endpoints = [ + { + name = "prometheus"; + port = cfg.port; + path = "/metrics"; + jobName = "prometheus"; + scrapeInterval = "30s"; + labels = { + service = "prometheus"; + role = "monitoring"; + }; + } + ]; + + # Add reverse proxy entry if configured + homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [ + { + subdomain = "prometheus"; + port = cfg.port; + path = "/"; + enableAuth = true; + enableSSL = true; + customHeaders = { + "X-Frame-Options" = "DENY"; + "X-Content-Type-Options" = "nosniff"; + }; + } + ]; + + # Add backup job for Prometheus data + homelab.global.backups.jobs = [ + { + name = "prometheus-data"; + backend = "restic"; + paths = ["/var/lib/prometheus2"]; + schedule = "daily"; + retention = { + daily = "7"; + weekly = "4"; + monthly = "3"; + yearly = "1"; + }; + excludePatterns = [ + "*.tmp" + "*/wal/*" + ]; + preHook = '' + # Stop prometheus temporarily for consistent backup + systemctl stop prometheus + ''; + postHook = '' + # Restart prometheus after backup + systemctl start prometheus + ''; + } + ]; + + # Open firewall port + networking.firewall.allowedTCPPorts = [cfg.port]; + + # Create prometheus configuration directory + systemd.tmpfiles.rules = [ + "d /var/lib/prometheus2 0755 prometheus prometheus -" + "d /etc/prometheus 0755 root root -" + ]; + }; +} diff --git a/modules/lib/helpers.nix b/modules/lib/helpers.nix new file mode 100644 index 0000000..34201dc --- /dev/null +++ b/modules/lib/helpers.nix @@ -0,0 +1,126 @@ +# modules/lib/helpers.nix +{lib, ...}: +with lib; rec { + # Helper to merge global configurations from multiple sources + mergeGlobalConfigs = configs: let + mergeEndpoints = foldl' (acc: cfg: acc ++ cfg.monitoring.endpoints) []; + mergeBackups = foldl' (acc: cfg: acc ++ cfg.backups.jobs) []; + mergeProxyEntries = foldl' (acc: cfg: acc ++ cfg.reverseProxy.entries) []; + in { + monitoring.endpoints = mergeEndpoints configs; + backups.jobs = mergeBackups configs; + reverseProxy.entries = mergeProxyEntries configs; + }; + + # Helper to create a service module template + createServiceModule = { + name, + port, + hasMetrics ? true, + hasWebUI ? true, + dataDir ? "/var/lib/${name}", + }: { + config, + lib, + pkgs, + ... + }: + with lib; let + cfg = config.services.${name}; + in { + options.services.${name} = { + enable = mkEnableOption "${name} service"; + port = mkOption { + type = types.port; + default = port; + description = "Port for ${name}"; + }; + dataDir = mkOption { + type = types.str; + default = dataDir; + description = "Data directory for ${name}"; + }; + enableMetrics = mkOption { + type = types.bool; + default = hasMetrics; + description = "Enable metrics endpoint"; + }; + exposeWeb = mkOption { + type = types.bool; + default = hasWebUI; + description = "Expose web interface"; + }; + }; + + config = mkIf cfg.enable { + homelab.global = { + backups.jobs = [ + { + name = "${name}-data"; + backend = "restic"; + paths = [cfg.dataDir]; + schedule = "daily"; + } + ]; + + reverseProxy.entries = mkIf cfg.exposeWeb [ + { + subdomain = name; + port = cfg.port; + } + ]; + + monitoring.endpoints = mkIf cfg.enableMetrics [ + { + name = name; + port = cfg.port; + path = "/metrics"; + jobName = name; + } + ]; + }; + }; + }; + + # Helper to generate nginx configuration from proxy entries + generateNginxConfig = proxyEntries: domain: let + createVHost = entry: { + "${entry.subdomain}.${domain}" = { + enableACME = entry.enableSSL; + forceSSL = entry.enableSSL; + locations."${entry.path}" = { + proxyPass = "http://${entry.targetHost}:${toString entry.port}"; + proxyWebsockets = entry.websockets; + extraConfig = '' + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + ${concatStringsSep "\n" (mapAttrsToList ( + name: value: "proxy_set_header ${name} ${value};" + ) + entry.customHeaders)} + ''; + }; + }; + }; + in + foldl' (acc: entry: acc // (createVHost entry)) {} proxyEntries; + + # Helper to generate Prometheus scrape configs + generatePrometheusConfig = endpoints: let + endpointsByJob = groupBy (e: e.jobName) endpoints; + createJobConfig = jobName: jobEndpoints: { + job_name = jobName; + scrape_interval = (head jobEndpoints).scrapeInterval; + metrics_path = (head jobEndpoints).path; + static_configs = [ + { + targets = map (e: "${e.targetHost}:${toString e.port}") jobEndpoints; + labels = foldl' (acc: e: acc // e.labels) {} jobEndpoints; + } + ]; + }; + in + mapAttrsToList createJobConfig endpointsByJob; +} diff --git a/modules/nixos/backup-manager.nix b/modules/nixos/backup-manager.nix new file mode 100644 index 0000000..cd06883 --- /dev/null +++ b/modules/nixos/backup-manager.nix @@ -0,0 +1,187 @@ +# modules/backup-manager.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.homelab.backups; + globalCfg = config.homelab.global; + + # Create systemd services for backup jobs + createBackupService = job: let + serviceName = "backup-${job.name}"; + allExcludes = globalCfg.backups.globalExcludes ++ job.excludePatterns; + excludeArgs = map (pattern: "--exclude '${pattern}'") allExcludes; + + backupScript = + if job.backend == "restic" + then '' + #!/bin/bash + set -euo pipefail + + ${optionalString (job.preHook != null) job.preHook} + + # Restic backup + ${pkgs.restic}/bin/restic backup \ + ${concatStringsSep " " (map (path: "'${path}'") job.paths)} \ + ${concatStringsSep " " excludeArgs} \ + --tag "host:${globalCfg.hostname}" \ + --tag "job:${job.name}" \ + --tag "env:${globalCfg.environment}" + + # Apply retention policy + ${pkgs.restic}/bin/restic forget \ + --keep-daily ${job.retention.daily} \ + --keep-weekly ${job.retention.weekly} \ + --keep-monthly ${job.retention.monthly} \ + --keep-yearly ${job.retention.yearly} \ + --prune + + ${optionalString (job.postHook != null) job.postHook} + '' + else if job.backend == "borg" + then '' + #!/bin/bash + set -euo pipefail + + ${optionalString (job.preHook != null) job.preHook} + + # Borg backup + ${pkgs.borgbackup}/bin/borg create \ + --stats --progress \ + ${concatStringsSep " " excludeArgs} \ + "::${globalCfg.hostname}-${job.name}-{now}" \ + ${concatStringsSep " " (map (path: "'${path}'") job.paths)} + + # Apply retention policy + ${pkgs.borgbackup}/bin/borg prune \ + --keep-daily ${job.retention.daily} \ + --keep-weekly ${job.retention.weekly} \ + --keep-monthly ${job.retention.monthly} \ + --keep-yearly ${job.retention.yearly} + + ${optionalString (job.postHook != null) job.postHook} + '' + else throw "Unsupported backup backend: ${job.backend}"; + in { + ${serviceName} = { + description = "Backup job: ${job.name}"; + after = ["network-online.target"]; + wants = ["network-online.target"]; + serviceConfig = { + Type = "oneshot"; + User = "backup"; + Group = "backup"; + ExecStart = pkgs.writeScript "backup-${job.name}" backupScript; + EnvironmentFile = "/etc/backup/environment"; + }; + }; + }; + + # Create systemd timers for backup jobs + createBackupTimer = job: let + serviceName = "backup-${job.name}"; + timerName = "${serviceName}.timer"; + in { + ${timerName} = { + description = "Timer for backup job: ${job.name}"; + wantedBy = ["timers.target"]; + timerConfig = { + OnCalendar = + if job.schedule == "daily" + then "daily" + else if job.schedule == "weekly" + then "weekly" + else if job.schedule == "hourly" + then "hourly" + else job.schedule; # Assume it's a cron expression + Persistent = true; + RandomizedDelaySec = "15min"; + }; + }; + }; +in { + options.homelab.backups = { + enable = mkEnableOption "Backup management"; + + restic = { + repository = mkOption { + type = types.str; + description = "Restic repository URL"; + }; + passwordFile = mkOption { + type = types.str; + default = "/etc/backup/restic-password"; + description = "Path to file containing restic password"; + }; + }; + + borg = { + repository = mkOption { + type = types.str; + description = "Borg repository path"; + }; + sshKey = mkOption { + type = types.str; + default = "/etc/backup/borg-ssh-key"; + description = "Path to SSH key for borg repository"; + }; + }; + }; + + config = mkIf (cfg.enable && globalCfg.enable && (length globalCfg.backups.jobs) > 0) { + # Create backup user + users.users.backup = { + isSystemUser = true; + group = "backup"; + home = "/var/lib/backup"; + createHome = true; + }; + + users.groups.backup = {}; + + # Install backup tools + environment.systemPackages = with pkgs; [ + restic + borgbackup + rclone + + (pkgs.writeScriptBin "backup-status" '' + #!/bin/bash + echo "=== Backup Status ===" + echo + ${concatStringsSep "\n" (map (job: '' + echo "Job: ${job.name}" + systemctl is-active backup-${job.name}.timer || echo "Timer inactive" + systemctl status backup-${job.name}.timer --no-pager -l | grep -E "(Active|Trigger)" || true + echo + '') + globalCfg.backups.jobs)} + '') + ]; + + # Create systemd services and timers + systemd.services = lib.foldl' (acc: job: acc // (createBackupService job)) {} globalCfg.backups.jobs; + systemd.timers = lib.foldl' (acc: job: acc // (createBackupTimer job)) {} globalCfg.backups.jobs; + + # Environment file template + environment.etc."backup/environment.example".text = '' + # Restic configuration + RESTIC_REPOSITORY=${cfg.restic.repository} + RESTIC_PASSWORD_FILE=${cfg.restic.passwordFile} + + # AWS S3 credentials (if using S3 backend) + AWS_ACCESS_KEY_ID=your-access-key + AWS_SECRET_ACCESS_KEY=your-secret-key + + # Borg configuration + BORG_REPO=${cfg.borg.repository} + BORG_RSH="ssh -i ${cfg.borg.sshKey}" + + # Notification settings + NOTIFICATION_URL=your-webhook-url + ''; + }; +} diff --git a/modules/nixos/default.nix b/modules/nixos/default.nix index a0250d5..af472eb 100644 --- a/modules/nixos/default.nix +++ b/modules/nixos/default.nix @@ -1,3 +1,8 @@ { ente = import ./ente.nix; + global-config = import ./global-config.nix; + backup-manager = import ./backup-manager.nix; + + # Service modules + services = import ./services; } diff --git a/modules/nixos/global-config.nix b/modules/nixos/global-config.nix new file mode 100644 index 0000000..3443eca --- /dev/null +++ b/modules/nixos/global-config.nix @@ -0,0 +1,462 @@ +# modules/global-config.nix +{ + config, + lib, + outputs, + ... +}: +with lib; let + cfg = config.homelab.global; + + # Service type definition + serviceType = types.submodule { + options = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable this service"; + }; + + description = mkOption { + type = types.str; + description = "Human-readable description of the service"; + }; + + category = mkOption { + type = types.enum ["monitoring" "networking" "storage" "security" "media" "development" "backup" "other"]; + default = "other"; + description = "Service category for organization"; + }; + + dependencies = mkOption { + type = types.listOf types.str; + default = []; + description = "List of other homelab services this depends on"; + }; + + ports = mkOption { + type = types.listOf types.port; + default = []; + description = "Ports this service uses"; + }; + + tags = mkOption { + type = types.listOf types.str; + default = []; + description = "Additional tags for this service"; + }; + + priority = mkOption { + type = types.int; + default = 100; + description = "Service priority (lower numbers start first)"; + }; + }; + }; + + # Type definitions + monitoringEndpointType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the monitoring endpoint"; + }; + port = mkOption { + type = types.port; + description = "Port number for the endpoint"; + }; + path = mkOption { + type = types.str; + default = "/metrics"; + description = "Path for the metrics endpoint"; + }; + jobName = mkOption { + type = types.str; + description = "Prometheus job name"; + }; + scrapeInterval = mkOption { + type = types.str; + default = "30s"; + description = "Prometheus scrape interval"; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Additional labels for this endpoint"; + }; + }; + }; + + backupJobType = types.submodule { + options = { + name = mkOption { + type = types.str; + description = "Name of the backup job"; + }; + backend = mkOption { + type = types.enum ["restic" "borg" "rclone"]; + description = "Backup backend to use"; + }; + paths = mkOption { + type = types.listOf types.str; + description = "List of paths to backup"; + }; + schedule = mkOption { + type = types.str; + default = "daily"; + description = "Backup schedule (cron format or preset)"; + }; + retention = mkOption { + type = types.attrsOf types.str; + default = { + daily = "7"; + weekly = "4"; + monthly = "6"; + yearly = "2"; + }; + description = "Retention policy"; + }; + excludePatterns = mkOption { + type = types.listOf types.str; + default = []; + description = "Patterns to exclude from backup"; + }; + preHook = mkOption { + type = types.nullOr types.str; + default = null; + description = "Script to run before backup"; + }; + postHook = mkOption { + type = types.nullOr types.str; + default = null; + description = "Script to run after backup"; + }; + }; + }; + + reverseProxyEntryType = types.submodule { + options = { + subdomain = mkOption { + type = types.str; + description = "Subdomain for the service"; + }; + port = mkOption { + type = types.port; + description = "Internal port to proxy to"; + }; + path = mkOption { + type = types.str; + default = "/"; + description = "Path prefix for the service"; + }; + enableAuth = mkOption { + type = types.bool; + default = false; + description = "Enable authentication for this service"; + }; + enableSSL = mkOption { + type = types.bool; + default = true; + description = "Enable SSL for this service"; + }; + customHeaders = mkOption { + type = types.attrsOf types.str; + default = {}; + description = "Custom headers to add"; + }; + websockets = mkOption { + type = types.bool; + default = false; + description = "Enable websocket support"; + }; + }; + }; + + # Helper functions for services + enabledServices = filterAttrs (name: service: service.enable) cfg.services; + servicesByCategory = category: filterAttrs (name: service: service.enable && service.category == category) cfg.services; +in { + imports = [ + ./motd + ]; + + options.homelab.global = { + enable = mkEnableOption "Global homelab configuration"; + + hostname = mkOption { + type = types.str; + description = "Hostname for this system"; + }; + + domain = mkOption { + type = types.str; + default = "procopius.dk"; + description = "Base domain for the homelab"; + }; + + environment = mkOption { + type = types.enum ["production" "staging" "development"]; + default = "production"; + description = "Environment type"; + }; + + location = mkOption { + type = types.str; + default = "homelab"; + description = "Physical location identifier"; + }; + + tags = mkOption { + type = types.listOf types.str; + default = []; + description = "Tags for this system"; + }; + + services = mkOption { + type = types.attrsOf serviceType; + default = {}; + description = "Homelab services configuration"; + example = literalExpression '' + { + prometheus = { + enable = true; + description = "Metrics collection and monitoring"; + category = "monitoring"; + ports = [ 9090 ]; + tags = [ "metrics" "alerting" ]; + }; + + traefik = { + enable = true; + description = "Reverse proxy and load balancer"; + category = "networking"; + ports = [ 80 443 8080 ]; + tags = [ "proxy" "loadbalancer" ]; + priority = 10; + }; + } + ''; + }; + + monitoring = { + endpoints = mkOption { + type = types.listOf monitoringEndpointType; + default = []; + description = "Monitoring endpoints exposed by this system"; + }; + + nodeExporter = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable node exporter"; + }; + port = mkOption { + type = types.port; + default = 9100; + description = "Node exporter port"; + }; + }; + }; + + backups = { + jobs = mkOption { + type = types.listOf backupJobType; + default = []; + description = "Backup jobs for this system"; + }; + + globalExcludes = mkOption { + type = types.listOf types.str; + default = [ + "*.tmp" + "*.cache" + "*/.git" + "*/node_modules" + "*/target" + ]; + description = "Global exclude patterns for all backup jobs"; + }; + }; + + reverseProxy = { + entries = mkOption { + type = types.listOf reverseProxyEntryType; + default = []; + description = "Reverse proxy entries for this system"; + }; + }; + + # Helper function to add monitoring endpoint + addMonitoringEndpoint = mkOption { + type = types.functionTo (types.functionTo types.anything); + default = name: endpoint: { + homelab.global.monitoring.endpoints = [ + (endpoint // {inherit name;}) + ]; + }; + description = "Helper function to add monitoring endpoints"; + }; + + # Helper function to add backup job + addBackupJob = mkOption { + type = types.functionTo (types.functionTo types.anything); + default = name: job: { + homelab.global.backups.jobs = [ + (job // {inherit name;}) + ]; + }; + description = "Helper function to add backup jobs"; + }; + + # Helper function to add reverse proxy entry + addReverseProxyEntry = mkOption { + type = types.functionTo (types.functionTo types.anything); + default = subdomain: entry: { + homelab.global.reverseProxy.entries = [ + (entry // {inherit subdomain;}) + ]; + }; + description = "Helper function to add reverse proxy entries"; + }; + + # Helper functions + enabledServicesList = mkOption { + type = types.listOf types.str; + default = attrNames enabledServices; + description = "List of enabled service names"; + readOnly = true; + }; + + servicesByPriority = mkOption { + type = types.listOf types.str; + default = + map (x: x.name) (sort (a: b: a.priority < b.priority) + (mapAttrsToList (name: service: service // {inherit name;}) enabledServices)); + description = "Services sorted by priority"; + readOnly = true; + }; + }; + + config = mkIf cfg.enable { + # Set hostname + networking.hostName = cfg.hostname; + + # Configure node exporter if enabled + services.prometheus.exporters.node = mkIf cfg.monitoring.nodeExporter.enable { + enable = true; + port = cfg.monitoring.nodeExporter.port; + enabledCollectors = [ + "systemd" + "textfile" + "filesystem" + "loadavg" + "meminfo" + "netdev" + "stat" + ]; + }; + + # Automatically add node exporter to monitoring endpoints + homelab.global.monitoring.endpoints = mkIf cfg.monitoring.nodeExporter.enable [ + { + name = "node-exporter"; + port = cfg.monitoring.nodeExporter.port; + path = "/metrics"; + jobName = "node"; + labels = { + instance = cfg.hostname; + environment = cfg.environment; + location = cfg.location; + }; + } + ]; + + # Export configuration for external consumption + environment.etc."homelab/config.json".text = builtins.toJSON { + inherit (cfg) hostname domain environment location tags; + + services = + mapAttrs (name: service: { + inherit (service) enable description category dependencies ports tags priority; + }) + cfg.services; + + enabledServices = enabledServices; + + servicesByCategory = { + monitoring = servicesByCategory "monitoring"; + networking = servicesByCategory "networking"; + storage = servicesByCategory "storage"; + security = servicesByCategory "security"; + media = servicesByCategory "media"; + development = servicesByCategory "development"; + backup = servicesByCategory "backup"; + other = servicesByCategory "other"; + }; + + monitoring = { + endpoints = + map (endpoint: { + name = endpoint.name; + url = "http://${cfg.hostname}:${toString endpoint.port}${endpoint.path}"; + port = endpoint.port; + path = endpoint.path; + jobName = endpoint.jobName; + scrapeInterval = endpoint.scrapeInterval; + labels = + endpoint.labels + // { + hostname = cfg.hostname; + environment = cfg.environment; + }; + }) + cfg.monitoring.endpoints; + }; + + backups = { + jobs = cfg.backups.jobs; + }; + + reverseProxy = { + entries = + map (entry: { + subdomain = entry.subdomain; + url = "http://${cfg.hostname}:${toString entry.port}"; + port = entry.port; + path = entry.path; + domain = "${entry.subdomain}.${cfg.domain}"; + enableAuth = entry.enableAuth; + enableSSL = entry.enableSSL; + customHeaders = entry.customHeaders; + websockets = entry.websockets; + }) + cfg.reverseProxy.entries; + }; + }; + + # Create a status command that shows service information + environment.systemPackages = [ + # (pkgs.writeScriptBin "homelab-services" '' + # #!/bin/bash + # echo "🏠 Homelab Services Status" + # echo "==========================" + # echo + + # ${concatStringsSep "\n" (mapAttrsToList (name: service: '' + # echo "${name}: ${service.description}" + # echo " Category: ${service.category}" + # echo " Status: $(systemctl is-active ${name} 2>/dev/null || echo "not found")" + # ${optionalString (service.ports != []) '' + # echo " Ports: ${concatStringsSep ", " (map toString service.ports)}" + # ''} + # ${optionalString (service.tags != []) '' + # echo " Tags: ${concatStringsSep ", " service.tags}" + # ''} + # echo + # '') + # enabledServices)} + # '') + ]; + }; +} diff --git a/modules/nixos/motd/default.nix b/modules/nixos/motd/default.nix new file mode 100644 index 0000000..3c56198 --- /dev/null +++ b/modules/nixos/motd/default.nix @@ -0,0 +1,304 @@ +# modules/motd/default.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.homelab.motd; + globalCfg = config.homelab.global; + enabledServices = filterAttrs (name: service: service.enable) globalCfg.services; + + homelab-motd = pkgs.writeShellScriptBin "homelab-motd" '' + #! /usr/bin/env bash + source /etc/os-release + + # Colors for output + RED='\033[0;31m' + GREEN='\033[0;32m' + YELLOW='\033[1;33m' + BLUE='\033[0;34m' + PURPLE='\033[0;35m' + CYAN='\033[0;36m' + WHITE='\033[1;37m' + NC='\033[0m' # No Color + BOLD='\033[1m' + + # Helper functions + print_header() { + echo -e "''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}" + echo -e "''${BOLD}''${BLUE}║''${NC}''${WHITE} 🏠 HOMELAB STATUS ''${NC}''${BOLD}''${BLUE}║''${NC}" + echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}" + } + + print_section() { + echo -e "\n''${BOLD}''${CYAN}▶ $1''${NC}" + echo -e "''${CYAN}─────────────────────────────────────────────────────────────''${NC}" + } + + get_service_status() { + local service="$1" + if ${pkgs.systemd}/bin/systemctl is-active --quiet "$service" 2>/dev/null; then + echo -e "''${GREEN}●''${NC} Active" + elif ${pkgs.systemd}/bin/systemctl is-enabled --quiet "$service" 2>/dev/null; then + echo -e "''${YELLOW}●''${NC} Inactive" + else + echo -e "''${RED}●''${NC} Disabled" + fi + } + + get_timer_status() { + local timer="$1" + if ${pkgs.systemd}/bin/systemctl is-active --quiet "$timer" 2>/dev/null; then + local next_run=$(${pkgs.systemd}/bin/systemctl show "$timer" --property=NextElapseUSecRealtime --value 2>/dev/null || echo "0") + if [[ "$next_run" != "0" && "$next_run" != "n/a" ]]; then + local next_readable=$(${pkgs.systemd}/bin/systemctl list-timers --no-pager "$timer" 2>/dev/null | tail -n +2 | head -n 1 | awk '{print $1, $2}' || echo "Unknown") + echo -e "''${GREEN}●''${NC} Next: ''${next_readable}" + else + echo -e "''${GREEN}●''${NC} Active" + fi + else + echo -e "''${RED}●''${NC} Inactive" + fi + } + + # Main script + ${optionalString cfg.clearScreen "clear"} + print_header + + # Check if global config exists + CONFIG_FILE="/etc/homelab/config.json" + if [[ ! -f "$CONFIG_FILE" ]]; then + echo -e "''${RED}❌ Global homelab configuration not found at $CONFIG_FILE''${NC}" + exit 1 + fi + + # Parse global configuration + HOSTNAME=$(${pkgs.jq}/bin/jq -r '.hostname' "$CONFIG_FILE" 2>/dev/null || hostname) + DOMAIN=$(${pkgs.jq}/bin/jq -r '.domain' "$CONFIG_FILE" 2>/dev/null || echo "unknown") + ENVIRONMENT=$(${pkgs.jq}/bin/jq -r '.environment' "$CONFIG_FILE" 2>/dev/null || echo "unknown") + LOCATION=$(${pkgs.jq}/bin/jq -r '.location' "$CONFIG_FILE" 2>/dev/null || echo "unknown") + TAGS=$(${pkgs.jq}/bin/jq -r '.tags[]?' "$CONFIG_FILE" 2>/dev/null | tr '\n' ' ' || echo "none") + + print_section "SYSTEM INFO" + echo -e " ''${BOLD}Hostname:''${NC} $HOSTNAME" + echo -e " ''${BOLD}Domain:''${NC} $DOMAIN" + echo -e " ''${BOLD}Environment:''${NC} $ENVIRONMENT" + echo -e " ''${BOLD}Location:''${NC} $LOCATION" + echo -e " ''${BOLD}Tags:''${NC} ''${TAGS:-none}" + echo -e " ''${BOLD}Uptime:''${NC} $(${pkgs.procps}/bin/uptime -p)" + echo -e " ''${BOLD}Load:''${NC} $(${pkgs.procps}/bin/uptime | awk -F'load average:' '{print $2}' | xargs)" + + ${optionalString cfg.showServices '' + # Enabled services from homelab config + print_section "HOMELAB SERVICES" + ${concatStringsSep "\n" (mapAttrsToList (name: service: '' + status=$(get_service_status "${service.systemdService}") + printf " %-25s %s\n" "${name}" "$status" + '') + cfg.services)} + ''} + + ${optionalString cfg.showMonitoring '' + # Monitoring endpoints + print_section "MONITORING ENDPOINTS" + ENDPOINTS=$(${pkgs.jq}/bin/jq -c '.monitoring.endpoints[]?' "$CONFIG_FILE" 2>/dev/null || echo "") + if [[ -n "$ENDPOINTS" ]]; then + while IFS= read -r endpoint; do + name=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.name') + port=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.port') + path=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.path') + job=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.jobName') + + # Check if port is accessible + if ${pkgs.netcat}/bin/nc -z localhost "$port" 2>/dev/null; then + status="''${GREEN}●''${NC}" + else + status="''${RED}●''${NC}" + fi + + printf " %-20s %s %s:%s%s (job: %s)\n" "$name" "$status" "$HOSTNAME" "$port" "$path" "$job" + done <<< "$ENDPOINTS" + else + echo -e " ''${YELLOW}No monitoring endpoints configured''${NC}" + fi + ''} + + ${optionalString cfg.showBackups '' + # Backup jobs status + print_section "BACKUP JOBS" + BACKUP_JOBS=$(${pkgs.jq}/bin/jq -c '.backups.jobs[]?' "$CONFIG_FILE" 2>/dev/null || echo "") + if [[ -n "$BACKUP_JOBS" ]]; then + while IFS= read -r job; do + name=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.name') + backend=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.backend') + schedule=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.schedule') + + service_name="backup-''${name}" + timer_name="''${service_name}.timer" + + timer_status=$(get_timer_status "$timer_name") + + # Get last backup info + last_run="Unknown" + if ${pkgs.systemd}/bin/systemctl show "$service_name" --property=ExecMainStartTimestamp --value 2>/dev/null | grep -q "^[^n]"; then + last_run=$(${pkgs.systemd}/bin/systemctl show "$service_name" --property=ExecMainStartTimestamp --value 2>/dev/null | head -1) + if [[ "$last_run" != "n/a" && -n "$last_run" ]]; then + last_run=$(${pkgs.coreutils}/bin/date -d "$last_run" "+%Y-%m-%d %H:%M" 2>/dev/null || echo "Unknown") + fi + fi + + printf " %-20s %s (%s, %s) Last: %s\n" "$name" "$timer_status" "$backend" "$schedule" "$last_run" + done <<< "$BACKUP_JOBS" + + # Show backup-status command output if available + if command -v backup-status >/dev/null 2>&1; then + echo -e "\n ''${BOLD}Quick Status:''${NC}" + backup-status 2>/dev/null | tail -n +3 | head -10 | sed 's/^/ /' + fi + else + echo -e " ''${YELLOW}No backup jobs configured''${NC}" + fi + ''} + + ${optionalString cfg.showReverseProxy '' + # Reverse proxy entries + print_section "REVERSE PROXY ENTRIES" + PROXY_ENTRIES=$(${pkgs.jq}/bin/jq -c '.reverseProxy.entries[]?' "$CONFIG_FILE" 2>/dev/null || echo "") + if [[ -n "$PROXY_ENTRIES" ]]; then + while IFS= read -r entry; do + subdomain=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.subdomain') + port=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.port') + domain=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.domain') + auth=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.enableAuth') + ssl=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.enableSSL') + + # Check if service is running on the port + if ${pkgs.netcat}/bin/nc -z localhost "$port" 2>/dev/null; then + status="''${GREEN}●''${NC}" + else + status="''${RED}●''${NC}" + fi + + auth_indicator="" + [[ "$auth" == "true" ]] && auth_indicator=" 🔐" + + ssl_indicator="" + [[ "$ssl" == "true" ]] && ssl_indicator=" 🔒" + + printf " %-25s %s :%s → %s%s%s\n" "''${domain}" "$status" "$port" "$domain" "$auth_indicator" "$ssl_indicator" + done <<< "$PROXY_ENTRIES" + else + echo -e " ''${YELLOW}No reverse proxy entries configured''${NC}" + fi + ''} + + ${optionalString cfg.showResources '' + # Resource usage + print_section "RESOURCE USAGE" + echo -e " ''${BOLD}Memory:''${NC} $(${pkgs.procps}/bin/free -h | awk '/^Mem:/ {printf "%s/%s (%.1f%%)", $3, $2, ($3/$2)*100}')" + echo -e " ''${BOLD}Disk (root):''${NC} $(${pkgs.coreutils}/bin/df -h / | awk 'NR==2 {printf "%s/%s (%s)", $3, $2, $5}')" + echo -e " ''${BOLD}CPU Usage:''${NC} $(${pkgs.procps}/bin/top -bn1 | grep "Cpu(s)" | awk '{printf "%.1f%%", $2+$4}' | sed 's/%us,//')%" + ''} + + ${optionalString cfg.showRecentIssues '' + # Recent logs (errors only) + print_section "RECENT ISSUES" + error_count=$(${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err --no-pager -q | wc -l) + if [[ "$error_count" -gt 0 ]]; then + echo -e " ''${RED}⚠ $error_count errors in last 24h''${NC}" + ${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err --no-pager -q | tail -3 | sed 's/^/ /' + else + echo -e " ''${GREEN}✓ No critical errors in last 24h''${NC}" + fi + ''} + + echo -e "\n''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}" + echo -e "''${BOLD}''${BLUE}║''${NC} ''${WHITE}Run 'backup-status' for detailed backup info ''${NC}''${BOLD}''${BLUE}║''${NC}" + echo -e "''${BOLD}''${BLUE}║''${NC} ''${WHITE}Config: /etc/homelab/config.json ''${NC}''${BOLD}''${BLUE}║''${NC}" + echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}" + echo + ''; +in { + options.homelab.motd = { + enable = mkEnableOption "Dynamic homelab MOTD"; + + clearScreen = mkOption { + type = types.bool; + default = true; + description = "Clear screen before showing MOTD"; + }; + + showServices = mkOption { + type = types.bool; + default = true; + description = "Show enabled homelab services"; + }; + + showMonitoring = mkOption { + type = types.bool; + default = true; + description = "Show monitoring endpoints"; + }; + + showBackups = mkOption { + type = types.bool; + default = true; + description = "Show backup jobs status"; + }; + + showReverseProxy = mkOption { + type = types.bool; + default = true; + description = "Show reverse proxy entries"; + }; + + showResources = mkOption { + type = types.bool; + default = true; + description = "Show system resource usage"; + }; + + showRecentIssues = mkOption { + type = types.bool; + default = true; + description = "Show recent system issues"; + }; + + services = mkOption { + type = types.attrsOf (types.submodule { + options = { + systemdService = mkOption { + type = types.str; + description = "Name of the systemd service to monitor"; + }; + description = mkOption { + type = types.str; + default = ""; + description = "Human-readable description of the service"; + }; + }; + }); + default = {}; + description = "Homelab services to monitor in MOTD"; + }; + }; + + config = mkIf (cfg.enable && globalCfg.enable) { + # Register services with MOTD + homelab.motd.services = + mapAttrs (name: service: { + systemdService = name; + description = service.description; + }) + enabledServices; + + # Create a command to manually run the MOTD + environment.systemPackages = with pkgs; [ + jq + netcat + homelab-motd + ]; + }; +} diff --git a/modules/nixos/services/default.nix b/modules/nixos/services/default.nix new file mode 100644 index 0000000..c5ac354 --- /dev/null +++ b/modules/nixos/services/default.nix @@ -0,0 +1,4 @@ +{ + jellyfin = import ./jellyfin.nix; + grafana = import ./grafana.nix; +} diff --git a/modules/nixos/services/forgejo-runner.nix b/modules/nixos/services/forgejo-runner.nix new file mode 100644 index 0000000..e69de29 diff --git a/modules/nixos/services/forgejo.nix b/modules/nixos/services/forgejo.nix new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/modules/nixos/services/forgejo.nix @@ -0,0 +1 @@ + diff --git a/modules/nixos/services/grafana.nix b/modules/nixos/services/grafana.nix new file mode 100644 index 0000000..f76edf7 --- /dev/null +++ b/modules/nixos/services/grafana.nix @@ -0,0 +1,72 @@ +# modules/services/grafana.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.services.grafana; + helpers = import ../lib/helpers.nix {inherit lib;}; +in { + options.services.grafana = { + enable = mkEnableOption "Grafana monitoring dashboard"; + port = mkOption { + type = types.port; + default = 3000; + description = "Grafana web interface port"; + }; + adminPassword = mkOption { + type = types.str; + description = "Admin password for Grafana"; + }; + }; + + config = mkIf cfg.enable { + services.grafana = { + enable = true; + settings = { + server = { + http_port = cfg.port; + domain = "${config.homelab.global.hostname}.${config.homelab.global.domain}"; + }; + security = { + admin_password = cfg.adminPassword; + }; + }; + }; + + homelab.global = { + backups.jobs = [ + { + name = "grafana-data"; + backend = "restic"; + paths = ["/var/lib/grafana"]; + schedule = "daily"; + excludePatterns = ["*/plugins/*" "*/png/*"]; + } + ]; + + reverseProxy.entries = [ + { + subdomain = "grafana"; + port = cfg.port; + enableAuth = false; # Grafana handles its own auth + } + ]; + + monitoring.endpoints = [ + { + name = "grafana"; + port = cfg.port; + path = "/metrics"; + jobName = "grafana"; + labels = { + service = "grafana"; + type = "monitoring"; + }; + } + ]; + }; + }; +} diff --git a/modules/nixos/services/jellyfin.nix b/modules/nixos/services/jellyfin.nix new file mode 100644 index 0000000..1aac7e5 --- /dev/null +++ b/modules/nixos/services/jellyfin.nix @@ -0,0 +1,125 @@ +# modules/services/jellyfin.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.services.jellyfin; +in { + options.services.jellyfin = { + enable = mkEnableOption "Jellyfin media server"; + + port = mkOption { + type = types.port; + default = 8096; + description = "Port for Jellyfin web interface"; + }; + + dataDir = mkOption { + type = types.str; + default = "/var/lib/jellyfin"; + description = "Directory to store Jellyfin data"; + }; + + mediaDir = mkOption { + type = types.str; + default = "/media"; + description = "Directory containing media files"; + }; + + enableMetrics = mkOption { + type = types.bool; + default = true; + description = "Enable Prometheus metrics"; + }; + + exposeWeb = mkOption { + type = types.bool; + default = true; + description = "Expose web interface through reverse proxy"; + }; + }; + + config = mkIf cfg.enable { + # Enable the service + services.jellyfin = { + enable = true; + dataDir = cfg.dataDir; + }; + + # Configure global settings + homelab.global = { + # Add backup job for Jellyfin data + backups.jobs = [ + { + name = "jellyfin-config"; + backend = "restic"; + paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"]; + schedule = "0 2 * * *"; # Daily at 2 AM + excludePatterns = [ + "*/cache/*" + "*/transcodes/*" + "*/logs/*" + ]; + preHook = '' + # Stop jellyfin for consistent backup + systemctl stop jellyfin + ''; + postHook = '' + # Restart jellyfin after backup + systemctl start jellyfin + ''; + } + { + name = "jellyfin-media"; + backend = "restic"; + paths = [cfg.mediaDir]; + schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM + excludePatterns = [ + "*.tmp" + "*/.@__thumb/*" # Synology thumbnails + ]; + } + ]; + + # Add reverse proxy entry if enabled + reverseProxy.entries = mkIf cfg.exposeWeb [ + { + subdomain = "jellyfin"; + port = cfg.port; + enableAuth = false; # Jellyfin has its own auth + websockets = true; + customHeaders = { + "X-Forwarded-Proto" = "$scheme"; + "X-Forwarded-Host" = "$host"; + }; + } + ]; + + # Add monitoring endpoint if metrics enabled + monitoring.endpoints = mkIf cfg.enableMetrics [ + { + name = "jellyfin"; + port = cfg.port; + path = "/metrics"; # Assuming you have a metrics plugin + jobName = "jellyfin"; + scrapeInterval = "60s"; + labels = { + service = "jellyfin"; + type = "media-server"; + }; + } + ]; + }; + + # Open firewall + networking.firewall.allowedTCPPorts = [cfg.port]; + + # Create media directory + systemd.tmpfiles.rules = [ + "d ${cfg.mediaDir} 0755 jellyfin jellyfin -" + ]; + }; +} diff --git a/modules/nixos/services/postgres.nix b/modules/nixos/services/postgres.nix new file mode 100644 index 0000000..e69de29 diff --git a/modules/nixos/services/prometheus.nix b/modules/nixos/services/prometheus.nix new file mode 100644 index 0000000..9485b3a --- /dev/null +++ b/modules/nixos/services/prometheus.nix @@ -0,0 +1,208 @@ +# modules/services/prometheus.nix +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.homelab.services.prometheus; + globalCfg = config.homelab.global; +in { + options.homelab.services.prometheus = { + enable = mkEnableOption "Prometheus monitoring server"; + + port = mkOption { + type = types.port; + default = 9090; + description = "Prometheus server port"; + }; + + webExternalUrl = mkOption { + type = types.str; + default = "http://${globalCfg.hostname}:${toString cfg.port}"; + description = "External URL for Prometheus"; + }; + + retention = mkOption { + type = types.str; + default = "30d"; + description = "Data retention period"; + }; + + scrapeConfigs = mkOption { + type = types.listOf types.attrs; + default = []; + description = "Additional scrape configurations"; + }; + + alertmanager = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable Alertmanager integration"; + }; + + url = mkOption { + type = types.str; + default = "http://localhost:9093"; + description = "Alertmanager URL"; + }; + }; + }; + + config = mkIf cfg.enable { + # Register service with global homelab config + homelab.global.services.prometheus = { + enable = true; + description = "Metrics collection and monitoring server"; + category = "monitoring"; + ports = [cfg.port]; + tags = ["metrics" "monitoring" "alerting"]; + priority = 20; + dependencies = ["node-exporter"]; + }; + + # Configure the actual Prometheus service + services.prometheus = { + enable = true; + port = cfg.port; + webExternalUrl = cfg.webExternalUrl; + + retentionTime = cfg.retention; + + scrapeConfigs = + [ + # Auto-discover monitoring endpoints from global config + { + job_name = "homelab-auto"; + static_configs = [ + { + targets = + map ( + endpoint: "${globalCfg.hostname}:${toString endpoint.port}" + ) + globalCfg.monitoring.endpoints; + } + ]; + scrape_interval = "30s"; + metrics_path = "/metrics"; + } + ] + ++ cfg.scrapeConfigs; + + # Alertmanager configuration + alertmanagers = mkIf cfg.alertmanager.enable [ + { + static_configs = [ + { + targets = [cfg.alertmanager.url]; + } + ]; + } + ]; + + rules = [ + # Basic homelab alerting rules + (pkgs.writeText "homelab-alerts.yml" '' + groups: + - name: homelab + rules: + - alert: ServiceDown + expr: up == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Service {{ $labels.instance }} is down" + description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes." + + - alert: HighMemoryUsage + expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9 + for: 10m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.instance }}" + description: "Memory usage is above 90% on {{ $labels.instance }}" + + - alert: HighDiskUsage + expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85 + for: 5m + labels: + severity: warning + annotations: + summary: "High disk usage on {{ $labels.instance }}" + description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}" + '') + ]; + }; + + # Add monitoring endpoint to global config + homelab.global.monitoring.endpoints = [ + { + name = "prometheus"; + port = cfg.port; + path = "/metrics"; + jobName = "prometheus"; + scrapeInterval = "30s"; + labels = { + service = "prometheus"; + role = "monitoring"; + }; + } + ]; + + # Add reverse proxy entry if configured + homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [ + { + subdomain = "prometheus"; + port = cfg.port; + path = "/"; + enableAuth = true; + enableSSL = true; + customHeaders = { + "X-Frame-Options" = "DENY"; + "X-Content-Type-Options" = "nosniff"; + }; + } + ]; + + # Add backup job for Prometheus data + homelab.global.backups.jobs = [ + { + name = "prometheus-data"; + backend = "restic"; + paths = ["/var/lib/prometheus2"]; + schedule = "daily"; + retention = { + daily = "7"; + weekly = "4"; + monthly = "3"; + yearly = "1"; + }; + excludePatterns = [ + "*.tmp" + "*/wal/*" + ]; + preHook = '' + # Stop prometheus temporarily for consistent backup + systemctl stop prometheus + ''; + postHook = '' + # Restart prometheus after backup + systemctl start prometheus + ''; + } + ]; + + # Open firewall port + networking.firewall.allowedTCPPorts = [cfg.port]; + + # Create prometheus configuration directory + systemd.tmpfiles.rules = [ + "d /var/lib/prometheus2 0755 prometheus prometheus -" + "d /etc/prometheus 0755 root root -" + ]; + }; +} diff --git a/modules/nixos/system/backups/backrest.nix b/modules/nixos/system/backups/backrest.nix new file mode 100644 index 0000000..e230402 --- /dev/null +++ b/modules/nixos/system/backups/backrest.nix @@ -0,0 +1,4 @@ +{ + # TODO + # https://github.com/L-Trump/nixos-configs/blob/ab3fb16e330b8a2904b9967e46af8c061b56266e/modules/nixos/server/backrest.nix#L7 +} diff --git a/modules/nixos/system/backups/backups-option.nix b/modules/nixos/system/backups/backups-option.nix new file mode 100644 index 0000000..137f73f --- /dev/null +++ b/modules/nixos/system/backups/backups-option.nix @@ -0,0 +1,95 @@ +# backups-option.nix +cfg: let + inherit (cfg.lib) mkOption types mkEnableOption attrNames; +in + mkOption { + type = types.attrsOf ( + types.submodule ( + { + name, + config, + ... + } @ args: { + options = { + backend = mkOption { + type = types.enum (attrNames cfg.backends); + description = "The backup backend to use"; + }; + + paths = mkOption { + type = types.listOf types.str; + default = []; + description = "Paths to backup"; + }; + + enable = mkOption { + type = types.bool; + default = true; + description = "Whether to enable this backup job"; + }; + + timerConfig = mkOption { + type = with types; nullOr attrs; + default = null; + example = { + OnCalendar = "00:05"; + Persistent = true; + RandomizedDelaySec = "5h"; + }; + description = '' + When to run the backup. If null, inherits from backend's default timerConfig. + Set to null to disable automatic scheduling. + ''; + }; + + backendOptions = mkOption { + type = let + backupConfig = config; + backupName = name; + in + types.submodule ( + {config, ...} @ args'': + cfg.backends.${args.config.backend} (args'' // {inherit backupConfig backupName;}) + ); + default = {}; + description = "Backend-specific options"; + }; + + preBackupScript = mkOption { + type = types.lines; + default = ""; + description = "Script to run before backing up"; + }; + + postBackupScript = mkOption { + type = types.lines; + default = ""; + description = '' + Script to run after backing up. Runs even if the backup fails. + ''; + }; + + notifications = { + failure = { + enable = mkOption { + type = types.bool; + default = true; + description = "Enable failure notifications"; + }; + }; + + success = { + enable = mkOption { + type = types.bool; + default = false; + description = "Enable success notifications"; + }; + }; + }; + }; + } + ) + ); + default = {}; + description = "Backup job definitions"; + } diff --git a/modules/nixos/system/backups/default.nix b/modules/nixos/system/backups/default.nix new file mode 100644 index 0000000..d29d46e --- /dev/null +++ b/modules/nixos/system/backups/default.nix @@ -0,0 +1,6 @@ +{ + imports = [ + ./root.nix + ./restic.nix + ]; +} diff --git a/modules/nixos/system/backups/restic.nix b/modules/nixos/system/backups/restic.nix new file mode 100644 index 0000000..58bfb1b --- /dev/null +++ b/modules/nixos/system/backups/restic.nix @@ -0,0 +1,234 @@ +# restic.nix - Restic backend implementation +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.system.backups; + resticCfg = cfg.restic; + + # Get only restic backups that are enabled + resticBackups = filterAttrs (_: backup: backup.backend == "restic" && backup.enable) cfg.backups; + + # Create restic service configurations + createResticServices = + mapAttrs ( + name: backup: let + # Merge global defaults with backup-specific options + serviceConfig = + recursiveUpdate resticCfg.defaultBackendOptions backup.backendOptions + // { + inherit (backup) paths; + + # Use backup-specific timer or fall back to global default + timerConfig = + if backup.timerConfig != null + then backup.timerConfig + else resticCfg.timerConfig; + }; + in + serviceConfig + ) + resticBackups; +in { + options.system.backups.restic = { + enable = mkEnableOption "restic backup backend"; + + timerConfig = mkOption { + type = types.attrs; + default = { + OnCalendar = "*-*-* 05:00:00"; + Persistent = true; + }; + description = "Default systemd timer configuration for restic backups"; + }; + + defaultBackendOptions = mkOption { + type = types.attrs; + default = {}; + example = { + repository = "/backup/restic"; + passwordFile = "/etc/nixos/secrets/restic-password"; + initialize = true; + pruneOpts = [ + "--keep-daily 7" + "--keep-weekly 5" + "--keep-monthly 12" + "--keep-yearly 75" + ]; + }; + description = "Default backend options applied to all restic backup jobs"; + }; + + # Advanced options + runMaintenance = mkOption { + type = types.bool; + default = true; + description = "Whether to run repository maintenance after backups"; + }; + + maintenanceTimer = mkOption { + type = types.attrs; + default = { + OnCalendar = "*-*-* 06:00:00"; + Persistent = true; + }; + description = "Timer configuration for maintenance tasks"; + }; + + pruneOpts = mkOption { + type = types.listOf types.str; + default = [ + "--keep-daily 7" + "--keep-weekly 4" + "--keep-monthly 6" + "--keep-yearly 3" + ]; + description = "Default pruning options for maintenance"; + }; + }; + + config = mkIf resticCfg.enable { + # Register restic backend + system.backups.backends.restic = { + backupConfig, + backupName, + ... + }: { + # Define the proper options schema for restic backendOptions + options = { + repository = mkOption { + type = types.str; + description = "Restic repository path or URL"; + }; + + passwordFile = mkOption { + type = types.str; + description = "Path to file containing the repository password"; + }; + + initialize = mkOption { + type = types.bool; + default = true; + description = "Whether to initialize the repository if it doesn't exist"; + }; + + exclude = mkOption { + type = types.listOf types.str; + default = []; + description = "Patterns to exclude from backup"; + }; + + extraBackupArgs = mkOption { + type = types.listOf types.str; + default = []; + description = "Additional arguments passed to restic backup command"; + }; + + user = mkOption { + type = types.str; + default = "root"; + description = "User to run the backup as"; + }; + + pruneOpts = mkOption { + type = types.listOf types.str; + default = resticCfg.pruneOpts; + description = "Pruning options for this backup"; + }; + }; + + # Default config merged with global defaults + config = { + extraBackupArgs = + [ + "--tag ${backupName}" + "--verbose" + ] + ++ (resticCfg.defaultBackendOptions.extraBackupArgs or []); + }; + }; + + # Create actual restic backup services + services.restic.backups = createResticServices; + + # Add restic package + environment.systemPackages = [pkgs.restic]; + + # Systemd service customizations for restic backups + systemd.services = + (mapAttrs' ( + name: backup: + nameValuePair "restic-backups-${name}" { + # Custom pre/post scripts + preStart = mkBefore backup.preBackupScript; + postStop = mkAfter backup.postBackupScript; + + # Enhanced service configuration + serviceConfig = { + # Restart configuration + Restart = "on-failure"; + RestartSec = "5m"; + RestartMaxDelaySec = "30m"; + RestartSteps = 3; + + # Rate limiting + StartLimitBurst = 4; + StartLimitIntervalSec = "2h"; + }; + + # Failure handling could be extended here for notifications + # onFailure = optional backup.notifications.failure.enable "restic-backup-${name}-failure-notify.service"; + } + ) + resticBackups) + // optionalAttrs resticCfg.runMaintenance { + # Repository maintenance service + restic-maintenance = { + description = "Restic repository maintenance"; + after = map (name: "restic-backups-${name}.service") (attrNames resticBackups); + + environment = + resticCfg.defaultBackendOptions + // { + RESTIC_CACHE_DIR = "/var/cache/restic-maintenance"; + }; + + serviceConfig = { + Type = "oneshot"; + ExecStart = [ + "${pkgs.restic}/bin/restic forget --prune ${concatStringsSep " " resticCfg.pruneOpts}" + "${pkgs.restic}/bin/restic check --read-data-subset=500M" + ]; + + User = "root"; + CacheDirectory = "restic-maintenance"; + CacheDirectoryMode = "0700"; + }; + }; + }; + + # Maintenance timer + systemd.timers = mkIf resticCfg.runMaintenance { + restic-maintenance = { + description = "Timer for restic repository maintenance"; + wantedBy = ["timers.target"]; + timerConfig = resticCfg.maintenanceTimer; + }; + }; + + # Helpful shell aliases + programs.zsh.shellAliases = + { + restic-snapshots = "restic snapshots --compact --group-by tags"; + restic-repo-size = "restic stats --mode raw-data"; + } + // (mapAttrs' ( + name: _: + nameValuePair "backup-${name}" "systemctl start restic-backups-${name}" + ) + resticBackups); + }; +} diff --git a/modules/nixos/system/backups/root.nix b/modules/nixos/system/backups/root.nix new file mode 100644 index 0000000..5656f72 --- /dev/null +++ b/modules/nixos/system/backups/root.nix @@ -0,0 +1,66 @@ +# root.nix - Main backup system module +{ + config, + lib, + pkgs, + ... +}: +with lib; let + cfg = config.system.backups; + + # Filter backups by backend + getBackupsByBackend = backend: + filterAttrs (_: backup: backup.backend == backend && backup.enable) cfg.backups; +in { + options.system.backups = { + # Backend registration system - backends register themselves here + backends = mkOption { + type = with types; attrsOf (functionTo attrs); + internal = true; + default = {}; + description = '' + Attribute set of backends where the value is a function that accepts + backend-specific arguments and returns an attribute set for the backend's options. + ''; + }; + + # Import the backups option from separate file, passing cfg for backend inference + backups = import ./backups-option.nix cfg; + + # Pass lib to the backups-option for access to mkOption, types, etc. + lib = mkOption { + type = types.attrs; + internal = true; + default = lib; + }; + }; + + config = { + # Re-export backups at root level for convenience + # backups = cfg.backups; + + # Common backup packages + environment.systemPackages = with pkgs; [ + # Add common backup utilities here + ]; + + # Common systemd service modifications for all backup services + systemd.services = let + allBackupServices = flatten ( + mapAttrsToList ( + backendName: backups: + mapAttrsToList (name: backup: "${backendName}-backups-${name}") backups + ) (genAttrs (attrNames cfg.backends) (backend: getBackupsByBackend backend)) + ); + in + genAttrs allBackupServices (serviceName: { + serviceConfig = { + # Common hardening for all backup services + ProtectSystem = "strict"; + ProtectHome = "read-only"; + PrivateTmp = true; + NoNewPrivileges = true; + }; + }); + }; +} diff --git a/proxmox-infra/.gitignore b/proxmox-infra/.gitignore new file mode 100644 index 0000000..9b8ce00 --- /dev/null +++ b/proxmox-infra/.gitignore @@ -0,0 +1,7 @@ +# proxmox-infra/.gitignore +.terraform/ + +*.tfstate +.tfstate. +crash.log +*.tfvars diff --git a/proxmox-infra/.terraform.lock.hcl b/proxmox-infra/.terraform.lock.hcl new file mode 100644 index 0000000..978a610 --- /dev/null +++ b/proxmox-infra/.terraform.lock.hcl @@ -0,0 +1,24 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. + +provider "registry.opentofu.org/telmate/proxmox" { + version = "3.0.2-rc01" + constraints = "3.0.2-rc01" + hashes = [ + "h1:571ROPuTMC0w5lr9hbUXi7NVLsG3SpmZxXXZx8cAT+Q=", + "zh:34d264243a4513f4e30c01fb37cc6a3e592d7823dfd182c5edfb170ac7b7de3a", + "zh:544428311ad20fbb3ad2cd854e893bbf036023cb57c3acc5093d141976dac670", + "zh:5c2396b328edee8de7ac144c15a6b7e668e81063699bc8c110d7c39fb8da70e9", + "zh:5ca8e33476ad06a0259071120a59477e8f107f30c1178ea7b9f6cafe1a461ade", + "zh:5ea56eb8275edc754a01a0180750e9c939cd997d3a50659617770211f4337da9", + "zh:9dd3482df6bbe00a4a6152be3567b6c08d35c3644a327a1f5ac30fd95ccd449f", + "zh:a76075fafadcc94a825151aff169bae4e0c05e3c7717e16dcdcf16ffa61a0780", + "zh:b1d95f97b22f671db762f7adf428b409e6736c078bcf267d8391985b8847d6e3", + "zh:cc94255cd1b18e6a341c15089015c457c8c639c25c426b07f278d5ea9850b3b5", + "zh:ce991103cb69b0b3e275127e3ab92c88bb3b6b0f4e5a2cb082aeaef70a7f7d61", + "zh:d24838bce87b38e12544a1329f5ad30e2be045968e639a3f4ddd5c84aa648e04", + "zh:e106ebd4eea8d62d62e62f261a262febc615e17466b54ac18f7e65c7e79e0008", + "zh:e254ca76c95e6e92da973b7bddc36bfa0a1e31d7c7e758ef4b01315db969388b", + "zh:f1d1d5f4c39267cacebe0ab7e9e06caf9692707f3b5369685541b65bc8b840ce", + ] +} diff --git a/proxmox-infra/main.tf b/proxmox-infra/main.tf new file mode 100644 index 0000000..e9ef2ce --- /dev/null +++ b/proxmox-infra/main.tf @@ -0,0 +1,52 @@ +# # This calls the module to define a new VM (e.g., if you were creating one) +# resource "proxmox_vm_qemu" "sandbox" { +# name = "sandbox" +# desc = "OpenTofu testing" +# target_nodes = [var.proxmox_node] +# vmid= 100 +# full_clone = true +# clone_id = 9100 +# agent = 1 +# scsihw = "virtio-scsi-single" +# ciuser = "root" +# ipconfig0 = "ip=dhcp" +# cpu { +# cores = 2 +# } +# memory = 2048 +# disks { +# virtio { +# virtio0 { +# disk { +# size = "9452M" +# storage = "local-lvm" +# } +# } +# } +# ide { +# ide2 { +# cloudinit { +# storage = "local-lvm" +# } +# } +# } +# } +# network { +# id = 0 +# bridge = "vmbr0" +# model = "virtio" +# } +# serial { +# id = 0 +# } +# } + +# output "sandbox_vmid" { +# description = "sandbox VM ID" +# value = proxmox_vm_qemu.sandbox.id +# } + +# output "sandbox_ipv4" { +# description = "sandbox public IPv4 address" +# value = proxmox_vm_qemu.sandbox.default_ipv4_address +# } diff --git a/proxmox-infra/outputs.tf b/proxmox-infra/outputs.tf new file mode 100644 index 0000000..e69de29 diff --git a/proxmox-infra/provider.tf b/proxmox-infra/provider.tf new file mode 100644 index 0000000..0d04a2a --- /dev/null +++ b/proxmox-infra/provider.tf @@ -0,0 +1,9 @@ +provider "proxmox" { + pm_tls_insecure = true + pm_api_url = var.proxmox_api_url + pm_user = var.proxmox_user + pm_password = var.proxmox_password + # Or use API token for better security: + # pm_api_token_id = var.proxmox_api_token_id + # pm_api_token_secret = var.proxmox_api_token_secret +} diff --git a/proxmox-infra/sandbox.tf b/proxmox-infra/sandbox.tf new file mode 100644 index 0000000..7932732 --- /dev/null +++ b/proxmox-infra/sandbox.tf @@ -0,0 +1,106 @@ +# proxmox_vm_qemu.sandbox: +resource "proxmox_vm_qemu" "sandbox" { + agent = 1 + bios = "seabios" + boot = " " + ciuser = "root" + cores = 0 + current_node = "proxmox-01" + define_connection_info = false + desc = " generated by NixOS" + force_create = false + full_clone = false + hotplug = "network,disk,usb" + id = "proxmox-01/qemu/100" + ipconfig0 = "ip=dhcp" + kvm = true + linked_vmid = 0 + memory = 2048 + name = "sandbox" + numa = false + onboot = true + protection = false + qemu_os = "l26" + reboot_required = false + scsihw = "virtio-scsi-single" + sockets = 0 + sshkeys = <<-EOT + ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCljEOf8Lv7Ptgsc1+CYzXpnrctPy7LFXXOyVZTI9uN7R4HY5aEdZTKEGSsU/+p+JtXWzzI65fnrZU8pTMG/wvCK+gYyNZcEM4g/TXMVa+CWZR3y13zGky88R7dKiBl5L00U4BePDD1ci3EU3/Mjr/GVTQHtkbJfLtvhR9zkCNZzxbu+rySWDroUPWPvE3y60/iLjBsh5ZmHo59CW67lh1jgbAlZjKWZzLWo0Bc5wgbxoQPWcO4BCh17N4g8llrRxGOwJzHeaipBnXn9J1AGIm9Zls6pxT9j6MKltcCOb7tQZwc3hlPOW2ku6f7OHTrziKw37drIDM0UDublAOcnIfBjE+XuWsp5t6ojdIzIDMrzaYW2MyMA3PHuf7VESUQdP4TZ1XUwtRRzOjn5AZJi9DPoowPaxKL92apRpFG+ovaFpWZsG7s8NWXHAC79IpgMUzscEmM15OMQ36RQ5xeytGDVCmVT8DbHGrMT9HUfR5fBSWD3aDQiOOiIIhrbY35m+U65Sz/GpZMk6HlaiV3tKNB0m+xE+84MUEmm4fFzt3B/0N4kscMArnLAm/OMUblihPwbKAUAUWErGRBfP+u+zjRCi1D9/pffpl2OQ2QIuVM82g6/EPa1ZsXZP+4iHooQoJbrqVGzkfiA1EKLfcdGfkP/O4nRl+D5UgkGdqqvm20NQ== root@proxmox-01 + EOT + tablet = true + target_nodes = [ + "proxmox-01", + ] + unused_disk = [] + vcpus = 0 + vm_state = "running" + vmid = 100 + + cpu { + cores = 2 + limit = 0 + numa = false + sockets = 1 + type = "host" + units = 0 + vcores = 0 + } + + disks { + ide { + ide2 { + cloudinit { + storage = "local-lvm" + } + } + } + virtio { + virtio0 { + disk { + backup = true + discard = false + format = "raw" + id = 0 + iops_r_burst = 0 + iops_r_burst_length = 0 + iops_r_concurrent = 0 + iops_wr_burst = 0 + iops_wr_burst_length = 0 + iops_wr_concurrent = 0 + iothread = false + linked_disk_id = -1 + mbps_r_burst = 0 + mbps_r_concurrent = 0 + mbps_wr_burst = 0 + mbps_wr_concurrent = 0 + readonly = false + replicate = true + size = "9452M" + storage = "local-lvm" + } + } + } + } + + network { + bridge = "vmbr0" + firewall = true + id = 0 + link_down = false + macaddr = "bc:24:11:a7:e8:2a" + model = "virtio" + mtu = 0 + queues = 0 + rate = 0 + tag = 0 + } + + serial { + id = 0 + type = "socket" + } + + smbios { + uuid = "37cd09d5-29a5-42e2-baba-f21b691130e8" + } +} diff --git a/proxmox-infra/terraform.tfstate.backup b/proxmox-infra/terraform.tfstate.backup new file mode 100644 index 0000000..8a8181b --- /dev/null +++ b/proxmox-infra/terraform.tfstate.backup @@ -0,0 +1 @@ +{"version":4,"terraform_version":"1.9.1","serial":2,"lineage":"ecd6c5f8-5352-bf30-6117-d55763366399","outputs":{"sandbox_ipv4":{"value":"192.168.1.206","type":"string"},"sandbox_vmid":{"value":"proxmox-01/qemu/999","type":"string"}},"resources":[{"mode":"managed","type":"proxmox_vm_qemu","name":"sandbox","provider":"provider[\"registry.opentofu.org/telmate/proxmox\"]","instances":[{"schema_version":0,"attributes":{"additional_wait":5,"agent":1,"agent_timeout":90,"args":"","automatic_reboot":true,"balloon":0,"bios":"seabios","boot":" ","bootdisk":"","ci_wait":null,"cicustom":null,"cipassword":"","ciupgrade":false,"ciuser":"root","clone":null,"clone_id":9100,"clone_wait":10,"cores":0,"cpu":[{"affinity":"","cores":2,"flags":[],"limit":0,"numa":false,"sockets":1,"type":"host","units":0,"vcores":0}],"cpu_type":"","current_node":"proxmox-01","default_ipv4_address":"192.168.1.206","default_ipv6_address":"2a05:f6c7:2030:0:be24:11ff:feb9:919f","define_connection_info":true,"desc":"OpenTofu testing","disk":[],"disks":[{"ide":[{"ide0":[],"ide1":[],"ide2":[{"cdrom":[],"cloudinit":[{"storage":"local-lvm"}],"disk":[],"ignore":false,"passthrough":[]}],"ide3":[]}],"sata":[],"scsi":[],"virtio":[{"virtio0":[{"cdrom":[],"disk":[{"asyncio":"","backup":true,"cache":"","discard":false,"format":"raw","id":0,"iops_r_burst":0,"iops_r_burst_length":0,"iops_r_concurrent":0,"iops_wr_burst":0,"iops_wr_burst_length":0,"iops_wr_concurrent":0,"iothread":false,"linked_disk_id":-1,"mbps_r_burst":0,"mbps_r_concurrent":0,"mbps_wr_burst":0,"mbps_wr_concurrent":0,"readonly":false,"replicate":false,"serial":"","size":"9452M","storage":"local-lvm","wwn":""}],"ignore":false,"passthrough":[]}],"virtio1":[],"virtio10":[],"virtio11":[],"virtio12":[],"virtio13":[],"virtio14":[],"virtio15":[],"virtio2":[],"virtio3":[],"virtio4":[],"virtio5":[],"virtio6":[],"virtio7":[],"virtio8":[],"virtio9":[]}]}],"efidisk":[],"force_create":false,"force_recreate_on_change_of":null,"full_clone":true,"hagroup":"","hastate":"","hostpci":[],"hotplug":"network,disk,usb","id":"proxmox-01/qemu/999","ipconfig0":"ip=dhcp","ipconfig1":null,"ipconfig10":null,"ipconfig11":null,"ipconfig12":null,"ipconfig13":null,"ipconfig14":null,"ipconfig15":null,"ipconfig2":null,"ipconfig3":null,"ipconfig4":null,"ipconfig5":null,"ipconfig6":null,"ipconfig7":null,"ipconfig8":null,"ipconfig9":null,"kvm":true,"linked_vmid":0,"machine":"","memory":2048,"name":"sandbox2","nameserver":null,"network":[{"bridge":"vmbr0","firewall":false,"id":0,"link_down":false,"macaddr":"bc:24:11:b9:91:9f","model":"virtio","mtu":0,"queues":0,"rate":0,"tag":0}],"numa":false,"onboot":false,"os_network_config":null,"os_type":null,"pci":[],"pcis":[],"pool":"","protection":false,"pxe":null,"qemu_os":"l26","reboot_required":false,"scsihw":"virtio-scsi-single","searchdomain":null,"serial":[{"id":0,"type":"socket"}],"skip_ipv4":false,"skip_ipv6":false,"smbios":[{"family":"","manufacturer":"","product":"","serial":"","sku":"","uuid":"51a93ec4-4afa-428b-911a-daab70390a8c","version":""}],"sockets":0,"ssh_forward_ip":null,"ssh_host":"192.168.1.206","ssh_port":"22","ssh_private_key":null,"ssh_user":null,"sshkeys":null,"startup":"","tablet":true,"tags":"v0.0.2","target_node":null,"target_nodes":["proxmox-01"],"timeouts":null,"tpm_state":[],"unused_disk":[],"usb":[],"usbs":[],"vcpus":0,"vga":[],"vm_state":"running","vmid":999},"sensitive_attributes":[[{"type":"get_attr","value":"cipassword"}],[{"type":"get_attr","value":"ssh_private_key"}]],"private":"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWZhdWx0IjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInJlYWQiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19"}]}],"check_results":null} diff --git a/proxmox-infra/variables.tf b/proxmox-infra/variables.tf new file mode 100644 index 0000000..71653f0 --- /dev/null +++ b/proxmox-infra/variables.tf @@ -0,0 +1,30 @@ +# proxmox-infra/variables.tf + +variable "proxmox_api_url" { + description = "The URL of the Proxmox API (e.g., https://192.168.1.10:8006/api2/json)" + type = string + # No default here, so OpenTofu will prompt or expect a .tfvars file/env var +} + +variable "proxmox_user" { + description = "Proxmox user (e.g., root@pam or user@pve)" + type = string +} + +variable "proxmox_password" { + description = "Proxmox user password" + type = string + sensitive = true # Mark as sensitive to hide in logs +} + +variable "proxmox_node" { + description = "The Proxmox node name where VMs will be deployed (e.g., 'pve')" + type = string +} + +# Example for templates - you might have different templates +variable "nixos_template_id" { + description = "VMID of the nixos cloud-init template" + type = number + # Example: default = 100 +} diff --git a/proxmox-infra/versions.tf b/proxmox-infra/versions.tf new file mode 100644 index 0000000..3ca35cc --- /dev/null +++ b/proxmox-infra/versions.tf @@ -0,0 +1,9 @@ +# versions.tf +terraform { + required_providers { + proxmox = { + source = "Telmate/proxmox" + version = "3.0.2-rc01" + } + } +} diff --git a/users/default.nix b/users/default.nix deleted file mode 100644 index b4edc6d..0000000 --- a/users/default.nix +++ /dev/null @@ -1,3 +0,0 @@ -{ - defaultUser = import ./plasmagoat.nix; -} diff --git a/users/plasmagoat.nix b/users/plasmagoat.nix index 037b202..9e5a96b 100644 --- a/users/plasmagoat.nix +++ b/users/plasmagoat.nix @@ -1,3 +1,4 @@ +# users/plasmagoat.nix - Your user configuration { config, lib,