homelab framework module init (everything is a mess)
This commit is contained in:
parent
0347f4d325
commit
bcbcc8b17b
94 changed files with 7289 additions and 436 deletions
116
modules/homelab/backup-config.nix
Normal file
116
modules/homelab/backup-config.nix
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.backups;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Get all defined backend names dynamically
|
||||
backendNames = attrNames cfg.backends or {};
|
||||
|
||||
backupJobType = types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
description = "Name of the backup job";
|
||||
};
|
||||
backend = mkOption {
|
||||
type = types.enum backendNames;
|
||||
description = "Backend to use for this backup job";
|
||||
};
|
||||
backendOptions = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Backend-specific options to override or extend the backend configuration";
|
||||
};
|
||||
labels = mkOption {
|
||||
type = types.attrsOf types.str;
|
||||
default = {};
|
||||
description = "Additional labels for this backup job";
|
||||
};
|
||||
};
|
||||
};
|
||||
in {
|
||||
imports = [
|
||||
./backup/restic.nix
|
||||
# ./backup/borgbackup.nix
|
||||
];
|
||||
|
||||
options.homelab.backups = {
|
||||
enable = mkEnableOption "Homelab backup system";
|
||||
|
||||
jobs = mkOption {
|
||||
type = types.listOf backupJobType;
|
||||
default = [];
|
||||
description = "Backup jobs to execute on this system";
|
||||
};
|
||||
|
||||
defaultLabels = mkOption {
|
||||
type = types.attrsOf types.str;
|
||||
default = {
|
||||
hostname = homelabCfg.hostname;
|
||||
environment = homelabCfg.environment;
|
||||
location = homelabCfg.location;
|
||||
};
|
||||
description = "Default labels applied to all backup jobs";
|
||||
};
|
||||
|
||||
monitoring = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable backup monitoring and metrics";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Validate that all job backends exist
|
||||
assertions = [
|
||||
{
|
||||
assertion = all (job: cfg.backends.${job.backend} != null) cfg.jobs;
|
||||
message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends";
|
||||
}
|
||||
];
|
||||
|
||||
# Add backup jobs to monitoring endpoints if monitoring is enabled
|
||||
# homelab.monitoring.endpoints =
|
||||
# mkIf (cfg.monitoring && config.homelab.monitoring.enable)
|
||||
# (map (job: {
|
||||
# name = "backup-${job.name}";
|
||||
# port = 9100; # Assuming node exporter collects backup metrics
|
||||
# path = "/metrics";
|
||||
# jobName = "backup";
|
||||
# labels =
|
||||
# cfg.defaultLabels
|
||||
# // job.labels
|
||||
# // {
|
||||
# backup_job = job.name;
|
||||
# backup_backend = job.backend;
|
||||
# };
|
||||
# })
|
||||
# cfg.jobs);
|
||||
|
||||
# Export backup configuration for external consumption
|
||||
environment.etc."homelab/backup-config.json".text = builtins.toJSON {
|
||||
backends =
|
||||
mapAttrs (name: config: {
|
||||
inherit name;
|
||||
enabled = config.enable or false;
|
||||
})
|
||||
cfg.backends;
|
||||
|
||||
jobs =
|
||||
map (job: {
|
||||
inherit (job) name backend labels;
|
||||
allLabels = cfg.defaultLabels // job.labels;
|
||||
paths = job.backendOptions.paths or [];
|
||||
schedule = job.backendOptions.timerConfig.OnCalendar or job.backendOptions.startAt or "unknown";
|
||||
node = homelabCfg.hostname;
|
||||
environment = homelabCfg.environment;
|
||||
location = homelabCfg.location;
|
||||
})
|
||||
cfg.jobs;
|
||||
};
|
||||
};
|
||||
}
|
||||
105
modules/homelab/backup/restic.nix
Normal file
105
modules/homelab/backup/restic.nix
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.backups;
|
||||
|
||||
# Get restic backend config if it exists
|
||||
resticBackend = cfg.backends.restic or null;
|
||||
resticEnabled = resticBackend.enable or false;
|
||||
|
||||
# Filter jobs that use the restic backend
|
||||
resticJobs = filter (job: job.backend == "restic") cfg.jobs;
|
||||
in {
|
||||
options.homelab.backups.backends.restic = mkOption {
|
||||
type = types.nullOr (types.submodule {
|
||||
options = {
|
||||
enable = mkEnableOption "Restic backup backend";
|
||||
|
||||
# Default restic options - these map directly to services.restic.backups.<name>
|
||||
repository = mkOption {
|
||||
type = types.str;
|
||||
description = "Default repository for restic backups";
|
||||
};
|
||||
|
||||
initialize = lib.mkOption {
|
||||
type = lib.types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Create the repository if it doesn't exist.
|
||||
'';
|
||||
};
|
||||
|
||||
passwordFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Default password file for restic repository";
|
||||
};
|
||||
|
||||
environmentFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Default environment file for restic credentials";
|
||||
};
|
||||
|
||||
paths = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
description = "Default paths to backup";
|
||||
};
|
||||
|
||||
exclude = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
description = "Default exclude patterns";
|
||||
};
|
||||
|
||||
timerConfig = mkOption {
|
||||
type = types.attrs;
|
||||
default = {
|
||||
OnCalendar = "daily";
|
||||
RandomizedDelaySec = "1h";
|
||||
};
|
||||
description = "Default timer configuration";
|
||||
};
|
||||
|
||||
pruneOpts = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [
|
||||
"--keep-daily 7"
|
||||
"--keep-weekly 4"
|
||||
"--keep-monthly 6"
|
||||
];
|
||||
description = "Default pruning options";
|
||||
};
|
||||
|
||||
# Allow any other restic options
|
||||
extraOptions = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional default restic options";
|
||||
};
|
||||
};
|
||||
});
|
||||
default = null;
|
||||
description = "Restic backend configuration";
|
||||
};
|
||||
|
||||
config = mkIf (cfg.enable && resticEnabled && length resticJobs > 0) {
|
||||
# Configure restic service for each job using the restic backend
|
||||
services.restic.backups = listToAttrs (map (
|
||||
job: let
|
||||
# Get base config without the 'enable' field
|
||||
baseConfig = removeAttrs resticBackend ["enable"];
|
||||
# Merge extraOptions into base config
|
||||
baseWithExtras = recursiveUpdate (removeAttrs baseConfig ["extraOptions"]) (baseConfig.extraOptions or {});
|
||||
# Apply job-specific overrides
|
||||
finalConfig = recursiveUpdate baseWithExtras job.backendOptions;
|
||||
in
|
||||
nameValuePair job.name finalConfig
|
||||
)
|
||||
resticJobs);
|
||||
};
|
||||
}
|
||||
133
modules/homelab/default.nix
Normal file
133
modules/homelab/default.nix
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab;
|
||||
|
||||
nodeAgg = import ./lib/node-aggregation.nix {inherit lib;};
|
||||
in {
|
||||
imports = [
|
||||
./monitoring-config.nix
|
||||
./proxy-config.nix
|
||||
./backup-config.nix
|
||||
./motd
|
||||
|
||||
./services
|
||||
|
||||
# Global aggregation modules
|
||||
(nodeAgg.mkGlobalModule "monitoring" nodeAgg.aggregators.monitoring)
|
||||
# (nodeAgg.mkGlobalModule "logs" nodeAgg.aggregators.logs)
|
||||
(nodeAgg.mkGlobalModule "reverseProxy" nodeAgg.aggregators.reverseProxy)
|
||||
(nodeAgg.mkGlobalModule "backups" nodeAgg.aggregators.backups)
|
||||
];
|
||||
|
||||
options.homelab = {
|
||||
enable = mkEnableOption "Homelab fleet configuration";
|
||||
hostname = mkOption {
|
||||
type = types.str;
|
||||
description = "Hostname for this system";
|
||||
};
|
||||
domain = mkOption {
|
||||
type = types.str;
|
||||
default = "lab";
|
||||
description = "Base domain for the homelab";
|
||||
};
|
||||
externalDomain = mkOption {
|
||||
type = types.str;
|
||||
default = "procopius.dk";
|
||||
description = "External doamin to the homelab";
|
||||
};
|
||||
environment = mkOption {
|
||||
type = types.enum ["production" "staging" "development"];
|
||||
default = "production";
|
||||
description = "Environment type";
|
||||
};
|
||||
location = mkOption {
|
||||
type = types.str;
|
||||
default = "homelab";
|
||||
description = "Physical location identifier";
|
||||
};
|
||||
tags = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
description = "Tags for this system";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Set hostname
|
||||
networking.hostName = cfg.hostname;
|
||||
|
||||
# Export configuration for external consumption
|
||||
environment.etc."homelab/config.json".text = builtins.toJSON {
|
||||
inherit (cfg) hostname domain environment location tags;
|
||||
|
||||
monitoring = {
|
||||
# Metrics endpoints (Prometheus, etc.)
|
||||
metrics =
|
||||
map (endpoint: {
|
||||
inherit (endpoint) name host port path jobName scrapeInterval labels;
|
||||
url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}";
|
||||
})
|
||||
cfg.global.monitoring.allMetrics or [];
|
||||
|
||||
# Health check endpoints
|
||||
healthChecks =
|
||||
map (check: let
|
||||
# Determine the host based on useExternalDomain
|
||||
actualHost =
|
||||
if check.useExternalDomain
|
||||
then "${check.subdomain}.${cfg.externalDomain}"
|
||||
else check.host;
|
||||
|
||||
# Build the URL
|
||||
portPart =
|
||||
if check.port != null
|
||||
then ":${toString check.port}"
|
||||
else "";
|
||||
url = "${check.protocol}://${actualHost}${portPart}${check.path}";
|
||||
in {
|
||||
inherit (check) name protocol method interval timeout conditions alerts group labels enabled;
|
||||
host = actualHost;
|
||||
port = check.port;
|
||||
path = check.path;
|
||||
url = url;
|
||||
useExternalDomain = check.useExternalDomain;
|
||||
subdomain = check.subdomain;
|
||||
sourceNode = cfg.hostname;
|
||||
})
|
||||
cfg.global.monitoring.allHealthChecks or [];
|
||||
};
|
||||
|
||||
reverseProxy = {
|
||||
entries =
|
||||
map (entry: {
|
||||
inherit (entry) subdomain host port path enableAuth enableSSL;
|
||||
internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}";
|
||||
externalHost = "${entry.subdomain}.${cfg.externalDomain}";
|
||||
})
|
||||
cfg.global.reverseProxy.all;
|
||||
};
|
||||
|
||||
backups = {
|
||||
jobs =
|
||||
map (job: {
|
||||
inherit (job) name backend labels;
|
||||
backupId = job._backupId;
|
||||
sourceNode = job._sourceNode;
|
||||
})
|
||||
cfg.global.backups.all;
|
||||
|
||||
backends = cfg.global.backups.allBackends;
|
||||
|
||||
summary = {
|
||||
totalJobs = length cfg.global.backups.all;
|
||||
jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend;
|
||||
jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
226
modules/homelab/lib/node-aggregation.nix
Normal file
226
modules/homelab/lib/node-aggregation.nix
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
{lib}: let
|
||||
inherit (lib) flatten mapAttrs mapAttrsToList filter groupBy length unique attrByPath splitString;
|
||||
|
||||
# Generic function to aggregate any attribute across nodes
|
||||
aggregateFromNodes = {
|
||||
nodes,
|
||||
attributePath, # e.g. "homelab.monitoring.endpoints" or "homelab.backups.jobs"
|
||||
enhancer ? null, # optional function to enhance each item with node context
|
||||
}: let
|
||||
# Extract the attribute from each node using the path
|
||||
getNestedAttr = path: config: let
|
||||
pathList = splitString "." path;
|
||||
in
|
||||
attrByPath pathList [] config;
|
||||
|
||||
# Get all items from all nodes
|
||||
allItems = flatten (mapAttrsToList
|
||||
(nodeName: nodeConfig: let
|
||||
items = getNestedAttr attributePath nodeConfig.config;
|
||||
baseEnhancer = item:
|
||||
item
|
||||
// {
|
||||
_nodeName = nodeName;
|
||||
_nodeConfig = nodeConfig;
|
||||
_nodeAddress = nodeConfig.config.networking.hostName or nodeName;
|
||||
};
|
||||
finalEnhancer =
|
||||
if enhancer != null
|
||||
then (item: enhancer (baseEnhancer item))
|
||||
else baseEnhancer;
|
||||
in
|
||||
map finalEnhancer items)
|
||||
nodes);
|
||||
in {
|
||||
# Raw aggregated data
|
||||
all = allItems;
|
||||
|
||||
# Common grouping patterns
|
||||
byNode = groupBy (item: item._nodeName) allItems;
|
||||
byType = groupBy (item: item.type or "unknown") allItems;
|
||||
byService = groupBy (item: item.service or "unknown") allItems;
|
||||
|
||||
# Utility functions for filtering
|
||||
filterBy = predicate: filter predicate allItems;
|
||||
ofType = type: filter (item: (item.type or "") == type) allItems;
|
||||
|
||||
count = length allItems;
|
||||
countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems);
|
||||
};
|
||||
|
||||
# Specialized aggregators for common use cases
|
||||
aggregators = {
|
||||
monitoring = nodes: let
|
||||
# Aggregate metrics endpoints
|
||||
metricsAgg = aggregateFromNodes {
|
||||
inherit nodes;
|
||||
attributePath = "homelab.monitoring.metrics";
|
||||
enhancer = endpoint:
|
||||
endpoint
|
||||
// {
|
||||
_fullAddress = "${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}";
|
||||
_metricsUrl = "http://${endpoint.host or endpoint._nodeAddress}:${toString endpoint.port}${endpoint.path or "/metrics"}";
|
||||
_type = "metrics";
|
||||
};
|
||||
};
|
||||
# Aggregate health checks
|
||||
healthChecksAgg = aggregateFromNodes {
|
||||
inherit nodes;
|
||||
attributePath = "homelab.monitoring.healthChecks";
|
||||
enhancer = check: let
|
||||
# Compute the actual host and URL
|
||||
actualHost =
|
||||
if check.useExternalDomain or false
|
||||
then "${check.subdomain}.${check._nodeConfig.config.homelab.externalDomain or "example.com"}"
|
||||
else check.host or check._nodeAddress;
|
||||
portPart =
|
||||
if check.port != null
|
||||
then ":${toString check.port}"
|
||||
else "";
|
||||
url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path or "/"}";
|
||||
in
|
||||
check
|
||||
// {
|
||||
_actualHost = actualHost;
|
||||
_url = url;
|
||||
_type = "health-check";
|
||||
# Merge default labels with node context
|
||||
labels =
|
||||
(check.labels or {})
|
||||
// {
|
||||
node = check._nodeName;
|
||||
environment = check._nodeConfig.config.homelab.environment or "unknown";
|
||||
};
|
||||
};
|
||||
};
|
||||
in
|
||||
metricsAgg
|
||||
// healthChecksAgg
|
||||
// {
|
||||
# Metrics-specific aggregations
|
||||
allMetrics = metricsAgg.all;
|
||||
metricsByNode = metricsAgg.byNode;
|
||||
metricsByJobName = groupBy (m: m.jobName or "unknown") metricsAgg.all;
|
||||
|
||||
# Health checks-specific aggregations
|
||||
allHealthChecks = healthChecksAgg.all;
|
||||
healthChecksByNode = healthChecksAgg.byNode;
|
||||
healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all;
|
||||
healthChecksByProtocol = groupBy (hc: hc.protocol or "http") healthChecksAgg.all;
|
||||
|
||||
# Filtered health checks
|
||||
externalHealthChecks = filter (hc: hc.useExternalDomain or false) healthChecksAgg.all;
|
||||
internalHealthChecks = filter (hc: !(hc.useExternalDomain or false)) healthChecksAgg.all;
|
||||
enabledHealthChecks = filter (hc: hc.enabled or true) healthChecksAgg.all;
|
||||
|
||||
# Summary statistics
|
||||
summary = {
|
||||
totalMetrics = length metricsAgg.all;
|
||||
totalHealthChecks = length healthChecksAgg.all;
|
||||
healthChecksByGroup =
|
||||
mapAttrs (group: checks: length checks)
|
||||
(groupBy (hc: hc.group or "default") healthChecksAgg.all);
|
||||
healthChecksByProtocol =
|
||||
mapAttrs (protocol: checks: length checks)
|
||||
(groupBy (hc: hc.protocol or "http") healthChecksAgg.all);
|
||||
externalChecksCount = length (filter (hc: hc.useExternalDomain or false) healthChecksAgg.all);
|
||||
internalChecksCount = length (filter (hc: !(hc.useExternalDomain or false)) healthChecksAgg.all);
|
||||
};
|
||||
};
|
||||
|
||||
# Promtail log configurations
|
||||
# logs = nodes:
|
||||
# aggregateFromNodes {
|
||||
# inherit nodes;
|
||||
# attributePath = "homelab.logging.sources";
|
||||
# enhancer = logSource:
|
||||
# logSource
|
||||
# // {
|
||||
# # Add log-specific computed fields
|
||||
# _logPath = logSource.path or "/var/log/${logSource.service}.log";
|
||||
# _labels =
|
||||
# (logSource.labels or {})
|
||||
# // {
|
||||
# node = logSource._nodeName;
|
||||
# service = logSource.service or "unknown";
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
|
||||
# Reverse proxy configurations
|
||||
reverseProxy = nodes:
|
||||
aggregateFromNodes {
|
||||
inherit nodes;
|
||||
attributePath = "homelab.reverseProxy.entries";
|
||||
enhancer = entry:
|
||||
entry
|
||||
// {
|
||||
# Add proxy-specific computed fields
|
||||
_upstream = "http://${entry.host or entry._nodeAddress}:${toString entry.port}";
|
||||
_fqdn = "${entry.subdomain or entry.service}.${entry.domain or "local"}";
|
||||
};
|
||||
};
|
||||
|
||||
# Backup jobs with enhanced aggregation
|
||||
backups = nodes: let
|
||||
baseAgg = aggregateFromNodes {
|
||||
inherit nodes;
|
||||
attributePath = "homelab.backups.jobs";
|
||||
enhancer = backup:
|
||||
backup
|
||||
// {
|
||||
_sourceNode = backup._nodeName;
|
||||
_backupId = "${backup._nodeName}-${backup.name}";
|
||||
_jobFqdn = "${backup.name}.${backup._nodeName}";
|
||||
};
|
||||
};
|
||||
|
||||
# Get all unique backends across all nodes
|
||||
allBackends = let
|
||||
allBackendConfigs =
|
||||
mapAttrsToList
|
||||
(nodeName: nodeConfig:
|
||||
attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config)
|
||||
nodes;
|
||||
enabledBackends = flatten (map (backends:
|
||||
filter (name: backends.${name} != null) (lib.attrNames backends))
|
||||
allBackendConfigs);
|
||||
in
|
||||
unique enabledBackends;
|
||||
in
|
||||
baseAgg
|
||||
// {
|
||||
# Backup-specific aggregations
|
||||
byBackend = groupBy (job: job.backend) baseAgg.all;
|
||||
allBackends = allBackends;
|
||||
|
||||
# Enhanced summary
|
||||
summary = {
|
||||
totalJobs = length baseAgg.all;
|
||||
jobsByBackend =
|
||||
mapAttrs (backend: jobs: length jobs)
|
||||
(groupBy (job: job.backend) baseAgg.all);
|
||||
jobsByNode = baseAgg.countBy (job: job._nodeName);
|
||||
availableBackends = allBackends;
|
||||
backendsInUse = unique (map (job: job.backend) baseAgg.all);
|
||||
};
|
||||
};
|
||||
};
|
||||
in {
|
||||
inherit aggregateFromNodes aggregators;
|
||||
|
||||
# Convenience function to create a module that provides global aggregations
|
||||
mkGlobalModule = attributeName: aggregatorFn: {
|
||||
lib,
|
||||
nodes,
|
||||
...
|
||||
}: {
|
||||
options.homelab.global.${attributeName} = lib.mkOption {
|
||||
type = lib.types.attrs;
|
||||
readOnly = true;
|
||||
description = "Globally aggregated ${attributeName} from all nodes";
|
||||
};
|
||||
|
||||
config.homelab.global.${attributeName} = aggregatorFn nodes;
|
||||
};
|
||||
}
|
||||
295
modules/homelab/lib/service-interface.nix
Normal file
295
modules/homelab/lib/service-interface.nix
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
# Standard service interface for homelab services
|
||||
# This provides a consistent contract that all services should follow
|
||||
{lib}: let
|
||||
inherit (lib) mkOption mkEnableOption types;
|
||||
|
||||
# Define the standard service interface
|
||||
mkServiceInterface = {
|
||||
serviceName,
|
||||
defaultPort ? null,
|
||||
defaultSubdomain ? serviceName,
|
||||
defaultDescription ? "Homelab ${serviceName} service",
|
||||
monitoringPath ? "/metrics",
|
||||
healthCheckPath ? "/health",
|
||||
healthCheckConditions ? ["[STATUS] == 200"],
|
||||
# Custom options that the service wants to expose
|
||||
serviceOptions ? {},
|
||||
}:
|
||||
{
|
||||
# Standard interface options that all services must have
|
||||
enable = mkEnableOption defaultDescription;
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default =
|
||||
if defaultPort != null
|
||||
then defaultPort
|
||||
else throw "Service ${serviceName} must specify a default port";
|
||||
description = "Port for ${serviceName} service";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to automatically open firewall ports";
|
||||
};
|
||||
|
||||
proxy = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable reverse proxy for this service";
|
||||
};
|
||||
|
||||
subdomain = mkOption {
|
||||
type = types.str;
|
||||
default = defaultSubdomain;
|
||||
description = "Subdomain for reverse proxy (${defaultSubdomain}.yourdomain.com)";
|
||||
};
|
||||
|
||||
enableAuth = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable authentication for reverse proxy";
|
||||
};
|
||||
|
||||
enableSSL = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable SSL for reverse proxy";
|
||||
};
|
||||
};
|
||||
|
||||
monitoring = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable monitoring (metrics and health checks)";
|
||||
};
|
||||
|
||||
metricsPath = mkOption {
|
||||
type = types.str;
|
||||
default = monitoringPath;
|
||||
description = "Path for metrics endpoint";
|
||||
};
|
||||
|
||||
jobName = mkOption {
|
||||
type = types.str;
|
||||
default = serviceName;
|
||||
description = "Prometheus job name";
|
||||
};
|
||||
|
||||
scrapeInterval = mkOption {
|
||||
type = types.str;
|
||||
default = "30s";
|
||||
description = "Prometheus scrape interval";
|
||||
};
|
||||
|
||||
healthCheck = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable health check monitoring";
|
||||
};
|
||||
|
||||
path = mkOption {
|
||||
type = types.str;
|
||||
default = healthCheckPath;
|
||||
description = "Path for health check endpoint";
|
||||
};
|
||||
|
||||
interval = mkOption {
|
||||
type = types.str;
|
||||
default = "30s";
|
||||
description = "Health check interval";
|
||||
};
|
||||
|
||||
timeout = mkOption {
|
||||
type = types.str;
|
||||
default = "10s";
|
||||
description = "Health check timeout";
|
||||
};
|
||||
|
||||
conditions = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = healthCheckConditions;
|
||||
description = "Health check conditions";
|
||||
};
|
||||
|
||||
group = mkOption {
|
||||
type = types.str;
|
||||
default = "services";
|
||||
description = "Health check group name";
|
||||
};
|
||||
};
|
||||
|
||||
extraLabels = mkOption {
|
||||
type = types.attrsOf types.str;
|
||||
default = {};
|
||||
description = "Additional labels for monitoring";
|
||||
};
|
||||
};
|
||||
|
||||
description = mkOption {
|
||||
type = types.str;
|
||||
default = defaultDescription;
|
||||
description = "Service description";
|
||||
};
|
||||
|
||||
extraOptions = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional service-specific configuration options";
|
||||
};
|
||||
|
||||
# Merge in service-specific options
|
||||
}
|
||||
// serviceOptions;
|
||||
|
||||
# Helper function to implement the standard service behavior
|
||||
mkServiceConfig = {
|
||||
config,
|
||||
cfg,
|
||||
homelabCfg,
|
||||
serviceName,
|
||||
# Function that returns the actual service configuration
|
||||
serviceConfig,
|
||||
# Optional: custom monitoring labels
|
||||
extraMonitoringLabels ? {},
|
||||
# Optional: custom health check configuration
|
||||
customHealthChecks ? [],
|
||||
# Optional: custom reverse proxy configuration
|
||||
customProxyConfig ? {},
|
||||
}: let
|
||||
# Standard monitoring labels
|
||||
standardLabels =
|
||||
{
|
||||
service = serviceName;
|
||||
component = "main";
|
||||
instance = "${homelabCfg.hostname}.${homelabCfg.domain}";
|
||||
}
|
||||
// extraMonitoringLabels // cfg.monitoring.extraLabels;
|
||||
|
||||
# Standard reverse proxy entry
|
||||
standardProxyEntry =
|
||||
{
|
||||
subdomain = cfg.proxy.subdomain;
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
enableAuth = cfg.proxy.enableAuth;
|
||||
enableSSL = cfg.proxy.enableSSL;
|
||||
}
|
||||
// customProxyConfig;
|
||||
|
||||
# Standard metrics configuration
|
||||
standardMetrics = lib.optional cfg.monitoring.enable {
|
||||
name = "${serviceName}-metrics";
|
||||
port = cfg.port;
|
||||
path = cfg.monitoring.metricsPath;
|
||||
jobName = cfg.monitoring.jobName;
|
||||
scrapeInterval = cfg.monitoring.scrapeInterval;
|
||||
labels = standardLabels;
|
||||
};
|
||||
|
||||
# Standard health check configuration
|
||||
standardHealthCheck = lib.optional (cfg.monitoring.enable && cfg.monitoring.healthCheck.enable) {
|
||||
name = "${serviceName}-health";
|
||||
port = cfg.port;
|
||||
path = cfg.monitoring.healthCheck.path;
|
||||
interval = cfg.monitoring.healthCheck.interval;
|
||||
timeout = cfg.monitoring.healthCheck.timeout;
|
||||
conditions = cfg.monitoring.healthCheck.conditions;
|
||||
group = cfg.monitoring.healthCheck.group;
|
||||
labels = standardLabels;
|
||||
};
|
||||
|
||||
# Merge service config with standard behaviors
|
||||
baseConfig = lib.mkMerge [
|
||||
# Service-specific configuration
|
||||
serviceConfig
|
||||
|
||||
# Standard firewall configuration
|
||||
(lib.mkIf cfg.openFirewall {
|
||||
networking.firewall.allowedTCPPorts = [cfg.port];
|
||||
})
|
||||
|
||||
# Standard monitoring configuration
|
||||
(lib.mkIf cfg.monitoring.enable {
|
||||
homelab.monitoring.metrics = standardMetrics;
|
||||
homelab.monitoring.healthChecks = standardHealthCheck ++ customHealthChecks;
|
||||
})
|
||||
|
||||
# Standard reverse proxy configuration
|
||||
(lib.mkIf cfg.proxy.enable {
|
||||
homelab.reverseProxy.entries = [standardProxyEntry];
|
||||
})
|
||||
];
|
||||
in
|
||||
lib.mkIf cfg.enable baseConfig;
|
||||
|
||||
# Validation helper to ensure required options are set
|
||||
validateServiceConfig = cfg: serviceName: [
|
||||
# Validate that if proxy is enabled, subdomain is set
|
||||
(lib.mkIf (cfg.proxy.enable && cfg.proxy.subdomain == "")
|
||||
(throw "Service ${serviceName}: proxy.subdomain is required when proxy.enable is true"))
|
||||
|
||||
# Validate that if monitoring is enabled, required paths are set
|
||||
(lib.mkIf (cfg.monitoring.enable && cfg.monitoring.metricsPath == "")
|
||||
(throw "Service ${serviceName}: monitoring.metricsPath cannot be empty when monitoring is enabled"))
|
||||
];
|
||||
in {
|
||||
inherit mkServiceInterface mkServiceConfig validateServiceConfig;
|
||||
|
||||
# Common service option patterns
|
||||
commonOptions = {
|
||||
# Log level option
|
||||
logLevel = mkOption {
|
||||
type = types.enum ["debug" "info" "warn" "error"];
|
||||
default = "info";
|
||||
description = "Log level";
|
||||
};
|
||||
|
||||
# Environment file option (for secrets)
|
||||
environmentFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Environment file for secrets";
|
||||
};
|
||||
|
||||
# External URL option
|
||||
externalUrl = serviceName: homelabCfg:
|
||||
mkOption {
|
||||
type = types.str;
|
||||
default = "https://${serviceName}.${homelabCfg.externalDomain}";
|
||||
description = "External URL for ${serviceName}";
|
||||
};
|
||||
};
|
||||
|
||||
# Helper for creating service modules with the interface
|
||||
mkServiceModule = {
|
||||
serviceName,
|
||||
defaultPort,
|
||||
defaultSubdomain ? serviceName,
|
||||
serviceOptions ? {},
|
||||
...
|
||||
} @ args: {
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}: let
|
||||
cfg = config.homelab.services.${serviceName};
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
serviceInterface = mkServiceInterface {
|
||||
inherit serviceName defaultPort defaultSubdomain serviceOptions;
|
||||
};
|
||||
in {
|
||||
options.homelab.services.${serviceName} = serviceInterface;
|
||||
|
||||
config = mkServiceConfig {
|
||||
inherit config cfg homelabCfg serviceName;
|
||||
# Service implementor must provide this function
|
||||
serviceConfig = args.serviceConfig or (throw "mkServiceModule requires serviceConfig function");
|
||||
};
|
||||
};
|
||||
}
|
||||
214
modules/homelab/monitoring-config.nix
Normal file
214
modules/homelab/monitoring-config.nix
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.monitoring;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
metricsEndpointType = types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
description = "Name of the metrics endpoint";
|
||||
};
|
||||
host = mkOption {
|
||||
type = types.str;
|
||||
description = "Domain name of the host (default: hostname.domain)";
|
||||
default = "${homelabCfg.hostname}.${homelabCfg.domain}";
|
||||
};
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
description = "Port number for the endpoint";
|
||||
};
|
||||
path = mkOption {
|
||||
type = types.str;
|
||||
default = "/metrics";
|
||||
description = "Path for the metrics endpoint";
|
||||
};
|
||||
jobName = mkOption {
|
||||
type = types.str;
|
||||
description = "Prometheus job name";
|
||||
};
|
||||
scrapeInterval = mkOption {
|
||||
type = types.str;
|
||||
default = "30s";
|
||||
description = "Prometheus scrape interval";
|
||||
};
|
||||
labels = mkOption {
|
||||
type = types.attrsOf types.str;
|
||||
default = {};
|
||||
description = "Additional labels for this endpoint";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
healthCheckEndpointType = types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
description = "Name of the health check endpoint";
|
||||
};
|
||||
host = mkOption {
|
||||
type = types.str;
|
||||
description = "Domain name of the host";
|
||||
default = "${homelabCfg.hostname}.${homelabCfg.domain}";
|
||||
};
|
||||
port = mkOption {
|
||||
type = types.nullOr types.port;
|
||||
default = null;
|
||||
description = "Port number for the endpoint (null for standard HTTP/HTTPS)";
|
||||
};
|
||||
path = mkOption {
|
||||
type = types.str;
|
||||
default = "/";
|
||||
description = "Path for the health check endpoint";
|
||||
};
|
||||
protocol = mkOption {
|
||||
type = types.enum ["http" "https" "tcp" "icmp"];
|
||||
default = "http";
|
||||
description = "Protocol to use for health checks";
|
||||
};
|
||||
method = mkOption {
|
||||
type = types.str;
|
||||
default = "GET";
|
||||
description = "HTTP method for health checks (only applies to http/https)";
|
||||
};
|
||||
interval = mkOption {
|
||||
type = types.str;
|
||||
default = "30s";
|
||||
description = "Health check interval";
|
||||
};
|
||||
timeout = mkOption {
|
||||
type = types.str;
|
||||
default = "10s";
|
||||
description = "Health check timeout";
|
||||
};
|
||||
conditions = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = ["[STATUS] == 200"];
|
||||
description = "Health check conditions (Gatus format)";
|
||||
example = ["[STATUS] == 200" "[BODY].status == UP" "[RESPONSE_TIME] < 500"];
|
||||
};
|
||||
alerts = mkOption {
|
||||
type = types.listOf (types.submodule {
|
||||
options = {
|
||||
type = mkOption {
|
||||
type = types.str;
|
||||
description = "Alert type";
|
||||
example = "discord";
|
||||
};
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether this alert is enabled";
|
||||
};
|
||||
failure-threshold = mkOption {
|
||||
type = types.int;
|
||||
default = 3;
|
||||
description = "Number of failures before alerting";
|
||||
};
|
||||
success-threshold = mkOption {
|
||||
type = types.int;
|
||||
default = 2;
|
||||
description = "Number of successes before resolving alert";
|
||||
};
|
||||
};
|
||||
});
|
||||
default = [];
|
||||
description = "Alert configurations";
|
||||
};
|
||||
group = mkOption {
|
||||
type = types.str;
|
||||
default = "default";
|
||||
description = "Group name for organizing health checks";
|
||||
};
|
||||
labels = mkOption {
|
||||
type = types.attrsOf types.str;
|
||||
default = {};
|
||||
description = "Additional labels for this health check";
|
||||
};
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether this health check is enabled";
|
||||
};
|
||||
# External domain support
|
||||
useExternalDomain = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Use external domain instead of internal";
|
||||
};
|
||||
subdomain = mkOption {
|
||||
type = types.nullOr types.str;
|
||||
default = null;
|
||||
description = "Subdomain for external domain (required if useExternalDomain is true)";
|
||||
};
|
||||
};
|
||||
};
|
||||
in {
|
||||
options.homelab.monitoring = {
|
||||
enable = mkEnableOption "Homelab monitoring";
|
||||
metrics = mkOption {
|
||||
type = types.listOf metricsEndpointType;
|
||||
default = [];
|
||||
description = "Metric endpoints exposed by this system";
|
||||
};
|
||||
|
||||
healthChecks = mkOption {
|
||||
type = types.listOf healthCheckEndpointType;
|
||||
default = [];
|
||||
description = "Health check endpoints for uptime monitoring";
|
||||
};
|
||||
|
||||
nodeExporter = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable node exporter";
|
||||
};
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 9100;
|
||||
description = "Node exporter port";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Configure node exporter if enabled
|
||||
services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable {
|
||||
enable = true;
|
||||
port = cfg.nodeExporter.port;
|
||||
enabledCollectors = [
|
||||
"systemd"
|
||||
"textfile"
|
||||
"filesystem"
|
||||
"loadavg"
|
||||
"meminfo"
|
||||
"netdev"
|
||||
"stat"
|
||||
];
|
||||
};
|
||||
|
||||
# Automatically add node exporter to monitoring endpoints
|
||||
homelab.monitoring.metrics = mkIf cfg.nodeExporter.enable [
|
||||
{
|
||||
name = "node-exporter";
|
||||
port = cfg.nodeExporter.port;
|
||||
path = "/metrics";
|
||||
jobName = "node";
|
||||
labels = {
|
||||
instance = "${homelabCfg.hostname}.${homelabCfg.domain}";
|
||||
environment = homelabCfg.environment;
|
||||
location = homelabCfg.location;
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [
|
||||
cfg.nodeExporter.port
|
||||
];
|
||||
};
|
||||
}
|
||||
397
modules/homelab/motd/default.nix
Normal file
397
modules/homelab/motd/default.nix
Normal file
|
|
@ -0,0 +1,397 @@
|
|||
# modules/motd/default.nix
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.motd;
|
||||
|
||||
  # Main MOTD script: prints a banner, basic system stats, per-service status
  # markers, a quick backup/error triage, and a command cheat-sheet box.
  # Service status markers: green ● = active, yellow ○ = enabled-but-inactive,
  # red × = neither (see get_service_status).
  # NOTE: `''${…}` below is Nix's escape for a literal shell `${…}`; plain
  # `${…}` interpolations (pkgs paths, cfg.*, config.*) are evaluated by Nix
  # at build time, so the homelab.services.* toggles bake the corresponding
  # checks into the generated script.
  homelab-motd = pkgs.writeShellScriptBin "homelab-motd" ''
    #! /usr/bin/env bash

    # Colors for output
    RED="\e[31m"
    GREEN="\e[32m"
    YELLOW="\e[33m"
    BLUE='\e[0;34m'
    CYAN='\e[0;36m'
    WHITE='\e[1;37m'
    NC='\e[0m' # No Color
    BOLD='\e[1m'

    # Helper functions
    print_header() {
      echo -e "''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}"
      echo -e "''${BOLD}''${BLUE}║''${NC}''${WHITE} 🏠 $(hostname -s) HOMELAB ''${NC}''${BOLD}''${BLUE}║''${NC}"
      echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}"
    }

    print_section() {
      echo -e "\n''${BOLD}''${CYAN}▶ $1''${NC}"
      echo -e "''${CYAN}─────────────────────────────────────────────────────────────''${NC}"
    }

    get_service_status() {
      local service="$1"
      if ${pkgs.systemd}/bin/systemctl is-active --quiet "$service" 2>/dev/null; then
        echo -e "''${GREEN}●''${NC}"
      elif ${pkgs.systemd}/bin/systemctl is-enabled --quiet "$service" 2>/dev/null; then
        echo -e "''${YELLOW}○''${NC}"
      else
        echo -e "''${RED}×''${NC}"
      fi
    }

    check_backup_issues() {
      local issues=0
      # Check for failed backup services in the last 24 hours
      if ${pkgs.systemd}/bin/journalctl --since "24 hours ago" --unit="*backup*" --unit="restic*" --unit="borgbackup*" --priority=err --no-pager -q 2>/dev/null | grep -q .; then
        issues=$((issues + 1))
      fi

      # Check for failed backup timers
      local failed_timers=$(${pkgs.systemd}/bin/systemctl list-timers --failed --no-pager --no-legend 2>/dev/null | grep -E "(backup|restic|borgbackup)" | wc -l)
      issues=$((issues + failed_timers))

      echo $issues
    }

    # Main script
    ${optionalString cfg.clearScreen "clear"}
    print_header

    # System info
    print_section "SYSTEM"
    echo -e " ''${BOLD}Uptime:''${NC} $(${pkgs.procps}/bin/uptime -p | sed 's/up //')"
    echo -e " ''${BOLD}Load:''${NC} $(${pkgs.procps}/bin/uptime | awk -F'load average:' '{print $2}' | xargs)"
    echo -e " ''${BOLD}Memory:''${NC} $(${pkgs.procps}/bin/free -h | awk '/^Mem:/ {printf "%s/%s", $3, $2}')"
    echo -e " ''${BOLD}Disk:''${NC} $(${pkgs.coreutils}/bin/df -h / | awk 'NR==2 {printf "%s/%s (%s)", $3, $2, $5}')"

    ${optionalString cfg.showServices ''
      # Local homelab services (auto-detected + manual)
      print_section "HOMELAB SERVICES"

      # Auto-detect services from homelab configuration
      ${optionalString (config.homelab.services.gatus.enable or false) ''
        status=$(get_service_status "gatus")
        printf " %-20s %b %s\n" "gatus" "$status" "Uptime monitoring"
      ''}

      ${optionalString (config.homelab.services.prometheus.enable or false) ''
        status=$(get_service_status "prometheus")
        printf " %-20s %b %s\n" "prometheus" "$status" "Metrics collection"
      ''}

      ${optionalString (config.homelab.services.grafana.enable or false) ''
        status=$(get_service_status "grafana")
        printf " %-20s %b %s\n" "grafana" "$status" "Monitoring dashboard"
      ''}

      ${optionalString (config.homelab.services.alertmanager.enable or false) ''
        status=$(get_service_status "alertmanager")
        printf " %-20s %b %s\n" "alertmanager" "$status" "Alert routing"
      ''}

      ${optionalString (config.services.nginx.enable or false) ''
        status=$(get_service_status "nginx")
        printf " %-20s %b %s\n" "nginx" "$status" "Web server/proxy"
      ''}

      ${optionalString (config.services.postgresql.enable or false) ''
        status=$(get_service_status "postgresql")
        printf " %-20s %b %s\n" "postgresql" "$status" "Database server"
      ''}

      ${optionalString (config.services.redis.server.enable or false) ''
        status=$(get_service_status "redis")
        printf " %-20s %b %s\n" "redis" "$status" "Key-value store"
      ''}

      # Manual services from configuration
      ${concatStringsSep "\n" (mapAttrsToList (name: service: ''
          status=$(get_service_status "${service.systemdService}")
          printf " %-20s %b %s\n" "${name}" "$status" "${service.description}"
        '')
        cfg.services)}

      # Show legend
      echo -e "\n ''${GREEN}●''${NC} Active ''${YELLOW}○''${NC} Inactive ''${RED}×''${NC} Disabled"
    ''}

    # Quick backup check
    backup_issues=$(check_backup_issues)
    if [[ $backup_issues -gt 0 ]]; then
      echo -e "\n''${BOLD}''${RED}⚠ WARNING: $backup_issues backup issues detected!''${NC}"
      echo -e " Run ''${BOLD}homelab-backup-status''${NC} for details"
    fi

    # Recent critical issues
    error_count=$(${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err --no-pager -q 2>/dev/null | wc -l || echo 0)
    if [[ "$error_count" -gt 0 ]]; then
      echo -e "\n''${BOLD}''${YELLOW}⚠ $error_count system errors in last 24h''${NC}"
      echo -e " Run ''${BOLD}journalctl --priority=err --since='24 hours ago' ''${NC} for details"
    fi

    # Helpful commands
    echo -e "\n''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC} ''${WHITE}Useful commands: ''${NC}''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC} ''${CYAN}homelab-monitor-status''${NC} - Monitoring overview ''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC} ''${CYAN}homelab-backup-status''${NC} - Backup jobs status ''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC} ''${CYAN}homelab-proxy-status''${NC} - Reverse proxy entries ''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC} ''${CYAN}systemctl status <srv>''${NC} - Check specific service ''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}"
    echo
  '';
|
||||
|
||||
# Helper script for monitoring status
|
||||
homelab-monitor-status = pkgs.writeShellScriptBin "homelab-monitor-status" ''
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Colors
|
||||
RED="\e[31m"
|
||||
GREEN="\e[32m"
|
||||
YELLOW="\e[33m"
|
||||
BLUE='\e[0;34m'
|
||||
CYAN='\e[0;36m'
|
||||
WHITE='\e[1;37m'
|
||||
NC='\e[0m'
|
||||
BOLD='\e[1m'
|
||||
|
||||
CONFIG_FILE="/etc/homelab/config.json"
|
||||
if [[ ! -f "$CONFIG_FILE" ]]; then
|
||||
echo -e "''${RED}❌ Global homelab configuration not found''${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "''${BOLD}''${BLUE}📊 Homelab Monitoring Status''${NC}"
|
||||
echo -e "''${BLUE}=============================''${NC}"
|
||||
|
||||
# Show metrics endpoints
|
||||
echo -e "\n''${BOLD}''${CYAN}Metrics Endpoints:''${NC}"
|
||||
metrics_count=$(${pkgs.jq}/bin/jq '.monitoring.metrics | length' "$CONFIG_FILE" 2>/dev/null || echo 0)
|
||||
if [[ $metrics_count -gt 0 ]]; then
|
||||
${pkgs.jq}/bin/jq -r '.monitoring.metrics[]? | " ''${GREEN}●''${NC} \(.name): ''${BOLD}\(.host):\(.port)''${NC}\(.path) ''${YELLOW}(job: \(.jobName))''${NC}"' "$CONFIG_FILE" 2>/dev/null
|
||||
echo -e "\n ''${BOLD}Total: ''${metrics_count} endpoints''${NC}"
|
||||
else
|
||||
echo -e " ''${YELLOW}No metrics endpoints configured''${NC}"
|
||||
fi
|
||||
|
||||
# Show health checks by group
|
||||
echo -e "\n''${BOLD}''${CYAN}Health Checks:''${NC}"
|
||||
health_count=$(${pkgs.jq}/bin/jq '.monitoring.healthChecks | length' "$CONFIG_FILE" 2>/dev/null || echo 0)
|
||||
if [[ $health_count -gt 0 ]]; then
|
||||
# Group health checks
|
||||
${pkgs.jq}/bin/jq -r '
|
||||
.monitoring.healthChecks |
|
||||
group_by(.group // "default") |
|
||||
.[] |
|
||||
"''${BOLD} \(.[0].group // "default" | ascii_upcase) Group:''${NC}" as $header |
|
||||
($header, (
|
||||
.[] |
|
||||
" ''${if .enabled // true then "''${GREEN}●" else "''${YELLOW}○" end}''${NC} \(.name): ''${BOLD}\(.protocol)://\(.host)\(if .port then ":\(.port)" else "" end)''${NC}\(.path)"
|
||||
))
|
||||
' "$CONFIG_FILE" 2>/dev/null
|
||||
echo -e "\n ''${BOLD}Total: ''${health_count} health checks''${NC}"
|
||||
else
|
||||
echo -e " ''${YELLOW}No health checks configured''${NC}"
|
||||
fi
|
||||
|
||||
echo -e "\n''${CYAN}Run ''${BOLD}homelab-proxy-status''${NC}''${CYAN} and ''${BOLD}homelab-backup-status''${NC}''${CYAN} for more details.''${NC}"
|
||||
'';
|
||||
|
||||
# Helper script for backup status
|
||||
  # Summarizes backup health: pending systemd timers, last-3-days
  # success/failure counts from the journal, and the job summary from the
  # generated global config (/etc/homelab/config.json).
  #
  # NOTE(review): the awk column extraction below ($1,$2 = next run,
  # $3 = time left, $5 = timer unit, $6 = activated service) assumes a fixed
  # `systemctl list-timers` layout; systemd's NEXT column is typically
  # several whitespace-separated fields, so these indices look off by a
  # couple of columns — verify against the systemd version in use.
  # NOTE(review): `$service` from the timer loop is assigned but never used.
  homelab-backup-status = pkgs.writeShellScriptBin "homelab-backup-status" ''
    #! /usr/bin/env bash

    # Colors
    RED="\e[31m"
    GREEN="\e[32m"
    YELLOW="\e[33m"
    BLUE='\e[0;34m'
    CYAN='\e[0;36m'
    WHITE='\e[1;37m'
    NC='\e[0m'
    BOLD='\e[1m'

    echo -e "''${BOLD}''${BLUE}💾 Backup Status''${NC}"
    echo -e "''${BLUE}===============''${NC}"

    # Check backup timers
    echo -e "\n''${BOLD}''${CYAN}Backup Timers:''${NC}"
    backup_timers=$(${pkgs.systemd}/bin/systemctl list-timers --no-pager --no-legend 2>/dev/null | grep -E "(backup|restic|borgbackup)")
    if [[ -n "$backup_timers" ]]; then
      while IFS= read -r line; do
        if [[ -n "$line" ]]; then
          next=$(echo "$line" | awk '{print $1, $2}')
          left=$(echo "$line" | awk '{print $3}')
          timer=$(echo "$line" | awk '{print $5}')
          service=$(echo "$line" | awk '{print $6}')

          # Color code based on time left
          if [[ "$left" == "n/a" ]]; then
            color="''${RED}"
            status="●"
          elif echo "$left" | grep -qE "(sec|min|[0-9]h)"; then
            color="''${YELLOW}"
            status="●"
          else
            color="''${GREEN}"
            status="●"
          fi

          printf " %b%s%b %-25s Next: %s (%s)\n" "$color" "$status" "$NC" "$(basename "$timer" .timer)" "$next" "$left"
        fi
      done <<< "$backup_timers"
    else
      echo -e " ''${YELLOW}No backup timers found''${NC}"
    fi

    # Check recent backup activity (last 3 days, summarized)
    echo -e "\n''${BOLD}''${CYAN}Recent Activity (3 days):''${NC}"

    # Count successful vs failed backups
    success_count=$(${pkgs.systemd}/bin/journalctl --since "3 days ago" --unit="*backup*" --unit="restic*" --unit="borgbackup*" --no-pager -q 2>/dev/null | grep -iE "(completed|success|finished)" | wc -l)
    error_count=$(${pkgs.systemd}/bin/journalctl --since "3 days ago" --unit="*backup*" --unit="restic*" --unit="borgbackup*" --priority=err --no-pager -q 2>/dev/null | wc -l)

    if [[ $success_count -gt 0 ]]; then
      echo -e " ''${GREEN}✅ $success_count successful backups''${NC}"
    fi
    if [[ $error_count -gt 0 ]]; then
      echo -e " ''${RED}❌ $error_count failed backups''${NC}"
      echo -e "\n''${BOLD}''${RED}Recent Failures:''${NC}"
      ${pkgs.systemd}/bin/journalctl --since "3 days ago" --unit="*backup*" --unit="restic*" --unit="borgbackup*" --priority=err --no-pager --lines=3 2>/dev/null | while read -r line; do
        # Extract just the important parts
        timestamp=$(echo "$line" | awk '{print $1, $2, $3}')
        service=$(echo "$line" | grep -oE "(restic-backups-[^[]+|borgbackup-job-[^[]+|[^[]*backup[^[]*)" | head -1)
        message=$(echo "$line" | sed -E 's/.*\]: //' | cut -c1-60)
        echo -e " ''${YELLOW}$timestamp''${NC} ''${BOLD}$service''${NC}: $message..."
      done
    elif [[ $success_count -eq 0 ]]; then
      echo -e " ''${YELLOW}⚠️ No backup activity in last 3 days''${NC}"
    else
      echo -e " ''${GREEN}✅ All backups completed successfully''${NC}"
    fi

    # Show backup summary from global config if available
    CONFIG_FILE="/etc/homelab/config.json"
    if [[ -f "$CONFIG_FILE" ]]; then
      total_jobs=$(${pkgs.jq}/bin/jq -r '.backups.summary.totalJobs // 0' "$CONFIG_FILE" 2>/dev/null)
      backends=$(${pkgs.jq}/bin/jq -r '.backups.summary.backendsInUse[]?' "$CONFIG_FILE" 2>/dev/null | tr '\n' ' ')

      if [[ $total_jobs -gt 0 ]]; then
        echo -e "\n''${BOLD}''${CYAN}Configuration:''${NC}"
        echo -e " ''${BOLD}Total jobs:''${NC} $total_jobs"
        if [[ -n "$backends" ]]; then
          echo -e " ''${BOLD}Backends:''${NC} $backends"
        fi
      fi
    fi
  '';
|
||||
|
||||
# Helper script for proxy status
|
||||
homelab-proxy-status = pkgs.writeShellScriptBin "homelab-proxy-status" ''
|
||||
#! /usr/bin/env bash
|
||||
|
||||
# Colors
|
||||
RED="\e[31m"
|
||||
GREEN="\e[32m"
|
||||
YELLOW="\e[33m"
|
||||
BLUE='\e[0;34m'
|
||||
CYAN='\e[0;36m'
|
||||
WHITE='\e[1;37m'
|
||||
NC='\e[0m'
|
||||
BOLD='\e[1m'
|
||||
|
||||
CONFIG_FILE="/etc/homelab/config.json"
|
||||
if [[ ! -f "$CONFIG_FILE" ]]; then
|
||||
echo -e "''${RED}❌ Global homelab configuration not found''${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "''${BOLD}''${BLUE}🔗 Reverse Proxy Status''${NC}"
|
||||
echo -e "''${BLUE}======================''${NC}"
|
||||
|
||||
proxy_count=$(${pkgs.jq}/bin/jq '.reverseProxy.entries | length' "$CONFIG_FILE" 2>/dev/null || echo 0)
|
||||
if [[ $proxy_count -gt 0 ]]; then
|
||||
${pkgs.jq}/bin/jq -r '.reverseProxy.entries[]? |
|
||||
" ''${GREEN}●''${NC} ''${BOLD}\(.subdomain)''${NC}: \(.externalHost) → \(.internalHost)\(if .enableAuth then " ''${YELLOW}🔐''${NC}" else "" end)\(if .enableSSL then " ''${GREEN}🔒''${NC}" else "" end)"' "$CONFIG_FILE" 2>/dev/null
|
||||
|
||||
echo -e "\n''${BOLD}Legend:''${NC} ''${YELLOW}🔐''${NC} Auth enabled, ''${GREEN}🔒''${NC} SSL enabled"
|
||||
echo -e "''${BOLD}Total: ''${proxy_count} proxy entries''${NC}"
|
||||
else
|
||||
echo -e " ''${YELLOW}No proxy entries configured''${NC}"
|
||||
fi
|
||||
'';
|
||||
in {
  options.homelab.motd = {
    enable = mkEnableOption "Simple homelab MOTD";

    clearScreen = mkOption {
      description = "Clear screen before showing MOTD";
      type = types.bool;
      default = true;
    };

    showServices = mkOption {
      description = "Show local homelab services status";
      type = types.bool;
      default = true;
    };

    # Manually declared services to list alongside the auto-detected ones.
    services = mkOption {
      description = "Local homelab services to show in MOTD";
      default = {};
      type = types.attrsOf (types.submodule {
        options = {
          systemdService = mkOption {
            description = "Name of the systemd service to monitor";
            type = types.str;
          };
          description = mkOption {
            description = "Human-readable description of the service";
            type = types.str;
            default = "";
          };
        };
      });
      example = literalExpression ''
        {
          "nginx" = {
            systemdService = "nginx";
            description = "Web server";
          };
          "grafana" = {
            systemdService = "grafana";
            description = "Monitoring dashboard";
          };
        }
      '';
    };
  };

  config = mkIf cfg.enable {
    # Make the MOTD and its companion status commands available system-wide.
    environment.systemPackages = [
      pkgs.jq
      homelab-motd
      homelab-monitor-status
      homelab-backup-status
      homelab-proxy-status
    ];

    # Set up MOTD to run on login
    programs.bash.interactiveShellInit = ''
      # Run homelab MOTD on interactive login (only once per session)
      if [[ $- == *i* ]] && [[ -z "$MOTD_SHOWN" ]] && [[ -n "$SSH_CONNECTION" || "$TERM" == "linux" ]]; then
        export MOTD_SHOWN=1
        ${homelab-motd}/bin/homelab-motd
      fi
    '';

    # Disable default MOTD
    users.motd = mkDefault "";
    security.pam.services.login.showMotd = mkDefault false;
  };
}
|
||||
53
modules/homelab/proxy-config.nix
Normal file
53
modules/homelab/proxy-config.nix
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Declares the homelab reverse-proxy entry registry. Other modules append to
# homelab.reverseProxy.entries (e.g. minio.nix); a consumer elsewhere is
# expected to render them into an actual proxy config (this module's own
# `config` section is intentionally empty).
{
  config,
  lib,
  ...
}:
with lib; let
  cfg = config.homelab.reverseProxy;
  homelabCfg = config.homelab;

  # Schema for one proxied service.
  reverseProxyEntryType = types.submodule {
    options = {
      subdomain = mkOption {
        type = types.str;
        description = "Subdomain for the service";
      };
      host = mkOption {
        type = types.str;
        description = "Host to proxy to";
        # Defaults to this machine's FQDN from the global homelab config.
        default = "${homelabCfg.hostname}.${homelabCfg.domain}";
      };
      port = mkOption {
        type = types.port;
        description = "Port to proxy to";
      };
      path = mkOption {
        type = types.str;
        default = "/";
        description = "Path prefix for the service";
      };
      enableAuth = mkOption {
        type = types.bool;
        default = false;
        description = "Enable authentication for this service";
      };
      enableSSL = mkOption {
        type = types.bool;
        default = true;
        description = "Enable SSL for this service";
      };
      # Added (backward-compatible): producers such as the Jellyfin module
      # already set these on their entries; without declarations here those
      # definitions would be rejected by the module system.
      websockets = mkOption {
        type = types.bool;
        default = false;
        description = "Enable WebSocket proxying for this service";
      };
      customHeaders = mkOption {
        type = types.attrsOf types.str;
        default = {};
        description = "Extra proxy headers to set for this service";
      };
    };
  };
in {
  options.homelab.reverseProxy = {
    entries = mkOption {
      type = types.listOf reverseProxyEntryType;
      default = [];
      description = "Reverse proxy entries for this system";
    };
  };

  config = {
  };
}
|
||||
7
modules/homelab/services/default.nix
Normal file
7
modules/homelab/services/default.nix
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Aggregator module: importing this file pulls in every homelab service
# module listed below.
# NOTE(review): jellyfin.nix and example-service.nix exist in this directory
# but are not imported — confirm that is intentional.
{
  imports = [
    ./minio.nix
    ./monitoring/gatus.nix
    ./monitoring/prometheus.nix
  ];
}
|
||||
161
modules/homelab/services/example-service.nix
Normal file
161
modules/homelab/services/example-service.nix
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
# Example showing how to create a service using the standard interface
# provided by ../lib/service-interface.nix: mkServiceInterface generates the
# standard option set (port, proxy, monitoring, …) and mkServiceConfig wires
# the service into proxying/monitoring; both are defined outside this file.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  # Shared helpers for the standard service option/config shape.
  serviceInterface = import ../lib/service-interface.nix {inherit lib;};

  cfg = config.homelab.services.grafana;
  homelabCfg = config.homelab;

  # Service-specific options beyond the standard interface
  grafanaServiceOptions = {
    domain = mkOption {
      type = types.str;
      default = "grafana.${homelabCfg.externalDomain}";
      description = "Domain for Grafana";
    };

    rootUrl = mkOption {
      type = types.str;
      default = "https://grafana.${homelabCfg.externalDomain}";
      description = "Root URL for Grafana";
    };

    # Standard dataDir option sourced from the shared interface helper.
    dataDir = serviceInterface.commonOptions.dataDir "grafana";

    admin = {
      user = mkOption {
        type = types.str;
        default = "admin";
        description = "Admin username";
      };

      # NOTE(review): plaintext default password stored in the world-readable
      # Nix store; consider a sops secret (as minio.nix does) for real use.
      password = mkOption {
        type = types.str;
        default = "admin";
        description = "Admin password";
      };
    };

    datasources = {
      prometheus = {
        enable = mkOption {
          type = types.bool;
          default = true;
          description = "Enable Prometheus datasource";
        };

        url = mkOption {
          type = types.str;
          default = "http://localhost:9090";
          description = "Prometheus URL";
        };
      };
    };

    plugins = mkOption {
      type = types.listOf types.package;
      default = [];
      description = "Grafana plugins to install";
    };
  };
in {
  # Standard interface: port/subdomain/health-check defaults plus the
  # service-specific options declared above.
  options.homelab.services.grafana = serviceInterface.mkServiceInterface {
    serviceName = "grafana";
    defaultPort = 3000;
    defaultSubdomain = "grafana";
    monitoringPath = "/metrics";
    healthCheckPath = "/api/health";
    # Gatus-format conditions for the default health check.
    healthCheckConditions = [
      "[STATUS] == 200"
      "[BODY].database == ok"
      "[RESPONSE_TIME] < 2000"
    ];
    serviceOptions = grafanaServiceOptions;
  };

  config = serviceInterface.mkServiceConfig {
    inherit config cfg homelabCfg;
    serviceName = "grafana";

    extraMonitoringLabels = {
      component = "dashboard";
    };

    # Extra health check beyond the interface's default /api/health probe.
    customHealthChecks = [
      {
        name = "grafana-login";
        port = cfg.port;
        path = "/login";
        interval = "60s";
        conditions = [
          "[STATUS] == 200"
          "[RESPONSE_TIME] < 3000"
        ];
        group = "monitoring";
        labels = {
          service = "grafana";
          component = "login";
        };
      }
    ];

    # Actual NixOS service configuration applied when the service is enabled.
    serviceConfig = {
      services.grafana = {
        enable = true;
        dataDir = cfg.dataDir;
        declarativePlugins = cfg.plugins;

        settings = {
          server = {
            http_port = cfg.port;
            # Listens on all interfaces; exposure is expected to be
            # controlled by firewall/proxy settings from the interface.
            http_addr = "0.0.0.0";
            domain = cfg.domain;
            root_url = cfg.rootUrl;
          };

          security = {
            admin_user = cfg.admin.user;
            admin_password = cfg.admin.password;
          };
        };

        provision = {
          enable = true;
          datasources.settings.datasources = mkIf cfg.datasources.prometheus.enable [
            {
              name = "Prometheus";
              type = "prometheus";
              url = cfg.datasources.prometheus.url;
              isDefault = true;
            }
          ];
        };
      };
    };
  };
}
|
||||
# Usage example in your configuration:
|
||||
/*
|
||||
{
|
||||
homelab.services.grafana = {
|
||||
enable = true;
|
||||
# Standard interface options:
|
||||
port = 3000; # Optional: defaults to 3000
|
||||
openFirewall = true; # Optional: defaults to true
|
||||
proxy.subdomain = "grafana"; # Optional: defaults to "grafana"
|
||||
proxy.enableAuth = false; # Optional: defaults to false
|
||||
monitoring.enable = true; # Optional: defaults to true
|
||||
|
||||
# Service-specific options:
|
||||
admin.password = "secure-password";
|
||||
datasources.prometheus.url = "http://prometheus.lab:9090";
|
||||
plugins = with pkgs.grafanaPlugins; [ grafana-piechart-panel ];
|
||||
};
|
||||
}
|
||||
*/
|
||||
|
||||
125
modules/homelab/services/jellyfin.nix
Normal file
125
modules/homelab/services/jellyfin.nix
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
# modules/services/jellyfin.nix
#
# Jellyfin media server wrapper: enables the service and registers backup
# jobs, a reverse-proxy entry, and a monitoring endpoint with the global
# homelab registry.
#
# NOTE(review): this module declares options.services.jellyfin.enable and
# .dataDir, which the upstream nixpkgs jellyfin module also declares — that
# is a duplicate option declaration. It also sets
# `services.jellyfin.enable = true` inside `config = mkIf cfg.enable {...}`
# where cfg IS config.services.jellyfin, making `enable` depend on itself
# (module-system infinite recursion). Consider re-namespacing under
# homelab.services.jellyfin as minio.nix does; left unchanged here because
# that changes where users set these options.
# NOTE(review): sibling modules write to homelab.backups.jobs /
# homelab.reverseProxy.entries, while this one writes to homelab.global.* —
# confirm a homelab.global module exists and its job schema (paths/schedule/
# hooks) matches, since homelab.backups.jobs uses backend/backendOptions.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.services.jellyfin;
in {
  options.services.jellyfin = {
    enable = mkEnableOption "Jellyfin media server";

    port = mkOption {
      type = types.port;
      default = 8096;
      description = "Port for Jellyfin web interface";
    };

    dataDir = mkOption {
      type = types.str;
      default = "/var/lib/jellyfin";
      description = "Directory to store Jellyfin data";
    };

    mediaDir = mkOption {
      type = types.str;
      default = "/media";
      description = "Directory containing media files";
    };

    enableMetrics = mkOption {
      type = types.bool;
      default = true;
      description = "Enable Prometheus metrics";
    };

    exposeWeb = mkOption {
      type = types.bool;
      default = true;
      description = "Expose web interface through reverse proxy";
    };
  };

  config = mkIf cfg.enable {
    # Enable the service
    services.jellyfin = {
      enable = true;
      dataDir = cfg.dataDir;
    };

    # Configure global settings
    homelab.global = {
      # Add backup job for Jellyfin data
      backups.jobs = [
        {
          name = "jellyfin-config";
          backend = "restic";
          paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"];
          schedule = "0 2 * * *"; # Daily at 2 AM
          # Skip regenerable data to keep snapshots small.
          excludePatterns = [
            "*/cache/*"
            "*/transcodes/*"
            "*/logs/*"
          ];
          preHook = ''
            # Stop jellyfin for consistent backup
            systemctl stop jellyfin
          '';
          postHook = ''
            # Restart jellyfin after backup
            systemctl start jellyfin
          '';
        }
        {
          name = "jellyfin-media";
          backend = "restic";
          paths = [cfg.mediaDir];
          schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM
          excludePatterns = [
            "*.tmp"
            "*/.@__thumb/*" # Synology thumbnails
          ];
        }
      ];

      # Add reverse proxy entry if enabled
      reverseProxy.entries = mkIf cfg.exposeWeb [
        {
          subdomain = "jellyfin";
          port = cfg.port;
          enableAuth = false; # Jellyfin has its own auth
          websockets = true;
          customHeaders = {
            "X-Forwarded-Proto" = "$scheme";
            "X-Forwarded-Host" = "$host";
          };
        }
      ];

      # Add monitoring endpoint if metrics enabled
      monitoring.endpoints = mkIf cfg.enableMetrics [
        {
          name = "jellyfin";
          port = cfg.port;
          path = "/metrics"; # Assuming you have a metrics plugin
          jobName = "jellyfin";
          scrapeInterval = "60s";
          labels = {
            service = "jellyfin";
            type = "media-server";
          };
        }
      ];
    };

    # Open firewall
    networking.firewall.allowedTCPPorts = [cfg.port];

    # Create media directory
    systemd.tmpfiles.rules = [
      "d ${cfg.mediaDir} 0755 jellyfin jellyfin -"
    ];
  };
}
|
||||
66
modules/homelab/services/minio.nix
Normal file
66
modules/homelab/services/minio.nix
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
# MinIO object storage service: sops-managed root credentials, firewall
# openings, and reverse-proxy entries for the S3 API and the web console.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  service = "minio";
  cfg = config.homelab.services.${service};
  homelabCfg = config.homelab;
in {
  options.homelab.services.${service} = {
    enable = mkEnableOption "Minio Object Storage";

    port = mkOption {
      default = 9000;
      type = types.port;
      description = "Port of the server.";
    };

    webPort = mkOption {
      default = 9001;
      type = types.port;
      description = "Port of the web UI (console).";
    };

    openFirewall = mkOption {
      type = types.bool;
      default = true;
      description = ''
        Whether to open the ports specified in `port` and `webPort` in the firewall.
      '';
    };
  };

  config = mkIf cfg.enable {
    # Root credentials come from sops; the template is rendered at
    # activation time so the secrets never land in the Nix store.
    # NOTE(review): secret paths are hard-coded under "ente/" — confirm.
    sops.secrets."ente/minio/root_user" = {};
    sops.secrets."ente/minio/root_password" = {};

    sops.templates."minio-root-credentials".content = ''
      MINIO_ROOT_USER=${config.sops.placeholder."ente/minio/root_user"}
      MINIO_ROOT_PASSWORD=${config.sops.placeholder."ente/minio/root_password"}
    '';

    services.minio = {
      enable = true;
      # Fix: cfg.port/cfg.webPort were previously used only for the firewall
      # and proxy entries, so overriding them left MinIO listening on its
      # built-in defaults. Wire them into the actual listener addresses
      # (the option defaults 9000/9001 preserve prior behavior).
      listenAddress = ":${toString cfg.port}";
      consoleAddress = ":${toString cfg.webPort}";
      rootCredentialsFile = config.sops.templates."minio-root-credentials".path;
    };

    networking.firewall.allowedTCPPorts = optionals cfg.openFirewall [cfg.port cfg.webPort];

    # Expose the S3 API as "<service>-api" and the console as "<service>".
    homelab.reverseProxy.entries = [
      {
        subdomain = "${service}-api";
        port = cfg.port;
      }
      {
        subdomain = "${service}";
        port = cfg.webPort;
      }
    ];

    # https://min.io/docs/minio/linux/operations/monitoring/collect-minio-metrics-using-prometheus.html
    # metrics and monitoring...
  };
}
|
||||
237
modules/homelab/services/monitoring/alertmanager.nix
Normal file
237
modules/homelab/services/monitoring/alertmanager.nix
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.alertmanager;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Default alertmanager configuration
|
||||
defaultConfig = {
|
||||
global = {
|
||||
smtp_smarthost = cfg.smtp.host;
|
||||
smtp_from = cfg.smtp.from;
|
||||
smtp_auth_username = cfg.smtp.username;
|
||||
smtp_auth_password = cfg.smtp.password;
|
||||
};
|
||||
|
||||
# Inhibit rules to prevent spam
|
||||
inhibit_rules = [
|
||||
{
|
||||
source_match = {
|
||||
severity = "critical";
|
||||
};
|
||||
target_match = {
|
||||
severity = "warning";
|
||||
};
|
||||
equal = ["alertname" "dev" "instance"];
|
||||
}
|
||||
];
|
||||
|
||||
route = {
|
||||
group_by = ["alertname"];
|
||||
group_wait = "10s";
|
||||
group_interval = "10s";
|
||||
repeat_interval = "1h";
|
||||
receiver = "web.hook";
|
||||
routes = cfg.routes;
|
||||
};
|
||||
|
||||
receivers =
|
||||
[
|
||||
{
|
||||
name = "web.hook";
|
||||
webhook_configs = [
|
||||
{
|
||||
url = "http://127.0.0.1:5001/";
|
||||
}
|
||||
];
|
||||
}
|
||||
]
|
||||
++ cfg.receivers;
|
||||
};
|
||||
|
||||
# Merge with user config
|
||||
alertmanagerConfig = recursiveUpdate defaultConfig cfg.extraConfig;
|
||||
in {
|
||||
options.homelab.services.alertmanager = {
|
||||
enable = mkEnableOption "Alertmanager for handling alerts";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 9093;
|
||||
description = "Port for Alertmanager web interface";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to open firewall ports";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/alertmanager";
|
||||
description = "Directory to store Alertmanager data";
|
||||
};
|
||||
|
||||
smtp = {
|
||||
host = mkOption {
|
||||
type = types.str;
|
||||
default = "localhost:587";
|
||||
description = "SMTP server host:port";
|
||||
};
|
||||
|
||||
from = mkOption {
|
||||
type = types.str;
|
||||
default = "alertmanager@${homelabCfg.externalDomain}";
|
||||
description = "From email address";
|
||||
};
|
||||
|
||||
username = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "SMTP username";
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "SMTP password";
|
||||
};
|
||||
};
|
||||
|
||||
routes = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional routing rules";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
match = {
|
||||
service = "gatus";
|
||||
};
|
||||
receiver = "discord-webhook";
|
||||
}
|
||||
{
|
||||
match = {
|
||||
severity = "critical";
|
||||
};
|
||||
receiver = "email-alerts";
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
receivers = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Alert receivers configuration";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "email-alerts";
|
||||
email_configs = [{
|
||||
to = "admin@example.com";
|
||||
subject = "{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}";
|
||||
body = "{{ range .Alerts }}{{ .Annotations.description }}{{ end }}";
|
||||
}];
|
||||
}
|
||||
{
|
||||
name = "discord-webhook";
|
||||
webhook_configs = [{
|
||||
url = "https://discord.com/api/webhooks/...";
|
||||
title = "{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}";
|
||||
}];
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
extraConfig = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional Alertmanager configuration";
|
||||
};
|
||||
|
||||
webExternalUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "https://alertmanager.${homelabCfg.externalDomain}";
|
||||
description = "External URL for Alertmanager web interface";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.prometheus.alertmanager = {
|
||||
enable = true;
|
||||
port = cfg.port;
|
||||
listenAddress = "0.0.0.0";
|
||||
webExternalUrl = cfg.webExternalUrl;
|
||||
dataDir = cfg.dataDir;
|
||||
|
||||
# Write configuration to file
|
||||
configuration = alertmanagerConfig;
|
||||
};
|
||||
|
||||
# Open firewall if requested
|
||||
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port];
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
{
|
||||
name = "alertmanager";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "alertmanager";
|
||||
labels = {
|
||||
service = "alertmanager";
|
||||
component = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add health checks
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "alertmanager-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/-/healthy";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[RESPONSE_TIME] < 1000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "alertmanager";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
{
|
||||
name = "alertmanager-ready";
|
||||
port = cfg.port;
|
||||
path = "/-/ready";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "alertmanager";
|
||||
component = "readiness";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "alertmanager";
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
326
modules/homelab/services/monitoring/alertmanager_new.nix
Normal file
326
modules/homelab/services/monitoring/alertmanager_new.nix
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.alertmanager;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Build alertmanager configuration
|
||||
alertmanagerConfig = {
|
||||
route = {
|
||||
receiver = cfg.defaultReceiver;
|
||||
group_by = cfg.groupBy;
|
||||
group_wait = cfg.groupWait;
|
||||
group_interval = cfg.groupInterval;
|
||||
repeat_interval = cfg.repeatInterval;
|
||||
routes = cfg.routes;
|
||||
};
|
||||
|
||||
receivers =
|
||||
[
|
||||
{name = cfg.defaultReceiver;}
|
||||
]
|
||||
++ cfg.receivers;
|
||||
|
||||
inhibit_rules = cfg.inhibitRules;
|
||||
|
||||
templates = cfg.templates;
|
||||
};
|
||||
in {
|
||||
options.homelab.services.alertmanager = {
|
||||
enable = mkEnableOption "Alertmanager for handling alerts";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 9093;
|
||||
description = "Port for Alertmanager web interface";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to open firewall ports";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/alertmanager";
|
||||
description = "Directory to store Alertmanager data";
|
||||
};
|
||||
|
||||
webExternalUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "http://${homelabCfg.hostname}.${homelabCfg.domain}:${toString cfg.port}";
|
||||
description = "External URL for Alertmanager web interface";
|
||||
};
|
||||
|
||||
environmentFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Environment file for secrets (e.g., Telegram bot token)";
|
||||
example = "/run/secrets/alertmanager-env";
|
||||
};
|
||||
|
||||
# Routing configuration
|
||||
defaultReceiver = mkOption {
|
||||
type = types.str;
|
||||
default = "null";
|
||||
description = "Default receiver for unmatched alerts";
|
||||
};
|
||||
|
||||
groupBy = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = ["alertname"];
|
||||
description = "Labels to group alerts by";
|
||||
};
|
||||
|
||||
groupWait = mkOption {
|
||||
type = types.str;
|
||||
default = "10s";
|
||||
description = "Time to wait before sending initial notification";
|
||||
};
|
||||
|
||||
groupInterval = mkOption {
|
||||
type = types.str;
|
||||
default = "5m";
|
||||
description = "Time to wait before sending updates for a group";
|
||||
};
|
||||
|
||||
repeatInterval = mkOption {
|
||||
type = types.str;
|
||||
default = "4h";
|
||||
description = "Time to wait before re-sending an alert";
|
||||
};
|
||||
|
||||
routes = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Alert routing rules";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
receiver = "telegram";
|
||||
matchers = ["severity =~ \"warning|critical\""];
|
||||
group_wait = "10s";
|
||||
continue = true;
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
receivers = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Alert receivers configuration";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "telegram";
|
||||
telegram_configs = [{
|
||||
api_url = "https://api.telegram.org";
|
||||
bot_token = "$TELEGRAM_BOT_TOKEN";
|
||||
chat_id = -1002642560007;
|
||||
message_thread_id = 4;
|
||||
parse_mode = "HTML";
|
||||
send_resolved = true;
|
||||
message = "{{ template \"telegram.message\" . }}";
|
||||
}];
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
inhibitRules = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [
|
||||
{
|
||||
source_match = {severity = "critical";};
|
||||
target_match = {severity = "warning";};
|
||||
equal = ["alertname" "instance"];
|
||||
}
|
||||
];
|
||||
description = "Rules for inhibiting alerts";
|
||||
};
|
||||
|
||||
templates = mkOption {
|
||||
type = types.listOf types.path;
|
||||
default = [];
|
||||
description = "Template files for alert formatting";
|
||||
example = literalExpression ''
|
||||
[
|
||||
(pkgs.writeText "telegram.tmpl" '''
|
||||
{{- define "telegram.message" -}}
|
||||
{{- if gt (len .Alerts.Firing) 0 -}}
|
||||
🔥 <b>FIRING</b> 🔥
|
||||
{{- range .Alerts.Firing }}
|
||||
<b>{{ .Annotations.summary }}</b>
|
||||
{{ .Annotations.description }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if gt (len .Alerts.Resolved) 0 -}}
|
||||
✅ <b>RESOLVED</b> ✅
|
||||
{{- range .Alerts.Resolved }}
|
||||
<b>{{ .Annotations.summary }}</b>
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
''')
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
# Convenience options for common receivers
|
||||
telegram = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable Telegram notifications";
|
||||
};
|
||||
|
||||
botToken = mkOption {
|
||||
type = types.str;
|
||||
default = "$TELEGRAM_BOT_TOKEN";
|
||||
description = "Telegram bot token (use environment variable)";
|
||||
};
|
||||
|
||||
chatId = mkOption {
|
||||
type = types.int;
|
||||
description = "Telegram chat ID";
|
||||
example = -1002642560007;
|
||||
};
|
||||
|
||||
messageThreadId = mkOption {
|
||||
type = types.nullOr types.int;
|
||||
default = null;
|
||||
description = "Telegram message thread ID (for forum groups)";
|
||||
};
|
||||
|
||||
template = mkOption {
|
||||
type = types.str;
|
||||
default = "telegram.message";
|
||||
description = "Template to use for Telegram messages";
|
||||
};
|
||||
};
|
||||
|
||||
discord = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable Discord notifications";
|
||||
};
|
||||
|
||||
webhookUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "$DISCORD_WEBHOOK_URL";
|
||||
description = "Discord webhook URL (use environment variable)";
|
||||
};
|
||||
|
||||
username = mkOption {
|
||||
type = types.str;
|
||||
default = "Alertmanager";
|
||||
description = "Discord bot username";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.prometheus.alertmanager = {
|
||||
enable = true;
|
||||
port = cfg.port;
|
||||
listenAddress = "0.0.0.0";
|
||||
openFirewall = cfg.openFirewall;
|
||||
webExternalUrl = cfg.webExternalUrl;
|
||||
dataDir = cfg.dataDir;
|
||||
environmentFile = cfg.environmentFile;
|
||||
configuration = alertmanagerConfig;
|
||||
};
|
||||
|
||||
# Auto-configure Telegram and Discord receiver if enabled
|
||||
homelab.services.alertmanager.receivers = [
|
||||
(optional cfg.telegram.enable {
|
||||
name = "telegram";
|
||||
telegram_configs = [
|
||||
{
|
||||
api_url = "https://api.telegram.org";
|
||||
bot_token = cfg.telegram.botToken;
|
||||
chat_id = cfg.telegram.chatId;
|
||||
message_thread_id = cfg.telegram.messageThreadId;
|
||||
parse_mode = "HTML";
|
||||
send_resolved = true;
|
||||
message = "{{ template \"${cfg.telegram.template}\" . }}";
|
||||
}
|
||||
];
|
||||
})
|
||||
(optional cfg.discord.enable {
|
||||
name = "discord";
|
||||
discord_configs = [
|
||||
{
|
||||
webhook_url = cfg.discord.webhookUrl;
|
||||
username = cfg.discord.username;
|
||||
send_resolved = true;
|
||||
}
|
||||
];
|
||||
})
|
||||
];
|
||||
|
||||
# Auto-configure routes for convenience receivers
|
||||
homelab.services.alertmanager.routes =
|
||||
(optional cfg.telegram.enable {
|
||||
receiver = "telegram";
|
||||
matchers = ["severity =~ \"warning|critical\""];
|
||||
group_wait = "10s";
|
||||
continue = true;
|
||||
})
|
||||
++ (optional cfg.discord.enable {
|
||||
receiver = "discord";
|
||||
matchers = ["severity =~ \"warning|critical\""];
|
||||
group_wait = "10s";
|
||||
continue = true;
|
||||
});
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
{
|
||||
name = "alertmanager";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "alertmanager";
|
||||
labels = {
|
||||
service = "alertmanager";
|
||||
component = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add health checks
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "alertmanager-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/-/healthy";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[RESPONSE_TIME] < 1000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "alertmanager";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "alertmanager";
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
148
modules/homelab/services/monitoring/example.nix
Normal file
148
modules/homelab/services/monitoring/example.nix
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
# Example configuration showing how to use the monitoring stack
|
||||
# with the homelab.global approach for dynamic discovery
|
||||
{
|
||||
config,
|
||||
pkgs,
|
||||
...
|
||||
}: {
|
||||
# Import the monitoring services
|
||||
imports = [
|
||||
./services/prometheus.nix
|
||||
./services/alertmanager.nix
|
||||
./services/grafana.nix
|
||||
./services/monitoring-stack.nix
|
||||
];
|
||||
|
||||
# Enable the full monitoring stack
|
||||
homelab.services.monitoring-stack.enable = true;
|
||||
|
||||
# Configure Prometheus - it will automatically discover scrape targets
|
||||
# from homelab.global.monitoring.allMetrics
|
||||
homelab.services.prometheus = {
|
||||
enable = true;
|
||||
port = 9090;
|
||||
retention = "7d";
|
||||
|
||||
# Optional: Add custom scrape configs if needed
|
||||
extraScrapeConfigs = [
|
||||
# Any additional manual scrape configs can go here
|
||||
# but most should be discovered via homelab.monitoring.metrics
|
||||
];
|
||||
|
||||
# Optional: Add custom alerting rules
|
||||
extraAlertingRules = [
|
||||
# Custom alert groups can be added here
|
||||
];
|
||||
|
||||
# Optional: Add external rule files
|
||||
ruleFiles = [
|
||||
# ./path/to/custom-rules.yml
|
||||
];
|
||||
};
|
||||
|
||||
# Configure Alertmanager with Telegram support (like your original)
|
||||
homelab.services.alertmanager = {
|
||||
enable = true;
|
||||
port = 9093;
|
||||
|
||||
# Use sops secrets for environment variables
|
||||
environmentFile = config.sops.secrets."alertmanager/env".path;
|
||||
|
||||
# Enable Telegram notifications
|
||||
telegram = {
|
||||
enable = true;
|
||||
botToken = "$TELEGRAM_BOT_TOKEN"; # From environment file
|
||||
chatId = -1002642560007;
|
||||
messageThreadId = 4;
|
||||
};
|
||||
|
||||
# Custom templates (similar to your setup)
|
||||
templates = [
|
||||
(pkgs.writeText "telegram.tmpl" ''
|
||||
{{- define "telegram.message" -}}
|
||||
{{- if gt (len .Alerts.Firing) 0 -}}
|
||||
🔥 <b>FIRING</b> 🔥
|
||||
{{- range .Alerts.Firing }}
|
||||
<b>{{ .Annotations.summary }}</b>
|
||||
{{ .Annotations.description }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if gt (len .Alerts.Resolved) 0 -}}
|
||||
✅ <b>RESOLVED</b> ✅
|
||||
{{- range .Alerts.Resolved }}
|
||||
<b>{{ .Annotations.summary }}</b>
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
'')
|
||||
];
|
||||
};
|
||||
|
||||
# Configure Grafana with data sources (similar to your setup)
|
||||
homelab.services.grafana = {
|
||||
enable = true;
|
||||
port = 3000;
|
||||
domain = "grafana.procopius.dk";
|
||||
rootUrl = "https://grafana.procopius.dk";
|
||||
|
||||
# Add grafana user to influxdb2 group for accessing secrets
|
||||
extraGroups = ["influxdb2"];
|
||||
|
||||
# Enable data sources
|
||||
datasources = {
|
||||
prometheus.enable = true;
|
||||
loki.enable = true;
|
||||
influxdb = {
|
||||
enable = true;
|
||||
database = "proxmox";
|
||||
tokenPath = config.sops.secrets."influxdb/token".path;
|
||||
};
|
||||
};
|
||||
|
||||
# Provision dashboards (similar to your environment.etc approach)
|
||||
dashboards.files = [
|
||||
{
|
||||
name = "traefik";
|
||||
source = ./dashboards/traefik.json;
|
||||
}
|
||||
{
|
||||
name = "traefik-access";
|
||||
source = ./dashboards/traefik-access.json;
|
||||
}
|
||||
{
|
||||
name = "grafana-traefik";
|
||||
source = ./dashboards/grafana-traefik.json;
|
||||
}
|
||||
{
|
||||
name = "node-exporter";
|
||||
source = ./dashboards/node-exporter.json;
|
||||
}
|
||||
{
|
||||
name = "promtail";
|
||||
source = ./dashboards/promtail.json;
|
||||
}
|
||||
{
|
||||
name = "gitea";
|
||||
source = ./dashboards/gitea.json;
|
||||
}
|
||||
{
|
||||
name = "postgres";
|
||||
source = ./dashboards/postgres.json;
|
||||
}
|
||||
{
|
||||
name = "gatus";
|
||||
source = ./dashboards/gatus.json;
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
# Configure sops secrets (keep your existing setup)
|
||||
sops.secrets."alertmanager/env" = {
|
||||
sopsFile = ../../secrets/secrets.yaml;
|
||||
mode = "0440";
|
||||
};
|
||||
|
||||
# All services automatically register with homelab.monitoring.metrics
|
||||
# and homelab.monitoring.healthChecks for Gatus monitoring
|
||||
# All services automatically get reverse proxy entries
|
||||
}
|
||||
244
modules/homelab/services/monitoring/gatus.nix
Normal file
244
modules/homelab/services/monitoring/gatus.nix
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.gatus;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Convert our health check format to Gatus format
|
||||
formatHealthCheck = check: let
|
||||
# Build the URL
|
||||
url = check._url;
|
||||
|
||||
# Convert conditions to Gatus format (they should already be compatible)
|
||||
conditions = check.conditions or ["[STATUS] == 200"];
|
||||
|
||||
# Convert alerts to Gatus format
|
||||
alerts = map (alert: {
|
||||
inherit (alert) type enabled;
|
||||
failure-threshold = alert.failure-threshold or 3;
|
||||
success-threshold = alert.success-threshold or 2;
|
||||
description = "Health check alert for ${check.name}";
|
||||
}) (check.alerts or []);
|
||||
in {
|
||||
name = check.name;
|
||||
group = check.group or "default";
|
||||
url = url;
|
||||
interval = check.interval or "30s";
|
||||
|
||||
# Add method and headers for HTTP/HTTPS checks
|
||||
method =
|
||||
if (check.protocol == "http" || check.protocol == "https")
|
||||
then check.method or "GET"
|
||||
else null;
|
||||
|
||||
conditions = conditions;
|
||||
|
||||
# Add timeout
|
||||
client = {
|
||||
timeout = check.timeout or "10s";
|
||||
};
|
||||
|
||||
# Add alerts if configured
|
||||
alerts =
|
||||
if alerts != []
|
||||
then alerts
|
||||
else [];
|
||||
|
||||
# Add labels for UI organization
|
||||
ui = {
|
||||
hide-hostname = false;
|
||||
hide-url = false;
|
||||
description = "Health check for ${check.name} on ${check._nodeName}";
|
||||
};
|
||||
};
|
||||
|
||||
# Generate Gatus configuration
|
||||
gatusConfig = {
|
||||
# Global Gatus settings
|
||||
alerting = mkIf (cfg.alerting != {}) cfg.alerting;
|
||||
|
||||
web = {
|
||||
address = "0.0.0.0";
|
||||
port = cfg.port;
|
||||
};
|
||||
|
||||
# TODO: Introduce monitor option to toggle monitoring
|
||||
metrics = true;
|
||||
|
||||
ui = {
|
||||
title = cfg.ui.title;
|
||||
header = cfg.ui.header;
|
||||
link = cfg.ui.link;
|
||||
buttons = cfg.ui.buttons;
|
||||
};
|
||||
|
||||
storage = mkIf (cfg.storage != {}) cfg.storage;
|
||||
|
||||
# Convert all enabled health checks to Gatus endpoints
|
||||
endpoints = let
|
||||
# Get all health checks from global config
|
||||
allHealthChecks = homelabCfg.global.monitoring.enabledHealthChecks or [];
|
||||
|
||||
# Group by group name for better organization
|
||||
# groupedChecks = homelabCfg.global.monitoring.healthChecksByGroup or {};
|
||||
|
||||
# Convert to Gatus format
|
||||
gatusEndpoints = map formatHealthCheck allHealthChecks;
|
||||
in
|
||||
gatusEndpoints;
|
||||
};
|
||||
in {
|
||||
options.homelab.services.gatus = {
|
||||
enable = mkEnableOption "Gatus uptime monitoring service";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 8080;
|
||||
description = "Port for Gatus web interface";
|
||||
};
|
||||
|
||||
openFirewall = lib.mkOption {
|
||||
type = lib.types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Whether to automatically open the specified ports in the firewall.
|
||||
'';
|
||||
};
|
||||
|
||||
ui = {
|
||||
title = mkOption {
|
||||
type = types.str;
|
||||
default = "Homelab Status";
|
||||
description = "Title for the Gatus web interface";
|
||||
};
|
||||
|
||||
header = mkOption {
|
||||
type = types.str;
|
||||
default = "Homelab Services Status";
|
||||
description = "Header text for the Gatus interface";
|
||||
};
|
||||
|
||||
link = mkOption {
|
||||
type = types.str;
|
||||
default = "https://gatus.${homelabCfg.externalDomain}";
|
||||
description = "Link in the Gatus header";
|
||||
};
|
||||
|
||||
buttons = mkOption {
|
||||
type = types.listOf (types.submodule {
|
||||
options = {
|
||||
name = mkOption {type = types.str;};
|
||||
link = mkOption {type = types.str;};
|
||||
};
|
||||
});
|
||||
default = [
|
||||
{
|
||||
name = "Grafana";
|
||||
link = "https://grafana.${homelabCfg.externalDomain}";
|
||||
}
|
||||
{
|
||||
name = "Prometheus";
|
||||
link = "https://prometheus.${homelabCfg.externalDomain}";
|
||||
}
|
||||
];
|
||||
description = "Navigation buttons in the Gatus interface";
|
||||
};
|
||||
};
|
||||
|
||||
alerting = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Gatus alerting configuration";
|
||||
example = literalExpression ''
|
||||
{
|
||||
discord = {
|
||||
webhook-url = "https://discord.com/api/webhooks/...";
|
||||
default-alert = {
|
||||
enabled = true;
|
||||
description = "Health check failed";
|
||||
failure-threshold = 3;
|
||||
success-threshold = 2;
|
||||
};
|
||||
};
|
||||
}
|
||||
'';
|
||||
};
|
||||
|
||||
storage = mkOption {
|
||||
type = types.attrs;
|
||||
default = {
|
||||
type = "memory";
|
||||
};
|
||||
description = "Gatus storage configuration";
|
||||
example = literalExpression ''
|
||||
{
|
||||
type = "postgres";
|
||||
path = "postgres://user:password@localhost/gatus?sslmode=disable";
|
||||
}
|
||||
'';
|
||||
};
|
||||
|
||||
extraConfig = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional Gatus configuration options";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.gatus = {
|
||||
enable = true;
|
||||
openFirewall = cfg.openFirewall;
|
||||
settings = gatusConfig;
|
||||
};
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
{
|
||||
name = "gatus";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "gatus";
|
||||
labels = {
|
||||
service = "gatus";
|
||||
component = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add health check for Gatus itself
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "gatus-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/health";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].status == UP"
|
||||
"[RESPONSE_TIME] < 1000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "gatus";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry if needed
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "status";
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
# path = "/";
|
||||
# enableAuth = false; # Status page should be publicly accessible
|
||||
# enableSSL = true;
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
416
modules/homelab/services/monitoring/grafana.nix
Normal file
416
modules/homelab/services/monitoring/grafana.nix
Normal file
|
|
@ -0,0 +1,416 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.grafana;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Default dashboards for homelab monitoring
|
||||
defaultDashboards = {
|
||||
"node-exporter" = pkgs.fetchurl {
|
||||
url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
|
||||
sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this
|
||||
};
|
||||
"prometheus-stats" = pkgs.fetchurl {
|
||||
url = "https://grafana.com/api/dashboards/2/revisions/2/download";
|
||||
sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this
|
||||
};
|
||||
};
|
||||
|
||||
# Grafana provisioning configuration
|
||||
provisioningConfig = {
|
||||
# Data sources
|
||||
datasources =
|
||||
[
|
||||
{
|
||||
name = "Prometheus";
|
||||
type = "prometheus";
|
||||
access = "proxy";
|
||||
url = cfg.datasources.prometheus.url;
|
||||
isDefault = true;
|
||||
editable = false;
|
||||
jsonData = {
|
||||
timeInterval = "5s";
|
||||
queryTimeout = "60s";
|
||||
httpMethod = "POST";
|
||||
};
|
||||
}
|
||||
]
|
||||
++ cfg.datasources.extra;
|
||||
|
||||
# Dashboard providers
|
||||
dashboards = [
|
||||
{
|
||||
name = "homelab";
|
||||
type = "file";
|
||||
disableDeletion = false;
|
||||
updateIntervalSeconds = 10;
|
||||
allowUiUpdates = true;
|
||||
options = {
|
||||
path = "/var/lib/grafana/dashboards";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Notification channels
|
||||
notifiers = cfg.notifications;
|
||||
};
|
||||
in {
|
||||
options.homelab.services.grafana = {
|
||||
enable = mkEnableOption "Grafana dashboard service";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 3000;
|
||||
description = "Port for Grafana web interface";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to open firewall ports";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/grafana";
|
||||
description = "Directory to store Grafana data";
|
||||
};
|
||||
|
||||
domain = mkOption {
|
||||
type = types.str;
|
||||
default = "grafana.${homelabCfg.externalDomain}";
|
||||
description = "Domain for Grafana";
|
||||
};
|
||||
|
||||
rootUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "https://grafana.${homelabCfg.externalDomain}";
|
||||
description = "Root URL for Grafana";
|
||||
};
|
||||
|
||||
admin = {
|
||||
user = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin username";
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin password (change this!)";
|
||||
};
|
||||
|
||||
email = mkOption {
|
||||
type = types.str;
|
||||
default = "admin@${homelabCfg.externalDomain}";
|
||||
description = "Admin email";
|
||||
};
|
||||
};
|
||||
|
||||
datasources = {
|
||||
prometheus = {
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://localhost:9090";
|
||||
description = "Prometheus URL";
|
||||
};
|
||||
};
|
||||
|
||||
extra = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional data sources";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "Loki";
|
||||
type = "loki";
|
||||
url = "http://localhost:3100";
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
notifications = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Notification channels configuration";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "discord-webhook";
|
||||
type = "discord";
|
||||
settings = {
|
||||
url = "https://discord.com/api/webhooks/...";
|
||||
username = "Grafana";
|
||||
};
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
plugins = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [
|
||||
"grafana-piechart-panel"
|
||||
"grafana-worldmap-panel"
|
||||
"grafana-clock-panel"
|
||||
"grafana-simple-json-datasource"
|
||||
];
|
||||
description = "Grafana plugins to install";
|
||||
};
|
||||
|
||||
smtp = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable SMTP for email notifications";
|
||||
};
|
||||
|
||||
host = mkOption {
|
||||
type = types.str;
|
||||
default = "localhost:587";
|
||||
description = "SMTP server host:port";
|
||||
};
|
||||
|
||||
user = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "SMTP username";
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "SMTP password";
|
||||
};
|
||||
|
||||
fromAddress = mkOption {
|
||||
type = types.str;
|
||||
default = "grafana@${homelabCfg.externalDomain}";
|
||||
description = "From email address";
|
||||
};
|
||||
|
||||
fromName = mkOption {
|
||||
type = types.str;
|
||||
default = "Homelab Grafana";
|
||||
description = "From name";
|
||||
};
|
||||
};
|
||||
|
||||
security = {
|
||||
allowEmbedding = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Allow embedding Grafana in iframes";
|
||||
};
|
||||
|
||||
cookieSecure = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Set secure flag on cookies";
|
||||
};
|
||||
|
||||
secretKey = mkOption {
|
||||
type = types.str;
|
||||
default = "change-this-secret-key";
|
||||
description = "Secret key for signing (change this!)";
|
||||
};
|
||||
};
|
||||
|
||||
auth = {
|
||||
anonymousEnabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable anonymous access";
|
||||
};
|
||||
|
||||
disableLoginForm = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Disable login form";
|
||||
};
|
||||
};
|
||||
|
||||
extraConfig = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional Grafana configuration";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings =
|
||||
recursiveUpdate {
|
||||
server = {
|
||||
http_addr = "0.0.0.0";
|
||||
http_port = cfg.port;
|
||||
domain = cfg.domain;
|
||||
root_url = cfg.rootUrl;
|
||||
serve_from_sub_path = false;
|
||||
};
|
||||
|
||||
database = {
|
||||
type = "sqlite3";
|
||||
path = "${cfg.dataDir}/grafana.db";
|
||||
};
|
||||
|
||||
security = {
|
||||
admin_user = cfg.admin.user;
|
||||
admin_password = cfg.admin.password;
|
||||
admin_email = cfg.admin.email;
|
||||
allow_embedding = cfg.security.allowEmbedding;
|
||||
cookie_secure = cfg.security.cookieSecure;
|
||||
secret_key = cfg.security.secretKey;
|
||||
};
|
||||
|
||||
users = {
|
||||
allow_sign_up = false;
|
||||
auto_assign_org = true;
|
||||
auto_assign_org_role = "Viewer";
|
||||
};
|
||||
|
||||
auth.anonymous = {
|
||||
enabled = cfg.auth.anonymousEnabled;
|
||||
org_name = "Homelab";
|
||||
org_role = "Viewer";
|
||||
};
|
||||
|
||||
auth.basic = {
|
||||
enabled = !cfg.auth.disableLoginForm;
|
||||
};
|
||||
|
||||
smtp = mkIf cfg.smtp.enabled {
|
||||
enabled = true;
|
||||
host = cfg.smtp.host;
|
||||
user = cfg.smtp.user;
|
||||
password = cfg.smtp.password;
|
||||
from_address = cfg.smtp.fromAddress;
|
||||
from_name = cfg.smtp.fromName;
|
||||
};
|
||||
|
||||
analytics = {
|
||||
reporting_enabled = false;
|
||||
check_for_updates = false;
|
||||
};
|
||||
|
||||
log = {
|
||||
mode = "console";
|
||||
level = "info";
|
||||
};
|
||||
|
||||
paths = {
|
||||
data = cfg.dataDir;
|
||||
logs = "${cfg.dataDir}/log";
|
||||
plugins = "${cfg.dataDir}/plugins";
|
||||
provisioning = "/etc/grafana/provisioning";
|
||||
};
|
||||
}
|
||||
cfg.extraConfig;
|
||||
|
||||
dataDir = cfg.dataDir;
|
||||
};
|
||||
|
||||
# Install plugins
|
||||
systemd.services.grafana.preStart = mkIf (cfg.plugins != []) (
|
||||
concatStringsSep "\n" (map (
|
||||
plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true"
|
||||
)
|
||||
cfg.plugins)
|
||||
);
|
||||
|
||||
# Provisioning configuration
|
||||
environment.etc =
|
||||
{
|
||||
"grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON {
|
||||
apiVersion = 1;
|
||||
datasources = provisioningConfig.datasources;
|
||||
};
|
||||
|
||||
"grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON {
|
||||
apiVersion = 1;
|
||||
providers = provisioningConfig.dashboards;
|
||||
};
|
||||
}
|
||||
// (mkIf (cfg.notifications != []) {
|
||||
"grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON {
|
||||
apiVersion = 1;
|
||||
notifiers = provisioningConfig.notifiers;
|
||||
};
|
||||
});
|
||||
|
||||
# Create dashboard directory
|
||||
systemd.tmpfiles.rules = [
|
||||
"d ${cfg.dataDir}/dashboards 0755 grafana grafana -"
|
||||
];
|
||||
|
||||
# Open firewall if requested
|
||||
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port];
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
{
|
||||
name = "grafana";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "grafana";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add health checks
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "grafana-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/api/health";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].database == ok"
|
||||
"[RESPONSE_TIME] < 2000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
{
|
||||
name = "grafana-login-page";
|
||||
port = cfg.port;
|
||||
path = "/login";
|
||||
interval = "60s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[RESPONSE_TIME] < 3000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "login";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "grafana";
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
369
modules/homelab/services/monitoring/grafana_new.nix
Normal file
369
modules/homelab/services/monitoring/grafana_new.nix
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.grafana;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Dashboard provisioning
|
||||
provisionDashboard = name: source: {
|
||||
"grafana-dashboards/${name}.json" = {
|
||||
inherit source;
|
||||
user = "grafana";
|
||||
group = "grafana";
|
||||
mode = "0644";
|
||||
};
|
||||
};
|
||||
|
||||
# Generate all dashboard files
|
||||
dashboardFiles =
|
||||
fold (
|
||||
dashboard: acc:
|
||||
acc // (provisionDashboard dashboard.name dashboard.source)
|
||||
) {}
|
||||
cfg.dashboards.files;
|
||||
in {
|
||||
options.homelab.services.grafana = {
|
||||
enable = mkEnableOption "Grafana dashboard service";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 3000;
|
||||
description = "Port for Grafana web interface";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to open firewall ports";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/grafana";
|
||||
description = "Directory to store Grafana data";
|
||||
};
|
||||
|
||||
domain = mkOption {
|
||||
type = types.str;
|
||||
default = "grafana.${homelabCfg.externalDomain}";
|
||||
description = "Domain for Grafana";
|
||||
};
|
||||
|
||||
rootUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "https://grafana.${homelabCfg.externalDomain}";
|
||||
description = "Root URL for Grafana";
|
||||
};
|
||||
|
||||
# Authentication settings
|
||||
auth = {
|
||||
disableLoginForm = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Disable the login form";
|
||||
};
|
||||
|
||||
oauthAutoLogin = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable OAuth auto-login";
|
||||
};
|
||||
|
||||
genericOauth = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable generic OAuth";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Data source configuration
|
||||
datasources = {
|
||||
prometheus = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Prometheus datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://127.0.0.1:9090";
|
||||
description = "Prometheus URL";
|
||||
};
|
||||
|
||||
uid = mkOption {
|
||||
type = types.str;
|
||||
default = "prometheus";
|
||||
description = "Unique identifier for Prometheus datasource";
|
||||
};
|
||||
};
|
||||
|
||||
loki = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable Loki datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://127.0.0.1:3100";
|
||||
description = "Loki URL";
|
||||
};
|
||||
|
||||
uid = mkOption {
|
||||
type = types.str;
|
||||
default = "loki";
|
||||
description = "Unique identifier for Loki datasource";
|
||||
};
|
||||
};
|
||||
|
||||
influxdb = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable InfluxDB datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://127.0.0.1:8086";
|
||||
description = "InfluxDB URL";
|
||||
};
|
||||
|
||||
database = mkOption {
|
||||
type = types.str;
|
||||
default = "homelab";
|
||||
description = "InfluxDB database name";
|
||||
};
|
||||
|
||||
tokenPath = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Path to InfluxDB token file";
|
||||
};
|
||||
|
||||
uid = mkOption {
|
||||
type = types.str;
|
||||
default = "influxdb";
|
||||
description = "Unique identifier for InfluxDB datasource";
|
||||
};
|
||||
};
|
||||
|
||||
extra = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional data sources";
|
||||
};
|
||||
};
|
||||
|
||||
# Dashboard configuration
|
||||
dashboards = {
|
||||
path = mkOption {
|
||||
type = types.str;
|
||||
default = "/etc/grafana-dashboards";
|
||||
description = "Path to dashboard files";
|
||||
};
|
||||
|
||||
files = mkOption {
|
||||
type = types.listOf (types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
description = "Dashboard name (without .json extension)";
|
||||
example = "node-exporter";
|
||||
};
|
||||
source = mkOption {
|
||||
type = types.path;
|
||||
description = "Path to dashboard JSON file";
|
||||
};
|
||||
};
|
||||
});
|
||||
default = [];
|
||||
description = "Dashboard files to provision";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "node-exporter";
|
||||
source = ./dashboards/node-exporter.json;
|
||||
}
|
||||
{
|
||||
name = "traefik";
|
||||
source = ./dashboards/traefik.json;
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
# Extra user groups for accessing secrets
|
||||
extraGroups = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
description = "Additional groups for the grafana user";
|
||||
example = ["influxdb2"];
|
||||
};
|
||||
|
||||
# Additional settings
|
||||
extraSettings = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional Grafana settings";
|
||||
};
|
||||
|
||||
plugins = mkOption {
|
||||
type = types.listOf types.package;
|
||||
default = [];
|
||||
description = "Grafana plugins to install";
|
||||
example = literalExpression "with pkgs.grafanaPlugins; [ grafana-piechart-panel ]";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Add grafana user to extra groups (e.g., for accessing secrets)
|
||||
users.users.grafana.extraGroups = cfg.extraGroups;
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
dataDir = cfg.dataDir;
|
||||
declarativePlugins = cfg.plugins;
|
||||
|
||||
settings =
|
||||
recursiveUpdate {
|
||||
server = {
|
||||
http_port = cfg.port;
|
||||
http_addr = "0.0.0.0";
|
||||
domain = cfg.domain;
|
||||
root_url = cfg.rootUrl;
|
||||
oauth_auto_login = cfg.auth.oauthAutoLogin;
|
||||
};
|
||||
|
||||
"auth.generic_oauth" = {
|
||||
enabled = cfg.auth.genericOauth.enabled;
|
||||
};
|
||||
|
||||
auth = {
|
||||
disable_login_form = cfg.auth.disableLoginForm;
|
||||
};
|
||||
}
|
||||
cfg.extraSettings;
|
||||
|
||||
provision = {
|
||||
enable = true;
|
||||
|
||||
datasources.settings = {
|
||||
datasources = let
|
||||
# Build datasource list
|
||||
datasources =
|
||||
[]
|
||||
++ optional cfg.datasources.prometheus.enable {
|
||||
uid = cfg.datasources.prometheus.uid;
|
||||
name = "Prometheus";
|
||||
type = "prometheus";
|
||||
url = cfg.datasources.prometheus.url;
|
||||
}
|
||||
++ optional cfg.datasources.loki.enable {
|
||||
uid = cfg.datasources.loki.uid;
|
||||
name = "Loki";
|
||||
type = "loki";
|
||||
url = cfg.datasources.loki.url;
|
||||
}
|
||||
++ optional cfg.datasources.influxdb.enable {
|
||||
uid = cfg.datasources.influxdb.uid;
|
||||
name = "InfluxDB";
|
||||
type = "influxdb";
|
||||
url = cfg.datasources.influxdb.url;
|
||||
access = "proxy";
|
||||
jsonData = {
|
||||
dbName = cfg.datasources.influxdb.database;
|
||||
httpHeaderName1 = "Authorization";
|
||||
};
|
||||
secureJsonData = mkIf (cfg.datasources.influxdb.tokenPath != null) {
|
||||
httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenPath}}";
|
||||
};
|
||||
}
|
||||
++ cfg.datasources.extra;
|
||||
in
|
||||
datasources;
|
||||
};
|
||||
|
||||
dashboards.settings.providers = mkIf (cfg.dashboards.files != []) [
|
||||
{
|
||||
name = "homelab-dashboards";
|
||||
options.path = cfg.dashboards.path;
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
# Open firewall if requested
|
||||
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port];
|
||||
|
||||
# Provision dashboard files
|
||||
environment.etc = dashboardFiles;
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
{
|
||||
name = "grafana";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "grafana";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add health checks
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "grafana-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/api/health";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].database == ok"
|
||||
"[RESPONSE_TIME] < 2000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
{
|
||||
name = "grafana-login-page";
|
||||
port = cfg.port;
|
||||
path = "/login";
|
||||
interval = "60s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[RESPONSE_TIME] < 3000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "login";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "grafana";
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
0
modules/homelab/services/monitoring/influxdb.nix
Normal file
0
modules/homelab/services/monitoring/influxdb.nix
Normal file
0
modules/homelab/services/monitoring/loki.nix
Normal file
0
modules/homelab/services/monitoring/loki.nix
Normal file
60
modules/homelab/services/monitoring/monitoring-stack.nix
Normal file
60
modules/homelab/services/monitoring/monitoring-stack.nix
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.monitoring-stack;
|
||||
in {
|
||||
imports = [
|
||||
./prometheus.nix
|
||||
./alertmanager.nix
|
||||
./grafana.nix
|
||||
];
|
||||
|
||||
options.homelab.services.monitoring-stack = {
|
||||
enable = mkEnableOption "Complete monitoring stack (Prometheus + Alertmanager + Grafana)";
|
||||
|
||||
prometheus = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Prometheus";
|
||||
};
|
||||
};
|
||||
|
||||
alertmanager = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Alertmanager";
|
||||
};
|
||||
};
|
||||
|
||||
grafana = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Grafana";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Enable services based on configuration
|
||||
homelab.services.prometheus.enable = mkDefault cfg.prometheus.enable;
|
||||
homelab.services.alertmanager.enable = mkDefault cfg.alertmanager.enable;
|
||||
homelab.services.grafana.enable = mkDefault cfg.grafana.enable;
|
||||
|
||||
# Configure Prometheus to use Alertmanager if both are enabled
|
||||
homelab.services.prometheus.alertmanager = mkIf (cfg.prometheus.enable && cfg.alertmanager.enable) {
|
||||
enable = true;
|
||||
url = "http://localhost:${toString config.homelab.services.alertmanager.port}";
|
||||
};
|
||||
|
||||
# Configure Grafana to use Prometheus if both are enabled
|
||||
homelab.services.grafana.datasources.prometheus = mkIf (cfg.prometheus.enable && cfg.grafana.enable) {
|
||||
url = "http://localhost:${toString config.homelab.services.prometheus.port}";
|
||||
};
|
||||
};
|
||||
}
|
||||
203
modules/homelab/services/monitoring/prometheus.nix
Normal file
203
modules/homelab/services/monitoring/prometheus.nix
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.prometheus;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Generate Prometheus scrape configs from global monitoring data
|
||||
prometheusScrapeConfigs = let
|
||||
allMetrics = homelabCfg.global.monitoring.allMetrics or [];
|
||||
jobGroups = groupBy (m: m.jobName) allMetrics;
|
||||
|
||||
scrapeConfigs =
|
||||
mapAttrsToList (jobName: endpoints: {
|
||||
job_name = jobName;
|
||||
scrape_interval = head endpoints.scrapeInterval or ["30s"];
|
||||
static_configs = [
|
||||
{
|
||||
targets = map (endpoint: "${endpoint.host}:${toString endpoint.port}") endpoints;
|
||||
labels = fold (endpoint: acc: acc // endpoint.labels) {} endpoints;
|
||||
}
|
||||
];
|
||||
metrics_path = head endpoints.path or [null];
|
||||
})
|
||||
jobGroups;
|
||||
in
|
||||
scrapeConfigs;
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
prometheusServiceOptions = {
|
||||
retention = mkOption {
|
||||
type = types.str;
|
||||
default = "15d";
|
||||
description = "How long to retain metrics data";
|
||||
};
|
||||
|
||||
alertmanager = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable integration with Alertmanager";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "${homelabCfg.hostname}.${homelabCfg.domain}:9093";
|
||||
description = "Alertmanager URL";
|
||||
};
|
||||
};
|
||||
|
||||
extraScrapeConfigs = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional scrape configurations";
|
||||
};
|
||||
|
||||
extraAlertingRules = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional alerting rules";
|
||||
};
|
||||
|
||||
globalConfig = mkOption {
|
||||
type = types.attrs;
|
||||
default = {
|
||||
scrape_interval = "15s";
|
||||
evaluation_interval = "15s";
|
||||
};
|
||||
description = "Global Prometheus configuration";
|
||||
};
|
||||
|
||||
extraFlags = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
description = "Extra command line flags";
|
||||
};
|
||||
|
||||
ruleFiles = mkOption {
|
||||
type = types.listOf types.path;
|
||||
default = [];
|
||||
description = "Additional rule files to load";
|
||||
};
|
||||
};
|
||||
|
||||
# Standard alerting rules
|
||||
alertingRules = [
|
||||
{
|
||||
name = "homelab.rules";
|
||||
rules = [
|
||||
{
|
||||
alert = "InstanceDown";
|
||||
expr = "up == 0";
|
||||
for = "5m";
|
||||
labels = {severity = "critical";};
|
||||
annotations = {
|
||||
summary = "Instance {{ $labels.instance }} down";
|
||||
description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "HighCPUUsage";
|
||||
expr = "100 - (avg by(instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80";
|
||||
for = "10m";
|
||||
labels = {severity = "warning";};
|
||||
annotations = {
|
||||
summary = "High CPU usage on {{ $labels.instance }}";
|
||||
description = "CPU usage is above 80% for more than 10 minutes on {{ $labels.instance }}.";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "HighMemoryUsage";
|
||||
expr = "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85";
|
||||
for = "10m";
|
||||
labels = {severity = "warning";};
|
||||
annotations = {
|
||||
summary = "High memory usage on {{ $labels.instance }}";
|
||||
description = "Memory usage is above 85% for more than 10 minutes on {{ $labels.instance }}.";
|
||||
};
|
||||
}
|
||||
{
|
||||
alert = "DiskSpaceLow";
|
||||
expr = "((node_filesystem_size_bytes - node_filesystem_avail_bytes) / node_filesystem_size_bytes) * 100 > 90";
|
||||
for = "5m";
|
||||
labels = {severity = "critical";};
|
||||
annotations = {
|
||||
summary = "Disk space low on {{ $labels.instance }}";
|
||||
description = "Disk usage is above 90% on {{ $labels.instance }} {{ $labels.mountpoint }}.";
|
||||
};
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
in {
|
||||
options.homelab.services.prometheus = serviceInterface.mkServiceInterface {
|
||||
serviceName = "prometheus";
|
||||
defaultPort = 9090;
|
||||
defaultSubdomain = "prometheus";
|
||||
monitoringPath = "/metrics";
|
||||
healthCheckPath = "/-/healthy";
|
||||
healthCheckConditions = ["[STATUS] == 200" "[RESPONSE_TIME] < 1000"];
|
||||
serviceOptions = prometheusServiceOptions;
|
||||
};
|
||||
|
||||
config = serviceInterface.mkServiceConfig {
|
||||
inherit config cfg homelabCfg;
|
||||
serviceName = "prometheus";
|
||||
|
||||
extraMonitoringLabels = {
|
||||
component = "monitoring-server";
|
||||
};
|
||||
|
||||
customHealthChecks = [
|
||||
{
|
||||
name = "prometheus-ready";
|
||||
port = cfg.port;
|
||||
path = "/-/ready";
|
||||
interval = "30s";
|
||||
conditions = ["[STATUS] == 200"];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "prometheus";
|
||||
component = "readiness";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
serviceConfig = {
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
port = cfg.port;
|
||||
listenAddress = "0.0.0.0";
|
||||
retentionTime = cfg.retention;
|
||||
|
||||
globalConfig = cfg.globalConfig;
|
||||
extraFlags = cfg.extraFlags;
|
||||
|
||||
scrapeConfigs = prometheusScrapeConfigs ++ cfg.extraScrapeConfigs;
|
||||
|
||||
ruleFiles =
|
||||
map (ruleGroup:
|
||||
pkgs.writeText "${ruleGroup.name}.yml" (builtins.toJSON {
|
||||
groups = [ruleGroup];
|
||||
})) (alertingRules ++ cfg.extraAlertingRules)
|
||||
++ cfg.ruleFiles;
|
||||
|
||||
alertmanagers = mkIf cfg.alertmanager.enable [
|
||||
{
|
||||
static_configs = [
|
||||
{
|
||||
targets = [cfg.alertmanager.url];
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
0
modules/homelab/services/monitoring/promtail.nix
Normal file
0
modules/homelab/services/monitoring/promtail.nix
Normal file
0
modules/homelab/services/monitoring/tempo.nix
Normal file
0
modules/homelab/services/monitoring/tempo.nix
Normal file
0
modules/homelab/services/postgres.nix
Normal file
0
modules/homelab/services/postgres.nix
Normal file
208
modules/homelab/services/prometheus_old.nix
Normal file
208
modules/homelab/services/prometheus_old.nix
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
# modules/services/prometheus.nix
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.prometheus;
|
||||
globalCfg = config.homelab.global;
|
||||
in {
|
||||
options.homelab.services.prometheus = {
|
||||
enable = mkEnableOption "Prometheus monitoring server";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 9090;
|
||||
description = "Prometheus server port";
|
||||
};
|
||||
|
||||
webExternalUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "http://${globalCfg.hostname}:${toString cfg.port}";
|
||||
description = "External URL for Prometheus";
|
||||
};
|
||||
|
||||
retention = mkOption {
|
||||
type = types.str;
|
||||
default = "30d";
|
||||
description = "Data retention period";
|
||||
};
|
||||
|
||||
scrapeConfigs = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional scrape configurations";
|
||||
};
|
||||
|
||||
alertmanager = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable Alertmanager integration";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://localhost:9093";
|
||||
description = "Alertmanager URL";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Register service with global homelab config
|
||||
homelab.global.services.prometheus = {
|
||||
enable = true;
|
||||
description = "Metrics collection and monitoring server";
|
||||
category = "monitoring";
|
||||
ports = [cfg.port];
|
||||
tags = ["metrics" "monitoring" "alerting"];
|
||||
priority = 20;
|
||||
dependencies = ["node-exporter"];
|
||||
};
|
||||
|
||||
# Configure the actual Prometheus service
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
port = cfg.port;
|
||||
webExternalUrl = cfg.webExternalUrl;
|
||||
|
||||
retentionTime = cfg.retention;
|
||||
|
||||
scrapeConfigs =
|
||||
[
|
||||
# Auto-discover monitoring endpoints from global config
|
||||
{
|
||||
job_name = "homelab-auto";
|
||||
static_configs = [
|
||||
{
|
||||
targets =
|
||||
map (
|
||||
endpoint: "${globalCfg.hostname}:${toString endpoint.port}"
|
||||
)
|
||||
globalCfg.monitoring.endpoints;
|
||||
}
|
||||
];
|
||||
scrape_interval = "30s";
|
||||
metrics_path = "/metrics";
|
||||
}
|
||||
]
|
||||
++ cfg.scrapeConfigs;
|
||||
|
||||
# Alertmanager configuration
|
||||
alertmanagers = mkIf cfg.alertmanager.enable [
|
||||
{
|
||||
static_configs = [
|
||||
{
|
||||
targets = [cfg.alertmanager.url];
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
|
||||
rules = [
|
||||
# Basic homelab alerting rules
|
||||
(pkgs.writeText "homelab-alerts.yml" ''
|
||||
groups:
|
||||
- name: homelab
|
||||
rules:
|
||||
- alert: ServiceDown
|
||||
expr: up == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Service {{ $labels.instance }} is down"
|
||||
description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes."
|
||||
|
||||
- alert: HighMemoryUsage
|
||||
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High memory usage on {{ $labels.instance }}"
|
||||
description: "Memory usage is above 90% on {{ $labels.instance }}"
|
||||
|
||||
- alert: HighDiskUsage
|
||||
expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High disk usage on {{ $labels.instance }}"
|
||||
description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}"
|
||||
'')
|
||||
];
|
||||
};
|
||||
|
||||
# Add monitoring endpoint to global config
|
||||
homelab.global.monitoring.endpoints = [
|
||||
{
|
||||
name = "prometheus";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "prometheus";
|
||||
scrapeInterval = "30s";
|
||||
labels = {
|
||||
service = "prometheus";
|
||||
role = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry if configured
|
||||
homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [
|
||||
{
|
||||
subdomain = "prometheus";
|
||||
port = cfg.port;
|
||||
path = "/";
|
||||
enableAuth = true;
|
||||
enableSSL = true;
|
||||
customHeaders = {
|
||||
"X-Frame-Options" = "DENY";
|
||||
"X-Content-Type-Options" = "nosniff";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add backup job for Prometheus data
|
||||
homelab.global.backups.jobs = [
|
||||
{
|
||||
name = "prometheus-data";
|
||||
backend = "restic";
|
||||
paths = ["/var/lib/prometheus2"];
|
||||
schedule = "daily";
|
||||
retention = {
|
||||
daily = "7";
|
||||
weekly = "4";
|
||||
monthly = "3";
|
||||
yearly = "1";
|
||||
};
|
||||
excludePatterns = [
|
||||
"*.tmp"
|
||||
"*/wal/*"
|
||||
];
|
||||
preHook = ''
|
||||
# Stop prometheus temporarily for consistent backup
|
||||
systemctl stop prometheus
|
||||
'';
|
||||
postHook = ''
|
||||
# Restart prometheus after backup
|
||||
systemctl start prometheus
|
||||
'';
|
||||
}
|
||||
];
|
||||
|
||||
# Open firewall port
|
||||
networking.firewall.allowedTCPPorts = [cfg.port];
|
||||
|
||||
# Create prometheus configuration directory
|
||||
systemd.tmpfiles.rules = [
|
||||
"d /var/lib/prometheus2 0755 prometheus prometheus -"
|
||||
"d /etc/prometheus 0755 root root -"
|
||||
];
|
||||
};
|
||||
}
|
||||
126
modules/lib/helpers.nix
Normal file
126
modules/lib/helpers.nix
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# modules/lib/helpers.nix
|
||||
{lib, ...}:
|
||||
with lib; rec {
|
||||
# Helper to merge global configurations from multiple sources
|
||||
mergeGlobalConfigs = configs: let
|
||||
mergeEndpoints = foldl' (acc: cfg: acc ++ cfg.monitoring.endpoints) [];
|
||||
mergeBackups = foldl' (acc: cfg: acc ++ cfg.backups.jobs) [];
|
||||
mergeProxyEntries = foldl' (acc: cfg: acc ++ cfg.reverseProxy.entries) [];
|
||||
in {
|
||||
monitoring.endpoints = mergeEndpoints configs;
|
||||
backups.jobs = mergeBackups configs;
|
||||
reverseProxy.entries = mergeProxyEntries configs;
|
||||
};
|
||||
|
||||
# Helper to create a service module template
|
||||
createServiceModule = {
|
||||
name,
|
||||
port,
|
||||
hasMetrics ? true,
|
||||
hasWebUI ? true,
|
||||
dataDir ? "/var/lib/${name}",
|
||||
}: {
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.services.${name};
|
||||
in {
|
||||
options.services.${name} = {
|
||||
enable = mkEnableOption "${name} service";
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = port;
|
||||
description = "Port for ${name}";
|
||||
};
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = dataDir;
|
||||
description = "Data directory for ${name}";
|
||||
};
|
||||
enableMetrics = mkOption {
|
||||
type = types.bool;
|
||||
default = hasMetrics;
|
||||
description = "Enable metrics endpoint";
|
||||
};
|
||||
exposeWeb = mkOption {
|
||||
type = types.bool;
|
||||
default = hasWebUI;
|
||||
description = "Expose web interface";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
homelab.global = {
|
||||
backups.jobs = [
|
||||
{
|
||||
name = "${name}-data";
|
||||
backend = "restic";
|
||||
paths = [cfg.dataDir];
|
||||
schedule = "daily";
|
||||
}
|
||||
];
|
||||
|
||||
reverseProxy.entries = mkIf cfg.exposeWeb [
|
||||
{
|
||||
subdomain = name;
|
||||
port = cfg.port;
|
||||
}
|
||||
];
|
||||
|
||||
monitoring.endpoints = mkIf cfg.enableMetrics [
|
||||
{
|
||||
name = name;
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = name;
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Helper to generate nginx configuration from proxy entries
|
||||
generateNginxConfig = proxyEntries: domain: let
|
||||
createVHost = entry: {
|
||||
"${entry.subdomain}.${domain}" = {
|
||||
enableACME = entry.enableSSL;
|
||||
forceSSL = entry.enableSSL;
|
||||
locations."${entry.path}" = {
|
||||
proxyPass = "http://${entry.targetHost}:${toString entry.port}";
|
||||
proxyWebsockets = entry.websockets;
|
||||
extraConfig = ''
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
${concatStringsSep "\n" (mapAttrsToList (
|
||||
name: value: "proxy_set_header ${name} ${value};"
|
||||
)
|
||||
entry.customHeaders)}
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
in
|
||||
foldl' (acc: entry: acc // (createVHost entry)) {} proxyEntries;
|
||||
|
||||
# Helper to generate Prometheus scrape configs
|
||||
generatePrometheusConfig = endpoints: let
|
||||
endpointsByJob = groupBy (e: e.jobName) endpoints;
|
||||
createJobConfig = jobName: jobEndpoints: {
|
||||
job_name = jobName;
|
||||
scrape_interval = (head jobEndpoints).scrapeInterval;
|
||||
metrics_path = (head jobEndpoints).path;
|
||||
static_configs = [
|
||||
{
|
||||
targets = map (e: "${e.targetHost}:${toString e.port}") jobEndpoints;
|
||||
labels = foldl' (acc: e: acc // e.labels) {} jobEndpoints;
|
||||
}
|
||||
];
|
||||
};
|
||||
in
|
||||
mapAttrsToList createJobConfig endpointsByJob;
|
||||
}
|
||||
187
modules/nixos/backup-manager.nix
Normal file
187
modules/nixos/backup-manager.nix
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
# modules/backup-manager.nix
#
# Translates the declarative backup jobs defined in
# `homelab.global.backups.jobs` into systemd oneshot services and timers.
# Supports the "restic" and "borg" backends; repository credentials are
# supplied at runtime via the EnvironmentFile /etc/backup/environment
# (a template is installed at /etc/backup/environment.example).
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.homelab.backups;
  globalCfg = config.homelab.global;

  # Create the systemd service for one backup job.
  # Returns a one-element attrset { "backup-<name>" = <unit>; } so the
  # per-job results can be merged with `//` below.
  createBackupService = job: let
    serviceName = "backup-${job.name}";
    # Global excludes apply to every job; job-specific patterns extend them.
    allExcludes = globalCfg.backups.globalExcludes ++ job.excludePatterns;
    excludeArgs = map (pattern: "--exclude '${pattern}'") allExcludes;

    # Shell fragment for the chosen backend.  No shebang here: the script
    # is wrapped with pkgs.writeShellScript below, which prepends the
    # correct Nix-store bash shebang.  (The previous hand-written
    # `#!/bin/bash` line would fail on NixOS, where /bin/bash does not
    # exist outside an FHS environment.)
    backupScript =
      if job.backend == "restic"
      then ''
        set -euo pipefail

        ${optionalString (job.preHook != null) job.preHook}

        # Restic backup
        ${pkgs.restic}/bin/restic backup \
          ${concatStringsSep " " (map (path: "'${path}'") job.paths)} \
          ${concatStringsSep " " excludeArgs} \
          --tag "host:${globalCfg.hostname}" \
          --tag "job:${job.name}" \
          --tag "env:${globalCfg.environment}"

        # Apply retention policy
        ${pkgs.restic}/bin/restic forget \
          --keep-daily ${job.retention.daily} \
          --keep-weekly ${job.retention.weekly} \
          --keep-monthly ${job.retention.monthly} \
          --keep-yearly ${job.retention.yearly} \
          --prune

        ${optionalString (job.postHook != null) job.postHook}
      ''
      else if job.backend == "borg"
      then ''
        set -euo pipefail

        ${optionalString (job.preHook != null) job.preHook}

        # Borg backup
        ${pkgs.borgbackup}/bin/borg create \
          --stats --progress \
          ${concatStringsSep " " excludeArgs} \
          "::${globalCfg.hostname}-${job.name}-{now}" \
          ${concatStringsSep " " (map (path: "'${path}'") job.paths)}

        # Apply retention policy
        ${pkgs.borgbackup}/bin/borg prune \
          --keep-daily ${job.retention.daily} \
          --keep-weekly ${job.retention.weekly} \
          --keep-monthly ${job.retention.monthly} \
          --keep-yearly ${job.retention.yearly}

        ${optionalString (job.postHook != null) job.postHook}
      ''
      # backupJobType also admits "rclone"; fail evaluation loudly until
      # a backend implementation exists for it.
      else throw "Unsupported backup backend: ${job.backend}";
  in {
    ${serviceName} = {
      description = "Backup job: ${job.name}";
      # Repositories are typically remote — wait for the network.
      after = ["network-online.target"];
      wants = ["network-online.target"];
      serviceConfig = {
        Type = "oneshot";
        User = "backup";
        Group = "backup";
        # writeShellScript produces an executable with a valid Nix-store
        # bash shebang (fixes the former writeScript + "#!/bin/bash").
        ExecStart = pkgs.writeShellScript "backup-${job.name}" backupScript;
        # Runtime secrets (RESTIC_*, BORG_*, AWS_*) are read from here.
        EnvironmentFile = "/etc/backup/environment";
      };
    };
  };

  # Create the systemd timer for one backup job (one timer per service).
  createBackupTimer = job: let
    serviceName = "backup-${job.name}";
    timerName = "${serviceName}.timer";
  in {
    ${timerName} = {
      description = "Timer for backup job: ${job.name}";
      wantedBy = ["timers.target"];
      timerConfig = {
        # The presets map directly onto systemd calendar shorthands;
        # anything else is passed through as an OnCalendar expression.
        OnCalendar =
          if job.schedule == "daily"
          then "daily"
          else if job.schedule == "weekly"
          then "weekly"
          else if job.schedule == "hourly"
          then "hourly"
          else job.schedule; # Assume it's a systemd calendar expression
        # Catch up on runs missed while the machine was powered off.
        Persistent = true;
        # Spread start times to avoid hammering a shared repository.
        RandomizedDelaySec = "15min";
      };
    };
  };
in {
  options.homelab.backups = {
    enable = mkEnableOption "Backup management";

    restic = {
      repository = mkOption {
        type = types.str;
        description = "Restic repository URL";
      };
      passwordFile = mkOption {
        type = types.str;
        default = "/etc/backup/restic-password";
        description = "Path to file containing restic password";
      };
    };

    borg = {
      repository = mkOption {
        type = types.str;
        description = "Borg repository path";
      };
      sshKey = mkOption {
        type = types.str;
        default = "/etc/backup/borg-ssh-key";
        description = "Path to SSH key for borg repository";
      };
    };
  };

  config = mkIf (cfg.enable && globalCfg.enable && (length globalCfg.backups.jobs) > 0) {
    # Dedicated unprivileged user shared by all backup jobs
    users.users.backup = {
      isSystemUser = true;
      group = "backup";
      home = "/var/lib/backup";
      createHome = true;
    };

    users.groups.backup = {};

    # Install backup tools plus a small status helper
    environment.systemPackages = with pkgs; [
      restic
      borgbackup
      rclone

      # writeShellScriptBin supplies the correct bash shebang; the former
      # writeScriptBin + "#!/bin/bash" would not resolve on NixOS.
      (pkgs.writeShellScriptBin "backup-status" ''
        echo "=== Backup Status ==="
        echo
        ${concatStringsSep "\n" (map (job: ''
            echo "Job: ${job.name}"
            systemctl is-active backup-${job.name}.timer || echo "Timer inactive"
            systemctl status backup-${job.name}.timer --no-pager -l | grep -E "(Active|Trigger)" || true
            echo
          '')
          globalCfg.backups.jobs)}
      '')
    ];

    # Create systemd services and timers
    systemd.services = lib.foldl' (acc: job: acc // (createBackupService job)) {} globalCfg.backups.jobs;
    systemd.timers = lib.foldl' (acc: job: acc // (createBackupTimer job)) {} globalCfg.backups.jobs;

    # Environment file template (copy to /etc/backup/environment and fill in)
    environment.etc."backup/environment.example".text = ''
      # Restic configuration
      RESTIC_REPOSITORY=${cfg.restic.repository}
      RESTIC_PASSWORD_FILE=${cfg.restic.passwordFile}

      # AWS S3 credentials (if using S3 backend)
      AWS_ACCESS_KEY_ID=your-access-key
      AWS_SECRET_ACCESS_KEY=your-secret-key

      # Borg configuration
      BORG_REPO=${cfg.borg.repository}
      BORG_RSH="ssh -i ${cfg.borg.sshKey}"

      # Notification settings
      NOTIFICATION_URL=your-webhook-url
    '';
  };
}
|
||||
|
|
@ -1,3 +1,8 @@
|
|||
# Index of NixOS modules exported by this directory; imported by the
# flake to expose each module by name.
{
  ente = import ./ente.nix;
  global-config = import ./global-config.nix;
  backup-manager = import ./backup-manager.nix;

  # Service modules
  services = import ./services;
}
|
||||
|
|
|
|||
|
|
@ -72,6 +72,11 @@ in {
|
|||
type = types.str;
|
||||
description = "The domain under which the photos frontend will be served.";
|
||||
};
|
||||
|
||||
auth = mkOption {
|
||||
type = types.str;
|
||||
description = "The domain under which the auth frontend will be served.";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
|
|
@ -187,6 +192,11 @@ in {
|
|||
name = "ente";
|
||||
user = "ente";
|
||||
};
|
||||
key = {
|
||||
encryption._secret = pkgs.writeText "encryption" "T0sn+zUVFOApdX4jJL4op6BtqqAfyQLH95fu8ASWfno=";
|
||||
hash._secret = pkgs.writeText "hash" "g/dBZBs1zi9SXQ0EKr4RCt1TGr7ZCKkgrpjyjrQEKovWPu5/ce8dYM6YvMIPL23MMZToVuuG+Z6SGxxTbxg5NQ==";
|
||||
};
|
||||
jwt.secret._secret = pkgs.writeText "jwt" "i2DecQmfGreG6q1vBj5tCokhlN41gcfS2cjOs9Po-u8=";
|
||||
};
|
||||
|
||||
systemd.services.ente = {
|
||||
|
|
@ -243,6 +253,7 @@ in {
|
|||
BindReadOnlyPaths = [
|
||||
"${cfgApi.package}/share/museum/migrations:${dataDir}/migrations"
|
||||
"${cfgApi.package}/share/museum/mail-templates:${dataDir}/mail-templates"
|
||||
"${cfgApi.package}/share/museum/web-templates:${dataDir}/web-templates"
|
||||
];
|
||||
|
||||
User = cfgApi.user;
|
||||
|
|
@ -311,7 +322,12 @@ in {
|
|||
in {
|
||||
enable = true;
|
||||
virtualHosts.${domainFor "accounts"} = {
|
||||
forceSSL = mkDefault false;
|
||||
listen = [
|
||||
{
|
||||
addr = "0.0.0.0";
|
||||
port = 3001;
|
||||
}
|
||||
];
|
||||
locations."/" = {
|
||||
root = webPackage "accounts";
|
||||
tryFiles = "$uri $uri.html /index.html";
|
||||
|
|
@ -321,7 +337,12 @@ in {
|
|||
};
|
||||
};
|
||||
virtualHosts.${domainFor "cast"} = {
|
||||
forceSSL = mkDefault false;
|
||||
listen = [
|
||||
{
|
||||
addr = "0.0.0.0";
|
||||
port = 3004;
|
||||
}
|
||||
];
|
||||
locations."/" = {
|
||||
root = webPackage "cast";
|
||||
tryFiles = "$uri $uri.html /index.html";
|
||||
|
|
@ -334,7 +355,12 @@ in {
|
|||
serverAliases = [
|
||||
(domainFor "albums") # the albums app is shared with the photos frontend
|
||||
];
|
||||
forceSSL = mkDefault false;
|
||||
listen = [
|
||||
{
|
||||
addr = "0.0.0.0";
|
||||
port = 3000;
|
||||
}
|
||||
];
|
||||
locations."/" = {
|
||||
root = webPackage "photos";
|
||||
tryFiles = "$uri $uri.html /index.html";
|
||||
|
|
@ -343,6 +369,21 @@ in {
|
|||
'';
|
||||
};
|
||||
};
|
||||
virtualHosts.${domainFor "auth"} = {
|
||||
listen = [
|
||||
{
|
||||
addr = "0.0.0.0";
|
||||
port = 3003;
|
||||
}
|
||||
];
|
||||
locations."/" = {
|
||||
root = webPackage "auth";
|
||||
tryFiles = "$uri $uri.html /index.html";
|
||||
extraConfig = ''
|
||||
add_header Access-Control-Allow-Origin 'https://${cfgWeb.domains.api}';
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
})
|
||||
];
|
||||
|
|
|
|||
462
modules/nixos/global-config.nix
Normal file
462
modules/nixos/global-config.nix
Normal file
|
|
@ -0,0 +1,462 @@
|
|||
# modules/global-config.nix
#
# Central homelab metadata module: declares the per-host service
# registry, monitoring endpoints, backup jobs and reverse-proxy entries,
# and exports the merged configuration to /etc/homelab/config.json so
# runtime tooling (e.g. the MOTD script in ./motd) can consume it.
{
  config,
  lib,
  # NOTE(review): `outputs` is accepted but never used in this module —
  # confirm whether it can be dropped from the argument set.
  outputs,
  ...
}:
with lib; let
  cfg = config.homelab.global;

  # Service type definition: one entry per managed homelab service.
  serviceType = types.submodule {
    options = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Enable this service";
      };

      description = mkOption {
        type = types.str;
        description = "Human-readable description of the service";
      };

      category = mkOption {
        type = types.enum ["monitoring" "networking" "storage" "security" "media" "development" "backup" "other"];
        default = "other";
        description = "Service category for organization";
      };

      dependencies = mkOption {
        type = types.listOf types.str;
        default = [];
        description = "List of other homelab services this depends on";
      };

      ports = mkOption {
        type = types.listOf types.port;
        default = [];
        description = "Ports this service uses";
      };

      tags = mkOption {
        type = types.listOf types.str;
        default = [];
        description = "Additional tags for this service";
      };

      priority = mkOption {
        type = types.int;
        default = 100;
        description = "Service priority (lower numbers start first)";
      };
    };
  };

  # Type definitions
  # One Prometheus-scrapeable endpoint exposed by this host.
  monitoringEndpointType = types.submodule {
    options = {
      name = mkOption {
        type = types.str;
        description = "Name of the monitoring endpoint";
      };
      port = mkOption {
        type = types.port;
        description = "Port number for the endpoint";
      };
      path = mkOption {
        type = types.str;
        default = "/metrics";
        description = "Path for the metrics endpoint";
      };
      jobName = mkOption {
        type = types.str;
        description = "Prometheus job name";
      };
      scrapeInterval = mkOption {
        type = types.str;
        default = "30s";
        description = "Prometheus scrape interval";
      };
      labels = mkOption {
        type = types.attrsOf types.str;
        default = {};
        description = "Additional labels for this endpoint";
      };
    };
  };

  # Declarative backup job; consumed by modules/backup-manager.nix.
  backupJobType = types.submodule {
    options = {
      name = mkOption {
        type = types.str;
        description = "Name of the backup job";
      };
      backend = mkOption {
        # NOTE(review): "rclone" is accepted here but the backup manager
        # only implements restic and borg (it throws otherwise) —
        # confirm before using it.
        type = types.enum ["restic" "borg" "rclone"];
        description = "Backup backend to use";
      };
      paths = mkOption {
        type = types.listOf types.str;
        description = "List of paths to backup";
      };
      schedule = mkOption {
        type = types.str;
        default = "daily";
        description = "Backup schedule (cron format or preset)";
      };
      retention = mkOption {
        # Values are strings (not ints): the backup manager interpolates
        # them directly into --keep-* CLI flags.
        type = types.attrsOf types.str;
        default = {
          daily = "7";
          weekly = "4";
          monthly = "6";
          yearly = "2";
        };
        description = "Retention policy";
      };
      excludePatterns = mkOption {
        type = types.listOf types.str;
        default = [];
        description = "Patterns to exclude from backup";
      };
      preHook = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Script to run before backup";
      };
      postHook = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Script to run after backup";
      };
    };
  };

  # One subdomain -> local port mapping for the reverse proxy.
  reverseProxyEntryType = types.submodule {
    options = {
      subdomain = mkOption {
        type = types.str;
        description = "Subdomain for the service";
      };
      port = mkOption {
        type = types.port;
        description = "Internal port to proxy to";
      };
      path = mkOption {
        type = types.str;
        default = "/";
        description = "Path prefix for the service";
      };
      enableAuth = mkOption {
        type = types.bool;
        default = false;
        description = "Enable authentication for this service";
      };
      enableSSL = mkOption {
        type = types.bool;
        default = true;
        description = "Enable SSL for this service";
      };
      customHeaders = mkOption {
        type = types.attrsOf types.str;
        default = {};
        description = "Custom headers to add";
      };
      websockets = mkOption {
        type = types.bool;
        default = false;
        description = "Enable websocket support";
      };
    };
  };

  # Helper functions for services
  enabledServices = filterAttrs (name: service: service.enable) cfg.services;
  servicesByCategory = category: filterAttrs (name: service: service.enable && service.category == category) cfg.services;
in {
  imports = [
    ./motd
  ];

  options.homelab.global = {
    enable = mkEnableOption "Global homelab configuration";

    hostname = mkOption {
      type = types.str;
      description = "Hostname for this system";
    };

    domain = mkOption {
      type = types.str;
      default = "procopius.dk";
      description = "Base domain for the homelab";
    };

    environment = mkOption {
      type = types.enum ["production" "staging" "development"];
      default = "production";
      description = "Environment type";
    };

    location = mkOption {
      type = types.str;
      default = "homelab";
      description = "Physical location identifier";
    };

    tags = mkOption {
      type = types.listOf types.str;
      default = [];
      description = "Tags for this system";
    };

    services = mkOption {
      type = types.attrsOf serviceType;
      default = {};
      description = "Homelab services configuration";
      example = literalExpression ''
        {
          prometheus = {
            enable = true;
            description = "Metrics collection and monitoring";
            category = "monitoring";
            ports = [ 9090 ];
            tags = [ "metrics" "alerting" ];
          };

          traefik = {
            enable = true;
            description = "Reverse proxy and load balancer";
            category = "networking";
            ports = [ 80 443 8080 ];
            tags = [ "proxy" "loadbalancer" ];
            priority = 10;
          };
        }
      '';
    };

    monitoring = {
      endpoints = mkOption {
        type = types.listOf monitoringEndpointType;
        default = [];
        description = "Monitoring endpoints exposed by this system";
      };

      nodeExporter = {
        enable = mkOption {
          type = types.bool;
          default = true;
          description = "Enable node exporter";
        };
        port = mkOption {
          type = types.port;
          default = 9100;
          description = "Node exporter port";
        };
      };
    };

    backups = {
      jobs = mkOption {
        type = types.listOf backupJobType;
        default = [];
        description = "Backup jobs for this system";
      };

      globalExcludes = mkOption {
        type = types.listOf types.str;
        default = [
          "*.tmp"
          "*.cache"
          "*/.git"
          "*/node_modules"
          "*/target"
        ];
        description = "Global exclude patterns for all backup jobs";
      };
    };

    reverseProxy = {
      entries = mkOption {
        type = types.listOf reverseProxyEntryType;
        default = [];
        description = "Reverse proxy entries for this system";
      };
    };

    # Helper function to add monitoring endpoint
    # NOTE(review): exposing helpers as function-typed options is an
    # unusual pattern (helpers usually live in `lib`); confirm these are
    # actually consumed anywhere before relying on them.
    addMonitoringEndpoint = mkOption {
      type = types.functionTo (types.functionTo types.anything);
      default = name: endpoint: {
        homelab.global.monitoring.endpoints = [
          (endpoint // {inherit name;})
        ];
      };
      description = "Helper function to add monitoring endpoints";
    };

    # Helper function to add backup job
    addBackupJob = mkOption {
      type = types.functionTo (types.functionTo types.anything);
      default = name: job: {
        homelab.global.backups.jobs = [
          (job // {inherit name;})
        ];
      };
      description = "Helper function to add backup jobs";
    };

    # Helper function to add reverse proxy entry
    addReverseProxyEntry = mkOption {
      type = types.functionTo (types.functionTo types.anything);
      default = subdomain: entry: {
        homelab.global.reverseProxy.entries = [
          (entry // {inherit subdomain;})
        ];
      };
      description = "Helper function to add reverse proxy entries";
    };

    # Helper functions
    # Read-only derived views over the service registry.
    enabledServicesList = mkOption {
      type = types.listOf types.str;
      default = attrNames enabledServices;
      description = "List of enabled service names";
      readOnly = true;
    };

    servicesByPriority = mkOption {
      type = types.listOf types.str;
      default =
        map (x: x.name) (sort (a: b: a.priority < b.priority)
          (mapAttrsToList (name: service: service // {inherit name;}) enabledServices));
      description = "Services sorted by priority";
      readOnly = true;
    };
  };

  config = mkIf cfg.enable {
    # Set hostname
    networking.hostName = cfg.hostname;

    # Configure node exporter if enabled
    services.prometheus.exporters.node = mkIf cfg.monitoring.nodeExporter.enable {
      enable = true;
      port = cfg.monitoring.nodeExporter.port;
      enabledCollectors = [
        "systemd"
        "textfile"
        "filesystem"
        "loadavg"
        "meminfo"
        "netdev"
        "stat"
      ];
    };

    # Automatically add node exporter to monitoring endpoints
    homelab.global.monitoring.endpoints = mkIf cfg.monitoring.nodeExporter.enable [
      {
        name = "node-exporter";
        port = cfg.monitoring.nodeExporter.port;
        path = "/metrics";
        jobName = "node";
        labels = {
          instance = cfg.hostname;
          environment = cfg.environment;
          location = cfg.location;
        };
      }
    ];

    # Export configuration for external consumption.
    # Read at runtime (via jq) by homelab-motd and similar tooling; the
    # function-typed helper options above are not part of this attrset
    # and so are not serialized.
    environment.etc."homelab/config.json".text = builtins.toJSON {
      inherit (cfg) hostname domain environment location tags;

      services =
        mapAttrs (name: service: {
          inherit (service) enable description category dependencies ports tags priority;
        })
        cfg.services;

      enabledServices = enabledServices;

      servicesByCategory = {
        monitoring = servicesByCategory "monitoring";
        networking = servicesByCategory "networking";
        storage = servicesByCategory "storage";
        security = servicesByCategory "security";
        media = servicesByCategory "media";
        development = servicesByCategory "development";
        backup = servicesByCategory "backup";
        other = servicesByCategory "other";
      };

      monitoring = {
        endpoints =
          map (endpoint: {
            name = endpoint.name;
            # assumes cfg.hostname resolves on the local network — TODO confirm
            url = "http://${cfg.hostname}:${toString endpoint.port}${endpoint.path}";
            port = endpoint.port;
            path = endpoint.path;
            jobName = endpoint.jobName;
            scrapeInterval = endpoint.scrapeInterval;
            labels =
              endpoint.labels
              // {
                hostname = cfg.hostname;
                environment = cfg.environment;
              };
          })
          cfg.monitoring.endpoints;
      };

      backups = {
        jobs = cfg.backups.jobs;
      };

      reverseProxy = {
        entries =
          map (entry: {
            subdomain = entry.subdomain;
            url = "http://${cfg.hostname}:${toString entry.port}";
            port = entry.port;
            path = entry.path;
            domain = "${entry.subdomain}.${cfg.domain}";
            enableAuth = entry.enableAuth;
            enableSSL = entry.enableSSL;
            customHeaders = entry.customHeaders;
            websockets = entry.websockets;
          })
          cfg.reverseProxy.entries;
      };
    };

    # Create a status command that shows service information
    # (currently disabled; the list below is intentionally empty)
    environment.systemPackages = [
      # (pkgs.writeScriptBin "homelab-services" ''
      #   #!/bin/bash
      #   echo "🏠 Homelab Services Status"
      #   echo "=========================="
      #   echo

      #   ${concatStringsSep "\n" (mapAttrsToList (name: service: ''
      #     echo "${name}: ${service.description}"
      #     echo "  Category: ${service.category}"
      #     echo "  Status: $(systemctl is-active ${name} 2>/dev/null || echo "not found")"
      #     ${optionalString (service.ports != []) ''
      #       echo "  Ports: ${concatStringsSep ", " (map toString service.ports)}"
      #     ''}
      #     ${optionalString (service.tags != []) ''
      #       echo "  Tags: ${concatStringsSep ", " service.tags}"
      #     ''}
      #     echo
      #   '')
      #   enabledServices)}
      # '')
    ];
  };
}
|
||||
304
modules/nixos/motd/default.nix
Normal file
304
modules/nixos/motd/default.nix
Normal file
|
|
@ -0,0 +1,304 @@
|
|||
# modules/motd/default.nix
#
# Dynamic message-of-the-day for homelab hosts.  Renders system info and
# service / monitoring / backup / reverse-proxy status by parsing the
# JSON snapshot written to /etc/homelab/config.json by global-config.nix.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.homelab.motd;
  globalCfg = config.homelab.global;
  # Services registered in the global registry with enable = true.
  enabledServices = filterAttrs (name: service: service.enable) globalCfg.services;

  # The MOTD renderer.  The per-service status lines are baked in at
  # evaluation time from cfg.services; everything else is read from
  # /etc/homelab/config.json at runtime via jq.
  # Note: writeShellScriptBin already prepends a Nix-store bash shebang;
  # the "#! /usr/bin/env bash" line below is redundant (it survives as a
  # comment in the generated script).
  homelab-motd = pkgs.writeShellScriptBin "homelab-motd" ''
    #! /usr/bin/env bash
    source /etc/os-release

    # Colors for output
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    BLUE='\033[0;34m'
    PURPLE='\033[0;35m'
    CYAN='\033[0;36m'
    WHITE='\033[1;37m'
    NC='\033[0m' # No Color
    BOLD='\033[1m'

    # Helper functions
    print_header() {
      echo -e "''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}"
      echo -e "''${BOLD}''${BLUE}║''${NC}''${WHITE}                      🏠 HOMELAB STATUS                       ''${NC}''${BOLD}''${BLUE}║''${NC}"
      echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}"
    }

    print_section() {
      echo -e "\n''${BOLD}''${CYAN}▶ $1''${NC}"
      echo -e "''${CYAN}─────────────────────────────────────────────────────────────''${NC}"
    }

    get_service_status() {
      local service="$1"
      if ${pkgs.systemd}/bin/systemctl is-active --quiet "$service" 2>/dev/null; then
        echo -e "''${GREEN}●''${NC} Active"
      elif ${pkgs.systemd}/bin/systemctl is-enabled --quiet "$service" 2>/dev/null; then
        echo -e "''${YELLOW}●''${NC} Inactive"
      else
        echo -e "''${RED}●''${NC} Disabled"
      fi
    }

    get_timer_status() {
      local timer="$1"
      if ${pkgs.systemd}/bin/systemctl is-active --quiet "$timer" 2>/dev/null; then
        local next_run=$(${pkgs.systemd}/bin/systemctl show "$timer" --property=NextElapseUSecRealtime --value 2>/dev/null || echo "0")
        if [[ "$next_run" != "0" && "$next_run" != "n/a" ]]; then
          local next_readable=$(${pkgs.systemd}/bin/systemctl list-timers --no-pager "$timer" 2>/dev/null | tail -n +2 | head -n 1 | awk '{print $1, $2}' || echo "Unknown")
          echo -e "''${GREEN}●''${NC} Next: ''${next_readable}"
        else
          echo -e "''${GREEN}●''${NC} Active"
        fi
      else
        echo -e "''${RED}●''${NC} Inactive"
      fi
    }

    # Main script
    ${optionalString cfg.clearScreen "clear"}
    print_header

    # Check if global config exists
    CONFIG_FILE="/etc/homelab/config.json"
    if [[ ! -f "$CONFIG_FILE" ]]; then
      echo -e "''${RED}❌ Global homelab configuration not found at $CONFIG_FILE''${NC}"
      exit 1
    fi

    # Parse global configuration
    HOSTNAME=$(${pkgs.jq}/bin/jq -r '.hostname' "$CONFIG_FILE" 2>/dev/null || hostname)
    DOMAIN=$(${pkgs.jq}/bin/jq -r '.domain' "$CONFIG_FILE" 2>/dev/null || echo "unknown")
    ENVIRONMENT=$(${pkgs.jq}/bin/jq -r '.environment' "$CONFIG_FILE" 2>/dev/null || echo "unknown")
    LOCATION=$(${pkgs.jq}/bin/jq -r '.location' "$CONFIG_FILE" 2>/dev/null || echo "unknown")
    TAGS=$(${pkgs.jq}/bin/jq -r '.tags[]?' "$CONFIG_FILE" 2>/dev/null | tr '\n' ' ' || echo "none")

    print_section "SYSTEM INFO"
    echo -e "  ''${BOLD}Hostname:''${NC}    $HOSTNAME"
    echo -e "  ''${BOLD}Domain:''${NC}      $DOMAIN"
    echo -e "  ''${BOLD}Environment:''${NC} $ENVIRONMENT"
    echo -e "  ''${BOLD}Location:''${NC}    $LOCATION"
    echo -e "  ''${BOLD}Tags:''${NC}        ''${TAGS:-none}"
    echo -e "  ''${BOLD}Uptime:''${NC}      $(${pkgs.procps}/bin/uptime -p)"
    echo -e "  ''${BOLD}Load:''${NC}        $(${pkgs.procps}/bin/uptime | awk -F'load average:' '{print $2}' | xargs)"

    ${optionalString cfg.showServices ''
      # Enabled services from homelab config
      print_section "HOMELAB SERVICES"
      ${concatStringsSep "\n" (mapAttrsToList (name: service: ''
          status=$(get_service_status "${service.systemdService}")
          printf "  %-25s %s\n" "${name}" "$status"
        '')
        cfg.services)}
    ''}

    ${optionalString cfg.showMonitoring ''
      # Monitoring endpoints
      print_section "MONITORING ENDPOINTS"
      ENDPOINTS=$(${pkgs.jq}/bin/jq -c '.monitoring.endpoints[]?' "$CONFIG_FILE" 2>/dev/null || echo "")
      if [[ -n "$ENDPOINTS" ]]; then
        while IFS= read -r endpoint; do
          name=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.name')
          port=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.port')
          path=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.path')
          job=$(echo "$endpoint" | ${pkgs.jq}/bin/jq -r '.jobName')

          # Check if port is accessible
          if ${pkgs.netcat}/bin/nc -z localhost "$port" 2>/dev/null; then
            status="''${GREEN}●''${NC}"
          else
            status="''${RED}●''${NC}"
          fi

          printf "  %-20s %s %s:%s%s (job: %s)\n" "$name" "$status" "$HOSTNAME" "$port" "$path" "$job"
        done <<< "$ENDPOINTS"
      else
        echo -e "  ''${YELLOW}No monitoring endpoints configured''${NC}"
      fi
    ''}

    ${optionalString cfg.showBackups ''
      # Backup jobs status
      print_section "BACKUP JOBS"
      BACKUP_JOBS=$(${pkgs.jq}/bin/jq -c '.backups.jobs[]?' "$CONFIG_FILE" 2>/dev/null || echo "")
      if [[ -n "$BACKUP_JOBS" ]]; then
        while IFS= read -r job; do
          name=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.name')
          backend=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.backend')
          schedule=$(echo "$job" | ${pkgs.jq}/bin/jq -r '.schedule')

          service_name="backup-''${name}"
          timer_name="''${service_name}.timer"

          timer_status=$(get_timer_status "$timer_name")

          # Get last backup info
          last_run="Unknown"
          if ${pkgs.systemd}/bin/systemctl show "$service_name" --property=ExecMainStartTimestamp --value 2>/dev/null | grep -q "^[^n]"; then
            last_run=$(${pkgs.systemd}/bin/systemctl show "$service_name" --property=ExecMainStartTimestamp --value 2>/dev/null | head -1)
            if [[ "$last_run" != "n/a" && -n "$last_run" ]]; then
              last_run=$(${pkgs.coreutils}/bin/date -d "$last_run" "+%Y-%m-%d %H:%M" 2>/dev/null || echo "Unknown")
            fi
          fi

          printf "  %-20s %s (%s, %s) Last: %s\n" "$name" "$timer_status" "$backend" "$schedule" "$last_run"
        done <<< "$BACKUP_JOBS"

        # Show backup-status command output if available
        if command -v backup-status >/dev/null 2>&1; then
          echo -e "\n  ''${BOLD}Quick Status:''${NC}"
          backup-status 2>/dev/null | tail -n +3 | head -10 | sed 's/^/  /'
        fi
      else
        echo -e "  ''${YELLOW}No backup jobs configured''${NC}"
      fi
    ''}

    ${optionalString cfg.showReverseProxy ''
      # Reverse proxy entries
      print_section "REVERSE PROXY ENTRIES"
      PROXY_ENTRIES=$(${pkgs.jq}/bin/jq -c '.reverseProxy.entries[]?' "$CONFIG_FILE" 2>/dev/null || echo "")
      if [[ -n "$PROXY_ENTRIES" ]]; then
        while IFS= read -r entry; do
          subdomain=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.subdomain')
          port=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.port')
          domain=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.domain')
          auth=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.enableAuth')
          ssl=$(echo "$entry" | ${pkgs.jq}/bin/jq -r '.enableSSL')

          # Check if service is running on the port
          if ${pkgs.netcat}/bin/nc -z localhost "$port" 2>/dev/null; then
            status="''${GREEN}●''${NC}"
          else
            status="''${RED}●''${NC}"
          fi

          auth_indicator=""
          [[ "$auth" == "true" ]] && auth_indicator=" 🔐"

          ssl_indicator=""
          [[ "$ssl" == "true" ]] && ssl_indicator=" 🔒"

          printf "  %-25s %s :%s → %s%s%s\n" "''${domain}" "$status" "$port" "$domain" "$auth_indicator" "$ssl_indicator"
        done <<< "$PROXY_ENTRIES"
      else
        echo -e "  ''${YELLOW}No reverse proxy entries configured''${NC}"
      fi
    ''}

    ${optionalString cfg.showResources ''
      # Resource usage
      print_section "RESOURCE USAGE"
      echo -e "  ''${BOLD}Memory:''${NC}      $(${pkgs.procps}/bin/free -h | awk '/^Mem:/ {printf "%s/%s (%.1f%%)", $3, $2, ($3/$2)*100}')"
      echo -e "  ''${BOLD}Disk (root):''${NC} $(${pkgs.coreutils}/bin/df -h / | awk 'NR==2 {printf "%s/%s (%s)", $3, $2, $5}')"
      echo -e "  ''${BOLD}CPU Usage:''${NC}   $(${pkgs.procps}/bin/top -bn1 | grep "Cpu(s)" | awk '{printf "%.1f%%", $2+$4}' | sed 's/%us,//')%"
    ''}

    ${optionalString cfg.showRecentIssues ''
      # Recent logs (errors only)
      print_section "RECENT ISSUES"
      error_count=$(${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err --no-pager -q | wc -l)
      if [[ "$error_count" -gt 0 ]]; then
        echo -e "  ''${RED}⚠ $error_count errors in last 24h''${NC}"
        ${pkgs.systemd}/bin/journalctl --since "24 hours ago" --priority=err --no-pager -q | tail -3 | sed 's/^/    /'
      else
        echo -e "  ''${GREEN}✓ No critical errors in last 24h''${NC}"
      fi
    ''}

    echo -e "\n''${BOLD}''${BLUE}╔══════════════════════════════════════════════════════════════╗''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC}  ''${WHITE}Run 'backup-status' for detailed backup info                ''${NC}''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}║''${NC}  ''${WHITE}Config: /etc/homelab/config.json                            ''${NC}''${BOLD}''${BLUE}║''${NC}"
    echo -e "''${BOLD}''${BLUE}╚══════════════════════════════════════════════════════════════╝''${NC}"
    echo
  '';
in {
  options.homelab.motd = {
    enable = mkEnableOption "Dynamic homelab MOTD";

    clearScreen = mkOption {
      type = types.bool;
      default = true;
      description = "Clear screen before showing MOTD";
    };

    showServices = mkOption {
      type = types.bool;
      default = true;
      description = "Show enabled homelab services";
    };

    showMonitoring = mkOption {
      type = types.bool;
      default = true;
      description = "Show monitoring endpoints";
    };

    showBackups = mkOption {
      type = types.bool;
      default = true;
      description = "Show backup jobs status";
    };

    showReverseProxy = mkOption {
      type = types.bool;
      default = true;
      description = "Show reverse proxy entries";
    };

    showResources = mkOption {
      type = types.bool;
      default = true;
      description = "Show system resource usage";
    };

    showRecentIssues = mkOption {
      type = types.bool;
      default = true;
      description = "Show recent system issues";
    };

    services = mkOption {
      type = types.attrsOf (types.submodule {
        options = {
          systemdService = mkOption {
            type = types.str;
            description = "Name of the systemd service to monitor";
          };
          description = mkOption {
            type = types.str;
            default = "";
            description = "Human-readable description of the service";
          };
        };
      });
      default = {};
      description = "Homelab services to monitor in MOTD";
    };
  };

  config = mkIf (cfg.enable && globalCfg.enable) {
    # Register services with MOTD.
    # NOTE(review): assumes each homelab service name equals its systemd
    # unit name — TODO confirm for services whose unit is named differently.
    homelab.motd.services =
      mapAttrs (name: service: {
        systemdService = name;
        description = service.description;
      })
      enabledServices;

    # Create a command to manually run the MOTD
    environment.systemPackages = with pkgs; [
      jq
      netcat
      homelab-motd
    ];
  };
}
|
||||
4
modules/nixos/services/default.nix
Normal file
4
modules/nixos/services/default.nix
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# Index of homelab service modules, exposed as an attrset so callers
# (e.g. a flake or module collector) can import services by name.
{
  jellyfin = import ./jellyfin.nix;
  grafana = import ./grafana.nix;
}
|
||||
0
modules/nixos/services/forgejo-runner.nix
Normal file
0
modules/nixos/services/forgejo-runner.nix
Normal file
1
modules/nixos/services/forgejo.nix
Normal file
1
modules/nixos/services/forgejo.nix
Normal file
|
|
@ -0,0 +1 @@
|
|||
|
||||
72
modules/nixos/services/grafana.nix
Normal file
72
modules/nixos/services/grafana.nix
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
# modules/services/grafana.nix
#
# Grafana dashboard wired into the homelab framework: configures the
# service itself and registers backup, reverse-proxy and monitoring
# entries under homelab.global.
#
# NOTE(review): this declares options under `services.grafana`, the same
# namespace nixpkgs' own grafana module uses; if both modules end up in
# the import list the duplicate `enable`/`port` declarations will fail
# evaluation. Consider moving these under `homelab.services.grafana`
# (as prometheus.nix does) — TODO confirm against the module list.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.services.grafana;
in {
  options.services.grafana = {
    enable = mkEnableOption "Grafana monitoring dashboard";

    port = mkOption {
      type = types.port;
      default = 3000;
      description = "Grafana web interface port";
    };

    # NOTE(review): a plain-string password is copied into the
    # world-readable Nix store; prefer an `adminPasswordFile` option
    # long-term.
    adminPassword = mkOption {
      type = types.str;
      description = "Admin password for Grafana";
    };
  };

  config = mkIf cfg.enable {
    services.grafana = {
      enable = true;
      settings = {
        server = {
          http_port = cfg.port;
          # Advertised FQDN derived from the homelab identity.
          domain = "${config.homelab.global.hostname}.${config.homelab.global.domain}";
        };
        security = {
          admin_password = cfg.adminPassword;
        };
      };
    };

    # Register this instance with the shared homelab plumbing.
    homelab.global = {
      # Daily backup of Grafana's state; plugins and rendered PNGs are
      # reproducible, so they are excluded.
      backups.jobs = [
        {
          name = "grafana-data";
          backend = "restic";
          paths = ["/var/lib/grafana"];
          schedule = "daily";
          excludePatterns = ["*/plugins/*" "*/png/*"];
        }
      ];

      reverseProxy.entries = [
        {
          subdomain = "grafana";
          port = cfg.port;
          enableAuth = false; # Grafana handles its own auth
        }
      ];

      monitoring.endpoints = [
        {
          name = "grafana";
          port = cfg.port;
          path = "/metrics";
          jobName = "grafana";
          labels = {
            service = "grafana";
            type = "monitoring";
          };
        }
      ];
    };
  };
}
|
||||
125
modules/nixos/services/jellyfin.nix
Normal file
125
modules/nixos/services/jellyfin.nix
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
# modules/services/jellyfin.nix
#
# Jellyfin media server wired into the homelab framework: the service
# itself plus backup jobs, a reverse-proxy entry and a metrics endpoint.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.services.jellyfin;
in {
  # NOTE(review): these options live under `services.jellyfin`, the same
  # namespace as nixpkgs' jellyfin module (which already declares
  # `enable` and `dataDir`); importing both would fail with duplicate
  # option declarations — TODO confirm how this module is imported.
  options.services.jellyfin = {
    enable = mkEnableOption "Jellyfin media server";

    port = mkOption {
      type = types.port;
      default = 8096;
      description = "Port for Jellyfin web interface";
    };

    dataDir = mkOption {
      type = types.str;
      default = "/var/lib/jellyfin";
      description = "Directory to store Jellyfin data";
    };

    mediaDir = mkOption {
      type = types.str;
      default = "/media";
      description = "Directory containing media files";
    };

    enableMetrics = mkOption {
      type = types.bool;
      default = true;
      description = "Enable Prometheus metrics";
    };

    exposeWeb = mkOption {
      type = types.bool;
      default = true;
      description = "Expose web interface through reverse proxy";
    };
  };

  config = mkIf cfg.enable {
    # Enable the service
    services.jellyfin = {
      enable = true;
      dataDir = cfg.dataDir;
    };

    # Configure global settings
    homelab.global = {
      # Add backup job for Jellyfin data
      backups.jobs = [
        {
          # Config/state backup: small, taken daily with the service
          # stopped for consistency (see pre/post hooks below).
          name = "jellyfin-config";
          backend = "restic";
          paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"];
          schedule = "0 2 * * *"; # Daily at 2 AM
          excludePatterns = [
            "*/cache/*"
            "*/transcodes/*"
            "*/logs/*"
          ];
          preHook = ''
            # Stop jellyfin for consistent backup
            systemctl stop jellyfin
          '';
          postHook = ''
            # Restart jellyfin after backup
            systemctl start jellyfin
          '';
        }
        {
          # Bulk media backup: large, so only weekly.
          name = "jellyfin-media";
          backend = "restic";
          paths = [cfg.mediaDir];
          schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM
          excludePatterns = [
            "*.tmp"
            "*/.@__thumb/*" # Synology thumbnails
          ];
        }
      ];

      # Add reverse proxy entry if enabled
      reverseProxy.entries = mkIf cfg.exposeWeb [
        {
          subdomain = "jellyfin";
          port = cfg.port;
          enableAuth = false; # Jellyfin has its own auth
          websockets = true;
          customHeaders = {
            "X-Forwarded-Proto" = "$scheme";
            "X-Forwarded-Host" = "$host";
          };
        }
      ];

      # Add monitoring endpoint if metrics enabled
      monitoring.endpoints = mkIf cfg.enableMetrics [
        {
          name = "jellyfin";
          port = cfg.port;
          path = "/metrics"; # Assuming you have a metrics plugin
          jobName = "jellyfin";
          scrapeInterval = "60s";
          labels = {
            service = "jellyfin";
            type = "media-server";
          };
        }
      ];
    };

    # Open firewall
    networking.firewall.allowedTCPPorts = [cfg.port];

    # Create media directory
    systemd.tmpfiles.rules = [
      "d ${cfg.mediaDir} 0755 jellyfin jellyfin -"
    ];
  };
}
|
||||
0
modules/nixos/services/postgres.nix
Normal file
0
modules/nixos/services/postgres.nix
Normal file
208
modules/nixos/services/prometheus.nix
Normal file
208
modules/nixos/services/prometheus.nix
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
# modules/services/prometheus.nix
#
# Prometheus server for the homelab: auto-discovers scrape targets from
# homelab.global.monitoring.endpoints and registers itself with the
# framework (service registry, reverse proxy, backups).
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.homelab.services.prometheus;
  globalCfg = config.homelab.global;
in {
  options.homelab.services.prometheus = {
    enable = mkEnableOption "Prometheus monitoring server";

    port = mkOption {
      type = types.port;
      default = 9090;
      description = "Prometheus server port";
    };

    webExternalUrl = mkOption {
      type = types.str;
      default = "http://${globalCfg.hostname}:${toString cfg.port}";
      description = "External URL for Prometheus";
    };

    retention = mkOption {
      type = types.str;
      default = "30d";
      description = "Data retention period";
    };

    scrapeConfigs = mkOption {
      type = types.listOf types.attrs;
      default = [];
      description = "Additional scrape configurations";
    };

    alertmanager = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Enable Alertmanager integration";
      };

      url = mkOption {
        type = types.str;
        default = "http://localhost:9093";
        description = "Alertmanager URL";
      };
    };
  };

  config = mkIf cfg.enable {
    # Register service with global homelab config
    homelab.global.services.prometheus = {
      enable = true;
      description = "Metrics collection and monitoring server";
      category = "monitoring";
      ports = [cfg.port];
      tags = ["metrics" "monitoring" "alerting"];
      priority = 20;
      dependencies = ["node-exporter"];
    };

    # Configure the actual Prometheus service
    services.prometheus = {
      enable = true;
      port = cfg.port;
      webExternalUrl = cfg.webExternalUrl;

      retentionTime = cfg.retention;

      scrapeConfigs =
        [
          # Auto-discover monitoring endpoints from global config
          {
            job_name = "homelab-auto";
            static_configs = [
              {
                targets =
                  map (
                    endpoint: "${globalCfg.hostname}:${toString endpoint.port}"
                  )
                  globalCfg.monitoring.endpoints;
              }
            ];
            scrape_interval = "30s";
            metrics_path = "/metrics";
          }
        ]
        ++ cfg.scrapeConfigs;

      # Alertmanager configuration
      alertmanagers = mkIf cfg.alertmanager.enable [
        {
          static_configs = [
            {
              targets = [cfg.alertmanager.url];
            }
          ];
        }
      ];

      # FIX: `services.prometheus.rules` takes rule *text* (a list of
      # strings); passing a pkgs.writeText derivation there would embed
      # the store path string as the rule body. File-based rules belong
      # in `ruleFiles` (a list of paths) instead.
      ruleFiles = [
        # Basic homelab alerting rules
        (pkgs.writeText "homelab-alerts.yml" ''
          groups:
            - name: homelab
              rules:
                - alert: ServiceDown
                  expr: up == 0
                  for: 5m
                  labels:
                    severity: critical
                  annotations:
                    summary: "Service {{ $labels.instance }} is down"
                    description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes."

                - alert: HighMemoryUsage
                  expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
                  for: 10m
                  labels:
                    severity: warning
                  annotations:
                    summary: "High memory usage on {{ $labels.instance }}"
                    description: "Memory usage is above 90% on {{ $labels.instance }}"

                - alert: HighDiskUsage
                  expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85
                  for: 5m
                  labels:
                    severity: warning
                  annotations:
                    summary: "High disk usage on {{ $labels.instance }}"
                    description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}"
        '')
      ];
    };

    # Add monitoring endpoint to global config
    homelab.global.monitoring.endpoints = [
      {
        name = "prometheus";
        port = cfg.port;
        path = "/metrics";
        jobName = "prometheus";
        scrapeInterval = "30s";
        labels = {
          service = "prometheus";
          role = "monitoring";
        };
      }
    ];

    # Add reverse proxy entry if configured
    homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [
      {
        subdomain = "prometheus";
        port = cfg.port;
        path = "/";
        enableAuth = true;
        enableSSL = true;
        customHeaders = {
          "X-Frame-Options" = "DENY";
          "X-Content-Type-Options" = "nosniff";
        };
      }
    ];

    # Add backup job for Prometheus data
    homelab.global.backups.jobs = [
      {
        name = "prometheus-data";
        backend = "restic";
        paths = ["/var/lib/prometheus2"];
        schedule = "daily";
        retention = {
          daily = "7";
          weekly = "4";
          monthly = "3";
          yearly = "1";
        };
        # WAL is rewritten constantly and reconstructable; skip it.
        excludePatterns = [
          "*.tmp"
          "*/wal/*"
        ];
        preHook = ''
          # Stop prometheus temporarily for consistent backup
          systemctl stop prometheus
        '';
        postHook = ''
          # Restart prometheus after backup
          systemctl start prometheus
        '';
      }
    ];

    # Open firewall port
    networking.firewall.allowedTCPPorts = [cfg.port];

    # Create prometheus configuration directory
    systemd.tmpfiles.rules = [
      "d /var/lib/prometheus2 0755 prometheus prometheus -"
      "d /etc/prometheus 0755 root root -"
    ];
  };
}
|
||||
4
modules/nixos/system/backups/backrest.nix
Normal file
4
modules/nixos/system/backups/backrest.nix
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
# TODO
|
||||
# https://github.com/L-Trump/nixos-configs/blob/ab3fb16e330b8a2904b9967e46af8c061b56266e/modules/nixos/server/backrest.nix#L7
|
||||
}
|
||||
95
modules/nixos/system/backups/backups-option.nix
Normal file
95
modules/nixos/system/backups/backups-option.nix
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# backups-option.nix
#
# Builds the `backups` option: an attrset of backup-job submodules.
# Takes the parent module's cfg (config.system.backups) as its single
# argument so the job type can reference the registered backends and
# reach lib through the internal `cfg.lib` option.
cfg: let
  inherit (cfg.lib) mkOption types attrNames;
in
  mkOption {
    type = types.attrsOf (
      types.submodule (
        {
          name,
          config,
          ...
        } @ args: {
          options = {
            # Which registered backend executes this job; validated
            # against the names registered under cfg.backends.
            backend = mkOption {
              type = types.enum (attrNames cfg.backends);
              description = "The backup backend to use";
            };

            paths = mkOption {
              type = types.listOf types.str;
              default = [];
              description = "Paths to backup";
            };

            enable = mkOption {
              type = types.bool;
              default = true;
              description = "Whether to enable this backup job";
            };

            timerConfig = mkOption {
              type = with types; nullOr attrs;
              default = null;
              example = {
                OnCalendar = "00:05";
                Persistent = true;
                RandomizedDelaySec = "5h";
              };
              description = ''
                When to run the backup. If null, inherits from backend's default timerConfig.
                Set to null to disable automatic scheduling.
              '';
            };

            # Backend-specific schema: the function registered under
            # cfg.backends.<backend> returns the submodule body, and it
            # receives the enclosing job's name/config so it can derive
            # per-job defaults (e.g. tags).
            backendOptions = mkOption {
              type = let
                backupConfig = config;
                backupName = name;
              in
                types.submodule (
                  backendArgs:
                    cfg.backends.${args.config.backend} (backendArgs // {inherit backupConfig backupName;})
                );
              default = {};
              description = "Backend-specific options";
            };

            preBackupScript = mkOption {
              type = types.lines;
              default = "";
              description = "Script to run before backing up";
            };

            postBackupScript = mkOption {
              type = types.lines;
              default = "";
              description = ''
                Script to run after backing up. Runs even if the backup fails.
              '';
            };

            notifications = {
              failure = {
                enable = mkOption {
                  type = types.bool;
                  default = true;
                  description = "Enable failure notifications";
                };
              };

              success = {
                enable = mkOption {
                  type = types.bool;
                  default = false;
                  description = "Enable success notifications";
                };
              };
            };
          };
        }
      )
    );
    default = {};
    description = "Backup job definitions";
  }
|
||||
6
modules/nixos/system/backups/default.nix
Normal file
6
modules/nixos/system/backups/default.nix
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Backup system entry point: option plumbing (root.nix) plus the
# backend implementations.
{
  imports = [
    ./root.nix
    ./restic.nix
  ];
}
|
||||
234
modules/nixos/system/backups/restic.nix
Normal file
234
modules/nixos/system/backups/restic.nix
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
# restic.nix - Restic backend implementation
#
# Registers "restic" with system.backups.backends and translates every
# enabled restic job into a services.restic.backups entry, plus an
# optional periodic repository-maintenance service.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.system.backups;
  resticCfg = cfg.restic;

  # Get only restic backups that are enabled
  resticBackups = filterAttrs (_: backup: backup.backend == "restic" && backup.enable) cfg.backups;

  # Create restic service configurations: global defaults deep-merged
  # with the job's backendOptions, then the job's own paths and
  # effective timer layered on top.
  createResticServices =
    mapAttrs (
      name: backup: let
        serviceConfig =
          recursiveUpdate resticCfg.defaultBackendOptions backup.backendOptions
          // {
            inherit (backup) paths;

            # Use backup-specific timer or fall back to global default
            timerConfig =
              if backup.timerConfig != null
              then backup.timerConfig
              else resticCfg.timerConfig;
          };
      in
        serviceConfig
    )
    resticBackups;
in {
  options.system.backups.restic = {
    enable = mkEnableOption "restic backup backend";

    timerConfig = mkOption {
      type = types.attrs;
      default = {
        OnCalendar = "*-*-* 05:00:00";
        Persistent = true;
      };
      description = "Default systemd timer configuration for restic backups";
    };

    defaultBackendOptions = mkOption {
      type = types.attrs;
      default = {};
      example = {
        repository = "/backup/restic";
        passwordFile = "/etc/nixos/secrets/restic-password";
        initialize = true;
        pruneOpts = [
          "--keep-daily 7"
          "--keep-weekly 5"
          "--keep-monthly 12"
          "--keep-yearly 75"
        ];
      };
      description = "Default backend options applied to all restic backup jobs";
    };

    # Advanced options
    runMaintenance = mkOption {
      type = types.bool;
      default = true;
      description = "Whether to run repository maintenance after backups";
    };

    maintenanceTimer = mkOption {
      type = types.attrs;
      default = {
        OnCalendar = "*-*-* 06:00:00";
        Persistent = true;
      };
      description = "Timer configuration for maintenance tasks";
    };

    pruneOpts = mkOption {
      type = types.listOf types.str;
      default = [
        "--keep-daily 7"
        "--keep-weekly 4"
        "--keep-monthly 6"
        "--keep-yearly 3"
      ];
      description = "Default pruning options for maintenance";
    };
  };

  config = mkIf resticCfg.enable {
    # Register restic backend: the function receives the enclosing job's
    # name/config (see backups-option.nix) and returns the submodule
    # body describing this backend's `backendOptions` schema.
    system.backups.backends.restic = {
      backupConfig,
      backupName,
      ...
    }: {
      # Define the proper options schema for restic backendOptions
      options = {
        repository = mkOption {
          type = types.str;
          description = "Restic repository path or URL";
        };

        passwordFile = mkOption {
          type = types.str;
          description = "Path to file containing the repository password";
        };

        initialize = mkOption {
          type = types.bool;
          default = true;
          description = "Whether to initialize the repository if it doesn't exist";
        };

        exclude = mkOption {
          type = types.listOf types.str;
          default = [];
          description = "Patterns to exclude from backup";
        };

        extraBackupArgs = mkOption {
          type = types.listOf types.str;
          default = [];
          description = "Additional arguments passed to restic backup command";
        };

        user = mkOption {
          type = types.str;
          default = "root";
          description = "User to run the backup as";
        };

        pruneOpts = mkOption {
          type = types.listOf types.str;
          default = resticCfg.pruneOpts;
          description = "Pruning options for this backup";
        };
      };

      # Default config merged with global defaults: every job is tagged
      # with its own name so snapshots can be grouped per job.
      config = {
        extraBackupArgs =
          [
            "--tag ${backupName}"
            "--verbose"
          ]
          ++ (resticCfg.defaultBackendOptions.extraBackupArgs or []);
      };
    };

    # Create actual restic backup services
    services.restic.backups = createResticServices;

    # Add restic package
    environment.systemPackages = [pkgs.restic];

    # Systemd service customizations for restic backups
    systemd.services =
      (mapAttrs' (
        name: backup:
          nameValuePair "restic-backups-${name}" {
            # Custom pre/post scripts
            preStart = mkBefore backup.preBackupScript;
            postStop = mkAfter backup.postBackupScript;

            # Enhanced service configuration
            serviceConfig = {
              # Retry transient failures with an increasing delay
              Restart = "on-failure";
              RestartSec = "5m";
              RestartMaxDelaySec = "30m";
              RestartSteps = 3;

              # Rate limiting
              StartLimitBurst = 4;
              StartLimitIntervalSec = "2h";
            };

            # Failure handling could be extended here for notifications
            # onFailure = optional backup.notifications.failure.enable "restic-backup-${name}-failure-notify.service";
          }
      )
      resticBackups)
      // optionalAttrs resticCfg.runMaintenance {
        # Repository maintenance service
        restic-maintenance = {
          description = "Restic repository maintenance";
          after = map (name: "restic-backups-${name}.service") (attrNames resticBackups);

          # FIX: systemd `environment` values must be strings. The old
          # code merged the whole defaultBackendOptions attrset (which
          # may hold lists/bools such as pruneOpts/initialize) into the
          # environment and never set the RESTIC_* variables restic
          # actually reads. Map only the relevant settings explicitly.
          environment =
            {
              RESTIC_CACHE_DIR = "/var/cache/restic-maintenance";
            }
            // optionalAttrs (resticCfg.defaultBackendOptions ? repository) {
              RESTIC_REPOSITORY = resticCfg.defaultBackendOptions.repository;
            }
            // optionalAttrs (resticCfg.defaultBackendOptions ? passwordFile) {
              RESTIC_PASSWORD_FILE = resticCfg.defaultBackendOptions.passwordFile;
            };

          serviceConfig = {
            Type = "oneshot";
            # Multiple ExecStart entries run sequentially for oneshot units.
            ExecStart = [
              "${pkgs.restic}/bin/restic forget --prune ${concatStringsSep " " resticCfg.pruneOpts}"
              "${pkgs.restic}/bin/restic check --read-data-subset=500M"
            ];

            User = "root";
            CacheDirectory = "restic-maintenance";
            CacheDirectoryMode = "0700";
          };
        };
      };

    # Maintenance timer
    systemd.timers = mkIf resticCfg.runMaintenance {
      restic-maintenance = {
        description = "Timer for restic repository maintenance";
        wantedBy = ["timers.target"];
        timerConfig = resticCfg.maintenanceTimer;
      };
    };

    # Helpful shell aliases: generic inspection commands plus one
    # `backup-<job>` alias per configured restic job.
    programs.zsh.shellAliases =
      {
        restic-snapshots = "restic snapshots --compact --group-by tags";
        restic-repo-size = "restic stats --mode raw-data";
      }
      // (mapAttrs' (
        name: _:
          nameValuePair "backup-${name}" "systemctl start restic-backups-${name}"
      )
      resticBackups);
  };
}
|
||||
66
modules/nixos/system/backups/root.nix
Normal file
66
modules/nixos/system/backups/root.nix
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
# root.nix - Main backup system module
#
# Declares the backend registry and the `backups` job option, and
# applies common systemd hardening to every generated backup unit.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.system.backups;

  # Filter backups by backend
  getBackupsByBackend = backend:
    filterAttrs (_: backup: backup.backend == backend && backup.enable) cfg.backups;
in {
  options.system.backups = {
    # Backend registration system - backends register themselves here
    backends = mkOption {
      type = with types; attrsOf (functionTo attrs);
      internal = true;
      default = {};
      description = ''
        Attribute set of backends where the value is a function that accepts
        backend-specific arguments and returns an attribute set for the backend's options.
      '';
    };

    # Import the backups option from separate file, passing cfg for backend inference
    backups = import ./backups-option.nix cfg;

    # Pass lib to the backups-option for access to mkOption, types, etc.
    # (exposed as an internal option so the imported file can reach lib
    # through cfg instead of taking it as a second argument)
    lib = mkOption {
      type = types.attrs;
      internal = true;
      default = lib;
    };
  };

  config = {
    # Re-export backups at root level for convenience
    # backups = cfg.backups;

    # Common backup packages
    environment.systemPackages = with pkgs; [
      # Add common backup utilities here
    ];

    # Common systemd service modifications for all backup services.
    # NOTE(review): assumes every backend names its units
    # "<backend>-backups-<job>" (restic.nix follows this); a backend
    # using a different scheme would silently miss this hardening —
    # confirm when adding new backends.
    systemd.services = let
      allBackupServices = flatten (
        mapAttrsToList (
          backendName: backups:
            mapAttrsToList (name: backup: "${backendName}-backups-${name}") backups
        ) (genAttrs (attrNames cfg.backends) (backend: getBackupsByBackend backend))
      );
    in
      genAttrs allBackupServices (serviceName: {
        serviceConfig = {
          # Common hardening for all backup services
          ProtectSystem = "strict";
          ProtectHome = "read-only";
          PrivateTmp = true;
          NoNewPrivileges = true;
        };
      });
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue