another refactor partly done
This commit is contained in:
parent
3362c47211
commit
a955528e44
31 changed files with 3790 additions and 1930 deletions
|
|
@ -1,116 +0,0 @@
|
|||
# Homelab backup module.
#
# Declares the `homelab.backups` options (jobs, default labels, monitoring
# flag), asserts that every job points at a usable backend, and writes a JSON
# summary of backends and jobs to /etc/homelab/backup-config.json.
{
  config,
  lib,
  ...
}:
with lib; let
  cfg = config.homelab.backups;
  homelabCfg = config.homelab;

  # `backends` is not declared in this file (presumably the imported backend
  # modules provide it -- confirm). Resolve it once with a fallback so the enum,
  # the assertion and the JSON export all agree on the same value.
  backends = cfg.backends or {};

  # Names of all defined backends; these are the allowed values of `job.backend`.
  backendNames = attrNames backends;

  # Schema of a single backup job.
  backupJobType = types.submodule {
    options = {
      name = mkOption {
        type = types.str;
        description = "Name of the backup job";
      };

      backend = mkOption {
        type = types.enum backendNames;
        description = "Backend to use for this backup job";
      };

      backendOptions = mkOption {
        type = types.attrs;
        default = {};
        description = "Backend-specific options to override or extend the backend configuration";
      };

      labels = mkOption {
        type = types.attrsOf types.str;
        default = {};
        description = "Additional labels for this backup job";
      };
    };
  };
in {
  imports = [
    ./backup/restic.nix
    # ./backup/borgbackup.nix
  ];

  options.homelab.backups = {
    enable = mkEnableOption "Homelab backup system";

    jobs = mkOption {
      type = types.listOf backupJobType;
      default = [];
      description = "Backup jobs to execute on this system";
    };

    defaultLabels = mkOption {
      type = types.attrsOf types.str;
      default = {
        hostname = homelabCfg.hostname;
        environment = homelabCfg.environment;
        location = homelabCfg.location;
      };
      description = "Default labels applied to all backup jobs";
    };

    monitoring = mkOption {
      type = types.bool;
      default = true;
      description = "Enable backup monitoring and metrics";
    };
  };

  config = mkIf cfg.enable {
    # Validate that all job backends exist. The `or null` fallback makes a
    # missing backend fail this assertion (with its message) instead of
    # aborting evaluation with an "attribute missing" error.
    assertions = [
      {
        assertion = all (job: (backends.${job.backend} or null) != null) cfg.jobs;
        message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends";
      }
    ];

    # Add backup jobs to monitoring endpoints if monitoring is enabled
    # homelab.monitoring.endpoints =
    #   mkIf (cfg.monitoring && config.homelab.monitoring.enable)
    #   (map (job: {
    #       name = "backup-${job.name}";
    #       port = 9100; # Assuming node exporter collects backup metrics
    #       path = "/metrics";
    #       jobName = "backup";
    #       labels =
    #         cfg.defaultLabels
    #         // job.labels
    #         // {
    #           backup_job = job.name;
    #           backup_backend = job.backend;
    #         };
    #     })
    #     cfg.jobs);

    # Export backup configuration for external consumption
    environment.etc."homelab/backup-config.json".text = builtins.toJSON {
      # The lambda argument is named `backend` (not `config`) so it does not
      # shadow the module's own `config` argument.
      backends =
        mapAttrs (name: backend: {
          inherit name;
          enabled = backend.enable or false;
        })
        backends;

      jobs =
        map (job: {
          inherit (job) name backend labels;
          # Job labels take precedence over the defaults.
          allLabels = cfg.defaultLabels // job.labels;
          paths = job.backendOptions.paths or [];
          # Prefer the timer's OnCalendar, then startAt, else "unknown".
          schedule = job.backendOptions.timerConfig.OnCalendar or job.backendOptions.startAt or "unknown";
          node = homelabCfg.hostname;
          environment = homelabCfg.environment;
          location = homelabCfg.location;
        })
        cfg.jobs;
    };
  };
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
nodes,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
|
|
@ -9,18 +10,13 @@ with lib; let
|
|||
nodeAgg = import ./lib/node-aggregation.nix {inherit lib;};
|
||||
in {
|
||||
imports = [
|
||||
./monitoring-config.nix
|
||||
./proxy-config.nix
|
||||
./backup-config.nix
|
||||
./motd
|
||||
./lib/systems/monitoring.nix
|
||||
./lib/systems/logging.nix
|
||||
./lib/systems/proxy.nix
|
||||
./lib/systems/backups.nix
|
||||
|
||||
./services
|
||||
|
||||
# Global aggregation modules
|
||||
(nodeAgg.mkGlobalModule "monitoring" nodeAgg.aggregators.monitoring)
|
||||
# (nodeAgg.mkGlobalModule "logs" nodeAgg.aggregators.logs)
|
||||
(nodeAgg.mkGlobalModule "reverseProxy" nodeAgg.aggregators.reverseProxy)
|
||||
(nodeAgg.mkGlobalModule "backups" nodeAgg.aggregators.backups)
|
||||
./motd
|
||||
];
|
||||
|
||||
options.homelab = {
|
||||
|
|
@ -61,73 +57,73 @@ in {
|
|||
networking.hostName = cfg.hostname;
|
||||
|
||||
# Export configuration for external consumption
|
||||
environment.etc."homelab/config.json".text = builtins.toJSON {
|
||||
inherit (cfg) hostname domain environment location tags;
|
||||
# environment.etc."homelab/config.json".text = builtins.toJSON {
|
||||
# inherit (cfg) hostname domain environment location tags;
|
||||
|
||||
monitoring = {
|
||||
# Metrics endpoints (Prometheus, etc.)
|
||||
metrics =
|
||||
map (endpoint: {
|
||||
inherit (endpoint) name host port path jobName scrapeInterval labels;
|
||||
url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}";
|
||||
})
|
||||
cfg.global.monitoring.allMetrics or [];
|
||||
# monitoring = {
|
||||
# # Metrics endpoints (Prometheus, etc.)
|
||||
# metrics =
|
||||
# map (endpoint: {
|
||||
# inherit (endpoint) name host port path jobName scrapeInterval labels;
|
||||
# url = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}";
|
||||
# })
|
||||
# cfg.global.monitoring.allMetrics or [];
|
||||
|
||||
# Health check endpoints
|
||||
healthChecks =
|
||||
map (check: let
|
||||
# Determine the host based on useExternalDomain
|
||||
actualHost =
|
||||
if check.useExternalDomain
|
||||
then "${check.subdomain}.${cfg.externalDomain}"
|
||||
else check.host;
|
||||
# # Health check endpoints
|
||||
# healthChecks =
|
||||
# map (check: let
|
||||
# # Determine the host based on useExternalDomain
|
||||
# actualHost =
|
||||
# if check.useExternalDomain
|
||||
# then "${check.subdomain}.${cfg.externalDomain}"
|
||||
# else check.host;
|
||||
|
||||
# Build the URL
|
||||
portPart =
|
||||
if check.port != null
|
||||
then ":${toString check.port}"
|
||||
else "";
|
||||
url = "${check.protocol}://${actualHost}${portPart}${check.path}";
|
||||
in {
|
||||
inherit (check) name protocol method interval timeout conditions alerts group labels enabled;
|
||||
host = actualHost;
|
||||
port = check.port;
|
||||
path = check.path;
|
||||
url = url;
|
||||
useExternalDomain = check.useExternalDomain;
|
||||
subdomain = check.subdomain;
|
||||
sourceNode = cfg.hostname;
|
||||
})
|
||||
cfg.global.monitoring.allHealthChecks or [];
|
||||
};
|
||||
# # Build the URL
|
||||
# portPart =
|
||||
# if check.port != null
|
||||
# then ":${toString check.port}"
|
||||
# else "";
|
||||
# url = "${check.protocol}://${actualHost}${portPart}${check.path}";
|
||||
# in {
|
||||
# inherit (check) name protocol method interval timeout conditions alerts group labels enabled;
|
||||
# host = actualHost;
|
||||
# port = check.port;
|
||||
# path = check.path;
|
||||
# url = url;
|
||||
# useExternalDomain = check.useExternalDomain;
|
||||
# subdomain = check.subdomain;
|
||||
# sourceNode = cfg.hostname;
|
||||
# })
|
||||
# cfg.global.monitoring.allHealthChecks or [];
|
||||
# };
|
||||
|
||||
reverseProxy = {
|
||||
entries =
|
||||
map (entry: {
|
||||
inherit (entry) subdomain host port path enableAuth enableSSL;
|
||||
internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}";
|
||||
externalHost = "${entry.subdomain}.${cfg.externalDomain}";
|
||||
})
|
||||
cfg.global.reverseProxy.all;
|
||||
};
|
||||
# reverseProxy = {
|
||||
# entries =
|
||||
# map (entry: {
|
||||
# inherit (entry) subdomain host port path enableAuth enableSSL;
|
||||
# internalHost = "${cfg.hostname}:${toString entry.port}${entry.path}";
|
||||
# externalHost = "${entry.subdomain}.${cfg.externalDomain}";
|
||||
# })
|
||||
# cfg.global.reverseProxy.all;
|
||||
# };
|
||||
|
||||
backups = {
|
||||
jobs =
|
||||
map (job: {
|
||||
inherit (job) name backend labels;
|
||||
backupId = job._backupId;
|
||||
sourceNode = job._sourceNode;
|
||||
})
|
||||
cfg.global.backups.all;
|
||||
# backups = {
|
||||
# jobs =
|
||||
# map (job: {
|
||||
# inherit (job) name backend labels;
|
||||
# backupId = job._backupId;
|
||||
# sourceNode = job._sourceNode;
|
||||
# })
|
||||
# cfg.global.backups.all;
|
||||
|
||||
backends = cfg.global.backups.allBackends;
|
||||
# backends = cfg.global.backups.allBackends;
|
||||
|
||||
summary = {
|
||||
totalJobs = length cfg.global.backups.all;
|
||||
jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend;
|
||||
jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode;
|
||||
};
|
||||
};
|
||||
};
|
||||
# summary = {
|
||||
# totalJobs = length cfg.global.backups.all;
|
||||
# jobsByBackend = mapAttrs (backend: jobs: length jobs) cfg.global.backups.byBackend;
|
||||
# jobsByNode = mapAttrs (node: jobs: length jobs) cfg.global.backups.byNode;
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
};
|
||||
}
|
||||
|
|
|
|||
55
modules/homelab/lib/aggregators/base.nix
Normal file
55
modules/homelab/lib/aggregators/base.nix
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Generic cross-node aggregation helper.
#
# Takes `lib` and returns `{ aggregateFromNodes }`.
{lib}: let
  # Only the helpers that are actually used below.
  inherit (lib) flatten mapAttrs mapAttrsToList filter groupBy length attrByPath splitString;

  # Collect the items found at `attributePath` in every node's config.
  #
  # Arguments:
  #   nodes         - attrset of nodeName -> node; each node is read via `.config`
  #   attributePath - dotted path, e.g. "homelab.monitoring.metrics" or
  #                   "homelab.backups.jobs"; nodes lacking the path contribute []
  #   enhancer      - optional function applied to each item after the node
  #                   context (`_nodeName`, `_nodeConfig`, `_nodeAddress`) is added
  #
  # Returns an attrset with the raw list (`all`), common groupings, filter
  # helpers and counters.
  aggregateFromNodes = {
    nodes,
    attributePath,
    enhancer ? null,
  }: let
    # The path is identical for every node, so split it once up front.
    pathSegments = splitString "." attributePath;

    # Attach the originating node's identity to an item.
    withNodeContext = nodeName: nodeConfig: item:
      item
      // {
        _nodeName = nodeName;
        _nodeConfig = nodeConfig;
        # Falls back to the attribute name when no hostName is configured.
        _nodeAddress = nodeConfig.config.networking.hostName or nodeName;
      };

    # All (decorated) items contributed by a single node.
    collectFromNode = nodeName: nodeConfig: let
      items = attrByPath pathSegments [] nodeConfig.config;
      tag = withNodeContext nodeName nodeConfig;
      decorate =
        if enhancer != null
        then (item: enhancer (tag item))
        else tag;
    in
      map decorate items;

    allItems = flatten (mapAttrsToList collectFromNode nodes);
  in {
    # Raw aggregated data
    all = allItems;

    # Common grouping patterns
    byNode = groupBy (item: item._nodeName) allItems;
    byType = groupBy (item: item.type or "unknown") allItems;
    byService = groupBy (item: item.service or "unknown") allItems;

    # Utility functions for filtering
    filterBy = predicate: filter predicate allItems;
    ofType = type: filter (item: (item.type or "") == type) allItems;
    ofNode = nodeName: filter (item: item._nodeName == nodeName) allItems;
    # Items without an `enabled` attribute count as enabled.
    enabled = filter (item: item.enabled or true) allItems;

    # Counting utilities
    count = length allItems;
    countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems);
  };
in {
  inherit aggregateFromNodes;
}
|
||||
87
modules/homelab/lib/features/logging.nix
Normal file
87
modules/homelab/lib/features/logging.nix
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
# Per-service logging feature.
#
# Called with a service name, yields a NixOS module that adds
# `homelab.services.<serviceName>.logging` options and, when both the service
# and its logging are enabled, contributes a file-based entry (plus any
# `extraSources`) to `homelab.logging.sources`.
serviceName: {
  config,
  lib,
  ...
}:
with lib; let
  cfg = config.homelab.services.${serviceName};
  homelabCfg = config.homelab;
  logCfg = cfg.logging;
in {
  options.homelab.services.${serviceName}.logging = {
    enable = mkEnableOption "logging for ${serviceName}";

    # Log file paths to collect.
    files = mkOption {
      type = types.listOf types.str;
      default = [];
    };

    parsing = {
      # Optional regex; when set, a regex pipeline stage is emitted.
      regex = mkOption {
        type = types.nullOr types.str;
        default = null;
      };

      # Field names turned into keys of the `labels` pipeline stage.
      extractFields = mkOption {
        type = types.listOf types.str;
        default = [];
      };
    };

    # Optional multiline settings; null leaves multiline unset on the source.
    multiline = mkOption {
      type = types.nullOr (types.submodule {
        options = {
          firstLineRegex = mkOption {type = types.str;};
          maxWaitTime = mkOption {
            type = types.str;
            default = "3s";
          };
        };
      });
      default = null;
    };

    extraLabels = mkOption {
      type = types.attrsOf types.str;
      default = {};
    };

    # Additional sources appended verbatim after the generated one.
    extraSources = mkOption {
      type = types.listOf types.attrs;
      default = [];
    };
  };

  config = mkIf (cfg.enable && logCfg.enable) {
    homelab.logging.sources =
      [
        {
          name = "${serviceName}-logs";
          type = "file";
          files = {
            paths = logCfg.files;
            multiline = logCfg.multiline;
          };
          # Fixed labels win over user-supplied extraLabels.
          labels =
            logCfg.extraLabels
            // {
              service = serviceName;
              node = homelabCfg.hostname;
              environment = homelabCfg.environment;
            };
          # `optional` yields a real list ([] or [stage]). The previous
          # `mkIf cond [ ... ] ++ [ ... ]` parsed as `(mkIf cond [ ... ]) ++ [ ... ]`,
          # i.e. an attrset concatenated with a list, which fails to evaluate.
          pipelineStages =
            optional (logCfg.parsing.regex != null) {
              regex.expression = logCfg.parsing.regex;
            }
            ++ [
              {
                # NOTE(review): this stage is emitted even when extractFields is
                # empty (labels = {}); confirm the consumer accepts that.
                labels = listToAttrs (map (field: nameValuePair field null) logCfg.parsing.extractFields);
              }
            ];
          enabled = true;
        }
      ]
      ++ logCfg.extraSources;
  };
}
|
||||
108
modules/homelab/lib/features/monitoring.nix
Normal file
108
modules/homelab/lib/features/monitoring.nix
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
# Per-service monitoring feature.
#
# Called with a service name, yields a NixOS module that adds
# `homelab.services.<serviceName>.monitoring` options and, when both the
# service and its monitoring are enabled, contributes a metrics endpoint and a
# health check to `homelab.monitoring`.
serviceName: {
  config,
  lib,
  ...
}:
with lib; let
  cfg = config.homelab.services.${serviceName};
  homelabCfg = config.homelab;
  monCfg = cfg.monitoring;

  # Labels shared by the generated metrics endpoint and health check.
  # The fixed keys win over user-supplied extraLabels.
  commonLabels =
    monCfg.extraLabels
    // {
      service = serviceName;
      node = homelabCfg.hostname;
      environment = homelabCfg.environment;
    };
in {
  # Define the service-specific monitoring options
  options.homelab.services.${serviceName}.monitoring = {
    enable = mkEnableOption "monitoring for ${serviceName}";

    metrics = {
      # Controls the generated "<serviceName>-main" endpoint only.
      enable = mkOption {
        type = types.bool;
        default = true;
      };

      path = mkOption {
        type = types.str;
        default = "/metrics";
      };

      # Extra endpoints appended verbatim.
      extraEndpoints = mkOption {
        type = types.listOf types.attrs;
        default = [];
      };
    };

    healthCheck = {
      # Controls the generated "<serviceName>-health" check only.
      enable = mkOption {
        type = types.bool;
        default = true;
      };

      path = mkOption {
        type = types.str;
        default = "/health";
      };

      conditions = mkOption {
        type = types.listOf types.str;
        default = ["[STATUS] == 200"];
      };

      # Extra checks appended verbatim.
      extraChecks = mkOption {
        type = types.listOf types.attrs;
        default = [];
      };
    };

    extraLabels = mkOption {
      type = types.attrsOf types.str;
      default = {};
    };
  };

  # Generate the homelab config automatically when service is enabled
  config = mkIf (cfg.enable && monCfg.enable) {
    homelab.monitoring = {
      # The generated endpoint now honours `metrics.enable`; previously the
      # flag was declared but never read, so it could not be switched off.
      metrics =
        optional monCfg.metrics.enable {
          name = "${serviceName}-main";
          host = homelabCfg.hostname;
          port = cfg.port;
          path = monCfg.metrics.path;
          jobName = serviceName;
          scrapeInterval = "30s";
          labels = commonLabels;
        }
        ++ monCfg.metrics.extraEndpoints;

      # Same for `healthCheck.enable`; explicitly listed extra checks are kept
      # regardless of the flag.
      healthChecks =
        optional monCfg.healthCheck.enable {
          name = "${serviceName}-health";
          host = homelabCfg.hostname;
          port = cfg.port;
          path = monCfg.healthCheck.path;
          protocol = "http";
          method = "GET";
          interval = "30s";
          timeout = "10s";
          conditions = monCfg.healthCheck.conditions;
          group = "services";
          labels = commonLabels;
          enabled = true;
        }
        ++ monCfg.healthCheck.extraChecks;
    };
  };
}
|
||||
64
modules/homelab/lib/features/proxy.nix
Normal file
64
modules/homelab/lib/features/proxy.nix
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Per-service reverse-proxy feature.
#
# Called with a service name, yields a NixOS module that adds
# `homelab.services.<serviceName>.proxy` options and, when both the service
# and its proxy are enabled, registers one entry for the service itself plus
# one per `additionalSubdomains` item in `homelab.reverseProxy.entries`.
serviceName: {
  config,
  lib,
  ...
}:
with lib; let
  homelabCfg = config.homelab;
  cfg = config.homelab.services.${serviceName};
  proxyCfg = cfg.proxy;

  # Schema of one additional subdomain routed to this host.
  extraSubdomainType = types.submodule {
    options = {
      subdomain = mkOption {type = types.str;};
      port = mkOption {type = types.port;};
      path = mkOption {
        type = types.str;
        default = "/";
      };
      enableAuth = mkOption {
        type = types.bool;
        default = false;
      };
    };
  };

  # Build a proxy entry; every entry targets this host and has SSL enabled.
  mkEntry = {
    subdomain,
    port,
    path,
    enableAuth,
  }: {
    inherit subdomain port path enableAuth;
    host = homelabCfg.hostname;
    enableSSL = true;
  };

  # Entry for the service's own port, always served from "/".
  primaryEntry = mkEntry {
    inherit (proxyCfg) subdomain enableAuth;
    port = cfg.port;
    path = "/";
  };

  secondaryEntries =
    map
    (sub: mkEntry {inherit (sub) subdomain port path enableAuth;})
    proxyCfg.additionalSubdomains;
in {
  options.homelab.services.${serviceName}.proxy = {
    enable = mkEnableOption "reverse proxy for ${serviceName}";

    # Defaults to the service name.
    subdomain = mkOption {
      type = types.str;
      default = serviceName;
    };

    enableAuth = mkOption {
      type = types.bool;
      default = false;
    };

    additionalSubdomains = mkOption {
      type = types.listOf extraSubdomainType;
      default = [];
    };
  };

  config = mkIf (cfg.enable && proxyCfg.enable) {
    homelab.reverseProxy.entries = [primaryEntry] ++ secondaryEntries;
  };
}
|
||||
|
|
@ -1,226 +0,0 @@
|
|||
# Cross-node aggregation library.
#
# Takes `lib` and returns:
#   aggregateFromNodes - generic collector for a list-valued option across nodes
#   aggregators        - specialised collectors (monitoring, reverseProxy, backups)
#   mkGlobalModule     - wraps an aggregator into a module exposing its result
#                        as the read-only option `homelab.global.<name>`
{lib}: let
  inherit (lib) flatten mapAttrs mapAttrsToList filter groupBy length unique attrByPath splitString;

  # Collect the items found at `attributePath` (dotted, e.g.
  # "homelab.monitoring.endpoints" or "homelab.backups.jobs") in every node's
  # config, tagging each with its node context and then applying the optional
  # `enhancer`.
  aggregateFromNodes = {
    nodes,
    attributePath,
    enhancer ? null,
  }: let
    # Look up a dotted path in a config; [] when the path is absent.
    lookup = path: cfg: attrByPath (splitString "." path) [] cfg;

    # Decorated items contributed by one node.
    itemsOfNode = nodeName: nodeConfig: let
      addNodeContext = item:
        item
        // {
          _nodeName = nodeName;
          _nodeConfig = nodeConfig;
          _nodeAddress = nodeConfig.config.networking.hostName or nodeName;
        };
      decorate =
        if enhancer != null
        then (item: enhancer (addNodeContext item))
        else addNodeContext;
    in
      map decorate (lookup attributePath nodeConfig.config);

    allItems = flatten (mapAttrsToList itemsOfNode nodes);
  in {
    # Raw aggregated data
    all = allItems;

    # Common grouping patterns
    byNode = groupBy (item: item._nodeName) allItems;
    byType = groupBy (item: item.type or "unknown") allItems;
    byService = groupBy (item: item.service or "unknown") allItems;

    # Utility functions for filtering
    filterBy = predicate: filter predicate allItems;
    ofType = type: filter (item: (item.type or "") == type) allItems;

    count = length allItems;
    countBy = fn: mapAttrs (key: items: length items) (groupBy fn allItems);
  };

  # Map every group of a grouping to its size.
  sizes = mapAttrs (key: members: length members);

  # Specialized aggregators for common use cases
  aggregators = {
    monitoring = nodes: let
      # Metrics endpoints, each with a computed address and scrape URL.
      metricsAgg = aggregateFromNodes {
        inherit nodes;
        attributePath = "homelab.monitoring.metrics";
        enhancer = endpoint: let
          host = endpoint.host or endpoint._nodeAddress;
          port = toString endpoint.port;
        in
          endpoint
          // {
            _fullAddress = "${host}:${port}";
            _metricsUrl = "http://${host}:${port}${endpoint.path or "/metrics"}";
            _type = "metrics";
          };
      };

      # Health checks, each with its resolved host, URL and node labels.
      healthChecksAgg = aggregateFromNodes {
        inherit nodes;
        attributePath = "homelab.monitoring.healthChecks";
        enhancer = check: let
          nodeHomelab = check._nodeConfig.config.homelab;
          # External checks go through <subdomain>.<externalDomain>.
          actualHost =
            if check.useExternalDomain or false
            then "${check.subdomain}.${nodeHomelab.externalDomain or "example.com"}"
            else check.host or check._nodeAddress;
          portPart =
            if check.port != null
            then ":${toString check.port}"
            else "";
        in
          check
          // {
            _actualHost = actualHost;
            _url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path or "/"}";
            _type = "health-check";
            # Node context overrides same-named keys in the check's own labels.
            labels =
              (check.labels or {})
              // {
                node = check._nodeName;
                environment = nodeHomelab.environment or "unknown";
              };
          };
      };

      metrics = metricsAgg.all;
      checks = healthChecksAgg.all;

      isExternal = hc: hc.useExternalDomain or false;
      isInternal = hc: !(hc.useExternalDomain or false);

      checksByGroup = groupBy (hc: hc.group or "default") checks;
      checksByProtocol = groupBy (hc: hc.protocol or "http") checks;
      externalChecks = filter isExternal checks;
      internalChecks = filter isInternal checks;
    in
      # Generic keys (`all`, `byNode`, ...) come from the health-check
      # aggregation because it is merged after the metrics aggregation.
      metricsAgg
      // healthChecksAgg
      // {
        # Metrics-specific aggregations
        allMetrics = metrics;
        metricsByNode = metricsAgg.byNode;
        metricsByJobName = groupBy (m: m.jobName or "unknown") metrics;

        # Health checks-specific aggregations
        allHealthChecks = checks;
        healthChecksByNode = healthChecksAgg.byNode;
        healthChecksByGroup = checksByGroup;
        healthChecksByProtocol = checksByProtocol;

        # Filtered health checks
        externalHealthChecks = externalChecks;
        internalHealthChecks = internalChecks;
        enabledHealthChecks = filter (hc: hc.enabled or true) checks;

        # Summary statistics
        summary = {
          totalMetrics = length metrics;
          totalHealthChecks = length checks;
          healthChecksByGroup = sizes checksByGroup;
          healthChecksByProtocol = sizes checksByProtocol;
          externalChecksCount = length externalChecks;
          internalChecksCount = length internalChecks;
        };
      };

    # Promtail log configurations
    # logs = nodes:
    #   aggregateFromNodes {
    #     inherit nodes;
    #     attributePath = "homelab.logging.sources";
    #     enhancer = logSource:
    #       logSource
    #       // {
    #         # Add log-specific computed fields
    #         _logPath = logSource.path or "/var/log/${logSource.service}.log";
    #         _labels =
    #           (logSource.labels or {})
    #           // {
    #             node = logSource._nodeName;
    #             service = logSource.service or "unknown";
    #           };
    #       };
    #   };

    # Reverse proxy entries with computed upstream URL and FQDN.
    reverseProxy = nodes: let
      withProxyFields = entry:
        entry
        // {
          _upstream = "http://${entry.host or entry._nodeAddress}:${toString entry.port}";
          _fqdn = "${entry.subdomain or entry.service}.${entry.domain or "local"}";
        };
    in
      aggregateFromNodes {
        inherit nodes;
        attributePath = "homelab.reverseProxy.entries";
        enhancer = withProxyFields;
      };

    # Backup jobs plus the set of non-null backends seen on any node.
    backups = nodes: let
      baseAgg = aggregateFromNodes {
        inherit nodes;
        attributePath = "homelab.backups.jobs";
        enhancer = backup:
          backup
          // {
            _sourceNode = backup._nodeName;
            _backupId = "${backup._nodeName}-${backup.name}";
            _jobFqdn = "${backup.name}.${backup._nodeName}";
          };
      };

      jobs = baseAgg.all;
      jobsGroupedByBackend = groupBy (job: job.backend) jobs;

      # Names of one node's backends whose value is not null.
      nonNullBackendNames = backends:
        filter (name: backends.${name} != null) (lib.attrNames backends);

      # Every node's `homelab.backups.backends`; {} where absent.
      perNodeBackends =
        mapAttrsToList
        (nodeName: nodeConfig:
          attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config)
        nodes;

      allBackends = unique (flatten (map nonNullBackendNames perNodeBackends));
    in
      baseAgg
      // {
        # Backup-specific aggregations
        byBackend = jobsGroupedByBackend;
        inherit allBackends;

        # Enhanced summary
        summary = {
          totalJobs = length jobs;
          jobsByBackend = sizes jobsGroupedByBackend;
          jobsByNode = baseAgg.countBy (job: job._nodeName);
          availableBackends = allBackends;
          backendsInUse = unique (map (job: job.backend) jobs);
        };
      };
  };
in {
  inherit aggregateFromNodes aggregators;

  # Build a module that publishes `aggregatorFn nodes` as the read-only
  # option `homelab.global.<attributeName>`.
  mkGlobalModule = attributeName: aggregatorFn: {
    lib,
    nodes,
    ...
  }: {
    options.homelab.global.${attributeName} = lib.mkOption {
      type = lib.types.attrs;
      readOnly = true;
      description = "Globally aggregated ${attributeName} from all nodes";
    };

    config.homelab.global.${attributeName} = aggregatorFn nodes;
  };
}
|
||||
|
|
@ -1,295 +0,0 @@
|
|||
# Standard service interface for homelab services
|
||||
# This provides a consistent contract that all services should follow
|
||||
{lib}: let
|
||||
inherit (lib) mkOption mkEnableOption types;
|
||||
|
||||
# Define the standard service interface
|
||||
# Build the standard option set every homelab service exposes (enable, port,
# firewall, proxy, monitoring, description, extraOptions), then merge in the
# service's own `serviceOptions`, which override same-named standard options.
mkServiceInterface = {
  serviceName,
  defaultPort ? null,
  defaultSubdomain ? serviceName,
  defaultDescription ? "Homelab ${serviceName} service",
  monitoringPath ? "/metrics",
  healthCheckPath ? "/health",
  healthCheckConditions ? ["[STATUS] == 200"],
  # Custom options that the service wants to expose
  serviceOptions ? {},
}: let
  # Shorthand constructors for the two option shapes used repeatedly below.
  boolOption = default: description:
    mkOption {
      type = types.bool;
      inherit default description;
    };
  strOption = default: description:
    mkOption {
      type = types.str;
      inherit default description;
    };

  # Using the default port throws when the caller supplied no `defaultPort`.
  portDefault =
    if defaultPort != null
    then defaultPort
    else throw "Service ${serviceName} must specify a default port";

  # Standard interface options that all services must have
  standardOptions = {
    enable = mkEnableOption defaultDescription;

    port = mkOption {
      type = types.port;
      default = portDefault;
      description = "Port for ${serviceName} service";
    };

    openFirewall = boolOption true "Whether to automatically open firewall ports";

    proxy = {
      enable = boolOption true "Enable reverse proxy for this service";
      subdomain = strOption defaultSubdomain "Subdomain for reverse proxy (${defaultSubdomain}.yourdomain.com)";
      enableAuth = boolOption false "Enable authentication for reverse proxy";
      enableSSL = boolOption true "Enable SSL for reverse proxy";
    };

    monitoring = {
      enable = boolOption true "Enable monitoring (metrics and health checks)";
      metricsPath = strOption monitoringPath "Path for metrics endpoint";
      jobName = strOption serviceName "Prometheus job name";
      scrapeInterval = strOption "30s" "Prometheus scrape interval";

      healthCheck = {
        enable = boolOption true "Enable health check monitoring";
        path = strOption healthCheckPath "Path for health check endpoint";
        interval = strOption "30s" "Health check interval";
        timeout = strOption "10s" "Health check timeout";

        conditions = mkOption {
          type = types.listOf types.str;
          default = healthCheckConditions;
          description = "Health check conditions";
        };

        group = strOption "services" "Health check group name";
      };

      extraLabels = mkOption {
        type = types.attrsOf types.str;
        default = {};
        description = "Additional labels for monitoring";
      };
    };

    description = strOption defaultDescription "Service description";

    extraOptions = mkOption {
      type = types.attrs;
      default = {};
      description = "Additional service-specific configuration options";
    };
  };
in
  # Merge in service-specific options
  standardOptions // serviceOptions;
|
||||
|
||||
# Helper function to implement the standard service behavior
|
||||
# Wrap a service's own configuration with the standard behaviours (firewall,
# monitoring, reverse proxy). Everything is gated on `cfg.enable`; each
# standard behaviour is additionally gated on its own flag.
mkServiceConfig = {
  config,
  cfg,
  homelabCfg,
  serviceName,
  # The actual service configuration
  serviceConfig,
  # Optional: custom monitoring labels
  extraMonitoringLabels ? {},
  # Optional: custom health check configuration
  customHealthChecks ? [],
  # Optional: custom reverse proxy configuration
  customProxyConfig ? {},
}: let
  monitoringCfg = cfg.monitoring;
  healthCfg = monitoringCfg.healthCheck;
  proxyCfg = cfg.proxy;

  # Labels attached to both the metrics endpoint and the health check.
  # Later operands win: caller labels, then the user's extraLabels.
  labels =
    {
      service = serviceName;
      component = "main";
      instance = "${homelabCfg.hostname}.${homelabCfg.domain}";
    }
    // extraMonitoringLabels
    // monitoringCfg.extraLabels;

  # Zero-or-one metrics endpoint.
  metricsEndpoints = lib.optional monitoringCfg.enable {
    name = "${serviceName}-metrics";
    port = cfg.port;
    path = monitoringCfg.metricsPath;
    jobName = monitoringCfg.jobName;
    scrapeInterval = monitoringCfg.scrapeInterval;
    inherit labels;
  };

  # Zero-or-one standard health check.
  defaultHealthChecks = lib.optional (monitoringCfg.enable && healthCfg.enable) {
    name = "${serviceName}-health";
    port = cfg.port;
    path = healthCfg.path;
    interval = healthCfg.interval;
    timeout = healthCfg.timeout;
    conditions = healthCfg.conditions;
    group = healthCfg.group;
    inherit labels;
  };

  # Reverse proxy entry; `customProxyConfig` overrides the standard fields.
  proxyEntry =
    {
      subdomain = proxyCfg.subdomain;
      host = homelabCfg.hostname;
      port = cfg.port;
      enableAuth = proxyCfg.enableAuth;
      enableSSL = proxyCfg.enableSSL;
    }
    // customProxyConfig;

  firewallPart = lib.mkIf cfg.openFirewall {
    networking.firewall.allowedTCPPorts = [cfg.port];
  };

  monitoringPart = lib.mkIf monitoringCfg.enable {
    homelab.monitoring.metrics = metricsEndpoints;
    homelab.monitoring.healthChecks = defaultHealthChecks ++ customHealthChecks;
  };

  proxyPart = lib.mkIf proxyCfg.enable {
    homelab.reverseProxy.entries = [proxyEntry];
  };
in
  lib.mkIf cfg.enable (lib.mkMerge [
    # Service-specific configuration first, then the standard behaviours.
    serviceConfig
    firewallPart
    monitoringPart
    proxyPart
  ]);
|
||||
|
||||
# Validation helper to ensure required options are set
|
||||
validateServiceConfig = cfg: serviceName: let
  # Proxy requested but no subdomain given.
  proxyWithoutSubdomain = cfg.proxy.enable && cfg.proxy.subdomain == "";
  # Monitoring requested but the metrics path is empty.
  monitoringWithoutPath = cfg.monitoring.enable && cfg.monitoring.metricsPath == "";
in [
  # Each element is a `lib.mkIf` wrapper whose content is a `throw`.
  # NOTE(review): these are not `{ assertion; message; }` records, so the list
  # cannot be assigned to `config.assertions` as-is - confirm how callers
  # consume it.
  (lib.mkIf proxyWithoutSubdomain
    (throw "Service ${serviceName}: proxy.subdomain is required when proxy.enable is true"))
  (lib.mkIf monitoringWithoutPath
    (throw "Service ${serviceName}: monitoring.metricsPath cannot be empty when monitoring is enabled"))
];
|
||||
in {
|
||||
inherit mkServiceInterface mkServiceConfig validateServiceConfig;
|
||||
|
||||
# Common service option patterns
|
||||
commonOptions = {
  # Verbosity selector shared by services; one of four fixed levels.
  logLevel = mkOption {
    description = "Log level";
    default = "info";
    type = types.enum ["debug" "info" "warn" "error"];
  };

  # Optional path to a file holding secrets; null means "no file".
  environmentFile = mkOption {
    description = "Environment file for secrets";
    default = null;
    type = types.nullOr types.path;
  };

  # Option factory: takes the service name and the homelab config and returns
  # a string option defaulting to https://<serviceName>.<externalDomain>.
  externalUrl = serviceName: homelabCfg: let
    defaultUrl = "https://${serviceName}.${homelabCfg.externalDomain}";
  in
    mkOption {
      description = "External URL for ${serviceName}";
      default = defaultUrl;
      type = types.str;
    };
};
|
||||
|
||||
# Helper for creating service modules with the interface
|
||||
mkServiceModule = {
  serviceName,
  defaultPort,
  defaultSubdomain ? serviceName,
  serviceOptions ? {},
  ...
} @ args:
# The result is a NixOS module: options under homelab.services.<serviceName>
# plus the standard behaviour produced by mkServiceConfig.
{
  config,
  lib,
  ...
}: {
  options.homelab.services.${serviceName} = mkServiceInterface {
    inherit serviceName defaultPort defaultSubdomain serviceOptions;
  };

  config = mkServiceConfig {
    inherit config serviceName;
    cfg = config.homelab.services.${serviceName};
    homelabCfg = config.homelab;
    # Mandatory extra argument, taken from `args` because it is not a named
    # parameter; evaluation aborts with the message below when it is absent.
    # NOTE(review): the message says "function", but the value is handed to
    # mkServiceConfig unchanged - confirm whether an attrset is expected.
    serviceConfig = args.serviceConfig or (throw "mkServiceModule requires serviceConfig function");
  };
};
|
||||
}
|
||||
163
modules/homelab/lib/systems/backups.nix
Normal file
163
modules/homelab/lib/systems/backups.nix
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
# Homelab backup module: declares the backup jobs of this host and exposes
# the local job list plus, when `nodes` is non-empty, a fleet-wide aggregation.
{
  config,
  lib,
  nodes,
  ...
}:
with lib; let
  cfg = config.homelab.backups;
  homelabCfg = config.homelab;
  hasNodes = length (attrNames nodes) > 0;

  # Backend definitions. `homelab.backups.backends` is not declared in this
  # file, so fall back to an empty set when no other module provides it.
  backends = cfg.backends or {};

  # Get all defined backend names dynamically
  backendNames = attrNames backends;

  backupJobType = types.submodule {
    options = {
      name = mkOption {
        type = types.str;
        description = "Name of the backup job";
      };
      backend = mkOption {
        type = types.enum backendNames;
        description = "Backend to use for this backup job";
      };
      backendOptions = mkOption {
        type = types.attrs;
        default = {};
        description = "Backend-specific options to override or extend the backend configuration";
      };
      labels = mkOption {
        type = types.attrsOf types.str;
        default = {};
        description = "Additional labels for this backup job";
      };
    };
  };

  # Local aggregation
  localAggregation = {
    allJobs = cfg.jobs;
    allBackends = backendNames;
  };

  # Global aggregation
  globalAggregation = let
    baseAgg = import ../aggregators/base.nix {inherit lib;};

    jobsAgg = baseAgg.aggregateFromNodes {
      inherit nodes;
      attributePath = "homelab.backups.allJobs";
      # NOTE(review): relies on the aggregator attaching `_nodeName` to every
      # item before calling the enhancer - confirm in aggregators/base.nix.
      enhancer = job:
        job
        // {
          _sourceNode = job._nodeName;
          _backupId = "${job._nodeName}-${job.name}";
          _jobFqdn = "${job.name}.${job._nodeName}";
        };
    };

    # Get all backends from all nodes
    allBackendsFromNodes = let
      backendConfigs =
        mapAttrsToList (
          nodeName: nodeConfig:
            attrByPath ["homelab" "backups" "backends"] {} nodeConfig.config
        )
        nodes;
      enabledBackends = flatten (map (
          nodeBackends:
            filter (name: nodeBackends.${name} != null) (attrNames nodeBackends)
        )
        backendConfigs);
    in
      unique enabledBackends;
  in {
    allJobs = jobsAgg.all;
    allBackends = allBackendsFromNodes;
    jobsByBackend = groupBy (j: j.backend) jobsAgg.all;
    summary = {
      total = length jobsAgg.all;
      byBackend = jobsAgg.countBy (j: j.backend);
      byNode = jobsAgg.countBy (j: j._nodeName);
      uniqueBackends = unique (map (j: j.backend) jobsAgg.all);
    };
  };
in {
  imports = [
    ../../backup/restic.nix
    # ./backup/borgbackup.nix
  ];

  options.homelab.backups = {
    enable = mkEnableOption "backup system";

    jobs = mkOption {
      type = types.listOf backupJobType;
      default = [];
      description = "Backup jobs to execute on this system";
    };

    # Backend configurations (like your existing setup)
    # backends = mkOption {
    #   type = types.attrs;
    #   default = {};
    #   description = "Backup backend configurations";
    # };

    defaultLabels = mkOption {
      type = types.attrsOf types.str;
      default = {
        hostname = homelabCfg.hostname;
        environment = homelabCfg.environment;
        location = homelabCfg.location;
      };
      description = "Default labels applied to all backup jobs";
    };

    monitoring = mkOption {
      type = types.bool;
      default = true;
      description = "Enable backup monitoring and metrics";
    };

    # Always exposed aggregated data.
    #
    # The values are computed in `default` rather than assigned in `config`:
    # for a read-only option the module system counts the default as a
    # definition, so a default plus a `config` assignment is rejected as
    # "set multiple times".
    allJobs = mkOption {
      type = types.listOf types.attrs;
      default =
        if cfg.enable
        then localAggregation.allJobs
        else [];
      readOnly = true;
      description = "Backup jobs of this host (empty while the backup system is disabled)";
    };

    allBackends = mkOption {
      type = types.listOf types.str;
      default =
        if cfg.enable
        then localAggregation.allBackends
        else [];
      readOnly = true;
      description = "Backend names known on this host (empty while the backup system is disabled)";
    };

    global = mkOption {
      type = types.attrs;
      default =
        if cfg.enable && hasNodes
        then globalAggregation
        else {};
      readOnly = true;
      description = "Aggregation over all nodes (empty when disabled or when no nodes are passed in)";
    };
  };

  config = mkIf cfg.enable {
    # Validate that all job backends exist
    assertions = [
      {
        # `or null` turns an unknown backend name into a failed assertion with
        # the message below instead of a missing-attribute evaluation error.
        assertion = all (job: (backends.${job.backend} or null) != null) cfg.jobs;
        message = "All backup jobs must reference backends that are defined and not null in homelab.backups.backends";
      }
    ];
  };
}
|
||||
209
modules/homelab/lib/systems/logging.nix
Normal file
209
modules/homelab/lib/systems/logging.nix
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
# Homelab logging module: configures Promtail for this host and exposes the
# local log sources plus, when `nodes` is non-empty, a fleet-wide aggregation.
{
  config,
  lib,
  nodes,
  ...
}:
with lib; let
  cfg = config.homelab.logging;
  homelabCfg = config.homelab;
  hasNodes = length (attrNames nodes) > 0;

  # Local aggregation
  localAggregation = {
    # Manually declared sources plus the system journal when Promtail is on.
    allSources =
      cfg.sources
      ++ (optional cfg.promtail.enable {
        name = "system-journal";
        type = "journal";
        journal.path = "/var/log/journal";
        labels =
          cfg.defaultLabels
          // {
            component = "system";
            log_source = "journald";
          };
        enabled = true;
      });
  };

  # Global aggregation
  globalAggregation = let
    baseAgg = import ../aggregators/base.nix {inherit lib;};

    sourcesAgg = baseAgg.aggregateFromNodes {
      inherit nodes;
      attributePath = "homelab.logging.allSources";
      # NOTE(review): relies on the aggregator attaching `_nodeName` to every
      # item before calling the enhancer - confirm in aggregators/base.nix.
      enhancer = source:
        source
        // {
          _sourceNode = source._nodeName;
          _logId = "${source._nodeName}-${source.name}";
        };
    };
  in {
    allSources = sourcesAgg.all;
    sourcesByType = groupBy (s: s.type) sourcesAgg.all;
    summary = {
      total = length sourcesAgg.all;
      byType = sourcesAgg.countBy (s: s.type);
      byNode = sourcesAgg.countBy (s: s._nodeName);
    };
  };

  # Build the Promtail scrape config for one log source.
  mkScrapeConfig = source: let
    labels = cfg.defaultLabels // source.labels;
    mkTarget = extraLabels: {
      targets = ["localhost"];
      labels = labels // extraLabels;
    };
  in
    {
      job_name = source.name;
      static_configs =
        if source.type == "file"
        # `__path__` holds a single glob, so emit one target per configured
        # path instead of joining the paths with commas.
        then map (path: mkTarget {__path__ = path;}) source.files.paths
        else [(mkTarget {})];
      # pipeline_stages = source.pipelineStages;
    }
    // optionalAttrs (source.type == "journal") {
      journal = {
        path = source.journal.path;
        inherit labels;
      };
    };
in {
  options.homelab.logging = {
    enable = mkEnableOption "logging system";

    promtail = {
      enable = mkOption {
        type = types.bool;
        default = true;
      };
      port = mkOption {
        type = types.port;
        default = 9080;
      };
      clients = mkOption {
        type = types.listOf (types.submodule {
          options = {
            url = mkOption {type = types.str;};
            tenant_id = mkOption {
              type = types.nullOr types.str;
              default = null;
            };
          };
        });
        default = [{url = "http://monitor.${homelabCfg.domain}:3100/loki/api/v1/push";}];
      };
    };

    sources = mkOption {
      type = types.listOf (types.submodule {
        options = {
          name = mkOption {type = types.str;};
          type = mkOption {
            type = types.enum ["journal" "file" "syslog" "docker"];
            default = "file";
          };
          files = mkOption {
            type = types.submodule {
              options = {
                paths = mkOption {
                  type = types.listOf types.str;
                  default = [];
                };
                multiline = mkOption {
                  type = types.nullOr types.attrs;
                  default = null;
                };
              };
            };
            default = {};
          };
          journal = mkOption {
            type = types.submodule {
              options = {
                path = mkOption {
                  type = types.str;
                  default = "/var/log/journal";
                };
              };
            };
            default = {};
          };
          labels = mkOption {
            type = types.attrsOf types.str;
            default = {};
          };
          pipelineStages = mkOption {
            type = types.listOf types.attrs;
            default = [];
          };
          enabled = mkOption {
            type = types.bool;
            default = true;
          };
        };
      });
      default = [];
    };

    defaultLabels = mkOption {
      type = types.attrsOf types.str;
      default = {
        hostname = homelabCfg.hostname;
        environment = homelabCfg.environment;
        location = homelabCfg.location;
      };
    };

    # Always exposed aggregated data.
    #
    # The values are computed in `default` rather than assigned in `config`:
    # for a read-only option the module system counts the default as a
    # definition, so a default plus a `config` assignment is rejected as
    # "set multiple times".
    allSources = mkOption {
      type = types.listOf types.attrs;
      default =
        if cfg.enable
        then localAggregation.allSources
        else [];
      readOnly = true;
      description = "Log sources of this host (empty while the logging system is disabled)";
    };

    global = mkOption {
      type = types.attrs;
      default =
        if cfg.enable && hasNodes
        then globalAggregation
        else {};
      readOnly = true;
      description = "Aggregation over all nodes (empty when disabled or when no nodes are passed in)";
    };
  };

  config = mkIf cfg.enable {
    # Local setup
    services.promtail = mkIf cfg.promtail.enable {
      enable = true;
      configuration = {
        server = {
          http_listen_port = cfg.promtail.port;
          grpc_listen_port = 0;
        };
        positions.filename = "/var/lib/promtail/positions.yaml";
        clients = cfg.promtail.clients;
        # Sources switched off through their `enabled` flag are not scraped.
        scrape_configs =
          map mkScrapeConfig
          (filter (source: source.enabled) localAggregation.allSources);
      };
    };

    networking.firewall.allowedTCPPorts = optionals cfg.promtail.enable [cfg.promtail.port];
  };
}
|
||||
222
modules/homelab/lib/systems/monitoring.nix
Normal file
222
modules/homelab/lib/systems/monitoring.nix
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
# Homelab monitoring module: sets up the node exporter for this host and
# exposes the local metrics endpoints and health checks together with an
# aggregation over all `nodes`.
{
  config,
  lib,
  nodes,
  ...
}:
with lib; let
  cfg = config.homelab.monitoring;
  homelabCfg = config.homelab;

  # Local aggregation from this instance
  localAggregation = {
    # Metrics from manually configured + automatic node exporter.
    # The exporter is only configured below when `cfg.enable` is set, so its
    # endpoint is advertised under the same condition.
    allMetrics =
      cfg.metrics
      ++ (optional (cfg.enable && cfg.nodeExporter.enable) {
        name = "node-exporter";
        host = homelabCfg.hostname;
        port = cfg.nodeExporter.port;
        path = "/metrics";
        jobName = "node";
        scrapeInterval = "30s";
        labels = {
          instance = "${homelabCfg.hostname}.${homelabCfg.domain}";
          environment = homelabCfg.environment;
          location = homelabCfg.location;
        };
      });

    allHealthChecks = cfg.healthChecks;
  };

  # Global aggregation from all nodes
  globalAggregation = let
    baseAgg = import ../aggregators/base.nix {inherit lib;};

    # Aggregate metrics from all nodes
    metricsAgg = baseAgg.aggregateFromNodes {
      inherit nodes;
      attributePath = "homelab.monitoring.allMetrics";
      enhancer = endpoint:
        endpoint
        // {
          _fullAddress = "${endpoint.host}:${toString endpoint.port}";
          _metricsUrl = "http://${endpoint.host}:${toString endpoint.port}${endpoint.path}";
        };
    };

    # Aggregate health checks from all nodes
    healthChecksAgg = baseAgg.aggregateFromNodes {
      inherit nodes;
      attributePath = "homelab.monitoring.allHealthChecks";
      enhancer = check: let
        actualHost = check.host;
        # A null port means "no explicit port in the URL".
        portPart =
          if check.port != null
          then ":${toString check.port}"
          else "";
        url = "${check.protocol or "http"}://${actualHost}${portPart}${check.path}";
      in
        check
        // {
          _actualHost = actualHost;
          _url = url;
        };
    };
  in {
    allMetrics = metricsAgg.all;
    allHealthChecks = healthChecksAgg.all;

    # Useful groupings for services
    metricsByJobName = groupBy (m: m.jobName) metricsAgg.all;
    healthChecksByGroup = groupBy (hc: hc.group or "default") healthChecksAgg.all;

    summary = {
      totalMetrics = length metricsAgg.all;
      totalHealthChecks = length healthChecksAgg.all;
      nodesCovered = unique (map (m: m._nodeName or m.host) metricsAgg.all);
    };
  };
in {
  # Instance-level monitoring options
  options.homelab.monitoring = {
    enable = mkEnableOption "monitoring system";

    # Node exporter (on by default once monitoring is enabled)
    nodeExporter = {
      enable = mkOption {
        type = types.bool;
        default = true;
      };
      port = mkOption {
        type = types.port;
        default = 9100;
      };
    };

    # Manual metrics (in addition to service auto-registration)
    metrics = mkOption {
      type = types.listOf (types.submodule {
        options = {
          name = mkOption {type = types.str;};
          host = mkOption {
            type = types.str;
            default = homelabCfg.hostname;
          };
          port = mkOption {type = types.port;};
          path = mkOption {
            type = types.str;
            default = "/metrics";
          };
          jobName = mkOption {type = types.str;};
          scrapeInterval = mkOption {
            type = types.str;
            default = "30s";
          };
          labels = mkOption {
            type = types.attrsOf types.str;
            default = {};
          };
        };
      });
      default = [];
    };

    # Manual health checks (in addition to service auto-registration)
    healthChecks = mkOption {
      type = types.listOf (types.submodule {
        options = {
          name = mkOption {type = types.str;};
          host = mkOption {
            type = types.str;
            default = homelabCfg.hostname;
          };
          port = mkOption {
            type = types.nullOr types.port;
            default = null;
          };
          path = mkOption {
            type = types.str;
            default = "/";
          };
          protocol = mkOption {
            type = types.enum ["http" "https" "tcp" "icmp"];
            default = "http";
          };
          method = mkOption {
            type = types.str;
            default = "GET";
          };
          interval = mkOption {
            type = types.str;
            default = "30s";
          };
          timeout = mkOption {
            type = types.str;
            default = "10s";
          };
          conditions = mkOption {
            type = types.listOf types.str;
            default = ["[STATUS] == 200"];
          };
          group = mkOption {
            type = types.str;
            default = "manual";
          };
          labels = mkOption {
            type = types.attrsOf types.str;
            default = {};
          };
          enabled = mkOption {
            type = types.bool;
            default = true;
          };
        };
      });
      default = [];
    };

    # Read-only aggregated data (always exposed).
    # The values live in `default`; a read-only option that has a default
    # must not additionally be assigned in `config`.
    allMetrics = mkOption {
      type = types.listOf types.attrs;
      default = localAggregation.allMetrics;
      readOnly = true;
    };

    allHealthChecks = mkOption {
      type = types.listOf types.attrs;
      default = localAggregation.allHealthChecks;
      readOnly = true;
    };

    # Global aggregation (always available)
    global = mkOption {
      type = types.attrs;
      default = globalAggregation;
      readOnly = true;
    };
  };

  # Host-level services; the aggregated data above is exposed via defaults.
  config = mkIf cfg.enable {
    # Basic instance setup
    services.prometheus.exporters.node = mkIf cfg.nodeExporter.enable {
      enable = true;
      port = cfg.nodeExporter.port;
      enabledCollectors = ["systemd" "textfile" "filesystem" "loadavg" "meminfo" "netdev" "stat"];
    };

    networking.firewall.allowedTCPPorts = optionals cfg.nodeExporter.enable [cfg.nodeExporter.port];
  };
}
|
||||
98
modules/homelab/lib/systems/proxy.nix
Normal file
98
modules/homelab/lib/systems/proxy.nix
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
# Homelab reverse proxy module: collects the proxy entries declared on this
# host and exposes them plus, when `nodes` is non-empty, a fleet-wide
# aggregation.
{
  config,
  lib,
  nodes,
  ...
}:
with lib; let
  cfg = config.homelab.reverseProxy;
  homelabCfg = config.homelab;
  hasNodes = length (attrNames nodes) > 0;

  # Local aggregation
  localAggregation = {
    allEntries = cfg.entries;
  };

  # Global aggregation
  globalAggregation = let
    baseAgg = import ../aggregators/base.nix {inherit lib;};

    entriesAgg = baseAgg.aggregateFromNodes {
      inherit nodes;
      attributePath = "homelab.reverseProxy.allEntries";
      # NOTE(review): relies on the aggregator attaching `_nodeConfig` (and
      # `_nodeName`, used in the summary) to every item - confirm in
      # aggregators/base.nix.
      enhancer = entry:
        entry
        // {
          _upstream = "http://${entry.host}:${toString entry.port}${entry.path or ""}";
          _fqdn = "${entry.subdomain}.${entry._nodeConfig.config.homelab.externalDomain or homelabCfg.externalDomain}";
          _internal = "${entry.host}:${toString entry.port}";
        };
    };
  in {
    allEntries = entriesAgg.all;
    entriesBySubdomain = groupBy (e: e.subdomain) entriesAgg.all;
    entriesWithAuth = entriesAgg.filterBy (e: e.enableAuth or false);
    entriesWithoutAuth = entriesAgg.filterBy (e: !(e.enableAuth or false));
    summary = {
      total = length entriesAgg.all;
      byNode = entriesAgg.countBy (e: e._nodeName);
      withAuth = length (entriesAgg.filterBy (e: e.enableAuth or false));
      withoutAuth = length (entriesAgg.filterBy (e: !(e.enableAuth or false)));
    };
  };
in {
  options.homelab.reverseProxy = {
    enable = mkEnableOption "reverse proxy system";

    entries = mkOption {
      type = types.listOf (types.submodule {
        options = {
          subdomain = mkOption {type = types.str;};
          host = mkOption {
            type = types.str;
            default = homelabCfg.hostname;
          };
          port = mkOption {type = types.port;};
          path = mkOption {
            type = types.str;
            default = "/";
          };
          enableAuth = mkOption {
            type = types.bool;
            default = false;
          };
          enableSSL = mkOption {
            type = types.bool;
            default = true;
          };
        };
      });
      default = [];
    };

    # Always exposed aggregated data.
    #
    # The values are computed in `default` rather than assigned in `config`:
    # for a read-only option the module system counts the default as a
    # definition, so a default plus a `config` assignment is rejected as
    # "set multiple times". No `config` section is needed any more.
    allEntries = mkOption {
      type = types.listOf types.attrs;
      default =
        if cfg.enable
        then localAggregation.allEntries
        else [];
      readOnly = true;
      description = "Reverse proxy entries of this host (empty while the reverse proxy system is disabled)";
    };

    global = mkOption {
      type = types.attrs;
      default =
        if cfg.enable && hasNodes
        then globalAggregation
        else {};
      readOnly = true;
      description = "Aggregation over all nodes (empty when disabled or when no nodes are passed in)";
    };
  };
}
|
||||
|
|
@ -1,214 +0,0 @@
|
|||
# Previous monitoring module: declares metrics and health check endpoints for
# this host and wires up the Prometheus node exporter.
{
  config,
  lib,
  ...
}:
with lib; let
  cfg = config.homelab.monitoring;
  homelabCfg = config.homelab;

  # Small constructors for the two option shapes that repeat below.
  required = type: description: mkOption {inherit type description;};
  withDefault = type: default: description: mkOption {inherit type default description;};

  # Default host of every endpoint: <hostname>.<domain>.
  hostFqdn = "${homelabCfg.hostname}.${homelabCfg.domain}";

  # One Prometheus scrape target.
  metricsEndpointType = types.submodule {
    options = {
      name = required types.str "Name of the metrics endpoint";
      host = withDefault types.str hostFqdn "Domain name of the host (default: hostname.domain)";
      port = required types.port "Port number for the endpoint";
      path = withDefault types.str "/metrics" "Path for the metrics endpoint";
      jobName = required types.str "Prometheus job name";
      scrapeInterval = withDefault types.str "30s" "Prometheus scrape interval";
      labels = withDefault (types.attrsOf types.str) {} "Additional labels for this endpoint";
    };
  };

  # One alert attached to a health check.
  alertType = types.submodule {
    options = {
      type = mkOption {
        description = "Alert type";
        example = "discord";
        type = types.str;
      };
      enabled = withDefault types.bool true "Whether this alert is enabled";
      failure-threshold = withDefault types.int 3 "Number of failures before alerting";
      success-threshold = withDefault types.int 2 "Number of successes before resolving alert";
    };
  };

  # One uptime check.
  healthCheckEndpointType = types.submodule {
    options = {
      name = required types.str "Name of the health check endpoint";
      host = withDefault types.str hostFqdn "Domain name of the host";
      port = withDefault (types.nullOr types.port) null "Port number for the endpoint (null for standard HTTP/HTTPS)";
      path = withDefault types.str "/" "Path for the health check endpoint";
      protocol = withDefault (types.enum ["http" "https" "tcp" "icmp"]) "http" "Protocol to use for health checks";
      method = withDefault types.str "GET" "HTTP method for health checks (only applies to http/https)";
      interval = withDefault types.str "30s" "Health check interval";
      timeout = withDefault types.str "10s" "Health check timeout";
      conditions = mkOption {
        description = "Health check conditions (Gatus format)";
        example = ["[STATUS] == 200" "[BODY].status == UP" "[RESPONSE_TIME] < 500"];
        default = ["[STATUS] == 200"];
        type = types.listOf types.str;
      };
      alerts = withDefault (types.listOf alertType) [] "Alert configurations";
      group = withDefault types.str "default" "Group name for organizing health checks";
      labels = withDefault (types.attrsOf types.str) {} "Additional labels for this health check";
      enabled = withDefault types.bool true "Whether this health check is enabled";
      # External domain support
      useExternalDomain = withDefault types.bool false "Use external domain instead of internal";
      subdomain = withDefault (types.nullOr types.str) null "Subdomain for external domain (required if useExternalDomain is true)";
    };
  };

  exporter = cfg.nodeExporter;
in {
  options.homelab.monitoring = {
    enable = mkEnableOption "Homelab monitoring";
    metrics = withDefault (types.listOf metricsEndpointType) [] "Metric endpoints exposed by this system";
    healthChecks = withDefault (types.listOf healthCheckEndpointType) [] "Health check endpoints for uptime monitoring";

    nodeExporter = {
      enable = withDefault types.bool true "Enable node exporter";
      port = withDefault types.port 9100 "Node exporter port";
    };
  };

  config = mkIf cfg.enable {
    # Run the node exporter when requested.
    services.prometheus.exporters.node = mkIf exporter.enable {
      enable = true;
      port = exporter.port;
      enabledCollectors = ["systemd" "textfile" "filesystem" "loadavg" "meminfo" "netdev" "stat"];
    };

    # Register the node exporter as a metrics endpoint of this host.
    homelab.monitoring.metrics = mkIf exporter.enable [
      {
        name = "node-exporter";
        port = exporter.port;
        path = "/metrics";
        jobName = "node";
        labels = {
          instance = "${homelabCfg.hostname}.${homelabCfg.domain}";
          environment = homelabCfg.environment;
          location = homelabCfg.location;
        };
      }
    ];

    # Open the exporter port only when the exporter runs.
    networking.firewall.allowedTCPPorts = optionals exporter.enable [exporter.port];
  };
}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# Previous reverse proxy module: only declares the list of proxy entries for
# this host; it sets no configuration of its own.
{
  config,
  lib,
  ...
}:
with lib; let
  cfg = config.homelab.reverseProxy;
  homelabCfg = config.homelab;

  # Small constructors for the two option shapes that repeat below.
  required = type: description: mkOption {inherit type description;};
  withDefault = type: default: description: mkOption {inherit type default description;};

  # One proxied service: <subdomain> -> <host>:<port><path>.
  reverseProxyEntryType = types.submodule {
    options = {
      subdomain = required types.str "Subdomain for the service";
      host = withDefault types.str "${homelabCfg.hostname}.${homelabCfg.domain}" "Host to proxy to";
      port = required types.port "Port to proxy to";
      path = withDefault types.str "/" "Path prefix for the service";
      enableAuth = withDefault types.bool false "Enable authentication for this service";
      enableSSL = withDefault types.bool true "Enable SSL for this service";
    };
  };
in {
  options.homelab.reverseProxy.entries =
    withDefault (types.listOf reverseProxyEntryType) [] "Reverse proxy entries for this system";

  config = {};
}
|
||||
|
|
@ -1,7 +1,9 @@
|
|||
{
|
||||
imports = [
|
||||
./minio.nix
|
||||
./monitoring/gatus.nix
|
||||
./monitoring/prometheus.nix
|
||||
./gatus.nix
|
||||
./prometheus.nix
|
||||
./grafana.nix
|
||||
# ./monitoring/loki.nix
|
||||
];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,161 +0,0 @@
|
|||
# Example showing how to create a service using the standard interface
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.grafana;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
grafanaServiceOptions = {
|
||||
domain = mkOption {
|
||||
type = types.str;
|
||||
default = "grafana.${homelabCfg.externalDomain}";
|
||||
description = "Domain for Grafana";
|
||||
};
|
||||
|
||||
rootUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "https://grafana.${homelabCfg.externalDomain}";
|
||||
description = "Root URL for Grafana";
|
||||
};
|
||||
|
||||
dataDir = serviceInterface.commonOptions.dataDir "grafana";
|
||||
|
||||
admin = {
|
||||
user = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin username";
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin password";
|
||||
};
|
||||
};
|
||||
|
||||
datasources = {
|
||||
prometheus = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Prometheus datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://localhost:9090";
|
||||
description = "Prometheus URL";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
plugins = mkOption {
|
||||
type = types.listOf types.package;
|
||||
default = [];
|
||||
description = "Grafana plugins to install";
|
||||
};
|
||||
};
|
||||
in {
|
||||
options.homelab.services.grafana = serviceInterface.mkServiceInterface {
|
||||
serviceName = "grafana";
|
||||
defaultPort = 3000;
|
||||
defaultSubdomain = "grafana";
|
||||
monitoringPath = "/metrics";
|
||||
healthCheckPath = "/api/health";
|
||||
healthCheckConditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].database == ok"
|
||||
"[RESPONSE_TIME] < 2000"
|
||||
];
|
||||
serviceOptions = grafanaServiceOptions;
|
||||
};
|
||||
|
||||
config = serviceInterface.mkServiceConfig {
|
||||
inherit config cfg homelabCfg;
|
||||
serviceName = "grafana";
|
||||
|
||||
extraMonitoringLabels = {
|
||||
component = "dashboard";
|
||||
};
|
||||
|
||||
customHealthChecks = [
|
||||
{
|
||||
name = "grafana-login";
|
||||
port = cfg.port;
|
||||
path = "/login";
|
||||
interval = "60s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[RESPONSE_TIME] < 3000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "login";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
serviceConfig = {
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
dataDir = cfg.dataDir;
|
||||
declarativePlugins = cfg.plugins;
|
||||
|
||||
settings = {
|
||||
server = {
|
||||
http_port = cfg.port;
|
||||
http_addr = "0.0.0.0";
|
||||
domain = cfg.domain;
|
||||
root_url = cfg.rootUrl;
|
||||
};
|
||||
|
||||
security = {
|
||||
admin_user = cfg.admin.user;
|
||||
admin_password = cfg.admin.password;
|
||||
};
|
||||
};
|
||||
|
||||
provision = {
|
||||
enable = true;
|
||||
datasources.settings.datasources = mkIf cfg.datasources.prometheus.enable [
|
||||
{
|
||||
name = "Prometheus";
|
||||
type = "prometheus";
|
||||
url = cfg.datasources.prometheus.url;
|
||||
isDefault = true;
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
# Usage example in your configuration:
|
||||
/*
|
||||
{
|
||||
homelab.services.grafana = {
|
||||
enable = true;
|
||||
# Standard interface options:
|
||||
port = 3000; # Optional: defaults to 3000
|
||||
openFirewall = true; # Optional: defaults to true
|
||||
proxy.subdomain = "grafana"; # Optional: defaults to "grafana"
|
||||
proxy.enableAuth = false; # Optional: defaults to false
|
||||
monitoring.enable = true; # Optional: defaults to true
|
||||
|
||||
# Service-specific options:
|
||||
admin.password = "secure-password";
|
||||
datasources.prometheus.url = "http://prometheus.lab:9090";
|
||||
plugins = with pkgs.grafanaPlugins; [ grafana-piechart-panel ];
|
||||
};
|
||||
}
|
||||
*/
|
||||
|
||||
267
modules/homelab/services/gatus.nix
Normal file
267
modules/homelab/services/gatus.nix
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceName = "gatus";
|
||||
cfg = config.homelab.services.${serviceName};
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Convert one homelab health check (an attribute set) into a Gatus endpoint
# definition.
#
# Only `name` is always required. `host` is additionally required when the
# check does not carry a pre-built `_url`. Every other field falls back to a
# default so that a sparse check never aborts evaluation.
formatHealthCheck = check: let
  # Protocol drives both the URL scheme and whether an HTTP method is emitted.
  protocol = check.protocol or "http";
  isHttp = protocol == "http" || protocol == "https";

  # Fallback URL pieces; these are only forced when `_url` is absent.
  port = toString (check.port or (
    if protocol == "https"
    then 443
    else 80
  ));
  # A path is only meaningful for HTTP(S) endpoints.
  # NOTE(review): non-HTTP checks whose target is not expressible as
  # `scheme://host:port` are expected to supply `_url` - confirm with the
  # monitoring feature module.
  path =
    if isHttp
    then check.path or "/"
    else "";

  # Prefer the pre-computed URL, otherwise assemble one from the parts.
  url = check._url or "${protocol}://${check.host}:${port}${path}";

  # Host name used for the human-readable description only.
  host = check.host or check._actualHost or "unknown";

  # Convert alerts to Gatus format; `type` is mandatory, the rest is defaulted.
  alerts = map (alert: {
    inherit (alert) type;
    enabled = alert.enabled or true;
    failure-threshold = alert.failure-threshold or 3;
    success-threshold = alert.success-threshold or 2;
    description = "Health check alert for ${check.name}";
  }) (check.alerts or []);
in
  {
    name = check.name;
    group = check.group or "default";
    inherit url alerts;
    interval = check.interval or "30s";

    # Conditions are passed through unchanged.
    conditions = check.conditions or ["[STATUS] == 200"];

    client = {
      timeout = check.timeout or "10s";
    };

    ui = {
      hide-hostname = false;
      hide-url = false;
      description = "Health check for ${check.name} on ${host}";
    };
  }
  # Only HTTP(S) endpoints get a method; other protocols omit the key instead
  # of serialising `method: null`.
  // (
    if isHttp
    then {method = check.method or "GET";}
    else {}
  );
|
||||
|
||||
# Generate the Gatus settings attribute set from the module options and the
# aggregated health checks, then overlay `cfg.extraConfig` on top.
#
# `alerting` is added with `optionalAttrs` rather than `mkIf`: a `mkIf` value
# is itself an attribute set, so `recursiveUpdate` would merge any
# `extraConfig.alerting` keys into the `mkIf` wrapper, where they are dropped
# once the module system unwraps it.
gatusConfig = let
  # Get all health checks - try global first, fall back to local.
  allHealthChecks =
    homelabCfg.monitoring.global.allHealthChecks
    or homelabCfg.monitoring.allHealthChecks
    or [];

  # Checks without an explicit `enabled` field count as enabled.
  enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks;

  baseConfig =
    {
      web = {
        address = cfg.web.address;
        port = cfg.port;
      };

      # Expose metrics only when monitoring is enabled for this service.
      metrics = cfg.monitoring.enable;

      ui = {
        inherit (cfg.ui) title header link buttons;
      };

      storage = cfg.storage;

      # Convert all enabled health checks from the fleet to Gatus endpoints.
      endpoints = map formatHealthCheck enabledHealthChecks;
    }
    // optionalAttrs (cfg.alerting != {}) {
      alerting = cfg.alerting;
    };
in
  # extraConfig wins on conflicts; nested attribute sets are merged, any other
  # value (including lists such as `endpoints`) is replaced wholesale.
  recursiveUpdate baseConfig cfg.extraConfig;
|
||||
in {
|
||||
imports = [
|
||||
(import ../lib/features/monitoring.nix serviceName)
|
||||
(import ../lib/features/logging.nix serviceName)
|
||||
(import ../lib/features/proxy.nix serviceName)
|
||||
];
|
||||
|
||||
# Core service options.
#
# Free-form settings use `types.attrsOf types.anything` instead of
# `types.attrs`, so that multiple definitions are merged recursively and
# properties such as `mkDefault`/`mkIf` inside them are honoured.
options.homelab.services.${serviceName} = {
  enable = mkEnableOption "Gatus Status Page";

  port = mkOption {
    type = types.port;
    default = 8080;
    description = "Port the Gatus web interface listens on";
  };

  description = mkOption {
    type = types.str;
    default = "Gatus Status Page";
    description = "Human-readable description of this service";
  };

  # Gatus-specific options
  ui = {
    title = mkOption {
      type = types.str;
      default = "Homelab Status";
      description = "Title for the Gatus web interface";
    };

    header = mkOption {
      type = types.str;
      default = "Homelab Services Status";
      description = "Header text for the Gatus interface";
    };

    link = mkOption {
      type = types.str;
      default = "https://status.${homelabCfg.externalDomain}";
      description = "Link in the Gatus header";
    };

    buttons = mkOption {
      type = types.listOf (types.submodule {
        options = {
          name = mkOption {
            type = types.str;
            description = "Label shown on the button";
          };
          link = mkOption {
            type = types.str;
            description = "URL the button points to";
          };
        };
      });
      default = [
        {
          name = "Grafana";
          link = "https://grafana.${homelabCfg.externalDomain}";
        }
        {
          name = "Prometheus";
          link = "https://prometheus.${homelabCfg.externalDomain}";
        }
      ];
      description = "Navigation buttons in the Gatus interface";
    };
  };

  alerting = mkOption {
    type = types.attrsOf types.anything;
    default = {};
    description = "Gatus alerting configuration";
    example = literalExpression ''
      {
        discord = {
          webhook-url = "https://discord.com/api/webhooks/...";
          default-alert = {
            enabled = true;
            description = "Health check failed";
            failure-threshold = 3;
            success-threshold = 2;
          };
        };
      }
    '';
  };

  storage = mkOption {
    type = types.attrsOf types.anything;
    default = {
      type = "memory";
    };
    description = "Gatus storage configuration";
    example = literalExpression ''
      {
        type = "postgres";
        path = "postgres://user:password@localhost/gatus?sslmode=disable";
      }
    '';
  };

  web = {
    address = mkOption {
      type = types.str;
      default = "0.0.0.0";
      description = "Web interface bind address";
    };
  };

  extraConfig = mkOption {
    type = types.attrsOf types.anything;
    default = {};
    description = "Additional Gatus configuration options";
  };
};
|
||||
|
||||
# Service configuration with smart defaults.
#
# Everything is gated on `cfg.enable`. Each feature (monitoring, logging,
# proxy) contributes its `mkDefault` values only while that feature is
# enabled, so explicit user settings take precedence.
config = let
  # Default scrape / health-check settings for the Gatus instance itself.
  monitoringDefaults = {
    metrics.path = "/metrics";
    metrics.extraEndpoints = [];
    healthCheck.path = "/health";
    healthCheck.conditions = [
      "[STATUS] == 200"
      "[BODY].status == UP"
      "[RESPONSE_TIME] < 1000"
    ];
    healthCheck.extraChecks = [];
    extraLabels.component = "status-monitoring";
    extraLabels.tier = "monitoring";
  };

  # Default log shipping settings.
  loggingDefaults = {
    files = ["/var/log/gatus/gatus.log"];
    # Gatus log format: 2024-01-01T12:00:00Z [INFO] message
    parsing.regex = "^(?P<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z) \\[(?P<level>\\w+)\\] (?P<message>.*)";
    parsing.extractFields = ["level"];
    extraLabels.component = "status-monitoring";
    extraLabels.application = "gatus";
  };

  # Default reverse-proxy settings.
  proxyDefaults = {
    subdomain = "status";
    enableAuth = false; # Status page should be public
  };
in
  mkIf cfg.enable (mkMerge [
    # Core Gatus service
    {
      services.gatus.enable = true;
      services.gatus.settings = gatusConfig;

      networking.firewall.allowedTCPPorts = [cfg.port];

      homelab.services.${serviceName}.monitoring.enable = mkDefault true;
    }

    (mkIf cfg.monitoring.enable {
      homelab.services.${serviceName}.monitoring = mkDefault monitoringDefaults;
    })

    (mkIf cfg.logging.enable {
      homelab.services.${serviceName}.logging = mkDefault loggingDefaults;
    })

    (mkIf cfg.proxy.enable {
      homelab.services.${serviceName}.proxy = mkDefault proxyDefaults;
    })
  ]);
|
||||
}
|
||||
86
modules/homelab/services/grafana.nix
Normal file
86
modules/homelab/services/grafana.nix
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceName = "grafana";
|
||||
cfg = config.homelab.services.${serviceName};
|
||||
in {
|
||||
imports = [
|
||||
(import ../lib/features/monitoring.nix serviceName)
|
||||
(import ../lib/features/logging.nix serviceName)
|
||||
(import ../lib/features/proxy.nix serviceName)
|
||||
];
|
||||
|
||||
# Core options for the Grafana service. Every option carries a description
# so that it shows up properly in generated option documentation.
options.homelab.services.${serviceName} = {
  enable = mkEnableOption "Grafana Dashboard";

  port = mkOption {
    type = types.port;
    default = 3000;
    description = "Port the Grafana web interface listens on";
  };

  description = mkOption {
    type = types.str;
    default = "Grafana Metrics Dashboard";
    description = "Human-readable description of this service";
  };
};
|
||||
|
||||
# Grafana service wiring. Everything is gated on `cfg.enable`; the per-feature
# defaults are applied with `mkDefault` only while the feature is enabled.
config = let
  # Default log shipping settings.
  loggingDefaults = {
    files = ["/var/log/grafana/grafana.log"];
    # Grafana log format: t=2024-01-01T12:00:00Z lvl=info msg="message"
    parsing.regex = "^t=(?P<timestamp>[^ ]+) lvl=(?P<level>\\w+) msg=\"(?P<message>[^\"]*)\"";
    parsing.extractFields = ["level"];
    extraLabels.application = "grafana";
    extraLabels.component = "dashboard";
  };

  # Default metrics / health-check settings.
  monitoringDefaults = {
    metrics.path = "/metrics";
    healthCheck.path = "/api/health";
    healthCheck.conditions = ["[STATUS] == 200" "[BODY].database == ok"];
    extraLabels.component = "dashboard";
    extraLabels.tier = "monitoring";
  };

  # Default reverse-proxy settings.
  # NOTE(review): only the subdomain is set here; proxy authentication is left
  # at whatever the proxy feature defaults to (`enableAuth = true` was
  # deliberately left commented out) - confirm that this is intended for an
  # admin interface.
  proxyDefaults = {
    subdomain = "grafana";
    # enableAuth = true;
  };
in
  mkIf cfg.enable (mkMerge [
    # Core Grafana service
    {
      services.grafana.enable = true;
      services.grafana.settings.server.http_port = cfg.port;
      services.grafana.settings.server.http_addr = "0.0.0.0";

      networking.firewall.allowedTCPPorts = [cfg.port];

      homelab.services.${serviceName}.monitoring.enable = mkDefault true;
    }

    (mkIf cfg.logging.enable {
      homelab.services.${serviceName}.logging = mkDefault loggingDefaults;
    })

    (mkIf cfg.monitoring.enable {
      homelab.services.${serviceName}.monitoring = mkDefault monitoringDefaults;
    })

    (mkIf cfg.proxy.enable {
      homelab.services.${serviceName}.proxy = mkDefault proxyDefaults;
    })
  ]);
|
||||
}
|
||||
|
|
@ -1,125 +0,0 @@
|
|||
# modules/services/jellyfin.nix
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.services.jellyfin;
|
||||
in {
|
||||
options.services.jellyfin = {
|
||||
enable = mkEnableOption "Jellyfin media server";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 8096;
|
||||
description = "Port for Jellyfin web interface";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/jellyfin";
|
||||
description = "Directory to store Jellyfin data";
|
||||
};
|
||||
|
||||
mediaDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/media";
|
||||
description = "Directory containing media files";
|
||||
};
|
||||
|
||||
enableMetrics = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Prometheus metrics";
|
||||
};
|
||||
|
||||
exposeWeb = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Expose web interface through reverse proxy";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Enable the service
|
||||
services.jellyfin = {
|
||||
enable = true;
|
||||
dataDir = cfg.dataDir;
|
||||
};
|
||||
|
||||
# Configure global settings
|
||||
homelab.global = {
|
||||
# Add backup job for Jellyfin data
|
||||
backups.jobs = [
|
||||
{
|
||||
name = "jellyfin-config";
|
||||
backend = "restic";
|
||||
paths = ["${cfg.dataDir}/config" "${cfg.dataDir}/data"];
|
||||
schedule = "0 2 * * *"; # Daily at 2 AM
|
||||
excludePatterns = [
|
||||
"*/cache/*"
|
||||
"*/transcodes/*"
|
||||
"*/logs/*"
|
||||
];
|
||||
preHook = ''
|
||||
# Stop jellyfin for consistent backup
|
||||
systemctl stop jellyfin
|
||||
'';
|
||||
postHook = ''
|
||||
# Restart jellyfin after backup
|
||||
systemctl start jellyfin
|
||||
'';
|
||||
}
|
||||
{
|
||||
name = "jellyfin-media";
|
||||
backend = "restic";
|
||||
paths = [cfg.mediaDir];
|
||||
schedule = "0 3 * * 0"; # Weekly on Sunday at 3 AM
|
||||
excludePatterns = [
|
||||
"*.tmp"
|
||||
"*/.@__thumb/*" # Synology thumbnails
|
||||
];
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry if enabled
|
||||
reverseProxy.entries = mkIf cfg.exposeWeb [
|
||||
{
|
||||
subdomain = "jellyfin";
|
||||
port = cfg.port;
|
||||
enableAuth = false; # Jellyfin has its own auth
|
||||
websockets = true;
|
||||
customHeaders = {
|
||||
"X-Forwarded-Proto" = "$scheme";
|
||||
"X-Forwarded-Host" = "$host";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add monitoring endpoint if metrics enabled
|
||||
monitoring.endpoints = mkIf cfg.enableMetrics [
|
||||
{
|
||||
name = "jellyfin";
|
||||
port = cfg.port;
|
||||
path = "/metrics"; # Assuming you have a metrics plugin
|
||||
jobName = "jellyfin";
|
||||
scrapeInterval = "60s";
|
||||
labels = {
|
||||
service = "jellyfin";
|
||||
type = "media-server";
|
||||
};
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
# Open firewall
|
||||
networking.firewall.allowedTCPPorts = [cfg.port];
|
||||
|
||||
# Create media directory
|
||||
systemd.tmpfiles.rules = [
|
||||
"d ${cfg.mediaDir} 0755 jellyfin jellyfin -"
|
||||
];
|
||||
};
|
||||
}
|
||||
|
|
@ -4,110 +4,13 @@
|
|||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.gatus;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Convert our health check format to Gatus format
|
||||
formatHealthCheck = check: let
|
||||
# Build the URL
|
||||
url = check._url;
|
||||
|
||||
# Convert conditions to Gatus format (they should already be compatible)
|
||||
conditions = check.conditions or ["[STATUS] == 200"];
|
||||
|
||||
# Convert alerts to Gatus format
|
||||
alerts = map (alert: {
|
||||
inherit (alert) type enabled;
|
||||
failure-threshold = alert.failure-threshold or 3;
|
||||
success-threshold = alert.success-threshold or 2;
|
||||
description = "Health check alert for ${check.name}";
|
||||
}) (check.alerts or []);
|
||||
in {
|
||||
name = check.name;
|
||||
group = check.group or "default";
|
||||
url = url;
|
||||
interval = check.interval or "30s";
|
||||
|
||||
# Add method and headers for HTTP/HTTPS checks
|
||||
method =
|
||||
if (check.protocol == "http" || check.protocol == "https")
|
||||
then check.method or "GET"
|
||||
else null;
|
||||
|
||||
conditions = conditions;
|
||||
|
||||
# Add timeout
|
||||
client = {
|
||||
timeout = check.timeout or "10s";
|
||||
};
|
||||
|
||||
# Add alerts if configured
|
||||
alerts =
|
||||
if alerts != []
|
||||
then alerts
|
||||
else [];
|
||||
|
||||
# Add labels for UI organization
|
||||
ui = {
|
||||
hide-hostname = false;
|
||||
hide-url = false;
|
||||
description = "Health check for ${check.name} on ${check._nodeName}";
|
||||
};
|
||||
};
|
||||
|
||||
# Generate Gatus configuration
|
||||
gatusConfig = {
|
||||
# Global Gatus settings
|
||||
alerting = mkIf (cfg.alerting != {}) cfg.alerting;
|
||||
|
||||
web = {
|
||||
address = "0.0.0.0";
|
||||
port = cfg.port;
|
||||
};
|
||||
|
||||
# TODO: Introduce monitor option to toggle monitoring
|
||||
metrics = true;
|
||||
|
||||
ui = {
|
||||
title = cfg.ui.title;
|
||||
header = cfg.ui.header;
|
||||
link = cfg.ui.link;
|
||||
buttons = cfg.ui.buttons;
|
||||
};
|
||||
|
||||
storage = mkIf (cfg.storage != {}) cfg.storage;
|
||||
|
||||
# Convert all enabled health checks to Gatus endpoints
|
||||
endpoints = let
|
||||
# Get all health checks from global config
|
||||
allHealthChecks = homelabCfg.global.monitoring.enabledHealthChecks or [];
|
||||
|
||||
# Group by group name for better organization
|
||||
# groupedChecks = homelabCfg.global.monitoring.healthChecksByGroup or {};
|
||||
|
||||
# Convert to Gatus format
|
||||
gatusEndpoints = map formatHealthCheck allHealthChecks;
|
||||
in
|
||||
gatusEndpoints;
|
||||
};
|
||||
in {
|
||||
options.homelab.services.gatus = {
|
||||
enable = mkEnableOption "Gatus uptime monitoring service";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 8080;
|
||||
description = "Port for Gatus web interface";
|
||||
};
|
||||
|
||||
openFirewall = lib.mkOption {
|
||||
type = lib.types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Whether to automatically open the specified ports in the firewall.
|
||||
'';
|
||||
};
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
gatusServiceOptions = {
|
||||
ui = {
|
||||
title = mkOption {
|
||||
type = types.str;
|
||||
|
|
@ -123,7 +26,7 @@ in {
|
|||
|
||||
link = mkOption {
|
||||
type = types.str;
|
||||
default = "https://gatus.${homelabCfg.externalDomain}";
|
||||
default = "https://status.${homelabCfg.externalDomain}";
|
||||
description = "Link in the Gatus header";
|
||||
};
|
||||
|
||||
|
|
@ -186,59 +89,129 @@ in {
|
|||
default = {};
|
||||
description = "Additional Gatus configuration options";
|
||||
};
|
||||
|
||||
web = {
|
||||
address = mkOption {
|
||||
type = types.str;
|
||||
default = "0.0.0.0";
|
||||
description = "Web interface bind address";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.gatus = {
|
||||
enable = true;
|
||||
openFirewall = cfg.openFirewall;
|
||||
settings = gatusConfig;
|
||||
# Convert our health check format to Gatus format
|
||||
formatHealthCheck = check: let
|
||||
# Build the URL based on the health check configuration
|
||||
url = check._url;
|
||||
|
||||
# Convert conditions to Gatus format (they should already be compatible)
|
||||
conditions = check.conditions or ["[STATUS] == 200"];
|
||||
|
||||
# Convert alerts to Gatus format
|
||||
alerts = map (alert: {
|
||||
inherit (alert) type enabled;
|
||||
failure-threshold = alert.failure-threshold or 3;
|
||||
success-threshold = alert.success-threshold or 2;
|
||||
description = "Health check alert for ${check.name}";
|
||||
}) (check.alerts or []);
|
||||
in {
|
||||
name = check.name;
|
||||
group = check.group or "default";
|
||||
url = url;
|
||||
interval = check.interval or "30s";
|
||||
|
||||
# Add method and headers for HTTP/HTTPS checks
|
||||
method =
|
||||
if (check.protocol == "http" || check.protocol == "https")
|
||||
then check.method or "GET"
|
||||
else null;
|
||||
|
||||
conditions = conditions;
|
||||
|
||||
# Add timeout
|
||||
client = {
|
||||
timeout = check.timeout or "10s";
|
||||
};
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
{
|
||||
name = "gatus";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "gatus";
|
||||
labels = {
|
||||
service = "gatus";
|
||||
component = "monitoring";
|
||||
};
|
||||
}
|
||||
];
|
||||
# Add alerts if configured
|
||||
alerts =
|
||||
if alerts != []
|
||||
then alerts
|
||||
else [];
|
||||
|
||||
# Add health check for Gatus itself
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "gatus-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/health";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].status == UP"
|
||||
"[RESPONSE_TIME] < 1000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "gatus";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
];
|
||||
# Add labels for UI organization
|
||||
ui = {
|
||||
hide-hostname = false;
|
||||
hide-url = false;
|
||||
description = "Health check for ${check.name} on ${check.host}";
|
||||
};
|
||||
};
|
||||
|
||||
# Add reverse proxy entry if needed
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "status";
|
||||
host = homelabCfg.hostname;
|
||||
# Generate Gatus configuration
|
||||
gatusConfig =
|
||||
recursiveUpdate {
|
||||
# Global Gatus settings
|
||||
alerting = mkIf (cfg.alerting != {}) cfg.alerting;
|
||||
|
||||
web = {
|
||||
address = cfg.web.address;
|
||||
port = cfg.port;
|
||||
# path = "/";
|
||||
# enableAuth = false; # Status page should be publicly accessible
|
||||
# enableSSL = true;
|
||||
}
|
||||
};
|
||||
|
||||
# Enable metrics
|
||||
metrics = cfg.monitoring.enable;
|
||||
|
||||
ui = {
|
||||
title = cfg.ui.title;
|
||||
header = cfg.ui.header;
|
||||
link = cfg.ui.link;
|
||||
buttons = cfg.ui.buttons;
|
||||
};
|
||||
|
||||
storage = cfg.storage;
|
||||
|
||||
# Convert all enabled health checks to Gatus endpoints
|
||||
endpoints = let
|
||||
# Get all health checks from global config
|
||||
allHealthChecks = homelabCfg.global.monitoring.allHealthChecks or [];
|
||||
|
||||
# Filter only enabled health checks
|
||||
enabledHealthChecks = filter (check: check.enabled or true) allHealthChecks;
|
||||
|
||||
# Convert to Gatus format
|
||||
gatusEndpoints = map formatHealthCheck enabledHealthChecks;
|
||||
in
|
||||
gatusEndpoints;
|
||||
}
|
||||
cfg.extraConfig;
|
||||
in {
|
||||
options.homelab.services.gatus = serviceInterface.mkServiceInterface {
|
||||
serviceName = "gatus";
|
||||
defaultPort = 8080;
|
||||
defaultSubdomain = "status";
|
||||
monitoringPath = "/metrics";
|
||||
healthCheckPath = "/health";
|
||||
healthCheckConditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].status == UP"
|
||||
"[RESPONSE_TIME] < 1000"
|
||||
];
|
||||
serviceOptions = gatusServiceOptions;
|
||||
};
|
||||
|
||||
config = serviceInterface.mkServiceConfig {
|
||||
inherit config cfg homelabCfg;
|
||||
serviceName = "gatus";
|
||||
|
||||
extraMonitoringLabels = {
|
||||
component = "status-monitoring";
|
||||
};
|
||||
|
||||
serviceConfig = {
|
||||
services.gatus = {
|
||||
enable = true;
|
||||
settings = gatusConfig;
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,169 +5,389 @@
|
|||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.grafana;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Default dashboards for homelab monitoring
|
||||
# Default community dashboards with proper configuration
|
||||
defaultDashboards = {
|
||||
"node-exporter" = pkgs.fetchurl {
|
||||
url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
|
||||
sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this
|
||||
"node-exporter-full" = {
|
||||
name = "Node Exporter Full";
|
||||
id = 12486;
|
||||
revision = 2;
|
||||
# url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
|
||||
sha256 = "sha256-1DE1aaanRHHeCOMWDGdOS1wBXxOF84UXAjJzT5Ek6mM=";
|
||||
|
||||
url = "https://grafana.com/api/dashboards/12486/revisions/2/download";
|
||||
};
|
||||
"prometheus-stats" = pkgs.fetchurl {
|
||||
"prometheus-2-0-stats" = {
|
||||
name = "Prometheus 2.0 Stats";
|
||||
id = 2;
|
||||
revision = 2;
|
||||
url = "https://grafana.com/api/dashboards/2/revisions/2/download";
|
||||
sha256 = "sha256-0000000000000000000000000000000000000000000="; # You'll need to update this
|
||||
sha256 = "sha256-Ydk4LPwfX4qJN8tiWPLWQdtAqzj8CKi6HYsuE+kWcXw=";
|
||||
};
|
||||
};
|
||||
|
||||
# Grafana provisioning configuration
|
||||
provisioningConfig = {
|
||||
# Data sources
|
||||
datasources =
|
||||
[
|
||||
{
|
||||
name = "Prometheus";
|
||||
type = "prometheus";
|
||||
access = "proxy";
|
||||
url = cfg.datasources.prometheus.url;
|
||||
isDefault = true;
|
||||
editable = false;
|
||||
jsonData = {
|
||||
timeInterval = "5s";
|
||||
queryTimeout = "60s";
|
||||
httpMethod = "POST";
|
||||
};
|
||||
}
|
||||
]
|
||||
++ cfg.datasources.extra;
|
||||
# Function to fetch a dashboard from Grafana.com
|
||||
fetchGrafanaDashboard = name: config:
|
||||
pkgs.fetchurl {
|
||||
inherit (config) url sha256;
|
||||
name = "${name}-dashboard.json";
|
||||
};
|
||||
|
||||
# Dashboard providers
|
||||
dashboards = [
|
||||
{
|
||||
name = "homelab";
|
||||
type = "file";
|
||||
disableDeletion = false;
|
||||
updateIntervalSeconds = 10;
|
||||
allowUiUpdates = true;
|
||||
options = {
|
||||
path = "/var/lib/grafana/dashboards";
|
||||
# Git repository management for custom dashboards
|
||||
gitDashboardsRepo = mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") (
|
||||
pkgs.fetchgit {
|
||||
url = cfg.dashboards.git.url;
|
||||
rev = cfg.dashboards.git.rev;
|
||||
sha256 = cfg.dashboards.git.sha256;
|
||||
}
|
||||
);
|
||||
|
||||
# Dashboard provisioning configuration
|
||||
provisionDashboard = name: source: {
|
||||
"grafana-dashboards/${name}.json" = {
|
||||
inherit source;
|
||||
user = "grafana";
|
||||
group = "grafana";
|
||||
mode = "0644";
|
||||
};
|
||||
};
|
||||
|
||||
# Generate dashboard files from various sources
|
||||
dashboardFiles =
|
||||
# Default community dashboards
|
||||
(foldl' (
|
||||
acc: name:
|
||||
acc // (provisionDashboard name (fetchGrafanaDashboard name defaultDashboards.${name}))
|
||||
) {} (attrNames (filterAttrs (n: v: cfg.dashboards.defaults.${n}.enable) cfg.dashboards.defaults)))
|
||||
# Custom file-based dashboards
|
||||
// (foldl' (
|
||||
acc: dashboard:
|
||||
acc // (provisionDashboard dashboard.name dashboard.source)
|
||||
) {}
|
||||
cfg.dashboards.files)
|
||||
# Git-synced dashboards
|
||||
// (optionalAttrs (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") (
|
||||
let
|
||||
gitDashboards =
|
||||
if pathExists "${gitDashboardsRepo}/${cfg.dashboards.git.path}"
|
||||
then builtins.readDir "${gitDashboardsRepo}/${cfg.dashboards.git.path}"
|
||||
else {};
|
||||
in
|
||||
mapAttrs' (
|
||||
filename: type: let
|
||||
name = removeSuffix ".json" filename;
|
||||
source = "${gitDashboardsRepo}/${cfg.dashboards.git.path}/${filename}";
|
||||
in
|
||||
nameValuePair "grafana-dashboards/${name}.json" {
|
||||
inherit source;
|
||||
user = "grafana";
|
||||
group = "grafana";
|
||||
mode = "0644";
|
||||
}
|
||||
) (filterAttrs (name: type: type == "regular" && hasSuffix ".json" name) gitDashboards)
|
||||
));
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
grafanaServiceOptions = {
|
||||
# Authentication settings
|
||||
auth = {
|
||||
admin = {
|
||||
user = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin username";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Notification channels
|
||||
notifiers = cfg.notifications;
|
||||
};
|
||||
in {
|
||||
options.homelab.services.grafana = {
|
||||
enable = mkEnableOption "Grafana dashboard service";
|
||||
passwordFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Path to admin password file";
|
||||
};
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 3000;
|
||||
description = "Port for Grafana web interface";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to open firewall ports";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/grafana";
|
||||
description = "Directory to store Grafana data";
|
||||
};
|
||||
|
||||
domain = mkOption {
|
||||
type = types.str;
|
||||
default = "grafana.${homelabCfg.externalDomain}";
|
||||
description = "Domain for Grafana";
|
||||
};
|
||||
|
||||
rootUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "https://grafana.${homelabCfg.externalDomain}";
|
||||
description = "Root URL for Grafana";
|
||||
};
|
||||
|
||||
admin = {
|
||||
user = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin username";
|
||||
email = mkOption {
|
||||
type = types.str;
|
||||
default = "admin@${homelabCfg.externalDomain}";
|
||||
description = "Admin email address";
|
||||
};
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin password (change this!)";
|
||||
disableLoginForm = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Disable the login form";
|
||||
};
|
||||
|
||||
email = mkOption {
|
||||
type = types.str;
|
||||
default = "admin@${homelabCfg.externalDomain}";
|
||||
description = "Admin email";
|
||||
oauthAutoLogin = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable OAuth auto-login";
|
||||
};
|
||||
|
||||
anonymousAccess = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable anonymous access";
|
||||
};
|
||||
|
||||
orgName = mkOption {
|
||||
type = types.str;
|
||||
default = "Homelab";
|
||||
description = "Organization name for anonymous users";
|
||||
};
|
||||
|
||||
orgRole = mkOption {
|
||||
type = types.enum ["Viewer" "Editor" "Admin"];
|
||||
default = "Viewer";
|
||||
description = "Role for anonymous users";
|
||||
};
|
||||
};
|
||||
|
||||
genericOauth = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable generic OAuth";
|
||||
};
|
||||
|
||||
configFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Path to OAuth configuration file";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Enhanced datasource configuration
|
||||
datasources = {
|
||||
prometheus = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Prometheus datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://localhost:9090";
|
||||
default = "http://127.0.0.1:9090";
|
||||
description = "Prometheus URL";
|
||||
};
|
||||
|
||||
uid = mkOption {
|
||||
type = types.str;
|
||||
default = "prometheus";
|
||||
description = "Unique identifier for Prometheus datasource";
|
||||
};
|
||||
|
||||
scrapeInterval = mkOption {
|
||||
type = types.str;
|
||||
default = "15s";
|
||||
description = "Default scrape interval for Prometheus";
|
||||
};
|
||||
|
||||
manageAlerts = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Manage alerts in Grafana";
|
||||
};
|
||||
|
||||
exemplarTraceIdDestinations = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Exemplar trace ID destinations";
|
||||
};
|
||||
};
|
||||
|
||||
loki = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable Loki datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://127.0.0.1:3100";
|
||||
description = "Loki URL";
|
||||
};
|
||||
|
||||
uid = mkOption {
|
||||
type = types.str;
|
||||
default = "loki";
|
||||
description = "Unique identifier for Loki datasource";
|
||||
};
|
||||
|
||||
maxLines = mkOption {
|
||||
type = types.int;
|
||||
default = 1000;
|
||||
description = "Maximum lines to return from Loki";
|
||||
};
|
||||
|
||||
derivedFields = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Derived fields configuration for Loki";
|
||||
};
|
||||
};
|
||||
|
||||
influxdb = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable InfluxDB datasource";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://127.0.0.1:8086";
|
||||
description = "InfluxDB URL";
|
||||
};
|
||||
|
||||
database = mkOption {
|
||||
type = types.str;
|
||||
default = "homelab";
|
||||
description = "InfluxDB database name";
|
||||
};
|
||||
|
||||
tokenFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Path to InfluxDB token file";
|
||||
};
|
||||
|
||||
uid = mkOption {
|
||||
type = types.str;
|
||||
default = "influxdb";
|
||||
description = "Unique identifier for InfluxDB datasource";
|
||||
};
|
||||
|
||||
version = mkOption {
|
||||
type = types.enum ["1.x" "2.x"];
|
||||
default = "2.x";
|
||||
description = "InfluxDB version";
|
||||
};
|
||||
|
||||
organization = mkOption {
|
||||
type = types.str;
|
||||
default = "homelab";
|
||||
description = "InfluxDB organization (for v2.x)";
|
||||
};
|
||||
|
||||
bucket = mkOption {
|
||||
type = types.str;
|
||||
default = "homelab";
|
||||
description = "InfluxDB bucket (for v2.x)";
|
||||
};
|
||||
};
|
||||
|
||||
extra = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional data sources";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "Loki";
|
||||
type = "loki";
|
||||
url = "http://localhost:3100";
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
notifications = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Notification channels configuration";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "discord-webhook";
|
||||
type = "discord";
|
||||
settings = {
|
||||
url = "https://discord.com/api/webhooks/...";
|
||||
username = "Grafana";
|
||||
# Enhanced dashboard configuration
|
||||
dashboards = {
|
||||
# Default community dashboards
|
||||
defaults = mkOption {
|
||||
type = types.attrsOf (types.submodule {
|
||||
options = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable this default dashboard";
|
||||
};
|
||||
};
|
||||
});
|
||||
default = mapAttrs (name: config: {enable = false;}) defaultDashboards;
|
||||
description = "Enable default community dashboards";
|
||||
example = literalExpression ''
|
||||
{
|
||||
"node-exporter-full".enable = true;
|
||||
"prometheus-2-0-stats".enable = true;
|
||||
}
|
||||
]
|
||||
'';
|
||||
'';
|
||||
};
|
||||
|
||||
# File-based dashboards
|
||||
files = mkOption {
|
||||
type = types.listOf (types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
description = "Dashboard name (without .json extension)";
|
||||
};
|
||||
source = mkOption {
|
||||
type = types.path;
|
||||
description = "Path to dashboard JSON file";
|
||||
};
|
||||
};
|
||||
});
|
||||
default = [];
|
||||
description = "Dashboard files to provision";
|
||||
};
|
||||
|
||||
# Git-based dashboard sync
|
||||
git = {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable git-based dashboard synchronization";
|
||||
};
|
||||
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "Git repository URL for dashboards";
|
||||
};
|
||||
|
||||
rev = mkOption {
|
||||
type = types.str;
|
||||
default = "HEAD";
|
||||
description = "Git revision to use";
|
||||
};
|
||||
|
||||
sha256 = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "SHA256 hash of the git repository content";
|
||||
};
|
||||
|
||||
path = mkOption {
|
||||
type = types.str;
|
||||
default = ".";
|
||||
description = "Path within the git repository containing dashboards";
|
||||
};
|
||||
|
||||
updateInterval = mkOption {
|
||||
type = types.str;
|
||||
default = "1h";
|
||||
description = "How often to check for dashboard updates";
|
||||
};
|
||||
};
|
||||
|
||||
path = mkOption {
|
||||
type = types.str;
|
||||
default = "/etc/grafana-dashboards";
|
||||
description = "Path where dashboard files are stored";
|
||||
};
|
||||
};
|
||||
|
||||
# Plugin configuration
|
||||
plugins = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [
|
||||
"grafana-piechart-panel"
|
||||
"grafana-worldmap-panel"
|
||||
"grafana-clock-panel"
|
||||
"grafana-simple-json-datasource"
|
||||
];
|
||||
type = types.listOf types.package;
|
||||
default = [];
|
||||
description = "Grafana plugins to install";
|
||||
};
|
||||
|
||||
# SMTP configuration
|
||||
smtp = {
|
||||
enabled = mkOption {
|
||||
enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable SMTP for email notifications";
|
||||
|
|
@ -185,10 +405,10 @@ in {
|
|||
description = "SMTP username";
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
description = "SMTP password";
|
||||
passwordFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Path to SMTP password file";
|
||||
};
|
||||
|
||||
fromAddress = mkOption {
|
||||
|
|
@ -202,9 +422,22 @@ in {
|
|||
default = "Homelab Grafana";
|
||||
description = "From name";
|
||||
};
|
||||
|
||||
skipVerify = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Skip SSL certificate verification";
|
||||
};
|
||||
};
|
||||
|
||||
# Security settings
|
||||
security = {
|
||||
secretKeyFile = mkOption {
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
description = "Path to secret key file for signing";
|
||||
};
|
||||
|
||||
allowEmbedding = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
|
|
@ -217,200 +450,279 @@ in {
|
|||
description = "Set secure flag on cookies";
|
||||
};
|
||||
|
||||
secretKey = mkOption {
|
||||
type = types.str;
|
||||
default = "change-this-secret-key";
|
||||
description = "Secret key for signing (change this!)";
|
||||
contentSecurityPolicy = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Content Security Policy header";
|
||||
};
|
||||
|
||||
strictTransportSecurity = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Enable Strict Transport Security header";
|
||||
};
|
||||
};
|
||||
|
||||
auth = {
|
||||
anonymousEnabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable anonymous access";
|
||||
};
|
||||
|
||||
disableLoginForm = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Disable login form";
|
||||
};
|
||||
# Data directory
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/grafana";
|
||||
description = "Directory to store Grafana data";
|
||||
};
|
||||
|
||||
extraConfig = mkOption {
|
||||
# Extra Grafana settings
|
||||
extraSettings = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
description = "Additional Grafana configuration";
|
||||
description = "Additional Grafana settings";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
settings =
|
||||
recursiveUpdate {
|
||||
server = {
|
||||
http_addr = "0.0.0.0";
|
||||
http_port = cfg.port;
|
||||
domain = cfg.domain;
|
||||
root_url = cfg.rootUrl;
|
||||
serve_from_sub_path = false;
|
||||
};
|
||||
|
||||
database = {
|
||||
type = "sqlite3";
|
||||
path = "${cfg.dataDir}/grafana.db";
|
||||
};
|
||||
|
||||
security = {
|
||||
admin_user = cfg.admin.user;
|
||||
admin_password = cfg.admin.password;
|
||||
admin_email = cfg.admin.email;
|
||||
allow_embedding = cfg.security.allowEmbedding;
|
||||
cookie_secure = cfg.security.cookieSecure;
|
||||
secret_key = cfg.security.secretKey;
|
||||
};
|
||||
|
||||
users = {
|
||||
allow_sign_up = false;
|
||||
auto_assign_org = true;
|
||||
auto_assign_org_role = "Viewer";
|
||||
};
|
||||
|
||||
auth.anonymous = {
|
||||
enabled = cfg.auth.anonymousEnabled;
|
||||
org_name = "Homelab";
|
||||
org_role = "Viewer";
|
||||
};
|
||||
|
||||
auth.basic = {
|
||||
enabled = !cfg.auth.disableLoginForm;
|
||||
};
|
||||
|
||||
smtp = mkIf cfg.smtp.enabled {
|
||||
enabled = true;
|
||||
host = cfg.smtp.host;
|
||||
user = cfg.smtp.user;
|
||||
password = cfg.smtp.password;
|
||||
from_address = cfg.smtp.fromAddress;
|
||||
from_name = cfg.smtp.fromName;
|
||||
};
|
||||
|
||||
analytics = {
|
||||
reporting_enabled = false;
|
||||
check_for_updates = false;
|
||||
};
|
||||
|
||||
log = {
|
||||
mode = "console";
|
||||
level = "info";
|
||||
};
|
||||
|
||||
paths = {
|
||||
data = cfg.dataDir;
|
||||
logs = "${cfg.dataDir}/log";
|
||||
plugins = "${cfg.dataDir}/plugins";
|
||||
provisioning = "/etc/grafana/provisioning";
|
||||
};
|
||||
}
|
||||
cfg.extraConfig;
|
||||
|
||||
dataDir = cfg.dataDir;
|
||||
# Enhanced datasource configuration
|
||||
buildDatasources = let
|
||||
# Build prometheus datasource
|
||||
prometheusDatasource = optional cfg.datasources.prometheus.enable {
|
||||
uid = cfg.datasources.prometheus.uid;
|
||||
name = "Prometheus";
|
||||
type = "prometheus";
|
||||
url = cfg.datasources.prometheus.url;
|
||||
access = "proxy";
|
||||
isDefault = true;
|
||||
editable = false;
|
||||
jsonData = {
|
||||
timeInterval = cfg.datasources.prometheus.scrapeInterval;
|
||||
queryTimeout = "60s";
|
||||
httpMethod = "POST";
|
||||
manageAlerts = cfg.datasources.prometheus.manageAlerts;
|
||||
exemplarTraceIdDestinations = cfg.datasources.prometheus.exemplarTraceIdDestinations;
|
||||
};
|
||||
};
|
||||
|
||||
# Install plugins
|
||||
systemd.services.grafana.preStart = mkIf (cfg.plugins != []) (
|
||||
concatStringsSep "\n" (map (
|
||||
plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true"
|
||||
)
|
||||
cfg.plugins)
|
||||
);
|
||||
# Build loki datasource
|
||||
lokiDatasource = optional cfg.datasources.loki.enable {
|
||||
uid = cfg.datasources.loki.uid;
|
||||
name = "Loki";
|
||||
type = "loki";
|
||||
url = cfg.datasources.loki.url;
|
||||
access = "proxy";
|
||||
editable = false;
|
||||
jsonData = {
|
||||
maxLines = cfg.datasources.loki.maxLines;
|
||||
derivedFields = cfg.datasources.loki.derivedFields;
|
||||
};
|
||||
};
|
||||
|
||||
# Provisioning configuration
|
||||
environment.etc =
|
||||
{
|
||||
"grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON {
|
||||
apiVersion = 1;
|
||||
datasources = provisioningConfig.datasources;
|
||||
};
|
||||
# Build influxdb datasource
|
||||
influxdbDatasource = optional cfg.datasources.influxdb.enable {
|
||||
uid = cfg.datasources.influxdb.uid;
|
||||
name = "InfluxDB";
|
||||
type = "influxdb";
|
||||
url = cfg.datasources.influxdb.url;
|
||||
access = "proxy";
|
||||
database = cfg.datasources.influxdb.database;
|
||||
editable = false;
|
||||
jsonData = {
|
||||
dbName = cfg.datasources.influxdb.database;
|
||||
httpHeaderName1 = "Authorization";
|
||||
version = cfg.datasources.influxdb.version;
|
||||
organization = cfg.datasources.influxdb.organization;
|
||||
defaultBucket = cfg.datasources.influxdb.bucket;
|
||||
};
|
||||
secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) {
|
||||
httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}";
|
||||
};
|
||||
};
|
||||
|
||||
"grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON {
|
||||
apiVersion = 1;
|
||||
providers = provisioningConfig.dashboards;
|
||||
};
|
||||
}
|
||||
// (mkIf (cfg.notifications != []) {
|
||||
"grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON {
|
||||
apiVersion = 1;
|
||||
notifiers = provisioningConfig.notifiers;
|
||||
};
|
||||
});
|
||||
|
||||
# Create dashboard directory
|
||||
systemd.tmpfiles.rules = [
|
||||
"d ${cfg.dataDir}/dashboards 0755 grafana grafana -"
|
||||
# Build extra datasources
|
||||
extraDatasources = cfg.datasources.extra;
|
||||
in
|
||||
prometheusDatasource ++ lokiDatasource ++ influxdbDatasource ++ extraDatasources;
|
||||
in {
|
||||
options.homelab.services.grafana = serviceInterface.mkServiceInterface {
|
||||
serviceName = "grafana";
|
||||
defaultPort = 3000;
|
||||
defaultSubdomain = "grafana";
|
||||
monitoringPath = "/metrics";
|
||||
healthCheckPath = "/api/health";
|
||||
healthCheckConditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].database == ok"
|
||||
"[RESPONSE_TIME] < 2000"
|
||||
];
|
||||
serviceOptions = grafanaServiceOptions;
|
||||
};
|
||||
|
||||
# Open firewall if requested
|
||||
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [cfg.port];
|
||||
config = serviceInterface.mkServiceConfig {
|
||||
inherit config cfg homelabCfg;
|
||||
serviceName = "grafana";
|
||||
|
||||
# Add to monitoring endpoints
|
||||
homelab.monitoring.metrics = [
|
||||
extraMonitoringLabels = {
|
||||
component = "dashboard";
|
||||
};
|
||||
|
||||
# Additional health checks specific to Grafana
|
||||
customHealthChecks = [];
|
||||
|
||||
serviceConfig = mkMerge [
|
||||
{
|
||||
name = "grafana";
|
||||
port = cfg.port;
|
||||
path = "/metrics";
|
||||
jobName = "grafana";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "monitoring";
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
dataDir = cfg.dataDir;
|
||||
# declarativePlugins =
|
||||
# cfg.plugins
|
||||
# ++ (with pkgs.grafanaPlugins; [
|
||||
# grafana-exploretraces-app
|
||||
# grafana-metricsdrilldown-app
|
||||
# grafana-pyroscope-app
|
||||
# grafana-lokiexplore-app
|
||||
# grafana-worldmap-panel
|
||||
# grafana-piechart-panel
|
||||
# ]);
|
||||
|
||||
settings =
|
||||
recursiveUpdate {
|
||||
server = {
|
||||
http_port = cfg.port;
|
||||
http_addr = "0.0.0.0";
|
||||
domain = "${cfg.proxy.subdomain}.${homelabCfg.externalDomain}";
|
||||
root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}";
|
||||
serve_from_sub_path = false;
|
||||
};
|
||||
|
||||
database = {
|
||||
type = "sqlite3";
|
||||
path = "${cfg.dataDir}/grafana.db";
|
||||
};
|
||||
|
||||
security =
|
||||
{
|
||||
admin_user = cfg.auth.admin.user;
|
||||
admin_email = cfg.auth.admin.email;
|
||||
# allow_embedding = cfg.security.allowEmbedding;
|
||||
# cookie_secure = cfg.security.cookieSecure;
|
||||
# content_security_policy = cfg.security.contentSecurityPolicy;
|
||||
# strict_transport_security = cfg.security.strictTransportSecurity;
|
||||
}
|
||||
// (optionalAttrs (cfg.auth.admin.passwordFile != null) {
|
||||
admin_password = "$__file{${cfg.auth.admin.passwordFile}}";
|
||||
})
|
||||
// (optionalAttrs (cfg.security.secretKeyFile != null) {
|
||||
secret_key = "$__file{${cfg.security.secretKeyFile}}";
|
||||
});
|
||||
|
||||
users = {
|
||||
allow_sign_up = false;
|
||||
auto_assign_org = true;
|
||||
auto_assign_org_role = "Viewer";
|
||||
};
|
||||
|
||||
"auth.anonymous" = {
|
||||
enabled = cfg.auth.anonymousAccess.enable;
|
||||
org_name = cfg.auth.anonymousAccess.orgName;
|
||||
org_role = cfg.auth.anonymousAccess.orgRole;
|
||||
};
|
||||
|
||||
"auth.basic" = {
|
||||
enabled = !cfg.auth.disableLoginForm;
|
||||
};
|
||||
|
||||
"auth.generic_oauth" =
|
||||
mkIf cfg.auth.genericOauth.enabled {
|
||||
enabled = true;
|
||||
}
|
||||
// (optionalAttrs (cfg.auth.genericOauth.configFile != null) {
|
||||
client_id = "$__file{${cfg.auth.genericOauth.configFile}}";
|
||||
});
|
||||
|
||||
smtp = mkIf cfg.smtp.enable ({
|
||||
enabled = true;
|
||||
host = cfg.smtp.host;
|
||||
user = cfg.smtp.user;
|
||||
from_address = cfg.smtp.fromAddress;
|
||||
from_name = cfg.smtp.fromName;
|
||||
skip_verify = cfg.smtp.skipVerify;
|
||||
}
|
||||
// (optionalAttrs (cfg.smtp.passwordFile != null) {
|
||||
password = "$__file{${cfg.smtp.passwordFile}}";
|
||||
}));
|
||||
|
||||
analytics = {
|
||||
reporting_enabled = false;
|
||||
check_for_updates = false;
|
||||
};
|
||||
news.news_feed_enabled = false;
|
||||
|
||||
feature_toggles = {
|
||||
provisioning = true;
|
||||
kubernetesDashboards = true;
|
||||
};
|
||||
# paths = {
|
||||
# plugins = "${cfg.dataDir}/plugins";
|
||||
# provisioning = "/etc/grafana/provisioning";
|
||||
# };
|
||||
}
|
||||
cfg.extraSettings;
|
||||
|
||||
provision = {
|
||||
enable = true;
|
||||
|
||||
datasources.settings.datasources = buildDatasources;
|
||||
|
||||
dashboards.settings.providers = [
|
||||
{
|
||||
name = "homelab-dashboards";
|
||||
type = "file";
|
||||
disableDeletion = false;
|
||||
updateIntervalSeconds = 10;
|
||||
allowUiUpdates = true;
|
||||
options = {
|
||||
path = cfg.dashboards.path;
|
||||
};
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add health checks
|
||||
homelab.monitoring.healthChecks = [
|
||||
{
|
||||
name = "grafana-web-interface";
|
||||
port = cfg.port;
|
||||
path = "/api/health";
|
||||
interval = "30s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[BODY].database == ok"
|
||||
"[RESPONSE_TIME] < 2000"
|
||||
# Provision dashboard files
|
||||
environment.etc = dashboardFiles;
|
||||
|
||||
# Ensure dashboard directory exists
|
||||
systemd.tmpfiles.rules = [
|
||||
"d ${cfg.dashboards.path} 0755 grafana grafana -"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "web-interface";
|
||||
};
|
||||
}
|
||||
{
|
||||
name = "grafana-login-page";
|
||||
port = cfg.port;
|
||||
path = "/login";
|
||||
interval = "60s";
|
||||
conditions = [
|
||||
"[STATUS] == 200"
|
||||
"[RESPONSE_TIME] < 3000"
|
||||
];
|
||||
group = "monitoring";
|
||||
labels = {
|
||||
service = "grafana";
|
||||
component = "login";
|
||||
};
|
||||
}
|
||||
];
|
||||
|
||||
# Add reverse proxy entry
|
||||
homelab.reverseProxy.entries = [
|
||||
{
|
||||
subdomain = "grafana";
|
||||
host = homelabCfg.hostname;
|
||||
port = cfg.port;
|
||||
}
|
||||
# Git dashboard sync service (if enabled)
|
||||
(mkIf (cfg.dashboards.git.enable && cfg.dashboards.git.url != "") {
|
||||
systemd.services.grafana-dashboard-sync = {
|
||||
description = "Sync Grafana dashboards from git";
|
||||
after = ["grafana.service"];
|
||||
wantedBy = ["multi-user.target"];
|
||||
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
User = "grafana";
|
||||
Group = "grafana";
|
||||
};
|
||||
|
||||
script = ''
|
||||
echo "Syncing dashboards from git repository..."
|
||||
# Dashboard files are already provisioned via Nix
|
||||
# This service can be extended for runtime updates if needed
|
||||
systemctl reload grafana.service
|
||||
'';
|
||||
};
|
||||
|
||||
systemd.timers.grafana-dashboard-sync = {
|
||||
description = "Timer for Grafana dashboard sync";
|
||||
wantedBy = ["timers.target"];
|
||||
|
||||
timerConfig = {
|
||||
OnCalendar = cfg.dashboards.git.updateInterval;
|
||||
Persistent = true;
|
||||
};
|
||||
};
|
||||
})
|
||||
];
|
||||
};
|
||||
}
|
||||
|
|
|
|||
198
modules/homelab/services/monitoring/grafana_1.nix
Normal file
198
modules/homelab/services/monitoring/grafana_1.nix
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
# Example showing how to create a service using the standard interface
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.grafana;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
# Grafana-specific options that extend the standard service interface.
# This attrset is handed to mkServiceInterface as `serviceOptions`.
grafanaServiceOptions = {
  # Bootstrap administrator account.
  admin = {
    user = mkOption {
      description = "Admin username";
      default = "admin";
      type = types.str;
    };

    # NOTE(review): the value is interpolated into "$__file{...}" for
    # security.admin_password, so the default "admin" is treated as a file
    # path rather than a password — confirm whether a default is intended.
    passwordFile = mkOption {
      description = "Path to the Admin password file";
      default = "admin";
      type = types.str;
    };
  };

  # Built-in datasources; each one is only provisioned when its `enable` is set.
  datasources = {
    prometheus = {
      enable = mkOption {
        description = "Enable Prometheus datasource";
        default = true;
        type = types.bool;
      };

      url = mkOption {
        description = "Prometheus URL";
        default = "http://127.0.0.1:9090";
        type = types.str;
      };

      uid = mkOption {
        description = "Unique identifier for Prometheus datasource";
        default = "prometheus";
        type = types.str;
      };
    };

    loki = {
      enable = mkOption {
        description = "Enable Loki datasource";
        default = false;
        type = types.bool;
      };

      url = mkOption {
        description = "Loki URL";
        default = "http://127.0.0.1:3100";
        type = types.str;
      };

      uid = mkOption {
        description = "Unique identifier for Loki datasource";
        default = "loki";
        type = types.str;
      };
    };

    influxdb = {
      enable = mkOption {
        description = "Enable InfluxDB datasource";
        default = false;
        type = types.bool;
      };

      url = mkOption {
        description = "InfluxDB URL";
        default = "http://127.0.0.1:8086";
        type = types.str;
      };

      database = mkOption {
        description = "InfluxDB database name";
        default = "homelab";
        type = types.str;
      };

      # null means "no token configured".
      tokenFile = mkOption {
        description = "Path to InfluxDB token file";
        default = null;
        type = types.nullOr types.path;
      };

      uid = mkOption {
        description = "Unique identifier for InfluxDB datasource";
        default = "influxdb";
        type = types.str;
      };
    };

    # Free-form datasource attrsets appended after the built-in ones.
    extra = mkOption {
      description = "Additional data sources";
      default = [];
      type = types.listOf types.attrs;
    };
  };

  # Plugin packages (not plugin names): the type is a list of derivations.
  plugins = mkOption {
    description = "Grafana plugins to install";
    default = [];
    type = types.listOf types.package;
  };
};
|
||||
in {
|
||||
# Declare homelab.services.grafana by delegating to the shared
# service-interface helper and attaching the Grafana-specific options.
options.homelab.services.grafana = let
  # Conditions evaluated against the /api/health response.
  # NOTE(review): looks like Gatus-style condition syntax — confirm against
  # the health-check consumer in service-interface.nix.
  healthyWhen = [
    "[STATUS] == 200"
    "[BODY].database == ok"
    "[RESPONSE_TIME] < 2000"
  ];
in
  serviceInterface.mkServiceInterface {
    serviceOptions = grafanaServiceOptions;

    serviceName = "grafana";
    defaultSubdomain = "grafana";
    defaultPort = 3000;

    monitoringPath = "/metrics";
    healthCheckPath = "/api/health";
    healthCheckConditions = healthyWhen;
  };
|
||||
|
||||
# Build the module's `config` through the shared service helper. The
# Grafana-specific NixOS configuration is supplied as `serviceConfig`.
config = serviceInterface.mkServiceConfig {
  inherit config cfg homelabCfg;
  serviceName = "grafana";

  extraMonitoringLabels = {
    component = "dashboard";
  };

  serviceConfig = {
    services.grafana = {
      enable = true;
      declarativePlugins = cfg.plugins;

      settings = {
        server = {
          http_port = cfg.port;
          http_addr = "0.0.0.0";
          root_url = "https://${cfg.proxy.subdomain}.${homelabCfg.externalDomain}";
        };

        security = {
          admin_user = cfg.admin.user;
          # "$__file{path}" makes Grafana read the value from that file at
          # startup, so the password itself is not written into the config.
          admin_password = "$__file{${cfg.admin.passwordFile}}";
        };
      };

      provision = {
        enable = true;
        datasources.settings = {
          # Each built-in datasource contributes zero or one entry depending
          # on its `enable` flag; user-supplied extras are appended last.
          datasources =
            optional cfg.datasources.prometheus.enable {
              uid = cfg.datasources.prometheus.uid;
              name = "Prometheus";
              type = "prometheus";
              url = cfg.datasources.prometheus.url;
            }
            ++ optional cfg.datasources.loki.enable {
              uid = cfg.datasources.loki.uid;
              name = "Loki";
              type = "loki";
              url = cfg.datasources.loki.url;
            }
            ++ optional cfg.datasources.influxdb.enable {
              uid = cfg.datasources.influxdb.uid;
              name = "InfluxDB";
              type = "influxdb";
              url = cfg.datasources.influxdb.url;
              access = "proxy";
              jsonData = {
                dbName = cfg.datasources.influxdb.database;
                httpHeaderName1 = "Authorization";
              };
              # The option is declared as `tokenFile`; the previous
              # `tokenPath` reference named an attribute that does not exist.
              # Only set when a token file is configured.
              secureJsonData = mkIf (cfg.datasources.influxdb.tokenFile != null) {
                httpHeaderValue1 = "$__file{${cfg.datasources.influxdb.tokenFile}}";
              };
            }
            ++ cfg.datasources.extra;
        };
      };
    };
  };
};
|
||||
}
|
||||
416
modules/homelab/services/monitoring/grafana_gg.nix
Normal file
416
modules/homelab/services/monitoring/grafana_gg.nix
Normal file
|
|
@ -0,0 +1,416 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
cfg = config.homelab.services.grafana;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Default dashboards for homelab monitoring
|
||||
# Community dashboards fetched from grafana.com, keyed by a short name.
defaultDashboards = let
  # Placeholder hash — each entry must be replaced with the real hash of the
  # downloaded dashboard before the fetch can succeed.
  placeholderHash = "sha256-0000000000000000000000000000000000000000000=";

  # Fetch one dashboard revision from the grafana.com download endpoint.
  fromGrafanaCom = {
    id,
    revision,
    sha256,
  }:
    pkgs.fetchurl {
      inherit sha256;
      url = "https://grafana.com/api/dashboards/${toString id}/revisions/${toString revision}/download";
    };
in {
  "node-exporter" = fromGrafanaCom {
    id = 1860;
    revision = 37;
    sha256 = placeholderHash; # You'll need to update this
  };
  "prometheus-stats" = fromGrafanaCom {
    id = 2;
    revision = 2;
    sha256 = placeholderHash; # You'll need to update this
  };
};
|
||||
|
||||
# Grafana provisioning configuration
|
||||
# Grafana provisioning data: datasources, dashboard providers and notifiers.
provisioningConfig = let
  # Prometheus is always provisioned and marked as the default datasource.
  prometheusSource = {
    name = "Prometheus";
    type = "prometheus";
    url = cfg.datasources.prometheus.url;
    access = "proxy";
    isDefault = true;
    editable = false;
    jsonData = {
      httpMethod = "POST";
      queryTimeout = "60s";
      timeInterval = "5s";
    };
  };

  # Single file-based provider reading dashboards from a fixed directory.
  homelabProvider = {
    name = "homelab";
    type = "file";
    options = {
      path = "/var/lib/grafana/dashboards";
    };
    allowUiUpdates = true;
    disableDeletion = false;
    updateIntervalSeconds = 10;
  };
in {
  # Data sources: Prometheus first, then any user-supplied extras.
  datasources = [prometheusSource] ++ cfg.datasources.extra;

  # Dashboard providers
  dashboards = [homelabProvider];

  # Notification channels, passed through unchanged from the module options.
  notifiers = cfg.notifications;
};
|
||||
in {
|
||||
options.homelab.services.grafana = {
|
||||
enable = mkEnableOption "Grafana dashboard service";
|
||||
|
||||
port = mkOption {
|
||||
type = types.port;
|
||||
default = 3000;
|
||||
description = "Port for Grafana web interface";
|
||||
};
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = "Whether to open firewall ports";
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
type = types.str;
|
||||
default = "/var/lib/grafana";
|
||||
description = "Directory to store Grafana data";
|
||||
};
|
||||
|
||||
domain = mkOption {
|
||||
type = types.str;
|
||||
default = "grafana.${homelabCfg.externalDomain}";
|
||||
description = "Domain for Grafana";
|
||||
};
|
||||
|
||||
rootUrl = mkOption {
|
||||
type = types.str;
|
||||
default = "https://grafana.${homelabCfg.externalDomain}";
|
||||
description = "Root URL for Grafana";
|
||||
};
|
||||
|
||||
admin = {
|
||||
user = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin username";
|
||||
};
|
||||
|
||||
password = mkOption {
|
||||
type = types.str;
|
||||
default = "admin";
|
||||
description = "Admin password (change this!)";
|
||||
};
|
||||
|
||||
email = mkOption {
|
||||
type = types.str;
|
||||
default = "admin@${homelabCfg.externalDomain}";
|
||||
description = "Admin email";
|
||||
};
|
||||
};
|
||||
|
||||
datasources = {
|
||||
prometheus = {
|
||||
url = mkOption {
|
||||
type = types.str;
|
||||
default = "http://localhost:9090";
|
||||
description = "Prometheus URL";
|
||||
};
|
||||
};
|
||||
|
||||
extra = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Additional data sources";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "Loki";
|
||||
type = "loki";
|
||||
url = "http://localhost:3100";
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
notifications = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
description = "Notification channels configuration";
|
||||
example = literalExpression ''
|
||||
[
|
||||
{
|
||||
name = "discord-webhook";
|
||||
type = "discord";
|
||||
settings = {
|
||||
url = "https://discord.com/api/webhooks/...";
|
||||
username = "Grafana";
|
||||
};
|
||||
}
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
plugins = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [
|
||||
"grafana-piechart-panel"
|
||||
"grafana-worldmap-panel"
|
||||
"grafana-clock-panel"
|
||||
"grafana-simple-json-datasource"
|
||||
];
|
||||
description = "Grafana plugins to install";
|
||||
};
|
||||
|
||||
smtp = let
  # Every textual SMTP setting is a plain string option with a default.
  strOption = default: description:
    mkOption {
      type = types.str;
      inherit default description;
    };
in {
  # Master switch; the remaining values are only used when this is true.
  enabled = mkOption {
    description = "Enable SMTP for email notifications";
    type = types.bool;
    default = false;
  };

  host = strOption "localhost:587" "SMTP server host:port";
  user = strOption "" "SMTP username";
  password = strOption "" "SMTP password";
  fromAddress = strOption "grafana@${homelabCfg.externalDomain}" "From email address";
  fromName = strOption "Homelab Grafana" "From name";
};
|
||||
|
||||
security = let
  # Boolean switch with a default and a description.
  flag = default: description:
    mkOption {
      type = types.bool;
      inherit default description;
    };
in {
  allowEmbedding = flag false "Allow embedding Grafana in iframes";
  cookieSecure = flag true "Set secure flag on cookies";

  # The default is only a placeholder; the description asks for it to be
  # replaced.
  secretKey = mkOption {
    description = "Secret key for signing (change this!)";
    type = types.str;
    default = "change-this-secret-key";
  };
};
|
||||
|
||||
auth = let
  # Both authentication switches are booleans that default to off.
  disabledByDefault = description:
    mkOption {
      type = types.bool;
      default = false;
      inherit description;
    };
in {
  anonymousEnabled = disabledByDefault "Enable anonymous access";
  disableLoginForm = disabledByDefault "Disable login form";
};
|
||||
|
||||
# Untyped attribute set that is recursively merged over the generated
# Grafana settings.
extraConfig = mkOption {
  description = "Additional Grafana configuration";
  default = {};
  type = types.attrs;
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
services.grafana = let
  # Settings derived from the homelab Grafana options. cfg.extraConfig is
  # layered on top of these when the final value is built below.
  derivedSettings = {
    server = {
      http_addr = "0.0.0.0";
      http_port = cfg.port;
      inherit (cfg) domain;
      root_url = cfg.rootUrl;
      serve_from_sub_path = false;
    };

    database = {
      type = "sqlite3";
      path = "${cfg.dataDir}/grafana.db";
    };

    security = {
      admin_user = cfg.admin.user;
      admin_password = cfg.admin.password;
      admin_email = cfg.admin.email;
      allow_embedding = cfg.security.allowEmbedding;
      cookie_secure = cfg.security.cookieSecure;
      secret_key = cfg.security.secretKey;
    };

    users = {
      allow_sign_up = false;
      auto_assign_org = true;
      auto_assign_org_role = "Viewer";
    };

    auth = {
      anonymous = {
        enabled = cfg.auth.anonymousEnabled;
        org_name = "Homelab";
        org_role = "Viewer";
      };
      # Basic auth is switched off together with the login form.
      basic.enabled = !cfg.auth.disableLoginForm;
    };

    # Only emitted when SMTP has been enabled in the homelab options.
    smtp = mkIf cfg.smtp.enabled {
      enabled = true;
      inherit (cfg.smtp) host user password;
      from_address = cfg.smtp.fromAddress;
      from_name = cfg.smtp.fromName;
    };

    analytics = {
      reporting_enabled = false;
      check_for_updates = false;
    };

    log = {
      mode = "console";
      level = "info";
    };

    paths = {
      data = cfg.dataDir;
      logs = "${cfg.dataDir}/log";
      plugins = "${cfg.dataDir}/plugins";
      provisioning = "/etc/grafana/provisioning";
    };
  };
in {
  enable = true;
  inherit (cfg) dataDir;

  # Values from cfg.extraConfig take precedence over the derived settings.
  settings = recursiveUpdate derivedSettings cfg.extraConfig;
};
|
||||
|
||||
# Install plugins: one grafana-cli call per configured plugin, joined by
# newlines. A failing install is tolerated ("|| true").
systemd.services.grafana.preStart = mkIf (cfg.plugins != []) (
  concatMapStringsSep "\n"
  (plugin: "${pkgs.grafana}/bin/grafana-cli --pluginsDir ${cfg.dataDir}/plugins plugins install ${plugin} || true")
  cfg.plugins
);
|
||||
|
||||
# Provisioning configuration
#
# Datasource and dashboard provider files are always written; the notifier
# file is added only when notification channels are configured.
#
# The conditional part is combined with mkMerge rather than `//`: mkIf
# returns a wrapper attribute set that only the module system knows how to
# unwrap, so `{ ... } // (mkIf ...)` would add the wrapper's own attributes
# to environment.etc instead of the notifier file.
environment.etc = mkMerge [
  {
    "grafana/provisioning/datasources/datasources.yaml".text = builtins.toJSON {
      apiVersion = 1;
      datasources = provisioningConfig.datasources;
    };

    "grafana/provisioning/dashboards/dashboards.yaml".text = builtins.toJSON {
      apiVersion = 1;
      providers = provisioningConfig.dashboards;
    };
  }

  (mkIf (cfg.notifications != []) {
    "grafana/provisioning/notifiers/notifiers.yaml".text = builtins.toJSON {
      apiVersion = 1;
      notifiers = provisioningConfig.notifiers;
    };
  })
];
|
||||
|
||||
# Create dashboard directory (owned by the grafana user and group).
systemd.tmpfiles.rules = singleton "d ${cfg.dataDir}/dashboards 0755 grafana grafana -";

# Open firewall if requested
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall (singleton cfg.port);
|
||||
|
||||
# Add to monitoring endpoints: register Grafana's own /metrics endpoint.
homelab.monitoring.metrics = let
  service = "grafana";
in [
  {
    name = service;
    jobName = service;
    port = cfg.port;
    path = "/metrics";
    labels = {
      inherit service;
      component = "monitoring";
    };
  }
];
|
||||
|
||||
# Add health checks
homelab.monitoring.healthChecks = let
  # Both checks target the Grafana port and share group and service label;
  # only name, path, interval, conditions and component label differ.
  grafanaCheck = {
    name,
    path,
    interval,
    conditions,
    component,
  }: {
    inherit name path interval conditions;
    port = cfg.port;
    group = "monitoring";
    labels = {
      service = "grafana";
      inherit component;
    };
  };
in [
  (grafanaCheck {
    name = "grafana-web-interface";
    path = "/api/health";
    interval = "30s";
    component = "web-interface";
    conditions = [
      "[STATUS] == 200"
      "[BODY].database == ok"
      "[RESPONSE_TIME] < 2000"
    ];
  })
  (grafanaCheck {
    name = "grafana-login-page";
    path = "/login";
    interval = "60s";
    component = "login";
    conditions = [
      "[STATUS] == 200"
      "[RESPONSE_TIME] < 3000"
    ];
  })
];
|
||||
|
||||
# Add reverse proxy entry: "grafana" subdomain pointing at this host and
# the configured Grafana port.
homelab.reverseProxy.entries = singleton {
  subdomain = "grafana";
  host = homelabCfg.hostname;
  inherit (cfg) port;
};
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,399 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.influxdb;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
influxdbServiceOptions = let
  # Shorthand for the recurring "typed option with default and description".
  opt = type: default: description:
    mkOption {inherit type default description;};
  strOpt = opt types.str;
  boolOpt = opt types.bool;
in {
  version = opt (types.enum ["1" "2"]) "2" "InfluxDB version to use";
  dataDir = strOpt "/var/lib/influxdb" "Directory to store InfluxDB data";

  # InfluxDB 2.x options
  v2 = {
    org = strOpt "homelab" "Initial organization name";
    bucket = strOpt "homelab" "Initial bucket name";
    username = strOpt "admin" "Initial admin username";
    password = strOpt "changeme" "Initial admin password";
    retention = strOpt "30d" "Default retention period";
    tokenFile = opt (types.nullOr types.path) null "File containing the admin token";
  };

  # InfluxDB 1.x options
  v1 = {
    database = strOpt "homelab" "Default database name";
    retention = strOpt "30d" "Default retention period";
    adminUser = strOpt "admin" "Admin username";
    adminPassword = strOpt "changeme" "Admin password";
    httpAuth.enable = boolOpt true "Enable HTTP authentication";
  };

  # Recursively merged over the generated configuration.
  extraConfig = opt types.attrs {} "Additional InfluxDB configuration";

  backup = {
    enable = boolOpt false "Enable automatic backups";
    schedule = strOpt "daily" "Backup schedule";
    retention = strOpt "7d" "Backup retention period";
  };
};
|
||||
|
||||
# Generate configuration based on version
|
||||
influxdbConfig = let
  # Both major versions listen on all interfaces at the configured port.
  listenAddress = "0.0.0.0:${toString cfg.port}";

  # Flat key/value configuration used for InfluxDB 2.x.
  v2Defaults = {
    bolt-path = "${cfg.dataDir}/influxd.bolt";
    engine-path = "${cfg.dataDir}/engine";
    http-bind-address = listenAddress;
    reporting-disabled = true;
    log-level = "info";
  };

  # Sectioned configuration used for InfluxDB 1.x.
  v1Defaults = {
    meta.dir = "${cfg.dataDir}/meta";
    data = {
      dir = "${cfg.dataDir}/data";
      wal-dir = "${cfg.dataDir}/wal";
    };
    http = {
      bind-address = listenAddress;
      auth-enabled = cfg.v1.httpAuth.enable;
    };
    logging.level = "info";
    reporting-disabled = true;
  };

  versionDefaults =
    if cfg.version == "2"
    then v2Defaults
    else v1Defaults;
in
  # User-supplied extraConfig overrides the version-specific defaults.
  recursiveUpdate versionDefaults cfg.extraConfig;
|
||||
in {
|
||||
options.homelab.services.influxdb = let
  # Health endpoint and expected response differ between 1.x and 2.x.
  isV2 = cfg.version == "2";
in
  serviceInterface.mkServiceInterface {
    serviceName = "influxdb";
    serviceOptions = influxdbServiceOptions;

    defaultPort = 8086;
    defaultSubdomain = "influxdb";
    monitoringPath = "/metrics";

    healthCheckPath =
      if isV2
      then "/health"
      else "/ping";
    healthCheckConditions =
      if isV2
      then ["[STATUS] == 200" "[BODY].status == pass"]
      else ["[STATUS] == 204" "[RESPONSE_TIME] < 1000"];
  };
|
||||
|
||||
config = serviceInterface.mkServiceConfig {
|
||||
inherit config cfg homelabCfg;
|
||||
serviceName = "influxdb";
|
||||
|
||||
extraMonitoringLabels = {
  inherit (cfg) version;
  component = "timeseries-database";
};

customHealthChecks = let
  isV2 = cfg.version == "2";

  # Common shape of the POST probes: any status below 500 within the given
  # response-time budget (in milliseconds) counts as healthy.
  apiProbe = {
    name,
    path,
    component,
    maxResponseMs,
  }: {
    inherit name path;
    port = cfg.port;
    interval = "60s";
    method = "POST";
    conditions = [
      "[STATUS] < 500"
      "[RESPONSE_TIME] < ${toString maxResponseMs}"
    ];
    group = "monitoring";
    labels = {
      service = "influxdb";
      inherit component;
    };
  };
in
  [
    (apiProbe {
      name = "influxdb-query";
      path =
        if isV2
        then "/api/v2/query"
        else "/query";
      component = "query-engine";
      maxResponseMs = 3000;
    })
  ]
  # The write probe is only registered for InfluxDB 2.x.
  ++ optional isV2 (apiProbe {
    name = "influxdb-write";
    path = "/api/v2/write";
    component = "write-engine";
    maxResponseMs = 2000;
  });
|
||||
|
||||
serviceConfig = mkMerge [
|
||||
# Common configuration
|
||||
{
  # Create data directories: the data root always, plus the meta/data/wal
  # subdirectories when running InfluxDB 1.x.
  systemd.tmpfiles.rules = map (dir: "d ${dir} 0755 influxdb influxdb -") (
    [cfg.dataDir]
    ++ optionals (cfg.version == "1") (
      map (sub: "${cfg.dataDir}/${sub}") ["meta" "data" "wal"]
    )
  );

  # Ensure influxdb user and group exist
  users.groups.influxdb = {};
  users.users.influxdb = {
    isSystemUser = true;
    group = "influxdb";
    createHome = true;
    home = cfg.dataDir;
  };
}
|
||||
|
||||
# InfluxDB 2.x configuration
|
||||
(mkIf (cfg.version == "2") {
  # NOTE(review): `dataDir` is passed through unchanged; confirm that the
  # influxdb2 NixOS module in use actually declares this option.
  services.influxdb2 = {
    enable = true;
    dataDir = cfg.dataDir;
    settings = influxdbConfig;
  };

  # Initial setup for InfluxDB 2.x
  #
  # One-shot unit that waits for the server and then runs `influx setup`
  # with the configured org, bucket, user and retention, but only while the
  # instance still reports that onboarding is allowed.
  #
  # NOTE(review): cfg.v2.password is interpolated into the generated script
  # text, so it is readable wherever that script is stored.
  systemd.services.influxdb2-setup = {
    description = "InfluxDB 2.x initial setup";
    after = ["influxdb2.service"];
    wants = ["influxdb2.service"];
    wantedBy = ["multi-user.target"];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      User = "influxdb";
      Group = "influxdb";
    };
    script = let
      apiUrl = "http://localhost:${toString cfg.port}";

      setupScript = pkgs.writeShellScript "influxdb2-setup" ''
        # Wait for InfluxDB to be ready
        timeout=60
        while [ $timeout -gt 0 ]; do
          if ${pkgs.curl}/bin/curl -f ${apiUrl}/health > /dev/null 2>&1; then
            break
          fi
          sleep 1
          timeout=$((timeout - 1))
        done

        # The loop only reaches 0 when the server never answered; fail
        # loudly instead of silently skipping the setup.
        if [ $timeout -le 0 ]; then
          echo "InfluxDB did not become ready within 60 seconds" >&2
          exit 1
        fi

        # Check if setup is already done. The setup endpoint answers
        # successfully in both states; only its "allowed" field tells
        # whether onboarding is still pending.
        setup_state=$(${pkgs.curl}/bin/curl -fsS ${apiUrl}/api/v2/setup) || exit 1

        if echo "$setup_state" | ${pkgs.gnugrep}/bin/grep -Eq '"allowed"[[:space:]]*:[[:space:]]*true'; then
          # Setup InfluxDB if not already done
          ${pkgs.influxdb2}/bin/influx setup \
            --host ${apiUrl} \
            --org "${cfg.v2.org}" \
            --bucket "${cfg.v2.bucket}" \
            --username "${cfg.v2.username}" \
            --password "${cfg.v2.password}" \
            --retention "${cfg.v2.retention}" \
            --force
        fi
      '';
    in "${setupScript}";
  };
})
|
||||
|
||||
# InfluxDB 1.x configuration
|
||||
(mkIf (cfg.version == "1") {
  services.influxdb = {
    enable = true;
    inherit (cfg) dataDir;
    extraConfig = influxdbConfig;
  };

  # Initial setup for InfluxDB 1.x; only defined when HTTP authentication
  # is enabled. Each provisioning command tolerates failure ("|| true").
  systemd.services.influxdb-setup = mkIf cfg.v1.httpAuth.enable {
    description = "InfluxDB 1.x initial setup";
    wantedBy = ["multi-user.target"];
    wants = ["influxdb.service"];
    after = ["influxdb.service"];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      User = "influxdb";
      Group = "influxdb";
    };
    script = let
      pingUrl = "http://localhost:${toString cfg.port}/ping";

      # CLI invocation prefixes: unauthenticated, and as the admin user.
      influx = "${pkgs.influxdb}/bin/influx -host localhost -port ${toString cfg.port}";
      influxAsAdmin = "${influx} -username \"${cfg.v1.adminUser}\" -password \"${cfg.v1.adminPassword}\"";

      setupScript = pkgs.writeShellScript "influxdb-setup" ''
        # Wait for InfluxDB to be ready
        timeout=60
        while [ $timeout -gt 0 ]; do
          if ${pkgs.curl}/bin/curl -f ${pingUrl} > /dev/null 2>&1; then
            break
          fi
          sleep 1
          timeout=$((timeout - 1))
        done

        # Create admin user
        ${influx} -execute "CREATE USER \"${cfg.v1.adminUser}\" WITH PASSWORD '${cfg.v1.adminPassword}' WITH ALL PRIVILEGES" || true

        # Create database
        ${influxAsAdmin} -execute "CREATE DATABASE \"${cfg.v1.database}\"" || true

        # Set retention policy
        ${influxAsAdmin} -database "${cfg.v1.database}" -execute "CREATE RETENTION POLICY \"default\" ON \"${cfg.v1.database}\" DURATION ${cfg.v1.retention} REPLICATION 1 DEFAULT" || true
      '';
    in "${setupScript}";
  };
})
|
||||
|
||||
# Backup configuration
|
||||
(mkIf cfg.backup.enable {
  # One-shot backup job: writes a timestamped snapshot below
  # <dataDir>/backups and then prunes snapshots older than the retention.
  systemd.services.influxdb-backup = {
    description = "InfluxDB backup";
    serviceConfig = {
      Type = "oneshot";
      User = "influxdb";
      Group = "influxdb";
    };
    script = let
      backupRoot = "${cfg.dataDir}/backups";

      # find's -mtime takes a bare number of days, while the option is
      # written as e.g. "7d". Strip the suffix so pruning actually runs;
      # a value without the suffix is used unchanged.
      retentionDays = removeSuffix "d" cfg.backup.retention;

      # Only direct children of the backup root are candidates, so neither
      # the root itself nor directories inside a snapshot are matched.
      pruneOldBackups = "find \"${backupRoot}\" -mindepth 1 -maxdepth 1 -type d -mtime +${retentionDays} -exec rm -rf {} + || true";

      backupScript =
        if cfg.version == "2"
        then
          pkgs.writeShellScript "influxdb2-backup" ''
            backup_dir="${backupRoot}/$(date +%Y%m%d_%H%M%S)"
            mkdir -p "$backup_dir"
            ${pkgs.influxdb2}/bin/influx backup \
              --host http://localhost:${toString cfg.port} \
              --org "${cfg.v2.org}" \
              "$backup_dir"

            # Clean old backups
            ${pruneOldBackups}
          ''
        else
          # NOTE(review): this passes the HTTP port to `influxd backup -host`;
          # verify that this is the port the 1.x backup service listens on.
          pkgs.writeShellScript "influxdb-backup" ''
            backup_dir="${backupRoot}/$(date +%Y%m%d_%H%M%S)"
            mkdir -p "$backup_dir"
            ${pkgs.influxdb}/bin/influxd backup \
              -host localhost:${toString cfg.port} \
              -database "${cfg.v1.database}" \
              "$backup_dir"

            # Clean old backups
            ${pruneOldBackups}
          '';
    in "${backupScript}";
  };

  # Timer that triggers the backup job on the configured schedule.
  systemd.timers.influxdb-backup = {
    description = "InfluxDB backup timer";
    wantedBy = ["timers.target"];
    timerConfig = {
      OnCalendar = cfg.backup.schedule;
      Persistent = true;
      RandomizedDelaySec = "5m";
    };
  };

  # Create backup directory
  systemd.tmpfiles.rules = [
    "d ${cfg.dataDir}/backups 0755 influxdb influxdb -"
  ];
})
|
||||
];
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,356 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceInterface = import ../../lib/service-interface.nix {inherit lib;};
|
||||
|
||||
cfg = config.homelab.services.loki;
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Service-specific options beyond the standard interface
|
||||
lokiServiceOptions = let
  # Shorthand for the recurring "typed option with default and description".
  opt = type: default: description:
    mkOption {inherit type default description;};
  strOpt = opt types.str;
  intOpt = opt types.int;
  boolOpt = opt types.bool;
  # Optional string that is null unless set.
  optionalStr = opt (types.nullOr types.str) null;
in {
  # Storage configuration
  storage = {
    type = opt (types.enum ["filesystem" "s3" "gcs"]) "filesystem" "Storage backend type";

    filesystem.directory = strOpt "/var/lib/loki" "Directory for filesystem storage";

    s3 = {
      endpoint = optionalStr "S3 endpoint URL";
      bucket = optionalStr "S3 bucket name";
      region = optionalStr "S3 region";
      accessKeyId = optionalStr "S3 access key ID";
      secretAccessKey = opt (types.nullOr types.path) null "Path to file containing S3 secret access key";
    };
  };

  # Retention configuration
  retention = {
    period = strOpt "168h" "Log retention period"; # 7 days

    streamRetention = mkOption {
      description = "Per-stream retention rules";
      default = [];
      type = types.listOf (types.submodule {
        options = {
          selector = mkOption {
            description = "Log stream selector";
            type = types.str;
            example = "{environment=\"development\"}";
          };
          priority = intOpt 1 "Rule priority (higher = more important)";
          period = mkOption {
            description = "Retention period for this stream";
            type = types.str;
            example = "24h";
          };
        };
      });
    };
  };

  # Performance tuning
  limits = {
    rejectOldSamples = boolOpt true "Reject samples older than max age";
    rejectOldSamplesMaxAge = strOpt "168h" "Maximum age for samples";
    ingestionRateMB = intOpt 4 "Ingestion rate limit in MB/s per tenant";
    ingestionBurstSizeMB = intOpt 6 "Ingestion burst size in MB per tenant";
    maxStreamsPerUser = intOpt 10000 "Maximum number of streams per user";
    maxLineSize = strOpt "256KB" "Maximum line size";
  };

  # Authentication
  auth.enabled = boolOpt false "Enable authentication";

  # Extra configuration options
  extraConfig = opt types.attrs {} "Additional Loki configuration options";

  # Data directory
  dataDir = strOpt "/var/lib/loki" "Directory to store Loki data";
};
|
||||
|
||||
# Build the Loki configuration
|
||||
lokiConfig = let
  s3 = cfg.storage.s3;

  # Storage configuration: only the section matching the selected backend
  # type is emitted.
  storageConfig = mkMerge [
    # Filesystem storage
    (mkIf (cfg.storage.type == "filesystem") {
      filesystem.directory = "${cfg.storage.filesystem.directory}/chunks";
    })

    # S3 storage; the secret key reference is added only when a file is set.
    (mkIf (cfg.storage.type == "s3") {
      aws =
        {
          s3 = s3.endpoint;
          bucketnames = s3.bucket;
          region = s3.region;
          access_key_id = s3.accessKeyId;
        }
        // (optionalAttrs (s3.secretAccessKey != null) {
          secret_access_key = "$__file{${s3.secretAccessKey}}";
        });
    })
  ];

  # Limits configuration, including the global retention period and any
  # per-stream retention rules.
  limitsConfig =
    {
      reject_old_samples = cfg.limits.rejectOldSamples;
      reject_old_samples_max_age = cfg.limits.rejectOldSamplesMaxAge;
      ingestion_rate_mb = cfg.limits.ingestionRateMB;
      ingestion_burst_size_mb = cfg.limits.ingestionBurstSizeMB;
      max_streams_per_user = cfg.limits.maxStreamsPerUser;
      max_line_size = cfg.limits.maxLineSize;

      # Retention configuration
      retention_period = cfg.retention.period;
    }
    // (optionalAttrs (cfg.retention.streamRetention != []) {
      retention_stream =
        map (rule: {inherit (rule) selector priority period;})
        cfg.retention.streamRetention;
    });

  # Configuration generated from the homelab options.
  generated = {
    # Server configuration
    server = {
      http_listen_address = "0.0.0.0";
      http_listen_port = cfg.port;
      grpc_listen_address = "0.0.0.0";
      grpc_listen_port = cfg.port + 1000; # e.g., 3100 -> 4100
      log_level = cfg.monitoring.extraLabels.log_level or "info";
    };

    # Authentication
    auth_enabled = cfg.auth.enabled;

    # Analytics
    analytics.reporting_enabled = false;

    # Common configuration for single-binary mode
    common = {
      path_prefix = cfg.dataDir;
      replication_factor = 1;
      ring = {
        instance_addr = "127.0.0.1";
        kvstore.store = "inmemory";
      };
    };

    # Schema configuration
    schema_config.configs = [
      {
        from = "2020-05-15";
        store = "tsdb";
        object_store = cfg.storage.type;
        schema = "v13";
        index = {
          prefix = "index_";
          period = "24h";
        };
      }
    ];

    storage_config = storageConfig;
    limits_config = limitsConfig;

    # Table manager for retention
    table_manager = {
      retention_deletes_enabled = true;
      retention_period = cfg.retention.period;
    };

    # Compactor configuration
    compactor = {
      working_directory = "${cfg.dataDir}/compactor";
      # shared_store = cfg.storage.type;
      compaction_interval = "10m";
      # retention_enabled = true;
      # retention_delete_delay = "2h";
      # retention_delete_worker_count = 150;
    };

    # Query range configuration
    query_range.results_cache.cache.embedded_cache = {
      enabled = true;
      max_size_mb = 100;
    };

    # Frontend configuration
    frontend = {
      max_outstanding_per_tenant = 256;
      compress_responses = true;
    };

    # Query scheduler
    query_scheduler.max_outstanding_requests_per_tenant = 256;

    # Runtime configuration
    runtime_config.file = "/etc/loki/runtime.yml";
  };
in
  # Anything in cfg.extraConfig overrides the generated values.
  recursiveUpdate generated cfg.extraConfig;
|
||||
in {
|
||||
# Standard service interface for Loki, extended with the Loki-specific
# options defined above.
options.homelab.services.loki = serviceInterface.mkServiceInterface {
  serviceName = "loki";
  serviceOptions = lokiServiceOptions;

  defaultSubdomain = "loki";
  defaultPort = 3100;

  monitoringPath = "/metrics";
  healthCheckPath = "/ready";
  healthCheckConditions = ["[STATUS] == 200" "[RESPONSE_TIME] < 2000"];
};
|
||||
|
||||
config = serviceInterface.mkServiceConfig {
  inherit config cfg homelabCfg;
  serviceName = "loki";

  extraMonitoringLabels = {
    component = "log-aggregation";
    log_level = "info";
  };

  customHealthChecks = singleton {
    name = "loki-health";
    port = cfg.port;
    # https://grafana.com/docs/loki/latest/reference/loki-http-api/#status-endpoints
    path = "/loki/api/v1/status/buildinfo";
    interval = "30s";
    conditions = ["[STATUS] == 200"];
    group = "logging";
    labels = {
      service = "loki";
      component = "api";
    };
  };

  serviceConfig = mkMerge [
    {
      services.loki = {
        enable = true;
        inherit (cfg) dataDir;
        configuration = lokiConfig;
      };

      # Ensure data directories exist: the data root plus the chunk and
      # compactor subdirectories, all owned by loki:loki.
      systemd.tmpfiles.rules = map (dir: "d ${dir} 0755 loki loki -") [
        cfg.dataDir
        "${cfg.dataDir}/chunks"
        "${cfg.dataDir}/compactor"
      ];

      # Runtime configuration file for dynamic updates; it contains only
      # comments.
      environment.etc."loki/runtime.yml".text = ''
        # Runtime configuration for Loki
        # This file can be updated without restarting Loki
      '';
    }
  ];
};
|
||||
}
|
||||
|
|
@ -19,12 +19,13 @@ with lib; let
|
|||
mapAttrsToList (jobName: endpoints: {
|
||||
job_name = jobName;
|
||||
scrape_interval = head endpoints.scrapeInterval or ["30s"];
|
||||
static_configs = [
|
||||
{
|
||||
targets = map (endpoint: "${endpoint.host}:${toString endpoint.port}") endpoints;
|
||||
labels = fold (endpoint: acc: acc // endpoint.labels) {} endpoints;
|
||||
}
|
||||
];
|
||||
static_configs =
|
||||
map
|
||||
(endpoint: {
|
||||
targets = ["${endpoint.host}:${toString endpoint.port}"];
|
||||
labels = endpoint.labels;
|
||||
})
|
||||
endpoints;
|
||||
metrics_path = head endpoints.path or [null];
|
||||
})
|
||||
jobGroups;
|
||||
|
|
|
|||
252
modules/homelab/services/prometheus.nix
Normal file
252
modules/homelab/services/prometheus.nix
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
with lib; let
|
||||
serviceName = "prometheus";
|
||||
cfg = config.homelab.services.${serviceName};
|
||||
homelabCfg = config.homelab;
|
||||
|
||||
# Generate Prometheus scrape configs from global monitoring data
|
||||
# Builds one scrape config per job name from the collected metrics
# endpoints. Every endpoint becomes its own static_config entry so that it
# keeps its own labels.
prometheusScrapeConfigs = let
  # Get all metrics - try global first, fallback to local
  allMetrics =
    homelabCfg.monitoring.global.allMetrics
    or homelabCfg.monitoring.allMetrics
    or [];

  jobGroups = groupBy (m: m.jobName) allMetrics;

  mkScrapeConfig = jobName: endpoints: let
    # groupBy never yields an empty group, so taking the head is safe.
    first = head endpoints;

    # Job-level settings come from the first endpoint of the group.
    # `endpoints` is a list, so selecting an attribute directly on it (as
    # the previous `head endpoints.path or [...]` did) always fell through
    # to the default and ignored what the endpoints declared.
    setting = attr: fallback: let
      value = first.${attr} or null;
    in
      if value == null
      then fallback
      else value;
  in {
    job_name = jobName;
    scrape_interval = setting "scrapeInterval" "30s";
    metrics_path = setting "path" "/metrics";
    static_configs =
      map
      (endpoint: {
        targets = ["${endpoint.host}:${toString endpoint.port}"];
        labels = endpoint.labels;
      })
      endpoints;
  };
in
  mapAttrsToList mkScrapeConfig jobGroups;
|
||||
|
||||
# Standard alerting rules for homelab
|
||||
alertingRules = let
  # Expands a compact rule description into the attribute layout used for
  # an alerting rule (alert/expr/for/labels/annotations).
  mkRule = {
    alert,
    expr,
    duration,
    severity,
    summary,
    description,
  }: {
    inherit alert expr;
    for = duration;
    labels = {inherit severity;};
    annotations = {inherit summary description;};
  };
in [
  {
    name = "homelab.rules";
    rules = map mkRule [
      {
        alert = "InstanceDown";
        severity = "critical";
        duration = "5m";
        expr = "up == 0";
        summary = "Instance {{ $labels.instance }} down";
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
      }
      {
        alert = "HighCPUUsage";
        severity = "warning";
        duration = "10m";
        expr = "100 - (avg by(instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100) > 80";
        summary = "High CPU usage on {{ $labels.instance }}";
        description = "CPU usage is above 80% for more than 10 minutes on {{ $labels.instance }}.";
      }
      {
        alert = "HighMemoryUsage";
        severity = "warning";
        duration = "10m";
        expr = "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85";
        summary = "High memory usage on {{ $labels.instance }}";
        description = "Memory usage is above 85% for more than 10 minutes on {{ $labels.instance }}.";
      }
      {
        alert = "DiskSpaceLow";
        severity = "critical";
        duration = "5m";
        expr = "((node_filesystem_size_bytes - node_filesystem_avail_bytes) / node_filesystem_size_bytes) * 100 > 90";
        summary = "Disk space low on {{ $labels.instance }}";
        description = "Disk usage is above 90% on {{ $labels.instance }} {{ $labels.mountpoint }}.";
      }
    ];
  }
];
|
||||
in {
|
||||
imports = [
|
||||
(import ../lib/features/monitoring.nix serviceName)
|
||||
(import ../lib/features/logging.nix serviceName)
|
||||
(import ../lib/features/proxy.nix serviceName)
|
||||
];
|
||||
|
||||
# Core service options
|
||||
options.homelab.services.${serviceName} = let
  # Shorthand for the recurring "typed option with default and description".
  opt = type: default: description:
    mkOption {inherit type default description;};
  # List of untyped attribute sets, empty by default.
  attrsListOpt = opt (types.listOf types.attrs) [];
in {
  enable = mkEnableOption "Prometheus Monitoring Server";

  port = mkOption {
    default = 9090;
    type = types.port;
  };

  description = mkOption {
    default = "Prometheus Monitoring Server";
    type = types.str;
  };

  # Prometheus-specific options
  retention = opt types.str "15d" "How long to retain metrics data";

  alertmanager = {
    enable = opt types.bool true "Enable integration with Alertmanager";
    url = opt types.str "alertmanager.${homelabCfg.domain}:9093" "Alertmanager URL";
  };

  extraScrapeConfigs = attrsListOpt "Additional scrape configurations";
  extraAlertingRules = attrsListOpt "Additional alerting rules";

  globalConfig =
    opt types.attrs {
      scrape_interval = "15s";
      evaluation_interval = "15s";
    }
    "Global Prometheus configuration";

  extraFlags = opt (types.listOf types.str) [] "Extra command line flags";
  ruleFiles = opt (types.listOf types.path) [] "Additional rule files to load";
};
|
||||
|
||||
# Service configuration with smart defaults
|
||||
# Service configuration with smart defaults.
# Nothing below takes effect unless the service is enabled.
config = let
  # Write a single rule group to its own file in the Nix store.
  # The content is produced with builtins.toJSON and stored under a
  # "<group name>.yml" file name.
  writeRuleGroup = group:
    pkgs.writeText "${group.name}.yml" (builtins.toJSON {
      groups = [group];
    });

  # Standard rule groups followed by the user-supplied extra groups.
  generatedRuleFiles = map writeRuleGroup (alertingRules ++ cfg.extraAlertingRules);
in
  mkIf cfg.enable (mkMerge [
    # Core Prometheus service
    {
      services.prometheus = {
        enable = true;
        listenAddress = "0.0.0.0";
        retentionTime = cfg.retention;

        # Passed through unchanged from the homelab options.
        inherit (cfg) port globalConfig extraFlags;

        # Scrape configs computed elsewhere in this file, then the extras.
        scrapeConfigs = prometheusScrapeConfigs ++ cfg.extraScrapeConfigs;

        # Generated rule files first, then any explicitly listed files.
        ruleFiles = generatedRuleFiles ++ cfg.ruleFiles;

        # Only defined when the Alertmanager integration is switched on.
        alertmanagers = mkIf cfg.alertmanager.enable [
          {
            static_configs = [
              {targets = [cfg.alertmanager.url];}
            ];
          }
        ];
      };

      networking.firewall.allowedTCPPorts = [cfg.port];

      # Monitoring of the service itself is on unless overridden.
      homelab.services.${serviceName}.monitoring.enable = mkDefault true;
    }

    # Default monitoring settings for Prometheus
    (mkIf cfg.monitoring.enable {
      homelab.services.${serviceName}.monitoring = mkDefault {
        metrics = {
          path = "/metrics";
          extraEndpoints = [];
        };

        healthCheck = {
          path = "/-/healthy";
          conditions = ["[STATUS] == 200" "[RESPONSE_TIME] < 1000"];
          extraChecks = [
            {
              name = "prometheus-ready";
              port = cfg.port;
              path = "/-/ready";
              conditions = ["[STATUS] == 200"];
              group = "monitoring";
            }
          ];
        };

        extraLabels = {
          component = "monitoring-server";
          tier = "monitoring";
        };
      };
    })

    # Default log collection settings
    (mkIf cfg.logging.enable {
      homelab.services.${serviceName}.logging = mkDefault {
        files = ["/var/log/prometheus/prometheus.log"];

        parsing = {
          # Prometheus log format: ts=2024-01-01T12:00:00.000Z caller=main.go:123 level=info msg="message"
          regex = "^ts=(?P<timestamp>[^ ]+) caller=(?P<caller>[^ ]+) level=(?P<level>\\w+) msg=\"(?P<message>[^\"]*)\"";
          extractFields = ["level" "caller"];
        };

        extraLabels = {
          component = "monitoring-server";
          application = "prometheus";
        };
      };
    })

    # Default reverse-proxy settings
    (mkIf cfg.proxy.enable {
      homelab.services.${serviceName}.proxy = mkDefault {
        subdomain = "prometheus";
        enableAuth = true; # Admin interface needs protection
      };
    })
  ]);
|
||||
}
|
||||
|
|
@ -1,208 +0,0 @@
|
|||
# modules/services/prometheus.nix
|
||||
# Homelab wrapper around the NixOS Prometheus service.
#
# When enabled, this module:
#   * registers Prometheus in homelab.global.services,
#   * configures services.prometheus (scraping, alerting rules, Alertmanager),
#   * publishes its own metrics endpoint, reverse-proxy entry and backup job
#     through the homelab.global.* options,
#   * opens the listening port in the firewall.
{
  config,
  lib,
  pkgs,
  ...
}:
with lib; let
  cfg = config.homelab.services.prometheus;
  globalCfg = config.homelab.global;

  # Prometheus static targets must be bare "host:port" addresses; the scheme
  # is configured separately. The option historically held a full URL
  # (default "http://localhost:9093"), so split it here instead of handing
  # the URL to Prometheus verbatim.
  alertmanagerScheme =
    if hasPrefix "https://" cfg.alertmanager.url
    then "https"
    else "http";
  alertmanagerTarget =
    removeSuffix "/"
    (removePrefix "https://" (removePrefix "http://" cfg.alertmanager.url));

  # Basic homelab alerting rules, written to the store as a rule file.
  homelabAlertRules = pkgs.writeText "homelab-alerts.yml" ''
    groups:
    - name: homelab
      rules:
      - alert: ServiceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Service {{ $labels.instance }} is down"
          description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes."

      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 90% on {{ $labels.instance }}"

      - alert: HighDiskUsage
        expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High disk usage on {{ $labels.instance }}"
          description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}"
  '';
in {
  options.homelab.services.prometheus = {
    enable = mkEnableOption "Prometheus monitoring server";

    port = mkOption {
      type = types.port;
      default = 9090;
      description = "Prometheus server port";
    };

    webExternalUrl = mkOption {
      type = types.str;
      default = "http://${globalCfg.hostname}:${toString cfg.port}";
      description = "External URL for Prometheus";
    };

    retention = mkOption {
      type = types.str;
      default = "30d";
      description = "Data retention period";
    };

    scrapeConfigs = mkOption {
      type = types.listOf types.attrs;
      default = [];
      description = "Additional scrape configurations";
    };

    alertmanager = {
      enable = mkOption {
        type = types.bool;
        default = false;
        description = "Enable Alertmanager integration";
      };

      url = mkOption {
        type = types.str;
        default = "http://localhost:9093";
        description = ''
          Alertmanager URL. Either "host:port" or a URL with an http:// or
          https:// scheme; the scheme is split off and passed to Prometheus
          separately from the target address.
        '';
      };
    };
  };

  config = mkIf cfg.enable {
    # Register service with global homelab config
    homelab.global.services.prometheus = {
      enable = true;
      description = "Metrics collection and monitoring server";
      category = "monitoring";
      ports = [cfg.port];
      tags = ["metrics" "monitoring" "alerting"];
      priority = 20;
      dependencies = ["node-exporter"];
    };

    # Configure the actual Prometheus service
    services.prometheus = {
      enable = true;
      port = cfg.port;
      webExternalUrl = cfg.webExternalUrl;

      retentionTime = cfg.retention;

      scrapeConfigs =
        [
          # One job that scrapes every endpoint registered in the global
          # config, all addressed via this host's hostname.
          {
            job_name = "homelab-auto";
            static_configs = [
              {
                targets =
                  map (
                    endpoint: "${globalCfg.hostname}:${toString endpoint.port}"
                  )
                  globalCfg.monitoring.endpoints;
              }
            ];
            scrape_interval = "30s";
            metrics_path = "/metrics";
          }
        ]
        ++ cfg.scrapeConfigs;

      # Alertmanager configuration
      alertmanagers = mkIf cfg.alertmanager.enable [
        {
          scheme = alertmanagerScheme;
          static_configs = [
            {
              targets = [alertmanagerTarget];
            }
          ];
        }
      ];

      # The rules live in a store file, so they go in ruleFiles (paths).
      # The `rules` option expects the rule text itself as strings.
      ruleFiles = [homelabAlertRules];
    };

    # Add monitoring endpoint to global config
    homelab.global.monitoring.endpoints = [
      {
        name = "prometheus";
        port = cfg.port;
        path = "/metrics";
        jobName = "prometheus";
        scrapeInterval = "30s";
        labels = {
          service = "prometheus";
          role = "monitoring";
        };
      }
    ];

    # Add reverse proxy entry if a domain is configured
    homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [
      {
        subdomain = "prometheus";
        port = cfg.port;
        path = "/";
        enableAuth = true;
        enableSSL = true;
        customHeaders = {
          "X-Frame-Options" = "DENY";
          "X-Content-Type-Options" = "nosniff";
        };
      }
    ];

    # Add backup job for Prometheus data
    homelab.global.backups.jobs = [
      {
        name = "prometheus-data";
        backend = "restic";
        paths = ["/var/lib/prometheus2"];
        schedule = "daily";
        retention = {
          daily = "7";
          weekly = "4";
          monthly = "3";
          yearly = "1";
        };
        excludePatterns = [
          "*.tmp"
          "*/wal/*"
        ];
        preHook = ''
          # Stop prometheus temporarily for consistent backup
          systemctl stop prometheus
        '';
        postHook = ''
          # Restart prometheus after backup
          systemctl start prometheus
        '';
      }
    ];

    # Open firewall port
    networking.firewall.allowedTCPPorts = [cfg.port];

    # Create the data and configuration directories
    systemd.tmpfiles.rules = [
      "d /var/lib/prometheus2 0755 prometheus prometheus -"
      "d /etc/prometheus 0755 root root -"
    ];
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue