homelab framework module init (everything is a mess)
Some checks failed
Test / tests (push) Has been cancelled
/ OpenTofu (push) Has been cancelled

This commit is contained in:
plasmagoat 2025-07-28 02:05:13 +02:00
parent 0347f4d325
commit bcbcc8b17b
94 changed files with 7289 additions and 436 deletions

View file

@ -0,0 +1,208 @@
# modules/services/prometheus.nix
{
config,
lib,
pkgs,
...
}:
with lib; let
cfg = config.homelab.services.prometheus;
globalCfg = config.homelab.global;
in {
options.homelab.services.prometheus = {
enable = mkEnableOption "Prometheus monitoring server";
port = mkOption {
type = types.port;
default = 9090;
description = "Prometheus server port";
};
webExternalUrl = mkOption {
type = types.str;
default = "http://${globalCfg.hostname}:${toString cfg.port}";
description = "External URL for Prometheus";
};
retention = mkOption {
type = types.str;
default = "30d";
description = "Data retention period";
};
scrapeConfigs = mkOption {
type = types.listOf types.attrs;
default = [];
description = "Additional scrape configurations";
};
alertmanager = {
enable = mkOption {
type = types.bool;
default = false;
description = "Enable Alertmanager integration";
};
url = mkOption {
type = types.str;
default = "http://localhost:9093";
description = "Alertmanager URL";
};
};
};
config = mkIf cfg.enable {
# Register service with global homelab config
homelab.global.services.prometheus = {
enable = true;
description = "Metrics collection and monitoring server";
category = "monitoring";
ports = [cfg.port];
tags = ["metrics" "monitoring" "alerting"];
priority = 20;
dependencies = ["node-exporter"];
};
# Configure the actual Prometheus service
services.prometheus = {
enable = true;
port = cfg.port;
webExternalUrl = cfg.webExternalUrl;
retentionTime = cfg.retention;
scrapeConfigs =
[
# Auto-discover monitoring endpoints from global config
{
job_name = "homelab-auto";
static_configs = [
{
targets =
map (
endpoint: "${globalCfg.hostname}:${toString endpoint.port}"
)
globalCfg.monitoring.endpoints;
}
];
scrape_interval = "30s";
metrics_path = "/metrics";
}
]
++ cfg.scrapeConfigs;
# Alertmanager configuration
alertmanagers = mkIf cfg.alertmanager.enable [
{
static_configs = [
{
targets = [cfg.alertmanager.url];
}
];
}
];
rules = [
# Basic homelab alerting rules
(pkgs.writeText "homelab-alerts.yml" ''
groups:
- name: homelab
rules:
- alert: ServiceDown
expr: up == 0
for: 5m
labels:
severity: critical
annotations:
summary: "Service {{ $labels.instance }} is down"
description: "{{ $labels.job }} on {{ $labels.instance }} has been down for more than 5 minutes."
- alert: HighMemoryUsage
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
for: 10m
labels:
severity: warning
annotations:
summary: "High memory usage on {{ $labels.instance }}"
description: "Memory usage is above 90% on {{ $labels.instance }}"
- alert: HighDiskUsage
expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.85
for: 5m
labels:
severity: warning
annotations:
summary: "High disk usage on {{ $labels.instance }}"
description: "Disk usage is above 85% on {{ $labels.instance }} for filesystem {{ $labels.mountpoint }}"
'')
];
};
# Add monitoring endpoint to global config
homelab.global.monitoring.endpoints = [
{
name = "prometheus";
port = cfg.port;
path = "/metrics";
jobName = "prometheus";
scrapeInterval = "30s";
labels = {
service = "prometheus";
role = "monitoring";
};
}
];
# Add reverse proxy entry if configured
homelab.global.reverseProxy.entries = mkIf (globalCfg.domain != null) [
{
subdomain = "prometheus";
port = cfg.port;
path = "/";
enableAuth = true;
enableSSL = true;
customHeaders = {
"X-Frame-Options" = "DENY";
"X-Content-Type-Options" = "nosniff";
};
}
];
# Add backup job for Prometheus data
homelab.global.backups.jobs = [
{
name = "prometheus-data";
backend = "restic";
paths = ["/var/lib/prometheus2"];
schedule = "daily";
retention = {
daily = "7";
weekly = "4";
monthly = "3";
yearly = "1";
};
excludePatterns = [
"*.tmp"
"*/wal/*"
];
preHook = ''
# Stop prometheus temporarily for consistent backup
systemctl stop prometheus
'';
postHook = ''
# Restart prometheus after backup
systemctl start prometheus
'';
}
];
# Open firewall port
networking.firewall.allowedTCPPorts = [cfg.port];
# Create prometheus configuration directory
systemd.tmpfiles.rules = [
"d /var/lib/prometheus2 0755 prometheus prometheus -"
"d /etc/prometheus 0755 root root -"
];
};
}