From 5feb74d56d2d75590641e2d911582e972809df3c Mon Sep 17 00:00:00 2001 From: plasmagoat Date: Sun, 6 Jul 2025 21:25:57 +0200 Subject: [PATCH] colmena initial implementation for sandbox and monitor --- .forgejo/workflows/build-image.yml | 61 - .forgejo/workflows/colmena-apply.yml | 53 + .forgejo/workflows/deploy-nixos.yaml | 39 - .forgejo/workflows/proxmox-nixos-deploy.yml | 34 - .sops.yaml | 3 + ansible/books/clone-template.yml | 6 +- flake.lock | 154 + flake.nix | 34 + hive.nix | 48 + machines/_default/common_config.nix | 89 + machines/_default/default.nix | 40 + machines/modules/default.nix | 11 + machines/modules/journal-log.nix | 95 + machines/modules/node-exporter.nix | 40 + machines/monitor/alertmanager.nix | 63 + machines/monitor/dashboards/15356_rev14.json | 2353 +++ machines/monitor/dashboards/gitea.json | 1082 ++ .../monitor/dashboards/grafana-traefik.json | 692 + .../monitor/dashboards/node-exporter.json | 13554 ++++++++++++++++ machines/monitor/dashboards/postgres.json | 3096 ++++ machines/monitor/dashboards/promtail.json | 2043 +++ .../monitor/dashboards/traefik-access.json | 1087 ++ machines/monitor/dashboards/traefik.json | 1619 ++ machines/monitor/definition.nix | 13 + machines/monitor/grafana.nix | 126 + machines/monitor/influxdb.nix | 35 + machines/monitor/jellyfin-exporter.nix | 14 + machines/monitor/loki.nix | 37 + machines/monitor/prometheus.nix | 185 + .../provisioning/alerts/loki-alerts.yml | 39 + .../alerts/node-exporter-alerts.yml | 318 + .../provisioning/alerts/postgres-alerts.yml | 201 + .../provisioning/alerts/prometheus-alerts.yml | 255 + .../provisioning/alerts/promtail-alerts.yml | 21 + .../provisioning/alerts/traefik-alerts.yml | 30 + .../provisioning/templates/telegram.tmpl | 29 + machines/sandbox/definition.nix | 3 + nixos/flake.lock | 113 +- nixos/flake.nix | 34 +- secrets/secrets.yaml | 21 + 40 files changed, 27629 insertions(+), 141 deletions(-) delete mode 100644 .forgejo/workflows/build-image.yml create mode 100644 .forgejo/workflows/colmena-apply.yml delete mode 100644 .forgejo/workflows/deploy-nixos.yaml delete mode 100644 .forgejo/workflows/proxmox-nixos-deploy.yml create mode 100644 .sops.yaml create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 hive.nix create mode 100644 machines/_default/common_config.nix create mode 100644 machines/_default/default.nix create mode 100644 machines/modules/default.nix create mode 100644 machines/modules/journal-log.nix create mode 100644 machines/modules/node-exporter.nix create mode 100644 machines/monitor/alertmanager.nix create mode 100644 machines/monitor/dashboards/15356_rev14.json create mode 100644 machines/monitor/dashboards/gitea.json create mode 100644 machines/monitor/dashboards/grafana-traefik.json create mode 100644 machines/monitor/dashboards/node-exporter.json create mode 100644 machines/monitor/dashboards/postgres.json create mode 100644 machines/monitor/dashboards/promtail.json create mode 100644 machines/monitor/dashboards/traefik-access.json create mode 100644 machines/monitor/dashboards/traefik.json create mode 100644 machines/monitor/definition.nix create mode 100644 machines/monitor/grafana.nix create mode 100644 machines/monitor/influxdb.nix create mode 100644 machines/monitor/jellyfin-exporter.nix create mode 100644 machines/monitor/loki.nix create mode 100644 machines/monitor/prometheus.nix create mode 100644 machines/monitor/provisioning/alerts/loki-alerts.yml create mode 100644 machines/monitor/provisioning/alerts/node-exporter-alerts.yml create mode 100644 machines/monitor/provisioning/alerts/postgres-alerts.yml create mode 100644 machines/monitor/provisioning/alerts/prometheus-alerts.yml create mode 100644 machines/monitor/provisioning/alerts/promtail-alerts.yml create mode 100644 machines/monitor/provisioning/alerts/traefik-alerts.yml create mode 100644 machines/monitor/provisioning/templates/telegram.tmpl create mode 100644 machines/sandbox/definition.nix create mode 100644 secrets/secrets.yaml diff --git a/.forgejo/workflows/build-image.yml b/.forgejo/workflows/build-image.yml deleted file mode 100644 index b428a77..0000000 --- a/.forgejo/workflows/build-image.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: "Build NixOS Image" - -on: - workflow_dispatch: - -jobs: - build: - runs-on: native - steps: - - name: Install nodejs - run: nix-env -iA nixpkgs.nodejs - - - name: Checkout repo - uses: actions/checkout@v4 - - # - name: Install Nix - # uses: cachix/install-nix-action@v31 - # with: - # nix_path: nixpkgs=channel:nixos-unstable - # extra_nix_config: | - # experimental-features = nix-command flakes - - # - name: Enable experimental features - # run: | - # mkdir -p ~/.config/nix - # echo "experimental-features = nix-command flakes" >> ~/.config/nix/nix.conf - - # - name: Update Channel - # run: nix-channel --update - - - name: Build NixOS image - working-directory: nixos - run: nix build .#proxmoxTemplate - - # - name: Upload & Restore to Proxmox - # working-directory: nixos - # env: - # PROXMOX_SSH_KEY: ${{ secrets.PROXMOX_SSH_KEY }} - # PROXMOX_HOST: 192.168.1.205 - # PROXMOX_USER: root - # run: | - # set -e - - # IMAGE_NAME="vm-image.vma.zst" - # REMOTE_PATH="/var/lib/vz/template/$IMAGE_NAME" - # VM_ID="9000" - - # echo "Starting ssh-agent and uploading..." - # eval "$(ssh-agent -s)" - # ssh-add <(echo "$PROXMOX_SSH_KEY") - - # echo "Uploading image..." - # scp -o StrictHostKeyChecking=no ./result/$IMAGE_NAME $PROXMOX_USER@$PROXMOX_HOST:$REMOTE_PATH - - # echo "Restoring VM $VM_ID..." - # ssh -o StrictHostKeyChecking=no $PROXMOX_USER@$PROXMOX_HOST " - # qm stop $VM_ID || true - # qm destroy $VM_ID || true - # qmrestore --unique $REMOTE_PATH $VM_ID - # qm template $VM_ID - # " diff --git a/.forgejo/workflows/colmena-apply.yml b/.forgejo/workflows/colmena-apply.yml new file mode 100644 index 0000000..13f7563 --- /dev/null +++ b/.forgejo/workflows/colmena-apply.yml @@ -0,0 +1,53 @@ +name: "Colmena apply" + +on: + push: + tags: + - "v*" # triggers on v1.0.0, v1.2.3, etc. + workflow_dispatch: + +jobs: + apply: + name: Apply flake configurations to colmena hive + # Ensure 'nixos-latest' runner has Docker, SSH client, and basic Nix tools installed. + # It seems it already does. + runs-on: nixos-latest + env: + NIXOS_BUILER_HOST: nixos-builder.lab + NIXOS_BUILER_USER: runner + + steps: + # Use nix-env for setup (as you prefer and it works well for ephemeral environments) + - name: Install dependencies via nix-env + run: | + nix-env -iA nixpkgs.nodejs + nix-env -iA nixpkgs.openssh + nix-env -if https://github.com/zhaofengli/colmena/tarball/main + nix-env -iA cachix -f https://cachix.org/api/v1/install + cachix use plasmagoat + cachix authtoken ${{ secrets.CACHIX_AUTH_TOKEN }} + + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Enable experimental features + run: | + mkdir -p ~/.config/nix + echo "experimental-features = nix-command flakes" >> ~/.config/nix/nix.conf + + - name: Prepare SSH keys and known_hosts for builder and Proxmox + run: | + mkdir -p ~/.ssh + echo "${{ secrets.RUNNER_SSH_KEY }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + ssh-keyscan -H "$NIXOS_BUILER_HOST" >> ~/.ssh/known_hosts + chmod 600 ~/.ssh/known_hosts + + - name: Test SSH connection to NixOS Builder + run: | + echo "Testing SSH connection to $NIXOS_BUILER_HOST..." + ssh -o StrictHostKeyChecking=yes "$NIXOS_BUILER_USER"@"$NIXOS_BUILER_HOST" "echo 'SSH success. Hostname:' && hostname" + + - name: Apply Colmena + id: apply + run: colmena apply diff --git a/.forgejo/workflows/deploy-nixos.yaml b/.forgejo/workflows/deploy-nixos.yaml deleted file mode 100644 index a285d8e..0000000 --- a/.forgejo/workflows/deploy-nixos.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: Deploy NixOS VM - -on: - workflow_dispatch: - -jobs: - deploy: - runs-on: docker - container: - image: nixos/nix - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - - name: Install Terraform - run: nix-env -iA nixpkgs.terraform - - - name: Setup SSH key - run: | - mkdir -p ~/.ssh - echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519 - chmod 600 ~/.ssh/id_ed25519 - env: - SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} - - - name: Terraform Init & Apply - run: | - terraform init - terraform apply -auto-approve - working-directory: ./terraform - env: - PROXMOX_PASSWORD: ${{ secrets.PROXMOX_PASSWORD }} - - - name: Deploy NixOS via nixos-anywhere - run: | - nix run github:numtide/nixos-anywhere -- \ - --build-on-remote \ - --flake .#new-vm \ - root@ diff --git a/.forgejo/workflows/proxmox-nixos-deploy.yml b/.forgejo/workflows/proxmox-nixos-deploy.yml deleted file mode 100644 index ff9c203..0000000 --- a/.forgejo/workflows/proxmox-nixos-deploy.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Terraform Proxmox NixOS VM Deploy - -on: - workflow_dispatch: - -jobs: - deploy-nixos-vm: - runs-on: nixos-latest - steps: - - name: Install nodejs - run: nix-env -iA nixpkgs.nodejs - - - name: Install terraform - run: nix-env -iA nixpkgs.terraform - - - name: Install sops - run: nix-env -iA nixpkgs.sops - - - name: Checkout repo - uses: actions/checkout@v3 - - - name: Decrypt secrets - env: - SOPS_AGE_KEY_FILE: ${{ secrets.AGE_KEY_FILE }} - run: | - sops --decrypt secrets.yaml.enc > secrets.yaml - - - name: Terraform Init - run: terraform init - - - name: Terraform Apply - env: - PROXMOX_PASSWORD: ${{ secrets.PROXMOX_PASSWORD }} - run: terraform apply -auto-approve diff --git a/.sops.yaml b/.sops.yaml new file mode 100644 index 0000000..eafd8b5 --- /dev/null +++ b/.sops.yaml @@ -0,0 +1,3 @@ +creation_rules: + - age: >- + age1n20y9kmdh324m3tkclvhmyuc7c8hk4w84zsal725adahwl8nzq0s04aq4y diff --git a/ansible/books/clone-template.yml b/ansible/books/clone-template.yml index 7231432..2c75737 100644 --- a/ansible/books/clone-template.yml +++ b/ansible/books/clone-template.yml @@ -55,7 +55,7 @@ register: qga_json failed_when: qga_json.rc != 0 - - name: Parse out eth0’s IPv4 address + - name: Parse out eth0's IPv4 address ansible.builtin.set_fact: vm_ipv4: >- {{ @@ -71,11 +71,11 @@ ) }} - - name: Show the VM’s IP + - name: Show the VM's IP ansible.builtin.debug: msg: "VM {{ new_vmid }} ({{ new_name }}) reports IPv4: {{ vm_ipv4 }}" - - name: Add new VM’s IP to in-memory inventory (for later tasks) + - name: Add new VM's IP to in-memory inventory (for later tasks) ansible.builtin.add_host: name: "nixos-{{ new_vmid }}" ansible_host: "{{ vm_ipv4 }}" diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..19b6da5 --- /dev/null +++ b/flake.lock @@ -0,0 +1,154 @@ +{ + "nodes": { + "colmena": { + "inputs": { + "flake-compat": "flake-compat", + "flake-utils": "flake-utils", + "nix-github-actions": "nix-github-actions", + "nixpkgs": "nixpkgs", + "stable": "stable" + }, + "locked": { + "lastModified": 1751144689, + "narHash": "sha256-cgIntaqhcm62V1KU6GmrAGpHpahT4UExEWW2ryS02ZU=", + "owner": "zhaofengli", + "repo": "colmena", + "rev": "3ceec72cfb396a8a8de5fe96a9d75a9ce88cc18e", + "type": "github" + }, + "original": { + "owner": "zhaofengli", + "repo": "colmena", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1650374568, + "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "b4a34015c698c7793d592d66adbab377907a2be8", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1659877975, + "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nix-github-actions": { + "inputs": { + "nixpkgs": [ + "colmena", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1729742964, + "narHash": "sha256-B4mzTcQ0FZHdpeWcpDYPERtyjJd/NIuaQ9+BV1h+MpA=", + "owner": "nix-community", + "repo": "nix-github-actions", + "rev": "e04df33f62cdcf93d73e9a04142464753a16db67", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nix-github-actions", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1750134718, + "narHash": "sha256-v263g4GbxXv87hMXMCpjkIxd/viIF7p3JpJrwgKdNiI=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9e83b64f727c88a7711a2c463a7b16eedb69a84c", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1751801514, + "narHash": "sha256-Ve3ZTzcXEGt4IoXLsWqk35w3w4cH5G1MJb+gLdj/jtE=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "4e3e6431fd60d653bb7f4fa5487e2c500d50f49f", + "type": "github" + }, + "original": { + "owner": "nixos", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "colmena": "colmena", + "nixpkgs": "nixpkgs_2", + "sops-nix": "sops-nix" + } + }, + "sops-nix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1751606940, + "narHash": "sha256-KrDPXobG7DFKTOteqdSVeL1bMVitDcy7otpVZWDE6MA=", + "owner": "Mic92", + "repo": "sops-nix", + "rev": "3633fc4acf03f43b260244d94c71e9e14a2f6e0d", + "type": "github" + }, + "original": { + "owner": "Mic92", + "repo": "sops-nix", + "type": "github" + } + }, + "stable": { + "locked": { + "lastModified": 1750133334, + "narHash": "sha256-urV51uWH7fVnhIvsZIELIYalMYsyr2FCalvlRTzqWRw=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "36ab78dab7da2e4e27911007033713bab534187b", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..9e03ab2 --- /dev/null +++ b/flake.nix @@ -0,0 +1,34 @@ +{ + description = "Declarative NixOS HomeLab"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + # systems.url = "github:nix-systems/default"; + sops-nix = { + url = "github:Mic92/sops-nix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + # home-manager = { + # url = "home-manager"; + # inputs.nixpkgs.follows = "nixpkgs"; + # }; + colmena.url = "github:zhaofengli/colmena"; + }; + + outputs = { + self, + nixpkgs, + # systems, + sops-nix, + # home-manager, + colmena, + ... + } @ inputs: let + overlays = [ + colmena.overlays.default + ]; + in { + colmenaHive = colmena.lib.makeHive self.outputs.colmena; + colmena = (import ./hive.nix) (inputs // {inherit overlays;}); + }; +} diff --git a/hive.nix b/hive.nix new file mode 100644 index 0000000..9ef4204 --- /dev/null +++ b/hive.nix @@ -0,0 +1,48 @@ +inputs @ { + self, + nixpkgs, + sops-nix, + # home-manager, + overlays, + ... +}: { + meta = { + nixpkgs = import nixpkgs { + system = "x86_64-linux"; + }; + specialArgs.flakeInputs = inputs; + }; + + defaults = { + pkgs, + lib, + name, + nodes, + meta, + config, + ... + }: { + imports = [ + ./machines/_default + ./machines/modules + sops-nix.nixosModules.sops + # home-manager.nixosModules.home-manager + ]; + nixpkgs = { + inherit overlays; + system = lib.mkDefault "x86_64-linux"; + config.allowUnfree = true; + }; + deployment.tags = [config.nixpkgs.system name]; + }; + + sandbox = {name, ...}: { + imports = [./machines/${name}/definition.nix]; + deployment.tags = ["sandbox"]; + }; + + monitor = {name, ...}: { + imports = [./machines/${name}/definition.nix]; + deployment.tags = ["grafana" "prometheus"]; + }; +} diff --git a/machines/_default/common_config.nix b/machines/_default/common_config.nix new file mode 100644 index 0000000..d65a3f5 --- /dev/null +++ b/machines/_default/common_config.nix @@ -0,0 +1,89 @@ +{ + pkgs, + lib, + modulesPath, + ... +}: { + imports = [ + # Enables QEMU Guest Agent support in the VM + (modulesPath + "/profiles/qemu-guest.nix") + ]; + + services.qemuGuest.enable = lib.mkDefault true; + + boot.loader.grub.enable = lib.mkDefault true; + boot.loader.grub.devices = ["nodev"]; + + boot.growPartition = lib.mkDefault true; + + boot.tmp.cleanOnBoot = true; + + fileSystems."/" = lib.mkDefault { + device = "/dev/disk/by-label/nixos"; + autoResize = true; # grow on first boot + fsType = "ext4"; + }; + + nix = { + settings.experimental-features = ["nix-command" "flakes"]; + gc.automatic = true; + gc.options = "--delete-older-than 15d"; + gc.dates = "daily"; + optimise.automatic = true; + settings = { + auto-optimise-store = true; + allowed-users = ["@wheel"]; + trusted-users = ["root" "@wheel"]; + }; + extraOptions = '' + keep-outputs = true + keep-derivations = true + ''; + }; + + security.sudo.wheelNeedsPassword = false; + + users.users.plasmagoat = { + isNormalUser = true; + description = "plasmagoat"; + extraGroups = ["wheel" "docker"]; + # shell = pkgs.zsh; + # shell = pkgs.fish; + }; + + services.openssh.enable = true; + services.openssh.openFirewall = true; + services.openssh.settings.PasswordAuthentication = false; + services.openssh.settings.PermitRootLogin = "prohibit-password"; + services.openssh.settings.KbdInteractiveAuthentication = false; + + services.sshguard.enable = true; + + programs.ssh.startAgent = true; + + users.users.plasmagoat.openssh.authorizedKeys.keys = [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCeg/n/vst9KME8byhxX2FhA+FZNQ60W38kkNt45eNzK5zFqBYuwo1nDXVanJSh9unRvB13b+ygpZhrb4sHvkETGWiEioc49MiWr8czEhu6Wpo0vv5MAJkiYvGZUYPdUW52jUzWcYdw8PukG2rowrxL5G0CmsqLwHMPU2FyeCe5aByFI/JZb8R80LoEacgjUiipJcoLWUVgG2koMomHClqGu+16kB8nL5Ja3Kc9lgLfDK7L0A5R8JXhCjrlEsmXbxZmwDKuxvjDAZdE9Sl1VZmMDfWkyrRlenrt01eR3t3Fec6ziRm5ZJk9e2Iu1DPoz+PoHH9aZGVwmlvvnr/gMF3OILxcqb0qx+AYlCCnb6D6pJ9zufhZkKcPRS1Q187F6fz+v2oD1xLZWFHJ92+7ItM0WmbDOHOC29s5EA6wNm3iXZCq86OI3n6T34njDtPqh6Z7Pk2sdK4GBwnFj4KwEWXvdKZKSX1qb2EVlEBE9QI4Gf3eg4SiBu2cAFt3nOSzs8c= asol\dbs@ALPHA-DBS-P14sG2" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC+U3DWOrklcA8n8wdbLBGyli5LsJI3dpL2Zod8mx8eOdC4H127ZT1hzuk2uSmkic4c73BykPyQv8rcqwaRGW94xdMRanKmHYxnbHXo5FBiGrCkNlNNZuahthAGO49c6sUhJMq0eLhYOoFWjtf15sr5Zu7Ug2YTUL3HXB1o9PZ3c9sqYHo2rC/Il1x2j3jNAMKST/qUZYySvdfNJEeQhMbQcdoKJsShcE3oGRL6DFBoV/mjJAJ+wuDhGLDnqi79nQjYfbYja1xKcrKX+D3MfkFxFl6ZIzomR1t75AnZ+09oaWcv1J7ehZ3h9PpDBFNXvzyLwDBMNS+UYcH6SyFjkUbF David@NZXT" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICUP7m8jZJiclZGfSje8CeBYFhX10SrdtjYziuChmj1X plasmagoat@macbook-air" + ]; + + users.users.root.openssh.authorizedKeys.keys = [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCeg/n/vst9KME8byhxX2FhA+FZNQ60W38kkNt45eNzK5zFqBYuwo1nDXVanJSh9unRvB13b+ygpZhrb4sHvkETGWiEioc49MiWr8czEhu6Wpo0vv5MAJkiYvGZUYPdUW52jUzWcYdw8PukG2rowrxL5G0CmsqLwHMPU2FyeCe5aByFI/JZb8R80LoEacgjUiipJcoLWUVgG2koMomHClqGu+16kB8nL5Ja3Kc9lgLfDK7L0A5R8JXhCjrlEsmXbxZmwDKuxvjDAZdE9Sl1VZmMDfWkyrRlenrt01eR3t3Fec6ziRm5ZJk9e2Iu1DPoz+PoHH9aZGVwmlvvnr/gMF3OILxcqb0qx+AYlCCnb6D6pJ9zufhZkKcPRS1Q187F6fz+v2oD1xLZWFHJ92+7ItM0WmbDOHOC29s5EA6wNm3iXZCq86OI3n6T34njDtPqh6Z7Pk2sdK4GBwnFj4KwEWXvdKZKSX1qb2EVlEBE9QI4Gf3eg4SiBu2cAFt3nOSzs8c= asol\\dbs@ALPHA-DBS-P14sG2" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC+U3DWOrklcA8n8wdbLBGyli5LsJI3dpL2Zod8mx8eOdC4H127ZT1hzuk2uSmkic4c73BykPyQv8rcqwaRGW94xdMRanKmHYxnbHXo5FBiGrCkNlNNZuahthAGO49c6sUhJMq0eLhYOoFWjtf15sr5Zu7Ug2YTUL3HXB1o9PZ3c9sqYHo2rC/Il1x2j3jNAMKST/qUZYySvdfNJEeQhMbQcdoKJsShcE3oGRL6DFBoV/mjJAJ+wuDhGLDnqi79nQjYfbYja1xKcrKX+D3MfkFxFl6ZIzomR1t75AnZ+09oaWcv1J7ehZ3h9PpDBFNXvzyLwDBMNS+UYcH6SyFjkUbF David@NZXT" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICUP7m8jZJiclZGfSje8CeBYFhX10SrdtjYziuChmj1X plasmagoat@macbook-air" + ]; + + environment.systemPackages = with pkgs; [ + dig + nmap + traceroute + vim + git + curl + python3 + ]; + + time.timeZone = "Europe/Copenhagen"; + + console.keyMap = "dk-latin1"; +} diff --git a/machines/_default/default.nix b/machines/_default/default.nix new file mode 100644 index 0000000..0380c78 --- /dev/null +++ b/machines/_default/default.nix @@ -0,0 +1,40 @@ +{ + lib, + name, + ... +}: { + imports = [ + ./common_config.nix + ]; + + networking.hostName = name; + + deployment = { + replaceUnknownProfiles = lib.mkDefault true; + buildOnTarget = lib.mkDefault false; + targetHost = lib.mkDefault "${name}.lab"; + tags = lib.mkDefault ["homelab"]; + }; + + sops = { + age.keyFile = "/etc/sops/age.key"; + defaultSopsFile = ../../secrets/secrets.yml; + }; + + # home-manager = { + # useGlobalPkgs = true; + # useUserPackages = true; + # users.cottand = { + # imports = with flakeInputs.cottand.homeManagerModules; [cli]; + # home.stateVersion = "22.11"; + # }; + # users.root = { + # imports = with flakeInputs.cottand.homeManagerModules; [cli]; + # home.stateVersion = "22.11"; + # }; + # }; + + # consulNode.enable = lib.mkDefault true; + nodeExporter.enable = lib.mkDefault true; + journalLog.enable = lib.mkDefault true; +} diff --git a/machines/modules/default.nix b/machines/modules/default.nix new file mode 100644 index 0000000..bad1c63 --- /dev/null +++ b/machines/modules/default.nix @@ -0,0 +1,11 @@ +{ + imports = [ + ./node-exporter.nix + ./journal-log.nix + # ./wireguard.nix + # ./nomad.nix + # ./vault.nix + # ./vaultSecret.nix + # ./consul.nix + ]; +} diff --git a/machines/modules/journal-log.nix b/machines/modules/journal-log.nix new file mode 100644 index 0000000..260cb73 --- /dev/null +++ b/machines/modules/journal-log.nix @@ -0,0 +1,95 @@ +{ + lib, + config, + nodes, + # name, + # meta, + ... +}: +with lib; let + cfg = config.journalLog; +in { + options.journalLog = { + enable = mkOption { + type = types.bool; + default = false; + }; + + port = mkOption { + type = types.number; + default = 9080; + }; + + clientUrl = mkOption { + type = types.string; + default = "http://monitor.lab:3100/loki/api/v1/push"; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + }; + }; + + config = mkIf cfg.enable { + networking.firewall.allowedTCPPorts = [cfg.port]; + + systemd.tmpfiles.rules = [ + "d /var/lib/promtail 0755 promtail promtail -" + ]; + + services.promtail = { + enable = true; + configuration = { + server = { + http_listen_port = cfg.port; + grpc_listen_port = 0; + }; + positions = { + filename = "/var/lib/promtail/positions.yaml"; + }; + clients = [ + { + url = cfg.clientUrl; + } + ]; + scrape_configs = [ + { + job_name = "journal"; + journal = { + path = "/var/log/journal"; + labels = { + job = "promtail"; + host = config.networking.hostName; + env = "proxmox"; + instance = "${config.networking.hostName}.lab"; + }; + }; + relabel_configs = [ + { + source_labels = ["__journal__systemd_unit"]; + target_label = "unit"; + } + { + source_labels = ["__journal__hostname"]; + target_label = "host"; + } + { + source_labels = ["__journal__systemd_user_unit"]; + target_label = "user_unit"; + } + { + source_labels = ["__journal__transport"]; + target_label = "transport"; + } + { + source_labels = ["__journal_priority_keyword"]; + target_label = "severity"; + } + ]; + } + ]; + }; + }; + }; +} diff --git a/machines/modules/node-exporter.nix b/machines/modules/node-exporter.nix new file mode 100644 index 0000000..be66874 --- /dev/null +++ b/machines/modules/node-exporter.nix @@ -0,0 +1,40 @@ +{ + lib, + config, + # name, + # meta, + ... +}: +with lib; let + cfg = config.nodeExporter; +in { + options.nodeExporter = { + enable = mkOption { + type = types.bool; + default = false; + }; + + port = mkOption { + type = types.number; + default = 9100; + }; + + extraConfig = mkOption { + type = types.attrs; + default = {}; + }; + }; + + config = mkIf cfg.enable { + networking.firewall.allowedTCPPorts = [cfg.port]; + + services.prometheus.exporters.node = + { + enable = true; + enabledCollectors = ["systemd"]; + port = cfg.port; + extraFlags = ["--collector.ethtool" "--collector.softirqs" "--collector.tcpstat" "--collector.wifi"]; + } + // cfg.extraConfig; + }; +} diff --git a/machines/monitor/alertmanager.nix b/machines/monitor/alertmanager.nix new file mode 100644 index 0000000..3d15787 --- /dev/null +++ b/machines/monitor/alertmanager.nix @@ -0,0 +1,63 @@ +{ + config, + pkgs, + ... +}: let + alertmanagerEnv = config.sops.secrets."alertmanager/env".path; +in { + sops.secrets."alertmanager/env" = { + sopsFile = ../../secrets/secrets.yaml; + mode = "0440"; + }; + + services.prometheus.alertmanager = { + enable = true; + openFirewall = true; + environmentFile = alertmanagerEnv; + + webExternalUrl = "http://monitor.lab:9093"; # optional but helpful + configuration = { + route = { + receiver = "null"; + group_by = ["alertname"]; + group_wait = "10s"; + group_interval = "5m"; + repeat_interval = "4h"; + + routes = [ + { + receiver = "telegram"; + matchers = [ + "severity =~ \"warning|critical\"" + ]; + group_wait = "10s"; + continue = true; + } + ]; + }; + + receivers = [ + {name = "null";} + { + name = "telegram"; + telegram_configs = [ + { + api_url = "https://api.telegram.org"; + bot_token = "$TELEGRAM_BOT_TOKEN"; + chat_id = -1002642560007; + message_thread_id = 4; + parse_mode = "HTML"; + send_resolved = true; + message = "{{ template \"telegram.message\". }}"; + } + ]; + } + ]; + + templates = [ + (pkgs.writeText "telegram.tmpl" (builtins.readFile ./provisioning/templates/telegram.tmpl)) + # (pkgs.writeText "telegram.markdown.v2.tmpl" (builtins.readFile ./provisioning/templates/telegram.markdown.v2.tmpl)) + ]; + }; + }; +} diff --git a/machines/monitor/dashboards/15356_rev14.json b/machines/monitor/dashboards/15356_rev14.json new file mode 100644 index 0000000..da0de99 --- /dev/null +++ b/machines/monitor/dashboards/15356_rev14.json @@ -0,0 +1,2353 @@ +{ + "__inputs": [ + { + "name": "DS_PROXMOX-FLUX", + "label": "proxmox-flux", + "description": "", + "type": "datasource", + "pluginId": "influxdb", + "pluginName": "InfluxDB" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.1" + }, + { + "type": "datasource", + "id": "influxdb", + "name": "InfluxDB", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Proxmox Dashboard complete written in Flux w/ Cluster support", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 15356, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": true, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "influxdb", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 18, + "panels": [], + "repeat": "server", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Summary $server", + "type": "row" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.75 + }, + { + "color": "red", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"cpu\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean)", + "refId": "A" + } + ], + "title": "Server CPU", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"cpus\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> limit(n:1)", + "refId": "A" + } + ], + "title": "Logical Cores", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 8, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"memory\" and\n r._field == \"memtotal\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> limit(n:1)", + "refId": "A" + } + ], + "title": "Total Memory", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 9, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"memory\" and\n r._field == \"swaptotal\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> limit(n:1)", + "refId": "A" + } + ], + "title": "Swap Total", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "displayName": "${__field.labels.host} (${__field.labels.type})", + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 14, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"system\")\n |> filter(fn: (r) => r._field == \"avail\" or r._field == \"used\")\n |> filter(fn: (r) => r[\"nodename\"] == \"${server}\")\n |> last()\n |> pivot(rowKey:[\"_time\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n |> map(fn: (r) => ({ r with \n _time: r._time,\n _measurement: r.measurement,\n _field: \"used_perc\",\n _value: float(v: 100) / (float(v: r.avail) + float(v: r.used)) * float(v:r.used)\n }))\n |> drop(columns: [\"avail\",\"used\"])\n", + "refId": "A" + } + ], + "title": "Storage Pools", + "type": "bargauge" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 4 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"avg1\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n", + "refId": "A" + } + ], + "title": "Load Avg (1m)", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "description": "Memory in use by Host OS, VM's and Containers", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "max": 64, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 500 + }, + { + "color": "red", + "value": 800 + } + ] + }, + "unit": "gbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 4 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"memory\" and\n r._field == \"memused\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> map(fn: (r) => ({\n r with\n _value: float(v: r._value) / 1073741824.0\n })\n )\n |> last()", + "refId": "A" + } + ], + "title": "Memory in Use", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.5 + }, + { + "color": "red", + "value": 0.75 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 4 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"wait\"\n )\n |> filter(fn: (r) => r[\"host\"] == \"${server}\")\n |> aggregateWindow(every: v.windowPeriod, fn:mean, createEmpty: false)", + "refId": "A" + } + ], + "title": "I/O Wait", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 49, + "panels": [], + "title": "Cluster summary of Resources $server", + "type": "row" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Uptime" + }, + "properties": [ + { + "id": "unit", + "value": "dtdhms" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 71 + }, + "id": 16, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: -10m)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> filter(fn: (r) => r._value > 0)\n |> last()\n |> limit(n:1)\n |> group(columns: [\"_measurement\"])\n |> keep(columns: [\"_time\",\"host\",\"nodename\",\"_value\"])\n \n", + "refId": "A" + } + ], + "title": "Running VMs", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "_time": 0, + "_value": 2, + "host": 1 + }, + "renameByName": { + "_time": "Time", + "_value": "Uptime", + "host": "VM" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Uptime" + }, + "properties": [ + { + "id": "unit", + "value": "dtdhms" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 71 + }, + "id": 19, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: -10m)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"uptime\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> filter(fn: (r) => r._value > 0)\n |> last()\n |> limit(n:1)\n |> group(columns: [\"_measurement\"])\n |> keep(columns: [\"_time\",\"host\",\"_value\"])\n \n", + "refId": "A" + } + ], + "title": "Running LXCs", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "_time": 0, + "_value": 2, + "host": 1 + }, + "renameByName": { + "_time": "Time", + "_value": "Uptime", + "host": "VM" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "influxdb", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 78 + }, + "id": 23, + "panels": [], + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Memory and CPU usage $server", + "type": "row" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Percent", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "avg1" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisLabel", + "value": "Loadavg" + }, + { + "id": "unit", + "value": "short" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 79 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.5", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"cpu\"\n )\n |> filter(fn: (r) => r[\"host\"] =~ /${server:regex}/)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> map(fn: (r) => ({\n r with\n _time: r._time,\n _value: float(v: r._value) * float(v: 100)\n })\n )\n", + "refId": "A" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "hide": false, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"wait\"\n )\n |> filter(fn: (r) => r[\"host\"] =~ /${server:regex}/)\n |> map(fn: (r) => ({\n r with\n _time: r._time,\n _value: float(v: r._value) * float(v: 100)\n })\n )\n |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)", + "refId": "B" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "hide": false, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop:v.timeRangeStop)\n |> filter(fn: (r) =>\n r._measurement == \"cpustat\" and\n r._field == \"avg1\"\n )\n |> filter(fn: (r) => r[\"host\"] =~ /${server:regex}/)\n |> aggregateWindow(every: 1m, fn: mean, createEmpty: false)", + "refId": "C" + } + ], + "title": "Host CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Memory", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 79 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.5", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"memory\")\n |> filter(fn: (r) => r[\"_field\"] == \"memtotal\" or r[\"_field\"] == \"memused\")\n |> filter(fn: (r) => r[\"host\"] =~ /${server:regex}/)\n// |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")", + "refId": "A" + } + ], + "title": "Host Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host} ${__field.labels.nodename}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"cpu\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "VM CPU usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 88 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "mem = from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"mem\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /^${server:regex}$/)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\nmaxmem = from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"maxmem\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /^${server:regex}$/)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(\n tables: {mem:mem, maxmem:maxmem},\n on: [\"_time\",\"_stop\",\"_start\",\"host\",\"nodename\"]\n\n)\n\n|> map(fn: (r) => ({\n _time: r._time,\n _value: float(v:r._value_mem) / float(v:r._value_maxmem) * float(v:100),\n host: r.host,\n nodename: r.nodename\n })\n )\n\n|> group(columns: [\"host\",\"nodename\"], mode: \"by\")", + "refId": "A" + } + ], + "title": "VM memory usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 96 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"cpu\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] == \"${server}\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "LXC CPU usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 96 + }, + "id": 29, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "mem = from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"mem\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] == \"${server}\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\nmaxmem = from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"maxmem\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(\n tables: {mem:mem, maxmem:maxmem},\n on: [\"_time\",\"_stop\",\"_start\",\"host\"]\n\n)\n\n|> map(fn: (r) => ({\n _time: r._time,\n _value: float(v:r._value_mem) / float(v:r._value_maxmem) * float(v:100),\n host: r.host\n })\n )\n\n|> group(columns: [\"host\"])", + "refId": "A" + } + ], + "title": "LXC memory usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host} ${__field.labels.nodename}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 104 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"diskread\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode:\"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "VM I/O Read", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host} ${__field.labels.nodename}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 104 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"diskwrite\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "VMs I/O Write", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 112 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"diskread\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "LXC I/O Read", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 112 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"diskwrite\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "LXC I/O Write", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host} ${__field.labels.nodename}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 120 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"netin\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s, nonNegative: true)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "VM traffic In", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host} ${__field.labels.nodename}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 120 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"netout\")\n |> filter(fn: (r) => r[\"object\"] == \"qemu\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s, nonNegative: true)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "VM traffic out", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 128 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"netin\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s, nonNegative: true)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "LXC traffic In", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "displayName": "${__field.labels.host}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 128 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"system\")\n |> filter(fn: (r) => r[\"_field\"] == \"netout\")\n |> filter(fn: (r) => r[\"object\"] == \"lxc\")\n |> filter(fn: (r) => r[\"_value\"] > 0)\n |> filter(fn: (r) => r[\"nodename\"] =~ /${server:regex}/)\n |> group(columns: [\"host\",\"nodename\"], mode: \"by\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> derivative(unit: 1s, nonNegative: true)\n// |> yield(name: \"mean\")\n", + "refId": "A" + } + ], + "title": "LXC traffic out", + "transformations": [], + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "chromaweb-flux-telegraf", + "value": "chromaweb-flux-telegraf" + }, + "description": "Fluxlang Capable Datasouce", + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "dsProxmox", + "options": [], + "query": "influxdb", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "definition": "buckets()", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "Bucket", + "options": [], + "query": "buckets()", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "${DS_PROXMOX-FLUX}" + }, + "definition": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => \n r._measurement == \"system\" and\n r.object == \"nodes\"\n )\n |> keyValues(keyColumns: [\"host\"])\n |> group()\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n\n", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "server", + "options": [], + "query": "from(bucket: \"${Bucket}\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => \n r._measurement == \"system\" and\n r.object == \"nodes\"\n )\n |> keyValues(keyColumns: [\"host\"])\n |> group()\n |> keep(columns: [\"host\"])\n |> distinct(column: \"host\")\n\n", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Proxmox Cluster [Flux]", + "uid": "IfgdXjtnk", + "version": 27, + "weekStart": "" +} \ No newline at end of file diff --git a/machines/monitor/dashboards/gitea.json b/machines/monitor/dashboards/gitea.json new file mode 100644 index 0000000..26cfc9c --- /dev/null +++ b/machines/monitor/dashboards/gitea.json @@ -0,0 +1,1082 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:2075", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Monitor Gitea server", + "editable": true, + "gnetId": 13192, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.7.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"gitea\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resident memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.7.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_cpu_seconds_total{job=\"gitea\"}", + "instant": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.7.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_open_fds{job=\"gitea\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open FDS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 2, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_accesses", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Accesses", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 6 + }, + "id": 18, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_repositories", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Repositories", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 6 + }, + "id": 24, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_users", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Users", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 6 + }, + "id": 5, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-blue", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_issues", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Issues", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 10 + }, + "id": 11, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-yellow", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_actions", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Actions", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 10 + }, + "id": 10, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-yellow", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_labels", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Labels", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 10 + }, + "id": 21, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-yellow", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_organizations", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Organizations", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 10 + }, + "id": 16, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-yellow", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_teams", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Teams", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 10 + }, + "id": 8, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-yellow", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_follows", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Follows", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 10 + }, + "id": 20, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-yellow", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_publickeys", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of PublicKeys", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 17, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-purple", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_stars", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Stars", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 14 + }, + "id": 14, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-purple", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_webhooks", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Webhooks", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 14 + }, + "id": 19, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-purple", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_releases", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Releases", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 14 + }, + "id": 7, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-purple", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_comments", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Comments", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 14 + }, + "id": 3, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-purple", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_milestones", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Milestones", + "type": "stat" + }, + { + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 14 + }, + "id": 25, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-purple", + "value": null + } + ] + } + }, + "overrides": [], + "values": false + }, + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto" + }, + "pluginVersion": "6.7.3", + "targets": [ + { + "expr": "gitea_watches", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Watches", + "type": "stat" + } + ], + "refresh": "1m", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": ["30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"] + }, + "timezone": "browser", + "title": "Gitea", + "uid": "nNq1Iw5Gz", + "variables": { + "list": [] + }, + "version": 7 +} diff --git a/machines/monitor/dashboards/grafana-traefik.json b/machines/monitor/dashboards/grafana-traefik.json new file mode 100644 index 0000000..c03e56e --- /dev/null +++ b/machines/monitor/dashboards/grafana-traefik.json @@ -0,0 +1,692 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Traefik dashboard prometheus", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "$backend stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "maxDataPoints": 3, + "options": { + "displayLabels": [], + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value", "percent"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "traefik_service_requests_total{service=\"$service\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "title": "$service return code", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": ["mean"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "sum(traefik_service_request_duration_seconds_sum{service=\"$service\"}) / sum(traefik_service_requests_total{service=\"$service\"}) * 1000", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "$service response time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "alertThreshold": true, + "legend": { + "calcs": ["mean", "max", "min"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "sum(rate(traefik_service_requests_total{service=\"$service\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Total requests $service", + "refId": "A" + } + ], + "title": "Total requests over 5min $service", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 12, + "panels": [], + "title": "Global stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "alertThreshold": true, + "legend": { + "calcs": ["lastNotNull", "max", "min"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "title": "Status code 200 over 5min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "alertThreshold": true, + "legend": { + "calcs": ["lastNotNull", "max", "min"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code!=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ method }} : {{code}}", + "refId": "A" + } + ], + "title": "Others status code over 5min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 7, + "maxDataPoints": 3, + "options": { + "displayLabels": [], + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "sum(rate(traefik_service_requests_total[5m])) by (service) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "refId": "A" + } + ], + "title": "Requests by service", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 8, + "maxDataPoints": 3, + "options": { + "displayLabels": [], + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.0+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint =~ \"$entrypoint\"}[5m])) by (entrypoint) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ entrypoint }}", + "refId": "A" + } + ], + "title": "Requests by protocol", + "type": "piechart" + } + ], + "preload": false, + "schemaVersion": 41, + "tags": ["traefik", "prometheus"], + "templating": { + "list": [ + { + "current": {}, + "datasource": "Prometheus", + "definition": "label_values({job=\"traefik\"},service)", + "includeAll": false, + "name": "service", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({job=\"traefik\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": {}, + "datasource": "Prometheus", + "definition": "", + "includeAll": true, + "multi": true, + "name": "entrypoint", + "options": [], + "query": { + "query": "label_values(entrypoint)", + "refId": "Prometheus-entrypoint-Variable-Query" + }, + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Traefik", + "uid": "qPdAviJmz", + "version": 1 +} diff --git a/machines/monitor/dashboards/node-exporter.json b/machines/monitor/dashboards/node-exporter.json new file mode 100644 index 0000000..e005adf --- /dev/null +++ b/machines/monitor/dashboards/node-exporter.json @@ -0,0 +1,13554 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:19", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Complete Prometheus Node Exporter dashboard", + "editable": true, + "gnetId": 12486, + "graphTooltip": 0, + "id": null, + "iteration": 1592835369005, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 261, + "panels": [], + "repeat": null, + "title": "Quick CPU / Mem / Disk", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Busy state of all CPU cores together", + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 20, + "links": [], + "options": { + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.4", + "targets": [ + { + "expr": "(((count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))) - avg(sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])))) * 100) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", + "hide": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 900 + } + ], + "title": "CPU Busy", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Busy state of all CPU cores together (5 min average)", + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 155, + "links": [], + "options": { + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.4", + "targets": [ + { + "expr": "avg(node_load5{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "Sys Load (5m avg)", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Busy state of all CPU cores together (15 min average)", + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 19, + "links": [], + "options": { + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.4", + "targets": [ + { + "expr": "avg(node_load15{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "Sys Load (15m avg)", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Non available RAM memory", + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 1 + }, + "hideTimeOverride": false, + "id": 16, + "links": [], + "options": { + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "max": 100, + "min": 0, + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.4", + "targets": [ + { + "expr": "((node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "A", + "step": 900 + }, + { + "expr": "100 - ((node_memory_MemAvailable_bytes{instance=\"$node\",job=\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "B", + "step": 900 + } + ], + "title": "RAM Used", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Used Swap", + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 21, + "links": [], + "options": { + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 25 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.4", + "targets": [ + { + "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "SWAP Used", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Used Root FS", + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 154, + "links": [], + "options": { + "fieldOptions": { + "calcs": ["lastNotNull"], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "max": 100, + "min": 0, + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.4", + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "title": "Root FS Used", + "type": "gauge" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "description": "Total number of CPU cores", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 1 + }, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 900 + } + ], + "thresholds": "", + "title": "CPU Cores", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 1, + "description": "System uptime", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 20, + "y": 1 + }, + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "null", + "nullText": null, + "postfix": "s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 2, + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "Total RootFS", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 3 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "thresholds": "70,90", + "title": "RootFS Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "Total RAM", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 20, + "y": 3 + }, + "id": 75, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "70%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "thresholds": "", + "title": "RAM Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "Total SWAP", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 22, + "y": 3 + }, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "70%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 900 + } + ], + "thresholds": "", + "title": "SWAP Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 263, + "panels": [ + { + "aliasColors": { + "Busy": "#EAB839", + "Busy Iowait": "#890F02", + "Busy other": "#1F78C1", + "Idle": "#052B51", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Basic CPU info", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 77, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 250, + "sort": null, + "sortDesc": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Busy Iowait", + "color": "#890F02" + }, + { + "alias": "Idle", + "color": "#7EB26D" + }, + { + "alias": "Busy System", + "color": "#EAB839" + }, + { + "alias": "Busy User", + "color": "#0A437C" + }, + { + "alias": "Busy Other", + "color": "#6D1F62" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Busy System", + "refId": "A", + "step": 240 + }, + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Busy User", + "refId": "B", + "step": 240 + }, + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Busy Iowait", + "refId": "C", + "step": 240 + }, + { + "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=~\".*irq\",instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Busy IRQs", + "refId": "D", + "step": 240 + }, + { + "expr": "sum (irate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Busy Other", + "refId": "E", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Idle", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "SWAP Used": "#BF1B00", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap Used": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Basic memory usage", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 78, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "RAM Total", + "color": "#E0F9D7", + "fill": 0, + "stack": false + }, + { + "alias": "RAM Cache + Buffer", + "color": "#052B51" + }, + { + "alias": "RAM Free", + "color": "#7EB26D" + }, + { + "alias": "Avaliable", + "color": "#DEDAF7", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "RAM Total", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "RAM Used", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RAM Cache + Buffer", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RAM Free", + "refId": "D", + "step": 240 + }, + { + "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "SWAP Used", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Recv_bytes_eth2": "#7EB26D", + "Recv_bytes_lo": "#0A50A1", + "Recv_drop_eth2": "#6ED0E0", + "Recv_drop_lo": "#E0F9D7", + "Recv_errs_eth2": "#BF1B00", + "Recv_errs_lo": "#CCA300", + "Trans_bytes_eth2": "#7EB26D", + "Trans_bytes_lo": "#0A50A1", + "Trans_drop_eth2": "#6ED0E0", + "Trans_drop_lo": "#E0F9D7", + "Trans_errs_eth2": "#BF1B00", + "Trans_errs_lo": "#CCA300", + "recv_bytes_lo": "#0A50A1", + "recv_drop_eth0": "#99440A", + "recv_drop_lo": "#967302", + "recv_errs_eth0": "#BF1B00", + "recv_errs_lo": "#890F02", + "trans_bytes_eth0": "#7EB26D", + "trans_bytes_lo": "#0A50A1", + "trans_drop_eth0": "#99440A", + "trans_drop_lo": "#967302", + "trans_errs_eth0": "#BF1B00", + "trans_errs_lo": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Basic network info per interface", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 74, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "recv {{device}}", + "refId": "A", + "step": 240 + }, + { + "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "trans {{device}} ", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "pps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "Disk space used of all filesystems mounted", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 13 + }, + "height": "", + "hiddenSeries": false, + "id": 152, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Space Used Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Basic CPU / Mem / Net / Disk", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 265, + "panels": [ + { + "aliasColors": { + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 250, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 2, + "legendFormat": "System - Processes executing in kernel mode", + "refId": "A", + "step": 20 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "User - Normal processes executing in user mode", + "refId": "B", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='nice',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Nice - Niced processes executing in user mode", + "refId": "C", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Idle - Waiting for something to happen", + "refId": "D", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Iowait - Waiting for I/O to complete", + "refId": "E", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='irq',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Irq - Servicing interrupts", + "refId": "F", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Softirq - Servicing softirqs", + "refId": "G", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='steal',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", + "refId": "H", + "step": 240 + }, + { + "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='guest',instance=\"$node\",job=\"$job\"}[5m])) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system", + "refId": "I", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "percentage", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap - Swap memory usage": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839", + "Unused - Free memory unassigned": "#052B51" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 7 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Hardware Corrupted - *./", + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Apps - Memory used by user-space applications", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", + "refId": "D", + "step": 240 + }, + { + "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Cache - Parked file data (file content) cache", + "refId": "E", + "step": 240 + }, + { + "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Buffers - Block device (e.g. harddisk) cache", + "refId": "F", + "step": 240 + }, + { + "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Unused - Free memory unassigned", + "refId": "G", + "step": 240 + }, + { + "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Swap - Swap space used", + "refId": "H", + "step": 240 + }, + { + "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", + "refId": "I", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Stack", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "receive_packets_eth0": "#7EB26D", + "receive_packets_lo": "#E24D42", + "transmit_packets_eth0": "#7EB26D", + "transmit_packets_lo": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 19 + }, + "hiddenSeries": false, + "id": 84, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive", + "refId": "A", + "step": 240 + }, + { + "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Transmit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "label": "bits out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 19 + }, + "height": "", + "hiddenSeries": false, + "id": 156, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Space Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 229, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - Reads completed", + "refId": "A", + "step": 480 + }, + { + "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Writes completed", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IOps", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "IO read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "io time": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 31 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*read*./", + "transform": "negative-Y" + }, + { + "alias": "/.*sda.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde.*/", + "color": "#E24D42" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Successfully read bytes", + "refId": "A", + "step": 240 + }, + { + "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Successfully written bytes", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Usage Read / Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "io time": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 43 + }, + "hiddenSeries": false, + "id": 127, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Time spent doing I/Os", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "I/O Usage Times", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "time", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "CPU / Memory / Net / Disk", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 266, + "panels": [ + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 136, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Active / Inactive", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 135, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Committed_AS - *./" + }, + { + "alias": "/.*CommitLimit - *./", + "color": "#BF1B00", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Committed_AS - Amount of memory presently allocated on the system", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Commited", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 191, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Active_file - File-backed memory on active LRU list", + "refId": "C", + "step": 4 + }, + { + "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs", + "refId": "D", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Active / Inactive Detail", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 130, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writeback - Memory which is actively being written back to disk", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Dirty - Memory which is waiting to get written back to the disk", + "refId": "C", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Writeback and Dirty", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 138, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "fill": 0 + }, + { + "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Mapped - Used memory in mapped pages files which have been mmaped, such as libraries", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "refId": "C", + "step": 4 + }, + { + "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "ShmemPmdMapped - Ammount of shared (shmem/tmpfs) memory backed by huge pages", + "refId": "D", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Shared and Mapped", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 131, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "SReclaimable - Part of Slab, that might be reclaimed, such as caches", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Slab", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 38 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "VmallocChunk - Largest contigious block of vmalloc area which is free", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "VmallocTotal - Total size of vmalloc memory area", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "VmallocUsed - Amount of vmalloc area which is used", + "refId": "C", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Vmalloc", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 38 + }, + "hiddenSeries": false, + "id": 159, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Bounce - Memory used for block device bounce buffers", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Bounce", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 48 + }, + "hiddenSeries": false, + "id": 129, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Inactive *./", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "AnonHugePages - Memory in anonymous huge pages", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "AnonPages - Memory in user pages not backed by files", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Anonymous", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 48 + }, + "hiddenSeries": false, + "id": 160, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Kernel / CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#806EB7", + "Total RAM + Swap": "#806EB7", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 58 + }, + "hiddenSeries": false, + "id": 140, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "HugePages_Free - Huge pages in the pool that are not yet allocated", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages", + "refId": "C", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory HugePages Counter", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "pages", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#806EB7", + "Total RAM + Swap": "#806EB7", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 58 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "HugePages - Total size of the pool of huge pages", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Hugepagesize - Huge Page size", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory HugePages Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 68 + }, + "hiddenSeries": false, + "id": 128, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "DirectMap1G - Amount of pages mapped as this size", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "DirectMap2M - Amount of pages mapped as this size", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "DirectMap4K - Amount of pages mapped as this size", + "refId": "C", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory DirectMap", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 68 + }, + "hiddenSeries": false, + "id": 137, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "MLocked - Size of pages locked to memory using the mlock() system call", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Unevictable and MLocked", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 78 + }, + "hiddenSeries": false, + "id": 132, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet commited to the storage", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory NFS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Memory Meminfo", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 267, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 176, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*out/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pagesin - Page in operations", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pagesout - Page out operations", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Pages In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "pages out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*out/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pswpin - Pages swapped in", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pswpout - Pages swapped out", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Pages Swap In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "pages out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 19 + }, + "hiddenSeries": false, + "id": 175, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Pgfault - Page major and minor fault operations", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pgfault - Page major and minor fault operations", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pgmajfault - Major page fault operations", + "refId": "B", + "step": 4 + }, + { + "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pgminfault - Minor page fault operations", + "refId": "C", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Page Faults", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "faults", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 19 + }, + "hiddenSeries": false, + "id": 307, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "oom killer invocations ", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "OOM Killer", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Memory Vmstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 293, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 260, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Variation*./", + "color": "#890F02" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Estimated error in seconds", + "refId": "A", + "step": 240 + }, + { + "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Time offset in between local system and reference clock", + "refId": "B", + "step": 240 + }, + { + "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Maximum error in seconds", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time Syncronized Drift", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 10 + }, + "hiddenSeries": false, + "id": 291, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Phase-locked loop time adjust", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time PLL Adjust", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 168, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Variation*./", + "color": "#890F02" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)", + "refId": "A", + "step": 240 + }, + { + "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Local clock frequency adjustment", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time Syncronized Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 20 + }, + "hiddenSeries": false, + "id": 294, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Seconds between clock ticks", + "refId": "A", + "step": 240 + }, + { + "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "International Atomic Time (TAI) offset", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time Misc", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "System Timesync", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 312, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 11 + }, + "hiddenSeries": false, + "id": 62, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Processes blocked waiting for I/O to complete", + "refId": "A", + "step": 240 + }, + { + "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Processes in runnable state", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Processes Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Requires --collector.processes", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 315, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ state }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Processes State", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 148, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Processes forks second", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Processes Forks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "forks / sec", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 21 + }, + "hiddenSeries": false, + "id": 149, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Max.*/", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Processes virtual memory size in bytes", + "refId": "A", + "step": 240 + }, + { + "expr": "process_resident_memory_max_bytes{instance=\"$node\",job=\"$job\"}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Maximum amount of virtual memory available in bytes", + "refId": "B", + "step": 240 + }, + { + "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Processes virtual memory size in bytes", + "refId": "C", + "step": 240 + }, + { + "expr": "irate(process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}[5m])", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Maximum amount of virtual memory available in bytes", + "refId": "D", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Processes Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Requires --collector.processes", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 313, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "PIDs limit", + "color": "#F2495C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Number of PIDs", + "refId": "A", + "step": 240 + }, + { + "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "PIDs limit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PIDs Number and Limit", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 31 + }, + "hiddenSeries": false, + "id": 305, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*waiting.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{ cpu }} - seconds spent running a process", + "refId": "A", + "step": 240 + }, + { + "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{ cpu }} - seconds spent by processing waiting for this CPU", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process schdeule stats Running / Waiting", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 41 + }, + "hiddenSeries": false, + "id": 314, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Threads limit", + "color": "#F2495C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Allocated threads", + "refId": "A", + "step": 240 + }, + { + "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Threads limit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Threads Number and Limit", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "System Processes", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 269, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 52 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Context switches", + "refId": "A", + "step": 240 + }, + { + "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Interrupts", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Context Switches / Interrupts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 52 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 1m", + "refId": "A", + "step": 480 + }, + { + "expr": "node_load5{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 5m", + "refId": "B", + "step": 480 + }, + { + "expr": "node_load15{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 15m", + "refId": "C", + "step": 480 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "System Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Requires --collector.interrupts", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 62 + }, + "hiddenSeries": false, + "id": 259, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Critical*./", + "color": "#E24D42", + "fill": 0 + }, + { + "alias": "/.*Max*./", + "color": "#EF843C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ type }} - {{ info }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Interrupts Detail", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 62 + }, + "hiddenSeries": false, + "id": 306, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{ cpu }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Schedule timeslices executed by each cpu", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 72 + }, + "hiddenSeries": false, + "id": 151, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Entropy available to random number generators", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Entropy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 72 + }, + "hiddenSeries": false, + "id": 308, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Time spent", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU time spent in user and system contexts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 82 + }, + "hiddenSeries": false, + "id": 64, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Max*./", + "color": "#890F02", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Maximum open file descriptors", + "refId": "A", + "step": 240 + }, + { + "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Open file descriptors", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Descriptors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "System Misc", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 304, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 158, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Critical*./", + "color": "#E24D42", + "fill": 0 + }, + { + "alias": "/.*Max*./", + "color": "#EF843C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ chip }} {{ sensor }} temp", + "refId": "A", + "step": 240 + }, + { + "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ chip }} {{ sensor }} Critical Alarm", + "refId": "B", + "step": 240 + }, + { + "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ chip }} {{ sensor }} Critical", + "refId": "C", + "step": 240 + }, + { + "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ chip }} {{ sensor }} Critical Historical", + "refId": "D", + "step": 240 + }, + { + "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ chip }} {{ sensor }} Max", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Hardware temperature monitor", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "celsius", + "label": "temperature", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Requires --collector.thermal_zone", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 13 + }, + "hiddenSeries": false, + "id": 300, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Max*./", + "color": "#EF843C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Current {{ name }} in {{ type }}", + "refId": "A", + "step": 240 + }, + { + "expr": "node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Max {{ name }} in {{ type }}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Throttle cooling device", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Requires --collector.powersupplyclass", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 302, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ power_supply }} online", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Power supply", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Hardware Misc", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 296, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 297, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} Connections", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Systemd Sockets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 14 + }, + "hiddenSeries": false, + "id": 298, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Failed", + "color": "#F2495C" + }, + { + "alias": "Inactive", + "color": "#FF9830" + }, + { + "alias": "Active", + "color": "#73BF69" + }, + { + "alias": "Deactivating", + "color": "#FFCB7D" + }, + { + "alias": "Activating", + "color": "#C8F2C2" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Activating", + "refId": "A", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Active", + "refId": "B", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Deactivating", + "refId": "C", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Failed", + "refId": "D", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Inactive", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Systemd Units State", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Systemd", + "type": "row" + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 270, + "panels": [], + "repeat": null, + "title": "Storage Disk", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 15 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[5m])", + "intervalFactor": 4, + "legendFormat": "{{device}} - Reads completed", + "refId": "A", + "step": 8 + }, + { + "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{device}} - Writes completed", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IOps Completed", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "IO read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 15 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "{{device}} - Read bytes", + "refId": "A", + "step": 8 + }, + { + "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Written bytes", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk R/W Data", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "bytes read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 25 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "hide": false, + "intervalFactor": 4, + "legendFormat": "{{device}} - Read time", + "refId": "A", + "step": 8 + }, + { + "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}} - Write time", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk R/W Time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "time. read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 25 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO time weighted", + "refId": "A", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IOs Weighted", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "time", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 133, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{device}} - Read merged", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{device}} - Write merged", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk R/W Merged", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "I/Os", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 35 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO time", + "refId": "A", + "step": 8 + }, + { + "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - discard time", + "refId": "B", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time Spent Doing I/Os", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "time", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 45 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO now", + "refId": "A", + "step": 8 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IOs Current in Progress", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "I/Os", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 45 + }, + "hiddenSeries": false, + "id": 301, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[5m])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - Discards completed", + "refId": "A", + "step": 8 + }, + { + "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Discards merged", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IOps Discards completed / merged", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "IOs", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 55 + }, + "id": 271, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - Available", + "metric": "", + "refId": "A", + "step": 4 + }, + { + "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - Free", + "refId": "B", + "step": 2 + }, + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - Size", + "refId": "C", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Filesystem space available", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 41, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - Free file nodes", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Nodes Free", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "file nodes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Max open files", + "refId": "A", + "step": 8 + }, + { + "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Open files", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Descriptor", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "files", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 219, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - File nodes total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Nodes Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "file Nodes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "/ ReadOnly": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 36 + }, + "hiddenSeries": false, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - ReadOnly", + "refId": "A", + "step": 4 + }, + { + "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}} - Device error", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Filesystem in ReadOnly / Error", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Storage Filesystem", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 272, + "panels": [ + { + "aliasColors": { + "receive_packets_eth0": "#7EB26D", + "receive_packets_lo": "#E24D42", + "transmit_packets_eth0": "#7EB26D", + "transmit_packets_lo": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 60, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - Transmit", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic by Packets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 17 + }, + "hiddenSeries": false, + "id": 142, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive errors", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Rransmit errors", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 143, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive drop", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Transmit drop", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Drop", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 27 + }, + "hiddenSeries": false, + "id": 141, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive compressed", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Transmit compressed", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Compressed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 37 + }, + "hiddenSeries": false, + "id": 146, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive multicast", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Multicast", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 37 + }, + "hiddenSeries": false, + "id": 144, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive fifo", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Transmit fifo", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Fifo", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 145, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{device}} - Receive frame", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Frame", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 231, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Statistic transmit_carrier", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Carrier", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 57 + }, + "hiddenSeries": false, + "id": 232, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - Transmit colls", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic Colls", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 57 + }, + "hiddenSeries": false, + "id": 61, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "NF conntrack limit", + "color": "#890F02", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "NF conntrack entries", + "refId": "A", + "step": 4 + }, + { + "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "NF conntrack limit", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "NF Contrack", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "entries", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 67 + }, + "hiddenSeries": false, + "id": 230, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ device }} - ARP entries", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ARP Entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Entries", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 67 + }, + "hiddenSeries": false, + "id": 288, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ device }} - Bytes", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MTU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 77 + }, + "hiddenSeries": false, + "id": 280, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ device }} - Speed", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Speed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 77 + }, + "hiddenSeries": false, + "id": 289, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_transmit_queue_length{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ device }} - Interface transmit queue length", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Length", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": "packets", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 87 + }, + "hiddenSeries": false, + "id": 290, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Dropped.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{cpu}} - Processed", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{cpu}} - Dropped", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Softnet Packets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "packetes drop (-) / process (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 87 + }, + "hiddenSeries": false, + "id": 310, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{cpu}} - Squeezed", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Softnet Out of Quota", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 97 + }, + "hiddenSeries": false, + "id": 309, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{interface}} - Operational state UP", + "refId": "A", + "step": 4 + }, + { + "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{device}} - Physical link state", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Operational Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Network Traffic", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 273, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 108 + }, + "hiddenSeries": false, + "id": 63, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "TCP_alloc - Allocated sockets", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "TCP_inuse - Tcp sockets currently in use", + "refId": "B", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "TCP_mem - Used memory for tcp", + "refId": "C", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "TCP_orphan - Orphan sockets", + "refId": "D", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "TCP_tw - Sockets wating close", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sockstat TCP", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 108 + }, + "hiddenSeries": false, + "id": 124, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "UDPLITE_inuse - Udplite sockets currently in use", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "UDP_inuse - Udp sockets currently in use", + "refId": "B", + "step": 240 + }, + { + "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "UDP_mem - Used memory for udp", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sockstat UDP", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 118 + }, + "hiddenSeries": false, + "id": 126, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Sockets_used - Sockets currently in use", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sockstat Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "sockets", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 118 + }, + "hiddenSeries": false, + "id": 220, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "mem_bytes - TCP sockets in that state", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "mem_bytes - UDP sockets in that state", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sockstat Memory Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 128 + }, + "hiddenSeries": false, + "id": 125, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "FRAG_inuse - Frag sockets currently in use", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "FRAG_memory - Used memory for frag", + "refId": "B", + "step": 240 + }, + { + "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "RAW_inuse - Raw sockets currently in use", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sockstat FRAG / RAW", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Network Sockstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 58 + }, + "id": 274, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 19 + }, + "height": "", + "hiddenSeries": false, + "id": 221, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "InOctets - Received octets", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OutOctets - Sent octets", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Netstat IP In / Out Octets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "octects out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 19 + }, + "height": "", + "hiddenSeries": false, + "id": 81, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Ip_Forwarding{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Forwarding - IP forwarding", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Netstat IP Forwarding", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "datagrams", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 29 + }, + "height": "", + "hiddenSeries": false, + "id": 115, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "InMsgs - Messages which the entity received. Note that this counter includes all those counted by icmpInErrors", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "OutMsgs - Messages which this entity attempted to send. Note that this counter includes all those counted by icmpOutErrors", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ICMP In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "messages out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 29 + }, + "height": "", + "hiddenSeries": false, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "InErrors - Messages which the entity received but determined as having ICMP-specific errors (bad ICMP checksums, bad length, etc.)", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ICMP Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "messages out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 39 + }, + "height": "", + "hiddenSeries": false, + "id": 55, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Snd.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "InDatagrams - Datagrams received", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "OutDatagrams - Datagrams sent", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "UDP In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "datagrams out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 39 + }, + "height": "", + "hiddenSeries": false, + "id": 109, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener", + "refId": "B", + "step": 4 + }, + { + "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[5m])", + "interval": "", + "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application", + "refId": "C" + }, + { + "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "RcvbufErrors - UDP buffer errors received", + "refId": "D", + "step": 4 + }, + { + "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "SndbufErrors - UDP buffer errors send", + "refId": "E", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "UDP Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "datagrams", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 49 + }, + "height": "", + "hiddenSeries": false, + "id": 299, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Snd.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "datagrams out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 49 + }, + "height": "", + "hiddenSeries": false, + "id": 104, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored", + "refId": "B", + "step": 4 + }, + { + "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits", + "refId": "C", + "step": 4 + }, + { + "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[5m])", + "interval": "", + "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets", + "refId": "D" + }, + { + "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[5m])", + "interval": "", + "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 59 + }, + "height": "", + "hiddenSeries": false, + "id": 85, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*MaxConn *./", + "color": "#890F02", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT", + "refId": "A", + "step": 4 + }, + { + "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dinamic is \"-1\")", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "connections", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 59 + }, + "height": "", + "hiddenSeries": false, + "id": 91, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Sent.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "SyncookiesFailed - Invalid SYN cookies received", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "SyncookiesRecv - SYN cookies received", + "refId": "B", + "step": 4 + }, + { + "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "SyncookiesSent - SYN cookies sent", + "refId": "C", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP SynCookie", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter out (-) / in (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 69 + }, + "height": "", + "hiddenSeries": false, + "id": 82, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP Direct Transition", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "connections", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Network Netstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 279, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{collector}} - Scrape duration", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Node Exporter Scrape Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 20 + }, + "hiddenSeries": false, + "id": 157, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1085", + "alias": "/.*error.*/", + "color": "#F2495C", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{collector}} - Scrape success", + "refId": "A", + "step": 4 + }, + { + "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{collector}} - Scrape textfile error (1 = true)", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Node Exporter Scrape", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1100", + "format": "short", + "label": "counter", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1101", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Node Exporter", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 22, + "style": "dark", + "tags": ["linux"], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "index": -1, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(node_uname_info, job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info{job=\"$job\"}, instance)", + "hide": 0, + "includeAll": false, + "index": -1, + "label": "Host:", + "multi": false, + "name": "node", + "options": [], + "query": "label_values(node_uname_info{job=\"$job\"}, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "[a-z]+|nvme[0-9]+n[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "diskdevices", + "options": [ + { + "selected": true, + "text": "[a-z]+|nvme[0-9]+n[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+" + } + ], + "query": "[a-z]+|nvme[0-9]+n[0-9]+", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] + }, + "timezone": "browser", + "title": "Node Exporter Full", + "uid": "rYdddlPWj", + "variables": { + "list": [] + }, + "version": 2 +} diff --git a/machines/monitor/dashboards/postgres.json b/machines/monitor/dashboards/postgres.json new file mode 100644 index 0000000..d674fbd --- /dev/null +++ b/machines/monitor/dashboards/postgres.json @@ -0,0 +1,3096 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.3.5" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "This dashboard works with postgres_exporter for prometheus", + "editable": true, + "gnetId": 9628, + "graphTooltip": 0, + "id": null, + "iteration": 1568751813814, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 34, + "panels": [], + "title": "General Counters, CPU, Memory and File Descriptor Stats", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": ["#299c46", "#7eb26d", "#d44a3a"], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_static{release=\"$release\", instance=\"$instance\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{short_version}}", + "refId": "A" + } + ], + "thresholds": "", + "title": "Version", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "description": "start time of the process", + "format": "dateTimeFromNow", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "110%", + "prefix": "", + "prefixFontSize": "110%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_postmaster_start_time_seconds{release=\"$release\", instance=\"$instance\"} * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Start Time", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 8, + "y": 1 + }, + "height": "200px", + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "SUM(pg_stat_database_tup_fetched{datname=~\"$datname\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Current fetch data", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 12, + "y": 1 + }, + "height": "200px", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "SUM(pg_stat_database_tup_inserted{release=\"$release\", datname=~\"$datname\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Current insert data", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 16, + "y": 1 + }, + "height": "200px", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "SUM(pg_stat_database_tup_updated{datname=~\"$datname\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Current update data", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_max_connections{release=\"$release\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Max Connections", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Average user and system CPU time spent in seconds.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 3 + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(rate(process_cpu_seconds_total{release=\"$release\", instance=\"$instance\"}[5m]) * 1000)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CPU Time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Virtual and Resident memory size in bytes, averages over 5 min interval", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 3 + }, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(rate(process_resident_memory_bytes{release=\"$release\", instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Resident Mem", + "refId": "A" + }, + { + "expr": "avg(rate(process_virtual_memory_bytes{release=\"$release\", instance=\"$instance\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Virtual Mem", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Number of open file descriptors", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 3 + }, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_open_fds{release=\"$release\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Open FD", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open File Descriptors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 32, + "panels": [], + "title": "Settings", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 11 + }, + "id": 40, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_shared_buffers_bytes{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Shared Buffers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 11 + }, + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_effective_cache_size_bytes{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Effective Cache", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 11 + }, + "id": 44, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_maintenance_work_mem_bytes{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Maintenance Work Mem", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 11 + }, + "id": 46, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_work_mem_bytes{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Work Mem", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 11 + }, + "id": 48, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_max_wal_size_bytes{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Max WAL Size", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 11 + }, + "id": 50, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_random_page_cost{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Random Page Cost", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 18, + "y": 11 + }, + "id": 52, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_seq_page_cost{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Seq Page Cost", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 20, + "y": 11 + }, + "id": 54, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_max_worker_processes{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Max Worker Processes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 22, + "y": 11 + }, + "id": 56, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "pg_settings_max_parallel_workers{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Max Parallel Workers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 30, + "panels": [], + "title": "Database Stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 15 + }, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 3, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_activity_count{datname=~\"$datname\", instance=~\"$instance\", state=\"active\"} !=0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{datname}}, s: {{state}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Active sessions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 15 + }, + "id": 60, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(pg_stat_database_xact_commit{instance=\"$instance\", datname=~\"$datname\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{datname}} commits", + "refId": "A" + }, + { + "expr": "irate(pg_stat_database_xact_rollback{instance=\"$instance\", datname=~\"$datname\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{datname}} rollbacks", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Transactions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 15 + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_tup_updated{datname=~\"$datname\", instance=~\"$instance\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Update data", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 22 + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_tup_fetched{datname=~\"$datname\", instance=~\"$instance\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Fetch data (SELECT)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 22 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_tup_inserted{datname=~\"$datname\", instance=~\"$instance\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Insert data", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 22 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_locks_count{datname=~\"$datname\", instance=~\"$instance\", mode=~\"$mode\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}},{{mode}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Lock tables", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 29 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_tup_returned{datname=~\"$datname\", instance=~\"$instance\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Return data", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 29 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_activity_count{datname=~\"$datname\", instance=~\"$instance\", state=~\"idle|idle in transaction|idle in transaction (aborted)\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}}, s: {{state}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Idle sessions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 29 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_tup_deleted{datname=~\"$datname\", instance=~\"$instance\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Delete data", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 36 + }, + "id": 62, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_blks_hit{instance=\"$instance\", datname=~\"$datname\"} / (pg_stat_database_blks_read{instance=\"$instance\", datname=~\"$datname\"} + pg_stat_database_blks_hit{instance=\"$instance\", datname=~\"$datname\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ datname }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cache Hit Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 4, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 36 + }, + "id": 64, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(pg_stat_bgwriter_buffers_backend_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers_backend", + "refId": "A" + }, + { + "expr": "irate(pg_stat_bgwriter_buffers_alloc_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers_alloc", + "refId": "B" + }, + { + "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "backend_fsync", + "refId": "C" + }, + { + "expr": "irate(pg_stat_bgwriter_buffers_checkpoint_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers_checkpoint", + "refId": "D" + }, + { + "expr": "irate(pg_stat_bgwriter_buffers_clean_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers_clean", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Buffers (bgwriter)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 36 + }, + "id": 66, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(pg_stat_database_conflicts{instance=\"$instance\", datname=~\"$datname\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{datname}} conflicts", + "refId": "B" + }, + { + "expr": "irate(pg_stat_database_deadlocks{instance=\"$instance\", datname=~\"$datname\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{datname}} deadlocks", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Conflicts/Deadlocks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Total amount of data written to temporary files by queries in this database. All temporary files are counted, regardless of why the temporary file was created, and regardless of the log_temp_files setting.", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 43 + }, + "id": 68, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(pg_stat_database_temp_bytes{instance=\"$instance\", datname=~\"$datname\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{datname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Temp File (Bytes)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 43 + }, + "id": 70, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(pg_stat_bgwriter_checkpoint_write_time_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "write_time - Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk.", + "refId": "B" + }, + { + "expr": "irate(pg_stat_bgwriter_checkpoint_sync_time_total{instance=\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "sync_time - Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk.", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Checkpoint Stats", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 19, + "style": "dark", + "tags": ["postgres", "db", "stats"], + "templating": { + "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 200, + "auto_min": "1s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1s", + "value": "1s" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "1s,5s,1m,5m,1h,6h,1d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "query_result(pg_exporter_last_scrape_duration_seconds)", + "refresh": 2, + "regex": "/.*kubernetes_namespace=\"([^\"]+).*/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Release", + "multi": false, + "name": "release", + "options": [], + "query": "query_result(pg_exporter_last_scrape_duration_seconds{kubernetes_namespace=\"$namespace\"})", + "refresh": 2, + "regex": "/.*release=\"([^\"]+)/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": "query_result(pg_up{release=\"$release\"})", + "refresh": 1, + "regex": "/.*instance=\"([^\"]+).*/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Database", + "multi": true, + "name": "datname", + "options": [], + "query": "label_values(datname)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Lock table", + "multi": true, + "name": "mode", + "options": [], + "query": "label_values({mode=~\"accessexclusivelock|accesssharelock|exclusivelock|rowexclusivelock|rowsharelock|sharelock|sharerowexclusivelock|shareupdateexclusivelock\"}, mode)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] + }, + "timezone": "", + "title": "PostgreSQL Database", + "uid": "000000039", + "version": 1 +} diff --git a/machines/monitor/dashboards/promtail.json b/machines/monitor/dashboards/promtail.json new file mode 100644 index 0000000..f8771ab --- /dev/null +++ b/machines/monitor/dashboards/promtail.json @@ -0,0 +1,2043 @@ +{ + "__inputs": [], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.4.1" + }, + { + "type": "panel", + "id": "logs", + "name": "Logs", + "version": "" + }, + { + "type": "datasource", + "id": "loki", + "name": "Loki", + "version": "1.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Monitor Promtail metrics and logs", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "doc", + "includeVars": false, + "keepTime": false, + "tags": [], + "targetBlank": true, + "title": "Github Source", + "tooltip": "Github Source", + "type": "link", + "url": "https://github.com/voidquark/grafana-dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 3, + "panels": [], + "title": "Promtail - Stats", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 1 + }, + "id": 7, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum by(instance) (count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"debug\" | __error__=\"\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "DEBUG Log Lines", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum by(instance) (count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"info\" | __error__=\"\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "INFO Log Lines", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "yellow", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 4, + "y": 1 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum by(instance) (count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"warn\" | __error__=\"\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "WARN Log Lines", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 6, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum by(instance) (count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"error\" | __error__=\"\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "ERROR Log Lines", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 8, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum(count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | __error__=\"\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Promtail Log Lines", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 11, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum(bytes_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | __error__=\"\" [$__range]))", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Promtail Log in bytes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Deployed Promtail Version", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 14, + "y": 1 + }, + "id": 9, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "name", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "promtail_build_info{job=~\"$job\", instance=~\"$instance.*\"} ", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "{{version}}", + "range": false, + "refId": "A" + } + ], + "title": "Promtail Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Number of active files.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#77a0ed", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 17, + "y": 1 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "promtail_files_active_total{job=~\"$job\", instance=~\"$instance.*\"} ", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Active Files", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Current number of bytes sent. ( Real Time )", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#77a0ed", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 19, + "y": 1 + }, + "id": 30, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_sent_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Current Sent Bytes", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "description": "Use LogQL Filter to filter information.", + "gridPos": { + "h": 12, + "w": 22, + "x": 0, + "y": 4 + }, + "id": 8, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "{$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"debug\" |= \"$filter\" | line_format `➡️ 🔵 {{regexReplaceAll \"level=debug|ts=[^\\\\s]+\\\\s\" __line__ \"$1\" | trim }}`", + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "{$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"info\" |= \"$filter\" | line_format `➡️ 🟢 {{regexReplaceAll \"level=info|ts=[^\\\\s]+\\\\s\" __line__ \"$1\" | trim }}`", + "hide": false, + "queryType": "range", + "refId": "B" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "{$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"warn\" |= \"$filter\" | line_format `➡️ 🟡 {{regexReplaceAll \"level=warn|ts=[^\\\\s]+\\\\s\" __line__ \"$1\" | trim }}`", + "hide": false, + "queryType": "range", + "refId": "C" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "{$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"error\" |= \"$filter\" | line_format `➡️ 🔴 {{regexReplaceAll \"level=error|ts=[^\\\\s]+\\\\s\" __line__ \"$1\" | trim }}`", + "hide": false, + "queryType": "range", + "refId": "D" + } + ], + "title": "Promtail Recent Logs", + "type": "logs" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-purple", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 6, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "unit", + "value": "Count" + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + }, + { + "id": "custom.axisPlacement", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "unit", + "value": "Count" + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 22, + "x": 0, + "y": 16 + }, + "id": 33, + "options": { + "legend": { + "calcs": ["lastNotNull", "max"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 * sum(rate(promtail_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance.*\",status_code=~\"5.*\"}[$interval])) by (job, instance) / sum(rate(promtail_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance.*\"}[$interval])) by (job, instance)", + "hide": false, + "instant": false, + "legendFormat": "Error Requests", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum by (level) (count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"warn\" | __error__=\"\"[$__auto]))", + "hide": false, + "legendFormat": "Warning Logs Lines", + "queryType": "range", + "refId": "B" + }, + { + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "editorMode": "code", + "expr": "sum by (level) (count_over_time({$label_name=~\"$label_value\", job=~\"$job\", instance=~\"$instance\"} | logfmt | level=\"error\" | __error__=\"\"[$__auto]))", + "hide": false, + "legendFormat": "Error Logs Lines", + "queryType": "range", + "refId": "C" + } + ], + "title": "Error Requests and Logs Count - Overtime", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 19, + "panels": [], + "title": "Promtail Details", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Number of bytes read. - Overtime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "promtail_read_bytes_total" + }, + "properties": [ + { + "id": "color" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 18, + "options": { + "legend": { + "calcs": ["lastNotNull", "max"], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_read_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Bytes - Overtime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "unit", + "value": "Lines" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 12, + "y": 25 + }, + "id": 28, + "options": { + "cellHeight": "md", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": ["Value"], + "reducer": ["lastNotNull"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "promtail_read_bytes_total{job=~\"$job\", instance=~\"$instance.*\"} ", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_read_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "rate(promtail_read_lines_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "increase(promtail_read_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[24h])", + "format": "table", + "hide": false, + "instant": false, + "interval": "24h", + "legendFormat": "", + "range": true, + "refId": "D" + } + ], + "title": "Read Bytes - Details", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "path", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "__name__": true, + "instance": true, + "job": true, + "job 1": false + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Read Bytes Total", + "Value #A": "Total Read", + "Value #B": "Current Read", + "Value #C": "Current Lines Read", + "Value #D": "Total Read in 24h", + "instance": "", + "job": "", + "path": "Path" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "- Encoded: Number of bytes encoded and ready to send.\n- Send: Number of bytes sent. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 27, + "options": { + "legend": { + "calcs": ["lastNotNull", "max"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_encoded_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Encoded Bytes", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "rate(promtail_sent_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "hide": false, + "instant": false, + "legendFormat": "Send Bytes", + "range": true, + "refId": "B" + } + ], + "title": "Encoded and Send Bytes - Overtime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "unit", + "value": "Lines" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "unit", + "value": "decbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 12, + "y": 37 + }, + "id": 29, + "options": { + "cellHeight": "md", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": ["Value"], + "reducer": ["lastNotNull"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_sent_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "increase(promtail_sent_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[24h])", + "format": "table", + "hide": false, + "instant": false, + "interval": "24h", + "legendFormat": "", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_encoded_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "increase(promtail_encoded_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[24h])", + "format": "table", + "hide": false, + "instant": false, + "interval": "24h", + "legendFormat": "", + "range": true, + "refId": "D" + } + ], + "title": "Encoded and Send - Details", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "host", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "__name__": true, + "instance": true, + "job": true, + "job 1": false + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Read Bytes Total", + "Value #A": "Current Send Bytes", + "Value #B": "Send Bytes 24h", + "Value #C": "Current Encoded Bytes", + "Value #D": "Encoded Bytes 24h", + "host": "Loki Endpoint", + "instance": "", + "job": "", + "path": "Path" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "- Dropped Bytes: Number of bytes dropped because failed to be sent to the ingester after all retries.\n- Dropped Entries: Number of log entries dropped because failed to be sent to the ingester after all retries.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "unit", + "value": "Entries" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 22, + "options": { + "legend": { + "calcs": ["lastNotNull"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_dropped_bytes_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "hide": false, + "instant": false, + "legendFormat": "Dropped Bytes - {{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(promtail_dropped_entries_total{job=~\"$job\", instance=~\"$instance.*\"}[$interval])", + "hide": false, + "instant": false, + "legendFormat": "Dropped Entries - {{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Dropped Bytes - Overtime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "99th percentile latency", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 12, + "y": 45 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.99, sum(rate(promtail_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance.*\"}[5m])) by (le))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Request Latency", + "range": true, + "refId": "A" + } + ], + "title": "Request Latency - 99th Percentile Latency", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": ["promtail"], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "loki", + "value": "P982945308D3682D1" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource Loki", + "multi": false, + "name": "datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Label Name", + "multi": false, + "name": "label_name", + "options": [], + "query": { + "label": "", + "refId": "LokiVariableQueryEditor-VariableQuery", + "stream": "", + "type": 0 + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Label Value", + "multi": false, + "name": "label_value", + "options": [], + "query": { + "label": "$label_name", + "refId": "LokiVariableQueryEditor-VariableQuery", + "stream": "", + "type": 1 + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "definition": "label_values(promtail_build_info,job)", + "description": "Job label from Prometheus datasource.", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(promtail_build_info,job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "loki", + "uid": "Loki" + }, + "definition": "", + "description": "Instance label from Loki datasource.", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "label": "instance", + "refId": "LokiVariableQueryEditor-VariableQuery", + "stream": "", + "type": 1 + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "prometheus", + "value": "P1809F7CD0C75ACF3" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource Prometheus", + "multi": false, + "name": "datasourceProm", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "5m", + "value": "5m" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + } + ], + "query": "30s,1m,2m,3m,5m,10m,30m", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "hide": 0, + "label": "LogQL Filter", + "name": "filter", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Promtail Monitoring - Metrics and Logs", + "uid": "promtailntfc0a", + "version": 1, + "weekStart": "", + "gnetId": 20881 +} diff --git a/machines/monitor/dashboards/traefik-access.json b/machines/monitor/dashboards/traefik-access.json new file mode 100644 index 0000000..b133d2f --- /dev/null +++ b/machines/monitor/dashboards/traefik-access.json @@ -0,0 +1,1087 @@ +{ + "__inputs": [], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "7.3.6" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.8" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "panel", + "id": "logs", + "name": "Logs", + "version": "" + }, + { + "type": "datasource", + "id": "loki", + "name": "Loki", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Loki version 2 showcase using JSON Traefik access logs.", + "editable": true, + "gnetId": 13713, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "decimals": 4, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 32, + "interval": "", + "options": { + "content": "
\n \n Traefik Dashboard\n
", + "mode": "html" + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "expr": "loki_build_info", + "format": "table", + "instant": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "", + "transparent": true, + "type": "text" + }, + { + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 2 + }, + "id": 4, + "interval": "1m", + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "expr": "sum(count_over_time({job=\"traefik\"} |= \"RequestProtocol\" [$__interval]))", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "Total requests ", + "transformations": [], + "transparent": true, + "type": "stat" + }, + { + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 6, + "y": 2 + }, + "id": 5, + "interval": "30s", + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "expr": "sum by (OriginStatus) (count_over_time({job=\"traefik\"}|= \"RequestProtocol\" | json | __error__=\"\" [$__interval]))", + "legendFormat": "HTTP Status: {{OriginStatus}}", + "refId": "A" + } + ], + "timeFrom": "3h", + "timeShift": null, + "title": "Requests per status code", + "transformations": [], + "transparent": true, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 15, + "y": 2 + }, + "id": 19, + "interval": "5m", + "links": [], + "maxDataPoints": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["mean"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "expr": " sum(rate({job=\"traefik\"} |~ \"RequestProtocol\" | json | OriginStatus >= 400 |__error__=\"\"[$__interval])) / (sum(rate({job=\"traefik\"} |~ \"RequestProtocol\" | json | __error__=\"\"[$__interval])) / 100)", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "% of 4/5xx ", + "transparent": true, + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 5, + "x": 19, + "y": 2 + }, + "hiddenSeries": false, + "id": 36, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " sum by (OriginStatus,ServiceName) (count_over_time({job=\"traefik\"} |~ \"RequestProtocol\" | json | OriginStatus >= 400 |__error__=\"\"[$__interval]))", + "legendFormat": " {{ServiceName}} / {{OriginStatus}} ", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "3h", + "timeRegions": [], + "timeShift": null, + "title": " 4/5xx Services", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:405", + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:406", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 22, + "interval": "5m", + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["mean"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "expr": "count(sum by (ClientHost) (count_over_time({job=\"traefik\"}|= \"RequestProtocol\" | json | __error__=\"\" [$__interval])))", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "Users right now", + "transformations": [], + "transparent": true, + "type": "stat" + }, + { + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "IP Address" + }, + "properties": [ + { + "id": "custom.width", + "value": 410 + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 15, + "y": 6 + }, + "id": 8, + "interval": "1m", + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["sum"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "7.3.6", + "targets": [ + { + "expr": "sum_over_time({job=\"traefik\"}|= \"RequestProtocol\" | json | OriginStatus=200 | unwrap DownstreamContentSize | __error__=\"\" [$__interval])", + "legendFormat": "Bytes sent", + "refId": "A" + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "Total Bytes Sent", + "transformations": [ + { + "id": "reduce", + "options": { + "reducers": ["sum"] + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Total": "Bytes Sent" + } + } + } + ], + "transparent": true, + "type": "stat" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "purple", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + } + ] + }, + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 33, + "interval": "5m", + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "pluginVersion": "7.3.4", + "strokeWidth": 1, + "targets": [ + { + "expr": "sum by (RouterName) (count_over_time({job=\"traefik\"}|= \"RequestProtocol\" | json | __error__=\"\" [$__interval]))", + "legendFormat": "{{RouterName}}", + "refId": "A" + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "Requests Route", + "transformations": [], + "transparent": true, + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 8, + "y": 10 + }, + "id": 11, + "interval": "", + "options": { + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "expr": "{job=\"traefik\"} |= \"RequestProtocol\"| json | line_format \"Status:{{.OriginStatus}} Client From {{.ClientAddr}} {{.RequestMethod}} {{.RequestAddr}}{{.RequestPath}} Route To {{.ServiceAddr}}\"", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "Logs", + "transparent": true, + "type": "logs" + }, + { + "aliasColors": { + "95th percentile": "blue", + "max latency": "super-light-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ns" + }, + "overrides": [] + }, + "fill": 5, + "fillGradient": 9, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 16, + "interval": "30s", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:232", + "alias": "max latency", + "dashes": true, + "fillGradient": 3 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile_over_time(0.95,{job=\"traefik\"} |= \"RequestProtocol\"| json | unwrap Duration | __error__=\"\" [$__interval]) by (ServiceName)", + "hide": false, + "legendFormat": " {{ ServiceName }}", + "refId": "C" + } + ], + "thresholds": [ + { + "$$hashKey": "object:168", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.3, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "95th percentile of Request Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:178", + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:179", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "95th percentile": "blue", + "max latency": "super-light-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ns" + }, + "overrides": [] + }, + "fill": 5, + "fillGradient": 9, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 18 + }, + "hiddenSeries": false, + "id": 34, + "interval": "30s", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:232", + "alias": "max latency", + "dashes": true, + "fillGradient": 3 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (ServiceName) (max_over_time({job=\"traefik\"} |= \"RequestProtocol\" |json | unwrap Duration | __error__=\"\" [$__interval]))", + "hide": false, + "legendFormat": "{{ ServiceName}}", + "refId": "D" + } + ], + "thresholds": [ + { + "$$hashKey": "object:168", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.3, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Max Age of Request Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:178", + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:179", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "95th percentile": "blue", + "max latency": "super-light-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Loki", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "decbytes" + }, + "overrides": [] + }, + "fill": 5, + "fillGradient": 9, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 18 + }, + "hiddenSeries": false, + "id": 35, + "interval": "30s", + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:232", + "alias": "max latency", + "dashes": true, + "fillGradient": 3 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (ServiceName) (sum_over_time({job=\"traefik\"} |= \"RequestProtocol\" |json | unwrap RequestContentSize | __error__=\"\" [$__interval]))", + "hide": false, + "legendFormat": "{{ ServiceName}}", + "refId": "D" + } + ], + "thresholds": [ + { + "$$hashKey": "object:168", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.3, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:178", + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:179", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Traefik Via Loki", + "uid": "fJarCeaGk", + "version": 1 +} diff --git a/machines/monitor/dashboards/traefik.json b/machines/monitor/dashboards/traefik.json new file mode 100644 index 0000000..0d6779a --- /dev/null +++ b/machines/monitor/dashboards/traefik.json @@ -0,0 +1,1619 @@ +{ + "__inputs": [], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.0.1" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "panels": [], + "title": "General", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true, + "values": ["percent"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["mean"], + "fields": "", + "values": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(method, code) (rate(traefik_service_requests_total{protocol=\"http\"}[1m])) > 0", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{method}}[{{code}}]", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Http Code ", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 5, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": ["value"] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["last"], + "fields": "", + "values": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "count by(service) (rate(traefik_service_requests_total[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ service }}", + "range": true, + "refId": "A" + } + ], + "title": "Requests by service", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 11, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": ["last"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "expr": "sum(traefik_service_request_duration_seconds_sum{}) by (service) / sum(traefik_service_request_duration_seconds_count{}) by (service)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": " {{service}}", + "refId": "A", + "step": 240 + } + ], + "title": "Average response time by service", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "dateTimeAsLocal" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "URL" + }, + "properties": [ + { + "id": "unit" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 19, + "y": 1 + }, + "hideTimeOverride": true, + "id": 34, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Expire Date" + } + ] + }, + "pluginVersion": "11.0.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "traefik_tls_certs_not_after{san=~\".*\"} * 1000", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "timeFrom": "1s", + "title": "Certificate Dates", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "keepLabels": ["cn"], + "mode": "columns" + } + }, + { + "id": "merge", + "options": {} + }, + { + "id": "reduce", + "options": { + "includeTimeField": true, + "mode": "reduceFields", + "reducers": [] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "cn": 1, + "traefik_tls_certs_not_after": 2 + }, + "renameByName": { + "Time": "Last Check Time", + "Value": "Expire Date", + "cn": "URL", + "sans": "SAN", + "traefik_tls_certs_not_after": "Expire Date" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [], + "title": "Entrypoint", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 35, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(entrypoint) (rate(traefik_entrypoint_requests_total[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests per Entrypoint", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 36, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(entrypoint) (rate(traefik_entrypoint_request_duration_seconds_sum[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request Duration per Entrypoint", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 16, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "traefik_open_connections", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "key": "Q-7e0fd20e-acdd-47d5-b3c6-6665e485fdfc-0", + "legendFormat": "{{entrypoint}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Open Connections per Entrypoint", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 22, + "panels": [], + "title": "Routers", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 37, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(service) (rate(traefik_service_requests_total[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests per Router", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 38, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(service) (rate(traefik_service_request_duration_seconds_count[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request Duration per Router", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 40, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "label_replace(sum by(service) (rate(traefik_service_responses_bytes_total[$__rate_interval])), \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{service}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request size per Router", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 10, + "panels": [], + "title": "HTTP Codes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 30 + }, + "id": 46, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "topk(15, label_replace(sum by(service, method, code) (rate(traefik_service_requests_total{code=~\"2..\", protocol=\"http\"}[5m])), \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "2xx over 5 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 30 + }, + "id": 48, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "topk(15, label_replace(sum by(service, method, code) (rate(traefik_service_requests_total{code=~\"5..\", protocol=\"http\"}[5m])), \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "5xx over 5 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 30 + }, + "id": 47, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "topk(15, label_replace(sum by(service, method, code) (rate(traefik_service_requests_total{code!~\"2..|5..\", protocol=\"http\"}[5m])), \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Other codes over 5 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 2, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 49, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "topk(15, label_replace(sum by(service, method) (rate(traefik_service_requests_bytes_total{protocol=\"http\"}[1m])), \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{method}} on {{service}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Requests Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 50, + "options": { + "legend": { + "calcs": ["last", "mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "topk(15, label_replace(sum by(service, method) (rate(traefik_service_responses_bytes_total{protocol=\"http\"}[1m])), \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{method}} on {{service}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Responses Size", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": ["traefik", "system", "server", "prometheus"], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "browser", + "title": "Traefik", + "uid": "ddmlqvk12uozka", + "version": 2, + "weekStart": "" +} diff --git a/machines/monitor/definition.nix b/machines/monitor/definition.nix new file mode 100644 index 0000000..09a422f --- /dev/null +++ b/machines/monitor/definition.nix @@ -0,0 +1,13 @@ +{ + imports = [ + ./alertmanager.nix + ./prometheus.nix + ./influxdb.nix + ./loki.nix + ./grafana.nix + + ./jellyfin-exporter.nix + ]; + + system.stateVersion = "25.05"; +} diff --git a/machines/monitor/grafana.nix b/machines/monitor/grafana.nix new file mode 100644 index 0000000..ff683f5 --- /dev/null +++ b/machines/monitor/grafana.nix @@ -0,0 +1,126 @@ +{ + config, + pkgs, + modulesPath, + lib, + ... +}: { + services.grafana.enable = true; + services.grafana.settings = { + server = { + http_port = 3000; + http_addr = "0.0.0.0"; + # Grafana needs to know on which domain and URL it's running + domain = "grafana.procopius.dk"; + root_url = "https://grafana.procopius.dk"; # Not needed if it is `https://your.domain/` + # serve_from_sub_path = true; + oauth_auto_login = false; + }; + "auth.generic_oauth" = { + enabled = false; + }; + "auth" = { + disable_login_form = false; + }; + }; + + networking.firewall.allowedTCPPorts = [3000]; + + services.grafana = { + # declarativePlugins = with pkgs.grafanaPlugins; [ ... ]; + + provision = { + enable = true; + + datasources.settings.datasources = [ + # "Built-in" datasources can be provisioned - c.f. https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources + { + uid = "prometheus"; + name = "Prometheus"; + type = "prometheus"; + url = "http://127.0.0.1:${toString config.services.prometheus.port}"; + } + { + uid = "loki"; + name = "Loki"; + type = "loki"; + url = "http://127.0.0.1:${toString config.services.loki.configuration.server.http_listen_port}"; + } + { + uid = "influxdb"; + name = "InfluxDB"; + type = "influxdb"; + url = "http://127.0.0.1:8086"; + access = "proxy"; + jsonData = { + dbName = "proxmox"; + httpHeaderName1 = "Authorization"; + }; + secureJsonData = { + httpHeaderValue1 = "Token iY4MTuqUAVJbBkDUiMde"; + }; + } + ]; + + # Note: removing attributes from the above `datasources.settings.datasources` is not enough for them to be deleted on `grafana`; + # One needs to use the following option: + # datasources.settings.deleteDatasources = [ { name = "prometheus"; orgId = 1; } { name = "loki"; orgId = 1; } ]; + + dashboards.settings.providers = [ + { + name = "my dashboards"; + options.path = "/etc/grafana-dashboards"; + } + ]; + }; + }; + + environment.etc."grafana-dashboards/traefik.json" = { + source = ./dashboards/traefik.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + + environment.etc."grafana-dashboards/traefik-access.json" = { + source = ./dashboards/traefik-access.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + + environment.etc."grafana-dashboards/grafana-traefik.json" = { + source = ./dashboards/grafana-traefik.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + + environment.etc."grafana-dashboards/node-exporter.json" = { + source = ./dashboards/node-exporter.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + + environment.etc."grafana-dashboards/promtail.json" = { + source = ./dashboards/promtail.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + + environment.etc."grafana-dashboards/gitea.json" = { + source = ./dashboards/gitea.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; + + environment.etc."grafana-dashboards/postgres.json" = { + source = ./dashboards/postgres.json; + user = "grafana"; + group = "grafana"; + mode = "0644"; + }; +} diff --git a/machines/monitor/influxdb.nix b/machines/monitor/influxdb.nix new file mode 100644 index 0000000..5847dd9 --- /dev/null +++ b/machines/monitor/influxdb.nix @@ -0,0 +1,35 @@ +{ + config, + pkgs, + ... +}: let + influxdbPassword = config.sops.secrets."influxdb/password".path; + influxdbToken = config.sops.secrets."influxdb/token".path; +in { + sops.secrets."influxdb/password" = { + sopsFile = ../../secrets/secrets.yaml; + owner = "influxdb2"; + }; + sops.secrets."influxdb/token" = { + sopsFile = ../../secrets/secrets.yaml; + owner = "influxdb2"; + }; + + networking.firewall.allowedTCPPorts = [8086]; + + services.influxdb2 = { + enable = true; + settings = { + }; + provision = { + enable = true; + initialSetup = { + username = "plasmagoat"; + passwordFile = influxdbPassword; + tokenFile = influxdbToken; + organization = "procopius"; + bucket = "proxmox"; + }; + }; + }; +} diff --git a/machines/monitor/jellyfin-exporter.nix b/machines/monitor/jellyfin-exporter.nix new file mode 100644 index 0000000..ff74dc9 --- /dev/null +++ b/machines/monitor/jellyfin-exporter.nix @@ -0,0 +1,14 @@ +{ + virtualisation.oci-containers.containers = { + jellyfin_exporter = { + image = "rebelcore/jellyfin-exporter:latest"; + ports = [ + "9594:9594" + ]; + cmd = [ + "--jellyfin.address=http://media.lab:8096" + "--jellyfin.token=f7c89e5aa307434c9b3ecb329e896335" + ]; + }; + }; +} diff --git a/machines/monitor/loki.nix b/machines/monitor/loki.nix new file mode 100644 index 0000000..8cd9cc7 --- /dev/null +++ b/machines/monitor/loki.nix @@ -0,0 +1,37 @@ +{ + networking.firewall.allowedTCPPorts = [ 3100 ]; + + services.loki = { + enable = true; + configuration = { + server.http_listen_port = 3100; + auth_enabled = false; + analytics.reporting_enabled = false; + + common = { + ring = { + instance_addr = "127.0.0.1"; + kvstore.store = "inmemory"; + }; + replication_factor = 1; + path_prefix = "/tmp/loki"; + }; + + schema_config = { + configs = [ + { + from = "2020-05-15"; + store = "tsdb"; + object_store = "filesystem"; + schema = "v13"; + index = { + prefix = "index_"; + period = "24h"; + }; + } + ]; + }; + storage_config.filesystem.directory = "/var/lib/loki/chunk"; + }; + }; +} diff --git a/machines/monitor/prometheus.nix b/machines/monitor/prometheus.nix new file mode 100644 index 0000000..4a3c13d --- /dev/null +++ b/machines/monitor/prometheus.nix @@ -0,0 +1,185 @@ +{ + config, + pkgs, + modulesPath, + lib, + ... +}: let + monitor_hostname = "monitor.lab"; + traefik_hostname = "traefik.lab"; + sandbox_hostname = "sandbox.lab"; + forgejo_hostname = "forgejo.lab"; + runner01_hostname = "forgejo-runner-01.lab"; + dnsmasq_hostname = "dns.lab"; + media_hostname = "media.lab"; + mail_hostname = "mail.lab"; + keycloak_hostname = "keycloak.lab"; + + monitored_hosts = [ + monitor_hostname + traefik_hostname + sandbox_hostname + forgejo_hostname + runner01_hostname + dnsmasq_hostname + media_hostname + mail_hostname + keycloak_hostname + ]; + + # integrate colmena names and targetHost to generate nodeexporters + generateTargets = port: + map (host: "${host}:${toString port}") monitored_hosts; + + instance_relabel_config = [ + { + source_labels = ["__address__"]; + regex = "([^:]+):\\d+"; # Captures everything before the last colon + target_label = "instance"; + replacement = "$1"; + } + ]; + + node_exporter_port = 9100; + node_exporter_job = { + job_name = "node"; + static_configs = [{targets = generateTargets node_exporter_port;}]; + relabel_configs = instance_relabel_config; + }; + + promtail_port = 9080; + promtail_job = { + job_name = "promtail"; + static_configs = [{targets = generateTargets promtail_port;}]; + relabel_configs = instance_relabel_config; + }; + + prometheus_target = "${monitor_hostname}:9090"; + prometheus_job = { + job_name = "prometheus"; + static_configs = [{targets = [prometheus_target];}]; + relabel_configs = instance_relabel_config; + }; + + alertmanager_target = "${monitor_hostname}:9093"; + alertmanager_job = { + job_name = "alertmanager"; + static_configs = [{targets = [alertmanager_target];}]; + relabel_configs = instance_relabel_config; + }; + + grafana_target = "${monitor_hostname}:3000"; + grafana_job = { + job_name = "grafana"; + static_configs = [{targets = [grafana_target];}]; + relabel_configs = instance_relabel_config; + }; + + traefik_monitor_port = 8082; + traefik_job = { + job_name = "traefik"; + static_configs = [{targets = ["${traefik_hostname}:${toString traefik_monitor_port}"];}]; + relabel_configs = instance_relabel_config; + }; + + forgejo_monitor_port = 3000; + forgejo_job = { + job_name = "forgejo"; + static_configs = [{targets = ["${forgejo_hostname}:${toString forgejo_monitor_port}"];}]; + relabel_configs = instance_relabel_config; + }; + + postgres_exporter_port = 9187; + postgres_job = { + job_name = "postgres"; + static_configs = [{targets = ["${forgejo_hostname}:${toString postgres_exporter_port}"];}]; + relabel_configs = instance_relabel_config; + }; + + dnsmasq_exporter_port = 9153; + dnsmasq_job = { + job_name = "dnsmasq"; + static_configs = [{targets = ["${dnsmasq_hostname}:${toString dnsmasq_exporter_port}"];}]; + relabel_configs = instance_relabel_config; + }; + + # --- Media Stack Scrape Job --- + media_stack_job = { + job_name = "media_stack"; + static_configs = [ + { + targets = [ + "${media_hostname}:9707" # sonarr + "${media_hostname}:9708" # readarr + "${media_hostname}:9709" # radarr + "${media_hostname}:9710" # prowlarr + "${media_hostname}:9711" # lidarr + "${media_hostname}:9712" # bazarr + ]; + } + ]; + relabel_configs = instance_relabel_config; + }; + + jellyfin_port = 8096; + jellyfin_exporter_port = 9594; + jellyfin_job = { + job_name = "jellyfin"; + static_configs = [ + { + targets = [ + "${media_hostname}:${toString jellyfin_port}" + "${monitor_hostname}:${toString jellyfin_exporter_port}" + ]; + } + ]; + relabel_configs = instance_relabel_config; + }; +in { + networking.firewall.allowedTCPPorts = [9090]; + + services.prometheus = { + enable = true; + retentionTime = "7d"; + globalConfig = { + scrape_timeout = "10s"; + scrape_interval = "30s"; + # A short evaluation_interval will check alerting rules very often. + # It can be costly if you run Prometheus with 100+ alerts. + evaluation_interval = "20s"; + }; + extraFlags = [ + "--web.enable-admin-api" + ]; + + scrapeConfigs = [ + node_exporter_job + promtail_job + prometheus_job + alertmanager_job + grafana_job + traefik_job + forgejo_job + postgres_job + dnsmasq_job + media_stack_job + jellyfin_job + ]; + + alertmanagers = [ + { + scheme = "http"; + static_configs = [{targets = [alertmanager_target];}]; + } + ]; + + ruleFiles = [ + (pkgs.writeText "prometheus-alerts.yml" (builtins.readFile ./provisioning/alerts/prometheus-alerts.yml)) + (pkgs.writeText "loki-alerts.yml" (builtins.readFile ./provisioning/alerts/loki-alerts.yml)) + (pkgs.writeText "promtail-alerts.yml" (builtins.readFile ./provisioning/alerts/promtail-alerts.yml)) + (pkgs.writeText "postgres-alerts.yml" (builtins.readFile ./provisioning/alerts/postgres-alerts.yml)) + (pkgs.writeText "traefik-alerts.yml" (builtins.readFile ./provisioning/alerts/traefik-alerts.yml)) + (pkgs.writeText "node-exporter-alerts.yml" (builtins.readFile ./provisioning/alerts/node-exporter-alerts.yml)) + ]; + }; +} diff --git a/machines/monitor/provisioning/alerts/loki-alerts.yml b/machines/monitor/provisioning/alerts/loki-alerts.yml new file mode 100644 index 0000000..0d4f91e --- /dev/null +++ b/machines/monitor/provisioning/alerts/loki-alerts.yml @@ -0,0 +1,39 @@ +groups: + - name: Loki + + rules: + - alert: LokiProcessTooManyRestarts + expr: 'changes(process_start_time_seconds{job=~".*loki.*"}[15m]) > 2' + for: 0m + labels: + severity: warning + annotations: + summary: Loki process too many restarts (instance {{ $labels.instance }}) + description: "A loki process had too many restarts (target {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: LokiRequestErrors + expr: '100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) > 10' + for: 15m + labels: + severity: critical + annotations: + summary: Loki request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} and {{ $labels.route }} are experiencing errors\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: LokiRequestPanic + expr: "sum(increase(loki_panic_total[10m])) by (namespace, job) > 0" + for: 5m + labels: + severity: critical + annotations: + summary: Loki request panic (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% increase of panics\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: LokiRequestLatency + expr: '(histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[5m])) by (le))) > 1' + for: 5m + labels: + severity: critical + annotations: + summary: Loki request latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/machines/monitor/provisioning/alerts/node-exporter-alerts.yml b/machines/monitor/provisioning/alerts/node-exporter-alerts.yml new file mode 100644 index 0000000..57e4605 --- /dev/null +++ b/machines/monitor/provisioning/alerts/node-exporter-alerts.yml @@ -0,0 +1,318 @@ +groups: + - name: NodeExporter + + rules: + - alert: HostOutOfMemory + expr: "(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < .10)" + for: 2m + labels: + severity: warning + annotations: + summary: Host out of memory (instance {{ $labels.instance }}) + description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostMemoryUnderMemoryPressure + expr: "(rate(node_vmstat_pgmajfault[5m]) > 1000)" + for: 0m + labels: + severity: warning + annotations: + summary: Host memory under memory pressure (instance {{ $labels.instance }}) + description: "The node is under heavy memory pressure. High rate of loading memory pages from disk.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostMemoryIsUnderutilized + expr: "min_over_time(node_memory_MemFree_bytes[1w]) > node_memory_MemTotal_bytes * .8" + for: 0m + labels: + severity: info + annotations: + summary: Host Memory is underutilized (instance {{ $labels.instance }}) + description: "Node memory usage is < 20% for 1 week. Consider reducing memory space. (instance {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostUnusualNetworkThroughputIn + expr: "((rate(node_network_receive_bytes_total[5m]) / on(instance, device) node_network_speed_bytes) > .80)" + for: 0m + labels: + severity: warning + annotations: + summary: Host unusual network throughput in (instance {{ $labels.instance }}) + description: "Host receive bandwidth is high (>80%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostUnusualNetworkThroughputOut + expr: "((rate(node_network_transmit_bytes_total[5m]) / on(instance, device) node_network_speed_bytes) > .80)" + for: 0m + labels: + severity: warning + annotations: + summary: Host unusual network throughput out (instance {{ $labels.instance }}) + description: "Host transmit bandwidth is high (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostUnusualDiskReadRate + expr: "(rate(node_disk_io_time_seconds_total[5m]) > .80)" + for: 0m + labels: + severity: warning + annotations: + summary: Host unusual disk read rate (instance {{ $labels.instance }}) + description: "Disk is too busy (IO wait > 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostOutOfDiskSpace + expr: '(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} / node_filesystem_size_bytes < .10 and on (instance, device, mountpoint) node_filesystem_readonly == 0)' + for: 2m + labels: + severity: critical + annotations: + summary: Host out of disk space (instance {{ $labels.instance }}) + description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostDiskMayFillIn24Hours + expr: 'predict_linear(node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}[1h], 86400) <= 0 and node_filesystem_avail_bytes > 0' + for: 2m + labels: + severity: warning + annotations: + summary: Host disk may fill in 24 hours (instance {{ $labels.instance }}) + description: "Filesystem will likely run out of space within the next 24 hours.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostOutOfInodes + expr: "(node_filesystem_files_free / node_filesystem_files < .10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0)" + for: 2m + labels: + severity: critical + annotations: + summary: Host out of inodes (instance {{ $labels.instance }}) + description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostFilesystemDeviceError + expr: 'node_filesystem_device_error{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} == 1' + for: 2m + labels: + severity: critical + annotations: + summary: Host filesystem device error (instance {{ $labels.instance }}) + description: "Error stat-ing the {{ $labels.mountpoint }} filesystem\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostInodesMayFillIn24Hours + expr: 'predict_linear(node_filesystem_files_free{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}[1h], 86400) <= 0 and node_filesystem_files_free > 0' + for: 2m + labels: + severity: warning + annotations: + summary: Host inodes may fill in 24 hours (instance {{ $labels.instance }}) + description: "Filesystem will likely run out of inodes within the next 24 hours at current write rate\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostUnusualDiskReadLatency + expr: "(rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 0.1 and rate(node_disk_reads_completed_total[1m]) > 0)" + for: 2m + labels: + severity: warning + annotations: + summary: Host unusual disk read latency (instance {{ $labels.instance }}) + description: "Disk latency is growing (read operations > 100ms)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostUnusualDiskWriteLatency + expr: "(rate(node_disk_write_time_seconds_total[1m]) / rate(node_disk_writes_completed_total[1m]) > 0.1 and rate(node_disk_writes_completed_total[1m]) > 0)" + for: 2m + labels: + severity: warning + annotations: + summary: Host unusual disk write latency (instance {{ $labels.instance }}) + description: "Disk latency is growing (write operations > 100ms)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostHighCpuLoad + expr: '(avg by (instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > .80' + for: 10m + labels: + severity: warning + annotations: + summary: Host high CPU load (instance {{ $labels.instance }}) + description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostCpuIsUnderutilized + expr: '(min by (instance) (rate(node_cpu_seconds_total{mode="idle"}[1h]))) > 0.95' + for: 1w + labels: + severity: info + annotations: + summary: Host CPU is underutilized (instance {{ $labels.instance }}) + description: "CPU load has been < 20% for 1 week. Consider reducing the number of CPUs.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostCpuStealNoisyNeighbor + expr: 'avg by(instance) (rate(node_cpu_seconds_total{mode="steal"}[5m])) * 100 > 10' + for: 0m + labels: + severity: warning + annotations: + summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }}) + description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostCpuHighIowait + expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) > .10' + for: 0m + labels: + severity: warning + annotations: + summary: Host CPU high iowait (instance {{ $labels.instance }}) + description: "CPU iowait > 10%. Your CPU is idling waiting for storage to respond.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostUnusualDiskIo + expr: "rate(node_disk_io_time_seconds_total[5m]) > 0.8" + for: 5m + labels: + severity: warning + annotations: + summary: Host unusual disk IO (instance {{ $labels.instance }}) + description: "Disk usage >80%. Check storage for issues or increase IOPS capabilities. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostContextSwitchingHigh + expr: '(rate(node_context_switches_total[15m])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) / (rate(node_context_switches_total[1d])/count without(mode,cpu) (node_cpu_seconds_total{mode="idle"})) > 2' + for: 0m + labels: + severity: warning + annotations: + summary: Host context switching high (instance {{ $labels.instance }}) + description: "Context switching is growing on the node (twice the daily average during the last 15m)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostSwapIsFillingUp + expr: "((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80)" + for: 2m + labels: + severity: warning + annotations: + summary: Host swap is filling up (instance {{ $labels.instance }}) + description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostSystemdServiceCrashed + expr: '(node_systemd_unit_state{state="failed"} == 1)' + for: 0m + labels: + severity: warning + annotations: + summary: Host systemd service crashed (instance {{ $labels.instance }}) + description: "systemd service crashed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostPhysicalComponentTooHot + expr: "node_hwmon_temp_celsius > node_hwmon_temp_max_celsius" + for: 5m + labels: + severity: warning + annotations: + summary: Host physical component too hot (instance {{ $labels.instance }}) + description: "Physical hardware component too hot\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostNodeOvertemperatureAlarm + expr: "((node_hwmon_temp_crit_alarm_celsius == 1) or (node_hwmon_temp_alarm == 1))" + for: 0m + labels: + severity: critical + annotations: + summary: Host node overtemperature alarm (instance {{ $labels.instance }}) + description: "Physical node temperature alarm triggered\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostSoftwareRaidInsufficientDrives + expr: '((node_md_disks_required - on(device, instance) node_md_disks{state="active"}) > 0)' + for: 0m + labels: + severity: critical + annotations: + summary: Host software RAID insufficient drives (instance {{ $labels.instance }}) + description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} has insufficient drives remaining.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostSoftwareRaidDiskFailure + expr: '(node_md_disks{state="failed"} > 0)' + for: 2m + labels: + severity: warning + annotations: + summary: Host software RAID disk failure (instance {{ $labels.instance }}) + description: "MD RAID array {{ $labels.device }} on {{ $labels.instance }} needs attention.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostKernelVersionDeviations + expr: "changes(node_uname_info[1h]) > 0" + for: 0m + labels: + severity: info + annotations: + summary: Host kernel version deviations (instance {{ $labels.instance }}) + description: "Kernel version for {{ $labels.instance }} has changed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostOomKillDetected + expr: "(increase(node_vmstat_oom_kill[1m]) > 0)" + for: 0m + labels: + severity: warning + annotations: + summary: Host OOM kill detected (instance {{ $labels.instance }}) + description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostEdacCorrectableErrorsDetected + expr: "(increase(node_edac_correctable_errors_total[1m]) > 0)" + for: 0m + labels: + severity: info + annotations: + summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostEdacUncorrectableErrorsDetected + expr: "(node_edac_uncorrectable_errors_total > 0)" + for: 0m + labels: + severity: warning + annotations: + summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostNetworkReceiveErrors + expr: "(rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01)" + for: 2m + labels: + severity: warning + annotations: + summary: Host Network Receive Errors (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostNetworkTransmitErrors + expr: "(rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01)" + for: 2m + labels: + severity: warning + annotations: + summary: Host Network Transmit Errors (instance {{ $labels.instance }}) + description: "Host {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostNetworkBondDegraded + expr: "((node_bonding_active - node_bonding_slaves) != 0)" + for: 2m + labels: + severity: warning + annotations: + summary: Host Network Bond Degraded (instance {{ $labels.instance }}) + description: "Bond \"{{ $labels.device }}\" degraded on \"{{ $labels.instance }}\".\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostConntrackLimit + expr: "(node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8)" + for: 5m + labels: + severity: warning + annotations: + summary: Host conntrack limit (instance {{ $labels.instance }}) + description: "The number of conntrack is approaching limit\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostClockSkew + expr: "((node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0))" + for: 10m + labels: + severity: warning + annotations: + summary: Host clock skew (instance {{ $labels.instance }}) + description: "Clock skew detected. Clock is out of sync. Ensure NTP is configured correctly on this host.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: HostClockNotSynchronising + expr: "(min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16)" + for: 2m + labels: + severity: warning + annotations: + summary: Host clock not synchronising (instance {{ $labels.instance }}) + description: "Clock not synchronising. Ensure NTP is configured on this host.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/machines/monitor/provisioning/alerts/postgres-alerts.yml b/machines/monitor/provisioning/alerts/postgres-alerts.yml new file mode 100644 index 0000000..8bec959 --- /dev/null +++ b/machines/monitor/provisioning/alerts/postgres-alerts.yml @@ -0,0 +1,201 @@ +groups: + - name: Postgres + + rules: + - alert: PostgresqlDown + expr: "pg_up == 0" + for: 0m + labels: + severity: critical + annotations: + summary: Postgresql down (instance {{ $labels.instance }}) + description: "Postgresql instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlRestarted + expr: "time() - pg_postmaster_start_time_seconds < 60" + for: 0m + labels: + severity: critical + annotations: + summary: Postgresql restarted (instance {{ $labels.instance }}) + description: "Postgresql restarted\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlExporterError + expr: "pg_exporter_last_scrape_error > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Postgresql exporter error (instance {{ $labels.instance }}) + description: "Postgresql exporter is showing errors. A query may be buggy in query.yaml\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlTableNotAutoVacuumed + expr: "((pg_stat_user_tables_n_tup_del + pg_stat_user_tables_n_tup_upd + pg_stat_user_tables_n_tup_hot_upd) > pg_settings_autovacuum_vacuum_threshold) and (time() - pg_stat_user_tables_last_autovacuum) > 60 * 60 * 24 * 10" + for: 0m + labels: + severity: warning + annotations: + summary: Postgresql table not auto vacuumed (instance {{ $labels.instance }}) + description: "Table {{ $labels.relname }} has not been auto vacuumed for 10 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlTableNotAutoAnalyzed + expr: "((pg_stat_user_tables_n_tup_del + pg_stat_user_tables_n_tup_upd + pg_stat_user_tables_n_tup_hot_upd) > pg_settings_autovacuum_analyze_threshold) and (time() - pg_stat_user_tables_last_autoanalyze) > 24 * 60 * 60 * 10" + for: 0m + labels: + severity: warning + annotations: + summary: Postgresql table not auto analyzed (instance {{ $labels.instance }}) + description: "Table {{ $labels.relname }} has not been auto analyzed for 10 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlTooManyConnections + expr: "sum by (instance, job, server) (pg_stat_activity_count) > min by (instance, job, server) (pg_settings_max_connections * 0.8)" + for: 2m + labels: + severity: warning + annotations: + summary: Postgresql too many connections (instance {{ $labels.instance }}) + description: "PostgreSQL instance has too many connections (> 80%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlNotEnoughConnections + expr: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) < 2' + for: 2m + labels: + severity: warning + annotations: + summary: Postgresql not enough connections (instance {{ $labels.instance }}) + description: "PostgreSQL instance should have more connections (> 2)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlDeadLocks + expr: 'increase(pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]) > 5' + for: 0m + labels: + severity: warning + annotations: + summary: Postgresql dead locks (instance {{ $labels.instance }}) + description: "PostgreSQL has dead-locks\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlHighRollbackRate + expr: 'sum by (namespace,datname) ((rate(pg_stat_database_xact_rollback{datname!~"template.*|postgres",datid!="0"}[3m])) / ((rate(pg_stat_database_xact_rollback{datname!~"template.*|postgres",datid!="0"}[3m])) + (rate(pg_stat_database_xact_commit{datname!~"template.*|postgres",datid!="0"}[3m])))) > 0.02' + for: 0m + labels: + severity: warning + annotations: + summary: Postgresql high rollback rate (instance {{ $labels.instance }}) + description: "Ratio of transactions being aborted compared to committed is > 2 %\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlCommitRateLow + expr: 'increase(pg_stat_database_xact_commit{datname!~"template.*|postgres",datid!="0"}[5m]) < 5' + for: 2m + labels: + severity: critical + annotations: + summary: Postgresql commit rate low (instance {{ $labels.instance }}) + description: "Postgresql seems to be processing very few transactions\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlLowXidConsumption + expr: "rate(pg_txid_current[1m]) < 5" + for: 2m + labels: + severity: warning + annotations: + summary: Postgresql low XID consumption (instance {{ $labels.instance }}) + description: "Postgresql seems to be consuming transaction IDs very slowly\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlHighRateStatementTimeout + expr: 'rate(postgresql_errors_total{type="statement_timeout"}[1m]) > 3' + for: 0m + labels: + severity: critical + annotations: + summary: Postgresql high rate statement timeout (instance {{ $labels.instance }}) + description: "Postgres transactions showing high rate of statement timeouts\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlHighRateDeadlock + expr: 'increase(postgresql_errors_total{type="deadlock_detected"}[1m]) > 1' + for: 0m + labels: + severity: critical + annotations: + summary: Postgresql high rate deadlock (instance {{ $labels.instance }}) + description: "Postgres detected deadlocks\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlUnusedReplicationSlot + expr: "pg_replication_slots_active == 0" + for: 1m + labels: + severity: warning + annotations: + summary: Postgresql unused replication slot (instance {{ $labels.instance }}) + description: "Unused Replication Slots\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlTooManyDeadTuples + expr: "((pg_stat_user_tables_n_dead_tup > 10000) / (pg_stat_user_tables_n_live_tup + pg_stat_user_tables_n_dead_tup)) >= 0.1" + for: 2m + labels: + severity: warning + annotations: + summary: Postgresql too many dead tuples (instance {{ $labels.instance }}) + description: "PostgreSQL dead tuples is too large\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlConfigurationChanged + expr: '{__name__=~"pg_settings_.*"} != ON(__name__, instance) {__name__=~"pg_settings_([^t]|t[^r]|tr[^a]|tra[^n]|tran[^s]|trans[^a]|transa[^c]|transac[^t]|transact[^i]|transacti[^o]|transactio[^n]|transaction[^_]|transaction_[^r]|transaction_r[^e]|transaction_re[^a]|transaction_rea[^d]|transaction_read[^_]|transaction_read_[^o]|transaction_read_o[^n]|transaction_read_on[^l]|transaction_read_onl[^y]).*"} OFFSET 5m' + for: 0m + labels: + severity: info + annotations: + summary: Postgresql configuration changed (instance {{ $labels.instance }}) + description: "Postgres Database configuration change has occurred\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlSslCompressionActive + expr: "sum(pg_stat_ssl_compression) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Postgresql SSL compression active (instance {{ $labels.instance }}) + description: "Database allows connections with SSL compression enabled. This may add significant jitter in replication delay. Replicas should turn off SSL compression via `sslcompression=0` in `recovery.conf`.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlTooManyLocksAcquired + expr: "((sum (pg_locks_count)) / (pg_settings_max_locks_per_transaction * pg_settings_max_connections)) > 0.20" + for: 2m + labels: + severity: critical + annotations: + summary: Postgresql too many locks acquired (instance {{ $labels.instance }}) + description: "Too many locks acquired on the database. If this alert happens frequently, we may need to increase the postgres setting max_locks_per_transaction.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlBloatIndexHigh(>80%) + expr: "pg_bloat_btree_bloat_pct > 80 and on (idxname) (pg_bloat_btree_real_size > 100000000)" + for: 1h + labels: + severity: warning + annotations: + summary: Postgresql bloat index high (> 80%) (instance {{ $labels.instance }}) + description: "The index {{ $labels.idxname }} is bloated. You should execute `REINDEX INDEX CONCURRENTLY {{ $labels.idxname }};`\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlBloatTableHigh(>80%) + expr: "pg_bloat_table_bloat_pct > 80 and on (relname) (pg_bloat_table_real_size > 200000000)" + for: 1h + labels: + severity: warning + annotations: + summary: Postgresql bloat table high (> 80%) (instance {{ $labels.instance }}) + description: "The table {{ $labels.relname }} is bloated. You should execute `VACUUM {{ $labels.relname }};`\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlInvalidIndex + expr: 'pg_general_index_info_pg_relation_size{indexrelname=~".*ccnew.*"}' + for: 6h + labels: + severity: warning + annotations: + summary: Postgresql invalid index (instance {{ $labels.instance }}) + description: "The table {{ $labels.relname }} has an invalid index: {{ $labels.indexrelname }}. You should execute `DROP INDEX {{ $labels.indexrelname }};`\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PostgresqlReplicationLag + expr: "pg_replication_lag_seconds > 5" + for: 30s + labels: + severity: warning + annotations: + summary: Postgresql replication lag (instance {{ $labels.instance }}) + description: "The PostgreSQL replication lag is high (> 5s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/machines/monitor/provisioning/alerts/prometheus-alerts.yml b/machines/monitor/provisioning/alerts/prometheus-alerts.yml new file mode 100644 index 0000000..68952cc --- /dev/null +++ b/machines/monitor/provisioning/alerts/prometheus-alerts.yml @@ -0,0 +1,255 @@ +groups: + - name: Prometheus + + rules: + - alert: PrometheusJobMissing + expr: 'absent(up{job="prometheus"})' + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus job missing (instance {{ $labels.instance }}) + description: "A Prometheus job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTargetMissing + expr: "up == 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus target missing (instance {{ $labels.instance }}) + description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusAllTargetsMissing + expr: "sum by (job) (up) == 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus all targets missing (instance {{ $labels.instance }}) + description: "A Prometheus job does not have living target anymore.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTargetMissingWithWarmupTime + expr: "sum by (instance, job) ((up == 0) * on (instance) group_left(__name__) (node_time_seconds - node_boot_time_seconds > 600))" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus target missing with warmup time (instance {{ $labels.instance }}) + description: "Allow a job time to start up (10 minutes) before alerting that it's down.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusConfigurationReloadFailure + expr: "prometheus_config_last_reload_successful != 1" + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus configuration reload failure (instance {{ $labels.instance }}) + description: "Prometheus configuration reload error\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTooManyRestarts + expr: 'changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 2' + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus too many restarts (instance {{ $labels.instance }}) + description: "Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusAlertmanagerJobMissing + expr: 'absent(up{job="alertmanager"})' + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus AlertManager job missing (instance {{ $labels.instance }}) + description: "A Prometheus AlertManager job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusAlertmanagerConfigurationReloadFailure + expr: "alertmanager_config_last_reload_successful != 1" + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}) + description: "AlertManager configuration reload error\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusAlertmanagerConfigNotSynced + expr: 'count(count_values("config_hash", alertmanager_config_hash)) > 1' + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus AlertManager config not synced (instance {{ $labels.instance }}) + description: "Configurations of AlertManager cluster instances are out of sync\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusAlertmanagerE2eDeadManSwitch + expr: "vector(1)" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus AlertManager E2E dead man switch (instance {{ $labels.instance }}) + description: "Prometheus DeadManSwitch is an always-firing alert. It's used as an end-to-end test of Prometheus through the Alertmanager.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusNotConnectedToAlertmanager + expr: "prometheus_notifications_alertmanagers_discovered < 1" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }}) + description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusRuleEvaluationFailures + expr: "increase(prometheus_rule_evaluation_failures_total[3m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus rule evaluation failures (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTemplateTextExpansionFailures + expr: "increase(prometheus_template_text_expansion_failures_total[3m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus template text expansion failures (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} template text expansion failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusRuleEvaluationSlow + expr: "prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds" + for: 5m + labels: + severity: warning + annotations: + summary: Prometheus rule evaluation slow (instance {{ $labels.instance }}) + description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusNotificationsBacklog + expr: "min_over_time(prometheus_notifications_queue_length[10m]) > 0" + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus notifications backlog (instance {{ $labels.instance }}) + description: "The Prometheus notification queue has not been empty for 10 minutes\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusAlertmanagerNotificationFailing + expr: "rate(alertmanager_notifications_failed_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }}) + description: "Alertmanager is failing sending notifications\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTargetEmpty + expr: "prometheus_sd_discovered_targets == 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus target empty (instance {{ $labels.instance }}) + description: "Prometheus has no target in service discovery\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTargetScrapingSlow + expr: 'prometheus_target_interval_length_seconds{quantile="0.9"} / on (interval, instance, job) prometheus_target_interval_length_seconds{quantile="0.5"} > 1.05' + for: 5m + labels: + severity: warning + annotations: + summary: Prometheus target scraping slow (instance {{ $labels.instance }}) + description: "Prometheus is scraping exporters slowly since it exceeded the requested interval time. Your Prometheus server is under-provisioned.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusLargeScrape + expr: "increase(prometheus_target_scrapes_exceeded_sample_limit_total[10m]) > 10" + for: 5m + labels: + severity: warning + annotations: + summary: Prometheus large scrape (instance {{ $labels.instance }}) + description: "Prometheus has many scrapes that exceed the sample limit\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTargetScrapeDuplicate + expr: "increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[5m]) > 0" + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus target scrape duplicate (instance {{ $labels.instance }}) + description: "Prometheus has many samples rejected due to duplicate timestamps but different values\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbCheckpointCreationFailures + expr: "increase(prometheus_tsdb_checkpoint_creations_failed_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB checkpoint creation failures (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} checkpoint creation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbCheckpointDeletionFailures + expr: "increase(prometheus_tsdb_checkpoint_deletions_failed_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB checkpoint deletion failures (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} checkpoint deletion failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbCompactionsFailed + expr: "increase(prometheus_tsdb_compactions_failed_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB compactions failed (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} TSDB compactions failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbHeadTruncationsFailed + expr: "increase(prometheus_tsdb_head_truncations_failed_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB head truncations failed (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} TSDB head truncation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbReloadFailures + expr: "increase(prometheus_tsdb_reloads_failures_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB reload failures (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} TSDB reload failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbWalCorruptions + expr: "increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTsdbWalTruncationsFailed + expr: "increase(prometheus_tsdb_wal_truncations_failed_total[1m]) > 0" + for: 0m + labels: + severity: critical + annotations: + summary: Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}) + description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PrometheusTimeseriesCardinality + expr: 'label_replace(count by(__name__) ({__name__=~".+"}), "name", "$1", "__name__", "(.+)") > 10000' + for: 0m + labels: + severity: warning + annotations: + summary: Prometheus timeseries cardinality (instance {{ $labels.instance }}) + description: "The \"{{ $labels.name }}\" timeseries cardinality is getting very high: {{ $value }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/machines/monitor/provisioning/alerts/promtail-alerts.yml b/machines/monitor/provisioning/alerts/promtail-alerts.yml new file mode 100644 index 0000000..39aee40 --- /dev/null +++ b/machines/monitor/provisioning/alerts/promtail-alerts.yml @@ -0,0 +1,21 @@ +groups: + - name: Promtail + + rules: + - alert: PromtailRequestErrors + expr: '100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance) > 10' + for: 5m + labels: + severity: critical + annotations: + summary: Promtail request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}% errors.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: PromtailRequestLatency + expr: "histogram_quantile(0.99, sum(rate(promtail_request_duration_seconds_bucket[5m])) by (le)) > 1" + for: 5m + labels: + severity: critical + annotations: + summary: Promtail request latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/machines/monitor/provisioning/alerts/traefik-alerts.yml b/machines/monitor/provisioning/alerts/traefik-alerts.yml new file mode 100644 index 0000000..218843c --- /dev/null +++ b/machines/monitor/provisioning/alerts/traefik-alerts.yml @@ -0,0 +1,30 @@ +groups: + - name: Traefik + + rules: + - alert: TraefikServiceDown + expr: "count(traefik_service_server_up) by (service) == 0" + for: 0m + labels: + severity: critical + annotations: + summary: Traefik service down (instance {{ $labels.instance }}) + description: "All Traefik services are down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: TraefikHighHttp4xxErrorRateService + expr: 'sum(rate(traefik_service_requests_total{code=~"4.*"}[3m])) by (service) / sum(rate(traefik_service_requests_total[3m])) by (service) * 100 > 5' + for: 1m + labels: + severity: critical + annotations: + summary: Traefik high HTTP 4xx error rate service (instance {{ $labels.instance }}) + description: "Traefik service 4xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + + - alert: TraefikHighHttp5xxErrorRateService + expr: 'sum(rate(traefik_service_requests_total{code=~"5.*"}[3m])) by (service) / sum(rate(traefik_service_requests_total[3m])) by (service) * 100 > 5' + for: 1m + labels: + severity: critical + annotations: + summary: Traefik high HTTP 5xx error rate service (instance {{ $labels.instance }}) + description: "Traefik service 5xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/machines/monitor/provisioning/templates/telegram.tmpl b/machines/monitor/provisioning/templates/telegram.tmpl new file mode 100644 index 0000000..28d15c8 --- /dev/null +++ b/machines/monitor/provisioning/templates/telegram.tmpl @@ -0,0 +1,29 @@ +{{ define "alert_list" }}{{ range . }} +--- +🪪 {{ .Labels.alertname | html }} +{{- if eq .Labels.severity "critical" }} +🚨 CRITICAL 🚨 {{ end }} +{{- if eq .Labels.severity "warning" }} +⚠️ WARNING ⚠️{{ end }} +{{- if .Annotations.summary }} +📝 {{ .Annotations.summary | html }}{{ end }} +{{- if .Annotations.description }} +📖 {{ .Annotations.description | html }}{{ end }} + +🏷 Labels: +{{ range .Labels.SortedPairs }} {{ .Name | html }}: {{ .Value | html }} +{{ end }}{{ end }} +🛠 Grafana +💊 Alertmanager +{{ end }} + +{{ define "telegram.message" }} +{{ if gt (len .Alerts.Firing) 0 }} +🔥 Alerts Firing 🔥 +{{ template "alert_list" .Alerts.Firing }} +{{ end }} +{{ if gt (len .Alerts.Resolved) 0 }} +✅ Alerts Resolved ✅ +{{ template "alert_list" .Alerts.Resolved }} +{{ end }} +{{ end }} diff --git a/machines/sandbox/definition.nix b/machines/sandbox/definition.nix new file mode 100644 index 0000000..8033946 --- /dev/null +++ b/machines/sandbox/definition.nix @@ -0,0 +1,3 @@ +{ + system.stateVersion = "25.05"; +} diff --git a/nixos/flake.lock b/nixos/flake.lock index becb6f0..27c04ad 100644 --- a/nixos/flake.lock +++ b/nixos/flake.lock @@ -1,8 +1,82 @@ { "nodes": { + "colmena": { + "inputs": { + "flake-compat": "flake-compat", + "flake-utils": "flake-utils", + "nix-github-actions": "nix-github-actions", + "nixpkgs": "nixpkgs", + "stable": "stable" + }, + "locked": { + "lastModified": 1751144689, + "narHash": "sha256-cgIntaqhcm62V1KU6GmrAGpHpahT4UExEWW2ryS02ZU=", + "owner": "zhaofengli", + "repo": "colmena", + "rev": "3ceec72cfb396a8a8de5fe96a9d75a9ce88cc18e", + "type": "github" + }, + "original": { + "owner": "zhaofengli", + "repo": "colmena", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1650374568, + "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "b4a34015c698c7793d592d66adbab377907a2be8", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1659877975, + "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nix-github-actions": { + "inputs": { + "nixpkgs": [ + "colmena", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1729742964, + "narHash": "sha256-B4mzTcQ0FZHdpeWcpDYPERtyjJd/NIuaQ9+BV1h+MpA=", + "owner": "nix-community", + "repo": "nix-github-actions", + "rev": "e04df33f62cdcf93d73e9a04142464753a16db67", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nix-github-actions", + "type": "github" + } + }, "nixarr": { "inputs": { - "nixpkgs": "nixpkgs", + "nixpkgs": "nixpkgs_2", "vpnconfinement": "vpnconfinement", "website-builder": "website-builder" }, @@ -21,6 +95,22 @@ } }, "nixpkgs": { + "locked": { + "lastModified": 1750134718, + "narHash": "sha256-v263g4GbxXv87hMXMCpjkIxd/viIF7p3JpJrwgKdNiI=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9e83b64f727c88a7711a2c463a7b16eedb69a84c", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { "locked": { "lastModified": 1748662220, "narHash": "sha256-7gGa49iB9nCnFk4h/g9zwjlQAyjtpgcFkODjcOQS0Es=", @@ -36,7 +126,7 @@ "type": "github" } }, - "nixpkgs_2": { + "nixpkgs_3": { "locked": { "lastModified": 1748809735, "narHash": "sha256-UR5vKj8rwKQmE8wxKFHgoJKbod05DMoH5phTje4L1l8=", @@ -53,8 +143,9 @@ }, "root": { "inputs": { + "colmena": "colmena", "nixarr": "nixarr", - "nixpkgs": "nixpkgs_2", + "nixpkgs": "nixpkgs_3", "sops-nix": "sops-nix" } }, @@ -78,6 +169,22 @@ "type": "github" } }, + "stable": { + "locked": { + "lastModified": 1750133334, + "narHash": "sha256-urV51uWH7fVnhIvsZIELIYalMYsyr2FCalvlRTzqWRw=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "36ab78dab7da2e4e27911007033713bab534187b", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" + } + }, "vpnconfinement": { "locked": { "lastModified": 1743810720, diff --git a/nixos/flake.nix b/nixos/flake.nix index eadd118..0195467 100644 --- a/nixos/flake.nix +++ b/nixos/flake.nix @@ -3,6 +3,7 @@ inputs = { nixpkgs.url = "github:nixos/nixpkgs"; + colmena.url = "github:zhaofengli/colmena"; sops-nix = { url = "github:Mic92/sops-nix"; inputs.nixpkgs.follows = "nixpkgs"; @@ -14,7 +15,11 @@ # }; }; - outputs = inputs @ {...}: let + outputs = inputs @ { + nixpkgs, + colmena, + ... + }: let system = "x86_64-linux"; liveVMs = { @@ -102,5 +107,32 @@ }; in { nixosConfigurations = liveVMs; + + colmenaHive = colmena.lib.makeHive { + meta = { + nixpkgs = import nixpkgs { + system = "x86_64-linux"; + overlays = []; + }; + + defaults = {pkgs, ...}: { + }; + }; + + host-b = { + name, + nodes, + pkgs, + ... + }: { + deployment = { + targetHost = "somehost.tld"; + targetPort = 1234; + targetUser = "luser"; + }; + boot.isContainer = true; + time.timeZone = "America/Los_Angeles"; + }; + }; }; } diff --git a/secrets/secrets.yaml b/secrets/secrets.yaml new file mode 100644 index 0000000..e1807cd --- /dev/null +++ b/secrets/secrets.yaml @@ -0,0 +1,21 @@ +telegram-alert-bot-token: ENC[AES256_GCM,data:7Bvhtrkaqc06xmSeOsw730cAfHAw4qBvz1ontPXO/6j4Hy6AAKbb8LYGIONVGA==,iv:2xdhmAM2anEH7flV72BlfVeXjStu6sEUqT97PW+dY2w=,tag:h12zGj8J0ftKuGuKZuCEmw==,type:str] +alertmanager: + env: ENC[AES256_GCM,data:lMZVLGY4JNeEa1OhiQsAyBqArDttpMjAILjtFQfi7933RfckJMike9cWOV8pSVMJjKUBCtmnir/KDhbyYCZ3oYQh,iv:dLGqXsvJ8x32bqtcaq66O85HcbF/I78HSmo3o/Sx76o=,tag:SHv07/J8O+JPkpTu5rRCzA==,type:str] +influxdb: + password: ENC[AES256_GCM,data:OP+4vK6ulZs7jVM4lgnpUatr+Qs=,iv:MEmD6yyy+Z7beVOdR1xNDn0c27DYDIDTYdnaNiaVHks=,tag:dyG7VPPV40JqSE4UAeVbtQ==,type:str] + token: ENC[AES256_GCM,data:QraVWLW1uCSF0YvbkHCKYtPvqs0=,iv:pzkfEyLksjRFVj7wZS8LxO0idQTpEk7OTMpQSsuIRvQ=,tag:d6U6vMqEYbu3CaTpnc0gGw==,type:str] +sops: + age: + - recipient: age1n20y9kmdh324m3tkclvhmyuc7c8hk4w84zsal725adahwl8nzq0s04aq4y + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBVa29TZnF4K2dtcEc2N1Qy + cmVTeWJpZXlYRWV6SG84M2NjVHhiRER4TFdFCjNCb1owUU5lL1BDOGt4ZXhad242 + cFI2YWd0SnBCV2RXVUNlY2dKYWpUK1kKLS0tIHh6UmRkdlJqeWZHaTFYQ0M4L2xo + QzNYRk5ERmR4aGtLQ3dwQ1lPeDZyaEkKJMLXqv6tBBql7VVnWDIwAh24SfQ2O6Ca + CEOQTGEonbqr5doWqTsXUXrdQAS0amL45UdT6ITFtfNAjaHwCMfhZg== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2025-07-06T17:10:59Z" + mac: ENC[AES256_GCM,data:dXLWT5fmSs2ddpFPXA1yOtwaej7b3lPesFxN7aEZ/bV6YRr+Ht5dHFQcXO0TfJArzhFRRtAumdcdVsorMkR4tao4XCcimACcWrZgXlXGM6XgT3hdPJ4006QLePXU+uyzpqyEuOouaxF7fyuSTL68uDr+E/NAHgmP2dnqpWnebpY=,iv:oUkmH/ngp8wvbuXay+2X6YBqhesNdtOPZOV4lvsc/s4=,tag:GErA9zgdkTarUD6fWiMupg==,type:str] + unencrypted_suffix: _unencrypted + version: 3.10.2