From c7d4e2ca4c95a1a2f105ed1f51b66b0d166f3220 Mon Sep 17 00:00:00 2001 From: Malte Brandy Date: Tue, 1 Dec 2020 12:17:10 +0100 Subject: [PATCH] Try a bunch of rule improvements --- common/default.nix | 66 +++++++++++++++------------ nixos/machines/hera/cloud.nix | 10 ++-- nixos/machines/hera/mail.nix | 3 +- nixos/roles/monitoring/prometheus.nix | 1 + nixos/roles/monitoring/rules.yml | 6 +-- 5 files changed, 48 insertions(+), 38 deletions(-) diff --git a/common/default.nix b/common/default.nix index 413ad252..c710f84f 100644 --- a/common/default.nix +++ b/common/default.nix @@ -5,18 +5,20 @@ with lib; { config = { - m-0.monitoring = [{ - host = "apollo:9100"; - name = "apollo"; - }{ - name = "ved server"; - host = "bach.vocalensemble-darmstadt.de:9100"; - } - { - name = "ved postfix"; - host = "bach.vocalensemble-darmstadt.de:9154"; - } - ]; + m-0.monitoring = [ + { + host = "apollo:9100"; + name = "apollo"; + } + { + name = "ved server"; + host = "bach.vocalensemble-darmstadt.de:9100"; + } + { + name = "ved postfix"; + host = "bach.vocalensemble-darmstadt.de:9154"; + } + ]; }; options = { @@ -33,6 +35,10 @@ with lib; options = { name = mkOption { type = types.str; }; host = mkOption { type = types.str; }; + container = mkOption { + type = types.bool; + default = false; + }; }; }); default = [ ]; @@ -45,29 +51,29 @@ with lib; apollo-p = "${p}::1"; wg-p = "${p}::100"; v4-p = "10.0.0"; - in rec { - hera = "${p}::1"; - hera-wg-host = "${p}::100:0:1"; + in rec { + hera = "${p}::1"; + hera-wg-host = "${p}::100:0:1"; - hera-v4 = "213.136.94.190"; + hera-v4 = "213.136.94.190"; - hera-wg = "${wg-p}:1"; - apollo-wg = "${wg-p}:2"; + hera-wg = "${wg-p}:1"; + apollo-wg = "${wg-p}:2"; - hera-intern = "${hera-p}:1"; - git = "${hera-p}:2"; - borg = "${hera-p}:3"; - matrix = "${hera-p}:8"; - cloud = "${hera-p}:9"; - chor-cloud = "${hera-p}:b"; + hera-intern = "${hera-p}:1"; + git = "${hera-p}:2"; + borg = "${hera-p}:3"; + matrix = "${hera-p}:8"; + cloud = "${hera-p}:9"; + 
chor-cloud = "${hera-p}:b"; - apollo = apollo-wg; + apollo = apollo-wg; - hera-intern-v4 = "${v4-p}.1"; - cloud-intern-v4 = "${v4-p}.2"; - chor-cloud-intern-v4 = "${v4-p}.3"; - matrix-intern-v4 = "${v4-p}.4"; - }; + hera-intern-v4 = "${v4-p}.1"; + cloud-intern-v4 = "${v4-p}.2"; + chor-cloud-intern-v4 = "${v4-p}.3"; + matrix-intern-v4 = "${v4-p}.4"; + }; }; }; diff --git a/nixos/machines/hera/cloud.nix b/nixos/machines/hera/cloud.nix index 9a5299d9..c92b7c67 100644 --- a/nixos/machines/hera/cloud.nix +++ b/nixos/machines/hera/cloud.nix @@ -165,19 +165,21 @@ in { }; m-0.monitoring = [ { - name = "chor-cloud"; + name = "chor-cloud container"; host = "chor-cloud:9100"; + container = true; } { - name = "chor-cloud-nginx"; + name = "chor-cloud nginx"; host = "chor-cloud:9113"; } { - name = "cloud"; + name = "cloud container"; host = "cloud:9100"; + container = true; } { - name = "cloud-nginx"; + name = "cloud nginx"; host = "cloud:9113"; } ]; diff --git a/nixos/machines/hera/mail.nix b/nixos/machines/hera/mail.nix index 0c24d74a..8ae13635 100644 --- a/nixos/machines/hera/mail.nix +++ b/nixos/machines/hera/mail.nix @@ -5,8 +5,9 @@ in { m-0.monitoring = [ { - name = "mail-server"; + name = "mail container"; host = "hera-intern:9101"; + container = true; } { name = "hera postfix"; diff --git a/nixos/roles/monitoring/prometheus.nix b/nixos/roles/monitoring/prometheus.nix index 171781cf..f30cf245 100644 --- a/nixos/roles/monitoring/prometheus.nix +++ b/nixos/roles/monitoring/prometheus.nix @@ -34,6 +34,7 @@ labels = { inherit name; inherit alert_type; + inherit container; }; }]; }) config.m-0.monitoring; diff --git a/nixos/roles/monitoring/rules.yml b/nixos/roles/monitoring/rules.yml index 62e6795e..0eac7001 100644 --- a/nixos/roles/monitoring/rules.yml +++ b/nixos/roles/monitoring/rules.yml @@ -30,12 +30,12 @@ groups: annotations: description: 'service {{$labels.exported_name}} on {{$labels.name}} failed.' 
- alert: out_of_diskspace - expr: ((min by (device, name) (node_filesystem_avail_bytes{device!="tmpfs",name!="cloud",name!="chor-cloud",name!="mail-server"}) * 100) / max by (device,name) (node_filesystem_size_bytes) < 10) and (max by (device,name) (node_filesystem_avail_bytes) / 1024 / 1024 / 1024) < 100 + expr: min by (device, name) (node_filesystem_avail_bytes{device!="tmpfs",container!="true"}) / max by (device,name) (node_filesystem_size_bytes) < 0.1 for: 5m labels: severity: warning annotations: - description: "{{ $labels.device }} on {{ $labels.name }} has less than 10% free diskspace." + description: "{{ $labels.device }} on {{ $labels.name }} has only {{ $value | humanizePercentage }} free diskspace." - alert: out_of_inodes expr: node_filesystem_files_free{fstype!="tmpfs"} / node_filesystem_files{fstype!="tmpfs"} * 100 < 10 for: 5m @@ -49,4 +49,4 @@ groups: labels: severity: warning annotations: - description: "mail queue {{ $labels.queue }} on {{ $labels.name }} has accumulated a waiting time of {{ $value }}s." + description: "mail queue {{ $labels.queue }} on {{ $labels.name }} has accumulated a waiting time of {{ $value | humanizeDuration }}."