Try a bunch of rule improvements
This commit is contained in:
parent
ce44ee1c3a
commit
c7d4e2ca4c
|
@ -5,10 +5,12 @@ with lib;
|
||||||
{
|
{
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
m-0.monitoring = [{
|
m-0.monitoring = [
|
||||||
|
{
|
||||||
host = "apollo:9100";
|
host = "apollo:9100";
|
||||||
name = "apollo";
|
name = "apollo";
|
||||||
}{
|
}
|
||||||
|
{
|
||||||
name = "ved server";
|
name = "ved server";
|
||||||
host = "bach.vocalensemble-darmstadt.de:9100";
|
host = "bach.vocalensemble-darmstadt.de:9100";
|
||||||
}
|
}
|
||||||
|
@ -33,6 +35,10 @@ with lib;
|
||||||
options = {
|
options = {
|
||||||
name = mkOption { type = types.str; };
|
name = mkOption { type = types.str; };
|
||||||
host = mkOption { type = types.str; };
|
host = mkOption { type = types.str; };
|
||||||
|
container = mkOption {
|
||||||
|
type = types.bool;
|
||||||
|
default = false;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = [ ];
|
default = [ ];
|
||||||
|
|
|
@ -165,19 +165,21 @@ in {
|
||||||
};
|
};
|
||||||
m-0.monitoring = [
|
m-0.monitoring = [
|
||||||
{
|
{
|
||||||
name = "chor-cloud";
|
name = "chor-cloud container";
|
||||||
host = "chor-cloud:9100";
|
host = "chor-cloud:9100";
|
||||||
|
container = true;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
name = "chor-cloud-nginx";
|
name = "chor-cloud nginx";
|
||||||
host = "chor-cloud:9113";
|
host = "chor-cloud:9113";
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
name = "cloud";
|
name = "cloud container";
|
||||||
host = "cloud:9100";
|
host = "cloud:9100";
|
||||||
|
container = true;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
name = "cloud-nginx";
|
name = "cloud nginx";
|
||||||
host = "cloud:9113";
|
host = "cloud:9113";
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
|
@ -5,8 +5,9 @@ in {
|
||||||
|
|
||||||
m-0.monitoring = [
|
m-0.monitoring = [
|
||||||
{
|
{
|
||||||
name = "mail-server";
|
name = "mail container";
|
||||||
host = "hera-intern:9101";
|
host = "hera-intern:9101";
|
||||||
|
container = true;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
name = "hera postfix";
|
name = "hera postfix";
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
labels = {
|
labels = {
|
||||||
inherit name;
|
inherit name;
|
||||||
inherit alert_type;
|
inherit alert_type;
|
||||||
|
inherit container;
|
||||||
};
|
};
|
||||||
}];
|
}];
|
||||||
}) config.m-0.monitoring;
|
}) config.m-0.monitoring;
|
||||||
|
|
|
@ -30,12 +30,12 @@ groups:
|
||||||
annotations:
|
annotations:
|
||||||
description: 'service {{$labels.exported_name}} on {{$labels.name}} failed.'
|
description: 'service {{$labels.exported_name}} on {{$labels.name}} failed.'
|
||||||
- alert: out_of_diskspace
|
- alert: out_of_diskspace
|
||||||
expr: ((min by (device, name) (node_filesystem_avail_bytes{device!="tmpfs",name!="cloud",name!="chor-cloud",name!="mail-server"}) * 100) / max by (device,name) (node_filesystem_size_bytes) < 10) and (max by (device,name) (node_filesystem_avail_bytes) / 1024 / 1024 / 1024) < 100
|
# Fires when a non-tmpfs filesystem on a non-container target has under 10% free space; PromQL label values must be quoted strings, hence container!="true".
expr: min by (device, name) (node_filesystem_avail_bytes{device!="tmpfs",container!="true"}) / max by (device,name) (node_filesystem_size_bytes) < 0.1
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: "{{ $labels.device }} on {{ $labels.name }} has less than 10% free diskspace."
|
description: "{{ $labels.device }} on {{ $labels.name }} has only {{ $value | humanizePercentage }} free diskspace."
|
||||||
- alert: out_of_inodes
|
- alert: out_of_inodes
|
||||||
expr: node_filesystem_files_free{fstype!="tmpfs"} / node_filesystem_files{fstype!="tmpfs"} * 100 < 10
|
expr: node_filesystem_files_free{fstype!="tmpfs"} / node_filesystem_files{fstype!="tmpfs"} * 100 < 10
|
||||||
for: 5m
|
for: 5m
|
||||||
|
@ -49,4 +49,4 @@ groups:
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: "mail queue {{ $labels.queue }} on {{ $labels.name }} has accumulated a waiting time of {{ $value }}s."
|
description: "mail queue {{ $labels.queue }} on {{ $labels.name }} has accumulated a waiting time of {{ $value | humanizeDuration }}."
|
||||||
|
|
Loading…
Reference in a new issue