---
# Prometheus alerting rules: a single rule group named "rules".
# Every rule sets a `severity` label (critical or warning) and a templated
# `description` annotation; `for` is how long the expression must keep
# matching before the alert fires.
groups:
  - name: rules
    rules:
      # probe_success has been 0 for 60 minutes.
      - alert: probe_timeout
        expr: 'probe_success == 0'
        for: 60m
        labels:
          severity: critical
        annotations:
          description: '{{ $labels.instance }} probe {{ $labels.job}} failed.'

      # hydra_job_failed has been 1 for 2 hours.
      - alert: nixpkgs
        expr: 'hydra_job_failed == 1'
        for: 2h
        labels:
          severity: warning
        annotations:
          description: 'hydra build {{ $labels.packageName }} on nixpkgs branch {{ $labels.jobset }} failed.'

      # `up` has been 0 for 5 minutes. Series with name="apollo" or
      # instance="hydra.nixos.org:443" are excluded by the selector.
      - alert: node_down
        expr: 'up{name!="apollo",instance!="hydra.nixos.org:443"} == 0'
        for: 5m
        labels:
          severity: critical
        annotations:
          description: '{{ $labels.name }} is not reachable.'

      # A unit's state="failed" series has been 1 for 5 minutes. Units whose
      # exported_name matches configure-printer.* are excluded.
      - alert: systemd_service_failed
        expr: 'node_systemd_unit_state{state="failed",exported_name!~"configure-printer.*"} == 1'
        for: 5m
        labels:
          severity: critical
        annotations:
          description: 'service {{$labels.exported_name}} on {{$labels.name}} failed.'

      # Per (device, name): smallest available-bytes value divided by largest
      # size-bytes value has been below 0.1 for 5 minutes. The numerator skips
      # device="tmpfs" and inContainer="true" series.
      # The `>-` scalars are folded only to keep lines short; each still loads
      # as the same single-line string.
      - alert: out_of_diskspace
        expr: >-
          min by (device, name) (node_filesystem_avail_bytes{device!="tmpfs",inContainer!="true"})
          / max by (device,name) (node_filesystem_size_bytes) < 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          description: >-
            {{ $labels.device }} on {{ $labels.name }} has only
            {{ $value | humanizePercentage }} free diskspace.

      # Free inodes as a percentage of total inodes have been below 10 for
      # 5 minutes; fstype="tmpfs" series are excluded on both sides.
      - alert: out_of_inodes
        expr: 'node_filesystem_files_free{fstype!="tmpfs"} / node_filesystem_files{fstype!="tmpfs"} * 100 < 10'
        for: 5m
        labels:
          severity: warning
        annotations:
          description: 'mountpoint {{ $labels.mountpoint }} on {{ $labels.name }} out of inodes.'

      # postfix_showq_message_age_seconds_sum has been above 1800 for
      # 5 minutes. The description is folded only for line length.
      - alert: mailq
        expr: 'postfix_showq_message_age_seconds_sum > 1800'
        for: 5m
        labels:
          severity: warning
        annotations:
          description: >-
            mail queue {{ $labels.queue }} of {{ $labels.name }} has accumulated
            a waiting time of {{ $value | humanizeDuration }}.