1
0
Fork 0

Fix monitoring timeouts

This commit is contained in:
Malte Brandy 2020-06-02 05:02:19 +02:00
parent f6c7992930
commit 92655376e0

View file

@ -3,28 +3,28 @@ groups:
rules: rules:
- alert: probe_timeout - alert: probe_timeout
expr: probe_success == 0 expr: probe_success == 0
for: 10s for: 60m
labels: labels:
severity: critical severity: critical
annotations: annotations:
description: '{{ $labels.instance }} probe {{ $labels.job}} failed for 60m.' description: '{{ $labels.instance }} probe {{ $labels.job}} failed for 60m.'
- alert: nixpkgs - alert: nixpkgs
expr: hydra_job_failed == 1 expr: hydra_job_failed == 1
for: 10s for: 2h
labels: labels:
severity: warning severity: warning
annotations: annotations:
description: '{{ $labels.packageName }} on nixpkgs branch {{ $labels.jobset }} failed for 2h.' description: '{{ $labels.packageName }} on nixpkgs branch {{ $labels.jobset }} failed for 2h.'
- alert: node_down - alert: node_down
expr: (up{name!="apollo",instance!="hydra.nixos.org:443"} == 0) expr: (up{name!="apollo",instance!="hydra.nixos.org:443"} == 0)
for: 10s for: 5m
labels: labels:
severity: critical severity: critical
annotations: annotations:
description: '{{ $labels.name }} is down for 5m.' description: '{{ $labels.name }} is down for 5m.'
- alert: systemd_service_failed - alert: systemd_service_failed
expr: node_systemd_unit_state{state="failed"} == 1 expr: node_systemd_unit_state{state="failed"} == 1
for: 10s for: 5m
labels: labels:
severity: critical severity: critical
annotations: annotations: