1
0
Fork 0

Fix monitoring timeouts

This commit is contained in:
Malte Brandy 2020-06-02 05:02:19 +02:00
parent f6c7992930
commit 92655376e0

View file

@ -3,28 +3,28 @@ groups:
rules:
# Alert on a probe that keeps failing (probe_success == 0).
# The rule previously carried two `for` keys (10s and 60m). Duplicate mapping
# keys are invalid YAML, so only 60m is kept: it is the duration the
# description text reports.
- alert: probe_timeout
  expr: probe_success == 0
  for: 60m
  labels:
    severity: critical
  annotations:
    description: '{{ $labels.instance }} probe {{ $labels.job}} failed for 60m.'
# Alert on a failed Hydra job (hydra_job_failed == 1); severity is only
# `warning`.
# The rule previously carried two `for` keys (10s and 2h). Duplicate mapping
# keys are invalid YAML, so only 2h is kept: it is the duration the
# description text reports.
- alert: nixpkgs
  expr: hydra_job_failed == 1
  for: 2h
  labels:
    severity: warning
  annotations:
    description: '{{ $labels.packageName }} on nixpkgs branch {{ $labels.jobset }} failed for 2h.'
# Alert on a scrape target that is down (up == 0).
# The label matchers exclude series with name="apollo" or
# instance="hydra.nixos.org:443", so those never trigger this rule.
# The rule previously carried two `for` keys (10s and 5m). Duplicate mapping
# keys are invalid YAML, so only 5m is kept: it is the duration the
# description text reports.
- alert: node_down
  expr: (up{name!="apollo",instance!="hydra.nixos.org:443"} == 0)
  for: 5m
  labels:
    severity: critical
  annotations:
    description: '{{ $labels.name }} is down for 5m.'
- alert: systemd_service_failed
expr: node_systemd_unit_state{state="failed"} == 1
for: 10s
for: 5m
labels:
severity: critical
annotations: