Fix monitoring timeouts
This commit is contained in:
parent
f6c7992930
commit
92655376e0
|
@@ -3,28 +3,28 @@ groups:
|
|||
rules:
|
||||
- alert: probe_timeout
|
||||
expr: probe_success == 0
|
||||
-for: 10s
|
||||
+for: 60m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} probe {{ $labels.job}} failed for 60m.'
|
||||
- alert: nixpkgs
|
||||
expr: hydra_job_failed == 1
|
||||
-for: 10s
|
||||
+for: 2h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: '{{ $labels.packageName }} on nixpkgs branch {{ $labels.jobset }} failed for 2h.'
|
||||
- alert: node_down
|
||||
expr: (up{name!="apollo",instance!="hydra.nixos.org:443"} == 0)
|
||||
-for: 10s
|
||||
+for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: '{{ $labels.name }} is down for 5m.'
|
||||
- alert: systemd_service_failed
|
||||
expr: node_systemd_unit_state{state="failed"} == 1
|
||||
-for: 10s
|
||||
+for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
|
|
Loading…
Reference in a new issue