From 5086208e82ec972259bde01ce1801397e5bc8f40 Mon Sep 17 00:00:00 2001 From: Malte Brandy Date: Wed, 2 Dec 2020 03:42:42 +0100 Subject: [PATCH] Update health-status dashboard --- .../grafana-dashboards/health-status.json | 890 +++++++++++------- 1 file changed, 530 insertions(+), 360 deletions(-) diff --git a/nixos/roles/monitoring/grafana-dashboards/health-status.json b/nixos/roles/monitoring/grafana-dashboards/health-status.json index 9b75c91d..9e8bd23d 100644 --- a/nixos/roles/monitoring/grafana-dashboards/health-status.json +++ b/nixos/roles/monitoring/grafana-dashboards/health-status.json @@ -1,56 +1,65 @@ { - "annotations": { - "list": [{ - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - }] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 1, - "links": [], - "panels": [{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "links": [], + "panels": [ + { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": null, "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fill": 10, "fillGradient": 0, "gridPos": { - "h": 15, - "w": 12, - "x": 0, - "y": 0 + "h": 14, + "w": 12, + "x": 0, + "y": 0 }, "hiddenSeries": false, - "id": 6, + "id": 12, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true }, "lines": true, "linewidth": 0, "nullPointMode": "null as zero", "options": { - "dataLinks": [] + "alertThreshold": true }, "percentage": false, + "pluginVersion": "7.3.4", "pointradius": 2, "points": false, "renderer": "flot", @@ -58,266 +67,200 @@ "spaceLength": 10, "stack": true, "steppedLine": false, - "targets": [{ - "expr": "up{instance!=\"apollo:9100\"} +1 == 1", - "interval": "", - "legendFormat": "{{name}}", - "refId": "A" - }], + "targets": [ + { + "expr": "ALERTS{alert_type=\"infrastructure\"}", + "interval": "", + "legendFormat": "{{alertname}} for {{name}} ({{alertstate}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Active alerts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 0, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "up{instance!=\"apollo:9100\"} +1 == 1", + "interval": "", + "legendFormat": "{{name}}", + "refId": "A" + } + ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Unreachable nodes", "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + "shared": true, + "sort": 2, + "value_type": "individual" }, "transparent": true, "type": "graph", "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "yaxes": [{ - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }], + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], "yaxis": { - "align": false, - "alignLevel": null + "align": false, + "alignLevel": null } - }, { - "aliasColors": {}, - "axis": { - "align": false, - "alignLevel": null - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "fill": 10, - "fillGradient": 0, - "gridPos": { - "h": 15, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 0, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [{ - "expr": "node_systemd_unit_state{state=\"failed\",exported_name!~\"configure-printer.*\"} > 0", - "format": "time_series", - "interval": "", - "legendFormat": "{{exported_name}} from {{ name }}", - "refId": "A" - }], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Failed Systemd Units", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { + }, + { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "prometheus", "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, "fill": 10, "fillGradient": 0, "gridPos": { - "h": 18, - "w": 12, - "x": 0, - "y": 15 - }, - "hiddenSeries": false, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 0, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [{ - "targetBlank": true, - "title": "${__field.labels.packageName} status on hydra", - "url": "${__field.labels.url}" - }] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [{ - "expr": "hydra_job_failed > 0", - "interval": "", - "legendFormat": "{{packageName}} on {{jobset}}", - "refId": "A" - }], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Nixpkgs Build Failures", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "fill": 10, - "fillGradient": 0, - "gridPos": { - "h": 19, - "w": 12, - "x": 12, - "y": 15 + "h": 19, + "w": 12, + "x": 0, + "y": 14 }, "hiddenSeries": false, "id": 8, "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true }, "lines": true, "linewidth": 0, "nullPointMode": "null as zero", "options": { - "dataLinks": [] + "alertThreshold": true }, "percentage": false, + "pluginVersion": "7.3.4", "pointradius": 2, "points": false, "renderer": "flot", @@ -325,133 +268,360 @@ "spaceLength": 10, "stack": true, "steppedLine": false, - "targets": [{ - "expr": "1 - probe_success == 1", - "interval": "", - "legendFormat": "{{job}} against {{instance}}", - "refId": "A" - }], + "targets": [ + { + "expr": "1 - probe_success == 1", + "interval": "", + "legendFormat": "{{job}} against {{instance}}", + "refId": "A" + } + ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Probe Timeouts", "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" + "shared": true, + "sort": 2, + "value_type": "individual" }, "transparent": true, "type": "graph", "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "yaxes": [{ - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }], + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], "yaxis": { - "align": false, - "alignLevel": null + "align": false, + "alignLevel": null } - }, { - "columns": [{ - "text": "Current", - "value": "current" - }], + }, + { + "aliasColors": {}, + "axis": { + "align": false, + "alignLevel": null + }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 15 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 0, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_systemd_unit_state{state=\"failed\",exported_name!~\"configure-printer.*\"} > 0", + "format": "time_series", + "interval": "", + "legendFormat": "{{exported_name}} from {{ name }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Failed Systemd Units", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [ + { + "targetBlank": true, + "title": "${__field.labels.packageName} status on hydra", + "url": "${__field.labels.url}" + } + ] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 18, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 0, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "hydra_job_failed > 0", + "interval": "", + "legendFormat": "{{packageName}} on {{jobset}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Nixpkgs Build Failures", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fontSize": "100%", "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 + "h": 8, + "w": 12, + "x": 0, + "y": 33 }, "id": 10, "pageSize": null, "pluginVersion": "6.7.4", "showHeader": true, "sort": { - "col": 0, - "desc": true + "col": 0, + "desc": true }, - "styles": [{ - "alias": "Time", - "align": "right", - "colorMode": null, - "colors": [ + "styles": [ + { + "alias": "Time", + "align": "right", + "colorMode": null, + "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Current", - "thresholds": [], - "type": "date", - "unit": "short" - }], - "targets": [{ - "expr": "hydra_job_completion_time * hydra_job_failed * 1000 > 0", - "format": "time_series", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{job}}", - "refId": "A" - }], + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [], + "type": "date", + "unit": "short" + } + ], + "targets": [ + { + "expr": "hydra_job_completion_time * hydra_job_failed * 1000 > 0", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{job}}", + "refId": "A" + } + ], "timeFrom": null, "timeShift": null, "title": "Last failed hydra builds", "transform": "timeseries_aggregations", - "type": "table" - }], - "refresh": "1m", - "schemaVersion": 22, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "Health Status", - "uid": "health-status", - "variables": { - "list": [] - }, - "version": 1 + "type": "table-old" + } + ], + "refresh": "1m", + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Health Status", + "uid": "health-status", + "version": 1 }