1
0
Fork 0

Improve monitoring and decontainerize

This commit is contained in:
Malte Brandy 2019-08-07 23:40:26 +02:00
parent 3da72aa0fd
commit f7801883c7
18 changed files with 552 additions and 595 deletions

View file

@ -50,22 +50,16 @@ with lib;
hera-intern = "${hera-p}:1";
git = "${hera-p}:2";
borg = "${hera-p}:3";
dav = "${hera-p}:5";
blog = "${hera-p}:6";
chor = "${hera-p}:7";
matrix = "${hera-p}:8";
cloud = "${hera-p}:9";
web = "${hera-p}:a";
mathechor-cloud = "${hera-p}:b";
monitoring = "${hera-p}:c";
chor-cloud = "${hera-p}:b";
apollo = apollo-wg;
hera-intern-v4 = "${v4-p}.1";
cloud-intern-v4 = "${v4-p}.2";
mathechor-cloud-intern-v4 = "${v4-p}.3";
chor-cloud-intern-v4 = "${v4-p}.3";
matrix-intern-v4 = "${v4-p}.4";
monitoring-intern-v4 = "${v4-p}.5";
};
};
};

Binary file not shown.

View file

@ -3,7 +3,14 @@ with lib;
let
inherit (config.m-0.private) me cloud;
inherit (config.m-0) hosts;
certPath = "/var/lib/acme";
nextcloud-container = { v6, v4, hostname, news-updater ? false }: {
bindMounts = {
"${certPath}" = {
hostPath = certPath;
isReadOnly = false;
};
};
autoStart = true;
privateNetwork = true;
hostBridge = "bridge";
@ -35,14 +42,7 @@ let
};
services = {
nginx = {
virtualHosts."${hostname}" = {
forceSSL = true;
enableACME = true;
default = true;
};
};
prometheus.exporters.node.openFirewall = true;
nextcloud = {
enable = true;
@ -55,6 +55,7 @@ let
memcached = false;
};
config = {
#extraTrustedDomains = [ "2a02:c207:3002:7584::3:1" ];
dbtype = "pgsql";
dbname = "nextcloud";
dbuser = "nextcloud";
@ -97,7 +98,7 @@ let
mode = "singlerun";
};
});
in "${pkgs.nextcloud-news-updater}/bin/nextcloud-news-updater -c ${config}";
in "${pkgs.nextcloud-news-updater}/bin/nextcloud-news-updater -c ${config}";
};
};
};
@ -106,14 +107,47 @@ let
};
in {
services = {
nginx = {
enable = true;
virtualHosts."cloud.maralorn.de" = {
enableACME = true;
forceSSL = true;
locations."/" = {
proxyPass = "http://cloud";
extraConfig = ''
proxy_set_header Host $host;
'';
# proxy_set_header X-Forwarded-Host :$server_port;
# proxy_set_header X-Forwarded-Server $host;
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
};
};
virtualHosts."cloud.mathechor.de" = {
enableACME = true;
forceSSL = true;
locations."/" = {
proxyPass = "http://chor-cloud";
extraConfig = ''
proxy_set_header Host $host;
'';
# extraConfig = ''
# proxy_set_header X-Forwarded-Host :$server_port;
# proxy_set_header X-Forwarded-Server $host;
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# '';
};
};
};
};
m-0.monitoring = [
{
name = "mathechor-cloud";
host = "mathechor-cloud:9100";
name = "chor-cloud";
host = "chor-cloud:9100";
}
{
name = "mathechor-cloud-nginx";
host = "mathechor-cloud:9113";
name = "chor-cloud-nginx";
host = "chor-cloud:9113";
}
{
name = "cloud";
@ -127,8 +161,8 @@ in {
containers = {
chor-cloud = nextcloud-container {
hostname = "cloud.mathechor.de";
v6 = hosts.mathechor-cloud;
v4 = hosts.mathechor-cloud-intern-v4;
v6 = hosts.chor-cloud;
v4 = hosts.chor-cloud-intern-v4;
};
cloud = nextcloud-container {
hostname = "cloud.maralorn.de";

View file

@ -13,16 +13,23 @@ in {
../../system/standalone
../../system/server
../../system/git.nix
../../system/riot.nix
../../system/mathechor.de.nix
../../system/monitoring
../../system/blog.nix
./web.nix
./borg.nix
./mail.nix
./boot.nix
./cloud.nix
./web.nix
./monitoring.nix
./network.nix
./matrix.nix
./secret
];
m-0.monitoring = [{
name = "hera";
host = "hera-intern:9100";
}];
nix.sshServe = {
enable = true;

View file

@ -6,198 +6,126 @@ let
in {
networking.firewall.allowedTCPPorts = [ 3478 8448 ];
m-0.monitoring = [
{
name = "matrix";
host = "matrix:9100";
}
{
name = "matrix-nginx";
host = "matrix:9113";
}
];
services.coturn = {
enable = true;
pkey = "/var/lib/acme/hera.m-0.eu/key.pem";
cert = "/var/lib/acme/hera.m-0.eu/fullchain.pem";
no-tcp = true;
static-auth-secret = config.m-0.private.turn_secret;
realm = "maralorn.de";
use-auth-secret = true;
};
containers.matrix = {
autoStart = true;
privateNetwork = true;
hostBridge = "bridge";
config = { pkgs, lib, ... }: {
imports = [ ../../system ];
networking = {
interfaces.eth0 = {
ipv6.addresses = [{
address = hosts.matrix;
prefixLength = 112;
}];
ipv4.addresses = [{
address = hosts.matrix-intern-v4;
prefixLength = 24;
}];
};
inherit (config.networking) nameservers;
defaultGateway6 = {
address = hosts.hera-intern;
interface = "eth0";
};
defaultGateway = {
address = hosts.hera-intern-v4;
interface = "eth0";
};
firewall.allowedTCPPorts = [ 80 443 8448 ];
};
m-0.riot = {
enable = true;
hostname = "riot.maralorn.de";
config = {
default_hs_url = "https://matrix.maralorn.de";
default_is_url = "https://vector.im";
integrations_ui_url = "";
integrations_rest_url = "";
integrations_widgets_urls = [ ];
bug_report_endpoint_url = "https://riot.im/bugreports/submit";
welcomeUserId = "@riot-bot:matrix.org";
piwik = false;
features = {
feature_lazyloading = "enable";
feature_room_breadcrumbs = "enable";
};
roomDirectory = { servers = [ "matrix.org" "maralorn.de" ]; };
branding = {
welcomeBackgroundUrl =
"https://cloud.maralorn.de/apps/theming/image/background";
services = {
coturn = {
enable = true;
pkey = "/var/lib/acme/hera.m-0.eu/key.pem";
cert = "/var/lib/acme/hera.m-0.eu/fullchain.pem";
no-tcp = true;
static-auth-secret = config.m-0.private.turn_secret;
realm = "maralorn.de";
use-auth-secret = true;
};
nginx = {
enable = true;
virtualHosts."${hostName}" = {
forceSSL = true;
enableACME = true;
locations = {
"/" = {
proxyPass = "http://[::1]:8008";
extraConfig = ''
proxy_set_header X-Forwarded-For $remote_addr;
'';
};
};
};
services = {
nginx = {
enable = true;
virtualHosts."${hostName}" = {
forceSSL = true;
enableACME = true;
locations = {
"/" = {
proxyPass = "http://[::1]:8008";
extraConfig = ''
proxy_http_version 1.1;
proxy_set_header X-Forwarded-For $remote_addr;
'';
};
};
};
};
};
# Postgres
postgresql = { enable = true; };
# Postgres
postgresql = { enable = true; };
# Synapse
matrix-synapse = {
enable = true;
enable_metrics = true;
server_name = "maralorn.de";
public_baseurl = "https://${hostName}";
url_preview_enabled = true;
database_type = "psycopg2";
max_upload_size = "30M";
create_local_database = false;
dynamic_thumbnails = true;
macaroon_secret_key = config.m-0.private.macaroon_secret;
turn_uris = [ "turn:hera.m-0.eu:3478?transport=udp" ];
turn_shared_secret = config.m-0.private.turn_secret;
turn_user_lifetime = "5h";
allow_guest_access = true;
logConfig = ''
version: 1
# Synapse
matrix-synapse = {
enable = true;
enable_metrics = true;
server_name = "maralorn.de";
public_baseurl = "https://${hostName}";
url_preview_enabled = true;
database_type = "psycopg2";
max_upload_size = "30M";
create_local_database = false;
dynamic_thumbnails = true;
macaroon_secret_key = config.m-0.private.macaroon_secret;
turn_uris = [ "turn:hera.m-0.eu:3478?transport=udp" ];
turn_shared_secret = config.m-0.private.turn_secret;
turn_user_lifetime = "5h";
allow_guest_access = true;
logConfig = ''
version: 1
formatters:
journal_fmt:
format: '%(name)s: [%(request)s] %(message)s'
formatters:
journal_fmt:
format: '%(name)s: [%(request)s] %(message)s'
filters:
context:
(): synapse.util.logcontext.LoggingContextFilter
request: ""
filters:
context:
(): synapse.util.logcontext.LoggingContextFilter
request: ""
handlers:
journal:
class: systemd.journal.JournalHandler
formatter: journal_fmt
filters: [context]
SYSLOG_IDENTIFIER: synapse
handlers:
journal:
class: systemd.journal.JournalHandler
formatter: journal_fmt
filters: [context]
SYSLOG_IDENTIFIER: synapse
disable_existing_loggers: True
disable_existing_loggers: True
loggers:
synapse:
level: WARN
synapse.storage.SQL:
level: WARN
loggers:
synapse:
level: WARN
synapse.storage.SQL:
level: WARN
root:
level: WARN
handlers: [journal]
'';
database_args = {
user = "matrix-synapse";
database = "matrix-synapse";
cp_min = 5;
cp_max = 10;
};
report_stats = true;
tls_certificate_path = "/var/lib/acme/${hostName}/fullchain.pem";
tls_private_key_path = "/var/lib/acme/${hostName}/key.pem";
listeners = [
root:
level: WARN
handlers: [journal]
'';
database_args = {
user = "matrix-synapse";
database = "matrix-synapse";
cp_min = 5;
cp_max = 10;
};
report_stats = true;
tls_certificate_path = "/var/lib/acme/${hostName}/fullchain.pem";
tls_private_key_path = "/var/lib/acme/${hostName}/key.pem";
listeners = [
{
port = 8448;
bind_address = "::";
resources = [{
compress = false;
names = [ "federation" ];
}];
x_forwarded = false;
}
{
port = 8008;
bind_address = "::1";
resources = [
{
port = 8448;
bind_address = "::";
resources = [
{
compress = true;
names = [ "client" ];
}
{
compress = false;
names = [ "federation" ];
}
];
x_forwarded = false;
compress = false;
names = [ "client" ];
}
{
port = 8008;
bind_address = "::1";
resources = [
{
compress = false;
names = [ "client" ];
}
{
compress = false;
names = [ "federation" ];
}
];
x_forwarded = true;
tls = false;
compress = false;
names = [ "federation" ];
}
];
};
};
security.acme.certs = {
"${hostName}" = {
group = "matrix-synapse";
allowKeysForGroup = true;
postRun =
"systemctl reload nginx.service; systemctl restart matrix-synapse.service";
};
};
x_forwarded = true;
tls = false;
}
];
};
};
security.acme.certs = {
"${hostName}" = {
group = "matrix-synapse";
allowKeysForGroup = true;
postRun =
"systemctl reload nginx.service; systemctl restart matrix-synapse.service";
};
};

View file

@ -1,179 +0,0 @@
# NixOS host module: runs Prometheus and Alertmanager inside a bridged
# container named `monitoring` and adds two node-exporter scrape targets.
#
# NOTE(review): the alert annotations and the Alertmanager `group_by` below
# refer to a label `alias`, while the scrape config in this file only attaches
# a label `name` to each target. Nothing visible here sets `alias` -- confirm.
{ config, ... }:
let inherit (config.m-0) hosts;
in {
# iptables match for the host's node exporter: TCP port 9100 on every
# interface except ens18.
# NOTE(review): presumably only effective when the exporter's openFirewall
# option is enabled elsewhere -- confirm.
services.prometheus.exporters.node = {
firewallFilter = "! -i ens18 -p tcp -m tcp --dport 9100";
};
# Scrape targets; the container's scrapeConfigs below turn every entry of
# config.m-0.monitoring into one static target labelled with its `name`.
m-0.monitoring = [
{
name = "hera";
host = "hera-intern:9100";
}
{
# Prometheus runs inside the container, so localhost:9100 is the node
# exporter enabled at the bottom of the container config.
name = "monitoring-container";
host = "localhost:9100";
}
];
containers.monitoring = {
autoStart = true;
privateNetwork = true;
hostBridge = "bridge";
# The inner module function does not bind `config`, so every `config.*`
# reference below resolves to the outer (host) configuration.
config = { pkgs, lib, ... }: {
imports = [ ../../system ];
networking = {
# Static addresses taken from the host table (config.m-0.hosts).
interfaces.eth0 = {
ipv6.addresses = [{
address = hosts.monitoring;
prefixLength = 112;
}];
ipv4.addresses = [{
address = hosts.monitoring-intern-v4;
prefixLength = 24;
}];
};
# Reuse the host's nameservers inside the container.
inherit (config.networking) nameservers;
# Both default routes point at the host's internal addresses.
defaultGateway6 = {
address = hosts.hera-intern;
interface = "eth0";
};
defaultGateway = {
address = hosts.hera-intern-v4;
interface = "eth0";
};
# 9093 is the Alertmanager port used by alertmanagerURL below; 9090 is
# presumably the Prometheus web port -- confirm.
firewall.allowedTCPPorts = [ 9090 9093 ];
};
services.prometheus = {
enable = true;
# Alerting rules in the ALERT/IF/FOR/LABELS/ANNOTATIONS text format; every
# rule carries severity="page".
rules = [''
ALERT node_down
IF (up{name!="apollo"} == 0)
FOR 5m
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.name}}: Node is down.",
description = "{{$labels.name}} has been down for more than 5 minutes."
}
ALERT node_systemd_service_failed
IF node_systemd_unit_state{state="failed"} == 1
FOR 4m
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.name}}: Service {{$labels.exported_name}} failed.",
description = "{{$labels.name}} failed to (re)start service {{$labels.exported_name}}."
}
ALERT node_filesystem_full_90percent
IF sort(node_filesystem_free{device!="ramfs"} < node_filesystem_size{device!="ramfs"} * 0.1) / 1024^3
FOR 5m
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.alias}}: Filesystem is running out of space soon.",
description = "{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}} got less than 10% space left on its filesystem."
}
ALERT node_filesystem_full_in_4h
IF predict_linear(node_filesystem_free{device!="ramfs"}[1h], 4*3600) <= 0
FOR 5m
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.alias}}: Filesystem is running out of space in 4 hours.",
description = "{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}} is running out of space of in approx. 4 hours"
}
ALERT node_filedescriptors_full_in_3h
IF predict_linear(node_filefd_allocated[1h], 3*3600) >= node_filefd_maximum
FOR 20m
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.alias}} is running out of available file descriptors in 3 hours.",
description = "{{$labels.alias}} is running out of available file descriptors in approx. 3 hours"
}
ALERT node_load1_90percent
IF node_load1 / on(alias) count(node_cpu{mode="system"}) by (alias) >= 0.9
FOR 1h
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.alias}}: Running on high load.",
description = "{{$labels.alias}} is running with > 90% total load for at least 1h."
}
ALERT node_cpu_util_90percent
IF 100 - (avg by (alias) (irate(node_cpu{mode="idle"}[5m])) * 100) >= 90
FOR 1h
LABELS {
severity="page"
}
ANNOTATIONS {
summary = "{{$labels.alias}}: High CPU utilization.",
description = "{{$labels.alias}} has total CPU utilization over 90% for at least 1h."
}
ALERT node_ram_using_90percent
IF node_memory_MemFree + node_memory_Buffers + node_memory_Cached < node_memory_MemTotal * 0.1
FOR 30m
LABELS {
severity="page"
}
ANNOTATIONS {
summary="{{$labels.alias}}: Using lots of RAM.",
description="{{$labels.alias}} is using at least 90% of its RAM for at least 30 minutes now.",
}
ALERT node_swap_using_80percent
IF node_memory_SwapTotal - (node_memory_SwapFree + node_memory_SwapCached) > node_memory_SwapTotal * 0.8
FOR 10m
LABELS {
severity="page"
}
ANNOTATIONS {
summary="{{$labels.alias}}: Running out of swap soon.",
description="{{$labels.alias}} is using 80% of its swap space for at least 10 minutes now."
}
''];
# One job; each m-0.monitoring entry becomes its own static config so it can
# carry an individual `name` label.
scrapeConfigs = [{
job_name = "nodes";
static_configs = map (entry: {
targets = [ entry.host ];
labels = { "name" = entry.name; };
}) config.m-0.monitoring;
}];
alertmanagerURL = [ "http://localhost:9093" ];
alertmanager = {
enable = true;
listenAddress = "0.0.0.0";
configuration = {
# Outgoing mail settings; the password comes from the private config.
"global" = {
"smtp_smarthost" = "hera.m-0.eu:587";
"smtp_from" = "alertmanager@m-0.eu";
"smtp_auth_username" = "alertmanager@m-0.eu";
"smtp_auth_password" = config.m-0.private.alertmanager-mail-pw;
};
# Single route: everything goes to the "team-admins" receiver.
"route" = {
"group_by" = [ "alertname" "alias" ];
"group_wait" = "30s";
"group_interval" = "2m";
"repeat_interval" = "4h";
"receiver" = "team-admins";
};
"receivers" = [{
"name" = "team-admins";
"email_configs" = [{
"to" = "malte.brandy@maralorn.de";
"send_resolved" = true;
}];
}];
};
};
# Node exporter inside the container (the "monitoring-container" target).
exporters.node.enable = true;
};
};
};
}

View file

@ -24,6 +24,7 @@ in {
firewall = {
extraCommands = ''
ip6tables -A INPUT -s ${config.m-0.prefix}::/64 -j ACCEPT
ip6tables -A FORWARD -p ipv6-icmp -j ACCEPT
ip6tables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
ip6tables -A FORWARD ! -s ${config.m-0.prefix}::/64 -j DROP

View file

@ -524,8 +524,8 @@ in {
'';
};
extraConfig = ''
more_set_headers "Content-Security-Policy: frame-ancestors 'self' https://*.mathechor.de";
add_header X-Content-Type-Options nosniff;
add_header X-Content-Security "frame-ancestors 'http://*.mathechor.de'";;
add_header X-XSS-Protection "1; mode=block";
add_header X-Robots-Tag none;
add_header X-Download-Options noopen;

View file

@ -1,117 +1,34 @@
{ config, ... }:
let
inherit (config.m-0) hosts;
certPath = "/var/lib/acme/hera.m-0.eu";
in {
networking.firewall.allowedTCPPorts = [ 80 443 ];
m-0.monitoring = [
{
name = "web";
host = "web:9100";
}
{
name = "web-nginx";
host = "web:9113";
}
];
services.sniproxy = {
enable = true;
config = ''
error_log {
syslog daemon
priority warn
}
access_log {
syslog daemon
priority error
}
listen 80 {
proto http
}
listen 443 {
proto tls
}
listen 8448 {
proto tls
table matrix
fallback ${hosts.matrix}:8448
}
table {
cloud.maralorn.de ${hosts.cloud}
cloud.mathechor.de ${hosts.mathechor-cloud}
matrix.maralorn.de ${hosts.matrix}
riot.maralorn.de ${hosts.matrix}
.* ${hosts.web}
}
table matrix {
.* ${hosts.matrix}
}
'';
};
containers.web = {
bindMounts = {
"/var/www" = { hostPath = "/var/www"; };
"${certPath}" = {
hostPath = certPath;
isReadOnly = false;
};
locations = {
"/" = {
extraConfig = ''
return 200 "Hello there. I hope you are having a very nice day! If you don't know what to find here, you probably don't care about this domain.";
'';
};
autoStart = true;
privateNetwork = true;
hostBridge = "bridge";
config = { pkgs, lib, ... }: {
imports = [ ../../system ../../system/blog.nix ];
networking = {
interfaces.eth0 = {
ipv6.addresses = [{
address = config.m-0.hosts.web;
prefixLength = 112;
}];
};
inherit (config.networking) nameservers;
defaultGateway6 = {
address = config.m-0.hosts.hera-intern;
interface = "eth0";
};
firewall.allowedTCPPorts = [ 80 443 ];
};
in {
m-0.monitoring = [{
name = "hera-nginx";
host = "hera-intern:9113";
}];
services = {
nginx = {
enable = true;
virtualHosts."hera.m-0.eu" = {
enableACME = true;
forceSSL = true;
inherit locations;
};
m-0 = {
mathechor-de = {
enable = true;
password = config.m-0.private.mathechor-pw;
};
};
services = {
nginx = {
enable = true;
virtualHosts."hera.m-0.eu" = {
enableACME = true;
forceSSL = true;
locations = {
"/" = {
extraConfig = ''
return 200 "Hello there. I hope you are having a very nice day! If you don't know what to find here, you probably don't care about this domain.";
'';
};
};
};
virtualHosts."maralorn.de" = {
enableACME = true;
forceSSL = true;
locations = {
"/.well-known/matrix/server" = {
extraConfig = ''
default_type application/json;
return 200 "{\"m.server\": \"matrix.maralorn.de:443\"}";
'';
};
"/" = {
extraConfig = ''
return 200 "Hello there. I hope you are having a very nice day! If you don't know what to find here, you probably don't care about this domain.";
'';
};
};
virtualHosts."maralorn.de" = {
enableACME = true;
forceSSL = true;
locations = locations // {
"/.well-known/matrix/server" = {
extraConfig = ''
default_type application/json;
return 200 "{\"m.server\": \"matrix.maralorn.de:443\"}";
'';
};
};
};

View file

@ -1,6 +1,4 @@
{ config, pkgs, lib, ... }: {
networking.firewall.allowedTCPPorts = [ 80 443 ];
services = {
nginx = {
enable = true;

View file

@ -7,8 +7,6 @@ in {
../cachix.nix
../common
./modules/laptop.nix
./modules/mathechor.de.nix
./modules/riot.nix
./modules/loginctl-linger.nix
];
@ -52,7 +50,6 @@ in {
prometheus.exporters = {
node = {
enable = true;
openFirewall = true;
enabledCollectors = [ "systemd" "logind" ];
disabledCollectors = [ "timex" ];
};

42
system/mathechor.de.nix Normal file
View file

@ -0,0 +1,42 @@
# nginx virtual hosts for the mathechor.de web presence:
#   * mathechor.de / www.mathechor.de -- public static site
#   * intern.mathechor.de             -- static site behind HTTP basic auth,
#     plus a proxied calendar export from cloud.mathechor.de
{ config, pkgs, lib, ... }:
let inherit (config.m-0.private) mathechor-pw;
in {
  services = {
    nginx = {
      enable = true;
      virtualHosts."mathechor.de" = {
        serverAliases = [ "www.mathechor.de" ];
        forceSSL = true;
        enableACME = true;
        locations = {
          "/" = {
            root = "/var/www/mathechor/public";
            index = "index.html";
            # Allow cross-origin use of the .otf font files. The dot is
            # escaped (`\\.` in a Nix string yields `\.` for nginx) so that
            # only a literal ".otf" suffix matches, not any character
            # followed by "otf". The space in `$ {` keeps Nix from reading
            # `${` as an interpolation.
            extraConfig =
              "location ~* \\.(otf)$ {add_header Access-Control-Allow-Origin *;}";
          };
        };
      };
      virtualHosts."intern.mathechor.de" = {
        forceSSL = true;
        enableACME = true;
        # Single basic-auth user "mathechor"; password from the private config.
        basicAuth.mathechor = mathechor-pw;
        locations = {
          "/" = {
            root = "/var/www/mathechor/intern";
            index = "index.html";
          };
          # Expose the public calendar export under a stable local path.
          "/mathechor.ics" = {
            proxyPass =
              "https://cloud.mathechor.de/remote.php/dav/public-calendars/nebsfFTzQKGSSsDc?export";
            # Send SNI with the upstream's host name on the TLS connection.
            extraConfig = ''
              proxy_ssl_name cloud.mathechor.de;
              proxy_ssl_server_name on;
            '';
          };
        };
      };
    };
  };
}

View file

@ -1,62 +0,0 @@
# Option module `m-0.mathechor-de`: when enabled, opens ports 80/443 and
# serves the mathechor.de sites through nginx.
#   enable   -- switch for the whole module (default: false)
#   password -- basic-auth password for user "mathechor" on intern.mathechor.de
{ config, pkgs, lib, ... }:
with lib;
{
  options = {
    m-0.mathechor-de = {
      enable = mkOption {
        type = types.bool;
        default = false;
      };
      password = mkOption { type = types.str; };
    };
  };
  config = mkIf config.m-0.mathechor-de.enable {
    networking.firewall.allowedTCPPorts = [ 80 443 ];
    services = {
      nginx = {
        enable = true;
        # Public static site.
        virtualHosts."mathechor.de" = {
          serverAliases = [ "www.mathechor.de" ];
          forceSSL = true;
          enableACME = true;
          locations = {
            "/" = {
              root = "/var/www/mathechor/public";
              index = "index.html";
              # Allow cross-origin use of the .otf font files. The dot is
              # escaped (`\\.` in a Nix string yields `\.` for nginx) so only
              # a literal ".otf" suffix matches. The space in `$ {` keeps Nix
              # from reading `${` as an interpolation.
              extraConfig =
                "location ~* \\.(otf)$ {add_header Access-Control-Allow-Origin *;}";
            };
          };
        };
        # Members-only site behind basic auth.
        virtualHosts."intern.mathechor.de" = {
          forceSSL = true;
          enableACME = true;
          basicAuth.mathechor = config.m-0.mathechor-de.password;
          locations = {
            "/" = {
              root = "/var/www/mathechor/intern";
              index = "index.html";
            };
            # Expose the public calendar export under a stable local path.
            "/mathechor.ics" = {
              proxyPass =
                "https://cloud.mathechor.de/remote.php/dav/public-calendars/nebsfFTzQKGSSsDc?export";
              # Send SNI with the upstream's host name on the TLS connection.
              extraConfig = ''
                proxy_ssl_name cloud.mathechor.de;
                proxy_ssl_server_name on;
              '';
            };
          };
        };
      };
    };
  };
}

View file

@ -1,37 +0,0 @@
# Option module `m-0.riot`: serves the riot-web client from nginx.
#   enable   -- switch for the module (default: false)
#   hostname -- virtual host name to serve the client on
#   config   -- attribute set rendered as JSON and returned at /config.json
{ config, pkgs, lib, ... }:
let
  inherit (lib) mkIf mkOption types;
  riot = config.m-0.riot;
in {
  options.m-0.riot = {
    enable = mkOption {
      default = false;
      type = types.bool;
    };
    hostname = mkOption { type = types.str; };
    config = mkOption { type = types.attrs; };
  };

  config = mkIf riot.enable {
    services.nginx = {
      enable = true;
      virtualHosts.${riot.hostname} = {
        forceSSL = true;
        enableACME = true;
        # The web root is the riot-web package from the <unstable> channel.
        root = (import <unstable> { }).riot-web;
        # The client configuration is answered directly by nginx instead of
        # being read from a file in the web root.
        locations."/config.json".extraConfig = ''
          default_type application/json;
          return 200 '${builtins.toJSON riot.config}';
        '';
      };
    };
  };
}

View file

@ -0,0 +1,76 @@
# blackbox_exporter probe modules. Each key under `modules` is a module name
# that a scrape job selects through its `module` URL parameter.
# NOTE(review): apart from http_2xx these look like the upstream example
# modules (they probe prometheus.io / 127.0.0.1) -- confirm which are needed.
modules:
  # Plain HTTP GET that succeeds on any 2xx status.
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_status_codes: [] # Defaults to 2xx
      method: GET
  # TCP connect followed by a TLS handshake.
  tls_connect:
    prober: tcp
    timeout: 5s
    tcp:
      tls: true
  # Bare TCP connect.
  tcp_connect:
    prober: tcp
    timeout: 5s
  # IMAP conversation that upgrades to TLS and checks the capability reply.
  imap_starttls:
    prober: tcp
    timeout: 5s
    tcp:
      query_response:
        - expect: "OK.*STARTTLS"
        - send: ". STARTTLS"
        - expect: "OK"
        - starttls: true
        - send: ". capability"
        - expect: "CAPABILITY IMAP4rev1"
  # SMTP conversation that upgrades to TLS and expects AUTH to be offered.
  smtp_starttls:
    prober: tcp
    timeout: 5s
    tcp:
      query_response:
        - expect: "^220 ([^ ]+) ESMTP (.+)$"
        - send: "EHLO prober"
        - expect: "^250-STARTTLS"
        - send: "STARTTLS"
        - expect: "^220"
        - starttls: true
        - send: "EHLO prober"
        - expect: "^250-AUTH"
        - send: "QUIT"
  icmp_example:
    prober: icmp
    timeout: 5s
    icmp:
      preferred_ip_protocol: "ip4"
      source_ip_address: "127.0.0.1"
  dns_udp_example:
    prober: dns
    timeout: 5s
    dns:
      query_name: "www.prometheus.io"
      query_type: "A"
      valid_rcodes:
        - NOERROR
      validate_answer_rrs:
        fail_if_matches_regexp:
          - ".*127.0.0.1"
        fail_if_not_matches_regexp:
          - "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
      validate_authority_rrs:
        fail_if_matches_regexp:
          - ".*127.0.0.1"
      validate_additional_rrs:
        fail_if_matches_regexp:
          - ".*127.0.0.1"
  dns_soa:
    prober: dns
    dns:
      query_name: "prometheus.io"
      query_type: "SOA"
  dns_tcp_example:
    prober: dns
    dns:
      transport_protocol: "tcp" # defaults to "udp"
      preferred_ip_protocol: "ip4" # defaults to "ip6"
      query_name: "www.prometheus.io"

View file

@ -0,0 +1,104 @@
# Monitoring stack running directly on the host: Prometheus 2, Alertmanager
# and the blackbox exporter, with nginx virtual hosts (ACME certificate plus
# HTTP basic auth) in front of the two web interfaces.
{ config, ... }:
let
  inherit (config.m-0.private) monitoring-guest-pw monitoring-pw;
in {
  services = {
    nginx = {
      enable = true;
      # Proxies to localhost:9090 (presumably the Prometheus web port --
      # confirm). Two basic-auth users: "maralorn" and "guest".
      virtualHosts."monitoring.maralorn.de" = {
        enableACME = true;
        basicAuth.maralorn = monitoring-pw;
        basicAuth.guest = monitoring-guest-pw;
        forceSSL = true;
        locations."/" = { proxyPass = "http://localhost:9090"; };
      };
      # Proxies to Alertmanager on port 9093; only the "maralorn" user.
      virtualHosts."alerts.maralorn.de" = {
        enableACME = true;
        basicAuth.maralorn = monitoring-pw;
        forceSSL = true;
        locations."/" = { proxyPass = "http://localhost:9093"; };
      };
    };
    prometheus = {
      exporters = {
        blackbox = {
          enable = true;
          configFile = ./blackbox_rules.yml;
        };
      };
      alertmanager = {
        enable = true;
        # NOTE(review): binds every interface although the only clients
        # visible in this file (nginx and Prometheus) connect via localhost.
        # Consider a loopback address if nothing else needs access.
        listenAddress = "0.0.0.0";
        configuration = {
          # Outgoing mail settings; the password comes from the private config.
          "global" = {
            "smtp_smarthost" = "hera.m-0.eu:587";
            "smtp_from" = "alertmanager@m-0.eu";
            "smtp_auth_username" = "alertmanager@m-0.eu";
            "smtp_auth_password" = config.m-0.private.alertmanager-mail-pw;
          };
          # Single route: everything goes to the "team-admins" receiver.
          "route" = {
            # Group per alert and per host. The "nodes" scrape job below
            # labels its targets with `name`; no `alias` label is set
            # anywhere in this file, so grouping on it had no effect.
            "group_by" = [ "alertname" "name" ];
            "group_wait" = "30s";
            "group_interval" = "2m";
            "repeat_interval" = "4h";
            "receiver" = "team-admins";
          };
          "receivers" = [{
            "name" = "team-admins";
            "email_configs" = [{
              "to" = "malte.brandy@maralorn.de";
              "send_resolved" = true;
            }];
          }];
        };
      };
    };
    prometheus2 = {
      enable = true;
      ruleFiles = [ ./rules.yml ];
      scrapeConfigs = [
        {
          # HTTP probes through the blackbox exporter using its http_2xx
          # module.
          job_name = "blackbox";
          metrics_path = "/probe";
          params = { module = [ "http_2xx" ]; };
          static_configs = [{
            targets = [
              "https://blog.maralorn.de"
              "https://www.mathechor.de"
              "https://cloud.mathechor.de/login"
              "https://cloud.maralorn.de/login"
              "https://riot.maralorn.de"
              "https://wiki.vocalensemble-darmstadt.de"
              "https://intern.vocalensemble-darmstadt.de"
              "https://www.vocalensemble-darmstadt.de"
            ];
          }];
          relabel_configs = [
            # Pass the listed URL to the exporter as the ?target= parameter.
            {
              source_labels = [ "__address__" ];
              target_label = "__param_target";
            }
            # Keep the probed URL as the `instance` label.
            {
              source_labels = [ "__param_target" ];
              target_label = "instance";
            }
            {
              target_label = "__address__";
              replacement = "localhost:9115";
            } # The blackbox exporter's real hostname:port.
          ];
        }
        {
          # One static config per m-0.monitoring entry so that every target
          # carries its own `name` label.
          job_name = "nodes";
          static_configs = map (entry: {
            targets = [ entry.host ];
            labels = { "name" = entry.name; };
          }) config.m-0.monitoring;
        }
      ];
      alertmanagerURL = [ "localhost:9093" ];
    };
  };
}

View file

@ -0,0 +1,97 @@
# Prometheus alerting rules; every alert is labelled severity "page".
#
# Per-host rules aggregate and template on the `name` label, the same label
# the node_down rule filters on. An `alias` label is not attached to the
# scraped series, so `on(alias)` / `by(alias)` collapsed all hosts into one
# group and `{{$labels.alias}}` rendered empty.
#
# NOTE(review): node_cpu, node_filesystem_free/size and node_memory_* are the
# metric names used by node_exporter before 0.16; newer exporters use
# *_seconds_total / *_bytes names -- confirm against the deployed version.
groups:
- name: rules
  rules:
  # A blackbox probe has been failing for 5 minutes.
  - alert: BlackBoxProbeTimeout
    expr: probe_success == 0
    for: 5m
    labels:
      severity: page
    annotations:
      description: 'A BlackBoxProbe timed out.'
      summary: 'Instance {{ $labels.instance }} does not respond as wished.'
  # A scrape target other than "apollo" is unreachable.
  - alert: node_down
    expr: (up{name!="apollo"} == 0)
    for: 5m
    labels:
      severity: page
    annotations:
      description: '{{ $labels.name }} has been down for more than 5 minutes.'
      summary: '{{$labels.name}}: Node is down.'
  - alert: systemd_service_failed
    expr: node_systemd_unit_state{state="failed"} == 1
    for: 4m
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} failed to (re)start service {{$labels.exported_name}}.'
      summary: '{{$labels.name}}: Service {{$labels.exported_name}} failed.'
  # No `for:` -- fires as soon as the change counts cross the thresholds.
  - alert: systemd_service_flapping
    expr: changes(node_systemd_unit_state{state="failed"}[5m]) > 5 or (changes(node_systemd_unit_state{state="failed"}[1h]) > 15 unless changes(node_systemd_unit_state{state="failed"}[30m]) < 7)
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}}: Service {{$labels.exported_name}} changed its state more than 5x/5min or 15x/1h'
      summary: '{{$labels.name}}: Service {{$labels.exported_name}} is flapping.'
  - alert: node_filesystem_full_90percent
    expr: sort(node_filesystem_free{device!="ramfs"} < node_filesystem_size{device!="ramfs"} * 0.1) / 1024 ^ 3
    for: 5m
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} device {{$labels.device}} on {{$labels.mountpoint}} got less than 10% space left on its filesystem.'
      summary: '{{$labels.name}}: Filesystem is running out of space soon.'
  - alert: node_filesystem_full_in_4h
    expr: predict_linear(node_filesystem_free{device!="ramfs"}[1h], 4 * 3600) <= 0
    for: 5m
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} device {{$labels.device}} on {{$labels.mountpoint}}
        is running out of space of in approx. 4 hours'
      summary: '{{$labels.name}}: Filesystem is running out of space in 4 hours.'
  - alert: node_filedescriptors_full_in_3h
    expr: predict_linear(node_filefd_allocated[1h], 3 * 3600) >= node_filefd_maximum
    for: 20m
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} is running out of available file descriptors
        in approx. 3 hours'
      summary: '{{$labels.name}} is running out of available file descriptors in
        3 hours.'
  # Load average divided by the host's CPU count (one "system" series per CPU).
  - alert: node_load1_90percent
    expr: node_load1 / on(name) count by(name) (node_cpu{mode="system"}) >= 0.9
    for: 1h
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} is running with > 90% total load for at least
        1h.'
      summary: '{{$labels.name}}: Running on high load.'
  - alert: node_cpu_util_90percent
    expr: 100 - (avg by(name) (irate(node_cpu{mode="idle"}[5m])) * 100) >= 90
    for: 1h
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} has total CPU utilization over 90% for at least
        1h.'
      summary: '{{$labels.name}}: High CPU utilization.'
  - alert: node_ram_using_90percent
    expr: node_memory_MemFree + node_memory_Buffers + node_memory_Cached < node_memory_MemTotal * 0.1
    for: 30m
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} is using at least 90% of its RAM for at least
        30 minutes now.'
      summary: '{{$labels.name}}: Using lots of RAM.'
  - alert: node_swap_using_80percent
    expr: node_memory_SwapTotal - (node_memory_SwapFree + node_memory_SwapCached) > node_memory_SwapTotal * 0.8
    for: 10m
    labels:
      severity: page
    annotations:
      description: '{{$labels.name}} is using 80% of its swap space for at least
        10 minutes now.'
      summary: '{{$labels.name}}: Running out of swap soon.'

40
system/riot.nix Normal file
View file

@ -0,0 +1,40 @@
# Serves the Riot web client on riot.maralorn.de. nginx delivers the static
# riot-web package and answers /config.json with the settings defined here.
{ config, pkgs, lib, ... }:
let
  # `../lib` is expected to expose an `unstable` package set.
  unstable = (import ../lib).unstable;

  # Client settings, rendered once to the JSON body returned at /config.json.
  riotConfigJson = builtins.toJSON {
    default_hs_url = "https://matrix.maralorn.de";
    default_is_url = "https://vector.im";
    integrations_ui_url = "";
    integrations_rest_url = "";
    integrations_widgets_urls = [ ];
    bug_report_endpoint_url = "https://riot.im/bugreports/submit";
    welcomeUserId = "@riot-bot:matrix.org";
    piwik = false;
    features = {
      feature_lazyloading = "enable";
      feature_room_breadcrumbs = "enable";
    };
    roomDirectory.servers = [ "matrix.org" "maralorn.de" ];
    branding.welcomeBackgroundUrl =
      "https://cloud.maralorn.de/apps/theming/image/background";
  };
in {
  services.nginx.enable = true;
  services.nginx.virtualHosts."riot.maralorn.de" = {
    forceSSL = true;
    enableACME = true;
    root = unstable.riot-web;
    # Answered directly by nginx rather than read from the web root.
    locations."/config.json".extraConfig = ''
      default_type application/json;
      return 200 '${riotConfigJson}';
    '';
  };
}