# Patch summary (preamble text placed before the first "diff --git" header).
#
# This patch changes three files:
#
# 1. monitoring.yml
#    - Adds seven ":9100" targets under the "# VM" comment: dhcp, nem,
#      kenobi, roundcube-srv, kiwi, eap and radius (all *.adm.crans.org).
#    - Removes the gitzly.adm.crans.org:9100 target.
#
# 2. roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
#    - smtp_smarthost: 'localhost:25' -> 'smtp.adm.crans.org:25'
#    - smtp_from: 'alertmanager@crans.org' -> 'roots@crans.org'
#    - Adds "smtp_require_tls: false" to the "global" section.
#      NOTE(review): presumably this lets Alertmanager send mail to the
#      smarthost without requiring TLS -- confirm this is intended.
#
# 3. roles/prometheus/templates/prometheus/alert.rules.j2 (new file)
#    - Defines a single alert, InstanceDown: "up == 0" held FOR 5m, with
#      label severity = "page" and summary/description annotations.
#    - The rule is wrapped in {% raw %} ... {% endraw %} because, as the
#      template's own comment says, the "{{ $labels.* }}" placeholders
#      would otherwise conflict with Jinja2.
#
# NOTE(review): the whole patch sits on one physical line, so its original
# line breaks and YAML indentation appear to have been collapsed to single
# spaces. Regenerate it from the repository before trying to apply it.
diff --git a/monitoring.yml b/monitoring.yml index cc19a6bd..955294f6 100644 --- a/monitoring.yml +++ b/monitoring.yml @@ -12,8 +12,14 @@ - fy.adm.crans.org:9100 - vulcain.adm.crans.org:9100 # VM + - dhcp.adm.crans.org:9100 + - nem.adm.crans.org:9100 + - kenobi.adm.crans.org:9100 + - roundcube-srv.adm.crans.org:9100 + - kiwi.adm.crans.org:9100 + - eap.adm.crans.org:9100 + - radius.adm.crans.org:9100 - frontdaur.adm.crans.org:9100 - - gitzly.adm.crans.org:9100 - ethercalc-srv.adm.crans.org:9100 - ipv6-zayo.adm.crans.org:9100 - prometheus.adm.crans.org:9100 diff --git a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 index d7337c47..2442108b 100644 --- a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 +++ b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 @@ -2,10 +2,11 @@ global: # The smarthost and SMTP sender used for mail notifications. - smtp_smarthost: 'localhost:25' - smtp_from: 'alertmanager@crans.org' + smtp_smarthost: 'smtp.adm.crans.org:25' + smtp_from: 'roots@crans.org' #smtp_auth_username: 'alertmanager' #smtp_auth_password: 'password' + smtp_require_tls: false # The directory from which notification templates are read. templates: diff --git a/roles/prometheus/templates/prometheus/alert.rules.j2 b/roles/prometheus/templates/prometheus/alert.rules.j2 new file mode 100644 index 00000000..383a181e --- /dev/null +++ b/roles/prometheus/templates/prometheus/alert.rules.j2 @@ -0,0 +1,13 @@ +# {{ ansible_managed }} + +{# As this is also Jinja2 it will conflict without a raw block #} +{% raw %} +ALERT InstanceDown + IF up == 0 + FOR 5m + LABELS { severity = "page" } + ANNOTATIONS { + summary = "Instance {{ $labels.instance }} down", + description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.", + } +{% endraw %}