Merge branch 'alertmanager' into 'master'

[monitoring] Prometheus Alertmanager. See merge request nounous/ansible!8
2019-04-25 16:33:05 +00:00 · 2019-04-25 16:33:05 +00:00 · 84c3d66811
parent 8697aa0306 de63fbb99e
commit 84c3d66811
6 changed files with 91 additions and 2 deletions
--- a/monitoring.yml
+++ b/monitoring.yml
@ -23,6 +23,7 @@
          - localhost:9090
  roles:
    - prometheus
    - prometheus-alertmanager
 # Monitor all hosts
 - hosts: all
--- a/roles/prometheus-alertmanager/handlers/main.yml
+++ b/roles/prometheus-alertmanager/handlers/main.yml
@ -0,0 +1,5 @@
 ---
 # Handler: restart the prometheus-alertmanager service. It is triggered by
 # tasks that `notify` it by name ("Restart Prometheus Alertmanager").
 - name: Restart Prometheus Alertmanager
  service:
    name: prometheus-alertmanager
    state: restarted
--- a/roles/prometheus-alertmanager/tasks/main.yml
+++ b/roles/prometheus-alertmanager/tasks/main.yml
@ -0,0 +1,14 @@
 ---
 # Install the prometheus-alertmanager package with apt, refreshing the
 # package cache first.
 - name: Install Prometheus Alertmanager
  apt:
    update_cache: true
    name: prometheus-alertmanager
  # Keep the module result so the `until` condition below can inspect it.
  register: apt_result
  # Retry up to 3 times until apt reports success (presumably to ride out
  # transient apt/mirror failures -- TODO confirm the intent).
  retries: 3
  until: apt_result is succeeded
 # Render the Alertmanager configuration from the role template and notify
 # the restart handler whenever the rendered file changes.
 - name: Configure Prometheus Alertmanager
  template:
    src: prometheus/alertmanager.yml.j2
    dest: /etc/prometheus/alertmanager.yml
    # Set the permissions explicitly instead of depending on the umask or on
    # whatever mode the file already has. It must stay readable by the
    # Alertmanager service user; tighten this (and set owner/group) if SMTP
    # credentials are ever enabled in the template.
    mode: '0644'
  notify: Restart Prometheus Alertmanager
--- a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
+++ b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
@ -0,0 +1,57 @@
 # {{ ansible_managed }}
 # Global settings for mail notifications.
 global:
  # The smarthost and SMTP sender used for mail notifications: mail is
  # handed to the SMTP server on localhost, port 25.
  smtp_smarthost: 'localhost:25'
  smtp_from: 'alertmanager@crans.org'
  # SMTP authentication is disabled; uncomment and fill in the two settings
  # below to enable it.
  #smtp_auth_username: 'alertmanager'
  #smtp_auth_password: 'password'
 # Glob patterns for the notification template files to read: here, every
 # *.tmpl file in /etc/prometheus/alertmanager_templates/.
 templates: 
 - '/etc/prometheus/alertmanager_templates/*.tmpl'
 # The root route, through which every incoming alert enters.
 route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh are
  # batched into a single group.
  group_by: ['alertname', 'cluster', 'service']
  # When an incoming alert creates a new group, wait at least 'group_wait'
  # before sending the initial notification. This ensures that alerts of the
  # same group that start firing shortly after one another are batched
  # together in the first notification.
  group_wait: 30s
  # Once the first notification has been sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  group_interval: 5m
  # If a notification has already been sent successfully, wait
  # 'repeat_interval' before sending it again.
  repeat_interval: 3h 
  # The default receiver; it must match a name declared under 'receivers'.
  receiver: team-roots-mails
 # Inhibition rules mute a set of alerts while another alert is firing.
 # Here they mute warning-level notifications when the same alert is
 # already firing at critical level.
 inhibit_rules:
 - source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  # Apply the inhibition only when the alertname, cluster and service labels
  # are all equal on both alerts.
  equal: ['alertname', 'cluster', 'service']
 # Notification receivers. 'team-roots-mails' is the receiver referenced by
 # the root route; it sends e-mail to roots@crans.org.
 receivers:
 - name: 'team-roots-mails'
  email_configs:
  - to: 'roots@crans.org'
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@ -13,6 +13,12 @@
    dest: /etc/prometheus/prometheus.yml
  notify: Restart Prometheus
 # Render the alert rules file from its template and notify the
 # "Restart Prometheus" handler whenever the rendered file changes.
 - name: Configure Prometheus alert rules
  template:
    src: prometheus/alert.rules.j2
    dest: /etc/prometheus/alert.rules
  notify: Restart Prometheus
 # We don't need to restart Prometheus when updating nodes
 - name: Configure Prometheus nodes
  copy:
--- a/roles/prometheus/templates/prometheus/prometheus.yml.j2
+++ b/roles/prometheus/templates/prometheus/prometheus.yml.j2
@ -12,8 +12,14 @@ global:
 # Load and evaluate rules in this file every 'evaluation_interval' seconds.
 rule_files:
-  # - "first.rules"
+  - "alert.rules"
-  # - "second.rules"
+
 # Route alerts to Prometheus Alertmanager, addressed statically at
 # localhost:9093 (presumably the instance installed by the
 # prometheus-alertmanager role on the same host -- its listen port is not
 # set anywhere in this change, so confirm it matches).
 alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 'localhost:9093'
 # A scrape configuration containing exactly one endpoint to scrape:
 # Here it's Prometheus itself.