Merge branch 'alertmanager' into 'master'
[monitoring] Prometheus Alertmanager See merge request nounous/ansible!8certbot_on_virtu
						commit
						84c3d66811
					
				| 
						 | 
					@ -23,6 +23,7 @@
 | 
				
			||||||
          - localhost:9090
 | 
					          - localhost:9090
 | 
				
			||||||
  roles:
 | 
					  roles:
 | 
				
			||||||
    - prometheus
 | 
					    - prometheus
 | 
				
			||||||
 | 
					    - prometheus-alertmanager
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Monitor all hosts
 | 
					# Monitor all hosts
 | 
				
			||||||
- hosts: all
 | 
					- hosts: all
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,5 @@
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					- name: Restart Prometheus Alertmanager
 | 
				
			||||||
 | 
					  service:
 | 
				
			||||||
 | 
					    name: prometheus-alertmanager
 | 
				
			||||||
 | 
					    state: restarted
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,14 @@
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					- name: Install Prometheus Alertmanager
 | 
				
			||||||
 | 
					  apt:
 | 
				
			||||||
 | 
					    update_cache: true
 | 
				
			||||||
 | 
					    name: prometheus-alertmanager
 | 
				
			||||||
 | 
					  register: apt_result
 | 
				
			||||||
 | 
					  retries: 3
 | 
				
			||||||
 | 
					  until: apt_result is succeeded
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- name: Configure Prometheus Alertmanager
 | 
				
			||||||
 | 
					  template:
 | 
				
			||||||
 | 
					    src: prometheus/alertmanager.yml.j2
 | 
				
			||||||
 | 
					    dest: /etc/prometheus/alertmanager.yml
 | 
				
			||||||
 | 
					  notify: Restart Prometheus Alertmanager
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,57 @@
 | 
				
			||||||
 | 
					# {{ ansible_managed }}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					global:
 | 
				
			||||||
 | 
					  # The smarthost and SMTP sender used for mail notifications.
 | 
				
			||||||
 | 
					  smtp_smarthost: 'localhost:25'
 | 
				
			||||||
 | 
					  smtp_from: 'alertmanager@crans.org'
 | 
				
			||||||
 | 
					  #smtp_auth_username: 'alertmanager'
 | 
				
			||||||
 | 
					  #smtp_auth_password: 'password'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# The directory from which notification templates are read.
 | 
				
			||||||
 | 
					templates: 
 | 
				
			||||||
 | 
					- '/etc/prometheus/alertmanager_templates/*.tmpl'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# The root route on which each incoming alert enters.
 | 
				
			||||||
 | 
					route:
 | 
				
			||||||
 | 
					  # The labels by which incoming alerts are grouped together. For example,
 | 
				
			||||||
 | 
					  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
 | 
				
			||||||
 | 
					  # be batched into a single group.
 | 
				
			||||||
 | 
					  group_by: ['alertname', 'cluster', 'service']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # When a new group of alerts is created by an incoming alert, wait at
 | 
				
			||||||
 | 
					  # least 'group_wait' to send the initial notification.
 | 
				
			||||||
 | 
					  # This ensures that multiple alerts for the same group that start
 | 
				
			||||||
 | 
					  # firing shortly after one another are batched together on the first
 | 
				
			||||||
 | 
					  # notification.
 | 
				
			||||||
 | 
					  group_wait: 30s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # When the first notification was sent, wait 'group_interval' to send a batch
 | 
				
			||||||
 | 
					  # of new alerts that started firing for that group.
 | 
				
			||||||
 | 
					  group_interval: 5m
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # If an alert has successfully been sent, wait 'repeat_interval' to
 | 
				
			||||||
 | 
					  # resend it.
 | 
				
			||||||
 | 
					  repeat_interval: 3h 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # A default receiver
 | 
				
			||||||
 | 
					  receiver: team-roots-mails
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Inhibition rules allow muting a set of alerts given that another alert is
 | 
				
			||||||
 | 
					# firing.
 | 
				
			||||||
 | 
					# We use this to mute any warning-level notifications if the same alert is 
 | 
				
			||||||
 | 
					# already critical.
 | 
				
			||||||
 | 
					inhibit_rules:
 | 
				
			||||||
 | 
					- source_match:
 | 
				
			||||||
 | 
					    severity: 'critical'
 | 
				
			||||||
 | 
					  target_match:
 | 
				
			||||||
 | 
					    severity: 'warning'
 | 
				
			||||||
 | 
					  # Apply inhibition only if alertname, cluster and service are all the same.
 | 
				
			||||||
 | 
					  equal: ['alertname', 'cluster', 'service']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					receivers:
 | 
				
			||||||
 | 
					- name: 'team-roots-mails'
 | 
				
			||||||
 | 
					  email_configs:
 | 
				
			||||||
 | 
					  - to: 'roots@crans.org'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,6 +13,12 @@
 | 
				
			||||||
    dest: /etc/prometheus/prometheus.yml
 | 
					    dest: /etc/prometheus/prometheus.yml
 | 
				
			||||||
  notify: Restart Prometheus
 | 
					  notify: Restart Prometheus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- name: Configure Prometheus alert rules
 | 
				
			||||||
 | 
					  template:
 | 
				
			||||||
 | 
					    src: prometheus/alert.rules.j2
 | 
				
			||||||
 | 
					    dest: /etc/prometheus/alert.rules
 | 
				
			||||||
 | 
					  notify: Restart Prometheus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# We don't need to restart Prometheus when updating nodes
 | 
					# We don't need to restart Prometheus when updating nodes
 | 
				
			||||||
- name: Configure Prometheus nodes
 | 
					- name: Configure Prometheus nodes
 | 
				
			||||||
  copy:
 | 
					  copy:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,8 +12,14 @@ global:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
 | 
					# Load and evaluate rules in this file every 'evaluation_interval' seconds.
 | 
				
			||||||
rule_files:
 | 
					rule_files:
 | 
				
			||||||
  # - "first.rules"
 | 
					  - "alert.rules"
 | 
				
			||||||
  # - "second.rules"
 | 
					
 | 
				
			||||||
 | 
					# Route alerts to Prometheus Alertmanager
 | 
				
			||||||
 | 
					alerting:
 | 
				
			||||||
 | 
					  alertmanagers:
 | 
				
			||||||
 | 
					  - static_configs:
 | 
				
			||||||
 | 
					    - targets:
 | 
				
			||||||
 | 
					      - 'localhost:9093'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# A scrape configuration containing exactly one endpoint to scrape:
 | 
					# A scrape configuration containing exactly one endpoint to scrape:
 | 
				
			||||||
# Here it's Prometheus itself.
 | 
					# Here it's Prometheus itself.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue