[Prometheus] Update to 2.x

certbot_on_virtu
Alexandre Iooss 2019-05-12 18:34:51 +02:00
parent 528e443d67
commit f30e5c9f21
No known key found for this signature in database
GPG Key ID: 6C79278F3FCDCC02
4 changed files with 35 additions and 24 deletions

View File

@ -15,8 +15,8 @@
# Ansible task: renders the alert rules template into /etc/prometheus and
# notifies the "Restart Prometheus" handler.
# NOTE(review): two src/dest pairs are listed below. Diff markers and
# indentation are not preserved in this view, so presumably the first pair
# (alert.rules) is the removed side and the second pair (alert.rules.yml) is
# the added side — confirm against the repository.
- name: Configure Prometheus alert rules
template:
src: prometheus/alert.rules.j2
dest: /etc/prometheus/alert.rules
src: prometheus/alert.rules.yml.j2
dest: /etc/prometheus/alert.rules.yml
notify: Restart Prometheus
# We don't need to restart Prometheus when updating nodes

View File

@ -1,13 +0,0 @@
# {{ ansible_managed }}
{# The alert annotations use the same double-brace syntax as Jinja2, so the rule is kept inside a raw block #}
{% raw %}
# Alert for any instance that is unreachable for more than 5 minutes.
ALERT InstanceDown
  IF up == 0
  FOR 5m
  LABELS { severity = "page" }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} down",
    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
  }
{% endraw %}

View File

@ -0,0 +1,25 @@
# {{ ansible_managed }}
{# As this is also Jinja2 it will conflict without a raw block #}
{% raw %}
# Alerting rules in the grouped YAML layout: a list of groups, each with a
# name and a list of rules. The {{ $labels.* }} and {{ $value }} placeholders
# in the annotations sit inside the raw block, so Jinja2 leaves them
# untouched.
groups:
  - name: example
    rules:
      # Alert for any instance that is unreachable for >5 minutes.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Alert for any instance that has a median request latency >1s.
      - alert: APIHighRequestLatency
        expr: api_http_request_latencies_second{quantile="0.5"} > 1
        for: 10m
        annotations:
          summary: "High request latency on {{ $labels.instance }}"
          description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)"
{% endraw %}

View File

@ -1,8 +1,8 @@
# {{ ansible_managed }}
# Global settings section of the Prometheus configuration template.
# NOTE(review): the explicit 15s values and the "set to the global default
# (60s)" comments below contradict each other. Diff markers are not preserved
# in this view, so presumably the 15s lines are the removed side and the
# comments are the added side — confirm against the repository.
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
# scrape_interval is set to the global default (60s)
# evaluation_interval is set to the global default (60s)
# scrape_timeout is set to the global default (10s).
# Attach these labels to any time series or alerts when communicating with
@ -10,16 +10,15 @@ global:
# External labels, rule file list and Alertmanager target of the Prometheus
# configuration template.
# NOTE(review): `rule_files` appears twice ("alert.rules" and
# "alert.rules.yml") and the Alertmanager target is written in both block and
# flow form. Diff markers and indentation are not preserved in this view, so
# presumably the earlier form of each is the removed side and the later form
# is the added side — confirm against the repository.
external_labels:
monitor: 'example'
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
- "alert.rules"
# Route alerts to Prometheus Alertmanager
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- 'localhost:9093'
- targets: ['localhost:9093']
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- "alert.rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.