From f30e5c9f2178aff41db1db4179a6a4a7cd442f8e Mon Sep 17 00:00:00 2001
From: Alexandre Iooss
Date: Sun, 12 May 2019 18:34:51 +0200
Subject: [PATCH] [Prometheus] Update to 2.x

---
 roles/prometheus/tasks/main.yml              |  4 +--
 .../templates/prometheus/alert.rules.j2      | 13 ----------
 .../templates/prometheus/alert.rules.yml.j2  | 25 +++++++++++++++++++
 .../templates/prometheus/prometheus.yml.j2   | 17 ++++++-------
 4 files changed, 35 insertions(+), 24 deletions(-)
 delete mode 100644 roles/prometheus/templates/prometheus/alert.rules.j2
 create mode 100644 roles/prometheus/templates/prometheus/alert.rules.yml.j2

diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 0dae5d37..3f705421 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -15,8 +15,8 @@
 
 - name: Configure Prometheus alert rules
   template:
-    src: prometheus/alert.rules.j2
-    dest: /etc/prometheus/alert.rules
+    src: prometheus/alert.rules.yml.j2
+    dest: /etc/prometheus/alert.rules.yml
   notify: Restart Prometheus
 
 # We don't need to restart Prometheus when updating nodes
diff --git a/roles/prometheus/templates/prometheus/alert.rules.j2 b/roles/prometheus/templates/prometheus/alert.rules.j2
deleted file mode 100644
index 383a181e..00000000
--- a/roles/prometheus/templates/prometheus/alert.rules.j2
+++ /dev/null
@@ -1,13 +0,0 @@
-# {{ ansible_managed }}
-
-{# As this is also Jinja2 it will conflict without a raw block #}
-{% raw %}
-ALERT InstanceDown
-  IF up == 0
-  FOR 5m
-  LABELS { severity = "page" }
-  ANNOTATIONS {
-    summary = "Instance {{ $labels.instance }} down",
-    description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
-  }
-{% endraw %}
diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
new file mode 100644
index 00000000..9e603a4f
--- /dev/null
+++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
@@ -0,0 +1,25 @@
+# {{ ansible_managed }}
+{# As this is also Jinja2 it will conflict without a raw block #}
+{% raw %}
+groups:
+- name: example
+  rules:
+
+  # Alert for any instance that is unreachable for >5 minutes.
+  - alert: InstanceDown
+    expr: up == 0
+    for: 5m
+    labels:
+      severity: page
+    annotations:
+      summary: "Instance {{ $labels.instance }} down"
+      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
+
+  # Alert for any instance that has a median request latency >1s.
+  - alert: APIHighRequestLatency
+    expr: api_http_request_latencies_second{quantile="0.5"} > 1
+    for: 10m
+    annotations:
+      summary: "High request latency on {{ $labels.instance }}"
+      description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)"
+{% endraw %}
diff --git a/roles/prometheus/templates/prometheus/prometheus.yml.j2 b/roles/prometheus/templates/prometheus/prometheus.yml.j2
index 1844ad37..76573fa9 100644
--- a/roles/prometheus/templates/prometheus/prometheus.yml.j2
+++ b/roles/prometheus/templates/prometheus/prometheus.yml.j2
@@ -1,8 +1,8 @@
 # {{ ansible_managed }}
 
 global:
-  scrape_interval: 15s # By default, scrape targets every 15 seconds.
-  evaluation_interval: 15s # By default, scrape targets every 15 seconds.
+  # scrape_interval is set to the global default (60s)
+  # evaluation_interval is set to the global default (60s)
   # scrape_timeout is set to the global default (10s).
 
 # Attach these labels to any time series or alerts when communicating with
@@ -10,16 +10,15 @@ global:
   external_labels:
     monitor: 'example'
 
-# Load and evaluate rules in this file every 'evaluation_interval' seconds.
-rule_files:
-  - "alert.rules"
-
-# Route alerts to Prometheus Alertmanager
+# Alertmanager configuration
 alerting:
   alertmanagers:
   - static_configs:
-    - targets:
-      - 'localhost:9093'
+    - targets: ['localhost:9093']
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:
+  - "alert.rules.yml"
 
 # A scrape configuration containing exactly one endpoint to scrape:
 # Here it's Prometheus itself.
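
Post-patch note: Prometheus 2.x ships promtool, which understands the new
YAML rule format. A possible follow-up, sketched below and not part of this
commit, is to let the template task validate the rendered rule file before
installing it; this assumes promtool is present at /usr/bin/promtool on the
target host:

    - name: Configure Prometheus alert rules
      template:
        src: prometheus/alert.rules.yml.j2
        dest: /etc/prometheus/alert.rules.yml
        # Ansible substitutes %s with the path of the freshly rendered
        # temporary file and aborts the task if the command fails, so a
        # broken rule file never reaches /etc/prometheus.
        validate: /usr/bin/promtool check rules %s
      notify: Restart Prometheus

The same check can be run by hand against an already deployed file:

    promtool check rules /etc/prometheus/alert.rules.yml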