diff --git a/monitoring.yml b/monitoring.yml
index f753ad7c..4400869d 100644
--- a/monitoring.yml
+++ b/monitoring.yml
@@ -1,9 +1,23 @@
 ---
-# Playbook for monitoring
+# Deploy Prometheus
 - hosts: prometheus.adm.crans.org
+  vars:
+    # Written to /etc/prometheus/targets.json by the prometheus role
+    prometheus_targets:
+      - labels:
+          job: node
+        targets:
+          - localhost:9100
+          - vulcain.adm.crans.org:9100
+          - odlyd.adm.crans.org:9100
+      - labels:
+          job: prometheus
+        targets:
+          - localhost:9090
   roles:
     - prometheus
 
+# Monitor all hosts
 - hosts: all
   roles:
     - prometheus-node
diff --git a/roles/prometheus/handlers/main.yml b/roles/prometheus/handlers/main.yml
new file mode 100644
index 00000000..4214def5
--- /dev/null
+++ b/roles/prometheus/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+# Notified by the "Configure Prometheus" task when prometheus.yml changes
+- name: Restart Prometheus
+  service:
+    name: prometheus
+    state: restarted
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 8ae973e8..f1702b94 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -1,9 +1,22 @@
 ---
-
-- name: Instal Prometheus
+- name: Install Prometheus
   apt:
     update_cache: true
     name: prometheus
   register: apt_result
   retries: 3
   until: apt_result is succeeded
+
+- name: Configure Prometheus
+  template:
+    src: prometheus/prometheus.yml.j2
+    dest: /etc/prometheus/prometheus.yml
+    mode: "0644"
+  notify: Restart Prometheus
+
+# No restart needed here: Prometheus reloads the targets file dynamically
+- name: Configure Prometheus nodes
+  copy:
+    content: "{{ prometheus_targets | to_nice_json }}"
+    dest: /etc/prometheus/targets.json
+    mode: "0644"
diff --git a/roles/prometheus/templates/prometheus/prometheus.yml.j2 b/roles/prometheus/templates/prometheus/prometheus.yml.j2
new file mode 100644
index 00000000..e96dedea
--- /dev/null
+++ b/roles/prometheus/templates/prometheus/prometheus.yml.j2
@@ -0,0 +1,26 @@
+# {{ ansible_managed }}
+
+global:
+  scrape_interval: 15s  # Scrape targets every 15 seconds.
+  evaluation_interval: 15s  # Evaluate rules every 15 seconds.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'example'
+
+# Load and evaluate rules in these files every 'evaluation_interval' seconds.
+rule_files:
+  # - "first.rules"
+  # - "second.rules"
+
+# A single scrape configuration whose targets come from file-based
+# service discovery instead of being listed statically here.
+scrape_configs:
+  - job_name: dummy
+    # Targets are read from targets.json and reloaded dynamically, so
+    # Prometheus does not need a restart when targets.json is updated.
+    file_sd_configs:
+      - files:
+          - '/etc/prometheus/targets.json'