[monitoring] PEPCRANS + monitoring of cachan infra
parent
3d528a1891
commit
f7347e41d2
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
glob_grafana:
|
||||
root_url: https://grafana.crans.org
|
||||
icon: crans_icon_white.svg
|
||||
ldap_base: "{{ glob_ldap.base }}"
|
||||
ldap_master_ipv4: "{{ glob_ldap.servers[0] }}"
|
||||
ldap_user_tree: "ou=passwd,{{ glob_ldap.base }}"
|
|
@ -30,3 +30,6 @@ glob_nginx:
|
|||
- "172.16.0.0/16"
|
||||
- "fd00:0:0:10::/64"
|
||||
deploy_robots_file: false
|
||||
|
||||
glob_prometheus_nginx_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
---
|
||||
glob_prometheus: {}
|
||||
|
||||
glob_snmp_exporter:
|
||||
procurve_password: "{{ vault.snmp_procurve_password }}"
|
||||
unifi_password: "{{ vault.snmp_unifi_password }}"
|
||||
|
||||
glob_ninjabot:
|
||||
config:
|
||||
nick: Prometheus
|
||||
server: irc.adm.crans.org
|
||||
port: 6667
|
||||
channel: "#monitoring"
|
|
@ -18,3 +18,6 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
---
|
||||
interfaces:
|
||||
adm: ens18
|
||||
|
||||
loc_home_nounou:
|
||||
ip: 172.17.10.9
|
||||
mountpoint: /rpool/home
|
||||
|
||||
loc_ldap:
|
||||
servers:
|
||||
- 172.17.10.9
|
||||
base: 'dc=crans,dc=org'
|
||||
|
||||
loc_ntp_client:
|
||||
servers:
|
||||
- terenez.cachan-adm.crans.org
|
||||
|
||||
debian_mirror: http://172.17.10.202/debian
|
||||
|
||||
loc_mirror:
|
||||
name: mirror.cachan-adm.crans.org
|
||||
ip: "{{ query('ldap','ip','terenez','cachan-adm') | ipv4 | first }}"
|
||||
|
||||
loc_borg:
|
||||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
||||
glob_snmp_exporter:
|
||||
procurve_password: "{{ vault.snmp_procurve_password }}"
|
||||
unifi_password: "{{ vault.snmp_unifi_password }}"
|
||||
|
||||
loc_ninjabot:
|
||||
config:
|
||||
nick: fyre
|
||||
server: irc.adm.crans.org
|
||||
port: 6667
|
||||
channel: "#monitoring"
|
||||
|
||||
loc_prometheus:
|
||||
node:
|
||||
file: targets_node.json
|
||||
targets: "{{ groups['server'] | select('match', '^.*\\.cachan-adm\\.crans\\.org$') | list | sort }}"
|
||||
config:
|
||||
- job_name: servers
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_node.json'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- source_labels: [__param_target]
|
||||
target_label: __address__
|
||||
replacement: '$1:9100'
|
||||
|
||||
ups_snmp:
|
||||
file: targets_ups_snmp.json
|
||||
targets:
|
||||
- pulsar.cachan-adm.crans.org # 0B
|
||||
- quasar.cachan-adm.crans.org # 4J
|
||||
config:
|
||||
- job_name: ups_snmp
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_ups_snmp.json'
|
||||
metrics_path: /snmp
|
||||
params:
|
||||
module: [eatonups]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9116
|
||||
|
||||
unifi_snmp:
|
||||
file: targets_unifi_snmp.json
|
||||
targets: "{{ groups['crans_unifi'] | list | sort }}"
|
||||
config:
|
||||
- job_name: unifi_snmp
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_unifi_snmp.json'
|
||||
metrics_path: /snmp
|
||||
params:
|
||||
module: [ubiquiti_unifi]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9116
|
||||
|
||||
nginx:
|
||||
file: targets_nginx.json
|
||||
targets:
|
||||
- rodauh.cachan-adm.crans.org
|
||||
- terenez.cachan-adm.crans.org
|
||||
config:
|
||||
- job_name: nginx
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_nginx.json'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:9117'
|
|
@ -25,3 +25,6 @@ loc_borg:
|
|||
to_exclude:
|
||||
- /var/lib/lxcfs
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -1,4 +1,113 @@
|
|||
interfaces:
|
||||
adm: eth0
|
||||
srv_nat: eth1
|
||||
infra: eth2
|
||||
|
||||
loc_prometheus:
|
||||
node:
|
||||
file: targets_node.json
|
||||
targets: "{{ groups['server'] | select('match', '^.*\\.adm\\.crans\\.org$') | list | sort }}"
|
||||
config:
|
||||
- job_name: servers
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_node.json'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- source_labels: [__param_target]
|
||||
target_label: __address__
|
||||
replacement: '$1:9100'
|
||||
|
||||
nginx:
|
||||
file: targets_nginx.json
|
||||
targets:
|
||||
- hodaur.adm.crans.org
|
||||
- charybde.adm.crans.org
|
||||
config:
|
||||
- job_name: nginx
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_nginx.json'
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:9117'
|
||||
|
||||
blackbox:
|
||||
file: targets_blackbox.json
|
||||
targets:
|
||||
- https://crans.org/
|
||||
- https://www.crans.org/
|
||||
- https://webirc.crans.org/
|
||||
- https://jitsi.crans.org/
|
||||
- https://ftps.crans.org/
|
||||
- http://ftp.crans.org/
|
||||
- https://grafana.crans.org/
|
||||
- https://roundcube.crans.org/
|
||||
- https://zero.crans.org/
|
||||
- https://wiki.crans.org/PageAccueil
|
||||
- https://framadate.crans.org/
|
||||
- https://pad.crans.org/
|
||||
- https://lists.crans.org/
|
||||
- https://cas.crans.org/
|
||||
- https://ethercalc.crans.org/
|
||||
- https://phabricator.crans.org/
|
||||
- https://webmail.crans.org/horde/login.php
|
||||
- https://gitlab.crans.org/
|
||||
- https://perso.crans.org/crans/
|
||||
- https://install-party.crans.org/
|
||||
- https://intranet.crans.org/
|
||||
- https://owncloud.crans.org/
|
||||
config:
|
||||
- job_name: blackbox
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_blackbox.json'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx] # Look for an HTTP 200 response.
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9115
|
||||
|
||||
mtail:
|
||||
file: targets_mtail.json
|
||||
targets:
|
||||
- tealc.adm.crans.org
|
||||
config:
|
||||
- job_name: mtail
|
||||
static_configs:
|
||||
- targets: ["tealc.adm.crans.org"]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:3903'
|
||||
|
||||
|
||||
# apache:
|
||||
# targets:
|
||||
# config:
|
||||
# - job_name: apache
|
||||
# file_sd_configs:
|
||||
# - files:
|
||||
# - '/etc/prometheus/targets_apache.json'
|
||||
# relabel_configs:
|
||||
# - source_labels: [__address__]
|
||||
# target_label: instance
|
||||
# - source_labels: [instance]
|
||||
# target_label: __address__
|
||||
# replacement: '$1:9117'
|
||||
|
||||
# bird_targets:
|
||||
# - routeur-sam.adm.crans.org
|
||||
|
||||
|
|
|
@ -25,3 +25,6 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -18,3 +18,6 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -14,6 +14,9 @@ glob_ntp_client:
|
|||
|
||||
debian_mirror: http://172.17.10.202/debian
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
||||
loc_borg:
|
||||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
|
|
|
@ -37,3 +37,9 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
||||
glob_prometheus_nginx_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -22,3 +22,6 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -18,3 +18,9 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
||||
glob_prometheus_nginx_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -23,3 +23,6 @@ loc_borg:
|
|||
remote:
|
||||
- borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
|
||||
ssh_options: ""
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
|
@ -27,3 +27,6 @@ loc_borg:
|
|||
ssh_options: ""
|
||||
to_exclude:
|
||||
- /var/lib/backuppc
|
||||
|
||||
glob_prometheus_node_exporter:
|
||||
listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
|
||||
|
|
14
hosts
14
hosts
|
@ -18,6 +18,9 @@ tealc.adm.crans.org
|
|||
tealc.adm.crans.org
|
||||
gulp.cachan-adm.crans.org
|
||||
|
||||
[blackbox]
|
||||
monitoring.adm.crans.org
|
||||
|
||||
[bdd:children]
|
||||
virtu
|
||||
|
||||
|
@ -81,6 +84,9 @@ neree.adm.crans.org
|
|||
[gitlab]
|
||||
gitzly.adm.crans.org
|
||||
|
||||
[grafana]
|
||||
monitoring.adm.crans.org
|
||||
|
||||
[horde]
|
||||
horde.adm.crans.org
|
||||
|
||||
|
@ -99,17 +105,16 @@ linx.adm.crans.org
|
|||
[mailman]
|
||||
mailman.adm.crans.org
|
||||
|
||||
[monitoring]
|
||||
[prometheus]
|
||||
monitoring.adm.crans.org
|
||||
|
||||
[nginx]
|
||||
charybde.adm.crans.org
|
||||
fyre.cachan-adm.crans.org
|
||||
|
||||
[nginx:children]
|
||||
django_cas
|
||||
galene
|
||||
jitsi
|
||||
mailman
|
||||
ntp_server
|
||||
re2o_front
|
||||
reverseproxy
|
||||
roundcube
|
||||
|
@ -212,6 +217,7 @@ cas.adm.crans.org
|
|||
codichotomie.adm.crans.org
|
||||
ethercalc.adm.crans.org
|
||||
fluxx.adm.crans.org
|
||||
fyre.cachan-adm.crans.org
|
||||
gitlab-ci.adm.crans.org
|
||||
gitzly.adm.crans.org
|
||||
hodaur.adm.crans.org
|
||||
|
|
|
@ -1,81 +1,43 @@
|
|||
#!/usr/bin/env ansible-playbook
|
||||
---
|
||||
# Deploy Prometheus and Grafana on monitoring server
|
||||
- hosts: monitoring
|
||||
|
||||
# Deploy Prometheus on monitoring server
|
||||
- hosts: prometheus
|
||||
vars:
|
||||
# Prometheus targets.json
|
||||
prometheus:
|
||||
node_targets: "{{ groups['server'] | list | sort }}"
|
||||
ups_snmp_targets:
|
||||
- pulsar.adm.crans.org # 0B
|
||||
- quasar.adm.crans.org # 4J
|
||||
procurve_snmp_targets:
|
||||
- batg-9.infra.crans.org
|
||||
unifi_snmp_targets: "{{ groups['crans_unifi'] | list | sort }}"
|
||||
blackbox_targets:
|
||||
- https://crans.org/
|
||||
- https://www.crans.org/
|
||||
- https://webirc.crans.org/
|
||||
- https://jitsi.crans.org/
|
||||
- https://ftps.crans.org/
|
||||
- http://ftp.crans.org/
|
||||
- https://grafana.crans.org/
|
||||
- https://roundcube.crans.org/
|
||||
- https://zero.crans.org/
|
||||
- https://wiki.crans.org/PageAccueil
|
||||
- https://framadate.crans.org/
|
||||
- https://pad.crans.org/
|
||||
- https://lists.crans.org/
|
||||
- https://cas.crans.org/
|
||||
- https://ethercalc.crans.org/
|
||||
- https://phabricator.crans.org/
|
||||
- https://webmail.crans.org/horde/login.php
|
||||
- https://gitlab.crans.org/
|
||||
- https://perso.crans.org/crans/
|
||||
- https://install-party.crans.org/
|
||||
- https://intranet.crans.org/
|
||||
- https://owncloud.crans.org/
|
||||
nginx_targets:
|
||||
- hodaur.adm.crans.org
|
||||
- charybde.adm.crans.org
|
||||
apache_targets: [] # [zamok.adm.crans.org]
|
||||
bird_targets:
|
||||
- routeur-sam.adm.crans.org
|
||||
|
||||
snmp_procurve_password: "{{ vault.snmp_procurve_password }}"
|
||||
snmp_unifi_password: "{{ vault.snmp_unifi_password }}"
|
||||
|
||||
grafana:
|
||||
root_url: https://grafana.crans.org
|
||||
icon: crans_icon_white.svg
|
||||
ldap_base: "{{ glob_ldap.base }}"
|
||||
ldap_master_ipv4: "{{ glob_ldap.servers[0] }}"
|
||||
ldap_user_tree: "ou=passwd,{{ glob_ldap.base }}"
|
||||
prometheus: "{{ glob_prometheus | default({}) | combine(loc_prometheus | default({})) }}"
|
||||
alertmanager: "{{ glob_alertmanager | default({}) | combine(loc_alertmanager | default({})) }}"
|
||||
snmp_exporter: "{{ glob_snmp_exporter | default({}) | combine(loc_snmp_exporter | default({})) }}"
|
||||
ninjabot: "{{ glob_ninjabot | default({}) | combine(loc_ninjabot | default({})) }}"
|
||||
roles:
|
||||
- prometheus
|
||||
- prometheus-alertmanager
|
||||
- prometheus-snmp-exporter
|
||||
- prometheus-blackbox-exporter
|
||||
- ninjabot
|
||||
- grafana
|
||||
|
||||
# # Deploy Grafana on monitoring server
|
||||
# - hosts: grafana
|
||||
# vars:
|
||||
# grafana: "{{ glob_grafana | default({}) | combine(loc_grafana | default({})) }}"
|
||||
# roles:
|
||||
# - grafana
|
||||
|
||||
- hosts: blackbox
|
||||
roles:
|
||||
- prometheus-blackbox-exporter
|
||||
|
||||
# Monitor all hosts
|
||||
- hosts: server
|
||||
vars:
|
||||
adm_ipv4: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
|
||||
roles: ["prometheus-node-exporter"]
|
||||
prometheus_node_exporter: "{{ glob_prometheus_node_exporter | default({}) | combine(loc_prometheus_node_exporter | default({})) }}"
|
||||
roles:
|
||||
- prometheus-node-exporter
|
||||
|
||||
# Export nginx metrics
|
||||
- hosts: nginx
|
||||
vars:
|
||||
adm_ipv4: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
|
||||
roles: ["prometheus-nginx-exporter"]
|
||||
|
||||
# Export apache metrics
|
||||
#- hosts: zamok.adm.crans.org
|
||||
# vars:
|
||||
# adm_ipv4: "{{ ansible_all_ipv4_addresses | ipaddr(adm_subnet) | first }}"
|
||||
# roles: ["prometheus-apache-exporter"]
|
||||
prometheus_nginx_exporter: "{{ glob_prometheus_nginx_exporter | default({}) | combine(loc_prometheus_nginx_exporter | default({})) }}"
|
||||
roles:
|
||||
- prometheus-nginx-exporter
|
||||
|
||||
# Monitor mailq with a special text exporter
|
||||
#- hosts: redisdead.adm.crans.org
|
||||
|
@ -85,7 +47,9 @@
|
|||
- hosts: tealc.adm.crans.org
|
||||
vars:
|
||||
mirror: '{{ glob_mirror | default({}) | combine(loc_mirror | default({})) }}'
|
||||
roles: ["mtail"]
|
||||
roles:
|
||||
- mtail
|
||||
|
||||
- hosts: sputnik.adm.crans.org
|
||||
roles: ["statping"]
|
||||
roles:
|
||||
- statping
|
||||
|
|
|
@ -17,6 +17,11 @@
|
|||
dest: /var/local/ninjabot
|
||||
version: master
|
||||
|
||||
- name: Deploy NinjaBot configuration
|
||||
template:
|
||||
src: ninjabot/ninjabot.json.j2
|
||||
dest: /var/local/ninjabot/ninjabot.json
|
||||
|
||||
- name: Deploy NinjaBot systemd unit
|
||||
template:
|
||||
src: systemd/system/ninjabot.service.j2
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{{ ninjabot.config | to_nice_json(indent=2) }}
|
|
@ -8,7 +8,7 @@ Type=simple
|
|||
WorkingDirectory=/var/local/ninjabot
|
||||
User=nobody
|
||||
Group=nogroup
|
||||
ExecStart=/usr/bin/python3 /var/local/ninjabot/main.py
|
||||
ExecStart=/usr/bin/python3 /var/local/ninjabot/ninjabot.py
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
path: /etc/default/prometheus-nginx-exporter
|
||||
regexp: '^ARGS='
|
||||
line: |
|
||||
ARGS="-web.listen-address={{ adm_ipv4 }}:9117 -nginx.scrape-uri=http://[::1]:6424/stub_status"
|
||||
ARGS="-web.listen-address={{ prometheus_nginx_exporter.listen_addr }}:9117 -nginx.scrape-uri=http://[::1]:6424/stub_status"
|
||||
notify:
|
||||
- Restart nginx
|
||||
- Restart prometheus-nginx-exporter
|
||||
|
|
|
@ -7,21 +7,6 @@
|
|||
register: apt_result
|
||||
retries: 3
|
||||
until: apt_result is succeeded
|
||||
when:
|
||||
- ansible_lsb.codename != 'stretch'
|
||||
|
||||
# Prometheus 2 node is in stretch-backports
|
||||
- name: Install Prometheus node-exporter (stretch-backports)
|
||||
apt:
|
||||
update_cache: true
|
||||
name: prometheus-node-exporter
|
||||
install_recommends: false
|
||||
default_release: stretch-backports
|
||||
register: apt_result
|
||||
retries: 3
|
||||
until: apt_result is succeeded
|
||||
when:
|
||||
- ansible_lsb.codename == 'stretch'
|
||||
|
||||
- name: Install Prometheus node-exporter-collectors (bullseye)
|
||||
apt:
|
||||
|
@ -45,7 +30,7 @@
|
|||
path: /etc/default/prometheus-node-exporter
|
||||
regexp: '^ARGS='
|
||||
line: |
|
||||
ARGS="--web.listen-address={{ adm_ipv4 }}:9100"
|
||||
ARGS="--web.listen-address={{ prometheus_node_exporter.listen_addr }}:9100"
|
||||
tags: restart-node-exporter
|
||||
|
||||
# Install the new APT textfile collector; it might be upstreamed one day
|
||||
|
@ -57,15 +42,4 @@
|
|||
owner: root
|
||||
group: root
|
||||
mode: 0755
|
||||
when: ansible_lsb.id == 'Debian' and ansible_distribution_release != "bullseye"
|
||||
|
||||
# Install new APT textfile collector, it might be upstreamed one day
|
||||
# https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/pull/35
|
||||
- name: Patch APT textfile collector
|
||||
copy:
|
||||
src: apt.sh
|
||||
dest: /usr/share/prometheus-node-exporter-collectors/apt.sh
|
||||
owner: root
|
||||
group: root
|
||||
mode: 0755
|
||||
when: ansible_lsb.id == 'Ubuntu' or ansible_distribution_release == "bullseye"
|
||||
when: ansible_distribution_release != "bullseye"
|
||||
|
|
|
@ -113,14 +113,14 @@ procurve_switch:
|
|||
version: 3
|
||||
auth:
|
||||
# To create SNMPv3 user on HP procurve, execute:
|
||||
# snmpv3 user snmp_prometheus auth sha {{ snmp_procurve_password }} priv aes {{ snmp_procurve_password }}
|
||||
# snmpv3 user snmp_prometheus auth sha {{ snmp_exporter.procurve_password }} priv aes {{ snmp_exporter.procurve_password }}
|
||||
# snmpv3 group managerpriv user snmp_prometheus sec-model ver3
|
||||
security_level: authPriv
|
||||
username: snmp_prometheus
|
||||
password: {{ snmp_procurve_password }}
|
||||
password: {{ snmp_exporter.procurve_password }}
|
||||
auth_protocol: SHA
|
||||
priv_protocol: AES
|
||||
priv_password: {{ snmp_procurve_password }}
|
||||
priv_password: {{ snmp_exporter.procurve_password }}
|
||||
|
||||
ubiquiti_unifi:
|
||||
walk:
|
||||
|
@ -475,7 +475,7 @@ ubiquiti_unifi:
|
|||
auth:
|
||||
security_level: authPriv
|
||||
username: snmp_prometheus
|
||||
password: {{ snmp_unifi_password }}
|
||||
password: {{ snmp_exporter.unifi_password }}
|
||||
auth_protocol: SHA
|
||||
priv_protocol: AES
|
||||
priv_password: {{ snmp_unifi_password }}
|
||||
priv_password: {{ snmp_exporter.unifi_password }}
|
||||
|
|
|
@ -16,35 +16,18 @@
|
|||
|
||||
- name: Configure Prometheus alert rules
|
||||
template:
|
||||
src: "prometheus/{{ item }}.j2"
|
||||
dest: "/etc/prometheus/{{ item }}"
|
||||
src: prometheus/alert.rules.yml.j2
|
||||
dest: /etc/prometheus/alert.rules.yml
|
||||
mode: 0644
|
||||
notify: Restart Prometheus
|
||||
loop:
|
||||
- alert.rules.yml
|
||||
- django.rules.yml
|
||||
|
||||
# We don't need to restart Prometheus when updating nodes
|
||||
- name: Configure Prometheus targets
|
||||
copy:
|
||||
content: "{{ [{'targets': item.targets}] | to_nice_json }}\n"
|
||||
dest: "/etc/prometheus/{{ item.file }}.json"
|
||||
content: "{{ [{'targets': item.value.targets}] | to_nice_json }}\n"
|
||||
dest: "/etc/prometheus/{{ item.value.file }}"
|
||||
mode: 0644
|
||||
loop:
|
||||
- file: targets
|
||||
targets: "{{ prometheus.node_targets }}"
|
||||
- file: targets_ups_snmp
|
||||
targets: "{{ prometheus.ups_snmp_targets }}"
|
||||
- file: targets_procurve_snmp
|
||||
targets: "{{ prometheus.procurve_snmp_targets }}"
|
||||
- file: targets_unifi_snmp
|
||||
targets: "{{ prometheus.unifi_snmp_targets }}"
|
||||
- file: targets_nginx
|
||||
targets: "{{ prometheus.nginx_targets }}"
|
||||
- file: targets_apache
|
||||
targets: "{{ prometheus.apache_targets }}"
|
||||
- file: targets_blackbox
|
||||
targets: "{{ prometheus.blackbox_targets }}"
|
||||
loop: "{{ prometheus | dict2items }}"
|
||||
|
||||
- name: Activate prometheus service
|
||||
systemd:
|
||||
|
|
|
@ -144,7 +144,7 @@ groups:
|
|||
description: "https://grafana.crans.org/d/qtbg59mZz/alimentation"
|
||||
|
||||
- alert: UpsWrongOutputVoltage
|
||||
expr: (upsOutputVoltage < 225) or (upsOutputVoltage > 235)
|
||||
expr: (upsOutputVoltage < 215) or (upsOutputVoltage > 245)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -161,29 +161,27 @@ groups:
|
|||
summary: "{{ $value }} paquet(s) APT sont inutile(s) sur {{ $labels.instance }}."
|
||||
|
||||
- alert: MailqNotEmpty
|
||||
expr: postfix_mailq_length > 5
|
||||
expr: postfix_mailq_length > 25
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $value }} mails dans la mailq sur {{ $labels.instance }}."
|
||||
|
||||
# NTP (need NTP plugin in node)
|
||||
# - alert: ntp_drifting
|
||||
# expr: node_ntp_drift_seconds > 0.05
|
||||
# for: 3m
|
||||
# labels:
|
||||
# severity: critical
|
||||
# annotations:
|
||||
# summary: "Décalage NTP trop élevé sur {{ $labels.instance }}"
|
||||
# description: "Le décalage NTP est trop élevé ({{ $value }} > 0.05)"
|
||||
- alert: NoRadiusLogin
|
||||
expr: rate(radiusd_access_ok[3m]) == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Personne ne vient taper le RADIUS."
|
||||
|
||||
- alert: TooManyReallocatedSectors
|
||||
expr: smartmon_reallocated_sector_ct_raw_value > 1e3
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.disk }} sur {{ $labels.instance }} a {{ $value }} secteurs réalloués."
|
||||
|
||||
# - alert: ntp_drifting
|
||||
# expr: node_ntp_drift_seconds > 0.01
|
||||
# for: 1m
|
||||
# labels:
|
||||
# severity: warning
|
||||
# annotations:
|
||||
# summary: "Décalage NTP élevé sur {{ $labels.instance }}"
|
||||
# description: "Le décalage NTP est élevé ({{ $value }} > 0.01)"
|
||||
{% endraw %}
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
{{ ansible_header | comment }}
|
||||
{# As this is also Jinja2 it will conflict without a raw block #}
|
||||
{% raw %}
|
||||
groups:
|
||||
- name: django.rules
|
||||
rules:
|
||||
- record: job:django_http_requests_before_middlewares_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_before_middlewares_total[30s])) BY (job)
|
||||
- record: job:django_http_requests_unknown_latency_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_unknown_latency_total[30s])) BY (job)
|
||||
- record: job:django_http_ajax_requests_total:sum_rate30s
|
||||
expr: sum(rate(django_http_ajax_requests_total[30s])) BY (job)
|
||||
- record: job:django_http_responses_before_middlewares_total:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_before_middlewares_total[30s])) BY (job)
|
||||
- record: job:django_http_requests_unknown_latency_including_middlewares_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_unknown_latency_including_middlewares_total[30s]))
|
||||
BY (job)
|
||||
- record: job:django_http_requests_body_total_bytes:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_body_total_bytes[30s])) BY (job)
|
||||
- record: job:django_http_responses_streaming_total:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_streaming_total[30s])) BY (job)
|
||||
- record: job:django_http_responses_body_total_bytes:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_body_total_bytes[30s])) BY (job)
|
||||
- record: job:django_http_requests_total:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job)
|
||||
- record: job:django_http_requests_total_by_method:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job, method)
|
||||
- record: job:django_http_requests_total_by_transport:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_transport[30s])) BY (job, transport)
|
||||
- record: job:django_http_requests_total_by_view:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
|
||||
view)
|
||||
- record: job:django_http_requests_total_by_view_transport_method:sum_rate30s
|
||||
expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
|
||||
view, transport, method)
|
||||
- record: job:django_http_responses_total_by_templatename:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_total_by_templatename[30s])) BY (job, templatename)
|
||||
- record: job:django_http_responses_total_by_status:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_total_by_status[30s])) BY (job, status)
|
||||
- record: job:django_http_responses_total_by_charset:sum_rate30s
|
||||
expr: sum(rate(django_http_responses_total_by_charset[30s])) BY (job, charset)
|
||||
- record: job:django_http_exceptions_total_by_type:sum_rate30s
|
||||
expr: sum(rate(django_http_exceptions_total_by_type[30s])) BY (job, type)
|
||||
- record: job:django_http_exceptions_total_by_view:sum_rate30s
|
||||
expr: sum(rate(django_http_exceptions_total_by_view[30s])) BY (job, view)
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "50"
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "95"
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99"
|
||||
- record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99.9"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "50"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "95"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99"
|
||||
- record: job:django_http_requests_latency_seconds:quantile_rate30s
|
||||
expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
|
||||
BY (job, le))
|
||||
labels:
|
||||
quantile: "99.9"
|
||||
- record: job:django_model_inserts_total:sum_rate1m
|
||||
expr: sum(rate(django_model_inserts_total[1m])) BY (job, model)
|
||||
- record: job:django_model_updates_total:sum_rate1m
|
||||
expr: sum(rate(django_model_updates_total[1m])) BY (job, model)
|
||||
- record: job:django_model_deletes_total:sum_rate1m
|
||||
expr: sum(rate(django_model_deletes_total[1m])) BY (job, model)
|
||||
- record: job:django_db_new_connections_total:sum_rate30s
|
||||
expr: sum(rate(django_db_new_connections_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_new_connection_errors_total:sum_rate30s
|
||||
expr: sum(rate(django_db_new_connection_errors_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_execute_total:sum_rate30s
|
||||
expr: sum(rate(django_db_execute_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_execute_many_total:sum_rate30s
|
||||
expr: sum(rate(django_db_execute_many_total[30s])) BY (alias, vendor)
|
||||
- record: job:django_db_errors_total:sum_rate30s
|
||||
expr: sum(rate(django_db_errors_total[30s])) BY (alias, vendor, type)
|
||||
- record: job:django_migrations_applied_total:max
|
||||
expr: max(django_migrations_applied_total) BY (job, connection)
|
||||
- record: job:django_migrations_unapplied_total:max
|
||||
expr: max(django_migrations_unapplied_total) BY (job, connection)
|
||||
{% endraw %}
|
|
@ -20,156 +20,23 @@ alerting:
|
|||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
- "alert.rules.yml" # Monitoring alerts, this is the file you may be searching!
|
||||
- "django.rules.yml" # Custom rules specific for Django project monitoring
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs:
|
||||
# The .json in file_sd_configs is dynamically reloaded
|
||||
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
- job_name: servers
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets.json'
|
||||
relabel_configs:
|
||||
# Do not put :9100 in instance name, rather here
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- source_labels: [__param_target]
|
||||
target_label: __address__
|
||||
replacement: '$1:9100'
|
||||
|
||||
{% if prometheus.ups_snmp_targets is defined %}
|
||||
- job_name: ups_snmp
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_ups_snmp.json'
|
||||
metrics_path: /snmp
|
||||
params:
|
||||
module: [eatonups]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9116
|
||||
{% endif %}
|
||||
|
||||
{% if prometheus.procurve_snmp_targets is defined %}
|
||||
- job_name: procurve_snmp
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_procurve_snmp.json'
|
||||
metrics_path: /snmp
|
||||
params:
|
||||
module: [procurve_switch]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9116
|
||||
{% endif %}
|
||||
|
||||
{% if prometheus.unifi_snmp_targets is defined %}
|
||||
- job_name: unifi_snmp
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_unifi_snmp.json'
|
||||
metrics_path: /snmp
|
||||
params:
|
||||
module: [ubiquiti_unifi]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9116
|
||||
{% endif %}
|
||||
|
||||
{% if prometheus.nginx_targets is defined %}
|
||||
- job_name: nginx
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_nginx.json'
|
||||
relabel_configs:
|
||||
# Do not put :9117 in instance name, rather here
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:9117'
|
||||
{% endif %}
|
||||
|
||||
{% if prometheus.apache_targets is defined %}
|
||||
- job_name: apache
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_apache.json'
|
||||
relabel_configs:
|
||||
# Do not put :9117 in instance name, rather here
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:9117'
|
||||
{% endif %}
|
||||
|
||||
{% if prometheus.blackbox_targets is defined %}
|
||||
- job_name: blackbox
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_blackbox.json'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx] # Look for an HTTP 200 response.
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 127.0.0.1:9115
|
||||
{% endif %}
|
||||
|
||||
- job_name: mtail
|
||||
static_configs:
|
||||
- targets: ["tealc.adm.crans.org"]
|
||||
relabel_configs:
|
||||
# Do not put :3903 in instance name, rather here
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:3903'
|
||||
|
||||
{% if prometheus.bird_targets is defined %}
|
||||
- job_name: bird
|
||||
file_sd_configs:
|
||||
- files:
|
||||
- '/etc/prometheus/targets_bird.json'
|
||||
relabel_configs:
|
||||
# Do not put :9324 in instance name, rather here
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
- source_labels: [instance]
|
||||
target_label: __address__
|
||||
replacement: '$1:9324'
|
||||
{% endif %}
|
||||
|
||||
- job_name: django
|
||||
scheme: https
|
||||
static_configs:
|
||||
- targets: []
|
||||
# Activate this line when the captive portal needs monitoring
|
||||
# - targets: ["portail-captif.crans.org:443"]
|
||||
{{
|
||||
{
|
||||
"scrape_configs":
|
||||
[
|
||||
{
|
||||
"job_name": "prometheus",
|
||||
"static_configs" : [
|
||||
{
|
||||
"targets": [
|
||||
"localhost:9090"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
] + (prometheus | json_query("*.config[0]"))
|
||||
} | to_nice_yaml(indent=2)
|
||||
}}
|
||||
|
|
Loading…
Reference in New Issue