[monitoring] PEPCRANS + monitoring of cachan infra
							parent
							
								
									3d528a1891
								
							
						
					
					
						commit
						f7347e41d2
					
				| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
---
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,7 @@
 | 
			
		|||
---
 | 
			
		||||
glob_grafana:
 | 
			
		||||
  root_url: https://grafana.crans.org
 | 
			
		||||
  icon: crans_icon_white.svg
 | 
			
		||||
  ldap_base: "{{ glob_ldap.base }}"
 | 
			
		||||
  ldap_master_ipv4: "{{ glob_ldap.servers[0] }}"
 | 
			
		||||
  ldap_user_tree: "ou=passwd,{{ glob_ldap.base }}"
 | 
			
		||||
| 
						 | 
				
			
			@ -30,3 +30,6 @@ glob_nginx:
 | 
			
		|||
    - "172.16.0.0/16"
 | 
			
		||||
    - "fd00:0:0:10::/64"
 | 
			
		||||
  deploy_robots_file: false
 | 
			
		||||
 | 
			
		||||
glob_prometheus_nginx_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,13 @@
 | 
			
		|||
---
 | 
			
		||||
glob_prometheus: {}
 | 
			
		||||
 | 
			
		||||
glob_snmp_exporter:
 | 
			
		||||
  procurve_password: "{{ vault.snmp_procurve_password }}"
 | 
			
		||||
  unifi_password: "{{ vault.snmp_unifi_password }}"
 | 
			
		||||
 | 
			
		||||
glob_ninjabot:
 | 
			
		||||
  config:
 | 
			
		||||
    nick: Prometheus
 | 
			
		||||
    server: irc.adm.crans.org
 | 
			
		||||
    port: 6667
 | 
			
		||||
    channel: "#monitoring"
 | 
			
		||||
| 
						 | 
				
			
			@ -18,3 +18,6 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,116 @@
 | 
			
		|||
---
 | 
			
		||||
interfaces:
 | 
			
		||||
  adm: ens18
 | 
			
		||||
 | 
			
		||||
loc_home_nounou:
 | 
			
		||||
  ip: 172.17.10.9
 | 
			
		||||
  mountpoint: /rpool/home
 | 
			
		||||
 | 
			
		||||
loc_ldap:
 | 
			
		||||
  servers:
 | 
			
		||||
    - 172.17.10.9
 | 
			
		||||
  base: 'dc=crans,dc=org'
 | 
			
		||||
 | 
			
		||||
loc_ntp_client:
 | 
			
		||||
  servers:
 | 
			
		||||
    - terenez.cachan-adm.crans.org
 | 
			
		||||
 | 
			
		||||
debian_mirror: http://172.17.10.202/debian
 | 
			
		||||
 | 
			
		||||
loc_mirror:
 | 
			
		||||
  name: mirror.cachan-adm.crans.org
 | 
			
		||||
  ip: "{{ query('ldap','ip','terenez','cachan-adm') | ipv4 | first }}"
 | 
			
		||||
 | 
			
		||||
loc_borg:
 | 
			
		||||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
 | 
			
		||||
glob_snmp_exporter:
 | 
			
		||||
  procurve_password: "{{ vault.snmp_procurve_password }}"
 | 
			
		||||
  unifi_password: "{{ vault.snmp_unifi_password }}"
 | 
			
		||||
 | 
			
		||||
loc_ninjabot:
 | 
			
		||||
  config:
 | 
			
		||||
    nick: fyre
 | 
			
		||||
    server: irc.adm.crans.org
 | 
			
		||||
    port: 6667
 | 
			
		||||
    channel: "#monitoring"
 | 
			
		||||
 | 
			
		||||
loc_prometheus:
 | 
			
		||||
  node:
 | 
			
		||||
    file: targets_node.json
 | 
			
		||||
    targets: "{{ groups['server'] | select('match', '^.*\\.cachan-adm\\.crans\\.org$')  | list | sort }}"
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: servers
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_node.json'
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: __param_target
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: __address__
 | 
			
		||||
            replacement: '$1:9100'
 | 
			
		||||
 | 
			
		||||
  ups_snmp:
 | 
			
		||||
    file: targets_ups_snmp.json
 | 
			
		||||
    targets:
 | 
			
		||||
      - pulsar.cachan-adm.crans.org  # 0B
 | 
			
		||||
      - quasar.cachan-adm.crans.org  # 4J
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: ups_snmp
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_ups_snmp.json'
 | 
			
		||||
        metrics_path: /snmp
 | 
			
		||||
        params:
 | 
			
		||||
          module: [eatonups]
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: __param_target
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - target_label: __address__
 | 
			
		||||
            replacement: 127.0.0.1:9116
 | 
			
		||||
 | 
			
		||||
  unifi_snmp:
 | 
			
		||||
    file: targets_unifi_snmp.json
 | 
			
		||||
    targets: "{{ groups['crans_unifi'] | list | sort }}"
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: unifi_snmp
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_unifi_snmp.json'
 | 
			
		||||
        metrics_path: /snmp
 | 
			
		||||
        params:
 | 
			
		||||
          module: [ubiquiti_unifi]
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: __param_target
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - target_label: __address__
 | 
			
		||||
            replacement: 127.0.0.1:9116
 | 
			
		||||
 | 
			
		||||
  nginx:
 | 
			
		||||
    file: targets_nginx.json
 | 
			
		||||
    targets:
 | 
			
		||||
      - rodauh.cachan-adm.crans.org
 | 
			
		||||
      - terenez.cachan-adm.crans.org
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: nginx
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_nginx.json'
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - source_labels: [instance]
 | 
			
		||||
            target_label: __address__
 | 
			
		||||
            replacement: '$1:9117'
 | 
			
		||||
| 
						 | 
				
			
			@ -25,3 +25,6 @@ loc_borg:
 | 
			
		|||
  to_exclude:
 | 
			
		||||
    - /var/lib/lxcfs
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,113 @@
 | 
			
		|||
interfaces:
 | 
			
		||||
  adm: eth0
 | 
			
		||||
  srv_nat: eth1
 | 
			
		||||
  infra: eth2
 | 
			
		||||
 | 
			
		||||
loc_prometheus:
 | 
			
		||||
  node:
 | 
			
		||||
    file: targets_node.json
 | 
			
		||||
    targets: "{{ groups['server'] | select('match', '^.*\\.adm\\.crans\\.org$')  | list | sort }}"
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: servers
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_node.json'
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: __param_target
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: __address__
 | 
			
		||||
            replacement: '$1:9100'
 | 
			
		||||
 | 
			
		||||
  nginx:
 | 
			
		||||
    file: targets_nginx.json
 | 
			
		||||
    targets:
 | 
			
		||||
      - hodaur.adm.crans.org
 | 
			
		||||
      - charybde.adm.crans.org
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: nginx
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_nginx.json'
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - source_labels: [instance]
 | 
			
		||||
            target_label: __address__
 | 
			
		||||
            replacement: '$1:9117'
 | 
			
		||||
 | 
			
		||||
  blackbox:
 | 
			
		||||
    file: targets_blackbox.json
 | 
			
		||||
    targets:
 | 
			
		||||
      - https://crans.org/
 | 
			
		||||
      - https://www.crans.org/
 | 
			
		||||
      - https://webirc.crans.org/
 | 
			
		||||
      - https://jitsi.crans.org/
 | 
			
		||||
      - https://ftps.crans.org/
 | 
			
		||||
      - http://ftp.crans.org/
 | 
			
		||||
      - https://grafana.crans.org/
 | 
			
		||||
      - https://roundcube.crans.org/
 | 
			
		||||
      - https://zero.crans.org/
 | 
			
		||||
      - https://wiki.crans.org/PageAccueil
 | 
			
		||||
      - https://framadate.crans.org/
 | 
			
		||||
      - https://pad.crans.org/
 | 
			
		||||
      - https://lists.crans.org/
 | 
			
		||||
      - https://cas.crans.org/
 | 
			
		||||
      - https://ethercalc.crans.org/
 | 
			
		||||
      - https://phabricator.crans.org/
 | 
			
		||||
      - https://webmail.crans.org/horde/login.php
 | 
			
		||||
      - https://gitlab.crans.org/
 | 
			
		||||
      - https://perso.crans.org/crans/
 | 
			
		||||
      - https://install-party.crans.org/
 | 
			
		||||
      - https://intranet.crans.org/
 | 
			
		||||
      - https://owncloud.crans.org/
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: blackbox
 | 
			
		||||
        file_sd_configs:
 | 
			
		||||
          - files:
 | 
			
		||||
            - '/etc/prometheus/targets_blackbox.json'
 | 
			
		||||
        metrics_path: /probe
 | 
			
		||||
        params:
 | 
			
		||||
          module: [http_2xx]  # Look for a HTTP 200 response.
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: __param_target
 | 
			
		||||
          - source_labels: [__param_target]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - target_label: __address__
 | 
			
		||||
            replacement: 127.0.0.1:9115
 | 
			
		||||
 | 
			
		||||
  mtail:
 | 
			
		||||
    file: targets_mtail.json
 | 
			
		||||
    targets:
 | 
			
		||||
      - tealc.adm.crans.org
 | 
			
		||||
    config:
 | 
			
		||||
      - job_name: mtail
 | 
			
		||||
        static_configs:
 | 
			
		||||
          - targets: ["tealc.adm.crans.org"]
 | 
			
		||||
        relabel_configs:
 | 
			
		||||
          - source_labels: [__address__]
 | 
			
		||||
            target_label: instance
 | 
			
		||||
          - source_labels: [instance]
 | 
			
		||||
            target_label: __address__
 | 
			
		||||
            replacement: '$1:3903'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#  apache:
 | 
			
		||||
#    targets:
 | 
			
		||||
#    config:
 | 
			
		||||
#      - job_name: apache
 | 
			
		||||
#        file_sd_configs:
 | 
			
		||||
#          - files:
 | 
			
		||||
#            - '/etc/prometheus/targets_apache.json'
 | 
			
		||||
#        relabel_configs:
 | 
			
		||||
#          - source_labels: [__address__]
 | 
			
		||||
#            target_label: instance
 | 
			
		||||
#          - source_labels: [instance]
 | 
			
		||||
#            target_label: __address__
 | 
			
		||||
#            replacement: '$1:9117'
 | 
			
		||||
 | 
			
		||||
#  bird_targets:
 | 
			
		||||
#    - routeur-sam.adm.crans.org
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,3 +25,6 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,3 +18,6 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,6 +14,9 @@ glob_ntp_client:
 | 
			
		|||
 | 
			
		||||
debian_mirror: http://172.17.10.202/debian
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
 | 
			
		||||
loc_borg:
 | 
			
		||||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,3 +37,9 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
 | 
			
		||||
glob_prometheus_nginx_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,3 +22,6 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,3 +18,9 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
 | 
			
		||||
glob_prometheus_nginx_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -23,3 +23,6 @@ loc_borg:
 | 
			
		|||
  remote:
 | 
			
		||||
    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
 | 
			
		||||
  ssh_options: ""
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,3 +27,6 @@ loc_borg:
 | 
			
		|||
  ssh_options: ""
 | 
			
		||||
  to_exclude:
 | 
			
		||||
    - /var/lib/backuppc
 | 
			
		||||
 | 
			
		||||
glob_prometheus_node_exporter:
 | 
			
		||||
  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										14
									
								
								hosts
								
								
								
								
							
							
						
						
									
										14
									
								
								hosts
								
								
								
								
							| 
						 | 
				
			
			@ -18,6 +18,9 @@ tealc.adm.crans.org
 | 
			
		|||
tealc.adm.crans.org
 | 
			
		||||
gulp.cachan-adm.crans.org
 | 
			
		||||
 | 
			
		||||
[blackbox]
 | 
			
		||||
monitoring.adm.crans.org
 | 
			
		||||
 | 
			
		||||
[bdd:children]
 | 
			
		||||
virtu
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -81,6 +84,9 @@ neree.adm.crans.org
 | 
			
		|||
[gitlab]
 | 
			
		||||
gitzly.adm.crans.org
 | 
			
		||||
 | 
			
		||||
[grafana]
 | 
			
		||||
monitoring.adm.crans.org
 | 
			
		||||
 | 
			
		||||
[horde]
 | 
			
		||||
horde.adm.crans.org
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -99,17 +105,16 @@ linx.adm.crans.org
 | 
			
		|||
[mailman]
 | 
			
		||||
mailman.adm.crans.org
 | 
			
		||||
 | 
			
		||||
[monitoring]
 | 
			
		||||
[prometheus]
 | 
			
		||||
monitoring.adm.crans.org
 | 
			
		||||
 | 
			
		||||
[nginx]
 | 
			
		||||
charybde.adm.crans.org
 | 
			
		||||
fyre.cachan-adm.crans.org
 | 
			
		||||
 | 
			
		||||
[nginx:children]
 | 
			
		||||
django_cas
 | 
			
		||||
galene
 | 
			
		||||
jitsi
 | 
			
		||||
mailman
 | 
			
		||||
ntp_server
 | 
			
		||||
re2o_front
 | 
			
		||||
reverseproxy
 | 
			
		||||
roundcube
 | 
			
		||||
| 
						 | 
				
			
			@ -212,6 +217,7 @@ cas.adm.crans.org
 | 
			
		|||
codichotomie.adm.crans.org
 | 
			
		||||
ethercalc.adm.crans.org
 | 
			
		||||
fluxx.adm.crans.org
 | 
			
		||||
fyre.cachan-adm.crans.org
 | 
			
		||||
gitlab-ci.adm.crans.org
 | 
			
		||||
gitzly.adm.crans.org
 | 
			
		||||
hodaur.adm.crans.org
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,81 +1,43 @@
 | 
			
		|||
#!/usr/bin/env ansible-playbook
 | 
			
		||||
---
 | 
			
		||||
# Deploy Prometheus and Grafana on monitoring server
 | 
			
		||||
- hosts: monitoring
 | 
			
		||||
 | 
			
		||||
# Deploy Prometheus on monitoring server
 | 
			
		||||
- hosts: prometheus
 | 
			
		||||
  vars:
 | 
			
		||||
    # Prometheus targets.json
 | 
			
		||||
    prometheus:
 | 
			
		||||
      node_targets: "{{ groups['server'] | list | sort }}"
 | 
			
		||||
      ups_snmp_targets:
 | 
			
		||||
        - pulsar.adm.crans.org  # 0B
 | 
			
		||||
        - quasar.adm.crans.org  # 4J
 | 
			
		||||
      procurve_snmp_targets:
 | 
			
		||||
        - batg-9.infra.crans.org
 | 
			
		||||
      unifi_snmp_targets: "{{ groups['crans_unifi'] | list | sort }}"
 | 
			
		||||
      blackbox_targets:
 | 
			
		||||
        - https://crans.org/
 | 
			
		||||
        - https://www.crans.org/
 | 
			
		||||
        - https://webirc.crans.org/
 | 
			
		||||
        - https://jitsi.crans.org/
 | 
			
		||||
        - https://ftps.crans.org/
 | 
			
		||||
        - http://ftp.crans.org/
 | 
			
		||||
        - https://grafana.crans.org/
 | 
			
		||||
        - https://roundcube.crans.org/
 | 
			
		||||
        - https://zero.crans.org/
 | 
			
		||||
        - https://wiki.crans.org/PageAccueil
 | 
			
		||||
        - https://framadate.crans.org/
 | 
			
		||||
        - https://pad.crans.org/
 | 
			
		||||
        - https://lists.crans.org/
 | 
			
		||||
        - https://cas.crans.org/
 | 
			
		||||
        - https://ethercalc.crans.org/
 | 
			
		||||
        - https://phabricator.crans.org/
 | 
			
		||||
        - https://webmail.crans.org/horde/login.php
 | 
			
		||||
        - https://gitlab.crans.org/
 | 
			
		||||
        - https://perso.crans.org/crans/
 | 
			
		||||
        - https://install-party.crans.org/
 | 
			
		||||
        - https://intranet.crans.org/
 | 
			
		||||
        - https://owncloud.crans.org/
 | 
			
		||||
      nginx_targets:
 | 
			
		||||
        - hodaur.adm.crans.org
 | 
			
		||||
        - charybde.adm.crans.org
 | 
			
		||||
      apache_targets: []  # [zamok.adm.crans.org]
 | 
			
		||||
      bird_targets:
 | 
			
		||||
        - routeur-sam.adm.crans.org
 | 
			
		||||
 | 
			
		||||
    snmp_procurve_password: "{{ vault.snmp_procurve_password }}"
 | 
			
		||||
    snmp_unifi_password: "{{ vault.snmp_unifi_password }}"
 | 
			
		||||
 | 
			
		||||
    grafana:
 | 
			
		||||
      root_url: https://grafana.crans.org
 | 
			
		||||
      icon: crans_icon_white.svg
 | 
			
		||||
      ldap_base: "{{ glob_ldap.base }}"
 | 
			
		||||
      ldap_master_ipv4: "{{ glob_ldap.servers[0] }}"
 | 
			
		||||
      ldap_user_tree: "ou=passwd,{{ glob_ldap.base }}"
 | 
			
		||||
    prometheus: "{{ glob_prometheus | default({}) | combine(loc_prometheus | default({})) }}"
 | 
			
		||||
    alertmanager: "{{ glob_alertmanager | default({}) | combine(loc_alertmanager | default({})) }}"
 | 
			
		||||
    snmp_exporter: "{{ glob_snmp_exporter | default({}) | combine(loc_snmp_exporter | default({})) }}"
 | 
			
		||||
    ninjabot: "{{ glob_ninjabot | default({}) | combine(loc_ninjabot | default({})) }}"
 | 
			
		||||
  roles:
 | 
			
		||||
    - prometheus
 | 
			
		||||
    - prometheus-alertmanager
 | 
			
		||||
    - prometheus-snmp-exporter
 | 
			
		||||
    - prometheus-blackbox-exporter
 | 
			
		||||
    - ninjabot
 | 
			
		||||
    - grafana
 | 
			
		||||
 | 
			
		||||
# # Deploy Grafana on monitoring server
 | 
			
		||||
# - hosts: grafana
 | 
			
		||||
#   vars:
 | 
			
		||||
#     grafana: "{{ glob_grafana | default({}) | combine(loc_grafana | default({})) }}"
 | 
			
		||||
#   roles:
 | 
			
		||||
#     - grafana
 | 
			
		||||
 | 
			
		||||
- hosts: blackbox
 | 
			
		||||
  roles:
 | 
			
		||||
    - prometheus-blackbox-exporter
 | 
			
		||||
 | 
			
		||||
# Monitor all hosts
 | 
			
		||||
- hosts: server
 | 
			
		||||
  vars:
 | 
			
		||||
    adm_ipv4: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
 | 
			
		||||
  roles: ["prometheus-node-exporter"]
 | 
			
		||||
    prometheus_node_exporter: "{{ glob_prometheus_node_exporter | default({}) | combine(loc_prometheus_node_exporter | default({})) }}"
 | 
			
		||||
  roles:
 | 
			
		||||
    - prometheus-node-exporter
 | 
			
		||||
 | 
			
		||||
# Export nginx metrics
 | 
			
		||||
- hosts: nginx
 | 
			
		||||
  vars:
 | 
			
		||||
    adm_ipv4: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
 | 
			
		||||
  roles: ["prometheus-nginx-exporter"]
 | 
			
		||||
 | 
			
		||||
# Export apache metrics
 | 
			
		||||
#- hosts: zamok.adm.crans.org
 | 
			
		||||
#  vars:
 | 
			
		||||
#    adm_ipv4: "{{ ansible_all_ipv4_addresses | ipaddr(adm_subnet) | first }}"
 | 
			
		||||
#  roles: ["prometheus-apache-exporter"]
 | 
			
		||||
    prometheus_nginx_exporter: "{{ glob_prometheus_nginx_exporter | default({}) | combine(loc_prometheus_nginx_exporter | default({})) }}"
 | 
			
		||||
  roles:
 | 
			
		||||
    - prometheus-nginx-exporter
 | 
			
		||||
 | 
			
		||||
# Monitor mailq with a special text exporter
 | 
			
		||||
#- hosts: redisdead.adm.crans.org
 | 
			
		||||
| 
						 | 
				
			
			@ -85,7 +47,9 @@
 | 
			
		|||
- hosts: tealc.adm.crans.org
 | 
			
		||||
  vars:
 | 
			
		||||
    mirror: '{{ glob_mirror | default({}) | combine(loc_mirror | default({})) }}'
 | 
			
		||||
  roles: ["mtail"]
 | 
			
		||||
  roles:
 | 
			
		||||
    - mtail
 | 
			
		||||
 | 
			
		||||
- hosts: sputnik.adm.crans.org
 | 
			
		||||
  roles: ["statping"]
 | 
			
		||||
  roles:
 | 
			
		||||
    - statping
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -17,6 +17,11 @@
 | 
			
		|||
    dest: /var/local/ninjabot
 | 
			
		||||
    version: master
 | 
			
		||||
 | 
			
		||||
- name: Deploy NinjaBot configuration
 | 
			
		||||
  template:
 | 
			
		||||
    src: ninjabot/ninjabot.json.j2
 | 
			
		||||
    dest: /var/local/ninjabot/ninjabot.json
 | 
			
		||||
 | 
			
		||||
- name: Deploy NinjaBot systemd unit
 | 
			
		||||
  template:
 | 
			
		||||
    src: systemd/system/ninjabot.service.j2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
{{ ninjabot.config | to_nice_json(indent=2) }}
 | 
			
		||||
| 
						 | 
				
			
			@ -8,7 +8,7 @@ Type=simple
 | 
			
		|||
WorkingDirectory=/var/local/ninjabot
 | 
			
		||||
User=nobody
 | 
			
		||||
Group=nogroup
 | 
			
		||||
ExecStart=/usr/bin/python3 /var/local/ninjabot/main.py
 | 
			
		||||
ExecStart=/usr/bin/python3 /var/local/ninjabot/ninjabot.py
 | 
			
		||||
Restart=always
 | 
			
		||||
 | 
			
		||||
[Install]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,7 +14,7 @@
 | 
			
		|||
    path: /etc/default/prometheus-nginx-exporter
 | 
			
		||||
    regexp: '^ARGS='
 | 
			
		||||
    line: |
 | 
			
		||||
      ARGS="-web.listen-address={{ adm_ipv4 }}:9117 -nginx.scrape-uri=http://[::1]:6424/stub_status"
 | 
			
		||||
      ARGS="-web.listen-address={{ prometheus_nginx_exporter.listen_addr }}:9117 -nginx.scrape-uri=http://[::1]:6424/stub_status"
 | 
			
		||||
  notify:
 | 
			
		||||
    - Restart nginx
 | 
			
		||||
    - Restart prometheus-nginx-exporter
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,21 +7,6 @@
 | 
			
		|||
  register: apt_result
 | 
			
		||||
  retries: 3
 | 
			
		||||
  until: apt_result is succeeded
 | 
			
		||||
  when:
 | 
			
		||||
    - ansible_lsb.codename != 'stretch'
 | 
			
		||||
 | 
			
		||||
# Prometheus 2 node is in stretch-backports
 | 
			
		||||
- name: Install Prometheus node-exporter (stretch-backports)
 | 
			
		||||
  apt:
 | 
			
		||||
    update_cache: true
 | 
			
		||||
    name: prometheus-node-exporter
 | 
			
		||||
    install_recommends: false
 | 
			
		||||
    default_release: stretch-backports
 | 
			
		||||
  register: apt_result
 | 
			
		||||
  retries: 3
 | 
			
		||||
  until: apt_result is succeeded
 | 
			
		||||
  when:
 | 
			
		||||
    - ansible_lsb.codename == 'stretch'
 | 
			
		||||
 | 
			
		||||
- name: Install Prometheus node-exporter-collectors (bullseye)
 | 
			
		||||
  apt:
 | 
			
		||||
| 
						 | 
				
			
			@ -45,7 +30,7 @@
 | 
			
		|||
    path: /etc/default/prometheus-node-exporter
 | 
			
		||||
    regexp: '^ARGS='
 | 
			
		||||
    line: |
 | 
			
		||||
      ARGS="--web.listen-address={{ adm_ipv4 }}:9100"
 | 
			
		||||
      ARGS="--web.listen-address={{ prometheus_node_exporter.listen_addr }}:9100"
 | 
			
		||||
  tags: restart-node-exporter
 | 
			
		||||
 | 
			
		||||
# Install new APT textfile collector, it might be upstreamed one day
 | 
			
		||||
| 
						 | 
				
			
			@ -57,15 +42,4 @@
 | 
			
		|||
    owner: root
 | 
			
		||||
    group: root
 | 
			
		||||
    mode: 0755
 | 
			
		||||
  when: ansible_lsb.id == 'Debian' and ansible_distribution_release != "bullseye"
 | 
			
		||||
 | 
			
		||||
# Install new APT textfile collector, it might be upstreamed one day
 | 
			
		||||
# https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/pull/35
 | 
			
		||||
- name: Patch APT textfile collector
 | 
			
		||||
  copy:
 | 
			
		||||
    src: apt.sh
 | 
			
		||||
    dest: /usr/share/prometheus-node-exporter-collectors/apt.sh
 | 
			
		||||
    owner: root
 | 
			
		||||
    group: root
 | 
			
		||||
    mode: 0755
 | 
			
		||||
  when: ansible_lsb.id == 'Ubuntu' or ansible_distribution_release == "bullseye"
 | 
			
		||||
  when: ansible_distribution_release != "bullseye"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -113,14 +113,14 @@ procurve_switch:
 | 
			
		|||
  version: 3
 | 
			
		||||
  auth:
 | 
			
		||||
    # To create SNMPv3 user on HP procurve, execute:
 | 
			
		||||
    # snmpv3 user snmp_prometheus auth sha {{ snmp_procurve_password }} priv aes {{ snmp_procurve_password }}
 | 
			
		||||
    # snmpv3 user snmp_prometheus auth sha {{ snmp_exporter.procurve_password }} priv aes {{ snmp_exporter.procurve_password }}
 | 
			
		||||
    # snmpv3 group managerpriv user snmp_prometheus sec-model ver3
 | 
			
		||||
    security_level: authPriv
 | 
			
		||||
    username: snmp_prometheus
 | 
			
		||||
    password: {{ snmp_procurve_password }}
 | 
			
		||||
    password: {{ snmp_exporter.procurve_password }}
 | 
			
		||||
    auth_protocol: SHA
 | 
			
		||||
    priv_protocol: AES
 | 
			
		||||
    priv_password: {{ snmp_procurve_password }}
 | 
			
		||||
    priv_password: {{ snmp_exporter.procurve_password }}
 | 
			
		||||
 | 
			
		||||
ubiquiti_unifi:
 | 
			
		||||
  walk:
 | 
			
		||||
| 
						 | 
				
			
			@ -475,7 +475,7 @@ ubiquiti_unifi:
 | 
			
		|||
  auth:
 | 
			
		||||
    security_level: authPriv
 | 
			
		||||
    username: snmp_prometheus
 | 
			
		||||
    password: {{ snmp_unifi_password }}
 | 
			
		||||
    password: {{ snmp_exporter.unifi_password }}
 | 
			
		||||
    auth_protocol: SHA
 | 
			
		||||
    priv_protocol: AES
 | 
			
		||||
    priv_password: {{ snmp_unifi_password }}
 | 
			
		||||
    priv_password: {{ snmp_exporter.unifi_password }}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,35 +16,18 @@
 | 
			
		|||
 | 
			
		||||
- name: Configure Prometheus alert rules
 | 
			
		||||
  template:
 | 
			
		||||
    src: "prometheus/{{ item }}.j2"
 | 
			
		||||
    dest: "/etc/prometheus/{{ item }}"
 | 
			
		||||
    src: prometheus/alert.rules.yml.j2
 | 
			
		||||
    dest: /etc/prometheus/alert.rules.yml
 | 
			
		||||
    mode: 0644
 | 
			
		||||
  notify: Restart Prometheus
 | 
			
		||||
  loop:
 | 
			
		||||
    - alert.rules.yml
 | 
			
		||||
    - django.rules.yml
 | 
			
		||||
 | 
			
		||||
# We don't need to restart Prometheus when updating nodes
 | 
			
		||||
- name: Configure Prometheus targets
 | 
			
		||||
  copy:
 | 
			
		||||
    content: "{{ [{'targets': item.targets}] | to_nice_json }}\n"
 | 
			
		||||
    dest: "/etc/prometheus/{{ item.file }}.json"
 | 
			
		||||
    content: "{{ [{'targets': item.value.targets}] | to_nice_json }}\n"
 | 
			
		||||
    dest: "/etc/prometheus/{{ item.value.file }}"
 | 
			
		||||
    mode: 0644
 | 
			
		||||
  loop:
 | 
			
		||||
    - file: targets
 | 
			
		||||
      targets: "{{ prometheus.node_targets }}"
 | 
			
		||||
    - file: targets_ups_snmp
 | 
			
		||||
      targets: "{{ prometheus.ups_snmp_targets }}"
 | 
			
		||||
    - file: targets_procurve_snmp
 | 
			
		||||
      targets: "{{ prometheus.procurve_snmp_targets }}"
 | 
			
		||||
    - file: targets_unifi_snmp
 | 
			
		||||
      targets: "{{ prometheus.unifi_snmp_targets }}"
 | 
			
		||||
    - file: targets_nginx
 | 
			
		||||
      targets: "{{ prometheus.nginx_targets }}"
 | 
			
		||||
    - file: targets_apache
 | 
			
		||||
      targets: "{{ prometheus.apache_targets }}"
 | 
			
		||||
    - file: targets_blackbox
 | 
			
		||||
      targets: "{{ prometheus.blackbox_targets }}"
 | 
			
		||||
  loop: "{{ prometheus | dict2items }}"
 | 
			
		||||
 | 
			
		||||
- name: Activate prometheus service
 | 
			
		||||
  systemd:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -144,7 +144,7 @@ groups:
 | 
			
		|||
      description: "https://grafana.crans.org/d/qtbg59mZz/alimentation"
 | 
			
		||||
 | 
			
		||||
  - alert: UpsWrongOutputVoltage
 | 
			
		||||
    expr: (upsOutputVoltage < 225) or (upsOutputVoltage > 235)
 | 
			
		||||
    expr: (upsOutputVoltage < 215) or (upsOutputVoltage > 245)
 | 
			
		||||
    for: 5m
 | 
			
		||||
    labels:
 | 
			
		||||
      severity: warning
 | 
			
		||||
| 
						 | 
				
			
			@ -161,29 +161,27 @@ groups:
 | 
			
		|||
      summary: "{{ $value }} paquet(s) APT sont inutile(s) sur {{ $labels.instance }}."
 | 
			
		||||
 | 
			
		||||
  - alert: MailqNotEmpty
 | 
			
		||||
    expr: postfix_mailq_length > 5
 | 
			
		||||
    expr: postfix_mailq_length > 25
 | 
			
		||||
    for: 1m
 | 
			
		||||
    labels:
 | 
			
		||||
      severity: warning
 | 
			
		||||
    annotations:
 | 
			
		||||
      summary: "{{ $value }} mails dans la mailq sur {{ $labels.instance }}."
 | 
			
		||||
 | 
			
		||||
  # NTP (need NTP plugin in node)
 | 
			
		||||
#  - alert: ntp_drifting
 | 
			
		||||
#    expr: node_ntp_drift_seconds > 0.05
 | 
			
		||||
#    for: 3m
 | 
			
		||||
#    labels:
 | 
			
		||||
#      severity: critical
 | 
			
		||||
#    annotations:
 | 
			
		||||
#      summary: "Décalage NTP trop élevé sur {{ $labels.instance }}"
 | 
			
		||||
#      description: "Le décalage NTP est trop élevé ({{ $value }} > 0.05)"
 | 
			
		||||
  - alert: NoRadiusLogin
 | 
			
		||||
    expr: rate(radiusd_access_ok[3m]) == 0
 | 
			
		||||
    for: 2m
 | 
			
		||||
    labels:
 | 
			
		||||
      severity: warning
 | 
			
		||||
    annotations:
 | 
			
		||||
      summary: "Personne ne vient taper le RADIUS."
 | 
			
		||||
 | 
			
		||||
  - alert: TooManyReallocatedSectors
 | 
			
		||||
    expr: smartmon_reallocated_sector_ct_raw_value > 1e3
 | 
			
		||||
    for: 5m
 | 
			
		||||
    labels:
 | 
			
		||||
      severity: warning
 | 
			
		||||
    annotations:
 | 
			
		||||
      summary: "{{ $labels.disk }} sur {{ $labels.instance }} a {{ $value }} secteurs réalloués."
 | 
			
		||||
 | 
			
		||||
#  - alert: ntp_drifting
 | 
			
		||||
#    expr: node_ntp_drift_seconds > 0.01
 | 
			
		||||
#    for: 1m
 | 
			
		||||
#    labels:
 | 
			
		||||
#      severity: warning
 | 
			
		||||
#    annotations:
 | 
			
		||||
#      summary: "Décalage NTP élevé sur {{ $labels.instance }}"
 | 
			
		||||
#      description: "Le décalage NTP est élevé ({{ $value }} > 0.01)"
 | 
			
		||||
{% endraw %}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,106 +0,0 @@
 | 
			
		|||
{{ ansible_header | comment }}
 | 
			
		||||
{# As this is also Jinja2 it will conflict without a raw block #}
 | 
			
		||||
{% raw %}
 | 
			
		||||
groups:
 | 
			
		||||
- name: django.rules
 | 
			
		||||
  rules:
 | 
			
		||||
  - record: job:django_http_requests_before_middlewares_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_before_middlewares_total[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_requests_unknown_latency_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_unknown_latency_total[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_ajax_requests_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_ajax_requests_total[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_responses_before_middlewares_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_responses_before_middlewares_total[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_requests_unknown_latency_including_middlewares_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_unknown_latency_including_middlewares_total[30s]))
 | 
			
		||||
      BY (job)
 | 
			
		||||
  - record: job:django_http_requests_body_total_bytes:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_body_total_bytes[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_responses_streaming_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_responses_streaming_total[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_responses_body_total_bytes:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_responses_body_total_bytes[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_requests_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job)
 | 
			
		||||
  - record: job:django_http_requests_total_by_method:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job, method)
 | 
			
		||||
  - record: job:django_http_requests_total_by_transport:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_total_by_transport[30s])) BY (job, transport)
 | 
			
		||||
  - record: job:django_http_requests_total_by_view:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
 | 
			
		||||
      view)
 | 
			
		||||
  - record: job:django_http_requests_total_by_view_transport_method:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
 | 
			
		||||
      view, transport, method)
 | 
			
		||||
  - record: job:django_http_responses_total_by_templatename:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_responses_total_by_templatename[30s])) BY (job, templatename)
 | 
			
		||||
  - record: job:django_http_responses_total_by_status:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_responses_total_by_status[30s])) BY (job, status)
 | 
			
		||||
  - record: job:django_http_responses_total_by_charset:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_responses_total_by_charset[30s])) BY (job, charset)
 | 
			
		||||
  - record: job:django_http_exceptions_total_by_type:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_exceptions_total_by_type[30s])) BY (job, type)
 | 
			
		||||
  - record: job:django_http_exceptions_total_by_view:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_http_exceptions_total_by_view[30s])) BY (job, view)
 | 
			
		||||
  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "50"
 | 
			
		||||
  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "95"
 | 
			
		||||
  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "99"
 | 
			
		||||
  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "99.9"
 | 
			
		||||
  - record: job:django_http_requests_latency_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "50"
 | 
			
		||||
  - record: job:django_http_requests_latency_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "95"
 | 
			
		||||
  - record: job:django_http_requests_latency_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "99"
 | 
			
		||||
  - record: job:django_http_requests_latency_seconds:quantile_rate30s
 | 
			
		||||
    expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
 | 
			
		||||
      BY (job, le))
 | 
			
		||||
    labels:
 | 
			
		||||
      quantile: "99.9"
 | 
			
		||||
  - record: job:django_model_inserts_total:sum_rate1m
 | 
			
		||||
    expr: sum(rate(django_model_inserts_total[1m])) BY (job, model)
 | 
			
		||||
  - record: job:django_model_updates_total:sum_rate1m
 | 
			
		||||
    expr: sum(rate(django_model_updates_total[1m])) BY (job, model)
 | 
			
		||||
  - record: job:django_model_deletes_total:sum_rate1m
 | 
			
		||||
    expr: sum(rate(django_model_deletes_total[1m])) BY (job, model)
 | 
			
		||||
  - record: job:django_db_new_connections_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_db_new_connections_total[30s])) BY (alias, vendor)
 | 
			
		||||
  - record: job:django_db_new_connection_errors_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_db_new_connection_errors_total[30s])) BY (alias, vendor)
 | 
			
		||||
  - record: job:django_db_execute_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_db_execute_total[30s])) BY (alias, vendor)
 | 
			
		||||
  - record: job:django_db_execute_many_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_db_execute_many_total[30s])) BY (alias, vendor)
 | 
			
		||||
  - record: job:django_db_errors_total:sum_rate30s
 | 
			
		||||
    expr: sum(rate(django_db_errors_total[30s])) BY (alias, vendor, type)
 | 
			
		||||
  - record: job:django_migrations_applied_total:max
 | 
			
		||||
    expr: max(django_migrations_applied_total) BY (job, connection)
 | 
			
		||||
  - record: job:django_migrations_unapplied_total:max
 | 
			
		||||
    expr: max(django_migrations_unapplied_total) BY (job, connection)
 | 
			
		||||
{% endraw %}
 | 
			
		||||
| 
						 | 
				
			
			@ -20,156 +20,23 @@ alerting:
 | 
			
		|||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
 | 
			
		||||
rule_files:
 | 
			
		||||
  - "alert.rules.yml"  # Monitoring alerts, this is the file you may be searching!
 | 
			
		||||
  - "django.rules.yml"  # Custom rules specific for Django project monitoring
 | 
			
		||||
 | 
			
		||||
# A scrape configuration containing exactly one endpoint to scrape:
 | 
			
		||||
# Here it's Prometheus itself.
 | 
			
		||||
scrape_configs:
 | 
			
		||||
  # The .json in file_sd_configs is dynamically reloaded
 | 
			
		||||
 | 
			
		||||
  - job_name: prometheus
 | 
			
		||||
    static_configs:
 | 
			
		||||
      - targets:
 | 
			
		||||
        - localhost:9090
 | 
			
		||||
 | 
			
		||||
  - job_name: servers
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets.json'
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      # Do not put :9100 in instance name, rather here
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: __param_target
 | 
			
		||||
      - source_labels: [__param_target]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - source_labels: [__param_target]
 | 
			
		||||
        target_label: __address__
 | 
			
		||||
        replacement: '$1:9100'
 | 
			
		||||
 | 
			
		||||
{% if prometheus.ups_snmp_targets is defined %}
 | 
			
		||||
  - job_name: ups_snmp
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_ups_snmp.json'
 | 
			
		||||
    metrics_path: /snmp
 | 
			
		||||
    params:
 | 
			
		||||
      module: [eatonups]
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: __param_target
 | 
			
		||||
      - source_labels: [__param_target]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - target_label: __address__
 | 
			
		||||
        replacement: 127.0.0.1:9116
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
{% if prometheus.procurve_snmp_targets is defined %}
 | 
			
		||||
  - job_name: procurve_snmp
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_procurve_snmp.json'
 | 
			
		||||
    metrics_path: /snmp
 | 
			
		||||
    params:
 | 
			
		||||
      module: [procurve_switch]
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: __param_target
 | 
			
		||||
      - source_labels: [__param_target]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - target_label: __address__
 | 
			
		||||
        replacement: 127.0.0.1:9116
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
{% if prometheus.unifi_snmp_targets is defined %}
 | 
			
		||||
  - job_name: unifi_snmp
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_unifi_snmp.json'
 | 
			
		||||
    metrics_path: /snmp
 | 
			
		||||
    params:
 | 
			
		||||
      module: [ubiquiti_unifi]
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: __param_target
 | 
			
		||||
      - source_labels: [__param_target]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - target_label: __address__
 | 
			
		||||
        replacement: 127.0.0.1:9116
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
{% if prometheus.nginx_targets is defined %}
 | 
			
		||||
  - job_name: nginx
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_nginx.json'
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      # Do not put :9117 in instance name, rather here
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - source_labels: [instance]
 | 
			
		||||
        target_label: __address__
 | 
			
		||||
        replacement: '$1:9117'
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
{% if prometheus.apache_targets is defined %}
 | 
			
		||||
  - job_name: apache
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_apache.json'
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      # Do not put :9117 in instance name, rather here
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - source_labels: [instance]
 | 
			
		||||
        target_label: __address__
 | 
			
		||||
        replacement: '$1:9117'
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
{% if prometheus.blackbox_targets is defined %}
 | 
			
		||||
  - job_name: blackbox
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_blackbox.json'
 | 
			
		||||
    metrics_path: /probe
 | 
			
		||||
    params:
 | 
			
		||||
      module: [http_2xx]  # Look for a HTTP 200 response.
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: __param_target
 | 
			
		||||
      - source_labels: [__param_target]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - target_label: __address__
 | 
			
		||||
        replacement: 127.0.0.1:9115
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
  - job_name: mtail
 | 
			
		||||
    static_configs:
 | 
			
		||||
      - targets: ["tealc.adm.crans.org"]
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      # Do not put :3903 in instance name, rather here
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - source_labels: [instance]
 | 
			
		||||
        target_label: __address__
 | 
			
		||||
        replacement: '$1:3903'
 | 
			
		||||
 | 
			
		||||
{% if prometheus.bird_targets is defined %}
 | 
			
		||||
  - job_name: bird
 | 
			
		||||
    file_sd_configs:
 | 
			
		||||
      - files:
 | 
			
		||||
        - '/etc/prometheus/targets_bird.json'
 | 
			
		||||
    relabel_configs:
 | 
			
		||||
      # Do not put :9324 in instance name, rather here
 | 
			
		||||
      - source_labels: [__address__]
 | 
			
		||||
        target_label: instance
 | 
			
		||||
      - source_labels: [instance]
 | 
			
		||||
        target_label: __address__
 | 
			
		||||
        replacement: '$1:9324'
 | 
			
		||||
{% endif %}
 | 
			
		||||
 | 
			
		||||
  - job_name: django
 | 
			
		||||
    scheme: https
 | 
			
		||||
    static_configs:
 | 
			
		||||
      - targets: []
 | 
			
		||||
# Activate this line when the captive portal needs monitoring
 | 
			
		||||
#      - targets: ["portail-captif.crans.org:443"]
 | 
			
		||||
{{
 | 
			
		||||
  {
 | 
			
		||||
    "scrape_configs":
 | 
			
		||||
    [
 | 
			
		||||
      {
 | 
			
		||||
        "job_name": "prometheus",
 | 
			
		||||
        "static_configs" : [
 | 
			
		||||
          {
 | 
			
		||||
            "targets": [
 | 
			
		||||
               "localhost:9090"
 | 
			
		||||
            ]
 | 
			
		||||
          }
 | 
			
		||||
        ]
 | 
			
		||||
      }
 | 
			
		||||
    ] + (prometheus | json_query("*.config[0]"))
 | 
			
		||||
  } | to_nice_yaml(indent=2)
 | 
			
		||||
}}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue