[Prometheus] Update to 2.x
							parent
							
								
									528e443d67
								
							
						
					
					
						commit
						f30e5c9f21
					
				|  | @ -15,8 +15,8 @@ | |||
| 
 | ||||
| - name: Configure Prometheus alert rules | ||||
|   template: | ||||
|     src: prometheus/alert.rules.j2 | ||||
|     dest: /etc/prometheus/alert.rules | ||||
|     src: prometheus/alert.rules.yml.j2 | ||||
|     dest: /etc/prometheus/alert.rules.yml | ||||
|   notify: Restart Prometheus | ||||
| 
 | ||||
| # We don't need to restart Prometheus when updating nodes | ||||
|  |  | |||
|  | @ -1,13 +0,0 @@ | |||
| # {{ ansible_managed }} | ||||
| 
 | ||||
| {# Prometheus alert templates use the same double-brace delimiters as Jinja2, so the rules are wrapped in a raw block to keep Ansible from rendering them #} | ||||
| {% raw %} | ||||
| ALERT InstanceDown | ||||
|   IF up == 0 | ||||
|   FOR 5m | ||||
|   LABELS { severity = "page" } | ||||
|   ANNOTATIONS { | ||||
|     summary = "Instance {{ $labels.instance }} down", | ||||
|     description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.", | ||||
|   } | ||||
| {% endraw %} | ||||
|  | @ -0,0 +1,25 @@ | |||
| # {{ ansible_managed }} | ||||
| {# Prometheus alert templates use the same double-brace delimiters as Jinja2, so the rules are wrapped in a raw block to keep Ansible from rendering them #} | ||||
| {% raw %} | ||||
| groups: | ||||
| - name: example | ||||
|   rules: | ||||
| 
 | ||||
|   # Alert for any instance that is unreachable for >5 minutes. | ||||
|   - alert: InstanceDown | ||||
|     expr: up == 0 | ||||
|     for: 5m | ||||
|     labels: | ||||
|       severity: page | ||||
|     annotations: | ||||
|       summary: "Instance {{ $labels.instance }} down" | ||||
|       description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes." | ||||
| 
 | ||||
|   # Alert for any instance that has a median request latency >1s. | ||||
|   - alert: APIHighRequestLatency | ||||
|     expr: api_http_request_latencies_second{quantile="0.5"} > 1 | ||||
|     for: 10m | ||||
|     annotations: | ||||
|       summary: "High request latency on {{ $labels.instance }}" | ||||
|       description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)" | ||||
| {% endraw %} | ||||
|  | @ -1,8 +1,8 @@ | |||
| # {{ ansible_managed }} | ||||
| 
 | ||||
| global: | ||||
|   scrape_interval:     15s # By default, scrape targets every 15 seconds. | ||||
|   evaluation_interval: 15s # By default, evaluate rules every 15 seconds. | ||||
|   # scrape_interval is set to the global default (60s) | ||||
|   # evaluation_interval is set to the global default (60s) | ||||
|   # scrape_timeout is set to the global default (10s). | ||||
| 
 | ||||
|   # Attach these labels to any time series or alerts when communicating with | ||||
|  | @ -10,16 +10,15 @@ global: | |||
|   external_labels: | ||||
|       monitor: 'example' | ||||
| 
 | ||||
| # Load and evaluate rules in this file every 'evaluation_interval' seconds. | ||||
| rule_files: | ||||
|   - "alert.rules" | ||||
| 
 | ||||
| # Route alerts to Prometheus Alertmanager | ||||
| # Alertmanager configuration | ||||
| alerting: | ||||
|   alertmanagers: | ||||
|   - static_configs: | ||||
|     - targets: | ||||
|       - 'localhost:9093' | ||||
|     - targets: ['localhost:9093'] | ||||
| 
 | ||||
| # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. | ||||
| rule_files: | ||||
|   - "alert.rules.yml" | ||||
| 
 | ||||
| # A scrape configuration containing exactly one endpoint to scrape: | ||||
| # Here it's Prometheus itself. | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue