From 2bfe4fae256532181b1035961f8f281d6fbba539 Mon Sep 17 00:00:00 2001 From: Alexandre Iooss Date: Sun, 26 Jan 2020 21:31:48 +0100 Subject: [PATCH] Measure load rather than CPU time usage --- roles/prometheus/templates/prometheus/alert.rules.yml.j2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2 index ce7f711b..2b358276 100644 --- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 +++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2 @@ -43,13 +43,13 @@ groups: summary: "Presque plus d'inodes disponibles ({{ $value }}% restant) dans {{ $labels.mountpoint }} sur {{ $labels.instance }}." # Alert for high CPU usage - - alert: CpuUsage - expr: (100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 75 + - alert: CpuBusy + expr: node_load5 > 3 for: 10m labels: severity: warning annotations: - summary: "CPU sur {{ $labels.instance }} à {{ $value }}%." + summary: "Charge sur {{ $labels.instance }} à {{ $value }}." # Check mdadm software RAID - alert: SoftwareRAIDDegraded