diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
index 1a58fd38..ce7f711b 100644
--- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2
+++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
@@ -51,6 +51,15 @@ groups:
     annotations:
       summary: "CPU sur {{ $labels.instance }} à {{ $value }}%."
 
+  # Check mdadm software RAID: fires when an array has had fewer active disks than total disks for 3 minutes
+  - alert: SoftwareRAIDDegraded
+    expr: node_md_disks - node_md_disks_active > 0
+    for: 3m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Le RAID sur {{ $labels.instance }} a perdu {{ $value }} disque(s)."
+
   # Check systemd unit (> buster)
   - alert: SystemdServiceFailed
     expr: node_systemd_unit_state{state="failed"} == 1