|
/etc/prometheus/rules/bakins-fpm-exporter.yml > BakinsFpmExporter
|
|
|
|
/etc/prometheus/rules/blackbox-exporter.yml > BlackboxExporter
|
|
|
# Fires when `probe_success` has been 0 for 3 minutes.
alert: BlackboxProbeFailed
expr: probe_success == 0
for: 3m
labels:
  severity: critical
annotations:
  summary: 'Blackbox probe failed (instance {{ $labels.instance }})'
  # Literal block with stripped trailing newline: one line per field.
  description: |-
    Probe failed
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/rules/embedded-exporter.yml > EmbeddedExporter
|
|
|
# Fires when the per-job sum of `up` has been 0 for 1 minute, i.e. no target
# of that job is reporting up.
# `sum by (job)` keeps only the `job` label on the result, so the summary
# identifies the job; `$labels.instance` would always render as empty here.
alert: PrometheusAllTargetsMissing
expr: sum by (job) (up) == 0
for: 1m
labels:
  severity: critical
annotations:
  summary: Prometheus all targets missing (job {{ $labels.job }})
  description: |-
    A Prometheus job does not have living target anymore.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
# Fires when no `up` series with job="prometheus" has existed for 1 minute.
# absent() returns a single series carrying only the labels taken from the
# selector's equality matchers (here `job`), so the summary reports the job;
# `$labels.instance` would always render as empty for this rule.
alert: PrometheusJobMissing
expr: absent(up{job="prometheus"})
for: 1m
labels:
  severity: warning
annotations:
  summary: Prometheus job missing (job {{ $labels.job }})
  description: |-
    A Prometheus job has disappeared
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
# Fires when `prometheus_sd_discovered_targets` has been 0 for 1 minute.
alert: PrometheusTargetEmpty
expr: prometheus_sd_discovered_targets == 0
for: 1m
labels:
  severity: critical
annotations:
  summary: 'Prometheus target empty (instance {{ $labels.instance }})'
  description: |-
    Prometheus has no target in service discovery
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
# Fires per target whose `up` value has been 0 for 1 minute.
alert: PrometheusTargetMissing
expr: up == 0
for: 1m
labels:
  severity: critical
annotations:
  summary: 'Prometheus target missing (instance {{ $labels.instance }})'
  description: |-
    A Prometheus target has disappeared. An exporter might be crashed.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/rules/knyar-nginx-exporter.yml > KnyarNginxExporter
|
|
|
|
|
|
|
|
/etc/prometheus/rules/mysqld-exporter.yml > MysqldExporter
|
# Fires when `mysql_up` has been 0 for 1 minute.
alert: MysqlDown
expr: mysql_up == 0
for: 1m
labels:
  severity: critical
annotations:
  summary: 'MySQL down (instance {{ $labels.instance }})'
  description: |-
    MySQL instance is down on {{ $labels.instance }}
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fires as soon as the reported MySQL uptime is below 60 seconds.
# `for` must be 0m: uptime keeps growing, so "uptime < 60" cannot normally
# stay true across a 1-minute pending window. With `for: 1m` the alert would
# go pending and then clear without ever firing (unless the scrape interval
# is long enough that the same sample is evaluated twice).
alert: MysqlRestarted
expr: mysql_global_status_uptime < 60
for: 0m
labels:
  severity: info
annotations:
  summary: MySQL restarted (instance {{ $labels.instance }})
  description: |-
    MySQL has just been restarted, less than one minute ago on {{ $labels.instance }}.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/etc/prometheus/rules/node-exporter.yml > NodeExporter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fires when the EDAC uncorrectable-error counter has been above 0 for
# 1 minute.
# The expression compares the raw cumulative counter (no rate()/increase()
# window), so $value is the counter total. The description therefore must not
# claim a "last 5 minutes" window.
alert: HostEdacUncorrectableErrorsDetected
expr: (node_edac_uncorrectable_errors_total > 0)
for: 1m
labels:
  severity: warning
annotations:
  summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})
  description: |-
    Host {{ $labels.instance }} has had {{ printf "%.0f" $value }} uncorrectable memory errors reported by EDAC.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
# Fires when an instance has exposed more than one kernel release within the
# last hour.
# `node_uname_info` is an info-style metric whose sample value stays constant;
# a kernel change appears as a NEW series with a different `release` label,
# not as a value change. `changes(node_uname_info[1h]) > 0` therefore never
# matches. Instead, count the distinct `release` values that had samples in
# the window, per instance.
# NOTE(review): the result carries only the `instance` label, and $value is
# the number of distinct releases seen — confirm this suits the receivers.
alert: HostKernelVersionDeviations
expr: >-
  count by (instance) (count by (instance, release)
  (count_over_time(node_uname_info[1h]))) > 1
for: 1m
labels:
  severity: info
annotations:
  summary: Host kernel version deviations (instance {{ $labels.instance }})
  description: |-
    Kernel version for {{ $labels.instance }} has changed.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
# Fires when the 5-minute per-second rate of `node_vmstat_pgmajfault` has
# been above 1000 for 1 minute.
alert: HostMemoryUnderMemoryPressure
expr: (rate(node_vmstat_pgmajfault[5m]) > 1000)
for: 1m
labels:
  severity: warning
annotations:
  summary: 'Host memory under memory pressure (instance {{ $labels.instance }})'
  description: |-
    The node is under heavy memory pressure. High rate of loading memory pages from disk.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fires when `node_md_disks` with state="failed" has been above 0 for
# 2 minutes.
alert: HostSoftwareRaidDiskFailure
expr: (node_md_disks{state="failed"} > 0)
for: 2m
labels:
  severity: warning
annotations:
  summary: 'Host software RAID disk failure (instance {{ $labels.instance }})'
  description: |-
    MD RAID array {{ $labels.device }} on {{ $labels.instance }} needs attention.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
# Fires when a disk has spent more than 80% of the time doing I/O
# (5-minute rate of `node_disk_io_time_seconds_total` above 0.8) for
# 5 minutes.
# The description states what is measured (I/O busy time, not "disk usage")
# and no longer repeats the "Check storage for issues." sentence.
alert: HostUnusualDiskIo
expr: rate(node_disk_io_time_seconds_total[5m]) > 0.8
for: 5m
labels:
  severity: warning
annotations:
  summary: Host unusual disk IO (instance {{ $labels.instance }})
  description: |-
    Disk is busy with I/O more than 80% of the time. Check storage for issues or increase IOPS capabilities.
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|
|
|
|
|
|
|