# Mirror of https://github.com/ryan4yin/nix-config.git
# Synced 2026-04-26 02:38:30 +02:00 · 53 lines · 2.0 KiB · YAML
---
# Prometheus alerting rules for Loki: a single rule group whose `rules`
# list holds one entry per alert.
groups:
  - name: Loki Exporter
    rules:
# ---- LokiProcessTooManyRestarts ----
      # Fires when the start time of a process whose job name contains "loki"
      # changed more than twice within 15 minutes. `for: 0m` means there is no
      # pending period: the alert fires on the first evaluation that matches.
      - alert: LokiProcessTooManyRestarts
        expr: >-
          changes(process_start_time_seconds{job=~".*loki.*"}[15m]) > 2
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Loki process too many restarts (instance {{ $labels.instance }})'
          # Literal block scalar: line breaks and the single leading space on
          # the VALUE/LABELS lines are part of the rendered text.
          description: |-
            A loki process had too many restarts (target {{ $labels.instance }})
             VALUE = {{ $value }}
             LABELS = {{ $labels }}
# ---- LokiRequestErrors ----
      # Fires when more than 10% of requests for a given (namespace, job, route)
      # returned a 5xx status code, and that stayed true for 15 minutes.
      - alert: LokiRequestErrors
        expr: >-
          100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m]))
          by (namespace, job, route)
          / sum(rate(loki_request_duration_seconds_count[1m]))
          by (namespace, job, route)
          > 10
        for: 15m
        labels:
          severity: critical
        annotations:
          # The expression aggregates by (namespace, job, route), so the
          # resulting series carry no `instance` label and
          # `{{ $labels.instance }}` would render as an empty string.
          # Reference the labels that survive the aggregation instead.
          summary: 'Loki request errors (job {{ $labels.job }}, route {{ $labels.route }})'
          # Literal block scalar: line breaks and the single leading space on
          # the VALUE/LABELS lines are part of the rendered text.
          description: |-
            The {{ $labels.job }} and {{ $labels.route }} are experiencing errors
             VALUE = {{ $value }}
             LABELS = {{ $labels }}
# ---- LokiRequestPanic ----
      # Fires when the panic counter of a given (namespace, job) increased at
      # all over the last 10 minutes, and that stayed true for 5 minutes.
      - alert: LokiRequestPanic
        expr: >-
          sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
        for: 5m
        labels:
          severity: critical
        annotations:
          # The expression aggregates by (namespace, job), so there is no
          # `instance` label left to print; use `job` instead.
          summary: 'Loki request panic (job {{ $labels.job }})'
          # `increase()` yields an absolute (extrapolated) count of panics over
          # the window, not a percentage, so the value is reported as a count.
          description: |-
            The {{ $labels.job }} experienced {{ printf "%.2f" $value }} panics over the last 10 minutes
             VALUE = {{ $value }}
             LABELS = {{ $labels }}
# ---- LokiRequestLatency ----
      # Fires when the 99th-percentile request latency of a given
      # (namespace, job, route) stays above 1 second for 5 minutes. Routes whose
      # name contains "tail" (case-insensitive) are excluded.
      - alert: LokiRequestLatency
        # Group by namespace/job/route in addition to `le`: with `by (le)`
        # alone every other label is aggregated away, and the `job` / `route`
        # placeholders in the annotations below would render empty.
        # NOTE(review): this evaluates the quantile per route rather than
        # across all routes combined — confirm that is the intended scope.
        expr: >-
          histogram_quantile(0.99,
          sum(rate(loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[5m]))
          by (le, namespace, job, route))
          > 1
        for: 5m
        labels:
          severity: critical
        annotations:
          # No `instance` label exists after the aggregation; reference the
          # labels that are kept.
          summary: 'Loki request latency (job {{ $labels.job }}, route {{ $labels.route }})'
          # Literal block scalar: line breaks and the single leading space on
          # the VALUE/LABELS lines are part of the rendered text.
          description: |-
            The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency
             VALUE = {{ $value }}
             LABELS = {{ $labels }}