diff --git a/hosts/idols-aquamarine/monitoring/alert.nix b/hosts/idols-aquamarine/monitoring/alert.nix
index e5ac9f23..3a5c3d36 100644
--- a/hosts/idols-aquamarine/monitoring/alert.nix
+++ b/hosts/idols-aquamarine/monitoring/alert.nix
@@ -17,6 +17,11 @@
rule = [
"${./alert_rules}/*.yml"
];
+ # https://docs.victoriametrics.com/victoriametrics/vmalert/#link-to-alert-source
+ # Set these two args so that vmalert generates the correct `.GeneratorURL`
+ "external.url" = "https://grafana.writefor.fun";
+ "external.alert.source" =
+ ''explore?left={"datasource":"{{ if eq .Type \"vlogs\" }}VictoriaLogs{{ else }}VictoriaMetrics{{ end }}","queries":[{"expr":{{ .Expr|jsonEscape|queryEscape }},"refId":"A"}],"range":{"from":"{{ .ActiveAt.UnixMilli }}","to":"now"}}'';
};
};
@@ -58,9 +63,9 @@
"type"
"host"
];
- group_wait = "5m";
- group_interval = "5m";
- repeat_interval = "4h";
+ group_wait = "3m"; # initial wait so that alerts sharing the same "group_by" labels are batched into one notification
+ group_interval = "5m"; # wait before notifying about new alerts added to a group that has already been notified
+ repeat_interval = "5h"; # wait before re-sending a notification that was already sent, to avoid too-frequent reminders
}
# {
# # Route only prod env's critical alerts to email (most severe alerts)
@@ -102,28 +107,30 @@
send_resolved = true;
# Disable notifications for resolved alerts
disable_notifications = false;
- # Parse mode for the message
- parse_mode = "Markdown";
+ # Telegram's MarkdownV2 and Markdown modes are both very painful to use, so we use HTML instead.
+ # https://core.telegram.org/bots/api#formatting-options
+ parse_mode = "HTML";
# Message template
message = ''
- *Alert:* {{ .GroupLabels.alertname }}
- *Status:* {{ .Status }}
- *Severity:* {{ .CommonLabels.severity }}
- {{ if .GroupLabels.namespace }}*Namespace:* {{ .GroupLabels.namespace }}{{ end }}
- {{ if .GroupLabels.pod }}*Pod:* {{ .GroupLabels.pod }}{{ end }}
- {{ if .GroupLabels.job }}*Job:* {{ .GroupLabels.job }}{{ end }}
- {{ if .GroupLabels.host }}*Host:* {{ .GroupLabels.host }}{{ end }}
+ {{- if eq .Status "firing" }}
+ 🟡 告警触发 {{ .CommonLabels.alertname }} [{{ index .CommonLabels "severity" | title }}]
+ {{- else }}
+ 🟢 告警恢复 {{ .CommonLabels.alertname }} [{{ index .CommonLabels "severity" | title }}]
+ {{- end }}
- {{ range .Alerts }}
- *Alert:* {{ .Annotations.summary }}
- *Description:* {{ .Annotations.description }}
- {{ if .Labels.instance }}*Instance:* {{ .Labels.instance }}{{ end }}
- {{ if .Labels.container }}*Container:* {{ .Labels.container }}{{ end }}
- *Started:* {{ .StartsAt.Format "2006-01-02 15:04:05" }}
- {{ if .EndsAt }}
- *Ended:* {{ .EndsAt.Format "2006-01-02 15:04:05" }}
- {{ end }}
- {{ end }}
+ {{- range .Alerts }}{{/* one detail section per alert in the notification group */}}
+
+ 📊 详情:
+ • 告警组: {{ .Labels.alertgroup }}
+ • 等级: {{ if eq .Labels.severity "critical" }}🔴{{ else }}🟡{{ end }} {{ .Labels.severity | title }}
+ • 查询: <a href="{{ .GeneratorURL }}">Grafana Explore</a>{{/* link built by vmalert from external.url + external.alert.source */}}
+ • 触发值: {{ with .Annotations.value }}{{ . }}{{ else }}N/A{{ end }}
+ • Env: {{ with .Labels.env }}{{ . }}{{ else }}N/A{{ end }}
+ • Cluster: {{ with .Labels.cluster }}{{ . }}{{ else }}N/A{{ end }}
+ • Namespace: {{ with .Labels.namespace }}{{ . }}{{ else }}N/A{{ end }}
+ • 标签: {{ range $i, $pair := .Labels.SortedPairs }}{{ if $i }}, {{ end }}{{ $pair.Name }}={{ $pair.Value }}{{ end }}{{/* separator only between pairs, no trailing comma */}}
+ • 触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
+ {{- end }}
'';
}
];