diff --git a/flake.lock b/flake.lock index 88422e5c..2644a3f9 100644 --- a/flake.lock +++ b/flake.lock @@ -526,10 +526,10 @@ "mysecrets": { "flake": false, "locked": { - "lastModified": 1752678564, - "narHash": "sha256-x2sbH7Umncbyc9oca5mqX8kMChHVUTytKk+QXEcB4i4=", + "lastModified": 1757651423, + "narHash": "sha256-w2hBme0vg3uDoEjP+0WuBT9hAhf1xJa4Np+GS2zQKXU=", "ref": "refs/heads/main", - "rev": "a231913597362c15c71fd9212cef5092ae85a64c", + "rev": "44b2943b7ebed5717bb9855c1b7a95c8a89fb7f7", "shallow": true, "type": "git", "url": "ssh://git@github.com/ryan4yin/nix-secrets.git" diff --git a/hosts/idols-aquamarine/caddy.nix b/hosts/idols-aquamarine/caddy.nix index 36766bd0..88932720 100644 --- a/hosts/idols-aquamarine/caddy.nix +++ b/hosts/idols-aquamarine/caddy.nix @@ -88,6 +88,11 @@ in encode zstd gzip reverse_proxy http://localhost:9093 ''; + virtualHosts."vmalert.writefor.fun".extraConfig = '' + ${hostCommonConfig} + encode zstd gzip + reverse_proxy http://localhost:8880 + ''; virtualHosts."minio.writefor.fun".extraConfig = '' ${hostCommonConfig} encode zstd gzip diff --git a/hosts/idols-aquamarine/monitoring/alert.nix b/hosts/idols-aquamarine/monitoring/alert.nix new file mode 100644 index 00000000..6da20c07 --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/alert.nix @@ -0,0 +1,125 @@ +{ config, lib, ... }: +{ + services.vmalert = { + enable = true; + settings = { + "httpListenAddr" = "127.0.0.1:8880"; + + "datasource.url" = "http://localhost:9090"; + "notifier.url" = [ "http://localhost:9093" ]; # alertmanager's api + + # Whether to disable long-lived connections to the datasource. + "datasource.disableKeepAlive" = true; + # Whether to avoid stripping sensitive information such as auth headers or passwords + # from URLs in log messages or UI and exported metrics. + "datasource.showURL" = false; + # Path to the files with alerting and/or recording rules. 
+ rule = [ + "${./alert_rules}/*.yml" + ]; + }; + }; + + services.prometheus.alertmanager = { + enable = true; + listenAddress = "127.0.0.1"; + port = 9093; + webExternalUrl = "http://alertmanager.writefor.fun"; + logLevel = "info"; + environmentFile = config.age.secrets."alertmanager.env".path; + configuration = { + global = { + # The smarthost and SMTP sender used for mail notifications. + smtp_smarthost = "smtp.qq.com:465"; + smtp_from = "$SMTP_SENDER_EMAIL"; + smtp_auth_username = "$SMTP_AUTH_USERNAME"; + smtp_auth_password = "$SMTP_AUTH_PASSWORD"; + # smtp.qq.com:465 supports implicit TLS (SMTPS) only, so the STARTTLS requirement must be disabled here. + # https://service.mail.qq.com/detail/0/310 + smtp_require_tls = false; + }; + route = { + receiver = "telegram"; + routes = [ + { + receiver = "telegram"; + # group alerts by labels + group_by = [ + "host" + "namespace" + "pod" + "job" + ]; + group_wait = "5m"; + group_interval = "5m"; + repeat_interval = "4h"; + } + # { + # # Route critical alerts to email (most severe alerts) + # match = { + # severity = "critical"; + # }; + # receiver = "email"; + # group_by = [ + # "host" + # "namespace" + # "pod" + # "job" + # ]; + # group_wait = "1m"; + # group_interval = "5m"; + # repeat_interval = "2h"; + # } + ]; + }; + receivers = [ + # { + # name = "email"; + # email_configs = [ + # { + # to = "ryan4yin@linux.com"; + # # Whether to notify about resolved alerts. + # send_resolved = true; + # } + # ]; + # } + { + name = "telegram"; + telegram_configs = [ + { + bot_token = "$TELEGRAM_BOT_TOKEN"; + chat_id = 586169186; # My Telegram ID + # Whether to notify about resolved alerts. 
+ send_resolved = true; + # When true, Telegram delivers messages silently (no notification sound); false keeps normal notifications + disable_notifications = false; + # Parse mode for the message (NOTE(review): legacy Markdown may reject unescaped `_` or `*` in label values; confirm) + parse_mode = "Markdown"; + # Message template (Go template; "Ended" is printed only for resolved alerts, since a time value is always truthy) + message = '' + *Alert:* {{ .GroupLabels.alertname }} + *Status:* {{ .Status }} + *Severity:* {{ .CommonLabels.severity }} + {{ if .GroupLabels.namespace }}*Namespace:* {{ .GroupLabels.namespace }}{{ end }} + {{ if .GroupLabels.pod }}*Pod:* {{ .GroupLabels.pod }}{{ end }} + {{ if .GroupLabels.job }}*Job:* {{ .GroupLabels.job }}{{ end }} + {{ if .GroupLabels.host }}*Host:* {{ .GroupLabels.host }}{{ end }} + + {{ range .Alerts }} + *Alert:* {{ .Annotations.summary }} + *Description:* {{ .Annotations.description }} + {{ if .Labels.instance }}*Instance:* {{ .Labels.instance }}{{ end }} + {{ if .Labels.container }}*Container:* {{ .Labels.container }}{{ end }} + *Started:* {{ .StartsAt.Format "2006-01-02 15:04:05" }} + {{ if eq .Status "resolved" }} + *Ended:* {{ .EndsAt.Format "2006-01-02 15:04:05" }} + {{ end }} + {{ end }} + ''; + } + ]; + } + ]; + }; + }; +} diff --git a/hosts/idols-aquamarine/monitoring/alert_rules/istio-exporter.yml b/hosts/idols-aquamarine/monitoring/alert_rules/istio-exporter.yml index 8a2ff838..ec2676a3 100644 --- a/hosts/idols-aquamarine/monitoring/alert_rules/istio-exporter.yml +++ b/hosts/idols-aquamarine/monitoring/alert_rules/istio-exporter.yml @@ -69,7 +69,7 @@ groups: annotations: summary: Istio high 4xx error rate (instance {{ $labels.instance }}) description: - "High percentage of HTTP 5xx responses in Istio (> 5%).\n VALUE = {{ $value + "High percentage of HTTP 4xx responses in Istio (> 5%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: IstioHigh5xxErrorRate diff --git a/hosts/idols-aquamarine/monitoring/alertmanager.nix b/hosts/idols-aquamarine/monitoring/alertmanager.nix deleted file mode 100644 index 3269d75d..00000000 --- a/hosts/idols-aquamarine/monitoring/alertmanager.nix +++ /dev/null @@ -1,48 +0,0 @@ -{ config, ... 
}: -{ - services.prometheus.alertmanager = { - enable = true; - listenAddress = "127.0.0.1"; - port = 9093; - webExternalUrl = "http://alertmanager.writefor.fun"; - logLevel = "info"; - - environmentFile = config.age.secrets."alertmanager.env".path; - configuration = { - global = { - # The smarthost and SMTP sender used for mail notifications. - smtp_smarthost = "smtp.qq.com:465"; - smtp_from = "$SMTP_SENDER_EMAIL"; - smtp_auth_username = "$SMTP_AUTH_USERNAME"; - smtp_auth_password = "$SMTP_AUTH_PASSWORD"; - # smtp.qq.com:465 support SSL only, so we need to disable TLS here. - # https://service.mail.qq.com/detail/0/310 - smtp_require_tls = false; - }; - route = { - receiver = "default"; - routes = [ - { - group_by = [ "host" ]; - group_wait = "5m"; - group_interval = "5m"; - repeat_interval = "4h"; - receiver = "default"; - } - ]; - }; - receivers = [ - { - name = "default"; - email_configs = [ - { - to = "ryan4yin@linux.com"; - # Whether to notify about resolved alerts. - send_resolved = true; - } - ]; - } - ]; - }; - }; -} diff --git a/hosts/idols-aquamarine/monitoring/default.nix b/hosts/idols-aquamarine/monitoring/default.nix index 4eec1724..4bac8bae 100644 --- a/hosts/idols-aquamarine/monitoring/default.nix +++ b/hosts/idols-aquamarine/monitoring/default.nix @@ -2,6 +2,6 @@ { imports = [ ./victoriametrics.nix - ./alertmanager.nix + ./alert.nix ]; } diff --git a/hosts/idols-aquamarine/monitoring/victoriametrics.nix b/hosts/idols-aquamarine/monitoring/victoriametrics.nix index 5a375c12..10a55b1a 100644 --- a/hosts/idols-aquamarine/monitoring/victoriametrics.nix +++ b/hosts/idols-aquamarine/monitoring/victoriametrics.nix @@ -116,25 +116,4 @@ ) [ ] myvars.networking.hostsAddr); }; }; - - services.vmalert = { - enable = true; - settings = { - "datasource.url" = "http://localhost:9090"; - "notifier.url" = [ "http://localhost:9093" ]; # alertmanager's api - - # Whether to disable long-lived connections to the datasource. 
- "datasource.disableKeepAlive" = true; - # Whether to avoid stripping sensitive information such as auth headers or passwords - # from URLs in log messages or UI and exported metrics. - "datasource.showURL" = false; - rule = [ - ./alert_rules/node-exporter.yml - ./alert_rules/kubestate-exporter.yml - ./alert_rules/etcd_embedded-exporter.yml - ./alert_rules/istio_embedded-exporter.yml - ./alert_rules/coredns_embedded-exporter.yml - ]; - }; - }; }