From a8ac95ebadc62c1741b5d522ecb68dc372535a97 Mon Sep 17 00:00:00 2001 From: Ryan Yin Date: Fri, 26 Sep 2025 18:02:44 +0800 Subject: [PATCH 1/4] chore: grafana - remove useless dashboards fix: alertmanager - metrics fix: victoria-metrics - job filter --- .../kubernetes/k8s-addons-prometheus.json | 3069 ----------------- .../victoria-metrics-single.json | 4 +- .../monitoring/victoriametrics.nix | 41 + 3 files changed, 43 insertions(+), 3071 deletions(-) delete mode 100644 hosts/idols-aquamarine/grafana/dashboards/kubernetes/k8s-addons-prometheus.json diff --git a/hosts/idols-aquamarine/grafana/dashboards/kubernetes/k8s-addons-prometheus.json b/hosts/idols-aquamarine/grafana/dashboards/kubernetes/k8s-addons-prometheus.json deleted file mode 100644 index deabdbcf..00000000 --- a/hosts/idols-aquamarine/grafana/dashboards/kubernetes/k8s-addons-prometheus.json +++ /dev/null @@ -1,3069 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": [], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "8.5.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "This is a modern 'Prometheus' dashboard for your Kubernetes 
cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes", - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 89, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "refId": "A" - } - ], - "title": "Information", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "noValue": "?", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "orange", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 1 - }, - "id": 78, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": ["last"], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "name" - }, - "pluginVersion": "10.0.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "prometheus_build_info{pod=~\"$pod\", cluster=~\"$cluster\"}", - "instant": true, - "interval": "", - "legendFormat": "{{ version }}", - "range": false, - "refId": "A" - } - ], - "title": "Prometheus version", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - 
"y": 1 - }, - "id": 92, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": ["last"], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "value" - }, - "pluginVersion": "10.0.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "up{pod=~\"$pod\", cluster=~\"$cluster\"} < 1", - "instant": true, - "interval": "", - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Instance Down", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "text", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 1 - }, - "id": 72, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "value" - }, - "pluginVersion": "10.0.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(prometheus_tsdb_head_series{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod)", - "interval": "", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "TSDB Head Series", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 1 - }, - "id": 94, - "options": { - "colorMode": "value", - "graphMode": 
"none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": ["last"], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "value" - }, - "pluginVersion": "10.0.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(prometheus_sd_discovered_targets{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod)", - "instant": true, - "interval": "", - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Discovered Targets", - "type": "stat" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 64, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "refId": "A" - } - ], - "title": "Prometheus", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 93, - "options": { - "legend": { - "calcs": ["min", "max", 
"mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "up{pod=~\"$pod\", cluster=~\"$cluster\"}", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Liveness by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 96, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(prometheus_config_last_reload_successful{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod)", - "interval": "", - 
"legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Config - Last Successful Reload by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 - }, - "id": 74, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_target_scrapes_exceeded_body_size_limit_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "", - "legendFormat": "{{ pod }} - Exceeded body size limit", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_target_scrapes_exceeded_sample_limit_total{pod=~\"$pod\", 
cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Exceeded sample limit", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Duplicate timestamp", - "range": true, - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_target_scrapes_sample_out_of_bounds_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Sample out of bounds", - "range": true, - "refId": "D" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_target_scrapes_sample_out_of_order_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Sample out of order", - "range": true, - "refId": "E" - } - ], - "title": "Target Scrapes Errors by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - 
"thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 - }, - "id": 84, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(prometheus_sd_discovered_targets{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod)", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Number of Targets by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 22 - }, - "id": 75, - "options": { - 
"legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod, scrape_job) * 1000", - "interval": "", - "legendFormat": "{{ pod }} - {{ scrape_job }}", - "range": true, - "refId": "A" - } - ], - "title": "Target Sync by pod, scrape_job", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 22 - }, - "id": 85, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - 
"uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "round(sum(rate(prometheus_target_interval_length_seconds_sum{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval]) / rate(prometheus_target_interval_length_seconds_count{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod))", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Average Scrape Interval by pod", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 98, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "refId": "A" - } - ], - "title": "Prometheus TSDB / Query Engine", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 59, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": 
"right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(prometheus_tsdb_head_series{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod)", - "interval": "", - "legendFormat": "{{ pod }} - Head Series", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(prometheus_tsdb_head_chunks{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Head Chunks", - "range": true, - "refId": "B" - } - ], - "title": "TSDB Head Series & Chunks by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 60, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - 
"tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "TSDB Head samples appended - rate by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 39 - }, - "id": 101, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(prometheus_tsdb_blocks_loaded{pod=~\"$pod\", 
cluster=~\"$cluster\"}) by (pod)", - "interval": "", - "legendFormat": "{{ pod }} - Head Series", - "range": true, - "refId": "A" - } - ], - "title": "TSDB Blocks Loaded by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 39 - }, - "id": 102, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_tsdb_compactions_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "", - "legendFormat": "{{ pod }} - Total Compactions", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": 
"sum(rate(prometheus_tsdb_compactions_triggered_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Triggered Compactions", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_tsdb_compactions_skipped_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Skipped Compactions", - "range": true, - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_tsdb_compactions_failed_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Failed Compactions", - "range": true, - "refId": "D" - } - ], - "title": "TSDB Rate of Compactions by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 47 
- }, - "id": 90, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_tsdb_reloads_failures_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "TSDB Reload Failures by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 47 - }, - "id": 95, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": 
"${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_tsdb_head_series_created_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "", - "legendFormat": "{{ pod }} - Created series", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(rate(prometheus_tsdb_head_series_removed_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "hide": false, - "legendFormat": "{{ pod }} - Deleted series", - "range": true, - "refId": "B" - } - ], - "title": "TSDB Created & Deleted series by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 55 - }, - "id": 73, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - 
"datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(prometheus_engine_query_duration_seconds_count{pod=~\"$pod\", slice=\"inner_eval\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Engine Query Count by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 55 - }, - "id": 86, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "list", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "max(prometheus_engine_query_duration_seconds{pod=~\"$pod\", cluster=~\"$cluster\"}) by (pod, slice) * 1000", - "interval": "", - "legendFormat": "{{ pod }} - {{ 
slice }}", - "range": true, - "refId": "A" - } - ], - "title": "Engine Query Duration by pod, slice", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 63 - }, - "id": 47, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Resources", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "CPU Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 4, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 29, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "table", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": 
"prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{pod=~\"$pod\", image!=\"\", container!=\"\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod, container)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - {{ container }}", - "range": true, - "refId": "A" - } - ], - "title": "CPU Usage by pod, container", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 64 - }, - "id": 51, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "table", - "placement": "right", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{pod=~\"$pod\", image!=\"\", container!=\"\", cluster=~\"$cluster\"}) by (pod, 
container)", - "interval": "", - "legendFormat": "{{ pod }} - {{ container }}", - "range": true, - "refId": "A" - } - ], - "title": "Memory Usage by container", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 72 - }, - "id": 66, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "refId": "A" - } - ], - "title": "Storage", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 73 - }, - "id": 62, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": 
"sum(kubelet_volume_stats_used_bytes{persistentvolumeclaim=~\".*prom.*\", cluster=~\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~\".*prom.*\", cluster=~\"$cluster\"}) by (persistentvolumeclaim)", - "interval": "", - "legendFormat": "{{ persistentvolumeclaim }}", - "range": true, - "refId": "A" - } - ], - "title": "Persistent Volumes - Capacity and usage in %", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 73 - }, - "id": 87, - "options": { - "legend": { - "calcs": ["min", "max", "mean"], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(kubelet_volume_stats_used_bytes{persistentvolumeclaim=~\".*prom.*\", cluster=~\"$cluster\"}) by (persistentvolumeclaim)", - "interval": 
"", - "legendFormat": "{{ persistentvolumeclaim }} - Used", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~\".*prom.*\", cluster=~\"$cluster\"}) by (persistentvolumeclaim)", - "hide": false, - "legendFormat": "{{ persistentvolumeclaim }} - Capacity", - "range": true, - "refId": "B" - } - ], - "title": "Persistent Volumes - Capacity and usage in bytes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 81 - }, - "id": 68, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.3.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "1 - 
sum(kubelet_volume_stats_inodes_used{persistentvolumeclaim=~\".*prom.*\", cluster=~\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_inodes{persistentvolumeclaim=~\".*prom.*\", cluster=~\"$cluster\"}) by (persistentvolumeclaim)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ persistentvolumeclaim }}", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Persistent Volumes - Inodes", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 89 - }, - "id": 45, - "panels": [], - "targets": [ - { - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "refId": "A" - } - ], - "title": "Network", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 90 - }, - "id": 31, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - 
"pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_network_receive_bytes_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Received", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_bytes_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Transmitted", - "range": true, - "refId": "B" - } - ], - "title": "Network - Bandwidth by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 90 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - 
}, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_network_receive_packets_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Received", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_packets_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Transmitted", - "range": true, - "refId": "B" - } - ], - "title": "Network - Packets rate by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 98 - }, - "id": 36, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": 
"none" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_network_receive_packets_dropped_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Received", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_packets_dropped_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Transmitted", - "range": true, - "refId": "B" - } - ], - "title": "Network - Packets Dropped by pod", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 98 - }, - "id": 37, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": 
{ - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(container_network_receive_errors_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Received", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "- sum(rate(container_network_transmit_errors_total{pod=~\"$pod\", cluster=~\"$cluster\"}[$__rate_interval])) by (pod)", - "interval": "$resolution", - "legendFormat": "{{ pod }} - Transmitted", - "range": true, - "refId": "B" - } - ], - "title": "Network - Errors by pod", - "type": "timeseries" - } - ], - "refresh": "30s", - "revision": 1, - "schemaVersion": 38, - "style": "dark", - "tags": ["Kubernetes", "Prometheus"], - "templating": { - "list": [ - { - "current": { - "selected": true, - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "definition": "label_values(kube_node_info,cluster)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "cluster", - "options": [], - "query": { - "qryType": 1, - "query": "label_values(kube_node_info,cluster)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" - }, - { - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - 
"type": "prometheus", - "uid": "${datasource}" - }, - "definition": "label_values(prometheus_build_info{cluster=\"$cluster\"}, pod)", - "hide": 0, - "includeAll": true, - "multi": false, - "name": "pod", - "options": [], - "query": { - "query": "label_values(prometheus_build_info{cluster=\"$cluster\"}, pod)", - "refId": "StandardVariableQuery" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": false, - "text": "30s", - "value": "30s" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "resolution", - "options": [ - { - "selected": false, - "text": "1s", - "value": "1s" - }, - { - "selected": false, - "text": "15s", - "value": "15s" - }, - { - "selected": true, - "text": "30s", - "value": "30s" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "3m", - "value": "3m" - }, - { - "selected": false, - "text": "5m", - "value": "5m" - } - ], - "query": "1s, 15s, 30s, 1m, 3m, 5m", - "queryValue": "", - "skipUrlSync": false, - "type": "custom" - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Prometheus", - "uid": "k8s_addons_prometheus", - "version": 3, - "weekStart": "" -} diff --git a/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json b/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json index 4c0d21cb..1a7208f8 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json +++ b/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json @@ -6469,12 +6469,12 @@ "type": "prometheus", "uid": "$ds" }, - "definition": "label_values(vm_app_version{version=~\"victoria-metrics-.*\"}, job)", + "definition": "label_values(vm_app_version{}, job)", "includeAll": false, "name": 
"job", "options": [], "query": { - "query": "label_values(vm_app_version{version=~\"victoria-metrics-.*\"}, job)", + "query": "label_values(vm_app_version{}, job)", "refId": "VictoriaMetrics-job-Variable-Query" }, "refresh": 1, diff --git a/hosts/idols-aquamarine/monitoring/victoriametrics.nix b/hosts/idols-aquamarine/monitoring/victoriametrics.nix index 10a55b1a..a56ac3b2 100644 --- a/hosts/idols-aquamarine/monitoring/victoriametrics.nix +++ b/hosts/idols-aquamarine/monitoring/victoriametrics.nix @@ -50,6 +50,8 @@ labels.type = "app"; labels.app = "dnsmasq"; labels.host = "suzi"; + labels.env = "homelab"; + labels.cluster = "homelab"; } ]; } @@ -64,6 +66,8 @@ labels.type = "app"; labels.app = "v2ray"; labels.host = "aquamarine"; + labels.env = "homelab"; + labels.cluster = "homelab"; } ]; } @@ -77,6 +81,8 @@ labels.type = "app"; labels.app = "postgresql"; labels.host = "aquamarine"; + labels.env = "homelab"; + labels.cluster = "homelab"; } ]; } @@ -90,6 +96,39 @@ labels.type = "app"; labels.app = "sftpgo"; labels.host = "aquamarine"; + labels.env = "homelab"; + labels.cluster = "homelab"; + } + ]; + } + { + job_name = "alertmanager-embedded-exporter"; + scrape_interval = "30s"; + metrics_path = "/metrics"; + static_configs = [ + { + targets = [ "localhost:9093" ]; + labels.type = "app"; + labels.app = "alertmanager"; + labels.host = "aquamarine"; + labels.env = "homelab"; + labels.cluster = "homelab"; + } + ]; + } + { + job_name = "victoriametrics-embedded-exporter"; + scrape_interval = "30s"; + metrics_path = "/metrics"; + static_configs = [ + { + # scrape vm itself + targets = [ "localhost:9090" ]; + labels.type = "app"; + labels.app = "victoriametrics"; + labels.host = "aquamarine"; + labels.env = "homelab"; + labels.cluster = "homelab"; } ]; } @@ -109,6 +148,8 @@ targets = [ "${addr.ipv4}:9100" ]; labels.type = "node"; labels.host = hostname; + labels.env = "homelab"; + labels.cluster = "homelab"; } ]; } From 2961a9591a20303d3f080a462a5609f6df90790b Mon Sep 17 
00:00:00 2001 From: Ryan Yin Date: Tue, 23 Sep 2025 14:31:48 +0800 Subject: [PATCH 2/4] feat: add recoding rules --- hosts/idols-aquamarine/monitoring/README.md | 8 +- hosts/idols-aquamarine/monitoring/alert.nix | 1 + .../monitoring/alert_rules/README.md | 8 + .../monitoring/alert_rules/general.yml | 57 +++++++ .../monitoring/alert_rules/kubernetes.yml | 120 ++++++++++++++ .../monitoring/recoding_rules/README.md | 7 + .../monitoring/recoding_rules/k8s.yml | 149 ++++++++++++++++++ .../recoding_rules/node-exporter.yml | 128 +++++++++++++++ 8 files changed, 475 insertions(+), 3 deletions(-) create mode 100644 hosts/idols-aquamarine/monitoring/alert_rules/README.md create mode 100644 hosts/idols-aquamarine/monitoring/alert_rules/general.yml create mode 100644 hosts/idols-aquamarine/monitoring/alert_rules/kubernetes.yml create mode 100644 hosts/idols-aquamarine/monitoring/recoding_rules/README.md create mode 100644 hosts/idols-aquamarine/monitoring/recoding_rules/k8s.yml create mode 100644 hosts/idols-aquamarine/monitoring/recoding_rules/node-exporter.yml diff --git a/hosts/idols-aquamarine/monitoring/README.md b/hosts/idols-aquamarine/monitoring/README.md index e7c050c5..d4ed16d2 100644 --- a/hosts/idols-aquamarine/monitoring/README.md +++ b/hosts/idols-aquamarine/monitoring/README.md @@ -1,6 +1,8 @@ # Monitoring & Alerting -## Alert Rules +## Alert Rules & Recording Rules -- [awesome-prometheus-alerts](https://github.com/samber/awesome-prometheus-alerts): Collection of - Prometheus alerting rules +- [awesome-prometheus-alerts](https://github.com/samber/awesome-prometheus-alerts) + - Collection of Prometheus alerting rules. +- [victoria-metrics-k8s-stack/files/rules](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-k8s-stack/files/rules/generated) + - Alert Rules & Recording Rules used by kube-prometheus-stack. 
diff --git a/hosts/idols-aquamarine/monitoring/alert.nix b/hosts/idols-aquamarine/monitoring/alert.nix index 3a5c3d36..0013ac1b 100644 --- a/hosts/idols-aquamarine/monitoring/alert.nix +++ b/hosts/idols-aquamarine/monitoring/alert.nix @@ -16,6 +16,7 @@ # Path to the files with alerting and/or recording rules. rule = [ "${./alert_rules}/*.yml" + "${./recoding_rules}/*.yml" ]; # https://docs.victoriametrics.com/victoriametrics/vmalert/#link-to-alert-source # Set this two args to generate the correct `.GeneratorURL` diff --git a/hosts/idols-aquamarine/monitoring/alert_rules/README.md b/hosts/idols-aquamarine/monitoring/alert_rules/README.md new file mode 100644 index 00000000..40e55cb1 --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/alert_rules/README.md @@ -0,0 +1,8 @@ +# Alert Rules + +Alert rules are configurations that define conditions, scope, and actions for generating alerts from +monitored signals, such as metrics, logs, or activity. When an alert rule's defined conditions are +met for a specific resource within its scope, the system generates a triggered alert, which is the +actual instance of the condition being met. These rules specify the data to monitor, the trigger +threshold, and the resulting actions, like sending notifications to specific receivers or performing +automated tasks. diff --git a/hosts/idols-aquamarine/monitoring/alert_rules/general.yml b/hosts/idols-aquamarine/monitoring/alert_rules/general.yml new file mode 100644 index 00000000..61789d24 --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/alert_rules/general.yml @@ -0,0 +1,57 @@ +groups: + - name: general.rules + rules: + - alert: TargetDown + annotations: + description: + '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in + {{ $labels.namespace }} namespace are down.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown + summary: One or more targets are unreachable. 
+ expr: + 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up) BY (cluster, job, + namespace, service)) > 10 + for: 10m + labels: + severity: warning + - alert: Watchdog + annotations: + description: 'This is an alert meant to ensure that the entire alerting pipeline is + functional. + + This alert is always firing, therefore it should always be firing in Alertmanager + + and always fire against a receiver. There are integrations with various notification + + mechanisms that send a notification when this alert is not firing. For example the + + "DeadMansSnitch" integration in PagerDuty.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog + summary: + An alert that should always be firing to certify that Alertmanager is working properly. + expr: vector(1) + labels: + severity: none + - alert: InfoInhibitor + annotations: + description: 'This is an alert that is used to inhibit info alerts. + + By themselves, the info-level alerts are sometimes very noisy, but they are relevant + when combined with + + other alerts. + + This alert fires whenever there''s a severity="info" alert, and stops firing when + another alert with a + + severity of ''warning'' or ''critical'' starts firing on the same namespace. + + This alert should be routed to a null receiver and configured to inhibit alerts with + severity="info".' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor + summary: Info-level alert inhibition. 
+ expr: + ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", + severity =~ "warning|critical", alertstate="firing"} == 1 + labels: + severity: none diff --git a/hosts/idols-aquamarine/monitoring/alert_rules/kubernetes.yml b/hosts/idols-aquamarine/monitoring/alert_rules/kubernetes.yml new file mode 100644 index 00000000..327171e6 --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/alert_rules/kubernetes.yml @@ -0,0 +1,120 @@ +groups: + - name: kubernetes-resources + rules: + - alert: KubeCPUOvercommit + annotations: + description: + Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Pods by {{ + $value }} CPU shares and cannot tolerate node failure. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum(namespace_cpu:kube_pod_container_resource_requests:sum{job="kube-state-metrics",}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 + and + (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 + for: 10m + labels: + severity: warning + - alert: KubeMemoryOvercommit + annotations: + description: + Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Pods by {{ + $value | humanize }} bytes and cannot tolerate node failure. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit + summary: Cluster has overcommitted memory resource requests. 
+ expr: |- + sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 + and + (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 + for: 10m + labels: + severity: warning + - alert: KubeCPUQuotaOvercommit + annotations: + description: + Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Namespaces. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster) + / + sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeMemoryQuotaOvercommit + annotations: + description: + Cluster {{ $labels.cluster }} has overcommitted memory resource requests for + Namespaces. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster) + / + sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeQuotaAlmostFull + annotations: + description: + Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ + $labels.resource }} quota. 
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull + summary: Namespace quota is going to be full. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 0.9 < 1 + for: 15m + labels: + severity: info + - alert: KubeQuotaFullyUsed + annotations: + description: + Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ + $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused + summary: Namespace quota is fully used. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + == 1 + for: 15m + labels: + severity: info + - alert: KubeQuotaExceeded + annotations: + description: + Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ + $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded + summary: Namespace quota has exceeded the limits. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 1 + for: 15m + labels: + severity: warning + - alert: CPUThrottlingHigh + annotations: + description: + "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace + }} for container {{ $labels.container }} in pod {{ $labels.pod }}." + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh + summary: Processes experience elevated CPU throttling. 
+ expr: |- + sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace) + > ( 25 / 100 ) + for: 15m + labels: + severity: info diff --git a/hosts/idols-aquamarine/monitoring/recoding_rules/README.md b/hosts/idols-aquamarine/monitoring/recoding_rules/README.md new file mode 100644 index 00000000..9af3baf9 --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/recoding_rules/README.md @@ -0,0 +1,7 @@ +# Recording Rules + +Recording rules are pre-defined queries, often complex or computationally expensive, that are +evaluated periodically to create new, pre-computed time series metrics. + +These rules store the results in a metric backend, significantly speeding up queries for dashboards +and other alerts, and reducing system load by avoiding the re-computation of data. diff --git a/hosts/idols-aquamarine/monitoring/recoding_rules/k8s.yml b/hosts/idols-aquamarine/monitoring/recoding_rules/k8s.yml new file mode 100644 index 00000000..ca3d4a4a --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/recoding_rules/k8s.yml @@ -0,0 +1,149 @@ +groups: + - name: k8s.rules + rules: + - expr: |- + sum by (cluster, namespace, pod, container) ( + irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( + 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate + - expr: |- + container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_working_set_bytes + - 
expr: |- + container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_rss + - expr: |- + container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_cache + - expr: |- + container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_swap + - expr: |- + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum + - expr: |- + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: 
cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum + - expr: |- + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum + - expr: |- + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum + - expr: |- + max by (cluster, 
namespace, workload, pod) ( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( + 1, max by (replicaset, namespace, owner_name) ( + kube_replicaset_owner{job="kube-state-metrics"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: statefulset + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: job + record: namespace_workload_pod:kube_pod_owner:relabel diff --git a/hosts/idols-aquamarine/monitoring/recoding_rules/node-exporter.yml b/hosts/idols-aquamarine/monitoring/recoding_rules/node-exporter.yml new file mode 100644 index 00000000..86d2933b --- /dev/null +++ b/hosts/idols-aquamarine/monitoring/recoding_rules/node-exporter.yml @@ -0,0 +1,128 @@ +groups: + - name: kube-prometheus-node-recording.rules + rules: + - expr: + sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY + (instance) + record: instance:node_cpu:rate:sum + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: 
instance:node_network_receive_bytes:rate:sum + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum + - expr: + sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT + (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, + cpu)) BY (instance) + record: instance:node_cpu:ratio + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + record: cluster:node_cpu:sum_rate5m + - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) + record: cluster:node_cpu:ratio + + - name: node-exporter.rules + rules: + - expr: |- + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + record: instance:node_num_cpu:sum + - expr: |- + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + record: instance:node_cpu_utilisation:rate5m + - expr: |- + ( + node_load1{job="node-exporter"} + / + instance:node_num_cpu:sum{job="node-exporter"} + ) + record: instance:node_load1_per_cpu:ratio + - expr: |- + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + record: instance:node_memory_utilisation:ratio + - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + record: instance:node_vmstat_pgmajfault:rate5m + - expr: + rate(node_disk_io_time_seconds_total{job="node-exporter", + device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_seconds:rate5m + - expr: + 
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", + device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_weighted_seconds:rate5m + - expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_bytes_excluding_lo:rate5m + - expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_bytes_excluding_lo:rate5m + - expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_drop_excluding_lo:rate5m + - expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_drop_excluding_lo:rate5m + + - name: node.rules + rules: + - expr: |- + topk by(cluster, namespace, pod) (1, + max by (cluster, node, namespace, pod) ( + label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") + )) + record: "node_namespace_pod:kube_pod_info:" + - expr: |- + count by (cluster, node) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on (namespace, pod) group_left(node) + topk by(namespace, pod) (1, node_namespace_pod:kube_pod_info:) + ) + record: node:node_num_cpu:sum + - expr: |- + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) by (cluster) + record: :node_memory_MemAvailable_bytes:sum + - expr: |- + avg by (cluster, node) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + 
) + record: node:node_cpu_utilization:ratio_rate5m + - expr: |- + avg by (cluster) ( + node:node_cpu_utilization:ratio_rate5m + ) + record: cluster:node_cpu:ratio_rate5m From 902f558995c3586e6bcf024a0b83508549990026 Mon Sep 17 00:00:00 2001 From: Ryan Yin Date: Fri, 26 Sep 2025 18:44:54 +0800 Subject: [PATCH 3/4] fix: grafana - add or update uid for all dashboards --- .../databases/postgresql-database.json | 125 +- .../homelab/alertmanager-9578_rev4.json | 2 +- .../node-exporter-full-1860_rev33.json | 2 +- .../istio/istio-extension-dashboard.json | 12 +- .../istio/istio-mesh-dashboard.json | 82 +- .../istio/istio-performance-dashboard.json | 24 +- .../istio/istio-service-dashboard.json | 46 +- .../istio/istio-workload-dashboard.json | 38 +- .../dashboards/istio/pilot-dashboard.gen.json | 1659 ++++++++--------- .../istio/ztunnel-dashboard.gen.json | 864 +++++---- .../kubevirt/kubevirt-control-plane.json | 35 +- .../dashboards/loki-mixin/loki-chunks.json | 2 +- .../dashboards/loki-mixin/loki-deletion.json | 2 +- .../dashboards/loki-mixin/loki-logs.json | 2 +- .../loki-mixin/loki-operational.json | 2 +- .../dashboards/loki-mixin/loki-retention.json | 2 +- .../victoria-metrics-single.json | 2 +- 17 files changed, 1311 insertions(+), 1590 deletions(-) diff --git a/hosts/idols-aquamarine/grafana/dashboards/databases/postgresql-database.json b/hosts/idols-aquamarine/grafana/dashboards/databases/postgresql-database.json index 6724373d..8f8193d8 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/databases/postgresql-database.json +++ b/hosts/idols-aquamarine/grafana/dashboards/databases/postgresql-database.json @@ -73,11 +73,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": true, - "colors": [ - "#299c46", - "#7eb26d", - "#d44a3a" - ], + "colors": ["#299c46", "#7eb26d", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "none", "gauge": { @@ -156,11 +152,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - 
"#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "description": "start time of the process", "format": "dateTimeFromNow", @@ -239,11 +231,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"], "datasource": "${DS_PROMETHEUS}", "format": "decbytes", "gauge": { @@ -322,11 +310,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"], "datasource": "${DS_PROMETHEUS}", "format": "decbytes", "gauge": { @@ -405,11 +389,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], + "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"], "datasource": "${DS_PROMETHEUS}", "format": "decbytes", "gauge": { @@ -488,11 +468,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "none", "gauge": { @@ -864,11 +840,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "bytes", "gauge": { @@ -945,11 +917,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", 
- "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "bytes", "gauge": { @@ -1026,11 +994,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "bytes", "gauge": { @@ -1107,11 +1071,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "bytes", "gauge": { @@ -1189,11 +1149,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "decimals": 1, "format": "bytes", @@ -1271,11 +1227,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "none", "gauge": { @@ -1352,11 +1304,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "none", "gauge": { @@ -1433,11 +1381,7 @@ "cacheTimeout": null, "colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "none", "gauge": { @@ -1514,11 +1458,7 @@ "cacheTimeout": null, 
"colorBackground": false, "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], + "colors": ["#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a"], "datasource": "${DS_PROMETHEUS}", "format": "none", "gauge": { @@ -2944,11 +2884,7 @@ "refresh": "10s", "schemaVersion": 19, "style": "dark", - "tags": [ - "postgres", - "db", - "stats" - ], + "tags": ["postgres", "db", "stats"], "templating": { "list": [ { @@ -3136,32 +3072,11 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "", "title": "PostgreSQL Database", - "uid": "000000039", + "uid": "postgresql-database", "version": 1 -} \ No newline at end of file +} diff --git a/hosts/idols-aquamarine/grafana/dashboards/homelab/alertmanager-9578_rev4.json b/hosts/idols-aquamarine/grafana/dashboards/homelab/alertmanager-9578_rev4.json index 3176b7ac..6d4f5a4e 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/homelab/alertmanager-9578_rev4.json +++ b/hosts/idols-aquamarine/grafana/dashboards/homelab/alertmanager-9578_rev4.json @@ -11139,6 +11139,6 @@ }, "timezone": "", "title": "Alertmanager", - "uid": "eea-9_sik", + "uid": "alertmanager", "version": 27 } diff --git a/hosts/idols-aquamarine/grafana/dashboards/homelab/node-exporter-full-1860_rev33.json b/hosts/idols-aquamarine/grafana/dashboards/homelab/node-exporter-full-1860_rev33.json index bf25174c..95d944f6 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/homelab/node-exporter-full-1860_rev33.json +++ b/hosts/idols-aquamarine/grafana/dashboards/homelab/node-exporter-full-1860_rev33.json @@ -23262,7 +23262,7 @@ }, "timezone": "browser", "title": "Node 
Exporter Full", - "uid": "rYdddlPWk", + "uid": "node-exporter-full", "version": 87, "weekStart": "" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-extension-dashboard.json b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-extension-dashboard.json index 7b0bc27a..8790872d 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-extension-dashboard.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-extension-dashboard.json @@ -853,19 +853,11 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] + "refresh_intervals": ["30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"] }, "timezone": "", "title": "Istio Wasm Extension Dashboard", + "uid": "istio-wasm-extension", "version": 1, "weekStart": "" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-mesh-dashboard.json b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-mesh-dashboard.json index 76315214..47208951 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-mesh-dashboard.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-mesh-dashboard.json @@ -114,9 +114,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -196,9 +194,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -275,9 +271,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -354,9 +348,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -433,9 +425,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + 
"calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -511,9 +501,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -589,9 +577,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -667,9 +653,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -745,9 +729,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -823,9 +805,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -901,9 +881,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -979,9 +957,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -1329,9 +1305,7 @@ "footer": { "countRows": false, "fields": "", - "reducer": [ - "sum" - ], + "reducer": ["sum"], "show": false }, "showHeader": true @@ -1466,9 +1440,7 @@ "cellHeight": "sm", "footer": { "show": false, - "reducer": [ - "sum" - ], + "reducer": ["sum"], "countRows": false, "fields": "" } @@ -1832,30 +1804,12 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "refresh_intervals": ["30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], + 
"time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "browser", "title": "Istio Mesh Dashboard", + "uid": "istio-mesh", "version": 1, "weekStart": "" -} \ No newline at end of file +} diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-performance-dashboard.json b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-performance-dashboard.json index 02b3485c..361ec244 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-performance-dashboard.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-performance-dashboard.json @@ -1574,30 +1574,12 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "refresh_intervals": ["30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "", "title": "Istio Performance Dashboard", + "uid": "istio-performance", "version": 1, "weekStart": "" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-service-dashboard.json b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-service-dashboard.json index f82615c3..49f03008 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-service-dashboard.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-service-dashboard.json @@ -123,9 +123,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -197,9 +195,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -398,9 +394,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": 
["mean"], "fields": "", "values": false }, @@ -478,9 +472,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -552,9 +544,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -753,9 +743,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -3368,28 +3356,12 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "refresh_intervals": ["5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "", "title": "Istio Service Dashboard", + "uid": "istio-service", "version": 1, "weekStart": "" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-workload-dashboard.json b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-workload-dashboard.json index acd3587e..fa1474cf 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/istio-workload-dashboard.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/istio-workload-dashboard.json @@ -123,9 +123,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -206,9 +204,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -405,9 +401,7 @@ "justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -485,9 +479,7 @@ 
"justifyMode": "auto", "orientation": "horizontal", "reduceOptions": { - "calcs": [ - "mean" - ], + "calcs": ["mean"], "fields": "", "values": false }, @@ -3040,28 +3032,12 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "refresh_intervals": ["5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "", "title": "Istio Workload Dashboard", + "uid": "istio-workload", "version": 1, "weekStart": "" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/pilot-dashboard.gen.json b/hosts/idols-aquamarine/grafana/dashboards/istio/pilot-dashboard.gen.json index 58eb733d..15ed9842 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/pilot-dashboard.gen.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/pilot-dashboard.gen.json @@ -1,861 +1,840 @@ { - "graphTooltip": 1, - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "panels": [ ], - "title": "Deployed Versions", - "type": "row" + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Version number of each running instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 2, - "interval": "5s", - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(istio_build{component=\"pilot\"}) by (tag)", - "legendFormat": "Version ({{tag}})" - } - ], - 
"title": "Pilot Versions", - "type": "timeseries" + "id": 1, + "panels": [], + "title": "Deployed Versions", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 3, - "panels": [ ], - "title": "Resource Usage", - "type": "row" + "description": "Version number of each running instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Memory usage of each running instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - }, - "unit": "bytes" - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 0, - "y": 2 - }, - "id": 4, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n container_memory_working_set_bytes{container=\"discovery\", pod=~\"istiod-.*\"}\n)", - "legendFormat": "Container ({{pod}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n go_memstats_stack_inuse_bytes{app=\"istiod\"}\n)", - "legendFormat": "Stack ({{pod}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n go_memstats_heap_inuse_bytes{app=\"istiod\"}\n)", - "legendFormat": "Heap (In Use) ({{pod}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n go_memstats_heap_alloc_bytes{app=\"istiod\"}\n)", - "legendFormat": "Heap (Allocated) ({{pod}})" - } - ], - "title": "Memory Usage", - "type": "timeseries" + "gridPos": { + "h": 5, + "w": 24, + "x": 0, 
+ "y": 1 }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Details about memory allocations", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - }, - "unit": "Bps" + "id": 2, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(istio_build{component=\"pilot\"}) by (tag)", + "legendFormat": "Version ({{tag}})" + } + ], + "title": "Pilot Versions", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 3, + "panels": [], + "title": "Resource Usage", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Memory usage of each running instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 2 + }, + "id": 4, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n container_memory_working_set_bytes{container=\"discovery\", pod=~\"istiod-.*\"}\n)", + "legendFormat": "Container ({{pod}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n go_memstats_stack_inuse_bytes{app=\"istiod\"}\n)", + "legendFormat": "Stack ({{pod}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n go_memstats_heap_inuse_bytes{app=\"istiod\"}\n)", + "legendFormat": "Heap (In Use) ({{pod}})" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n go_memstats_heap_alloc_bytes{app=\"istiod\"}\n)", + "legendFormat": "Heap (Allocated) ({{pod}})" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Details about memory allocations", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" }, - "overrides": [ - { - "matcher": { - "id": "byFrameRefID", - "options": "B" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "unit", - "value": "c/s" - } - ] - } + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "c/s" + } ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 6, - "y": 2 - }, - "id": 5, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" + } + ] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 2 + }, + "id": 5, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n go_memstats_alloc_bytes_total{app=\"istiod\"}\n [$__rate_interval])\n)", + "legendFormat": "Bytes ({{pod}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n go_memstats_mallocs_total{app=\"istiod\"}\n [$__rate_interval])\n)", + "legendFormat": "Objects ({{pod}})" + } + ], + "title": "Memory Allocations", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "CPU usage of each running instance", + 
"fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 2 + }, + "id": 6, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n irate(\n container_cpu_usage_seconds_total{container=\"discovery\", pod=~\"istiod-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Container ({{pod}})" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Goroutine count for each running instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 2 + }, + "id": 7, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n go_goroutines{app=\"istiod\"}\n)", + "legendFormat": "Goroutines ({{pod}})" + } + ], + "title": "Goroutines", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 8, + "panels": [], + "title": "Push Information", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "showPoints": "never", + "stacking": { + "mode": "normal" } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n 
go_memstats_alloc_bytes_total{app=\"istiod\"}\n [$__rate_interval])\n)", - "legendFormat": "Bytes ({{pod}})" + }, + "unit": "ops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cds" }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n go_memstats_mallocs_total{app=\"istiod\"}\n [$__rate_interval])\n)", - "legendFormat": "Objects ({{pod}})" - } - ], - "title": "Memory Allocations", - "type": "timeseries" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "CPU usage of each running instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 2 - }, - "id": 6, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n irate(\n container_cpu_usage_seconds_total{container=\"discovery\", pod=~\"istiod-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Container ({{pod}})" - } - ], - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Goroutine count for each running instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 2 - }, - "id": 7, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n go_goroutines{app=\"istiod\"}\n)", - "legendFormat": "Goroutines ({{pod}})" - } 
- ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "id": 8, - "panels": [ ], - "title": "Push Information", - "type": "row" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 100, - "gradientMode": "none", - "showPoints": "never", - "stacking": { - "mode": "normal" - } - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cds" - }, - "properties": [ - { - "id": "displayName", - "value": "Clusters" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "eds" - }, - "properties": [ - { - "id": "displayName", - "value": "Endpoints" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "lds" - }, - "properties": [ - { - "id": "displayName", - "value": "Listeners" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "rds" - }, - "properties": [ - { - "id": "displayName", - "value": "Routes" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "istio.io/debug" - }, - "properties": [ - { - "id": "displayName", - "value": "Debug" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "wads" - }, - "properties": [ - { - "id": "displayName", - "value": "Authorization" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "wds" - }, - "properties": [ - { - "id": "displayName", - "value": "Workloads" - } - ] - } + "properties": [ + { + "id": "displayName", + "value": "Clusters" + } ] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "id": 9, - "interval": "15s", - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (type) (\n irate(\n pilot_xds_pushes{}\n [$__rate_interval])\n)", - "legendFormat": 
"{{type}}" - } - ], - "title": "XDS Pushes", - "type": "timeseries" + }, + { + "matcher": { + "id": "byName", + "options": "eds" + }, + "properties": [ + { + "id": "displayName", + "value": "Endpoints" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "lds" + }, + "properties": [ + { + "id": "displayName", + "value": "Listeners" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "rds" + }, + "properties": [ + { + "id": "displayName", + "value": "Routes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "istio.io/debug" + }, + "properties": [ + { + "id": "displayName", + "value": "Debug" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "wads" + }, + "properties": [ + { + "id": "displayName", + "value": "Authorization" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "wds" + }, + "properties": [ + { + "id": "displayName", + "value": "Workloads" + } + ] + } + ] }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Size of each xDS push.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "id": 10, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (type, event) (\n rate(\n pilot_k8s_reg_events{}\n [$__rate_interval])\n)", - "legendFormat": "{{event}} {{type}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (type, event) (\n rate(\n pilot_k8s_cfg_events{}\n [$__rate_interval])\n)", - "legendFormat": "{{event}} {{type}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (type) (\n rate(\n pilot_push_triggers{}\n 
[$__rate_interval])\n)", - "legendFormat": "Push {{type}}" - } - ], - "title": "Events", - "type": "timeseries" + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Total number of XDS connections\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 4 - }, - "id": 11, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(envoy_cluster_upstream_cx_active{cluster_name=\"xds-grpc\"})", - "legendFormat": "Connections (client reported)" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(pilot_xds{})", - "legendFormat": "Connections (server reported)" - } - ], - "title": "Connections", - "type": "timeseries" + "id": 9, + "interval": "15s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list" + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Number of push errors. 
Many of these are at least potentional fatal and should be explored in-depth via Istiod logs.\nNote: metrics here do not use rate() to avoid missing transition from \"No series\"; series are not reported if there are no errors at all.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 14 - }, - "id": 12, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (type) (\n pilot_total_xds_rejects{}\n)", - "legendFormat": "Rejected Config ({{type}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "pilot_total_xds_internal_errors{}", - "legendFormat": "Internal Errors" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "pilot_xds_push_context_errors{}", - "legendFormat": "Push Context Errors" - } - ], - "title": "Push Errors", - "type": "timeseries" + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (type) (\n irate(\n pilot_xds_pushes{}\n [$__rate_interval])\n)", + "legendFormat": "{{type}}" + } + ], + "title": "XDS Pushes", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Count of active and pending proxies managed by each instance.\nPending is expected to converge to zero.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 14 - }, - "id": 13, - "interval": "1m", - "options": { - "calculation": { - "xBuckets": { - "mode": "size", - "value": "1min" - } - }, - "cellGap": 0, - "color": { - "mode": "scheme", - "scheme": "Spectral", 
- "steps": 128 - }, - "yAxis": { - "decimals": 0, - "unit": "s" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(rate(pilot_xds_push_time_bucket{}[1m])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}" - } - ], - "title": "Push Time", - "type": "heatmap" + "description": "Size of each xDS push.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Size of each xDS push.\n", - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 14 - }, - "id": 14, - "interval": "1m", - "options": { - "calculation": { - "xBuckets": { - "mode": "size", - "value": "1min" - } - }, - "cellGap": 0, - "color": { - "mode": "scheme", - "scheme": "Spectral", - "steps": 128 - }, - "yAxis": { - "decimals": 0, - "unit": "bytes" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(rate(pilot_xds_config_size_bytes_bucket{}[1m])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}" - } - ], - "title": "Push Size", - "type": "heatmap" + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 100 - }, - "id": 15, - "panels": [ ], - "title": "Webhooks", - "type": "row" + "id": 10, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Rate of XDS push operations, by type. 
This is incremented on a per-proxy basis.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 101 - }, - "id": 16, - "interval": "5s", - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(\n rate(\n galley_validation_passed{}\n [$__rate_interval])\n)", - "legendFormat": "Success" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(\n rate(\n galley_validation_passed{}\n [$__rate_interval])\n)", - "legendFormat": "Failure" - } - ], - "title": "Validation", - "type": "timeseries" + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (type, event) (\n rate(\n pilot_k8s_reg_events{}\n [$__rate_interval])\n)", + "legendFormat": "{{event}} {{type}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (type, event) (\n rate(\n pilot_k8s_cfg_events{}\n [$__rate_interval])\n)", + "legendFormat": "{{event}} {{type}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (type) (\n rate(\n pilot_push_triggers{}\n [$__rate_interval])\n)", + "legendFormat": "Push {{type}}" + } + ], + "title": "Events", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" }, + "description": "Total number of XDS connections\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 11, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + 
"pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(envoy_cluster_upstream_cx_active{cluster_name=\"xds-grpc\"})", + "legendFormat": "Connections (client reported)" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(pilot_xds{})", + "legendFormat": "Connections (server reported)" + } + ], + "title": "Connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Number of push errors. Many of these are at least potentially fatal and should be explored in-depth via Istiod logs.\nNote: metrics here do not use rate() to avoid missing transition from \"No series\"; series are not reported if there are no errors at all.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 14 + }, + "id": 12, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (type) (\n pilot_total_xds_rejects{}\n)", + "legendFormat": "Rejected Config ({{type}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "pilot_total_xds_internal_errors{}", + "legendFormat": "Internal Errors" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "pilot_xds_push_context_errors{}", + "legendFormat": "Push Context Errors" + } + ], + "title": "Push Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Distribution of the time taken by each xDS push.\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 8, +
"y": 14 + }, + "id": 13, + "interval": "1m", + "options": { + "calculation": { + "xBuckets": { + "mode": "size", + "value": "1min" + } + }, + "cellGap": 0, + "color": { + "mode": "scheme", + "scheme": "Spectral", + "steps": 128 + }, + "yAxis": { + "decimals": 0, + "unit": "s" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(rate(pilot_xds_push_time_bucket{}[1m])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}" + } + ], + "title": "Push Time", + "type": "heatmap" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Size of each xDS push.\n", + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 14 + }, + "id": 14, + "interval": "1m", + "options": { + "calculation": { + "xBuckets": { + "mode": "size", + "value": "1min" + } + }, + "cellGap": 0, + "color": { + "mode": "scheme", + "scheme": "Spectral", + "steps": 128 + }, + "yAxis": { + "decimals": 0, + "unit": "bytes" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(rate(pilot_xds_config_size_bytes_bucket{}[1m])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}" + } + ], + "title": "Push Size", + "type": "heatmap" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 100 + }, + "id": 15, + "panels": [], + "title": "Webhooks", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Rate of XDS push operations, by type. 
This is incremented on a per-proxy basis.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 101 + }, + "id": 16, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n rate(\n galley_validation_passed{}\n [$__rate_interval])\n)", + "legendFormat": "Success" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n rate(\n galley_validation_failed{}\n [$__rate_interval])\n)", + "legendFormat": "Failure" + } + ], + "title": "Validation", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Rate of successful and failed sidecar injections.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 101 + }, + "id": 17, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n rate(\n sidecar_injection_success_total{}\n [$__rate_interval])\n)", + "legendFormat": "Success" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n rate(\n sidecar_injection_failure_total{}\n [$__rate_interval])\n)", + "legendFormat": "Failure" + } + ], + "title": "Injection", + "type": "timeseries" + } + ], + "refresh": "15s", + "schemaVersion": 39, + "templating": { + "list": [ { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Size of each xDS push.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity":
10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 101 - }, - "id": 17, - "interval": "5s", - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(\n rate(\n sidecar_injection_success_total{}\n [$__rate_interval])\n)", - "legendFormat": "Success" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(\n rate(\n sidecar_injection_failure_total{}\n [$__rate_interval])\n)", - "legendFormat": "Failure" - } - ], - "title": "Injection", - "type": "timeseries" + "name": "datasource", + "query": "prometheus", + "type": "datasource" } - ], - "refresh": "15s", - "schemaVersion": 39, - "templating": { - "list": [ - { - "name": "datasource", - "query": "prometheus", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timezone": "utc", - "title": "Istio Control Plane Dashboard", - "uid": "1813f692a8e4ac77155348d4c7d2fba8" + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "utc", + "title": "Istio Control Plane Dashboard", + "uid": "istio-control-plane" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/istio/ztunnel-dashboard.gen.json b/hosts/idols-aquamarine/grafana/dashboards/istio/ztunnel-dashboard.gen.json index 533514ed..dc31f3aa 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/istio/ztunnel-dashboard.gen.json +++ b/hosts/idols-aquamarine/grafana/dashboards/istio/ztunnel-dashboard.gen.json @@ -1,458 +1,434 @@ { - "graphTooltip": 1, - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "panels": [ ], - "title": "Process", - "type": "row" + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - { - "datasource": { - 
"type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Version number of each running instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 2, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum(istio_build{component=\"ztunnel\"}) by (tag)", - "legendFormat": "Version ({{tag}})" - } - ], - "title": "Ztunnel Versions", - "type": "timeseries" + "id": 1, + "panels": [], + "title": "Process", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Memory usage of each running instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - }, - "unit": "bytes" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 1 - }, - "id": 3, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n container_memory_working_set_bytes{container=\"istio-proxy\", pod=~\"ztunnel-.*\"}\n)", - "legendFormat": "Container ({{pod}})" - } - ], - "title": "Memory Usage", - "type": "timeseries" + "description": "Version number of each running instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "CPU usage of each running instance", - "fieldConfig": { 
- "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 1 - }, - "id": 4, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n irate(\n container_cpu_usage_seconds_total{container=\"istio-proxy\", pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Container ({{pod}})" - } - ], - "title": "CPU Usage", - "type": "timeseries" + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 5, - "panels": [ ], - "title": "Network", - "type": "row" + "id": 2, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Connections opened and closed per instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - }, - "unit": "cps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 10 - }, - "id": 6, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n istio_tcp_connections_opened_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Opened ({{pod}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "-sum by (pod) (\n rate(\n istio_tcp_connections_closed_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Closed ({{pod}})" - } - ], - "title": 
"Connections", - "type": "timeseries" + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(istio_build{component=\"ztunnel\"}) by (tag)", + "legendFormat": "Version ({{tag}})" + } + ], + "title": "Ztunnel Versions", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Bytes sent and received per instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - }, - "unit": "Bps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 10 - }, - "id": 7, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n istio_tcp_sent_bytes_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Sent ({{pod}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n istio_tcp_received_bytes_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Received ({{pod}})" - } - ], - "title": "Bytes Transmitted", - "type": "timeseries" + "description": "Memory usage of each running instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + }, + "unit": "bytes" + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "DNS queries received per instance", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - }, - "unit": "qps" - } - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 10 - }, - "id": 8, - "interval": 
"5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n istio_dns_requests_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "Request ({{pod}})" - } - ], - "title": "DNS Request", - "type": "timeseries" + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 9, - "panels": [ ], - "title": "Operations", - "type": "row" + "id": 3, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "Count of XDS connection terminations.\nThis will typically spike every 30min for each instance.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 19 - }, - "id": 10, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (\n rate(\n istio_xds_connection_terminations_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", - "legendFormat": "XDS Connection Terminations ({{pod}})" - } - ], - "title": "XDS", - "type": "timeseries" + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n container_memory_working_set_bytes{container=\"istio-proxy\", pod=~\"ztunnel-.*\"}\n)", + "legendFormat": "Container ({{pod}})" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"datasource", + "uid": "-- Mixed --" }, + "description": "CPU usage of each running instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 4, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n irate(\n container_cpu_usage_seconds_total{container=\"istio-proxy\", pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Container ({{pod}})" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [], + "title": "Network", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Connections opened and closed per instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + }, + "unit": "cps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 6, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n istio_tcp_connections_opened_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Opened ({{pod}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "-sum by (pod) (\n rate(\n istio_tcp_connections_closed_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Closed ({{pod}})" + } + ], + "title": "Connections", + "type": "timeseries" + }, + { + "datasource": { + 
"type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Bytes sent and received per instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 7, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n istio_tcp_sent_bytes_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Sent ({{pod}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n istio_tcp_received_bytes_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Received ({{pod}})" + } + ], + "title": "Bytes Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "DNS queries received per instance", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + }, + "unit": "qps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 8, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n istio_dns_requests_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "Request ({{pod}})" + } + ], + "title": "DNS Request", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 9, + "panels": [], + "title": "Operations", + "type": "row" + }, + { + "datasource": { + "type": "datasource", 
+ "uid": "-- Mixed --" + }, + "description": "Count of XDS connection terminations.\nThis will typically spike every 30min for each instance.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 10, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n istio_xds_connection_terminations_total{pod=~\"ztunnel-.*\"}\n [$__rate_interval])\n)", + "legendFormat": "XDS Connection Terminations ({{pod}})" + } + ], + "title": "XDS", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Count of active and pending proxies managed by each instance.\nPending is expected to converge to zero.\n", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "gradientMode": "hue", + "showPoints": "never" + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 11, + "interval": "5s", + "options": { + "legend": { + "calcs": ["last", "max"], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (workload_manager_active_proxy_count{pod=~\"ztunnel-.*\"})", + "legendFormat": "Active Proxies ({{pod}})" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (workload_manager_pending_proxy_count{pod=~\"ztunnel-.*\"})", + "legendFormat": "Pending Proxies ({{pod}})" + } + ], + "title": "Workload Manager", + "type": "timeseries" + } + ], + "refresh": "15s", + "schemaVersion": 39, + "templating": { + "list": [ { - "datasource": { - "type": "datasource", - "uid": "-- 
Mixed --" - }, - "description": "Count of active and pending proxies managed by each instance.\nPending is expected to converge to zero.\n", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 10, - "gradientMode": "hue", - "showPoints": "never" - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 19 - }, - "id": 11, - "interval": "5s", - "options": { - "legend": { - "calcs": [ - "last", - "max" - ], - "displayMode": "table" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (workload_manager_active_proxy_count{pod=~\"ztunnel-.*\"})", - "legendFormat": "Active Proxies ({{pod}})" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "expr": "sum by (pod) (workload_manager_pending_proxy_count{pod=~\"ztunnel-.*\"})", - "legendFormat": "Pending Proxies ({{pod}})" - } - ], - "title": "Workload Manager", - "type": "timeseries" + "name": "datasource", + "query": "prometheus", + "type": "datasource" } - ], - "refresh": "15s", - "schemaVersion": 39, - "templating": { - "list": [ - { - "name": "datasource", - "query": "prometheus", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-30m", - "to": "now" - }, - "timezone": "utc", - "title": "Istio Ztunnel Dashboard", - "uid": "12c58766acc81a1c835dd5059eaf2741" + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "utc", + "title": "Istio Ztunnel Dashboard", + "uid": "istio-ztunnel" } diff --git a/hosts/idols-aquamarine/grafana/dashboards/kubevirt/kubevirt-control-plane.json b/hosts/idols-aquamarine/grafana/dashboards/kubevirt/kubevirt-control-plane.json index f1cc1598..eafbe742 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/kubevirt/kubevirt-control-plane.json +++ b/hosts/idols-aquamarine/grafana/dashboards/kubevirt/kubevirt-control-plane.json @@ -4572,11 +4572,7 @@ "refresh": "1m", "schemaVersion": 26, "style": "dark", - "tags": [ - 
"kubevirt", - "kubevirt-control-plane", - "sig-scale" - ], + "tags": ["kubevirt", "kubevirt-control-plane", "sig-scale"], "templating": { "list": [ { @@ -5165,32 +5161,11 @@ "to": "now" }, "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "UTC", "title": "KubeVirt / Control Plane", - "uid": "V1Qq_IBM_za0", + "uid": "kubevirt-control-plane", "version": 3 -} \ No newline at end of file +} diff --git a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-chunks.json b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-chunks.json index b57c2830..eb3de718 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-chunks.json +++ b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-chunks.json @@ -1157,6 +1157,6 @@ }, "timezone": "utc", "title": "Loki / Chunks", - "uid": "chunks", + "uid": "loki-chunks", "version": 0 } diff --git a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-deletion.json b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-deletion.json index db2de225..de2ef815 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-deletion.json +++ b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-deletion.json @@ -720,6 +720,6 @@ }, "timezone": "utc", "title": "Loki / Deletion", - "uid": "deletion", + "uid": "loki-deletion", "version": 0 } diff --git a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-logs.json b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-logs.json index 8835f2a4..ce597a79 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-logs.json +++ 
b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-logs.json @@ -1032,6 +1032,6 @@ }, "timezone": "utc", "title": "Loki / Logs", - "uid": "logs", + "uid": "loki-logs", "version": 0 } diff --git a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-operational.json b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-operational.json index 0a74a5b7..3f77098c 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-operational.json +++ b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-operational.json @@ -6701,6 +6701,6 @@ }, "timezone": "utc", "title": "Loki / Operational", - "uid": "operational", + "uid": "loki-operational", "version": 0 } diff --git a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-retention.json b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-retention.json index a313440e..7cb392ce 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-retention.json +++ b/hosts/idols-aquamarine/grafana/dashboards/loki-mixin/loki-retention.json @@ -1464,6 +1464,6 @@ }, "timezone": "utc", "title": "Loki / Retention", - "uid": "retention", + "uid": "loki-retention", "version": 0 } diff --git a/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json b/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json index 1a7208f8..52743b7d 100644 --- a/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json +++ b/hosts/idols-aquamarine/grafana/dashboards/victoriametrics/victoria-metrics-single.json @@ -6542,7 +6542,7 @@ }, "timezone": "", "title": "VictoriaMetrics - single-node", - "uid": "wNf0q_kZk", + "uid": "victoriametrics-single-node", "version": 1, "weekStart": "", "gnetId": 10229 From 38f9a3e1bb38e72066dfc3eef3c7b5819387ec85 Mon Sep 17 00:00:00 2001 From: Ryan Yin Date: Fri, 26 Sep 2025 19:04:41 +0800 Subject: [PATCH 4/4] fix: vmalert - remoteWrite --- 
hosts/idols-aquamarine/monitoring/alert.nix | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hosts/idols-aquamarine/monitoring/alert.nix b/hosts/idols-aquamarine/monitoring/alert.nix index 0013ac1b..f13db13b 100644 --- a/hosts/idols-aquamarine/monitoring/alert.nix +++ b/hosts/idols-aquamarine/monitoring/alert.nix @@ -1,12 +1,16 @@ -{ config, lib, ... }: +{ config, ... }: { - services.vmalert = { + # https://docs.victoriametrics.com/victoriametrics/vmalert/ + services.vmalert.instances."homelab" = { enable = true; settings = { "httpListenAddr" = "127.0.0.1:8880"; "datasource.url" = "http://localhost:9090"; "notifier.url" = [ "http://localhost:9093" ]; # alertmanager's api + # Recording rules results are persisted via remote write. + "remoteWrite.url" = "http://localhost:9090"; + "remoteRead.url" = "http://localhost:9090"; # Whether to disable long-lived connections to the datasource. "datasource.disableKeepAlive" = true;