feat: monitoring + containers - grafana + prometheus + node_exporter + other exporters

This commit is contained in:
Ryan Yin
2024-02-17 19:53:20 +08:00
parent 9626986524
commit b6e51e1950
25 changed files with 739 additions and 184 deletions

View File

@@ -1,8 +1,9 @@
# Idols - Ruby
TODO: use ruby for backup / sync my personal data.
TODO: use ruby for backup / sync my personal data, and monitor the status/logs of my homelab.
For safety, those data should be encrypted before sending to the cloud or my NAS.
1. prometheus: Monitor the status of my homelab
1. restic: Backup file from homelab to NAS, or from NAS to Cloud
2. synthing: Sync file between android/macbook/PC and NAS
1. synthing: Sync file between android/macbook/PC and NAS

View File

@@ -1,4 +1,8 @@
{vars_networking, ...}:
{
vars_networking,
mylib,
...
}:
#############################################################
#
# Ruby - a NixOS VM running on Proxmox
@@ -8,9 +12,7 @@ let
hostName = "ruby"; # Define your hostname.
hostAddress = vars_networking.hostAddress.${hostName};
in {
imports = [
./restic.nix
];
imports = mylib.scanPaths ./.;
# Enable binfmt emulation of aarch64-linux, this is required for cross compilation.
boot.binfmt.emulatedSystems = ["aarch64-linux" "riscv64-linux"];

View File

@@ -0,0 +1,3 @@
{mylib, ...}: {
imports = mylib.scanPaths ./.;
}

View File

@@ -0,0 +1,4 @@
{
# TODO
# https://github.com/NixOS/nixpkgs/blob/nixos-23.11/nixos/modules/services/monitoring/prometheus/exporters/pve.nix
}

View File

View File

View File

@@ -0,0 +1,52 @@
{
config,
pkgs,
username,
useremail,
...
}: {
services.grafana = {
enable = true;
dataDir = "/var/lib/grafana";
# DeclarativePlugins = with pkgs.grafanaPlugins; [ grafana-piechart-panel ];
settings = {
server = {
http_addr = "0.0.0.0";
http_port = 80;
protocol = "http";
domain = "grafana.writefo.fun";
# Redirect to correct domain if the host header does not match the domain. Prevents DNS rebinding attacks.
serve_from_sub_path = false;
# Add subpath to the root_url if serve_from_sub_path is true
root_url = "%(protocol)s://%(domain)s:%(http_port)s/";
enforce_domain = false;
read_timeout = "180s";
# Enable HTTP compression, this can improve transfer speed and bandwidth utilization.
enable_gzip = true;
# Cdn for accelerating loading of frontend assets.
# cdn_url = "https://cdn.jsdelivr.net/npm/grafana@7.5.5";
};
security = {
admin_user = username;
admin_email = useremail;
# Use file provider to read the admin password from a file.
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
admin_password = "$__file{${config.age.secrets."grafana-admin-password".path}}";
};
users = {
allow_sign_up = false;
# home_page = "";
default_theme = "dark";
};
};
# Declaratively provision Grafana's data sources, dashboards, and alerting rules.
# Grafana's alerting rules is not recommended to use, we use Prometheus alertmanager instead.
# https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources
provision = {
datasources.path = ./datasources.yml;
dashboards.path = ./dashboards.yml;
};
};
}

View File

@@ -0,0 +1,108 @@
{
config,
vars_networking,
...
}: {
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/
services.prometheus = {
enable = true;
checkConfig = true;
listenAddress = "0.0.0.0";
port = 9090;
webExternalUrl = "https://prometheus.writefor.fun";
extraFlags = ["--storage.tsdb.retention.time=15d"];
# Directory below /var/lib to store Prometheus metrics data.
stateDir = "prometheus2";
# Reload prometheus when configuration file changes (instead of restart).
enableReload = true;
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read
# remoteRead = [];
# Rules are read from these files.
# https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/
#
# Prometheus supports two types of rules which may be configured
# and then evaluated at regular intervals:
# 1. Recording rules
# Recording rules allow you to precompute frequently needed or computationally
# expensive expressions and save their result as a new set of time series.
# Querying the precomputed result will then often be much faster than executing the original expression.
# This is especially useful for dashboards, which need to query the same expression repeatedly every time they refresh.
# 2. Alerting rules
# Alerting rules allow you to define alert conditions based on Prometheus expression language expressions
# and to send notifications about firing alerts to an external service.
ruleFiles = [
./recording_rules.yml
./alerting_rules.yml
];
# specifies a set of targets and parameters describing how to scrape metrics from them.
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
scrapeConfigs = [
{
job_name = "node-exporter";
scrape_interval = "30s";
metrics_path = "/metrics";
static_configs = [
{
# All my NixOS hosts.
targets =
map (host: "${host.address}:9100")
(builtins.attrValues vars_networking.hostAddress);
labels.type = "node";
}
];
}
];
# specifies Alertmanager instances the Prometheus server sends alerts to
# https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config
alertmanagers = [{static_configs = [{targets = ["localhost:9093"];}];}];
};
services.prometheus.alertmanager = {
enable = true;
logLevel = "info";
environmentFile = config.age.secrets."alertmanager.env".path;
webExternalUrl = "https://alertmanager.writefor.fun";
listenAddress = "[::1]";
configuration = {
global = {
# The smarthost and SMTP sender used for mail notifications.
smtp_smarthost = "smtp.qq.com:465";
smtp_from = "$SMTP_SENDER_EMAIL";
smtp_auth_username = "$SMTP_AUTH_USERNAME";
smtp_auth_password = "$SMTP_AUTH_PASSWORD";
# smtp.qq.com:465 support SSL only, so we need to disable TLS here.
# https://service.mail.qq.com/detail/0/310
smtp_require_tls = false;
};
route = {
receiver = "default";
routes = [
{
group_by = ["host"];
group_wait = "5m";
group_interval = "5m";
repeat_interval = "4h";
receiver = "default";
}
];
};
receivers = [
{
name = "default";
email_configs = [
{
to = "ryan4yin@linux.com";
# Whether to notify about resolved alerts.
send_resolved = true;
}
];
}
];
};
};
}

View File

@@ -1,16 +1,20 @@
{pkgs, ...}: {
{pkgs, ...}: let
passwordFile = "/etc/agenix/restic-password";
sshKeyPath = "/etc/agenix/ssh-key-for-restic-backup";
rcloneConfigFile = "/etc/agenix/rclone-conf-for-restic-backup";
in {
# https://github.com/NixOS/nixpkgs/blob/nixos-23.11/nixos/modules/services/backup/restic.nix
services.restic.backups = {
homelab-backup = {
inherit passwordFile;
initialize = true; # Initialize the repository if it doesn't exist.
passwordFile = "/etc/agenix/restic-password";
repository = "rclone:smb-downloads:/Downloads/proxmox-backup/"; # backup to a rclone remote
# rclone related
# rcloneOptions = {
# bwlimit = "100M"; # Limit the bandwidth used by rclone.
# };
rcloneConfigFile = "/etc/agenix/rclone-conf-for-restic-backup";
inherit rcloneConfigFile;
# Which local paths to backup, in addition to ones specified via `dynamicFilesFrom`.
paths = [
@@ -42,7 +46,7 @@
pve_nodes | each {|it|
rsync -avz \
-e "ssh -i /etc/agenix/ssh-key-for-restic-backup" \
-e "ssh -i ${sshKeyPath}" \
$"($it):/var/lib/vz" $"/tmp/restic-backup-temp/($it)"
}
'
@@ -55,7 +59,7 @@
# Extra arguments passed to restic backup.
# extraBackupArgs = [
# "--exclude-file=/etc/agenix/restic-excludes"
# "--exclude-file=/etc/restic/excludes-list"
# ];
# repository = "/mnt/backup-hdd"; # backup to a local directory