From 2a5a1965919f7d3b16afbbcb58bb547f09f901b8 Mon Sep 17 00:00:00 2001 From: Ryan Yin Date: Mon, 16 Mar 2026 11:10:01 +0800 Subject: [PATCH] fix: NVMe SSD Timeout / Freeze Fix for Linux (#249) --- hosts/idols-ai/hardware-configuration.nix | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/hosts/idols-ai/hardware-configuration.nix b/hosts/idols-ai/hardware-configuration.nix index 3037319c..0df24ba1 100644 --- a/hosts/idols-ai/hardware-configuration.nix +++ b/hosts/idols-ai/hardware-configuration.nix @@ -13,6 +13,27 @@ (modulesPath + "/installer/scan/not-detected.nix") ]; + boot.kernelParams = [ + # === NVMe SSD Timeout / Freeze Fix for Linux (each note below describes the parameter directly above it) === + # https://community.frame.work/t/nvme-timeout-woes/54999 + + "nvme_core.default_ps_max_latency_us=0" + # Explanation: Completely disables NVMe Autonomous Power State Transition (APST). + # Why: The drive enters deep sleep states even during high load, and waking from them takes too long (>30 ms), + # so the kernel concludes that the command has timed out. + # A value of 0 never lets the drive sleep → root-cause fix for "freezes during big reads/writes". + + "nvme_core.io_timeout=4294967295" + # Explanation: Raises the kernel's NVMe I/O timeout to 4294967295, the largest 32-bit unsigned value (described here as ~49 days; NOTE(review): the parameter is documented in seconds, so confirm the effective duration). + # Why: The Linux default is only 30 seconds, after which the kernel aborts the request and resets the controller. + # This makes the kernel "patient": a momentarily slow drive no longer triggers an abort/reset. Trade-off: a genuinely hung command will wait just as long. + + "pcie_aspm=off" + # Explanation: Intended to disable PCIe Active State Power Management (link power saving). NOTE(review): the kernel docs describe "off" as leaving firmware-set ASPM configuration untouched, so confirm ASPM is really disabled on this host. + # Why: The PCIe link dropping into L1/L1.2 low-power states is reported as a leading cause of NVMe timeouts on Linux. + # With ASPM off the link stays fully powered at all times → eliminates "Link is Down" + timeout errors. + ]; + # Use the EFI boot loader. boot.loader.efi.canTouchEfiVariables = true; # depending on how you configured your disk mounts, change this to /boot or /boot/efi. 
@@ -70,6 +91,6 @@ # networking.interfaces.wlo1.useDHCP = lib.mkDefault true; nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; - powerManagement.cpuFreqGovernor = lib.mkDefault "powersave"; + powerManagement.cpuFreqGovernor = lib.mkDefault "performance"; # governor alternatives considered: ondemand / powersave / performance hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; }