mirror of
https://github.com/juanfont/headscale.git
synced 2026-04-01 15:03:23 +02:00
integration: scale remaining hardcoded timeouts and replace pingAllHelper
Apply CI-aware scaling to all remaining hardcoded timeouts:

- requireAllClientsOfflineStaged: scale the three internal stage timeouts (15s/20s/60s) with ScaledTimeout.
- validateReloginComplete: scale requireAllClientsOnline (120s) and requireAllClientsNetInfoAndDERP (3min) calls.
- WaitForTailscaleSyncPerUser callers in acl_test.go (3 sites, 60s).
- WaitForRunning callers in tags_test.go (10 sites): switch to PeerSyncTimeout() to match convention.
- WaitForRunning/WaitForPeers direct callers in route_test.go.
- requireAllClientsOnline callers in general_test.go and auth_key_test.go.

Replace pingAllHelper with assertPingAll/assertPingAllWithCollect:

- Wraps pings in EventuallyWithT so transient docker exec timeouts are retried instead of immediately failing the test.
- Timeout scales with the ping matrix size (2s per-ping budget for 2 full sweeps) so large tests get proportionally more time.
- Uses CollectT correctly, fixing the broken EventuallyWithT usage in TestEphemeral where the old t.Errorf bypassed CollectT.
- Follows the established assert*/assertWithCollect naming.

Updates #3125
This commit is contained in:
@@ -153,8 +153,8 @@ func validateLogoutComplete(t *testing.T, headscale ControlServer, expectedNodes
|
||||
func validateReloginComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
|
||||
t.Helper()
|
||||
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", 120*time.Second)
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", 3*time.Minute)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", integrationutil.ScaledTimeout(120*time.Second))
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", integrationutil.ScaledTimeout(3*time.Minute))
|
||||
}
|
||||
|
||||
// requireAllClientsOnline validates that all nodes are online/offline across all headscale systems
|
||||
@@ -400,7 +400,7 @@ func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expec
|
||||
}
|
||||
|
||||
assert.True(c, allBatcherOffline, "All nodes should be disconnected from batcher")
|
||||
}, 15*time.Second, 1*time.Second, "batcher disconnection validation")
|
||||
}, integrationutil.ScaledTimeout(15*time.Second), 1*time.Second, "batcher disconnection validation")
|
||||
|
||||
// Stage 2: Verify nodestore offline status (up to 15 seconds due to disconnect detection delay)
|
||||
t.Logf("Stage 2: Verifying nodestore offline status for %d nodes (allowing for 10s disconnect detection delay)", len(expectedNodes))
|
||||
@@ -426,7 +426,7 @@ func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expec
|
||||
}
|
||||
|
||||
assert.True(c, allNodeStoreOffline, "All nodes should be offline in nodestore")
|
||||
}, 20*time.Second, 1*time.Second, "nodestore offline validation")
|
||||
}, integrationutil.ScaledTimeout(20*time.Second), 1*time.Second, "nodestore offline validation")
|
||||
|
||||
// Stage 3: Verify map response propagation (longest delay due to peer update timing)
|
||||
t.Logf("Stage 3: Verifying map response propagation for %d nodes (allowing for peer map update delays)", len(expectedNodes))
|
||||
@@ -468,7 +468,7 @@ func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expec
|
||||
}
|
||||
|
||||
assert.True(c, allMapResponsesOffline, "All nodes should be absent from peer map responses")
|
||||
}, 60*time.Second, 2*time.Second, "map response propagation validation")
|
||||
}, integrationutil.ScaledTimeout(60*time.Second), 2*time.Second, "map response propagation validation")
|
||||
|
||||
t.Logf("All stages completed: nodes are fully offline across all systems")
|
||||
}
|
||||
@@ -582,28 +582,43 @@ func assertTailscaleNodesLogout(t assert.TestingT, clients []TailscaleClient) {
|
||||
}
|
||||
}
|
||||
|
||||
// pingAllHelper performs ping tests between all clients and addresses, returning success count.
|
||||
// This is used to validate network connectivity in integration tests.
|
||||
// Returns the total number of successful ping operations.
|
||||
// assertPingAll verifies that every client can ping every address.
|
||||
// The entire ping matrix is retried via EventuallyWithT to handle
|
||||
// transient failures on slow CI runners. The timeout scales with
|
||||
// the number of pings since they run serially and each can take
|
||||
// up to ~2s on CI (docker exec overhead + ping timeout).
|
||||
//
|
||||
//nolint:unparam // opts is variadic for extensibility even though callers currently don't pass options
|
||||
func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) int {
|
||||
func assertPingAll(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) {
|
||||
t.Helper()
|
||||
|
||||
success := 0
|
||||
// Each ping can take up to ~2s on CI. Budget for 2 full sweeps
|
||||
// (one that might have transient failures + one clean pass).
|
||||
pingCount := len(clients) * len(addrs)
|
||||
perPingBudget := 2 * time.Second
|
||||
timeout := max(
|
||||
// Floor at 30s for small matrices.
|
||||
integrationutil.ScaledTimeout(time.Duration(pingCount)*perPingBudget*2), integrationutil.ScaledTimeout(30*time.Second))
|
||||
|
||||
assert.EventuallyWithT(t, func(c *assert.CollectT) {
|
||||
assertPingAllWithCollect(c, clients, addrs, opts...)
|
||||
}, timeout, 2*time.Second,
|
||||
"all %d clients should be able to ping all %d addresses",
|
||||
len(clients), len(addrs))
|
||||
}
|
||||
|
||||
// assertPingAllWithCollect pings every address from every client and
|
||||
// collects failures on the provided CollectT. Pings run serially to
|
||||
// avoid overloading the Docker daemon on resource-constrained CI
|
||||
// runners. For use inside EventuallyWithT blocks when the caller
|
||||
// needs custom timeout or retry control.
|
||||
func assertPingAllWithCollect(c *assert.CollectT, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) {
|
||||
for _, client := range clients {
|
||||
for _, addr := range addrs {
|
||||
err := client.Ping(addr, opts...)
|
||||
if err != nil {
|
||||
t.Errorf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
|
||||
} else {
|
||||
success++
|
||||
}
|
||||
assert.NoError(c, err, "ping from %s to %s", client.Hostname(), addr) //nolint:testifylint // CollectT requires assert
|
||||
}
|
||||
}
|
||||
|
||||
return success
|
||||
}
|
||||
|
||||
// pingDerpAllHelper performs DERP-based ping tests between all clients and addresses.
|
||||
|
||||
Reference in New Issue
Block a user