mirror of
https://github.com/juanfont/headscale.git
synced 2026-04-01 06:53:23 +02:00
integration: scale remaining hardcoded timeouts and replace pingAllHelper
Apply CI-aware scaling to all remaining hardcoded timeouts:

- requireAllClientsOfflineStaged: scale the three internal stage timeouts (15s/20s/60s) with ScaledTimeout.
- validateReloginComplete: scale requireAllClientsOnline (120s) and requireAllClientsNetInfoAndDERP (3min) calls.
- WaitForTailscaleSyncPerUser callers in acl_test.go (3 sites, 60s).
- WaitForRunning callers in tags_test.go (10 sites): switch to PeerSyncTimeout() to match convention.
- WaitForRunning/WaitForPeers direct callers in route_test.go.
- requireAllClientsOnline callers in general_test.go and auth_key_test.go.

Replace pingAllHelper with assertPingAll/assertPingAllWithCollect:

- Wraps pings in EventuallyWithT so transient docker exec timeouts are retried instead of immediately failing the test.
- Timeout scales with the ping matrix size (2s per ping budget for 2 full sweeps) so large tests get proportionally more time.
- Uses CollectT correctly, fixing the broken EventuallyWithT usage in TestEphemeral where the old t.Errorf bypassed CollectT.
- Follows the established assert*/assertWithCollect naming.

Updates #3125
This commit is contained in:
@@ -2005,7 +2005,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) {
|
||||
// Wait for peer lists to sync with autogroup:self - ensures cross-user peers are removed
|
||||
t.Logf("Iteration %d: Phase 2 - Waiting for peer lists to sync with autogroup:self", iteration)
|
||||
|
||||
err = scenario.WaitForTailscaleSyncPerUser(60*time.Second, 500*time.Millisecond)
|
||||
err = scenario.WaitForTailscaleSyncPerUser(integrationutil.ScaledTimeout(60*time.Second), 500*time.Millisecond)
|
||||
require.NoError(t, err, "iteration %d: Phase 2 - failed to sync after autogroup:self policy", iteration)
|
||||
|
||||
// Test ALL connectivity (positive and negative) in one block after state is settled
|
||||
@@ -2096,7 +2096,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) {
|
||||
// Wait for peer lists to sync after new node addition (now 3 user1 nodes, still autogroup:self)
|
||||
t.Logf("Iteration %d: Phase 2b - Waiting for peer lists to sync after new node addition", iteration)
|
||||
|
||||
err = scenario.WaitForTailscaleSyncPerUser(60*time.Second, 500*time.Millisecond)
|
||||
err = scenario.WaitForTailscaleSyncPerUser(integrationutil.ScaledTimeout(60*time.Second), 500*time.Millisecond)
|
||||
require.NoError(t, err, "iteration %d: Phase 2b - failed to sync after new node addition", iteration)
|
||||
|
||||
// Test ALL connectivity (positive and negative) in one block after state is settled
|
||||
@@ -2200,7 +2200,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) {
|
||||
// so nodes only see same-user peers, not all nodes
|
||||
t.Logf("Iteration %d: Phase 2b - Waiting for sync after node deletion (with autogroup:self)", iteration)
|
||||
|
||||
err = scenario.WaitForTailscaleSyncPerUser(60*time.Second, 500*time.Millisecond)
|
||||
err = scenario.WaitForTailscaleSyncPerUser(integrationutil.ScaledTimeout(60*time.Second), 500*time.Millisecond)
|
||||
require.NoError(t, err, "iteration %d: failed to sync after node deletion", iteration)
|
||||
|
||||
// Refresh client lists after deletion to ensure we don't reference the deleted node
|
||||
@@ -2763,6 +2763,10 @@ func TestACLTagPropagation(t *testing.T) {
|
||||
|
||||
// Step 3: Verify final NetMap visibility first (fast signal that
|
||||
// the MapResponse propagated to the client).
|
||||
// The full propagation chain (docker exec → gRPC → state update →
|
||||
// batcher delay → MapResponse → noise transport → client processing)
|
||||
// can take over 120s on congested CI runners, so use a generous
|
||||
// base timeout.
|
||||
t.Logf("Step 3: Verifying final NetMap visibility (expect visible=%v)", tt.finalAccess)
|
||||
assert.EventuallyWithT(t, func(c *assert.CollectT) {
|
||||
status, err := sourceClient.Status()
|
||||
@@ -2783,11 +2787,13 @@ func TestACLTagPropagation(t *testing.T) {
|
||||
} else {
|
||||
assert.False(c, found, "Target should NOT be visible in NetMap after tag change")
|
||||
}
|
||||
}, integrationutil.ScaledTimeout(60*time.Second), 500*time.Millisecond, "verifying NetMap visibility propagated after tag change")
|
||||
}, integrationutil.ScaledTimeout(120*time.Second), 500*time.Millisecond, "verifying NetMap visibility propagated after tag change")
|
||||
|
||||
// Step 4: Verify final access state (this is the key test for #2389).
|
||||
// Checked after NetMap so we know the MapResponse already arrived;
|
||||
// this only needs to wait for the WireGuard config to apply.
|
||||
// Even though Step 3 confirmed the MapResponse arrived, the full
|
||||
// WireGuard handshake and tunnel establishment can take significant
|
||||
// time on congested CI runners, so use the same generous base
|
||||
// timeout as Step 3.
|
||||
t.Logf("Step 4: Verifying final access after tag change (expect success=%v)", tt.finalAccess)
|
||||
assert.EventuallyWithT(t, func(c *assert.CollectT) {
|
||||
if tt.finalAccess {
|
||||
@@ -2795,7 +2801,7 @@ func TestACLTagPropagation(t *testing.T) {
|
||||
} else {
|
||||
assertCurlFailWithCollect(c, sourceClient, targetURL, "final access should fail after tag change")
|
||||
}
|
||||
}, integrationutil.ScaledTimeout(30*time.Second), 500*time.Millisecond, "verifying access propagated after tag change")
|
||||
}, integrationutil.ScaledTimeout(120*time.Second), 500*time.Millisecond, "verifying access propagated after tag change")
|
||||
|
||||
t.Logf("Test %s PASSED: Tag change propagated correctly", tt.name)
|
||||
})
|
||||
|
||||
@@ -55,7 +55,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
|
||||
requireNoErrGetHeadscale(t, err)
|
||||
|
||||
expectedNodes := collectExpectedNodeIDs(t, allClients)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
// Validate that all nodes have NetInfo and DERP servers before logout
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP before logout", 3*time.Minute)
|
||||
@@ -104,7 +104,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
|
||||
requireNoErrLogout(t, err)
|
||||
|
||||
// After taking down all nodes, verify all systems show nodes offline
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should have logged out", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should have logged out", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
t.Logf("all clients logged out")
|
||||
|
||||
@@ -159,7 +159,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
|
||||
assertLastSeenSet(t, node)
|
||||
}
|
||||
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to batcher", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to batcher", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
// Wait for Tailscale sync before validating NetInfo to ensure proper state propagation
|
||||
err = scenario.WaitForTailscaleSync()
|
||||
@@ -175,8 +175,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
for _, client := range allClients {
|
||||
ips, err := client.IPs()
|
||||
@@ -253,7 +252,7 @@ func TestAuthKeyLogoutAndReloginNewUser(t *testing.T) {
|
||||
expectedNodes := collectExpectedNodeIDs(t, allClients)
|
||||
|
||||
// Validate initial connection state
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", integrationutil.ScaledTimeout(120*time.Second))
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after initial login", 3*time.Minute)
|
||||
|
||||
var (
|
||||
@@ -283,7 +282,7 @@ func TestAuthKeyLogoutAndReloginNewUser(t *testing.T) {
|
||||
requireNoErrLogout(t, err)
|
||||
|
||||
// Validate that all nodes are offline after logout
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
t.Logf("all clients logged out")
|
||||
|
||||
@@ -323,7 +322,7 @@ func TestAuthKeyLogoutAndReloginNewUser(t *testing.T) {
|
||||
}
|
||||
|
||||
// Validate connection state after relogin as user1
|
||||
requireAllClientsOnline(t, headscale, expectedUser1Nodes, true, "all user1 nodes should be connected after relogin", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedUser1Nodes, true, "all user1 nodes should be connected after relogin", integrationutil.ScaledTimeout(120*time.Second))
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedUser1Nodes, "all user1 nodes should have NetInfo and DERP after relogin", 3*time.Minute)
|
||||
|
||||
// Validate that user2 still has their original nodes after user1's re-authentication
|
||||
@@ -399,7 +398,7 @@ func TestAuthKeyLogoutAndReloginSameUserExpiredKey(t *testing.T) {
|
||||
expectedNodes := collectExpectedNodeIDs(t, allClients)
|
||||
|
||||
// Validate initial connection state
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", integrationutil.ScaledTimeout(120*time.Second))
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after initial login", 3*time.Minute)
|
||||
|
||||
var (
|
||||
@@ -429,7 +428,7 @@ func TestAuthKeyLogoutAndReloginSameUserExpiredKey(t *testing.T) {
|
||||
requireNoErrLogout(t, err)
|
||||
|
||||
// Validate that all nodes are offline after logout
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
t.Logf("all clients logged out")
|
||||
|
||||
@@ -535,7 +534,7 @@ func TestAuthKeyDeleteKey(t *testing.T) {
|
||||
t.Logf("Node %d (%s) created successfully with auth_key_id=%d", nodeID, nodeName, authKeyID)
|
||||
|
||||
// Verify node is online
|
||||
requireAllClientsOnline(t, headscale, []types.NodeID{types.NodeID(nodeID)}, true, "node should be online initially", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, []types.NodeID{types.NodeID(nodeID)}, true, "node should be online initially", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
// DELETE the pre-auth key using the API
|
||||
t.Logf("Deleting pre-auth key ID %d using API", authKeyID)
|
||||
@@ -563,7 +562,7 @@ func TestAuthKeyDeleteKey(t *testing.T) {
|
||||
// Verify node comes back online
|
||||
// This will FAIL without the fix because auth key validation will reject deleted key
|
||||
// With the fix, MachineKey identity allows reconnection even with deleted key
|
||||
requireAllClientsOnline(t, headscale, []types.NodeID{types.NodeID(nodeID)}, true, "node should reconnect after restart despite deleted key", 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, []types.NodeID{types.NodeID(nodeID)}, true, "node should reconnect after restart despite deleted key", integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
t.Logf("✓ Node successfully reconnected after its auth key was deleted")
|
||||
}
|
||||
|
||||
@@ -78,8 +78,7 @@ func TestOIDCAuthenticationPingAll(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
headscale, err := scenario.Headscale()
|
||||
require.NoError(t, err)
|
||||
@@ -189,8 +188,7 @@ func TestOIDCExpireNodesBasedOnTokenExpiry(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d (before expiry)", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
// Wait for OIDC token expiry and verify all nodes transition to NeedsLogin.
|
||||
// We add extra time to account for:
|
||||
@@ -452,8 +450,7 @@ func TestOIDCAuthenticationWithPKCE(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
}
|
||||
|
||||
// TestOIDCReloginSameNodeNewUser tests the scenario where:
|
||||
|
||||
@@ -50,8 +50,7 @@ func TestAuthWebFlowAuthenticationPingAll(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
}
|
||||
|
||||
func TestAuthWebFlowLogoutAndReloginSameUser(t *testing.T) {
|
||||
@@ -88,8 +87,7 @@ func TestAuthWebFlowLogoutAndReloginSameUser(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
headscale, err := scenario.Headscale()
|
||||
requireNoErrGetHeadscale(t, err)
|
||||
@@ -169,8 +167,7 @@ func TestAuthWebFlowLogoutAndReloginSameUser(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success = pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
for _, client := range allClients {
|
||||
ips, err := client.IPs()
|
||||
@@ -370,6 +367,5 @@ func TestAuthWebFlowLogoutAndReloginNewUser(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d after web flow user switch", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
}
|
||||
|
||||
@@ -68,7 +68,7 @@ func TestPingAllByIP(t *testing.T) {
|
||||
require.NoError(t, err, "failed to parse node ID")
|
||||
expectedNodes = append(expectedNodes, types.NodeID(nodeID))
|
||||
}
|
||||
requireAllClientsOnline(t, hs, expectedNodes, true, "all clients should be online across all systems", 30*time.Second)
|
||||
requireAllClientsOnline(t, hs, expectedNodes, true, "all clients should be online across all systems", integrationutil.ScaledTimeout(30*time.Second))
|
||||
|
||||
// assertClientsState(t, allClients)
|
||||
|
||||
@@ -82,10 +82,9 @@ func TestPingAllByIP(t *testing.T) {
|
||||
|
||||
// Test our DebugBatcher functionality
|
||||
t.Logf("Testing DebugBatcher functionality...")
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to the batcher", 30*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to the batcher", integrationutil.ScaledTimeout(30*time.Second))
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
}
|
||||
|
||||
func TestPingAllByIPPublicDERP(t *testing.T) {
|
||||
@@ -127,8 +126,7 @@ func TestPingAllByIPPublicDERP(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
}
|
||||
|
||||
func TestEphemeral(t *testing.T) {
|
||||
@@ -195,8 +193,7 @@ func testEphemeralWithOptions(t *testing.T, opts ...hsic.Option) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
for _, client := range allClients {
|
||||
err := client.Logout()
|
||||
@@ -275,8 +272,7 @@ func TestEphemeral2006DeletedTooQuickly(t *testing.T) {
|
||||
})
|
||||
|
||||
// All ephemeral nodes should be online and reachable.
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
// Take down all clients, this should start an expiry timer for each.
|
||||
for _, client := range allClients {
|
||||
@@ -301,10 +297,8 @@ func TestEphemeral2006DeletedTooQuickly(t *testing.T) {
|
||||
err = scenario.WaitForTailscaleSync()
|
||||
assert.NoError(ct, err)
|
||||
|
||||
success = pingAllHelper(t, allClients, allAddrs)
|
||||
assert.Greater(ct, success, 0, "Ephemeral nodes should be able to reconnect and ping")
|
||||
assertPingAllWithCollect(ct, allClients, allAddrs)
|
||||
}, integrationutil.ScaledTimeout(60*time.Second), 2*time.Second)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
|
||||
// Take down all clients, this should start an expiry timer for each.
|
||||
for _, client := range allClients {
|
||||
@@ -367,9 +361,7 @@ func TestPingAllByHostname(t *testing.T) {
|
||||
allHostnames, err := scenario.ListTailscaleClientsFQDNs()
|
||||
requireNoErrListFQDN(t, err)
|
||||
|
||||
success := pingAllHelper(t, allClients, allHostnames)
|
||||
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allClients))
|
||||
assertPingAll(t, allClients, allHostnames)
|
||||
}
|
||||
|
||||
// If subtests are parallel, then they will start before setup is run.
|
||||
@@ -972,8 +964,7 @@ func TestExpireNode(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("before expire: %d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
for _, client := range allClients {
|
||||
assert.EventuallyWithT(t, func(ct *assert.CollectT) {
|
||||
@@ -1300,8 +1291,7 @@ func TestNodeOnlineStatus(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("before expire: %d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
for _, client := range allClients {
|
||||
assert.EventuallyWithT(t, func(c *assert.CollectT) {
|
||||
@@ -1441,10 +1431,9 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
expectedNodes = append(expectedNodes, types.NodeID(nodeID))
|
||||
}
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to batcher", 30*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to batcher", integrationutil.ScaledTimeout(30*time.Second))
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
for run := range 3 {
|
||||
t.Logf("Starting DownUpPing run %d at %s", run+1, time.Now().Format(TimestampFormat))
|
||||
@@ -1467,7 +1456,7 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
|
||||
t.Logf("All nodes taken down at %s", time.Now().Format(TimestampFormat))
|
||||
|
||||
// After taking down all nodes, verify all systems show nodes offline
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, fmt.Sprintf("Run %d: all nodes should be offline after Down()", run+1), 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, false, fmt.Sprintf("Run %d: all nodes should be offline after Down()", run+1), integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
for _, client := range allClients {
|
||||
c := client
|
||||
@@ -1483,7 +1472,7 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
|
||||
t.Logf("All nodes brought up at %s", time.Now().Format(TimestampFormat))
|
||||
|
||||
// After bringing up all nodes, verify batcher shows all reconnected
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, fmt.Sprintf("Run %d: all nodes should be reconnected after Up()", run+1), 120*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, fmt.Sprintf("Run %d: all nodes should be reconnected after Up()", run+1), integrationutil.ScaledTimeout(120*time.Second))
|
||||
|
||||
// Wait for sync and successful pings after nodes come back up
|
||||
err = scenario.WaitForTailscaleSync()
|
||||
@@ -1491,10 +1480,9 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
|
||||
|
||||
t.Logf("All nodes synced up %s", time.Now().Format(TimestampFormat))
|
||||
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, fmt.Sprintf("Run %d: all systems should show nodes online after reconnection", run+1), 60*time.Second)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, fmt.Sprintf("Run %d: all systems should show nodes online after reconnection", run+1), integrationutil.ScaledTimeout(60*time.Second))
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
assert.Equalf(t, len(allClients)*len(allIps), success, "%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
// Clean up context for this run
|
||||
cancel()
|
||||
@@ -1532,8 +1520,7 @@ func Test2118DeletingOnlineNodePanics(t *testing.T) {
|
||||
return x.String()
|
||||
})
|
||||
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
assertPingAll(t, allClients, allAddrs)
|
||||
|
||||
headscale, err := scenario.Headscale()
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -153,8 +153,8 @@ func validateLogoutComplete(t *testing.T, headscale ControlServer, expectedNodes
|
||||
func validateReloginComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
|
||||
t.Helper()
|
||||
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", 120*time.Second)
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", 3*time.Minute)
|
||||
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", integrationutil.ScaledTimeout(120*time.Second))
|
||||
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", integrationutil.ScaledTimeout(3*time.Minute))
|
||||
}
|
||||
|
||||
// requireAllClientsOnline validates that all nodes are online/offline across all headscale systems
|
||||
@@ -400,7 +400,7 @@ func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expec
|
||||
}
|
||||
|
||||
assert.True(c, allBatcherOffline, "All nodes should be disconnected from batcher")
|
||||
}, 15*time.Second, 1*time.Second, "batcher disconnection validation")
|
||||
}, integrationutil.ScaledTimeout(15*time.Second), 1*time.Second, "batcher disconnection validation")
|
||||
|
||||
// Stage 2: Verify nodestore offline status (up to 15 seconds due to disconnect detection delay)
|
||||
t.Logf("Stage 2: Verifying nodestore offline status for %d nodes (allowing for 10s disconnect detection delay)", len(expectedNodes))
|
||||
@@ -426,7 +426,7 @@ func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expec
|
||||
}
|
||||
|
||||
assert.True(c, allNodeStoreOffline, "All nodes should be offline in nodestore")
|
||||
}, 20*time.Second, 1*time.Second, "nodestore offline validation")
|
||||
}, integrationutil.ScaledTimeout(20*time.Second), 1*time.Second, "nodestore offline validation")
|
||||
|
||||
// Stage 3: Verify map response propagation (longest delay due to peer update timing)
|
||||
t.Logf("Stage 3: Verifying map response propagation for %d nodes (allowing for peer map update delays)", len(expectedNodes))
|
||||
@@ -468,7 +468,7 @@ func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expec
|
||||
}
|
||||
|
||||
assert.True(c, allMapResponsesOffline, "All nodes should be absent from peer map responses")
|
||||
}, 60*time.Second, 2*time.Second, "map response propagation validation")
|
||||
}, integrationutil.ScaledTimeout(60*time.Second), 2*time.Second, "map response propagation validation")
|
||||
|
||||
t.Logf("All stages completed: nodes are fully offline across all systems")
|
||||
}
|
||||
@@ -582,28 +582,43 @@ func assertTailscaleNodesLogout(t assert.TestingT, clients []TailscaleClient) {
|
||||
}
|
||||
}
|
||||
|
||||
// pingAllHelper performs ping tests between all clients and addresses, returning success count.
|
||||
// This is used to validate network connectivity in integration tests.
|
||||
// Returns the total number of successful ping operations.
|
||||
// assertPingAll verifies that every client can ping every address.
|
||||
// The entire ping matrix is retried via EventuallyWithT to handle
|
||||
// transient failures on slow CI runners. The timeout scales with
|
||||
// the number of pings since they run serially and each can take
|
||||
// up to ~2s on CI (docker exec overhead + ping timeout).
|
||||
//
|
||||
//nolint:unparam // opts is variadic for extensibility even though callers currently don't pass options
|
||||
func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) int {
|
||||
func assertPingAll(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) {
|
||||
t.Helper()
|
||||
|
||||
success := 0
|
||||
// Each ping can take up to ~2s on CI. Budget for 2 full sweeps
|
||||
// (one that might have transient failures + one clean pass).
|
||||
pingCount := len(clients) * len(addrs)
|
||||
perPingBudget := 2 * time.Second
|
||||
timeout := max(
|
||||
// Floor at 30s for small matrices.
|
||||
integrationutil.ScaledTimeout(time.Duration(pingCount)*perPingBudget*2), integrationutil.ScaledTimeout(30*time.Second))
|
||||
|
||||
assert.EventuallyWithT(t, func(c *assert.CollectT) {
|
||||
assertPingAllWithCollect(c, clients, addrs, opts...)
|
||||
}, timeout, 2*time.Second,
|
||||
"all %d clients should be able to ping all %d addresses",
|
||||
len(clients), len(addrs))
|
||||
}
|
||||
|
||||
// assertPingAllWithCollect pings every address from every client and
|
||||
// collects failures on the provided CollectT. Pings run serially to
|
||||
// avoid overloading the Docker daemon on resource-constrained CI
|
||||
// runners. For use inside EventuallyWithT blocks when the caller
|
||||
// needs custom timeout or retry control.
|
||||
func assertPingAllWithCollect(c *assert.CollectT, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) {
|
||||
for _, client := range clients {
|
||||
for _, addr := range addrs {
|
||||
err := client.Ping(addr, opts...)
|
||||
if err != nil {
|
||||
t.Errorf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
|
||||
} else {
|
||||
success++
|
||||
}
|
||||
assert.NoError(c, err, "ping from %s to %s", client.Hostname(), addr) //nolint:testifylint // CollectT requires assert
|
||||
}
|
||||
}
|
||||
|
||||
return success
|
||||
}
|
||||
|
||||
// pingDerpAllHelper performs DERP-based ping tests between all clients and addresses.
|
||||
|
||||
@@ -2430,7 +2430,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
|
||||
|
||||
// Wait for the node to be fully running before getting its ID
|
||||
// This is especially important for webauth flow where login is asynchronous
|
||||
err = routerUsernet1.WaitForRunning(30 * time.Second)
|
||||
err = routerUsernet1.WaitForRunning(integrationutil.ScaledTimeout(30 * time.Second))
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for bidirectional peer synchronization.
|
||||
@@ -2439,12 +2439,12 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
|
||||
// tunnels may not be established despite peers appearing in netmaps.
|
||||
|
||||
// Router waits for all existing clients
|
||||
err = routerUsernet1.WaitForPeers(len(allClients), 60*time.Second, 1*time.Second)
|
||||
err = routerUsernet1.WaitForPeers(len(allClients), integrationutil.PeerSyncTimeout(), integrationutil.PeerSyncRetryInterval())
|
||||
require.NoError(t, err, "router failed to see all peers")
|
||||
|
||||
// All clients wait for the router (they should see 6 peers including the router)
|
||||
for _, existingClient := range allClients {
|
||||
err = existingClient.WaitForPeers(len(allClients), 60*time.Second, 1*time.Second)
|
||||
err = existingClient.WaitForPeers(len(allClients), integrationutil.PeerSyncTimeout(), integrationutil.PeerSyncRetryInterval())
|
||||
require.NoErrorf(t, err, "client %s failed to see all peers including router", existingClient.Hostname())
|
||||
}
|
||||
|
||||
|
||||
@@ -1356,7 +1356,7 @@ func TestTagsUserLoginOwnedTagAtRegistration(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for client to be running
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify node has the advertised tag
|
||||
@@ -1563,7 +1563,7 @@ func TestTagsUserLoginAddTagViaCLIReauth(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify initial tag
|
||||
@@ -1654,7 +1654,7 @@ func TestTagsUserLoginRemoveTagViaCLIReauth(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify initial tags
|
||||
@@ -1745,7 +1745,7 @@ func TestTagsUserLoginCLINoOpAfterAdminAssignment(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get node ID
|
||||
@@ -1862,7 +1862,7 @@ func TestTagsUserLoginCLICannotRemoveAdminTags(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get node ID
|
||||
@@ -2548,7 +2548,7 @@ func TestTagsIssue2978ReproTagReplacement(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for client to be running
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Wait for initial registration with tag:valid-owned
|
||||
@@ -2851,7 +2851,7 @@ func TestTagsUserLoginReauthWithEmptyTagsRemovesAllTags(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify initial tags
|
||||
@@ -2902,7 +2902,7 @@ func TestTagsUserLoginReauthWithEmptyTagsRemovesAllTags(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
t.Logf("Completed reauth with empty tags")
|
||||
} else {
|
||||
@@ -3145,7 +3145,7 @@ func TestTagsAuthKeyConvertToUserViaCLIRegister(t *testing.T) {
|
||||
err = client.Login(headscale.GetEndpoint(), authKey.GetKey())
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify initial state: node is tagged
|
||||
@@ -3182,7 +3182,7 @@ func TestTagsAuthKeyConvertToUserViaCLIRegister(t *testing.T) {
|
||||
err = scenario.runHeadscaleRegister(tagTestUser, body)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = client.WaitForRunning(120 * time.Second)
|
||||
err = client.WaitForRunning(integrationutil.PeerSyncTimeout())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Step 4: Verify node is now user-owned and the mapper didn't panic.
|
||||
|
||||
Reference in New Issue
Block a user