mirror of
https://github.com/juanfont/headscale.git
synced 2026-03-20 08:34:12 +01:00
hscontrol/servertest: add control plane lifecycle and consistency tests
Add three test files exercising the servertest harness:

- lifecycle_test.go: connection, disconnection, reconnection, session replacement, and mesh formation at various sizes.
- consistency_test.go: symmetric visibility, consistent peer state, address presence, concurrent join/leave convergence.
- weather_test.go: rapid reconnects, flapping stability, reconnect with various delays, concurrent reconnects, and scale tests.

All tests use table-driven patterns with subtests.
This commit is contained in:
109
hscontrol/servertest/consistency_test.go
Normal file
109
hscontrol/servertest/consistency_test.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package servertest_test
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/juanfont/headscale/hscontrol/servertest"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestConsistency verifies that all nodes converge to the same
|
||||
// view of the network and that no updates are lost during various
|
||||
// operations.
|
||||
func TestConsistency(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("all_nodes_converge", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 5)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
servertest.AssertConsistentState(t, h.Clients())
|
||||
servertest.AssertSymmetricVisibility(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("self_node_has_correct_hostname", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
h := servertest.NewHarness(t, 3)
|
||||
for _, c := range h.Clients() {
|
||||
assert.Equal(t, c.Name, c.SelfName(),
|
||||
"client %s self name should match", c.Name)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("update_count_positive", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 3)
|
||||
// After mesh formation, each client should have received
|
||||
// at least one update.
|
||||
for _, c := range h.Clients() {
|
||||
assert.Positive(t, c.UpdateCount(),
|
||||
"client %s should have received at least one update", c.Name)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("new_node_visible_to_all", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 3)
|
||||
|
||||
newClient := h.AddClient(t)
|
||||
h.WaitForMeshComplete(t, 10*time.Second)
|
||||
|
||||
// Verify every original client sees the new node.
|
||||
for _, c := range h.Clients() {
|
||||
if c == newClient {
|
||||
continue
|
||||
}
|
||||
|
||||
_, found := c.PeerByName(newClient.Name)
|
||||
assert.True(t, found,
|
||||
"client %s should see new client %s", c.Name, newClient.Name)
|
||||
}
|
||||
|
||||
// And the new node sees all others.
|
||||
for _, c := range h.Clients() {
|
||||
if c == newClient {
|
||||
continue
|
||||
}
|
||||
|
||||
_, found := newClient.PeerByName(c.Name)
|
||||
assert.True(t, found,
|
||||
"new client %s should see %s", newClient.Name, c.Name)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("concurrent_join_and_leave", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 5)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// 3 nodes joining concurrently.
|
||||
for range 3 {
|
||||
wg.Go(func() {
|
||||
h.AddClient(t)
|
||||
})
|
||||
}
|
||||
|
||||
// 2 nodes leaving concurrently.
|
||||
for i := range 2 {
|
||||
wg.Add(1)
|
||||
|
||||
c := h.Client(i)
|
||||
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
c.Disconnect(t)
|
||||
}()
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// After all churn, connected clients should converge.
|
||||
servertest.EventuallyAssertMeshComplete(t, h.ConnectedClients(), 30*time.Second)
|
||||
servertest.AssertConsistentState(t, h.ConnectedClients())
|
||||
})
|
||||
}
|
||||
91
hscontrol/servertest/lifecycle_test.go
Normal file
91
hscontrol/servertest/lifecycle_test.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package servertest_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/juanfont/headscale/hscontrol/servertest"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestConnectionLifecycle exercises the core node lifecycle:
|
||||
// connecting, seeing peers, joining mid-session, departing, and
|
||||
// reconnecting.
|
||||
func TestConnectionLifecycle(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("single_node", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 1)
|
||||
nm := h.Client(0).Netmap()
|
||||
assert.NotNil(t, nm, "single node should receive a netmap")
|
||||
assert.Empty(t, nm.Peers, "single node should have no peers")
|
||||
})
|
||||
|
||||
t.Run("new_node_joins_mesh", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 3)
|
||||
|
||||
// Add a 4th client mid-test.
|
||||
h.AddClient(t)
|
||||
h.WaitForMeshComplete(t, 10*time.Second)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
servertest.AssertSymmetricVisibility(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("node_departs_peers_update", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 3)
|
||||
|
||||
departingName := h.Client(2).Name
|
||||
h.Client(2).Disconnect(t)
|
||||
|
||||
// The remaining clients should eventually stop seeing the
|
||||
// departed node (after the grace period).
|
||||
assert.Eventually(t, func() bool {
|
||||
_, found := h.Client(0).PeerByName(departingName)
|
||||
return !found
|
||||
}, 30*time.Second, 500*time.Millisecond,
|
||||
"client 0 should stop seeing departed node")
|
||||
})
|
||||
|
||||
t.Run("reconnect_restores_mesh", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
// Disconnect and reconnect.
|
||||
h.Client(0).Disconnect(t)
|
||||
h.Client(0).Reconnect(t)
|
||||
|
||||
// Mesh should recover.
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("session_replacement", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
// Reconnect without explicitly waiting for the old session to
|
||||
// fully drain. This tests that Headscale correctly replaces
|
||||
// the old map session for the same node.
|
||||
h.Client(0).Reconnect(t)
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("multiple_nodes_join_sequentially", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
sizes := []int{2, 5, 10}
|
||||
for _, n := range sizes {
|
||||
t.Run(fmt.Sprintf("%d_nodes", n), func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, n)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
servertest.AssertSymmetricVisibility(t, h.Clients())
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -6,6 +6,7 @@ package servertest
|
||||
|
||||
import (
|
||||
"net/http/httptest"
|
||||
"net/netip"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -37,6 +38,7 @@ type serverConfig struct {
|
||||
func defaultServerConfig() *serverConfig {
|
||||
return &serverConfig{
|
||||
batchDelay: 50 * time.Millisecond,
|
||||
bufferedChanSize: 30,
|
||||
batcherWorkers: 1,
|
||||
ephemeralTimeout: 30 * time.Second,
|
||||
}
|
||||
@@ -70,11 +72,17 @@ func NewServer(tb testing.TB, opts ...ServerOption) *TestServer {
|
||||
|
||||
tmpDir := tb.TempDir()
|
||||
|
||||
prefixV4 := netip.MustParsePrefix("100.64.0.0/10")
|
||||
prefixV6 := netip.MustParsePrefix("fd7a:115c:a1e0::/48")
|
||||
|
||||
cfg := types.Config{
|
||||
// Placeholder; updated below once httptest server starts.
|
||||
ServerURL: "http://localhost:0",
|
||||
NoisePrivateKeyPath: tmpDir + "/noise_private.key",
|
||||
EphemeralNodeInactivityTimeout: sc.ephemeralTimeout,
|
||||
PrefixV4: &prefixV4,
|
||||
PrefixV6: &prefixV6,
|
||||
IPAllocation: types.IPAllocationStrategySequential,
|
||||
Database: types.DatabaseConfig{
|
||||
Type: "sqlite3",
|
||||
Sqlite: types.SqliteConfig{
|
||||
|
||||
154
hscontrol/servertest/weather_test.go
Normal file
154
hscontrol/servertest/weather_test.go
Normal file
@@ -0,0 +1,154 @@
|
||||
package servertest_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/juanfont/headscale/hscontrol/servertest"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestNetworkWeather exercises scenarios that simulate unstable
|
||||
// network conditions: rapid reconnects, disconnect/reconnect
|
||||
// timing, and connection flapping.
|
||||
func TestNetworkWeather(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("rapid_reconnect_stays_online", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
for range 10 {
|
||||
h.Client(0).Disconnect(t)
|
||||
h.Client(0).Reconnect(t)
|
||||
}
|
||||
|
||||
// After rapid flapping, mesh should still be complete.
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("reconnect_within_grace_period", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
h.Client(0).Disconnect(t)
|
||||
|
||||
// Reconnect quickly (well within the 10-second grace period).
|
||||
h.Client(0).ReconnectAfter(t, 1*time.Second)
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
|
||||
// Peer should see us as online after reconnection.
|
||||
servertest.AssertPeerOnline(t, h.Client(1), h.Client(0).Name)
|
||||
})
|
||||
|
||||
t.Run("disconnect_types", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
disconnect func(c *servertest.TestClient, tb testing.TB)
|
||||
}{
|
||||
{"clean_disconnect", (*servertest.TestClient).Disconnect},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
tc.disconnect(h.Client(1), t)
|
||||
|
||||
// The remaining client should eventually see peer gone/offline.
|
||||
assert.Eventually(t, func() bool {
|
||||
_, found := h.Client(0).PeerByName(h.Client(1).Name)
|
||||
if found {
|
||||
// If still in peer list, check if it's marked offline.
|
||||
isOnline, known := func() (bool, bool) {
|
||||
peer, ok := h.Client(0).PeerByName(h.Client(1).Name)
|
||||
if !ok {
|
||||
return false, false
|
||||
}
|
||||
|
||||
return peer.Online().GetOk()
|
||||
}()
|
||||
// Either unknown or offline is acceptable.
|
||||
return known && !isOnline
|
||||
}
|
||||
|
||||
return true // peer gone
|
||||
}, 30*time.Second, 500*time.Millisecond,
|
||||
"peer should become offline or disappear")
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("state_consistent_through_reconnection", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 3)
|
||||
|
||||
// Disconnect and reconnect the middle node.
|
||||
h.Client(1).Disconnect(t)
|
||||
h.Client(1).Reconnect(t)
|
||||
|
||||
// Wait for convergence and verify consistency.
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
servertest.AssertConsistentState(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("multiple_reconnect_delays", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
delays := []struct {
|
||||
name string
|
||||
delay time.Duration
|
||||
}{
|
||||
{"immediate", 0},
|
||||
{"100ms", 100 * time.Millisecond},
|
||||
{"500ms", 500 * time.Millisecond},
|
||||
{"1s", 1 * time.Second},
|
||||
}
|
||||
for _, tc := range delays {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
if tc.delay > 0 {
|
||||
h.Client(0).ReconnectAfter(t, tc.delay)
|
||||
} else {
|
||||
h.Client(0).Disconnect(t)
|
||||
h.Client(0).Reconnect(t)
|
||||
}
|
||||
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("flapping_does_not_leak_goroutines", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 2)
|
||||
|
||||
// Do many rapid disconnect/reconnect cycles.
|
||||
for i := range 20 {
|
||||
h.Client(0).Disconnect(t)
|
||||
h.Client(0).Reconnect(t)
|
||||
|
||||
if i%5 == 0 {
|
||||
t.Logf("flap cycle %d: %s has %d peers",
|
||||
i, h.Client(0).Name, len(h.Client(0).Peers()))
|
||||
}
|
||||
}
|
||||
|
||||
// Mesh should still be working.
|
||||
h.WaitForMeshComplete(t, 15*time.Second)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
})
|
||||
|
||||
t.Run("scale_20_nodes", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := servertest.NewHarness(t, 20)
|
||||
servertest.AssertMeshComplete(t, h.Clients())
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user