diff --git a/integration/acl_test.go b/integration/acl_test.go index a6c6e962..e2f693cd 100644 --- a/integration/acl_test.go +++ b/integration/acl_test.go @@ -77,11 +77,8 @@ func aclScenario( // tailscaled to stop configuring the wgengine, causing it // to not configure DNS. tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), }, hsic.WithACLPolicy(policy), @@ -311,6 +308,7 @@ func TestACLHostsInNetMapTable(t *testing.T) { []tsic.Option{}, hsic.WithACLPolicy(&testCase.policy), ) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) @@ -759,6 +757,7 @@ func TestACLNamedHostsCanReach(t *testing.T) { test1fqdn, err := test1.FQDN() require.NoError(t, err) + test1ip4URL := fmt.Sprintf("http://%s/etc/hostname", test1ip4.String()) test1ip6URL := fmt.Sprintf("http://[%s]/etc/hostname", test1ip6.String()) test1fqdnURL := fmt.Sprintf("http://%s/etc/hostname", test1fqdn) @@ -770,6 +769,7 @@ func TestACLNamedHostsCanReach(t *testing.T) { test2fqdn, err := test2.FQDN() require.NoError(t, err) + test2ip4URL := fmt.Sprintf("http://%s/etc/hostname", test2ip4.String()) test2ip6URL := fmt.Sprintf("http://[%s]/etc/hostname", test2ip6.String()) test2fqdnURL := fmt.Sprintf("http://%s/etc/hostname", test2fqdn) @@ -781,6 +781,7 @@ func TestACLNamedHostsCanReach(t *testing.T) { test3fqdn, err := test3.FQDN() require.NoError(t, err) + test3ip4URL := fmt.Sprintf("http://%s/etc/hostname", test3ip4.String()) test3ip6URL := fmt.Sprintf("http://[%s]/etc/hostname", test3ip6.String()) test3fqdnURL := fmt.Sprintf("http://%s/etc/hostname", test3fqdn) @@ -1055,6 +1056,7 @@ func TestACLDevice1CanAccessDevice2(t *testing.T) { test1fqdn, err := test1.FQDN() require.NoError(t, err) + test1ipURL := fmt.Sprintf("http://%s/etc/hostname", test1ip.String()) test1ip6URL := fmt.Sprintf("http://[%s]/etc/hostname", test1ip6.String()) test1fqdnURL := fmt.Sprintf("http://%s/etc/hostname", test1fqdn) @@ -1067,6 +1069,7 @@ func TestACLDevice1CanAccessDevice2(t *testing.T) { test2fqdn, err := test2.FQDN() require.NoError(t, err) + test2ipURL := fmt.Sprintf("http://%s/etc/hostname", test2ip.String()) test2ip6URL := fmt.Sprintf("http://[%s]/etc/hostname", test2ip6.String()) test2fqdnURL := fmt.Sprintf("http://%s/etc/hostname", test2fqdn) @@ -1142,6 +1145,7 @@ func TestPolicyUpdateWhileRunningWithCLIInDatabase(t *testing.T) { } scenario, err := NewScenario(spec) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) @@ -1151,11 +1155,8 @@ func TestPolicyUpdateWhileRunningWithCLIInDatabase(t *testing.T) { // tailscaled to stop configuring the wgengine, causing it // to not configure DNS. tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), }, hsic.WithTestName("policyreload"), @@ -1221,6 +1222,7 @@ func TestPolicyUpdateWhileRunningWithCLIInDatabase(t *testing.T) { // Get the current policy and check // if it is the same as the one we set. 
var output *policyv2.Policy + err = executeAndUnmarshal( headscale, []string{ @@ -1302,9 +1304,11 @@ func TestACLAutogroupMember(t *testing.T) { // Test that untagged nodes can access each other for _, client := range allClients { var clientIsUntagged bool + assert.EventuallyWithT(t, func(c *assert.CollectT) { status, err := client.Status() assert.NoError(c, err) + clientIsUntagged = status.Self.Tags == nil || status.Self.Tags.Len() == 0 assert.True(c, clientIsUntagged, "Expected client %s to be untagged for autogroup:member test", client.Hostname()) }, 10*time.Second, 200*time.Millisecond, "Waiting for client %s to be untagged", client.Hostname()) @@ -1319,9 +1323,11 @@ func TestACLAutogroupMember(t *testing.T) { } var peerIsUntagged bool + assert.EventuallyWithT(t, func(c *assert.CollectT) { status, err := peer.Status() assert.NoError(c, err) + peerIsUntagged = status.Self.Tags == nil || status.Self.Tags.Len() == 0 assert.True(c, peerIsUntagged, "Expected peer %s to be untagged for autogroup:member test", peer.Hostname()) }, 10*time.Second, 200*time.Millisecond, "Waiting for peer %s to be untagged", peer.Hostname()) @@ -1355,6 +1361,7 @@ func TestACLAutogroupTagged(t *testing.T) { } scenario, err := NewScenario(spec) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) @@ -1397,23 +1404,28 @@ func TestACLAutogroupTagged(t *testing.T) { // Create nodes with proper naming for i := range spec.NodesPerUser { - var authKey string - var version string + var ( + authKey string + version string + ) if i == 0 { // First node is tagged - use tagged PreAuthKey authKey = taggedAuthKey.GetKey() version = "head" + t.Logf("Creating tagged node for %s", userStr) } else { // Second node is untagged - use untagged PreAuthKey authKey = untaggedAuthKey.GetKey() version = "unstable" + t.Logf("Creating untagged node for %s", userStr) } // Get the network for this scenario networks := scenario.Networks() + var network *dockertest.Network if len(networks) > 0 { network = networks[0] @@ -1425,11 +1437,8 @@ func TestACLAutogroupTagged(t *testing.T) { tsic.WithHeadscaleName(headscale.GetHostname()), tsic.WithNetwork(network), tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), } @@ -1463,10 +1472,13 @@ func TestACLAutogroupTagged(t *testing.T) { // Wait for nodes to see only their allowed peers // Tagged nodes should see each other (2 tagged nodes total) // Untagged nodes should see no one - var taggedClients []TailscaleClient - var untaggedClients []TailscaleClient + var ( + taggedClients []TailscaleClient + untaggedClients []TailscaleClient + ) // First, categorize nodes by checking their tags + for _, client := range allClients { hostname := client.Hostname() @@ -1480,12 +1492,14 @@ func TestACLAutogroupTagged(t *testing.T) { // Add to tagged list only once we've verified it found := false + for _, tc := range taggedClients { if tc.Hostname() == hostname { found = true break } } + if !found { taggedClients = append(taggedClients, client) } @@ -1495,12 +1509,14 @@ func TestACLAutogroupTagged(t *testing.T) { // Add to untagged list only once we've verified it found := false + for _, uc := range untaggedClients { if uc.Hostname() == hostname { found = true break } } + if !found { untaggedClients = append(untaggedClients, client) } @@ -1527,6 +1543,7 @@ 
func TestACLAutogroupTagged(t *testing.T) { assert.EventuallyWithT(t, func(c *assert.CollectT) { status, err := client.Status() assert.NoError(c, err) + if status.Self.Tags != nil { assert.Equal(c, 0, status.Self.Tags.Len(), "untagged node %s should have no tags", client.Hostname()) } @@ -1544,6 +1561,7 @@ func TestACLAutogroupTagged(t *testing.T) { require.NoError(t, err) url := fmt.Sprintf("http://%s/etc/hostname", fqdn) + t.Logf("Testing connection from tagged node %s to tagged node %s", client.Hostname(), peer.Hostname()) assert.EventuallyWithT(t, func(ct *assert.CollectT) { @@ -1562,6 +1580,7 @@ func TestACLAutogroupTagged(t *testing.T) { require.NoError(t, err) url := fmt.Sprintf("http://%s/etc/hostname", fqdn) + t.Logf("Testing connection from untagged node %s to tagged node %s (should fail)", client.Hostname(), peer.Hostname()) assert.EventuallyWithT(t, func(ct *assert.CollectT) { @@ -1581,6 +1600,7 @@ func TestACLAutogroupTagged(t *testing.T) { require.NoError(t, err) url := fmt.Sprintf("http://%s/etc/hostname", fqdn) + t.Logf("Testing connection from untagged node %s to untagged node %s (should fail)", client.Hostname(), peer.Hostname()) assert.EventuallyWithT(t, func(ct *assert.CollectT) { @@ -1598,6 +1618,7 @@ func TestACLAutogroupTagged(t *testing.T) { require.NoError(t, err) url := fmt.Sprintf("http://%s/etc/hostname", fqdn) + t.Logf("Testing connection from tagged node %s to untagged node %s (should fail)", client.Hostname(), peer.Hostname()) assert.EventuallyWithT(t, func(ct *assert.CollectT) { @@ -1613,7 +1634,7 @@ func TestACLAutogroupTagged(t *testing.T) { // Test structure: // - user1: 2 regular nodes (tests autogroup:self for same-user access) // - user2: 2 regular nodes (tests autogroup:self for same-user access and cross-user isolation) -// - user-router: 1 node with tag:router-node (tests that autogroup:self doesn't interfere with other rules) +// - user-router: 1 node with tag:router-node (tests that autogroup:self doesn't interfere with other rules). 
func TestACLAutogroupSelf(t *testing.T) { IntegrationSkip(t) @@ -1665,17 +1686,15 @@ func TestACLAutogroupSelf(t *testing.T) { } scenario, err := NewScenario(spec) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) err = scenario.CreateHeadscaleEnv( []tsic.Option{ tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), }, hsic.WithACLPolicy(policy), @@ -1687,6 +1706,7 @@ func TestACLAutogroupSelf(t *testing.T) { // Add router node for user-router (single shared router node) networks := scenario.Networks() + var network *dockertest.Network if len(networks) > 0 { network = networks[0] @@ -1710,11 +1730,8 @@ func TestACLAutogroupSelf(t *testing.T) { tsic.WithHeadscaleName(headscale.GetHostname()), tsic.WithNetwork(network), tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), ) require.NoError(t, err) @@ -1737,16 +1754,20 @@ func TestACLAutogroupSelf(t *testing.T) { require.NoError(t, err) var user1Regular, user2Regular []TailscaleClient + for _, client := range user1Clients { status, err := client.Status() require.NoError(t, err) + if status.Self != nil && (status.Self.Tags == nil || status.Self.Tags.Len() == 0) { user1Regular = append(user1Regular, client) } } + for _, client := range user2Clients { status, err := client.Status() require.NoError(t, err) + if status.Self != nil && (status.Self.Tags == nil || status.Self.Tags.Len() == 0) { user2Regular = append(user2Regular, client) } @@ -1764,10 +1785,12 @@ func TestACLAutogroupSelf(t *testing.T) { err := client.WaitForPeers(2, integrationutil.PeerSyncTimeout(), integrationutil.PeerSyncRetryInterval()) require.NoError(t, err, "user1 regular device %s should see 2 peers (1 same-user peer + 1 router)", client.Hostname()) } + for _, client := range user2Regular { err := client.WaitForPeers(2, integrationutil.PeerSyncTimeout(), integrationutil.PeerSyncRetryInterval()) require.NoError(t, err, "user2 regular device %s should see 2 peers (1 same-user peer + 1 router)", client.Hostname()) } + err = routerClient.WaitForPeers(4, integrationutil.PeerSyncTimeout(), integrationutil.PeerSyncRetryInterval()) require.NoError(t, err, "router should see 4 peers (all group:home regular nodes)") @@ -1817,6 +1840,7 @@ func TestACLAutogroupSelf(t *testing.T) { for _, client := range user1Regular { fqdn, err := routerClient.FQDN() require.NoError(t, err) + url := fmt.Sprintf("http://%s/etc/hostname", fqdn) t.Logf("url from %s (user1) to %s (router-node) - should SUCCEED", client.Hostname(), fqdn) @@ -1831,6 +1855,7 @@ func TestACLAutogroupSelf(t *testing.T) { for _, client := range user2Regular { fqdn, err := routerClient.FQDN() require.NoError(t, err) + url := fmt.Sprintf("http://%s/etc/hostname", fqdn) t.Logf("url from %s (user2) to %s (router-node) - should SUCCEED", client.Hostname(), fqdn) @@ -1880,6 +1905,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { } scenario, err := NewScenario(spec) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) @@ -1887,11 +1913,8 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { 
[]tsic.Option{ // Install iptables to enable packet filtering for ACL tests. // Packet filters are essential for testing autogroup:self and other ACL policies. - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl iptables ip6tables ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl", "iptables", "ip6tables"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), }, hsic.WithTestName("aclpropagation"), @@ -1960,11 +1983,13 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { // Phase 1: Allow all policy t.Logf("Iteration %d: Setting allow-all policy", iteration) + err = headscale.SetPolicy(allowAllPolicy) require.NoError(t, err) // Wait for peer lists to sync with allow-all policy t.Logf("Iteration %d: Phase 1 - Waiting for peer lists to sync with allow-all policy", iteration) + err = scenario.WaitForTailscaleSync() require.NoError(t, err, "iteration %d: Phase 1 - failed to sync after allow-all policy", iteration) @@ -1992,11 +2017,13 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { // Phase 2: Autogroup:self policy (only same user can access) t.Logf("Iteration %d: Phase 2 - Setting autogroup:self policy", iteration) + err = headscale.SetPolicy(autogroupSelfPolicy) require.NoError(t, err) // Wait for peer lists to sync with autogroup:self - ensures cross-user peers are removed t.Logf("Iteration %d: Phase 2 - Waiting for peer lists to sync with autogroup:self", iteration) + err = scenario.WaitForTailscaleSyncPerUser(60*time.Second, 500*time.Millisecond) require.NoError(t, err, "iteration %d: Phase 2 - failed to sync after autogroup:self policy", iteration) @@ -2082,11 +2109,8 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { newClient := scenario.MustAddAndLoginClient(t, "user1", "all", headscale, tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl ; update-ca-certificates ; python3 -m http.server --bind :: 80 & tailscaled --tun=tsdev", - }), + tsic.WithPackages("curl"), + tsic.WithWebserver(80), tsic.WithDockerWorkdir("/"), tsic.WithNetwork(networks[0]), ) @@ -2094,6 +2118,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { // Wait for peer lists to sync after new node addition (now 3 user1 nodes, still autogroup:self) t.Logf("Iteration %d: Phase 2b - Waiting for peer lists to sync after new node addition", iteration) + err = scenario.WaitForTailscaleSyncPerUser(60*time.Second, 500*time.Millisecond) require.NoError(t, err, "iteration %d: Phase 2b - failed to sync after new node addition", iteration) @@ -2144,8 +2169,11 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { t.Logf("Iteration %d: Phase 2b - Deleting the newly added node from user1", iteration) // Get the node list and find the newest node (highest ID) - var nodeList []*v1.Node - var nodeToDeleteID uint64 + var ( + nodeList []*v1.Node + nodeToDeleteID uint64 + ) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { nodeList, err = headscale.ListNodes("user1") assert.NoError(ct, err) @@ -2167,15 +2195,19 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { // Remove the deleted client from the scenario's user.Clients map // This is necessary for WaitForTailscaleSyncPerUser to calculate correct peer counts t.Logf("Iteration %d: Phase 2b - Removing deleted client from scenario", iteration) + for clientName, client := range scenario.users["user1"].Clients { status := client.MustStatus() + nodeID, err := 
strconv.ParseUint(string(status.Self.ID), 10, 64) if err != nil { continue } + if nodeID == nodeToDeleteID { delete(scenario.users["user1"].Clients, clientName) t.Logf("Iteration %d: Phase 2b - Removed client %s (node ID %d) from scenario", iteration, clientName, nodeToDeleteID) + break } } @@ -2192,6 +2224,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { // Use WaitForTailscaleSyncPerUser because autogroup:self is still active, // so nodes only see same-user peers, not all nodes t.Logf("Iteration %d: Phase 2b - Waiting for sync after node deletion (with autogroup:self)", iteration) + err = scenario.WaitForTailscaleSyncPerUser(60*time.Second, 500*time.Millisecond) require.NoError(t, err, "iteration %d: failed to sync after node deletion", iteration) @@ -2209,6 +2242,7 @@ func TestACLPolicyPropagationOverTime(t *testing.T) { // Phase 3: User1 can access user2 but not reverse t.Logf("Iteration %d: Phase 3 - Setting user1->user2 directional policy", iteration) + err = headscale.SetPolicy(user1ToUser2Policy) require.NoError(t, err) diff --git a/integration/dns_test.go b/integration/dns_test.go index 7267bc09..e937a421 100644 --- a/integration/dns_test.go +++ b/integration/dns_test.go @@ -23,6 +23,7 @@ func TestResolveMagicDNS(t *testing.T) { } scenario, err := NewScenario(spec) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) @@ -79,6 +80,7 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) { } scenario, err := NewScenario(spec) + require.NoError(t, err) defer scenario.ShutdownAssertNoPanics(t) @@ -94,11 +96,7 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) { b, _ := json.Marshal(extraRecords) err = scenario.CreateHeadscaleEnv([]tsic.Option{ - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add python3 curl bind-tools ; update-ca-certificates ; tailscaled --tun=tsdev", - }), + tsic.WithPackages("python3", "curl", "bind-tools"), }, hsic.WithTestName("extrarecords"), hsic.WithConfigEnv(map[string]string{ diff --git a/integration/dsic/dsic.go b/integration/dsic/dsic.go index dd6c6978..755d2298 100644 --- a/integration/dsic/dsic.go +++ b/integration/dsic/dsic.go @@ -103,6 +103,38 @@ func WithExtraHosts(hosts []string) Option { } } +// buildEntrypoint builds the container entrypoint command based on configuration. +// It constructs proper wait conditions instead of fixed sleeps: +// 1. Wait for network to be ready +// 2. Wait for TLS cert to be written (always written after container start) +// 3. Wait for CA certs if configured +// 4. Update CA certificates +// 5. Run derper with provided arguments. +func (dsic *DERPServerInContainer) buildEntrypoint(derperArgs string) []string { + var commands []string + + // Wait for network to be ready + commands = append(commands, "while ! ip route show default >/dev/null 2>&1; do sleep 0.1; done") + + // Wait for TLS cert to be written (always written after container start) + commands = append(commands, + fmt.Sprintf("while [ ! -f %s/%s.crt ]; do sleep 0.1; done", DERPerCertRoot, dsic.hostname)) + + // If CA certs are configured, wait for them to be written + if len(dsic.caCerts) > 0 { + commands = append(commands, + fmt.Sprintf("while [ ! -f %s/user-0.crt ]; do sleep 0.1; done", caCertRoot)) + } + + // Update CA certificates + commands = append(commands, "update-ca-certificates") + + // Run derper + commands = append(commands, "derper "+derperArgs) + + return []string{"/bin/sh", "-c", strings.Join(commands, " ; ")} +} + // New returns a new TailscaleInContainer instance. 
func New( pool *dockertest.Pool, @@ -150,8 +182,7 @@ func New( Name: hostname, Networks: dsic.networks, ExtraHosts: dsic.withExtraHosts, - // we currently need to give us some time to inject the certificate further down. - Entrypoint: []string{"/bin/sh", "-c", "/bin/sleep 3 ; update-ca-certificates ; derper " + cmdArgs.String()}, + Entrypoint: dsic.buildEntrypoint(cmdArgs.String()), ExposedPorts: []string{ "80/tcp", fmt.Sprintf("%d/tcp", dsic.derpPort), diff --git a/integration/hsic/hsic.go b/integration/hsic/hsic.go index 8a6b869d..2754b317 100644 --- a/integration/hsic/hsic.go +++ b/integration/hsic/hsic.go @@ -285,10 +285,38 @@ func WithDERPAsIP() Option { } // buildEntrypoint builds the container entrypoint command based on configuration. +// It constructs proper wait conditions instead of fixed sleeps: +// 1. Wait for network to be ready +// 2. Wait for config.yaml (always written after container start) +// 3. Wait for CA certs if configured +// 4. Update CA certificates +// 5. Run headscale serve +// 6. Sleep at end to keep container alive for log collection on shutdown. func (hsic *HeadscaleInContainer) buildEntrypoint() []string { - entrypoint := "/bin/sleep 3 ; update-ca-certificates ; /usr/local/bin/headscale serve ; /bin/sleep 30" + var commands []string - return []string{"/bin/bash", "-c", entrypoint} + // Wait for network to be ready + commands = append(commands, "while ! ip route show default >/dev/null 2>&1; do sleep 0.1; done") + + // Wait for config.yaml to be written (always written after container start) + commands = append(commands, "while [ ! -f /etc/headscale/config.yaml ]; do sleep 0.1; done") + + // If CA certs are configured, wait for them to be written + if len(hsic.caCerts) > 0 { + commands = append(commands, + fmt.Sprintf("while [ ! -f %s/user-0.crt ]; do sleep 0.1; done", caCertRoot)) + } + + // Update CA certificates + commands = append(commands, "update-ca-certificates") + + // Run headscale serve + commands = append(commands, "/usr/local/bin/headscale serve") + + // Keep container alive after headscale exits for log collection + commands = append(commands, "/bin/sleep 30") + + return []string{"/bin/bash", "-c", strings.Join(commands, " ; ")} } // New returns a new HeadscaleInContainer instance. @@ -414,6 +442,7 @@ func New( if runOptions.PortBindings == nil { runOptions.PortBindings = map[docker.Port][]docker.PortBinding{} } + runOptions.PortBindings["9090/tcp"] = []docker.PortBinding{ {HostPort: "49090"}, } diff --git a/integration/ssh_test.go b/integration/ssh_test.go index 33335ccd..1ca291c0 100644 --- a/integration/ssh_test.go +++ b/integration/ssh_test.go @@ -42,11 +42,8 @@ func sshScenario(t *testing.T, policy *policyv2.Policy, clientsPerUser int) *Sce // tailscaled to stop configuring the wgengine, causing it // to not configure DNS. 
tsic.WithNetfilter("off"), - tsic.WithDockerEntrypoint([]string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; apk add openssh ; adduser ssh-it-user ; update-ca-certificates ; tailscaled --tun=tsdev", - }), + tsic.WithPackages("openssh"), + tsic.WithExtraCommands("adduser ssh-it-user"), tsic.WithDockerWorkdir("/"), }, hsic.WithACLPolicy(policy), @@ -395,8 +392,10 @@ func doSSHWithRetry(t *testing.T, client TailscaleClient, peer TailscaleClient, log.Printf("Running from %s to %s", client.Hostname(), peer.Hostname()) log.Printf("Command: %s", strings.Join(command, " ")) - var result, stderr string - var err error + var ( + result, stderr string + err error + ) if retry { // Use assert.EventuallyWithT to retry SSH connections for success cases @@ -455,6 +454,7 @@ func assertSSHTimeout(t *testing.T, client TailscaleClient, peer TailscaleClient func assertSSHNoAccessStdError(t *testing.T, err error, stderr string) { t.Helper() assert.Error(t, err) + if !isSSHNoAccessStdError(stderr) { t.Errorf("expected stderr output suggesting access denied, got: %s", stderr) } @@ -462,7 +462,7 @@ func assertSSHNoAccessStdError(t *testing.T, err error, stderr string) { // TestSSHAutogroupSelf tests that SSH with autogroup:self works correctly: // - Users can SSH to their own devices -// - Users cannot SSH to other users' devices +// - Users cannot SSH to other users' devices. func TestSSHAutogroupSelf(t *testing.T) { IntegrationSkip(t) diff --git a/integration/tsic/tsic.go b/integration/tsic/tsic.go index ff2690b0..790a1638 100644 --- a/integration/tsic/tsic.go +++ b/integration/tsic/tsic.go @@ -14,6 +14,7 @@ import ( "os" "reflect" "runtime/debug" + "slices" "strconv" "strings" "time" @@ -56,6 +57,8 @@ var ( errInvalidClientConfig = errors.New("verifiably invalid client config requested") errInvalidTailscaleImageFormat = errors.New("invalid HEADSCALE_INTEGRATION_TAILSCALE_IMAGE format, expected repository:tag") errTailscaleImageRequiredInCI = errors.New("HEADSCALE_INTEGRATION_TAILSCALE_IMAGE must be set in CI for HEAD version") + errContainerNotInitialized = errors.New("container not initialized") + errFQDNNotYetAvailable = errors.New("FQDN not yet available") ) const ( @@ -92,6 +95,9 @@ type TailscaleInContainer struct { netfilter string extraLoginArgs []string withAcceptRoutes bool + withPackages []string // Alpine packages to install at container start + withWebserverPort int // Port for built-in HTTP server (0 = disabled) + withExtraCommands []string // Extra shell commands to run before tailscaled // build options, solely for HEAD buildConfig TailscaleInContainerBuildConfig @@ -214,6 +220,82 @@ func WithAcceptRoutes() Option { } } +// WithPackages specifies Alpine packages to install when the container starts. +// This requires internet access and uses `apk add`. Common packages: +// - "python3" for HTTP server +// - "curl" for HTTP client +// - "bind-tools" for dig command +// - "iptables", "ip6tables" for firewall rules +// Note: Tests using this option require internet access and cannot use +// the built-in DERP server in offline mode. +func WithPackages(packages ...string) Option { + return func(tsic *TailscaleInContainer) { + tsic.withPackages = append(tsic.withPackages, packages...) + } +} + +// WithWebserver starts a Python HTTP server on the specified port +// alongside tailscaled. This is useful for testing subnet routing +// and ACL connectivity. Automatically adds "python3" to packages if needed. +// The server serves files from the root directory (/). 
+func WithWebserver(port int) Option { + return func(tsic *TailscaleInContainer) { + tsic.withWebserverPort = port + } +} + +// WithExtraCommands adds extra shell commands to run before tailscaled starts. +// Commands are run after package installation and CA certificate updates. +func WithExtraCommands(commands ...string) Option { + return func(tsic *TailscaleInContainer) { + tsic.withExtraCommands = append(tsic.withExtraCommands, commands...) + } +} + +// buildEntrypoint constructs the container entrypoint command based on +// configured options (packages, webserver, etc.). +func (t *TailscaleInContainer) buildEntrypoint() []string { + var commands []string + + // Wait for network to be ready + commands = append(commands, "while ! ip route show default >/dev/null 2>&1; do sleep 0.1; done") + + // If CA certs are configured, wait for them to be written by the Go code + // (certs are written after container start via tsic.WriteFile) + if len(t.caCerts) > 0 { + commands = append(commands, + fmt.Sprintf("while [ ! -f %s/user-0.crt ]; do sleep 0.1; done", caCertRoot)) + } + + // Install packages if requested (requires internet access) + packages := t.withPackages + if t.withWebserverPort > 0 && !slices.Contains(packages, "python3") { + packages = append(packages, "python3") + } + + if len(packages) > 0 { + commands = append(commands, "apk add --no-cache "+strings.Join(packages, " ")) + } + + // Update CA certificates + commands = append(commands, "update-ca-certificates") + + // Run extra commands if any + commands = append(commands, t.withExtraCommands...) + + // Start webserver in background if requested + // Use subshell to avoid & interfering with command joining + if t.withWebserverPort > 0 { + commands = append(commands, + fmt.Sprintf("(python3 -m http.server --bind :: %d &)", t.withWebserverPort)) + } + + // Start tailscaled (must be last as it's the foreground process) + commands = append(commands, "tailscaled --tun=tsdev --verbose=10") + + return []string{"/bin/sh", "-c", strings.Join(commands, " ; ")} +} + // New returns a new TailscaleInContainer instance. func New( pool *dockertest.Pool, @@ -232,18 +314,18 @@ func New( hostname: hostname, pool: pool, - - withEntrypoint: []string{ - "/bin/sh", - "-c", - "/bin/sleep 3 ; update-ca-certificates ; tailscaled --tun=tsdev --verbose=10", - }, } for _, opt := range opts { opt(tsic) } + // Build the entrypoint command dynamically based on options. + // Only build if no custom entrypoint was provided via WithDockerEntrypoint. + if len(tsic.withEntrypoint) == 0 { + tsic.withEntrypoint = tsic.buildEntrypoint() + } + if tsic.network == nil { return nil, fmt.Errorf("no network set, called from: \n%s", string(debug.Stack())) } @@ -293,6 +375,7 @@ func New( // build options are not meaningful with pre-existing images, // let's not lead anyone astray by pretending otherwise. 
defaultBuildConfig := TailscaleInContainerBuildConfig{} + hasBuildConfig := !reflect.DeepEqual(defaultBuildConfig, tsic.buildConfig) if hasBuildConfig { return tsic, errInvalidClientConfig @@ -453,6 +536,7 @@ func New( err, ) } + log.Printf("Created %s container\n", hostname) tsic.container = container @@ -512,7 +596,6 @@ func (t *TailscaleInContainer) Execute( if err != nil { // log.Printf("command issued: %s", strings.Join(command, " ")) // log.Printf("command stderr: %s\n", stderr) - if stdout != "" { log.Printf("command stdout: %s\n", stdout) } @@ -638,7 +721,7 @@ func (t *TailscaleInContainer) Logout() error { // "tailscale up" with any auth keys stored in environment variables. func (t *TailscaleInContainer) Restart() error { if t.container == nil { - return fmt.Errorf("container not initialized") + return errContainerNotInitialized } // Use Docker API to restart the container @@ -655,6 +738,7 @@ func (t *TailscaleInContainer) Restart() error { if err != nil { return struct{}{}, fmt.Errorf("container not ready: %w", err) } + return struct{}{}, nil }, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second)) if err != nil { @@ -721,15 +805,18 @@ func (t *TailscaleInContainer) IPs() ([]netip.Addr, error) { } ips := make([]netip.Addr, 0) + for address := range strings.SplitSeq(result, "\n") { address = strings.TrimSuffix(address, "\n") if len(address) < 1 { continue } + ip, err := netip.ParseAddr(address) if err != nil { return nil, fmt.Errorf("failed to parse IP %s: %w", address, err) } + ips = append(ips, ip) } @@ -751,6 +838,7 @@ func (t *TailscaleInContainer) MustIPs() []netip.Addr { if err != nil { panic(err) } + return ips } @@ -775,6 +863,7 @@ func (t *TailscaleInContainer) MustIPv4() netip.Addr { if err != nil { panic(err) } + return ip } @@ -784,6 +873,7 @@ func (t *TailscaleInContainer) MustIPv6() netip.Addr { return ip } } + panic("no ipv6 found") } @@ -801,6 +891,7 @@ func (t *TailscaleInContainer) Status(save ...bool) (*ipnstate.Status, error) { } var status ipnstate.Status + err = json.Unmarshal([]byte(result), &status) if err != nil { return nil, fmt.Errorf("failed to unmarshal tailscale status: %w", err) @@ -860,6 +951,7 @@ func (t *TailscaleInContainer) Netmap() (*netmap.NetworkMap, error) { } var nm netmap.NetworkMap + err = json.Unmarshal([]byte(result), &nm) if err != nil { return nil, fmt.Errorf("failed to unmarshal tailscale netmap: %w", err) @@ -905,6 +997,7 @@ func (t *TailscaleInContainer) watchIPN(ctx context.Context) (*ipn.Notify, error notify *ipn.Notify err error } + resultChan := make(chan result, 1) // There is no good way to kill the goroutine with watch-ipn, @@ -936,7 +1029,9 @@ func (t *TailscaleInContainer) watchIPN(ctx context.Context) (*ipn.Notify, error decoder := json.NewDecoder(pr) for decoder.More() { var notify ipn.Notify - if err := decoder.Decode(¬ify); err != nil { + + err := decoder.Decode(¬ify) + if err != nil { resultChan <- result{nil, fmt.Errorf("parse notify: %w", err)} } @@ -983,6 +1078,7 @@ func (t *TailscaleInContainer) DebugDERPRegion(region string) (*ipnstate.DebugDE } var report ipnstate.DebugDERPRegionReport + err = json.Unmarshal([]byte(result), &report) if err != nil { return nil, fmt.Errorf("failed to unmarshal tailscale derp region report: %w", err) @@ -1006,6 +1102,7 @@ func (t *TailscaleInContainer) Netcheck() (*netcheck.Report, error) { } var nm netcheck.Report + err = json.Unmarshal([]byte(result), &nm) if err != nil { return nil, fmt.Errorf("failed to unmarshal tailscale netcheck: 
%w", err) @@ -1028,7 +1125,7 @@ func (t *TailscaleInContainer) FQDN() (string, error) { } if status.Self.DNSName == "" { - return "", fmt.Errorf("FQDN not yet available") + return "", errFQDNNotYetAvailable } return status.Self.DNSName, nil @@ -1046,6 +1143,7 @@ func (t *TailscaleInContainer) MustFQDN() string { if err != nil { panic(err) } + return fqdn } @@ -1139,12 +1237,14 @@ func (t *TailscaleInContainer) WaitForPeers(expected int, timeout, retryInterval defer cancel() var lastErrs []error + for { select { case <-ctx.Done(): if len(lastErrs) > 0 { return fmt.Errorf("timeout waiting for %d peers on %s after %v, errors: %w", expected, t.hostname, timeout, multierr.New(lastErrs...)) } + return fmt.Errorf("timeout waiting for %d peers on %s after %v", expected, t.hostname, timeout) case <-ticker.C: status, err := t.Status() @@ -1168,6 +1268,7 @@ func (t *TailscaleInContainer) WaitForPeers(expected int, timeout, retryInterval // Verify that the peers of a given node is Online // has a hostname and a DERP relay. var peerErrors []error + for _, peerKey := range status.Peers() { peer := status.Peer[peerKey] @@ -1361,6 +1462,7 @@ func (t *TailscaleInContainer) Curl(url string, opts ...CurlOption) (string, err } var result string + result, _, err := t.Execute(command) if err != nil { log.Printf( @@ -1394,6 +1496,7 @@ func (t *TailscaleInContainer) Traceroute(ip netip.Addr) (util.Traceroute, error } var result util.Traceroute + stdout, stderr, err := t.Execute(command) if err != nil { return result, err @@ -1439,12 +1542,14 @@ func (t *TailscaleInContainer) ReadFile(path string) ([]byte, error) { } var out bytes.Buffer + tr := tar.NewReader(bytes.NewReader(tarBytes)) for { hdr, err := tr.Next() if err == io.EOF { break // End of archive } + if err != nil { return nil, fmt.Errorf("reading tar header: %w", err) } @@ -1473,6 +1578,7 @@ func (t *TailscaleInContainer) GetNodePrivateKey() (*key.NodePrivate, error) { if err != nil { return nil, fmt.Errorf("failed to read state file: %w", err) } + store := &mem.Store{} if err = store.LoadFromJSON(state); err != nil { return nil, fmt.Errorf("failed to unmarshal state file: %w", err) @@ -1482,6 +1588,7 @@ func (t *TailscaleInContainer) GetNodePrivateKey() (*key.NodePrivate, error) { if err != nil { return nil, fmt.Errorf("failed to read current profile state key: %w", err) } + currentProfile, err := store.ReadState(ipn.StateKey(currentProfileKey)) if err != nil { return nil, fmt.Errorf("failed to read current profile state: %w", err)