From 0bcfdc29ada99b491cff641b386b28bff8bb4709 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Fri, 9 Jan 2026 11:19:11 +0000 Subject: [PATCH] cmd/hi: enable concurrent test execution Remove the concurrent test prevention logic and update cleanup to use run ID-based isolation, allowing multiple tests to run simultaneously. Changes: - cleanup: Add killTestContainersByRunID() to clean only containers belonging to a specific run, add cleanupStaleTestContainers() to remove only stopped/exited containers without affecting running tests - docker: Remove RunningTestInfo, checkForRunningTests(), and related error types, update cleanupAfterTest() to use run ID-based cleanup - run: Remove Force flag and concurrent test prevention check The test runner now: - Allows multiple concurrent test runs on the same Docker daemon - Cleans only stale containers before tests (not running ones) - Cleans only containers with matching run ID after tests - Prints run ID and monitoring info for operator visibility --- cmd/hi/cleanup.go | 120 ++++++++++++++++++++++++++++++++++++++++++++-- cmd/hi/docker.go | 95 ++++-------------------------------- cmd/hi/run.go | 73 +--------------------------- 3 files changed, 124 insertions(+), 164 deletions(-) diff --git a/cmd/hi/cleanup.go b/cmd/hi/cleanup.go index e8d7b926..7c5b5214 100644 --- a/cmd/hi/cleanup.go +++ b/cmd/hi/cleanup.go @@ -18,9 +18,11 @@ import ( ) // cleanupBeforeTest performs cleanup operations before running tests. +// Only removes stale (stopped/exited) test containers to avoid interfering with concurrent test runs. func cleanupBeforeTest(ctx context.Context) error { - if err := killTestContainers(ctx); err != nil { - return fmt.Errorf("failed to kill test containers: %w", err) + err := cleanupStaleTestContainers(ctx) + if err != nil { + return fmt.Errorf("failed to clean stale test containers: %w", err) } if err := pruneDockerNetworks(ctx); err != nil { @@ -30,11 +32,25 @@ func cleanupBeforeTest(ctx context.Context) error { return nil } -// cleanupAfterTest removes the test container after completion. -func cleanupAfterTest(ctx context.Context, cli *client.Client, containerID string) error { - return cli.ContainerRemove(ctx, containerID, container.RemoveOptions{ +// cleanupAfterTest removes the test container and all associated integration test containers for the run. +func cleanupAfterTest(ctx context.Context, cli *client.Client, containerID, runID string) error { + // Remove the main test container + err := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{ Force: true, }) + if err != nil { + return fmt.Errorf("failed to remove test container: %w", err) + } + + // Clean up integration test containers for this run only + if runID != "" { + err := killTestContainersByRunID(ctx, runID) + if err != nil { + return fmt.Errorf("failed to clean up containers for run %s: %w", runID, err) + } + } + + return nil } // killTestContainers terminates and removes all test containers. @@ -87,6 +103,100 @@ func killTestContainers(ctx context.Context) error { return nil } +// killTestContainersByRunID terminates and removes all test containers for a specific run ID. +// This function filters containers by the hi.run-id label to only affect containers +// belonging to the specified test run, leaving other concurrent test runs untouched. +func killTestContainersByRunID(ctx context.Context, runID string) error { + cli, err := createDockerClient() + if err != nil { + return fmt.Errorf("failed to create Docker client: %w", err) + } + defer cli.Close() + + // Filter containers by hi.run-id label + containers, err := cli.ContainerList(ctx, container.ListOptions{ + All: true, + Filters: filters.NewArgs( + filters.Arg("label", "hi.run-id="+runID), + ), + }) + if err != nil { + return fmt.Errorf("failed to list containers for run %s: %w", runID, err) + } + + removed := 0 + + for _, cont := range containers { + // Kill the container if it's running + if cont.State == "running" { + _ = cli.ContainerKill(ctx, cont.ID, "KILL") + } + + // Remove the container with retry logic + if removeContainerWithRetry(ctx, cli, cont.ID) { + removed++ + } + } + + if removed > 0 { + fmt.Printf("Removed %d containers for run ID %s\n", removed, runID) + } + + return nil +} + +// cleanupStaleTestContainers removes stopped/exited test containers without affecting running tests. +// This is useful for cleaning up leftover containers from previous crashed or interrupted test runs +// without interfering with currently running concurrent tests. +func cleanupStaleTestContainers(ctx context.Context) error { + cli, err := createDockerClient() + if err != nil { + return fmt.Errorf("failed to create Docker client: %w", err) + } + defer cli.Close() + + // Only get stopped/exited containers + containers, err := cli.ContainerList(ctx, container.ListOptions{ + All: true, + Filters: filters.NewArgs( + filters.Arg("status", "exited"), + filters.Arg("status", "dead"), + ), + }) + if err != nil { + return fmt.Errorf("failed to list stopped containers: %w", err) + } + + removed := 0 + + for _, cont := range containers { + // Only remove containers that look like test containers + shouldRemove := false + + for _, name := range cont.Names { + if strings.Contains(name, "headscale-test-suite") || + strings.Contains(name, "hs-") || + strings.Contains(name, "ts-") || + strings.Contains(name, "derp-") { + shouldRemove = true + break + } + } + + if shouldRemove { + if removeContainerWithRetry(ctx, cli, cont.ID) { + removed++ + } + } + } + + if removed > 0 { + fmt.Printf("Removed %d stale test containers\n", removed) + } + + return nil +} + const ( containerRemoveInitialInterval = 100 * time.Millisecond containerRemoveMaxElapsedTime = 2 * time.Second diff --git a/cmd/hi/docker.go b/cmd/hi/docker.go index 65250e65..a6b94b25 100644 --- a/cmd/hi/docker.go +++ b/cmd/hi/docker.go @@ -26,93 +26,8 @@ var ( ErrTestFailed = errors.New("test failed") ErrUnexpectedContainerWait = errors.New("unexpected end of container wait") ErrNoDockerContext = errors.New("no docker context found") - ErrAnotherRunInProgress = errors.New("another integration test run is already in progress") ) -// RunningTestInfo contains information about a currently running integration test. -type RunningTestInfo struct { - RunID string - ContainerID string - ContainerName string - StartTime time.Time - Duration time.Duration - TestPattern string -} - -// ErrNoRunningTests indicates that no integration test is currently running. -var ErrNoRunningTests = errors.New("no running tests found") - -// checkForRunningTests checks if there's already an integration test running. -// Returns ErrNoRunningTests if no test is running, or RunningTestInfo with details about the running test. -func checkForRunningTests(ctx context.Context) (*RunningTestInfo, error) { - cli, err := createDockerClient() - if err != nil { - return nil, fmt.Errorf("failed to create Docker client: %w", err) - } - defer cli.Close() - - // List all running containers - containers, err := cli.ContainerList(ctx, container.ListOptions{ - All: false, // Only running containers - }) - if err != nil { - return nil, fmt.Errorf("failed to list containers: %w", err) - } - - // Look for containers with hi.test-type=test-runner label - for _, cont := range containers { - if cont.Labels != nil && cont.Labels["hi.test-type"] == "test-runner" { - // Found a running test runner container - runID := cont.Labels["hi.run-id"] - - containerName := "" - for _, name := range cont.Names { - containerName = strings.TrimPrefix(name, "/") - - break - } - - // Get more details via inspection - inspect, err := cli.ContainerInspect(ctx, cont.ID) - if err != nil { - // Return basic info if inspection fails - return &RunningTestInfo{ - RunID: runID, - ContainerID: cont.ID, - ContainerName: containerName, - }, nil - } - - startTime, _ := time.Parse(time.RFC3339Nano, inspect.State.StartedAt) - duration := time.Since(startTime) - - // Try to extract test pattern from command - testPattern := "" - - if len(inspect.Config.Cmd) > 0 { - for i, arg := range inspect.Config.Cmd { - if arg == "-run" && i+1 < len(inspect.Config.Cmd) { - testPattern = inspect.Config.Cmd[i+1] - - break - } - } - } - - return &RunningTestInfo{ - RunID: runID, - ContainerID: cont.ID, - ContainerName: containerName, - StartTime: startTime, - Duration: duration, - TestPattern: testPattern, - }, nil - } - } - - return nil, ErrNoRunningTests -} - // runTestContainer executes integration tests in a Docker container. func runTestContainer(ctx context.Context, config *RunConfig) error { cli, err := createDockerClient() @@ -174,6 +89,9 @@ func runTestContainer(ctx context.Context, config *RunConfig) error { } log.Printf("Starting test: %s", config.TestPattern) + log.Printf("Run ID: %s", runID) + log.Printf("Monitor with: docker logs -f %s", containerName) + log.Printf("Logs directory: %s", logsDir) // Start stats collection for container resource monitoring (if enabled) var statsCollector *StatsCollector @@ -234,9 +152,12 @@ func runTestContainer(ctx context.Context, config *RunConfig) error { shouldCleanup := config.CleanAfter && (!config.KeepOnFailure || exitCode == 0) if shouldCleanup { if config.Verbose { - log.Printf("Running post-test cleanup...") + log.Printf("Running post-test cleanup for run %s...", runID) } - if cleanErr := cleanupAfterTest(ctx, cli, resp.ID); cleanErr != nil && config.Verbose { + + cleanErr := cleanupAfterTest(ctx, cli, resp.ID, runID) + + if cleanErr != nil && config.Verbose { log.Printf("Warning: post-test cleanup failed: %v", cleanErr) } diff --git a/cmd/hi/run.go b/cmd/hi/run.go index 3456b668..1694399d 100644 --- a/cmd/hi/run.go +++ b/cmd/hi/run.go @@ -6,7 +6,6 @@ import ( "log" "os" "path/filepath" - "strings" "time" "github.com/creachadair/command" @@ -14,65 +13,13 @@ import ( var ErrTestPatternRequired = errors.New("test pattern is required as first argument or use --test flag") -// formatRunningTestError creates a detailed error message about a running test. -func formatRunningTestError(info *RunningTestInfo) error { - var msg strings.Builder - msg.WriteString("\n") - msg.WriteString("╔══════════════════════════════════════════════════════════════════╗\n") - msg.WriteString("║ Another integration test run is already in progress! ║\n") - msg.WriteString("╚══════════════════════════════════════════════════════════════════╝\n") - msg.WriteString("\n") - msg.WriteString("Running test details:\n") - msg.WriteString(fmt.Sprintf(" Run ID: %s\n", info.RunID)) - msg.WriteString(fmt.Sprintf(" Container: %s\n", info.ContainerName)) - - if info.TestPattern != "" { - msg.WriteString(fmt.Sprintf(" Test: %s\n", info.TestPattern)) - } - - if !info.StartTime.IsZero() { - msg.WriteString(fmt.Sprintf(" Started: %s\n", info.StartTime.Format("2006-01-02 15:04:05"))) - msg.WriteString(fmt.Sprintf(" Running for: %s\n", formatDuration(info.Duration))) - } - - msg.WriteString("\n") - msg.WriteString("Please wait for the current test to complete, or stop it with:\n") - msg.WriteString(" go run ./cmd/hi clean containers\n") - msg.WriteString("\n") - msg.WriteString("To monitor the running test:\n") - msg.WriteString(fmt.Sprintf(" docker logs -f %s\n", info.ContainerName)) - - return fmt.Errorf("%w\n%s", ErrAnotherRunInProgress, msg.String()) -} - -const secondsPerMinute = 60 - -// formatDuration formats a duration in a human-readable way. -func formatDuration(d time.Duration) string { - if d < time.Minute { - return fmt.Sprintf("%d seconds", int(d.Seconds())) - } - - if d < time.Hour { - minutes := int(d.Minutes()) - seconds := int(d.Seconds()) % secondsPerMinute - - return fmt.Sprintf("%d minutes, %d seconds", minutes, seconds) - } - - hours := int(d.Hours()) - minutes := int(d.Minutes()) % secondsPerMinute - - return fmt.Sprintf("%d hours, %d minutes", hours, minutes) -} - type RunConfig struct { TestPattern string `flag:"test,Test pattern to run"` Timeout time.Duration `flag:"timeout,default=120m,Test timeout"` FailFast bool `flag:"failfast,default=true,Stop on first test failure"` UsePostgres bool `flag:"postgres,default=false,Use PostgreSQL instead of SQLite"` GoVersion string `flag:"go-version,Go version to use (auto-detected from go.mod)"` - CleanBefore bool `flag:"clean-before,default=true,Clean resources before test"` + CleanBefore bool `flag:"clean-before,default=true,Clean stale resources before test"` CleanAfter bool `flag:"clean-after,default=true,Clean resources after test"` KeepOnFailure bool `flag:"keep-on-failure,default=false,Keep containers on test failure"` LogsDir string `flag:"logs-dir,default=control_logs,Control logs directory"` @@ -80,7 +27,6 @@ type RunConfig struct { Stats bool `flag:"stats,default=false,Collect and display container resource usage statistics"` HSMemoryLimit float64 `flag:"hs-memory-limit,default=0,Fail test if any Headscale container exceeds this memory limit in MB (0 = disabled)"` TSMemoryLimit float64 `flag:"ts-memory-limit,default=0,Fail test if any Tailscale container exceeds this memory limit in MB (0 = disabled)"` - Force bool `flag:"force,default=false,Kill any running test and start a new one"` } // runIntegrationTest executes the integration test workflow. @@ -98,23 +44,6 @@ func runIntegrationTest(env *command.Env) error { runConfig.GoVersion = detectGoVersion() } - // Check if another test run is already in progress - runningTest, err := checkForRunningTests(env.Context()) - if err != nil && !errors.Is(err, ErrNoRunningTests) { - log.Printf("Warning: failed to check for running tests: %v", err) - } else if runningTest != nil { - if runConfig.Force { - log.Printf("Force flag set, killing existing test run: %s", runningTest.RunID) - - err = killTestContainers(env.Context()) - if err != nil { - return fmt.Errorf("failed to kill existing test containers: %w", err) - } - } else { - return formatRunningTestError(runningTest) - } - } - // Run pre-flight checks if runConfig.Verbose { log.Printf("Running pre-flight system checks...")