cmd/hi: enable concurrent test execution

Remove the concurrent test prevention logic and update cleanup to use
run ID-based isolation, allowing multiple tests to run simultaneously.

Changes:
- cleanup: Add killTestContainersByRunID() to clean only containers
  belonging to a specific run, add cleanupStaleTestContainers() to
  remove only stopped/exited containers without affecting running tests
- docker: Remove RunningTestInfo, checkForRunningTests(), and related
  error types, update cleanupAfterTest() to use run ID-based cleanup
- run: Remove Force flag and concurrent test prevention check

The test runner now:
- Allows multiple concurrent test runs on the same Docker daemon
- Cleans only stale containers before tests (not running ones)
- Cleans only containers with matching run ID after tests
- Prints run ID and monitoring info for operator visibility
This commit is contained in:
Kristoffer Dalby
2026-01-09 11:19:11 +00:00
parent 87c230d251
commit 0bcfdc29ad
3 changed files with 124 additions and 164 deletions

View File

@@ -18,9 +18,11 @@ import (
)
// cleanupBeforeTest performs cleanup operations before running tests.
// Only removes stale (stopped/exited) test containers to avoid interfering with concurrent test runs.
func cleanupBeforeTest(ctx context.Context) error {
if err := killTestContainers(ctx); err != nil {
return fmt.Errorf("failed to kill test containers: %w", err)
err := cleanupStaleTestContainers(ctx)
if err != nil {
return fmt.Errorf("failed to clean stale test containers: %w", err)
}
if err := pruneDockerNetworks(ctx); err != nil {
@@ -30,11 +32,25 @@ func cleanupBeforeTest(ctx context.Context) error {
return nil
}
// cleanupAfterTest removes the test container after completion.
func cleanupAfterTest(ctx context.Context, cli *client.Client, containerID string) error {
return cli.ContainerRemove(ctx, containerID, container.RemoveOptions{
// cleanupAfterTest removes the test container and all associated integration test containers for the run.
func cleanupAfterTest(ctx context.Context, cli *client.Client, containerID, runID string) error {
// Remove the main test container
err := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{
Force: true,
})
if err != nil {
return fmt.Errorf("failed to remove test container: %w", err)
}
// Clean up integration test containers for this run only
if runID != "" {
err := killTestContainersByRunID(ctx, runID)
if err != nil {
return fmt.Errorf("failed to clean up containers for run %s: %w", runID, err)
}
}
return nil
}
// killTestContainers terminates and removes all test containers.
@@ -87,6 +103,100 @@ func killTestContainers(ctx context.Context) error {
return nil
}
// killTestContainersByRunID terminates and removes all test containers for a specific run ID.
// This function filters containers by the hi.run-id label to only affect containers
// belonging to the specified test run, leaving other concurrent test runs untouched.
func killTestContainersByRunID(ctx context.Context, runID string) error {
cli, err := createDockerClient()
if err != nil {
return fmt.Errorf("failed to create Docker client: %w", err)
}
defer cli.Close()
// Filter containers by hi.run-id label
containers, err := cli.ContainerList(ctx, container.ListOptions{
All: true,
Filters: filters.NewArgs(
filters.Arg("label", "hi.run-id="+runID),
),
})
if err != nil {
return fmt.Errorf("failed to list containers for run %s: %w", runID, err)
}
removed := 0
for _, cont := range containers {
// Kill the container if it's running
if cont.State == "running" {
_ = cli.ContainerKill(ctx, cont.ID, "KILL")
}
// Remove the container with retry logic
if removeContainerWithRetry(ctx, cli, cont.ID) {
removed++
}
}
if removed > 0 {
fmt.Printf("Removed %d containers for run ID %s\n", removed, runID)
}
return nil
}
// cleanupStaleTestContainers removes stopped/exited test containers without affecting running tests.
// This is useful for cleaning up leftover containers from previous crashed or interrupted test runs
// without interfering with currently running concurrent tests.
func cleanupStaleTestContainers(ctx context.Context) error {
cli, err := createDockerClient()
if err != nil {
return fmt.Errorf("failed to create Docker client: %w", err)
}
defer cli.Close()
// Only get stopped/exited containers
containers, err := cli.ContainerList(ctx, container.ListOptions{
All: true,
Filters: filters.NewArgs(
filters.Arg("status", "exited"),
filters.Arg("status", "dead"),
),
})
if err != nil {
return fmt.Errorf("failed to list stopped containers: %w", err)
}
removed := 0
for _, cont := range containers {
// Only remove containers that look like test containers
shouldRemove := false
for _, name := range cont.Names {
if strings.Contains(name, "headscale-test-suite") ||
strings.Contains(name, "hs-") ||
strings.Contains(name, "ts-") ||
strings.Contains(name, "derp-") {
shouldRemove = true
break
}
}
if shouldRemove {
if removeContainerWithRetry(ctx, cli, cont.ID) {
removed++
}
}
}
if removed > 0 {
fmt.Printf("Removed %d stale test containers\n", removed)
}
return nil
}
const (
containerRemoveInitialInterval = 100 * time.Millisecond
containerRemoveMaxElapsedTime = 2 * time.Second

View File

@@ -26,93 +26,8 @@ var (
ErrTestFailed = errors.New("test failed")
ErrUnexpectedContainerWait = errors.New("unexpected end of container wait")
ErrNoDockerContext = errors.New("no docker context found")
ErrAnotherRunInProgress = errors.New("another integration test run is already in progress")
)
// RunningTestInfo contains information about a currently running integration test.
type RunningTestInfo struct {
RunID string
ContainerID string
ContainerName string
StartTime time.Time
Duration time.Duration
TestPattern string
}
// ErrNoRunningTests indicates that no integration test is currently running.
var ErrNoRunningTests = errors.New("no running tests found")
// checkForRunningTests checks if there's already an integration test running.
// Returns ErrNoRunningTests if no test is running, or RunningTestInfo with details about the running test.
func checkForRunningTests(ctx context.Context) (*RunningTestInfo, error) {
cli, err := createDockerClient()
if err != nil {
return nil, fmt.Errorf("failed to create Docker client: %w", err)
}
defer cli.Close()
// List all running containers
containers, err := cli.ContainerList(ctx, container.ListOptions{
All: false, // Only running containers
})
if err != nil {
return nil, fmt.Errorf("failed to list containers: %w", err)
}
// Look for containers with hi.test-type=test-runner label
for _, cont := range containers {
if cont.Labels != nil && cont.Labels["hi.test-type"] == "test-runner" {
// Found a running test runner container
runID := cont.Labels["hi.run-id"]
containerName := ""
for _, name := range cont.Names {
containerName = strings.TrimPrefix(name, "/")
break
}
// Get more details via inspection
inspect, err := cli.ContainerInspect(ctx, cont.ID)
if err != nil {
// Return basic info if inspection fails
return &RunningTestInfo{
RunID: runID,
ContainerID: cont.ID,
ContainerName: containerName,
}, nil
}
startTime, _ := time.Parse(time.RFC3339Nano, inspect.State.StartedAt)
duration := time.Since(startTime)
// Try to extract test pattern from command
testPattern := ""
if len(inspect.Config.Cmd) > 0 {
for i, arg := range inspect.Config.Cmd {
if arg == "-run" && i+1 < len(inspect.Config.Cmd) {
testPattern = inspect.Config.Cmd[i+1]
break
}
}
}
return &RunningTestInfo{
RunID: runID,
ContainerID: cont.ID,
ContainerName: containerName,
StartTime: startTime,
Duration: duration,
TestPattern: testPattern,
}, nil
}
}
return nil, ErrNoRunningTests
}
// runTestContainer executes integration tests in a Docker container.
func runTestContainer(ctx context.Context, config *RunConfig) error {
cli, err := createDockerClient()
@@ -174,6 +89,9 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
}
log.Printf("Starting test: %s", config.TestPattern)
log.Printf("Run ID: %s", runID)
log.Printf("Monitor with: docker logs -f %s", containerName)
log.Printf("Logs directory: %s", logsDir)
// Start stats collection for container resource monitoring (if enabled)
var statsCollector *StatsCollector
@@ -234,9 +152,12 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
shouldCleanup := config.CleanAfter && (!config.KeepOnFailure || exitCode == 0)
if shouldCleanup {
if config.Verbose {
log.Printf("Running post-test cleanup...")
log.Printf("Running post-test cleanup for run %s...", runID)
}
if cleanErr := cleanupAfterTest(ctx, cli, resp.ID); cleanErr != nil && config.Verbose {
cleanErr := cleanupAfterTest(ctx, cli, resp.ID, runID)
if cleanErr != nil && config.Verbose {
log.Printf("Warning: post-test cleanup failed: %v", cleanErr)
}

View File

@@ -6,7 +6,6 @@ import (
"log"
"os"
"path/filepath"
"strings"
"time"
"github.com/creachadair/command"
@@ -14,65 +13,13 @@ import (
var ErrTestPatternRequired = errors.New("test pattern is required as first argument or use --test flag")
// formatRunningTestError creates a detailed error message about a running test.
func formatRunningTestError(info *RunningTestInfo) error {
var msg strings.Builder
msg.WriteString("\n")
msg.WriteString("╔══════════════════════════════════════════════════════════════════╗\n")
msg.WriteString("║ Another integration test run is already in progress! ║\n")
msg.WriteString("╚══════════════════════════════════════════════════════════════════╝\n")
msg.WriteString("\n")
msg.WriteString("Running test details:\n")
msg.WriteString(fmt.Sprintf(" Run ID: %s\n", info.RunID))
msg.WriteString(fmt.Sprintf(" Container: %s\n", info.ContainerName))
if info.TestPattern != "" {
msg.WriteString(fmt.Sprintf(" Test: %s\n", info.TestPattern))
}
if !info.StartTime.IsZero() {
msg.WriteString(fmt.Sprintf(" Started: %s\n", info.StartTime.Format("2006-01-02 15:04:05")))
msg.WriteString(fmt.Sprintf(" Running for: %s\n", formatDuration(info.Duration)))
}
msg.WriteString("\n")
msg.WriteString("Please wait for the current test to complete, or stop it with:\n")
msg.WriteString(" go run ./cmd/hi clean containers\n")
msg.WriteString("\n")
msg.WriteString("To monitor the running test:\n")
msg.WriteString(fmt.Sprintf(" docker logs -f %s\n", info.ContainerName))
return fmt.Errorf("%w\n%s", ErrAnotherRunInProgress, msg.String())
}
const secondsPerMinute = 60
// formatDuration formats a duration in a human-readable way.
func formatDuration(d time.Duration) string {
if d < time.Minute {
return fmt.Sprintf("%d seconds", int(d.Seconds()))
}
if d < time.Hour {
minutes := int(d.Minutes())
seconds := int(d.Seconds()) % secondsPerMinute
return fmt.Sprintf("%d minutes, %d seconds", minutes, seconds)
}
hours := int(d.Hours())
minutes := int(d.Minutes()) % secondsPerMinute
return fmt.Sprintf("%d hours, %d minutes", hours, minutes)
}
type RunConfig struct {
TestPattern string `flag:"test,Test pattern to run"`
Timeout time.Duration `flag:"timeout,default=120m,Test timeout"`
FailFast bool `flag:"failfast,default=true,Stop on first test failure"`
UsePostgres bool `flag:"postgres,default=false,Use PostgreSQL instead of SQLite"`
GoVersion string `flag:"go-version,Go version to use (auto-detected from go.mod)"`
CleanBefore bool `flag:"clean-before,default=true,Clean resources before test"`
CleanBefore bool `flag:"clean-before,default=true,Clean stale resources before test"`
CleanAfter bool `flag:"clean-after,default=true,Clean resources after test"`
KeepOnFailure bool `flag:"keep-on-failure,default=false,Keep containers on test failure"`
LogsDir string `flag:"logs-dir,default=control_logs,Control logs directory"`
@@ -80,7 +27,6 @@ type RunConfig struct {
Stats bool `flag:"stats,default=false,Collect and display container resource usage statistics"`
HSMemoryLimit float64 `flag:"hs-memory-limit,default=0,Fail test if any Headscale container exceeds this memory limit in MB (0 = disabled)"`
TSMemoryLimit float64 `flag:"ts-memory-limit,default=0,Fail test if any Tailscale container exceeds this memory limit in MB (0 = disabled)"`
Force bool `flag:"force,default=false,Kill any running test and start a new one"`
}
// runIntegrationTest executes the integration test workflow.
@@ -98,23 +44,6 @@ func runIntegrationTest(env *command.Env) error {
runConfig.GoVersion = detectGoVersion()
}
// Check if another test run is already in progress
runningTest, err := checkForRunningTests(env.Context())
if err != nil && !errors.Is(err, ErrNoRunningTests) {
log.Printf("Warning: failed to check for running tests: %v", err)
} else if runningTest != nil {
if runConfig.Force {
log.Printf("Force flag set, killing existing test run: %s", runningTest.RunID)
err = killTestContainers(env.Context())
if err != nil {
return fmt.Errorf("failed to kill existing test containers: %w", err)
}
} else {
return formatRunningTestError(runningTest)
}
}
// Run pre-flight checks
if runConfig.Verbose {
log.Printf("Running pre-flight system checks...")