cmd/hi: enable concurrent test execution

Remove the concurrent test prevention logic and update cleanup to use
run ID-based isolation, allowing multiple tests to run simultaneously.

Changes:
- cleanup: Add killTestContainersByRunID() to clean only containers
  belonging to a specific run; add cleanupStaleTestContainers() to
  remove only stopped/exited containers without affecting running tests
- docker: Remove RunningTestInfo, checkForRunningTests(), and related
  error types; update cleanupAfterTest() to use run ID-based cleanup
- run: Remove Force flag and concurrent test prevention check

The test runner now:
- Allows multiple concurrent test runs on the same Docker daemon
- Cleans only stale containers before tests (not running ones)
- Cleans only containers with matching run ID after tests
- Prints run ID and monitoring info for operator visibility
This commit is contained in:
Kristoffer Dalby
2026-01-09 11:19:11 +00:00
parent 87c230d251
commit 0bcfdc29ad
3 changed files with 124 additions and 164 deletions

View File

@@ -18,9 +18,11 @@ import (
)
// cleanupBeforeTest performs cleanup operations before running tests.
// Only removes stale (stopped/exited) test containers to avoid interfering with concurrent test runs.
func cleanupBeforeTest(ctx context.Context) error {
if err := killTestContainers(ctx); err != nil {
return fmt.Errorf("failed to kill test containers: %w", err)
err := cleanupStaleTestContainers(ctx)
if err != nil {
return fmt.Errorf("failed to clean stale test containers: %w", err)
}
if err := pruneDockerNetworks(ctx); err != nil {
@@ -30,11 +32,25 @@ func cleanupBeforeTest(ctx context.Context) error {
return nil
}
// cleanupAfterTest removes the test container after completion.
func cleanupAfterTest(ctx context.Context, cli *client.Client, containerID string) error {
return cli.ContainerRemove(ctx, containerID, container.RemoveOptions{
// cleanupAfterTest removes the main test container and, when runID is
// non-empty, the integration test containers belonging to that run.
//
// Both cleanup steps are always attempted: a failure to remove the main test
// container no longer prevents the run's other containers from being cleaned
// up, so a single failed removal cannot leak the whole run.
//
// Parameters:
//   - ctx: context passed to the Docker API calls.
//   - cli: Docker client used to remove the main test container.
//   - containerID: ID of the main test container to force-remove.
//   - runID: run identifier; when empty, run-scoped cleanup is skipped.
//
// Returns nil when every attempted step succeeded. If only one step failed,
// its wrapped error is returned. If both failed, the container-removal error
// is wrapped (reachable via errors.Is/As) and the run-cleanup error is
// appended to the message.
func cleanupAfterTest(ctx context.Context, cli *client.Client, containerID, runID string) error {
	// Remove the main test container; Force also removes it while running.
	removeErr := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{
		Force: true,
	})
	if removeErr != nil {
		removeErr = fmt.Errorf("failed to remove test container: %w", removeErr)
	}

	// Clean up integration test containers for this run only. This runs even
	// if the removal above failed, so that the run's containers are not leaked.
	var runErr error
	if runID != "" {
		if err := killTestContainersByRunID(ctx, runID); err != nil {
			runErr = fmt.Errorf("failed to clean up containers for run %s: %w", runID, err)
		}
	}

	switch {
	case removeErr != nil && runErr != nil:
		return fmt.Errorf("%w; additionally: %v", removeErr, runErr)
	case removeErr != nil:
		return removeErr
	default:
		return runErr
	}
}
// killTestContainers terminates and removes all test containers.
@@ -87,6 +103,100 @@ func killTestContainers(ctx context.Context) error {
return nil
}
// killTestContainersByRunID kills and removes the containers labelled
// hi.run-id=<runID>.
//
// Only containers matching that label are listed, so containers belonging to
// other run IDs are never touched. Stopped containers are included in the
// listing (All: true). The function creates and closes its own Docker client.
//
// An error is returned only when the Docker client cannot be created or the
// containers cannot be listed; individual kill/remove failures are not
// reported to the caller.
func killTestContainersByRunID(ctx context.Context, runID string) error {
	cli, err := createDockerClient()
	if err != nil {
		return fmt.Errorf("failed to create Docker client: %w", err)
	}
	defer cli.Close()

	// Restrict the listing to this run via the hi.run-id label.
	listOpts := container.ListOptions{
		All:     true,
		Filters: filters.NewArgs(filters.Arg("label", "hi.run-id="+runID)),
	}

	runContainers, err := cli.ContainerList(ctx, listOpts)
	if err != nil {
		return fmt.Errorf("failed to list containers for run %s: %w", runID, err)
	}

	var removedCount int
	for i := range runContainers {
		c := &runContainers[i]

		if c.State == "running" {
			// Best effort: the kill result is ignored and removal is
			// attempted regardless.
			_ = cli.ContainerKill(ctx, c.ID, "KILL")
		}

		// Count the container only when the helper reports success.
		if !removeContainerWithRetry(ctx, cli, c.ID) {
			continue
		}
		removedCount++
	}

	if removedCount == 0 {
		return nil
	}

	fmt.Printf("Removed %d containers for run ID %s\n", removedCount, runID)

	return nil
}
// cleanupStaleTestContainers removes leftover test containers whose status is
// "exited" or "dead", leaving every other container alone.
//
// A container is treated as a test container when any of its names contains
// one of the markers "headscale-test-suite", "hs-", "ts-" or "derp-".
// NOTE(review): this is a substring match, so any stopped container whose name
// merely contains e.g. "ts-" also qualifies — confirm this is intended.
//
// An error is returned only when the Docker client cannot be created or the
// containers cannot be listed; individual removal failures are not reported.
func cleanupStaleTestContainers(ctx context.Context) error {
	cli, err := createDockerClient()
	if err != nil {
		return fmt.Errorf("failed to create Docker client: %w", err)
	}
	defer cli.Close()

	// Only stopped containers are requested; running ones never appear here.
	listOpts := container.ListOptions{
		All: true,
		Filters: filters.NewArgs(
			filters.Arg("status", "exited"),
			filters.Arg("status", "dead"),
		),
	}

	stopped, err := cli.ContainerList(ctx, listOpts)
	if err != nil {
		return fmt.Errorf("failed to list stopped containers: %w", err)
	}

	testNameMarkers := []string{"headscale-test-suite", "hs-", "ts-", "derp-"}

	// looksLikeTestContainer reports whether any name contains a known marker.
	looksLikeTestContainer := func(names []string) bool {
		for _, n := range names {
			for _, marker := range testNameMarkers {
				if strings.Contains(n, marker) {
					return true
				}
			}
		}

		return false
	}

	var removedCount int
	for i := range stopped {
		c := &stopped[i]

		if !looksLikeTestContainer(c.Names) {
			continue
		}

		// Count the container only when the helper reports success.
		if removeContainerWithRetry(ctx, cli, c.ID) {
			removedCount++
		}
	}

	if removedCount == 0 {
		return nil
	}

	fmt.Printf("Removed %d stale test containers\n", removedCount)

	return nil
}
const (
containerRemoveInitialInterval = 100 * time.Millisecond
containerRemoveMaxElapsedTime = 2 * time.Second