package harness import ( "context" "crypto/rand" "encoding/hex" "encoding/json" "fmt" "net" "net/http" "os/exec" "strings" "testing" "time" ) const ( dockerNetworkPrefix = "lobbyrtm-it-" dockerNetworkTimeout = 30 * time.Second dockerCLITimeout = 30 * time.Second containerHealthzPort = 8080 containerHealthzTimeout = 5 * time.Second containerHealthzPoll = 100 * time.Millisecond ) // EnsureDockerNetwork creates a uniquely-named Docker bridge network // for the caller's test and registers cleanup. Each test gets its own // network so concurrent scenarios cannot collide on the per-game DNS // hostname (`galaxy-game-{game_id}`). The helper skips the test when // no Docker daemon is reachable. func EnsureDockerNetwork(t testing.TB) string { t.Helper() requireDockerDaemon(t) name := dockerNetworkPrefix + uniqueSuffix(t) ctx, cancel := context.WithTimeout(context.Background(), dockerNetworkTimeout) defer cancel() cmd := exec.CommandContext(ctx, "docker", "network", "create", "--driver", "bridge", name) output, err := cmd.CombinedOutput() if err != nil { t.Fatalf("integration harness: create docker network %q: %v; output:\n%s", name, err, strings.TrimSpace(string(output))) } t.Cleanup(func() { cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), dockerNetworkTimeout) defer cleanupCancel() removeCmd := exec.CommandContext(cleanupCtx, "docker", "network", "rm", name) if rmErr := removeCmd.Run(); rmErr != nil { t.Logf("integration harness: remove docker network %q: %v", name, rmErr) } }) return name } // FindContainerIDByLabel returns the id of the single running container // labelled with the given game id, or an empty string when no match is // found. The label keys are the ones rtmanager attaches at start time // (`com.galaxy.owner=rtmanager`, `com.galaxy.game_id=`). func FindContainerIDByLabel(t testing.TB, gameID string) string { t.Helper() requireDockerDaemon(t) ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout) defer cancel() cmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc", "--filter", "label=com.galaxy.owner=rtmanager", "--filter", "label=com.galaxy.game_id="+gameID, ) output, err := cmd.CombinedOutput() if err != nil { t.Fatalf("integration harness: docker ps for game %s: %v; output:\n%s", gameID, err, strings.TrimSpace(string(output))) } id := strings.TrimSpace(string(output)) if id == "" { return "" } if strings.Contains(id, "\n") { t.Fatalf("integration harness: multiple containers for game %s:\n%s", gameID, id) } return id } // ContainerState returns the runtime state string (e.g. `running`, // `exited`) of the container with the given id, looked up via // `docker inspect`. func ContainerState(t testing.TB, containerID string) string { t.Helper() requireDockerDaemon(t) ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout) defer cancel() cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{.State.Status}}", containerID) output, err := cmd.CombinedOutput() if err != nil { t.Fatalf("integration harness: docker inspect %s: %v; output:\n%s", containerID, err, strings.TrimSpace(string(output))) } return strings.TrimSpace(string(output)) } // ContainerNetworkIP returns the IPv4 address of the named container // inside the named bridge network. Returns an empty string when the // container has no endpoint on that network. func ContainerNetworkIP(t testing.TB, containerID, networkName string) string { t.Helper() requireDockerDaemon(t) ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout) defer cancel() cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{json .NetworkSettings.Networks}}", containerID) output, err := cmd.CombinedOutput() if err != nil { t.Fatalf("integration harness: docker inspect networks %s: %v; output:\n%s", containerID, err, strings.TrimSpace(string(output))) } var networks map[string]struct { IPAddress string `json:"IPAddress"` } if err := json.Unmarshal(output, &networks); err != nil { t.Fatalf("integration harness: parse network json for %s: %v; payload=%s", containerID, err, strings.TrimSpace(string(output))) } if entry, ok := networks[networkName]; ok { return entry.IPAddress } return "" } // WaitForEngineHealthz polls the engine `/healthz` on port 8080 until // it returns 200 or the timeout fires. On macOS the docker bridge IP is // not routable from the host, so the helper falls back to a transient // `busybox` probe container on the same docker network. On Linux it // dials the bridge IP directly. func WaitForEngineHealthz(t testing.TB, ip string, timeout time.Duration) { t.Helper() if ip == "" { t.Fatalf("integration harness: empty engine ip") } if timeout <= 0 { timeout = containerHealthzTimeout } if dialFromHost(ip, containerHealthzPort, 500*time.Millisecond) { waitForHealthzFromHost(t, ip, timeout) return } network, hostname := containerNetworkAndHostname(t, ip) if network == "" || hostname == "" { t.Fatalf("integration harness: cannot resolve docker network/hostname for engine ip %s", ip) } waitForHealthzViaProbe(t, network, hostname, timeout) } // dialFromHost reports whether tcp connect to ip:port succeeds within // timeout. Used to detect the macOS routing limitation cheaply. func dialFromHost(ip string, port int, timeout time.Duration) bool { conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip, fmt.Sprintf("%d", port)), timeout) if err != nil { return false } _ = conn.Close() return true } func waitForHealthzFromHost(t testing.TB, ip string, timeout time.Duration) { t.Helper() url := fmt.Sprintf("http://%s/healthz", net.JoinHostPort(ip, fmt.Sprintf("%d", containerHealthzPort))) client := &http.Client{ Timeout: 500 * time.Millisecond, Transport: &http.Transport{DisableKeepAlives: true}, } t.Cleanup(client.CloseIdleConnections) deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { t.Fatalf("integration harness: build healthz request for %s: %v", url, err) } resp, err := client.Do(req) if err == nil { resp.Body.Close() if resp.StatusCode == http.StatusOK { return } } time.Sleep(containerHealthzPoll) } t.Fatalf("integration harness: engine /healthz on %s did not return 200 within %s", url, timeout) } // containerNetworkAndHostname locates the bridge network and engine // container hostname behind the given IP so the busybox probe can use // the docker DNS name rather than rely on host routing. The lookup is // scoped to RTM-owned containers (`com.galaxy.owner=rtmanager`). func containerNetworkAndHostname(t testing.TB, ip string) (string, string) { t.Helper() requireDockerDaemon(t) ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout) defer cancel() cmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc", "--filter", "label=com.galaxy.owner=rtmanager", ) output, err := cmd.CombinedOutput() if err != nil { t.Fatalf("integration harness: docker ps for engine probe: %v; output:\n%s", err, strings.TrimSpace(string(output))) } for _, id := range strings.Split(strings.TrimSpace(string(output)), "\n") { id = strings.TrimSpace(id) if id == "" { continue } ipsByNetwork, hostname, ok := inspectIPAndHostname(t, id) if !ok { continue } for networkName, networkIP := range ipsByNetwork { if networkIP == ip { return networkName, hostname } } } return "", "" } func inspectIPAndHostname(t testing.TB, containerID string) (map[string]string, string, bool) { t.Helper() ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout) defer cancel() cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{json .NetworkSettings.Networks}}|{{.Config.Hostname}}", containerID) output, err := cmd.CombinedOutput() if err != nil { return nil, "", false } parts := strings.SplitN(strings.TrimSpace(string(output)), "|", 2) if len(parts) != 2 { return nil, "", false } var networks map[string]struct { IPAddress string `json:"IPAddress"` } if err := json.Unmarshal([]byte(parts[0]), &networks); err != nil { return nil, "", false } ipsByNetwork := make(map[string]string, len(networks)) for name, entry := range networks { ipsByNetwork[name] = entry.IPAddress } return ipsByNetwork, parts[1], true } // waitForHealthzViaProbe runs `wget -qO- http://:8080/healthz` // inside a transient busybox container on networkName until the probe // exits 0 or the timeout fires. func waitForHealthzViaProbe(t testing.TB, networkName, hostname string, timeout time.Duration) { t.Helper() deadline := time.Now().Add(timeout) url := fmt.Sprintf("http://%s:%d/healthz", hostname, containerHealthzPort) for time.Now().Before(deadline) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) cmd := exec.CommandContext(ctx, "docker", "run", "--rm", "--network", networkName, "busybox:stable", "wget", "-qO-", url, ) out, err := cmd.CombinedOutput() cancel() if err == nil && strings.Contains(string(out), "ok") { return } time.Sleep(containerHealthzPoll) } t.Fatalf("integration harness: engine /healthz on %s did not return 200 via probe within %s", url, timeout) } func uniqueSuffix(t testing.TB) string { t.Helper() buf := make([]byte, 4) if _, err := rand.Read(buf); err != nil { t.Fatalf("integration harness: read random suffix: %v", err) } return hex.EncodeToString(buf) }