feat: runtime manager
This commit is contained in:
@@ -0,0 +1,289 @@
|
||||
package harness
|
||||
|
||||
import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"net"
	"net/http"
	"os/exec"
	"strconv"
	"strings"
	"testing"
	"time"
)
|
||||
|
||||
// Timeouts, ports, and naming constants shared by the docker-backed
// integration-harness helpers in this file.
const (
	// dockerNetworkPrefix namespaces the per-test bridge networks created
	// by EnsureDockerNetwork so stray ones are easy to spot and remove.
	dockerNetworkPrefix = "lobbyrtm-it-"
	// dockerNetworkTimeout bounds `docker network create` and `docker
	// network rm` invocations.
	dockerNetworkTimeout = 30 * time.Second
	// dockerCLITimeout bounds the other docker CLI invocations used here
	// (`docker ps`, `docker inspect`).
	dockerCLITimeout = 30 * time.Second

	// containerHealthzPort is the fixed in-container port on which the
	// engine's /healthz endpoint is probed.
	containerHealthzPort = 8080
	// containerHealthzTimeout is the default overall wait applied when a
	// caller passes a non-positive timeout to WaitForEngineHealthz.
	containerHealthzTimeout = 5 * time.Second
	// containerHealthzPoll is the delay between successive health probes.
	containerHealthzPoll = 100 * time.Millisecond
)
|
||||
|
||||
// EnsureDockerNetwork creates a uniquely-named Docker bridge network
|
||||
// for the caller's test and registers cleanup. Each test gets its own
|
||||
// network so concurrent scenarios cannot collide on the per-game DNS
|
||||
// hostname (`galaxy-game-{game_id}`). The helper skips the test when
|
||||
// no Docker daemon is reachable.
|
||||
func EnsureDockerNetwork(t testing.TB) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
name := dockerNetworkPrefix + uniqueSuffix(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerNetworkTimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "network", "create", "--driver", "bridge", name)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: create docker network %q: %v; output:\n%s",
|
||||
name, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
|
||||
t.Cleanup(func() {
|
||||
cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), dockerNetworkTimeout)
|
||||
defer cleanupCancel()
|
||||
removeCmd := exec.CommandContext(cleanupCtx, "docker", "network", "rm", name)
|
||||
if rmErr := removeCmd.Run(); rmErr != nil {
|
||||
t.Logf("integration harness: remove docker network %q: %v", name, rmErr)
|
||||
}
|
||||
})
|
||||
return name
|
||||
}
|
||||
|
||||
// FindContainerIDByLabel returns the id of the single running container
|
||||
// labelled with the given game id, or an empty string when no match is
|
||||
// found. The label keys are the ones rtmanager attaches at start time
|
||||
// (`com.galaxy.owner=rtmanager`, `com.galaxy.game_id=<gameID>`).
|
||||
func FindContainerIDByLabel(t testing.TB, gameID string) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc",
|
||||
"--filter", "label=com.galaxy.owner=rtmanager",
|
||||
"--filter", "label=com.galaxy.game_id="+gameID,
|
||||
)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker ps for game %s: %v; output:\n%s",
|
||||
gameID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
id := strings.TrimSpace(string(output))
|
||||
if id == "" {
|
||||
return ""
|
||||
}
|
||||
if strings.Contains(id, "\n") {
|
||||
t.Fatalf("integration harness: multiple containers for game %s:\n%s", gameID, id)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// ContainerState returns the runtime state string (e.g. `running`,
|
||||
// `exited`) of the container with the given id, looked up via
|
||||
// `docker inspect`.
|
||||
func ContainerState(t testing.TB, containerID string) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{.State.Status}}", containerID)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker inspect %s: %v; output:\n%s",
|
||||
containerID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
return strings.TrimSpace(string(output))
|
||||
}
|
||||
|
||||
// ContainerNetworkIP returns the IPv4 address of the named container
|
||||
// inside the named bridge network. Returns an empty string when the
|
||||
// container has no endpoint on that network.
|
||||
func ContainerNetworkIP(t testing.TB, containerID, networkName string) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{json .NetworkSettings.Networks}}", containerID)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker inspect networks %s: %v; output:\n%s",
|
||||
containerID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
var networks map[string]struct {
|
||||
IPAddress string `json:"IPAddress"`
|
||||
}
|
||||
if err := json.Unmarshal(output, &networks); err != nil {
|
||||
t.Fatalf("integration harness: parse network json for %s: %v; payload=%s",
|
||||
containerID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
if entry, ok := networks[networkName]; ok {
|
||||
return entry.IPAddress
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// WaitForEngineHealthz polls the engine `/healthz` on port 8080 until
|
||||
// it returns 200 or the timeout fires. On macOS the docker bridge IP is
|
||||
// not routable from the host, so the helper falls back to a transient
|
||||
// `busybox` probe container on the same docker network. On Linux it
|
||||
// dials the bridge IP directly.
|
||||
func WaitForEngineHealthz(t testing.TB, ip string, timeout time.Duration) {
|
||||
t.Helper()
|
||||
if ip == "" {
|
||||
t.Fatalf("integration harness: empty engine ip")
|
||||
}
|
||||
if timeout <= 0 {
|
||||
timeout = containerHealthzTimeout
|
||||
}
|
||||
|
||||
if dialFromHost(ip, containerHealthzPort, 500*time.Millisecond) {
|
||||
waitForHealthzFromHost(t, ip, timeout)
|
||||
return
|
||||
}
|
||||
|
||||
network, hostname := containerNetworkAndHostname(t, ip)
|
||||
if network == "" || hostname == "" {
|
||||
t.Fatalf("integration harness: cannot resolve docker network/hostname for engine ip %s", ip)
|
||||
}
|
||||
waitForHealthzViaProbe(t, network, hostname, timeout)
|
||||
}
|
||||
|
||||
// dialFromHost reports whether tcp connect to ip:port succeeds within
|
||||
// timeout. Used to detect the macOS routing limitation cheaply.
|
||||
func dialFromHost(ip string, port int, timeout time.Duration) bool {
|
||||
conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip, fmt.Sprintf("%d", port)), timeout)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = conn.Close()
|
||||
return true
|
||||
}
|
||||
|
||||
func waitForHealthzFromHost(t testing.TB, ip string, timeout time.Duration) {
|
||||
t.Helper()
|
||||
url := fmt.Sprintf("http://%s/healthz", net.JoinHostPort(ip, fmt.Sprintf("%d", containerHealthzPort)))
|
||||
client := &http.Client{
|
||||
Timeout: 500 * time.Millisecond,
|
||||
Transport: &http.Transport{DisableKeepAlives: true},
|
||||
}
|
||||
t.Cleanup(client.CloseIdleConnections)
|
||||
|
||||
deadline := time.Now().Add(timeout)
|
||||
for time.Now().Before(deadline) {
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: build healthz request for %s: %v", url, err)
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err == nil {
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return
|
||||
}
|
||||
}
|
||||
time.Sleep(containerHealthzPoll)
|
||||
}
|
||||
t.Fatalf("integration harness: engine /healthz on %s did not return 200 within %s", url, timeout)
|
||||
}
|
||||
|
||||
// containerNetworkAndHostname locates the bridge network and engine
|
||||
// container hostname behind the given IP so the busybox probe can use
|
||||
// the docker DNS name rather than rely on host routing. The lookup is
|
||||
// scoped to RTM-owned containers (`com.galaxy.owner=rtmanager`).
|
||||
func containerNetworkAndHostname(t testing.TB, ip string) (string, string) {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc",
|
||||
"--filter", "label=com.galaxy.owner=rtmanager",
|
||||
)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker ps for engine probe: %v; output:\n%s", err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
for _, id := range strings.Split(strings.TrimSpace(string(output)), "\n") {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
continue
|
||||
}
|
||||
ipsByNetwork, hostname, ok := inspectIPAndHostname(t, id)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for networkName, networkIP := range ipsByNetwork {
|
||||
if networkIP == ip {
|
||||
return networkName, hostname
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func inspectIPAndHostname(t testing.TB, containerID string) (map[string]string, string, bool) {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "inspect", "--format",
|
||||
"{{json .NetworkSettings.Networks}}|{{.Config.Hostname}}", containerID)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return nil, "", false
|
||||
}
|
||||
parts := strings.SplitN(strings.TrimSpace(string(output)), "|", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, "", false
|
||||
}
|
||||
var networks map[string]struct {
|
||||
IPAddress string `json:"IPAddress"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(parts[0]), &networks); err != nil {
|
||||
return nil, "", false
|
||||
}
|
||||
ipsByNetwork := make(map[string]string, len(networks))
|
||||
for name, entry := range networks {
|
||||
ipsByNetwork[name] = entry.IPAddress
|
||||
}
|
||||
return ipsByNetwork, parts[1], true
|
||||
}
|
||||
|
||||
// waitForHealthzViaProbe runs `wget -qO- http://<hostname>:8080/healthz`
|
||||
// inside a transient busybox container on networkName until the probe
|
||||
// exits 0 or the timeout fires.
|
||||
func waitForHealthzViaProbe(t testing.TB, networkName, hostname string, timeout time.Duration) {
|
||||
t.Helper()
|
||||
deadline := time.Now().Add(timeout)
|
||||
url := fmt.Sprintf("http://%s:%d/healthz", hostname, containerHealthzPort)
|
||||
for time.Now().Before(deadline) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
||||
"--network", networkName,
|
||||
"busybox:stable",
|
||||
"wget", "-qO-", url,
|
||||
)
|
||||
out, err := cmd.CombinedOutput()
|
||||
cancel()
|
||||
if err == nil && strings.Contains(string(out), "ok") {
|
||||
return
|
||||
}
|
||||
time.Sleep(containerHealthzPoll)
|
||||
}
|
||||
t.Fatalf("integration harness: engine /healthz on %s did not return 200 via probe within %s", url, timeout)
|
||||
}
|
||||
|
||||
func uniqueSuffix(t testing.TB) string {
|
||||
t.Helper()
|
||||
buf := make([]byte, 4)
|
||||
if _, err := rand.Read(buf); err != nil {
|
||||
t.Fatalf("integration harness: read random suffix: %v", err)
|
||||
}
|
||||
return hex.EncodeToString(buf)
|
||||
}
|
||||
Reference in New Issue
Block a user