feat: runtime manager
This commit is contained in:
@@ -0,0 +1,289 @@
|
||||
package harness
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
	// dockerNetworkPrefix namespaces the per-test bridge networks so
	// leftovers are recognisable and cannot collide with user networks.
	dockerNetworkPrefix = "lobbyrtm-it-"
	// dockerNetworkTimeout bounds `docker network create` / `rm` calls.
	dockerNetworkTimeout = 30 * time.Second
	// dockerCLITimeout bounds the other docker CLI invocations (ps, inspect).
	dockerCLITimeout = 30 * time.Second

	// containerHealthzPort is the port the engine serves /healthz on.
	containerHealthzPort = 8080
	// containerHealthzTimeout is the default wait applied when callers
	// pass a non-positive timeout to WaitForEngineHealthz.
	containerHealthzTimeout = 5 * time.Second
	// containerHealthzPoll is the delay between successive health probes.
	containerHealthzPoll = 100 * time.Millisecond
)
|
||||
|
||||
// EnsureDockerNetwork creates a uniquely-named Docker bridge network
|
||||
// for the caller's test and registers cleanup. Each test gets its own
|
||||
// network so concurrent scenarios cannot collide on the per-game DNS
|
||||
// hostname (`galaxy-game-{game_id}`). The helper skips the test when
|
||||
// no Docker daemon is reachable.
|
||||
func EnsureDockerNetwork(t testing.TB) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
name := dockerNetworkPrefix + uniqueSuffix(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerNetworkTimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "network", "create", "--driver", "bridge", name)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: create docker network %q: %v; output:\n%s",
|
||||
name, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
|
||||
t.Cleanup(func() {
|
||||
cleanupCtx, cleanupCancel := context.WithTimeout(context.Background(), dockerNetworkTimeout)
|
||||
defer cleanupCancel()
|
||||
removeCmd := exec.CommandContext(cleanupCtx, "docker", "network", "rm", name)
|
||||
if rmErr := removeCmd.Run(); rmErr != nil {
|
||||
t.Logf("integration harness: remove docker network %q: %v", name, rmErr)
|
||||
}
|
||||
})
|
||||
return name
|
||||
}
|
||||
|
||||
// FindContainerIDByLabel returns the id of the single container —
// running or stopped, since the listing uses `docker ps -a` — labelled
// with the given game id, or an empty string when no match is found.
// The label keys are the ones rtmanager attaches at start time
// (`com.galaxy.owner=rtmanager`, `com.galaxy.game_id=<gameID>`).
// The test fails when more than one container carries the labels.
func FindContainerIDByLabel(t testing.TB, gameID string) string {
	t.Helper()
	requireDockerDaemon(t)

	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	// --no-trunc keeps the full container id so later inspect calls
	// match exactly.
	cmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc",
		"--filter", "label=com.galaxy.owner=rtmanager",
		"--filter", "label=com.galaxy.game_id="+gameID,
	)
	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("integration harness: docker ps for game %s: %v; output:\n%s",
			gameID, err, strings.TrimSpace(string(output)))
	}
	id := strings.TrimSpace(string(output))
	if id == "" {
		return ""
	}
	// More than one line means duplicate containers for the same game —
	// a harness invariant violation worth failing loudly on.
	if strings.Contains(id, "\n") {
		t.Fatalf("integration harness: multiple containers for game %s:\n%s", gameID, id)
	}
	return id
}
|
||||
|
||||
// ContainerState returns the runtime state string (e.g. `running`,
|
||||
// `exited`) of the container with the given id, looked up via
|
||||
// `docker inspect`.
|
||||
func ContainerState(t testing.TB, containerID string) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{.State.Status}}", containerID)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker inspect %s: %v; output:\n%s",
|
||||
containerID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
return strings.TrimSpace(string(output))
|
||||
}
|
||||
|
||||
// ContainerNetworkIP returns the IPv4 address of the named container
|
||||
// inside the named bridge network. Returns an empty string when the
|
||||
// container has no endpoint on that network.
|
||||
func ContainerNetworkIP(t testing.TB, containerID, networkName string) string {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{json .NetworkSettings.Networks}}", containerID)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker inspect networks %s: %v; output:\n%s",
|
||||
containerID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
var networks map[string]struct {
|
||||
IPAddress string `json:"IPAddress"`
|
||||
}
|
||||
if err := json.Unmarshal(output, &networks); err != nil {
|
||||
t.Fatalf("integration harness: parse network json for %s: %v; payload=%s",
|
||||
containerID, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
if entry, ok := networks[networkName]; ok {
|
||||
return entry.IPAddress
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// WaitForEngineHealthz polls the engine `/healthz` on port 8080 until
// it returns 200 or the timeout fires. On macOS the docker bridge IP is
// not routable from the host, so the helper falls back to a transient
// `busybox` probe container on the same docker network. On Linux it
// dials the bridge IP directly.
func WaitForEngineHealthz(t testing.TB, ip string, timeout time.Duration) {
	t.Helper()
	if ip == "" {
		t.Fatalf("integration harness: empty engine ip")
	}
	// Non-positive timeouts fall back to the harness default, so callers
	// may pass 0 to mean "use the default".
	if timeout <= 0 {
		timeout = containerHealthzTimeout
	}

	// Cheap TCP probe decides whether the bridge IP is host-routable.
	// NOTE(review): an engine that is not yet listening also fails this
	// dial and pushes Linux hosts onto the slower probe path — harmless,
	// but worth confirming it was intended.
	if dialFromHost(ip, containerHealthzPort, 500*time.Millisecond) {
		waitForHealthzFromHost(t, ip, timeout)
		return
	}

	// Host cannot reach the bridge IP: probe from inside the docker
	// network using the engine's DNS name instead.
	network, hostname := containerNetworkAndHostname(t, ip)
	if network == "" || hostname == "" {
		t.Fatalf("integration harness: cannot resolve docker network/hostname for engine ip %s", ip)
	}
	waitForHealthzViaProbe(t, network, hostname, timeout)
}
|
||||
|
||||
// dialFromHost reports whether tcp connect to ip:port succeeds within
|
||||
// timeout. Used to detect the macOS routing limitation cheaply.
|
||||
func dialFromHost(ip string, port int, timeout time.Duration) bool {
|
||||
conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip, fmt.Sprintf("%d", port)), timeout)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_ = conn.Close()
|
||||
return true
|
||||
}
|
||||
|
||||
func waitForHealthzFromHost(t testing.TB, ip string, timeout time.Duration) {
|
||||
t.Helper()
|
||||
url := fmt.Sprintf("http://%s/healthz", net.JoinHostPort(ip, fmt.Sprintf("%d", containerHealthzPort)))
|
||||
client := &http.Client{
|
||||
Timeout: 500 * time.Millisecond,
|
||||
Transport: &http.Transport{DisableKeepAlives: true},
|
||||
}
|
||||
t.Cleanup(client.CloseIdleConnections)
|
||||
|
||||
deadline := time.Now().Add(timeout)
|
||||
for time.Now().Before(deadline) {
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: build healthz request for %s: %v", url, err)
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err == nil {
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return
|
||||
}
|
||||
}
|
||||
time.Sleep(containerHealthzPoll)
|
||||
}
|
||||
t.Fatalf("integration harness: engine /healthz on %s did not return 200 within %s", url, timeout)
|
||||
}
|
||||
|
||||
// containerNetworkAndHostname locates the bridge network and engine
|
||||
// container hostname behind the given IP so the busybox probe can use
|
||||
// the docker DNS name rather than rely on host routing. The lookup is
|
||||
// scoped to RTM-owned containers (`com.galaxy.owner=rtmanager`).
|
||||
func containerNetworkAndHostname(t testing.TB, ip string) (string, string) {
|
||||
t.Helper()
|
||||
requireDockerDaemon(t)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc",
|
||||
"--filter", "label=com.galaxy.owner=rtmanager",
|
||||
)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("integration harness: docker ps for engine probe: %v; output:\n%s", err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
for _, id := range strings.Split(strings.TrimSpace(string(output)), "\n") {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
continue
|
||||
}
|
||||
ipsByNetwork, hostname, ok := inspectIPAndHostname(t, id)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for networkName, networkIP := range ipsByNetwork {
|
||||
if networkIP == ip {
|
||||
return networkName, hostname
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
// inspectIPAndHostname returns, for the given container, a map of
// docker network name -> IP address plus the container's configured
// hostname. The boolean is false when inspect fails or its output
// cannot be parsed; the caller treats that as "skip this container"
// rather than a test failure, so errors are deliberately swallowed.
func inspectIPAndHostname(t testing.TB, containerID string) (map[string]string, string, bool) {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	// One inspect yields both facts: the networks JSON and the hostname,
	// separated by a literal `|` for cheap splitting below.
	cmd := exec.CommandContext(ctx, "docker", "inspect", "--format",
		"{{json .NetworkSettings.Networks}}|{{.Config.Hostname}}", containerID)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return nil, "", false
	}
	parts := strings.SplitN(strings.TrimSpace(string(output)), "|", 2)
	if len(parts) != 2 {
		return nil, "", false
	}
	var networks map[string]struct {
		IPAddress string `json:"IPAddress"`
	}
	if err := json.Unmarshal([]byte(parts[0]), &networks); err != nil {
		return nil, "", false
	}
	// Flatten the anonymous struct values into plain strings for callers.
	ipsByNetwork := make(map[string]string, len(networks))
	for name, entry := range networks {
		ipsByNetwork[name] = entry.IPAddress
	}
	return ipsByNetwork, parts[1], true
}
|
||||
|
||||
// waitForHealthzViaProbe runs `wget -qO- http://<hostname>:8080/healthz`
|
||||
// inside a transient busybox container on networkName until the probe
|
||||
// exits 0 or the timeout fires.
|
||||
func waitForHealthzViaProbe(t testing.TB, networkName, hostname string, timeout time.Duration) {
|
||||
t.Helper()
|
||||
deadline := time.Now().Add(timeout)
|
||||
url := fmt.Sprintf("http://%s:%d/healthz", hostname, containerHealthzPort)
|
||||
for time.Now().Before(deadline) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
||||
"--network", networkName,
|
||||
"busybox:stable",
|
||||
"wget", "-qO-", url,
|
||||
)
|
||||
out, err := cmd.CombinedOutput()
|
||||
cancel()
|
||||
if err == nil && strings.Contains(string(out), "ok") {
|
||||
return
|
||||
}
|
||||
time.Sleep(containerHealthzPoll)
|
||||
}
|
||||
t.Fatalf("integration harness: engine /healthz on %s did not return 200 via probe within %s", url, timeout)
|
||||
}
|
||||
|
||||
func uniqueSuffix(t testing.TB) string {
|
||||
t.Helper()
|
||||
buf := make([]byte, 4)
|
||||
if _, err := rand.Read(buf); err != nil {
|
||||
t.Fatalf("integration harness: read random suffix: %v", err)
|
||||
}
|
||||
return hex.EncodeToString(buf)
|
||||
}
|
||||
@@ -0,0 +1,139 @@
|
||||
package harness
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// EngineImageRef is the canonical tag the lobbyrtm boundary suite (and
// any future suite that needs the galaxy/game engine binary) builds and
// runs against. The `-lobbyrtm-it` suffix differs from the
// `-rtm-it` tag the service-local rtmanager/integration harness uses, so
// an operator running both suites locally cannot accidentally consume
// the wrong image, and `docker image rm` of one suite's leftovers does
// not remove the other suite's tag.
const EngineImageRef = "galaxy/game:1.0.0-lobbyrtm-it"

const (
	// imageBuildTimeout bounds the one-off `docker build` of the engine image.
	imageBuildTimeout = 10 * time.Minute
	// dockerDaemonPingTimeout bounds the `docker version` reachability probe.
	dockerDaemonPingTimeout = 5 * time.Second
)

var (
	// engineImageOnce/engineImageErr cache the single per-process image
	// build and its outcome (see EnsureGalaxyGameImage).
	engineImageOnce sync.Once
	engineImageErr  error

	// dockerAvailableOnce/dockerAvailableErr cache the daemon
	// reachability verdict (see requireDockerDaemon).
	dockerAvailableOnce sync.Once
	dockerAvailableErr  error
)
|
||||
|
||||
// RequireDockerDaemon skips the calling test when no Docker daemon is
// reachable from this process. Suites that need Docker but stand up
// testcontainers (Postgres/Redis) before any RTM-specific helper
// should call this helper first so the skip path runs *before* the
// testcontainer client probes the daemon and fails hard.
func RequireDockerDaemon(t testing.TB) {
	t.Helper()
	// Exported shim over the package-private check so external suites
	// share the same per-process cached verdict.
	requireDockerDaemon(t)
}
|
||||
|
||||
// EnsureGalaxyGameImage builds the galaxy/game engine image from the
// workspace root once per test process and returns the canonical tag.
// On hosts without a reachable Docker daemon the helper calls `t.Skip`
// so suites stay green when `/var/run/docker.sock` is missing and
// `DOCKER_HOST` is unset.
//
// The build is wrapped in `sync.Once`; concurrent suite invocations
// share the same image. The Dockerfile path and build context match
// `rtmanager/integration/harness/docker.go::buildAndTagEngineImage` —
// galaxy's `go.work` resolves `galaxy/{model,error,...}` only when the
// workspace root is the build context.
func EnsureGalaxyGameImage(t testing.TB) string {
	t.Helper()
	requireDockerDaemon(t)

	engineImageOnce.Do(func() {
		engineImageErr = buildEngineImage()
	})
	// A failed build is cached for the whole process: every subsequent
	// caller fails fast with the same error instead of rebuilding.
	if engineImageErr != nil {
		t.Fatalf("integration harness: build galaxy/game image: %v", engineImageErr)
	}
	return EngineImageRef
}
|
||||
|
||||
func buildEngineImage() error {
|
||||
root, err := workspaceRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolve workspace root: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), imageBuildTimeout)
|
||||
defer cancel()
|
||||
|
||||
dockerfilePath := filepath.Join("game", "Dockerfile")
|
||||
cmd := exec.CommandContext(ctx, "docker", "build",
|
||||
"-f", dockerfilePath,
|
||||
"-t", EngineImageRef,
|
||||
".",
|
||||
)
|
||||
cmd.Dir = root
|
||||
cmd.Env = append(os.Environ(), "DOCKER_BUILDKIT=1")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("docker build (-f %s) in %s: %w; output:\n%s",
|
||||
dockerfilePath, root, err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// requireDockerDaemon skips the calling test when no Docker daemon is
// reachable from this process. The check runs once per process and
// caches the verdict so successive callers do not pay the ping cost.
//
// NOTE(review): a transient daemon outage at the first call is cached
// for the rest of the process; the test binary must be rerun once
// Docker is restored.
func requireDockerDaemon(t testing.TB) {
	t.Helper()
	dockerAvailableOnce.Do(func() {
		dockerAvailableErr = pingDockerDaemon()
	})
	if dockerAvailableErr != nil {
		t.Skipf("integration harness: docker daemon unavailable: %v", dockerAvailableErr)
	}
}
|
||||
|
||||
func pingDockerDaemon() error {
|
||||
if os.Getenv("DOCKER_HOST") == "" {
|
||||
if _, err := os.Stat("/var/run/docker.sock"); err != nil {
|
||||
return fmt.Errorf("set DOCKER_HOST or expose /var/run/docker.sock: %w", err)
|
||||
}
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), dockerDaemonPingTimeout)
|
||||
defer cancel()
|
||||
cmd := exec.CommandContext(ctx, "docker", "version", "--format", "{{.Server.Version}}")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("docker version: %w; output:\n%s", err, strings.TrimSpace(string(output)))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// workspaceRoot resolves the absolute path of the galaxy/ workspace
// root by anchoring on this file's location. The harness lives at
// `galaxy/integration/internal/harness/engineimage.go`; the workspace
// root is three directories up.
func workspaceRoot() (string, error) {
	_, file, _, ok := runtime.Caller(0)
	if !ok {
		return "", errors.New("resolve runtime caller for workspace root")
	}
	return filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..", "..")), nil
}
|
||||
@@ -0,0 +1,54 @@
|
||||
package harness
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// RTManagerServicePersistence captures the per-test persistence
// dependencies of the Runtime Manager binary: a PostgreSQL container
// hosting the `rtmanager` schema owned by the `rtmanagerservice` role,
// plus the Redis credentials that point the service at the
// caller-supplied master address.
type RTManagerServicePersistence struct {
	// Postgres exposes the started container so tests that need direct
	// SQL access to the rtmanager schema can read or write through it.
	Postgres *PostgresRuntime

	// Env carries the environment entries that must be passed to the
	// rtmanager process (RTMANAGER_* Postgres settings plus
	// RTMANAGER_REDIS_MASTER_ADDR / RTMANAGER_REDIS_PASSWORD). It is
	// safe to merge into the caller's existing env map, or to use as-is
	// and append further RTMANAGER_* knobs in place.
	// RTMANAGER_GAME_STATE_ROOT is intentionally omitted; the caller
	// supplies a per-test directory.
	Env map[string]string
}
|
||||
|
||||
// StartRTManagerServicePersistence brings up one isolated PostgreSQL
|
||||
// container, provisions the `rtmanager` schema with the
|
||||
// `rtmanagerservice` role, and returns the environment entries that
|
||||
// wire the rtmanager binary at that container plus the supplied Redis
|
||||
// master address.
|
||||
//
|
||||
// The Redis password value matches the architectural rule that Redis
|
||||
// traffic is password-protected; miniredis accepts arbitrary password
|
||||
// values when its own RequireAuth is not engaged, and the same value
|
||||
// works against the real testcontainers Redis runtime.
|
||||
//
|
||||
// Cleanup of the container is handled by StartPostgresContainer through
|
||||
// `t.Cleanup`; callers do not need to defer anything.
|
||||
func StartRTManagerServicePersistence(t testing.TB, redisMasterAddr string) RTManagerServicePersistence {
|
||||
t.Helper()
|
||||
|
||||
rt := StartPostgresContainer(t)
|
||||
if err := rt.EnsureRoleAndSchema(context.Background(), "rtmanager", "rtmanagerservice", "rtmanagerservice"); err != nil {
|
||||
t.Fatalf("ensure rtmanager schema/role: %v", err)
|
||||
}
|
||||
|
||||
env := WithPostgres(rt, "RTMANAGER", "rtmanager", "rtmanagerservice")
|
||||
env["RTMANAGER_REDIS_MASTER_ADDR"] = redisMasterAddr
|
||||
env["RTMANAGER_REDIS_PASSWORD"] = "integration"
|
||||
return RTManagerServicePersistence{
|
||||
Postgres: rt,
|
||||
Env: env,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user