399 lines
12 KiB
Go
399 lines
12 KiB
Go
package harness
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"log/slog"
|
|
"net/url"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"galaxy/postgres"
|
|
"galaxy/redisconn"
|
|
"galaxy/rtmanager/internal/app"
|
|
"galaxy/rtmanager/internal/config"
|
|
|
|
"github.com/redis/go-redis/v9"
|
|
)
|
|
|
|
// Default stream key shapes used by the integration suite. They match
// the production defaults so the wire shapes asserted in `streams.go`
// are identical to what Game Lobby sees in `integration/lobbyrtm`.
const (
	StartJobsStream        = "runtime:start_jobs"
	StopJobsStream         = "runtime:stop_jobs"
	JobResultsStream       = "runtime:job_results"
	HealthEventsStream     = "runtime:health_events"
	NotificationIntentsKey = "notification:intents"

	// gameStateRootSubdir is the directory name created under the
	// test's artifact dir to hold per-game engine state (see stateRoot).
	gameStateRootSubdir = "game-state"

	// listenAddr requests an ephemeral localhost port so parallel test
	// processes never collide on the internal HTTP listener.
	listenAddr = "127.0.0.1:0"

	// listenerWaitTimeout bounds both the listener-bind wait and the
	// /readyz wait performed by NewEnv.
	listenerWaitTimeout = 10 * time.Second

	// readyzPollInterval is the poll cadence used by waitForListener
	// and waitForReady.
	readyzPollInterval = 25 * time.Millisecond

	// cleanupShutdownTimeout bounds how long the t.Cleanup handler
	// waits for runtime.Run to return after cancellation.
	cleanupShutdownTimeout = 30 * time.Second
)
|
|
|
|
// Env carries everything one integration scenario needs to drive the
// Runtime Manager process. The struct is value-typed so tests reach
// fields without intermediate getters.
type Env struct {
	// Cfg is the resolved Runtime Manager configuration handed to
	// `app.NewRuntime`. Tests inspect it for stream key shapes,
	// container defaults, and timeout knobs.
	Cfg config.Config

	// Runtime is the in-process Runtime Manager exposed for tests that
	// need to peek at internal state (`runtime.InternalServer().Addr()`).
	Runtime *app.Runtime

	// Postgres holds the per-package PostgreSQL fixture.
	Postgres *PostgresEnv

	// Redis holds the per-package Redis fixture plus a fresh client the
	// test owns. NOTE(review): the client comes from rd.NewClient(t),
	// which presumably registers its own close via t.Cleanup — confirm
	// against RedisEnv.NewClient.
	Redis       *RedisEnv
	RedisClient *redis.Client

	// Docker holds the per-package Docker daemon handle.
	Docker *DockerEnv

	// Lobby is the per-test stub HTTP server.
	Lobby *LobbyStub

	// Network is the unique Docker network name created for this test.
	Network string

	// EngineImageRef and PatchedImageRef are the two semver-compatible
	// engine image tags the harness builds once per package. Patch
	// scenarios point at the second tag.
	EngineImageRef  string
	PatchedImageRef string

	// GameStateRoot is the host filesystem path RTM writes per-game
	// state directories under. It lives inside `t.ArtifactDir()` so
	// failed scenarios leave the engine state behind for inspection.
	GameStateRoot string

	// InternalAddr is the bound address of RTM's internal HTTP listener
	// (resolved after Run binds the port).
	InternalAddr string
}
|
|
|
|
// EnvOptions carry per-test overrides to the harness defaults. Empty
// fields fall back to the defaults declared at the top of this file.
// Zero or negative durations/ints are treated as "not set" (see
// pickDuration / pickInt).
type EnvOptions struct {
	// ReconcileInterval overrides the periodic reconciler interval.
	// Default 500ms (so reconcile drift is observable inside a single
	// scenario timeout).
	ReconcileInterval time.Duration

	// CleanupInterval overrides the container-cleanup interval.
	CleanupInterval time.Duration

	// InspectInterval overrides the Docker inspect worker interval.
	InspectInterval time.Duration

	// ProbeInterval / ProbeTimeout / ProbeFailuresThreshold override
	// the active engine probe knobs.
	ProbeInterval          time.Duration
	ProbeTimeout           time.Duration
	ProbeFailuresThreshold int

	// GameLeaseTTL overrides the per-game Redis lease TTL.
	GameLeaseTTL time.Duration

	// StreamBlockTimeout overrides the consumer XREAD block window.
	StreamBlockTimeout time.Duration

	// LogToStderr makes the harness write the runtime's structured
	// logs to stderr; the default discards them so test output stays
	// focused on assertions.
	LogToStderr bool
}
|
|
|
|
// NewEnv stands up a fresh Runtime Manager process for the calling
|
|
// test. It blocks until the internal HTTP listener is bound; tests can
|
|
// issue REST and stream requests immediately after the call returns.
|
|
//
|
|
// `t.Cleanup` runs in reverse order: stop the runtime, close the
|
|
// runtime, close the per-test redis client, remove the docker network,
|
|
// terminate the lobby stub. Containers RTM created during the test are
|
|
// removed by the test's own cleanup paths or by the integration
|
|
// `health_test` external-action helpers.
|
|
func NewEnv(t *testing.T, opts EnvOptions) *Env {
|
|
t.Helper()
|
|
|
|
pg := EnsurePostgres(t)
|
|
rd := EnsureRedis(t)
|
|
dk := EnsureDocker(t)
|
|
imageRef := EnsureEngineImage(t)
|
|
TruncatePostgres(t)
|
|
FlushRedis(t)
|
|
network := EnsureNetwork(t)
|
|
lobby := NewLobbyStub(t)
|
|
stateRoot := stateRoot(t)
|
|
|
|
cfg := buildConfig(buildConfigInput{
|
|
PostgresDSN: pg.DSN(),
|
|
RedisAddr: rd.Addr(),
|
|
DockerHost: resolveDockerHost(),
|
|
Network: network,
|
|
LobbyURL: lobby.URL(),
|
|
GameStateRoot: stateRoot,
|
|
ReconcileInterval: pickDuration(opts.ReconcileInterval, 500*time.Millisecond),
|
|
CleanupInterval: pickDuration(opts.CleanupInterval, 500*time.Millisecond),
|
|
InspectInterval: pickDuration(opts.InspectInterval, 500*time.Millisecond),
|
|
ProbeInterval: pickDuration(opts.ProbeInterval, 500*time.Millisecond),
|
|
ProbeTimeout: pickDuration(opts.ProbeTimeout, time.Second),
|
|
ProbeFailures: pickInt(opts.ProbeFailuresThreshold, 2),
|
|
GameLeaseTTL: pickDuration(opts.GameLeaseTTL, 5*time.Second),
|
|
StreamBlockTimeout: pickDuration(opts.StreamBlockTimeout, 200*time.Millisecond),
|
|
})
|
|
|
|
logger := newLogger(opts.LogToStderr)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
runtime, err := app.NewRuntime(ctx, cfg, logger)
|
|
if err != nil {
|
|
cancel()
|
|
t.Fatalf("rtmanager integration: new runtime: %v", err)
|
|
}
|
|
|
|
runDone := make(chan error, 1)
|
|
go func() {
|
|
runDone <- runtime.Run(ctx)
|
|
}()
|
|
|
|
internalAddr := waitForListener(t, runtime)
|
|
waitForReady(t, runtime, listenerWaitTimeout)
|
|
|
|
var cleanupOnce sync.Once
|
|
t.Cleanup(func() {
|
|
cleanupOnce.Do(func() {
|
|
cancel()
|
|
waitCtx, waitCancel := context.WithTimeout(context.Background(), cleanupShutdownTimeout)
|
|
defer waitCancel()
|
|
select {
|
|
case err := <-runDone:
|
|
if err != nil && !isCleanShutdownErr(err) {
|
|
t.Logf("rtmanager integration: runtime.Run returned: %v", err)
|
|
}
|
|
case <-waitCtx.Done():
|
|
t.Logf("rtmanager integration: runtime did not stop within %s", cleanupShutdownTimeout)
|
|
}
|
|
if err := runtime.Close(); err != nil {
|
|
t.Logf("rtmanager integration: runtime.Close: %v", err)
|
|
}
|
|
})
|
|
})
|
|
|
|
return &Env{
|
|
Cfg: cfg,
|
|
Runtime: runtime,
|
|
Postgres: pg,
|
|
Redis: rd,
|
|
RedisClient: rd.NewClient(t),
|
|
Docker: dk,
|
|
Lobby: lobby,
|
|
Network: network,
|
|
EngineImageRef: imageRef,
|
|
PatchedImageRef: PatchedEngineImageRef,
|
|
GameStateRoot: stateRoot,
|
|
InternalAddr: internalAddr,
|
|
}
|
|
}
|
|
|
|
// buildConfigInput collects every per-test value buildConfig needs to
// assemble a config.Config. All fields are expected to be populated;
// the duration and int knobs arrive pre-defaulted by NewEnv (via
// pickDuration / pickInt).
type buildConfigInput struct {
	// Endpoints and per-test resources.
	PostgresDSN   string
	RedisAddr     string
	DockerHost    string
	Network       string
	LobbyURL      string
	GameStateRoot string

	// Timing and threshold knobs (see EnvOptions for semantics).
	ReconcileInterval  time.Duration
	CleanupInterval    time.Duration
	InspectInterval    time.Duration
	ProbeInterval      time.Duration
	ProbeTimeout       time.Duration
	ProbeFailures      int
	GameLeaseTTL       time.Duration
	StreamBlockTimeout time.Duration
}
|
|
|
|
// buildConfig materializes a full config.Config for one test env. It
// starts from the production defaults and overrides only what the
// harness needs: an ephemeral localhost listener, the per-package
// PG/Redis/Docker endpoints, the suite's stream keys, and the timing
// knobs carried in via buildConfigInput.
func buildConfig(in buildConfigInput) config.Config {
	cfg := config.DefaultConfig()
	// Ephemeral port ("127.0.0.1:0") so parallel test processes never
	// collide on the internal HTTP listener.
	cfg.InternalHTTP.Addr = listenAddr

	cfg.Docker.Host = in.DockerHost
	cfg.Docker.Network = in.Network
	// Reuse the engine image built once per package instead of pulling.
	cfg.Docker.PullPolicy = config.ImagePullPolicyIfMissing

	// Small pool: each test process talks to its own fixture, so a
	// handful of connections is plenty.
	cfg.Postgres = config.PostgresConfig{
		Conn: postgres.Config{
			PrimaryDSN:       in.PostgresDSN,
			OperationTimeout: pgOperationTimeout,
			MaxOpenConns:     5,
			MaxIdleConns:     2,
			ConnMaxLifetime:  30 * time.Minute,
		},
	}

	// NOTE(review): the password must match what the per-package Redis
	// fixture configures — presumably set up in EnsureRedis; confirm.
	cfg.Redis = config.RedisConfig{
		Conn: redisconn.Config{
			MasterAddr:       in.RedisAddr,
			Password:         "integration",
			OperationTimeout: 2 * time.Second,
		},
	}

	// Stream keys pinned to the suite constants so wire-shape asserts
	// elsewhere in the harness line up with production defaults.
	cfg.Streams.StartJobs = StartJobsStream
	cfg.Streams.StopJobs = StopJobsStream
	cfg.Streams.JobResults = JobResultsStream
	cfg.Streams.HealthEvents = HealthEventsStream
	cfg.Streams.NotificationIntents = NotificationIntentsKey
	cfg.Streams.BlockTimeout = in.StreamBlockTimeout

	cfg.Container.GameStateRoot = in.GameStateRoot
	// Pin chown target to the current process uid/gid; the dev sandbox
	// (and unprivileged dev machines) cannot chown to root.
	cfg.Container.GameStateOwnerUID = os.Getuid()
	cfg.Container.GameStateOwnerGID = os.Getgid()

	cfg.Health.InspectInterval = in.InspectInterval
	cfg.Health.ProbeInterval = in.ProbeInterval
	cfg.Health.ProbeTimeout = in.ProbeTimeout
	cfg.Health.ProbeFailuresThreshold = in.ProbeFailures

	cfg.Cleanup.ReconcileInterval = in.ReconcileInterval
	cfg.Cleanup.CleanupInterval = in.CleanupInterval

	cfg.Coordination.GameLeaseTTL = in.GameLeaseTTL

	// Point RTM at the per-test lobby stub.
	cfg.Lobby = config.LobbyConfig{
		BaseURL: in.LobbyURL,
		Timeout: 2 * time.Second,
	}

	// No exporters in tests: keep telemetry fully local and silent.
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	return cfg
}
|
|
|
|
func newLogger(toStderr bool) *slog.Logger {
|
|
if toStderr {
|
|
return slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
|
|
}
|
|
return slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{Level: slog.LevelError}))
|
|
}
|
|
|
|
func stateRoot(t *testing.T) string {
|
|
t.Helper()
|
|
dir := t.ArtifactDir()
|
|
root := dir + string(os.PathSeparator) + gameStateRootSubdir
|
|
if err := os.MkdirAll(root, 0o755); err != nil {
|
|
t.Fatalf("rtmanager integration: create game-state root %q: %v", root, err)
|
|
}
|
|
return root
|
|
}
|
|
|
|
func resolveDockerHost() string {
|
|
if host := strings.TrimSpace(os.Getenv("DOCKER_HOST")); host != "" {
|
|
return host
|
|
}
|
|
return "unix:///var/run/docker.sock"
|
|
}
|
|
|
|
func pickDuration(value, fallback time.Duration) time.Duration {
|
|
if value > 0 {
|
|
return value
|
|
}
|
|
return fallback
|
|
}
|
|
|
|
// pickInt returns value when it is positive; zero or negative values
// select the fallback.
func pickInt(value, fallback int) int {
	if value <= 0 {
		return fallback
	}
	return value
}
|
|
|
|
// waitForListener spins until `runtime.InternalServer().Addr()` returns
|
|
// a non-empty value or the deadline fires. The internal listener binds
|
|
// during `runtime.Run`, which runs in its own goroutine; this helper
|
|
// is the bridge between "Run started" and "tests can use REST".
|
|
func waitForListener(t *testing.T, runtime *app.Runtime) string {
|
|
t.Helper()
|
|
deadline := time.Now().Add(listenerWaitTimeout)
|
|
for {
|
|
if runtime != nil && runtime.InternalServer() != nil {
|
|
if addr := runtime.InternalServer().Addr(); addr != "" {
|
|
return addr
|
|
}
|
|
}
|
|
if time.Now().After(deadline) {
|
|
t.Fatalf("rtmanager integration: internal HTTP listener did not bind within %s", listenerWaitTimeout)
|
|
}
|
|
time.Sleep(readyzPollInterval)
|
|
}
|
|
}
|
|
|
|
// waitForReady polls `/readyz` until it returns 200 or the deadline
|
|
// fires. RTM's readyz pings PG, Redis, and Docker; a successful
|
|
// response means every dependency is reachable through the runtime
|
|
// process.
|
|
func waitForReady(t *testing.T, runtime *app.Runtime, timeout time.Duration) {
|
|
t.Helper()
|
|
deadline := time.Now().Add(timeout)
|
|
addr := runtime.InternalServer().Addr()
|
|
probeURL := (&url.URL{Scheme: "http", Host: addr, Path: "/readyz"}).String()
|
|
for {
|
|
req, err := newRequest(context.Background(), "GET", probeURL, nil)
|
|
if err == nil {
|
|
resp, err := defaultHTTPClient.Do(req)
|
|
if err == nil {
|
|
_, _ = io.Copy(io.Discard, resp.Body)
|
|
_ = resp.Body.Close()
|
|
if resp.StatusCode == 200 {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
if time.Now().After(deadline) {
|
|
t.Fatalf("rtmanager integration: /readyz did not return 200 within %s", timeout)
|
|
}
|
|
time.Sleep(readyzPollInterval)
|
|
}
|
|
}
|
|
|
|
func isCleanShutdownErr(err error) bool {
|
|
return err == nil || errors.Is(err, context.Canceled)
|
|
}
|
|
|
|
// IDFromTestName builds a deterministic-but-unique game id from the
|
|
// caller's test name. Two tests with the same name running back-to-back
|
|
// would otherwise collide on PG state through the per-test
|
|
// `TruncatePostgres` window; pinning the suffix to `Now().UnixNano()`
|
|
// rules that out.
|
|
func IDFromTestName(t *testing.T) string {
|
|
t.Helper()
|
|
// The container hostname is `galaxy-game-{game_id}` and must fit
|
|
// HOST_NAME_MAX=64 chars; runc rejects longer values with
|
|
// "sethostname: invalid argument". Cap the lowercased test-name
|
|
// component at 36 chars and append a 16-char base36 suffix so the
|
|
// total stays comfortably under the limit (12 + 36 + 1 + 16 = 65 →
|
|
// trim further if needed).
|
|
const maxNameLen = 35
|
|
suffix := strconv.FormatInt(time.Now().UnixNano(), 36)
|
|
prefix := strings.ToLower(strings.NewReplacer("/", "-", " ", "-").Replace(t.Name()))
|
|
if len(prefix) > maxNameLen {
|
|
prefix = prefix[:maxNameLen]
|
|
}
|
|
return prefix + "-" + suffix
|
|
}
|