Files
Ilia Denisov cf41be9eff fix: mock /healthz in runtime service e2e test
TestServiceStartGameEndToEnd's httptest server had no handler for
/healthz, the path engineclient.Healthz probes after a runtime
container starts. Without it the runtime never transitions out of
starting state and the test fails on its 5s deadline. Surfaced by
introducing CI that runs the backend service tests outside the
integration harness.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-07 08:24:25 +02:00

301 lines
9.2 KiB
Go

package runtime_test
import (
"context"
"database/sql"
"encoding/json"
"net/http"
"net/http/httptest"
"net/url"
"sync"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/dockerclient"
"galaxy/backend/internal/engineclient"
backendpg "galaxy/backend/internal/postgres"
"galaxy/backend/internal/runtime"
"galaxy/model/rest"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
"go.uber.org/zap/zaptest"
)
// Postgres testcontainer settings shared by the tests in this file.
const (
	pgImage    = "postgres:16-alpine"
	pgUser     = "galaxy"
	pgPassword = "galaxy"
	pgDatabase = "galaxy_backend"
	pgSchema   = "backend"          // search_path applied to every pooled connection
	pgStartup  = 90 * time.Second   // max wait for the container's readiness log
	pgOpTO     = 10 * time.Second   // per-operation timeout configured on the pool
)
// dsnWithSearchPath returns raw with its search_path query parameter set
// to schema. All other query parameters are preserved; the error is the
// url.Parse failure for a malformed DSN.
func dsnWithSearchPath(raw, schema string) (string, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return "", err
	}
	params := u.Query()
	params.Set("search_path", schema)
	u.RawQuery = params.Encode()
	return u.String(), nil
}
// startPostgres launches a throwaway Postgres container, opens a pool whose
// search_path is scoped to the backend schema, applies the backend
// migrations, and returns the ready-to-use *sql.DB. If no container runtime
// is available the calling test is skipped rather than failed. All cleanup
// (cancel, terminate, close) is registered via t.Cleanup.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)
	container, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			// The official image logs this line twice (once during initdb's
			// temporary server), so wait for the second occurrence.
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	if err != nil {
		// No Docker daemon / image pull failure: treat as an environment
		// limitation, not a test failure.
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(container); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	baseDSN, err := container.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	// Scope every connection to the backend schema via search_path.
	scopedDSN, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}
	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pgOpTO
	db, err := pgshared.OpenPrimary(ctx, cfg, backendpg.NoObservabilityOptions()...)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	t.Cleanup(func() { _ = db.Close() })
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// fakeDocker implements dockerclient.Client for tests. It records calls
// under mu and fabricates results without touching a real Docker daemon.
type fakeDocker struct {
	mu         sync.Mutex
	runs       []dockerclient.RunSpec            // every spec passed to Run
	stoppedIDs []string                          // container IDs passed to Stop
	removedIDs []string                          // container IDs passed to Remove
	listResult []dockerclient.ContainerSummary   // canned response for List
	endpointFor func(spec dockerclient.RunSpec) string // optional override for the engine endpoint returned by Run
}
// EnsureNetwork is a no-op: the fake treats every network as present.
func (f *fakeDocker) EnsureNetwork(_ context.Context, _ string) error {
	return nil
}
// PullImage is a no-op: the fake never pulls and reports success for any ref.
func (f *fakeDocker) PullImage(_ context.Context, _ string, _ dockerclient.PullPolicy) error {
	return nil
}
// InspectImage echoes the requested ref back in a minimal inspect result.
func (f *fakeDocker) InspectImage(_ context.Context, ref string) (dockerclient.ImageInspect, error) {
	inspect := dockerclient.ImageInspect{Ref: ref}
	return inspect, nil
}
// InspectContainer reports a zero-value inspect for any container ID.
func (f *fakeDocker) InspectContainer(_ context.Context, _ string) (dockerclient.ContainerInspect, error) {
	var inspect dockerclient.ContainerInspect
	return inspect, nil
}
// Run records the spec and fabricates a successful RunResult. The engine
// endpoint defaults to http://<hostname>:8080 unless endpointFor is set,
// which lets tests redirect the "container" at an httptest server.
func (f *fakeDocker) Run(_ context.Context, spec dockerclient.RunSpec) (dockerclient.RunResult, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.runs = append(f.runs, spec)
	endpoint := "http://" + spec.Hostname + ":8080"
	if override := f.endpointFor; override != nil {
		endpoint = override(spec)
	}
	result := dockerclient.RunResult{
		ContainerID:    "container-" + spec.Name,
		EngineEndpoint: endpoint,
		StartedAt:      time.Now().UTC(),
	}
	return result, nil
}
// Stop records the stopped container ID; the grace period is ignored.
func (f *fakeDocker) Stop(_ context.Context, id string, _ int) error {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.stoppedIDs = append(f.stoppedIDs, id)
	return nil
}
// Remove records the removed container ID and always succeeds.
func (f *fakeDocker) Remove(_ context.Context, id string) error {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.removedIDs = append(f.removedIDs, id)
	return nil
}
// List ignores the filter and returns the canned listResult summaries.
func (f *fakeDocker) List(_ context.Context, _ dockerclient.ListFilter) ([]dockerclient.ContainerSummary, error) {
	summaries := f.listResult
	return summaries, nil
}
// fakeLobbyConsumer captures runtime → lobby callbacks. Both slices are
// guarded by mu because the runtime invokes the callbacks from worker
// goroutines while the test reads them.
type fakeLobbyConsumer struct {
	mu        sync.Mutex
	snapshots []runtime.LobbySnapshot // every snapshot passed to OnRuntimeSnapshot
	jobs      []runtime.JobResult     // every result passed to OnRuntimeJobResult
}
// OnRuntimeSnapshot appends the snapshot to the captured list under the lock.
func (f *fakeLobbyConsumer) OnRuntimeSnapshot(_ context.Context, _ uuid.UUID, snapshot runtime.LobbySnapshot) error {
	f.mu.Lock()
	f.snapshots = append(f.snapshots, snapshot)
	f.mu.Unlock()
	return nil
}
// OnRuntimeJobResult appends the job result to the captured list under the lock.
func (f *fakeLobbyConsumer) OnRuntimeJobResult(_ context.Context, _ uuid.UUID, result runtime.JobResult) error {
	f.mu.Lock()
	f.jobs = append(f.jobs, result)
	f.mu.Unlock()
	return nil
}
// TestServiceStartGameEndToEnd drives runtime.Service.StartGame end to end:
// a real Postgres (testcontainer) for persistence, a fake Docker client that
// points the "container" at a stubbed engine HTTP server, and the service's
// own worker pool draining the job queue. It then asserts the runtime
// reaches the running state, records the engine image ref, emits at least
// one lobby snapshot, and persists a player mapping for the seeded member.
func TestServiceStartGameEndToEnd(t *testing.T) {
	if testing.Short() {
		t.Skip("postgres-backed test skipped in -short")
	}
	ctx := context.Background()
	db := startPostgres(t)
	gameID := uuid.New()
	userID := uuid.New()
	// Seed a game already in 'starting' status whose target_engine_version
	// matches the engine_versions row inserted below.
	if _, err := db.ExecContext(ctx, `
INSERT INTO backend.games (
game_id, owner_user_id, visibility, status, game_name, description,
min_players, max_players, start_gap_hours, start_gap_players,
enrollment_ends_at, turn_schedule, target_engine_version,
runtime_snapshot
) VALUES ($1, NULL, 'public', 'starting', 'test-game', '',
1, 4, 0, 0, $2, '*/5 * * * *', '0.1.0', '{}'::jsonb)
`, gameID, time.Now().Add(time.Hour)); err != nil {
		t.Fatalf("insert game: %v", err)
	}
	// One active membership; its mapping is asserted at the end.
	if _, err := db.ExecContext(ctx, `
INSERT INTO backend.memberships (membership_id, game_id, user_id, race_name, canonical_key, status)
VALUES ($1, $2, $3, 'Alpha', 'alpha', 'active')
`, uuid.New(), gameID, userID); err != nil {
		t.Fatalf("insert membership: %v", err)
	}
	if _, err := db.ExecContext(ctx, `
INSERT INTO backend.engine_versions (version, image_ref, enabled)
VALUES ('0.1.0', 'galaxy-game:0.1.0', true)
`); err != nil {
		t.Fatalf("insert engine version: %v", err)
	}
	// Stub engine API. /healthz must answer 200: engineclient.Healthz probes
	// it after the runtime container starts, and without it the runtime
	// never leaves the starting state and the 5s poll below times out.
	// The admin endpoints return canned state snapshots.
	engineSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		switch r.URL.Path {
		case "/healthz":
			w.WriteHeader(http.StatusOK)
		case "/api/v1/admin/init":
			_ = json.NewEncoder(w).Encode(rest.StateResponse{ID: gameID, Turn: 0, Players: []rest.PlayerState{{RaceName: "Alpha", Planets: 3, Population: 10}}})
		case "/api/v1/admin/status":
			_ = json.NewEncoder(w).Encode(rest.StateResponse{ID: gameID, Turn: 1, Players: []rest.PlayerState{{RaceName: "Alpha", Planets: 5, Population: 12}}})
		case "/api/v1/admin/turn":
			_ = json.NewEncoder(w).Encode(rest.StateResponse{ID: gameID, Turn: 2, Players: []rest.PlayerState{{RaceName: "Alpha", Planets: 6, Population: 14}}, Finished: true})
		default:
			http.NotFound(w, r)
		}
	}))
	t.Cleanup(engineSrv.Close)
	// Every container the fake Docker "starts" resolves to the stub server.
	docker := &fakeDocker{endpointFor: func(_ dockerclient.RunSpec) string { return engineSrv.URL }}
	engineCli, err := engineclient.NewClientWithHTTP(engineclient.Config{CallTimeout: time.Second, ProbeTimeout: time.Second}, engineSrv.Client())
	if err != nil {
		t.Fatalf("engineclient: %v", err)
	}
	store := runtime.NewStore(db)
	cache := runtime.NewCache()
	if err := cache.Warm(ctx, store); err != nil {
		t.Fatalf("warm cache: %v", err)
	}
	versions := runtime.NewEngineVersionService(store, cache, nil)
	consumer := &fakeLobbyConsumer{}
	svc, err := runtime.NewService(runtime.Deps{
		Store:          store,
		Cache:          cache,
		EngineVersions: versions,
		Docker:         docker,
		Engine:         engineCli,
		Lobby:          consumer,
		DockerNetwork:  "galaxy",
		HostStateRoot:  t.TempDir(),
		Config: config.RuntimeConfig{
			WorkerPoolSize:      1,
			JobQueueSize:        4,
			ReconcileInterval:   time.Hour, // long enough that reconciliation never fires in this test
			ImagePullPolicy:     "if_missing",
			ContainerLogDriver:  "json-file",
			ContainerCPUQuota:   1.0,
			ContainerMemory:     "128m",
			ContainerPIDsLimit:  64,
			ContainerStateMount: "/var/lib/galaxy-game",
			StopGracePeriod:     time.Second,
		},
		Logger: zaptest.NewLogger(t),
	})
	if err != nil {
		t.Fatalf("NewService: %v", err)
	}
	// Drive StartGame; the worker pool is not running so we invoke
	// the worker entry directly through the public API. StartGame
	// enqueues; we drain by calling Workers().Run in a goroutine and
	// shutting it down once we observe the side effects.
	pool := svc.Workers()
	runCtx, runCancel := context.WithCancel(ctx)
	t.Cleanup(runCancel)
	go func() { _ = pool.Run(runCtx) }()
	if err := svc.StartGame(ctx, gameID); err != nil {
		t.Fatalf("StartGame: %v", err)
	}
	// Poll until the asynchronous worker flips the runtime to running,
	// bounded by a 5-second deadline.
	deadline := time.Now().Add(5 * time.Second)
	for time.Now().Before(deadline) {
		rec, err := svc.GetRuntime(ctx, gameID)
		if err == nil && rec.Status == runtime.RuntimeStatusRunning {
			break
		}
		time.Sleep(50 * time.Millisecond)
	}
	// Re-fetch outside the loop so the assertions below see the final state
	// (the loop variables are scoped to the loop body).
	rec, err := svc.GetRuntime(ctx, gameID)
	if err != nil {
		t.Fatalf("GetRuntime: %v", err)
	}
	if rec.Status != runtime.RuntimeStatusRunning {
		t.Fatalf("runtime status = %s, want running", rec.Status)
	}
	if rec.CurrentImageRef != "galaxy-game:0.1.0" {
		t.Fatalf("image_ref = %s", rec.CurrentImageRef)
	}
	// At least one lobby snapshot must have been pushed during startup.
	consumer.mu.Lock()
	snapshotCount := len(consumer.snapshots)
	consumer.mu.Unlock()
	if snapshotCount == 0 {
		t.Fatalf("expected runtime snapshot")
	}
	// The seeded membership must have been mapped to an engine player.
	mappings, err := store.ListPlayerMappingsForGame(ctx, gameID)
	if err != nil {
		t.Fatalf("ListPlayerMappingsForGame: %v", err)
	}
	if len(mappings) != 1 || mappings[0].UserID != userID {
		t.Fatalf("unexpected mappings: %+v", mappings)
	}
}