//go:build integration

// Package integration_test owns the service-local end-to-end scenarios
// for Runtime Manager. The build tag keeps the suite out of the
// default `go test ./...` run; CI invokes the suite explicitly with
// `go test -tags=integration ./rtmanager/integration/...`.
//
// Design rationale for the suite — build tag, in-process harness,
// per-test isolation, two-tag engine image — lives in
// `rtmanager/docs/integration-tests.md`. Each test stands up its own
// Runtime Manager process via `harness.NewEnv`, drives the same
// streams Game Lobby uses in `integration/lobbyrtm`, and asserts the
// resulting PostgreSQL, Redis-stream, and Docker side-effects.
package integration_test

import (
	"context"
	"net/http"
	"testing"
	"time"

	"galaxy/rtmanager/integration/harness"
	"galaxy/rtmanager/internal/domain/operation"
	"galaxy/rtmanager/internal/domain/runtime"
	"galaxy/rtmanager/internal/ports"

	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/filters"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestMain centralises shared-container teardown so individual
// failing tests do not leak the testcontainers postgres / redis pair.
func TestMain(m *testing.M) {
	harness.RunMain(m)
}

// TestLifecycle_StartInspectStopRestartPatchCleanup drives one game
// through every supported lifecycle operation against the real engine
// image and asserts each step's PG, Redis-stream, and Docker
// side-effects.
func TestLifecycle_StartInspectStopRestartPatchCleanup(t *testing.T) {
	env := harness.NewEnv(t, harness.EnvOptions{LogToStderr: true})
	rest := harness.NewREST(env)
	gameID := harness.IDFromTestName(t)

	// Step 1 — start through the Lobby async stream contract.
	startEntryID := harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	t.Logf("start_jobs xadd id=%s", startEntryID)

	startResult := harness.WaitForJobResult(t, env,
		harness.JobOutcomeIs(gameID, ports.JobOutcomeSuccess),
		30*time.Second,
	)
	require.Equal(t, "", startResult.ErrorCode, "fresh start must publish empty error_code")
	require.NotEmpty(t, startResult.ContainerID, "fresh start job result must carry container_id")
	require.NotEmpty(t, startResult.EngineEndpoint, "fresh start job result must carry engine_endpoint")

	// PG record reflects the start.
	startedRecord := harness.EventuallyRuntimeRecord(t, env, gameID,
		func(r runtime.RuntimeRecord) bool { return r.Status == runtime.StatusRunning },
		15*time.Second,
	)
	assert.Equal(t, env.EngineImageRef, startedRecord.CurrentImageRef)
	assert.Equal(t, env.Network, startedRecord.DockerNetwork)
	assert.Equal(t, startResult.ContainerID, startedRecord.CurrentContainerID)
	assert.Equal(t, startResult.EngineEndpoint, startedRecord.EngineEndpoint)

	// operation_log captures the start.
	startEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindStart, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, startEntry.Outcome)
	assert.Equal(t, operation.OpSourceLobbyStream, startEntry.OpSource)

	// Step 2 — inspect via the GM/Admin REST surface.
	getResp, status := rest.GetRuntime(t, gameID)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "running", getResp.Status)
	require.NotNil(t, getResp.CurrentContainerID)
	require.Equal(t, startResult.ContainerID, *getResp.CurrentContainerID)
	require.NotNil(t, getResp.CurrentImageRef)
	require.Equal(t, env.EngineImageRef, *getResp.CurrentImageRef)
	require.NotNil(t, getResp.EngineeEndpoint)
	require.Equal(t, startResult.EngineEndpoint, *getResp.EngineEndpoint)

	// Step 3 — stop through the Lobby async stream contract.
	harness.XAddStopJob(t, env, gameID, "cancelled")

	stopResult := waitForLatestStopOrStartResult(t, env, gameID)
	require.Equal(t, ports.JobOutcomeSuccess, stopResult.Outcome)
	require.Equal(t, "", stopResult.ErrorCode, "fresh stop must publish empty error_code")

	stoppedRecord := harness.EventuallyRuntimeRecord(t, env, gameID,
		func(r runtime.RuntimeRecord) bool { return r.Status == runtime.StatusStopped },
		15*time.Second,
	)
	assert.Equal(t, startResult.ContainerID, stoppedRecord.CurrentContainerID,
		"stop preserves the current container id until cleanup")

	// Step 4 — restart via REST. Container id changes; engine endpoint
	// stays stable.
	restartResp, status := rest.RestartRuntime(t, gameID)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "running", restartResp.Status)
	require.NotNil(t, restartResp.CurrentContainerID)
	require.NotEqual(t, startResult.ContainerID, *restartResp.CurrentContainerID,
		"restart must produce a new container id")
	require.NotNil(t, restartResp.EngineEndpoint)
	require.Equal(t, startResult.EngineEndpoint, *restartResp.EngineEndpoint,
		"restart must keep the engine endpoint stable")
	restartContainerID := *restartResp.CurrentContainerID

	restartEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindRestart, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, restartEntry.Outcome)
	assert.Equal(t, operation.OpSourceAdminRest, restartEntry.OpSource)

	// Step 5 — patch to the second semver-compatible tag. Same image
	// content, but the runtime should still record the new tag and
	// recreate the container.
	patchResp, status := rest.PatchRuntime(t, gameID, env.PatchedImageRef)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "running", patchResp.Status)
	require.NotNil(t, patchResp.CurrentImageRef)
	assert.Equal(t, env.PatchedImageRef, *patchResp.CurrentImageRef)
	require.NotNil(t, patchResp.CurrentContainerID)
	assert.NotEqual(t, restartContainerID, *patchResp.CurrentContainerID,
		"patch must recreate the container")

	patchEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindPatch, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, patchEntry.Outcome)

	// Step 6 — quiesce via REST stop so cleanup is allowed (cleanup
	// refuses to remove a running container per
	// `rtmanager/README.md §Lifecycles → Cleanup`).
	stopResp, status := rest.StopRuntime(t, gameID, "admin_request")
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "stopped", stopResp.Status)

	// Step 7 — cleanup the container. PG record flips to removed and
	// current_container_id becomes nil.
	cleanupResp, status := rest.CleanupRuntime(t, gameID)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "removed", cleanupResp.Status)
	require.Nil(t, cleanupResp.CurrentContainerID)

	cleanupEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindCleanupContainer, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, cleanupEntry.Outcome)
	assert.Equal(t, operation.OpSourceAdminRest, cleanupEntry.OpSource)
}

// TestReplay_StartJobIsNoop publishes the same start envelope twice
// and asserts that Runtime Manager produces a fresh job_result for
// the first XADD and a `replay_no_op` outcome for the second, without
// recreating the engine container.
func TestReplay_StartJobIsNoop(t *testing.T) {
	env := harness.NewEnv(t, harness.EnvOptions{})
	gameID := harness.IDFromTestName(t)

	// First XADD: fresh start.
	harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	first := harness.WaitForJobResult(t, env,
		harness.JobOutcomeIs(gameID, ports.JobOutcomeSuccess),
		30*time.Second,
	)
	require.Equal(t, "", first.ErrorCode)

	// Second XADD: same envelope; the start service must short-circuit
	// at the `runtime_records.status=running && image_ref` check.
	harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	replay := harness.WaitForJobResult(t, env,
		harness.JobOutcomeWithErrorCode(gameID, ports.JobOutcomeSuccess, "replay_no_op"),
		15*time.Second,
	)
	assert.Equal(t, first.ContainerID, replay.ContainerID,
		"replay must surface the same container id as the original start")
	assert.Equal(t, first.EngineEndpoint, replay.EngineEndpoint)

	// Docker view: exactly one engine container exists for this game.
	assertSingleEngineContainer(t, env, gameID)

	// Lifecycle stream produced exactly two entries: fresh + replay.
	entries := harness.AllJobResults(t, env)
	require.Len(t, entries, 2)
	assert.Equal(t, "", entries[0].ErrorCode)
	assert.Equal(t, "replay_no_op", entries[1].ErrorCode)
}

// TestReplay_StopJobIsNoop publishes a stop envelope twice after a
// successful start and asserts the second stop surfaces as
// `replay_no_op` without altering the runtime record's `stopped_at`.
func TestReplay_StopJobIsNoop(t *testing.T) {
	env := harness.NewEnv(t, harness.EnvOptions{})
	gameID := harness.IDFromTestName(t)

	// Bring the game to `running`. The start path publishes one entry
	// to `runtime:job_results`; the stops below publish two more, so
	// per-game stream order is [start, first-stop, replay-stop].
	harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	harness.WaitForJobResult(t, env,
		harness.JobOutcomeIs(gameID, ports.JobOutcomeSuccess),
		30*time.Second,
	)

	// First stop: fresh. The expectedCount accounts for the start
	// entry that is already on the stream.
	harness.XAddStopJob(t, env, gameID, "cancelled")
	first := waitForJobResultByIndex(t, env, gameID, 2)
	require.Equal(t, ports.JobOutcomeSuccess, first.Outcome)
	require.Equal(t, "", first.ErrorCode)

	stoppedRecord := harness.EventuallyRuntimeRecord(t, env, gameID,
		func(r runtime.RuntimeRecord) bool { return r.Status == runtime.StatusStopped },
		15*time.Second,
	)
	require.NotNil(t, stoppedRecord.StoppedAt, "stopped record must carry stopped_at")
	originalStoppedAt := *stoppedRecord.StoppedAt

	// Second stop: replay (third entry on the per-game stream).
	harness.XAddStopJob(t, env, gameID, "cancelled")
	replay := waitForJobResultByIndex(t, env, gameID, 3)
	require.Equal(t, ports.JobOutcomeSuccess, replay.Outcome)
	assert.Equal(t, "replay_no_op", replay.ErrorCode)

	// stopped_at stays anchored to the first stop.
	postReplay := harness.MustRuntimeRecord(t, env, gameID)
	require.Equal(t, runtime.StatusStopped, postReplay.Status)
	require.NotNil(t, postReplay.StoppedAt)
	assert.True(t, originalStoppedAt.Equal(*postReplay.StoppedAt),
		"stopped_at must not move on a replay stop; was %s, now %s",
		originalStoppedAt, *postReplay.StoppedAt)
}

// waitForLatestStopOrStartResult polls `runtime:job_results` until it
// has seen two `outcome=success` entries for gameID and returns the
// most recent one. The lifecycle scenario emits two consecutive
// successes (start then stop), so waiting for the second success is
// what distinguishes the stop result from the earlier start result.
func waitForLatestStopOrStartResult(t *testing.T, env *harness.Env, gameID string) harness.JobResultEntry {
	t.Helper()
	deadline := time.Now().Add(30 * time.Second)
	for {
		entries := harness.AllJobResults(t, env)
		matched := 0
		var last harness.JobResultEntry
		for _, entry := range entries {
			if entry.GameID == gameID && entry.Outcome == ports.JobOutcomeSuccess {
				matched++
				last = entry
			}
		}
		// Two successes means we have observed both the start and the
		// stop outcome for this game.
		if matched >= 2 {
			return last
		}
		if time.Now().After(deadline) {
			t.Fatalf("expected two job_results for %s, got %d", gameID, matched)
		}
		time.Sleep(50 * time.Millisecond)
	}
}

// waitForJobResultByIndex polls the job_results stream until it has
// at least `expectedCount` entries for gameID and returns the
// expectedCount-th. Used by the replay tests to deterministically
// pick the second / nth result.
func waitForJobResultByIndex(t *testing.T, env *harness.Env, gameID string, expectedCount int) harness.JobResultEntry {
	t.Helper()
	deadline := time.Now().Add(30 * time.Second)
	for {
		entries := harness.AllJobResults(t, env)
		matches := make([]harness.JobResultEntry, 0, len(entries))
		for _, entry := range entries {
			if entry.GameID == gameID {
				matches = append(matches, entry)
			}
		}
		if len(matches) >= expectedCount {
			return matches[expectedCount-1]
		}
		if time.Now().After(deadline) {
			t.Fatalf("expected at least %d job_results for %s, got %d",
				expectedCount, gameID, len(matches))
		}
		time.Sleep(50 * time.Millisecond)
	}
}

// assertSingleEngineContainer queries Docker by the per-game label and
// asserts exactly one matching container exists. Catches replay
// regressions that would let RTM start two containers for the same
// game id.
func assertSingleEngineContainer(t *testing.T, env *harness.Env, gameID string) {
	t.Helper()
	args := filters.NewArgs(
		filters.Arg("label", "com.galaxy.owner=rtmanager"),
		filters.Arg("label", "com.galaxy.game_id="+gameID),
	)
	containers, err := env.Docker.Client().ContainerList(
		context.Background(),
		container.ListOptions{All: true, Filters: args},
	)
	require.NoError(t, err)
	require.Lenf(t, containers, 1,
		"expected one engine container for game %s, got %d", gameID, len(containers))
}
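
// assertNoEngineContainer is a sketch of the counterpart check to
// assertSingleEngineContainer: it asserts that no container carrying
// the per-game engine labels remains. It is not wired into the
// scenarios above; Step 7 of the lifecycle test currently verifies
// removal through the PG/REST view only, and a cleanup step could
// additionally call this helper to assert the Docker side-effect.
func assertNoEngineContainer(t *testing.T, env *harness.Env, gameID string) {
	t.Helper()
	// Same label filter the start path applies to engine containers.
	args := filters.NewArgs(
		filters.Arg("label", "com.galaxy.owner=rtmanager"),
		filters.Arg("label", "com.galaxy.game_id="+gameID),
	)
	containers, err := env.Docker.Client().ContainerList(
		context.Background(),
		container.ListOptions{All: true, Filters: args},
	)
	require.NoError(t, err)
	require.Emptyf(t, containers,
		"expected no engine container for game %s after cleanup, got %d", gameID, len(containers))
}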