//go:build integration

// Package integration_test owns the service-local end-to-end scenarios
// for Runtime Manager. The build tag keeps the suite out of the
// default `go test ./...` run; CI invokes the suite explicitly with
// `go test -tags=integration ./rtmanager/integration/...`.
//
// Design rationale for the suite — build tag, in-process harness,
// per-test isolation, two-tag engine image — lives in
// `rtmanager/docs/integration-tests.md`. Each test stands up its own
// Runtime Manager process via `harness.NewEnv`, drives the same
// streams Game Lobby uses in `integration/lobbyrtm`, and asserts the
// resulting PostgreSQL, Redis-stream, and Docker side-effects.
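//
// To run a single scenario locally, the standard `-run` filter can be
// combined with the same tag (a reachable Docker daemon is assumed, since
// the tests assert real container side-effects), for example:
//
//	go test -tags=integration -run TestLifecycle ./rtmanager/integration/...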
package integration_test

import (
	"context"
	"net/http"
	"testing"
	"time"

	"galaxy/rtmanager/integration/harness"
	"galaxy/rtmanager/internal/domain/operation"
	"galaxy/rtmanager/internal/domain/runtime"
	"galaxy/rtmanager/internal/ports"

	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/filters"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestMain centralises shared-container teardown so individual
// failing tests do not leak the testcontainers Postgres/Redis pair.
func TestMain(m *testing.M) {
	harness.RunMain(m)
}

// TestLifecycle_StartInspectStopRestartPatchCleanup drives one game
// through every supported lifecycle operation against the real engine
// image and asserts each step's PG, Redis-stream, and Docker
// side-effects.
func TestLifecycle_StartInspectStopRestartPatchCleanup(t *testing.T) {
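	// Each scenario gets its own harness environment (a dedicated Runtime
	// Manager process against the shared test containers) plus a game id
	// derived from the test name, which keeps scenarios isolated from each
	// other.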
	env := harness.NewEnv(t, harness.EnvOptions{LogToStderr: true})
	rest := harness.NewREST(env)
	gameID := harness.IDFromTestName(t)

	// Step 1 — start through the Lobby async stream contract.
	startEntryID := harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	t.Logf("start_jobs xadd id=%s", startEntryID)

	startResult := harness.WaitForJobResult(t, env,
		harness.JobOutcomeIs(gameID, ports.JobOutcomeSuccess),
		30*time.Second,
	)
	require.Equal(t, "", startResult.ErrorCode, "fresh start must publish empty error_code")
	require.NotEmpty(t, startResult.ContainerID, "fresh start job result must carry container_id")
	require.NotEmpty(t, startResult.EngineEndpoint, "fresh start job result must carry engine_endpoint")

	// PG record reflects the start.
	startedRecord := harness.EventuallyRuntimeRecord(t, env, gameID,
		func(r runtime.RuntimeRecord) bool { return r.Status == runtime.StatusRunning },
		15*time.Second,
	)
	assert.Equal(t, env.EngineImageRef, startedRecord.CurrentImageRef)
	assert.Equal(t, env.Network, startedRecord.DockerNetwork)
	assert.Equal(t, startResult.ContainerID, startedRecord.CurrentContainerID)
	assert.Equal(t, startResult.EngineEndpoint, startedRecord.EngineEndpoint)

	// operation_log captures the start.
	startEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindStart, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, startEntry.Outcome)
	assert.Equal(t, operation.OpSourceLobbyStream, startEntry.OpSource)

	// Step 2 — inspect via the GM/Admin REST surface.
	getResp, status := rest.GetRuntime(t, gameID)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "running", getResp.Status)
	require.NotNil(t, getResp.CurrentContainerID)
	require.Equal(t, startResult.ContainerID, *getResp.CurrentContainerID)
	require.NotNil(t, getResp.CurrentImageRef)
	require.Equal(t, env.EngineImageRef, *getResp.CurrentImageRef)
	require.NotNil(t, getResp.EngineEndpoint)
	require.Equal(t, startResult.EngineEndpoint, *getResp.EngineEndpoint)

	// Step 3 — stop through the Lobby async stream contract.
	harness.XAddStopJob(t, env, gameID, "cancelled")
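	// The per-game stream now carries two success entries (start, then
	// stop); the helper below polls until both are visible and returns
	// the latest one.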
	stopResult := waitForLatestStopOrStartResult(t, env, gameID)
	require.Equal(t, ports.JobOutcomeSuccess, stopResult.Outcome)
	require.Equal(t, "", stopResult.ErrorCode, "fresh stop must publish empty error_code")

	stoppedRecord := harness.EventuallyRuntimeRecord(t, env, gameID,
		func(r runtime.RuntimeRecord) bool { return r.Status == runtime.StatusStopped },
		15*time.Second,
	)
	assert.Equal(t, startResult.ContainerID, stoppedRecord.CurrentContainerID,
		"stop preserves the current container id until cleanup")

	// Step 4 — restart via REST. Container id changes; engine endpoint
	// stays stable.
	restartResp, status := rest.RestartRuntime(t, gameID)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "running", restartResp.Status)
	require.NotNil(t, restartResp.CurrentContainerID)
	require.NotEqual(t, startResult.ContainerID, *restartResp.CurrentContainerID,
		"restart must produce a new container id")
	require.NotNil(t, restartResp.EngineEndpoint)
	require.Equal(t, startResult.EngineEndpoint, *restartResp.EngineEndpoint,
		"restart must keep the engine endpoint stable")

	restartContainerID := *restartResp.CurrentContainerID
	restartEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindRestart, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, restartEntry.Outcome)
	assert.Equal(t, operation.OpSourceAdminRest, restartEntry.OpSource)

	// Step 5 — patch to the second semver-compatible tag. Same image
	// content, but the runtime should still record the new tag and
	// recreate the container.
	patchResp, status := rest.PatchRuntime(t, gameID, env.PatchedImageRef)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "running", patchResp.Status)
	require.NotNil(t, patchResp.CurrentImageRef)
	assert.Equal(t, env.PatchedImageRef, *patchResp.CurrentImageRef)
	require.NotNil(t, patchResp.CurrentContainerID)
	assert.NotEqual(t, restartContainerID, *patchResp.CurrentContainerID,
		"patch must recreate the container")

	patchEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindPatch, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, patchEntry.Outcome)

	// Step 6 — quiesce via REST stop so cleanup is allowed (cleanup
	// refuses to remove a running container per
	// `rtmanager/README.md §Lifecycles → Cleanup`).
	stopResp, status := rest.StopRuntime(t, gameID, "admin_request")
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "stopped", stopResp.Status)

	// Step 7 — cleanup the container. PG record flips to removed and
	// current_container_id becomes nil.
	cleanupResp, status := rest.CleanupRuntime(t, gameID)
	require.Equal(t, http.StatusOK, status)
	require.Equal(t, "removed", cleanupResp.Status)
	require.Nil(t, cleanupResp.CurrentContainerID)

	cleanupEntry := harness.EventuallyOperationKind(t, env, gameID, operation.OpKindCleanupContainer, 5*time.Second)
	assert.Equal(t, operation.OutcomeSuccess, cleanupEntry.Outcome)
	assert.Equal(t, operation.OpSourceAdminRest, cleanupEntry.OpSource)
}

// TestReplay_StartJobIsNoop publishes the same start envelope twice
// and asserts that Runtime Manager produces a fresh job_result for
// the first XADD and a `replay_no_op` outcome for the second, without
// recreating the engine container.
func TestReplay_StartJobIsNoop(t *testing.T) {
	env := harness.NewEnv(t, harness.EnvOptions{})
	gameID := harness.IDFromTestName(t)

	// First XADD: fresh start.
	harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	first := harness.WaitForJobResult(t, env,
		harness.JobOutcomeIs(gameID, ports.JobOutcomeSuccess),
		30*time.Second,
	)
	require.Equal(t, "", first.ErrorCode)

	// Second XADD: same envelope; the start service must short-circuit
	// at the `runtime_records.status=running && image_ref` check.
	harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	replay := harness.WaitForJobResult(t, env,
		harness.JobOutcomeWithErrorCode(gameID, ports.JobOutcomeSuccess, "replay_no_op"),
		15*time.Second,
	)
	assert.Equal(t, first.ContainerID, replay.ContainerID,
		"replay must surface the same container id as the original start")
	assert.Equal(t, first.EngineEndpoint, replay.EngineEndpoint)

	// Docker view: exactly one engine container exists for this game.
	assertSingleEngineContainer(t, env, gameID)

	// Lifecycle stream produced exactly two entries: fresh + replay.
	entries := harness.AllJobResults(t, env)
	require.Len(t, entries, 2)
	assert.Equal(t, "", entries[0].ErrorCode)
	assert.Equal(t, "replay_no_op", entries[1].ErrorCode)
}

// TestReplay_StopJobIsNoop publishes a stop envelope twice after a
// successful start and asserts the second stop surfaces as
// `replay_no_op` without altering the runtime record's `stopped_at`.
func TestReplay_StopJobIsNoop(t *testing.T) {
	env := harness.NewEnv(t, harness.EnvOptions{})
	gameID := harness.IDFromTestName(t)

	// Bring the game to `running`. The start path publishes one entry
	// to `runtime:job_results`; the stops below publish two more, so
	// per-game stream order is [start, first-stop, replay-stop].
	harness.XAddStartJob(t, env, gameID, env.EngineImageRef)
	harness.WaitForJobResult(t, env,
		harness.JobOutcomeIs(gameID, ports.JobOutcomeSuccess),
		30*time.Second,
	)

	// First stop: fresh. The expectedCount accounts for the start
	// entry that is already on the stream.
	harness.XAddStopJob(t, env, gameID, "cancelled")
	first := waitForJobResultByIndex(t, env, gameID, 2)
	require.Equal(t, ports.JobOutcomeSuccess, first.Outcome)
	require.Equal(t, "", first.ErrorCode)

	stoppedRecord := harness.EventuallyRuntimeRecord(t, env, gameID,
		func(r runtime.RuntimeRecord) bool { return r.Status == runtime.StatusStopped },
		15*time.Second,
	)
	require.NotNil(t, stoppedRecord.StoppedAt, "stopped record must carry stopped_at")
	originalStoppedAt := *stoppedRecord.StoppedAt

	// Second stop: replay (third entry on the per-game stream).
	harness.XAddStopJob(t, env, gameID, "cancelled")
	replay := waitForJobResultByIndex(t, env, gameID, 3)
	require.Equal(t, ports.JobOutcomeSuccess, replay.Outcome)
	assert.Equal(t, "replay_no_op", replay.ErrorCode)

	// stopped_at stays anchored to the first stop.
	postReplay := harness.MustRuntimeRecord(t, env, gameID)
	require.Equal(t, runtime.StatusStopped, postReplay.Status)
	require.NotNil(t, postReplay.StoppedAt)
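	// Compare with time.Time.Equal rather than ==: the two values come
	// from separate reads, so their wall-clock representation (e.g.
	// location) may differ even when they denote the same instant.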
	assert.True(t, originalStoppedAt.Equal(*postReplay.StoppedAt),
		"stopped_at must not move on a replay stop; was %s, now %s",
		originalStoppedAt, *postReplay.StoppedAt)
}

// waitForLatestStopOrStartResult polls `runtime:job_results` until it
// has seen two `outcome=success` entries for gameID and returns the
// most recent one. The lifecycle scenario emits two consecutive
// successes (start then stop), so the returned entry is the stop.
func waitForLatestStopOrStartResult(t *testing.T, env *harness.Env, gameID string) harness.JobResultEntry {
	t.Helper()
	deadline := time.Now().Add(30 * time.Second)
	for {
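		// Re-read the whole stream on every pass; the per-test streams
		// carry only a handful of entries, so a full scan every 50ms
		// keeps the helper simple and stateless.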
		entries := harness.AllJobResults(t, env)
		// Two entries means we've observed both the start and stop
		// outcomes for this game.
		matched := 0
		var last harness.JobResultEntry
		for _, entry := range entries {
			if entry.GameID == gameID && entry.Outcome == ports.JobOutcomeSuccess {
				matched++
				last = entry
			}
		}
		if matched >= 2 {
			return last
		}
		if time.Now().After(deadline) {
			t.Fatalf("expected two job_results for %s, got %d", gameID, matched)
		}
		time.Sleep(50 * time.Millisecond)
	}
}

// waitForJobResultByIndex polls the job_results stream until it has
// at least `expectedCount` entries for gameID and returns the
// expectedCount-th. Used by the replay tests to deterministically
// pick the second / nth result.
func waitForJobResultByIndex(t *testing.T, env *harness.Env, gameID string, expectedCount int) harness.JobResultEntry {
	t.Helper()
	deadline := time.Now().Add(30 * time.Second)
	for {
		entries := harness.AllJobResults(t, env)
		matches := make([]harness.JobResultEntry, 0, len(entries))
		for _, entry := range entries {
			if entry.GameID == gameID {
				matches = append(matches, entry)
			}
		}
		if len(matches) >= expectedCount {
			return matches[expectedCount-1]
		}
		if time.Now().After(deadline) {
			t.Fatalf("expected at least %d job_results for %s, got %d",
				expectedCount, gameID, len(matches))
		}
		time.Sleep(50 * time.Millisecond)
	}
}

// assertSingleEngineContainer queries Docker by the per-game label and
// asserts exactly one matching container exists. Catches replay
// regressions that would let RTM start two containers for the same
// game id.
func assertSingleEngineContainer(t *testing.T, env *harness.Env, gameID string) {
	t.Helper()
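	// Filter on the owner and per-game labels that Runtime Manager is
	// expected to stamp on the engine containers it creates; together
	// they should match exactly one container for this game.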
	args := filters.NewArgs(
		filters.Arg("label", "com.galaxy.owner=rtmanager"),
		filters.Arg("label", "com.galaxy.game_id="+gameID),
	)
	containers, err := env.Docker.Client().ContainerList(
		context.Background(),
		container.ListOptions{All: true, Filters: args},
	)
	require.NoError(t, err)
	require.Lenf(t, containers, 1, "expected one engine container for game %s, got %d", gameID, len(containers))
}