package restartruntime_test

import (
	"context"
	"errors"
	"sync"
	"testing"
	"time"

	"galaxy/notificationintent"
	"galaxy/rtmanager/internal/adapters/docker/mocks"
	"galaxy/rtmanager/internal/config"
	"galaxy/rtmanager/internal/domain/operation"
	"galaxy/rtmanager/internal/domain/runtime"
	"galaxy/rtmanager/internal/ports"
	"galaxy/rtmanager/internal/service/restartruntime"
	"galaxy/rtmanager/internal/service/startruntime"
	"galaxy/rtmanager/internal/service/stopruntime"
	"galaxy/rtmanager/internal/telemetry"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.uber.org/mock/gomock"
)

// --- shared fake doubles ----------------------------------------------

// fakeRuntimeRecords is an in-memory runtime record store keyed by game id.
// Every call is serialised by mu. The *Err fields, when set, are returned by
// the matching method; upserts and updates capture the inputs the fake has
// seen so tests can inspect them.
type fakeRuntimeRecords struct {
	mu sync.Mutex

	stored          map[string]runtime.RuntimeRecord
	getErr          error
	upsertErr       error
	updateStatusErr error
	upserts         []runtime.RuntimeRecord
	updates         []ports.UpdateStatusInput
}

// newFakeRuntimeRecords returns a fake store with an empty, writable map.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// Get returns getErr when it is set, runtime.ErrNotFound when no record is
// stored for gameID, and the stored record otherwise.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}

// Upsert stores record under its GameID and appends it to upserts. When
// upsertErr is set the error is returned and nothing is recorded.
func (s *fakeRuntimeRecords) Upsert(_ context.Context, record runtime.RuntimeRecord) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.upsertErr != nil {
		return s.upsertErr
	}
	s.upserts = append(s.upserts, record)
	s.stored[record.GameID] = record
	return nil
}

// UpdateStatus applies a guarded status transition to the stored record.
//
// The input is appended to updates before any check runs, so rejected
// attempts are captured too. The call returns updateStatusErr when set,
// runtime.ErrNotFound when no record exists, and runtime.ErrConflict when
// the stored status differs from ExpectedFrom or when a non-empty
// ExpectedContainerID differs from the stored container id.
func (s *fakeRuntimeRecords) UpdateStatus(_ context.Context, input ports.UpdateStatusInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.updates = append(s.updates, input)
	if s.updateStatusErr != nil {
		return s.updateStatusErr
	}
	record, ok := s.stored[input.GameID]
	if !ok {
		return runtime.ErrNotFound
	}
	if record.Status != input.ExpectedFrom {
		return runtime.ErrConflict
	}
	if input.ExpectedContainerID != "" && record.CurrentContainerID != input.ExpectedContainerID {
		return runtime.ErrConflict
	}
	record.Status = input.To
	record.LastOpAt = input.Now
	switch input.To {
	case runtime.StatusStopped:
		stoppedAt := input.Now
		record.StoppedAt = &stoppedAt
	case runtime.StatusRemoved:
		removedAt := input.Now
		record.RemovedAt = &removedAt
		record.CurrentContainerID = ""
	}
	s.stored[input.GameID] = record
	return nil
}

// ListByStatus always returns an error; the restart tests do not use it.
func (s *fakeRuntimeRecords) ListByStatus(_ context.Context, _ runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in restart tests")
}

// List always returns an error; the restart tests do not use it.
func (s *fakeRuntimeRecords) List(_ context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in restart tests")
}

// fakeOperationLogs records appended operation entries in memory, in order.
type fakeOperationLogs struct {
	mu sync.Mutex

	appendErr error
	appends   []operation.OperationEntry
}

// Append stores entry and returns the number of entries stored so far as its
// id. When appendErr is set the error is returned and nothing is stored.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.appendErr != nil {
		return 0, s.appendErr
	}
	s.appends = append(s.appends, entry)
	return int64(len(s.appends)), nil
}

// ListByGame always returns an error; the restart tests do not use it.
func (s *fakeOperationLogs) ListByGame(_ context.Context, _ string, _ int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used in restart tests")
}

// byKind returns the appended entries whose OpKind equals kind, in append
// order. The result is nil when no entry matches.
func (s *fakeOperationLogs) byKind(kind operation.OpKind) []operation.OperationEntry {
	s.mu.Lock()
	defer s.mu.Unlock()
	var out []operation.OperationEntry
	for _, entry := range s.appends {
		if entry.OpKind == kind {
			out = append(out, entry)
		}
	}
	return out
}

// fakeLeases is a lease double whose TryAcquire outcome is fixed by the
// acquired and acquireErr fields. It records every token passed to
// TryAcquire and Release.
type fakeLeases struct {
	mu sync.Mutex

	acquired   bool
	acquireErr error
	releaseErr error
	acquires   []string
	releases   []string
}

// TryAcquire records token, then returns acquireErr when set or the
// configured acquired flag otherwise.
func (l *fakeLeases) TryAcquire(_ context.Context, _, token string, _ time.Duration) (bool, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.acquires = append(l.acquires, token)
	if l.acquireErr != nil {
		return false, l.acquireErr
	}
	return l.acquired, nil
}

// Release records token and returns releaseErr.
func (l *fakeLeases) Release(_ context.Context, _, token string) error {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.releases = append(l.releases, token)
	return l.releaseErr
}

// fakeHealthEvents collects published health event envelopes.
type fakeHealthEvents struct {
	mu sync.Mutex

	publishErr error
	envelopes  []ports.HealthEventEnvelope
}

// Publish stores envelope, or returns publishErr without storing when set.
func (h *fakeHealthEvents) Publish(_ context.Context, envelope ports.HealthEventEnvelope) error {
	h.mu.Lock()
	defer h.mu.Unlock()
	if h.publishErr != nil {
		return h.publishErr
	}
	h.envelopes = append(h.envelopes, envelope)
	return nil
}

// fakeNotifications collects published notification intents.
type fakeNotifications struct {
	mu sync.Mutex

	publishErr error
	intents    []notificationintent.Intent
}

// Publish stores intent, or returns publishErr without storing when set.
func (n *fakeNotifications) Publish(_ context.Context, intent notificationintent.Intent) error {
	n.mu.Lock()
	defer n.mu.Unlock()
	if n.publishErr != nil {
		return n.publishErr
	}
	n.intents = append(n.intents, intent)
	return nil
}

// fakeLobby answers every GetGame call with the same record or error.
type fakeLobby struct {
	record ports.LobbyGameRecord
	err    error
}

// GetGame returns err when it is set and the configured record otherwise.
func (l *fakeLobby) GetGame(_ context.Context, _ string) (ports.LobbyGameRecord, error) {
	if l.err != nil {
		return ports.LobbyGameRecord{}, l.err
	}
	return l.record, nil
}

// --- harness ----------------------------------------------------------

// harness bundles the doubles shared by the inner start/stop services and
// the restart service under test, together with the fixed clock value and
// state directory handed to those services.
type harness struct {
	records       *fakeRuntimeRecords
	operationLogs *fakeOperationLogs
	docker        *mocks.MockDockerClient
	leases        *fakeLeases
	healthEvents  *fakeHealthEvents
	notifications *fakeNotifications
	lobby         *fakeLobby
	telemetry     *telemetry.Runtime

	now      time.Time
	stateDir string

	startService *startruntime.Service
	stopService  *stopruntime.Service
}

// newHarness builds a harness whose start and stop services share the same
// fakes and docker mock. The clock is pinned to 2026-04-27 12:00:00 UTC, the
// lease fake grants every acquire, and the inner services use the fixed
// tokens "inner-start-token" and "inner-stop-token".
func newHarness(t *testing.T) *harness {
	t.Helper()

	ctrl := gomock.NewController(t)
	t.Cleanup(ctrl.Finish)

	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)

	h := &harness{
		records:       newFakeRuntimeRecords(),
		operationLogs: &fakeOperationLogs{},
		docker:        mocks.NewMockDockerClient(ctrl),
		leases:        &fakeLeases{acquired: true},
		healthEvents:  &fakeHealthEvents{},
		notifications: &fakeNotifications{},
		lobby:         &fakeLobby{},
		telemetry:     telemetryRuntime,
		now:           time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC),
		stateDir:      "/var/lib/galaxy/games/game-1",
	}

	containerCfg := config.ContainerConfig{
		DefaultCPUQuota:      1.0,
		DefaultMemory:        "512m",
		DefaultPIDsLimit:     512,
		StopTimeout:          30 * time.Second,
		Retention:            30 * 24 * time.Hour,
		EngineStateMountPath: "/var/lib/galaxy-game",
		EngineStateEnvName:   "GAME_STATE_PATH",
		GameStateDirMode:     0o750,
		GameStateRoot:        "/var/lib/galaxy/games",
	}
	dockerCfg := config.DockerConfig{
		Host:       "unix:///var/run/docker.sock",
		Network:    "galaxy-net",
		LogDriver:  "json-file",
		PullPolicy: config.ImagePullPolicyIfMissing,
	}
	coordinationCfg := config.CoordinationConfig{GameLeaseTTL: time.Minute}

	startService, err := startruntime.NewService(startruntime.Dependencies{
		RuntimeRecords:  h.records,
		OperationLogs:   h.operationLogs,
		Docker:          h.docker,
		Leases:          h.leases,
		HealthEvents:    h.healthEvents,
		Notifications:   h.notifications,
		Lobby:           h.lobby,
		Container:       containerCfg,
		DockerCfg:       dockerCfg,
		Coordination:    coordinationCfg,
		Telemetry:       h.telemetry,
		Clock:           func() time.Time { return h.now },
		NewToken:        func() string { return "inner-start-token" },
		PrepareStateDir: func(_ string) (string, error) { return h.stateDir, nil },
	})
	require.NoError(t, err)
	h.startService = startService

	stopService, err := stopruntime.NewService(stopruntime.Dependencies{
		RuntimeRecords: h.records,
		OperationLogs:  h.operationLogs,
		Docker:         h.docker,
		Leases:         h.leases,
		HealthEvents:   h.healthEvents,
		Container:      containerCfg,
		Coordination:   coordinationCfg,
		Telemetry:      h.telemetry,
		Clock:          func() time.Time { return h.now },
		NewToken:       func() string { return "inner-stop-token" },
	})
	require.NoError(t, err)
	h.stopService = stopService

	return h
}

// build constructs the restart service under test on top of the harness
// doubles. Its NewToken dependency hands out tokens in the given order and
// returns "outer-fallback" once they are exhausted.
func (h *harness) build(t *testing.T, tokens ...string) *restartruntime.Service {
	t.Helper()

	tokenIdx := 0
	tokenGen := func() string {
		if tokenIdx >= len(tokens) {
			return "outer-fallback"
		}
		// Named token rather than t so the closure does not shadow the
		// enclosing *testing.T.
		token := tokens[tokenIdx]
		tokenIdx++
		return token
	}

	service, err := restartruntime.NewService(restartruntime.Dependencies{
		RuntimeRecords: h.records,
		OperationLogs:  h.operationLogs,
		Docker:         h.docker,
		Leases:         h.leases,
		StopService:    h.stopService,
		StartService:   h.startService,
		Coordination:   config.CoordinationConfig{GameLeaseTTL: time.Minute},
		Telemetry:      h.telemetry,
		Clock:          func() time.Time { return h.now },
		NewToken:       tokenGen,
	})
	require.NoError(t, err)
	return service
}

// imageRef is the image reference stored on the fixture record and expected
// by the docker mock during the inner start.
const imageRef = "registry.example.com/galaxy/game:1.4.7"

// runningRecord returns a running record for "game-1" bound to container
// "ctr-old", with its start, last-op and creation times one hour before now.
func runningRecord(now time.Time) runtime.RuntimeRecord {
	startedAt := now.Add(-time.Hour)
	return runtime.RuntimeRecord{
		GameID:             "game-1",
		Status:             runtime.StatusRunning,
		CurrentContainerID: "ctr-old",
		CurrentImageRef:    imageRef,
		EngineEndpoint:     "http://galaxy-game-game-1:8080",
		StatePath:          "/var/lib/galaxy/games/game-1",
		DockerNetwork:      "galaxy-net",
		StartedAt:          &startedAt,
		LastOpAt:           startedAt,
		CreatedAt:          startedAt,
	}
}

// basicInput returns a valid restart request for "game-1" carrying the
// source reference "rest-req-42".
func basicInput() restartruntime.Input {
	return restartruntime.Input{
		GameID:    "game-1",
		OpSource:  operation.OpSourceGMRest,
		SourceRef: "rest-req-42",
	}
}

// sampleRunResult returns the docker run result used for the replacement
// container "ctr-new", started at now.
func sampleRunResult(now time.Time) ports.RunResult {
	return ports.RunResult{
		ContainerID:    "ctr-new",
		EngineEndpoint: "http://galaxy-game-game-1:8080",
		StartedAt:      now,
	}
}

// expectInnerStart registers the docker mock expectations for a successful
// inner start: ensure network, pull image, inspect image, run container.
func expectInnerStart(h *harness) {
	h.docker.EXPECT().EnsureNetwork(gomock.Any(), "galaxy-net").Return(nil)
	h.docker.EXPECT().PullImage(gomock.Any(), imageRef, gomock.Any()).Return(nil)
	h.docker.EXPECT().InspectImage(gomock.Any(), imageRef).Return(ports.ImageInspect{Ref: imageRef}, nil)
	h.docker.EXPECT().Run(gomock.Any(), gomock.Any()).Return(sampleRunResult(h.now), nil)
}

// --- happy path -------------------------------------------------------

// TestHandleRestartFromRunning restarts a running game and checks the
// resulting record, the three operation log entries with their propagated
// source reference, and that the lease was acquired and released once with
// the outer token.
func TestHandleRestartFromRunning(t *testing.T) {
	h := newHarness(t)
	h.records.stored["game-1"] = runningRecord(h.now)

	h.docker.EXPECT().Stop(gomock.Any(), "ctr-old", 30*time.Second).Return(nil)
	h.docker.EXPECT().Remove(gomock.Any(), "ctr-old").Return(nil)
	expectInnerStart(h)

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	assert.Empty(t, result.ErrorCode)
	assert.Equal(t, "ctr-new", result.Record.CurrentContainerID)
	assert.Equal(t, imageRef, result.Record.CurrentImageRef)
	assert.Equal(t, runtime.StatusRunning, result.Record.Status)

	stops := h.operationLogs.byKind(operation.OpKindStop)
	starts := h.operationLogs.byKind(operation.OpKindStart)
	restarts := h.operationLogs.byKind(operation.OpKindRestart)
	require.Len(t, stops, 1, "inner stop appended its own entry")
	require.Len(t, starts, 1, "inner start appended its own entry")
	require.Len(t, restarts, 1, "outer restart appended one summary entry")

	assert.Equal(t, "rest-req-42", stops[0].SourceRef, "correlation id propagated to inner stop")
	assert.Equal(t, "rest-req-42", starts[0].SourceRef, "correlation id propagated to inner start")
	assert.Equal(t, "rest-req-42", restarts[0].SourceRef, "correlation id stored on outer restart")
	assert.Equal(t, "ctr-new", restarts[0].ContainerID)
	assert.Equal(t, imageRef, restarts[0].ImageRef)

	assert.Equal(t, []string{"outer-token"}, h.leases.acquires)
	assert.Equal(t, []string{"outer-token"}, h.leases.releases)
}

// TestHandleRestartFromStopped restarts a game whose record is already
// stopped and expects the old container to be removed and a new one started.
func TestHandleRestartFromStopped(t *testing.T) {
	h := newHarness(t)
	stoppedRecord := runningRecord(h.now)
	stoppedRecord.Status = runtime.StatusStopped
	stoppedAt := h.now.Add(-30 * time.Minute)
	stoppedRecord.StoppedAt = &stoppedAt
	h.records.stored["game-1"] = stoppedRecord

	// No docker.Stop because inner stop short-circuits via replay no-op.
	h.docker.EXPECT().Remove(gomock.Any(), "ctr-old").Return(nil)
	expectInnerStart(h)

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	assert.Equal(t, "ctr-new", result.Record.CurrentContainerID)
}

// --- correlation id fallback -----------------------------------------

// TestHandleGeneratesCorrelationWhenSourceRefEmpty checks that an empty
// SourceRef is replaced by a generated token and that the same value lands
// on the stop, start and restart log entries.
func TestHandleGeneratesCorrelationWhenSourceRefEmpty(t *testing.T) {
	h := newHarness(t)
	h.records.stored["game-1"] = runningRecord(h.now)

	h.docker.EXPECT().Stop(gomock.Any(), "ctr-old", 30*time.Second).Return(nil)
	h.docker.EXPECT().Remove(gomock.Any(), "ctr-old").Return(nil)
	expectInnerStart(h)

	input := basicInput()
	input.SourceRef = ""

	// First newToken call yields the lease token, second yields the
	// correlation id fallback.
	service := h.build(t, "outer-token", "correlation-fallback")
	result, err := service.Handle(context.Background(), input)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)

	stops := h.operationLogs.byKind(operation.OpKindStop)
	starts := h.operationLogs.byKind(operation.OpKindStart)
	restarts := h.operationLogs.byKind(operation.OpKindRestart)
	require.Len(t, stops, 1)
	require.Len(t, starts, 1)
	require.Len(t, restarts, 1)
	assert.Equal(t, "correlation-fallback", stops[0].SourceRef)
	assert.Equal(t, "correlation-fallback", starts[0].SourceRef)
	assert.Equal(t, "correlation-fallback", restarts[0].SourceRef)
}

// --- failure paths ---------------------------------------------------

// TestHandleNotFoundForMissingRecord expects a not-found failure when no
// record is stored, with a restart log entry but no stop or start entries.
func TestHandleNotFoundForMissingRecord(t *testing.T) {
	h := newHarness(t)

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, startruntime.ErrorCodeNotFound, result.ErrorCode)
	assert.Empty(t, h.operationLogs.byKind(operation.OpKindStop))
	assert.Empty(t, h.operationLogs.byKind(operation.OpKindStart))
	require.Len(t, h.operationLogs.byKind(operation.OpKindRestart), 1)
}

// TestHandleConflictForRemovedRecord expects a conflict failure when the
// stored record is already in the removed status.
func TestHandleConflictForRemovedRecord(t *testing.T) {
	h := newHarness(t)
	removed := runningRecord(h.now)
	removed.Status = runtime.StatusRemoved
	removed.CurrentContainerID = ""
	removedAt := h.now.Add(-time.Hour)
	removed.RemovedAt = &removedAt
	h.records.stored["game-1"] = removed

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, startruntime.ErrorCodeConflict, result.ErrorCode)
}

// TestHandleConflictWhenLeaseBusy expects a conflict failure, and no lease
// release, when the lease cannot be acquired.
func TestHandleConflictWhenLeaseBusy(t *testing.T) {
	h := newHarness(t)
	h.leases.acquired = false

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, startruntime.ErrorCodeConflict, result.ErrorCode)
	assert.Empty(t, h.leases.releases, "release must not run when acquire returned false")
}

// TestHandlePropagatesInnerStopFailure expects a service-unavailable failure
// mentioning the inner stop when docker Stop returns an error.
func TestHandlePropagatesInnerStopFailure(t *testing.T) {
	h := newHarness(t)
	h.records.stored["game-1"] = runningRecord(h.now)

	h.docker.EXPECT().Stop(gomock.Any(), "ctr-old", 30*time.Second).Return(errors.New("daemon unreachable"))

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, startruntime.ErrorCodeServiceUnavailable, result.ErrorCode)
	assert.Contains(t, result.ErrorMessage, "inner stop failed")
}

// TestHandleServiceUnavailableOnDockerRemoveFailure expects a
// service-unavailable failure mentioning docker remove when removing the old
// container returns an error after a successful stop.
func TestHandleServiceUnavailableOnDockerRemoveFailure(t *testing.T) {
	h := newHarness(t)
	h.records.stored["game-1"] = runningRecord(h.now)

	h.docker.EXPECT().Stop(gomock.Any(), "ctr-old", 30*time.Second).Return(nil)
	h.docker.EXPECT().Remove(gomock.Any(), "ctr-old").Return(errors.New("disk i/o"))

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, startruntime.ErrorCodeServiceUnavailable, result.ErrorCode)
	assert.Contains(t, result.ErrorMessage, "docker remove")

	// inner stop did succeed and write its log entry; outer restart records failure.
	require.Len(t, h.operationLogs.byKind(operation.OpKindStop), 1)
	require.Len(t, h.operationLogs.byKind(operation.OpKindRestart), 1)
}

// TestHandlePropagatesInnerStartFailure expects the inner start's
// image-pull-failed error code to surface when PullImage returns an error.
func TestHandlePropagatesInnerStartFailure(t *testing.T) {
	h := newHarness(t)
	h.records.stored["game-1"] = runningRecord(h.now)

	h.docker.EXPECT().Stop(gomock.Any(), "ctr-old", 30*time.Second).Return(nil)
	h.docker.EXPECT().Remove(gomock.Any(), "ctr-old").Return(nil)
	h.docker.EXPECT().EnsureNetwork(gomock.Any(), "galaxy-net").Return(nil)
	h.docker.EXPECT().PullImage(gomock.Any(), imageRef, gomock.Any()).Return(errors.New("manifest unknown"))

	service := h.build(t, "outer-token")
	result, err := service.Handle(context.Background(), basicInput())
	require.NoError(t, err)

	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, startruntime.ErrorCodeImagePullFailed, result.ErrorCode)
	assert.Contains(t, result.ErrorMessage, "inner start failed")
}

// --- input validation ------------------------------------------------

// TestHandleRejectsInvalidInput checks that malformed requests are answered
// with the invalid-request error code. Each case runs as its own subtest so
// a failure names the offending input and does not hide the other cases.
func TestHandleRejectsInvalidInput(t *testing.T) {
	h := newHarness(t)
	service := h.build(t, "outer-token")

	cases := []struct {
		name  string
		input restartruntime.Input
	}{
		{
			name:  "empty game id",
			input: restartruntime.Input{GameID: "", OpSource: operation.OpSourceGMRest},
		},
		{
			name:  "unknown op source",
			input: restartruntime.Input{GameID: "g", OpSource: operation.OpSource("bogus")},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			result, err := service.Handle(context.Background(), tc.input)
			require.NoError(t, err)
			assert.Equal(t, startruntime.ErrorCodeInvalidRequest, result.ErrorCode)
		})
	}
}

// --- constructor -----------------------------------------------------

// TestNewServiceRejectsMissingDependencies expects NewService to return an
// error when only Coordination and Telemetry are supplied.
func TestNewServiceRejectsMissingDependencies(t *testing.T) {
	h := newHarness(t)
	deps := restartruntime.Dependencies{
		Coordination: config.CoordinationConfig{GameLeaseTTL: time.Minute},
		Telemetry:    h.telemetry,
	}
	_, err := restartruntime.NewService(deps)
	require.Error(t, err)
}