feat: gamemaster

This commit is contained in:
Ilia Denisov
2026-05-03 07:59:03 +02:00
committed by GitHub
parent a7cee15115
commit 3e2622757e
229 changed files with 41521 additions and 1098 deletions
@@ -0,0 +1,42 @@
package adminbanish
// Stable error codes surfaced through `Result.ErrorCode`. Every value
// here belongs to the vocabulary frozen by `gamemaster/README.md
// §Error Model` and `gamemaster/api/internal-openapi.yaml`. Stage 19
// service-layer handlers import these identifiers instead of
// redeclaring the strings, so renaming or revaluing any of them is a
// contract change.
const (
	// ErrorCodeInvalidRequest: the request envelope failed structural
	// validation (empty GameID or RaceName).
	ErrorCodeInvalidRequest = "invalid_request"
	// ErrorCodeRuntimeNotFound: no runtime_records row exists for the
	// requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"
	// ErrorCodeForbidden: the requested race is not part of the game's
	// roster (`player_mappings.GetByRace` reported not-found).
	ErrorCodeForbidden = "forbidden"
	// ErrorCodeEngineUnreachable: the engine `/admin/race/banish` call
	// returned a 5xx, timed out, or could not be dispatched at all.
	ErrorCodeEngineUnreachable = "engine_unreachable"
	// ErrorCodeEngineValidationError: the engine `/admin/race/banish`
	// call returned a 4xx response (e.g. invalid race name).
	ErrorCodeEngineValidationError = "engine_validation_error"
	// ErrorCodeEngineProtocolViolation: the engine response did not
	// match the expected protocol shape.
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"
	// ErrorCodeServiceUnavailable: a steady-state dependency
	// (PostgreSQL) was unreachable during this call.
	ErrorCodeServiceUnavailable = "service_unavailable"
	// ErrorCodeInternal: an unexpected error that none of the other
	// codes classify.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,317 @@
// Package adminbanish implements the admin banish service-layer
// orchestrator owned by Game Master. It is driven by Game Lobby (and,
// in a later iteration, Admin Service) through
// `POST /api/v1/internal/games/{game_id}/race/{race_name}/banish` after
// a permanent membership removal at the platform level. The flow
// resolves the race against the installed roster, calls the engine
// `/admin/race/banish` endpoint, and writes one operation_log row.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Banish`. Design rationale (no runtime status check,
// missing race surfaces as `forbidden`) is captured in
// `gamemaster/docs/stage17-admin-operations.md`.
package adminbanish
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/telemetry"
)
// Input stores the per-call arguments for one admin banish operation.
// Structural validation lives in Validate; Handle rejects any input
// that fails it with `invalid_request` before touching a store.
type Input struct {
	// GameID identifies the runtime the race belongs to.
	GameID string
	// RaceName stores the platform race name to banish.
	RaceName string
	// OpSource classifies how the request entered Game Master. Used to
	// stamp `operation_log.op_source`. Defaults to `lobby_internal`
	// when missing or unrecognised — Lobby is the only v1 caller (see
	// fallbackOpSource).
	OpSource operation.OpSource
	// SourceRef stores the optional opaque per-source reference (REST
	// request id). Empty when the caller does not provide one.
	SourceRef string
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched: both GameID and
// RaceName must contain at least one non-whitespace character. Returns
// nil when the input is valid.
func (input Input) Validate() error {
	// errors.New over fmt.Errorf: the messages are constant, so no
	// format directives are needed (staticcheck S1039).
	if strings.TrimSpace(input.GameID) == "" {
		return errors.New("game id must not be empty")
	}
	if strings.TrimSpace(input.RaceName) == "" {
		return errors.New("race name must not be empty")
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver). Use IsSuccess
// to test the Outcome.
type Result struct {
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure (one of the
	// ErrorCode* constants above). Empty on success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}
// IsSuccess reports whether the result represents a successful
// operation, i.e. Outcome equals operation.OutcomeSuccess.
func (result Result) IsSuccess() bool {
	succeeded := result.Outcome == operation.OutcomeSuccess
	return succeeded
}
// Dependencies groups the collaborators required by Service. All
// fields except Logger and Clock are required; NewService rejects any
// missing required collaborator.
type Dependencies struct {
	// RuntimeRecords supplies the engine endpoint used for the engine
	// call.
	RuntimeRecords ports.RuntimeRecordStore
	// PlayerMappings resolves the race against the installed roster.
	PlayerMappings ports.PlayerMappingStore
	// OperationLogs records the audit entry.
	OperationLogs ports.OperationLogStore
	// Engine drives the `/admin/race/banish` call.
	Engine ports.EngineClient
	// Telemetry is required: every banish call ends with a
	// `gamemaster.banish.outcomes` counter sample.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
// Service executes the admin banish lifecycle operation. Construct it
// via NewService; the zero value is unusable (nil collaborators).
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	playerMappings ports.PlayerMappingStore
	operationLogs  ports.OperationLogStore
	engine         ports.EngineClient
	telemetry      *telemetry.Runtime
	logger         *slog.Logger
	clock          func() time.Time
}
// NewService constructs one Service from deps. Each required
// collaborator is checked individually; Logger and Clock fall back to
// their documented defaults when nil.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new admin banish service: nil runtime records")
	}
	if deps.PlayerMappings == nil {
		return nil, errors.New("new admin banish service: nil player mappings")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new admin banish service: nil operation logs")
	}
	if deps.Engine == nil {
		return nil, errors.New("new admin banish service: nil engine client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new admin banish service: nil telemetry runtime")
	}
	service := &Service{
		runtimeRecords: deps.RuntimeRecords,
		playerMappings: deps.PlayerMappings,
		operationLogs:  deps.OperationLogs,
		engine:         deps.Engine,
		telemetry:      deps.Telemetry,
		logger:         deps.Logger,
		clock:          deps.Clock,
	}
	if service.clock == nil {
		service.clock = time.Now
	}
	if service.logger == nil {
		service.logger = slog.Default()
	}
	service.logger = service.logger.With("service", "gamemaster.adminbanish")
	return service, nil
}
// Handle executes one admin banish operation end-to-end. The Go-level
// error return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome flows through Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("admin banish: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("admin banish: nil context")
	}
	// Capture the start timestamp before validation so the audit row
	// reflects when the request entered the service.
	opStartedAt := service.clock().UTC()
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, err.Error()), nil
	}
	// Resolve the runtime row: it supplies the engine endpoint for the
	// banish call. Deliberately no runtime status check here (see the
	// package doc: README §Lifecycles → Banish).
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	switch {
	case errors.Is(err, runtime.ErrNotFound):
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist"), nil
	case err != nil:
		// A non-sentinel store error is treated as a dependency outage.
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error())), nil
	}
	// A race missing from the installed roster surfaces as `forbidden`
	// by design (rationale in stage17-admin-operations.md).
	if _, err := service.playerMappings.GetByRace(ctx, input.GameID, input.RaceName); err != nil {
		switch {
		case errors.Is(err, playermapping.ErrNotFound):
			return service.recordFailure(ctx, opStartedAt, input,
				ErrorCodeForbidden, fmt.Sprintf("race %q not in roster", input.RaceName)), nil
		default:
			return service.recordFailure(ctx, opStartedAt, input,
				ErrorCodeServiceUnavailable, fmt.Sprintf("get player mapping by race: %s", err.Error())), nil
		}
	}
	// Drive the engine call; sentinel-to-code mapping lives in
	// classifyEngineError.
	if err := service.engine.BanishRace(ctx, record.EngineEndpoint, input.RaceName); err != nil {
		errorCode := classifyEngineError(err)
		return service.recordFailure(ctx, opStartedAt, input,
			errorCode, fmt.Sprintf("engine banish: %s", err.Error())), nil
	}
	// Success path: best-effort audit row, telemetry sample, then the
	// structured log stamped with the normalised op source.
	service.appendSuccessLog(ctx, opStartedAt, input)
	service.telemetry.RecordBanishOutcome(ctx, string(operation.OutcomeSuccess), "")
	logArgs := []any{
		"game_id", input.GameID,
		"race_name", input.RaceName,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "race banished", logArgs...)
	return Result{Outcome: operation.OutcomeSuccess}, nil
}
// recordFailure assembles the failure Result, appends the
// operation_log failure entry, emits the failure telemetry sample, and
// returns the structured outcome.
//
// The op_source log attribute goes through fallbackOpSource so the
// structured log line agrees with the operation_log row written by
// appendFailureLog (which applies the same fallback) and with the
// success-path log emitted by Handle.
func (service *Service) recordFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	service.telemetry.RecordBanishOutcome(ctx, string(operation.OutcomeFailure), errorCode)
	logArgs := []any{
		"game_id", input.GameID,
		"race_name", input.RaceName,
		"op_source", string(fallbackOpSource(input.OpSource)),
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "admin banish rejected", logArgs...)
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// classifyEngineError maps the engine port sentinels onto the stable
// admin-banish error codes. Any error that is not a recognised
// sentinel falls back to engine_unreachable, which by contract covers
// calls that could not be dispatched at all.
func classifyEngineError(err error) string {
	if errors.Is(err, ports.ErrEngineValidation) {
		return ErrorCodeEngineValidationError
	}
	if errors.Is(err, ports.ErrEngineProtocolViolation) {
		return ErrorCodeEngineProtocolViolation
	}
	// ports.ErrEngineUnreachable and every unclassified error map to
	// the same code, so they share one exit.
	return ErrorCodeEngineUnreachable
}
// appendSuccessLog records the success operation_log entry via the
// best-effort append helper.
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	finishedAt := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindBanish,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &finishedAt,
	}
	service.bestEffortAppend(ctx, entry)
}
// appendFailureLog records the failure operation_log entry. When the
// input game id is blank the write is skipped entirely: the entry
// validator would reject such a row, and it would add no audit value.
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
	if strings.TrimSpace(input.GameID) == "" {
		return
	}
	finishedAt := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindBanish,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &finishedAt,
	}
	service.bestEffortAppend(ctx, entry)
}
// bestEffortAppend writes one operation_log entry. A failed append is
// logged and then discarded: the engine state and the runtime row —
// not the audit log — are the source of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, err := service.operationLogs.Append(ctx, entry)
	if err == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", err.Error(),
	)
}
// fallbackOpSource normalises an unknown or empty op source to
// `lobby_internal` — Lobby is the only v1 banish caller. A recognised
// source (e.g. `admin_rest`) passes through untouched so future Admin
// Service traffic stays identifiable.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceLobbyInternal
	}
	return source
}
@@ -0,0 +1,415 @@
package adminbanish_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/adapters/mocks"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/adminbanish"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)
// --- test doubles -----------------------------------------------------
// fakeRuntimeRecords is an in-memory ports.RuntimeRecordStore test
// double. The banish flow only calls Get; every other interface
// method returns a "not used" error so an unexpected call fails the
// test loudly instead of silently succeeding.
type fakeRuntimeRecords struct {
	mu     sync.Mutex                       // guards stored and getErr
	stored map[string]runtime.RuntimeRecord // seeded records keyed by game id
	getErr error                            // when set, Get fails with this error (outage simulation)
}

// newFakeRuntimeRecords constructs an empty store.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed installs one record under its GameID.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}

// Get returns the injected error when set, the seeded record when
// present, and runtime.ErrNotFound otherwise.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}

// The remaining RuntimeRecordStore methods are outside the banish
// flow; each fails loudly if reached.

func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
// fakePlayerMappings is an in-memory ports.PlayerMappingStore test
// double. The banish flow only calls GetByRace; the other interface
// methods return a "not used" error so unexpected calls fail loudly.
type fakePlayerMappings struct {
	mu     sync.Mutex                                    // guards races and getErr
	races  map[string]map[string]playermapping.PlayerMapping // game id → race name → mapping
	getErr error                                         // when set, GetByRace fails with this error
}

// newFakePlayerMappings constructs an empty roster store.
func newFakePlayerMappings() *fakePlayerMappings {
	return &fakePlayerMappings{races: map[string]map[string]playermapping.PlayerMapping{}}
}

// seedRace installs one mapping for the given game/race pair.
func (s *fakePlayerMappings) seedRace(gameID, raceName, userID, uuid string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.races[gameID]; !ok {
		s.races[gameID] = map[string]playermapping.PlayerMapping{}
	}
	s.races[gameID][raceName] = playermapping.PlayerMapping{
		GameID: gameID, UserID: userID, RaceName: raceName, EnginePlayerUUID: uuid,
		CreatedAt: time.Now(),
	}
}

func (s *fakePlayerMappings) BulkInsert(context.Context, []playermapping.PlayerMapping) error {
	return errors.New("not used")
}

func (s *fakePlayerMappings) Get(context.Context, string, string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used")
}

// GetByRace returns the injected error when set, the seeded mapping
// when present, and playermapping.ErrNotFound otherwise (which the
// service maps to `forbidden`).
func (s *fakePlayerMappings) GetByRace(_ context.Context, gameID, raceName string) (playermapping.PlayerMapping, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return playermapping.PlayerMapping{}, s.getErr
	}
	gameRaces, ok := s.races[gameID]
	if !ok {
		return playermapping.PlayerMapping{}, playermapping.ErrNotFound
	}
	rec, ok := gameRaces[raceName]
	if !ok {
		return playermapping.PlayerMapping{}, playermapping.ErrNotFound
	}
	return rec, nil
}

func (s *fakePlayerMappings) ListByGame(context.Context, string) ([]playermapping.PlayerMapping, error) {
	return nil, errors.New("not used")
}

func (s *fakePlayerMappings) DeleteByGame(context.Context, string) error {
	return errors.New("not used")
}
// fakeOperationLogs is an in-memory ports.OperationLogStore test
// double that validates and collects appended entries so tests can
// inspect the most recent audit row.
type fakeOperationLogs struct {
	mu      sync.Mutex                // guards entries
	entries []operation.OperationEntry // appended rows in arrival order
}

// Append validates the entry (mirroring the real store) and records
// it; the returned id is the 1-based position of the entry.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if err := entry.Validate(); err != nil {
		return 0, err
	}
	s.entries = append(s.entries, entry)
	return int64(len(s.entries)), nil
}

func (s *fakeOperationLogs) ListByGame(context.Context, string, int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used")
}

// lastEntry returns the most recently appended entry, reporting false
// when nothing has been appended yet.
func (s *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if len(s.entries) == 0 {
		return operation.OperationEntry{}, false
	}
	return s.entries[len(s.entries)-1], true
}
// --- harness ----------------------------------------------------------
// harness bundles a fully wired adminbanish.Service with all of its
// test doubles so each test can seed state and assert on side effects.
type harness struct {
	t         *testing.T
	ctrl      *gomock.Controller
	runtime   *fakeRuntimeRecords   // backing store for runtime rows
	mappings  *fakePlayerMappings   // backing store for the roster
	logs      *fakeOperationLogs    // captured audit entries
	engine    *mocks.MockEngineClient
	telemetry *telemetry.Runtime
	now       time.Time // fixed clock value injected into the service
	service   *adminbanish.Service
}
// newHarness wires a Service against fresh fakes, a gomock engine
// client, a no-op telemetry runtime, and a deterministic clock frozen
// at h.now.
func newHarness(t *testing.T) *harness {
	t.Helper()
	ctrl := gomock.NewController(t)
	// nil providers yield a telemetry runtime that records nothing.
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	h := &harness{
		t:         t,
		ctrl:      ctrl,
		runtime:   newFakeRuntimeRecords(),
		mappings:  newFakePlayerMappings(),
		logs:      &fakeOperationLogs{},
		engine:    mocks.NewMockEngineClient(ctrl),
		telemetry: telemetryRuntime,
		now:       time.Date(2026, time.May, 1, 12, 0, 0, 0, time.UTC),
	}
	service, err := adminbanish.NewService(adminbanish.Dependencies{
		RuntimeRecords: h.runtime,
		PlayerMappings: h.mappings,
		OperationLogs:  h.logs,
		Engine:         h.engine,
		Telemetry:      h.telemetry,
		Clock:          func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = service
	return h
}
// Shared fixture identifiers used across the banish service tests.
const (
	testGameID   = "game-001"
	testRaceName = "Aelinari"
	testEndpoint = "http://galaxy-game-game-001:8080"
)
// seedRuntime installs a plausible runtime row for testGameID in the
// requested status, back-dated relative to the harness clock.
func (h *harness) seedRuntime(status runtime.Status) {
	created := h.now.Add(-time.Hour)
	started := h.now.Add(-30 * time.Minute)
	record := runtime.RuntimeRecord{
		GameID:               testGameID,
		Status:               status,
		EngineEndpoint:       testEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		CurrentTurn:          7,
		CreatedAt:            created,
		UpdatedAt:            started,
		StartedAt:            &started,
	}
	h.runtime.seed(record)
}
// baseInput returns a structurally valid banish input for the seeded
// fixture game and race; tests mutate the copy as needed.
func baseInput() adminbanish.Input {
	return adminbanish.Input{
		GameID:    testGameID,
		RaceName:  testRaceName,
		OpSource:  operation.OpSourceLobbyInternal,
		SourceRef: "req-banish-001",
	}
}
// --- tests ------------------------------------------------------------
// TestNewServiceRejectsMissingDeps nils out each required collaborator
// in turn and asserts that NewService refuses to construct.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cases := []struct {
		name string
		mut  func(*adminbanish.Dependencies) // clears one required dep
	}{
		{"runtime records", func(d *adminbanish.Dependencies) { d.RuntimeRecords = nil }},
		{"player mappings", func(d *adminbanish.Dependencies) { d.PlayerMappings = nil }},
		{"operation logs", func(d *adminbanish.Dependencies) { d.OperationLogs = nil }},
		{"engine", func(d *adminbanish.Dependencies) { d.Engine = nil }},
		{"telemetry", func(d *adminbanish.Dependencies) { d.Telemetry = nil }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			deps := adminbanish.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				PlayerMappings: newFakePlayerMappings(),
				OperationLogs:  &fakeOperationLogs{},
				Engine:         mocks.NewMockEngineClient(ctrl),
				Telemetry:      telemetryRuntime,
			}
			tc.mut(&deps)
			service, err := adminbanish.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
// TestHandleHappyPath drives a full success: runtime present, race in
// the roster, engine accepts. The audit entry must record a successful
// banish sourced from lobby_internal.
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	h.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")
	h.engine.EXPECT().BanishRace(gomock.Any(), testEndpoint, testRaceName).Return(nil)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "want success, got %+v", result)
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpKindBanish, entry.OpKind)
	assert.Equal(t, operation.OpSourceLobbyInternal, entry.OpSource)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
}
// TestHandleHappyPathOnStoppedRuntime asserts the service issues the
// engine call regardless of runtime status.
func TestHandleHappyPathOnStoppedRuntime(t *testing.T) {
	// README §Banish does not check status; the engine call may fail
	// later with engine_unreachable, but the service runs the call.
	h := newHarness(t)
	h.seedRuntime(runtime.StatusStopped)
	h.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")
	h.engine.EXPECT().BanishRace(gomock.Any(), testEndpoint, testRaceName).Return(nil)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
}
// TestHandleRuntimeNotFound: a seeded roster without a matching
// runtime row must yield the runtime_not_found error code.
func TestHandleRuntimeNotFound(t *testing.T) {
	testHarness := newHarness(t)
	testHarness.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")

	result, err := testHarness.service.Handle(context.Background(), baseInput())

	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeRuntimeNotFound, result.ErrorCode)
}
// TestHandleForbiddenWhenRaceMissing asserts that a race absent from
// the roster surfaces as `forbidden` and that the failure is audited.
func TestHandleForbiddenWhenRaceMissing(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeForbidden, result.ErrorCode)
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, adminbanish.ErrorCodeForbidden, entry.ErrorCode)
}
// TestHandleEngineUnreachable maps the unreachable sentinel to
// engine_unreachable.
func TestHandleEngineUnreachable(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	h.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")
	h.engine.EXPECT().BanishRace(gomock.Any(), testEndpoint, testRaceName).
		Return(ports.ErrEngineUnreachable)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeEngineUnreachable, result.ErrorCode)
}

// TestHandleEngineValidation maps the validation sentinel to
// engine_validation_error.
func TestHandleEngineValidation(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	h.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")
	h.engine.EXPECT().BanishRace(gomock.Any(), testEndpoint, testRaceName).
		Return(ports.ErrEngineValidation)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeEngineValidationError, result.ErrorCode)
}

// TestHandleEngineProtocolViolation maps the protocol sentinel to
// engine_protocol_violation.
func TestHandleEngineProtocolViolation(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	h.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")
	h.engine.EXPECT().BanishRace(gomock.Any(), testEndpoint, testRaceName).
		Return(ports.ErrEngineProtocolViolation)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeEngineProtocolViolation, result.ErrorCode)
}
// TestHandleStoreReadFailure asserts a runtime-store outage surfaces
// as service_unavailable.
func TestHandleStoreReadFailure(t *testing.T) {
	h := newHarness(t)
	h.runtime.getErr = errors.New("connection refused")
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeServiceUnavailable, result.ErrorCode)
}

// TestHandleMappingStoreFailure asserts a mapping-store outage (not a
// not-found) surfaces as service_unavailable rather than forbidden.
func TestHandleMappingStoreFailure(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	h.mappings.getErr = errors.New("connection refused")
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminbanish.ErrorCodeServiceUnavailable, result.ErrorCode)
}
// TestHandleInvalidRequest: structurally invalid envelopes must be
// rejected with invalid_request before any store is touched.
func TestHandleInvalidRequest(t *testing.T) {
	type testCase struct {
		name  string
		input adminbanish.Input
	}
	cases := []testCase{
		{name: "empty game id", input: adminbanish.Input{GameID: "", RaceName: "X", OpSource: operation.OpSourceLobbyInternal}},
		{name: "empty race", input: adminbanish.Input{GameID: testGameID, RaceName: "", OpSource: operation.OpSourceLobbyInternal}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			testHarness := newHarness(t)
			result, err := testHarness.service.Handle(context.Background(), tc.input)
			require.NoError(t, err)
			assert.Equal(t, adminbanish.ErrorCodeInvalidRequest, result.ErrorCode)
		})
	}
}
// TestHandleNilContextReturnsError: a nil context is a non-business
// failure and must surface as a Go-level error, not a Result.
func TestHandleNilContextReturnsError(t *testing.T) {
	testHarness := newHarness(t)
	var nilCtx context.Context
	_, err := testHarness.service.Handle(nilCtx, baseInput()) //nolint:staticcheck // guard test
	require.Error(t, err)
}
// TestHandleDefaultsOpSourceToLobbyInternal asserts that an empty op
// source is normalised to lobby_internal in the audit entry.
func TestHandleDefaultsOpSourceToLobbyInternal(t *testing.T) {
	h := newHarness(t)
	h.seedRuntime(runtime.StatusRunning)
	h.mappings.seedRace(testGameID, testRaceName, "user-1", "uuid-1")
	h.engine.EXPECT().BanishRace(gomock.Any(), testEndpoint, testRaceName).Return(nil)
	input := baseInput()
	input.OpSource = ""
	result, err := h.service.Handle(context.Background(), input)
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpSourceLobbyInternal, entry.OpSource)
}
@@ -0,0 +1,50 @@
package adminforce
// Stable error codes surfaced through `Result.ErrorCode`. Every value
// here belongs to the vocabulary frozen by `gamemaster/README.md
// §Error Model` and `gamemaster/api/internal-openapi.yaml`. Stage 19
// service-layer handlers import these identifiers instead of
// redeclaring the strings, so renaming or revaluing any of them is a
// contract change.
const (
	// ErrorCodeInvalidRequest: the request envelope failed structural
	// validation (empty GameID).
	ErrorCodeInvalidRequest = "invalid_request"
	// ErrorCodeRuntimeNotFound: the underlying turn generation found
	// no runtime_records row for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"
	// ErrorCodeRuntimeNotRunning: the runtime is not in `running`.
	// Force-next-turn requires the same precondition the scheduler
	// ticker enforces.
	ErrorCodeRuntimeNotRunning = "runtime_not_running"
	// ErrorCodeConflict: the underlying CAS to
	// `generation_in_progress` lost the race to a concurrent mutation
	// (admin stop / health observation / scheduler tick).
	ErrorCodeConflict = "conflict"
	// ErrorCodeEngineUnreachable: the engine /admin/turn call returned
	// a 5xx, timed out, or could not be dispatched at all.
	ErrorCodeEngineUnreachable = "engine_unreachable"
	// ErrorCodeEngineValidationError: the engine /admin/turn call
	// returned a 4xx.
	ErrorCodeEngineValidationError = "engine_validation_error"
	// ErrorCodeEngineProtocolViolation: the engine response did not
	// match the expected schema or the installed roster.
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"
	// ErrorCodeServiceUnavailable: a steady-state dependency
	// (PostgreSQL, Redis, Lobby) was unreachable during this call.
	// Also covers the post-success scheduling write that installs
	// `skip_next_tick=true`.
	ErrorCodeServiceUnavailable = "service_unavailable"
	// ErrorCodeInternal: an unexpected error that none of the other
	// codes classify.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,343 @@
// Package adminforce implements the admin force-next-turn service-layer
// orchestrator owned by Game Master. It is driven by Admin Service or
// system administrators through
// `POST /api/v1/internal/runtimes/{game_id}/force-next-turn` and runs
// the turn-generation flow synchronously, then sets
// `runtime_records.skip_next_tick=true` so the next scheduler-driven
// generation skips one regular cron step.
//
// The skip rule guarantees that the inter-turn spacing is never shorter
// than one schedule interval, regardless of when the force is issued.
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Force-next-turn`. Design rationale is captured in
// `gamemaster/docs/stage17-admin-operations.md`.
package adminforce
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/turngeneration"
"galaxy/gamemaster/internal/telemetry"
)
// TurnGenerator narrows `*turngeneration.Service` to the single method
// adminforce calls. The interface lets tests substitute a stub without
// constructing the entire turn-generation collaborator graph
// (consumer-side interface definition).
type TurnGenerator interface {
	Handle(ctx context.Context, input turngeneration.Input) (turngeneration.Result, error)
}
// Input stores the per-call arguments for one admin force-next-turn
// operation. Structural validation lives in Validate.
type Input struct {
	// GameID identifies the runtime to advance.
	GameID string
	// OpSource classifies how the request entered Game Master. Used to
	// stamp `operation_log.op_source` on both the driver entry and the
	// inner turn-generation entry. Defaults to `admin_rest` when
	// missing or unrecognised.
	OpSource operation.OpSource
	// SourceRef stores the optional opaque per-source reference (REST
	// request id, admin user id). Empty when the caller does not
	// provide one.
	SourceRef string
}
// Validate reports whether input carries the structural invariants the
// service requires before the inner turn-generation call: GameID must
// contain at least one non-whitespace character. Returns nil when the
// input is valid.
func (input Input) Validate() error {
	// errors.New over fmt.Errorf: the message is constant, so no
	// format directives are needed (staticcheck S1039).
	if strings.TrimSpace(input.GameID) == "" {
		return errors.New("game id must not be empty")
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver). Use IsSuccess
// to test the Outcome.
type Result struct {
	// TurnGeneration carries the inner turn-generation result. Always
	// populated when Handle returns nil error and the input passed
	// validation; zero on early-rejection failures
	// (invalid_request).
	TurnGeneration turngeneration.Result
	// SkipScheduled reports whether the post-success
	// `skip_next_tick=true` write landed. False on failure paths and
	// when the inner turn-generation surfaced a failure.
	SkipScheduled bool
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure (one of the
	// ErrorCode* constants above). Empty on success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}
// IsSuccess reports whether the result represents a successful
// operation, i.e. Outcome equals operation.OutcomeSuccess.
func (result Result) IsSuccess() bool {
	succeeded := result.Outcome == operation.OutcomeSuccess
	return succeeded
}
// Dependencies groups the collaborators required by Service. All
// fields except Logger and Clock are required; NewService rejects any
// missing required collaborator.
type Dependencies struct {
	// RuntimeRecords drives the post-success scheduling write that
	// installs `skip_next_tick=true`.
	RuntimeRecords ports.RuntimeRecordStore
	// OperationLogs records the audit driver entry
	// (`op_kind=force_next_turn`).
	OperationLogs ports.OperationLogStore
	// TurnGeneration runs the inner turn-generation flow. Required.
	TurnGeneration TurnGenerator
	// Telemetry is required: every adminforce call ends with a
	// telemetry record on the inner turn-generation counter.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
// Service executes the admin force-next-turn lifecycle operation.
// Construct it via NewService; the zero value is unusable (nil
// collaborators).
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	operationLogs  ports.OperationLogStore
	turnGen        TurnGenerator
	telemetry      *telemetry.Runtime
	logger         *slog.Logger
	clock          func() time.Time
}
// NewService constructs one Service from deps. Each required
// collaborator is checked individually; Logger and Clock fall back to
// their documented defaults when nil.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new admin force service: nil runtime records")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new admin force service: nil operation logs")
	}
	if deps.TurnGeneration == nil {
		return nil, errors.New("new admin force service: nil turn generation")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new admin force service: nil telemetry runtime")
	}
	service := &Service{
		runtimeRecords: deps.RuntimeRecords,
		operationLogs:  deps.OperationLogs,
		turnGen:        deps.TurnGeneration,
		telemetry:      deps.Telemetry,
		logger:         deps.Logger,
		clock:          deps.Clock,
	}
	if service.clock == nil {
		service.clock = time.Now
	}
	if service.logger == nil {
		service.logger = slog.Default()
	}
	service.logger = service.logger.With("service", "gamemaster.adminforce")
	return service, nil
}
// Handle executes one admin force-next-turn operation end-to-end.
// The Go-level error return is reserved for non-business failures (nil
// context, nil receiver). Every business outcome flows through Result.
//
// Sequence: validate input, run the inner turn-generation flow with
// TriggerForce, persist `skip_next_tick=true` on the runtime row, then
// write the success audit entry.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("admin force: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("admin force: nil context")
	}
	// Captured once so audit entries report the true wall-clock start
	// regardless of which later step fails.
	opStartedAt := service.clock().UTC()
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, err.Error()), nil
	}
	turnResult, err := service.turnGen.Handle(ctx, turngeneration.Input{
		GameID:    input.GameID,
		Trigger:   turngeneration.TriggerForce,
		OpSource:  fallbackOpSource(input.OpSource),
		SourceRef: input.SourceRef,
	})
	if err != nil {
		// A Go-level error from the inner flow is unexpected; map it
		// to the internal error code.
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeInternal, fmt.Sprintf("turn generation: %s", err.Error())), nil
	}
	if !turnResult.IsSuccess() {
		// Propagate the inner error code verbatim; fall back to
		// internal only when the inner result left it empty.
		errorCode := turnResult.ErrorCode
		if errorCode == "" {
			errorCode = ErrorCodeInternal
		}
		return service.recordFailureWithTurn(ctx, opStartedAt, input, turnResult,
			errorCode, turnResult.ErrorMessage), nil
	}
	scheduledAt := service.clock().UTC()
	// Re-assert the inner result's scheduling fields so this write does
	// not clobber next_generation_at or current_turn while flipping the
	// skip flag on.
	scheduling := ports.UpdateSchedulingInput{
		GameID:           input.GameID,
		NextGenerationAt: turnResult.Record.NextGenerationAt,
		SkipNextTick:     true,
		CurrentTurn:      turnResult.Record.CurrentTurn,
		Now:              scheduledAt,
	}
	if err := service.runtimeRecords.UpdateScheduling(ctx, scheduling); err != nil {
		// The forced turn already landed; the skip flag did not. Report
		// as a service_unavailable so the admin UI can retry the skip
		// without re-driving the engine.
		return service.recordFailureWithTurn(ctx, opStartedAt, input, turnResult,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("update scheduling skip flag: %s", err.Error())), nil
	}
	service.appendSuccessLog(ctx, opStartedAt, input)
	logArgs := []any{
		"game_id", input.GameID,
		"current_turn", turnResult.Record.CurrentTurn,
		"finished", turnResult.Finished,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "force next turn applied", logArgs...)
	return Result{
		TurnGeneration: turnResult,
		SkipScheduled:  true,
		Outcome:        operation.OutcomeSuccess,
	}, nil
}
// recordFailure handles a failure raised before the inner
// turn-generation result existed: it appends the audit entry, emits a
// warn log, and assembles the failure Result.
func (service *Service) recordFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	attrs := append([]any{
		"game_id", input.GameID,
		"op_source", string(input.OpSource),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "force next turn rejected", attrs...)
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// recordFailureWithTurn handles a failure observed after the inner
// turn-generation step ran; the inner result rides along in the
// returned Result so callers can still inspect it.
func (service *Service) recordFailureWithTurn(ctx context.Context, opStartedAt time.Time, input Input, turnResult turngeneration.Result, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	attrs := append([]any{
		"game_id", input.GameID,
		"op_source", string(input.OpSource),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "force next turn failed", attrs...)
	return Result{
		TurnGeneration: turnResult,
		Outcome:        operation.OutcomeFailure,
		ErrorCode:      errorCode,
		ErrorMessage:   errorMessage,
	}
}
// appendSuccessLog writes the success driver operation_log entry
// (op_kind=force_next_turn, outcome=success).
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	completedAt := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindForceNextTurn,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &completedAt,
	}
	service.bestEffortAppend(ctx, entry)
}
// appendFailureLog records the failure driver operation_log entry.
// Skipped when the input game id is blank: the entry validator rejects
// an empty GameID, and an audit row without one adds no value. The
// guard runs before the clock read so a skipped call consumes no
// timestamp — same shape as adminpatch's appendFailureLog.
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
	if strings.TrimSpace(input.GameID) == "" {
		return
	}
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindForceNextTurn,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &finishedAt,
	})
}
// bestEffortAppend writes one operation_log entry. Append errors are
// logged and dropped; the runtime row is the source of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, err := service.operationLogs.Append(ctx, entry)
	if err == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", err.Error(),
	)
}
// fallbackOpSource substitutes `admin_rest` for unknown op sources.
// Mirrors `gamemaster/README.md §Trusted Surfaces`.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}
@@ -0,0 +1,437 @@
package adminforce_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/adminforce"
"galaxy/gamemaster/internal/service/turngeneration"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- test doubles -----------------------------------------------------
// fakeRuntimeRecords is an in-memory ports.RuntimeRecordStore double.
// Only Get and UpdateScheduling are functional; every other method
// returns a "not used" error so an unexpected call fails the test.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord
	// schErr, when set, is returned by UpdateScheduling to script a
	// scheduling-write failure.
	schErr error
	// scheds accumulates every UpdateScheduling input for assertions.
	scheds []ports.UpdateSchedulingInput
}
// newFakeRuntimeRecords builds an empty fake store ready for seeding.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	fake := &fakeRuntimeRecords{}
	fake.stored = make(map[string]runtime.RuntimeRecord)
	return fake
}
// seed installs record into the store keyed by its GameID.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	key := record.GameID
	s.stored[key] = record
}
// Get returns the seeded record for gameID, or runtime.ErrNotFound.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if record, ok := s.stored[gameID]; ok {
		return record, nil
	}
	return runtime.RuntimeRecord{}, runtime.ErrNotFound
}
// Insert is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}
// UpdateStatus is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
	return errors.New("not used")
}
// UpdateScheduling applies the scheduling write to the stored record
// and appends input to scheds for later assertions. When schErr is
// scripted it is returned before any state changes.
func (s *fakeRuntimeRecords) UpdateScheduling(_ context.Context, input ports.UpdateSchedulingInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.schErr != nil {
		return s.schErr
	}
	record, ok := s.stored[input.GameID]
	if !ok {
		return runtime.ErrNotFound
	}
	if input.NextGenerationAt != nil {
		// Copy the time so the stored record does not alias the
		// caller's pointer.
		next := *input.NextGenerationAt
		record.NextGenerationAt = &next
	} else {
		record.NextGenerationAt = nil
	}
	record.SkipNextTick = input.SkipNextTick
	record.CurrentTurn = input.CurrentTurn
	record.UpdatedAt = input.Now
	s.stored[input.GameID] = record
	s.scheds = append(s.scheds, input)
	return nil
}
// UpdateImage is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}
// UpdateEngineHealth is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}
// Delete is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}
// ListDueRunning is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
// ListByStatus is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
// List is not exercised by adminforce; a call surfaces as an error.
func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
// fakeOperationLogs is an in-memory ports.OperationLogStore double
// that validates and accumulates appended entries.
type fakeOperationLogs struct {
	mu      sync.Mutex
	entries []operation.OperationEntry
}
// Append validates entry like the real store would, stores it, and
// returns a 1-based id.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	if err := entry.Validate(); err != nil {
		return 0, err
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	s.entries = append(s.entries, entry)
	return int64(len(s.entries)), nil
}
// ListByGame is not exercised by adminforce; a call surfaces as an error.
func (s *fakeOperationLogs) ListByGame(context.Context, string, int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used")
}
// snapshot returns a defensive copy of all appended entries.
func (s *fakeOperationLogs) snapshot() []operation.OperationEntry {
	s.mu.Lock()
	defer s.mu.Unlock()
	cloned := make([]operation.OperationEntry, len(s.entries))
	copy(cloned, s.entries)
	return cloned
}
// lastEntry returns the most recently appended entry, if any.
func (s *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	count := len(s.entries)
	if count == 0 {
		return operation.OperationEntry{}, false
	}
	return s.entries[count-1], true
}
// fakeTurnGenerator is a scripted adminforce.TurnGenerator double:
// it records every input and replays the configured result/err pair.
type fakeTurnGenerator struct {
	mu     sync.Mutex
	calls  []turngeneration.Input
	result turngeneration.Result
	err    error
}
// Handle appends input to calls and replays the scripted outcome.
func (s *fakeTurnGenerator) Handle(_ context.Context, input turngeneration.Input) (turngeneration.Result, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.calls = append(s.calls, input)
	result, err := s.result, s.err
	return result, err
}
// --- harness ----------------------------------------------------------
// harness bundles the service under test with its doubles and a fixed
// clock so each test controls every collaborator.
type harness struct {
	t         *testing.T
	runtime   *fakeRuntimeRecords
	logs      *fakeOperationLogs
	turn      *fakeTurnGenerator
	telemetry *telemetry.Runtime
	// now is the value returned by the injected Clock.
	now     time.Time
	service *adminforce.Service
}
// newHarness wires a Service against the in-memory doubles with a
// frozen clock (2026-05-01T12:00:00Z).
func newHarness(t *testing.T) *harness {
	t.Helper()
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	h := &harness{
		t:         t,
		runtime:   newFakeRuntimeRecords(),
		logs:      &fakeOperationLogs{},
		turn:      &fakeTurnGenerator{},
		telemetry: telemetryRuntime,
		now:       time.Date(2026, time.May, 1, 12, 0, 0, 0, time.UTC),
	}
	service, err := adminforce.NewService(adminforce.Dependencies{
		RuntimeRecords: h.runtime,
		OperationLogs:  h.logs,
		TurnGeneration: h.turn,
		Telemetry:      h.telemetry,
		Clock:          func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = service
	return h
}
// seedRunningRecord installs a healthy running runtime for "game-001"
// (turn 5, next generation 30 minutes out) and returns the seeded
// record for per-test mutation.
func (h *harness) seedRunningRecord() runtime.RuntimeRecord {
	created := h.now.Add(-time.Hour)
	started := h.now.Add(-30 * time.Minute)
	next := h.now.Add(30 * time.Minute)
	record := runtime.RuntimeRecord{
		GameID:               "game-001",
		Status:               runtime.StatusRunning,
		EngineEndpoint:       "http://galaxy-game-game-001:8080",
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		CurrentTurn:          5,
		NextGenerationAt:     &next,
		EngineHealth:         "healthy",
		CreatedAt:            created,
		UpdatedAt:            started,
		StartedAt:            &started,
	}
	h.runtime.seed(record)
	return record
}
// baseInput returns a valid force-next-turn request for game-001.
func baseInput() adminforce.Input {
	input := adminforce.Input{GameID: "game-001"}
	input.OpSource = operation.OpSourceAdminRest
	input.SourceRef = "req-force-001"
	return input
}
// --- tests ------------------------------------------------------------
// TestNewServiceRejectsMissingDeps verifies that NewService fails for
// each nil required collaborator, one sub-test per dependency.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cases := []struct {
		name string
		mut  func(*adminforce.Dependencies)
	}{
		{"runtime records", func(d *adminforce.Dependencies) { d.RuntimeRecords = nil }},
		{"operation logs", func(d *adminforce.Dependencies) { d.OperationLogs = nil }},
		{"turn generation", func(d *adminforce.Dependencies) { d.TurnGeneration = nil }},
		{"telemetry", func(d *adminforce.Dependencies) { d.Telemetry = nil }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Start from a fully valid Dependencies and nil exactly one
			// field per case.
			deps := adminforce.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				OperationLogs:  &fakeOperationLogs{},
				TurnGeneration: &fakeTurnGenerator{},
				Telemetry:      telemetryRuntime,
			}
			tc.mut(&deps)
			service, err := adminforce.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
// TestHandleHappyPathSetsSkipNextTick verifies the full success path:
// one TriggerForce call to the turn generator, one scheduling write
// with skip=true carrying the inner result's fields, and a success
// audit entry.
func TestHandleHappyPathSetsSkipNextTick(t *testing.T) {
	h := newHarness(t)
	original := h.seedRunningRecord()
	postTurn := original
	postTurn.CurrentTurn = original.CurrentTurn + 1
	nextGen := h.now.Add(time.Hour)
	postTurn.NextGenerationAt = &nextGen
	postTurn.SkipNextTick = false
	h.turn.result = turngeneration.Result{
		Record:  postTurn,
		Trigger: turngeneration.TriggerForce,
		Outcome: operation.OutcomeSuccess,
	}
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "want success, got %+v", result)
	assert.True(t, result.SkipScheduled)
	// turngeneration.Handle invoked once with TriggerForce.
	require.Len(t, h.turn.calls, 1)
	assert.Equal(t, turngeneration.TriggerForce, h.turn.calls[0].Trigger)
	assert.Equal(t, operation.OpSourceAdminRest, h.turn.calls[0].OpSource)
	assert.Equal(t, "req-force-001", h.turn.calls[0].SourceRef)
	// Exactly one UpdateScheduling call with skip=true and identical
	// next_generation_at / current_turn from the inner result.
	require.Len(t, h.runtime.scheds, 1)
	scheds := h.runtime.scheds[0]
	assert.True(t, scheds.SkipNextTick)
	require.NotNil(t, scheds.NextGenerationAt)
	assert.True(t, scheds.NextGenerationAt.Equal(nextGen))
	assert.Equal(t, postTurn.CurrentTurn, scheds.CurrentTurn)
	// Driver entry op_kind=force_next_turn, outcome=success.
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpKindForceNextTurn, entry.OpKind)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
	assert.Equal(t, "req-force-001", entry.SourceRef)
}
// TestHandleSetsSkipEvenWhenFinished verifies the scheduling write
// still happens when the inner turn generation finished the game,
// propagating the cleared next_generation_at.
func TestHandleSetsSkipEvenWhenFinished(t *testing.T) {
	h := newHarness(t)
	original := h.seedRunningRecord()
	// Inner turn-generation finished the game: NextGenerationAt is
	// cleared, status flipped to finished. adminforce still issues the
	// scheduling write per stage 17 D3.
	finished := original
	finished.Status = runtime.StatusFinished
	finished.NextGenerationAt = nil
	finished.CurrentTurn = original.CurrentTurn + 1
	h.turn.result = turngeneration.Result{
		Record:   finished,
		Trigger:  turngeneration.TriggerForce,
		Finished: true,
		Outcome:  operation.OutcomeSuccess,
	}
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	require.Len(t, h.runtime.scheds, 1, "skip must still be written even when finished")
	assert.True(t, h.runtime.scheds[0].SkipNextTick)
	assert.Nil(t, h.runtime.scheds[0].NextGenerationAt, "must propagate inner result's nil next-gen")
	assert.Equal(t, finished.CurrentTurn, h.runtime.scheds[0].CurrentTurn)
}
// TestHandlePropagatesInnerFailure verifies a failing inner result
// surfaces its error code, skips the scheduling write, and records a
// failure audit entry.
func TestHandlePropagatesInnerFailure(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.turn.result = turngeneration.Result{
		Trigger:      turngeneration.TriggerForce,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    turngeneration.ErrorCodeEngineUnreachable,
		ErrorMessage: "engine 503",
	}
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminforce.ErrorCodeEngineUnreachable, result.ErrorCode)
	assert.False(t, result.SkipScheduled)
	assert.Empty(t, h.runtime.scheds, "scheduling must not run after failure")
	// Driver entry recorded with the propagated error code.
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpKindForceNextTurn, entry.OpKind)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, adminforce.ErrorCodeEngineUnreachable, entry.ErrorCode)
}
// TestHandlePropagatesRuntimeNotRunning verifies the inner
// runtime_not_running code passes through unchanged.
func TestHandlePropagatesRuntimeNotRunning(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.turn.result = turngeneration.Result{
		Trigger:      turngeneration.TriggerForce,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    turngeneration.ErrorCodeRuntimeNotRunning,
		ErrorMessage: "runtime status is \"stopped\"",
	}
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, adminforce.ErrorCodeRuntimeNotRunning, result.ErrorCode)
}
// TestHandleSchedulingFailureAfterTurn verifies a failing skip-flag
// write after a successful turn maps to service_unavailable, with
// SkipScheduled false and a failure audit entry.
func TestHandleSchedulingFailureAfterTurn(t *testing.T) {
	h := newHarness(t)
	original := h.seedRunningRecord()
	postTurn := original
	postTurn.CurrentTurn = original.CurrentTurn + 1
	h.turn.result = turngeneration.Result{
		Record:  postTurn,
		Trigger: turngeneration.TriggerForce,
		Outcome: operation.OutcomeSuccess,
	}
	h.runtime.schErr = errors.New("connection lost")
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminforce.ErrorCodeServiceUnavailable, result.ErrorCode)
	assert.False(t, result.SkipScheduled)
	// The driver entry records failure even though turn-generation
	// committed successfully.
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, adminforce.ErrorCodeServiceUnavailable, entry.ErrorCode)
}
// TestHandleTurnGeneratorReturnsError verifies a Go-level error from
// the generator maps to internal_error and skips scheduling.
func TestHandleTurnGeneratorReturnsError(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.turn.err = errors.New("nil context")
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminforce.ErrorCodeInternal, result.ErrorCode)
	assert.Empty(t, h.runtime.scheds)
}
// TestHandleInvalidRequest verifies an empty GameID is rejected before
// the turn generator runs and without writing an audit entry.
func TestHandleInvalidRequest(t *testing.T) {
	h := newHarness(t)
	input := baseInput()
	input.GameID = ""
	result, err := h.service.Handle(context.Background(), input)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminforce.ErrorCodeInvalidRequest, result.ErrorCode)
	assert.Empty(t, h.turn.calls, "turn generator must not be called on invalid input")
	assert.Empty(t, h.logs.snapshot(), "audit entry skipped when game id missing")
}
// TestHandleNilContextReturnsError verifies the nil-context guard
// surfaces as a Go-level error rather than a business Result.
func TestHandleNilContextReturnsError(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Handle(nil, baseInput()) //nolint:staticcheck // guard test
	require.Error(t, err)
}
// TestHandleDefaultsOpSource verifies an empty OpSource falls back to
// admin_rest for both the generator call and the audit entry.
func TestHandleDefaultsOpSource(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	postTurn := runtime.RuntimeRecord{
		GameID:      "game-001",
		Status:      runtime.StatusRunning,
		CurrentTurn: 7,
	}
	h.turn.result = turngeneration.Result{
		Record:  postTurn,
		Trigger: turngeneration.TriggerForce,
		Outcome: operation.OutcomeSuccess,
	}
	input := baseInput()
	input.OpSource = ""
	result, err := h.service.Handle(context.Background(), input)
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	require.Len(t, h.turn.calls, 1)
	assert.Equal(t, operation.OpSourceAdminRest, h.turn.calls[0].OpSource)
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpSourceAdminRest, entry.OpSource)
}
@@ -0,0 +1,45 @@
package adminpatch
// Stable error codes returned in `Result.ErrorCode`. The values match
// the vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Service-layer callers (Stage
// 19 handlers) import these names rather than redeclare them; renaming
// any of them is a contract change. The string values are part of the
// wire contract and must never change either.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty GameID/Version, malformed semver).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeRuntimeNotFound reports that no runtime_records row
	// exists for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"

	// ErrorCodeRuntimeNotRunning reports that the runtime is not in
	// `running`. Patch is supported only for runtimes RTM can recreate
	// in place.
	ErrorCodeRuntimeNotRunning = "runtime_not_running"

	// ErrorCodeEngineVersionNotFound reports that the requested target
	// version is missing from the engine_versions registry, or that it
	// is present but `status=deprecated`.
	ErrorCodeEngineVersionNotFound = "engine_version_not_found"

	// ErrorCodeSemverPatchOnly reports that the requested target
	// version differs in major or minor from the current one. Patch
	// upgrades are constrained to same-major.minor.
	ErrorCodeSemverPatchOnly = "semver_patch_only"

	// ErrorCodeConflict reports that the runtime's status changed
	// concurrently between the lookup and the post-RTM image rotation
	// CAS.
	ErrorCodeConflict = "conflict"

	// ErrorCodeServiceUnavailable reports that a steady-state
	// dependency (PostgreSQL, Runtime Manager) was unreachable for
	// this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,375 @@
// Package adminpatch implements the admin patch service-layer
// orchestrator owned by Game Master. It is driven by Admin Service or
// system administrators through
// `POST /api/v1/internal/runtimes/{game_id}/patch` and tells Runtime
// Manager to recreate the engine container with a new image, then
// rotates `runtime_records.current_image_ref` and
// `runtime_records.current_engine_version` while keeping the runtime in
// `running`.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Patch`. Design rationale (the dedicated UpdateImage
// port, rejection of deprecated targets, `service_unavailable` mapping
// for RTM failures) is captured in
// `gamemaster/docs/stage17-admin-operations.md`.
package adminpatch
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/engineversion"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/telemetry"
)
// Input stores the per-call arguments for one admin patch operation.
// Validate checks the structural invariants before any store access.
type Input struct {
	// GameID identifies the runtime to patch.
	GameID string
	// Version stores the target engine version (semver). Must be
	// present in `engine_versions` with `status=active` and a same
	// major.minor as the runtime's current version.
	Version string
	// OpSource classifies how the request entered Game Master. Used to
	// stamp `operation_log.op_source`. Defaults to `admin_rest` when
	// missing or unrecognised.
	OpSource operation.OpSource
	// SourceRef stores the optional opaque per-source reference (REST
	// request id, admin user id). Empty when the caller does not
	// provide one.
	SourceRef string
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched: a non-blank GameID and
// a parseable semver Version.
func (input Input) Validate() error {
	if strings.TrimSpace(input.GameID) == "" {
		// errors.New, not fmt.Errorf: the message has no format verbs
		// (staticcheck S1039).
		return errors.New("game id must not be empty")
	}
	if _, err := engineversion.ParseSemver(input.Version); err != nil {
		return fmt.Errorf("version: %w", err)
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver). Inspect with
// IsSuccess before reading Record.
type Result struct {
	// Record carries the post-rotation runtime record. Populated on
	// success; zero on early-rejection failures.
	Record runtime.RuntimeRecord
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure. Empty on
	// success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}
// IsSuccess reports whether this result represents a completed patch
// rather than a stable failure code.
func (result Result) IsSuccess() bool {
	return operation.OutcomeSuccess == result.Outcome
}
// Dependencies groups the collaborators required by Service. All
// fields except Logger and Clock are required; NewService returns an
// error when any required field is nil.
type Dependencies struct {
	// RuntimeRecords drives the row read plus the post-RTM image
	// rotation under a CAS guard.
	RuntimeRecords ports.RuntimeRecordStore
	// EngineVersions resolves the target version's image ref and
	// status.
	EngineVersions ports.EngineVersionStore
	// OperationLogs records the audit entry.
	OperationLogs ports.OperationLogStore
	// RTM drives the Runtime Manager patch call.
	RTM ports.RTMClient
	// Telemetry is required by the audit/log path. The Stage 17
	// service does not introduce a dedicated counter; outcome metrics
	// land under the future Admin Service surface.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
// Service executes the admin patch lifecycle operation. Construct
// instances through NewService; the zero value has nil collaborators
// and must not be used.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	engineVersions ports.EngineVersionStore
	operationLogs  ports.OperationLogStore
	rtm            ports.RTMClient
	telemetry      *telemetry.Runtime
	logger         *slog.Logger
	clock          func() time.Time
}
// NewService builds a Service from deps. It rejects nil required
// collaborators and applies the Logger/Clock defaults documented on
// Dependencies.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new admin patch service: nil runtime records")
	}
	if deps.EngineVersions == nil {
		return nil, errors.New("new admin patch service: nil engine versions")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new admin patch service: nil operation logs")
	}
	if deps.RTM == nil {
		return nil, errors.New("new admin patch service: nil rtm client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new admin patch service: nil telemetry runtime")
	}
	wallClock := deps.Clock
	if wallClock == nil {
		wallClock = time.Now
	}
	baseLogger := deps.Logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}
	return &Service{
		runtimeRecords: deps.RuntimeRecords,
		engineVersions: deps.EngineVersions,
		operationLogs:  deps.OperationLogs,
		rtm:            deps.RTM,
		telemetry:      deps.Telemetry,
		logger:         baseLogger.With("service", "gamemaster.adminpatch"),
		clock:          wallClock,
	}, nil
}
// Handle executes one admin patch operation end-to-end. The Go-level
// error return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome flows through Result.
//
// Sequence: validate input, load the runtime row and require
// `running`, resolve the target version and require `active`, enforce
// the same-major.minor constraint, drive the RTM patch call, rotate
// image/version under a CAS on `running`, then reload the row and
// write the success audit entry.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("admin patch: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("admin patch: nil context")
	}
	// Captured once so audit entries report the true wall-clock start
	// regardless of which later step fails.
	opStartedAt := service.clock().UTC()
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, err.Error()), nil
	}
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	switch {
	case errors.Is(err, runtime.ErrNotFound):
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist"), nil
	case err != nil:
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error())), nil
	}
	if record.Status != runtime.StatusRunning {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotRunning,
			fmt.Sprintf("runtime status is %q, expected %q",
				record.Status, runtime.StatusRunning)), nil
	}
	target, err := service.engineVersions.Get(ctx, input.Version)
	switch {
	case errors.Is(err, engineversion.ErrNotFound):
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeEngineVersionNotFound,
			fmt.Sprintf("engine version %q not found", input.Version)), nil
	case err != nil:
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get engine version: %s", err.Error())), nil
	}
	// Non-active (e.g. deprecated) targets map to the same not-found
	// code so callers cannot distinguish hidden versions.
	if target.Status != engineversion.StatusActive {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeEngineVersionNotFound,
			fmt.Sprintf("engine version %q is %q, expected %q",
				input.Version, target.Status, engineversion.StatusActive)), nil
	}
	patchOK, semErr := engineversion.IsPatchUpgrade(record.CurrentEngineVersion, input.Version)
	if semErr != nil {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, fmt.Sprintf("compare semver: %s", semErr.Error())), nil
	}
	if !patchOK {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeSemverPatchOnly,
			fmt.Sprintf("target %q is not a same-major.minor patch of %q",
				input.Version, record.CurrentEngineVersion)), nil
	}
	if err := service.rtm.Patch(ctx, input.GameID, target.ImageRef); err != nil {
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("rtm patch: %s", err.Error())), nil
	}
	rotatedAt := service.clock().UTC()
	// Rotate image/version only while the row is still `running`
	// (ExpectedStatus acts as the CAS guard).
	updateErr := service.runtimeRecords.UpdateImage(ctx, ports.UpdateImageInput{
		GameID:               input.GameID,
		ExpectedStatus:       runtime.StatusRunning,
		CurrentImageRef:      target.ImageRef,
		CurrentEngineVersion: input.Version,
		Now:                  rotatedAt,
	})
	switch {
	case updateErr == nil:
	case errors.Is(updateErr, runtime.ErrConflict):
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeConflict,
			fmt.Sprintf("runtime status changed during patch: %s", updateErr.Error())), nil
	case errors.Is(updateErr, runtime.ErrNotFound):
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotFound,
			fmt.Sprintf("runtime record disappeared during patch: %s", updateErr.Error())), nil
	default:
		return service.recordFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("update runtime image: %s", updateErr.Error())), nil
	}
	persisted, reloadErr := service.runtimeRecords.Get(ctx, input.GameID)
	if reloadErr != nil {
		// The image rotation already committed; surface the success
		// outcome with the in-memory projection so the caller still
		// sees the new image_ref / engine_version.
		service.logger.WarnContext(ctx, "reload runtime record after patch",
			"game_id", input.GameID,
			"err", reloadErr.Error(),
		)
		persisted = record
		persisted.CurrentImageRef = target.ImageRef
		persisted.CurrentEngineVersion = input.Version
		persisted.UpdatedAt = rotatedAt
	}
	service.appendSuccessLog(ctx, opStartedAt, input)
	logArgs := []any{
		"game_id", input.GameID,
		"new_image_ref", target.ImageRef,
		"new_engine_version", input.Version,
		"previous_engine_version", record.CurrentEngineVersion,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime patched", logArgs...)
	return Result{
		Record:  persisted,
		Outcome: operation.OutcomeSuccess,
	}, nil
}
// recordFailure appends the operation_log failure entry, emits a warn
// log, and assembles the structured failure Result.
func (service *Service) recordFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	attrs := append([]any{
		"game_id", input.GameID,
		"target_version", input.Version,
		"op_source", string(input.OpSource),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "admin patch rejected", attrs...)
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// appendSuccessLog writes the success audit row for one completed
// patch operation; the entry is best-effort by contract.
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	done := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindPatch,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &done,
	}
	service.bestEffortAppend(ctx, entry)
}
// appendFailureLog writes the failure audit row. When the input game
// id is blank the append is skipped entirely, because the entry
// validator would reject an audit row that carries no value.
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
	if strings.TrimSpace(input.GameID) == "" {
		return
	}
	done := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindPatch,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &done,
	}
	service.bestEffortAppend(ctx, entry)
}
// bestEffortAppend persists one operation_log entry. An append error
// is logged and dropped on purpose: the runtime row, not the audit
// log, is the source of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, err := service.operationLogs.Append(ctx, entry)
	if err == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", err.Error(),
	)
}
// fallbackOpSource maps an unknown or missing caller op source onto
// the documented default `admin_rest` (see `gamemaster/README.md
// §Trusted Surfaces`); known sources pass through untouched.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}
@@ -0,0 +1,448 @@
package adminpatch_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/adapters/mocks"
"galaxy/gamemaster/internal/domain/engineversion"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/adminpatch"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)
// --- test doubles -----------------------------------------------------

// fakeRuntimeRecords is an in-memory stand-in for the
// ports.RuntimeRecordStore dependency. Only Get and UpdateImage carry
// behavior; every other interface method returns a "not used" error so
// an unexpected call surfaces as a test failure.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord // runtime rows keyed by game id
	getErr error                            // when non-nil, Get fails with this error
	imgErr error                            // when non-nil, UpdateImage fails with this error
	images []ports.UpdateImageInput         // every UpdateImage call, recorded on success and failure alike
}

// newFakeRuntimeRecords returns an empty, ready-to-seed store.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed inserts (or replaces) one record keyed by its GameID.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}

// Get returns the injected getErr when set, runtime.ErrNotFound for
// unknown game ids, and the seeded record otherwise.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}

func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}

// UpdateImage mimics the store's CAS semantics: the injected imgErr
// wins first, a missing row maps to runtime.ErrNotFound, and a status
// mismatch maps to runtime.ErrConflict. Every branch — including the
// failing ones — appends input to s.images, so tests can assert the
// number of attempts regardless of outcome.
func (s *fakeRuntimeRecords) UpdateImage(_ context.Context, input ports.UpdateImageInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.imgErr != nil {
		s.images = append(s.images, input)
		return s.imgErr
	}
	record, ok := s.stored[input.GameID]
	if !ok {
		s.images = append(s.images, input)
		return runtime.ErrNotFound
	}
	if record.Status != input.ExpectedStatus {
		s.images = append(s.images, input)
		return runtime.ErrConflict
	}
	record.CurrentImageRef = input.CurrentImageRef
	record.CurrentEngineVersion = input.CurrentEngineVersion
	record.UpdatedAt = input.Now
	s.stored[input.GameID] = record
	s.images = append(s.images, input)
	return nil
}

func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
// fakeEngineVersions is an in-memory engine-version catalog. Get is
// the only functional method; the rest fail loudly if called.
type fakeEngineVersions struct {
	mu       sync.Mutex
	versions map[string]engineversion.EngineVersion
	getErr   error
}

func newFakeEngineVersions() *fakeEngineVersions {
	fake := &fakeEngineVersions{}
	fake.versions = map[string]engineversion.EngineVersion{}
	return fake
}

// seed registers one catalog entry keyed by its Version.
func (f *fakeEngineVersions) seed(record engineversion.EngineVersion) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.versions[record.Version] = record
}

// Get returns the injected error first, then the seeded entry, then
// engineversion.ErrNotFound for unknown versions.
func (f *fakeEngineVersions) Get(_ context.Context, version string) (engineversion.EngineVersion, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	if err := f.getErr; err != nil {
		return engineversion.EngineVersion{}, err
	}
	if rec, ok := f.versions[version]; ok {
		return rec, nil
	}
	return engineversion.EngineVersion{}, engineversion.ErrNotFound
}

func (f *fakeEngineVersions) List(context.Context, *engineversion.Status) ([]engineversion.EngineVersion, error) {
	return nil, errors.New("not used")
}

func (f *fakeEngineVersions) Insert(context.Context, engineversion.EngineVersion) error {
	return errors.New("not used")
}

func (f *fakeEngineVersions) Update(context.Context, ports.UpdateEngineVersionInput) error {
	return errors.New("not used")
}

func (f *fakeEngineVersions) Deprecate(context.Context, string, time.Time) error {
	return errors.New("not used")
}

func (f *fakeEngineVersions) Delete(context.Context, string) error {
	return errors.New("not used")
}

func (f *fakeEngineVersions) IsReferencedByActiveRuntime(context.Context, string) (bool, error) {
	return false, errors.New("not used")
}
// fakeOperationLogs collects appended audit entries in memory,
// enforcing the same Validate gate the real store applies.
type fakeOperationLogs struct {
	mu      sync.Mutex
	entries []operation.OperationEntry
}

// Append validates then stores entry; the returned id is the 1-based
// position of the entry in the log.
func (f *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	if err := entry.Validate(); err != nil {
		return 0, err
	}
	f.entries = append(f.entries, entry)
	return int64(len(f.entries)), nil
}

func (f *fakeOperationLogs) ListByGame(context.Context, string, int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used")
}

// lastEntry returns the most recent appended entry, if any.
func (f *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
	f.mu.Lock()
	defer f.mu.Unlock()
	n := len(f.entries)
	if n == 0 {
		return operation.OperationEntry{}, false
	}
	return f.entries[n-1], true
}

// snapshot returns a defensive copy of every appended entry.
func (f *fakeOperationLogs) snapshot() []operation.OperationEntry {
	f.mu.Lock()
	defer f.mu.Unlock()
	out := make([]operation.OperationEntry, len(f.entries))
	copy(out, f.entries)
	return out
}
// --- harness ----------------------------------------------------------

// harness bundles the service under test together with its fakes and
// mocks so each test can seed state and inspect side effects directly.
type harness struct {
	t         *testing.T
	ctrl      *gomock.Controller
	runtime   *fakeRuntimeRecords // in-memory runtime_records double
	versions  *fakeEngineVersions // in-memory engine-version catalog double
	logs      *fakeOperationLogs  // captured operation_log entries
	rtm       *mocks.MockRTMClient
	telemetry *telemetry.Runtime
	now       time.Time // frozen wall-clock returned by the injected Clock
	service   *adminpatch.Service
}
// newHarness wires a Service against fresh fakes, a gomock RTM client,
// and a frozen clock (2026-05-01T12:00:00Z).
func newHarness(t *testing.T) *harness {
	t.Helper()
	controller := gomock.NewController(t)
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	env := &harness{
		t:         t,
		ctrl:      controller,
		runtime:   newFakeRuntimeRecords(),
		versions:  newFakeEngineVersions(),
		logs:      &fakeOperationLogs{},
		rtm:       mocks.NewMockRTMClient(controller),
		telemetry: telemetryRuntime,
		now:       time.Date(2026, time.May, 1, 12, 0, 0, 0, time.UTC),
	}
	deps := adminpatch.Dependencies{
		RuntimeRecords: env.runtime,
		EngineVersions: env.versions,
		OperationLogs:  env.logs,
		RTM:            env.rtm,
		Telemetry:      env.telemetry,
		Clock:          func() time.Time { return env.now },
	}
	service, err := adminpatch.NewService(deps)
	require.NoError(t, err)
	env.service = service
	return env
}
// seedRunningOnVersion stores a healthy running runtime ("game-001")
// pinned to the given engine version and image, and returns the record
// it seeded.
func (h *harness) seedRunningOnVersion(version, image string) runtime.RuntimeRecord {
	createdAt := h.now.Add(-time.Hour)
	startedAt := h.now.Add(-30 * time.Minute)
	nextGen := h.now.Add(30 * time.Minute)
	record := runtime.RuntimeRecord{
		GameID:               "game-001",
		Status:               runtime.StatusRunning,
		EngineEndpoint:       "http://galaxy-game-game-001:8080",
		CurrentImageRef:      image,
		CurrentEngineVersion: version,
		TurnSchedule:         "0 18 * * *",
		CurrentTurn:          7,
		NextGenerationAt:     &nextGen,
		EngineHealth:         "healthy",
		CreatedAt:            createdAt,
		UpdatedAt:            startedAt,
		StartedAt:            &startedAt,
	}
	h.runtime.seed(record)
	return record
}
// seedTarget registers one engine-version catalog entry created a day
// before the harness clock.
func (h *harness) seedTarget(version, image string, status engineversion.Status) {
	created := h.now.Add(-24 * time.Hour)
	entry := engineversion.EngineVersion{
		Version:   version,
		ImageRef:  image,
		Status:    status,
		CreatedAt: created,
		UpdatedAt: created,
	}
	h.versions.seed(entry)
}
// baseInput builds the canonical admin-rest patch request for
// "game-001" targeting the given version.
func baseInput(version string) adminpatch.Input {
	input := adminpatch.Input{GameID: "game-001"}
	input.Version = version
	input.OpSource = operation.OpSourceAdminRest
	input.SourceRef = "req-patch-001"
	return input
}
// --- tests ------------------------------------------------------------

// TestNewServiceRejectsMissingDeps verifies that each required
// dependency, when nil, makes NewService fail.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	tests := []struct {
		label  string
		nilOut func(*adminpatch.Dependencies)
	}{
		{label: "runtime records", nilOut: func(d *adminpatch.Dependencies) { d.RuntimeRecords = nil }},
		{label: "engine versions", nilOut: func(d *adminpatch.Dependencies) { d.EngineVersions = nil }},
		{label: "operation logs", nilOut: func(d *adminpatch.Dependencies) { d.OperationLogs = nil }},
		{label: "rtm", nilOut: func(d *adminpatch.Dependencies) { d.RTM = nil }},
		{label: "telemetry", nilOut: func(d *adminpatch.Dependencies) { d.Telemetry = nil }},
	}
	for _, tt := range tests {
		t.Run(tt.label, func(t *testing.T) {
			deps := adminpatch.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				EngineVersions: newFakeEngineVersions(),
				OperationLogs:  &fakeOperationLogs{},
				RTM:            mocks.NewMockRTMClient(gomock.NewController(t)),
				Telemetry:      telemetryRuntime,
			}
			tt.nilOut(&deps)
			service, err := adminpatch.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
// TestHandleHappyPathRotatesImage covers the full success path: RTM
// patch dispatched, row rotated via CAS, success audit entry written.
func TestHandleHappyPathRotatesImage(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	env.seedTarget("v1.2.4", "ghcr.io/galaxy/game:v1.2.4", engineversion.StatusActive)
	env.rtm.EXPECT().Patch(gomock.Any(), "game-001", "ghcr.io/galaxy/game:v1.2.4").Return(nil)

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	require.True(t, res.IsSuccess(), "want success, got %+v", res)
	assert.Equal(t, "ghcr.io/galaxy/game:v1.2.4", res.Record.CurrentImageRef)
	assert.Equal(t, "v1.2.4", res.Record.CurrentEngineVersion)
	assert.Equal(t, runtime.StatusRunning, res.Record.Status)

	require.Len(t, env.runtime.images, 1)
	written := env.runtime.images[0]
	assert.Equal(t, runtime.StatusRunning, written.ExpectedStatus)
	assert.Equal(t, "ghcr.io/galaxy/game:v1.2.4", written.CurrentImageRef)
	assert.Equal(t, "v1.2.4", written.CurrentEngineVersion)

	entry, ok := env.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpKindPatch, entry.OpKind)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
}
// TestHandleRuntimeNotFound maps a missing runtime row onto the
// runtime_not_found error code.
func TestHandleRuntimeNotFound(t *testing.T) {
	env := newHarness(t)
	env.seedTarget("v1.2.4", "ghcr.io/galaxy/game:v1.2.4", engineversion.StatusActive)

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeRuntimeNotFound, res.ErrorCode)
}
// TestHandleRuntimeNotRunning verifies that a non-running runtime is
// rejected before any UpdateImage attempt is made.
func TestHandleRuntimeNotRunning(t *testing.T) {
	env := newHarness(t)
	stopped := env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	stopped.Status = runtime.StatusStopped
	env.runtime.seed(stopped)
	env.seedTarget("v1.2.4", "ghcr.io/galaxy/game:v1.2.4", engineversion.StatusActive)

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeRuntimeNotRunning, res.ErrorCode)
	assert.Empty(t, env.runtime.images, "no UpdateImage when status precondition fails")
}
// TestHandleEngineVersionMissing covers a target version absent from
// the catalog.
func TestHandleEngineVersionMissing(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeEngineVersionNotFound, res.ErrorCode)
}
// TestHandleEngineVersionDeprecated verifies a deprecated catalog
// entry is treated as not-found, with a message that says why.
func TestHandleEngineVersionDeprecated(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	env.seedTarget("v1.2.4", "ghcr.io/galaxy/game:v1.2.4", engineversion.StatusDeprecated)

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeEngineVersionNotFound, res.ErrorCode)
	assert.Contains(t, res.ErrorMessage, "deprecated")
}
// TestHandleSemverPatchOnlyMajor verifies a major-version jump is
// rejected without touching the store.
func TestHandleSemverPatchOnlyMajor(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	env.seedTarget("v2.0.0", "ghcr.io/galaxy/game:v2.0.0", engineversion.StatusActive)

	res, err := env.service.Handle(context.Background(), baseInput("v2.0.0"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeSemverPatchOnly, res.ErrorCode)
	assert.Empty(t, env.runtime.images)
}
// TestHandleSemverPatchOnlyMinor verifies a minor-version jump is
// rejected too — only the patch component may change.
func TestHandleSemverPatchOnlyMinor(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	env.seedTarget("v1.3.0", "ghcr.io/galaxy/game:v1.3.0", engineversion.StatusActive)

	res, err := env.service.Handle(context.Background(), baseInput("v1.3.0"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeSemverPatchOnly, res.ErrorCode)
}
// TestHandleRTMUnavailable verifies that an RTM failure maps to
// service_unavailable and that the row is left untouched.
func TestHandleRTMUnavailable(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	env.seedTarget("v1.2.4", "ghcr.io/galaxy/game:v1.2.4", engineversion.StatusActive)
	env.rtm.EXPECT().
		Patch(gomock.Any(), "game-001", "ghcr.io/galaxy/game:v1.2.4").
		Return(ports.ErrRTMUnavailable)

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeServiceUnavailable, res.ErrorCode)
	assert.Empty(t, env.runtime.images, "no UpdateImage when RTM fails")
}
// TestHandleCASLostAfterRTM verifies that losing the CAS race after a
// successful RTM dispatch maps to the conflict code, with exactly one
// UpdateImage attempt recorded.
func TestHandleCASLostAfterRTM(t *testing.T) {
	env := newHarness(t)
	env.seedRunningOnVersion("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	env.seedTarget("v1.2.4", "ghcr.io/galaxy/game:v1.2.4", engineversion.StatusActive)
	env.rtm.EXPECT().Patch(gomock.Any(), "game-001", "ghcr.io/galaxy/game:v1.2.4").Return(nil)
	env.runtime.imgErr = runtime.ErrConflict

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeConflict, res.ErrorCode)
	require.Len(t, env.runtime.images, 1)
}
// TestHandleInvalidRequest verifies structural validation failures map
// to invalid_request.
func TestHandleInvalidRequest(t *testing.T) {
	table := []struct {
		label string
		input adminpatch.Input
	}{
		{label: "empty game id", input: adminpatch.Input{GameID: "", Version: "v1.2.4", OpSource: operation.OpSourceAdminRest}},
		{label: "malformed version", input: adminpatch.Input{GameID: "game-001", Version: "not-a-semver", OpSource: operation.OpSourceAdminRest}},
	}
	for _, tt := range table {
		t.Run(tt.label, func(t *testing.T) {
			env := newHarness(t)
			res, err := env.service.Handle(context.Background(), tt.input)
			require.NoError(t, err)
			assert.Equal(t, adminpatch.ErrorCodeInvalidRequest, res.ErrorCode)
		})
	}
}
// TestHandleNilContextReturnsError verifies the nil-context guard uses
// the Go-level error return instead of a business Result.
func TestHandleNilContextReturnsError(t *testing.T) {
	env := newHarness(t)
	var nilCtx context.Context // deliberately nil to hit the guard
	_, err := env.service.Handle(nilCtx, baseInput("v1.2.4"))
	require.Error(t, err)
}
// TestHandleStoreReadFailure maps a failing runtime-record read onto
// service_unavailable.
func TestHandleStoreReadFailure(t *testing.T) {
	env := newHarness(t)
	env.runtime.getErr = errors.New("connection refused")

	res, err := env.service.Handle(context.Background(), baseInput("v1.2.4"))
	require.NoError(t, err)
	assert.Equal(t, adminpatch.ErrorCodeServiceUnavailable, res.ErrorCode)
}
@@ -0,0 +1,48 @@
package adminstop
// Stable error codes returned in `Result.ErrorCode`. The values match
// the vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Service-layer callers (Stage
// 19 handlers) import these names rather than redeclare them; renaming
// any of them is a contract change.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty GameID, unknown stop reason).
	// Produced before any store is touched.
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeRuntimeNotFound reports that no runtime_records row
	// exists for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"

	// ErrorCodeConflict reports that the runtime is in a status that
	// cannot transition to `stopped` (currently only `starting`), or
	// that a CAS guard mid-flow lost the race to a concurrent mutation.
	ErrorCodeConflict = "conflict"

	// ErrorCodeServiceUnavailable reports that a steady-state dependency
	// (PostgreSQL, Runtime Manager) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
// Allowed values of Input.Reason mirror the README §Stop wording
// «reason ∈ {admin_request, finished, timeout}». Callers that pass an
// empty string get the documented default `admin_request` (applied in
// Handle after validation).
const (
	// ReasonAdminRequest is the operator-driven stop reason and the
	// default when Input.Reason is empty.
	ReasonAdminRequest = "admin_request"

	// ReasonFinished is reserved for callers that wrap a
	// finish-detected stop (currently unused; documented for
	// completeness).
	ReasonFinished = "finished"

	// ReasonTimeout is reserved for callers that wrap an automated
	// timeout-driven stop (currently unused; documented for
	// completeness).
	ReasonTimeout = "timeout"
)
@@ -0,0 +1,396 @@
// Package adminstop implements the admin stop service-layer
// orchestrator owned by Game Master. It is driven by Admin Service or
// system administrators through
// `POST /api/v1/internal/runtimes/{game_id}/stop` and tells Runtime
// Manager to stop the game's container while transitioning the runtime
// record to `stopped`.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Stop`. The idempotent-on-terminal-status and
// conflict-on-starting rules are recorded in
// `gamemaster/docs/stage17-admin-operations.md`.
package adminstop
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/telemetry"
)
// Input stores the per-call arguments for one admin stop operation.
type Input struct {
	// GameID identifies the runtime to stop. Must be non-empty
	// (enforced by Validate).
	GameID string

	// Reason classifies the stop. Empty defaults to
	// `admin_request`. Allowed values: `admin_request`, `finished`,
	// `timeout` (enforced by Validate; the value is forwarded to the
	// Runtime Manager stop call).
	Reason string

	// OpSource classifies how the request entered Game Master. Used to
	// stamp `operation_log.op_source`. Defaults to `admin_rest` when
	// missing or unrecognised.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference (REST
	// request id, admin user id). Empty when the caller does not
	// provide one.
	SourceRef string
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched: a non-blank game id
// and a reason drawn from the documented vocabulary (empty is allowed
// and later defaulted to admin_request).
func (input Input) Validate() error {
	if strings.TrimSpace(input.GameID) == "" {
		return fmt.Errorf("game id must not be empty")
	}
	reason := strings.TrimSpace(input.Reason)
	if reason != "" && reason != ReasonAdminRequest && reason != ReasonFinished && reason != ReasonTimeout {
		return fmt.Errorf("reason %q is unsupported", input.Reason)
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver).
type Result struct {
	// Record carries the runtime record observed (and on success
	// transitioned) by the operation. Populated on success and on the
	// idempotent no-op branch; zero on early-rejection failures
	// (invalid_request, runtime_not_found).
	Record runtime.RuntimeRecord

	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome

	// ErrorCode stores the stable error code on failure (one of the
	// ErrorCode* constants in this package). Empty on success.
	ErrorCode string

	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}

// IsSuccess reports whether the result represents a successful
// operation (Outcome == operation.OutcomeSuccess).
func (result Result) IsSuccess() bool {
	return result.Outcome == operation.OutcomeSuccess
}
// Dependencies groups the collaborators required by Service. All
// fields except Logger and Clock are mandatory; NewService rejects a
// nil value for any required field.
type Dependencies struct {
	// RuntimeRecords drives the read of the current row plus the CAS
	// transition to `stopped`.
	RuntimeRecords ports.RuntimeRecordStore

	// OperationLogs records the audit entry for the operation.
	OperationLogs ports.OperationLogStore

	// RTM drives the Runtime Manager stop call.
	RTM ports.RTMClient

	// LobbyEvents publishes the post-success
	// `runtime_snapshot_update` to `gm:lobby_events`.
	LobbyEvents ports.LobbyEventsPublisher

	// Telemetry is required by the lobby-events publication helper.
	Telemetry *telemetry.Runtime

	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger

	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
// Service executes the admin stop lifecycle operation. All fields are
// populated by NewService and read-only afterwards.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore   // row read + CAS to stopped
	operationLogs  ports.OperationLogStore    // best-effort audit entries
	rtm            ports.RTMClient            // Runtime Manager stop call
	lobbyEvents    ports.LobbyEventsPublisher // post-success snapshot publication
	telemetry      *telemetry.Runtime         // lobby-event publication counter
	logger         *slog.Logger               // tagged with service=gamemaster.adminstop
	clock          func() time.Time           // wall-clock source; call sites normalize to UTC
}
// NewService constructs one Service from deps, rejecting every nil
// required collaborator and defaulting Clock to time.Now and Logger to
// slog.Default(). The logger is tagged with the service name.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new admin stop service: nil runtime records")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new admin stop service: nil operation logs")
	}
	if deps.RTM == nil {
		return nil, errors.New("new admin stop service: nil rtm client")
	}
	if deps.LobbyEvents == nil {
		return nil, errors.New("new admin stop service: nil lobby events publisher")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new admin stop service: nil telemetry runtime")
	}
	service := &Service{
		runtimeRecords: deps.RuntimeRecords,
		operationLogs:  deps.OperationLogs,
		rtm:            deps.RTM,
		lobbyEvents:    deps.LobbyEvents,
		telemetry:      deps.Telemetry,
		clock:          deps.Clock,
		logger:         deps.Logger,
	}
	if service.clock == nil {
		service.clock = time.Now
	}
	if service.logger == nil {
		service.logger = slog.Default()
	}
	service.logger = service.logger.With("service", "gamemaster.adminstop")
	return service, nil
}
// Handle executes one admin stop operation end-to-end. The Go-level
// error return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome flows through Result.
//
// Flow: validate input → read the runtime row → short-circuit on
// terminal/starting status → RTM stop → CAS the row to `stopped` →
// reload → publish snapshot → append success audit entry. The RTM call
// deliberately precedes the CAS so the row only flips to `stopped`
// once Runtime Manager has accepted the stop.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("admin stop: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("admin stop: nil context")
	}
	opStartedAt := service.clock().UTC()
	// 1) Structural validation, before any store is touched.
	if err := input.Validate(); err != nil {
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, err.Error()), nil
	}
	// 2) Default the stop reason per the documented contract.
	reason := strings.TrimSpace(input.Reason)
	if reason == "" {
		reason = ReasonAdminRequest
	}
	// 3) Read the current runtime row.
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	switch {
	case errors.Is(err, runtime.ErrNotFound):
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist"), nil
	case err != nil:
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error())), nil
	}
	// 4) Status gate: terminal rows are an idempotent success;
	// `starting` cannot transition to `stopped` and is a conflict.
	switch record.Status {
	case runtime.StatusStopped, runtime.StatusFinished:
		return service.completeIdempotent(ctx, opStartedAt, input, record), nil
	case runtime.StatusStarting:
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeConflict,
			fmt.Sprintf("runtime status is %q; stop requires a started runtime", record.Status)), nil
	}
	// 5) Tell Runtime Manager to stop the container (before the CAS).
	if err := service.rtm.Stop(ctx, input.GameID, reason); err != nil {
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeServiceUnavailable, fmt.Sprintf("rtm stop: %s", err.Error())), nil
	}
	// 6) CAS the row from its observed status to `stopped`.
	stoppedAt := service.clock().UTC()
	casErr := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: record.Status,
		To:           runtime.StatusStopped,
		Now:          stoppedAt,
	})
	switch {
	case casErr == nil:
	case errors.Is(casErr, runtime.ErrConflict):
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeConflict,
			fmt.Sprintf("cas runtime status to stopped: %s", casErr.Error())), nil
	case errors.Is(casErr, runtime.ErrNotFound):
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeRuntimeNotFound,
			fmt.Sprintf("cas runtime status to stopped: %s", casErr.Error())), nil
	default:
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("cas runtime status to stopped: %s", casErr.Error())), nil
	}
	// 7) Reload the committed row for the response payload.
	persisted, reloadErr := service.runtimeRecords.Get(ctx, input.GameID)
	if reloadErr != nil {
		// CAS already committed; surface the success outcome but log the
		// degraded reload so operators know the response carries the
		// pre-CAS record (projected forward in memory).
		service.logger.WarnContext(ctx, "reload runtime record after stop",
			"game_id", input.GameID,
			"err", reloadErr.Error(),
		)
		persisted = record
		persisted.Status = runtime.StatusStopped
		persisted.UpdatedAt = stoppedAt
		persisted.StoppedAt = &stoppedAt
	}
	// 8) Post-success side effects: snapshot publication and audit row,
	// both best-effort.
	service.publishSnapshot(ctx, persisted, stoppedAt)
	service.appendSuccessLog(ctx, opStartedAt, input)
	logArgs := []any{
		"game_id", input.GameID,
		"reason", reason,
		"from_status", string(record.Status),
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime stopped", logArgs...)
	return Result{
		Record:  persisted,
		Outcome: operation.OutcomeSuccess,
	}, nil
}
// completeIdempotent records the no-op success path used when the
// runtime is already terminal (stopped or finished). RTM is not
// invoked and no snapshot is published, but the audit row is still
// written so operators can confirm the call landed.
func (service *Service) completeIdempotent(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord) Result {
	service.appendSuccessLog(ctx, opStartedAt, input)
	attrs := append([]any{
		"game_id", input.GameID,
		"observed_status", string(record.Status),
		"op_source", string(fallbackOpSource(input.OpSource)),
	}, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime stop already terminal", attrs...)
	return Result{Record: record, Outcome: operation.OutcomeSuccess}
}
// recordEarlyFailure records a failure raised before the runtime row
// was read (or during validation); it delegates with a zero record.
func (service *Service) recordEarlyFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	var empty runtime.RuntimeRecord
	return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, empty, errorCode, errorMessage)
}
// recordEarlyFailureWithRecord records a failure and propagates the
// observed runtime record (when available) to the caller, appending
// the failure audit entry and a structured warning along the way.
func (service *Service) recordEarlyFailureWithRecord(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	attrs := append([]any{
		"game_id", input.GameID,
		"op_source", string(input.OpSource),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "admin stop rejected", attrs...)
	result := Result{Record: record, Outcome: operation.OutcomeFailure}
	result.ErrorCode = errorCode
	result.ErrorMessage = errorMessage
	return result
}
// publishSnapshot publishes the post-success
// `runtime_snapshot_update` per `gamemaster/README.md §Lifecycles →
// Stop` step 4. A publish failure is logged but never rolls back the
// just-applied CAS; the snapshot stream is best-effort by contract.
// The telemetry counter is only bumped for a successful publish.
func (service *Service) publishSnapshot(ctx context.Context, record runtime.RuntimeRecord, occurredAt time.Time) {
	update := ports.RuntimeSnapshotUpdate{
		GameID:              record.GameID,
		CurrentTurn:         record.CurrentTurn,
		RuntimeStatus:       record.Status,
		EngineHealthSummary: record.EngineHealth,
		PlayerTurnStats:     nil,
		OccurredAt:          occurredAt,
	}
	err := service.lobbyEvents.PublishSnapshotUpdate(ctx, update)
	if err == nil {
		service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
		return
	}
	service.logger.ErrorContext(ctx, "publish runtime snapshot update",
		"game_id", record.GameID,
		"err", err.Error(),
	)
}
// appendSuccessLog writes the success audit row for one completed stop
// operation; the entry is best-effort by contract.
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	done := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindStop,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &done,
	}
	service.bestEffortAppend(ctx, entry)
}
// appendFailureLog records the failure operation_log entry. Skipped
// when the input game id is empty so the entry validator does not
// reject an audit row that adds no value — the same guard the patch
// orchestrator applies, previously missing here, which made every
// invalid-request failure attempt an append that could only be
// rejected and logged as an error.
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
	if strings.TrimSpace(input.GameID) == "" {
		return
	}
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindStop,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &finishedAt,
	})
}
// bestEffortAppend persists one operation_log entry. An append error
// is logged and dropped on purpose: the runtime row, not the audit
// log, is the source of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, err := service.operationLogs.Append(ctx, entry)
	if err == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", err.Error(),
	)
}
// fallbackOpSource maps an unknown or missing caller op source onto
// the documented default `admin_rest` (see `gamemaster/README.md
// §Trusted Surfaces`); known sources pass through untouched.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}
@@ -0,0 +1,459 @@
package adminstop_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/adapters/mocks"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/adminstop"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)
// --- test doubles -----------------------------------------------------
// fakeRuntimeRecords is an in-memory, mutex-guarded stand-in for the
// runtime record store. Only Get and UpdateStatus are functional; the
// remaining interface methods fail loudly so unexpected calls surface.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord
	// getErr, when set, is returned by every Get call.
	getErr error
	// updErr, when set, is returned by every UpdateStatus call.
	updErr error
	// updates records each accepted UpdateStatus input, in call order.
	updates []ports.UpdateStatusInput
}

// newFakeRuntimeRecords returns an empty store ready for seeding.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed installs record under its game id, replacing any previous entry.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}

// Get returns the seeded record, the injected getErr, or
// runtime.ErrNotFound when the game id is unknown.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}
// Insert is not exercised by these tests and fails loudly if called.
func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}

// UpdateStatus emulates the store's compare-and-swap transition. It
// fails with the injected updErr, with runtime.ErrNotFound for an
// unknown game id, or with runtime.ErrConflict when the row's current
// status differs from ExpectedFrom. On success it applies the target
// status, stamps UpdatedAt (and StoppedAt when transitioning to
// stopped), and records the input in updates for later assertions.
func (s *fakeRuntimeRecords) UpdateStatus(_ context.Context, input ports.UpdateStatusInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.updErr != nil {
		return s.updErr
	}
	record, ok := s.stored[input.GameID]
	if !ok {
		return runtime.ErrNotFound
	}
	if record.Status != input.ExpectedFrom {
		return runtime.ErrConflict
	}
	record.Status = input.To
	record.UpdatedAt = input.Now
	if input.To == runtime.StatusStopped {
		// Copy before taking the address so the pointer is stable.
		stopped := input.Now
		record.StoppedAt = &stopped
	}
	s.stored[input.GameID] = record
	s.updates = append(s.updates, input)
	return nil
}

// The remaining store methods are not exercised by these tests and
// fail loudly if called.
func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

// updateCount reports how many UpdateStatus calls were accepted.
func (s *fakeRuntimeRecords) updateCount() int {
	s.mu.Lock()
	defer s.mu.Unlock()
	return len(s.updates)
}
// fakeOperationLogs is an in-memory, mutex-guarded operation-log store.
type fakeOperationLogs struct {
	mu      sync.Mutex
	entries []operation.OperationEntry
	// appErr, when set, is returned by every Append call.
	appErr error
}

// Append validates and stores entry, returning a 1-based pseudo id.
// Running entry.Validate mirrors the real store, so malformed entries
// fail in tests too.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.appErr != nil {
		return 0, s.appErr
	}
	if err := entry.Validate(); err != nil {
		return 0, err
	}
	s.entries = append(s.entries, entry)
	return int64(len(s.entries)), nil
}

// ListByGame is not exercised by these tests and fails loudly if called.
func (s *fakeOperationLogs) ListByGame(context.Context, string, int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used")
}

// lastEntry returns the most recently appended entry, if any.
func (s *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if len(s.entries) == 0 {
		return operation.OperationEntry{}, false
	}
	return s.entries[len(s.entries)-1], true
}

// snapshot returns a defensive copy of every appended entry.
func (s *fakeOperationLogs) snapshot() []operation.OperationEntry {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make([]operation.OperationEntry, len(s.entries))
	copy(out, s.entries)
	return out
}
// --- harness ----------------------------------------------------------
// harness bundles the service under test with its fakes, gomock
// doubles, and a frozen clock so each test can seed state and set
// expectations tersely.
type harness struct {
	t         *testing.T
	ctrl      *gomock.Controller
	runtime   *fakeRuntimeRecords
	logs      *fakeOperationLogs
	rtm       *mocks.MockRTMClient
	lobby     *mocks.MockLobbyEventsPublisher
	telemetry *telemetry.Runtime
	// now is the fixed instant returned by the injected Clock.
	now     time.Time
	service *adminstop.Service
}

// newHarness wires a Service against in-memory fakes, mocks, a no-op
// telemetry runtime, and a clock pinned to h.now.
func newHarness(t *testing.T) *harness {
	t.Helper()
	ctrl := gomock.NewController(t)
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	h := &harness{
		t:         t,
		ctrl:      ctrl,
		runtime:   newFakeRuntimeRecords(),
		logs:      &fakeOperationLogs{},
		rtm:       mocks.NewMockRTMClient(ctrl),
		lobby:     mocks.NewMockLobbyEventsPublisher(ctrl),
		telemetry: telemetryRuntime,
		now:       time.Date(2026, time.May, 1, 12, 0, 0, 0, time.UTC),
	}
	service, err := adminstop.NewService(adminstop.Dependencies{
		RuntimeRecords: h.runtime,
		OperationLogs:  h.logs,
		RTM:            h.rtm,
		LobbyEvents:    h.lobby,
		Telemetry:      h.telemetry,
		Clock:          func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = service
	return h
}

// seedRecord installs a fully-populated runtime row for "game-001" in
// the given status and returns the seeded value for later comparison.
func (h *harness) seedRecord(status runtime.Status) runtime.RuntimeRecord {
	// Timestamps are offsets from the frozen clock so assertions on
	// UpdatedAt/StoppedAt stay deterministic.
	created := h.now.Add(-time.Hour)
	started := h.now.Add(-30 * time.Minute)
	next := h.now.Add(30 * time.Minute)
	record := runtime.RuntimeRecord{
		GameID:               "game-001",
		Status:               status,
		EngineEndpoint:       "http://galaxy-game-game-001:8080",
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		CurrentTurn:          7,
		NextGenerationAt:     &next,
		EngineHealth:         "healthy",
		CreatedAt:            created,
		UpdatedAt:            started,
		StartedAt:            &started,
	}
	h.runtime.seed(record)
	return record
}

// baseInput returns the canonical valid stop request used by most tests.
func baseInput() adminstop.Input {
	return adminstop.Input{
		GameID:    "game-001",
		Reason:    adminstop.ReasonAdminRequest,
		OpSource:  operation.OpSourceAdminRest,
		SourceRef: "req-stop-001",
	}
}
// --- tests ------------------------------------------------------------
// TestNewServiceRejectsMissingDeps verifies NewService fails for every
// nil mandatory dependency, one sub-test per collaborator.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cases := []struct {
		name string
		mut  func(*adminstop.Dependencies)
	}{
		{"runtime records", func(d *adminstop.Dependencies) { d.RuntimeRecords = nil }},
		{"operation logs", func(d *adminstop.Dependencies) { d.OperationLogs = nil }},
		{"rtm", func(d *adminstop.Dependencies) { d.RTM = nil }},
		{"lobby events", func(d *adminstop.Dependencies) { d.LobbyEvents = nil }},
		{"telemetry", func(d *adminstop.Dependencies) { d.Telemetry = nil }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			// Start from a fully-populated value, then knock out
			// exactly one field.
			deps := adminstop.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				OperationLogs:  &fakeOperationLogs{},
				RTM:            mocks.NewMockRTMClient(ctrl),
				LobbyEvents:    mocks.NewMockLobbyEventsPublisher(ctrl),
				Telemetry:      telemetryRuntime,
			}
			tc.mut(&deps)
			service, err := adminstop.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
// TestHandleHappyPath drives a running game through a successful stop:
// RTM stop, CAS to stopped, snapshot publication, and a success
// operation-log entry.
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	original := h.seedRecord(runtime.StatusRunning)
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.AssignableToTypeOf(ports.RuntimeSnapshotUpdate{})).
		DoAndReturn(func(_ context.Context, msg ports.RuntimeSnapshotUpdate) error {
			// The snapshot reflects the post-stop status while carrying
			// the original turn/health data forward.
			assert.Equal(t, "game-001", msg.GameID)
			assert.Equal(t, runtime.StatusStopped, msg.RuntimeStatus)
			assert.Equal(t, original.CurrentTurn, msg.CurrentTurn)
			assert.Equal(t, original.EngineHealth, msg.EngineHealthSummary)
			assert.Empty(t, msg.PlayerTurnStats)
			assert.True(t, msg.OccurredAt.Equal(h.now))
			return nil
		})
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "want success, got %+v", result)
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
	assert.Equal(t, 1, h.runtime.updateCount(), "exactly one CAS call expected")
	entry, ok := h.logs.lastEntry()
	require.True(t, ok, "operation log entry must be appended")
	assert.Equal(t, operation.OpKindStop, entry.OpKind)
	assert.Equal(t, operation.OpSourceAdminRest, entry.OpSource)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
	assert.Empty(t, entry.ErrorCode)
}

// TestHandleHappyPathFromGenerationFailed verifies stop is also allowed
// from generation_failed and that the CAS uses that status as its
// expected from-state.
func TestHandleHappyPathFromGenerationFailed(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusGenerationFailed)
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
	// NOTE(review): this reads h.runtime.updates without the fake's
	// mutex (unlike updateCount); safe here because Handle has returned
	// and no goroutines remain — confirm if the service ever goes async.
	require.Len(t, h.runtime.updates, 1)
	assert.Equal(t, runtime.StatusGenerationFailed, h.runtime.updates[0].ExpectedFrom)
}
// TestHandleEmptyReasonDefaultsToAdminRequest verifies the service
// substitutes ReasonAdminRequest when the caller sends no reason — the
// RTM mock only accepts the defaulted value.
func TestHandleEmptyReasonDefaultsToAdminRequest(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	input := baseInput()
	input.Reason = ""
	result, err := h.service.Handle(context.Background(), input)
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
}

// TestHandleIdempotentOnAlreadyStopped verifies the short-circuit path:
// no RTM call, no CAS, no snapshot publication — but still a success
// audit entry and an unmutated runtime row.
func TestHandleIdempotentOnAlreadyStopped(t *testing.T) {
	h := newHarness(t)
	original := h.seedRecord(runtime.StatusStopped)
	// No RTM call, no snapshot publication expected.
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
	assert.Equal(t, original.UpdatedAt, result.Record.UpdatedAt, "no mutation expected")
	assert.Zero(t, h.runtime.updateCount(), "no CAS expected on idempotent path")
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
}

// TestHandleIdempotentOnFinished verifies finished games are likewise
// an idempotent success that preserves the finished status.
func TestHandleIdempotentOnFinished(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusFinished)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	assert.Equal(t, runtime.StatusFinished, result.Record.Status)
}
// TestHandleConflictOnStarting verifies stopping a starting game is a
// stable conflict failure with a matching audit entry and no CAS call.
func TestHandleConflictOnStarting(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusStarting)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeConflict, result.ErrorCode)
	assert.Zero(t, h.runtime.updateCount())
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, adminstop.ErrorCodeConflict, entry.ErrorCode)
}

// TestHandleRuntimeNotFound verifies an unseeded game id maps to the
// runtime_not_found failure code.
func TestHandleRuntimeNotFound(t *testing.T) {
	h := newHarness(t)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeRuntimeNotFound, result.ErrorCode)
}

// TestHandleRTMUnavailable verifies an RTM outage maps to
// service_unavailable and leaves the runtime row untouched.
func TestHandleRTMUnavailable(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).
		Return(ports.ErrRTMUnavailable)
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeServiceUnavailable, result.ErrorCode)
	assert.Zero(t, h.runtime.updateCount(), "CAS must not run after RTM failure")
}

// TestHandleCASLostRace verifies a CAS conflict after a successful RTM
// stop surfaces as the conflict failure code.
func TestHandleCASLostRace(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)
	// RTM stop succeeds, but a concurrent mutation flipped the row out
	// of `running` before our CAS lands.
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.runtime.updErr = runtime.ErrConflict
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeConflict, result.ErrorCode)
}

// TestHandleStoreReadFailure verifies a store read outage maps to
// service_unavailable.
func TestHandleStoreReadFailure(t *testing.T) {
	h := newHarness(t)
	h.runtime.getErr = errors.New("connection refused")
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeServiceUnavailable, result.ErrorCode)
}
// TestHandleInvalidRequest verifies structural validation failures map
// to invalid_request, one sub-test per invalid mutation.
func TestHandleInvalidRequest(t *testing.T) {
	cases := []struct {
		name string
		mut  func(*adminstop.Input)
	}{
		{"empty game id", func(in *adminstop.Input) { in.GameID = "" }},
		{"unknown reason", func(in *adminstop.Input) { in.Reason = "panic" }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			input := baseInput()
			tc.mut(&input)
			result, err := h.service.Handle(context.Background(), input)
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, adminstop.ErrorCodeInvalidRequest, result.ErrorCode)
			// Audit log uses the validated game id; for the empty-id
			// case it would fail entry validation, so we only assert
			// when game id is present.
			if input.GameID != "" {
				_, ok := h.logs.lastEntry()
				assert.True(t, ok)
			}
		})
	}
}

// TestHandleNilContextReturnsError verifies the nil-context guard is a
// Go-level error rather than a business Result.
func TestHandleNilContextReturnsError(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Handle(nil, baseInput()) //nolint:staticcheck // intentional nil for guard test
	require.Error(t, err)
}

// TestHandleSnapshotPublishFailureSurfacesSuccess verifies snapshot
// publication is best-effort: a publish error does not fail the stop.
func TestHandleSnapshotPublishFailureSurfacesSuccess(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		Return(errors.New("redis down"))
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "snapshot publication is best-effort")
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
}
@@ -0,0 +1,51 @@
package commandexecute
// Stable error codes returned in `Result.ErrorCode`. The values match the
// vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Stage 19's REST handler imports
// these names rather than redeclare them; renaming any of them is a
// contract change. The same string values appear in sibling service
// packages (e.g. adminbanish), so changes must stay in lockstep.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty required field, malformed payload,
	// non-object payload, payload missing the `commands` array).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeRuntimeNotFound reports that no `runtime_records` row
	// exists for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"

	// ErrorCodeRuntimeNotRunning reports that the runtime exists but its
	// current status is not `running`. Hot-path commands are rejected
	// outside the running state to avoid racing with admin transitions
	// and turn generation.
	ErrorCodeRuntimeNotRunning = "runtime_not_running"

	// ErrorCodeForbidden reports that the caller is not an active member
	// of the game, or that the (game_id, user_id) pair lacks a player
	// mapping. Either way the caller is not authorised to act.
	ErrorCodeForbidden = "forbidden"

	// ErrorCodeEngineUnreachable reports that the engine /api/v1/command
	// call returned a 5xx status, timed out, or could not be dispatched.
	ErrorCodeEngineUnreachable = "engine_unreachable"

	// ErrorCodeEngineValidationError reports that the engine returned
	// 4xx with a per-command result. The body is forwarded verbatim
	// through `Result.RawResponse` so the gateway can surface the
	// per-command error vocabulary.
	ErrorCodeEngineValidationError = "engine_validation_error"

	// ErrorCodeEngineProtocolViolation reports that the engine response
	// did not match the expected schema (malformed JSON, unexpected
	// types). Stage 19 maps this to 502.
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"

	// ErrorCodeServiceUnavailable reports that a steady-state dependency
	// (PostgreSQL, Lobby) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,367 @@
// Package commandexecute implements the player-command hot-path service
// owned by Game Master. It accepts a verified `(game_id, user_id, payload)`
// envelope from Edge Gateway, authorises the caller against the membership
// cache, resolves `actor=race_name` from `player_mappings`, reshapes the
// payload to the engine `CommandRequest{actor, cmd}` schema, and forwards
// the call to the engine `/api/v1/command` endpoint.
//
// Lifecycle and error semantics follow `gamemaster/README.md §Hot Path →
// Player commands and orders`. Design rationale is captured in
// `gamemaster/docs/stage16-membership-cache-and-invalidation.md`.
package commandexecute
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/telemetry"
)
// Shared literals for this package: engineCallOp labels the engine-call
// latency metric; membershipStatusActive is the Lobby status that
// authorises commands; the payload* keys name the JSON fields read from
// the GM request (`commands`) and written to the engine request
// (`actor`, `cmd`).
const (
	engineCallOp           = "command"
	membershipStatusActive = "active"
	payloadCommandsKey     = "commands"
	payloadCmdKey          = "cmd"
	payloadActorKey        = "actor"
)
// Input stores the per-call arguments for one command-execute operation.
// The shape mirrors `ExecuteCommandsRequest` from
// `gamemaster/api/internal-openapi.yaml` plus the verified user identity
// captured from the `X-User-ID` header by the Stage 19 handler.
type Input struct {
// GameID identifies the platform game the command targets.
GameID string
// UserID identifies the platform user submitting the command. The
// service derives `actor=race_name` from this value via
// `player_mappings`.
UserID string
// Payload stores the raw `ExecuteCommandsRequest` body. The service
// rewrites it to the engine `CommandRequest{actor, cmd}` shape
// before forwarding.
Payload json.RawMessage
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched.
func (input Input) Validate() error {
if strings.TrimSpace(input.GameID) == "" {
return fmt.Errorf("game id must not be empty")
}
if strings.TrimSpace(input.UserID) == "" {
return fmt.Errorf("user id must not be empty")
}
if len(input.Payload) == 0 {
return fmt.Errorf("payload must not be empty")
}
return nil
}
// Result stores the deterministic outcome of one Handle call. Only
// Results returned by Handle are meaningful; the zero value is neither
// a success nor a classified failure.
type Result struct {
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure. Empty on
	// success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
	// RawResponse stores the engine response body. Populated on success
	// and on `engine_validation_error` (where the engine 4xx body
	// carries the per-command result vocabulary the gateway forwards).
	// Empty on every other terminal branch.
	RawResponse json.RawMessage
}

// IsSuccess reports whether the result represents a successful operation.
func (result Result) IsSuccess() bool {
	return result.Outcome == operation.OutcomeSuccess
}
// Dependencies groups the collaborators required by Service. All fields
// except Logger and Clock are mandatory; NewService rejects nil values
// for the mandatory ones.
type Dependencies struct {
	// RuntimeRecords loads the engine endpoint and the runtime status.
	RuntimeRecords ports.RuntimeRecordStore
	// PlayerMappings resolves `(game_id, user_id) → race_name`.
	PlayerMappings ports.PlayerMappingStore
	// Membership authorises the caller. Hot-path services share one
	// cache instance with `orderput` and `reportget`.
	Membership *membership.Cache
	// Engine forwards the reshaped payload to `/api/v1/command`.
	Engine ports.EngineClient
	// Telemetry records the per-outcome counter and the engine-call
	// latency histogram.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Optional;
	// defaults to `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for engine-call latency.
	// Optional; defaults to `time.Now` when nil.
	Clock func() time.Time
}
// Service executes the command-execute hot-path operation. Construct it
// via NewService; Handle guards only against a nil receiver, not nil
// collaborators, so the zero value is unusable.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	playerMappings ports.PlayerMappingStore
	membership     *membership.Cache
	engine         ports.EngineClient
	telemetry      *telemetry.Runtime
	logger         *slog.Logger
	clock          func() time.Time
}
// NewService validates deps and assembles a Service. Every collaborator
// except Logger and Clock is mandatory; Logger defaults to
// slog.Default() and Clock to time.Now, and the logger is tagged with
// the service name.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new command execute service: nil runtime records")
	}
	if deps.PlayerMappings == nil {
		return nil, errors.New("new command execute service: nil player mappings")
	}
	if deps.Membership == nil {
		return nil, errors.New("new command execute service: nil membership cache")
	}
	if deps.Engine == nil {
		return nil, errors.New("new command execute service: nil engine client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new command execute service: nil telemetry runtime")
	}
	service := &Service{
		runtimeRecords: deps.RuntimeRecords,
		playerMappings: deps.PlayerMappings,
		membership:     deps.Membership,
		engine:         deps.Engine,
		telemetry:      deps.Telemetry,
		logger:         deps.Logger,
		clock:          deps.Clock,
	}
	if service.clock == nil {
		service.clock = time.Now
	}
	if service.logger == nil {
		service.logger = slog.Default()
	}
	service.logger = service.logger.With("service", "gamemaster.commandexecute")
	return service, nil
}
// Handle executes one command-execute operation end-to-end. The Go-level
// error return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome flows through Result.
//
// Flow: validate input → load runtime record → require running status →
// authorise caller and resolve actor → rewrite payload → call engine →
// record telemetry and return the engine body.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("command execute: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("command execute: nil context")
	}
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, input, ErrorCodeInvalidRequest, err.Error(), nil), nil
	}
	record, result, ok := service.loadRecord(ctx, input)
	if !ok {
		return result, nil
	}
	// Commands are only accepted while the runtime is `running`.
	if record.Status != runtime.StatusRunning {
		message := fmt.Sprintf("runtime status is %q, expected %q", record.Status, runtime.StatusRunning)
		return service.recordFailure(ctx, input, ErrorCodeRuntimeNotRunning, message, nil), nil
	}
	mapping, result, ok := service.authorise(ctx, input)
	if !ok {
		return result, nil
	}
	// Reshape {commands:[…]} into the engine {actor, cmd} envelope using
	// the race name resolved from player_mappings.
	payload, err := rewriteCommandPayload(input.Payload, mapping.RaceName)
	if err != nil {
		return service.recordFailure(ctx, input, ErrorCodeInvalidRequest, err.Error(), nil), nil
	}
	body, engineErr := service.callEngine(ctx, record.EngineEndpoint, payload)
	if engineErr != nil {
		errorCode := classifyEngineError(engineErr)
		message := fmt.Sprintf("engine command: %s", engineErr.Error())
		// Only engine validation (4xx) bodies are forwarded to the
		// caller; they carry the per-command result vocabulary.
		var bodyForCaller json.RawMessage
		if errorCode == ErrorCodeEngineValidationError {
			bodyForCaller = body
		}
		return service.recordFailure(ctx, input, errorCode, message, bodyForCaller), nil
	}
	service.telemetry.RecordCommandExecuteOutcome(ctx,
		string(operation.OutcomeSuccess), "")
	logArgs := []any{
		"game_id", input.GameID,
		"user_id", input.UserID,
		"actor", mapping.RaceName,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "command execute succeeded", logArgs...)
	return Result{
		Outcome:     operation.OutcomeSuccess,
		RawResponse: body,
	}, nil
}
// loadRecord reads the runtime record and maps store errors to
// orchestrator outcomes: ErrNotFound → runtime_not_found, any other
// store error → service_unavailable. ok=false means the flow stops with
// the returned Result.
func (service *Service) loadRecord(ctx context.Context, input Input) (runtime.RuntimeRecord, Result, bool) {
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	switch {
	case err == nil:
		return record, Result{}, true
	case errors.Is(err, runtime.ErrNotFound):
		return runtime.RuntimeRecord{}, service.recordFailure(ctx, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist", nil), false
	default:
		return runtime.RuntimeRecord{}, service.recordFailure(ctx, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error()), nil), false
	}
}
// authorise resolves the membership status and the player mapping for
// the caller. ok=false means the flow stops with the returned Result.
//
// Outcome mapping: any membership resolution failure (including
// membership.ErrLobbyUnavailable) → service_unavailable; a non-active
// or absent membership, or a missing player mapping → forbidden; a
// player-mapping store failure → service_unavailable.
func (service *Service) authorise(ctx context.Context, input Input) (playermapping.PlayerMapping, Result, bool) {
	status, err := service.membership.Resolve(ctx, input.GameID, input.UserID)
	if err != nil {
		// The previous code special-cased ErrLobbyUnavailable with
		// errors.Is, but both branches produced the identical failure;
		// the duplicate branch is collapsed into this single path.
		return playermapping.PlayerMapping{}, service.recordFailure(ctx, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("resolve membership: %s", err.Error()), nil), false
	}
	if status != membershipStatusActive {
		message := fmt.Sprintf("membership status %q does not authorise commands", status)
		if status == "" {
			// An empty status means the user has no membership at all.
			message = "user is not a member of the game"
		}
		return playermapping.PlayerMapping{}, service.recordFailure(ctx, input,
			ErrorCodeForbidden, message, nil), false
	}
	mapping, err := service.playerMappings.Get(ctx, input.GameID, input.UserID)
	switch {
	case err == nil:
		return mapping, Result{}, true
	case errors.Is(err, playermapping.ErrNotFound):
		return playermapping.PlayerMapping{}, service.recordFailure(ctx, input,
			ErrorCodeForbidden, "player mapping not installed for active member", nil), false
	default:
		return playermapping.PlayerMapping{}, service.recordFailure(ctx, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get player mapping: %s", err.Error()), nil), false
	}
}
// callEngine forwards the reshaped payload to the engine and records
// the wall-clock latency of the call under the `command` op label.
func (service *Service) callEngine(ctx context.Context, baseURL string, payload json.RawMessage) (json.RawMessage, error) {
	startedAt := service.clock()
	response, callErr := service.engine.ExecuteCommands(ctx, baseURL, payload)
	elapsed := service.clock().Sub(startedAt)
	service.telemetry.RecordEngineCall(ctx, engineCallOp, elapsed)
	return response, callErr
}
// classifyEngineError translates the engine port sentinel errors into
// the stable command-execute error codes. Anything unrecognised —
// including the explicit ErrEngineUnreachable sentinel — is reported
// as engine_unreachable.
func classifyEngineError(err error) string {
	if errors.Is(err, ports.ErrEngineValidation) {
		return ErrorCodeEngineValidationError
	}
	if errors.Is(err, ports.ErrEngineProtocolViolation) {
		return ErrorCodeEngineProtocolViolation
	}
	return ErrorCodeEngineUnreachable
}
// recordFailure emits the service-level outcome counter and a structured
// log entry, then returns the Result the caller surfaces. The caller is
// responsible for the runtime-side mutation (none for hot-path).
// rawResponse passes through untouched; callers supply it only on the
// engine_validation_error branch.
func (service *Service) recordFailure(ctx context.Context, input Input, errorCode, errorMessage string, rawResponse json.RawMessage) Result {
	service.telemetry.RecordCommandExecuteOutcome(ctx,
		string(operation.OutcomeFailure), errorCode)
	logArgs := []any{
		"game_id", input.GameID,
		"user_id", input.UserID,
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	// Append request-scoped attributes carried in ctx (see
	// logging.ContextAttrs for what is propagated).
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "command execute rejected", logArgs...)
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		RawResponse:  rawResponse,
	}
}
// rewriteCommandPayload reshapes the GM `ExecuteCommandsRequest` body
// (`{commands:[…]}`) to the engine `CommandRequest` body
// (`{actor:<raceName>, cmd:[…]}`). Every other top-level key is
// discarded; GM never trusts caller-supplied envelope fields per the
// README §Hot Path rule. Returns an error when the payload is not a
// JSON object, or when the `commands` field is missing or not an
// array.
func rewriteCommandPayload(payload json.RawMessage, raceName string) (json.RawMessage, error) {
	var fields map[string]json.RawMessage
	if err := json.Unmarshal(payload, &fields); err != nil {
		return nil, fmt.Errorf("payload must decode as a JSON object: %w", err)
	}
	commands, ok := fields[payloadCommandsKey]
	if !ok {
		return nil, fmt.Errorf("payload missing required %q field", payloadCommandsKey)
	}
	// Validate the array shape before building the engine envelope. The
	// raw `commands` bytes are forwarded verbatim; commandList exists
	// only as the validation target (the previous `_ = commandList`
	// no-op has been removed — the Unmarshal call already uses it).
	var commandList []json.RawMessage
	if err := json.Unmarshal(commands, &commandList); err != nil {
		return nil, fmt.Errorf("payload %q field must decode as an array: %w", payloadCommandsKey, err)
	}
	actor, err := json.Marshal(raceName)
	if err != nil {
		return nil, fmt.Errorf("marshal actor: %w", err)
	}
	encoded, err := json.Marshal(map[string]json.RawMessage{
		payloadActorKey: actor,
		payloadCmdKey:   commands,
	})
	if err != nil {
		return nil, fmt.Errorf("marshal engine payload: %w", err)
	}
	return encoded, nil
}
@@ -0,0 +1,614 @@
package commandexecute_test
import (
"context"
"encoding/json"
"errors"
"fmt"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/commandexecute"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- fakes ------------------------------------------------------------
// fakeRuntimeRecords is an in-memory, mutex-guarded runtime-record
// store; only Get is functional in this hot-path suite.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord
	// getErr, when set, is returned by every Get call.
	getErr error
}

// newFakeRuntimeRecords returns an empty store ready for seeding.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed installs record under its game id, replacing any previous entry.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}

// Get returns the seeded record, the injected getErr, or
// runtime.ErrNotFound when the game id is unknown.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}
// The remaining RuntimeRecordStore methods are not exercised by this
// suite and fail loudly if called.
func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) CountByStatus(context.Context) (map[string]int, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) CountDue(context.Context) (int, error) {
	return 0, errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}
// fakePlayerMappings is an in-memory (game id → user id → mapping)
// store; only Get is functional in this suite.
type fakePlayerMappings struct {
	mu     sync.Mutex
	stored map[string]map[string]playermapping.PlayerMapping
	// getErr, when set, is returned by every Get call.
	getErr error
}

// newFakePlayerMappings returns an empty store ready for seeding.
func newFakePlayerMappings() *fakePlayerMappings {
	return &fakePlayerMappings{stored: map[string]map[string]playermapping.PlayerMapping{}}
}

// seed installs record under its (game id, user id) pair, creating the
// per-game map on first use.
func (s *fakePlayerMappings) seed(record playermapping.PlayerMapping) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.stored[record.GameID]; !ok {
		s.stored[record.GameID] = map[string]playermapping.PlayerMapping{}
	}
	s.stored[record.GameID][record.UserID] = record
}

// Get returns the seeded mapping, the injected getErr, or
// playermapping.ErrNotFound for an unknown (game, user) pair.
func (s *fakePlayerMappings) Get(_ context.Context, gameID, userID string) (playermapping.PlayerMapping, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return playermapping.PlayerMapping{}, s.getErr
	}
	record, ok := s.stored[gameID][userID]
	if !ok {
		return playermapping.PlayerMapping{}, playermapping.ErrNotFound
	}
	return record, nil
}
// The remaining PlayerMappingStore methods are not exercised by this
// suite and fail loudly if called.
func (s *fakePlayerMappings) BulkInsert(context.Context, []playermapping.PlayerMapping) error {
	return errors.New("not used")
}

func (s *fakePlayerMappings) GetByRace(context.Context, string, string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used")
}

func (s *fakePlayerMappings) ListByGame(context.Context, string) ([]playermapping.PlayerMapping, error) {
	return nil, errors.New("not used")
}

func (s *fakePlayerMappings) DeleteByGame(context.Context, string) error {
	return errors.New("not used")
}
// recordedCall captures one engine dispatch: where it went and the
// exact payload bytes that were sent.
type recordedCall struct {
	baseURL string
	payload json.RawMessage
}

// fakeEngine records every ExecuteCommands call and replies with a
// canned body/err pair. The remaining engine-port methods fail loudly.
type fakeEngine struct {
	mu    sync.Mutex
	body  json.RawMessage // canned response body
	err   error           // canned dispatch error
	calls []recordedCall  // every ExecuteCommands invocation, in order
}

// ExecuteCommands snapshots the payload (the caller may reuse its
// backing array) and returns whatever the test configured.
func (e *fakeEngine) ExecuteCommands(_ context.Context, baseURL string, payload json.RawMessage) (json.RawMessage, error) {
	e.mu.Lock()
	defer e.mu.Unlock()
	snapshot := append(json.RawMessage(nil), payload...)
	e.calls = append(e.calls, recordedCall{baseURL: baseURL, payload: snapshot})
	return e.body, e.err
}

func (e *fakeEngine) Init(context.Context, string, ports.InitRequest) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}

func (e *fakeEngine) Status(context.Context, string) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}

func (e *fakeEngine) Turn(context.Context, string) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}

func (e *fakeEngine) BanishRace(context.Context, string, string) error {
	return errors.New("not used")
}

func (e *fakeEngine) PutOrders(context.Context, string, json.RawMessage) (json.RawMessage, error) {
	return nil, errors.New("not used")
}

func (e *fakeEngine) GetReport(context.Context, string, string, int) (json.RawMessage, error) {
	return nil, errors.New("not used")
}
// fakeLobby answers GetMemberships from seeded per-game rosters.
// GetGameSummary is not exercised and fails loudly.
type fakeLobby struct {
	mu      sync.Mutex
	answers map[string][]ports.Membership
	errs    map[string]error
}

// newFakeLobby returns an empty lobby double ready for seeding.
func newFakeLobby() *fakeLobby {
	return &fakeLobby{
		answers: make(map[string][]ports.Membership),
		errs:    make(map[string]error),
	}
}

// seed installs the roster returned for gameID.
func (l *fakeLobby) seed(gameID string, members []ports.Membership) {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.answers[gameID] = members
}

// seedErr makes GetMemberships fail for gameID with err.
func (l *fakeLobby) seedErr(gameID string, err error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.errs[gameID] = err
}

// GetMemberships returns the seeded error if one exists, otherwise a
// defensive copy of the seeded roster so callers cannot mutate it.
func (l *fakeLobby) GetMemberships(_ context.Context, gameID string) ([]ports.Membership, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	if err, ok := l.errs[gameID]; ok {
		return nil, err
	}
	roster := l.answers[gameID]
	return append([]ports.Membership(nil), roster...), nil
}

func (l *fakeLobby) GetGameSummary(context.Context, string) (ports.GameSummary, error) {
	return ports.GameSummary{}, errors.New("not used")
}
// --- harness ----------------------------------------------------------

// harness bundles the service under test with every fake dependency so
// individual tests can seed state through one object. The frozen `now`
// drives both the membership cache and the service clock.
type harness struct {
	t        *testing.T
	now      time.Time // frozen wall-clock shared by cache and service
	runtimes *fakeRuntimeRecords
	mappings *fakePlayerMappings
	engine   *fakeEngine
	lobby    *fakeLobby
	cache    *membership.Cache
	service  *commandexecute.Service
}

// Canonical identifiers reused by every test case in this file.
const (
	testGameID         = "game-001"
	testUserID         = "user-1"
	testRaceName       = "Aelinari"
	testEngineEndpoint = "http://galaxy-game-game-001:8080"
)
// newHarness wires the service under test to fresh fakes, a frozen
// clock, and a real membership cache backed by the fake lobby.
func newHarness(t *testing.T) *harness {
	t.Helper()
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	h := &harness{
		t:        t,
		now:      time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC),
		runtimes: newFakeRuntimeRecords(),
		mappings: newFakePlayerMappings(),
		engine:   &fakeEngine{},
		lobby:    newFakeLobby(),
	}
	// Both the cache and the service observe the harness clock so a
	// test can advance time by mutating h.now.
	clock := func() time.Time { return h.now }
	h.cache, err = membership.NewCache(membership.Dependencies{
		Lobby:     h.lobby,
		Telemetry: tel,
		TTL:       time.Minute,
		MaxGames:  16,
		Clock:     clock,
	})
	require.NoError(t, err)
	h.service, err = commandexecute.NewService(commandexecute.Dependencies{
		RuntimeRecords: h.runtimes,
		PlayerMappings: h.mappings,
		Membership:     h.cache,
		Engine:         h.engine,
		Telemetry:      tel,
		Clock:          clock,
	})
	require.NoError(t, err)
	return h
}
// seedRunningRecord installs a healthy `running` runtime row for
// testGameID pointing at testEngineEndpoint — the precondition for the
// happy path.
func (h *harness) seedRunningRecord() {
	startedAt := h.now.Add(-1 * time.Hour)
	h.runtimes.seed(runtime.RuntimeRecord{
		GameID:               testGameID,
		Status:               runtime.StatusRunning,
		EngineEndpoint:       testEngineEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		EngineHealth:         "healthy",
		CreatedAt:            h.now.Add(-2 * time.Hour),
		UpdatedAt:            h.now.Add(-2 * time.Hour),
		StartedAt:            &startedAt,
	})
}

// seedActiveMembership makes the lobby report testUserID as an active
// member of testGameID playing testRaceName.
func (h *harness) seedActiveMembership() {
	h.lobby.seed(testGameID, []ports.Membership{{
		UserID:   testUserID,
		RaceName: testRaceName,
		Status:   "active",
		JoinedAt: h.now.Add(-2 * time.Hour),
	}})
}

// seedPlayerMapping installs the user→race mapping row matching the
// seeded membership.
func (h *harness) seedPlayerMapping() {
	h.mappings.seed(playermapping.PlayerMapping{
		GameID:           testGameID,
		UserID:           testUserID,
		RaceName:         testRaceName,
		EnginePlayerUUID: "uuid-1",
		CreatedAt:        h.now.Add(-2 * time.Hour),
	})
}

// inputWithCommands wraps payload in a commandexecute.Input addressed
// to the canonical test game and user.
func (h *harness) inputWithCommands(payload string) commandexecute.Input {
	return commandexecute.Input{
		GameID:  testGameID,
		UserID:  testUserID,
		Payload: json.RawMessage(payload),
	}
}

// basicCommandsPayload returns a minimal valid caller envelope carrying
// a single BUILD_SHIP command.
func basicCommandsPayload() string {
	return `{"commands":[{"@type":"BUILD_SHIP","cmdId":"00000000-0000-0000-0000-000000000001"}]}`
}
// --- tests ------------------------------------------------------------

// TestNewServiceRejectsBadDependencies verifies that NewService refuses
// construction when any single required collaborator is missing.
func TestNewServiceRejectsBadDependencies(t *testing.T) {
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cache, err := membership.NewCache(membership.Dependencies{
		Lobby: newFakeLobby(), Telemetry: tel, TTL: time.Minute, MaxGames: 1,
	})
	require.NoError(t, err)
	// Build a fully valid dependency set, then knock out one field per
	// case so each sub-test isolates exactly one missing collaborator.
	complete := func() commandexecute.Dependencies {
		return commandexecute.Dependencies{
			RuntimeRecords: newFakeRuntimeRecords(),
			PlayerMappings: newFakePlayerMappings(),
			Membership:     cache,
			Engine:         &fakeEngine{},
			Telemetry:      tel,
		}
	}
	table := []struct {
		name string
		omit func(*commandexecute.Dependencies)
	}{
		{"nil runtime records", func(d *commandexecute.Dependencies) { d.RuntimeRecords = nil }},
		{"nil player mappings", func(d *commandexecute.Dependencies) { d.PlayerMappings = nil }},
		{"nil membership", func(d *commandexecute.Dependencies) { d.Membership = nil }},
		{"nil engine", func(d *commandexecute.Dependencies) { d.Engine = nil }},
		{"nil telemetry", func(d *commandexecute.Dependencies) { d.Telemetry = nil }},
	}
	for _, row := range table {
		t.Run(row.name, func(t *testing.T) {
			deps := complete()
			row.omit(&deps)
			svc, err := commandexecute.NewService(deps)
			require.Error(t, err)
			assert.Nil(t, svc)
		})
	}
}
// TestHandleHappyPath drives the full success flow (running runtime,
// active membership, mapped race, engine success) and pins the engine
// wire format: GM rewrites the caller's "commands" field to "cmd" and
// injects the mapped race as "actor".
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{"results":[{"cmd_id":"00000000-0000-0000-0000-000000000001","cmd_applied":true}]}`)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	assert.Empty(t, result.ErrorCode)
	// The engine's body is forwarded to the caller verbatim.
	assert.JSONEq(t, string(h.engine.body), string(result.RawResponse))
	require.Len(t, h.engine.calls, 1)
	assert.Equal(t, testEngineEndpoint, h.engine.calls[0].baseURL)
	// Inspect the exact envelope that crossed the engine boundary.
	var sentToEngine map[string]json.RawMessage
	require.NoError(t, json.Unmarshal(h.engine.calls[0].payload, &sentToEngine))
	assert.Contains(t, sentToEngine, "actor")
	assert.Contains(t, sentToEngine, "cmd")
	assert.NotContains(t, sentToEngine, "commands", "GM must rewrite the field name")
	var actor string
	require.NoError(t, json.Unmarshal(sentToEngine["actor"], &actor))
	assert.Equal(t, testRaceName, actor)
	var cmd []json.RawMessage
	require.NoError(t, json.Unmarshal(sentToEngine["cmd"], &cmd))
	assert.Len(t, cmd, 1)
}
// TestHandleHappyPathDoesNotTrustCallerActor proves the service
// replaces any caller-supplied "actor" field with the race resolved
// from the player mapping.
func TestHandleHappyPathDoesNotTrustCallerActor(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{}`)
	spoofed := `{"actor":"Hacker","commands":[{"@type":"BUILD_SHIP","cmdId":"00000000-0000-0000-0000-000000000001"}]}`
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(spoofed))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	require.Len(t, h.engine.calls, 1)
	var envelope map[string]json.RawMessage
	require.NoError(t, json.Unmarshal(h.engine.calls[0].payload, &envelope))
	var actor string
	require.NoError(t, json.Unmarshal(envelope["actor"], &actor))
	assert.Equal(t, testRaceName, actor, "GM must override caller-supplied actor")
}
// TestHandleInvalidRequest: structurally empty envelope fields fail
// fast with invalid_request and a message naming the offending field.
func TestHandleInvalidRequest(t *testing.T) {
	cases := []struct {
		name    string
		input   commandexecute.Input
		message string
	}{
		{"empty game id", commandexecute.Input{UserID: testUserID, Payload: json.RawMessage(basicCommandsPayload())}, "game id"},
		{"empty user id", commandexecute.Input{GameID: testGameID, Payload: json.RawMessage(basicCommandsPayload())}, "user id"},
		{"empty payload", commandexecute.Input{GameID: testGameID, UserID: testUserID}, "payload"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			result, err := h.service.Handle(context.Background(), tc.input)
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, commandexecute.ErrorCodeInvalidRequest, result.ErrorCode)
			assert.Contains(t, result.ErrorMessage, tc.message)
		})
	}
}

// TestHandleMalformedPayload: payloads that fail the commands-envelope
// shape check are rejected with invalid_request before any engine
// dispatch — even when runtime, membership, and mapping are all valid.
func TestHandleMalformedPayload(t *testing.T) {
	cases := []struct {
		name    string
		payload string
	}{
		{"non-object", `[1,2,3]`},
		{"missing commands", `{"orders":[]}`},
		{"commands not array", `{"commands":"oops"}`},
		{"non-json", `not json`},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			h.seedRunningRecord()
			h.seedActiveMembership()
			h.seedPlayerMapping()
			result, err := h.service.Handle(context.Background(), h.inputWithCommands(tc.payload))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, commandexecute.ErrorCodeInvalidRequest, result.ErrorCode)
			// The engine must never see a malformed envelope.
			assert.Empty(t, h.engine.calls)
		})
	}
}
// TestHandleRuntimeNotFound: with no runtime row seeded at all the
// service reports runtime_not_found.
func TestHandleRuntimeNotFound(t *testing.T) {
	h := newHarness(t)
	got, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, got.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeRuntimeNotFound, got.ErrorCode)
}

// TestHandleRuntimeStoreError: a failing runtime store classifies as
// service_unavailable, not as not-found.
func TestHandleRuntimeStoreError(t *testing.T) {
	h := newHarness(t)
	h.runtimes.getErr = errors.New("postgres down")
	got, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, got.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeServiceUnavailable, got.ErrorCode)
}
// TestHandleRuntimeNotRunning walks every non-running runtime status
// and verifies the service rejects the call with runtime_not_running
// before any engine dispatch.
func TestHandleRuntimeNotRunning(t *testing.T) {
	for _, status := range []runtime.Status{
		runtime.StatusStarting,
		runtime.StatusGenerationInProgress,
		runtime.StatusGenerationFailed,
		runtime.StatusStopped,
		runtime.StatusEngineUnreachable,
		runtime.StatusFinished,
	} {
		t.Run(string(status), func(t *testing.T) {
			h := newHarness(t)
			startedAt := h.now.Add(-1 * time.Hour)
			finishedAt := h.now
			record := runtime.RuntimeRecord{
				GameID:               testGameID,
				Status:               status,
				EngineEndpoint:       testEngineEndpoint,
				CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
				CurrentEngineVersion: "v1.2.3",
				TurnSchedule:         "0 18 * * *",
				CreatedAt:            h.now.Add(-2 * time.Hour),
				UpdatedAt:            h.now.Add(-2 * time.Hour),
			}
			// Keep the record's timestamp shape consistent with each
			// status' lifecycle: everything past "starting" has a start
			// time; stopped/finished carry their terminal timestamps.
			if status != runtime.StatusStarting {
				record.StartedAt = &startedAt
			}
			if status == runtime.StatusStopped {
				record.StoppedAt = &finishedAt
			}
			if status == runtime.StatusFinished {
				record.FinishedAt = &finishedAt
			}
			h.runtimes.seed(record)
			result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, commandexecute.ErrorCodeRuntimeNotRunning, result.ErrorCode)
			assert.Empty(t, h.engine.calls)
		})
	}
}
// TestHandleForbiddenInactiveMembership: a roster that does not list
// the caller as an active member yields forbidden without touching the
// engine — covers removed, blocked, and entirely unknown users.
func TestHandleForbiddenInactiveMembership(t *testing.T) {
	cases := []struct {
		name    string
		members []ports.Membership
	}{
		{"removed", []ports.Membership{{UserID: testUserID, RaceName: testRaceName, Status: "removed"}}},
		{"blocked", []ports.Membership{{UserID: testUserID, RaceName: testRaceName, Status: "blocked"}}},
		{"unknown user", []ports.Membership{{UserID: "ghost", RaceName: "Ghost", Status: "active"}}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			h.seedRunningRecord()
			h.seedPlayerMapping()
			h.lobby.seed(testGameID, tc.members)
			result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, commandexecute.ErrorCodeForbidden, result.ErrorCode)
			assert.Empty(t, h.engine.calls)
		})
	}
}

// TestHandleForbiddenMissingPlayerMapping: membership is active but no
// player-mapping row exists, which also classifies as forbidden.
func TestHandleForbiddenMissingPlayerMapping(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	// no player mapping
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeForbidden, result.ErrorCode)
	assert.Empty(t, h.engine.calls)
}
// TestHandleServiceUnavailableLobbyDown: a lobby outage during the
// membership check classifies as service_unavailable.
func TestHandleServiceUnavailableLobbyDown(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedPlayerMapping()
	h.lobby.seedErr(testGameID, fmt.Errorf("dial: %w", ports.ErrLobbyUnavailable))
	got, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, got.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeServiceUnavailable, got.ErrorCode)
}

// TestHandleServiceUnavailablePlayerMappingsError: a failing
// player-mapping store likewise classifies as service_unavailable.
func TestHandleServiceUnavailablePlayerMappingsError(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.mappings.getErr = errors.New("postgres down")
	got, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, got.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeServiceUnavailable, got.ErrorCode)
}
// TestHandleEngineUnreachable: a transport-level engine failure maps to
// engine_unreachable and forwards no response body.
func TestHandleEngineUnreachable(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.err = fmt.Errorf("dial: %w", ports.ErrEngineUnreachable)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeEngineUnreachable, result.ErrorCode)
	assert.Empty(t, result.RawResponse, "engine_unreachable does not forward a body")
}

// TestHandleEngineValidationErrorForwardsBody: an engine validation
// failure maps to engine_validation_error and the engine's body (with
// per-command error detail) is forwarded to the caller verbatim.
func TestHandleEngineValidationErrorForwardsBody(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	// Both body and err are set: the fake returns them together, which
	// mirrors an HTTP 4xx that still carries a response body.
	h.engine.body = json.RawMessage(`{"results":[{"cmd_id":"x","cmd_error_code":"INVALID_TARGET"}]}`)
	h.engine.err = fmt.Errorf("400: %w", ports.ErrEngineValidation)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeEngineValidationError, result.ErrorCode)
	assert.JSONEq(t, string(h.engine.body), string(result.RawResponse))
}

// TestHandleEngineProtocolViolation: a garbled engine response maps to
// engine_protocol_violation.
func TestHandleEngineProtocolViolation(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.err = fmt.Errorf("garbled: %w", ports.ErrEngineProtocolViolation)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicCommandsPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, commandexecute.ErrorCodeEngineProtocolViolation, result.ErrorCode)
}
// TestHandleNilContext: a nil context is a programmer error and must
// surface as a hard error rather than a classified Result.
func TestHandleNilContext(t *testing.T) {
	h := newHarness(t)
	var missing context.Context
	_, err := h.service.Handle(missing, h.inputWithCommands(basicCommandsPayload()))
	require.Error(t, err)
}

// TestHandleNilReceiver: invoking Handle through a nil *Service must
// return an error instead of panicking.
func TestHandleNilReceiver(t *testing.T) {
	var svc *commandexecute.Service
	_, err := svc.Handle(context.Background(), commandexecute.Input{})
	require.Error(t, err)
}
@@ -0,0 +1,36 @@
package engineversion

// Stable error codes returned alongside service-level errors. The values
// match the vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. The handler layer (Stage 19)
// maps the wrapped sentinel error to one of these codes; tests compare
// against the constant.
//
// Because the string values are frozen by the API contract, renaming or
// re-valuing any constant is a wire-contract change, not a refactor.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty required fields, malformed JSON
	// options, malformed semver, malformed Docker reference, partial
	// Update with no fields set, unsupported status enum).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeConflict reports that an Insert was rejected because a
	// row with the same `version` already exists.
	ErrorCodeConflict = "conflict"

	// ErrorCodeEngineVersionNotFound reports that the requested
	// version is not present in the registry. Returned by Get,
	// Update, Deprecate, Delete, and ResolveImageRef.
	ErrorCodeEngineVersionNotFound = "engine_version_not_found"

	// ErrorCodeEngineVersionInUse reports that a hard-delete attempt
	// was rejected because the version is still referenced by a
	// non-finished `runtime_records` row.
	ErrorCodeEngineVersionInUse = "engine_version_in_use"

	// ErrorCodeServiceUnavailable reports that a steady-state
	// dependency (PostgreSQL) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,752 @@
// Package engineversion implements the engine version registry service
// owned by Game Master. The service backs the
// `/api/v1/internal/engine-versions/*` REST surface (Stage 19) and the
// hot-path `image_ref` resolve called synchronously by Game Lobby's
// start flow.
//
// Responsibilities and stable error codes are frozen by
// `gamemaster/README.md §Engine Version Registry` and
// `gamemaster/api/internal-openapi.yaml`. Design rationale for stage 14
// is captured in `gamemaster/docs/stage14-engine-version-registry.md`.
package engineversion
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/engineversion"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"github.com/distribution/reference"
)
// Sentinel errors returned by the service. Handlers translate these
// into the stable `ErrorCode...` constants from `errors.go`. The
// adapter-level sentinels (`engineversion.ErrNotFound`,
// `engineversion.ErrConflict`, `engineversion.ErrInUse`,
// `engineversion.ErrInvalidSemver`) are wrapped with one of the
// service-level sentinels below before crossing the package boundary.
// Callers classify with errors.Is; every wrap in this file uses %w so
// the chain stays intact.
var (
	// ErrInvalidRequest reports that the input envelope failed
	// structural validation.
	ErrInvalidRequest = errors.New("invalid request")

	// ErrNotFound reports that the requested version does not exist
	// in the registry.
	ErrNotFound = errors.New("engine version not found")

	// ErrConflict reports that an Insert was rejected because a row
	// with the same version already exists.
	ErrConflict = errors.New("engine version already exists")

	// ErrInUse reports that a hard-delete attempt was rejected
	// because a non-finished runtime references the version.
	ErrInUse = errors.New("engine version in use")

	// ErrServiceUnavailable reports that a steady-state dependency
	// was unreachable for this call.
	ErrServiceUnavailable = errors.New("service unavailable")
)

// CreateInput stores the per-call arguments for one Create operation.
// Mirrors `CreateEngineVersionRequest` plus the audit-only OpSource /
// SourceRef pair.
type CreateInput struct {
	// Version stores the canonical semver (with or without the leading
	// "v"; ParseSemver normalises it).
	Version string

	// ImageRef stores the Docker reference of the engine image.
	// Validated against `github.com/distribution/reference` before
	// the row is persisted.
	ImageRef string

	// Options stores the engine-side options document as raw JSON.
	// Empty means "use the schema default `{}`". When non-empty the
	// service validates the bytes parse as a JSON object.
	Options []byte

	// OpSource classifies how the request entered Game Master.
	// Defaults to `admin_rest` when missing or unknown.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference.
	SourceRef string
}

// UpdateInput stores the per-call arguments for one Update operation.
// Pointer fields communicate "leave alone" (nil) vs. "write the value"
// (non-nil); at least one must be set.
type UpdateInput struct {
	// Version identifies the row to mutate.
	Version string

	// ImageRef is the new image reference. Nil leaves the column
	// unchanged; non-nil must be a valid Docker reference.
	ImageRef *string

	// Options is the new options document. Nil leaves the column
	// unchanged; non-nil must be a JSON object (possibly the empty
	// object).
	Options *[]byte

	// Status is the new registry status. Nil leaves the column
	// unchanged; non-nil must be a known status value.
	Status *engineversion.Status

	// OpSource classifies how the request entered Game Master.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference.
	SourceRef string
}

// DeprecateInput stores the per-call arguments for one Deprecate
// operation.
type DeprecateInput struct {
	// Version identifies the row to deprecate.
	Version string

	// OpSource classifies how the request entered Game Master.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference.
	SourceRef string
}

// DeleteInput stores the per-call arguments for one hard Delete
// operation.
type DeleteInput struct {
	// Version identifies the row to delete.
	Version string

	// OpSource classifies how the request entered Game Master.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference.
	SourceRef string
}

// Dependencies groups the collaborators required by Service.
type Dependencies struct {
	// EngineVersions persists the registry rows. Required.
	EngineVersions ports.EngineVersionStore

	// OperationLogs records the audit entry for every mutation
	// (Create, Update, Deprecate, Delete). Required.
	OperationLogs ports.OperationLogStore

	// Logger records structured service-level events. Defaults to
	// slog.Default when nil.
	Logger *slog.Logger

	// Clock supplies the wall-clock used for created_at / updated_at
	// and audit timestamps. Defaults to time.Now when nil.
	Clock func() time.Time
}

// Service implements the engine version registry operations.
// Construct only via NewService, which validates the dependencies and
// applies the Logger/Clock defaults.
type Service struct {
	versions      ports.EngineVersionStore
	operationLogs ports.OperationLogStore
	logger        *slog.Logger
	clock         func() time.Time
}
// NewService constructs one Service from deps. EngineVersions and
// OperationLogs are required; Logger and Clock fall back to
// slog.Default and time.Now respectively.
func NewService(deps Dependencies) (*Service, error) {
	if deps.EngineVersions == nil {
		return nil, errors.New("new engine version service: nil engine version store")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new engine version service: nil operation log store")
	}
	clock := time.Now
	if deps.Clock != nil {
		clock = deps.Clock
	}
	logger := slog.Default()
	if deps.Logger != nil {
		logger = deps.Logger
	}
	return &Service{
		versions:      deps.EngineVersions,
		operationLogs: deps.OperationLogs,
		logger:        logger.With("service", "gamemaster.engineversion"),
		clock:         clock,
	}, nil
}
// List returns every registry row, optionally filtered by status. A
// non-nil statusFilter must reference a known engineversion.Status;
// store failures surface wrapped in ErrServiceUnavailable.
func (service *Service) List(ctx context.Context, statusFilter *engineversion.Status) ([]engineversion.EngineVersion, error) {
	switch {
	case service == nil:
		return nil, errors.New("engine version list: nil service")
	case ctx == nil:
		return nil, errors.New("engine version list: nil context")
	case statusFilter != nil && !statusFilter.IsKnown():
		return nil, fmt.Errorf("%w: status %q is unsupported", ErrInvalidRequest, *statusFilter)
	}
	rows, err := service.versions.List(ctx, statusFilter)
	if err != nil {
		return nil, fmt.Errorf("%w: list engine versions: %s", ErrServiceUnavailable, err.Error())
	}
	return rows, nil
}
// Get returns the registry row identified by version. Returns
// ErrNotFound when no row matches and ErrServiceUnavailable when the
// store itself fails.
func (service *Service) Get(ctx context.Context, version string) (engineversion.EngineVersion, error) {
	var zero engineversion.EngineVersion
	if service == nil {
		return zero, errors.New("engine version get: nil service")
	}
	if ctx == nil {
		return zero, errors.New("engine version get: nil context")
	}
	if strings.TrimSpace(version) == "" {
		return zero, fmt.Errorf("%w: version must not be empty", ErrInvalidRequest)
	}
	record, err := service.versions.Get(ctx, version)
	if err != nil {
		// Translate the adapter sentinel into the service sentinel;
		// anything else is a dependency outage.
		if errors.Is(err, engineversion.ErrNotFound) {
			return zero, fmt.Errorf("%w: %q", ErrNotFound, version)
		}
		return zero, fmt.Errorf("%w: get engine version: %s", ErrServiceUnavailable, err.Error())
	}
	return record, nil
}
// ResolveImageRef returns the image_ref of the requested version. This
// is the hot path used by Game Lobby's start flow synchronously per
// register-runtime envelope; it delegates to Get and so shares Get's
// error contract (ErrInvalidRequest / ErrNotFound / ErrServiceUnavailable).
func (service *Service) ResolveImageRef(ctx context.Context, version string) (string, error) {
	record, err := service.Get(ctx, version)
	if err != nil {
		return "", err
	}
	return record.ImageRef, nil
}
// Create installs a fresh registry row. Validates the semver shape and
// Docker reference before touching the store. On success appends a
// success entry to operation_log; on classified failure (validation,
// conflict, store error) appends a failure entry.
func (service *Service) Create(ctx context.Context, input CreateInput) (engineversion.EngineVersion, error) {
	if service == nil {
		return engineversion.EngineVersion{}, errors.New("engine version create: nil service")
	}
	if ctx == nil {
		return engineversion.EngineVersion{}, errors.New("engine version create: nil context")
	}
	// One timestamp drives created_at, updated_at, and the audit entry
	// so the row and its operation-log line always agree.
	startedAt := service.clock().UTC()
	canonicalVersion, err := engineversion.ParseSemver(input.Version)
	if err != nil {
		// The raw input.Version is recorded here because no canonical
		// form exists yet.
		return engineversion.EngineVersion{}, service.recordCreateFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, fmt.Sprintf("parse semver: %s", err.Error()),
			fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()),
		)
	}
	if err := validateImageRef(input.ImageRef); err != nil {
		return engineversion.EngineVersion{}, service.recordCreateFailure(
			ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, fmt.Sprintf("validate image_ref: %s", err.Error()),
			fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()),
		)
	}
	options, err := normalizeOptions(input.Options)
	if err != nil {
		return engineversion.EngineVersion{}, service.recordCreateFailure(
			ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, fmt.Sprintf("validate options: %s", err.Error()),
			fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()),
		)
	}
	// New rows always start active; created_at == updated_at.
	record := engineversion.EngineVersion{
		Version:   canonicalVersion,
		ImageRef:  strings.TrimSpace(input.ImageRef),
		Options:   options,
		Status:    engineversion.StatusActive,
		CreatedAt: startedAt,
		UpdatedAt: startedAt,
	}
	if err := service.versions.Insert(ctx, record); err != nil {
		switch {
		case errors.Is(err, engineversion.ErrConflict):
			return engineversion.EngineVersion{}, service.recordCreateFailure(
				ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef,
				ErrorCodeConflict, "engine version already exists",
				fmt.Errorf("%w: %s", ErrConflict, canonicalVersion),
			)
		default:
			return engineversion.EngineVersion{}, service.recordCreateFailure(
				ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef,
				ErrorCodeServiceUnavailable, fmt.Sprintf("insert engine version: %s", err.Error()),
				fmt.Errorf("%w: insert engine version: %s", ErrServiceUnavailable, err.Error()),
			)
		}
	}
	service.appendSuccess(ctx, operation.OpKindEngineVersionCreate, canonicalVersion, input.OpSource, input.SourceRef, startedAt)
	logArgs := []any{
		"version", canonicalVersion,
		"image_ref", record.ImageRef,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "engine version created", logArgs...)
	return record, nil
}

// Update applies a partial update to one registry row. At least one of
// ImageRef, Options, Status must be non-nil. All field validation runs
// before the store is touched so a rejected request never mutates the
// row; the row is re-read after the write so the caller gets the
// persisted state.
func (service *Service) Update(ctx context.Context, input UpdateInput) (engineversion.EngineVersion, error) {
	if service == nil {
		return engineversion.EngineVersion{}, errors.New("engine version update: nil service")
	}
	if ctx == nil {
		return engineversion.EngineVersion{}, errors.New("engine version update: nil context")
	}
	startedAt := service.clock().UTC()
	if strings.TrimSpace(input.Version) == "" {
		return engineversion.EngineVersion{}, service.recordUpdateFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, "version must not be empty",
			fmt.Errorf("%w: version must not be empty", ErrInvalidRequest),
		)
	}
	// An all-nil update is a no-op the API contract treats as invalid.
	if input.ImageRef == nil && input.Options == nil && input.Status == nil {
		return engineversion.EngineVersion{}, service.recordUpdateFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, "at least one field must be set",
			fmt.Errorf("%w: at least one field must be set", ErrInvalidRequest),
		)
	}
	if input.ImageRef != nil {
		if err := validateImageRef(*input.ImageRef); err != nil {
			return engineversion.EngineVersion{}, service.recordUpdateFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeInvalidRequest, fmt.Sprintf("validate image_ref: %s", err.Error()),
				fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()),
			)
		}
	}
	if input.Status != nil && !input.Status.IsKnown() {
		return engineversion.EngineVersion{}, service.recordUpdateFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, fmt.Sprintf("status %q is unsupported", *input.Status),
			fmt.Errorf("%w: status %q is unsupported", ErrInvalidRequest, *input.Status),
		)
	}
	var normalizedOptions *[]byte
	if input.Options != nil {
		opts, err := normalizeOptions(*input.Options)
		if err != nil {
			return engineversion.EngineVersion{}, service.recordUpdateFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeInvalidRequest, fmt.Sprintf("validate options: %s", err.Error()),
				fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()),
			)
		}
		normalizedOptions = &opts
	}
	storeInput := ports.UpdateEngineVersionInput{
		Version: input.Version,
		Options: normalizedOptions,
		Status:  input.Status,
		Now:     startedAt,
	}
	if input.ImageRef != nil {
		trimmed := strings.TrimSpace(*input.ImageRef)
		storeInput.ImageRef = &trimmed
	}
	if err := service.versions.Update(ctx, storeInput); err != nil {
		switch {
		case errors.Is(err, engineversion.ErrNotFound):
			return engineversion.EngineVersion{}, service.recordUpdateFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeEngineVersionNotFound, fmt.Sprintf("engine version %q not found", input.Version),
				fmt.Errorf("%w: %q", ErrNotFound, input.Version),
			)
		default:
			return engineversion.EngineVersion{}, service.recordUpdateFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeServiceUnavailable, fmt.Sprintf("update engine version: %s", err.Error()),
				fmt.Errorf("%w: update engine version: %s", ErrServiceUnavailable, err.Error()),
			)
		}
	}
	persisted, err := service.versions.Get(ctx, input.Version)
	if err != nil {
		// The Update succeeded but the post-read failed. Surface the
		// store error; the audit entry still records the successful
		// mutation against operation_log.
		service.appendSuccess(ctx, operation.OpKindEngineVersionUpdate, input.Version, input.OpSource, input.SourceRef, startedAt)
		return engineversion.EngineVersion{}, fmt.Errorf("%w: reload engine version: %s", ErrServiceUnavailable, err.Error())
	}
	service.appendSuccess(ctx, operation.OpKindEngineVersionUpdate, input.Version, input.OpSource, input.SourceRef, startedAt)
	logArgs := []any{
		"version", input.Version,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "engine version updated", logArgs...)
	return persisted, nil
}
// Deprecate marks one registry row as deprecated. Idempotent: the call
// succeeds even when the row is already deprecated. Returns ErrNotFound
// when no row matches; every failure path is also recorded against
// operation_log via recordDeprecateFailure.
func (service *Service) Deprecate(ctx context.Context, input DeprecateInput) error {
	if service == nil {
		return errors.New("engine version deprecate: nil service")
	}
	if ctx == nil {
		return errors.New("engine version deprecate: nil context")
	}
	startedAt := service.clock().UTC()
	if strings.TrimSpace(input.Version) == "" {
		return service.recordDeprecateFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, "version must not be empty",
			fmt.Errorf("%w: version must not be empty", ErrInvalidRequest),
		)
	}
	if err := service.versions.Deprecate(ctx, input.Version, startedAt); err != nil {
		// Translate the adapter sentinel; anything else is a
		// dependency outage.
		if errors.Is(err, engineversion.ErrNotFound) {
			return service.recordDeprecateFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeEngineVersionNotFound, fmt.Sprintf("engine version %q not found", input.Version),
				fmt.Errorf("%w: %q", ErrNotFound, input.Version),
			)
		}
		return service.recordDeprecateFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeServiceUnavailable, fmt.Sprintf("deprecate engine version: %s", err.Error()),
			fmt.Errorf("%w: deprecate engine version: %s", ErrServiceUnavailable, err.Error()),
		)
	}
	service.appendSuccess(ctx, operation.OpKindEngineVersionDeprecate, input.Version, input.OpSource, input.SourceRef, startedAt)
	attrs := append([]any{
		"version", input.Version,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "engine version deprecated", attrs...)
	return nil
}
// Delete hard-deletes one registry row. Rejected with ErrInUse when any
// non-finished runtime still references the version. The reference
// probe runs first so the conflict is surfaced before the row is
// removed.
//
// Every failure path is audited through recordDeleteFailure before the
// wrapped sentinel error is returned; success is audited best-effort
// via appendSuccess and logged at info level.
func (service *Service) Delete(ctx context.Context, input DeleteInput) error {
	if service == nil {
		return errors.New("engine version delete: nil service")
	}
	if ctx == nil {
		return errors.New("engine version delete: nil context")
	}
	// One timestamp shared by the audit entry and the mutation.
	startedAt := service.clock().UTC()
	if strings.TrimSpace(input.Version) == "" {
		return service.recordDeleteFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeInvalidRequest, "version must not be empty",
			fmt.Errorf("%w: version must not be empty", ErrInvalidRequest),
		)
	}
	// Reference probe before the destructive delete (see header comment).
	referenced, err := service.versions.IsReferencedByActiveRuntime(ctx, input.Version)
	if err != nil {
		return service.recordDeleteFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeServiceUnavailable, fmt.Sprintf("is referenced by active runtime: %s", err.Error()),
			fmt.Errorf("%w: is referenced by active runtime: %s", ErrServiceUnavailable, err.Error()),
		)
	}
	if referenced {
		return service.recordDeleteFailure(
			ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
			ErrorCodeEngineVersionInUse, fmt.Sprintf("engine version %q is referenced by an active runtime", input.Version),
			fmt.Errorf("%w: %q", ErrInUse, input.Version),
		)
	}
	if err := service.versions.Delete(ctx, input.Version); err != nil {
		switch {
		case errors.Is(err, engineversion.ErrNotFound):
			// Row disappeared between probe and delete, or never existed.
			return service.recordDeleteFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeEngineVersionNotFound, fmt.Sprintf("engine version %q not found", input.Version),
				fmt.Errorf("%w: %q", ErrNotFound, input.Version),
			)
		default:
			// Any other store error is classified as a dependency outage.
			return service.recordDeleteFailure(
				ctx, startedAt, input.Version, input.OpSource, input.SourceRef,
				ErrorCodeServiceUnavailable, fmt.Sprintf("delete engine version: %s", err.Error()),
				fmt.Errorf("%w: delete engine version: %s", ErrServiceUnavailable, err.Error()),
			)
		}
	}
	// Success: best-effort audit entry, then a structured info log.
	service.appendSuccess(ctx, operation.OpKindEngineVersionDelete, input.Version, input.OpSource, input.SourceRef, startedAt)
	logArgs := []any{
		"version", input.Version,
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "engine version deleted", logArgs...)
	return nil
}
// validateImageRef enforces the Docker reference shape required by
// `engine_versions.image_ref`: non-empty after trimming and parseable
// through `distribution/reference.ParseNormalizedNamed`. The check is
// the same one Runtime Manager applies in startruntime so the registry
// never stores a value the runtime cannot pull.
//
// Returns a plain error describing the first violation, or nil when the
// reference is acceptable.
func validateImageRef(imageRef string) error {
	trimmed := strings.TrimSpace(imageRef)
	if trimmed == "" {
		// errors.New rather than fmt.Errorf: the message is a constant
		// with no format verbs (staticcheck S1039).
		return errors.New("image_ref must not be empty")
	}
	if _, err := reference.ParseNormalizedNamed(trimmed); err != nil {
		return fmt.Errorf("parse image reference %q: %w", trimmed, err)
	}
	return nil
}
// normalizeOptions validates that raw is a JSON document encoding a
// single object. Empty input is treated as `{}` and stored verbatim by
// the adapter (see stage 11 D5). Returns the trimmed bytes so the
// caller persists exactly what was validated.
func normalizeOptions(raw []byte) ([]byte, error) {
	trimmed := bytesTrim(raw)
	if len(trimmed) == 0 {
		return nil, nil
	}
	var probe map[string]any
	if err := json.Unmarshal(trimmed, &probe); err != nil {
		return nil, fmt.Errorf("options must be a JSON object: %w", err)
	}
	// json.Unmarshal accepts the JSON literal `null` for a map target
	// and leaves the map nil, so without this check `null` would pass
	// validation and be stored verbatim — violating the "must be a JSON
	// object" invariant. A real object (even `{}`) yields a non-nil map.
	if probe == nil {
		return nil, errors.New("options must be a JSON object, got null")
	}
	return trimmed, nil
}
// bytesTrim returns raw without leading or trailing ASCII whitespace
// (space, tab, LF, CR). Operating on the byte slice directly avoids an
// intermediate string allocation for raw JSON inputs; the result
// aliases raw's backing array.
func bytesTrim(raw []byte) []byte {
	isSpace := func(b byte) bool {
		return b == ' ' || b == '\t' || b == '\n' || b == '\r'
	}
	lo, hi := 0, len(raw)
	for lo < hi && isSpace(raw[lo]) {
		lo++
	}
	for hi > lo && isSpace(raw[hi-1]) {
		hi--
	}
	return raw[lo:hi]
}
// isASCIISpace reports whether b is one of the four ASCII whitespace
// bytes JSON treats as insignificant: space, tab, LF, or CR.
func isASCIISpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}
// recordCreateFailure appends an audit failure entry for a Create call
// and returns the original sentinel error wrapped with the failure
// reason. The audit entry is written best-effort; storage failures are
// logged and discarded.
func (service *Service) recordCreateFailure(
	ctx context.Context,
	startedAt time.Time,
	subject string, // canonical version string; doubles as the audit GameID
	source operation.OpSource,
	sourceRef string,
	errorCode string,
	errorMessage string,
	wrappedErr error, // returned unchanged so the caller's errors.Is checks hold
) error {
	service.appendFailure(ctx, operation.OpKindEngineVersionCreate, subject, source, sourceRef, startedAt, errorCode, errorMessage)
	service.logFailure(ctx, "engine version create failed", subject, source, errorCode, errorMessage)
	return wrappedErr
}
// recordUpdateFailure appends an audit failure entry for an Update call
// and returns the original sentinel error wrapped with the failure
// reason. Mirrors recordCreateFailure with the update op-kind.
func (service *Service) recordUpdateFailure(
	ctx context.Context,
	startedAt time.Time,
	subject string,
	source operation.OpSource,
	sourceRef string,
	errorCode string,
	errorMessage string,
	wrappedErr error,
) error {
	service.appendFailure(ctx, operation.OpKindEngineVersionUpdate, subject, source, sourceRef, startedAt, errorCode, errorMessage)
	service.logFailure(ctx, "engine version update failed", subject, source, errorCode, errorMessage)
	return wrappedErr
}
// recordDeprecateFailure appends an audit failure entry for a Deprecate
// call and returns the original sentinel error wrapped with the failure
// reason. Mirrors recordCreateFailure with the deprecate op-kind.
func (service *Service) recordDeprecateFailure(
	ctx context.Context,
	startedAt time.Time,
	subject string,
	source operation.OpSource,
	sourceRef string,
	errorCode string,
	errorMessage string,
	wrappedErr error,
) error {
	service.appendFailure(ctx, operation.OpKindEngineVersionDeprecate, subject, source, sourceRef, startedAt, errorCode, errorMessage)
	service.logFailure(ctx, "engine version deprecate failed", subject, source, errorCode, errorMessage)
	return wrappedErr
}
// recordDeleteFailure appends an audit failure entry for a Delete call
// and returns the original sentinel error wrapped with the failure
// reason. Mirrors recordCreateFailure with the delete op-kind.
func (service *Service) recordDeleteFailure(
	ctx context.Context,
	startedAt time.Time,
	subject string,
	source operation.OpSource,
	sourceRef string,
	errorCode string,
	errorMessage string,
	wrappedErr error,
) error {
	service.appendFailure(ctx, operation.OpKindEngineVersionDelete, subject, source, sourceRef, startedAt, errorCode, errorMessage)
	service.logFailure(ctx, "engine version delete failed", subject, source, errorCode, errorMessage)
	return wrappedErr
}
// appendSuccess writes a success entry to operation_log. Subject is the
// canonical version string; the entry's GameID column doubles as the
// audit subject for engine-version operations (stage 14 decision —
// the registry is global, not per-game).
func (service *Service) appendSuccess(
	ctx context.Context,
	kind operation.OpKind,
	subject string,
	source operation.OpSource,
	sourceRef string,
	startedAt time.Time,
) {
	// FinishedAt is sampled here, not passed in, so it always reflects
	// the moment the mutation completed.
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:     subject,
		OpKind:     kind,
		OpSource:   fallbackOpSource(source),
		SourceRef:  sourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  startedAt,
		FinishedAt: &finishedAt,
	})
}
// appendFailure writes a failure entry to operation_log. Subject and
// the GameID column overload follow the same rule as appendSuccess.
// errorCode/errorMessage carry the stable code and human-readable
// reason recorded alongside the failed operation.
func (service *Service) appendFailure(
	ctx context.Context,
	kind operation.OpKind,
	subject string,
	source operation.OpSource,
	sourceRef string,
	startedAt time.Time,
	errorCode string,
	errorMessage string,
) {
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       subject,
		OpKind:       kind,
		OpSource:     fallbackOpSource(source),
		SourceRef:    sourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    startedAt,
		FinishedAt:   &finishedAt,
	})
}
// bestEffortAppend writes one operation_log entry. A failure is logged
// and discarded; the registry mutation (or its absence) remains the
// source of truth. Callers therefore never see an error from auditing.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	if _, err := service.operationLogs.Append(ctx, entry); err != nil {
		service.logger.ErrorContext(ctx, "append operation log",
			"subject", entry.GameID,
			"op_kind", string(entry.OpKind),
			"outcome", string(entry.Outcome),
			"error_code", entry.ErrorCode,
			"err", err.Error(),
		)
	}
}
// logFailure emits one structured warn-level entry per service-level
// failure, mirroring registerruntime's log shape. The request-scoped
// attributes from ctx are appended so log lines correlate with the
// originating request.
func (service *Service) logFailure(
	ctx context.Context,
	message string,
	subject string,
	source operation.OpSource,
	errorCode string,
	errorMessage string,
) {
	logArgs := []any{
		"version", subject,
		"op_source", string(fallbackOpSource(source)),
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, message, logArgs...)
}
// fallbackOpSource defaults to admin_rest when source is missing or
// unrecognised. Mirrors `gamemaster/README.md §Trusted Surfaces`.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}
@@ -0,0 +1,631 @@
package engineversion_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/adapters/mocks"
domainengineversion "galaxy/gamemaster/internal/domain/engineversion"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/engineversion"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)
// fakeOperationLogs is a thread-safe stub recorder for the few
// operation_log entries the engine-version service writes per call.
// Using a stub keeps the operation_log assertions table-driven without
// introducing the verbosity of a gomock recorder for every entry.
type fakeOperationLogs struct {
	mu      sync.Mutex                 // guards entries and err
	entries []operation.OperationEntry // recorded entries in append order
	err     error                      // when set, Append fails with this error
}
// newFakeOperationLogs returns an empty recorder ready for use.
func newFakeOperationLogs() *fakeOperationLogs {
	return &fakeOperationLogs{}
}
// Append records entry and returns a 1-based synthetic id. When s.err
// is set the stub simulates an operation_log outage instead.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.err != nil {
		return 0, s.err
	}
	s.entries = append(s.entries, entry)
	return int64(len(s.entries)), nil
}
// ListByGame exists only to satisfy the operation-log port; the
// engine-version tests never call it.
func (s *fakeOperationLogs) ListByGame(_ context.Context, _ string, _ int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used in engineversion tests")
}
// snapshot returns a copy of the recorded entries so assertions never
// alias the slice that concurrent appends mutate.
func (s *fakeOperationLogs) snapshot() []operation.OperationEntry {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make([]operation.OperationEntry, len(s.entries))
	copy(out, s.entries)
	return out
}
// harness bundles the service under test with its mocked store, stub
// operation log, and a frozen clock shared by time assertions.
type harness struct {
	ctrl    *gomock.Controller
	store   *mocks.MockEngineVersionStore
	oplog   *fakeOperationLogs
	clock   time.Time // fixed instant returned by the injected clock
	service *engineversion.Service
}
// newHarness wires a Service with mock store, stub operation log, and a
// deterministic clock so tests can assert exact timestamps.
func newHarness(t *testing.T) *harness {
	t.Helper()
	ctrl := gomock.NewController(t)
	store := mocks.NewMockEngineVersionStore(ctrl)
	oplog := newFakeOperationLogs()
	clock := time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC)
	service, err := engineversion.NewService(engineversion.Dependencies{
		EngineVersions: store,
		OperationLogs:  oplog,
		Clock:          func() time.Time { return clock },
	})
	require.NoError(t, err)
	return &harness{
		ctrl:    ctrl,
		store:   store,
		oplog:   oplog,
		clock:   clock,
		service: service,
	}
}
// TestNewServiceRejectsMissingDeps verifies that NewService fails when
// either required dependency (store or operation log) is absent.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	ctrl := gomock.NewController(t)
	store := mocks.NewMockEngineVersionStore(ctrl)
	oplog := newFakeOperationLogs()
	tests := []struct {
		name string
		deps engineversion.Dependencies
	}{
		{"nil store", engineversion.Dependencies{OperationLogs: oplog}},
		{"nil oplog", engineversion.Dependencies{EngineVersions: store}},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			s, err := engineversion.NewService(tc.deps)
			require.Error(t, err)
			require.Nil(t, s)
		})
	}
}
// TestNewServiceDefaultsClockAndLogger verifies construction succeeds
// when the optional Clock and Logger dependencies are omitted.
func TestNewServiceDefaultsClockAndLogger(t *testing.T) {
	ctrl := gomock.NewController(t)
	service, err := engineversion.NewService(engineversion.Dependencies{
		EngineVersions: mocks.NewMockEngineVersionStore(ctrl),
		OperationLogs:  newFakeOperationLogs(),
	})
	require.NoError(t, err)
	require.NotNil(t, service)
}
// --- List ------------------------------------------------------------
// TestListNoFilter verifies List passes a nil filter through and
// returns the store rows unchanged.
func TestListNoFilter(t *testing.T) {
	h := newHarness(t)
	rows := []domainengineversion.EngineVersion{
		{Version: "v1.2.3", ImageRef: "ghcr.io/galaxy/game:v1.2.3", Status: domainengineversion.StatusActive},
		{Version: "v1.3.0", ImageRef: "ghcr.io/galaxy/game:v1.3.0", Status: domainengineversion.StatusDeprecated},
	}
	h.store.EXPECT().List(gomock.Any(), nil).Return(rows, nil)
	got, err := h.service.List(context.Background(), nil)
	require.NoError(t, err)
	assert.Equal(t, rows, got)
}
// TestListWithStatusFilter verifies a known status filter is forwarded
// to the store verbatim.
func TestListWithStatusFilter(t *testing.T) {
	h := newHarness(t)
	active := domainengineversion.StatusActive
	expected := []domainengineversion.EngineVersion{
		{Version: "v1.2.3", ImageRef: "ghcr.io/galaxy/game:v1.2.3", Status: active},
	}
	h.store.EXPECT().List(gomock.Any(), &active).Return(expected, nil)
	got, err := h.service.List(context.Background(), &active)
	require.NoError(t, err)
	assert.Equal(t, expected, got)
}
// TestListRejectsUnknownStatusFilter verifies an unrecognised status
// value is rejected as ErrInvalidRequest before the store is touched.
func TestListRejectsUnknownStatusFilter(t *testing.T) {
	h := newHarness(t)
	exotic := domainengineversion.Status("exotic")
	got, err := h.service.List(context.Background(), &exotic)
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
	assert.Nil(t, got)
}
// TestListWrapsStoreErrorAsServiceUnavailable verifies an unclassified
// store error surfaces as ErrServiceUnavailable.
func TestListWrapsStoreErrorAsServiceUnavailable(t *testing.T) {
	h := newHarness(t)
	storeErr := errors.New("pg down")
	h.store.EXPECT().List(gomock.Any(), nil).Return(nil, storeErr)
	_, err := h.service.List(context.Background(), nil)
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrServiceUnavailable))
}
// --- Get -------------------------------------------------------------
// TestGetHappyPath verifies Get returns the store row unchanged.
func TestGetHappyPath(t *testing.T) {
	h := newHarness(t)
	row := domainengineversion.EngineVersion{
		Version: "v1.2.3", ImageRef: "ghcr.io/galaxy/game:v1.2.3", Status: domainengineversion.StatusActive,
	}
	h.store.EXPECT().Get(gomock.Any(), "v1.2.3").Return(row, nil)
	got, err := h.service.Get(context.Background(), "v1.2.3")
	require.NoError(t, err)
	assert.Equal(t, row, got)
}
// TestGetNotFound verifies a domain not-found maps to the service-level
// ErrNotFound sentinel.
func TestGetNotFound(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Get(gomock.Any(), "v9.9.9").Return(domainengineversion.EngineVersion{}, domainengineversion.ErrNotFound)
	_, err := h.service.Get(context.Background(), "v9.9.9")
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrNotFound))
}
// TestGetRejectsEmptyVersion verifies a whitespace-only version is
// rejected as ErrInvalidRequest without touching the store.
func TestGetRejectsEmptyVersion(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Get(context.Background(), " ")
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestGetWrapsStoreError verifies an unclassified store error surfaces
// as ErrServiceUnavailable.
func TestGetWrapsStoreError(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Get(gomock.Any(), "v1.2.3").Return(domainengineversion.EngineVersion{}, errors.New("pg down"))
	_, err := h.service.Get(context.Background(), "v1.2.3")
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrServiceUnavailable))
}
// --- ResolveImageRef -------------------------------------------------
// TestResolveImageRefHappyPath verifies ResolveImageRef returns the
// stored image reference for a known version.
func TestResolveImageRefHappyPath(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Get(gomock.Any(), "v1.2.3").Return(domainengineversion.EngineVersion{
		Version: "v1.2.3", ImageRef: "ghcr.io/galaxy/game:v1.2.3", Status: domainengineversion.StatusActive,
	}, nil)
	got, err := h.service.ResolveImageRef(context.Background(), "v1.2.3")
	require.NoError(t, err)
	assert.Equal(t, "ghcr.io/galaxy/game:v1.2.3", got)
}
// TestResolveImageRefSeededTable exercises ResolveImageRef across a
// table of version shapes (plain, prerelease, fully-qualified registry).
func TestResolveImageRefSeededTable(t *testing.T) {
	tests := []struct {
		name        string
		seedVersion string
		seedRef     string
	}{
		{"v1.0.0", "v1.0.0", "ghcr.io/galaxy/game:v1.0.0"},
		{"v1.2.3 with prerelease metadata", "v1.2.3-rc1", "ghcr.io/galaxy/game:v1.2.3-rc1"},
		{"v2.0.0 fully-qualified", "v2.0.0", "registry.galaxy.local/game:v2.0.0"},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Fresh harness per case keeps mock expectations isolated.
			h := newHarness(t)
			h.store.EXPECT().Get(gomock.Any(), tc.seedVersion).Return(domainengineversion.EngineVersion{
				Version: tc.seedVersion, ImageRef: tc.seedRef, Status: domainengineversion.StatusActive,
			}, nil)
			got, err := h.service.ResolveImageRef(context.Background(), tc.seedVersion)
			require.NoError(t, err)
			assert.Equal(t, tc.seedRef, got)
		})
	}
}
// TestResolveImageRefNotFound verifies an unknown version maps to the
// service-level ErrNotFound sentinel.
func TestResolveImageRefNotFound(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Get(gomock.Any(), "v9.9.9").Return(domainengineversion.EngineVersion{}, domainengineversion.ErrNotFound)
	_, err := h.service.ResolveImageRef(context.Background(), "v9.9.9")
	require.True(t, errors.Is(err, engineversion.ErrNotFound))
}
// --- Create ----------------------------------------------------------
// TestCreateHappyPath verifies the full Create flow: the bare "1.2.3"
// input is canonicalised to "v1.2.3", timestamps come from the frozen
// clock, and exactly one success audit entry is written.
func TestCreateHappyPath(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Insert(gomock.Any(), gomock.Any()).DoAndReturn(
		func(_ context.Context, record domainengineversion.EngineVersion) error {
			assert.Equal(t, "v1.2.3", record.Version)
			assert.Equal(t, "ghcr.io/galaxy/game:v1.2.3", record.ImageRef)
			assert.Equal(t, domainengineversion.StatusActive, record.Status)
			assert.Equal(t, h.clock, record.CreatedAt)
			assert.Equal(t, h.clock, record.UpdatedAt)
			return nil
		},
	)
	got, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:   "1.2.3",
		ImageRef:  "ghcr.io/galaxy/game:v1.2.3",
		Options:   []byte(`{"max_planets":120}`),
		OpSource:  operation.OpSourceAdminRest,
		SourceRef: "request-1",
	})
	require.NoError(t, err)
	assert.Equal(t, "v1.2.3", got.Version)
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OpKindEngineVersionCreate, entries[0].OpKind)
	assert.Equal(t, "v1.2.3", entries[0].GameID)
	assert.Equal(t, operation.OutcomeSuccess, entries[0].Outcome)
	assert.Equal(t, operation.OpSourceAdminRest, entries[0].OpSource)
	assert.Equal(t, "request-1", entries[0].SourceRef)
}
// TestCreateRejectsInvalidSemver verifies Create rejects empty, blank,
// and malformed version strings as ErrInvalidRequest.
func TestCreateRejectsInvalidSemver(t *testing.T) {
	tests := []string{"", " ", "not-a-version", "v1.2", "1.2"}
	for _, version := range tests {
		t.Run(version, func(t *testing.T) {
			h := newHarness(t)
			_, err := h.service.Create(context.Background(), engineversion.CreateInput{
				Version:  version,
				ImageRef: "ghcr.io/galaxy/game:v1.2.3",
			})
			require.Error(t, err)
			require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
		})
	}
}
// TestCreateAuditFailureForBadImageRef verifies a blank image ref is
// rejected AND produces a failure audit entry with the stable
// invalid_request error code.
func TestCreateAuditFailureForBadImageRef(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:  "v1.2.3",
		ImageRef: " ",
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OpKindEngineVersionCreate, entries[0].OpKind)
	assert.Equal(t, "v1.2.3", entries[0].GameID)
	assert.Equal(t, operation.OutcomeFailure, entries[0].Outcome)
	assert.Equal(t, engineversion.ErrorCodeInvalidRequest, entries[0].ErrorCode)
}
// TestCreateRejectsBadDockerReference verifies an unparseable Docker
// reference is rejected as ErrInvalidRequest.
func TestCreateRejectsBadDockerReference(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:  "v1.2.3",
		ImageRef: "BAD//Ref::",
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestCreateRejectsNonObjectOptions verifies a JSON array in Options is
// rejected — only a JSON object is acceptable.
func TestCreateRejectsNonObjectOptions(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:  "v1.2.3",
		ImageRef: "ghcr.io/galaxy/game:v1.2.3",
		Options:  []byte(`[1,2,3]`),
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestCreateAcceptsEmptyOptionsAsNil verifies nil Options pass through
// empty; the storage adapter supplies the default `{}`.
func TestCreateAcceptsEmptyOptionsAsNil(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Insert(gomock.Any(), gomock.Any()).DoAndReturn(
		func(_ context.Context, record domainengineversion.EngineVersion) error {
			assert.Empty(t, record.Options, "expected empty options pass-through (adapter writes default {})")
			return nil
		},
	)
	_, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:  "v1.2.3",
		ImageRef: "ghcr.io/galaxy/game:v1.2.3",
		Options:  nil,
	})
	require.NoError(t, err)
}
// TestCreateConflict verifies a duplicate insert maps to ErrConflict
// and writes a failure audit entry with the conflict error code.
func TestCreateConflict(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Insert(gomock.Any(), gomock.Any()).Return(domainengineversion.ErrConflict)
	_, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:  "v1.2.3",
		ImageRef: "ghcr.io/galaxy/game:v1.2.3",
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrConflict))
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OutcomeFailure, entries[0].Outcome)
	assert.Equal(t, engineversion.ErrorCodeConflict, entries[0].ErrorCode)
}
// TestCreateUnknownStoreError verifies an unclassified insert error
// surfaces as ErrServiceUnavailable.
func TestCreateUnknownStoreError(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Insert(gomock.Any(), gomock.Any()).Return(errors.New("pg down"))
	_, err := h.service.Create(context.Background(), engineversion.CreateInput{
		Version:  "v1.2.3",
		ImageRef: "ghcr.io/galaxy/game:v1.2.3",
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrServiceUnavailable))
}
// --- Update ----------------------------------------------------------
// TestUpdateHappyPath verifies Update forwards the patch to the store,
// re-reads the persisted row (in that order), and writes one success
// audit entry.
func TestUpdateHappyPath(t *testing.T) {
	h := newHarness(t)
	newRef := "ghcr.io/galaxy/game:v1.2.4"
	deprecated := domainengineversion.StatusDeprecated
	gomock.InOrder(
		h.store.EXPECT().Update(gomock.Any(), gomock.Any()).DoAndReturn(
			func(_ context.Context, input ports.UpdateEngineVersionInput) error {
				require.NotNil(t, input.ImageRef)
				assert.Equal(t, newRef, *input.ImageRef)
				require.NotNil(t, input.Status)
				assert.Equal(t, deprecated, *input.Status)
				assert.Equal(t, h.clock, input.Now)
				return nil
			},
		),
		h.store.EXPECT().Get(gomock.Any(), "v1.2.3").Return(domainengineversion.EngineVersion{
			Version: "v1.2.3", ImageRef: newRef, Status: deprecated, UpdatedAt: h.clock,
		}, nil),
	)
	got, err := h.service.Update(context.Background(), engineversion.UpdateInput{
		Version:  "v1.2.3",
		ImageRef: &newRef,
		Status:   &deprecated,
	})
	require.NoError(t, err)
	assert.Equal(t, deprecated, got.Status)
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OpKindEngineVersionUpdate, entries[0].OpKind)
	assert.Equal(t, operation.OutcomeSuccess, entries[0].Outcome)
}
// TestUpdateRejectsEmptyVersion verifies a whitespace-only version is
// rejected as ErrInvalidRequest.
func TestUpdateRejectsEmptyVersion(t *testing.T) {
	h := newHarness(t)
	newRef := "ghcr.io/galaxy/game:v1.2.4"
	_, err := h.service.Update(context.Background(), engineversion.UpdateInput{
		Version:  " ",
		ImageRef: &newRef,
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestUpdateRejectsEmptyPatch verifies an update with no fields set is
// rejected as ErrInvalidRequest.
func TestUpdateRejectsEmptyPatch(t *testing.T) {
	h := newHarness(t)
	_, err := h.service.Update(context.Background(), engineversion.UpdateInput{Version: "v1.2.3"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestUpdateRejectsBadImageRef verifies an unparseable Docker reference
// in the patch is rejected as ErrInvalidRequest.
func TestUpdateRejectsBadImageRef(t *testing.T) {
	h := newHarness(t)
	bad := "BAD//Ref::"
	_, err := h.service.Update(context.Background(), engineversion.UpdateInput{
		Version:  "v1.2.3",
		ImageRef: &bad,
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestUpdateRejectsUnknownStatus verifies an unrecognised status value
// in the patch is rejected as ErrInvalidRequest.
func TestUpdateRejectsUnknownStatus(t *testing.T) {
	h := newHarness(t)
	bad := domainengineversion.Status("exotic")
	_, err := h.service.Update(context.Background(), engineversion.UpdateInput{
		Version: "v1.2.3",
		Status:  &bad,
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestUpdateRejectsBadOptions verifies a non-object JSON value in the
// options patch is rejected as ErrInvalidRequest.
func TestUpdateRejectsBadOptions(t *testing.T) {
	h := newHarness(t)
	bad := []byte(`"not-an-object"`)
	_, err := h.service.Update(context.Background(), engineversion.UpdateInput{
		Version: "v1.2.3",
		Options: &bad,
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestUpdateNotFound verifies an unknown version maps to ErrNotFound
// and writes a failure audit entry with the not-found error code.
func TestUpdateNotFound(t *testing.T) {
	h := newHarness(t)
	newRef := "ghcr.io/galaxy/game:v1.2.4"
	h.store.EXPECT().Update(gomock.Any(), gomock.Any()).Return(domainengineversion.ErrNotFound)
	_, err := h.service.Update(context.Background(), engineversion.UpdateInput{
		Version:  "v1.2.3",
		ImageRef: &newRef,
	})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrNotFound))
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, engineversion.ErrorCodeEngineVersionNotFound, entries[0].ErrorCode)
}
// --- Deprecate -------------------------------------------------------
// TestDeprecateHappyPath verifies Deprecate passes the frozen clock
// timestamp to the store and writes one success audit entry.
func TestDeprecateHappyPath(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Deprecate(gomock.Any(), "v1.2.3", h.clock).Return(nil)
	err := h.service.Deprecate(context.Background(), engineversion.DeprecateInput{Version: "v1.2.3"})
	require.NoError(t, err)
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OpKindEngineVersionDeprecate, entries[0].OpKind)
	assert.Equal(t, operation.OutcomeSuccess, entries[0].Outcome)
}
// TestDeprecateRejectsEmptyVersion verifies an empty version is
// rejected as ErrInvalidRequest without touching the store.
func TestDeprecateRejectsEmptyVersion(t *testing.T) {
	h := newHarness(t)
	err := h.service.Deprecate(context.Background(), engineversion.DeprecateInput{})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestDeprecateNotFound verifies an unknown version maps to ErrNotFound
// and writes a failure audit entry with the not-found error code.
func TestDeprecateNotFound(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Deprecate(gomock.Any(), "v9.9.9", h.clock).Return(domainengineversion.ErrNotFound)
	err := h.service.Deprecate(context.Background(), engineversion.DeprecateInput{Version: "v9.9.9"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrNotFound))
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OutcomeFailure, entries[0].Outcome)
	assert.Equal(t, engineversion.ErrorCodeEngineVersionNotFound, entries[0].ErrorCode)
}
// TestDeprecateUnknownStoreError verifies an unclassified store error
// surfaces as ErrServiceUnavailable.
func TestDeprecateUnknownStoreError(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().Deprecate(gomock.Any(), "v1.2.3", h.clock).Return(errors.New("pg down"))
	err := h.service.Deprecate(context.Background(), engineversion.DeprecateInput{Version: "v1.2.3"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrServiceUnavailable))
}
// --- Delete ----------------------------------------------------------
// TestDeleteHappyPath verifies the reference probe runs before the
// delete (gomock.InOrder) and one success audit entry is written.
func TestDeleteHappyPath(t *testing.T) {
	h := newHarness(t)
	gomock.InOrder(
		h.store.EXPECT().IsReferencedByActiveRuntime(gomock.Any(), "v1.2.3").Return(false, nil),
		h.store.EXPECT().Delete(gomock.Any(), "v1.2.3").Return(nil),
	)
	err := h.service.Delete(context.Background(), engineversion.DeleteInput{
		Version:   "v1.2.3",
		OpSource:  operation.OpSourceAdminRest,
		SourceRef: "ticket-42",
	})
	require.NoError(t, err)
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OpKindEngineVersionDelete, entries[0].OpKind)
	assert.Equal(t, operation.OutcomeSuccess, entries[0].Outcome)
	assert.Equal(t, "ticket-42", entries[0].SourceRef)
}
// TestDeleteRejectsEmptyVersion verifies an empty version is rejected
// as ErrInvalidRequest without touching the store.
func TestDeleteRejectsEmptyVersion(t *testing.T) {
	h := newHarness(t)
	err := h.service.Delete(context.Background(), engineversion.DeleteInput{})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInvalidRequest))
}
// TestDeleteRejectedWhenReferenced verifies a referenced version is
// rejected with ErrInUse, audited, and the destructive delete is never
// issued (no Delete expectation is registered, so gomock fails the test
// if it is called).
func TestDeleteRejectedWhenReferenced(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().IsReferencedByActiveRuntime(gomock.Any(), "v1.2.3").Return(true, nil)
	// Delete must not be called when the row is referenced.
	err := h.service.Delete(context.Background(), engineversion.DeleteInput{Version: "v1.2.3"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrInUse))
	entries := h.oplog.snapshot()
	require.Len(t, entries, 1)
	assert.Equal(t, operation.OutcomeFailure, entries[0].Outcome)
	assert.Equal(t, engineversion.ErrorCodeEngineVersionInUse, entries[0].ErrorCode)
}
// TestDeleteIsReferencedProbeError verifies a failing reference probe
// surfaces as ErrServiceUnavailable before any delete is attempted.
func TestDeleteIsReferencedProbeError(t *testing.T) {
	h := newHarness(t)
	h.store.EXPECT().IsReferencedByActiveRuntime(gomock.Any(), "v1.2.3").Return(false, errors.New("pg down"))
	err := h.service.Delete(context.Background(), engineversion.DeleteInput{Version: "v1.2.3"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrServiceUnavailable))
}
// TestDeleteNotFound verifies an unknown version (probe passes, delete
// reports not-found) maps to the service-level ErrNotFound.
func TestDeleteNotFound(t *testing.T) {
	h := newHarness(t)
	gomock.InOrder(
		h.store.EXPECT().IsReferencedByActiveRuntime(gomock.Any(), "v9.9.9").Return(false, nil),
		h.store.EXPECT().Delete(gomock.Any(), "v9.9.9").Return(domainengineversion.ErrNotFound),
	)
	err := h.service.Delete(context.Background(), engineversion.DeleteInput{Version: "v9.9.9"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrNotFound))
}
// TestDeleteUnknownStoreError verifies an unclassified delete error
// surfaces as ErrServiceUnavailable.
func TestDeleteUnknownStoreError(t *testing.T) {
	h := newHarness(t)
	gomock.InOrder(
		h.store.EXPECT().IsReferencedByActiveRuntime(gomock.Any(), "v1.2.3").Return(false, nil),
		h.store.EXPECT().Delete(gomock.Any(), "v1.2.3").Return(errors.New("pg down")),
	)
	err := h.service.Delete(context.Background(), engineversion.DeleteInput{Version: "v1.2.3"})
	require.Error(t, err)
	require.True(t, errors.Is(err, engineversion.ErrServiceUnavailable))
}
// --- guard rails -----------------------------------------------------
// TestNilContextReturnsError verifies every service method guards
// against a nil context. The harness is shared: no mock expectations
// are registered, so the guards must fire before any store call.
func TestNilContextReturnsError(t *testing.T) {
	h := newHarness(t)
	t.Run("List", func(t *testing.T) {
		_, err := h.service.List(nil, nil) //nolint:staticcheck // intentional nil context
		require.Error(t, err)
	})
	t.Run("Get", func(t *testing.T) {
		_, err := h.service.Get(nil, "v1.2.3") //nolint:staticcheck // intentional nil context
		require.Error(t, err)
	})
	t.Run("Create", func(t *testing.T) {
		_, err := h.service.Create(nil, engineversion.CreateInput{}) //nolint:staticcheck // intentional nil context
		require.Error(t, err)
	})
	t.Run("Update", func(t *testing.T) {
		_, err := h.service.Update(nil, engineversion.UpdateInput{}) //nolint:staticcheck // intentional nil context
		require.Error(t, err)
	})
	t.Run("Deprecate", func(t *testing.T) {
		err := h.service.Deprecate(nil, engineversion.DeprecateInput{}) //nolint:staticcheck // intentional nil context
		require.Error(t, err)
	})
	t.Run("Delete", func(t *testing.T) {
		err := h.service.Delete(nil, engineversion.DeleteInput{}) //nolint:staticcheck // intentional nil context
		require.Error(t, err)
	})
}
// TestNilServiceReturnsError verifies methods on a nil *Service return
// an error instead of panicking.
func TestNilServiceReturnsError(t *testing.T) {
	var s *engineversion.Service
	_, err := s.Get(context.Background(), "v1.2.3")
	require.Error(t, err)
	_, err = s.Create(context.Background(), engineversion.CreateInput{})
	require.Error(t, err)
}
@@ -0,0 +1,19 @@
package livenessreply
// Stable error codes returned by Handle as Go-level errors. Liveness
// reply itself never produces a 4xx/5xx response — the endpoint always
// answers 200 — but the service surfaces structural validation
// failures to the handler so it can return the standard error envelope.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty GameID).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeServiceUnavailable reports that a steady-state
	// dependency (PostgreSQL) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,114 @@
// Package livenessreply implements the Lobby-facing liveness service-
// layer answer owned by Game Master. It is driven by Game Lobby
// resuming a paused game through
// `GET /api/v1/internal/games/{game_id}/liveness` and reflects GM's
// own view of the runtime without ever calling the engine.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Liveness reply`. The 200 / status="" response on
// `runtime_not_found` is the Stage 17 D5 decision recorded in
// `gamemaster/docs/stage17-admin-operations.md`.
package livenessreply
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
)
// Input stores the per-call arguments for one liveness reply.
type Input struct {
	// GameID identifies the runtime to inspect. Validate rejects an
	// empty or whitespace-only value.
	GameID string
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched. It returns a plain,
// uncoded error; Handle wraps it with ErrorCodeInvalidRequest.
func (input Input) Validate() error {
	if strings.TrimSpace(input.GameID) == "" {
		// Constant message: errors.New is the idiomatic form when no
		// formatting verbs are needed (was fmt.Errorf).
		return errors.New("game id must not be empty")
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call. The
// endpoint always answers 200; the result fields populate the JSON
// body. ErrorCode / ErrorMessage are reserved for handler-side error
// envelopes and are never set by Handle on a successful read.
type Result struct {
	// Ready is true when the runtime exists and is in `running`.
	Ready bool
	// Status carries the observed runtime status. Empty when the
	// runtime record does not exist (Stage 17 D5).
	Status runtime.Status
}

// Dependencies groups the collaborators required by Service.
// RuntimeRecords is mandatory; NewService rejects a nil value.
type Dependencies struct {
	// RuntimeRecords supplies the runtime status read.
	RuntimeRecords ports.RuntimeRecordStore
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
}

// Service executes the liveness reply lookup. The zero value is not
// usable; construct with NewService.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore // status source, never nil after NewService
	logger         *slog.Logger             // tagged with service=gamemaster.livenessreply
}
// NewService constructs one Service from deps. A nil RuntimeRecords
// dependency is rejected; a nil Logger falls back to slog.Default().
// The returned logger is tagged with the service name.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new liveness reply service: nil runtime records")
	}
	base := deps.Logger
	if base == nil {
		base = slog.Default()
	}
	built := &Service{
		runtimeRecords: deps.RuntimeRecords,
		logger:         base.With("service", "gamemaster.livenessreply"),
	}
	return built, nil
}
// Handle executes one liveness reply lookup. The Go-level error return
// is reserved for non-business failures: nil receiver, nil context,
// invalid input (so the handler can answer `invalid_request`), or a
// store read failure (so the handler can answer `service_unavailable`).
// `runtime.ErrNotFound` is intentionally absorbed into Result with
// `Ready=false` and an empty status (Stage 17 D5).
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("liveness reply: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("liveness reply: nil context")
	}
	if err := input.Validate(); err != nil {
		return Result{}, fmt.Errorf("%s: %w", ErrorCodeInvalidRequest, err)
	}
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	if errors.Is(err, runtime.ErrNotFound) {
		// An absent runtime is a successful "not ready" answer.
		return Result{Ready: false, Status: ""}, nil
	}
	if err != nil {
		return Result{}, fmt.Errorf("%s: get runtime record: %w", ErrorCodeServiceUnavailable, err)
	}
	observed := Result{
		Ready:  record.Status == runtime.StatusRunning,
		Status: record.Status,
	}
	return observed, nil
}
@@ -0,0 +1,175 @@
package livenessreply_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/livenessreply"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// fakeRuntimeRecords is an in-memory ports.RuntimeRecordStore test
// double. Liveness tests exercise only Get; every other interface
// method returns a "not used" error.
type fakeRuntimeRecords struct {
	mu     sync.Mutex                       // guards stored
	stored map[string]runtime.RuntimeRecord // gameID → seeded record
	getErr error                            // injected Get failure; takes priority over stored
}
// newFakeRuntimeRecords returns an empty, ready-to-seed store double.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	store := &fakeRuntimeRecords{}
	store.stored = make(map[string]runtime.RuntimeRecord)
	return store
}

// seed registers record under its own GameID.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	s.stored[record.GameID] = record
	s.mu.Unlock()
}

// Get returns the injected error when set, the seeded record when
// present, and runtime.ErrNotFound otherwise.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, found := s.stored[gameID]
	if !found {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}
// The methods below complete the ports.RuntimeRecordStore interface.
// None of them is reached by liveness-reply tests, so each returns a
// sentinel "not used" error to make an accidental call visible.
func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
// newService builds a livenessreply.Service wired to store, failing
// the test on construction errors.
func newService(t *testing.T, store *fakeRuntimeRecords) *livenessreply.Service {
	t.Helper()
	built, err := livenessreply.NewService(livenessreply.Dependencies{RuntimeRecords: store})
	require.NoError(t, err)
	return built
}

// runningRecord returns a fully-populated record in the `running`
// status for gameID, with deterministic timestamps.
func runningRecord(gameID string) runtime.RuntimeRecord {
	stamp := time.Date(2026, time.May, 1, 12, 0, 0, 0, time.UTC)
	record := runtime.RuntimeRecord{
		GameID:               gameID,
		Status:               runtime.StatusRunning,
		EngineEndpoint:       "http://galaxy-game-" + gameID + ":8080",
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		CurrentTurn:          5,
	}
	record.CreatedAt = stamp
	record.UpdatedAt = stamp
	return record
}
// NewService must reject a Dependencies value missing RuntimeRecords.
func TestNewServiceRejectsNilRuntimeRecords(t *testing.T) {
	_, err := livenessreply.NewService(livenessreply.Dependencies{})
	require.Error(t, err)
}

// A `running` runtime yields Ready=true with the status echoed back.
func TestHandleRunningReturnsReadyTrue(t *testing.T) {
	records := newFakeRuntimeRecords()
	records.seed(runningRecord("game-001"))
	got, err := newService(t, records).Handle(context.Background(), livenessreply.Input{GameID: "game-001"})
	require.NoError(t, err)
	assert.True(t, got.Ready)
	assert.Equal(t, runtime.StatusRunning, got.Status)
}
// Every non-running status yields Ready=false while still echoing the
// observed status in the result.
func TestHandleNonRunningReturnsReadyFalseWithStatus(t *testing.T) {
	statuses := []runtime.Status{
		runtime.StatusStarting,
		runtime.StatusGenerationInProgress,
		runtime.StatusGenerationFailed,
		runtime.StatusEngineUnreachable,
		runtime.StatusStopped,
		runtime.StatusFinished,
	}
	for _, current := range statuses {
		t.Run(string(current), func(t *testing.T) {
			records := newFakeRuntimeRecords()
			seeded := runningRecord("game-001")
			seeded.Status = current
			records.seed(seeded)
			got, err := newService(t, records).Handle(context.Background(), livenessreply.Input{GameID: "game-001"})
			require.NoError(t, err)
			assert.False(t, got.Ready)
			assert.Equal(t, current, got.Status)
		})
	}
}
// A missing runtime record is a successful "not ready" answer with an
// empty status, not an error.
func TestHandleRuntimeNotFoundReturnsEmptyStatus(t *testing.T) {
	records := newFakeRuntimeRecords()
	got, err := newService(t, records).Handle(context.Background(), livenessreply.Input{GameID: "missing"})
	require.NoError(t, err, "runtime_not_found is absorbed into 200 response per Stage 17 D5")
	assert.False(t, got.Ready)
	assert.Equal(t, runtime.Status(""), got.Status)
}

// A store read failure surfaces as a service_unavailable-coded error.
func TestHandleStoreReadFailureReturnsServiceUnavailable(t *testing.T) {
	records := newFakeRuntimeRecords()
	records.getErr = errors.New("connection refused")
	_, err := newService(t, records).Handle(context.Background(), livenessreply.Input{GameID: "game-001"})
	require.Error(t, err)
	assert.Contains(t, err.Error(), livenessreply.ErrorCodeServiceUnavailable)
}
// An empty GameID fails validation with an invalid_request-coded error.
func TestHandleEmptyGameIDReturnsInvalidRequest(t *testing.T) {
	records := newFakeRuntimeRecords()
	_, err := newService(t, records).Handle(context.Background(), livenessreply.Input{GameID: ""})
	require.Error(t, err)
	assert.Contains(t, err.Error(), livenessreply.ErrorCodeInvalidRequest)
}

// A nil context is rejected before any store access.
func TestHandleNilContextReturnsError(t *testing.T) {
	records := newFakeRuntimeRecords()
	_, err := newService(t, records).Handle(nil, livenessreply.Input{GameID: "game-001"}) //nolint:staticcheck // guard test
	require.Error(t, err)
}
@@ -0,0 +1,280 @@
// Package membership implements the in-process membership cache that
// authorises every hot-path call (commandexecute, orderput, reportget)
// owned by Game Master.
//
// The cache is a per-game TTL projection of Lobby's
// `/api/v1/internal/games/{game_id}/memberships` view. Lobby invokes the
// invalidation hook (`POST /api/v1/internal/games/{game_id}/memberships/invalidate`)
// post-commit on every roster mutation; the TTL is the safety net for any
// missed invalidation. Cache rules and trade-offs are documented in
// `gamemaster/README.md §Hot Path → Membership cache and invalidation` and
// `gamemaster/docs/stage16-membership-cache-and-invalidation.md`.
package membership
import (
"container/list"
"context"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/telemetry"
)
// Result labels used with `telemetry.Runtime.RecordMembershipCacheResult`.
const (
resultHit = "hit"
resultMiss = "miss"
resultInvalidate = "invalidate"
)
// Dependencies groups the collaborators required by Cache. Lobby and
// Telemetry are mandatory; NewCache rejects nil values for them.
type Dependencies struct {
	// Lobby loads the per-game membership projection on cache miss.
	Lobby ports.LobbyClient
	// Telemetry records `gamemaster.membership_cache.hits` outcomes.
	Telemetry *telemetry.Runtime
	// Logger records structured cache events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for entry freshness. Defaults
	// to `time.Now` when nil.
	Clock func() time.Time
	// TTL bounds the freshness of one cached entry; expired entries are
	// re-fetched from Lobby. Must be positive.
	TTL time.Duration
	// MaxGames bounds the cache size in number of games. The
	// least-recently-used entry is evicted when an insert overflows the
	// bound. Must be positive.
	MaxGames int
}

// Cache stores the per-game membership projection used by hot-path
// services. The zero value is not usable; construct with NewCache.
// mu guards entries, lru, and inflight; all other fields are set once
// in NewCache and read-only afterwards.
type Cache struct {
	lobby     ports.LobbyClient
	telemetry *telemetry.Runtime
	logger    *slog.Logger
	clock     func() time.Time
	ttl       time.Duration
	maxGames  int

	mu      sync.Mutex
	entries map[string]*list.Element // gameID → element holding *cacheEntry
	lru     *list.List               // *cacheEntry, MRU at front
	inflight map[string]*flight      // gameID → in-flight Lobby fetch
}
// cacheEntry stores one per-game membership projection. loadedAt feeds
// the TTL freshness check in lookupFresh.
type cacheEntry struct {
	gameID   string
	members  map[string]string // user_id → status ("active"|"removed"|"blocked")
	loadedAt time.Time         // wall-clock instant of the Lobby fetch that produced members
}

// flight coordinates concurrent misses on the same gameID so only one
// Lobby fetch is issued. Joiners wait on `done`; the leader populates
// `members` (or `err`) before closing the channel, so joiners reading
// after the close observe the fields race-free.
type flight struct {
	done    chan struct{}
	members map[string]string
	err     error
}
// NewCache constructs a Cache from deps. Returns a Go-level error when
// a required dependency is missing or a numeric bound is non-positive;
// Logger and Clock fall back to slog.Default() and time.Now.
func NewCache(deps Dependencies) (*Cache, error) {
	if deps.Lobby == nil {
		return nil, errors.New("new membership cache: nil lobby client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new membership cache: nil telemetry runtime")
	}
	if deps.TTL <= 0 {
		return nil, fmt.Errorf("new membership cache: ttl must be positive, got %s", deps.TTL)
	}
	if deps.MaxGames <= 0 {
		return nil, fmt.Errorf("new membership cache: max games must be positive, got %d", deps.MaxGames)
	}
	baseLogger := deps.Logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}
	wallClock := deps.Clock
	if wallClock == nil {
		wallClock = time.Now
	}
	built := &Cache{
		lobby:     deps.Lobby,
		telemetry: deps.Telemetry,
		logger:    baseLogger.With("component", "gamemaster.membership_cache"),
		clock:     wallClock,
		ttl:       deps.TTL,
		maxGames:  deps.MaxGames,
		entries:   map[string]*list.Element{},
		lru:       list.New(),
		inflight:  map[string]*flight{},
	}
	return built, nil
}
// Resolve returns the membership status of userID inside gameID. The
// returned status is the raw Lobby vocabulary (`"active"`, `"removed"`,
// `"blocked"`) and is empty when the user is absent from the roster;
// callers must compare against `"active"` to authorise a hot-path call.
//
// On cache miss, TTL expiry, or after an Invalidate, Resolve fetches
// from Lobby; concurrent misses on the same gameID share one Lobby
// call. A failed fetch surfaces as ErrLobbyUnavailable and is not
// cached.
func (cache *Cache) Resolve(ctx context.Context, gameID, userID string) (string, error) {
	switch {
	case cache == nil:
		return "", errors.New("membership cache: nil receiver")
	case ctx == nil:
		return "", errors.New("membership cache: nil context")
	}
	entry, fresh := cache.lookupFresh(gameID)
	if fresh {
		cache.telemetry.RecordMembershipCacheResult(ctx, resultHit)
		return entry.members[userID], nil
	}
	members, err := cache.fetch(ctx, gameID)
	cache.telemetry.RecordMembershipCacheResult(ctx, resultMiss)
	if err == nil {
		return members[userID], nil
	}
	attrs := append([]any{"game_id", gameID, "err", err.Error()}, logging.ContextAttrs(ctx)...)
	cache.logger.WarnContext(ctx, "lobby fetch failed", attrs...)
	return "", err
}
// Invalidate purges the cache entry for gameID, if any, so subsequent
// Resolve calls fetch from Lobby. Safe to call from the invalidation
// hook handler (Stage 19) at any time; a nil receiver is a no-op. The
// telemetry result is recorded even when no entry existed.
func (cache *Cache) Invalidate(gameID string) {
	if cache == nil {
		return
	}
	cache.mu.Lock()
	element, present := cache.entries[gameID]
	if present {
		delete(cache.entries, gameID)
		cache.lru.Remove(element)
	}
	cache.mu.Unlock()
	cache.telemetry.RecordMembershipCacheResult(context.Background(), resultInvalidate)
}
// lookupFresh returns the cached entry for gameID when it exists and
// is younger than the TTL, promoting it to MRU under the lock. An
// expired entry is reported as absent; installLocked overwrites it on
// the subsequent fetch.
func (cache *Cache) lookupFresh(gameID string) (*cacheEntry, bool) {
	cache.mu.Lock()
	defer cache.mu.Unlock()
	element, found := cache.entries[gameID]
	if !found {
		return nil, false
	}
	entry := element.Value.(*cacheEntry)
	age := cache.clock().Sub(entry.loadedAt)
	if age >= cache.ttl {
		return nil, false
	}
	cache.lru.MoveToFront(element)
	return entry, true
}
// fetch loads the membership projection from Lobby, deduplicating
// concurrent misses on the same gameID through the inflight map. The
// successful result is cached; failures are not. Exactly one caller
// (the leader) performs the Lobby call; joiners block on the flight's
// done channel or on their own context.
func (cache *Cache) fetch(ctx context.Context, gameID string) (map[string]string, error) {
	cache.mu.Lock()
	if existing, ok := cache.inflight[gameID]; ok {
		// Join the in-flight fetch. A joiner's cancellation only
		// abandons its own wait; the leader keeps running.
		cache.mu.Unlock()
		select {
		case <-existing.done:
			if existing.err != nil {
				return nil, existing.err
			}
			return existing.members, nil
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	// Become the leader for this gameID.
	current := &flight{done: make(chan struct{})}
	cache.inflight[gameID] = current
	// The Lobby call runs outside the lock so hits on other games are
	// not blocked behind a slow fetch.
	cache.mu.Unlock()
	members, err := cache.loadFromLobby(ctx, gameID)
	cache.mu.Lock()
	delete(cache.inflight, gameID)
	if err == nil {
		cache.installLocked(gameID, members)
	}
	cache.mu.Unlock()
	// Publish the outcome: writes to current happen-before close(done),
	// so joiners reading after <-done observe them race-free.
	if err != nil {
		current.err = err
	} else {
		current.members = members
	}
	close(current.done)
	if err != nil {
		return nil, err
	}
	return members, nil
}
// loadFromLobby calls the LobbyClient and projects the raw response
// into the user_id → status map the cache stores. Transport failures
// are wrapped with ErrLobbyUnavailable for errors.Is branching.
func (cache *Cache) loadFromLobby(ctx context.Context, gameID string) (map[string]string, error) {
	records, err := cache.lobby.GetMemberships(ctx, gameID)
	if err != nil {
		return nil, fmt.Errorf("%w: %w", ErrLobbyUnavailable, err)
	}
	projection := make(map[string]string, len(records))
	for _, membershipRecord := range records {
		projection[membershipRecord.UserID] = membershipRecord.Status
	}
	return projection, nil
}

// installLocked stores members under gameID, evicting least-recently
// -used entries while the cache is over capacity. Caller must hold
// cache.mu.
func (cache *Cache) installLocked(gameID string, members map[string]string) {
	loadedAt := cache.clock()
	if element, present := cache.entries[gameID]; present {
		// Refresh in place; the element keeps its identity.
		existing := element.Value.(*cacheEntry)
		existing.members = members
		existing.loadedAt = loadedAt
		cache.lru.MoveToFront(element)
		return
	}
	cache.entries[gameID] = cache.lru.PushFront(&cacheEntry{
		gameID:   gameID,
		members:  members,
		loadedAt: loadedAt,
	})
	for cache.lru.Len() > cache.maxGames {
		victim := cache.lru.Back()
		if victim == nil {
			break
		}
		entry := victim.Value.(*cacheEntry)
		delete(cache.entries, entry.gameID)
		cache.lru.Remove(victim)
	}
}
@@ -0,0 +1,376 @@
package membership_test
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"testing"
"time"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// fakeLobby is a hand-rolled LobbyClient stub used by membership tests.
// It mirrors the test-double style used elsewhere in the gamemaster
// service tree.
// fakeLobby is a hand-rolled LobbyClient stub used by membership tests.
// It mirrors the test-double style used elsewhere in the gamemaster
// service tree. `delay` and `released` are read without the mutex;
// tests set them once before the cache is exercised.
type fakeLobby struct {
	mu       sync.Mutex                    // guards answers and errs
	calls    atomic.Int32                  // total GetMemberships invocations
	answers  map[string][]ports.Membership // gameID → seeded roster
	errs     map[string]error              // gameID → injected failure (takes priority)
	delay    time.Duration                 // optional artificial latency per call
	released chan struct{}                 // optional gate: calls block until closed
}
// newFakeLobby returns an empty, ready-to-seed Lobby double.
func newFakeLobby() *fakeLobby {
	lobby := &fakeLobby{}
	lobby.answers = make(map[string][]ports.Membership)
	lobby.errs = make(map[string]error)
	return lobby
}

// seed registers the roster answered for gameID.
func (f *fakeLobby) seed(gameID string, members []ports.Membership) {
	f.mu.Lock()
	f.answers[gameID] = members
	f.mu.Unlock()
}

// seedErr injects a failure for gameID; it takes priority over seed.
func (f *fakeLobby) seedErr(gameID string, err error) {
	f.mu.Lock()
	f.errs[gameID] = err
	f.mu.Unlock()
}
// GetMemberships counts the call, optionally delays or blocks on the
// release gate (both honouring ctx cancellation), then answers the
// seeded error or roster for gameID. Unseeded games get an empty,
// non-nil roster. The returned slice is a copy so callers cannot
// mutate the seeded data.
func (f *fakeLobby) GetMemberships(ctx context.Context, gameID string) ([]ports.Membership, error) {
	f.calls.Add(1)
	if f.delay > 0 {
		select {
		case <-time.After(f.delay):
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	if f.released != nil {
		// Block until the test closes the gate; used to hold a fetch
		// open while concurrent callers pile up.
		select {
		case <-f.released:
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	f.mu.Lock()
	defer f.mu.Unlock()
	if err, ok := f.errs[gameID]; ok {
		return nil, err
	}
	if members, ok := f.answers[gameID]; ok {
		out := make([]ports.Membership, len(members))
		copy(out, members)
		return out, nil
	}
	return []ports.Membership{}, nil
}
// GetGameSummary completes the LobbyClient interface; cache tests
// never reach it.
func (f *fakeLobby) GetGameSummary(context.Context, string) (ports.GameSummary, error) {
	var empty ports.GameSummary
	return empty, errors.New("not used in cache tests")
}

// newTelemetry builds a telemetry runtime with no providers, failing
// the test on construction errors.
func newTelemetry(t *testing.T) *telemetry.Runtime {
	t.Helper()
	runtimeTelemetry, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	return runtimeTelemetry
}

// active returns an "active" membership for userID/raceName with a
// fixed JoinedAt timestamp.
func active(userID, raceName string) ports.Membership {
	record := ports.Membership{
		UserID:   userID,
		RaceName: raceName,
		Status:   "active",
		JoinedAt: time.Unix(0, 0).UTC(),
	}
	return record
}
// newCacheForTest builds a membership.Cache with the given knobs and a
// fresh telemetry runtime, failing the test on construction errors.
func newCacheForTest(t *testing.T, lobby ports.LobbyClient, ttl time.Duration, maxGames int, clock func() time.Time) *membership.Cache {
	t.Helper()
	deps := membership.Dependencies{
		Lobby:     lobby,
		Telemetry: newTelemetry(t),
		TTL:       ttl,
		MaxGames:  maxGames,
		Clock:     clock,
	}
	built, err := membership.NewCache(deps)
	require.NoError(t, err)
	return built
}
// Every missing dependency and non-positive bound must be rejected by
// NewCache with a nil cache.
func TestNewCacheRejectsBadDependencies(t *testing.T) {
	tel := newTelemetry(t)
	table := []struct {
		name string
		deps membership.Dependencies
	}{
		{"nil lobby", membership.Dependencies{Telemetry: tel, TTL: time.Second, MaxGames: 1}},
		{"nil telemetry", membership.Dependencies{Lobby: newFakeLobby(), TTL: time.Second, MaxGames: 1}},
		{"zero ttl", membership.Dependencies{Lobby: newFakeLobby(), Telemetry: tel, TTL: 0, MaxGames: 1}},
		{"negative ttl", membership.Dependencies{Lobby: newFakeLobby(), Telemetry: tel, TTL: -time.Second, MaxGames: 1}},
		{"zero max games", membership.Dependencies{Lobby: newFakeLobby(), Telemetry: tel, TTL: time.Second, MaxGames: 0}},
		{"negative max games", membership.Dependencies{Lobby: newFakeLobby(), Telemetry: tel, TTL: time.Second, MaxGames: -1}},
	}
	for _, row := range table {
		t.Run(row.name, func(t *testing.T) {
			built, err := membership.NewCache(row.deps)
			require.Error(t, err)
			assert.Nil(t, built)
		})
	}
}
// A second Resolve inside the TTL is served from memory: exactly one
// Lobby call for two lookups on the same game.
func TestResolveHitServesCachedEntry(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari"), active("user-2", "Drazi")})
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	first, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, "active", first)
	second, err := cache.Resolve(context.Background(), "game-1", "user-2")
	require.NoError(t, err)
	assert.Equal(t, "active", second)
	assert.Equal(t, int32(1), lobby.calls.Load())
}

// A user absent from the roster resolves to the empty status.
func TestResolveUnknownUserReturnsEmptyString(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	status, err := cache.Resolve(context.Background(), "game-1", "ghost")
	require.NoError(t, err)
	assert.Empty(t, status)
}
// An entry younger than the TTL is served from memory; once the TTL
// elapses the next Resolve refetches from Lobby.
func TestResolveTTLExpiryRefetches(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	base := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	current := base
	cache := newCacheForTest(t, lobby, 30*time.Second, 8, func() time.Time { return current })
	_, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(1), lobby.calls.Load())
	current = base.Add(20 * time.Second)
	_, err = cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(1), lobby.calls.Load(), "fresh entry must not refetch")
	current = base.Add(31 * time.Second)
	_, err = cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(2), lobby.calls.Load(), "expired entry must refetch")
}
// Invalidate drops the entry so the next Resolve goes back to Lobby.
func TestInvalidatePurgesEntry(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	_, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(1), lobby.calls.Load())
	cache.Invalidate("game-1")
	_, err = cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(2), lobby.calls.Load())
}

// Invalidating a game that was never cached must not panic or block.
func TestInvalidateOnAbsentGameIsNoop(t *testing.T) {
	lobby := newFakeLobby()
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	cache.Invalidate("missing")
}
// With MaxGames=2, loading a third game evicts the least-recently-used
// one; re-resolving the survivor is a hit, the victim a miss.
func TestLRUEvictsOldestEntry(t *testing.T) {
	lobby := newFakeLobby()
	for slot := range 4 {
		lobby.seed(fmt.Sprintf("game-%d", slot), []ports.Membership{active("user-1", "Aelinari")})
	}
	base := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	current := base
	cache := newCacheForTest(t, lobby, time.Minute, 2, func() time.Time { return current })
	// Load games 0, 1, 2 sequentially. The cache holds at most 2; game-0
	// must have been evicted by the time game-2 lands.
	for slot := range 3 {
		current = base.Add(time.Duration(slot) * time.Second)
		_, err := cache.Resolve(context.Background(), fmt.Sprintf("game-%d", slot), "user-1")
		require.NoError(t, err)
	}
	require.Equal(t, int32(3), lobby.calls.Load())
	// Re-resolving game-1 hits the cache.
	current = base.Add(3 * time.Second)
	_, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(3), lobby.calls.Load(), "game-1 must still be cached")
	// Re-resolving game-0 misses (it was the LRU victim).
	current = base.Add(4 * time.Second)
	_, err = cache.Resolve(context.Background(), "game-0", "user-1")
	require.NoError(t, err)
	assert.Equal(t, int32(4), lobby.calls.Load(), "game-0 must have been evicted")
}
// A failed Lobby fetch surfaces both the cache sentinel and the ports
// sentinel, and must not populate the cache: a second Resolve refetches.
func TestResolveLobbyUnavailableSurfacesAndDoesNotCache(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seedErr("game-1", fmt.Errorf("dial: %w", ports.ErrLobbyUnavailable))
	clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
	cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
	_, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.Error(t, err)
	// assert.ErrorIs prints the full error chain on failure, unlike
	// assert.True(errors.Is(...)) which only reports "false".
	assert.ErrorIs(t, err, membership.ErrLobbyUnavailable)
	assert.ErrorIs(t, err, ports.ErrLobbyUnavailable)
	_, err = cache.Resolve(context.Background(), "game-1", "user-1")
	require.Error(t, err)
	assert.Equal(t, int32(2), lobby.calls.Load(), "failed fetch must not be cached")
}
// Even a bare transport error from the LobbyClient is wrapped with the
// cache's ErrLobbyUnavailable sentinel.
func TestResolveUnwrappedLobbyErrorIsStillSurfacedAsLobbyUnavailable(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seedErr("game-1", errors.New("transport"))
	clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
	cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
	_, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.Error(t, err)
	// assert.ErrorIs replaces assert.True(errors.Is(...)) for a
	// diagnostic failure message.
	assert.ErrorIs(t, err, membership.ErrLobbyUnavailable)
}
// TestResolveDeduplicatesConcurrentMisses holds the Lobby fetch open
// behind a gate while 16 goroutines miss on the same game, then checks
// that all of them got the leader's answer from a single Lobby call.
func TestResolveDeduplicatesConcurrentMisses(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	gate := make(chan struct{})
	lobby.released = gate
	clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
	cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
	const callers = 16
	var wg sync.WaitGroup
	// Each goroutine writes only its own slot, so no further
	// synchronization of results/errs is needed beyond wg.Wait.
	results := make([]string, callers)
	errs := make([]error, callers)
	wg.Add(callers)
	for index := range callers {
		go func(slot int) {
			defer wg.Done()
			results[slot], errs[slot] = cache.Resolve(context.Background(), "game-1", "user-1")
		}(index)
	}
	// Give all goroutines a moment to register on the inflight map
	// before releasing the Lobby fetch. NOTE(review): sleep-based
	// coordination is best-effort; late joiners would still pass but
	// weaken the single-call assertion.
	time.Sleep(10 * time.Millisecond)
	close(gate)
	wg.Wait()
	for index := range callers {
		require.NoError(t, errs[index])
		assert.Equal(t, "active", results[index])
	}
	assert.Equal(t, int32(1), lobby.calls.Load(), "concurrent misses must collapse to one Lobby call")
}
// A joiner waiting on another caller's in-flight fetch must give up
// with context.Canceled when its own context is cancelled, while the
// leader keeps running to completion.
func TestResolveRespectsContextCancellation(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	gate := make(chan struct{})
	lobby.released = gate
	clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
	cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
	leaderDone := make(chan struct{})
	go func() {
		defer close(leaderDone)
		_, _ = cache.Resolve(context.Background(), "game-1", "user-1")
	}()
	// Wait for leader to register the inflight slot. NOTE(review):
	// sleep-based coordination is best-effort; if the leader is late,
	// the second Resolve becomes its own (gated) leader and still
	// fails with context.Canceled via the fake's ctx check.
	time.Sleep(10 * time.Millisecond)
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	_, err := cache.Resolve(ctx, "game-1", "user-1")
	require.Error(t, err)
	// assert.ErrorIs replaces assert.True(errors.Is(...)) for a
	// diagnostic failure message.
	assert.ErrorIs(t, err, context.Canceled)
	close(gate)
	<-leaderDone
}
// After a failed fetch, clearing the injected error lets the next
// Resolve succeed — failures leave no poisoned cache state behind.
func TestResolveRefreshAfterErrorReturnsSuccess(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seedErr("game-1", errors.New("transport"))
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	_, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.Error(t, err)
	// Heal the fake: drop the error and seed a roster.
	lobby.mu.Lock()
	delete(lobby.errs, "game-1")
	lobby.answers["game-1"] = []ports.Membership{active("user-1", "Aelinari")}
	lobby.mu.Unlock()
	status, err := cache.Resolve(context.Background(), "game-1", "user-1")
	require.NoError(t, err)
	assert.Equal(t, "active", status)
}
// Resolve must fail fast, without panicking, on a nil context and on a
// nil receiver.
func TestResolveRejectsNilContextAndReceiver(t *testing.T) {
	lobby := newFakeLobby()
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	var nilCtx context.Context
	_, ctxErr := cache.Resolve(nilCtx, "game-1", "user-1")
	require.Error(t, ctxErr)
	var nilCache *membership.Cache
	_, receiverErr := nilCache.Resolve(context.Background(), "game-1", "user-1")
	require.Error(t, receiverErr)
}
// The cache must hand back the raw Lobby vocabulary unchanged:
// "active", "removed", and "blocked" all round-trip.
func TestStatusFromLobbyIsPreserved(t *testing.T) {
	lobby := newFakeLobby()
	roster := []ports.Membership{
		{UserID: "user-1", RaceName: "Aelinari", Status: "active", JoinedAt: time.Unix(0, 0).UTC()},
		{UserID: "user-2", RaceName: "Drazi", Status: "removed", JoinedAt: time.Unix(0, 0).UTC()},
		{UserID: "user-3", RaceName: "Vorlons", Status: "blocked", JoinedAt: time.Unix(0, 0).UTC()},
	}
	lobby.seed("game-1", roster)
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })
	expectations := map[string]string{"user-1": "active", "user-2": "removed", "user-3": "blocked"}
	for userID, expected := range expectations {
		status, err := cache.Resolve(context.Background(), "game-1", userID)
		require.NoError(t, err)
		assert.Equal(t, expected, status, "user %s", userID)
	}
}
@@ -0,0 +1,13 @@
package membership
import "errors"
// ErrLobbyUnavailable signals that a Resolve call could not be completed
// because the upstream Lobby service was unreachable. The cache wraps
// `ports.ErrLobbyUnavailable` returned by the LobbyClient adapter (via
// loadFromLobby); hot-path services map this sentinel to
// `service_unavailable`.
//
// Callers branch with errors.Is. Returned only on cache miss / TTL expiry
// when the Lobby fetch fails; cached entries are served regardless of
// upstream availability until the TTL elapses.
var ErrLobbyUnavailable = errors.New("membership cache: lobby unavailable")
@@ -0,0 +1,49 @@
package orderput
// Stable error codes returned in `Result.ErrorCode`. The values match the
// vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Stage 19's REST handler imports
// these names rather than redeclare them; renaming any of them is a
// contract change.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty required field, malformed payload,
	// non-object payload, payload missing the `commands` array).
	ErrorCodeInvalidRequest = "invalid_request"
	// ErrorCodeRuntimeNotFound reports that no `runtime_records` row
	// exists for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"
	// ErrorCodeRuntimeNotRunning reports that the runtime exists but its
	// current status is not `running`. Hot-path orders are rejected
	// outside the running state to avoid racing with admin transitions
	// and turn generation.
	ErrorCodeRuntimeNotRunning = "runtime_not_running"
	// ErrorCodeForbidden reports that the caller is not an active member
	// of the game, or that the (game_id, user_id) pair lacks a player
	// mapping.
	ErrorCodeForbidden = "forbidden"
	// ErrorCodeEngineUnreachable reports that the engine /api/v1/order
	// call returned a 5xx status, timed out, or could not be dispatched.
	ErrorCodeEngineUnreachable = "engine_unreachable"
	// ErrorCodeEngineValidationError reports that the engine returned
	// 4xx with a per-command result. The body is forwarded verbatim
	// through `Result.RawResponse`.
	ErrorCodeEngineValidationError = "engine_validation_error"
	// ErrorCodeEngineProtocolViolation reports that the engine response
	// did not match the expected schema. Stage 19 maps this to 502.
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"
	// ErrorCodeServiceUnavailable reports that a steady-state dependency
	// (PostgreSQL, Lobby) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"
	// ErrorCodeInternal is the catch-all: an unexpected error not
	// classified by any of the codes above.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,361 @@
// Package orderput implements the player-order hot-path service owned by
// Game Master. It accepts a verified `(game_id, user_id, payload)`
// envelope from Edge Gateway, authorises the caller against the membership
// cache, resolves `actor=race_name` from `player_mappings`, reshapes the
// payload to the engine `CommandRequest{actor, cmd}` schema, and forwards
// the call to the engine `/api/v1/order` endpoint.
//
// Lifecycle and error semantics follow `gamemaster/README.md §Hot Path →
// Player commands and orders`. Design rationale is captured in
// `gamemaster/docs/stage16-membership-cache-and-invalidation.md`.
package orderput
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/telemetry"
)
const (
	// engineCallOp labels the engine-call latency samples recorded by
	// callEngine in the shared telemetry histogram.
	engineCallOp = "order"
	// membershipStatusActive is the only Lobby membership status that
	// authorises order submission; see authorise.
	membershipStatusActive = "active"
	// payloadCommandsKey names the commands array in the GM-side
	// PutOrdersRequest body.
	payloadCommandsKey = "commands"
	// payloadCmdKey names the commands array in the engine-side
	// CommandRequest body.
	payloadCmdKey = "cmd"
	// payloadActorKey names the race-name field in the engine-side
	// CommandRequest body.
	payloadActorKey = "actor"
)
// Input stores the per-call arguments for one order-put operation. The
// shape mirrors `PutOrdersRequest` from
// `gamemaster/api/internal-openapi.yaml` plus the verified user identity
// captured from the `X-User-ID` header by the Stage 19 handler.
type Input struct {
// GameID identifies the platform game the order targets.
GameID string
// UserID identifies the platform user submitting the order. The
// service derives `actor=race_name` from this value via
// `player_mappings`.
UserID string
// Payload stores the raw `PutOrdersRequest` body. The service
// rewrites it to the engine `CommandRequest{actor, cmd}` shape
// before forwarding.
Payload json.RawMessage
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched.
func (input Input) Validate() error {
if strings.TrimSpace(input.GameID) == "" {
return fmt.Errorf("game id must not be empty")
}
if strings.TrimSpace(input.UserID) == "" {
return fmt.Errorf("user id must not be empty")
}
if len(input.Payload) == 0 {
return fmt.Errorf("payload must not be empty")
}
return nil
}
// Result stores the deterministic outcome of one Handle call. Exactly
// one of the two shapes is produced: a success carrying RawResponse, or
// a failure carrying ErrorCode/ErrorMessage (plus RawResponse only for
// `engine_validation_error`).
type Result struct {
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure. Empty on
	// success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
	// RawResponse stores the engine response body. Populated on success
	// and on `engine_validation_error` (the 4xx body is forwarded
	// verbatim). Empty on every other terminal branch.
	RawResponse json.RawMessage
}

// IsSuccess reports whether the result represents a successful operation.
func (result Result) IsSuccess() bool {
	return result.Outcome == operation.OutcomeSuccess
}
// Dependencies groups the collaborators required by Service. All fields
// except Logger and Clock are mandatory; NewService rejects nil values
// for the mandatory ones.
type Dependencies struct {
	// RuntimeRecords loads the engine endpoint and the runtime status.
	RuntimeRecords ports.RuntimeRecordStore
	// PlayerMappings resolves `(game_id, user_id) → race_name`.
	PlayerMappings ports.PlayerMappingStore
	// Membership authorises the caller. Hot-path services share one
	// cache instance with `commandexecute` and `reportget`.
	Membership *membership.Cache
	// Engine forwards the reshaped payload to `/api/v1/order`.
	Engine ports.EngineClient
	// Telemetry records the per-outcome counter and the engine-call
	// latency histogram.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for engine-call latency.
	// Defaults to `time.Now` when nil. Tests inject a frozen clock.
	Clock func() time.Time
}
// Service executes the order-put hot-path operation. The fields mirror
// Dependencies one-to-one after NewService has applied the Logger and
// Clock defaults; construct it only via NewService.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	playerMappings ports.PlayerMappingStore
	membership     *membership.Cache
	engine         ports.EngineClient
	telemetry      *telemetry.Runtime
	logger         *slog.Logger // always non-nil, pre-tagged with the service name
	clock          func() time.Time
}
// NewService constructs one Service from deps. Every mandatory
// collaborator is checked for nil up front so later calls can assume a
// fully-wired receiver; Logger and Clock fall back to slog.Default and
// time.Now respectively.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new order put service: nil runtime records")
	}
	if deps.PlayerMappings == nil {
		return nil, errors.New("new order put service: nil player mappings")
	}
	if deps.Membership == nil {
		return nil, errors.New("new order put service: nil membership cache")
	}
	if deps.Engine == nil {
		return nil, errors.New("new order put service: nil engine client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new order put service: nil telemetry runtime")
	}
	service := &Service{
		runtimeRecords: deps.RuntimeRecords,
		playerMappings: deps.PlayerMappings,
		membership:     deps.Membership,
		engine:         deps.Engine,
		telemetry:      deps.Telemetry,
		logger:         deps.Logger,
		clock:          deps.Clock,
	}
	if service.clock == nil {
		service.clock = time.Now
	}
	if service.logger == nil {
		service.logger = slog.Default()
	}
	// Tag every log line with the owning service name.
	service.logger = service.logger.With("service", "gamemaster.orderput")
	return service, nil
}
// Handle executes one order-put operation end-to-end: validate the
// envelope, check the runtime is running, authorise the caller, reshape
// the payload, forward to the engine, and record telemetry. The
// Go-level error return is reserved for non-business failures (nil
// context, nil receiver); every business outcome flows through Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("order put: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("order put: nil context")
	}
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, input, ErrorCodeInvalidRequest, err.Error(), nil), nil
	}

	// Stop early unless a runtime exists and is currently running;
	// orders outside the running state race with admin transitions.
	record, failure, ok := service.loadRecord(ctx, input)
	if !ok {
		return failure, nil
	}
	if record.Status != runtime.StatusRunning {
		detail := fmt.Sprintf("runtime status is %q, expected %q", record.Status, runtime.StatusRunning)
		return service.recordFailure(ctx, input, ErrorCodeRuntimeNotRunning, detail, nil), nil
	}

	// Authorise the caller and resolve the engine actor from the
	// player mapping — never from the caller-supplied payload.
	mapping, failure, ok := service.authorise(ctx, input)
	if !ok {
		return failure, nil
	}

	reshaped, err := rewriteOrderPayload(input.Payload, mapping.RaceName)
	if err != nil {
		return service.recordFailure(ctx, input, ErrorCodeInvalidRequest, err.Error(), nil), nil
	}

	body, engineErr := service.callEngine(ctx, record.EngineEndpoint, reshaped)
	if engineErr != nil {
		code := classifyEngineError(engineErr)
		detail := fmt.Sprintf("engine order: %s", engineErr.Error())
		// Only the validation branch forwards the engine body so the
		// caller can see per-command results; other failures carry none.
		var forwarded json.RawMessage
		if code == ErrorCodeEngineValidationError {
			forwarded = body
		}
		return service.recordFailure(ctx, input, code, detail, forwarded), nil
	}

	service.telemetry.RecordOrderPutOutcome(ctx,
		string(operation.OutcomeSuccess), "")
	attrs := []any{
		"game_id", input.GameID,
		"user_id", input.UserID,
		"actor", mapping.RaceName,
	}
	attrs = append(attrs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "order put succeeded", attrs...)
	return Result{
		Outcome:     operation.OutcomeSuccess,
		RawResponse: body,
	}, nil
}
// loadRecord reads the runtime record and maps store errors to
// orchestrator outcomes: missing row → runtime_not_found, any other
// store error → service_unavailable. ok=false means the flow stops with
// the returned Result.
func (service *Service) loadRecord(ctx context.Context, input Input) (runtime.RuntimeRecord, Result, bool) {
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	if err == nil {
		return record, Result{}, true
	}
	if errors.Is(err, runtime.ErrNotFound) {
		failure := service.recordFailure(ctx, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist", nil)
		return runtime.RuntimeRecord{}, failure, false
	}
	failure := service.recordFailure(ctx, input,
		ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error()), nil)
	return runtime.RuntimeRecord{}, failure, false
}
// authorise resolves the membership status and the player mapping for
// the caller. Only an `active` member with an installed mapping passes;
// everything else is forbidden, and dependency errors surface as
// service_unavailable. ok=false means the flow stops with the returned
// Result.
func (service *Service) authorise(ctx context.Context, input Input) (playermapping.PlayerMapping, Result, bool) {
	var none playermapping.PlayerMapping

	status, err := service.membership.Resolve(ctx, input.GameID, input.UserID)
	if err != nil {
		return none, service.recordFailure(ctx, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("resolve membership: %s", err.Error()), nil), false
	}
	if status != membershipStatusActive {
		// An empty status means the cache has no entry for the user at
		// all; report that distinctly from an inactive membership.
		detail := fmt.Sprintf("membership status %q does not authorise orders", status)
		if status == "" {
			detail = "user is not a member of the game"
		}
		return none, service.recordFailure(ctx, input,
			ErrorCodeForbidden, detail, nil), false
	}

	mapping, err := service.playerMappings.Get(ctx, input.GameID, input.UserID)
	if err == nil {
		return mapping, Result{}, true
	}
	if errors.Is(err, playermapping.ErrNotFound) {
		return none, service.recordFailure(ctx, input,
			ErrorCodeForbidden, "player mapping not installed for active member", nil), false
	}
	return none, service.recordFailure(ctx, input,
		ErrorCodeServiceUnavailable, fmt.Sprintf("get player mapping: %s", err.Error()), nil), false
}
// callEngine forwards the reshaped payload to the engine and records
// the wall-clock latency of the call under the `order` op label,
// regardless of whether the call succeeded.
func (service *Service) callEngine(ctx context.Context, baseURL string, payload json.RawMessage) (json.RawMessage, error) {
	startedAt := service.clock()
	body, err := service.engine.PutOrders(ctx, baseURL, payload)
	elapsed := service.clock().Sub(startedAt)
	service.telemetry.RecordEngineCall(ctx, engineCallOp, elapsed)
	return body, err
}
// classifyEngineError maps the engine port sentinels to the order-put
// stable error codes. Any error that does not wrap a recognised
// sentinel is classified as unreachable — the conservative code for an
// unexplained transport failure.
func classifyEngineError(err error) string {
	switch {
	case errors.Is(err, ports.ErrEngineValidation):
		return ErrorCodeEngineValidationError
	case errors.Is(err, ports.ErrEngineProtocolViolation):
		return ErrorCodeEngineProtocolViolation
	default:
		// Covers ports.ErrEngineUnreachable and every unclassified
		// error; the previous explicit case duplicated this branch.
		return ErrorCodeEngineUnreachable
	}
}
// recordFailure emits the failure-outcome counter and a structured
// warn-level log entry, then builds the Result the caller surfaces.
// rawResponse is only non-nil on the engine-validation branch.
func (service *Service) recordFailure(ctx context.Context, input Input, errorCode, errorMessage string, rawResponse json.RawMessage) Result {
	service.telemetry.RecordOrderPutOutcome(ctx,
		string(operation.OutcomeFailure), errorCode)

	attrs := []any{
		"game_id", input.GameID,
		"user_id", input.UserID,
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	attrs = append(attrs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "order put rejected", attrs...)

	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		RawResponse:  rawResponse,
	}
}
// rewriteOrderPayload reshapes the GM `PutOrdersRequest` body
// (`{commands:[…]}`) to the engine `CommandRequest` body
// (`{actor:<raceName>, cmd:[…]}`). Every other top-level key is
// discarded; GM never trusts caller-supplied envelope fields per the
// README §Hot Path rule. Returns an error when the payload is not a
// JSON object or the `commands` field is missing or not an array.
func rewriteOrderPayload(payload json.RawMessage, raceName string) (json.RawMessage, error) {
	var fields map[string]json.RawMessage
	if err := json.Unmarshal(payload, &fields); err != nil {
		return nil, fmt.Errorf("payload must decode as a JSON object: %w", err)
	}
	commands, ok := fields[payloadCommandsKey]
	if !ok {
		return nil, fmt.Errorf("payload missing required %q field", payloadCommandsKey)
	}
	// Validate the array shape before forwarding; the element bytes
	// themselves pass through untouched as raw JSON.
	var commandList []json.RawMessage
	if err := json.Unmarshal(commands, &commandList); err != nil {
		return nil, fmt.Errorf("payload %q field must decode as an array: %w", payloadCommandsKey, err)
	}
	actor, err := json.Marshal(raceName)
	if err != nil {
		return nil, fmt.Errorf("marshal actor: %w", err)
	}
	out := map[string]json.RawMessage{
		payloadActorKey: actor,
		payloadCmdKey:   commands,
	}
	encoded, err := json.Marshal(out)
	if err != nil {
		return nil, fmt.Errorf("marshal engine payload: %w", err)
	}
	return encoded, nil
}
@@ -0,0 +1,600 @@
package orderput_test
import (
"context"
"encoding/json"
"errors"
"fmt"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/service/orderput"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- fakes ------------------------------------------------------------
// fakeRuntimeRecords is an in-memory ports.RuntimeRecordStore double.
// The order-put flow only calls Get; every other interface method
// returns a "not used" error so an unexpected call fails the test.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord
	// getErr, when non-nil, is returned by Get before the map lookup
	// to simulate a store outage.
	getErr error
}

// newFakeRuntimeRecords returns an empty, ready-to-seed store.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed installs record, keyed by its GameID.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}

// Get returns the injected getErr when set, the seeded record when
// present, and runtime.ErrNotFound otherwise.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}

// The remaining RuntimeRecordStore methods are outside the order-put
// flow; each fails loudly if exercised.

func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}
func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}
// fakePlayerMappings is an in-memory ports.PlayerMappingStore double
// keyed by (game id, user id). Only Get is exercised by order-put; the
// remaining interface methods fail loudly if called.
type fakePlayerMappings struct {
	mu     sync.Mutex
	stored map[string]map[string]playermapping.PlayerMapping
	// getErr, when non-nil, is returned by Get before the map lookup
	// to simulate a store outage.
	getErr error
}

// newFakePlayerMappings returns an empty, ready-to-seed store.
func newFakePlayerMappings() *fakePlayerMappings {
	return &fakePlayerMappings{stored: map[string]map[string]playermapping.PlayerMapping{}}
}

// seed installs record under its (GameID, UserID) pair.
func (s *fakePlayerMappings) seed(record playermapping.PlayerMapping) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.stored[record.GameID]; !ok {
		s.stored[record.GameID] = map[string]playermapping.PlayerMapping{}
	}
	s.stored[record.GameID][record.UserID] = record
}

// Get returns the injected getErr when set, the seeded mapping when
// present, and playermapping.ErrNotFound otherwise.
func (s *fakePlayerMappings) Get(_ context.Context, gameID, userID string) (playermapping.PlayerMapping, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return playermapping.PlayerMapping{}, s.getErr
	}
	record, ok := s.stored[gameID][userID]
	if !ok {
		return playermapping.PlayerMapping{}, playermapping.ErrNotFound
	}
	return record, nil
}

func (s *fakePlayerMappings) BulkInsert(context.Context, []playermapping.PlayerMapping) error {
	return errors.New("not used")
}
func (s *fakePlayerMappings) GetByRace(context.Context, string, string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used")
}
func (s *fakePlayerMappings) ListByGame(context.Context, string) ([]playermapping.PlayerMapping, error) {
	return nil, errors.New("not used")
}
func (s *fakePlayerMappings) DeleteByGame(context.Context, string) error {
	return errors.New("not used")
}
// recordedCall captures one PutOrders invocation for later assertions
// on the target endpoint and the forwarded payload bytes.
type recordedCall struct {
	baseURL string
	payload json.RawMessage
}

// fakeEngine is a ports.EngineClient double. PutOrders records every
// call and returns the configured body/err pair. Note the body is
// returned even alongside a non-nil err, mirroring the validation-error
// path where the service forwards the engine body to the caller.
type fakeEngine struct {
	mu    sync.Mutex
	body  json.RawMessage
	err   error
	calls []recordedCall
}

// PutOrders snapshots the payload bytes (a defensive copy, so later
// mutation of the caller's buffer cannot corrupt assertions) and
// appends the call record.
func (f *fakeEngine) PutOrders(_ context.Context, baseURL string, payload json.RawMessage) (json.RawMessage, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	stored := append(json.RawMessage(nil), payload...)
	f.calls = append(f.calls, recordedCall{baseURL: baseURL, payload: stored})
	return f.body, f.err
}

// The remaining EngineClient methods are outside the order-put flow;
// each fails loudly if exercised.

func (f *fakeEngine) Init(context.Context, string, ports.InitRequest) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}
func (f *fakeEngine) Status(context.Context, string) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}
func (f *fakeEngine) Turn(context.Context, string) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}
func (f *fakeEngine) BanishRace(context.Context, string, string) error {
	return errors.New("not used")
}
func (f *fakeEngine) ExecuteCommands(context.Context, string, json.RawMessage) (json.RawMessage, error) {
	return nil, errors.New("not used")
}
func (f *fakeEngine) GetReport(context.Context, string, string, int) (json.RawMessage, error) {
	return nil, errors.New("not used")
}
// fakeLobby is a Lobby-client double serving canned membership lists
// per game id. seedErr makes GetMemberships fail for a specific game,
// which the membership cache surfaces as a resolve error.
type fakeLobby struct {
	mu      sync.Mutex
	answers map[string][]ports.Membership
	errs    map[string]error
}

// newFakeLobby returns an empty, ready-to-seed lobby double.
func newFakeLobby() *fakeLobby {
	return &fakeLobby{
		answers: map[string][]ports.Membership{},
		errs:    map[string]error{},
	}
}

// seed installs the membership roster returned for gameID.
func (f *fakeLobby) seed(gameID string, members []ports.Membership) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.answers[gameID] = members
}

// seedErr makes GetMemberships fail with err for gameID.
func (f *fakeLobby) seedErr(gameID string, err error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.errs[gameID] = err
}

// GetMemberships returns the seeded error when one exists, otherwise a
// defensive copy of the seeded roster (empty when never seeded).
func (f *fakeLobby) GetMemberships(_ context.Context, gameID string) ([]ports.Membership, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	if err, ok := f.errs[gameID]; ok {
		return nil, err
	}
	return append([]ports.Membership(nil), f.answers[gameID]...), nil
}

func (f *fakeLobby) GetGameSummary(context.Context, string) (ports.GameSummary, error) {
	return ports.GameSummary{}, errors.New("not used")
}
// --- harness ----------------------------------------------------------
// harness bundles the service under test with all of its fakes so each
// test seeds exactly the state it needs and asserts on the fakes
// afterwards.
type harness struct {
	t *testing.T
	// now is the frozen instant returned by the injected Clock func of
	// both the membership cache and the service.
	now      time.Time
	runtimes *fakeRuntimeRecords
	mappings *fakePlayerMappings
	engine   *fakeEngine
	lobby    *fakeLobby
	cache    *membership.Cache
	service  *orderput.Service
}

// Canonical identifiers shared by every test case in this file.
const (
	testGameID         = "game-001"
	testUserID         = "user-1"
	testRaceName       = "Aelinari"
	testEngineEndpoint = "http://galaxy-game-game-001:8080"
)
// newHarness wires a real membership.Cache (backed by fakeLobby) and a
// real orderput.Service (backed by the fakes) around a shared frozen
// clock, so cache TTL and engine-call latency are deterministic.
func newHarness(t *testing.T) *harness {
	t.Helper()
	// Telemetry runtime constructed without external providers.
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	now := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	h := &harness{
		t:        t,
		now:      now,
		runtimes: newFakeRuntimeRecords(),
		mappings: newFakePlayerMappings(),
		engine:   &fakeEngine{},
		lobby:    newFakeLobby(),
	}
	// Cache and service both read h.now through closures, so a test
	// could advance time by mutating the field.
	cache, err := membership.NewCache(membership.Dependencies{
		Lobby:     h.lobby,
		Telemetry: tel,
		TTL:       time.Minute,
		MaxGames:  16,
		Clock:     func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.cache = cache
	svc, err := orderput.NewService(orderput.Dependencies{
		RuntimeRecords: h.runtimes,
		PlayerMappings: h.mappings,
		Membership:     h.cache,
		Engine:         h.engine,
		Telemetry:      tel,
		Clock:          func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = svc
	return h
}
// seedRunningRecord installs a healthy `running` runtime record for
// testGameID pointing at testEngineEndpoint.
func (h *harness) seedRunningRecord() {
	startedAt := h.now.Add(-1 * time.Hour)
	h.runtimes.seed(runtime.RuntimeRecord{
		GameID:               testGameID,
		Status:               runtime.StatusRunning,
		EngineEndpoint:       testEngineEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		EngineHealth:         "healthy",
		CreatedAt:            h.now.Add(-2 * time.Hour),
		UpdatedAt:            h.now.Add(-2 * time.Hour),
		StartedAt:            &startedAt,
	})
}

// seedActiveMembership makes testUserID an active member of testGameID
// in the fake lobby, holding testRaceName.
func (h *harness) seedActiveMembership() {
	h.lobby.seed(testGameID, []ports.Membership{{
		UserID:   testUserID,
		RaceName: testRaceName,
		Status:   "active",
		JoinedAt: h.now.Add(-2 * time.Hour),
	}})
}

// seedPlayerMapping installs the (game, user) → race mapping the
// service resolves the engine actor from.
func (h *harness) seedPlayerMapping() {
	h.mappings.seed(playermapping.PlayerMapping{
		GameID:           testGameID,
		UserID:           testUserID,
		RaceName:         testRaceName,
		EnginePlayerUUID: "uuid-1",
		CreatedAt:        h.now.Add(-2 * time.Hour),
	})
}

// inputWithCommands wraps payload in an Input for the canonical
// game/user pair.
func (h *harness) inputWithCommands(payload string) orderput.Input {
	return orderput.Input{
		GameID:  testGameID,
		UserID:  testUserID,
		Payload: json.RawMessage(payload),
	}
}

// basicOrdersPayload returns a minimal well-formed PutOrdersRequest
// body carrying a single command.
func basicOrdersPayload() string {
	return `{"commands":[{"@type":"BUILD_SHIP","cmdId":"00000000-0000-0000-0000-000000000001"}]}`
}
// --- tests ------------------------------------------------------------
// TestNewServiceRejectsBadDependencies verifies the constructor rejects
// every missing mandatory collaborator, one at a time.
func TestNewServiceRejectsBadDependencies(t *testing.T) {
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cache, err := membership.NewCache(membership.Dependencies{
		Lobby: newFakeLobby(), Telemetry: tel, TTL: time.Minute, MaxGames: 1,
	})
	require.NoError(t, err)
	// Start from a fully valid dependency set and blank one field per
	// case, so each subtest isolates exactly one missing collaborator.
	validDeps := func() orderput.Dependencies {
		return orderput.Dependencies{
			RuntimeRecords: newFakeRuntimeRecords(),
			PlayerMappings: newFakePlayerMappings(),
			Membership:     cache,
			Engine:         &fakeEngine{},
			Telemetry:      tel,
		}
	}
	testCases := []struct {
		name   string
		mutate func(*orderput.Dependencies)
	}{
		{"nil runtime records", func(d *orderput.Dependencies) { d.RuntimeRecords = nil }},
		{"nil player mappings", func(d *orderput.Dependencies) { d.PlayerMappings = nil }},
		{"nil membership", func(d *orderput.Dependencies) { d.Membership = nil }},
		{"nil engine", func(d *orderput.Dependencies) { d.Engine = nil }},
		{"nil telemetry", func(d *orderput.Dependencies) { d.Telemetry = nil }},
	}
	for _, testCase := range testCases {
		t.Run(testCase.name, func(t *testing.T) {
			deps := validDeps()
			testCase.mutate(&deps)
			svc, err := orderput.NewService(deps)
			require.Error(t, err)
			assert.Nil(t, svc)
		})
	}
}
// TestHandleHappyPath verifies the full success flow: the engine body
// is surfaced verbatim and the forwarded payload carries the rewritten
// {actor, cmd} shape instead of the GM-side {commands} shape.
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{"results":[{"cmd_id":"00000000-0000-0000-0000-000000000001","cmd_applied":true}]}`)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	assert.JSONEq(t, string(h.engine.body), string(result.RawResponse))
	require.Len(t, h.engine.calls, 1)
	assert.Equal(t, testEngineEndpoint, h.engine.calls[0].baseURL)
	var sentToEngine map[string]json.RawMessage
	require.NoError(t, json.Unmarshal(h.engine.calls[0].payload, &sentToEngine))
	assert.Contains(t, sentToEngine, "actor")
	assert.Contains(t, sentToEngine, "cmd")
	assert.NotContains(t, sentToEngine, "commands", "GM must rewrite the field name")
	var actor string
	require.NoError(t, json.Unmarshal(sentToEngine["actor"], &actor))
	assert.Equal(t, testRaceName, actor)
}
// TestHandleHappyPathDoesNotTrustCallerActor verifies that a
// caller-supplied `actor` field is discarded and replaced with the
// race name resolved from player_mappings.
func TestHandleHappyPathDoesNotTrustCallerActor(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{}`)
	payload := `{"actor":"Hacker","commands":[{"@type":"BUILD_SHIP","cmdId":"00000000-0000-0000-0000-000000000001"}]}`
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(payload))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	var sentToEngine map[string]json.RawMessage
	require.NoError(t, json.Unmarshal(h.engine.calls[0].payload, &sentToEngine))
	var actor string
	require.NoError(t, json.Unmarshal(sentToEngine["actor"], &actor))
	assert.Equal(t, testRaceName, actor, "GM must override caller-supplied actor")
}
// TestHandleInvalidRequest verifies that structural envelope failures
// (blank ids, empty payload) terminate with invalid_request before any
// store or engine call.
func TestHandleInvalidRequest(t *testing.T) {
	cases := []struct {
		name    string
		input   orderput.Input
		message string // substring expected in the error detail
	}{
		{"empty game id", orderput.Input{UserID: testUserID, Payload: json.RawMessage(basicOrdersPayload())}, "game id"},
		{"empty user id", orderput.Input{GameID: testGameID, Payload: json.RawMessage(basicOrdersPayload())}, "user id"},
		{"empty payload", orderput.Input{GameID: testGameID, UserID: testUserID}, "payload"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			result, err := h.service.Handle(context.Background(), tc.input)
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, orderput.ErrorCodeInvalidRequest, result.ErrorCode)
			assert.Contains(t, result.ErrorMessage, tc.message)
		})
	}
}
// TestHandleMalformedPayload verifies that a payload failing the
// rewrite step (not an object, commands missing or non-array, invalid
// JSON) is rejected as invalid_request without reaching the engine.
func TestHandleMalformedPayload(t *testing.T) {
	cases := []struct {
		name    string
		payload string
	}{
		{"non-object", `[1,2,3]`},
		{"missing commands", `{"orders":[]}`},
		{"commands not array", `{"commands":"oops"}`},
		{"non-json", `not json`},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			h.seedRunningRecord()
			h.seedActiveMembership()
			h.seedPlayerMapping()
			result, err := h.service.Handle(context.Background(), h.inputWithCommands(tc.payload))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, orderput.ErrorCodeInvalidRequest, result.ErrorCode)
			assert.Empty(t, h.engine.calls)
		})
	}
}
// TestHandleRuntimeNotFound verifies that an order against a game with
// no runtime record fails with the stable runtime_not_found code.
func TestHandleRuntimeNotFound(t *testing.T) {
	h := newHarness(t)
	res, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, orderput.ErrorCodeRuntimeNotFound, res.ErrorCode)
}
// TestHandleRuntimeStoreError verifies a runtime-store failure maps to
// service_unavailable rather than a not-found or internal code.
func TestHandleRuntimeStoreError(t *testing.T) {
	h := newHarness(t)
	h.runtimes.getErr = errors.New("postgres down")
	res, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, orderput.ErrorCodeServiceUnavailable, res.ErrorCode)
}
// TestHandleRuntimeNotRunning verifies that every non-running runtime
// status rejects orders with runtime_not_running and never reaches the
// engine. Timestamp fields are shaped per status to keep the seeded
// record internally consistent.
func TestHandleRuntimeNotRunning(t *testing.T) {
	for _, status := range []runtime.Status{
		runtime.StatusStarting,
		runtime.StatusGenerationInProgress,
		runtime.StatusGenerationFailed,
		runtime.StatusStopped,
		runtime.StatusEngineUnreachable,
		runtime.StatusFinished,
	} {
		t.Run(string(status), func(t *testing.T) {
			h := newHarness(t)
			startedAt := h.now.Add(-1 * time.Hour)
			finishedAt := h.now
			record := runtime.RuntimeRecord{
				GameID:               testGameID,
				Status:               status,
				EngineEndpoint:       testEngineEndpoint,
				CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
				CurrentEngineVersion: "v1.2.3",
				TurnSchedule:         "0 18 * * *",
				CreatedAt:            h.now.Add(-2 * time.Hour),
				UpdatedAt:            h.now.Add(-2 * time.Hour),
			}
			// A starting runtime has no start timestamp yet; stopped
			// and finished runtimes carry the matching end timestamp.
			if status != runtime.StatusStarting {
				record.StartedAt = &startedAt
			}
			if status == runtime.StatusStopped {
				record.StoppedAt = &finishedAt
			}
			if status == runtime.StatusFinished {
				record.FinishedAt = &finishedAt
			}
			h.runtimes.seed(record)
			result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, orderput.ErrorCodeRuntimeNotRunning, result.ErrorCode)
			assert.Empty(t, h.engine.calls)
		})
	}
}
// TestHandleForbiddenInactiveMembership verifies that non-active
// membership (removed, blocked, or absent from the roster entirely)
// fails with forbidden before any engine call, even though a player
// mapping is installed.
func TestHandleForbiddenInactiveMembership(t *testing.T) {
	cases := []struct {
		name    string
		members []ports.Membership
	}{
		{"removed", []ports.Membership{{UserID: testUserID, RaceName: testRaceName, Status: "removed"}}},
		{"blocked", []ports.Membership{{UserID: testUserID, RaceName: testRaceName, Status: "blocked"}}},
		{"unknown user", []ports.Membership{{UserID: "ghost", RaceName: "Ghost", Status: "active"}}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			h.seedRunningRecord()
			h.seedPlayerMapping()
			h.lobby.seed(testGameID, tc.members)
			result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, orderput.ErrorCodeForbidden, result.ErrorCode)
			assert.Empty(t, h.engine.calls)
		})
	}
}
// TestHandleForbiddenMissingPlayerMapping verifies that an active
// member without an installed player mapping is still forbidden and the
// engine is never called.
func TestHandleForbiddenMissingPlayerMapping(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	// Deliberately no seedPlayerMapping: the mapping lookup must miss.
	res, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, orderput.ErrorCodeForbidden, res.ErrorCode)
	assert.Empty(t, h.engine.calls)
}
// TestHandleServiceUnavailableLobbyDown verifies that a Lobby outage
// during membership resolution maps to service_unavailable.
func TestHandleServiceUnavailableLobbyDown(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedPlayerMapping()
	h.lobby.seedErr(testGameID, fmt.Errorf("dial: %w", ports.ErrLobbyUnavailable))
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, orderput.ErrorCodeServiceUnavailable, result.ErrorCode)
}
// TestHandleServiceUnavailablePlayerMappingsError verifies that a
// player-mapping store failure maps to service_unavailable rather than
// forbidden.
func TestHandleServiceUnavailablePlayerMappingsError(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.mappings.getErr = errors.New("postgres down")
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, orderput.ErrorCodeServiceUnavailable, result.ErrorCode)
}
// TestHandleEngineUnreachable verifies the engine-unreachable sentinel
// maps to the stable engine_unreachable code.
func TestHandleEngineUnreachable(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.err = fmt.Errorf("dial: %w", ports.ErrEngineUnreachable)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, orderput.ErrorCodeEngineUnreachable, result.ErrorCode)
}
// TestHandleEngineValidationErrorForwardsBody verifies the 4xx branch:
// the failure carries engine_validation_error AND the engine body is
// forwarded verbatim so the caller sees the per-command results.
func TestHandleEngineValidationErrorForwardsBody(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{"results":[{"cmd_id":"x","cmd_error_code":"INVALID_TARGET"}]}`)
	h.engine.err = fmt.Errorf("400: %w", ports.ErrEngineValidation)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, orderput.ErrorCodeEngineValidationError, result.ErrorCode)
	assert.JSONEq(t, string(h.engine.body), string(result.RawResponse))
}
// TestHandleEngineProtocolViolation verifies the protocol-violation
// sentinel maps to the stable engine_protocol_violation code.
func TestHandleEngineProtocolViolation(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord()
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.err = fmt.Errorf("garbled: %w", ports.ErrEngineProtocolViolation)
	result, err := h.service.Handle(context.Background(), h.inputWithCommands(basicOrdersPayload()))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, orderput.ErrorCodeEngineProtocolViolation, result.ErrorCode)
}
// TestHandleNilContext verifies a nil context is a Go-level error, not
// a business Result.
func TestHandleNilContext(t *testing.T) {
	h := newHarness(t)
	// A typed nil avoids passing a literal nil context expression.
	var missingCtx context.Context
	_, err := h.service.Handle(missingCtx, h.inputWithCommands(basicOrdersPayload()))
	require.Error(t, err)
}
// TestHandleNilReceiver verifies calling Handle on a nil *Service is a
// Go-level error, not a panic.
func TestHandleNilReceiver(t *testing.T) {
	var service *orderput.Service
	_, err := service.Handle(context.Background(), orderput.Input{})
	require.Error(t, err)
}
@@ -0,0 +1,50 @@
package registerruntime
// Stable error codes returned in `Result.ErrorCode`. The values match the
// vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Service-layer stages 14-17
// import these names rather than redeclare them; renaming any of them
// (or changing a string value) is a contract change.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty required fields, unknown enum values,
	// malformed turn schedule).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeConflict reports that a runtime record already exists for
	// the requested game id (idempotent re-registration not supported in
	// v1) or that a CAS guard failed mid-flow because the row changed
	// concurrently.
	ErrorCodeConflict = "conflict"

	// ErrorCodeEngineVersionNotFound reports that the requested
	// `target_engine_version` is not present in the engine_versions
	// registry. Returned before any engine call is attempted.
	ErrorCodeEngineVersionNotFound = "engine_version_not_found"

	// ErrorCodeEngineUnreachable reports that the engine /admin/init call
	// returned a 5xx status, timed out, or could not be dispatched. The
	// runtime_records and player_mappings rows are rolled back before
	// the error reaches the caller.
	ErrorCodeEngineUnreachable = "engine_unreachable"

	// ErrorCodeEngineValidationError reports that the engine /admin/init
	// call returned a 4xx status. Distinguished from
	// `engine_unreachable` so the operator knows the engine is
	// reachable but rejected the request shape (per Stage 13 D1).
	ErrorCodeEngineValidationError = "engine_validation_error"

	// ErrorCodeEngineProtocolViolation reports that the engine response
	// did not match the expected schema or did not match the input
	// roster (player count mismatch, race-name set mismatch, missing
	// required fields).
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"

	// ErrorCodeServiceUnavailable reports that a steady-state dependency
	// (PostgreSQL, Redis) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by the
	// other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,726 @@
// Package registerruntime implements the register-runtime service-layer
// orchestrator owned by Game Master. The service is the single entry
// point Game Lobby uses (after Runtime Manager has reported a successful
// container start) to install a freshly-started game in Game Master.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Register-runtime`. Design rationale is captured in
// `gamemaster/docs/stage13-register-runtime.md`.
package registerruntime
import (
"context"
"errors"
"fmt"
"log/slog"
"sort"
"strings"
"time"
"galaxy/gamemaster/internal/domain/engineversion"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/domain/schedule"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/telemetry"
)
// Member stores one entry of Input.Members. The shape mirrors
// `RegisterRuntimeMember` in `gamemaster/api/internal-openapi.yaml`.
// Both fields must be non-empty and unique across the roster; this is
// enforced by Input.Validate before any store is touched.
type Member struct {
	// UserID identifies an active platform member of the game.
	UserID string
	// RaceName stores the race name reserved for the member by Game
	// Lobby. Used both to build the engine /admin/init roster and to
	// resolve the engine response back to user_id.
	RaceName string
}
// Input stores the per-call arguments for one register-runtime
// operation. The shape mirrors `RegisterRuntimeRequest` plus the
// audit-only OpSource / SourceRef pair. Structural invariants are
// checked by Validate before Handle touches any store.
type Input struct {
	// GameID identifies the platform game whose runtime is being
	// registered.
	GameID string
	// EngineEndpoint stores the engine container URL Game Master uses
	// for every subsequent call against the runtime
	// (`http://galaxy-game-{game_id}:8080`).
	EngineEndpoint string
	// Members stores the per-active-member roster Game Lobby committed
	// when the platform game opened. Must be non-empty and free of
	// duplicate user ids or race names.
	Members []Member
	// TargetEngineVersion stores the semver under which Runtime Manager
	// started the container. Resolved against the engine_versions
	// registry to recover the matching image_ref.
	TargetEngineVersion string
	// TurnSchedule stores the five-field cron expression governing turn
	// generation, copied from the platform game record.
	TurnSchedule string
	// OpSource classifies how the request entered Game Master. Required:
	// every operation_log entry carries an op_source.
	OpSource operation.OpSource
	// SourceRef stores the optional opaque per-source reference (request
	// id, admin user id). Empty when the caller does not provide one.
	SourceRef string
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched. Checks run in a fixed
// order so callers always receive the same first error for a given
// input.
func (input Input) Validate() error {
	switch {
	case strings.TrimSpace(input.GameID) == "":
		return fmt.Errorf("game id must not be empty")
	case strings.TrimSpace(input.EngineEndpoint) == "":
		return fmt.Errorf("engine endpoint must not be empty")
	case len(input.Members) == 0:
		return fmt.Errorf("members must not be empty")
	}
	for index, member := range input.Members {
		switch {
		case strings.TrimSpace(member.UserID) == "":
			return fmt.Errorf("members[%d]: user id must not be empty", index)
		case strings.TrimSpace(member.RaceName) == "":
			return fmt.Errorf("members[%d]: race name must not be empty", index)
		}
	}
	switch {
	case strings.TrimSpace(input.TargetEngineVersion) == "":
		return fmt.Errorf("target engine version must not be empty")
	case strings.TrimSpace(input.TurnSchedule) == "":
		return fmt.Errorf("turn schedule must not be empty")
	case !input.OpSource.IsKnown():
		return fmt.Errorf("op source %q is unsupported", input.OpSource)
	}
	if duplicate := firstDuplicateMember(input.Members); duplicate != "" {
		return fmt.Errorf("members carry duplicate entries for %q", duplicate)
	}
	return nil
}
// firstDuplicateMember returns the first user_id or race_name that
// appears more than once in members; user_id collisions win when both
// occur on the same entry. Empty when every entry is unique.
func firstDuplicateMember(members []Member) string {
	var (
		usersSeen = make(map[string]struct{}, len(members))
		racesSeen = make(map[string]struct{}, len(members))
	)
	for _, entry := range members {
		if _, dup := usersSeen[entry.UserID]; dup {
			return entry.UserID
		}
		usersSeen[entry.UserID] = struct{}{}
		if _, dup := racesSeen[entry.RaceName]; dup {
			return entry.RaceName
		}
		racesSeen[entry.RaceName] = struct{}{}
	}
	return ""
}
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver). Exactly one of
// the two shapes occurs: success with Record populated, or failure with
// ErrorCode/ErrorMessage populated.
type Result struct {
	// Record carries the runtime record installed by the operation.
	// Populated on success; zero on failure.
	Record runtime.RuntimeRecord
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure. Empty on
	// success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}
// IsSuccess reports whether the result represents a successful
// operation.
func (result Result) IsSuccess() bool {
	succeeded := result.Outcome == operation.OutcomeSuccess
	return succeeded
}
// Dependencies groups the collaborators required by Service. Every
// field except Logger and Clock is required; NewService rejects nil
// required fields.
type Dependencies struct {
	// RuntimeRecords stores the runtime_records row installed by the
	// flow.
	RuntimeRecords ports.RuntimeRecordStore
	// EngineVersions resolves `target_engine_version` to the matching
	// image_ref and validates the version exists.
	EngineVersions ports.EngineVersionStore
	// PlayerMappings persists the (game_id, user_id) → race_name
	// projection derived from the engine /admin/init response.
	PlayerMappings ports.PlayerMappingStore
	// OperationLogs records the audit entry for the operation.
	OperationLogs ports.OperationLogStore
	// Engine drives the engine /admin/init call and decodes the
	// response.
	Engine ports.EngineClient
	// LobbyEvents publishes the post-success runtime_snapshot_update
	// to `gm:lobby_events`.
	LobbyEvents ports.LobbyEventsPublisher
	// Telemetry records register-runtime outcomes plus the snapshot
	// publication counter. Required.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
// Service executes the register-runtime lifecycle operation. All fields
// are injected via NewService from Dependencies (see that type for
// per-collaborator semantics); logger and clock are always non-nil
// after construction.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	engineVersions ports.EngineVersionStore
	playerMappings ports.PlayerMappingStore
	operationLogs  ports.OperationLogStore
	engine         ports.EngineClient
	lobbyEvents    ports.LobbyEventsPublisher
	telemetry      *telemetry.Runtime
	logger         *slog.Logger
	clock          func() time.Time
}
// NewService constructs one Service from deps. Every required
// collaborator is checked for nil; Logger and Clock fall back to
// slog.Default and time.Now respectively.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new register runtime service: nil runtime records")
	}
	if deps.EngineVersions == nil {
		return nil, errors.New("new register runtime service: nil engine versions")
	}
	if deps.PlayerMappings == nil {
		return nil, errors.New("new register runtime service: nil player mappings")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new register runtime service: nil operation logs")
	}
	if deps.Engine == nil {
		return nil, errors.New("new register runtime service: nil engine client")
	}
	if deps.LobbyEvents == nil {
		return nil, errors.New("new register runtime service: nil lobby events publisher")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new register runtime service: nil telemetry runtime")
	}
	wallClock := deps.Clock
	if wallClock == nil {
		wallClock = time.Now
	}
	serviceLogger := deps.Logger
	if serviceLogger == nil {
		serviceLogger = slog.Default()
	}
	return &Service{
		runtimeRecords: deps.RuntimeRecords,
		engineVersions: deps.EngineVersions,
		playerMappings: deps.PlayerMappings,
		operationLogs:  deps.OperationLogs,
		engine:         deps.Engine,
		lobbyEvents:    deps.LobbyEvents,
		telemetry:      deps.Telemetry,
		logger:         serviceLogger.With("service", "gamemaster.registerruntime"),
		clock:          wallClock,
	}, nil
}
// Handle executes one register-runtime operation end-to-end. The
// Go-level error return is reserved for non-business failures (nil
// context, nil receiver). Every business outcome flows through Result.
//
// The steps follow `gamemaster/README.md §Lifecycles →
// Register-runtime`: validate → reject duplicates → resolve version →
// insert `starting` row → engine /admin/init → roster check → player
// mappings → schedule → CAS to `running` → first scheduling write →
// reload → audit log, snapshot, telemetry. Each helper that returns
// ok=false has already recorded the failure (and rolled back where
// needed), so Handle simply forwards its Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("register runtime: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("register runtime: nil context")
	}
	// One timestamp anchors the whole operation: op_start, row
	// created_at, player_mapping created_at, and cron base all agree.
	opStartedAt := service.clock().UTC()
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, opStartedAt, input, false, false,
			ErrorCodeInvalidRequest, err.Error()), nil
	}
	if outcome, ok := service.rejectExisting(ctx, opStartedAt, input); ok {
		return outcome, nil
	}
	imageRef, outcome, ok := service.resolveImageRef(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}
	// The `starting` row is inserted before the engine call so a
	// concurrent registration for the same game id hits ErrConflict.
	record := service.buildStartingRecord(input, imageRef, opStartedAt)
	if err := service.runtimeRecords.Insert(ctx, record); err != nil {
		switch {
		case errors.Is(err, runtime.ErrConflict):
			return service.recordFailure(ctx, opStartedAt, input, false, false,
				ErrorCodeConflict, "runtime record already exists"), nil
		default:
			return service.recordFailure(ctx, opStartedAt, input, false, false,
				ErrorCodeServiceUnavailable, fmt.Sprintf("insert runtime record: %s", err.Error())), nil
		}
	}
	engineState, outcome, ok := service.callEngineInit(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}
	if outcome, ok := service.validateRoster(ctx, opStartedAt, input, engineState); !ok {
		return outcome, nil
	}
	if outcome, ok := service.installPlayerMappings(ctx, opStartedAt, input, engineState); !ok {
		return outcome, nil
	}
	nextGenerationAt, outcome, ok := service.computeNextGeneration(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}
	if outcome, ok := service.casToRunning(ctx, opStartedAt, input); !ok {
		return outcome, nil
	}
	if outcome, ok := service.persistInitialScheduling(ctx, opStartedAt, input, nextGenerationAt); !ok {
		return outcome, nil
	}
	// Re-read so the returned record carries adapter-set timestamps.
	persisted, outcome, ok := service.reloadRecord(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}
	// Success tail: audit log, lobby snapshot, and telemetry are all
	// best-effort and never fail the operation.
	stats := projectInitToStats(engineState, input.Members)
	service.appendSuccessLog(ctx, opStartedAt, input)
	service.publishSnapshot(ctx, persisted, stats, opStartedAt)
	service.telemetry.RecordRegisterRuntimeOutcome(ctx, string(operation.OutcomeSuccess), "")
	logArgs := []any{
		"game_id", input.GameID,
		"engine_version", input.TargetEngineVersion,
		"members", len(input.Members),
		"op_source", string(input.OpSource),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime registered", logArgs...)
	return Result{
		Record:  persisted,
		Outcome: operation.OutcomeSuccess,
	}, nil
}
// rejectExisting returns a Result and ok=true when the runtime record
// already exists or the lookup itself failed; ok=false continues the
// flow.
func (service *Service) rejectExisting(ctx context.Context, opStartedAt time.Time, input Input) (Result, bool) {
	_, err := service.runtimeRecords.Get(ctx, input.GameID)
	if errors.Is(err, runtime.ErrNotFound) {
		// No row yet: registration may proceed.
		return Result{}, false
	}
	if err != nil {
		return service.recordFailure(ctx, opStartedAt, input, false, false,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error())), true
	}
	return service.recordFailure(ctx, opStartedAt, input, false, false,
		ErrorCodeConflict, "runtime record already exists"), true
}
// resolveImageRef resolves the target engine version against the
// engine_versions registry. Returns ok=false on failure with the
// matching Result.
func (service *Service) resolveImageRef(ctx context.Context, opStartedAt time.Time, input Input) (string, Result, bool) {
	version, err := service.engineVersions.Get(ctx, input.TargetEngineVersion)
	if err == nil {
		return version.ImageRef, Result{}, true
	}
	if errors.Is(err, engineversion.ErrNotFound) {
		return "", service.recordFailure(ctx, opStartedAt, input, false, false,
			ErrorCodeEngineVersionNotFound,
			fmt.Sprintf("engine version %q not found", input.TargetEngineVersion)), false
	}
	return "", service.recordFailure(ctx, opStartedAt, input, false, false,
		ErrorCodeServiceUnavailable, fmt.Sprintf("get engine version: %s", err.Error())), false
}
// buildStartingRecord assembles the initial runtime_records row,
// matching `gamemaster/README.md §Lifecycles → Register-runtime` step 4.
// Scheduling fields (CurrentTurn, NextGenerationAt, SkipNextTick,
// EngineHealth) stay at their Go zero values until later steps set them.
func (service *Service) buildStartingRecord(input Input, imageRef string, now time.Time) runtime.RuntimeRecord {
	record := runtime.RuntimeRecord{
		GameID:               input.GameID,
		Status:               runtime.StatusStarting,
		EngineEndpoint:       input.EngineEndpoint,
		CurrentImageRef:      imageRef,
		CurrentEngineVersion: input.TargetEngineVersion,
		TurnSchedule:         input.TurnSchedule,
		CreatedAt:            now,
		UpdatedAt:            now,
	}
	return record
}
// callEngineInit dispatches the engine /admin/init call and maps the
// transport-layer error to a stable Result code. ok=false means the
// flow stops.
func (service *Service) callEngineInit(ctx context.Context, opStartedAt time.Time, input Input) (ports.StateResponse, Result, bool) {
	roster := make([]ports.InitRace, len(input.Members))
	for i, member := range input.Members {
		roster[i] = ports.InitRace{RaceName: member.RaceName}
	}
	state, err := service.engine.Init(ctx, input.EngineEndpoint, ports.InitRequest{Races: roster})
	if err != nil {
		failure := service.recordFailure(ctx, opStartedAt, input, true, false,
			classifyEngineError(err), fmt.Sprintf("engine init: %s", err.Error()))
		return ports.StateResponse{}, failure, false
	}
	return state, Result{}, true
}
// classifyEngineError maps the engine port sentinels to the
// register-runtime stable error codes per Stage 13 D1.
func classifyEngineError(err error) string {
	if errors.Is(err, ports.ErrEngineValidation) {
		return ErrorCodeEngineValidationError
	}
	if errors.Is(err, ports.ErrEngineProtocolViolation) {
		return ErrorCodeEngineProtocolViolation
	}
	// ErrEngineUnreachable and every unclassified transport error share
	// the same stable code.
	return ErrorCodeEngineUnreachable
}
// validateRoster checks that the engine response carries exactly the
// race set Game Master sent on /admin/init. ok=false means the flow
// stops.
func (service *Service) validateRoster(ctx context.Context, opStartedAt time.Time, input Input, state ports.StateResponse) (Result, bool) {
	if len(state.Players) != len(input.Members) {
		message := fmt.Sprintf("engine player count %d does not match roster size %d", len(state.Players), len(input.Members))
		return service.recordFailure(ctx, opStartedAt, input, true, false,
			ErrorCodeEngineProtocolViolation, message), false
	}
	// Equal sizes plus set membership implies the race sets match.
	roster := make(map[string]struct{}, len(input.Members))
	for _, member := range input.Members {
		roster[member.RaceName] = struct{}{}
	}
	for _, player := range state.Players {
		if _, known := roster[player.RaceName]; known {
			continue
		}
		message := fmt.Sprintf("engine returned race %q not present in roster", player.RaceName)
		return service.recordFailure(ctx, opStartedAt, input, true, false,
			ErrorCodeEngineProtocolViolation, message), false
	}
	return Result{}, true
}
// installPlayerMappings projects the engine response onto
// player_mappings rows and persists them in one batch. ok=false means
// the flow stops (and rolls back both stores).
func (service *Service) installPlayerMappings(ctx context.Context, opStartedAt time.Time, input Input, state ports.StateResponse) (Result, bool) {
	raceToUser := make(map[string]string, len(input.Members))
	for _, member := range input.Members {
		raceToUser[member.RaceName] = member.UserID
	}
	rows := make([]playermapping.PlayerMapping, 0, len(state.Players))
	for _, player := range state.Players {
		owner, known := raceToUser[player.RaceName]
		if !known {
			message := fmt.Sprintf("engine returned race %q not present in roster", player.RaceName)
			return service.recordFailure(ctx, opStartedAt, input, true, false,
				ErrorCodeEngineProtocolViolation, message), false
		}
		rows = append(rows, playermapping.PlayerMapping{
			GameID:           input.GameID,
			UserID:           owner,
			RaceName:         player.RaceName,
			EnginePlayerUUID: player.EnginePlayerUUID,
			CreatedAt:        opStartedAt,
		})
	}
	err := service.playerMappings.BulkInsert(ctx, rows)
	if err == nil {
		return Result{}, true
	}
	// BulkInsert is per-statement atomic (stage 11 D7), so a failure
	// leaves no mappings to clean up — only the runtime row.
	if errors.Is(err, playermapping.ErrConflict) {
		return service.recordFailure(ctx, opStartedAt, input, true, false,
			ErrorCodeConflict, fmt.Sprintf("bulk insert player mappings: %s", err.Error())), false
	}
	return service.recordFailure(ctx, opStartedAt, input, true, false,
		ErrorCodeServiceUnavailable, fmt.Sprintf("bulk insert player mappings: %s", err.Error())), false
}
// computeNextGeneration parses the cron schedule and computes the first
// next-generation timestamp (no skip pending). ok=false means the flow
// stops with rollback.
func (service *Service) computeNextGeneration(ctx context.Context, opStartedAt time.Time, input Input) (time.Time, Result, bool) {
	parsed, err := schedule.Parse(input.TurnSchedule)
	if err != nil {
		failure := service.recordFailure(ctx, opStartedAt, input, true, true,
			ErrorCodeInvalidRequest, fmt.Sprintf("parse turn schedule: %s", err.Error()))
		return time.Time{}, failure, false
	}
	// NOTE(review): Next's second return value is discarded here —
	// presumably a skip indicator that is irrelevant for a fresh
	// runtime; confirm against schedule.Next's contract.
	next, _ := parsed.Next(opStartedAt, false)
	return next.UTC(), Result{}, true
}
// casToRunning flips the runtime record from `starting` to `running`.
// On CAS failure or any storage error the flow rolls back both stores.
func (service *Service) casToRunning(ctx context.Context, opStartedAt time.Time, input Input) (Result, bool) {
	err := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusStarting,
		To:           runtime.StatusRunning,
		Now:          opStartedAt,
	})
	if err == nil {
		return Result{}, true
	}
	if errors.Is(err, runtime.ErrConflict) {
		return service.recordFailure(ctx, opStartedAt, input, true, true,
			ErrorCodeConflict, fmt.Sprintf("cas runtime status to running: %s", err.Error())), false
	}
	return service.recordFailure(ctx, opStartedAt, input, true, true,
		ErrorCodeServiceUnavailable, fmt.Sprintf("cas runtime status to running: %s", err.Error())), false
}
// persistInitialScheduling writes the first `next_generation_at` and
// the (already false) skip flag plus turn=0 on the runtime row.
// Failure rolls back both stores.
func (service *Service) persistInitialScheduling(ctx context.Context, opStartedAt time.Time, input Input, next time.Time) (Result, bool) {
	update := ports.UpdateSchedulingInput{
		GameID:           input.GameID,
		NextGenerationAt: &next,
		SkipNextTick:     false,
		CurrentTurn:      0,
		Now:              opStartedAt,
	}
	if err := service.runtimeRecords.UpdateScheduling(ctx, update); err != nil {
		return service.recordFailure(ctx, opStartedAt, input, true, true,
			ErrorCodeServiceUnavailable, fmt.Sprintf("update initial scheduling: %s", err.Error())), false
	}
	return Result{}, true
}
// reloadRecord re-reads the runtime row so the returned Result.Record
// carries the post-CAS, post-scheduling timestamps the adapters set.
// On read failure the flow rolls back both stores.
func (service *Service) reloadRecord(ctx context.Context, opStartedAt time.Time, input Input) (runtime.RuntimeRecord, Result, bool) {
	persisted, err := service.runtimeRecords.Get(ctx, input.GameID)
	if err == nil {
		return persisted, Result{}, true
	}
	failure := service.recordFailure(ctx, opStartedAt, input, true, true,
		ErrorCodeServiceUnavailable, fmt.Sprintf("reload runtime record: %s", err.Error()))
	return runtime.RuntimeRecord{}, failure, false
}
// projectInitToStats joins the engine /admin/init response on RaceName
// against the input roster to produce one PlayerTurnStats per active
// member, sorted by UserID. The caller has already validated that every
// player race name is present in the roster, so the lookup is total.
func projectInitToStats(state ports.StateResponse, members []Member) []ports.PlayerTurnStats {
	if len(state.Players) == 0 {
		return nil
	}
	raceToUser := make(map[string]string, len(members))
	for _, member := range members {
		raceToUser[member.RaceName] = member.UserID
	}
	stats := make([]ports.PlayerTurnStats, 0, len(state.Players))
	for _, player := range state.Players {
		owner, found := raceToUser[player.RaceName]
		if !found {
			// Defensive: unreachable after validateRoster.
			continue
		}
		stats = append(stats, ports.PlayerTurnStats{
			UserID:     owner,
			Planets:    player.Planets,
			Population: player.Population,
		})
	}
	sort.Slice(stats, func(a, b int) bool { return stats[a].UserID < stats[b].UserID })
	return stats
}
// recordFailure assembles the failure Result, rolls back any installed
// state, appends the operation_log failure entry, and emits telemetry.
// runtimeInserted reports whether the runtime row was already
// installed; playerMappingsInstalled reports whether the player_mappings
// rows were installed too. The two booleans gate the rollback so a
// race-induced ErrConflict from Insert does not delete a row owned by
// another caller.
func (service *Service) recordFailure(
	ctx context.Context,
	opStartedAt time.Time,
	input Input,
	runtimeInserted bool,
	playerMappingsInstalled bool,
	errorCode string,
	errorMessage string,
) Result {
	// Rollback happens first so the stores are clean before the audit
	// entry records the failure.
	if runtimeInserted {
		service.rollback(ctx, input.GameID, playerMappingsInstalled)
	}
	finishedAt := service.clock().UTC()
	// The audit entry is best-effort; a log-store failure never masks
	// the original error code.
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindRegisterRuntime,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &finishedAt,
	})
	service.telemetry.RecordRegisterRuntimeOutcome(ctx, string(operation.OutcomeFailure), errorCode)
	logArgs := []any{
		"game_id", input.GameID,
		"engine_version", input.TargetEngineVersion,
		"op_source", string(input.OpSource),
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "register runtime failed", logArgs...)
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// rollback removes any installed state. Both store calls are
// idempotent; failures are logged but never overwrite the original
// failure reason. A fresh background context is used so a cancelled
// request context does not strand the row; the incoming ctx is still
// used for log correlation.
func (service *Service) rollback(ctx context.Context, gameID string, playerMappingsInstalled bool) {
	cleanupCtx, cancel := context.WithTimeout(context.Background(), rollbackTimeout)
	defer cancel()
	if playerMappingsInstalled {
		err := service.playerMappings.DeleteByGame(cleanupCtx, gameID)
		if err != nil {
			service.logger.ErrorContext(ctx, "rollback player mappings",
				"game_id", gameID,
				"err", err.Error(),
			)
		}
	}
	err := service.runtimeRecords.Delete(cleanupCtx, gameID)
	if err != nil {
		service.logger.ErrorContext(ctx, "rollback runtime record",
			"game_id", gameID,
			"err", err.Error(),
		)
	}
}
// rollbackTimeout bounds each rollback storage call. A fresh background
// context is used so a canceled request context does not block the
// cleanup; the timeout matches the shape used by
// `rtmanager/internal/service/startruntime.Service.releaseLease`.
const rollbackTimeout = 5 * time.Second
// appendSuccessLog records the success operation_log entry for the
// completed register-runtime operation. Best-effort: failures are
// logged and discarded by bestEffortAppend.
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	completedAt := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindRegisterRuntime,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &completedAt,
	}
	service.bestEffortAppend(ctx, entry)
}
// publishSnapshot publishes the post-success runtime_snapshot_update
// per `gamemaster/README.md §Lifecycles → Register-runtime` step 9.
// Failures are logged but do not roll back the just-installed runtime
// record; the snapshot stream is best-effort by contract.
func (service *Service) publishSnapshot(ctx context.Context, record runtime.RuntimeRecord, stats []ports.PlayerTurnStats, occurredAt time.Time) {
	update := ports.RuntimeSnapshotUpdate{
		GameID:              record.GameID,
		CurrentTurn:         record.CurrentTurn,
		RuntimeStatus:       record.Status,
		EngineHealthSummary: record.EngineHealth,
		PlayerTurnStats:     stats,
		OccurredAt:          occurredAt,
	}
	err := service.lobbyEvents.PublishSnapshotUpdate(ctx, update)
	if err != nil {
		service.logger.ErrorContext(ctx, "publish runtime snapshot update",
			"game_id", record.GameID,
			"err", err.Error(),
		)
		return
	}
	service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
}
// bestEffortAppend writes one operation_log entry. A failure is logged
// and discarded; the runtime record (or its absence after rollback) is
// the source of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, err := service.operationLogs.Append(ctx, entry)
	if err == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", err.Error(),
	)
}
// fallbackOpSource defaults to `admin_rest` when the caller did not
// supply a known op source. Mirrors the README §Trusted Surfaces rule
// "when missing or unrecognised, GM defaults to `op_source=admin_rest`".
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}
@@ -0,0 +1,796 @@
package registerruntime_test
import (
"context"
"errors"
"fmt"
"sort"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/adapters/mocks"
"galaxy/gamemaster/internal/domain/engineversion"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/registerruntime"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)
// --- test doubles -----------------------------------------------------
// fakeRuntimeRecords is an in-memory, mutex-guarded stand-in for the
// runtime-record store port. The *Err fields, when set, force the
// matching method to fail; deletes/updates/scheds record calls so tests
// can assert rollback and CAS behavior.
type fakeRuntimeRecords struct {
	mu sync.Mutex
	// stored holds runtime rows keyed by game id.
	stored map[string]runtime.RuntimeRecord
	// Injected failures, one per method.
	getErr error
	insErr error
	updErr error
	schErr error
	delErr error
	// Call recordings for later assertions.
	deletes []string
	updates []ports.UpdateStatusInput
	scheds []ports.UpdateSchedulingInput
}
// newFakeRuntimeRecords returns an empty in-memory runtime-record fake.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	fake := &fakeRuntimeRecords{}
	fake.stored = make(map[string]runtime.RuntimeRecord)
	return fake
}
// Get returns the stored row for gameID, runtime.ErrNotFound when
// absent, or the injected getErr.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	if record, found := s.stored[gameID]; found {
		return record, nil
	}
	return runtime.RuntimeRecord{}, runtime.ErrNotFound
}
// Insert stores record, returning runtime.ErrConflict when a row for the
// same game id already exists, or the injected insErr.
func (s *fakeRuntimeRecords) Insert(_ context.Context, record runtime.RuntimeRecord) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.insErr != nil {
		return s.insErr
	}
	if _, exists := s.stored[record.GameID]; exists {
		return runtime.ErrConflict
	}
	s.stored[record.GameID] = record
	return nil
}
// UpdateStatus applies a CAS status transition, setting StartedAt the
// first time the row reaches StatusRunning, and records the call.
func (s *fakeRuntimeRecords) UpdateStatus(_ context.Context, input ports.UpdateStatusInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.updErr != nil {
		return s.updErr
	}
	current, exists := s.stored[input.GameID]
	if !exists {
		return runtime.ErrNotFound
	}
	if current.Status != input.ExpectedFrom {
		return runtime.ErrConflict
	}
	current.Status = input.To
	current.UpdatedAt = input.Now
	if input.To == runtime.StatusRunning && current.StartedAt == nil {
		startedAt := input.Now
		current.StartedAt = &startedAt
	}
	s.stored[input.GameID] = current
	s.updates = append(s.updates, input)
	return nil
}
// UpdateScheduling overwrites the scheduling fields on the stored row
// and records the call.
func (s *fakeRuntimeRecords) UpdateScheduling(_ context.Context, input ports.UpdateSchedulingInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.schErr != nil {
		return s.schErr
	}
	current, exists := s.stored[input.GameID]
	if !exists {
		return runtime.ErrNotFound
	}
	current.NextGenerationAt = nil
	if input.NextGenerationAt != nil {
		// Copy the timestamp so the caller cannot mutate stored state.
		next := *input.NextGenerationAt
		current.NextGenerationAt = &next
	}
	current.SkipNextTick = input.SkipNextTick
	current.CurrentTurn = input.CurrentTurn
	current.UpdatedAt = input.Now
	s.stored[input.GameID] = current
	s.scheds = append(s.scheds, input)
	return nil
}
// UpdateImage swaps the image/version fields when the stored status
// matches ExpectedStatus.
func (s *fakeRuntimeRecords) UpdateImage(_ context.Context, input ports.UpdateImageInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	current, exists := s.stored[input.GameID]
	if !exists {
		return runtime.ErrNotFound
	}
	if current.Status != input.ExpectedStatus {
		return runtime.ErrConflict
	}
	current.CurrentImageRef = input.CurrentImageRef
	current.CurrentEngineVersion = input.CurrentEngineVersion
	current.UpdatedAt = input.Now
	s.stored[input.GameID] = current
	return nil
}
// UpdateEngineHealth is not exercised by these tests.
func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	err := errors.New("not used")
	return err
}
// Delete removes the row and records the attempt. The attempt is logged
// even when delErr is set, so tests can assert that a rollback was
// attempted despite the injected failure.
func (s *fakeRuntimeRecords) Delete(_ context.Context, gameID string) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.deletes = append(s.deletes, gameID)
	if s.delErr != nil {
		return s.delErr
	}
	delete(s.stored, gameID)
	return nil
}
// ListDueRunning is not exercised by registerruntime tests.
func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in registerruntime tests")
}

// ListByStatus is not exercised by registerruntime tests.
func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in registerruntime tests")
}

// List is not exercised by registerruntime tests.
func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in registerruntime tests")
}
// deleteCount reports how many Delete calls were made.
func (s *fakeRuntimeRecords) deleteCount() int {
	s.mu.Lock()
	count := len(s.deletes)
	s.mu.Unlock()
	return count
}
// hasRecord reports whether a row exists for gameID.
func (s *fakeRuntimeRecords) hasRecord(gameID string) bool {
	s.mu.Lock()
	_, present := s.stored[gameID]
	s.mu.Unlock()
	return present
}
// record returns the stored row for gameID and whether it exists.
func (s *fakeRuntimeRecords) record(gameID string) (runtime.RuntimeRecord, bool) {
	s.mu.Lock()
	stored, present := s.stored[gameID]
	s.mu.Unlock()
	return stored, present
}
// fakeEngineVersions is an in-memory, mutex-guarded stand-in for the
// engine-version store port. getErr, when set, forces Get to fail.
type fakeEngineVersions struct {
	mu sync.Mutex
	// versions holds registry entries keyed by semver string.
	versions map[string]engineversion.EngineVersion
	getErr error
}
// newFakeEngineVersions returns an empty in-memory version registry.
func newFakeEngineVersions() *fakeEngineVersions {
	fake := &fakeEngineVersions{}
	fake.versions = make(map[string]engineversion.EngineVersion)
	return fake
}
// seed installs an active registry entry for version → imageRef.
func (s *fakeEngineVersions) seed(version, imageRef string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	entry := engineversion.EngineVersion{
		Version:   version,
		ImageRef:  imageRef,
		Status:    engineversion.StatusActive,
		CreatedAt: time.Now().UTC(),
		UpdatedAt: time.Now().UTC(),
	}
	s.versions[version] = entry
}
// Get returns the seeded entry for version, engineversion.ErrNotFound
// when absent, or the injected getErr.
func (s *fakeEngineVersions) Get(_ context.Context, version string) (engineversion.EngineVersion, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return engineversion.EngineVersion{}, s.getErr
	}
	if entry, found := s.versions[version]; found {
		return entry, nil
	}
	return engineversion.EngineVersion{}, engineversion.ErrNotFound
}
// The remaining EngineVersionStore methods are not exercised by
// registerruntime tests; each returns a sentinel error.

func (s *fakeEngineVersions) List(context.Context, *engineversion.Status) ([]engineversion.EngineVersion, error) {
	return nil, errors.New("not used in registerruntime tests")
}

func (s *fakeEngineVersions) Insert(context.Context, engineversion.EngineVersion) error {
	return errors.New("not used in registerruntime tests")
}

func (s *fakeEngineVersions) Update(context.Context, ports.UpdateEngineVersionInput) error {
	return errors.New("not used in registerruntime tests")
}

func (s *fakeEngineVersions) Deprecate(context.Context, string, time.Time) error {
	return errors.New("not used in registerruntime tests")
}

func (s *fakeEngineVersions) Delete(context.Context, string) error {
	return errors.New("not used in registerruntime tests")
}

func (s *fakeEngineVersions) IsReferencedByActiveRuntime(context.Context, string) (bool, error) {
	return false, errors.New("not used in registerruntime tests")
}
// fakePlayerMappings is an in-memory PlayerMappingStore test double
// covering the bulk-insert / list / delete surface the registerruntime
// service uses.
type fakePlayerMappings struct {
	mu       sync.Mutex
	stored   map[string][]playermapping.PlayerMapping // rows keyed by game id
	bulkErr  error                                    // when set, BulkInsert fails with this error
	delErr   error                                    // when set, DeleteByGame fails with this error
	deletes  []string                                 // game ids passed to DeleteByGame, in order
	inserted [][]playermapping.PlayerMapping          // a copy of each BulkInsert batch, in order
}

// newFakePlayerMappings returns an empty, ready-to-use fake.
func newFakePlayerMappings() *fakePlayerMappings {
	return &fakePlayerMappings{stored: map[string][]playermapping.PlayerMapping{}}
}
// BulkInsert appends all records under their game ids and retains a
// copy of the batch for inspection. A seeded bulkErr fails the call
// before anything is stored.
func (s *fakePlayerMappings) BulkInsert(_ context.Context, records []playermapping.PlayerMapping) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.bulkErr != nil {
		return s.bulkErr
	}
	if len(records) == 0 {
		return nil
	}
	for _, record := range records {
		gameID := record.GameID
		s.stored[gameID] = append(s.stored[gameID], record)
	}
	batch := append([]playermapping.PlayerMapping(nil), records...)
	s.inserted = append(s.inserted, batch)
	return nil
}

// Get is not exercised by the registerruntime suite.
func (s *fakePlayerMappings) Get(context.Context, string, string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used in registerruntime tests")
}

// GetByRace is not exercised by the registerruntime suite.
func (s *fakePlayerMappings) GetByRace(context.Context, string, string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used in registerruntime tests")
}

// ListByGame returns a defensive copy of the rows stored for gameID
// (nil when none exist).
func (s *fakePlayerMappings) ListByGame(_ context.Context, gameID string) ([]playermapping.PlayerMapping, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	rows := append([]playermapping.PlayerMapping(nil), s.stored[gameID]...)
	return rows, nil
}
// DeleteByGame records the call, then removes all rows for gameID.
// A seeded delErr fails the call, but the attempt is still counted.
func (s *fakePlayerMappings) DeleteByGame(_ context.Context, gameID string) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.deletes = append(s.deletes, gameID)
	if err := s.delErr; err != nil {
		return err
	}
	delete(s.stored, gameID)
	return nil
}

// deleteCount reports how many DeleteByGame calls were observed.
func (s *fakePlayerMappings) deleteCount() int {
	s.mu.Lock()
	defer s.mu.Unlock()
	return len(s.deletes)
}

// hasRecords reports whether any rows are stored for gameID.
func (s *fakePlayerMappings) hasRecords(gameID string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	return len(s.stored[gameID]) != 0
}
// fakeOperationLogs is an in-memory OperationLogStore test double that
// validates and records appended entries.
type fakeOperationLogs struct {
	mu      sync.Mutex
	appErr  error                      // when set, Append fails with this error
	entries []operation.OperationEntry // successfully appended entries, in order
}
// Append validates entry and stores it, returning a 1-based sequence
// number. A seeded appErr fails the call before validation.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.appErr != nil {
		return 0, s.appErr
	}
	if err := entry.Validate(); err != nil {
		return 0, err
	}
	s.entries = append(s.entries, entry)
	id := int64(len(s.entries))
	return id, nil
}

// ListByGame is not exercised by the registerruntime suite.
func (s *fakeOperationLogs) ListByGame(context.Context, string, int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used in registerruntime tests")
}

// lastEntry returns the most recently appended entry, if any.
func (s *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	count := len(s.entries)
	if count == 0 {
		return operation.OperationEntry{}, false
	}
	return s.entries[count-1], true
}
// --- harness ----------------------------------------------------------

// harness bundles the fakes, mocks, frozen clock, and the service under
// test for one registerruntime test case.
type harness struct {
	t         *testing.T
	ctrl      *gomock.Controller              // drives the engine/lobby mocks
	runtime   *fakeRuntimeRecords             // in-memory runtime_records store
	versions  *fakeEngineVersions             // in-memory engine_versions store
	mappings  *fakePlayerMappings             // in-memory player_mappings store
	logs      *fakeOperationLogs              // in-memory operation log
	engine    *mocks.MockEngineClient         // engine client mock
	lobby     *mocks.MockLobbyEventsPublisher // lobby publisher mock
	telemetry *telemetry.Runtime
	now       time.Time // fixed instant returned by the injected Clock
	service   *registerruntime.Service
}
// newHarness builds a fully-wired registerruntime.Service backed by
// in-memory fakes, gomock doubles, a frozen clock, and one pre-seeded
// active engine version (v1.2.3).
func newHarness(t *testing.T) *harness {
	t.Helper()
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	controller := gomock.NewController(t)
	h := &harness{
		t:         t,
		ctrl:      controller,
		runtime:   newFakeRuntimeRecords(),
		versions:  newFakeEngineVersions(),
		mappings:  newFakePlayerMappings(),
		logs:      &fakeOperationLogs{},
		engine:    mocks.NewMockEngineClient(controller),
		lobby:     mocks.NewMockLobbyEventsPublisher(controller),
		telemetry: tel,
		now:       time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC),
	}
	h.versions.seed("v1.2.3", "ghcr.io/galaxy/game:v1.2.3")
	svc, err := registerruntime.NewService(registerruntime.Dependencies{
		RuntimeRecords: h.runtime,
		EngineVersions: h.versions,
		PlayerMappings: h.mappings,
		OperationLogs:  h.logs,
		Engine:         h.engine,
		LobbyEvents:    h.lobby,
		Telemetry:      h.telemetry,
		Clock:          func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = svc
	return h
}
// baseInput returns a valid two-member registration request; each test
// mutates its own copy per case.
func baseInput() registerruntime.Input {
	members := []registerruntime.Member{
		{UserID: "user-1", RaceName: "Aelinari"},
		{UserID: "user-2", RaceName: "Drazi"},
	}
	return registerruntime.Input{
		GameID:              "game-001",
		EngineEndpoint:      "http://galaxy-game-game-001:8080",
		Members:             members,
		TargetEngineVersion: "v1.2.3",
		TurnSchedule:        "0 18 * * *",
		OpSource:            operation.OpSourceLobbyInternal,
		SourceRef:           "req-abc",
	}
}

// enginePlayers returns the engine-side roster matching baseInput's
// two members.
func enginePlayers() []ports.PlayerState {
	aelinari := ports.PlayerState{RaceName: "Aelinari", EnginePlayerUUID: "uuid-1", Planets: 3, Population: 100}
	drazi := ports.PlayerState{RaceName: "Drazi", EnginePlayerUUID: "uuid-2", Planets: 2, Population: 80}
	return []ports.PlayerState{aelinari, drazi}
}
// --- tests ------------------------------------------------------------

// TestNewServiceRejectsMissingDeps drops each mandatory dependency in
// turn and asserts the constructor refuses to build the service.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cases := []struct {
		name string
		mut  func(*registerruntime.Dependencies)
	}{
		{"runtime records", func(d *registerruntime.Dependencies) { d.RuntimeRecords = nil }},
		{"engine versions", func(d *registerruntime.Dependencies) { d.EngineVersions = nil }},
		{"player mappings", func(d *registerruntime.Dependencies) { d.PlayerMappings = nil }},
		{"operation logs", func(d *registerruntime.Dependencies) { d.OperationLogs = nil }},
		{"engine", func(d *registerruntime.Dependencies) { d.Engine = nil }},
		{"lobby events", func(d *registerruntime.Dependencies) { d.LobbyEvents = nil }},
		{"telemetry", func(d *registerruntime.Dependencies) { d.Telemetry = nil }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			// Start from a fully-populated dependency set, then knock
			// out exactly one collaborator.
			deps := registerruntime.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				EngineVersions: newFakeEngineVersions(),
				PlayerMappings: newFakePlayerMappings(),
				OperationLogs:  &fakeOperationLogs{},
				Engine:         mocks.NewMockEngineClient(ctrl),
				LobbyEvents:    mocks.NewMockLobbyEventsPublisher(ctrl),
				Telemetry:      telemetryRuntime,
			}
			tc.mut(&deps)
			service, err := registerruntime.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
// TestHandleHappyPath drives a full successful registration: engine
// Init succeeds, the runtime record lands in `running`, player
// mappings are installed with engine UUIDs, a success entry is
// appended to the operation log, and a snapshot update is published
// to the lobby.
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	input := baseInput()
	// Engine Init must receive the race roster derived from the input
	// members, in member order.
	h.engine.EXPECT().
		Init(gomock.Any(), input.EngineEndpoint, ports.InitRequest{
			Races: []ports.InitRace{{RaceName: "Aelinari"}, {RaceName: "Drazi"}},
		}).
		Return(ports.StateResponse{
			Turn:    0,
			Players: enginePlayers(),
		}, nil)
	// Capture the lobby snapshot message for the assertions below.
	var captured ports.RuntimeSnapshotUpdate
	h.lobby.EXPECT().
		PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, msg ports.RuntimeSnapshotUpdate) error {
			captured = msg
			return nil
		})
	result, err := h.service.Handle(context.Background(), input)
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "outcome %q error_code=%q", result.Outcome, result.ErrorCode)
	require.Equal(t, runtime.StatusRunning, result.Record.Status)
	require.Equal(t, "ghcr.io/galaxy/game:v1.2.3", result.Record.CurrentImageRef)
	require.NotNil(t, result.Record.NextGenerationAt)
	require.NotNil(t, result.Record.StartedAt)
	// Persisted runtime record: running at turn 0 with a future tick.
	stored, ok := h.runtime.record(input.GameID)
	require.True(t, ok)
	assert.Equal(t, runtime.StatusRunning, stored.Status)
	assert.Equal(t, 0, stored.CurrentTurn)
	assert.False(t, stored.SkipNextTick)
	require.NotNil(t, stored.NextGenerationAt)
	assert.True(t, stored.NextGenerationAt.After(h.now))
	// Player mappings: one row per member, joined with engine UUIDs.
	mappings, err := h.mappings.ListByGame(context.Background(), input.GameID)
	require.NoError(t, err)
	require.Len(t, mappings, 2)
	sort.Slice(mappings, func(i, j int) bool { return mappings[i].UserID < mappings[j].UserID })
	assert.Equal(t, "user-1", mappings[0].UserID)
	assert.Equal(t, "Aelinari", mappings[0].RaceName)
	assert.Equal(t, "uuid-1", mappings[0].EnginePlayerUUID)
	assert.Equal(t, "user-2", mappings[1].UserID)
	assert.Equal(t, "Drazi", mappings[1].RaceName)
	assert.Equal(t, "uuid-2", mappings[1].EnginePlayerUUID)
	// Operation log: a success entry attributed to the lobby source.
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
	assert.Equal(t, operation.OpKindRegisterRuntime, entry.OpKind)
	assert.Equal(t, operation.OpSourceLobbyInternal, entry.OpSource)
	assert.Equal(t, "req-abc", entry.SourceRef)
	// Lobby snapshot payload mirrors the engine state and frozen clock.
	assert.Equal(t, input.GameID, captured.GameID)
	assert.Equal(t, runtime.StatusRunning, captured.RuntimeStatus)
	assert.Equal(t, 0, captured.CurrentTurn)
	assert.Equal(t, "", captured.EngineHealthSummary)
	require.Len(t, captured.PlayerTurnStats, 2)
	assert.Equal(t, "user-1", captured.PlayerTurnStats[0].UserID)
	assert.Equal(t, 3, captured.PlayerTurnStats[0].Planets)
	assert.Equal(t, 100, captured.PlayerTurnStats[0].Population)
	assert.Equal(t, "user-2", captured.PlayerTurnStats[1].UserID)
	assert.Equal(t, 2, captured.PlayerTurnStats[1].Planets)
	assert.Equal(t, 80, captured.PlayerTurnStats[1].Population)
	assert.Equal(t, h.now.UTC(), captured.OccurredAt)
}
// TestHandleRejectsInvalidInput mutates one field of the valid base
// input per case and asserts the service fails with `invalid_request`
// before touching any store.
func TestHandleRejectsInvalidInput(t *testing.T) {
	cases := []struct {
		name string
		mut  func(*registerruntime.Input)
	}{
		{"empty game id", func(i *registerruntime.Input) { i.GameID = "" }},
		{"empty engine endpoint", func(i *registerruntime.Input) { i.EngineEndpoint = "" }},
		{"empty members", func(i *registerruntime.Input) { i.Members = nil }},
		{"empty target version", func(i *registerruntime.Input) { i.TargetEngineVersion = "" }},
		{"empty turn schedule", func(i *registerruntime.Input) { i.TurnSchedule = "" }},
		{"missing user id", func(i *registerruntime.Input) {
			i.Members = []registerruntime.Member{{UserID: "", RaceName: "Aelinari"}}
		}},
		{"missing race name", func(i *registerruntime.Input) {
			i.Members = []registerruntime.Member{{UserID: "user-1", RaceName: ""}}
		}},
		{"unknown op source", func(i *registerruntime.Input) { i.OpSource = "exotic" }},
		{"duplicate user id", func(i *registerruntime.Input) {
			i.Members = []registerruntime.Member{
				{UserID: "user-1", RaceName: "Aelinari"},
				{UserID: "user-1", RaceName: "Drazi"},
			}
		}},
		{"duplicate race name", func(i *registerruntime.Input) {
			i.Members = []registerruntime.Member{
				{UserID: "user-1", RaceName: "Aelinari"},
				{UserID: "user-2", RaceName: "Aelinari"},
			}
		}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			input := baseInput()
			tc.mut(&input)
			result, err := h.service.Handle(context.Background(), input)
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, registerruntime.ErrorCodeInvalidRequest, result.ErrorCode)
			// No persistence should have happened.
			assert.False(t, h.runtime.hasRecord(input.GameID))
			assert.False(t, h.mappings.hasRecords(input.GameID))
		})
	}
}
// TestHandleRejectsExistingRuntime seeds a pre-existing running
// runtime record and asserts registration fails with a conflict while
// leaving the existing record untouched.
func TestHandleRejectsExistingRuntime(t *testing.T) {
	h := newHarness(t)
	input := baseInput()
	require.NoError(t, h.runtime.Insert(context.Background(), runtime.RuntimeRecord{
		GameID:               input.GameID,
		Status:               runtime.StatusRunning,
		EngineEndpoint:       input.EngineEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         input.TurnSchedule,
		CreatedAt:            h.now,
		UpdatedAt:            h.now,
		StartedAt:            &h.now,
	}))
	result, err := h.service.Handle(context.Background(), input)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeConflict, result.ErrorCode)
	// The conflict path must not roll back state it did not create.
	assert.True(t, h.runtime.hasRecord(input.GameID), "existing record must not be removed")
	assert.Equal(t, 0, h.runtime.deleteCount())
	assert.Equal(t, 0, h.mappings.deleteCount())
	// The failure is still recorded in the operation log.
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeConflict, entry.ErrorCode)
}
// TestHandleRejectsMissingEngineVersion verifies that an unknown
// target engine version fails fast, before any persistence.
func TestHandleRejectsMissingEngineVersion(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	in.TargetEngineVersion = "v9.9.9"
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeEngineVersionNotFound, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 0, env.runtime.deleteCount())
}

// TestHandleRollsBackOnEngineUnreachable verifies that a transport
// failure from engine Init rolls the runtime record back.
func TestHandleRollsBackOnEngineUnreachable(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{}, fmt.Errorf("dial: %w", ports.ErrEngineUnreachable))
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeEngineUnreachable, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 1, env.runtime.deleteCount())
	// player_mappings were never installed; rollback skips them.
	assert.Equal(t, 0, env.mappings.deleteCount())
}
// TestHandleRollsBackOnEngineValidationError verifies that a 4xx
// failure from engine Init surfaces as engine_validation_error and
// rolls the runtime record back.
func TestHandleRollsBackOnEngineValidationError(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{}, fmt.Errorf("init body: %w", ports.ErrEngineValidation))
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeEngineValidationError, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 1, env.runtime.deleteCount())
}

// TestHandleRollsBackOnEngineProtocolViolation verifies that an
// engine roster naming a race outside the requested members is
// treated as a protocol violation and rolled back.
func TestHandleRollsBackOnEngineProtocolViolation(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	roster := []ports.PlayerState{
		{RaceName: "Unknown", EnginePlayerUUID: "uuid-x", Planets: 1, Population: 10},
		{RaceName: "Aelinari", EnginePlayerUUID: "uuid-1", Planets: 2, Population: 50},
	}
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{Players: roster}, nil)
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeEngineProtocolViolation, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 1, env.runtime.deleteCount())
}
// TestHandleRollsBackOnPlayerCountMismatch verifies that an engine
// roster smaller than the member list counts as a protocol violation.
func TestHandleRollsBackOnPlayerCountMismatch(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{
			Players: []ports.PlayerState{
				{RaceName: "Aelinari", EnginePlayerUUID: "uuid-1", Planets: 1, Population: 10},
			},
		}, nil)
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeEngineProtocolViolation, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
}

// TestHandleRollsBackOnPlayerMappingConflict verifies that a
// duplicate-row failure from BulkInsert maps to a conflict and rolls
// the runtime record back.
func TestHandleRollsBackOnPlayerMappingConflict(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.mappings.bulkErr = fmt.Errorf("duplicate row: %w", playermapping.ErrConflict)
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{Players: enginePlayers()}, nil)
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeConflict, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 1, env.runtime.deleteCount())
	// BulkInsert is per-statement atomic, so a failure leaves no rows
	// to clean up.
	assert.Equal(t, 0, env.mappings.deleteCount())
}
// TestHandleRollsBackOnSchedulingUpdateFailure verifies that a store
// failure while persisting the schedule rolls back both the runtime
// record and the already-installed player mappings.
func TestHandleRollsBackOnSchedulingUpdateFailure(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.runtime.schErr = errors.New("postgres timeout")
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{Players: enginePlayers()}, nil)
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeServiceUnavailable, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 1, env.runtime.deleteCount())
	assert.Equal(t, 1, env.mappings.deleteCount())
}

// TestHandleRollsBackOnInvalidTurnSchedule verifies that an
// unparsable cron expression fails with invalid_request and rolls
// back everything created so far.
func TestHandleRollsBackOnInvalidTurnSchedule(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	in.TurnSchedule = "not-a-cron"
	// Engine init still happens because TurnSchedule is parsed only
	// after the engine roster validation step.
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{Players: enginePlayers()}, nil)
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeInvalidRequest, res.ErrorCode)
	assert.False(t, env.runtime.hasRecord(in.GameID))
	assert.Equal(t, 1, env.runtime.deleteCount())
	assert.Equal(t, 1, env.mappings.deleteCount())
}
// TestHandleAppendsOperationLogOnFailure verifies that a failed
// registration still lands a failure entry in the operation log with
// consistent start/finish timestamps.
func TestHandleAppendsOperationLogOnFailure(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.engine.EXPECT().
		Init(gomock.Any(), in.EngineEndpoint, gomock.Any()).
		Return(ports.StateResponse{}, fmt.Errorf("dial: %w", ports.ErrEngineUnreachable))
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	require.Equal(t, operation.OutcomeFailure, res.Outcome)
	entry, ok := env.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpKindRegisterRuntime, entry.OpKind)
	assert.Equal(t, operation.OpSourceLobbyInternal, entry.OpSource)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeEngineUnreachable, entry.ErrorCode)
	require.NotNil(t, entry.FinishedAt)
	assert.False(t, entry.FinishedAt.Before(entry.StartedAt))
}

// TestHandleSurfaceServiceUnavailableOnGetRuntimeError verifies that
// a store error on the initial runtime lookup maps to
// service_unavailable.
func TestHandleSurfaceServiceUnavailableOnGetRuntimeError(t *testing.T) {
	env := newHarness(t)
	in := baseInput()
	env.runtime.getErr = errors.New("postgres dial timeout")
	res, err := env.service.Handle(context.Background(), in)
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, res.Outcome)
	assert.Equal(t, registerruntime.ErrorCodeServiceUnavailable, res.ErrorCode)
}
// TestHandleRejectsNilContext asserts the nil-context guard trips
// before any business logic runs.
func TestHandleRejectsNilContext(t *testing.T) {
	env := newHarness(t)
	_, err := env.service.Handle(nil, baseInput()) //nolint:staticcheck // intentional nil context
	require.Error(t, err)
}

// TestHandleNilServiceReturnsError asserts a nil receiver is reported
// as a Go-level error rather than a panic.
func TestHandleNilServiceReturnsError(t *testing.T) {
	var nilService *registerruntime.Service
	_, err := nilService.Handle(context.Background(), baseInput())
	require.Error(t, err)
}
@@ -0,0 +1,48 @@
package reportget
// Stable error codes returned in `Result.ErrorCode`. The values match the
// vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Stage 19's REST handler imports
// these names rather than redeclare them; renaming any of them is a
// contract change.
//
// Note: the report-get operation does **not** require the runtime to be
// in `running` state. Reports may be served against any runtime that
// exists in `runtime_records`; an unreachable engine surfaces naturally
// through `engine_unreachable`. Therefore `runtime_not_running` is not
// part of this vocabulary.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty required field, negative turn).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeRuntimeNotFound reports that no `runtime_records` row
	// exists for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"

	// ErrorCodeForbidden reports that the caller is not an active member
	// of the game, or that the (game_id, user_id) pair lacks a player
	// mapping.
	ErrorCodeForbidden = "forbidden"

	// ErrorCodeEngineUnreachable reports that the engine /api/v1/report
	// call returned a 5xx status, timed out, or could not be dispatched.
	// It is also the conservative fallback for engine errors that match
	// no known sentinel (see classifyEngineError).
	ErrorCodeEngineUnreachable = "engine_unreachable"

	// ErrorCodeEngineValidationError reports that the engine returned
	// 4xx. The body is forwarded verbatim through `Result.RawResponse`.
	ErrorCodeEngineValidationError = "engine_validation_error"

	// ErrorCodeEngineProtocolViolation reports that the engine response
	// did not match the expected schema (empty body, malformed JSON).
	// Stage 19 maps this to 502.
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"

	// ErrorCodeServiceUnavailable reports that a steady-state dependency
	// (PostgreSQL, Lobby) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,314 @@
// Package reportget implements the per-player turn-report hot-path
// service owned by Game Master. It accepts a verified `(game_id, user_id,
// turn)` envelope from Edge Gateway, authorises the caller against the
// membership cache, resolves `race_name` from `player_mappings`, and
// forwards `GET /api/v1/report?player={race_name}&turn={turn}` to the
// engine.
//
// Lifecycle and error semantics follow `gamemaster/README.md §Hot Path →
// Reports`. Unlike commandexecute and orderput, the report service does
// not require `runtime_records.status = running`: reports may be served
// against any runtime that exists in the table, allowing post-finish
// inspection. Design rationale (decision D1) is captured in
// `gamemaster/docs/stage16-membership-cache-and-invalidation.md`.
package reportget
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/telemetry"
)
const (
	// engineCallOp labels the engine-call latency histogram for this
	// service's single engine operation (see callEngine).
	engineCallOp = "report"

	// membershipStatusActive is the only membership status that
	// authorises report reads; any other value (including the empty
	// "no membership" status) is rejected as forbidden.
	membershipStatusActive = "active"
)
// Input stores the per-call arguments for one report-get operation.
type Input struct {
	// GameID identifies the platform game whose report is being read.
	GameID string

	// UserID identifies the platform user submitting the request. The
	// service derives `race_name` from this value via `player_mappings`
	// before calling the engine.
	UserID string

	// Turn identifies the turn number to read. Must be non-negative;
	// zero requests the initial state report.
	Turn int
}

// Validate reports whether input carries the structural invariants the
// service requires before any store is touched. IDs must contain at
// least one non-whitespace character; the turn must be non-negative.
func (input Input) Validate() error {
	if strings.TrimSpace(input.GameID) == "" {
		// Constant messages use errors.New rather than fmt.Errorf
		// (staticcheck S1039: no formatting directives present).
		return errors.New("game id must not be empty")
	}
	if strings.TrimSpace(input.UserID) == "" {
		return errors.New("user id must not be empty")
	}
	if input.Turn < 0 {
		return fmt.Errorf("turn must not be negative, got %d", input.Turn)
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call.
type Result struct {
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome

	// ErrorCode stores the stable error code on failure (one of the
	// ErrorCode* constants in this package). Empty on success.
	ErrorCode string

	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string

	// RawResponse stores the engine response body. Populated on success
	// and on `engine_validation_error`. Empty on every other terminal
	// branch.
	RawResponse json.RawMessage
}

// IsSuccess reports whether the result represents a successful
// operation.
func (result Result) IsSuccess() bool {
	return result.Outcome == operation.OutcomeSuccess
}
// Dependencies groups the collaborators required by Service. All
// fields except Logger and Clock are mandatory; NewService rejects a
// nil store/client/telemetry.
type Dependencies struct {
	// RuntimeRecords loads the engine endpoint.
	RuntimeRecords ports.RuntimeRecordStore

	// PlayerMappings resolves `(game_id, user_id) → race_name`.
	PlayerMappings ports.PlayerMappingStore

	// Membership authorises the caller.
	Membership *membership.Cache

	// Engine forwards `GET /api/v1/report` calls.
	Engine ports.EngineClient

	// Telemetry records the per-outcome counter and the engine-call
	// latency histogram.
	Telemetry *telemetry.Runtime

	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger

	// Clock supplies the wall-clock used for engine-call latency.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}

// Service executes the report-get hot-path operation. Construct it
// with NewService; the fields mirror Dependencies after defaulting.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	playerMappings ports.PlayerMappingStore
	membership     *membership.Cache
	engine         ports.EngineClient
	telemetry      *telemetry.Runtime
	logger         *slog.Logger     // never nil; pre-tagged with the service name
	clock          func() time.Time // never nil
}
// NewService constructs one Service from deps. Every store, client,
// and telemetry dependency is mandatory; Logger and Clock fall back to
// slog.Default and time.Now respectively.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new report get service: nil runtime records")
	}
	if deps.PlayerMappings == nil {
		return nil, errors.New("new report get service: nil player mappings")
	}
	if deps.Membership == nil {
		return nil, errors.New("new report get service: nil membership cache")
	}
	if deps.Engine == nil {
		return nil, errors.New("new report get service: nil engine client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new report get service: nil telemetry runtime")
	}
	service := &Service{
		runtimeRecords: deps.RuntimeRecords,
		playerMappings: deps.PlayerMappings,
		membership:     deps.Membership,
		engine:         deps.Engine,
		telemetry:      deps.Telemetry,
		logger:         deps.Logger,
		clock:          deps.Clock,
	}
	if service.clock == nil {
		service.clock = time.Now
	}
	if service.logger == nil {
		service.logger = slog.Default()
	}
	// Tag every log line with the owning service.
	service.logger = service.logger.With("service", "gamemaster.reportget")
	return service, nil
}
// Handle executes one report-get operation end-to-end. The Go-level
// error return is reserved for non-business failures (nil context, nil
// receiver); every business outcome flows through Result.
//
// Flow: validate input → load runtime record → authorise caller →
// forward the read to the engine → record the outcome.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("report get: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("report get: nil context")
	}
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, input, ErrorCodeInvalidRequest, err.Error(), nil), nil
	}
	record, failure, ok := service.loadRecord(ctx, input)
	if !ok {
		return failure, nil
	}
	mapping, failure, ok := service.authorise(ctx, input)
	if !ok {
		return failure, nil
	}
	body, engineErr := service.callEngine(ctx, record.EngineEndpoint, mapping.RaceName, input.Turn)
	if engineErr != nil {
		code := classifyEngineError(engineErr)
		// Only validation failures forward the engine body verbatim.
		var forwarded json.RawMessage
		if code == ErrorCodeEngineValidationError {
			forwarded = body
		}
		message := fmt.Sprintf("engine report: %s", engineErr.Error())
		return service.recordFailure(ctx, input, code, message, forwarded), nil
	}
	service.telemetry.RecordReportGetOutcome(ctx,
		string(operation.OutcomeSuccess), "")
	attrs := append([]any{
		"game_id", input.GameID,
		"user_id", input.UserID,
		"actor", mapping.RaceName,
		"turn", input.Turn,
	}, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "report get succeeded", attrs...)
	return Result{
		Outcome:     operation.OutcomeSuccess,
		RawResponse: body,
	}, nil
}
// loadRecord reads the runtime record and maps store errors to
// orchestrator outcomes. ok=false means the flow stops with the
// returned Result. Reports tolerate any non-deleted runtime status;
// the running guard from commandexecute / orderput is intentionally
// absent.
func (service *Service) loadRecord(ctx context.Context, input Input) (runtime.RuntimeRecord, Result, bool) {
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	if err == nil {
		return record, Result{}, true
	}
	if errors.Is(err, runtime.ErrNotFound) {
		failure := service.recordFailure(ctx, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist", nil)
		return runtime.RuntimeRecord{}, failure, false
	}
	failure := service.recordFailure(ctx, input,
		ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error()), nil)
	return runtime.RuntimeRecord{}, failure, false
}
// authorise resolves the membership status and the player mapping for
// the caller. ok=false means the flow stops with the returned Result:
// a non-active (or absent) membership and a missing mapping both map
// to forbidden; store errors map to service_unavailable.
func (service *Service) authorise(ctx context.Context, input Input) (playermapping.PlayerMapping, Result, bool) {
	status, err := service.membership.Resolve(ctx, input.GameID, input.UserID)
	if err != nil {
		failure := service.recordFailure(ctx, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("resolve membership: %s", err.Error()), nil)
		return playermapping.PlayerMapping{}, failure, false
	}
	if status != membershipStatusActive {
		message := fmt.Sprintf("membership status %q does not authorise reports", status)
		if status == "" {
			// Empty status: the user has no membership row at all.
			message = "user is not a member of the game"
		}
		failure := service.recordFailure(ctx, input, ErrorCodeForbidden, message, nil)
		return playermapping.PlayerMapping{}, failure, false
	}
	mapping, err := service.playerMappings.Get(ctx, input.GameID, input.UserID)
	if err == nil {
		return mapping, Result{}, true
	}
	if errors.Is(err, playermapping.ErrNotFound) {
		failure := service.recordFailure(ctx, input,
			ErrorCodeForbidden, "player mapping not installed for active member", nil)
		return playermapping.PlayerMapping{}, failure, false
	}
	failure := service.recordFailure(ctx, input,
		ErrorCodeServiceUnavailable, fmt.Sprintf("get player mapping: %s", err.Error()), nil)
	return playermapping.PlayerMapping{}, failure, false
}
// callEngine forwards the read to the engine and records the
// wall-clock latency under the `report` op label, on success and
// failure alike.
func (service *Service) callEngine(ctx context.Context, baseURL, raceName string, turn int) (json.RawMessage, error) {
	started := service.clock()
	body, err := service.engine.GetReport(ctx, baseURL, raceName, turn)
	elapsed := service.clock().Sub(started)
	service.telemetry.RecordEngineCall(ctx, engineCallOp, elapsed)
	return body, err
}
// classifyEngineError maps the engine port sentinels to the report-get
// stable error codes. Anything that matches no sentinel — including
// ports.ErrEngineUnreachable itself — falls through to
// `engine_unreachable`: an unclassifiable transport/dispatch failure
// is indistinguishable from an unreachable engine, so the conservative
// code is returned. (The original explicit ErrEngineUnreachable case
// duplicated the default arm and has been folded into it; behavior is
// unchanged.)
func classifyEngineError(err error) string {
	switch {
	case errors.Is(err, ports.ErrEngineValidation):
		return ErrorCodeEngineValidationError
	case errors.Is(err, ports.ErrEngineProtocolViolation):
		return ErrorCodeEngineProtocolViolation
	default:
		// ports.ErrEngineUnreachable and every unclassified error.
		return ErrorCodeEngineUnreachable
	}
}
// recordFailure emits the service-level outcome counter and a
// structured warn-level log entry, then returns the failure Result the
// caller surfaces.
func (service *Service) recordFailure(ctx context.Context, input Input, errorCode, errorMessage string, rawResponse json.RawMessage) Result {
	service.telemetry.RecordReportGetOutcome(ctx,
		string(operation.OutcomeFailure), errorCode)
	attrs := append([]any{
		"game_id", input.GameID,
		"user_id", input.UserID,
		"turn", input.Turn,
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "report get rejected", attrs...)
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		RawResponse:  rawResponse,
	}
}
@@ -0,0 +1,533 @@
package reportget_test
import (
"context"
"encoding/json"
"errors"
"fmt"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/service/reportget"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- fakes ------------------------------------------------------------

// fakeRuntimeRecords is an in-memory RuntimeRecordStore test double.
// Only seed/Get are functional; the remaining interface methods fail
// loudly if called.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord // records keyed by game id
	getErr error                            // when set, Get fails with this error
}

// newFakeRuntimeRecords returns an empty, ready-to-use fake.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed installs record under its game id.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
s.mu.Lock()
defer s.mu.Unlock()
if s.getErr != nil {
return runtime.RuntimeRecord{}, s.getErr
}
record, ok := s.stored[gameID]
if !ok {
return runtime.RuntimeRecord{}, runtime.ErrNotFound
}
return record, nil
}
func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateStatus(context.Context, ports.UpdateStatusInput) error {
return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
return errors.New("not used")
}
func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
return nil, errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
return errors.New("not used")
}
func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
return errors.New("not used")
}
func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
return errors.New("not used")
}
// fakePlayerMappings is an in-memory test double for the player-mapping
// store port, keyed by game id then user id. Only Get (and seed) is
// used by report-get; the other interface methods fail loudly.
type fakePlayerMappings struct {
	mu     sync.Mutex
	stored map[string]map[string]playermapping.PlayerMapping
	// getErr, when non-nil, is returned by Get before any lookup.
	getErr error
}

// newFakePlayerMappings returns an empty store ready for seeding.
func newFakePlayerMappings() *fakePlayerMappings {
	return &fakePlayerMappings{stored: map[string]map[string]playermapping.PlayerMapping{}}
}

// seed installs record under (GameID, UserID), creating the per-game
// map on first use.
func (s *fakePlayerMappings) seed(record playermapping.PlayerMapping) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.stored[record.GameID]; !ok {
		s.stored[record.GameID] = map[string]playermapping.PlayerMapping{}
	}
	s.stored[record.GameID][record.UserID] = record
}

// Get returns the seeded mapping, playermapping.ErrNotFound when
// absent, or the injected getErr when one is configured.
func (s *fakePlayerMappings) Get(_ context.Context, gameID, userID string) (playermapping.PlayerMapping, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.getErr != nil {
		return playermapping.PlayerMapping{}, s.getErr
	}
	record, ok := s.stored[gameID][userID]
	if !ok {
		return playermapping.PlayerMapping{}, playermapping.ErrNotFound
	}
	return record, nil
}

// Interface padding below: not exercised by report-get.

func (s *fakePlayerMappings) BulkInsert(context.Context, []playermapping.PlayerMapping) error {
	return errors.New("not used")
}

func (s *fakePlayerMappings) GetByRace(context.Context, string, string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used")
}

func (s *fakePlayerMappings) ListByGame(context.Context, string) ([]playermapping.PlayerMapping, error) {
	return nil, errors.New("not used")
}

func (s *fakePlayerMappings) DeleteByGame(context.Context, string) error {
	return errors.New("not used")
}
// recordedReport captures the arguments of one GetReport call so tests
// can assert what the service passed to the engine.
type recordedReport struct {
	baseURL  string
	raceName string
	turn     int
}

// fakeEngine is a test double for the engine client port. GetReport
// records its arguments and replays the configured body/err pair;
// every other method fails loudly.
type fakeEngine struct {
	mu    sync.Mutex
	body  json.RawMessage // canned response body returned by GetReport
	err   error           // canned error returned by GetReport
	calls []recordedReport
}

// GetReport records the call and returns the canned (body, err) pair.
// Note both are returned together, mirroring engine clients that
// forward a 4xx body alongside the error.
func (f *fakeEngine) GetReport(_ context.Context, baseURL, raceName string, turn int) (json.RawMessage, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.calls = append(f.calls, recordedReport{baseURL: baseURL, raceName: raceName, turn: turn})
	return f.body, f.err
}

// Interface padding below: not exercised by report-get.

func (f *fakeEngine) Init(context.Context, string, ports.InitRequest) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}

func (f *fakeEngine) Status(context.Context, string) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}

func (f *fakeEngine) Turn(context.Context, string) (ports.StateResponse, error) {
	return ports.StateResponse{}, errors.New("not used")
}

func (f *fakeEngine) BanishRace(context.Context, string, string) error {
	return errors.New("not used")
}

func (f *fakeEngine) ExecuteCommands(context.Context, string, json.RawMessage) (json.RawMessage, error) {
	return nil, errors.New("not used")
}

func (f *fakeEngine) PutOrders(context.Context, string, json.RawMessage) (json.RawMessage, error) {
	return nil, errors.New("not used")
}
// fakeLobby is a test double for the lobby client port. Per-game
// membership answers and errors are seeded independently; an error
// seeded for a game wins over any seeded answer.
type fakeLobby struct {
	mu      sync.Mutex
	answers map[string][]ports.Membership
	errs    map[string]error
}

// newFakeLobby returns an empty lobby double ready for seeding.
func newFakeLobby() *fakeLobby {
	return &fakeLobby{
		answers: map[string][]ports.Membership{},
		errs:    map[string]error{},
	}
}

// seed installs the membership roster returned for gameID.
func (f *fakeLobby) seed(gameID string, members []ports.Membership) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.answers[gameID] = members
}

// seedErr installs an error returned for gameID, simulating a lobby
// outage.
func (f *fakeLobby) seedErr(gameID string, err error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.errs[gameID] = err
}

// GetMemberships returns a defensive copy of the seeded roster, or the
// seeded error when one exists for gameID.
func (f *fakeLobby) GetMemberships(_ context.Context, gameID string) ([]ports.Membership, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	if err, ok := f.errs[gameID]; ok {
		return nil, err
	}
	return append([]ports.Membership(nil), f.answers[gameID]...), nil
}

// GetGameSummary is interface padding; not exercised by report-get.
func (f *fakeLobby) GetGameSummary(context.Context, string) (ports.GameSummary, error) {
	return ports.GameSummary{}, errors.New("not used")
}
// --- harness ----------------------------------------------------------
// harness wires one reportget.Service against the in-memory fakes with
// a fixed clock, so each test seeds only the state it cares about.
type harness struct {
	t        *testing.T
	now      time.Time // frozen wall clock fed to cache and service
	runtimes *fakeRuntimeRecords
	mappings *fakePlayerMappings
	engine   *fakeEngine
	lobby    *fakeLobby
	cache    *membership.Cache
	service  *reportget.Service
}

// Canonical identifiers shared by every test in this file.
const (
	testGameID         = "game-001"
	testUserID         = "user-1"
	testRaceName       = "Aelinari"
	testEngineEndpoint = "http://galaxy-game-game-001:8080"
)

// newHarness builds the full fake topology plus a real membership cache
// and a real reportget.Service on top of it.
func newHarness(t *testing.T) *harness {
	t.Helper()
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	now := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	h := &harness{
		t:        t,
		now:      now,
		runtimes: newFakeRuntimeRecords(),
		mappings: newFakePlayerMappings(),
		engine:   &fakeEngine{},
		lobby:    newFakeLobby(),
	}
	// The cache clock closes over h.now so tests could advance time by
	// mutating the harness field.
	cache, err := membership.NewCache(membership.Dependencies{
		Lobby:     h.lobby,
		Telemetry: tel,
		TTL:       time.Minute,
		MaxGames:  16,
		Clock:     func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.cache = cache
	svc, err := reportget.NewService(reportget.Dependencies{
		RuntimeRecords: h.runtimes,
		PlayerMappings: h.mappings,
		Membership:     h.cache,
		Engine:         h.engine,
		Telemetry:      tel,
		Clock:          func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = svc
	return h
}
// seedRecordWithStatus stores a fully-populated runtime record for the
// canonical test game in the requested status, with lifecycle
// timestamps consistent with that status: no StartedAt while still
// starting, and StoppedAt/FinishedAt only for their terminal statuses.
func (h *harness) seedRecordWithStatus(status runtime.Status) {
	began := h.now.Add(-time.Hour)
	ended := h.now
	record := runtime.RuntimeRecord{
		GameID:               testGameID,
		Status:               status,
		EngineEndpoint:       testEngineEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		EngineHealth:         "healthy",
		CreatedAt:            h.now.Add(-2 * time.Hour),
		UpdatedAt:            h.now.Add(-2 * time.Hour),
	}
	if status != runtime.StatusStarting {
		record.StartedAt = &began
	}
	switch status {
	case runtime.StatusStopped:
		record.StoppedAt = &ended
	case runtime.StatusFinished:
		record.FinishedAt = &ended
	}
	h.runtimes.seed(record)
}
// seedActiveMembership registers the canonical test user as an active
// lobby member of the test game.
func (h *harness) seedActiveMembership() {
	member := ports.Membership{
		UserID:   testUserID,
		RaceName: testRaceName,
		Status:   "active",
		JoinedAt: h.now.Add(-2 * time.Hour),
	}
	h.lobby.seed(testGameID, []ports.Membership{member})
}
// seedPlayerMapping installs the user→race mapping for the canonical
// test game and user.
func (h *harness) seedPlayerMapping() {
	mapping := playermapping.PlayerMapping{
		GameID:           testGameID,
		UserID:           testUserID,
		RaceName:         testRaceName,
		EnginePlayerUUID: "uuid-1",
		CreatedAt:        h.now.Add(-2 * time.Hour),
	}
	h.mappings.seed(mapping)
}
// input builds a reportget.Input for the canonical game and user at the
// given turn.
func (h *harness) input(turn int) reportget.Input {
	return reportget.Input{
		GameID: testGameID,
		UserID: testUserID,
		Turn:   turn,
	}
}
// --- tests ------------------------------------------------------------
// TestNewServiceRejectsBadDependencies verifies NewService refuses
// construction when any one mandatory collaborator is nil.
func TestNewServiceRejectsBadDependencies(t *testing.T) {
	tel, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cache, err := membership.NewCache(membership.Dependencies{
		Lobby: newFakeLobby(), Telemetry: tel, TTL: time.Minute, MaxGames: 1,
	})
	require.NoError(t, err)
	// Each case omits exactly one required dependency.
	cases := []struct {
		name string
		deps reportget.Dependencies
	}{
		{"nil runtime records", reportget.Dependencies{PlayerMappings: newFakePlayerMappings(), Membership: cache, Engine: &fakeEngine{}, Telemetry: tel}},
		{"nil player mappings", reportget.Dependencies{RuntimeRecords: newFakeRuntimeRecords(), Membership: cache, Engine: &fakeEngine{}, Telemetry: tel}},
		{"nil membership", reportget.Dependencies{RuntimeRecords: newFakeRuntimeRecords(), PlayerMappings: newFakePlayerMappings(), Engine: &fakeEngine{}, Telemetry: tel}},
		{"nil engine", reportget.Dependencies{RuntimeRecords: newFakeRuntimeRecords(), PlayerMappings: newFakePlayerMappings(), Membership: cache, Telemetry: tel}},
		{"nil telemetry", reportget.Dependencies{RuntimeRecords: newFakeRuntimeRecords(), PlayerMappings: newFakePlayerMappings(), Membership: cache, Engine: &fakeEngine{}}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			svc, err := reportget.NewService(tc.deps)
			require.Error(t, err)
			assert.Nil(t, svc)
		})
	}
}

// TestHandleHappyPath drives a fully-seeded harness through one report
// fetch and checks the engine is called with the mapped race/turn and
// the raw engine body is forwarded unchanged.
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{"version":1,"turn":3,"player":[]}`)
	result, err := h.service.Handle(context.Background(), h.input(3))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeSuccess, result.Outcome)
	assert.JSONEq(t, string(h.engine.body), string(result.RawResponse))
	require.Len(t, h.engine.calls, 1)
	assert.Equal(t, testEngineEndpoint, h.engine.calls[0].baseURL)
	assert.Equal(t, testRaceName, h.engine.calls[0].raceName)
	assert.Equal(t, 3, h.engine.calls[0].turn)
}

// TestHandleAcceptsAnyNonNotFoundStatus asserts report retrieval is
// status-agnostic: any existing runtime record serves reports.
func TestHandleAcceptsAnyNonNotFoundStatus(t *testing.T) {
	for _, status := range []runtime.Status{
		runtime.StatusStarting,
		runtime.StatusRunning,
		runtime.StatusGenerationInProgress,
		runtime.StatusGenerationFailed,
		runtime.StatusStopped,
		runtime.StatusEngineUnreachable,
		runtime.StatusFinished,
	} {
		t.Run(string(status), func(t *testing.T) {
			h := newHarness(t)
			h.seedRecordWithStatus(status)
			h.seedActiveMembership()
			h.seedPlayerMapping()
			h.engine.body = json.RawMessage(`{"version":1,"turn":0,"player":[]}`)
			result, err := h.service.Handle(context.Background(), h.input(0))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeSuccess, result.Outcome,
				"reports must be served regardless of status; got %s", result.ErrorCode)
		})
	}
}
// TestHandleInvalidRequest checks each structural-validation failure of
// the input envelope maps to invalid_request with a pointed message.
func TestHandleInvalidRequest(t *testing.T) {
	cases := []struct {
		name    string
		input   reportget.Input
		message string // substring expected in ErrorMessage
	}{
		{"empty game id", reportget.Input{UserID: testUserID, Turn: 0}, "game id"},
		{"empty user id", reportget.Input{GameID: testGameID, Turn: 0}, "user id"},
		{"negative turn", reportget.Input{GameID: testGameID, UserID: testUserID, Turn: -1}, "turn"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			result, err := h.service.Handle(context.Background(), tc.input)
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, reportget.ErrorCodeInvalidRequest, result.ErrorCode)
			assert.Contains(t, result.ErrorMessage, tc.message)
		})
	}
}

// TestHandleRuntimeNotFound checks a missing runtime record maps to
// runtime_not_found.
func TestHandleRuntimeNotFound(t *testing.T) {
	h := newHarness(t)
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeRuntimeNotFound, result.ErrorCode)
}

// TestHandleRuntimeStoreError checks a runtime-store outage maps to
// service_unavailable.
func TestHandleRuntimeStoreError(t *testing.T) {
	h := newHarness(t)
	h.runtimes.getErr = errors.New("postgres down")
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeServiceUnavailable, result.ErrorCode)
}

// TestHandleForbiddenInactiveMembership checks any non-active (or
// absent) lobby membership yields forbidden and never reaches the
// engine.
func TestHandleForbiddenInactiveMembership(t *testing.T) {
	cases := []struct {
		name    string
		members []ports.Membership
	}{
		{"removed", []ports.Membership{{UserID: testUserID, RaceName: testRaceName, Status: "removed"}}},
		{"blocked", []ports.Membership{{UserID: testUserID, RaceName: testRaceName, Status: "blocked"}}},
		{"unknown user", []ports.Membership{{UserID: "ghost", RaceName: "Ghost", Status: "active"}}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			h.seedRecordWithStatus(runtime.StatusRunning)
			h.seedPlayerMapping()
			h.lobby.seed(testGameID, tc.members)
			result, err := h.service.Handle(context.Background(), h.input(0))
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, reportget.ErrorCodeForbidden, result.ErrorCode)
			assert.Empty(t, h.engine.calls)
		})
	}
}

// TestHandleForbiddenMissingPlayerMapping checks an active member with
// no player mapping yields forbidden and never reaches the engine.
func TestHandleForbiddenMissingPlayerMapping(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedActiveMembership()
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeForbidden, result.ErrorCode)
	assert.Empty(t, h.engine.calls)
}

// TestHandleServiceUnavailableLobbyDown checks a lobby outage maps to
// service_unavailable.
func TestHandleServiceUnavailableLobbyDown(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedPlayerMapping()
	h.lobby.seedErr(testGameID, fmt.Errorf("dial: %w", ports.ErrLobbyUnavailable))
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeServiceUnavailable, result.ErrorCode)
}

// TestHandleServiceUnavailablePlayerMappingsError checks a
// player-mapping store outage maps to service_unavailable.
func TestHandleServiceUnavailablePlayerMappingsError(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedActiveMembership()
	h.mappings.getErr = errors.New("postgres down")
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeServiceUnavailable, result.ErrorCode)
}
// TestHandleEngineUnreachable checks the unreachable sentinel from the
// engine client maps to engine_unreachable.
func TestHandleEngineUnreachable(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.err = fmt.Errorf("dial: %w", ports.ErrEngineUnreachable)
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeEngineUnreachable, result.ErrorCode)
}

// TestHandleEngineValidationErrorForwardsBody checks a 4xx-style engine
// failure maps to engine_validation_error AND the engine's error body
// is still surfaced in RawResponse.
func TestHandleEngineValidationErrorForwardsBody(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.body = json.RawMessage(`{"error":"unknown turn"}`)
	h.engine.err = fmt.Errorf("400: %w", ports.ErrEngineValidation)
	result, err := h.service.Handle(context.Background(), h.input(99))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeEngineValidationError, result.ErrorCode)
	assert.JSONEq(t, string(h.engine.body), string(result.RawResponse))
}

// TestHandleEngineProtocolViolation checks a malformed engine response
// maps to engine_protocol_violation.
func TestHandleEngineProtocolViolation(t *testing.T) {
	h := newHarness(t)
	h.seedRecordWithStatus(runtime.StatusRunning)
	h.seedActiveMembership()
	h.seedPlayerMapping()
	h.engine.err = fmt.Errorf("garbled: %w", ports.ErrEngineProtocolViolation)
	result, err := h.service.Handle(context.Background(), h.input(0))
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, reportget.ErrorCodeEngineProtocolViolation, result.ErrorCode)
}

// TestHandleNilContext checks a nil context is a Go-level error, not a
// business Result.
func TestHandleNilContext(t *testing.T) {
	h := newHarness(t)
	var nilCtx context.Context
	_, err := h.service.Handle(nilCtx, h.input(0))
	require.Error(t, err)
}

// TestHandleNilReceiver checks calling Handle on a nil *Service is a
// Go-level error rather than a panic.
func TestHandleNilReceiver(t *testing.T) {
	var svc *reportget.Service
	_, err := svc.Handle(context.Background(), reportget.Input{})
	require.Error(t, err)
}
@@ -0,0 +1,59 @@
// Package scheduler exposes the next-tick computation Game Master uses
// to advance `runtime_records.next_generation_at` after a successful
// turn generation. It is a thin, stateless wrapper over
// `domain/schedule.Schedule.Next` with the force-next-turn skip rule
// baked in via the `skipNextTick` parameter.
//
// Two callers consume the wrapper today:
//
// - `service/turngeneration` recomputes the next tick after a
// successful (non-finished) generation;
// - `service/adminforce` (Stage 17) reuses the same instance so the
// skip rule lives in exactly one place.
//
// The package depends only on `domain/schedule` and stdlib `time`. It
// holds no clock and no logger; callers pass `after` explicitly.
package scheduler
import (
"errors"
"strings"
"time"
"galaxy/gamemaster/internal/domain/schedule"
)
// Service computes the next scheduler-driven turn-generation tick. The
// struct holds no fields, so a Service value is stateless and safe to
// share across goroutines.
type Service struct{}

// New constructs a stateless Service value. Returning a pointer keeps
// the construction shape consistent with the other GM services even
// though Service has no dependencies.
func New() *Service {
	return &Service{}
}
// ComputeNext parses turnSchedule and returns the next firing time
// strictly after `after`, applying the force-next-turn skip rule when
// skipNextTick is true.
//
// With skipNextTick set, the immediate next cron step is computed and
// then advanced by one further step, guaranteeing the inter-turn
// spacing is never shorter than one schedule interval. The returned
// boolean reports whether the skip was consumed (true exactly when
// skipNextTick was true).
//
// A parse failure yields the zero time, false, and the error wrapped
// from `schedule.Parse`; mapping it to the orchestrator-level
// `invalid_request` code is the caller's job.
func (service *Service) ComputeNext(turnSchedule string, after time.Time, skipNextTick bool) (time.Time, bool, error) {
	if service == nil {
		return time.Time{}, false, errors.New("scheduler compute next: nil service")
	}
	cron := strings.TrimSpace(turnSchedule)
	parsed, err := schedule.Parse(cron)
	if err != nil {
		return time.Time{}, false, err
	}
	next, consumed := parsed.Next(after, skipNextTick)
	return next, consumed, nil
}
@@ -0,0 +1,63 @@
package scheduler_test
import (
"testing"
"time"
"galaxy/gamemaster/internal/service/scheduler"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestComputeNextHappyPathWithoutSkip checks the plain next-tick
// computation and that the result stays in UTC.
func TestComputeNextHappyPathWithoutSkip(t *testing.T) {
	service := scheduler.New()
	after := time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC)
	next, skipConsumed, err := service.ComputeNext("0 18 * * *", after, false)
	require.NoError(t, err)
	assert.False(t, skipConsumed)
	expected := time.Date(2026, time.April, 30, 18, 0, 0, 0, time.UTC)
	assert.Equal(t, expected, next)
	assert.Equal(t, time.UTC, next.Location())
}

// TestComputeNextConsumesSkip checks that skipNextTick advances past
// the immediate firing to the one after it and reports consumption.
func TestComputeNextConsumesSkip(t *testing.T) {
	service := scheduler.New()
	after := time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC)
	next, skipConsumed, err := service.ComputeNext("0 18 * * *", after, true)
	require.NoError(t, err)
	assert.True(t, skipConsumed)
	expected := time.Date(2026, time.May, 1, 18, 0, 0, 0, time.UTC)
	assert.Equal(t, expected, next)
}

// TestComputeNextEveryQuarterHourSkip pins the skip semantics to
// exactly one cron step, using a sub-daily schedule.
func TestComputeNextEveryQuarterHourSkip(t *testing.T) {
	service := scheduler.New()
	after := time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC)
	first, _, err := service.ComputeNext("*/15 * * * *", after, false)
	require.NoError(t, err)
	skipped, _, err := service.ComputeNext("*/15 * * * *", after, true)
	require.NoError(t, err)
	assert.Equal(t, first.Add(15*time.Minute), skipped, "skip advances by exactly one cron step")
}

// TestComputeNextRejectsInvalidCron checks a malformed expression is
// surfaced as an error.
func TestComputeNextRejectsInvalidCron(t *testing.T) {
	service := scheduler.New()
	_, _, err := service.ComputeNext("not-a-cron", time.Now().UTC(), false)
	require.Error(t, err)
}

// TestComputeNextTrimsWhitespace checks surrounding whitespace in the
// schedule string is tolerated.
func TestComputeNextTrimsWhitespace(t *testing.T) {
	service := scheduler.New()
	after := time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC)
	next, _, err := service.ComputeNext(" 0 18 * * * ", after, false)
	require.NoError(t, err)
	expected := time.Date(2026, time.April, 30, 18, 0, 0, 0, time.UTC)
	assert.Equal(t, expected, next)
}

// TestNilServiceRejected checks the nil-receiver guard returns an error
// rather than panicking.
func TestNilServiceRejected(t *testing.T) {
	var service *scheduler.Service
	_, _, err := service.ComputeNext("0 18 * * *", time.Now().UTC(), false)
	require.Error(t, err)
}
@@ -0,0 +1,56 @@
package turngeneration
// Stable error codes returned in `Result.ErrorCode`. The values match
// the vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Stages 17 and 19 import these
// names rather than redeclare them; renaming any of them is a contract
// change. The string values are equally frozen: they are serialized to
// API clients.
const (
	// ErrorCodeInvalidRequest reports that the input envelope failed
	// structural validation (empty game id, unsupported trigger,
	// unsupported op_source) or that the runtime record's stored
	// `turn_schedule` could not be parsed at recompute time.
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeRuntimeNotFound reports that no `runtime_records` row
	// exists for the requested game id. The orchestrator does no other
	// work and never publishes events.
	ErrorCodeRuntimeNotFound = "runtime_not_found"

	// ErrorCodeRuntimeNotRunning reports that the runtime exists but
	// its current status is not `running`. The orchestrator returns
	// without calling the engine.
	ErrorCodeRuntimeNotRunning = "runtime_not_running"

	// ErrorCodeConflict reports that a CAS guard failed mid-flow
	// because the runtime row changed concurrently (typical cause:
	// admin issued a stop while a generation was in progress).
	ErrorCodeConflict = "conflict"

	// ErrorCodeEngineUnreachable reports that the engine /admin/turn
	// call returned a 5xx status, timed out, or could not be
	// dispatched. The runtime row is moved to `generation_failed` and a
	// snapshot plus admin notification are published before the code
	// reaches the caller.
	ErrorCodeEngineUnreachable = "engine_unreachable"

	// ErrorCodeEngineValidationError reports that the engine
	// /admin/turn call returned a 4xx status. Distinguished from
	// `engine_unreachable` so operators can tell "engine is alive but
	// rejected the request shape" from "engine is unreachable".
	ErrorCodeEngineValidationError = "engine_validation_error"

	// ErrorCodeEngineProtocolViolation reports that the engine response
	// did not match the expected schema or did not match the runtime's
	// installed roster (player count mismatch, race-name set mismatch,
	// missing required fields).
	ErrorCodeEngineProtocolViolation = "engine_protocol_violation"

	// ErrorCodeServiceUnavailable reports that a steady-state
	// dependency (PostgreSQL, Redis) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
@@ -0,0 +1,971 @@
// Package turngeneration implements the turn-generation orchestrator
// owned by Game Master. It is the single entry point through which the
// scheduler ticker (Stage 15 worker) and the admin force-next-turn flow
// (Stage 17) drive a turn through the engine container.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Turn generation` and §Force-next-turn. Design rationale
// is captured in
// `gamemaster/docs/stage15-scheduler-and-turn-generation.md`.
package turngeneration
import (
"context"
"errors"
"fmt"
"log/slog"
"sort"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/scheduler"
"galaxy/gamemaster/internal/telemetry"
"galaxy/notificationintent"
)
// Trigger classifies the caller of one turn-generation operation. The
// value flows into telemetry and structured logs only — it does not
// branch the orchestrator's persistence path. The skip-tick mechanic is
// driven exclusively by the runtime record's `skip_next_tick` column.
type Trigger string

const (
	// TriggerScheduler labels turn generations dispatched by the
	// `schedulerticker` worker.
	TriggerScheduler Trigger = "scheduler"
	// TriggerForce labels turn generations dispatched by the admin
	// force-next-turn flow (Stage 17 `service/adminforce`).
	TriggerForce Trigger = "force"
)

// IsKnown reports whether trigger belongs to the frozen trigger
// vocabulary (scheduler or force).
func (trigger Trigger) IsKnown() bool {
	return trigger == TriggerScheduler || trigger == TriggerForce
}
// Input stores the per-call arguments for one turn-generation
// operation. Validate checks the structural invariants before any
// store is touched.
type Input struct {
	// GameID identifies the runtime to drive.
	GameID string
	// Trigger classifies the caller. Used for telemetry and logs only.
	Trigger Trigger
	// OpSource classifies how the request entered Game Master. Used to
	// stamp `operation_log.op_source`. Defaults to `admin_rest` when
	// missing or unrecognised.
	OpSource operation.OpSource
	// SourceRef stores the optional opaque per-source reference (REST
	// request id, scheduler tick id). Empty when the caller does not
	// provide one.
	SourceRef string
}
// Validate reports whether input carries the structural invariants the
// service requires before any store is touched: a non-blank game id
// and trigger/op-source values from their frozen vocabularies.
func (input Input) Validate() error {
	switch {
	case strings.TrimSpace(input.GameID) == "":
		return fmt.Errorf("game id must not be empty")
	case !input.Trigger.IsKnown():
		return fmt.Errorf("trigger %q is unsupported", input.Trigger)
	case !input.OpSource.IsKnown():
		return fmt.Errorf("op source %q is unsupported", input.OpSource)
	}
	return nil
}
// Result stores the deterministic outcome of one Handle call. Exactly
// one of the two shapes is populated: success fields, or the
// ErrorCode/ErrorMessage pair.
type Result struct {
	// Record carries the post-mutation runtime record. Populated on
	// every success outcome and on `engine_*` failures (where the row
	// was moved to `generation_failed`); zero on early-rejection
	// outcomes (`invalid_request`, `runtime_not_found`,
	// `runtime_not_running`, `conflict` on initial CAS,
	// `service_unavailable` on initial Get).
	Record runtime.RuntimeRecord
	// Trigger echoes back Input.Trigger for log/telemetry consumers.
	Trigger Trigger
	// Finished is true when the engine reported `finished=true` on this
	// turn and the runtime transitioned to `finished`.
	Finished bool
	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome
	// ErrorCode stores the stable error code on failure. Empty on
	// success.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}

// IsSuccess reports whether the result represents a successful
// operation (Outcome equals operation.OutcomeSuccess).
func (result Result) IsSuccess() bool {
	return result.Outcome == operation.OutcomeSuccess
}
// Dependencies groups the collaborators required by Service. All fields
// except Logger and Clock are mandatory; NewService rejects nil values
// for the rest.
type Dependencies struct {
	// RuntimeRecords drives every CAS and scheduling persistence step.
	RuntimeRecords ports.RuntimeRecordStore
	// PlayerMappings supplies the per-game roster used to project
	// engine player state to user-facing notification recipients and
	// `player_turn_stats`.
	PlayerMappings ports.PlayerMappingStore
	// OperationLogs records the audit entry for the operation.
	OperationLogs ports.OperationLogStore
	// Engine drives the engine /admin/turn call.
	Engine ports.EngineClient
	// LobbyEvents publishes `runtime_snapshot_update` and
	// `game_finished` to `gm:lobby_events`.
	LobbyEvents ports.LobbyEventsPublisher
	// Notifications publishes `game.turn.ready`, `game.finished`, and
	// `game.generation_failed` intents to `notification:intents`.
	Notifications ports.NotificationIntentPublisher
	// Lobby resolves the human-readable `game_name` consumed by
	// notification payloads. Failure is fail-soft: the orchestrator
	// falls back to `game_id`.
	Lobby ports.LobbyClient
	// Scheduler computes the post-success `next_generation_at` value.
	Scheduler *scheduler.Service
	// Telemetry records the turn-generation outcome counter, lobby
	// publication counter, and notification publish-attempt counter.
	Telemetry *telemetry.Runtime
	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}

// Service executes the turn-generation lifecycle operation. Construct
// via NewService; the zero value is unusable.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	playerMappings ports.PlayerMappingStore
	operationLogs  ports.OperationLogStore
	engine         ports.EngineClient
	lobbyEvents    ports.LobbyEventsPublisher
	notifications  ports.NotificationIntentPublisher
	lobby          ports.LobbyClient
	scheduler      *scheduler.Service
	telemetry      *telemetry.Runtime
	logger         *slog.Logger
	clock          func() time.Time
}
// NewService constructs one Service from deps. Every collaborator
// except Logger and Clock is mandatory; a nil one is reported as an
// error so wiring mistakes surface at startup rather than mid-flow.
func NewService(deps Dependencies) (*Service, error) {
	if deps.RuntimeRecords == nil {
		return nil, errors.New("new turn generation service: nil runtime records")
	}
	if deps.PlayerMappings == nil {
		return nil, errors.New("new turn generation service: nil player mappings")
	}
	if deps.OperationLogs == nil {
		return nil, errors.New("new turn generation service: nil operation logs")
	}
	if deps.Engine == nil {
		return nil, errors.New("new turn generation service: nil engine client")
	}
	if deps.LobbyEvents == nil {
		return nil, errors.New("new turn generation service: nil lobby events publisher")
	}
	if deps.Notifications == nil {
		return nil, errors.New("new turn generation service: nil notification publisher")
	}
	if deps.Lobby == nil {
		return nil, errors.New("new turn generation service: nil lobby client")
	}
	if deps.Scheduler == nil {
		return nil, errors.New("new turn generation service: nil scheduler")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new turn generation service: nil telemetry runtime")
	}
	// Optional collaborators fall back to sensible defaults.
	clock := time.Now
	if deps.Clock != nil {
		clock = deps.Clock
	}
	logger := slog.Default()
	if deps.Logger != nil {
		logger = deps.Logger
	}
	return &Service{
		runtimeRecords: deps.RuntimeRecords,
		playerMappings: deps.PlayerMappings,
		operationLogs:  deps.OperationLogs,
		engine:         deps.Engine,
		lobbyEvents:    deps.LobbyEvents,
		notifications:  deps.Notifications,
		lobby:          deps.Lobby,
		scheduler:      deps.Scheduler,
		telemetry:      deps.Telemetry,
		logger:         logger.With("service", "gamemaster.turngeneration"),
		clock:          clock,
	}, nil
}
// Handle executes one turn-generation operation end-to-end. The
// Go-level error return is reserved for non-business failures (nil
// context, nil receiver). Every business outcome flows through Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("turn generation: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("turn generation: nil context")
	}
	// One timestamp is taken up front so every persistence step in this
	// operation shares the same start time.
	opStartedAt := service.clock().UTC()
	if err := input.Validate(); err != nil {
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, err.Error()), nil
	}
	// Load the runtime row; not-found and store errors stop here.
	record, outcome, ok := service.loadRecord(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}
	// Only `running` runtimes may generate a turn.
	if record.Status != runtime.StatusRunning {
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotRunning,
			fmt.Sprintf("runtime status is %q, expected %q",
				record.Status, runtime.StatusRunning)), nil
	}
	// CAS to generation_in_progress before touching the engine so
	// concurrent callers observe the in-flight generation.
	if outcome, ok := service.casToInProgress(ctx, opStartedAt, input); !ok {
		return outcome, nil
	}
	state, engineOK, engineCode, engineMsg := service.callEngineTurn(ctx, record)
	mappings, listErr := service.playerMappings.ListByGame(ctx, input.GameID)
	if listErr != nil {
		// Without mappings we cannot project player_turn_stats; treat
		// as a service_unavailable failure but still try to roll the
		// runtime to generation_failed because the engine call may
		// have already mutated state.
		return service.failGeneration(ctx, opStartedAt, input, record,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("list player mappings: %s", listErr.Error())), nil
	}
	// Engine failure is reported only after the mapping check above, so
	// a simultaneous store outage surfaces as service_unavailable.
	if !engineOK {
		return service.failGeneration(ctx, opStartedAt, input, record,
			engineCode, engineMsg), nil
	}
	if outcome, ok := service.validateRoster(ctx, opStartedAt, input, record, state, mappings); !ok {
		return outcome, nil
	}
	// Success: branch on whether the engine declared the game finished.
	if state.Finished {
		return service.completeFinished(ctx, opStartedAt, input, record, state, mappings), nil
	}
	return service.completeRunning(ctx, opStartedAt, input, record, state, mappings), nil
}
// loadRecord fetches the runtime row for input.GameID and translates
// store errors into orchestrator outcomes. When ok is false the caller
// must stop and return the supplied Result; no status was mutated yet.
func (service *Service) loadRecord(ctx context.Context, opStartedAt time.Time, input Input) (runtime.RuntimeRecord, Result, bool) {
	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	if err == nil {
		return record, Result{}, true
	}
	if errors.Is(err, runtime.ErrNotFound) {
		outcome := service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist")
		return runtime.RuntimeRecord{}, outcome, false
	}
	outcome := service.recordEarlyFailure(ctx, opStartedAt, input,
		ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error()))
	return runtime.RuntimeRecord{}, outcome, false
}
// casToInProgress attempts the compare-and-swap that moves the runtime
// row from `running` to `generation_in_progress`. When ok is false the
// flow must stop with the returned Result; the engine has not been
// touched at this point.
func (service *Service) casToInProgress(ctx context.Context, opStartedAt time.Time, input Input) (Result, bool) {
	err := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusGenerationInProgress,
		Now:          opStartedAt,
	})
	if err == nil {
		return Result{}, true
	}
	// Map the store sentinel onto the stable error vocabulary; any
	// unrecognised error is reported as an infrastructure outage.
	errorCode := ErrorCodeServiceUnavailable
	switch {
	case errors.Is(err, runtime.ErrConflict):
		errorCode = ErrorCodeConflict
	case errors.Is(err, runtime.ErrNotFound):
		errorCode = ErrorCodeRuntimeNotFound
	}
	return service.recordEarlyFailure(ctx, opStartedAt, input, errorCode,
		fmt.Sprintf("cas runtime status to generation_in_progress: %s", err.Error())), false
}
// callEngineTurn performs the engine /admin/turn request and classifies
// the result. engineOK=true reports a transport-level well-formed
// response; otherwise errorCode/errorMessage carry the stable failure
// shape derived from the port sentinels.
func (service *Service) callEngineTurn(ctx context.Context, record runtime.RuntimeRecord) (state ports.StateResponse, engineOK bool, errorCode string, errorMessage string) {
	response, err := service.engine.Turn(ctx, record.EngineEndpoint)
	if err != nil {
		return ports.StateResponse{}, false, classifyEngineError(err), fmt.Sprintf("engine turn: %s", err.Error())
	}
	return response, true, "", ""
}
// classifyEngineError translates the engine port sentinels into the
// stable turn-generation error codes. An unclassified error shares the
// same caller-visible code as ports.ErrEngineUnreachable.
func classifyEngineError(err error) string {
	if errors.Is(err, ports.ErrEngineValidation) {
		return ErrorCodeEngineValidationError
	}
	if errors.Is(err, ports.ErrEngineProtocolViolation) {
		return ErrorCodeEngineProtocolViolation
	}
	return ErrorCodeEngineUnreachable
}
// validateRoster verifies that the engine response names exactly the
// race set installed at register-runtime. ok=false stops the flow after
// the runtime row has been moved to `generation_failed`.
func (service *Service) validateRoster(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, state ports.StateResponse, mappings []playermapping.PlayerMapping) (Result, bool) {
	if len(state.Players) != len(mappings) {
		return service.failGeneration(ctx, opStartedAt, input, record,
			ErrorCodeEngineProtocolViolation,
			fmt.Sprintf("engine player count %d does not match roster size %d",
				len(state.Players), len(mappings))), false
	}
	// Equal counts plus subset membership below prove set equality.
	roster := make(map[string]struct{}, len(mappings))
	for _, member := range mappings {
		roster[member.RaceName] = struct{}{}
	}
	for _, player := range state.Players {
		if _, present := roster[player.RaceName]; present {
			continue
		}
		return service.failGeneration(ctx, opStartedAt, input, record,
			ErrorCodeEngineProtocolViolation,
			fmt.Sprintf("engine returned race %q not present in roster", player.RaceName)), false
	}
	return Result{}, true
}
// completeFinished handles the `finished=true` branch: CAS to finished,
// clear scheduling, publish game_finished, publish game.finished
// notification, audit success.
//
// Store failures change the returned outcome; event and notification
// publish failures are logged and swallowed (best-effort).
func (service *Service) completeFinished(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, state ports.StateResponse, mappings []playermapping.PlayerMapping) Result {
	finishedAt := service.clock().UTC()
	// Terminal CAS: generation_in_progress -> finished. A failure here
	// means an external actor won the race (or the store is down).
	err := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusGenerationInProgress,
		To:           runtime.StatusFinished,
		Now:          finishedAt,
	})
	if err != nil {
		return service.handlePostEngineCASFailure(ctx, opStartedAt, input, record, err)
	}
	// A finished game never ticks again: clear the next-generation
	// timestamp and persist the final turn number.
	if err := service.runtimeRecords.UpdateScheduling(ctx, ports.UpdateSchedulingInput{
		GameID:           input.GameID,
		NextGenerationAt: nil,
		SkipNextTick:     false,
		CurrentTurn:      state.Turn,
		Now:              finishedAt,
	}); err != nil {
		// The CAS to finished succeeded; the row is in the terminal
		// state. Surface a service_unavailable to the caller but keep
		// the audit and snapshot consistent.
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("update scheduling on finish: %s", err.Error()))
	}
	// Reload so Result.Record mirrors the stored terminal row.
	persisted, reloadErr := service.runtimeRecords.Get(ctx, input.GameID)
	if reloadErr != nil {
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("reload runtime record: %s", reloadErr.Error()))
	}
	stats := projectPlayerStats(state, mappings)
	finishedMsg := ports.GameFinished{
		GameID:          input.GameID,
		FinalTurnNumber: state.Turn,
		RuntimeStatus:   runtime.StatusFinished,
		PlayerTurnStats: stats,
		FinishedAt:      finishedAt,
	}
	// Best-effort lobby event: a publish failure is logged, not
	// surfaced to the caller.
	if err := service.lobbyEvents.PublishGameFinished(ctx, finishedMsg); err != nil {
		service.logger.ErrorContext(ctx, "publish game finished",
			"game_id", input.GameID,
			"err", err.Error(),
		)
	} else {
		service.telemetry.RecordLobbyEventPublished(ctx, "game_finished")
	}
	gameName := service.resolveGameName(ctx, input.GameID)
	recipients := recipientUserIDs(mappings)
	service.publishGameFinishedIntent(ctx, input, gameName, state.Turn, recipients, finishedAt)
	service.appendSuccessLog(ctx, opStartedAt, input)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeSuccess), "", string(input.Trigger))
	logArgs := []any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"final_turn", state.Turn,
		"finished", true,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "turn generation finished game", logArgs...)
	return Result{
		Record:   persisted,
		Trigger:  input.Trigger,
		Finished: true,
		Outcome:  operation.OutcomeSuccess,
	}
}
// completeRunning handles the `finished=false` branch: recompute next
// tick, CAS back to running, publish snapshot, publish
// game.turn.ready notification, audit success.
//
// Store failures change the returned outcome; snapshot and
// notification publish failures are logged and swallowed.
func (service *Service) completeRunning(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, state ports.StateResponse, mappings []playermapping.PlayerMapping) Result {
	completedAt := service.clock().UTC()
	// ComputeNext consumes the stored SkipNextTick flag; the
	// scheduling update below resets it to false for the next cycle.
	next, _, err := service.scheduler.ComputeNext(record.TurnSchedule, completedAt, record.SkipNextTick)
	if err != nil {
		// NOTE(review): the schedule string came from the stored row,
		// not the caller, yet this surfaces as invalid_request —
		// confirm against the frozen error vocabulary.
		return service.failGeneration(ctx, opStartedAt, input, record,
			ErrorCodeInvalidRequest,
			fmt.Sprintf("recompute next tick: %s", err.Error()))
	}
	// CAS back: generation_in_progress -> running.
	if err := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusGenerationInProgress,
		To:           runtime.StatusRunning,
		Now:          completedAt,
	}); err != nil {
		return service.handlePostEngineCASFailure(ctx, opStartedAt, input, record, err)
	}
	// Persist the recomputed tick and the new current turn.
	if err := service.runtimeRecords.UpdateScheduling(ctx, ports.UpdateSchedulingInput{
		GameID:           input.GameID,
		NextGenerationAt: &next,
		SkipNextTick:     false,
		CurrentTurn:      state.Turn,
		Now:              completedAt,
	}); err != nil {
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("update scheduling on running: %s", err.Error()))
	}
	// Reload so Result.Record and the snapshot reflect the stored row.
	persisted, reloadErr := service.runtimeRecords.Get(ctx, input.GameID)
	if reloadErr != nil {
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("reload runtime record: %s", reloadErr.Error()))
	}
	stats := projectPlayerStats(state, mappings)
	snapshot := ports.RuntimeSnapshotUpdate{
		GameID:              input.GameID,
		CurrentTurn:         state.Turn,
		RuntimeStatus:       runtime.StatusRunning,
		EngineHealthSummary: persisted.EngineHealth,
		PlayerTurnStats:     stats,
		OccurredAt:          completedAt,
	}
	// Best-effort snapshot publication.
	if err := service.lobbyEvents.PublishSnapshotUpdate(ctx, snapshot); err != nil {
		service.logger.ErrorContext(ctx, "publish runtime snapshot update",
			"game_id", input.GameID,
			"err", err.Error(),
		)
	} else {
		service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
	}
	gameName := service.resolveGameName(ctx, input.GameID)
	recipients := recipientUserIDs(mappings)
	service.publishGameTurnReadyIntent(ctx, input, gameName, state.Turn, recipients, completedAt)
	service.appendSuccessLog(ctx, opStartedAt, input)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeSuccess), "", string(input.Trigger))
	logArgs := []any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"current_turn", state.Turn,
		"next_generation_at", next.Format(time.RFC3339Nano),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "turn generation succeeded", logArgs...)
	return Result{
		Record:  persisted,
		Trigger: input.Trigger,
		Outcome: operation.OutcomeSuccess,
	}
}
// failGeneration handles every post-CAS failure path: CAS to
// generation_failed, publish snapshot, publish game.generation_failed
// admin notification, audit failure.
//
// The caller-supplied errorCode/errorMessage always win; secondary
// failures along the way are logged but never replace them.
func (service *Service) failGeneration(ctx context.Context, opStartedAt time.Time, input Input, _ runtime.RuntimeRecord, errorCode string, errorMessage string) Result {
	failedAt := service.clock().UTC()
	casErr := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusGenerationInProgress,
		To:           runtime.StatusGenerationFailed,
		Now:          failedAt,
	})
	// A conflict is tolerated silently: some other actor already moved
	// the row, and that actor owns the status.
	if casErr != nil && !errors.Is(casErr, runtime.ErrConflict) {
		// Best-effort transition. The original error code remains the
		// caller-visible one; log the secondary failure.
		service.logger.ErrorContext(ctx, "cas runtime status to generation_failed",
			"game_id", input.GameID,
			"err", casErr.Error(),
		)
	}
	// Reload is best-effort too: on failure the snapshot falls back to
	// generation_failed plus zero values (persistedTurn/persistedHealth).
	persisted, reloadErr := service.runtimeRecords.Get(ctx, input.GameID)
	publishedStatus := runtime.StatusGenerationFailed
	if reloadErr == nil {
		publishedStatus = persisted.Status
	}
	snapshot := ports.RuntimeSnapshotUpdate{
		GameID:              input.GameID,
		CurrentTurn:         persistedTurn(persisted, reloadErr),
		RuntimeStatus:       publishedStatus,
		EngineHealthSummary: persistedHealth(persisted, reloadErr),
		PlayerTurnStats:     nil,
		OccurredAt:          failedAt,
	}
	if err := service.lobbyEvents.PublishSnapshotUpdate(ctx, snapshot); err != nil {
		service.logger.ErrorContext(ctx, "publish runtime snapshot update on failure",
			"game_id", input.GameID,
			"err", err.Error(),
		)
	} else {
		service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
	}
	gameName := service.resolveGameName(ctx, input.GameID)
	service.publishGameGenerationFailedIntent(ctx, input, gameName, errorCode, errorMessage, failedAt)
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeFailure), errorCode, string(input.Trigger))
	logArgs := []any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "turn generation failed", logArgs...)
	return Result{
		Record:       persisted,
		Trigger:      input.Trigger,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// handlePostEngineCASFailure maps a CAS error raised after the engine
// call already succeeded. Conflict means an external actor (e.g. admin
// stop) won the race; not-found means the row vanished; anything else
// is treated as service_unavailable. No publication is issued — the
// external mutation owns its own snapshot.
func (service *Service) handlePostEngineCASFailure(ctx context.Context, opStartedAt time.Time, input Input, _ runtime.RuntimeRecord, casErr error) Result {
	errorCode := ErrorCodeServiceUnavailable
	switch {
	case errors.Is(casErr, runtime.ErrConflict):
		errorCode = ErrorCodeConflict
	case errors.Is(casErr, runtime.ErrNotFound):
		errorCode = ErrorCodeRuntimeNotFound
	}
	return service.recordTerminalFailure(ctx, opStartedAt, input, errorCode,
		fmt.Sprintf("cas runtime status post-engine: %s", casErr.Error()))
}
// recordEarlyFailure reports a failure that happened before the runtime
// row reached `generation_in_progress`: audit and telemetry only, no
// status mutation and no event publication.
func (service *Service) recordEarlyFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeFailure), errorCode, string(input.Trigger))
	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "turn generation rejected", attrs...)
	return Result{
		Trigger:      input.Trigger,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// recordTerminalFailure reports a failure raised after a post-engine
// CAS or reload went wrong. The runtime row is then owned by whichever
// mutation won the race, so only audit and telemetry are recorded —
// no further publication.
func (service *Service) recordTerminalFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeFailure), errorCode, string(input.Trigger))
	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "turn generation post-engine failure", attrs...)
	return Result{
		Trigger:      input.Trigger,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// resolveGameName asks Lobby for the human-readable game name and falls
// back to the platform game id on any error or blank name, per Stage
// 15 D1.
func (service *Service) resolveGameName(ctx context.Context, gameID string) string {
	summary, err := service.lobby.GetGameSummary(ctx, gameID)
	if err != nil {
		attrs := append([]any{
			"game_id", gameID,
			"error_code", "lobby_unavailable",
			"err", err.Error(),
		}, logging.ContextAttrs(ctx)...)
		service.logger.WarnContext(ctx, "resolve game name fell back to game id", attrs...)
		return gameID
	}
	if name := summary.GameName; strings.TrimSpace(name) != "" {
		return name
	}
	return gameID
}
// publishGameTurnReadyIntent publishes the user-targeted notification
// that announces a freshly generated turn. Empty recipient sets are
// dropped silently — the validator inside notificationintent rejects
// them outright, but the orchestrator should not break commit.
//
// All failures are logged and counted in telemetry; nothing is
// surfaced to the caller.
func (service *Service) publishGameTurnReadyIntent(ctx context.Context, input Input, gameName string, turnNumber int, recipients []string, occurredAt time.Time) {
	if len(recipients) == 0 {
		service.logger.WarnContext(ctx, "skip game.turn.ready notification: empty recipient set",
			"game_id", input.GameID,
		)
		return
	}
	// Idempotency key is game id + turn number, so a retried operation
	// for the same turn collapses to one notification.
	intent, err := notificationintent.NewGameTurnReadyIntent(
		notificationintent.Metadata{
			IdempotencyKey: fmt.Sprintf("game.turn.ready:%s:%d", input.GameID, turnNumber),
			OccurredAt:     occurredAt,
			RequestID:      logging.RequestIDFromContext(ctx),
		},
		recipients,
		notificationintent.GameTurnReadyPayload{
			GameID:     input.GameID,
			GameName:   gameName,
			TurnNumber: int64(turnNumber),
		},
	)
	if err != nil {
		// Construction failure (validation inside notificationintent).
		service.logger.ErrorContext(ctx, "build game.turn.ready intent",
			"game_id", input.GameID,
			"err", err.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx,
			string(notificationintent.NotificationTypeGameTurnReady), "error")
		return
	}
	if err := service.notifications.Publish(ctx, intent); err != nil {
		service.logger.ErrorContext(ctx, "publish game.turn.ready intent",
			"game_id", input.GameID,
			"err", err.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx,
			string(notificationintent.NotificationTypeGameTurnReady), "error")
		return
	}
	service.telemetry.RecordNotificationPublishAttempt(ctx,
		string(notificationintent.NotificationTypeGameTurnReady), "ok")
}
// publishGameFinishedIntent publishes the user-targeted notification
// that announces a finished game. Empty recipient sets are skipped
// with a warning; build and publish failures are logged and counted in
// telemetry but never surfaced to the caller.
func (service *Service) publishGameFinishedIntent(ctx context.Context, input Input, gameName string, finalTurnNumber int, recipients []string, occurredAt time.Time) {
	if len(recipients) == 0 {
		service.logger.WarnContext(ctx, "skip game.finished notification: empty recipient set",
			"game_id", input.GameID,
		)
		return
	}
	// Idempotency key is game id + final turn number, so retries of
	// the same finish collapse to one notification.
	intent, err := notificationintent.NewGameFinishedIntent(
		notificationintent.Metadata{
			IdempotencyKey: fmt.Sprintf("game.finished:%s:%d", input.GameID, finalTurnNumber),
			OccurredAt:     occurredAt,
			RequestID:      logging.RequestIDFromContext(ctx),
		},
		recipients,
		notificationintent.GameFinishedPayload{
			GameID:          input.GameID,
			GameName:        gameName,
			FinalTurnNumber: int64(finalTurnNumber),
		},
	)
	if err != nil {
		service.logger.ErrorContext(ctx, "build game.finished intent",
			"game_id", input.GameID,
			"err", err.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx,
			string(notificationintent.NotificationTypeGameFinished), "error")
		return
	}
	if err := service.notifications.Publish(ctx, intent); err != nil {
		service.logger.ErrorContext(ctx, "publish game.finished intent",
			"game_id", input.GameID,
			"err", err.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx,
			string(notificationintent.NotificationTypeGameFinished), "error")
		return
	}
	service.telemetry.RecordNotificationPublishAttempt(ctx,
		string(notificationintent.NotificationTypeGameFinished), "ok")
}
// publishGameGenerationFailedIntent publishes the admin-email
// notification that announces a failed turn generation. Unlike the
// user-facing intents it takes no recipient list. Build and publish
// failures are logged and counted in telemetry but never surfaced.
func (service *Service) publishGameGenerationFailedIntent(ctx context.Context, input Input, gameName string, errorCode string, errorMessage string, occurredAt time.Time) {
	// Collapse code + message into one human-readable reason; the code
	// alone is used when the message is blank.
	failureReason := errorCode
	if strings.TrimSpace(errorMessage) != "" {
		failureReason = fmt.Sprintf("%s: %s", errorCode, errorMessage)
	}
	// Keyed by occurrence time (not turn number) so distinct failures
	// of the same game each produce their own notification.
	intent, err := notificationintent.NewGameGenerationFailedIntent(
		notificationintent.Metadata{
			IdempotencyKey: fmt.Sprintf("game.generation_failed:%s:%d",
				input.GameID, occurredAt.UnixMilli()),
			OccurredAt: occurredAt,
			RequestID:  logging.RequestIDFromContext(ctx),
		},
		notificationintent.GameGenerationFailedPayload{
			GameID:        input.GameID,
			GameName:      gameName,
			FailureReason: failureReason,
		},
	)
	if err != nil {
		service.logger.ErrorContext(ctx, "build game.generation_failed intent",
			"game_id", input.GameID,
			"err", err.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx,
			string(notificationintent.NotificationTypeGameGenerationFailed), "error")
		return
	}
	if err := service.notifications.Publish(ctx, intent); err != nil {
		service.logger.ErrorContext(ctx, "publish game.generation_failed intent",
			"game_id", input.GameID,
			"err", err.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx,
			string(notificationintent.NotificationTypeGameGenerationFailed), "error")
		return
	}
	service.telemetry.RecordNotificationPublishAttempt(ctx,
		string(notificationintent.NotificationTypeGameGenerationFailed), "ok")
}
// projectPlayerStats joins engine players on RaceName against the
// installed roster, producing one PlayerTurnStats per active member.
// Output is ordered by UserID so the wire representation is
// deterministic.
func projectPlayerStats(state ports.StateResponse, mappings []playermapping.PlayerMapping) []ports.PlayerTurnStats {
	if len(mappings) == 0 || len(state.Players) == 0 {
		return nil
	}
	raceToUser := make(map[string]string, len(mappings))
	for _, member := range mappings {
		raceToUser[member.RaceName] = member.UserID
	}
	out := make([]ports.PlayerTurnStats, 0, len(state.Players))
	for _, player := range state.Players {
		// Engine players outside the roster are silently dropped; the
		// roster check in validateRoster normally rules them out.
		userID, known := raceToUser[player.RaceName]
		if !known {
			continue
		}
		out = append(out, ports.PlayerTurnStats{
			UserID:     userID,
			Planets:    player.Planets,
			Population: player.Population,
		})
	}
	sort.Slice(out, func(i, j int) bool { return out[i].UserID < out[j].UserID })
	return out
}
// recipientUserIDs derives the deduplicated, ascending-sorted list of
// platform user ids from the roster, matching the notificationintent
// validator's expectations. Blank (whitespace-only) ids are dropped.
func recipientUserIDs(mappings []playermapping.PlayerMapping) []string {
	if len(mappings) == 0 {
		return nil
	}
	unique := make(map[string]struct{}, len(mappings))
	ids := make([]string, 0, len(mappings))
	for _, member := range mappings {
		id := strings.TrimSpace(member.UserID)
		if id == "" {
			continue
		}
		if _, dup := unique[id]; dup {
			continue
		}
		unique[id] = struct{}{}
		ids = append(ids, id)
	}
	sort.Strings(ids)
	return ids
}
// persistedTurn yields record.CurrentTurn when the reload succeeded
// and zero otherwise, letting the failure-side snapshot avoid a second
// DB read.
func persistedTurn(record runtime.RuntimeRecord, reloadErr error) int {
	if reloadErr == nil {
		return record.CurrentTurn
	}
	return 0
}
// persistedHealth yields record.EngineHealth when the reload succeeded
// and the empty string otherwise.
func persistedHealth(record runtime.RuntimeRecord, reloadErr error) string {
	if reloadErr == nil {
		return record.EngineHealth
	}
	return ""
}
// appendSuccessLog writes the success-side operation_log entry,
// stamping the finish time from the service clock.
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	completedAt := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindTurnGeneration,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &completedAt,
	}
	service.bestEffortAppend(ctx, entry)
}
// appendFailureLog writes the failure-side operation_log entry,
// carrying the stable error code and message.
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
	completedAt := service.clock().UTC()
	entry := operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindTurnGeneration,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &completedAt,
	}
	service.bestEffortAppend(ctx, entry)
}
// bestEffortAppend persists a single operation_log entry. Write errors
// are logged and dropped: the runtime row, not the log, is the source
// of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, err := service.operationLogs.Append(ctx, entry)
	if err == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", err.Error(),
	)
}
// fallbackOpSource substitutes admin_rest when the supplied source is
// missing or unrecognised, per `gamemaster/README.md §Trusted Surfaces`.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}
@@ -0,0 +1,841 @@
package turngeneration_test
import (
"context"
"errors"
"fmt"
"sync"
"testing"
"time"
"galaxy/gamemaster/internal/adapters/mocks"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/scheduler"
"galaxy/gamemaster/internal/service/turngeneration"
"galaxy/gamemaster/internal/telemetry"
"galaxy/notificationintent"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)
// --- test doubles -----------------------------------------------------
// fakeRuntimeRecords is an in-memory test double for the runtime-records
// store wired into turngeneration.NewService. It keeps rows in a map,
// records every UpdateStatus/UpdateScheduling call for assertions, and
// lets a test force a per-method error via the *Err fields.
type fakeRuntimeRecords struct {
	mu     sync.Mutex
	stored map[string]runtime.RuntimeRecord
	// Forced errors: when non-nil the corresponding method fails.
	getErr error
	updErr error
	schErr error
	insErr error
	// Call recordings for assertions.
	updates  []ports.UpdateStatusInput
	scheds   []ports.UpdateSchedulingInput
	getCalls int
}

// newFakeRuntimeRecords returns an empty store ready for seeding.
func newFakeRuntimeRecords() *fakeRuntimeRecords {
	return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
}

// seed installs a record directly, bypassing Insert's conflict check.
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[record.GameID] = record
}

// Get returns the stored record or runtime.ErrNotFound. getCalls is
// incremented even when getErr forces a failure.
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.getCalls++
	if s.getErr != nil {
		return runtime.RuntimeRecord{}, s.getErr
	}
	record, ok := s.stored[gameID]
	if !ok {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	return record, nil
}

// Insert stores a new record; an existing game id yields
// runtime.ErrConflict.
func (s *fakeRuntimeRecords) Insert(_ context.Context, record runtime.RuntimeRecord) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.insErr != nil {
		return s.insErr
	}
	if _, ok := s.stored[record.GameID]; ok {
		return runtime.ErrConflict
	}
	s.stored[record.GameID] = record
	return nil
}

// UpdateStatus mimics the store's CAS semantics: ErrConflict when the
// current status differs from ExpectedFrom, ErrNotFound for unknown
// ids. It also mirrors the FinishedAt/StartedAt side effects and
// records the input (even when updErr forces a failure).
func (s *fakeRuntimeRecords) UpdateStatus(_ context.Context, input ports.UpdateStatusInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.updates = append(s.updates, input)
	if s.updErr != nil {
		return s.updErr
	}
	record, ok := s.stored[input.GameID]
	if !ok {
		return runtime.ErrNotFound
	}
	if record.Status != input.ExpectedFrom {
		return runtime.ErrConflict
	}
	record.Status = input.To
	record.UpdatedAt = input.Now
	if input.To == runtime.StatusFinished {
		finishedAt := input.Now
		record.FinishedAt = &finishedAt
	}
	if input.To == runtime.StatusRunning && record.StartedAt == nil {
		startedAt := input.Now
		record.StartedAt = &startedAt
	}
	s.stored[input.GameID] = record
	return nil
}

// UpdateScheduling applies the scheduling fields to the stored row and
// records the input (even when schErr forces a failure). The
// NextGenerationAt pointer is copied before storing.
func (s *fakeRuntimeRecords) UpdateScheduling(_ context.Context, input ports.UpdateSchedulingInput) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.scheds = append(s.scheds, input)
	if s.schErr != nil {
		return s.schErr
	}
	record, ok := s.stored[input.GameID]
	if !ok {
		return runtime.ErrNotFound
	}
	if input.NextGenerationAt != nil {
		next := *input.NextGenerationAt
		record.NextGenerationAt = &next
	} else {
		record.NextGenerationAt = nil
	}
	record.SkipNextTick = input.SkipNextTick
	record.CurrentTurn = input.CurrentTurn
	record.UpdatedAt = input.Now
	s.stored[input.GameID] = record
	return nil
}

// The remaining store methods are not exercised by the turngeneration
// flow; each returns an error so an unexpected call fails loudly.

func (s *fakeRuntimeRecords) UpdateImage(_ context.Context, _ ports.UpdateImageInput) error {
	return errors.New("not used in turngeneration tests")
}

func (s *fakeRuntimeRecords) UpdateEngineHealth(_ context.Context, _ ports.UpdateEngineHealthInput) error {
	return errors.New("not used in turngeneration tests")
}

func (s *fakeRuntimeRecords) Delete(_ context.Context, _ string) error {
	return errors.New("not used in turngeneration tests")
}

func (s *fakeRuntimeRecords) ListDueRunning(_ context.Context, _ time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in turngeneration tests")
}

func (s *fakeRuntimeRecords) ListByStatus(_ context.Context, _ runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in turngeneration tests")
}

func (s *fakeRuntimeRecords) List(_ context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used in turngeneration tests")
}

// record returns the stored row for gameID, for test assertions.
func (s *fakeRuntimeRecords) record(gameID string) (runtime.RuntimeRecord, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	record, ok := s.stored[gameID]
	return record, ok
}

// statusUpdates returns a copy of every recorded UpdateStatus input.
func (s *fakeRuntimeRecords) statusUpdates() []ports.UpdateStatusInput {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make([]ports.UpdateStatusInput, len(s.updates))
	copy(out, s.updates)
	return out
}

// scheduling returns a copy of every recorded UpdateScheduling input.
func (s *fakeRuntimeRecords) scheduling() []ports.UpdateSchedulingInput {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make([]ports.UpdateSchedulingInput, len(s.scheds))
	copy(out, s.scheds)
	return out
}
// fakePlayerMappings is an in-memory test double for the player-mapping
// store. Only ListByGame (the method the turngeneration flow calls) is
// functional; listErr forces it to fail.
type fakePlayerMappings struct {
	mu     sync.Mutex
	stored map[string][]playermapping.PlayerMapping
	// listErr, when non-nil, is returned by ListByGame.
	listErr error
}

// newFakePlayerMappings returns an empty store ready for seeding.
func newFakePlayerMappings() *fakePlayerMappings {
	return &fakePlayerMappings{stored: map[string][]playermapping.PlayerMapping{}}
}

// seed installs the roster for a game, copying the slice so later
// mutations by the test cannot alias the stored data.
func (s *fakePlayerMappings) seed(gameID string, members ...playermapping.PlayerMapping) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.stored[gameID] = append([]playermapping.PlayerMapping(nil), members...)
}

// The methods below are not exercised by the turngeneration flow; each
// returns an error so an unexpected call fails loudly.

func (s *fakePlayerMappings) BulkInsert(_ context.Context, _ []playermapping.PlayerMapping) error {
	return errors.New("not used in turngeneration tests")
}

func (s *fakePlayerMappings) Get(_ context.Context, _, _ string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used in turngeneration tests")
}

func (s *fakePlayerMappings) GetByRace(_ context.Context, _, _ string) (playermapping.PlayerMapping, error) {
	return playermapping.PlayerMapping{}, errors.New("not used in turngeneration tests")
}

// ListByGame returns a copy of the seeded roster (empty slice copy for
// unknown games), or listErr when forced.
func (s *fakePlayerMappings) ListByGame(_ context.Context, gameID string) ([]playermapping.PlayerMapping, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.listErr != nil {
		return nil, s.listErr
	}
	return append([]playermapping.PlayerMapping(nil), s.stored[gameID]...), nil
}

func (s *fakePlayerMappings) DeleteByGame(_ context.Context, _ string) error {
	return errors.New("not used in turngeneration tests")
}
// fakeOperationLogs is an in-memory test double for the operation_log
// store. Append validates each entry (so malformed audit rows fail the
// test) and appErr forces a write failure.
type fakeOperationLogs struct {
	mu sync.Mutex
	// appErr, when non-nil, is returned by Append.
	appErr  error
	entries []operation.OperationEntry
}

// Append validates and stores the entry, returning its 1-based
// sequence number as the id.
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.appErr != nil {
		return 0, s.appErr
	}
	if err := entry.Validate(); err != nil {
		return 0, err
	}
	s.entries = append(s.entries, entry)
	return int64(len(s.entries)), nil
}

// ListByGame is not exercised by the turngeneration flow.
func (s *fakeOperationLogs) ListByGame(_ context.Context, _ string, _ int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used in turngeneration tests")
}

// lastEntry returns the most recently appended entry, for assertions.
func (s *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if len(s.entries) == 0 {
		return operation.OperationEntry{}, false
	}
	return s.entries[len(s.entries)-1], true
}
// --- harness ----------------------------------------------------------
type harness struct {
t *testing.T
ctrl *gomock.Controller
runtimeStore *fakeRuntimeRecords
mappings *fakePlayerMappings
logs *fakeOperationLogs
engine *mocks.MockEngineClient
lobbyEvents *mocks.MockLobbyEventsPublisher
notifications *mocks.MockNotificationIntentPublisher
lobby *mocks.MockLobbyClient
telemetry *telemetry.Runtime
now time.Time
service *turngeneration.Service
}
const (
testGameID = "game-001"
testEngineEndpoint = "http://galaxy-game-game-001:8080"
testTurnSchedule = "0 18 * * *"
testGameName = "Andromeda Conquest"
)
// newHarness wires a turngeneration.Service against in-memory fakes
// (runtime records, player mappings, operation logs) and gomock mocks
// (engine, lobby, event/notification publishers). The Clock dependency
// is pinned to h.now (2026-04-30T12:00:00Z) so cron-schedule math in
// the tests is deterministic.
func newHarness(t *testing.T) *harness {
	t.Helper()
	ctrl := gomock.NewController(t)
	// nil providers produce a runtime with no external exporters.
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	h := &harness{
		t:             t,
		ctrl:          ctrl,
		runtimeStore:  newFakeRuntimeRecords(),
		mappings:      newFakePlayerMappings(),
		logs:          &fakeOperationLogs{},
		engine:        mocks.NewMockEngineClient(ctrl),
		lobbyEvents:   mocks.NewMockLobbyEventsPublisher(ctrl),
		notifications: mocks.NewMockNotificationIntentPublisher(ctrl),
		lobby:         mocks.NewMockLobbyClient(ctrl),
		telemetry:     telemetryRuntime,
		now:           time.Date(2026, time.April, 30, 12, 0, 0, 0, time.UTC),
	}
	service, err := turngeneration.NewService(turngeneration.Dependencies{
		RuntimeRecords: h.runtimeStore,
		PlayerMappings: h.mappings,
		OperationLogs:  h.logs,
		Engine:         h.engine,
		LobbyEvents:    h.lobbyEvents,
		Notifications:  h.notifications,
		Lobby:          h.lobby,
		Scheduler:      scheduler.New(),
		Telemetry:      h.telemetry,
		// Closure reads h.now so tests could shift time if needed.
		Clock: func() time.Time { return h.now },
	})
	require.NoError(t, err)
	h.service = service
	return h
}
// seedRunningRecord stores a healthy running game at turn 0 plus two
// player mappings (Aelinari/user-1, Drazi/user-2). skip sets the
// record's SkipNextTick flag.
func (h *harness) seedRunningRecord(skip bool) {
	createdAt := h.now.Add(-2 * time.Hour)
	startedAt := h.now.Add(-1 * time.Hour)
	record := runtime.RuntimeRecord{
		GameID:               testGameID,
		Status:               runtime.StatusRunning,
		EngineEndpoint:       testEngineEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         testTurnSchedule,
		CurrentTurn:          0,
		SkipNextTick:         skip,
		EngineHealth:         "healthy",
		CreatedAt:            createdAt,
		UpdatedAt:            createdAt,
		StartedAt:            &startedAt,
	}
	h.runtimeStore.seed(record)
	first := playermapping.PlayerMapping{
		GameID:           testGameID,
		UserID:           "user-1",
		RaceName:         "Aelinari",
		EnginePlayerUUID: "uuid-1",
		CreatedAt:        createdAt,
	}
	second := playermapping.PlayerMapping{
		GameID:           testGameID,
		UserID:           "user-2",
		RaceName:         "Drazi",
		EnginePlayerUUID: "uuid-2",
		CreatedAt:        createdAt,
	}
	h.mappings.seed(testGameID, first, second)
}
// successInput returns the canonical valid request used by most tests:
// a scheduler-triggered generation for the seeded game, logged under
// the admin-rest op source with source ref "tick-1".
func successInput() turngeneration.Input {
	var input turngeneration.Input
	input.GameID = testGameID
	input.Trigger = turngeneration.TriggerScheduler
	input.OpSource = operation.OpSourceAdminRest
	input.SourceRef = "tick-1"
	return input
}
// enginePlayers returns the engine-side roster matching the two
// mappings seeded by seedRunningRecord.
func enginePlayers() []ports.PlayerState {
	aelinari := ports.PlayerState{RaceName: "Aelinari", EnginePlayerUUID: "uuid-1", Planets: 3, Population: 100}
	drazi := ports.PlayerState{RaceName: "Drazi", EnginePlayerUUID: "uuid-2", Planets: 2, Population: 80}
	return []ports.PlayerState{aelinari, drazi}
}
// expectGameSummary arms the lobby mock to resolve the game's display
// name for notification payloads.
func (h *harness) expectGameSummary() {
	summary := ports.GameSummary{GameID: testGameID, GameName: testGameName, Status: "running"}
	h.lobby.EXPECT().GetGameSummary(gomock.Any(), testGameID).Return(summary, nil)
}
// --- tests ------------------------------------------------------------
// TestNewServiceRejectsMissingDeps asserts that NewService fails fast
// when any single mandatory dependency is nil: each subtest blanks one
// field of an otherwise complete Dependencies value and expects a
// constructor error with a nil service.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cases := []struct {
		name string
		mut  func(*turngeneration.Dependencies)
	}{
		{"runtime records", func(d *turngeneration.Dependencies) { d.RuntimeRecords = nil }},
		{"player mappings", func(d *turngeneration.Dependencies) { d.PlayerMappings = nil }},
		{"operation logs", func(d *turngeneration.Dependencies) { d.OperationLogs = nil }},
		{"engine", func(d *turngeneration.Dependencies) { d.Engine = nil }},
		{"lobby events", func(d *turngeneration.Dependencies) { d.LobbyEvents = nil }},
		{"notifications", func(d *turngeneration.Dependencies) { d.Notifications = nil }},
		{"lobby", func(d *turngeneration.Dependencies) { d.Lobby = nil }},
		{"scheduler", func(d *turngeneration.Dependencies) { d.Scheduler = nil }},
		{"telemetry", func(d *turngeneration.Dependencies) { d.Telemetry = nil }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			// A fully valid dependency set; tc.mut knocks out one field.
			deps := turngeneration.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				PlayerMappings: newFakePlayerMappings(),
				OperationLogs:  &fakeOperationLogs{},
				Engine:         mocks.NewMockEngineClient(ctrl),
				LobbyEvents:    mocks.NewMockLobbyEventsPublisher(ctrl),
				Notifications:  mocks.NewMockNotificationIntentPublisher(ctrl),
				Lobby:          mocks.NewMockLobbyClient(ctrl),
				Scheduler:      scheduler.New(),
				Telemetry:      telemetryRuntime,
			}
			tc.mut(&deps)
			service, err := turngeneration.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
// TestHandleRejectsInvalidInput checks structural request validation:
// each mutation corrupts one field of an otherwise valid input and must
// yield an invalid_request failure outcome (no transport error).
func TestHandleRejectsInvalidInput(t *testing.T) {
	mutations := []struct {
		name  string
		apply func(*turngeneration.Input)
	}{
		{name: "empty game id", apply: func(i *turngeneration.Input) { i.GameID = "" }},
		{name: "unknown trigger", apply: func(i *turngeneration.Input) { i.Trigger = "exotic" }},
		{name: "unknown op source", apply: func(i *turngeneration.Input) { i.OpSource = "exotic" }},
	}
	for _, m := range mutations {
		t.Run(m.name, func(t *testing.T) {
			h := newHarness(t)
			corrupted := successInput()
			m.apply(&corrupted)
			result, err := h.service.Handle(context.Background(), corrupted)
			require.NoError(t, err)
			assert.Equal(t, turngeneration.ErrorCodeInvalidRequest, result.ErrorCode)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
		})
	}
}
// TestHandleHappyPathScheduler walks the full success path for a
// scheduler-triggered tick: engine turn call, two CAS status updates,
// schedule advance, snapshot event, turn-ready notification, and a
// success row in the operation log.
func TestHandleHappyPathScheduler(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	// Engine reports turn 1 with both seeded players still present.
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 1, Players: enginePlayers(), Finished: false}, nil)
	// Capture the published snapshot for the assertions below.
	var snapshot ports.RuntimeSnapshotUpdate
	h.lobbyEvents.EXPECT().
		PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, msg ports.RuntimeSnapshotUpdate) error {
			snapshot = msg
			return nil
		})
	h.expectGameSummary()
	// Capture the notification intent as well.
	var publishedIntent notificationintent.Intent
	h.notifications.EXPECT().
		Publish(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, intent notificationintent.Intent) error {
			publishedIntent = intent
			return nil
		})
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "outcome %q error_code=%q", result.Outcome, result.ErrorCode)
	assert.False(t, result.Finished)
	assert.Equal(t, turngeneration.TriggerScheduler, result.Trigger)
	assert.Equal(t, runtime.StatusRunning, result.Record.Status)
	assert.Equal(t, 1, result.Record.CurrentTurn)
	// The "0 18 * * *" schedule fires at 18:00 UTC the same day
	// (h.now is 12:00 UTC on 2026-04-30).
	require.NotNil(t, result.Record.NextGenerationAt)
	assert.Equal(t, time.Date(2026, time.April, 30, 18, 0, 0, 0, time.UTC), *result.Record.NextGenerationAt)
	assert.False(t, result.Record.SkipNextTick)
	// Exactly two CAS transitions: running -> generation_in_progress,
	// then generation_in_progress -> running.
	updates := h.runtimeStore.statusUpdates()
	require.Len(t, updates, 2)
	assert.Equal(t, runtime.StatusRunning, updates[0].ExpectedFrom)
	assert.Equal(t, runtime.StatusGenerationInProgress, updates[0].To)
	assert.Equal(t, runtime.StatusGenerationInProgress, updates[1].ExpectedFrom)
	assert.Equal(t, runtime.StatusRunning, updates[1].To)
	// One scheduling write carrying the new turn and next-tick time.
	scheds := h.runtimeStore.scheduling()
	require.Len(t, scheds, 1)
	require.NotNil(t, scheds[0].NextGenerationAt)
	assert.False(t, scheds[0].SkipNextTick)
	assert.Equal(t, 1, scheds[0].CurrentTurn)
	// Snapshot mirrors the post-turn record plus per-player stats in
	// roster order (user-1 first, user-2 second).
	assert.Equal(t, runtime.StatusRunning, snapshot.RuntimeStatus)
	assert.Equal(t, 1, snapshot.CurrentTurn)
	assert.Equal(t, "healthy", snapshot.EngineHealthSummary)
	require.Len(t, snapshot.PlayerTurnStats, 2)
	assert.Equal(t, "user-1", snapshot.PlayerTurnStats[0].UserID)
	assert.Equal(t, 3, snapshot.PlayerTurnStats[0].Planets)
	assert.Equal(t, 100, snapshot.PlayerTurnStats[0].Population)
	assert.Equal(t, "user-2", snapshot.PlayerTurnStats[1].UserID)
	// Turn-ready notification targets every mapped user and embeds the
	// lobby-resolved game name and turn number in its JSON payload.
	assert.Equal(t, notificationintent.NotificationTypeGameTurnReady, publishedIntent.NotificationType)
	assert.Equal(t, []string{"user-1", "user-2"}, publishedIntent.RecipientUserIDs)
	assert.Equal(t, notificationintent.AudienceKindUser, publishedIntent.AudienceKind)
	assert.Contains(t, publishedIntent.PayloadJSON, fmt.Sprintf(`"game_name":%q`, testGameName))
	assert.Contains(t, publishedIntent.PayloadJSON, `"turn_number":1`)
	// Operation log records a successful turn_generation keyed to the
	// request's source ref.
	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OpKindTurnGeneration, entry.OpKind)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
	assert.Equal(t, "tick-1", entry.SourceRef)
}
// TestHandleConsumesSkipNextTick verifies that a record seeded with
// SkipNextTick=true advances NextGenerationAt by one extra cron step
// (18:00 the NEXT day rather than the same day) and that the flag is
// cleared after being consumed.
func TestHandleConsumesSkipNextTick(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(true)
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 5, Players: enginePlayers(), Finished: false}, nil)
	// Publishing side effects are not under test here; accept them.
	h.lobbyEvents.EXPECT().
		PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		Return(nil)
	h.expectGameSummary()
	h.notifications.EXPECT().
		Publish(gomock.Any(), gomock.Any()).
		Return(nil)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "outcome %q error_code=%q", result.Outcome, result.ErrorCode)
	require.NotNil(t, result.Record.NextGenerationAt)
	expected := time.Date(2026, time.May, 1, 18, 0, 0, 0, time.UTC)
	assert.Equal(t, expected, *result.Record.NextGenerationAt, "skip advances by one extra cron step")
	assert.False(t, result.Record.SkipNextTick, "skip flag cleared after consumption")
}
// TestHandleForceTriggerLabelsTelemetry confirms that a force-triggered
// run succeeds end to end and echoes TriggerForce on the result.
func TestHandleForceTriggerLabelsTelemetry(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	h.notifications.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(nil)
	h.lobbyEvents.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	h.expectGameSummary()
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 1, Players: enginePlayers()}, nil)
	forced := successInput()
	forced.Trigger = turngeneration.TriggerForce
	result, err := h.service.Handle(context.Background(), forced)
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	assert.Equal(t, turngeneration.TriggerForce, result.Trigger)
}
// TestHandleFinishedTransitionsAndClearsTick drives the end-of-game
// path: the engine reports Finished=true at turn 42, so the record
// moves to StatusFinished with FinishedAt set to the frozen clock and
// no next tick scheduled; a GameFinished lobby event and a
// game-finished notification (carrying the final turn number) are
// published.
func TestHandleFinishedTransitionsAndClearsTick(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 42, Players: enginePlayers(), Finished: true}, nil)
	// Capture the GameFinished event for the assertions below.
	var finishedMsg ports.GameFinished
	h.lobbyEvents.EXPECT().
		PublishGameFinished(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, msg ports.GameFinished) error {
			finishedMsg = msg
			return nil
		})
	h.expectGameSummary()
	var publishedIntent notificationintent.Intent
	h.notifications.EXPECT().
		Publish(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, intent notificationintent.Intent) error {
			publishedIntent = intent
			return nil
		})
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "outcome %q error_code=%q", result.Outcome, result.ErrorCode)
	assert.True(t, result.Finished)
	assert.Equal(t, runtime.StatusFinished, result.Record.Status)
	// Finished games get no next generation time.
	assert.Nil(t, result.Record.NextGenerationAt)
	require.NotNil(t, result.Record.FinishedAt)
	assert.Equal(t, h.now, *result.Record.FinishedAt)
	assert.Equal(t, runtime.StatusFinished, finishedMsg.RuntimeStatus)
	assert.Equal(t, 42, finishedMsg.FinalTurnNumber)
	require.Len(t, finishedMsg.PlayerTurnStats, 2)
	assert.Equal(t, h.now, finishedMsg.FinishedAt)
	assert.Equal(t, notificationintent.NotificationTypeGameFinished, publishedIntent.NotificationType)
	assert.Contains(t, publishedIntent.PayloadJSON, `"final_turn_number":42`)
}
// TestHandleEngineUnreachable verifies the engine-dial failure path:
// the record is parked in generation_failed, that failed status is
// still broadcast as a snapshot (with empty player stats), and a
// generation-failed notification goes to the admin-email audience
// rather than to player recipients.
func TestHandleEngineUnreachable(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	// Wrapped sentinel: the service must classify via errors.Is-style
	// unwrapping, not string matching.
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{}, fmt.Errorf("dial: %w", ports.ErrEngineUnreachable))
	var snapshot ports.RuntimeSnapshotUpdate
	h.lobbyEvents.EXPECT().
		PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, msg ports.RuntimeSnapshotUpdate) error {
			snapshot = msg
			return nil
		})
	h.expectGameSummary()
	var publishedIntent notificationintent.Intent
	h.notifications.EXPECT().
		Publish(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, intent notificationintent.Intent) error {
			publishedIntent = intent
			return nil
		})
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, turngeneration.ErrorCodeEngineUnreachable, result.ErrorCode)
	stored, ok := h.runtimeStore.record(testGameID)
	require.True(t, ok)
	assert.Equal(t, runtime.StatusGenerationFailed, stored.Status)
	assert.Equal(t, runtime.StatusGenerationFailed, snapshot.RuntimeStatus)
	assert.Empty(t, snapshot.PlayerTurnStats)
	assert.Equal(t, notificationintent.NotificationTypeGameGenerationFailed, publishedIntent.NotificationType)
	assert.Equal(t, notificationintent.AudienceKindAdminEmail, publishedIntent.AudienceKind)
	assert.Empty(t, publishedIntent.RecipientUserIDs)
}
// TestHandleEngineValidationError maps a wrapped engine 4xx error to
// engine_validation_error and parks the record in generation_failed.
func TestHandleEngineValidationError(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	wrapped := fmt.Errorf("400: %w", ports.ErrEngineValidation)
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{}, wrapped)
	h.expectGameSummary()
	h.lobbyEvents.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	h.notifications.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(nil)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, turngeneration.ErrorCodeEngineValidationError, result.ErrorCode)
	rec, found := h.runtimeStore.record(testGameID)
	require.True(t, found)
	assert.Equal(t, runtime.StatusGenerationFailed, rec.Status)
}
// TestHandleEngineProtocolViolationOnRosterMismatch: the engine returns
// a race ("Unknown") with no corresponding player mapping, which must
// be classified as engine_protocol_violation and park the record in
// generation_failed.
func TestHandleEngineProtocolViolationOnRosterMismatch(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{
			Turn: 1,
			Players: []ports.PlayerState{
				{RaceName: "Aelinari", EnginePlayerUUID: "uuid-1", Planets: 1, Population: 10},
				// Not in the seeded roster of Aelinari/Drazi.
				{RaceName: "Unknown", EnginePlayerUUID: "uuid-x", Planets: 1, Population: 5},
			},
		}, nil)
	h.lobbyEvents.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	h.expectGameSummary()
	h.notifications.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(nil)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, turngeneration.ErrorCodeEngineProtocolViolation, result.ErrorCode)
	stored, ok := h.runtimeStore.record(testGameID)
	require.True(t, ok)
	assert.Equal(t, runtime.StatusGenerationFailed, stored.Status)
}
// TestHandleEngineProtocolViolationOnCountMismatch: the engine returns
// fewer players than the seeded roster, which must be classified as
// engine_protocol_violation.
func TestHandleEngineProtocolViolationOnCountMismatch(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	// Only one of the two mapped players comes back from the engine.
	partial := []ports.PlayerState{
		{RaceName: "Aelinari", EnginePlayerUUID: "uuid-1", Planets: 1, Population: 10},
	}
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 1, Players: partial}, nil)
	h.expectGameSummary()
	h.lobbyEvents.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	h.notifications.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(nil)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, turngeneration.ErrorCodeEngineProtocolViolation, result.ErrorCode)
}
// TestHandleConflictOnInitialCAS seeds a stopped (not running) record:
// Handle must report runtime_not_running as a failure outcome without
// attempting any CAS status update against the store.
func TestHandleConflictOnInitialCAS(t *testing.T) {
	h := newHarness(t)
	startedAt := h.now.Add(-1 * time.Hour)
	// A structurally valid record whose status is stopped.
	h.runtimeStore.seed(runtime.RuntimeRecord{
		GameID:               testGameID,
		Status:               runtime.StatusStopped,
		EngineEndpoint:       testEngineEndpoint,
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         testTurnSchedule,
		CreatedAt:            h.now.Add(-2 * time.Hour),
		UpdatedAt:            h.now.Add(-1 * time.Hour),
		StartedAt:            &startedAt,
		StoppedAt:            &startedAt,
	})
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, turngeneration.ErrorCodeRuntimeNotRunning, result.ErrorCode)
	assert.Empty(t, h.runtimeStore.statusUpdates(), "no CAS attempted on non-running record")
}
// TestHandleConflictOnPostEngineCAS covers the race where the record's
// status changes while the engine call is in flight: the second CAS
// (back to running) must fail and surface as a conflict error code.
func TestHandleConflictOnPostEngineCAS(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	// Simulate a concurrent admin stop that wins the race during the
	// engine call by mutating the stored row mid-flight.
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		DoAndReturn(func(_ context.Context, _ string) (ports.StateResponse, error) {
			h.runtimeStore.mu.Lock()
			rec := h.runtimeStore.stored[testGameID]
			rec.Status = runtime.StatusStopped
			h.runtimeStore.stored[testGameID] = rec
			h.runtimeStore.mu.Unlock()
			return ports.StateResponse{Turn: 1, Players: enginePlayers()}, nil
		})
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, turngeneration.ErrorCodeConflict, result.ErrorCode)
}
// TestHandleRuntimeNotFound: with no record seeded, Handle reports
// runtime_not_found as a failure outcome.
func TestHandleRuntimeNotFound(t *testing.T) {
	h := newHarness(t)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, turngeneration.ErrorCodeRuntimeNotFound, result.ErrorCode)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
}
// TestHandleServiceUnavailableOnGet: a store read failure on the
// initial record fetch maps to service_unavailable.
func TestHandleServiceUnavailableOnGet(t *testing.T) {
	h := newHarness(t)
	h.runtimeStore.getErr = errors.New("postgres dial timeout")
	outcome, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	assert.Equal(t, turngeneration.ErrorCodeServiceUnavailable, outcome.ErrorCode)
}
// TestHandleLobbyFallbackToGameID: when the lobby summary lookup fails,
// the turn still succeeds and the notification payload falls back to
// using the game id in place of the display name.
func TestHandleLobbyFallbackToGameID(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 1, Players: enginePlayers()}, nil)
	h.lobbyEvents.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	// Lobby is down: no game name available.
	h.lobby.EXPECT().
		GetGameSummary(gomock.Any(), testGameID).
		Return(ports.GameSummary{}, fmt.Errorf("dial: %w", ports.ErrLobbyUnavailable))
	var publishedIntent notificationintent.Intent
	h.notifications.EXPECT().
		Publish(gomock.Any(), gomock.Any()).
		DoAndReturn(func(_ context.Context, intent notificationintent.Intent) error {
			publishedIntent = intent
			return nil
		})
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	// Payload carries the game id where the name would normally go.
	assert.Contains(t, publishedIntent.PayloadJSON, fmt.Sprintf(`"game_name":%q`, testGameID))
}
// TestHandleLobbyEventPublishFailureDoesNotRollBack: a broken snapshot
// publisher is tolerated — the turn still succeeds and the record keeps
// its advanced state (running, turn 1).
func TestHandleLobbyEventPublishFailureDoesNotRollBack(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 1, Players: enginePlayers()}, nil)
	h.lobbyEvents.EXPECT().
		PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		Return(errors.New("redis broken"))
	h.expectGameSummary()
	h.notifications.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(nil)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "outcome %q error_code=%q", result.Outcome, result.ErrorCode)
	assert.Equal(t, runtime.StatusRunning, result.Record.Status)
	assert.Equal(t, 1, result.Record.CurrentTurn)
}
// TestHandleNotificationFailureDoesNotRollBack: a failing notification
// publisher does not turn a completed generation into a failure.
func TestHandleNotificationFailureDoesNotRollBack(t *testing.T) {
	h := newHarness(t)
	h.seedRunningRecord(false)
	h.expectGameSummary()
	h.engine.EXPECT().
		Turn(gomock.Any(), testEngineEndpoint).
		Return(ports.StateResponse{Turn: 1, Players: enginePlayers()}, nil)
	h.lobbyEvents.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
	publishErr := errors.New("notification stream broken")
	h.notifications.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(publishErr)
	result, err := h.service.Handle(context.Background(), successInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "outcome %q error_code=%q", result.Outcome, result.ErrorCode)
}
// TestHandleNilContext verifies that a nil context is rejected with a
// transport-level error rather than causing a panic.
func TestHandleNilContext(t *testing.T) {
	h := newHarness(t)
	//nolint:staticcheck // passing a nil context on purpose
	_, err := h.service.Handle(nil, successInput())
	require.Error(t, err)
}
// TestHandleNilService verifies that calling Handle on a nil *Service
// receiver returns an error instead of panicking.
func TestHandleNilService(t *testing.T) {
	var nilService *turngeneration.Service
	_, err := nilService.Handle(context.Background(), successInput())
	require.Error(t, err)
}