Files
galaxy-game/gamemaster/internal/service/turngeneration/service.go
T
2026-05-03 07:59:03 +02:00

972 lines
35 KiB
Go

// Package turngeneration implements the turn-generation orchestrator
// owned by Game Master. It is the single entry point through which the
// scheduler ticker (Stage 15 worker) and the admin force-next-turn flow
// (Stage 17) drive a turn through the engine container.
//
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
// §Lifecycles → Turn generation` and §Force-next-turn. Design rationale
// is captured in
// `gamemaster/docs/stage15-scheduler-and-turn-generation.md`.
package turngeneration
import (
"context"
"errors"
"fmt"
"log/slog"
"sort"
"strings"
"time"
"galaxy/gamemaster/internal/domain/operation"
"galaxy/gamemaster/internal/domain/playermapping"
"galaxy/gamemaster/internal/domain/runtime"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/scheduler"
"galaxy/gamemaster/internal/telemetry"
"galaxy/notificationintent"
)
// Trigger names the origin of one turn-generation request. The value is
// carried into telemetry and structured logs only; the orchestrator's
// persistence path never branches on it. Skip-tick handling is governed
// solely by the runtime record's `skip_next_tick` column.
type Trigger string

// Frozen trigger vocabulary.
const (
	// TriggerScheduler marks turn generations dispatched by the
	// `schedulerticker` worker.
	TriggerScheduler Trigger = "scheduler"

	// TriggerForce marks turn generations dispatched by the admin
	// force-next-turn flow (Stage 17 `service/adminforce`).
	TriggerForce Trigger = "force"
)

// IsKnown reports whether trigger is one of the declared Trigger
// constants. The comparison is exact: no trimming or case folding.
func (trigger Trigger) IsKnown() bool {
	return trigger == TriggerScheduler || trigger == TriggerForce
}
// Input stores the per-call arguments for one turn-generation
// operation. Validate checks the structural invariants before Handle
// touches any store.
type Input struct {
// GameID identifies the runtime to drive. Validate rejects a value
// that is empty after whitespace trimming.
GameID string
// Trigger classifies the caller. Used for telemetry and logs only.
// Validate rejects values outside the Trigger vocabulary.
Trigger Trigger
// OpSource classifies how the request entered Game Master. Used to
// stamp `operation_log.op_source`. Validate rejects unrecognised
// values; the audit entry written for such a rejected call is stamped
// `admin_rest` instead (see fallbackOpSource).
OpSource operation.OpSource
// SourceRef stores the optional opaque per-source reference (REST
// request id, scheduler tick id). Empty when the caller does not
// provide one. It is copied verbatim into the operation log entry.
SourceRef string
}
// Validate checks the structural invariants the service needs before it
// touches any store. Checks run in a fixed order and the first violation
// wins: blank game id, unknown trigger, unknown op source. It returns
// nil when all three pass.
func (input Input) Validate() error {
	switch {
	case strings.TrimSpace(input.GameID) == "":
		return errors.New("game id must not be empty")
	case !input.Trigger.IsKnown():
		return fmt.Errorf("trigger %q is unsupported", input.Trigger)
	case !input.OpSource.IsKnown():
		return fmt.Errorf("op source %q is unsupported", input.OpSource)
	default:
		return nil
	}
}
// Result stores the deterministic outcome of one Handle call.
type Result struct {
// Record carries the post-mutation runtime record as re-read from the
// store. Populated on every success outcome and on failures routed
// through failGeneration (where the row was moved to
// `generation_failed`), provided the reload succeeded. Zero on
// early-rejection outcomes (`invalid_request`, `runtime_not_found`,
// `runtime_not_running`, `conflict` on initial CAS,
// `service_unavailable` on initial Get) and on post-engine terminal
// failures (post-engine CAS, scheduling update, or reload errors).
Record runtime.RuntimeRecord
// Trigger echoes back Input.Trigger for log/telemetry consumers.
Trigger Trigger
// Finished is true when the engine reported `finished=true` on this
// turn and the runtime transitioned to `finished`.
Finished bool
// Outcome reports whether the operation completed (success) or
// produced a stable failure code.
Outcome operation.Outcome
// ErrorCode stores the stable error code on failure. Empty on
// success.
ErrorCode string
// ErrorMessage stores the operator-readable detail on failure.
// Empty on success.
ErrorMessage string
}
// IsSuccess reports whether result.Outcome equals
// operation.OutcomeSuccess.
func (result Result) IsSuccess() bool {
	succeeded := result.Outcome == operation.OutcomeSuccess
	return succeeded
}
// Dependencies groups the collaborators required by Service. NewService
// rejects a nil value for every field except Logger and Clock, which
// have defaults.
type Dependencies struct {
// RuntimeRecords drives every CAS and scheduling persistence step.
// Required.
RuntimeRecords ports.RuntimeRecordStore
// PlayerMappings supplies the per-game roster used to project
// engine player state to user-facing notification recipients and
// `player_turn_stats`. Required.
PlayerMappings ports.PlayerMappingStore
// OperationLogs records the audit entry for the operation. Writes are
// best-effort: a failed append is logged and discarded. Required.
OperationLogs ports.OperationLogStore
// Engine drives the engine /admin/turn call. Required.
Engine ports.EngineClient
// LobbyEvents publishes `runtime_snapshot_update` and
// `game_finished` to `gm:lobby_events`. Required.
LobbyEvents ports.LobbyEventsPublisher
// Notifications publishes `game.turn.ready`, `game.finished`, and
// `game.generation_failed` intents to `notification:intents`.
// Required.
Notifications ports.NotificationIntentPublisher
// Lobby resolves the human-readable `game_name` consumed by
// notification payloads. Failure is fail-soft: the orchestrator
// falls back to `game_id`. Required.
Lobby ports.LobbyClient
// Scheduler computes the post-success `next_generation_at` value.
// Required.
Scheduler *scheduler.Service
// Telemetry records the turn-generation outcome counter, lobby
// publication counter, and notification publish-attempt counter.
// Required.
Telemetry *telemetry.Runtime
// Logger records structured service-level events. Defaults to
// `slog.Default()` when nil.
Logger *slog.Logger
// Clock supplies the wall-clock used for operation timestamps.
// Defaults to `time.Now` when nil. Readings are converted to UTC by
// the service.
Clock func() time.Time
}
// Service executes the turn-generation lifecycle operation. Construct
// it with NewService, which rejects nil required collaborators and
// fills in the logger and clock defaults.
type Service struct {
// runtimeRecords reads the runtime row and performs the status CAS
// and scheduling updates.
runtimeRecords ports.RuntimeRecordStore
// playerMappings lists the per-game roster.
playerMappings ports.PlayerMappingStore
// operationLogs receives the best-effort audit entry.
operationLogs ports.OperationLogStore
// engine issues the turn call against the record's engine endpoint.
engine ports.EngineClient
// lobbyEvents publishes snapshot updates and game-finished events.
lobbyEvents ports.LobbyEventsPublisher
// notifications publishes notification intents.
notifications ports.NotificationIntentPublisher
// lobby resolves the game name; errors fall back to the game id.
lobby ports.LobbyClient
// scheduler computes the next generation time after a non-final turn.
scheduler *scheduler.Service
// telemetry records outcome and publication counters.
telemetry *telemetry.Runtime
// logger is tagged with service=gamemaster.turngeneration by
// NewService.
logger *slog.Logger
// clock returns the current time; call sites convert it to UTC.
clock func() time.Time
}
// NewService builds a Service from deps.
//
// Every collaborator except Logger and Clock is mandatory. The checks
// run in declaration order and the first nil one is reported as an
// error. A nil Clock is replaced by time.Now and a nil Logger by
// slog.Default(); the logger is then tagged with the service name.
func NewService(deps Dependencies) (*Service, error) {
	// Ordered so that the first missing collaborator determines the error.
	required := []struct {
		missing bool
		message string
	}{
		{deps.RuntimeRecords == nil, "new turn generation service: nil runtime records"},
		{deps.PlayerMappings == nil, "new turn generation service: nil player mappings"},
		{deps.OperationLogs == nil, "new turn generation service: nil operation logs"},
		{deps.Engine == nil, "new turn generation service: nil engine client"},
		{deps.LobbyEvents == nil, "new turn generation service: nil lobby events publisher"},
		{deps.Notifications == nil, "new turn generation service: nil notification publisher"},
		{deps.Lobby == nil, "new turn generation service: nil lobby client"},
		{deps.Scheduler == nil, "new turn generation service: nil scheduler"},
		{deps.Telemetry == nil, "new turn generation service: nil telemetry runtime"},
	}
	for _, check := range required {
		if check.missing {
			return nil, errors.New(check.message)
		}
	}

	now := deps.Clock
	if now == nil {
		now = time.Now
	}

	base := deps.Logger
	if base == nil {
		base = slog.Default()
	}
	tagged := base.With("service", "gamemaster.turngeneration")

	built := &Service{
		runtimeRecords: deps.RuntimeRecords,
		playerMappings: deps.PlayerMappings,
		operationLogs:  deps.OperationLogs,
		engine:         deps.Engine,
		lobbyEvents:    deps.LobbyEvents,
		notifications:  deps.Notifications,
		lobby:          deps.Lobby,
		scheduler:      deps.Scheduler,
		telemetry:      deps.Telemetry,
		logger:         tagged,
		clock:          now,
	}
	return built, nil
}
// Handle runs one turn-generation operation from validation through the
// final audit entry.
//
// The error return is reserved for non-business faults (nil receiver,
// nil context). Every business outcome, success or failure, is described
// by the returned Result.
//
// Steps, in order: validate input, load the runtime record, require
// status `running`, CAS to `generation_in_progress`, call the engine,
// load the roster, then branch on the engine outcome. The roster is read
// before the engine outcome is inspected, so a roster read failure is
// reported as service_unavailable even when the engine call also failed.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	switch {
	case service == nil:
		return Result{}, errors.New("turn generation: nil service")
	case ctx == nil:
		return Result{}, errors.New("turn generation: nil context")
	}

	startedAt := service.clock().UTC()

	if invalid := input.Validate(); invalid != nil {
		return service.recordEarlyFailure(ctx, startedAt, input,
			ErrorCodeInvalidRequest, invalid.Error()), nil
	}

	record, rejection, loaded := service.loadRecord(ctx, startedAt, input)
	if !loaded {
		return rejection, nil
	}

	if record.Status != runtime.StatusRunning {
		detail := fmt.Sprintf("runtime status is %q, expected %q",
			record.Status, runtime.StatusRunning)
		return service.recordEarlyFailure(ctx, startedAt, input,
			ErrorCodeRuntimeNotRunning, detail), nil
	}

	casRejection, claimed := service.casToInProgress(ctx, startedAt, input)
	if !claimed {
		return casRejection, nil
	}

	// From here on the row is in generation_in_progress; every failure
	// must go through failGeneration so the row does not stay there.
	state, engineOK, engineCode, engineMsg := service.callEngineTurn(ctx, record)

	roster, rosterErr := service.playerMappings.ListByGame(ctx, input.GameID)
	if rosterErr != nil {
		// Without the roster player_turn_stats cannot be projected.
		// Report service_unavailable but still try to roll the runtime
		// to generation_failed, because the engine call may already
		// have mutated state.
		detail := fmt.Sprintf("list player mappings: %s", rosterErr.Error())
		return service.failGeneration(ctx, startedAt, input, record,
			ErrorCodeServiceUnavailable, detail), nil
	}

	if !engineOK {
		return service.failGeneration(ctx, startedAt, input, record,
			engineCode, engineMsg), nil
	}

	if mismatch, valid := service.validateRoster(ctx, startedAt, input, record, state, roster); !valid {
		return mismatch, nil
	}

	if !state.Finished {
		return service.completeRunning(ctx, startedAt, input, record, state, roster), nil
	}
	return service.completeFinished(ctx, startedAt, input, record, state, roster), nil
}
// loadRecord fetches the runtime record for input.GameID.
//
// On success it returns the record, a zero Result and true. On any store
// error it audits an early failure and returns a zero record, that
// failure Result and false: runtime.ErrNotFound maps to
// runtime_not_found, every other error to service_unavailable.
func (service *Service) loadRecord(ctx context.Context, opStartedAt time.Time, input Input) (runtime.RuntimeRecord, Result, bool) {
	record, getErr := service.runtimeRecords.Get(ctx, input.GameID)
	if getErr == nil {
		return record, Result{}, true
	}

	var code, detail string
	if errors.Is(getErr, runtime.ErrNotFound) {
		code = ErrorCodeRuntimeNotFound
		detail = "runtime record does not exist"
	} else {
		code = ErrorCodeServiceUnavailable
		detail = fmt.Sprintf("get runtime record: %s", getErr.Error())
	}

	rejection := service.recordEarlyFailure(ctx, opStartedAt, input, code, detail)
	return runtime.RuntimeRecord{}, rejection, false
}
// casToInProgress moves the runtime row from `running` to
// `generation_in_progress` with a compare-and-swap stamped at
// opStartedAt.
//
// It returns (zero Result, true) when the CAS succeeds. Otherwise it
// audits an early failure and returns (that Result, false); the engine
// has not been called at that point. runtime.ErrConflict maps to
// conflict, runtime.ErrNotFound to runtime_not_found, anything else to
// service_unavailable.
func (service *Service) casToInProgress(ctx context.Context, opStartedAt time.Time, input Input) (Result, bool) {
	claim := ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusGenerationInProgress,
		Now:          opStartedAt,
	}
	casErr := service.runtimeRecords.UpdateStatus(ctx, claim)
	if casErr == nil {
		return Result{}, true
	}

	var code string
	switch {
	case errors.Is(casErr, runtime.ErrConflict):
		code = ErrorCodeConflict
	case errors.Is(casErr, runtime.ErrNotFound):
		code = ErrorCodeRuntimeNotFound
	default:
		code = ErrorCodeServiceUnavailable
	}

	detail := fmt.Sprintf("cas runtime status to generation_in_progress: %s", casErr.Error())
	return service.recordEarlyFailure(ctx, opStartedAt, input, code, detail), false
}
// callEngineTurn invokes the engine turn call against
// record.EngineEndpoint and classifies the result.
//
// When the client returns no error, the response is passed through with
// engineOK=true and empty error fields. Otherwise state is zero,
// engineOK is false, errorCode comes from classifyEngineError and
// errorMessage carries the client error text.
func (service *Service) callEngineTurn(ctx context.Context, record runtime.RuntimeRecord) (state ports.StateResponse, engineOK bool, errorCode string, errorMessage string) {
	response, turnErr := service.engine.Turn(ctx, record.EngineEndpoint)
	if turnErr != nil {
		code := classifyEngineError(turnErr)
		detail := fmt.Sprintf("engine turn: %s", turnErr.Error())
		return ports.StateResponse{}, false, code, detail
	}
	return response, true, "", ""
}
// classifyEngineError translates an engine port error into the stable
// turn-generation error code. Validation and protocol-violation
// sentinels have their own codes; ports.ErrEngineUnreachable and every
// unrecognised error are reported as engine-unreachable.
func classifyEngineError(err error) string {
	if errors.Is(err, ports.ErrEngineValidation) {
		return ErrorCodeEngineValidationError
	}
	if errors.Is(err, ports.ErrEngineProtocolViolation) {
		return ErrorCodeEngineProtocolViolation
	}
	// Unreachable is both an explicit sentinel and the fallback bucket.
	return ErrorCodeEngineUnreachable
}
// validateRoster checks that the engine response carries exactly the
// race set installed at register-runtime.
//
// Three conditions are enforced, each reported as an engine protocol
// violation through failGeneration (which moves the runtime row to
// `generation_failed`):
//   - the player count equals the roster size;
//   - every engine race name exists in the roster;
//   - no race name appears twice in the engine response.
//
// The duplicate check is what makes the comparison a true set equality:
// without it, a response such as {A, A} would pass against roster
// {A, B} because the counts match and every name is known. A roster
// that itself repeats a race name can no longer be matched and is
// reported through the same duplicate path.
//
// It returns (zero Result, true) when the roster matches, otherwise
// (failure Result, false) and the flow must stop.
func (service *Service) validateRoster(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, state ports.StateResponse, mappings []playermapping.PlayerMapping) (Result, bool) {
	if len(state.Players) != len(mappings) {
		message := fmt.Sprintf("engine player count %d does not match roster size %d",
			len(state.Players), len(mappings))
		return service.failGeneration(ctx, opStartedAt, input, record,
			ErrorCodeEngineProtocolViolation, message), false
	}

	expected := make(map[string]struct{}, len(mappings))
	for _, mapping := range mappings {
		expected[mapping.RaceName] = struct{}{}
	}

	// seen records races already claimed by an earlier engine player.
	seen := make(map[string]struct{}, len(mappings))
	for _, player := range state.Players {
		if _, ok := expected[player.RaceName]; !ok {
			message := fmt.Sprintf("engine returned race %q not present in roster", player.RaceName)
			return service.failGeneration(ctx, opStartedAt, input, record,
				ErrorCodeEngineProtocolViolation, message), false
		}
		if _, dup := seen[player.RaceName]; dup {
			message := fmt.Sprintf("engine returned race %q more than once", player.RaceName)
			return service.failGeneration(ctx, opStartedAt, input, record,
				ErrorCodeEngineProtocolViolation, message), false
		}
		seen[player.RaceName] = struct{}{}
	}
	return Result{}, true
}
// completeFinished handles the `finished=true` branch of a successful
// engine call.
//
// Order of effects: CAS `generation_in_progress` -> `finished`, clear
// the schedule while recording the final turn, reload the row, publish
// `game_finished` to the lobby, publish the game.finished notification,
// write the success audit entry, record telemetry, log.
//
// A CAS error is delegated to handlePostEngineCASFailure. A scheduling
// or reload error is returned as a terminal service_unavailable failure;
// on those paths nothing is published. Lobby and notification publish
// errors are logged and do not fail the operation.
func (service *Service) completeFinished(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, state ports.StateResponse, mappings []playermapping.PlayerMapping) Result {
	now := service.clock().UTC()

	toFinished := ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusGenerationInProgress,
		To:           runtime.StatusFinished,
		Now:          now,
	}
	if casErr := service.runtimeRecords.UpdateStatus(ctx, toFinished); casErr != nil {
		return service.handlePostEngineCASFailure(ctx, opStartedAt, input, record, casErr)
	}

	clearSchedule := ports.UpdateSchedulingInput{
		GameID:           input.GameID,
		NextGenerationAt: nil,
		SkipNextTick:     false,
		CurrentTurn:      state.Turn,
		Now:              now,
	}
	if schedErr := service.runtimeRecords.UpdateScheduling(ctx, clearSchedule); schedErr != nil {
		// The row is already in the terminal `finished` state; report
		// service_unavailable to the caller and audit the failure.
		detail := fmt.Sprintf("update scheduling on finish: %s", schedErr.Error())
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, detail)
	}

	stored, getErr := service.runtimeRecords.Get(ctx, input.GameID)
	if getErr != nil {
		detail := fmt.Sprintf("reload runtime record: %s", getErr.Error())
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, detail)
	}

	event := ports.GameFinished{
		GameID:          input.GameID,
		FinalTurnNumber: state.Turn,
		RuntimeStatus:   runtime.StatusFinished,
		PlayerTurnStats: projectPlayerStats(state, mappings),
		FinishedAt:      now,
	}
	if publishErr := service.lobbyEvents.PublishGameFinished(ctx, event); publishErr == nil {
		service.telemetry.RecordLobbyEventPublished(ctx, "game_finished")
	} else {
		service.logger.ErrorContext(ctx, "publish game finished",
			"game_id", input.GameID,
			"err", publishErr.Error(),
		)
	}

	displayName := service.resolveGameName(ctx, input.GameID)
	userIDs := recipientUserIDs(mappings)
	service.publishGameFinishedIntent(ctx, input, displayName, state.Turn, userIDs, now)

	service.appendSuccessLog(ctx, opStartedAt, input)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeSuccess), "", string(input.Trigger))

	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"final_turn", state.Turn,
		"finished", true,
	}, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "turn generation finished game", attrs...)

	return Result{
		Record:   stored,
		Trigger:  input.Trigger,
		Finished: true,
		Outcome:  operation.OutcomeSuccess,
	}
}
// completeRunning handles the `finished=false` branch of a successful
// engine call.
//
// Order of effects: compute the next tick from the pre-CAS record's
// schedule and skip flag, CAS `generation_in_progress` -> `running`,
// persist the new schedule (clearing skip_next_tick and storing the
// engine turn), reload the row, publish a runtime snapshot to the lobby,
// publish the game.turn.ready notification, write the success audit
// entry, record telemetry, log.
//
// A scheduler error is routed through failGeneration with
// invalid_request. A CAS error is delegated to
// handlePostEngineCASFailure. A scheduling or reload error is returned
// as a terminal service_unavailable failure with no publication. Lobby
// and notification publish errors are logged and do not fail the
// operation.
func (service *Service) completeRunning(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, state ports.StateResponse, mappings []playermapping.PlayerMapping) Result {
	now := service.clock().UTC()

	next, _, computeErr := service.scheduler.ComputeNext(record.TurnSchedule, now, record.SkipNextTick)
	if computeErr != nil {
		detail := fmt.Sprintf("recompute next tick: %s", computeErr.Error())
		return service.failGeneration(ctx, opStartedAt, input, record,
			ErrorCodeInvalidRequest, detail)
	}

	backToRunning := ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusGenerationInProgress,
		To:           runtime.StatusRunning,
		Now:          now,
	}
	if casErr := service.runtimeRecords.UpdateStatus(ctx, backToRunning); casErr != nil {
		return service.handlePostEngineCASFailure(ctx, opStartedAt, input, record, casErr)
	}

	reschedule := ports.UpdateSchedulingInput{
		GameID:           input.GameID,
		NextGenerationAt: &next,
		SkipNextTick:     false,
		CurrentTurn:      state.Turn,
		Now:              now,
	}
	if schedErr := service.runtimeRecords.UpdateScheduling(ctx, reschedule); schedErr != nil {
		detail := fmt.Sprintf("update scheduling on running: %s", schedErr.Error())
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, detail)
	}

	stored, getErr := service.runtimeRecords.Get(ctx, input.GameID)
	if getErr != nil {
		detail := fmt.Sprintf("reload runtime record: %s", getErr.Error())
		return service.recordTerminalFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, detail)
	}

	update := ports.RuntimeSnapshotUpdate{
		GameID:              input.GameID,
		CurrentTurn:         state.Turn,
		RuntimeStatus:       runtime.StatusRunning,
		EngineHealthSummary: stored.EngineHealth,
		PlayerTurnStats:     projectPlayerStats(state, mappings),
		OccurredAt:          now,
	}
	if publishErr := service.lobbyEvents.PublishSnapshotUpdate(ctx, update); publishErr == nil {
		service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
	} else {
		service.logger.ErrorContext(ctx, "publish runtime snapshot update",
			"game_id", input.GameID,
			"err", publishErr.Error(),
		)
	}

	displayName := service.resolveGameName(ctx, input.GameID)
	userIDs := recipientUserIDs(mappings)
	service.publishGameTurnReadyIntent(ctx, input, displayName, state.Turn, userIDs, now)

	service.appendSuccessLog(ctx, opStartedAt, input)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeSuccess), "", string(input.Trigger))

	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"current_turn", state.Turn,
		"next_generation_at", next.Format(time.RFC3339Nano),
	}, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "turn generation succeeded", attrs...)

	return Result{
		Record:  stored,
		Trigger: input.Trigger,
		Outcome: operation.OutcomeSuccess,
	}
}
// failGeneration is the common exit for failures that happen while the
// runtime row is in `generation_in_progress`.
//
// Order of effects: best-effort CAS to `generation_failed`, reload the
// row, publish a runtime snapshot (without player stats), publish the
// game.generation_failed notification, write the failure audit entry,
// record telemetry, log.
//
// The CAS is best-effort: a conflict is ignored silently and any other
// CAS error is only logged; errorCode and errorMessage stay the
// caller-visible failure. The snapshot reports the reloaded status,
// turn and engine health when the reload works, and falls back to
// `generation_failed`, turn 0 and empty health when it does not. The
// returned Record is the reloaded row, or the zero value if the reload
// failed. The runtime-record parameter is unused.
func (service *Service) failGeneration(ctx context.Context, opStartedAt time.Time, input Input, _ runtime.RuntimeRecord, errorCode string, errorMessage string) Result {
	now := service.clock().UTC()

	toFailed := ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: runtime.StatusGenerationInProgress,
		To:           runtime.StatusGenerationFailed,
		Now:          now,
	}
	casErr := service.runtimeRecords.UpdateStatus(ctx, toFailed)
	switch {
	case casErr == nil, errors.Is(casErr, runtime.ErrConflict):
		// Either the transition happened or another actor owns the row.
	default:
		// Secondary failure: log it and keep the original error code.
		service.logger.ErrorContext(ctx, "cas runtime status to generation_failed",
			"game_id", input.GameID,
			"err", casErr.Error(),
		)
	}

	stored, getErr := service.runtimeRecords.Get(ctx, input.GameID)
	reportedStatus := stored.Status
	if getErr != nil {
		reportedStatus = runtime.StatusGenerationFailed
	}

	update := ports.RuntimeSnapshotUpdate{
		GameID:              input.GameID,
		CurrentTurn:         persistedTurn(stored, getErr),
		RuntimeStatus:       reportedStatus,
		EngineHealthSummary: persistedHealth(stored, getErr),
		PlayerTurnStats:     nil,
		OccurredAt:          now,
	}
	if publishErr := service.lobbyEvents.PublishSnapshotUpdate(ctx, update); publishErr == nil {
		service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
	} else {
		service.logger.ErrorContext(ctx, "publish runtime snapshot update on failure",
			"game_id", input.GameID,
			"err", publishErr.Error(),
		)
	}

	displayName := service.resolveGameName(ctx, input.GameID)
	service.publishGameGenerationFailedIntent(ctx, input, displayName, errorCode, errorMessage, now)

	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeFailure), errorCode, string(input.Trigger))

	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", string(input.Trigger),
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "turn generation failed", attrs...)

	return Result{
		Record:       stored,
		Trigger:      input.Trigger,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
// handlePostEngineCASFailure converts a CAS error raised after the
// engine call already succeeded into a terminal failure Result.
//
// runtime.ErrConflict (an external actor, e.g. admin stop, won the
// race) maps to conflict, runtime.ErrNotFound to runtime_not_found, and
// everything else to service_unavailable. Nothing is published here:
// the external mutation owns its own snapshot. The runtime-record
// parameter is unused.
func (service *Service) handlePostEngineCASFailure(ctx context.Context, opStartedAt time.Time, input Input, _ runtime.RuntimeRecord, casErr error) Result {
	var code string
	switch {
	case errors.Is(casErr, runtime.ErrConflict):
		code = ErrorCodeConflict
	case errors.Is(casErr, runtime.ErrNotFound):
		code = ErrorCodeRuntimeNotFound
	default:
		code = ErrorCodeServiceUnavailable
	}

	detail := fmt.Sprintf("cas runtime status post-engine: %s", casErr.Error())
	return service.recordTerminalFailure(ctx, opStartedAt, input, code, detail)
}
// recordEarlyFailure reports a failure that happened before the runtime
// row entered `generation_in_progress`. It mutates no status and
// publishes nothing: it writes the failure audit entry, bumps the
// outcome counter, emits a warning log and returns a failure Result
// with a zero Record.
func (service *Service) recordEarlyFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)

	trigger := string(input.Trigger)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeFailure), errorCode, trigger)

	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", trigger,
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "turn generation rejected", attrs...)

	var rejected Result
	rejected.Trigger = input.Trigger
	rejected.Outcome = operation.OutcomeFailure
	rejected.ErrorCode = errorCode
	rejected.ErrorMessage = errorMessage
	return rejected
}
// recordTerminalFailure reports a failure raised after the engine call,
// when a post-engine CAS, scheduling update or reload failed. The
// runtime row is then in whatever state the winning mutation left it,
// so this helper only audits: failure audit entry, outcome counter,
// warning log. It publishes nothing and returns a failure Result with a
// zero Record.
func (service *Service) recordTerminalFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
	service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)

	trigger := string(input.Trigger)
	service.telemetry.RecordTurnGenerationOutcome(ctx,
		string(operation.OutcomeFailure), errorCode, trigger)

	attrs := append([]any{
		"game_id", input.GameID,
		"trigger", trigger,
		"error_code", errorCode,
		"error_message", errorMessage,
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "turn generation post-engine failure", attrs...)

	var failed Result
	failed.Trigger = input.Trigger
	failed.Outcome = operation.OutcomeFailure
	failed.ErrorCode = errorCode
	failed.ErrorMessage = errorMessage
	return failed
}
// resolveGameName asks Lobby for the human-readable game name. It
// returns gameID instead when the lookup fails (a warning is logged,
// per Stage 15 D1) or when the returned name is blank after trimming.
// A non-blank name is returned untrimmed.
func (service *Service) resolveGameName(ctx context.Context, gameID string) string {
	summary, lookupErr := service.lobby.GetGameSummary(ctx, gameID)
	if lookupErr == nil {
		if name := summary.GameName; strings.TrimSpace(name) != "" {
			return name
		}
		return gameID
	}

	attrs := append([]any{
		"game_id", gameID,
		"error_code", "lobby_unavailable",
		"err", lookupErr.Error(),
	}, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "resolve game name fell back to game id", attrs...)
	return gameID
}
// publishGameTurnReadyIntent emits the user-targeted game.turn.ready
// notification for a freshly generated turn.
//
// An empty recipient list is skipped with a warning and no telemetry:
// the notificationintent validator would reject it, and the
// orchestrator must not break commit over it. Build and publish errors
// are logged and counted as "error"; a successful publish is counted as
// "ok". The idempotency key is derived from the game id and turn
// number.
func (service *Service) publishGameTurnReadyIntent(ctx context.Context, input Input, gameName string, turnNumber int, recipients []string, occurredAt time.Time) {
	if len(recipients) == 0 {
		service.logger.WarnContext(ctx, "skip game.turn.ready notification: empty recipient set",
			"game_id", input.GameID,
		)
		return
	}

	kind := string(notificationintent.NotificationTypeGameTurnReady)
	// reportFailure logs the failing step and counts the attempt as an error.
	reportFailure := func(step string, cause error) {
		service.logger.ErrorContext(ctx, step,
			"game_id", input.GameID,
			"err", cause.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx, kind, "error")
	}

	metadata := notificationintent.Metadata{
		IdempotencyKey: fmt.Sprintf("game.turn.ready:%s:%d", input.GameID, turnNumber),
		OccurredAt:     occurredAt,
		RequestID:      logging.RequestIDFromContext(ctx),
	}
	payload := notificationintent.GameTurnReadyPayload{
		GameID:     input.GameID,
		GameName:   gameName,
		TurnNumber: int64(turnNumber),
	}

	intent, buildErr := notificationintent.NewGameTurnReadyIntent(metadata, recipients, payload)
	if buildErr != nil {
		reportFailure("build game.turn.ready intent", buildErr)
		return
	}
	if publishErr := service.notifications.Publish(ctx, intent); publishErr != nil {
		reportFailure("publish game.turn.ready intent", publishErr)
		return
	}
	service.telemetry.RecordNotificationPublishAttempt(ctx, kind, "ok")
}
// publishGameFinishedIntent emits the user-targeted game.finished
// notification.
//
// An empty recipient list is skipped with a warning and no telemetry.
// Build and publish errors are logged and counted as "error"; a
// successful publish is counted as "ok". The idempotency key is derived
// from the game id and final turn number.
func (service *Service) publishGameFinishedIntent(ctx context.Context, input Input, gameName string, finalTurnNumber int, recipients []string, occurredAt time.Time) {
	if len(recipients) == 0 {
		service.logger.WarnContext(ctx, "skip game.finished notification: empty recipient set",
			"game_id", input.GameID,
		)
		return
	}

	kind := string(notificationintent.NotificationTypeGameFinished)
	// reportFailure logs the failing step and counts the attempt as an error.
	reportFailure := func(step string, cause error) {
		service.logger.ErrorContext(ctx, step,
			"game_id", input.GameID,
			"err", cause.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx, kind, "error")
	}

	metadata := notificationintent.Metadata{
		IdempotencyKey: fmt.Sprintf("game.finished:%s:%d", input.GameID, finalTurnNumber),
		OccurredAt:     occurredAt,
		RequestID:      logging.RequestIDFromContext(ctx),
	}
	payload := notificationintent.GameFinishedPayload{
		GameID:          input.GameID,
		GameName:        gameName,
		FinalTurnNumber: int64(finalTurnNumber),
	}

	intent, buildErr := notificationintent.NewGameFinishedIntent(metadata, recipients, payload)
	if buildErr != nil {
		reportFailure("build game.finished intent", buildErr)
		return
	}
	if publishErr := service.notifications.Publish(ctx, intent); publishErr != nil {
		reportFailure("publish game.finished intent", publishErr)
		return
	}
	service.telemetry.RecordNotificationPublishAttempt(ctx, kind, "ok")
}
// publishGameGenerationFailedIntent emits the admin-email
// game.generation_failed notification.
//
// The failure reason is the error code alone when errorMessage is blank
// after trimming, otherwise "<code>: <message>". The idempotency key is
// derived from the game id and occurredAt in Unix milliseconds. Build
// and publish errors are logged and counted as "error"; a successful
// publish is counted as "ok".
func (service *Service) publishGameGenerationFailedIntent(ctx context.Context, input Input, gameName string, errorCode string, errorMessage string, occurredAt time.Time) {
	var reason string
	if strings.TrimSpace(errorMessage) == "" {
		reason = errorCode
	} else {
		reason = fmt.Sprintf("%s: %s", errorCode, errorMessage)
	}

	kind := string(notificationintent.NotificationTypeGameGenerationFailed)
	// reportFailure logs the failing step and counts the attempt as an error.
	reportFailure := func(step string, cause error) {
		service.logger.ErrorContext(ctx, step,
			"game_id", input.GameID,
			"err", cause.Error(),
		)
		service.telemetry.RecordNotificationPublishAttempt(ctx, kind, "error")
	}

	metadata := notificationintent.Metadata{
		IdempotencyKey: fmt.Sprintf("game.generation_failed:%s:%d",
			input.GameID, occurredAt.UnixMilli()),
		OccurredAt: occurredAt,
		RequestID:  logging.RequestIDFromContext(ctx),
	}
	payload := notificationintent.GameGenerationFailedPayload{
		GameID:        input.GameID,
		GameName:      gameName,
		FailureReason: reason,
	}

	intent, buildErr := notificationintent.NewGameGenerationFailedIntent(metadata, payload)
	if buildErr != nil {
		reportFailure("build game.generation_failed intent", buildErr)
		return
	}
	if publishErr := service.notifications.Publish(ctx, intent); publishErr != nil {
		reportFailure("publish game.generation_failed intent", publishErr)
		return
	}
	service.telemetry.RecordNotificationPublishAttempt(ctx, kind, "ok")
}
// projectPlayerStats joins the engine players against the roster on
// RaceName and emits one PlayerTurnStats per matched player, sorted by
// UserID. Players whose race is absent from the roster are dropped. It
// returns nil when either input is empty; otherwise the result is
// non-nil even if nothing matched.
func projectPlayerStats(state ports.StateResponse, mappings []playermapping.PlayerMapping) []ports.PlayerTurnStats {
	playerCount := len(state.Players)
	if playerCount == 0 || len(mappings) == 0 {
		return nil
	}

	// When a race name repeats in the roster, the last mapping wins.
	owners := make(map[string]string, len(mappings))
	for i := range mappings {
		owners[mappings[i].RaceName] = mappings[i].UserID
	}

	projected := make([]ports.PlayerTurnStats, 0, playerCount)
	for _, player := range state.Players {
		if owner, known := owners[player.RaceName]; known {
			projected = append(projected, ports.PlayerTurnStats{
				UserID:     owner,
				Planets:    player.Planets,
				Population: player.Population,
			})
		}
	}

	sort.Slice(projected, func(a, b int) bool {
		return projected[a].UserID < projected[b].UserID
	})
	return projected
}
// recipientUserIDs extracts the platform user ids from the roster as a
// deduplicated, ascending list, mirroring what the notificationintent
// validator expects. Ids are trimmed and blank ones are dropped. It
// returns nil for an empty roster; otherwise the result is non-nil even
// if every id was blank.
func recipientUserIDs(mappings []playermapping.PlayerMapping) []string {
	if len(mappings) == 0 {
		return nil
	}

	unique := make(map[string]struct{}, len(mappings))
	for i := range mappings {
		if id := strings.TrimSpace(mappings[i].UserID); id != "" {
			unique[id] = struct{}{}
		}
	}

	// Map iteration order is random; the sort below fixes the order.
	ids := make([]string, 0, len(unique))
	for id := range unique {
		ids = append(ids, id)
	}
	sort.Strings(ids)
	return ids
}
// persistedTurn picks the turn number for the failure-side snapshot:
// record.CurrentTurn when the reload succeeded (reloadErr is nil), zero
// otherwise. It performs no store access of its own.
func persistedTurn(record runtime.RuntimeRecord, reloadErr error) int {
	if reloadErr == nil {
		return record.CurrentTurn
	}
	return 0
}
// persistedHealth picks the engine health summary for the failure-side
// snapshot: record.EngineHealth when the reload succeeded (reloadErr is
// nil), the empty string otherwise.
func persistedHealth(record runtime.RuntimeRecord, reloadErr error) string {
	if reloadErr == nil {
		return record.EngineHealth
	}
	return ""
}
// appendSuccessLog writes the success operation_log entry for this
// turn generation. StartedAt is opStartedAt and FinishedAt is the clock
// reading (UTC) taken here. The write is best-effort.
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
	completed := service.clock().UTC()

	entry := operation.OperationEntry{
		GameID:     input.GameID,
		OpKind:     operation.OpKindTurnGeneration,
		OpSource:   fallbackOpSource(input.OpSource),
		SourceRef:  input.SourceRef,
		Outcome:    operation.OutcomeSuccess,
		StartedAt:  opStartedAt,
		FinishedAt: &completed,
	}
	service.bestEffortAppend(ctx, entry)
}
// appendFailureLog writes the failure operation_log entry for this turn
// generation, carrying errorCode and errorMessage. StartedAt is
// opStartedAt and FinishedAt is the clock reading (UTC) taken here. The
// write is best-effort.
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
	completed := service.clock().UTC()

	entry := operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindTurnGeneration,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &completed,
	}
	service.bestEffortAppend(ctx, entry)
}
// bestEffortAppend appends entry to the operation log. An append error
// is logged and then dropped: the runtime row, not the audit log, is
// the source of truth, so callers are never failed by it.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	_, appendErr := service.operationLogs.Append(ctx, entry)
	if appendErr == nil {
		return
	}
	service.logger.ErrorContext(ctx, "append operation log",
		"game_id", entry.GameID,
		"op_kind", string(entry.OpKind),
		"outcome", string(entry.Outcome),
		"error_code", entry.ErrorCode,
		"err", appendErr.Error(),
	)
}
// fallbackOpSource returns source unchanged when it is a known op
// source and operation.OpSourceAdminRest when it is missing or
// unrecognised. Mirrors `gamemaster/README.md §Trusted Surfaces`.
func fallbackOpSource(source operation.OpSource) operation.OpSource {
	if !source.IsKnown() {
		return operation.OpSourceAdminRest
	}
	return source
}