727 lines
26 KiB
Go
727 lines
26 KiB
Go
// Package registerruntime implements the register-runtime service-layer
|
|
// orchestrator owned by Game Master. The service is the single entry
|
|
// point Game Lobby uses (after Runtime Manager has reported a successful
|
|
// container start) to install a freshly-started game in Game Master.
|
|
//
|
|
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
|
|
// §Lifecycles → Register-runtime`. Design rationale is captured in
|
|
// `gamemaster/docs/stage13-register-runtime.md`.
|
|
package registerruntime
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"galaxy/gamemaster/internal/domain/engineversion"
|
|
"galaxy/gamemaster/internal/domain/operation"
|
|
"galaxy/gamemaster/internal/domain/playermapping"
|
|
"galaxy/gamemaster/internal/domain/runtime"
|
|
"galaxy/gamemaster/internal/domain/schedule"
|
|
"galaxy/gamemaster/internal/logging"
|
|
"galaxy/gamemaster/internal/ports"
|
|
"galaxy/gamemaster/internal/telemetry"
|
|
)
|
|
|
|
// Member stores one entry of Input.Members. The shape mirrors
// `RegisterRuntimeMember` in `gamemaster/api/internal-openapi.yaml`.
// Both fields must be non-empty and unique across the roster; see
// Input.Validate and firstDuplicateMember.
type Member struct {
	// UserID identifies an active platform member of the game.
	UserID string

	// RaceName stores the race name reserved for the member by Game
	// Lobby. Used both to build the engine /admin/init roster and to
	// resolve the engine response back to user_id.
	RaceName string
}
|
|
|
|
// Input stores the per-call arguments for one register-runtime
// operation. The shape mirrors `RegisterRuntimeRequest` plus the
// audit-only OpSource / SourceRef pair. Validate must pass before
// Handle touches any store.
type Input struct {
	// GameID identifies the platform game whose runtime is being
	// registered. Must be non-empty.
	GameID string

	// EngineEndpoint stores the engine container URL Game Master uses
	// for every subsequent call against the runtime
	// (`http://galaxy-game-{game_id}:8080`). Must be non-empty.
	EngineEndpoint string

	// Members stores the per-active-member roster Game Lobby committed
	// when the platform game opened. Must be non-empty, with unique
	// user ids and race names.
	Members []Member

	// TargetEngineVersion stores the semver under which Runtime Manager
	// started the container. Resolved against the engine_versions
	// registry to recover the matching image_ref.
	TargetEngineVersion string

	// TurnSchedule stores the five-field cron expression governing turn
	// generation, copied from the platform game record.
	TurnSchedule string

	// OpSource classifies how the request entered Game Master. Required:
	// every operation_log entry carries an op_source.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference (request
	// id, admin user id). Empty when the caller does not provide one.
	SourceRef string
}
|
|
|
|
// Validate reports whether input carries the structural invariants the
|
|
// service requires before any store is touched.
|
|
func (input Input) Validate() error {
|
|
if strings.TrimSpace(input.GameID) == "" {
|
|
return fmt.Errorf("game id must not be empty")
|
|
}
|
|
if strings.TrimSpace(input.EngineEndpoint) == "" {
|
|
return fmt.Errorf("engine endpoint must not be empty")
|
|
}
|
|
if len(input.Members) == 0 {
|
|
return fmt.Errorf("members must not be empty")
|
|
}
|
|
for index, member := range input.Members {
|
|
if strings.TrimSpace(member.UserID) == "" {
|
|
return fmt.Errorf("members[%d]: user id must not be empty", index)
|
|
}
|
|
if strings.TrimSpace(member.RaceName) == "" {
|
|
return fmt.Errorf("members[%d]: race name must not be empty", index)
|
|
}
|
|
}
|
|
if strings.TrimSpace(input.TargetEngineVersion) == "" {
|
|
return fmt.Errorf("target engine version must not be empty")
|
|
}
|
|
if strings.TrimSpace(input.TurnSchedule) == "" {
|
|
return fmt.Errorf("turn schedule must not be empty")
|
|
}
|
|
if !input.OpSource.IsKnown() {
|
|
return fmt.Errorf("op source %q is unsupported", input.OpSource)
|
|
}
|
|
if duplicate := firstDuplicateMember(input.Members); duplicate != "" {
|
|
return fmt.Errorf("members carry duplicate entries for %q", duplicate)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// firstDuplicateMember returns the first user_id or race_name that
|
|
// appears more than once in members. Empty when every entry is unique.
|
|
func firstDuplicateMember(members []Member) string {
|
|
seenUsers := make(map[string]struct{}, len(members))
|
|
seenRaces := make(map[string]struct{}, len(members))
|
|
for _, member := range members {
|
|
if _, ok := seenUsers[member.UserID]; ok {
|
|
return member.UserID
|
|
}
|
|
seenUsers[member.UserID] = struct{}{}
|
|
if _, ok := seenRaces[member.RaceName]; ok {
|
|
return member.RaceName
|
|
}
|
|
seenRaces[member.RaceName] = struct{}{}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver). Exactly one of
// {Record, ErrorCode+ErrorMessage} is populated depending on Outcome.
type Result struct {
	// Record carries the runtime record installed by the operation.
	// Populated on success; zero on failure.
	Record runtime.RuntimeRecord

	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome

	// ErrorCode stores the stable error code on failure. Empty on
	// success.
	ErrorCode string

	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}
|
|
|
|
// IsSuccess reports whether the result represents a successful
|
|
// operation.
|
|
func (result Result) IsSuccess() bool {
|
|
return result.Outcome == operation.OutcomeSuccess
|
|
}
|
|
|
|
// Dependencies groups the collaborators required by Service. Every
// field except Logger and Clock is required; NewService rejects nil
// values for the required fields and defaults the optional two.
type Dependencies struct {
	// RuntimeRecords stores the runtime_records row installed by the
	// flow. Required.
	RuntimeRecords ports.RuntimeRecordStore

	// EngineVersions resolves `target_engine_version` to the matching
	// image_ref and validates the version exists. Required.
	EngineVersions ports.EngineVersionStore

	// PlayerMappings persists the (game_id, user_id) → race_name
	// projection derived from the engine /admin/init response. Required.
	PlayerMappings ports.PlayerMappingStore

	// OperationLogs records the audit entry for the operation. Required.
	OperationLogs ports.OperationLogStore

	// Engine drives the engine /admin/init call and decodes the
	// response. Required.
	Engine ports.EngineClient

	// LobbyEvents publishes the post-success runtime_snapshot_update
	// to `gm:lobby_events`. Required.
	LobbyEvents ports.LobbyEventsPublisher

	// Telemetry records register-runtime outcomes plus the snapshot
	// publication counter. Required.
	Telemetry *telemetry.Runtime

	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger

	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
|
|
|
|
// Service executes the register-runtime lifecycle operation. Construct
// it with NewService; the zero value is unusable (clock and logger are
// nil). Fields are assigned once at construction and never reassigned
// by the methods in this file.
type Service struct {
	// Storage and transport collaborators, mirrored from Dependencies.
	runtimeRecords ports.RuntimeRecordStore
	engineVersions ports.EngineVersionStore
	playerMappings ports.PlayerMappingStore
	operationLogs  ports.OperationLogStore
	engine         ports.EngineClient
	lobbyEvents    ports.LobbyEventsPublisher

	// Observability and time sources, defaulted by NewService.
	telemetry *telemetry.Runtime
	logger    *slog.Logger
	clock     func() time.Time
}
|
|
|
|
// NewService constructs one Service from deps.
|
|
func NewService(deps Dependencies) (*Service, error) {
|
|
switch {
|
|
case deps.RuntimeRecords == nil:
|
|
return nil, errors.New("new register runtime service: nil runtime records")
|
|
case deps.EngineVersions == nil:
|
|
return nil, errors.New("new register runtime service: nil engine versions")
|
|
case deps.PlayerMappings == nil:
|
|
return nil, errors.New("new register runtime service: nil player mappings")
|
|
case deps.OperationLogs == nil:
|
|
return nil, errors.New("new register runtime service: nil operation logs")
|
|
case deps.Engine == nil:
|
|
return nil, errors.New("new register runtime service: nil engine client")
|
|
case deps.LobbyEvents == nil:
|
|
return nil, errors.New("new register runtime service: nil lobby events publisher")
|
|
case deps.Telemetry == nil:
|
|
return nil, errors.New("new register runtime service: nil telemetry runtime")
|
|
}
|
|
|
|
clock := deps.Clock
|
|
if clock == nil {
|
|
clock = time.Now
|
|
}
|
|
logger := deps.Logger
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
logger = logger.With("service", "gamemaster.registerruntime")
|
|
|
|
return &Service{
|
|
runtimeRecords: deps.RuntimeRecords,
|
|
engineVersions: deps.EngineVersions,
|
|
playerMappings: deps.PlayerMappings,
|
|
operationLogs: deps.OperationLogs,
|
|
engine: deps.Engine,
|
|
lobbyEvents: deps.LobbyEvents,
|
|
telemetry: deps.Telemetry,
|
|
logger: logger,
|
|
clock: clock,
|
|
}, nil
|
|
}
|
|
|
|
// Handle executes one register-runtime operation end-to-end. The
// Go-level error return is reserved for non-business failures (nil
// context, nil receiver). Every business outcome flows through Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("register runtime: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("register runtime: nil context")
	}

	// A single timestamp anchors the whole operation: the runtime row's
	// created_at, the player_mappings created_at, and operation_log
	// started_at all use this value.
	opStartedAt := service.clock().UTC()

	// Structural validation runs before any store is touched; no
	// rollback is needed for this failure class.
	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, opStartedAt, input, false, false,
			ErrorCodeInvalidRequest, err.Error()), nil
	}

	// An existing runtime row is a conflict, not an idempotent no-op.
	if outcome, ok := service.rejectExisting(ctx, opStartedAt, input); ok {
		return outcome, nil
	}

	// Resolve target_engine_version before inserting anything so an
	// unknown version fails with nothing to roll back.
	imageRef, outcome, ok := service.resolveImageRef(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}

	// Insert the row in `starting` state. ErrConflict here means a
	// concurrent registration won the race, so no rollback is issued
	// (the row belongs to the other caller).
	record := service.buildStartingRecord(input, imageRef, opStartedAt)
	if err := service.runtimeRecords.Insert(ctx, record); err != nil {
		switch {
		case errors.Is(err, runtime.ErrConflict):
			return service.recordFailure(ctx, opStartedAt, input, false, false,
				ErrorCodeConflict, "runtime record already exists"), nil
		default:
			return service.recordFailure(ctx, opStartedAt, input, false, false,
				ErrorCodeServiceUnavailable, fmt.Sprintf("insert runtime record: %s", err.Error())), nil
		}
	}

	// From here on every helper passes runtimeInserted=true so a
	// failure rolls the freshly inserted runtime row back.
	engineState, outcome, ok := service.callEngineInit(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}

	if outcome, ok := service.validateRoster(ctx, opStartedAt, input, engineState); !ok {
		return outcome, nil
	}

	if outcome, ok := service.installPlayerMappings(ctx, opStartedAt, input, engineState); !ok {
		return outcome, nil
	}

	nextGenerationAt, outcome, ok := service.computeNextGeneration(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}

	if outcome, ok := service.casToRunning(ctx, opStartedAt, input); !ok {
		return outcome, nil
	}

	if outcome, ok := service.persistInitialScheduling(ctx, opStartedAt, input, nextGenerationAt); !ok {
		return outcome, nil
	}

	// Re-read the row so Result.Record carries the adapter-set
	// post-CAS, post-scheduling timestamps.
	persisted, outcome, ok := service.reloadRecord(ctx, opStartedAt, input)
	if !ok {
		return outcome, nil
	}

	stats := projectInitToStats(engineState, input.Members)

	// Post-success side effects are best-effort by contract: audit log,
	// snapshot publication, and telemetry cannot fail the operation.
	service.appendSuccessLog(ctx, opStartedAt, input)
	service.publishSnapshot(ctx, persisted, stats, opStartedAt)
	service.telemetry.RecordRegisterRuntimeOutcome(ctx, string(operation.OutcomeSuccess), "")

	logArgs := []any{
		"game_id", input.GameID,
		"engine_version", input.TargetEngineVersion,
		"members", len(input.Members),
		"op_source", string(input.OpSource),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime registered", logArgs...)

	return Result{
		Record:  persisted,
		Outcome: operation.OutcomeSuccess,
	}, nil
}
|
|
|
|
// rejectExisting returns a Result and ok=true when the runtime record
|
|
// already exists or the lookup itself failed; ok=false continues the
|
|
// flow.
|
|
func (service *Service) rejectExisting(ctx context.Context, opStartedAt time.Time, input Input) (Result, bool) {
|
|
_, err := service.runtimeRecords.Get(ctx, input.GameID)
|
|
switch {
|
|
case errors.Is(err, runtime.ErrNotFound):
|
|
return Result{}, false
|
|
case err != nil:
|
|
return service.recordFailure(ctx, opStartedAt, input, false, false,
|
|
ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error())), true
|
|
default:
|
|
return service.recordFailure(ctx, opStartedAt, input, false, false,
|
|
ErrorCodeConflict, "runtime record already exists"), true
|
|
}
|
|
}
|
|
|
|
// resolveImageRef resolves the target engine version against the
|
|
// engine_versions registry. Returns ok=false on failure with the
|
|
// matching Result.
|
|
func (service *Service) resolveImageRef(ctx context.Context, opStartedAt time.Time, input Input) (string, Result, bool) {
|
|
version, err := service.engineVersions.Get(ctx, input.TargetEngineVersion)
|
|
switch {
|
|
case errors.Is(err, engineversion.ErrNotFound):
|
|
return "", service.recordFailure(ctx, opStartedAt, input, false, false,
|
|
ErrorCodeEngineVersionNotFound,
|
|
fmt.Sprintf("engine version %q not found", input.TargetEngineVersion)), false
|
|
case err != nil:
|
|
return "", service.recordFailure(ctx, opStartedAt, input, false, false,
|
|
ErrorCodeServiceUnavailable, fmt.Sprintf("get engine version: %s", err.Error())), false
|
|
}
|
|
return version.ImageRef, Result{}, true
|
|
}
|
|
|
|
// buildStartingRecord assembles the initial runtime_records row,
|
|
// matching `gamemaster/README.md §Lifecycles → Register-runtime` step 4.
|
|
func (service *Service) buildStartingRecord(input Input, imageRef string, now time.Time) runtime.RuntimeRecord {
|
|
return runtime.RuntimeRecord{
|
|
GameID: input.GameID,
|
|
Status: runtime.StatusStarting,
|
|
EngineEndpoint: input.EngineEndpoint,
|
|
CurrentImageRef: imageRef,
|
|
CurrentEngineVersion: input.TargetEngineVersion,
|
|
TurnSchedule: input.TurnSchedule,
|
|
CurrentTurn: 0,
|
|
NextGenerationAt: nil,
|
|
SkipNextTick: false,
|
|
EngineHealth: "",
|
|
CreatedAt: now,
|
|
UpdatedAt: now,
|
|
}
|
|
}
|
|
|
|
// callEngineInit dispatches the engine /admin/init call and maps the
|
|
// transport-layer error to a stable Result code. ok=false means the
|
|
// flow stops.
|
|
func (service *Service) callEngineInit(ctx context.Context, opStartedAt time.Time, input Input) (ports.StateResponse, Result, bool) {
|
|
races := make([]ports.InitRace, 0, len(input.Members))
|
|
for _, member := range input.Members {
|
|
races = append(races, ports.InitRace{RaceName: member.RaceName})
|
|
}
|
|
state, err := service.engine.Init(ctx, input.EngineEndpoint, ports.InitRequest{Races: races})
|
|
if err == nil {
|
|
return state, Result{}, true
|
|
}
|
|
|
|
code := classifyEngineError(err)
|
|
message := fmt.Sprintf("engine init: %s", err.Error())
|
|
return ports.StateResponse{}, service.recordFailure(ctx, opStartedAt, input, true, false, code, message), false
|
|
}
|
|
|
|
// classifyEngineError maps the engine port sentinels to the
|
|
// register-runtime stable error codes per Stage 13 D1.
|
|
func classifyEngineError(err error) string {
|
|
switch {
|
|
case errors.Is(err, ports.ErrEngineValidation):
|
|
return ErrorCodeEngineValidationError
|
|
case errors.Is(err, ports.ErrEngineProtocolViolation):
|
|
return ErrorCodeEngineProtocolViolation
|
|
case errors.Is(err, ports.ErrEngineUnreachable):
|
|
return ErrorCodeEngineUnreachable
|
|
default:
|
|
return ErrorCodeEngineUnreachable
|
|
}
|
|
}
|
|
|
|
// validateRoster checks that the engine response carries exactly the
|
|
// race set Game Master sent on /admin/init. ok=false means the flow
|
|
// stops.
|
|
func (service *Service) validateRoster(ctx context.Context, opStartedAt time.Time, input Input, state ports.StateResponse) (Result, bool) {
|
|
if len(state.Players) != len(input.Members) {
|
|
message := fmt.Sprintf("engine player count %d does not match roster size %d", len(state.Players), len(input.Members))
|
|
return service.recordFailure(ctx, opStartedAt, input, true, false,
|
|
ErrorCodeEngineProtocolViolation, message), false
|
|
}
|
|
expected := make(map[string]struct{}, len(input.Members))
|
|
for _, member := range input.Members {
|
|
expected[member.RaceName] = struct{}{}
|
|
}
|
|
for _, player := range state.Players {
|
|
if _, ok := expected[player.RaceName]; !ok {
|
|
message := fmt.Sprintf("engine returned race %q not present in roster", player.RaceName)
|
|
return service.recordFailure(ctx, opStartedAt, input, true, false,
|
|
ErrorCodeEngineProtocolViolation, message), false
|
|
}
|
|
}
|
|
return Result{}, true
|
|
}
|
|
|
|
// installPlayerMappings projects the engine response onto
|
|
// player_mappings rows and persists them in one batch. ok=false means
|
|
// the flow stops (and rolls back both stores).
|
|
func (service *Service) installPlayerMappings(ctx context.Context, opStartedAt time.Time, input Input, state ports.StateResponse) (Result, bool) {
|
|
userByRace := make(map[string]string, len(input.Members))
|
|
for _, member := range input.Members {
|
|
userByRace[member.RaceName] = member.UserID
|
|
}
|
|
|
|
mappings := make([]playermapping.PlayerMapping, 0, len(state.Players))
|
|
for _, player := range state.Players {
|
|
userID, ok := userByRace[player.RaceName]
|
|
if !ok {
|
|
message := fmt.Sprintf("engine returned race %q not present in roster", player.RaceName)
|
|
return service.recordFailure(ctx, opStartedAt, input, true, false,
|
|
ErrorCodeEngineProtocolViolation, message), false
|
|
}
|
|
mappings = append(mappings, playermapping.PlayerMapping{
|
|
GameID: input.GameID,
|
|
UserID: userID,
|
|
RaceName: player.RaceName,
|
|
EnginePlayerUUID: player.EnginePlayerUUID,
|
|
CreatedAt: opStartedAt,
|
|
})
|
|
}
|
|
|
|
if err := service.playerMappings.BulkInsert(ctx, mappings); err != nil {
|
|
// BulkInsert is per-statement atomic (stage 11 D7), so a failure
|
|
// leaves no mappings to clean up — only the runtime row.
|
|
switch {
|
|
case errors.Is(err, playermapping.ErrConflict):
|
|
return service.recordFailure(ctx, opStartedAt, input, true, false,
|
|
ErrorCodeConflict, fmt.Sprintf("bulk insert player mappings: %s", err.Error())), false
|
|
default:
|
|
return service.recordFailure(ctx, opStartedAt, input, true, false,
|
|
ErrorCodeServiceUnavailable, fmt.Sprintf("bulk insert player mappings: %s", err.Error())), false
|
|
}
|
|
}
|
|
return Result{}, true
|
|
}
|
|
|
|
// computeNextGeneration parses the cron schedule and computes the first
|
|
// next-generation timestamp (no skip pending). ok=false means the flow
|
|
// stops with rollback.
|
|
func (service *Service) computeNextGeneration(ctx context.Context, opStartedAt time.Time, input Input) (time.Time, Result, bool) {
|
|
sched, err := schedule.Parse(input.TurnSchedule)
|
|
if err != nil {
|
|
return time.Time{}, service.recordFailure(ctx, opStartedAt, input, true, true,
|
|
ErrorCodeInvalidRequest, fmt.Sprintf("parse turn schedule: %s", err.Error())), false
|
|
}
|
|
next, _ := sched.Next(opStartedAt, false)
|
|
return next.UTC(), Result{}, true
|
|
}
|
|
|
|
// casToRunning flips the runtime record from `starting` to `running`.
|
|
// On CAS failure or any storage error the flow rolls back both stores.
|
|
func (service *Service) casToRunning(ctx context.Context, opStartedAt time.Time, input Input) (Result, bool) {
|
|
err := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
|
|
GameID: input.GameID,
|
|
ExpectedFrom: runtime.StatusStarting,
|
|
To: runtime.StatusRunning,
|
|
Now: opStartedAt,
|
|
})
|
|
switch {
|
|
case err == nil:
|
|
return Result{}, true
|
|
case errors.Is(err, runtime.ErrConflict):
|
|
return service.recordFailure(ctx, opStartedAt, input, true, true,
|
|
ErrorCodeConflict, fmt.Sprintf("cas runtime status to running: %s", err.Error())), false
|
|
default:
|
|
return service.recordFailure(ctx, opStartedAt, input, true, true,
|
|
ErrorCodeServiceUnavailable, fmt.Sprintf("cas runtime status to running: %s", err.Error())), false
|
|
}
|
|
}
|
|
|
|
// persistInitialScheduling writes the first `next_generation_at` and
|
|
// the (already false) skip flag plus turn=0 on the runtime row.
|
|
// Failure rolls back both stores.
|
|
func (service *Service) persistInitialScheduling(ctx context.Context, opStartedAt time.Time, input Input, next time.Time) (Result, bool) {
|
|
err := service.runtimeRecords.UpdateScheduling(ctx, ports.UpdateSchedulingInput{
|
|
GameID: input.GameID,
|
|
NextGenerationAt: &next,
|
|
SkipNextTick: false,
|
|
CurrentTurn: 0,
|
|
Now: opStartedAt,
|
|
})
|
|
if err != nil {
|
|
return service.recordFailure(ctx, opStartedAt, input, true, true,
|
|
ErrorCodeServiceUnavailable, fmt.Sprintf("update initial scheduling: %s", err.Error())), false
|
|
}
|
|
return Result{}, true
|
|
}
|
|
|
|
// reloadRecord re-reads the runtime row so the returned Result.Record
|
|
// carries the post-CAS, post-scheduling timestamps the adapters set.
|
|
// On read failure the flow rolls back both stores.
|
|
func (service *Service) reloadRecord(ctx context.Context, opStartedAt time.Time, input Input) (runtime.RuntimeRecord, Result, bool) {
|
|
persisted, err := service.runtimeRecords.Get(ctx, input.GameID)
|
|
if err != nil {
|
|
return runtime.RuntimeRecord{}, service.recordFailure(ctx, opStartedAt, input, true, true,
|
|
ErrorCodeServiceUnavailable, fmt.Sprintf("reload runtime record: %s", err.Error())), false
|
|
}
|
|
return persisted, Result{}, true
|
|
}
|
|
|
|
// projectInitToStats joins the engine /admin/init response on RaceName
|
|
// against the input roster to produce one PlayerTurnStats per active
|
|
// member. The caller has already validated that every player race name
|
|
// is present in the roster, so the lookup is total.
|
|
func projectInitToStats(state ports.StateResponse, members []Member) []ports.PlayerTurnStats {
|
|
if len(state.Players) == 0 {
|
|
return nil
|
|
}
|
|
userByRace := make(map[string]string, len(members))
|
|
for _, member := range members {
|
|
userByRace[member.RaceName] = member.UserID
|
|
}
|
|
stats := make([]ports.PlayerTurnStats, 0, len(state.Players))
|
|
for _, player := range state.Players {
|
|
userID, ok := userByRace[player.RaceName]
|
|
if !ok {
|
|
continue
|
|
}
|
|
stats = append(stats, ports.PlayerTurnStats{
|
|
UserID: userID,
|
|
Planets: player.Planets,
|
|
Population: player.Population,
|
|
})
|
|
}
|
|
sort.Slice(stats, func(i, j int) bool { return stats[i].UserID < stats[j].UserID })
|
|
return stats
|
|
}
|
|
|
|
// recordFailure assembles the failure Result, rolls back any installed
// state, appends the operation_log failure entry, and emits telemetry.
// runtimeInserted reports whether the runtime row was already
// installed; playerMappingsInstalled reports whether the player_mappings
// rows were installed too. The two booleans gate the rollback so a
// race-induced ErrConflict from Insert does not delete a row owned by
// another caller.
func (service *Service) recordFailure(
	ctx context.Context,
	opStartedAt time.Time,
	input Input,
	runtimeInserted bool,
	playerMappingsInstalled bool,
	errorCode string,
	errorMessage string,
) Result {
	// Rollback runs first so the stores are clean before the audit
	// trail, telemetry, and log describe the failure.
	if runtimeInserted {
		service.rollback(ctx, input.GameID, playerMappingsInstalled)
	}

	// The audit entry is best-effort: an Append failure is logged
	// inside bestEffortAppend and never masks the original error code.
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       input.GameID,
		OpKind:       operation.OpKindRegisterRuntime,
		OpSource:     fallbackOpSource(input.OpSource),
		SourceRef:    input.SourceRef,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    opStartedAt,
		FinishedAt:   &finishedAt,
	})

	service.telemetry.RecordRegisterRuntimeOutcome(ctx, string(operation.OutcomeFailure), errorCode)

	logArgs := []any{
		"game_id", input.GameID,
		"engine_version", input.TargetEngineVersion,
		"op_source", string(input.OpSource),
		"error_code", errorCode,
		"error_message", errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "register runtime failed", logArgs...)

	// Record stays zero on failure, per the Result contract.
	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
	}
}
|
|
|
|
// rollback removes any installed state. Both store calls are
|
|
// idempotent; failures are logged but never overwrite the original
|
|
// failure reason. A fresh background context is used so a cancelled
|
|
// request context does not strand the row.
|
|
func (service *Service) rollback(ctx context.Context, gameID string, playerMappingsInstalled bool) {
|
|
cleanupCtx, cancel := context.WithTimeout(context.Background(), rollbackTimeout)
|
|
defer cancel()
|
|
if playerMappingsInstalled {
|
|
if err := service.playerMappings.DeleteByGame(cleanupCtx, gameID); err != nil {
|
|
service.logger.ErrorContext(ctx, "rollback player mappings",
|
|
"game_id", gameID,
|
|
"err", err.Error(),
|
|
)
|
|
}
|
|
}
|
|
if err := service.runtimeRecords.Delete(cleanupCtx, gameID); err != nil {
|
|
service.logger.ErrorContext(ctx, "rollback runtime record",
|
|
"game_id", gameID,
|
|
"err", err.Error(),
|
|
)
|
|
}
|
|
}
|
|
|
|
// rollbackTimeout bounds each rollback storage call. A fresh background
// context is used so a canceled request context does not block the
// cleanup; the timeout matches the shape used by
// `rtmanager/internal/service/startruntime.Service.releaseLease`.
// Applied per call in rollback via context.WithTimeout.
const rollbackTimeout = 5 * time.Second
|
|
|
|
// appendSuccessLog records the success operation_log entry for the
|
|
// completed register-runtime operation.
|
|
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
|
|
finishedAt := service.clock().UTC()
|
|
service.bestEffortAppend(ctx, operation.OperationEntry{
|
|
GameID: input.GameID,
|
|
OpKind: operation.OpKindRegisterRuntime,
|
|
OpSource: fallbackOpSource(input.OpSource),
|
|
SourceRef: input.SourceRef,
|
|
Outcome: operation.OutcomeSuccess,
|
|
StartedAt: opStartedAt,
|
|
FinishedAt: &finishedAt,
|
|
})
|
|
}
|
|
|
|
// publishSnapshot publishes the post-success runtime_snapshot_update
|
|
// per `gamemaster/README.md §Lifecycles → Register-runtime` step 9.
|
|
// Failures are logged but do not roll back the just-installed runtime
|
|
// record; the snapshot stream is best-effort by contract.
|
|
func (service *Service) publishSnapshot(ctx context.Context, record runtime.RuntimeRecord, stats []ports.PlayerTurnStats, occurredAt time.Time) {
|
|
msg := ports.RuntimeSnapshotUpdate{
|
|
GameID: record.GameID,
|
|
CurrentTurn: record.CurrentTurn,
|
|
RuntimeStatus: record.Status,
|
|
EngineHealthSummary: record.EngineHealth,
|
|
PlayerTurnStats: stats,
|
|
OccurredAt: occurredAt,
|
|
}
|
|
if err := service.lobbyEvents.PublishSnapshotUpdate(ctx, msg); err != nil {
|
|
service.logger.ErrorContext(ctx, "publish runtime snapshot update",
|
|
"game_id", record.GameID,
|
|
"err", err.Error(),
|
|
)
|
|
return
|
|
}
|
|
service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
|
|
}
|
|
|
|
// bestEffortAppend writes one operation_log entry. A failure is logged
|
|
// and discarded; the runtime record (or its absence after rollback) is
|
|
// the source of truth.
|
|
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
|
|
if _, err := service.operationLogs.Append(ctx, entry); err != nil {
|
|
service.logger.ErrorContext(ctx, "append operation log",
|
|
"game_id", entry.GameID,
|
|
"op_kind", string(entry.OpKind),
|
|
"outcome", string(entry.Outcome),
|
|
"error_code", entry.ErrorCode,
|
|
"err", err.Error(),
|
|
)
|
|
}
|
|
}
|
|
|
|
// fallbackOpSource defaults to `admin_rest` when the caller did not
|
|
// supply a known op source. Mirrors the README §Trusted Surfaces rule
|
|
// "when missing or unrecognised, GM defaults to `op_source=admin_rest`".
|
|
func fallbackOpSource(source operation.OpSource) operation.OpSource {
|
|
if source.IsKnown() {
|
|
return source
|
|
}
|
|
return operation.OpSourceAdminRest
|
|
}
|