// Package engineversion implements the engine version registry service // owned by Game Master. The service backs the // `/api/v1/internal/engine-versions/*` REST surface (Stage 19) and the // hot-path `image_ref` resolve called synchronously by Game Lobby's // start flow. // // Responsibilities and stable error codes are frozen by // `gamemaster/README.md §Engine Version Registry` and // `gamemaster/api/internal-openapi.yaml`. Design rationale for stage 14 // is captured in `gamemaster/docs/stage14-engine-version-registry.md`. package engineversion import ( "context" "encoding/json" "errors" "fmt" "log/slog" "strings" "time" "galaxy/gamemaster/internal/domain/engineversion" "galaxy/gamemaster/internal/domain/operation" "galaxy/gamemaster/internal/logging" "galaxy/gamemaster/internal/ports" "github.com/distribution/reference" ) // Sentinel errors returned by the service. Handlers translate these // into the stable `ErrorCode...` constants from `errors.go`. The // adapter-level sentinels (`engineversion.ErrNotFound`, // `engineversion.ErrConflict`, `engineversion.ErrInUse`, // `engineversion.ErrInvalidSemver`) are wrapped with one of the // service-level sentinels below before crossing the package boundary. var ( // ErrInvalidRequest reports that the input envelope failed // structural validation. ErrInvalidRequest = errors.New("invalid request") // ErrNotFound reports that the requested version does not exist // in the registry. ErrNotFound = errors.New("engine version not found") // ErrConflict reports that an Insert was rejected because a row // with the same version already exists. ErrConflict = errors.New("engine version already exists") // ErrInUse reports that a hard-delete attempt was rejected // because a non-finished runtime references the version. ErrInUse = errors.New("engine version in use") // ErrServiceUnavailable reports that a steady-state dependency // was unreachable for this call. ErrServiceUnavailable = errors.New("service unavailable") ) // CreateInput stores the per-call arguments for one Create operation. // Mirrors `CreateEngineVersionRequest` plus the audit-only OpSource / // SourceRef pair. type CreateInput struct { // Version stores the canonical semver (with or without the leading // "v"; ParseSemver normalises it). Version string // ImageRef stores the Docker reference of the engine image. // Validated against `github.com/distribution/reference` before // the row is persisted. ImageRef string // Options stores the engine-side options document as raw JSON. // Empty means "use the schema default `{}`". When non-empty the // service validates the bytes parse as a JSON object. Options []byte // OpSource classifies how the request entered Game Master. // Defaults to `admin_rest` when missing or unknown. OpSource operation.OpSource // SourceRef stores the optional opaque per-source reference. SourceRef string } // UpdateInput stores the per-call arguments for one Update operation. // Pointer fields communicate "leave alone" (nil) vs. "write the value" // (non-nil); at least one must be set. type UpdateInput struct { // Version identifies the row to mutate. Version string // ImageRef is the new image reference. Nil leaves the column // unchanged; non-nil must be a valid Docker reference. ImageRef *string // Options is the new options document. Nil leaves the column // unchanged; non-nil must be a JSON object (possibly the empty // object). Options *[]byte // Status is the new registry status. Nil leaves the column // unchanged; non-nil must be a known status value. Status *engineversion.Status // OpSource classifies how the request entered Game Master. OpSource operation.OpSource // SourceRef stores the optional opaque per-source reference. SourceRef string } // DeprecateInput stores the per-call arguments for one Deprecate // operation. type DeprecateInput struct { // Version identifies the row to deprecate. Version string // OpSource classifies how the request entered Game Master. OpSource operation.OpSource // SourceRef stores the optional opaque per-source reference. SourceRef string } // DeleteInput stores the per-call arguments for one hard Delete // operation. type DeleteInput struct { // Version identifies the row to delete. Version string // OpSource classifies how the request entered Game Master. OpSource operation.OpSource // SourceRef stores the optional opaque per-source reference. SourceRef string } // Dependencies groups the collaborators required by Service. type Dependencies struct { // EngineVersions persists the registry rows. Required. EngineVersions ports.EngineVersionStore // OperationLogs records the audit entry for every mutation // (Create, Update, Deprecate, Delete). Required. OperationLogs ports.OperationLogStore // Logger records structured service-level events. Defaults to // slog.Default when nil. Logger *slog.Logger // Clock supplies the wall-clock used for created_at / updated_at // and audit timestamps. Defaults to time.Now when nil. Clock func() time.Time } // Service implements the engine version registry operations. type Service struct { versions ports.EngineVersionStore operationLogs ports.OperationLogStore logger *slog.Logger clock func() time.Time } // NewService constructs one Service from deps. func NewService(deps Dependencies) (*Service, error) { switch { case deps.EngineVersions == nil: return nil, errors.New("new engine version service: nil engine version store") case deps.OperationLogs == nil: return nil, errors.New("new engine version service: nil operation log store") } clock := deps.Clock if clock == nil { clock = time.Now } logger := deps.Logger if logger == nil { logger = slog.Default() } logger = logger.With("service", "gamemaster.engineversion") return &Service{ versions: deps.EngineVersions, operationLogs: deps.OperationLogs, logger: logger, clock: clock, }, nil } // List returns every registry row, optionally filtered by status. A // non-nil statusFilter must reference a known engineversion.Status. func (service *Service) List(ctx context.Context, statusFilter *engineversion.Status) ([]engineversion.EngineVersion, error) { if service == nil { return nil, errors.New("engine version list: nil service") } if ctx == nil { return nil, errors.New("engine version list: nil context") } if statusFilter != nil && !statusFilter.IsKnown() { return nil, fmt.Errorf("%w: status %q is unsupported", ErrInvalidRequest, *statusFilter) } versions, err := service.versions.List(ctx, statusFilter) if err != nil { return nil, fmt.Errorf("%w: list engine versions: %s", ErrServiceUnavailable, err.Error()) } return versions, nil } // Get returns the registry row identified by version. Returns // ErrNotFound when no row matches. func (service *Service) Get(ctx context.Context, version string) (engineversion.EngineVersion, error) { if service == nil { return engineversion.EngineVersion{}, errors.New("engine version get: nil service") } if ctx == nil { return engineversion.EngineVersion{}, errors.New("engine version get: nil context") } if strings.TrimSpace(version) == "" { return engineversion.EngineVersion{}, fmt.Errorf("%w: version must not be empty", ErrInvalidRequest) } got, err := service.versions.Get(ctx, version) switch { case errors.Is(err, engineversion.ErrNotFound): return engineversion.EngineVersion{}, fmt.Errorf("%w: %q", ErrNotFound, version) case err != nil: return engineversion.EngineVersion{}, fmt.Errorf("%w: get engine version: %s", ErrServiceUnavailable, err.Error()) } return got, nil } // ResolveImageRef returns the image_ref of the requested version. This // is the hot path used by Game Lobby's start flow synchronously per // register-runtime envelope. func (service *Service) ResolveImageRef(ctx context.Context, version string) (string, error) { got, err := service.Get(ctx, version) if err != nil { return "", err } return got.ImageRef, nil } // Create installs a fresh registry row. Validates the semver shape and // Docker reference before touching the store. On success appends a // success entry to operation_log; on classified failure (validation, // conflict, store error) appends a failure entry. func (service *Service) Create(ctx context.Context, input CreateInput) (engineversion.EngineVersion, error) { if service == nil { return engineversion.EngineVersion{}, errors.New("engine version create: nil service") } if ctx == nil { return engineversion.EngineVersion{}, errors.New("engine version create: nil context") } startedAt := service.clock().UTC() canonicalVersion, err := engineversion.ParseSemver(input.Version) if err != nil { return engineversion.EngineVersion{}, service.recordCreateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, fmt.Sprintf("parse semver: %s", err.Error()), fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()), ) } if err := validateImageRef(input.ImageRef); err != nil { return engineversion.EngineVersion{}, service.recordCreateFailure( ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, fmt.Sprintf("validate image_ref: %s", err.Error()), fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()), ) } options, err := normalizeOptions(input.Options) if err != nil { return engineversion.EngineVersion{}, service.recordCreateFailure( ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, fmt.Sprintf("validate options: %s", err.Error()), fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()), ) } record := engineversion.EngineVersion{ Version: canonicalVersion, ImageRef: strings.TrimSpace(input.ImageRef), Options: options, Status: engineversion.StatusActive, CreatedAt: startedAt, UpdatedAt: startedAt, } if err := service.versions.Insert(ctx, record); err != nil { switch { case errors.Is(err, engineversion.ErrConflict): return engineversion.EngineVersion{}, service.recordCreateFailure( ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef, ErrorCodeConflict, "engine version already exists", fmt.Errorf("%w: %s", ErrConflict, canonicalVersion), ) default: return engineversion.EngineVersion{}, service.recordCreateFailure( ctx, startedAt, canonicalVersion, input.OpSource, input.SourceRef, ErrorCodeServiceUnavailable, fmt.Sprintf("insert engine version: %s", err.Error()), fmt.Errorf("%w: insert engine version: %s", ErrServiceUnavailable, err.Error()), ) } } service.appendSuccess(ctx, operation.OpKindEngineVersionCreate, canonicalVersion, input.OpSource, input.SourceRef, startedAt) logArgs := []any{ "version", canonicalVersion, "image_ref", record.ImageRef, "op_source", string(fallbackOpSource(input.OpSource)), } logArgs = append(logArgs, logging.ContextAttrs(ctx)...) service.logger.InfoContext(ctx, "engine version created", logArgs...) return record, nil } // Update applies a partial update to one registry row. At least one of // ImageRef, Options, Status must be non-nil. func (service *Service) Update(ctx context.Context, input UpdateInput) (engineversion.EngineVersion, error) { if service == nil { return engineversion.EngineVersion{}, errors.New("engine version update: nil service") } if ctx == nil { return engineversion.EngineVersion{}, errors.New("engine version update: nil context") } startedAt := service.clock().UTC() if strings.TrimSpace(input.Version) == "" { return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, "version must not be empty", fmt.Errorf("%w: version must not be empty", ErrInvalidRequest), ) } if input.ImageRef == nil && input.Options == nil && input.Status == nil { return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, "at least one field must be set", fmt.Errorf("%w: at least one field must be set", ErrInvalidRequest), ) } if input.ImageRef != nil { if err := validateImageRef(*input.ImageRef); err != nil { return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, fmt.Sprintf("validate image_ref: %s", err.Error()), fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()), ) } } if input.Status != nil && !input.Status.IsKnown() { return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, fmt.Sprintf("status %q is unsupported", *input.Status), fmt.Errorf("%w: status %q is unsupported", ErrInvalidRequest, *input.Status), ) } var normalizedOptions *[]byte if input.Options != nil { opts, err := normalizeOptions(*input.Options) if err != nil { return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, fmt.Sprintf("validate options: %s", err.Error()), fmt.Errorf("%w: %s", ErrInvalidRequest, err.Error()), ) } normalizedOptions = &opts } storeInput := ports.UpdateEngineVersionInput{ Version: input.Version, Options: normalizedOptions, Status: input.Status, Now: startedAt, } if input.ImageRef != nil { trimmed := strings.TrimSpace(*input.ImageRef) storeInput.ImageRef = &trimmed } if err := service.versions.Update(ctx, storeInput); err != nil { switch { case errors.Is(err, engineversion.ErrNotFound): return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeEngineVersionNotFound, fmt.Sprintf("engine version %q not found", input.Version), fmt.Errorf("%w: %q", ErrNotFound, input.Version), ) default: return engineversion.EngineVersion{}, service.recordUpdateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeServiceUnavailable, fmt.Sprintf("update engine version: %s", err.Error()), fmt.Errorf("%w: update engine version: %s", ErrServiceUnavailable, err.Error()), ) } } persisted, err := service.versions.Get(ctx, input.Version) if err != nil { // The Update succeeded but the post-read failed. Surface the // store error; the audit entry still records the successful // mutation against operation_log. service.appendSuccess(ctx, operation.OpKindEngineVersionUpdate, input.Version, input.OpSource, input.SourceRef, startedAt) return engineversion.EngineVersion{}, fmt.Errorf("%w: reload engine version: %s", ErrServiceUnavailable, err.Error()) } service.appendSuccess(ctx, operation.OpKindEngineVersionUpdate, input.Version, input.OpSource, input.SourceRef, startedAt) logArgs := []any{ "version", input.Version, "op_source", string(fallbackOpSource(input.OpSource)), } logArgs = append(logArgs, logging.ContextAttrs(ctx)...) service.logger.InfoContext(ctx, "engine version updated", logArgs...) return persisted, nil } // Deprecate marks one registry row as deprecated. Idempotent: the call // succeeds even when the row is already deprecated. Returns ErrNotFound // when no row matches. func (service *Service) Deprecate(ctx context.Context, input DeprecateInput) error { if service == nil { return errors.New("engine version deprecate: nil service") } if ctx == nil { return errors.New("engine version deprecate: nil context") } startedAt := service.clock().UTC() if strings.TrimSpace(input.Version) == "" { return service.recordDeprecateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, "version must not be empty", fmt.Errorf("%w: version must not be empty", ErrInvalidRequest), ) } if err := service.versions.Deprecate(ctx, input.Version, startedAt); err != nil { switch { case errors.Is(err, engineversion.ErrNotFound): return service.recordDeprecateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeEngineVersionNotFound, fmt.Sprintf("engine version %q not found", input.Version), fmt.Errorf("%w: %q", ErrNotFound, input.Version), ) default: return service.recordDeprecateFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeServiceUnavailable, fmt.Sprintf("deprecate engine version: %s", err.Error()), fmt.Errorf("%w: deprecate engine version: %s", ErrServiceUnavailable, err.Error()), ) } } service.appendSuccess(ctx, operation.OpKindEngineVersionDeprecate, input.Version, input.OpSource, input.SourceRef, startedAt) logArgs := []any{ "version", input.Version, "op_source", string(fallbackOpSource(input.OpSource)), } logArgs = append(logArgs, logging.ContextAttrs(ctx)...) service.logger.InfoContext(ctx, "engine version deprecated", logArgs...) return nil } // Delete hard-deletes one registry row. Rejected with ErrInUse when any // non-finished runtime still references the version. The reference // probe runs first so the conflict is surfaced before the row is // removed. func (service *Service) Delete(ctx context.Context, input DeleteInput) error { if service == nil { return errors.New("engine version delete: nil service") } if ctx == nil { return errors.New("engine version delete: nil context") } startedAt := service.clock().UTC() if strings.TrimSpace(input.Version) == "" { return service.recordDeleteFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeInvalidRequest, "version must not be empty", fmt.Errorf("%w: version must not be empty", ErrInvalidRequest), ) } referenced, err := service.versions.IsReferencedByActiveRuntime(ctx, input.Version) if err != nil { return service.recordDeleteFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeServiceUnavailable, fmt.Sprintf("is referenced by active runtime: %s", err.Error()), fmt.Errorf("%w: is referenced by active runtime: %s", ErrServiceUnavailable, err.Error()), ) } if referenced { return service.recordDeleteFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeEngineVersionInUse, fmt.Sprintf("engine version %q is referenced by an active runtime", input.Version), fmt.Errorf("%w: %q", ErrInUse, input.Version), ) } if err := service.versions.Delete(ctx, input.Version); err != nil { switch { case errors.Is(err, engineversion.ErrNotFound): return service.recordDeleteFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeEngineVersionNotFound, fmt.Sprintf("engine version %q not found", input.Version), fmt.Errorf("%w: %q", ErrNotFound, input.Version), ) default: return service.recordDeleteFailure( ctx, startedAt, input.Version, input.OpSource, input.SourceRef, ErrorCodeServiceUnavailable, fmt.Sprintf("delete engine version: %s", err.Error()), fmt.Errorf("%w: delete engine version: %s", ErrServiceUnavailable, err.Error()), ) } } service.appendSuccess(ctx, operation.OpKindEngineVersionDelete, input.Version, input.OpSource, input.SourceRef, startedAt) logArgs := []any{ "version", input.Version, "op_source", string(fallbackOpSource(input.OpSource)), } logArgs = append(logArgs, logging.ContextAttrs(ctx)...) service.logger.InfoContext(ctx, "engine version deleted", logArgs...) return nil } // validateImageRef enforces the Docker reference shape required by // `engine_versions.image_ref`: non-empty trimmed, parseable through // `distribution/reference.ParseNormalizedNamed`. The check is the same // one Runtime Manager applies in startruntime so the registry never // stores a value the runtime cannot pull. func validateImageRef(imageRef string) error { trimmed := strings.TrimSpace(imageRef) if trimmed == "" { return fmt.Errorf("image_ref must not be empty") } if _, err := reference.ParseNormalizedNamed(trimmed); err != nil { return fmt.Errorf("parse image reference %q: %w", trimmed, err) } return nil } // normalizeOptions validates that raw is a JSON document encoding a // single object. Empty input is treated as `{}` and stored verbatim by // the adapter (see stage 11 D5). func normalizeOptions(raw []byte) ([]byte, error) { trimmed := bytesTrim(raw) if len(trimmed) == 0 { return nil, nil } var probe map[string]any if err := json.Unmarshal(trimmed, &probe); err != nil { return nil, fmt.Errorf("options must be a JSON object: %w", err) } return trimmed, nil } // bytesTrim returns raw with surrounding ASCII whitespace removed. The // helper avoids the round-trip through `string` for raw JSON inputs. func bytesTrim(raw []byte) []byte { start, end := 0, len(raw) for start < end && isASCIISpace(raw[start]) { start++ } for end > start && isASCIISpace(raw[end-1]) { end-- } return raw[start:end] } func isASCIISpace(b byte) bool { switch b { case ' ', '\t', '\n', '\r': return true default: return false } } // recordCreateFailure appends an audit failure entry for a Create call // and returns the original sentinel error wrapped with the failure // reason. The audit entry is written best-effort; storage failures are // logged and discarded. func (service *Service) recordCreateFailure( ctx context.Context, startedAt time.Time, subject string, source operation.OpSource, sourceRef string, errorCode string, errorMessage string, wrappedErr error, ) error { service.appendFailure(ctx, operation.OpKindEngineVersionCreate, subject, source, sourceRef, startedAt, errorCode, errorMessage) service.logFailure(ctx, "engine version create failed", subject, source, errorCode, errorMessage) return wrappedErr } func (service *Service) recordUpdateFailure( ctx context.Context, startedAt time.Time, subject string, source operation.OpSource, sourceRef string, errorCode string, errorMessage string, wrappedErr error, ) error { service.appendFailure(ctx, operation.OpKindEngineVersionUpdate, subject, source, sourceRef, startedAt, errorCode, errorMessage) service.logFailure(ctx, "engine version update failed", subject, source, errorCode, errorMessage) return wrappedErr } func (service *Service) recordDeprecateFailure( ctx context.Context, startedAt time.Time, subject string, source operation.OpSource, sourceRef string, errorCode string, errorMessage string, wrappedErr error, ) error { service.appendFailure(ctx, operation.OpKindEngineVersionDeprecate, subject, source, sourceRef, startedAt, errorCode, errorMessage) service.logFailure(ctx, "engine version deprecate failed", subject, source, errorCode, errorMessage) return wrappedErr } func (service *Service) recordDeleteFailure( ctx context.Context, startedAt time.Time, subject string, source operation.OpSource, sourceRef string, errorCode string, errorMessage string, wrappedErr error, ) error { service.appendFailure(ctx, operation.OpKindEngineVersionDelete, subject, source, sourceRef, startedAt, errorCode, errorMessage) service.logFailure(ctx, "engine version delete failed", subject, source, errorCode, errorMessage) return wrappedErr } // appendSuccess writes a success entry to operation_log. Subject is the // canonical version string; the entry's GameID column doubles as the // audit subject for engine-version operations (stage 14 decision — // the registry is global, not per-game). func (service *Service) appendSuccess( ctx context.Context, kind operation.OpKind, subject string, source operation.OpSource, sourceRef string, startedAt time.Time, ) { finishedAt := service.clock().UTC() service.bestEffortAppend(ctx, operation.OperationEntry{ GameID: subject, OpKind: kind, OpSource: fallbackOpSource(source), SourceRef: sourceRef, Outcome: operation.OutcomeSuccess, StartedAt: startedAt, FinishedAt: &finishedAt, }) } // appendFailure writes a failure entry to operation_log. Subject and // the GameID column overload follow the same rule as appendSuccess. func (service *Service) appendFailure( ctx context.Context, kind operation.OpKind, subject string, source operation.OpSource, sourceRef string, startedAt time.Time, errorCode string, errorMessage string, ) { finishedAt := service.clock().UTC() service.bestEffortAppend(ctx, operation.OperationEntry{ GameID: subject, OpKind: kind, OpSource: fallbackOpSource(source), SourceRef: sourceRef, Outcome: operation.OutcomeFailure, ErrorCode: errorCode, ErrorMessage: errorMessage, StartedAt: startedAt, FinishedAt: &finishedAt, }) } // bestEffortAppend writes one operation_log entry. A failure is logged // and discarded; the registry mutation (or its absence) remains the // source of truth. func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) { if _, err := service.operationLogs.Append(ctx, entry); err != nil { service.logger.ErrorContext(ctx, "append operation log", "subject", entry.GameID, "op_kind", string(entry.OpKind), "outcome", string(entry.Outcome), "error_code", entry.ErrorCode, "err", err.Error(), ) } } // logFailure emits one structured warn-level entry per service-level // failure, mirroring registerruntime's log shape. func (service *Service) logFailure( ctx context.Context, message string, subject string, source operation.OpSource, errorCode string, errorMessage string, ) { logArgs := []any{ "version", subject, "op_source", string(fallbackOpSource(source)), "error_code", errorCode, "error_message", errorMessage, } logArgs = append(logArgs, logging.ContextAttrs(ctx)...) service.logger.WarnContext(ctx, message, logArgs...) } // fallbackOpSource defaults to admin_rest when source is missing or // unrecognised. Mirrors `gamemaster/README.md §Trusted Surfaces`. func fallbackOpSource(source operation.OpSource) operation.OpSource { if source.IsKnown() { return source } return operation.OpSourceAdminRest }