feat: gamemaster
This commit is contained in:
@@ -0,0 +1,48 @@
|
||||
package adminstop
|
||||
|
||||
// Stable error codes returned in `Result.ErrorCode`. The values match
// the vocabulary frozen by `gamemaster/README.md §Error Model` and
// `gamemaster/api/internal-openapi.yaml`. Service-layer callers (Stage
// 19 handlers) import these names rather than redeclare them; renaming
// any of them is a contract change.
const (
	// ErrorCodeInvalidRequest reports that the request envelope failed
	// structural validation (empty GameID, unknown stop reason).
	ErrorCodeInvalidRequest = "invalid_request"

	// ErrorCodeRuntimeNotFound reports that no runtime_records row
	// exists for the requested game id.
	ErrorCodeRuntimeNotFound = "runtime_not_found"

	// ErrorCodeConflict reports that the runtime is in a status that
	// cannot transition to `stopped` (currently only `starting`), or
	// that a CAS guard mid-flow lost the race to a concurrent mutation.
	ErrorCodeConflict = "conflict"

	// ErrorCodeServiceUnavailable reports that a steady-state dependency
	// (PostgreSQL, Runtime Manager) was unreachable for this call.
	ErrorCodeServiceUnavailable = "service_unavailable"

	// ErrorCodeInternal reports an unexpected error not classified by
	// the other codes.
	ErrorCodeInternal = "internal_error"
)
|
||||
|
||||
// Allowed values of Input.Reason mirror the README §Stop wording
// «reason ∈ {admin_request, finished, timeout}». Callers that pass an
// empty string get the documented default `admin_request`.
const (
	// ReasonAdminRequest is the operator-driven stop reason and the
	// default when Input.Reason is empty.
	ReasonAdminRequest = "admin_request"

	// ReasonFinished is reserved for callers that wrap a
	// finish-detected stop (currently unused; documented for
	// completeness).
	ReasonFinished = "finished"

	// ReasonTimeout is reserved for callers that wrap an automated
	// timeout-driven stop (currently unused; documented for
	// completeness).
	ReasonTimeout = "timeout"
)
|
||||
@@ -0,0 +1,396 @@
|
||||
// Package adminstop implements the admin stop service-layer
|
||||
// orchestrator owned by Game Master. It is driven by Admin Service or
|
||||
// system administrators through
|
||||
// `POST /api/v1/internal/runtimes/{game_id}/stop` and tells Runtime
|
||||
// Manager to stop the game's container while transitioning the runtime
|
||||
// record to `stopped`.
|
||||
//
|
||||
// Lifecycle and failure-mode semantics follow `gamemaster/README.md
|
||||
// §Lifecycles → Stop`. The idempotent-on-terminal-status and
|
||||
// conflict-on-starting rules are recorded in
|
||||
// `gamemaster/docs/stage17-admin-operations.md`.
|
||||
package adminstop
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/gamemaster/internal/domain/operation"
|
||||
"galaxy/gamemaster/internal/domain/runtime"
|
||||
"galaxy/gamemaster/internal/logging"
|
||||
"galaxy/gamemaster/internal/ports"
|
||||
"galaxy/gamemaster/internal/telemetry"
|
||||
)
|
||||
|
||||
// Input stores the per-call arguments for one admin stop operation.
// The zero value is invalid (empty GameID); see Validate.
type Input struct {
	// GameID identifies the runtime to stop. Must be non-blank.
	GameID string

	// Reason classifies the stop. Empty defaults to
	// `admin_request`. Allowed values: `admin_request`, `finished`,
	// `timeout`.
	Reason string

	// OpSource classifies how the request entered Game Master. Used to
	// stamp `operation_log.op_source`. Defaults to `admin_rest` when
	// missing or unrecognised (see fallbackOpSource).
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference (REST
	// request id, admin user id). Empty when the caller does not
	// provide one.
	SourceRef string
}
|
||||
|
||||
// Validate reports whether input carries the structural invariants the
|
||||
// service requires before any store is touched.
|
||||
func (input Input) Validate() error {
|
||||
if strings.TrimSpace(input.GameID) == "" {
|
||||
return fmt.Errorf("game id must not be empty")
|
||||
}
|
||||
switch strings.TrimSpace(input.Reason) {
|
||||
case "", ReasonAdminRequest, ReasonFinished, ReasonTimeout:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("reason %q is unsupported", input.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
// Result stores the deterministic outcome of one Handle call. Business
// outcomes flow through Result; the Go-level error return is reserved
// for non-business failures (nil context, nil receiver).
type Result struct {
	// Record carries the runtime record observed (and on success
	// transitioned) by the operation. Populated on success and on the
	// idempotent no-op branch; zero on early-rejection failures
	// (invalid_request, runtime_not_found).
	Record runtime.RuntimeRecord

	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome

	// ErrorCode stores the stable error code on failure (one of the
	// ErrorCode* constants). Empty on success.
	ErrorCode string

	// ErrorMessage stores the operator-readable detail on failure.
	// Empty on success.
	ErrorMessage string
}
|
||||
|
||||
// IsSuccess reports whether the result represents a successful
|
||||
// operation.
|
||||
func (result Result) IsSuccess() bool {
|
||||
return result.Outcome == operation.OutcomeSuccess
|
||||
}
|
||||
|
||||
// Dependencies groups the collaborators required by Service. All
// fields except Logger and Clock are mandatory; NewService rejects nil
// values for the mandatory ones.
type Dependencies struct {
	// RuntimeRecords drives the read of the current row plus the CAS
	// transition to `stopped`.
	RuntimeRecords ports.RuntimeRecordStore

	// OperationLogs records the audit entry for the operation.
	OperationLogs ports.OperationLogStore

	// RTM drives the Runtime Manager stop call.
	RTM ports.RTMClient

	// LobbyEvents publishes the post-success
	// `runtime_snapshot_update` to `gm:lobby_events`.
	LobbyEvents ports.LobbyEventsPublisher

	// Telemetry is required by the lobby-events publication helper.
	Telemetry *telemetry.Runtime

	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger

	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time
}
|
||||
|
||||
// Service executes the admin stop lifecycle operation. Construct with
// NewService; the zero value is unusable (nil collaborators).
type Service struct {
	// Mandatory collaborators, validated non-nil by NewService.
	runtimeRecords ports.RuntimeRecordStore
	operationLogs  ports.OperationLogStore
	rtm            ports.RTMClient
	lobbyEvents    ports.LobbyEventsPublisher

	// Supporting facilities; logger and clock are defaulted by
	// NewService when the caller leaves them nil.
	telemetry *telemetry.Runtime
	logger    *slog.Logger
	clock     func() time.Time
}
|
||||
|
||||
// NewService constructs one Service from deps.
|
||||
func NewService(deps Dependencies) (*Service, error) {
|
||||
switch {
|
||||
case deps.RuntimeRecords == nil:
|
||||
return nil, errors.New("new admin stop service: nil runtime records")
|
||||
case deps.OperationLogs == nil:
|
||||
return nil, errors.New("new admin stop service: nil operation logs")
|
||||
case deps.RTM == nil:
|
||||
return nil, errors.New("new admin stop service: nil rtm client")
|
||||
case deps.LobbyEvents == nil:
|
||||
return nil, errors.New("new admin stop service: nil lobby events publisher")
|
||||
case deps.Telemetry == nil:
|
||||
return nil, errors.New("new admin stop service: nil telemetry runtime")
|
||||
}
|
||||
|
||||
clock := deps.Clock
|
||||
if clock == nil {
|
||||
clock = time.Now
|
||||
}
|
||||
logger := deps.Logger
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
logger = logger.With("service", "gamemaster.adminstop")
|
||||
|
||||
return &Service{
|
||||
runtimeRecords: deps.RuntimeRecords,
|
||||
operationLogs: deps.OperationLogs,
|
||||
rtm: deps.RTM,
|
||||
lobbyEvents: deps.LobbyEvents,
|
||||
telemetry: deps.Telemetry,
|
||||
logger: logger,
|
||||
clock: clock,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Handle executes one admin stop operation end-to-end. The Go-level
// error return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome flows through Result.
//
// Flow: validate input → read runtime row → short-circuit on terminal
// or conflicting status → RTM stop → CAS the row to `stopped` →
// best-effort snapshot publish and audit log → success Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	// Guard clauses for programmer errors; these are the only paths
	// that return a non-nil Go error.
	if service == nil {
		return Result{}, errors.New("admin stop: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("admin stop: nil context")
	}

	// Captured once so the audit row's StartedAt reflects entry time,
	// not the time of the eventual log append.
	opStartedAt := service.clock().UTC()

	if err := input.Validate(); err != nil {
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeInvalidRequest, err.Error()), nil
	}

	// Empty reason defaults to `admin_request` per the README contract.
	reason := strings.TrimSpace(input.Reason)
	if reason == "" {
		reason = ReasonAdminRequest
	}

	record, err := service.runtimeRecords.Get(ctx, input.GameID)
	switch {
	case errors.Is(err, runtime.ErrNotFound):
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeRuntimeNotFound, "runtime record does not exist"), nil
	case err != nil:
		return service.recordEarlyFailure(ctx, opStartedAt, input,
			ErrorCodeServiceUnavailable, fmt.Sprintf("get runtime record: %s", err.Error())), nil
	}

	switch record.Status {
	case runtime.StatusStopped, runtime.StatusFinished:
		// Already terminal: idempotent no-op success, no RTM call.
		return service.completeIdempotent(ctx, opStartedAt, input, record), nil
	case runtime.StatusStarting:
		// `starting` cannot transition to `stopped`; reject as conflict.
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeConflict,
			fmt.Sprintf("runtime status is %q; stop requires a started runtime", record.Status)), nil
	}

	// Stop the container before mutating the row, so a failed RTM call
	// leaves the record untouched.
	if err := service.rtm.Stop(ctx, input.GameID, reason); err != nil {
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeServiceUnavailable, fmt.Sprintf("rtm stop: %s", err.Error())), nil
	}

	// CAS from the status we observed; a concurrent mutation surfaces
	// as ErrConflict below.
	stoppedAt := service.clock().UTC()
	casErr := service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       input.GameID,
		ExpectedFrom: record.Status,
		To:           runtime.StatusStopped,
		Now:          stoppedAt,
	})
	switch {
	case casErr == nil:
	case errors.Is(casErr, runtime.ErrConflict):
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeConflict,
			fmt.Sprintf("cas runtime status to stopped: %s", casErr.Error())), nil
	case errors.Is(casErr, runtime.ErrNotFound):
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeRuntimeNotFound,
			fmt.Sprintf("cas runtime status to stopped: %s", casErr.Error())), nil
	default:
		return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, record,
			ErrorCodeServiceUnavailable,
			fmt.Sprintf("cas runtime status to stopped: %s", casErr.Error())), nil
	}

	persisted, reloadErr := service.runtimeRecords.Get(ctx, input.GameID)
	if reloadErr != nil {
		// CAS already committed; surface the success outcome but log the
		// degraded reload so operators know the response carries the
		// pre-CAS record, patched locally to reflect the stop.
		service.logger.WarnContext(ctx, "reload runtime record after stop",
			"game_id", input.GameID,
			"err", reloadErr.Error(),
		)
		persisted = record
		persisted.Status = runtime.StatusStopped
		persisted.UpdatedAt = stoppedAt
		persisted.StoppedAt = &stoppedAt
	}

	// Post-success side effects are best-effort: both helpers log and
	// swallow their own failures.
	service.publishSnapshot(ctx, persisted, stoppedAt)
	service.appendSuccessLog(ctx, opStartedAt, input)

	logArgs := []any{
		"game_id", input.GameID,
		"reason", reason,
		"from_status", string(record.Status),
		"op_source", string(fallbackOpSource(input.OpSource)),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime stopped", logArgs...)

	return Result{
		Record:  persisted,
		Outcome: operation.OutcomeSuccess,
	}, nil
}
|
||||
|
||||
// completeIdempotent records the no-op success path used when the
|
||||
// runtime is already terminal (stopped or finished). RTM is not
|
||||
// invoked, no snapshot is published, but the audit row is written so
|
||||
// operators can confirm the call landed.
|
||||
func (service *Service) completeIdempotent(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord) Result {
|
||||
service.appendSuccessLog(ctx, opStartedAt, input)
|
||||
|
||||
logArgs := []any{
|
||||
"game_id", input.GameID,
|
||||
"observed_status", string(record.Status),
|
||||
"op_source", string(fallbackOpSource(input.OpSource)),
|
||||
}
|
||||
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
|
||||
service.logger.InfoContext(ctx, "runtime stop already terminal", logArgs...)
|
||||
|
||||
return Result{
|
||||
Record: record,
|
||||
Outcome: operation.OutcomeSuccess,
|
||||
}
|
||||
}
|
||||
|
||||
// recordEarlyFailure records a failure that occurred before the runtime
|
||||
// row was read or in the validation phase.
|
||||
func (service *Service) recordEarlyFailure(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) Result {
|
||||
return service.recordEarlyFailureWithRecord(ctx, opStartedAt, input, runtime.RuntimeRecord{}, errorCode, errorMessage)
|
||||
}
|
||||
|
||||
// recordEarlyFailureWithRecord records a failure and propagates the
|
||||
// observed runtime record (when available) to the caller.
|
||||
func (service *Service) recordEarlyFailureWithRecord(ctx context.Context, opStartedAt time.Time, input Input, record runtime.RuntimeRecord, errorCode string, errorMessage string) Result {
|
||||
service.appendFailureLog(ctx, opStartedAt, input, errorCode, errorMessage)
|
||||
|
||||
logArgs := []any{
|
||||
"game_id", input.GameID,
|
||||
"op_source", string(input.OpSource),
|
||||
"error_code", errorCode,
|
||||
"error_message", errorMessage,
|
||||
}
|
||||
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
|
||||
service.logger.WarnContext(ctx, "admin stop rejected", logArgs...)
|
||||
|
||||
return Result{
|
||||
Record: record,
|
||||
Outcome: operation.OutcomeFailure,
|
||||
ErrorCode: errorCode,
|
||||
ErrorMessage: errorMessage,
|
||||
}
|
||||
}
|
||||
|
||||
// publishSnapshot publishes the post-success
|
||||
// `runtime_snapshot_update` per `gamemaster/README.md §Lifecycles →
|
||||
// Stop` step 4. Failure is logged but never rolls back the just-applied
|
||||
// CAS; the snapshot stream is best-effort by contract.
|
||||
func (service *Service) publishSnapshot(ctx context.Context, record runtime.RuntimeRecord, occurredAt time.Time) {
|
||||
msg := ports.RuntimeSnapshotUpdate{
|
||||
GameID: record.GameID,
|
||||
CurrentTurn: record.CurrentTurn,
|
||||
RuntimeStatus: record.Status,
|
||||
EngineHealthSummary: record.EngineHealth,
|
||||
PlayerTurnStats: nil,
|
||||
OccurredAt: occurredAt,
|
||||
}
|
||||
if err := service.lobbyEvents.PublishSnapshotUpdate(ctx, msg); err != nil {
|
||||
service.logger.ErrorContext(ctx, "publish runtime snapshot update",
|
||||
"game_id", record.GameID,
|
||||
"err", err.Error(),
|
||||
)
|
||||
return
|
||||
}
|
||||
service.telemetry.RecordLobbyEventPublished(ctx, "runtime_snapshot_update")
|
||||
}
|
||||
|
||||
// appendSuccessLog records the success operation_log entry.
|
||||
func (service *Service) appendSuccessLog(ctx context.Context, opStartedAt time.Time, input Input) {
|
||||
finishedAt := service.clock().UTC()
|
||||
service.bestEffortAppend(ctx, operation.OperationEntry{
|
||||
GameID: input.GameID,
|
||||
OpKind: operation.OpKindStop,
|
||||
OpSource: fallbackOpSource(input.OpSource),
|
||||
SourceRef: input.SourceRef,
|
||||
Outcome: operation.OutcomeSuccess,
|
||||
StartedAt: opStartedAt,
|
||||
FinishedAt: &finishedAt,
|
||||
})
|
||||
}
|
||||
|
||||
// appendFailureLog records the failure operation_log entry.
|
||||
func (service *Service) appendFailureLog(ctx context.Context, opStartedAt time.Time, input Input, errorCode string, errorMessage string) {
|
||||
finishedAt := service.clock().UTC()
|
||||
service.bestEffortAppend(ctx, operation.OperationEntry{
|
||||
GameID: input.GameID,
|
||||
OpKind: operation.OpKindStop,
|
||||
OpSource: fallbackOpSource(input.OpSource),
|
||||
SourceRef: input.SourceRef,
|
||||
Outcome: operation.OutcomeFailure,
|
||||
ErrorCode: errorCode,
|
||||
ErrorMessage: errorMessage,
|
||||
StartedAt: opStartedAt,
|
||||
FinishedAt: &finishedAt,
|
||||
})
|
||||
}
|
||||
|
||||
// bestEffortAppend writes one operation_log entry. A failure is logged
|
||||
// and discarded; the runtime row is the source of truth.
|
||||
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
|
||||
if _, err := service.operationLogs.Append(ctx, entry); err != nil {
|
||||
service.logger.ErrorContext(ctx, "append operation log",
|
||||
"game_id", entry.GameID,
|
||||
"op_kind", string(entry.OpKind),
|
||||
"outcome", string(entry.Outcome),
|
||||
"error_code", entry.ErrorCode,
|
||||
"err", err.Error(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// fallbackOpSource defaults to `admin_rest` when the caller did not
|
||||
// supply a known op source. Mirrors `gamemaster/README.md §Trusted
|
||||
// Surfaces`.
|
||||
func fallbackOpSource(source operation.OpSource) operation.OpSource {
|
||||
if source.IsKnown() {
|
||||
return source
|
||||
}
|
||||
return operation.OpSourceAdminRest
|
||||
}
|
||||
@@ -0,0 +1,459 @@
|
||||
package adminstop_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/gamemaster/internal/adapters/mocks"
|
||||
"galaxy/gamemaster/internal/domain/operation"
|
||||
"galaxy/gamemaster/internal/domain/runtime"
|
||||
"galaxy/gamemaster/internal/ports"
|
||||
"galaxy/gamemaster/internal/service/adminstop"
|
||||
"galaxy/gamemaster/internal/telemetry"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/mock/gomock"
|
||||
)
|
||||
|
||||
// --- test doubles -----------------------------------------------------
|
||||
|
||||
// fakeRuntimeRecords is an in-memory, mutex-guarded stand-in for
// ports.RuntimeRecordStore. getErr/updErr force Get/UpdateStatus to
// fail; updates records every CAS attempt for assertions.
type fakeRuntimeRecords struct {
	mu sync.Mutex
	// stored maps game id → current record.
	stored map[string]runtime.RuntimeRecord
	// getErr, when non-nil, is returned by every Get call.
	getErr error
	// updErr, when non-nil, is returned by every UpdateStatus call.
	updErr error
	// updates accumulates the UpdateStatus inputs that succeeded.
	updates []ports.UpdateStatusInput
}
|
||||
|
||||
func newFakeRuntimeRecords() *fakeRuntimeRecords {
|
||||
return &fakeRuntimeRecords{stored: map[string]runtime.RuntimeRecord{}}
|
||||
}
|
||||
|
||||
func (s *fakeRuntimeRecords) seed(record runtime.RuntimeRecord) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.stored[record.GameID] = record
|
||||
}
|
||||
|
||||
func (s *fakeRuntimeRecords) Get(_ context.Context, gameID string) (runtime.RuntimeRecord, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.getErr != nil {
|
||||
return runtime.RuntimeRecord{}, s.getErr
|
||||
}
|
||||
record, ok := s.stored[gameID]
|
||||
if !ok {
|
||||
return runtime.RuntimeRecord{}, runtime.ErrNotFound
|
||||
}
|
||||
return record, nil
|
||||
}
|
||||
|
||||
// Insert is not exercised by the admin stop flow; it exists only to
// satisfy ports.RuntimeRecordStore.
func (s *fakeRuntimeRecords) Insert(context.Context, runtime.RuntimeRecord) error {
	return errors.New("not used")
}
|
||||
|
||||
func (s *fakeRuntimeRecords) UpdateStatus(_ context.Context, input ports.UpdateStatusInput) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.updErr != nil {
|
||||
return s.updErr
|
||||
}
|
||||
record, ok := s.stored[input.GameID]
|
||||
if !ok {
|
||||
return runtime.ErrNotFound
|
||||
}
|
||||
if record.Status != input.ExpectedFrom {
|
||||
return runtime.ErrConflict
|
||||
}
|
||||
record.Status = input.To
|
||||
record.UpdatedAt = input.Now
|
||||
if input.To == runtime.StatusStopped {
|
||||
stopped := input.Now
|
||||
record.StoppedAt = &stopped
|
||||
}
|
||||
s.stored[input.GameID] = record
|
||||
s.updates = append(s.updates, input)
|
||||
return nil
|
||||
}
|
||||
|
||||
// The remaining ports.RuntimeRecordStore methods are not exercised by
// the admin stop flow; each stub fails loudly so an unexpected call is
// visible in test output.

func (s *fakeRuntimeRecords) UpdateScheduling(context.Context, ports.UpdateSchedulingInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateImage(context.Context, ports.UpdateImageInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) UpdateEngineHealth(context.Context, ports.UpdateEngineHealthInput) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) Delete(context.Context, string) error {
	return errors.New("not used")
}

func (s *fakeRuntimeRecords) ListDueRunning(context.Context, time.Time) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) ListByStatus(context.Context, runtime.Status) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}

func (s *fakeRuntimeRecords) List(context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, errors.New("not used")
}
|
||||
|
||||
func (s *fakeRuntimeRecords) updateCount() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return len(s.updates)
|
||||
}
|
||||
|
||||
// fakeOperationLogs is a mutex-guarded in-memory
// ports.OperationLogStore. appErr forces Append to fail; entries
// accumulates every accepted audit row.
type fakeOperationLogs struct {
	mu      sync.Mutex
	entries []operation.OperationEntry
	// appErr, when non-nil, is returned by every Append call.
	appErr error
}
|
||||
|
||||
func (s *fakeOperationLogs) Append(_ context.Context, entry operation.OperationEntry) (int64, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.appErr != nil {
|
||||
return 0, s.appErr
|
||||
}
|
||||
if err := entry.Validate(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
s.entries = append(s.entries, entry)
|
||||
return int64(len(s.entries)), nil
|
||||
}
|
||||
|
||||
// ListByGame is not exercised by the admin stop flow; the stub fails
// loudly so an unexpected call is visible in test output.
func (s *fakeOperationLogs) ListByGame(context.Context, string, int) ([]operation.OperationEntry, error) {
	return nil, errors.New("not used")
}
|
||||
|
||||
func (s *fakeOperationLogs) lastEntry() (operation.OperationEntry, bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if len(s.entries) == 0 {
|
||||
return operation.OperationEntry{}, false
|
||||
}
|
||||
return s.entries[len(s.entries)-1], true
|
||||
}
|
||||
|
||||
func (s *fakeOperationLogs) snapshot() []operation.OperationEntry {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
out := make([]operation.OperationEntry, len(s.entries))
|
||||
copy(out, s.entries)
|
||||
return out
|
||||
}
|
||||
|
||||
// --- harness ----------------------------------------------------------
|
||||
|
||||
// harness bundles the service under test with its fakes and mocks.
// Build with newHarness; `now` is the frozen clock value injected via
// Dependencies.Clock.
type harness struct {
	t         *testing.T
	ctrl      *gomock.Controller
	runtime   *fakeRuntimeRecords
	logs      *fakeOperationLogs
	rtm       *mocks.MockRTMClient
	lobby     *mocks.MockLobbyEventsPublisher
	telemetry *telemetry.Runtime
	// now is the deterministic wall-clock returned by the injected Clock.
	now     time.Time
	service *adminstop.Service
}
|
||||
|
||||
func newHarness(t *testing.T) *harness {
|
||||
t.Helper()
|
||||
ctrl := gomock.NewController(t)
|
||||
telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
|
||||
require.NoError(t, err)
|
||||
h := &harness{
|
||||
t: t,
|
||||
ctrl: ctrl,
|
||||
runtime: newFakeRuntimeRecords(),
|
||||
logs: &fakeOperationLogs{},
|
||||
rtm: mocks.NewMockRTMClient(ctrl),
|
||||
lobby: mocks.NewMockLobbyEventsPublisher(ctrl),
|
||||
telemetry: telemetryRuntime,
|
||||
now: time.Date(2026, time.May, 1, 12, 0, 0, 0, time.UTC),
|
||||
}
|
||||
service, err := adminstop.NewService(adminstop.Dependencies{
|
||||
RuntimeRecords: h.runtime,
|
||||
OperationLogs: h.logs,
|
||||
RTM: h.rtm,
|
||||
LobbyEvents: h.lobby,
|
||||
Telemetry: h.telemetry,
|
||||
Clock: func() time.Time { return h.now },
|
||||
})
|
||||
require.NoError(t, err)
|
||||
h.service = service
|
||||
return h
|
||||
}
|
||||
|
||||
// seedRecord stores and returns a plausible "game-001" runtime record
// in the given status: created an hour ago, started/updated 30 minutes
// ago, next generation 30 minutes out, engine healthy.
func (h *harness) seedRecord(status runtime.Status) runtime.RuntimeRecord {
	created := h.now.Add(-time.Hour)
	started := h.now.Add(-30 * time.Minute)
	next := h.now.Add(30 * time.Minute)
	record := runtime.RuntimeRecord{
		GameID:               "game-001",
		Status:               status,
		EngineEndpoint:       "http://galaxy-game-game-001:8080",
		CurrentImageRef:      "ghcr.io/galaxy/game:v1.2.3",
		CurrentEngineVersion: "v1.2.3",
		TurnSchedule:         "0 18 * * *",
		CurrentTurn:          7,
		NextGenerationAt:     &next,
		EngineHealth:         "healthy",
		CreatedAt:            created,
		UpdatedAt:            started,
		StartedAt:            &started,
	}
	h.runtime.seed(record)
	return record
}
|
||||
|
||||
func baseInput() adminstop.Input {
|
||||
return adminstop.Input{
|
||||
GameID: "game-001",
|
||||
Reason: adminstop.ReasonAdminRequest,
|
||||
OpSource: operation.OpSourceAdminRest,
|
||||
SourceRef: "req-stop-001",
|
||||
}
|
||||
}
|
||||
|
||||
// --- tests ------------------------------------------------------------
|
||||
|
||||
// TestNewServiceRejectsMissingDeps verifies that NewService rejects a
// nil value for each mandatory collaborator, one sub-test per field.
func TestNewServiceRejectsMissingDeps(t *testing.T) {
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)
	cases := []struct {
		name string
		// mut nils out exactly one dependency in an otherwise-valid set.
		mut func(*adminstop.Dependencies)
	}{
		{"runtime records", func(d *adminstop.Dependencies) { d.RuntimeRecords = nil }},
		{"operation logs", func(d *adminstop.Dependencies) { d.OperationLogs = nil }},
		{"rtm", func(d *adminstop.Dependencies) { d.RTM = nil }},
		{"lobby events", func(d *adminstop.Dependencies) { d.LobbyEvents = nil }},
		{"telemetry", func(d *adminstop.Dependencies) { d.Telemetry = nil }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctrl := gomock.NewController(t)
			deps := adminstop.Dependencies{
				RuntimeRecords: newFakeRuntimeRecords(),
				OperationLogs:  &fakeOperationLogs{},
				RTM:            mocks.NewMockRTMClient(ctrl),
				LobbyEvents:    mocks.NewMockLobbyEventsPublisher(ctrl),
				Telemetry:      telemetryRuntime,
			}
			tc.mut(&deps)
			service, err := adminstop.NewService(deps)
			require.Error(t, err)
			require.Nil(t, service)
		})
	}
}
|
||||
|
||||
// TestHandleHappyPath drives a running runtime through the full stop
// flow and checks the snapshot payload, the single CAS call, and the
// success audit entry.
func TestHandleHappyPath(t *testing.T) {
	h := newHarness(t)
	original := h.seedRecord(runtime.StatusRunning)

	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	// Inspect the published snapshot field-by-field rather than only
	// asserting that a publish happened.
	h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.AssignableToTypeOf(ports.RuntimeSnapshotUpdate{})).
		DoAndReturn(func(_ context.Context, msg ports.RuntimeSnapshotUpdate) error {
			assert.Equal(t, "game-001", msg.GameID)
			assert.Equal(t, runtime.StatusStopped, msg.RuntimeStatus)
			assert.Equal(t, original.CurrentTurn, msg.CurrentTurn)
			assert.Equal(t, original.EngineHealth, msg.EngineHealthSummary)
			assert.Empty(t, msg.PlayerTurnStats)
			assert.True(t, msg.OccurredAt.Equal(h.now))
			return nil
		})

	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "want success, got %+v", result)
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
	assert.Equal(t, 1, h.runtime.updateCount(), "exactly one CAS call expected")

	entry, ok := h.logs.lastEntry()
	require.True(t, ok, "operation log entry must be appended")
	assert.Equal(t, operation.OpKindStop, entry.OpKind)
	assert.Equal(t, operation.OpSourceAdminRest, entry.OpSource)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
	assert.Empty(t, entry.ErrorCode)
}
|
||||
|
||||
func TestHandleHappyPathFromGenerationFailed(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.seedRecord(runtime.StatusGenerationFailed)
|
||||
|
||||
h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
|
||||
h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
|
||||
|
||||
result, err := h.service.Handle(context.Background(), baseInput())
|
||||
require.NoError(t, err)
|
||||
require.True(t, result.IsSuccess())
|
||||
assert.Equal(t, runtime.StatusStopped, result.Record.Status)
|
||||
require.Len(t, h.runtime.updates, 1)
|
||||
assert.Equal(t, runtime.StatusGenerationFailed, h.runtime.updates[0].ExpectedFrom)
|
||||
}
|
||||
|
||||
func TestHandleEmptyReasonDefaultsToAdminRequest(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.seedRecord(runtime.StatusRunning)
|
||||
|
||||
h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
|
||||
h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).Return(nil)
|
||||
|
||||
input := baseInput()
|
||||
input.Reason = ""
|
||||
result, err := h.service.Handle(context.Background(), input)
|
||||
require.NoError(t, err)
|
||||
require.True(t, result.IsSuccess())
|
||||
}
|
||||
|
||||
// TestHandleIdempotentOnAlreadyStopped verifies the no-op success
// branch: an already-stopped runtime yields success without any RTM
// call, CAS, or snapshot publish, but still appends an audit entry.
func TestHandleIdempotentOnAlreadyStopped(t *testing.T) {
	h := newHarness(t)
	original := h.seedRecord(runtime.StatusStopped)

	// No RTM call, no snapshot publication expected (mocks would fail
	// on any unexpected call).
	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess())
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
	assert.Equal(t, original.UpdatedAt, result.Record.UpdatedAt, "no mutation expected")
	assert.Zero(t, h.runtime.updateCount(), "no CAS expected on idempotent path")

	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeSuccess, entry.Outcome)
}
|
||||
|
||||
func TestHandleIdempotentOnFinished(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.seedRecord(runtime.StatusFinished)
|
||||
|
||||
result, err := h.service.Handle(context.Background(), baseInput())
|
||||
require.NoError(t, err)
|
||||
require.True(t, result.IsSuccess())
|
||||
assert.Equal(t, runtime.StatusFinished, result.Record.Status)
|
||||
}
|
||||
|
||||
// TestHandleConflictOnStarting verifies that a `starting` runtime is
// rejected with the stable `conflict` code, performs no CAS, and
// writes a failure audit entry carrying the same code.
func TestHandleConflictOnStarting(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusStarting)

	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeConflict, result.ErrorCode)
	assert.Zero(t, h.runtime.updateCount())

	entry, ok := h.logs.lastEntry()
	require.True(t, ok)
	assert.Equal(t, operation.OutcomeFailure, entry.Outcome)
	assert.Equal(t, adminstop.ErrorCodeConflict, entry.ErrorCode)
}
|
||||
|
||||
func TestHandleRuntimeNotFound(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
|
||||
result, err := h.service.Handle(context.Background(), baseInput())
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, operation.OutcomeFailure, result.Outcome)
|
||||
assert.Equal(t, adminstop.ErrorCodeRuntimeNotFound, result.ErrorCode)
|
||||
}
|
||||
|
||||
// TestHandleRTMUnavailable verifies that a Runtime Manager failure maps
// to `service_unavailable` and that the runtime row is left untouched
// (no CAS after a failed RTM stop).
func TestHandleRTMUnavailable(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)

	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).
		Return(ports.ErrRTMUnavailable)

	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeServiceUnavailable, result.ErrorCode)
	assert.Zero(t, h.runtime.updateCount(), "CAS must not run after RTM failure")
}
|
||||
|
||||
// TestHandleCASLostRace verifies that a CAS conflict after a
// successful RTM stop surfaces as the stable `conflict` code.
func TestHandleCASLostRace(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)

	// RTM stop succeeds, but a concurrent mutation flipped the row out
	// of `running` before our CAS lands.
	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.runtime.updErr = runtime.ErrConflict

	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	assert.Equal(t, operation.OutcomeFailure, result.Outcome)
	assert.Equal(t, adminstop.ErrorCodeConflict, result.ErrorCode)
}
|
||||
|
||||
func TestHandleStoreReadFailure(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.runtime.getErr = errors.New("connection refused")
|
||||
|
||||
result, err := h.service.Handle(context.Background(), baseInput())
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, operation.OutcomeFailure, result.Outcome)
|
||||
assert.Equal(t, adminstop.ErrorCodeServiceUnavailable, result.ErrorCode)
|
||||
}
|
||||
|
||||
// TestHandleInvalidRequest verifies that structurally invalid inputs
// (empty game id, unknown reason) are rejected with `invalid_request`
// before any store or RTM interaction.
func TestHandleInvalidRequest(t *testing.T) {
	cases := []struct {
		name string
		// mut corrupts one field of an otherwise-valid input.
		mut func(*adminstop.Input)
	}{
		{"empty game id", func(in *adminstop.Input) { in.GameID = "" }},
		{"unknown reason", func(in *adminstop.Input) { in.Reason = "panic" }},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			h := newHarness(t)
			input := baseInput()
			tc.mut(&input)
			result, err := h.service.Handle(context.Background(), input)
			require.NoError(t, err)
			assert.Equal(t, operation.OutcomeFailure, result.Outcome)
			assert.Equal(t, adminstop.ErrorCodeInvalidRequest, result.ErrorCode)
			// Audit log uses the validated game id; for the empty-id
			// case it would fail entry validation, so we only assert
			// when game id is present.
			if input.GameID != "" {
				_, ok := h.logs.lastEntry()
				assert.True(t, ok)
			}
		})
	}
}
|
||||
|
||||
func TestHandleNilContextReturnsError(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
_, err := h.service.Handle(nil, baseInput()) //nolint:staticcheck // intentional nil for guard test
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
// TestHandleSnapshotPublishFailureSurfacesSuccess verifies that a
// failed lobby-events publish does not roll back the stop: the
// operation still reports success and the record is `stopped`.
func TestHandleSnapshotPublishFailureSurfacesSuccess(t *testing.T) {
	h := newHarness(t)
	h.seedRecord(runtime.StatusRunning)

	h.rtm.EXPECT().Stop(gomock.Any(), "game-001", adminstop.ReasonAdminRequest).Return(nil)
	h.lobby.EXPECT().PublishSnapshotUpdate(gomock.Any(), gomock.Any()).
		Return(errors.New("redis down"))

	result, err := h.service.Handle(context.Background(), baseInput())
	require.NoError(t, err)
	require.True(t, result.IsSuccess(), "snapshot publication is best-effort")
	assert.Equal(t, runtime.StatusStopped, result.Record.Status)
}
|
||||
Reference in New Issue
Block a user