feat: game lobby service

This commit is contained in:
Ilia Denisov
2026-04-25 23:20:55 +02:00
committed by GitHub
parent 32dc29359a
commit 48b0056b49
336 changed files with 57074 additions and 1418 deletions
@@ -0,0 +1,263 @@
// Package enrollmentautomation implements the periodic worker that
// transitions games from `enrollment_open` to `ready_to_start` along the
// three automatic paths frozen in lobby/README.md §Enrollment Rules:
// deadline, gap window time exhaustion, and gap window roster
// exhaustion. The same enrollment-close pipeline that
// service/manualreadytostart calls is reused via shared.CloseEnrollment
// so manual and automatic closes stay aligned.
package enrollmentautomation
import (
"context"
"errors"
"fmt"
"log/slog"
"time"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/logging"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/service/shared"
"galaxy/lobby/internal/telemetry"
)
// Dependencies groups the collaborators consumed by Worker. The struct
// mirrors the shape used by the Lobby services: each store / publisher
// port is supplied explicitly so every concrete adapter is wired at a
// single site.
type Dependencies struct {
// Games is scanned once per tick for records in
// game.StatusEnrollmentOpen.
Games ports.GameStore
// Memberships supplies the active-membership count used to evaluate
// the deadline and gap-roster preconditions.
Memberships ports.MembershipStore
// Invites is forwarded to shared.CloseEnrollment for the cascading
// expiry of created invites on close.
Invites ports.InviteStore
// Intents publishes lobby.invite.expired notifications produced by
// the cascading expiry.
Intents ports.IntentPublisher
// GapStore exposes the gap-window activation timestamp recorded by
// approveapplication and redeeminvite. Its Get accessor returns the
// activation time and an active flag for a game.
GapStore ports.GapActivationStore
// Interval controls the tick cadence. It must be positive.
Interval time.Duration
// Clock supplies the wall-clock used for the per-tick "now"
// reference and for the close timestamp. Defaults to time.Now when
// nil.
Clock func() time.Time
// Logger receives structured worker-level events. Defaults to
// slog.Default when nil.
Logger *slog.Logger
// Telemetry records the `lobby.enrollment_automation.checks`
// counter per inspected game per tick and forwards into
// shared.CloseEnrollment for the `lobby.game.transitions` and
// `lobby.invite.outcomes` counters. Optional; nil disables metric
// emission.
Telemetry *telemetry.Runtime
}
// Worker drives the periodic enrollment-automation loop.
type Worker struct {
games ports.GameStore
memberships ports.MembershipStore
invites ports.InviteStore
intents ports.IntentPublisher
gapStore ports.GapActivationStore
interval time.Duration
clock func() time.Time
logger *slog.Logger
telemetry *telemetry.Runtime
}
// NewWorker constructs one Worker from deps.
func NewWorker(deps Dependencies) (*Worker, error) {
if deps.Games == nil {
return nil, errors.New("new enrollment automation worker: nil game store")
}
if deps.Memberships == nil {
return nil, errors.New("new enrollment automation worker: nil membership store")
}
if deps.Invites == nil {
return nil, errors.New("new enrollment automation worker: nil invite store")
}
if deps.Intents == nil {
return nil, errors.New("new enrollment automation worker: nil intent publisher")
}
if deps.GapStore == nil {
return nil, errors.New("new enrollment automation worker: nil gap activation store")
}
if deps.Interval <= 0 {
return nil, fmt.Errorf("new enrollment automation worker: interval must be positive, got %s", deps.Interval)
}
clock := deps.Clock
if clock == nil {
clock = time.Now
}
logger := deps.Logger
if logger == nil {
logger = slog.Default()
}
return &Worker{
games: deps.Games,
memberships: deps.Memberships,
invites: deps.Invites,
intents: deps.Intents,
gapStore: deps.GapStore,
interval: deps.Interval,
clock: clock,
logger: logger.With("worker", "lobby.enrollmentautomation"),
telemetry: deps.Telemetry,
}, nil
}
// Run drives the periodic ticker. It returns when ctx is cancelled. A
// failure inside one tick does not terminate the loop — every tick logs
// its outcome and the worker stays alive so subsequent ticks can pick up
// once the underlying issue clears.
func (worker *Worker) Run(ctx context.Context) error {
if worker == nil {
return errors.New("run enrollment automation worker: nil worker")
}
if ctx == nil {
return errors.New("run enrollment automation worker: nil context")
}
if err := ctx.Err(); err != nil {
return err
}
worker.logger.Info("enrollment automation worker started", "interval", worker.interval.String())
defer worker.logger.Info("enrollment automation worker stopped")
ticker := time.NewTicker(worker.interval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-ticker.C:
worker.Tick(ctx)
}
}
}
// Shutdown is a no-op: the worker holds no resources beyond its own
// goroutine, which Run releases on context cancellation.
func (worker *Worker) Shutdown(ctx context.Context) error {
if ctx == nil {
return errors.New("shutdown enrollment automation worker: nil context")
}
return nil
}
// Tick performs one automation pass over every enrollment_open game. It
// is exported so tests may drive the loop deterministically without a
// real ticker. Tick never returns an error; per-game failures are logged
// and the iteration continues so a single bad record does not block the
// rest of the roster.
func (worker *Worker) Tick(ctx context.Context) {
if worker == nil || ctx == nil {
return
}
now := worker.clock().UTC()
games, err := worker.games.GetByStatus(ctx, game.StatusEnrollmentOpen)
if err != nil {
worker.logger.WarnContext(ctx, "list enrollment_open games",
"err", err.Error(),
)
return
}
for _, record := range games {
worker.evaluate(ctx, record, now)
}
}
// evaluate inspects one game record and, when one of the three
// auto-close preconditions holds, performs the enrollment close.
// Per-record failures are logged and absorbed; they do not propagate to
// the caller because that would terminate the worker.
func (worker *Worker) evaluate(ctx context.Context, record game.Game, now time.Time) {
approvedCount, err := shared.CountActiveMemberships(ctx, worker.memberships, record.GameID)
if err != nil {
worker.logger.WarnContext(ctx, "count active memberships",
"game_id", record.GameID.String(),
"err", err.Error(),
)
return
}
gapActivatedAt, gapActive, err := worker.gapStore.Get(ctx, record.GameID)
if err != nil {
worker.logger.WarnContext(ctx, "read gap activation",
"game_id", record.GameID.String(),
"err", err.Error(),
)
gapActive = false
}
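// Three auto-close predicates, all evaluated against the per-tick "now":
// gap roster exhaustion fires while the gap window is active once the
// approved roster reaches MaxPlayers + StartGapPlayers; gap time
// exhaustion fires once StartGapHours have elapsed since the window
// activated; the deadline path fires once EnrollmentEndsAt has passed
// with at least MinPlayers approved.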
gapPlayersExhausted := gapActive && approvedCount >= record.MaxPlayers+record.StartGapPlayers
gapTimeExhausted := gapActive && !now.Before(gapActivatedAt.Add(time.Duration(record.StartGapHours)*time.Hour))
deadlineReady := !now.Before(record.EnrollmentEndsAt) && approvedCount >= record.MinPlayers
var trigger game.Trigger
switch {
case gapPlayersExhausted, gapTimeExhausted:
trigger = game.TriggerGap
case deadlineReady:
trigger = game.TriggerDeadline
default:
worker.telemetry.RecordEnrollmentAutomationCheck(ctx, "no_op")
return
}
updated, err := shared.CloseEnrollment(ctx, shared.CloseEnrollmentDeps{
Games: worker.games,
Invites: worker.invites,
Intents: worker.intents,
Logger: worker.logger,
Telemetry: worker.telemetry,
}, record.GameID, trigger, now)
if err != nil {
if errors.Is(err, game.ErrConflict) {
worker.telemetry.RecordEnrollmentAutomationCheck(ctx, "no_op")
worker.logger.InfoContext(ctx, "skipped game closed by another path",
"game_id", record.GameID.String(),
"trigger", string(trigger),
)
return
}
worker.logger.WarnContext(ctx, "close enrollment",
"game_id", record.GameID.String(),
"trigger", string(trigger),
"err", err.Error(),
)
return
}
worker.telemetry.RecordEnrollmentAutomationCheck(ctx, "transitioned")
logArgs := []any{
"game_id", updated.GameID.String(),
"from_status", string(game.StatusEnrollmentOpen),
"to_status", string(updated.Status),
"trigger", string(trigger),
"approved_count", approvedCount,
}
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
worker.logger.InfoContext(ctx, "game moved to ready_to_start", logArgs...)
}
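// The block below is an illustrative wiring sketch, not production code:
// the store and publisher values stand in for whatever concrete adapters
// the binary actually injects, and error handling is elided.
//
//	worker, err := enrollmentautomation.NewWorker(enrollmentautomation.Dependencies{
//		Games:       games,       // ports.GameStore adapter
//		Memberships: memberships, // ports.MembershipStore adapter
//		Invites:     invites,     // ports.InviteStore adapter
//		Intents:     intents,     // ports.IntentPublisher adapter
//		GapStore:    gapStore,    // ports.GapActivationStore adapter
//		Interval:    time.Minute,
//	})
//	if err != nil {
//		return err
//	}
//	go func() { _ = worker.Run(ctx) }() // Run returns once ctx is cancelled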
@@ -0,0 +1,282 @@
package enrollmentautomation_test
import (
"context"
"io"
"log/slog"
"testing"
"time"
"galaxy/lobby/internal/adapters/gamestub"
"galaxy/lobby/internal/adapters/gapactivationstub"
"galaxy/lobby/internal/adapters/intentpubstub"
"galaxy/lobby/internal/adapters/invitestub"
"galaxy/lobby/internal/adapters/membershipstub"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/domain/invite"
"galaxy/lobby/internal/domain/membership"
"galaxy/lobby/internal/worker/enrollmentautomation"
"galaxy/notificationintent"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
gameID = common.GameID("game-private")
ownerUserID = "user-owner"
)
func silentLogger() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
func fixedClock(at time.Time) func() time.Time { return func() time.Time { return at } }
type fixture struct {
now time.Time
games *gamestub.Store
invites *invitestub.Store
memberships *membershipstub.Store
gapStore *gapactivationstub.Store
intents *intentpubstub.Publisher
game game.Game
}
type fixtureOptions struct {
minPlayers int
maxPlayers int
startGapHours int
startGapPlayers int
enrollmentEndsAt time.Time
}
func newFixture(t *testing.T, opts fixtureOptions) *fixture {
t.Helper()
now := time.Date(2026, 4, 25, 10, 0, 0, 0, time.UTC)
if opts.minPlayers == 0 {
opts.minPlayers = 2
}
if opts.maxPlayers == 0 {
opts.maxPlayers = 4
}
if opts.startGapHours == 0 {
opts.startGapHours = 2
}
if opts.startGapPlayers == 0 {
opts.startGapPlayers = 1
}
if opts.enrollmentEndsAt.IsZero() {
opts.enrollmentEndsAt = now.Add(24 * time.Hour)
}
rec, err := game.New(game.NewGameInput{
GameID: gameID,
GameName: "Friends Only",
GameType: game.GameTypePrivate,
OwnerUserID: ownerUserID,
MinPlayers: opts.minPlayers,
MaxPlayers: opts.maxPlayers,
StartGapHours: opts.startGapHours,
StartGapPlayers: opts.startGapPlayers,
EnrollmentEndsAt: opts.enrollmentEndsAt,
TurnSchedule: "0 */6 * * *",
TargetEngineVersion: "1.0.0",
Now: now,
})
require.NoError(t, err)
rec.Status = game.StatusEnrollmentOpen
games := gamestub.NewStore()
require.NoError(t, games.Save(context.Background(), rec))
return &fixture{
now: now,
games: games,
invites: invitestub.NewStore(),
memberships: membershipstub.NewStore(),
gapStore: gapactivationstub.NewStore(),
intents: intentpubstub.NewPublisher(),
game: rec,
}
}
func (f *fixture) addActiveMember(t *testing.T, membershipID common.MembershipID, userID string) {
t.Helper()
mem, err := membership.New(membership.NewMembershipInput{
MembershipID: membershipID,
GameID: gameID,
UserID: userID,
RaceName: "Race " + userID,
CanonicalKey: "race-" + userID,
Now: f.now,
})
require.NoError(t, err)
require.NoError(t, f.memberships.Save(context.Background(), mem))
}
func (f *fixture) addCreatedInvite(t *testing.T, inviteID common.InviteID, invitee string) {
t.Helper()
rec, err := invite.New(invite.NewInviteInput{
InviteID: inviteID,
GameID: gameID,
InviterUserID: ownerUserID,
InviteeUserID: invitee,
Now: f.now,
ExpiresAt: f.game.EnrollmentEndsAt,
})
require.NoError(t, err)
require.NoError(t, f.invites.Save(context.Background(), rec))
}
func (f *fixture) markGapActivatedAt(t *testing.T, at time.Time) {
t.Helper()
require.NoError(t, f.gapStore.MarkActivated(context.Background(), gameID, at))
}
func (f *fixture) newWorker(t *testing.T, tickAt time.Time) *enrollmentautomation.Worker {
t.Helper()
worker, err := enrollmentautomation.NewWorker(enrollmentautomation.Dependencies{
Games: f.games,
Memberships: f.memberships,
Invites: f.invites,
Intents: f.intents,
GapStore: f.gapStore,
Interval: time.Minute,
Clock: fixedClock(tickAt),
Logger: silentLogger(),
})
require.NoError(t, err)
return worker
}
func currentStatus(t *testing.T, f *fixture) game.Status {
t.Helper()
rec, err := f.games.Get(context.Background(), gameID)
require.NoError(t, err)
return rec.Status
}
func TestNewWorkerRejectsZeroInterval(t *testing.T) {
t.Parallel()
_, err := enrollmentautomation.NewWorker(enrollmentautomation.Dependencies{
Games: gamestub.NewStore(),
Memberships: membershipstub.NewStore(),
Invites: invitestub.NewStore(),
Intents: intentpubstub.NewPublisher(),
GapStore: gapactivationstub.NewStore(),
Interval: 0,
})
require.Error(t, err)
}
func TestTickDeadlineTriggers(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 2})
f.addActiveMember(t, "membership-1", "user-a")
f.addActiveMember(t, "membership-2", "user-b")
f.addCreatedInvite(t, "invite-1", "user-c")
tickAt := f.game.EnrollmentEndsAt.Add(time.Minute)
f.newWorker(t, tickAt).Tick(context.Background())
assert.Equal(t, game.StatusReadyToStart, currentStatus(t, f))
expired, err := f.invites.Get(context.Background(), "invite-1")
require.NoError(t, err)
assert.Equal(t, invite.StatusExpired, expired.Status)
intents := f.intents.Published()
require.Len(t, intents, 1)
assert.Equal(t, notificationintent.NotificationTypeLobbyInviteExpired, intents[0].NotificationType)
}
func TestTickDeadlineSkipsBelowMinPlayers(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 3})
f.addActiveMember(t, "membership-1", "user-a")
f.addActiveMember(t, "membership-2", "user-b")
tickAt := f.game.EnrollmentEndsAt.Add(time.Minute)
f.newWorker(t, tickAt).Tick(context.Background())
assert.Equal(t, game.StatusEnrollmentOpen, currentStatus(t, f))
assert.Empty(t, f.intents.Published())
}
func TestTickGapTimeTriggers(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 2, maxPlayers: 4, startGapHours: 2, startGapPlayers: 2})
f.addActiveMember(t, "membership-1", "user-a")
f.addActiveMember(t, "membership-2", "user-b")
f.addActiveMember(t, "membership-3", "user-c")
f.addActiveMember(t, "membership-4", "user-d")
f.markGapActivatedAt(t, f.now)
tickAt := f.now.Add(2 * time.Hour).Add(time.Minute)
f.newWorker(t, tickAt).Tick(context.Background())
assert.Equal(t, game.StatusReadyToStart, currentStatus(t, f))
}
func TestTickGapPlayersTriggersBeforeTime(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 2, maxPlayers: 4, startGapHours: 24, startGapPlayers: 1})
f.addActiveMember(t, "membership-1", "user-a")
f.addActiveMember(t, "membership-2", "user-b")
f.addActiveMember(t, "membership-3", "user-c")
f.addActiveMember(t, "membership-4", "user-d")
f.addActiveMember(t, "membership-5", "user-e")
f.markGapActivatedAt(t, f.now)
tickAt := f.now.Add(15 * time.Minute)
f.newWorker(t, tickAt).Tick(context.Background())
assert.Equal(t, game.StatusReadyToStart, currentStatus(t, f))
}
func TestTickGapInactiveSkipsBeforeDeadline(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 2, maxPlayers: 4, startGapHours: 1})
f.addActiveMember(t, "membership-1", "user-a")
f.addActiveMember(t, "membership-2", "user-b")
tickAt := f.now.Add(2 * time.Hour)
f.newWorker(t, tickAt).Tick(context.Background())
assert.Equal(t, game.StatusEnrollmentOpen, currentStatus(t, f))
}
func TestTickIsIdempotent(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 2})
f.addActiveMember(t, "membership-1", "user-a")
f.addActiveMember(t, "membership-2", "user-b")
f.addCreatedInvite(t, "invite-1", "user-c")
tickAt := f.game.EnrollmentEndsAt.Add(time.Minute)
worker := f.newWorker(t, tickAt)
worker.Tick(context.Background())
worker.Tick(context.Background())
assert.Equal(t, game.StatusReadyToStart, currentStatus(t, f))
assert.Len(t, f.intents.Published(), 1)
}
func TestRunStopsOnContextCancel(t *testing.T) {
t.Parallel()
f := newFixture(t, fixtureOptions{minPlayers: 2})
worker := f.newWorker(t, f.now)
ctx, cancel := context.WithCancel(context.Background())
done := make(chan error, 1)
go func() { done <- worker.Run(ctx) }()
cancel()
select {
case err := <-done:
require.ErrorIs(t, err, context.Canceled)
case <-time.After(time.Second):
t.Fatal("worker did not stop after context cancel")
}
}
@@ -0,0 +1,579 @@
// Package gmevents implements the worker that consumes Game Master
// runtime events from the `gm:lobby_events` Redis Stream and drives the
// resulting lobby-side updates: keeping the denormalized runtime snapshot
// current, feeding the per-game stats aggregate, and dispatching
// capability evaluation at game finish.
//
// The consumer recognizes two event kinds documented in
// lobby/README.md §Runtime Snapshot:
//
// - `runtime_snapshot_update` — applied to the game record snapshot
// (current_turn, runtime_status, engine_health_summary) and to the
// per-user stats aggregate (initial fields frozen on the first
// observation, max fields raised by per-component maximum on every
// event);
// - `game_finished` — the same snapshot update plus a status
// transition to `finished` and a capability-evaluation hand-off.
//
// Replay protection rests on three ingredients:
//
// 1. Status transitions (running/paused → finished) use the existing
// ports.GameStore CAS guard, so a replayed game_finished finds the
// game already in `finished` and the second pass is a no-op for the
// status field.
// 2. Snapshot updates use a fresh `At` timestamp on each call but
// overwrite a deterministic snapshot blob, so re-applying an older
// event does not corrupt the record.
// 3. Capability evaluation uses ports.EvaluationGuardStore to recognise
// replay and skip mutations.
//
// Stream-offset bookkeeping advances after each successfully handled
// event. A `game_finished` event that fails capability evaluation
// (transient store error) leaves the offset behind so the next loop
// iteration retries the same entry.
package gmevents
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"strconv"
"strings"
"time"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/logging"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/telemetry"
"github.com/redis/go-redis/v9"
)
// streamOffsetLabel identifies the gmevents consumer in the stream offset
// store. It stays stable when the underlying stream key is renamed via
// configuration.
const streamOffsetLabel = "gm_lobby_events"
// Event kinds carried in the GM stream `kind` field.
const (
kindRuntimeSnapshotUpdate = "runtime_snapshot_update"
kindGameFinished = "game_finished"
)
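// Every entry carries flat string values. The fields recognised by
// decodeGMEvent are (optional unless noted):
//
//	kind                  "runtime_snapshot_update" or "game_finished" (required)
//	game_id               lobby game identifier (required)
//	current_turn          non-negative integer
//	runtime_status        free-form engine runtime status
//	engine_health_summary free-form engine health summary
//	player_turn_stats     JSON array of {user_id, planets, population, ships_built}
//	finished_at_ms        positive Unix milliseconds; when absent on a
//	                      game_finished event the consumer clock supplies
//	                      the finish time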
// CapabilityEvaluator is the minimal interface the gmevents consumer
// requires from the capability evaluation service. The interface
// lives here rather than in the service package to avoid an import cycle.
type CapabilityEvaluator interface {
Evaluate(ctx context.Context, gameID common.GameID, finishedAt time.Time) error
}
// Config groups the dependencies used by Consumer.
type Config struct {
// Client provides XREAD access to the GM events stream.
Client *redis.Client
// Stream stores the Redis Streams key consumed by the worker.
Stream string
// BlockTimeout bounds the blocking XREAD window.
BlockTimeout time.Duration
// Games persists the runtime snapshot updates and the game-finished
// status transitions.
Games ports.GameStore
// Stats persists the per-user stats aggregate fed by every snapshot
// event.
Stats ports.GameTurnStatsStore
// Capability runs capability evaluation after a successful
// `game_finished` transition.
Capability CapabilityEvaluator
// OffsetStore persists the last successfully processed entry id.
OffsetStore ports.StreamOffsetStore
// Clock supplies the wall-clock used for snapshot UpdatedAt and for
// status transition timestamps when the GM event does not carry one.
// Defaults to time.Now when nil.
Clock func() time.Time
// Logger receives structured worker-level events. Defaults to
// slog.Default when nil.
Logger *slog.Logger
// Telemetry records the `lobby.game.transitions` counter on each
// successful game-finished transition. Optional; nil disables
// metric emission.
Telemetry *telemetry.Runtime
}
// Consumer drives the gmevents processing loop.
type Consumer struct {
client *redis.Client
stream string
blockTimeout time.Duration
games ports.GameStore
stats ports.GameTurnStatsStore
capability CapabilityEvaluator
offsetStore ports.StreamOffsetStore
clock func() time.Time
logger *slog.Logger
telemetry *telemetry.Runtime
}
// NewConsumer constructs one Consumer from cfg.
func NewConsumer(cfg Config) (*Consumer, error) {
switch {
case cfg.Client == nil:
return nil, errors.New("new gm events consumer: nil redis client")
case strings.TrimSpace(cfg.Stream) == "":
return nil, errors.New("new gm events consumer: stream must not be empty")
case cfg.BlockTimeout <= 0:
return nil, errors.New("new gm events consumer: block timeout must be positive")
case cfg.Games == nil:
return nil, errors.New("new gm events consumer: nil game store")
case cfg.Stats == nil:
return nil, errors.New("new gm events consumer: nil game turn stats store")
case cfg.Capability == nil:
return nil, errors.New("new gm events consumer: nil capability evaluator")
case cfg.OffsetStore == nil:
return nil, errors.New("new gm events consumer: nil offset store")
}
clock := cfg.Clock
if clock == nil {
clock = time.Now
}
logger := cfg.Logger
if logger == nil {
logger = slog.Default()
}
return &Consumer{
client: cfg.Client,
stream: cfg.Stream,
blockTimeout: cfg.BlockTimeout,
games: cfg.Games,
stats: cfg.Stats,
capability: cfg.Capability,
offsetStore: cfg.OffsetStore,
clock: clock,
logger: logger.With("worker", "lobby.gmevents", "stream", cfg.Stream),
telemetry: cfg.Telemetry,
}, nil
}
// Run drives the XREAD loop until ctx is cancelled. Per-message outcomes
// are absorbed by HandleMessage; the loop only exits on context
// cancellation or a fatal Redis error. The offset advances only after a
// successful HandleMessage call so capability evaluation failure replays
// the same entry on the next iteration.
func (consumer *Consumer) Run(ctx context.Context) error {
if consumer == nil || consumer.client == nil {
return errors.New("run gm events consumer: nil consumer")
}
if ctx == nil {
return errors.New("run gm events consumer: nil context")
}
if err := ctx.Err(); err != nil {
return err
}
lastID, found, err := consumer.offsetStore.Load(ctx, streamOffsetLabel)
if err != nil {
return fmt.Errorf("run gm events consumer: load offset: %w", err)
}
if !found {
lastID = "0-0"
}
consumer.logger.Info("gm events consumer started",
"block_timeout", consumer.blockTimeout.String(),
"start_entry_id", lastID,
)
defer consumer.logger.Info("gm events consumer stopped")
for {
streams, err := consumer.client.XRead(ctx, &redis.XReadArgs{
Streams: []string{consumer.stream, lastID},
Count: 1,
Block: consumer.blockTimeout,
}).Result()
switch {
case err == nil:
for _, stream := range streams {
for _, message := range stream.Messages {
if !consumer.HandleMessage(ctx, message) {
continue
}
if err := consumer.offsetStore.Save(ctx, streamOffsetLabel, message.ID); err != nil {
return fmt.Errorf("run gm events consumer: save offset: %w", err)
}
lastID = message.ID
}
}
case errors.Is(err, redis.Nil):
continue
case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, redis.ErrClosed)):
return ctx.Err()
default:
return fmt.Errorf("run gm events consumer: %w", err)
}
}
}
// Shutdown is a no-op; the consumer relies on context cancellation.
func (consumer *Consumer) Shutdown(ctx context.Context) error {
if ctx == nil {
return errors.New("shutdown gm events consumer: nil context")
}
return nil
}
// HandleMessage processes one Redis Stream message and reports whether
// the offset is allowed to advance. Decoding errors and logical replays
// return true (the message is consumed and the offset advances). A
// `game_finished` whose capability evaluation fails returns false so the
// caller leaves the offset behind and the next iteration retries.
//
// Exported so tests can drive the consumer deterministically without
// spinning up a real XREAD loop.
func (consumer *Consumer) HandleMessage(ctx context.Context, message redis.XMessage) bool {
if consumer == nil {
return false
}
event, err := decodeGMEvent(message)
if err != nil {
consumer.logger.WarnContext(ctx, "decode gm event",
"stream_entry_id", message.ID,
"err", err.Error(),
)
return true
}
switch event.Kind {
case kindRuntimeSnapshotUpdate:
consumer.handleSnapshotUpdate(ctx, message.ID, event)
return true
case kindGameFinished:
return consumer.handleGameFinished(ctx, message.ID, event)
default:
consumer.logger.WarnContext(ctx, "unknown gm event kind",
"stream_entry_id", message.ID,
"game_id", event.GameID.String(),
"kind", event.Kind,
)
return true
}
}
// handleSnapshotUpdate applies the snapshot to the game record and the
// stats aggregate. Errors are logged and absorbed: the message advances
// in either case so a transient store hiccup does not stall the stream.
// Snapshot overwrites and the initial/max stats writes are idempotent, so
// replays need no extra bookkeeping.
func (consumer *Consumer) handleSnapshotUpdate(ctx context.Context, entryID string, event gmEvent) {
at := consumer.clock().UTC()
if err := consumer.games.UpdateRuntimeSnapshot(ctx, ports.UpdateRuntimeSnapshotInput{
GameID: event.GameID,
Snapshot: event.Snapshot,
At: at,
}); err != nil && !errors.Is(err, game.ErrNotFound) {
consumer.logger.WarnContext(ctx, "apply runtime snapshot",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
if len(event.PlayerStats) == 0 {
return
}
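// SaveInitial freezes the first-observed values per user and leaves
// already-recorded users untouched; UpdateMax raises each per-component
// maximum monotonically. Replays and out-of-order events therefore cannot
// lower either aggregate.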
if err := consumer.stats.SaveInitial(ctx, event.GameID, event.PlayerStats); err != nil {
consumer.logger.WarnContext(ctx, "save initial player stats",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
if err := consumer.stats.UpdateMax(ctx, event.GameID, event.PlayerStats); err != nil {
consumer.logger.WarnContext(ctx, "update max player stats",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
}
// handleGameFinished applies the final snapshot, transitions the game to
// `finished` (or absorbs a replay where the game is already finished),
// then drives capability evaluation. The function returns false to hold
// the stream offset when a recoverable failure prevents capability
// evaluation; the next loop iteration retries the same entry.
func (consumer *Consumer) handleGameFinished(ctx context.Context, entryID string, event gmEvent) bool {
at := consumer.clock().UTC()
finishedAt := event.FinishedAt
if finishedAt.IsZero() {
finishedAt = at
}
if err := consumer.games.UpdateRuntimeSnapshot(ctx, ports.UpdateRuntimeSnapshotInput{
GameID: event.GameID,
Snapshot: event.Snapshot,
At: at,
}); err != nil && !errors.Is(err, game.ErrNotFound) {
consumer.logger.WarnContext(ctx, "apply final runtime snapshot",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
if len(event.PlayerStats) > 0 {
if err := consumer.stats.SaveInitial(ctx, event.GameID, event.PlayerStats); err != nil {
consumer.logger.WarnContext(ctx, "save initial player stats on finish",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
if err := consumer.stats.UpdateMax(ctx, event.GameID, event.PlayerStats); err != nil {
consumer.logger.WarnContext(ctx, "update max player stats on finish",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
}
record, err := consumer.games.Get(ctx, event.GameID)
switch {
case err == nil:
case errors.Is(err, game.ErrNotFound):
consumer.logger.WarnContext(ctx, "game finished for unknown game id",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
)
return true
default:
consumer.logger.WarnContext(ctx, "load game for finish",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
return false
}
switch record.Status {
case game.StatusRunning, game.StatusPaused:
if err := consumer.games.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: record.Status,
To: game.StatusFinished,
Trigger: game.TriggerRuntimeEvent,
At: finishedAt,
}); err != nil {
switch {
case errors.Is(err, game.ErrConflict), errors.Is(err, game.ErrInvalidTransition):
consumer.logger.InfoContext(ctx, "game finished transition absorbed by status conflict",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
)
default:
consumer.logger.WarnContext(ctx, "transition game to finished",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"err", err.Error(),
)
return false
}
} else {
consumer.telemetry.RecordGameTransition(ctx,
string(record.Status),
string(game.StatusFinished),
string(game.TriggerRuntimeEvent),
)
logArgs := []any{
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"from_status", string(record.Status),
"to_status", string(game.StatusFinished),
"trigger", string(game.TriggerRuntimeEvent),
}
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
consumer.logger.InfoContext(ctx, "game finished", logArgs...)
}
case game.StatusFinished:
consumer.logger.InfoContext(ctx, "game finished event observed for already finished game",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
)
default:
consumer.logger.InfoContext(ctx, "game finished event ignored for unexpected status",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"current_status", string(record.Status),
)
return true
}
refreshed, err := consumer.games.Get(ctx, event.GameID)
if err != nil {
consumer.logger.WarnContext(ctx, "reload finished game record",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
return false
}
if refreshed.Status != game.StatusFinished {
consumer.logger.WarnContext(ctx, "game record not finished after transition attempt",
"stream_entry_id", entryID,
"game_id", refreshed.GameID.String(),
"current_status", string(refreshed.Status),
)
return true
}
if refreshed.FinishedAt == nil {
consumer.logger.WarnContext(ctx, "finished game missing finished_at",
"stream_entry_id", entryID,
"game_id", refreshed.GameID.String(),
)
return true
}
if err := consumer.capability.Evaluate(ctx, refreshed.GameID, *refreshed.FinishedAt); err != nil {
consumer.logger.WarnContext(ctx, "capability evaluation failed",
"stream_entry_id", entryID,
"game_id", refreshed.GameID.String(),
"err", err.Error(),
)
return false
}
consumer.logger.InfoContext(ctx, "game finished processed",
"stream_entry_id", entryID,
"game_id", refreshed.GameID.String(),
)
return true
}
// gmEvent stores the decoded shape of one `gm:lobby_events` entry shared
// by the snapshot and finish handlers.
type gmEvent struct {
Kind string
GameID common.GameID
Snapshot game.RuntimeSnapshot
PlayerStats []ports.PlayerObservedStats
FinishedAt time.Time
}
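// decodeGMEvent validates one stream entry and decodes it into a gmEvent.
// Unsupported kinds, a missing or invalid game_id, and malformed
// current_turn, player_turn_stats, or finished_at_ms values surface as
// errors so HandleMessage can log the entry and move on.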
func decodeGMEvent(message redis.XMessage) (gmEvent, error) {
kind := optionalString(message.Values, "kind")
if kind != kindRuntimeSnapshotUpdate && kind != kindGameFinished {
return gmEvent{}, fmt.Errorf("unsupported event kind %q", kind)
}
gameIDRaw := optionalString(message.Values, "game_id")
if strings.TrimSpace(gameIDRaw) == "" {
return gmEvent{}, errors.New("missing game_id")
}
gameID := common.GameID(gameIDRaw)
if err := gameID.Validate(); err != nil {
return gmEvent{}, fmt.Errorf("invalid game_id: %w", err)
}
snapshot := game.RuntimeSnapshot{
RuntimeStatus: optionalString(message.Values, "runtime_status"),
EngineHealthSummary: optionalString(message.Values, "engine_health_summary"),
}
if turnRaw := optionalString(message.Values, "current_turn"); turnRaw != "" {
parsed, err := strconv.Atoi(turnRaw)
if err != nil {
return gmEvent{}, fmt.Errorf("invalid current_turn: %w", err)
}
if parsed < 0 {
return gmEvent{}, fmt.Errorf("invalid current_turn: must not be negative")
}
snapshot.CurrentTurn = parsed
}
playerStats, err := decodePlayerStats(optionalString(message.Values, "player_turn_stats"))
if err != nil {
return gmEvent{}, fmt.Errorf("invalid player_turn_stats: %w", err)
}
var finishedAt time.Time
if raw := optionalString(message.Values, "finished_at_ms"); raw != "" {
ms, err := strconv.ParseInt(raw, 10, 64)
if err != nil {
return gmEvent{}, fmt.Errorf("invalid finished_at_ms: %w", err)
}
if ms <= 0 {
return gmEvent{}, fmt.Errorf("invalid finished_at_ms: must be positive")
}
finishedAt = time.UnixMilli(ms).UTC()
}
return gmEvent{
Kind: kind,
GameID: gameID,
Snapshot: snapshot,
PlayerStats: playerStats,
FinishedAt: finishedAt,
}, nil
}
// playerStatsLine mirrors the JSON shape of one `player_turn_stats`
// element on the GM stream.
type playerStatsLine struct {
UserID string `json:"user_id"`
Planets int64 `json:"planets"`
Population int64 `json:"population"`
ShipsBuilt int64 `json:"ships_built"`
}
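// decodePlayerStats parses the JSON player_turn_stats payload into
// validated ports.PlayerObservedStats entries; an empty payload yields a
// nil slice.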
func decodePlayerStats(payload string) ([]ports.PlayerObservedStats, error) {
if strings.TrimSpace(payload) == "" {
return nil, nil
}
var lines []playerStatsLine
if err := json.Unmarshal([]byte(payload), &lines); err != nil {
return nil, err
}
stats := make([]ports.PlayerObservedStats, 0, len(lines))
for _, line := range lines {
entry := ports.PlayerObservedStats{
UserID: strings.TrimSpace(line.UserID),
Planets: line.Planets,
Population: line.Population,
ShipsBuilt: line.ShipsBuilt,
}
if err := entry.Validate(); err != nil {
return nil, err
}
stats = append(stats, entry)
}
return stats, nil
}
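// optionalString reads one entry value as a string, tolerating []byte
// payloads and returning "" when the key is absent or has another type.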
func optionalString(values map[string]any, key string) string {
raw, ok := values[key]
if !ok {
return ""
}
switch typed := raw.(type) {
case string:
return typed
case []byte:
return string(typed)
default:
return ""
}
}
@@ -0,0 +1,470 @@
package gmevents_test
import (
"context"
"encoding/json"
"errors"
"io"
"log/slog"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"galaxy/lobby/internal/adapters/gamestub"
"galaxy/lobby/internal/adapters/gameturnstatsstub"
"galaxy/lobby/internal/adapters/streamoffsetstub"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/worker/gmevents"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func silentLogger() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
// fakeEvaluator implements gmevents.CapabilityEvaluator and records calls.
type fakeEvaluator struct {
mu sync.Mutex
calls []evaluatorCall
err error
}
type evaluatorCall struct {
GameID common.GameID
FinishedAt time.Time
}
func (e *fakeEvaluator) Evaluate(_ context.Context, gameID common.GameID, finishedAt time.Time) error {
e.mu.Lock()
defer e.mu.Unlock()
e.calls = append(e.calls, evaluatorCall{GameID: gameID, FinishedAt: finishedAt})
return e.err
}
func (e *fakeEvaluator) Calls() []evaluatorCall {
e.mu.Lock()
defer e.mu.Unlock()
return append([]evaluatorCall(nil), e.calls...)
}
func (e *fakeEvaluator) SetError(err error) {
e.mu.Lock()
defer e.mu.Unlock()
e.err = err
}
type harness struct {
games *gamestub.Store
stats *gameturnstatsstub.Store
evaluator *fakeEvaluator
offsets *streamoffsetstub.Store
consumer *gmevents.Consumer
server *miniredis.Miniredis
clientRedis *redis.Client
stream string
at time.Time
gameRecord game.Game
}
func newHarness(t *testing.T) *harness {
t.Helper()
server := miniredis.RunT(t)
clientRedis := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = clientRedis.Close() })
games := gamestub.NewStore()
stats := gameturnstatsstub.NewStore()
evaluator := &fakeEvaluator{}
offsets := streamoffsetstub.NewStore()
at := time.Date(2026, 4, 25, 14, 0, 0, 0, time.UTC)
now := at.Add(-2 * time.Hour)
record, err := game.New(game.NewGameInput{
GameID: common.GameID("game-w"),
GameName: "test worker game",
GameType: game.GameTypePublic,
MinPlayers: 2,
MaxPlayers: 4,
StartGapHours: 2,
StartGapPlayers: 1,
EnrollmentEndsAt: now.Add(2 * time.Hour),
TurnSchedule: "0 */6 * * *",
TargetEngineVersion: "1.0.0",
Now: now,
})
require.NoError(t, err)
record.Status = game.StatusRunning
startedAt := at.Add(-time.Hour)
record.StartedAt = &startedAt
require.NoError(t, games.Save(context.Background(), record))
consumer, err := gmevents.NewConsumer(gmevents.Config{
Client: clientRedis,
Stream: "gm:lobby_events",
BlockTimeout: 100 * time.Millisecond,
Games: games,
Stats: stats,
Capability: evaluator,
OffsetStore: offsets,
Clock: func() time.Time { return at },
Logger: silentLogger(),
})
require.NoError(t, err)
return &harness{
games: games,
stats: stats,
evaluator: evaluator,
offsets: offsets,
consumer: consumer,
server: server,
clientRedis: clientRedis,
stream: "gm:lobby_events",
at: at,
gameRecord: record,
}
}
func snapshotMessage(t *testing.T, h *harness, id string, currentTurn int, lines []ports.PlayerObservedStats) redis.XMessage {
t.Helper()
stats, err := json.Marshal(toJSONLines(lines))
require.NoError(t, err)
return redis.XMessage{
ID: id,
Values: map[string]any{
"kind": "runtime_snapshot_update",
"game_id": h.gameRecord.GameID.String(),
"current_turn": strconv.Itoa(currentTurn),
"runtime_status": "running_accepting_commands",
"engine_health_summary": "ok",
"player_turn_stats": string(stats),
},
}
}
func gameFinishedMessage(t *testing.T, h *harness, id string, finishedAt time.Time, lines []ports.PlayerObservedStats) redis.XMessage {
t.Helper()
stats, err := json.Marshal(toJSONLines(lines))
require.NoError(t, err)
return redis.XMessage{
ID: id,
Values: map[string]any{
"kind": "game_finished",
"game_id": h.gameRecord.GameID.String(),
"current_turn": "42",
"runtime_status": "stopped",
"engine_health_summary": "ok",
"player_turn_stats": string(stats),
"finished_at_ms": strconv.FormatInt(finishedAt.UTC().UnixMilli(), 10),
},
}
}
type statsLine struct {
UserID string `json:"user_id"`
Planets int64 `json:"planets"`
Population int64 `json:"population"`
ShipsBuilt int64 `json:"ships_built"`
}
func toJSONLines(stats []ports.PlayerObservedStats) []statsLine {
out := make([]statsLine, 0, len(stats))
for _, line := range stats {
out = append(out, statsLine{
UserID: line.UserID,
Planets: line.Planets,
Population: line.Population,
ShipsBuilt: line.ShipsBuilt,
})
}
return out
}
func TestNewConsumerRejectsMissingDeps(t *testing.T) {
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
_, err := gmevents.NewConsumer(gmevents.Config{
Stream: "gm:lobby_events",
BlockTimeout: time.Second,
})
require.Error(t, err)
_, err = gmevents.NewConsumer(gmevents.Config{
Client: client,
BlockTimeout: time.Second,
})
require.Error(t, err)
_, err = gmevents.NewConsumer(gmevents.Config{
Client: client,
Stream: "gm:lobby_events",
BlockTimeout: time.Second,
Games: gamestub.NewStore(),
Stats: gameturnstatsstub.NewStore(),
})
require.Error(t, err, "missing capability evaluator")
}
func TestHandleSnapshotUpdate(t *testing.T) {
h := newHarness(t)
stats := []ports.PlayerObservedStats{
{UserID: "user-a", Planets: 3, Population: 100, ShipsBuilt: 0},
{UserID: "user-b", Planets: 4, Population: 80, ShipsBuilt: 1},
}
ack := h.consumer.HandleMessage(context.Background(), snapshotMessage(t, h, "1700000000000-0", 5, stats))
assert.True(t, ack)
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusRunning, got.Status, "snapshot must not change status")
assert.Equal(t, 5, got.RuntimeSnapshot.CurrentTurn)
assert.Equal(t, "running_accepting_commands", got.RuntimeSnapshot.RuntimeStatus)
assert.Equal(t, "ok", got.RuntimeSnapshot.EngineHealthSummary)
aggregate, err := h.stats.Load(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
require.Len(t, aggregate.Players, 2)
assert.Equal(t, int64(3), aggregate.Players[0].InitialPlanets)
assert.Equal(t, int64(3), aggregate.Players[0].MaxPlanets)
assert.Equal(t, int64(4), aggregate.Players[1].InitialPlanets)
}
func TestSnapshotReplayDoesNotMutateInitialAndKeepsMaxMonotonic(t *testing.T) {
h := newHarness(t)
first := []ports.PlayerObservedStats{
{UserID: "user-a", Planets: 3, Population: 100, ShipsBuilt: 0},
}
second := []ports.PlayerObservedStats{
{UserID: "user-a", Planets: 5, Population: 80, ShipsBuilt: 2},
}
third := []ports.PlayerObservedStats{
{UserID: "user-a", Planets: 1, Population: 1, ShipsBuilt: 1},
}
require.True(t, h.consumer.HandleMessage(context.Background(), snapshotMessage(t, h, "1-0", 1, first)))
require.True(t, h.consumer.HandleMessage(context.Background(), snapshotMessage(t, h, "2-0", 2, second)))
require.True(t, h.consumer.HandleMessage(context.Background(), snapshotMessage(t, h, "3-0", 3, third)))
aggregate, err := h.stats.Load(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
require.Len(t, aggregate.Players, 1)
assert.Equal(t, int64(3), aggregate.Players[0].InitialPlanets)
assert.Equal(t, int64(100), aggregate.Players[0].InitialPopulation)
assert.Equal(t, int64(0), aggregate.Players[0].InitialShipsBuilt)
assert.Equal(t, int64(5), aggregate.Players[0].MaxPlanets)
assert.Equal(t, int64(100), aggregate.Players[0].MaxPopulation)
assert.Equal(t, int64(2), aggregate.Players[0].MaxShipsBuilt)
}
func TestHandleGameFinishedTransitionsAndCallsEvaluator(t *testing.T) {
h := newHarness(t)
finishedAt := h.at.Add(-30 * time.Second)
stats := []ports.PlayerObservedStats{
{UserID: "user-a", Planets: 9, Population: 200, ShipsBuilt: 3},
}
ack := h.consumer.HandleMessage(context.Background(), gameFinishedMessage(t, h, "5-0", finishedAt, stats))
assert.True(t, ack)
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusFinished, got.Status)
require.NotNil(t, got.FinishedAt)
assert.True(t, got.FinishedAt.Equal(finishedAt))
calls := h.evaluator.Calls()
require.Len(t, calls, 1)
assert.Equal(t, h.gameRecord.GameID, calls[0].GameID)
assert.True(t, calls[0].FinishedAt.Equal(finishedAt))
}
func TestHandleGameFinishedFromPaused(t *testing.T) {
h := newHarness(t)
record, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
require.NoError(t, h.games.UpdateStatus(context.Background(), ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: game.StatusRunning,
To: game.StatusPaused,
Trigger: game.TriggerCommand,
At: h.at.Add(-time.Minute),
}))
finishedAt := h.at.Add(-10 * time.Second)
require.True(t, h.consumer.HandleMessage(context.Background(), gameFinishedMessage(t, h, "6-0", finishedAt, nil)))
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusFinished, got.Status)
require.Len(t, h.evaluator.Calls(), 1)
}
func TestHandleGameFinishedReplayCallsEvaluatorOnceOnDuplicate(t *testing.T) {
h := newHarness(t)
finishedAt := h.at.Add(-30 * time.Second)
stats := []ports.PlayerObservedStats{
{UserID: "user-a", Planets: 9, Population: 200, ShipsBuilt: 3},
}
ack := h.consumer.HandleMessage(context.Background(), gameFinishedMessage(t, h, "7-0", finishedAt, stats))
assert.True(t, ack)
// Replay: the same game_finished payload arrives again under a new stream
// entry id. The game is already finished, so no second status transition
// is attempted, but the evaluator is still invoked (the evaluator owns
// its own replay guard via EvaluationGuardStore in production wiring).
ack2 := h.consumer.HandleMessage(context.Background(), gameFinishedMessage(t, h, "7-1", finishedAt, stats))
assert.True(t, ack2)
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusFinished, got.Status)
require.NotNil(t, got.FinishedAt)
assert.True(t, got.FinishedAt.Equal(finishedAt), "finished_at preserved across replay")
calls := h.evaluator.Calls()
require.Len(t, calls, 2, "consumer always hands off to evaluator; deduplication is the evaluator's job")
}
func TestHandleGameFinishedForAlreadyFinishedGameStillHandsOff(t *testing.T) {
h := newHarness(t)
require.NoError(t, h.games.UpdateStatus(context.Background(), ports.UpdateStatusInput{
GameID: h.gameRecord.GameID,
ExpectedFrom: game.StatusRunning,
To: game.StatusPaused,
Trigger: game.TriggerCommand,
At: h.at.Add(-2 * time.Minute),
}))
require.NoError(t, h.games.UpdateStatus(context.Background(), ports.UpdateStatusInput{
GameID: h.gameRecord.GameID,
ExpectedFrom: game.StatusPaused,
To: game.StatusFinished,
Trigger: game.TriggerRuntimeEvent,
At: h.at.Add(-time.Minute),
}))
finishedAt := h.at.Add(-30 * time.Second)
require.True(t, h.consumer.HandleMessage(context.Background(), gameFinishedMessage(t, h, "8-0", finishedAt, nil)))
calls := h.evaluator.Calls()
require.Len(t, calls, 1, "event still drives evaluator handoff for already-finished games")
}
func TestHandleGameFinishedRetainsOffsetOnEvaluatorError(t *testing.T) {
h := newHarness(t)
h.evaluator.SetError(errors.New("transient redis"))
ack := h.consumer.HandleMessage(context.Background(), gameFinishedMessage(t, h, "9-0", h.at, nil))
assert.False(t, ack, "evaluator error must hold offset")
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusFinished, got.Status, "transition still committed")
}
func TestHandleSnapshotForUnknownGameIsAbsorbed(t *testing.T) {
h := newHarness(t)
msg := snapshotMessage(t, h, "10-0", 5, nil)
msg.Values["game_id"] = "game-does-not-exist"
ack := h.consumer.HandleMessage(context.Background(), msg)
assert.True(t, ack)
}
func TestHandleMalformedEventsAreAbsorbed(t *testing.T) {
h := newHarness(t)
cases := []redis.XMessage{
{ID: "11-0", Values: map[string]any{"kind": "runtime_snapshot_update"}}, // missing game_id
{ID: "11-1", Values: map[string]any{"kind": "runtime_snapshot_update", "game_id": "bogus"}}, // invalid game_id
{ID: "11-2", Values: map[string]any{"kind": "weird", "game_id": h.gameRecord.GameID.String()}}, // unknown kind
{ID: "11-3", Values: map[string]any{"kind": "runtime_snapshot_update", "game_id": h.gameRecord.GameID.String(), "current_turn": "abc"}},
{ID: "11-4", Values: map[string]any{"kind": "runtime_snapshot_update", "game_id": h.gameRecord.GameID.String(), "player_turn_stats": "not json"}},
}
for _, msg := range cases {
assert.True(t, h.consumer.HandleMessage(context.Background(), msg))
}
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusRunning, got.Status, "malformed events leave game untouched")
}
func TestRunResumesFromPersistedOffset(t *testing.T) {
h := newHarness(t)
// Pre-publish two events into the stream; persist an offset such
// that only the second one will be processed by Run.
firstID, err := h.clientRedis.XAdd(context.Background(), &redis.XAddArgs{
Stream: h.stream,
Values: snapshotMessage(t, h, "_", 1, []ports.PlayerObservedStats{{UserID: "user-a", Planets: 1, Population: 1, ShipsBuilt: 0}}).Values,
}).Result()
require.NoError(t, err)
secondID, err := h.clientRedis.XAdd(context.Background(), &redis.XAddArgs{
Stream: h.stream,
Values: snapshotMessage(t, h, "_", 2, []ports.PlayerObservedStats{{UserID: "user-a", Planets: 9, Population: 9, ShipsBuilt: 0}}).Values,
}).Result()
require.NoError(t, err)
h.offsets.Set("gm_lobby_events", firstID)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
done := make(chan error, 1)
go func() { done <- h.consumer.Run(ctx) }()
deadline := time.Now().Add(1500 * time.Millisecond)
for time.Now().Before(deadline) {
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
if got.RuntimeSnapshot.CurrentTurn == 2 {
break
}
time.Sleep(20 * time.Millisecond)
}
cancel()
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatalf("consumer did not stop")
}
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, 2, got.RuntimeSnapshot.CurrentTurn, "first event was skipped via persisted offset")
aggregate, err := h.stats.Load(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
require.Len(t, aggregate.Players, 1)
assert.Equal(t, int64(9), aggregate.Players[0].InitialPlanets, "initial freezes on the FIRST event the consumer sees, not the historic one")
saved, found, err := h.offsets.Load(context.Background(), "gm_lobby_events")
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, secondID, saved)
}
func TestRunStopsCleanlyOnContextCancel(t *testing.T) {
h := newHarness(t)
ctx, cancel := context.WithCancel(context.Background())
var startCount int32
go func() {
atomic.AddInt32(&startCount, 1)
_ = h.consumer.Run(ctx)
}()
time.Sleep(50 * time.Millisecond)
cancel()
time.Sleep(150 * time.Millisecond)
assert.Equal(t, int32(1), atomic.LoadInt32(&startCount))
}
@@ -0,0 +1,162 @@
// Package pendingregistration implements the periodic worker that
// releases every Race Name Directory pending_registration whose
// eligible_until has lapsed. The worker delegates to
// ports.RaceNameDirectory.ExpirePendingRegistrations and emits one
// informational log entry per released binding using the structured
// fields frozen in lobby/README.md §Observability. Replay safety is a
// directory invariant: a second pass over the same state returns an
// empty slice and produces no extra side effects.
package pendingregistration
import (
"context"
"errors"
"fmt"
"log/slog"
"time"
"galaxy/lobby/internal/logging"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/telemetry"
)
// Dependencies groups the collaborators consumed by Worker.
type Dependencies struct {
// Directory exposes the Race Name Directory expiration entry point.
Directory ports.RaceNameDirectory
// Interval controls the tick cadence. It must be positive.
Interval time.Duration
// Clock supplies the wall-clock used to derive the "now" reference
// passed to ExpirePendingRegistrations. Defaults to time.Now when nil.
Clock func() time.Time
// Logger receives structured worker-level events. Defaults to
// slog.Default when nil.
Logger *slog.Logger
// Telemetry records the
// `lobby.pending_registration.expirations` counter once per
// released entry. Optional; nil disables metric emission.
Telemetry *telemetry.Runtime
}
// Worker drives the periodic pending-registration expiration loop.
type Worker struct {
directory ports.RaceNameDirectory
interval time.Duration
clock func() time.Time
logger *slog.Logger
telemetry *telemetry.Runtime
}
// NewWorker constructs one Worker from deps.
func NewWorker(deps Dependencies) (*Worker, error) {
if deps.Directory == nil {
return nil, errors.New("new pending registration worker: nil race name directory")
}
if deps.Interval <= 0 {
return nil, fmt.Errorf("new pending registration worker: interval must be positive, got %s", deps.Interval)
}
clock := deps.Clock
if clock == nil {
clock = time.Now
}
logger := deps.Logger
if logger == nil {
logger = slog.Default()
}
return &Worker{
directory: deps.Directory,
interval: deps.Interval,
clock: clock,
logger: logger.With("worker", "lobby.pendingregistration"),
telemetry: deps.Telemetry,
}, nil
}
// Run drives the periodic ticker until ctx is cancelled. A failure
// inside one tick does not terminate the loop — every tick logs its
// outcome and the worker stays alive so subsequent ticks pick up once
// the underlying issue clears.
func (worker *Worker) Run(ctx context.Context) error {
if worker == nil {
return errors.New("run pending registration worker: nil worker")
}
if ctx == nil {
return errors.New("run pending registration worker: nil context")
}
if err := ctx.Err(); err != nil {
return err
}
worker.logger.Info("pending registration worker started", "interval", worker.interval.String())
defer worker.logger.Info("pending registration worker stopped")
ticker := time.NewTicker(worker.interval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-ticker.C:
worker.Tick(ctx)
}
}
}
// Shutdown is a no-op: the worker holds no resources beyond its own
// goroutine, which Run releases on context cancellation.
func (worker *Worker) Shutdown(ctx context.Context) error {
if ctx == nil {
return errors.New("shutdown pending registration worker: nil context")
}
return nil
}
// Tick performs one expiration pass. It is exported so tests may drive
// the worker deterministically without a real ticker. Tick never
// returns an error; a failed expiration call is logged and absorbed so
// the worker survives transient backend issues.
func (worker *Worker) Tick(ctx context.Context) {
if worker == nil || ctx == nil {
return
}
now := worker.clock().UTC()
expired, err := worker.directory.ExpirePendingRegistrations(ctx, now)
if err != nil {
worker.logger.WarnContext(ctx, "expire pending race name registrations",
"err", err.Error(),
)
return
}
if len(expired) == 0 {
return
}
for _, entry := range expired {
worker.telemetry.RecordPendingRegistrationExpiration(ctx, "tick")
logArgs := []any{
"canonical_key", entry.CanonicalKey,
"race_name", entry.RaceName,
"game_id", entry.GameID,
"user_id", entry.UserID,
"eligible_until_ms", entry.EligibleUntilMs,
"reservation_kind", ports.KindPendingRegistration,
"trigger", "tick",
}
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
worker.logger.InfoContext(ctx, "released pending race name registration", logArgs...)
}
logArgs := []any{
"released", len(expired),
}
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
worker.logger.InfoContext(ctx, "pending registration tick released entries", logArgs...)
}
@@ -0,0 +1,253 @@
package pendingregistration_test
import (
"context"
"errors"
"io"
"log/slog"
"testing"
"time"
"galaxy/lobby/internal/adapters/racenamestub"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/worker/pendingregistration"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
gameA = "game-A"
gameB = "game-B"
userA = "user-A"
userB = "user-B"
raceNameA = "PilotNova"
raceNameB = "Vanguard"
)
func silentLogger() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
type controlledClock struct{ instant time.Time }
func (clock *controlledClock) now() time.Time { return clock.instant }
func (clock *controlledClock) advance(d time.Duration) { clock.instant = clock.instant.Add(d) }
func newDirectory(t *testing.T, clock *controlledClock) *racenamestub.Directory {
t.Helper()
directory, err := racenamestub.NewDirectory(racenamestub.WithClock(clock.now))
require.NoError(t, err)
return directory
}
func newWorker(
t *testing.T,
directory ports.RaceNameDirectory,
clock func() time.Time,
) *pendingregistration.Worker {
t.Helper()
worker, err := pendingregistration.NewWorker(pendingregistration.Dependencies{
Directory: directory,
Interval: time.Hour,
Clock: clock,
Logger: silentLogger(),
})
require.NoError(t, err)
return worker
}
func reserveAndPend(
t *testing.T,
directory ports.RaceNameDirectory,
gameID, userID, raceName string,
eligibleUntil time.Time,
) {
t.Helper()
ctx := context.Background()
require.NoError(t, directory.Reserve(ctx, gameID, userID, raceName))
require.NoError(t, directory.MarkPendingRegistration(ctx, gameID, userID, raceName, eligibleUntil))
}
func TestNewWorkerRejectsNilDirectory(t *testing.T) {
t.Parallel()
_, err := pendingregistration.NewWorker(pendingregistration.Dependencies{
Interval: time.Hour,
})
require.Error(t, err)
}
func TestNewWorkerRejectsNonPositiveInterval(t *testing.T) {
t.Parallel()
directory, err := racenamestub.NewDirectory()
require.NoError(t, err)
_, err = pendingregistration.NewWorker(pendingregistration.Dependencies{
Directory: directory,
Interval: 0,
})
require.Error(t, err)
}
func TestTickReleasesExpiredEntries(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
eligibleUntil := clock.instant.Add(time.Hour)
reserveAndPend(t, directory, gameA, userA, raceNameA, eligibleUntil)
clock.advance(2 * time.Hour)
worker := newWorker(t, directory, clock.now)
worker.Tick(context.Background())
pending, err := directory.ListPendingRegistrations(context.Background(), userA)
require.NoError(t, err)
assert.Empty(t, pending)
availability, err := directory.Check(context.Background(), raceNameA, userB)
require.NoError(t, err)
assert.False(t, availability.Taken)
}
func TestTickAtBoundaryReleasesEntry(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
eligibleUntil := clock.instant.Add(time.Hour)
reserveAndPend(t, directory, gameA, userA, raceNameA, eligibleUntil)
clock.instant = eligibleUntil
worker := newWorker(t, directory, clock.now)
worker.Tick(context.Background())
pending, err := directory.ListPendingRegistrations(context.Background(), userA)
require.NoError(t, err)
assert.Empty(t, pending)
}
func TestTickKeepsFutureEntries(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
eligibleUntil := clock.instant.Add(time.Hour)
reserveAndPend(t, directory, gameA, userA, raceNameA, eligibleUntil)
worker := newWorker(t, directory, clock.now)
worker.Tick(context.Background())
pending, err := directory.ListPendingRegistrations(context.Background(), userA)
require.NoError(t, err)
require.Len(t, pending, 1)
assert.Equal(t, raceNameA, pending[0].RaceName)
}
func TestTickReleasesMixedAgeEntries(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
expiredUntil := clock.instant.Add(time.Hour)
freshUntil := clock.instant.Add(48 * time.Hour)
reserveAndPend(t, directory, gameA, userA, raceNameA, expiredUntil)
reserveAndPend(t, directory, gameB, userB, raceNameB, freshUntil)
clock.advance(2 * time.Hour)
worker := newWorker(t, directory, clock.now)
worker.Tick(context.Background())
pendingA, err := directory.ListPendingRegistrations(context.Background(), userA)
require.NoError(t, err)
assert.Empty(t, pendingA)
pendingB, err := directory.ListPendingRegistrations(context.Background(), userB)
require.NoError(t, err)
require.Len(t, pendingB, 1)
assert.Equal(t, raceNameB, pendingB[0].RaceName)
}
func TestTickIsIdempotent(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
reserveAndPend(t, directory, gameA, userA, raceNameA, clock.instant.Add(time.Hour))
clock.advance(2 * time.Hour)
worker := newWorker(t, directory, clock.now)
worker.Tick(context.Background())
worker.Tick(context.Background())
pending, err := directory.ListPendingRegistrations(context.Background(), userA)
require.NoError(t, err)
assert.Empty(t, pending)
}
func TestTickReservedEntriesUntouched(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
require.NoError(t, directory.Reserve(context.Background(), gameA, userA, raceNameA))
clock.advance(48 * time.Hour)
worker := newWorker(t, directory, clock.now)
worker.Tick(context.Background())
reservations, err := directory.ListReservations(context.Background(), userA)
require.NoError(t, err)
require.Len(t, reservations, 1)
assert.Equal(t, raceNameA, reservations[0].RaceName)
}
func TestTickAbsorbsDirectoryError(t *testing.T) {
t.Parallel()
directory := failingDirectory{err: errors.New("redis unavailable")}
worker, err := pendingregistration.NewWorker(pendingregistration.Dependencies{
Directory: directory,
Interval: time.Hour,
Clock: func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) },
Logger: silentLogger(),
})
require.NoError(t, err)
worker.Tick(context.Background())
}
func TestRunStopsOnContextCancel(t *testing.T) {
t.Parallel()
clock := &controlledClock{instant: time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)}
directory := newDirectory(t, clock)
worker := newWorker(t, directory, clock.now)
ctx, cancel := context.WithCancel(context.Background())
done := make(chan error, 1)
go func() { done <- worker.Run(ctx) }()
cancel()
select {
case err := <-done:
require.ErrorIs(t, err, context.Canceled)
case <-time.After(time.Second):
t.Fatal("worker did not stop after context cancel")
}
require.NoError(t, worker.Shutdown(context.Background()))
}
// failingDirectory is a stand-in that surfaces a fixed error from
// ExpirePendingRegistrations so the worker's failure path can be
// exercised without spinning up the full Redis adapter.
type failingDirectory struct {
ports.RaceNameDirectory
err error
}
func (directory failingDirectory) ExpirePendingRegistrations(
context.Context,
time.Time,
) ([]ports.ExpiredPending, error) {
return nil, directory.err
}
@@ -0,0 +1,564 @@
// Package runtimejobresult implements the worker that consumes runtime
// job results published by Runtime Manager and drives the second half of
// the game start flow: persisting the runtime binding, calling
// Game Master to register the running game, and transitioning the game
// status to `running`, `paused`, or `start_failed` accordingly.
//
// Replay protection relies on the CAS-based UpdateStatus semantics: a
// duplicate result event finds the game in a non-`starting` status and
// the second pass becomes a no-op without any extra side effects. The
// stream offset advances after each message so the consumer survives
// restarts without re-emitting state changes.
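//
// A minimal wiring sketch, assuming the adapter variables already
// exist in the composition root (their names and the block timeout
// are illustrative):
//
//	consumer, err := runtimejobresult.NewConsumer(runtimejobresult.Config{
//		Client:         redisClient,
//		Stream:         "runtime:job_results",
//		BlockTimeout:   5 * time.Second,
//		Games:          gameStore,
//		RuntimeManager: runtimeManager,
//		GMClient:       gmClient,
//		Intents:        intentPublisher,
//		OffsetStore:    offsetStore,
//	})
//	if err != nil {
//		return err
//	}
//	go func() { _ = consumer.Run(ctx) }()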
package runtimejobresult
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/logging"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/telemetry"
"galaxy/notificationintent"
"github.com/redis/go-redis/v9"
)
// streamOffsetLabel identifies the runtime-job-results consumer in the
// stream offset store. The label stays stable when the underlying
// stream key is renamed via configuration.
const streamOffsetLabel = "runtime_results"
// IntentPublisher publishes notification intents.
type IntentPublisher interface {
Publish(ctx context.Context, intent notificationintent.Intent) (string, error)
}
// Config groups the dependencies used by Consumer.
type Config struct {
// Client provides XREAD access to the runtime job results stream.
Client *redis.Client
// Stream stores the Redis Streams key consumed by the worker.
Stream string
// BlockTimeout bounds the blocking XREAD window.
BlockTimeout time.Duration
// Games persists the post-start game record updates.
Games ports.GameStore
// RuntimeManager publishes stop jobs in the orphan-container path.
RuntimeManager ports.RuntimeManager
// GMClient registers the running game with Game Master after a
// successful binding persistence.
GMClient ports.GMClient
// Intents publishes the lobby.runtime_paused_after_start
// notification when GM is unavailable.
Intents IntentPublisher
// OffsetStore persists the last successfully processed entry id.
OffsetStore ports.StreamOffsetStore
// Clock supplies the wall-clock used for status transition
// timestamps. Defaults to time.Now when nil.
Clock func() time.Time
// Logger receives structured worker-level events. Defaults to
// slog.Default when nil.
Logger *slog.Logger
// Telemetry records the `lobby.start_flow.outcomes` and
// `lobby.game.transitions` counters per processed result. Optional;
// nil disables metric emission.
Telemetry *telemetry.Runtime
}
// Consumer drives the runtime-job-results processing loop.
type Consumer struct {
client *redis.Client
stream string
blockTimeout time.Duration
games ports.GameStore
runtimeManager ports.RuntimeManager
gmClient ports.GMClient
intents IntentPublisher
offsetStore ports.StreamOffsetStore
clock func() time.Time
logger *slog.Logger
telemetry *telemetry.Runtime
}
// NewConsumer constructs one Consumer from cfg.
func NewConsumer(cfg Config) (*Consumer, error) {
switch {
case cfg.Client == nil:
return nil, errors.New("new runtime job result consumer: nil redis client")
case strings.TrimSpace(cfg.Stream) == "":
return nil, errors.New("new runtime job result consumer: stream must not be empty")
case cfg.BlockTimeout <= 0:
return nil, errors.New("new runtime job result consumer: block timeout must be positive")
case cfg.Games == nil:
return nil, errors.New("new runtime job result consumer: nil game store")
case cfg.RuntimeManager == nil:
return nil, errors.New("new runtime job result consumer: nil runtime manager")
case cfg.GMClient == nil:
return nil, errors.New("new runtime job result consumer: nil gm client")
case cfg.Intents == nil:
return nil, errors.New("new runtime job result consumer: nil intent publisher")
case cfg.OffsetStore == nil:
return nil, errors.New("new runtime job result consumer: nil offset store")
}
clock := cfg.Clock
if clock == nil {
clock = time.Now
}
logger := cfg.Logger
if logger == nil {
logger = slog.Default()
}
return &Consumer{
client: cfg.Client,
stream: cfg.Stream,
blockTimeout: cfg.BlockTimeout,
games: cfg.Games,
runtimeManager: cfg.RuntimeManager,
gmClient: cfg.GMClient,
intents: cfg.Intents,
offsetStore: cfg.OffsetStore,
clock: clock,
logger: logger.With("worker", "lobby.runtimejobresult", "stream", cfg.Stream),
telemetry: cfg.Telemetry,
}, nil
}
// Run drives the XREAD loop until ctx is cancelled. Per-message
// outcomes are absorbed by HandleMessage; the loop only exits on
// context cancellation, an offset-store failure, or a fatal Redis
// error.
func (consumer *Consumer) Run(ctx context.Context) error {
if consumer == nil || consumer.client == nil {
return errors.New("run runtime job result consumer: nil consumer")
}
if ctx == nil {
return errors.New("run runtime job result consumer: nil context")
}
if err := ctx.Err(); err != nil {
return err
}
lastID, found, err := consumer.offsetStore.Load(ctx, streamOffsetLabel)
if err != nil {
return fmt.Errorf("run runtime job result consumer: load offset: %w", err)
}
if !found {
lastID = "0-0"
}
consumer.logger.Info("runtime job result consumer started", "block_timeout", consumer.blockTimeout.String(), "start_entry_id", lastID)
defer consumer.logger.Info("runtime job result consumer stopped")
for {
streams, err := consumer.client.XRead(ctx, &redis.XReadArgs{
Streams: []string{consumer.stream, lastID},
Count: 1,
Block: consumer.blockTimeout,
}).Result()
switch {
case err == nil:
for _, stream := range streams {
for _, message := range stream.Messages {
consumer.HandleMessage(ctx, message)
if err := consumer.offsetStore.Save(ctx, streamOffsetLabel, message.ID); err != nil {
return fmt.Errorf("run runtime job result consumer: save offset: %w", err)
}
lastID = message.ID
}
}
case errors.Is(err, redis.Nil):
continue
case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, redis.ErrClosed)):
return ctx.Err()
default:
return fmt.Errorf("run runtime job result consumer: %w", err)
}
}
}
// Shutdown is a no-op; the consumer relies on context cancellation.
func (consumer *Consumer) Shutdown(ctx context.Context) error {
if ctx == nil {
return errors.New("shutdown runtime job result consumer: nil context")
}
return nil
}
// HandleMessage processes one Redis Stream message. Exported so tests
// can drive the consumer deterministically without spinning up a real
// XREAD loop.
//
// Per-message errors are logged and absorbed: the worker keeps running
// and the offset is allowed to advance. CAS-status conflicts (typical
// for replayed events) are also absorbed.
func (consumer *Consumer) HandleMessage(ctx context.Context, message redis.XMessage) {
if consumer == nil {
return
}
event, err := decodeJobResult(message)
if err != nil {
consumer.logger.WarnContext(ctx, "decode runtime job result",
"stream_entry_id", message.ID,
"err", err.Error(),
)
return
}
switch event.Outcome {
case outcomeFailure:
consumer.handleFailure(ctx, message.ID, event)
case outcomeSuccess:
consumer.handleSuccess(ctx, message.ID, event)
default:
consumer.logger.WarnContext(ctx, "unknown runtime job outcome",
"stream_entry_id", message.ID,
"outcome", event.Outcome,
"game_id", event.GameID.String(),
)
}
}
// handleFailure transitions the game from `starting` to `start_failed`.
// The CAS-status update absorbs replays naturally: if the game is no
// longer in `starting`, the second call returns ErrConflict /
// ErrInvalidTransition and the worker treats it as an already-handled
// duplicate.
func (consumer *Consumer) handleFailure(ctx context.Context, entryID string, event jobResultEvent) {
at := consumer.clock().UTC()
err := consumer.games.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: event.GameID,
ExpectedFrom: game.StatusStarting,
To: game.StatusStartFailed,
Trigger: game.TriggerRuntimeEvent,
At: at,
})
switch {
case err == nil:
consumer.telemetry.RecordGameTransition(ctx,
string(game.StatusStarting),
string(game.StatusStartFailed),
string(game.TriggerRuntimeEvent),
)
consumer.telemetry.RecordStartFlowOutcome(ctx, "start_failed")
logArgs := []any{
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"from_status", string(game.StatusStarting),
"to_status", string(game.StatusStartFailed),
"trigger", string(game.TriggerRuntimeEvent),
"error_code", event.ErrorCode,
}
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
consumer.logger.InfoContext(ctx, "game start failed", logArgs...)
case errors.Is(err, game.ErrConflict), errors.Is(err, game.ErrInvalidTransition):
consumer.logger.InfoContext(ctx, "ignored runtime failure for game not in starting",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
)
default:
consumer.logger.WarnContext(ctx, "transition game to start_failed",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
}
// handleSuccess applies the success-path branches: persist binding,
// call GM, transition status. Any failure branches into the
// orphan-container or paused-after-start paths defined by the README.
//
// Replay protection: the worker re-reads the game record up front. If
// the record is no longer in `starting`, the event is treated as an
// already-handled replay and processing exits without further side
// effects (no binding overwrite, no GM call, no status transition).
func (consumer *Consumer) handleSuccess(ctx context.Context, entryID string, event jobResultEvent) {
at := consumer.clock().UTC()
if err := event.validateSuccess(); err != nil {
consumer.logger.WarnContext(ctx, "invalid runtime job success event",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
return
}
record, err := consumer.games.Get(ctx, event.GameID)
if err != nil {
consumer.logger.WarnContext(ctx, "load game for runtime success",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
return
}
if record.Status != game.StatusStarting {
consumer.logger.InfoContext(ctx, "ignored runtime success for game not in starting",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"current_status", string(record.Status),
)
return
}
binding := game.RuntimeBinding{
ContainerID: event.ContainerID,
EngineEndpoint: event.EngineEndpoint,
RuntimeJobID: entryID,
BoundAt: at,
}
if err := consumer.games.UpdateRuntimeBinding(ctx, ports.UpdateRuntimeBindingInput{
GameID: event.GameID,
Binding: binding,
At: at,
}); err != nil {
consumer.handleOrphan(ctx, entryID, event, at, err)
return
}
gmErr := consumer.gmClient.RegisterGame(ctx, ports.RegisterGameRequest{
GameID: record.GameID,
ContainerID: binding.ContainerID,
EngineEndpoint: binding.EngineEndpoint,
TargetEngineVersion: record.TargetEngineVersion,
TurnSchedule: record.TurnSchedule,
})
if gmErr != nil {
consumer.handleGMUnavailable(ctx, entryID, record, at, gmErr)
return
}
if err := consumer.games.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: game.StatusStarting,
To: game.StatusRunning,
Trigger: game.TriggerRuntimeEvent,
At: at,
}); err != nil {
switch {
case errors.Is(err, game.ErrConflict), errors.Is(err, game.ErrInvalidTransition):
consumer.logger.InfoContext(ctx, "ignored running transition for game not in starting",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
)
default:
consumer.logger.WarnContext(ctx, "transition game to running",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"err", err.Error(),
)
}
return
}
consumer.telemetry.RecordGameTransition(ctx,
string(game.StatusStarting),
string(game.StatusRunning),
string(game.TriggerRuntimeEvent),
)
consumer.telemetry.RecordStartFlowOutcome(ctx, "running")
logArgs := []any{
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"from_status", string(game.StatusStarting),
"to_status", string(game.StatusRunning),
"trigger", string(game.TriggerRuntimeEvent),
}
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
consumer.logger.InfoContext(ctx, "game running after runtime registration", logArgs...)
}
// handleOrphan implements the orphan-container path: the container
// started but Lobby could not persist the binding metadata. We publish
// a stop job to Runtime Manager and transition the game to
// `start_failed`. Stop-job dispatch happens before the status
// transition so that a crash between the two steps simply causes the
// stop job to be re-published on replay (Runtime Manager idempotency
// is required).
func (consumer *Consumer) handleOrphan(ctx context.Context, entryID string, event jobResultEvent, at time.Time, cause error) {
consumer.logger.WarnContext(ctx, "persist runtime binding failed; orphan container path",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", cause.Error(),
)
if err := consumer.runtimeManager.PublishStopJob(ctx, event.GameID.String()); err != nil {
consumer.logger.WarnContext(ctx, "publish stop job for orphan container",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
}
if err := consumer.games.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: event.GameID,
ExpectedFrom: game.StatusStarting,
To: game.StatusStartFailed,
Trigger: game.TriggerRuntimeEvent,
At: at,
}); err != nil && !errors.Is(err, game.ErrConflict) && !errors.Is(err, game.ErrInvalidTransition) {
consumer.logger.WarnContext(ctx, "transition orphan game to start_failed",
"stream_entry_id", entryID,
"game_id", event.GameID.String(),
"err", err.Error(),
)
return
}
consumer.telemetry.RecordGameTransition(ctx,
string(game.StatusStarting),
string(game.StatusStartFailed),
string(game.TriggerRuntimeEvent),
)
consumer.telemetry.RecordStartFlowOutcome(ctx, "start_failed")
}
// handleGMUnavailable implements the paused-after-start path: the
// container is alive but Game Master could not be registered. The game
// is moved to `paused` and an admin notification is published.
func (consumer *Consumer) handleGMUnavailable(ctx context.Context, entryID string, record game.Game, at time.Time, cause error) {
consumer.logger.WarnContext(ctx, "gm registration failed; pause-after-start path",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"err", cause.Error(),
)
if err := consumer.games.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: game.StatusStarting,
To: game.StatusPaused,
Trigger: game.TriggerRuntimeEvent,
At: at,
}); err != nil {
switch {
case errors.Is(err, game.ErrConflict), errors.Is(err, game.ErrInvalidTransition):
consumer.logger.InfoContext(ctx, "ignored paused transition for game not in starting",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
)
default:
consumer.logger.WarnContext(ctx, "transition game to paused",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"err", err.Error(),
)
}
return
}
consumer.telemetry.RecordGameTransition(ctx,
string(game.StatusStarting),
string(game.StatusPaused),
string(game.TriggerRuntimeEvent),
)
consumer.telemetry.RecordStartFlowOutcome(ctx, "paused")
intent, err := notificationintent.NewLobbyRuntimePausedAfterStartIntent(
notificationintent.Metadata{
IdempotencyKey: "lobby.runtime_paused_after_start:" + entryID,
OccurredAt: at,
},
notificationintent.LobbyRuntimePausedAfterStartPayload{
GameID: record.GameID.String(),
GameName: record.GameName,
},
)
if err != nil {
consumer.logger.ErrorContext(ctx, "build runtime paused intent",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"err", err.Error(),
)
return
}
if _, err := consumer.intents.Publish(ctx, intent); err != nil {
consumer.logger.WarnContext(ctx, "publish runtime paused intent",
"stream_entry_id", entryID,
"game_id", record.GameID.String(),
"err", err.Error(),
)
}
}
// outcomeSuccess and outcomeFailure are the two accepted values of the
// runtime job result `outcome` field.
const (
outcomeSuccess = "success"
outcomeFailure = "failure"
)
// jobResultEvent stores the decoded shape of one runtime:job_results
// stream entry.
type jobResultEvent struct {
GameID common.GameID
Outcome string
ContainerID string
EngineEndpoint string
ErrorCode string
ErrorMessage string
}
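// validateSuccess checks that a success-outcome event carries the
// container_id and engine_endpoint values required to persist a
// runtime binding.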
func (event jobResultEvent) validateSuccess() error {
if strings.TrimSpace(event.ContainerID) == "" {
return errors.New("success event missing container_id")
}
if strings.TrimSpace(event.EngineEndpoint) == "" {
return errors.New("success event missing engine_endpoint")
}
return nil
}
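// decodeJobResult maps one raw stream entry onto jobResultEvent,
// validating game_id and outcome up front; success-only fields are
// checked later by validateSuccess.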
func decodeJobResult(message redis.XMessage) (jobResultEvent, error) {
gameIDRaw := optionalString(message.Values, "game_id")
if strings.TrimSpace(gameIDRaw) == "" {
return jobResultEvent{}, errors.New("missing game_id")
}
gameID := common.GameID(gameIDRaw)
if err := gameID.Validate(); err != nil {
return jobResultEvent{}, fmt.Errorf("invalid game_id: %w", err)
}
outcome := optionalString(message.Values, "outcome")
if outcome != outcomeSuccess && outcome != outcomeFailure {
return jobResultEvent{}, fmt.Errorf("unsupported outcome %q", outcome)
}
return jobResultEvent{
GameID: gameID,
Outcome: outcome,
ContainerID: optionalString(message.Values, "container_id"),
EngineEndpoint: optionalString(message.Values, "engine_endpoint"),
ErrorCode: optionalString(message.Values, "error_code"),
ErrorMessage: optionalString(message.Values, "error_message"),
}, nil
}
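// optionalString reads a field from the stream entry values, returning
// "" when the key is absent or the value is neither string nor []byte.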
func optionalString(values map[string]any, key string) string {
raw, ok := values[key]
if !ok {
return ""
}
switch typed := raw.(type) {
case string:
return typed
case []byte:
return string(typed)
default:
return ""
}
}
@@ -0,0 +1,372 @@
package runtimejobresult_test
import (
"context"
"errors"
"io"
"log/slog"
"testing"
"time"
"galaxy/lobby/internal/adapters/gamestub"
"galaxy/lobby/internal/adapters/gmclientstub"
"galaxy/lobby/internal/adapters/intentpubstub"
"galaxy/lobby/internal/adapters/runtimemanagerstub"
"galaxy/lobby/internal/adapters/streamoffsetstub"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/worker/runtimejobresult"
"galaxy/notificationintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func silentLogger() *slog.Logger {
return slog.New(slog.NewTextHandler(io.Discard, nil))
}
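// harness bundles the stub adapters, the miniredis-backed client, and
// the consumer under test together with one game pre-seeded in
// `starting` status.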
type harness struct {
games *gamestub.Store
runtime *runtimemanagerstub.Publisher
gm *gmclientstub.Client
intents *intentpubstub.Publisher
offsets *streamoffsetstub.Store
consumer *runtimejobresult.Consumer
server *miniredis.Miniredis
clientRedis *redis.Client
stream string
at time.Time
gameRecord game.Game
}
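// newHarness spins up miniredis and the stub stores, seeds one game in
// `starting`, and wires a consumer with a fixed clock and quiet logger.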
func newHarness(t *testing.T) *harness {
t.Helper()
server := miniredis.RunT(t)
clientRedis := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = clientRedis.Close() })
games := gamestub.NewStore()
runtime := runtimemanagerstub.NewPublisher()
gm := gmclientstub.NewClient()
intents := intentpubstub.NewPublisher()
offsets := streamoffsetstub.NewStore()
at := time.Date(2026, 4, 25, 13, 0, 0, 0, time.UTC)
h := &harness{
games: games,
runtime: runtime,
gm: gm,
intents: intents,
offsets: offsets,
server: server,
clientRedis: clientRedis,
stream: "runtime:job_results",
at: at,
}
now := at.Add(-time.Hour)
record, err := game.New(game.NewGameInput{
GameID: common.GameID("game-w"),
GameName: "test worker game",
GameType: game.GameTypePublic,
MinPlayers: 4,
MaxPlayers: 8,
StartGapHours: 12,
StartGapPlayers: 2,
EnrollmentEndsAt: now.Add(24 * time.Hour),
TurnSchedule: "0 18 * * *",
TargetEngineVersion: "v1.0.0",
Now: now,
})
require.NoError(t, err)
record.Status = game.StatusStarting
require.NoError(t, games.Save(context.Background(), record))
h.gameRecord = record
consumer, err := runtimejobresult.NewConsumer(runtimejobresult.Config{
Client: clientRedis,
Stream: h.stream,
BlockTimeout: 100 * time.Millisecond,
Games: games,
RuntimeManager: runtime,
GMClient: gm,
Intents: intents,
OffsetStore: offsets,
Clock: func() time.Time { return at },
Logger: silentLogger(),
})
require.NoError(t, err)
h.consumer = consumer
return h
}
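// successMessage builds a well-formed success-outcome stream entry for
// the harness game.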
func successMessage(t *testing.T, h *harness, id string) redis.XMessage {
t.Helper()
return redis.XMessage{
ID: id,
Values: map[string]any{
"game_id": h.gameRecord.GameID.String(),
"outcome": "success",
"container_id": "container-1",
"engine_endpoint": "engine.local:9000",
"completed_at_ms": "1745581200000",
},
}
}
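// failureMessage builds a well-formed failure-outcome stream entry for
// the harness game.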
func failureMessage(t *testing.T, h *harness, id string) redis.XMessage {
t.Helper()
return redis.XMessage{
ID: id,
Values: map[string]any{
"game_id": h.gameRecord.GameID.String(),
"outcome": "failure",
"error_code": "image_pull_failed",
"error_message": "registry unreachable",
"completed_at_ms": "1745581200000",
},
}
}
func TestNewConsumerRejectsMissingDeps(t *testing.T) {
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
_, err := runtimejobresult.NewConsumer(runtimejobresult.Config{
Stream: "runtime:job_results",
BlockTimeout: time.Second,
})
require.Error(t, err)
_, err = runtimejobresult.NewConsumer(runtimejobresult.Config{
Client: client,
BlockTimeout: time.Second,
})
require.Error(t, err)
}
func TestHandleSuccessTransitionsToRunning(t *testing.T) {
h := newHarness(t)
h.consumer.HandleMessage(context.Background(), successMessage(t, h, "1700000000000-0"))
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusRunning, got.Status)
require.NotNil(t, got.RuntimeBinding)
assert.Equal(t, "container-1", got.RuntimeBinding.ContainerID)
assert.Equal(t, "engine.local:9000", got.RuntimeBinding.EngineEndpoint)
assert.Equal(t, "1700000000000-0", got.RuntimeBinding.RuntimeJobID)
require.NotNil(t, got.StartedAt)
assert.True(t, got.StartedAt.Equal(h.at))
require.Len(t, h.gm.Requests(), 1)
req := h.gm.Requests()[0]
assert.Equal(t, h.gameRecord.GameID, req.GameID)
assert.Equal(t, "container-1", req.ContainerID)
assert.Equal(t, "engine.local:9000", req.EngineEndpoint)
assert.Equal(t, h.gameRecord.TargetEngineVersion, req.TargetEngineVersion)
assert.Equal(t, h.gameRecord.TurnSchedule, req.TurnSchedule)
assert.Empty(t, h.runtime.StopJobs())
assert.Empty(t, h.intents.Published())
}
func TestHandleSuccessGMUnavailableMovesToPausedAndPublishesIntent(t *testing.T) {
h := newHarness(t)
h.gm.SetError(ports.ErrGMUnavailable)
h.consumer.HandleMessage(context.Background(), successMessage(t, h, "1700000000001-0"))
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusPaused, got.Status)
require.NotNil(t, got.RuntimeBinding, "binding still persisted before paused")
published := h.intents.Published()
require.Len(t, published, 1)
assert.Equal(t, notificationintent.NotificationTypeLobbyRuntimePausedAfterStart, published[0].NotificationType)
assert.Empty(t, h.runtime.StopJobs())
}
func TestHandleFailureTransitionsToStartFailed(t *testing.T) {
h := newHarness(t)
h.consumer.HandleMessage(context.Background(), failureMessage(t, h, "1700000000002-0"))
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusStartFailed, got.Status)
assert.Nil(t, got.RuntimeBinding)
assert.Empty(t, h.runtime.StopJobs())
assert.Empty(t, h.gm.Requests())
assert.Empty(t, h.intents.Published())
}
func TestHandleSuccessOrphanContainerWhenBindingFails(t *testing.T) {
h := newHarness(t)
// Force binding persistence to fail by wrapping the game store in
// fakeBindingFailer; the game record itself stays in the store so the
// later status assertions can read it back.
require.NoError(t, h.games.Save(context.Background(), h.gameRecord))
failingGames := &fakeBindingFailer{Store: h.games, err: errors.New("redis tx failed")}
consumer, err := runtimejobresult.NewConsumer(runtimejobresult.Config{
Client: h.clientRedis,
Stream: h.stream,
BlockTimeout: 100 * time.Millisecond,
Games: failingGames,
RuntimeManager: h.runtime,
GMClient: h.gm,
Intents: h.intents,
OffsetStore: h.offsets,
Clock: func() time.Time { return h.at },
Logger: silentLogger(),
})
require.NoError(t, err)
consumer.HandleMessage(context.Background(), successMessage(t, h, "1700000000003-0"))
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusStartFailed, got.Status,
"orphan path must move game to start_failed")
assert.Nil(t, got.RuntimeBinding, "binding never persisted")
assert.Equal(t, []string{h.gameRecord.GameID.String()}, h.runtime.StopJobs())
assert.Empty(t, h.gm.Requests())
assert.Empty(t, h.intents.Published())
}
func TestHandleSuccessReplayIsNoOp(t *testing.T) {
h := newHarness(t)
h.consumer.HandleMessage(context.Background(), successMessage(t, h, "1700000000004-0"))
require.Len(t, h.gm.Requests(), 1)
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
originalUpdatedAt := got.UpdatedAt
// Replay the same event: status is already running, so the early
// status check exits before any side-effect call (no binding
// overwrite, no GM call, no transition).
h.gm.SetError(errors.New("must not be called again"))
h.consumer.HandleMessage(context.Background(), successMessage(t, h, "1700000000004-0"))
require.Len(t, h.gm.Requests(), 1, "GM register-game is invoked once across replays")
got, err = h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusRunning, got.Status)
assert.True(t, got.UpdatedAt.Equal(originalUpdatedAt), "no further mutations on replay")
assert.Empty(t, h.intents.Published())
}
func TestHandleFailureReplayIsNoOp(t *testing.T) {
h := newHarness(t)
h.consumer.HandleMessage(context.Background(), failureMessage(t, h, "1700000000005-0"))
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusStartFailed, got.Status)
originalUpdatedAt := got.UpdatedAt
h.consumer.HandleMessage(context.Background(), failureMessage(t, h, "1700000000005-0"))
got, err = h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusStartFailed, got.Status)
assert.True(t, got.UpdatedAt.Equal(originalUpdatedAt), "no further mutations on replay")
}
func TestHandleMalformedEvents(t *testing.T) {
h := newHarness(t)
cases := []redis.XMessage{
{ID: "1-0", Values: map[string]any{"outcome": "success"}}, // missing game_id
{ID: "1-1", Values: map[string]any{"game_id": "bogus", "outcome": "success"}}, // invalid game_id format
{ID: "1-2", Values: map[string]any{"game_id": h.gameRecord.GameID.String(), "outcome": "weird"}}, // bad outcome
{ID: "1-3", Values: map[string]any{"game_id": h.gameRecord.GameID.String(), "outcome": "success"}}, // missing container_id
{ID: "1-4", Values: map[string]any{"game_id": h.gameRecord.GameID.String(), "outcome": "success", "container_id": "c"}}, // missing engine_endpoint
}
for _, msg := range cases {
h.consumer.HandleMessage(context.Background(), msg)
}
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusStarting, got.Status, "malformed events leave game untouched")
assert.Empty(t, h.runtime.StopJobs())
assert.Empty(t, h.gm.Requests())
}
// fakeBindingFailer wraps gamestub.Store and forces UpdateRuntimeBinding
// to fail; everything else delegates to the embedded store.
type fakeBindingFailer struct {
*gamestub.Store
err error
}
func (f *fakeBindingFailer) UpdateRuntimeBinding(_ context.Context, _ ports.UpdateRuntimeBindingInput) error {
return f.err
}
var _ ports.GameStore = (*fakeBindingFailer)(nil)
func TestRunDrainsStreamUntilCancelled(t *testing.T) {
h := newHarness(t)
// Pre-publish a success message into the real miniredis stream
// before Run starts.
_, err := h.clientRedis.XAdd(context.Background(), &redis.XAddArgs{
Stream: h.stream,
Values: map[string]any{
"game_id": h.gameRecord.GameID.String(),
"outcome": "success",
"container_id": "container-2",
"engine_endpoint": "engine.local:9001",
"completed_at_ms": "1745581200000",
},
}).Result()
require.NoError(t, err)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
done := make(chan error, 1)
go func() {
done <- h.consumer.Run(ctx)
}()
// Poll for the running transition; once observed, cancel the context.
deadline := time.Now().Add(1500 * time.Millisecond)
for time.Now().Before(deadline) {
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
if got.Status == game.StatusRunning {
break
}
time.Sleep(20 * time.Millisecond)
}
cancel()
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatalf("consumer did not stop")
}
got, err := h.games.Get(context.Background(), h.gameRecord.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusRunning, got.Status)
require.NotNil(t, got.RuntimeBinding)
assert.Equal(t, "container-2", got.RuntimeBinding.ContainerID)
// Offset must have been persisted at least once.
id, found, err := h.offsets.Load(context.Background(), "runtime_results")
require.NoError(t, err)
assert.True(t, found)
assert.NotEmpty(t, id)
}
@@ -0,0 +1,478 @@
// Package userlifecycle implements the cascade worker that reacts to
// `user.lifecycle.permanent_blocked` and `user.lifecycle.deleted` events
// from the User Service stream. The worker registers itself as a handler
// on a ports.UserLifecycleConsumer (typically the Redis adapter) and
// settles every Lobby artefact tied to the affected user:
//
// 1. Race Name Directory: release every binding in the registered,
//    reservation, or pending_registration state via RND.ReleaseAllByUser.
// 2. Memberships: every active membership transitions to `blocked`. For
// each affected private game, publish a `lobby.membership.blocked`
// intent to the owner.
// 3. Applications: every `submitted` application transitions to
// `rejected`.
// 4. Invites: every `created` invite where the user is invitee or
// inviter transitions to `revoked`.
// 5. Owned games: every owner-side game (status != cancelled/finished)
// transitions to `cancelled` via the `external_block` trigger. For
// in-flight games (`starting`, `running`, `paused`), publish a
// stop-job to Runtime Manager before the status transition.
//
// All store mutations are CAS-protected; replays detect the post-state
// via *.ErrConflict / *.ErrInvalidTransition and short-circuit without
// raising errors. Any non-conflict error returns to the consumer so the
// stream offset is held and the next iteration retries.
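//
// A minimal wiring sketch, assuming the adapter variables already
// exist in the composition root (their names are illustrative):
//
//	worker, err := userlifecycle.NewWorker(userlifecycle.Dependencies{
//		Directory:      raceNameDirectory,
//		Memberships:    membershipStore,
//		Applications:   applicationStore,
//		Invites:        inviteStore,
//		Games:          gameStore,
//		RuntimeManager: runtimeManager,
//		Intents:        intentPublisher,
//	})
//	if err != nil {
//		return err
//	}
//	// worker.Handle is then registered on the ports.UserLifecycleConsumer
//	// adapter; the exact registration call depends on that adapter's API.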
package userlifecycle
import (
"context"
"errors"
"fmt"
"log/slog"
"time"
"galaxy/lobby/internal/domain/application"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/domain/invite"
"galaxy/lobby/internal/domain/membership"
"galaxy/lobby/internal/logging"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/telemetry"
"galaxy/notificationintent"
)
// inflightGameStatuses lists the statuses for which a stop-job to
// Runtime Manager must be published before the cascade transitions the
// game to `cancelled`.
var inflightGameStatuses = map[game.Status]struct{}{
game.StatusStarting: {},
game.StatusRunning: {},
game.StatusPaused: {},
}
// Dependencies groups the collaborators consumed by Worker.
type Dependencies struct {
// Directory exposes the Race Name Directory cascade entry point.
Directory ports.RaceNameDirectory
// Memberships persists the active → blocked transition for every
// membership held by the affected user.
Memberships ports.MembershipStore
// Applications persists the submitted → rejected transition for every
// application authored by the affected user.
Applications ports.ApplicationStore
// Invites persists the created → revoked transition for every invite
// the affected user is invitee or inviter on.
Invites ports.InviteStore
// Games owns the cascade-cancel transition for games owned by the
// affected user.
Games ports.GameStore
// RuntimeManager publishes stop-jobs for in-flight cancelled games.
RuntimeManager ports.RuntimeManager
// Intents publishes `lobby.membership.blocked` notifications to
// private-game owners whose roster lost the affected member.
Intents ports.IntentPublisher
// Clock supplies the wall-clock used for status transition
// timestamps. Defaults to time.Now when nil.
Clock func() time.Time
// Logger receives structured worker-level events. Defaults to
// slog.Default when nil.
Logger *slog.Logger
// Telemetry records the
// `lobby.user_lifecycle.cascade_releases`,
// `lobby.membership.changes`, and `lobby.game.transitions`
// counters per processed event. Optional; nil disables metric
// emission.
Telemetry *telemetry.Runtime
}
// Worker executes the cascade triggered by one user-lifecycle event.
type Worker struct {
directory ports.RaceNameDirectory
memberships ports.MembershipStore
applications ports.ApplicationStore
invites ports.InviteStore
games ports.GameStore
runtimeManager ports.RuntimeManager
intents ports.IntentPublisher
clock func() time.Time
logger *slog.Logger
telemetry *telemetry.Runtime
}
// NewWorker constructs one Worker from deps.
func NewWorker(deps Dependencies) (*Worker, error) {
switch {
case deps.Directory == nil:
return nil, errors.New("new user lifecycle worker: nil race name directory")
case deps.Memberships == nil:
return nil, errors.New("new user lifecycle worker: nil membership store")
case deps.Applications == nil:
return nil, errors.New("new user lifecycle worker: nil application store")
case deps.Invites == nil:
return nil, errors.New("new user lifecycle worker: nil invite store")
case deps.Games == nil:
return nil, errors.New("new user lifecycle worker: nil game store")
case deps.RuntimeManager == nil:
return nil, errors.New("new user lifecycle worker: nil runtime manager")
case deps.Intents == nil:
return nil, errors.New("new user lifecycle worker: nil intent publisher")
}
clock := deps.Clock
if clock == nil {
clock = time.Now
}
logger := deps.Logger
if logger == nil {
logger = slog.Default()
}
return &Worker{
directory: deps.Directory,
memberships: deps.Memberships,
applications: deps.Applications,
invites: deps.Invites,
games: deps.Games,
runtimeManager: deps.RuntimeManager,
intents: deps.Intents,
clock: clock,
logger: logger.With("worker", "lobby.userlifecycle"),
telemetry: deps.Telemetry,
}, nil
}
// Handle processes one decoded lifecycle event and runs the full
// cascade. The function returns nil when every per-entity step either
// succeeded or was absorbed as an idempotent replay; non-conflict errors
// abort processing and bubble up so the consumer can retry the entry.
func (worker *Worker) Handle(ctx context.Context, event ports.UserLifecycleEvent) error {
if worker == nil {
return errors.New("user lifecycle handle: nil worker")
}
if ctx == nil {
return errors.New("user lifecycle handle: nil context")
}
if err := event.Validate(); err != nil {
// Decode-level guard so an obviously malformed event is rejected
// at the boundary rather than wandering through the cascade.
worker.logger.WarnContext(ctx, "drop invalid user lifecycle event",
"stream_entry_id", event.EntryID,
"err", err.Error(),
)
return nil
}
reason := reasonForEvent(event.EventType)
now := worker.clock().UTC()
startArgs := []any{
"stream_entry_id", event.EntryID,
"lifecycle_event", string(event.EventType),
"user_id", event.UserID,
}
startArgs = append(startArgs, logging.ContextAttrs(ctx)...)
worker.logger.InfoContext(ctx, "user lifecycle cascade starting", startArgs...)
worker.telemetry.RecordUserLifecycleCascadeRelease(ctx, string(event.EventType))
if err := worker.directory.ReleaseAllByUser(ctx, event.UserID); err != nil {
return fmt.Errorf("user lifecycle handle: release race names: %w", err)
}
memberCount, err := worker.cascadeMemberships(ctx, event, reason, now)
if err != nil {
return err
}
applicationCount, err := worker.cascadeApplications(ctx, event.UserID, now)
if err != nil {
return err
}
inviteCount, err := worker.cascadeInvites(ctx, event.UserID, now)
if err != nil {
return err
}
gameCount, err := worker.cascadeOwnedGames(ctx, event.UserID, now)
if err != nil {
return err
}
completedArgs := []any{
"stream_entry_id", event.EntryID,
"lifecycle_event", string(event.EventType),
"user_id", event.UserID,
"memberships_blocked", memberCount,
"applications_rejected", applicationCount,
"invites_revoked", inviteCount,
"games_cancelled", gameCount,
}
completedArgs = append(completedArgs, logging.ContextAttrs(ctx)...)
worker.logger.InfoContext(ctx, "user lifecycle cascade completed", completedArgs...)
return nil
}
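// cascadeMemberships transitions every active membership held by the
// affected user to `blocked`, attempting the owner notification for
// each successful transition; conflict-class errors are absorbed as
// replays.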
func (worker *Worker) cascadeMemberships(
ctx context.Context,
event ports.UserLifecycleEvent,
reason string,
now time.Time,
) (int, error) {
records, err := worker.memberships.GetByUser(ctx, event.UserID)
if err != nil {
return 0, fmt.Errorf("user lifecycle handle: load memberships: %w", err)
}
blocked := 0
for _, record := range records {
if record.Status != membership.StatusActive {
continue
}
updateErr := worker.memberships.UpdateStatus(ctx, ports.UpdateMembershipStatusInput{
MembershipID: record.MembershipID,
ExpectedFrom: membership.StatusActive,
To: membership.StatusBlocked,
At: now,
})
switch {
case updateErr == nil:
blocked++
worker.telemetry.RecordMembershipChange(ctx, "external_block")
worker.publishMembershipBlocked(ctx, event, record, reason, now)
case errors.Is(updateErr, membership.ErrConflict),
errors.Is(updateErr, membership.ErrInvalidTransition),
errors.Is(updateErr, membership.ErrNotFound):
worker.logger.InfoContext(ctx, "membership cascade absorbed",
"membership_id", record.MembershipID.String(),
"user_id", record.UserID,
"err", updateErr.Error(),
)
default:
return blocked, fmt.Errorf("user lifecycle handle: block membership %s: %w",
record.MembershipID, updateErr)
}
}
return blocked, nil
}
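// publishMembershipBlocked emits the lobby.membership.blocked intent to
// the owner of a private game whose roster lost the affected member.
// Build and publish failures are logged but never abort the cascade.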
func (worker *Worker) publishMembershipBlocked(
ctx context.Context,
event ports.UserLifecycleEvent,
record membership.Membership,
reason string,
now time.Time,
) {
gameRecord, err := worker.games.Get(ctx, record.GameID)
if err != nil {
worker.logger.WarnContext(ctx, "load game for membership.blocked intent",
"membership_id", record.MembershipID.String(),
"game_id", record.GameID.String(),
"err", err.Error(),
)
return
}
// Intent target is the private-game owner. Public games and self-owned
// memberships do not produce a notification.
if gameRecord.GameType != game.GameTypePrivate {
return
}
if gameRecord.OwnerUserID == "" || gameRecord.OwnerUserID == record.UserID {
return
}
intent, err := notificationintent.NewLobbyMembershipBlockedIntent(
notificationintent.Metadata{
IdempotencyKey: "lobby.membership.blocked:" + record.MembershipID.String() + ":" + event.EntryID,
OccurredAt: now,
TraceID: event.TraceID,
},
gameRecord.OwnerUserID,
notificationintent.LobbyMembershipBlockedPayload{
GameID: gameRecord.GameID.String(),
GameName: gameRecord.GameName,
MembershipUserID: record.UserID,
MembershipUserName: record.RaceName,
Reason: reason,
},
)
if err != nil {
worker.logger.WarnContext(ctx, "build membership.blocked intent",
"membership_id", record.MembershipID.String(),
"err", err.Error(),
)
return
}
if _, err := worker.intents.Publish(ctx, intent); err != nil {
worker.logger.WarnContext(ctx, "publish membership.blocked intent",
"membership_id", record.MembershipID.String(),
"owner_user_id", gameRecord.OwnerUserID,
"err", err.Error(),
)
}
}
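// cascadeApplications rejects every `submitted` application authored by
// the affected user; conflict-class errors are absorbed as replays.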
func (worker *Worker) cascadeApplications(
ctx context.Context,
userID string,
now time.Time,
) (int, error) {
records, err := worker.applications.GetByUser(ctx, userID)
if err != nil {
return 0, fmt.Errorf("user lifecycle handle: load applications: %w", err)
}
rejected := 0
for _, record := range records {
if record.Status != application.StatusSubmitted {
continue
}
updateErr := worker.applications.UpdateStatus(ctx, ports.UpdateApplicationStatusInput{
ApplicationID: record.ApplicationID,
ExpectedFrom: application.StatusSubmitted,
To: application.StatusRejected,
At: now,
})
switch {
case updateErr == nil:
rejected++
case errors.Is(updateErr, application.ErrConflict),
errors.Is(updateErr, application.ErrInvalidTransition),
errors.Is(updateErr, application.ErrNotFound):
worker.logger.InfoContext(ctx, "application cascade absorbed",
"application_id", record.ApplicationID.String(),
"err", updateErr.Error(),
)
default:
return rejected, fmt.Errorf("user lifecycle handle: reject application %s: %w",
record.ApplicationID, updateErr)
}
}
return rejected, nil
}
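// cascadeInvites revokes every `created` invite where the affected user
// is invitee or inviter, deduplicating records that appear on both
// sides.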
func (worker *Worker) cascadeInvites(
ctx context.Context,
userID string,
now time.Time,
) (int, error) {
addressed, err := worker.invites.GetByUser(ctx, userID)
if err != nil {
return 0, fmt.Errorf("user lifecycle handle: load invitee invites: %w", err)
}
owned, err := worker.invites.GetByInviter(ctx, userID)
if err != nil {
return 0, fmt.Errorf("user lifecycle handle: load inviter invites: %w", err)
}
visited := make(map[common.InviteID]struct{}, len(addressed)+len(owned))
revoked := 0
for _, record := range append(append([]invite.Invite(nil), addressed...), owned...) {
if _, seen := visited[record.InviteID]; seen {
continue
}
visited[record.InviteID] = struct{}{}
if record.Status != invite.StatusCreated {
continue
}
updateErr := worker.invites.UpdateStatus(ctx, ports.UpdateInviteStatusInput{
InviteID: record.InviteID,
ExpectedFrom: invite.StatusCreated,
To: invite.StatusRevoked,
At: now,
})
switch {
case updateErr == nil:
revoked++
case errors.Is(updateErr, invite.ErrConflict),
errors.Is(updateErr, invite.ErrInvalidTransition),
errors.Is(updateErr, invite.ErrNotFound):
worker.logger.InfoContext(ctx, "invite cascade absorbed",
"invite_id", record.InviteID.String(),
"err", updateErr.Error(),
)
default:
return revoked, fmt.Errorf("user lifecycle handle: revoke invite %s: %w",
record.InviteID, updateErr)
}
}
return revoked, nil
}
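// cascadeOwnedGames cancels every non-terminal game owned by the
// affected user, publishing a stop-job first when the game is
// in-flight.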
func (worker *Worker) cascadeOwnedGames(
ctx context.Context,
userID string,
now time.Time,
) (int, error) {
records, err := worker.games.GetByOwner(ctx, userID)
if err != nil {
return 0, fmt.Errorf("user lifecycle handle: load owned games: %w", err)
}
cancelled := 0
for _, record := range records {
if record.Status.IsTerminal() {
continue
}
if _, inflight := inflightGameStatuses[record.Status]; inflight {
if err := worker.runtimeManager.PublishStopJob(ctx, record.GameID.String()); err != nil {
return cancelled, fmt.Errorf("user lifecycle handle: publish stop job for %s: %w",
record.GameID, err)
}
}
updateErr := worker.games.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: record.Status,
To: game.StatusCancelled,
Trigger: game.TriggerExternalBlock,
At: now,
})
switch {
case updateErr == nil:
cancelled++
worker.telemetry.RecordGameTransition(ctx,
string(record.Status),
string(game.StatusCancelled),
string(game.TriggerExternalBlock),
)
case errors.Is(updateErr, game.ErrConflict),
errors.Is(updateErr, game.ErrInvalidTransition),
errors.Is(updateErr, game.ErrNotFound):
worker.logger.InfoContext(ctx, "game cascade absorbed",
"game_id", record.GameID.String(),
"current_status", string(record.Status),
"err", updateErr.Error(),
)
default:
return cancelled, fmt.Errorf("user lifecycle handle: cancel game %s: %w",
record.GameID, updateErr)
}
}
return cancelled, nil
}
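// reasonForEvent maps the lifecycle event type onto the reason string
// embedded in the membership.blocked payload.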
func reasonForEvent(eventType ports.UserLifecycleEventType) string {
switch eventType {
case ports.UserLifecycleEventTypePermanentBlocked:
return "permanent_blocked"
case ports.UserLifecycleEventTypeDeleted:
return "deleted"
default:
return string(eventType)
}
}
@@ -0,0 +1,416 @@
package userlifecycle_test
import (
"context"
"errors"
"io"
"log/slog"
"strings"
"testing"
"time"
"galaxy/lobby/internal/adapters/applicationstub"
"galaxy/lobby/internal/adapters/gamestub"
"galaxy/lobby/internal/adapters/intentpubstub"
"galaxy/lobby/internal/adapters/invitestub"
"galaxy/lobby/internal/adapters/membershipstub"
"galaxy/lobby/internal/adapters/racenamestub"
"galaxy/lobby/internal/adapters/runtimemanagerstub"
"galaxy/lobby/internal/domain/application"
"galaxy/lobby/internal/domain/common"
"galaxy/lobby/internal/domain/game"
"galaxy/lobby/internal/domain/invite"
"galaxy/lobby/internal/domain/membership"
"galaxy/lobby/internal/ports"
"galaxy/lobby/internal/worker/userlifecycle"
"galaxy/notificationintent"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
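// silentLogger returns a logger that discards all output so worker
// events stay quiet during tests.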
func silentLogger() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
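// fixture groups the stub adapters and the worker under test around a
// fixed clock instant.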
type fixture struct {
directory *racenamestub.Directory
memberships *membershipstub.Store
applications *applicationstub.Store
invites *invitestub.Store
games *gamestub.Store
runtimeManager *runtimemanagerstub.Publisher
intents *intentpubstub.Publisher
worker *userlifecycle.Worker
now time.Time
}
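// newFixture constructs the stub adapters and a worker wired to a
// fixed clock and quiet logger.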
func newFixture(t *testing.T) *fixture {
t.Helper()
directory, err := racenamestub.NewDirectory()
require.NoError(t, err)
now := time.Date(2026, 4, 25, 12, 0, 0, 0, time.UTC)
f := &fixture{
directory: directory,
memberships: membershipstub.NewStore(),
applications: applicationstub.NewStore(),
invites: invitestub.NewStore(),
games: gamestub.NewStore(),
runtimeManager: runtimemanagerstub.NewPublisher(),
intents: intentpubstub.NewPublisher(),
now: now,
}
worker, err := userlifecycle.NewWorker(userlifecycle.Dependencies{
Directory: directory,
Memberships: f.memberships,
Applications: f.applications,
Invites: f.invites,
Games: f.games,
RuntimeManager: f.runtimeManager,
Intents: f.intents,
Clock: func() time.Time { return now },
Logger: silentLogger(),
})
require.NoError(t, err)
f.worker = worker
return f
}
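// seedGame creates and saves a game record, overriding the status (and
// StartedAt for running/paused games) after construction.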
func (f *fixture) seedGame(
t *testing.T,
id common.GameID,
gameType game.GameType,
ownerUserID string,
status game.Status,
) game.Game {
t.Helper()
createdAt := f.now.Add(-2 * time.Hour)
record, err := game.New(game.NewGameInput{
GameID: id,
GameName: "cascade " + id.String(),
GameType: gameType,
OwnerUserID: ownerUserID,
MinPlayers: 2,
MaxPlayers: 4,
StartGapHours: 1,
StartGapPlayers: 1,
EnrollmentEndsAt: createdAt.Add(24 * time.Hour),
TurnSchedule: "0 18 * * *",
TargetEngineVersion: "v1.0.0",
Now: createdAt,
})
require.NoError(t, err)
if status != game.StatusDraft {
record.Status = status
record.UpdatedAt = f.now
switch status {
case game.StatusRunning, game.StatusPaused:
startedAt := f.now.Add(-time.Hour)
record.StartedAt = &startedAt
}
}
require.NoError(t, f.games.Save(context.Background(), record))
return record
}
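// seedMembership creates and saves an active membership and reserves
// the matching race name in the directory.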
func (f *fixture) seedMembership(
t *testing.T,
gameID common.GameID,
id common.MembershipID,
userID, raceName string,
) membership.Membership {
t.Helper()
record, err := membership.New(membership.NewMembershipInput{
MembershipID: id,
GameID: gameID,
UserID: userID,
RaceName: raceName,
CanonicalKey: strings.ToLower(strings.ReplaceAll(raceName, " ", "")),
Now: f.now,
})
require.NoError(t, err)
require.NoError(t, f.memberships.Save(context.Background(), record))
require.NoError(t, f.directory.Reserve(context.Background(), gameID.String(), userID, raceName))
return record
}
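// seedApplication creates and saves a `submitted` application for the
// given user.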
func (f *fixture) seedApplication(
t *testing.T,
gameID common.GameID,
id common.ApplicationID,
userID, raceName string,
) application.Application {
t.Helper()
record, err := application.New(application.NewApplicationInput{
ApplicationID: id,
GameID: gameID,
ApplicantUserID: userID,
RaceName: raceName,
Now: f.now,
})
require.NoError(t, err)
require.NoError(t, f.applications.Save(context.Background(), record))
return record
}
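// seedInvite creates and saves a `created` invite expiring 48 hours
// after the fixture instant.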
func (f *fixture) seedInvite(
t *testing.T,
gameID common.GameID,
id common.InviteID,
inviterUserID, inviteeUserID string,
) invite.Invite {
t.Helper()
record, err := invite.New(invite.NewInviteInput{
InviteID: id,
GameID: gameID,
InviterUserID: inviterUserID,
InviteeUserID: inviteeUserID,
Now: f.now,
ExpiresAt: f.now.Add(48 * time.Hour),
})
require.NoError(t, err)
require.NoError(t, f.invites.Save(context.Background(), record))
return record
}
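// reserveRegistered reserves a race name and, when registered is true,
// walks it through pending_registration to the registered state.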
func (f *fixture) reserveRegistered(t *testing.T, gameID, userID, raceName string, registered bool) {
t.Helper()
require.NoError(t, f.directory.Reserve(context.Background(), gameID, userID, raceName))
if registered {
require.NoError(t, f.directory.MarkPendingRegistration(
context.Background(), gameID, userID, raceName, f.now.Add(30*24*time.Hour)))
require.NoError(t, f.directory.Register(context.Background(), gameID, userID, raceName))
}
}
func TestNewWorkerRejectsMissingDeps(t *testing.T) {
t.Parallel()
_, err := userlifecycle.NewWorker(userlifecycle.Dependencies{})
require.Error(t, err)
}
func TestHandleFullCascadePermanentBlock(t *testing.T) {
t.Parallel()
f := newFixture(t)
// Owned private game in running status (must publish stop job).
ownedRunning := f.seedGame(t, "game-owned-1", game.GameTypePrivate, "user-victim", game.StatusRunning)
// Owned private game in enrollment_open (no stop job needed).
ownedOpen := f.seedGame(t, "game-owned-2", game.GameTypePrivate, "user-victim", game.StatusEnrollmentOpen)
// Third party private game where the victim has an active membership.
thirdPartyGame := f.seedGame(t, "game-third-1", game.GameTypePrivate, "owner-other", game.StatusEnrollmentOpen)
member := f.seedMembership(t, thirdPartyGame.GameID, "membership-1", "user-victim", "PrismHawk")
// Public game where the victim has an active membership.
publicGame := f.seedGame(t, "game-pub-1", game.GameTypePublic, "", game.StatusRunning)
publicMember := f.seedMembership(t, publicGame.GameID, "membership-2", "user-victim", "Nebula")
// Pending application by the victim.
app := f.seedApplication(t, "game-pub-1", "application-1", "user-victim", "Nebula")
// Pending invite addressed to the victim.
inv1 := f.seedInvite(t, "game-third-1", "invite-1", "owner-other", "user-victim")
// Pending invite where the victim is the inviter.
inv2 := f.seedInvite(t, "game-owned-2", "invite-2", "user-victim", "guest-1")
// Race name registered by the victim (RND should release it).
f.reserveRegistered(t, "game-third-1", "user-victim", "PrismHawk", true)
require.NoError(t, f.worker.Handle(context.Background(), ports.UserLifecycleEvent{
EntryID: "1700000000000-0",
EventType: ports.UserLifecycleEventTypePermanentBlocked,
UserID: "user-victim",
OccurredAt: f.now,
Source: "admin_internal_api",
ActorType: "admin_user",
ActorID: "admin-1",
ReasonCode: "policy_violation",
}))
// RND is fully cleared for the user.
registered, err := f.directory.ListRegistered(context.Background(), "user-victim")
require.NoError(t, err)
assert.Empty(t, registered)
pending, err := f.directory.ListPendingRegistrations(context.Background(), "user-victim")
require.NoError(t, err)
assert.Empty(t, pending)
reservations, err := f.directory.ListReservations(context.Background(), "user-victim")
require.NoError(t, err)
assert.Empty(t, reservations)
// Both memberships are blocked.
got, err := f.memberships.Get(context.Background(), member.MembershipID)
require.NoError(t, err)
assert.Equal(t, membership.StatusBlocked, got.Status)
gotPub, err := f.memberships.Get(context.Background(), publicMember.MembershipID)
require.NoError(t, err)
assert.Equal(t, membership.StatusBlocked, gotPub.Status)
// Application rejected.
gotApp, err := f.applications.Get(context.Background(), app.ApplicationID)
require.NoError(t, err)
assert.Equal(t, application.StatusRejected, gotApp.Status)
// Both invites revoked.
gotInv1, err := f.invites.Get(context.Background(), inv1.InviteID)
require.NoError(t, err)
assert.Equal(t, invite.StatusRevoked, gotInv1.Status)
gotInv2, err := f.invites.Get(context.Background(), inv2.InviteID)
require.NoError(t, err)
assert.Equal(t, invite.StatusRevoked, gotInv2.Status)
// Owned games cancelled, stop job published only for in-flight ones.
gotOwned1, err := f.games.Get(context.Background(), ownedRunning.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusCancelled, gotOwned1.Status)
gotOwned2, err := f.games.Get(context.Background(), ownedOpen.GameID)
require.NoError(t, err)
assert.Equal(t, game.StatusCancelled, gotOwned2.Status)
stopJobs := f.runtimeManager.StopJobs()
require.Len(t, stopJobs, 1)
assert.Equal(t, ownedRunning.GameID.String(), stopJobs[0])
// Notification published only for the third-party private game owner.
intents := f.intents.Published()
require.Len(t, intents, 1)
assert.Equal(t, notificationintent.NotificationTypeLobbyMembershipBlocked, intents[0].NotificationType)
assert.Equal(t, []string{"owner-other"}, intents[0].RecipientUserIDs)
assert.Contains(t, intents[0].PayloadJSON, `"reason":"permanent_blocked"`)
assert.Contains(t, intents[0].PayloadJSON, `"membership_user_id":"user-victim"`)
}
func TestHandleIsIdempotentOnReplay(t *testing.T) {
t.Parallel()
f := newFixture(t)
thirdParty := f.seedGame(t, "game-third-2", game.GameTypePrivate, "owner-other", game.StatusEnrollmentOpen)
f.seedMembership(t, thirdParty.GameID, "membership-3", "user-victim", "PrismHawk")
event := ports.UserLifecycleEvent{
EntryID: "1700000000000-0",
EventType: ports.UserLifecycleEventTypeDeleted,
UserID: "user-victim",
OccurredAt: f.now,
Source: "admin_internal_api",
ActorType: "system",
ReasonCode: "user_request",
}
require.NoError(t, f.worker.Handle(context.Background(), event))
require.NoError(t, f.worker.Handle(context.Background(), event))
intents := f.intents.Published()
require.Len(t, intents, 1, "second pass must not double-publish")
assert.Contains(t, intents[0].PayloadJSON, `"reason":"deleted"`)
}
func TestHandleRetryAfterMembershipBackendError(t *testing.T) {
t.Parallel()
f := newFixture(t)
thirdParty := f.seedGame(t, "game-third-3", game.GameTypePrivate, "owner-other", game.StatusEnrollmentOpen)
member := f.seedMembership(t, thirdParty.GameID, "membership-4", "user-victim", "Stardust")
failingMemberships := &flakyMembershipStore{
Store: f.memberships,
failOnce: true,
failError: errors.New("redis flake"),
}
worker, err := userlifecycle.NewWorker(userlifecycle.Dependencies{
Directory: f.directory,
Memberships: failingMemberships,
Applications: f.applications,
Invites: f.invites,
Games: f.games,
RuntimeManager: f.runtimeManager,
Intents: f.intents,
Clock: func() time.Time { return f.now },
Logger: silentLogger(),
})
require.NoError(t, err)
event := ports.UserLifecycleEvent{
EntryID: "1700000000000-0",
EventType: ports.UserLifecycleEventTypePermanentBlocked,
UserID: "user-victim",
OccurredAt: f.now,
Source: "admin_internal_api",
ActorType: "admin_user",
ReasonCode: "abuse",
}
err = worker.Handle(context.Background(), event)
require.Error(t, err)
// The failing call already consumed its single failure budget.
require.NoError(t, worker.Handle(context.Background(), event))
// Confirm membership is now blocked.
got, err := f.memberships.Get(context.Background(), member.MembershipID)
require.NoError(t, err)
assert.Equal(t, membership.StatusBlocked, got.Status)
}
func TestHandleUnknownEventTypeIsNoop(t *testing.T) {
t.Parallel()
f := newFixture(t)
thirdParty := f.seedGame(t, "game-third-4", game.GameTypePrivate, "owner-other", game.StatusEnrollmentOpen)
member := f.seedMembership(t, thirdParty.GameID, "membership-5", "user-victim", "Comet")
require.NoError(t, f.worker.Handle(context.Background(), ports.UserLifecycleEvent{
EntryID: "1700000000000-0",
EventType: ports.UserLifecycleEventType("user.lifecycle.unknown"),
UserID: "user-victim",
OccurredAt: f.now,
}))
got, err := f.memberships.Get(context.Background(), member.MembershipID)
require.NoError(t, err)
assert.Equal(t, membership.StatusActive, got.Status)
assert.Empty(t, f.intents.Published())
}
func TestHandlePropagatesStopJobError(t *testing.T) {
t.Parallel()
f := newFixture(t)
f.seedGame(t, "game-owned-3", game.GameTypePrivate, "user-victim", game.StatusRunning)
f.runtimeManager.SetStopError(errors.New("runtime down"))
err := f.worker.Handle(context.Background(), ports.UserLifecycleEvent{
EntryID: "1700000000000-0",
EventType: ports.UserLifecycleEventTypePermanentBlocked,
UserID: "user-victim",
OccurredAt: f.now,
ActorType: "admin_user",
ReasonCode: "abuse",
})
require.Error(t, err)
}
// flakyMembershipStore wraps membershipstub.Store with a one-shot
// UpdateStatus failure injection used by the retry-after-error test.
type flakyMembershipStore struct {
*membershipstub.Store
failOnce bool
failError error
}
func (f *flakyMembershipStore) UpdateStatus(ctx context.Context, input ports.UpdateMembershipStatusInput) error {
if f.failOnce {
f.failOnce = false
return f.failError
}
return f.Store.UpdateStatus(ctx, input)
}