refactor(dev): remove the dev-sandbox bootstrap everywhere
Tests · Go / test (push) Successful in 1m59s

Stage 1 of the dev-as-prod-mirror rework. The auto-provisioned "Dev
Sandbox" game and dummy users are removed so the dev contour starts
empty like prod; the separate legacy-report loader stays as the
test-data path.

- delete backend/internal/devsandbox (package + tests)
- drop the bootstrap call + DevSandboxConfig (struct, Config field,
  BACKEND_DEV_SANDBOX_* env, defaults, loader, validation)
- strip BACKEND_DEV_SANDBOX_* from dev-deploy + local-dev compose and
  .env.example; the generic engine-recycle / prune-broken-engines logic
  stays (it serves real games)
- update tooling docs (dev-deploy README + KNOWN-ISSUES, local-dev
  README + Makefile) and stale comments; DeleteGame and
  InsertMembershipDirect remain (exercised by lobby integration tests)

No app behaviour change beyond not auto-creating the sandbox game.
This commit is contained in:
Ilia Denisov
2026-05-31 22:28:03 +02:00
parent 26f1e62924
commit 0cae89cba2
17 changed files with 60 additions and 737 deletions
+2 -5
View File
@@ -153,7 +153,7 @@ jobs:
# Compare the freshly-built `galaxy-engine:dev` SHA against # Compare the freshly-built `galaxy-engine:dev` SHA against
# every running `galaxy-game-*` container. The backend # every running `galaxy-game-*` container. The backend
# reconciler adopts pre-existing labelled engine containers # reconciler adopts pre-existing labelled engine containers
# without checking image drift, so a running sandbox would # without checking image drift, so a running game would
# otherwise keep serving the previous engine code until the # otherwise keep serving the previous engine code until the
# container is recycled by hand. This step makes the recycle # container is recycled by hand. This step makes the recycle
# automatic but only when it is actually needed: # automatic but only when it is actually needed:
@@ -168,10 +168,7 @@ jobs:
# silent state corruption otherwise), and cascade-delete # silent state corruption otherwise), and cascade-delete
# the lobby `games` row (the FKs in `00001_init.sql` # the lobby `games` row (the FKs in `00001_init.sql`
# drop the matching `runtime_records`, `memberships`, # drop the matching `runtime_records`, `memberships`,
# `player_mappings`, etc. in the same write). The # `player_mappings`, etc. in the same write).
# `dev-sandbox` bootstrap on the next backend boot finds
# no live sandbox and provisions a fresh one on the new
# engine image.
# #
# Backend is stopped first to keep the reconciler from # Backend is stopped first to keep the reconciler from
# racing the recycle (mid-stream adoption / restart). The # racing the recycle (mid-stream adoption / restart). The
+6 -18
View File
@@ -26,7 +26,6 @@ import (
"galaxy/backend/internal/app" "galaxy/backend/internal/app"
"galaxy/backend/internal/auth" "galaxy/backend/internal/auth"
"galaxy/backend/internal/config" "galaxy/backend/internal/config"
"galaxy/backend/internal/devsandbox"
"galaxy/backend/internal/diplomail" "galaxy/backend/internal/diplomail"
"galaxy/backend/internal/diplomail/detector" "galaxy/backend/internal/diplomail/detector"
"galaxy/backend/internal/diplomail/translator" "galaxy/backend/internal/diplomail/translator"
@@ -274,29 +273,18 @@ func run(ctx context.Context) (err error) {
) )
runtimeGateway.svc = runtimeSvc runtimeGateway.svc = runtimeSvc
// Run a single reconciliation pass before the dev-sandbox // Run a single reconciliation pass at startup so any runtime row
// bootstrap so any runtime row pointing at a vanished engine // pointing at a vanished engine container (a host reboot wiped
// container (host reboot wiped /tmp/galaxy-game-state/<uuid>; // /tmp/galaxy-game-state/<uuid>; `tools/local-dev`'s
// `tools/local-dev`'s `prune-broken-engines` target reaped the // `prune-broken-engines` target reaped the husk) is cascaded
// husk) is already cascaded through `markRemoved` → lobby // through `markRemoved` → lobby `cancelled` before the server
// `cancelled` by the time the bootstrap walks the sandbox list. // starts serving requests. Failures are
// Without this pre-tick the bootstrap would reuse the
// soon-to-be-cancelled game and force the developer into a
// second `make up` cycle to land a healthy sandbox. Failures are
// non-fatal: the periodic ticker started later catches up, and // non-fatal: the periodic ticker started later catches up, and
// the worst case degrades to the legacy two-cycle recovery. // the worst case degrades to the legacy two-cycle recovery.
if err := runtimeSvc.Reconciler().Tick(ctx); err != nil { if err := runtimeSvc.Reconciler().Tick(ctx); err != nil {
logger.Warn("pre-bootstrap reconciler tick failed", zap.Error(err)) logger.Warn("pre-bootstrap reconciler tick failed", zap.Error(err))
} }
if err := devsandbox.Bootstrap(ctx, devsandbox.Deps{
Users: userSvc,
Lobby: lobbySvc,
EngineVersions: engineVersionSvc,
}, cfg.DevSandbox, logger); err != nil {
return fmt.Errorf("dev sandbox bootstrap: %w", err)
}
notifStore := notification.NewStore(db) notifStore := notification.NewStore(db)
notifSvc := notification.NewService(notification.Deps{ notifSvc := notification.NewService(notification.Deps{
Store: notifStore, Store: notifStore,
-51
View File
@@ -105,11 +105,6 @@ const (
envDiplomailTranslatorTimeout = "BACKEND_DIPLOMAIL_TRANSLATOR_TIMEOUT" envDiplomailTranslatorTimeout = "BACKEND_DIPLOMAIL_TRANSLATOR_TIMEOUT"
envDiplomailTranslatorMaxAttempts = "BACKEND_DIPLOMAIL_TRANSLATOR_MAX_ATTEMPTS" envDiplomailTranslatorMaxAttempts = "BACKEND_DIPLOMAIL_TRANSLATOR_MAX_ATTEMPTS"
envDiplomailWorkerInterval = "BACKEND_DIPLOMAIL_WORKER_INTERVAL" envDiplomailWorkerInterval = "BACKEND_DIPLOMAIL_WORKER_INTERVAL"
envDevSandboxEmail = "BACKEND_DEV_SANDBOX_EMAIL"
envDevSandboxEngineImage = "BACKEND_DEV_SANDBOX_ENGINE_IMAGE"
envDevSandboxEngineVersion = "BACKEND_DEV_SANDBOX_ENGINE_VERSION"
envDevSandboxPlayerCount = "BACKEND_DEV_SANDBOX_PLAYER_COUNT"
) )
// Default values applied when an environment variable is absent. // Default values applied when an environment variable is absent.
@@ -178,9 +173,6 @@ const (
defaultDiplomailTranslatorTimeout = 10 * time.Second defaultDiplomailTranslatorTimeout = 10 * time.Second
defaultDiplomailTranslatorMaxAttempts = 5 defaultDiplomailTranslatorMaxAttempts = 5
defaultDiplomailWorkerInterval = 2 * time.Second defaultDiplomailWorkerInterval = 2 * time.Second
defaultDevSandboxEngineVersion = "0.1.0"
defaultDevSandboxPlayerCount = 20
) )
// Allowed values for the closed-set string options. // Allowed values for the closed-set string options.
@@ -219,29 +211,12 @@ type Config struct {
Runtime RuntimeConfig Runtime RuntimeConfig
Notification NotificationConfig Notification NotificationConfig
Diplomail DiplomailConfig Diplomail DiplomailConfig
DevSandbox DevSandboxConfig
// FreshnessWindow mirrors the gateway freshness window and is used by the // FreshnessWindow mirrors the gateway freshness window and is used by the
// push server to bound the cursor TTL. // push server to bound the cursor TTL.
FreshnessWindow time.Duration FreshnessWindow time.Duration
} }
// DevSandboxConfig configures the boot-time bootstrap implemented in
// `backend/internal/devsandbox`. When Email is empty the bootstrap
// is a no-op, which is the production posture. When Email is set —
// from `BACKEND_DEV_SANDBOX_EMAIL` in the `tools/local-dev` stack —
// the bootstrap idempotently provisions a real user, the configured
// number of dummy participants, a private "Dev Sandbox" game, the
// matching memberships, and drives the lifecycle to `running`. The
// engine image and engine version refer to a row that the bootstrap
// also seeds in `engine_versions`.
type DevSandboxConfig struct {
Email string
EngineImage string
EngineVersion string
PlayerCount int
}
// LoggingConfig stores the parameters used by the structured logger. // LoggingConfig stores the parameters used by the structured logger.
type LoggingConfig struct { type LoggingConfig struct {
// Level is the zap level name (e.g. "debug", "info", "warn", "error"). // Level is the zap level name (e.g. "debug", "info", "warn", "error").
@@ -572,10 +547,6 @@ func DefaultConfig() Config {
TranslatorMaxAttempts: defaultDiplomailTranslatorMaxAttempts, TranslatorMaxAttempts: defaultDiplomailTranslatorMaxAttempts,
WorkerInterval: defaultDiplomailWorkerInterval, WorkerInterval: defaultDiplomailWorkerInterval,
}, },
DevSandbox: DevSandboxConfig{
EngineVersion: defaultDevSandboxEngineVersion,
PlayerCount: defaultDevSandboxPlayerCount,
},
Runtime: RuntimeConfig{ Runtime: RuntimeConfig{
WorkerPoolSize: defaultRuntimeWorkerPoolSize, WorkerPoolSize: defaultRuntimeWorkerPoolSize,
JobQueueSize: defaultRuntimeJobQueueSize, JobQueueSize: defaultRuntimeJobQueueSize,
@@ -755,13 +726,6 @@ func LoadFromEnv() (Config, error) {
return Config{}, err return Config{}, err
} }
cfg.DevSandbox.Email = strings.TrimSpace(loadString(envDevSandboxEmail, cfg.DevSandbox.Email))
cfg.DevSandbox.EngineImage = strings.TrimSpace(loadString(envDevSandboxEngineImage, cfg.DevSandbox.EngineImage))
cfg.DevSandbox.EngineVersion = strings.TrimSpace(loadString(envDevSandboxEngineVersion, cfg.DevSandbox.EngineVersion))
if cfg.DevSandbox.PlayerCount, err = loadInt(envDevSandboxPlayerCount, cfg.DevSandbox.PlayerCount); err != nil {
return Config{}, err
}
if err := cfg.Validate(); err != nil { if err := cfg.Validate(); err != nil {
return Config{}, err return Config{}, err
} }
@@ -973,21 +937,6 @@ func (c Config) Validate() error {
} }
} }
if email := strings.TrimSpace(c.DevSandbox.Email); email != "" {
if _, err := netmail.ParseAddress(email); err != nil {
return fmt.Errorf("%s must be a valid RFC 5322 address: %w", envDevSandboxEmail, err)
}
if strings.TrimSpace(c.DevSandbox.EngineImage) == "" {
return fmt.Errorf("%s must not be empty when %s is set", envDevSandboxEngineImage, envDevSandboxEmail)
}
if strings.TrimSpace(c.DevSandbox.EngineVersion) == "" {
return fmt.Errorf("%s must not be empty when %s is set", envDevSandboxEngineVersion, envDevSandboxEmail)
}
if c.DevSandbox.PlayerCount <= 0 {
return fmt.Errorf("%s must be positive when %s is set", envDevSandboxPlayerCount, envDevSandboxEmail)
}
}
return nil return nil
} }
-287
View File
@@ -1,287 +0,0 @@
// Package devsandbox provisions a ready-to-play game on backend boot
// for the `tools/local-dev` stack.
//
// Bootstrap is invoked from `backend/cmd/backend/main.go` after the
// admin bootstrap and before the HTTP listener starts. It reads
// `cfg.DevSandbox`; when `Email` is empty (the production posture)
// the function logs "skipped" and returns nil. When set, it
// idempotently:
//
// 1. registers the configured engine version and image;
// 2. find-or-creates the real dev user with the configured email;
// 3. find-or-creates `cfg.PlayerCount - 1` deterministic dummy
// users so the engine's minimum-players constraint is met;
// 4. deletes every "Dev Sandbox" game owned by the real user that
// has reached a terminal state (cancelled / finished /
// start_failed), so a fresh one can be created;
// 5. find-or-creates a private "Dev Sandbox" game owned by the
// real user with min/max_players = cfg.PlayerCount and a
// once-a-year turn schedule (effectively frozen at turn 1);
// 6. inserts memberships for all participants bypassing the
// application/approval flow;
// 7. drives the lifecycle to `running` (or as far as possible if
// the runtime is busy).
//
// The function is a no-op on subsequent boots once the game is
// running; partial states from earlier crashes are recovered.
package devsandbox
import (
"context"
"errors"
"fmt"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/lobby"
"galaxy/backend/internal/runtime"
"github.com/google/uuid"
"go.uber.org/zap"
)
const (
	// SandboxGameName is the display name that marks a game as the
	// auto-provisioned sandbox. Later boots look the game up by this
	// name together with the owning user, so the value must stay
	// stable; in practice only the sandbox bootstrap creates a game
	// with this exact name for the configured real user.
	SandboxGameName = "Dev Sandbox"

	// SandboxTurnSchedule is the turn schedule assigned to the sandbox
	// game. Read as a standard five-field cron expression it fires
	// once a year (00:00 on 1 January), so the game stays on turn 1
	// for practically the whole of a solo UI development session.
	SandboxTurnSchedule = "0 0 1 1 *"
)
// UserEnsurer is the user-provisioning dependency of Bootstrap. It is
// declared locally so this package does not have to import the auth
// package (and risk a circular dependency); the original note states
// that it matches `auth.UserEnsurer` and that production wiring passes
// the same `*user.Service` instance used by auth.
//
// EnsureByEmail returns the ID of the user registered under email.
// Bootstrap calls it on every boot for the real user and for each
// dummy user, so implementations are presumably find-or-create —
// confirm against `*user.Service`.
type UserEnsurer interface {
EnsureByEmail(ctx context.Context, email, preferredLanguage, timeZone, declaredCountry string) (uuid.UUID, error)
}
// Deps aggregates the collaborators Bootstrap needs. All three fields
// must be non-nil whenever the sandbox email is configured; Bootstrap
// returns an error otherwise.
type Deps struct {
// Users resolves the real and dummy sandbox users by email.
Users UserEnsurer
// Lobby owns the sandbox game: listing, creation, deletion,
// memberships and lifecycle transitions.
Lobby *lobby.Service
// EngineVersions registers the configured engine version and image.
EngineVersions *runtime.EngineVersionService
}
// Bootstrap provisions the dev sandbox game. When cfg.Email is empty
// (the production posture) it logs "skipped" and returns nil without
// touching deps. Otherwise it runs seven steps in order:
//
//  1. register the configured engine version and image;
//  2. ensure the real user identified by cfg.Email;
//  3. ensure cfg.PlayerCount-1 deterministic dummy users;
//  4. delete the real user's sandbox games that reached a terminal
//     status;
//  5. find or create the sandbox game;
//  6. insert memberships for every participant;
//  7. drive the game lifecycle towards `running`.
//
// A nil logger is replaced with a no-op logger. Configuration is
// validated before the wiring so that a non-positive PlayerCount is
// reported as such even when a dependency is also missing. Every
// failure is returned to the caller with a `dev_sandbox:` prefix; the
// boot path is expected to abort startup on error so a misconfigured
// dev environment surfaces immediately.
func Bootstrap(ctx context.Context, deps Deps, cfg config.DevSandboxConfig, logger *zap.Logger) error {
	if logger == nil {
		logger = zap.NewNop()
	}
	logger = logger.Named("dev_sandbox")

	// No email means the sandbox is disabled; return before looking at
	// deps so callers may pass a zero Deps in that case.
	if cfg.Email == "" {
		logger.Info("skipped (no email)")
		return nil
	}

	// Pure configuration check first: it needs no collaborators and
	// gives the most specific error for a bad environment.
	if cfg.PlayerCount <= 0 {
		return fmt.Errorf("dev_sandbox: PlayerCount must be positive, got %d", cfg.PlayerCount)
	}
	if deps.Users == nil || deps.Lobby == nil || deps.EngineVersions == nil {
		return errors.New("dev_sandbox: deps.Users, deps.Lobby and deps.EngineVersions are required")
	}

	if err := ensureEngineVersion(ctx, deps.EngineVersions, cfg, logger); err != nil {
		return err
	}

	realID, err := deps.Users.EnsureByEmail(ctx, cfg.Email, "en", "UTC", "")
	if err != nil {
		return fmt.Errorf("dev_sandbox: ensure real user: %w", err)
	}

	// The real user occupies one seat; the remaining seats are filled
	// with dummy accounts whose emails are stable across boots.
	dummyIDs := make([]uuid.UUID, 0, cfg.PlayerCount-1)
	for i := 1; i < cfg.PlayerCount; i++ {
		email := fmt.Sprintf("dev-dummy-%02d@local.test", i)
		id, err := deps.Users.EnsureByEmail(ctx, email, "en", "UTC", "")
		if err != nil {
			return fmt.Errorf("dev_sandbox: ensure dummy %d: %w", i, err)
		}
		dummyIDs = append(dummyIDs, id)
	}

	// Purge must precede the lookup: findOrCreateSandboxGame reuses
	// whatever sandbox game is still listed.
	if err := purgeTerminalSandboxGames(ctx, deps.Lobby, realID, logger); err != nil {
		return err
	}
	game, err := findOrCreateSandboxGame(ctx, deps.Lobby, realID, cfg)
	if err != nil {
		return err
	}
	game, err = ensureMembershipsAndDrive(ctx, deps.Lobby, game, realID, dummyIDs, logger)
	if err != nil {
		return err
	}

	logger.Info("bootstrap complete",
		zap.String("user_id", realID.String()),
		zap.String("game_id", game.GameID.String()),
		zap.String("status", game.Status),
	)
	return nil
}
// ensureEngineVersion registers cfg.EngineVersion with cfg.EngineImage.
// A version that is already registered (runtime.ErrEngineVersionTaken)
// counts as success, which keeps the call repeatable across boots; any
// other registration error is wrapped and returned.
func ensureEngineVersion(ctx context.Context, svc *runtime.EngineVersionService, cfg config.DevSandboxConfig, logger *zap.Logger) error {
	input := runtime.RegisterInput{
		Version:  cfg.EngineVersion,
		ImageRef: cfg.EngineImage,
	}
	_, regErr := svc.Register(ctx, input)

	if regErr == nil {
		logger.Info("engine version registered",
			zap.String("version", cfg.EngineVersion),
			zap.String("image", cfg.EngineImage),
		)
		return nil
	}
	if errors.Is(regErr, runtime.ErrEngineVersionTaken) {
		logger.Debug("engine version already registered",
			zap.String("version", cfg.EngineVersion),
		)
		return nil
	}
	return fmt.Errorf("dev_sandbox: register engine version: %w", regErr)
}
// terminalSandboxStatus reports whether status is one of the lobby
// statuses the bootstrap treats as a dead end: cancelled, finished or
// start_failed. Games in such a status are handled as absent, so the
// next bootstrap creates a fresh sandbox instead of reusing them.
func terminalSandboxStatus(status string) bool {
	return status == lobby.GameStatusCancelled ||
		status == lobby.GameStatusFinished ||
		status == lobby.GameStatusStartFailed
}
// purgeTerminalSandboxGames removes every game listed for ownerID that
// is named SandboxGameName, is owned by ownerID, and sits in a
// terminal status (see terminalSandboxStatus). Each deletion is logged.
// Games in any other status are left alone, since a sandbox still
// running from a previous boot is the happy path.
//
// The first list or delete failure aborts the purge and is returned
// wrapped; games deleted before that point stay deleted.
func purgeTerminalSandboxGames(ctx context.Context, svc *lobby.Service, ownerID uuid.UUID, logger *zap.Logger) error {
	owned, err := svc.ListMyGames(ctx, ownerID)
	if err != nil {
		return fmt.Errorf("dev_sandbox: list my games: %w", err)
	}

	for _, candidate := range owned {
		isSandbox := candidate.GameName == SandboxGameName &&
			candidate.OwnerUserID != nil &&
			*candidate.OwnerUserID == ownerID
		if !isSandbox || !terminalSandboxStatus(candidate.Status) {
			continue
		}

		if err := svc.DeleteGame(ctx, candidate.GameID); err != nil {
			return fmt.Errorf("dev_sandbox: delete terminal sandbox %s: %w", candidate.GameID, err)
		}
		logger.Info("purged terminal sandbox game",
			zap.String("game_id", candidate.GameID.String()),
			zap.String("status", candidate.Status),
		)
	}
	return nil
}
// findOrCreateSandboxGame returns the first game listed for ownerID
// that is named SandboxGameName and owned by ownerID. The caller is
// expected to have purged terminal sandbox games beforehand, so a
// match is either live or in a state that can still be driven forward.
//
// When nothing matches, a private game is created with min and max
// players both set to cfg.PlayerCount, enrollment open for 365 days,
// SandboxTurnSchedule as the turn schedule and cfg.EngineVersion as
// the target engine version. On error the zero GameRecord is returned
// together with the wrapped error.
func findOrCreateSandboxGame(ctx context.Context, svc *lobby.Service, ownerID uuid.UUID, cfg config.DevSandboxConfig) (lobby.GameRecord, error) {
	owned, err := svc.ListMyGames(ctx, ownerID)
	if err != nil {
		return lobby.GameRecord{}, fmt.Errorf("dev_sandbox: list my games: %w", err)
	}
	for _, candidate := range owned {
		if candidate.GameName == SandboxGameName &&
			candidate.OwnerUserID != nil &&
			*candidate.OwnerUserID == ownerID {
			return candidate, nil
		}
	}

	seats := int32(cfg.PlayerCount)
	input := lobby.CreateGameInput{
		OwnerUserID:         &ownerID,
		Visibility:          lobby.VisibilityPrivate,
		GameName:            SandboxGameName,
		Description:         "Auto-provisioned by backend/internal/devsandbox for solo UI development.",
		MinPlayers:          seats,
		MaxPlayers:          seats,
		StartGapHours:       0,
		StartGapPlayers:     0,
		EnrollmentEndsAt:    time.Now().Add(365 * 24 * time.Hour),
		TurnSchedule:        SandboxTurnSchedule,
		TargetEngineVersion: cfg.EngineVersion,
	}
	created, err := svc.CreateGame(ctx, input)
	if err != nil {
		return lobby.GameRecord{}, fmt.Errorf("dev_sandbox: create game: %w", err)
	}
	return created, nil
}
// ensureMembershipsAndDrive walks the sandbox game forward from
// whatever lobby status it currently has, one transition at a time:
// open enrollment, insert memberships and mark ready to start, start,
// and finally a single retry when the start failed. Each block keys
// off the status returned by the previous one, so their order matters.
//
// realID owns the game and is passed as the caller of every lifecycle
// call; dummyIDs are the filler participants. The returned record is
// the latest one obtained from the lobby service; on error it is the
// record as it stood before the failing call.
func ensureMembershipsAndDrive(ctx context.Context, svc *lobby.Service, game lobby.GameRecord, realID uuid.UUID, dummyIDs []uuid.UUID, logger *zap.Logger) (lobby.GameRecord, error) {
// Local copy so the lifecycle calls can take its address.
caller := realID
// NOTE(review): the `false` argument of the lifecycle calls below is
// presumably an is-admin flag — confirm against lobby.Service.
if game.Status == lobby.GameStatusDraft {
next, err := svc.OpenEnrollment(ctx, &caller, false, game.GameID)
if err != nil {
return game, fmt.Errorf("dev_sandbox: open enrollment: %w", err)
}
game = next
}
if game.Status == lobby.GameStatusEnrollmentOpen {
// The real user gets race name Sandbox-01; the dummies follow in
// slice order as Sandbox-02, Sandbox-03, ...
users := append([]uuid.UUID{realID}, dummyIDs...)
for i, uid := range users {
raceName := fmt.Sprintf("Sandbox-%02d", i+1)
if _, err := svc.InsertMembershipDirect(ctx, lobby.InsertMembershipDirectInput{
GameID: game.GameID,
UserID: uid,
RaceName: raceName,
}); err != nil {
return game, fmt.Errorf("dev_sandbox: insert membership %d: %w", i+1, err)
}
}
logger.Info("memberships ensured",
zap.Int("count", len(users)),
zap.String("game_id", game.GameID.String()),
)
next, err := svc.ReadyToStart(ctx, &caller, false, game.GameID)
if err != nil {
return game, fmt.Errorf("dev_sandbox: ready to start: %w", err)
}
game = next
}
if game.Status == lobby.GameStatusReadyToStart {
next, err := svc.Start(ctx, &caller, false, game.GameID)
if err != nil {
return game, fmt.Errorf("dev_sandbox: start: %w", err)
}
game = next
}
if game.Status == lobby.GameStatusStartFailed {
next, err := svc.RetryStart(ctx, &caller, false, game.GameID)
if err != nil {
// Best effort: a failed retry is logged and the game is returned
// as-is without failing the boot.
logger.Warn("retry start failed", zap.Error(err))
return game, nil
}
game = next
// A successful retry may hand the game back in ready-to-start, in
// which case one more Start attempt is made.
if game.Status == lobby.GameStatusReadyToStart {
next, err := svc.Start(ctx, &caller, false, game.GameID)
if err != nil {
return game, fmt.Errorf("dev_sandbox: start after retry: %w", err)
}
game = next
}
}
return game, nil
}
@@ -1,106 +0,0 @@
package devsandbox
import (
"context"
"errors"
"testing"
"galaxy/backend/internal/config"
"github.com/google/uuid"
"go.uber.org/zap"
)
// TestBootstrapSkippedWhenEmailEmpty covers the disabled path: with a
// zero config (Email == "") Bootstrap must succeed. Deps is the zero
// value too, so every collaborator is nil and a nil error also shows
// that the early return happens before any dependency is consulted.
func TestBootstrapSkippedWhenEmailEmpty(t *testing.T) {
	var (
		noDeps Deps
		noCfg  config.DevSandboxConfig
	)
	if err := Bootstrap(context.Background(), noDeps, noCfg, zap.NewNop()); err != nil {
		t.Fatalf("expected nil error on empty email, got: %v", err)
	}
}
// TestBootstrapRejectsZeroPlayerCount checks that Bootstrap fails when
// Email is set but PlayerCount is zero. Lobby and EngineVersions are
// nil, so returning an error (rather than panicking) also shows that
// neither service was called. The assertion only requires a non-nil
// error: it does not distinguish the PlayerCount validation from
// Bootstrap's missing-dependency check.
func TestBootstrapRejectsZeroPlayerCount(t *testing.T) {
	cfg := config.DevSandboxConfig{
		Email:         "dev@local.test",
		EngineImage:   "galaxy-engine:local-dev",
		EngineVersion: "0.0.0-local-dev",
		PlayerCount:   0,
	}
	deps := Deps{Users: stubEnsurer{}, Lobby: nil, EngineVersions: nil}

	if err := Bootstrap(context.Background(), deps, cfg, zap.NewNop()); err == nil {
		t.Fatal("expected error on zero PlayerCount, got nil")
	}
}
// TestBootstrapRejectsMissingDeps covers broken wiring: the config is
// valid and Email is set, but Lobby and EngineVersions are nil.
// Bootstrap must return an error with a non-empty message instead of
// panicking on its first service call. The wording of the message is
// not part of the contract.
func TestBootstrapRejectsMissingDeps(t *testing.T) {
	cfg := config.DevSandboxConfig{
		Email:         "dev@local.test",
		EngineImage:   "galaxy-engine:local-dev",
		EngineVersion: "0.0.0-local-dev",
		PlayerCount:   20,
	}
	deps := Deps{Users: stubEnsurer{}, Lobby: nil, EngineVersions: nil}

	err := Bootstrap(context.Background(), deps, cfg, zap.NewNop())
	if err == nil {
		t.Fatal("expected error on missing deps, got nil")
	}
	// Only the presence of a human-readable message is asserted here.
	if msg := err.Error(); msg == "" && !errors.Is(err, errMissingDepsSentinel) {
		t.Fatalf("error has empty message: %v", err)
	}
}
// errMissingDepsSentinel is a placeholder error referenced by the
// errors.Is call in TestBootstrapRejectsMissingDeps. Bootstrap builds
// its missing-deps error with a separate errors.New call and does not
// export it, so that error never matches this sentinel; the test only
// relies on the returned error having a non-empty message.
var errMissingDepsSentinel = errors.New("sentinel")
// TestTerminalSandboxStatus pins which lobby statuses count as
// terminal for the sandbox bootstrap. Terminal sandbox games are
// purged on the next boot; every other status must be reported as
// non-terminal so that live games are kept.
func TestTerminalSandboxStatus(t *testing.T) {
	cases := []struct {
		status   string
		terminal bool
	}{
		{"cancelled", true},
		{"finished", true},
		{"start_failed", true},
		{"draft", false},
		{"enrollment_open", false},
		{"ready_to_start", false},
		{"starting", false},
		{"running", false},
		{"paused", false},
	}
	for _, tc := range cases {
		got := terminalSandboxStatus(tc.status)
		switch {
		case tc.terminal && !got:
			t.Errorf("expected %q to be terminal", tc.status)
		case !tc.terminal && got:
			t.Errorf("expected %q to be non-terminal", tc.status)
		}
	}
}
// stubEnsurer is a do-nothing UserEnsurer for tests that only need a
// non-nil Users dependency.
type stubEnsurer struct{}

// EnsureByEmail ignores its arguments and returns the zero UUID with a
// nil error.
func (stubEnsurer) EnsureByEmail(_ context.Context, _, _, _, _ string) (uuid.UUID, error) {
	var zero uuid.UUID
	return zero, nil
}
+4 -5
View File
@@ -274,11 +274,10 @@ func (s *Service) ListFinishedGamesBefore(ctx context.Context, cutoff time.Time)
// `ON DELETE CASCADE` constraints declared in `00001_init.sql`. // `ON DELETE CASCADE` constraints declared in `00001_init.sql`.
// Idempotent: returns nil when no game matches. // Idempotent: returns nil when no game matches.
// //
// Phase 14 introduces this method for the dev-sandbox bootstrap so a // `DeleteGame` is destructive — a hard delete that bypasses the
// terminal "Dev Sandbox" tile from a previous local-dev session can // cascade-notification machinery — so production callers stay on the
// be scrubbed before a fresh game spawns. Production callers must // regular cancel / finish lifecycle. It is exercised by the lobby
// stay on the regular cancel / finish lifecycle — `DeleteGame` is // integration tests.
// destructive and bypasses the cascade-notification machinery.
func (s *Service) DeleteGame(ctx context.Context, gameID uuid.UUID) error { func (s *Service) DeleteGame(ctx context.Context, gameID uuid.UUID) error {
if err := s.deps.Store.DeleteGame(ctx, gameID); err != nil { if err := s.deps.Store.DeleteGame(ctx, gameID); err != nil {
return err return err
+2 -2
View File
@@ -248,8 +248,8 @@ func TestEndToEndPrivateGameFlow(t *testing.T) {
} }
} }
// TestDeleteGameCascadesEverything pins the contract the dev-sandbox // TestDeleteGameCascadesEverything pins the DeleteGame contract:
// bootstrap relies on: removing a game wipes every referencing row // removing a game wipes every referencing row
// (memberships, applications, invites, runtime_records, // (memberships, applications, invites, runtime_records,
// player_mappings) in a single SQL statement. Before this is wired // player_mappings) in a single SQL statement. Before this is wired
// the developer's lobby would pile up cancelled tiles between // the developer's lobby would pile up cancelled tiles between
+5 -6
View File
@@ -20,9 +20,9 @@ type InsertMembershipDirectInput struct {
// writes as ApproveApplication: the per-game race-name reservation // writes as ApproveApplication: the per-game race-name reservation
// row plus the membership row, and refreshes the in-memory caches. // row plus the membership row, and refreshes the in-memory caches.
// //
// The method is intended for boot-time provisioning by // The method is intended for trusted boot-time provisioning and
// `backend/internal/devsandbox` and similar trusted callers. It is // integration tests; it is not exposed through any HTTP handler. The
// not exposed through any HTTP handler. The caller must guarantee // caller must guarantee
// game.Status == GameStatusEnrollmentOpen — the function returns // game.Status == GameStatusEnrollmentOpen — the function returns
// ErrConflict otherwise — and that the race-name policy and // ErrConflict otherwise — and that the race-name policy and
// canonical-key invariants are honoured (the implementation reuses // canonical-key invariants are honoured (the implementation reuses
@@ -30,9 +30,8 @@ type InsertMembershipDirectInput struct {
// or unsuitable name still fails). // or unsuitable name still fails).
// //
// Idempotency: if a membership for (GameID, UserID) already exists // Idempotency: if a membership for (GameID, UserID) already exists
// the function returns the existing row without modifying state. // the function returns the existing row without modifying state, so
// This makes the helper safe to call on every backend boot from // the helper is safe to call repeatedly.
// devsandbox.Bootstrap.
func (s *Service) InsertMembershipDirect(ctx context.Context, in InsertMembershipDirectInput) (Membership, error) { func (s *Service) InsertMembershipDirect(ctx context.Context, in InsertMembershipDirectInput) (Membership, error) {
displayName, err := ValidateDisplayName(in.RaceName) displayName, err := ValidateDisplayName(in.RaceName)
if err != nil { if err != nil {
+2 -3
View File
@@ -236,9 +236,8 @@ func (s *Store) ListMyGames(ctx context.Context, userID uuid.UUID) ([]GameRecord
// referencing table (memberships / applications / invites / // referencing table (memberships / applications / invites /
// runtime_records / player_mappings — all declared with ON DELETE // runtime_records / player_mappings — all declared with ON DELETE
// CASCADE in `00001_init.sql`). Idempotent: returns nil when no row // CASCADE in `00001_init.sql`). Idempotent: returns nil when no row
// matches. Used by the dev-sandbox bootstrap to scrub terminal // matches. A hard delete for trusted callers and integration tests;
// games on every backend boot so the developer's lobby never piles // production lifecycle uses cancel / finish.
// up cancelled tiles.
func (s *Store) DeleteGame(ctx context.Context, gameID uuid.UUID) error { func (s *Store) DeleteGame(ctx context.Context, gameID uuid.UUID) error {
g := table.Games g := table.Games
stmt := g.DELETE().WHERE(g.GameID.EQ(postgres.UUID(gameID))) stmt := g.DELETE().WHERE(g.GameID.EQ(postgres.UUID(gameID)))
-6
View File
@@ -7,12 +7,6 @@
# baked into `docker-compose.yml`, so this file documents the knobs # baked into `docker-compose.yml`, so this file documents the knobs
# rather than driving them. # rather than driving them.
# Auto-provisioned sandbox bootstrap. Empty disables the bootstrap.
BACKEND_DEV_SANDBOX_EMAIL=dev@galaxy.lan
BACKEND_DEV_SANDBOX_ENGINE_IMAGE=galaxy-engine:dev
BACKEND_DEV_SANDBOX_ENGINE_VERSION=0.1.0
BACKEND_DEV_SANDBOX_PLAYER_COUNT=20
# `123456` short-circuits the email-code path for the dev account. # `123456` short-circuits the email-code path for the dev account.
# This is also the docker-compose default — set the variable to an # This is also the docker-compose default — set the variable to an
# empty string here when the environment must rely on real Mailpit # empty string here when the environment must rely on real Mailpit
+3 -159
View File
@@ -1,164 +1,8 @@
# `tools/dev-deploy/` — known issues # `tools/dev-deploy/` — known issues
Issues that surface in the long-lived dev environment but are not yet Issues that surfaced in the long-lived dev environment. Each entry lists
fixed. Each entry lists the observed symptom, the diagnostic evidence, the observed symptom, the diagnostic evidence, and the fix or the open
the working hypothesis, and the open questions that have to be questions that have to be answered before a fix lands.
answered before a fix lands.
## Dev Sandbox game flips to `cancelled` after a `dev-deploy` redispatch
### Symptom
A previously `running` "Dev Sandbox" game (created by
`backend/internal/devsandbox`) transitions to `cancelled` ~15 minutes
after a `dev-deploy.yaml` workflow_dispatch run finishes. The user's
browser session survives (the same `device_session_id` keeps working),
but the lobby shows no game because the only game it had is now
terminal. `purgeTerminalSandboxGames` does pick it up on the **next**
boot, after which a fresh sandbox is created — but the first redispatch
leaves the user with an empty lobby until the backend restarts again.
### Diagnostic evidence
Backend logs from the broken cycle (timestamps abbreviated):
```text
20:24:40 dev_sandbox: purged terminal sandbox game game_id=<prev> status=cancelled
20:24:40 dev_sandbox: memberships ensured count=20 game_id=<new>
20:24:40 dev_sandbox: bootstrap complete user_id=<owner> game_id=<new> status=starting
...
20:25:09 user mail sent failed (diplomail tables missing — unrelated)
...
20:39:40 lobby: game cancelled by runtime reconciler game_id=<new>
op=reconcile status=removed message="container disappeared"
```
Between 20:24:40 (`status=starting`) and 20:39:40 (reconciler cancel)
the backend logs are silent on the runtime / engine paths — no
`engine spawned`, no `engine container started`, no `runtime
transition` lines. The reconciler then fires and reports the engine
container as missing.
`docker ps -a --filter 'label=org.opencontainers.image.title=galaxy-game-engine'`
returns no rows during this window — the engine container is neither
running nor stopped on the host, so it either was never spawned or
was removed before the host snapshot.
### What has been ruled out
A live `docker inspect` on a healthy engine container shows:
```text
Labels: galaxy.backend=1, galaxy.engine_version=0.1.0,
galaxy.game_id=<uuid>,
org.opencontainers.image.title=galaxy-game-engine,
com.galaxy.{cpu_quota,memory,pids_limit}
AutoRemove: false
RestartPolicy: on-failure
NetworkMode: galaxy-dev-internal
```
There are no `com.docker.compose.*` labels and `AutoRemove=false`,
so `--remove-orphans` cannot reap the engine and a `--rm`-style
self-destruct is not in play. Two redispatches captured under
`docker events --filter event=create,start,die,destroy,kill,stop`
also confirmed it: across both runs the only `die` / `destroy`
events were for `galaxy-dev-{backend,api,caddy}`. The live engine
container survived both redispatches, and the reconciler that
fires 60 seconds after the new backend boots correctly matched
it through `byGameID` / `byContainerID`.
`backend/internal/runtime/service.go` only removes engine
containers from the explicit `runStop` / `runRestart` / `runPatch`
paths. There is no `runtime.Service.Shutdown` that proactively
kills containers on backend exit, so a graceful SIGTERM to
`galaxy-dev-backend` will not touch its child engine containers.
### Host-side hypotheses considered and rejected by the owner
The natural follow-up suspects after compose was cleared — host-side
`docker prune` cron jobs, a manual `docker rm`, an out-of-band
`dockerd` restart, and an idle-state engine crash — were all
rejected by the project owner: the dev host runs none of those
periodic cleanups, no one manually removed the container, dockerd
was not restarted in the window, and the engine binary does not
crash while idling on API calls.
### Best remaining suspicion
Something the `dev-deploy.yaml` CI run does between successful
image builds and the final `docker compose up -d --wait
--remove-orphans` clobbers the previously-spawned engine container.
The chain at runtime contains:
1. `docker build -t galaxy-engine:dev -f game/Dockerfile .`
2. `docker compose build galaxy-backend galaxy-api`
3. `docker run --rm` alpine for the UI volume seed
4. `docker compose up -d --wait --remove-orphans`
None of these *should* touch an unmanaged engine container, but
the reproduction window points squarely inside this sequence. A
deliberate next reproduction with `docker events --since 0` armed
*before* the deploy starts and live for the entire job — captured
end-to-end on the dev host, not just the chunk after backend
recreate — would pin which step emits the `destroy` on the engine.
### Update 2026-05-19: integration preclean identified as one cause
A live reproduction during the post-merge auto-deploy cycle (Gitea
run #188 dev-deploy plus parallel run #190 integration) pinned one
clobbering source: `integration/scripts/preclean.sh` was unscoped
and removed *every* container labelled `galaxy.backend=1`, including
the dev-deploy engine. Timeline from the dev host:
```text
23:10:40 backend pre-bootstrap reconciler tick: engine alive
23:10:40 dev_sandbox bootstrap: status=running
23:10:56 preclean: removing 1 backend-managed engine containers ← integration run #190
23:11:40 reconciler: container disappeared → game cancelled
```
Fix landed: `BACKEND_STACK_LABEL=integration` is now passed to
every integration backend (see
`integration/testenv/backend.go`) and `preclean.sh` AND-combines
`galaxy.backend=1` with `galaxy.stack=integration`, so dev-deploy /
local-dev engines stamped with different stack values are no longer
collateral.
This covers **push**-triggered cycles where `dev-deploy.yaml` and
`integration.yaml` run on the same Gitea host. The originally
reported scenario (a solo `workflow_dispatch` run of `dev-deploy` also
losing the engine) is *not* explained by the integration fix — manual
dispatches do not trigger `integration.yaml`. Keep this entry open
until a solo-dispatch reproduction confirms whether the symptom
still occurs.
### Status
Partially fixed (push-triggered cycles). Solo `workflow_dispatch`
reproductions still open. If the symptom recurs after the
integration fix lands, capture `docker events --since 0` for the
full dispatch window and attach here.
### Workaround in use today
When the sandbox game flips to `cancelled`, redispatch `dev-deploy`:
```sh
curl -X POST -n -H 'Content-Type: application/json' \
-d '{"ref":"<branch>"}' \
https://gitea.iliadenisov.ru/api/v1/repos/developer/galaxy-game/actions/workflows/dev-deploy.yaml/dispatches
```
The next boot's `purgeTerminalSandboxGames` removes the cancelled
row, `findOrCreateSandboxGame` creates a fresh one, and
`ensureMembershipsAndDrive` puts the new game back to `running`.
### Owner
Unassigned. File an issue once we have the runtime / reconciler
analysis above; reference this section in the issue body so future
redeploys can short-circuit the diagnostic loop.
## `docker restart galaxy-dev-backend` fails after the CI runner cleans up ## `docker restart galaxy-dev-backend` fails after the CI runner cleans up
+9 -13
View File
@@ -114,8 +114,7 @@ calls `make clean-data`.
The same dev-mode email-code override as `tools/local-dev/` applies, The same dev-mode email-code override as `tools/local-dev/` applies,
and the dev-deploy compose ships with it enabled by default: and the dev-deploy compose ships with it enabled by default:
1. Enter `dev@galaxy.lan` (or whatever `BACKEND_DEV_SANDBOX_EMAIL` 1. Enter your email address in the login form.
resolves to) in the login form.
2. Submit `123456` as the code — the docker-compose default for 2. Submit `123456` as the code — the docker-compose default for
`BACKEND_AUTH_DEV_FIXED_CODE` is `123456`, so the bcrypt-hashed `BACKEND_AUTH_DEV_FIXED_CODE` is `123456`, so the bcrypt-hashed
email code stays a fallback. To force real Mailpit codes (e.g. for email code stays a fallback. To force real Mailpit codes (e.g. for
@@ -212,8 +211,7 @@ make clean-data Stop everything and wipe volumes + game-state dir
## Known issues ## Known issues
See [`KNOWN-ISSUES.md`](KNOWN-ISSUES.md) for symptoms that surface See [`KNOWN-ISSUES.md`](KNOWN-ISSUES.md) for symptoms that surface
in the long-lived dev environment but are not yet fixed (currently: in the long-lived dev environment but are not yet fixed.
the sandbox game flipping to `cancelled` after a redispatch).
## Deployment cadence ## Deployment cadence
@@ -237,12 +235,12 @@ behind. There is no separate state to clean up between the two paths.
### Engine image drift recycle ### Engine image drift recycle
`backend` spawns one engine container per game (the long-lived "Dev `backend` spawns one engine container per running game and the
Sandbox" plus any user-created games) and the reconciler reattaches reconciler reattaches to whatever it finds with the
to whatever it finds with the `galaxy.stack=dev-deploy` label. That `galaxy.stack=dev-deploy` label. That reattach does not check the
reattach does not check the running container's image SHA against the running container's image SHA against the freshly-built
freshly-built `galaxy-engine:dev` tag, so an unchanged container would `galaxy-engine:dev` tag, so an unchanged container would otherwise
otherwise keep serving the previous engine code after a redeploy. keep serving the previous engine code after a redeploy.
The `dev-deploy.yaml` workflow handles this in the The `dev-deploy.yaml` workflow handles this in the
`Recycle engine containers on image drift` step. When `docker build` `Recycle engine containers on image drift` step. When `docker build`
@@ -250,9 +248,7 @@ produces a new `galaxy-engine:dev` SHA, the step compares it against
every running `galaxy-game-*` container and, for each drifted one, every running `galaxy-game-*` container and, for each drifted one,
stops the backend, removes the container, wipes its bind-mounted stops the backend, removes the container, wipes its bind-mounted
state directory (Engine.Init() writes turn-0 over any pre-existing state directory (Engine.Init() writes turn-0 over any pre-existing
`turn-N` files), and cascade-deletes the lobby `games` row. The `turn-N` files), and cascade-deletes the lobby `games` row.
`dev-sandbox` bootstrap on the next backend boot finds no live
sandbox and provisions a fresh one on the new engine image.
When the engine sources are unchanged, the BuildKit cache hits and When the engine sources are unchanged, the BuildKit cache hits and
the SHA stays the same — the recycle step is a no-op and the running the SHA stays the same — the recycle step is a no-op and the running
-9
View File
@@ -127,15 +127,6 @@ services:
# bcrypt-hashed code is single-use). Set the var to an empty # bcrypt-hashed code is single-use). Set the var to an empty
# string in `.env` to disable. # string in `.env` to disable.
BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-123456} BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-123456}
# Long-lived dev environment always bootstraps the "Dev Sandbox"
# game owned by this email so a freshly redeployed stack already
# has one ready-to-play game in the lobby. Set the variable to an
# empty string in `.env` to disable the bootstrap (e.g. for a
# cold-start QA pass).
BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-dev@galaxy.lan}
BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-galaxy-engine:dev}
BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-0.1.0}
BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-20}
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
# Per-game state directories live under the same absolute path # Per-game state directories live under the same absolute path
+7 -9
View File
@@ -22,7 +22,7 @@ help:
@echo " make up Build (if needed) and bring up the stack, wait until healthy" @echo " make up Build (if needed) and bring up the stack, wait until healthy"
@echo " make down Stop compose containers, leave engines + volumes intact" @echo " make down Stop compose containers, leave engines + volumes intact"
@echo " make rebuild Force rebuild of backend / gateway images and bring up" @echo " make rebuild Force rebuild of backend / gateway images and bring up"
@echo " make build-engine Build the engine image $(ENGINE_IMAGE) used by the dev sandbox" @echo " make build-engine Build the engine image $(ENGINE_IMAGE) used by running games"
@echo " make stop-engines Stop and remove only the per-game engine containers" @echo " make stop-engines Stop and remove only the per-game engine containers"
@echo " make prune-broken-engines Remove non-running engine containers Docker can't heal (run inside 'up')" @echo " make prune-broken-engines Remove non-running engine containers Docker can't heal (run inside 'up')"
@echo " make clean Stop everything (incl. engines) and wipe volumes + game state" @echo " make clean Stop everything (incl. engines) and wipe volumes + game state"
@@ -37,8 +37,9 @@ help:
@echo " pnpm -C ui/frontend dev" @echo " pnpm -C ui/frontend dev"
@echo "and open http://localhost:5173 (UI) plus http://localhost:8025 (Mailpit)." @echo "and open http://localhost:5173 (UI) plus http://localhost:8025 (Mailpit)."
@echo "" @echo ""
@echo "Default login for the auto-provisioned dev sandbox: dev@local.test" @echo "Sign in with email-OTP; the fixed login code 123456 works when"
@echo "(see BACKEND_DEV_SANDBOX_EMAIL in .env). Login code: 123456." @echo "BACKEND_AUTH_DEV_FIXED_CODE is set in .env. No game is auto-provisioned —"
@echo "load a legacy report via the UI's DEV report loader to exercise the map."
up: build-engine prune-broken-engines up: build-engine prune-broken-engines
$(COMPOSE) up -d --wait $(COMPOSE) up -d --wait
@@ -88,12 +89,9 @@ stop-engines:
# bind-mount source and leaves it stuck in `exited` / `created` # bind-mount source and leaves it stuck in `exited` / `created`
# state. This target prunes the husks before `compose up`; the # state. This target prunes the husks before `compose up`; the
# backend's pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`) # backend's pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`)
# then cascades the orphan runtime row to `removed`, the lobby # then cascades the orphan runtime row to `removed` and the lobby
# cancels the game, and the dev-sandbox bootstrap purges the # cancels the game. Healthy `running` / `restarting` containers are
# cancelled tile and provisions a fresh sandbox in the same # left intact so a long-lived game survives normal up/down cycles.
# `make up` cycle. Healthy `running` / `restarting` containers are
# left intact so a long-lived sandbox survives normal up/down
# cycles.
prune-broken-engines: prune-broken-engines:
@ids=""; \ @ids=""; \
for cid in $$(docker ps -aq \ for cid in $$(docker ps -aq \
+16 -50
View File
@@ -78,49 +78,24 @@ To force the second path (no fast-bypass), edit
`make rebuild` (or simply `docker compose up -d backend` to recreate `make rebuild` (or simply `docker compose up -d backend` to recreate
the backend with the new env). the backend with the new env).
## Auto-provisioned dev sandbox ## No auto-provisioned game
`make up` provisions a private game called **Dev Sandbox** owned by `make up` brings up the stack with an empty lobby — there is no
the dev user (default `dev@local.test`). The flow is implemented in auto-provisioned game. Sign in with email-OTP (the fixed dev code
`backend/internal/devsandbox` and runs on every backend boot when `123456` works when `BACKEND_AUTH_DEV_FIXED_CODE` is set in
`BACKEND_DEV_SANDBOX_EMAIL` is non-empty in `tools/local-dev/.env`. `tools/local-dev/.env`):
Bootstrap is idempotent — re-running `make up` after a `make down`
finds the existing user, dummy participants, game, and memberships
without creating duplicates. If a previous boot crashed mid-way
(game stuck in `enrollment_open` or `ready_to_start`), the next boot
resumes the lifecycle.
To log in straight into the sandbox:
1. `make -C tools/local-dev up` 1. `make -C tools/local-dev up`
2. `pnpm -C ui/frontend dev` (in another terminal) 2. `pnpm -C ui/frontend dev` (in another terminal)
3. Open <http://localhost:5173/login>, enter `dev@local.test`, then 3. Open <http://localhost:5173/login>, enter your email, then the dev
the dev code `123456`. code `123456`.
4. The lobby shows **Dev Sandbox** in *My Games*; click in.
To disable the bootstrap, clear `BACKEND_DEV_SANDBOX_EMAIL` in To exercise the map and report views without running a full game, use
`tools/local-dev/.env` and `docker compose up -d backend` (or the UI's DEV **synthetic report loader**: convert a legacy `.REP` with
`make rebuild`). Existing users / games are not removed. `tools/local-dev/legacy-report/` and load the resulting JSON through the
loader (see that tool's README). To play a real game, create one in the
Terminal sandbox games — anything in `cancelled`, `finished`, or lobby and let the engine (`galaxy-engine:local-dev`, built by
`start_failed` — are deleted on every boot before find-or-create `make build-engine`) run it.
runs. The cascade declared in `00001_init.sql` removes the
matching memberships, applications, invites, runtime records,
and player mappings in the same write, so the dev user's lobby
shows exactly one running tile at all times. Cancelling the
sandbox manually and running `docker compose restart backend`
(or `make rebuild`) yields a fresh game without leaving dead
tiles behind.
The bootstrap requires:
- `galaxy-engine:local-dev` Docker image (`make build-engine`).
- `BACKEND_DEV_SANDBOX_ENGINE_VERSION` parses as plain semver
(`MAJOR.MINOR.PATCH`); the default `0.1.0` is what the bootstrap
registers in the `engine_versions` row that points at the image.
- `BACKEND_DEV_SANDBOX_PLAYER_COUNT` ≥ 20 (the engine's minimum;
19 deterministic dummies fill the slots so the single real user
can start the game).
- A frozen turn schedule (`0 0 1 1 *` — once a year) so the visible - A frozen turn schedule (`0 0 1 1 *` — once a year) so the visible
game state stays at turn 1 until you explicitly progress it. game state stays at turn 1 until you explicitly progress it.
@@ -239,24 +214,15 @@ make status docker compose ps
this in one cycle: `prune-broken-engines` (runs as part of `up`) this in one cycle: `prune-broken-engines` (runs as part of `up`)
removes every engine container that is not in `running` / removes every engine container that is not in `running` /
`restarting` state, the backend's pre-bootstrap reconciler tick `restarting` state, the backend's pre-bootstrap reconciler tick
cascades the orphan runtime row to `removed`, the lobby cancels cascades the orphan runtime row to `removed`, and the lobby cancels
the matching sandbox game, and the dev-sandbox bootstrap purges the matching game. To run the cleanup by hand without restarting the
the cancelled tile and provisions a fresh sandbox with a brand rest of the stack, `make prune-broken-engines`.
new state directory. To run the cleanup by hand without restarting
the rest of the stack, `make prune-broken-engines`.
The cycle relies on the backend image carrying the pre-bootstrap The cycle relies on the backend image carrying the pre-bootstrap
reconciler tick (`backend/cmd/backend/main.go`). `make up` reuses reconciler tick (`backend/cmd/backend/main.go`). `make up` reuses
the cached image, so the first time after pulling this commit you the cached image, so the first time after pulling this commit you
must run `make rebuild` once to bake the fix in. Future `make up` must run `make rebuild` once to bake the fix in. Future `make up`
cycles will heal in one shot. cycles will heal in one shot.
If after the heal cycle the lobby still shows only a `cancelled`
sandbox tile and no running game, the running backend image
predates the pre-bootstrap reconciler tick — the periodic ticker
cancels the orphan after bootstrap has already returned, leaving
the lobby in the half-baked state. `make rebuild` recreates the
image and then `make up` lands a fresh sandbox.
- **`make up` reports a build error mentioning `pkg/cronutil`** — - **`make up` reports a build error mentioning `pkg/cronutil`** —
upstream module list drifted; copy any new `pkg/<name>/` line into upstream module list drifted; copy any new `pkg/<name>/` line into
the local-dev `backend.Dockerfile` / `gateway.Dockerfile` to match the local-dev `backend.Dockerfile` / `gateway.Dockerfile` to match
-4
View File
@@ -122,10 +122,6 @@ services:
BACKEND_OTEL_TRACES_EXPORTER: none BACKEND_OTEL_TRACES_EXPORTER: none
BACKEND_OTEL_METRICS_EXPORTER: none BACKEND_OTEL_METRICS_EXPORTER: none
BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-} BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-}
BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-}
BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-}
BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-}
BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-}
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
# Per-game state directories live under the same absolute path # Per-game state directories live under the same absolute path
+4 -4
View File
@@ -85,16 +85,16 @@ report to fetch. Two alternatives were rejected:
- a brand-new `user.games.state` message — adds a full wire-flow - a brand-new `user.games.state` message — adds a full wire-flow
(fbs schema, transcoder, gateway routing, backend handler) for a (fbs schema, transcoder, gateway routing, backend handler) for a
one-field response; one-field response;
- hard-coding `turn=0` for all games — works for the dev sandbox - hard-coding `turn=0` for all games — works for a synthetic report
(which never advances past turn zero) but renders the initial loaded at turn zero but shows only the stale turn-zero state for any real
state for any real game past turn zero. game past turn zero.
Extending `GameSummary` reuses the existing lobby pipeline; the Extending `GameSummary` reuses the existing lobby pipeline; the
backend already tracks `current_turn` in its runtime projection backend already tracks `current_turn` in its runtime projection
(`backend/internal/server/handlers_user_lobby_helpers.go` (`backend/internal/server/handlers_user_lobby_helpers.go`
`gameSummaryToWire` reads it from `g.RuntimeSnapshot.CurrentTurn`). `gameSummaryToWire` reads it from `g.RuntimeSnapshot.CurrentTurn`).
The `current_turn` field defaults to zero on the FB side, so existing The `current_turn` field defaults to zero on the FB side, so existing
tests and the dev sandbox flow continue to work unchanged. tests and the synthetic-report flow continue to work unchanged.
## State binding ## State binding