diff --git a/.gitea/workflows/dev-deploy.yaml b/.gitea/workflows/dev-deploy.yaml index 2a2ef6b..b9b3c94 100644 --- a/.gitea/workflows/dev-deploy.yaml +++ b/.gitea/workflows/dev-deploy.yaml @@ -153,7 +153,7 @@ jobs: # Compare the freshly-built `galaxy-engine:dev` SHA against # every running `galaxy-game-*` container. The backend # reconciler adopts pre-existing labelled engine containers - # without checking image drift, so a running sandbox would + # without checking image drift, so a running game would # otherwise keep serving the previous engine code until the # container is recycled by hand. This step makes the recycle # automatic but only when it is actually needed: @@ -168,10 +168,7 @@ jobs: # silent state corruption otherwise), and cascade-delete # the lobby `games` row (the FKs in `00001_init.sql` # drop the matching `runtime_records`, `memberships`, - # `player_mappings`, etc. in the same write). The - # `dev-sandbox` bootstrap on the next backend boot finds - # no live sandbox and provisions a fresh one on the new - # engine image. + # `player_mappings`, etc. in the same write). # # Backend is stopped first to keep the reconciler from # racing the recycle (mid-stream adoption / restart). 
The diff --git a/backend/cmd/backend/main.go b/backend/cmd/backend/main.go index 8d87527..d5b4286 100644 --- a/backend/cmd/backend/main.go +++ b/backend/cmd/backend/main.go @@ -26,7 +26,6 @@ import ( "galaxy/backend/internal/app" "galaxy/backend/internal/auth" "galaxy/backend/internal/config" - "galaxy/backend/internal/devsandbox" "galaxy/backend/internal/diplomail" "galaxy/backend/internal/diplomail/detector" "galaxy/backend/internal/diplomail/translator" @@ -274,29 +273,18 @@ func run(ctx context.Context) (err error) { ) runtimeGateway.svc = runtimeSvc - // Run a single reconciliation pass before the dev-sandbox - // bootstrap so any runtime row pointing at a vanished engine - // container (host reboot wiped /tmp/galaxy-game-state/; - // `tools/local-dev`'s `prune-broken-engines` target reaped the - // husk) is already cascaded through `markRemoved` → lobby - // `cancelled` by the time the bootstrap walks the sandbox list. - // Without this pre-tick the bootstrap would reuse the - // soon-to-be-cancelled game and force the developer into a - // second `make up` cycle to land a healthy sandbox. Failures are + // Run a single reconciliation pass at startup so any runtime row + // pointing at a vanished engine container (a host reboot wiped + // /tmp/galaxy-game-state/; `tools/local-dev`'s + // `prune-broken-engines` target reaped the husk) is cascaded + // through `markRemoved` → lobby `cancelled` before the server + // starts serving requests. Failures are // non-fatal: the periodic ticker started later catches up, and // the worst case degrades to the legacy two-cycle recovery. 
if err := runtimeSvc.Reconciler().Tick(ctx); err != nil { logger.Warn("pre-bootstrap reconciler tick failed", zap.Error(err)) } - if err := devsandbox.Bootstrap(ctx, devsandbox.Deps{ - Users: userSvc, - Lobby: lobbySvc, - EngineVersions: engineVersionSvc, - }, cfg.DevSandbox, logger); err != nil { - return fmt.Errorf("dev sandbox bootstrap: %w", err) - } - notifStore := notification.NewStore(db) notifSvc := notification.NewService(notification.Deps{ Store: notifStore, diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index baf6c7b..242355b 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -105,11 +105,6 @@ const ( envDiplomailTranslatorTimeout = "BACKEND_DIPLOMAIL_TRANSLATOR_TIMEOUT" envDiplomailTranslatorMaxAttempts = "BACKEND_DIPLOMAIL_TRANSLATOR_MAX_ATTEMPTS" envDiplomailWorkerInterval = "BACKEND_DIPLOMAIL_WORKER_INTERVAL" - - envDevSandboxEmail = "BACKEND_DEV_SANDBOX_EMAIL" - envDevSandboxEngineImage = "BACKEND_DEV_SANDBOX_ENGINE_IMAGE" - envDevSandboxEngineVersion = "BACKEND_DEV_SANDBOX_ENGINE_VERSION" - envDevSandboxPlayerCount = "BACKEND_DEV_SANDBOX_PLAYER_COUNT" ) // Default values applied when an environment variable is absent. @@ -178,9 +173,6 @@ const ( defaultDiplomailTranslatorTimeout = 10 * time.Second defaultDiplomailTranslatorMaxAttempts = 5 defaultDiplomailWorkerInterval = 2 * time.Second - - defaultDevSandboxEngineVersion = "0.1.0" - defaultDevSandboxPlayerCount = 20 ) // Allowed values for the closed-set string options. @@ -219,29 +211,12 @@ type Config struct { Runtime RuntimeConfig Notification NotificationConfig Diplomail DiplomailConfig - DevSandbox DevSandboxConfig // FreshnessWindow mirrors the gateway freshness window and is used by the // push server to bound the cursor TTL. FreshnessWindow time.Duration } -// DevSandboxConfig configures the boot-time bootstrap implemented in -// `backend/internal/devsandbox`. 
When Email is empty the bootstrap -// is a no-op, which is the production posture. When Email is set — -// from `BACKEND_DEV_SANDBOX_EMAIL` in the `tools/local-dev` stack — -// the bootstrap idempotently provisions a real user, the configured -// number of dummy participants, a private "Dev Sandbox" game, the -// matching memberships, and drives the lifecycle to `running`. The -// engine image and engine version refer to a row that the bootstrap -// also seeds in `engine_versions`. -type DevSandboxConfig struct { - Email string - EngineImage string - EngineVersion string - PlayerCount int -} - // LoggingConfig stores the parameters used by the structured logger. type LoggingConfig struct { // Level is the zap level name (e.g. "debug", "info", "warn", "error"). @@ -572,10 +547,6 @@ func DefaultConfig() Config { TranslatorMaxAttempts: defaultDiplomailTranslatorMaxAttempts, WorkerInterval: defaultDiplomailWorkerInterval, }, - DevSandbox: DevSandboxConfig{ - EngineVersion: defaultDevSandboxEngineVersion, - PlayerCount: defaultDevSandboxPlayerCount, - }, Runtime: RuntimeConfig{ WorkerPoolSize: defaultRuntimeWorkerPoolSize, JobQueueSize: defaultRuntimeJobQueueSize, @@ -755,13 +726,6 @@ func LoadFromEnv() (Config, error) { return Config{}, err } - cfg.DevSandbox.Email = strings.TrimSpace(loadString(envDevSandboxEmail, cfg.DevSandbox.Email)) - cfg.DevSandbox.EngineImage = strings.TrimSpace(loadString(envDevSandboxEngineImage, cfg.DevSandbox.EngineImage)) - cfg.DevSandbox.EngineVersion = strings.TrimSpace(loadString(envDevSandboxEngineVersion, cfg.DevSandbox.EngineVersion)) - if cfg.DevSandbox.PlayerCount, err = loadInt(envDevSandboxPlayerCount, cfg.DevSandbox.PlayerCount); err != nil { - return Config{}, err - } - if err := cfg.Validate(); err != nil { return Config{}, err } @@ -973,21 +937,6 @@ func (c Config) Validate() error { } } - if email := strings.TrimSpace(c.DevSandbox.Email); email != "" { - if _, err := netmail.ParseAddress(email); err != nil { - return 
fmt.Errorf("%s must be a valid RFC 5322 address: %w", envDevSandboxEmail, err) - } - if strings.TrimSpace(c.DevSandbox.EngineImage) == "" { - return fmt.Errorf("%s must not be empty when %s is set", envDevSandboxEngineImage, envDevSandboxEmail) - } - if strings.TrimSpace(c.DevSandbox.EngineVersion) == "" { - return fmt.Errorf("%s must not be empty when %s is set", envDevSandboxEngineVersion, envDevSandboxEmail) - } - if c.DevSandbox.PlayerCount <= 0 { - return fmt.Errorf("%s must be positive when %s is set", envDevSandboxPlayerCount, envDevSandboxEmail) - } - } - return nil } diff --git a/backend/internal/devsandbox/bootstrap.go b/backend/internal/devsandbox/bootstrap.go deleted file mode 100644 index 849a94c..0000000 --- a/backend/internal/devsandbox/bootstrap.go +++ /dev/null @@ -1,287 +0,0 @@ -// Package devsandbox provisions a ready-to-play game on backend boot -// for the `tools/local-dev` stack. -// -// Bootstrap is invoked from `backend/cmd/backend/main.go` after the -// admin bootstrap and before the HTTP listener starts. It reads -// `cfg.DevSandbox`; when `Email` is empty (the production posture) -// the function logs "skipped" and returns nil. When set, it -// idempotently: -// -// 1. registers the configured engine version and image; -// 2. find-or-creates the real dev user with the configured email; -// 3. find-or-creates `cfg.PlayerCount - 1` deterministic dummy -// users so the engine's minimum-players constraint is met; -// 4. find-or-creates a private "Dev Sandbox" game owned by the -// real user with min/max_players = cfg.PlayerCount and a -// year-out turn schedule (effectively frozen at turn 1); -// 5. inserts memberships for all participants bypassing the -// application/approval flow; -// 6. drives the lifecycle to `running` (or as far as possible if -// the runtime is busy). -// -// The function is a no-op on subsequent boots once the game is -// running; partial states from earlier crashes are recovered. 
-package devsandbox - -import ( - "context" - "errors" - "fmt" - "time" - - "galaxy/backend/internal/config" - "galaxy/backend/internal/lobby" - "galaxy/backend/internal/runtime" - - "github.com/google/uuid" - "go.uber.org/zap" -) - -// SandboxGameName is the display name used to identify the -// auto-provisioned game on subsequent reboots. The combination of -// game_name and owner_user_id is unique enough in practice — only -// the dev sandbox bootstrap creates a game owned by the configured -// real user with this exact name. -const SandboxGameName = "Dev Sandbox" - -// SandboxTurnSchedule keeps the game on turn 1 by scheduling the -// next turn a year out. The runtime scheduler still parses this and -// will tick once a year — long enough to never interfere with -// solo UI development. -const SandboxTurnSchedule = "0 0 1 1 *" - -// UserEnsurer matches `auth.UserEnsurer`. We define a local -// interface to avoid importing the auth package and circular -// dependencies — the production wiring passes the same `*user.Service` -// instance used by auth. -type UserEnsurer interface { - EnsureByEmail(ctx context.Context, email, preferredLanguage, timeZone, declaredCountry string) (uuid.UUID, error) -} - -// Deps aggregates the collaborators Bootstrap needs. -type Deps struct { - Users UserEnsurer - Lobby *lobby.Service - EngineVersions *runtime.EngineVersionService -} - -// Bootstrap runs the seven-step provisioning flow described on the -// package doc comment. Errors are returned to the caller; the boot -// path in `cmd/backend/main.go` aborts startup if Bootstrap fails so -// a misconfigured dev environment surfaces immediately rather than -// silently leaving the lobby empty. 
-func Bootstrap(ctx context.Context, deps Deps, cfg config.DevSandboxConfig, logger *zap.Logger) error { - if logger == nil { - logger = zap.NewNop() - } - logger = logger.Named("dev_sandbox") - - if cfg.Email == "" { - logger.Info("skipped (no email)") - return nil - } - if deps.Users == nil || deps.Lobby == nil || deps.EngineVersions == nil { - return errors.New("dev_sandbox: deps.Users, deps.Lobby and deps.EngineVersions are required") - } - if cfg.PlayerCount <= 0 { - return fmt.Errorf("dev_sandbox: PlayerCount must be positive, got %d", cfg.PlayerCount) - } - - if err := ensureEngineVersion(ctx, deps.EngineVersions, cfg, logger); err != nil { - return err - } - - realID, err := deps.Users.EnsureByEmail(ctx, cfg.Email, "en", "UTC", "") - if err != nil { - return fmt.Errorf("dev_sandbox: ensure real user: %w", err) - } - - dummyIDs := make([]uuid.UUID, 0, cfg.PlayerCount-1) - for i := 1; i < cfg.PlayerCount; i++ { - email := fmt.Sprintf("dev-dummy-%02d@local.test", i) - id, err := deps.Users.EnsureByEmail(ctx, email, "en", "UTC", "") - if err != nil { - return fmt.Errorf("dev_sandbox: ensure dummy %d: %w", i, err) - } - dummyIDs = append(dummyIDs, id) - } - - if err := purgeTerminalSandboxGames(ctx, deps.Lobby, realID, logger); err != nil { - return err - } - - game, err := findOrCreateSandboxGame(ctx, deps.Lobby, realID, cfg) - if err != nil { - return err - } - - game, err = ensureMembershipsAndDrive(ctx, deps.Lobby, game, realID, dummyIDs, logger) - if err != nil { - return err - } - - logger.Info("bootstrap complete", - zap.String("user_id", realID.String()), - zap.String("game_id", game.GameID.String()), - zap.String("status", game.Status), - ) - return nil -} - -func ensureEngineVersion(ctx context.Context, svc *runtime.EngineVersionService, cfg config.DevSandboxConfig, logger *zap.Logger) error { - _, err := svc.Register(ctx, runtime.RegisterInput{ - Version: cfg.EngineVersion, - ImageRef: cfg.EngineImage, - }) - switch { - case err == nil: - 
logger.Info("engine version registered", - zap.String("version", cfg.EngineVersion), - zap.String("image", cfg.EngineImage), - ) - return nil - case errors.Is(err, runtime.ErrEngineVersionTaken): - logger.Debug("engine version already registered", - zap.String("version", cfg.EngineVersion), - ) - return nil - default: - return fmt.Errorf("dev_sandbox: register engine version: %w", err) - } -} - -// terminalSandboxStatus reports whether a sandbox game has reached a -// state from which it can no longer be driven back to running. We -// treat such games as "absent" so the next bootstrap creates a fresh -// one rather than handing the developer a dead lobby tile. -func terminalSandboxStatus(status string) bool { - switch status { - case lobby.GameStatusCancelled, lobby.GameStatusFinished, lobby.GameStatusStartFailed: - return true - } - return false -} - -// purgeTerminalSandboxGames deletes every previous "Dev Sandbox" game -// the dev user owns that has reached a terminal state -// (cancelled / finished / start_failed). The cascade declared in -// `00001_init.sql` removes the matching memberships, applications, -// invites, runtime records, and player mappings in the same write, -// so the developer's lobby never piles up dead tiles between -// `make rebuild` cycles. Non-terminal games are left untouched — -// a `running` sandbox from a previous boot is the happy path. 
-func purgeTerminalSandboxGames(ctx context.Context, svc *lobby.Service, ownerID uuid.UUID, logger *zap.Logger) error { - games, err := svc.ListMyGames(ctx, ownerID) - if err != nil { - return fmt.Errorf("dev_sandbox: list my games: %w", err) - } - for _, g := range games { - if g.GameName != SandboxGameName || g.OwnerUserID == nil || *g.OwnerUserID != ownerID { - continue - } - if !terminalSandboxStatus(g.Status) { - continue - } - if err := svc.DeleteGame(ctx, g.GameID); err != nil { - return fmt.Errorf("dev_sandbox: delete terminal sandbox %s: %w", g.GameID, err) - } - logger.Info("purged terminal sandbox game", - zap.String("game_id", g.GameID.String()), - zap.String("status", g.Status), - ) - } - return nil -} - -func findOrCreateSandboxGame(ctx context.Context, svc *lobby.Service, ownerID uuid.UUID, cfg config.DevSandboxConfig) (lobby.GameRecord, error) { - games, err := svc.ListMyGames(ctx, ownerID) - if err != nil { - return lobby.GameRecord{}, fmt.Errorf("dev_sandbox: list my games: %w", err) - } - for _, g := range games { - if g.GameName != SandboxGameName || g.OwnerUserID == nil || *g.OwnerUserID != ownerID { - continue - } - // `purgeTerminalSandboxGames` ran before us, so any sandbox - // game still in the list is either a live one we should - // reuse or a transient state we can drive forward. 
- return g, nil - } - rec, err := svc.CreateGame(ctx, lobby.CreateGameInput{ - OwnerUserID: &ownerID, - Visibility: lobby.VisibilityPrivate, - GameName: SandboxGameName, - Description: "Auto-provisioned by backend/internal/devsandbox for solo UI development.", - MinPlayers: int32(cfg.PlayerCount), - MaxPlayers: int32(cfg.PlayerCount), - StartGapHours: 0, - StartGapPlayers: 0, - EnrollmentEndsAt: time.Now().Add(365 * 24 * time.Hour), - TurnSchedule: SandboxTurnSchedule, - TargetEngineVersion: cfg.EngineVersion, - }) - if err != nil { - return lobby.GameRecord{}, fmt.Errorf("dev_sandbox: create game: %w", err) - } - return rec, nil -} - -func ensureMembershipsAndDrive(ctx context.Context, svc *lobby.Service, game lobby.GameRecord, realID uuid.UUID, dummyIDs []uuid.UUID, logger *zap.Logger) (lobby.GameRecord, error) { - caller := realID - if game.Status == lobby.GameStatusDraft { - next, err := svc.OpenEnrollment(ctx, &caller, false, game.GameID) - if err != nil { - return game, fmt.Errorf("dev_sandbox: open enrollment: %w", err) - } - game = next - } - - if game.Status == lobby.GameStatusEnrollmentOpen { - users := append([]uuid.UUID{realID}, dummyIDs...) 
- for i, uid := range users { - raceName := fmt.Sprintf("Sandbox-%02d", i+1) - if _, err := svc.InsertMembershipDirect(ctx, lobby.InsertMembershipDirectInput{ - GameID: game.GameID, - UserID: uid, - RaceName: raceName, - }); err != nil { - return game, fmt.Errorf("dev_sandbox: insert membership %d: %w", i+1, err) - } - } - logger.Info("memberships ensured", - zap.Int("count", len(users)), - zap.String("game_id", game.GameID.String()), - ) - next, err := svc.ReadyToStart(ctx, &caller, false, game.GameID) - if err != nil { - return game, fmt.Errorf("dev_sandbox: ready to start: %w", err) - } - game = next - } - - if game.Status == lobby.GameStatusReadyToStart { - next, err := svc.Start(ctx, &caller, false, game.GameID) - if err != nil { - return game, fmt.Errorf("dev_sandbox: start: %w", err) - } - game = next - } - - if game.Status == lobby.GameStatusStartFailed { - next, err := svc.RetryStart(ctx, &caller, false, game.GameID) - if err != nil { - logger.Warn("retry start failed", zap.Error(err)) - return game, nil - } - game = next - if game.Status == lobby.GameStatusReadyToStart { - next, err := svc.Start(ctx, &caller, false, game.GameID) - if err != nil { - return game, fmt.Errorf("dev_sandbox: start after retry: %w", err) - } - game = next - } - } - - return game, nil -} diff --git a/backend/internal/devsandbox/bootstrap_test.go b/backend/internal/devsandbox/bootstrap_test.go deleted file mode 100644 index 714d6cd..0000000 --- a/backend/internal/devsandbox/bootstrap_test.go +++ /dev/null @@ -1,106 +0,0 @@ -package devsandbox - -import ( - "context" - "errors" - "testing" - - "galaxy/backend/internal/config" - - "github.com/google/uuid" - "go.uber.org/zap" -) - -// TestBootstrapSkippedWhenEmailEmpty exercises the no-op branch: with -// the production posture (Email == "") Bootstrap must return without -// touching any dependency. The fact that Users/Lobby/EngineVersions -// are nil here doubles as a check that the early-return runs first. 
-func TestBootstrapSkippedWhenEmailEmpty(t *testing.T) { - err := Bootstrap( - context.Background(), - Deps{}, - config.DevSandboxConfig{}, - zap.NewNop(), - ) - if err != nil { - t.Fatalf("expected nil error on empty email, got: %v", err) - } -} - -// TestBootstrapRejectsZeroPlayerCount confirms the validation -// short-circuits the flow before any DB call when PlayerCount is -// non-positive but Email is set. The error path is fast and never -// dereferences the (still-nil) Users/Lobby deps. -func TestBootstrapRejectsZeroPlayerCount(t *testing.T) { - err := Bootstrap( - context.Background(), - Deps{Users: stubEnsurer{}, Lobby: nil, EngineVersions: nil}, - config.DevSandboxConfig{ - Email: "dev@local.test", - EngineImage: "galaxy-engine:local-dev", - EngineVersion: "0.0.0-local-dev", - PlayerCount: 0, - }, - zap.NewNop(), - ) - if err == nil { - t.Fatal("expected error on zero PlayerCount, got nil") - } -} - -// TestBootstrapRejectsMissingDeps checks that a misconfigured wiring -// (Email set but one of the required services nil) fails fast rather -// than panicking when the bootstrap reaches its first service call. -func TestBootstrapRejectsMissingDeps(t *testing.T) { - err := Bootstrap( - context.Background(), - Deps{Users: stubEnsurer{}, Lobby: nil, EngineVersions: nil}, - config.DevSandboxConfig{ - Email: "dev@local.test", - EngineImage: "galaxy-engine:local-dev", - EngineVersion: "0.0.0-local-dev", - PlayerCount: 20, - }, - zap.NewNop(), - ) - if err == nil { - t.Fatal("expected error on missing deps, got nil") - } - if !errors.Is(err, errMissingDepsSentinel) && err.Error() == "" { - // The exact wording is not part of the contract; this branch - // only asserts the error is non-nil and human-readable. - t.Fatalf("error has empty message: %v", err) - } -} - -// errMissingDepsSentinel exists so the assertion above can compile; -// the real error is constructed via errors.New inside Bootstrap and -// is intentionally not exported. 
The test only needs to confirm the -// returned error has a message. -var errMissingDepsSentinel = errors.New("sentinel") - -// TestTerminalSandboxStatus pins the contract that decides whether a -// previously created sandbox game gets purged on the next boot. -// Terminal states are deleted (cascade-style) so the developer's -// lobby never piles up dead tiles between `make rebuild` cycles. -func TestTerminalSandboxStatus(t *testing.T) { - terminal := []string{"cancelled", "finished", "start_failed"} - live := []string{"draft", "enrollment_open", "ready_to_start", "starting", "running", "paused"} - - for _, status := range terminal { - if !terminalSandboxStatus(status) { - t.Errorf("expected %q to be terminal", status) - } - } - for _, status := range live { - if terminalSandboxStatus(status) { - t.Errorf("expected %q to be non-terminal", status) - } - } -} - -type stubEnsurer struct{} - -func (stubEnsurer) EnsureByEmail(_ context.Context, _, _, _, _ string) (uuid.UUID, error) { - return uuid.UUID{}, nil -} diff --git a/backend/internal/lobby/games.go b/backend/internal/lobby/games.go index ad98f4f..9ee1bab 100644 --- a/backend/internal/lobby/games.go +++ b/backend/internal/lobby/games.go @@ -274,11 +274,10 @@ func (s *Service) ListFinishedGamesBefore(ctx context.Context, cutoff time.Time) // `ON DELETE CASCADE` constraints declared in `00001_init.sql`. // Idempotent: returns nil when no game matches. // -// Phase 14 introduces this method for the dev-sandbox bootstrap so a -// terminal "Dev Sandbox" tile from a previous local-dev session can -// be scrubbed before a fresh game spawns. Production callers must -// stay on the regular cancel / finish lifecycle — `DeleteGame` is -// destructive and bypasses the cascade-notification machinery. +// `DeleteGame` is destructive — a hard delete that bypasses the +// cascade-notification machinery — so production callers stay on the +// regular cancel / finish lifecycle. It is exercised by the lobby +// integration tests. 
func (s *Service) DeleteGame(ctx context.Context, gameID uuid.UUID) error { if err := s.deps.Store.DeleteGame(ctx, gameID); err != nil { return err diff --git a/backend/internal/lobby/lobby_e2e_test.go b/backend/internal/lobby/lobby_e2e_test.go index 7c5baed..460cfb6 100644 --- a/backend/internal/lobby/lobby_e2e_test.go +++ b/backend/internal/lobby/lobby_e2e_test.go @@ -248,8 +248,8 @@ func TestEndToEndPrivateGameFlow(t *testing.T) { } } -// TestDeleteGameCascadesEverything pins the contract the dev-sandbox -// bootstrap relies on: removing a game wipes every referencing row +// TestDeleteGameCascadesEverything pins the DeleteGame contract: +// removing a game wipes every referencing row // (memberships, applications, invites, runtime_records, // player_mappings) in a single SQL statement. Before this is wired // the developer's lobby pile up cancelled tiles between diff --git a/backend/internal/lobby/membership_direct.go b/backend/internal/lobby/membership_direct.go index 1a9201c..c5aa150 100644 --- a/backend/internal/lobby/membership_direct.go +++ b/backend/internal/lobby/membership_direct.go @@ -20,9 +20,9 @@ type InsertMembershipDirectInput struct { // writes as ApproveApplication: the per-game race-name reservation // row plus the membership row, and refreshes the in-memory caches. // -// The method is intended for boot-time provisioning by -// `backend/internal/devsandbox` and similar trusted callers. It is -// not exposed through any HTTP handler. The caller must guarantee +// The method is intended for trusted boot-time provisioning and +// integration tests; it is not exposed through any HTTP handler. The +// caller must guarantee // game.Status == GameStatusEnrollmentOpen — the function returns // ErrConflict otherwise — and that the race-name policy and // canonical-key invariants are honoured (the implementation reuses @@ -30,9 +30,8 @@ type InsertMembershipDirectInput struct { // or unsuitable name still fails). 
// // Idempotency: if a membership for (GameID, UserID) already exists -// the function returns the existing row without modifying state. -// This makes the helper safe to call on every backend boot from -// devsandbox.Bootstrap. +// the function returns the existing row without modifying state, so +// the helper is safe to call repeatedly. func (s *Service) InsertMembershipDirect(ctx context.Context, in InsertMembershipDirectInput) (Membership, error) { displayName, err := ValidateDisplayName(in.RaceName) if err != nil { diff --git a/backend/internal/lobby/store.go b/backend/internal/lobby/store.go index 97a8c90..c4ee547 100644 --- a/backend/internal/lobby/store.go +++ b/backend/internal/lobby/store.go @@ -236,9 +236,8 @@ func (s *Store) ListMyGames(ctx context.Context, userID uuid.UUID) ([]GameRecord // referencing table (memberships / applications / invites / // runtime_records / player_mappings — all declared with ON DELETE // CASCADE in `00001_init.sql`). Idempotent: returns nil when no row -// matches. Used by the dev-sandbox bootstrap to scrub terminal -// games on every backend boot so the developer's lobby never piles -// up cancelled tiles. +// matches. A hard delete for trusted callers and integration tests; +// production lifecycle uses cancel / finish. func (s *Store) DeleteGame(ctx context.Context, gameID uuid.UUID) error { g := table.Games stmt := g.DELETE().WHERE(g.GameID.EQ(postgres.UUID(gameID))) diff --git a/tools/dev-deploy/.env.example b/tools/dev-deploy/.env.example index 73e932e..259b08c 100644 --- a/tools/dev-deploy/.env.example +++ b/tools/dev-deploy/.env.example @@ -7,12 +7,6 @@ # baked into `docker-compose.yml`, so this file documents the knobs # rather than driving them. -# Auto-provisioned sandbox bootstrap. Empty disables the bootstrap. 
-BACKEND_DEV_SANDBOX_EMAIL=dev@galaxy.lan -BACKEND_DEV_SANDBOX_ENGINE_IMAGE=galaxy-engine:dev -BACKEND_DEV_SANDBOX_ENGINE_VERSION=0.1.0 -BACKEND_DEV_SANDBOX_PLAYER_COUNT=20 - # `123456` short-circuits the email-code path for the dev account. # This is also the docker-compose default — set the variable to an # empty string here when the environment must rely on real Mailpit diff --git a/tools/dev-deploy/KNOWN-ISSUES.md b/tools/dev-deploy/KNOWN-ISSUES.md index 966ca67..42b5607 100644 --- a/tools/dev-deploy/KNOWN-ISSUES.md +++ b/tools/dev-deploy/KNOWN-ISSUES.md @@ -1,164 +1,8 @@ # `tools/dev-deploy/` — known issues -Issues that surface in the long-lived dev environment but are not yet -fixed. Each entry lists the observed symptom, the diagnostic evidence, -the working hypothesis, and the open questions that have to be -answered before a fix lands. - -## Dev Sandbox game flips to `cancelled` after a `dev-deploy` redispatch - -### Symptom - -A previously `running` "Dev Sandbox" game (created by -`backend/internal/devsandbox`) transitions to `cancelled` ~15 minutes -after a `dev-deploy.yaml` workflow_dispatch run finishes. The user's -browser session survives (the same `device_session_id` keeps working), -but the lobby shows no game because the only game it had is now -terminal. `purgeTerminalSandboxGames` does pick it up on the **next** -boot and creates a fresh sandbox — but the first redispatch leaves -the user with an empty lobby until backend restarts again. - -### Diagnostic evidence - -Backend logs from the broken cycle (timestamps abbreviated): - -```text -20:24:40 dev_sandbox: purged terminal sandbox game game_id= status=cancelled -20:24:40 dev_sandbox: memberships ensured count=20 game_id= -20:24:40 dev_sandbox: bootstrap complete user_id= game_id= status=starting -... -20:25:09 user mail sent failed (diplomail tables missing — unrelated) -... 
-20:39:40 lobby: game cancelled by runtime reconciler game_id= - op=reconcile status=removed message="container disappeared" -``` - -Between 20:24:40 (`status=starting`) and 20:39:40 (reconciler cancel) -the backend logs are silent on the runtime / engine paths — no -`engine spawned`, no `engine container started`, no `runtime -transition` lines. The reconciler then fires and reports the engine -container as missing. - -`docker ps -a --filter 'label=org.opencontainers.image.title=galaxy-game-engine'` -returns no rows during this window — the engine container is neither -running nor stopped on the host, so it either was never spawned or -was removed before the host snapshot. - -### What has been ruled out - -A live `docker inspect` on a healthy engine container shows: - -```text -Labels: galaxy.backend=1, galaxy.engine_version=0.1.0, - galaxy.game_id=, - org.opencontainers.image.title=galaxy-game-engine, - com.galaxy.{cpu_quota,memory,pids_limit} -AutoRemove: false -RestartPolicy: on-failure -NetworkMode: galaxy-dev-internal -``` - -There are no `com.docker.compose.*` labels and `AutoRemove=false`, -so `--remove-orphans` cannot reap the engine and a `--rm`-style -self-destruct is not in play. Two redispatches captured under -`docker events --filter event=create,start,die,destroy,kill,stop` -also confirmed it: across both runs the only `die` / `destroy` -events were for `galaxy-dev-{backend,api,caddy}`. The live engine -container survived both redispatches, and the reconciler that -fires 60 seconds after the new backend boots correctly matched -it through `byGameID` / `byContainerID`. - -`backend/internal/runtime/service.go` only removes engine -containers from the explicit `runStop` / `runRestart` / `runPatch` -paths. There is no `runtime.Service.Shutdown` that proactively -kills containers on backend exit, so a graceful SIGTERM to -`galaxy-dev-backend` will not touch its child engine containers. 
- -### Host-side hypotheses considered and rejected by the owner - -The natural follow-up suspects after compose was cleared — host-side -`docker prune` cron jobs, a manual `docker rm`, an out-of-band -`dockerd` restart, and an idle-state engine crash — were all -rejected by the project owner: the dev host runs none of those -periodic cleanups, no one manually removed the container, dockerd -was not restarted in the window, and the engine binary does not -crash while idling on API calls. - -### Best remaining suspicion - -Something the `dev-deploy.yaml` CI run does between successful -image builds and the final `docker compose up -d --wait ---remove-orphans` clobbers the previously-spawned engine container. -The chain at runtime contains: - -1. `docker build -t galaxy-engine:dev -f game/Dockerfile .` -2. `docker compose build galaxy-backend galaxy-api` -3. `docker run --rm` alpine for the UI volume seed -4. `docker compose up -d --wait --remove-orphans` - -None of these *should* touch an unmanaged engine container, but -the reproduction window points squarely inside this sequence. A -deliberate next reproduction with `docker events --since 0` armed -*before* the deploy starts and live for the entire job — captured -end-to-end on the dev host, not just the chunk after backend -recreate — would pin which step emits the `destroy` on the engine. - -### Update 2026-05-19: integration preclean identified as one cause - -A live reproduction during the post-merge auto-deploy cycle (Gitea -run #188 dev-deploy plus parallel run #190 integration) pinned one -clobbering source: `integration/scripts/preclean.sh` was unscoped -and removed *every* container labelled `galaxy.backend=1`, including -the dev-deploy engine. 
Timeline from the dev host: - -```text -23:10:40 backend pre-bootstrap reconciler tick: engine alive -23:10:40 dev_sandbox bootstrap: status=running -23:10:56 preclean: removing 1 backend-managed engine containers ← integration run #190 -23:11:40 reconciler: container disappeared → game cancelled -``` - -Fix landed: `BACKEND_STACK_LABEL=integration` is now passed to -every integration backend (see -`integration/testenv/backend.go`) and `preclean.sh` AND-combines -`galaxy.backend=1` with `galaxy.stack=integration`, so dev-deploy / -local-dev engines stamped with different stack values are no longer -collateral. - -This covers **push**-triggered cycles where `dev-deploy.yaml` and -`integration.yaml` run on the same Gitea host. The original -hypothesis (a `workflow_dispatch dev-deploy` solo run also losing -the engine) is *not* explained by the integration fix — manual -dispatches do not trigger `integration.yaml`. Keep this entry open -until a solo-dispatch reproduction confirms whether the symptom -still occurs. - -### Status - -Partially fixed (push-triggered cycles). Solo `workflow_dispatch` -reproductions still open. If the symptom recurs after the -integration fix lands, capture `docker events --since 0` for the -full dispatch window and attach here. - -### Workaround in use today - -When the sandbox game flips to `cancelled`, redispatch `dev-deploy`: - -```sh -curl -X POST -n -H 'Content-Type: application/json' \ - -d '{"ref":""}' \ - https://gitea.iliadenisov.ru/api/v1/repos/developer/galaxy-game/actions/workflows/dev-deploy.yaml/dispatches -``` - -The next boot's `purgeTerminalSandboxGames` removes the cancelled -row, `findOrCreateSandboxGame` creates a fresh one, and -`ensureMembershipsAndDrive` puts the new game back to `running`. - -### Owner - -Unassigned. File an issue once we have the runtime / reconciler -analysis above; reference this section in the issue body so future -redeploys can short-circuit the diagnostic loop. 
+Issues that surfaced in the long-lived dev environment. Each entry lists +the observed symptom, the diagnostic evidence, and the fix or the open +questions that have to be answered before a fix lands. ## `docker restart galaxy-dev-backend` fails after the CI runner cleans up diff --git a/tools/dev-deploy/README.md b/tools/dev-deploy/README.md index 2a7a9fe..4f66960 100644 --- a/tools/dev-deploy/README.md +++ b/tools/dev-deploy/README.md @@ -114,8 +114,7 @@ calls `make clean-data`. The same dev-mode email-code override as `tools/local-dev/` applies, and the dev-deploy compose ships with it enabled by default: -1. Enter `dev@galaxy.lan` (or whatever `BACKEND_DEV_SANDBOX_EMAIL` - resolves to) in the login form. +1. Enter your email address in the login form. 2. Submit `123456` as the code — the docker-compose default for `BACKEND_AUTH_DEV_FIXED_CODE` is `123456`, so the bcrypt-hashed email code stays a fallback. To force real Mailpit codes (e.g. for @@ -212,8 +211,7 @@ make clean-data Stop everything and wipe volumes + game-state dir ## Known issues See [`KNOWN-ISSUES.md`](KNOWN-ISSUES.md) for symptoms that surface -in the long-lived dev environment but are not yet fixed (currently: -the sandbox game flipping to `cancelled` after a redispatch). +in the long-lived dev environment but are not yet fixed. ## Deployment cadence @@ -237,12 +235,12 @@ behind. There is no separate state to clean up between the two paths. ### Engine image drift recycle -`backend` spawns one engine container per game (the long-lived "Dev -Sandbox" plus any user-created games) and the reconciler reattaches -to whatever it finds with the `galaxy.stack=dev-deploy` label. That -reattach does not check the running container's image SHA against the -freshly-built `galaxy-engine:dev` tag, so an unchanged container would -otherwise keep serving the previous engine code after a redeploy. 
+`backend` spawns one engine container per running game and the +reconciler reattaches to whatever it finds with the +`galaxy.stack=dev-deploy` label. That reattach does not check the +running container's image SHA against the freshly-built +`galaxy-engine:dev` tag, so an unchanged container would otherwise +keep serving the previous engine code after a redeploy. The `dev-deploy.yaml` workflow handles this in the `Recycle engine containers on image drift` step. When `docker build` @@ -250,9 +248,7 @@ produces a new `galaxy-engine:dev` SHA, the step compares it against every running `galaxy-game-*` container and, for each drifted one, stops the backend, removes the container, wipes its bind-mounted state directory (Engine.Init() writes turn-0 over any pre-existing -`turn-N` files), and cascade-deletes the lobby `games` row. The -`dev-sandbox` bootstrap on the next backend boot finds no live -sandbox and provisions a fresh one on the new engine image. +`turn-N` files), and cascade-deletes the lobby `games` row. When the engine sources are unchanged, the BuildKit cache hits and the SHA stays the same — the recycle step is a no-op and the running diff --git a/tools/dev-deploy/docker-compose.yml b/tools/dev-deploy/docker-compose.yml index 23e260e..02551dd 100644 --- a/tools/dev-deploy/docker-compose.yml +++ b/tools/dev-deploy/docker-compose.yml @@ -127,15 +127,6 @@ services: # bcrypt-hashed code is single-use). Set the var to an empty # string in `.env` to disable. BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-123456} - # Long-lived dev environment always bootstraps the "Dev Sandbox" - # game owned by this email so a freshly redeployed stack already - # has one ready-to-play game in the lobby. Set the variable to an - # empty string in `.env` to disable the bootstrap (e.g. for a - # cold-start QA pass). 
- BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-dev@galaxy.lan} - BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-galaxy-engine:dev} - BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-0.1.0} - BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-20} volumes: - /var/run/docker.sock:/var/run/docker.sock # Per-game state directories live under the same absolute path diff --git a/tools/local-dev/Makefile b/tools/local-dev/Makefile index 4981f23..02b1b94 100644 --- a/tools/local-dev/Makefile +++ b/tools/local-dev/Makefile @@ -22,7 +22,7 @@ help: @echo " make up Build (if needed) and bring up the stack, wait until healthy" @echo " make down Stop compose containers, leave engines + volumes intact" @echo " make rebuild Force rebuild of backend / gateway images and bring up" - @echo " make build-engine Build the engine image $(ENGINE_IMAGE) used by the dev sandbox" + @echo " make build-engine Build the engine image $(ENGINE_IMAGE) used by running games" @echo " make stop-engines Stop and remove only the per-game engine containers" @echo " make prune-broken-engines Remove non-running engine containers Docker can't heal (run inside 'up')" @echo " make clean Stop everything (incl. engines) and wipe volumes + game state" @@ -37,8 +37,9 @@ help: @echo " pnpm -C ui/frontend dev" @echo "and open http://localhost:5173 (UI) plus http://localhost:8025 (Mailpit)." @echo "" - @echo "Default login for the auto-provisioned dev sandbox: dev@local.test" - @echo "(see BACKEND_DEV_SANDBOX_EMAIL in .env). Login code: 123456." + @echo "Sign in with email-OTP; the fixed login code 123456 works when" + @echo "BACKEND_AUTH_DEV_FIXED_CODE is set in .env. No game is auto-provisioned —" + @echo "load a legacy report via the UI's DEV report loader to exercise the map." 
up: build-engine prune-broken-engines $(COMPOSE) up -d --wait @@ -88,12 +89,9 @@ stop-engines: # bind-mount source and leaves it stuck in `exited` / `created` # state. This target prunes the husks before `compose up`; the # backend's pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`) -# then cascades the orphan runtime row to `removed`, the lobby -# cancels the game, and the dev-sandbox bootstrap purges the -# cancelled tile and provisions a fresh sandbox in the same -# `make up` cycle. Healthy `running` / `restarting` containers are -# left intact so a long-lived sandbox survives normal up/down -# cycles. +# then cascades the orphan runtime row to `removed` and the lobby +# cancels the game. Healthy `running` / `restarting` containers are +# left intact so a long-lived game survives normal up/down cycles. prune-broken-engines: @ids=""; \ for cid in $$(docker ps -aq \ diff --git a/tools/local-dev/README.md b/tools/local-dev/README.md index 26404db..8201568 100644 --- a/tools/local-dev/README.md +++ b/tools/local-dev/README.md @@ -78,49 +78,24 @@ To force the second path (no fast-bypass), edit `make rebuild` (or simply `docker compose up -d backend` to recreate the backend with the new env). -## Auto-provisioned dev sandbox +## No auto-provisioned game -`make up` provisions a private game called **Dev Sandbox** owned by -the dev user (default `dev@local.test`). The flow is implemented in -`backend/internal/devsandbox` and runs on every backend boot when -`BACKEND_DEV_SANDBOX_EMAIL` is non-empty in `tools/local-dev/.env`. - -Bootstrap is idempotent — re-running `make up` after a `make down` -finds the existing user, dummy participants, game, and memberships -without creating duplicates. If a previous boot crashed mid-way -(game stuck in `enrollment_open` or `ready_to_start`), the next boot -resumes the lifecycle. - -To log in straight into the sandbox: +`make up` brings up the stack with an empty lobby — there is no +auto-provisioned game. 
Sign in with email-OTP (the fixed dev code +`123456` works when `BACKEND_AUTH_DEV_FIXED_CODE` is set in +`tools/local-dev/.env`): 1. `make -C tools/local-dev up` 2. `pnpm -C ui/frontend dev` (in another terminal) -3. Open <http://localhost:5173>, enter `dev@local.test`, then - the dev code `123456`. -4. The lobby shows **Dev Sandbox** in *My Games*; click in. +3. Open <http://localhost:5173>, enter your email, then the dev + code `123456`. -To disable the bootstrap, clear `BACKEND_DEV_SANDBOX_EMAIL` in -`tools/local-dev/.env` and `docker compose up -d backend` (or -`make rebuild`). Existing users / games are not removed. - -Terminal sandbox games — anything in `cancelled`, `finished`, or -`start_failed` — are deleted on every boot before find-or-create -runs. The cascade declared in `00001_init.sql` removes the -matching memberships, applications, invites, runtime records, -and player mappings in the same write, so the dev user's lobby -shows exactly one running tile at all times. Cancelling the -sandbox manually and running `docker compose restart backend` -(or `make rebuild`) yields a fresh game without leaving dead -tiles behind. - -The bootstrap requires: -- `galaxy-engine:local-dev` Docker image (`make build-engine`). -- `BACKEND_DEV_SANDBOX_ENGINE_VERSION` parses as plain semver - (`MAJOR.MINOR.PATCH`); the default `0.1.0` is what the bootstrap - registers in the `engine_versions` row that points at the image. -- `BACKEND_DEV_SANDBOX_PLAYER_COUNT` ≥ 20 (the engine's minimum; - 19 deterministic dummies fill the slots so the single real user - can start the game). +To exercise the map and report views without running a full game, use +the UI's DEV **synthetic report loader**: convert a legacy `.REP` with +`tools/local-dev/legacy-report/` and load the resulting JSON through the +loader (see that tool's README). To play a real game, create one in the +lobby and let the engine (`galaxy-engine:local-dev`, built by +`make build-engine`) run it.
- A frozen turn schedule (`0 0 1 1 *` — once a year) so the visible game state stays at turn 1 until you explicitly progress it. @@ -239,24 +214,15 @@ make status docker compose ps this in one cycle: `prune-broken-engines` (runs as part of `up`) removes every engine container that is not in `running` / `restarting` state, the backend's pre-bootstrap reconciler tick - cascades the orphan runtime row to `removed`, the lobby cancels - the matching sandbox game, and the dev-sandbox bootstrap purges - the cancelled tile and provisions a fresh sandbox with a brand - new state directory. To run the cleanup by hand without restarting - the rest of the stack, `make prune-broken-engines`. + cascades the orphan runtime row to `removed`, and the lobby cancels + the matching game. To run the cleanup by hand without restarting the + rest of the stack, `make prune-broken-engines`. The cycle relies on the backend image carrying the pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`). `make up` reuses the cached image, so after pulling this commit the first time you must `make rebuild` once to bake the fix in. Future `make up` cycles will heal in one shot. - - If after the heal cycle the lobby still shows only a `cancelled` - sandbox tile and no running game, the running backend image - predates the pre-bootstrap reconciler tick — the periodic ticker - cancels the orphan after bootstrap has already returned, leaving - the lobby in the half-baked state. `make rebuild` recreates the - image and then `make up` lands a fresh sandbox. 
- **`make up` reports a build error mentioning `pkg/cronutil`** — upstream module list drifted; copy any new `pkg/<name>/` line into the local-dev `backend.Dockerfile` / `gateway.Dockerfile` to match diff --git a/tools/local-dev/docker-compose.yml b/tools/local-dev/docker-compose.yml index d063382..b37a509 100644 --- a/tools/local-dev/docker-compose.yml +++ b/tools/local-dev/docker-compose.yml @@ -122,10 +122,6 @@ services: BACKEND_OTEL_TRACES_EXPORTER: none BACKEND_OTEL_METRICS_EXPORTER: none BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-} - BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-} - BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-} - BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-} - BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-} volumes: - /var/run/docker.sock:/var/run/docker.sock # Per-game state directories live under the same absolute path diff --git a/ui/docs/game-state.md b/ui/docs/game-state.md index d377e2e..756419d 100644 --- a/ui/docs/game-state.md +++ b/ui/docs/game-state.md @@ -85,16 +85,16 @@ report to fetch. Two alternatives were rejected: - a brand-new `user.games.state` message — adds a full wire-flow (fbs schema, transcoder, gateway routing, backend handler) for a one-field response; -- hard-coding `turn=0` for all games — works for the dev sandbox - (which never advances past turn zero) but renders the initial - state for any real game past turn zero. +- hard-coding `turn=0` for all games — works for a synthetic report + loaded at turn zero but mis-renders the initial state for any real + game past turn zero. Extending `GameSummary` reuses the existing lobby pipeline; the backend already tracks `current_turn` in its runtime projection (`backend/internal/server/handlers_user_lobby_helpers.go` `gameSummaryToWire` reads it from `g.RuntimeSnapshot.CurrentTurn`).
The `current_turn` field defaults to zero on the FB side, so existing -tests and the dev sandbox flow continue to work unchanged. +tests and the synthetic-report flow continue to work unchanged. ## State binding