dev-deploy: production mirror + full observability behind the /_gm gate #88
@@ -148,12 +148,37 @@ jobs:
|
|||||||
-v "${{ gitea.workspace }}/pkg/geoip/test-data/test-data:/src:ro" \
|
-v "${{ gitea.workspace }}/pkg/geoip/test-data/test-data:/src:ro" \
|
||||||
alpine sh -c 'cp /src/GeoIP2-Country-Test.mmdb /dst/geoip.mmdb'
|
alpine sh -c 'cp /src/GeoIP2-Country-Test.mmdb /dst/geoip.mmdb'
|
||||||
|
|
||||||
|
- name: Seed mailpit relay config
|
||||||
|
env:
|
||||||
|
GALAXY_DEV_MAIL_RELAY_USERNAME: ${{ secrets.GALAXY_DEV_MAIL_RELAY_USERNAME }}
|
||||||
|
GALAXY_DEV_MAIL_RELAY_PASSWORD: ${{ secrets.GALAXY_DEV_MAIL_RELAY_PASSWORD }}
|
||||||
|
run: |
|
||||||
|
# Render the Mailpit relay upstream config from the template,
|
||||||
|
# substituting the Gmail App Password from a Gitea secret, then
|
||||||
|
# seed it into a named volume (same rationale as the geoip seed:
|
||||||
|
# a workspace bind-mount would vanish with the runner workspace).
|
||||||
|
# The secret never lands in git or a committed file; it is
|
||||||
|
# rendered to a tmpfile outside the repo and removed after. Gmail
|
||||||
|
# App Passwords are [a-z]{16}, so the `|` sed delimiter is safe for the password; the username must likewise contain no `|`, `&` or `\`.
|
||||||
|
# When the secret is unset the creds render empty and the compose
|
||||||
|
# default relay-match is non-routable, so the stack only captures.
|
||||||
|
rendered="$(mktemp)"
|
||||||
|
sed -e "s|\${GALAXY_DEV_MAIL_RELAY_USERNAME}|${GALAXY_DEV_MAIL_RELAY_USERNAME}|g" \
|
||||||
|
-e "s|\${GALAXY_DEV_MAIL_RELAY_PASSWORD}|${GALAXY_DEV_MAIL_RELAY_PASSWORD}|g" \
|
||||||
|
"${{ gitea.workspace }}/tools/dev-deploy/mailpit/relay.conf.tmpl" > "$rendered"
|
||||||
|
docker volume create galaxy-dev-mailpit-config >/dev/null
|
||||||
|
docker run --rm \
|
||||||
|
-v galaxy-dev-mailpit-config:/dst \
|
||||||
|
-v "$rendered:/src/relay.conf:ro" \
|
||||||
|
alpine sh -c 'cp /src/relay.conf /dst/relay.conf && chmod 600 /dst/relay.conf'
|
||||||
|
rm -f "$rendered"
|
||||||
|
|
||||||
- name: Recycle engine containers on image drift
|
- name: Recycle engine containers on image drift
|
||||||
run: |
|
run: |
|
||||||
# Compare the freshly-built `galaxy-engine:dev` SHA against
|
# Compare the freshly-built `galaxy-engine:dev` SHA against
|
||||||
# every running `galaxy-game-*` container. The backend
|
# every running `galaxy-game-*` container. The backend
|
||||||
# reconciler adopts pre-existing labelled engine containers
|
# reconciler adopts pre-existing labelled engine containers
|
||||||
# without checking image drift, so a running sandbox would
|
# without checking image drift, so a running game would
|
||||||
# otherwise keep serving the previous engine code until the
|
# otherwise keep serving the previous engine code until the
|
||||||
# container is recycled by hand. This step makes the recycle
|
# container is recycled by hand. This step makes the recycle
|
||||||
# automatic but only when it is actually needed:
|
# automatic but only when it is actually needed:
|
||||||
@@ -168,10 +193,7 @@ jobs:
|
|||||||
# silent state corruption otherwise), and cascade-delete
|
# silent state corruption otherwise), and cascade-delete
|
||||||
# the lobby `games` row (the FKs in `00001_init.sql`
|
# the lobby `games` row (the FKs in `00001_init.sql`
|
||||||
# drop the matching `runtime_records`, `memberships`,
|
# drop the matching `runtime_records`, `memberships`,
|
||||||
# `player_mappings`, etc. in the same write). The
|
# `player_mappings`, etc. in the same write).
|
||||||
# `dev-sandbox` bootstrap on the next backend boot finds
|
|
||||||
# no live sandbox and provisions a fresh one on the new
|
|
||||||
# engine image.
|
|
||||||
#
|
#
|
||||||
# Backend is stopped first to keep the reconciler from
|
# Backend is stopped first to keep the reconciler from
|
||||||
# racing the recycle (mid-stream adoption / restart). The
|
# racing the recycle (mid-stream adoption / restart). The
|
||||||
@@ -234,11 +256,24 @@ jobs:
|
|||||||
|
|
||||||
- name: Bring up the stack
|
- name: Bring up the stack
|
||||||
working-directory: tools/dev-deploy
|
working-directory: tools/dev-deploy
|
||||||
|
env:
|
||||||
|
# Recipient regex Mailpit auto-relays to the owner's Gmail.
|
||||||
|
# Unset/empty → the compose default (non-routable) keeps the
|
||||||
|
# stack capture-only.
|
||||||
|
GALAXY_DEV_MAIL_RELAY_MATCH: ${{ vars.GALAXY_DEV_MAIL_RELAY_MATCH }}
|
||||||
|
# Grafana admin password; unset/empty -> compose default 'admin'.
|
||||||
|
GALAXY_DEV_GRAFANA_ADMIN_PASSWORD: ${{ secrets.GALAXY_DEV_GRAFANA_ADMIN_PASSWORD }}
|
||||||
run: |
|
run: |
|
||||||
# Resolve in the shell, not in YAML expressions — `env.HOME`
|
# Resolve in the shell, not in YAML expressions — `env.HOME`
|
||||||
# is empty at the workflow-evaluation stage.
|
# is empty at the workflow-evaluation stage.
|
||||||
export GALAXY_DEV_GAME_STATE_DIR="$HOME/.galaxy-dev/game-state"
|
export GALAXY_DEV_GAME_STATE_DIR="$HOME/.galaxy-dev/game-state"
|
||||||
mkdir -p "$GALAXY_DEV_GAME_STATE_DIR"
|
mkdir -p "$GALAXY_DEV_GAME_STATE_DIR"
|
||||||
|
# Seed the monitoring config to a stable, reboot-surviving host
|
||||||
|
# path (compose binds \${GALAXY_DEV_MONITORING_DIR} read-only).
|
||||||
|
export GALAXY_DEV_MONITORING_DIR="$HOME/.galaxy-dev/monitoring"
|
||||||
|
rm -rf "$GALAXY_DEV_MONITORING_DIR"
|
||||||
|
mkdir -p "$GALAXY_DEV_MONITORING_DIR"
|
||||||
|
cp -r monitoring/. "$GALAXY_DEV_MONITORING_DIR/"
|
||||||
docker compose up -d --wait --remove-orphans
|
docker compose up -d --wait --remove-orphans
|
||||||
|
|
||||||
- name: Probe the stack
|
- name: Probe the stack
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ import (
|
|||||||
"galaxy/backend/internal/app"
|
"galaxy/backend/internal/app"
|
||||||
"galaxy/backend/internal/auth"
|
"galaxy/backend/internal/auth"
|
||||||
"galaxy/backend/internal/config"
|
"galaxy/backend/internal/config"
|
||||||
"galaxy/backend/internal/devsandbox"
|
|
||||||
"galaxy/backend/internal/diplomail"
|
"galaxy/backend/internal/diplomail"
|
||||||
"galaxy/backend/internal/diplomail/detector"
|
"galaxy/backend/internal/diplomail/detector"
|
||||||
"galaxy/backend/internal/diplomail/translator"
|
"galaxy/backend/internal/diplomail/translator"
|
||||||
@@ -274,29 +273,18 @@ func run(ctx context.Context) (err error) {
|
|||||||
)
|
)
|
||||||
runtimeGateway.svc = runtimeSvc
|
runtimeGateway.svc = runtimeSvc
|
||||||
|
|
||||||
// Run a single reconciliation pass before the dev-sandbox
|
// Run a single reconciliation pass at startup so any runtime row
|
||||||
// bootstrap so any runtime row pointing at a vanished engine
|
// pointing at a vanished engine container (a host reboot wiped
|
||||||
// container (host reboot wiped /tmp/galaxy-game-state/<uuid>;
|
// /tmp/galaxy-game-state/<uuid>; `tools/local-dev`'s
|
||||||
// `tools/local-dev`'s `prune-broken-engines` target reaped the
|
// `prune-broken-engines` target reaped the husk) is cascaded
|
||||||
// husk) is already cascaded through `markRemoved` → lobby
|
// through `markRemoved` → lobby `cancelled` before the server
|
||||||
// `cancelled` by the time the bootstrap walks the sandbox list.
|
// starts serving requests. Failures are
|
||||||
// Without this pre-tick the bootstrap would reuse the
|
|
||||||
// soon-to-be-cancelled game and force the developer into a
|
|
||||||
// second `make up` cycle to land a healthy sandbox. Failures are
|
|
||||||
// non-fatal: the periodic ticker started later catches up, and
|
// non-fatal: the periodic ticker started later catches up, and
|
||||||
// the worst case degrades to the legacy two-cycle recovery.
|
// the worst case degrades to the legacy two-cycle recovery.
|
||||||
if err := runtimeSvc.Reconciler().Tick(ctx); err != nil {
|
if err := runtimeSvc.Reconciler().Tick(ctx); err != nil {
|
||||||
logger.Warn("pre-bootstrap reconciler tick failed", zap.Error(err))
|
logger.Warn("pre-bootstrap reconciler tick failed", zap.Error(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := devsandbox.Bootstrap(ctx, devsandbox.Deps{
|
|
||||||
Users: userSvc,
|
|
||||||
Lobby: lobbySvc,
|
|
||||||
EngineVersions: engineVersionSvc,
|
|
||||||
}, cfg.DevSandbox, logger); err != nil {
|
|
||||||
return fmt.Errorf("dev sandbox bootstrap: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
notifStore := notification.NewStore(db)
|
notifStore := notification.NewStore(db)
|
||||||
notifSvc := notification.NewService(notification.Deps{
|
notifSvc := notification.NewService(notification.Deps{
|
||||||
Store: notifStore,
|
Store: notifStore,
|
||||||
|
|||||||
@@ -17,6 +17,8 @@
|
|||||||
<a href="/_gm/games"{{if eq .ActiveNav "games"}} class="active"{{end}}>Games</a>
|
<a href="/_gm/games"{{if eq .ActiveNav "games"}} class="active"{{end}}>Games</a>
|
||||||
<a href="/_gm/operators"{{if eq .ActiveNav "operators"}} class="active"{{end}}>Operators</a>
|
<a href="/_gm/operators"{{if eq .ActiveNav "operators"}} class="active"{{end}}>Operators</a>
|
||||||
<a href="/_gm/mail"{{if eq .ActiveNav "mail"}} class="active"{{end}}>Mail</a>
|
<a href="/_gm/mail"{{if eq .ActiveNav "mail"}} class="active"{{end}}>Mail</a>
|
||||||
|
<a href="/_gm/grafana/" target="_blank" rel="noopener">Grafana</a>
|
||||||
|
<a href="/_gm/mailpit/" target="_blank" rel="noopener">Mailpit</a>
|
||||||
</nav>
|
</nav>
|
||||||
<span class="who">{{.Username}}</span>
|
<span class="who">{{.Username}}</span>
|
||||||
</header>
|
</header>
|
||||||
|
|||||||
@@ -105,11 +105,6 @@ const (
|
|||||||
envDiplomailTranslatorTimeout = "BACKEND_DIPLOMAIL_TRANSLATOR_TIMEOUT"
|
envDiplomailTranslatorTimeout = "BACKEND_DIPLOMAIL_TRANSLATOR_TIMEOUT"
|
||||||
envDiplomailTranslatorMaxAttempts = "BACKEND_DIPLOMAIL_TRANSLATOR_MAX_ATTEMPTS"
|
envDiplomailTranslatorMaxAttempts = "BACKEND_DIPLOMAIL_TRANSLATOR_MAX_ATTEMPTS"
|
||||||
envDiplomailWorkerInterval = "BACKEND_DIPLOMAIL_WORKER_INTERVAL"
|
envDiplomailWorkerInterval = "BACKEND_DIPLOMAIL_WORKER_INTERVAL"
|
||||||
|
|
||||||
envDevSandboxEmail = "BACKEND_DEV_SANDBOX_EMAIL"
|
|
||||||
envDevSandboxEngineImage = "BACKEND_DEV_SANDBOX_ENGINE_IMAGE"
|
|
||||||
envDevSandboxEngineVersion = "BACKEND_DEV_SANDBOX_ENGINE_VERSION"
|
|
||||||
envDevSandboxPlayerCount = "BACKEND_DEV_SANDBOX_PLAYER_COUNT"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Default values applied when an environment variable is absent.
|
// Default values applied when an environment variable is absent.
|
||||||
@@ -178,9 +173,6 @@ const (
|
|||||||
defaultDiplomailTranslatorTimeout = 10 * time.Second
|
defaultDiplomailTranslatorTimeout = 10 * time.Second
|
||||||
defaultDiplomailTranslatorMaxAttempts = 5
|
defaultDiplomailTranslatorMaxAttempts = 5
|
||||||
defaultDiplomailWorkerInterval = 2 * time.Second
|
defaultDiplomailWorkerInterval = 2 * time.Second
|
||||||
|
|
||||||
defaultDevSandboxEngineVersion = "0.1.0"
|
|
||||||
defaultDevSandboxPlayerCount = 20
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Allowed values for the closed-set string options.
|
// Allowed values for the closed-set string options.
|
||||||
@@ -219,29 +211,12 @@ type Config struct {
|
|||||||
Runtime RuntimeConfig
|
Runtime RuntimeConfig
|
||||||
Notification NotificationConfig
|
Notification NotificationConfig
|
||||||
Diplomail DiplomailConfig
|
Diplomail DiplomailConfig
|
||||||
DevSandbox DevSandboxConfig
|
|
||||||
|
|
||||||
// FreshnessWindow mirrors the gateway freshness window and is used by the
|
// FreshnessWindow mirrors the gateway freshness window and is used by the
|
||||||
// push server to bound the cursor TTL.
|
// push server to bound the cursor TTL.
|
||||||
FreshnessWindow time.Duration
|
FreshnessWindow time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// DevSandboxConfig configures the boot-time bootstrap implemented in
|
|
||||||
// `backend/internal/devsandbox`. When Email is empty the bootstrap
|
|
||||||
// is a no-op, which is the production posture. When Email is set —
|
|
||||||
// from `BACKEND_DEV_SANDBOX_EMAIL` in the `tools/local-dev` stack —
|
|
||||||
// the bootstrap idempotently provisions a real user, the configured
|
|
||||||
// number of dummy participants, a private "Dev Sandbox" game, the
|
|
||||||
// matching memberships, and drives the lifecycle to `running`. The
|
|
||||||
// engine image and engine version refer to a row that the bootstrap
|
|
||||||
// also seeds in `engine_versions`.
|
|
||||||
type DevSandboxConfig struct {
|
|
||||||
Email string
|
|
||||||
EngineImage string
|
|
||||||
EngineVersion string
|
|
||||||
PlayerCount int
|
|
||||||
}
|
|
||||||
|
|
||||||
// LoggingConfig stores the parameters used by the structured logger.
|
// LoggingConfig stores the parameters used by the structured logger.
|
||||||
type LoggingConfig struct {
|
type LoggingConfig struct {
|
||||||
// Level is the zap level name (e.g. "debug", "info", "warn", "error").
|
// Level is the zap level name (e.g. "debug", "info", "warn", "error").
|
||||||
@@ -572,10 +547,6 @@ func DefaultConfig() Config {
|
|||||||
TranslatorMaxAttempts: defaultDiplomailTranslatorMaxAttempts,
|
TranslatorMaxAttempts: defaultDiplomailTranslatorMaxAttempts,
|
||||||
WorkerInterval: defaultDiplomailWorkerInterval,
|
WorkerInterval: defaultDiplomailWorkerInterval,
|
||||||
},
|
},
|
||||||
DevSandbox: DevSandboxConfig{
|
|
||||||
EngineVersion: defaultDevSandboxEngineVersion,
|
|
||||||
PlayerCount: defaultDevSandboxPlayerCount,
|
|
||||||
},
|
|
||||||
Runtime: RuntimeConfig{
|
Runtime: RuntimeConfig{
|
||||||
WorkerPoolSize: defaultRuntimeWorkerPoolSize,
|
WorkerPoolSize: defaultRuntimeWorkerPoolSize,
|
||||||
JobQueueSize: defaultRuntimeJobQueueSize,
|
JobQueueSize: defaultRuntimeJobQueueSize,
|
||||||
@@ -755,13 +726,6 @@ func LoadFromEnv() (Config, error) {
|
|||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.DevSandbox.Email = strings.TrimSpace(loadString(envDevSandboxEmail, cfg.DevSandbox.Email))
|
|
||||||
cfg.DevSandbox.EngineImage = strings.TrimSpace(loadString(envDevSandboxEngineImage, cfg.DevSandbox.EngineImage))
|
|
||||||
cfg.DevSandbox.EngineVersion = strings.TrimSpace(loadString(envDevSandboxEngineVersion, cfg.DevSandbox.EngineVersion))
|
|
||||||
if cfg.DevSandbox.PlayerCount, err = loadInt(envDevSandboxPlayerCount, cfg.DevSandbox.PlayerCount); err != nil {
|
|
||||||
return Config{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := cfg.Validate(); err != nil {
|
if err := cfg.Validate(); err != nil {
|
||||||
return Config{}, err
|
return Config{}, err
|
||||||
}
|
}
|
||||||
@@ -973,21 +937,6 @@ func (c Config) Validate() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if email := strings.TrimSpace(c.DevSandbox.Email); email != "" {
|
|
||||||
if _, err := netmail.ParseAddress(email); err != nil {
|
|
||||||
return fmt.Errorf("%s must be a valid RFC 5322 address: %w", envDevSandboxEmail, err)
|
|
||||||
}
|
|
||||||
if strings.TrimSpace(c.DevSandbox.EngineImage) == "" {
|
|
||||||
return fmt.Errorf("%s must not be empty when %s is set", envDevSandboxEngineImage, envDevSandboxEmail)
|
|
||||||
}
|
|
||||||
if strings.TrimSpace(c.DevSandbox.EngineVersion) == "" {
|
|
||||||
return fmt.Errorf("%s must not be empty when %s is set", envDevSandboxEngineVersion, envDevSandboxEmail)
|
|
||||||
}
|
|
||||||
if c.DevSandbox.PlayerCount <= 0 {
|
|
||||||
return fmt.Errorf("%s must be positive when %s is set", envDevSandboxPlayerCount, envDevSandboxEmail)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,287 +0,0 @@
|
|||||||
// Package devsandbox provisions a ready-to-play game on backend boot
|
|
||||||
// for the `tools/local-dev` stack.
|
|
||||||
//
|
|
||||||
// Bootstrap is invoked from `backend/cmd/backend/main.go` after the
|
|
||||||
// admin bootstrap and before the HTTP listener starts. It reads
|
|
||||||
// `cfg.DevSandbox`; when `Email` is empty (the production posture)
|
|
||||||
// the function logs "skipped" and returns nil. When set, it
|
|
||||||
// idempotently:
|
|
||||||
//
|
|
||||||
// 1. registers the configured engine version and image;
|
|
||||||
// 2. find-or-creates the real dev user with the configured email;
|
|
||||||
// 3. find-or-creates `cfg.PlayerCount - 1` deterministic dummy
|
|
||||||
// users so the engine's minimum-players constraint is met;
|
|
||||||
// 4. find-or-creates a private "Dev Sandbox" game owned by the
|
|
||||||
// real user with min/max_players = cfg.PlayerCount and a
|
|
||||||
// year-out turn schedule (effectively frozen at turn 1);
|
|
||||||
// 5. inserts memberships for all participants bypassing the
|
|
||||||
// application/approval flow;
|
|
||||||
// 6. drives the lifecycle to `running` (or as far as possible if
|
|
||||||
// the runtime is busy).
|
|
||||||
//
|
|
||||||
// The function is a no-op on subsequent boots once the game is
|
|
||||||
// running; partial states from earlier crashes are recovered.
|
|
||||||
package devsandbox
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"galaxy/backend/internal/config"
|
|
||||||
"galaxy/backend/internal/lobby"
|
|
||||||
"galaxy/backend/internal/runtime"
|
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
"go.uber.org/zap"
|
|
||||||
)
|
|
||||||
|
|
||||||
// SandboxGameName is the display name used to identify the
|
|
||||||
// auto-provisioned game on subsequent reboots. The combination of
|
|
||||||
// game_name and owner_user_id is unique enough in practice — only
|
|
||||||
// the dev sandbox bootstrap creates a game owned by the configured
|
|
||||||
// real user with this exact name.
|
|
||||||
const SandboxGameName = "Dev Sandbox"
|
|
||||||
|
|
||||||
// SandboxTurnSchedule keeps the game on turn 1 by scheduling the
|
|
||||||
// next turn a year out. The runtime scheduler still parses this and
|
|
||||||
// will tick once a year — long enough to never interfere with
|
|
||||||
// solo UI development.
|
|
||||||
const SandboxTurnSchedule = "0 0 1 1 *"
|
|
||||||
|
|
||||||
// UserEnsurer matches `auth.UserEnsurer`. We define a local
|
|
||||||
// interface to avoid importing the auth package and circular
|
|
||||||
// dependencies — the production wiring passes the same `*user.Service`
|
|
||||||
// instance used by auth.
|
|
||||||
type UserEnsurer interface {
|
|
||||||
EnsureByEmail(ctx context.Context, email, preferredLanguage, timeZone, declaredCountry string) (uuid.UUID, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deps aggregates the collaborators Bootstrap needs.
|
|
||||||
type Deps struct {
|
|
||||||
Users UserEnsurer
|
|
||||||
Lobby *lobby.Service
|
|
||||||
EngineVersions *runtime.EngineVersionService
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bootstrap runs the provisioning flow described in the
|
|
||||||
// package doc comment. Errors are returned to the caller; the boot
|
|
||||||
// path in `cmd/backend/main.go` aborts startup if Bootstrap fails so
|
|
||||||
// a misconfigured dev environment surfaces immediately rather than
|
|
||||||
// silently leaving the lobby empty.
|
|
||||||
func Bootstrap(ctx context.Context, deps Deps, cfg config.DevSandboxConfig, logger *zap.Logger) error {
|
|
||||||
if logger == nil {
|
|
||||||
logger = zap.NewNop()
|
|
||||||
}
|
|
||||||
logger = logger.Named("dev_sandbox")
|
|
||||||
|
|
||||||
if cfg.Email == "" {
|
|
||||||
logger.Info("skipped (no email)")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if deps.Users == nil || deps.Lobby == nil || deps.EngineVersions == nil {
|
|
||||||
return errors.New("dev_sandbox: deps.Users, deps.Lobby and deps.EngineVersions are required")
|
|
||||||
}
|
|
||||||
if cfg.PlayerCount <= 0 {
|
|
||||||
return fmt.Errorf("dev_sandbox: PlayerCount must be positive, got %d", cfg.PlayerCount)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := ensureEngineVersion(ctx, deps.EngineVersions, cfg, logger); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
realID, err := deps.Users.EnsureByEmail(ctx, cfg.Email, "en", "UTC", "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("dev_sandbox: ensure real user: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
dummyIDs := make([]uuid.UUID, 0, cfg.PlayerCount-1)
|
|
||||||
for i := 1; i < cfg.PlayerCount; i++ {
|
|
||||||
email := fmt.Sprintf("dev-dummy-%02d@local.test", i)
|
|
||||||
id, err := deps.Users.EnsureByEmail(ctx, email, "en", "UTC", "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("dev_sandbox: ensure dummy %d: %w", i, err)
|
|
||||||
}
|
|
||||||
dummyIDs = append(dummyIDs, id)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := purgeTerminalSandboxGames(ctx, deps.Lobby, realID, logger); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
game, err := findOrCreateSandboxGame(ctx, deps.Lobby, realID, cfg)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
game, err = ensureMembershipsAndDrive(ctx, deps.Lobby, game, realID, dummyIDs, logger)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.Info("bootstrap complete",
|
|
||||||
zap.String("user_id", realID.String()),
|
|
||||||
zap.String("game_id", game.GameID.String()),
|
|
||||||
zap.String("status", game.Status),
|
|
||||||
)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func ensureEngineVersion(ctx context.Context, svc *runtime.EngineVersionService, cfg config.DevSandboxConfig, logger *zap.Logger) error {
|
|
||||||
_, err := svc.Register(ctx, runtime.RegisterInput{
|
|
||||||
Version: cfg.EngineVersion,
|
|
||||||
ImageRef: cfg.EngineImage,
|
|
||||||
})
|
|
||||||
switch {
|
|
||||||
case err == nil:
|
|
||||||
logger.Info("engine version registered",
|
|
||||||
zap.String("version", cfg.EngineVersion),
|
|
||||||
zap.String("image", cfg.EngineImage),
|
|
||||||
)
|
|
||||||
return nil
|
|
||||||
case errors.Is(err, runtime.ErrEngineVersionTaken):
|
|
||||||
logger.Debug("engine version already registered",
|
|
||||||
zap.String("version", cfg.EngineVersion),
|
|
||||||
)
|
|
||||||
return nil
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("dev_sandbox: register engine version: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// terminalSandboxStatus reports whether a sandbox game has reached a
|
|
||||||
// state from which it can no longer be driven back to running. We
|
|
||||||
// treat such games as "absent" so the next bootstrap creates a fresh
|
|
||||||
// one rather than handing the developer a dead lobby tile.
|
|
||||||
func terminalSandboxStatus(status string) bool {
|
|
||||||
switch status {
|
|
||||||
case lobby.GameStatusCancelled, lobby.GameStatusFinished, lobby.GameStatusStartFailed:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// purgeTerminalSandboxGames deletes every previous "Dev Sandbox" game
|
|
||||||
// the dev user owns that has reached a terminal state
|
|
||||||
// (cancelled / finished / start_failed). The cascade declared in
|
|
||||||
// `00001_init.sql` removes the matching memberships, applications,
|
|
||||||
// invites, runtime records, and player mappings in the same write,
|
|
||||||
// so the developer's lobby never piles up dead tiles between
|
|
||||||
// `make rebuild` cycles. Non-terminal games are left untouched —
|
|
||||||
// a `running` sandbox from a previous boot is the happy path.
|
|
||||||
func purgeTerminalSandboxGames(ctx context.Context, svc *lobby.Service, ownerID uuid.UUID, logger *zap.Logger) error {
|
|
||||||
games, err := svc.ListMyGames(ctx, ownerID)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("dev_sandbox: list my games: %w", err)
|
|
||||||
}
|
|
||||||
for _, g := range games {
|
|
||||||
if g.GameName != SandboxGameName || g.OwnerUserID == nil || *g.OwnerUserID != ownerID {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if !terminalSandboxStatus(g.Status) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := svc.DeleteGame(ctx, g.GameID); err != nil {
|
|
||||||
return fmt.Errorf("dev_sandbox: delete terminal sandbox %s: %w", g.GameID, err)
|
|
||||||
}
|
|
||||||
logger.Info("purged terminal sandbox game",
|
|
||||||
zap.String("game_id", g.GameID.String()),
|
|
||||||
zap.String("status", g.Status),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func findOrCreateSandboxGame(ctx context.Context, svc *lobby.Service, ownerID uuid.UUID, cfg config.DevSandboxConfig) (lobby.GameRecord, error) {
|
|
||||||
games, err := svc.ListMyGames(ctx, ownerID)
|
|
||||||
if err != nil {
|
|
||||||
return lobby.GameRecord{}, fmt.Errorf("dev_sandbox: list my games: %w", err)
|
|
||||||
}
|
|
||||||
for _, g := range games {
|
|
||||||
if g.GameName != SandboxGameName || g.OwnerUserID == nil || *g.OwnerUserID != ownerID {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// `purgeTerminalSandboxGames` ran before us, so any sandbox
|
|
||||||
// game still in the list is either a live one we should
|
|
||||||
// reuse or a transient state we can drive forward.
|
|
||||||
return g, nil
|
|
||||||
}
|
|
||||||
rec, err := svc.CreateGame(ctx, lobby.CreateGameInput{
|
|
||||||
OwnerUserID: &ownerID,
|
|
||||||
Visibility: lobby.VisibilityPrivate,
|
|
||||||
GameName: SandboxGameName,
|
|
||||||
Description: "Auto-provisioned by backend/internal/devsandbox for solo UI development.",
|
|
||||||
MinPlayers: int32(cfg.PlayerCount),
|
|
||||||
MaxPlayers: int32(cfg.PlayerCount),
|
|
||||||
StartGapHours: 0,
|
|
||||||
StartGapPlayers: 0,
|
|
||||||
EnrollmentEndsAt: time.Now().Add(365 * 24 * time.Hour),
|
|
||||||
TurnSchedule: SandboxTurnSchedule,
|
|
||||||
TargetEngineVersion: cfg.EngineVersion,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return lobby.GameRecord{}, fmt.Errorf("dev_sandbox: create game: %w", err)
|
|
||||||
}
|
|
||||||
return rec, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func ensureMembershipsAndDrive(ctx context.Context, svc *lobby.Service, game lobby.GameRecord, realID uuid.UUID, dummyIDs []uuid.UUID, logger *zap.Logger) (lobby.GameRecord, error) {
|
|
||||||
caller := realID
|
|
||||||
if game.Status == lobby.GameStatusDraft {
|
|
||||||
next, err := svc.OpenEnrollment(ctx, &caller, false, game.GameID)
|
|
||||||
if err != nil {
|
|
||||||
return game, fmt.Errorf("dev_sandbox: open enrollment: %w", err)
|
|
||||||
}
|
|
||||||
game = next
|
|
||||||
}
|
|
||||||
|
|
||||||
if game.Status == lobby.GameStatusEnrollmentOpen {
|
|
||||||
users := append([]uuid.UUID{realID}, dummyIDs...)
|
|
||||||
for i, uid := range users {
|
|
||||||
raceName := fmt.Sprintf("Sandbox-%02d", i+1)
|
|
||||||
if _, err := svc.InsertMembershipDirect(ctx, lobby.InsertMembershipDirectInput{
|
|
||||||
GameID: game.GameID,
|
|
||||||
UserID: uid,
|
|
||||||
RaceName: raceName,
|
|
||||||
}); err != nil {
|
|
||||||
return game, fmt.Errorf("dev_sandbox: insert membership %d: %w", i+1, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
logger.Info("memberships ensured",
|
|
||||||
zap.Int("count", len(users)),
|
|
||||||
zap.String("game_id", game.GameID.String()),
|
|
||||||
)
|
|
||||||
next, err := svc.ReadyToStart(ctx, &caller, false, game.GameID)
|
|
||||||
if err != nil {
|
|
||||||
return game, fmt.Errorf("dev_sandbox: ready to start: %w", err)
|
|
||||||
}
|
|
||||||
game = next
|
|
||||||
}
|
|
||||||
|
|
||||||
if game.Status == lobby.GameStatusReadyToStart {
|
|
||||||
next, err := svc.Start(ctx, &caller, false, game.GameID)
|
|
||||||
if err != nil {
|
|
||||||
return game, fmt.Errorf("dev_sandbox: start: %w", err)
|
|
||||||
}
|
|
||||||
game = next
|
|
||||||
}
|
|
||||||
|
|
||||||
if game.Status == lobby.GameStatusStartFailed {
|
|
||||||
next, err := svc.RetryStart(ctx, &caller, false, game.GameID)
|
|
||||||
if err != nil {
|
|
||||||
logger.Warn("retry start failed", zap.Error(err))
|
|
||||||
return game, nil
|
|
||||||
}
|
|
||||||
game = next
|
|
||||||
if game.Status == lobby.GameStatusReadyToStart {
|
|
||||||
next, err := svc.Start(ctx, &caller, false, game.GameID)
|
|
||||||
if err != nil {
|
|
||||||
return game, fmt.Errorf("dev_sandbox: start after retry: %w", err)
|
|
||||||
}
|
|
||||||
game = next
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return game, nil
|
|
||||||
}
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
package devsandbox
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"galaxy/backend/internal/config"
|
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
"go.uber.org/zap"
|
|
||||||
)
|
|
||||||
|
|
||||||
// TestBootstrapSkippedWhenEmailEmpty exercises the no-op branch: with
|
|
||||||
// the production posture (Email == "") Bootstrap must return without
|
|
||||||
// touching any dependency. The fact that Users/Lobby/EngineVersions
|
|
||||||
// are nil here doubles as a check that the early-return runs first.
|
|
||||||
func TestBootstrapSkippedWhenEmailEmpty(t *testing.T) {
|
|
||||||
err := Bootstrap(
|
|
||||||
context.Background(),
|
|
||||||
Deps{},
|
|
||||||
config.DevSandboxConfig{},
|
|
||||||
zap.NewNop(),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("expected nil error on empty email, got: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestBootstrapRejectsZeroPlayerCount confirms the validation
|
|
||||||
// short-circuits the flow before any DB call when PlayerCount is
|
|
||||||
// non-positive but Email is set. The error path is fast and never
|
|
||||||
// dereferences the (still-nil) Lobby/EngineVersions deps.
|
|
||||||
func TestBootstrapRejectsZeroPlayerCount(t *testing.T) {
|
|
||||||
err := Bootstrap(
|
|
||||||
context.Background(),
|
|
||||||
Deps{Users: stubEnsurer{}, Lobby: nil, EngineVersions: nil},
|
|
||||||
config.DevSandboxConfig{
|
|
||||||
Email: "dev@local.test",
|
|
||||||
EngineImage: "galaxy-engine:local-dev",
|
|
||||||
EngineVersion: "0.0.0-local-dev",
|
|
||||||
PlayerCount: 0,
|
|
||||||
},
|
|
||||||
zap.NewNop(),
|
|
||||||
)
|
|
||||||
if err == nil {
|
|
||||||
t.Fatal("expected error on zero PlayerCount, got nil")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestBootstrapRejectsMissingDeps checks that a misconfigured wiring
|
|
||||||
// (Email set but one of the required services nil) fails fast rather
|
|
||||||
// than panicking when the bootstrap reaches its first service call.
|
|
||||||
func TestBootstrapRejectsMissingDeps(t *testing.T) {
|
|
||||||
err := Bootstrap(
|
|
||||||
context.Background(),
|
|
||||||
Deps{Users: stubEnsurer{}, Lobby: nil, EngineVersions: nil},
|
|
||||||
config.DevSandboxConfig{
|
|
||||||
Email: "dev@local.test",
|
|
||||||
EngineImage: "galaxy-engine:local-dev",
|
|
||||||
EngineVersion: "0.0.0-local-dev",
|
|
||||||
PlayerCount: 20,
|
|
||||||
},
|
|
||||||
zap.NewNop(),
|
|
||||||
)
|
|
||||||
if err == nil {
|
|
||||||
t.Fatal("expected error on missing deps, got nil")
|
|
||||||
}
|
|
||||||
if !errors.Is(err, errMissingDepsSentinel) && err.Error() == "" {
|
|
||||||
// The exact wording is not part of the contract; this branch
|
|
||||||
// only asserts the error is non-nil and human-readable.
|
|
||||||
t.Fatalf("error has empty message: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// errMissingDepsSentinel exists so the assertion above can compile;
|
|
||||||
// the real error is constructed via errors.New inside Bootstrap and
|
|
||||||
// is intentionally not exported. The test only needs to confirm the
|
|
||||||
// returned error has a message.
|
|
||||||
var errMissingDepsSentinel = errors.New("sentinel")
|
|
||||||
|
|
||||||
// TestTerminalSandboxStatus pins the contract that decides whether a
|
|
||||||
// previously created sandbox game gets purged on the next boot.
|
|
||||||
// Terminal states are deleted (cascade-style) so the developer's
|
|
||||||
// lobby never piles up dead tiles between `make rebuild` cycles.
|
|
||||||
func TestTerminalSandboxStatus(t *testing.T) {
|
|
||||||
terminal := []string{"cancelled", "finished", "start_failed"}
|
|
||||||
live := []string{"draft", "enrollment_open", "ready_to_start", "starting", "running", "paused"}
|
|
||||||
|
|
||||||
for _, status := range terminal {
|
|
||||||
if !terminalSandboxStatus(status) {
|
|
||||||
t.Errorf("expected %q to be terminal", status)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, status := range live {
|
|
||||||
if terminalSandboxStatus(status) {
|
|
||||||
t.Errorf("expected %q to be non-terminal", status)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type stubEnsurer struct{}
|
|
||||||
|
|
||||||
func (stubEnsurer) EnsureByEmail(_ context.Context, _, _, _, _ string) (uuid.UUID, error) {
|
|
||||||
return uuid.UUID{}, nil
|
|
||||||
}
|
|
||||||
@@ -274,11 +274,10 @@ func (s *Service) ListFinishedGamesBefore(ctx context.Context, cutoff time.Time)
|
|||||||
// `ON DELETE CASCADE` constraints declared in `00001_init.sql`.
|
// `ON DELETE CASCADE` constraints declared in `00001_init.sql`.
|
||||||
// Idempotent: returns nil when no game matches.
|
// Idempotent: returns nil when no game matches.
|
||||||
//
|
//
|
||||||
// Phase 14 introduces this method for the dev-sandbox bootstrap so a
|
// `DeleteGame` is destructive — a hard delete that bypasses the
|
||||||
// terminal "Dev Sandbox" tile from a previous local-dev session can
|
// cascade-notification machinery — so production callers stay on the
|
||||||
// be scrubbed before a fresh game spawns. Production callers must
|
// regular cancel / finish lifecycle. It is exercised by the lobby
|
||||||
// stay on the regular cancel / finish lifecycle — `DeleteGame` is
|
// integration tests.
|
||||||
// destructive and bypasses the cascade-notification machinery.
|
|
||||||
func (s *Service) DeleteGame(ctx context.Context, gameID uuid.UUID) error {
|
func (s *Service) DeleteGame(ctx context.Context, gameID uuid.UUID) error {
|
||||||
if err := s.deps.Store.DeleteGame(ctx, gameID); err != nil {
|
if err := s.deps.Store.DeleteGame(ctx, gameID); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -248,8 +248,8 @@ func TestEndToEndPrivateGameFlow(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestDeleteGameCascadesEverything pins the contract the dev-sandbox
|
// TestDeleteGameCascadesEverything pins the DeleteGame contract:
|
||||||
// bootstrap relies on: removing a game wipes every referencing row
|
// removing a game wipes every referencing row
|
||||||
// (memberships, applications, invites, runtime_records,
|
// (memberships, applications, invites, runtime_records,
|
||||||
// player_mappings) in a single SQL statement. Before this is wired
|
// player_mappings) in a single SQL statement. Before this is wired
|
||||||
// the developer's lobby piles up cancelled tiles between
|
// the developer's lobby piles up cancelled tiles between
|
||||||
|
|||||||
@@ -20,9 +20,9 @@ type InsertMembershipDirectInput struct {
|
|||||||
// writes as ApproveApplication: the per-game race-name reservation
|
// writes as ApproveApplication: the per-game race-name reservation
|
||||||
// row plus the membership row, and refreshes the in-memory caches.
|
// row plus the membership row, and refreshes the in-memory caches.
|
||||||
//
|
//
|
||||||
// The method is intended for boot-time provisioning by
|
// The method is intended for trusted boot-time provisioning and
|
||||||
// `backend/internal/devsandbox` and similar trusted callers. It is
|
// integration tests; it is not exposed through any HTTP handler. The
|
||||||
// not exposed through any HTTP handler. The caller must guarantee
|
// caller must guarantee
|
||||||
// game.Status == GameStatusEnrollmentOpen — the function returns
|
// game.Status == GameStatusEnrollmentOpen — the function returns
|
||||||
// ErrConflict otherwise — and that the race-name policy and
|
// ErrConflict otherwise — and that the race-name policy and
|
||||||
// canonical-key invariants are honoured (the implementation reuses
|
// canonical-key invariants are honoured (the implementation reuses
|
||||||
@@ -30,9 +30,8 @@ type InsertMembershipDirectInput struct {
|
|||||||
// or unsuitable name still fails).
|
// or unsuitable name still fails).
|
||||||
//
|
//
|
||||||
// Idempotency: if a membership for (GameID, UserID) already exists
|
// Idempotency: if a membership for (GameID, UserID) already exists
|
||||||
// the function returns the existing row without modifying state.
|
// the function returns the existing row without modifying state, so
|
||||||
// This makes the helper safe to call on every backend boot from
|
// the helper is safe to call repeatedly.
|
||||||
// devsandbox.Bootstrap.
|
|
||||||
func (s *Service) InsertMembershipDirect(ctx context.Context, in InsertMembershipDirectInput) (Membership, error) {
|
func (s *Service) InsertMembershipDirect(ctx context.Context, in InsertMembershipDirectInput) (Membership, error) {
|
||||||
displayName, err := ValidateDisplayName(in.RaceName)
|
displayName, err := ValidateDisplayName(in.RaceName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -236,9 +236,8 @@ func (s *Store) ListMyGames(ctx context.Context, userID uuid.UUID) ([]GameRecord
|
|||||||
// referencing table (memberships / applications / invites /
|
// referencing table (memberships / applications / invites /
|
||||||
// runtime_records / player_mappings — all declared with ON DELETE
|
// runtime_records / player_mappings — all declared with ON DELETE
|
||||||
// CASCADE in `00001_init.sql`). Idempotent: returns nil when no row
|
// CASCADE in `00001_init.sql`). Idempotent: returns nil when no row
|
||||||
// matches. Used by the dev-sandbox bootstrap to scrub terminal
|
// matches. A hard delete for trusted callers and integration tests;
|
||||||
// games on every backend boot so the developer's lobby never piles
|
// production lifecycle uses cancel / finish.
|
||||||
// up cancelled tiles.
|
|
||||||
func (s *Store) DeleteGame(ctx context.Context, gameID uuid.UUID) error {
|
func (s *Store) DeleteGame(ctx context.Context, gameID uuid.UUID) error {
|
||||||
g := table.Games
|
g := table.Games
|
||||||
stmt := g.DELETE().WHERE(g.GameID.EQ(postgres.UUID(gameID)))
|
stmt := g.DELETE().WHERE(g.GameID.EQ(postgres.UUID(gameID)))
|
||||||
|
|||||||
@@ -888,6 +888,19 @@ addition.
|
|||||||
- Health probes are unauthenticated `GET /healthz` (process liveness) and
|
- Health probes are unauthenticated `GET /healthz` (process liveness) and
|
||||||
`GET /readyz` (Postgres reachable, migrations applied, gRPC listener
|
`GET /readyz` (Postgres reachable, migrations applied, gRPC listener
|
||||||
bound). Probes are excluded from anti-replay and rate limiting.
|
bound). Probes are excluded from anti-replay and rate limiting.
|
||||||
|
- **Collection (dev, production mirror).** The long-lived dev environment
|
||||||
|
(`tools/dev-deploy/`) runs a full metrics + logs + traces stack on its
|
||||||
|
internal network with no host ports: Prometheus scrapes the backend
|
||||||
|
(`:9100`) and gateway (`:9191`) endpoints plus `node-exporter` and
|
||||||
|
cAdvisor; Tempo ingests OTLP traces from backend and gateway; Loki
|
||||||
|
stores container logs shipped by promtail (Docker service-discovery on
|
||||||
|
the `galaxy.stack=dev-deploy` label). Grafana (provisioned datasources
|
||||||
|
+ dashboards) and the Mailpit capture UI are reached only through the
|
||||||
|
operator console's single `/_gm` Basic Auth gate (§14.1) — at
|
||||||
|
`/_gm/grafana/` and `/_gm/mailpit/` — so one password covers the
|
||||||
|
console and both UIs. Retention is tuned small (Prometheus 15d, Loki
|
||||||
|
7d, Tempo 3d). The same compose fragment is meant to back production.
|
||||||
|
See `tools/dev-deploy/monitoring/README.md`.
|
||||||
|
|
||||||
## 18. CI and Environments
|
## 18. CI and Environments
|
||||||
|
|
||||||
|
|||||||
+4
-1
@@ -1182,7 +1182,10 @@ The console landing page is a dashboard that summarises operational
|
|||||||
health: whether the backend is ready and the database reachable, how many
|
health: whether the backend is ready and the database reachable, how many
|
||||||
game runtimes sit in each state, and the depth of the mail and
|
game runtimes sit in each state, and the depth of the mail and
|
||||||
notification queues. It is a read-only point-in-time view for quick
|
notification queues. It is a read-only point-in-time view for quick
|
||||||
triage, not a metrics history.
|
triage, not a metrics history. The console nav also links to Grafana
|
||||||
|
(metrics, logs and traces) and the Mailpit capture UI, which the
|
||||||
|
deployment serves under the same `/_gm` Basic Auth gate — one sign-in
|
||||||
|
covers the console and both UIs.
|
||||||
|
|
||||||
### 10.3 Admin account management
|
### 10.3 Admin account management
|
||||||
|
|
||||||
|
|||||||
@@ -1218,7 +1218,9 @@ admin-API, либо через серверно-рендеримую веб-ко
|
|||||||
здоровье: готов ли backend и доступна ли БД, сколько игровых рантаймов
|
здоровье: готов ли backend и доступна ли БД, сколько игровых рантаймов
|
||||||
в каждом состоянии, какова глубина очередей почты и уведомлений. Это
|
в каждом состоянии, какова глубина очередей почты и уведомлений. Это
|
||||||
read-only-срез на текущий момент для быстрой диагностики, не история
|
read-only-срез на текущий момент для быстрой диагностики, не история
|
||||||
метрик.
|
метрик. Навигация консоли также ведёт в Grafana (метрики, логи и
|
||||||
|
трейсы) и в UI захвата почты Mailpit, которые деплой отдаёт под тем же
|
||||||
|
шлюзом Basic Auth `/_gm` — один вход покрывает консоль и оба UI.
|
||||||
|
|
||||||
### 10.3 Управление admin-аккаунтами
|
### 10.3 Управление admin-аккаунтами
|
||||||
|
|
||||||
|
|||||||
@@ -7,12 +7,6 @@
|
|||||||
# baked into `docker-compose.yml`, so this file documents the knobs
|
# baked into `docker-compose.yml`, so this file documents the knobs
|
||||||
# rather than driving them.
|
# rather than driving them.
|
||||||
|
|
||||||
# Auto-provisioned sandbox bootstrap. Empty disables the bootstrap.
|
|
||||||
BACKEND_DEV_SANDBOX_EMAIL=dev@galaxy.lan
|
|
||||||
BACKEND_DEV_SANDBOX_ENGINE_IMAGE=galaxy-engine:dev
|
|
||||||
BACKEND_DEV_SANDBOX_ENGINE_VERSION=0.1.0
|
|
||||||
BACKEND_DEV_SANDBOX_PLAYER_COUNT=20
|
|
||||||
|
|
||||||
# `123456` short-circuits the email-code path for the dev account.
|
# `123456` short-circuits the email-code path for the dev account.
|
||||||
# This is also the docker-compose default — set the variable to an
|
# This is also the docker-compose default — set the variable to an
|
||||||
# empty string here when the environment must rely on real Mailpit
|
# empty string here when the environment must rely on real Mailpit
|
||||||
|
|||||||
@@ -29,12 +29,34 @@
|
|||||||
reverse_proxy galaxy-api:8080
|
reverse_proxy galaxy-api:8080
|
||||||
}
|
}
|
||||||
|
|
||||||
# Operator console. Shares the gateway public listener with `/api`; the
|
# Operator console + observability behind one Basic Auth gate. The gate
|
||||||
# gateway applies the admin anti-abuse class and reverse-proxies to the
|
# credential equals the admin-console account (dev: gm / gm-dev-password),
|
||||||
# backend `/_gm` surface, which enforces Basic Auth and renders the pages.
|
# so Caddy forwards the same Authorization header to the backend `/_gm`
|
||||||
|
# surface (its own Basic Auth) and to Grafana/Mailpit — one prompt covers
|
||||||
|
# all three. The gateway applies the admin anti-abuse class to the console.
|
||||||
@gm path /_gm /_gm/*
|
@gm path /_gm /_gm/*
|
||||||
handle @gm {
|
handle @gm {
|
||||||
reverse_proxy galaxy-api:8080
|
basic_auth {
|
||||||
|
gm "$2a$14$xVh1TLaZxh8fazlKrI9Mx.NQMQlMarYWtr3FRELmZIXuac/DeeTRO"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Grafana under /_gm/grafana/ (sub-path mode; anonymous Admin, so the
|
||||||
|
# /_gm gate is the only barrier — GF_AUTH_BASIC_ENABLED=false makes it
|
||||||
|
# ignore the forwarded Authorization header).
|
||||||
|
handle /_gm/grafana/* {
|
||||||
|
reverse_proxy galaxy-grafana:3000
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mailpit captured-mail UI under /_gm/mailpit/ (MP_WEBROOT). Shows
|
||||||
|
# every message the backend sent, relayed or not.
|
||||||
|
handle /_gm/mailpit/* {
|
||||||
|
reverse_proxy galaxy-mailpit:8025
|
||||||
|
}
|
||||||
|
|
||||||
|
# The operator console itself (gateway -> backend /_gm surface).
|
||||||
|
handle {
|
||||||
|
reverse_proxy galaxy-api:8080
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Bare `/game` (no trailing slash) -> `/game/` so the SPA root
|
# Bare `/game` (no trailing slash) -> `/game/` so the SPA root
|
||||||
|
|||||||
@@ -1,164 +1,8 @@
|
|||||||
# `tools/dev-deploy/` — known issues
|
# `tools/dev-deploy/` — known issues
|
||||||
|
|
||||||
Issues that surface in the long-lived dev environment but are not yet
|
Issues that surfaced in the long-lived dev environment. Each entry lists
|
||||||
fixed. Each entry lists the observed symptom, the diagnostic evidence,
|
the observed symptom, the diagnostic evidence, and the fix or the open
|
||||||
the working hypothesis, and the open questions that have to be
|
questions that have to be answered before a fix lands.
|
||||||
answered before a fix lands.
|
|
||||||
|
|
||||||
## Dev Sandbox game flips to `cancelled` after a `dev-deploy` redispatch
|
|
||||||
|
|
||||||
### Symptom
|
|
||||||
|
|
||||||
A previously `running` "Dev Sandbox" game (created by
|
|
||||||
`backend/internal/devsandbox`) transitions to `cancelled` ~15 minutes
|
|
||||||
after a `dev-deploy.yaml` workflow_dispatch run finishes. The user's
|
|
||||||
browser session survives (the same `device_session_id` keeps working),
|
|
||||||
but the lobby shows no game because the only game it had is now
|
|
||||||
terminal. `purgeTerminalSandboxGames` does pick it up on the **next**
|
|
||||||
boot and creates a fresh sandbox — but the first redispatch leaves
|
|
||||||
the user with an empty lobby until backend restarts again.
|
|
||||||
|
|
||||||
### Diagnostic evidence
|
|
||||||
|
|
||||||
Backend logs from the broken cycle (timestamps abbreviated):
|
|
||||||
|
|
||||||
```text
|
|
||||||
20:24:40 dev_sandbox: purged terminal sandbox game game_id=<prev> status=cancelled
|
|
||||||
20:24:40 dev_sandbox: memberships ensured count=20 game_id=<new>
|
|
||||||
20:24:40 dev_sandbox: bootstrap complete user_id=<owner> game_id=<new> status=starting
|
|
||||||
...
|
|
||||||
20:25:09 user mail sent failed (diplomail tables missing — unrelated)
|
|
||||||
...
|
|
||||||
20:39:40 lobby: game cancelled by runtime reconciler game_id=<new>
|
|
||||||
op=reconcile status=removed message="container disappeared"
|
|
||||||
```
|
|
||||||
|
|
||||||
Between 20:24:40 (`status=starting`) and 20:39:40 (reconciler cancel)
|
|
||||||
the backend logs are silent on the runtime / engine paths — no
|
|
||||||
`engine spawned`, no `engine container started`, no `runtime
|
|
||||||
transition` lines. The reconciler then fires and reports the engine
|
|
||||||
container as missing.
|
|
||||||
|
|
||||||
`docker ps -a --filter 'label=org.opencontainers.image.title=galaxy-game-engine'`
|
|
||||||
returns no rows during this window — the engine container is neither
|
|
||||||
running nor stopped on the host, so it either was never spawned or
|
|
||||||
was removed before the host snapshot.
|
|
||||||
|
|
||||||
### What has been ruled out
|
|
||||||
|
|
||||||
A live `docker inspect` on a healthy engine container shows:
|
|
||||||
|
|
||||||
```text
|
|
||||||
Labels: galaxy.backend=1, galaxy.engine_version=0.1.0,
|
|
||||||
galaxy.game_id=<uuid>,
|
|
||||||
org.opencontainers.image.title=galaxy-game-engine,
|
|
||||||
com.galaxy.{cpu_quota,memory,pids_limit}
|
|
||||||
AutoRemove: false
|
|
||||||
RestartPolicy: on-failure
|
|
||||||
NetworkMode: galaxy-dev-internal
|
|
||||||
```
|
|
||||||
|
|
||||||
There are no `com.docker.compose.*` labels and `AutoRemove=false`,
|
|
||||||
so `--remove-orphans` cannot reap the engine and a `--rm`-style
|
|
||||||
self-destruct is not in play. Two redispatches captured under
|
|
||||||
`docker events --filter event=create,start,die,destroy,kill,stop`
|
|
||||||
also confirmed it: across both runs the only `die` / `destroy`
|
|
||||||
events were for `galaxy-dev-{backend,api,caddy}`. The live engine
|
|
||||||
container survived both redispatches, and the reconciler that
|
|
||||||
fires 60 seconds after the new backend boots correctly matched
|
|
||||||
it through `byGameID` / `byContainerID`.
|
|
||||||
|
|
||||||
`backend/internal/runtime/service.go` only removes engine
|
|
||||||
containers from the explicit `runStop` / `runRestart` / `runPatch`
|
|
||||||
paths. There is no `runtime.Service.Shutdown` that proactively
|
|
||||||
kills containers on backend exit, so a graceful SIGTERM to
|
|
||||||
`galaxy-dev-backend` will not touch its child engine containers.
|
|
||||||
|
|
||||||
### Host-side hypotheses considered and rejected by the owner
|
|
||||||
|
|
||||||
The natural follow-up suspects after compose was cleared — host-side
|
|
||||||
`docker prune` cron jobs, a manual `docker rm`, an out-of-band
|
|
||||||
`dockerd` restart, and an idle-state engine crash — were all
|
|
||||||
rejected by the project owner: the dev host runs none of those
|
|
||||||
periodic cleanups, no one manually removed the container, dockerd
|
|
||||||
was not restarted in the window, and the engine binary does not
|
|
||||||
crash while idling on API calls.
|
|
||||||
|
|
||||||
### Best remaining suspicion
|
|
||||||
|
|
||||||
Something the `dev-deploy.yaml` CI run does between successful
|
|
||||||
image builds and the final `docker compose up -d --wait
|
|
||||||
--remove-orphans` clobbers the previously-spawned engine container.
|
|
||||||
The chain at runtime contains:
|
|
||||||
|
|
||||||
1. `docker build -t galaxy-engine:dev -f game/Dockerfile .`
|
|
||||||
2. `docker compose build galaxy-backend galaxy-api`
|
|
||||||
3. `docker run --rm` alpine for the UI volume seed
|
|
||||||
4. `docker compose up -d --wait --remove-orphans`
|
|
||||||
|
|
||||||
None of these *should* touch an unmanaged engine container, but
|
|
||||||
the reproduction window points squarely inside this sequence. A
|
|
||||||
deliberate next reproduction with `docker events --since 0` armed
|
|
||||||
*before* the deploy starts and live for the entire job — captured
|
|
||||||
end-to-end on the dev host, not just the chunk after backend
|
|
||||||
recreate — would pin which step emits the `destroy` on the engine.
|
|
||||||
|
|
||||||
### Update 2026-05-19: integration preclean identified as one cause
|
|
||||||
|
|
||||||
A live reproduction during the post-merge auto-deploy cycle (Gitea
|
|
||||||
run #188 dev-deploy plus parallel run #190 integration) pinned one
|
|
||||||
clobbering source: `integration/scripts/preclean.sh` was unscoped
|
|
||||||
and removed *every* container labelled `galaxy.backend=1`, including
|
|
||||||
the dev-deploy engine. Timeline from the dev host:
|
|
||||||
|
|
||||||
```text
|
|
||||||
23:10:40 backend pre-bootstrap reconciler tick: engine alive
|
|
||||||
23:10:40 dev_sandbox bootstrap: status=running
|
|
||||||
23:10:56 preclean: removing 1 backend-managed engine containers ← integration run #190
|
|
||||||
23:11:40 reconciler: container disappeared → game cancelled
|
|
||||||
```
|
|
||||||
|
|
||||||
Fix landed: `BACKEND_STACK_LABEL=integration` is now passed to
|
|
||||||
every integration backend (see
|
|
||||||
`integration/testenv/backend.go`) and `preclean.sh` AND-combines
|
|
||||||
`galaxy.backend=1` with `galaxy.stack=integration`, so dev-deploy /
|
|
||||||
local-dev engines stamped with different stack values are no longer
|
|
||||||
collateral.
|
|
||||||
|
|
||||||
This covers **push**-triggered cycles where `dev-deploy.yaml` and
|
|
||||||
`integration.yaml` run on the same Gitea host. The original
|
|
||||||
hypothesis (a `workflow_dispatch dev-deploy` solo run also losing
|
|
||||||
the engine) is *not* explained by the integration fix — manual
|
|
||||||
dispatches do not trigger `integration.yaml`. Keep this entry open
|
|
||||||
until a solo-dispatch reproduction confirms whether the symptom
|
|
||||||
still occurs.
|
|
||||||
|
|
||||||
### Status
|
|
||||||
|
|
||||||
Partially fixed (push-triggered cycles). Solo `workflow_dispatch`
|
|
||||||
reproductions still open. If the symptom recurs after the
|
|
||||||
integration fix lands, capture `docker events --since 0` for the
|
|
||||||
full dispatch window and attach here.
|
|
||||||
|
|
||||||
### Workaround in use today
|
|
||||||
|
|
||||||
When the sandbox game flips to `cancelled`, redispatch `dev-deploy`:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
curl -X POST -n -H 'Content-Type: application/json' \
|
|
||||||
-d '{"ref":"<branch>"}' \
|
|
||||||
https://gitea.iliadenisov.ru/api/v1/repos/developer/galaxy-game/actions/workflows/dev-deploy.yaml/dispatches
|
|
||||||
```
|
|
||||||
|
|
||||||
The next boot's `purgeTerminalSandboxGames` removes the cancelled
|
|
||||||
row, `findOrCreateSandboxGame` creates a fresh one, and
|
|
||||||
`ensureMembershipsAndDrive` puts the new game back to `running`.
|
|
||||||
|
|
||||||
### Owner
|
|
||||||
|
|
||||||
Unassigned. File an issue once we have the runtime / reconciler
|
|
||||||
analysis above; reference this section in the issue body so future
|
|
||||||
redeploys can short-circuit the diagnostic loop.
|
|
||||||
|
|
||||||
## `docker restart galaxy-dev-backend` fails after the CI runner cleans up
|
## `docker restart galaxy-dev-backend` fails after the CI runner cleans up
|
||||||
|
|
||||||
|
|||||||
+79
-19
@@ -114,17 +114,72 @@ calls `make clean-data`.
|
|||||||
The same dev-mode email-code override as `tools/local-dev/` applies,
|
The same dev-mode email-code override as `tools/local-dev/` applies,
|
||||||
and the dev-deploy compose ships with it enabled by default:
|
and the dev-deploy compose ships with it enabled by default:
|
||||||
|
|
||||||
1. Enter `dev@galaxy.lan` (or whatever `BACKEND_DEV_SANDBOX_EMAIL`
|
1. Enter your email address in the login form.
|
||||||
resolves to) in the login form.
|
|
||||||
2. Submit `123456` as the code — the docker-compose default for
|
2. Submit `123456` as the code — the docker-compose default for
|
||||||
`BACKEND_AUTH_DEV_FIXED_CODE` is `123456`, so the bcrypt-hashed
|
`BACKEND_AUTH_DEV_FIXED_CODE` is `123456`, so the bcrypt-hashed
|
||||||
email code stays a fallback. To force real Mailpit codes (e.g. for
|
email code stays a fallback. To force the real email code (which
|
||||||
mail-flow QA), set `BACKEND_AUTH_DEV_FIXED_CODE=` (empty) in a
|
Mailpit then relays to your Gmail — see **Mail** below), set
|
||||||
local `.env` and `make rebuild`.
|
`BACKEND_AUTH_DEV_FIXED_CODE=` (empty) and redeploy.
|
||||||
|
|
||||||
The fixed-code override is rejected by production env loaders, so it
|
The fixed-code override is rejected by production env loaders, so it
|
||||||
cannot leak into the prod environment.
|
cannot leak into the prod environment.
|
||||||
|
|
||||||
|
## Mail
|
||||||
|
|
||||||
|
The backend always submits mail to **Mailpit** (`galaxy-mailpit:1025`),
|
||||||
|
exactly as it would to a production SMTP server. Mailpit captures every
|
||||||
|
message in its UI (internal `:8025`) and, when configured, **relays**
|
||||||
|
the ones whose recipient matches `GALAXY_DEV_MAIL_RELAY_MATCH` up to a
|
||||||
|
real Gmail account — so an OTP addressed to you lands in your real inbox
|
||||||
|
while everything else stays captured-only.
|
||||||
|
|
||||||
|
Configure the relay through Gitea Actions secrets/vars (never
|
||||||
|
committed); the `dev-deploy.yaml` workflow renders Mailpit's
|
||||||
|
`relay.conf` (from `tools/dev-deploy/mailpit/relay.conf.tmpl`) and seeds
|
||||||
|
it into the `galaxy-dev-mailpit-config` volume:
|
||||||
|
|
||||||
|
| Name | Kind | Purpose |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `GALAXY_DEV_MAIL_RELAY_USERNAME` | secret | Gmail address used as the relay login + From. |
|
||||||
|
| `GALAXY_DEV_MAIL_RELAY_PASSWORD` | secret | Gmail **App Password** (requires 2FA; not the account password). |
|
||||||
|
| `GALAXY_DEV_MAIL_RELAY_MATCH` | var | Recipient regex to auto-relay (e.g. your Gmail address). Unset → capture-only. |
|
||||||
|
|
||||||
|
With none set the stack only captures mail (the compose relay-match
|
||||||
|
defaults to a non-routable address), so it can never email third
|
||||||
|
parties.
|
||||||
|
|
||||||
|
The capture UI is exposed through the operator console's `/_gm` gate at
|
||||||
|
[`/_gm/mailpit/`](https://galaxy.lan/_gm/mailpit/) — one Basic Auth for
|
||||||
|
the console, Grafana and Mailpit (see **Observability**). It shows
|
||||||
|
**every** message the backend sent, relayed or not, so you can read any
|
||||||
|
account's OTP regardless of the relay-match. For multi-account testing:
|
||||||
|
register several `you+tag@gmail.com` aliases and widen the match to a
|
||||||
|
regex such as `^you(\+[^@]+)?@gmail\.com$` (Gmail folds every `+tag`
|
||||||
|
into one inbox), or just read the codes in the Mailpit UI, or skip mail
|
||||||
|
entirely with the `123456` dev-code.
|
||||||
|
|
||||||
|
## Observability
|
||||||
|
|
||||||
|
A full metrics + logs + traces stack runs alongside the app on the
|
||||||
|
internal network (no host ports), as a production mirror. **Grafana**
|
||||||
|
and the **Mailpit** UI are reached only through the operator console's
|
||||||
|
single `/_gm` Basic Auth gate — one password (the admin-console account)
|
||||||
|
unlocks the console, [`/_gm/grafana/`](https://galaxy.lan/_gm/grafana/)
|
||||||
|
and [`/_gm/mailpit/`](https://galaxy.lan/_gm/mailpit/), with links in the
|
||||||
|
console nav. Grafana runs anonymous-Admin behind the gate (no own
|
||||||
|
login); Prometheus, Loki and Tempo stay internal-only.
|
||||||
|
|
||||||
|
- **Metrics** — Prometheus scrapes backend, gateway, `node-exporter` and
|
||||||
|
cAdvisor.
|
||||||
|
- **Logs** — promtail → Loki (Docker SD on the `galaxy.stack=dev-deploy`
|
||||||
|
label).
|
||||||
|
- **Traces** — backend + gateway → Tempo over OTLP.
|
||||||
|
|
||||||
|
Grafana's admin user is seeded from `GALAXY_DEV_GRAFANA_ADMIN_PASSWORD`
|
||||||
|
(for provisioning/API; the UI needs no Grafana login). See
|
||||||
|
[`monitoring/README.md`](monitoring/README.md) for services, configs and
|
||||||
|
tuning knobs.
|
||||||
|
|
||||||
## Networking
|
## Networking
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -139,6 +194,8 @@ galaxy-caddy (networks: edge + galaxy-dev-internal)
|
|||||||
│ /game/* -> file_server /srv/galaxy-ui (volume galaxy-dev-ui-dist)
|
│ /game/* -> file_server /srv/galaxy-ui (volume galaxy-dev-ui-dist)
|
||||||
│ /api/*, /healthz -> reverse_proxy galaxy-api:8080
|
│ /api/*, /healthz -> reverse_proxy galaxy-api:8080
|
||||||
│ /rpc/* -> reverse_proxy galaxy-api:9090 (strips /rpc)
|
│ /rpc/* -> reverse_proxy galaxy-api:9090 (strips /rpc)
|
||||||
|
│ /_gm, /_gm/* -> reverse_proxy galaxy-api:8080 (Basic Auth gate;
|
||||||
|
│ /_gm/grafana/ -> grafana, /_gm/mailpit/ -> mailpit)
|
||||||
▼
|
▼
|
||||||
galaxy-dev-internal
|
galaxy-dev-internal
|
||||||
├─ galaxy-api (gateway: :8080 REST, :9090 gRPC)
|
├─ galaxy-api (gateway: :8080 REST, :9090 gRPC)
|
||||||
@@ -146,7 +203,9 @@ galaxy-dev-internal
|
|||||||
├─ galaxy-postgres (postgres: :5432)
|
├─ galaxy-postgres (postgres: :5432)
|
||||||
├─ galaxy-redis (redis: :6379)
|
├─ galaxy-redis (redis: :6379)
|
||||||
├─ galaxy-mailpit (mailpit: :8025 UI, :1025 SMTP)
|
├─ galaxy-mailpit (mailpit: :8025 UI, :1025 SMTP)
|
||||||
└─ engine containers (spawned by backend on demand)
|
├─ engine containers (spawned by backend on demand)
|
||||||
|
└─ observability (prometheus, grafana, loki, promtail, tempo,
|
||||||
|
node-exporter, cadvisor)
|
||||||
```
|
```
|
||||||
|
|
||||||
The compose project deliberately exposes no host ports. Diagnostics
|
The compose project deliberately exposes no host ports. Diagnostics
|
||||||
@@ -191,8 +250,10 @@ make clean-data Stop everything and wipe volumes + game-state dir
|
|||||||
|
|
||||||
## Files
|
## Files
|
||||||
|
|
||||||
- `docker-compose.yml` — six services: postgres, redis, mailpit,
|
- `docker-compose.yml` — the application services (postgres, redis,
|
||||||
galaxy-backend, galaxy-api, galaxy-caddy. `galaxy-caddy` mounts both
|
mailpit, galaxy-backend, galaxy-api, galaxy-caddy) plus the
|
||||||
|
observability stack (prometheus, grafana, loki, promtail, tempo,
|
||||||
|
node-exporter, cadvisor). `galaxy-caddy` mounts both
|
||||||
the `galaxy-dev-site-dist` (`/srv/galaxy-site`) and
|
the `galaxy-dev-site-dist` (`/srv/galaxy-site`) and
|
||||||
`galaxy-dev-ui-dist` (`/srv/galaxy-ui`) volumes and reverse-proxies
|
`galaxy-dev-ui-dist` (`/srv/galaxy-ui`) volumes and reverse-proxies
|
||||||
both gateway tiers (REST/health on `:8080`, Connect/gRPC-web on
|
both gateway tiers (REST/health on `:8080`, Connect/gRPC-web on
|
||||||
@@ -204,6 +265,8 @@ make clean-data Stop everything and wipe volumes + game-state dir
|
|||||||
at `/etc/caddy/Caddyfile`.
|
at `/etc/caddy/Caddyfile`.
|
||||||
- `Caddyfile.prod` — placeholder for a future prod deployment; not used
|
- `Caddyfile.prod` — placeholder for a future prod deployment; not used
|
||||||
by this compose.
|
by this compose.
|
||||||
|
- `monitoring/` — Prometheus / Loki / promtail / Tempo / Grafana
|
||||||
|
configuration, provisioned as code; see `monitoring/README.md`.
|
||||||
- `Makefile` — wrapper over `docker compose` with helpers for engine,
|
- `Makefile` — wrapper over `docker compose` with helpers for engine,
|
||||||
site/UI seeding, health probes, and full wipe.
|
site/UI seeding, health probes, and full wipe.
|
||||||
- `.env.example` — non-secret defaults for the compose `${VAR:-}`
|
- `.env.example` — non-secret defaults for the compose `${VAR:-}`
|
||||||
@@ -212,8 +275,7 @@ make clean-data Stop everything and wipe volumes + game-state dir
|
|||||||
## Known issues
|
## Known issues
|
||||||
|
|
||||||
See [`KNOWN-ISSUES.md`](KNOWN-ISSUES.md) for symptoms that surface
|
See [`KNOWN-ISSUES.md`](KNOWN-ISSUES.md) for symptoms that surface
|
||||||
in the long-lived dev environment but are not yet fixed (currently:
|
in the long-lived dev environment but are not yet fixed.
|
||||||
the sandbox game flipping to `cancelled` after a redispatch).
|
|
||||||
|
|
||||||
## Deployment cadence
|
## Deployment cadence
|
||||||
|
|
||||||
@@ -237,12 +299,12 @@ behind. There is no separate state to clean up between the two paths.
|
|||||||
|
|
||||||
### Engine image drift recycle
|
### Engine image drift recycle
|
||||||
|
|
||||||
`backend` spawns one engine container per game (the long-lived "Dev
|
`backend` spawns one engine container per running game and the
|
||||||
Sandbox" plus any user-created games) and the reconciler reattaches
|
reconciler reattaches to whatever it finds with the
|
||||||
to whatever it finds with the `galaxy.stack=dev-deploy` label. That
|
`galaxy.stack=dev-deploy` label. That reattach does not check the
|
||||||
reattach does not check the running container's image SHA against the
|
running container's image SHA against the freshly-built
|
||||||
freshly-built `galaxy-engine:dev` tag, so an unchanged container would
|
`galaxy-engine:dev` tag, so an unchanged container would otherwise
|
||||||
otherwise keep serving the previous engine code after a redeploy.
|
keep serving the previous engine code after a redeploy.
|
||||||
|
|
||||||
The `dev-deploy.yaml` workflow handles this in the
|
The `dev-deploy.yaml` workflow handles this in the
|
||||||
`Recycle engine containers on image drift` step. When `docker build`
|
`Recycle engine containers on image drift` step. When `docker build`
|
||||||
@@ -250,9 +312,7 @@ produces a new `galaxy-engine:dev` SHA, the step compares it against
|
|||||||
every running `galaxy-game-*` container and, for each drifted one,
|
every running `galaxy-game-*` container and, for each drifted one,
|
||||||
stops the backend, removes the container, wipes its bind-mounted
|
stops the backend, removes the container, wipes its bind-mounted
|
||||||
state directory (Engine.Init() writes turn-0 over any pre-existing
|
state directory (Engine.Init() writes turn-0 over any pre-existing
|
||||||
`turn-N` files), and cascade-deletes the lobby `games` row. The
|
`turn-N` files), and cascade-deletes the lobby `games` row.
|
||||||
`dev-sandbox` bootstrap on the next backend boot finds no live
|
|
||||||
sandbox and provisions a fresh one on the new engine image.
|
|
||||||
|
|
||||||
When the engine sources are unchanged, the BuildKit cache hits and
|
When the engine sources are unchanged, the BuildKit cache hits and
|
||||||
the SHA stays the same — the recycle step is a no-op and the running
|
the SHA stays the same — the recycle step is a no-op and the running
|
||||||
|
|||||||
@@ -66,12 +66,26 @@ services:
|
|||||||
image: axllent/mailpit:v1.21
|
image: axllent/mailpit:v1.21
|
||||||
container_name: galaxy-dev-mailpit
|
container_name: galaxy-dev-mailpit
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
# Mailpit is both the SMTP submission point and a relay: it captures
|
||||||
|
# every message in its UI and auto-relays the ones whose recipient
|
||||||
|
# matches GALAXY_DEV_MAIL_RELAY_MATCH to the Gmail account in the
|
||||||
|
# secret-rendered relay config. The default match is non-routable, so
|
||||||
|
# a stack brought up without the relay secret only captures, never sends.
|
||||||
|
command:
|
||||||
|
- "--smtp-relay-config=/etc/mailpit/relay.conf"
|
||||||
|
- "--smtp-relay-matching=${GALAXY_DEV_MAIL_RELAY_MATCH:-nobody@invalid.example}"
|
||||||
|
# Serve the capture UI under /_gm/mailpit so the host Caddy can expose
|
||||||
|
# it at https://galaxy.lan/_gm/mailpit/ behind the shared /_gm gate;
|
||||||
|
# SMTP is unaffected.
|
||||||
|
- "--webroot=/_gm/mailpit"
|
||||||
labels:
|
labels:
|
||||||
galaxy.stack: dev-deploy
|
galaxy.stack: dev-deploy
|
||||||
networks:
|
networks:
|
||||||
- galaxy-internal
|
- galaxy-internal
|
||||||
|
volumes:
|
||||||
|
- galaxy-dev-mailpit-config:/etc/mailpit:ro
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "wget", "-q", "-O-", "http://localhost:8025/livez"]
|
test: ["CMD", "wget", "-q", "-O-", "http://localhost:8025/_gm/mailpit/livez"]
|
||||||
interval: 3s
|
interval: 3s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 30
|
retries: 30
|
||||||
@@ -108,7 +122,13 @@ services:
|
|||||||
BACKEND_NOTIFICATION_ADMIN_EMAIL: admin@galaxy.lan
|
BACKEND_NOTIFICATION_ADMIN_EMAIL: admin@galaxy.lan
|
||||||
BACKEND_MAIL_WORKER_INTERVAL: 500ms
|
BACKEND_MAIL_WORKER_INTERVAL: 500ms
|
||||||
BACKEND_NOTIFICATION_WORKER_INTERVAL: 500ms
|
BACKEND_NOTIFICATION_WORKER_INTERVAL: 500ms
|
||||||
BACKEND_OTEL_TRACES_EXPORTER: none
|
BACKEND_OTEL_TRACES_EXPORTER: otlp
|
||||||
|
BACKEND_OTEL_PROTOCOL: grpc
|
||||||
|
BACKEND_OTEL_ENDPOINT: "galaxy-tempo:4317"
|
||||||
|
# Tempo's OTLP receiver is plaintext on the internal network; the
|
||||||
|
# backend's gRPC exporter defaults to TLS, so disable it via the
|
||||||
|
# standard SDK env (applied on top of WithEndpoint).
|
||||||
|
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||||
# Prometheus metrics are enabled in dev so the `/metrics` scrape
|
# Prometheus metrics are enabled in dev so the `/metrics` scrape
|
||||||
# endpoint is live and stable ahead of standing up a Prometheus +
|
# endpoint is live and stable ahead of standing up a Prometheus +
|
||||||
# Grafana stack on the internal network. The listener stays internal
|
# Grafana stack on the internal network. The listener stays internal
|
||||||
@@ -127,15 +147,6 @@ services:
|
|||||||
# bcrypt-hashed code is single-use). Set the var to an empty
|
# bcrypt-hashed code is single-use). Set the var to an empty
|
||||||
# string in `.env` to disable.
|
# string in `.env` to disable.
|
||||||
BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-123456}
|
BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-123456}
|
||||||
# Long-lived dev environment always bootstraps the "Dev Sandbox"
|
|
||||||
# game owned by this email so a freshly redeployed stack already
|
|
||||||
# has one ready-to-play game in the lobby. Set the variable to an
|
|
||||||
# empty string in `.env` to disable the bootstrap (e.g. for a
|
|
||||||
# cold-start QA pass).
|
|
||||||
BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-dev@galaxy.lan}
|
|
||||||
BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-galaxy-engine:dev}
|
|
||||||
BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-0.1.0}
|
|
||||||
BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-20}
|
|
||||||
volumes:
|
volumes:
|
||||||
- /var/run/docker.sock:/var/run/docker.sock
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
# Per-game state directories live under the same absolute path
|
# Per-game state directories live under the same absolute path
|
||||||
@@ -195,6 +206,12 @@ services:
|
|||||||
# the internal network — live and stable for a future scrape, not
|
# the internal network — live and stable for a future scrape, not
|
||||||
# mapped to the host.
|
# mapped to the host.
|
||||||
GATEWAY_ADMIN_HTTP_ADDR: ":9191"
|
GATEWAY_ADMIN_HTTP_ADDR: ":9191"
|
||||||
|
# Traces -> Tempo over OTLP gRPC (plaintext on the internal net).
|
||||||
|
OTEL_SERVICE_NAME: galaxy-gateway
|
||||||
|
OTEL_TRACES_EXPORTER: otlp
|
||||||
|
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT: "http://galaxy-tempo:4317"
|
||||||
|
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||||
GATEWAY_BACKEND_HTTP_URL: "http://galaxy-backend:8080"
|
GATEWAY_BACKEND_HTTP_URL: "http://galaxy-backend:8080"
|
||||||
GATEWAY_BACKEND_GRPC_PUSH_URL: "galaxy-backend:8081"
|
GATEWAY_BACKEND_GRPC_PUSH_URL: "galaxy-backend:8081"
|
||||||
GATEWAY_BACKEND_GATEWAY_CLIENT_ID: dev-gateway-1
|
GATEWAY_BACKEND_GATEWAY_CLIENT_ID: dev-gateway-1
|
||||||
@@ -263,6 +280,163 @@ services:
|
|||||||
- galaxy-internal
|
- galaxy-internal
|
||||||
- edge
|
- edge
|
||||||
|
|
||||||
|
galaxy-prometheus:
|
||||||
|
image: prom/prometheus:v2.55.1
|
||||||
|
container_name: galaxy-dev-prometheus
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
command:
|
||||||
|
- --config.file=/etc/prometheus/prometheus.yml
|
||||||
|
- --storage.tsdb.path=/prometheus
|
||||||
|
- --storage.tsdb.retention.time=15d
|
||||||
|
- --web.enable-lifecycle
|
||||||
|
volumes:
|
||||||
|
- ${GALAXY_DEV_MONITORING_DIR:-./monitoring}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
- galaxy-dev-prometheus-data:/prometheus
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 384m
|
||||||
|
|
||||||
|
galaxy-loki:
|
||||||
|
image: grafana/loki:3.3.2
|
||||||
|
container_name: galaxy-dev-loki
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
command: ["-config.file=/etc/loki/loki.yml"]
|
||||||
|
volumes:
|
||||||
|
- ${GALAXY_DEV_MONITORING_DIR:-./monitoring}/loki/loki.yml:/etc/loki/loki.yml:ro
|
||||||
|
- galaxy-dev-loki-data:/loki
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 384m
|
||||||
|
|
||||||
|
galaxy-promtail:
|
||||||
|
image: grafana/promtail:3.3.2
|
||||||
|
container_name: galaxy-dev-promtail
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
command: ["-config.file=/etc/promtail/promtail.yml"]
|
||||||
|
volumes:
|
||||||
|
- ${GALAXY_DEV_MONITORING_DIR:-./monitoring}/promtail/promtail.yml:/etc/promtail/promtail.yml:ro
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 128m
|
||||||
|
|
||||||
|
galaxy-tempo:
|
||||||
|
image: grafana/tempo:2.7.1
|
||||||
|
container_name: galaxy-dev-tempo
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
command: ["-config.file=/etc/tempo/tempo.yml"]
|
||||||
|
volumes:
|
||||||
|
- ${GALAXY_DEV_MONITORING_DIR:-./monitoring}/tempo/tempo.yml:/etc/tempo/tempo.yml:ro
|
||||||
|
- galaxy-dev-tempo-data:/var/tempo
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 384m
|
||||||
|
|
||||||
|
galaxy-node-exporter:
|
||||||
|
image: prom/node-exporter:v1.8.2
|
||||||
|
container_name: galaxy-dev-node-exporter
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
command:
|
||||||
|
- --path.procfs=/host/proc
|
||||||
|
- --path.sysfs=/host/sys
|
||||||
|
- --path.rootfs=/rootfs
|
||||||
|
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
|
||||||
|
volumes:
|
||||||
|
- /proc:/host/proc:ro
|
||||||
|
- /sys:/host/sys:ro
|
||||||
|
- /:/rootfs:ro
|
||||||
|
pid: host
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 64m
|
||||||
|
|
||||||
|
galaxy-cadvisor:
|
||||||
|
image: gcr.io/cadvisor/cadvisor:v0.49.1
|
||||||
|
container_name: galaxy-dev-cadvisor
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
command:
|
||||||
|
- --housekeeping_interval=30s
|
||||||
|
- --docker_only=true
|
||||||
|
- --store_container_labels=false
|
||||||
|
privileged: true
|
||||||
|
devices:
|
||||||
|
- /dev/kmsg
|
||||||
|
volumes:
|
||||||
|
- /:/rootfs:ro
|
||||||
|
- /var/run:/var/run:ro
|
||||||
|
- /sys:/sys:ro
|
||||||
|
- /var/lib/docker/:/var/lib/docker:ro
|
||||||
|
- /dev/disk/:/dev/disk:ro
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 256m
|
||||||
|
|
||||||
|
galaxy-grafana:
|
||||||
|
image: grafana/grafana:11.4.0
|
||||||
|
container_name: galaxy-dev-grafana
|
||||||
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
galaxy.stack: dev-deploy
|
||||||
|
depends_on:
|
||||||
|
- galaxy-prometheus
|
||||||
|
- galaxy-loki
|
||||||
|
- galaxy-tempo
|
||||||
|
environment:
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD: ${GALAXY_DEV_GRAFANA_ADMIN_PASSWORD:-admin}
|
||||||
|
GF_SERVER_ROOT_URL: https://galaxy.lan/_gm/grafana/
|
||||||
|
GF_SERVER_SERVE_FROM_SUB_PATH: "true"
|
||||||
|
# No own login: the /_gm Basic Auth gate is the only barrier, so
|
||||||
|
# serve everyone as anonymous Admin and ignore the forwarded
|
||||||
|
# Authorization header (basic auth off, login form off).
|
||||||
|
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||||
|
GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
|
||||||
|
GF_AUTH_DISABLE_LOGIN_FORM: "true"
|
||||||
|
GF_AUTH_BASIC_ENABLED: "false"
|
||||||
|
GF_USERS_ALLOW_SIGN_UP: "false"
|
||||||
|
GF_ANALYTICS_REPORTING_ENABLED: "false"
|
||||||
|
GF_ANALYTICS_CHECK_FOR_UPDATES: "false"
|
||||||
|
GF_NEWS_NEWS_FEED_ENABLED: "false"
|
||||||
|
volumes:
|
||||||
|
- ${GALAXY_DEV_MONITORING_DIR:-./monitoring}/grafana/provisioning:/etc/grafana/provisioning:ro
|
||||||
|
- ${GALAXY_DEV_MONITORING_DIR:-./monitoring}/grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||||
|
- galaxy-dev-grafana-data:/var/lib/grafana
|
||||||
|
networks:
|
||||||
|
- galaxy-internal
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 256m
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
galaxy-internal:
|
galaxy-internal:
|
||||||
name: galaxy-dev-internal
|
name: galaxy-dev-internal
|
||||||
@@ -292,3 +466,13 @@ volumes:
|
|||||||
name: galaxy-dev-site-dist
|
name: galaxy-dev-site-dist
|
||||||
galaxy-dev-geoip-data:
|
galaxy-dev-geoip-data:
|
||||||
name: galaxy-dev-geoip-data
|
name: galaxy-dev-geoip-data
|
||||||
|
galaxy-dev-mailpit-config:
|
||||||
|
name: galaxy-dev-mailpit-config
|
||||||
|
galaxy-dev-prometheus-data:
|
||||||
|
name: galaxy-dev-prometheus-data
|
||||||
|
galaxy-dev-grafana-data:
|
||||||
|
name: galaxy-dev-grafana-data
|
||||||
|
galaxy-dev-loki-data:
|
||||||
|
name: galaxy-dev-loki-data
|
||||||
|
galaxy-dev-tempo-data:
|
||||||
|
name: galaxy-dev-tempo-data
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
# Mailpit SMTP relay upstream — RENDERED AT DEPLOY TIME by
|
||||||
|
# .gitea/workflows/dev-deploy.yaml from Gitea Actions secrets, then
|
||||||
|
# seeded into the `galaxy-dev-mailpit-config` volume. The Gmail App
|
||||||
|
# Password is a secret and MUST NOT be committed: this template only
|
||||||
|
# carries ${PLACEHOLDER}s that the workflow substitutes. See
|
||||||
|
# tools/dev-deploy/README.md ("Mail").
|
||||||
|
#
|
||||||
|
# Mailpit captures every message; the `--smtp-relay-matching` flag (set
|
||||||
|
# from GALAXY_DEV_MAIL_RELAY_MATCH in the compose) decides which
|
||||||
|
# recipients are actually relayed up to this Gmail account.
|
||||||
|
host: smtp.gmail.com
|
||||||
|
port: 587
|
||||||
|
starttls: true
|
||||||
|
allow-insecure: false
|
||||||
|
auth: login
|
||||||
|
username: ${GALAXY_DEV_MAIL_RELAY_USERNAME}
|
||||||
|
password: ${GALAXY_DEV_MAIL_RELAY_PASSWORD}
|
||||||
|
return-path: ${GALAXY_DEV_MAIL_RELAY_USERNAME}
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
# `tools/dev-deploy/monitoring/` — observability stack
|
||||||
|
|
||||||
|
The long-lived dev environment runs a full metrics + logs + traces stack
|
||||||
|
alongside the application as a **production mirror**: the same compose
|
||||||
|
fragment and collector configs are meant to back production later. Every
|
||||||
|
collector lives on the internal `galaxy-dev-internal` network and
|
||||||
|
publishes **no host port**. The browser-reachable pieces (Grafana and
|
||||||
|
the Mailpit UI) sit behind the operator console's single `/_gm` Basic
|
||||||
|
Auth gate — see [`../README.md`](../README.md) and `ARCHITECTURE.md §14`.
|
||||||
|
|
||||||
|
## Services
|
||||||
|
|
||||||
|
| Service | Image | Role | Reachable |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| `galaxy-prometheus` | `prom/prometheus` | Scrape + store metrics (15d) | internal `:9090` |
|
||||||
|
| `galaxy-loki` | `grafana/loki` | Log store (7d) | internal `:3100` |
|
||||||
|
| `galaxy-promtail` | `grafana/promtail` | Ship container logs to Loki | — |
|
||||||
|
| `galaxy-tempo` | `grafana/tempo` | Trace store (3d), OTLP receiver | internal `:3200`, OTLP `:4317`/`:4318` |
|
||||||
|
| `galaxy-node-exporter` | `prom/node-exporter` | Host metrics | internal `:9100` |
|
||||||
|
| `galaxy-cadvisor` | `gcr.io/cadvisor/cadvisor` | Per-container CPU/memory/IO | internal `:8080` |
|
||||||
|
| `galaxy-grafana` | `grafana/grafana` | Dashboards + Explore | Caddy `/_gm/grafana/` |
|
||||||
|
|
||||||
|
## What is collected
|
||||||
|
|
||||||
|
- **Metrics.** Prometheus (30s interval) scrapes the backend Prometheus
|
||||||
|
endpoint (`galaxy-backend:9100`), the gateway admin endpoint
|
||||||
|
(`galaxy-api:9191`), `node-exporter` (host) and cAdvisor (per
|
||||||
|
container). Engine containers expose no `/metrics`; cAdvisor covers
|
||||||
|
their resource use.
|
||||||
|
- **Logs.** promtail discovers containers through the Docker API,
|
||||||
|
filtered to the `galaxy.stack=dev-deploy` label, and ships their
|
||||||
|
stdout/stderr to Loki labelled by `container`, `stream` and (when set) `game_id`.
|
||||||
|
- **Traces.** backend and gateway export OTLP traces over gRPC to Tempo
|
||||||
|
(`galaxy-tempo:4317`), plaintext on the internal network
|
||||||
|
(`OTEL_EXPORTER_OTLP_INSECURE=true`, since Tempo's receiver is not
|
||||||
|
TLS-wrapped inside the contour).
|
||||||
|
|
||||||
|
## Grafana access (behind the `/_gm` gate)
|
||||||
|
|
||||||
|
Grafana is served under `/_gm/grafana/` (`GF_SERVER_ROOT_URL` +
|
||||||
|
`GF_SERVER_SERVE_FROM_SUB_PATH=true`) **behind the shared operator gate**:
|
||||||
|
the Caddy `/_gm/*` Basic Auth (the admin-console account) is the only
|
||||||
|
barrier. Grafana itself runs as **anonymous Admin** with its login form
|
||||||
|
and basic auth disabled (`GF_AUTH_ANONYMOUS_ENABLED=true`,
|
||||||
|
`GF_AUTH_ANONYMOUS_ORG_ROLE=Admin`, `GF_AUTH_DISABLE_LOGIN_FORM=true`,
|
||||||
|
`GF_AUTH_BASIC_ENABLED=false`), so it ignores the forwarded credentials
|
||||||
|
and asks for no second password. `GALAXY_DEV_GRAFANA_ADMIN_PASSWORD`
|
||||||
|
still seeds the admin user for provisioning/API use.
|
||||||
|
|
||||||
|
Datasources (Prometheus, Loki, Tempo) and a starter dashboard
|
||||||
|
(`grafana/dashboards/galaxy-overview.json`) are provisioned as code under
|
||||||
|
`grafana/provisioning/`.
|
||||||
|
|
||||||
|
## Config delivery
|
||||||
|
|
||||||
|
`dev-deploy.yaml` copies this directory to a stable host path
|
||||||
|
(`$HOME/.galaxy-dev/monitoring`, exported as `GALAXY_DEV_MONITORING_DIR`)
|
||||||
|
before `compose up`, and the compose binds it read-only into the
|
||||||
|
collectors. A stable path — not the ephemeral CI workspace — keeps the
|
||||||
|
mounts valid across container restarts and host reboots (the same lesson
|
||||||
|
as the geoip volume; see `../KNOWN-ISSUES.md`).
|
||||||
|
|
||||||
|
## Tuning (cost knobs)
|
||||||
|
|
||||||
|
Defaults favour the smallest workable footprint; all are config/compose
|
||||||
|
values:
|
||||||
|
|
||||||
|
- Prometheus `scrape_interval=30s`, `--storage.tsdb.retention.time=15d`.
|
||||||
|
- Loki `retention_period=168h` (7d); Tempo `block_retention=72h` (3d).
|
||||||
|
- cAdvisor `--housekeeping_interval=30s`.
|
||||||
|
- Per-service `deploy.resources.limits.memory` caps (~1.8 GB total cap;
|
||||||
|
steady-state well under that).
|
||||||
|
|
||||||
|
Seven always-on containers cost roughly 1.1 GB steady RAM and
|
||||||
|
~1.5–2.5 GB disk at these retention windows. cAdvisor is the main CPU
|
||||||
|
cost; on a constrained host it can be dropped (host + app metrics still
|
||||||
|
cover most needs).
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
"annotations": { "list": [] },
|
||||||
|
"editable": true,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||||
|
"id": 1,
|
||||||
|
"title": "Backend HTTP request rate",
|
||||||
|
"type": "timeseries",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum by (group) (rate(http_requests_total[5m]))",
|
||||||
|
"legendFormat": "{{group}}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||||
|
"id": 2,
|
||||||
|
"title": "Container memory (cadvisor)",
|
||||||
|
"type": "timeseries",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum by (name) (container_memory_usage_bytes{name=~\"galaxy-dev-.*|galaxy-game-.*\"})",
|
||||||
|
"legendFormat": "{{name}}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["galaxy"],
|
||||||
|
"templating": { "list": [] },
|
||||||
|
"time": { "from": "now-6h", "to": "now" },
|
||||||
|
"timepicker": {},
|
||||||
|
"title": "Galaxy — overview",
|
||||||
|
"uid": "galaxy-overview",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
# Grafana dashboard provider: load every JSON under the mounted
|
||||||
|
# dashboards directory at startup (provisioned as code).
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: galaxy
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
allowUiUpdates: true
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
|
foldersFromFilesStructure: false
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
# Grafana datasources provisioned as code (dev↔prod parity). All reach
|
||||||
|
# the collectors by Docker DNS (compose service names) on
|
||||||
|
# galaxy-dev-internal.
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
uid: prometheus
|
||||||
|
url: http://galaxy-prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
- name: Loki
|
||||||
|
type: loki
|
||||||
|
access: proxy
|
||||||
|
uid: loki
|
||||||
|
url: http://galaxy-loki:3100
|
||||||
|
- name: Tempo
|
||||||
|
type: tempo
|
||||||
|
access: proxy
|
||||||
|
uid: tempo
|
||||||
|
url: http://galaxy-tempo:3200
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
# Single-binary Loki for the dev stack: filesystem storage, in-memory
|
||||||
|
# ring, 7-day retention. Internal-only (no host port).
|
||||||
|
auth_enabled: false
|
||||||
|
|
||||||
|
server:
|
||||||
|
http_listen_port: 3100
|
||||||
|
grpc_listen_port: 9095
|
||||||
|
log_level: warn
|
||||||
|
|
||||||
|
common:
|
||||||
|
instance_addr: 127.0.0.1
|
||||||
|
path_prefix: /loki
|
||||||
|
storage:
|
||||||
|
filesystem:
|
||||||
|
chunks_directory: /loki/chunks
|
||||||
|
rules_directory: /loki/rules
|
||||||
|
replication_factor: 1
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: inmemory
|
||||||
|
|
||||||
|
schema_config:
|
||||||
|
configs:
|
||||||
|
- from: 2024-01-01
|
||||||
|
store: tsdb
|
||||||
|
object_store: filesystem
|
||||||
|
schema: v13
|
||||||
|
index:
|
||||||
|
prefix: index_
|
||||||
|
period: 24h
|
||||||
|
|
||||||
|
limits_config:
|
||||||
|
retention_period: 168h
|
||||||
|
reject_old_samples: true
|
||||||
|
reject_old_samples_max_age: 168h
|
||||||
|
|
||||||
|
compactor:
|
||||||
|
working_directory: /loki/compactor
|
||||||
|
retention_enabled: true
|
||||||
|
delete_request_store: filesystem
|
||||||
|
|
||||||
|
query_range:
|
||||||
|
results_cache:
|
||||||
|
cache:
|
||||||
|
embedded_cache:
|
||||||
|
enabled: true
|
||||||
|
max_size_mb: 64
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# Prometheus scrape config for the dev observability stack. Retention is
|
||||||
|
# a CLI flag in the compose command, not here. Targets are reached by
|
||||||
|
# Docker DNS (compose service names) on galaxy-dev-internal; nothing is
|
||||||
|
# published to the host.
|
||||||
|
global:
|
||||||
|
scrape_interval: 30s
|
||||||
|
evaluation_interval: 30s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: backend
|
||||||
|
static_configs:
|
||||||
|
- targets: ["galaxy-backend:9100"]
|
||||||
|
- job_name: gateway
|
||||||
|
static_configs:
|
||||||
|
- targets: ["galaxy-api:9191"]
|
||||||
|
- job_name: node
|
||||||
|
static_configs:
|
||||||
|
- targets: ["galaxy-node-exporter:9100"]
|
||||||
|
- job_name: cadvisor
|
||||||
|
static_configs:
|
||||||
|
- targets: ["galaxy-cadvisor:8080"]
|
||||||
|
- job_name: prometheus
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:9090"]
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Promtail tails the dev stack's container logs via the Docker API
|
||||||
|
# (service discovery filtered to the galaxy.stack=dev-deploy label) and
|
||||||
|
# ships them to Loki. Requires the Docker socket mounted read-only.
|
||||||
|
server:
|
||||||
|
http_listen_port: 9080
|
||||||
|
grpc_listen_port: 0
|
||||||
|
log_level: warn
|
||||||
|
|
||||||
|
positions:
|
||||||
|
filename: /tmp/positions.yaml
|
||||||
|
|
||||||
|
clients:
|
||||||
|
- url: http://galaxy-loki:3100/loki/api/v1/push
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: docker
|
||||||
|
docker_sd_configs:
|
||||||
|
- host: unix:///var/run/docker.sock
|
||||||
|
refresh_interval: 15s
|
||||||
|
filters:
|
||||||
|
- name: label
|
||||||
|
values: ["galaxy.stack=dev-deploy"]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: ["__meta_docker_container_name"]
|
||||||
|
regex: "/?(.*)"
|
||||||
|
target_label: container
|
||||||
|
- source_labels: ["__meta_docker_container_label_galaxy_game_id"]
|
||||||
|
target_label: game_id
|
||||||
|
- source_labels: ["__meta_docker_container_log_stream"]
|
||||||
|
target_label: stream
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Single-binary Tempo for the dev stack: OTLP receivers, local block
|
||||||
|
# storage, 3-day retention. Internal-only (no host port). Backend and
|
||||||
|
# gateway push traces here over OTLP gRPC (4317).
|
||||||
|
server:
|
||||||
|
http_listen_port: 3200
|
||||||
|
log_level: warn
|
||||||
|
|
||||||
|
distributor:
|
||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
endpoint: 0.0.0.0:4317
|
||||||
|
http:
|
||||||
|
endpoint: 0.0.0.0:4318
|
||||||
|
|
||||||
|
ingester:
|
||||||
|
max_block_duration: 5m
|
||||||
|
|
||||||
|
compactor:
|
||||||
|
compaction:
|
||||||
|
block_retention: 72h
|
||||||
|
|
||||||
|
storage:
|
||||||
|
trace:
|
||||||
|
backend: local
|
||||||
|
local:
|
||||||
|
path: /var/tempo/blocks
|
||||||
|
wal:
|
||||||
|
path: /var/tempo/wal
|
||||||
@@ -22,7 +22,7 @@ help:
|
|||||||
@echo " make up Build (if needed) and bring up the stack, wait until healthy"
|
@echo " make up Build (if needed) and bring up the stack, wait until healthy"
|
||||||
@echo " make down Stop compose containers, leave engines + volumes intact"
|
@echo " make down Stop compose containers, leave engines + volumes intact"
|
||||||
@echo " make rebuild Force rebuild of backend / gateway images and bring up"
|
@echo " make rebuild Force rebuild of backend / gateway images and bring up"
|
||||||
@echo " make build-engine Build the engine image $(ENGINE_IMAGE) used by the dev sandbox"
|
@echo " make build-engine Build the engine image $(ENGINE_IMAGE) used by running games"
|
||||||
@echo " make stop-engines Stop and remove only the per-game engine containers"
|
@echo " make stop-engines Stop and remove only the per-game engine containers"
|
||||||
@echo " make prune-broken-engines Remove non-running engine containers Docker can't heal (run inside 'up')"
|
@echo " make prune-broken-engines Remove non-running engine containers Docker can't heal (run inside 'up')"
|
||||||
@echo " make clean Stop everything (incl. engines) and wipe volumes + game state"
|
@echo " make clean Stop everything (incl. engines) and wipe volumes + game state"
|
||||||
@@ -37,8 +37,9 @@ help:
|
|||||||
@echo " pnpm -C ui/frontend dev"
|
@echo " pnpm -C ui/frontend dev"
|
||||||
@echo "and open http://localhost:5173 (UI) plus http://localhost:8025 (Mailpit)."
|
@echo "and open http://localhost:5173 (UI) plus http://localhost:8025 (Mailpit)."
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "Default login for the auto-provisioned dev sandbox: dev@local.test"
|
@echo "Sign in with email-OTP; the fixed login code 123456 works when"
|
||||||
@echo "(see BACKEND_DEV_SANDBOX_EMAIL in .env). Login code: 123456."
|
@echo "BACKEND_AUTH_DEV_FIXED_CODE is set in .env. No game is auto-provisioned —"
|
||||||
|
@echo "load a legacy report via the UI's DEV report loader to exercise the map."
|
||||||
|
|
||||||
up: build-engine prune-broken-engines
|
up: build-engine prune-broken-engines
|
||||||
$(COMPOSE) up -d --wait
|
$(COMPOSE) up -d --wait
|
||||||
@@ -88,12 +89,9 @@ stop-engines:
|
|||||||
# bind-mount source and leaves it stuck in `exited` / `created`
|
# bind-mount source and leaves it stuck in `exited` / `created`
|
||||||
# state. This target prunes the husks before `compose up`; the
|
# state. This target prunes the husks before `compose up`; the
|
||||||
# backend's pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`)
|
# backend's pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`)
|
||||||
# then cascades the orphan runtime row to `removed`, the lobby
|
# then cascades the orphan runtime row to `removed` and the lobby
|
||||||
# cancels the game, and the dev-sandbox bootstrap purges the
|
# cancels the game. Healthy `running` / `restarting` containers are
|
||||||
# cancelled tile and provisions a fresh sandbox in the same
|
# left intact so a long-lived game survives normal up/down cycles.
|
||||||
# `make up` cycle. Healthy `running` / `restarting` containers are
|
|
||||||
# left intact so a long-lived sandbox survives normal up/down
|
|
||||||
# cycles.
|
|
||||||
prune-broken-engines:
|
prune-broken-engines:
|
||||||
@ids=""; \
|
@ids=""; \
|
||||||
for cid in $$(docker ps -aq \
|
for cid in $$(docker ps -aq \
|
||||||
|
|||||||
+16
-50
@@ -78,49 +78,24 @@ To force the second path (no fast-bypass), edit
|
|||||||
`make rebuild` (or simply `docker compose up -d backend` to recreate
|
`make rebuild` (or simply `docker compose up -d backend` to recreate
|
||||||
the backend with the new env).
|
the backend with the new env).
|
||||||
|
|
||||||
## Auto-provisioned dev sandbox
|
## No auto-provisioned game
|
||||||
|
|
||||||
`make up` provisions a private game called **Dev Sandbox** owned by
|
`make up` brings up the stack with an empty lobby — there is no
|
||||||
the dev user (default `dev@local.test`). The flow is implemented in
|
auto-provisioned game. Sign in with email-OTP (the fixed dev code
|
||||||
`backend/internal/devsandbox` and runs on every backend boot when
|
`123456` works when `BACKEND_AUTH_DEV_FIXED_CODE` is set in
|
||||||
`BACKEND_DEV_SANDBOX_EMAIL` is non-empty in `tools/local-dev/.env`.
|
`tools/local-dev/.env`):
|
||||||
|
|
||||||
Bootstrap is idempotent — re-running `make up` after a `make down`
|
|
||||||
finds the existing user, dummy participants, game, and memberships
|
|
||||||
without creating duplicates. If a previous boot crashed mid-way
|
|
||||||
(game stuck in `enrollment_open` or `ready_to_start`), the next boot
|
|
||||||
resumes the lifecycle.
|
|
||||||
|
|
||||||
To log in straight into the sandbox:
|
|
||||||
|
|
||||||
1. `make -C tools/local-dev up`
|
1. `make -C tools/local-dev up`
|
||||||
2. `pnpm -C ui/frontend dev` (in another terminal)
|
2. `pnpm -C ui/frontend dev` (in another terminal)
|
||||||
3. Open <http://localhost:5173/login>, enter `dev@local.test`, then
|
3. Open <http://localhost:5173/login>, enter your email, then the dev
|
||||||
the dev code `123456`.
|
code `123456`.
|
||||||
4. The lobby shows **Dev Sandbox** in *My Games*; click in.
|
|
||||||
|
|
||||||
To disable the bootstrap, clear `BACKEND_DEV_SANDBOX_EMAIL` in
|
To exercise the map and report views without running a full game, use
|
||||||
`tools/local-dev/.env` and `docker compose up -d backend` (or
|
the UI's DEV **synthetic report loader**: convert a legacy `.REP` with
|
||||||
`make rebuild`). Existing users / games are not removed.
|
`tools/local-dev/legacy-report/` and load the resulting JSON through the
|
||||||
|
loader (see that tool's README). To play a real game, create one in the
|
||||||
Terminal sandbox games — anything in `cancelled`, `finished`, or
|
lobby and let the engine (`galaxy-engine:local-dev`, built by
|
||||||
`start_failed` — are deleted on every boot before find-or-create
|
`make build-engine`) run it.
|
||||||
runs. The cascade declared in `00001_init.sql` removes the
|
|
||||||
matching memberships, applications, invites, runtime records,
|
|
||||||
and player mappings in the same write, so the dev user's lobby
|
|
||||||
shows exactly one running tile at all times. Cancelling the
|
|
||||||
sandbox manually and running `docker compose restart backend`
|
|
||||||
(or `make rebuild`) yields a fresh game without leaving dead
|
|
||||||
tiles behind.
|
|
||||||
|
|
||||||
The bootstrap requires:
|
|
||||||
- `galaxy-engine:local-dev` Docker image (`make build-engine`).
|
|
||||||
- `BACKEND_DEV_SANDBOX_ENGINE_VERSION` parses as plain semver
|
|
||||||
(`MAJOR.MINOR.PATCH`); the default `0.1.0` is what the bootstrap
|
|
||||||
registers in the `engine_versions` row that points at the image.
|
|
||||||
- `BACKEND_DEV_SANDBOX_PLAYER_COUNT` ≥ 20 (the engine's minimum;
|
|
||||||
19 deterministic dummies fill the slots so the single real user
|
|
||||||
can start the game).
|
|
||||||
- A frozen turn schedule (`0 0 1 1 *` — once a year) so the visible
|
- A frozen turn schedule (`0 0 1 1 *` — once a year) so the visible
|
||||||
game state stays at turn 1 until you explicitly progress it.
|
game state stays at turn 1 until you explicitly progress it.
|
||||||
|
|
||||||
@@ -239,24 +214,15 @@ make status docker compose ps
|
|||||||
this in one cycle: `prune-broken-engines` (runs as part of `up`)
|
this in one cycle: `prune-broken-engines` (runs as part of `up`)
|
||||||
removes every engine container that is not in `running` /
|
removes every engine container that is not in `running` /
|
||||||
`restarting` state, the backend's pre-bootstrap reconciler tick
|
`restarting` state, the backend's pre-bootstrap reconciler tick
|
||||||
cascades the orphan runtime row to `removed`, the lobby cancels
|
cascades the orphan runtime row to `removed`, and the lobby cancels
|
||||||
the matching sandbox game, and the dev-sandbox bootstrap purges
|
the matching game. To run the cleanup by hand without restarting the
|
||||||
the cancelled tile and provisions a fresh sandbox with a brand
|
rest of the stack, `make prune-broken-engines`.
|
||||||
new state directory. To run the cleanup by hand without restarting
|
|
||||||
the rest of the stack, `make prune-broken-engines`.
|
|
||||||
|
|
||||||
The cycle relies on the backend image carrying the pre-bootstrap
|
The cycle relies on the backend image carrying the pre-bootstrap
|
||||||
reconciler tick (`backend/cmd/backend/main.go`). `make up` reuses
|
reconciler tick (`backend/cmd/backend/main.go`). `make up` reuses
|
||||||
the cached image, so after pulling this commit the first time you
|
the cached image, so after pulling this commit the first time you
|
||||||
must `make rebuild` once to bake the fix in. Future `make up`
|
must `make rebuild` once to bake the fix in. Future `make up`
|
||||||
cycles will heal in one shot.
|
cycles will heal in one shot.
|
||||||
|
|
||||||
If after the heal cycle the lobby still shows only a `cancelled`
|
|
||||||
sandbox tile and no running game, the running backend image
|
|
||||||
predates the pre-bootstrap reconciler tick — the periodic ticker
|
|
||||||
cancels the orphan after bootstrap has already returned, leaving
|
|
||||||
the lobby in the half-baked state. `make rebuild` recreates the
|
|
||||||
image and then `make up` lands a fresh sandbox.
|
|
||||||
- **`make up` reports a build error mentioning `pkg/cronutil`** —
|
- **`make up` reports a build error mentioning `pkg/cronutil`** —
|
||||||
upstream module list drifted; copy any new `pkg/<name>/` line into
|
upstream module list drifted; copy any new `pkg/<name>/` line into
|
||||||
the local-dev `backend.Dockerfile` / `gateway.Dockerfile` to match
|
the local-dev `backend.Dockerfile` / `gateway.Dockerfile` to match
|
||||||
|
|||||||
@@ -122,10 +122,6 @@ services:
|
|||||||
BACKEND_OTEL_TRACES_EXPORTER: none
|
BACKEND_OTEL_TRACES_EXPORTER: none
|
||||||
BACKEND_OTEL_METRICS_EXPORTER: none
|
BACKEND_OTEL_METRICS_EXPORTER: none
|
||||||
BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-}
|
BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-}
|
||||||
BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-}
|
|
||||||
BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-}
|
|
||||||
BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-}
|
|
||||||
BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-}
|
|
||||||
volumes:
|
volumes:
|
||||||
- /var/run/docker.sock:/var/run/docker.sock
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
# Per-game state directories live under the same absolute path
|
# Per-game state directories live under the same absolute path
|
||||||
|
|||||||
@@ -85,16 +85,16 @@ report to fetch. Two alternatives were rejected:
|
|||||||
- a brand-new `user.games.state` message — adds a full wire-flow
|
- a brand-new `user.games.state` message — adds a full wire-flow
|
||||||
(fbs schema, transcoder, gateway routing, backend handler) for a
|
(fbs schema, transcoder, gateway routing, backend handler) for a
|
||||||
one-field response;
|
one-field response;
|
||||||
- hard-coding `turn=0` for all games — works for the dev sandbox
|
- hard-coding `turn=0` for all games — works for a synthetic report
|
||||||
(which never advances past turn zero) but renders the initial
|
loaded at turn zero but mis-renders the initial state for any real
|
||||||
state for any real game past turn zero.
|
game past turn zero.
|
||||||
|
|
||||||
Extending `GameSummary` reuses the existing lobby pipeline; the
|
Extending `GameSummary` reuses the existing lobby pipeline; the
|
||||||
backend already tracks `current_turn` in its runtime projection
|
backend already tracks `current_turn` in its runtime projection
|
||||||
(`backend/internal/server/handlers_user_lobby_helpers.go`
|
(`backend/internal/server/handlers_user_lobby_helpers.go`
|
||||||
`gameSummaryToWire` reads it from `g.RuntimeSnapshot.CurrentTurn`).
|
`gameSummaryToWire` reads it from `g.RuntimeSnapshot.CurrentTurn`).
|
||||||
The `current_turn` field defaults to zero on the FB side, so existing
|
The `current_turn` field defaults to zero on the FB side, so existing
|
||||||
tests and the dev sandbox flow continue to work unchanged.
|
tests and the synthetic-report flow continue to work unchanged.
|
||||||
|
|
||||||
## State binding
|
## State binding
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
// DEV-only synthetic-report loader. Backs the "Load synthetic report"
|
// DEV-only synthetic-report loader. Backs the "Load synthetic report"
|
||||||
// affordance on the lobby (visible behind `import.meta.env.DEV`) and
|
// affordance on the lobby (visible when the build-time flag
|
||||||
// the in-game shell layout's bypass for the synthetic game id range.
|
// `VITE_GALAXY_DEV_AFFORDANCES === "true"` — the dev and dev-deploy
|
||||||
|
// bundles; stripped from prod) and the in-game shell layout's bypass
|
||||||
|
// for the synthetic game id range.
|
||||||
//
|
//
|
||||||
// The accepted JSON shape mirrors `pkg/model/report.Report` as
|
// The accepted JSON shape mirrors `pkg/model/report.Report` as
|
||||||
// emitted by `tools/local-dev/legacy-report/cmd/legacy-report-to-json`.
|
// emitted by `tools/local-dev/legacy-report/cmd/legacy-report-to-json`.
|
||||||
|
|||||||
Reference in New Issue
Block a user