ui/phase-25: backend turn-cutoff guard + auto-pause + UI sync protocol

Backend now owns the turn-cutoff and pause guards the order tab
relies on: the scheduler flips runtime_status between
generation_in_progress and running around every engine tick, a
failed tick auto-pauses the game through OnRuntimeSnapshot, and a
new game.paused notification kind fans out alongside
game.turn.ready. The user-games handlers reject submits with
HTTP 409 turn_already_closed or game_paused depending on the
runtime state.

UI delegates auto-sync to a new OrderQueue: offline detection,
single retry on reconnect, conflict / paused classification.
OrderDraftStore surfaces conflictBanner / pausedBanner runes,
clears them on local mutation or on a game.turn.ready push via
resetForNewTurn. The order tab renders the matching banners and
the new conflict per-row badge; i18n bundles cover en + ru.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ilia Denisov
2026-05-11 22:00:16 +02:00
parent bbdcc36e05
commit 2ca47eb4df
35 changed files with 2539 additions and 143 deletions
+19
View File
@@ -42,4 +42,23 @@ var (
// ErrShutdown means the runtime service has stopped accepting
// work because the parent context was cancelled.
ErrShutdown = errors.New("runtime: shutting down")
// ErrTurnAlreadyClosed reports that the runtime is currently
// producing a turn — runtime status is `generation_in_progress`
// — and the engine is not accepting writes for the closing
// turn. Handlers map this to HTTP 409 with httperr code
// `turn_already_closed`; the UI shows a conflict banner and
// waits for the next `game.turn.ready` push.
ErrTurnAlreadyClosed = errors.New("runtime: turn already closed")
// ErrGamePaused reports that the game is not in a state that
// accepts user-games commands or orders: the runtime row
// carries `paused = true`, or the runtime status lands on any
// terminal value (`engine_unreachable`, `generation_failed`,
// `stopped`, `finished`, `removed`), or the game has not yet
// finished bootstrapping (`starting`). Handlers map this to
// HTTP 409 with httperr code `game_paused`; the UI surfaces a
// pause banner and waits for an admin resume or a fresh
// snapshot.
ErrGamePaused = errors.New("runtime: game paused")
)
@@ -0,0 +1,82 @@
package runtime
import (
"errors"
"testing"
)
// TestOrdersAcceptStatus pins down the Phase 25 pre-check that
// gates the user-games command/order handlers against the runtime
// record. The decision must distinguish a turn cutoff (engine is
// producing) from a paused game so the UI can surface the right
// banner; all other non-running runtime statuses collapse into
// `ErrGamePaused`.
func TestOrdersAcceptStatus(t *testing.T) {
t.Parallel()
tests := []struct {
name string
rec RuntimeRecord
want error
}{
{
name: "running and not paused accepts orders",
rec: RuntimeRecord{Status: RuntimeStatusRunning, Paused: false},
want: nil,
},
{
name: "running but paused returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusRunning, Paused: true},
want: ErrGamePaused,
},
{
name: "generation in progress returns turn already closed",
rec: RuntimeRecord{Status: RuntimeStatusGenerationInProgress},
want: ErrTurnAlreadyClosed,
},
{
name: "generation failed returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusGenerationFailed},
want: ErrGamePaused,
},
{
name: "engine unreachable returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusEngineUnreachable},
want: ErrGamePaused,
},
{
name: "stopped returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusStopped},
want: ErrGamePaused,
},
{
name: "finished returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusFinished},
want: ErrGamePaused,
},
{
name: "removed returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusRemoved},
want: ErrGamePaused,
},
{
name: "starting returns game paused",
rec: RuntimeRecord{Status: RuntimeStatusStarting},
want: ErrGamePaused,
},
{
name: "paused takes precedence over generation in progress",
rec: RuntimeRecord{Status: RuntimeStatusGenerationInProgress, Paused: true},
want: ErrGamePaused,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
got := OrdersAcceptStatus(tt.rec)
if !errors.Is(got, tt.want) {
t.Errorf("OrdersAcceptStatus = %v, want %v", got, tt.want)
}
})
}
}
+38 -1
View File
@@ -7,6 +7,7 @@ import (
"time"
"galaxy/backend/internal/dockerclient"
"galaxy/backend/internal/engineclient"
"galaxy/cronutil"
"github.com/google/uuid"
@@ -213,6 +214,22 @@ func (sch *Scheduler) loop(ctx context.Context, rec RuntimeRecord, done chan str
// tick runs one engine /admin/turn call under the per-game mutex,
// publishes the resulting snapshot, and clears `skip_next_tick`.
//
// Phase 25 wraps the engine call between two runtime-status flips so
// the backend order handler can reject late submits while the engine
// is producing:
//
// - before `Engine.Turn`: runtime status moves to
// `generation_in_progress`; the loop's running-only guard tolerates
// this because the flip back happens inside the same tick.
// - on success: runtime status moves back to `running` (unless the
// engine reports `finished`, in which case `publishSnapshot` has
// already promoted the row to `finished`).
// - on error: runtime status moves to `generation_failed` (engine
// validation failure) or `engine_unreachable` (transport / 5xx).
// The matching snapshot is forwarded to lobby through
// `publishFailureSnapshot` so lobby can flip the game to `paused`
// and emit `game.paused`.
func (sch *Scheduler) tick(ctx context.Context, rec RuntimeRecord) error {
mu := sch.svc.gameLock(rec.GameID)
if !mu.TryLock() {
@@ -224,10 +241,24 @@ func (sch *Scheduler) tick(ctx context.Context, rec RuntimeRecord) error {
if err != nil {
return err
}
if _, err := sch.svc.transitionRuntimeStatus(ctx, rec.GameID, RuntimeStatusGenerationInProgress, ""); err != nil {
sch.svc.completeOperation(ctx, op, err)
return err
}
state, err := sch.svc.deps.Engine.Turn(ctx, rec.EngineEndpoint)
if err != nil {
sch.svc.completeOperation(ctx, op, err)
_, _ = sch.svc.transitionRuntimeStatus(ctx, rec.GameID, RuntimeStatusEngineUnreachable, "")
failureStatus := RuntimeStatusEngineUnreachable
if errors.Is(err, engineclient.ErrEngineValidation) {
failureStatus = RuntimeStatusGenerationFailed
}
_, _ = sch.svc.transitionRuntimeStatus(ctx, rec.GameID, failureStatus, "down")
if pubErr := sch.svc.publishFailureSnapshot(ctx, rec.GameID, failureStatus); pubErr != nil {
sch.svc.deps.Logger.Warn("publish failure snapshot to lobby",
zap.String("game_id", rec.GameID.String()),
zap.String("runtime_status", failureStatus),
zap.Error(pubErr))
}
// On engine unreachable, also clear skip_next_tick so the next
// real tick can start fresh.
_ = sch.clearSkipFlag(ctx, rec.GameID)
@@ -244,6 +275,12 @@ func (sch *Scheduler) tick(ctx context.Context, rec RuntimeRecord) error {
sch.svc.completeOperation(ctx, op, err)
return err
}
if !state.Finished {
// `publishSnapshot` patches CurrentTurn / EngineHealth but does
// not reset the status column; reopen the orders window here so
// the next loop iteration finds the runtime back in `running`.
_, _ = sch.svc.transitionRuntimeStatus(ctx, rec.GameID, RuntimeStatusRunning, "ok")
}
sch.svc.completeOperation(ctx, op, nil)
_ = sch.clearSkipFlag(ctx, rec.GameID)
return nil
+78
View File
@@ -257,6 +257,57 @@ func (s *Service) ResolvePlayerMapping(ctx context.Context, gameID, userID uuid.
return s.deps.Store.LoadPlayerMapping(ctx, gameID, userID)
}
// CheckOrdersAccept verifies that the runtime is in a state that
// accepts user-games commands and orders. It is called by the user
// game-proxy handlers (`Commands`, `Orders`) before forwarding to
// engine, so the backend's turn-cutoff and pause guards run before
// network traffic leaves the host. The decision itself lives in the
// pure helper `OrdersAcceptStatus` so it can be unit-tested without
// constructing a full Service.
//
// A missing runtime row is surfaced as `ErrNotFound` so the handler
// keeps its existing 404 behaviour.
func (s *Service) CheckOrdersAccept(ctx context.Context, gameID uuid.UUID) error {
rec, err := s.GetRuntime(ctx, gameID)
if err != nil {
return err
}
return OrdersAcceptStatus(rec)
}
// OrdersAcceptStatus inspects a runtime record and returns the
// matching sentinel for the user-games order/command pre-check:
//
// - `runtime_status = generation_in_progress` → `ErrTurnAlreadyClosed`.
// The cron-driven `Scheduler.tick` has flipped the row before
// calling the engine. The order window reopens once the tick
// completes successfully.
//
// - `runtime_status ∈ {engine_unreachable, generation_failed,
// stopped, finished, removed, starting}` → `ErrGamePaused`.
// The game is not in a state that accepts writes; the lobby
// state machine has either already flipped the game to
// `paused` / `finished` or is still bootstrapping.
//
// - `runtime.Paused = true` → `ErrGamePaused`. The lobby admin
// paused the game explicitly.
//
// - `runtime_status = running` and `Paused = false` → nil
// (forward).
func OrdersAcceptStatus(rec RuntimeRecord) error {
if rec.Paused {
return ErrGamePaused
}
switch rec.Status {
case RuntimeStatusRunning:
return nil
case RuntimeStatusGenerationInProgress:
return ErrTurnAlreadyClosed
default:
return ErrGamePaused
}
}
// EngineEndpoint returns the engine endpoint URL for gameID. Used by
// the user game-proxy handlers.
func (s *Service) EngineEndpoint(ctx context.Context, gameID uuid.UUID) (string, error) {
@@ -812,6 +863,33 @@ func (s *Service) publishSnapshot(ctx context.Context, gameID uuid.UUID, state r
return nil
}
// publishFailureSnapshot forwards a runtime-failure observation to
// lobby so the game lifecycle can react (e.g. flipping `running` to
// `paused` on `engine_unreachable` / `generation_failed` per Phase
// 25). The snapshot carries the unchanged `current_turn` because no
// new turn has been produced; lobby uses the turn number to anchor
// the `game.paused` idempotency key.
//
// The call is best-effort: lobby errors are returned to the caller
// (the scheduler tick) so the warn-level logging stays in one place.
// A missing runtime cache entry (e.g. the row was just removed by
// the reconciler) collapses into a silent no-op.
func (s *Service) publishFailureSnapshot(ctx context.Context, gameID uuid.UUID, runtimeStatus string) error {
if s.deps.Lobby == nil {
return nil
}
rec, ok := s.deps.Cache.GetRuntime(gameID)
if !ok {
return nil
}
return s.deps.Lobby.OnRuntimeSnapshot(ctx, gameID, LobbySnapshot{
CurrentTurn: rec.CurrentTurn,
RuntimeStatus: runtimeStatus,
EngineHealth: "down",
ObservedAt: s.deps.Now().UTC(),
})
}
// transitionRuntimeStatus updates the status / engine_health columns
// and refreshes the cache.
func (s *Service) transitionRuntimeStatus(ctx context.Context, gameID uuid.UUID, status, health string) (RuntimeRecord, error) {