feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
+226
View File
@@ -0,0 +1,226 @@
# AsyncAPI document describing the runtime-job streams exchanged between
# Game Lobby and Runtime Manager.
asyncapi: '3.1.0'
info:
  title: Galaxy Runtime Jobs Stream Contract
  # Quoted so the contract version is always read as a string.
  version: '1.0.0'
  # Literal block: the text is CommonMark, so paragraph breaks and the
  # bullet list layout are significant and must be kept as written.
  description: |
    Stable Redis Streams contract carrying runtime jobs between
    `Game Lobby` and `Runtime Manager`.

    `Game Lobby` is the sole producer for `runtime:start_jobs` and
    `runtime:stop_jobs`. `Runtime Manager` consumes both, executes the
    Docker work, and publishes one outcome per job to `runtime:job_results`,
    which is consumed by `Game Lobby`'s runtime-job-result worker.

    Replay safety:

    - duplicate start jobs for an already-running game with the same
      `image_ref` produce a `success` job result with
      `error_code=replay_no_op`;
    - duplicate stop jobs for an already-stopped or already-removed game
      produce a `success` job result with `error_code=replay_no_op`.

    The `reason` enum on `runtime:stop_jobs` is fixed in this contract.
    Adding a new value requires a contract bump and a coordinated
    Lobby/Runtime Manager change.
# One channel per stream; `address` is the stream key named in the contract
# description. Each channel carries exactly one message type.
channels:
  # Start jobs consumed by the `consumeStartJob` operation.
  startJobs:
    # Quoted because the key contains a colon.
    address: 'runtime:start_jobs'
    messages:
      runtimeStartJob:
        $ref: '#/components/messages/RuntimeStartJob'

  # Stop jobs consumed by the `consumeStopJob` operation.
  stopJobs:
    address: 'runtime:stop_jobs'
    messages:
      runtimeStopJob:
        $ref: '#/components/messages/RuntimeStopJob'

  # Job outcomes published by the `publishJobResult` operation.
  jobResults:
    address: 'runtime:job_results'
    messages:
      runtimeJobResult:
        $ref: '#/components/messages/RuntimeJobResult'
# Operations as declared here: two `receive` operations for incoming jobs
# and one `send` operation for job outcomes. Every `messages` entry points
# at the message key declared on the operation's own channel.
operations:
  consumeStartJob:
    action: receive
    summary: Receive one start job from Game Lobby and run a container.
    channel:
      $ref: '#/channels/startJobs'
    messages:
      - $ref: '#/channels/startJobs/messages/runtimeStartJob'

  consumeStopJob:
    action: receive
    summary: Receive one stop job from Game Lobby and stop a container.
    channel:
      $ref: '#/channels/stopJobs'
    messages:
      - $ref: '#/channels/stopJobs/messages/runtimeStopJob'

  publishJobResult:
    action: send
    summary: Publish one runtime job outcome for Game Lobby.
    channel:
      $ref: '#/channels/jobResults'
    messages:
      - $ref: '#/channels/jobResults/messages/runtimeJobResult'
# Reusable message and schema definitions referenced from `channels`.
components:
  messages:
    # Message carried on the startJobs channel.
    RuntimeStartJob:
      name: RuntimeStartJob
      title: Runtime start job
      summary: Lobby request to start one game engine container.
      payload:
        $ref: '#/components/schemas/RuntimeStartJobPayload'
      examples:
        - name: startJob
          summary: Start a game engine container with a producer-resolved image_ref.
          payload:
            game_id: game-123
            # Quoted: the reference contains colons and a version-like tag.
            image_ref: 'registry.example.com/galaxy/game:1.4.7'
            requested_at_ms: 1775121700000

    # Message carried on the stopJobs channel.
    RuntimeStopJob:
      name: RuntimeStopJob
      title: Runtime stop job
      summary: Lobby request to stop one game engine container.
      payload:
        $ref: '#/components/schemas/RuntimeStopJobPayload'
      examples:
        - name: cancelled
          summary: Stop the engine because the game was cancelled.
          payload:
            game_id: game-123
            reason: cancelled
            requested_at_ms: 1775121800000
        - name: orphanCleanup
          summary: Stop an engine whose Lobby metadata persistence failed.
          payload:
            game_id: game-456
            reason: orphan_cleanup
            requested_at_ms: 1775121810000

    # Message carried on the jobResults channel.
    RuntimeJobResult:
      name: RuntimeJobResult
      title: Runtime job result
      summary: Outcome of one start or stop job.
      payload:
        $ref: '#/components/schemas/RuntimeJobResultPayload'
      examples:
        - name: startSuccess
          summary: Successful start, container_id and engine_endpoint are populated.
          payload:
            game_id: game-123
            outcome: success
            # Quoted: hex-looking ids can be all digits (or digits plus
            # `e`) and would then be read as numbers instead of strings.
            container_id: '7c2b5d1a4f6e'
            engine_endpoint: 'http://galaxy-game-game-123:8080'
            error_code: ""
            error_message: ""
        - name: imagePullFailed
          summary: Failed start due to an image pull error.
          payload:
            game_id: game-789
            outcome: failure
            container_id: ""
            engine_endpoint: ""
            error_code: image_pull_failed
            error_message: "manifest unknown"
        - name: replayNoOp
          summary: Idempotent replay; the job was a no-op.
          payload:
            game_id: game-123
            outcome: success
            container_id: '7c2b5d1a4f6e'
            engine_endpoint: 'http://galaxy-game-game-123:8080'
            error_code: replay_no_op
            error_message: ""

  # All payload schemas are closed objects (`additionalProperties: false`)
  # and list every declared property as required.
  schemas:
    RuntimeStartJobPayload:
      type: object
      additionalProperties: false
      required:
        - game_id
        - image_ref
        - requested_at_ms
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier owned by Lobby.
        image_ref:
          type: string
          description: Docker reference resolved by Lobby from LOBBY_ENGINE_IMAGE_TEMPLATE.
        requested_at_ms:
          type: integer
          format: int64
          description: UTC milliseconds; used for diagnostics, not authoritative.

    RuntimeStopJobPayload:
      type: object
      additionalProperties: false
      required:
        - game_id
        - reason
        - requested_at_ms
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier owned by Lobby.
        reason:
          $ref: '#/components/schemas/StopReason'
        requested_at_ms:
          type: integer
          format: int64
          description: UTC milliseconds; used for diagnostics, not authoritative.

    RuntimeJobResultPayload:
      type: object
      additionalProperties: false
      # Every field is always present; "not applicable" is expressed as an
      # empty string rather than by omitting the key.
      required:
        - game_id
        - outcome
        - container_id
        - engine_endpoint
        - error_code
        - error_message
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier matching the originating job.
        outcome:
          type: string
          enum:
            - success
            - failure
          description: High-level outcome of the runtime job.
        container_id:
          type: string
          description: Docker container id of the engine; populated on success, empty on failure.
        engine_endpoint:
          type: string
          # Folded scalar: renders as a single line with no trailing newline.
          description: >-
            Stable engine URL `http://galaxy-game-{game_id}:8080`;
            populated on success, empty on failure.
        error_code:
          $ref: '#/components/schemas/ErrorCode'
        error_message:
          type: string
          description: Operator-readable detail; empty when not applicable.

    StopReason:
      type: string
      enum:
        - orphan_cleanup
        - cancelled
        - finished
        - admin_request
        - timeout
      description: Reason value carried by every runtime:stop_jobs envelope.

    ErrorCode:
      type: string
      enum:
        # The empty string is a deliberate member; see the description below.
        - ""
        - invalid_request
        - not_found
        - conflict
        - service_unavailable
        - internal_error
        - image_pull_failed
        - image_ref_not_semver
        - semver_patch_only
        - container_start_failed
        - start_config_invalid
        - docker_unavailable
        - replay_no_op
      description: |
        Stable error code identical to the internal REST contract. The empty
        string is a valid value for successful job results that did not
        produce a code (the field is required to be present so consumers
        can rely on the schema).