feat: runtime manager
This commit is contained in:
@@ -0,0 +1,226 @@
|
||||
# AsyncAPI contract for the Redis Streams that carry runtime jobs between
# Game Lobby (producer of jobs, consumer of results) and Runtime Manager
# (consumer of jobs, producer of results).
asyncapi: '3.1.0'

info:
  title: Galaxy Runtime Jobs Stream Contract
  # Quoted so the version is always read as a string, never as a number.
  version: '1.0.0'
  # Literal block scalar: line breaks and the blank lines between paragraphs
  # are part of the rendered description.
  description: |
    Stable Redis Streams contract carrying runtime jobs between
    `Game Lobby` and `Runtime Manager`.

    `Game Lobby` is the sole producer for `runtime:start_jobs` and
    `runtime:stop_jobs`. `Runtime Manager` consumes both, executes the
    Docker work, and publishes one outcome per job to `runtime:job_results`,
    which is consumed by `Game Lobby`'s runtime-job-result worker.

    Replay safety:
    - duplicate start jobs for an already-running game with the same
      `image_ref` produce a `success` job result with
      `error_code=replay_no_op`;
    - duplicate stop jobs for an already-stopped or already-removed game
      produce a `success` job result with `error_code=replay_no_op`.

    The `reason` enum on `runtime:stop_jobs` is fixed in this contract.
    Adding a new value requires a contract bump and a coordinated
    Lobby/Runtime Manager change.

# One channel per stream address. Each channel carries exactly one message
# type, referenced from components/messages.
channels:
  # Start requests addressed to runtime:start_jobs.
  startJobs:
    address: runtime:start_jobs
    messages:
      runtimeStartJob:
        $ref: '#/components/messages/RuntimeStartJob'

  # Stop requests addressed to runtime:stop_jobs.
  stopJobs:
    address: runtime:stop_jobs
    messages:
      runtimeStopJob:
        $ref: '#/components/messages/RuntimeStopJob'

  # Job outcomes addressed to runtime:job_results.
  jobResults:
    address: runtime:job_results
    messages:
      runtimeJobResult:
        $ref: '#/components/messages/RuntimeJobResult'

# Operations as seen by the application that receives start/stop jobs and
# sends job results. Each operation points at its channel and at the single
# message declared on that channel.
operations:
  consumeStartJob:
    action: receive
    summary: Receive one start job from Game Lobby and run a container.
    channel:
      $ref: '#/channels/startJobs'
    messages:
      - $ref: '#/channels/startJobs/messages/runtimeStartJob'

  consumeStopJob:
    action: receive
    summary: Receive one stop job from Game Lobby and stop a container.
    channel:
      $ref: '#/channels/stopJobs'
    messages:
      - $ref: '#/channels/stopJobs/messages/runtimeStopJob'

  publishJobResult:
    action: send
    summary: Publish one runtime job outcome for Game Lobby.
    channel:
      $ref: '#/channels/jobResults'
    messages:
      - $ref: '#/channels/jobResults/messages/runtimeJobResult'

# Reusable message and schema definitions referenced from the channels.
components:
  # Message envelopes: one per stream, each with a payload schema and
  # example payloads.
  messages:
    RuntimeStartJob:
      name: RuntimeStartJob
      title: Runtime start job
      summary: Lobby request to start one game engine container.
      payload:
        $ref: '#/components/schemas/RuntimeStartJobPayload'
      examples:
        - name: startJob
          summary: Start a game engine container with a producer-resolved image_ref.
          payload:
            game_id: game-123
            image_ref: registry.example.com/galaxy/game:1.4.7
            requested_at_ms: 1775121700000

    RuntimeStopJob:
      name: RuntimeStopJob
      title: Runtime stop job
      summary: Lobby request to stop one game engine container.
      payload:
        $ref: '#/components/schemas/RuntimeStopJobPayload'
      examples:
        - name: cancelled
          summary: Stop the engine because the game was cancelled.
          payload:
            game_id: game-123
            reason: cancelled
            requested_at_ms: 1775121800000
        - name: orphanCleanup
          summary: Stop an engine whose Lobby metadata persistence failed.
          payload:
            game_id: game-456
            reason: orphan_cleanup
            requested_at_ms: 1775121810000

    RuntimeJobResult:
      name: RuntimeJobResult
      title: Runtime job result
      summary: Outcome of one start or stop job.
      payload:
        $ref: '#/components/schemas/RuntimeJobResultPayload'
      # Every example sets all six required fields; fields that do not apply
      # are sent as empty strings rather than omitted.
      examples:
        - name: startSuccess
          summary: Successful start, container_id and engine_endpoint are populated.
          payload:
            game_id: game-123
            outcome: success
            # Quoted: hex-looking ids can otherwise be read as numbers.
            container_id: '7c2b5d1a4f6e'
            engine_endpoint: http://galaxy-game-game-123:8080
            error_code: ""
            error_message: ""
        - name: imagePullFailed
          summary: Failed start due to an image pull error.
          payload:
            game_id: game-789
            outcome: failure
            container_id: ""
            engine_endpoint: ""
            error_code: image_pull_failed
            error_message: "manifest unknown"
        # A replay is reported as success but still carries a non-empty
        # error_code (replay_no_op).
        - name: replayNoOp
          summary: Idempotent replay; the job was a no-op.
          payload:
            game_id: game-123
            outcome: success
            container_id: '7c2b5d1a4f6e'
            engine_endpoint: http://galaxy-game-game-123:8080
            error_code: replay_no_op
            error_message: ""

  # Payload schemas. All three object schemas are closed
  # (additionalProperties: false) and require every declared property.
  schemas:
    RuntimeStartJobPayload:
      type: object
      additionalProperties: false
      required:
        - game_id
        - image_ref
        - requested_at_ms
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier owned by Lobby.
        image_ref:
          type: string
          description: Docker reference resolved by Lobby from LOBBY_ENGINE_IMAGE_TEMPLATE.
        requested_at_ms:
          type: integer
          format: int64
          description: UTC milliseconds; used for diagnostics, not authoritative.

    RuntimeStopJobPayload:
      type: object
      additionalProperties: false
      required:
        - game_id
        - reason
        - requested_at_ms
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier owned by Lobby.
        reason:
          $ref: '#/components/schemas/StopReason'
        requested_at_ms:
          type: integer
          format: int64
          description: UTC milliseconds; used for diagnostics, not authoritative.

    RuntimeJobResultPayload:
      type: object
      additionalProperties: false
      required:
        - game_id
        - outcome
        - container_id
        - engine_endpoint
        - error_code
        - error_message
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier matching the originating job.
        outcome:
          type: string
          enum:
            - success
            - failure
          description: High-level outcome of the runtime job.
        # NOTE(review): a stop-job replay for an already-removed game is
        # reported as success; confirm whether container_id and
        # engine_endpoint are still populated in that case, since the
        # descriptions below say "populated on success".
        container_id:
          type: string
          description: Docker container id of the engine; populated on success, empty on failure.
        engine_endpoint:
          type: string
          description: Stable engine URL `http://galaxy-game-{game_id}:8080`; populated on success, empty on failure.
        error_code:
          $ref: '#/components/schemas/ErrorCode'
        error_message:
          type: string
          description: Operator-readable detail; empty when not applicable.

    # Closed set of stop reasons.
    StopReason:
      type: string
      enum:
        - orphan_cleanup
        - cancelled
        - finished
        - admin_request
        - timeout
      description: Reason value carried by every runtime:stop_jobs envelope.

    # Closed set of result codes. The leading "" entry is the explicit
    # "no code" value.
    ErrorCode:
      type: string
      enum:
        - ""
        - invalid_request
        - not_found
        - conflict
        - service_unavailable
        - internal_error
        - image_pull_failed
        - image_ref_not_semver
        - semver_patch_only
        - container_start_failed
        - start_config_invalid
        - docker_unavailable
        - replay_no_op
      description: |
        Stable error code identical to the internal REST contract. The empty
        string is a valid value for successful job results that did not
        produce a code (the field is required to be present so consumers
        can rely on the schema).
Reference in New Issue
Block a user