feat: runtime manager
This commit is contained in:
@@ -0,0 +1,534 @@
|
||||
# OpenAPI specification version this document is written against.
openapi: '3.0.3'
# Document metadata. The description carries the trust model of the internal
# listener, its transport rules, the stable error-code list, and the
# caller-identification header contract.
# NOTE(review): the indentation of list-item continuation lines inside the
# description could not be recovered from the mangled source; a two-space
# hanging indent is assumed here -- confirm against the original file.
info:
  title: Galaxy Runtime Manager Internal REST API
  version: v1
  description: |
    This specification documents the internal trusted REST contract of
    `galaxy/rtmanager` served on `RTMANAGER_INTERNAL_HTTP_ADDR`
    (default `:8096`).

    The listener is not reachable from the public internet. Two caller
    classes use it: `Game Master` (inspect / restart / patch / stop /
    cleanup) and `Admin Service` (operational tooling, including
    force-cleanup). Runtime Manager treats every caller on this port as
    trusted and performs no user-level authorization; downstream services
    rely on network segmentation. There is no `X-User-ID` header
    contract.

    Transport rules:
    - request bodies are strict JSON only; unknown fields are rejected
      with `invalid_request`;
    - error responses use `{ "error": { "code", "message" } }`, identical
      to the Lobby contract;
    - stable error codes are: `invalid_request`, `not_found`, `conflict`,
      `service_unavailable`, `internal_error`, `image_pull_failed`,
      `image_ref_not_semver`, `semver_patch_only`,
      `container_start_failed`, `start_config_invalid`,
      `docker_unavailable`, `replay_no_op`.

    Caller identification:
    - the optional `X-Galaxy-Caller` request header carries the calling
      service identity (`gm` for `Game Master`, `admin` for `Admin
      Service`). Runtime Manager records the value as `op_source` in
      the `operation_log` (`gm_rest` or `admin_rest`). When the header
      is missing or carries an unknown value, Runtime Manager defaults
      to `op_source = admin_rest`.
# Base URLs for this API; only the default local internal listener is listed.
servers:
  - url: 'http://localhost:8096'
    description: Default local internal listener for Runtime Manager.
# Tag catalogue used to group operations: lifecycle endpoints vs. probes.
tags:
  - name: Runtimes
    description: >-
      Runtime lifecycle endpoints called by Game Master and Admin Service.
  - name: Probes
    description: Health and readiness probes.
# Operations served by the internal listener: the two probes first, then the
# runtime lifecycle endpoints rooted at /api/v1/internal/runtimes. Shared
# parameters, schemas and error responses are referenced from `components`.
paths:
  # ---- Probes -------------------------------------------------------------
  /healthz:
    get:
      tags: [Probes]
      operationId: internalHealthz
      summary: Internal listener health probe
      responses:
        '200':
          description: Service is alive.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ProbeResponse'
              examples:
                ok:
                  value:
                    status: ok

  /readyz:
    get:
      tags: [Probes]
      operationId: internalReadyz
      summary: Internal listener readiness probe
      description: |
        Returns `200` only when the PostgreSQL primary, Redis master, and
        Docker daemon are reachable and the configured Docker network
        exists. Returns `503` with the standard error envelope otherwise.
      responses:
        '200':
          description: Service is ready to serve traffic.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ProbeResponse'
              examples:
                ready:
                  value:
                    status: ready
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  # ---- Runtime records: read access ---------------------------------------
  /api/v1/internal/runtimes:
    get:
      tags: [Runtimes]
      operationId: internalListRuntimes
      summary: List all known runtime records
      description: |
        Returns the full list of runtime records known to Runtime Manager.
        Pagination is not supported in v1 — the working set is bounded by
        the number of games tracked by Lobby and is small enough to return
        in one response.
      parameters:
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      responses:
        '200':
          description: All runtime records.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimesList'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  /api/v1/internal/runtimes/{game_id}:
    get:
      tags: [Runtimes]
      operationId: internalGetRuntime
      summary: Get one runtime record by game id
      parameters:
        - $ref: '#/components/parameters/GameIDPath'
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      responses:
        '200':
          description: Runtime record for the game.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimeRecord'
        '404':
          $ref: '#/components/responses/NotFoundError'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  # ---- Runtime lifecycle: start / stop / restart / patch / cleanup ---------
  # Start declares no 404 response, unlike the other lifecycle operations.
  /api/v1/internal/runtimes/{game_id}/start:
    post:
      tags: [Runtimes]
      operationId: internalStartRuntime
      summary: Start a game engine container
      description: |
        Pulls the supplied `image_ref` per the configured pull policy and
        creates the engine container. Idempotent: a re-start with the same
        `image_ref` for an already-running record returns `200` with the
        current record and `error_code=replay_no_op` recorded in the
        operation log.
      parameters:
        - $ref: '#/components/parameters/GameIDPath'
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StartRequest'
      responses:
        '200':
          description: Runtime record after the start operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimeRecord'
        '400':
          $ref: '#/components/responses/InvalidRequestError'
        '409':
          $ref: '#/components/responses/ConflictError'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  /api/v1/internal/runtimes/{game_id}/stop:
    post:
      tags: [Runtimes]
      operationId: internalStopRuntime
      summary: Stop a running game engine container
      description: |
        Issues `docker stop` with the configured timeout. Idempotent: stop
        on a record that is already `stopped` or `removed` returns
        success with `error_code=replay_no_op` recorded in the operation
        log.
      parameters:
        - $ref: '#/components/parameters/GameIDPath'
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StopRequest'
      responses:
        '200':
          description: Runtime record after the stop operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimeRecord'
        '400':
          $ref: '#/components/responses/InvalidRequestError'
        '404':
          $ref: '#/components/responses/NotFoundError'
        '409':
          $ref: '#/components/responses/ConflictError'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  # Restart takes no request body; it reuses the record's current image.
  /api/v1/internal/runtimes/{game_id}/restart:
    post:
      tags: [Runtimes]
      operationId: internalRestartRuntime
      summary: Recreate a game engine container with the same image
      description: |
        Stops, removes, and re-runs the container with the current
        `image_ref`. The container id changes; the engine endpoint stays
        stable.
      parameters:
        - $ref: '#/components/parameters/GameIDPath'
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      responses:
        '200':
          description: Runtime record after the restart operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimeRecord'
        '404':
          $ref: '#/components/responses/NotFoundError'
        '409':
          $ref: '#/components/responses/ConflictError'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  /api/v1/internal/runtimes/{game_id}/patch:
    post:
      tags: [Runtimes]
      operationId: internalPatchRuntime
      summary: Recreate a game engine container with a new image
      description: |
        Restart with a new `image_ref`. Allowed only as a semver patch
        within the same major and minor line. Cross-major or cross-minor
        attempts return `409 conflict` with `error_code=semver_patch_only`.
        A non-semver `image_ref` returns `400 invalid_request` with
        `error_code=image_ref_not_semver`.
      parameters:
        - $ref: '#/components/parameters/GameIDPath'
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PatchRequest'
      responses:
        '200':
          description: Runtime record after the patch operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimeRecord'
        '400':
          $ref: '#/components/responses/InvalidRequestError'
        '404':
          $ref: '#/components/responses/NotFoundError'
        '409':
          $ref: '#/components/responses/ConflictError'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'

  /api/v1/internal/runtimes/{game_id}/container:
    delete:
      tags: [Runtimes]
      operationId: internalCleanupRuntimeContainer
      summary: Remove an exited container
      description: |
        Calls `docker rm` for an already-stopped container and updates the
        runtime record to `removed`. Refuses with `409 conflict` if the
        record is still `running`. The host state directory is not
        deleted.
      parameters:
        - $ref: '#/components/parameters/GameIDPath'
        - $ref: '#/components/parameters/XGalaxyCallerHeader'
      responses:
        '200':
          description: Runtime record after the cleanup operation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RuntimeRecord'
        '404':
          $ref: '#/components/responses/NotFoundError'
        '409':
          $ref: '#/components/responses/ConflictError'
        '500':
          $ref: '#/components/responses/InternalError'
        '503':
          $ref: '#/components/responses/ServiceUnavailableError'
# Reusable building blocks referenced from `paths`: the two shared request
# parameters, every request/response schema, and the standard error responses.
components:
  # ---- Shared parameters --------------------------------------------------
  parameters:
    GameIDPath:
      name: game_id
      in: path
      required: true
      description: Opaque stable game identifier owned by Lobby.
      schema:
        type: string

    XGalaxyCallerHeader:
      name: X-Galaxy-Caller
      in: header
      required: false
      description: |
        Identifies the calling service so Runtime Manager can record the
        right `op_source` in `operation_log` (`gm_rest` for `gm`,
        `admin_rest` for `admin`). Missing or unknown values default to
        `admin_rest`.
      # NOTE(review): the description says unknown values fall back to
      # `admin_rest`, while this enum lists only `gm` and `admin`; confirm
      # that strict request validators are not expected to reject others.
      schema:
        type: string
        enum: [gm, admin]

  # ---- Schemas --------------------------------------------------------------
  schemas:
    # One runtime record per game. Container-related fields are nullable and
    # documented as null once the status is `removed`.
    RuntimeRecord:
      type: object
      additionalProperties: false
      required:
        - game_id
        - status
        - state_path
        - docker_network
        - last_op_at
        - created_at
      properties:
        game_id:
          type: string
          description: Opaque stable game identifier owned by Lobby.
        status:
          type: string
          enum: [running, stopped, removed]
          description: Current runtime status maintained by Runtime Manager.
        current_container_id:
          type: string
          nullable: true
          description: Docker container id; null when status is removed.
        current_image_ref:
          type: string
          nullable: true
          description: >-
            Image reference of the current container; null when status is
            removed.
        engine_endpoint:
          type: string
          nullable: true
          description: >-
            Stable engine URL `http://galaxy-game-{game_id}:8080`; null when
            status is removed.
        state_path:
          type: string
          description: >-
            Absolute host path of the per-game bind-mounted state directory.
        docker_network:
          type: string
          description: >-
            Docker network name observed when the container was created.
        started_at:
          type: string
          format: date-time
          nullable: true
          description: UTC timestamp of the most recent successful start.
        stopped_at:
          type: string
          format: date-time
          nullable: true
          description: UTC timestamp of the most recent stop.
        removed_at:
          type: string
          format: date-time
          nullable: true
          description: UTC timestamp of the most recent container removal.
        last_op_at:
          type: string
          format: date-time
          description: >-
            UTC timestamp of the most recent operation; drives retention TTL.
        created_at:
          type: string
          format: date-time
          description: UTC timestamp of the first observation of this game.

    # Envelope for the list endpoint: a single `items` array of records.
    RuntimesList:
      type: object
      additionalProperties: false
      required: [items]
      properties:
        items:
          type: array
          items:
            $ref: '#/components/schemas/RuntimeRecord'

    # Request bodies. All of them forbid additional properties.
    StartRequest:
      type: object
      additionalProperties: false
      required: [image_ref]
      properties:
        image_ref:
          type: string
          description: >-
            Docker reference resolved by the producer (Game Master or Admin
            Service).

    StopRequest:
      type: object
      additionalProperties: false
      required: [reason]
      properties:
        reason:
          $ref: '#/components/schemas/StopReason'

    PatchRequest:
      type: object
      additionalProperties: false
      required: [image_ref]
      properties:
        image_ref:
          type: string
          description: >-
            New Docker reference within the same semver major and minor line.

    StopReason:
      type: string
      enum: [orphan_cleanup, cancelled, finished, admin_request, timeout]
      description: >-
        Reason carried in the stop envelope and recorded in the operation log.

    # Closed set of error codes used in the error envelope's `code` field.
    ErrorCode:
      type: string
      enum:
        - invalid_request
        - not_found
        - conflict
        - service_unavailable
        - internal_error
        - image_pull_failed
        - image_ref_not_semver
        - semver_patch_only
        - container_start_failed
        - start_config_invalid
        - docker_unavailable
        - replay_no_op
      description: Stable internal API error code.

    # Body of the probe endpoints; their examples use `ok` and `ready`.
    ProbeResponse:
      type: object
      additionalProperties: false
      required: [status]
      properties:
        status:
          type: string

    # Error envelope: { "error": { "code", "message" } }.
    ErrorResponse:
      type: object
      additionalProperties: false
      required: [error]
      properties:
        error:
          $ref: '#/components/schemas/ErrorBody'

    ErrorBody:
      type: object
      additionalProperties: false
      required: [code, message]
      properties:
        code:
          $ref: '#/components/schemas/ErrorCode'
        message:
          type: string
          description: Human-readable trusted error message.

  # ---- Standard error responses -------------------------------------------
  # Every response below carries the ErrorResponse envelope plus one example.
  responses:
    InvalidRequestError:
      description: Request validation failed.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          examples:
            invalidRequest:
              value:
                error:
                  code: invalid_request
                  message: request is invalid

    NotFoundError:
      description: The requested runtime record does not exist.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          examples:
            notFound:
              value:
                error:
                  code: not_found
                  message: runtime record not found

    ConflictError:
      description: >-
        The requested operation is not allowed in the current runtime state.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          examples:
            conflict:
              value:
                error:
                  code: conflict
                  message: operation not allowed in current status

    InternalError:
      description: Unexpected internal service error.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          examples:
            internal:
              value:
                error:
                  code: internal_error
                  message: internal server error

    ServiceUnavailableError:
      description: An upstream dependency is unavailable.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          examples:
            unavailable:
              value:
                error:
                  code: service_unavailable
                  message: service is unavailable
Reference in New Issue
Block a user