feat: runtime manager
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
# syntax=docker/dockerfile:1.7

# Build context is the workspace root (galaxy/), not the game/ subdirectory,
# because the game module pulls galaxy/{calc,error,model,util} through the
# go.work replace directives. Build with:
#
#   docker build -t galaxy/game:test -f game/Dockerfile .

# ---- Stage 1: build a statically linked engine binary ----------------------
FROM golang:1.26.2-alpine AS builder
WORKDIR /src
ENV CGO_ENABLED=0 GOFLAGS=-trimpath

# Only the four pkg/ modules the engine binary actually imports.
COPY pkg/calc/ ./pkg/calc/
COPY pkg/error/ ./pkg/error/
COPY pkg/model/ ./pkg/model/
COPY pkg/util/ ./pkg/util/
COPY game/ ./game/

# Minimal workspace. The repository-level go.work also lists service
# modules (lobby, notification, ...) that the engine binary does not
# need, so we synthesise a workspace tailored to this image instead of
# dragging the rest of the monorepo into the build context.
RUN <<'EOF' cat > go.work
go 1.26.2

use (
	./game
	./pkg/calc
	./pkg/error
	./pkg/model
	./pkg/util
)

replace (
	galaxy/calc v0.0.0 => ./pkg/calc
	galaxy/error v0.0.0 => ./pkg/error
	galaxy/model v0.0.0 => ./pkg/model
	galaxy/util v0.0.0 => ./pkg/util
)
EOF

# Cache mounts keep the Go build cache and module cache across builds
# without baking them into an image layer.
RUN --mount=type=cache,target=/root/.cache/go-build \
    --mount=type=cache,target=/go/pkg/mod \
    go build -ldflags="-s -w" -o /out/server ./game/cmd/http

# Empty directory that becomes the default storage location in the runtime
# image. It has to be created here because the distroless runtime image has
# no shell, so a `RUN mkdir` in that stage is impossible.
RUN mkdir -p /out/state

# ---- Stage 2: minimal non-root runtime -------------------------------------
FROM gcr.io/distroless/static-debian12:nonroot AS runtime

# Resource limits that Runtime Manager reads from the image at create time.
LABEL com.galaxy.cpu_quota="1.0"
LABEL com.galaxy.memory="512m"
LABEL com.galaxy.pids_limit="512"
LABEL org.opencontainers.image.title="galaxy-game-engine"

ENV STORAGE_PATH=/var/lib/galaxy-game
EXPOSE 8080
USER nonroot:nonroot

COPY --from=builder /out/server /usr/local/bin/server

# Pre-create the default STORAGE_PATH and hand it to the nonroot user
# (UID:GID 65532:65532). Without this the directory does not exist in the
# image and the unprivileged process cannot create it under /var/lib, so a
# container started without a bind mount could not persist any state. A bind
# mount on the same path simply shadows this directory.
COPY --from=builder --chown=65532:65532 /out/state /var/lib/galaxy-game

ENTRYPOINT ["/usr/local/bin/server"]
|
||||
+180
-4
@@ -1,8 +1,184 @@
|
||||
# Game Service Engine
|
||||
|
||||
Galaxy game engine — hosts a single game instance and exposes a REST API for
|
||||
game initialization, turn advancement, player reports, and command execution.
|
||||
`galaxy/game` is the game engine binary that runs inside one
|
||||
`galaxy-game-{game_id}` container. It hosts a single game instance and exposes
|
||||
a REST API for game initialization, turn advancement, player reports, and
|
||||
batched player command execution.
|
||||
|
||||
## API
|
||||
## References
|
||||
|
||||
The REST contract is documented in [`openapi.yaml`](openapi.yaml).
|
||||
- [`openapi.yaml`](openapi.yaml) — REST contract.
|
||||
- [`../ARCHITECTURE.md`](../ARCHITECTURE.md) — system architecture.
|
||||
- [`../rtmanager/README.md`](../rtmanager/README.md) — Runtime Manager owns
|
||||
container lifecycle for this binary.
|
||||
|
||||
## Container model
|
||||
|
||||
The engine is meant to be run inside a Docker container managed by
|
||||
`Runtime Manager`. One container hosts exactly one game instance and listens
|
||||
on TCP `:8080` inside the container. Outside the container the endpoint is
|
||||
addressed as `http://galaxy-game-{game_id}:8080` through Docker's embedded DNS
|
||||
on the configured `RTMANAGER_DOCKER_NETWORK`.
|
||||
|
||||
The container image is built from [`Dockerfile`](Dockerfile) at the root of
|
||||
this module. The Dockerfile is a multi-stage build (Go builder + small runtime
|
||||
base) that exposes `:8080`, runs as a non-root user, and ships container
|
||||
labels that `Runtime Manager` reads at create time:
|
||||
|
||||
| Label | Meaning |
|
||||
| --- | --- |
|
||||
| `com.galaxy.cpu_quota` | CPU quota for the container (`--cpus`). |
|
||||
| `com.galaxy.memory` | Memory limit for the container (`--memory`). |
|
||||
| `com.galaxy.pids_limit` | PID limit for the container (`--pids-limit`). |
|
||||
| `org.opencontainers.image.title` | `galaxy-game-engine`. |
|
||||
|
||||
Image defaults are `cpu_quota=1.0`, `memory=512m`, `pids_limit=512`. Operators
|
||||
override them at image-build time by editing the Dockerfile labels; producers
|
||||
do not pass per-game limits.
|
||||
|
||||
## Endpoints
|
||||
|
||||
The contract is the union of `openapi.yaml` and the technical liveness probe
|
||||
described below.
|
||||
|
||||
### Game endpoints
|
||||
|
||||
Documented in [`openapi.yaml`](openapi.yaml). When the engine has not been
|
||||
initialised through `POST /api/v1/init`, game endpoints respond `501 Not
|
||||
Implemented` to make the uninitialised state unambiguous.
|
||||
|
||||
### `GET /healthz`
|
||||
|
||||
Technical liveness probe used by `Runtime Manager` and operator tooling.
|
||||
|
||||
- Returns `{"status":"ok"}` with HTTP `200` whenever the HTTP server is
|
||||
serving requests, regardless of whether the engine has been initialised
|
||||
through `POST /api/v1/init`.
|
||||
- Carries no game-state semantics. Use `GET /api/v1/status` for game-state
|
||||
inspection.
|
||||
|
||||
This endpoint exists so that `Runtime Manager` can probe a freshly started
|
||||
container before `init` runs.
|
||||
|
||||
## Storage
|
||||
|
||||
The engine reads its persistent storage path from environment variables in
|
||||
the following order of precedence:
|
||||
|
||||
1. `STORAGE_PATH` — historical name; honoured for backward compatibility.
|
||||
2. `GAME_STATE_PATH` — canonical name written by `Runtime Manager`.
|
||||
|
||||
If both are set, `STORAGE_PATH` wins. If neither is set, the binary fails
|
||||
fast on startup. The Dockerfile defaults `STORAGE_PATH=/var/lib/galaxy-game`
|
||||
so the image runs out of the box if the operator does not supply either
|
||||
variable.
|
||||
|
||||
`Runtime Manager` creates a per-game host directory under
|
||||
`<RTMANAGER_GAME_STATE_ROOT>/{game_id}` and bind-mounts it into the container
|
||||
at `RTMANAGER_ENGINE_STATE_MOUNT_PATH` (default `/var/lib/galaxy-game`). The
|
||||
mount path is then exposed to the engine through `GAME_STATE_PATH` (and, for
|
||||
compatibility, also as `STORAGE_PATH`).
|
||||
|
||||
The engine is responsible for the contents of the storage directory.
|
||||
`Runtime Manager` never reads or writes the directory contents, never
|
||||
deletes the directory, and never inspects per-game state files.
|
||||
|
||||
### Design rationale: storage-path env precedence
|
||||
|
||||
`STORAGE_PATH` wins over `GAME_STATE_PATH` because the engine already
|
||||
shipped with `STORAGE_PATH` (see `game/Makefile` and
|
||||
`game/internal/router/handler/handler.go`). Keeping `STORAGE_PATH` as
|
||||
the authoritative variable means existing engine deployments and
|
||||
integration fixtures continue to work without code change, while
|
||||
`GAME_STATE_PATH` is the platform contract written by `Runtime Manager`
|
||||
and documented in `ARCHITECTURE.md §9`.
|
||||
|
||||
Alternatives considered and rejected:
|
||||
|
||||
- accept only `GAME_STATE_PATH` — would force a breaking change on the
|
||||
engine binary and on every existing `STORAGE_PATH=...` invocation in
|
||||
`game/Makefile` and dev scripts;
|
||||
- `GAME_STATE_PATH` wins over `STORAGE_PATH` — would silently invert
|
||||
the meaning of an explicit `STORAGE_PATH=` invocation if the operator
|
||||
also sets `GAME_STATE_PATH` for any reason.
|
||||
|
||||
### Design rationale: storage-path validation site
|
||||
|
||||
`game/internal/router/handler/handler.go` exports `ResolveStoragePath`,
|
||||
which returns the engine storage path from the env-var pair above and
|
||||
an error when neither is set. `cmd/http/main.go` calls it before
|
||||
constructing the router, prints the error to stderr, and exits non-zero.
|
||||
The existing `initConfig` closure also calls `ResolveStoragePath` to
|
||||
populate `controller.Param.StoragePath` at request time; the error there
|
||||
is dropped because `main` already validated the environment at startup.
|
||||
|
||||
This keeps the public router surface (`router.NewRouter`) unchanged —
|
||||
the env binding is satisfied by one helper plus a startup check, with
|
||||
no API ripple. Moving env reading entirely into `main` and changing
|
||||
`NewRouter` / `NewDefaultExecutor` to accept an explicit path was
|
||||
rejected: it churns multiple call sites for no functional gain. The
|
||||
current shape leaves the configurer closure ready for future
|
||||
config-injection refactors without forcing one now.
|
||||
|
||||
## Build
|
||||
|
||||
The container image is built from [`Dockerfile`](Dockerfile). The Docker
|
||||
build context is the workspace root (`galaxy/`) rather than the `game/`
|
||||
subdirectory, because `game/` resolves `galaxy/{model,error,util,...}`
|
||||
through `go.work` `replace` directives. From the workspace root:
|
||||
|
||||
```sh
|
||||
docker build -t galaxy/game:test -f game/Dockerfile .
|
||||
```
|
||||
|
||||
The build is two-staged: a `golang:1.26.2-alpine` builder produces a
|
||||
statically linked binary (`CGO_ENABLED=0`), then `gcr.io/distroless/static-debian12:nonroot`
|
||||
runs it as the `nonroot` user and exposes `:8080`.
|
||||
|
||||
### Design rationale: workspace-root build context
|
||||
|
||||
`game/` is a member of the multi-module `go.work` workspace at the
|
||||
repository root. Its imports of `galaxy/model`, `galaxy/error`,
|
||||
`galaxy/util`, etc. are satisfied by `replace` directives in `go.work`
|
||||
that point at sibling modules under `pkg/`. There is no published
|
||||
`galaxy/model` module to download.
|
||||
|
||||
A standalone `docker build ./game` therefore cannot resolve those
|
||||
imports: the `pkg/` tree is outside the build context, and `game/go.mod`
|
||||
alone has no `replace` directives pointing at it.
|
||||
|
||||
Alternatives rejected:
|
||||
|
||||
- adding `replace` directives to `game/go.mod` and copying `pkg/` into a
|
||||
vendored layout — duplicates the workspace inside `game/`, drifts from
|
||||
the rest of the repository, and forces every other workspace member
|
||||
that ships a Dockerfile to repeat the trick;
|
||||
- running `go mod vendor` inside `game/` before each build — workspaces
|
||||
do not vendor cleanly, the resulting `vendor/` would be noisy, and CI
|
||||
/ Makefile would need a custom pre-build step.
|
||||
|
||||
No `.dockerignore` is needed: every `COPY` in `game/Dockerfile` names an
|
||||
explicit subdirectory (`pkg/calc`, `pkg/error`, `pkg/model`, `pkg/util`,
|
||||
`game`), and BuildKit (required by the `# syntax=docker/dockerfile:1.7` heredoc and cache-mount features) only
|
||||
transfers the paths a `COPY` actually references.
|
||||
|
||||
### Design rationale: `gcr.io/distroless/static-debian12:nonroot` runtime base
|
||||
|
||||
Distroless static is roughly 2 MB and contains no shell or package
|
||||
manager, which keeps the attack surface and CVE exposure minimal —
|
||||
appropriate for a service that `Runtime Manager` will start by the
|
||||
dozen. The image already runs as UID:GID `65532:65532` (user `nonroot`),
|
||||
satisfying the non-root-user requirement without an explicit
|
||||
`RUN adduser`.
|
||||
|
||||
Alternatives rejected:
|
||||
|
||||
- `alpine:3.20` — provides a shell for ad-hoc debugging but is roughly
|
||||
10 MB and inherits regular CVE churn on `musl` / `apk`. The convenience
|
||||
is not worth the larger attack surface for a fleet of identical engine
|
||||
containers; operators can always attach a separate debug container when
|
||||
needed;
|
||||
- `scratch` — smallest possible image, but ships no `/tmp`, no CA bundle,
|
||||
and no `/etc/passwd`. Distroless wins on the same security axis while
|
||||
leaving room for future needs (TLS, logging) without rebuilding the
|
||||
base layout.
|
||||
|
||||
@@ -5,9 +5,15 @@ import (
|
||||
"os"
|
||||
|
||||
"galaxy/game/internal/router"
|
||||
"galaxy/game/internal/router/handler"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if _, err := handler.ResolveStoragePath(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
r := router.NewRouter()
|
||||
if err := r.Run(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"errors"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"galaxy/model/order"
|
||||
"galaxy/model/report"
|
||||
@@ -33,9 +34,25 @@ type executor struct {
|
||||
cfg controller.Configurer
|
||||
}
|
||||
|
||||
// ResolveStoragePath looks up the engine storage directory in the
// environment. STORAGE_PATH (the historical name) is consulted first and
// GAME_STATE_PATH (the canonical name written by Runtime Manager) second;
// the first variable holding a non-blank value is returned with surrounding
// whitespace trimmed. When both are unset or blank an error is returned so
// that callers can fail fast at startup.
func ResolveStoragePath() (string, error) {
	// Order matters: earlier names take precedence over later ones.
	for _, name := range []string{"STORAGE_PATH", "GAME_STATE_PATH"} {
		path := strings.TrimSpace(os.Getenv(name))
		if path == "" {
			continue
		}
		return path, nil
	}
	return "", errors.New("storage path is not set: provide STORAGE_PATH or GAME_STATE_PATH")
}
|
||||
|
||||
func initConfig() controller.Configurer {
|
||||
return func(p *controller.Param) {
|
||||
p.StoragePath = os.Getenv("STORAGE_PATH")
|
||||
// Validated once at startup by ResolveStoragePath; the error
|
||||
// is dropped here to keep the Configurer signature simple.
|
||||
p.StoragePath, _ = ResolveStoragePath()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// HealthzHandler is the technical liveness probe used by Runtime Manager
|
||||
// and operator tooling. It returns 200 with {"status":"ok"} regardless
|
||||
// of whether the engine has been initialised through POST /api/v1/init.
|
||||
func HealthzHandler(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
package router_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"galaxy/game/internal/controller"
|
||||
"galaxy/game/internal/router"
|
||||
"galaxy/game/internal/router/handler"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestHealthzReturnsOKWithoutInit(t *testing.T) {
|
||||
r := router.SetupRouter(handler.NewDefaultConfigExecutor(func(p *controller.Param) {
|
||||
p.StoragePath = ""
|
||||
}))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req, _ := http.NewRequest(http.MethodGet, "/healthz", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
require.Equal(t, http.StatusOK, w.Code, w.Body)
|
||||
|
||||
var body map[string]string
|
||||
require.NoError(t, json.Unmarshal(w.Body.Bytes(), &body))
|
||||
assert.Equal(t, "ok", body["status"])
|
||||
}
|
||||
|
||||
func TestResolveStoragePathPrecedence(t *testing.T) {
|
||||
t.Setenv("STORAGE_PATH", "/tmp/storage")
|
||||
t.Setenv("GAME_STATE_PATH", "/tmp/state")
|
||||
|
||||
got, err := handler.ResolveStoragePath()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "/tmp/storage", got)
|
||||
}
|
||||
|
||||
func TestResolveStoragePathFallback(t *testing.T) {
|
||||
t.Setenv("STORAGE_PATH", "")
|
||||
t.Setenv("GAME_STATE_PATH", "/tmp/state")
|
||||
|
||||
got, err := handler.ResolveStoragePath()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "/tmp/state", got)
|
||||
}
|
||||
|
||||
func TestResolveStoragePathMissing(t *testing.T) {
|
||||
t.Setenv("STORAGE_PATH", "")
|
||||
t.Setenv("GAME_STATE_PATH", "")
|
||||
|
||||
_, err := handler.ResolveStoragePath()
|
||||
require.Error(t, err)
|
||||
}
|
||||
@@ -63,6 +63,8 @@ func setupRouter(executor handler.CommandExecutor) *gin.Engine {
|
||||
}
|
||||
}
|
||||
|
||||
r.GET("/healthz", handler.HealthzHandler)
|
||||
|
||||
groupV1 := r.Group("/api/v1")
|
||||
|
||||
groupV1.GET("/status", func(ctx *gin.Context) { handler.StatusHandler(ctx, executor) })
|
||||
|
||||
@@ -27,6 +27,8 @@ tags:
|
||||
description: Game initialization, state retrieval, and turn advancement.
|
||||
- name: PlayerActions
|
||||
description: Player command execution, order validation, and turn-report retrieval.
|
||||
- name: Health
|
||||
description: Technical liveness probes used by Runtime Manager and operator tooling.
|
||||
paths:
|
||||
/api/v1/status:
|
||||
get:
|
||||
@@ -164,6 +166,26 @@ paths:
|
||||
$ref: "#/components/schemas/StateResponse"
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
/healthz:
|
||||
get:
|
||||
tags:
|
||||
- Health
|
||||
operationId: healthz
|
||||
summary: Engine liveness probe
|
||||
description: |
|
||||
Returns `{"status":"ok"}` with HTTP `200` whenever the HTTP server
|
||||
is serving requests, regardless of whether the engine has been
|
||||
initialised through `POST /api/v1/init`. Used by `Runtime Manager`
|
||||
to probe a freshly started container before `init` runs. Carries
|
||||
no game-state semantics; use `GET /api/v1/status` for game-state
|
||||
inspection.
|
||||
responses:
|
||||
"200":
|
||||
description: Engine HTTP server is up.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/HealthzResponse"
|
||||
components:
|
||||
parameters:
|
||||
PlayerParam:
|
||||
@@ -184,6 +206,17 @@ components:
|
||||
minimum: 0
|
||||
default: 0
|
||||
schemas:
|
||||
HealthzResponse:
|
||||
type: object
|
||||
description: Engine liveness probe response payload.
|
||||
required:
|
||||
- status
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
description: Always "ok" while the engine HTTP server is serving requests.
|
||||
enum:
|
||||
- ok
|
||||
StateResponse:
|
||||
type: object
|
||||
description: Summary game state returned after initialization and at each turn boundary.
|
||||
|
||||
@@ -58,6 +58,13 @@ func TestGameOpenAPISpecFreezesResponseSchemas(t *testing.T) {
|
||||
status: http.StatusOK,
|
||||
wantRef: "#/components/schemas/StateResponse",
|
||||
},
|
||||
{
|
||||
name: "healthz probe",
|
||||
path: "/healthz",
|
||||
method: http.MethodGet,
|
||||
status: http.StatusOK,
|
||||
wantRef: "#/components/schemas/HealthzResponse",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -108,6 +115,19 @@ func TestGameOpenAPISpecFreezesCommandRequest(t *testing.T) {
|
||||
require.Equal(t, uint64(1), cmdSchema.Value.MinItems, "CommandRequest.cmd minItems must be 1")
|
||||
}
|
||||
|
||||
func TestGameOpenAPISpecHealthzStatusEnum(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
doc := loadOpenAPISpec(t)
|
||||
schema := componentSchemaRef(t, doc, "HealthzResponse")
|
||||
|
||||
assertRequiredFields(t, schema, "status")
|
||||
|
||||
statusSchema := schema.Value.Properties["status"]
|
||||
require.NotNil(t, statusSchema, "HealthzResponse.status schema must exist")
|
||||
require.Equal(t, []any{"ok"}, statusSchema.Value.Enum, "HealthzResponse.status enum must be [\"ok\"]")
|
||||
}
|
||||
|
||||
func TestGameOpenAPISpecCommandTypeEnumIsComplete(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user