feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
@@ -0,0 +1,106 @@
-- +goose Up
-- Initial Runtime Manager PostgreSQL schema.
--
-- Three tables cover the durable surface of the service:
-- * runtime_records — one row per game with the latest known runtime
-- status and Docker container binding;
-- * operation_log — append-only audit of every start/stop/restart/
-- patch/cleanup/reconcile_* operation RTM performed;
-- * health_snapshots — latest technical health observation per game.
--
-- Schema and the matching `rtmanagerservice` role are provisioned
-- outside this script (in tests via cmd/jetgen/main.go::provisionRoleAndSchema;
-- in production via an ops init script). This migration runs as the
-- schema owner with `search_path=rtmanager` and only contains DDL for the
-- service-owned tables and indexes. ARCHITECTURE.md §Database topology
-- mandates that the per-service role's grants stay restricted to its own
-- schema; consequently this file deliberately deviates from PLAN.md
-- Stage 09's literal `CREATE SCHEMA IF NOT EXISTS rtmanager;` instruction.
-- runtime_records: exactly one durable row per game, carrying the most
-- recent runtime status together with the Docker container the game is
-- bound to.
--
-- * status is restricted to running | stopped | removed by a named
--   CHECK, so domain code can trust the vocabulary without auditing
--   every callsite.
-- * current_container_id, current_image_ref and the started_at /
--   stopped_at / removed_at timestamps are nullable; every other column
--   is mandatory.
-- * runtime_records_status_last_op_idx serves the periodic
--   container-cleanup worker, whose scan is
--   `status='stopped' AND last_op_at < now() - retention`.
CREATE TABLE runtime_records (
    game_id              text        PRIMARY KEY,
    status               text        NOT NULL
        CONSTRAINT runtime_records_status_chk
        CHECK (status IN ('running', 'stopped', 'removed')),
    current_container_id text,
    current_image_ref    text,
    engine_endpoint      text        NOT NULL,
    state_path           text        NOT NULL,
    docker_network       text        NOT NULL,
    started_at           timestamptz,
    stopped_at           timestamptz,
    removed_at           timestamptz,
    last_op_at           timestamptz NOT NULL,
    created_at           timestamptz NOT NULL
);

CREATE INDEX runtime_records_status_last_op_idx
    ON runtime_records (status, last_op_at);
-- operation_log: append-only audit trail with one row for each operation
-- Runtime Manager performed against a game's runtime.
--
-- * op_kind, op_source and outcome are closed vocabularies enforced by
--   named CHECK constraints, which keeps the audit schema honest without
--   a separate Go validator.
-- * source_ref, image_ref, container_id, error_code and error_message
--   are NOT NULL and default to the empty string rather than NULL.
-- * finished_at stays nullable to allow in-flight rows, even though
--   Stage 13+ always finalises the row in the same transaction.
-- * operation_log_game_started_idx (game_id, started_at DESC) serves the
--   audit reads issued by the GM/Admin REST surface.
CREATE TABLE operation_log (
    id            bigserial   PRIMARY KEY,
    game_id       text        NOT NULL,
    op_kind       text        NOT NULL
        CONSTRAINT operation_log_op_kind_chk
        CHECK (op_kind IN (
            'start',
            'stop',
            'restart',
            'patch',
            'cleanup_container',
            'reconcile_adopt',
            'reconcile_dispose'
        )),
    op_source     text        NOT NULL
        CONSTRAINT operation_log_op_source_chk
        CHECK (op_source IN (
            'lobby_stream',
            'gm_rest',
            'admin_rest',
            'auto_ttl',
            'auto_reconcile'
        )),
    source_ref    text        NOT NULL DEFAULT '',
    image_ref     text        NOT NULL DEFAULT '',
    container_id  text        NOT NULL DEFAULT '',
    outcome       text        NOT NULL
        CONSTRAINT operation_log_outcome_chk
        CHECK (outcome IN ('success', 'failure')),
    error_code    text        NOT NULL DEFAULT '',
    error_message text        NOT NULL DEFAULT '',
    started_at    timestamptz NOT NULL,
    finished_at   timestamptz
);

CREATE INDEX operation_log_game_started_idx
    ON operation_log (game_id, started_at DESC);
-- health_snapshots: the most recent technical health observation for
-- each game. game_id is the primary key, so there is a single row per
-- game and later observations overwrite earlier ones.
--
-- * status mirrors the `event_type` vocabulary used on
--   `runtime:health_events`, collapsed into a flat column for this
--   latest-observation view.
-- * source is limited to docker_event | inspect | probe.
-- * container_id defaults to the empty string and details defaults to an
--   empty JSON object; neither column accepts NULL.
CREATE TABLE health_snapshots (
    game_id      text        PRIMARY KEY,
    container_id text        NOT NULL DEFAULT '',
    status       text        NOT NULL
        CONSTRAINT health_snapshots_status_chk
        CHECK (status IN (
            'healthy',
            'probe_failed',
            'exited',
            'oom',
            'inspect_unhealthy',
            'container_disappeared'
        )),
    source       text        NOT NULL
        CONSTRAINT health_snapshots_source_chk
        CHECK (source IN ('docker_event', 'inspect', 'probe')),
    details      jsonb       NOT NULL DEFAULT '{}'::jsonb,
    observed_at  timestamptz NOT NULL
);
-- +goose Down
-- Remove every table created by the Up section in a single statement;
-- the indexes defined on those tables are dropped along with them.
DROP TABLE IF EXISTS
    health_snapshots,
    operation_log,
    runtime_records;
@@ -0,0 +1,19 @@
// Package migrations exposes the embedded goose migration files used by
// Runtime Manager to provision its `rtmanager` schema in PostgreSQL.
//
// The embedded filesystem is consumed by `pkg/postgres.RunMigrations`
// during rtmanager-service startup and by `cmd/jetgen` when regenerating
// the `internal/adapters/postgres/jet/` code against a transient
// PostgreSQL instance.
package migrations
import "embed"
// fs holds the files matched by the `*.sql` embed pattern below, i.e. the
// SQL migration files located in this package's directory at build time.
//
//go:embed *.sql
var fs embed.FS
// FS returns the embedded filesystem containing every numbered goose
// migration shipped with Runtime Manager.
//
// The result is the package-level embed.FS populated by the
// `//go:embed *.sql` directive, so it exposes the *.sql files that sit
// next to this source file. It takes no arguments and cannot fail.
func FS() embed.FS {
return fs
}