feat: runtime manager
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
-- +goose Up
|
||||
-- Initial Runtime Manager PostgreSQL schema.
|
||||
--
|
||||
-- Three tables cover the durable surface of the service:
|
||||
-- * runtime_records — one row per game with the latest known runtime
|
||||
-- status and Docker container binding;
|
||||
-- * operation_log — append-only audit of every start/stop/restart/
|
||||
-- patch/cleanup/reconcile_* operation RTM performed;
|
||||
-- * health_snapshots — latest technical health observation per game.
|
||||
--
|
||||
-- Schema and the matching `rtmanagerservice` role are provisioned
|
||||
-- outside this script (in tests via cmd/jetgen/main.go::provisionRoleAndSchema;
|
||||
-- in production via an ops init script). This migration runs as the
|
||||
-- schema owner with `search_path=rtmanager` and only contains DDL for the
|
||||
-- service-owned tables and indexes. ARCHITECTURE.md §Database topology
|
||||
-- mandates that the per-service role's grants stay restricted to its own
|
||||
-- schema; consequently this file deliberately deviates from PLAN.md
|
||||
-- Stage 09's literal `CREATE SCHEMA IF NOT EXISTS rtmanager;` instruction.
|
||||
|
||||
-- runtime_records: the durable, per-game record of the most recent runtime
-- state. game_id is the primary key, so exactly one row exists per game; the
-- row carries the latest known status together with the Docker container
-- binding. The permitted status values (running | stopped | removed) are
-- pinned by a named CHECK so domain code can trust the column without
-- auditing every write site.
CREATE TABLE runtime_records (
    game_id              text        PRIMARY KEY,
    status               text        NOT NULL
        CONSTRAINT runtime_records_status_chk
        CHECK (status IN ('running', 'stopped', 'removed')),
    -- Container binding; both columns are nullable.
    current_container_id text,
    current_image_ref    text,
    engine_endpoint      text        NOT NULL,
    state_path           text        NOT NULL,
    docker_network       text        NOT NULL,
    -- Lifecycle instants; each one is nullable.
    started_at           timestamptz,
    stopped_at           timestamptz,
    removed_at           timestamptz,
    -- last_op_at is the second key of the cleanup index below.
    last_op_at           timestamptz NOT NULL,
    created_at           timestamptz NOT NULL
);

-- Serves the periodic container-cleanup worker, whose scan is
-- `status='stopped' AND last_op_at < now() - retention`.
CREATE INDEX runtime_records_status_last_op_idx ON runtime_records (status, last_op_at);
|
||||
|
||||
-- operation_log: append-only audit trail with one row for every operation
-- Runtime Manager performed against a game's runtime. finished_at stays
-- nullable so an in-flight row can be represented, even though Stage 13+
-- always finalises the row inside the same transaction. The op_kind,
-- op_source and outcome vocabularies are each pinned by a named CHECK,
-- which keeps the audit schema honest without a separate Go validator.
CREATE TABLE operation_log (
    id            bigserial   PRIMARY KEY,
    game_id       text        NOT NULL,
    op_kind       text        NOT NULL
        CONSTRAINT operation_log_op_kind_chk
        CHECK (op_kind IN (
            'start', 'stop', 'restart', 'patch',
            'cleanup_container', 'reconcile_adopt', 'reconcile_dispose'
        )),
    op_source     text        NOT NULL
        CONSTRAINT operation_log_op_source_chk
        CHECK (op_source IN (
            'lobby_stream', 'gm_rest', 'admin_rest',
            'auto_ttl', 'auto_reconcile'
        )),
    -- Optional details default to the empty string rather than NULL.
    source_ref    text        NOT NULL DEFAULT '',
    image_ref     text        NOT NULL DEFAULT '',
    container_id  text        NOT NULL DEFAULT '',
    outcome       text        NOT NULL
        CONSTRAINT operation_log_outcome_chk
        CHECK (outcome IN ('success', 'failure')),
    error_code    text        NOT NULL DEFAULT '',
    error_message text        NOT NULL DEFAULT '',
    started_at    timestamptz NOT NULL,
    finished_at   timestamptz
);

-- Serves audit reads from the GM/Admin REST surface: the rows of one game,
-- newest started_at first.
CREATE INDEX operation_log_game_started_idx ON operation_log (game_id, started_at DESC);
|
||||
|
||||
-- health_snapshots: the latest technical health observation for each game.
-- game_id is the primary key, so there is a single row per game and a later
-- observation overwrites the earlier one. The status vocabulary mirrors the
-- `event_type` values published on `runtime:health_events`, collapsed into
-- one flat column for this latest-observation view.
CREATE TABLE health_snapshots (
    game_id      text        PRIMARY KEY,
    container_id text        NOT NULL DEFAULT '',
    status       text        NOT NULL
        CONSTRAINT health_snapshots_status_chk
        CHECK (status IN (
            'healthy', 'probe_failed', 'exited',
            'oom', 'inspect_unhealthy', 'container_disappeared'
        )),
    source       text        NOT NULL
        CONSTRAINT health_snapshots_source_chk
        CHECK (source IN ('docker_event', 'inspect', 'probe')),
    -- Free-form payload; defaults to an empty JSON object, never NULL.
    details      jsonb       NOT NULL DEFAULT '{}'::jsonb,
    observed_at  timestamptz NOT NULL
);
|
||||
|
||||
-- +goose Down
|
||||
-- Roll back the Up section: remove the three service-owned tables (their
-- indexes go with them). IF EXISTS keeps the rollback safe to re-run.
DROP TABLE IF EXISTS health_snapshots, operation_log, runtime_records;
|
||||
@@ -0,0 +1,19 @@
|
||||
// Package migrations exposes the embedded goose migration files used by
|
||||
// Runtime Manager to provision its `rtmanager` schema in PostgreSQL.
|
||||
//
|
||||
// The embedded filesystem is consumed by `pkg/postgres.RunMigrations`
|
||||
// during rtmanager-service startup and by `cmd/jetgen` when regenerating
|
||||
// the `internal/adapters/postgres/jet/` code against a transient
|
||||
// PostgreSQL instance.
|
||||
package migrations
|
||||
|
||||
import "embed"
|
||||
|
||||
//go:embed *.sql
|
||||
var fs embed.FS
|
||||
|
||||
// FS returns the embedded filesystem containing every numbered goose
|
||||
// migration shipped with Runtime Manager.
|
||||
func FS() embed.FS {
|
||||
return fs
|
||||
}
|
||||
Reference in New Issue
Block a user