feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
@@ -0,0 +1,203 @@
// Package healthsnapshotstore implements the PostgreSQL-backed adapter
// for `ports.HealthSnapshotStore`.
//
// The package owns the on-disk shape of the `health_snapshots` table
// defined in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.HealthSnapshotStore` interface
// declared in `internal/ports/healthsnapshotstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// The `details` jsonb column round-trips as a `json.RawMessage`. Empty
// payloads are substituted with the SQL default `{}` on Upsert so the
// CHECK constraints and downstream readers never observe a non-JSON
// empty string.
package healthsnapshotstore
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// emptyDetails is the canonical jsonb payload installed when the caller
// supplies an empty Details slice. It matches the SQL DEFAULT for the
// column, so the CHECK constraint and downstream readers never observe
// a non-JSON empty string.
const emptyDetails = "{}"
// Config configures one PostgreSQL-backed health-snapshot store instance.
type Config struct {
// DB stores the connection pool the store uses for every query.
DB *sql.DB
// OperationTimeout bounds one round trip.
OperationTimeout time.Duration
}
// Store persists Runtime Manager health snapshots in PostgreSQL.
type Store struct {
db *sql.DB
operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed health-snapshot store from cfg.
func New(cfg Config) (*Store, error) {
if cfg.DB == nil {
return nil, errors.New("new postgres health snapshot store: db must not be nil")
}
if cfg.OperationTimeout <= 0 {
return nil, errors.New("new postgres health snapshot store: operation timeout must be positive")
}
return &Store{
db: cfg.DB,
operationTimeout: cfg.OperationTimeout,
}, nil
}
// healthSnapshotSelectColumns is the canonical SELECT list for the
// health_snapshots table. The order here MUST match the scan order in
// scanSnapshot; the two are maintained as a pair.
var healthSnapshotSelectColumns = pg.ColumnList{
	pgtable.HealthSnapshots.GameID,
	pgtable.HealthSnapshots.ContainerID,
	pgtable.HealthSnapshots.Status,
	pgtable.HealthSnapshots.Source,
	pgtable.HealthSnapshots.Details,
	pgtable.HealthSnapshots.ObservedAt,
}
// Upsert installs snapshot as the latest observation for snapshot.GameID.
// snapshot is validated through health.HealthSnapshot.Validate before the
// SQL is issued, and an existing row for the same game is fully replaced
// via INSERT ... ON CONFLICT DO UPDATE.
func (store *Store) Upsert(ctx context.Context, snapshot health.HealthSnapshot) error {
	if store == nil || store.db == nil {
		return errors.New("upsert health snapshot: nil store")
	}
	if err := snapshot.Validate(); err != nil {
		return fmt.Errorf("upsert health snapshot: %w", err)
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "upsert health snapshot", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()
	// Substitute the SQL default {} for empty payloads so the jsonb
	// column never receives a non-JSON empty string.
	payload := emptyDetails
	if len(snapshot.Details) != 0 {
		payload = string(snapshot.Details)
	}
	tbl := pgtable.HealthSnapshots
	stmt := tbl.INSERT(
		tbl.GameID,
		tbl.ContainerID,
		tbl.Status,
		tbl.Source,
		tbl.Details,
		tbl.ObservedAt,
	).VALUES(
		snapshot.GameID,
		snapshot.ContainerID,
		string(snapshot.Status),
		string(snapshot.Source),
		payload,
		snapshot.ObservedAt.UTC(),
	).ON_CONFLICT(tbl.GameID).DO_UPDATE(pg.SET(
		tbl.ContainerID.SET(tbl.EXCLUDED.ContainerID),
		tbl.Status.SET(tbl.EXCLUDED.Status),
		tbl.Source.SET(tbl.EXCLUDED.Source),
		tbl.Details.SET(tbl.EXCLUDED.Details),
		tbl.ObservedAt.SET(tbl.EXCLUDED.ObservedAt),
	))
	query, args := stmt.Sql()
	if _, execErr := store.db.ExecContext(operationCtx, query, args...); execErr != nil {
		return fmt.Errorf("upsert health snapshot: %w", execErr)
	}
	return nil
}
// Get returns the latest snapshot for gameID. It returns
// runtime.ErrNotFound when no snapshot has been recorded yet, and a
// plain error for blank ids, a nil store, or SQL failures.
func (store *Store) Get(ctx context.Context, gameID string) (health.HealthSnapshot, error) {
	var none health.HealthSnapshot
	if store == nil || store.db == nil {
		return none, errors.New("get health snapshot: nil store")
	}
	if strings.TrimSpace(gameID) == "" {
		return none, fmt.Errorf("get health snapshot: game id must not be empty")
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "get health snapshot", store.operationTimeout)
	if err != nil {
		return none, err
	}
	defer cancel()
	query, args := pg.SELECT(healthSnapshotSelectColumns).
		FROM(pgtable.HealthSnapshots).
		WHERE(pgtable.HealthSnapshots.GameID.EQ(pg.String(gameID))).
		Sql()
	snapshot, scanErr := scanSnapshot(store.db.QueryRowContext(operationCtx, query, args...))
	switch {
	case sqlx.IsNoRows(scanErr):
		// Absence is a domain condition, not an infrastructure error.
		return none, runtime.ErrNotFound
	case scanErr != nil:
		return none, fmt.Errorf("get health snapshot: %w", scanErr)
	}
	return snapshot, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanSnapshot can be
// shared across both single-row reads and iterated reads.
type rowScanner interface {
	// Scan copies the current row's columns into dest, following the
	// database/sql Scan contract (including returning sql.ErrNoRows
	// from *sql.Row when the query matched nothing).
	Scan(dest ...any) error
}
// scanSnapshot scans one health_snapshots row from rs into a domain
// snapshot, normalising observed_at to UTC on the way out.
func scanSnapshot(rs rowScanner) (health.HealthSnapshot, error) {
	var (
		snapshot   health.HealthSnapshot
		status     string
		source     string
		rawDetails []byte
	)
	// Column order mirrors healthSnapshotSelectColumns.
	if err := rs.Scan(
		&snapshot.GameID,
		&snapshot.ContainerID,
		&status,
		&source,
		&rawDetails,
		&snapshot.ObservedAt,
	); err != nil {
		return health.HealthSnapshot{}, err
	}
	snapshot.Status = health.SnapshotStatus(status)
	snapshot.Source = health.SnapshotSource(source)
	snapshot.Details = json.RawMessage(rawDetails)
	snapshot.ObservedAt = snapshot.ObservedAt.UTC()
	return snapshot, nil
}
// Ensure Store satisfies the ports.HealthSnapshotStore interface at
// compile time; a signature drift in either direction fails the build
// here instead of at the wiring site.
var _ ports.HealthSnapshotStore = (*Store)(nil)
@@ -0,0 +1,157 @@
package healthsnapshotstore_test
import (
"context"
"encoding/json"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/healthsnapshotstore"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/domain/runtime"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain delegates to pgtest.RunMain so the shared PostgreSQL
// container is torn down once after the package's tests finish.
func TestMain(m *testing.M) { pgtest.RunMain(m) }

// newStore truncates every rtmanager table and returns a store wired to
// the shared test container's pool, failing the test if construction
// fails.
func newStore(t *testing.T) *healthsnapshotstore.Store {
	t.Helper()
	pgtest.TruncateAll(t)
	store, err := healthsnapshotstore.New(healthsnapshotstore.Config{
		DB:               pgtest.Ensure(t).Pool(),
		OperationTimeout: pgtest.OperationTimeout,
	})
	require.NoError(t, err)
	return store
}
// probeFailedSnapshot builds a valid probe-sourced, probe-failed
// snapshot fixture for gameID observed at observedAt.
func probeFailedSnapshot(gameID string, observedAt time.Time) health.HealthSnapshot {
	return health.HealthSnapshot{
		GameID:      gameID,
		ContainerID: "container-1",
		Status:      health.SnapshotStatusProbeFailed,
		Source:      health.SnapshotSourceProbe,
		Details:     json.RawMessage(`{"consecutive_failures":3,"last_status":503,"last_error":"timeout"}`),
		ObservedAt:  observedAt,
	}
}
// TestUpsertAndGetRoundTrip asserts a snapshot written through Upsert
// comes back field-for-field from Get, with ObservedAt normalised to UTC.
func TestUpsertAndGetRoundTrip(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	snapshot := probeFailedSnapshot("game-001",
		time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
	require.NoError(t, store.Upsert(ctx, snapshot))
	got, err := store.Get(ctx, "game-001")
	require.NoError(t, err)
	assert.Equal(t, snapshot.GameID, got.GameID)
	assert.Equal(t, snapshot.ContainerID, got.ContainerID)
	assert.Equal(t, snapshot.Status, got.Status)
	assert.Equal(t, snapshot.Source, got.Source)
	assert.JSONEq(t, string(snapshot.Details), string(got.Details))
	assert.True(t, snapshot.ObservedAt.Equal(got.ObservedAt))
	assert.Equal(t, time.UTC, got.ObservedAt.Location())
}
// TestUpsertOverwritesPriorSnapshot asserts a second Upsert for the same
// game id fully replaces the earlier row (ON CONFLICT DO UPDATE path).
func TestUpsertOverwritesPriorSnapshot(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	first := probeFailedSnapshot("game-001",
		time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
	require.NoError(t, store.Upsert(ctx, first))
	second := health.HealthSnapshot{
		GameID:      "game-001",
		ContainerID: "container-2",
		Status:      health.SnapshotStatusHealthy,
		Source:      health.SnapshotSourceInspect,
		Details:     json.RawMessage(`{"restart_count":0,"state":"running"}`),
		ObservedAt:  first.ObservedAt.Add(time.Minute),
	}
	require.NoError(t, store.Upsert(ctx, second))
	got, err := store.Get(ctx, "game-001")
	require.NoError(t, err)
	assert.Equal(t, "container-2", got.ContainerID)
	assert.Equal(t, health.SnapshotStatusHealthy, got.Status)
	assert.Equal(t, health.SnapshotSourceInspect, got.Source)
	assert.JSONEq(t, string(second.Details), string(got.Details))
	assert.True(t, second.ObservedAt.Equal(got.ObservedAt))
}
// TestGetReturnsNotFound asserts the domain sentinel runtime.ErrNotFound
// surfaces when no snapshot exists for the requested game.
func TestGetReturnsNotFound(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	_, err := store.Get(ctx, "game-missing")
	require.ErrorIs(t, err, runtime.ErrNotFound)
}

// TestUpsertEmptyDetailsRoundTripsAsEmptyObject asserts a nil Details
// payload is persisted as the SQL default {} rather than as an empty,
// non-JSON string.
func TestUpsertEmptyDetailsRoundTripsAsEmptyObject(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	snapshot := probeFailedSnapshot("game-001",
		time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
	snapshot.Details = nil
	require.NoError(t, store.Upsert(ctx, snapshot))
	got, err := store.Get(ctx, "game-001")
	require.NoError(t, err)
	assert.JSONEq(t, "{}", string(got.Details),
		"empty json.RawMessage must round-trip as the SQL default {}, got %q",
		string(got.Details))
}
// TestUpsertValidatesSnapshot asserts Upsert rejects invalid snapshots
// before any SQL is issued, one table-driven case per validation rule.
func TestUpsertValidatesSnapshot(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	tests := []struct {
		name   string
		mutate func(*health.HealthSnapshot)
	}{
		{"empty game id", func(s *health.HealthSnapshot) { s.GameID = "" }},
		{"unknown status", func(s *health.HealthSnapshot) { s.Status = "exotic" }},
		{"unknown source", func(s *health.HealthSnapshot) { s.Source = "exotic" }},
		{"zero observed at", func(s *health.HealthSnapshot) { s.ObservedAt = time.Time{} }},
		{"invalid json details", func(s *health.HealthSnapshot) {
			s.Details = json.RawMessage("not json")
		}},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Start from a known-valid fixture and break exactly one field.
			snapshot := probeFailedSnapshot("game-001",
				time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
			tt.mutate(&snapshot)
			err := store.Upsert(ctx, snapshot)
			require.Error(t, err)
		})
	}
}
// TestGetRejectsEmptyGameID asserts blank ids fail fast without a query.
func TestGetRejectsEmptyGameID(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	_, err := store.Get(ctx, "")
	require.Error(t, err)
}

// TestNewRejectsNilDB asserts the constructor refuses a missing pool.
func TestNewRejectsNilDB(t *testing.T) {
	_, err := healthsnapshotstore.New(healthsnapshotstore.Config{OperationTimeout: time.Second})
	require.Error(t, err)
}

// TestNewRejectsNonPositiveTimeout asserts the constructor refuses a
// zero (or negative) operation timeout.
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
	_, err := healthsnapshotstore.New(healthsnapshotstore.Config{
		DB: pgtest.Ensure(t).Pool(),
	})
	require.Error(t, err)
}
@@ -0,0 +1,209 @@
// Package pgtest exposes the testcontainers-backed PostgreSQL bootstrap
// shared by every Runtime Manager PG adapter test. The package is regular
// Go code — not a `_test.go` file — so it can be imported by the
// `_test.go` files in the three sibling store packages
// (`runtimerecordstore`, `operationlogstore`, `healthsnapshotstore`).
//
// No production code in `cmd/rtmanager` or in the runtime imports this
// package. The testcontainers-go dependency therefore stays out of the
// production binary's import graph.
package pgtest
import (
"context"
"database/sql"
"net/url"
"os"
"sync"
"testing"
"time"
"galaxy/postgres"
"galaxy/rtmanager/internal/adapters/postgres/migrations"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
const (
	// Container image and superuser credentials for the throwaway
	// PostgreSQL instance started by start().
	postgresImage = "postgres:16-alpine"
	superUser     = "galaxy"
	superPassword = "galaxy"
	superDatabase = "galaxy_rtmanager"
	// Least-privilege role/schema the stores actually connect as.
	serviceRole     = "rtmanagerservice"
	servicePassword = "rtmanagerservice"
	serviceSchema   = "rtmanager"
	// containerStartup bounds how long start() waits for the container
	// to report readiness.
	containerStartup = 90 * time.Second
	// OperationTimeout is the per-statement timeout used by every store
	// constructed via the per-package newStore helpers. Tests may pass a
	// smaller value if they need to assert deadline behaviour explicitly.
	OperationTimeout = 10 * time.Second
)
// Env holds the per-process container plus the *sql.DB pool already
// provisioned with the rtmanager schema, role, and migrations applied.
type Env struct {
	container *tcpostgres.PostgresContainer // started testcontainers handle
	pool      *sql.DB                       // service-role pool, search_path=rtmanager
}

// Pool returns the shared pool. Tests truncate per-table state before
// each run via TruncateAll.
func (env *Env) Pool() *sql.DB { return env.pool }
var (
	once  sync.Once // guards the one-time container start in Ensure
	cur   *Env      // shared environment; nil until start succeeds or after Shutdown
	curEr error     // error captured from start(), re-reported by every Ensure call
)
// Ensure starts the PostgreSQL container on first invocation and applies
// the embedded goose migrations. Subsequent invocations reuse the same
// container/pool. When Docker is unavailable Ensure calls t.Skip with the
// underlying error so the test suite still passes on machines without
// Docker.
func Ensure(t testing.TB) *Env {
	t.Helper()
	once.Do(func() {
		cur, curEr = start()
	})
	// A failed start is remembered; every later Ensure call skips too
	// rather than retrying Docker.
	if curEr != nil {
		t.Skipf("postgres container start failed (Docker unavailable?): %v", curEr)
	}
	return cur
}
// TruncateAll wipes every Runtime Manager table inside the shared pool,
// leaving the schema and indexes intact. Use it from each test that needs
// a clean slate.
func TruncateAll(t testing.TB) {
	t.Helper()
	env := Ensure(t)
	// RESTART IDENTITY resets sequences (operation_log.id); CASCADE keeps
	// the statement valid if FKs are added between these tables later.
	const stmt = `TRUNCATE TABLE runtime_records, operation_log, health_snapshots RESTART IDENTITY CASCADE`
	if _, err := env.pool.ExecContext(context.Background(), stmt); err != nil {
		t.Fatalf("truncate rtmanager tables: %v", err)
	}
}
// Shutdown terminates the shared container and closes the pool. It is
// invoked from each test package's TestMain after `m.Run` returns so the
// container is released even if individual tests panic. Calling it when
// nothing was started is a no-op.
func Shutdown() {
	env := cur
	if env == nil {
		return
	}
	// Close the pool before tearing the server down; errors are
	// best-effort since the process is exiting anyway.
	if pool := env.pool; pool != nil {
		_ = pool.Close()
	}
	if container := env.container; container != nil {
		_ = testcontainers.TerminateContainer(container)
	}
	cur = nil
}
// RunMain is a convenience helper for each store package's TestMain: it
// runs the test main, captures the exit code, shuts the container down,
// and exits. Wiring it through one helper keeps every TestMain to two
// lines.
func RunMain(m *testing.M) {
	// Shutdown must run before os.Exit, which skips deferred calls.
	exitCode := m.Run()
	Shutdown()
	os.Exit(exitCode)
}
// start boots the per-process PostgreSQL container, provisions the
// rtmanager role and schema, opens the service-scoped pool, pings it,
// and applies the embedded goose migrations. On any failure every
// resource acquired so far is released before the error is returned.
func start() (*Env, error) {
	ctx := context.Background()
	container, err := tcpostgres.Run(ctx, postgresImage,
		tcpostgres.WithDatabase(superDatabase),
		tcpostgres.WithUsername(superUser),
		tcpostgres.WithPassword(superPassword),
		testcontainers.WithWaitStrategy(
			// The postgres image logs this line twice (initdb restart),
			// so readiness requires the second occurrence.
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(containerStartup),
		),
	)
	if err != nil {
		return nil, err
	}
	// teardown consolidates the previously copy-pasted per-branch
	// cleanup: it closes pool when one exists and always terminates the
	// container. Errors are ignored — we are already on a failure path.
	teardown := func(pool *sql.DB) {
		if pool != nil {
			_ = pool.Close()
		}
		_ = testcontainers.TerminateContainer(container)
	}
	baseDSN, err := container.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		teardown(nil)
		return nil, err
	}
	if err := provisionRoleAndSchema(ctx, baseDSN); err != nil {
		teardown(nil)
		return nil, err
	}
	scopedDSN, err := dsnForServiceRole(baseDSN)
	if err != nil {
		teardown(nil)
		return nil, err
	}
	cfg := postgres.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = OperationTimeout
	pool, err := postgres.OpenPrimary(ctx, cfg)
	if err != nil {
		teardown(nil)
		return nil, err
	}
	if err := postgres.Ping(ctx, pool, OperationTimeout); err != nil {
		teardown(pool)
		return nil, err
	}
	if err := postgres.RunMigrations(ctx, pool, migrations.FS(), "."); err != nil {
		teardown(pool)
		return nil, err
	}
	return &Env{container: container, pool: pool}, nil
}
// provisionRoleAndSchema connects with the superuser DSN and installs
// the rtmanagerservice role plus the rtmanager schema it owns. Every
// statement is written to be idempotent so a repeated start is harmless.
func provisionRoleAndSchema(ctx context.Context, baseDSN string) error {
	cfg := postgres.DefaultConfig()
	cfg.PrimaryDSN = baseDSN
	cfg.OperationTimeout = OperationTimeout
	db, err := postgres.OpenPrimary(ctx, cfg)
	if err != nil {
		return err
	}
	defer func() { _ = db.Close() }()
	// CREATE ROLE has no IF NOT EXISTS, hence the DO block guard.
	for _, statement := range []string{
		`DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'rtmanagerservice') THEN
CREATE ROLE rtmanagerservice LOGIN PASSWORD 'rtmanagerservice';
END IF;
END $$;`,
		`CREATE SCHEMA IF NOT EXISTS rtmanager AUTHORIZATION rtmanagerservice;`,
		`GRANT USAGE ON SCHEMA rtmanager TO rtmanagerservice;`,
	} {
		if _, err := db.ExecContext(ctx, statement); err != nil {
			return err
		}
	}
	return nil
}
// dsnForServiceRole rewrites baseDSN so connections authenticate as the
// rtmanagerservice role with search_path pinned to the rtmanager schema.
// Only scheme, host, and database path carry over from the superuser DSN;
// the query string is rebuilt from scratch.
func dsnForServiceRole(baseDSN string) (string, error) {
	parsed, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	query := url.Values{}
	query.Set("search_path", serviceSchema)
	query.Set("sslmode", "disable")
	scoped := url.URL{
		Scheme:   parsed.Scheme,
		User:     url.UserPassword(serviceRole, servicePassword),
		Host:     parsed.Host,
		Path:     parsed.Path,
		RawQuery: query.Encode(),
	}
	return scoped.String(), nil
}
@@ -0,0 +1,112 @@
// Package sqlx contains the small set of helpers shared by every Runtime
// Manager PostgreSQL adapter (runtimerecordstore, operationlogstore,
// healthsnapshotstore). The helpers centralise the boundary translations
// for nullable timestamps and the pgx SQLSTATE codes the adapters
// interpret as domain conflicts.
package sqlx
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgconn"
)
// PgUniqueViolationCode identifies the SQLSTATE returned by PostgreSQL
// when a UNIQUE constraint is violated by INSERT or UPDATE
// (class 23 "integrity constraint violation").
const PgUniqueViolationCode = "23505"
// IsUniqueViolation reports whether err is (or wraps) a PostgreSQL
// unique-violation, regardless of which constraint was hit.
func IsUniqueViolation(err error) bool {
	var pgErr *pgconn.PgError
	return errors.As(err, &pgErr) && pgErr.Code == PgUniqueViolationCode
}
// IsNoRows reports whether err is sql.ErrNoRows.
func IsNoRows(err error) bool {
return errors.Is(err, sql.ErrNoRows)
}
// NullableTime returns t.UTC() when non-zero, otherwise nil so the column
// is bound as SQL NULL.
func NullableTime(t time.Time) any {
if t.IsZero() {
return nil
}
return t.UTC()
}
// NullableTimePtr returns t.UTC() when t is non-nil and non-zero, otherwise
// nil. Companion of NullableTime for domain types that use *time.Time to
// express absent timestamps.
func NullableTimePtr(t *time.Time) any {
if t == nil {
return nil
}
return NullableTime(*t)
}
// NullableString returns value when non-empty, otherwise nil so the
// column is bound as SQL NULL. Used for Runtime Manager columns that map
// empty domain strings to NULL (current_container_id, current_image_ref).
func NullableString(value string) any {
	if value != "" {
		return value
	}
	return nil
}
// StringFromNullable copies an optional sql.NullString into a domain
// string. NULL becomes the empty string, matching the Runtime Manager
// domain convention that empty == NULL for nullable text columns.
func StringFromNullable(value sql.NullString) string {
if !value.Valid {
return ""
}
return value.String
}
// TimeFromNullable copies an optional sql.NullTime into a domain
// time.Time, applying the global UTC normalisation rule. NULL values
// become the zero time.Time.
func TimeFromNullable(value sql.NullTime) time.Time {
if !value.Valid {
return time.Time{}
}
return value.Time.UTC()
}
// TimePtrFromNullable copies an optional sql.NullTime into a domain
// *time.Time. NULL becomes nil; non-NULL values are wrapped after UTC
// normalisation.
func TimePtrFromNullable(value sql.NullTime) *time.Time {
if !value.Valid {
return nil
}
t := value.Time.UTC()
return &t
}
// WithTimeout derives a child context bounded by timeout and prefixes
// context errors with operation. Callers must always invoke the returned
// cancel.
func WithTimeout(ctx context.Context, operation string, timeout time.Duration) (context.Context, context.CancelFunc, error) {
if ctx == nil {
return nil, nil, fmt.Errorf("%s: nil context", operation)
}
if err := ctx.Err(); err != nil {
return nil, nil, fmt.Errorf("%s: %w", operation, err)
}
if timeout <= 0 {
return nil, nil, fmt.Errorf("%s: operation timeout must be positive", operation)
}
bounded, cancel := context.WithTimeout(ctx, timeout)
return bounded, cancel, nil
}
@@ -0,0 +1,19 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// GooseDbVersion mirrors one row of the rtmanager.goose_db_version
// bookkeeping table maintained by goose migrations. Generated by go-jet.
type GooseDbVersion struct {
	ID        int32 `sql:"primary_key"`
	VersionID int64
	IsApplied bool
	Tstamp    time.Time
}
@@ -0,0 +1,21 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// HealthSnapshots mirrors one row of the rtmanager.health_snapshots
// table (one latest snapshot per game). Generated by go-jet.
type HealthSnapshots struct {
	GameID      string `sql:"primary_key"`
	ContainerID string
	Status      string
	Source      string
	Details     string
	ObservedAt  time.Time
}
@@ -0,0 +1,27 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// OperationLog mirrors one row of the rtmanager.operation_log table
// (append-only operation audit trail). Generated by go-jet.
type OperationLog struct {
	ID           int64 `sql:"primary_key"`
	GameID       string
	OpKind       string
	OpSource     string
	SourceRef    string
	ImageRef     string
	ContainerID  string
	Outcome      string
	ErrorCode    string
	ErrorMessage string
	StartedAt    time.Time
	FinishedAt   *time.Time
}
@@ -0,0 +1,27 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// RuntimeRecords mirrors one row of the rtmanager.runtime_records table
// (one runtime state record per game). Pointer fields map nullable
// columns. Generated by go-jet.
type RuntimeRecords struct {
	GameID             string `sql:"primary_key"`
	Status             string
	CurrentContainerID *string
	CurrentImageRef    *string
	EngineEndpoint     string
	StatePath          string
	DockerNetwork      string
	StartedAt          *time.Time
	StoppedAt          *time.Time
	RemovedAt          *time.Time
	LastOpAt           time.Time
	CreatedAt          time.Time
}
@@ -0,0 +1,87 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// GooseDbVersion is the package-level handle for the
// rtmanager.goose_db_version table.
var GooseDbVersion = newGooseDbVersionTable("rtmanager", "goose_db_version", "")

// gooseDbVersionTable carries the column metadata for one (possibly
// aliased) view of the goose_db_version table.
type gooseDbVersionTable struct {
	postgres.Table

	// Columns
	ID        postgres.ColumnInteger
	VersionID postgres.ColumnInteger
	IsApplied postgres.ColumnBool
	Tstamp    postgres.ColumnTimestamp

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// GooseDbVersionTable pairs the table metadata with its EXCLUDED
// pseudo-table for use in upsert expressions.
type GooseDbVersionTable struct {
	gooseDbVersionTable

	EXCLUDED gooseDbVersionTable
}

// AS creates new GooseDbVersionTable with assigned alias
func (a GooseDbVersionTable) AS(alias string) *GooseDbVersionTable {
	return newGooseDbVersionTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new GooseDbVersionTable with assigned schema name
func (a GooseDbVersionTable) FromSchema(schemaName string) *GooseDbVersionTable {
	return newGooseDbVersionTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new GooseDbVersionTable with assigned table prefix
func (a GooseDbVersionTable) WithPrefix(prefix string) *GooseDbVersionTable {
	return newGooseDbVersionTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new GooseDbVersionTable with assigned table suffix
func (a GooseDbVersionTable) WithSuffix(suffix string) *GooseDbVersionTable {
	return newGooseDbVersionTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

// newGooseDbVersionTable builds the public table value plus its EXCLUDED twin.
func newGooseDbVersionTable(schemaName, tableName, alias string) *GooseDbVersionTable {
	return &GooseDbVersionTable{
		gooseDbVersionTable: newGooseDbVersionTableImpl(schemaName, tableName, alias),
		EXCLUDED:            newGooseDbVersionTableImpl("", "excluded", ""),
	}
}

// newGooseDbVersionTableImpl wires the concrete column objects into one
// table metadata value.
func newGooseDbVersionTableImpl(schemaName, tableName, alias string) gooseDbVersionTable {
	var (
		IDColumn        = postgres.IntegerColumn("id")
		VersionIDColumn = postgres.IntegerColumn("version_id")
		IsAppliedColumn = postgres.BoolColumn("is_applied")
		TstampColumn    = postgres.TimestampColumn("tstamp")
		allColumns      = postgres.ColumnList{IDColumn, VersionIDColumn, IsAppliedColumn, TstampColumn}
		mutableColumns  = postgres.ColumnList{VersionIDColumn, IsAppliedColumn, TstampColumn}
		defaultColumns  = postgres.ColumnList{TstampColumn}
	)

	return gooseDbVersionTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		ID:        IDColumn,
		VersionID: VersionIDColumn,
		IsApplied: IsAppliedColumn,
		Tstamp:    TstampColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,93 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// HealthSnapshots is the package-level handle for the
// rtmanager.health_snapshots table.
var HealthSnapshots = newHealthSnapshotsTable("rtmanager", "health_snapshots", "")

// healthSnapshotsTable carries the column metadata for one (possibly
// aliased) view of the health_snapshots table.
type healthSnapshotsTable struct {
	postgres.Table

	// Columns
	GameID      postgres.ColumnString
	ContainerID postgres.ColumnString
	Status      postgres.ColumnString
	Source      postgres.ColumnString
	Details     postgres.ColumnString
	ObservedAt  postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// HealthSnapshotsTable pairs the table metadata with its EXCLUDED
// pseudo-table for use in upsert expressions.
type HealthSnapshotsTable struct {
	healthSnapshotsTable

	EXCLUDED healthSnapshotsTable
}

// AS creates new HealthSnapshotsTable with assigned alias
func (a HealthSnapshotsTable) AS(alias string) *HealthSnapshotsTable {
	return newHealthSnapshotsTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new HealthSnapshotsTable with assigned schema name
func (a HealthSnapshotsTable) FromSchema(schemaName string) *HealthSnapshotsTable {
	return newHealthSnapshotsTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new HealthSnapshotsTable with assigned table prefix
func (a HealthSnapshotsTable) WithPrefix(prefix string) *HealthSnapshotsTable {
	return newHealthSnapshotsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new HealthSnapshotsTable with assigned table suffix
func (a HealthSnapshotsTable) WithSuffix(suffix string) *HealthSnapshotsTable {
	return newHealthSnapshotsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

// newHealthSnapshotsTable builds the public table value plus its EXCLUDED twin.
func newHealthSnapshotsTable(schemaName, tableName, alias string) *HealthSnapshotsTable {
	return &HealthSnapshotsTable{
		healthSnapshotsTable: newHealthSnapshotsTableImpl(schemaName, tableName, alias),
		EXCLUDED:             newHealthSnapshotsTableImpl("", "excluded", ""),
	}
}

// newHealthSnapshotsTableImpl wires the concrete column objects into one
// table metadata value.
func newHealthSnapshotsTableImpl(schemaName, tableName, alias string) healthSnapshotsTable {
	var (
		GameIDColumn      = postgres.StringColumn("game_id")
		ContainerIDColumn = postgres.StringColumn("container_id")
		StatusColumn      = postgres.StringColumn("status")
		SourceColumn      = postgres.StringColumn("source")
		DetailsColumn     = postgres.StringColumn("details")
		ObservedAtColumn  = postgres.TimestampzColumn("observed_at")
		allColumns        = postgres.ColumnList{GameIDColumn, ContainerIDColumn, StatusColumn, SourceColumn, DetailsColumn, ObservedAtColumn}
		mutableColumns    = postgres.ColumnList{ContainerIDColumn, StatusColumn, SourceColumn, DetailsColumn, ObservedAtColumn}
		defaultColumns    = postgres.ColumnList{ContainerIDColumn, DetailsColumn}
	)

	return healthSnapshotsTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		GameID:      GameIDColumn,
		ContainerID: ContainerIDColumn,
		Status:      StatusColumn,
		Source:      SourceColumn,
		Details:     DetailsColumn,
		ObservedAt:  ObservedAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,111 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// OperationLog is the package-level handle for the
// rtmanager.operation_log table.
var OperationLog = newOperationLogTable("rtmanager", "operation_log", "")

// operationLogTable carries the column metadata for one (possibly
// aliased) view of the operation_log table.
type operationLogTable struct {
	postgres.Table

	// Columns
	ID           postgres.ColumnInteger
	GameID       postgres.ColumnString
	OpKind       postgres.ColumnString
	OpSource     postgres.ColumnString
	SourceRef    postgres.ColumnString
	ImageRef     postgres.ColumnString
	ContainerID  postgres.ColumnString
	Outcome      postgres.ColumnString
	ErrorCode    postgres.ColumnString
	ErrorMessage postgres.ColumnString
	StartedAt    postgres.ColumnTimestampz
	FinishedAt   postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// OperationLogTable pairs the table metadata with its EXCLUDED
// pseudo-table for use in upsert expressions.
type OperationLogTable struct {
	operationLogTable

	EXCLUDED operationLogTable
}

// AS creates new OperationLogTable with assigned alias
func (a OperationLogTable) AS(alias string) *OperationLogTable {
	return newOperationLogTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new OperationLogTable with assigned schema name
func (a OperationLogTable) FromSchema(schemaName string) *OperationLogTable {
	return newOperationLogTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new OperationLogTable with assigned table prefix
func (a OperationLogTable) WithPrefix(prefix string) *OperationLogTable {
	return newOperationLogTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new OperationLogTable with assigned table suffix
func (a OperationLogTable) WithSuffix(suffix string) *OperationLogTable {
	return newOperationLogTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

// newOperationLogTable builds the public table value plus its EXCLUDED twin.
func newOperationLogTable(schemaName, tableName, alias string) *OperationLogTable {
	return &OperationLogTable{
		operationLogTable: newOperationLogTableImpl(schemaName, tableName, alias),
		EXCLUDED:          newOperationLogTableImpl("", "excluded", ""),
	}
}

// newOperationLogTableImpl wires the concrete column objects into one
// table metadata value.
func newOperationLogTableImpl(schemaName, tableName, alias string) operationLogTable {
	var (
		IDColumn           = postgres.IntegerColumn("id")
		GameIDColumn       = postgres.StringColumn("game_id")
		OpKindColumn       = postgres.StringColumn("op_kind")
		OpSourceColumn     = postgres.StringColumn("op_source")
		SourceRefColumn    = postgres.StringColumn("source_ref")
		ImageRefColumn     = postgres.StringColumn("image_ref")
		ContainerIDColumn  = postgres.StringColumn("container_id")
		OutcomeColumn      = postgres.StringColumn("outcome")
		ErrorCodeColumn    = postgres.StringColumn("error_code")
		ErrorMessageColumn = postgres.StringColumn("error_message")
		StartedAtColumn    = postgres.TimestampzColumn("started_at")
		FinishedAtColumn   = postgres.TimestampzColumn("finished_at")
		allColumns         = postgres.ColumnList{IDColumn, GameIDColumn, OpKindColumn, OpSourceColumn, SourceRefColumn, ImageRefColumn, ContainerIDColumn, OutcomeColumn, ErrorCodeColumn, ErrorMessageColumn, StartedAtColumn, FinishedAtColumn}
		mutableColumns     = postgres.ColumnList{GameIDColumn, OpKindColumn, OpSourceColumn, SourceRefColumn, ImageRefColumn, ContainerIDColumn, OutcomeColumn, ErrorCodeColumn, ErrorMessageColumn, StartedAtColumn, FinishedAtColumn}
		defaultColumns     = postgres.ColumnList{IDColumn, SourceRefColumn, ImageRefColumn, ContainerIDColumn, ErrorCodeColumn, ErrorMessageColumn}
	)

	return operationLogTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		ID:           IDColumn,
		GameID:       GameIDColumn,
		OpKind:       OpKindColumn,
		OpSource:     OpSourceColumn,
		SourceRef:    SourceRefColumn,
		ImageRef:     ImageRefColumn,
		ContainerID:  ContainerIDColumn,
		Outcome:      OutcomeColumn,
		ErrorCode:    ErrorCodeColumn,
		ErrorMessage: ErrorMessageColumn,
		StartedAt:    StartedAtColumn,
		FinishedAt:   FinishedAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,111 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
var RuntimeRecords = newRuntimeRecordsTable("rtmanager", "runtime_records", "")
// runtimeRecordsTable mirrors the runtime_records columns one-to-one;
// each typed field corresponds to the snake_case column of the same name.
type runtimeRecordsTable struct {
	postgres.Table
	// Columns
	GameID             postgres.ColumnString
	Status             postgres.ColumnString
	CurrentContainerID postgres.ColumnString
	CurrentImageRef    postgres.ColumnString
	EngineEndpoint     postgres.ColumnString
	StatePath          postgres.ColumnString
	DockerNetwork      postgres.ColumnString
	StartedAt          postgres.ColumnTimestampz
	StoppedAt          postgres.ColumnTimestampz
	RemovedAt          postgres.ColumnTimestampz
	LastOpAt           postgres.ColumnTimestampz
	CreatedAt          postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}
// RuntimeRecordsTable bundles the column set with the EXCLUDED
// pseudo-table used in ON CONFLICT ... DO UPDATE clauses.
type RuntimeRecordsTable struct {
	runtimeRecordsTable

	EXCLUDED runtimeRecordsTable
}
// AS creates a new RuntimeRecordsTable with the assigned alias.
func (a RuntimeRecordsTable) AS(alias string) *RuntimeRecordsTable {
	return newRuntimeRecordsTable(a.SchemaName(), a.TableName(), alias)
}
// FromSchema creates a new RuntimeRecordsTable with the assigned schema name.
func (a RuntimeRecordsTable) FromSchema(schemaName string) *RuntimeRecordsTable {
	return newRuntimeRecordsTable(schemaName, a.TableName(), a.Alias())
}
// WithPrefix creates a new RuntimeRecordsTable with the assigned table
// prefix; the original table name becomes the alias.
func (a RuntimeRecordsTable) WithPrefix(prefix string) *RuntimeRecordsTable {
	return newRuntimeRecordsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}
// WithSuffix creates a new RuntimeRecordsTable with the assigned table
// suffix; the original table name becomes the alias.
func (a RuntimeRecordsTable) WithSuffix(suffix string) *RuntimeRecordsTable {
	return newRuntimeRecordsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}
// newRuntimeRecordsTable wires the concrete column set together with the
// EXCLUDED pseudo-table (schema-less, fixed name "excluded").
func newRuntimeRecordsTable(schemaName, tableName, alias string) *RuntimeRecordsTable {
	return &RuntimeRecordsTable{
		runtimeRecordsTable: newRuntimeRecordsTableImpl(schemaName, tableName, alias),
		EXCLUDED:            newRuntimeRecordsTableImpl("", "excluded", ""),
	}
}
// newRuntimeRecordsTableImpl instantiates every typed column and the three
// derived column lists. mutableColumns excludes the game_id primary key;
// defaultColumns is empty because no runtime_records column carries a SQL
// DEFAULT.
func newRuntimeRecordsTableImpl(schemaName, tableName, alias string) runtimeRecordsTable {
	var (
		GameIDColumn             = postgres.StringColumn("game_id")
		StatusColumn             = postgres.StringColumn("status")
		CurrentContainerIDColumn = postgres.StringColumn("current_container_id")
		CurrentImageRefColumn    = postgres.StringColumn("current_image_ref")
		EngineEndpointColumn     = postgres.StringColumn("engine_endpoint")
		StatePathColumn          = postgres.StringColumn("state_path")
		DockerNetworkColumn      = postgres.StringColumn("docker_network")
		StartedAtColumn          = postgres.TimestampzColumn("started_at")
		StoppedAtColumn          = postgres.TimestampzColumn("stopped_at")
		RemovedAtColumn          = postgres.TimestampzColumn("removed_at")
		LastOpAtColumn           = postgres.TimestampzColumn("last_op_at")
		CreatedAtColumn          = postgres.TimestampzColumn("created_at")
		allColumns               = postgres.ColumnList{GameIDColumn, StatusColumn, CurrentContainerIDColumn, CurrentImageRefColumn, EngineEndpointColumn, StatePathColumn, DockerNetworkColumn, StartedAtColumn, StoppedAtColumn, RemovedAtColumn, LastOpAtColumn, CreatedAtColumn}
		mutableColumns           = postgres.ColumnList{StatusColumn, CurrentContainerIDColumn, CurrentImageRefColumn, EngineEndpointColumn, StatePathColumn, DockerNetworkColumn, StartedAtColumn, StoppedAtColumn, RemovedAtColumn, LastOpAtColumn, CreatedAtColumn}
		defaultColumns           = postgres.ColumnList{}
	)

	return runtimeRecordsTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		GameID:             GameIDColumn,
		Status:             StatusColumn,
		CurrentContainerID: CurrentContainerIDColumn,
		CurrentImageRef:    CurrentImageRefColumn,
		EngineEndpoint:     EngineEndpointColumn,
		StatePath:          StatePathColumn,
		DockerNetwork:      DockerNetworkColumn,
		StartedAt:          StartedAtColumn,
		StoppedAt:          StoppedAtColumn,
		RemovedAt:          RemovedAtColumn,
		LastOpAt:           LastOpAtColumn,
		CreatedAt:          CreatedAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,17 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
// UseSchema sets a new schema name for all generated table SQL builder types. It is recommended to invoke
// this method only once at the beginning of the program.
// Each assignment replaces the package-level builder with a rebound copy.
func UseSchema(schema string) {
	GooseDbVersion = GooseDbVersion.FromSchema(schema)
	HealthSnapshots = HealthSnapshots.FromSchema(schema)
	OperationLog = OperationLog.FromSchema(schema)
	RuntimeRecords = RuntimeRecords.FromSchema(schema)
}
@@ -0,0 +1,106 @@
-- +goose Up
-- Initial Runtime Manager PostgreSQL schema.
--
-- Three tables cover the durable surface of the service:
-- * runtime_records — one row per game with the latest known runtime
-- status and Docker container binding;
-- * operation_log — append-only audit of every start/stop/restart/
-- patch/cleanup/reconcile_* operation RTM performed;
-- * health_snapshots — latest technical health observation per game.
--
-- Schema and the matching `rtmanagerservice` role are provisioned
-- outside this script (in tests via cmd/jetgen/main.go::provisionRoleAndSchema;
-- in production via an ops init script). This migration runs as the
-- schema owner with `search_path=rtmanager` and only contains DDL for the
-- service-owned tables and indexes. ARCHITECTURE.md §Database topology
-- mandates that the per-service role's grants stay restricted to its own
-- schema; consequently this file deliberately deviates from PLAN.md
-- Stage 09's literal `CREATE SCHEMA IF NOT EXISTS rtmanager;` instruction.
-- runtime_records holds one durable record per game with the latest
-- known runtime status and Docker container binding. The status enum
-- (running | stopped | removed) is enforced by a CHECK so domain code
-- can rely on it without reading every callsite. The (status, last_op_at)
-- index drives the periodic container-cleanup worker that scans
-- `status='stopped' AND last_op_at < now() - retention`.
CREATE TABLE runtime_records (
game_id text PRIMARY KEY,
status text NOT NULL,
current_container_id text,
current_image_ref text,
engine_endpoint text NOT NULL,
state_path text NOT NULL,
docker_network text NOT NULL,
started_at timestamptz,
stopped_at timestamptz,
removed_at timestamptz,
last_op_at timestamptz NOT NULL,
created_at timestamptz NOT NULL,
CONSTRAINT runtime_records_status_chk
CHECK (status IN ('running', 'stopped', 'removed'))
);
CREATE INDEX runtime_records_status_last_op_idx
ON runtime_records (status, last_op_at);
-- operation_log is an append-only audit of every operation Runtime
-- Manager performed against a game's runtime. The (game_id, started_at
-- DESC) index drives audit reads from the GM/Admin REST surface;
-- finished_at is nullable for in-flight rows even though Stage 13+
-- always finalises the row in the same transaction. The op_kind /
-- op_source / outcome enums are enforced by CHECK constraints to keep
-- the audit schema honest without a separate Go validator.
CREATE TABLE operation_log (
id bigserial PRIMARY KEY,
game_id text NOT NULL,
op_kind text NOT NULL,
op_source text NOT NULL,
source_ref text NOT NULL DEFAULT '',
image_ref text NOT NULL DEFAULT '',
container_id text NOT NULL DEFAULT '',
outcome text NOT NULL,
error_code text NOT NULL DEFAULT '',
error_message text NOT NULL DEFAULT '',
started_at timestamptz NOT NULL,
finished_at timestamptz,
CONSTRAINT operation_log_op_kind_chk
CHECK (op_kind IN (
'start', 'stop', 'restart', 'patch',
'cleanup_container', 'reconcile_adopt', 'reconcile_dispose'
)),
CONSTRAINT operation_log_op_source_chk
CHECK (op_source IN (
'lobby_stream', 'gm_rest', 'admin_rest',
'auto_ttl', 'auto_reconcile'
)),
CONSTRAINT operation_log_outcome_chk
CHECK (outcome IN ('success', 'failure'))
);
CREATE INDEX operation_log_game_started_idx
ON operation_log (game_id, started_at DESC);
-- health_snapshots stores the latest technical health observation per
-- game. One row per game; later observations overwrite. The status enum
-- mirrors the `event_type` vocabulary on `runtime:health_events`
-- (collapsed to a flat status column for the latest-observation view).
CREATE TABLE health_snapshots (
game_id text PRIMARY KEY,
container_id text NOT NULL DEFAULT '',
status text NOT NULL,
source text NOT NULL,
details jsonb NOT NULL DEFAULT '{}'::jsonb,
observed_at timestamptz NOT NULL,
CONSTRAINT health_snapshots_status_chk
CHECK (status IN (
'healthy', 'probe_failed', 'exited',
'oom', 'inspect_unhealthy', 'container_disappeared'
)),
CONSTRAINT health_snapshots_source_chk
CHECK (source IN ('docker_event', 'inspect', 'probe'))
);
-- +goose Down
-- The tables carry no cross-table foreign keys, so plain DROPs in
-- reverse creation order are sufficient.
DROP TABLE IF EXISTS health_snapshots;
DROP TABLE IF EXISTS operation_log;
DROP TABLE IF EXISTS runtime_records;
@@ -0,0 +1,19 @@
// Package migrations exposes the embedded goose migration files used by
// Runtime Manager to provision its `rtmanager` schema in PostgreSQL.
//
// The embedded filesystem is consumed by `pkg/postgres.RunMigrations`
// during rtmanager-service startup and by `cmd/jetgen` when regenerating
// the `internal/adapters/postgres/jet/` code against a transient
// PostgreSQL instance.
package migrations
import "embed"
// fs holds every *.sql goose migration file embedded at build time.
//
//go:embed *.sql
var fs embed.FS
// FS returns the embedded filesystem containing every numbered goose
// migration shipped with Runtime Manager. The value is read-only; callers
// cannot mutate the embedded files.
func FS() embed.FS {
	return fs
}
@@ -0,0 +1,235 @@
// Package operationlogstore implements the PostgreSQL-backed adapter for
// `ports.OperationLogStore`.
//
// The package owns the on-disk shape of the `operation_log` table defined
// in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.OperationLogStore` interface
// declared in `internal/ports/operationlogstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// Append uses `INSERT ... RETURNING id` to surface the bigserial id back
// to callers; ListByGame is index-driven by `operation_log_game_started_idx`.
package operationlogstore
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/operation"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// Config configures one PostgreSQL-backed operation-log store instance.
// The store does not manage the *sql.DB lifecycle; the caller opens and
// closes the pool.
type Config struct {
	// DB stores the connection pool the store uses for every query.
	DB *sql.DB
	// OperationTimeout bounds one round trip; each call derives a
	// child context carrying this deadline (see sqlx.WithTimeout).
	OperationTimeout time.Duration
}
// Store persists Runtime Manager operation-log entries in PostgreSQL.
// Construct instances through New; the zero value is unusable (every
// method guards against a nil db).
type Store struct {
	db               *sql.DB
	operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed operation-log store from cfg.
// It fails when the DB handle is nil or the per-operation timeout is
// not strictly positive.
func New(cfg Config) (*Store, error) {
	switch {
	case cfg.DB == nil:
		return nil, errors.New("new postgres operation log store: db must not be nil")
	case cfg.OperationTimeout <= 0:
		return nil, errors.New("new postgres operation log store: operation timeout must be positive")
	}
	store := &Store{db: cfg.DB, operationTimeout: cfg.OperationTimeout}
	return store, nil
}
// operationLogSelectColumns is the canonical SELECT list for the
// operation_log table, matching scanEntry's column order.
// Any change here must be mirrored in scanEntry, which scans
// positionally in exactly this order.
var operationLogSelectColumns = pg.ColumnList{
	pgtable.OperationLog.ID,
	pgtable.OperationLog.GameID,
	pgtable.OperationLog.OpKind,
	pgtable.OperationLog.OpSource,
	pgtable.OperationLog.SourceRef,
	pgtable.OperationLog.ImageRef,
	pgtable.OperationLog.ContainerID,
	pgtable.OperationLog.Outcome,
	pgtable.OperationLog.ErrorCode,
	pgtable.OperationLog.ErrorMessage,
	pgtable.OperationLog.StartedAt,
	pgtable.OperationLog.FinishedAt,
}
// Append inserts entry into the operation log and returns the generated
// bigserial id. entry is validated through operation.OperationEntry.Validate
// before the SQL is issued.
func (store *Store) Append(ctx context.Context, entry operation.OperationEntry) (int64, error) {
	if store == nil || store.db == nil {
		return 0, errors.New("append operation log entry: nil store")
	}
	if err := entry.Validate(); err != nil {
		return 0, fmt.Errorf("append operation log entry: %w", err)
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "append operation log entry", store.operationTimeout)
	if err != nil {
		return 0, err
	}
	defer cancel()
	// The id column is omitted from the INSERT list so PostgreSQL
	// assigns the next bigserial value; RETURNING surfaces it below.
	stmt := pgtable.OperationLog.INSERT(
		pgtable.OperationLog.GameID,
		pgtable.OperationLog.OpKind,
		pgtable.OperationLog.OpSource,
		pgtable.OperationLog.SourceRef,
		pgtable.OperationLog.ImageRef,
		pgtable.OperationLog.ContainerID,
		pgtable.OperationLog.Outcome,
		pgtable.OperationLog.ErrorCode,
		pgtable.OperationLog.ErrorMessage,
		pgtable.OperationLog.StartedAt,
		pgtable.OperationLog.FinishedAt,
	).VALUES(
		// VALUES order must stay aligned one-to-one with the column
		// list above. StartedAt is normalized to UTC on the way in;
		// a nil FinishedAt maps to SQL NULL.
		entry.GameID,
		string(entry.OpKind),
		string(entry.OpSource),
		entry.SourceRef,
		entry.ImageRef,
		entry.ContainerID,
		string(entry.Outcome),
		entry.ErrorCode,
		entry.ErrorMessage,
		entry.StartedAt.UTC(),
		sqlx.NullableTimePtr(entry.FinishedAt),
	).RETURNING(pgtable.OperationLog.ID)
	query, args := stmt.Sql()
	row := store.db.QueryRowContext(operationCtx, query, args...)
	var id int64
	if err := row.Scan(&id); err != nil {
		return 0, fmt.Errorf("append operation log entry: %w", err)
	}
	return id, nil
}
// ListByGame returns the most recent entries for gameID, ordered by
// started_at descending (id descending breaks same-timestamp ties) and
// capped by limit. The (game_id, started_at DESC) index drives the read.
// A game with no entries yields a nil slice and a nil error.
func (store *Store) ListByGame(ctx context.Context, gameID string, limit int) ([]operation.OperationEntry, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("list operation log entries by game: nil store")
	}
	// Constant messages use errors.New rather than fmt.Errorf with no
	// format verbs (matches the style of the other guards in this store).
	if strings.TrimSpace(gameID) == "" {
		return nil, errors.New("list operation log entries by game: game id must not be empty")
	}
	if limit <= 0 {
		return nil, fmt.Errorf("list operation log entries by game: limit must be positive, got %d", limit)
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "list operation log entries by game", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()
	stmt := pg.SELECT(operationLogSelectColumns).
		FROM(pgtable.OperationLog).
		WHERE(pgtable.OperationLog.GameID.EQ(pg.String(gameID))).
		ORDER_BY(pgtable.OperationLog.StartedAt.DESC(), pgtable.OperationLog.ID.DESC()).
		LIMIT(int64(limit))
	query, args := stmt.Sql()
	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list operation log entries by game: %w", err)
	}
	defer rows.Close()
	// A nil slice (never an allocated empty one) signals "no entries",
	// preserving the original return contract without the extra
	// make-then-discard branch.
	var entries []operation.OperationEntry
	for rows.Next() {
		entry, err := scanEntry(rows)
		if err != nil {
			return nil, fmt.Errorf("list operation log entries by game: scan: %w", err)
		}
		entries = append(entries, entry)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list operation log entries by game: %w", err)
	}
	return entries, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanEntry can be shared
// across both single-row reads and iterated reads.
// Both database/sql types satisfy it structurally.
type rowScanner interface {
	Scan(dest ...any) error
}
// scanEntry scans one operation_log row from rs.
//
// The destination order below must match operationLogSelectColumns
// exactly: Scan is positional. Timestamps are normalized to UTC; a NULL
// finished_at becomes a nil *time.Time on the returned entry.
func scanEntry(rs rowScanner) (operation.OperationEntry, error) {
	var (
		id           int64
		gameID       string
		opKind       string
		opSource     string
		sourceRef    string
		imageRef     string
		containerID  string
		outcome      string
		errorCode    string
		errorMessage string
		startedAt    time.Time
		finishedAt   sql.NullTime
	)
	if err := rs.Scan(
		&id,
		&gameID,
		&opKind,
		&opSource,
		&sourceRef,
		&imageRef,
		&containerID,
		&outcome,
		&errorCode,
		&errorMessage,
		&startedAt,
		&finishedAt,
	); err != nil {
		return operation.OperationEntry{}, err
	}
	return operation.OperationEntry{
		ID:           id,
		GameID:       gameID,
		OpKind:       operation.OpKind(opKind),
		OpSource:     operation.OpSource(opSource),
		SourceRef:    sourceRef,
		ImageRef:     imageRef,
		ContainerID:  containerID,
		Outcome:      operation.Outcome(outcome),
		ErrorCode:    errorCode,
		ErrorMessage: errorMessage,
		StartedAt:    startedAt.UTC(),
		FinishedAt:   sqlx.TimePtrFromNullable(finishedAt),
	}, nil
}
// Ensure Store satisfies the ports.OperationLogStore interface at compile
// time. The assignment is erased by the compiler; it exists only as a
// build-time check.
var _ ports.OperationLogStore = (*Store)(nil)
@@ -0,0 +1,207 @@
package operationlogstore_test
import (
"context"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/adapters/postgres/operationlogstore"
"galaxy/rtmanager/internal/domain/operation"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain routes the test binary through pgtest.RunMain so the package's
// database fixtures are set up and torn down around the test run.
func TestMain(m *testing.M) { pgtest.RunMain(m) }
// newStore truncates every table and returns a fresh store wired to the
// shared test pool, so each test starts from an empty operation_log.
func newStore(t *testing.T) *operationlogstore.Store {
	t.Helper()
	pgtest.TruncateAll(t)
	store, err := operationlogstore.New(operationlogstore.Config{
		DB:               pgtest.Ensure(t).Pool(),
		OperationTimeout: pgtest.OperationTimeout,
	})
	require.NoError(t, err)
	return store
}
// successStartEntry builds a valid, successful "start" entry for gameID
// whose FinishedAt is one second after startedAt.
func successStartEntry(gameID string, startedAt time.Time, sourceRef string) operation.OperationEntry {
	finishedAt := startedAt.Add(time.Second)
	return operation.OperationEntry{
		GameID:      gameID,
		OpKind:      operation.OpKindStart,
		OpSource:    operation.OpSourceLobbyStream,
		SourceRef:   sourceRef,
		ImageRef:    "galaxy/game:v1.2.3",
		ContainerID: "container-1",
		Outcome:     operation.OutcomeSuccess,
		StartedAt:   startedAt,
		FinishedAt:  &finishedAt,
	}
}
// TestAppendReturnsPositiveIDs verifies that Append surfaces the
// bigserial id and that ids are strictly increasing across inserts.
func TestAppendReturnsPositiveIDs(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	startedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	id1, err := store.Append(ctx, successStartEntry("game-001", startedAt, "1700000000000-0"))
	require.NoError(t, err)
	assert.Greater(t, id1, int64(0))
	id2, err := store.Append(ctx, successStartEntry("game-001", startedAt.Add(time.Minute), "1700000000001-0"))
	require.NoError(t, err)
	assert.Greater(t, id2, id1)
}
// TestAppendValidatesEntry checks that Append rejects invalid entries
// before touching SQL; each case mutates one field of an otherwise-valid
// entry into an invalid state.
func TestAppendValidatesEntry(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	tests := []struct {
		name   string
		mutate func(*operation.OperationEntry)
	}{
		{"empty game id", func(e *operation.OperationEntry) { e.GameID = "" }},
		{"unknown op kind", func(e *operation.OperationEntry) { e.OpKind = "exotic" }},
		{"unknown op source", func(e *operation.OperationEntry) { e.OpSource = "exotic" }},
		{"unknown outcome", func(e *operation.OperationEntry) { e.Outcome = "exotic" }},
		{"zero started at", func(e *operation.OperationEntry) { e.StartedAt = time.Time{} }},
		{"failure without error code", func(e *operation.OperationEntry) {
			e.Outcome = operation.OutcomeFailure
			e.ErrorCode = ""
		}},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			entry := successStartEntry("game-001",
				time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC), "ref")
			tt.mutate(&entry)
			_, err := store.Append(ctx, entry)
			require.Error(t, err)
		})
	}
}
// TestListByGameReturnsEntriesNewestFirst asserts started_at DESC
// ordering and that entries belonging to another game are excluded.
func TestListByGameReturnsEntriesNewestFirst(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	base := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	for index := range 3 {
		// NOTE(review): the trailing "-" suggests the index was meant to
		// be appended to the source ref; all three entries currently
		// share the same ref — confirm whether uniqueness matters here.
		_, err := store.Append(ctx, successStartEntry("game-001",
			base.Add(time.Duration(index)*time.Minute),
			"ref-game-001-"))
		require.NoError(t, err)
	}
	// Foreign-game entry must not appear in the list.
	_, err := store.Append(ctx, successStartEntry("game-other", base, "ref-other"))
	require.NoError(t, err)
	entries, err := store.ListByGame(ctx, "game-001", 10)
	require.NoError(t, err)
	require.Len(t, entries, 3)
	for index := range 2 {
		assert.True(t,
			!entries[index].StartedAt.Before(entries[index+1].StartedAt),
			"entries must be ordered started_at DESC; got %s before %s",
			entries[index].StartedAt, entries[index+1].StartedAt,
		)
	}
}
// TestListByGameRespectsLimit inserts five entries and verifies that a
// limit of two caps the result set.
func TestListByGameRespectsLimit(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	base := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	for index := range 5 {
		_, err := store.Append(ctx, successStartEntry("game-001",
			base.Add(time.Duration(index)*time.Minute), "ref"))
		require.NoError(t, err)
	}
	entries, err := store.ListByGame(ctx, "game-001", 2)
	require.NoError(t, err)
	require.Len(t, entries, 2)
}
// TestListByGameReturnsEmptyForUnknownGame verifies the "no entries"
// contract: empty (nil) result and no error for an unknown game id.
func TestListByGameReturnsEmptyForUnknownGame(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	entries, err := store.ListByGame(ctx, "game-missing", 10)
	require.NoError(t, err)
	assert.Empty(t, entries)
}
// TestListByGameRejectsInvalidArgs covers the argument guards: empty
// game id, zero limit, and negative limit must all error.
func TestListByGameRejectsInvalidArgs(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	_, err := store.ListByGame(ctx, "", 10)
	require.Error(t, err)
	_, err = store.ListByGame(ctx, "game-001", 0)
	require.Error(t, err)
	_, err = store.ListByGame(ctx, "game-001", -3)
	require.Error(t, err)
}
// TestAppendRoundTripsAllFields writes one fully-populated failure entry
// and asserts every field survives the Append → ListByGame round trip,
// including UTC normalization of both timestamps.
func TestAppendRoundTripsAllFields(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	startedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	finishedAt := startedAt.Add(2 * time.Second)
	original := operation.OperationEntry{
		GameID:       "game-001",
		OpKind:       operation.OpKindStop,
		OpSource:     operation.OpSourceGMRest,
		SourceRef:    "request-7",
		ImageRef:     "galaxy/game:v2.0.0",
		ContainerID:  "container-X",
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    "container_start_failed",
		ErrorMessage: "stop deadline exceeded",
		StartedAt:    startedAt,
		FinishedAt:   &finishedAt,
	}
	id, err := store.Append(ctx, original)
	require.NoError(t, err)
	entries, err := store.ListByGame(ctx, "game-001", 10)
	require.NoError(t, err)
	require.Len(t, entries, 1)
	got := entries[0]
	assert.Equal(t, id, got.ID)
	assert.Equal(t, original.GameID, got.GameID)
	assert.Equal(t, original.OpKind, got.OpKind)
	assert.Equal(t, original.OpSource, got.OpSource)
	assert.Equal(t, original.SourceRef, got.SourceRef)
	assert.Equal(t, original.ImageRef, got.ImageRef)
	assert.Equal(t, original.ContainerID, got.ContainerID)
	assert.Equal(t, original.Outcome, got.Outcome)
	assert.Equal(t, original.ErrorCode, got.ErrorCode)
	assert.Equal(t, original.ErrorMessage, got.ErrorMessage)
	assert.True(t, original.StartedAt.Equal(got.StartedAt))
	require.NotNil(t, got.FinishedAt)
	assert.True(t, original.FinishedAt.Equal(*got.FinishedAt))
	// Locations must come back as UTC regardless of what the driver used.
	assert.Equal(t, time.UTC, got.StartedAt.Location())
	assert.Equal(t, time.UTC, got.FinishedAt.Location())
}
// TestNewRejectsNilDB verifies the constructor guard for a missing pool.
func TestNewRejectsNilDB(t *testing.T) {
	_, err := operationlogstore.New(operationlogstore.Config{OperationTimeout: time.Second})
	require.Error(t, err)
}
// TestNewRejectsNonPositiveTimeout verifies the constructor guard for a
// zero (unset) operation timeout.
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
	_, err := operationlogstore.New(operationlogstore.Config{
		DB: pgtest.Ensure(t).Pool(),
	})
	require.Error(t, err)
}
@@ -0,0 +1,500 @@
// Package runtimerecordstore implements the PostgreSQL-backed adapter for
// `ports.RuntimeRecordStore`.
//
// The package owns the on-disk shape of the `runtime_records` table
// defined in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.RuntimeRecordStore` interface
// declared in `internal/ports/runtimerecordstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// Lifecycle transitions (UpdateStatus) use compare-and-swap on
// `(status, current_container_id)` rather than holding a SELECT ... FOR
// UPDATE lock across the caller's logic, mirroring the pattern used by
// `lobby/internal/adapters/postgres/gamestore`.
package runtimerecordstore
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// Config configures one PostgreSQL-backed runtime-record store instance.
// The store does not own the underlying *sql.DB lifecycle: the caller
// (typically the service runtime) opens, instruments, migrates, and
// closes the pool.
type Config struct {
	// DB stores the connection pool the store uses for every query.
	DB *sql.DB
	// OperationTimeout bounds one round trip. The store creates a
	// derived context for each operation so callers cannot starve the
	// pool with an unbounded ctx.
	OperationTimeout time.Duration
}
// Store persists Runtime Manager runtime records in PostgreSQL.
// Construct instances through New; the zero value is unusable (every
// method guards against a nil db).
type Store struct {
	db               *sql.DB
	operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed runtime-record store from cfg.
// It fails when the DB handle is nil or the per-operation timeout is
// not strictly positive.
func New(cfg Config) (*Store, error) {
	switch {
	case cfg.DB == nil:
		return nil, errors.New("new postgres runtime record store: db must not be nil")
	case cfg.OperationTimeout <= 0:
		return nil, errors.New("new postgres runtime record store: operation timeout must be positive")
	}
	store := &Store{db: cfg.DB, operationTimeout: cfg.OperationTimeout}
	return store, nil
}
// runtimeSelectColumns is the canonical SELECT list for the runtime_records
// table, matching scanRecord's column order.
// Any change here must be mirrored in scanRecord, which scans
// positionally in exactly this order.
var runtimeSelectColumns = pg.ColumnList{
	pgtable.RuntimeRecords.GameID,
	pgtable.RuntimeRecords.Status,
	pgtable.RuntimeRecords.CurrentContainerID,
	pgtable.RuntimeRecords.CurrentImageRef,
	pgtable.RuntimeRecords.EngineEndpoint,
	pgtable.RuntimeRecords.StatePath,
	pgtable.RuntimeRecords.DockerNetwork,
	pgtable.RuntimeRecords.StartedAt,
	pgtable.RuntimeRecords.StoppedAt,
	pgtable.RuntimeRecords.RemovedAt,
	pgtable.RuntimeRecords.LastOpAt,
	pgtable.RuntimeRecords.CreatedAt,
}
// Get returns the record identified by gameID. It returns
// runtime.ErrNotFound when no record exists; every other failure is
// wrapped with the operation name.
func (store *Store) Get(ctx context.Context, gameID string) (runtime.RuntimeRecord, error) {
	if store == nil || store.db == nil {
		return runtime.RuntimeRecord{}, errors.New("get runtime record: nil store")
	}
	// Constant message: errors.New instead of a verb-less fmt.Errorf,
	// matching the other guards in this store.
	if strings.TrimSpace(gameID) == "" {
		return runtime.RuntimeRecord{}, errors.New("get runtime record: game id must not be empty")
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "get runtime record", store.operationTimeout)
	if err != nil {
		return runtime.RuntimeRecord{}, err
	}
	defer cancel()
	stmt := pg.SELECT(runtimeSelectColumns).
		FROM(pgtable.RuntimeRecords).
		WHERE(pgtable.RuntimeRecords.GameID.EQ(pg.String(gameID)))
	query, args := stmt.Sql()
	row := store.db.QueryRowContext(operationCtx, query, args...)
	record, err := scanRecord(row)
	// Translate the driver's no-rows sentinel into the domain sentinel so
	// callers never depend on database/sql.
	if sqlx.IsNoRows(err) {
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	}
	if err != nil {
		return runtime.RuntimeRecord{}, fmt.Errorf("get runtime record: %w", err)
	}
	return record, nil
}
// Upsert inserts record when no row exists for record.GameID and
// otherwise overwrites every mutable column verbatim. created_at is
// preserved across upserts so the "first time RTM saw the game"
// timestamp stays stable.
func (store *Store) Upsert(ctx context.Context, record runtime.RuntimeRecord) error {
	if store == nil || store.db == nil {
		return errors.New("upsert runtime record: nil store")
	}
	if err := record.Validate(); err != nil {
		return fmt.Errorf("upsert runtime record: %w", err)
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "upsert runtime record", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()
	stmt := pgtable.RuntimeRecords.INSERT(
		pgtable.RuntimeRecords.GameID,
		pgtable.RuntimeRecords.Status,
		pgtable.RuntimeRecords.CurrentContainerID,
		pgtable.RuntimeRecords.CurrentImageRef,
		pgtable.RuntimeRecords.EngineEndpoint,
		pgtable.RuntimeRecords.StatePath,
		pgtable.RuntimeRecords.DockerNetwork,
		pgtable.RuntimeRecords.StartedAt,
		pgtable.RuntimeRecords.StoppedAt,
		pgtable.RuntimeRecords.RemovedAt,
		pgtable.RuntimeRecords.LastOpAt,
		pgtable.RuntimeRecords.CreatedAt,
	).VALUES(
		// VALUES order must stay aligned one-to-one with the column list
		// above. Empty container/image strings map to SQL NULL; nil time
		// pointers map to NULL; mandatory timestamps are forced to UTC.
		record.GameID,
		string(record.Status),
		sqlx.NullableString(record.CurrentContainerID),
		sqlx.NullableString(record.CurrentImageRef),
		record.EngineEndpoint,
		record.StatePath,
		record.DockerNetwork,
		sqlx.NullableTimePtr(record.StartedAt),
		sqlx.NullableTimePtr(record.StoppedAt),
		sqlx.NullableTimePtr(record.RemovedAt),
		record.LastOpAt.UTC(),
		record.CreatedAt.UTC(),
	).ON_CONFLICT(pgtable.RuntimeRecords.GameID).DO_UPDATE(
		// CreatedAt is deliberately absent from this SET list so the
		// insert-time value survives every subsequent upsert.
		pg.SET(
			pgtable.RuntimeRecords.Status.SET(pgtable.RuntimeRecords.EXCLUDED.Status),
			pgtable.RuntimeRecords.CurrentContainerID.SET(pgtable.RuntimeRecords.EXCLUDED.CurrentContainerID),
			pgtable.RuntimeRecords.CurrentImageRef.SET(pgtable.RuntimeRecords.EXCLUDED.CurrentImageRef),
			pgtable.RuntimeRecords.EngineEndpoint.SET(pgtable.RuntimeRecords.EXCLUDED.EngineEndpoint),
			pgtable.RuntimeRecords.StatePath.SET(pgtable.RuntimeRecords.EXCLUDED.StatePath),
			pgtable.RuntimeRecords.DockerNetwork.SET(pgtable.RuntimeRecords.EXCLUDED.DockerNetwork),
			pgtable.RuntimeRecords.StartedAt.SET(pgtable.RuntimeRecords.EXCLUDED.StartedAt),
			pgtable.RuntimeRecords.StoppedAt.SET(pgtable.RuntimeRecords.EXCLUDED.StoppedAt),
			pgtable.RuntimeRecords.RemovedAt.SET(pgtable.RuntimeRecords.EXCLUDED.RemovedAt),
			pgtable.RuntimeRecords.LastOpAt.SET(pgtable.RuntimeRecords.EXCLUDED.LastOpAt),
		),
	)
	query, args := stmt.Sql()
	if _, err := store.db.ExecContext(operationCtx, query, args...); err != nil {
		return fmt.Errorf("upsert runtime record: %w", err)
	}
	return nil
}
// UpdateStatus applies one status transition with a compare-and-swap
// guard on (status, current_container_id). Validate is invoked before
// any SQL touch.
//
// An UPDATE that affects zero rows is disambiguated by
// classifyMissingUpdate into runtime.ErrNotFound (no row) or
// runtime.ErrConflict (CAS predicate mismatch).
func (store *Store) UpdateStatus(ctx context.Context, input ports.UpdateStatusInput) error {
	if store == nil || store.db == nil {
		return errors.New("update runtime status: nil store")
	}
	// Wrap the validation error with the operation prefix for consistency
	// with every other error path in this store (Get/Upsert/Append style).
	// errors.Is/As matching through the chain is preserved by %w.
	if err := input.Validate(); err != nil {
		return fmt.Errorf("update runtime status: %w", err)
	}
	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "update runtime status", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()
	// One timestamp serves both last_op_at and the destination-specific
	// stopped_at/removed_at column.
	now := input.Now.UTC()
	stmt, err := buildUpdateStatusStatement(input, now)
	if err != nil {
		return err
	}
	query, args := stmt.Sql()
	result, err := store.db.ExecContext(operationCtx, query, args...)
	if err != nil {
		return fmt.Errorf("update runtime status: %w", err)
	}
	affected, err := result.RowsAffected()
	if err != nil {
		return fmt.Errorf("update runtime status: rows affected: %w", err)
	}
	if affected == 0 {
		return store.classifyMissingUpdate(operationCtx, input.GameID)
	}
	return nil
}
// classifyMissingUpdate distinguishes ErrNotFound from ErrConflict after
// an UPDATE that affected zero rows. A row that is absent yields
// ErrNotFound; a row whose status or container_id does not match the
// CAS predicate yields ErrConflict.
//
// NOTE(review): the probe runs after the failed UPDATE, so a concurrent
// writer may insert or delete the row in between; the classification is
// therefore best-effort — confirm callers treat both sentinels as retry
// hints rather than hard truths.
func (store *Store) classifyMissingUpdate(ctx context.Context, gameID string) error {
	probe := pg.SELECT(pgtable.RuntimeRecords.Status).
		FROM(pgtable.RuntimeRecords).
		WHERE(pgtable.RuntimeRecords.GameID.EQ(pg.String(gameID)))
	probeQuery, probeArgs := probe.Sql()
	var current string
	row := store.db.QueryRowContext(ctx, probeQuery, probeArgs...)
	if err := row.Scan(&current); err != nil {
		if sqlx.IsNoRows(err) {
			return runtime.ErrNotFound
		}
		return fmt.Errorf("update runtime status: probe: %w", err)
	}
	// The row exists, so the CAS predicate must have failed.
	return runtime.ErrConflict
}
// buildUpdateStatusStatement assembles the UPDATE statement applied for
// one runtime-status transition.
//
// status, last_op_at are always updated. The remaining columns are
// driven by the destination:
//
//   - StatusStopped: stopped_at is captured at Now.
//   - StatusRemoved: removed_at is captured at Now and current_container_id
//     is NULLed (the container is gone; the prior id remains observable
//     through operation_log).
//   - StatusRunning: only status + last_op_at change. Fresh started_at
//     and current_container_id are installed via Upsert before any
//     stopped → running transition reaches this path; the path exists
//     so runtime.AllowedTransitions stays one-to-one with the adapter
//     capability matrix even though v1 services use Upsert for this
//     case.
func buildUpdateStatusStatement(input ports.UpdateStatusInput, now time.Time) (pg.UpdateStatement, error) {
	statusValue := pg.String(string(input.To))
	nowValue := pg.TimestampzT(now)
	var stmt pg.UpdateStatement
	// SET values are positional: they must line up one-to-one with the
	// column list passed to UPDATE in each case below.
	switch input.To {
	case runtime.StatusStopped:
		// stopped: record the stop instant alongside the transition.
		stmt = pgtable.RuntimeRecords.UPDATE(
			pgtable.RuntimeRecords.Status,
			pgtable.RuntimeRecords.LastOpAt,
			pgtable.RuntimeRecords.StoppedAt,
		).SET(
			statusValue,
			nowValue,
			nowValue,
		)
	case runtime.StatusRemoved:
		// removed: record the removal instant and clear the container id.
		stmt = pgtable.RuntimeRecords.UPDATE(
			pgtable.RuntimeRecords.Status,
			pgtable.RuntimeRecords.LastOpAt,
			pgtable.RuntimeRecords.RemovedAt,
			pgtable.RuntimeRecords.CurrentContainerID,
		).SET(
			statusValue,
			nowValue,
			nowValue,
			pg.NULL,
		)
	case runtime.StatusRunning:
		// running: minimal update; see the function comment for why this
		// path stays thin.
		stmt = pgtable.RuntimeRecords.UPDATE(
			pgtable.RuntimeRecords.Status,
			pgtable.RuntimeRecords.LastOpAt,
		).SET(
			statusValue,
			nowValue,
		)
	default:
		return nil, fmt.Errorf("update runtime status: destination status %q is unsupported", input.To)
	}
	// CAS guard: the row must still be in the expected source status …
	whereExpr := pg.AND(
		pgtable.RuntimeRecords.GameID.EQ(pg.String(input.GameID)),
		pgtable.RuntimeRecords.Status.EQ(pg.String(string(input.ExpectedFrom))),
	)
	// … and, when the caller pins a container, bound to that container id.
	if input.ExpectedContainerID != "" {
		whereExpr = pg.AND(
			whereExpr,
			pgtable.RuntimeRecords.CurrentContainerID.EQ(pg.String(input.ExpectedContainerID)),
		)
	}
	return stmt.WHERE(whereExpr), nil
}
// ListByStatus returns every record currently indexed under status.
// Ordering is last_op_at DESC, game_id ASC — the direction the
// `runtime_records_status_last_op_idx` index is built in. Unknown
// statuses are rejected before touching the database. An empty result
// is returned as nil.
func (store *Store) ListByStatus(ctx context.Context, status runtime.Status) ([]runtime.RuntimeRecord, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("list runtime records by status: nil store")
	}
	if !status.IsKnown() {
		return nil, fmt.Errorf("list runtime records by status: status %q is unsupported", status)
	}
	queryCtx, cancel, err := sqlx.WithTimeout(ctx, "list runtime records by status", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()
	statement := pg.SELECT(runtimeSelectColumns).
		FROM(pgtable.RuntimeRecords).
		WHERE(pgtable.RuntimeRecords.Status.EQ(pg.String(string(status)))).
		ORDER_BY(pgtable.RuntimeRecords.LastOpAt.DESC(), pgtable.RuntimeRecords.GameID.ASC())
	sqlText, sqlArgs := statement.Sql()
	rows, err := store.db.QueryContext(queryCtx, sqlText, sqlArgs...)
	if err != nil {
		return nil, fmt.Errorf("list runtime records by status: %w", err)
	}
	defer rows.Close()
	var matched []runtime.RuntimeRecord
	for rows.Next() {
		record, scanErr := scanRecord(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("list runtime records by status: scan: %w", scanErr)
		}
		matched = append(matched, record)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list runtime records by status: %w", err)
	}
	// matched stays nil when no rows matched, signaling "no records".
	return matched, nil
}
// List returns every runtime record currently stored. Ordering matches
// ListByStatus — last_op_at DESC, game_id ASC — so the REST list
// endpoint sees the freshest activity first. An empty result is
// returned as nil.
func (store *Store) List(ctx context.Context) ([]runtime.RuntimeRecord, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("list runtime records: nil store")
	}
	queryCtx, cancel, err := sqlx.WithTimeout(ctx, "list runtime records", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()
	statement := pg.SELECT(runtimeSelectColumns).
		FROM(pgtable.RuntimeRecords).
		ORDER_BY(pgtable.RuntimeRecords.LastOpAt.DESC(), pgtable.RuntimeRecords.GameID.ASC())
	sqlText, sqlArgs := statement.Sql()
	rows, err := store.db.QueryContext(queryCtx, sqlText, sqlArgs...)
	if err != nil {
		return nil, fmt.Errorf("list runtime records: %w", err)
	}
	defer rows.Close()
	var all []runtime.RuntimeRecord
	for rows.Next() {
		record, scanErr := scanRecord(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("list runtime records: scan: %w", scanErr)
		}
		all = append(all, record)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list runtime records: %w", err)
	}
	// all stays nil when the table is empty, matching ListByStatus.
	return all, nil
}
// CountByStatus returns the number of records indexed under each status.
// Statuses with zero records are present in the result with a zero
// count so callers (e.g. the telemetry gauge) can publish a stable
// label set on every reading.
func (store *Store) CountByStatus(ctx context.Context) (map[runtime.Status]int, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("count runtime records by status: nil store")
	}
	queryCtx, cancel, err := sqlx.WithTimeout(ctx, "count runtime records by status", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()
	statement := pg.SELECT(pgtable.RuntimeRecords.Status, pg.COUNT(pg.STAR).AS("count")).
		FROM(pgtable.RuntimeRecords).
		GROUP_BY(pgtable.RuntimeRecords.Status)
	sqlText, sqlArgs := statement.Sql()
	rows, err := store.db.QueryContext(queryCtx, sqlText, sqlArgs...)
	if err != nil {
		return nil, fmt.Errorf("count runtime records by status: %w", err)
	}
	defer rows.Close()
	// Seed every known status with zero so absent buckets still appear.
	known := runtime.AllStatuses()
	counts := make(map[runtime.Status]int, len(known))
	for _, status := range known {
		counts[status] = 0
	}
	for rows.Next() {
		var (
			bucket string
			total  int
		)
		if err := rows.Scan(&bucket, &total); err != nil {
			return nil, fmt.Errorf("count runtime records by status: scan: %w", err)
		}
		counts[runtime.Status(bucket)] = total
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("count runtime records by status: %w", err)
	}
	return counts, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanRecord can be shared
// across both single-row reads and iterated reads.
type rowScanner interface {
	// Scan copies the current row's columns into dest — the one method
	// *sql.Row and *sql.Rows have in common.
	Scan(dest ...any) error
}
// scanRecord scans one runtime_records row from rs. The Scan destination
// order must match runtimeSelectColumns. Errors — including sql.ErrNoRows
// — are returned verbatim so callers can distinguish "no row" from a
// hard error. Timestamps are normalized to UTC and nullable columns are
// mapped to Go zero values / nil pointers.
func scanRecord(rs rowScanner) (runtime.RuntimeRecord, error) {
	var record runtime.RuntimeRecord
	// Temporaries for columns needing conversion: the status enum and
	// every nullable / time column.
	var (
		statusText  string
		containerID sql.NullString
		imageRef    sql.NullString
		startedAt   sql.NullTime
		stoppedAt   sql.NullTime
		removedAt   sql.NullTime
		lastOpAt    time.Time
		createdAt   time.Time
	)
	if err := rs.Scan(
		&record.GameID,
		&statusText,
		&containerID,
		&imageRef,
		&record.EngineEndpoint,
		&record.StatePath,
		&record.DockerNetwork,
		&startedAt,
		&stoppedAt,
		&removedAt,
		&lastOpAt,
		&createdAt,
	); err != nil {
		return runtime.RuntimeRecord{}, err
	}
	record.Status = runtime.Status(statusText)
	record.CurrentContainerID = sqlx.StringFromNullable(containerID)
	record.CurrentImageRef = sqlx.StringFromNullable(imageRef)
	record.StartedAt = sqlx.TimePtrFromNullable(startedAt)
	record.StoppedAt = sqlx.TimePtrFromNullable(stoppedAt)
	record.RemovedAt = sqlx.TimePtrFromNullable(removedAt)
	record.LastOpAt = lastOpAt.UTC()
	record.CreatedAt = createdAt.UTC()
	return record, nil
}
// Ensure Store satisfies the ports.RuntimeRecordStore interface at
// compile time; the build fails here if the adapter and the port drift.
var _ ports.RuntimeRecordStore = (*Store)(nil)
@@ -0,0 +1,420 @@
package runtimerecordstore_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/adapters/postgres/runtimerecordstore"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain hands control to pgtest.RunMain, which owns the shared
// PostgreSQL fixture lifecycle for the whole test binary.
func TestMain(m *testing.M) { pgtest.RunMain(m) }
// newStore truncates all tables in the shared test database and returns
// a fresh Store wired to that database's pool, so each test starts from
// a clean slate.
func newStore(t *testing.T) *runtimerecordstore.Store {
	t.Helper()
	pgtest.TruncateAll(t)
	store, err := runtimerecordstore.New(runtimerecordstore.Config{
		DB:               pgtest.Ensure(t).Pool(),
		OperationTimeout: pgtest.OperationTimeout,
	})
	require.NoError(t, err)
	return store
}
// runningRecord builds a StatusRunning record with a fixed UTC timestamp
// (used for StartedAt, LastOpAt, and CreatedAt alike) so tests can make
// exact time comparisons.
func runningRecord(t *testing.T, gameID, containerID, imageRef string) runtime.RuntimeRecord {
	t.Helper()
	now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	// Separate variable so StartedAt gets its own address.
	started := now
	return runtime.RuntimeRecord{
		GameID:             gameID,
		Status:             runtime.StatusRunning,
		CurrentContainerID: containerID,
		CurrentImageRef:    imageRef,
		EngineEndpoint:     "http://galaxy-game-" + gameID + ":8080",
		StatePath:          "/var/lib/galaxy/games/" + gameID,
		DockerNetwork:      "galaxy-net",
		StartedAt:          &started,
		LastOpAt:           now,
		CreatedAt:          now,
	}
}
// TestUpsertAndGetRoundTrip verifies a record written via Upsert is read
// back field-for-field by Get, that every timestamp comes back in UTC,
// and that the stop/remove timestamps stay nil for a running record.
func TestUpsertAndGetRoundTrip(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	got, err := store.Get(ctx, record.GameID)
	require.NoError(t, err)
	assert.Equal(t, record.GameID, got.GameID)
	assert.Equal(t, record.Status, got.Status)
	assert.Equal(t, record.CurrentContainerID, got.CurrentContainerID)
	assert.Equal(t, record.CurrentImageRef, got.CurrentImageRef)
	assert.Equal(t, record.EngineEndpoint, got.EngineEndpoint)
	assert.Equal(t, record.StatePath, got.StatePath)
	assert.Equal(t, record.DockerNetwork, got.DockerNetwork)
	require.NotNil(t, got.StartedAt)
	assert.True(t, record.StartedAt.Equal(*got.StartedAt))
	// Times are compared with Equal above; Location checks pin the UTC
	// normalization performed by the store's scanner.
	assert.Equal(t, time.UTC, got.StartedAt.Location())
	assert.Equal(t, time.UTC, got.LastOpAt.Location())
	assert.Equal(t, time.UTC, got.CreatedAt.Location())
	assert.Nil(t, got.StoppedAt)
	assert.Nil(t, got.RemovedAt)
}
// TestGetReturnsNotFound verifies Get maps a missing row to the domain
// sentinel runtime.ErrNotFound.
func TestGetReturnsNotFound(t *testing.T) {
	store := newStore(t)
	_, err := store.Get(context.Background(), "game-missing")
	require.ErrorIs(t, err, runtime.ErrNotFound)
}
// TestUpsertOverwritesMutableColumnsPreservesCreatedAt verifies that a
// second Upsert for the same game overwrites the mutable columns
// (container id, image ref, started_at, last_op_at) while created_at
// keeps the value from the first insert.
func TestUpsertOverwritesMutableColumnsPreservesCreatedAt(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	original := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, original))
	updated := original
	updated.CurrentContainerID = "container-2"
	updated.CurrentImageRef = "galaxy/game:v1.2.4"
	newStarted := original.LastOpAt.Add(time.Minute)
	updated.StartedAt = &newStarted
	updated.LastOpAt = newStarted
	// Fresh CreatedAt simulates a caller passing "now"; the store must
	// preserve the original CreatedAt value on conflict.
	updated.CreatedAt = newStarted
	require.NoError(t, store.Upsert(ctx, updated))
	got, err := store.Get(ctx, original.GameID)
	require.NoError(t, err)
	assert.Equal(t, "container-2", got.CurrentContainerID)
	assert.Equal(t, "galaxy/game:v1.2.4", got.CurrentImageRef)
	assert.True(t, got.LastOpAt.Equal(newStarted))
	assert.True(t, got.CreatedAt.Equal(original.CreatedAt),
		"created_at must be preserved across upserts: got %s, want %s",
		got.CreatedAt, original.CreatedAt)
}
// TestUpdateStatusRunningToStopped verifies the running → stopped
// transition stamps stopped_at and last_op_at with Now and leaves
// current_container_id untouched.
func TestUpdateStatusRunningToStopped(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	now := record.LastOpAt.Add(2 * time.Minute)
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:              record.GameID,
		ExpectedFrom:        runtime.StatusRunning,
		ExpectedContainerID: record.CurrentContainerID,
		To:                  runtime.StatusStopped,
		Now:                 now,
	}))
	got, err := store.Get(ctx, record.GameID)
	require.NoError(t, err)
	assert.Equal(t, runtime.StatusStopped, got.Status)
	require.NotNil(t, got.StoppedAt)
	assert.True(t, now.Equal(*got.StoppedAt))
	assert.True(t, now.Equal(got.LastOpAt))
	// container id is preserved on stop; cleanup later NULLs it.
	assert.Equal(t, record.CurrentContainerID, got.CurrentContainerID)
}
// TestUpdateStatusRunningToRemovedClearsContainerID verifies the
// running → removed transition stamps removed_at/last_op_at and NULLs
// current_container_id, which reads back as the empty string.
func TestUpdateStatusRunningToRemovedClearsContainerID(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	now := record.LastOpAt.Add(time.Minute)
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       record.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusRemoved,
		Now:          now,
	}))
	got, err := store.Get(ctx, record.GameID)
	require.NoError(t, err)
	assert.Equal(t, runtime.StatusRemoved, got.Status)
	require.NotNil(t, got.RemovedAt)
	assert.True(t, now.Equal(*got.RemovedAt))
	assert.True(t, now.Equal(got.LastOpAt))
	assert.Empty(t, got.CurrentContainerID, "current_container_id must be NULL after removal")
}
// TestUpdateStatusStoppedToRemoved drives the two-step
// running → stopped → removed path and verifies removal stamps
// removed_at while stopped_at keeps its earlier value.
func TestUpdateStatusStoppedToRemoved(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	stopAt := record.LastOpAt.Add(time.Minute)
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       record.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          stopAt,
	}))
	removeAt := stopAt.Add(time.Hour)
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       record.GameID,
		ExpectedFrom: runtime.StatusStopped,
		To:           runtime.StatusRemoved,
		Now:          removeAt,
	}))
	got, err := store.Get(ctx, record.GameID)
	require.NoError(t, err)
	assert.Equal(t, runtime.StatusRemoved, got.Status)
	require.NotNil(t, got.RemovedAt)
	assert.True(t, removeAt.Equal(*got.RemovedAt))
	assert.True(t, removeAt.Equal(got.LastOpAt))
	require.NotNil(t, got.StoppedAt, "stopped_at must remain populated through removal")
	assert.True(t, stopAt.Equal(*got.StoppedAt))
	assert.Empty(t, got.CurrentContainerID)
}
// TestUpdateStatusReturnsConflictOnFromMismatch verifies the CAS guard:
// when ExpectedFrom does not match the row's current status, UpdateStatus
// reports runtime.ErrConflict.
func TestUpdateStatusReturnsConflictOnFromMismatch(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       record.GameID,
		ExpectedFrom: runtime.StatusStopped, // wrong
		To:           runtime.StatusRemoved,
		Now:          record.LastOpAt.Add(time.Minute),
	})
	require.ErrorIs(t, err, runtime.ErrConflict)
}
// TestUpdateStatusReturnsConflictOnContainerIDMismatch verifies the
// optional container-id pin: a non-matching ExpectedContainerID makes
// the transition fail with runtime.ErrConflict even when the status
// matches.
func TestUpdateStatusReturnsConflictOnContainerIDMismatch(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:              record.GameID,
		ExpectedFrom:        runtime.StatusRunning,
		ExpectedContainerID: "container-other",
		To:                  runtime.StatusStopped,
		Now:                 record.LastOpAt.Add(time.Minute),
	})
	require.ErrorIs(t, err, runtime.ErrConflict)
}
// TestUpdateStatusReturnsNotFoundForMissing verifies that a transition on
// a game id with no row at all yields runtime.ErrNotFound (not a
// conflict).
func TestUpdateStatusReturnsNotFoundForMissing(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       "game-missing",
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          time.Now().UTC(),
	})
	require.ErrorIs(t, err, runtime.ErrNotFound)
}
// TestUpdateStatusValidatesInputBeforeStore verifies input validation: a
// zero Now is rejected. (The test only asserts an error is returned;
// that validation happens before any SQL runs is implied by the name —
// confirm against the store's UpdateStatus implementation.)
func TestUpdateStatusValidatesInputBeforeStore(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       "game-001",
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		// Now intentionally zero — validation must reject.
	})
	require.Error(t, err)
}
// TestUpdateStatusConcurrentCAS asserts the CAS guard: when two callers
// race to apply the running → stopped transition on the same row,
// exactly one wins (returns nil) and the other observes
// runtime.ErrConflict.
func TestUpdateStatusConcurrentCAS(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, record))
	const concurrency = 8
	// One result slot per goroutine; disjoint indices, so no extra
	// synchronization is needed beyond the WaitGroup.
	results := make([]error, concurrency)
	var wg sync.WaitGroup
	wg.Add(concurrency)
	// range-over-int (Go 1.22+): index is a fresh variable per iteration,
	// safe to capture in the goroutine closure.
	for index := range concurrency {
		go func() {
			defer wg.Done()
			results[index] = store.UpdateStatus(ctx, ports.UpdateStatusInput{
				GameID:              record.GameID,
				ExpectedFrom:        runtime.StatusRunning,
				ExpectedContainerID: record.CurrentContainerID,
				To:                  runtime.StatusStopped,
				Now:                 record.LastOpAt.Add(time.Duration(index+1) * time.Second),
			})
		}()
	}
	wg.Wait()
	wins, conflicts := 0, 0
	for _, err := range results {
		switch {
		case err == nil:
			wins++
		case errors.Is(err, runtime.ErrConflict):
			conflicts++
		default:
			t.Errorf("unexpected error from concurrent UpdateStatus: %v", err)
		}
	}
	assert.Equal(t, 1, wins, "exactly one caller must win the CAS race")
	assert.Equal(t, concurrency-1, conflicts, "the rest must observe runtime.ErrConflict")
}
// TestListByStatusReturnsExpectedRecords verifies that after stopping one
// of three running games, ListByStatus partitions the records correctly
// between the running and stopped buckets.
func TestListByStatusReturnsExpectedRecords(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	a := runningRecord(t, "game-aaa", "container-a", "galaxy/game:v1.2.3")
	b := runningRecord(t, "game-bbb", "container-b", "galaxy/game:v1.2.3")
	c := runningRecord(t, "game-ccc", "container-c", "galaxy/game:v1.2.3")
	for _, r := range []runtime.RuntimeRecord{a, b, c} {
		require.NoError(t, store.Upsert(ctx, r))
	}
	// NOTE(review): stopAt is derived from a.LastOpAt but applied to b;
	// harmless here because runningRecord gives every record the same
	// fixed timestamp, but b.LastOpAt would read more clearly.
	stopAt := a.LastOpAt.Add(time.Minute)
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       b.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          stopAt,
	}))
	running, err := store.ListByStatus(ctx, runtime.StatusRunning)
	require.NoError(t, err)
	gotIDs := map[string]struct{}{}
	for _, r := range running {
		gotIDs[r.GameID] = struct{}{}
	}
	assert.Contains(t, gotIDs, a.GameID)
	assert.Contains(t, gotIDs, c.GameID)
	assert.NotContains(t, gotIDs, b.GameID)
	stopped, err := store.ListByStatus(ctx, runtime.StatusStopped)
	require.NoError(t, err)
	require.Len(t, stopped, 1)
	assert.Equal(t, b.GameID, stopped[0].GameID)
}
// TestListByStatusRejectsUnknown verifies that an unrecognized status
// value is rejected with an error before any query runs.
func TestListByStatusRejectsUnknown(t *testing.T) {
	store := newStore(t)
	records, err := store.ListByStatus(context.Background(), runtime.Status("exotic"))
	require.Error(t, err)
	_ = records
}
// TestListReturnsEveryStatus verifies List returns records across all
// statuses — a stopped record appears alongside the running ones.
func TestListReturnsEveryStatus(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	a := runningRecord(t, "game-aaa", "container-a", "galaxy/game:v1.2.3")
	b := runningRecord(t, "game-bbb", "container-b", "galaxy/game:v1.2.3")
	c := runningRecord(t, "game-ccc", "container-c", "galaxy/game:v1.2.3")
	for _, r := range []runtime.RuntimeRecord{a, b, c} {
		require.NoError(t, store.Upsert(ctx, r))
	}
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       b.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          b.LastOpAt.Add(time.Minute),
	}))
	all, err := store.List(ctx)
	require.NoError(t, err)
	require.Len(t, all, 3)
	// Index records by game id; ordering is covered elsewhere.
	gotIDs := map[string]runtime.Status{}
	for _, r := range all {
		gotIDs[r.GameID] = r.Status
	}
	assert.Equal(t, runtime.StatusRunning, gotIDs[a.GameID])
	assert.Equal(t, runtime.StatusStopped, gotIDs[b.GameID])
	assert.Equal(t, runtime.StatusRunning, gotIDs[c.GameID])
}
// TestListReturnsNilWhenEmpty verifies List yields a nil slice — not an
// empty non-nil one — when the table holds no records.
func TestListReturnsNilWhenEmpty(t *testing.T) {
	store := newStore(t)
	records, err := store.List(context.Background())
	require.NoError(t, err)
	assert.Nil(t, records)
}
// TestCountByStatusReturnsAllBuckets verifies CountByStatus reports every
// known status — including zero-count ones — and the correct totals
// after one of three games is stopped.
func TestCountByStatusReturnsAllBuckets(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)
	a := runningRecord(t, "game-1", "container-1", "galaxy/game:v1.2.3")
	b := runningRecord(t, "game-2", "container-2", "galaxy/game:v1.2.3")
	c := runningRecord(t, "game-3", "container-3", "galaxy/game:v1.2.3")
	for _, r := range []runtime.RuntimeRecord{a, b, c} {
		require.NoError(t, store.Upsert(ctx, r))
	}
	require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:       b.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          b.LastOpAt.Add(time.Minute),
	}))
	counts, err := store.CountByStatus(ctx)
	require.NoError(t, err)
	// Every status must be present as a key so telemetry label sets stay stable.
	for _, status := range runtime.AllStatuses() {
		_, ok := counts[status]
		assert.True(t, ok, "status %q must appear in counts even when zero", status)
	}
	assert.Equal(t, 2, counts[runtime.StatusRunning])
	assert.Equal(t, 1, counts[runtime.StatusStopped])
	assert.Equal(t, 0, counts[runtime.StatusRemoved])
}
// TestNewRejectsNilDB verifies New fails when Config carries no DB pool.
func TestNewRejectsNilDB(t *testing.T) {
	cfg := runtimerecordstore.Config{OperationTimeout: time.Second}
	_, err := runtimerecordstore.New(cfg)
	require.Error(t, err)
}
// TestNewRejectsNonPositiveTimeout verifies New fails when
// OperationTimeout is left at its zero value.
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
	_, err := runtimerecordstore.New(runtimerecordstore.Config{
		DB: pgtest.Ensure(t).Pool(),
	})
	require.Error(t, err)
}