feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
@@ -0,0 +1,500 @@
// Package runtimerecordstore implements the PostgreSQL-backed adapter for
// `ports.RuntimeRecordStore`.
//
// The package owns the on-disk shape of the `runtime_records` table
// defined in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.RuntimeRecordStore` interface
// declared in `internal/ports/runtimerecordstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// Lifecycle transitions (UpdateStatus) use compare-and-swap on
// `(status, current_container_id)` rather than holding a SELECT ... FOR
// UPDATE lock across the caller's logic, mirroring the pattern used by
// `lobby/internal/adapters/postgres/gamestore`.
package runtimerecordstore
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// Config configures one PostgreSQL-backed runtime-record store instance.
// The store does not own the underlying *sql.DB lifecycle: the caller
// (typically the service runtime) opens, instruments, migrates, and
// closes the pool.
//
// New rejects a Config whose DB is nil or whose OperationTimeout is not
// strictly positive.
type Config struct {
// DB stores the connection pool the store uses for every query. It
// must not be nil.
DB *sql.DB
// OperationTimeout bounds one round trip. The store creates a
// derived context for each operation so callers cannot starve the
// pool with an unbounded ctx. It must be greater than zero.
OperationTimeout time.Duration
}
// Store persists Runtime Manager runtime records in PostgreSQL.
//
// Instances are built with New. Every exported method guards against a
// nil receiver or nil pool and returns an error instead of panicking.
type Store struct {
// db is the caller-owned connection pool copied from Config.DB.
db *sql.DB
// operationTimeout is the per-operation deadline copied from
// Config.OperationTimeout.
operationTimeout time.Duration
}
// New validates cfg and returns a Store bound to cfg.DB.
//
// It returns an error when cfg.DB is nil or when cfg.OperationTimeout is
// zero or negative; no database access happens during construction.
func New(cfg Config) (*Store, error) {
	switch {
	case cfg.DB == nil:
		return nil, errors.New("new postgres runtime record store: db must not be nil")
	case cfg.OperationTimeout <= 0:
		return nil, errors.New("new postgres runtime record store: operation timeout must be positive")
	}

	store := &Store{
		db:               cfg.DB,
		operationTimeout: cfg.OperationTimeout,
	}
	return store, nil
}
// runtimeSelectColumns is the canonical SELECT list for the runtime_records
// table, matching scanRecord's column order.
//
// scanRecord reads its destinations positionally, so any reordering,
// addition, or removal here must be mirrored in scanRecord's Scan call.
var runtimeSelectColumns = pg.ColumnList{
pgtable.RuntimeRecords.GameID,
pgtable.RuntimeRecords.Status,
pgtable.RuntimeRecords.CurrentContainerID,
pgtable.RuntimeRecords.CurrentImageRef,
pgtable.RuntimeRecords.EngineEndpoint,
pgtable.RuntimeRecords.StatePath,
pgtable.RuntimeRecords.DockerNetwork,
pgtable.RuntimeRecords.StartedAt,
pgtable.RuntimeRecords.StoppedAt,
pgtable.RuntimeRecords.RemovedAt,
pgtable.RuntimeRecords.LastOpAt,
pgtable.RuntimeRecords.CreatedAt,
}
// Get returns the record identified by gameID.
//
// It returns runtime.ErrNotFound when no row exists for gameID, a plain
// error when the store is nil or gameID is blank (after trimming
// whitespace), and a wrapped error for any other query or scan failure.
// The query runs under a context derived from ctx and bounded by the
// store's operation timeout.
func (store *Store) Get(ctx context.Context, gameID string) (runtime.RuntimeRecord, error) {
	if store == nil || store.db == nil {
		return runtime.RuntimeRecord{}, errors.New("get runtime record: nil store")
	}
	if strings.TrimSpace(gameID) == "" {
		// Constant message: errors.New matches the other guards in this file.
		return runtime.RuntimeRecord{}, errors.New("get runtime record: game id must not be empty")
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "get runtime record", store.operationTimeout)
	if err != nil {
		return runtime.RuntimeRecord{}, err
	}
	defer cancel()

	stmt := pg.SELECT(runtimeSelectColumns).
		FROM(pgtable.RuntimeRecords).
		WHERE(pgtable.RuntimeRecords.GameID.EQ(pg.String(gameID)))
	query, args := stmt.Sql()

	record, err := scanRecord(store.db.QueryRowContext(operationCtx, query, args...))
	switch {
	case sqlx.IsNoRows(err):
		// A missing row is reported as the domain sentinel, not a SQL error.
		return runtime.RuntimeRecord{}, runtime.ErrNotFound
	case err != nil:
		return runtime.RuntimeRecord{}, fmt.Errorf("get runtime record: %w", err)
	}
	return record, nil
}
// Upsert writes record into runtime_records.
//
// When no row exists for record.GameID the record is inserted as-is. On a
// game_id conflict every column except game_id and created_at is replaced
// with the incoming value, so the original created_at ("first time RTM saw
// the game") stays stable. record.Validate runs before any SQL is issued and
// its error is returned wrapped.
func (store *Store) Upsert(ctx context.Context, record runtime.RuntimeRecord) error {
	if store == nil || store.db == nil {
		return errors.New("upsert runtime record: nil store")
	}
	if validateErr := record.Validate(); validateErr != nil {
		return fmt.Errorf("upsert runtime record: %w", validateErr)
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "upsert runtime record", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()

	insert := pgtable.RuntimeRecords.INSERT(
		pgtable.RuntimeRecords.GameID,
		pgtable.RuntimeRecords.Status,
		pgtable.RuntimeRecords.CurrentContainerID,
		pgtable.RuntimeRecords.CurrentImageRef,
		pgtable.RuntimeRecords.EngineEndpoint,
		pgtable.RuntimeRecords.StatePath,
		pgtable.RuntimeRecords.DockerNetwork,
		pgtable.RuntimeRecords.StartedAt,
		pgtable.RuntimeRecords.StoppedAt,
		pgtable.RuntimeRecords.RemovedAt,
		pgtable.RuntimeRecords.LastOpAt,
		pgtable.RuntimeRecords.CreatedAt,
	).VALUES(
		record.GameID,
		string(record.Status),
		sqlx.NullableString(record.CurrentContainerID),
		sqlx.NullableString(record.CurrentImageRef),
		record.EngineEndpoint,
		record.StatePath,
		record.DockerNetwork,
		sqlx.NullableTimePtr(record.StartedAt),
		sqlx.NullableTimePtr(record.StoppedAt),
		sqlx.NullableTimePtr(record.RemovedAt),
		record.LastOpAt.UTC(),
		record.CreatedAt.UTC(),
	)

	// created_at is intentionally missing from the conflict assignments.
	overwrite := pg.SET(
		pgtable.RuntimeRecords.Status.SET(pgtable.RuntimeRecords.EXCLUDED.Status),
		pgtable.RuntimeRecords.CurrentContainerID.SET(pgtable.RuntimeRecords.EXCLUDED.CurrentContainerID),
		pgtable.RuntimeRecords.CurrentImageRef.SET(pgtable.RuntimeRecords.EXCLUDED.CurrentImageRef),
		pgtable.RuntimeRecords.EngineEndpoint.SET(pgtable.RuntimeRecords.EXCLUDED.EngineEndpoint),
		pgtable.RuntimeRecords.StatePath.SET(pgtable.RuntimeRecords.EXCLUDED.StatePath),
		pgtable.RuntimeRecords.DockerNetwork.SET(pgtable.RuntimeRecords.EXCLUDED.DockerNetwork),
		pgtable.RuntimeRecords.StartedAt.SET(pgtable.RuntimeRecords.EXCLUDED.StartedAt),
		pgtable.RuntimeRecords.StoppedAt.SET(pgtable.RuntimeRecords.EXCLUDED.StoppedAt),
		pgtable.RuntimeRecords.RemovedAt.SET(pgtable.RuntimeRecords.EXCLUDED.RemovedAt),
		pgtable.RuntimeRecords.LastOpAt.SET(pgtable.RuntimeRecords.EXCLUDED.LastOpAt),
	)

	query, args := insert.
		ON_CONFLICT(pgtable.RuntimeRecords.GameID).
		DO_UPDATE(overwrite).
		Sql()

	_, err = store.db.ExecContext(operationCtx, query, args...)
	if err != nil {
		return fmt.Errorf("upsert runtime record: %w", err)
	}
	return nil
}
// UpdateStatus performs one status transition guarded by a compare-and-swap
// on (status, current_container_id).
//
// input.Validate runs before any SQL is issued and its error is returned
// unwrapped. When the UPDATE matches no row, a follow-up probe decides
// between runtime.ErrNotFound (no row for the game) and runtime.ErrConflict
// (row present but the CAS predicate did not match).
func (store *Store) UpdateStatus(ctx context.Context, input ports.UpdateStatusInput) error {
	if store == nil || store.db == nil {
		return errors.New("update runtime status: nil store")
	}
	if validateErr := input.Validate(); validateErr != nil {
		return validateErr
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "update runtime status", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()

	// Timestamps are normalised to UTC before they reach the statement.
	stmt, err := buildUpdateStatusStatement(input, input.Now.UTC())
	if err != nil {
		return err
	}

	query, args := stmt.Sql()
	result, err := store.db.ExecContext(operationCtx, query, args...)
	if err != nil {
		return fmt.Errorf("update runtime status: %w", err)
	}

	switch affected, countErr := result.RowsAffected(); {
	case countErr != nil:
		return fmt.Errorf("update runtime status: rows affected: %w", countErr)
	case affected == 0:
		// Zero rows: either the game is unknown or the CAS guard rejected it.
		return store.classifyMissingUpdate(operationCtx, input.GameID)
	}
	return nil
}
// classifyMissingUpdate explains why an UPDATE touched zero rows.
//
// It probes runtime_records for gameID: no row means runtime.ErrNotFound,
// an existing row means the CAS predicate (status or container id) did not
// match and runtime.ErrConflict is returned. Any other probe failure is
// wrapped and returned. The probe is a separate statement from the UPDATE
// that preceded it.
func (store *Store) classifyMissingUpdate(ctx context.Context, gameID string) error {
	query, args := pg.SELECT(pgtable.RuntimeRecords.Status).
		FROM(pgtable.RuntimeRecords).
		WHERE(pgtable.RuntimeRecords.GameID.EQ(pg.String(gameID))).
		Sql()

	// The scanned value is only used to detect whether a row exists.
	var observed string
	err := store.db.QueryRowContext(ctx, query, args...).Scan(&observed)
	switch {
	case err == nil:
		return runtime.ErrConflict
	case sqlx.IsNoRows(err):
		return runtime.ErrNotFound
	default:
		return fmt.Errorf("update runtime status: probe: %w", err)
	}
}
// buildUpdateStatusStatement builds the UPDATE for one runtime-status
// transition.
//
// Every transition writes status and last_op_at. What else is written
// depends on input.To:
//
//   - StatusStopped: stopped_at is set to now.
//   - StatusRemoved: removed_at is set to now and current_container_id is
//     set to NULL (the container is gone; the prior id remains observable
//     through operation_log).
//   - StatusRunning: nothing beyond status and last_op_at. Fresh started_at
//     and current_container_id are installed via Upsert before any
//     stopped → running transition reaches this path; the branch exists so
//     runtime.AllowedTransitions stays one-to-one with the adapter
//     capability matrix even though v1 services use Upsert for this case.
//
// The WHERE clause always matches on game_id and the expected source
// status; the container-id comparison is added only when
// input.ExpectedContainerID is non-empty. Any other destination status
// yields an error.
func buildUpdateStatusStatement(input ports.UpdateStatusInput, now time.Time) (pg.UpdateStatement, error) {
	// CAS predicate shared by every supported destination.
	guard := pg.AND(
		pgtable.RuntimeRecords.GameID.EQ(pg.String(input.GameID)),
		pgtable.RuntimeRecords.Status.EQ(pg.String(string(input.ExpectedFrom))),
	)
	if input.ExpectedContainerID != "" {
		guard = pg.AND(
			guard,
			pgtable.RuntimeRecords.CurrentContainerID.EQ(pg.String(input.ExpectedContainerID)),
		)
	}

	target := pg.String(string(input.To))
	stamp := pg.TimestampzT(now)

	switch input.To {
	case runtime.StatusStopped:
		return pgtable.RuntimeRecords.UPDATE(
			pgtable.RuntimeRecords.Status,
			pgtable.RuntimeRecords.LastOpAt,
			pgtable.RuntimeRecords.StoppedAt,
		).SET(
			target,
			stamp,
			stamp,
		).WHERE(guard), nil
	case runtime.StatusRemoved:
		return pgtable.RuntimeRecords.UPDATE(
			pgtable.RuntimeRecords.Status,
			pgtable.RuntimeRecords.LastOpAt,
			pgtable.RuntimeRecords.RemovedAt,
			pgtable.RuntimeRecords.CurrentContainerID,
		).SET(
			target,
			stamp,
			stamp,
			pg.NULL,
		).WHERE(guard), nil
	case runtime.StatusRunning:
		return pgtable.RuntimeRecords.UPDATE(
			pgtable.RuntimeRecords.Status,
			pgtable.RuntimeRecords.LastOpAt,
		).SET(
			target,
			stamp,
		).WHERE(guard), nil
	}
	return nil, fmt.Errorf("update runtime status: destination status %q is unsupported", input.To)
}
// ListByStatus returns every record whose status column equals status.
//
// Results are ordered last_op_at DESC, game_id ASC — the direction the
// `runtime_records_status_last_op_idx` index is built in. An unknown status
// is rejected before any SQL is issued. When no row matches, the returned
// slice is nil.
func (store *Store) ListByStatus(ctx context.Context, status runtime.Status) ([]runtime.RuntimeRecord, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("list runtime records by status: nil store")
	}
	if !status.IsKnown() {
		return nil, fmt.Errorf("list runtime records by status: status %q is unsupported", status)
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "list runtime records by status", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()

	query, args := pg.SELECT(runtimeSelectColumns).
		FROM(pgtable.RuntimeRecords).
		WHERE(pgtable.RuntimeRecords.Status.EQ(pg.String(string(status)))).
		ORDER_BY(pgtable.RuntimeRecords.LastOpAt.DESC(), pgtable.RuntimeRecords.GameID.ASC()).
		Sql()

	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list runtime records by status: %w", err)
	}
	defer rows.Close()

	// A nil accumulator keeps the "no rows → nil slice" contract for free.
	var found []runtime.RuntimeRecord
	for rows.Next() {
		item, scanErr := scanRecord(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("list runtime records by status: scan: %w", scanErr)
		}
		found = append(found, item)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, fmt.Errorf("list runtime records by status: %w", iterErr)
	}
	return found, nil
}
// List returns every runtime record in the table, regardless of status.
//
// Ordering matches ListByStatus — last_op_at DESC, game_id ASC — so the REST
// list endpoint sees the freshest activity first. When the table is empty,
// the returned slice is nil.
func (store *Store) List(ctx context.Context) ([]runtime.RuntimeRecord, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("list runtime records: nil store")
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "list runtime records", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()

	query, args := pg.SELECT(runtimeSelectColumns).
		FROM(pgtable.RuntimeRecords).
		ORDER_BY(pgtable.RuntimeRecords.LastOpAt.DESC(), pgtable.RuntimeRecords.GameID.ASC()).
		Sql()

	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list runtime records: %w", err)
	}
	defer rows.Close()

	// A nil accumulator keeps the "no rows → nil slice" contract for free.
	var found []runtime.RuntimeRecord
	for rows.Next() {
		item, scanErr := scanRecord(rows)
		if scanErr != nil {
			return nil, fmt.Errorf("list runtime records: scan: %w", scanErr)
		}
		found = append(found, item)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, fmt.Errorf("list runtime records: %w", iterErr)
	}
	return found, nil
}
// CountByStatus returns how many records exist per status.
//
// The result is pre-seeded with a zero entry for every status reported by
// runtime.AllStatuses, so statuses with no rows still appear and callers
// (e.g. the telemetry gauge) can publish a stable label set on every
// reading. Counts read from the database overwrite the seeded zeros.
func (store *Store) CountByStatus(ctx context.Context) (map[runtime.Status]int, error) {
	if store == nil || store.db == nil {
		return nil, errors.New("count runtime records by status: nil store")
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "count runtime records by status", store.operationTimeout)
	if err != nil {
		return nil, err
	}
	defer cancel()

	query, args := pg.SELECT(
		pgtable.RuntimeRecords.Status,
		pg.COUNT(pg.STAR).AS("count"),
	).
		FROM(pgtable.RuntimeRecords).
		GROUP_BY(pgtable.RuntimeRecords.Status).
		Sql()

	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("count runtime records by status: %w", err)
	}
	defer rows.Close()

	known := runtime.AllStatuses()
	totals := make(map[runtime.Status]int, len(known))
	for _, status := range known {
		totals[status] = 0
	}

	for rows.Next() {
		var (
			label string
			total int
		)
		if scanErr := rows.Scan(&label, &total); scanErr != nil {
			return nil, fmt.Errorf("count runtime records by status: scan: %w", scanErr)
		}
		totals[runtime.Status(label)] = total
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, fmt.Errorf("count runtime records by status: %w", iterErr)
	}
	return totals, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanRecord can be shared
// across both single-row reads and iterated reads.
type rowScanner interface {
// Scan copies the current row's columns into dest, in column order.
Scan(dest ...any) error
}
// scanRecord decodes one runtime_records row from rs into a
// runtime.RuntimeRecord.
//
// Destinations are read positionally in the order of runtimeSelectColumns.
// Nullable columns pass through the sqlx helpers, and last_op_at and
// created_at are normalised to UTC. Scan errors, including sql.ErrNoRows,
// are returned verbatim so callers can distinguish "no row" from a hard
// error.
func scanRecord(rs rowScanner) (runtime.RuntimeRecord, error) {
	var (
		gameID, status                  string
		containerID, imageRef           sql.NullString
		endpoint, statePath, network    string
		startedAt, stoppedAt, removedAt sql.NullTime
		lastOpAt, createdAt             time.Time
	)

	// Order must stay aligned with runtimeSelectColumns.
	err := rs.Scan(
		&gameID, &status,
		&containerID, &imageRef,
		&endpoint, &statePath, &network,
		&startedAt, &stoppedAt, &removedAt,
		&lastOpAt, &createdAt,
	)
	if err != nil {
		return runtime.RuntimeRecord{}, err
	}

	var record runtime.RuntimeRecord
	record.GameID = gameID
	record.Status = runtime.Status(status)
	record.CurrentContainerID = sqlx.StringFromNullable(containerID)
	record.CurrentImageRef = sqlx.StringFromNullable(imageRef)
	record.EngineEndpoint = endpoint
	record.StatePath = statePath
	record.DockerNetwork = network
	record.StartedAt = sqlx.TimePtrFromNullable(startedAt)
	record.StoppedAt = sqlx.TimePtrFromNullable(stoppedAt)
	record.RemovedAt = sqlx.TimePtrFromNullable(removedAt)
	record.LastOpAt = lastOpAt.UTC()
	record.CreatedAt = createdAt.UTC()
	return record, nil
}
// Ensure Store satisfies the ports.RuntimeRecordStore interface at
// compile time. The blank identifier discards the value; the assignment
// exists only so the build fails if *Store stops matching the interface.
var _ ports.RuntimeRecordStore = (*Store)(nil)
@@ -0,0 +1,420 @@
package runtimerecordstore_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/adapters/postgres/runtimerecordstore"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain hands control of the package's test run to pgtest.RunMain.
// NOTE(review): presumably RunMain provisions the shared PostgreSQL
// fixture and exits with the test status — confirm against pgtest.
func TestMain(m *testing.M) {
	pgtest.RunMain(m)
}
// newStore calls pgtest.TruncateAll and then builds a Store on the shared
// test pool, failing the test immediately if construction errors.
func newStore(t *testing.T) *runtimerecordstore.Store {
	t.Helper()
	pgtest.TruncateAll(t)

	cfg := runtimerecordstore.Config{
		DB:               pgtest.Ensure(t).Pool(),
		OperationTimeout: pgtest.OperationTimeout,
	}
	created, err := runtimerecordstore.New(cfg)
	require.NoError(t, err)
	return created
}
// runningRecord returns a fixture record in StatusRunning for gameID.
//
// StartedAt, LastOpAt, and CreatedAt all carry the same fixed UTC instant
// (2026-04-27 12:00:00); the endpoint and state path are derived from
// gameID.
func runningRecord(t *testing.T, gameID, containerID, imageRef string) runtime.RuntimeRecord {
	t.Helper()

	fixedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	// Separate copy so StartedAt does not alias the other timestamps.
	startedAt := fixedAt

	var fixture runtime.RuntimeRecord
	fixture.GameID = gameID
	fixture.Status = runtime.StatusRunning
	fixture.CurrentContainerID = containerID
	fixture.CurrentImageRef = imageRef
	fixture.EngineEndpoint = "http://galaxy-game-" + gameID + ":8080"
	fixture.StatePath = "/var/lib/galaxy/games/" + gameID
	fixture.DockerNetwork = "galaxy-net"
	fixture.StartedAt = &startedAt
	fixture.LastOpAt = fixedAt
	fixture.CreatedAt = fixedAt
	return fixture
}
// TestUpsertAndGetRoundTrip verifies that a record written with Upsert is
// read back by Get with every field intact, timestamps in UTC, and the
// unset stopped/removed timestamps still nil.
func TestUpsertAndGetRoundTrip(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	want := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, want))

	have, err := store.Get(ctx, want.GameID)
	require.NoError(t, err)

	// Scalar columns.
	assert.Equal(t, want.GameID, have.GameID)
	assert.Equal(t, want.Status, have.Status)
	assert.Equal(t, want.CurrentContainerID, have.CurrentContainerID)
	assert.Equal(t, want.CurrentImageRef, have.CurrentImageRef)
	assert.Equal(t, want.EngineEndpoint, have.EngineEndpoint)
	assert.Equal(t, want.StatePath, have.StatePath)
	assert.Equal(t, want.DockerNetwork, have.DockerNetwork)

	// Timestamps: same instant, always UTC.
	require.NotNil(t, have.StartedAt)
	assert.True(t, want.StartedAt.Equal(*have.StartedAt))
	assert.Equal(t, time.UTC, have.StartedAt.Location())
	assert.Equal(t, time.UTC, have.LastOpAt.Location())
	assert.Equal(t, time.UTC, have.CreatedAt.Location())

	// Columns the fixture leaves unset stay NULL.
	assert.Nil(t, have.StoppedAt)
	assert.Nil(t, have.RemovedAt)
}
// TestGetReturnsNotFound verifies that Get reports runtime.ErrNotFound for
// a game id that was never stored.
func TestGetReturnsNotFound(t *testing.T) {
	store := newStore(t)

	_, getErr := store.Get(context.Background(), "game-missing")

	require.ErrorIs(t, getErr, runtime.ErrNotFound)
}
// TestUpsertOverwritesMutableColumnsPreservesCreatedAt verifies that a
// second Upsert for the same game replaces the mutable columns while the
// stored created_at keeps its original value.
func TestUpsertOverwritesMutableColumnsPreservesCreatedAt(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	first := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, first))

	later := first.LastOpAt.Add(time.Minute)
	second := first
	second.CurrentContainerID = "container-2"
	second.CurrentImageRef = "galaxy/game:v1.2.4"
	second.StartedAt = &later
	second.LastOpAt = later
	// Fresh CreatedAt simulates a caller passing "now"; the store must
	// preserve the original CreatedAt value on conflict.
	second.CreatedAt = later
	require.NoError(t, store.Upsert(ctx, second))

	stored, err := store.Get(ctx, first.GameID)
	require.NoError(t, err)

	assert.Equal(t, "container-2", stored.CurrentContainerID)
	assert.Equal(t, "galaxy/game:v1.2.4", stored.CurrentImageRef)
	assert.True(t, stored.LastOpAt.Equal(later))
	assert.True(t, stored.CreatedAt.Equal(first.CreatedAt),
		"created_at must be preserved across upserts: got %s, want %s",
		stored.CreatedAt, first.CreatedAt)
}
// TestUpdateStatusRunningToStopped verifies the running → stopped
// transition: status flips, stopped_at and last_op_at take the supplied
// instant, and the container id is left in place.
func TestUpdateStatusRunningToStopped(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	seed := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, seed))

	stoppedAt := seed.LastOpAt.Add(2 * time.Minute)
	transition := ports.UpdateStatusInput{
		GameID:              seed.GameID,
		ExpectedFrom:        runtime.StatusRunning,
		ExpectedContainerID: seed.CurrentContainerID,
		To:                  runtime.StatusStopped,
		Now:                 stoppedAt,
	}
	require.NoError(t, store.UpdateStatus(ctx, transition))

	stored, err := store.Get(ctx, seed.GameID)
	require.NoError(t, err)

	assert.Equal(t, runtime.StatusStopped, stored.Status)
	require.NotNil(t, stored.StoppedAt)
	assert.True(t, stoppedAt.Equal(*stored.StoppedAt))
	assert.True(t, stoppedAt.Equal(stored.LastOpAt))
	// container id is preserved on stop; cleanup later NULLs it.
	assert.Equal(t, seed.CurrentContainerID, stored.CurrentContainerID)
}
// TestUpdateStatusRunningToRemovedClearsContainerID verifies the running →
// removed transition: removed_at and last_op_at take the supplied instant
// and the container id reads back empty.
func TestUpdateStatusRunningToRemovedClearsContainerID(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	seed := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, seed))

	removedAt := seed.LastOpAt.Add(time.Minute)
	// No ExpectedContainerID: the CAS guard checks the status only.
	transition := ports.UpdateStatusInput{
		GameID:       seed.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusRemoved,
		Now:          removedAt,
	}
	require.NoError(t, store.UpdateStatus(ctx, transition))

	stored, err := store.Get(ctx, seed.GameID)
	require.NoError(t, err)

	assert.Equal(t, runtime.StatusRemoved, stored.Status)
	require.NotNil(t, stored.RemovedAt)
	assert.True(t, removedAt.Equal(*stored.RemovedAt))
	assert.True(t, removedAt.Equal(stored.LastOpAt))
	assert.Empty(t, stored.CurrentContainerID, "current_container_id must be NULL after removal")
}
// TestUpdateStatusStoppedToRemoved walks running → stopped → removed and
// verifies that removal stamps removed_at and last_op_at, keeps the earlier
// stopped_at, and clears the container id.
func TestUpdateStatusStoppedToRemoved(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	seed := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, seed))

	// Step 1: running → stopped.
	stoppedAt := seed.LastOpAt.Add(time.Minute)
	stop := ports.UpdateStatusInput{
		GameID:       seed.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          stoppedAt,
	}
	require.NoError(t, store.UpdateStatus(ctx, stop))

	// Step 2: stopped → removed, one hour later.
	removedAt := stoppedAt.Add(time.Hour)
	remove := ports.UpdateStatusInput{
		GameID:       seed.GameID,
		ExpectedFrom: runtime.StatusStopped,
		To:           runtime.StatusRemoved,
		Now:          removedAt,
	}
	require.NoError(t, store.UpdateStatus(ctx, remove))

	stored, err := store.Get(ctx, seed.GameID)
	require.NoError(t, err)

	assert.Equal(t, runtime.StatusRemoved, stored.Status)
	require.NotNil(t, stored.RemovedAt)
	assert.True(t, removedAt.Equal(*stored.RemovedAt))
	assert.True(t, removedAt.Equal(stored.LastOpAt))
	require.NotNil(t, stored.StoppedAt, "stopped_at must remain populated through removal")
	assert.True(t, stoppedAt.Equal(*stored.StoppedAt))
	assert.Empty(t, stored.CurrentContainerID)
}
// TestUpdateStatusReturnsConflictOnFromMismatch verifies that a transition
// whose ExpectedFrom differs from the stored status yields
// runtime.ErrConflict.
func TestUpdateStatusReturnsConflictOnFromMismatch(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	seed := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, seed))

	mismatched := ports.UpdateStatusInput{
		GameID:       seed.GameID,
		ExpectedFrom: runtime.StatusStopped, // wrong
		To:           runtime.StatusRemoved,
		Now:          seed.LastOpAt.Add(time.Minute),
	}
	updateErr := store.UpdateStatus(ctx, mismatched)

	require.ErrorIs(t, updateErr, runtime.ErrConflict)
}
// TestUpdateStatusReturnsConflictOnContainerIDMismatch verifies that a
// transition with the right source status but a different expected
// container id yields runtime.ErrConflict.
func TestUpdateStatusReturnsConflictOnContainerIDMismatch(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	seed := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, seed))

	mismatched := ports.UpdateStatusInput{
		GameID:              seed.GameID,
		ExpectedFrom:        runtime.StatusRunning,
		ExpectedContainerID: "container-other",
		To:                  runtime.StatusStopped,
		Now:                 seed.LastOpAt.Add(time.Minute),
	}
	updateErr := store.UpdateStatus(ctx, mismatched)

	require.ErrorIs(t, updateErr, runtime.ErrConflict)
}
// TestUpdateStatusReturnsNotFoundForMissing verifies that a transition for
// a game with no stored row yields runtime.ErrNotFound.
func TestUpdateStatusReturnsNotFoundForMissing(t *testing.T) {
	store := newStore(t)

	absent := ports.UpdateStatusInput{
		GameID:       "game-missing",
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          time.Now().UTC(),
	}
	updateErr := store.UpdateStatus(context.Background(), absent)

	require.ErrorIs(t, updateErr, runtime.ErrNotFound)
}
// TestUpdateStatusValidatesInputBeforeStore verifies that UpdateStatus
// returns an error for an input whose Now field is left at its zero value.
func TestUpdateStatusValidatesInputBeforeStore(t *testing.T) {
	store := newStore(t)

	// Now intentionally zero — validation must reject.
	invalid := ports.UpdateStatusInput{
		GameID:       "game-001",
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
	}
	updateErr := store.UpdateStatus(context.Background(), invalid)

	require.Error(t, updateErr)
}
// TestUpdateStatusConcurrentCAS exercises the compare-and-swap guard under
// contention: eight goroutines race to apply running → stopped on the same
// row. Exactly one call must succeed; every other call must return
// runtime.ErrConflict. Any other error fails the test.
func TestUpdateStatusConcurrentCAS(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	seed := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
	require.NoError(t, store.Upsert(ctx, seed))

	const concurrency = 8
	outcomes := make([]error, concurrency)

	var group sync.WaitGroup
	for worker := range concurrency {
		group.Add(1)
		// Each goroutine writes only to its own slot, so no lock is needed.
		go func(slot int) {
			defer group.Done()
			outcomes[slot] = store.UpdateStatus(ctx, ports.UpdateStatusInput{
				GameID:              seed.GameID,
				ExpectedFrom:        runtime.StatusRunning,
				ExpectedContainerID: seed.CurrentContainerID,
				To:                  runtime.StatusStopped,
				Now:                 seed.LastOpAt.Add(time.Duration(slot+1) * time.Second),
			})
		}(worker)
	}
	group.Wait()

	var wins, conflicts int
	for _, outcome := range outcomes {
		if outcome == nil {
			wins++
			continue
		}
		if errors.Is(outcome, runtime.ErrConflict) {
			conflicts++
			continue
		}
		t.Errorf("unexpected error from concurrent UpdateStatus: %v", outcome)
	}

	assert.Equal(t, 1, wins, "exactly one caller must win the CAS race")
	assert.Equal(t, concurrency-1, conflicts, "the rest must observe runtime.ErrConflict")
}
// TestListByStatusReturnsExpectedRecords stores three running games, stops
// one, and verifies that ListByStatus puts each game in the right bucket.
func TestListByStatusReturnsExpectedRecords(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	first := runningRecord(t, "game-aaa", "container-a", "galaxy/game:v1.2.3")
	second := runningRecord(t, "game-bbb", "container-b", "galaxy/game:v1.2.3")
	third := runningRecord(t, "game-ccc", "container-c", "galaxy/game:v1.2.3")
	seeds := []runtime.RuntimeRecord{first, second, third}
	for i := range seeds {
		require.NoError(t, store.Upsert(ctx, seeds[i]))
	}

	// Move only the middle game to stopped.
	stop := ports.UpdateStatusInput{
		GameID:       second.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          first.LastOpAt.Add(time.Minute),
	}
	require.NoError(t, store.UpdateStatus(ctx, stop))

	running, err := store.ListByStatus(ctx, runtime.StatusRunning)
	require.NoError(t, err)
	runningIDs := make(map[string]struct{})
	for i := range running {
		runningIDs[running[i].GameID] = struct{}{}
	}
	assert.Contains(t, runningIDs, first.GameID)
	assert.Contains(t, runningIDs, third.GameID)
	assert.NotContains(t, runningIDs, second.GameID)

	stopped, err := store.ListByStatus(ctx, runtime.StatusStopped)
	require.NoError(t, err)
	require.Len(t, stopped, 1)
	assert.Equal(t, second.GameID, stopped[0].GameID)
}
// TestListByStatusRejectsUnknown verifies that ListByStatus returns an
// error for a status value outside the known set.
func TestListByStatusRejectsUnknown(t *testing.T) {
	store := newStore(t)

	_, listErr := store.ListByStatus(context.Background(), runtime.Status("exotic"))

	require.Error(t, listErr)
}
// TestListReturnsEveryStatus stores three running games, stops one, and
// verifies that List returns all three with their current statuses.
func TestListReturnsEveryStatus(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	first := runningRecord(t, "game-aaa", "container-a", "galaxy/game:v1.2.3")
	second := runningRecord(t, "game-bbb", "container-b", "galaxy/game:v1.2.3")
	third := runningRecord(t, "game-ccc", "container-c", "galaxy/game:v1.2.3")
	seeds := []runtime.RuntimeRecord{first, second, third}
	for i := range seeds {
		require.NoError(t, store.Upsert(ctx, seeds[i]))
	}

	// Move only the middle game to stopped.
	stop := ports.UpdateStatusInput{
		GameID:       second.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          second.LastOpAt.Add(time.Minute),
	}
	require.NoError(t, store.UpdateStatus(ctx, stop))

	listed, err := store.List(ctx)
	require.NoError(t, err)
	require.Len(t, listed, 3)

	statusByGame := make(map[string]runtime.Status)
	for i := range listed {
		statusByGame[listed[i].GameID] = listed[i].Status
	}
	assert.Equal(t, runtime.StatusRunning, statusByGame[first.GameID])
	assert.Equal(t, runtime.StatusStopped, statusByGame[second.GameID])
	assert.Equal(t, runtime.StatusRunning, statusByGame[third.GameID])
}
// TestListReturnsNilWhenEmpty verifies that List on an empty table returns
// a nil slice and no error.
func TestListReturnsNilWhenEmpty(t *testing.T) {
	store := newStore(t)

	listed, listErr := store.List(context.Background())

	require.NoError(t, listErr)
	assert.Nil(t, listed)
}
// TestCountByStatusReturnsAllBuckets stores three running games, stops one,
// and verifies that CountByStatus reports every known status — including
// those with zero rows — with the expected totals.
func TestCountByStatusReturnsAllBuckets(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	first := runningRecord(t, "game-1", "container-1", "galaxy/game:v1.2.3")
	second := runningRecord(t, "game-2", "container-2", "galaxy/game:v1.2.3")
	third := runningRecord(t, "game-3", "container-3", "galaxy/game:v1.2.3")
	seeds := []runtime.RuntimeRecord{first, second, third}
	for i := range seeds {
		require.NoError(t, store.Upsert(ctx, seeds[i]))
	}

	// Move only the second game to stopped.
	stop := ports.UpdateStatusInput{
		GameID:       second.GameID,
		ExpectedFrom: runtime.StatusRunning,
		To:           runtime.StatusStopped,
		Now:          second.LastOpAt.Add(time.Minute),
	}
	require.NoError(t, store.UpdateStatus(ctx, stop))

	totals, err := store.CountByStatus(ctx)
	require.NoError(t, err)

	for _, status := range runtime.AllStatuses() {
		_, present := totals[status]
		assert.True(t, present, "status %q must appear in counts even when zero", status)
	}
	assert.Equal(t, 2, totals[runtime.StatusRunning])
	assert.Equal(t, 1, totals[runtime.StatusStopped])
	assert.Equal(t, 0, totals[runtime.StatusRemoved])
}
// TestNewRejectsNilDB verifies that New fails when Config.DB is left nil
// even though the timeout is valid.
func TestNewRejectsNilDB(t *testing.T) {
	cfg := runtimerecordstore.Config{OperationTimeout: time.Second}

	_, newErr := runtimerecordstore.New(cfg)

	require.Error(t, newErr)
}
// TestNewRejectsNonPositiveTimeout verifies that New fails when
// Config.OperationTimeout is left at its zero value despite a valid pool.
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
	cfg := runtimerecordstore.Config{
		DB: pgtest.Ensure(t).Pool(),
	}

	_, newErr := runtimerecordstore.New(cfg)

	require.Error(t, newErr)
}