feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
@@ -0,0 +1,203 @@
// Package healthsnapshotstore implements the PostgreSQL-backed adapter
// for `ports.HealthSnapshotStore`.
//
// The package owns the on-disk shape of the `health_snapshots` table
// defined in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.HealthSnapshotStore` interface
// declared in `internal/ports/healthsnapshotstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// The `details` jsonb column round-trips as a `json.RawMessage`. Empty
// payloads are substituted with the SQL default `{}` on Upsert so the
// CHECK constraints and downstream readers never observe a non-JSON
// empty string.
package healthsnapshotstore
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// emptyDetails is the canonical jsonb payload installed when the caller
// supplies an empty Details slice. It matches the SQL DEFAULT for the
// column.
//
// Upsert substitutes it whenever len(snapshot.Details) == 0, which covers
// both a nil and a zero-length payload.
const emptyDetails = "{}"
// Config configures one PostgreSQL-backed health-snapshot store instance.
//
// New rejects a Config whose DB is nil or whose OperationTimeout is not
// strictly positive.
type Config struct {
// DB stores the connection pool the store uses for every query.
DB *sql.DB
// OperationTimeout bounds one round trip. Upsert and Get hand it to
// sqlx.WithTimeout when deriving the context for their single statement.
OperationTimeout time.Duration
}
// Store persists Runtime Manager health snapshots in PostgreSQL.
//
// Instances are built by New. Upsert and Get return an error instead of
// panicking when invoked on a nil *Store or on a Store whose db is nil.
type Store struct {
// db is the connection pool copied from Config.DB by New.
db *sql.DB
// operationTimeout is copied from Config.OperationTimeout by New and is
// passed to sqlx.WithTimeout by Upsert and Get.
operationTimeout time.Duration
}
// New validates cfg and builds a Store around its connection pool.
//
// It returns an error when cfg.DB is nil or when cfg.OperationTimeout is
// zero or negative; otherwise the returned Store carries both values
// unchanged.
func New(cfg Config) (*Store, error) {
	switch {
	case cfg.DB == nil:
		return nil, errors.New("new postgres health snapshot store: db must not be nil")
	case cfg.OperationTimeout <= 0:
		return nil, errors.New("new postgres health snapshot store: operation timeout must be positive")
	}

	store := &Store{
		db:               cfg.DB,
		operationTimeout: cfg.OperationTimeout,
	}
	return store, nil
}
// healthSnapshotSelectColumns is the canonical SELECT list for the
// health_snapshots table, matching scanSnapshot's column order.
//
// Get selects exactly these columns and scanSnapshot reads them
// positionally, so any reordering here must be mirrored in scanSnapshot's
// Scan destinations.
var healthSnapshotSelectColumns = pg.ColumnList{
pgtable.HealthSnapshots.GameID,
pgtable.HealthSnapshots.ContainerID,
pgtable.HealthSnapshots.Status,
pgtable.HealthSnapshots.Source,
pgtable.HealthSnapshots.Details,
pgtable.HealthSnapshots.ObservedAt,
}
// Upsert records snapshot as the current observation for snapshot.GameID.
// When a row for that game already exists, its container id, status,
// source, details, and observed-at columns are overwritten with the new
// values.
//
// The snapshot must pass health.HealthSnapshot.Validate; no SQL is issued
// otherwise. A zero-length Details payload is written as emptyDetails and
// ObservedAt is converted to UTC before it is bound. The statement runs
// under a context derived from ctx through sqlx.WithTimeout with the
// store's operation timeout. Validation and execution failures are
// wrapped with the "upsert health snapshot" prefix; an error from
// sqlx.WithTimeout is returned as is.
func (store *Store) Upsert(ctx context.Context, snapshot health.HealthSnapshot) error {
	if store == nil || store.db == nil {
		return errors.New("upsert health snapshot: nil store")
	}

	validationErr := snapshot.Validate()
	if validationErr != nil {
		return fmt.Errorf("upsert health snapshot: %w", validationErr)
	}

	queryCtx, release, err := sqlx.WithTimeout(ctx, "upsert health snapshot", store.operationTimeout)
	if err != nil {
		return err
	}
	defer release()

	// An empty string is not valid JSON, so fall back to the empty object.
	payload := string(snapshot.Details)
	if payload == "" {
		payload = emptyDetails
	}

	hs := pgtable.HealthSnapshots
	upsert := hs.
		INSERT(
			hs.GameID,
			hs.ContainerID,
			hs.Status,
			hs.Source,
			hs.Details,
			hs.ObservedAt,
		).
		VALUES(
			snapshot.GameID,
			snapshot.ContainerID,
			string(snapshot.Status),
			string(snapshot.Source),
			payload,
			snapshot.ObservedAt.UTC(),
		).
		ON_CONFLICT(hs.GameID).
		DO_UPDATE(
			// Every non-key column takes the value proposed by this insert.
			pg.SET(
				hs.ContainerID.SET(hs.EXCLUDED.ContainerID),
				hs.Status.SET(hs.EXCLUDED.Status),
				hs.Source.SET(hs.EXCLUDED.Source),
				hs.Details.SET(hs.EXCLUDED.Details),
				hs.ObservedAt.SET(hs.EXCLUDED.ObservedAt),
			),
		)

	sqlText, params := upsert.Sql()
	_, err = store.db.ExecContext(queryCtx, sqlText, params...)
	if err != nil {
		return fmt.Errorf("upsert health snapshot: %w", err)
	}
	return nil
}
// Get returns the latest snapshot stored for gameID.
//
// gameID must contain at least one non-whitespace character; the value is
// otherwise used verbatim in the WHERE clause. The query runs under a
// context derived from ctx through sqlx.WithTimeout with the store's
// operation timeout.
//
// It returns runtime.ErrNotFound, unwrapped, when no snapshot has been
// recorded for gameID. Any other scan or query failure is wrapped with the
// "get health snapshot" prefix; an error from sqlx.WithTimeout is returned
// as is.
func (store *Store) Get(ctx context.Context, gameID string) (health.HealthSnapshot, error) {
	if store == nil || store.db == nil {
		return health.HealthSnapshot{}, errors.New("get health snapshot: nil store")
	}
	if strings.TrimSpace(gameID) == "" {
		// The message has no formatting verbs, so errors.New is used to
		// match the other guard-clause errors in this package.
		return health.HealthSnapshot{}, errors.New("get health snapshot: game id must not be empty")
	}

	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "get health snapshot", store.operationTimeout)
	if err != nil {
		return health.HealthSnapshot{}, err
	}
	defer cancel()

	stmt := pg.SELECT(healthSnapshotSelectColumns).
		FROM(pgtable.HealthSnapshots).
		WHERE(pgtable.HealthSnapshots.GameID.EQ(pg.String(gameID)))

	query, args := stmt.Sql()
	row := store.db.QueryRowContext(operationCtx, query, args...)

	snapshot, err := scanSnapshot(row)
	// The no-rows check comes first so a missing snapshot surfaces as the
	// domain sentinel rather than as a wrapped driver error.
	if sqlx.IsNoRows(err) {
		return health.HealthSnapshot{}, runtime.ErrNotFound
	}
	if err != nil {
		return health.HealthSnapshot{}, fmt.Errorf("get health snapshot: %w", err)
	}
	return snapshot, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanSnapshot can be
// shared across both single-row reads and iterated reads.
//
// Within this file only Get uses it, passing the *sql.Row returned by
// QueryRowContext.
type rowScanner interface {
// Scan copies the current row's columns into dest, in order.
Scan(dest ...any) error
}
// scanSnapshot reads one health_snapshots row from rs and converts it into
// a health.HealthSnapshot.
//
// Columns are read positionally in the order game id, container id,
// status, source, details, observed at. Status and source are converted
// from their stored strings to the domain types, details is handed back as
// a json.RawMessage, and ObservedAt is normalised to UTC. An error from
// rs.Scan is returned untouched together with the zero snapshot.
func scanSnapshot(rs rowScanner) (health.HealthSnapshot, error) {
	// Scratch holder for the raw column values before domain conversion.
	var row struct {
		gameID      string
		containerID string
		status      string
		source      string
		details     []byte
		observedAt  time.Time
	}

	err := rs.Scan(
		&row.gameID,
		&row.containerID,
		&row.status,
		&row.source,
		&row.details,
		&row.observedAt,
	)
	if err != nil {
		return health.HealthSnapshot{}, err
	}

	var snapshot health.HealthSnapshot
	snapshot.GameID = row.gameID
	snapshot.ContainerID = row.containerID
	snapshot.Status = health.SnapshotStatus(row.status)
	snapshot.Source = health.SnapshotSource(row.source)
	snapshot.Details = json.RawMessage(row.details)
	snapshot.ObservedAt = row.observedAt.UTC()
	return snapshot, nil
}
// Ensure Store satisfies the ports.HealthSnapshotStore interface at
// compile time. The blank identifier keeps the check free of any runtime
// value: the build fails if *Store stops implementing the interface.
var _ ports.HealthSnapshotStore = (*Store)(nil)
@@ -0,0 +1,157 @@
package healthsnapshotstore_test
import (
"context"
"encoding/json"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/healthsnapshotstore"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/domain/runtime"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain hands control of this package's test run to pgtest.RunMain.
func TestMain(m *testing.M) {
	pgtest.RunMain(m)
}
// newStore returns a store wired to the pool exposed by pgtest.Ensure and
// configured with pgtest.OperationTimeout. It calls pgtest.TruncateAll
// first (presumably so rows from earlier tests do not leak in — confirm
// against pgtest) and fails the test immediately if construction errors.
func newStore(t *testing.T) *healthsnapshotstore.Store {
	t.Helper()

	pgtest.TruncateAll(t)

	cfg := healthsnapshotstore.Config{
		DB:               pgtest.Ensure(t).Pool(),
		OperationTimeout: pgtest.OperationTimeout,
	}
	created, err := healthsnapshotstore.New(cfg)
	require.NoError(t, err)
	return created
}
// probeFailedSnapshot builds a probe-sourced, probe-failed snapshot fixture
// for gameID observed at observedAt. The container id and the details
// payload are fixed.
func probeFailedSnapshot(gameID string, observedAt time.Time) health.HealthSnapshot {
	const details = `{"consecutive_failures":3,"last_status":503,"last_error":"timeout"}`

	var fixture health.HealthSnapshot
	fixture.GameID = gameID
	fixture.ContainerID = "container-1"
	fixture.Status = health.SnapshotStatusProbeFailed
	fixture.Source = health.SnapshotSourceProbe
	fixture.Details = json.RawMessage(details)
	fixture.ObservedAt = observedAt
	return fixture
}
// TestUpsertAndGetRoundTrip checks that a snapshot written with Upsert is
// returned by Get with every field intact and ObservedAt located in UTC.
func TestUpsertAndGetRoundTrip(t *testing.T) {
	store := newStore(t)
	ctx := context.Background()

	observedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	want := probeFailedSnapshot("game-001", observedAt)

	err := store.Upsert(ctx, want)
	require.NoError(t, err)

	have, err := store.Get(ctx, "game-001")
	require.NoError(t, err)

	assert.Equal(t, want.GameID, have.GameID)
	assert.Equal(t, want.ContainerID, have.ContainerID)
	assert.Equal(t, want.Status, have.Status)
	assert.Equal(t, want.Source, have.Source)
	// Compare details as JSON documents rather than as raw bytes.
	assert.JSONEq(t, string(want.Details), string(have.Details))
	assert.True(t, want.ObservedAt.Equal(have.ObservedAt))
	assert.Equal(t, time.UTC, have.ObservedAt.Location())
}
// TestUpsertOverwritesPriorSnapshot checks that a second Upsert for the
// same game id replaces the container id, status, source, details, and
// observation time written by the first.
func TestUpsertOverwritesPriorSnapshot(t *testing.T) {
	store := newStore(t)
	ctx := context.Background()

	initialAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	initial := probeFailedSnapshot("game-001", initialAt)
	require.NoError(t, store.Upsert(ctx, initial))

	var replacement health.HealthSnapshot
	replacement.GameID = "game-001"
	replacement.ContainerID = "container-2"
	replacement.Status = health.SnapshotStatusHealthy
	replacement.Source = health.SnapshotSourceInspect
	replacement.Details = json.RawMessage(`{"restart_count":0,"state":"running"}`)
	replacement.ObservedAt = initial.ObservedAt.Add(time.Minute)
	require.NoError(t, store.Upsert(ctx, replacement))

	stored, err := store.Get(ctx, "game-001")
	require.NoError(t, err)

	assert.Equal(t, "container-2", stored.ContainerID)
	assert.Equal(t, health.SnapshotStatusHealthy, stored.Status)
	assert.Equal(t, health.SnapshotSourceInspect, stored.Source)
	assert.JSONEq(t, string(replacement.Details), string(stored.Details))
	assert.True(t, replacement.ObservedAt.Equal(stored.ObservedAt))
}
// TestGetReturnsNotFound checks that Get reports runtime.ErrNotFound for a
// game id that has no stored snapshot.
func TestGetReturnsNotFound(t *testing.T) {
	store := newStore(t)

	_, lookupErr := store.Get(context.Background(), "game-missing")

	require.ErrorIs(t, lookupErr, runtime.ErrNotFound)
}
// TestUpsertEmptyDetailsRoundTripsAsEmptyObject checks that a snapshot
// stored with nil Details is read back with the JSON object {}.
func TestUpsertEmptyDetailsRoundTripsAsEmptyObject(t *testing.T) {
	store := newStore(t)
	ctx := context.Background()

	observedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	input := probeFailedSnapshot("game-001", observedAt)
	input.Details = nil

	require.NoError(t, store.Upsert(ctx, input))

	stored, err := store.Get(ctx, "game-001")
	require.NoError(t, err)

	actual := string(stored.Details)
	assert.JSONEq(t, "{}", actual,
		"empty json.RawMessage must round-trip as the SQL default {}, got %q",
		actual)
}
// TestUpsertValidatesSnapshot checks that Upsert returns an error for each
// invalid variation of an otherwise valid snapshot.
func TestUpsertValidatesSnapshot(t *testing.T) {
	store := newStore(t)
	ctx := context.Background()
	observedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)

	type testCase struct {
		name   string
		mutate func(*health.HealthSnapshot)
	}
	cases := []testCase{
		{
			name:   "empty game id",
			mutate: func(s *health.HealthSnapshot) { s.GameID = "" },
		},
		{
			name:   "unknown status",
			mutate: func(s *health.HealthSnapshot) { s.Status = "exotic" },
		},
		{
			name:   "unknown source",
			mutate: func(s *health.HealthSnapshot) { s.Source = "exotic" },
		},
		{
			name:   "zero observed at",
			mutate: func(s *health.HealthSnapshot) { s.ObservedAt = time.Time{} },
		},
		{
			name: "invalid json details",
			mutate: func(s *health.HealthSnapshot) {
				s.Details = json.RawMessage("not json")
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Start from a valid fixture so only the mutated field is at fault.
			candidate := probeFailedSnapshot("game-001", observedAt)
			tc.mutate(&candidate)

			require.Error(t, store.Upsert(ctx, candidate))
		})
	}
}
// TestGetRejectsEmptyGameID checks that Get refuses empty and
// whitespace-only game ids before touching the database.
//
// Asserting only that an error occurred is not enough: the table is empty,
// so a Get that skipped validation would still fail with
// runtime.ErrNotFound. Each case therefore also requires that the error is
// not the not-found sentinel.
func TestGetRejectsEmptyGameID(t *testing.T) {
	ctx := context.Background()
	store := newStore(t)

	tests := []struct {
		name   string
		gameID string
	}{
		{"empty", ""},
		{"spaces only", "   "},
		{"tabs and newlines", "\t\n"},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := store.Get(ctx, tt.gameID)
			require.Error(t, err)
			// A lookup miss is also an error; make sure the id was rejected
			// rather than looked up.
			require.NotErrorIs(t, err, runtime.ErrNotFound)
		})
	}
}
// TestNewRejectsNilDB checks that New fails when the config carries a
// positive timeout but no connection pool.
func TestNewRejectsNilDB(t *testing.T) {
	cfg := healthsnapshotstore.Config{OperationTimeout: time.Second}

	_, constructErr := healthsnapshotstore.New(cfg)

	require.Error(t, constructErr)
}
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
_, err := healthsnapshotstore.New(healthsnapshotstore.Config{
DB: pgtest.Ensure(t).Pool(),
})
require.Error(t, err)
}