feat: runtime manager

Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
@@ -0,0 +1,493 @@
// Package docker provides the production Docker SDK adapter that
// implements `galaxy/rtmanager/internal/ports.DockerClient`. The
// adapter is the single component allowed to talk to the local Docker
// daemon; every Runtime Manager service path that needs container
// lifecycle operations goes through this surface.
//
// The adapter is intentionally narrow — it does not orchestrate, log,
// or retry. Cross-cutting concerns (lease coordination, durable state,
// notification side-effects) live in the service layer.
package docker
import (
"context"
"errors"
"fmt"
"io"
"maps"
"strings"
"sync"
"time"
cerrdefs "github.com/containerd/errdefs"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/events"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/image"
"github.com/docker/docker/api/types/network"
dockerclient "github.com/docker/docker/client"
"github.com/docker/go-units"
"galaxy/rtmanager/internal/ports"
)
// EnginePort is the in-container HTTP port the engine listens on. The
// value is fixed by `rtmanager/README.md §Container Model` and by the
// engine's Dockerfile (`game/Dockerfile`); RTM never publishes the port
// to the host. Keeping the constant here lets the adapter own the URL
// shape so the start service does not have to know it.
const EnginePort = 8080
// Config groups the dependencies and per-process defaults required to
// construct a Client. The struct is value-typed so wiring code can
// build it inline without intermediate variables.
type Config struct {
// Docker stores the SDK client this adapter wraps. It must be
// non-nil; callers typically construct it via `dockerclient.NewClientWithOpts`.
Docker *dockerclient.Client
// LogDriver stores the Docker logging driver applied to every
// container the adapter creates (e.g. `json-file`).
LogDriver string
// LogOpts stores the comma-separated `key=value` driver options
// forwarded to Docker. Empty disables driver-specific options.
LogOpts string
// Clock supplies the wall-clock used for `RunResult.StartedAt`.
// Defaults to `time.Now` when nil.
Clock func() time.Time
}
// Client is the production adapter implementing `ports.DockerClient`.
// Construct it via NewClient; do not zero-initialise.
type Client struct {
docker *dockerclient.Client
logDriver string
logOpts string
clock func() time.Time
}
// NewClient constructs a Client from cfg. It returns an error if cfg
// does not carry the minimum collaborator set the adapter needs to
// function.
func NewClient(cfg Config) (*Client, error) {
if cfg.Docker == nil {
return nil, errors.New("new docker adapter: nil docker client")
}
if strings.TrimSpace(cfg.LogDriver) == "" {
return nil, errors.New("new docker adapter: log driver must not be empty")
}
clock := cfg.Clock
if clock == nil {
clock = time.Now
}
return &Client{
docker: cfg.Docker,
logDriver: cfg.LogDriver,
logOpts: cfg.LogOpts,
clock: clock,
}, nil
}
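// A minimal wiring sketch (illustrative, not prescriptive; the SDK
// options and log settings below mirror the tests in this package and
// are assumptions, not requirements of this adapter):
//
//	sdk, err := dockerclient.NewClientWithOpts(
//		dockerclient.FromEnv,
//		dockerclient.WithAPIVersionNegotiation(),
//	)
//	if err != nil {
//		return err
//	}
//	adapter, err := NewClient(Config{
//		Docker:    sdk,
//		LogDriver: "json-file",
//		LogOpts:   "max-size=1m,max-file=3",
//	})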
// EnsureNetwork verifies the user-defined Docker network is present.
// The adapter never creates networks; provisioning is the operator's
// job per `rtmanager/README.md §Container Model`.
func (client *Client) EnsureNetwork(ctx context.Context, name string) error {
if _, err := client.docker.NetworkInspect(ctx, name, network.InspectOptions{}); err != nil {
if cerrdefs.IsNotFound(err) {
return ports.ErrNetworkMissing
}
return fmt.Errorf("ensure network %q: %w", name, err)
}
return nil
}
// PullImage pulls ref according to policy. The pull stream is drained
// to completion because the Docker SDK only finishes the underlying
// pull when the body is consumed.
func (client *Client) PullImage(ctx context.Context, ref string, policy ports.PullPolicy) error {
if !policy.IsKnown() {
return fmt.Errorf("pull image %q: unknown pull policy %q", ref, policy)
}
switch policy {
case ports.PullPolicyAlways:
return client.runPull(ctx, ref)
case ports.PullPolicyIfMissing:
if present, err := client.imagePresent(ctx, ref); err != nil {
return err
} else if present {
return nil
}
return client.runPull(ctx, ref)
case ports.PullPolicyNever:
present, err := client.imagePresent(ctx, ref)
if err != nil {
return err
}
if !present {
return ports.ErrImageNotFound
}
return nil
default:
return fmt.Errorf("pull image %q: unsupported pull policy %q", ref, policy)
}
}
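// Policy semantics at a glance (derived from the switch above):
//
//	PullPolicyAlways    always pulls, even when the image is cached
//	PullPolicyIfMissing pulls only when the local store lacks ref
//	PullPolicyNever     never pulls; an absent image is ErrImageNotFound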
// InspectImage returns image metadata for ref. RTM only reads labels
// at start time, so the port type carries just the ref and its labels;
// anything broader stays available on the SDK client for diagnostics.
func (client *Client) InspectImage(ctx context.Context, ref string) (ports.ImageInspect, error) {
inspect, err := client.docker.ImageInspect(ctx, ref)
if err != nil {
if cerrdefs.IsNotFound(err) {
return ports.ImageInspect{}, ports.ErrImageNotFound
}
return ports.ImageInspect{}, fmt.Errorf("inspect image %q: %w", ref, err)
}
var labels map[string]string
if inspect.Config != nil {
labels = copyStringMap(inspect.Config.Labels)
}
return ports.ImageInspect{Ref: ref, Labels: labels}, nil
}
// InspectContainer returns container metadata for containerID. The
// adapter best-effort decodes Docker timestamps (malformed values map
// to the zero time) and tolerates nil sub-structs in the SDK response
// so callers never have to defend against them.
func (client *Client) InspectContainer(ctx context.Context, containerID string) (ports.ContainerInspect, error) {
inspect, err := client.docker.ContainerInspect(ctx, containerID)
if err != nil {
if cerrdefs.IsNotFound(err) {
return ports.ContainerInspect{}, ports.ErrContainerNotFound
}
return ports.ContainerInspect{}, fmt.Errorf("inspect container %q: %w", containerID, err)
}
result := ports.ContainerInspect{ID: inspect.ID}
if inspect.ContainerJSONBase != nil {
result.RestartCount = inspect.RestartCount
if inspect.State != nil {
result.Status = string(inspect.State.Status)
result.OOMKilled = inspect.State.OOMKilled
result.ExitCode = inspect.State.ExitCode
result.StartedAt = parseDockerTime(inspect.State.StartedAt)
result.FinishedAt = parseDockerTime(inspect.State.FinishedAt)
if inspect.State.Health != nil {
result.Health = string(inspect.State.Health.Status)
}
}
}
if inspect.Config != nil {
result.ImageRef = inspect.Config.Image
result.Hostname = inspect.Config.Hostname
result.Labels = copyStringMap(inspect.Config.Labels)
}
return result, nil
}
// Run creates and starts one container according to spec. On
// `ContainerStart` failure the adapter best-effort removes the partial
// container so the start service never has to clean up after a failed
// start path.
func (client *Client) Run(ctx context.Context, spec ports.RunSpec) (ports.RunResult, error) {
if err := spec.Validate(); err != nil {
return ports.RunResult{}, fmt.Errorf("run container: %w", err)
}
memoryBytes, err := units.RAMInBytes(spec.Memory)
if err != nil {
return ports.RunResult{}, fmt.Errorf("run container %q: parse memory %q: %w", spec.Name, spec.Memory, err)
}
pidsLimit := int64(spec.PIDsLimit)
containerCfg := &container.Config{
Image: spec.Image,
Hostname: spec.Hostname,
Env: envMapToSlice(spec.Env),
Labels: copyStringMap(spec.Labels),
Cmd: append([]string(nil), spec.Cmd...),
}
hostCfg := &container.HostConfig{
Binds: bindMountsToBinds(spec.BindMounts),
LogConfig: container.LogConfig{
Type: client.logDriver,
Config: parseLogOpts(client.logOpts),
},
Resources: container.Resources{
NanoCPUs: int64(spec.CPUQuota * 1e9),
Memory: memoryBytes,
PidsLimit: &pidsLimit,
},
}
netCfg := &network.NetworkingConfig{
EndpointsConfig: map[string]*network.EndpointSettings{
spec.Network: {
Aliases: []string{spec.Hostname},
},
},
}
created, err := client.docker.ContainerCreate(ctx, containerCfg, hostCfg, netCfg, nil, spec.Name)
if err != nil {
return ports.RunResult{}, fmt.Errorf("create container %q: %w", spec.Name, err)
}
if err := client.docker.ContainerStart(ctx, created.ID, container.StartOptions{}); err != nil {
client.cleanupAfterFailedStart(created.ID)
return ports.RunResult{}, fmt.Errorf("start container %q: %w", spec.Name, err)
}
return ports.RunResult{
ContainerID: created.ID,
EngineEndpoint: fmt.Sprintf("http://%s:%d", spec.Hostname, EnginePort),
StartedAt: client.clock(),
}, nil
}
// Stop bounds graceful shutdown by timeout. A missing container is
// surfaced as ErrContainerNotFound so the service layer can treat it
// as already-stopped per `rtmanager/README.md §Lifecycles → Stop`.
func (client *Client) Stop(ctx context.Context, containerID string, timeout time.Duration) error {
seconds := max(int(timeout.Round(time.Second).Seconds()), 0)
if err := client.docker.ContainerStop(ctx, containerID, container.StopOptions{Timeout: &seconds}); err != nil {
if cerrdefs.IsNotFound(err) {
return ports.ErrContainerNotFound
}
return fmt.Errorf("stop container %q: %w", containerID, err)
}
return nil
}
// Remove removes the container without forcing kill. A missing
// container is reported as success so callers can treat the operation
// as idempotent.
func (client *Client) Remove(ctx context.Context, containerID string) error {
if err := client.docker.ContainerRemove(ctx, containerID, container.RemoveOptions{}); err != nil {
if cerrdefs.IsNotFound(err) {
return nil
}
return fmt.Errorf("remove container %q: %w", containerID, err)
}
return nil
}
// List returns container summaries that match filter. Empty Labels
// match every container; the reconciler always passes
// `com.galaxy.owner=rtmanager`.
func (client *Client) List(ctx context.Context, filter ports.ListFilter) ([]ports.ContainerSummary, error) {
args := filters.NewArgs()
for key, value := range filter.Labels {
args.Add("label", key+"="+value)
}
summaries, err := client.docker.ContainerList(ctx, container.ListOptions{All: true, Filters: args})
if err != nil {
return nil, fmt.Errorf("list containers: %w", err)
}
out := make([]ports.ContainerSummary, 0, len(summaries))
for _, summary := range summaries {
hostname := ""
if len(summary.Names) > 0 {
hostname = strings.TrimPrefix(summary.Names[0], "/")
}
out = append(out, ports.ContainerSummary{
ID: summary.ID,
ImageRef: summary.Image,
Hostname: hostname,
Labels: copyStringMap(summary.Labels),
Status: string(summary.State),
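// The list endpoint exposes only the creation timestamp, so
// StartedAt carries Created for summaries.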
StartedAt: time.Unix(summary.Created, 0).UTC(),
})
}
return out, nil
}
// EventsListen subscribes to the Docker events stream and returns a
// typed channel of decoded container events plus an asynchronous
// error channel. The caller cancels ctx to terminate the subscription;
// the goroutine closes both channels on termination.
func (client *Client) EventsListen(ctx context.Context) (<-chan ports.DockerEvent, <-chan error, error) {
msgs, sdkErrs := client.docker.Events(ctx, events.ListOptions{})
out := make(chan ports.DockerEvent)
outErrs := make(chan error, 1)
var closeOnce sync.Once
closeAll := func() {
closeOnce.Do(func() {
close(out)
close(outErrs)
})
}
go func() {
defer closeAll()
for {
select {
case <-ctx.Done():
return
case msg, ok := <-msgs:
if !ok {
return
}
if msg.Type != events.ContainerEventType {
continue
}
select {
case <-ctx.Done():
return
case out <- decodeEvent(msg):
}
case err, ok := <-sdkErrs:
if !ok {
return
}
if err == nil {
continue
}
select {
case <-ctx.Done():
case outErrs <- err:
}
return
}
}
}()
return out, outErrs, nil
}
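// A minimal consumption sketch (hypothetical caller; the real loop
// lives in the service-layer events listener):
//
//	events, errs, err := client.EventsListen(ctx)
//	if err != nil {
//		return err
//	}
//	for {
//		select {
//		case ev, ok := <-events:
//			if !ok {
//				return nil
//			}
//			handle(ev) // hypothetical handler
//		case err, ok := <-errs:
//			if !ok {
//				return nil
//			}
//			return fmt.Errorf("docker events: %w", err)
//		}
//	}

// cleanupAfterFailedStart force-removes the container left behind when
// ContainerStart fails. It runs on a detached, bounded context so the
// rollback proceeds even if the caller's ctx is already cancelled; the
// removal error is deliberately discarded (best-effort cleanup).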
func (client *Client) cleanupAfterFailedStart(containerID string) {
cleanupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
_ = client.docker.ContainerRemove(cleanupCtx, containerID, container.RemoveOptions{Force: true})
}
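// imagePresent reports whether ref exists in the local image store. A
// not-found inspect maps to (false, nil); any other error is surfaced.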
func (client *Client) imagePresent(ctx context.Context, ref string) (bool, error) {
if _, err := client.docker.ImageInspect(ctx, ref); err != nil {
if cerrdefs.IsNotFound(err) {
return false, nil
}
return false, fmt.Errorf("inspect image %q: %w", ref, err)
}
return true, nil
}
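// runPull performs an unconditional pull and drains the JSON progress
// stream, since the SDK only completes the pull once the body is
// consumed.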
func (client *Client) runPull(ctx context.Context, ref string) error {
body, err := client.docker.ImagePull(ctx, ref, image.PullOptions{})
if err != nil {
if cerrdefs.IsNotFound(err) {
return ports.ErrImageNotFound
}
return fmt.Errorf("pull image %q: %w", ref, err)
}
defer body.Close()
if _, err := io.Copy(io.Discard, body); err != nil {
return fmt.Errorf("drain pull stream for %q: %w", ref, err)
}
return nil
}
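// envMapToSlice flattens an environment map into Docker's "KEY=value"
// slice form; nil and empty maps yield nil. Iteration order is
// unspecified, which is fine for container environment variables.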
func envMapToSlice(envMap map[string]string) []string {
if len(envMap) == 0 {
return nil
}
out := make([]string, 0, len(envMap))
for key, value := range envMap {
out = append(out, key+"="+value)
}
return out
}
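// bindMountsToBinds renders bind mounts in Docker's
// "host:container[:ro]" bind syntax; for example
// {HostPath: "/data", MountPath: "/srv", ReadOnly: true} becomes
// "/data:/srv:ro".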
func bindMountsToBinds(mounts []ports.BindMount) []string {
if len(mounts) == 0 {
return nil
}
binds := make([]string, 0, len(mounts))
for _, mount := range mounts {
bind := mount.HostPath + ":" + mount.MountPath
if mount.ReadOnly {
bind += ":ro"
}
binds = append(binds, bind)
}
return binds
}
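// parseLogOpts decodes the comma-separated "key=value" driver options,
// e.g. "max-size=1m,max-file=3". Blank segments and entries without a
// key are skipped; empty values (as in "empty=") are kept.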
func parseLogOpts(raw string) map[string]string {
if strings.TrimSpace(raw) == "" {
return nil
}
out := make(map[string]string)
for part := range strings.SplitSeq(raw, ",") {
entry := strings.TrimSpace(part)
if entry == "" {
continue
}
index := strings.IndexByte(entry, '=')
if index <= 0 {
continue
}
out[entry[:index]] = entry[index+1:]
}
if len(out) == 0 {
return nil
}
return out
}
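// parseDockerTime decodes Docker's RFC3339Nano timestamps. Empty or
// malformed input maps to the zero time; valid values are normalised
// to UTC.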
func parseDockerTime(raw string) time.Time {
if raw == "" {
return time.Time{}
}
parsed, err := time.Parse(time.RFC3339Nano, raw)
if err != nil {
return time.Time{}
}
return parsed.UTC()
}
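// copyStringMap returns a defensive copy so callers cannot mutate maps
// shared with SDK responses; nil input stays nil.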
func copyStringMap(in map[string]string) map[string]string {
if in == nil {
return nil
}
out := make(map[string]string, len(in))
maps.Copy(out, in)
return out
}
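// decodeEvent maps an SDK events.Message onto the port-level
// DockerEvent. It prefers the nanosecond timestamp, falls back to the
// second-granularity one, and best-effort parses the actor's
// "exitCode" attribute (absent or malformed maps to 0).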
func decodeEvent(msg events.Message) ports.DockerEvent {
occurredAt := time.Time{}
switch {
case msg.TimeNano != 0:
occurredAt = time.Unix(0, msg.TimeNano).UTC()
case msg.Time != 0:
occurredAt = time.Unix(msg.Time, 0).UTC()
}
exitCode := 0
if raw, ok := msg.Actor.Attributes["exitCode"]; ok {
if value, err := parseExitCode(raw); err == nil {
exitCode = value
}
}
return ports.DockerEvent{
Action: string(msg.Action),
ContainerID: msg.Actor.ID,
Labels: copyStringMap(msg.Actor.Attributes),
ExitCode: exitCode,
OccurredAt: occurredAt,
}
}
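// parseExitCode parses the non-negative decimal exit code Docker
// reports as a string attribute on die events.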
func parseExitCode(raw string) (int, error) {
	if raw == "" {
		return 0, errors.New("empty exit code")
	}
	value := 0
	for _, r := range raw {
		if r < '0' || r > '9' {
			return 0, fmt.Errorf("non-numeric exit code %q", raw)
		}
		value = value*10 + int(r-'0')
	}
	return value, nil
}
// Compile-time assertion: Client implements ports.DockerClient.
var _ ports.DockerClient = (*Client)(nil)
@@ -0,0 +1,561 @@
package docker
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"sync/atomic"
"testing"
"time"
dockerclient "github.com/docker/docker/client"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"galaxy/rtmanager/internal/ports"
)
// newTestClient wires an httptest.Server-backed Docker SDK client to our
// adapter. The handler is invoked for every Docker API request issued
// during the test; tests assert on path and method to route the
// response.
func newTestClient(t *testing.T, handler http.HandlerFunc) *Client {
t.Helper()
server := httptest.NewServer(handler)
t.Cleanup(server.Close)
docker, err := dockerclient.NewClientWithOpts(
dockerclient.WithHost(server.URL),
dockerclient.WithHTTPClient(server.Client()),
dockerclient.WithVersion("1.45"),
)
require.NoError(t, err)
t.Cleanup(func() { _ = docker.Close() })
client, err := NewClient(Config{
Docker: docker,
LogDriver: "json-file",
LogOpts: "max-size=1m,max-file=3",
Clock: func() time.Time { return time.Date(2026, time.April, 27, 12, 0, 0, 0, time.UTC) },
})
require.NoError(t, err)
return client
}
func writeJSON(t *testing.T, w http.ResponseWriter, status int, body any) {
t.Helper()
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
require.NoError(t, json.NewEncoder(w).Encode(body))
}
func writeNotFound(t *testing.T, w http.ResponseWriter, msg string) {
t.Helper()
writeJSON(t, w, http.StatusNotFound, map[string]string{"message": msg})
}
// The Docker SDK prefixes every path with /v1.45 when the client is
// pinned to API version 1.45.
func dockerPath(suffix string) string {
return "/v1.45" + suffix
}
func TestNewClientValidatesConfig(t *testing.T) {
t.Run("nil docker client", func(t *testing.T) {
_, err := NewClient(Config{LogDriver: "json-file"})
require.Error(t, err)
assert.Contains(t, err.Error(), "nil docker client")
})
t.Run("empty log driver", func(t *testing.T) {
docker, err := dockerclient.NewClientWithOpts(dockerclient.WithHost("tcp://127.0.0.1:65535"))
require.NoError(t, err)
t.Cleanup(func() { _ = docker.Close() })
_, err = NewClient(Config{Docker: docker, LogDriver: " "})
require.Error(t, err)
assert.Contains(t, err.Error(), "log driver")
})
}
func TestEnsureNetwork(t *testing.T) {
t.Run("present", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
require.Equal(t, dockerPath("/networks/galaxy-net"), r.URL.Path)
writeJSON(t, w, http.StatusOK, map[string]any{"Id": "net-1", "Name": "galaxy-net"})
})
require.NoError(t, client.EnsureNetwork(context.Background(), "galaxy-net"))
})
t.Run("missing", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
writeNotFound(t, w, "no such network")
})
err := client.EnsureNetwork(context.Background(), "missing")
require.Error(t, err)
assert.ErrorIs(t, err, ports.ErrNetworkMissing)
})
t.Run("transport error", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "boom", http.StatusInternalServerError)
})
err := client.EnsureNetwork(context.Background(), "x")
require.Error(t, err)
assert.NotErrorIs(t, err, ports.ErrNetworkMissing)
})
}
func TestInspectImage(t *testing.T) {
t.Run("present", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
require.Equal(t, dockerPath("/images/galaxy/game:test/json"), r.URL.Path)
writeJSON(t, w, http.StatusOK, map[string]any{
"Id": "sha256:abc",
"Config": map[string]any{
"Labels": map[string]string{
"com.galaxy.cpu_quota": "1.0",
"com.galaxy.memory": "512m",
"com.galaxy.pids_limit": "512",
},
},
})
})
got, err := client.InspectImage(context.Background(), "galaxy/game:test")
require.NoError(t, err)
assert.Equal(t, "galaxy/game:test", got.Ref)
assert.Equal(t, "1.0", got.Labels["com.galaxy.cpu_quota"])
assert.Equal(t, "512m", got.Labels["com.galaxy.memory"])
})
t.Run("not found", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
writeNotFound(t, w, "no such image")
})
_, err := client.InspectImage(context.Background(), "galaxy/missing:tag")
require.Error(t, err)
assert.ErrorIs(t, err, ports.ErrImageNotFound)
})
}
func TestInspectContainer(t *testing.T) {
t.Run("present", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
require.Equal(t, dockerPath("/containers/cont-1/json"), r.URL.Path)
writeJSON(t, w, http.StatusOK, map[string]any{
"Id": "cont-1",
"RestartCount": 2,
"State": map[string]any{
"Status": "running",
"OOMKilled": false,
"ExitCode": 0,
"StartedAt": "2026-04-27T11:00:00.5Z",
"FinishedAt": "0001-01-01T00:00:00Z",
"Health": map[string]any{"Status": "healthy"},
},
"Config": map[string]any{
"Image": "galaxy/game:test",
"Hostname": "galaxy-game-game-1",
"Labels": map[string]string{
"com.galaxy.owner": "rtmanager",
"com.galaxy.game_id": "game-1",
},
},
})
})
got, err := client.InspectContainer(context.Background(), "cont-1")
require.NoError(t, err)
assert.Equal(t, "cont-1", got.ID)
assert.Equal(t, 2, got.RestartCount)
assert.Equal(t, "running", got.Status)
assert.Equal(t, "healthy", got.Health)
assert.Equal(t, "galaxy/game:test", got.ImageRef)
assert.Equal(t, "galaxy-game-game-1", got.Hostname)
assert.Equal(t, "rtmanager", got.Labels["com.galaxy.owner"])
assert.False(t, got.StartedAt.IsZero())
})
t.Run("not found", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
writeNotFound(t, w, "no such container")
})
_, err := client.InspectContainer(context.Background(), "missing")
require.Error(t, err)
assert.ErrorIs(t, err, ports.ErrContainerNotFound)
})
}
func TestPullImagePolicies(t *testing.T) {
t.Run("if_missing/found skips pull", func(t *testing.T) {
hits := struct {
inspect atomic.Int32
pull atomic.Int32
}{}
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
switch {
case strings.HasSuffix(r.URL.Path, "/json") && r.Method == http.MethodGet:
hits.inspect.Add(1)
writeJSON(t, w, http.StatusOK, map[string]any{"Id": "sha256:x"})
case strings.Contains(r.URL.Path, "/images/create"):
hits.pull.Add(1)
w.WriteHeader(http.StatusOK)
default:
t.Fatalf("unexpected request %s %s", r.Method, r.URL.Path)
}
})
require.NoError(t, client.PullImage(context.Background(), "alpine:3.21", ports.PullPolicyIfMissing))
assert.Equal(t, int32(1), hits.inspect.Load())
assert.Equal(t, int32(0), hits.pull.Load())
})
t.Run("if_missing/absent triggers pull", func(t *testing.T) {
hits := struct {
inspect atomic.Int32
pull atomic.Int32
}{}
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
switch {
case strings.HasSuffix(r.URL.Path, "/json") && r.Method == http.MethodGet:
hits.inspect.Add(1)
writeNotFound(t, w, "no such image")
case strings.Contains(r.URL.Path, "/images/create"):
hits.pull.Add(1)
w.WriteHeader(http.StatusOK)
_, _ = io.WriteString(w, `{"status":"Pulling..."}`+"\n"+`{"status":"Done"}`+"\n")
default:
t.Fatalf("unexpected request %s %s", r.Method, r.URL.Path)
}
})
require.NoError(t, client.PullImage(context.Background(), "alpine:3.21", ports.PullPolicyIfMissing))
assert.Equal(t, int32(1), hits.inspect.Load())
assert.Equal(t, int32(1), hits.pull.Load())
})
t.Run("always pulls regardless of cache", func(t *testing.T) {
var pullCount atomic.Int32
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Contains(t, r.URL.Path, "/images/create")
pullCount.Add(1)
w.WriteHeader(http.StatusOK)
})
require.NoError(t, client.PullImage(context.Background(), "alpine:3.21", ports.PullPolicyAlways))
assert.Equal(t, int32(1), pullCount.Load())
})
t.Run("never with absent image", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
writeNotFound(t, w, "no such image")
})
err := client.PullImage(context.Background(), "alpine:3.21", ports.PullPolicyNever)
require.Error(t, err)
assert.ErrorIs(t, err, ports.ErrImageNotFound)
})
t.Run("never with present image", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
writeJSON(t, w, http.StatusOK, map[string]any{"Id": "x"})
})
require.NoError(t, client.PullImage(context.Background(), "alpine:3.21", ports.PullPolicyNever))
})
t.Run("unknown policy", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
t.Fatal("must not call docker on unknown policy")
})
err := client.PullImage(context.Background(), "alpine:3.21", ports.PullPolicy("invalid"))
require.Error(t, err)
})
}
func TestRunHappyPath(t *testing.T) {
calls := struct {
create atomic.Int32
start atomic.Int32
remove atomic.Int32
}{}
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
switch {
case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/containers/create"):
calls.create.Add(1)
require.Equal(t, "galaxy-game-game-1", r.URL.Query().Get("name"))
writeJSON(t, w, http.StatusCreated, map[string]any{"Id": "cont-new", "Warnings": []string{}})
case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/start"):
calls.start.Add(1)
require.Equal(t, dockerPath("/containers/cont-new/start"), r.URL.Path)
w.WriteHeader(http.StatusNoContent)
case r.Method == http.MethodDelete && strings.HasPrefix(r.URL.Path, dockerPath("/containers/")):
calls.remove.Add(1)
w.WriteHeader(http.StatusNoContent)
default:
t.Fatalf("unexpected %s %s", r.Method, r.URL.Path)
}
})
result, err := client.Run(context.Background(), ports.RunSpec{
Name: "galaxy-game-game-1",
Image: "galaxy/game:test",
Hostname: "galaxy-game-game-1",
Network: "galaxy-net",
Env: map[string]string{
"GAME_STATE_PATH": "/var/lib/galaxy-game",
"STORAGE_PATH": "/var/lib/galaxy-game",
},
Labels: map[string]string{"com.galaxy.owner": "rtmanager"},
LogDriver: "json-file",
BindMounts: []ports.BindMount{
{HostPath: "/var/lib/galaxy/games/game-1", MountPath: "/var/lib/galaxy-game"},
},
CPUQuota: 1.0,
Memory: "512m",
PIDsLimit: 512,
})
require.NoError(t, err)
assert.Equal(t, "cont-new", result.ContainerID)
assert.Equal(t, "http://galaxy-game-game-1:8080", result.EngineEndpoint)
assert.False(t, result.StartedAt.IsZero())
assert.Equal(t, int32(1), calls.create.Load())
assert.Equal(t, int32(1), calls.start.Load())
assert.Equal(t, int32(0), calls.remove.Load())
}
func TestRunStartFailureRemovesContainer(t *testing.T) {
calls := struct {
create atomic.Int32
start atomic.Int32
remove atomic.Int32
}{}
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
switch {
case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/containers/create"):
calls.create.Add(1)
writeJSON(t, w, http.StatusCreated, map[string]any{"Id": "cont-x"})
case r.Method == http.MethodPost && strings.HasSuffix(r.URL.Path, "/start"):
calls.start.Add(1)
http.Error(w, `{"message":"insufficient host resources"}`, http.StatusInternalServerError)
case r.Method == http.MethodDelete && strings.HasPrefix(r.URL.Path, dockerPath("/containers/cont-x")):
calls.remove.Add(1)
require.Equal(t, "1", r.URL.Query().Get("force"))
w.WriteHeader(http.StatusNoContent)
default:
t.Fatalf("unexpected %s %s", r.Method, r.URL.Path)
}
})
_, err := client.Run(context.Background(), ports.RunSpec{
Name: "x",
Image: "img",
Hostname: "x",
Network: "n",
LogDriver: "json-file",
CPUQuota: 1.0,
Memory: "64m",
PIDsLimit: 64,
})
require.Error(t, err)
assert.Equal(t, int32(1), calls.create.Load())
assert.Equal(t, int32(1), calls.start.Load())
assert.Equal(t, int32(1), calls.remove.Load(), "adapter must roll back the partial container")
}
func TestRunRejectsInvalidSpec(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
t.Fatal("must not contact docker on invalid spec")
})
_, err := client.Run(context.Background(), ports.RunSpec{Name: "x"})
require.Error(t, err)
assert.Contains(t, err.Error(), "image must not be empty")
}
func TestStop(t *testing.T) {
t.Run("graceful stop", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodPost, r.Method)
require.Equal(t, dockerPath("/containers/cont-1/stop"), r.URL.Path)
require.Equal(t, "30", r.URL.Query().Get("t"))
w.WriteHeader(http.StatusNoContent)
})
require.NoError(t, client.Stop(context.Background(), "cont-1", 30*time.Second))
})
t.Run("missing container", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
writeNotFound(t, w, "no such container")
})
err := client.Stop(context.Background(), "missing", 30*time.Second)
assert.ErrorIs(t, err, ports.ErrContainerNotFound)
})
t.Run("negative timeout normalised to zero", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "0", r.URL.Query().Get("t"))
w.WriteHeader(http.StatusNoContent)
})
require.NoError(t, client.Stop(context.Background(), "x", -5*time.Second))
})
}
func TestRemoveIsIdempotent(t *testing.T) {
t.Run("present", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodDelete, r.Method)
w.WriteHeader(http.StatusNoContent)
})
require.NoError(t, client.Remove(context.Background(), "cont-1"))
})
t.Run("missing", func(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
writeNotFound(t, w, "no such container")
})
require.NoError(t, client.Remove(context.Background(), "missing"))
})
}
func TestListAppliesLabelFilter(t *testing.T) {
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
require.Equal(t, dockerPath("/containers/json"), r.URL.Path)
require.Equal(t, "1", r.URL.Query().Get("all"))
filtersRaw := r.URL.Query().Get("filters")
require.NotEmpty(t, filtersRaw)
var args map[string]map[string]bool
require.NoError(t, json.Unmarshal([]byte(filtersRaw), &args))
require.True(t, args["label"]["com.galaxy.owner=rtmanager"])
writeJSON(t, w, http.StatusOK, []map[string]any{
{
"Id": "cont-a",
"Image": "galaxy/game:1.2.3",
"Names": []string{"/galaxy-game-game-1"},
"Labels": map[string]string{"com.galaxy.owner": "rtmanager"},
"State": "running",
"Created": int64(1700000000),
},
})
})
got, err := client.List(context.Background(), ports.ListFilter{
Labels: map[string]string{"com.galaxy.owner": "rtmanager"},
})
require.NoError(t, err)
require.Len(t, got, 1)
assert.Equal(t, "cont-a", got[0].ID)
assert.Equal(t, "galaxy/game:1.2.3", got[0].ImageRef)
assert.Equal(t, "galaxy-game-game-1", got[0].Hostname)
assert.Equal(t, "running", got[0].Status)
assert.False(t, got[0].StartedAt.IsZero())
assert.Equal(t, "rtmanager", got[0].Labels["com.galaxy.owner"])
}
func TestEventsListenDecodesContainerEvents(t *testing.T) {
done := make(chan struct{}) // holds the event stream open until the test ends
client := newTestClient(t, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
require.Equal(t, dockerPath("/events"), r.URL.Path)
flusher, ok := w.(http.Flusher)
require.True(t, ok)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
flusher.Flush()
// Container start event
writeEvent(t, w, "container", "start", "cont-1", map[string]string{
"image": "galaxy/game:1.2.3",
"name": "galaxy-game-game-1",
"com.galaxy.game_id": "game-1",
}, time.Now())
flusher.Flush()
// Container die event with exit code 137
writeEvent(t, w, "container", "die", "cont-1", map[string]string{
"exitCode": "137",
}, time.Now())
flusher.Flush()
// Image event must be filtered out by adapter
writeEvent(t, w, "image", "pull", "img", nil, time.Now())
flusher.Flush()
<-done
})
defer close(done)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
events, _, err := client.EventsListen(ctx)
require.NoError(t, err)
got := []ports.DockerEvent{}
deadline := time.After(2 * time.Second)
for len(got) < 2 {
select {
case ev, ok := <-events:
if !ok {
t.Fatalf("events channel closed; got %d events", len(got))
}
got = append(got, ev)
case <-deadline:
t.Fatalf("did not receive expected events; have %d", len(got))
}
}
require.Len(t, got, 2)
assert.Equal(t, "start", got[0].Action)
assert.Equal(t, "cont-1", got[0].ContainerID)
assert.Equal(t, "game-1", got[0].Labels["com.galaxy.game_id"])
assert.Equal(t, "die", got[1].Action)
assert.Equal(t, 137, got[1].ExitCode)
}
func writeEvent(t *testing.T, w io.Writer, eventType, action, id string, attributes map[string]string, when time.Time) {
t.Helper()
payload := map[string]any{
"Type": eventType,
"Action": action,
"Actor": map[string]any{"ID": id, "Attributes": attributes},
"time": when.Unix(),
"timeNano": when.UnixNano(),
}
data, err := json.Marshal(payload)
require.NoError(t, err)
_, err = fmt.Fprintln(w, string(data))
require.NoError(t, err)
}
// Sanity: parsing helpers.
func TestParseLogOpts(t *testing.T) {
got := parseLogOpts("max-size=1m,max-file=3, ,empty=,=novalue")
assert.Equal(t, "1m", got["max-size"])
assert.Equal(t, "3", got["max-file"])
assert.Equal(t, "", got["empty"])
_, hasNovalue := got["=novalue"]
assert.False(t, hasNovalue)
}
func TestParseDockerTime(t *testing.T) {
assert.True(t, parseDockerTime("").IsZero())
assert.True(t, parseDockerTime("not-a-date").IsZero())
parsed := parseDockerTime("2026-04-27T11:00:00.5Z")
assert.False(t, parsed.IsZero())
assert.Equal(t, time.UTC, parsed.Location())
}
func TestEnvMapToSliceDeterministicLength(t *testing.T) {
got := envMapToSlice(map[string]string{"A": "1", "B": "2"})
assert.Len(t, got, 2)
for _, kv := range got {
assert.Contains(t, []string{"A=1", "B=2"}, kv)
}
assert.Nil(t, envMapToSlice(nil))
}
// Sanity: make sure the errors.Is wiring for the sentinel errors stays intact.
func TestSentinelErrorsAreDistinct(t *testing.T) {
require.True(t, errors.Is(ports.ErrNetworkMissing, ports.ErrNetworkMissing))
require.False(t, errors.Is(ports.ErrNetworkMissing, ports.ErrImageNotFound))
}
func TestURLPathEscapingForCharacters(t *testing.T) {
	// Ensure the SDK URL path encodes special characters; the adapter
	// passes raw inputs through and lets the SDK escape.
	assert.Equal(t, "game-1", url.PathEscape("game-1"))
	assert.Equal(t, "game%201", url.PathEscape("game 1"))
}
@@ -0,0 +1,175 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: galaxy/rtmanager/internal/ports (interfaces: DockerClient)
//
// Generated by this command:
//
// mockgen -destination=../adapters/docker/mocks/mock_dockerclient.go -package=mocks galaxy/rtmanager/internal/ports DockerClient
//
// Package mocks is a generated GoMock package.
package mocks
import (
context "context"
ports "galaxy/rtmanager/internal/ports"
reflect "reflect"
time "time"
gomock "go.uber.org/mock/gomock"
)
// MockDockerClient is a mock of DockerClient interface.
type MockDockerClient struct {
ctrl *gomock.Controller
recorder *MockDockerClientMockRecorder
isgomock struct{}
}
// MockDockerClientMockRecorder is the mock recorder for MockDockerClient.
type MockDockerClientMockRecorder struct {
mock *MockDockerClient
}
// NewMockDockerClient creates a new mock instance.
func NewMockDockerClient(ctrl *gomock.Controller) *MockDockerClient {
mock := &MockDockerClient{ctrl: ctrl}
mock.recorder = &MockDockerClientMockRecorder{mock}
return mock
}
// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockDockerClient) EXPECT() *MockDockerClientMockRecorder {
return m.recorder
}
// EnsureNetwork mocks base method.
func (m *MockDockerClient) EnsureNetwork(ctx context.Context, name string) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "EnsureNetwork", ctx, name)
ret0, _ := ret[0].(error)
return ret0
}
// EnsureNetwork indicates an expected call of EnsureNetwork.
func (mr *MockDockerClientMockRecorder) EnsureNetwork(ctx, name any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnsureNetwork", reflect.TypeOf((*MockDockerClient)(nil).EnsureNetwork), ctx, name)
}
// EventsListen mocks base method.
func (m *MockDockerClient) EventsListen(ctx context.Context) (<-chan ports.DockerEvent, <-chan error, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "EventsListen", ctx)
ret0, _ := ret[0].(<-chan ports.DockerEvent)
ret1, _ := ret[1].(<-chan error)
ret2, _ := ret[2].(error)
return ret0, ret1, ret2
}
// EventsListen indicates an expected call of EventsListen.
func (mr *MockDockerClientMockRecorder) EventsListen(ctx any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EventsListen", reflect.TypeOf((*MockDockerClient)(nil).EventsListen), ctx)
}
// InspectContainer mocks base method.
func (m *MockDockerClient) InspectContainer(ctx context.Context, containerID string) (ports.ContainerInspect, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "InspectContainer", ctx, containerID)
ret0, _ := ret[0].(ports.ContainerInspect)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// InspectContainer indicates an expected call of InspectContainer.
func (mr *MockDockerClientMockRecorder) InspectContainer(ctx, containerID any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InspectContainer", reflect.TypeOf((*MockDockerClient)(nil).InspectContainer), ctx, containerID)
}
// InspectImage mocks base method.
func (m *MockDockerClient) InspectImage(ctx context.Context, ref string) (ports.ImageInspect, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "InspectImage", ctx, ref)
ret0, _ := ret[0].(ports.ImageInspect)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// InspectImage indicates an expected call of InspectImage.
func (mr *MockDockerClientMockRecorder) InspectImage(ctx, ref any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InspectImage", reflect.TypeOf((*MockDockerClient)(nil).InspectImage), ctx, ref)
}
// List mocks base method.
func (m *MockDockerClient) List(ctx context.Context, filter ports.ListFilter) ([]ports.ContainerSummary, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "List", ctx, filter)
ret0, _ := ret[0].([]ports.ContainerSummary)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// List indicates an expected call of List.
func (mr *MockDockerClientMockRecorder) List(ctx, filter any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "List", reflect.TypeOf((*MockDockerClient)(nil).List), ctx, filter)
}
// PullImage mocks base method.
func (m *MockDockerClient) PullImage(ctx context.Context, ref string, policy ports.PullPolicy) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "PullImage", ctx, ref, policy)
ret0, _ := ret[0].(error)
return ret0
}
// PullImage indicates an expected call of PullImage.
func (mr *MockDockerClientMockRecorder) PullImage(ctx, ref, policy any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PullImage", reflect.TypeOf((*MockDockerClient)(nil).PullImage), ctx, ref, policy)
}
// Remove mocks base method.
func (m *MockDockerClient) Remove(ctx context.Context, containerID string) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "Remove", ctx, containerID)
ret0, _ := ret[0].(error)
return ret0
}
// Remove indicates an expected call of Remove.
func (mr *MockDockerClientMockRecorder) Remove(ctx, containerID any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Remove", reflect.TypeOf((*MockDockerClient)(nil).Remove), ctx, containerID)
}
// Run mocks base method.
func (m *MockDockerClient) Run(ctx context.Context, spec ports.RunSpec) (ports.RunResult, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "Run", ctx, spec)
ret0, _ := ret[0].(ports.RunResult)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// Run indicates an expected call of Run.
func (mr *MockDockerClientMockRecorder) Run(ctx, spec any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Run", reflect.TypeOf((*MockDockerClient)(nil).Run), ctx, spec)
}
// Stop mocks base method.
func (m *MockDockerClient) Stop(ctx context.Context, containerID string, timeout time.Duration) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "Stop", ctx, containerID, timeout)
ret0, _ := ret[0].(error)
return ret0
}
// Stop indicates an expected call of Stop.
func (mr *MockDockerClientMockRecorder) Stop(ctx, containerID, timeout any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Stop", reflect.TypeOf((*MockDockerClient)(nil).Stop), ctx, containerID, timeout)
}
@@ -0,0 +1,11 @@
package mocks
import (
"galaxy/rtmanager/internal/ports"
)
// Compile-time assertion that the generated mock satisfies the port
// interface. Future signature drift between the port and the generated
// file fails the build at this line, which is more actionable than a
// runtime check from a service test.
var _ ports.DockerClient = (*MockDockerClient)(nil)
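// A minimal usage sketch in a service test (hypothetical expectations;
// gomock here is go.uber.org/mock):
//
//	ctrl := gomock.NewController(t)
//	docker := NewMockDockerClient(ctrl)
//	docker.EXPECT().EnsureNetwork(gomock.Any(), "galaxy-net").Return(nil)
//	docker.EXPECT().
//		PullImage(gomock.Any(), "galaxy/game:test", ports.PullPolicyIfMissing).
//		Return(nil)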
@@ -0,0 +1,202 @@
// Package docker smoke tests exercise the production adapter against a
// real Docker daemon. The tests skip when no Docker socket is reachable
// (`skipUnlessDockerAvailable`), so they run in the default
// `go test ./...` pass without a build tag.
package docker
import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
"os"
"testing"
"time"
"github.com/docker/docker/api/types/network"
dockerclient "github.com/docker/docker/client"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"galaxy/rtmanager/internal/ports"
)
const (
smokeImage = "alpine:3.21"
smokeNetPrefix = "rtmanager-smoke-"
)
func skipUnlessDockerAvailable(t *testing.T) {
t.Helper()
if os.Getenv("DOCKER_HOST") == "" {
if _, err := os.Stat("/var/run/docker.sock"); err != nil {
t.Skip("docker daemon not available; set DOCKER_HOST or expose /var/run/docker.sock")
}
}
}
func newSmokeAdapter(t *testing.T) (*Client, *dockerclient.Client) {
t.Helper()
docker, err := dockerclient.NewClientWithOpts(dockerclient.FromEnv, dockerclient.WithAPIVersionNegotiation())
require.NoError(t, err)
t.Cleanup(func() { _ = docker.Close() })
pingCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if _, err := docker.Ping(pingCtx); err != nil {
// A reachable socket path may still be unusable in sandboxed
// environments (e.g., macOS sandbox blocking the colima socket).
// The smoke test can only run when the daemon answers ping, so a
// permission-denied / connection-refused error is a runtime
// "Docker unavailable" signal and skips the test.
t.Skipf("docker daemon unavailable: %v", err)
}
adapter, err := NewClient(Config{
Docker: docker,
LogDriver: "json-file",
})
require.NoError(t, err)
return adapter, docker
}
func uniqueSuffix(t *testing.T) string {
t.Helper()
buf := make([]byte, 4)
_, err := rand.Read(buf)
require.NoError(t, err)
return hex.EncodeToString(buf)
}
// TestSmokeFullLifecycle runs the adapter through the core lifecycle
// against the real Docker daemon: ensure-network → pull → events →
// run → inspect → stop → remove.
func TestSmokeFullLifecycle(t *testing.T) {
skipUnlessDockerAvailable(t)
adapter, docker := newSmokeAdapter(t)
suffix := uniqueSuffix(t)
netName := smokeNetPrefix + suffix
containerName := "rtmanager-smoke-cont-" + suffix
// Step 1 — provision a temporary user-defined bridge network.
createCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
_, err := docker.NetworkCreate(createCtx, netName, network.CreateOptions{Driver: "bridge"})
require.NoError(t, err)
t.Cleanup(func() {
removeCtx, removeCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer removeCancel()
_ = docker.NetworkRemove(removeCtx, netName)
})
// Step 2 — EnsureNetwork present and missing paths.
require.NoError(t, adapter.EnsureNetwork(createCtx, netName))
missingErr := adapter.EnsureNetwork(createCtx, "rtmanager-smoke-missing-"+suffix)
require.Error(t, missingErr)
assert.ErrorIs(t, missingErr, ports.ErrNetworkMissing)
// Step 3 — pull alpine via the configured policy.
pullCtx, pullCancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer pullCancel()
require.NoError(t, adapter.PullImage(pullCtx, smokeImage, ports.PullPolicyIfMissing))
// Step 4 — subscribe to events before running the container so we
// observe the start event.
listenCtx, listenCancel := context.WithCancel(context.Background())
defer listenCancel()
events, listenErrs, err := adapter.EventsListen(listenCtx)
require.NoError(t, err)
// Step 5 — run a tiny container that sleeps so we can observe it.
stateDir := t.TempDir()
runCtx, runCancel := context.WithTimeout(context.Background(), 60*time.Second)
defer runCancel()
result, err := adapter.Run(runCtx, ports.RunSpec{
Name: containerName,
Image: smokeImage,
Hostname: "smoke-" + suffix,
Network: netName,
Env: map[string]string{
"GAME_STATE_PATH": "/tmp/state",
"STORAGE_PATH": "/tmp/state",
},
Labels: map[string]string{
"com.galaxy.owner": "rtmanager",
"com.galaxy.kind": "smoke",
},
BindMounts: []ports.BindMount{
{HostPath: stateDir, MountPath: "/tmp/state"},
},
LogDriver: "json-file",
CPUQuota: 0.5,
Memory: "64m",
PIDsLimit: 32,
Cmd: []string{"/bin/sh", "-c", "sleep 60"},
})
require.NoError(t, err)
t.Cleanup(func() {
removeCtx, removeCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer removeCancel()
_ = adapter.Remove(removeCtx, result.ContainerID)
})
require.NotEmpty(t, result.ContainerID)
require.Equal(t, "http://smoke-"+suffix+":8080", result.EngineEndpoint)
// Step 6 — wait for a `start` event for the new container id.
startObserved := waitForEvent(t, events, listenErrs, "start", result.ContainerID, 15*time.Second)
require.True(t, startObserved, "did not observe start event for container %s", result.ContainerID)
// Step 7 — InspectContainer returns running state.
inspectCtx, inspectCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer inspectCancel()
inspect, err := adapter.InspectContainer(inspectCtx, result.ContainerID)
require.NoError(t, err)
assert.Equal(t, "running", inspect.Status)
// Step 8 — Stop, then Remove, then InspectContainer must report
// not found.
stopCtx, stopCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer stopCancel()
require.NoError(t, adapter.Stop(stopCtx, result.ContainerID, 5*time.Second))
require.NoError(t, adapter.Remove(stopCtx, result.ContainerID))
if _, err := adapter.InspectContainer(stopCtx, result.ContainerID); !errors.Is(err, ports.ErrContainerNotFound) {
t.Fatalf("expected ErrContainerNotFound, got %v", err)
}
// Step 9 — terminate the events subscription cleanly.
listenCancel()
select {
case _, ok := <-events:
_ = ok
case <-time.After(5 * time.Second):
t.Log("events channel did not close within timeout (best-effort)")
}
}
func waitForEvent(t *testing.T, events <-chan ports.DockerEvent, errs <-chan error, action, containerID string, timeout time.Duration) bool {
t.Helper()
deadline := time.After(timeout)
for {
select {
case ev, ok := <-events:
if !ok {
return false
}
if ev.Action == action && ev.ContainerID == containerID {
return true
}
case err := <-errs:
if err != nil {
t.Fatalf("events stream error: %v", err)
}
case <-deadline:
return false
}
}
}
@@ -0,0 +1,165 @@
// Package healtheventspublisher provides the Redis-Streams-backed
// publisher for `runtime:health_events`. Every Publish call upserts the
// latest `health_snapshots` row before XADDing the event so consumers
// observing the snapshot store can never lag the event stream by more
// than the duration of one network call.
//
// The publisher is shared across `ports.HealthEventPublisher` callers:
// the start service emits `container_started`; the probe, inspect, and
// events-listener workers emit the rest. The publisher's surface is
// stable across all of them.
package healtheventspublisher
import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/ports"
"github.com/redis/go-redis/v9"
)
// emptyDetails is the canonical JSON payload installed when the caller
// supplies an empty Details slice. Matches the SQL DEFAULT for
// `health_snapshots.details`.
const emptyDetails = "{}"
// Wire field names used by the Redis Streams payload. Frozen by
// `rtmanager/api/runtime-health-asyncapi.yaml`; renaming any of them
// breaks consumers.
const (
fieldGameID = "game_id"
fieldContainerID = "container_id"
fieldEventType = "event_type"
fieldOccurredAtMS = "occurred_at_ms"
fieldDetails = "details"
)
// Config groups the dependencies and stream name required to construct
// a Publisher.
type Config struct {
// Client appends entries to the Redis Stream. Must be non-nil.
Client *redis.Client
// Snapshots upserts the latest health snapshot. Must be non-nil.
Snapshots ports.HealthSnapshotStore
// Stream stores the Redis Stream key events are published to (e.g.
// `runtime:health_events`). Must not be empty.
Stream string
}
// Publisher implements `ports.HealthEventPublisher` on top of a shared
// Redis client and the production `health_snapshots` store.
type Publisher struct {
client *redis.Client
snapshots ports.HealthSnapshotStore
stream string
}
// NewPublisher constructs one Publisher from cfg. Validation errors
// surface the missing collaborator verbatim.
func NewPublisher(cfg Config) (*Publisher, error) {
if cfg.Client == nil {
return nil, errors.New("new rtmanager health events publisher: nil redis client")
}
if cfg.Snapshots == nil {
return nil, errors.New("new rtmanager health events publisher: nil snapshot store")
}
if cfg.Stream == "" {
return nil, errors.New("new rtmanager health events publisher: stream must not be empty")
}
return &Publisher{
client: cfg.Client,
snapshots: cfg.Snapshots,
stream: cfg.Stream,
}, nil
}
// Publish upserts the matching health_snapshots row and then XADDs the
// envelope to the configured Redis Stream. Both side effects are
// required; the snapshot upsert runs first so a successful Publish
// always leaves the snapshot store at least as fresh as the stream.
func (publisher *Publisher) Publish(ctx context.Context, envelope ports.HealthEventEnvelope) error {
if publisher == nil || publisher.client == nil || publisher.snapshots == nil {
return errors.New("publish health event: nil publisher")
}
if ctx == nil {
return errors.New("publish health event: nil context")
}
if err := envelope.Validate(); err != nil {
return fmt.Errorf("publish health event: %w", err)
}
details := envelope.Details
if len(details) == 0 {
details = json.RawMessage(emptyDetails)
}
status, source := snapshotMappingFor(envelope.EventType)
snapshot := health.HealthSnapshot{
GameID: envelope.GameID,
ContainerID: envelope.ContainerID,
Status: status,
Source: source,
Details: details,
ObservedAt: envelope.OccurredAt.UTC(),
}
if err := publisher.snapshots.Upsert(ctx, snapshot); err != nil {
return fmt.Errorf("publish health event: upsert snapshot: %w", err)
}
occurredAtMS := envelope.OccurredAt.UTC().UnixMilli()
values := map[string]any{
fieldGameID: envelope.GameID,
fieldContainerID: envelope.ContainerID,
fieldEventType: string(envelope.EventType),
fieldOccurredAtMS: strconv.FormatInt(occurredAtMS, 10),
fieldDetails: string(details),
}
if err := publisher.client.XAdd(ctx, &redis.XAddArgs{
Stream: publisher.stream,
Values: values,
}).Err(); err != nil {
return fmt.Errorf("publish health event: xadd: %w", err)
}
return nil
}
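// A minimal consumer sketch (hypothetical reader; real consumers
// follow `rtmanager/api/runtime-health-asyncapi.yaml`):
//
//	streams, err := client.XRead(ctx, &redis.XReadArgs{
//		Streams: []string{"runtime:health_events", "$"},
//		Block:   0, // block until a new entry arrives
//	}).Result()
//	// Each entry's Values carries game_id, container_id, event_type,
//	// occurred_at_ms (decimal milliseconds as a string), and details
//	// (a JSON object string).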
// snapshotMappingFor returns the SnapshotStatus and SnapshotSource that
// match eventType per `rtmanager/README.md §Health Monitoring`.
//
// `container_started` is observed when the start service successfully
// runs the container; the snapshot collapses it to `healthy`.
// `probe_recovered` collapses to `healthy` per
// `rtmanager/docs/domain-and-ports.md` §4: it does not have its own
// snapshot status; the next observation overwrites the prior
// `probe_failed` with `healthy`.
func snapshotMappingFor(eventType health.EventType) (health.SnapshotStatus, health.SnapshotSource) {
switch eventType {
case health.EventTypeContainerStarted:
return health.SnapshotStatusHealthy, health.SnapshotSourceDockerEvent
case health.EventTypeContainerExited:
return health.SnapshotStatusExited, health.SnapshotSourceDockerEvent
case health.EventTypeContainerOOM:
return health.SnapshotStatusOOM, health.SnapshotSourceDockerEvent
case health.EventTypeContainerDisappeared:
return health.SnapshotStatusContainerDisappeared, health.SnapshotSourceDockerEvent
case health.EventTypeInspectUnhealthy:
return health.SnapshotStatusInspectUnhealthy, health.SnapshotSourceInspect
case health.EventTypeProbeFailed:
return health.SnapshotStatusProbeFailed, health.SnapshotSourceProbe
case health.EventTypeProbeRecovered:
return health.SnapshotStatusHealthy, health.SnapshotSourceProbe
default:
return "", ""
}
}
// Compile-time assertion: Publisher implements
// ports.HealthEventPublisher.
var _ ports.HealthEventPublisher = (*Publisher)(nil)
@@ -0,0 +1,197 @@
package healtheventspublisher_test
import (
"context"
"encoding/json"
"strconv"
"sync"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/healtheventspublisher"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/ports"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// fakeSnapshots captures Upsert invocations for assertions.
type fakeSnapshots struct {
mu sync.Mutex
upserts []health.HealthSnapshot
upsertErr error
}
func (s *fakeSnapshots) Upsert(_ context.Context, snapshot health.HealthSnapshot) error {
s.mu.Lock()
defer s.mu.Unlock()
if s.upsertErr != nil {
return s.upsertErr
}
s.upserts = append(s.upserts, snapshot)
return nil
}
func (s *fakeSnapshots) Get(_ context.Context, _ string) (health.HealthSnapshot, error) {
return health.HealthSnapshot{}, nil
}
func newPublisher(t *testing.T, snapshots ports.HealthSnapshotStore) (*healtheventspublisher.Publisher, *miniredis.Miniredis, *redis.Client) {
t.Helper()
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
publisher, err := healtheventspublisher.NewPublisher(healtheventspublisher.Config{
Client: client,
Snapshots: snapshots,
Stream: "runtime:health_events",
})
require.NoError(t, err)
return publisher, server, client
}
func TestNewPublisherRejectsMissingCollaborators(t *testing.T) {
_, err := healtheventspublisher.NewPublisher(healtheventspublisher.Config{})
require.Error(t, err)
_, err = healtheventspublisher.NewPublisher(healtheventspublisher.Config{
Client: redis.NewClient(&redis.Options{Addr: "127.0.0.1:0"}),
})
require.Error(t, err)
_, err = healtheventspublisher.NewPublisher(healtheventspublisher.Config{
Client: redis.NewClient(&redis.Options{Addr: "127.0.0.1:0"}),
Snapshots: &fakeSnapshots{},
})
require.Error(t, err)
}
func TestPublishContainerStartedUpsertsHealthyAndXAdds(t *testing.T) {
snapshots := &fakeSnapshots{}
publisher, _, client := newPublisher(t, snapshots)
occurredAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
envelope := ports.HealthEventEnvelope{
GameID: "game-1",
ContainerID: "c-1",
EventType: health.EventTypeContainerStarted,
OccurredAt: occurredAt,
Details: json.RawMessage(`{"image_ref":"galaxy/game:1.2.3"}`),
}
require.NoError(t, publisher.Publish(context.Background(), envelope))
require.Len(t, snapshots.upserts, 1)
snapshot := snapshots.upserts[0]
assert.Equal(t, "game-1", snapshot.GameID)
assert.Equal(t, "c-1", snapshot.ContainerID)
assert.Equal(t, health.SnapshotStatusHealthy, snapshot.Status)
assert.Equal(t, health.SnapshotSourceDockerEvent, snapshot.Source)
assert.JSONEq(t, `{"image_ref":"galaxy/game:1.2.3"}`, string(snapshot.Details))
assert.Equal(t, occurredAt, snapshot.ObservedAt)
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
require.NoError(t, err)
require.Len(t, entries, 1)
values := entries[0].Values
assert.Equal(t, "game-1", values["game_id"])
assert.Equal(t, "c-1", values["container_id"])
assert.Equal(t, "container_started", values["event_type"])
assert.Equal(t, strconv.FormatInt(occurredAt.UnixMilli(), 10), values["occurred_at_ms"])
assert.JSONEq(t, `{"image_ref":"galaxy/game:1.2.3"}`, values["details"].(string))
}
func TestPublishMapsEveryEventTypeToASnapshot(t *testing.T) {
t.Parallel()
cases := []struct {
eventType health.EventType
expectStatus health.SnapshotStatus
expectSource health.SnapshotSource
}{
{health.EventTypeContainerStarted, health.SnapshotStatusHealthy, health.SnapshotSourceDockerEvent},
{health.EventTypeContainerExited, health.SnapshotStatusExited, health.SnapshotSourceDockerEvent},
{health.EventTypeContainerOOM, health.SnapshotStatusOOM, health.SnapshotSourceDockerEvent},
{health.EventTypeContainerDisappeared, health.SnapshotStatusContainerDisappeared, health.SnapshotSourceDockerEvent},
{health.EventTypeInspectUnhealthy, health.SnapshotStatusInspectUnhealthy, health.SnapshotSourceInspect},
{health.EventTypeProbeFailed, health.SnapshotStatusProbeFailed, health.SnapshotSourceProbe},
{health.EventTypeProbeRecovered, health.SnapshotStatusHealthy, health.SnapshotSourceProbe},
}
for _, tc := range cases {
t.Run(string(tc.eventType), func(t *testing.T) {
t.Parallel()
snapshots := &fakeSnapshots{}
publisher, _, _ := newPublisher(t, snapshots)
require.NoError(t, publisher.Publish(context.Background(), ports.HealthEventEnvelope{
GameID: "g",
ContainerID: "c",
EventType: tc.eventType,
OccurredAt: time.Now().UTC(),
Details: json.RawMessage(`{}`),
}))
require.Len(t, snapshots.upserts, 1)
assert.Equal(t, tc.expectStatus, snapshots.upserts[0].Status)
assert.Equal(t, tc.expectSource, snapshots.upserts[0].Source)
})
}
}
func TestPublishEmptyDetailsBecomesEmptyObject(t *testing.T) {
snapshots := &fakeSnapshots{}
publisher, _, client := newPublisher(t, snapshots)
envelope := ports.HealthEventEnvelope{
GameID: "g",
ContainerID: "c",
EventType: health.EventTypeContainerDisappeared,
OccurredAt: time.Now().UTC(),
}
require.NoError(t, publisher.Publish(context.Background(), envelope))
require.Len(t, snapshots.upserts, 1)
assert.JSONEq(t, "{}", string(snapshots.upserts[0].Details))
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
require.NoError(t, err)
require.Len(t, entries, 1)
assert.JSONEq(t, "{}", entries[0].Values["details"].(string))
}
func TestPublishRejectsInvalidEnvelope(t *testing.T) {
snapshots := &fakeSnapshots{}
publisher, _, client := newPublisher(t, snapshots)
require.Error(t, publisher.Publish(context.Background(), ports.HealthEventEnvelope{}))
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
require.NoError(t, err)
assert.Empty(t, entries)
assert.Empty(t, snapshots.upserts)
}
func TestPublishSurfacesSnapshotErrorWithoutXAdd(t *testing.T) {
snapshots := &fakeSnapshots{upsertErr: assertSentinelErr}
publisher, _, client := newPublisher(t, snapshots)
err := publisher.Publish(context.Background(), ports.HealthEventEnvelope{
GameID: "g",
ContainerID: "c",
EventType: health.EventTypeContainerStarted,
OccurredAt: time.Now().UTC(),
Details: json.RawMessage(`{"image_ref":"x"}`),
})
require.Error(t, err)
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
require.NoError(t, err)
assert.Empty(t, entries, "xadd must not run when snapshot upsert fails")
}
// assertSentinelErr is the sentinel error injected into fakeSnapshots so
// tests can assert that snapshot-upsert failures propagate unchanged.
var assertSentinelErr = sentinelError("snapshot upsert failure")
type sentinelError string
func (s sentinelError) Error() string { return string(s) }
@@ -0,0 +1,100 @@
// Package jobresultspublisher provides the Redis-Streams-backed
// publisher for `runtime:job_results`. The start-jobs and stop-jobs
// consumers call this adapter so every consumed envelope produces
// exactly one outcome entry on the result stream.
//
// The wire fields mirror the AsyncAPI schema frozen in
// `rtmanager/api/runtime-jobs-asyncapi.yaml`. Every field is XADDed
// even when empty so consumers can rely on the schema's required-field
// set.
package jobresultspublisher
import (
"context"
"errors"
"fmt"
"strings"
"galaxy/rtmanager/internal/ports"
"github.com/redis/go-redis/v9"
)
// Wire field names used by the Redis Streams payload. Frozen by
// `rtmanager/api/runtime-jobs-asyncapi.yaml`; renaming any of them
// breaks consumers.
const (
fieldGameID = "game_id"
fieldOutcome = "outcome"
fieldContainerID = "container_id"
fieldEngineEndpoint = "engine_endpoint"
fieldErrorCode = "error_code"
fieldErrorMessage = "error_message"
)
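// A published entry therefore always carries all six fields. The adapter
// tests exercise, for example, a successful start that lands on the
// stream as:
//
//	game_id:         "game-1"
//	outcome:         "success"
//	container_id:    "c-1"
//	engine_endpoint: "http://galaxy-game-game-1:8080"
//	error_code:      ""
//	error_message:   ""
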
// Config groups the dependencies and stream name required to construct
// a Publisher.
type Config struct {
// Client appends entries to the Redis Stream. Must be non-nil.
Client *redis.Client
// Stream stores the Redis Stream key job results are published to
// (e.g. `runtime:job_results`). Must not be empty.
Stream string
}
// Publisher implements `ports.JobResultPublisher` on top of a shared
// Redis client.
type Publisher struct {
client *redis.Client
stream string
}
// NewPublisher constructs one Publisher from cfg. Validation errors
// surface the missing collaborator verbatim.
func NewPublisher(cfg Config) (*Publisher, error) {
if cfg.Client == nil {
return nil, errors.New("new rtmanager job results publisher: nil redis client")
}
if strings.TrimSpace(cfg.Stream) == "" {
return nil, errors.New("new rtmanager job results publisher: stream must not be empty")
}
return &Publisher{
client: cfg.Client,
stream: cfg.Stream,
}, nil
}
// Publish XADDs result to the configured Redis Stream. The wire payload
// includes every field declared as required by the AsyncAPI schema —
// empty strings are kept so consumers always see the documented keys.
func (publisher *Publisher) Publish(ctx context.Context, result ports.JobResult) error {
if publisher == nil || publisher.client == nil {
return errors.New("publish job result: nil publisher")
}
if ctx == nil {
return errors.New("publish job result: nil context")
}
if err := result.Validate(); err != nil {
return fmt.Errorf("publish job result: %w", err)
}
values := map[string]any{
fieldGameID: result.GameID,
fieldOutcome: result.Outcome,
fieldContainerID: result.ContainerID,
fieldEngineEndpoint: result.EngineEndpoint,
fieldErrorCode: result.ErrorCode,
fieldErrorMessage: result.ErrorMessage,
}
if err := publisher.client.XAdd(ctx, &redis.XAddArgs{
Stream: publisher.stream,
Values: values,
}).Err(); err != nil {
return fmt.Errorf("publish job result: xadd: %w", err)
}
return nil
}
// Compile-time assertion: Publisher implements ports.JobResultPublisher.
var _ ports.JobResultPublisher = (*Publisher)(nil)
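// The sketch below shows one way a consumer could read the stream back;
// the helper name and the one-shot XRange read are illustrative
// assumptions, not part of this package:
//
//	func dumpResults(ctx context.Context, client *redis.Client, stream string) error {
//		entries, err := client.XRange(ctx, stream, "-", "+").Result()
//		if err != nil {
//			return err
//		}
//		for _, entry := range entries {
//			fmt.Printf("game=%v outcome=%v error_code=%v\n",
//				entry.Values[fieldGameID], entry.Values[fieldOutcome], entry.Values[fieldErrorCode])
//		}
//		return nil
//	}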
@@ -0,0 +1,142 @@
package jobresultspublisher_test
import (
"context"
"testing"
"galaxy/rtmanager/internal/adapters/jobresultspublisher"
"galaxy/rtmanager/internal/ports"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func newPublisher(t *testing.T) (*jobresultspublisher.Publisher, *redis.Client) {
t.Helper()
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
publisher, err := jobresultspublisher.NewPublisher(jobresultspublisher.Config{
Client: client,
Stream: "runtime:job_results",
})
require.NoError(t, err)
return publisher, client
}
func TestNewPublisherRejectsMissingCollaborators(t *testing.T) {
_, err := jobresultspublisher.NewPublisher(jobresultspublisher.Config{})
require.Error(t, err)
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
_, err = jobresultspublisher.NewPublisher(jobresultspublisher.Config{Client: client})
require.Error(t, err)
_, err = jobresultspublisher.NewPublisher(jobresultspublisher.Config{Client: client, Stream: " "})
require.Error(t, err)
}
func TestPublishRejectsInvalidResult(t *testing.T) {
publisher, _ := newPublisher(t)
require.Error(t, publisher.Publish(context.Background(), ports.JobResult{}))
require.Error(t, publisher.Publish(context.Background(), ports.JobResult{
GameID: "game-1",
Outcome: "weird",
}))
}
func TestPublishStartSuccessXAddsAllRequiredFields(t *testing.T) {
publisher, client := newPublisher(t)
result := ports.JobResult{
GameID: "game-1",
Outcome: ports.JobOutcomeSuccess,
ContainerID: "c-1",
EngineEndpoint: "http://galaxy-game-game-1:8080",
ErrorCode: "",
ErrorMessage: "",
}
require.NoError(t, publisher.Publish(context.Background(), result))
entries, err := client.XRange(context.Background(), "runtime:job_results", "-", "+").Result()
require.NoError(t, err)
require.Len(t, entries, 1)
values := entries[0].Values
assert.Equal(t, "game-1", values["game_id"])
assert.Equal(t, "success", values["outcome"])
assert.Equal(t, "c-1", values["container_id"])
assert.Equal(t, "http://galaxy-game-game-1:8080", values["engine_endpoint"])
assert.Equal(t, "", values["error_code"])
assert.Equal(t, "", values["error_message"])
}
func TestPublishFailureXAddsEmptyContainerAndEndpoint(t *testing.T) {
publisher, client := newPublisher(t)
result := ports.JobResult{
GameID: "game-2",
Outcome: ports.JobOutcomeFailure,
ErrorCode: "image_pull_failed",
ErrorMessage: "manifest unknown",
}
require.NoError(t, publisher.Publish(context.Background(), result))
entries, err := client.XRange(context.Background(), "runtime:job_results", "-", "+").Result()
require.NoError(t, err)
require.Len(t, entries, 1)
values := entries[0].Values
assert.Equal(t, "game-2", values["game_id"])
assert.Equal(t, "failure", values["outcome"])
assert.Equal(t, "", values["container_id"], "failure must publish empty container id")
assert.Equal(t, "", values["engine_endpoint"], "failure must publish empty engine endpoint")
assert.Equal(t, "image_pull_failed", values["error_code"])
assert.Equal(t, "manifest unknown", values["error_message"])
}
func TestPublishReplayNoOpKeepsContainerAndEndpoint(t *testing.T) {
publisher, client := newPublisher(t)
result := ports.JobResult{
GameID: "game-3",
Outcome: ports.JobOutcomeSuccess,
ContainerID: "c-3",
EngineEndpoint: "http://galaxy-game-game-3:8080",
ErrorCode: "replay_no_op",
}
require.NoError(t, publisher.Publish(context.Background(), result))
entries, err := client.XRange(context.Background(), "runtime:job_results", "-", "+").Result()
require.NoError(t, err)
require.Len(t, entries, 1)
values := entries[0].Values
assert.Equal(t, "game-3", values["game_id"])
assert.Equal(t, "success", values["outcome"])
assert.Equal(t, "c-3", values["container_id"])
assert.Equal(t, "http://galaxy-game-game-3:8080", values["engine_endpoint"])
assert.Equal(t, "replay_no_op", values["error_code"])
assert.Equal(t, "", values["error_message"])
}
func TestPublishFailsOnClosedClient(t *testing.T) {
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
publisher, err := jobresultspublisher.NewPublisher(jobresultspublisher.Config{
Client: client,
Stream: "runtime:job_results",
})
require.NoError(t, err)
require.NoError(t, client.Close())
err = publisher.Publish(context.Background(), ports.JobResult{
GameID: "game-4",
Outcome: ports.JobOutcomeSuccess,
})
require.Error(t, err)
}
@@ -0,0 +1,219 @@
// Package lobbyclient provides the trusted-internal Lobby REST client
// Runtime Manager uses to fetch ancillary game metadata for diagnostics.
//
// The client is intentionally minimal: the GetGame fetch is diagnostic
// only, because the start envelope already carries the only required
// field (`image_ref`). Missing games surface as
// `ports.ErrLobbyGameNotFound` and every other failure as
// `ports.ErrLobbyUnavailable`, so callers can distinguish "not found"
// from transport faults and continue without aborting the start
// operation.
package lobbyclient
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"galaxy/rtmanager/internal/ports"
)
const (
	// getGamePathTemplate is the GetGame request-path template; the %s
	// slot takes the path-escaped game id.
	getGamePathTemplate = "/api/v1/internal/games/%s"
)
// Config configures one HTTP-backed Lobby internal client.
type Config struct {
// BaseURL stores the absolute base URL of the Lobby internal HTTP
// listener (e.g. `http://lobby:8095`).
BaseURL string
// RequestTimeout bounds one outbound lookup request.
RequestTimeout time.Duration
}
// Client resolves Lobby game records through the trusted internal HTTP
// API.
type Client struct {
baseURL string
requestTimeout time.Duration
httpClient *http.Client
closeIdleConnections func()
}
type gameRecordEnvelope struct {
GameID string `json:"game_id"`
Status string `json:"status"`
TargetEngineVersion string `json:"target_engine_version"`
}
type errorEnvelope struct {
Error *errorBody `json:"error"`
}
type errorBody struct {
Code string `json:"code"`
Message string `json:"message"`
}
// NewClient constructs a Lobby internal client that uses
// repository-standard HTTP transport instrumentation through otelhttp.
// Cloning http.DefaultTransport keeps the production wiring isolated
// from later mutations of the shared default transport.
func NewClient(cfg Config) (*Client, error) {
transport, ok := http.DefaultTransport.(*http.Transport)
if !ok {
return nil, errors.New("new lobby internal client: default transport is not *http.Transport")
}
cloned := transport.Clone()
return newClient(cfg, &http.Client{Transport: otelhttp.NewTransport(cloned)}, cloned.CloseIdleConnections)
}
func newClient(cfg Config, httpClient *http.Client, closeIdleConnections func()) (*Client, error) {
switch {
case strings.TrimSpace(cfg.BaseURL) == "":
return nil, errors.New("new lobby internal client: base URL must not be empty")
case cfg.RequestTimeout <= 0:
return nil, errors.New("new lobby internal client: request timeout must be positive")
case httpClient == nil:
return nil, errors.New("new lobby internal client: http client must not be nil")
}
parsed, err := url.Parse(strings.TrimRight(strings.TrimSpace(cfg.BaseURL), "/"))
if err != nil {
return nil, fmt.Errorf("new lobby internal client: parse base URL: %w", err)
}
if parsed.Scheme == "" || parsed.Host == "" {
return nil, errors.New("new lobby internal client: base URL must be absolute")
}
return &Client{
baseURL: parsed.String(),
requestTimeout: cfg.RequestTimeout,
httpClient: httpClient,
closeIdleConnections: closeIdleConnections,
}, nil
}
// Close releases idle HTTP connections owned by the client transport.
// Call once on shutdown.
func (client *Client) Close() error {
if client == nil || client.closeIdleConnections == nil {
return nil
}
client.closeIdleConnections()
return nil
}
// GetGame returns the Lobby game record for gameID. It maps Lobby's
// `404 not_found` to `ports.ErrLobbyGameNotFound`; every other failure
// (transport, timeout, non-2xx response) maps to
// `ports.ErrLobbyUnavailable` wrapped with the original error so callers
// keep the diagnostic detail.
func (client *Client) GetGame(ctx context.Context, gameID string) (ports.LobbyGameRecord, error) {
if client == nil || client.httpClient == nil {
return ports.LobbyGameRecord{}, errors.New("lobby get game: nil client")
}
if ctx == nil {
return ports.LobbyGameRecord{}, errors.New("lobby get game: nil context")
}
if err := ctx.Err(); err != nil {
return ports.LobbyGameRecord{}, err
}
if strings.TrimSpace(gameID) == "" {
return ports.LobbyGameRecord{}, errors.New("lobby get game: game id must not be empty")
}
	payload, statusCode, err := client.doRequest(ctx, http.MethodGet, fmt.Sprintf(getGamePathTemplate, url.PathEscape(gameID)))
if err != nil {
return ports.LobbyGameRecord{}, fmt.Errorf("%w: %w", ports.ErrLobbyUnavailable, err)
}
switch statusCode {
case http.StatusOK:
var envelope gameRecordEnvelope
if err := decodeJSONPayload(payload, &envelope); err != nil {
return ports.LobbyGameRecord{}, fmt.Errorf("%w: decode success response: %w", ports.ErrLobbyUnavailable, err)
}
if strings.TrimSpace(envelope.GameID) == "" {
return ports.LobbyGameRecord{}, fmt.Errorf("%w: success response missing game_id", ports.ErrLobbyUnavailable)
}
return ports.LobbyGameRecord{
GameID: envelope.GameID,
Status: envelope.Status,
TargetEngineVersion: envelope.TargetEngineVersion,
}, nil
case http.StatusNotFound:
return ports.LobbyGameRecord{}, ports.ErrLobbyGameNotFound
default:
errorCode := decodeErrorCode(payload)
if errorCode != "" {
return ports.LobbyGameRecord{}, fmt.Errorf("%w: unexpected status %d (error_code=%s)", ports.ErrLobbyUnavailable, statusCode, errorCode)
}
return ports.LobbyGameRecord{}, fmt.Errorf("%w: unexpected status %d", ports.ErrLobbyUnavailable, statusCode)
}
}
func (client *Client) doRequest(ctx context.Context, method, requestPath string) ([]byte, int, error) {
attemptCtx, cancel := context.WithTimeout(ctx, client.requestTimeout)
defer cancel()
req, err := http.NewRequestWithContext(attemptCtx, method, client.baseURL+requestPath, nil)
if err != nil {
return nil, 0, fmt.Errorf("build request: %w", err)
}
req.Header.Set("Accept", "application/json")
resp, err := client.httpClient.Do(req)
if err != nil {
return nil, 0, err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("read response body: %w", err)
}
return body, resp.StatusCode, nil
}
// decodeJSONPayload tolerantly decodes a JSON object; unknown fields
// are ignored so additive Lobby schema changes do not break us.
func decodeJSONPayload(payload []byte, target any) error {
decoder := json.NewDecoder(bytes.NewReader(payload))
if err := decoder.Decode(target); err != nil {
return err
}
if err := decoder.Decode(&struct{}{}); err != io.EOF {
if err == nil {
return errors.New("unexpected trailing JSON input")
}
return err
}
return nil
}
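// For example, a payload of `{"game_id":"g"}{"extra":true}` is rejected
// with "unexpected trailing JSON input", while unknown fields inside a
// single object (`{"game_id":"g","new_field":1}`) decode cleanly.
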
func decodeErrorCode(payload []byte) string {
if len(payload) == 0 {
return ""
}
var envelope errorEnvelope
if err := json.Unmarshal(payload, &envelope); err != nil {
return ""
}
if envelope.Error == nil {
return ""
}
return envelope.Error.Code
}
// Compile-time assertion: Client implements ports.LobbyInternalClient.
var _ ports.LobbyInternalClient = (*Client)(nil)
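// A caller-side sketch of the documented error contract. The handling
// shown is an assumption about how callers react, not code in this
// package:
//
//	record, err := client.GetGame(ctx, gameID)
//	switch {
//	case errors.Is(err, ports.ErrLobbyGameNotFound):
//		// Game unknown to Lobby: log and continue, metadata is ancillary.
//	case errors.Is(err, ports.ErrLobbyUnavailable):
//		// Transport fault or bad response: continue without metadata.
//	case err == nil:
//		_ = record.TargetEngineVersion // extra diagnostics only
//	}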
@@ -0,0 +1,153 @@
package lobbyclient
import (
"context"
"errors"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"galaxy/rtmanager/internal/ports"
)
func newTestClient(t *testing.T, baseURL string, timeout time.Duration) *Client {
t.Helper()
client, err := NewClient(Config{BaseURL: baseURL, RequestTimeout: timeout})
require.NoError(t, err)
t.Cleanup(func() { _ = client.Close() })
return client
}
func TestNewClientValidatesConfig(t *testing.T) {
cases := map[string]Config{
"empty base url": {BaseURL: "", RequestTimeout: time.Second},
"non-absolute base url": {BaseURL: "lobby:8095", RequestTimeout: time.Second},
"non-positive timeout": {BaseURL: "http://lobby:8095", RequestTimeout: 0},
}
for name, cfg := range cases {
t.Run(name, func(t *testing.T) {
_, err := NewClient(cfg)
require.Error(t, err)
})
}
}
func TestGetGameSuccess(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, http.MethodGet, r.Method)
require.Equal(t, "/api/v1/internal/games/game-1", r.URL.Path)
require.Equal(t, "application/json", r.Header.Get("Accept"))
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{
"game_id": "game-1",
"game_name": "Sample",
"status": "running",
"target_engine_version": "1.4.2",
"current_turn": 0,
"runtime_status": "running"
}`))
}))
defer server.Close()
client := newTestClient(t, server.URL, time.Second)
got, err := client.GetGame(context.Background(), "game-1")
require.NoError(t, err)
assert.Equal(t, "game-1", got.GameID)
assert.Equal(t, "running", got.Status)
assert.Equal(t, "1.4.2", got.TargetEngineVersion)
}
func TestGetGameNotFound(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusNotFound)
_, _ = w.Write([]byte(`{"error":{"code":"not_found","message":"no such game"}}`))
}))
defer server.Close()
client := newTestClient(t, server.URL, time.Second)
_, err := client.GetGame(context.Background(), "missing")
require.Error(t, err)
assert.True(t, errors.Is(err, ports.ErrLobbyGameNotFound))
assert.False(t, errors.Is(err, ports.ErrLobbyUnavailable))
}
func TestGetGameInternalErrorMapsToUnavailable(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(`{"error":{"code":"internal_error","message":"boom"}}`))
}))
defer server.Close()
client := newTestClient(t, server.URL, time.Second)
_, err := client.GetGame(context.Background(), "x")
require.Error(t, err)
assert.True(t, errors.Is(err, ports.ErrLobbyUnavailable))
assert.Contains(t, err.Error(), "500")
assert.Contains(t, err.Error(), "internal_error")
}
func TestGetGameTimeoutMapsToUnavailable(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
time.Sleep(150 * time.Millisecond)
_, _ = w.Write([]byte(`{}`))
}))
defer server.Close()
client := newTestClient(t, server.URL, 50*time.Millisecond)
_, err := client.GetGame(context.Background(), "x")
require.Error(t, err)
assert.True(t, errors.Is(err, ports.ErrLobbyUnavailable))
}
func TestGetGameSuccessMissingGameIDIsUnavailable(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`{"status":"running"}`))
}))
defer server.Close()
client := newTestClient(t, server.URL, time.Second)
_, err := client.GetGame(context.Background(), "x")
require.Error(t, err)
assert.True(t, errors.Is(err, ports.ErrLobbyUnavailable))
assert.Contains(t, err.Error(), "missing game_id")
}
func TestGetGameRejectsBadInput(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// t.Error, not t.Fatal: the handler runs on the server goroutine,
		// and FailNow must only be called from the test goroutine.
		t.Error("must not contact lobby on bad input")
	}))
defer server.Close()
client := newTestClient(t, server.URL, time.Second)
t.Run("empty game id", func(t *testing.T) {
_, err := client.GetGame(context.Background(), " ")
require.Error(t, err)
assert.Contains(t, err.Error(), "game id")
})
t.Run("canceled context", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := client.GetGame(ctx, "x")
require.Error(t, err)
assert.True(t, errors.Is(err, context.Canceled))
})
}
func TestCloseReleasesConnections(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`{"game_id":"x","status":"running","target_engine_version":"1.0.0"}`))
}))
defer server.Close()
client := newTestClient(t, server.URL, time.Second)
_, err := client.GetGame(context.Background(), "x")
require.NoError(t, err)
assert.NoError(t, client.Close())
assert.NoError(t, client.Close()) // idempotent
}
@@ -0,0 +1,70 @@
// Package notificationpublisher provides the Redis-Streams-backed
// notification-intent publisher Runtime Manager uses to emit admin-only
// failure notifications. The adapter is a thin shim over
// `galaxy/notificationintent.Publisher` that drops the entry id at the
// wrapper boundary; rationale lives in
// `rtmanager/docs/domain-and-ports.md §7`.
package notificationpublisher
import (
"context"
"errors"
"fmt"
"github.com/redis/go-redis/v9"
"galaxy/notificationintent"
"galaxy/rtmanager/internal/ports"
)
// Config groups the dependencies and stream name required to
// construct a Publisher.
type Config struct {
// Client appends entries to Redis Streams. Must be non-nil.
Client *redis.Client
// Stream stores the Redis Stream key intents are published to.
// When empty, `notificationintent.DefaultIntentsStream` is used.
Stream string
}
// Publisher implements `ports.NotificationIntentPublisher` on top of
// the shared `notificationintent.Publisher`. The wrapper is the single
// point that drops the entry id returned by the underlying publisher.
type Publisher struct {
inner *notificationintent.Publisher
}
// NewPublisher constructs a Publisher from cfg. It wraps the shared
// publisher and delegates validation; transport errors and validation
// errors propagate verbatim.
func NewPublisher(cfg Config) (*Publisher, error) {
if cfg.Client == nil {
return nil, errors.New("new rtmanager notification publisher: nil redis client")
}
inner, err := notificationintent.NewPublisher(notificationintent.PublisherConfig{
Client: cfg.Client,
Stream: cfg.Stream,
})
if err != nil {
return nil, fmt.Errorf("new rtmanager notification publisher: %w", err)
}
return &Publisher{inner: inner}, nil
}
// Publish forwards intent to the underlying notificationintent
// publisher and discards the resulting Redis Stream entry id. A failed
// publish surfaces as the underlying error.
func (publisher *Publisher) Publish(ctx context.Context, intent notificationintent.Intent) error {
if publisher == nil || publisher.inner == nil {
return errors.New("publish notification intent: nil publisher")
}
if _, err := publisher.inner.Publish(ctx, intent); err != nil {
return err
}
return nil
}
// Compile-time assertion: Publisher implements
// ports.NotificationIntentPublisher.
var _ ports.NotificationIntentPublisher = (*Publisher)(nil)
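// Wiring sketch mirroring the adapter tests; redisClient, now, and nowMs
// are placeholders supplied by the caller:
//
//	publisher, err := NewPublisher(Config{Client: redisClient, Stream: ""}) // "" -> DefaultIntentsStream
//	intent, err := notificationintent.NewRuntimeImagePullFailedIntent(
//		notificationintent.Metadata{IdempotencyKey: "rtmanager:start:game-1:abc", OccurredAt: now},
//		notificationintent.RuntimeImagePullFailedPayload{
//			GameID: "game-1", ImageRef: "galaxy/game:1.4.2",
//			ErrorCode: "image_pull_failed", ErrorMessage: "registry timeout",
//			AttemptedAtMs: nowMs,
//		},
//	)
//	err = publisher.Publish(ctx, intent)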
@@ -0,0 +1,123 @@
package notificationpublisher
import (
"context"
"encoding/json"
"testing"
"time"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"galaxy/notificationintent"
)
func newRedis(t *testing.T) (*redis.Client, *miniredis.Miniredis) {
t.Helper()
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
return client, server
}
func readStream(t *testing.T, client *redis.Client, stream string) []redis.XMessage {
t.Helper()
messages, err := client.XRange(context.Background(), stream, "-", "+").Result()
require.NoError(t, err)
return messages
}
func TestNewPublisherValidation(t *testing.T) {
t.Run("nil client", func(t *testing.T) {
_, err := NewPublisher(Config{})
require.Error(t, err)
assert.Contains(t, err.Error(), "nil redis client")
})
}
func TestPublisherWritesIntent(t *testing.T) {
client, _ := newRedis(t)
publisher, err := NewPublisher(Config{Client: client, Stream: "notification:intents"})
require.NoError(t, err)
intent, err := notificationintent.NewRuntimeImagePullFailedIntent(
notificationintent.Metadata{
IdempotencyKey: "rtmanager:start:game-1:abc",
OccurredAt: time.UnixMilli(1714200000000).UTC(),
},
notificationintent.RuntimeImagePullFailedPayload{
GameID: "game-1",
ImageRef: "galaxy/game:1.4.2",
ErrorCode: "image_pull_failed",
ErrorMessage: "registry timeout",
AttemptedAtMs: 1714200000000,
},
)
require.NoError(t, err)
require.NoError(t, publisher.Publish(context.Background(), intent))
messages := readStream(t, client, "notification:intents")
require.Len(t, messages, 1)
values := messages[0].Values
assert.Equal(t, "runtime.image_pull_failed", values["notification_type"])
assert.Equal(t, "runtime_manager", values["producer"])
assert.Equal(t, "admin_email", values["audience_kind"])
assert.Equal(t, "rtmanager:start:game-1:abc", values["idempotency_key"])
// recipient_user_ids_json must be absent for admin_email audience.
_, hasRecipients := values["recipient_user_ids_json"]
assert.False(t, hasRecipients)
payloadRaw, ok := values["payload_json"].(string)
require.True(t, ok)
var payload map[string]any
require.NoError(t, json.Unmarshal([]byte(payloadRaw), &payload))
assert.Equal(t, "game-1", payload["game_id"])
assert.Equal(t, "galaxy/game:1.4.2", payload["image_ref"])
}
func TestPublisherForwardsValidationError(t *testing.T) {
client, _ := newRedis(t)
publisher, err := NewPublisher(Config{Client: client})
require.NoError(t, err)
// Intent with a zero OccurredAt fails the shared validator.
bad := notificationintent.Intent{
NotificationType: notificationintent.NotificationTypeRuntimeImagePullFailed,
Producer: notificationintent.ProducerRuntimeManager,
AudienceKind: notificationintent.AudienceKindAdminEmail,
IdempotencyKey: "k",
PayloadJSON: `{"game_id":"g","image_ref":"r","error_code":"c","error_message":"m","attempted_at_ms":1}`,
}
require.Error(t, publisher.Publish(context.Background(), bad))
}
func TestPublisherDefaultsStreamName(t *testing.T) {
client, _ := newRedis(t)
publisher, err := NewPublisher(Config{Client: client, Stream: ""})
require.NoError(t, err)
intent, err := notificationintent.NewRuntimeContainerStartFailedIntent(
notificationintent.Metadata{
IdempotencyKey: "k",
OccurredAt: time.UnixMilli(1714200000000).UTC(),
},
notificationintent.RuntimeContainerStartFailedPayload{
GameID: "g",
ImageRef: "r",
ErrorCode: "container_start_failed",
ErrorMessage: "boom",
AttemptedAtMs: 1714200000000,
},
)
require.NoError(t, err)
require.NoError(t, publisher.Publish(context.Background(), intent))
messages := readStream(t, client, notificationintent.DefaultIntentsStream)
require.Len(t, messages, 1)
}
@@ -0,0 +1,203 @@
// Package healthsnapshotstore implements the PostgreSQL-backed adapter
// for `ports.HealthSnapshotStore`.
//
// The package owns the on-disk shape of the `health_snapshots` table
// defined in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.HealthSnapshotStore` interface
// declared in `internal/ports/healthsnapshotstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// The `details` jsonb column round-trips as a `json.RawMessage`. Empty
// payloads are substituted with the SQL default `{}` on Upsert so the
// CHECK constraints and downstream readers never observe a non-JSON
// empty string.
package healthsnapshotstore
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// emptyDetails is the canonical jsonb payload installed when the caller
// supplies an empty Details slice. It matches the SQL DEFAULT for the
// column.
const emptyDetails = "{}"
// Config configures one PostgreSQL-backed health-snapshot store instance.
type Config struct {
// DB stores the connection pool the store uses for every query.
DB *sql.DB
// OperationTimeout bounds one round trip.
OperationTimeout time.Duration
}
// Store persists Runtime Manager health snapshots in PostgreSQL.
type Store struct {
db *sql.DB
operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed health-snapshot store from cfg.
func New(cfg Config) (*Store, error) {
if cfg.DB == nil {
return nil, errors.New("new postgres health snapshot store: db must not be nil")
}
if cfg.OperationTimeout <= 0 {
return nil, errors.New("new postgres health snapshot store: operation timeout must be positive")
}
return &Store{
db: cfg.DB,
operationTimeout: cfg.OperationTimeout,
}, nil
}
// healthSnapshotSelectColumns is the canonical SELECT list for the
// health_snapshots table, matching scanSnapshot's column order.
var healthSnapshotSelectColumns = pg.ColumnList{
pgtable.HealthSnapshots.GameID,
pgtable.HealthSnapshots.ContainerID,
pgtable.HealthSnapshots.Status,
pgtable.HealthSnapshots.Source,
pgtable.HealthSnapshots.Details,
pgtable.HealthSnapshots.ObservedAt,
}
// Upsert installs snapshot as the latest observation for snapshot.GameID.
// snapshot is validated through health.HealthSnapshot.Validate before the
// SQL is issued.
func (store *Store) Upsert(ctx context.Context, snapshot health.HealthSnapshot) error {
if store == nil || store.db == nil {
return errors.New("upsert health snapshot: nil store")
}
if err := snapshot.Validate(); err != nil {
return fmt.Errorf("upsert health snapshot: %w", err)
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "upsert health snapshot", store.operationTimeout)
if err != nil {
return err
}
defer cancel()
details := emptyDetails
if len(snapshot.Details) > 0 {
details = string(snapshot.Details)
}
stmt := pgtable.HealthSnapshots.INSERT(
pgtable.HealthSnapshots.GameID,
pgtable.HealthSnapshots.ContainerID,
pgtable.HealthSnapshots.Status,
pgtable.HealthSnapshots.Source,
pgtable.HealthSnapshots.Details,
pgtable.HealthSnapshots.ObservedAt,
).VALUES(
snapshot.GameID,
snapshot.ContainerID,
string(snapshot.Status),
string(snapshot.Source),
details,
snapshot.ObservedAt.UTC(),
).ON_CONFLICT(pgtable.HealthSnapshots.GameID).DO_UPDATE(
pg.SET(
pgtable.HealthSnapshots.ContainerID.SET(pgtable.HealthSnapshots.EXCLUDED.ContainerID),
pgtable.HealthSnapshots.Status.SET(pgtable.HealthSnapshots.EXCLUDED.Status),
pgtable.HealthSnapshots.Source.SET(pgtable.HealthSnapshots.EXCLUDED.Source),
pgtable.HealthSnapshots.Details.SET(pgtable.HealthSnapshots.EXCLUDED.Details),
pgtable.HealthSnapshots.ObservedAt.SET(pgtable.HealthSnapshots.EXCLUDED.ObservedAt),
),
)
query, args := stmt.Sql()
if _, err := store.db.ExecContext(operationCtx, query, args...); err != nil {
return fmt.Errorf("upsert health snapshot: %w", err)
}
return nil
}
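// Conceptually the generated statement is the standard PostgreSQL upsert
// (column list abbreviated here):
//
//	INSERT INTO rtmanager.health_snapshots (game_id, container_id, ...)
//	VALUES ($1, $2, ...)
//	ON CONFLICT (game_id) DO UPDATE
//	SET container_id = EXCLUDED.container_id, status = EXCLUDED.status, ...
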
// Get returns the latest snapshot for gameID. It returns
// runtime.ErrNotFound when no snapshot has been recorded yet.
func (store *Store) Get(ctx context.Context, gameID string) (health.HealthSnapshot, error) {
if store == nil || store.db == nil {
return health.HealthSnapshot{}, errors.New("get health snapshot: nil store")
}
	if strings.TrimSpace(gameID) == "" {
		return health.HealthSnapshot{}, errors.New("get health snapshot: game id must not be empty")
	}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "get health snapshot", store.operationTimeout)
if err != nil {
return health.HealthSnapshot{}, err
}
defer cancel()
stmt := pg.SELECT(healthSnapshotSelectColumns).
FROM(pgtable.HealthSnapshots).
WHERE(pgtable.HealthSnapshots.GameID.EQ(pg.String(gameID)))
query, args := stmt.Sql()
row := store.db.QueryRowContext(operationCtx, query, args...)
snapshot, err := scanSnapshot(row)
if sqlx.IsNoRows(err) {
return health.HealthSnapshot{}, runtime.ErrNotFound
}
if err != nil {
return health.HealthSnapshot{}, fmt.Errorf("get health snapshot: %w", err)
}
return snapshot, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanSnapshot can be
// shared across both single-row reads and iterated reads.
type rowScanner interface {
Scan(dest ...any) error
}
// scanSnapshot scans one health_snapshots row from rs.
func scanSnapshot(rs rowScanner) (health.HealthSnapshot, error) {
var (
gameID string
containerID string
status string
source string
details []byte
observedAt time.Time
)
if err := rs.Scan(
&gameID,
&containerID,
&status,
&source,
&details,
&observedAt,
); err != nil {
return health.HealthSnapshot{}, err
}
return health.HealthSnapshot{
GameID: gameID,
ContainerID: containerID,
Status: health.SnapshotStatus(status),
Source: health.SnapshotSource(source),
Details: json.RawMessage(details),
ObservedAt: observedAt.UTC(),
}, nil
}
// Ensure Store satisfies the ports.HealthSnapshotStore interface at
// compile time.
var _ ports.HealthSnapshotStore = (*Store)(nil)
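// Usage sketch mirroring the adapter tests; ctx and db are placeholders
// supplied by the caller:
//
//	store, err := New(Config{DB: db, OperationTimeout: 10 * time.Second})
//	err = store.Upsert(ctx, health.HealthSnapshot{
//		GameID:      "game-001",
//		ContainerID: "container-1",
//		Status:      health.SnapshotStatusHealthy,
//		Source:      health.SnapshotSourceInspect,
//		Details:     json.RawMessage(`{"state":"running"}`),
//		ObservedAt:  time.Now().UTC(),
//	})
//	got, err := store.Get(ctx, "game-001") // runtime.ErrNotFound when absent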
@@ -0,0 +1,157 @@
package healthsnapshotstore_test
import (
"context"
"encoding/json"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/healthsnapshotstore"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/domain/runtime"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) { pgtest.RunMain(m) }
func newStore(t *testing.T) *healthsnapshotstore.Store {
t.Helper()
pgtest.TruncateAll(t)
store, err := healthsnapshotstore.New(healthsnapshotstore.Config{
DB: pgtest.Ensure(t).Pool(),
OperationTimeout: pgtest.OperationTimeout,
})
require.NoError(t, err)
return store
}
func probeFailedSnapshot(gameID string, observedAt time.Time) health.HealthSnapshot {
return health.HealthSnapshot{
GameID: gameID,
ContainerID: "container-1",
Status: health.SnapshotStatusProbeFailed,
Source: health.SnapshotSourceProbe,
Details: json.RawMessage(`{"consecutive_failures":3,"last_status":503,"last_error":"timeout"}`),
ObservedAt: observedAt,
}
}
func TestUpsertAndGetRoundTrip(t *testing.T) {
ctx := context.Background()
store := newStore(t)
snapshot := probeFailedSnapshot("game-001",
time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
require.NoError(t, store.Upsert(ctx, snapshot))
got, err := store.Get(ctx, "game-001")
require.NoError(t, err)
assert.Equal(t, snapshot.GameID, got.GameID)
assert.Equal(t, snapshot.ContainerID, got.ContainerID)
assert.Equal(t, snapshot.Status, got.Status)
assert.Equal(t, snapshot.Source, got.Source)
assert.JSONEq(t, string(snapshot.Details), string(got.Details))
assert.True(t, snapshot.ObservedAt.Equal(got.ObservedAt))
assert.Equal(t, time.UTC, got.ObservedAt.Location())
}
func TestUpsertOverwritesPriorSnapshot(t *testing.T) {
ctx := context.Background()
store := newStore(t)
first := probeFailedSnapshot("game-001",
time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
require.NoError(t, store.Upsert(ctx, first))
second := health.HealthSnapshot{
GameID: "game-001",
ContainerID: "container-2",
Status: health.SnapshotStatusHealthy,
Source: health.SnapshotSourceInspect,
Details: json.RawMessage(`{"restart_count":0,"state":"running"}`),
ObservedAt: first.ObservedAt.Add(time.Minute),
}
require.NoError(t, store.Upsert(ctx, second))
got, err := store.Get(ctx, "game-001")
require.NoError(t, err)
assert.Equal(t, "container-2", got.ContainerID)
assert.Equal(t, health.SnapshotStatusHealthy, got.Status)
assert.Equal(t, health.SnapshotSourceInspect, got.Source)
assert.JSONEq(t, string(second.Details), string(got.Details))
assert.True(t, second.ObservedAt.Equal(got.ObservedAt))
}
func TestGetReturnsNotFound(t *testing.T) {
ctx := context.Background()
store := newStore(t)
_, err := store.Get(ctx, "game-missing")
require.ErrorIs(t, err, runtime.ErrNotFound)
}
func TestUpsertEmptyDetailsRoundTripsAsEmptyObject(t *testing.T) {
ctx := context.Background()
store := newStore(t)
snapshot := probeFailedSnapshot("game-001",
time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
snapshot.Details = nil
require.NoError(t, store.Upsert(ctx, snapshot))
got, err := store.Get(ctx, "game-001")
require.NoError(t, err)
assert.JSONEq(t, "{}", string(got.Details),
"empty json.RawMessage must round-trip as the SQL default {}, got %q",
string(got.Details))
}
func TestUpsertValidatesSnapshot(t *testing.T) {
ctx := context.Background()
store := newStore(t)
tests := []struct {
name string
mutate func(*health.HealthSnapshot)
}{
{"empty game id", func(s *health.HealthSnapshot) { s.GameID = "" }},
{"unknown status", func(s *health.HealthSnapshot) { s.Status = "exotic" }},
{"unknown source", func(s *health.HealthSnapshot) { s.Source = "exotic" }},
{"zero observed at", func(s *health.HealthSnapshot) { s.ObservedAt = time.Time{} }},
{"invalid json details", func(s *health.HealthSnapshot) {
s.Details = json.RawMessage("not json")
}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
snapshot := probeFailedSnapshot("game-001",
time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC))
tt.mutate(&snapshot)
err := store.Upsert(ctx, snapshot)
require.Error(t, err)
})
}
}
func TestGetRejectsEmptyGameID(t *testing.T) {
ctx := context.Background()
store := newStore(t)
_, err := store.Get(ctx, "")
require.Error(t, err)
}
func TestNewRejectsNilDB(t *testing.T) {
_, err := healthsnapshotstore.New(healthsnapshotstore.Config{OperationTimeout: time.Second})
require.Error(t, err)
}
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
_, err := healthsnapshotstore.New(healthsnapshotstore.Config{
DB: pgtest.Ensure(t).Pool(),
})
require.Error(t, err)
}
@@ -0,0 +1,209 @@
// Package pgtest exposes the testcontainers-backed PostgreSQL bootstrap
// shared by every Runtime Manager PG adapter test. The package is regular
// Go code — not a `_test.go` file — so it can be imported by the
// `_test.go` files in the three sibling store packages
// (`runtimerecordstore`, `operationlogstore`, `healthsnapshotstore`).
//
// No production code in `cmd/rtmanager` or in the runtime imports this
// package. The testcontainers-go dependency therefore stays out of the
// production binary's import graph.
package pgtest
import (
"context"
"database/sql"
"net/url"
"os"
"sync"
"testing"
"time"
"galaxy/postgres"
"galaxy/rtmanager/internal/adapters/postgres/migrations"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
const (
postgresImage = "postgres:16-alpine"
superUser = "galaxy"
superPassword = "galaxy"
superDatabase = "galaxy_rtmanager"
serviceRole = "rtmanagerservice"
servicePassword = "rtmanagerservice"
serviceSchema = "rtmanager"
containerStartup = 90 * time.Second
// OperationTimeout is the per-statement timeout used by every store
// constructed via the per-package newStore helpers. Tests may pass a
// smaller value if they need to assert deadline behaviour explicitly.
OperationTimeout = 10 * time.Second
)
// Env holds the per-process container plus the *sql.DB pool already
// provisioned with the rtmanager schema, role, and migrations applied.
type Env struct {
container *tcpostgres.PostgresContainer
pool *sql.DB
}
// Pool returns the shared pool. Tests truncate per-table state before
// each run via TruncateAll.
func (env *Env) Pool() *sql.DB { return env.pool }
var (
	once   sync.Once
	cur    *Env
	curErr error
)
// Ensure starts the PostgreSQL container on first invocation and applies
// the embedded goose migrations. Subsequent invocations reuse the same
// container/pool. When Docker is unavailable Ensure calls t.Skip with the
// underlying error so the test suite still passes on machines without
// Docker.
func Ensure(t testing.TB) *Env {
t.Helper()
	once.Do(func() {
		cur, curErr = start()
	})
	if curErr != nil {
		t.Skipf("postgres container start failed (Docker unavailable?): %v", curErr)
	}
return cur
}
// TruncateAll wipes every Runtime Manager table inside the shared pool,
// leaving the schema and indexes intact. Use it from each test that needs
// a clean slate.
func TruncateAll(t testing.TB) {
t.Helper()
env := Ensure(t)
const stmt = `TRUNCATE TABLE runtime_records, operation_log, health_snapshots RESTART IDENTITY CASCADE`
if _, err := env.pool.ExecContext(context.Background(), stmt); err != nil {
t.Fatalf("truncate rtmanager tables: %v", err)
}
}
// Shutdown terminates the shared container and closes the pool. It is
// invoked from each test package's TestMain after `m.Run` returns so the
// container is released even if individual tests panic.
func Shutdown() {
if cur == nil {
return
}
if cur.pool != nil {
_ = cur.pool.Close()
}
if cur.container != nil {
_ = testcontainers.TerminateContainer(cur.container)
}
cur = nil
}
// RunMain is a convenience helper for each store package's TestMain: it
// runs the suite via m.Run, captures the exit code, shuts the container
// down, and exits. Wiring it through one helper keeps every TestMain to
// one line.
func RunMain(m *testing.M) {
code := m.Run()
Shutdown()
os.Exit(code)
}
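// The resulting TestMain in each store package (see, for example, the
// sibling healthsnapshotstore tests):
//
//	func TestMain(m *testing.M) { pgtest.RunMain(m) }
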
func start() (*Env, error) {
ctx := context.Background()
container, err := tcpostgres.Run(ctx, postgresImage,
tcpostgres.WithDatabase(superDatabase),
tcpostgres.WithUsername(superUser),
tcpostgres.WithPassword(superPassword),
testcontainers.WithWaitStrategy(
wait.ForLog("database system is ready to accept connections").
WithOccurrence(2).
WithStartupTimeout(containerStartup),
),
)
if err != nil {
return nil, err
}
baseDSN, err := container.ConnectionString(ctx, "sslmode=disable")
if err != nil {
_ = testcontainers.TerminateContainer(container)
return nil, err
}
if err := provisionRoleAndSchema(ctx, baseDSN); err != nil {
_ = testcontainers.TerminateContainer(container)
return nil, err
}
scopedDSN, err := dsnForServiceRole(baseDSN)
if err != nil {
_ = testcontainers.TerminateContainer(container)
return nil, err
}
cfg := postgres.DefaultConfig()
cfg.PrimaryDSN = scopedDSN
cfg.OperationTimeout = OperationTimeout
pool, err := postgres.OpenPrimary(ctx, cfg)
if err != nil {
_ = testcontainers.TerminateContainer(container)
return nil, err
}
if err := postgres.Ping(ctx, pool, OperationTimeout); err != nil {
_ = pool.Close()
_ = testcontainers.TerminateContainer(container)
return nil, err
}
if err := postgres.RunMigrations(ctx, pool, migrations.FS(), "."); err != nil {
_ = pool.Close()
_ = testcontainers.TerminateContainer(container)
return nil, err
}
return &Env{container: container, pool: pool}, nil
}
func provisionRoleAndSchema(ctx context.Context, baseDSN string) error {
cfg := postgres.DefaultConfig()
cfg.PrimaryDSN = baseDSN
cfg.OperationTimeout = OperationTimeout
db, err := postgres.OpenPrimary(ctx, cfg)
if err != nil {
return err
}
defer func() { _ = db.Close() }()
statements := []string{
`DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'rtmanagerservice') THEN
CREATE ROLE rtmanagerservice LOGIN PASSWORD 'rtmanagerservice';
END IF;
END $$;`,
`CREATE SCHEMA IF NOT EXISTS rtmanager AUTHORIZATION rtmanagerservice;`,
`GRANT USAGE ON SCHEMA rtmanager TO rtmanagerservice;`,
}
for _, statement := range statements {
if _, err := db.ExecContext(ctx, statement); err != nil {
return err
}
}
return nil
}
func dsnForServiceRole(baseDSN string) (string, error) {
parsed, err := url.Parse(baseDSN)
if err != nil {
return "", err
}
values := url.Values{}
values.Set("search_path", serviceSchema)
values.Set("sslmode", "disable")
scoped := url.URL{
Scheme: parsed.Scheme,
User: url.UserPassword(serviceRole, servicePassword),
Host: parsed.Host,
Path: parsed.Path,
RawQuery: values.Encode(),
}
return scoped.String(), nil
}
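// For a container mapped to, say, localhost:55432 the scoped DSN comes
// out as (host and port illustrative):
//
//	postgres://rtmanagerservice:rtmanagerservice@localhost:55432/galaxy_rtmanager?search_path=rtmanager&sslmode=disable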
@@ -0,0 +1,112 @@
// Package sqlx contains the small set of helpers shared by every Runtime
// Manager PostgreSQL adapter (runtimerecordstore, operationlogstore,
// healthsnapshotstore). The helpers centralise the boundary translations
// for nullable timestamps and the pgx SQLSTATE codes the adapters
// interpret as domain conflicts.
package sqlx
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgconn"
)
// PgUniqueViolationCode identifies the SQLSTATE returned by PostgreSQL
// when a UNIQUE constraint is violated by INSERT or UPDATE.
const PgUniqueViolationCode = "23505"
// IsUniqueViolation reports whether err is a PostgreSQL unique-violation,
// regardless of constraint name.
func IsUniqueViolation(err error) bool {
var pgErr *pgconn.PgError
if !errors.As(err, &pgErr) {
return false
}
return pgErr.Code == PgUniqueViolationCode
}
// IsNoRows reports whether err is sql.ErrNoRows.
func IsNoRows(err error) bool {
return errors.Is(err, sql.ErrNoRows)
}
// NullableTime returns t.UTC() when non-zero, otherwise nil so the column
// is bound as SQL NULL.
func NullableTime(t time.Time) any {
if t.IsZero() {
return nil
}
return t.UTC()
}
// NullableTimePtr returns t.UTC() when t is non-nil and non-zero, otherwise
// nil. Companion of NullableTime for domain types that use *time.Time to
// express absent timestamps.
func NullableTimePtr(t *time.Time) any {
if t == nil {
return nil
}
return NullableTime(*t)
}
// NullableString returns value when non-empty, otherwise nil so the column
// is bound as SQL NULL. Used for Runtime Manager columns that map empty
// domain strings to NULL (current_container_id, current_image_ref).
func NullableString(value string) any {
if value == "" {
return nil
}
return value
}
// StringFromNullable copies an optional sql.NullString into a domain
// string. NULL becomes the empty string, matching the Runtime Manager
// domain convention that empty == NULL for nullable text columns.
func StringFromNullable(value sql.NullString) string {
if !value.Valid {
return ""
}
return value.String
}
// TimeFromNullable copies an optional sql.NullTime into a domain
// time.Time, applying the global UTC normalisation rule. NULL values
// become the zero time.Time.
func TimeFromNullable(value sql.NullTime) time.Time {
if !value.Valid {
return time.Time{}
}
return value.Time.UTC()
}
// TimePtrFromNullable copies an optional sql.NullTime into a domain
// *time.Time. NULL becomes nil; non-NULL values are wrapped after UTC
// normalisation.
func TimePtrFromNullable(value sql.NullTime) *time.Time {
if !value.Valid {
return nil
}
t := value.Time.UTC()
return &t
}
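// The nullable helpers pair up at the write/read boundary; a sketch with
// placeholder names (record and row are illustrative, assuming a
// sql.NullString scan target):
//
//	// write: empty string binds as SQL NULL
//	args = append(args, NullableString(record.CurrentContainerID))
//	// read: SQL NULL comes back as the empty string
//	record.CurrentContainerID = StringFromNullable(row.CurrentContainerID)
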
// WithTimeout derives a child context bounded by timeout and prefixes
// context errors with operation. Callers must always invoke the returned
// cancel.
func WithTimeout(ctx context.Context, operation string, timeout time.Duration) (context.Context, context.CancelFunc, error) {
if ctx == nil {
return nil, nil, fmt.Errorf("%s: nil context", operation)
}
if err := ctx.Err(); err != nil {
return nil, nil, fmt.Errorf("%s: %w", operation, err)
}
if timeout <= 0 {
return nil, nil, fmt.Errorf("%s: operation timeout must be positive", operation)
}
bounded, cancel := context.WithTimeout(ctx, timeout)
return bounded, cancel, nil
}
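// Callers follow the same pattern everywhere (taken from the
// healthsnapshotstore adapter):
//
//	operationCtx, cancel, err := sqlx.WithTimeout(ctx, "upsert health snapshot", store.operationTimeout)
//	if err != nil {
//		return err
//	}
//	defer cancel()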
@@ -0,0 +1,19 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
type GooseDbVersion struct {
ID int32 `sql:"primary_key"`
VersionID int64
IsApplied bool
Tstamp time.Time
}
@@ -0,0 +1,21 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
type HealthSnapshots struct {
GameID string `sql:"primary_key"`
ContainerID string
Status string
Source string
Details string
ObservedAt time.Time
}
@@ -0,0 +1,27 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
type OperationLog struct {
ID int64 `sql:"primary_key"`
GameID string
OpKind string
OpSource string
SourceRef string
ImageRef string
ContainerID string
Outcome string
ErrorCode string
ErrorMessage string
StartedAt time.Time
FinishedAt *time.Time
}
@@ -0,0 +1,27 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
type RuntimeRecords struct {
GameID string `sql:"primary_key"`
Status string
CurrentContainerID *string
CurrentImageRef *string
EngineEndpoint string
StatePath string
DockerNetwork string
StartedAt *time.Time
StoppedAt *time.Time
RemovedAt *time.Time
LastOpAt time.Time
CreatedAt time.Time
}
@@ -0,0 +1,87 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
var GooseDbVersion = newGooseDbVersionTable("rtmanager", "goose_db_version", "")
type gooseDbVersionTable struct {
postgres.Table
// Columns
ID postgres.ColumnInteger
VersionID postgres.ColumnInteger
IsApplied postgres.ColumnBool
Tstamp postgres.ColumnTimestamp
AllColumns postgres.ColumnList
MutableColumns postgres.ColumnList
DefaultColumns postgres.ColumnList
}
type GooseDbVersionTable struct {
gooseDbVersionTable
EXCLUDED gooseDbVersionTable
}
// AS creates new GooseDbVersionTable with assigned alias
func (a GooseDbVersionTable) AS(alias string) *GooseDbVersionTable {
return newGooseDbVersionTable(a.SchemaName(), a.TableName(), alias)
}
// Schema creates new GooseDbVersionTable with assigned schema name
func (a GooseDbVersionTable) FromSchema(schemaName string) *GooseDbVersionTable {
return newGooseDbVersionTable(schemaName, a.TableName(), a.Alias())
}
// WithPrefix creates new GooseDbVersionTable with assigned table prefix
func (a GooseDbVersionTable) WithPrefix(prefix string) *GooseDbVersionTable {
return newGooseDbVersionTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}
// WithSuffix creates new GooseDbVersionTable with assigned table suffix
func (a GooseDbVersionTable) WithSuffix(suffix string) *GooseDbVersionTable {
return newGooseDbVersionTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}
func newGooseDbVersionTable(schemaName, tableName, alias string) *GooseDbVersionTable {
return &GooseDbVersionTable{
gooseDbVersionTable: newGooseDbVersionTableImpl(schemaName, tableName, alias),
EXCLUDED: newGooseDbVersionTableImpl("", "excluded", ""),
}
}
func newGooseDbVersionTableImpl(schemaName, tableName, alias string) gooseDbVersionTable {
var (
IDColumn = postgres.IntegerColumn("id")
VersionIDColumn = postgres.IntegerColumn("version_id")
IsAppliedColumn = postgres.BoolColumn("is_applied")
TstampColumn = postgres.TimestampColumn("tstamp")
allColumns = postgres.ColumnList{IDColumn, VersionIDColumn, IsAppliedColumn, TstampColumn}
mutableColumns = postgres.ColumnList{VersionIDColumn, IsAppliedColumn, TstampColumn}
defaultColumns = postgres.ColumnList{TstampColumn}
)
return gooseDbVersionTable{
Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
//Columns
ID: IDColumn,
VersionID: VersionIDColumn,
IsApplied: IsAppliedColumn,
Tstamp: TstampColumn,
AllColumns: allColumns,
MutableColumns: mutableColumns,
DefaultColumns: defaultColumns,
}
}
@@ -0,0 +1,93 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
var HealthSnapshots = newHealthSnapshotsTable("rtmanager", "health_snapshots", "")
type healthSnapshotsTable struct {
postgres.Table
// Columns
GameID postgres.ColumnString
ContainerID postgres.ColumnString
Status postgres.ColumnString
Source postgres.ColumnString
Details postgres.ColumnString
ObservedAt postgres.ColumnTimestampz
AllColumns postgres.ColumnList
MutableColumns postgres.ColumnList
DefaultColumns postgres.ColumnList
}
type HealthSnapshotsTable struct {
healthSnapshotsTable
EXCLUDED healthSnapshotsTable
}
// AS creates new HealthSnapshotsTable with assigned alias
func (a HealthSnapshotsTable) AS(alias string) *HealthSnapshotsTable {
return newHealthSnapshotsTable(a.SchemaName(), a.TableName(), alias)
}
// Schema creates new HealthSnapshotsTable with assigned schema name
func (a HealthSnapshotsTable) FromSchema(schemaName string) *HealthSnapshotsTable {
return newHealthSnapshotsTable(schemaName, a.TableName(), a.Alias())
}
// WithPrefix creates new HealthSnapshotsTable with assigned table prefix
func (a HealthSnapshotsTable) WithPrefix(prefix string) *HealthSnapshotsTable {
return newHealthSnapshotsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}
// WithSuffix creates new HealthSnapshotsTable with assigned table suffix
func (a HealthSnapshotsTable) WithSuffix(suffix string) *HealthSnapshotsTable {
return newHealthSnapshotsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}
func newHealthSnapshotsTable(schemaName, tableName, alias string) *HealthSnapshotsTable {
return &HealthSnapshotsTable{
healthSnapshotsTable: newHealthSnapshotsTableImpl(schemaName, tableName, alias),
EXCLUDED: newHealthSnapshotsTableImpl("", "excluded", ""),
}
}
func newHealthSnapshotsTableImpl(schemaName, tableName, alias string) healthSnapshotsTable {
var (
GameIDColumn = postgres.StringColumn("game_id")
ContainerIDColumn = postgres.StringColumn("container_id")
StatusColumn = postgres.StringColumn("status")
SourceColumn = postgres.StringColumn("source")
DetailsColumn = postgres.StringColumn("details")
ObservedAtColumn = postgres.TimestampzColumn("observed_at")
allColumns = postgres.ColumnList{GameIDColumn, ContainerIDColumn, StatusColumn, SourceColumn, DetailsColumn, ObservedAtColumn}
mutableColumns = postgres.ColumnList{ContainerIDColumn, StatusColumn, SourceColumn, DetailsColumn, ObservedAtColumn}
defaultColumns = postgres.ColumnList{ContainerIDColumn, DetailsColumn}
)
return healthSnapshotsTable{
Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
//Columns
GameID: GameIDColumn,
ContainerID: ContainerIDColumn,
Status: StatusColumn,
Source: SourceColumn,
Details: DetailsColumn,
ObservedAt: ObservedAtColumn,
AllColumns: allColumns,
MutableColumns: mutableColumns,
DefaultColumns: defaultColumns,
}
}
@@ -0,0 +1,111 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
var OperationLog = newOperationLogTable("rtmanager", "operation_log", "")
type operationLogTable struct {
postgres.Table
// Columns
ID postgres.ColumnInteger
GameID postgres.ColumnString
OpKind postgres.ColumnString
OpSource postgres.ColumnString
SourceRef postgres.ColumnString
ImageRef postgres.ColumnString
ContainerID postgres.ColumnString
Outcome postgres.ColumnString
ErrorCode postgres.ColumnString
ErrorMessage postgres.ColumnString
StartedAt postgres.ColumnTimestampz
FinishedAt postgres.ColumnTimestampz
AllColumns postgres.ColumnList
MutableColumns postgres.ColumnList
DefaultColumns postgres.ColumnList
}
type OperationLogTable struct {
operationLogTable
EXCLUDED operationLogTable
}
// AS creates new OperationLogTable with assigned alias
func (a OperationLogTable) AS(alias string) *OperationLogTable {
return newOperationLogTable(a.SchemaName(), a.TableName(), alias)
}
// Schema creates new OperationLogTable with assigned schema name
func (a OperationLogTable) FromSchema(schemaName string) *OperationLogTable {
return newOperationLogTable(schemaName, a.TableName(), a.Alias())
}
// WithPrefix creates new OperationLogTable with assigned table prefix
func (a OperationLogTable) WithPrefix(prefix string) *OperationLogTable {
return newOperationLogTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}
// WithSuffix creates new OperationLogTable with assigned table suffix
func (a OperationLogTable) WithSuffix(suffix string) *OperationLogTable {
return newOperationLogTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}
func newOperationLogTable(schemaName, tableName, alias string) *OperationLogTable {
return &OperationLogTable{
operationLogTable: newOperationLogTableImpl(schemaName, tableName, alias),
EXCLUDED: newOperationLogTableImpl("", "excluded", ""),
}
}
func newOperationLogTableImpl(schemaName, tableName, alias string) operationLogTable {
var (
IDColumn = postgres.IntegerColumn("id")
GameIDColumn = postgres.StringColumn("game_id")
OpKindColumn = postgres.StringColumn("op_kind")
OpSourceColumn = postgres.StringColumn("op_source")
SourceRefColumn = postgres.StringColumn("source_ref")
ImageRefColumn = postgres.StringColumn("image_ref")
ContainerIDColumn = postgres.StringColumn("container_id")
OutcomeColumn = postgres.StringColumn("outcome")
ErrorCodeColumn = postgres.StringColumn("error_code")
ErrorMessageColumn = postgres.StringColumn("error_message")
StartedAtColumn = postgres.TimestampzColumn("started_at")
FinishedAtColumn = postgres.TimestampzColumn("finished_at")
allColumns = postgres.ColumnList{IDColumn, GameIDColumn, OpKindColumn, OpSourceColumn, SourceRefColumn, ImageRefColumn, ContainerIDColumn, OutcomeColumn, ErrorCodeColumn, ErrorMessageColumn, StartedAtColumn, FinishedAtColumn}
mutableColumns = postgres.ColumnList{GameIDColumn, OpKindColumn, OpSourceColumn, SourceRefColumn, ImageRefColumn, ContainerIDColumn, OutcomeColumn, ErrorCodeColumn, ErrorMessageColumn, StartedAtColumn, FinishedAtColumn}
defaultColumns = postgres.ColumnList{IDColumn, SourceRefColumn, ImageRefColumn, ContainerIDColumn, ErrorCodeColumn, ErrorMessageColumn}
)
return operationLogTable{
Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
//Columns
ID: IDColumn,
GameID: GameIDColumn,
OpKind: OpKindColumn,
OpSource: OpSourceColumn,
SourceRef: SourceRefColumn,
ImageRef: ImageRefColumn,
ContainerID: ContainerIDColumn,
Outcome: OutcomeColumn,
ErrorCode: ErrorCodeColumn,
ErrorMessage: ErrorMessageColumn,
StartedAt: StartedAtColumn,
FinishedAt: FinishedAtColumn,
AllColumns: allColumns,
MutableColumns: mutableColumns,
DefaultColumns: defaultColumns,
}
}
@@ -0,0 +1,111 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
var RuntimeRecords = newRuntimeRecordsTable("rtmanager", "runtime_records", "")
type runtimeRecordsTable struct {
postgres.Table
// Columns
GameID postgres.ColumnString
Status postgres.ColumnString
CurrentContainerID postgres.ColumnString
CurrentImageRef postgres.ColumnString
EngineEndpoint postgres.ColumnString
StatePath postgres.ColumnString
DockerNetwork postgres.ColumnString
StartedAt postgres.ColumnTimestampz
StoppedAt postgres.ColumnTimestampz
RemovedAt postgres.ColumnTimestampz
LastOpAt postgres.ColumnTimestampz
CreatedAt postgres.ColumnTimestampz
AllColumns postgres.ColumnList
MutableColumns postgres.ColumnList
DefaultColumns postgres.ColumnList
}
type RuntimeRecordsTable struct {
runtimeRecordsTable
EXCLUDED runtimeRecordsTable
}
// AS creates new RuntimeRecordsTable with assigned alias
func (a RuntimeRecordsTable) AS(alias string) *RuntimeRecordsTable {
return newRuntimeRecordsTable(a.SchemaName(), a.TableName(), alias)
}
// FromSchema creates new RuntimeRecordsTable with assigned schema name
func (a RuntimeRecordsTable) FromSchema(schemaName string) *RuntimeRecordsTable {
return newRuntimeRecordsTable(schemaName, a.TableName(), a.Alias())
}
// WithPrefix creates new RuntimeRecordsTable with assigned table prefix
func (a RuntimeRecordsTable) WithPrefix(prefix string) *RuntimeRecordsTable {
return newRuntimeRecordsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}
// WithSuffix creates new RuntimeRecordsTable with assigned table suffix
func (a RuntimeRecordsTable) WithSuffix(suffix string) *RuntimeRecordsTable {
return newRuntimeRecordsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}
func newRuntimeRecordsTable(schemaName, tableName, alias string) *RuntimeRecordsTable {
return &RuntimeRecordsTable{
runtimeRecordsTable: newRuntimeRecordsTableImpl(schemaName, tableName, alias),
EXCLUDED: newRuntimeRecordsTableImpl("", "excluded", ""),
}
}
func newRuntimeRecordsTableImpl(schemaName, tableName, alias string) runtimeRecordsTable {
var (
GameIDColumn = postgres.StringColumn("game_id")
StatusColumn = postgres.StringColumn("status")
CurrentContainerIDColumn = postgres.StringColumn("current_container_id")
CurrentImageRefColumn = postgres.StringColumn("current_image_ref")
EngineEndpointColumn = postgres.StringColumn("engine_endpoint")
StatePathColumn = postgres.StringColumn("state_path")
DockerNetworkColumn = postgres.StringColumn("docker_network")
StartedAtColumn = postgres.TimestampzColumn("started_at")
StoppedAtColumn = postgres.TimestampzColumn("stopped_at")
RemovedAtColumn = postgres.TimestampzColumn("removed_at")
LastOpAtColumn = postgres.TimestampzColumn("last_op_at")
CreatedAtColumn = postgres.TimestampzColumn("created_at")
allColumns = postgres.ColumnList{GameIDColumn, StatusColumn, CurrentContainerIDColumn, CurrentImageRefColumn, EngineEndpointColumn, StatePathColumn, DockerNetworkColumn, StartedAtColumn, StoppedAtColumn, RemovedAtColumn, LastOpAtColumn, CreatedAtColumn}
mutableColumns = postgres.ColumnList{StatusColumn, CurrentContainerIDColumn, CurrentImageRefColumn, EngineEndpointColumn, StatePathColumn, DockerNetworkColumn, StartedAtColumn, StoppedAtColumn, RemovedAtColumn, LastOpAtColumn, CreatedAtColumn}
defaultColumns = postgres.ColumnList{}
)
return runtimeRecordsTable{
Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
//Columns
GameID: GameIDColumn,
Status: StatusColumn,
CurrentContainerID: CurrentContainerIDColumn,
CurrentImageRef: CurrentImageRefColumn,
EngineEndpoint: EngineEndpointColumn,
StatePath: StatePathColumn,
DockerNetwork: DockerNetworkColumn,
StartedAt: StartedAtColumn,
StoppedAt: StoppedAtColumn,
RemovedAt: RemovedAtColumn,
LastOpAt: LastOpAtColumn,
CreatedAt: CreatedAtColumn,
AllColumns: allColumns,
MutableColumns: mutableColumns,
DefaultColumns: defaultColumns,
}
}
@@ -0,0 +1,17 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
// UseSchema sets a new schema name for all generated table SQL builder types. It is recommended to invoke
// this method only once at the beginning of the program.
func UseSchema(schema string) {
GooseDbVersion = GooseDbVersion.FromSchema(schema)
HealthSnapshots = HealthSnapshots.FromSchema(schema)
OperationLog = OperationLog.FromSchema(schema)
RuntimeRecords = RuntimeRecords.FromSchema(schema)
}
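// Typical invocation, once at startup before any statement is built
// (a sketch; the schema name here is illustrative, not shipped config):
//
//   table.UseSchema("rtmanager_test")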
@@ -0,0 +1,106 @@
-- +goose Up
-- Initial Runtime Manager PostgreSQL schema.
--
-- Three tables cover the durable surface of the service:
-- * runtime_records — one row per game with the latest known runtime
-- status and Docker container binding;
-- * operation_log — append-only audit of every start/stop/restart/
-- patch/cleanup/reconcile_* operation RTM performed;
-- * health_snapshots — latest technical health observation per game.
--
-- Schema and the matching `rtmanagerservice` role are provisioned
-- outside this script (in tests via cmd/jetgen/main.go::provisionRoleAndSchema;
-- in production via an ops init script). This migration runs as the
-- schema owner with `search_path=rtmanager` and only contains DDL for the
-- service-owned tables and indexes. ARCHITECTURE.md §Database topology
-- mandates that the per-service role's grants stay restricted to its own
-- schema; consequently this file deliberately deviates from PLAN.md
-- Stage 09's literal `CREATE SCHEMA IF NOT EXISTS rtmanager;` instruction.
-- runtime_records holds one durable record per game with the latest
-- known runtime status and Docker container binding. The status enum
-- (running | stopped | removed) is enforced by a CHECK so domain code
-- can rely on it without reading every callsite. The (status, last_op_at)
-- index drives the periodic container-cleanup worker that scans
-- `status='stopped' AND last_op_at < now() - retention`.
CREATE TABLE runtime_records (
game_id text PRIMARY KEY,
status text NOT NULL,
current_container_id text,
current_image_ref text,
engine_endpoint text NOT NULL,
state_path text NOT NULL,
docker_network text NOT NULL,
started_at timestamptz,
stopped_at timestamptz,
removed_at timestamptz,
last_op_at timestamptz NOT NULL,
created_at timestamptz NOT NULL,
CONSTRAINT runtime_records_status_chk
CHECK (status IN ('running', 'stopped', 'removed'))
);
CREATE INDEX runtime_records_status_last_op_idx
ON runtime_records (status, last_op_at);
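-- A hedged sketch of the scan this index serves (the retention window
-- and the real query live in the cleanup worker, not in this file):
--
--   SELECT game_id, current_container_id
--     FROM runtime_records
--    WHERE status = 'stopped'
--      AND last_op_at < now() - $1::interval
--    ORDER BY last_op_at;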
-- operation_log is an append-only audit of every operation Runtime
-- Manager performed against a game's runtime. The (game_id, started_at
-- DESC) index drives audit reads from the GM/Admin REST surface;
-- finished_at is nullable for in-flight rows even though Stage 13+
-- always finalises the row in the same transaction. The op_kind /
-- op_source / outcome enums are enforced by CHECK constraints to keep
-- the audit schema honest without a separate Go validator.
CREATE TABLE operation_log (
id bigserial PRIMARY KEY,
game_id text NOT NULL,
op_kind text NOT NULL,
op_source text NOT NULL,
source_ref text NOT NULL DEFAULT '',
image_ref text NOT NULL DEFAULT '',
container_id text NOT NULL DEFAULT '',
outcome text NOT NULL,
error_code text NOT NULL DEFAULT '',
error_message text NOT NULL DEFAULT '',
started_at timestamptz NOT NULL,
finished_at timestamptz,
CONSTRAINT operation_log_op_kind_chk
CHECK (op_kind IN (
'start', 'stop', 'restart', 'patch',
'cleanup_container', 'reconcile_adopt', 'reconcile_dispose'
)),
CONSTRAINT operation_log_op_source_chk
CHECK (op_source IN (
'lobby_stream', 'gm_rest', 'admin_rest',
'auto_ttl', 'auto_reconcile'
)),
CONSTRAINT operation_log_outcome_chk
CHECK (outcome IN ('success', 'failure'))
);
CREATE INDEX operation_log_game_started_idx
ON operation_log (game_id, started_at DESC);
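-- The audit read this index serves, sketched (the adapter issues the
-- real statement with a caller-chosen LIMIT):
--
--   SELECT * FROM operation_log
--    WHERE game_id = $1
--    ORDER BY started_at DESC, id DESC
--    LIMIT $2;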
-- health_snapshots stores the latest technical health observation per
-- game. One row per game; later observations overwrite. The status enum
-- mirrors the `event_type` vocabulary on `runtime:health_events`
-- (collapsed to a flat status column for the latest-observation view).
CREATE TABLE health_snapshots (
game_id text PRIMARY KEY,
container_id text NOT NULL DEFAULT '',
status text NOT NULL,
source text NOT NULL,
details jsonb NOT NULL DEFAULT '{}'::jsonb,
observed_at timestamptz NOT NULL,
CONSTRAINT health_snapshots_status_chk
CHECK (status IN (
'healthy', 'probe_failed', 'exited',
'oom', 'inspect_unhealthy', 'container_disappeared'
)),
CONSTRAINT health_snapshots_source_chk
CHECK (source IN ('docker_event', 'inspect', 'probe'))
);
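-- "Later observations overwrite" translates to an upsert of roughly
-- this shape (a sketch; the health-snapshot adapter owns the real
-- statement):
--
--   INSERT INTO health_snapshots
--     (game_id, container_id, status, source, details, observed_at)
--   VALUES ($1, $2, $3, $4, $5, $6)
--   ON CONFLICT (game_id) DO UPDATE SET
--     container_id = EXCLUDED.container_id,
--     status       = EXCLUDED.status,
--     source       = EXCLUDED.source,
--     details      = EXCLUDED.details,
--     observed_at  = EXCLUDED.observed_at;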
-- +goose Down
DROP TABLE IF EXISTS health_snapshots;
DROP TABLE IF EXISTS operation_log;
DROP TABLE IF EXISTS runtime_records;
@@ -0,0 +1,19 @@
// Package migrations exposes the embedded goose migration files used by
// Runtime Manager to provision its `rtmanager` schema in PostgreSQL.
//
// The embedded filesystem is consumed by `pkg/postgres.RunMigrations`
// during rtmanager-service startup and by `cmd/jetgen` when regenerating
// the `internal/adapters/postgres/jet/` code against a transient
// PostgreSQL instance.
package migrations
import "embed"
//go:embed *.sql
var fs embed.FS
// FS returns the embedded filesystem containing every numbered goose
// migration shipped with Runtime Manager.
func FS() embed.FS {
return fs
}
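// A hedged wiring sketch; RunMigrations' real signature lives in
// `pkg/postgres` and may differ from what is assumed here:
//
//   if err := postgres.RunMigrations(ctx, db, migrations.FS()); err != nil {
//       return fmt.Errorf("run rtmanager migrations: %w", err)
//   }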
@@ -0,0 +1,235 @@
// Package operationlogstore implements the PostgreSQL-backed adapter for
// `ports.OperationLogStore`.
//
// The package owns the on-disk shape of the `operation_log` table defined
// in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.OperationLogStore` interface
// declared in `internal/ports/operationlogstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// Append uses `INSERT ... RETURNING id` to surface the bigserial id back
// to callers; ListByGame is index-driven by `operation_log_game_started_idx`.
package operationlogstore
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/operation"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// Config configures one PostgreSQL-backed operation-log store instance.
type Config struct {
// DB stores the connection pool the store uses for every query.
DB *sql.DB
// OperationTimeout bounds one database round trip. The store creates
// a derived context for each operation so callers cannot starve the
// pool with an unbounded ctx.
OperationTimeout time.Duration
}
// Store persists Runtime Manager operation-log entries in PostgreSQL.
type Store struct {
db *sql.DB
operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed operation-log store from cfg.
func New(cfg Config) (*Store, error) {
if cfg.DB == nil {
return nil, errors.New("new postgres operation log store: db must not be nil")
}
if cfg.OperationTimeout <= 0 {
return nil, errors.New("new postgres operation log store: operation timeout must be positive")
}
return &Store{
db: cfg.DB,
operationTimeout: cfg.OperationTimeout,
}, nil
}
// operationLogSelectColumns is the canonical SELECT list for the
// operation_log table, matching scanEntry's column order.
var operationLogSelectColumns = pg.ColumnList{
pgtable.OperationLog.ID,
pgtable.OperationLog.GameID,
pgtable.OperationLog.OpKind,
pgtable.OperationLog.OpSource,
pgtable.OperationLog.SourceRef,
pgtable.OperationLog.ImageRef,
pgtable.OperationLog.ContainerID,
pgtable.OperationLog.Outcome,
pgtable.OperationLog.ErrorCode,
pgtable.OperationLog.ErrorMessage,
pgtable.OperationLog.StartedAt,
pgtable.OperationLog.FinishedAt,
}
// Append inserts entry into the operation log and returns the generated
// bigserial id. entry is validated through operation.OperationEntry.Validate
// before the SQL is issued.
func (store *Store) Append(ctx context.Context, entry operation.OperationEntry) (int64, error) {
if store == nil || store.db == nil {
return 0, errors.New("append operation log entry: nil store")
}
if err := entry.Validate(); err != nil {
return 0, fmt.Errorf("append operation log entry: %w", err)
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "append operation log entry", store.operationTimeout)
if err != nil {
return 0, err
}
defer cancel()
stmt := pgtable.OperationLog.INSERT(
pgtable.OperationLog.GameID,
pgtable.OperationLog.OpKind,
pgtable.OperationLog.OpSource,
pgtable.OperationLog.SourceRef,
pgtable.OperationLog.ImageRef,
pgtable.OperationLog.ContainerID,
pgtable.OperationLog.Outcome,
pgtable.OperationLog.ErrorCode,
pgtable.OperationLog.ErrorMessage,
pgtable.OperationLog.StartedAt,
pgtable.OperationLog.FinishedAt,
).VALUES(
entry.GameID,
string(entry.OpKind),
string(entry.OpSource),
entry.SourceRef,
entry.ImageRef,
entry.ContainerID,
string(entry.Outcome),
entry.ErrorCode,
entry.ErrorMessage,
entry.StartedAt.UTC(),
sqlx.NullableTimePtr(entry.FinishedAt),
).RETURNING(pgtable.OperationLog.ID)
query, args := stmt.Sql()
row := store.db.QueryRowContext(operationCtx, query, args...)
var id int64
if err := row.Scan(&id); err != nil {
return 0, fmt.Errorf("append operation log entry: %w", err)
}
return id, nil
}
// ListByGame returns the most recent entries for gameID, ordered by
// started_at descending and capped by limit. The (game_id,
// started_at DESC) index drives the read.
func (store *Store) ListByGame(ctx context.Context, gameID string, limit int) ([]operation.OperationEntry, error) {
if store == nil || store.db == nil {
return nil, errors.New("list operation log entries by game: nil store")
}
if strings.TrimSpace(gameID) == "" {
return nil, errors.New("list operation log entries by game: game id must not be empty")
}
if limit <= 0 {
return nil, fmt.Errorf("list operation log entries by game: limit must be positive, got %d", limit)
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "list operation log entries by game", store.operationTimeout)
if err != nil {
return nil, err
}
defer cancel()
stmt := pg.SELECT(operationLogSelectColumns).
FROM(pgtable.OperationLog).
WHERE(pgtable.OperationLog.GameID.EQ(pg.String(gameID))).
ORDER_BY(pgtable.OperationLog.StartedAt.DESC(), pgtable.OperationLog.ID.DESC()).
LIMIT(int64(limit))
query, args := stmt.Sql()
rows, err := store.db.QueryContext(operationCtx, query, args...)
if err != nil {
return nil, fmt.Errorf("list operation log entries by game: %w", err)
}
defer rows.Close()
entries := make([]operation.OperationEntry, 0)
for rows.Next() {
entry, err := scanEntry(rows)
if err != nil {
return nil, fmt.Errorf("list operation log entries by game: scan: %w", err)
}
entries = append(entries, entry)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("list operation log entries by game: %w", err)
}
if len(entries) == 0 {
return nil, nil
}
return entries, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanEntry can be shared
// across both single-row reads and iterated reads.
type rowScanner interface {
Scan(dest ...any) error
}
// scanEntry scans one operation_log row from rs.
func scanEntry(rs rowScanner) (operation.OperationEntry, error) {
var (
id int64
gameID string
opKind string
opSource string
sourceRef string
imageRef string
containerID string
outcome string
errorCode string
errorMessage string
startedAt time.Time
finishedAt sql.NullTime
)
if err := rs.Scan(
&id,
&gameID,
&opKind,
&opSource,
&sourceRef,
&imageRef,
&containerID,
&outcome,
&errorCode,
&errorMessage,
&startedAt,
&finishedAt,
); err != nil {
return operation.OperationEntry{}, err
}
return operation.OperationEntry{
ID: id,
GameID: gameID,
OpKind: operation.OpKind(opKind),
OpSource: operation.OpSource(opSource),
SourceRef: sourceRef,
ImageRef: imageRef,
ContainerID: containerID,
Outcome: operation.Outcome(outcome),
ErrorCode: errorCode,
ErrorMessage: errorMessage,
StartedAt: startedAt.UTC(),
FinishedAt: sqlx.TimePtrFromNullable(finishedAt),
}, nil
}
// Ensure Store satisfies the ports.OperationLogStore interface at compile
// time.
var _ ports.OperationLogStore = (*Store)(nil)
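// Usage sketch (DB wiring is assumed; the entry values mirror the
// adapter tests rather than any shipped caller):
//
//   store, err := operationlogstore.New(operationlogstore.Config{
//       DB:               db,
//       OperationTimeout: 5 * time.Second,
//   })
//   if err != nil { /* handle */ }
//   id, err := store.Append(ctx, entry) // entry is a validated operation.OperationEntry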
@@ -0,0 +1,207 @@
package operationlogstore_test
import (
"context"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/adapters/postgres/operationlogstore"
"galaxy/rtmanager/internal/domain/operation"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) { pgtest.RunMain(m) }
func newStore(t *testing.T) *operationlogstore.Store {
t.Helper()
pgtest.TruncateAll(t)
store, err := operationlogstore.New(operationlogstore.Config{
DB: pgtest.Ensure(t).Pool(),
OperationTimeout: pgtest.OperationTimeout,
})
require.NoError(t, err)
return store
}
func successStartEntry(gameID string, startedAt time.Time, sourceRef string) operation.OperationEntry {
finishedAt := startedAt.Add(time.Second)
return operation.OperationEntry{
GameID: gameID,
OpKind: operation.OpKindStart,
OpSource: operation.OpSourceLobbyStream,
SourceRef: sourceRef,
ImageRef: "galaxy/game:v1.2.3",
ContainerID: "container-1",
Outcome: operation.OutcomeSuccess,
StartedAt: startedAt,
FinishedAt: &finishedAt,
}
}
func TestAppendReturnsPositiveIDs(t *testing.T) {
ctx := context.Background()
store := newStore(t)
startedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
id1, err := store.Append(ctx, successStartEntry("game-001", startedAt, "1700000000000-0"))
require.NoError(t, err)
assert.Greater(t, id1, int64(0))
id2, err := store.Append(ctx, successStartEntry("game-001", startedAt.Add(time.Minute), "1700000000001-0"))
require.NoError(t, err)
assert.Greater(t, id2, id1)
}
func TestAppendValidatesEntry(t *testing.T) {
ctx := context.Background()
store := newStore(t)
tests := []struct {
name string
mutate func(*operation.OperationEntry)
}{
{"empty game id", func(e *operation.OperationEntry) { e.GameID = "" }},
{"unknown op kind", func(e *operation.OperationEntry) { e.OpKind = "exotic" }},
{"unknown op source", func(e *operation.OperationEntry) { e.OpSource = "exotic" }},
{"unknown outcome", func(e *operation.OperationEntry) { e.Outcome = "exotic" }},
{"zero started at", func(e *operation.OperationEntry) { e.StartedAt = time.Time{} }},
{"failure without error code", func(e *operation.OperationEntry) {
e.Outcome = operation.OutcomeFailure
e.ErrorCode = ""
}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
entry := successStartEntry("game-001",
time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC), "ref")
tt.mutate(&entry)
_, err := store.Append(ctx, entry)
require.Error(t, err)
})
}
}
func TestListByGameReturnsEntriesNewestFirst(t *testing.T) {
ctx := context.Background()
store := newStore(t)
base := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
for index := range 3 {
_, err := store.Append(ctx, successStartEntry("game-001",
base.Add(time.Duration(index)*time.Minute),
"ref-game-001-"))
require.NoError(t, err)
}
// Foreign-game entry must not appear in the list.
_, err := store.Append(ctx, successStartEntry("game-other", base, "ref-other"))
require.NoError(t, err)
entries, err := store.ListByGame(ctx, "game-001", 10)
require.NoError(t, err)
require.Len(t, entries, 3)
for index := range 2 {
assert.True(t,
!entries[index].StartedAt.Before(entries[index+1].StartedAt),
"entries must be ordered started_at DESC; got %s before %s",
entries[index].StartedAt, entries[index+1].StartedAt,
)
}
}
func TestListByGameRespectsLimit(t *testing.T) {
ctx := context.Background()
store := newStore(t)
base := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
for index := range 5 {
_, err := store.Append(ctx, successStartEntry("game-001",
base.Add(time.Duration(index)*time.Minute), "ref"))
require.NoError(t, err)
}
entries, err := store.ListByGame(ctx, "game-001", 2)
require.NoError(t, err)
require.Len(t, entries, 2)
}
func TestListByGameReturnsEmptyForUnknownGame(t *testing.T) {
ctx := context.Background()
store := newStore(t)
entries, err := store.ListByGame(ctx, "game-missing", 10)
require.NoError(t, err)
assert.Empty(t, entries)
}
func TestListByGameRejectsInvalidArgs(t *testing.T) {
ctx := context.Background()
store := newStore(t)
_, err := store.ListByGame(ctx, "", 10)
require.Error(t, err)
_, err = store.ListByGame(ctx, "game-001", 0)
require.Error(t, err)
_, err = store.ListByGame(ctx, "game-001", -3)
require.Error(t, err)
}
func TestAppendRoundTripsAllFields(t *testing.T) {
ctx := context.Background()
store := newStore(t)
startedAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
finishedAt := startedAt.Add(2 * time.Second)
original := operation.OperationEntry{
GameID: "game-001",
OpKind: operation.OpKindStop,
OpSource: operation.OpSourceGMRest,
SourceRef: "request-7",
ImageRef: "galaxy/game:v2.0.0",
ContainerID: "container-X",
Outcome: operation.OutcomeFailure,
ErrorCode: "container_start_failed",
ErrorMessage: "stop deadline exceeded",
StartedAt: startedAt,
FinishedAt: &finishedAt,
}
id, err := store.Append(ctx, original)
require.NoError(t, err)
entries, err := store.ListByGame(ctx, "game-001", 10)
require.NoError(t, err)
require.Len(t, entries, 1)
got := entries[0]
assert.Equal(t, id, got.ID)
assert.Equal(t, original.GameID, got.GameID)
assert.Equal(t, original.OpKind, got.OpKind)
assert.Equal(t, original.OpSource, got.OpSource)
assert.Equal(t, original.SourceRef, got.SourceRef)
assert.Equal(t, original.ImageRef, got.ImageRef)
assert.Equal(t, original.ContainerID, got.ContainerID)
assert.Equal(t, original.Outcome, got.Outcome)
assert.Equal(t, original.ErrorCode, got.ErrorCode)
assert.Equal(t, original.ErrorMessage, got.ErrorMessage)
assert.True(t, original.StartedAt.Equal(got.StartedAt))
require.NotNil(t, got.FinishedAt)
assert.True(t, original.FinishedAt.Equal(*got.FinishedAt))
assert.Equal(t, time.UTC, got.StartedAt.Location())
assert.Equal(t, time.UTC, got.FinishedAt.Location())
}
func TestNewRejectsNilDB(t *testing.T) {
_, err := operationlogstore.New(operationlogstore.Config{OperationTimeout: time.Second})
require.Error(t, err)
}
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
_, err := operationlogstore.New(operationlogstore.Config{
DB: pgtest.Ensure(t).Pool(),
})
require.Error(t, err)
}
@@ -0,0 +1,500 @@
// Package runtimerecordstore implements the PostgreSQL-backed adapter for
// `ports.RuntimeRecordStore`.
//
// The package owns the on-disk shape of the `runtime_records` table
// defined in
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`
// and translates the schema-agnostic `ports.RuntimeRecordStore` interface
// declared in `internal/ports/runtimerecordstore.go` into concrete
// go-jet/v2 statements driven by the pgx driver.
//
// Lifecycle transitions (UpdateStatus) use compare-and-swap on
// `(status, current_container_id)` rather than holding a SELECT ... FOR
// UPDATE lock across the caller's logic, mirroring the pattern used by
// `lobby/internal/adapters/postgres/gamestore`.
package runtimerecordstore
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/sqlx"
pgtable "galaxy/rtmanager/internal/adapters/postgres/jet/rtmanager/table"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
pg "github.com/go-jet/jet/v2/postgres"
)
// Config configures one PostgreSQL-backed runtime-record store instance.
// The store does not own the underlying *sql.DB lifecycle: the caller
// (typically the service runtime) opens, instruments, migrates, and
// closes the pool.
type Config struct {
// DB stores the connection pool the store uses for every query.
DB *sql.DB
// OperationTimeout bounds one round trip. The store creates a
// derived context for each operation so callers cannot starve the
// pool with an unbounded ctx.
OperationTimeout time.Duration
}
// Store persists Runtime Manager runtime records in PostgreSQL.
type Store struct {
db *sql.DB
operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed runtime-record store from cfg.
func New(cfg Config) (*Store, error) {
if cfg.DB == nil {
return nil, errors.New("new postgres runtime record store: db must not be nil")
}
if cfg.OperationTimeout <= 0 {
return nil, errors.New("new postgres runtime record store: operation timeout must be positive")
}
return &Store{
db: cfg.DB,
operationTimeout: cfg.OperationTimeout,
}, nil
}
// runtimeSelectColumns is the canonical SELECT list for the runtime_records
// table, matching scanRecord's column order.
var runtimeSelectColumns = pg.ColumnList{
pgtable.RuntimeRecords.GameID,
pgtable.RuntimeRecords.Status,
pgtable.RuntimeRecords.CurrentContainerID,
pgtable.RuntimeRecords.CurrentImageRef,
pgtable.RuntimeRecords.EngineEndpoint,
pgtable.RuntimeRecords.StatePath,
pgtable.RuntimeRecords.DockerNetwork,
pgtable.RuntimeRecords.StartedAt,
pgtable.RuntimeRecords.StoppedAt,
pgtable.RuntimeRecords.RemovedAt,
pgtable.RuntimeRecords.LastOpAt,
pgtable.RuntimeRecords.CreatedAt,
}
// Get returns the record identified by gameID. It returns
// runtime.ErrNotFound when no record exists.
func (store *Store) Get(ctx context.Context, gameID string) (runtime.RuntimeRecord, error) {
if store == nil || store.db == nil {
return runtime.RuntimeRecord{}, errors.New("get runtime record: nil store")
}
if strings.TrimSpace(gameID) == "" {
return runtime.RuntimeRecord{}, errors.New("get runtime record: game id must not be empty")
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "get runtime record", store.operationTimeout)
if err != nil {
return runtime.RuntimeRecord{}, err
}
defer cancel()
stmt := pg.SELECT(runtimeSelectColumns).
FROM(pgtable.RuntimeRecords).
WHERE(pgtable.RuntimeRecords.GameID.EQ(pg.String(gameID)))
query, args := stmt.Sql()
row := store.db.QueryRowContext(operationCtx, query, args...)
record, err := scanRecord(row)
if sqlx.IsNoRows(err) {
return runtime.RuntimeRecord{}, runtime.ErrNotFound
}
if err != nil {
return runtime.RuntimeRecord{}, fmt.Errorf("get runtime record: %w", err)
}
return record, nil
}
// Upsert inserts record when no row exists for record.GameID and
// otherwise overwrites every mutable column verbatim. created_at is
// preserved across upserts so the "first time RTM saw the game"
// timestamp stays stable.
func (store *Store) Upsert(ctx context.Context, record runtime.RuntimeRecord) error {
if store == nil || store.db == nil {
return errors.New("upsert runtime record: nil store")
}
if err := record.Validate(); err != nil {
return fmt.Errorf("upsert runtime record: %w", err)
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "upsert runtime record", store.operationTimeout)
if err != nil {
return err
}
defer cancel()
stmt := pgtable.RuntimeRecords.INSERT(
pgtable.RuntimeRecords.GameID,
pgtable.RuntimeRecords.Status,
pgtable.RuntimeRecords.CurrentContainerID,
pgtable.RuntimeRecords.CurrentImageRef,
pgtable.RuntimeRecords.EngineEndpoint,
pgtable.RuntimeRecords.StatePath,
pgtable.RuntimeRecords.DockerNetwork,
pgtable.RuntimeRecords.StartedAt,
pgtable.RuntimeRecords.StoppedAt,
pgtable.RuntimeRecords.RemovedAt,
pgtable.RuntimeRecords.LastOpAt,
pgtable.RuntimeRecords.CreatedAt,
).VALUES(
record.GameID,
string(record.Status),
sqlx.NullableString(record.CurrentContainerID),
sqlx.NullableString(record.CurrentImageRef),
record.EngineEndpoint,
record.StatePath,
record.DockerNetwork,
sqlx.NullableTimePtr(record.StartedAt),
sqlx.NullableTimePtr(record.StoppedAt),
sqlx.NullableTimePtr(record.RemovedAt),
record.LastOpAt.UTC(),
record.CreatedAt.UTC(),
).ON_CONFLICT(pgtable.RuntimeRecords.GameID).DO_UPDATE(
pg.SET(
pgtable.RuntimeRecords.Status.SET(pgtable.RuntimeRecords.EXCLUDED.Status),
pgtable.RuntimeRecords.CurrentContainerID.SET(pgtable.RuntimeRecords.EXCLUDED.CurrentContainerID),
pgtable.RuntimeRecords.CurrentImageRef.SET(pgtable.RuntimeRecords.EXCLUDED.CurrentImageRef),
pgtable.RuntimeRecords.EngineEndpoint.SET(pgtable.RuntimeRecords.EXCLUDED.EngineEndpoint),
pgtable.RuntimeRecords.StatePath.SET(pgtable.RuntimeRecords.EXCLUDED.StatePath),
pgtable.RuntimeRecords.DockerNetwork.SET(pgtable.RuntimeRecords.EXCLUDED.DockerNetwork),
pgtable.RuntimeRecords.StartedAt.SET(pgtable.RuntimeRecords.EXCLUDED.StartedAt),
pgtable.RuntimeRecords.StoppedAt.SET(pgtable.RuntimeRecords.EXCLUDED.StoppedAt),
pgtable.RuntimeRecords.RemovedAt.SET(pgtable.RuntimeRecords.EXCLUDED.RemovedAt),
pgtable.RuntimeRecords.LastOpAt.SET(pgtable.RuntimeRecords.EXCLUDED.LastOpAt),
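// created_at is deliberately absent from this SET list so the
// original insert timestamp survives every subsequent upsert.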
),
)
query, args := stmt.Sql()
if _, err := store.db.ExecContext(operationCtx, query, args...); err != nil {
return fmt.Errorf("upsert runtime record: %w", err)
}
return nil
}
// UpdateStatus applies one status transition with a compare-and-swap
// guard on (status, current_container_id). Validate is invoked before
// any SQL is issued.
func (store *Store) UpdateStatus(ctx context.Context, input ports.UpdateStatusInput) error {
if store == nil || store.db == nil {
return errors.New("update runtime status: nil store")
}
if err := input.Validate(); err != nil {
return err
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "update runtime status", store.operationTimeout)
if err != nil {
return err
}
defer cancel()
now := input.Now.UTC()
stmt, err := buildUpdateStatusStatement(input, now)
if err != nil {
return err
}
query, args := stmt.Sql()
result, err := store.db.ExecContext(operationCtx, query, args...)
if err != nil {
return fmt.Errorf("update runtime status: %w", err)
}
affected, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("update runtime status: rows affected: %w", err)
}
if affected == 0 {
return store.classifyMissingUpdate(operationCtx, input.GameID)
}
return nil
}
// classifyMissingUpdate distinguishes ErrNotFound from ErrConflict after
// an UPDATE that affected zero rows. A row that is absent yields
// ErrNotFound; a row whose status or container_id does not match the
// CAS predicate yields ErrConflict.
func (store *Store) classifyMissingUpdate(ctx context.Context, gameID string) error {
probe := pg.SELECT(pgtable.RuntimeRecords.Status).
FROM(pgtable.RuntimeRecords).
WHERE(pgtable.RuntimeRecords.GameID.EQ(pg.String(gameID)))
probeQuery, probeArgs := probe.Sql()
var current string
row := store.db.QueryRowContext(ctx, probeQuery, probeArgs...)
if err := row.Scan(&current); err != nil {
if sqlx.IsNoRows(err) {
return runtime.ErrNotFound
}
return fmt.Errorf("update runtime status: probe: %w", err)
}
return runtime.ErrConflict
}
// buildUpdateStatusStatement assembles the UPDATE statement applied for
// one runtime-status transition.
//
// status, last_op_at are always updated. The remaining columns are
// driven by the destination:
//
// - StatusStopped: stopped_at is captured at Now.
// - StatusRemoved: removed_at is captured at Now and current_container_id
// is NULLed (the container is gone; the prior id remains observable
// through operation_log).
// - StatusRunning: only status + last_op_at change. Fresh started_at
// and current_container_id are installed via Upsert before any
// stopped → running transition reaches this path; the path exists
// so runtime.AllowedTransitions stays one-to-one with the adapter
// capability matrix even though v1 services use Upsert for this
// case.
func buildUpdateStatusStatement(input ports.UpdateStatusInput, now time.Time) (pg.UpdateStatement, error) {
statusValue := pg.String(string(input.To))
nowValue := pg.TimestampzT(now)
var stmt pg.UpdateStatement
switch input.To {
case runtime.StatusStopped:
stmt = pgtable.RuntimeRecords.UPDATE(
pgtable.RuntimeRecords.Status,
pgtable.RuntimeRecords.LastOpAt,
pgtable.RuntimeRecords.StoppedAt,
).SET(
statusValue,
nowValue,
nowValue,
)
case runtime.StatusRemoved:
stmt = pgtable.RuntimeRecords.UPDATE(
pgtable.RuntimeRecords.Status,
pgtable.RuntimeRecords.LastOpAt,
pgtable.RuntimeRecords.RemovedAt,
pgtable.RuntimeRecords.CurrentContainerID,
).SET(
statusValue,
nowValue,
nowValue,
pg.NULL,
)
case runtime.StatusRunning:
stmt = pgtable.RuntimeRecords.UPDATE(
pgtable.RuntimeRecords.Status,
pgtable.RuntimeRecords.LastOpAt,
).SET(
statusValue,
nowValue,
)
default:
return nil, fmt.Errorf("update runtime status: destination status %q is unsupported", input.To)
}
whereExpr := pg.AND(
pgtable.RuntimeRecords.GameID.EQ(pg.String(input.GameID)),
pgtable.RuntimeRecords.Status.EQ(pg.String(string(input.ExpectedFrom))),
)
if input.ExpectedContainerID != "" {
whereExpr = pg.AND(
whereExpr,
pgtable.RuntimeRecords.CurrentContainerID.EQ(pg.String(input.ExpectedContainerID)),
)
}
return stmt.WHERE(whereExpr), nil
}
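// For the StatusStopped arm the statement is, in substance (a sketch of
// the SQL go-jet renders; placeholder numbering is illustrative):
//
//   UPDATE runtime_records
//      SET status = $1, last_op_at = $2, stopped_at = $3
//    WHERE game_id = $4 AND status = $5
//      [AND current_container_id = $6]  -- only when ExpectedContainerID is set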
// ListByStatus returns every record currently indexed under status.
// Ordering is last_op_at DESC, game_id ASC; the status filter is
// served by `runtime_records_status_last_op_idx`, which Postgres can
// scan backward for the last_op_at ordering.
func (store *Store) ListByStatus(ctx context.Context, status runtime.Status) ([]runtime.RuntimeRecord, error) {
if store == nil || store.db == nil {
return nil, errors.New("list runtime records by status: nil store")
}
if !status.IsKnown() {
return nil, fmt.Errorf("list runtime records by status: status %q is unsupported", status)
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "list runtime records by status", store.operationTimeout)
if err != nil {
return nil, err
}
defer cancel()
stmt := pg.SELECT(runtimeSelectColumns).
FROM(pgtable.RuntimeRecords).
WHERE(pgtable.RuntimeRecords.Status.EQ(pg.String(string(status)))).
ORDER_BY(pgtable.RuntimeRecords.LastOpAt.DESC(), pgtable.RuntimeRecords.GameID.ASC())
query, args := stmt.Sql()
rows, err := store.db.QueryContext(operationCtx, query, args...)
if err != nil {
return nil, fmt.Errorf("list runtime records by status: %w", err)
}
defer rows.Close()
records := make([]runtime.RuntimeRecord, 0)
for rows.Next() {
record, err := scanRecord(rows)
if err != nil {
return nil, fmt.Errorf("list runtime records by status: scan: %w", err)
}
records = append(records, record)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("list runtime records by status: %w", err)
}
if len(records) == 0 {
return nil, nil
}
return records, nil
}
// List returns every runtime record currently stored. Ordering matches
// ListByStatus — last_op_at DESC, game_id ASC — so the REST list
// endpoint sees the freshest activity first.
func (store *Store) List(ctx context.Context) ([]runtime.RuntimeRecord, error) {
if store == nil || store.db == nil {
return nil, errors.New("list runtime records: nil store")
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "list runtime records", store.operationTimeout)
if err != nil {
return nil, err
}
defer cancel()
stmt := pg.SELECT(runtimeSelectColumns).
FROM(pgtable.RuntimeRecords).
ORDER_BY(pgtable.RuntimeRecords.LastOpAt.DESC(), pgtable.RuntimeRecords.GameID.ASC())
query, args := stmt.Sql()
rows, err := store.db.QueryContext(operationCtx, query, args...)
if err != nil {
return nil, fmt.Errorf("list runtime records: %w", err)
}
defer rows.Close()
records := make([]runtime.RuntimeRecord, 0)
for rows.Next() {
record, err := scanRecord(rows)
if err != nil {
return nil, fmt.Errorf("list runtime records: scan: %w", err)
}
records = append(records, record)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("list runtime records: %w", err)
}
if len(records) == 0 {
return nil, nil
}
return records, nil
}
// CountByStatus returns the number of records indexed under each status.
// Statuses with zero records are present in the result with a zero
// count so callers (e.g. the telemetry gauge) can publish a stable
// label set on every reading.
func (store *Store) CountByStatus(ctx context.Context) (map[runtime.Status]int, error) {
if store == nil || store.db == nil {
return nil, errors.New("count runtime records by status: nil store")
}
operationCtx, cancel, err := sqlx.WithTimeout(ctx, "count runtime records by status", store.operationTimeout)
if err != nil {
return nil, err
}
defer cancel()
countAlias := pg.COUNT(pg.STAR).AS("count")
stmt := pg.SELECT(pgtable.RuntimeRecords.Status, countAlias).
FROM(pgtable.RuntimeRecords).
GROUP_BY(pgtable.RuntimeRecords.Status)
query, args := stmt.Sql()
rows, err := store.db.QueryContext(operationCtx, query, args...)
if err != nil {
return nil, fmt.Errorf("count runtime records by status: %w", err)
}
defer rows.Close()
counts := make(map[runtime.Status]int, len(runtime.AllStatuses()))
for _, status := range runtime.AllStatuses() {
counts[status] = 0
}
for rows.Next() {
var status string
var count int
if err := rows.Scan(&status, &count); err != nil {
return nil, fmt.Errorf("count runtime records by status: scan: %w", err)
}
counts[runtime.Status(status)] = count
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("count runtime records by status: %w", err)
}
return counts, nil
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanRecord can be shared
// across both single-row reads and iterated reads.
type rowScanner interface {
Scan(dest ...any) error
}
// scanRecord scans one runtime_records row from rs. Returns sql.ErrNoRows
// verbatim so callers can distinguish "no row" from a hard error.
func scanRecord(rs rowScanner) (runtime.RuntimeRecord, error) {
var (
gameID string
status string
currentContainerID sql.NullString
currentImageRef sql.NullString
engineEndpoint string
statePath string
dockerNetwork string
startedAt sql.NullTime
stoppedAt sql.NullTime
removedAt sql.NullTime
lastOpAt time.Time
createdAt time.Time
)
if err := rs.Scan(
&gameID,
&status,
&currentContainerID,
&currentImageRef,
&engineEndpoint,
&statePath,
&dockerNetwork,
&startedAt,
&stoppedAt,
&removedAt,
&lastOpAt,
&createdAt,
); err != nil {
return runtime.RuntimeRecord{}, err
}
return runtime.RuntimeRecord{
GameID: gameID,
Status: runtime.Status(status),
CurrentContainerID: sqlx.StringFromNullable(currentContainerID),
CurrentImageRef: sqlx.StringFromNullable(currentImageRef),
EngineEndpoint: engineEndpoint,
StatePath: statePath,
DockerNetwork: dockerNetwork,
StartedAt: sqlx.TimePtrFromNullable(startedAt),
StoppedAt: sqlx.TimePtrFromNullable(stoppedAt),
RemovedAt: sqlx.TimePtrFromNullable(removedAt),
LastOpAt: lastOpAt.UTC(),
CreatedAt: createdAt.UTC(),
}, nil
}
// Ensure Store satisfies the ports.RuntimeRecordStore interface at
// compile time.
var _ ports.RuntimeRecordStore = (*Store)(nil)
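// CAS usage sketch (mirrors the adapter tests; ids and timestamps are
// illustrative):
//
//   err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
//       GameID:              "game-001",
//       ExpectedFrom:        runtime.StatusRunning,
//       ExpectedContainerID: "container-1",
//       To:                  runtime.StatusStopped,
//       Now:                 time.Now().UTC(),
//   })
//   // errors.Is(err, runtime.ErrConflict) means another caller won the race;
//   // errors.Is(err, runtime.ErrNotFound) means no record exists for the game.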
@@ -0,0 +1,420 @@
package runtimerecordstore_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/postgres/internal/pgtest"
"galaxy/rtmanager/internal/adapters/postgres/runtimerecordstore"
"galaxy/rtmanager/internal/domain/runtime"
"galaxy/rtmanager/internal/ports"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) { pgtest.RunMain(m) }
func newStore(t *testing.T) *runtimerecordstore.Store {
t.Helper()
pgtest.TruncateAll(t)
store, err := runtimerecordstore.New(runtimerecordstore.Config{
DB: pgtest.Ensure(t).Pool(),
OperationTimeout: pgtest.OperationTimeout,
})
require.NoError(t, err)
return store
}
func runningRecord(t *testing.T, gameID, containerID, imageRef string) runtime.RuntimeRecord {
t.Helper()
now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
started := now
return runtime.RuntimeRecord{
GameID: gameID,
Status: runtime.StatusRunning,
CurrentContainerID: containerID,
CurrentImageRef: imageRef,
EngineEndpoint: "http://galaxy-game-" + gameID + ":8080",
StatePath: "/var/lib/galaxy/games/" + gameID,
DockerNetwork: "galaxy-net",
StartedAt: &started,
LastOpAt: now,
CreatedAt: now,
}
}
func TestUpsertAndGetRoundTrip(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
got, err := store.Get(ctx, record.GameID)
require.NoError(t, err)
assert.Equal(t, record.GameID, got.GameID)
assert.Equal(t, record.Status, got.Status)
assert.Equal(t, record.CurrentContainerID, got.CurrentContainerID)
assert.Equal(t, record.CurrentImageRef, got.CurrentImageRef)
assert.Equal(t, record.EngineEndpoint, got.EngineEndpoint)
assert.Equal(t, record.StatePath, got.StatePath)
assert.Equal(t, record.DockerNetwork, got.DockerNetwork)
require.NotNil(t, got.StartedAt)
assert.True(t, record.StartedAt.Equal(*got.StartedAt))
assert.Equal(t, time.UTC, got.StartedAt.Location())
assert.Equal(t, time.UTC, got.LastOpAt.Location())
assert.Equal(t, time.UTC, got.CreatedAt.Location())
assert.Nil(t, got.StoppedAt)
assert.Nil(t, got.RemovedAt)
}
func TestGetReturnsNotFound(t *testing.T) {
ctx := context.Background()
store := newStore(t)
_, err := store.Get(ctx, "game-missing")
require.ErrorIs(t, err, runtime.ErrNotFound)
}
func TestUpsertOverwritesMutableColumnsPreservesCreatedAt(t *testing.T) {
ctx := context.Background()
store := newStore(t)
original := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, original))
updated := original
updated.CurrentContainerID = "container-2"
updated.CurrentImageRef = "galaxy/game:v1.2.4"
newStarted := original.LastOpAt.Add(time.Minute)
updated.StartedAt = &newStarted
updated.LastOpAt = newStarted
// Fresh CreatedAt simulates a caller passing "now"; the store must
// preserve the original CreatedAt value on conflict.
updated.CreatedAt = newStarted
require.NoError(t, store.Upsert(ctx, updated))
got, err := store.Get(ctx, original.GameID)
require.NoError(t, err)
assert.Equal(t, "container-2", got.CurrentContainerID)
assert.Equal(t, "galaxy/game:v1.2.4", got.CurrentImageRef)
assert.True(t, got.LastOpAt.Equal(newStarted))
assert.True(t, got.CreatedAt.Equal(original.CreatedAt),
"created_at must be preserved across upserts: got %s, want %s",
got.CreatedAt, original.CreatedAt)
}
func TestUpdateStatusRunningToStopped(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
now := record.LastOpAt.Add(2 * time.Minute)
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusRunning,
ExpectedContainerID: record.CurrentContainerID,
To: runtime.StatusStopped,
Now: now,
}))
got, err := store.Get(ctx, record.GameID)
require.NoError(t, err)
assert.Equal(t, runtime.StatusStopped, got.Status)
require.NotNil(t, got.StoppedAt)
assert.True(t, now.Equal(*got.StoppedAt))
assert.True(t, now.Equal(got.LastOpAt))
// container id is preserved on stop; cleanup later NULLs it.
assert.Equal(t, record.CurrentContainerID, got.CurrentContainerID)
}
func TestUpdateStatusRunningToRemovedClearsContainerID(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
now := record.LastOpAt.Add(time.Minute)
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusRemoved,
Now: now,
}))
got, err := store.Get(ctx, record.GameID)
require.NoError(t, err)
assert.Equal(t, runtime.StatusRemoved, got.Status)
require.NotNil(t, got.RemovedAt)
assert.True(t, now.Equal(*got.RemovedAt))
assert.True(t, now.Equal(got.LastOpAt))
assert.Empty(t, got.CurrentContainerID, "current_container_id must be NULL after removal")
}
func TestUpdateStatusStoppedToRemoved(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
stopAt := record.LastOpAt.Add(time.Minute)
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusStopped,
Now: stopAt,
}))
removeAt := stopAt.Add(time.Hour)
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusStopped,
To: runtime.StatusRemoved,
Now: removeAt,
}))
got, err := store.Get(ctx, record.GameID)
require.NoError(t, err)
assert.Equal(t, runtime.StatusRemoved, got.Status)
require.NotNil(t, got.RemovedAt)
assert.True(t, removeAt.Equal(*got.RemovedAt))
assert.True(t, removeAt.Equal(got.LastOpAt))
require.NotNil(t, got.StoppedAt, "stopped_at must remain populated through removal")
assert.True(t, stopAt.Equal(*got.StoppedAt))
assert.Empty(t, got.CurrentContainerID)
}
func TestUpdateStatusReturnsConflictOnFromMismatch(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusStopped, // wrong
To: runtime.StatusRemoved,
Now: record.LastOpAt.Add(time.Minute),
})
require.ErrorIs(t, err, runtime.ErrConflict)
}
func TestUpdateStatusReturnsConflictOnContainerIDMismatch(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusRunning,
ExpectedContainerID: "container-other",
To: runtime.StatusStopped,
Now: record.LastOpAt.Add(time.Minute),
})
require.ErrorIs(t, err, runtime.ErrConflict)
}
func TestUpdateStatusReturnsNotFoundForMissing(t *testing.T) {
ctx := context.Background()
store := newStore(t)
err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: "game-missing",
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusStopped,
Now: time.Now().UTC(),
})
require.ErrorIs(t, err, runtime.ErrNotFound)
}
func TestUpdateStatusValidatesInputBeforeStore(t *testing.T) {
ctx := context.Background()
store := newStore(t)
err := store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: "game-001",
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusStopped,
// Now intentionally zero — validation must reject.
})
require.Error(t, err)
}
// TestUpdateStatusConcurrentCAS asserts the CAS guard: when two callers
// race to apply the running → stopped transition on the same row,
// exactly one wins (returns nil) and the other observes
// runtime.ErrConflict.
func TestUpdateStatusConcurrentCAS(t *testing.T) {
ctx := context.Background()
store := newStore(t)
record := runningRecord(t, "game-001", "container-1", "galaxy/game:v1.2.3")
require.NoError(t, store.Upsert(ctx, record))
const concurrency = 8
results := make([]error, concurrency)
var wg sync.WaitGroup
wg.Add(concurrency)
for index := range concurrency {
go func() {
defer wg.Done()
results[index] = store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: record.GameID,
ExpectedFrom: runtime.StatusRunning,
ExpectedContainerID: record.CurrentContainerID,
To: runtime.StatusStopped,
Now: record.LastOpAt.Add(time.Duration(index+1) * time.Second),
})
}()
}
wg.Wait()
wins, conflicts := 0, 0
for _, err := range results {
switch {
case err == nil:
wins++
case errors.Is(err, runtime.ErrConflict):
conflicts++
default:
t.Errorf("unexpected error from concurrent UpdateStatus: %v", err)
}
}
assert.Equal(t, 1, wins, "exactly one caller must win the CAS race")
assert.Equal(t, concurrency-1, conflicts, "the rest must observe runtime.ErrConflict")
}
func TestListByStatusReturnsExpectedRecords(t *testing.T) {
ctx := context.Background()
store := newStore(t)
a := runningRecord(t, "game-aaa", "container-a", "galaxy/game:v1.2.3")
b := runningRecord(t, "game-bbb", "container-b", "galaxy/game:v1.2.3")
c := runningRecord(t, "game-ccc", "container-c", "galaxy/game:v1.2.3")
for _, r := range []runtime.RuntimeRecord{a, b, c} {
require.NoError(t, store.Upsert(ctx, r))
}
stopAt := a.LastOpAt.Add(time.Minute)
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: b.GameID,
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusStopped,
Now: stopAt,
}))
running, err := store.ListByStatus(ctx, runtime.StatusRunning)
require.NoError(t, err)
gotIDs := map[string]struct{}{}
for _, r := range running {
gotIDs[r.GameID] = struct{}{}
}
assert.Contains(t, gotIDs, a.GameID)
assert.Contains(t, gotIDs, c.GameID)
assert.NotContains(t, gotIDs, b.GameID)
stopped, err := store.ListByStatus(ctx, runtime.StatusStopped)
require.NoError(t, err)
require.Len(t, stopped, 1)
assert.Equal(t, b.GameID, stopped[0].GameID)
}
func TestListByStatusRejectsUnknown(t *testing.T) {
ctx := context.Background()
store := newStore(t)
_, err := store.ListByStatus(ctx, runtime.Status("exotic"))
require.Error(t, err)
}
func TestListReturnsEveryStatus(t *testing.T) {
ctx := context.Background()
store := newStore(t)
a := runningRecord(t, "game-aaa", "container-a", "galaxy/game:v1.2.3")
b := runningRecord(t, "game-bbb", "container-b", "galaxy/game:v1.2.3")
c := runningRecord(t, "game-ccc", "container-c", "galaxy/game:v1.2.3")
for _, r := range []runtime.RuntimeRecord{a, b, c} {
require.NoError(t, store.Upsert(ctx, r))
}
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: b.GameID,
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusStopped,
Now: b.LastOpAt.Add(time.Minute),
}))
all, err := store.List(ctx)
require.NoError(t, err)
require.Len(t, all, 3)
gotIDs := map[string]runtime.Status{}
for _, r := range all {
gotIDs[r.GameID] = r.Status
}
assert.Equal(t, runtime.StatusRunning, gotIDs[a.GameID])
assert.Equal(t, runtime.StatusStopped, gotIDs[b.GameID])
assert.Equal(t, runtime.StatusRunning, gotIDs[c.GameID])
}
func TestListReturnsNilWhenEmpty(t *testing.T) {
ctx := context.Background()
store := newStore(t)
all, err := store.List(ctx)
require.NoError(t, err)
assert.Nil(t, all)
}
func TestCountByStatusReturnsAllBuckets(t *testing.T) {
ctx := context.Background()
store := newStore(t)
a := runningRecord(t, "game-1", "container-1", "galaxy/game:v1.2.3")
b := runningRecord(t, "game-2", "container-2", "galaxy/game:v1.2.3")
c := runningRecord(t, "game-3", "container-3", "galaxy/game:v1.2.3")
for _, r := range []runtime.RuntimeRecord{a, b, c} {
require.NoError(t, store.Upsert(ctx, r))
}
require.NoError(t, store.UpdateStatus(ctx, ports.UpdateStatusInput{
GameID: b.GameID,
ExpectedFrom: runtime.StatusRunning,
To: runtime.StatusStopped,
Now: b.LastOpAt.Add(time.Minute),
}))
counts, err := store.CountByStatus(ctx)
require.NoError(t, err)
for _, status := range runtime.AllStatuses() {
_, ok := counts[status]
assert.True(t, ok, "status %q must appear in counts even when zero", status)
}
assert.Equal(t, 2, counts[runtime.StatusRunning])
assert.Equal(t, 1, counts[runtime.StatusStopped])
assert.Equal(t, 0, counts[runtime.StatusRemoved])
}
func TestNewRejectsNilDB(t *testing.T) {
_, err := runtimerecordstore.New(runtimerecordstore.Config{OperationTimeout: time.Second})
require.Error(t, err)
}
func TestNewRejectsNonPositiveTimeout(t *testing.T) {
_, err := runtimerecordstore.New(runtimerecordstore.Config{
DB: pgtest.Ensure(t).Pool(),
})
require.Error(t, err)
}
@@ -0,0 +1,117 @@
// Package gamelease implements the Redis-backed adapter for
// `ports.GameLeaseStore`.
//
// The lease guards every lifecycle operation Runtime Manager runs
// against one game (start, stop, restart, patch, cleanup, plus the
// reconciler's drift mutations). Acquisition uses `SET NX PX <ttl>`
// with a random caller token; release runs a Lua compare-and-delete
// so a holder that lost the lease through TTL expiry cannot wipe
// another caller's claim.
package gamelease
import (
"context"
"errors"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/adapters/redisstate"
"galaxy/rtmanager/internal/ports"
"github.com/redis/go-redis/v9"
)
// releaseScript removes the per-game lease only when the supplied token
// still owns it. Compare-and-delete prevents a TTL-expired holder from
// clearing another caller's claim.
var releaseScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) == ARGV[1] then
return redis.call("DEL", KEYS[1])
end
return 0
`)
// Config configures one Redis-backed game lease store instance. The
// store does not own the redis client lifecycle; the caller (typically
// the service runtime) opens and closes it.
type Config struct {
// Client stores the Redis client the store uses for every command.
Client *redis.Client
}
// Store persists the per-game lifecycle lease in Redis.
type Store struct {
client *redis.Client
keys redisstate.Keyspace
}
// New constructs one Redis-backed game lease store from cfg.
func New(cfg Config) (*Store, error) {
if cfg.Client == nil {
return nil, errors.New("new rtmanager game lease store: nil redis client")
}
return &Store{
client: cfg.Client,
keys: redisstate.Keyspace{},
}, nil
}
// TryAcquire attempts to acquire the per-game lease for gameID on
// behalf of token, valid for ttl. The returned flag is true on a
// successful claim and false when another caller still owns the lease.
// A non-nil error reports a transport failure and must not be confused
// with a missed lease.
func (store *Store) TryAcquire(ctx context.Context, gameID, token string, ttl time.Duration) (bool, error) {
if store == nil || store.client == nil {
return false, errors.New("try acquire game lease: nil store")
}
if ctx == nil {
return false, errors.New("try acquire game lease: nil context")
}
if strings.TrimSpace(gameID) == "" {
return false, errors.New("try acquire game lease: game id must not be empty")
}
if strings.TrimSpace(token) == "" {
return false, errors.New("try acquire game lease: token must not be empty")
}
if ttl <= 0 {
return false, errors.New("try acquire game lease: ttl must be positive")
}
acquired, err := store.client.SetNX(ctx, store.keys.GameLease(gameID), token, ttl).Result()
if err != nil {
return false, fmt.Errorf("try acquire game lease: %w", err)
}
return acquired, nil
}
// Release removes the per-game lease for gameID only when token still
// matches the stored owner value. A token mismatch is a silent no-op.
func (store *Store) Release(ctx context.Context, gameID, token string) error {
if store == nil || store.client == nil {
return errors.New("release game lease: nil store")
}
if ctx == nil {
return errors.New("release game lease: nil context")
}
if strings.TrimSpace(gameID) == "" {
return errors.New("release game lease: game id must not be empty")
}
if strings.TrimSpace(token) == "" {
return errors.New("release game lease: token must not be empty")
}
if err := releaseScript.Run(
ctx,
store.client,
[]string{store.keys.GameLease(gameID)},
token,
).Err(); err != nil {
return fmt.Errorf("release game lease: %w", err)
}
return nil
}
// Compile-time assertion: Store implements ports.GameLeaseStore.
var _ ports.GameLeaseStore = (*Store)(nil)
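// Lease usage sketch (token generation is the caller's concern; the
// service layer is expected to supply a random per-attempt token, and
// ErrBusy is a hypothetical sentinel, not part of this package):
//
//   acquired, err := store.TryAcquire(ctx, gameID, token, 30*time.Second)
//   if err != nil {
//       return err // transport failure, not a lost race
//   }
//   if !acquired {
//       return ErrBusy // another holder owns the lease
//   }
//   defer func() { _ = store.Release(ctx, gameID, token) }()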
@@ -0,0 +1,133 @@
package gamelease_test
import (
"context"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/redisstate"
"galaxy/rtmanager/internal/adapters/redisstate/gamelease"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func newLeaseStore(t *testing.T) (*gamelease.Store, *miniredis.Miniredis) {
t.Helper()
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
store, err := gamelease.New(gamelease.Config{Client: client})
require.NoError(t, err)
return store, server
}
func TestNewRejectsNilClient(t *testing.T) {
_, err := gamelease.New(gamelease.Config{})
require.Error(t, err)
}
func TestTryAcquireSetsKeyAndTTL(t *testing.T) {
store, server := newLeaseStore(t)
acquired, err := store.TryAcquire(context.Background(), "game-1", "token-A", time.Minute)
require.NoError(t, err)
assert.True(t, acquired)
key := redisstate.Keyspace{}.GameLease("game-1")
assert.True(t, server.Exists(key), "key %q must exist after TryAcquire", key)
stored, err := server.Get(key)
require.NoError(t, err)
assert.Equal(t, "token-A", stored)
// TTL must be positive (miniredis returns the remaining duration).
ttl := server.TTL(key)
assert.Greater(t, ttl, time.Duration(0))
}
func TestTryAcquireReturnsFalseWhenAlreadyHeld(t *testing.T) {
store, _ := newLeaseStore(t)
acquired, err := store.TryAcquire(context.Background(), "game-1", "token-A", time.Minute)
require.NoError(t, err)
require.True(t, acquired)
acquired, err = store.TryAcquire(context.Background(), "game-1", "token-B", time.Minute)
require.NoError(t, err)
assert.False(t, acquired)
}
func TestReleaseRemovesKeyForOwnerToken(t *testing.T) {
store, server := newLeaseStore(t)
_, err := store.TryAcquire(context.Background(), "game-1", "token-A", time.Minute)
require.NoError(t, err)
require.NoError(t, store.Release(context.Background(), "game-1", "token-A"))
key := redisstate.Keyspace{}.GameLease("game-1")
assert.False(t, server.Exists(key), "key %q must be deleted after Release", key)
}
func TestReleaseIsNoOpForForeignToken(t *testing.T) {
store, server := newLeaseStore(t)
_, err := store.TryAcquire(context.Background(), "game-1", "token-A", time.Minute)
require.NoError(t, err)
require.NoError(t, store.Release(context.Background(), "game-1", "token-B"))
key := redisstate.Keyspace{}.GameLease("game-1")
assert.True(t, server.Exists(key), "key %q must still exist when foreign token is released", key)
stored, err := server.Get(key)
require.NoError(t, err)
assert.Equal(t, "token-A", stored)
}
func TestTryAcquireSucceedsAfterTTLExpiry(t *testing.T) {
store, server := newLeaseStore(t)
acquired, err := store.TryAcquire(context.Background(), "game-1", "token-A", time.Minute)
require.NoError(t, err)
require.True(t, acquired)
server.FastForward(2 * time.Minute)
acquired, err = store.TryAcquire(context.Background(), "game-1", "token-B", time.Minute)
require.NoError(t, err)
assert.True(t, acquired)
}
func TestTryAcquireRejectsInvalidArguments(t *testing.T) {
store, _ := newLeaseStore(t)
_, err := store.TryAcquire(context.Background(), "", "token", time.Minute)
require.Error(t, err)
_, err = store.TryAcquire(context.Background(), "game-1", "", time.Minute)
require.Error(t, err)
_, err = store.TryAcquire(context.Background(), "game-1", "token", 0)
require.Error(t, err)
}
func TestReleaseRejectsInvalidArguments(t *testing.T) {
store, _ := newLeaseStore(t)
require.Error(t, store.Release(context.Background(), "", "token"))
require.Error(t, store.Release(context.Background(), "game-1", ""))
}
func TestKeyspaceGameLeaseIsPrefixedAndEncoded(t *testing.T) {
key := redisstate.Keyspace{}.GameLease("game with spaces")
assert.NotEmpty(t, key)
assert.Contains(t, key, "rtmanager:game_lease:")
suffix := key[len("rtmanager:game_lease:"):]
// base64url-encoded suffix must not contain the original spaces.
assert.NotContains(t, suffix, " ")
}
@@ -0,0 +1,44 @@
// Package redisstate hosts the Runtime Manager Redis adapters that share
// a single keyspace. Each sibling subpackage (e.g. `streamoffsets`)
// implements one port and uses Keyspace to compose its keys, so the
// Redis namespace is documented in one place and stays under one prefix.
//
// The package itself only declares the keyspace; concrete stores live in
// nested packages so dependencies (testcontainers, miniredis) stay out
// of consumer build graphs that do not need them.
package redisstate
import "encoding/base64"
// defaultPrefix is the mandatory `rtmanager:` namespace prefix shared by
// every Runtime Manager Redis key.
const defaultPrefix = "rtmanager:"
// Keyspace builds the Runtime Manager Redis keys. The namespace covers
// the stream consumer offsets and the per-game lifecycle lease in v1.
//
// Dynamic key segments are encoded with base64url so raw key structure
// does not depend on caller-provided characters; this matches the
// encoding chosen by `lobby/internal/adapters/redisstate.Keyspace`.
type Keyspace struct{}
// StreamOffset returns the Redis key that stores the last successfully
// processed entry id for one Redis Stream consumer. The streamLabel is
// the short logical identifier of the consumer (e.g. `start_jobs`,
// `stop_jobs`), not the full stream name; it stays stable when the
// underlying stream key is renamed.
func (Keyspace) StreamOffset(streamLabel string) string {
return defaultPrefix + "stream_offsets:" + encodeKeyComponent(streamLabel)
}
// GameLease returns the Redis key that stores the per-game lifecycle
// lease guarding start / stop / restart / patch / cleanup operations
// against the same game. The gameID is base64url-encoded so callers can
// pass any opaque identifier without escaping raw key characters.
func (Keyspace) GameLease(gameID string) string {
return defaultPrefix + "game_lease:" + encodeKeyComponent(gameID)
}
func encodeKeyComponent(value string) string {
return base64.RawURLEncoding.EncodeToString([]byte(value))
}
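// Illustrative key shapes (values computed from the encoding above;
// shown only as examples, not as configuration):
//
//	Keyspace{}.GameLease("game-1")        // "rtmanager:game_lease:Z2FtZS0x"
//	Keyspace{}.StreamOffset("start_jobs") // "rtmanager:stream_offsets:c3RhcnRfam9icw"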
@@ -0,0 +1,94 @@
// Package streamoffsets implements the Redis-backed adapter for
// `ports.StreamOffsetStore`.
//
// The start-jobs and stop-jobs consumers call Load on startup to
// resume from the persisted offset and Save after every successful
// message handling. Keys are produced by
// `redisstate.Keyspace.StreamOffset`, mirroring the lobby pattern.
package streamoffsets
import (
"context"
"errors"
"fmt"
"strings"
"galaxy/rtmanager/internal/adapters/redisstate"
"galaxy/rtmanager/internal/ports"
"github.com/redis/go-redis/v9"
)
// Config configures one Redis-backed stream-offset store instance. The
// store does not own the Redis client lifecycle; the caller (typically
// the service runtime) opens and closes it.
type Config struct {
// Client stores the Redis client the store uses for every command.
Client *redis.Client
}
// Store persists Runtime Manager stream consumer offsets in Redis.
type Store struct {
client *redis.Client
keys redisstate.Keyspace
}
// New constructs one Redis-backed stream-offset store from cfg.
func New(cfg Config) (*Store, error) {
if cfg.Client == nil {
return nil, errors.New("new rtmanager stream offset store: nil redis client")
}
return &Store{
client: cfg.Client,
keys: redisstate.Keyspace{},
}, nil
}
// Load returns the last processed entry id for streamLabel when one is
// stored. A missing key returns ("", false, nil).
func (store *Store) Load(ctx context.Context, streamLabel string) (string, bool, error) {
if store == nil || store.client == nil {
return "", false, errors.New("load rtmanager stream offset: nil store")
}
if ctx == nil {
return "", false, errors.New("load rtmanager stream offset: nil context")
}
if strings.TrimSpace(streamLabel) == "" {
return "", false, errors.New("load rtmanager stream offset: stream label must not be empty")
}
value, err := store.client.Get(ctx, store.keys.StreamOffset(streamLabel)).Result()
switch {
case errors.Is(err, redis.Nil):
return "", false, nil
case err != nil:
return "", false, fmt.Errorf("load rtmanager stream offset: %w", err)
}
return value, true, nil
}
// Save stores entryID as the new offset for streamLabel. The key has no
// TTL — offsets are durable and only overwritten by subsequent Saves.
func (store *Store) Save(ctx context.Context, streamLabel, entryID string) error {
if store == nil || store.client == nil {
return errors.New("save rtmanager stream offset: nil store")
}
if ctx == nil {
return errors.New("save rtmanager stream offset: nil context")
}
if strings.TrimSpace(streamLabel) == "" {
return errors.New("save rtmanager stream offset: stream label must not be empty")
}
if strings.TrimSpace(entryID) == "" {
return errors.New("save rtmanager stream offset: entry id must not be empty")
}
if err := store.client.Set(ctx, store.keys.StreamOffset(streamLabel), entryID, 0).Err(); err != nil {
return fmt.Errorf("save rtmanager stream offset: %w", err)
}
return nil
}
// Ensure Store satisfies the ports.StreamOffsetStore interface at
// compile time.
var _ ports.StreamOffsetStore = (*Store)(nil)
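// consumeFromOffset is an illustrative sketch, not part of this
// package's shipped surface: it shows the resume loop the package
// comment describes. Load runs once on startup, Save runs after every
// successfully handled entry. The stream key, the "start_jobs" label
// pairing, and the handler are assumptions for the example only.
func consumeFromOffset(ctx context.Context, store *Store, client *redis.Client, handle func(redis.XMessage) error) error {
	offset, found, err := store.Load(ctx, "start_jobs")
	if err != nil {
		return err
	}
	if !found {
		offset = "0-0" // no persisted offset yet: start from the beginning of the stream
	}
	for {
		streams, err := client.XRead(ctx, &redis.XReadArgs{
			Streams: []string{"rtmanager:streams:start_jobs", offset}, // hypothetical stream key
			Block:   0,                                                // block until a new entry arrives
		}).Result()
		if err != nil {
			return fmt.Errorf("read start jobs stream: %w", err)
		}
		for _, stream := range streams {
			for _, msg := range stream.Messages {
				if err := handle(msg); err != nil {
					return err
				}
				// Persist the offset only after the handler succeeds, so a
				// crash replays the entry instead of silently skipping it.
				if err := store.Save(ctx, "start_jobs", msg.ID); err != nil {
					return err
				}
				offset = msg.ID
			}
		}
	}
}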
@@ -0,0 +1,86 @@
package streamoffsets_test
import (
"context"
"testing"
"galaxy/rtmanager/internal/adapters/redisstate"
"galaxy/rtmanager/internal/adapters/redisstate/streamoffsets"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func newOffsetStore(t *testing.T) (*streamoffsets.Store, *miniredis.Miniredis) {
t.Helper()
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
store, err := streamoffsets.New(streamoffsets.Config{Client: client})
require.NoError(t, err)
return store, server
}
func TestNewRejectsNilClient(t *testing.T) {
_, err := streamoffsets.New(streamoffsets.Config{})
require.Error(t, err)
}
func TestLoadMissingReturnsNotFound(t *testing.T) {
store, _ := newOffsetStore(t)
id, found, err := store.Load(context.Background(), "start_jobs")
require.NoError(t, err)
assert.False(t, found)
assert.Empty(t, id)
}
func TestSaveLoadRoundTrip(t *testing.T) {
store, server := newOffsetStore(t)
require.NoError(t, store.Save(context.Background(), "start_jobs", "1700000000000-0"))
id, found, err := store.Load(context.Background(), "start_jobs")
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, "1700000000000-0", id)
// The persisted key must follow the rtmanager keyspace prefix.
expectedKey := redisstate.Keyspace{}.StreamOffset("start_jobs")
assert.True(t, server.Exists(expectedKey),
"key %q must exist after Save", expectedKey)
}
func TestSaveOverwritesPriorValue(t *testing.T) {
store, _ := newOffsetStore(t)
require.NoError(t, store.Save(context.Background(), "start_jobs", "100-0"))
require.NoError(t, store.Save(context.Background(), "start_jobs", "200-0"))
id, found, err := store.Load(context.Background(), "start_jobs")
require.NoError(t, err)
assert.True(t, found)
assert.Equal(t, "200-0", id)
}
func TestLoadAndSaveRejectInvalidArguments(t *testing.T) {
store, _ := newOffsetStore(t)
require.Error(t, store.Save(context.Background(), "", "100-0"))
require.Error(t, store.Save(context.Background(), "start_jobs", ""))
_, _, err := store.Load(context.Background(), "")
require.Error(t, err)
}
func TestKeyspaceStreamOffsetIsPrefixed(t *testing.T) {
key := redisstate.Keyspace{}.StreamOffset("start_jobs")
assert.NotEmpty(t, key)
assert.Contains(t, key, "rtmanager:stream_offsets:")
// base64url-encoded label must not contain raw colons.
suffix := key[len("rtmanager:stream_offsets:"):]
assert.NotContains(t, suffix, ":")
}