feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
+336
View File
@@ -0,0 +1,336 @@
package ports
import (
"context"
"errors"
"fmt"
"time"
)
// PullPolicy enumerates the supported image pull policies. The value
// set mirrors `config.ImagePullPolicy`; the runtime/wiring layer
// translates between the two so the docker adapter does not import
// `internal/config` and the port package stays free of configuration
// concerns. Values outside the frozen set are rejected by IsKnown.
type PullPolicy string
// Supported pull policies, frozen by `rtmanager/README.md §Configuration`.
const (
	// PullPolicyIfMissing pulls the image only when it is absent from
	// the local Docker daemon.
	PullPolicyIfMissing PullPolicy = "if_missing"
	// PullPolicyAlways pulls the image on every start.
	PullPolicyAlways PullPolicy = "always"
	// PullPolicyNever skips the pull and fails the start when the image
	// is absent.
	PullPolicyNever PullPolicy = "never"
)
// IsKnown reports whether policy is one of the three values in the
// frozen pull-policy vocabulary.
func (policy PullPolicy) IsKnown() bool {
	return policy == PullPolicyIfMissing ||
		policy == PullPolicyAlways ||
		policy == PullPolicyNever
}
//go:generate go run go.uber.org/mock/mockgen -destination=../adapters/docker/mocks/mock_dockerclient.go -package=mocks galaxy/rtmanager/internal/ports DockerClient

// DockerClient is the narrow Docker port Runtime Manager uses. The
// production adapter wraps `github.com/docker/docker/client`; service
// tests use a generated mock. The surface intentionally exposes only
// the operations RTM needs; `docker logs` and stream attach are out
// of scope for v1.
type DockerClient interface {
	// EnsureNetwork verifies the configured Docker network is present
	// on the daemon. It returns ErrNetworkMissing when the network does
	// not exist; RTM never creates networks itself.
	EnsureNetwork(ctx context.Context, name string) error
	// PullImage pulls ref according to policy. It returns nil on
	// success and a wrapped Docker error otherwise. Implementations
	// honour PullPolicyNever by skipping the pull and returning nil
	// when the image is already present, or returning ErrImageNotFound
	// otherwise.
	PullImage(ctx context.Context, ref string, policy PullPolicy) error
	// InspectImage returns image metadata for ref. It returns
	// ErrImageNotFound when no such image exists locally.
	InspectImage(ctx context.Context, ref string) (ImageInspect, error)
	// InspectContainer returns container metadata for containerID. It
	// returns ErrContainerNotFound when no such container exists.
	InspectContainer(ctx context.Context, containerID string) (ContainerInspect, error)
	// Run creates and starts one container according to spec. The
	// returned RunResult carries the assigned container id, the stable
	// engine endpoint, and the wall-clock observed by the daemon.
	Run(ctx context.Context, spec RunSpec) (RunResult, error)
	// Stop sends SIGTERM to the container followed by SIGKILL after
	// timeout. It returns nil when the container exited cleanly and
	// ErrContainerNotFound when it is already gone.
	Stop(ctx context.Context, containerID string, timeout time.Duration) error
	// Remove removes the container. It returns nil when the container
	// no longer exists (idempotent removal).
	Remove(ctx context.Context, containerID string) error
	// List returns container summaries that match filter. Implementations
	// translate ListFilter into the appropriate Docker filters argument.
	List(ctx context.Context, filter ListFilter) ([]ContainerSummary, error)
	// EventsListen subscribes to the Docker events stream and returns
	// the decoded event channel together with an asynchronous error
	// channel. The caller cancels ctx to terminate the subscription.
	// Implementations close events when the subscription terminates.
	EventsListen(ctx context.Context) (events <-chan DockerEvent, errs <-chan error, err error)
}
// RunSpec stores the request shape used by DockerClient.Run. Callers
// are expected to pass it through Validate before handing it to an
// adapter.
type RunSpec struct {
	// Name stores the container name (typically `galaxy-game-{game_id}`).
	Name string
	// Image stores the image reference resolved by the producer.
	Image string
	// Hostname stores the container hostname assigned for the embedded
	// Docker DNS to resolve from other containers on the network.
	Hostname string
	// Network stores the user-defined Docker network the container
	// attaches to.
	Network string
	// Env stores the environment variables forwarded to the container
	// (e.g. GAME_STATE_PATH, STORAGE_PATH).
	Env map[string]string
	// Cmd overrides the entrypoint arguments for the container. Production
	// callers leave it nil so the engine image's own CMD runs; tests use
	// it to drive a tiny container that does not embed RTM-specific
	// behaviour. Empty Cmd means "use image default", which mirrors the
	// Docker SDK contract.
	Cmd []string
	// Labels stores the labels applied to the container so the
	// reconciler and the events listener can identify it.
	Labels map[string]string
	// BindMounts stores the host-to-container bind mounts. RTM uses
	// exactly one mount in v1 (the per-game state directory).
	BindMounts []BindMount
	// LogDriver stores the Docker logging driver name.
	LogDriver string
	// LogOpts stores the logging-driver options as key=value pairs.
	LogOpts map[string]string
	// CPUQuota stores the `--cpus` value applied as a resource limit.
	CPUQuota float64
	// Memory stores the `--memory` value (e.g. `512m`) applied as a
	// resource limit.
	Memory string
	// PIDsLimit stores the `--pids-limit` value.
	PIDsLimit int
}
// BindMount stores one host-to-container bind mount.
type BindMount struct {
	// HostPath stores the absolute host path bound into the container.
	HostPath string
	// MountPath stores the absolute in-container path the host
	// directory is mounted at.
	MountPath string
	// ReadOnly mounts the host path read-only when true; the zero
	// value yields a read-write mount.
	ReadOnly bool
}
// RunResult stores the response shape returned by DockerClient.Run.
type RunResult struct {
	// ContainerID identifies the created container.
	ContainerID string
	// EngineEndpoint stores the stable URL Game Master uses to reach
	// the engine container.
	EngineEndpoint string
	// StartedAt stores the wall-clock the daemon observed for the
	// start event.
	StartedAt time.Time
}
// ImageInspect stores the subset of `docker image inspect` fields RTM
// reads. Only Labels are required at start time (resource limits live
// there); other fields may be populated when convenient for diagnostics.
type ImageInspect struct {
	// Ref stores the image reference the inspection was scoped to.
	Ref string
	// Labels stores the image-level labels (e.g.
	// `com.galaxy.cpu_quota`).
	Labels map[string]string
}
// ContainerInspect stores the subset of `docker inspect` fields RTM
// reads from a running or exited container.
type ContainerInspect struct {
	// ID identifies the container.
	ID string
	// ImageRef stores the image reference the container was started
	// from.
	ImageRef string
	// Hostname stores the container hostname.
	Hostname string
	// Labels stores the container labels assigned at create time.
	Labels map[string]string
	// Status stores the verbatim Docker `State.Status` value (e.g.
	// `running`, `exited`).
	Status string
	// Health stores the verbatim Docker `State.Health.Status` value
	// (e.g. `healthy`, `unhealthy`). Empty when the image declares no
	// HEALTHCHECK.
	Health string
	// RestartCount stores the Docker `RestartCount` observed at
	// inspection time.
	RestartCount int
	// StartedAt stores the daemon-observed start wall-clock.
	StartedAt time.Time
	// FinishedAt stores the daemon-observed exit wall-clock. Zero when
	// the container is still running.
	FinishedAt time.Time
	// ExitCode stores the exit code reported by the daemon. Zero when
	// the container is still running.
	ExitCode int
	// OOMKilled reports whether the container was killed by the OOM
	// killer.
	OOMKilled bool
}
// ContainerSummary stores the subset of `docker ps` fields RTM reads.
type ContainerSummary struct {
	// ID identifies the container.
	ID string
	// ImageRef stores the image reference.
	ImageRef string
	// Hostname stores the container hostname.
	Hostname string
	// Labels stores the container labels assigned at create time.
	Labels map[string]string
	// Status stores the verbatim Docker `State.Status` value.
	Status string
	// StartedAt stores the daemon-observed start wall-clock.
	StartedAt time.Time
}
// ListFilter stores the criteria used by DockerClient.List.
type ListFilter struct {
	// Labels stores label key=value pairs that must all be present on
	// the container (AND semantics). Empty matches every container.
	Labels map[string]string
}
// DockerEvent stores one decoded entry from the Docker events stream.
// RTM only consumes container-scoped events.
type DockerEvent struct {
	// Action stores the Docker event action verbatim (e.g. `start`,
	// `die`, `oom`, `destroy`).
	Action string
	// ContainerID identifies the container the event refers to.
	ContainerID string
	// Labels stores the container labels carried by the event
	// attributes when present.
	Labels map[string]string
	// ExitCode stores the exit code attribute when applicable (e.g.
	// `die` events). Zero when the action does not carry one.
	ExitCode int
	// OccurredAt stores the daemon-observed event wall-clock.
	OccurredAt time.Time
}
// String returns policy as its stored enum value. Convenient for use in
// log fields and error messages; it also satisfies fmt.Stringer.
func (policy PullPolicy) String() string {
	return string(policy)
}
// ErrNetworkMissing reports that the configured Docker network is not
// present on the daemon. Compare with errors.Is.
var ErrNetworkMissing = errors.New("docker network missing")

// ErrImageNotFound reports that an image reference does not resolve to
// a local Docker image. Compare with errors.Is.
var ErrImageNotFound = errors.New("docker image not found")

// ErrContainerNotFound reports that a container id does not resolve to
// a Docker container. Compare with errors.Is.
var ErrContainerNotFound = errors.New("docker container not found")
// Validate reports whether spec carries the structural invariants
// required by DockerClient.Run. Adapters use it as the first defence
// against malformed specs originating in service code.
func (spec RunSpec) Validate() error {
	if spec.Name == "" {
		return fmt.Errorf("run spec: name must not be empty")
	}
	if spec.Image == "" {
		return fmt.Errorf("run spec: image must not be empty")
	}
	if spec.Hostname == "" {
		return fmt.Errorf("run spec: hostname must not be empty")
	}
	if spec.Network == "" {
		return fmt.Errorf("run spec: network must not be empty")
	}
	if spec.LogDriver == "" {
		return fmt.Errorf("run spec: log driver must not be empty")
	}
	// Written as !(x > 0) instead of x <= 0 so a NaN quota is rejected
	// too: NaN fails every ordered comparison, so `NaN <= 0` is false
	// and a NaN CPUQuota would otherwise slip through validation.
	if !(spec.CPUQuota > 0) {
		return fmt.Errorf("run spec: cpu quota must be positive")
	}
	if spec.Memory == "" {
		return fmt.Errorf("run spec: memory must not be empty")
	}
	if spec.PIDsLimit <= 0 {
		return fmt.Errorf("run spec: pids limit must be positive")
	}
	for index, mount := range spec.BindMounts {
		if mount.HostPath == "" {
			return fmt.Errorf("run spec: bind mounts[%d]: host path must not be empty", index)
		}
		if mount.MountPath == "" {
			return fmt.Errorf("run spec: bind mounts[%d]: mount path must not be empty", index)
		}
	}
	return nil
}
+38
View File
@@ -0,0 +1,38 @@
package ports
import (
"context"
"time"
)
// GameLeaseStore guards every lifecycle operation Runtime Manager runs
// against one game. The lease serialises starts, stops, restarts, patches,
// and cleanup operations on the same `game_id` across all entry points
// (Lobby stream consumer, GM REST handler, Admin REST handler, periodic
// workers) so concurrent operations cannot corrupt each other's
// intermediate Docker / PostgreSQL state.
//
// The lease is a per-game key with a random token. Adapters use SETNX with
// PX TTL on TryAcquire and a compare-and-delete on Release so a publisher
// that lost the lease (TTL expiry, replica swap) cannot clear another
// caller's claim.
//
// In v1 the lease is not renewed mid-operation; callers must keep the
// total operation duration below the configured TTL
// (`RTMANAGER_GAME_LEASE_TTL_SECONDS`, default 60s). Multi-GB image pulls
// can exceed this in production and remain a known limitation; later
// stages may introduce a renewal helper if it bites.
type GameLeaseStore interface {
	// TryAcquire attempts to acquire the per-game lease for gameID owned
	// by token for ttl. It returns true when the lease was acquired and
	// false when another holder still owns it. A non-nil error reports
	// transport-level failures (Redis unreachable, network timeout) and
	// must not be confused with a missed lease.
	TryAcquire(ctx context.Context, gameID, token string, ttl time.Duration) (acquired bool, err error)
	// Release removes the per-game lease for gameID only when token still
	// matches the stored owner value. Releasing a lease the caller no
	// longer owns is a silent no-op so a TTL-driven release race never
	// clears another caller's claim.
	Release(ctx context.Context, gameID, token string) error
}
@@ -0,0 +1,81 @@
package ports
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/domain/health"
)
// HealthEventPublisher emits one entry on the `runtime:health_events`
// Redis Stream and updates `health_snapshots` with the latest observation
// for the affected game. Adapters publish and snapshot in one call so
// every emission durably advances both surfaces; partial publishes (event
// without snapshot, or vice versa) are not allowed.
//
// The start service emits `container_started` through this port; the
// periodic Docker inspect, the active probe, and the Docker events
// listener publish the rest of the event types through the same port
// without changing its surface.
type HealthEventPublisher interface {
	// Publish records envelope on the configured `runtime:health_events`
	// stream and upserts the matching `health_snapshots` row. A non-nil
	// error reports a transport or storage failure; the caller treats it
	// as a degraded emission per `rtmanager/README.md §Notification
	// Contracts` (the underlying business state is the source of truth,
	// not the event stream).
	Publish(ctx context.Context, envelope HealthEventEnvelope) error
}
// HealthEventEnvelope carries the payload published on
// `runtime:health_events`. The fields mirror the AsyncAPI schema frozen
// in `rtmanager/api/runtime-health-asyncapi.yaml`; adapters serialise
// every field verbatim so consumers see the contracted shape.
type HealthEventEnvelope struct {
	// GameID identifies the platform game the event refers to.
	GameID string
	// ContainerID identifies the Docker container observed by the event
	// source. May differ from the record's current container id after a
	// restart race; consumers are expected to treat the value as the
	// observation's container, not the record's.
	ContainerID string
	// EventType classifies the event per the frozen vocabulary in
	// `galaxy/rtmanager/internal/domain/health.EventType`.
	EventType health.EventType
	// OccurredAt stores the wall-clock at which Runtime Manager observed
	// the event. Adapters convert it to UTC milliseconds for the wire
	// payload (`occurred_at_ms`).
	OccurredAt time.Time
	// Details stores the event-type-specific JSON payload. Adapters
	// persist and stream it verbatim; nil and empty values are treated as
	// the canonical empty-object payload.
	Details json.RawMessage
}
// Validate reports whether envelope satisfies the structural invariants
// implied by the AsyncAPI schema. It returns the first violation found,
// in declaration order, and nil when the envelope is well-formed.
func (envelope HealthEventEnvelope) Validate() error {
	if len(strings.TrimSpace(envelope.GameID)) == 0 {
		return fmt.Errorf("health event envelope: game id must not be empty")
	}
	if len(strings.TrimSpace(envelope.ContainerID)) == 0 {
		return fmt.Errorf("health event envelope: container id must not be empty")
	}
	if known := envelope.EventType.IsKnown(); !known {
		return fmt.Errorf("health event envelope: event type %q is unsupported", envelope.EventType)
	}
	if envelope.OccurredAt.IsZero() {
		return fmt.Errorf("health event envelope: occurred at must not be zero")
	}
	// Details is optional; only a non-empty payload must parse as JSON.
	if details := envelope.Details; len(details) > 0 && !json.Valid(details) {
		return fmt.Errorf("health event envelope: details must be valid JSON when non-empty")
	}
	return nil
}
@@ -0,0 +1,22 @@
package ports
import (
"context"
"galaxy/rtmanager/internal/domain/health"
)
// HealthSnapshotStore stores the latest technical-health observation per
// game. Adapters keep one row per game_id; later observations overwrite
// earlier ones (last-writer-wins).
type HealthSnapshotStore interface {
	// Upsert installs snapshot as the latest observation for
	// snapshot.GameID. Adapters validate snapshot through
	// health.HealthSnapshot.Validate before touching the store.
	Upsert(ctx context.Context, snapshot health.HealthSnapshot) error
	// Get returns the latest snapshot for gameID. It returns
	// runtime.ErrNotFound (declared in
	// `galaxy/rtmanager/internal/domain/runtime`) when no snapshot has
	// been recorded yet.
	Get(ctx context.Context, gameID string) (health.HealthSnapshot, error)
}
@@ -0,0 +1,91 @@
package ports
import (
"context"
"fmt"
"strings"
)
// JobResultPublisher emits one entry on the `runtime:job_results` Redis
// Stream per finalised start or stop runtime job. Adapters serialise
// every JobResult field verbatim so consumers (Game Lobby's
// runtime-job-result worker today, future services tomorrow) see the
// AsyncAPI shape frozen in `rtmanager/api/runtime-jobs-asyncapi.yaml`.
//
// The start-jobs and stop-jobs consumers publish through this port.
// The synchronous REST handlers do not — REST callers receive the same
// `Result` shape directly from the service layer.
type JobResultPublisher interface {
	// Publish records result on the configured `runtime:job_results`
	// stream. A non-nil error reports a transport or serialisation
	// failure; the caller treats the failure as a degraded emission
	// (the operation_log already records the durable outcome).
	Publish(ctx context.Context, result JobResult) error
}
// JobResult outcome values frozen by the
// `RuntimeJobResultPayload.outcome` enum.
const (
	// JobOutcomeSuccess marks a successful start or stop, including the
	// idempotent replay variant (`error_code=replay_no_op`).
	JobOutcomeSuccess = "success"
	// JobOutcomeFailure marks a stable failure for which the payload
	// carries a non-empty `error_code`.
	JobOutcomeFailure = "failure"
)
// JobResult carries the wire payload published on
// `runtime:job_results`. The fields mirror the AsyncAPI schema frozen
// in `rtmanager/api/runtime-jobs-asyncapi.yaml`; adapters serialise
// every field verbatim so consumers see the contracted shape. Fields
// that are required by the contract (every field on this struct) are
// always present in the wire entry — even when their string value is
// empty (allowed for `container_id` / `engine_endpoint` / `error_code`
// / `error_message` on appropriate variants).
type JobResult struct {
	// GameID identifies the platform game the job acted on. Required.
	GameID string
	// Outcome reports the high-level outcome. Must be `success` or
	// `failure` (use the JobOutcome* constants).
	Outcome string
	// ContainerID stores the Docker container id. Populated on
	// `success` for fresh starts and replays; empty on `failure` and
	// on `success/replay_no_op` for stop jobs that observed a removed
	// record.
	ContainerID string
	// EngineEndpoint stores the stable engine URL
	// `http://galaxy-game-{game_id}:8080`. Populated alongside
	// ContainerID, empty in the same cases.
	EngineEndpoint string
	// ErrorCode stores the stable error code from
	// `rtmanager/README.md §Error Model`. Empty for fresh successes,
	// `replay_no_op` for idempotent replays, one of the failure
	// codes otherwise.
	ErrorCode string
	// ErrorMessage stores the operator-readable detail. Empty for
	// successes; populated alongside ErrorCode on failure.
	ErrorMessage string
}
// Validate reports whether result satisfies the structural invariants
// implied by the AsyncAPI schema: a non-empty game id and one of the
// two known outcome values. The remaining fields are required to be
// present on the wire but may be empty strings, so Validate does not
// constrain them.
func (result JobResult) Validate() error {
	if strings.TrimSpace(result.GameID) == "" {
		return fmt.Errorf("job result: game id must not be empty")
	}
	if result.Outcome != JobOutcomeSuccess && result.Outcome != JobOutcomeFailure {
		return fmt.Errorf("job result: outcome %q is unsupported", result.Outcome)
	}
	return nil
}
+47
View File
@@ -0,0 +1,47 @@
package ports
import (
"context"
"errors"
)
// LobbyInternalClient is the synchronous trusted-REST port Runtime
// Manager uses to read ancillary game metadata from Game Lobby. Stage
// 13 calls GetGame purely for diagnostic context; the start envelope
// already carries the only required field (`image_ref`) so a
// LobbyInternalClient failure must not abort the start operation.
type LobbyInternalClient interface {
	// GetGame returns the Lobby game record for gameID. It returns
	// ErrLobbyGameNotFound when no record exists and ErrLobbyUnavailable
	// for transport / timeout / non-2xx responses.
	GetGame(ctx context.Context, gameID string) (LobbyGameRecord, error)
}
// LobbyGameRecord stores the subset of the Lobby `GameRecord` schema
// Runtime Manager uses. The shape is intentionally minimal: this fetch
// is ancillary diagnostics and v1 has no required field. The struct
// may be extended additively without breaking existing callers.
type LobbyGameRecord struct {
	// GameID identifies the platform game.
	GameID string
	// Status stores the verbatim Lobby status string (e.g. `starting`,
	// `running`, `paused`). Runtime Manager does not interpret it; it
	// is exposed for log enrichment and diagnostics only.
	Status string
	// TargetEngineVersion stores the semver of the engine version Lobby
	// resolved into the start envelope's image_ref. Empty when Lobby
	// did not return one.
	TargetEngineVersion string
}
// ErrLobbyGameNotFound reports that the Lobby internal API returned 404
// for the requested game id. Compare with errors.Is.
var ErrLobbyGameNotFound = errors.New("lobby game not found")

// ErrLobbyUnavailable reports that the Lobby internal API could not be
// reached (transport error, timeout, non-2xx response). Callers must
// treat the failure as recoverable: Runtime Manager continues the
// operation when the call is purely diagnostic.
var ErrLobbyUnavailable = errors.New("lobby internal api unavailable")
@@ -0,0 +1,25 @@
package ports
import (
"context"
"galaxy/notificationintent"
)
// NotificationIntentPublisher is the producer port Runtime Manager uses
// to publish admin-only notification intents to Notification Service.
// The production adapter is a thin wrapper around
// `notificationintent.Publisher`; the wrapper drops the entry id
// returned by the underlying publisher because Runtime Manager does
// not track per-intent ids in v1.
//
// A failed Publish call is a notification degradation per
// `galaxy/rtmanager/README.md §Notification Contracts` and must not roll
// back already committed business state. Callers log the error and
// proceed.
type NotificationIntentPublisher interface {
	// Publish normalises intent and appends it to the configured Redis
	// Stream. Validation failures and transport errors are returned
	// verbatim.
	Publish(ctx context.Context, intent notificationintent.Intent) error
}
@@ -0,0 +1,23 @@
package ports
import (
"context"
"galaxy/rtmanager/internal/domain/operation"
)
// OperationLogStore stores append-only audit entries for every
// lifecycle operation Runtime Manager performed against a game's
// runtime. Adapters must persist entry verbatim and return the
// generated bigserial id from Append.
type OperationLogStore interface {
// Append inserts entry into the operation log and returns the
// generated bigserial id. Adapters validate entry through
// operation.OperationEntry.Validate before touching the store.
Append(ctx context.Context, entry operation.OperationEntry) (id int64, err error)
// ListByGame returns the most recent entries for gameID, ordered by
// started_at descending and capped by limit. A non-positive limit
// is rejected as invalid input by adapters.
ListByGame(ctx context.Context, gameID string, limit int) ([]operation.OperationEntry, error)
}
@@ -0,0 +1,112 @@
// Package ports defines the stable interfaces that connect Runtime
// Manager use cases to external state and external services.
package ports
import (
"context"
"fmt"
"strings"
"time"
"galaxy/rtmanager/internal/domain/runtime"
)
// RuntimeRecordStore stores runtime records and exposes the operations
// used by the service layer (Stages 13+) and the workers (Stages 15-18).
// Adapters must preserve domain semantics:
//
//   - Get returns runtime.ErrNotFound when no record exists for gameID.
//   - Upsert installs a record verbatim; the caller is responsible for
//     domain validation through runtime.RuntimeRecord.Validate.
//   - UpdateStatus applies one transition through a compare-and-swap
//     guard on (status, current_container_id) and returns
//     runtime.ErrConflict on a stale CAS.
//   - List returns every record currently stored, regardless of status.
//   - ListByStatus returns every record currently indexed under status.
type RuntimeRecordStore interface {
	// Get returns the record identified by gameID. It returns
	// runtime.ErrNotFound when no record exists.
	Get(ctx context.Context, gameID string) (runtime.RuntimeRecord, error)
	// Upsert inserts record when no row exists for record.GameID and
	// otherwise overwrites every column verbatim. The start service uses
	// Upsert to install fresh records on start, the inner start of
	// restart and patch, and the reconcile_adopt path.
	Upsert(ctx context.Context, record runtime.RuntimeRecord) error
	// UpdateStatus applies one status transition in a compare-and-swap
	// fashion. The adapter must first call runtime.Transition to reject
	// invalid pairs without touching the store, then verify that the
	// stored status equals input.ExpectedFrom, and (when
	// input.ExpectedContainerID is non-empty) that the stored
	// current_container_id equals it. The adapter derives stopped_at /
	// removed_at and updates last_op_at from input.Now per the
	// destination status.
	UpdateStatus(ctx context.Context, input UpdateStatusInput) error
	// List returns every runtime record currently stored. Used by the
	// internal REST list endpoint; the v1 working set is bounded by the
	// games tracked by Lobby and is small enough to return in one
	// response (pagination is not supported). The order is
	// adapter-defined; callers may reorder as needed.
	List(ctx context.Context) ([]runtime.RuntimeRecord, error)
	// ListByStatus returns every record currently indexed under status.
	// The order is adapter-defined; callers may reorder as needed.
	ListByStatus(ctx context.Context, status runtime.Status) ([]runtime.RuntimeRecord, error)
}
// UpdateStatusInput stores the arguments required to apply one status
// transition through a RuntimeRecordStore. The adapter is responsible
// for translating the destination status into the matching column
// updates (stopped_at / removed_at / current_container_id NULLing) and
// for the CAS guard.
type UpdateStatusInput struct {
	// GameID identifies the record to mutate.
	GameID string
	// ExpectedFrom stores the status the caller believes the record
	// currently has. A mismatch results in runtime.ErrConflict.
	ExpectedFrom runtime.Status
	// ExpectedContainerID is an optional CAS guard. When non-empty, the
	// adapter rejects the update with runtime.ErrConflict if the stored
	// current_container_id does not equal it. Used by stop / cleanup /
	// reconcile to protect against concurrent restart races. Empty
	// disables the container-id CAS while keeping the status CAS.
	ExpectedContainerID string
	// To stores the destination status.
	To runtime.Status
	// Now stores the wall-clock used to derive stopped_at / removed_at
	// and last_op_at depending on To.
	Now time.Time
}
// Validate reports whether input contains a structurally valid status
// transition request. Adapters call Validate before touching the store;
// the first violation found is returned, in declaration order.
func (input UpdateStatusInput) Validate() error {
	switch {
	case strings.TrimSpace(input.GameID) == "":
		return fmt.Errorf("update runtime status: game id must not be empty")
	case !input.ExpectedFrom.IsKnown():
		return fmt.Errorf(
			"update runtime status: expected from status %q is unsupported",
			input.ExpectedFrom,
		)
	case !input.To.IsKnown():
		return fmt.Errorf(
			"update runtime status: to status %q is unsupported",
			input.To,
		)
	}
	// Both endpoints are known; defer to the domain transition table
	// for pair-level legality.
	if err := runtime.Transition(input.ExpectedFrom, input.To); err != nil {
		return fmt.Errorf("update runtime status: %w", err)
	}
	if input.Now.IsZero() {
		return fmt.Errorf("update runtime status: now must not be zero")
	}
	return nil
}
@@ -0,0 +1,70 @@
package ports
import (
"errors"
"testing"
"time"
"galaxy/rtmanager/internal/domain/runtime"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// validUpdateStatusInput builds a fully populated input that passes
// Validate; rejection tests mutate a single field to provoke failures.
func validUpdateStatusInput() UpdateStatusInput {
	fixedNow := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	return UpdateStatusInput{
		GameID:              "game-test",
		ExpectedFrom:        runtime.StatusRunning,
		ExpectedContainerID: "container-1",
		To:                  runtime.StatusStopped,
		Now:                 fixedNow,
	}
}
func TestUpdateStatusInputValidateHappy(t *testing.T) {
require.NoError(t, validUpdateStatusInput().Validate())
}
// TestUpdateStatusInputValidateAcceptsEmptyContainerCAS verifies that
// an empty ExpectedContainerID (container-id CAS disabled) is valid.
func TestUpdateStatusInputValidateAcceptsEmptyContainerCAS(t *testing.T) {
	noContainerCAS := validUpdateStatusInput()
	noContainerCAS.ExpectedContainerID = ""
	assert.NoError(t, noContainerCAS.Validate())
}
// TestUpdateStatusInputValidateRejects drives Validate through one
// corrupted field per case and expects a validation error each time.
func TestUpdateStatusInputValidateRejects(t *testing.T) {
	cases := []struct {
		label   string
		corrupt func(*UpdateStatusInput)
	}{
		{label: "empty game id", corrupt: func(in *UpdateStatusInput) { in.GameID = "" }},
		{label: "unknown expected from", corrupt: func(in *UpdateStatusInput) {
			in.ExpectedFrom = "exotic"
		}},
		{label: "unknown to", corrupt: func(in *UpdateStatusInput) {
			in.To = "exotic"
		}},
		{label: "zero now", corrupt: func(in *UpdateStatusInput) {
			in.Now = time.Time{}
		}},
	}
	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			input := validUpdateStatusInput()
			tc.corrupt(&input)
			assert.Error(t, input.Validate())
		})
	}
}
// TestUpdateStatusInputValidateRejectsForbiddenTransition verifies that
// a structurally valid request whose (from, to) pair is forbidden by
// the domain transition table still fails, and that the wrapped error
// unwraps to runtime.ErrInvalidTransition via the %w chain in Validate.
func TestUpdateStatusInputValidateRejectsForbiddenTransition(t *testing.T) {
	input := validUpdateStatusInput()
	input.ExpectedFrom = runtime.StatusRemoved
	input.To = runtime.StatusRunning
	err := input.Validate()
	require.Error(t, err)
	// NOTE(review): assert.ErrorIs(t, err, runtime.ErrInvalidTransition)
	// would give richer failure output, but it would leave this file's
	// `errors` import unused since this is its only use — switch both
	// together if refactoring.
	assert.True(t, errors.Is(err, runtime.ErrInvalidTransition),
		"want runtime.ErrInvalidTransition, got %v", err)
}
@@ -0,0 +1,23 @@
package ports
import "context"
// StreamOffsetStore persists the last successfully processed Redis
// Stream entry id per consumer label. Workers call Load on startup to
// resume from the persisted offset and Save after every successful
// message handling so the next iteration advances past the
// just-processed entry. The label is the short logical identifier of
// the consumer (e.g. `start_jobs`, `stop_jobs`), not the full stream
// name; it stays stable when the underlying stream key is renamed.
type StreamOffsetStore interface {
	// Load returns the last processed entry id for the consumer
	// labelled stream when one is stored. The boolean return reports
	// whether a value was present; implementations must not return an
	// error for a missing key.
	Load(ctx context.Context, stream string) (entryID string, found bool, err error)
	// Save stores entryID as the new last processed offset for the
	// consumer labelled stream. Implementations overwrite any previous
	// value unconditionally.
	Save(ctx context.Context, stream, entryID string) error
}