feat: runtime manager
This commit is contained in:
@@ -0,0 +1,318 @@
|
||||
// Package dockerinspect runs the periodic Docker inspect described in
|
||||
// `rtmanager/README.md §Health Monitoring`.
|
||||
//
|
||||
// On every tick the worker lists `runtime_records.status=running`,
|
||||
// inspects each container, and emits `inspect_unhealthy` when any of
|
||||
// the following holds:
|
||||
//
|
||||
// - `RestartCount` increased between observations (delta detection
|
||||
// requires a prior observation; the first inspect of a record only
|
||||
// records the baseline);
|
||||
// - `State.Status != "running"`;
|
||||
// - `State.Health.Status == "unhealthy"` (only meaningful when the
|
||||
// image declares a Docker HEALTHCHECK).
|
||||
//
|
||||
// `ErrContainerNotFound` is left to the reconciler — the inspect
|
||||
// worker logs and skips so that `container_disappeared` emission
|
||||
// stays single-sourced (Docker events listener + reconciler).
|
||||
//
|
||||
// Per-game state is pruned at the start of every tick against the
|
||||
// freshly-read running list, so a stopped or removed game never
|
||||
// carries a stale baseline into a new lifecycle.
|
||||
package dockerinspect
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"galaxy/rtmanager/internal/domain/health"
|
||||
"galaxy/rtmanager/internal/domain/runtime"
|
||||
"galaxy/rtmanager/internal/logging"
|
||||
"galaxy/rtmanager/internal/ports"
|
||||
"galaxy/rtmanager/internal/telemetry"
|
||||
)
|
||||
|
||||
// dockerStateRunning is the verbatim Docker `State.Status` value the
// worker treats as healthy; any other status triggers an emission.
const dockerStateRunning = "running"

// dockerHealthUnhealthy is the verbatim Docker `State.Health.Status`
// value the worker treats as unhealthy. Only populated by Docker when
// the image declares a HEALTHCHECK; otherwise the field stays empty.
const dockerHealthUnhealthy = "unhealthy"
|
||||
|
||||
// Dependencies groups the collaborators required by Worker. All fields
// except Clock and Logger are mandatory; NewWorker validates them.
type Dependencies struct {
	// Docker provides the InspectContainer surface.
	Docker ports.DockerClient

	// RuntimeRecords lists running games on every tick.
	RuntimeRecords ports.RuntimeRecordStore

	// HealthEvents emits `inspect_unhealthy` entries.
	HealthEvents ports.HealthEventPublisher

	// Telemetry records one health-event counter per emission.
	Telemetry *telemetry.Runtime

	// Interval bounds the tick period. Must be strictly positive.
	Interval time.Duration

	// Clock supplies the wall-clock used for emission timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time

	// Logger receives structured worker-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
}
|
||||
|
||||
// Worker drives the periodic inspect loop.
type Worker struct {
	// Collaborators, set once by NewWorker and never mutated after.
	docker         ports.DockerClient
	runtimeRecords ports.RuntimeRecordStore
	healthEvents   ports.HealthEventPublisher
	telemetry      *telemetry.Runtime

	// interval is the tick period driving Run's ticker.
	interval time.Duration

	// clock and logger are guaranteed non-nil after NewWorker.
	clock  func() time.Time
	logger *slog.Logger

	// mu guards states.
	mu sync.Mutex
	// states maps game ID to its per-game inspect baseline.
	states map[string]*inspectState
}
|
||||
|
||||
// inspectState stores the per-game baseline. Owned by Worker and
// protected by Worker.mu.
type inspectState struct {
	// lastRestartCount is the RestartCount recorded by the most
	// recent inspect of this game.
	lastRestartCount int
	// seen is false until the first inspect seeds a baseline;
	// restart-count deltas are only evaluated once it is true.
	seen bool
}
|
||||
|
||||
// NewWorker constructs one Worker from deps.
|
||||
func NewWorker(deps Dependencies) (*Worker, error) {
|
||||
switch {
|
||||
case deps.Docker == nil:
|
||||
return nil, errors.New("new docker inspect worker: nil docker client")
|
||||
case deps.RuntimeRecords == nil:
|
||||
return nil, errors.New("new docker inspect worker: nil runtime records store")
|
||||
case deps.HealthEvents == nil:
|
||||
return nil, errors.New("new docker inspect worker: nil health events publisher")
|
||||
case deps.Telemetry == nil:
|
||||
return nil, errors.New("new docker inspect worker: nil telemetry runtime")
|
||||
case deps.Interval <= 0:
|
||||
return nil, errors.New("new docker inspect worker: interval must be positive")
|
||||
}
|
||||
|
||||
clock := deps.Clock
|
||||
if clock == nil {
|
||||
clock = time.Now
|
||||
}
|
||||
logger := deps.Logger
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
return &Worker{
|
||||
docker: deps.Docker,
|
||||
runtimeRecords: deps.RuntimeRecords,
|
||||
healthEvents: deps.HealthEvents,
|
||||
telemetry: deps.Telemetry,
|
||||
interval: deps.Interval,
|
||||
clock: clock,
|
||||
logger: logger.With("worker", "rtmanager.dockerinspect"),
|
||||
states: map[string]*inspectState{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run drives the inspect loop until ctx is cancelled. Per-tick errors
|
||||
// are absorbed; the loop only exits on context cancellation.
|
||||
func (worker *Worker) Run(ctx context.Context) error {
|
||||
if worker == nil {
|
||||
return errors.New("run docker inspect worker: nil worker")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("run docker inspect worker: nil context")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
worker.logger.Info("docker inspect worker started",
|
||||
"interval", worker.interval.String(),
|
||||
)
|
||||
defer worker.logger.Info("docker inspect worker stopped")
|
||||
|
||||
ticker := time.NewTicker(worker.interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
worker.tick(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown is a no-op beyond validating ctx; Run terminates on context
// cancellation, so there is no teardown work to perform here.
func (worker *Worker) Shutdown(ctx context.Context) error {
	if ctx == nil {
		return errors.New("shutdown docker inspect worker: nil context")
	}
	return nil
}
|
||||
|
||||
// Tick performs one inspect pass. Exported so tests can drive the
// worker deterministically without spinning a real ticker; it simply
// delegates to the unexported tick used by Run.
func (worker *Worker) Tick(ctx context.Context) {
	worker.tick(ctx)
}
|
||||
|
||||
// tick performs one full pass: list running records, prune state for
|
||||
// stopped games, then inspect every running container sequentially.
|
||||
// Inspect calls are cheap; sequential execution avoids fan-out against
|
||||
// the Docker daemon.
|
||||
func (worker *Worker) tick(ctx context.Context) {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
records, err := worker.runtimeRecords.ListByStatus(ctx, runtime.StatusRunning)
|
||||
if err != nil {
|
||||
worker.logger.WarnContext(ctx, "list running records",
|
||||
"err", err.Error(),
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
worker.pruneStates(records)
|
||||
|
||||
for _, record := range records {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
worker.inspectOne(ctx, record)
|
||||
}
|
||||
}
|
||||
|
||||
// pruneStates removes per-game baselines for games no longer in the
|
||||
// running list.
|
||||
func (worker *Worker) pruneStates(records []runtime.RuntimeRecord) {
|
||||
worker.mu.Lock()
|
||||
defer worker.mu.Unlock()
|
||||
if len(worker.states) == 0 {
|
||||
return
|
||||
}
|
||||
running := make(map[string]struct{}, len(records))
|
||||
for _, record := range records {
|
||||
running[record.GameID] = struct{}{}
|
||||
}
|
||||
for gameID := range worker.states {
|
||||
if _, ok := running[gameID]; !ok {
|
||||
delete(worker.states, gameID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// inspectOne issues one InspectContainer call and emits
|
||||
// `inspect_unhealthy` when the observation crosses any of the three
|
||||
// trigger conditions. The first observation of a record only seeds the
|
||||
// baseline; deltas need at least two ticks.
|
||||
func (worker *Worker) inspectOne(ctx context.Context, record runtime.RuntimeRecord) {
|
||||
inspect, err := worker.docker.InspectContainer(ctx, record.CurrentContainerID)
|
||||
if err != nil {
|
||||
if errors.Is(err, ports.ErrContainerNotFound) {
|
||||
worker.logger.DebugContext(ctx, "inspect skipped: container missing",
|
||||
"game_id", record.GameID,
|
||||
"container_id", record.CurrentContainerID,
|
||||
)
|
||||
return
|
||||
}
|
||||
worker.logger.WarnContext(ctx, "inspect failed",
|
||||
"game_id", record.GameID,
|
||||
"container_id", record.CurrentContainerID,
|
||||
"err", err.Error(),
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
worker.mu.Lock()
|
||||
state, ok := worker.states[record.GameID]
|
||||
if !ok {
|
||||
state = &inspectState{}
|
||||
worker.states[record.GameID] = state
|
||||
}
|
||||
prev := *state
|
||||
state.lastRestartCount = inspect.RestartCount
|
||||
state.seen = true
|
||||
worker.mu.Unlock()
|
||||
|
||||
emit := false
|
||||
switch {
|
||||
case prev.seen && inspect.RestartCount > prev.lastRestartCount:
|
||||
emit = true
|
||||
case inspect.Status != dockerStateRunning:
|
||||
emit = true
|
||||
case inspect.Health == dockerHealthUnhealthy:
|
||||
emit = true
|
||||
}
|
||||
if !emit {
|
||||
return
|
||||
}
|
||||
|
||||
worker.publish(ctx, ports.HealthEventEnvelope{
|
||||
GameID: record.GameID,
|
||||
ContainerID: record.CurrentContainerID,
|
||||
EventType: health.EventTypeInspectUnhealthy,
|
||||
OccurredAt: worker.clock().UTC(),
|
||||
Details: inspectUnhealthyDetails(inspect.RestartCount, inspect.Status, inspect.Health),
|
||||
})
|
||||
}
|
||||
|
||||
// publish emits one envelope through the configured publisher, updates
|
||||
// the telemetry counter, and logs the outcome. Failures degrade to a
|
||||
// warning log per `rtmanager/README.md §Notification Contracts`.
|
||||
func (worker *Worker) publish(ctx context.Context, envelope ports.HealthEventEnvelope) {
|
||||
if err := worker.healthEvents.Publish(ctx, envelope); err != nil {
|
||||
worker.logger.ErrorContext(ctx, "publish health event",
|
||||
"game_id", envelope.GameID,
|
||||
"container_id", envelope.ContainerID,
|
||||
"event_type", string(envelope.EventType),
|
||||
"err", err.Error(),
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
worker.telemetry.RecordHealthEvent(ctx, string(envelope.EventType))
|
||||
|
||||
logArgs := []any{
|
||||
"game_id", envelope.GameID,
|
||||
"container_id", envelope.ContainerID,
|
||||
"event_type", string(envelope.EventType),
|
||||
}
|
||||
logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
|
||||
worker.logger.InfoContext(ctx, "inspect event published", logArgs...)
|
||||
}
|
||||
|
||||
// inspectUnhealthyDetails builds the JSON payload required by the
// `inspect_unhealthy` AsyncAPI variant. All three fields are always
// present, even when their value is the zero value.
func inspectUnhealthyDetails(restartCount int, state, health string) json.RawMessage {
	type details struct {
		RestartCount int    `json:"restart_count"`
		State        string `json:"state"`
		Health       string `json:"health"`
	}
	// Marshalling a flat struct of ints and strings cannot fail.
	encoded, _ := json.Marshal(details{
		RestartCount: restartCount,
		State:        state,
		Health:       health,
	})
	return encoded
}
|
||||
@@ -0,0 +1,388 @@
|
||||
package dockerinspect_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/rtmanager/internal/adapters/docker/mocks"
|
||||
"galaxy/rtmanager/internal/domain/health"
|
||||
"galaxy/rtmanager/internal/domain/runtime"
|
||||
"galaxy/rtmanager/internal/ports"
|
||||
"galaxy/rtmanager/internal/telemetry"
|
||||
"galaxy/rtmanager/internal/worker/dockerinspect"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/mock/gomock"
|
||||
)
|
||||
|
||||
// silentLogger returns a logger whose output is discarded entirely,
// keeping test output clean.
func silentLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
|
||||
|
||||
// fakeRuntimeRecords is a RuntimeRecordStore test double; only
// ListByStatus is functional, the remaining methods are stubs.
type fakeRuntimeRecords struct {
	mu sync.Mutex
	// running is the snapshot returned for StatusRunning queries.
	running []runtime.RuntimeRecord
	// listErr, when set, makes ListByStatus fail unconditionally.
	listErr error
}
|
||||
|
||||
// newFakeRuntimeRecords returns an empty store ready for Set/Clear.
func newFakeRuntimeRecords() *fakeRuntimeRecords { return &fakeRuntimeRecords{} }
|
||||
|
||||
func (s *fakeRuntimeRecords) Set(records ...runtime.RuntimeRecord) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.running = append([]runtime.RuntimeRecord(nil), records...)
|
||||
}
|
||||
|
||||
func (s *fakeRuntimeRecords) Clear() {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.running = nil
|
||||
}
|
||||
|
||||
// Get is a stub; the inspect worker never reads individual records.
func (s *fakeRuntimeRecords) Get(_ context.Context, _ string) (runtime.RuntimeRecord, error) {
	return runtime.RuntimeRecord{}, runtime.ErrNotFound
}

// Upsert is a stub satisfying ports.RuntimeRecordStore.
func (s *fakeRuntimeRecords) Upsert(_ context.Context, _ runtime.RuntimeRecord) error { return nil }

// UpdateStatus is a stub satisfying ports.RuntimeRecordStore.
func (s *fakeRuntimeRecords) UpdateStatus(_ context.Context, _ ports.UpdateStatusInput) error {
	return nil
}

// List is a stub satisfying ports.RuntimeRecordStore.
func (s *fakeRuntimeRecords) List(_ context.Context) ([]runtime.RuntimeRecord, error) {
	return nil, nil
}
|
||||
|
||||
func (s *fakeRuntimeRecords) ListByStatus(_ context.Context, status runtime.Status) ([]runtime.RuntimeRecord, error) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.listErr != nil {
|
||||
return nil, s.listErr
|
||||
}
|
||||
if status != runtime.StatusRunning {
|
||||
return nil, nil
|
||||
}
|
||||
out := make([]runtime.RuntimeRecord, len(s.running))
|
||||
copy(out, s.running)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// fakeHealthEvents captures every Publish call for later inspection.
type fakeHealthEvents struct {
	mu sync.Mutex
	// published accumulates envelopes in publish order.
	published []ports.HealthEventEnvelope
	// publishErr, when set, makes Publish fail without recording.
	publishErr error
}
|
||||
|
||||
func (s *fakeHealthEvents) Publish(_ context.Context, envelope ports.HealthEventEnvelope) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.publishErr != nil {
|
||||
return s.publishErr
|
||||
}
|
||||
s.published = append(s.published, envelope)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeHealthEvents) Published() []ports.HealthEventEnvelope {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
out := make([]ports.HealthEventEnvelope, len(s.published))
|
||||
copy(out, s.published)
|
||||
return out
|
||||
}
|
||||
|
||||
// --- harness ----------------------------------------------------------
|
||||
|
||||
// harness bundles the worker under test with its collaborators.
type harness struct {
	docker  *mocks.MockDockerClient
	records *fakeRuntimeRecords
	health  *fakeHealthEvents
	worker  *dockerinspect.Worker
	// now is the fixed instant returned by the injected clock.
	now time.Time
}
|
||||
|
||||
// newHarness wires a Worker to a mock Docker client, in-memory fakes,
// and a fixed clock so tests can drive ticks deterministically.
func newHarness(t *testing.T) *harness {
	t.Helper()
	ctrl := gomock.NewController(t)
	t.Cleanup(ctrl.Finish)

	// Nil providers yield a no-op telemetry runtime.
	telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, err)

	docker := mocks.NewMockDockerClient(ctrl)
	records := newFakeRuntimeRecords()
	healthEvents := &fakeHealthEvents{}
	now := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)

	worker, err := dockerinspect.NewWorker(dockerinspect.Dependencies{
		Docker:         docker,
		RuntimeRecords: records,
		HealthEvents:   healthEvents,
		Telemetry:      telemetryRuntime,
		Interval:       50 * time.Millisecond,
		Clock:          func() time.Time { return now },
		Logger:         silentLogger(),
	})
	require.NoError(t, err)

	return &harness{
		docker:  docker,
		records: records,
		health:  healthEvents,
		worker:  worker,
		now:     now,
	}
}
|
||||
|
||||
// runningRecord builds a minimal StatusRunning record whose container
// ID is derived from gameID ("ctr-" + gameID), matching the mock
// expectations in the tests below.
func runningRecord(gameID string) runtime.RuntimeRecord {
	startedAt := time.Date(2026, 4, 27, 11, 0, 0, 0, time.UTC)
	return runtime.RuntimeRecord{
		GameID:             gameID,
		Status:             runtime.StatusRunning,
		CurrentContainerID: "ctr-" + gameID,
		CurrentImageRef:    "galaxy/game:1.0.0",
		EngineEndpoint:     "http://galaxy-game-" + gameID + ":8080",
		StatePath:          "/var/lib/galaxy/games/" + gameID,
		DockerNetwork:      "galaxy-net",
		StartedAt:          &startedAt,
		LastOpAt:           startedAt,
		CreatedAt:          startedAt,
	}
}
|
||||
|
||||
// --- constructor ------------------------------------------------------
|
||||
|
||||
func TestNewWorkerRejectsMissingDeps(t *testing.T) {
|
||||
ctrl := gomock.NewController(t)
|
||||
t.Cleanup(ctrl.Finish)
|
||||
telemetryRuntime, err := telemetry.NewWithProviders(nil, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
base := dockerinspect.Dependencies{
|
||||
Docker: mocks.NewMockDockerClient(ctrl),
|
||||
RuntimeRecords: newFakeRuntimeRecords(),
|
||||
HealthEvents: &fakeHealthEvents{},
|
||||
Telemetry: telemetryRuntime,
|
||||
Interval: time.Second,
|
||||
}
|
||||
|
||||
defectives := []dockerinspect.Dependencies{
|
||||
{},
|
||||
{Docker: base.Docker},
|
||||
{Docker: base.Docker, RuntimeRecords: base.RuntimeRecords},
|
||||
{Docker: base.Docker, RuntimeRecords: base.RuntimeRecords, HealthEvents: base.HealthEvents},
|
||||
{Docker: base.Docker, RuntimeRecords: base.RuntimeRecords, HealthEvents: base.HealthEvents, Telemetry: base.Telemetry},
|
||||
}
|
||||
for index, deps := range defectives {
|
||||
_, err := dockerinspect.NewWorker(deps)
|
||||
require.Errorf(t, err, "case %d should fail", index)
|
||||
}
|
||||
|
||||
_, err = dockerinspect.NewWorker(base)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// --- behaviour --------------------------------------------------------
|
||||
|
||||
// TestTickFirstObservationOnlySeedsBaseline: a healthy first inspect
// must record the RestartCount baseline without emitting anything.
func TestTickFirstObservationOnlySeedsBaseline(t *testing.T) {
	h := newHarness(t)
	h.records.Set(runningRecord("game-a"))

	// RestartCount 2 with no prior baseline: no delta can be computed.
	h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
		ID:           "ctr-game-a",
		Status:       "running",
		Health:       "",
		RestartCount: 2,
	}, nil)

	h.worker.Tick(context.Background())
	assert.Empty(t, h.health.Published(), "first observation seeds baseline only")
}
|
||||
|
||||
// TestTickRestartCountGrowthEmits: a RestartCount increase between two
// observations of the same game must emit exactly one
// inspect_unhealthy event carrying the latest observation as details.
func TestTickRestartCountGrowthEmits(t *testing.T) {
	h := newHarness(t)
	h.records.Set(runningRecord("game-a"))

	// First tick seeds RestartCount=2; second observes growth to 3.
	gomock.InOrder(
		h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
			ID: "ctr-game-a", Status: "running", RestartCount: 2,
		}, nil),
		h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
			ID: "ctr-game-a", Status: "running", RestartCount: 3,
		}, nil),
	)

	h.worker.Tick(context.Background())
	h.worker.Tick(context.Background())

	envelopes := h.health.Published()
	require.Len(t, envelopes, 1)
	envelope := envelopes[0]
	assert.Equal(t, health.EventTypeInspectUnhealthy, envelope.EventType)
	assert.Equal(t, "game-a", envelope.GameID)
	assert.Equal(t, "ctr-game-a", envelope.ContainerID)

	// The details payload must reflect the second (triggering) inspect.
	var details struct {
		RestartCount int    `json:"restart_count"`
		State        string `json:"state"`
		Health       string `json:"health"`
	}
	require.NoError(t, json.Unmarshal(envelope.Details, &details))
	assert.Equal(t, 3, details.RestartCount)
	assert.Equal(t, "running", details.State)
	assert.Empty(t, details.Health)
}
|
||||
|
||||
// TestTickStateNotRunningEmits: a non-running Docker state emits
// immediately — unlike restart deltas, it needs no prior baseline.
func TestTickStateNotRunningEmits(t *testing.T) {
	h := newHarness(t)
	h.records.Set(runningRecord("game-a"))

	h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
		ID:           "ctr-game-a",
		Status:       "exited",
		Health:       "",
		RestartCount: 0,
	}, nil)

	h.worker.Tick(context.Background())
	envelopes := h.health.Published()
	require.Len(t, envelopes, 1, "state != running emits even on first observation")
	envelope := envelopes[0]
	assert.Equal(t, health.EventTypeInspectUnhealthy, envelope.EventType)

	var details struct {
		RestartCount int    `json:"restart_count"`
		State        string `json:"state"`
		Health       string `json:"health"`
	}
	require.NoError(t, json.Unmarshal(envelope.Details, &details))
	assert.Equal(t, "exited", details.State)
}
|
||||
|
||||
// TestTickHealthUnhealthyEmits: an unhealthy HEALTHCHECK status emits
// immediately, even while the container state is still "running".
func TestTickHealthUnhealthyEmits(t *testing.T) {
	h := newHarness(t)
	h.records.Set(runningRecord("game-a"))

	h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
		ID:           "ctr-game-a",
		Status:       "running",
		Health:       "unhealthy",
		RestartCount: 0,
	}, nil)

	h.worker.Tick(context.Background())
	envelopes := h.health.Published()
	require.Len(t, envelopes, 1, "Health == unhealthy emits even on first observation")
	envelope := envelopes[0]
	assert.Equal(t, health.EventTypeInspectUnhealthy, envelope.EventType)

	var details struct {
		Health string `json:"health"`
	}
	require.NoError(t, json.Unmarshal(envelope.Details, &details))
	assert.Equal(t, "unhealthy", details.Health)
}
|
||||
|
||||
// TestTickHealthyDoesNotEmitOnSecondPass: a stable (unchanged)
// RestartCount across two healthy observations must stay silent.
func TestTickHealthyDoesNotEmitOnSecondPass(t *testing.T) {
	h := newHarness(t)
	h.records.Set(runningRecord("game-a"))

	gomock.InOrder(
		h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
			ID: "ctr-game-a", Status: "running", RestartCount: 5,
		}, nil),
		h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
			ID: "ctr-game-a", Status: "running", RestartCount: 5,
		}, nil),
	)

	h.worker.Tick(context.Background())
	h.worker.Tick(context.Background())
	assert.Empty(t, h.health.Published(), "stable healthy observations must not emit")
}
|
||||
|
||||
func TestTickContainerNotFoundIsSilent(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.records.Set(runningRecord("game-a"))
|
||||
|
||||
h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{}, ports.ErrContainerNotFound)
|
||||
|
||||
h.worker.Tick(context.Background())
|
||||
assert.Empty(t, h.health.Published(), "ErrContainerNotFound must not emit; reconciler handles drift")
|
||||
}
|
||||
|
||||
func TestTickArbitraryInspectErrorIsAbsorbed(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.records.Set(runningRecord("game-a"))
|
||||
|
||||
h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{}, errors.New("docker daemon broken"))
|
||||
|
||||
require.NotPanics(t, func() { h.worker.Tick(context.Background()) })
|
||||
assert.Empty(t, h.health.Published())
|
||||
}
|
||||
|
||||
// TestTickPrunesStateForGamesNoLongerRunning: once a game leaves the
// running list, its baseline is discarded, so a later lifecycle with a
// smaller RestartCount must not be compared against the stale value.
func TestTickPrunesStateForGamesNoLongerRunning(t *testing.T) {
	h := newHarness(t)
	h.records.Set(runningRecord("game-a"))

	gomock.InOrder(
		h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
			ID: "ctr-game-a", Status: "running", RestartCount: 5,
		}, nil),
		// After the game leaves running and re-enters, baseline must be
		// reset; a smaller RestartCount must NOT emit (no delta from a
		// stale state).
		h.docker.EXPECT().InspectContainer(gomock.Any(), "ctr-game-a").Return(ports.ContainerInspect{
			ID: "ctr-game-a", Status: "running", RestartCount: 1,
		}, nil),
	)

	// Tick with the game running, then with it absent (prunes state),
	// then with it running again (fresh baseline, no comparison).
	h.worker.Tick(context.Background())
	h.records.Clear()
	h.worker.Tick(context.Background())
	h.records.Set(runningRecord("game-a"))
	h.worker.Tick(context.Background())

	assert.Empty(t, h.health.Published(), "fresh baseline after re-running must not compare against stale lastRestartCount")
}
|
||||
|
||||
func TestTickAbsorbsListError(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
h.records.listErr = errors.New("pg down")
|
||||
|
||||
require.NotPanics(t, func() { h.worker.Tick(context.Background()) })
|
||||
assert.Empty(t, h.health.Published())
|
||||
}
|
||||
|
||||
func TestRunRespectsContextCancel(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- h.worker.Run(ctx) }()
|
||||
|
||||
cancel()
|
||||
select {
|
||||
case err := <-done:
|
||||
assert.ErrorIs(t, err, context.Canceled)
|
||||
case <-time.After(time.Second):
|
||||
t.Fatalf("Run did not exit after cancel")
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownIsNoOp: Shutdown with a valid context succeeds without
// any teardown side effects.
func TestShutdownIsNoOp(t *testing.T) {
	h := newHarness(t)
	require.NoError(t, h.worker.Shutdown(context.Background()))
}
|
||||
|
||||
// --- compile-time safety ----------------------------------------------
|
||||
|
||||
// Compile-time checks that the fakes satisfy the ports interfaces.
var (
	_ ports.RuntimeRecordStore   = (*fakeRuntimeRecords)(nil)
	_ ports.HealthEventPublisher = (*fakeHealthEvents)(nil)
)
|
||||
Reference in New Issue
Block a user