feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
@@ -0,0 +1,165 @@
// Package healtheventspublisher provides the Redis-Streams-backed
// publisher for `runtime:health_events`. Every Publish call upserts the
// latest `health_snapshots` row before XADDing the event, so a consumer
// reacting to a stream entry always finds a snapshot at least as fresh
// as that entry.
//
// The publisher is shared across `ports.HealthEventPublisher` callers:
// the start service emits `container_started`; the probe, inspect, and
// events-listener workers emit the rest. The publisher's surface is
// stable across all of them.
package healtheventspublisher
import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/ports"
"github.com/redis/go-redis/v9"
)
// emptyDetails is the JSON document substituted for an envelope whose
// Details payload is empty, so both the snapshot row and the stream
// entry carry an empty JSON object rather than an empty string. It
// matches the SQL DEFAULT for `health_snapshots.details`.
const emptyDetails = `{}`
// Keys of one Redis Streams entry. They belong to the wire contract
// frozen by `rtmanager/api/runtime-health-asyncapi.yaml`: consumers read
// entries by these exact names, so none of them may be renamed.
const (
	fieldGameID       = "game_id"
	fieldContainerID  = "container_id"
	fieldEventType    = "event_type"
	fieldOccurredAtMS = "occurred_at_ms"
	fieldDetails      = "details"
)
// Config carries everything NewPublisher needs: the two collaborators
// and the name of the target stream. NewPublisher rejects a Config in
// which any of the three fields is left at its zero value.
type Config struct {
	// Client is the Redis client used to XADD stream entries. Required.
	Client *redis.Client

	// Snapshots receives the snapshot upsert that precedes every XADD.
	// Required.
	Snapshots ports.HealthSnapshotStore

	// Stream is the Redis Stream key entries are appended to, for
	// example `runtime:health_events`. Required and non-empty.
	Stream string
}
// Publisher is the `ports.HealthEventPublisher` implementation backed by
// a shared Redis client and a `health_snapshots` store. Build it with
// NewPublisher; Publish refuses to run on a value whose collaborators
// are nil.
type Publisher struct {
	// client appends entries to the stream.
	client *redis.Client
	// snapshots stores the latest snapshot before each entry is appended.
	snapshots ports.HealthSnapshotStore
	// stream is the Redis Stream key entries are appended to.
	stream string
}
// NewPublisher validates cfg and returns a Publisher wired to its
// collaborators. The first missing piece (redis client, snapshot
// store, stream name, checked in that order) is reported as an error
// naming it, and no Publisher is returned.
func NewPublisher(cfg Config) (*Publisher, error) {
	switch {
	case cfg.Client == nil:
		return nil, errors.New("new rtmanager health events publisher: nil redis client")
	case cfg.Snapshots == nil:
		return nil, errors.New("new rtmanager health events publisher: nil snapshot store")
	case cfg.Stream == "":
		return nil, errors.New("new rtmanager health events publisher: stream must not be empty")
	}

	created := &Publisher{
		client:    cfg.Client,
		snapshots: cfg.Snapshots,
		stream:    cfg.Stream,
	}
	return created, nil
}
// Publish records envelope in two places, in this order:
//
//  1. the matching `health_snapshots` row is upserted with the status
//     and source derived from the event type;
//  2. the event is appended to the configured Redis Stream with XADD.
//
// Both side effects are required. The upsert runs first so a successful
// Publish always leaves the snapshot store at least as fresh as the
// stream.
//
// Publish returns an error, without touching either store, when the
// publisher or ctx is nil, when envelope fails validation, or when the
// event type has no snapshot mapping. If the upsert fails the XADD is
// skipped. If the XADD fails the already-written snapshot is NOT rolled
// back: the error is returned and the snapshot stays ahead of the
// stream until the caller publishes again.
//
// Empty envelope.Details is replaced by emptyDetails in both the
// snapshot and the stream entry. The timestamp is normalised to UTC and
// sent on the wire as decimal Unix milliseconds.
func (p *Publisher) Publish(ctx context.Context, envelope ports.HealthEventEnvelope) error {
	if p == nil || p.client == nil || p.snapshots == nil {
		return errors.New("publish health event: nil publisher")
	}
	if ctx == nil {
		return errors.New("publish health event: nil context")
	}
	if err := envelope.Validate(); err != nil {
		return fmt.Errorf("publish health event: %w", err)
	}

	// snapshotMappingFor answers with empty values for an event type it
	// does not know. Refuse it here instead of persisting a snapshot
	// with a blank status/source and emitting the event anyway.
	status, source := snapshotMappingFor(envelope.EventType)
	if status == "" || source == "" {
		return fmt.Errorf("publish health event: no snapshot mapping for event type %q", envelope.EventType)
	}

	details := envelope.Details
	if len(details) == 0 {
		details = json.RawMessage(emptyDetails)
	}
	// One normalised timestamp feeds both the snapshot and the entry.
	occurredAt := envelope.OccurredAt.UTC()

	snapshot := health.HealthSnapshot{
		GameID:      envelope.GameID,
		ContainerID: envelope.ContainerID,
		Status:      status,
		Source:      source,
		Details:     details,
		ObservedAt:  occurredAt,
	}
	if err := p.snapshots.Upsert(ctx, snapshot); err != nil {
		return fmt.Errorf("publish health event: upsert snapshot: %w", err)
	}

	entry := &redis.XAddArgs{
		Stream: p.stream,
		Values: map[string]any{
			fieldGameID:       envelope.GameID,
			fieldContainerID:  envelope.ContainerID,
			fieldEventType:    string(envelope.EventType),
			fieldOccurredAtMS: strconv.FormatInt(occurredAt.UnixMilli(), 10),
			fieldDetails:      string(details),
		},
	}
	if err := p.client.XAdd(ctx, entry).Err(); err != nil {
		return fmt.Errorf("publish health event: xadd: %w", err)
	}
	return nil
}
// snapshotMappingFor translates an event type into the snapshot status
// and snapshot source stored alongside it, following
// `rtmanager/README.md §Health Monitoring`.
//
// Two event types collapse to `healthy`:
//   - `container_started`, emitted once the start service has the
//     container running;
//   - `probe_recovered`, which per `rtmanager/docs/domain-and-ports.md`
//     §4 has no snapshot status of its own and simply overwrites the
//     earlier `probe_failed` observation.
//
// An event type that is not listed yields the empty status and the
// empty source.
func snapshotMappingFor(eventType health.EventType) (health.SnapshotStatus, health.SnapshotSource) {
	// Both start out as the empty string, which is what an unlisted
	// event type falls through with.
	var (
		status health.SnapshotStatus
		source health.SnapshotSource
	)

	switch eventType {
	case health.EventTypeContainerStarted:
		status, source = health.SnapshotStatusHealthy, health.SnapshotSourceDockerEvent
	case health.EventTypeContainerExited:
		status, source = health.SnapshotStatusExited, health.SnapshotSourceDockerEvent
	case health.EventTypeContainerOOM:
		status, source = health.SnapshotStatusOOM, health.SnapshotSourceDockerEvent
	case health.EventTypeContainerDisappeared:
		status, source = health.SnapshotStatusContainerDisappeared, health.SnapshotSourceDockerEvent
	case health.EventTypeInspectUnhealthy:
		status, source = health.SnapshotStatusInspectUnhealthy, health.SnapshotSourceInspect
	case health.EventTypeProbeFailed:
		status, source = health.SnapshotStatusProbeFailed, health.SnapshotSourceProbe
	case health.EventTypeProbeRecovered:
		status, source = health.SnapshotStatusHealthy, health.SnapshotSourceProbe
	}
	return status, source
}
// Compile-time check that *Publisher satisfies
// ports.HealthEventPublisher: the build fails on this line if the
// method set of *Publisher stops matching the port.
var _ ports.HealthEventPublisher = (*Publisher)(nil)
@@ -0,0 +1,197 @@
package healtheventspublisher_test
import (
"context"
"encoding/json"
"strconv"
"sync"
"testing"
"time"
"galaxy/rtmanager/internal/adapters/healtheventspublisher"
"galaxy/rtmanager/internal/domain/health"
"galaxy/rtmanager/internal/ports"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// fakeSnapshots is an in-memory snapshot store for tests. It remembers
// every snapshot handed to Upsert and can be primed to fail instead.
type fakeSnapshots struct {
	// mu is held for the whole of each Upsert call.
	mu sync.Mutex
	// upserts lists the snapshots accepted so far, in call order.
	upserts []health.HealthSnapshot
	// upsertErr, when non-nil, is returned by Upsert and nothing is
	// recorded.
	upsertErr error
}
// Upsert appends snapshot to the recorded list, or returns the primed
// upsertErr without recording anything. The context is ignored.
func (s *fakeSnapshots) Upsert(_ context.Context, snapshot health.HealthSnapshot) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if failure := s.upsertErr; failure != nil {
		return failure
	}

	s.upserts = append(s.upserts, snapshot)
	return nil
}
// Get is a stub: it ignores its arguments and always returns the zero
// snapshot with a nil error. The publisher under test only calls
// Upsert.
func (s *fakeSnapshots) Get(_ context.Context, _ string) (health.HealthSnapshot, error) {
	var none health.HealthSnapshot
	return none, nil
}
// newPublisher builds a Publisher that writes to a fresh miniredis
// server (started with miniredis.RunT) on the `runtime:health_events`
// stream and upserts into snapshots. It returns the publisher together
// with the server and the redis client so tests can inspect the stream;
// the client is closed through t.Cleanup. Construction failure aborts
// the test.
func newPublisher(t *testing.T, snapshots ports.HealthSnapshotStore) (*healtheventspublisher.Publisher, *miniredis.Miniredis, *redis.Client) {
	t.Helper()

	mini := miniredis.RunT(t)
	rdb := redis.NewClient(&redis.Options{Addr: mini.Addr()})
	t.Cleanup(func() { _ = rdb.Close() })

	cfg := healtheventspublisher.Config{
		Client:    rdb,
		Snapshots: snapshots,
		Stream:    "runtime:health_events",
	}
	pub, err := healtheventspublisher.NewPublisher(cfg)
	require.NoError(t, err)

	return pub, mini, rdb
}
func TestNewPublisherRejectsMissingCollaborators(t *testing.T) {
_, err := healtheventspublisher.NewPublisher(healtheventspublisher.Config{})
require.Error(t, err)
_, err = healtheventspublisher.NewPublisher(healtheventspublisher.Config{
Client: redis.NewClient(&redis.Options{Addr: "127.0.0.1:0"}),
})
require.Error(t, err)
_, err = healtheventspublisher.NewPublisher(healtheventspublisher.Config{
Client: redis.NewClient(&redis.Options{Addr: "127.0.0.1:0"}),
Snapshots: &fakeSnapshots{},
})
require.Error(t, err)
}
// TestPublishContainerStartedUpsertsHealthyAndXAdds publishes one
// `container_started` event and verifies both side effects: a single
// healthy/docker-event snapshot carrying the envelope's identifiers,
// details and timestamp, and a single stream entry with the expected
// wire fields.
func TestPublishContainerStartedUpsertsHealthyAndXAdds(t *testing.T) {
	const (
		stream      = "runtime:health_events"
		detailsJSON = `{"image_ref":"galaxy/game:1.2.3"}`
	)

	ctx := context.Background()
	store := &fakeSnapshots{}
	pub, _, rdb := newPublisher(t, store)

	at := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
	err := pub.Publish(ctx, ports.HealthEventEnvelope{
		GameID:      "game-1",
		ContainerID: "c-1",
		EventType:   health.EventTypeContainerStarted,
		OccurredAt:  at,
		Details:     json.RawMessage(detailsJSON),
	})
	require.NoError(t, err)

	// Snapshot side.
	require.Len(t, store.upserts, 1)
	got := store.upserts[0]
	assert.Equal(t, "game-1", got.GameID)
	assert.Equal(t, "c-1", got.ContainerID)
	assert.Equal(t, health.SnapshotStatusHealthy, got.Status)
	assert.Equal(t, health.SnapshotSourceDockerEvent, got.Source)
	assert.JSONEq(t, detailsJSON, string(got.Details))
	assert.Equal(t, at, got.ObservedAt)

	// Stream side.
	entries, err := rdb.XRange(ctx, stream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, entries, 1)

	fields := entries[0].Values
	wantMillis := strconv.FormatInt(at.UnixMilli(), 10)
	assert.Equal(t, "game-1", fields["game_id"])
	assert.Equal(t, "c-1", fields["container_id"])
	assert.Equal(t, "container_started", fields["event_type"])
	assert.Equal(t, wantMillis, fields["occurred_at_ms"])
	assert.JSONEq(t, detailsJSON, fields["details"].(string))
}
// TestPublishMapsEveryEventTypeToASnapshot publishes one event per
// event type, each in its own parallel subtest with its own publisher
// and redis server, and checks the status and source of the resulting
// snapshot.
//
// NOTE(review): the parallel subtests read the loop variable from the
// closure; this assumes Go 1.22+ per-iteration loop variables. Confirm
// against the module's go directive.
func TestPublishMapsEveryEventTypeToASnapshot(t *testing.T) {
	t.Parallel()

	type mapping struct {
		event  health.EventType
		status health.SnapshotStatus
		source health.SnapshotSource
	}
	table := []mapping{
		{event: health.EventTypeContainerStarted, status: health.SnapshotStatusHealthy, source: health.SnapshotSourceDockerEvent},
		{event: health.EventTypeContainerExited, status: health.SnapshotStatusExited, source: health.SnapshotSourceDockerEvent},
		{event: health.EventTypeContainerOOM, status: health.SnapshotStatusOOM, source: health.SnapshotSourceDockerEvent},
		{event: health.EventTypeContainerDisappeared, status: health.SnapshotStatusContainerDisappeared, source: health.SnapshotSourceDockerEvent},
		{event: health.EventTypeInspectUnhealthy, status: health.SnapshotStatusInspectUnhealthy, source: health.SnapshotSourceInspect},
		{event: health.EventTypeProbeFailed, status: health.SnapshotStatusProbeFailed, source: health.SnapshotSourceProbe},
		{event: health.EventTypeProbeRecovered, status: health.SnapshotStatusHealthy, source: health.SnapshotSourceProbe},
	}

	for _, row := range table {
		t.Run(string(row.event), func(t *testing.T) {
			t.Parallel()

			store := &fakeSnapshots{}
			pub, _, _ := newPublisher(t, store)

			envelope := ports.HealthEventEnvelope{
				GameID:      "g",
				ContainerID: "c",
				EventType:   row.event,
				OccurredAt:  time.Now().UTC(),
				Details:     json.RawMessage(`{}`),
			}
			require.NoError(t, pub.Publish(context.Background(), envelope))

			require.Len(t, store.upserts, 1)
			recorded := store.upserts[0]
			assert.Equal(t, row.status, recorded.Status)
			assert.Equal(t, row.source, recorded.Source)
		})
	}
}
// TestPublishEmptyDetailsBecomesEmptyObject publishes an envelope with
// no Details and verifies that both the stored snapshot and the stream
// entry carry the empty JSON object instead.
func TestPublishEmptyDetailsBecomesEmptyObject(t *testing.T) {
	ctx := context.Background()
	store := &fakeSnapshots{}
	pub, _, rdb := newPublisher(t, store)

	// Details is deliberately left unset.
	err := pub.Publish(ctx, ports.HealthEventEnvelope{
		GameID:      "g",
		ContainerID: "c",
		EventType:   health.EventTypeContainerDisappeared,
		OccurredAt:  time.Now().UTC(),
	})
	require.NoError(t, err)

	require.Len(t, store.upserts, 1)
	assert.JSONEq(t, "{}", string(store.upserts[0].Details))

	entries, err := rdb.XRange(ctx, "runtime:health_events", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, entries, 1)
	assert.JSONEq(t, "{}", entries[0].Values["details"].(string))
}
// TestPublishRejectsInvalidEnvelope publishes the zero envelope and
// verifies that Publish fails without leaving a stream entry or a
// snapshot behind.
func TestPublishRejectsInvalidEnvelope(t *testing.T) {
	ctx := context.Background()
	store := &fakeSnapshots{}
	pub, _, rdb := newPublisher(t, store)

	var zero ports.HealthEventEnvelope
	require.Error(t, pub.Publish(ctx, zero))

	entries, err := rdb.XRange(ctx, "runtime:health_events", "-", "+").Result()
	require.NoError(t, err)
	assert.Empty(t, entries)
	assert.Empty(t, store.upserts)
}
// TestPublishSurfacesSnapshotErrorWithoutXAdd primes the snapshot store
// to fail and verifies that Publish returns an error wrapping that
// exact failure and that no entry reaches the stream.
func TestPublishSurfacesSnapshotErrorWithoutXAdd(t *testing.T) {
	snapshots := &fakeSnapshots{upsertErr: assertSentinelErr}
	publisher, _, client := newPublisher(t, snapshots)

	err := publisher.Publish(context.Background(), ports.HealthEventEnvelope{
		GameID:      "g",
		ContainerID: "c",
		EventType:   health.EventTypeContainerStarted,
		OccurredAt:  time.Now().UTC(),
		Details:     json.RawMessage(`{"image_ref":"x"}`),
	})
	// ErrorIs, not just Error: the failure must be the store's own
	// error, not some unrelated validation or redis problem.
	require.ErrorIs(t, err, assertSentinelErr)

	entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
	require.NoError(t, err)
	assert.Empty(t, entries, "xadd must not run when snapshot upsert fails")
}
// sentinelError is a string-backed error type. Its values are
// comparable, so a test can recognise one after it has been wrapped.
type sentinelError string

// Error returns the string the sentinel was declared with.
func (s sentinelError) Error() string {
	return string(s)
}

// assertSentinelErr is the failure injected into the fake snapshot
// store by the snapshot-failure test.
var assertSentinelErr = sentinelError("snapshot upsert failure")