feat: runtime manager
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
// Package healtheventspublisher provides the Redis-Streams-backed
|
||||
// publisher for `runtime:health_events`. Every Publish call upserts the
|
||||
// latest `health_snapshots` row before XADDing the event so consumers
|
||||
// observing the snapshot store can never lag the event stream by more
|
||||
// than the duration of one network call.
|
||||
//
|
||||
// The publisher is shared across `ports.HealthEventPublisher` callers:
|
||||
// the start service emits `container_started`; the probe, inspect, and
|
||||
// events-listener workers emit the rest. The publisher's surface is
|
||||
// stable across all of them.
|
||||
package healtheventspublisher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"galaxy/rtmanager/internal/domain/health"
|
||||
"galaxy/rtmanager/internal/ports"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// emptyDetails is the canonical JSON payload installed when the caller
|
||||
// supplies an empty Details slice. Matches the SQL DEFAULT for
|
||||
// `health_snapshots.details`.
|
||||
const emptyDetails = `{}`
|
||||
|
||||
// Field names of one Redis Streams entry written by Publish. The set is
// frozen by `rtmanager/api/runtime-health-asyncapi.yaml`: renaming any
// of them breaks consumers.
const (
	fieldGameID       = "game_id"
	fieldContainerID  = "container_id"
	fieldEventType    = "event_type"
	fieldOccurredAtMS = "occurred_at_ms"
	fieldDetails      = "details"
)
|
||||
|
||||
// Config groups the dependencies and stream name required to construct
|
||||
// a Publisher.
|
||||
type Config struct {
|
||||
// Client appends entries to the Redis Stream. Must be non-nil.
|
||||
Client *redis.Client
|
||||
|
||||
// Snapshots upserts the latest health snapshot. Must be non-nil.
|
||||
Snapshots ports.HealthSnapshotStore
|
||||
|
||||
// Stream stores the Redis Stream key events are published to (e.g.
|
||||
// `runtime:health_events`). Must not be empty.
|
||||
Stream string
|
||||
}
|
||||
|
||||
// Publisher implements `ports.HealthEventPublisher` on top of a shared
|
||||
// Redis client and the production `health_snapshots` store.
|
||||
type Publisher struct {
|
||||
client *redis.Client
|
||||
snapshots ports.HealthSnapshotStore
|
||||
stream string
|
||||
}
|
||||
|
||||
// NewPublisher constructs one Publisher from cfg. Validation errors
|
||||
// surface the missing collaborator verbatim.
|
||||
func NewPublisher(cfg Config) (*Publisher, error) {
|
||||
if cfg.Client == nil {
|
||||
return nil, errors.New("new rtmanager health events publisher: nil redis client")
|
||||
}
|
||||
if cfg.Snapshots == nil {
|
||||
return nil, errors.New("new rtmanager health events publisher: nil snapshot store")
|
||||
}
|
||||
if cfg.Stream == "" {
|
||||
return nil, errors.New("new rtmanager health events publisher: stream must not be empty")
|
||||
}
|
||||
return &Publisher{
|
||||
client: cfg.Client,
|
||||
snapshots: cfg.Snapshots,
|
||||
stream: cfg.Stream,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Publish upserts the matching health_snapshots row and then XADDs the
|
||||
// envelope to the configured Redis Stream. Both side effects are
|
||||
// required; the snapshot upsert runs first so a successful Publish
|
||||
// always leaves the snapshot store at least as fresh as the stream.
|
||||
func (publisher *Publisher) Publish(ctx context.Context, envelope ports.HealthEventEnvelope) error {
|
||||
if publisher == nil || publisher.client == nil || publisher.snapshots == nil {
|
||||
return errors.New("publish health event: nil publisher")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("publish health event: nil context")
|
||||
}
|
||||
if err := envelope.Validate(); err != nil {
|
||||
return fmt.Errorf("publish health event: %w", err)
|
||||
}
|
||||
|
||||
details := envelope.Details
|
||||
if len(details) == 0 {
|
||||
details = json.RawMessage(emptyDetails)
|
||||
}
|
||||
|
||||
status, source := snapshotMappingFor(envelope.EventType)
|
||||
snapshot := health.HealthSnapshot{
|
||||
GameID: envelope.GameID,
|
||||
ContainerID: envelope.ContainerID,
|
||||
Status: status,
|
||||
Source: source,
|
||||
Details: details,
|
||||
ObservedAt: envelope.OccurredAt.UTC(),
|
||||
}
|
||||
if err := publisher.snapshots.Upsert(ctx, snapshot); err != nil {
|
||||
return fmt.Errorf("publish health event: upsert snapshot: %w", err)
|
||||
}
|
||||
|
||||
occurredAtMS := envelope.OccurredAt.UTC().UnixMilli()
|
||||
values := map[string]any{
|
||||
fieldGameID: envelope.GameID,
|
||||
fieldContainerID: envelope.ContainerID,
|
||||
fieldEventType: string(envelope.EventType),
|
||||
fieldOccurredAtMS: strconv.FormatInt(occurredAtMS, 10),
|
||||
fieldDetails: string(details),
|
||||
}
|
||||
if err := publisher.client.XAdd(ctx, &redis.XAddArgs{
|
||||
Stream: publisher.stream,
|
||||
Values: values,
|
||||
}).Err(); err != nil {
|
||||
return fmt.Errorf("publish health event: xadd: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotMappingFor returns the SnapshotStatus and SnapshotSource that
|
||||
// match eventType per `rtmanager/README.md §Health Monitoring`.
|
||||
//
|
||||
// `container_started` is observed when the start service successfully
|
||||
// runs the container; the snapshot collapses it to `healthy`.
|
||||
// `probe_recovered` collapses to `healthy` per
|
||||
// `rtmanager/docs/domain-and-ports.md` §4: it does not have its own
|
||||
// snapshot status; the next observation overwrites the prior
|
||||
// `probe_failed` with `healthy`.
|
||||
func snapshotMappingFor(eventType health.EventType) (health.SnapshotStatus, health.SnapshotSource) {
|
||||
switch eventType {
|
||||
case health.EventTypeContainerStarted:
|
||||
return health.SnapshotStatusHealthy, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeContainerExited:
|
||||
return health.SnapshotStatusExited, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeContainerOOM:
|
||||
return health.SnapshotStatusOOM, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeContainerDisappeared:
|
||||
return health.SnapshotStatusContainerDisappeared, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeInspectUnhealthy:
|
||||
return health.SnapshotStatusInspectUnhealthy, health.SnapshotSourceInspect
|
||||
case health.EventTypeProbeFailed:
|
||||
return health.SnapshotStatusProbeFailed, health.SnapshotSourceProbe
|
||||
case health.EventTypeProbeRecovered:
|
||||
return health.SnapshotStatusHealthy, health.SnapshotSourceProbe
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time assertion: Publisher implements
|
||||
// ports.HealthEventPublisher.
|
||||
var _ ports.HealthEventPublisher = (*Publisher)(nil)
|
||||
@@ -0,0 +1,197 @@
|
||||
package healtheventspublisher_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/rtmanager/internal/adapters/healtheventspublisher"
|
||||
"galaxy/rtmanager/internal/domain/health"
|
||||
"galaxy/rtmanager/internal/ports"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// fakeSnapshots captures Upsert invocations for assertions.
|
||||
type fakeSnapshots struct {
|
||||
mu sync.Mutex
|
||||
upserts []health.HealthSnapshot
|
||||
upsertErr error
|
||||
}
|
||||
|
||||
func (s *fakeSnapshots) Upsert(_ context.Context, snapshot health.HealthSnapshot) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.upsertErr != nil {
|
||||
return s.upsertErr
|
||||
}
|
||||
s.upserts = append(s.upserts, snapshot)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *fakeSnapshots) Get(_ context.Context, _ string) (health.HealthSnapshot, error) {
|
||||
return health.HealthSnapshot{}, nil
|
||||
}
|
||||
|
||||
func newPublisher(t *testing.T, snapshots ports.HealthSnapshotStore) (*healtheventspublisher.Publisher, *miniredis.Miniredis, *redis.Client) {
|
||||
t.Helper()
|
||||
server := miniredis.RunT(t)
|
||||
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
|
||||
t.Cleanup(func() { _ = client.Close() })
|
||||
|
||||
publisher, err := healtheventspublisher.NewPublisher(healtheventspublisher.Config{
|
||||
Client: client,
|
||||
Snapshots: snapshots,
|
||||
Stream: "runtime:health_events",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
return publisher, server, client
|
||||
}
|
||||
|
||||
func TestNewPublisherRejectsMissingCollaborators(t *testing.T) {
|
||||
_, err := healtheventspublisher.NewPublisher(healtheventspublisher.Config{})
|
||||
require.Error(t, err)
|
||||
|
||||
_, err = healtheventspublisher.NewPublisher(healtheventspublisher.Config{
|
||||
Client: redis.NewClient(&redis.Options{Addr: "127.0.0.1:0"}),
|
||||
})
|
||||
require.Error(t, err)
|
||||
|
||||
_, err = healtheventspublisher.NewPublisher(healtheventspublisher.Config{
|
||||
Client: redis.NewClient(&redis.Options{Addr: "127.0.0.1:0"}),
|
||||
Snapshots: &fakeSnapshots{},
|
||||
})
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func TestPublishContainerStartedUpsertsHealthyAndXAdds(t *testing.T) {
|
||||
snapshots := &fakeSnapshots{}
|
||||
publisher, _, client := newPublisher(t, snapshots)
|
||||
|
||||
occurredAt := time.Date(2026, 4, 27, 12, 0, 0, 0, time.UTC)
|
||||
envelope := ports.HealthEventEnvelope{
|
||||
GameID: "game-1",
|
||||
ContainerID: "c-1",
|
||||
EventType: health.EventTypeContainerStarted,
|
||||
OccurredAt: occurredAt,
|
||||
Details: json.RawMessage(`{"image_ref":"galaxy/game:1.2.3"}`),
|
||||
}
|
||||
require.NoError(t, publisher.Publish(context.Background(), envelope))
|
||||
|
||||
require.Len(t, snapshots.upserts, 1)
|
||||
snapshot := snapshots.upserts[0]
|
||||
assert.Equal(t, "game-1", snapshot.GameID)
|
||||
assert.Equal(t, "c-1", snapshot.ContainerID)
|
||||
assert.Equal(t, health.SnapshotStatusHealthy, snapshot.Status)
|
||||
assert.Equal(t, health.SnapshotSourceDockerEvent, snapshot.Source)
|
||||
assert.JSONEq(t, `{"image_ref":"galaxy/game:1.2.3"}`, string(snapshot.Details))
|
||||
assert.Equal(t, occurredAt, snapshot.ObservedAt)
|
||||
|
||||
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, entries, 1)
|
||||
values := entries[0].Values
|
||||
assert.Equal(t, "game-1", values["game_id"])
|
||||
assert.Equal(t, "c-1", values["container_id"])
|
||||
assert.Equal(t, "container_started", values["event_type"])
|
||||
assert.Equal(t, strconv.FormatInt(occurredAt.UnixMilli(), 10), values["occurred_at_ms"])
|
||||
assert.JSONEq(t, `{"image_ref":"galaxy/game:1.2.3"}`, values["details"].(string))
|
||||
}
|
||||
|
||||
func TestPublishMapsEveryEventTypeToASnapshot(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
eventType health.EventType
|
||||
expectStatus health.SnapshotStatus
|
||||
expectSource health.SnapshotSource
|
||||
}{
|
||||
{health.EventTypeContainerStarted, health.SnapshotStatusHealthy, health.SnapshotSourceDockerEvent},
|
||||
{health.EventTypeContainerExited, health.SnapshotStatusExited, health.SnapshotSourceDockerEvent},
|
||||
{health.EventTypeContainerOOM, health.SnapshotStatusOOM, health.SnapshotSourceDockerEvent},
|
||||
{health.EventTypeContainerDisappeared, health.SnapshotStatusContainerDisappeared, health.SnapshotSourceDockerEvent},
|
||||
{health.EventTypeInspectUnhealthy, health.SnapshotStatusInspectUnhealthy, health.SnapshotSourceInspect},
|
||||
{health.EventTypeProbeFailed, health.SnapshotStatusProbeFailed, health.SnapshotSourceProbe},
|
||||
{health.EventTypeProbeRecovered, health.SnapshotStatusHealthy, health.SnapshotSourceProbe},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(string(tc.eventType), func(t *testing.T) {
|
||||
t.Parallel()
|
||||
snapshots := &fakeSnapshots{}
|
||||
publisher, _, _ := newPublisher(t, snapshots)
|
||||
require.NoError(t, publisher.Publish(context.Background(), ports.HealthEventEnvelope{
|
||||
GameID: "g",
|
||||
ContainerID: "c",
|
||||
EventType: tc.eventType,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
Details: json.RawMessage(`{}`),
|
||||
}))
|
||||
require.Len(t, snapshots.upserts, 1)
|
||||
assert.Equal(t, tc.expectStatus, snapshots.upserts[0].Status)
|
||||
assert.Equal(t, tc.expectSource, snapshots.upserts[0].Source)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPublishEmptyDetailsBecomesEmptyObject(t *testing.T) {
|
||||
snapshots := &fakeSnapshots{}
|
||||
publisher, _, client := newPublisher(t, snapshots)
|
||||
|
||||
envelope := ports.HealthEventEnvelope{
|
||||
GameID: "g",
|
||||
ContainerID: "c",
|
||||
EventType: health.EventTypeContainerDisappeared,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
}
|
||||
require.NoError(t, publisher.Publish(context.Background(), envelope))
|
||||
|
||||
require.Len(t, snapshots.upserts, 1)
|
||||
assert.JSONEq(t, "{}", string(snapshots.upserts[0].Details))
|
||||
|
||||
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, entries, 1)
|
||||
assert.JSONEq(t, "{}", entries[0].Values["details"].(string))
|
||||
}
|
||||
|
||||
func TestPublishRejectsInvalidEnvelope(t *testing.T) {
|
||||
snapshots := &fakeSnapshots{}
|
||||
publisher, _, client := newPublisher(t, snapshots)
|
||||
|
||||
require.Error(t, publisher.Publish(context.Background(), ports.HealthEventEnvelope{}))
|
||||
|
||||
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, entries)
|
||||
assert.Empty(t, snapshots.upserts)
|
||||
}
|
||||
|
||||
func TestPublishSurfacesSnapshotErrorWithoutXAdd(t *testing.T) {
|
||||
snapshots := &fakeSnapshots{upsertErr: assertSentinelErr}
|
||||
publisher, _, client := newPublisher(t, snapshots)
|
||||
|
||||
err := publisher.Publish(context.Background(), ports.HealthEventEnvelope{
|
||||
GameID: "g",
|
||||
ContainerID: "c",
|
||||
EventType: health.EventTypeContainerStarted,
|
||||
OccurredAt: time.Now().UTC(),
|
||||
Details: json.RawMessage(`{"image_ref":"x"}`),
|
||||
})
|
||||
require.Error(t, err)
|
||||
|
||||
entries, err := client.XRange(context.Background(), "runtime:health_events", "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, entries, "xadd must not run when snapshot upsert fails")
|
||||
}
|
||||
|
||||
// sentinelError is a string-backed error type. Its values are
// comparable, so a test can check for one specific instance.
type sentinelError string

// Error returns the underlying string.
func (e sentinelError) Error() string {
	return string(e)
}

// assertSentinelErr is the error the fake snapshot store is configured
// to return in snapshot-failure tests.
var assertSentinelErr = sentinelError("snapshot upsert failure")
|
||||
Reference in New Issue
Block a user