feat: runtime manager
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
// Package healtheventspublisher provides the Redis-Streams-backed
// publisher for `runtime:health_events`. Every Publish call upserts the
// latest `health_snapshots` row before XADDing the event so consumers
// observing the snapshot store can never lag the event stream by more
// than the duration of one network call.
//
// The publisher is shared across `ports.HealthEventPublisher` callers:
// the start service emits `container_started`; the probe, inspect, and
// events-listener workers emit the rest. The publisher's surface is
// stable across all of them.
|
||||
package healtheventspublisher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"galaxy/rtmanager/internal/domain/health"
|
||||
"galaxy/rtmanager/internal/ports"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// emptyDetails is the canonical JSON payload installed when the caller
// supplies an empty Details slice. Matches the SQL DEFAULT for
// `health_snapshots.details`.
const emptyDetails = "{}"
|
||||
|
||||
// Wire field names used by the Redis Streams payload. Frozen by
// `rtmanager/api/runtime-health-asyncapi.yaml`; renaming any of them
// breaks consumers.
const (
	fieldGameID       = "game_id"
	fieldContainerID  = "container_id"
	fieldEventType    = "event_type"
	fieldOccurredAtMS = "occurred_at_ms"
	fieldDetails      = "details"
)
|
||||
|
||||
// Config groups the dependencies and stream name required to construct
|
||||
// a Publisher.
|
||||
type Config struct {
|
||||
// Client appends entries to the Redis Stream. Must be non-nil.
|
||||
Client *redis.Client
|
||||
|
||||
// Snapshots upserts the latest health snapshot. Must be non-nil.
|
||||
Snapshots ports.HealthSnapshotStore
|
||||
|
||||
// Stream stores the Redis Stream key events are published to (e.g.
|
||||
// `runtime:health_events`). Must not be empty.
|
||||
Stream string
|
||||
}
|
||||
|
||||
// Publisher implements `ports.HealthEventPublisher` on top of a shared
|
||||
// Redis client and the production `health_snapshots` store.
|
||||
type Publisher struct {
|
||||
client *redis.Client
|
||||
snapshots ports.HealthSnapshotStore
|
||||
stream string
|
||||
}
|
||||
|
||||
// NewPublisher constructs one Publisher from cfg. Validation errors
|
||||
// surface the missing collaborator verbatim.
|
||||
func NewPublisher(cfg Config) (*Publisher, error) {
|
||||
if cfg.Client == nil {
|
||||
return nil, errors.New("new rtmanager health events publisher: nil redis client")
|
||||
}
|
||||
if cfg.Snapshots == nil {
|
||||
return nil, errors.New("new rtmanager health events publisher: nil snapshot store")
|
||||
}
|
||||
if cfg.Stream == "" {
|
||||
return nil, errors.New("new rtmanager health events publisher: stream must not be empty")
|
||||
}
|
||||
return &Publisher{
|
||||
client: cfg.Client,
|
||||
snapshots: cfg.Snapshots,
|
||||
stream: cfg.Stream,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Publish upserts the matching health_snapshots row and then XADDs the
|
||||
// envelope to the configured Redis Stream. Both side effects are
|
||||
// required; the snapshot upsert runs first so a successful Publish
|
||||
// always leaves the snapshot store at least as fresh as the stream.
|
||||
func (publisher *Publisher) Publish(ctx context.Context, envelope ports.HealthEventEnvelope) error {
|
||||
if publisher == nil || publisher.client == nil || publisher.snapshots == nil {
|
||||
return errors.New("publish health event: nil publisher")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("publish health event: nil context")
|
||||
}
|
||||
if err := envelope.Validate(); err != nil {
|
||||
return fmt.Errorf("publish health event: %w", err)
|
||||
}
|
||||
|
||||
details := envelope.Details
|
||||
if len(details) == 0 {
|
||||
details = json.RawMessage(emptyDetails)
|
||||
}
|
||||
|
||||
status, source := snapshotMappingFor(envelope.EventType)
|
||||
snapshot := health.HealthSnapshot{
|
||||
GameID: envelope.GameID,
|
||||
ContainerID: envelope.ContainerID,
|
||||
Status: status,
|
||||
Source: source,
|
||||
Details: details,
|
||||
ObservedAt: envelope.OccurredAt.UTC(),
|
||||
}
|
||||
if err := publisher.snapshots.Upsert(ctx, snapshot); err != nil {
|
||||
return fmt.Errorf("publish health event: upsert snapshot: %w", err)
|
||||
}
|
||||
|
||||
occurredAtMS := envelope.OccurredAt.UTC().UnixMilli()
|
||||
values := map[string]any{
|
||||
fieldGameID: envelope.GameID,
|
||||
fieldContainerID: envelope.ContainerID,
|
||||
fieldEventType: string(envelope.EventType),
|
||||
fieldOccurredAtMS: strconv.FormatInt(occurredAtMS, 10),
|
||||
fieldDetails: string(details),
|
||||
}
|
||||
if err := publisher.client.XAdd(ctx, &redis.XAddArgs{
|
||||
Stream: publisher.stream,
|
||||
Values: values,
|
||||
}).Err(); err != nil {
|
||||
return fmt.Errorf("publish health event: xadd: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotMappingFor returns the SnapshotStatus and SnapshotSource that
|
||||
// match eventType per `rtmanager/README.md §Health Monitoring`.
|
||||
//
|
||||
// `container_started` is observed when the start service successfully
|
||||
// runs the container; the snapshot collapses it to `healthy`.
|
||||
// `probe_recovered` collapses to `healthy` per
|
||||
// `rtmanager/docs/domain-and-ports.md` §4: it does not have its own
|
||||
// snapshot status; the next observation overwrites the prior
|
||||
// `probe_failed` with `healthy`.
|
||||
func snapshotMappingFor(eventType health.EventType) (health.SnapshotStatus, health.SnapshotSource) {
|
||||
switch eventType {
|
||||
case health.EventTypeContainerStarted:
|
||||
return health.SnapshotStatusHealthy, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeContainerExited:
|
||||
return health.SnapshotStatusExited, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeContainerOOM:
|
||||
return health.SnapshotStatusOOM, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeContainerDisappeared:
|
||||
return health.SnapshotStatusContainerDisappeared, health.SnapshotSourceDockerEvent
|
||||
case health.EventTypeInspectUnhealthy:
|
||||
return health.SnapshotStatusInspectUnhealthy, health.SnapshotSourceInspect
|
||||
case health.EventTypeProbeFailed:
|
||||
return health.SnapshotStatusProbeFailed, health.SnapshotSourceProbe
|
||||
case health.EventTypeProbeRecovered:
|
||||
return health.SnapshotStatusHealthy, health.SnapshotSourceProbe
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time assertion: Publisher implements
|
||||
// ports.HealthEventPublisher.
|
||||
var _ ports.HealthEventPublisher = (*Publisher)(nil)
|
||||
Reference in New Issue
Block a user