feat: runtime manager
This commit is contained in:
@@ -0,0 +1,334 @@
|
||||
package harness
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"sort"
	"strconv"
	"strings"
	"testing"
	"time"

	"galaxy/rtmanager/internal/ports"

	"github.com/redis/go-redis/v9"
	"github.com/stretchr/testify/require"
)
|
||||
|
||||
// Default scenario timeouts. Stream-driven assertions sit on top of
// the runtime's worker tickers (defaults of 200-500ms in
// `EnvOptions`); 30s gives every reconcile / probe / event tick more
// than enough headroom even on a slow CI runner.
const (
	// defaultStreamTimeout bounds how long the WaitFor* helpers poll a
	// stream before failing the test (used when callers pass timeout <= 0).
	defaultStreamTimeout = 30 * time.Second
	// defaultStreamPoll is the sleep between successive full-stream
	// XRANGE rescans inside the WaitFor* loops.
	defaultStreamPoll = 25 * time.Millisecond
)
|
||||
|
||||
// XAddStartJob appends one start-job entry in the
|
||||
// `runtime:start_jobs` AsyncAPI shape and returns the assigned entry
|
||||
// id. Mirrors the wire shape produced by Lobby's
|
||||
// `runtimemanager.Publisher` so the consumer treats the entry exactly
|
||||
// like a real Lobby-published job.
|
||||
func XAddStartJob(t testing.TB, env *Env, gameID, imageRef string) string {
|
||||
t.Helper()
|
||||
id, err := env.RedisClient.XAdd(context.Background(), &redis.XAddArgs{
|
||||
Stream: env.Cfg.Streams.StartJobs,
|
||||
Values: map[string]any{
|
||||
"game_id": gameID,
|
||||
"image_ref": imageRef,
|
||||
"requested_at_ms": time.Now().UTC().UnixMilli(),
|
||||
},
|
||||
}).Result()
|
||||
require.NoErrorf(t, err, "xadd start_jobs for game %s", gameID)
|
||||
return id
|
||||
}
|
||||
|
||||
// XAddStopJob appends one stop-job entry classified by reason. The
|
||||
// reason enum is documented at `ports.StopReason`.
|
||||
func XAddStopJob(t testing.TB, env *Env, gameID, reason string) string {
|
||||
t.Helper()
|
||||
id, err := env.RedisClient.XAdd(context.Background(), &redis.XAddArgs{
|
||||
Stream: env.Cfg.Streams.StopJobs,
|
||||
Values: map[string]any{
|
||||
"game_id": gameID,
|
||||
"reason": reason,
|
||||
"requested_at_ms": time.Now().UTC().UnixMilli(),
|
||||
},
|
||||
}).Result()
|
||||
require.NoErrorf(t, err, "xadd stop_jobs for game %s", gameID)
|
||||
return id
|
||||
}
|
||||
|
||||
// JobResultEntry is the decoded shape of one `runtime:job_results`
// stream entry. Mirrors `ports.JobResult` plus the entry id surfaced
// by Redis so tests can correlate XADD ids with results.
type JobResultEntry struct {
	// StreamID is the Redis entry id assigned by XADD (message.ID).
	StreamID string
	// GameID comes from the entry's "game_id" field.
	GameID string
	// Outcome comes from the "outcome" field; compare against the
	// re-exported JobOutcomeSuccess / JobOutcomeFailure constants.
	Outcome string
	// ContainerID comes from the "container_id" field.
	ContainerID string
	// EngineEndpoint comes from the "engine_endpoint" field.
	EngineEndpoint string
	// ErrorCode comes from the "error_code" field; empty on success.
	ErrorCode string
	// ErrorMessage comes from the "error_message" field; empty on success.
	ErrorMessage string
}
|
||||
|
||||
// HealthEventEntry mirrors the `runtime:health_events` AsyncAPI shape
// in decoded form.
type HealthEventEntry struct {
	// StreamID is the Redis entry id assigned by XADD (message.ID).
	StreamID string
	// GameID comes from the entry's "game_id" field.
	GameID string
	// ContainerID comes from the "container_id" field.
	ContainerID string
	// EventType comes from the "event_type" field.
	EventType string
	// OccurredAtMs is "occurred_at_ms" parsed as int64; zero if the
	// field is absent or unparseable.
	OccurredAtMs int64
	// Details is the JSON-decoded "details" field; nil when the field
	// is empty or not valid JSON.
	Details map[string]any
}
|
||||
|
||||
// NotificationIntentEntry decodes one `notification:intents` entry
// that RTM publishes for first-touch start failures.
type NotificationIntentEntry struct {
	// StreamID is the Redis entry id assigned by XADD (message.ID).
	StreamID string
	// NotificationType comes from the "notification_type" field.
	NotificationType string
	// IdempotencyKey comes from the "idempotency_key" field.
	IdempotencyKey string
	// Payload is the JSON-decoded "payload_json" field (falling back
	// to "payload"); nil when absent or not valid JSON.
	Payload map[string]any
}
|
||||
|
||||
// WaitForJobResult polls `runtime:job_results` until predicate
|
||||
// matches, or the timeout fires. Returns the matching entry. The
|
||||
// helper does not consume the stream — every call rescans from `0-0`
|
||||
// — because RTM's writes are append-only and the cardinality per test
|
||||
// is small.
|
||||
func WaitForJobResult(t testing.TB, env *Env, predicate func(JobResultEntry) bool, timeout time.Duration) JobResultEntry {
|
||||
t.Helper()
|
||||
if timeout <= 0 {
|
||||
timeout = defaultStreamTimeout
|
||||
}
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
entries, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.JobResults, "-", "+").Result()
|
||||
require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.JobResults)
|
||||
for _, entry := range entries {
|
||||
decoded := decodeJobResult(entry)
|
||||
if predicate(decoded) {
|
||||
return decoded
|
||||
}
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
t.Fatalf("rtmanager integration: no job_result matched within %s; observed=%v",
|
||||
timeout, jobResultStreamSummary(entries))
|
||||
}
|
||||
time.Sleep(defaultStreamPoll)
|
||||
}
|
||||
}
|
||||
|
||||
// AllJobResults returns every entry on `runtime:job_results` in stream
|
||||
// order. Useful for assertions that depend on cardinality (replay
|
||||
// tests).
|
||||
func AllJobResults(t testing.TB, env *Env) []JobResultEntry {
|
||||
t.Helper()
|
||||
entries, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.JobResults, "-", "+").Result()
|
||||
require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.JobResults)
|
||||
out := make([]JobResultEntry, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
out = append(out, decodeJobResult(entry))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// WaitForHealthEvent polls `runtime:health_events` until predicate
|
||||
// matches, or the timeout fires.
|
||||
func WaitForHealthEvent(t testing.TB, env *Env, predicate func(HealthEventEntry) bool, timeout time.Duration) HealthEventEntry {
|
||||
t.Helper()
|
||||
if timeout <= 0 {
|
||||
timeout = defaultStreamTimeout
|
||||
}
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
entries, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.HealthEvents, "-", "+").Result()
|
||||
require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.HealthEvents)
|
||||
for _, entry := range entries {
|
||||
decoded := decodeHealthEvent(t, entry)
|
||||
if predicate(decoded) {
|
||||
return decoded
|
||||
}
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
t.Fatalf("rtmanager integration: no health_event matched within %s; observed=%v",
|
||||
timeout, healthEventStreamSummary(entries))
|
||||
}
|
||||
time.Sleep(defaultStreamPoll)
|
||||
}
|
||||
}
|
||||
|
||||
// WaitForNotificationIntent polls `notification:intents` until
|
||||
// predicate matches.
|
||||
func WaitForNotificationIntent(t testing.TB, env *Env, predicate func(NotificationIntentEntry) bool, timeout time.Duration) NotificationIntentEntry {
|
||||
t.Helper()
|
||||
if timeout <= 0 {
|
||||
timeout = defaultStreamTimeout
|
||||
}
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
entries, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.NotificationIntents, "-", "+").Result()
|
||||
require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.NotificationIntents)
|
||||
for _, entry := range entries {
|
||||
decoded := decodeNotificationIntent(t, entry)
|
||||
if predicate(decoded) {
|
||||
return decoded
|
||||
}
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
t.Fatalf("rtmanager integration: no notification_intent matched within %s; observed=%v",
|
||||
timeout, notificationStreamSummary(entries))
|
||||
}
|
||||
time.Sleep(defaultStreamPoll)
|
||||
}
|
||||
}
|
||||
|
||||
// JobOutcomeIs returns a predicate matching a job result whose game id
|
||||
// and outcome equal the inputs.
|
||||
func JobOutcomeIs(gameID, outcome string) func(JobResultEntry) bool {
|
||||
return func(entry JobResultEntry) bool {
|
||||
return entry.GameID == gameID && entry.Outcome == outcome
|
||||
}
|
||||
}
|
||||
|
||||
// JobOutcomeWithErrorCode matches a job result whose game id, outcome,
|
||||
// and error_code all equal the inputs. Used by replay-no-op
|
||||
// assertions.
|
||||
func JobOutcomeWithErrorCode(gameID, outcome, errorCode string) func(JobResultEntry) bool {
|
||||
return func(entry JobResultEntry) bool {
|
||||
return entry.GameID == gameID && entry.Outcome == outcome && entry.ErrorCode == errorCode
|
||||
}
|
||||
}
|
||||
|
||||
// HealthEventTypeIs returns a predicate matching a health event whose
|
||||
// game id and event_type equal the inputs.
|
||||
func HealthEventTypeIs(gameID, eventType string) func(HealthEventEntry) bool {
|
||||
return func(entry HealthEventEntry) bool {
|
||||
return entry.GameID == gameID && entry.EventType == eventType
|
||||
}
|
||||
}
|
||||
|
||||
func decodeJobResult(message redis.XMessage) JobResultEntry {
|
||||
return JobResultEntry{
|
||||
StreamID: message.ID,
|
||||
GameID: streamString(message.Values, "game_id"),
|
||||
Outcome: streamString(message.Values, "outcome"),
|
||||
ContainerID: streamString(message.Values, "container_id"),
|
||||
EngineEndpoint: streamString(message.Values, "engine_endpoint"),
|
||||
ErrorCode: streamString(message.Values, "error_code"),
|
||||
ErrorMessage: streamString(message.Values, "error_message"),
|
||||
}
|
||||
}
|
||||
|
||||
func decodeHealthEvent(t testing.TB, message redis.XMessage) HealthEventEntry {
|
||||
t.Helper()
|
||||
occurredAt, _ := strconv.ParseInt(streamString(message.Values, "occurred_at_ms"), 10, 64)
|
||||
entry := HealthEventEntry{
|
||||
StreamID: message.ID,
|
||||
GameID: streamString(message.Values, "game_id"),
|
||||
ContainerID: streamString(message.Values, "container_id"),
|
||||
EventType: streamString(message.Values, "event_type"),
|
||||
OccurredAtMs: occurredAt,
|
||||
}
|
||||
rawDetails := streamString(message.Values, "details")
|
||||
if rawDetails != "" {
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal([]byte(rawDetails), &parsed); err == nil {
|
||||
entry.Details = parsed
|
||||
}
|
||||
}
|
||||
return entry
|
||||
}
|
||||
|
||||
func decodeNotificationIntent(t testing.TB, message redis.XMessage) NotificationIntentEntry {
|
||||
t.Helper()
|
||||
entry := NotificationIntentEntry{
|
||||
StreamID: message.ID,
|
||||
NotificationType: streamString(message.Values, "notification_type"),
|
||||
IdempotencyKey: streamString(message.Values, "idempotency_key"),
|
||||
}
|
||||
rawPayload := streamString(message.Values, "payload_json")
|
||||
if rawPayload == "" {
|
||||
rawPayload = streamString(message.Values, "payload")
|
||||
}
|
||||
if rawPayload != "" {
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal([]byte(rawPayload), &parsed); err == nil {
|
||||
entry.Payload = parsed
|
||||
}
|
||||
}
|
||||
return entry
|
||||
}
|
||||
|
||||
// streamString extracts one stream field as a string: missing keys
// yield "", string and []byte values pass through directly, and any
// other type is formatted with %v.
func streamString(values map[string]any, key string) string {
	raw, present := values[key]
	if !present {
		return ""
	}
	if s, isString := raw.(string); isString {
		return s
	}
	if b, isBytes := raw.([]byte); isBytes {
		return string(b)
	}
	return fmt.Sprintf("%v", raw)
}
|
||||
|
||||
func jobResultStreamSummary(entries []redis.XMessage) []string {
|
||||
out := make([]string, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
decoded := decodeJobResult(entry)
|
||||
out = append(out, fmt.Sprintf("%s game=%s outcome=%s err=%s",
|
||||
decoded.StreamID, decoded.GameID, decoded.Outcome, decoded.ErrorCode))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func healthEventStreamSummary(entries []redis.XMessage) []string {
|
||||
out := make([]string, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
out = append(out, fmt.Sprintf("%s %s %s",
|
||||
entry.ID, streamString(entry.Values, "game_id"), streamString(entry.Values, "event_type")))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func notificationStreamSummary(entries []redis.XMessage) []string {
|
||||
out := make([]string, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
out = append(out, fmt.Sprintf("%s %s",
|
||||
entry.ID, streamString(entry.Values, "notification_type")))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// EnsureJobOutcomeConstants pins the constants from `ports` so suite
// authors can build predicates without importing `ports` themselves.
// Re-exported here to keep test source focused.
var (
	// JobOutcomeSuccess aliases ports.JobOutcomeSuccess for use with
	// JobOutcomeIs / JobOutcomeWithErrorCode predicates.
	JobOutcomeSuccess = ports.JobOutcomeSuccess
	// JobOutcomeFailure aliases ports.JobOutcomeFailure.
	JobOutcomeFailure = ports.JobOutcomeFailure
)
|
||||
|
||||
// AssertNoJobResultBeyond fails the test if the count of entries on
|
||||
// `runtime:job_results` exceeds `expectedCount`. Used by the replay
|
||||
// tests to prove the second envelope was no-op.
|
||||
func AssertNoJobResultBeyond(t testing.TB, env *Env, expectedCount int) {
|
||||
t.Helper()
|
||||
entries, err := env.RedisClient.XLen(context.Background(), env.Cfg.Streams.JobResults).Result()
|
||||
require.NoError(t, err)
|
||||
require.LessOrEqualf(t, entries, int64(expectedCount),
|
||||
"job_results stream has more entries than expected; got=%d expected<=%d", entries, expectedCount)
|
||||
}
|
||||
|
||||
// SanitizeContainerSummaryFor returns a stable diagnostic string for a
// container summary keyed by game id. Used in test failures.
//
// Keys are sorted before rendering: Go map iteration order is
// randomized, so without sorting the "stable" promise would not hold
// and repeated failures would print differently-ordered summaries.
func SanitizeContainerSummaryFor(values map[string]string, gameID string) string {
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	parts := make([]string, 0, len(keys))
	for _, key := range keys {
		parts = append(parts, key+"="+values[key])
	}
	return fmt.Sprintf("game=%s {%s}", gameID, strings.Join(parts, ", "))
}
|
||||
Reference in New Issue
Block a user