feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
+334
View File
@@ -0,0 +1,334 @@
package harness
import (
	"context"
	"encoding/json"
	"fmt"
	"sort"
	"strconv"
	"strings"
	"testing"
	"time"

	"galaxy/rtmanager/internal/ports"
	"github.com/redis/go-redis/v9"
	"github.com/stretchr/testify/require"
)
// Fallback timing for the stream-polling helpers in this file.
// Stream-driven assertions sit on top of the runtime's worker tickers
// (defaults of 200-500ms in `EnvOptions`), so a 30s budget leaves every
// reconcile / probe / event tick ample headroom even on a slow CI
// runner.
const (
	// defaultStreamTimeout is the wait budget applied when a caller
	// passes a non-positive timeout to one of the WaitFor* helpers.
	defaultStreamTimeout time.Duration = 30 * time.Second

	// defaultStreamPoll is the pause between two consecutive scans of a
	// stream while a WaitFor* helper is still looking for a match.
	defaultStreamPoll time.Duration = 25 * time.Millisecond
)
// XAddStartJob publishes a single start-job entry to the stream named
// by env.Cfg.Streams.StartJobs (the `runtime:start_jobs` AsyncAPI
// shape) and returns the entry id Redis assigned to it. The field set
// mirrors what Lobby's `runtimemanager.Publisher` writes, so the
// consumer handles the entry like a real Lobby-published job.
//
// The entry carries `game_id`, `image_ref`, and `requested_at_ms` (the
// current UTC wall clock in Unix milliseconds). Any XADD error fails
// the test immediately.
func XAddStartJob(t testing.TB, env *Env, gameID, imageRef string) string {
	t.Helper()

	fields := map[string]any{
		"game_id":         gameID,
		"image_ref":       imageRef,
		"requested_at_ms": time.Now().UTC().UnixMilli(),
	}
	args := &redis.XAddArgs{
		Stream: env.Cfg.Streams.StartJobs,
		Values: fields,
	}

	entryID, err := env.RedisClient.XAdd(context.Background(), args).Result()
	require.NoErrorf(t, err, "xadd start_jobs for game %s", gameID)
	return entryID
}
// XAddStopJob publishes a single stop-job entry to the stream named by
// env.Cfg.Streams.StopJobs and returns the entry id Redis assigned to
// it. The reason string classifies the stop; its enum is documented at
// `ports.StopReason`.
//
// The entry carries `game_id`, `reason`, and `requested_at_ms` (the
// current UTC wall clock in Unix milliseconds). Any XADD error fails
// the test immediately.
func XAddStopJob(t testing.TB, env *Env, gameID, reason string) string {
	t.Helper()

	fields := map[string]any{
		"game_id":         gameID,
		"reason":          reason,
		"requested_at_ms": time.Now().UTC().UnixMilli(),
	}
	args := &redis.XAddArgs{
		Stream: env.Cfg.Streams.StopJobs,
		Values: fields,
	}

	entryID, err := env.RedisClient.XAdd(context.Background(), args).Result()
	require.NoErrorf(t, err, "xadd stop_jobs for game %s", gameID)
	return entryID
}
// JobResultEntry is one decoded `runtime:job_results` stream entry. It
// mirrors `ports.JobResult` and additionally exposes the Redis entry id
// so tests can correlate XADD ids with results.
type JobResultEntry struct {
	// StreamID is the Redis stream entry id.
	StreamID string
	// GameID is the entry's `game_id` field.
	GameID string
	// Outcome is the entry's `outcome` field.
	Outcome string
	// ContainerID is the entry's `container_id` field.
	ContainerID string
	// EngineEndpoint is the entry's `engine_endpoint` field.
	EngineEndpoint string
	// ErrorCode is the entry's `error_code` field.
	ErrorCode string
	// ErrorMessage is the entry's `error_message` field.
	ErrorMessage string
}
// HealthEventEntry is one decoded `runtime:health_events` stream entry,
// following that stream's AsyncAPI shape.
type HealthEventEntry struct {
	// StreamID is the Redis stream entry id.
	StreamID string
	// GameID is the entry's `game_id` field.
	GameID string
	// ContainerID is the entry's `container_id` field.
	ContainerID string
	// EventType is the entry's `event_type` field.
	EventType string
	// OccurredAtMs is the entry's `occurred_at_ms` field parsed as a
	// base-10 integer.
	OccurredAtMs int64
	// Details is the JSON object carried in the entry's `details`
	// field; it stays nil when the field is empty or not decodable.
	Details map[string]any
}
// NotificationIntentEntry is one decoded `notification:intents` stream
// entry, the kind RTM publishes for first-touch start failures.
type NotificationIntentEntry struct {
	// StreamID is the Redis stream entry id.
	StreamID string
	// NotificationType is the entry's `notification_type` field.
	NotificationType string
	// IdempotencyKey is the entry's `idempotency_key` field.
	IdempotencyKey string
	// Payload is the JSON object decoded from the entry's
	// `payload_json` field (or `payload` when the former is empty); it
	// stays nil when neither is present or the value is not decodable.
	Payload map[string]any
}
// WaitForJobResult repeatedly scans the job-results stream
// (env.Cfg.Streams.JobResults) until some entry satisfies predicate and
// returns the first such entry in scan order. A non-positive timeout is
// replaced by defaultStreamTimeout.
//
// Nothing is consumed from the stream: each pass re-reads the full
// range from `-` to `+`, which is acceptable because RTM's writes are
// append-only and the per-test cardinality is small. When the deadline
// has passed after an unsuccessful pass, the test is failed with a
// summary of the entries seen on that pass. An XRANGE error fails the
// test immediately.
func WaitForJobResult(t testing.TB, env *Env, predicate func(JobResultEntry) bool, timeout time.Duration) JobResultEntry {
	t.Helper()

	if timeout <= 0 {
		timeout = defaultStreamTimeout
	}
	giveUpAt := time.Now().Add(timeout)

	for {
		messages, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.JobResults, "-", "+").Result()
		require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.JobResults)

		for i := range messages {
			if candidate := decodeJobResult(messages[i]); predicate(candidate) {
				return candidate
			}
		}

		// The deadline is checked only after a complete scan, so at
		// least one pass always runs regardless of the timeout.
		if giveUpAt.Before(time.Now()) {
			t.Fatalf("rtmanager integration: no job_result matched within %s; observed=%v",
				timeout, jobResultStreamSummary(messages))
		}
		time.Sleep(defaultStreamPoll)
	}
}
// AllJobResults reads the whole job-results stream
// (env.Cfg.Streams.JobResults) once and returns every entry decoded, in
// the order XRANGE delivered them. Intended for assertions that depend
// on cardinality, such as replay tests. An XRANGE error fails the test.
func AllJobResults(t testing.TB, env *Env) []JobResultEntry {
	t.Helper()

	messages, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.JobResults, "-", "+").Result()
	require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.JobResults)

	decoded := make([]JobResultEntry, len(messages))
	for i := range messages {
		decoded[i] = decodeJobResult(messages[i])
	}
	return decoded
}
// WaitForHealthEvent repeatedly scans the health-events stream
// (env.Cfg.Streams.HealthEvents) until some entry satisfies predicate
// and returns the first such entry in scan order. A non-positive
// timeout is replaced by defaultStreamTimeout.
//
// Each pass re-reads the full range from `-` to `+`. When the deadline
// has passed after an unsuccessful pass, the test is failed with a
// summary of the entries seen on that pass. An XRANGE error fails the
// test immediately.
func WaitForHealthEvent(t testing.TB, env *Env, predicate func(HealthEventEntry) bool, timeout time.Duration) HealthEventEntry {
	t.Helper()

	if timeout <= 0 {
		timeout = defaultStreamTimeout
	}
	giveUpAt := time.Now().Add(timeout)

	for {
		messages, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.HealthEvents, "-", "+").Result()
		require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.HealthEvents)

		for i := range messages {
			if candidate := decodeHealthEvent(t, messages[i]); predicate(candidate) {
				return candidate
			}
		}

		// The deadline is checked only after a complete scan, so at
		// least one pass always runs regardless of the timeout.
		if giveUpAt.Before(time.Now()) {
			t.Fatalf("rtmanager integration: no health_event matched within %s; observed=%v",
				timeout, healthEventStreamSummary(messages))
		}
		time.Sleep(defaultStreamPoll)
	}
}
// WaitForNotificationIntent repeatedly scans the notification-intents
// stream (env.Cfg.Streams.NotificationIntents) until some entry
// satisfies predicate and returns the first such entry in scan order. A
// non-positive timeout is replaced by defaultStreamTimeout.
//
// Each pass re-reads the full range from `-` to `+`. When the deadline
// has passed after an unsuccessful pass, the test is failed with a
// summary of the entries seen on that pass. An XRANGE error fails the
// test immediately.
func WaitForNotificationIntent(t testing.TB, env *Env, predicate func(NotificationIntentEntry) bool, timeout time.Duration) NotificationIntentEntry {
	t.Helper()

	if timeout <= 0 {
		timeout = defaultStreamTimeout
	}
	giveUpAt := time.Now().Add(timeout)

	for {
		messages, err := env.RedisClient.XRange(context.Background(), env.Cfg.Streams.NotificationIntents, "-", "+").Result()
		require.NoErrorf(t, err, "xrange %s", env.Cfg.Streams.NotificationIntents)

		for i := range messages {
			if candidate := decodeNotificationIntent(t, messages[i]); predicate(candidate) {
				return candidate
			}
		}

		// The deadline is checked only after a complete scan, so at
		// least one pass always runs regardless of the timeout.
		if giveUpAt.Before(time.Now()) {
			t.Fatalf("rtmanager integration: no notification_intent matched within %s; observed=%v",
				timeout, notificationStreamSummary(messages))
		}
		time.Sleep(defaultStreamPoll)
	}
}
// JobOutcomeIs builds a predicate that accepts a job result only when
// both its GameID equals gameID and its Outcome equals outcome.
func JobOutcomeIs(gameID, outcome string) func(JobResultEntry) bool {
	return func(candidate JobResultEntry) bool {
		if candidate.GameID != gameID {
			return false
		}
		return candidate.Outcome == outcome
	}
}
// JobOutcomeWithErrorCode builds a predicate that accepts a job result
// only when its GameID, Outcome, and ErrorCode all equal the supplied
// values. Used by replay-no-op assertions.
func JobOutcomeWithErrorCode(gameID, outcome, errorCode string) func(JobResultEntry) bool {
	return func(candidate JobResultEntry) bool {
		if candidate.GameID != gameID {
			return false
		}
		if candidate.Outcome != outcome {
			return false
		}
		return candidate.ErrorCode == errorCode
	}
}
// HealthEventTypeIs builds a predicate that accepts a health event only
// when both its GameID equals gameID and its EventType equals
// eventType.
func HealthEventTypeIs(gameID, eventType string) func(HealthEventEntry) bool {
	return func(candidate HealthEventEntry) bool {
		if candidate.GameID != gameID {
			return false
		}
		return candidate.EventType == eventType
	}
}
// decodeJobResult converts one raw job-results stream message into a
// JobResultEntry. Every field is read through streamString, so a field
// missing from the message decodes to the empty string.
func decodeJobResult(message redis.XMessage) JobResultEntry {
	field := func(key string) string {
		return streamString(message.Values, key)
	}

	var result JobResultEntry
	result.StreamID = message.ID
	result.GameID = field("game_id")
	result.Outcome = field("outcome")
	result.ContainerID = field("container_id")
	result.EngineEndpoint = field("engine_endpoint")
	result.ErrorCode = field("error_code")
	result.ErrorMessage = field("error_message")
	return result
}
// decodeHealthEvent converts one raw health-events stream message into
// a HealthEventEntry. Decoding is best-effort and never fails the test:
// the `occurred_at_ms` parse error is discarded (a missing or
// non-numeric value yields 0), and `details` is left nil when the field
// is empty or is not valid JSON for a map.
func decodeHealthEvent(t testing.TB, message redis.XMessage) HealthEventEntry {
	t.Helper()

	// Error deliberately ignored; the entry keeps whatever ParseInt
	// returned alongside it.
	occurredAtMs, _ := strconv.ParseInt(streamString(message.Values, "occurred_at_ms"), 10, 64)

	event := HealthEventEntry{
		StreamID:     message.ID,
		GameID:       streamString(message.Values, "game_id"),
		ContainerID:  streamString(message.Values, "container_id"),
		EventType:    streamString(message.Values, "event_type"),
		OccurredAtMs: occurredAtMs,
	}

	encoded := streamString(message.Values, "details")
	if encoded == "" {
		return event
	}
	var details map[string]any
	if json.Unmarshal([]byte(encoded), &details) != nil {
		// Undecodable details are dropped rather than reported.
		return event
	}
	event.Details = details
	return event
}
// decodeNotificationIntent converts one raw notification-intents stream
// message into a NotificationIntentEntry. The payload is taken from the
// `payload_json` field, falling back to `payload` when the former is
// missing or empty. Decoding is best-effort: Payload is left nil when
// no payload text is found or it is not valid JSON for a map.
func decodeNotificationIntent(t testing.TB, message redis.XMessage) NotificationIntentEntry {
	t.Helper()

	intent := NotificationIntentEntry{
		StreamID:         message.ID,
		NotificationType: streamString(message.Values, "notification_type"),
		IdempotencyKey:   streamString(message.Values, "idempotency_key"),
	}

	encoded := streamString(message.Values, "payload_json")
	if encoded == "" {
		encoded = streamString(message.Values, "payload")
	}
	if encoded == "" {
		return intent
	}

	var payload map[string]any
	if json.Unmarshal([]byte(encoded), &payload) != nil {
		// Undecodable payloads are dropped rather than reported.
		return intent
	}
	intent.Payload = payload
	return intent
}
// streamString returns the value stored under key in a stream entry's
// field map as a string. A missing key yields "". String values are
// returned as-is, byte slices are converted, and any other value
// (including a stored nil) is rendered with the `%v` verb.
func streamString(values map[string]any, key string) string {
	value, present := values[key]
	if !present {
		return ""
	}
	if text, isString := value.(string); isString {
		return text
	}
	if blob, isBytes := value.([]byte); isBytes {
		return string(blob)
	}
	return fmt.Sprintf("%v", value)
}
// jobResultStreamSummary renders one diagnostic line per raw
// job-results message (entry id, game id, outcome, and error code) for
// inclusion in a test failure message.
func jobResultStreamSummary(entries []redis.XMessage) []string {
	lines := make([]string, len(entries))
	for i := range entries {
		result := decodeJobResult(entries[i])
		lines[i] = fmt.Sprintf("%s game=%s outcome=%s err=%s",
			result.StreamID, result.GameID, result.Outcome, result.ErrorCode)
	}
	return lines
}
// healthEventStreamSummary renders one diagnostic line per raw
// health-events message (entry id, `game_id`, and `event_type`) for
// inclusion in a test failure message.
func healthEventStreamSummary(entries []redis.XMessage) []string {
	lines := make([]string, len(entries))
	for i := range entries {
		message := entries[i]
		gameID := streamString(message.Values, "game_id")
		eventType := streamString(message.Values, "event_type")
		lines[i] = fmt.Sprintf("%s %s %s", message.ID, gameID, eventType)
	}
	return lines
}
// notificationStreamSummary renders one diagnostic line per raw
// notification-intents message (entry id and `notification_type`) for
// inclusion in a test failure message.
func notificationStreamSummary(entries []redis.XMessage) []string {
	lines := make([]string, len(entries))
	for i := range entries {
		message := entries[i]
		notificationType := streamString(message.Values, "notification_type")
		lines[i] = fmt.Sprintf("%s %s", message.ID, notificationType)
	}
	return lines
}
// Job outcome values re-exported from `ports` so suite authors can
// build predicates (for example with JobOutcomeIs) without importing
// `ports` themselves, which keeps test source focused.

// JobOutcomeSuccess aliases ports.JobOutcomeSuccess.
var JobOutcomeSuccess = ports.JobOutcomeSuccess

// JobOutcomeFailure aliases ports.JobOutcomeFailure.
var JobOutcomeFailure = ports.JobOutcomeFailure
// AssertNoJobResultBeyond fails the test when the job-results stream
// (env.Cfg.Streams.JobResults) currently holds more than expectedCount
// entries, as reported by XLEN. Replay tests use it to prove that a
// second envelope was a no-op. An XLEN error also fails the test.
func AssertNoJobResultBeyond(t testing.TB, env *Env, expectedCount int) {
	t.Helper()

	streamLen, err := env.RedisClient.XLen(context.Background(), env.Cfg.Streams.JobResults).Result()
	require.NoError(t, err)

	limit := int64(expectedCount)
	require.LessOrEqualf(t, streamLen, limit,
		"job_results stream has more entries than expected; got=%d expected<=%d", streamLen, expectedCount)
}
// SanitizeContainerSummaryFor returns a stable diagnostic string for a
// container summary keyed by game id, in the form
// `game=<gameID> {k1=v1, k2=v2, ...}`. Used in test failures.
//
// Pairs are emitted in ascending key order. Go randomizes map iteration
// order, so without the sort the same input could render differently on
// every call, which defeats comparing or grepping failure output. A nil
// or empty map renders as `game=<gameID> {}`.
func SanitizeContainerSummaryFor(values map[string]string, gameID string) string {
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	parts := make([]string, 0, len(keys))
	for _, key := range keys {
		parts = append(parts, key+"="+values[key])
	}
	return fmt.Sprintf("game=%s {%s}", gameID, strings.Join(parts, ", "))
}