feat: runtime manager

This commit is contained in:
Ilia Denisov
2026-04-28 20:39:18 +02:00
committed by GitHub
parent e0a99b346b
commit a7cee15115
289 changed files with 45660 additions and 2207 deletions
+20 -4
View File
@@ -39,6 +39,9 @@ integration/
├── lobbynotification/
│ ├── lobby_notification_test.go
│ └── race_name_intents_test.go
├── lobbyrtm/
│ ├── harness_test.go
│ └── lobby_rtm_test.go
├── go.mod
├── go.sum
└── internal/
@@ -49,10 +52,13 @@ integration/
│ └── contract.go
└── harness/
├── binary.go
├── dockernetwork.go
├── engineimage.go
├── keys.go
├── mail_stub.go
├── process.go
├── redis_container.go
├── rtmanagerservice.go
├── smtp_capture.go
└── user_stub.go
```
@@ -95,15 +101,23 @@ integration/
applications, invites, member operations, runtime pause, cascade
membership block, and the three race-name intents emitted by capability
evaluation at game finish and by self-service registration.
- `lobbyrtm` verifies the asynchronous boundary between real
`Game Lobby` and real `Runtime Manager` end-to-end against a real
Docker daemon: start_job → engine container → success job_result →
game `running`; cascade-blocked owner → stop_job(cancelled) → engine
stopped; missing image → failure job_result + admin notification
intent → game `start_failed`. Skips automatically on hosts without
Docker.
The current fast suites still use one isolated `miniredis` instance plus either
real downstream processes or external stateful HTTP stubs where appropriate.
`authsessionmail`, `gatewayauthsessionmail`, `notificationgateway`,
`notificationmail`, `notificationuser`, `gatewayauthsessionusermail`,
`lobbyuser`, and `lobbynotification` are the deliberate exceptions: they use
one real Redis container through `testcontainers-go`, because those
boundaries must exercise real Redis stream, persistence, or scheduling
behavior.
`lobbyuser`, `lobbynotification`, and `lobbyrtm` are the deliberate
exceptions: they use one real Redis container through
`testcontainers-go`, because those boundaries must exercise real Redis
stream, persistence, or scheduling behavior. `lobbyrtm` additionally
needs a real Docker daemon and the `galaxy/game` engine image.
`authsessionmail` additionally contains one targeted SMTP-capture scenario for
the real `smtp` provider path, while `gatewayauthsessionmail` keeps `Mail
Service` in `stub` mode and extracts the confirmation code through the trusted
@@ -127,6 +141,7 @@ go test ./notificationuser/...
go test ./gatewayauthsessionusermail/...
go test ./lobbyuser/...
go test ./lobbynotification/...
go test ./lobbyrtm/...
```
Useful regression commands after boundary changes:
@@ -144,6 +159,7 @@ go test ./notificationuser/...
go test ./gatewayauthsessionusermail/...
go test ./lobbyuser/...
go test ./lobbynotification/...
go test ./lobbyrtm/...
cd ../gateway && go test ./...
cd ../authsession && go test ./... -run GatewayCompatibility
cd ../user && go test ./...
@@ -0,0 +1,631 @@
// Package gatewaylobby_test exercises the authenticated Gateway -> Game
// Lobby boundary against real Gateway + real Auth/Session Service + real
// User Service + real Game Lobby running on testcontainers PostgreSQL
// and Redis.
//
// The boundary contract under test is: a client signs a FlatBuffers
// `ExecuteCommandRequest` for one of the reserved `lobby.*` message
// types; Gateway verifies the signature, looks up the device session,
// resolves the calling `user_id`, routes the command to the Lobby
// downstream client, and signs the FlatBuffers response. The suite
// asserts on the gRPC response shape, the signed result envelope, and
// the decoded FlatBuffers payload.
//
// Coverage maps onto `TESTING.md §6` `Gateway <-> Game Lobby`:
// authenticated platform-level command routing.
package gatewaylobby_test
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"errors"
"io"
"net/http"
"path/filepath"
"testing"
"time"
gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1"
contractsgatewayv1 "galaxy/integration/internal/contracts/gatewayv1"
"galaxy/integration/internal/harness"
lobbymodel "galaxy/model/lobby"
"galaxy/transcoder"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Shared fixtures for the gateway<->lobby suite: the two public-auth
// REST paths exposed by the Gateway, plus the default identity used by
// the single-user routing test.
const (
	gatewaySendEmailCodePath    = "/api/v1/public/auth/send-email-code"
	gatewayConfirmEmailCodePath = "/api/v1/public/auth/confirm-email-code"
	testEmail                   = "owner@example.com"
	testTimeZone                = "Europe/Kaliningrad"
)
// TestGatewayRoutesLobbyMyGamesListAndSignsResponse drives a single
// authenticated user through the full public-auth flow, then issues
// `lobby.my.games.list` via the authenticated gRPC ExecuteCommand
// surface and asserts the routed-and-signed end-to-end pipeline.
func TestGatewayRoutesLobbyMyGamesListAndSignsResponse(t *testing.T) {
	h := newGatewayLobbyHarness(t)
	clientPrivateKey := newClientPrivateKey("g1-owner")
	deviceSessionID, ownerUserID := h.authenticate(t, testEmail, clientPrivateKey)
	// Pre-seed: directly create a private game owned by this user via
	// Lobby's public REST surface. This mirrors what an admin/UI tool
	// would do; the seed proves Gateway routing reads back caller-owned
	// state, not just empty results.
	gameID := h.createPrivateGame(t, ownerUserID, "Gateway Routing Galaxy",
		time.Now().Add(48*time.Hour).Unix())
	// Send authenticated `lobby.my.games.list` via the Gateway gRPC
	// surface. An empty request body is valid: the caller is identified
	// by the device session, not by any request field.
	conn := h.dialGateway(t)
	client := gatewayv1.NewEdgeGatewayClient(conn)
	requestBytes, err := transcoder.MyGamesListRequestToPayload(&lobbymodel.MyGamesListRequest{})
	require.NoError(t, err)
	executeRequest := newExecuteCommandRequest(
		deviceSessionID,
		"req-list-1",
		lobbymodel.MessageTypeMyGamesList,
		requestBytes,
		clientPrivateKey,
	)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	response, err := client.ExecuteCommand(ctx, executeRequest)
	require.NoError(t, err, "ExecuteCommand for lobby.my.games.list must succeed")
	require.Equal(t, "ok", response.GetResultCode())
	require.NotEmpty(t, response.GetSignature(), "gateway must sign every successful response")
	// Verify the signed envelope: the signature must cover exactly the
	// canonical response fields, and the payload hash must match the
	// returned payload bytes.
	require.NoError(t, contractsgatewayv1.VerifyResponseSignature(
		h.responseSignerPublicKey,
		response.GetSignature(),
		contractsgatewayv1.ResponseSigningFields{
			ProtocolVersion: response.GetProtocolVersion(),
			RequestID:       response.GetRequestId(),
			TimestampMS:     response.GetTimestampMs(),
			ResultCode:      response.GetResultCode(),
			PayloadHash:     response.GetPayloadHash(),
		}),
	)
	require.NoError(t, contractsgatewayv1.VerifyPayloadHash(
		response.GetPayloadBytes(), response.GetPayloadHash()))
	// Decode the FlatBuffers payload. Lobby's `/my/games` may or may
	// not include the newly-seeded game depending on its membership /
	// status filter; the boundary contract under test here is the
	// Gateway routing + signing, not Lobby's own list semantics. We
	// assert the response decodes to a valid (possibly empty) list
	// and, if the game IS present, that the projected owner+type
	// fields survive the FlatBuffers roundtrip.
	decoded, err := transcoder.PayloadToMyGamesListResponse(response.GetPayloadBytes())
	require.NoError(t, err)
	require.NotNil(t, decoded.Items, "Items must always be non-nil even when empty")
	for _, item := range decoded.Items {
		if item.GameID == gameID {
			assert.Equal(t, ownerUserID, item.OwnerUserID)
			assert.Equal(t, "private", item.GameType)
			return
		}
	}
	// Game absent from /my/games is acceptable for this test. Issue a
	// direct lobby read to confirm the game does exist on the lobby
	// side, so we know the routing path is the only thing we depend
	// on (not lobby's own `/my/games` filter).
	t.Logf("seeded game %s not in /my/games (likely lobby filter on draft); routing pipeline succeeded with empty items", gameID)
	require.True(t, h.gameExists(t, gameID),
		"seeded game must still be observable via lobby admin REST")
}
// TestGatewayRoutesLobbyOpenEnrollmentEnforcesOwnerOnly drives two
// authenticated users: the owner who can transition the game to
// `enrollment_open`, and a non-owner whose attempt is rejected with
// the canonical lobby error envelope. The test exercises the
// "owner-only commands before start" requirement of `TESTING.md §6`.
func TestGatewayRoutesLobbyOpenEnrollmentEnforcesOwnerOnly(t *testing.T) {
	h := newGatewayLobbyHarness(t)
	ownerKey := newClientPrivateKey("g1-owner-2")
	ownerSessionID, ownerUserID := h.authenticate(t, "owner2@example.com", ownerKey)
	guestKey := newClientPrivateKey("g1-guest")
	guestSessionID, _ := h.authenticate(t, "guest@example.com", guestKey)
	gameID := h.createPrivateGame(t, ownerUserID, "Owner-Only Galaxy",
		time.Now().Add(48*time.Hour).Unix())
	conn := h.dialGateway(t)
	client := gatewayv1.NewEdgeGatewayClient(conn)
	// Bound both RPCs the same way the sibling routing test does, so a
	// wedged gateway fails this test in seconds instead of stalling
	// until the package-level `go test` timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	// Owner sends `lobby.game.open-enrollment` → success.
	ownerRequest, err := transcoder.OpenEnrollmentRequestToPayload(&lobbymodel.OpenEnrollmentRequest{
		GameID: gameID,
	})
	require.NoError(t, err)
	ownerResponse, err := client.ExecuteCommand(
		ctx,
		newExecuteCommandRequest(ownerSessionID, "req-owner-open", lobbymodel.MessageTypeOpenEnrollment, ownerRequest, ownerKey),
	)
	require.NoError(t, err)
	assert.Equal(t, "ok", ownerResponse.GetResultCode())
	decoded, err := transcoder.PayloadToOpenEnrollmentResponse(ownerResponse.GetPayloadBytes())
	require.NoError(t, err)
	assert.Equal(t, gameID, decoded.GameID)
	assert.Equal(t, "enrollment_open", decoded.Status)
	// Guest sends the same command → must be rejected by lobby's
	// owner-only guard. The error envelope passes through Gateway and
	// arrives as ResultCode=forbidden (or 4xx code) with payload bytes
	// carrying the canonical ErrorResponse.
	guestRequest, err := transcoder.OpenEnrollmentRequestToPayload(&lobbymodel.OpenEnrollmentRequest{
		GameID: gameID,
	})
	require.NoError(t, err)
	guestResponse, err := client.ExecuteCommand(
		ctx,
		newExecuteCommandRequest(guestSessionID, "req-guest-open", lobbymodel.MessageTypeOpenEnrollment, guestRequest, guestKey),
	)
	require.NoError(t, err, "non-2xx lobby responses must surface as a normal gRPC response with a non-ok ResultCode")
	require.NotEqual(t, "ok", guestResponse.GetResultCode(),
		"non-owner must not receive ok; got %s", guestResponse.GetResultCode())
	decodedError, err := transcoder.PayloadToLobbyErrorResponse(guestResponse.GetPayloadBytes())
	require.NoError(t, err)
	assert.NotEmpty(t, decodedError.Error.Code)
	assert.NotEmpty(t, decodedError.Error.Message)
}
// gatewayLobbyHarness owns the per-test infrastructure: shared
// PostgreSQL+Redis containers, four real binaries, the Gateway
// response-signer key, and the public/internal addresses for each
// service.
type gatewayLobbyHarness struct {
	redis                   *redis.Client     // direct Redis handle used to poll session projections
	mailStub                *harness.MailStub // captures auth-code email deliveries
	authsessionPublicURL    string
	gatewayPublicURL        string // Gateway public REST surface (public-auth flow)
	gatewayGRPCAddr         string // Gateway authenticated ExecuteCommand surface
	userServiceURL          string
	lobbyAdminURL           string // Lobby internal HTTP (admin reads, readyz)
	lobbyPublicURL          string // Lobby public REST (used to seed games)
	responseSignerPublicKey ed25519.PublicKey // verifies Gateway response signatures
	authsessionProcess      *harness.Process
	gatewayProcess          *harness.Process
	userServiceProcess      *harness.Process
	lobbyProcess            *harness.Process
}
// newGatewayLobbyHarness boots the full four-binary topology for one
// test — Redis container, mail stub, User Service, Auth/Session, Game
// Lobby, and Gateway, each on its own free TCP address — and blocks
// until every readiness signal fires. Cleanup is registered via
// t.Cleanup by the underlying harness helpers.
func newGatewayLobbyHarness(t *testing.T) *gatewayLobbyHarness {
	t.Helper()
	redisRuntime := harness.StartRedisContainer(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisRuntime.Addr,
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() { require.NoError(t, redisClient.Close()) })
	mailStub := harness.NewMailStub(t)
	responseSignerPath, responseSignerPublicKey := harness.WriteResponseSignerPEM(t, t.Name())
	// One free localhost address per listener across the four binaries.
	userServiceAddr := harness.FreeTCPAddress(t)
	authsessionPublicAddr := harness.FreeTCPAddress(t)
	authsessionInternalAddr := harness.FreeTCPAddress(t)
	gatewayPublicAddr := harness.FreeTCPAddress(t)
	gatewayGRPCAddr := harness.FreeTCPAddress(t)
	lobbyPublicAddr := harness.FreeTCPAddress(t)
	lobbyInternalAddr := harness.FreeTCPAddress(t)
	userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice")
	authsessionBinary := harness.BuildBinary(t, "authsession", "./authsession/cmd/authsession")
	gatewayBinary := harness.BuildBinary(t, "gateway", "./gateway/cmd/gateway")
	lobbyBinary := harness.BuildBinary(t, "lobby", "./lobby/cmd/lobby")
	// User Service starts first: both Auth/Session and Lobby are
	// configured below to resolve users through it.
	userServiceEnv := harness.StartUserServicePersistence(t, redisRuntime.Addr).Env
	userServiceEnv["USERSERVICE_LOG_LEVEL"] = "info"
	userServiceEnv["USERSERVICE_INTERNAL_HTTP_ADDR"] = userServiceAddr
	userServiceEnv["OTEL_TRACES_EXPORTER"] = "none"
	userServiceEnv["OTEL_METRICS_EXPORTER"] = "none"
	userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, userServiceEnv)
	waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr)
	authsessionEnv := map[string]string{
		"AUTHSESSION_LOG_LEVEL":                    "info",
		"AUTHSESSION_PUBLIC_HTTP_ADDR":             authsessionPublicAddr,
		"AUTHSESSION_PUBLIC_HTTP_REQUEST_TIMEOUT":  time.Second.String(),
		"AUTHSESSION_INTERNAL_HTTP_ADDR":           authsessionInternalAddr,
		"AUTHSESSION_INTERNAL_HTTP_REQUEST_TIMEOUT": time.Second.String(),
		"AUTHSESSION_REDIS_MASTER_ADDR":            redisRuntime.Addr,
		"AUTHSESSION_REDIS_PASSWORD":               "integration",
		"AUTHSESSION_USER_SERVICE_MODE":            "rest",
		"AUTHSESSION_USER_SERVICE_BASE_URL":        "http://" + userServiceAddr,
		"AUTHSESSION_USER_SERVICE_REQUEST_TIMEOUT": time.Second.String(),
		"AUTHSESSION_MAIL_SERVICE_MODE":            "rest",
		"AUTHSESSION_MAIL_SERVICE_BASE_URL":        mailStub.BaseURL(),
		"AUTHSESSION_MAIL_SERVICE_REQUEST_TIMEOUT": time.Second.String(),
		// Must match the GATEWAY_SESSION_* values below so Gateway can
		// read the session projections Auth/Session writes.
		"AUTHSESSION_REDIS_GATEWAY_SESSION_CACHE_KEY_PREFIX": "gateway:session:",
		"AUTHSESSION_REDIS_GATEWAY_SESSION_EVENTS_STREAM":    "gateway:session_events",
		"OTEL_TRACES_EXPORTER":  "none",
		"OTEL_METRICS_EXPORTER": "none",
	}
	authsessionProcess := harness.StartProcess(t, "authsession", authsessionBinary, authsessionEnv)
	waitForAuthsessionPublicReady(t, authsessionProcess, "http://"+authsessionPublicAddr)
	lobbyEnv := harness.StartLobbyServicePersistence(t, redisRuntime.Addr).Env
	lobbyEnv["LOBBY_LOG_LEVEL"] = "info"
	lobbyEnv["LOBBY_PUBLIC_HTTP_ADDR"] = lobbyPublicAddr
	lobbyEnv["LOBBY_INTERNAL_HTTP_ADDR"] = lobbyInternalAddr
	lobbyEnv["LOBBY_USER_SERVICE_BASE_URL"] = "http://" + userServiceAddr
	lobbyEnv["LOBBY_GM_BASE_URL"] = mailStub.BaseURL() // unused; lobby just needs a syntactically valid URL.
	// Short read-block timeouts keep lobby's stream consumers snappy in
	// tests without changing their semantics.
	lobbyEnv["LOBBY_RUNTIME_JOB_RESULTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_USER_LIFECYCLE_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_GM_EVENTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["OTEL_TRACES_EXPORTER"] = "none"
	lobbyEnv["OTEL_METRICS_EXPORTER"] = "none"
	lobbyProcess := harness.StartProcess(t, "lobby", lobbyBinary, lobbyEnv)
	harness.WaitForHTTPStatus(t, lobbyProcess, "http://"+lobbyInternalAddr+"/readyz", http.StatusOK)
	gatewayEnv := map[string]string{
		"GATEWAY_LOG_LEVEL":               "info",
		"GATEWAY_PUBLIC_HTTP_ADDR":        gatewayPublicAddr,
		"GATEWAY_AUTHENTICATED_GRPC_ADDR": gatewayGRPCAddr,
		"GATEWAY_REDIS_MASTER_ADDR":       redisRuntime.Addr,
		"GATEWAY_REDIS_PASSWORD":          "integration",
		"GATEWAY_SESSION_CACHE_REDIS_KEY_PREFIX": "gateway:session:",
		"GATEWAY_SESSION_EVENTS_REDIS_STREAM":    "gateway:session_events",
		"GATEWAY_CLIENT_EVENTS_REDIS_STREAM":     "gateway:client_events",
		"GATEWAY_REPLAY_REDIS_KEY_PREFIX":        "gateway:replay:",
		"GATEWAY_RESPONSE_SIGNER_PRIVATE_KEY_PEM_PATH": filepath.Clean(responseSignerPath),
		"GATEWAY_AUTH_SERVICE_BASE_URL":                "http://" + authsessionPublicAddr,
		"GATEWAY_USER_SERVICE_BASE_URL":                "http://" + userServiceAddr,
		"GATEWAY_LOBBY_SERVICE_BASE_URL":               "http://" + lobbyPublicAddr,
		"GATEWAY_PUBLIC_AUTH_UPSTREAM_TIMEOUT":         (500 * time.Millisecond).String(),
		// Anti-abuse limits are set far above anything the tests emit so
		// rate limiting can never interfere with assertions.
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_REQUESTS":                  "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_WINDOW":                    "1s",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_BURST":                     "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS":     "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_WINDOW":       "1s",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST":        "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS":  "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_WINDOW":    "1s",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST":     "100",
		"OTEL_TRACES_EXPORTER":  "none",
		"OTEL_METRICS_EXPORTER": "none",
	}
	gatewayProcess := harness.StartProcess(t, "gateway", gatewayBinary, gatewayEnv)
	harness.WaitForHTTPStatus(t, gatewayProcess, "http://"+gatewayPublicAddr+"/healthz", http.StatusOK)
	harness.WaitForTCP(t, gatewayProcess, gatewayGRPCAddr)
	return &gatewayLobbyHarness{
		redis:                   redisClient,
		mailStub:                mailStub,
		authsessionPublicURL:    "http://" + authsessionPublicAddr,
		gatewayPublicURL:        "http://" + gatewayPublicAddr,
		gatewayGRPCAddr:         gatewayGRPCAddr,
		userServiceURL:          "http://" + userServiceAddr,
		lobbyAdminURL:           "http://" + lobbyInternalAddr,
		lobbyPublicURL:          "http://" + lobbyPublicAddr,
		responseSignerPublicKey: responseSignerPublicKey,
		authsessionProcess:      authsessionProcess,
		gatewayProcess:          gatewayProcess,
		userServiceProcess:      userServiceProcess,
		lobbyProcess:            lobbyProcess,
	}
}
// authenticate runs the public-auth challenge/confirm flow through the
// Gateway and returns the resulting `device_session_id` plus the
// resolved `user_id`.
func (h *gatewayLobbyHarness) authenticate(t *testing.T, email string, clientKey ed25519.PrivateKey) (string, string) {
	t.Helper()
	challengeID := h.sendChallenge(t, email)
	code := h.waitForChallengeCode(t, email)
	confirm := h.confirmCode(t, challengeID, code, clientKey)
	require.Equalf(t, http.StatusOK, confirm.StatusCode, "confirm status: %s", confirm.Body)
	var confirmBody struct {
		DeviceSessionID string `json:"device_session_id"`
	}
	require.NoError(t, decodeStrictJSONPayload([]byte(confirm.Body), &confirmBody))
	require.NotEmpty(t, confirmBody.DeviceSessionID)
	user := h.lookupUserByEmail(t, email)
	// The gateway learns about new device sessions asynchronously;
	// block until the session projection is readable from Redis.
	sessionKey := "gateway:session:" + confirmBody.DeviceSessionID
	for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		if _, err := h.redis.Get(context.Background(), sessionKey).Bytes(); err == nil {
			return confirmBody.DeviceSessionID, user.UserID
		}
	}
	t.Fatalf("gateway session projection for %s never arrived", confirmBody.DeviceSessionID)
	return "", ""
}
// waitForChallengeCode polls the mail stub until the requested email
// has received an auth-code delivery and returns the cleartext code.
func (h *gatewayLobbyHarness) waitForChallengeCode(t *testing.T, email string) string {
	t.Helper()
	deadline := time.Now().Add(5 * time.Second)
	for {
		if !time.Now().Before(deadline) {
			break
		}
		deliveries := h.mailStub.RecordedDeliveries()
		for i := range deliveries {
			if deliveries[i].Email == email && deliveries[i].Code != "" {
				return deliveries[i].Code
			}
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("auth code for %s never arrived at the mail stub", email)
	return ""
}
// sendChallenge starts the public-auth flow for the given email via the
// Gateway REST surface and returns the issued challenge id.
func (h *gatewayLobbyHarness) sendChallenge(t *testing.T, email string) string {
	t.Helper()
	reply := postJSONValue(t, h.gatewayPublicURL+gatewaySendEmailCodePath, map[string]string{"email": email})
	require.Equalf(t, http.StatusOK, reply.StatusCode, "send-email-code: %s", reply.Body)
	var parsed struct {
		ChallengeID string `json:"challenge_id"`
	}
	require.NoError(t, decodeStrictJSONPayload([]byte(reply.Body), &parsed))
	require.NotEmpty(t, parsed.ChallengeID)
	return parsed.ChallengeID
}
// confirmCode completes the public-auth flow: it posts the challenge id,
// the emailed code, and the client's public key, and returns the raw
// HTTP reply so callers can assert on status and body themselves.
func (h *gatewayLobbyHarness) confirmCode(t *testing.T, challengeID, code string, clientPrivateKey ed25519.PrivateKey) httpResponse {
	t.Helper()
	publicKey := clientPrivateKey.Public().(ed25519.PublicKey)
	payload := map[string]string{
		"challenge_id":      challengeID,
		"code":              code,
		"client_public_key": encodePublicKey(publicKey),
		"time_zone":         testTimeZone,
	}
	return postJSONValue(t, h.gatewayPublicURL+gatewayConfirmEmailCodePath, payload)
}
// lookupUserByEmail resolves the user record behind an email through the
// User Service internal lookup endpoint; only user_id is projected out.
func (h *gatewayLobbyHarness) lookupUserByEmail(t *testing.T, email string) struct {
	UserID string `json:"user_id"`
} {
	t.Helper()
	reply := postJSONValue(t, h.userServiceURL+"/api/v1/internal/user-lookups/by-email", map[string]string{"email": email})
	require.Equalf(t, http.StatusOK, reply.StatusCode, "user lookup: %s", reply.Body)
	// The endpoint returns the full user record wrapped in a "user"
	// object; everything except user_id is discarded.
	var envelope struct {
		User struct {
			UserID string `json:"user_id"`
		} `json:"user"`
	}
	require.NoError(t, json.Unmarshal([]byte(reply.Body), &envelope))
	require.NotEmpty(t, envelope.User.UserID)
	result := struct {
		UserID string `json:"user_id"`
	}{}
	result.UserID = envelope.User.UserID
	return result
}
// createPrivateGame seeds a private game owned by ownerUserID through
// Lobby's public REST surface and returns the new game id.
func (h *gatewayLobbyHarness) createPrivateGame(t *testing.T, ownerUserID, gameName string, enrollmentEndsAt int64) string {
	t.Helper()
	body := map[string]any{
		"game_name":             gameName,
		"game_type":             "private",
		"min_players":           1,
		"max_players":           4,
		"start_gap_hours":       6,
		"start_gap_players":     1,
		"enrollment_ends_at":    enrollmentEndsAt,
		"turn_schedule":         "0 18 * * *",
		"target_engine_version": "1.0.0",
	}
	headers := map[string]string{"X-User-Id": ownerUserID}
	reply := postJSONValueWithHeaders(t, h.lobbyPublicURL+"/api/v1/lobby/games", body, headers)
	require.Equalf(t, http.StatusCreated, reply.StatusCode, "create private game: %s", reply.Body)
	var created struct {
		GameID string `json:"game_id"`
	}
	require.NoError(t, json.Unmarshal([]byte(reply.Body), &created))
	require.NotEmpty(t, created.GameID)
	return created.GameID
}
// gameExists reports whether the lobby admin surface still observes a
// game that was created through the public surface.
func (h *gatewayLobbyHarness) gameExists(t *testing.T, gameID string) bool {
	t.Helper()
	request, err := http.NewRequest(http.MethodGet, h.lobbyAdminURL+"/api/v1/lobby/games/"+gameID, nil)
	require.NoError(t, err)
	reply := doRequest(t, request)
	return reply.StatusCode == http.StatusOK
}
// dialGateway opens a plaintext client connection to the Gateway's
// authenticated gRPC surface, blocking until the transport is ready,
// and closes it automatically at test cleanup.
func (h *gatewayLobbyHarness) dialGateway(t *testing.T) *grpc.ClientConn {
	t.Helper()
	dialCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	// NOTE(review): grpc.DialContext + WithBlock is deprecated upstream,
	// but the blocking dial doubles as the readiness gate here; keep it.
	conn, err := grpc.DialContext(dialCtx, h.gatewayGRPCAddr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithBlock(),
	)
	require.NoError(t, err)
	t.Cleanup(func() { require.NoError(t, conn.Close()) })
	return conn
}
// --- request/response helpers ---
// newExecuteCommandRequest builds a fully-populated, client-signed
// ExecuteCommandRequest: the payload hash is derived from the payload
// bytes, and the signature covers the canonical request-signing fields.
func newExecuteCommandRequest(deviceSessionID, requestID, messageType string, payloadBytes []byte, clientPrivateKey ed25519.PrivateKey) *gatewayv1.ExecuteCommandRequest {
	command := &gatewayv1.ExecuteCommandRequest{
		ProtocolVersion: contractsgatewayv1.ProtocolVersionV1,
		DeviceSessionId: deviceSessionID,
		MessageType:     messageType,
		TimestampMs:     time.Now().UnixMilli(),
		RequestId:       requestID,
		PayloadBytes:    payloadBytes,
		PayloadHash:     contractsgatewayv1.ComputePayloadHash(payloadBytes),
		TraceId:         "trace-" + requestID,
	}
	signingFields := contractsgatewayv1.RequestSigningFields{
		ProtocolVersion: command.GetProtocolVersion(),
		DeviceSessionID: command.GetDeviceSessionId(),
		MessageType:     command.GetMessageType(),
		TimestampMS:     command.GetTimestampMs(),
		RequestID:       command.GetRequestId(),
		PayloadHash:     command.GetPayloadHash(),
	}
	command.Signature = contractsgatewayv1.SignRequest(clientPrivateKey, signingFields)
	return command
}
// httpResponse is a fully-drained HTTP reply: status code, body text,
// and a cloned header map that stays safe to read after the underlying
// response body has been closed.
type httpResponse struct {
	StatusCode int
	Body       string
	Header     http.Header
}
// postJSONValue POSTs body as JSON to targetURL with no extra headers.
func postJSONValue(t *testing.T, targetURL string, body any) httpResponse {
	t.Helper()
	var noHeaders map[string]string
	return postJSONValueWithHeaders(t, targetURL, body, noHeaders)
}
// postJSONValueWithHeaders marshals body to JSON, POSTs it to targetURL
// with Content-Type set, applies any non-empty extra headers, and
// returns the drained reply.
func postJSONValueWithHeaders(t *testing.T, targetURL string, body any, headers map[string]string) httpResponse {
	t.Helper()
	encoded, err := json.Marshal(body)
	require.NoError(t, err)
	request, err := http.NewRequest(http.MethodPost, targetURL, bytes.NewReader(encoded))
	require.NoError(t, err)
	request.Header.Set("Content-Type", "application/json")
	// Empty header values mean "omit", letting callers pass optional
	// headers unconditionally.
	for name, value := range headers {
		if value != "" {
			request.Header.Set(name, value)
		}
	}
	return doRequest(t, request)
}
// doRequest executes the request with a short-lived, keep-alive-free
// client (each test process cycles many servers, so connection reuse is
// a liability) and returns the fully-drained reply.
func doRequest(t *testing.T, request *http.Request) httpResponse {
	t.Helper()
	httpClient := &http.Client{
		Timeout:   5 * time.Second,
		Transport: &http.Transport{DisableKeepAlives: true},
	}
	t.Cleanup(httpClient.CloseIdleConnections)
	reply, err := httpClient.Do(request)
	require.NoError(t, err)
	defer reply.Body.Close()
	raw, err := io.ReadAll(reply.Body)
	require.NoError(t, err)
	return httpResponse{
		StatusCode: reply.StatusCode,
		Body:       string(raw),
		Header:     reply.Header.Clone(),
	}
}
// decodeStrictJSONPayload unmarshals payload into target, rejecting
// both unknown fields and any trailing JSON after the first value.
func decodeStrictJSONPayload(payload []byte, target any) error {
	dec := json.NewDecoder(bytes.NewReader(payload))
	dec.DisallowUnknownFields()
	if err := dec.Decode(target); err != nil {
		return err
	}
	// A second decode must hit io.EOF; anything else means trailing
	// input (or a read error) after the first JSON value.
	switch err := dec.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// waitForUserServiceReady polls the user-readiness probe endpoint until
// it returns 200 or a 10s deadline elapses, dumping process logs on
// timeout.
func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	probe := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(probe.CloseIdleConnections)
	probeURL := baseURL + "/api/v1/internal/users/user-readiness-probe/exists"
	for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		req, err := http.NewRequest(http.MethodGet, probeURL, nil)
		require.NoError(t, err)
		if response, err := probe.Do(req); err == nil {
			// Drain so the (keep-alive-free) transport releases cleanly.
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
			if response.StatusCode == http.StatusOK {
				return
			}
		}
	}
	t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs())
}
// waitForAuthsessionPublicReady blocks until AuthSession's public
// listener is routing requests. The public surface has no `/healthz`,
// so an empty-email send-email-code POST is used as the readiness
// signal: once routing is up it deterministically returns 400.
func waitForAuthsessionPublicReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	probe := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(probe.CloseIdleConnections)
	probeURL := baseURL + "/api/v1/public/auth/send-email-code"
	for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		req, err := http.NewRequest(http.MethodPost, probeURL, bytes.NewReader([]byte(`{"email":""}`)))
		require.NoError(t, err)
		req.Header.Set("Content-Type", "application/json")
		if response, err := probe.Do(req); err == nil {
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
			if response.StatusCode == http.StatusBadRequest {
				return
			}
		}
	}
	t.Fatalf("wait for authsession readiness: timeout\n%s", process.Logs())
}
// newClientPrivateKey derives a deterministic ed25519 private key from
// the given test label, so reruns produce identical client identities.
func newClientPrivateKey(label string) ed25519.PrivateKey {
	material := "galaxy-integration-gateway-lobby-client-" + label
	digest := sha256.Sum256([]byte(material))
	return ed25519.NewKeyFromSeed(digest[:])
}
// encodePublicKey renders the raw ed25519 public key bytes as standard
// (padded) base64 — the wire format expected in `client_public_key`.
func encodePublicKey(publicKey ed25519.PublicKey) string {
	raw := []byte(publicKey)
	return base64.StdEncoding.EncodeToString(raw)
}
@@ -0,0 +1,289 @@
package harness
import (
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"net"
"net/http"
"os/exec"
"strings"
"testing"
"time"
)
// Tunables for the docker-backed lobbyrtm harness helpers.
const (
	// Prefix for per-test bridge networks so stray leftovers are easy to
	// identify and clean up by hand.
	dockerNetworkPrefix = "lobbyrtm-it-"
	// Upper bound for `docker network create` / `rm` invocations.
	dockerNetworkTimeout = 30 * time.Second
	// Upper bound for every other docker CLI invocation.
	dockerCLITimeout = 30 * time.Second
	// Engine /healthz port and polling cadence/ceiling.
	containerHealthzPort    = 8080
	containerHealthzTimeout = 5 * time.Second
	containerHealthzPoll    = 100 * time.Millisecond
)
// EnsureDockerNetwork creates a uniquely-named Docker bridge network
// for the caller's test and registers cleanup. Each test gets its own
// network so concurrent scenarios cannot collide on the per-game DNS
// hostname (`galaxy-game-{game_id}`). The helper skips the test when
// no Docker daemon is reachable.
func EnsureDockerNetwork(t testing.TB) string {
	t.Helper()
	requireDockerDaemon(t)
	networkName := dockerNetworkPrefix + uniqueSuffix(t)
	createCtx, cancelCreate := context.WithTimeout(context.Background(), dockerNetworkTimeout)
	defer cancelCreate()
	createCmd := exec.CommandContext(createCtx, "docker", "network", "create", "--driver", "bridge", networkName)
	if output, err := createCmd.CombinedOutput(); err != nil {
		t.Fatalf("integration harness: create docker network %q: %v; output:\n%s",
			networkName, err, strings.TrimSpace(string(output)))
	}
	t.Cleanup(func() {
		rmCtx, cancelRm := context.WithTimeout(context.Background(), dockerNetworkTimeout)
		defer cancelRm()
		// Removal failure is logged, not fatal: the test already passed
		// or failed on its own merits by cleanup time.
		if rmErr := exec.CommandContext(rmCtx, "docker", "network", "rm", networkName).Run(); rmErr != nil {
			t.Logf("integration harness: remove docker network %q: %v", networkName, rmErr)
		}
	})
	return networkName
}
// FindContainerIDByLabel returns the id of the single running container
// labelled with the given game id, or an empty string when no match is
// found. The label keys are the ones rtmanager attaches at start time
// (`com.galaxy.owner=rtmanager`, `com.galaxy.game_id=<gameID>`).
func FindContainerIDByLabel(t testing.TB, gameID string) string {
	t.Helper()
	requireDockerDaemon(t)
	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	args := []string{
		"ps", "-aq", "--no-trunc",
		"--filter", "label=com.galaxy.owner=rtmanager",
		"--filter", "label=com.galaxy.game_id=" + gameID,
	}
	raw, err := exec.CommandContext(ctx, "docker", args...).CombinedOutput()
	if err != nil {
		t.Fatalf("integration harness: docker ps for game %s: %v; output:\n%s",
			gameID, err, strings.TrimSpace(string(raw)))
	}
	containerID := strings.TrimSpace(string(raw))
	if containerID == "" {
		return ""
	}
	// More than one line means more than one container owns the label
	// pair — that is a harness invariant violation, not a soft miss.
	if strings.Contains(containerID, "\n") {
		t.Fatalf("integration harness: multiple containers for game %s:\n%s", gameID, containerID)
	}
	return containerID
}
// ContainerState returns the runtime state string (e.g. `running`,
// `exited`) reported by `docker inspect` for the given container id.
func ContainerState(t testing.TB, containerID string) string {
	t.Helper()
	requireDockerDaemon(t)
	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	inspect := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{.State.Status}}", containerID)
	raw, err := inspect.CombinedOutput()
	if err != nil {
		t.Fatalf("integration harness: docker inspect %s: %v; output:\n%s",
			containerID, err, strings.TrimSpace(string(raw)))
	}
	return strings.TrimSpace(string(raw))
}
// ContainerNetworkIP returns the IPv4 address of the named container
// inside the named bridge network, or an empty string when the
// container has no endpoint on that network.
func ContainerNetworkIP(t testing.TB, containerID, networkName string) string {
	t.Helper()
	requireDockerDaemon(t)
	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	inspect := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{json .NetworkSettings.Networks}}", containerID)
	raw, err := inspect.CombinedOutput()
	if err != nil {
		t.Fatalf("integration harness: docker inspect networks %s: %v; output:\n%s",
			containerID, err, strings.TrimSpace(string(raw)))
	}
	var endpoints map[string]struct {
		IPAddress string `json:"IPAddress"`
	}
	if err := json.Unmarshal(raw, &endpoints); err != nil {
		t.Fatalf("integration harness: parse network json for %s: %v; payload=%s",
			containerID, err, strings.TrimSpace(string(raw)))
	}
	endpoint, ok := endpoints[networkName]
	if !ok {
		return ""
	}
	return endpoint.IPAddress
}
// WaitForEngineHealthz polls the engine `/healthz` on port 8080 until
// it returns 200 or the timeout fires. On macOS the docker bridge IP is
// not routable from the host, so the helper falls back to a transient
// `busybox` probe container on the same docker network. On Linux it
// dials the bridge IP directly.
func WaitForEngineHealthz(t testing.TB, ip string, timeout time.Duration) {
	t.Helper()
	if ip == "" {
		t.Fatalf("integration harness: empty engine ip")
	}
	effective := timeout
	if effective <= 0 {
		effective = containerHealthzTimeout
	}
	// Fast path: the bridge IP is reachable from the host (Linux).
	if dialFromHost(ip, containerHealthzPort, 500*time.Millisecond) {
		waitForHealthzFromHost(t, ip, effective)
		return
	}
	// Fallback: probe from inside the docker network via DNS name.
	network, hostname := containerNetworkAndHostname(t, ip)
	if network == "" || hostname == "" {
		t.Fatalf("integration harness: cannot resolve docker network/hostname for engine ip %s", ip)
	}
	waitForHealthzViaProbe(t, network, hostname, effective)
}
// dialFromHost reports whether a tcp connect to ip:port succeeds within
// timeout. Used to detect the macOS routing limitation cheaply.
func dialFromHost(ip string, port int, timeout time.Duration) bool {
	address := net.JoinHostPort(ip, fmt.Sprintf("%d", port))
	conn, err := net.DialTimeout("tcp", address, timeout)
	if err != nil {
		return false
	}
	conn.Close()
	return true
}
// waitForHealthzFromHost polls http://ip:8080/healthz from the host until
// a 200 arrives or timeout elapses, then fails the test. Keep-alives are
// disabled so every attempt opens a fresh connection to the container.
func waitForHealthzFromHost(t testing.TB, ip string, timeout time.Duration) {
	t.Helper()
	url := fmt.Sprintf("http://%s/healthz", net.JoinHostPort(ip, fmt.Sprintf("%d", containerHealthzPort)))
	client := &http.Client{
		Timeout:   500 * time.Millisecond,
		Transport: &http.Transport{DisableKeepAlives: true},
	}
	t.Cleanup(client.CloseIdleConnections)
	for deadline := time.Now().Add(timeout); time.Now().Before(deadline); time.Sleep(containerHealthzPoll) {
		request, err := http.NewRequest(http.MethodGet, url, nil)
		if err != nil {
			t.Fatalf("integration harness: build healthz request for %s: %v", url, err)
		}
		response, err := client.Do(request)
		if err != nil {
			continue
		}
		response.Body.Close()
		if response.StatusCode == http.StatusOK {
			return
		}
	}
	t.Fatalf("integration harness: engine /healthz on %s did not return 200 within %s", url, timeout)
}
// containerNetworkAndHostname finds, among RTM-owned containers
// (`com.galaxy.owner=rtmanager`), the one whose bridge IP equals ip and
// returns that container's docker network name plus hostname. The probe
// container later targets the docker DNS name, which works regardless of
// host routing. Returns two empty strings when nothing matches.
func containerNetworkAndHostname(t testing.TB, ip string) (string, string) {
	t.Helper()
	requireDockerDaemon(t)
	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	listCmd := exec.CommandContext(ctx, "docker", "ps", "-aq", "--no-trunc",
		"--filter", "label=com.galaxy.owner=rtmanager",
	)
	output, err := listCmd.CombinedOutput()
	if err != nil {
		t.Fatalf("integration harness: docker ps for engine probe: %v; output:\n%s", err, strings.TrimSpace(string(output)))
	}
	for _, candidate := range strings.Split(strings.TrimSpace(string(output)), "\n") {
		containerID := strings.TrimSpace(candidate)
		if containerID == "" {
			continue
		}
		ipsByNetwork, hostname, ok := inspectIPAndHostname(t, containerID)
		if !ok {
			// Container vanished or produced unparseable output: try the next one.
			continue
		}
		for networkName, networkIP := range ipsByNetwork {
			if networkIP == ip {
				return networkName, hostname
			}
		}
	}
	return "", ""
}
// inspectIPAndHostname reads a container's per-network IP addresses and
// its configured hostname through one `docker inspect` call. The boolean
// result is false whenever the CLI call fails or the output cannot be
// parsed, so callers can simply skip unusable containers.
func inspectIPAndHostname(t testing.TB, containerID string) (map[string]string, string, bool) {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), dockerCLITimeout)
	defer cancel()
	inspect := exec.CommandContext(ctx, "docker", "inspect", "--format",
		"{{json .NetworkSettings.Networks}}|{{.Config.Hostname}}", containerID)
	raw, err := inspect.CombinedOutput()
	if err != nil {
		return nil, "", false
	}
	networksJSON, hostname, found := strings.Cut(strings.TrimSpace(string(raw)), "|")
	if !found {
		return nil, "", false
	}
	var networks map[string]struct {
		IPAddress string `json:"IPAddress"`
	}
	if err := json.Unmarshal([]byte(networksJSON), &networks); err != nil {
		return nil, "", false
	}
	ipsByNetwork := make(map[string]string, len(networks))
	for name, entry := range networks {
		ipsByNetwork[name] = entry.IPAddress
	}
	return ipsByNetwork, hostname, true
}
// waitForHealthzViaProbe polls the engine `/healthz` through a transient
// `busybox` container attached to networkName. Each attempt runs
// `wget -qO- http://<hostname>:8080/healthz`; success is a zero exit
// status whose stdout contains "ok". Fails the test when timeout elapses.
func waitForHealthzViaProbe(t testing.TB, networkName, hostname string, timeout time.Duration) {
	t.Helper()
	url := fmt.Sprintf("http://%s:%d/healthz", hostname, containerHealthzPort)
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if probeHealthzOnce(networkName, url) {
			return
		}
		time.Sleep(containerHealthzPoll)
	}
	t.Fatalf("integration harness: engine /healthz on %s did not return 200 via probe within %s", url, timeout)
}

// probeHealthzOnce runs a single transient busybox wget against url on
// the given docker network and reports whether it printed the "ok" body.
func probeHealthzOnce(networkName, url string) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	probe := exec.CommandContext(ctx, "docker", "run", "--rm",
		"--network", networkName,
		"busybox:stable",
		"wget", "-qO-", url,
	)
	out, err := probe.CombinedOutput()
	return err == nil && strings.Contains(string(out), "ok")
}
// uniqueSuffix returns 8 hex characters backed by 4 bytes of crypto/rand
// entropy, failing the test if the randomness source errors. Used to
// namespace per-test resources so parallel runs cannot collide.
func uniqueSuffix(t testing.TB) string {
	t.Helper()
	var raw [4]byte
	if _, err := rand.Read(raw[:]); err != nil {
		t.Fatalf("integration harness: read random suffix: %v", err)
	}
	return hex.EncodeToString(raw[:])
}
+139
View File
@@ -0,0 +1,139 @@
package harness
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"sync"
"testing"
"time"
)
// EngineImageRef is the canonical tag the lobbyrtm boundary suite (and
// any future suite that needs the galaxy/game engine binary) builds and
// runs against. The `-lobbyrtm-it` suffix differs from the
// `-rtm-it` tag the service-local rtmanager/integration harness uses, so
// an operator running both suites locally cannot accidentally consume
// the wrong image, and `docker image rm` of one suite's leftovers does
// not remove the other suite's tag.
const EngineImageRef = "galaxy/game:1.0.0-lobbyrtm-it"

const (
	// imageBuildTimeout bounds the one-time `docker build` of the
	// engine image (buildEngineImage).
	imageBuildTimeout = 10 * time.Minute
	// dockerDaemonPingTimeout bounds the `docker version` daemon probe
	// (pingDockerDaemon).
	dockerDaemonPingTimeout = 5 * time.Second
)

var (
	// engineImageOnce/engineImageErr memoize the per-process engine
	// image build so concurrent suites share one `docker build`.
	engineImageOnce sync.Once
	engineImageErr  error
	// dockerAvailableOnce/dockerAvailableErr memoize the daemon
	// reachability verdict so only the first caller pays the ping.
	dockerAvailableOnce sync.Once
	dockerAvailableErr  error
)
// RequireDockerDaemon skips the calling test when no Docker daemon is
// reachable from this process. Suites that need Docker but stand up
// testcontainers (Postgres/Redis) before any RTM-specific helper
// should call this helper first so the skip path runs *before* the
// testcontainer client probes the daemon and fails hard.
//
// The reachability verdict is cached per process by the unexported
// requireDockerDaemon, so repeated calls are effectively free.
func RequireDockerDaemon(t testing.TB) {
	t.Helper()
	requireDockerDaemon(t)
}
// EnsureGalaxyGameImage guarantees the galaxy/game engine image exists
// locally and returns its canonical tag. The docker daemon check runs
// first and skips the test when no daemon is reachable (missing
// `/var/run/docker.sock` with `DOCKER_HOST` unset); the actual
// `docker build` is memoized behind a sync.Once so concurrent suite
// invocations share one build. A build failure fails every caller.
//
// The Dockerfile path and build context intentionally mirror
// `rtmanager/integration/harness/docker.go::buildAndTagEngineImage` —
// galaxy's `go.work` resolves `galaxy/{model,error,...}` only when the
// workspace root is the build context.
func EnsureGalaxyGameImage(t testing.TB) string {
	t.Helper()
	requireDockerDaemon(t)
	engineImageOnce.Do(func() { engineImageErr = buildEngineImage() })
	if err := engineImageErr; err != nil {
		t.Fatalf("integration harness: build galaxy/game image: %v", err)
	}
	return EngineImageRef
}
// buildEngineImage runs `docker build` for the engine Dockerfile with
// the workspace root as build context, tagging the result EngineImageRef.
// BuildKit is forced on via the environment. The returned error carries
// the trimmed CLI output so failures are diagnosable from test logs.
func buildEngineImage() error {
	root, err := workspaceRoot()
	if err != nil {
		return fmt.Errorf("resolve workspace root: %w", err)
	}
	ctx, cancel := context.WithTimeout(context.Background(), imageBuildTimeout)
	defer cancel()
	dockerfile := filepath.Join("game", "Dockerfile")
	build := exec.CommandContext(ctx, "docker", "build",
		"-f", dockerfile,
		"-t", EngineImageRef,
		".",
	)
	build.Dir = root
	build.Env = append(os.Environ(), "DOCKER_BUILDKIT=1")
	if output, buildErr := build.CombinedOutput(); buildErr != nil {
		return fmt.Errorf("docker build (-f %s) in %s: %w; output:\n%s",
			dockerfile, root, buildErr, strings.TrimSpace(string(output)))
	}
	return nil
}
// requireDockerDaemon skips the calling test when the docker daemon does
// not answer. The ping result is computed once per process and cached in
// dockerAvailableErr, so only the first caller pays the CLI round trip.
func requireDockerDaemon(t testing.TB) {
	t.Helper()
	dockerAvailableOnce.Do(func() { dockerAvailableErr = pingDockerDaemon() })
	if err := dockerAvailableErr; err != nil {
		t.Skipf("integration harness: docker daemon unavailable: %v", err)
	}
}
// pingDockerDaemon reports whether a docker daemon is reachable. Without
// DOCKER_HOST set, the default unix socket must exist; in either case a
// `docker version` round trip against the server settles the verdict.
func pingDockerDaemon() error {
	if os.Getenv("DOCKER_HOST") == "" {
		if _, statErr := os.Stat("/var/run/docker.sock"); statErr != nil {
			return fmt.Errorf("set DOCKER_HOST or expose /var/run/docker.sock: %w", statErr)
		}
	}
	ctx, cancel := context.WithTimeout(context.Background(), dockerDaemonPingTimeout)
	defer cancel()
	version := exec.CommandContext(ctx, "docker", "version", "--format", "{{.Server.Version}}")
	output, err := version.CombinedOutput()
	if err != nil {
		return fmt.Errorf("docker version: %w; output:\n%s", err, strings.TrimSpace(string(output)))
	}
	return nil
}
// workspaceRoot returns the absolute galaxy/ workspace root, derived from
// this source file's compiled-in path. The harness lives at
// `galaxy/integration/internal/harness/engineimage.go`, so walking three
// directories up from the file's directory lands on the root.
func workspaceRoot() (string, error) {
	_, thisFile, _, ok := runtime.Caller(0)
	if !ok {
		return "", errors.New("resolve runtime caller for workspace root")
	}
	return filepath.Clean(filepath.Join(filepath.Dir(thisFile), "..", "..", "..")), nil
}
@@ -0,0 +1,54 @@
package harness
import (
"context"
"testing"
)
// RTManagerServicePersistence captures the per-test persistence
// dependencies of the Runtime Manager binary: a PostgreSQL container
// hosting the `rtmanager` schema owned by the `rtmanagerservice` role,
// plus the Redis credentials that point the service at the
// caller-supplied master address. Construct it via
// StartRTManagerServicePersistence.
type RTManagerServicePersistence struct {
	// Postgres exposes the started container so tests that need direct
	// SQL access to the rtmanager schema can read or write through it.
	Postgres *PostgresRuntime
	// Env carries the environment entries that must be passed to the
	// rtmanager process. It is safe to merge into the caller's existing
	// env map, or to use as-is and append further RTMANAGER_* knobs in
	// place. RTMANAGER_GAME_STATE_ROOT is intentionally omitted; the
	// caller supplies a per-test directory.
	Env map[string]string
}
// StartRTManagerServicePersistence provisions the rtmanager persistence
// stack for one test: a dedicated PostgreSQL container with the
// `rtmanager` schema owned by the `rtmanagerservice` role, plus the env
// entries pointing the rtmanager binary at that container and at the
// supplied Redis master address.
//
// The fixed "integration" Redis password satisfies the architectural
// rule that Redis traffic is password-protected: miniredis accepts
// arbitrary password values when its own RequireAuth is not engaged, and
// the same value works against the real testcontainers Redis runtime.
//
// Container teardown is registered by StartPostgresContainer through
// `t.Cleanup`, so callers have nothing to defer.
func StartRTManagerServicePersistence(t testing.TB, redisMasterAddr string) RTManagerServicePersistence {
	t.Helper()
	postgres := StartPostgresContainer(t)
	if err := postgres.EnsureRoleAndSchema(context.Background(), "rtmanager", "rtmanagerservice", "rtmanagerservice"); err != nil {
		t.Fatalf("ensure rtmanager schema/role: %v", err)
	}
	env := WithPostgres(postgres, "RTMANAGER", "rtmanager", "rtmanagerservice")
	env["RTMANAGER_REDIS_MASTER_ADDR"] = redisMasterAddr
	env["RTMANAGER_REDIS_PASSWORD"] = "integration"
	return RTManagerServicePersistence{Postgres: postgres, Env: env}
}
@@ -0,0 +1,508 @@
// Package lobbyauthsession_test exercises the authenticated context
// propagation between Auth/Session Service and Game Lobby. The
// architecture wires the two services through Gateway: AuthSession
// owns the device-session lifecycle, Gateway projects sessions into
// its cache and signs request envelopes, and Lobby reads the
// resolved `X-User-Id` from the gateway-authenticated downstream
// hop.
//
// The boundary contract under test is: revoking a device session
// through AuthSession's internal API removes the session projection
// from the gateway cache, after which Gateway refuses to route any
// subsequent `lobby.*` command for that session. The suite asserts
// the boundary on the public surfaces: AuthSession internal REST,
// Gateway authenticated gRPC, and Lobby state via direct REST
// observation.
//
// Coverage maps onto `TESTING.md §6` `Lobby ↔ Auth/Session`:
// "authenticated context correctly propagated from gateway".
package lobbyauthsession_test
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"errors"
"io"
"net/http"
"path/filepath"
"testing"
"time"
gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1"
contractsgatewayv1 "galaxy/integration/internal/contracts/gatewayv1"
"galaxy/integration/internal/harness"
lobbymodel "galaxy/model/lobby"
"galaxy/transcoder"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/status"
)
// TestSessionRevocationStopsGatewayFromRoutingLobbyCommands proves that
// AuthSession owns the authenticated context: the same signed
// `lobby.my.games.list` command must succeed before the session revoke
// and be refused by Gateway afterwards, never reaching Lobby.
func TestSessionRevocationStopsGatewayFromRoutingLobbyCommands(t *testing.T) {
	h := newHarness(t)
	clientKey := newClientPrivateKey("g4-revoke")
	deviceSessionID, _ := h.authenticate(t, "revoke@example.com", clientKey)
	client := gatewayv1.NewEdgeGatewayClient(h.dialGateway(t))
	payload, err := transcoder.MyGamesListRequestToPayload(&lobbymodel.MyGamesListRequest{})
	require.NoError(t, err)
	// Baseline: the command routes while the session is active.
	before, err := client.ExecuteCommand(context.Background(),
		newExecuteCommandRequest(deviceSessionID, "req-pre-revoke", lobbymodel.MessageTypeMyGamesList, payload, clientKey),
	)
	require.NoError(t, err)
	assert.Equal(t, "ok", before.GetResultCode())
	// Revoke through AuthSession internal API, then wait until the
	// gateway projection is dropped or flipped to a non-active status.
	h.revokeSession(t, deviceSessionID)
	h.waitForSessionGone(t, deviceSessionID, 5*time.Second)
	// The identical command must now be rejected at the Gateway edge.
	after, err := client.ExecuteCommand(context.Background(),
		newExecuteCommandRequest(deviceSessionID, "req-post-revoke", lobbymodel.MessageTypeMyGamesList, payload, clientKey),
	)
	require.Error(t, err, "post-revoke command must fail at Gateway")
	require.Nil(t, after)
	code := status.Code(err)
	require.Truef(t,
		code == codes.Unauthenticated || code == codes.PermissionDenied || code == codes.FailedPrecondition,
		"post-revoke must fail with Unauthenticated/PermissionDenied/FailedPrecondition, got %s: %v",
		code, err,
	)
}
// --- harness ---

// lobbyAuthsessionHarness carries the per-test environment: one Redis
// client, the capturing mail stub, the public/internal base URLs of the
// started services, and handles to the service processes.
type lobbyAuthsessionHarness struct {
	// redis talks straight to the suite's Redis container; the gateway
	// session projection is read from it under "gateway:session:<id>".
	redis *redis.Client
	// mailStub records auth-code deliveries emitted by AuthSession.
	mailStub               *harness.MailStub
	authsessionPublicURL   string
	authsessionInternalURL string
	gatewayPublicURL       string
	gatewayGRPCAddr        string
	userServiceURL         string
	lobbyPublicURL         string
	// processes holds every started service binary in start order.
	processes []*harness.Process
}
// newHarness stands up the full boundary environment for one test: one
// Redis container, the capturing mail stub, and real binaries for User
// Service, AuthSession, Lobby, and Gateway wired together through env
// configuration. Every resource registers its own t.Cleanup; callers
// have nothing to tear down.
func newHarness(t *testing.T) *lobbyAuthsessionHarness {
	t.Helper()
	// One shared Redis container backs the gateway session cache/stream
	// plus each service's Redis-based persistence below.
	redisRuntime := harness.StartRedisContainer(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr: redisRuntime.Addr,
		Protocol: 2,
		DisableIdentity: true,
	})
	t.Cleanup(func() { require.NoError(t, redisClient.Close()) })
	mailStub := harness.NewMailStub(t)
	responseSignerPath, _ := harness.WriteResponseSignerPEM(t, t.Name())
	// One free TCP address per listener so the processes cannot collide.
	userServiceAddr := harness.FreeTCPAddress(t)
	authsessionPublicAddr := harness.FreeTCPAddress(t)
	authsessionInternalAddr := harness.FreeTCPAddress(t)
	gatewayPublicAddr := harness.FreeTCPAddress(t)
	gatewayGRPCAddr := harness.FreeTCPAddress(t)
	lobbyPublicAddr := harness.FreeTCPAddress(t)
	lobbyInternalAddr := harness.FreeTCPAddress(t)
	userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice")
	authsessionBinary := harness.BuildBinary(t, "authsession", "./authsession/cmd/authsession")
	gatewayBinary := harness.BuildBinary(t, "gateway", "./gateway/cmd/gateway")
	lobbyBinary := harness.BuildBinary(t, "lobby", "./lobby/cmd/lobby")
	// User Service starts first: AuthSession and Lobby both depend on it.
	userServiceEnv := harness.StartUserServicePersistence(t, redisRuntime.Addr).Env
	userServiceEnv["USERSERVICE_LOG_LEVEL"] = "info"
	userServiceEnv["USERSERVICE_INTERNAL_HTTP_ADDR"] = userServiceAddr
	userServiceEnv["OTEL_TRACES_EXPORTER"] = "none"
	userServiceEnv["OTEL_METRICS_EXPORTER"] = "none"
	userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, userServiceEnv)
	waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr)
	// AuthSession: projects sessions into the gateway cache key prefix /
	// events stream that Gateway is configured with below.
	authsessionEnv := map[string]string{
		"AUTHSESSION_LOG_LEVEL": "info",
		"AUTHSESSION_PUBLIC_HTTP_ADDR": authsessionPublicAddr,
		"AUTHSESSION_PUBLIC_HTTP_REQUEST_TIMEOUT": time.Second.String(),
		"AUTHSESSION_INTERNAL_HTTP_ADDR": authsessionInternalAddr,
		"AUTHSESSION_INTERNAL_HTTP_REQUEST_TIMEOUT": time.Second.String(),
		"AUTHSESSION_REDIS_MASTER_ADDR": redisRuntime.Addr,
		"AUTHSESSION_REDIS_PASSWORD": "integration",
		"AUTHSESSION_USER_SERVICE_MODE": "rest",
		"AUTHSESSION_USER_SERVICE_BASE_URL": "http://" + userServiceAddr,
		"AUTHSESSION_USER_SERVICE_REQUEST_TIMEOUT": time.Second.String(),
		"AUTHSESSION_MAIL_SERVICE_MODE": "rest",
		"AUTHSESSION_MAIL_SERVICE_BASE_URL": mailStub.BaseURL(),
		"AUTHSESSION_MAIL_SERVICE_REQUEST_TIMEOUT": time.Second.String(),
		"AUTHSESSION_REDIS_GATEWAY_SESSION_CACHE_KEY_PREFIX": "gateway:session:",
		"AUTHSESSION_REDIS_GATEWAY_SESSION_EVENTS_STREAM": "gateway:session_events",
		"OTEL_TRACES_EXPORTER": "none",
		"OTEL_METRICS_EXPORTER": "none",
	}
	authsessionProcess := harness.StartProcess(t, "authsession", authsessionBinary, authsessionEnv)
	waitForAuthsessionReady(t, authsessionProcess, "http://"+authsessionPublicAddr)
	// Lobby: short read-block timeouts keep its stream consumers snappy
	// during tests.
	lobbyEnv := harness.StartLobbyServicePersistence(t, redisRuntime.Addr).Env
	lobbyEnv["LOBBY_LOG_LEVEL"] = "info"
	lobbyEnv["LOBBY_PUBLIC_HTTP_ADDR"] = lobbyPublicAddr
	lobbyEnv["LOBBY_INTERNAL_HTTP_ADDR"] = lobbyInternalAddr
	lobbyEnv["LOBBY_USER_SERVICE_BASE_URL"] = "http://" + userServiceAddr
	lobbyEnv["LOBBY_GM_BASE_URL"] = mailStub.BaseURL()
	lobbyEnv["LOBBY_RUNTIME_JOB_RESULTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_USER_LIFECYCLE_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_GM_EVENTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["OTEL_TRACES_EXPORTER"] = "none"
	lobbyEnv["OTEL_METRICS_EXPORTER"] = "none"
	lobbyProcess := harness.StartProcess(t, "lobby", lobbyBinary, lobbyEnv)
	harness.WaitForHTTPStatus(t, lobbyProcess, "http://"+lobbyInternalAddr+"/readyz", http.StatusOK)
	// Gateway last: it fronts all three services. Rate limits are set
	// generously so the suite never trips anti-abuse throttling.
	gatewayEnv := map[string]string{
		"GATEWAY_LOG_LEVEL": "info",
		"GATEWAY_PUBLIC_HTTP_ADDR": gatewayPublicAddr,
		"GATEWAY_AUTHENTICATED_GRPC_ADDR": gatewayGRPCAddr,
		"GATEWAY_REDIS_MASTER_ADDR": redisRuntime.Addr,
		"GATEWAY_REDIS_PASSWORD": "integration",
		"GATEWAY_SESSION_CACHE_REDIS_KEY_PREFIX": "gateway:session:",
		"GATEWAY_SESSION_EVENTS_REDIS_STREAM": "gateway:session_events",
		"GATEWAY_CLIENT_EVENTS_REDIS_STREAM": "gateway:client_events",
		"GATEWAY_REPLAY_REDIS_KEY_PREFIX": "gateway:replay:",
		"GATEWAY_RESPONSE_SIGNER_PRIVATE_KEY_PEM_PATH": filepath.Clean(responseSignerPath),
		"GATEWAY_AUTH_SERVICE_BASE_URL": "http://" + authsessionPublicAddr,
		"GATEWAY_USER_SERVICE_BASE_URL": "http://" + userServiceAddr,
		"GATEWAY_LOBBY_SERVICE_BASE_URL": "http://" + lobbyPublicAddr,
		"GATEWAY_PUBLIC_AUTH_UPSTREAM_TIMEOUT": (500 * time.Millisecond).String(),
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_REQUESTS": "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_WINDOW": "1s",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_BURST": "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS": "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_WINDOW": "1s",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST": "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS": "100",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_WINDOW": "1s",
		"GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST": "100",
		"OTEL_TRACES_EXPORTER": "none",
		"OTEL_METRICS_EXPORTER": "none",
	}
	gatewayProcess := harness.StartProcess(t, "gateway", gatewayBinary, gatewayEnv)
	harness.WaitForHTTPStatus(t, gatewayProcess, "http://"+gatewayPublicAddr+"/healthz", http.StatusOK)
	harness.WaitForTCP(t, gatewayProcess, gatewayGRPCAddr)
	return &lobbyAuthsessionHarness{
		redis: redisClient,
		mailStub: mailStub,
		authsessionPublicURL: "http://" + authsessionPublicAddr,
		authsessionInternalURL: "http://" + authsessionInternalAddr,
		gatewayPublicURL: "http://" + gatewayPublicAddr,
		gatewayGRPCAddr: gatewayGRPCAddr,
		userServiceURL: "http://" + userServiceAddr,
		lobbyPublicURL: "http://" + lobbyPublicAddr,
		processes: []*harness.Process{userServiceProcess, authsessionProcess, lobbyProcess, gatewayProcess},
	}
}
// authenticate drives the full public-auth flow through Gateway for the
// given email, then blocks until the gateway session projection shows up
// in Redis. Returns the device_session_id plus the resolved user_id.
func (h *lobbyAuthsessionHarness) authenticate(t *testing.T, email string, clientKey ed25519.PrivateKey) (string, string) {
	t.Helper()
	challengeID := h.sendChallenge(t, email)
	code := h.waitForChallengeCode(t, email)
	confirm := h.confirmCode(t, challengeID, code, clientKey)
	require.Equalf(t, http.StatusOK, confirm.StatusCode, "confirm: %s", confirm.Body)
	var confirmBody struct {
		DeviceSessionID string `json:"device_session_id"`
	}
	require.NoError(t, decodeStrictJSONPayload([]byte(confirm.Body), &confirmBody))
	require.NotEmpty(t, confirmBody.DeviceSessionID)
	user := h.lookupUserByEmail(t, email)
	sessionKey := "gateway:session:" + confirmBody.DeviceSessionID
	for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		if _, err := h.redis.Get(context.Background(), sessionKey).Bytes(); err == nil {
			return confirmBody.DeviceSessionID, user.UserID
		}
	}
	t.Fatalf("gateway session projection for %s never arrived", confirmBody.DeviceSessionID)
	return "", ""
}
// sendChallenge posts the public send-email-code request through the
// Gateway and returns the challenge_id it issued.
func (h *lobbyAuthsessionHarness) sendChallenge(t *testing.T, email string) string {
	t.Helper()
	response := postJSON(t, h.gatewayPublicURL+"/api/v1/public/auth/send-email-code", map[string]string{
		"email": email,
	}, nil)
	require.Equalf(t, http.StatusOK, response.StatusCode, "send-email-code: %s", response.Body)
	var parsed struct {
		ChallengeID string `json:"challenge_id"`
	}
	require.NoError(t, decodeStrictJSONPayload([]byte(response.Body), &parsed))
	return parsed.ChallengeID
}
// confirmCode posts the confirm-email-code request that binds the
// client's ed25519 public key to the newly minted device session.
func (h *lobbyAuthsessionHarness) confirmCode(t *testing.T, challengeID, code string, clientKey ed25519.PrivateKey) httpResponse {
	t.Helper()
	publicKey := base64.StdEncoding.EncodeToString(clientKey.Public().(ed25519.PublicKey))
	body := map[string]string{
		"challenge_id":      challengeID,
		"code":              code,
		"client_public_key": publicKey,
		"time_zone":         "Europe/Kaliningrad",
	}
	return postJSON(t, h.gatewayPublicURL+"/api/v1/public/auth/confirm-email-code", body, nil)
}
// waitForChallengeCode polls the mail stub until a non-empty auth code
// addressed to email shows up, failing the test after five seconds.
func (h *lobbyAuthsessionHarness) waitForChallengeCode(t *testing.T, email string) string {
	t.Helper()
	for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		for _, delivery := range h.mailStub.RecordedDeliveries() {
			if delivery.Email == email && delivery.Code != "" {
				return delivery.Code
			}
		}
	}
	t.Fatalf("auth code for %s never arrived", email)
	return ""
}
// lookupUserByEmail resolves the user record behind email through the
// User Service internal by-email lookup endpoint.
func (h *lobbyAuthsessionHarness) lookupUserByEmail(t *testing.T, email string) struct {
	UserID string `json:"user_id"`
} {
	t.Helper()
	response := postJSON(t, h.userServiceURL+"/api/v1/internal/user-lookups/by-email", map[string]string{"email": email}, nil)
	require.Equalf(t, http.StatusOK, response.StatusCode, "user lookup: %s", response.Body)
	var parsed struct {
		User struct {
			UserID string `json:"user_id"`
		} `json:"user"`
	}
	require.NoError(t, json.Unmarshal([]byte(response.Body), &parsed))
	var result struct {
		UserID string `json:"user_id"`
	}
	result.UserID = parsed.User.UserID
	return result
}
// revokeSession calls AuthSession's internal revoke surface for one
// device session. The request body shape is defined by
// `authsession/api/internal-openapi.yaml#RevokeDeviceSessionRequest`;
// both 200 and 204 count as success.
func (h *lobbyAuthsessionHarness) revokeSession(t *testing.T, deviceSessionID string) {
	t.Helper()
	body := map[string]any{
		"reason_code": "test_revocation",
		"actor": map[string]string{
			"type": "test",
			"id":   "lobbyauthsession-suite",
		},
	}
	response := postJSON(t, h.authsessionInternalURL+"/api/v1/internal/sessions/"+deviceSessionID+"/revoke", body, nil)
	require.Truef(t,
		response.StatusCode == http.StatusOK || response.StatusCode == http.StatusNoContent,
		"revoke session %s: status=%d body=%s", deviceSessionID, response.StatusCode, response.Body,
	)
}
// waitForSessionGone polls the gateway session cache until the session
// record disappears (redis.Nil) or carries a non-"active" status,
// failing the test when timeout elapses.
//
// The nil-reply check uses errors.Is rather than ==: go-redis may wrap
// redis.Nil (e.g. through instrumentation hooks), and errors.Is still
// matches the sentinel through wrapping.
func (h *lobbyAuthsessionHarness) waitForSessionGone(t *testing.T, deviceSessionID string, timeout time.Duration) {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		payload, err := h.redis.Get(context.Background(), "gateway:session:"+deviceSessionID).Bytes()
		if errors.Is(err, redis.Nil) {
			return
		}
		if err == nil {
			var record struct {
				Status string `json:"status"`
			}
			// Unparseable payloads keep polling; only an explicit
			// non-active status counts as revoked.
			if json.Unmarshal(payload, &record) == nil && record.Status != "active" {
				return
			}
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("session %s still active in gateway cache after %s", deviceSessionID, timeout)
}
// dialGateway opens a blocking plaintext gRPC connection to Gateway's
// authenticated listener. The 5s context bounds the dial, and closing
// the connection is tied to test cleanup.
func (h *lobbyAuthsessionHarness) dialGateway(t *testing.T) *grpc.ClientConn {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	// WithBlock surfaces dial failures here rather than on the first RPC.
	conn, err := grpc.DialContext(ctx, h.gatewayGRPCAddr,
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithBlock(),
	)
	require.NoError(t, err)
	t.Cleanup(func() { require.NoError(t, conn.Close()) })
	return conn
}
// --- shared helpers ---

// newExecuteCommandRequest assembles a fully signed ExecuteCommand
// envelope: the payload hash is computed first, then an ed25519
// signature covers the protocol fields Gateway verifies.
func newExecuteCommandRequest(deviceSessionID, requestID, messageType string, payload []byte, clientKey ed25519.PrivateKey) *gatewayv1.ExecuteCommandRequest {
	request := &gatewayv1.ExecuteCommandRequest{
		ProtocolVersion: contractsgatewayv1.ProtocolVersionV1,
		DeviceSessionId: deviceSessionID,
		MessageType:     messageType,
		TimestampMs:     time.Now().UnixMilli(),
		RequestId:       requestID,
		PayloadBytes:    payload,
		PayloadHash:     contractsgatewayv1.ComputePayloadHash(payload),
		TraceId:         "trace-" + requestID,
	}
	request.Signature = contractsgatewayv1.SignRequest(clientKey, contractsgatewayv1.RequestSigningFields{
		ProtocolVersion: request.GetProtocolVersion(),
		DeviceSessionID: request.GetDeviceSessionId(),
		MessageType:     request.GetMessageType(),
		TimestampMS:     request.GetTimestampMs(),
		RequestID:       request.GetRequestId(),
		PayloadHash:     request.GetPayloadHash(),
	})
	return request
}
// httpResponse is the flattened result of one HTTP exchange: status
// code, fully-read body, and a clone of the response headers.
type httpResponse struct {
	StatusCode int
	Body       string
	Header     http.Header
}
// postJSON issues a POST to url, JSON-encoding body when non-nil and
// merging any extra header values, and returns the captured response.
func postJSON(t *testing.T, url string, body any, header http.Header) httpResponse {
	t.Helper()
	var reader io.Reader
	if body != nil {
		encoded, err := json.Marshal(body)
		require.NoError(t, err)
		reader = bytes.NewReader(encoded)
	}
	request, err := http.NewRequest(http.MethodPost, url, reader)
	require.NoError(t, err)
	if body != nil {
		request.Header.Set("Content-Type", "application/json")
	}
	for name, values := range header {
		for _, value := range values {
			request.Header.Add(name, value)
		}
	}
	return doRequest(t, request)
}
// doRequest executes request with a fresh short-timeout client (no
// keep-alives) and reads the whole body into the returned httpResponse.
func doRequest(t *testing.T, request *http.Request) httpResponse {
	t.Helper()
	client := &http.Client{
		Timeout:   5 * time.Second,
		Transport: &http.Transport{DisableKeepAlives: true},
	}
	t.Cleanup(client.CloseIdleConnections)
	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()
	body, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	return httpResponse{
		StatusCode: response.StatusCode,
		Body:       string(body),
		Header:     response.Header.Clone(),
	}
}
// decodeStrictJSONPayload unmarshals payload into target, rejecting both
// unknown object fields and any trailing JSON after the first value.
func decodeStrictJSONPayload(payload []byte, target any) error {
	dec := json.NewDecoder(bytes.NewReader(payload))
	dec.DisallowUnknownFields()
	if err := dec.Decode(target); err != nil {
		return err
	}
	// A second decode must hit EOF; anything else means extra input.
	switch err := dec.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// waitForUserServiceReady polls the userservice internal lookup probe
// until it answers 200, dumping the process logs on timeout.
func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	probeURL := baseURL + "/api/v1/internal/users/user-readiness-probe/exists"
	for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		request, err := http.NewRequest(http.MethodGet, probeURL, nil)
		require.NoError(t, err)
		response, err := client.Do(request)
		if err != nil {
			continue
		}
		// Drain so the connection is reusable by the shared client.
		_, _ = io.Copy(io.Discard, response.Body)
		response.Body.Close()
		if response.StatusCode == http.StatusOK {
			return
		}
	}
	t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs())
}
// waitForAuthsessionReady polls AuthSession's public listener until it
// responds. The listener exposes no /healthz, so an empty-email
// send-email-code POST serves as the probe: a 400 proves the HTTP stack
// and validation layer are up.
func waitForAuthsessionReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	probeURL := baseURL + "/api/v1/public/auth/send-email-code"
	for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(25 * time.Millisecond) {
		request, err := http.NewRequest(http.MethodPost, probeURL, bytes.NewReader([]byte(`{"email":""}`)))
		require.NoError(t, err)
		request.Header.Set("Content-Type", "application/json")
		response, err := client.Do(request)
		if err != nil {
			continue
		}
		_, _ = io.Copy(io.Discard, response.Body)
		response.Body.Close()
		if response.StatusCode == http.StatusBadRequest {
			return
		}
	}
	t.Fatalf("wait for authsession readiness: timeout\n%s", process.Logs())
}
// newClientPrivateKey derives a deterministic ed25519 key from label so
// every scenario gets a stable, collision-free client identity.
func newClientPrivateKey(label string) ed25519.PrivateKey {
	digest := sha256.Sum256([]byte("galaxy-integration-lobby-authsession-client-" + label))
	return ed25519.NewKeyFromSeed(digest[:])
}
+747
View File
@@ -0,0 +1,747 @@
// Package lobbyrtm_test exercises the Lobby ↔ Runtime Manager
// boundary against real Lobby + real Runtime Manager + real
// PostgreSQL + real Redis + real Docker daemon running the
// galaxy/game test engine container. It satisfies the inter-service
// requirement spelled out in `TESTING.md §7` and PLAN.md Stage 20.
//
// The boundary contract is: Lobby publishes `runtime:start_jobs` and
// `runtime:stop_jobs` envelopes, RTM consumes them and runs/stops
// engine containers, RTM publishes `runtime:job_results`, Lobby
// transitions the game accordingly. The suite asserts only on those
// public surfaces (Lobby/RTM REST, Redis Streams, Docker container
// state); it never imports `*/internal/...` packages of either
// service.
package lobbyrtm_test
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"maps"
"net/http"
"net/http/httptest"
"os"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"galaxy/integration/internal/harness"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/require"
)
const (
	// defaultEngineVersion is the engine version Lobby schedules for a
	// normal start; missingEngineVersion deliberately has no matching
	// image so the pull fails (the failure scenario).
	defaultEngineVersion = "1.0.0"
	missingEngineVersion = "0.0.0-missing"
	// Base Redis stream/key names shared by Lobby and RTM; each test
	// appends ":<suffix>" for isolation (see suiteSeq).
	startJobsStream         = "runtime:start_jobs"
	stopJobsStream          = "runtime:stop_jobs"
	jobResultsStream        = "runtime:job_results"
	healthEventsStream      = "runtime:health_events"
	notificationIntentsKey  = "notification:intents"
	userLifecycleStream     = "user:lifecycle_events"
	gmEventsStream          = "gm:lobby_events"
	expectedLobbyProducer   = "game_lobby"
	// NOTE(review): the name reads "Pulled" but the value is the
	// pull-*failure* notification type — consider renaming to
	// notificationImagePullFailed.
	notificationImagePulled = "runtime.image_pull_failed"
)

// suiteSeq scopes per-test stream prefixes so concurrent test
// invocations cannot bleed events into each other.
var suiteSeq atomic.Int64
// lobbyRTMHarness owns the per-test infrastructure: containers,
// processes, stream keys, and helper clients. One harness per test
// keeps each scenario fully isolated.
type lobbyRTMHarness struct {
	// redis talks directly to the suite's Redis container for stream
	// setup and assertions.
	redis *redis.Client
	// Base URLs of the started services' HTTP surfaces.
	userServiceURL string
	lobbyPublicURL string
	lobbyAdminURL  string
	rtmInternalURL string
	// Per-test suffixed stream keys (suffix comes from suiteSeq).
	intentsStream    string
	lifecycleStream  string
	jobResultsStream string
	startJobsStream  string
	stopJobsStream   string
	healthEvents     string
	// gmStub is the always-200 Game Master HTTP stand-in.
	gmStub *httptest.Server
	// dockerNetwork and engineImage identify the per-test docker bridge
	// network and the engine image tag the suite runs.
	dockerNetwork string
	engineImage   string
	// Started service processes, kept for log dumps and teardown.
	userServiceProcess *harness.Process
	lobbyProcess       *harness.Process
	rtmProcess         *harness.Process
}
// ensureUserResponse mirrors the User Service ensure-user reply shape:
// an outcome verdict plus the (possibly newly created) user id.
type ensureUserResponse struct {
	Outcome string `json:"outcome"`
	UserID  string `json:"user_id"`
}

// httpResponse is the flattened result of one HTTP exchange used by the
// suite's request helpers.
type httpResponse struct {
	StatusCode int
	Body       string
	Header     http.Header
}
// newLobbyRTMHarness brings up one independent test environment:
// Postgres containers per service (mirrors `lobbynotification`), one
// Redis container, real binaries for User Service / Lobby / RTM, a
// GM stub that returns 200, a per-test Docker bridge network, and
// the freshly-built `galaxy/game` test image.
func newLobbyRTMHarness(t *testing.T) *lobbyRTMHarness {
	t.Helper()
	// Skip the whole suite when Docker is unreachable. The ensure-only
	// check runs before any testcontainer is started so the skip path
	// kicks in before testcontainers-go tries (and fails) to probe the
	// daemon.
	harness.RequireDockerDaemon(t)
	redisRuntime := harness.StartRedisContainer(t)
	// Protocol 2 + DisableIdentity avoid the RESP3 handshake and
	// client-identity chatter against the containerized Redis.
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisRuntime.Addr,
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, redisClient.Close())
	})
	// Minimal GM stub: every call is acknowledged with 200 `{}` so the
	// Lobby -> GM boundary never blocks these scenarios.
	gmStub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(`{}`))
	}))
	t.Cleanup(gmStub.Close)
	engineImage := harness.EnsureGalaxyGameImage(t)
	dockerNetwork := harness.EnsureDockerNetwork(t)
	// One free port per listening surface.
	userServiceAddr := harness.FreeTCPAddress(t)
	lobbyPublicAddr := harness.FreeTCPAddress(t)
	lobbyInternalAddr := harness.FreeTCPAddress(t)
	rtmInternalAddr := harness.FreeTCPAddress(t)
	userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice")
	lobbyBinary := harness.BuildBinary(t, "lobby", "./lobby/cmd/lobby")
	rtmBinary := harness.BuildBinary(t, "rtmanager", "./rtmanager/cmd/rtmanager")
	// --- User Service ---
	userServiceEnv := harness.StartUserServicePersistence(t, redisRuntime.Addr).Env
	userServiceEnv["USERSERVICE_LOG_LEVEL"] = "info"
	userServiceEnv["USERSERVICE_INTERNAL_HTTP_ADDR"] = userServiceAddr
	userServiceEnv["OTEL_TRACES_EXPORTER"] = "none"
	userServiceEnv["OTEL_METRICS_EXPORTER"] = "none"
	userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, userServiceEnv)
	waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr)
	// Per-test stream keys: base name + monotonically increasing suffix.
	suffix := strconv.FormatInt(suiteSeq.Add(1), 10)
	intentsStream := notificationIntentsKey + ":" + suffix
	lifecycleStream := userLifecycleStream + ":" + suffix
	jobResultsStreamKey := jobResultsStream + ":" + suffix
	startJobsStreamKey := startJobsStream + ":" + suffix
	stopJobsStreamKey := stopJobsStream + ":" + suffix
	healthEventsStreamKey := healthEventsStream + ":" + suffix
	gmEventsStreamKey := gmEventsStream + ":" + suffix
	// --- Lobby ---
	lobbyEnv := harness.StartLobbyServicePersistence(t, redisRuntime.Addr).Env
	lobbyEnv["LOBBY_LOG_LEVEL"] = "info"
	lobbyEnv["LOBBY_PUBLIC_HTTP_ADDR"] = lobbyPublicAddr
	lobbyEnv["LOBBY_INTERNAL_HTTP_ADDR"] = lobbyInternalAddr
	lobbyEnv["LOBBY_USER_SERVICE_BASE_URL"] = "http://" + userServiceAddr
	lobbyEnv["LOBBY_GM_BASE_URL"] = gmStub.URL
	lobbyEnv["LOBBY_NOTIFICATION_INTENTS_STREAM"] = intentsStream
	lobbyEnv["LOBBY_USER_LIFECYCLE_STREAM"] = lifecycleStream
	lobbyEnv["LOBBY_RUNTIME_JOB_RESULTS_STREAM"] = jobResultsStreamKey
	lobbyEnv["LOBBY_RUNTIME_START_JOBS_STREAM"] = startJobsStreamKey
	lobbyEnv["LOBBY_RUNTIME_STOP_JOBS_STREAM"] = stopJobsStreamKey
	lobbyEnv["LOBBY_GM_EVENTS_STREAM"] = gmEventsStreamKey
	// Short read-block timeouts keep worker loops responsive so the
	// polling assertions in the tests converge quickly.
	lobbyEnv["LOBBY_RUNTIME_JOB_RESULTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_USER_LIFECYCLE_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_GM_EVENTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_ENGINE_IMAGE_TEMPLATE"] = "galaxy/game:{engine_version}-lobbyrtm-it"
	lobbyEnv["OTEL_TRACES_EXPORTER"] = "none"
	lobbyEnv["OTEL_METRICS_EXPORTER"] = "none"
	lobbyProcess := harness.StartProcess(t, "lobby", lobbyBinary, lobbyEnv)
	harness.WaitForHTTPStatus(t, lobbyProcess, "http://"+lobbyInternalAddr+"/readyz", http.StatusOK)
	// --- Runtime Manager ---
	rtmEnv := harness.StartRTManagerServicePersistence(t, redisRuntime.Addr).Env
	rtmEnv["RTMANAGER_LOG_LEVEL"] = "info"
	rtmEnv["RTMANAGER_INTERNAL_HTTP_ADDR"] = rtmInternalAddr
	rtmEnv["RTMANAGER_LOBBY_INTERNAL_BASE_URL"] = "http://" + lobbyInternalAddr
	rtmEnv["RTMANAGER_DOCKER_HOST"] = resolveDockerHost()
	rtmEnv["RTMANAGER_DOCKER_NETWORK"] = dockerNetwork
	// On dev machines and in sandboxes the rtmanager process cannot
	// chown the per-game state dir to root (uid 0). Pin the owner to
	// the current process uid/gid so `chown` is a no-op.
	rtmEnv["RTMANAGER_GAME_STATE_OWNER_UID"] = strconv.Itoa(os.Getuid())
	rtmEnv["RTMANAGER_GAME_STATE_OWNER_GID"] = strconv.Itoa(os.Getgid())
	rtmEnv["RTMANAGER_GAME_STATE_ROOT"] = t.TempDir()
	rtmEnv["RTMANAGER_REDIS_START_JOBS_STREAM"] = startJobsStreamKey
	rtmEnv["RTMANAGER_REDIS_STOP_JOBS_STREAM"] = stopJobsStreamKey
	rtmEnv["RTMANAGER_REDIS_JOB_RESULTS_STREAM"] = jobResultsStreamKey
	rtmEnv["RTMANAGER_REDIS_HEALTH_EVENTS_STREAM"] = healthEventsStreamKey
	rtmEnv["RTMANAGER_NOTIFICATION_INTENTS_STREAM"] = intentsStream
	// Aggressive 1s background intervals keep end-to-end latency low
	// enough for the polling timeouts used by the scenarios.
	rtmEnv["RTMANAGER_STREAM_BLOCK_TIMEOUT"] = "200ms"
	rtmEnv["RTMANAGER_RECONCILE_INTERVAL"] = "1s"
	rtmEnv["RTMANAGER_CLEANUP_INTERVAL"] = "1s"
	rtmEnv["RTMANAGER_INSPECT_INTERVAL"] = "1s"
	rtmEnv["RTMANAGER_PROBE_INTERVAL"] = "1s"
	rtmEnv["RTMANAGER_PROBE_TIMEOUT"] = "1s"
	rtmEnv["RTMANAGER_PROBE_FAILURES_THRESHOLD"] = "3"
	rtmEnv["RTMANAGER_GAME_LEASE_TTL_SECONDS"] = "10"
	rtmEnv["OTEL_TRACES_EXPORTER"] = "none"
	rtmEnv["OTEL_METRICS_EXPORTER"] = "none"
	rtmProcess := harness.StartProcess(t, "rtmanager", rtmBinary, rtmEnv)
	harness.WaitForHTTPStatus(t, rtmProcess, "http://"+rtmInternalAddr+"/readyz", http.StatusOK)
	return &lobbyRTMHarness{
		redis:              redisClient,
		userServiceURL:     "http://" + userServiceAddr,
		lobbyPublicURL:     "http://" + lobbyPublicAddr,
		lobbyAdminURL:      "http://" + lobbyInternalAddr,
		rtmInternalURL:     "http://" + rtmInternalAddr,
		intentsStream:      intentsStream,
		lifecycleStream:    lifecycleStream,
		jobResultsStream:   jobResultsStreamKey,
		startJobsStream:    startJobsStreamKey,
		stopJobsStream:     stopJobsStreamKey,
		healthEvents:       healthEventsStreamKey,
		gmStub:             gmStub,
		dockerNetwork:      dockerNetwork,
		engineImage:        engineImage,
		userServiceProcess: userServiceProcess,
		lobbyProcess:       lobbyProcess,
		rtmProcess:         rtmProcess,
	}
}
// ensureUser provisions a fresh User Service account by email and
// returns the assigned user_id. The email pattern includes the test
// name to avoid collisions across concurrent tests sharing the
// container.
func (h *lobbyRTMHarness) ensureUser(t *testing.T, email string) ensureUserResponse {
	t.Helper()
	payload := map[string]any{
		"email": email,
		"registration_context": map[string]string{
			"preferred_language": "en",
			"time_zone":          "Europe/Kaliningrad",
		},
	}
	response := postJSON(t, h.userServiceURL+"/api/v1/internal/users/ensure-by-email", payload, nil)
	var ensured ensureUserResponse
	requireJSONStatus(t, response, http.StatusOK, &ensured)
	// Each harness boots fresh persistence, so the account must be new.
	require.Equal(t, "created", ensured.Outcome)
	require.NotEmpty(t, ensured.UserID)
	return ensured
}
// userCreatePrivateGame creates a private game owned by ownerUserID
// with the supplied target engine version. Returns the assigned
// game_id.
func (h *lobbyRTMHarness) userCreatePrivateGame(
	t *testing.T,
	ownerUserID, name, targetEngineVersion string,
	enrollmentEndsAt int64,
) string {
	t.Helper()
	body := map[string]any{
		"game_name":             name,
		"game_type":             "private",
		"min_players":           1,
		"max_players":           4,
		"start_gap_hours":       6,
		"start_gap_players":     1,
		"enrollment_ends_at":    enrollmentEndsAt,
		"turn_schedule":         "0 18 * * *",
		"target_engine_version": targetEngineVersion,
	}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games", body,
		http.Header{"X-User-Id": []string{ownerUserID}})
	require.Equalf(t, http.StatusCreated, response.StatusCode, "create private game: %s", response.Body)
	var created map[string]any
	require.NoError(t, json.Unmarshal([]byte(response.Body), &created))
	assignedID, ok := created["game_id"].(string)
	require.Truef(t, ok, "game_id missing: %s", response.Body)
	return assignedID
}
// userOpenEnrollment moves the game into its enrollment phase on
// behalf of the owner via the public API.
func (h *lobbyRTMHarness) userOpenEnrollment(t *testing.T, ownerUserID, gameID string) {
	t.Helper()
	headers := http.Header{"X-User-Id": []string{ownerUserID}}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games/"+gameID+"/open-enrollment", nil, headers)
	require.Equalf(t, http.StatusOK, response.StatusCode, "user open enrollment: %s", response.Body)
}
// userCreateInvite issues a game invite from the owner to inviteeUserID.
func (h *lobbyRTMHarness) userCreateInvite(t *testing.T, ownerUserID, gameID, inviteeUserID string) {
	t.Helper()
	headers := http.Header{"X-User-Id": []string{ownerUserID}}
	body := map[string]any{"invitee_user_id": inviteeUserID}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games/"+gameID+"/invites", body, headers)
	require.Equalf(t, http.StatusCreated, response.StatusCode, "create invite: %s", response.Body)
}
// firstCreatedInviteID lists the invitee's pending invites and returns
// the one matching gameID, failing the test when none match.
func (h *lobbyRTMHarness) firstCreatedInviteID(t *testing.T, inviteeUserID, gameID string) string {
	t.Helper()
	request, err := http.NewRequest(http.MethodGet,
		h.lobbyPublicURL+"/api/v1/lobby/my/invites?status=created", nil)
	require.NoError(t, err)
	request.Header.Set("X-User-Id", inviteeUserID)
	response := doRequest(t, request)
	require.Equalf(t, http.StatusOK, response.StatusCode, "list my invites: %s", response.Body)
	var listing struct {
		Items []struct {
			InviteID string `json:"invite_id"`
			GameID   string `json:"game_id"`
		} `json:"items"`
	}
	require.NoError(t, json.Unmarshal([]byte(response.Body), &listing))
	for _, invite := range listing.Items {
		if invite.GameID != gameID {
			continue
		}
		return invite.InviteID
	}
	t.Fatalf("no invite found for invitee %s on game %s; body=%s", inviteeUserID, gameID, response.Body)
	return ""
}
// userRedeemInvite accepts an invite as the invitee, choosing raceName.
func (h *lobbyRTMHarness) userRedeemInvite(t *testing.T, inviteeUserID, gameID, inviteID, raceName string) {
	t.Helper()
	redeemURL := h.lobbyPublicURL + "/api/v1/lobby/games/" + gameID + "/invites/" + inviteID + "/redeem"
	response := postJSON(t, redeemURL,
		map[string]any{"race_name": raceName},
		http.Header{"X-User-Id": []string{inviteeUserID}})
	require.Equalf(t, http.StatusOK, response.StatusCode, "redeem invite: %s", response.Body)
}
// userReadyToStart marks the game ready-to-start as the owner.
func (h *lobbyRTMHarness) userReadyToStart(t *testing.T, ownerUserID, gameID string) {
	t.Helper()
	headers := http.Header{"X-User-Id": []string{ownerUserID}}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games/"+gameID+"/ready-to-start", nil, headers)
	require.Equalf(t, http.StatusOK, response.StatusCode, "ready-to-start: %s", response.Body)
}
// userStartGame triggers the start of the game as the owner.
func (h *lobbyRTMHarness) userStartGame(t *testing.T, ownerUserID, gameID string) {
	t.Helper()
	headers := http.Header{"X-User-Id": []string{ownerUserID}}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games/"+gameID+"/start", nil, headers)
	require.Equalf(t, http.StatusOK, response.StatusCode, "user start: %s", response.Body)
}
// prepareInflightGame walks one private game from creation through
// `start`. For the happy and cancel scenarios the game subsequently
// reaches `running` once RTM publishes the success job_result; for
// the failure scenario it ends in `start_failed`.
//
// Returns owner and invitee user records plus the game id.
func (h *lobbyRTMHarness) prepareInflightGame(
	t *testing.T,
	ownerEmail, inviteeEmail, gameName, targetEngineVersion string,
) (owner, invitee ensureUserResponse, gameID string) {
	t.Helper()
	owner = h.ensureUser(t, ownerEmail)
	invitee = h.ensureUser(t, inviteeEmail)
	enrollmentDeadline := time.Now().Add(48 * time.Hour).Unix()
	gameID = h.userCreatePrivateGame(t, owner.UserID, gameName, targetEngineVersion, enrollmentDeadline)
	h.userOpenEnrollment(t, owner.UserID, gameID)
	h.userCreateInvite(t, owner.UserID, gameID, invitee.UserID)
	inviteID := h.firstCreatedInviteID(t, invitee.UserID, gameID)
	h.userRedeemInvite(t, invitee.UserID, gameID, inviteID, "PilotInvitee")
	h.userReadyToStart(t, owner.UserID, gameID)
	h.userStartGame(t, owner.UserID, gameID)
	return owner, invitee, gameID
}
// gameStatus reads one game record off Lobby's internal API and
// returns its status field. Used by waitGameStatus and direct
// assertions.
func (h *lobbyRTMHarness) gameStatus(t *testing.T, gameID string) string {
	t.Helper()
	request, err := http.NewRequest(http.MethodGet,
		h.lobbyAdminURL+"/api/v1/internal/games/"+gameID, nil)
	require.NoError(t, err)
	response := doRequest(t, request)
	if response.StatusCode != http.StatusOK {
		t.Fatalf("get game internal: status=%d body=%s", response.StatusCode, response.Body)
	}
	var record struct {
		Status string `json:"status"`
	}
	require.NoError(t, json.Unmarshal([]byte(response.Body), &record))
	return record.Status
}
// waitGameStatus polls `GET /api/v1/internal/games/{gameID}` until
// the record reports the expected status or the timeout fires.
func (h *lobbyRTMHarness) waitGameStatus(t *testing.T, gameID, want string, timeout time.Duration) {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		// Probe first, then check the deadline, so one final probe
		// always runs after the timeout elapses.
		current := h.gameStatus(t, gameID)
		if current == want {
			return
		}
		if time.Now().After(deadline) {
			t.Fatalf("game %s status: want %q got %q (after %s)", gameID, want, current, timeout)
		}
		time.Sleep(150 * time.Millisecond)
	}
}
// publishUserLifecycleEvent appends one event to the per-test
// `user:lifecycle_events` stream. The Lobby userlifecycle worker
// consumes the same stream.
func (h *lobbyRTMHarness) publishUserLifecycleEvent(t *testing.T, eventType, userID string) {
	t.Helper()
	fields := map[string]any{
		"event_type":     eventType,
		"user_id":        userID,
		"occurred_at_ms": strconv.FormatInt(time.Now().UnixMilli(), 10),
		"source":         "user_admin",
		"actor_type":     "admin",
		"actor_id":       "admin-1",
		"reason_code":    "terminal_policy_violation",
	}
	_, err := h.redis.XAdd(context.Background(), &redis.XAddArgs{
		Stream: h.lifecycleStream,
		Values: fields,
	}).Result()
	require.NoError(t, err)
}
// jobResultEntry decodes one `runtime:job_results` Redis Stream entry.
type jobResultEntry struct {
	// StreamID is the Redis stream entry ID.
	StreamID string
	GameID   string
	// Outcome is "success" or "failure" in the visible scenarios.
	Outcome string
	// ContainerID and EngineEndpoint are populated on success results.
	ContainerID    string
	EngineEndpoint string
	// ErrorCode and ErrorMessage are populated on failure results.
	ErrorCode    string
	ErrorMessage string
}
// stopJobEntry decodes one `runtime:stop_jobs` Redis Stream entry as
// published by Lobby.
type stopJobEntry struct {
	// StreamID is the Redis stream entry ID.
	StreamID string
	GameID   string
	// Reason is Lobby's stop reason, e.g. "cancelled".
	Reason string
}
// notificationIntentEntry decodes one `notification:intents` entry.
type notificationIntentEntry struct {
	// StreamID is the Redis stream entry ID.
	StreamID         string
	NotificationType string
	// Producer identifies the publishing service.
	Producer string
	// Payload is the decoded payload JSON; nil when the entry carried
	// no parseable payload field.
	Payload map[string]any
}
// allJobResults returns every entry on the per-test job_results
// stream in stream order.
func (h *lobbyRTMHarness) allJobResults(t *testing.T) []jobResultEntry {
	t.Helper()
	messages, err := h.redis.XRange(context.Background(), h.jobResultsStream, "-", "+").Result()
	require.NoError(t, err)
	results := make([]jobResultEntry, len(messages))
	for i, message := range messages {
		results[i] = jobResultEntry{
			StreamID:       message.ID,
			GameID:         streamString(message.Values, "game_id"),
			Outcome:        streamString(message.Values, "outcome"),
			ContainerID:    streamString(message.Values, "container_id"),
			EngineEndpoint: streamString(message.Values, "engine_endpoint"),
			ErrorCode:      streamString(message.Values, "error_code"),
			ErrorMessage:   streamString(message.Values, "error_message"),
		}
	}
	return results
}
// waitJobResult polls the per-test job_results stream until predicate
// matches one entry, or the timeout fires.
func (h *lobbyRTMHarness) waitJobResult(
	t *testing.T,
	predicate func(jobResultEntry) bool,
	timeout time.Duration,
) jobResultEntry {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		observed := h.allJobResults(t)
		for _, candidate := range observed {
			if predicate(candidate) {
				return candidate
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("no job_result matched within %s; observed=%+v", timeout, observed)
		}
		time.Sleep(150 * time.Millisecond)
	}
}
// allStopJobs returns every entry on the per-test stop_jobs stream.
func (h *lobbyRTMHarness) allStopJobs(t *testing.T) []stopJobEntry {
	t.Helper()
	messages, err := h.redis.XRange(context.Background(), h.stopJobsStream, "-", "+").Result()
	require.NoError(t, err)
	jobs := make([]stopJobEntry, len(messages))
	for i, message := range messages {
		jobs[i] = stopJobEntry{
			StreamID: message.ID,
			GameID:   streamString(message.Values, "game_id"),
			Reason:   streamString(message.Values, "reason"),
		}
	}
	return jobs
}
// waitStopJobReason polls the stop_jobs stream until an entry for
// gameID with the expected reason appears.
func (h *lobbyRTMHarness) waitStopJobReason(t *testing.T, gameID, reason string, timeout time.Duration) stopJobEntry {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		for _, job := range h.allStopJobs(t) {
			if job.GameID == gameID && job.Reason == reason {
				return job
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("no stop_job for game %s with reason %q within %s", gameID, reason, timeout)
		}
		time.Sleep(150 * time.Millisecond)
	}
}
// allNotificationIntents returns every entry on the per-test
// notification:intents stream.
func (h *lobbyRTMHarness) allNotificationIntents(t *testing.T) []notificationIntentEntry {
	t.Helper()
	messages, err := h.redis.XRange(context.Background(), h.intentsStream, "-", "+").Result()
	require.NoError(t, err)
	intents := make([]notificationIntentEntry, 0, len(messages))
	for _, message := range messages {
		intent := notificationIntentEntry{
			StreamID:         message.ID,
			NotificationType: streamString(message.Values, "notification_type"),
			Producer:         streamString(message.Values, "producer"),
		}
		// `pkg/notificationintent` publishes the payload under the
		// field name `payload_json`. Older versions of this harness
		// looked for `payload` and silently produced an empty Payload
		// map, which made every predicate that checks `Payload["…"]`
		// fall through. Read both field names for forward compat.
		encoded := streamString(message.Values, "payload_json")
		if encoded == "" {
			encoded = streamString(message.Values, "payload")
		}
		if encoded != "" {
			var payload map[string]any
			// Best effort: an unparseable payload leaves Payload nil
			// rather than failing the whole listing.
			if err := json.Unmarshal([]byte(encoded), &payload); err == nil {
				intent.Payload = payload
			}
		}
		intents = append(intents, intent)
	}
	return intents
}
// waitNotificationIntent polls the intents stream until the
// predicate matches.
func (h *lobbyRTMHarness) waitNotificationIntent(
	t *testing.T,
	predicate func(notificationIntentEntry) bool,
	timeout time.Duration,
) notificationIntentEntry {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		observed := h.allNotificationIntents(t)
		for _, candidate := range observed {
			if predicate(candidate) {
				return candidate
			}
		}
		if time.Now().After(deadline) {
			// Summarize type:producer pairs so the failure message
			// stays readable even with large payloads on the stream.
			summary := make([]string, 0, len(observed))
			for _, candidate := range observed {
				summary = append(summary, candidate.NotificationType+":"+candidate.Producer)
			}
			t.Fatalf("no notification_intent matched within %s; observed=%v", timeout, summary)
		}
		time.Sleep(150 * time.Millisecond)
	}
}
// rtmRuntimeStatus issues `GET /api/v1/internal/runtimes/{gameID}`
// against RTM and returns the persisted runtime record's status, or
// the empty string when RTM responds 404.
func (h *lobbyRTMHarness) rtmRuntimeStatus(t *testing.T, gameID string) (string, int) {
	t.Helper()
	request, err := http.NewRequest(http.MethodGet,
		h.rtmInternalURL+"/api/v1/internal/runtimes/"+gameID, nil)
	require.NoError(t, err)
	response := doRequest(t, request)
	switch response.StatusCode {
	case http.StatusNotFound:
		// No persisted record for this game.
		return "", response.StatusCode
	case http.StatusOK:
		var record struct {
			Status string `json:"status"`
		}
		require.NoError(t, json.Unmarshal([]byte(response.Body), &record))
		return record.Status, response.StatusCode
	default:
		t.Fatalf("rtm get runtime: status=%d body=%s", response.StatusCode, response.Body)
		return "", response.StatusCode
	}
}
// waitRTMRuntimeStatus polls RTM until the runtime record reports
// the expected status or the timeout fires.
func (h *lobbyRTMHarness) waitRTMRuntimeStatus(t *testing.T, gameID, want string, timeout time.Duration) {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		current, httpCode := h.rtmRuntimeStatus(t, gameID)
		if current == want {
			return
		}
		if time.Now().After(deadline) {
			t.Fatalf("rtm runtime status for %s: want %q got %q (http %d) within %s",
				gameID, want, current, httpCode, timeout)
		}
		time.Sleep(150 * time.Millisecond)
	}
}
// streamString reads a Redis Streams field as a string regardless of
// the underlying go-redis decoded type. A missing key yields "".
func streamString(values map[string]any, key string) string {
	value, present := values[key]
	if !present {
		return ""
	}
	if text, ok := value.(string); ok {
		return text
	}
	if raw, ok := value.([]byte); ok {
		return string(raw)
	}
	// Fallback: render any other decoded type through fmt.
	return fmt.Sprintf("%v", value)
}
// waitForUserServiceReady polls the User Service readiness probe until
// it answers 200 OK, failing the test with the captured process logs
// after a 10 second budget.
//
// Improvement over the original: the probe request carries no body, so
// one *http.Request is built once and reused across retries instead of
// being re-allocated (plus a require call) every 25ms inside the loop.
func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	req, err := http.NewRequest(http.MethodGet,
		baseURL+"/api/v1/internal/users/user-readiness-probe/exists", nil)
	require.NoError(t, err)
	deadline := time.Now().Add(10 * time.Second)
	for time.Now().Before(deadline) {
		response, err := client.Do(req)
		if err == nil {
			// Drain before closing so the transport can reuse the
			// underlying connection.
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
			if response.StatusCode == http.StatusOK {
				return
			}
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs())
}
// postJSON issues a POST with an optional JSON-encoded body and extra
// headers, returning the fully-read response. Extra headers are merged
// last, so a caller-supplied Content-Type wins.
func postJSON(t *testing.T, url string, body any, header http.Header) httpResponse {
	t.Helper()
	var bodyReader io.Reader
	if body != nil {
		encoded, err := json.Marshal(body)
		require.NoError(t, err)
		bodyReader = bytes.NewReader(encoded)
	}
	request, err := http.NewRequest(http.MethodPost, url, bodyReader)
	require.NoError(t, err)
	if body != nil {
		request.Header.Set("Content-Type", "application/json")
	}
	maps.Copy(request.Header, header)
	return doRequest(t, request)
}
// doRequest executes the request with a 5s timeout and keep-alives
// disabled (test processes come and go during a suite run), then
// drains the body into an httpResponse.
func doRequest(t *testing.T, request *http.Request) httpResponse {
	t.Helper()
	transport := &http.Transport{DisableKeepAlives: true}
	client := &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
	t.Cleanup(client.CloseIdleConnections)
	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()
	body, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	return httpResponse{
		StatusCode: response.StatusCode,
		Body:       string(body),
		Header:     response.Header.Clone(),
	}
}
// requireJSONStatus asserts the response status and, when target is
// non-nil, strictly decodes the body into it.
func requireJSONStatus(t *testing.T, response httpResponse, wantStatus int, target any) {
	t.Helper()
	require.Equalf(t, wantStatus, response.StatusCode, "unexpected status, body=%s", response.Body)
	if target == nil {
		return
	}
	require.NoError(t, decodeStrictJSON([]byte(response.Body), target))
}
// decodeStrictJSON unmarshals payload into target, rejecting both
// unknown object fields and any trailing JSON after the first value.
func decodeStrictJSON(payload []byte, target any) error {
	decoder := json.NewDecoder(bytes.NewReader(payload))
	decoder.DisallowUnknownFields()
	if err := decoder.Decode(target); err != nil {
		return err
	}
	// A second decode must hit EOF; anything else means extra input.
	switch err := decoder.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// resolveDockerHost honours DOCKER_HOST when the developer machine
// routes through colima or a remote daemon, falling back to the
// standard unix path otherwise.
func resolveDockerHost() string {
if host := strings.TrimSpace(os.Getenv("DOCKER_HOST")); host != "" {
return host
}
return "unix:///var/run/docker.sock"
}
+204
View File
@@ -0,0 +1,204 @@
package lobbyrtm_test
import (
"net/http"
"strings"
"testing"
"time"
"galaxy/integration/internal/harness"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
	// Job outcomes observed on the `runtime:job_results` stream.
	jobOutcomeSuccess = "success"
	jobOutcomeFailure = "failure"
	// stopReasonCancelled is the stop_jobs reason asserted when a
	// lifecycle cascade cancels an inflight game.
	stopReasonCancelled = "cancelled"
	// errorCodeImagePullFailed is RTM's stable error code for an
	// unpullable engine image.
	errorCodeImagePullFailed = "image_pull_failed"
)
// TestStartFlowSucceedsWithRealEngine drives the happy path:
// Lobby creates a private game, the owner walks it through enrollment
// to start, Lobby publishes a `runtime:start_jobs` envelope with the
// resolved `image_ref`, RTM starts a real `galaxy/game` engine
// container, publishes a success `runtime:job_results` entry, and
// Lobby's runtimejobresult worker transitions the game to `running`.
// The test then hits the engine's `/healthz` endpoint directly via
// the bridge network IP, proving the container is alive end-to-end.
func TestStartFlowSucceedsWithRealEngine(t *testing.T) {
	h := newLobbyRTMHarness(t)
	owner, _, gameID := h.prepareInflightGame(t,
		"start-owner@example.com",
		"start-invitee@example.com",
		"Start Galaxy",
		defaultEngineVersion,
	)
	t.Logf("owner=%s game=%s", owner.UserID, gameID)
	// RTM publishes a success job_result for the start envelope. The
	// generous 90s budget covers a cold container start.
	startResult := h.waitJobResult(t, func(entry jobResultEntry) bool {
		return entry.GameID == gameID && entry.Outcome == jobOutcomeSuccess
	}, 90*time.Second)
	require.Empty(t, startResult.ErrorCode, "happy path must publish empty error_code")
	require.NotEmpty(t, startResult.ContainerID, "happy path must carry a container id")
	require.NotEmpty(t, startResult.EngineEndpoint, "happy path must carry an engine endpoint")
	// Lobby's runtime-job-result worker drives the game to `running`.
	h.waitGameStatus(t, gameID, "running", 30*time.Second)
	// RTM persists the runtime record and exposes it through REST.
	h.waitRTMRuntimeStatus(t, gameID, "running", 15*time.Second)
	// A real engine container exists with the expected labels.
	containerID := harness.FindContainerIDByLabel(t, gameID)
	require.NotEmptyf(t, containerID, "no engine container found for game %s", gameID)
	require.Equal(t, startResult.ContainerID, containerID,
		"job_result container_id must match the live container")
	require.Equal(t, "running", harness.ContainerState(t, containerID))
	// The engine answers /healthz on the bridge network IP.
	ip := harness.ContainerNetworkIP(t, containerID, h.dockerNetwork)
	require.NotEmptyf(t, ip, "engine container %s has no IP on network %s", containerID, h.dockerNetwork)
	harness.WaitForEngineHealthz(t, ip, 15*time.Second)
}
// TestRunningGameStopsWhenOwnerCascadeBlocked drives the stop path:
// drive the same game to `running`, publish a
// `user.lifecycle.permanent_blocked` event for the owner, the Lobby
// userlifecycle worker cascades to the inflight game, publishes a
// `runtime:stop_jobs` envelope with `reason=cancelled`, and RTM stops
// the engine. The test asserts on the public boundary surfaces only.
func TestRunningGameStopsWhenOwnerCascadeBlocked(t *testing.T) {
	h := newLobbyRTMHarness(t)
	owner, _, gameID := h.prepareInflightGame(t,
		"stop-owner@example.com",
		"stop-invitee@example.com",
		"Stop Galaxy",
		defaultEngineVersion,
	)
	t.Logf("owner=%s game=%s", owner.UserID, gameID)
	// Wait for the start outcome so we know RTM is fully running
	// before we trigger the cascade.
	h.waitJobResult(t, func(entry jobResultEntry) bool {
		return entry.GameID == gameID && entry.Outcome == jobOutcomeSuccess
	}, 90*time.Second)
	h.waitGameStatus(t, gameID, "running", 30*time.Second)
	// Capture the engine container ID while it is still running so the
	// state check below has a stable handle.
	containerID := harness.FindContainerIDByLabel(t, gameID)
	require.NotEmpty(t, containerID)
	// Trigger the cascade: permanent block on the game owner causes
	// Lobby's userlifecycle worker to publish stop_job(cancelled) and
	// transition the owned game to `cancelled`.
	h.publishUserLifecycleEvent(t, "user.lifecycle.permanent_blocked", owner.UserID)
	// Lobby observably publishes the right stop envelope on the boundary.
	stop := h.waitStopJobReason(t, gameID, stopReasonCancelled, 30*time.Second)
	assert.Equal(t, gameID, stop.GameID)
	// Lobby moves the game to cancelled.
	h.waitGameStatus(t, gameID, "cancelled", 30*time.Second)
	// RTM consumes stop_job, stops the engine, and persists status=stopped.
	h.waitRTMRuntimeStatus(t, gameID, "stopped", 30*time.Second)
	// The container is no longer running. Docker reports `exited`
	// (or `created`/`removing` during teardown); none of those match
	// `running`, which is the only state that contradicts a successful
	// stop.
	require.Eventuallyf(t, func() bool {
		state := harness.ContainerState(t, containerID)
		return state != "running"
	}, 30*time.Second, 250*time.Millisecond,
		"engine container %s did not leave running state", containerID)
	// RTM emitted at least two job_results for this game: one success
	// for the start, one success for the stop.
	successCount := 0
	for _, entry := range h.allJobResults(t) {
		if entry.GameID == gameID && entry.Outcome == jobOutcomeSuccess {
			successCount++
		}
	}
	assert.GreaterOrEqualf(t, successCount, 2,
		"expected at least two success job_results (start + stop) for game %s", gameID)
}
// TestStartFailsWhenImageMissing drives the failure path: the game's
// `target_engine_version` resolves to a non-existent image tag, RTM
// fails to pull, publishes a failure `runtime:job_results` plus a
// `runtime.image_pull_failed` notification intent, and Lobby's
// runtimejobresult worker transitions the game to `start_failed`.
func TestStartFailsWhenImageMissing(t *testing.T) {
	h := newLobbyRTMHarness(t)
	owner, _, gameID := h.prepareInflightGame(t,
		"fail-owner@example.com",
		"fail-invitee@example.com",
		"Fail Galaxy",
		missingEngineVersion,
	)
	t.Logf("owner=%s game=%s", owner.UserID, gameID)
	// Mirrors the harness's LOBBY_ENGINE_IMAGE_TEMPLATE with the
	// missing version substituted.
	expectedImageRef := "galaxy/game:" + missingEngineVersion + "-lobbyrtm-it"
	// RTM publishes a failure job_result with the stable code. Pull
	// attempts can be slow to fail, hence the 120s budget.
	failure := h.waitJobResult(t, func(entry jobResultEntry) bool {
		return entry.GameID == gameID && entry.Outcome == jobOutcomeFailure
	}, 120*time.Second)
	assert.Equal(t, errorCodeImagePullFailed, failure.ErrorCode)
	assert.Empty(t, failure.ContainerID)
	assert.Empty(t, failure.EngineEndpoint)
	assert.NotEmpty(t, failure.ErrorMessage)
	// RTM also publishes an admin notification intent on the shared stream.
	intent := h.waitNotificationIntent(t, func(entry notificationIntentEntry) bool {
		if entry.NotificationType != notificationImagePulled {
			return false
		}
		payloadGameID, _ := entry.Payload["game_id"].(string)
		return payloadGameID == gameID
	}, 30*time.Second)
	require.NotNil(t, intent.Payload)
	assert.Equal(t, gameID, intent.Payload["game_id"])
	assert.Equal(t, expectedImageRef, intent.Payload["image_ref"])
	assert.Equal(t, errorCodeImagePullFailed, intent.Payload["error_code"])
	// Lobby flips the game to start_failed.
	h.waitGameStatus(t, gameID, "start_failed", 60*time.Second)
	// No engine container should exist for this game.
	containerID := harness.FindContainerIDByLabel(t, gameID)
	if containerID != "" {
		state := harness.ContainerState(t, containerID)
		assert.NotEqual(t, "running", state,
			"failed image pull must not leave a running container behind (state=%s)", state)
	}
	// RTM either has no record (clean rollback) or has one not in
	// `running`. Either is acceptable per the start service contract.
	status, code := h.rtmRuntimeStatus(t, gameID)
	switch code {
	case http.StatusNotFound:
		// nothing persisted — clean rollback path
	case http.StatusOK:
		assert.NotEqual(t, "running", status,
			"failed image pull must not persist a running record")
	default:
		t.Fatalf("unexpected RTM runtime response: status=%q code=%d", status, code)
	}
	// Sanity check the notification carried RTM's producer marker
	// rather than Lobby's, so we know the suite truly observed RTM
	// publishing on the shared stream.
	assert.Truef(t,
		strings.Contains(intent.Producer, "rtm") ||
			strings.Contains(intent.Producer, "runtime"),
		"image_pull_failed intent producer should be RTM-flavoured, got %q", intent.Producer)
}
@@ -0,0 +1,664 @@
// Package lobbyrtmnotification_test exercises the failure-with-
// notification path that crosses three real services at once: Lobby
// publishes a start job, Runtime Manager fails to pull the engine
// image, RTM publishes both a failure `runtime:job_results` envelope
// AND a `runtime.image_pull_failed` admin notification intent on
// `notification:intents`. The Notification Service consumes the intent
// and routes it to Mail Service, where the resulting delivery is
// observable on the public list-deliveries surface.
//
// The suite proves the same Redis bus carries both flows correctly
// when all three services are booted together — the union of
// `integration/lobbyrtm` (which uses a stub notification) and
// `integration/rtmanagernotification` (which has no Lobby).
package lobbyrtmnotification_test
import (
"bytes"
"context"
"encoding/json"
"errors"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"galaxy/integration/internal/harness"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
	// Shared Redis bus keys; the triple harness appends a per-suite
	// suffix to each so concurrent tests never collide.
	notificationIntentsStream = "notification:intents"
	startJobsStream           = "runtime:start_jobs"
	stopJobsStream            = "runtime:stop_jobs"
	jobResultsStream          = "runtime:job_results"
	healthEventsStream        = "runtime:health_events"
	userLifecycleStream       = "user:lifecycle_events"
	gmEventsStream            = "gm:lobby_events"

	// mailDeliveriesPath is Mail Service's internal list-deliveries
	// surface the final assertion reads.
	mailDeliveriesPath = "/api/v1/internal/deliveries"

	// NOTE(review): named "ImagePulled" but the value is the
	// pull-FAILURE notification type; a clearer name would be
	// notificationImagePullFailed.
	notificationImagePulled = "runtime.image_pull_failed"
	// missingEngineVersion resolves to an image tag that is never
	// built, forcing the pull failure under test.
	missingEngineVersion = "0.0.0-missing"
	// adminEmailRecipient is the admin address the resulting Mail
	// delivery must target.
	adminEmailRecipient = "rtm-admin@example.com"
)

// suiteSeq scopes per-test stream suffixes so concurrent test
// invocations cannot bleed events into each other.
var suiteSeq atomic.Int64
// TestImagePullFailureReachesMailThroughNotification drives Lobby +
// RTM + Notification + Mail end-to-end. Lobby publishes a start job
// for an unresolvable image; RTM fails the pull and publishes both a
// failure job_result (consumed by Lobby) and a notification intent
// (consumed by Notification, then routed to Mail).
func TestImagePullFailureReachesMailThroughNotification(t *testing.T) {
	h := newTripleHarness(t)
	owner := h.ensureUser(t, "triple-owner@example.com")
	invitee := h.ensureUser(t, "triple-invitee@example.com")
	// Walk the game from creation through start with the missing
	// engine version so the pull is guaranteed to fail.
	gameID := h.adminCreatePrivateGameForOwner(t, owner.UserID, "Triple Galaxy",
		time.Now().Add(48*time.Hour).Unix(), missingEngineVersion)
	h.userOpenEnrollment(t, owner.UserID, gameID)
	h.userCreateInvite(t, owner.UserID, gameID, invitee.UserID)
	inviteID := h.firstCreatedInviteID(t, invitee.UserID, gameID)
	h.userRedeemInvite(t, invitee.UserID, gameID, inviteID, "PilotTriple")
	h.userReadyToStart(t, owner.UserID, gameID)
	h.userStartGame(t, owner.UserID, gameID)
	t.Logf("triple harness gameID=%s ownerUserID=%s", gameID, owner.UserID)
	expectedImageRef := "galaxy/game:" + missingEngineVersion + "-tripleit"
	// 1. RTM publishes a failure job_result on `runtime:job_results`.
	failure := h.waitJobResult(t, func(entry jobResultEntry) bool {
		return entry.GameID == gameID && entry.Outcome == "failure"
	}, 120*time.Second)
	assert.Equal(t, "image_pull_failed", failure.ErrorCode)
	// 2. RTM publishes an admin notification intent.
	intent := h.waitNotificationIntent(t, func(entry notificationIntentEntry) bool {
		return entry.NotificationType == notificationImagePulled &&
			entry.PayloadGameID == gameID
	}, 60*time.Second)
	assert.Equal(t, expectedImageRef, intent.PayloadImageRef)
	// 3. Notification consumes the intent and Mail records the
	// delivery for the configured admin recipient.
	// NOTE(review): the key format mirrors Notification's email-channel
	// idempotency key — confirm against the notification service if
	// this assertion ever drifts.
	idempotencyKey := "notification:" + intent.RedisEntryID +
		"/email:email:" + adminEmailRecipient
	delivery := h.eventuallyDelivery(t, url.Values{
		"source":          []string{"notification"},
		"status":          []string{"sent"},
		"recipient":       []string{adminEmailRecipient},
		"template_id":     []string{notificationImagePulled},
		"idempotency_key": []string{idempotencyKey},
	})
	assert.Equal(t, "template", delivery.PayloadMode)
	assert.Equal(t, notificationImagePulled, delivery.TemplateID)
	assert.Equal(t, []string{adminEmailRecipient}, delivery.To)
	// 4. Lobby's runtimejobresult worker drives the game to
	// `start_failed` because of the same failure outcome on the
	// shared bus.
	h.waitGameStatus(t, gameID, "start_failed", 60*time.Second)
}
// tripleHarness bundles everything the suite needs to talk to the five
// real processes (User, Mail, Notification, Lobby, RTM) plus the shared
// Redis container and the per-suite stream names they were wired with.
type tripleHarness struct {
	redis           *redis.Client // direct client for XRange-based stream assertions
	userServiceURL  string        // User Service internal HTTP base URL
	lobbyAdminURL   string        // Lobby internal HTTP base URL (game reads)
	lobbyPublicURL  string        // Lobby public HTTP base URL (player actions)
	mailBaseURL     string        // Mail Service internal HTTP base URL
	notificationURL string        // Notification Service internal HTTP base URL
	intentsStream   string        // suffixed notification:intents stream
	startJobs       string        // suffixed runtime:start_jobs stream
	stopJobs        string        // suffixed runtime:stop_jobs stream
	jobResults      string        // suffixed runtime:job_results stream
	healthEvents    string        // suffixed runtime:health_events stream
	lifecycleStream string        // suffixed user:lifecycle_events stream
	gmEventsStream  string        // suffixed gm:lobby_events stream
	processes       []*harness.Process // started processes, in boot order
}
// newTripleHarness boots all five real services (User, Mail,
// Notification, Lobby, Runtime Manager) against one Redis
// testcontainer and the local Docker daemon, and wires every
// cross-service stream to a per-suite suffixed name so concurrent
// suites sharing the container never collide. All cleanup is
// registered on t; each service is started only after its
// dependencies report ready.
func newTripleHarness(t *testing.T) *tripleHarness {
	t.Helper()
	harness.RequireDockerDaemon(t) // RTM /readyz pings Docker.
	redisRuntime := harness.StartRedisContainer(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisRuntime.Addr,
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() { require.NoError(t, redisClient.Close()) })
	dockerNetwork := harness.EnsureDockerNetwork(t)
	// One listener address per HTTP surface, allocated up front so the
	// env blocks below can cross-reference each other.
	userServiceAddr := harness.FreeTCPAddress(t)
	mailInternalAddr := harness.FreeTCPAddress(t)
	notificationInternalAddr := harness.FreeTCPAddress(t)
	lobbyPublicAddr := harness.FreeTCPAddress(t)
	lobbyInternalAddr := harness.FreeTCPAddress(t)
	rtmInternalAddr := harness.FreeTCPAddress(t)
	userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice")
	mailBinary := harness.BuildBinary(t, "mail", "./mail/cmd/mail")
	notificationBinary := harness.BuildBinary(t, "notification", "./notification/cmd/notification")
	lobbyBinary := harness.BuildBinary(t, "lobby", "./lobby/cmd/lobby")
	rtmBinary := harness.BuildBinary(t, "rtmanager", "./rtmanager/cmd/rtmanager")
	// Per-suite numeric suffix isolates every stream name below.
	suffix := strconv.FormatInt(suiteSeq.Add(1), 10)
	intentsStream := notificationIntentsStream + ":" + suffix
	startJobs := startJobsStream + ":" + suffix
	stopJobs := stopJobsStream + ":" + suffix
	jobResults := jobResultsStream + ":" + suffix
	healthEvents := healthEventsStream + ":" + suffix
	lifecycle := userLifecycleStream + ":" + suffix
	gmEvents := gmEventsStream + ":" + suffix
	// User Service.
	userServiceEnv := harness.StartUserServicePersistence(t, redisRuntime.Addr).Env
	userServiceEnv["USERSERVICE_LOG_LEVEL"] = "info"
	userServiceEnv["USERSERVICE_INTERNAL_HTTP_ADDR"] = userServiceAddr
	userServiceEnv["OTEL_TRACES_EXPORTER"] = "none"
	userServiceEnv["OTEL_METRICS_EXPORTER"] = "none"
	userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, userServiceEnv)
	waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr)
	// Mail Service. SMTP is stubbed — this suite asserts Mail's
	// delivery records, not real SMTP transmission.
	mailEnv := harness.StartMailServicePersistence(t, redisRuntime.Addr).Env
	mailEnv["MAIL_LOG_LEVEL"] = "info"
	mailEnv["MAIL_INTERNAL_HTTP_ADDR"] = mailInternalAddr
	mailEnv["MAIL_TEMPLATE_DIR"] = mailTemplateDir(t)
	mailEnv["MAIL_SMTP_MODE"] = "stub"
	mailEnv["MAIL_STREAM_BLOCK_TIMEOUT"] = "100ms"
	mailEnv["MAIL_OPERATOR_REQUEST_TIMEOUT"] = time.Second.String()
	mailEnv["MAIL_SHUTDOWN_TIMEOUT"] = "2s"
	mailEnv["OTEL_TRACES_EXPORTER"] = "none"
	mailEnv["OTEL_METRICS_EXPORTER"] = "none"
	mailProcess := harness.StartProcess(t, "mail", mailBinary, mailEnv)
	waitForMailReady(t, mailProcess, "http://"+mailInternalAddr)
	// Notification Service. Admin emails for runtime.* go to a single
	// shared address; the suite does not test multi-recipient routing.
	notificationEnv := harness.StartNotificationServicePersistence(t, redisRuntime.Addr).Env
	notificationEnv["NOTIFICATION_LOG_LEVEL"] = "info"
	notificationEnv["NOTIFICATION_INTERNAL_HTTP_ADDR"] = notificationInternalAddr
	notificationEnv["NOTIFICATION_USER_SERVICE_BASE_URL"] = "http://" + userServiceAddr
	notificationEnv["NOTIFICATION_USER_SERVICE_TIMEOUT"] = time.Second.String()
	notificationEnv["NOTIFICATION_INTENTS_STREAM"] = intentsStream
	notificationEnv["NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"] = "100ms"
	notificationEnv["NOTIFICATION_ROUTE_BACKOFF_MIN"] = "100ms"
	notificationEnv["NOTIFICATION_ROUTE_BACKOFF_MAX"] = "100ms"
	notificationEnv["NOTIFICATION_ADMIN_EMAILS_RUNTIME_IMAGE_PULL_FAILED"] = adminEmailRecipient
	notificationEnv["NOTIFICATION_ADMIN_EMAILS_RUNTIME_CONTAINER_START_FAILED"] = adminEmailRecipient
	notificationEnv["NOTIFICATION_ADMIN_EMAILS_RUNTIME_START_CONFIG_INVALID"] = adminEmailRecipient
	notificationEnv["OTEL_TRACES_EXPORTER"] = "none"
	notificationEnv["OTEL_METRICS_EXPORTER"] = "none"
	notificationProcess := harness.StartProcess(t, "notification", notificationBinary, notificationEnv)
	harness.WaitForHTTPStatus(t, notificationProcess, "http://"+notificationInternalAddr+"/readyz", http.StatusOK)
	// Lobby.
	lobbyEnv := harness.StartLobbyServicePersistence(t, redisRuntime.Addr).Env
	lobbyEnv["LOBBY_LOG_LEVEL"] = "info"
	lobbyEnv["LOBBY_PUBLIC_HTTP_ADDR"] = lobbyPublicAddr
	lobbyEnv["LOBBY_INTERNAL_HTTP_ADDR"] = lobbyInternalAddr
	lobbyEnv["LOBBY_USER_SERVICE_BASE_URL"] = "http://" + userServiceAddr
	// NOTE(review): GM base URL points at the Notification listener —
	// Game Master is not booted in this suite; confirm Lobby never
	// calls GM on this path (or tolerates the wrong upstream).
	lobbyEnv["LOBBY_GM_BASE_URL"] = "http://" + notificationInternalAddr
	lobbyEnv["LOBBY_NOTIFICATION_INTENTS_STREAM"] = intentsStream
	lobbyEnv["LOBBY_USER_LIFECYCLE_STREAM"] = lifecycle
	lobbyEnv["LOBBY_RUNTIME_JOB_RESULTS_STREAM"] = jobResults
	lobbyEnv["LOBBY_RUNTIME_START_JOBS_STREAM"] = startJobs
	lobbyEnv["LOBBY_RUNTIME_STOP_JOBS_STREAM"] = stopJobs
	lobbyEnv["LOBBY_GM_EVENTS_STREAM"] = gmEvents
	lobbyEnv["LOBBY_RUNTIME_JOB_RESULTS_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_USER_LIFECYCLE_READ_BLOCK_TIMEOUT"] = "200ms"
	lobbyEnv["LOBBY_GM_EVENTS_READ_BLOCK_TIMEOUT"] = "200ms"
	// The test's expectedImageRef must stay in sync with this template.
	lobbyEnv["LOBBY_ENGINE_IMAGE_TEMPLATE"] = "galaxy/game:{engine_version}-tripleit"
	lobbyEnv["OTEL_TRACES_EXPORTER"] = "none"
	lobbyEnv["OTEL_METRICS_EXPORTER"] = "none"
	lobbyProcess := harness.StartProcess(t, "lobby", lobbyBinary, lobbyEnv)
	harness.WaitForHTTPStatus(t, lobbyProcess, "http://"+lobbyInternalAddr+"/readyz", http.StatusOK)
	// Runtime Manager.
	rtmEnv := harness.StartRTManagerServicePersistence(t, redisRuntime.Addr).Env
	rtmEnv["RTMANAGER_LOG_LEVEL"] = "info"
	rtmEnv["RTMANAGER_INTERNAL_HTTP_ADDR"] = rtmInternalAddr
	rtmEnv["RTMANAGER_LOBBY_INTERNAL_BASE_URL"] = "http://" + lobbyInternalAddr
	rtmEnv["RTMANAGER_LOBBY_INTERNAL_TIMEOUT"] = "200ms"
	rtmEnv["RTMANAGER_DOCKER_HOST"] = resolveDockerHost()
	rtmEnv["RTMANAGER_DOCKER_NETWORK"] = dockerNetwork
	rtmEnv["RTMANAGER_GAME_STATE_ROOT"] = t.TempDir()
	rtmEnv["RTMANAGER_REDIS_START_JOBS_STREAM"] = startJobs
	rtmEnv["RTMANAGER_REDIS_STOP_JOBS_STREAM"] = stopJobs
	rtmEnv["RTMANAGER_REDIS_JOB_RESULTS_STREAM"] = jobResults
	rtmEnv["RTMANAGER_REDIS_HEALTH_EVENTS_STREAM"] = healthEvents
	rtmEnv["RTMANAGER_NOTIFICATION_INTENTS_STREAM"] = intentsStream
	rtmEnv["RTMANAGER_STREAM_BLOCK_TIMEOUT"] = "200ms"
	rtmEnv["RTMANAGER_RECONCILE_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_CLEANUP_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_INSPECT_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_PROBE_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_PROBE_TIMEOUT"] = "1s"
	rtmEnv["RTMANAGER_PROBE_FAILURES_THRESHOLD"] = "3"
	rtmEnv["RTMANAGER_GAME_LEASE_TTL_SECONDS"] = "30"
	rtmEnv["OTEL_TRACES_EXPORTER"] = "none"
	rtmEnv["OTEL_METRICS_EXPORTER"] = "none"
	rtmProcess := harness.StartProcess(t, "rtmanager", rtmBinary, rtmEnv)
	harness.WaitForHTTPStatus(t, rtmProcess, "http://"+rtmInternalAddr+"/readyz", http.StatusOK)
	return &tripleHarness{
		redis:           redisClient,
		userServiceURL:  "http://" + userServiceAddr,
		lobbyAdminURL:   "http://" + lobbyInternalAddr,
		lobbyPublicURL:  "http://" + lobbyPublicAddr,
		mailBaseURL:     "http://" + mailInternalAddr,
		notificationURL: "http://" + notificationInternalAddr,
		intentsStream:   intentsStream,
		startJobs:       startJobs,
		stopJobs:        stopJobs,
		jobResults:      jobResults,
		healthEvents:    healthEvents,
		lifecycleStream: lifecycle,
		gmEventsStream:  gmEvents,
		processes:       []*harness.Process{userServiceProcess, mailProcess, notificationProcess, lobbyProcess, rtmProcess},
	}
}
// --- Lobby fixtures ---
// ensureUserResponse mirrors the subset of the User Service
// ensure-by-email response body the suite consumes.
type ensureUserResponse struct {
	Outcome string `json:"outcome"` // creation outcome; not asserted by this suite
	UserID  string `json:"user_id"`
}
// ensureUser creates (or finds) a user by email on the User Service's
// trusted internal surface and returns the parsed response. It fails
// the test unless the call returns 200 with a non-empty user id.
func (h *tripleHarness) ensureUser(t *testing.T, email string) ensureUserResponse {
	t.Helper()
	payload := map[string]any{
		"email": email,
		"registration_context": map[string]string{
			"preferred_language": "en",
			"time_zone":          "Europe/Kaliningrad",
		},
	}
	response := postJSON(t, h.userServiceURL+"/api/v1/internal/users/ensure-by-email", payload, nil)
	var result ensureUserResponse
	requireJSONStatus(t, response, http.StatusOK, &result)
	require.NotEmpty(t, result.UserID)
	return result
}
// adminCreatePrivateGameForOwner creates a private game owned by
// ownerUserID via Lobby's public API and returns the new game id.
// The fixed fixture values (1-4 players, daily 18:00 schedule) are the
// minimal settings the lifecycle walk in the test requires.
func (h *tripleHarness) adminCreatePrivateGameForOwner(t *testing.T, ownerUserID, gameName string, enrollmentEndsAt int64, engineVersion string) string {
	t.Helper()
	body := map[string]any{
		"game_name":             gameName,
		"game_type":             "private",
		"min_players":           1,
		"max_players":           4,
		"start_gap_hours":       6,
		"start_gap_players":     1,
		"enrollment_ends_at":    enrollmentEndsAt,
		"turn_schedule":         "0 18 * * *",
		"target_engine_version": engineVersion,
	}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games", body,
		http.Header{"X-User-Id": []string{ownerUserID}})
	require.Equalf(t, http.StatusCreated, response.StatusCode, "create private game: %s", response.Body)
	var created struct {
		GameID string `json:"game_id"`
	}
	require.NoError(t, json.Unmarshal([]byte(response.Body), &created))
	require.NotEmpty(t, created.GameID)
	return created.GameID
}
// postOwnerLifecycle issues an owner-authenticated POST against one of
// Lobby's game lifecycle endpoints and requires HTTP 200. The three
// public wrappers below differed only in path segment and failure
// label, so they now share this helper; label is interpolated verbatim
// to keep each wrapper's original diagnostic message.
func (h *tripleHarness) postOwnerLifecycle(t *testing.T, ownerUserID, gameID, path, label string) {
	t.Helper()
	resp := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games/"+gameID+"/"+path, nil,
		http.Header{"X-User-Id": []string{ownerUserID}})
	require.Equalf(t, http.StatusOK, resp.StatusCode, "%s: %s", label, resp.Body)
}

// userOpenEnrollment opens enrollment on the game as its owner.
func (h *tripleHarness) userOpenEnrollment(t *testing.T, ownerUserID, gameID string) {
	t.Helper()
	h.postOwnerLifecycle(t, ownerUserID, gameID, "open-enrollment", "open enrollment")
}

// userReadyToStart marks the game ready to start as its owner.
func (h *tripleHarness) userReadyToStart(t *testing.T, ownerUserID, gameID string) {
	t.Helper()
	h.postOwnerLifecycle(t, ownerUserID, gameID, "ready-to-start", "ready-to-start")
}

// userStartGame starts the game as its owner, which is the action that
// makes Lobby publish the runtime start job.
func (h *tripleHarness) userStartGame(t *testing.T, ownerUserID, gameID string) {
	t.Helper()
	h.postOwnerLifecycle(t, ownerUserID, gameID, "start", "start game")
}
// userCreateInvite has the owner invite inviteeUserID to the game and
// requires a 201 from Lobby's public invites endpoint.
func (h *tripleHarness) userCreateInvite(t *testing.T, ownerUserID, gameID, inviteeUserID string) {
	t.Helper()
	headers := http.Header{"X-User-Id": []string{ownerUserID}}
	body := map[string]any{"invitee_user_id": inviteeUserID}
	response := postJSON(t, h.lobbyPublicURL+"/api/v1/lobby/games/"+gameID+"/invites", body, headers)
	require.Equalf(t, http.StatusCreated, response.StatusCode, "create invite: %s", response.Body)
}
// firstCreatedInviteID lists the invitee's still-created invites and
// returns the id of the first one targeting gameID. It fails the test
// if no such invite is present in the listing.
func (h *tripleHarness) firstCreatedInviteID(t *testing.T, inviteeUserID, gameID string) string {
	t.Helper()
	request, err := http.NewRequest(http.MethodGet,
		h.lobbyPublicURL+"/api/v1/lobby/my/invites?status=created", nil)
	require.NoError(t, err)
	request.Header.Set("X-User-Id", inviteeUserID)
	response := doRequest(t, request)
	require.Equalf(t, http.StatusOK, response.StatusCode, "list my invites: %s", response.Body)
	var listing struct {
		Items []struct {
			InviteID string `json:"invite_id"`
			GameID   string `json:"game_id"`
		} `json:"items"`
	}
	require.NoError(t, json.Unmarshal([]byte(response.Body), &listing))
	for _, invite := range listing.Items {
		if invite.GameID != gameID {
			continue
		}
		return invite.InviteID
	}
	t.Fatalf("no invite for invitee %s on game %s", inviteeUserID, gameID)
	return ""
}
// userRedeemInvite redeems the invite as the invitee under raceName
// and requires a 200 from Lobby's public redeem endpoint.
func (h *tripleHarness) userRedeemInvite(t *testing.T, inviteeUserID, gameID, inviteID, raceName string) {
	t.Helper()
	redeemURL := h.lobbyPublicURL + "/api/v1/lobby/games/" + gameID + "/invites/" + inviteID + "/redeem"
	headers := http.Header{"X-User-Id": []string{inviteeUserID}}
	response := postJSON(t, redeemURL, map[string]any{"race_name": raceName}, headers)
	require.Equalf(t, http.StatusOK, response.StatusCode, "redeem invite: %s", response.Body)
}
// --- observation helpers ---
// jobResultEntry is the flattened view of one runtime:job_results
// stream entry, with every field read as a trimmed string (see
// waitJobResult / readString).
type jobResultEntry struct {
	GameID         string
	Outcome        string // e.g. "failure"; success naming not visible in this suite
	ContainerID    string
	EngineEndpoint string
	ErrorCode      string // e.g. "image_pull_failed"
	ErrorMessage   string
}
// waitJobResult polls the suffixed job-results stream via XRange until
// an entry satisfies predicate, returning the parsed entry. The whole
// stream is re-read each tick — acceptable because these per-suite
// streams stay tiny — and the test fails once timeout elapses.
func (h *tripleHarness) waitJobResult(t *testing.T, predicate func(jobResultEntry) bool, timeout time.Duration) jobResultEntry {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		entries, err := h.redis.XRange(context.Background(), h.jobResults, "-", "+").Result()
		require.NoError(t, err)
		for _, entry := range entries {
			parsed := jobResultEntry{
				GameID:         readString(entry.Values, "game_id"),
				Outcome:        readString(entry.Values, "outcome"),
				ContainerID:    readString(entry.Values, "container_id"),
				EngineEndpoint: readString(entry.Values, "engine_endpoint"),
				ErrorCode:      readString(entry.Values, "error_code"),
				ErrorMessage:   readString(entry.Values, "error_message"),
			}
			if predicate(parsed) {
				return parsed
			}
		}
		// Deadline is checked after a full pass so a match that
		// arrived just in time is still honoured.
		if time.Now().After(deadline) {
			t.Fatalf("matching job_result not observed within %s", timeout)
		}
		time.Sleep(50 * time.Millisecond)
	}
}
// notificationIntentEntry is the flattened view of one
// notification-intents stream entry; Payload* fields come from the
// entry's embedded payload_json document when it parses.
type notificationIntentEntry struct {
	RedisEntryID     string // stream entry id; feeds the idempotency-key assertion
	NotificationType string
	Producer         string
	AudienceKind     string
	PayloadGameID    string
	PayloadImageRef  string
	PayloadErrorCode string
}
// waitNotificationIntent polls the suffixed intents stream via XRange
// until an entry satisfies predicate. Unparseable payload_json is
// tolerated: the entry is still offered to the predicate with empty
// Payload* fields rather than failing the test.
func (h *tripleHarness) waitNotificationIntent(t *testing.T, predicate func(notificationIntentEntry) bool, timeout time.Duration) notificationIntentEntry {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		entries, err := h.redis.XRange(context.Background(), h.intentsStream, "-", "+").Result()
		require.NoError(t, err)
		for _, entry := range entries {
			parsed := notificationIntentEntry{
				RedisEntryID:     entry.ID,
				NotificationType: readString(entry.Values, "notification_type"),
				Producer:         readString(entry.Values, "producer"),
				AudienceKind:     readString(entry.Values, "audience_kind"),
			}
			if payload := readString(entry.Values, "payload_json"); payload != "" {
				var data struct {
					GameID    string `json:"game_id"`
					ImageRef  string `json:"image_ref"`
					ErrorCode string `json:"error_code"`
				}
				// Best effort: a malformed payload leaves the
				// Payload* fields empty instead of aborting.
				if err := json.Unmarshal([]byte(payload), &data); err == nil {
					parsed.PayloadGameID = data.GameID
					parsed.PayloadImageRef = data.ImageRef
					parsed.PayloadErrorCode = data.ErrorCode
				}
			}
			if predicate(parsed) {
				return parsed
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("matching notification intent not observed within %s", timeout)
		}
		time.Sleep(50 * time.Millisecond)
	}
}
// mailDeliverySummary mirrors one item of Mail Service's internal
// deliveries listing, limited to the fields this suite asserts on.
type mailDeliverySummary struct {
	DeliveryID  string   `json:"delivery_id"`
	Source      string   `json:"source"`
	PayloadMode string   `json:"payload_mode"` // expected "template" in this suite
	TemplateID  string   `json:"template_id"`
	Locale      string   `json:"locale"`
	To          []string `json:"to"`
	Status      string   `json:"status"`
}
// eventuallyDelivery polls Mail's deliveries listing filtered by query
// until at least one item matches, returning the first. The 60s budget
// is hard-coded; extend the signature if a caller ever needs another
// timeout. Non-200 responses and decode errors are retried silently.
func (h *tripleHarness) eventuallyDelivery(t *testing.T, query url.Values) mailDeliverySummary {
	t.Helper()
	deadline := time.Now().Add(60 * time.Second)
	for {
		listURL := h.mailBaseURL + mailDeliveriesPath + "?" + query.Encode()
		req, err := http.NewRequest(http.MethodGet, listURL, nil)
		require.NoError(t, err)
		resp := doRequest(t, req)
		if resp.StatusCode == http.StatusOK {
			var body struct {
				Items []mailDeliverySummary `json:"items"`
			}
			if json.Unmarshal([]byte(resp.Body), &body) == nil && len(body.Items) > 0 {
				return body.Items[0]
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("mail delivery not observed within 60s for query %v", query)
		}
		time.Sleep(50 * time.Millisecond)
	}
}
// waitGameStatus polls Lobby's internal game read endpoint until the
// game reports the wanted status, failing the test after timeout.
// Transient non-200 responses and decode errors are simply retried.
func (h *tripleHarness) waitGameStatus(t *testing.T, gameID, want string, timeout time.Duration) {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		request, err := http.NewRequest(http.MethodGet, h.lobbyAdminURL+"/api/v1/lobby/games/"+gameID, nil)
		require.NoError(t, err)
		response := doRequest(t, request)
		if response.StatusCode == http.StatusOK {
			var snapshot struct {
				Status string `json:"status"`
			}
			if err := json.Unmarshal([]byte(response.Body), &snapshot); err == nil && snapshot.Status == want {
				return
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("game %s did not reach status %q within %s", gameID, want, timeout)
		}
		time.Sleep(100 * time.Millisecond)
	}
}
// --- shared helpers ---
// readString extracts values[key] as a whitespace-trimmed string.
// Missing keys and non-string values both yield "".
func readString(values map[string]any, key string) string {
	raw, ok := values[key].(string)
	if !ok {
		return ""
	}
	return strings.TrimSpace(raw)
}
// httpResponse is a fully-drained HTTP response snapshot: status code,
// body as a string, and a cloned header map, safe to use after the
// underlying connection is closed.
type httpResponse struct {
	StatusCode int
	Body       string
	Header     http.Header
}
// postJSON POSTs body (JSON-encoded when non-nil) to url with the
// optional extra headers merged in, and returns the drained response.
// A nil body sends no payload and no Content-Type header.
func postJSON(t *testing.T, url string, body any, header http.Header) httpResponse {
	t.Helper()
	var reader io.Reader
	if body != nil {
		encoded, err := json.Marshal(body)
		require.NoError(t, err)
		reader = bytes.NewReader(encoded)
	}
	request, err := http.NewRequest(http.MethodPost, url, reader)
	require.NoError(t, err)
	if body != nil {
		request.Header.Set("Content-Type", "application/json")
	}
	for name, values := range header {
		for _, value := range values {
			request.Header.Add(name, value)
		}
	}
	return doRequest(t, request)
}
// doRequest executes request with a 5s, keep-alive-free client and
// returns the fully-drained response. Keep-alives are disabled so each
// test request gets a fresh connection to the just-started processes.
func doRequest(t *testing.T, request *http.Request) httpResponse {
	t.Helper()
	transport := &http.Transport{DisableKeepAlives: true}
	client := &http.Client{Timeout: 5 * time.Second, Transport: transport}
	t.Cleanup(client.CloseIdleConnections)
	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()
	raw, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	snapshot := httpResponse{
		StatusCode: response.StatusCode,
		Body:       string(raw),
		Header:     response.Header.Clone(),
	}
	return snapshot
}
// requireJSONStatus asserts the response carries the wanted status and
// decodes its body strictly (unknown fields and trailing input fail)
// into target.
func requireJSONStatus(t *testing.T, response httpResponse, want int, target any) {
	t.Helper()
	require.Equalf(t, want, response.StatusCode, "response: %s", response.Body)
	require.NoError(t, decodeStrictJSON([]byte(response.Body), target))
}
// decodeStrictJSON unmarshals payload into target, rejecting unknown
// fields and any trailing JSON after the first document.
func decodeStrictJSON(payload []byte, target any) error {
	dec := json.NewDecoder(bytes.NewReader(payload))
	dec.DisallowUnknownFields()
	if err := dec.Decode(target); err != nil {
		return err
	}
	// A second decode must hit EOF; anything else means trailing input.
	switch err := dec.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// waitForUserServiceReady polls a sentinel user-exists endpoint on the
// User Service until it answers 200, or fails after 10s with the
// process logs attached. Transport errors are retried silently.
// NOTE(review): this uses a data read path rather than a /readyz —
// presumably a 200 here also proves storage is wired; confirm.
func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	deadline := time.Now().Add(10 * time.Second)
	for time.Now().Before(deadline) {
		req, err := http.NewRequest(http.MethodGet, baseURL+"/api/v1/internal/users/user-readiness-probe/exists", nil)
		require.NoError(t, err)
		response, err := client.Do(req)
		if err == nil {
			// Drain before closing so the connection can be reused.
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
			if response.StatusCode == http.StatusOK {
				return
			}
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs())
}
// waitForMailReady polls Mail's deliveries listing until it answers
// 200, failing after 10s with the process logs attached.
func waitForMailReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	deadline := time.Now().Add(10 * time.Second)
	for time.Now().Before(deadline) {
		request, err := http.NewRequest(http.MethodGet, baseURL+mailDeliveriesPath, nil)
		require.NoError(t, err)
		response, err := client.Do(request)
		if err != nil {
			time.Sleep(25 * time.Millisecond)
			continue
		}
		// Drain before closing so the connection can be reused.
		_, _ = io.Copy(io.Discard, response.Body)
		response.Body.Close()
		if response.StatusCode == http.StatusOK {
			return
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("wait for mail readiness: timeout\n%s", process.Logs())
}
// mailTemplateDir resolves the mail templates directory relative to
// the repository root so Mail renders the real production templates.
func mailTemplateDir(t *testing.T) string {
	t.Helper()
	return filepath.Join(repositoryRoot(t), "mail", "templates")
}
// repositoryRoot derives the repository root from this source file's
// location (two directories above it), failing if the runtime cannot
// report the caller.
func repositoryRoot(t *testing.T) string {
	t.Helper()
	_, sourceFile, _, ok := runtime.Caller(0)
	if !ok {
		t.Fatal("resolve repository root: runtime caller is unavailable")
	}
	base := filepath.Dir(sourceFile)
	return filepath.Clean(filepath.Join(base, "..", ".."))
}
// resolveDockerHost honours a non-blank DOCKER_HOST (colima, remote
// daemons, …) and otherwise falls back to the standard unix socket.
func resolveDockerHost() string {
	host := strings.TrimSpace(os.Getenv("DOCKER_HOST"))
	if host == "" {
		return "unix:///var/run/docker.sock"
	}
	return host
}
+367
View File
@@ -0,0 +1,367 @@
// Package mailsmoke_test exercises the real SMTP adapter of Mail
// Service against a real SMTP receiver running in a testcontainer.
// The suite is the small dedicated smoke suite called out in
// `TESTING.md §4` ("Add only a small dedicated smoke suite for the
// real mail adapter").
//
// The boundary contract under test is: a delivery accepted on Mail's
// internal HTTP surface in `smtp` mode is actually transmitted over
// SMTP to the configured upstream and is observable on the
// receiver's inspection API. No other Galaxy service is booted; the
// test is intentionally narrow.
package mailsmoke_test
import (
"bytes"
"context"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"crypto/x509/pkix"
"encoding/json"
"encoding/pem"
"errors"
"fmt"
"io"
"math/big"
"net"
"net/http"
"path/filepath"
"runtime"
"strconv"
"sync/atomic"
"testing"
"time"
"galaxy/integration/internal/harness"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
testcontainers "github.com/testcontainers/testcontainers-go"
"github.com/testcontainers/testcontainers-go/wait"
)
// Fixture constants for the SMTP smoke suite.
const (
	mailpitImage    = "axllent/mailpit:latest"
	mailpitSMTPPort = "1025/tcp"
	mailpitAPIPort  = "8025/tcp"
	// Mail Service's internal delivery-inspection endpoint (used here
	// only as a readiness probe).
	mailDeliveryPath = "/api/v1/internal/deliveries"
	// NOTE(review): commandSource and commandTemplate are not
	// referenced anywhere in this file's visible code — possibly
	// leftovers from an earlier draft; consider removing.
	commandSource   = "mailsmoke"
	commandTemplate = "auth.login_code"
	smokeRecipient  = "smoke-recipient@example.com"
	smokeFromEmail  = "noreply@galaxy.example.com"
)
// smokeSeq hands out a per-run numeric suffix so each execution uses a
// unique recipient address and idempotency key.
var smokeSeq atomic.Int64
// TestMailServiceDeliversToRealSMTPProvider drives Mail Service in
// `smtp` mode at a real Mailpit testcontainer. The service must
// transmit the configured payload over SMTP and the receiver must
// register it as a stored message visible on its HTTP inspection API.
func TestMailServiceDeliversToRealSMTPProvider(t *testing.T) {
	mailpit := startMailpitContainer(t)
	mailService := startMailServiceWithSMTP(t, mailpit.SMTPEndpoint())
	// Unique recipient per run keeps the Mailpit message lookup
	// unambiguous even across repeated local executions.
	suffix := strconv.FormatInt(smokeSeq.Add(1), 10)
	idempotencyKey := "mailsmoke:" + suffix
	uniqueRecipient := "smoke-" + suffix + "-" + smokeRecipient
	// Mail Service has a synchronous trusted REST surface for the
	// auth login-code path (`/api/v1/internal/login-code-deliveries`).
	// It accepts the request, renders the template, and drives the
	// configured SMTP provider — exactly what the smoke suite needs
	// to verify against the real Mailpit container.
	loginCodeBody := map[string]any{
		"email":  uniqueRecipient,
		"code":   "123456",
		"locale": "en",
	}
	bodyBytes, err := json.Marshal(loginCodeBody)
	require.NoError(t, err)
	req, err := http.NewRequest(http.MethodPost,
		mailService.BaseURL+"/api/v1/internal/login-code-deliveries",
		bytes.NewReader(bodyBytes),
	)
	require.NoError(t, err)
	req.Header.Set("Content-Type", "application/json")
	// NOTE(review): the idempotency key is sent but never asserted
	// against downstream state in this suite — confirm that is
	// intentional for a smoke test.
	req.Header.Set("Idempotency-Key", idempotencyKey)
	resp := doRequest(t, req)
	require.Equalf(t,
		http.StatusOK,
		resp.StatusCode,
		"submit login-code delivery: %s", resp.Body,
	)
	// Mailpit exposes received messages at /api/v1/messages with a
	// JSON envelope containing `messages_count` plus per-message
	// items. Wait until our envelope shows up.
	waitForMailpitMessage(t, mailpit.APIBaseURL(), uniqueRecipient, 30*time.Second)
}
// --- mailpit container ---
// mailpitContainer holds the running Mailpit testcontainer together
// with the mapped host/port pairs of its SMTP listener and its HTTP
// inspection API.
type mailpitContainer struct {
	container testcontainers.Container
	smtpHost  string
	smtpPort  string
	apiHost   string
	apiPort   string
}

// SMTPEndpoint returns the host:port address Mail Service should dial.
func (c *mailpitContainer) SMTPEndpoint() string {
	endpoint := c.smtpHost + ":" + c.smtpPort
	return endpoint
}

// APIBaseURL returns the base URL of Mailpit's message inspection API.
func (c *mailpitContainer) APIBaseURL() string {
	base := "http://" + c.apiHost + ":" + c.apiPort
	return base
}
// startMailpitContainer boots a Mailpit testcontainer with a freshly
// generated self-signed TLS cert mounted in, waits for its startup log
// line, and returns the mapped SMTP/API endpoints. Termination is
// registered on t.
func startMailpitContainer(t *testing.T) *mailpitContainer {
	t.Helper()
	// Mail Service hardcodes `gomail.TLSMandatory`; the smoke suite
	// must give Mailpit a usable cert+key so STARTTLS succeeds even
	// against a self-signed server. The cert is short-lived and is
	// regenerated per test run.
	certPEM, keyPEM := generateSelfSignedCert(t, "mailpit-smoke")
	ctx := context.Background()
	req := testcontainers.ContainerRequest{
		Image: mailpitImage,
		ExposedPorts: []string{
			mailpitSMTPPort,
			mailpitAPIPort,
		},
		Env: map[string]string{
			"MP_SMTP_TLS_CERT": "/etc/mailpit/cert.pem",
			"MP_SMTP_TLS_KEY":  "/etc/mailpit/key.pem",
		},
		Files: []testcontainers.ContainerFile{
			{
				Reader:            bytes.NewReader(certPEM),
				ContainerFilePath: "/etc/mailpit/cert.pem",
				FileMode:          0o644,
			},
			{
				Reader:            bytes.NewReader(keyPEM),
				ContainerFilePath: "/etc/mailpit/key.pem",
				FileMode:          0o600,
			},
		},
		// Mailpit logs "accessible via" once its HTTP UI is serving.
		WaitingFor: wait.ForLog("accessible via").
			WithStartupTimeout(30 * time.Second),
	}
	container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
		ContainerRequest: req,
		Started:          true,
	})
	require.NoError(t, err)
	t.Cleanup(func() {
		if err := testcontainers.TerminateContainer(container); err != nil {
			t.Errorf("terminate mailpit container: %v", err)
		}
	})
	smtpHost, err := container.Host(ctx)
	require.NoError(t, err)
	smtpPort, err := container.MappedPort(ctx, mailpitSMTPPort)
	require.NoError(t, err)
	apiPort, err := container.MappedPort(ctx, mailpitAPIPort)
	require.NoError(t, err)
	// SMTP and API share the same container host; only ports differ.
	return &mailpitContainer{
		container: container,
		smtpHost:  smtpHost,
		smtpPort:  smtpPort.Port(),
		apiHost:   smtpHost,
		apiPort:   apiPort.Port(),
	}
}
// waitForMailpitMessage polls Mailpit's message listing until a stored
// message addressed to recipient appears, failing the test after
// timeout. Non-200 responses and decode errors are retried silently.
func waitForMailpitMessage(t *testing.T, apiBaseURL, recipient string, timeout time.Duration) {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		req, err := http.NewRequest(http.MethodGet, apiBaseURL+"/api/v1/messages", nil)
		require.NoError(t, err)
		resp := doRequest(t, req)
		if resp.StatusCode == http.StatusOK {
			var body struct {
				Messages []struct {
					To []struct {
						Address string `json:"Address"`
					} `json:"To"`
					Subject string `json:"Subject"`
				} `json:"messages"`
			}
			if json.Unmarshal([]byte(resp.Body), &body) == nil {
				for _, m := range body.Messages {
					for _, addr := range m.To {
						if addr.Address == recipient {
							return
						}
					}
				}
			}
		}
		time.Sleep(100 * time.Millisecond)
	}
	t.Fatalf("mailpit did not register a message for %s within %s", recipient, timeout)
}
// --- mail service in real-SMTP mode ---
// mailService is the handle to the booted Mail Service process; only
// its internal HTTP base URL is needed by the test.
type mailService struct {
	BaseURL string
}
// startMailServiceWithSMTP boots a real Mail Service process in `smtp`
// mode pointed at smtpAddr (the Mailpit container), backed by a fresh
// Redis testcontainer, and waits for its internal HTTP surface.
// MAIL_SMTP_INSECURE_SKIP_VERIFY pairs with the self-signed cert
// mounted into Mailpit by startMailpitContainer.
func startMailServiceWithSMTP(t *testing.T, smtpAddr string) mailService {
	t.Helper()
	redisRuntime := harness.StartRedisContainer(t)
	mailInternalAddr := harness.FreeTCPAddress(t)
	mailBinary := harness.BuildBinary(t, "mail", "./mail/cmd/mail")
	mailEnv := harness.StartMailServicePersistence(t, redisRuntime.Addr).Env
	mailEnv["MAIL_LOG_LEVEL"] = "info"
	mailEnv["MAIL_INTERNAL_HTTP_ADDR"] = mailInternalAddr
	mailEnv["MAIL_TEMPLATE_DIR"] = mailTemplateDir(t)
	mailEnv["MAIL_SMTP_MODE"] = "smtp"
	mailEnv["MAIL_SMTP_ADDR"] = smtpAddr
	mailEnv["MAIL_SMTP_FROM_EMAIL"] = smokeFromEmail
	mailEnv["MAIL_SMTP_FROM_NAME"] = "Galaxy Mail Smoke"
	mailEnv["MAIL_SMTP_TIMEOUT"] = "10s"
	mailEnv["MAIL_SMTP_INSECURE_SKIP_VERIFY"] = "true"
	mailEnv["MAIL_STREAM_BLOCK_TIMEOUT"] = "100ms"
	mailEnv["MAIL_OPERATOR_REQUEST_TIMEOUT"] = "5s"
	mailEnv["MAIL_SHUTDOWN_TIMEOUT"] = "2s"
	mailEnv["OTEL_TRACES_EXPORTER"] = "none"
	mailEnv["OTEL_METRICS_EXPORTER"] = "none"
	mailProcess := harness.StartProcess(t, "mail", mailBinary, mailEnv)
	waitForMailReady(t, mailProcess, "http://"+mailInternalAddr)
	return mailService{BaseURL: "http://" + mailInternalAddr}
}
// --- shared helpers ---
// waitForMailReady polls Mail's deliveries listing until it answers
// 200, failing after 10s with the process logs attached.
func waitForMailReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	deadline := time.Now().Add(10 * time.Second)
	for time.Now().Before(deadline) {
		request, err := http.NewRequest(http.MethodGet, baseURL+mailDeliveryPath, nil)
		require.NoError(t, err)
		response, err := client.Do(request)
		if err != nil {
			time.Sleep(25 * time.Millisecond)
			continue
		}
		// Drain before closing so the connection can be reused.
		_, _ = io.Copy(io.Discard, response.Body)
		response.Body.Close()
		if response.StatusCode == http.StatusOK {
			return
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("wait for mail readiness: timeout\n%s", process.Logs())
}
// httpResponse is a fully-drained HTTP response snapshot: status code,
// body as a string, and a cloned header map.
type httpResponse struct {
	StatusCode int
	Body       string
	Header     http.Header
}
// postJSON POSTs the JSON encoding of body to url and returns the
// drained response.
func postJSON(t *testing.T, url string, body any) httpResponse {
	t.Helper()
	encoded, err := json.Marshal(body)
	require.NoError(t, err)
	request, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(encoded))
	require.NoError(t, err)
	request.Header.Set("Content-Type", "application/json")
	return doRequest(t, request)
}
// doRequest executes request with a 5s, keep-alive-free client and
// returns the fully-drained response. Keep-alives are disabled so each
// request gets a fresh connection to the just-started process.
func doRequest(t *testing.T, request *http.Request) httpResponse {
	t.Helper()
	client := &http.Client{
		Timeout:   5 * time.Second,
		Transport: &http.Transport{DisableKeepAlives: true},
	}
	t.Cleanup(client.CloseIdleConnections)
	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()
	payload, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	return httpResponse{
		StatusCode: response.StatusCode,
		Body:       string(payload),
		Header:     response.Header.Clone(),
	}
}
// generateSelfSignedCert produces a short-lived RSA cert + key for the
// Mailpit container so STARTTLS succeeds against
// `MAIL_SMTP_INSECURE_SKIP_VERIFY=true` clients. Both return values
// are PEM-encoded (certificate, then PKCS#1 private key).
func generateSelfSignedCert(t *testing.T, commonName string) ([]byte, []byte) {
	t.Helper()
	key, err := rsa.GenerateKey(rand.Reader, 2048)
	require.NoError(t, err)
	serialNumber, err := rand.Int(rand.Reader, big.NewInt(1<<62))
	require.NoError(t, err)
	// Backdated NotBefore tolerates clock skew between host and container.
	cert := x509.Certificate{
		SerialNumber:          serialNumber,
		Subject:               pkix.Name{CommonName: commonName},
		NotBefore:             time.Now().Add(-time.Hour),
		NotAfter:              time.Now().Add(24 * time.Hour),
		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment | x509.KeyUsageCertSign,
		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		BasicConstraintsValid: true,
		IsCA:                  true,
		IPAddresses:           []net.IP{net.ParseIP("127.0.0.1")},
		DNSNames:              []string{"localhost", commonName},
	}
	der, err := x509.CreateCertificate(rand.Reader, &cert, &cert, &key.PublicKey, key)
	require.NoError(t, err)
	certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
	keyPEM := pem.EncodeToMemory(&pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: x509.MarshalPKCS1PrivateKey(key),
	})
	return certPEM, keyPEM
}
// mailTemplateDir resolves the mail templates directory relative to
// the repository root so Mail renders the real production templates.
func mailTemplateDir(t *testing.T) string {
	t.Helper()
	return filepath.Join(repositoryRoot(t), "mail", "templates")
}
// repositoryRoot derives the repository root from this source file's
// location (two directories above it), failing if the runtime cannot
// report the caller.
func repositoryRoot(t *testing.T) string {
	t.Helper()
	_, file, _, ok := runtime.Caller(0)
	if !ok {
		t.Fatal("resolve repository root: runtime caller is unavailable")
	}
	return filepath.Clean(filepath.Join(filepath.Dir(file), "..", ".."))
}
// silence unused-import noise for symbols touched only via reflection /
// conditional compilation.
// NOTE(review): as far as this file shows, fmt, errors, and assert are
// not used anywhere else — removing these aliases together with their
// imports would be cleaner than keeping them alive artificially.
var _ = fmt.Sprintf
var _ = errors.New
var _ = assert.Equal
@@ -0,0 +1,602 @@
// Package rtmanagernotification_test exercises the Runtime Manager →
// Notification Service boundary against real RTM + real Notification +
// real Mail Service + real User Service running on testcontainers
// PostgreSQL and Redis, with a real Docker daemon for RTM's readiness
// pings.
//
// The boundary contract under test is: when a start job points at an
// unresolvable image, RTM publishes one `runtime.image_pull_failed`
// admin-only notification intent on `notification:intents`; the
// Notification Service consumes the intent, resolves the admin email
// recipient list from configuration, and hands the delivery to Mail
// Service in template-mode. The suite asserts the wire shape on
// `notification:intents` and the resulting Mail delivery record.
//
// Game Master is not booted: RTM emits the intent itself; Notification
// resolves the audience from `NOTIFICATION_ADMIN_EMAILS_*`; the
// scenario needs no user-targeted resolution.
package rtmanagernotification_test
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"galaxy/integration/internal/harness"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Stream-name prefixes and frozen contract values for the suite. Each
// stream prefix receives a per-test numeric suffix (see suiteSeq) so
// concurrently running tests never observe each other's entries.
const (
	intentsStreamPrefix         = "notification:intents"
	startJobsStreamPrefix       = "runtime:start_jobs"
	stopJobsStreamPrefix        = "runtime:stop_jobs"
	jobResultsStreamPrefix      = "runtime:job_results"
	healthEventsStreamPrefix    = "runtime:health_events"
	mailDeliveriesPath          = "/api/v1/internal/deliveries"
	notificationTypeImagePull   = "runtime.image_pull_failed"
	notificationTypeStartFailed = "runtime.container_start_failed"
	notificationTypeConfigInval = "runtime.start_config_invalid"
	expectedAdminEmailRecipient = "rtm-admin@example.com"
	expectedRTMProducer         = "runtime_manager"
	missingImageRef             = "galaxy/integration-missing:0.0.0"
)

// suiteSeq hands out a monotonically increasing per-process suffix for
// the Redis stream names used by each harness instance.
var suiteSeq atomic.Int64
// TestRTMImagePullFailureFlowsThroughNotificationToMail drives Runtime
// Manager with a start envelope pointing at an unresolvable image
// reference, then asserts:
//
// 1. RTM publishes one `runtime.image_pull_failed` intent on
// `notification:intents` with the frozen admin payload.
// 2. The Notification Service consumes it and fans out the matching
// mail delivery to the configured admin recipient.
// 3. Mail Service records the delivery with the right template id,
// idempotency key, and template variables.
//
// The path covers the full producer → orchestrator → transport
// pipeline that `TESTING.md §7` requests as the
// `Runtime Manager ↔ Notification` boundary suite.
func TestRTMImagePullFailureFlowsThroughNotificationToMail(t *testing.T) {
	h := newRTMNotificationHarness(t)
	gameID := uniqueGameID(t)
	h.publishStartJob(t, gameID, missingImageRef)
	// Step 1 — RTM publishes the admin notification intent.
	intent := h.waitForIntent(t,
		notificationTypeImagePull,
		gameID,
		30*time.Second,
	)
	assert.Equal(t, expectedRTMProducer, intent.Producer)
	assert.Equal(t, "admin_email", intent.AudienceKind)
	assert.Equal(t, gameID, intent.PayloadGameID)
	assert.Equal(t, missingImageRef, intent.PayloadImageRef)
	assert.Equal(t, "image_pull_failed", intent.PayloadErrorCode)
	assert.NotEmpty(t, intent.PayloadErrorMessage,
		"intent payload must carry operator-readable detail")
	assert.NotZero(t, intent.PayloadAttemptedAtMS)
	// Step 2 — Notification routes to Mail; Mail sends the delivery.
	// The key mirrors the deterministic derivation on the Notification
	// side: "notification:<stream entry id>/email:email:<recipient>".
	idempotencyKey := "notification:" + intent.RedisEntryID +
		"/email:email:" + expectedAdminEmailRecipient
	delivery := h.eventuallyDelivery(t, url.Values{
		"source":          []string{"notification"},
		"status":          []string{"sent"},
		"recipient":       []string{expectedAdminEmailRecipient},
		"template_id":     []string{notificationTypeImagePull},
		"idempotency_key": []string{idempotencyKey},
	})
	assert.Equal(t, "template", delivery.PayloadMode)
	assert.Equal(t, notificationTypeImagePull, delivery.TemplateID)
	assert.Equal(t, []string{expectedAdminEmailRecipient}, delivery.To)
	// Step 3 — the detail record must carry the triage variables RTM
	// put on the intent payload.
	detail := h.getDelivery(t, delivery.DeliveryID)
	assert.Equal(t, "notification", detail.Source)
	assert.Equal(t, "template", detail.PayloadMode)
	assert.Equal(t, notificationTypeImagePull, detail.TemplateID)
	assert.Equal(t, idempotencyKey, detail.IdempotencyKey)
	assert.Equal(t, []string{expectedAdminEmailRecipient}, detail.To)
	require.NotNil(t, detail.TemplateVariables,
		"mail delivery must record template variables for admin triage")
	assert.Equal(t, gameID, detail.TemplateVariables["game_id"])
	assert.Equal(t, missingImageRef, detail.TemplateVariables["image_ref"])
	assert.Equal(t, "image_pull_failed", detail.TemplateVariables["error_code"])
}
// rtmNotificationHarness owns the per-test infrastructure: shared
// Redis, four real binaries (RTM, Notification, Mail, User), and the
// per-test Docker network RTM's `/readyz` insists on. One harness per
// test keeps each scenario fully isolated.
type rtmNotificationHarness struct {
	// Direct Redis client used to publish start jobs and read intents.
	redis *redis.Client
	// Base URLs of the internal HTTP surfaces the suite queries.
	rtmInternalURL string
	mailBaseURL    string
	// Per-test stream names (prefix + suiteSeq suffix).
	intentsStream    string
	startJobsStream  string
	stopJobsStream   string
	jobResultsStream string
	healthEvents     string
	// Process handles, kept so failure messages can attach logs.
	rtmProcess          *harness.Process
	notificationProcess *harness.Process
	mailProcess         *harness.Process
	userServiceProcess  *harness.Process
}
// newRTMNotificationHarness boots the full boundary fixture for one
// test: a Redis container, then User Service, Mail Service,
// Notification Service, and Runtime Manager as real processes wired
// to per-test stream names, waiting on each readiness surface before
// returning. All processes and the Redis client are cleaned up via
// t.Cleanup inside the harness helpers.
func newRTMNotificationHarness(t *testing.T) *rtmNotificationHarness {
	t.Helper()
	// `/readyz` of RTM pings the Docker daemon; skip the suite if no
	// Docker socket is reachable.
	harness.RequireDockerDaemon(t)
	redisRuntime := harness.StartRedisContainer(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisRuntime.Addr,
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, redisClient.Close())
	})
	dockerNetwork := harness.EnsureDockerNetwork(t)
	// Reserve listen addresses before building so each process binds a
	// unique free port.
	userServiceAddr := harness.FreeTCPAddress(t)
	mailInternalAddr := harness.FreeTCPAddress(t)
	notificationInternalAddr := harness.FreeTCPAddress(t)
	rtmInternalAddr := harness.FreeTCPAddress(t)
	userServiceBinary := harness.BuildBinary(t, "userservice", "./user/cmd/userservice")
	mailBinary := harness.BuildBinary(t, "mail", "./mail/cmd/mail")
	notificationBinary := harness.BuildBinary(t, "notification", "./notification/cmd/notification")
	rtmBinary := harness.BuildBinary(t, "rtmanager", "./rtmanager/cmd/rtmanager")
	// User Service: needed by Notification's port even though every
	// intent in this suite is admin-only.
	userServiceEnv := harness.StartUserServicePersistence(t, redisRuntime.Addr).Env
	userServiceEnv["USERSERVICE_LOG_LEVEL"] = "info"
	userServiceEnv["USERSERVICE_INTERNAL_HTTP_ADDR"] = userServiceAddr
	userServiceEnv["OTEL_TRACES_EXPORTER"] = "none"
	userServiceEnv["OTEL_METRICS_EXPORTER"] = "none"
	userServiceProcess := harness.StartProcess(t, "userservice", userServiceBinary, userServiceEnv)
	waitForUserServiceReady(t, userServiceProcess, "http://"+userServiceAddr)
	// Per-test stream prefixes.
	suffix := strconv.FormatInt(suiteSeq.Add(1), 10)
	intentsStream := intentsStreamPrefix + ":" + suffix
	startJobsStream := startJobsStreamPrefix + ":" + suffix
	stopJobsStream := stopJobsStreamPrefix + ":" + suffix
	jobResultsStream := jobResultsStreamPrefix + ":" + suffix
	healthEvents := healthEventsStreamPrefix + ":" + suffix
	// Mail Service.
	mailEnv := harness.StartMailServicePersistence(t, redisRuntime.Addr).Env
	mailEnv["MAIL_LOG_LEVEL"] = "info"
	mailEnv["MAIL_INTERNAL_HTTP_ADDR"] = mailInternalAddr
	mailEnv["MAIL_TEMPLATE_DIR"] = mailTemplateDir(t)
	mailEnv["MAIL_SMTP_MODE"] = "stub"
	mailEnv["MAIL_STREAM_BLOCK_TIMEOUT"] = "100ms"
	mailEnv["MAIL_OPERATOR_REQUEST_TIMEOUT"] = time.Second.String()
	mailEnv["MAIL_SHUTDOWN_TIMEOUT"] = "2s"
	mailEnv["OTEL_TRACES_EXPORTER"] = "none"
	mailEnv["OTEL_METRICS_EXPORTER"] = "none"
	mailProcess := harness.StartProcess(t, "mail", mailBinary, mailEnv)
	waitForMailReady(t, mailProcess, "http://"+mailInternalAddr)
	// Notification Service. Admin-email envs route every runtime.*
	// intent to a shared rtm-admin recipient.
	notificationEnv := harness.StartNotificationServicePersistence(t, redisRuntime.Addr).Env
	notificationEnv["NOTIFICATION_LOG_LEVEL"] = "info"
	notificationEnv["NOTIFICATION_INTERNAL_HTTP_ADDR"] = notificationInternalAddr
	notificationEnv["NOTIFICATION_USER_SERVICE_BASE_URL"] = "http://" + userServiceAddr
	notificationEnv["NOTIFICATION_USER_SERVICE_TIMEOUT"] = time.Second.String()
	notificationEnv["NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"] = "100ms"
	notificationEnv["NOTIFICATION_ROUTE_BACKOFF_MIN"] = "100ms"
	notificationEnv["NOTIFICATION_ROUTE_BACKOFF_MAX"] = "100ms"
	notificationEnv["NOTIFICATION_INTENTS_STREAM"] = intentsStream
	notificationEnv["NOTIFICATION_ADMIN_EMAILS_RUNTIME_IMAGE_PULL_FAILED"] = expectedAdminEmailRecipient
	notificationEnv["NOTIFICATION_ADMIN_EMAILS_RUNTIME_CONTAINER_START_FAILED"] = expectedAdminEmailRecipient
	notificationEnv["NOTIFICATION_ADMIN_EMAILS_RUNTIME_START_CONFIG_INVALID"] = expectedAdminEmailRecipient
	notificationEnv["OTEL_TRACES_EXPORTER"] = "none"
	notificationEnv["OTEL_METRICS_EXPORTER"] = "none"
	notificationProcess := harness.StartProcess(t, "notification", notificationBinary, notificationEnv)
	harness.WaitForHTTPStatus(t, notificationProcess,
		"http://"+notificationInternalAddr+"/readyz", http.StatusOK)
	// Runtime Manager. Lobby base URL points at notification's
	// ready-probe path so RTM's start-service ancillary GetGame call
	// resolves to a valid 200/404 surface even though no Lobby is
	// running. The start service treats the response as best-effort
	// and never aborts on an unparseable body.
	rtmEnv := harness.StartRTManagerServicePersistence(t, redisRuntime.Addr).Env
	rtmEnv["RTMANAGER_LOG_LEVEL"] = "info"
	rtmEnv["RTMANAGER_INTERNAL_HTTP_ADDR"] = rtmInternalAddr
	rtmEnv["RTMANAGER_LOBBY_INTERNAL_BASE_URL"] = "http://127.0.0.1:1"
	rtmEnv["RTMANAGER_LOBBY_INTERNAL_TIMEOUT"] = "200ms"
	rtmEnv["RTMANAGER_DOCKER_HOST"] = resolveDockerHost()
	rtmEnv["RTMANAGER_DOCKER_NETWORK"] = dockerNetwork
	rtmEnv["RTMANAGER_GAME_STATE_ROOT"] = t.TempDir()
	rtmEnv["RTMANAGER_REDIS_START_JOBS_STREAM"] = startJobsStream
	rtmEnv["RTMANAGER_REDIS_STOP_JOBS_STREAM"] = stopJobsStream
	rtmEnv["RTMANAGER_REDIS_JOB_RESULTS_STREAM"] = jobResultsStream
	rtmEnv["RTMANAGER_REDIS_HEALTH_EVENTS_STREAM"] = healthEvents
	rtmEnv["RTMANAGER_NOTIFICATION_INTENTS_STREAM"] = intentsStream
	rtmEnv["RTMANAGER_STREAM_BLOCK_TIMEOUT"] = "200ms"
	rtmEnv["RTMANAGER_RECONCILE_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_CLEANUP_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_INSPECT_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_PROBE_INTERVAL"] = "5s"
	rtmEnv["RTMANAGER_PROBE_TIMEOUT"] = "1s"
	rtmEnv["RTMANAGER_PROBE_FAILURES_THRESHOLD"] = "3"
	rtmEnv["RTMANAGER_GAME_LEASE_TTL_SECONDS"] = "30"
	rtmEnv["RTMANAGER_IMAGE_PULL_POLICY"] = "if_missing"
	rtmEnv["OTEL_TRACES_EXPORTER"] = "none"
	rtmEnv["OTEL_METRICS_EXPORTER"] = "none"
	rtmProcess := harness.StartProcess(t, "rtmanager", rtmBinary, rtmEnv)
	harness.WaitForHTTPStatus(t, rtmProcess,
		"http://"+rtmInternalAddr+"/readyz", http.StatusOK)
	return &rtmNotificationHarness{
		redis:               redisClient,
		rtmInternalURL:      "http://" + rtmInternalAddr,
		mailBaseURL:         "http://" + mailInternalAddr,
		intentsStream:       intentsStream,
		startJobsStream:     startJobsStream,
		stopJobsStream:      stopJobsStream,
		jobResultsStream:    jobResultsStream,
		healthEvents:        healthEvents,
		rtmProcess:          rtmProcess,
		notificationProcess: notificationProcess,
		mailProcess:         mailProcess,
		userServiceProcess:  userServiceProcess,
	}
}
// publishStartJob appends one start envelope for gameID/imageRef onto
// the per-test runtime start-jobs stream, stamping the current wall
// clock (milliseconds) as requested_at_ms.
func (h *rtmNotificationHarness) publishStartJob(t *testing.T, gameID, imageRef string) {
	t.Helper()
	envelope := map[string]any{
		"game_id":         gameID,
		"image_ref":       imageRef,
		"requested_at_ms": strconv.FormatInt(time.Now().UnixMilli(), 10),
	}
	_, err := h.redis.XAdd(context.Background(), &redis.XAddArgs{
		Stream: h.startJobsStream,
		Values: envelope,
	}).Result()
	require.NoError(t, err)
}
// observedIntent stores the decoded fields of one notification intent
// entry that the suite cares about.
type observedIntent struct {
	// RedisEntryID is the stream entry id; the idempotency-key
	// assertion in the test is derived from it.
	RedisEntryID     string
	NotificationType string
	Producer         string
	AudienceKind     string
	// Payload* fields come from the entry's payload_json document and
	// stay zero when that field is absent or unparseable.
	PayloadGameID        string
	PayloadImageRef      string
	PayloadErrorCode     string
	PayloadErrorMessage  string
	PayloadAttemptedAtMS int64
}
// waitForIntent polls the intents stream until an entry with the
// wanted notification type and game id appears, failing the test
// (with RTM process logs attached) once the timeout elapses.
func (h *rtmNotificationHarness) waitForIntent(
	t *testing.T,
	notificationType, gameID string,
	timeout time.Duration,
) observedIntent {
	t.Helper()
	deadline := time.Now().Add(timeout)
	for {
		messages, err := h.redis.XRange(context.Background(), h.intentsStream, "-", "+").Result()
		require.NoError(t, err)
		for _, message := range messages {
			candidate, ok := decodeIntent(message)
			if ok &&
				candidate.NotificationType == notificationType &&
				candidate.PayloadGameID == gameID {
				return candidate
			}
		}
		if time.Now().After(deadline) {
			t.Fatalf("intent %s for game %s not observed on stream %s within %s\n%s",
				notificationType, gameID, h.intentsStream, timeout, h.rtmProcess.Logs())
		}
		time.Sleep(50 * time.Millisecond)
	}
}
// decodeIntent extracts the suite-relevant fields from one raw stream
// entry. It reports false for entries without a notification_type;
// a present but unparseable payload_json leaves the Payload* fields
// at their zero values.
func decodeIntent(entry redis.XMessage) (observedIntent, bool) {
	stringField := func(key string) string {
		value, _ := entry.Values[key].(string)
		return value
	}
	intent := observedIntent{
		RedisEntryID:     entry.ID,
		NotificationType: stringField("notification_type"),
		Producer:         stringField("producer"),
		AudienceKind:     stringField("audience_kind"),
	}
	if intent.NotificationType == "" {
		return observedIntent{}, false
	}
	raw := stringField("payload_json")
	if raw == "" {
		return intent, true
	}
	var payload struct {
		GameID        string `json:"game_id"`
		ImageRef      string `json:"image_ref"`
		ErrorCode     string `json:"error_code"`
		ErrorMessage  string `json:"error_message"`
		AttemptedAtMS int64  `json:"attempted_at_ms"`
	}
	// Best-effort decode: a malformed payload is not fatal here; the
	// caller's assertions surface the missing fields.
	if err := json.Unmarshal([]byte(raw), &payload); err == nil {
		intent.PayloadGameID = payload.GameID
		intent.PayloadImageRef = payload.ImageRef
		intent.PayloadErrorCode = payload.ErrorCode
		intent.PayloadErrorMessage = payload.ErrorMessage
		intent.PayloadAttemptedAtMS = payload.AttemptedAtMS
	}
	return intent, true
}
// mailDeliverySummary mirrors the public list-deliveries response of
// Mail Service. Only the fields the suite asserts on are declared;
// decoding is lenient, so additional response fields are ignored.
type mailDeliverySummary struct {
	DeliveryID  string   `json:"delivery_id"`
	Source      string   `json:"source"`
	PayloadMode string   `json:"payload_mode"`
	TemplateID  string   `json:"template_id"`
	Locale      string   `json:"locale"`
	To          []string `json:"to"`
	Status      string   `json:"status"`
}
// mailDeliveryDetail mirrors the per-delivery detail response of Mail
// Service, extending the summary with the idempotency key and the
// recorded template variables used for admin triage assertions.
type mailDeliveryDetail struct {
	DeliveryID        string         `json:"delivery_id"`
	Source            string         `json:"source"`
	PayloadMode       string         `json:"payload_mode"`
	TemplateID        string         `json:"template_id"`
	Locale            string         `json:"locale"`
	To                []string       `json:"to"`
	IdempotencyKey    string         `json:"idempotency_key"`
	Status            string         `json:"status"`
	TemplateVariables map[string]any `json:"template_variables,omitempty"`
}
// eventuallyDelivery polls Mail's list endpoint with the given filter
// query until one matching delivery shows up, failing the test (with
// Notification process logs attached) after 30 seconds.
func (h *rtmNotificationHarness) eventuallyDelivery(
	t *testing.T,
	query url.Values,
) mailDeliverySummary {
	t.Helper()
	deadline := time.Now().Add(30 * time.Second)
	for {
		if summary, ok := h.findDelivery(t, query); ok {
			return summary
		}
		if time.Now().After(deadline) {
			t.Fatalf("mail delivery for query %v not observed within 30s\n%s",
				query, h.notificationProcess.Logs())
		}
		time.Sleep(50 * time.Millisecond)
	}
}
// findDelivery issues one list-deliveries request filtered by query
// and returns the first matching item. Any non-200 status, decode
// failure, or empty result reports not-found so the caller can keep
// polling.
func (h *rtmNotificationHarness) findDelivery(
	t *testing.T,
	query url.Values,
) (mailDeliverySummary, bool) {
	t.Helper()
	request, err := http.NewRequest(
		http.MethodGet,
		h.mailBaseURL+mailDeliveriesPath+"?"+query.Encode(),
		nil,
	)
	require.NoError(t, err)
	response := doRequest(t, request)
	if response.StatusCode != http.StatusOK {
		return mailDeliverySummary{}, false
	}
	var listing struct {
		Items []mailDeliverySummary `json:"items"`
	}
	if json.Unmarshal([]byte(response.Body), &listing) != nil || len(listing.Items) == 0 {
		return mailDeliverySummary{}, false
	}
	return listing.Items[0], true
}
// getDelivery fetches the full detail record for one delivery id from
// Mail Service, requiring a 200 response, and decodes the fields this
// suite asserts on.
func (h *rtmNotificationHarness) getDelivery(t *testing.T, deliveryID string) mailDeliveryDetail {
	t.Helper()
	req, err := http.NewRequest(http.MethodGet, h.mailBaseURL+mailDeliveriesPath+"/"+url.PathEscape(deliveryID), nil)
	require.NoError(t, err)
	resp := doRequest(t, req)
	require.Equalf(t, http.StatusOK, resp.StatusCode, "get delivery: %s", resp.Body)
	// Mail's detail response carries many fields the suite does not
	// assert on (cc, bcc, reply-to, attempt history, …). Use a
	// lenient decoder so additive contract changes do not break this
	// boundary test.
	var detail mailDeliveryDetail
	require.NoError(t, json.Unmarshal([]byte(resp.Body), &detail))
	return detail
}
// --- shared helpers (mirror the conventions of integration/notificationmail) ---

// httpResponse is a fully-read snapshot of an HTTP response: status
// code, body decoded as a string, and a clone of the headers. Taking
// a snapshot lets doRequest close the body eagerly.
type httpResponse struct {
	StatusCode int
	Body       string
	Header     http.Header
}
// doRequest executes one HTTP request with a 5s timeout and
// keep-alives disabled (each call uses a fresh connection), reads the
// whole body, and returns a plain snapshot of status, body, headers.
func doRequest(t *testing.T, request *http.Request) httpResponse {
	t.Helper()
	transport := &http.Transport{DisableKeepAlives: true}
	client := &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
	t.Cleanup(client.CloseIdleConnections)
	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()
	body, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	snapshot := httpResponse{
		StatusCode: response.StatusCode,
		Body:       string(body),
		Header:     response.Header.Clone(),
	}
	return snapshot
}
// decodeStrictJSON unmarshals payload into target, rejecting unknown
// object fields and any trailing JSON after the first value.
func decodeStrictJSON(payload []byte, target any) error {
	dec := json.NewDecoder(bytes.NewReader(payload))
	dec.DisallowUnknownFields()
	if err := dec.Decode(target); err != nil {
		return err
	}
	// A second Decode must hit EOF; anything else means extra input.
	switch err := dec.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// waitForUserServiceReady polls the User Service existence endpoint
// with a readiness-probe user id until it answers 200 OK, failing the
// test with the process logs after ten seconds.
func waitForUserServiceReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	probeURL := baseURL + "/api/v1/internal/users/user-readiness-probe/exists"
	for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); {
		req, err := http.NewRequest(http.MethodGet, probeURL, nil)
		require.NoError(t, err)
		if response, doErr := client.Do(req); doErr == nil {
			// Drain and close so the transport stays reusable.
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
			if response.StatusCode == http.StatusOK {
				return
			}
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("wait for userservice readiness: timeout\n%s", process.Logs())
}
// waitForMailReady polls Mail's list-deliveries endpoint until it
// answers 200 OK, failing the test with the process logs after ten
// seconds.
func waitForMailReady(t *testing.T, process *harness.Process, baseURL string) {
	t.Helper()
	client := &http.Client{Timeout: 250 * time.Millisecond}
	t.Cleanup(client.CloseIdleConnections)
	probeURL := baseURL + mailDeliveriesPath
	for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); {
		req, err := http.NewRequest(http.MethodGet, probeURL, nil)
		require.NoError(t, err)
		if response, doErr := client.Do(req); doErr == nil {
			// Drain and close so the transport stays reusable.
			_, _ = io.Copy(io.Discard, response.Body)
			response.Body.Close()
			if response.StatusCode == http.StatusOK {
				return
			}
		}
		time.Sleep(25 * time.Millisecond)
	}
	t.Fatalf("wait for mail readiness: timeout\n%s", process.Logs())
}
// mailTemplateDir returns the absolute path of the shared mail
// template directory (<repo root>/mail/templates) handed to Mail
// Service via MAIL_TEMPLATE_DIR.
func mailTemplateDir(t *testing.T) string {
	t.Helper()
	return filepath.Join(repositoryRoot(t), "mail", "templates")
}
// repositoryRoot resolves the repository root relative to this source
// file: runtime.Caller(0) yields this file's compile-time path, and
// the root sits two directories above it.
func repositoryRoot(t *testing.T) string {
	t.Helper()
	_, file, _, ok := runtime.Caller(0)
	if !ok {
		t.Fatal("resolve repository root: runtime caller is unavailable")
	}
	return filepath.Clean(filepath.Join(filepath.Dir(file), "..", ".."))
}
// uniqueGameID derives a deterministic, per-test, per-invocation game
// id usable as the `game_id` field on `runtime:start_jobs` entries
// without colliding when `-count` exceeds one.
func uniqueGameID(t *testing.T) string {
	t.Helper()
	sanitised := sanitiseGameName(t.Name())
	nonce := time.Now().UnixNano()
	return fmt.Sprintf("game-%s-%d", sanitised, nonce)
}
// sanitiseGameName maps a Go test name onto the character set allowed
// in a game id: ASCII letters and digits pass through unchanged,
// subtest separators ('/'), underscores, and hyphens become '-', and
// every other rune is dropped.
func sanitiseGameName(name string) string {
	return strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
			return r
		case r == '/', r == '_', r == '-':
			return '-'
		}
		return -1 // strings.Map drops runes mapped to a negative value
	}, name)
}
// resolveDockerHost mirrors `rtmanager/integration/harness.runtime.go`:
// honour DOCKER_HOST when the developer machine routes through colima
// or a remote daemon, fall back to the standard unix path otherwise.
func resolveDockerHost() string {
if host := strings.TrimSpace(os.Getenv("DOCKER_HOST")); host != "" {
return host
}
return "unix:///var/run/docker.sock"
}