chore: sync testing plan with gateway

This commit is contained in:
IliaDenisov
2026-04-09 12:34:55 +02:00
parent c64c298d06
commit 9065b82fe2
5 changed files with 262 additions and 11 deletions
+65 -2
View File
@@ -126,13 +126,21 @@ The testing plan follows this service order:
* `GET /healthz` * `GET /healthz`
* `GET /readyz` * `GET /readyz`
* mounted public auth routes * mounted public auth routes
* wrong-method and not-found handling
* public route-class classification for auth, browser bootstrap, browser asset, and misc traffic
* isolation of browser/public-auth rate-limit buckets
* rejection of oversized public request bodies * rejection of oversized public request bodies
* `RemoteAddr`-based public IP derivation that ignores forwarded proxy headers
* public rate-limit behavior * public rate-limit behavior
* stable projection of upstream public auth errors * stable projection of upstream public auth errors
* sensitive-field redaction in public-auth logs
* public OpenAPI contract validation
* admin `/metrics` availability only on the private admin listener
* Authenticated gRPC envelope validation tests: * Authenticated gRPC envelope validation tests:
* missing required fields * missing required fields
* unsupported `protocol_version` * unsupported `protocol_version`
* parsed envelope attachment before delegate execution
* malformed `payload_hash` * malformed `payload_hash`
* mismatched `payload_hash` * mismatched `payload_hash`
* invalid signature * invalid signature
@@ -145,11 +153,15 @@ The testing plan follows this service order:
* cache hit * cache hit
* cache miss * cache miss
* malformed cached record * malformed cached record
* read-through local-cache warming after first fallback lookup
* local hit skips fallback lookup
* cache invalidation/update handling * cache invalidation/update handling
* Response signing tests: * Response signing tests:
* signed unary response generation * signed unary response generation
* unary response fails closed when the response signer is unavailable
* signed bootstrap push event generation * signed bootstrap push event generation
* bootstrap push fails closed when the response signer is unavailable
* signed stream event generation * signed stream event generation
* Routing tests: * Routing tests:
@@ -157,22 +169,35 @@ The testing plan follows this service order:
* downstream timeout mapping * downstream timeout mapping
* downstream availability mapping * downstream availability mapping
* authenticated internal command context construction * authenticated internal command context construction
* verified trace/span context propagation downstream
* graceful drain of in-flight unary requests on shutdown
* sensitive transport material redaction in authenticated logs
* Push tests: * Push tests:
* `SubscribeEvents` binds `user_id` and `device_session_id` * `SubscribeEvents` binds `user_id` and `device_session_id`
* bootstrap server-time event is emitted * bootstrap server-time event is emitted
* user-targeted events fan out to all matching user sessions
* session-targeted events reach only the addressed session
* stream queue overflow closes only the affected stream * stream queue overflow closes only the affected stream
* revoked session closes matching streams only * revoked session closes matching streams only
* revoked-session stream reopen is rejected
* active streams close with deterministic status on gateway shutdown
* Anti-abuse tests: * Anti-abuse tests:
* IP/session/user/message-class buckets * IP/session/user/message-class buckets
* interaction between rate limits and verification order * interaction between rate limits and verification order
* authenticated/public anti-abuse bucket isolation
* authenticated policy-hook input and reject mapping
* Redis adapter tests: * Redis adapter tests:
* session cache lookup * session cache lookup
* replay reservation * replay reservation
* client event stream consumption * client event stream consumption
* session event stream consumption * session event stream consumption
* subscriber start-from-tail semantics
* malformed-event drop/evict-and-continue behavior
* later-event-wins behavior for session snapshots
* subscriber shutdown interrupts blocking reads
### Inter-service integration tests at this stage ### Inter-service integration tests at this stage
@@ -180,7 +205,10 @@ The testing plan follows this service order:
* session cache compatibility * session cache compatibility
* replay reservation semantics * replay reservation semantics
* event stream consumption for push * session update warms local cache without repeated fallback lookups
* revoked snapshot invalidates authenticated requests without fallback lookup
* client-event stream consumption for push fan-out
* session-event stream consumption for revoke propagation and push teardown
* `Gateway <-> stub Auth adapter` * `Gateway <-> stub Auth adapter`
* public auth passthrough * public auth passthrough
@@ -194,7 +222,11 @@ The testing plan follows this service order:
* Authenticated request verification pipeline remains stable. * Authenticated request verification pipeline remains stable.
* Public auth routes remain mounted and deterministic. * Public auth routes remain mounted and deterministic.
* Public route classes and anti-abuse buckets remain isolated.
* Admin metrics stay off the public ingress.
* Push bootstrap event remains signed and schema-compatible. * Push bootstrap event remains signed and schema-compatible.
* Push revoke and shutdown close streams with stable status mapping.
* Gateway logs remain free of sensitive request/auth material.
--- ---
@@ -207,6 +239,9 @@ The testing plan follows this service order:
* challenge creation * challenge creation
* TTL expiration * TTL expiration
* resend throttling * resend throttling
* `delivery_throttled` challenge creation without `UserDirectory` or `MailSender` calls
* `delivery_suppressed` behavior for blocked subjects
* expiry grace-window transition from `challenge_expired` to `challenge_not_found`
* delivery state transitions * delivery state transitions
* invalid confirm attempt limits * invalid confirm attempt limits
* success-shaped `send-email-code` behavior * success-shaped `send-email-code` behavior
@@ -218,7 +253,9 @@ The testing plan follows this service order:
* existing user * existing user
* creatable user * creatable user
* short-window idempotent confirm retry * short-window idempotent confirm retry
* projection repair on repeated confirm after prior publish failure
* same challenge plus different public key failure * same challenge plus different public key failure
* confirm-race cleanup of superseded sessions
* session-limit exceeded * session-limit exceeded
* Session lifecycle tests: * Session lifecycle tests:
@@ -226,13 +263,14 @@ The testing plan follows this service order:
* revoke one session * revoke one session
* revoke all sessions * revoke all sessions
* block user/email and revoke implied sessions * block user/email and revoke implied sessions
* already-revoked and already-blocked idempotent results * `already_revoked`, `no_active_sessions`, and `already_blocked` acknowledgement semantics
* Projection tests: * Projection tests:
* source-of-truth session write * source-of-truth session write
* gateway KV snapshot write * gateway KV snapshot write
* gateway session stream event publish * gateway session stream event publish
* repeated publish idempotency * repeated publish idempotency
* stored session reread before publish to avoid stale active projection
* Public API tests: * Public API tests:
* JSON decoding and unknown field rejection * JSON decoding and unknown field rejection
@@ -269,6 +307,7 @@ The testing plan follows this service order:
* login creates session * login creates session
* session projection becomes visible to gateway * session projection becomes visible to gateway
* repeated confirm repairs a previously failed projection publish
* revoked session invalidates gateway authentication path * revoked session invalidates gateway authentication path
* revoked session closes gateway push stream * revoked session closes gateway push stream
* `Auth / Session <-> stub Mail` * `Auth / Session <-> stub Mail`
@@ -282,6 +321,8 @@ The testing plan follows this service order:
* `confirm-email-code` always returns a ready `device_session_id`. * `confirm-email-code` always returns a ready `device_session_id`.
* Gateway continues authenticating from cache rather than synchronous auth lookups. * Gateway continues authenticating from cache rather than synchronous auth lookups.
* Confirm idempotency window behavior remains stable. * Confirm idempotency window behavior remains stable.
* Projection repair-on-retry remains safe after source-of-truth commits.
* Confirm-race cleanup does not leave multiple active winner sessions.
* Session projection remains compatible with gateway expectations. * Session projection remains compatible with gateway expectations.
--- ---
@@ -295,6 +336,8 @@ The testing plan follows this service order:
* create user * create user
* find by email * find by email
* normalized email uniqueness * normalized email uniqueness
* generated default `race_name` for new users
* `race_name` uniqueness and confusable-substitution policy
* role assignment * role assignment
* tariff/entitlement fields * tariff/entitlement fields
* Profile tests: * Profile tests:
@@ -302,22 +345,30 @@ The testing plan follows this service order:
* allowed profile reads * allowed profile reads
* allowed profile edits * allowed profile edits
* forbidden profile edits * forbidden profile edits
* self-service rejection for e-mail and `declared_country` mutations
* `profile_update_block` sanction gating for profile/settings writes
* settings reads/writes * settings reads/writes
* BCP 47 and IANA validation for settings values
* Restriction/sanction tests: * Restriction/sanction tests:
* block flags * block flags
* user limits * user limits
* override fields * override fields
* declared current sanctions view * declared current sanctions view
* effective sanction/limit snapshot shaping for downstream consumers
* Entitlement tests: * Entitlement tests:
* free user * free user
* paid placeholder states * paid placeholder states
* default simultaneous-game limit and per-user overrides * default simultaneous-game limit and per-user overrides
* entitlement, sanction, and limit interaction rules
* Internal/admin-oriented tests: * Internal/admin-oriented tests:
* resolve existing/creatable/blocked decision for auth * resolve existing/creatable/blocked decision for auth
* `ensure-by-email` create-only `registration_context` semantics
* current `declared_country` read/write path * current `declared_country` read/write path
* exact lookup by `user_id`, normalized `email`, and `race_name`
* paginated filtered listing with deterministic ordering
* Storage and API contract tests: * Storage and API contract tests:
* public/trusted endpoints * public/trusted endpoints
@@ -346,6 +397,8 @@ The testing plan follows this service order:
* User resolution outcomes remain stable for auth flow. * User resolution outcomes remain stable for auth flow.
* User-facing profile APIs do not bypass auth/session rules. * User-facing profile APIs do not bypass auth/session rules.
* `registration_context` stays create-only and does not overwrite existing users.
* `race_name` uniqueness policy remains stable for self-service and auth-created users.
* User limit and sanction data stay compatible with downstream consumers. * User limit and sanction data stay compatible with downstream consumers.
--- ---
@@ -733,35 +786,44 @@ The testing plan follows this service order:
* enqueue authenticated observation * enqueue authenticated observation
* ingest validation * ingest validation
* malformed FlatBuffers payload rejection
* required-scalar-field validation
* non-blocking acceptance * non-blocking acceptance
* Worker pipeline tests: * Worker pipeline tests:
* geo lookup * geo lookup
* geo lookup miss handling
* country aggregation * country aggregation
* `usual_connection_country` derivation * `usual_connection_country` derivation
* suspicious multi-country detection * suspicious multi-country detection
* review recommendation calculation * review recommendation calculation
* queue retry-safe processing
* State tests: * State tests:
* durable `country_review_recommended` * durable `country_review_recommended`
* declared-country version history * declared-country version history
* declared-country version lifecycle: `recorded`, `applied`, `sync_failed`
* session block action history * session block action history
* Admin/query API tests: * Admin/query API tests:
* list review candidates * list review candidates
* stable ordering and pagination for candidate queries
* read user geo profile * read user geo profile
* grouping by `device_session_id` in review/read responses
* apply approved declared-country change * apply approved declared-country change
* Queue and lag tests: * Queue and lag tests:
* backlog observability * backlog observability
* duplicate observation safety * duplicate observation safety
* delayed processing behavior * delayed processing behavior
* retry and failure observability
### Inter-service integration tests with already implemented components ### Inter-service integration tests with already implemented components
* `Gateway <-> Geo` * `Gateway <-> Geo`
* async observation publish from authenticated request context * async observation publish from authenticated request context
* fail-open edge behavior when geo ingest is unavailable
* `Geo <-> Auth / Session` * `Geo <-> Auth / Session`
* suspicious session block request * suspicious session block request
@@ -779,6 +841,7 @@ The testing plan follows this service order:
### Regression tests to keep from this stage onward ### Regression tests to keep from this stage onward
* Geo processing never blocks the current gameplay request. * Geo processing never blocks the current gameplay request.
* Review-recommended state remains queryable even when event/mail side effects fail.
* Session suspicion affects only later requests via auth/session. * Session suspicion affects only later requests via auth/session.
* Geo owns history, while user service owns current effective declared country. * Geo owns history, while user service owns current effective declared country.
@@ -234,16 +234,27 @@ func (g runningAuthenticatedGateway) stop(t *testing.T) {
func dialGatewayClient(t *testing.T, addr string) *grpc.ClientConn { func dialGatewayClient(t *testing.T, addr string) *grpc.ClientConn {
t.Helper() t.Helper()
ctx, cancel := context.WithTimeout(context.Background(), time.Second) var conn *grpc.ClientConn
defer cancel() require.Eventually(t, func() bool {
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
conn, err := grpc.DialContext( candidate, err := grpc.DialContext(
ctx, ctx,
addr, addr,
grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithBlock(), grpc.WithBlock(),
) )
require.NoError(t, err) if err != nil {
if candidate != nil {
_ = candidate.Close()
}
return false
}
conn = candidate
return true
}, 2*time.Second, 10*time.Millisecond, "gateway did not accept gRPC connections")
return conn return conn
} }
@@ -8,6 +8,7 @@ import (
"time" "time"
"galaxy/gateway/internal/authn" "galaxy/gateway/internal/authn"
"galaxy/gateway/internal/config"
"galaxy/gateway/internal/downstream" "galaxy/gateway/internal/downstream"
"galaxy/gateway/internal/testutil" "galaxy/gateway/internal/testutil"
gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1" gatewayv1 "galaxy/gateway/proto/galaxy/gateway/v1"
@@ -143,6 +144,78 @@ func TestExecuteCommandMapsDownstreamUnavailableToUnavailable(t *testing.T) {
assert.Equal(t, 1, failingClient.executeCalls) assert.Equal(t, 1, failingClient.executeCalls)
} }
// TestExecuteCommandMapsDownstreamTimeoutToUnavailable verifies that a
// downstream call which never completes within the configured
// DownstreamTimeout is reported to the caller as codes.Unavailable with the
// stable "downstream service is unavailable" message.
func TestExecuteCommandMapsDownstreamTimeoutToUnavailable(t *testing.T) {
	t.Parallel()

	// The stub never answers on its own: it returns only once the context it
	// was handed is done, and then reports that context's error.
	stallingClient := &recordingDownstreamClient{
		executeFunc: func(ctx context.Context, _ downstream.AuthenticatedCommand) (downstream.UnaryResult, error) {
			<-ctx.Done()
			return downstream.UnaryResult{}, ctx.Err()
		},
	}

	server, runGateway := newTestGatewayWithGRPCConfig(t, newAuthenticatedGRPCConfigForTest(func(cfg *config.AuthenticatedGRPCConfig) {
		cfg.DownstreamTimeout = 50 * time.Millisecond
	}), ServerDependencies{
		Router: downstream.NewStaticRouter(map[string]downstream.Client{
			"fleet.move": stallingClient,
		}),
		SessionCache:   userMappedSessionCache(map[string]string{"device-session-123": "user-123"}),
		ReplayStore:    staticReplayStore{},
		ResponseSigner: newTestResponseSigner(),
	})
	defer runGateway.stop(t)

	addr := waitForListenAddr(t, server)
	conn := dialGatewayClient(t, addr)
	defer func() {
		require.NoError(t, conn.Close())
	}()

	client := gatewayv1.NewEdgeGatewayClient(conn)

	// Bound the RPC on the client side. The stub blocks until its context is
	// done, so if the gateway ever stopped applying DownstreamTimeout an
	// unbounded call would hang until the global `go test` timeout. The
	// deadline is far above the 50ms under test, so it only fires on a
	// regression, where it surfaces as a non-Unavailable status below.
	callCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	_, err := client.ExecuteCommand(callCtx, newValidExecuteCommandRequest())
	require.Error(t, err)
	assert.Equal(t, codes.Unavailable, status.Code(err))
	assert.Equal(t, "downstream service is unavailable", status.Convert(err).Message())
	assert.Equal(t, 1, stallingClient.executeCalls)
}
// TestExecuteCommandFailsClosedWhenResponseSignerUnavailable checks that a
// gateway wired with unavailableResponseSigner answers ExecuteCommand with
// codes.Unavailable even though the downstream stub returned a successful
// result, and that the downstream stub was invoked exactly once.
func TestExecuteCommandFailsClosedWhenResponseSignerUnavailable(t *testing.T) {
	t.Parallel()

	// Downstream stub that always accepts the command.
	acceptingClient := &recordingDownstreamClient{
		executeFunc: func(context.Context, downstream.AuthenticatedCommand) (downstream.UnaryResult, error) {
			accepted := downstream.UnaryResult{
				ResultCode:   "accepted",
				PayloadBytes: []byte("downstream-response"),
			}
			return accepted, nil
		},
	}
	routes := map[string]downstream.Client{"fleet.move": acceptingClient}
	sessions := map[string]string{"device-session-123": "user-123"}

	srv, running := newTestGateway(t, ServerDependencies{
		Router:         downstream.NewStaticRouter(routes),
		ResponseSigner: unavailableResponseSigner{},
		SessionCache:   userMappedSessionCache(sessions),
		ReplayStore:    staticReplayStore{},
	})
	defer running.stop(t)

	conn := dialGatewayClient(t, waitForListenAddr(t, srv))
	defer func() {
		require.NoError(t, conn.Close())
	}()

	edge := gatewayv1.NewEdgeGatewayClient(conn)
	_, err := edge.ExecuteCommand(context.Background(), newValidExecuteCommandRequest())
	require.Error(t, err)

	st := status.Convert(err)
	assert.Equal(t, codes.Unavailable, st.Code())
	assert.Equal(t, "response signer is unavailable", st.Message())
	assert.Equal(t, 1, acceptingClient.executeCalls)
}
func TestExecuteCommandPropagatesOTelSpanContextToDownstream(t *testing.T) { func TestExecuteCommandPropagatesOTelSpanContextToDownstream(t *testing.T) {
t.Parallel() t.Parallel()
+27
View File
@@ -216,6 +216,33 @@ func TestSubscribeEventsMissingReplayStoreFailsClosed(t *testing.T) {
assert.Equal(t, "replay store is unavailable", status.Convert(err).Message()) assert.Equal(t, "replay store is unavailable", status.Convert(err).Message())
} }
func TestSubscribeEventsFailsClosedWhenResponseSignerUnavailable(t *testing.T) {
t.Parallel()
server, runGateway := newTestGateway(t, ServerDependencies{
ResponseSigner: unavailableResponseSigner{},
SessionCache: staticSessionCache{
lookupFunc: func(context.Context, string) (session.Record, error) {
return newActiveSessionRecord(), nil
},
},
ReplayStore: staticReplayStore{},
})
defer runGateway.stop(t)
addr := waitForListenAddr(t, server)
conn := dialGatewayClient(t, addr)
defer func() {
require.NoError(t, conn.Close())
}()
client := gatewayv1.NewEdgeGatewayClient(conn)
err := subscribeEventsError(t, context.Background(), client, newValidSubscribeEventsRequest())
require.Error(t, err)
assert.Equal(t, codes.Unavailable, status.Code(err))
assert.Equal(t, "response signer is unavailable", status.Convert(err).Message())
}
func TestServerLifecycle(t *testing.T) { func TestServerLifecycle(t *testing.T) {
t.Parallel() t.Parallel()
@@ -216,6 +216,83 @@ func TestPublicAntiAbuseBrowserClassBucketsStayIsolatedFromPublicAuth(t *testing
} }
} }
// TestPublicAntiAbuseUsesRemoteAddrInsteadOfForwardedHeaders sends two
// send-email-code requests through the public handler with a one-request
// public-auth rate limit and checks the second response code. Each scenario
// varies RemoteAddr and a forwarding header independently, so the expected
// code shows which of the two the limiter keyed on.
func TestPublicAntiAbuseUsesRemoteAddrInsteadOfForwardedHeaders(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name           string
		headerKey      string
		firstHeader    string
		secondHeader   string
		firstRemote    string
		secondRemote   string
		wantSecondCode int
	}

	scenarios := []scenario{
		{
			name:           "same remote addr ignores x-forwarded-for changes",
			headerKey:      "X-Forwarded-For",
			firstHeader:    "198.51.100.10",
			secondHeader:   "198.51.100.11",
			firstRemote:    "192.0.2.10:1234",
			secondRemote:   "192.0.2.10:1234",
			wantSecondCode: http.StatusTooManyRequests,
		},
		{
			name:           "different remote addr wins over shared forwarded header",
			headerKey:      "Forwarded",
			firstHeader:    "for=198.51.100.10",
			secondHeader:   "for=198.51.100.10",
			firstRemote:    "192.0.2.10:1234",
			secondRemote:   "192.0.2.11:1234",
			wantSecondCode: http.StatusOK,
		},
	}

	for _, sc := range scenarios {
		sc := sc

		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()

			// One request per hour on the public-auth bucket, so a second
			// request landing in the same bucket is throttled.
			cfg := config.DefaultPublicHTTPConfig()
			cfg.AntiAbuse.PublicAuth.RateLimit = config.PublicRateLimitConfig{Requests: 1, Window: time.Hour, Burst: 1}
			// Generous identity limit so it does not interfere with the check.
			cfg.AntiAbuse.SendEmailCodeIdentity.RateLimit = config.PublicRateLimitConfig{Requests: 100, Window: time.Hour, Burst: 100}

			handler := newPublicHandlerWithConfig(cfg, ServerDependencies{
				AuthService: &recordingAuthServiceClient{
					sendEmailCodeResult: SendEmailCodeResult{ChallengeID: "challenge-123"},
				},
			})

			firstReq := sendEmailCodeRequest(`{"email":"pilot-one@example.com"}`)
			firstReq.RemoteAddr = sc.firstRemote
			firstReq.Header.Set(sc.headerKey, sc.firstHeader)

			secondReq := sendEmailCodeRequest(`{"email":"pilot-two@example.com"}`)
			secondReq.RemoteAddr = sc.secondRemote
			secondReq.Header.Set(sc.headerKey, sc.secondHeader)

			firstRec := httptest.NewRecorder()
			handler.ServeHTTP(firstRec, firstReq)

			secondRec := httptest.NewRecorder()
			handler.ServeHTTP(secondRec, secondReq)

			assert.Equal(t, http.StatusOK, firstRec.Code)
			assert.Equal(t, sc.wantSecondCode, secondRec.Code)
		})
	}
}
func TestPublicAntiAbuseSendEmailIdentityThrottle(t *testing.T) { func TestPublicAntiAbuseSendEmailIdentityThrottle(t *testing.T) {
t.Parallel() t.Parallel()