R3: gateway edge hardening — body cap, h2c sizing, rate-limit observability

- GATEWAY_MAX_BODY_BYTES (1 MiB): connect WithReadMaxBytes + http.MaxBytesReader
  on the public mux; explicit http2.Server MaxConcurrentStreams/IdleTimeout and
  an http.Server ReadHeaderTimeout (R2 report follow-up).
- gateway_rate_limited_total{class} counter, Debug per rejection, a rejection
  tracker drained every 30 s into a Warn summary per key and a report POST to
  /api/v1/internal/ratelimit/report (feeds the admin view + auto-flag).
- The dead AdminPerMinute/AdminBurst policy now guards the /_gm mount (429),
  ahead of its Basic-Auth.
- resolve() logs the cause of infra session-resolve failures at Warn (the
  transient unauthenticated dips from the R2 run); unknown tokens stay silent.
This commit is contained in:
Ilia Denisov
2026-06-10 01:58:48 +02:00
parent c23ac94c4e
commit 8878711cf3
12 changed files with 549 additions and 35 deletions
+114
View File
@@ -83,6 +83,120 @@ func TestExecuteAuthedRequiresSession(t *testing.T) {
}
}
// TestExecuteRateLimitedTracked verifies a limiter rejection returns
// ResourceExhausted and lands in the rejection tracker under the public class,
// keyed by the client IP (R3).
func TestExecuteRateLimitedTracked(t *testing.T) {
backendSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`{"token":"tok","user_id":"u-1","is_guest":true,"display_name":"Guest"}`))
}))
defer backendSrv.Close()
backend, err := backendclient.New(backendSrv.URL, "localhost:9090", 2*time.Second)
if err != nil {
t.Fatalf("backendclient: %v", err)
}
defer func() { _ = backend.Close() }()
limits := config.DefaultRateLimit()
limits.PublicPerMinute, limits.PublicBurst = 1, 1
tracker := ratelimit.NewTracker()
edge := connectsrv.NewServer(connectsrv.Deps{
Registry: transcode.NewRegistry(backend, nil),
Sessions: session.NewCache(backend, time.Minute, 100),
Limiter: ratelimit.New(),
Tracker: tracker,
Hub: push.NewHub(0),
RateLimit: limits,
Heartbeat: 15 * time.Second,
})
edgeSrv := httptest.NewServer(edge.HTTPHandler())
defer edgeSrv.Close()
client := edgev1connect.NewGatewayClient(http.DefaultClient, edgeSrv.URL)
if _, err := client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
MessageType: transcode.MsgAuthGuest,
})); err != nil {
t.Fatalf("first execute: %v", err)
}
_, err = client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
MessageType: transcode.MsgAuthGuest,
}))
if connect.CodeOf(err) != connect.CodeResourceExhausted {
t.Fatalf("code = %v, want ResourceExhausted", connect.CodeOf(err))
}
entries := tracker.Drain()
if len(entries) != 1 {
t.Fatalf("tracker drained %d entries, want 1", len(entries))
}
if e := entries[0]; e.Class != "public" || e.Key != "127.0.0.1" || e.Rejected != 1 {
t.Fatalf("tracked %+v, want public/127.0.0.1 rejected=1", e)
}
}
// TestAdminMountRateLimited verifies the /_gm mount is guarded by the per-IP
// admin limiter class ahead of the proxy's Basic-Auth (R3).
func TestAdminMountRateLimited(t *testing.T) {
backendSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))
defer backendSrv.Close()
backend, err := backendclient.New(backendSrv.URL, "localhost:9090", 2*time.Second)
if err != nil {
t.Fatalf("backendclient: %v", err)
}
defer func() { _ = backend.Close() }()
limits := config.DefaultRateLimit()
limits.AdminPerMinute, limits.AdminBurst = 1, 1
edge := connectsrv.NewServer(connectsrv.Deps{
Registry: transcode.NewRegistry(backend, nil),
Sessions: session.NewCache(backend, time.Minute, 100),
Limiter: ratelimit.New(),
Hub: push.NewHub(0),
RateLimit: limits,
Heartbeat: 15 * time.Second,
AdminProxy: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}),
})
edgeSrv := httptest.NewServer(edge.HTTPHandler())
defer edgeSrv.Close()
first, err := http.Get(edgeSrv.URL + "/_gm/")
if err != nil {
t.Fatalf("first /_gm: %v", err)
}
_ = first.Body.Close()
if first.StatusCode != http.StatusOK {
t.Fatalf("first /_gm = %d, want 200", first.StatusCode)
}
second, err := http.Get(edgeSrv.URL + "/_gm/")
if err != nil {
t.Fatalf("second /_gm: %v", err)
}
_ = second.Body.Close()
if second.StatusCode != http.StatusTooManyRequests {
t.Fatalf("second /_gm = %d, want 429", second.StatusCode)
}
}
// TestExecuteOversizedPayloadRejected verifies the request-body cap: an Execute
// message above GATEWAY_MAX_BODY_BYTES is refused at the edge without reaching
// the backend (R3).
func TestExecuteOversizedPayloadRejected(t *testing.T) {
client, cleanup := newEdge(t, func(w http.ResponseWriter, r *http.Request) {
t.Error("backend must not be called for an oversized payload")
})
defer cleanup()
_, err := client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
MessageType: transcode.MsgAuthGuest,
Payload: make([]byte, config.DefaultMaxBodyBytes+1),
}))
if connect.CodeOf(err) != connect.CodeResourceExhausted {
t.Fatalf("code = %v, want ResourceExhausted", connect.CodeOf(err))
}
}
func TestExecuteUnknownMessageType(t *testing.T) {
client, cleanup := newEdge(t, func(w http.ResponseWriter, r *http.Request) {})
defer cleanup()