Files
scrabble-game/gateway/internal/connectsrv/metrics.go
T
Ilia Denisov 8878711cf3 R3: gateway edge hardening — body cap, h2c sizing, rate-limit observability
- GATEWAY_MAX_BODY_BYTES (1 MiB): connect WithReadMaxBytes + http.MaxBytesReader
  on the public mux; explicit http2.Server MaxConcurrentStreams/IdleTimeout and
  an http.Server ReadHeaderTimeout (R2 report follow-up).
- gateway_rate_limited_total{class} counter, Debug per rejection, a rejection
  tracker drained every 30 s into a Warn summary per key and a report POST to
  /api/v1/internal/ratelimit/report (feeds the admin view + auto-flag).
- The dead AdminPerMinute/AdminBurst policy now guards the /_gm mount (429),
  ahead of its Basic-Auth.
- resolve() logs the cause of infra session-resolve failures at Warn (the
  transient unauthenticated dips from the R2 run); unknown tokens stay silent.
2026-06-10 01:58:48 +02:00

89 lines
3.2 KiB
Go

package connectsrv
import (
"context"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
)
// meterName is the instrumentation-scope name for the gateway edge's
// OpenTelemetry instruments. Within this file it names the no-op meters that
// newServerMetrics falls back to; presumably the caller creates the real meter
// under the same name — confirm at the call site.
const meterName = "scrabble/gateway/edge"
// activeUserWindows lists the rolling windows that the active_users gauge
// reports, one observation per entry. newServerMetrics hands the durations to
// the activity tracker and tags each resulting count with the matching label.
var activeUserWindows = []struct {
	label string        // value of the "window" attribute on the observation
	dur   time.Duration // window length passed to the tracker's counts call
}{
	{"24h", 24 * time.Hour},
	{"7d", 168 * time.Hour}, // seven days
}
// serverMetrics holds the edge's operational instruments. It defaults to no-ops
// (newServerMetrics substitutes a no-op meter when given nil); NewServer
// installs the real meter when one is supplied in Deps.
type serverMetrics struct {
// edge is the edge_request_duration histogram: seconds per Execute call,
// recorded by recordEdge with message_type and result attributes.
edge metric.Float64Histogram
// rateLimited is the gateway_rate_limited_total counter, incremented by
// recordRateLimited with a class attribute.
rateLimited metric.Int64Counter
// active is the in-memory tracker fed by recordActive and read by the
// active_users gauge callback.
active *activeUsers
}
// newServerMetrics creates the edge instruments on meter and returns them
// bundled in a serverMetrics. A nil meter is replaced by a no-op meter, and an
// instrument whose construction fails is replaced by its no-op counterpart, so
// the returned value is always safe to record on.
//
// The active_users gauge is observable: when it can be created, a callback is
// registered that reads the in-memory tracker once per configured window and
// reports each count under a "window" attribute. If the gauge cannot be
// created, the tracker is still allocated but nothing reports it.
func newServerMetrics(meter metric.Meter) *serverMetrics {
	// noopMeter yields a fresh do-nothing meter for every fallback path below.
	noopMeter := func() metric.Meter {
		return noop.NewMeterProvider().Meter(meterName)
	}
	if meter == nil {
		meter = noopMeter()
	}

	latency, err := meter.Float64Histogram("edge_request_duration",
		metric.WithUnit("s"),
		metric.WithDescription("Seconds to serve one Connect Execute call, by message type and result."))
	if err != nil {
		// Error ignored on purpose: this is the last-resort no-op instrument.
		latency, _ = noopMeter().Float64Histogram("edge_request_duration")
	}

	rejections, err := meter.Int64Counter("gateway_rate_limited_total",
		metric.WithDescription("Rate-limiter rejections at the edge, by limiter class (user, public, email or admin) — aggregate only, no per-user attributes."))
	if err != nil {
		// Error ignored on purpose: this is the last-resort no-op instrument.
		rejections, _ = noopMeter().Int64Counter("gateway_rate_limited_total")
	}

	sm := &serverMetrics{
		edge:        latency,
		rateLimited: rejections,
		active:      newActiveUsers(),
	}

	activeGauge, err := meter.Int64ObservableGauge("active_users",
		metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
	if err != nil {
		// Without a gauge there is nothing to observe; skip the callback.
		return sm
	}

	// Snapshot the window durations once; the callback reuses this slice.
	spans := make([]time.Duration, 0, len(activeUserWindows))
	for _, w := range activeUserWindows {
		spans = append(spans, w.dur)
	}

	observe := func(_ context.Context, obs metric.Observer) error {
		perWindow := sm.active.counts(spans)
		for i := range activeUserWindows {
			window := attribute.String("window", activeUserWindows[i].label)
			obs.ObserveInt64(activeGauge, int64(perWindow[i]), metric.WithAttributes(window))
		}
		return nil
	}
	// Best-effort: a failed registration only means the gauge never reports,
	// and the registration handle is not kept.
	_, _ = meter.RegisterCallback(observe, activeGauge)

	return sm
}
// recordEdge records how long one Execute call took, measured from start to
// now in seconds, on the edge histogram. The sample carries two attributes:
// message_type (msgType) and result, the outcome of the call (ok, domain,
// unauthenticated, rate_limited, unknown_type or internal).
func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string, start time.Time) {
	elapsed := time.Since(start).Seconds()
	labels := metric.WithAttributes(
		attribute.String("message_type", msgType),
		attribute.String("result", result),
	)
	m.edge.Record(ctx, elapsed, labels)
}
// recordActive marks account uid active now, feeding the active_users gauge.
// It forwards uid to the in-memory tracker's seen method; the gauge callback
// registered by newServerMetrics later reads that tracker.
func (m *serverMetrics) recordActive(uid string) {
m.active.seen(uid)
}
// recordRateLimited adds one to the rate-limit rejection counter, tagging the
// increment with a "class" attribute set to class (per the counter's
// description: user, public, email or admin).
func (m *serverMetrics) recordRateLimited(ctx context.Context, class string) {
	byClass := metric.WithAttributes(attribute.String("class", class))
	m.rateLimited.Add(ctx, 1, byClass)
}