8878711cf3
- GATEWAY_MAX_BODY_BYTES (1 MiB): connect WithReadMaxBytes + http.MaxBytesReader
on the public mux; explicit http2.Server MaxConcurrentStreams/IdleTimeout and
an http.Server ReadHeaderTimeout (R2 report follow-up).
- gateway_rate_limited_total{class} counter, Debug per rejection, a rejection
tracker drained every 30 s into a Warn summary per key and a report POST to
/api/v1/internal/ratelimit/report (feeds the admin view + auto-flag).
- The dead AdminPerMinute/AdminBurst policy now guards the /_gm mount (429),
ahead of its Basic-Auth.
- resolve() logs the cause of infra session-resolve failures at Warn (the
transient unauthenticated dips from the R2 run); unknown tokens stay silent.
89 lines
3.2 KiB
Go
89 lines
3.2 KiB
Go
package connectsrv
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/metric"
|
|
"go.opentelemetry.io/otel/metric/noop"
|
|
)
|
|
|
|
// meterName is the instrumentation scope name for the gateway edge's
// OpenTelemetry instruments; it is the name handed to Meter whenever this
// file has to build a no-op fallback meter.
const meterName = "scrabble/gateway/edge"
|
|
|
|
// activeUserWindows lists the rolling look-back windows that the active_users
// gauge reports, one observation per entry.
var activeUserWindows = []struct {
	label string        // value of the "window" attribute attached to the observation
	dur   time.Duration // length of the look-back window
}{
	{"24h", 24 * time.Hour},
	{"7d", 7 * 24 * time.Hour},
}
|
|
|
|
// serverMetrics holds the edge's operational instruments. It defaults to no-ops;
|
|
// NewServer installs the real meter when one is supplied in Deps.
|
|
type serverMetrics struct {
|
|
edge metric.Float64Histogram
|
|
rateLimited metric.Int64Counter
|
|
active *activeUsers
|
|
}
|
|
|
|
// newServerMetrics builds the instruments on meter (nil selects a no-op meter),
|
|
// falling back to a no-op histogram on the (rare) construction error. The
|
|
// active_users gauge is registered as an observable callback over the in-memory
|
|
// tracker.
|
|
func newServerMetrics(meter metric.Meter) *serverMetrics {
|
|
if meter == nil {
|
|
meter = noop.NewMeterProvider().Meter(meterName)
|
|
}
|
|
h, err := meter.Float64Histogram("edge_request_duration",
|
|
metric.WithUnit("s"),
|
|
metric.WithDescription("Seconds to serve one Connect Execute call, by message type and result."))
|
|
if err != nil {
|
|
h, _ = noop.NewMeterProvider().Meter(meterName).Float64Histogram("edge_request_duration")
|
|
}
|
|
c, err := meter.Int64Counter("gateway_rate_limited_total",
|
|
metric.WithDescription("Rate-limiter rejections at the edge, by limiter class (user, public, email or admin) — aggregate only, no per-user attributes."))
|
|
if err != nil {
|
|
c, _ = noop.NewMeterProvider().Meter(meterName).Int64Counter("gateway_rate_limited_total")
|
|
}
|
|
m := &serverMetrics{edge: h, rateLimited: c, active: newActiveUsers()}
|
|
|
|
gauge, err := meter.Int64ObservableGauge("active_users",
|
|
metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
|
|
if err == nil {
|
|
windows := make([]time.Duration, len(activeUserWindows))
|
|
for i, w := range activeUserWindows {
|
|
windows[i] = w.dur
|
|
}
|
|
_, _ = meter.RegisterCallback(func(_ context.Context, o metric.Observer) error {
|
|
counts := m.active.counts(windows)
|
|
for i, w := range activeUserWindows {
|
|
o.ObserveInt64(gauge, int64(counts[i]), metric.WithAttributes(attribute.String("window", w.label)))
|
|
}
|
|
return nil
|
|
}, gauge)
|
|
}
|
|
return m
|
|
}
|
|
|
|
// recordEdge records the duration of one Execute call labelled by message type and
|
|
// outcome (ok, domain, unauthenticated, rate_limited, unknown_type or internal).
|
|
func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string, start time.Time) {
|
|
m.edge.Record(ctx, time.Since(start).Seconds(), metric.WithAttributes(
|
|
attribute.String("message_type", msgType),
|
|
attribute.String("result", result),
|
|
))
|
|
}
|
|
|
|
// recordActive marks account uid active now, feeding the active_users gauge.
|
|
func (m *serverMetrics) recordActive(uid string) {
|
|
m.active.seen(uid)
|
|
}
|
|
|
|
// recordRateLimited counts one limiter rejection under class.
|
|
func (m *serverMetrics) recordRateLimited(ctx context.Context, class string) {
|
|
m.rateLimited.Add(ctx, 1, metric.WithAttributes(attribute.String("class", class)))
|
|
}
|