Stage 12: observability & performance (OTel/OTLP, domain metrics, guest GC)
- pkg/telemetry: shared OTel provider bootstrap (none/stdout/otlp + W3C
propagators + Go runtime metrics); backend/internal/telemetry becomes a thin
facade keeping its gin middleware.
- Telemetry parity: gateway and the Telegram connector gain telemetry runtimes
and config (GATEWAY_/TELEGRAM_ SERVICE_NAME + OTEL_*); otelgrpc instruments the
backend push server, the gateway's backend+connector clients and the connector
server. Default exporter stays none (collector/dashboards are Stage 14).
- Operational metrics (variant attribute on game-scoped ones): game_replay_duration,
game_move_validate_duration, games_started_total, games_abandoned_total,
game_cache_active, chat_messages_total{kind}, gateway edge_request_duration.
Wired via the SetMetrics setter pattern (default no-op meter).
- TODO-3: account.GuestReaper deletes guests with no game seat past
BACKEND_GUEST_RETENTION (default 30d, swept every BACKEND_GUEST_REAP_INTERVAL).
- Tests: pkg/telemetry exporter selection; game/social/edge metric recording via
a manual reader; config (otlp accepted, guest knobs); inttest guest reaper.
- Docs: PLAN.md re-scopes Stage 12 and adds Stage 13 (alphabet-on-wire) + Stage 14
(CI/deploy) with the agreed dictionary-versioning resolution; ARCHITECTURE 11/13,
TESTING, the three READMEs and FUNCTIONAL(+ru) updated.
This commit is contained in:
@@ -0,0 +1,95 @@
|
||||
package game
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/metric/metricdata"
|
||||
|
||||
"scrabble/backend/internal/engine"
|
||||
)
|
||||
|
||||
// TestGameMetrics records each game instrument through a manual reader and asserts
|
||||
// the counters carry the right "variant" attribute and the histograms observe.
|
||||
func TestGameMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
reader := sdkmetric.NewManualReader()
|
||||
meter := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)).Meter("test")
|
||||
m := newGameMetrics(meter)
|
||||
|
||||
m.recordStarted(ctx, engine.VariantEnglish)
|
||||
m.recordStarted(ctx, engine.VariantEnglish)
|
||||
m.recordStarted(ctx, engine.VariantRussianScrabble)
|
||||
m.recordAbandoned(ctx, engine.VariantErudit)
|
||||
m.recordReplay(ctx, engine.VariantEnglish, time.Now().Add(-time.Millisecond))
|
||||
m.recordValidate(ctx, engine.VariantRussianScrabble, time.Now().Add(-time.Millisecond))
|
||||
|
||||
var rm metricdata.ResourceMetrics
|
||||
if err := reader.Collect(ctx, &rm); err != nil {
|
||||
t.Fatalf("collect: %v", err)
|
||||
}
|
||||
|
||||
started := counterByAttr(t, rm, "games_started_total", "variant")
|
||||
if started["english"] != 2 || started["russian_scrabble"] != 1 {
|
||||
t.Errorf("games_started_total = %v, want english:2 russian_scrabble:1", started)
|
||||
}
|
||||
if abandoned := counterByAttr(t, rm, "games_abandoned_total", "variant"); abandoned["erudit"] != 1 {
|
||||
t.Errorf("games_abandoned_total = %v, want erudit:1", abandoned)
|
||||
}
|
||||
if c := histogramCount(t, rm, "game_replay_duration"); c != 1 {
|
||||
t.Errorf("game_replay_duration observations = %d, want 1", c)
|
||||
}
|
||||
if c := histogramCount(t, rm, "game_move_validate_duration"); c != 1 {
|
||||
t.Errorf("game_move_validate_duration observations = %d, want 1", c)
|
||||
}
|
||||
}
|
||||
|
||||
// counterByAttr sums the int64 counter named name, grouped by the value of the
|
||||
// attribute key attr.
|
||||
func counterByAttr(t *testing.T, rm metricdata.ResourceMetrics, name, attr string) map[string]int64 {
|
||||
t.Helper()
|
||||
out := map[string]int64{}
|
||||
for _, sm := range rm.ScopeMetrics {
|
||||
for _, md := range sm.Metrics {
|
||||
if md.Name != name {
|
||||
continue
|
||||
}
|
||||
sum, ok := md.Data.(metricdata.Sum[int64])
|
||||
if !ok {
|
||||
t.Fatalf("%s is not an int64 sum", name)
|
||||
}
|
||||
for _, dp := range sum.DataPoints {
|
||||
v, _ := dp.Attributes.Value(attribute.Key(attr))
|
||||
out[v.AsString()] += dp.Value
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// histogramCount returns the total observation count of the float64 histogram
|
||||
// named name.
|
||||
func histogramCount(t *testing.T, rm metricdata.ResourceMetrics, name string) uint64 {
|
||||
t.Helper()
|
||||
for _, sm := range rm.ScopeMetrics {
|
||||
for _, md := range sm.Metrics {
|
||||
if md.Name != name {
|
||||
continue
|
||||
}
|
||||
h, ok := md.Data.(metricdata.Histogram[float64])
|
||||
if !ok {
|
||||
t.Fatalf("%s is not a float64 histogram", name)
|
||||
}
|
||||
var n uint64
|
||||
for _, dp := range h.DataPoints {
|
||||
n += dp.Count
|
||||
}
|
||||
return n
|
||||
}
|
||||
}
|
||||
t.Fatalf("%s not found", name)
|
||||
return 0
|
||||
}
|
||||
Reference in New Issue
Block a user