Stage 16: deploy infra & test contour
- backend + gateway multi-stage distroless Dockerfiles; the gateway embeds and
serves the SPA at / and /telegram/ via go:embed (committed dist placeholder,
real build baked in by the image's node stage)
- deploy/docker-compose.yml: backend + gateway + Postgres + Telegram connector
(VPN sidecar) + OTel Collector + Prometheus (15d) + Tempo (72h) + Grafana,
fronted by a Caddy instance owning a single /_gm Basic-Auth gate (admin console +
Grafana subpath); inter-service traffic stays on a private network, only Caddy is on the edge network
- new metrics: backend accounts_created_total{kind} (robots excluded) and an
in-memory gateway active_users{window=24h,7d} gauge
- CI: single .gitea/workflows/ci.yaml (unit/integration/ui + a gated test-contour
deploy) on the new feature/* -> development -> master branch model; the old
go-unit/integration/ui-test workflows are folded in; the connector-scoped
compose is retired (superseded by deploy/)
- docs: ARCHITECTURE §11/§12/§13, root + gateway READMEs, CLAUDE.md branching,
PLAN.md (stage 16 done + refinements + Stage 17 forward-notes)
This commit is contained in:
@@ -12,14 +12,26 @@ import (
|
||||
// meterName scopes the gateway edge's OpenTelemetry instruments.
|
||||
const meterName = "scrabble/gateway/edge"
|
||||
|
||||
// activeUserWindows are the rolling windows the active_users gauge reports.
// Each entry pairs the value of the gauge's "window" attribute with the
// look-back duration handed to the in-memory tracker. The gauge callback reads
// the tracker's counts by position, so entry order here fixes which count gets
// which label.
|
||||
var activeUserWindows = []struct {
|
||||
label string // emitted as the "window" attribute on each observation
|
||||
dur time.Duration // look-back span passed to the tracker's counts call
|
||||
}{
|
||||
{label: "24h", dur: 24 * time.Hour},
|
||||
{label: "7d", dur: 7 * 24 * time.Hour},
|
||||
}
|
||||
|
||||
// serverMetrics holds the edge's operational instruments. It defaults to no-ops;
|
||||
// NewServer installs the real meter when one is supplied in Deps.
|
||||
type serverMetrics struct {
|
||||
edge metric.Float64Histogram // NOTE(review): this field line is repeated just below — presumably the removed/added pair of the diff hunk; confirm only one declaration survives
|
||||
edge metric.Float64Histogram // histogram created as "edge_request_duration" in newServerMetrics
|
||||
active *activeUsers // in-memory tracker: written by recordActive, read by the active_users gauge callback
|
||||
}
|
||||
|
||||
// newServerMetrics builds the instruments on meter (nil selects a no-op meter),
|
||||
// falling back to a no-op histogram on the (rare) construction error.
|
||||
// falling back to a no-op histogram on the (rare) construction error. The
|
||||
// active_users gauge is registered as an observable callback over the in-memory
|
||||
// tracker.
|
||||
func newServerMetrics(meter metric.Meter) *serverMetrics {
|
||||
if meter == nil {
|
||||
meter = noop.NewMeterProvider().Meter(meterName)
|
||||
@@ -30,7 +42,24 @@ func newServerMetrics(meter metric.Meter) *serverMetrics {
|
||||
if err != nil {
|
||||
h, _ = noop.NewMeterProvider().Meter(meterName).Float64Histogram("edge_request_duration")
|
||||
}
|
||||
return &serverMetrics{edge: h}
|
||||
m := &serverMetrics{edge: h, active: newActiveUsers()}
|
||||
|
||||
gauge, err := meter.Int64ObservableGauge("active_users",
|
||||
metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
|
||||
if err == nil {
|
||||
windows := make([]time.Duration, len(activeUserWindows))
|
||||
for i, w := range activeUserWindows {
|
||||
windows[i] = w.dur
|
||||
}
|
||||
_, _ = meter.RegisterCallback(func(_ context.Context, o metric.Observer) error {
|
||||
counts := m.active.counts(windows)
|
||||
for i, w := range activeUserWindows {
|
||||
o.ObserveInt64(gauge, int64(counts[i]), metric.WithAttributes(attribute.String("window", w.label)))
|
||||
}
|
||||
return nil
|
||||
}, gauge)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// recordEdge records the duration of one Execute call labelled by message type and
|
||||
@@ -41,3 +70,8 @@ func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string,
|
||||
attribute.String("result", result),
|
||||
))
|
||||
}
|
||||
|
||||
// recordActive marks account uid active now, feeding the active_users gauge.
// It only forwards uid to the in-memory tracker (m.active.seen); the gauge
// callback registered in newServerMetrics later reads that tracker's counts.
// NOTE(review): assumes m.active is non-nil, as newServerMetrics sets it —
// confirm no serverMetrics value is built without the tracker.
|
||||
func (m *serverMetrics) recordActive(uid string) {
|
||||
m.active.seen(uid)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user