Stage 16: deploy infra & test contour
CI / unit (pull_request) Successful in 9s
CI / integration (pull_request) Successful in 11s
CI / ui (pull_request) Successful in 19s
CI / deploy (pull_request) Failing after 1s

- backend + gateway multi-stage distroless Dockerfiles; the gateway embeds and
  serves the SPA at / and /telegram/ via go:embed (committed dist placeholder,
  real build baked in by the image's node stage)
- deploy/docker-compose.yml: backend + gateway + Postgres + Telegram connector
  (VPN sidecar) + OTel Collector + Prometheus (15d) + Tempo (72h) + Grafana,
  fronted by a caddy owning a single /_gm Basic-Auth (admin console + Grafana
  subpath); inter-service on a private network, only caddy on the edge network
- new metrics: backend accounts_created_total{kind} (robots excluded) and an
  in-memory gateway active_users{window=24h,7d} gauge
- CI: single .gitea/workflows/ci.yaml (unit/integration/ui + a gated test-contour
  deploy) on the new feature/* -> development -> master branch model; the old
  go-unit/integration/ui-test workflows are folded in; the connector-scoped
  compose is retired (superseded by deploy/)
- docs: ARCHITECTURE §11/§12/§13, root + gateway READMEs, CLAUDE.md branching,
  PLAN.md (stage 16 done + refinements + Stage 17 forward-notes)
This commit is contained in:
Ilia Denisov
2026-06-05 11:42:26 +02:00
parent 8c8f8c4d42
commit 8700fbfae1
35 changed files with 1413 additions and 318 deletions
+39
View File
@@ -0,0 +1,39 @@
{
"uid": "scrabble-edge",
"title": "Scrabble — Edge / UX",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-6h", "to": "now" },
"panels": [
{
"type": "timeseries",
"title": "Edge request rate by message type",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (message_type)", "legendFormat": "{{message_type}}" }]
},
{
"type": "timeseries",
"title": "Edge p95 latency",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [
{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p95" },
{ "refId": "B", "expr": "histogram_quantile(0.50, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p50" }
]
},
{
"type": "timeseries",
"title": "Edge requests by result",
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (result)", "legendFormat": "{{result}}" }]
}
]
}
@@ -0,0 +1,59 @@
{
"uid": "scrabble-game",
"title": "Scrabble — Game domain",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-24h", "to": "now" },
"panels": [
{
"type": "timeseries",
"title": "Games started / abandoned (rate by variant)",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [
{ "refId": "A", "expr": "sum(rate(games_started_total[15m])) by (variant)", "legendFormat": "started {{variant}}" },
{ "refId": "B", "expr": "sum(rate(games_abandoned_total[15m])) by (variant)", "legendFormat": "abandoned {{variant}}" }
]
},
{
"type": "timeseries",
"title": "Robot games finished (rate)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(robot_games_finished_total[15m]))", "legendFormat": "robot games" }]
},
{
"type": "timeseries",
"title": "Live games in cache (by variant)",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(game_cache_active) by (variant)", "legendFormat": "{{variant}}" }]
},
{
"type": "timeseries",
"title": "Chat messages (rate by kind)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(chat_messages_total[15m])) by (kind)", "legendFormat": "{{kind}}" }]
},
{
"type": "timeseries",
"title": "Journal replay p95 (by variant)",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_replay_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
},
{
"type": "timeseries",
"title": "Move validate p95 (by variant)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_move_validate_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
}
]
}
@@ -0,0 +1,58 @@
{
"uid": "scrabble-overview",
"title": "Scrabble — Service overview",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-6h", "to": "now" },
"panels": [
{
"type": "stat",
"title": "Active users (24h)",
"gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"24h\"})" }]
},
{
"type": "stat",
"title": "Active users (7d)",
"gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"7d\"})" }]
},
{
"type": "stat",
"title": "Edge requests/s",
"gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m]))" }]
},
{
"type": "stat",
"title": "Edge error ratio",
"gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 },
"fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count{result!=\"ok\"}[5m])) / clamp_min(sum(rate(edge_request_duration_count[5m])), 1)" }]
},
{
"type": "timeseries",
"title": "Goroutines by service",
"description": "OTel Go runtime metric; verify the exact name against live Prometheus if empty (go_goroutine_count / process_runtime_go_goroutines depending on the contrib runtime version).",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "go_goroutine_count", "legendFormat": "{{service_name}}" }]
},
{
"type": "timeseries",
"title": "Heap memory used by service",
"description": "OTel Go runtime metric (best-effort name go_memory_used); verify against live Prometheus if empty.",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(go_memory_used) by (service_name)", "legendFormat": "{{service_name}}" }]
}
]
}
+34
View File
@@ -0,0 +1,34 @@
{
"uid": "scrabble-users",
"title": "Scrabble — Users",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-7d", "to": "now" },
"panels": [
{
"type": "timeseries",
"title": "Active users (in-memory, single gateway)",
"description": "Distinct accounts with an authenticated action within the window. Resets on gateway restart; correct for a single instance (MVP).",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "max(active_users) by (window)", "legendFormat": "{{window}}" }]
},
{
"type": "timeseries",
"title": "New accounts (rate by kind)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(accounts_created_total[1h])) by (kind)", "legendFormat": "{{kind}}" }]
},
{
"type": "timeseries",
"title": "New accounts (cumulative by kind)",
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(accounts_created_total) by (kind)", "legendFormat": "{{kind}}" }]
}
]
}
@@ -0,0 +1,15 @@
# Loads the committed dashboard JSON from /var/lib/grafana/dashboards (mounted
# read-only from deploy/grafana/dashboards).
apiVersion: 1
providers:
- name: scrabble
orgId: 1
folder: Scrabble
type: file
disableDeletion: false
editable: true
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: false
@@ -0,0 +1,16 @@
# Grafana datasources for the Scrabble contour, provisioned at startup. Metrics
# come from Prometheus (scraping the collector) and traces from Tempo.
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
- name: Tempo
type: tempo
uid: tempo
access: proxy
url: http://tempo:3200