Stage 16: deploy infra & test contour
- backend + gateway multi-stage distroless Dockerfiles; the gateway embeds and
serves the SPA at / and /telegram/ via go:embed (committed dist placeholder,
real build baked in by the image's node stage)
- deploy/docker-compose.yml: backend + gateway + Postgres + Telegram connector
(VPN sidecar) + OTel Collector + Prometheus (15d) + Tempo (72h) + Grafana,
fronted by a caddy owning a single /_gm Basic-Auth (admin console + Grafana
subpath); inter-service on a private network, only caddy on the edge network
- new metrics: backend accounts_created_total{kind} (robots excluded) and an
in-memory gateway active_users{window=24h,7d} gauge
- CI: single .gitea/workflows/ci.yaml (unit/integration/ui + a gated test-contour
deploy) on the new feature/* -> development -> master branch model; the old
go-unit/integration/ui-test workflows are folded in; the connector-scoped
compose is retired (superseded by deploy/)
- docs: ARCHITECTURE §11/§12/§13, root + gateway READMEs, CLAUDE.md branching,
PLAN.md (stage 16 done + refinements + Stage 17 forward-notes)
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
# Environment for deploy/docker-compose.yml. The CI deploy job (ci.yaml) maps the
|
||||
# Gitea TEST_-prefixed secrets/variables onto these unprefixed names; Stage 17
|
||||
# maps the PROD_-prefixed set the same way. Copy to deploy/.env for a local run.
|
||||
|
||||
# --- Postgres ---------------------------------------------------------------
|
||||
POSTGRES_DB=scrabble
|
||||
POSTGRES_USER=scrabble
|
||||
POSTGRES_PASSWORD=change-me # required
|
||||
|
||||
# --- Dictionary -------------------------------------------------------------
|
||||
DICT_VERSION=v1.0.0 # scrabble-dictionary release tag (image build-arg)
|
||||
|
||||
# --- Logging ----------------------------------------------------------------
|
||||
LOG_LEVEL=info
|
||||
|
||||
# --- Edge / caddy -----------------------------------------------------------
|
||||
# Test: ":80" (the host caddy terminates TLS and forwards to scrabble:80 on the
|
||||
# external `edge` network). Prod (Stage 17): a domain so caddy does its own ACME.
|
||||
CADDY_SITE_ADDRESS=:80
|
||||
GM_BASICAUTH_USER=gm
|
||||
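GM_BASICAUTH_HASH= # required; `caddy hash-password` bcrypt hash (single-quote the value in deploy/.env: the hash contains `$`, which Compose would otherwise interpolate)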
|
||||
|
||||
# --- UI build args (baked into the gateway image) ---------------------------
|
||||
VITE_TELEGRAM_BOT_ID=
|
||||
VITE_TELEGRAM_LINK=
|
||||
VITE_GATEWAY_URL=
|
||||
|
||||
# --- Gateway ----------------------------------------------------------------
|
||||
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES=en,ru
|
||||
|
||||
# --- Grafana ----------------------------------------------------------------
|
||||
GRAFANA_ROOT_URL=/_gm/grafana/ # set the full https URL behind a real domain
|
||||
GRAFANA_ADMIN_PASSWORD=admin
|
||||
|
||||
# --- Telegram connector -----------------------------------------------------
|
||||
AWG_CONF= # required; AmneziaWG sidecar config
|
||||
TELEGRAM_BOT_TOKEN_EN= # at least one of EN/RU required
|
||||
TELEGRAM_BOT_TOKEN_RU=
|
||||
TELEGRAM_GAME_CHANNEL_ID_EN=
|
||||
TELEGRAM_GAME_CHANNEL_ID_RU=
|
||||
TELEGRAM_MINIAPP_URL= # required
|
||||
TELEGRAM_TEST_ENV=false
|
||||
TELEGRAM_API_BASE_URL=
|
||||
@@ -0,0 +1,35 @@
|
||||
# Edge reverse proxy for the Scrabble contour. A single Basic-Auth gate covers
|
||||
# every operator surface under /_gm (the backend-rendered admin console and the
|
||||
# Grafana subpath); everything else (the SPA at / and /telegram/, plus the
|
||||
# Connect edge) goes to the gateway. Mirrors ../galaxy-game's /_gm model.
|
||||
#
|
||||
# CADDY_SITE_ADDRESS is ":80" in the test contour (the host caddy terminates TLS
|
||||
# and forwards); set it to a domain in prod (Stage 17) so this caddy does its own
|
||||
# ACME and the contour is self-contained.
|
||||
{
|
||||
admin off
|
||||
}
|
||||
|
||||
{$CADDY_SITE_ADDRESS::80} {
|
||||
# Operator surfaces under /_gm: a single shared Basic-Auth, then route.
|
||||
@gm path /_gm /_gm/*
|
||||
handle @gm {
|
||||
basic_auth {
|
||||
{$GM_BASICAUTH_USER:gm} {$GM_BASICAUTH_HASH}
|
||||
}
|
||||
# Grafana serves from this sub-path (GF_SERVER_SERVE_FROM_SUB_PATH=true), so
|
||||
# the prefix is forwarded intact, not stripped.
|
||||
handle /_gm/grafana* {
|
||||
reverse_proxy grafana:3000
|
||||
}
|
||||
# Everything else under /_gm is the backend-rendered admin console.
|
||||
handle {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
}
|
||||
|
||||
# The SPA (/, /telegram/) and the Connect edge are served by the gateway.
|
||||
handle {
|
||||
reverse_proxy gateway:8081
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,217 @@
|
||||
# Full deploy descriptor for the Scrabble test contour: backend + gateway +
|
||||
# Postgres + the Telegram connector (with its VPN sidecar) + the observability
|
||||
# stack (OTel Collector -> Prometheus + Tempo -> Grafana). Driven by
|
||||
# .gitea/workflows/ci.yaml (`docker compose up -d --build`); env values are
|
||||
# interpolated from Gitea Actions TEST_ secrets/variables exported by the deploy
|
||||
# job (see deploy/.env.example for the unprefixed names).
|
||||
#
|
||||
# Networking (mirrors ../galaxy-game):
|
||||
# - `internal` (scrabble-internal): all inter-service traffic, project-private
|
||||
# DNS so service names never collide on the shared `edge` network.
|
||||
# - `edge` (external): the host caddy reaches this contour at `scrabble:80`
|
||||
# (the in-compose caddy's alias). The in-compose caddy terminates only HTTP in
|
||||
# the test contour; the host caddy terminates TLS and forwards. For prod
|
||||
# (Stage 17, no host caddy) set CADDY_SITE_ADDRESS to the domain so the caddy
|
||||
# does its own ACME — the contour is then self-contained.
|
||||
# - The connector egresses to api.telegram.org through the `vpn` sidecar
|
||||
# (network_mode: service:vpn); it answers internal gRPC at `telegram:9091` via the `telegram` alias on the vpn service, whose network namespace it shares.
|
||||
name: scrabble
|
||||
|
||||
services:
|
||||
postgres:
|
||||
container_name: scrabble-postgres
|
||||
image: postgres:17-alpine
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-scrabble}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-scrabble}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-scrabble} -d ${POSTGRES_DB:-scrabble}"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 30
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
networks: [internal]
|
||||
|
||||
backend:
|
||||
container_name: scrabble-backend
|
||||
image: scrabble-backend:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: backend/Dockerfile
|
||||
args:
|
||||
DICT_VERSION: ${DICT_VERSION:-v1.0.0}
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
# search_path=backend matches the migrations (00001 creates the schema).
|
||||
BACKEND_POSTGRES_DSN: postgres://${POSTGRES_USER:-scrabble}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable&search_path=backend
|
||||
BACKEND_HTTP_ADDR: ":8080"
|
||||
BACKEND_GRPC_ADDR: ":9090"
|
||||
BACKEND_CONNECTOR_ADDR: telegram:9091
|
||||
BACKEND_LOG_LEVEL: ${LOG_LEVEL:-info}
|
||||
BACKEND_SERVICE_NAME: scrabble-backend
|
||||
BACKEND_OTEL_TRACES_EXPORTER: otlp
|
||||
BACKEND_OTEL_METRICS_EXPORTER: otlp
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
# No container healthcheck: the distroless image has no shell/wget. Readiness
|
||||
# is covered only indirectly by the CI post-deploy probe (GET / through caddy, which the Caddyfile routes to the gateway rather than to this service).
|
||||
networks: [internal]
|
||||
|
||||
gateway:
|
||||
container_name: scrabble-gateway
|
||||
image: scrabble-gateway:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: gateway/Dockerfile
|
||||
args:
|
||||
VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
|
||||
VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
|
||||
VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
|
||||
restart: unless-stopped
|
||||
depends_on: [backend]
|
||||
environment:
|
||||
GATEWAY_HTTP_ADDR: ":8081"
|
||||
GATEWAY_BACKEND_HTTP_URL: http://backend:8080
|
||||
GATEWAY_BACKEND_GRPC_ADDR: backend:9090
|
||||
GATEWAY_CONNECTOR_ADDR: telegram:9091
|
||||
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES: ${GATEWAY_DEFAULT_SUPPORTED_LANGUAGES:-en,ru}
|
||||
GATEWAY_LOG_LEVEL: ${LOG_LEVEL:-info}
|
||||
GATEWAY_SERVICE_NAME: scrabble-gateway
|
||||
GATEWAY_OTEL_TRACES_EXPORTER: otlp
|
||||
GATEWAY_OTEL_METRICS_EXPORTER: otlp
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
# GATEWAY_ADMIN_* intentionally unset: in the deployed contour the front
|
||||
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
|
||||
networks: [internal]
|
||||
|
||||
# --- Telegram connector (egress via the VPN sidecar) -----------------------
|
||||
vpn:
|
||||
container_name: scrabble-telegram-vpn
|
||||
image: docker.iliadenisov.ru/developer/amneziawg-sidecar:latest
|
||||
restart: unless-stopped
|
||||
privileged: true
|
||||
environment:
|
||||
AWG_CONF: ${AWG_CONF:?set AWG_CONF}
|
||||
networks:
|
||||
internal:
|
||||
aliases: [telegram]
|
||||
|
||||
telegram:
|
||||
container_name: scrabble-telegram
|
||||
image: scrabble-telegram:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: platform/telegram/Dockerfile
|
||||
restart: unless-stopped
|
||||
depends_on: [vpn]
|
||||
network_mode: "service:vpn"
|
||||
environment:
|
||||
# The bot tokens live ONLY in this container (ARCHITECTURE.md §12). At least
|
||||
# one token is required (the connector validates this at boot).
|
||||
TELEGRAM_BOT_TOKEN_EN: ${TELEGRAM_BOT_TOKEN_EN:-}
|
||||
TELEGRAM_BOT_TOKEN_RU: ${TELEGRAM_BOT_TOKEN_RU:-}
|
||||
TELEGRAM_GAME_CHANNEL_ID_EN: ${TELEGRAM_GAME_CHANNEL_ID_EN:-}
|
||||
TELEGRAM_GAME_CHANNEL_ID_RU: ${TELEGRAM_GAME_CHANNEL_ID_RU:-}
|
||||
TELEGRAM_MINIAPP_URL: ${TELEGRAM_MINIAPP_URL:?set TELEGRAM_MINIAPP_URL}
|
||||
TELEGRAM_GRPC_ADDR: ":9091"
|
||||
TELEGRAM_TEST_ENV: ${TELEGRAM_TEST_ENV:-false}
|
||||
TELEGRAM_API_BASE_URL: ${TELEGRAM_API_BASE_URL:-}
|
||||
TELEGRAM_LOG_LEVEL: ${LOG_LEVEL:-info}
|
||||
TELEGRAM_SERVICE_NAME: scrabble-telegram
|
||||
TELEGRAM_OTEL_TRACES_EXPORTER: otlp
|
||||
TELEGRAM_OTEL_METRICS_EXPORTER: otlp
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
|
||||
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway) --
|
||||
caddy:
|
||||
container_name: scrabble-caddy
|
||||
image: caddy:2-alpine
|
||||
restart: unless-stopped
|
||||
depends_on: [gateway, backend, grafana]
|
||||
environment:
|
||||
# Test: ":80" (host caddy terminates TLS). Prod: a domain for own ACME.
|
||||
CADDY_SITE_ADDRESS: ${CADDY_SITE_ADDRESS:-:80}
|
||||
GM_BASICAUTH_USER: ${GM_BASICAUTH_USER:-gm}
|
||||
GM_BASICAUTH_HASH: ${GM_BASICAUTH_HASH:?set GM_BASICAUTH_HASH}
|
||||
volumes:
|
||||
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
|
||||
- caddy-data:/data
|
||||
networks:
|
||||
internal: {}
|
||||
edge:
|
||||
aliases: [scrabble]
|
||||
|
||||
# --- Observability ---------------------------------------------------------
|
||||
otelcol:
|
||||
container_name: scrabble-otelcol
|
||||
image: otel/opentelemetry-collector-contrib:0.119.0
|
||||
restart: unless-stopped
|
||||
command: ["--config=/etc/otelcol/config.yaml"]
|
||||
volumes:
|
||||
- ./otelcol/config.yaml:/etc/otelcol/config.yaml:ro
|
||||
networks: [internal]
|
||||
|
||||
prometheus:
|
||||
container_name: scrabble-prometheus
|
||||
image: prom/prometheus:v2.55.1
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --config.file=/etc/prometheus/prometheus.yml
|
||||
- --storage.tsdb.retention.time=15d
|
||||
volumes:
|
||||
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||
- prometheus-data:/prometheus
|
||||
networks: [internal]
|
||||
|
||||
tempo:
|
||||
container_name: scrabble-tempo
|
||||
image: grafana/tempo:2.7.1
|
||||
restart: unless-stopped
|
||||
command: ["-config.file=/etc/tempo/tempo.yaml"]
|
||||
volumes:
|
||||
- ./tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
|
||||
- tempo-data:/var/tempo
|
||||
networks: [internal]
|
||||
|
||||
grafana:
|
||||
container_name: scrabble-grafana
|
||||
image: grafana/grafana:11.4.0
|
||||
restart: unless-stopped
|
||||
depends_on: [prometheus, tempo]
|
||||
environment:
|
||||
# Served under /_gm/grafana behind caddy's Basic-Auth; anonymous Admin so a
|
||||
# single shared login (caddy) gates it with no per-user Grafana accounts.
|
||||
GF_SERVER_ROOT_URL: ${GRAFANA_ROOT_URL:-/_gm/grafana/}
|
||||
GF_SERVER_SERVE_FROM_SUB_PATH: "true"
|
||||
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||
GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
|
||||
GF_AUTH_DISABLE_LOGIN_FORM: "true"
|
||||
GF_AUTH_BASIC_ENABLED: "false"
|
||||
GF_USERS_ALLOW_SIGN_UP: "false"
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||
- grafana-data:/var/lib/grafana
|
||||
networks: [internal]
|
||||
|
||||
networks:
|
||||
internal:
|
||||
name: scrabble-internal
|
||||
edge:
|
||||
external: true
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
caddy-data:
|
||||
prometheus-data:
|
||||
tempo-data:
|
||||
grafana-data:
|
||||
@@ -0,0 +1,39 @@
|
||||
{
|
||||
"uid": "scrabble-edge",
|
||||
"title": "Scrabble — Edge / UX",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-6h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Edge request rate by message type",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (message_type)", "legendFormat": "{{message_type}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Edge p95 latency",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p95" },
|
||||
{ "refId": "B", "expr": "histogram_quantile(0.50, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p50" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Edge requests by result",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (result)", "legendFormat": "{{result}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
{
|
||||
"uid": "scrabble-game",
|
||||
"title": "Scrabble — Game domain",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Games started / abandoned (rate by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "refId": "A", "expr": "sum(rate(games_started_total[15m])) by (variant)", "legendFormat": "started {{variant}}" },
|
||||
{ "refId": "B", "expr": "sum(rate(games_abandoned_total[15m])) by (variant)", "legendFormat": "abandoned {{variant}}" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Robot games finished (rate)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(robot_games_finished_total[15m]))", "legendFormat": "robot games" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Live games in cache (by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(game_cache_active) by (variant)", "legendFormat": "{{variant}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Chat messages (rate by kind)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(chat_messages_total[15m])) by (kind)", "legendFormat": "{{kind}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Journal replay p95 (by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
|
||||
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_replay_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Move validate p95 (by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
|
||||
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_move_validate_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
{
|
||||
"uid": "scrabble-overview",
|
||||
"title": "Scrabble — Service overview",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-6h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Active users (24h)",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"24h\"})" }]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Active users (7d)",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"7d\"})" }]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Edge requests/s",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m]))" }]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Edge error ratio",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 },
|
||||
"fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count{result!=\"ok\"}[5m])) / clamp_min(sum(rate(edge_request_duration_count[5m])), 1e-9)" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Goroutines by service",
|
||||
"description": "OTel Go runtime metric; verify the exact name against live Prometheus if empty (go_goroutine_count / process_runtime_go_goroutines depending on the contrib runtime version).",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "go_goroutine_count", "legendFormat": "{{service_name}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Heap memory used by service",
|
||||
"description": "OTel Go runtime metric (best-effort name go_memory_used); verify against live Prometheus if empty.",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
|
||||
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(go_memory_used) by (service_name)", "legendFormat": "{{service_name}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"uid": "scrabble-users",
|
||||
"title": "Scrabble — Users",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-7d", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Active users (in-memory, single gateway)",
|
||||
"description": "Distinct accounts with an authenticated action within the window. Resets on gateway restart; correct for a single instance (MVP).",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "max(active_users) by (window)", "legendFormat": "{{window}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "New accounts (rate by kind)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(accounts_created_total[1h])) by (kind)", "legendFormat": "{{kind}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "New accounts (cumulative by kind)",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(accounts_created_total) by (kind)", "legendFormat": "{{kind}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
# Loads the committed dashboard JSON from /var/lib/grafana/dashboards (mounted
|
||||
# read-only from deploy/grafana/dashboards).
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: scrabble
|
||||
orgId: 1
|
||||
folder: Scrabble
|
||||
type: file
|
||||
disableDeletion: false
|
||||
editable: true
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: false
|
||||
@@ -0,0 +1,16 @@
|
||||
# Grafana datasources for the Scrabble contour, provisioned at startup. Metrics
|
||||
# come from Prometheus (scraping the collector) and traces from Tempo.
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
uid: tempo
|
||||
access: proxy
|
||||
url: http://tempo:3200
|
||||
@@ -0,0 +1,38 @@
|
||||
# OpenTelemetry Collector for the Scrabble contour. Receives OTLP/gRPC from the
|
||||
# three services (backend, gateway, connector — pkg/telemetry exports OTLP only),
|
||||
# fans metrics out to a Prometheus scrape endpoint and traces to Tempo.
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
processors:
|
||||
batch: {}
|
||||
|
||||
exporters:
|
||||
# Exposes the collected metrics for Prometheus to scrape (otelcol:9464/metrics).
|
||||
# add_metric_suffixes:false keeps the instrument names verbatim (no _seconds /
|
||||
# _total unit/type suffixes) so the dashboards' PromQL matches the names defined
|
||||
# in code; resource_to_telemetry_conversion promotes service.name to a label.
|
||||
prometheus:
|
||||
endpoint: 0.0.0.0:9464
|
||||
add_metric_suffixes: false
|
||||
resource_to_telemetry_conversion:
|
||||
enabled: true
|
||||
# Forwards traces to Tempo's OTLP ingest.
|
||||
otlp/tempo:
|
||||
endpoint: tempo:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [otlp/tempo]
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [prometheus]
|
||||
@@ -0,0 +1,14 @@
|
||||
# Prometheus scrape config for the Scrabble contour. The OTel Collector exposes
|
||||
# every service's metrics on its prometheus exporter; Prometheus scrapes that one
|
||||
# endpoint. Retention (15d) is set on the command line in docker-compose.yml.
|
||||
global:
|
||||
scrape_interval: 30s
|
||||
evaluation_interval: 30s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: otelcol
|
||||
static_configs:
|
||||
- targets: ["otelcol:9464"]
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets: ["localhost:9090"]
|
||||
@@ -0,0 +1,26 @@
|
||||
# Tempo for the Scrabble contour: single-binary, local filesystem storage, OTLP
|
||||
# ingest from the collector, 72h block retention.
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
ingester:
|
||||
max_block_duration: 5m
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
block_retention: 72h
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: local
|
||||
local:
|
||||
path: /var/tempo/blocks
|
||||
wal:
|
||||
path: /var/tempo/wal
|
||||
Reference in New Issue
Block a user