Stage 16: deploy infra & test contour #17

Merged
developer merged 4 commits from feature/stage-16-deploy-test-contour into development 2026-06-05 15:00:46 +00:00
38 changed files with 1587 additions and 331 deletions
+210
View File
@@ -0,0 +1,210 @@
name: CI
# The one gated pipeline for the Stage 16 test contour. The full test suite and
# the automatic test-deploy share this workflow because Gitea cannot express a
# `needs` dependency across workflows.
#
# Branch model (CLAUDE.md):
#   - feature branches are cut from `development`; committing to one triggers
#     nothing;
#   - a PR into `development` or `master` runs the full test suite (the merge
#     gate);
#   - a push to `development` (i.e. after a merge) runs it again;
#   - the deploy job runs only for `development` (PR or merge), so a PR into
#     `master` is test-only and the prod deploy stays a manual workflow
#     (Stage 18).
#
# Gitea logs stay readable because console output is kept plain: NO_COLOR,
# `docker compose --ansi never` and `--progress plain`.
on:
  pull_request:
    branches:
      - development
      - master
  push:
    branches:
      - development
jobs:
unit:
  # Fast Go checks for the monorepo: gofmt, vet, build and the untagged tests.
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  env:
    # The engine consumes the published scrabble-solver module from this Gitea;
    # GOPRIVATE makes go fetch it directly (skipping the public proxy/checksum DB).
    GOPRIVATE: gitea.iliadenisov.ru/*
    # Selects the dictionary DAWG release the engine tests load.
    DICT_VERSION: v1.0.0
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Fetch dictionary DAWGs
      # The DAWGs ship as a release artifact of the scrabble-dictionary repo.
      # --retry re-attempts transient download failures so a brief network
      # blip does not fail the merge gate.
      run: |
        mkdir -p "${GITHUB_WORKSPACE}/dawg"
        curl -fsSL --retry 3 --retry-delay 2 -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
        tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
        ls -la "${GITHUB_WORKSPACE}/dawg"
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version-file: go.work
        cache: true
    - name: gofmt
      # gofmt -l lists files whose formatting differs; any output fails the job.
      run: |
        unformatted="$(gofmt -l .)"
        if [ -n "$unformatted" ]; then
          echo "gofmt needed on:"; echo "$unformatted"; exit 1
        fi
    - name: vet
      run: go vet ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
    - name: build
      run: go build ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
    - name: test
      # -count=1 disables the test cache so a green run never depends on a
      # previous runner's cached state. BACKEND_DICT_DIR points the engine
      # tests at the DAWGs fetched above.
      env:
        BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
      run: go test -count=1 ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
integration:
  # Container-backed integration tests for the Go backend, gated behind the
  # `integration` build tag.
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  env:
    # Ryuk (testcontainers' reaper) does not start cleanly on every runner; the
    # suite's TestMain terminates its own container, so disable it.
    TESTCONTAINERS_RYUK_DISABLED: "true"
    # GOPRIVATE -> the solver module is fetched directly from this Gitea,
    # skipping the public proxy/checksum DB.
    GOPRIVATE: gitea.iliadenisov.ru/*
    # Selects the dictionary DAWG release the engine tests load.
    DICT_VERSION: v1.0.0
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Fetch dictionary DAWGs
      # The DAWGs ship as a release artifact of the scrabble-dictionary repo.
      # --retry re-attempts transient download failures so a brief network
      # blip does not fail the merge gate.
      run: |
        mkdir -p "${GITHUB_WORKSPACE}/dawg"
        curl -fsSL --retry 3 --retry-delay 2 -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
        tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
        ls -la "${GITHUB_WORKSPACE}/dawg"
    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version-file: go.work
        cache: true
    - name: Integration tests
      # -count=1 disables the cache; -p=1 -parallel=1 keeps the container-backed
      # tests serial; the 15-minute timeout bounds a stuck container pull.
      # BACKEND_DICT_DIR points the tests at the DAWGs fetched above.
      env:
        BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
      run: go test -tags=integration -count=1 -p=1 -parallel=1 -timeout=15m ./backend/...
ui:
  # UI checks, all run from the ui/ directory: type-check, unit tests, the
  # production build with its bundle-size budget, then a Playwright smoke
  # (Chromium + WebKit) against the mock.
  runs-on: ubuntu-latest
  defaults:
    run:
      working-directory: ui
      shell: bash
  steps:
    - uses: actions/checkout@v4
      name: Checkout
    - uses: actions/setup-node@v4
      name: Set up Node
      with:
        node-version: 22
    - name: Install pnpm
      run: npm install -g pnpm@11.0.9
    - name: Install deps
      run: pnpm install --frozen-lockfile
    - name: Type-check
      run: pnpm run check
    - name: Unit tests
      run: pnpm run test:unit
    - name: Build
      run: pnpm run build
    - name: Bundle-size budget
      run: node scripts/bundle-size.mjs
    # NOTE(review): the former ui-test workflow documented that the Playwright
    # system libraries are provisioned once on the runner host
    # (`playwright install-deps`), so this step only downloads the browser
    # binaries - confirm that still holds for this runner. The timeouts guard
    # against a hang.
    - name: Install Playwright browsers
      timeout-minutes: 5
      run: pnpm exec playwright install chromium webkit
    - name: E2E smoke (mock)
      timeout-minutes: 5
      run: pnpm run test:e2e
deploy:
  # Auto test-deploy on a PR into development and on the push that merges it.
  # A PR into master is test-only (this job is skipped); prod deploy is manual.
  needs: [unit, integration, ui]
  if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/development') || (github.event_name == 'pull_request' && github.base_ref == 'development') }}
  # Every run rolls the same compose project on the same Docker host, so two
  # overlapping runs (e.g. a PR update and a merge push) would interleave
  # `docker compose build/up`. Serialise them; never cancel a deploy mid-flight.
  # NOTE(review): relies on the Gitea Actions `concurrency` key - confirm this
  # instance's version honours it (older releases ignore the key).
  concurrency:
    group: test-contour-deploy
    cancel-in-progress: false
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  env:
    # Keep docker's console output plain so the Gitea logs stay readable.
    NO_COLOR: "1"
    DOCKER_CLI_HINTS: "false"
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build and (re)deploy the test contour
      working-directory: deploy
      env:
        # Sensitive values -> secrets; non-sensitive -> variables. The compose
        # interpolates these unprefixed names (see deploy/.env.example).
        POSTGRES_PASSWORD: ${{ secrets.TEST_POSTGRES_PASSWORD }}
        AWG_CONF: ${{ secrets.TEST_AWG_CONF }}
        GM_BASICAUTH_HASH: ${{ secrets.TEST_GM_BASICAUTH_HASH }}
        GRAFANA_ADMIN_PASSWORD: ${{ secrets.TEST_GRAFANA_ADMIN_PASSWORD }}
        TELEGRAM_BOT_TOKEN_EN: ${{ secrets.TEST_TELEGRAM_BOT_TOKEN_EN }}
        TELEGRAM_BOT_TOKEN_RU: ${{ secrets.TEST_TELEGRAM_BOT_TOKEN_RU }}
        GM_BASICAUTH_USER: ${{ vars.TEST_GM_BASICAUTH_USER }}
        GRAFANA_ROOT_URL: ${{ vars.TEST_GRAFANA_ROOT_URL }}
        CADDY_SITE_ADDRESS: ${{ vars.TEST_CADDY_SITE_ADDRESS }}
        TELEGRAM_MINIAPP_URL: ${{ vars.TEST_TELEGRAM_MINIAPP_URL }}
        TELEGRAM_GAME_CHANNEL_ID_EN: ${{ vars.TEST_TELEGRAM_GAME_CHANNEL_ID_EN }}
        TELEGRAM_GAME_CHANNEL_ID_RU: ${{ vars.TEST_TELEGRAM_GAME_CHANNEL_ID_RU }}
        # The test contour always uses Telegram's test environment — pinned here,
        # not an operator variable. Stage 18's prod workflow leaves it false.
        TELEGRAM_TEST_ENV: "true"
        VITE_TELEGRAM_BOT_ID: ${{ vars.TEST_VITE_TELEGRAM_BOT_ID }}
        VITE_TELEGRAM_LINK: ${{ vars.TEST_VITE_TELEGRAM_LINK }}
        VITE_GATEWAY_URL: ${{ vars.TEST_VITE_GATEWAY_URL }}
        GATEWAY_DEFAULT_SUPPORTED_LANGUAGES: ${{ vars.TEST_GATEWAY_DEFAULT_SUPPORTED_LANGUAGES }}
        # Unset vars render empty -> the compose ":-" defaults apply.
        POSTGRES_DB: ${{ vars.TEST_POSTGRES_DB }}
        POSTGRES_USER: ${{ vars.TEST_POSTGRES_USER }}
        DICT_VERSION: ${{ vars.TEST_DICT_VERSION }}
        LOG_LEVEL: ${{ vars.TEST_LOG_LEVEL }}
      run: |
        docker compose --ansi never build --progress plain
        docker compose --ansi never up -d --remove-orphans
    - name: Probe the gateway through caddy
      # Up to 20 attempts, 3 s apart: a throwaway container on the `edge`
      # network fetches http://scrabble/. The first success ends the step; if
      # every attempt fails, the gateway's recent logs are printed and the
      # step fails.
      run: |
        set -u
        for i in $(seq 1 20); do
          if docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/; then
            echo "healthy: GET http://scrabble/"
            exit 0
          fi
          sleep 3
        done
        echo "probe failed; recent gateway logs:"
        docker logs --tail 50 scrabble-gateway || true
        exit 1
    - name: Prune dangling images
      # Runs even when an earlier step failed, so superseded image layers do
      # not pile up on the runner host.
      if: always()
      run: docker image prune -f
-81
View File
@@ -1,81 +0,0 @@
name: Tests · Go
# Fast unit tests for the Go side of the monorepo. Runs on every push and pull
# request whose path filter matches a Go source directory. The module list
# grows as new go.work modules (gateway, pkg/*, platform/*) are added by later
# stages.
on:
push:
paths:
- 'backend/**'
- 'gateway/**'
- 'pkg/**'
- 'platform/**'
- 'go.work'
- 'go.work.sum'
- '.gitea/workflows/go-unit.yaml'
- '!**/*.md'
pull_request:
paths:
- 'backend/**'
- 'gateway/**'
- 'pkg/**'
- 'platform/**'
- 'go.work'
- 'go.work.sum'
- '.gitea/workflows/go-unit.yaml'
- '!**/*.md'
jobs:
test:
runs-on: ubuntu-latest
defaults:
run:
shell: bash
env:
# The engine consumes the published scrabble-solver module from this Gitea;
# GOPRIVATE makes go fetch it directly (skipping the public proxy/checksum DB).
# DICT_VERSION selects the dictionary DAWG release the engine tests load.
GOPRIVATE: gitea.iliadenisov.ru/*
DICT_VERSION: v1.0.0
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Fetch dictionary DAWGs
# The DAWGs moved to the scrabble-dictionary repo (the solver is now a
# versioned module pinned in backend/go.mod, fetched via GOPRIVATE — no
# sibling clone). They ship as a release artifact, one semver per set.
run: |
mkdir -p "${GITHUB_WORKSPACE}/dawg"
curl -fsSL -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
ls -la "${GITHUB_WORKSPACE}/dawg"
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: go.work
cache: true
- name: gofmt
run: |
unformatted="$(gofmt -l .)"
if [ -n "$unformatted" ]; then
echo "gofmt needed on:"; echo "$unformatted"; exit 1
fi
- name: vet
run: go vet ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
- name: build
run: go build ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
- name: test
# -count=1 disables the test cache so a green run never depends on a
# previous runner's cached state. BACKEND_DICT_DIR points the engine
# tests at the DAWGs fetched from the dictionary release.
env:
BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
run: go test -count=1 ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
-71
View File
@@ -1,71 +0,0 @@
name: Tests · Integration
# Postgres-backed integration tests for the Go backend, gated behind the
# `integration` build tag. They spin a throwaway postgres:17-alpine container via
# testcontainers-go, which reaches the host Docker daemon through the socket the
# Gitea runner exposes. Slower than the unit job (go-unit.yaml); run serially
# (-p=1) with Ryuk disabled — TestMain terminates its own container. The module
# list grows as new go.work modules are added by later stages.
on:
push:
paths:
- 'backend/**'
- 'pkg/**'
- 'go.work'
- 'go.work.sum'
- '.gitea/workflows/integration.yaml'
- '!**/*.md'
pull_request:
paths:
- 'backend/**'
- 'pkg/**'
- 'go.work'
- 'go.work.sum'
- '.gitea/workflows/integration.yaml'
- '!**/*.md'
jobs:
integration:
runs-on: ubuntu-latest
defaults:
run:
shell: bash
env:
# Ryuk (testcontainers' reaper) does not start cleanly on every runner;
# the suite's TestMain terminates its own container, so disable it.
TESTCONTAINERS_RYUK_DISABLED: "true"
# The engine consumes the published scrabble-solver module from this Gitea
# (GOPRIVATE -> direct fetch, skipping the public proxy/checksum DB);
# DICT_VERSION selects the dictionary DAWG release the engine tests load.
GOPRIVATE: gitea.iliadenisov.ru/*
DICT_VERSION: v1.0.0
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Fetch dictionary DAWGs
# The DAWGs moved to the scrabble-dictionary repo (the solver is now a
# versioned module pinned in backend/go.mod, fetched via GOPRIVATE — no
# sibling clone). They ship as a release artifact; the engine's untagged
# tests (compiled here too) load them.
run: |
mkdir -p "${GITHUB_WORKSPACE}/dawg"
curl -fsSL -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
ls -la "${GITHUB_WORKSPACE}/dawg"
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: go.work
cache: true
- name: Integration tests
# -count=1 disables the test cache; -p=1 -parallel=1 keeps the
# container-backed tests serial; the 15-minute timeout bounds a stuck
# container pull. The engine package's (untagged) tests also compile and
# run here, so BACKEND_DICT_DIR points them at the DAWGs from the release.
env:
BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
run: go test -tags=integration -count=1 -p=1 -parallel=1 -timeout=15m ./backend/...
-67
View File
@@ -1,67 +0,0 @@
name: Tests · UI
# Hermetic UI checks: type-check, Vitest unit tests, production build with a
# bundle-size budget, and a Playwright smoke (Chromium + WebKit) against the in-memory
# mock transport (no backend/gateway/Postgres). The committed src/gen/ codegen is built, not
# regenerated (the same model as the Go committed jet/fbs output).
on:
push:
paths:
- 'ui/**'
- '.gitea/workflows/ui-test.yaml'
pull_request:
paths:
- 'ui/**'
- '.gitea/workflows/ui-test.yaml'
jobs:
test:
runs-on: ubuntu-latest
defaults:
run:
shell: bash
working-directory: ui
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Node
uses: actions/setup-node@v4
with:
node-version: 22
- name: Install pnpm
run: npm install -g pnpm@11.0.9
- name: Install deps
run: pnpm install --frozen-lockfile
- name: Type-check
run: pnpm run check
- name: Unit tests
run: pnpm run test:unit
- name: Build
run: pnpm run build
- name: Bundle-size budget
run: node scripts/bundle-size.mjs
# The Playwright system libraries are provisioned once on the runner host
# (`sudo npx playwright@<version> install-deps chromium`), so the job needs no
# apt and no sudo: it only downloads the browser binaries into the runner cache
# (persisted by the host executor) and runs the suite. WebKit's Debian build
# bundles most of its own libraries and runs headless without extra host deps; if
# a runner ever lacks one, provision it once on the host with
# `sudo npx playwright install-deps webkit`. The timeouts guard against a future
# hang. Keep this in lockstep with @playwright/test in package.json — re-run
# install-deps on the host after a major bump.
- name: Install Playwright browsers
run: pnpm exec playwright install chromium webkit
timeout-minutes: 5
- name: E2E smoke (mock)
run: pnpm run test:e2e
timeout-minutes: 5
+23 -5
View File
@@ -49,9 +49,20 @@ conversation memory — is the source of continuity. Keep it that way.
## Branching & CI
- Trunk is **`master`** (owner preference). From Stage 1, work on `feature/*`
and merge via PR with a green CI gate. The genesis commit (Stage 0) lands on
`master` by necessity (an empty branch has nothing to PR into).
- **Two long-lived branches** (Stage 16 onward): **`development`** is the
integration branch; **`master`** is the production trunk. Cut `feature/*`
branches **from `development`** and PR them back into it. (Stages 0–15 used
`master` as the trunk with `feature/* → master`; the genesis Stage 0 commit is
on `master` by necessity.)
- A commit to a `feature/*` branch triggers **nothing**. The single workflow
`.gitea/workflows/ci.yaml` runs the full suite (`unit` + `integration` + `ui`)
on a PR into `development` or `master`, and the gated **`deploy`** job auto-rolls
the **test contour** on a PR into — or a push to — `development`
(`docker compose up -d --build` on the runner host + a `GET /` probe). A PR into
`master` is test-only.
- Merge `development → master` only when CI is green; the **prod** deploy is then a
**manual** workflow (Stage 18), never automatic. Secrets/variables are prefixed
`TEST_` / `PROD_` per contour (Gitea 1.26 has no deployment environments).
- After any push, watch the run to green before declaring a stage done — use the
ready-made watcher, never an inline poll loop:
`python3 ~/.claude/bin/gitea-ci-watch.py` (background). It reads `$GITEA_URL`
@@ -113,6 +124,8 @@ backend/ # module scrabble/backend
docs/ .gitea/workflows/ PLAN.md CLAUDE.md README.md
gateway/ ui/ pkg/ # added by their stages
platform/telegram/ # Telegram connector side-service (Stage 9): bot + gRPC API
backend/Dockerfile gateway/Dockerfile platform/telegram/Dockerfile # multi-stage distroless (Stage 16)
deploy/ # docker-compose + caddy + otelcol/prometheus/tempo/grafana (Stage 16)
```
## Build & test
@@ -127,9 +140,14 @@ go run ./backend/cmd/backend # /healthz, /readyz on :8080
cd ui && pnpm install && pnpm check && pnpm test:unit && pnpm build # the UI (Stage 7+)
pnpm start # UI mock mode: lobby -> game, no backend
docker build -f backend/Dockerfile -t scrabble-backend . # images (Stage 16); gateway embeds the UI
docker build -f gateway/Dockerfile -t scrabble-gateway .
docker compose -f deploy/docker-compose.yml config # validate the full contour
```
The `ui` module is a Node project (pnpm), **not** in `go.work`; its CI is
`.gitea/workflows/ui-test.yaml`. Committed edge codegen under `ui/src/gen/`
The `ui` module is a Node project (pnpm), **not** in `go.work`; it is the `ui` job
of the single `.gitea/workflows/ci.yaml` (Stage 16 folded the former go-unit /
integration / ui-test workflows into it). Committed edge codegen under `ui/src/gen/`
(regenerate with `pnpm codegen`); pnpm build-script approval lives in
`ui/pnpm-workspace.yaml` (`allowBuilds: esbuild: true`).
+92 -13
View File
@@ -49,8 +49,9 @@ independent (see ARCHITECTURE §9.1).
| 13 | Alphabet on the wire (UI alphabet-agnostic) | **done** |
| 14 | Solver & dictionary split (publish solver + scrabble-dictionary repo/artifact) | **done** |
| 15 | Dual Telegram bots & language-gated variants | **done** |
| 16 | Deploy infra & test contour (Dockerfiles, gateway static UI, compose, observability) | todo |
| 17 | Prod contour deploy (SSH export/import, manual after merge) | todo |
| 16 | Deploy infra & test contour (Dockerfiles, gateway static UI, compose, observability) | **done** |
| 17 | Test-contour verification & defect fixes | todo |
| 18 | Prod contour deploy (SSH export/import, manual after merge) | todo |
Scaffolding is incremental: `go.work` lists only existing modules; each stage
adds the modules it needs.
@@ -244,7 +245,7 @@ indices; the premiums.ts parity-test rework.
### Stage 14 — Solver & dictionary split (TODO-1 + TODO-2)
Re-scoped from the original "CI & deploy": that was several sessions of work, so the
deploy + observability + the two-bots idea were split into **Stages 15–17** below and this
deploy + observability + the two-bots idea were split into **Stages 15–18** below and this
stage took only the dependency/artifact split that everything else builds on. Scope: publish
`scrabble-solver` as a versioned Gitea module and split the dictionary build into a new
`scrabble-dictionary` repo delivering a **release artifact**, then make `scrabble-game` consume
@@ -279,7 +280,7 @@ back to `preferred_language`). Non-Telegram logins (web/email/guest) carry the g
(`GATEWAY_DEFAULT_SUPPORTED_LANGUAGES`, all variants). Admin broadcasts (`SendToUser`/`SendToGameChannel`)
pick the bot by an **operator-chosen** language in the console — unrelated to `ValidateInitData`.
### Stage 16 — Deploy infra & test contour
### Stage 16 — Deploy infra & test contour *(done)*
Scope: the deploy machinery + the **test contour** (the bulk of the original Stage 14). Backend +
gateway **Dockerfiles** (multi-stage distroless, mirroring the Stage 9 connector image); the gateway
gains **static UI serving** — **embedded** via `go:embed` (a node build stage in the gateway image),
@@ -297,15 +298,37 @@ h2c wrap — `/` + `/telegram/` mounts; a committed `dist` placeholder so `go bu
build); Postgres healthcheck/volume; whether the connector-scoped compose is retired for the root one;
collector/Tempo/Prometheus retention.
### Stage 17 — Prod contour deploy
### Stage 17 — Test-contour verification & defect fixes
Scope: exercise the deployed **test contour** end-to-end and fix the defects it surfaces — the
"does it actually work in the contour" pass before prod. Bring up the `development` deploy, then
verify each piece against a real run: the gateway serves the SPA at `/` and `/telegram/`; the admin
console and Grafana sit behind the single `/_gm` Basic-Auth; the Telegram **bots** start (test
environment) and the Mini App launches/authenticates; a game can be created and played through (web
+ Mini App); the **observability** stack receives data (Prometheus targets up, the dashboards
populate incl. `accounts_created_total`/`active_users`, traces reach Tempo); the out-of-app push
works. Fix the defects found and harden where the run exposes gaps — notably a CI **connector
liveness check** (the deploy probe only hits the gateway today, so a crash-looping connector is
invisible — that is how the Stage 16 test-env miss went unnoticed) and **path-conditional CI** (skip
the jobs whose code did not change, behind a single always-running gate job so branch-protection
required checks stay satisfiable — a skipped required check otherwise blocks the merge).
Open details (interview at start): the verification checklist + pass bar; which discovered defects
are in-scope vs deferred; the changed-paths design + the aggregate gate job; the connector
liveness-check grace period (the VPN sidecar handshake lets the connector restart a few times before
it settles).
### Stage 18 — Prod contour deploy
Scope: the **production contour** on a remote host over SSH. Deploy by **container export/import**
(`docker save` → `scp`/ssh → `docker load` → `docker compose up` on the remote), the SSH key + host IP
in Gitea secrets; **strictly manual** (`workflow_dispatch`) after a feature branch is merged to
`master`. Two-contour config uses **`TEST_`/`PROD_` secret/variable prefixes** — Gitea 1.26 has no
deployment environments (verified: the `environments` API 404s), so a flat prefixed namespace is the
convention.
Open details (re-interview): export/import vs a registry trade-off; prod domain/TLS at the remote
caddy; prod VPN; rollback.
in Gitea secrets; **strictly manual** (`workflow_dispatch`) after `development` is merged to `master`
(the Stage 16 branch model: `feature/* → development → master`, merge gated green). Two-contour config
uses **`TEST_`/`PROD_` secret/variable prefixes** — Gitea 1.26 has no deployment environments (verified:
the `environments` API 404s), so a flat prefixed namespace is the convention.
Reuses the Stage 16 `deploy/docker-compose.yml` as-is, mapping the **`PROD_`** set onto the same
unprefixed compose vars. **No host caddy on prod**, so the contour's own caddy terminates TLS — set
`CADDY_SITE_ADDRESS` to the prod domain so caddy does its own ACME (the Caddyfile is already
parameterised for this; the test contour leaves it `:80` behind the host caddy).
Open details (re-interview): export/import vs a registry trade-off; prod domain/cert source (ACME vs a
provided cert) at the contour caddy; prod VPN; rollback.
## Refinements logged during implementation
@@ -901,7 +924,7 @@ caddy; prod VPN; rollback.
CI & deploy (TODO-1, TODO-2, the collector + dashboards). The latter two were written
into the plan now as the agreed baseline (each still re-interviews at its own start).
(Stage 14 was itself later re-scoped to the solver/dictionary split alone; deploy +
observability + the dual-bot idea split into Stages 15–17.)
observability + the dual-bot idea split into Stages 15–18.)
- **Shared telemetry** (interview): a new `pkg/telemetry` owns the OTel provider
bootstrap (exporter selection, W3C propagators, shutdown, Go runtime metrics); the
backend `internal/telemetry` is now a thin facade over it (keeping its gin middleware),
@@ -981,7 +1004,7 @@ caddy; prod VPN; rollback.
- **Stage 14** (interview + implementation, re-scoped + discharges TODO-1/TODO-2):
- **Re-scoped to the split** (interview): the original "CI & deploy" was several sessions of work,
so it was cut to the **solver/dictionary split** (the dependency foundation) and the deploy +
observability + the dual-bot idea were written into the plan as new **Stages 15–17**. The deploy
observability + the dual-bot idea were written into the plan as new **Stages 15–18**. The deploy
decisions taken at the interview are recorded there (embed the UI in the gateway via `go:embed`;
full Collector+Prometheus+Tempo+Grafana stack; **two contours** — test = auto on feature-branch
push on the local host, prod = manual SSH `docker save`/`load` after merge; `TEST_`/`PROD_` secret
@@ -1036,6 +1059,62 @@ caddy; prod VPN; rollback.
per-language vars (the full deploy stack is Stage 16). No CI workflow change (the Go and UI workflows
already span the touched modules).
- **Stage 16** (interview + implementation):
- **Branch model reshaped** (interview, supersedes the Stage 0 `feature/* → master`): a long-lived
**`development`** integration branch + **`master`** as the prod trunk. Feature branches are cut from
`development`; a feature-branch commit triggers nothing. A single consolidated
`.gitea/workflows/ci.yaml` (Gitea has no cross-workflow `needs`) runs `unit`+`integration`+`ui` on a PR
into `development`/`master` and a **gated `deploy`** job (`needs` the three) that auto-rolls the test
contour **on a PR into — or a push to — `development`** (owner's "и PR, и push"). A PR into `master` is
test-only; prod is the manual Stage 18. The former `go-unit`/`integration`/`ui-test` workflows were
folded in (no path filters — full CI on every PR, per the owner). Console kept plain (`NO_COLOR`,
`docker compose --ansi never`, `--progress plain`).
- **Gateway serves the UI** (interview, the §13 single-origin): a new `gateway/internal/webui` embeds
`dist` via `go:embed` (a committed placeholder index so `go build`/CI compile without a UI build) and
serves the SPA at `/` and `/telegram/` (a path-stripping SPA handler, index.html fallback for the hash
router), mounted in the edge mux **below** the h2c wrap; `/_gm` stays an explicit 404 when the local
admin proxy is off so the catch-all does not leak the shell. The `gateway/Dockerfile` node stage builds
the UI with the `VITE_*` build-args and copies it into the embed dir before `go build`.
- **Images** (interview): multi-stage distroless `backend/Dockerfile` (a DAWG stage `curl`s the
`scrabble-dawg` release pinned to `DICT_VERSION`, `GOPRIVATE` fetches the solver) and `gateway/Dockerfile`
(node UI stage + Go stage), both trimming `go.work` like `platform/telegram/Dockerfile`. Built and
verified locally.
- **Contour = caddy-fronted** (interview, "caddy всё равно нужен для https"): a new `caddy` service owns
a **single `/_gm` Basic-Auth** and routes `/_gm/grafana/*` → Grafana (anonymous-admin + sub-path, no
own accounts) and the rest of `/_gm/*` → the backend console; everything else → the gateway. This
**supersedes Stage 10's** gateway-fronts-`/_gm` model **in the deploy topology** (the gateway's own
`/_gm` proxy stays for a local non-caddy run). TLS: the **host caddy** terminates it for the test
contour and forwards to `scrabble:80`; the in-compose caddy is parameterised (`CADDY_SITE_ADDRESS`) to
own ACME on prod (Stage 18) where there is no host caddy.
- **Networks** (engineering): inter-service traffic on a private `internal` network (project-scoped DNS,
no name collisions on the shared `edge`); only caddy joins the external `edge` (alias `scrabble`). The
connector keeps its VPN sidecar (the only egress that needs the tunnel). The connector-scoped
`platform/telegram/deploy/docker-compose.yml` was **retired** (the root `deploy/docker-compose.yml`
supersedes it; the connector Dockerfile stays).
- **Observability stack** (interview): OTel Collector (OTLP/gRPC → a Prometheus scrape endpoint +
Tempo OTLP) + Prometheus (**15d**) + Tempo (**72h**) + Grafana (provisioned Prometheus+Tempo datasources
+ four dashboards: Service overview, Edge/UX, Game domain, Users; Traces via the Tempo datasource +
Explore, no fixed panels). The collector's prometheus exporter uses `add_metric_suffixes:false` +
`resource_to_telemetry_conversion` so the dashboards' PromQL matches the in-code metric names and carries
`service_name`. The three services export `otlp` in the contour (default stays `none`, so CI needs no
collector). Loki/logs were left out of scope (container stdout / zap JSON).
- **User metrics** (interview): a backend `accounts_created_total{kind}` counter (telegram/email/guest;
robots excluded — they are a provisioned pool, not users) via the Stage-12 `SetMetrics` no-op pattern,
and a gateway **in-memory** `active_users{window=24h,7d}` observable gauge (distinct authenticated edge
actors). The owner chose the in-memory gauge over a DB `last_seen_at` (overkill); its single-instance /
reset-on-restart limits are documented (a live gauge, not billing).
- **Owner actions before the contour is green** (surfaced, not blockers): set the **`TEST_`** Gitea
secrets/variables (see `deploy/.env.example`) and add a host-caddy route `<test domain> → scrabble:80`
on the runner host. CI bootstrap nuance: the first PR introducing `ci.yaml` may first deploy on the
post-merge push to `development` (depending on whether Gitea runs head/base workflows for a PR), after
which PR-time deploys work.
- **Telegram test environment** (post-deploy fix): the connector now selects Telegram's test env with the
library's native `tgbot.UseTestEnvironment()` (was a `token += "/test"` hack — functionally identical,
verified, but the option is idiomatic and now has a `bot` test asserting the `/bot<token>/test/getMe`
path). The test contour **pins `TELEGRAM_TEST_ENV=true` in `ci.yaml`** (the contour is the test
environment) rather than via a `TEST_`-prefixed variable — removing a confusing double-`TEST` operator
knob and the secret-vs-variable footgun; prod (Stage 18) leaves it `false`.
## Deferred TODOs (cross-stage)
- ~~**TODO-1 — publish & version the solver.**~~ **Done in Stage 14.** `scrabble-solver` is
+21
View File
@@ -80,3 +80,24 @@ pnpm dev # against a running gateway (Vite proxies the RPC path to :8081)
`pnpm check` (type-check), `pnpm test:unit` (Vitest), `pnpm test:e2e` (Playwright
smoke vs the mock), `pnpm build` (static bundle). Details — including the committed
edge codegen (`pnpm codegen`) — are in [`ui/README.md`](ui/README.md).
## Deploy (`deploy/`)
The full contour is [`deploy/docker-compose.yml`](deploy/docker-compose.yml):
`backend` + `gateway` (with the UI embedded via `go:embed`, baked in by its node
build stage) + Postgres + the Telegram connector (with a VPN sidecar) + an
observability stack (OTel Collector → Prometheus + Tempo → Grafana) + a front
**caddy** that owns a single `/_gm` Basic-Auth (admin console + Grafana). The Go
services build from multi-stage distroless `*/Dockerfile`.
```sh
docker build -f backend/Dockerfile -t scrabble-backend . # pulls the DAWG release artifact
docker build -f gateway/Dockerfile -t scrabble-gateway . # node stage builds + embeds the UI
docker compose -f deploy/docker-compose.yml config # validate (needs the TEST_/PROD_ env)
```
CI auto-deploys the **test contour** on a PR into — or push to — `development`
(`.gitea/workflows/ci.yaml`); the **prod contour** is a manual deploy after
`development → master` (Stage 18). Env reference: [`deploy/.env.example`](deploy/.env.example);
the topology and the two-contour model are in
[`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) §13.
+42
View File
@@ -0,0 +1,42 @@
# Multi-stage build for the backend service. Mirrors platform/telegram/Dockerfile:
# a golang-alpine builder yields a static binary shipped on distroless nonroot.
#
# The dictionary DAWGs are baked in from the scrabble-dictionary release artifact
# (Stage 14) — the same set the Go CI downloads — and BACKEND_DICT_DIR points the
# binary at them. The published solver module is fetched directly from Gitea
# (GOPRIVATE), so the build stage needs git and network.
#
# Build from the repository root so go.work, go.work.sum, pkg/ and backend/ are all
# in the Docker context:
#   docker build -f backend/Dockerfile -t scrabble-backend .

# --- dictionary artifact -----------------------------------------------------
# Throwaway stage: only /dawg is copied into the runtime image.
FROM alpine:3.20 AS dawg
# Dictionary release to bake in; override with --build-arg DICT_VERSION=...
ARG DICT_VERSION=v1.0.0
RUN apk add --no-cache curl tar
# --retry re-attempts transient download failures so a brief network blip does
# not fail the image build.
RUN mkdir -p /dawg \
 && curl -fsSL --retry 3 --retry-delay 2 -o /tmp/dawg.tar.gz \
    "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz" \
 && tar xzf /tmp/dawg.tar.gz -C /dawg

# --- build -------------------------------------------------------------------
FROM golang:1.26.3-alpine AS build
WORKDIR /src
# git: the published solver module is fetched from Gitea directly (GOPRIVATE).
RUN apk add --no-cache git
ENV GOPRIVATE=gitea.iliadenisov.ru/*
COPY go.work go.work.sum ./
COPY pkg ./pkg
COPY backend ./backend
# Reduce the workspace to what the backend needs: backend + pkg.
RUN go work edit -dropuse=./gateway -dropuse=./platform/telegram
# CGO_ENABLED=0 yields the static binary the distroless static base requires.
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/backend ./backend/cmd/backend

# --- runtime -----------------------------------------------------------------
FROM gcr.io/distroless/static-debian12:nonroot
COPY --from=build /out/backend /usr/local/bin/backend
COPY --from=dawg /dawg /opt/dawg
# Points the backend at the baked-in dictionary set.
ENV BACKEND_DICT_DIR=/opt/dawg
ENTRYPOINT ["/usr/local/bin/backend"]
+1
View File
@@ -132,6 +132,7 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
hub := notify.NewHub(0)
accounts := account.NewStore(db)
accounts.SetMetrics(tel.MeterProvider().Meter("scrabble/backend/account"))
games := game.NewService(game.NewStore(db), accounts, registry, cfg.Game, logger)
games.SetNotifier(hub)
games.SetMetrics(tel.MeterProvider().Meter("scrabble/backend/game"))
+11 -3
View File
@@ -93,12 +93,14 @@ type Identity struct {
// Store is the Postgres-backed query surface for accounts and identities.
type Store struct {
db *sql.DB
db *sql.DB
metrics *accountMetrics
}
// NewStore constructs a Store wrapping db.
// NewStore constructs a Store wrapping db. Metrics default to a no-op meter until
// SetMetrics installs the real one during startup wiring.
func NewStore(db *sql.DB) *Store {
return &Store{db: db}
return &Store{db: db, metrics: defaultAccountMetrics()}
}
// ProvisionByIdentity returns the account bound to (kind, externalID), creating
@@ -331,6 +333,11 @@ func (s *Store) create(ctx context.Context, kind, externalID string, seed provis
if err != nil {
return Account{}, fmt.Errorf("account: create for identity (%s, %s): %w", kind, externalID, err)
}
// Count genuinely new durable accounts; robots are a fixed provisioned pool,
// not users, so they are excluded.
if kind != KindRobot {
s.metrics.recordCreated(ctx, kind)
}
return created, nil
}
@@ -355,6 +362,7 @@ func (s *Store) ProvisionGuest(ctx context.Context) (Account, error) {
if err := stmt.QueryContext(ctx, s.db, &row); err != nil {
return Account{}, fmt.Errorf("account: provision guest: %w", err)
}
s.metrics.recordCreated(ctx, kindGuest)
return modelToAccount(row), nil
}
+53
View File
@@ -0,0 +1,53 @@
package account
import (
"context"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
)
// meterName scopes the account domain's OpenTelemetry instruments.
const meterName = "scrabble/backend/account"
// kindGuest labels guest accounts in accounts_created_total. Guests carry no
// identity, so they have no identity Kind; this is the metric label for them.
const kindGuest = "guest"
// accountMetrics holds the account domain's operational instruments. It defaults
// to no-ops (see defaultAccountMetrics); SetMetrics installs the real meter during
// startup wiring.
type accountMetrics struct {
// created is the accounts_created_total counter; recordCreated adds one to it
// per new account, labelled with a "kind" attribute.
created metric.Int64Counter
}
// defaultAccountMetrics builds the account instruments on a no-op meter, so
// recording against them is safe but discards every measurement.
func defaultAccountMetrics() *accountMetrics {
	discard := noop.NewMeterProvider().Meter(meterName)
	return newAccountMetrics(discard)
}
// newAccountMetrics creates the account instruments on meter. If the counter
// cannot be constructed (rare), a no-op counter of the same name is used instead
// so callers always receive a usable, non-nil instrument.
func newAccountMetrics(meter metric.Meter) *accountMetrics {
	const counterName = "accounts_created_total"

	counter, err := meter.Int64Counter(counterName,
		metric.WithDescription("New accounts created, labelled by kind (telegram/email/guest); robots are not counted."))
	if err == nil {
		return &accountMetrics{created: counter}
	}

	// Construction failed: degrade to a counter that discards measurements.
	fallback, _ := noop.NewMeterProvider().Meter(meterName).Int64Counter(counterName)
	return &accountMetrics{created: fallback}
}
// SetMetrics points the account store at instruments built on meter. Startup
// wiring is expected to call it; the default is a no-op meter. A nil meter is
// ignored and leaves the current instruments in place.
func (s *Store) SetMetrics(meter metric.Meter) {
	if meter != nil {
		s.metrics = newAccountMetrics(meter)
	}
}
// recordCreated counts one newly created account of the given kind, attaching
// kind as the "kind" attribute on accounts_created_total.
//
// It is a no-op on a nil receiver or an unset counter, so a Store assembled
// without NewStore (for example a bare struct literal in a test) does not panic
// on the metrics path.
func (m *accountMetrics) recordCreated(ctx context.Context, kind string) {
	if m == nil || m.created == nil {
		return
	}
	m.created.Add(ctx, 1, metric.WithAttributes(attribute.String("kind", kind)))
}
+45
View File
@@ -0,0 +1,45 @@
# Environment for deploy/docker-compose.yml. The CI deploy job (ci.yaml) maps the
# Gitea TEST_-prefixed secrets/variables onto these unprefixed names; Stage 18
# maps the PROD_-prefixed set the same way. Copy to deploy/.env for a local run.
#
# Full reference (required vs optional, defaults, secret-vs-variable): deploy/README.md.
# --- Postgres ---------------------------------------------------------------
POSTGRES_DB=scrabble
POSTGRES_USER=scrabble
POSTGRES_PASSWORD=change-me # required
# --- Dictionary -------------------------------------------------------------
DICT_VERSION=v1.0.0 # scrabble-dictionary release tag (image build-arg)
# --- Logging ----------------------------------------------------------------
LOG_LEVEL=info
# --- Edge / caddy -----------------------------------------------------------
# Test: ":80" (the host caddy terminates TLS and forwards to scrabble:80 on the
# external `edge` network). Prod (Stage 18): a domain so caddy does its own ACME.
CADDY_SITE_ADDRESS=:80
GM_BASICAUTH_USER=gm
GM_BASICAUTH_HASH= # required; `caddy hash-password` bcrypt hash; single-quote the value here (it contains `$`)
# --- UI build args (baked into the gateway image) ---------------------------
VITE_TELEGRAM_BOT_ID=
VITE_TELEGRAM_LINK=
VITE_GATEWAY_URL=
# --- Gateway ----------------------------------------------------------------
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES=en,ru
# --- Grafana ----------------------------------------------------------------
GRAFANA_ROOT_URL=/_gm/grafana/ # set the full https URL behind a real domain
GRAFANA_ADMIN_PASSWORD=admin
# --- Telegram connector -----------------------------------------------------
AWG_CONF= # required; AmneziaWG sidecar config
TELEGRAM_BOT_TOKEN_EN= # at least one of EN/RU required
TELEGRAM_BOT_TOKEN_RU=
TELEGRAM_GAME_CHANNEL_ID_EN=
TELEGRAM_GAME_CHANNEL_ID_RU=
TELEGRAM_MINIAPP_URL= # required
TELEGRAM_TEST_ENV=false
TELEGRAM_API_BASE_URL=
+102
View File
@@ -0,0 +1,102 @@
# deploy
The full Scrabble contour: `backend` + `gateway` + Postgres + the Telegram
connector (with a VPN sidecar) + the observability stack (OTel Collector →
Prometheus + Tempo → Grafana), fronted by a **caddy** that owns a single `/_gm`
Basic-Auth (the admin console + Grafana). Topology and the decision record are in
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §13; this file is the
operational reference for **every environment variable**.
## Services
| Service | Image | Role |
| --- | --- | --- |
| `caddy` | `caddy:2-alpine` | Edge proxy (alias `scrabble` on `edge`): single `/_gm` Basic-Auth → admin console + Grafana; everything else → gateway. TLS per `CADDY_SITE_ADDRESS`. |
| `gateway` | built (`gateway/Dockerfile`) | Public edge; serves the embedded SPA at `/` and `/telegram/`; Connect-RPC edge. |
| `backend` | built (`backend/Dockerfile`) | Domain service; bakes in the DAWG dictionaries; runs migrations at boot. |
| `postgres` | `postgres:17-alpine` | Database (named volume, `pg_isready` healthcheck). |
| `vpn` + `telegram` | sidecar + built (`platform/telegram/Dockerfile`) | Telegram connector; egresses through the AmneziaWG sidecar; internal gRPC at `telegram:9091`. |
| `otelcol` | `otel/opentelemetry-collector-contrib` | OTLP/gRPC `:4317` → Prometheus scrape (`:9464`) + Tempo. |
| `prometheus` | `prom/prometheus` | Metrics, 15d retention. |
| `tempo` | `grafana/tempo` | Traces, 72h retention. |
| `grafana` | `grafana/grafana` | Dashboards (provisioned), anonymous-admin behind caddy's `/_gm/grafana`. |
Networking: inter-service traffic is on the private `internal` network
(project-scoped DNS); only `caddy` joins the shared external `edge` network so the
host caddy can reach it at `scrabble:80`. `edge` must already exist on the host
(`docker network create edge`).
## Run it
**Locally** — copy the template, fill the required values, bring it up:
```sh
cp deploy/.env.example deploy/.env # then edit deploy/.env
docker network create edge # once, if it does not exist
cd deploy && docker compose up -d --build
```
**In CI** (the test contour) — `.gitea/workflows/ci.yaml`'s `deploy` job maps the
Gitea **`TEST_`-prefixed** secrets/variables onto the unprefixed names below and
runs `docker compose up -d --build` on the runner host. Stage 18 (prod) maps the
**`PROD_`** set the same way. So a Gitea secret named `TEST_POSTGRES_PASSWORD`
feeds the compose's `POSTGRES_PASSWORD`, etc.
## Required variables
`docker compose` aborts immediately if any of these is unset or empty (they use `:?`):
| Variable | Gitea kind | Purpose |
| --- | --- | --- |
| `POSTGRES_PASSWORD` | secret | Postgres password (also embedded in `BACKEND_POSTGRES_DSN`). |
| `AWG_CONF` | secret | AmneziaWG config for the VPN sidecar (the connector's only egress). |
| `GM_BASICAUTH_HASH` | secret | bcrypt hash gating `/_gm` (admin console + Grafana). Generate with `docker run --rm caddy:2-alpine caddy hash-password --plaintext '<pw>'`. |
| `TELEGRAM_MINIAPP_URL` | variable | The Mini App URL the connector hands out in deep links / buttons. |
**Plus at least one bot token** — `TELEGRAM_BOT_TOKEN_EN` or `TELEGRAM_BOT_TOKEN_RU`
(secrets). Compose cannot express "one of", so they default to empty, but the
connector **fails at boot** if both are empty.
## Optional variables (with defaults)
| Variable | Gitea kind | Default | Purpose |
| --- | --- | --- | --- |
| `POSTGRES_DB` | variable | `scrabble` | Database name. |
| `POSTGRES_USER` | variable | `scrabble` | Database user. |
| `DICT_VERSION` | variable | `v1.0.0` | `scrabble-dictionary` release tag baked into the backend image (build-arg). |
| `LOG_LEVEL` | variable | `info` | Shared log level for backend / gateway / connector (`debug\|info\|warn\|error`). |
| `CADDY_SITE_ADDRESS` | variable | `:80` | Caddy site address. Test: `:80` (host caddy terminates TLS). Prod: a domain, so caddy does its own ACME. |
| `GM_BASICAUTH_USER` | variable | `gm` | Username for the `/_gm` Basic-Auth. |
| `GRAFANA_ROOT_URL` | variable | `/_gm/grafana/` | Grafana root URL (sub-path serving). Set the full `https://<domain>/_gm/grafana/` behind a real domain. |
| `GRAFANA_ADMIN_PASSWORD` | secret | `admin` | Grafana admin password. Low impact (the login form is disabled, access is anonymous-admin behind caddy) but set it anyway. |
| `TELEGRAM_GAME_CHANNEL_ID_EN` | variable | _(empty)_ | English game-channel id; empty/`0` disables channel posts. |
| `TELEGRAM_GAME_CHANNEL_ID_RU` | variable | _(empty)_ | Russian game-channel id; empty/`0` disables channel posts. |
| `TELEGRAM_TEST_ENV` | _pinned_ | `false` | `true` routes the bot through Telegram's test environment (`.../bot<token>/test/METHOD`). **The CI test contour pins this to `true` in `ci.yaml`** (the contour is the test environment) — it is not a Gitea variable. Set it in `.env` for a local run; prod (Stage 18) leaves it `false`. |
| `TELEGRAM_API_BASE_URL` | variable | _(empty)_ | Override the Bot API host (a mock/self-hosted server); empty = `https://api.telegram.org`. |
| `GATEWAY_DEFAULT_SUPPORTED_LANGUAGES` | variable | `en,ru` | Variant-gating set for non-Telegram logins (web/email/guest). |
| `VITE_TELEGRAM_BOT_ID` | variable | _(empty)_ | UI build-arg: numeric bot id for the web Login Widget. |
| `VITE_TELEGRAM_LINK` | variable | _(empty)_ | UI build-arg: deep-link base for share-to-Telegram (e.g. `https://t.me/<bot>/<app>`). |
| `VITE_GATEWAY_URL` | variable | _(empty)_ | UI build-arg: gateway origin; empty = same-origin (the usual single-origin deploy). |
The three `VITE_*` are **build-args** baked into the gateway image at build time, so
changing them requires a rebuild (`--build`), not just a restart.
## Fixed internal wiring (not operator-set)
These are hard-wired in `docker-compose.yml` (no `${...}`), pointing the services
at each other on the `internal` network — listed here so they are not mistaken for
missing config: `BACKEND_POSTGRES_DSN` (→ `postgres`, `search_path=backend`),
`GATEWAY_BACKEND_HTTP_URL`/`_GRPC_ADDR` (→ `backend`),
`GATEWAY_CONNECTOR_ADDR`/`BACKEND_CONNECTOR_ADDR` (→ `telegram:9091`), the three
services' `*_OTEL_*_EXPORTER=otlp` + `OTEL_EXPORTER_OTLP_ENDPOINT=http://otelcol:4317`
(`_INSECURE=true`). `GATEWAY_ADMIN_*` is intentionally **unset** — caddy owns `/_gm`
in the contour.
## Host-side setup (outside this repo)
- **`edge` network** must exist on the host (`docker network create edge`).
- **Host caddy** route `<domain> → scrabble:80` (the in-compose caddy serves HTTP
in the test contour; the host caddy terminates TLS). Not needed on prod, where the
contour caddy owns TLS (set `CADDY_SITE_ADDRESS` to the domain).
- **Branch protection** required-status-check names are `CI / unit`,
`CI / integration`, `CI / ui` (see [`../CLAUDE.md`](../CLAUDE.md) "Branching & CI").
+35
View File
@@ -0,0 +1,35 @@
# Edge reverse proxy for the Scrabble contour. A single Basic-Auth gate covers
# every operator surface under /_gm (the backend-rendered admin console and the
# Grafana subpath); everything else (the SPA at / and /telegram/, plus the
# Connect edge) goes to the gateway. Mirrors ../galaxy-game's /_gm model.
#
# CADDY_SITE_ADDRESS is ":80" in the test contour (the host caddy terminates TLS
# and forwards); set it to a domain in prod (Stage 18) so this caddy does its own
# ACME and the contour is self-contained.
{
admin off
}
{$CADDY_SITE_ADDRESS::80} {
# Operator surfaces under /_gm: a single shared Basic-Auth, then route.
@gm path /_gm /_gm/*
handle @gm {
basic_auth {
{$GM_BASICAUTH_USER:gm} {$GM_BASICAUTH_HASH}
}
# Grafana serves from this sub-path (GF_SERVER_SERVE_FROM_SUB_PATH=true), so
# the prefix is forwarded intact, not stripped.
handle /_gm/grafana* {
reverse_proxy grafana:3000
}
# Everything else under /_gm is the backend-rendered admin console.
handle {
reverse_proxy backend:8080
}
}
# The SPA (/, /telegram/) and the Connect edge are served by the gateway.
handle {
reverse_proxy gateway:8081
}
}
+217
View File
@@ -0,0 +1,217 @@
# Full deploy descriptor for the Scrabble test contour: backend + gateway +
# Postgres + the Telegram connector (with its VPN sidecar) + the observability
# stack (OTel Collector -> Prometheus + Tempo -> Grafana). Driven by
# .gitea/workflows/ci.yaml (`docker compose up -d --build`); env values are
# interpolated from Gitea Actions TEST_ secrets/variables exported by the deploy
# job (see deploy/.env.example for the unprefixed names).
#
# Networking (mirrors ../galaxy-game):
# - `internal` (scrabble-internal): all inter-service traffic, project-private
# DNS so service names never collide on the shared `edge` network.
# - `edge` (external): the host caddy reaches this contour at `scrabble:80`
# (the in-compose caddy's alias). In the test contour the in-compose caddy
# serves plain HTTP only; the host caddy terminates TLS and forwards. For prod
# (Stage 18, no host caddy) set CADDY_SITE_ADDRESS to the domain so the caddy
# does its own ACME — the contour is then self-contained.
# - The connector egresses to api.telegram.org through the `vpn` sidecar
# (network_mode: service:vpn); it answers internal gRPC at `telegram:9091`.
name: scrabble
services:
postgres:
container_name: scrabble-postgres
image: postgres:17-alpine
restart: unless-stopped
environment:
POSTGRES_DB: ${POSTGRES_DB:-scrabble}
POSTGRES_USER: ${POSTGRES_USER:-scrabble}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-scrabble} -d ${POSTGRES_DB:-scrabble}"]
interval: 5s
timeout: 3s
retries: 30
volumes:
- postgres-data:/var/lib/postgresql/data
networks: [internal]
backend:
container_name: scrabble-backend
image: scrabble-backend:latest
build:
context: ..
dockerfile: backend/Dockerfile
args:
DICT_VERSION: ${DICT_VERSION:-v1.0.0}
restart: unless-stopped
depends_on:
postgres:
condition: service_healthy
environment:
# search_path=backend matches the migrations (00001 creates the schema).
BACKEND_POSTGRES_DSN: postgres://${POSTGRES_USER:-scrabble}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable&search_path=backend
BACKEND_HTTP_ADDR: ":8080"
BACKEND_GRPC_ADDR: ":9090"
BACKEND_CONNECTOR_ADDR: telegram:9091
BACKEND_LOG_LEVEL: ${LOG_LEVEL:-info}
BACKEND_SERVICE_NAME: scrabble-backend
BACKEND_OTEL_TRACES_EXPORTER: otlp
BACKEND_OTEL_METRICS_EXPORTER: otlp
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
OTEL_EXPORTER_OTLP_INSECURE: "true"
# No container healthcheck: the distroless image has no shell/wget. Readiness
# is covered by the CI post-deploy probe (GET / through caddy).
networks: [internal]
gateway:
container_name: scrabble-gateway
image: scrabble-gateway:latest
build:
context: ..
dockerfile: gateway/Dockerfile
args:
VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
restart: unless-stopped
depends_on: [backend]
environment:
GATEWAY_HTTP_ADDR: ":8081"
GATEWAY_BACKEND_HTTP_URL: http://backend:8080
GATEWAY_BACKEND_GRPC_ADDR: backend:9090
GATEWAY_CONNECTOR_ADDR: telegram:9091
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES: ${GATEWAY_DEFAULT_SUPPORTED_LANGUAGES:-en,ru}
GATEWAY_LOG_LEVEL: ${LOG_LEVEL:-info}
GATEWAY_SERVICE_NAME: scrabble-gateway
GATEWAY_OTEL_TRACES_EXPORTER: otlp
GATEWAY_OTEL_METRICS_EXPORTER: otlp
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
OTEL_EXPORTER_OTLP_INSECURE: "true"
# GATEWAY_ADMIN_* intentionally unset: in the deployed contour the front
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
networks: [internal]
# --- Telegram connector (egress via the VPN sidecar) -----------------------
# AmneziaWG sidecar. The `telegram` service below runs with
# network_mode: "service:vpn", i.e. inside this container's network stack, so it
# cannot carry network aliases of its own: the `telegram` DNS name that backend
# and gateway dial (telegram:9091) is therefore attached to this service.
vpn:
container_name: scrabble-telegram-vpn
image: docker.iliadenisov.ru/developer/amneziawg-sidecar:latest
restart: unless-stopped
privileged: true
environment:
# Required: compose aborts if AWG_CONF is unset or empty.
AWG_CONF: ${AWG_CONF:?set AWG_CONF}
networks:
internal:
aliases: [telegram]
telegram:
container_name: scrabble-telegram
image: scrabble-telegram:latest
build:
context: ..
dockerfile: platform/telegram/Dockerfile
restart: unless-stopped
depends_on: [vpn]
network_mode: "service:vpn"
environment:
# The bot tokens live ONLY in this container (ARCHITECTURE.md §12). At least
# one token is required (the connector validates this at boot).
TELEGRAM_BOT_TOKEN_EN: ${TELEGRAM_BOT_TOKEN_EN:-}
TELEGRAM_BOT_TOKEN_RU: ${TELEGRAM_BOT_TOKEN_RU:-}
TELEGRAM_GAME_CHANNEL_ID_EN: ${TELEGRAM_GAME_CHANNEL_ID_EN:-}
TELEGRAM_GAME_CHANNEL_ID_RU: ${TELEGRAM_GAME_CHANNEL_ID_RU:-}
TELEGRAM_MINIAPP_URL: ${TELEGRAM_MINIAPP_URL:?set TELEGRAM_MINIAPP_URL}
TELEGRAM_GRPC_ADDR: ":9091"
TELEGRAM_TEST_ENV: ${TELEGRAM_TEST_ENV:-false}
TELEGRAM_API_BASE_URL: ${TELEGRAM_API_BASE_URL:-}
TELEGRAM_LOG_LEVEL: ${LOG_LEVEL:-info}
TELEGRAM_SERVICE_NAME: scrabble-telegram
TELEGRAM_OTEL_TRACES_EXPORTER: otlp
TELEGRAM_OTEL_METRICS_EXPORTER: otlp
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
OTEL_EXPORTER_OTLP_INSECURE: "true"
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway) --
caddy:
container_name: scrabble-caddy
image: caddy:2-alpine
restart: unless-stopped
depends_on: [gateway, backend, grafana]
environment:
# Test: ":80" (host caddy terminates TLS). Prod: a domain for own ACME.
CADDY_SITE_ADDRESS: ${CADDY_SITE_ADDRESS:-:80}
GM_BASICAUTH_USER: ${GM_BASICAUTH_USER:-gm}
GM_BASICAUTH_HASH: ${GM_BASICAUTH_HASH:?set GM_BASICAUTH_HASH}
volumes:
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
- caddy-data:/data
networks:
internal: {}
edge:
aliases: [scrabble]
# --- Observability ---------------------------------------------------------
otelcol:
container_name: scrabble-otelcol
image: otel/opentelemetry-collector-contrib:0.119.0
restart: unless-stopped
command: ["--config=/etc/otelcol/config.yaml"]
volumes:
- ./otelcol/config.yaml:/etc/otelcol/config.yaml:ro
networks: [internal]
prometheus:
container_name: scrabble-prometheus
image: prom/prometheus:v2.55.1
restart: unless-stopped
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=15d
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheus-data:/prometheus
networks: [internal]
tempo:
container_name: scrabble-tempo
image: grafana/tempo:2.7.1
restart: unless-stopped
command: ["-config.file=/etc/tempo/tempo.yaml"]
volumes:
- ./tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
- tempo-data:/var/tempo
networks: [internal]
grafana:
container_name: scrabble-grafana
image: grafana/grafana:11.4.0
restart: unless-stopped
depends_on: [prometheus, tempo]
environment:
# Served under /_gm/grafana behind caddy's Basic-Auth; anonymous Admin so a
# single shared login (caddy) gates it with no per-user Grafana accounts.
GF_SERVER_ROOT_URL: ${GRAFANA_ROOT_URL:-/_gm/grafana/}
GF_SERVER_SERVE_FROM_SUB_PATH: "true"
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
GF_AUTH_DISABLE_LOGIN_FORM: "true"
GF_AUTH_BASIC_ENABLED: "false"
GF_USERS_ALLOW_SIGN_UP: "false"
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
volumes:
- ./grafana/provisioning:/etc/grafana/provisioning:ro
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
- grafana-data:/var/lib/grafana
networks: [internal]
networks:
internal:
name: scrabble-internal
edge:
external: true
volumes:
postgres-data:
caddy-data:
prometheus-data:
tempo-data:
grafana-data:
+39
View File
@@ -0,0 +1,39 @@
{
"uid": "scrabble-edge",
"title": "Scrabble — Edge / UX",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-6h", "to": "now" },
"panels": [
{
"type": "timeseries",
"title": "Edge request rate by message type",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (message_type)", "legendFormat": "{{message_type}}" }]
},
{
"type": "timeseries",
"title": "Edge p95 latency",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [
{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p95" },
{ "refId": "B", "expr": "histogram_quantile(0.50, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p50" }
]
},
{
"type": "timeseries",
"title": "Edge requests by result",
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (result)", "legendFormat": "{{result}}" }]
}
]
}
@@ -0,0 +1,59 @@
{
"uid": "scrabble-game",
"title": "Scrabble — Game domain",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-24h", "to": "now" },
"panels": [
{
"type": "timeseries",
"title": "Games started / abandoned (rate by variant)",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [
{ "refId": "A", "expr": "sum(rate(games_started_total[15m])) by (variant)", "legendFormat": "started {{variant}}" },
{ "refId": "B", "expr": "sum(rate(games_abandoned_total[15m])) by (variant)", "legendFormat": "abandoned {{variant}}" }
]
},
{
"type": "timeseries",
"title": "Robot games finished (rate)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(robot_games_finished_total[15m]))", "legendFormat": "robot games" }]
},
{
"type": "timeseries",
"title": "Live games in cache (by variant)",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(game_cache_active) by (variant)", "legendFormat": "{{variant}}" }]
},
{
"type": "timeseries",
"title": "Chat messages (rate by kind)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(chat_messages_total[15m])) by (kind)", "legendFormat": "{{kind}}" }]
},
{
"type": "timeseries",
"title": "Journal replay p95 (by variant)",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_replay_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
},
{
"type": "timeseries",
"title": "Move validate p95 (by variant)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_move_validate_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
}
]
}
@@ -0,0 +1,58 @@
{
"uid": "scrabble-overview",
"title": "Scrabble — Service overview",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-6h", "to": "now" },
"panels": [
{
"type": "stat",
"title": "Active users (24h)",
"gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"24h\"})" }]
},
{
"type": "stat",
"title": "Active users (7d)",
"gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"7d\"})" }]
},
{
"type": "stat",
"title": "Edge requests/s",
"gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m]))" }]
},
{
"type": "stat",
"title": "Edge error ratio",
"description": "Share of edge requests whose result is not \"ok\" over the last 5m. The denominator is floored at a tiny epsilon only to avoid 0/0 when idle, so the ratio stays correct at low traffic (below 1 req/s).",
"gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 },
"fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count{result!=\"ok\"}[5m])) / clamp_min(sum(rate(edge_request_duration_count[5m])), 1e-9)" }]
},
{
"type": "timeseries",
"title": "Goroutines by service",
"description": "OTel Go runtime metric; verify the exact name against live Prometheus if empty (go_goroutine_count / process_runtime_go_goroutines depending on the contrib runtime version).",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "go_goroutine_count", "legendFormat": "{{service_name}}" }]
},
{
"type": "timeseries",
"title": "Heap memory used by service",
"description": "OTel Go runtime metric (best-effort name go_memory_used); verify against live Prometheus if empty.",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(go_memory_used) by (service_name)", "legendFormat": "{{service_name}}" }]
}
]
}
+34
View File
@@ -0,0 +1,34 @@
{
"uid": "scrabble-users",
"title": "Scrabble — Users",
"tags": ["scrabble"],
"timezone": "",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": { "from": "now-7d", "to": "now" },
"panels": [
{
"type": "timeseries",
"title": "Active users (in-memory, single gateway)",
"description": "Distinct accounts with an authenticated action within the window. Resets on gateway restart; correct for a single instance (MVP).",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "max(active_users) by (window)", "legendFormat": "{{window}}" }]
},
{
"type": "timeseries",
"title": "New accounts (rate by kind)",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(accounts_created_total[1h])) by (kind)", "legendFormat": "{{kind}}" }]
},
{
"type": "timeseries",
"title": "New accounts (cumulative by kind)",
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(accounts_created_total) by (kind)", "legendFormat": "{{kind}}" }]
}
]
}
@@ -0,0 +1,15 @@
# Loads the committed dashboard JSON from /var/lib/grafana/dashboards (mounted
# read-only from deploy/grafana/dashboards).
apiVersion: 1
providers:
- name: scrabble
orgId: 1
folder: Scrabble
type: file
disableDeletion: false
editable: true
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: false
@@ -0,0 +1,16 @@
# Grafana datasources for the Scrabble contour, provisioned at startup. Metrics
# come from Prometheus (scraping the collector) and traces from Tempo.
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
- name: Tempo
type: tempo
uid: tempo
access: proxy
url: http://tempo:3200
+38
View File
@@ -0,0 +1,38 @@
# OpenTelemetry Collector for the Scrabble contour. Receives OTLP/gRPC from the
# three services (backend, gateway, connector — pkg/telemetry exports OTLP only),
# fans metrics out to a Prometheus scrape endpoint and traces to Tempo.
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
processors:
batch: {}
exporters:
# Exposes the collected metrics for Prometheus to scrape (otelcol:9464/metrics).
# add_metric_suffixes:false keeps the instrument names verbatim (no _seconds /
# _total unit/type suffixes) so the dashboards' PromQL matches the names defined
# in code; resource_to_telemetry_conversion promotes service.name to a label.
prometheus:
endpoint: 0.0.0.0:9464
add_metric_suffixes: false
resource_to_telemetry_conversion:
enabled: true
# Forwards traces to Tempo's OTLP ingest.
otlp/tempo:
endpoint: tempo:4317
tls:
insecure: true
service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [otlp/tempo]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus]
+14
View File
@@ -0,0 +1,14 @@
# Prometheus scrape config for the Scrabble contour. The OTel Collector exposes
# every service's metrics on its prometheus exporter; Prometheus scrapes that one
# endpoint. Retention (15d) is set on the command line in docker-compose.yml.
global:
scrape_interval: 30s
evaluation_interval: 30s
scrape_configs:
- job_name: otelcol
static_configs:
- targets: ["otelcol:9464"]
- job_name: prometheus
static_configs:
- targets: ["localhost:9090"]
+26
View File
@@ -0,0 +1,26 @@
# Tempo for the Scrabble contour: single-binary, local filesystem storage, OTLP
# ingest from the collector, 72h block retention.
server:
http_listen_port: 3200
distributor:
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
ingester:
max_block_duration: 5m
compactor:
compaction:
block_retention: 72h
storage:
trace:
backend: local
local:
path: /var/tempo/blocks
wal:
path: /var/tempo/wal
+39 -13
View File
@@ -489,8 +489,11 @@ promotions) is future work and would deliver short markdown messages (text + lin
available for debugging; **`otlp`** (gRPC, endpoint from the standard
`OTEL_EXPORTER_OTLP_*` environment) exports to a collector. The Postgres pool is
instrumented with otelsql and `otelgrpc` traces the backend↔gateway push stream
and the gateway↔connector calls. The OTLP collector and Grafana dashboards are
stood up with the deploy (Stage 15).
and the gateway↔connector calls. The OTLP **Collector** (OTLP/gRPC → Prometheus
metrics + Tempo traces), **Prometheus** (15d), **Tempo** (72h) and **Grafana**
(provisioned datasources + dashboards, behind the caddy `/_gm/grafana` Basic-Auth)
are stood up with the deploy (`deploy/`, Stage 16); the default exporter stays
`none`, so CI needs no collector.
- Per-request server-side timing via gin middleware from day one (the access log
carries method, route, status, latency and the active trace id). A
client-measured RTT piggybacked on the next request is a later enhancement.
@@ -503,6 +506,12 @@ promotions) is future work and would deliver short markdown messages (text + lin
(the UI-perceived roundtrip, by `message_type`/`result`); and Go runtime/heap
metrics. Game-scoped metrics carry a `variant` attribute
(english/russian_scrabble/erudit).
- User metrics (Stage 16): a backend counter `accounts_created_total` (`kind` =
telegram/email/guest; robots are a provisioned pool, not users, and are excluded)
and a gateway **in-memory** observable gauge `active_users` (`window` = 24h/7d) —
distinct accounts that performed an authenticated edge action in the window. The
gauge is single-process by design (single-instance MVP, §10): it is correct for one
gateway, resets on restart, and is a live operational figure, not a billing count.
- Unauthenticated `GET /healthz` (liveness) and `GET /readyz` (readiness — the
database answers a bounded ping and the session cache is warmed).
- The backend serves a **second listener** — a gRPC server
@@ -518,7 +527,7 @@ promotions) is future work and would deliver short markdown messages (text + lin
| Session minting; email-code / guest validation | gateway (with backend) |
| Session → `user_id` resolution, `X-User-ID` injection | gateway |
| Authorisation, ownership, state transitions | backend (`X-User-ID` is the sole identity input) |
| Admin authentication | gateway validates HTTP Basic Auth (`GATEWAY_ADMIN_*`) on the public `/_gm/*` path and reverse-proxies it **verbatim** to the backend's server-rendered admin console; the backend trusts the gateway (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
| Admin authentication | a single Basic-Auth gate on `/_gm/*`, forwarded **verbatim** to the backend's server-rendered admin console (and, in the deployed contour, routing `/_gm/grafana/*` to Grafana). In the deploy the **caddy** owns this gate (§13); a local non-caddy run uses the gateway's own `GATEWAY_ADMIN_*` proxy. The backend trusts the proxy (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
| backend ↔ gateway ↔ connector trust | the network (only gateway may reach backend; the connector serves unauthenticated gRPC on the internal segment) |
This is an explicit, accepted MVP risk: compromise of the gateway↔backend
@@ -536,16 +545,33 @@ a dedicated redeem sub-limit or a longer code is the hardening step if abuse app
## 13. Deployment (informational)
Single public origin, path-routed: a mini-landing at the root, the **Telegram Mini
App under `/telegram/`** (the gateway serves the static UI build, wired in Stage 15;
outside Telegram that path redirects to the root), the gateway public surface and the **admin console
at `/_gm`** (backend-rendered, Basic-Auth at the gateway) share one host that
terminates TLS. The **Telegram connector** runs as a separate
container with **no public ingress** — it long-polls Telegram and egresses through a
VPN sidecar, answering only internal gRPC. MVP runs one `gateway`, one `backend`, one
Postgres, plus the connector. The connector's Docker/compose ships now
(`platform/telegram/deploy`, mirroring `../15-puzzle`); the gateway's static UI serving
and the full multi-service deploy land in Stage 15.
Single public origin, path-routed. The gateway **embeds** the static UI build
(`go:embed`, baked in by a node stage in `gateway/Dockerfile`) and serves the one
SPA at both `/` (web) and `/telegram/` (the Telegram Mini App; outside Telegram that
path redirects to the root — the client-side guard). An in-compose **caddy** is the
contour's edge: it owns a single `/_gm` Basic-Auth and routes `/_gm/grafana/*` to
**Grafana** (anonymous-admin, so the one shared login gates it with no per-user
Grafana accounts) and the rest of `/_gm/*` to the backend-rendered **admin console**;
everything else (`/`, `/telegram/`, the Connect edge) goes to the gateway. The
**Telegram connector** runs as a separate container with **no public ingress** — it
long-polls Telegram and egresses through a VPN sidecar, answering only internal gRPC.
The full contour (`deploy/docker-compose.yml`) runs one `gateway`, one `backend`,
one Postgres, the connector (+ its VPN sidecar) and the **observability stack**:
OTel Collector (OTLP/gRPC ingest → Prometheus metrics + Tempo traces) and Grafana
with provisioned datasources and dashboards. Inter-service traffic uses a private
`internal` network (project-scoped DNS); only caddy joins the shared external `edge`
network (alias `scrabble`).
Two contours, two secret/variable prefixes (`TEST_` / `PROD_`):
- **Test** (Stage 16): auto-deploys on a PR into — or a push to — `development`
(`.gitea/workflows/ci.yaml` runs `docker compose up -d --build` on the Gitea runner
host, then a `GET /` probe through caddy). The host caddy terminates TLS and
forwards the domain to `scrabble:80`, so the in-compose caddy serves plain HTTP
(`CADDY_SITE_ADDRESS=:80`).
- **Prod** (Stage 18): a manual SSH deploy after `development → master`. There is no
host caddy, so the contour ships its own caddy terminating TLS — set
`CADDY_SITE_ADDRESS` to the domain and the caddy does its own ACME.
## 14. CI & branches
+53
View File
@@ -0,0 +1,53 @@
# Multi-stage build for the gateway service. A node stage builds the static UI
# (Vite), the result is embedded into the Go binary (gateway/internal/webui/dist),
# and the Go stage — mirroring platform/telegram/Dockerfile — yields a static
# binary shipped on distroless nonroot. So the single binary serves the SPA at /
# and /telegram/ (docs/ARCHITECTURE.md §13) with no separate static container.
#
# The production UI build vars are image build-args, baked into the bundle.
# Build from the repository root so go.work, pkg/, gateway/ and ui/ are all in the
# Docker context:
# docker build -f gateway/Dockerfile \
# --build-arg VITE_GATEWAY_URL=https://example \
# -t scrabble-gateway .
# --- UI build ----------------------------------------------------------------
FROM node:22-alpine AS ui
WORKDIR /ui
# Activate a pinned pnpm through corepack so every image build uses the same
# package-manager version.
RUN corepack enable && corepack prepare pnpm@11.0.9 --activate
# Prod UI build vars (Vite reads VITE_-prefixed env at build; baked into the bundle).
# Each ARG defaults to empty and is re-exported as ENV so `pnpm build` sees it.
ARG VITE_TELEGRAM_BOT_ID=
ARG VITE_TELEGRAM_LINK=
ARG VITE_GATEWAY_URL=
ENV VITE_TELEGRAM_BOT_ID=$VITE_TELEGRAM_BOT_ID \
VITE_TELEGRAM_LINK=$VITE_TELEGRAM_LINK \
VITE_GATEWAY_URL=$VITE_GATEWAY_URL
# Install with the lockfile first (the workspace file carries pnpm's build-script
# approval for esbuild), then build. Committed src/gen/ means no codegen here.
# Copying only the manifests before the sources keeps the install layer cached
# until a manifest or the lockfile changes.
COPY ui/package.json ui/pnpm-lock.yaml ui/pnpm-workspace.yaml ./
RUN pnpm install --frozen-lockfile
COPY ui ./
RUN pnpm build
# --- Go build ----------------------------------------------------------------
FROM golang:1.26.3-alpine AS build
WORKDIR /src
# Only the workspace files and the two modules the gateway build uses are copied.
COPY go.work go.work.sum ./
COPY pkg ./pkg
COPY gateway ./gateway
# Replace the committed placeholder with the freshly built UI before compiling, so
# go:embed bakes the real bundle into the binary.
RUN rm -rf gateway/internal/webui/dist
COPY --from=ui /ui/dist gateway/internal/webui/dist
# Reduce the workspace to what the gateway needs: gateway + pkg.
# (backend and platform/telegram are not copied into this stage, so their `use`
# entries are dropped from go.work.)
RUN go work edit -dropuse=./backend -dropuse=./platform/telegram
# CGO is disabled to produce the static binary the distroless static base needs.
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gateway ./gateway/cmd/gateway
# --- runtime -----------------------------------------------------------------
# The final image carries only the compiled binary, on the nonroot distroless base.
FROM gcr.io/distroless/static-debian12:nonroot
COPY --from=build /out/gateway /usr/local/bin/gateway
ENTRYPOINT ["/usr/local/bin/gateway"]
+9 -4
View File
@@ -5,9 +5,13 @@ terminates the client's **Connect-RPC + FlatBuffers** traffic over HTTP/2
cleartext (`h2c`), authenticates the originating credential, mints/resolves a
thin opaque session, rate-limits, injects `X-User-ID` when forwarding to the
backend over REST/JSON, and bridges the backend's gRPC push stream to each
client's in-app live channel. It also serves the backend's admin console at `/_gm`
on its public listener behind HTTP Basic-Auth. See
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §2, §3, §10, §12.
client's in-app live channel. It **embeds the static UI build** (`go:embed`, baked
in by the gateway image's node stage) and serves the one SPA at `/` (web) and
`/telegram/` (the Mini App) — the single-origin model. It can also serve the
backend's admin console at `/_gm` behind HTTP Basic-Auth for a local non-caddy run;
in the deployed contour the front caddy owns `/_gm` (see
[`../deploy`](../deploy)). See
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §2, §3, §10, §12, §13.
## Package layout
@@ -22,8 +26,9 @@ internal/ratelimit/ # token-bucket limiter (golang.org/x/time/rate)
internal/connector/ # gRPC client to the Telegram connector (initData validate, out-of-app push) + routing
internal/push/ # live-event fan-out hub (per-user client streams)
internal/transcode/ # FlatBuffers<->REST bridge + message_type registry
internal/connectsrv/ # the Connect Gateway service over h2c
internal/connectsrv/ # the Connect Gateway service over h2c (+ the in-memory active_users gauge)
internal/admin/ # Basic-Auth reverse proxy mounting the backend admin console at /_gm (verbatim)
internal/webui/ # embedded SPA build (go:embed dist) served at / and /telegram/
```
The FlatBuffers payloads and the backend push proto are the shared wire
@@ -0,0 +1,63 @@
package connectsrv
import (
"sync"
"time"
)
// activeUsers tracks distinct authenticated accounts by last-action time, backing
// the in-memory active_users gauge. It is single-process by design (the gateway is
// single-instance in the MVP, docs/ARCHITECTURE.md §10): the distinct count is
// correct for one process, resets on restart, and is a live operational gauge, not
// a billing figure. Memory is bounded by the number of distinct accounts active
// within the longest window; stale entries are pruned on observation.
type activeUsers struct {
mu sync.Mutex
lastSeen map[string]time.Time
now func() time.Time
}
// newActiveUsers returns an empty tracker using the wall clock.
func newActiveUsers() *activeUsers {
return &activeUsers{lastSeen: make(map[string]time.Time), now: time.Now}
}
// seen records that account uid performed an authenticated action now.
func (a *activeUsers) seen(uid string) {
if uid == "" {
return
}
a.mu.Lock()
a.lastSeen[uid] = a.now()
a.mu.Unlock()
}
// counts returns, for each window, the number of distinct accounts last seen
// within it, pruning entries older than the longest window in the same pass.
func (a *activeUsers) counts(windows []time.Duration) []int {
a.mu.Lock()
defer a.mu.Unlock()
now := a.now()
var longest time.Duration
for _, w := range windows {
if w > longest {
longest = w
}
}
res := make([]int, len(windows))
for uid, ts := range a.lastSeen {
age := now.Sub(ts)
if age > longest {
delete(a.lastSeen, uid)
continue
}
for i, w := range windows {
if age <= w {
res[i]++
}
}
}
return res
}
@@ -0,0 +1,45 @@
package connectsrv
import (
"testing"
"time"
)
// TestActiveUsersCountsAndPrune drives the tracker with a fake clock: three
// accounts are recorded at different offsets, then the per-window counts are
// checked twice — once while every account is inside the 7d window and once after
// the oldest account has aged out and must have been pruned.
func TestActiveUsersCountsAndPrune(t *testing.T) {
	start := time.Date(2026, 6, 5, 12, 0, 0, 0, time.UTC)
	clock := start
	tracker := newActiveUsers()
	tracker.now = func() time.Time { return clock }

	// u1 at start, u2 at start+2h, u3 at start+50h.
	sightings := []struct {
		uid    string
		offset time.Duration
	}{
		{uid: "u1", offset: 0},
		{uid: "u2", offset: 2 * time.Hour},
		{uid: "u3", offset: 50 * time.Hour},
	}
	for _, s := range sightings {
		clock = start.Add(s.offset)
		tracker.seen(s.uid)
	}

	day := 24 * time.Hour
	windows := []time.Duration{day, 7 * day}

	// Clock is at start+50h: only u3 is within 24h; all three are within 7d.
	got := tracker.counts(windows)
	if got[0] != 1 || got[1] != 3 {
		t.Fatalf("counts at +50h = %v, want [1 3]", got)
	}

	// Clock is at start+169h: u1 (age 169h) is past 7d and is pruned; u2 and u3
	// are still inside 7d but outside 24h.
	clock = start.Add(169 * time.Hour)
	got = tracker.counts(windows)
	if got[0] != 0 || got[1] != 2 {
		t.Fatalf("counts at +169h = %v, want [0 2]", got)
	}
	if _, present := tracker.lastSeen["u1"]; present {
		t.Fatalf("u1 should have been pruned from the tracker")
	}
}
// TestActiveUsersIgnoresEmpty checks that an empty account id is never recorded
// as an active user.
func TestActiveUsersIgnoresEmpty(t *testing.T) {
	tracker := newActiveUsers()
	tracker.seen("")

	got := tracker.counts([]time.Duration{time.Hour})
	if got[0] != 0 {
		t.Fatalf("empty uid recorded: got %v", got)
	}
}
+37 -3
View File
@@ -12,14 +12,26 @@ import (
// meterName scopes the gateway edge's OpenTelemetry instruments.
const meterName = "scrabble/gateway/edge"
// activeUserWindows are the rolling windows the active_users gauge reports.
var activeUserWindows = []struct {
label string
dur time.Duration
}{
{label: "24h", dur: 24 * time.Hour},
{label: "7d", dur: 7 * 24 * time.Hour},
}
// serverMetrics holds the edge's operational instruments. It defaults to no-ops;
// NewServer installs the real meter when one is supplied in Deps.
type serverMetrics struct {
edge metric.Float64Histogram
edge metric.Float64Histogram
active *activeUsers
}
// newServerMetrics builds the instruments on meter (nil selects a no-op meter),
// falling back to a no-op histogram on the (rare) construction error.
// falling back to a no-op histogram on the (rare) construction error. The
// active_users gauge is registered as an observable callback over the in-memory
// tracker.
func newServerMetrics(meter metric.Meter) *serverMetrics {
if meter == nil {
meter = noop.NewMeterProvider().Meter(meterName)
@@ -30,7 +42,24 @@ func newServerMetrics(meter metric.Meter) *serverMetrics {
if err != nil {
h, _ = noop.NewMeterProvider().Meter(meterName).Float64Histogram("edge_request_duration")
}
return &serverMetrics{edge: h}
m := &serverMetrics{edge: h, active: newActiveUsers()}
gauge, err := meter.Int64ObservableGauge("active_users",
metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
if err == nil {
windows := make([]time.Duration, len(activeUserWindows))
for i, w := range activeUserWindows {
windows[i] = w.dur
}
_, _ = meter.RegisterCallback(func(_ context.Context, o metric.Observer) error {
counts := m.active.counts(windows)
for i, w := range activeUserWindows {
o.ObserveInt64(gauge, int64(counts[i]), metric.WithAttributes(attribute.String("window", w.label)))
}
return nil
}, gauge)
}
return m
}
// recordEdge records the duration of one Execute call labelled by message type and
@@ -41,3 +70,8 @@ func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string,
attribute.String("result", result),
))
}
// recordActive notes that account uid has just acted, so the next observation of
// the active_users gauge includes it.
func (m *serverMetrics) recordActive(uid string) {
	tracker := m.active
	tracker.seen(uid)
}
+17 -1
View File
@@ -24,6 +24,7 @@ import (
"scrabble/gateway/internal/ratelimit"
"scrabble/gateway/internal/session"
"scrabble/gateway/internal/transcode"
"scrabble/gateway/internal/webui"
edgev1 "scrabble/gateway/proto/edge/v1"
"scrabble/gateway/proto/edge/v1/edgev1connect"
)
@@ -89,9 +90,21 @@ func (s *Server) HTTPHandler() http.Handler {
if s.adminProxy != nil {
// The admin console (backend /_gm) is served on the public listener behind
// the proxy's Basic-Auth, mounted below the h2c wrap so the Connect edge keeps
// working over h2c (docs/ARCHITECTURE.md §12).
// working over h2c (docs/ARCHITECTURE.md §12). In the deployed contour the
// front caddy owns the /_gm Basic-Auth and Grafana routing; this mount serves
// a non-caddy (local) setup.
mux.Handle("/_gm/", s.adminProxy)
} else {
// With the console disabled here, keep /_gm a 404 so the SPA catch-all below
// does not serve the app shell at the operator path.
mux.Handle("/_gm/", http.NotFoundHandler())
}
// The embedded single-page UI is served at the site root and, for the Telegram
// Mini App, under /telegram/ — the single-origin model (docs/ARCHITECTURE.md
// §13). Both mounts sit below the h2c wrap so the Connect edge (a more specific
// prefix) keeps priority; "/" is the catch-all SPA fallback for the hash router.
mux.Handle("/telegram/", webui.Handler("/telegram/"))
mux.Handle("/", webui.Handler(""))
return h2c.NewHandler(mux, &http2.Server{})
}
@@ -118,6 +131,9 @@ func (s *Server) Execute(ctx context.Context, req *connect.Request[edgev1.Execut
result = "unauthenticated"
return nil, err
}
// A valid session proving an authenticated request is an "action" for the
// active_users gauge, counted before the rate-limit/domain outcome.
s.metrics.recordActive(uid)
if !s.limiter.Allow("user:"+uid, s.userPolicy) {
result = "rate_limited"
return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
+2
View File
@@ -0,0 +1,2 @@
# Placeholder so the embedded dist/assets directory exists in a plain build.
# The production gateway image replaces dist/ with the real Vite build.
+15
View File
@@ -0,0 +1,15 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Scrabble</title>
</head>
<body>
<p>
UI build placeholder. The production gateway image embeds the real Vite
build (see gateway/Dockerfile); seeing this page means the binary was
built without a UI build.
</p>
</body>
</html>
+71
View File
@@ -0,0 +1,71 @@
// Package webui serves the embedded single-page UI build over the public edge.
//
// The committed dist/ holds only a placeholder index.html so the gateway module
// compiles with a plain `go build` (and in CI) without a UI build. The production
// gateway image replaces dist/ with the real Vite build before compiling (see
// gateway/Dockerfile), so the binary ships the UI inside it. Because Vite is built
// with a relative asset base, one build serves under any path: Handler is mounted
// both at "/" (web) and at "/telegram/" (the Telegram Mini App), matching the
// single-origin model in docs/ARCHITECTURE.md §13.
package webui
import (
"embed"
"io/fs"
"net/http"
"path"
"strings"
)
//go:embed all:dist
var dist embed.FS
// distFS exposes the embedded UI build with the leading dist/ stripped, so files
// are addressed as "index.html", "assets/...", and so on. It panics if the
// sub-tree cannot be opened; since the directory is embedded at compile time, a
// missing dist/ is expected to fail the build rather than reach this point.
func distFS() fs.FS {
	root, err := fs.Sub(dist, "dist")
	if err == nil {
		return root
	}
	panic("webui: embedded dist/ missing: " + err.Error())
}
// Handler returns the HTTP handler for the embedded SPA.
//
// A request that names an existing regular file in the build is handed to the
// standard file server. Anything else — the root, an unknown path, or a
// directory — gets the index.html shell with a 200, so the client-side hash
// router can resolve a deep link and no directory listing is ever produced.
//
// A non-empty stripPrefix (for example "/telegram/") is removed from the request
// path before the lookup, which lets the same build be mounted under a sub-path.
func Handler(stripPrefix string) http.Handler {
	root := distFS()
	static := http.FileServer(http.FS(root))

	var spa http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Normalise to a slash-free relative name usable with fs.Stat.
		rel := strings.TrimPrefix(path.Clean("/"+r.URL.Path), "/")
		if rel != "" {
			if st, err := fs.Stat(root, rel); err == nil && !st.IsDir() {
				static.ServeHTTP(w, r)
				return
			}
		}
		// Root, unknown path or directory: serve the SPA shell, never a listing.
		serveIndex(w, root)
	})

	prefix := strings.TrimSuffix(stripPrefix, "/")
	if prefix == "" {
		return spa
	}
	return http.StripPrefix(prefix, spa)
}
// serveIndex writes the SPA shell with a 200 status, so a client-routed deep link
// still loads the app rather than a 404.
func serveIndex(w http.ResponseWriter, content fs.FS) {
data, err := fs.ReadFile(content, "index.html")
if err != nil {
http.Error(w, "ui not built", http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/html; charset=utf-8")
w.WriteHeader(http.StatusOK)
_, _ = w.Write(data)
}
+52
View File
@@ -0,0 +1,52 @@
package webui
import (
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// get issues an in-memory GET for target against h and returns the recorded
// response.
func get(t *testing.T, h http.Handler, target string) *http.Response {
	t.Helper()
	req := httptest.NewRequest(http.MethodGet, target, nil)
	recorder := httptest.NewRecorder()
	h.ServeHTTP(recorder, req)
	return recorder.Result()
}
// TestHandlerServesIndexAndFallsBack covers the root-mounted handler: the index
// at "/", a real embedded file, and the SPA fallback for an unknown deep link.
func TestHandlerServesIndexAndFallsBack(t *testing.T) {
	spa := Handler("")

	// The embedded placeholder index is served at the root.
	root := get(t, spa, "/")
	if root.StatusCode != http.StatusOK {
		t.Fatalf("GET / status = %d, want 200", root.StatusCode)
	}

	// An existing (non-index) file goes through the file server.
	asset := get(t, spa, "/assets/.gitkeep")
	if asset.StatusCode != http.StatusOK {
		t.Fatalf("GET /assets/.gitkeep status = %d, want 200 (served file)", asset.StatusCode)
	}

	// An unknown deep link yields the SPA shell (200, not 404) so the client-side
	// hash router can take over.
	deep := get(t, spa, "/game/abc/deep")
	if deep.StatusCode != http.StatusOK {
		t.Fatalf("GET /game/abc/deep status = %d, want 200 (SPA fallback)", deep.StatusCode)
	}
	html, _ := io.ReadAll(deep.Body)
	if !strings.Contains(string(html), "<html") {
		t.Fatalf("fallback body is not the index HTML: %q", html)
	}
}
// TestHandlerStripsPrefix checks the sub-path mount: the mount root, an embedded
// file and an unknown deep link under /telegram/ must all answer 200.
func TestHandlerStripsPrefix(t *testing.T) {
	mounted := Handler("/telegram/")
	targets := []string{"/telegram/", "/telegram/assets/.gitkeep", "/telegram/lobby/x"}
	for i := range targets {
		resp := get(t, mounted, targets[i])
		if resp.StatusCode == http.StatusOK {
			continue
		}
		t.Fatalf("GET %s status = %d, want 200", targets[i], resp.StatusCode)
	}
}
@@ -1,62 +0,0 @@
# Deploy descriptor for the Telegram connector (the platform side-service).
#
# Networking mirrors the sibling ../15-puzzle/deploy/docker-compose.yml:
# - The `vpn` sidecar (developer/amneziawg-sidecar) holds the tunnel and provides
# the netns shared by `app` (network_mode: "service:vpn"). All of the
# connector's egress to api.telegram.org therefore leaves through the tunnel.
# - `vpn` is the one attached to the external `edge` network, with the alias
# `telegram`, so the other services reach the connector's gRPC port at
# `telegram:9091` inside the shared netns. The connector needs NO public
# ingress — it long-polls Telegram and only answers internal gRPC.
#
# The connector joins the same `edge` network as `backend` and `gateway` (the full
# service set rolled out together on a dev-environment deploy). The gateway calls it
# with GATEWAY_CONNECTOR_ADDR=telegram:9091; the backend admin surface (Stage 10)
# will use the same address. The single public ingress for the host reverse proxy
# (caddy) is the gateway's HTTP port, which also serves the Mini App under /telegram/
# (ARCHITECTURE.md §13). The full multi-service compose lands with Stage 12; this is
# the connector-scoped descriptor.
name: scrabble-telegram
services:
vpn:
container_name: scrabble-telegram-vpn
image: docker.iliadenisov.ru/developer/amneziawg-sidecar:latest
restart: unless-stopped
privileged: true
environment:
AWG_CONF: ${AWG_CONF:?set AWG_CONF}
networks:
edge:
aliases:
- telegram
app:
container_name: scrabble-telegram
image: scrabble-telegram:latest
build:
# Build from the repository root so go.work, pkg/ and platform/telegram/ are
# all in the Docker context (see platform/telegram/Dockerfile).
context: ../../..
dockerfile: platform/telegram/Dockerfile
restart: unless-stopped
depends_on:
- vpn
network_mode: "service:vpn"
environment:
# The bot tokens live ONLY in this container (ARCHITECTURE.md §12). One bot per
# service language (en/ru); at least one token is required (the connector
# validates this at boot — compose cannot express "one of").
TELEGRAM_BOT_TOKEN_EN: ${TELEGRAM_BOT_TOKEN_EN:-}
TELEGRAM_BOT_TOKEN_RU: ${TELEGRAM_BOT_TOKEN_RU:-}
TELEGRAM_GAME_CHANNEL_ID_EN: ${TELEGRAM_GAME_CHANNEL_ID_EN:-}
TELEGRAM_GAME_CHANNEL_ID_RU: ${TELEGRAM_GAME_CHANNEL_ID_RU:-}
TELEGRAM_MINIAPP_URL: ${TELEGRAM_MINIAPP_URL:?set TELEGRAM_MINIAPP_URL}
TELEGRAM_GRPC_ADDR: ${TELEGRAM_GRPC_ADDR:-:9091}
# Set to true when deploying into Telegram's test environment.
TELEGRAM_TEST_ENV: ${TELEGRAM_TEST_ENV:-false}
TELEGRAM_API_BASE_URL: ${TELEGRAM_API_BASE_URL:-}
networks:
edge:
external: true
+5 -8
View File
@@ -43,21 +43,18 @@ func New(cfg Config, log *zap.Logger) (*Bot, error) {
}
t := &Bot{miniAppURL: cfg.MiniAppURL, log: log}
token := cfg.Token
if cfg.TestEnv {
// The Bot API test environment lives under /bot<token>/test/METHOD; the
// client builds <host>/bot<token>/<method>, so suffixing the token with
// "/test" injects the test segment without a custom host.
token += "/test"
}
opts := []tgbot.Option{
tgbot.WithDefaultHandler(t.handleStart),
tgbot.WithMessageTextHandler("/start", tgbot.MatchTypePrefix, t.handleStart),
}
if cfg.TestEnv {
// Route to the Bot API test environment (.../bot<token>/test/METHOD).
opts = append(opts, tgbot.UseTestEnvironment())
}
if cfg.APIBaseURL != "" {
opts = append(opts, tgbot.WithServerURL(cfg.APIBaseURL))
}
api, err := tgbot.New(token, opts...)
api, err := tgbot.New(cfg.Token, opts...)
if err != nil {
return nil, err
}
@@ -75,6 +75,34 @@ func TestSendTextHasNoMarkup(t *testing.T) {
}
}
// getMePathFor captures the path bot.New's getMe call hits for the given TestEnv,
// so the test environment routing is covered (a misroute is exactly what makes a
// test-environment token fail with "getMe unauthorized").
func getMePathFor(t *testing.T, testEnv bool) string {
t.Helper()
var path string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.HasSuffix(r.URL.Path, "/getMe") {
path = r.URL.Path
}
io.WriteString(w, `{"ok":true,"result":{"id":1,"is_bot":true,"first_name":"t","username":"tb"}}`)
}))
t.Cleanup(srv.Close)
if _, err := New(Config{Token: "123:ABC", APIBaseURL: srv.URL, TestEnv: testEnv, MiniAppURL: "https://example.com/"}, zap.NewNop()); err != nil {
t.Fatalf("new bot (testEnv=%v): %v", testEnv, err)
}
return path
}
// TestTestEnvironmentRoutesGetMe verifies that TestEnv inserts the /test segment
// into the Bot API path and that the production path is left unchanged.
func TestTestEnvironmentRoutesGetMe(t *testing.T) {
	cases := []struct {
		testEnv bool
		want    string
		format  string
	}{
		{testEnv: true, want: "/bot123:ABC/test/getMe", format: "TestEnv getMe path = %q, want %q"},
		{testEnv: false, want: "/bot123:ABC/getMe", format: "prod getMe path = %q, want %q"},
	}
	for _, tc := range cases {
		got := getMePathFor(t, tc.testEnv)
		if got != tc.want {
			t.Errorf(tc.format, got, tc.want)
		}
	}
}
func TestStartPayload(t *testing.T) {
cases := map[string]string{
"/start g123": "g123",