Stage 16: deploy infra & test contour
- backend + gateway multi-stage distroless Dockerfiles; the gateway embeds and
serves the SPA at / and /telegram/ via go:embed (committed dist placeholder,
real build baked in by the image's node stage)
- deploy/docker-compose.yml: backend + gateway + Postgres + Telegram connector
(VPN sidecar) + OTel Collector + Prometheus (15d) + Tempo (72h) + Grafana,
fronted by a caddy owning a single /_gm Basic-Auth (admin console + Grafana
subpath); inter-service on a private network, only caddy on the edge network
- new metrics: backend accounts_created_total{kind} (robots excluded) and an
in-memory gateway active_users{window=24h,7d} gauge
- CI: single .gitea/workflows/ci.yaml (unit/integration/ui + a gated test-contour
deploy) on the new feature/* -> development -> master branch model; the old
go-unit/integration/ui-test workflows are folded in; the connector-scoped
compose is retired (superseded by deploy/)
- docs: ARCHITECTURE §11/§12/§13, root + gateway READMEs, CLAUDE.md branching,
PLAN.md (stage 16 done + refinements + Stage 17 forward-notes)
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
name: CI
|
||||
|
||||
# Single gated pipeline for the test contour (Stage 16). Gitea cannot express
|
||||
# cross-workflow `needs`, so the full test suite and the auto test-deploy live in
|
||||
# one workflow.
|
||||
#
|
||||
# Branch model (CLAUDE.md): feature branches are cut from `development`; a commit
|
||||
# to a feature branch triggers nothing. The pipeline runs on a PR into
|
||||
# `development` or `master` (the full test suite — the merge gate) and on a push
|
||||
# to `development` (after a merge). The deploy job runs only for `development`
|
||||
# (PR or merge), so a PR into `master` is test-only; the prod deploy is a manual
|
||||
# workflow (Stage 17).
|
||||
#
|
||||
# Console output is kept plain (NO_COLOR + `docker compose --ansi never` +
|
||||
# `--progress plain`) so the Gitea logs stay readable.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [development, master]
|
||||
push:
|
||||
branches: [development]
|
||||
|
||||
jobs:
|
||||
unit:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
env:
|
||||
# The engine consumes the published scrabble-solver module from this Gitea;
|
||||
# GOPRIVATE makes go fetch it directly (skipping the public proxy/checksum DB).
|
||||
GOPRIVATE: gitea.iliadenisov.ru/*
|
||||
DICT_VERSION: v1.0.0
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Fetch dictionary DAWGs
|
||||
run: |
|
||||
mkdir -p "${GITHUB_WORKSPACE}/dawg"
|
||||
curl -fsSL -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
|
||||
tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
|
||||
ls -la "${GITHUB_WORKSPACE}/dawg"
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.work
|
||||
cache: true
|
||||
|
||||
- name: gofmt
|
||||
run: |
|
||||
unformatted="$(gofmt -l .)"
|
||||
if [ -n "$unformatted" ]; then
|
||||
echo "gofmt needed on:"; echo "$unformatted"; exit 1
|
||||
fi
|
||||
|
||||
- name: vet
|
||||
run: go vet ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
|
||||
- name: build
|
||||
run: go build ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
|
||||
- name: test
|
||||
env:
|
||||
BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
|
||||
run: go test -count=1 ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
|
||||
integration:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
env:
|
||||
# Ryuk (testcontainers' reaper) does not start cleanly on every runner; the
|
||||
# suite's TestMain terminates its own container, so disable it.
|
||||
TESTCONTAINERS_RYUK_DISABLED: "true"
|
||||
GOPRIVATE: gitea.iliadenisov.ru/*
|
||||
DICT_VERSION: v1.0.0
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Fetch dictionary DAWGs
|
||||
run: |
|
||||
mkdir -p "${GITHUB_WORKSPACE}/dawg"
|
||||
curl -fsSL -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
|
||||
tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
|
||||
ls -la "${GITHUB_WORKSPACE}/dawg"
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.work
|
||||
cache: true
|
||||
|
||||
- name: Integration tests
|
||||
# -count=1 disables the Go test cache; -p=1 -parallel=1 keeps the container-backed
|
||||
# tests serial; the 15-minute timeout bounds a stuck container pull.
|
||||
env:
|
||||
BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
|
||||
run: go test -tags=integration -count=1 -p=1 -parallel=1 -timeout=15m ./backend/...
|
||||
|
||||
ui:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: ui
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Install pnpm
|
||||
run: npm install -g pnpm@11.0.9
|
||||
|
||||
- name: Install deps
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Type-check
|
||||
run: pnpm run check
|
||||
|
||||
- name: Unit tests
|
||||
run: pnpm run test:unit
|
||||
|
||||
- name: Build
|
||||
run: pnpm run build
|
||||
|
||||
- name: Bundle-size budget
|
||||
run: node scripts/bundle-size.mjs
|
||||
|
||||
- name: Install Playwright browsers
|
||||
run: pnpm exec playwright install chromium webkit
|
||||
timeout-minutes: 5
|
||||
|
||||
- name: E2E smoke (mock)
|
||||
run: pnpm run test:e2e
|
||||
timeout-minutes: 5
|
||||
|
||||
deploy:
|
||||
# Auto test-deploy on a PR into development and on the push that merges it.
|
||||
# A PR into master is test-only (this job is skipped); prod deploy is manual.
|
||||
needs: [unit, integration, ui]
|
||||
if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/development') || (github.event_name == 'pull_request' && github.base_ref == 'development') }}
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
env:
|
||||
NO_COLOR: "1"
|
||||
DOCKER_CLI_HINTS: "false"
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build and (re)deploy the test contour
|
||||
working-directory: deploy
|
||||
env:
|
||||
# Sensitive values -> secrets; non-sensitive -> variables. The compose
|
||||
# interpolates these unprefixed names (see deploy/.env.example).
|
||||
POSTGRES_PASSWORD: ${{ secrets.TEST_POSTGRES_PASSWORD }}
|
||||
AWG_CONF: ${{ secrets.TEST_AWG_CONF }}
|
||||
GM_BASICAUTH_HASH: ${{ secrets.TEST_GM_BASICAUTH_HASH }}
|
||||
GRAFANA_ADMIN_PASSWORD: ${{ secrets.TEST_GRAFANA_ADMIN_PASSWORD }}
|
||||
TELEGRAM_BOT_TOKEN_EN: ${{ secrets.TEST_TELEGRAM_BOT_TOKEN_EN }}
|
||||
TELEGRAM_BOT_TOKEN_RU: ${{ secrets.TEST_TELEGRAM_BOT_TOKEN_RU }}
|
||||
GM_BASICAUTH_USER: ${{ vars.TEST_GM_BASICAUTH_USER }}
|
||||
GRAFANA_ROOT_URL: ${{ vars.TEST_GRAFANA_ROOT_URL }}
|
||||
CADDY_SITE_ADDRESS: ${{ vars.TEST_CADDY_SITE_ADDRESS }}
|
||||
TELEGRAM_MINIAPP_URL: ${{ vars.TEST_TELEGRAM_MINIAPP_URL }}
|
||||
TELEGRAM_GAME_CHANNEL_ID_EN: ${{ vars.TEST_TELEGRAM_GAME_CHANNEL_ID_EN }}
|
||||
TELEGRAM_GAME_CHANNEL_ID_RU: ${{ vars.TEST_TELEGRAM_GAME_CHANNEL_ID_RU }}
|
||||
TELEGRAM_TEST_ENV: ${{ vars.TEST_TELEGRAM_TEST_ENV }}
|
||||
VITE_TELEGRAM_BOT_ID: ${{ vars.TEST_VITE_TELEGRAM_BOT_ID }}
|
||||
VITE_TELEGRAM_LINK: ${{ vars.TEST_VITE_TELEGRAM_LINK }}
|
||||
VITE_GATEWAY_URL: ${{ vars.TEST_VITE_GATEWAY_URL }}
|
||||
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES: ${{ vars.TEST_GATEWAY_DEFAULT_SUPPORTED_LANGUAGES }}
|
||||
run: |
|
||||
docker compose --ansi never build --progress plain
|
||||
docker compose --ansi never up -d --remove-orphans
|
||||
|
||||
- name: Probe the gateway through caddy
|
||||
run: |
|
||||
set -u
|
||||
for i in $(seq 1 20); do
|
||||
if docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/; then
|
||||
echo "healthy: GET http://scrabble/"
|
||||
exit 0
|
||||
fi
|
||||
sleep 3
|
||||
done
|
||||
echo "probe failed; recent gateway logs:"
|
||||
docker logs --tail 50 scrabble-gateway || true
|
||||
exit 1
|
||||
|
||||
- name: Prune dangling images
|
||||
if: always()
|
||||
run: docker image prune -f
|
||||
@@ -1,81 +0,0 @@
|
||||
name: Tests · Go
|
||||
|
||||
# Fast unit tests for the Go side of the monorepo. Runs on every push and pull
|
||||
# request whose path filter matches a Go source directory. The module list
|
||||
# grows as new go.work modules (gateway, pkg/*, platform/*) are added by later
|
||||
# stages.
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'backend/**'
|
||||
- 'gateway/**'
|
||||
- 'pkg/**'
|
||||
- 'platform/**'
|
||||
- 'go.work'
|
||||
- 'go.work.sum'
|
||||
- '.gitea/workflows/go-unit.yaml'
|
||||
- '!**/*.md'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'backend/**'
|
||||
- 'gateway/**'
|
||||
- 'pkg/**'
|
||||
- 'platform/**'
|
||||
- 'go.work'
|
||||
- 'go.work.sum'
|
||||
- '.gitea/workflows/go-unit.yaml'
|
||||
- '!**/*.md'
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
env:
|
||||
# The engine consumes the published scrabble-solver module from this Gitea;
|
||||
# GOPRIVATE makes go fetch it directly (skipping the public proxy/checksum DB).
|
||||
# DICT_VERSION selects the dictionary DAWG release the engine tests load.
|
||||
GOPRIVATE: gitea.iliadenisov.ru/*
|
||||
DICT_VERSION: v1.0.0
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Fetch dictionary DAWGs
|
||||
# The DAWGs moved to the scrabble-dictionary repo (the solver is now a
|
||||
# versioned module pinned in backend/go.mod, fetched via GOPRIVATE — no
|
||||
# sibling clone). They ship as a release artifact, one semver per set.
|
||||
run: |
|
||||
mkdir -p "${GITHUB_WORKSPACE}/dawg"
|
||||
curl -fsSL -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
|
||||
tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
|
||||
ls -la "${GITHUB_WORKSPACE}/dawg"
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.work
|
||||
cache: true
|
||||
|
||||
- name: gofmt
|
||||
run: |
|
||||
unformatted="$(gofmt -l .)"
|
||||
if [ -n "$unformatted" ]; then
|
||||
echo "gofmt needed on:"; echo "$unformatted"; exit 1
|
||||
fi
|
||||
|
||||
- name: vet
|
||||
run: go vet ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
|
||||
- name: build
|
||||
run: go build ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
|
||||
- name: test
|
||||
# -count=1 disables the test cache so a green run never depends on a
|
||||
# previous runner's cached state. BACKEND_DICT_DIR points the engine
|
||||
# tests at the DAWGs fetched from the dictionary release.
|
||||
env:
|
||||
BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
|
||||
run: go test -count=1 ./backend/... ./pkg/... ./gateway/... ./platform/telegram/...
|
||||
@@ -1,71 +0,0 @@
|
||||
name: Tests · Integration
|
||||
|
||||
# Postgres-backed integration tests for the Go backend, gated behind the
|
||||
# `integration` build tag. They spin a throwaway postgres:17-alpine container via
|
||||
# testcontainers-go, which reaches the host Docker daemon through the socket the
|
||||
# Gitea runner exposes. Slower than the unit job (go-unit.yaml); run serially
|
||||
# (-p=1) with Ryuk disabled — TestMain terminates its own container. The module
|
||||
# list grows as new go.work modules are added by later stages.
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'backend/**'
|
||||
- 'pkg/**'
|
||||
- 'go.work'
|
||||
- 'go.work.sum'
|
||||
- '.gitea/workflows/integration.yaml'
|
||||
- '!**/*.md'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'backend/**'
|
||||
- 'pkg/**'
|
||||
- 'go.work'
|
||||
- 'go.work.sum'
|
||||
- '.gitea/workflows/integration.yaml'
|
||||
- '!**/*.md'
|
||||
|
||||
jobs:
|
||||
integration:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
env:
|
||||
# Ryuk (testcontainers' reaper) does not start cleanly on every runner;
|
||||
# the suite's TestMain terminates its own container, so disable it.
|
||||
TESTCONTAINERS_RYUK_DISABLED: "true"
|
||||
# The engine consumes the published scrabble-solver module from this Gitea
|
||||
# (GOPRIVATE -> direct fetch, skipping the public proxy/checksum DB);
|
||||
# DICT_VERSION selects the dictionary DAWG release the engine tests load.
|
||||
GOPRIVATE: gitea.iliadenisov.ru/*
|
||||
DICT_VERSION: v1.0.0
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Fetch dictionary DAWGs
|
||||
# The DAWGs moved to the scrabble-dictionary repo (the solver is now a
|
||||
# versioned module pinned in backend/go.mod, fetched via GOPRIVATE — no
|
||||
# sibling clone). They ship as a release artifact; the engine's untagged
|
||||
# tests (compiled here too) load them.
|
||||
run: |
|
||||
mkdir -p "${GITHUB_WORKSPACE}/dawg"
|
||||
curl -fsSL -o /tmp/dawg.tar.gz "https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz"
|
||||
tar xzf /tmp/dawg.tar.gz -C "${GITHUB_WORKSPACE}/dawg"
|
||||
ls -la "${GITHUB_WORKSPACE}/dawg"
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.work
|
||||
cache: true
|
||||
|
||||
- name: Integration tests
|
||||
# -count=1 disables the test cache; -p=1 -parallel=1 keeps the
|
||||
# container-backed tests serial; the 15-minute timeout bounds a stuck
|
||||
# container pull. The engine package's (untagged) tests also compile and
|
||||
# run here, so BACKEND_DICT_DIR points them at the DAWGs from the release.
|
||||
env:
|
||||
BACKEND_DICT_DIR: ${{ github.workspace }}/dawg
|
||||
run: go test -tags=integration -count=1 -p=1 -parallel=1 -timeout=15m ./backend/...
|
||||
@@ -1,67 +0,0 @@
|
||||
name: Tests · UI
|
||||
|
||||
# Hermetic UI checks: type-check, Vitest unit tests, production build with a
|
||||
# bundle-size budget, and a Playwright smoke (Chromium + WebKit) against the in-memory
|
||||
# mock transport (no backend/gateway/Postgres). The committed src/gen/ codegen is built, not
|
||||
# regenerated (the same model as the Go committed jet/fbs output).
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'ui/**'
|
||||
- '.gitea/workflows/ui-test.yaml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'ui/**'
|
||||
- '.gitea/workflows/ui-test.yaml'
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: ui
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Install pnpm
|
||||
run: npm install -g pnpm@11.0.9
|
||||
|
||||
- name: Install deps
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Type-check
|
||||
run: pnpm run check
|
||||
|
||||
- name: Unit tests
|
||||
run: pnpm run test:unit
|
||||
|
||||
- name: Build
|
||||
run: pnpm run build
|
||||
|
||||
- name: Bundle-size budget
|
||||
run: node scripts/bundle-size.mjs
|
||||
|
||||
# The Playwright system libraries are provisioned once on the runner host
|
||||
# (`sudo npx playwright@<version> install-deps chromium`), so the job needs no
|
||||
# apt and no sudo: it only downloads the browser binaries into the runner cache
|
||||
# (persisted by the host executor) and runs the suite. WebKit's Debian build
|
||||
# bundles most of its own libraries and runs headless without extra host deps; if
|
||||
# a runner ever lacks one, provision it once on the host with
|
||||
# `sudo npx playwright install-deps webkit`. The timeouts guard against a future
|
||||
# hang. Keep this in lockstep with @playwright/test in package.json — re-run
|
||||
# install-deps on the host after a major bump.
|
||||
- name: Install Playwright browsers
|
||||
run: pnpm exec playwright install chromium webkit
|
||||
timeout-minutes: 5
|
||||
|
||||
- name: E2E smoke (mock)
|
||||
run: pnpm run test:e2e
|
||||
timeout-minutes: 5
|
||||
@@ -49,9 +49,20 @@ conversation memory — is the source of continuity. Keep it that way.
|
||||
|
||||
## Branching & CI
|
||||
|
||||
- Trunk is **`master`** (owner preference). From Stage 1, work on `feature/*`
|
||||
and merge via PR with a green CI gate. The genesis commit (Stage 0) lands on
|
||||
`master` by necessity (an empty branch has nothing to PR into).
|
||||
- **Two long-lived branches** (Stage 16 onward): **`development`** is the
|
||||
integration branch; **`master`** is the production trunk. Cut `feature/*`
|
||||
branches **from `development`** and PR them back into it. (Stages 0–15 used
|
||||
`master` as the trunk with `feature/* → master`; the genesis Stage 0 commit is
|
||||
on `master` by necessity.)
|
||||
- A commit to a `feature/*` branch triggers **nothing**. The single workflow
|
||||
`.gitea/workflows/ci.yaml` runs the full suite (`unit` + `integration` + `ui`)
|
||||
on a PR into `development` or `master`, and the gated **`deploy`** job auto-rolls
|
||||
the **test contour** on a PR into — or a push to — `development`
|
||||
(`docker compose build` then `up -d --remove-orphans` on the runner host + a `GET /` probe). A PR into
|
||||
`master` is test-only.
|
||||
- Merge `development → master` only when CI is green; the **prod** deploy is then a
|
||||
**manual** workflow (Stage 17), never automatic. Secrets/variables are prefixed
|
||||
`TEST_` / `PROD_` per contour (Gitea 1.26 has no deployment environments).
|
||||
- After any push, watch the run to green before declaring a stage done — use the
|
||||
ready-made watcher, never an inline poll loop:
|
||||
`python3 ~/.claude/bin/gitea-ci-watch.py` (background). It reads `$GITEA_URL`
|
||||
@@ -113,6 +124,8 @@ backend/ # module scrabble/backend
|
||||
docs/ .gitea/workflows/ PLAN.md CLAUDE.md README.md
|
||||
gateway/ ui/ pkg/ # added by their stages
|
||||
platform/telegram/ # Telegram connector side-service (Stage 9): bot + gRPC API
|
||||
backend/Dockerfile gateway/Dockerfile platform/telegram/Dockerfile # multi-stage distroless (Stage 16)
|
||||
deploy/ # docker-compose + caddy + otelcol/prometheus/tempo/grafana (Stage 16)
|
||||
```
|
||||
|
||||
## Build & test
|
||||
@@ -127,9 +140,14 @@ go run ./backend/cmd/backend # /healthz, /readyz on :8080
|
||||
|
||||
cd ui && pnpm install && pnpm check && pnpm test:unit && pnpm build # the UI (Stage 7+)
|
||||
pnpm start # UI mock mode: lobby -> game, no backend
|
||||
|
||||
docker build -f backend/Dockerfile -t scrabble-backend . # images (Stage 16); gateway embeds the UI
|
||||
docker build -f gateway/Dockerfile -t scrabble-gateway .
|
||||
docker compose -f deploy/docker-compose.yml config # validate the full contour
|
||||
```
|
||||
|
||||
The `ui` module is a Node project (pnpm), **not** in `go.work`; its CI is
|
||||
`.gitea/workflows/ui-test.yaml`. Committed edge codegen under `ui/src/gen/`
|
||||
The `ui` module is a Node project (pnpm), **not** in `go.work`; it is the `ui` job
|
||||
of the single `.gitea/workflows/ci.yaml` (Stage 16 folded the former go-unit /
|
||||
integration / ui-test workflows into it). Committed edge codegen under `ui/src/gen/`
|
||||
(regenerate with `pnpm codegen`); pnpm build-script approval lives in
|
||||
`ui/pnpm-workspace.yaml` (`allowBuilds: esbuild: true`).
|
||||
|
||||
@@ -49,7 +49,7 @@ independent (see ARCHITECTURE §9.1).
|
||||
| 13 | Alphabet on the wire (UI alphabet-agnostic) | **done** |
|
||||
| 14 | Solver & dictionary split (publish solver + scrabble-dictionary repo/artifact) | **done** |
|
||||
| 15 | Dual Telegram bots & language-gated variants | **done** |
|
||||
| 16 | Deploy infra & test contour (Dockerfiles, gateway static UI, compose, observability) | todo |
|
||||
| 16 | Deploy infra & test contour (Dockerfiles, gateway static UI, compose, observability) | **done** |
|
||||
| 17 | Prod contour deploy (SSH export/import, manual after merge) | todo |
|
||||
|
||||
Scaffolding is incremental: `go.work` lists only existing modules; each stage
|
||||
@@ -279,7 +279,7 @@ back to `preferred_language`). Non-Telegram logins (web/email/guest) carry the g
|
||||
(`GATEWAY_DEFAULT_SUPPORTED_LANGUAGES`, all variants). Admin broadcasts (`SendToUser`/`SendToGameChannel`)
|
||||
pick the bot by an **operator-chosen** language in the console — unrelated to `ValidateInitData`.
|
||||
|
||||
### Stage 16 — Deploy infra & test contour
|
||||
### Stage 16 — Deploy infra & test contour *(done)*
|
||||
Scope: the deploy machinery + the **test contour** (the bulk of the original Stage 14). Backend +
|
||||
gateway **Dockerfiles** (multi-stage distroless, mirroring the Stage 9 connector image); the gateway
|
||||
gains **static UI serving** — **embedded** via `go:embed` (a node build stage in the gateway image),
|
||||
@@ -300,12 +300,16 @@ collector/Tempo/Prometheus retention.
|
||||
### Stage 17 — Prod contour deploy
|
||||
Scope: the **production contour** on a remote host over SSH. Deploy by **container export/import**
|
||||
(`docker save` → `scp`/ssh → `docker load` → `docker compose up` on the remote), the SSH key + host IP
|
||||
in Gitea secrets; **strictly manual** (`workflow_dispatch`) after a feature branch is merged to
|
||||
`master`. Two-contour config uses **`TEST_`/`PROD_` secret/variable prefixes** — Gitea 1.26 has no
|
||||
deployment environments (verified: the `environments` API 404s), so a flat prefixed namespace is the
|
||||
convention.
|
||||
Open details (re-interview): export/import vs a registry trade-off; prod domain/TLS at the remote
|
||||
caddy; prod VPN; rollback.
|
||||
in Gitea secrets; **strictly manual** (`workflow_dispatch`) after `development` is merged to `master`
|
||||
(the Stage 16 branch model: `feature/* → development → master`, merge gated green). Two-contour config
|
||||
uses **`TEST_`/`PROD_` secret/variable prefixes** — Gitea 1.26 has no deployment environments (verified:
|
||||
the `environments` API 404s), so a flat prefixed namespace is the convention.
|
||||
Reuses the Stage 16 `deploy/docker-compose.yml` as-is, mapping the **`PROD_`** set onto the same
|
||||
unprefixed compose vars. **No host caddy on prod**, so the contour's own caddy terminates TLS — set
|
||||
`CADDY_SITE_ADDRESS` to the prod domain so caddy does its own ACME (the Caddyfile is already
|
||||
parameterised for this; the test contour leaves it `:80` behind the host caddy).
|
||||
Open details (re-interview): export/import vs a registry trade-off; prod domain/cert source (ACME vs a
|
||||
provided cert) at the contour caddy; prod VPN; rollback.
|
||||
|
||||
## Refinements logged during implementation
|
||||
|
||||
@@ -1036,6 +1040,56 @@ caddy; prod VPN; rollback.
|
||||
per-language vars (the full deploy stack is Stage 16). No CI workflow change (the Go and UI workflows
|
||||
already span the touched modules).
|
||||
|
||||
- **Stage 16** (interview + implementation):
|
||||
- **Branch model reshaped** (interview, supersedes the Stage 0 `feature/* → master`): a long-lived
|
||||
**`development`** integration branch + **`master`** as the prod trunk. Feature branches are cut from
|
||||
`development`; a feature-branch commit triggers nothing. A single consolidated
|
||||
`.gitea/workflows/ci.yaml` (Gitea has no cross-workflow `needs`) runs `unit`+`integration`+`ui` on a PR
|
||||
into `development`/`master` and a **gated `deploy`** job (`needs` the three) that auto-rolls the test
|
||||
contour **on a PR into — or a push to — `development`** (owner's "и PR, и push"). A PR into `master` is
|
||||
test-only; prod is the manual Stage 17. The former `go-unit`/`integration`/`ui-test` workflows were
|
||||
folded in (no path filters — full CI on every PR, per the owner). Console kept plain (`NO_COLOR`,
|
||||
`docker compose --ansi never`, `--progress plain`).
|
||||
- **Gateway serves the UI** (interview, the §13 single-origin): a new `gateway/internal/webui` embeds
|
||||
`dist` via `go:embed` (a committed placeholder index so `go build`/CI compile without a UI build) and
|
||||
serves the SPA at `/` and `/telegram/` (a path-stripping SPA handler, index.html fallback for the hash
|
||||
router), mounted in the edge mux **below** the h2c wrap; `/_gm` stays an explicit 404 when the local
|
||||
admin proxy is off so the catch-all does not leak the shell. The `gateway/Dockerfile` node stage builds
|
||||
the UI with the `VITE_*` build-args and copies it into the embed dir before `go build`.
|
||||
- **Images** (interview): multi-stage distroless `backend/Dockerfile` (a DAWG stage `curl`s the
|
||||
`scrabble-dawg` release pinned to `DICT_VERSION`, `GOPRIVATE` fetches the solver) and `gateway/Dockerfile`
|
||||
(node UI stage + Go stage), both trimming `go.work` like `platform/telegram/Dockerfile`. Built and
|
||||
verified locally.
|
||||
- **Contour = caddy-fronted** (interview, "caddy всё равно нужен для https"): a new `caddy` service owns
|
||||
a **single `/_gm` Basic-Auth** and routes `/_gm/grafana/*` → Grafana (anonymous-admin + sub-path, no
|
||||
own accounts) and the rest of `/_gm/*` → the backend console; everything else → the gateway. This
|
||||
**supersedes Stage 10's** gateway-fronts-`/_gm` model **in the deploy topology** (the gateway's own
|
||||
`/_gm` proxy stays for a local non-caddy run). TLS: the **host caddy** terminates it for the test
|
||||
contour and forwards to `scrabble:80`; the in-compose caddy is parameterised (`CADDY_SITE_ADDRESS`) to
|
||||
own ACME on prod (Stage 17) where there is no host caddy.
|
||||
- **Networks** (engineering): inter-service traffic on a private `internal` network (project-scoped DNS,
|
||||
no name collisions on the shared `edge`); only caddy joins the external `edge` (alias `scrabble`). The
|
||||
connector keeps its VPN sidecar (the only egress that needs the tunnel). The connector-scoped
|
||||
`platform/telegram/deploy/docker-compose.yml` was **retired** (the root `deploy/docker-compose.yml`
|
||||
supersedes it; the connector Dockerfile stays).
|
||||
- **Observability stack** (interview): OTel Collector (OTLP/gRPC → a Prometheus scrape endpoint +
|
||||
Tempo OTLP) + Prometheus (**15d**) + Tempo (**72h**) + Grafana (provisioned Prometheus+Tempo datasources
|
||||
+ four dashboards: Service overview, Edge/UX, Game domain, Users; Traces via the Tempo datasource +
|
||||
Explore, no fixed panels). The collector's prometheus exporter uses `add_metric_suffixes:false` +
|
||||
`resource_to_telemetry_conversion` so the dashboards' PromQL matches the in-code metric names and carries
|
||||
`service_name`. The three services export `otlp` in the contour (default stays `none`, so CI needs no
|
||||
collector). Loki/logs were left out of scope (container stdout / zap JSON).
|
||||
- **User metrics** (interview): a backend `accounts_created_total{kind}` counter (telegram/email/guest;
|
||||
robots excluded — they are a provisioned pool, not users) via the Stage-12 `SetMetrics` no-op pattern,
|
||||
and a gateway **in-memory** `active_users{window=24h,7d}` observable gauge (distinct authenticated edge
|
||||
actors). The owner chose the in-memory gauge over a DB `last_seen_at` (overkill); its single-instance /
|
||||
reset-on-restart limits are documented (a live gauge, not billing).
|
||||
- **Owner actions before the contour is green** (surfaced, not blockers): set the **`TEST_`** Gitea
|
||||
secrets/variables (see `deploy/.env.example`) and add a host-caddy route `<test domain> → scrabble:80`
|
||||
on the runner host. CI bootstrap nuance: the PR that introduces `ci.yaml` may not deploy until the
|
||||
post-merge push to `development` (depending on whether Gitea runs head/base workflows for a PR), after
|
||||
which PR-time deploys work.
|
||||
|
||||
## Deferred TODOs (cross-stage)
|
||||
|
||||
- ~~**TODO-1 — publish & version the solver.**~~ **Done in Stage 14.** `scrabble-solver` is
|
||||
|
||||
@@ -80,3 +80,24 @@ pnpm dev # against a running gateway (Vite proxies the RPC path to :8081)
|
||||
`pnpm check` (type-check), `pnpm test:unit` (Vitest), `pnpm test:e2e` (Playwright
|
||||
smoke vs the mock), `pnpm build` (static bundle). Details — including the committed
|
||||
edge codegen (`pnpm codegen`) — are in [`ui/README.md`](ui/README.md).
|
||||
|
||||
## Deploy (`deploy/`)
|
||||
|
||||
The full contour is [`deploy/docker-compose.yml`](deploy/docker-compose.yml):
|
||||
`backend` + `gateway` (with the UI embedded via `go:embed`, baked in by its node
|
||||
build stage) + Postgres + the Telegram connector (with a VPN sidecar) + an
|
||||
observability stack (OTel Collector → Prometheus + Tempo → Grafana) + a front
|
||||
**caddy** that owns a single `/_gm` Basic-Auth (admin console + Grafana). The Go
|
||||
services build from multi-stage distroless `*/Dockerfile`.
|
||||
|
||||
```sh
|
||||
docker build -f backend/Dockerfile -t scrabble-backend . # pulls the DAWG release artifact
|
||||
docker build -f gateway/Dockerfile -t scrabble-gateway . # node stage builds + embeds the UI
|
||||
docker compose -f deploy/docker-compose.yml config # validate (needs the unprefixed compose env; see deploy/.env.example)
|
||||
```
|
||||
|
||||
CI auto-deploys the **test contour** on a PR into — or push to — `development`
|
||||
(`.gitea/workflows/ci.yaml`); the **prod contour** is a manual deploy after
|
||||
`development → master` (Stage 17). Env reference: [`deploy/.env.example`](deploy/.env.example);
|
||||
the topology and the two-contour model are in
|
||||
[`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) §13.
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
# Multi-stage build for the backend service. Mirrors platform/telegram/Dockerfile:
|
||||
# a golang-alpine builder yields a static binary shipped on distroless nonroot.
|
||||
#
|
||||
# The dictionary DAWGs are baked in from the scrabble-dictionary release artifact
|
||||
# (Stage 14) — the same set the Go CI downloads — and BACKEND_DICT_DIR points the
|
||||
# binary at them. The published solver module is fetched directly from Gitea
|
||||
# (GOPRIVATE), so the build stage needs git and network.
|
||||
#
|
||||
# Build from the repository root so go.work, go.work.sum, pkg/ and backend/ are all
|
||||
# in the Docker context:
|
||||
# docker build -f backend/Dockerfile -t scrabble-backend .
|
||||
|
||||
# --- dictionary artifact -----------------------------------------------------
|
||||
FROM alpine:3.20 AS dawg
|
||||
ARG DICT_VERSION=v1.0.0
|
||||
RUN apk add --no-cache curl tar
|
||||
RUN mkdir -p /dawg \
|
||||
&& curl -fsSL -o /tmp/dawg.tar.gz \
|
||||
"https://gitea.iliadenisov.ru/developer/scrabble-dictionary/releases/download/${DICT_VERSION}/scrabble-dawg-${DICT_VERSION}.tar.gz" \
|
||||
&& tar xzf /tmp/dawg.tar.gz -C /dawg
|
||||
|
||||
# --- build -------------------------------------------------------------------
|
||||
FROM golang:1.26.3-alpine AS build
|
||||
WORKDIR /src
|
||||
# git: the published solver module is fetched from Gitea directly (GOPRIVATE).
|
||||
RUN apk add --no-cache git
|
||||
ENV GOPRIVATE=gitea.iliadenisov.ru/*
|
||||
|
||||
COPY go.work go.work.sum ./
|
||||
COPY pkg ./pkg
|
||||
COPY backend ./backend
|
||||
|
||||
# Reduce the workspace to what the backend needs: backend + pkg.
|
||||
RUN go work edit -dropuse=./gateway -dropuse=./platform/telegram
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/backend ./backend/cmd/backend
|
||||
|
||||
# --- runtime -----------------------------------------------------------------
|
||||
FROM gcr.io/distroless/static-debian12:nonroot
|
||||
COPY --from=build /out/backend /usr/local/bin/backend
|
||||
COPY --from=dawg /dawg /opt/dawg
|
||||
ENV BACKEND_DICT_DIR=/opt/dawg
|
||||
ENTRYPOINT ["/usr/local/bin/backend"]
|
||||
@@ -132,6 +132,7 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
|
||||
hub := notify.NewHub(0)
|
||||
|
||||
accounts := account.NewStore(db)
|
||||
accounts.SetMetrics(tel.MeterProvider().Meter("scrabble/backend/account"))
|
||||
games := game.NewService(game.NewStore(db), accounts, registry, cfg.Game, logger)
|
||||
games.SetNotifier(hub)
|
||||
games.SetMetrics(tel.MeterProvider().Meter("scrabble/backend/game"))
|
||||
|
||||
@@ -93,12 +93,14 @@ type Identity struct {
|
||||
|
||||
// Store is the Postgres-backed query surface for accounts and identities.
|
||||
type Store struct {
|
||||
db *sql.DB
|
||||
db *sql.DB
|
||||
metrics *accountMetrics
|
||||
}
|
||||
|
||||
// NewStore constructs a Store wrapping db.
|
||||
// NewStore constructs a Store wrapping db. Metrics default to a no-op meter until
|
||||
// SetMetrics installs the real one during startup wiring.
|
||||
func NewStore(db *sql.DB) *Store {
|
||||
return &Store{db: db}
|
||||
return &Store{db: db, metrics: defaultAccountMetrics()}
|
||||
}
|
||||
|
||||
// ProvisionByIdentity returns the account bound to (kind, externalID), creating
|
||||
@@ -331,6 +333,11 @@ func (s *Store) create(ctx context.Context, kind, externalID string, seed provis
|
||||
if err != nil {
|
||||
return Account{}, fmt.Errorf("account: create for identity (%s, %s): %w", kind, externalID, err)
|
||||
}
|
||||
// Count genuinely new durable accounts; robots are a fixed provisioned pool,
|
||||
// not users, so they are excluded.
|
||||
if kind != KindRobot {
|
||||
s.metrics.recordCreated(ctx, kind)
|
||||
}
|
||||
return created, nil
|
||||
}
|
||||
|
||||
@@ -355,6 +362,7 @@ func (s *Store) ProvisionGuest(ctx context.Context) (Account, error) {
|
||||
if err := stmt.QueryContext(ctx, s.db, &row); err != nil {
|
||||
return Account{}, fmt.Errorf("account: provision guest: %w", err)
|
||||
}
|
||||
s.metrics.recordCreated(ctx, kindGuest)
|
||||
return modelToAccount(row), nil
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
package account
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/noop"
|
||||
)
|
||||
|
||||
// meterName scopes the account domain's OpenTelemetry instruments.
|
||||
const meterName = "scrabble/backend/account"
|
||||
|
||||
// kindGuest labels guest accounts in accounts_created_total. Guests carry no
|
||||
// identity, so they have no identity Kind; this is the metric label for them.
|
||||
const kindGuest = "guest"
|
||||
|
||||
// accountMetrics holds the account domain's operational instruments. It defaults
|
||||
// to no-ops (see defaultAccountMetrics); SetMetrics installs the real meter during
|
||||
// startup wiring.
|
||||
type accountMetrics struct {
|
||||
created metric.Int64Counter
|
||||
}
|
||||
|
||||
// defaultAccountMetrics returns instruments backed by a no-op meter.
|
||||
func defaultAccountMetrics() *accountMetrics {
|
||||
return newAccountMetrics(noop.NewMeterProvider().Meter(meterName))
|
||||
}
|
||||
|
||||
// newAccountMetrics builds the instruments on meter, falling back to a no-op
|
||||
// counter on the (rare) construction error.
|
||||
func newAccountMetrics(meter metric.Meter) *accountMetrics {
|
||||
c, err := meter.Int64Counter("accounts_created_total",
|
||||
metric.WithDescription("New accounts created, labelled by kind (telegram/email/guest); robots are not counted."))
|
||||
if err != nil {
|
||||
c, _ = noop.NewMeterProvider().Meter(meterName).Int64Counter("accounts_created_total")
|
||||
}
|
||||
return &accountMetrics{created: c}
|
||||
}
|
||||
|
||||
// SetMetrics installs the meter the account store records to. It must be called
|
||||
// during startup wiring; the default is a no-op meter.
|
||||
func (s *Store) SetMetrics(meter metric.Meter) {
|
||||
if meter == nil {
|
||||
return
|
||||
}
|
||||
s.metrics = newAccountMetrics(meter)
|
||||
}
|
||||
|
||||
// recordCreated counts one newly created account of the given kind.
|
||||
func (m *accountMetrics) recordCreated(ctx context.Context, kind string) {
|
||||
m.created.Add(ctx, 1, metric.WithAttributes(attribute.String("kind", kind)))
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
# Environment for deploy/docker-compose.yml. The CI deploy job (ci.yaml) maps the
|
||||
# Gitea TEST_-prefixed secrets/variables onto these unprefixed names; Stage 17
|
||||
# maps the PROD_-prefixed set the same way. Copy to deploy/.env for a local run.
|
||||
|
||||
# --- Postgres ---------------------------------------------------------------
|
||||
POSTGRES_DB=scrabble
|
||||
POSTGRES_USER=scrabble
|
||||
POSTGRES_PASSWORD=change-me # required
|
||||
|
||||
# --- Dictionary -------------------------------------------------------------
|
||||
DICT_VERSION=v1.0.0 # scrabble-dictionary release tag (image build-arg)
|
||||
|
||||
# --- Logging ----------------------------------------------------------------
|
||||
LOG_LEVEL=info
|
||||
|
||||
# --- Edge / caddy -----------------------------------------------------------
|
||||
# Test: ":80" (the host caddy terminates TLS and forwards to scrabble:80 on the
|
||||
# external `edge` network). Prod (Stage 17): a domain so caddy does its own ACME.
|
||||
CADDY_SITE_ADDRESS=:80
|
||||
GM_BASICAUTH_USER=gm
|
||||
GM_BASICAUTH_HASH= # required; `caddy hash-password` bcrypt hash — wrap it in single quotes here, the hash contains `$` which compose would otherwise interpolate
|
||||
|
||||
# --- UI build args (baked into the gateway image) ---------------------------
|
||||
VITE_TELEGRAM_BOT_ID=
|
||||
VITE_TELEGRAM_LINK=
|
||||
VITE_GATEWAY_URL=
|
||||
|
||||
# --- Gateway ----------------------------------------------------------------
|
||||
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES=en,ru
|
||||
|
||||
# --- Grafana ----------------------------------------------------------------
|
||||
GRAFANA_ROOT_URL=/_gm/grafana/ # set the full https URL behind a real domain
|
||||
GRAFANA_ADMIN_PASSWORD=admin
|
||||
|
||||
# --- Telegram connector -----------------------------------------------------
|
||||
AWG_CONF= # required; AmneziaWG sidecar config
|
||||
TELEGRAM_BOT_TOKEN_EN= # at least one of EN/RU required
|
||||
TELEGRAM_BOT_TOKEN_RU=
|
||||
TELEGRAM_GAME_CHANNEL_ID_EN=
|
||||
TELEGRAM_GAME_CHANNEL_ID_RU=
|
||||
TELEGRAM_MINIAPP_URL= # required
|
||||
TELEGRAM_TEST_ENV=false
|
||||
TELEGRAM_API_BASE_URL=
|
||||
@@ -0,0 +1,35 @@
|
||||
# Edge reverse proxy for the Scrabble contour. A single Basic-Auth gate covers
|
||||
# every operator surface under /_gm (the backend-rendered admin console and the
|
||||
# Grafana subpath); everything else (the SPA at / and /telegram/, plus the
|
||||
# Connect edge) goes to the gateway. Mirrors ../galaxy-game's /_gm model.
|
||||
#
|
||||
# CADDY_SITE_ADDRESS is ":80" in the test contour (the host caddy terminates TLS
|
||||
# and forwards); set it to a domain in prod (Stage 17) so this caddy does its own
|
||||
# ACME and the contour is self-contained.
|
||||
{
|
||||
admin off
|
||||
}
|
||||
|
||||
{$CADDY_SITE_ADDRESS::80} {
|
||||
# Operator surfaces under /_gm: a single shared Basic-Auth, then route.
|
||||
@gm path /_gm /_gm/*
|
||||
handle @gm {
|
||||
basic_auth {
|
||||
{$GM_BASICAUTH_USER:gm} {$GM_BASICAUTH_HASH}
|
||||
}
|
||||
# Grafana serves from this sub-path (GF_SERVER_SERVE_FROM_SUB_PATH=true), so
|
||||
# the prefix is forwarded intact, not stripped.
|
||||
handle /_gm/grafana* {
|
||||
reverse_proxy grafana:3000
|
||||
}
|
||||
# Everything else under /_gm is the backend-rendered admin console.
|
||||
handle {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
}
|
||||
|
||||
# The SPA (/, /telegram/) and the Connect edge are served by the gateway.
|
||||
handle {
|
||||
reverse_proxy gateway:8081
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,217 @@
|
||||
# Full deploy descriptor for the Scrabble test contour: backend + gateway +
|
||||
# Postgres + the Telegram connector (with its VPN sidecar) + the observability
|
||||
# stack (OTel Collector -> Prometheus + Tempo -> Grafana). Driven by
|
||||
# .gitea/workflows/ci.yaml (`docker compose up -d --build`); env values are
|
||||
# interpolated from Gitea Actions TEST_ secrets/variables exported by the deploy
|
||||
# job (see deploy/.env.example for the unprefixed names).
|
||||
#
|
||||
# Networking (mirrors ../galaxy-game):
|
||||
# - `internal` (scrabble-internal): all inter-service traffic, project-private
|
||||
# DNS so service names never collide on the shared `edge` network.
|
||||
# - `edge` (external): the host caddy reaches this contour at `scrabble:80`
|
||||
# (the in-compose caddy's alias). The in-compose caddy terminates only HTTP in
|
||||
# the test contour; the host caddy terminates TLS and forwards. For prod
|
||||
# (Stage 17, no host caddy) set CADDY_SITE_ADDRESS to the domain so the caddy
|
||||
# does its own ACME — the contour is then self-contained.
|
||||
# - The connector egresses to api.telegram.org through the `vpn` sidecar
|
||||
# (network_mode: service:vpn); it answers internal gRPC at `telegram:9091`.
|
||||
name: scrabble
|
||||
|
||||
services:
|
||||
postgres:
|
||||
container_name: scrabble-postgres
|
||||
image: postgres:17-alpine
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-scrabble}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-scrabble}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-scrabble} -d ${POSTGRES_DB:-scrabble}"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 30
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
networks: [internal]
|
||||
|
||||
backend:
|
||||
container_name: scrabble-backend
|
||||
image: scrabble-backend:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: backend/Dockerfile
|
||||
args:
|
||||
DICT_VERSION: ${DICT_VERSION:-v1.0.0}
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
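# search_path=backend matches the migrations (00001 creates the schema).
# POSTGRES_PASSWORD is placed into this URL verbatim and is also the literal
# Postgres password above, so it must consist of URL-safe characters only
# (percent-encoding it here would no longer match the database password).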
|
||||
BACKEND_POSTGRES_DSN: postgres://${POSTGRES_USER:-scrabble}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable&search_path=backend
|
||||
BACKEND_HTTP_ADDR: ":8080"
|
||||
BACKEND_GRPC_ADDR: ":9090"
|
||||
BACKEND_CONNECTOR_ADDR: telegram:9091
|
||||
BACKEND_LOG_LEVEL: ${LOG_LEVEL:-info}
|
||||
BACKEND_SERVICE_NAME: scrabble-backend
|
||||
BACKEND_OTEL_TRACES_EXPORTER: otlp
|
||||
BACKEND_OTEL_METRICS_EXPORTER: otlp
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
# No container healthcheck: the distroless image has no shell/wget. Readiness
|
||||
# is covered by the CI post-deploy probe (GET / through caddy).
|
||||
networks: [internal]
|
||||
|
||||
gateway:
|
||||
container_name: scrabble-gateway
|
||||
image: scrabble-gateway:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: gateway/Dockerfile
|
||||
args:
|
||||
VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
|
||||
VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
|
||||
VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
|
||||
restart: unless-stopped
|
||||
depends_on: [backend]
|
||||
environment:
|
||||
GATEWAY_HTTP_ADDR: ":8081"
|
||||
GATEWAY_BACKEND_HTTP_URL: http://backend:8080
|
||||
GATEWAY_BACKEND_GRPC_ADDR: backend:9090
|
||||
GATEWAY_CONNECTOR_ADDR: telegram:9091
|
||||
GATEWAY_DEFAULT_SUPPORTED_LANGUAGES: ${GATEWAY_DEFAULT_SUPPORTED_LANGUAGES:-en,ru}
|
||||
GATEWAY_LOG_LEVEL: ${LOG_LEVEL:-info}
|
||||
GATEWAY_SERVICE_NAME: scrabble-gateway
|
||||
GATEWAY_OTEL_TRACES_EXPORTER: otlp
|
||||
GATEWAY_OTEL_METRICS_EXPORTER: otlp
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
# GATEWAY_ADMIN_* intentionally unset: in the deployed contour the front
|
||||
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
|
||||
networks: [internal]
|
||||
|
||||
# --- Telegram connector (egress via the VPN sidecar) -----------------------
|
||||
vpn:
|
||||
container_name: scrabble-telegram-vpn
|
||||
image: docker.iliadenisov.ru/developer/amneziawg-sidecar:latest
|
||||
restart: unless-stopped
|
||||
privileged: true
|
||||
environment:
|
||||
AWG_CONF: ${AWG_CONF:?set AWG_CONF}
|
||||
networks:
|
||||
internal:
|
||||
aliases: [telegram]
|
||||
|
||||
telegram:
|
||||
container_name: scrabble-telegram
|
||||
image: scrabble-telegram:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: platform/telegram/Dockerfile
|
||||
restart: unless-stopped
|
||||
depends_on: [vpn]
|
||||
network_mode: "service:vpn"
|
||||
environment:
|
||||
# The bot tokens live ONLY in this container (ARCHITECTURE.md §12). At least
|
||||
# one token is required (the connector validates this at boot).
|
||||
TELEGRAM_BOT_TOKEN_EN: ${TELEGRAM_BOT_TOKEN_EN:-}
|
||||
TELEGRAM_BOT_TOKEN_RU: ${TELEGRAM_BOT_TOKEN_RU:-}
|
||||
TELEGRAM_GAME_CHANNEL_ID_EN: ${TELEGRAM_GAME_CHANNEL_ID_EN:-}
|
||||
TELEGRAM_GAME_CHANNEL_ID_RU: ${TELEGRAM_GAME_CHANNEL_ID_RU:-}
|
||||
TELEGRAM_MINIAPP_URL: ${TELEGRAM_MINIAPP_URL:?set TELEGRAM_MINIAPP_URL}
|
||||
TELEGRAM_GRPC_ADDR: ":9091"
|
||||
TELEGRAM_TEST_ENV: ${TELEGRAM_TEST_ENV:-false}
|
||||
TELEGRAM_API_BASE_URL: ${TELEGRAM_API_BASE_URL:-}
|
||||
TELEGRAM_LOG_LEVEL: ${LOG_LEVEL:-info}
|
||||
TELEGRAM_SERVICE_NAME: scrabble-telegram
|
||||
TELEGRAM_OTEL_TRACES_EXPORTER: otlp
|
||||
TELEGRAM_OTEL_METRICS_EXPORTER: otlp
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
|
||||
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway) --
|
||||
caddy:
|
||||
container_name: scrabble-caddy
|
||||
image: caddy:2-alpine
|
||||
restart: unless-stopped
|
||||
depends_on: [gateway, backend, grafana]
|
||||
environment:
|
||||
# Test: ":80" (host caddy terminates TLS). Prod: a domain for own ACME.
|
||||
CADDY_SITE_ADDRESS: ${CADDY_SITE_ADDRESS:-:80}
|
||||
GM_BASICAUTH_USER: ${GM_BASICAUTH_USER:-gm}
|
||||
GM_BASICAUTH_HASH: ${GM_BASICAUTH_HASH:?set GM_BASICAUTH_HASH}
|
||||
volumes:
|
||||
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
|
||||
- caddy-data:/data
|
||||
networks:
|
||||
internal: {}
|
||||
edge:
|
||||
aliases: [scrabble]
|
||||
|
||||
# --- Observability ---------------------------------------------------------
|
||||
otelcol:
|
||||
container_name: scrabble-otelcol
|
||||
image: otel/opentelemetry-collector-contrib:0.119.0
|
||||
restart: unless-stopped
|
||||
command: ["--config=/etc/otelcol/config.yaml"]
|
||||
volumes:
|
||||
- ./otelcol/config.yaml:/etc/otelcol/config.yaml:ro
|
||||
networks: [internal]
|
||||
|
||||
prometheus:
|
||||
container_name: scrabble-prometheus
|
||||
image: prom/prometheus:v2.55.1
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --config.file=/etc/prometheus/prometheus.yml
|
||||
- --storage.tsdb.retention.time=15d
|
||||
volumes:
|
||||
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||
- prometheus-data:/prometheus
|
||||
networks: [internal]
|
||||
|
||||
tempo:
|
||||
container_name: scrabble-tempo
|
||||
image: grafana/tempo:2.7.1
|
||||
restart: unless-stopped
|
||||
command: ["-config.file=/etc/tempo/tempo.yaml"]
|
||||
volumes:
|
||||
- ./tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
|
||||
- tempo-data:/var/tempo
|
||||
networks: [internal]
|
||||
|
||||
grafana:
|
||||
container_name: scrabble-grafana
|
||||
image: grafana/grafana:11.4.0
|
||||
restart: unless-stopped
|
||||
depends_on: [prometheus, tempo]
|
||||
environment:
|
||||
# Served under /_gm/grafana behind caddy's Basic-Auth; anonymous Admin so a
|
||||
# single shared login (caddy) gates it with no per-user Grafana accounts.
|
||||
GF_SERVER_ROOT_URL: ${GRAFANA_ROOT_URL:-/_gm/grafana/}
|
||||
GF_SERVER_SERVE_FROM_SUB_PATH: "true"
|
||||
GF_AUTH_ANONYMOUS_ENABLED: "true"
|
||||
GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
|
||||
GF_AUTH_DISABLE_LOGIN_FORM: "true"
|
||||
GF_AUTH_BASIC_ENABLED: "false"
|
||||
GF_USERS_ALLOW_SIGN_UP: "false"
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||
- grafana-data:/var/lib/grafana
|
||||
networks: [internal]
|
||||
|
||||
networks:
|
||||
internal:
|
||||
name: scrabble-internal
|
||||
edge:
|
||||
external: true
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
caddy-data:
|
||||
prometheus-data:
|
||||
tempo-data:
|
||||
grafana-data:
|
||||
@@ -0,0 +1,39 @@
|
||||
{
|
||||
"uid": "scrabble-edge",
|
||||
"title": "Scrabble — Edge / UX",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-6h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Edge request rate by message type",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (message_type)", "legendFormat": "{{message_type}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Edge p95 latency",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p95" },
|
||||
{ "refId": "B", "expr": "histogram_quantile(0.50, sum(rate(edge_request_duration_bucket[5m])) by (le))", "legendFormat": "p50" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Edge requests by result",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (result)", "legendFormat": "{{result}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
{
|
||||
"uid": "scrabble-game",
|
||||
"title": "Scrabble — Game domain",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Games started / abandoned (rate by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "refId": "A", "expr": "sum(rate(games_started_total[15m])) by (variant)", "legendFormat": "started {{variant}}" },
|
||||
{ "refId": "B", "expr": "sum(rate(games_abandoned_total[15m])) by (variant)", "legendFormat": "abandoned {{variant}}" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Robot games finished (rate)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(robot_games_finished_total[15m]))", "legendFormat": "robot games" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Live games in cache (by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(game_cache_active) by (variant)", "legendFormat": "{{variant}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Chat messages (rate by kind)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(chat_messages_total[15m])) by (kind)", "legendFormat": "{{kind}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Journal replay p95 (by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
|
||||
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_replay_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Move validate p95 (by variant)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
|
||||
"fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "histogram_quantile(0.95, sum(rate(game_move_validate_duration_bucket[5m])) by (le, variant))", "legendFormat": "{{variant}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
{
|
||||
"uid": "scrabble-overview",
|
||||
"title": "Scrabble — Service overview",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-6h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Active users (24h)",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"24h\"})" }]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Active users (7d)",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "max(active_users{window=\"7d\"})" }]
|
||||
},
|
||||
{
|
||||
"type": "stat",
|
||||
"title": "Edge requests/s",
|
||||
"gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m]))" }]
|
||||
},
|
||||
{
  "type": "stat",
  "title": "Edge error ratio",
  "description": "Share of edge requests whose result is not ok over the last 5m. The denominator is floored at a tiny epsilon only to avoid division by zero, so the ratio stays correct at low traffic.",
  "gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 },
  "fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] },
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count{result!=\"ok\"}[5m])) / clamp_min(sum(rate(edge_request_duration_count[5m])), 1e-9)" }]
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Goroutines by service",
|
||||
"description": "OTel Go runtime metric; verify the exact name against live Prometheus if empty (go_goroutine_count / process_runtime_go_goroutines depending on the contrib runtime version).",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "go_goroutine_count", "legendFormat": "{{service_name}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Heap memory used by service",
|
||||
"description": "OTel Go runtime metric (best-effort name go_memory_used); verify against live Prometheus if empty.",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 },
|
||||
"fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(go_memory_used) by (service_name)", "legendFormat": "{{service_name}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"uid": "scrabble-users",
|
||||
"title": "Scrabble — Users",
|
||||
"tags": ["scrabble"],
|
||||
"timezone": "",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-7d", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Active users (in-memory, single gateway)",
|
||||
"description": "Distinct accounts with an authenticated action within the window. Resets on gateway restart; correct for a single instance (MVP).",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "max(active_users) by (window)", "legendFormat": "{{window}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "New accounts (rate by kind)",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(accounts_created_total[1h])) by (kind)", "legendFormat": "{{kind}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "New accounts (cumulative by kind)",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(accounts_created_total) by (kind)", "legendFormat": "{{kind}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
# Loads the committed dashboard JSON from /var/lib/grafana/dashboards (mounted
|
||||
# read-only from deploy/grafana/dashboards).
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: scrabble
|
||||
orgId: 1
|
||||
folder: Scrabble
|
||||
type: file
|
||||
disableDeletion: false
|
||||
editable: true
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: false
|
||||
@@ -0,0 +1,16 @@
|
||||
# Grafana datasources for the Scrabble contour, provisioned at startup. Metrics
|
||||
# come from Prometheus (scraping the collector) and traces from Tempo.
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
uid: tempo
|
||||
access: proxy
|
||||
url: http://tempo:3200
|
||||
@@ -0,0 +1,38 @@
|
||||
# OpenTelemetry Collector for the Scrabble contour. Receives OTLP/gRPC from the
|
||||
# three services (backend, gateway, connector — pkg/telemetry exports OTLP only),
|
||||
# fans metrics out to a Prometheus scrape endpoint and traces to Tempo.
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
processors:
|
||||
batch: {}
|
||||
|
||||
exporters:
|
||||
# Exposes the collected metrics for Prometheus to scrape (otelcol:9464/metrics).
|
||||
# add_metric_suffixes:false keeps the instrument names verbatim (no _seconds /
|
||||
# _total unit/type suffixes) so the dashboards' PromQL matches the names defined
|
||||
# in code; resource_to_telemetry_conversion promotes service.name to a label.
|
||||
prometheus:
|
||||
endpoint: 0.0.0.0:9464
|
||||
add_metric_suffixes: false
|
||||
resource_to_telemetry_conversion:
|
||||
enabled: true
|
||||
# Forwards traces to Tempo's OTLP ingest.
|
||||
otlp/tempo:
|
||||
endpoint: tempo:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [otlp/tempo]
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [prometheus]
|
||||
@@ -0,0 +1,14 @@
|
||||
# Prometheus scrape config for the Scrabble contour. The OTel Collector exposes
|
||||
# every service's metrics on its prometheus exporter; Prometheus scrapes that one
|
||||
# endpoint. Retention (15d) is set on the command line in docker-compose.yml.
|
||||
global:
|
||||
scrape_interval: 30s
|
||||
evaluation_interval: 30s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: otelcol
|
||||
static_configs:
|
||||
- targets: ["otelcol:9464"]
|
||||
- job_name: prometheus
|
||||
static_configs:
|
||||
- targets: ["localhost:9090"]
|
||||
@@ -0,0 +1,26 @@
|
||||
# Tempo for the Scrabble contour: single-binary, local filesystem storage, OTLP
|
||||
# ingest from the collector, 72h block retention.
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
ingester:
|
||||
max_block_duration: 5m
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
block_retention: 72h
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: local
|
||||
local:
|
||||
path: /var/tempo/blocks
|
||||
wal:
|
||||
path: /var/tempo/wal
|
||||
+39
-13
@@ -489,8 +489,11 @@ promotions) is future work and would deliver short markdown messages (text + lin
|
||||
available for debugging; **`otlp`** (gRPC, endpoint from the standard
|
||||
`OTEL_EXPORTER_OTLP_*` environment) exports to a collector. The Postgres pool is
|
||||
instrumented with otelsql and `otelgrpc` traces the backend↔gateway push stream
|
||||
and the gateway↔connector calls. The OTLP collector and Grafana dashboards are
|
||||
stood up with the deploy (Stage 15).
|
||||
and the gateway↔connector calls. The OTLP **Collector** (OTLP/gRPC → Prometheus
|
||||
metrics + Tempo traces), **Prometheus** (15d), **Tempo** (72h) and **Grafana**
|
||||
(provisioned datasources + dashboards, behind the caddy `/_gm/grafana` Basic-Auth)
|
||||
are stood up with the deploy (`deploy/`, Stage 16); the default exporter stays
|
||||
`none`, so CI needs no collector.
|
||||
- Per-request server-side timing via gin middleware from day one (the access log
|
||||
carries method, route, status, latency and the active trace id). A
|
||||
client-measured RTT piggybacked on the next request is a later enhancement.
|
||||
@@ -503,6 +506,12 @@ promotions) is future work and would deliver short markdown messages (text + lin
|
||||
(the UI-perceived roundtrip, by `message_type`/`result`); and Go runtime/heap
|
||||
metrics. Game-scoped metrics carry a `variant` attribute
|
||||
(english/russian_scrabble/erudit).
|
||||
- User metrics (Stage 16): a backend counter `accounts_created_total` (`kind` =
|
||||
telegram/email/guest; robots are a provisioned pool, not users, and are excluded)
|
||||
and a gateway **in-memory** observable gauge `active_users` (`window` = 24h/7d) —
|
||||
distinct accounts that performed an authenticated edge action in the window. The
|
||||
gauge is single-process by design (single-instance MVP, §10): it is correct for one
|
||||
gateway, resets on restart, and is a live operational figure, not a billing count.
|
||||
- Unauthenticated `GET /healthz` (liveness) and `GET /readyz` (readiness — the
|
||||
database answers a bounded ping and the session cache is warmed).
|
||||
- The backend serves a **second listener** — a gRPC server
|
||||
@@ -518,7 +527,7 @@ promotions) is future work and would deliver short markdown messages (text + lin
|
||||
| Session minting; email-code / guest validation | gateway (with backend) |
|
||||
| Session → `user_id` resolution, `X-User-ID` injection | gateway |
|
||||
| Authorisation, ownership, state transitions | backend (`X-User-ID` is the sole identity input) |
|
||||
| Admin authentication | gateway validates HTTP Basic Auth (`GATEWAY_ADMIN_*`) on the public `/_gm/*` path and reverse-proxies it **verbatim** to the backend's server-rendered admin console; the backend trusts the gateway (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
|
||||
| Admin authentication | a single Basic-Auth gate on `/_gm/*`, forwarded **verbatim** to the backend's server-rendered admin console (and, in the deployed contour, routing `/_gm/grafana/*` to Grafana). In the deploy the **caddy** owns this gate (§13); a local non-caddy run uses the gateway's own `GATEWAY_ADMIN_*` proxy. The backend trusts the proxy (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
|
||||
| backend ↔ gateway ↔ connector trust | the network (only gateway may reach backend; the connector serves unauthenticated gRPC on the internal segment) |
|
||||
|
||||
This is an explicit, accepted MVP risk: compromise of the gateway↔backend
|
||||
@@ -536,16 +545,33 @@ a dedicated redeem sub-limit or a longer code is the hardening step if abuse app
|
||||
|
||||
## 13. Deployment (informational)
|
||||
|
||||
Single public origin, path-routed: a mini-landing at the root, the **Telegram Mini
|
||||
App under `/telegram/`** (the gateway serves the static UI build, wired in Stage 15;
|
||||
outside Telegram that path redirects to the root), the gateway public surface and the **admin console
|
||||
at `/_gm`** (backend-rendered, Basic-Auth at the gateway) share one host that
|
||||
terminates TLS. The **Telegram connector** runs as a separate
|
||||
container with **no public ingress** — it long-polls Telegram and egresses through a
|
||||
VPN sidecar, answering only internal gRPC. MVP runs one `gateway`, one `backend`, one
|
||||
Postgres, plus the connector. The connector's Docker/compose ships now
|
||||
(`platform/telegram/deploy`, mirroring `../15-puzzle`); the gateway's static UI serving
|
||||
and the full multi-service deploy land in Stage 15.
|
||||
Single public origin, path-routed. The gateway **embeds** the static UI build
|
||||
(`go:embed`, baked in by a node stage in `gateway/Dockerfile`) and serves the one
|
||||
SPA at both `/` (web) and `/telegram/` (the Telegram Mini App; outside Telegram that
|
||||
path redirects to the root — the client-side guard). An in-compose **caddy** is the
|
||||
contour's edge: it owns a single `/_gm` Basic-Auth and routes `/_gm/grafana/*` to
|
||||
**Grafana** (anonymous-admin, so the one shared login gates it with no per-user
|
||||
Grafana accounts) and the rest of `/_gm/*` to the backend-rendered **admin console**;
|
||||
everything else (`/`, `/telegram/`, the Connect edge) goes to the gateway. The
|
||||
**Telegram connector** runs as a separate container with **no public ingress** — it
|
||||
long-polls Telegram and egresses through a VPN sidecar, answering only internal gRPC.
|
||||
|
||||
The full contour (`deploy/docker-compose.yml`) runs one `gateway`, one `backend`,
|
||||
one Postgres, the connector (+ its VPN sidecar) and the **observability stack** —
|
||||
OTel Collector (OTLP/gRPC ingest → Prometheus metrics + Tempo traces) and Grafana
|
||||
with provisioned datasources and dashboards. Inter-service traffic uses a private
|
||||
`internal` network (project-scoped DNS); only caddy joins the shared external `edge`
|
||||
network (alias `scrabble`).
|
||||
|
||||
Two contours, two secret/variable prefixes (`TEST_` / `PROD_`):
|
||||
- **Test** (Stage 16): auto-deploys on a PR into — or a push to — `development`
|
||||
(`.gitea/workflows/ci.yaml` → `docker compose up -d --build` on the Gitea runner
|
||||
host, then a `GET /` probe through caddy). The host caddy terminates TLS and
|
||||
forwards the domain to `scrabble:80`, so the in-compose caddy serves plain HTTP
|
||||
(`CADDY_SITE_ADDRESS=:80`).
|
||||
- **Prod** (Stage 17): a manual SSH deploy after `development → master`. There is no
|
||||
host caddy, so the contour ships its own caddy terminating TLS — set
|
||||
`CADDY_SITE_ADDRESS` to the domain and the caddy does its own ACME.
|
||||
|
||||
## 14. CI & branches
|
||||
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
# Gateway image, built in three stages:
#   1. ui      - node builds the static Vite bundle;
#   2. build   - golang compiles the gateway with that bundle embedded
#                (gateway/internal/webui/dist), mirroring platform/telegram/Dockerfile;
#   3. runtime - the static binary on distroless nonroot.
# The one binary therefore serves the SPA at / and /telegram/
# (docs/ARCHITECTURE.md §13) and no separate static container is needed.
#
# The production UI build vars are image build-args and end up baked into the
# bundle. The build context must be the repository root, so that go.work, pkg/,
# gateway/ and ui/ are all available:
#   docker build -f gateway/Dockerfile \
#     --build-arg VITE_GATEWAY_URL=https://example \
#     -t scrabble-gateway .

# --- UI build ----------------------------------------------------------------
FROM node:22-alpine AS ui
WORKDIR /ui
RUN corepack enable && corepack prepare pnpm@11.0.9 --activate

# Vite picks up VITE_-prefixed environment variables at build time, so each
# build-arg is re-exported as an env var of the same name.
ARG VITE_TELEGRAM_BOT_ID=
ARG VITE_TELEGRAM_LINK=
ARG VITE_GATEWAY_URL=
ENV VITE_TELEGRAM_BOT_ID=$VITE_TELEGRAM_BOT_ID
ENV VITE_TELEGRAM_LINK=$VITE_TELEGRAM_LINK
ENV VITE_GATEWAY_URL=$VITE_GATEWAY_URL

# Dependencies are installed from the lockfile before the sources are copied
# (the workspace file carries pnpm's build-script approval for esbuild). The
# generated src/gen/ is committed, so no codegen step runs here.
COPY ui/package.json ui/pnpm-lock.yaml ui/pnpm-workspace.yaml ./
RUN pnpm install --frozen-lockfile
COPY ui ./
RUN pnpm build

# --- Go build ----------------------------------------------------------------
FROM golang:1.26.3-alpine AS build
WORKDIR /src
COPY go.work go.work.sum ./
COPY pkg ./pkg
COPY gateway ./gateway

# Swap the committed placeholder dist/ for the bundle built above, so that
# go:embed bakes the real UI into the binary.
RUN rm -rf gateway/internal/webui/dist
COPY --from=ui /ui/dist gateway/internal/webui/dist

# Only gateway + pkg are present in this stage; drop the other workspace members.
RUN go work edit -dropuse=./backend -dropuse=./platform/telegram
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gateway ./gateway/cmd/gateway

# --- runtime -----------------------------------------------------------------
FROM gcr.io/distroless/static-debian12:nonroot
COPY --from=build /out/gateway /usr/local/bin/gateway
ENTRYPOINT ["/usr/local/bin/gateway"]
|
||||
+9
-4
@@ -5,9 +5,13 @@ terminates the client's **Connect-RPC + FlatBuffers** traffic over HTTP/2
|
||||
cleartext (`h2c`), authenticates the originating credential, mints/resolves a
|
||||
thin opaque session, rate-limits, injects `X-User-ID` when forwarding to the
|
||||
backend over REST/JSON, and bridges the backend's gRPC push stream to each
|
||||
client's in-app live channel. It also serves the backend's admin console at `/_gm`
|
||||
on its public listener behind HTTP Basic-Auth. See
|
||||
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §2, §3, §10, §12.
|
||||
client's in-app live channel. It **embeds the static UI build** (`go:embed`, baked
|
||||
in by the gateway image's node stage) and serves the one SPA at `/` (web) and
|
||||
`/telegram/` (the Mini App) — the single-origin model. It can also serve the
|
||||
backend's admin console at `/_gm` behind HTTP Basic-Auth for a local non-caddy run;
|
||||
in the deployed contour the front caddy owns `/_gm` (see
|
||||
[`../deploy`](../deploy)). See
|
||||
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §2, §3, §10, §12, §13.
|
||||
|
||||
## Package layout
|
||||
|
||||
@@ -22,8 +26,9 @@ internal/ratelimit/ # token-bucket limiter (golang.org/x/time/rate)
|
||||
internal/connector/ # gRPC client to the Telegram connector (initData validate, out-of-app push) + routing
|
||||
internal/push/ # live-event fan-out hub (per-user client streams)
|
||||
internal/transcode/ # FlatBuffers<->REST bridge + message_type registry
|
||||
internal/connectsrv/ # the Connect Gateway service over h2c
|
||||
internal/connectsrv/ # the Connect Gateway service over h2c (+ the in-memory active_users gauge)
|
||||
internal/admin/ # Basic-Auth reverse proxy mounting the backend admin console at /_gm (verbatim)
|
||||
internal/webui/ # embedded SPA build (go:embed dist) served at / and /telegram/
|
||||
```
|
||||
|
||||
The FlatBuffers payloads and the backend push proto are the shared wire
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
package connectsrv
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// activeUsers tracks distinct authenticated accounts by last-action time, backing
|
||||
// the in-memory active_users gauge. It is single-process by design (the gateway is
|
||||
// single-instance in the MVP, docs/ARCHITECTURE.md §10): the distinct count is
|
||||
// correct for one process, resets on restart, and is a live operational gauge, not
|
||||
// a billing figure. Memory is bounded by the number of distinct accounts active
|
||||
// within the longest window; stale entries are pruned on observation.
|
||||
type activeUsers struct {
|
||||
mu sync.Mutex
|
||||
lastSeen map[string]time.Time
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// newActiveUsers returns an empty tracker using the wall clock.
|
||||
func newActiveUsers() *activeUsers {
|
||||
return &activeUsers{lastSeen: make(map[string]time.Time), now: time.Now}
|
||||
}
|
||||
|
||||
// seen records that account uid performed an authenticated action now.
|
||||
func (a *activeUsers) seen(uid string) {
|
||||
if uid == "" {
|
||||
return
|
||||
}
|
||||
a.mu.Lock()
|
||||
a.lastSeen[uid] = a.now()
|
||||
a.mu.Unlock()
|
||||
}
|
||||
|
||||
// counts returns, for each window, the number of distinct accounts last seen
|
||||
// within it, pruning entries older than the longest window in the same pass.
|
||||
func (a *activeUsers) counts(windows []time.Duration) []int {
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
|
||||
now := a.now()
|
||||
var longest time.Duration
|
||||
for _, w := range windows {
|
||||
if w > longest {
|
||||
longest = w
|
||||
}
|
||||
}
|
||||
|
||||
res := make([]int, len(windows))
|
||||
for uid, ts := range a.lastSeen {
|
||||
age := now.Sub(ts)
|
||||
if age > longest {
|
||||
delete(a.lastSeen, uid)
|
||||
continue
|
||||
}
|
||||
for i, w := range windows {
|
||||
if age <= w {
|
||||
res[i]++
|
||||
}
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package connectsrv
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestActiveUsersCountsAndPrune(t *testing.T) {
|
||||
a := newActiveUsers()
|
||||
base := time.Date(2026, 6, 5, 12, 0, 0, 0, time.UTC)
|
||||
cur := base
|
||||
a.now = func() time.Time { return cur }
|
||||
|
||||
a.seen("u1") // at base
|
||||
cur = base.Add(2 * time.Hour)
|
||||
a.seen("u2") // base+2h
|
||||
cur = base.Add(50 * time.Hour)
|
||||
a.seen("u3") // base+50h
|
||||
|
||||
windows := []time.Duration{24 * time.Hour, 7 * 24 * time.Hour}
|
||||
|
||||
// now = base+50h: u3 within 24h; all three within 7d.
|
||||
got := a.counts(windows)
|
||||
if got[0] != 1 || got[1] != 3 {
|
||||
t.Fatalf("counts at +50h = %v, want [1 3]", got)
|
||||
}
|
||||
|
||||
// now = base+169h: u1 (age 169h) prunes past the 7d window; u2/u3 remain in 7d.
|
||||
cur = base.Add(169 * time.Hour)
|
||||
got = a.counts(windows)
|
||||
if got[0] != 0 || got[1] != 2 {
|
||||
t.Fatalf("counts at +169h = %v, want [0 2]", got)
|
||||
}
|
||||
if _, ok := a.lastSeen["u1"]; ok {
|
||||
t.Fatalf("u1 should have been pruned from the tracker")
|
||||
}
|
||||
}
|
||||
|
||||
func TestActiveUsersIgnoresEmpty(t *testing.T) {
|
||||
a := newActiveUsers()
|
||||
a.seen("")
|
||||
if got := a.counts([]time.Duration{time.Hour}); got[0] != 0 {
|
||||
t.Fatalf("empty uid recorded: got %v", got)
|
||||
}
|
||||
}
|
||||
@@ -12,14 +12,26 @@ import (
|
||||
// meterName scopes the gateway edge's OpenTelemetry instruments.
|
||||
const meterName = "scrabble/gateway/edge"
|
||||
|
||||
// activeUserWindows are the rolling windows the active_users gauge reports.
|
||||
var activeUserWindows = []struct {
|
||||
label string
|
||||
dur time.Duration
|
||||
}{
|
||||
{label: "24h", dur: 24 * time.Hour},
|
||||
{label: "7d", dur: 7 * 24 * time.Hour},
|
||||
}
|
||||
|
||||
// serverMetrics holds the edge's operational instruments. It defaults to no-ops;
|
||||
// NewServer installs the real meter when one is supplied in Deps.
|
||||
type serverMetrics struct {
|
||||
edge metric.Float64Histogram
|
||||
edge metric.Float64Histogram
|
||||
active *activeUsers
|
||||
}
|
||||
|
||||
// newServerMetrics builds the instruments on meter (nil selects a no-op meter),
|
||||
// falling back to a no-op histogram on the (rare) construction error.
|
||||
// falling back to a no-op histogram on the (rare) construction error. The
|
||||
// active_users gauge is registered as an observable callback over the in-memory
|
||||
// tracker.
|
||||
func newServerMetrics(meter metric.Meter) *serverMetrics {
|
||||
if meter == nil {
|
||||
meter = noop.NewMeterProvider().Meter(meterName)
|
||||
@@ -30,7 +42,24 @@ func newServerMetrics(meter metric.Meter) *serverMetrics {
|
||||
if err != nil {
|
||||
h, _ = noop.NewMeterProvider().Meter(meterName).Float64Histogram("edge_request_duration")
|
||||
}
|
||||
return &serverMetrics{edge: h}
|
||||
m := &serverMetrics{edge: h, active: newActiveUsers()}
|
||||
|
||||
gauge, err := meter.Int64ObservableGauge("active_users",
|
||||
metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
|
||||
if err == nil {
|
||||
windows := make([]time.Duration, len(activeUserWindows))
|
||||
for i, w := range activeUserWindows {
|
||||
windows[i] = w.dur
|
||||
}
|
||||
_, _ = meter.RegisterCallback(func(_ context.Context, o metric.Observer) error {
|
||||
counts := m.active.counts(windows)
|
||||
for i, w := range activeUserWindows {
|
||||
o.ObserveInt64(gauge, int64(counts[i]), metric.WithAttributes(attribute.String("window", w.label)))
|
||||
}
|
||||
return nil
|
||||
}, gauge)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// recordEdge records the duration of one Execute call labelled by message type and
|
||||
@@ -41,3 +70,8 @@ func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string,
|
||||
attribute.String("result", result),
|
||||
))
|
||||
}
|
||||
|
||||
// recordActive marks account uid active now, feeding the active_users gauge.
|
||||
func (m *serverMetrics) recordActive(uid string) {
|
||||
m.active.seen(uid)
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"scrabble/gateway/internal/ratelimit"
|
||||
"scrabble/gateway/internal/session"
|
||||
"scrabble/gateway/internal/transcode"
|
||||
"scrabble/gateway/internal/webui"
|
||||
edgev1 "scrabble/gateway/proto/edge/v1"
|
||||
"scrabble/gateway/proto/edge/v1/edgev1connect"
|
||||
)
|
||||
@@ -89,9 +90,21 @@ func (s *Server) HTTPHandler() http.Handler {
|
||||
if s.adminProxy != nil {
|
||||
// The admin console (backend /_gm) is served on the public listener behind
|
||||
// the proxy's Basic-Auth, mounted below the h2c wrap so the Connect edge keeps
|
||||
// working over h2c (docs/ARCHITECTURE.md §12).
|
||||
// working over h2c (docs/ARCHITECTURE.md §12). In the deployed contour the
|
||||
// front caddy owns the /_gm Basic-Auth and Grafana routing; this mount serves
|
||||
// a non-caddy (local) setup.
|
||||
mux.Handle("/_gm/", s.adminProxy)
|
||||
} else {
|
||||
// With the console disabled here, keep /_gm a 404 so the SPA catch-all below
|
||||
// does not serve the app shell at the operator path.
|
||||
mux.Handle("/_gm/", http.NotFoundHandler())
|
||||
}
|
||||
// The embedded single-page UI is served at the site root and, for the Telegram
|
||||
// Mini App, under /telegram/ — the single-origin model (docs/ARCHITECTURE.md
|
||||
// §13). Both mounts sit below the h2c wrap so the Connect edge (a more specific
|
||||
// prefix) keeps priority; "/" is the catch-all SPA fallback for the hash router.
|
||||
mux.Handle("/telegram/", webui.Handler("/telegram/"))
|
||||
mux.Handle("/", webui.Handler(""))
|
||||
return h2c.NewHandler(mux, &http2.Server{})
|
||||
}
|
||||
|
||||
@@ -118,6 +131,9 @@ func (s *Server) Execute(ctx context.Context, req *connect.Request[edgev1.Execut
|
||||
result = "unauthenticated"
|
||||
return nil, err
|
||||
}
|
||||
// A valid session proving an authenticated request is an "action" for the
|
||||
// active_users gauge, counted before the rate-limit/domain outcome.
|
||||
s.metrics.recordActive(uid)
|
||||
if !s.limiter.Allow("user:"+uid, s.userPolicy) {
|
||||
result = "rate_limited"
|
||||
return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
|
||||
|
||||
+2
@@ -0,0 +1,2 @@
|
||||
# Placeholder so the embedded dist/assets directory exists in a plain build.
|
||||
# The production gateway image replaces dist/ with the real Vite build.
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>Scrabble</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
UI build placeholder. The production gateway image embeds the real Vite
|
||||
build (see gateway/Dockerfile); seeing this page means the binary was
|
||||
built without a UI build.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,71 @@
|
||||
// Package webui serves the embedded single-page UI build over the public edge.
|
||||
//
|
||||
// The committed dist/ holds only a placeholder index.html so the gateway module
|
||||
// compiles with a plain `go build` (and in CI) without a UI build. The production
|
||||
// gateway image replaces dist/ with the real Vite build before compiling (see
|
||||
// gateway/Dockerfile), so the binary ships the UI inside it. Because Vite is built
|
||||
// with a relative asset base, one build serves under any path: Handler is mounted
|
||||
// both at "/" (web) and at "/telegram/" (the Telegram Mini App), matching the
|
||||
// single-origin model in docs/ARCHITECTURE.md §13.
|
||||
package webui
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"io/fs"
|
||||
"net/http"
|
||||
"path"
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:embed all:dist
|
||||
var dist embed.FS
|
||||
|
||||
// distFS returns the embedded build rooted at dist/. The directory is embedded at
|
||||
// compile time, so its absence is a build error rather than a runtime condition.
|
||||
func distFS() fs.FS {
|
||||
sub, err := fs.Sub(dist, "dist")
|
||||
if err != nil {
|
||||
panic("webui: embedded dist/ missing: " + err.Error())
|
||||
}
|
||||
return sub
|
||||
}
|
||||
|
||||
// Handler serves the embedded SPA. An existing file is served directly (with the
|
||||
// standard content-type and caching headers); every other path falls back to
|
||||
// index.html so the client-side hash router can take over a deep link. When
|
||||
// stripPrefix is non-empty it is removed from the request path before lookup, so
|
||||
// the same build serves under a sub-path (e.g. "/telegram/").
|
||||
func Handler(stripPrefix string) http.Handler {
|
||||
content := distFS()
|
||||
files := http.FileServer(http.FS(content))
|
||||
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
name := strings.TrimPrefix(path.Clean("/"+r.URL.Path), "/")
|
||||
if name == "" {
|
||||
serveIndex(w, content)
|
||||
return
|
||||
}
|
||||
if info, err := fs.Stat(content, name); err != nil || info.IsDir() {
|
||||
// Unknown path or a directory: serve the SPA shell, never a listing.
|
||||
serveIndex(w, content)
|
||||
return
|
||||
}
|
||||
files.ServeHTTP(w, r)
|
||||
})
|
||||
if p := strings.TrimSuffix(stripPrefix, "/"); p != "" {
|
||||
return http.StripPrefix(p, h)
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// serveIndex writes the SPA shell with a 200 status, so a client-routed deep link
|
||||
// still loads the app rather than a 404.
|
||||
func serveIndex(w http.ResponseWriter, content fs.FS) {
|
||||
data, err := fs.ReadFile(content, "index.html")
|
||||
if err != nil {
|
||||
http.Error(w, "ui not built", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(data)
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
package webui
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// get drives the handler with a GET for the given path and returns the response.
|
||||
func get(t *testing.T, h http.Handler, target string) *http.Response {
|
||||
t.Helper()
|
||||
rec := httptest.NewRecorder()
|
||||
h.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, target, nil))
|
||||
return rec.Result()
|
||||
}
|
||||
|
||||
func TestHandlerServesIndexAndFallsBack(t *testing.T) {
|
||||
h := Handler("")
|
||||
|
||||
// The embedded placeholder index is served at the root.
|
||||
if resp := get(t, h, "/"); resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET / status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
|
||||
// An existing (non-index) file is served directly by the file server.
|
||||
if resp := get(t, h, "/assets/.gitkeep"); resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET /assets/.gitkeep status = %d, want 200 (served file)", resp.StatusCode)
|
||||
}
|
||||
|
||||
// An unknown deep link falls back to the SPA shell (200, not 404) so the
|
||||
// client-side hash router can take over.
|
||||
resp := get(t, h, "/game/abc/deep")
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET /game/abc/deep status = %d, want 200 (SPA fallback)", resp.StatusCode)
|
||||
}
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
if !strings.Contains(string(body), "<html") {
|
||||
t.Fatalf("fallback body is not the index HTML: %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandlerStripsPrefix(t *testing.T) {
|
||||
h := Handler("/telegram/")
|
||||
|
||||
for _, target := range []string{"/telegram/", "/telegram/assets/.gitkeep", "/telegram/lobby/x"} {
|
||||
if resp := get(t, h, target); resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET %s status = %d, want 200", target, resp.StatusCode)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
---
# Deploy descriptor for the Telegram connector (the platform side-service).
#
# Networking mirrors the sibling ../15-puzzle/deploy/docker-compose.yml:
#   - The `vpn` sidecar (developer/amneziawg-sidecar) holds the tunnel and owns
#     the network namespace that `app` shares (network_mode: "service:vpn"), so
#     all of the connector's egress to api.telegram.org leaves through the tunnel.
#   - `vpn` is the service attached to the external `edge` network, under the
#     alias `telegram`; other services reach the connector's gRPC port at
#     `telegram:9091` inside the shared netns. The connector needs NO public
#     ingress: it long-polls Telegram and only answers internal gRPC.
#
# The connector joins the same `edge` network as `backend` and `gateway` (the full
# service set rolled out together on a dev-environment deploy). The gateway calls
# it with GATEWAY_CONNECTOR_ADDR=telegram:9091; the backend admin surface
# (Stage 10) will use the same address. The single public ingress for the host
# reverse proxy (caddy) is the gateway's HTTP port, which also serves the Mini App
# under /telegram/ (ARCHITECTURE.md §13). The full multi-service compose lands
# with Stage 12; this is the connector-scoped descriptor.
name: scrabble-telegram

services:
  vpn:
    container_name: scrabble-telegram-vpn
    image: docker.iliadenisov.ru/developer/amneziawg-sidecar:latest
    restart: unless-stopped
    privileged: true
    environment:
      # Required: compose aborts with "set AWG_CONF" when it is unset or empty.
      AWG_CONF: "${AWG_CONF:?set AWG_CONF}"
    networks:
      edge:
        aliases:
          - telegram

  app:
    container_name: scrabble-telegram
    image: scrabble-telegram:latest
    build:
      # The context is the repository root so that go.work, pkg/ and
      # platform/telegram/ are all available (see platform/telegram/Dockerfile).
      context: ../../..
      dockerfile: platform/telegram/Dockerfile
    restart: unless-stopped
    depends_on:
      - vpn
    network_mode: "service:vpn"
    environment:
      # The bot tokens live ONLY in this container (ARCHITECTURE.md §12). One bot
      # per service language (en/ru); at least one token is required (the
      # connector validates this at boot; compose cannot express "one of").
      TELEGRAM_BOT_TOKEN_EN: "${TELEGRAM_BOT_TOKEN_EN:-}"
      TELEGRAM_BOT_TOKEN_RU: "${TELEGRAM_BOT_TOKEN_RU:-}"
      TELEGRAM_GAME_CHANNEL_ID_EN: "${TELEGRAM_GAME_CHANNEL_ID_EN:-}"
      TELEGRAM_GAME_CHANNEL_ID_RU: "${TELEGRAM_GAME_CHANNEL_ID_RU:-}"
      TELEGRAM_MINIAPP_URL: "${TELEGRAM_MINIAPP_URL:?set TELEGRAM_MINIAPP_URL}"
      TELEGRAM_GRPC_ADDR: "${TELEGRAM_GRPC_ADDR:-:9091}"
      # Set to true when deploying into Telegram's test environment.
      TELEGRAM_TEST_ENV: "${TELEGRAM_TEST_ENV:-false}"
      TELEGRAM_API_BASE_URL: "${TELEGRAM_API_BASE_URL:-}"

networks:
  edge:
    external: true
Reference in New Issue
Block a user