831ecd0cab
Root cause of the Grafana "readdirent /etc/grafana/dashboards: no such file or
directory": the CI runner checks out into an ephemeral act workspace that is
removed after the job, so binding the compose config files straight from it
dangles the mounts in the long-lived containers (verified the act source dir is
emptied after the job). caddy/otelcol/prometheus/tempo read their config once at
startup so they survive, but would break on a restart — same latent bug.
Fix (mirrors ../galaxy-game's $HOME/.galaxy-dev/monitoring): the deploy job seeds
the config dirs to a stable $HOME/.scrabble-deploy and the compose binds them via
${SCRABBLE_CONFIG_DIR:-.} (local runs keep "."). Documented in the compose header,
deploy/README.md and the ci.yaml step.
233 lines
9.2 KiB
YAML
233 lines
9.2 KiB
YAML
# Full deploy descriptor for the Scrabble test contour: backend + gateway +
|
|
# Postgres + the Telegram connector (with its VPN sidecar) + the observability
|
|
# stack (OTel Collector -> Prometheus + Tempo -> Grafana). Driven by
|
|
# .gitea/workflows/ci.yaml (`docker compose up -d --build`); env values are
|
|
# interpolated from Gitea Actions TEST_ secrets/variables exported by the deploy
|
|
# job (see deploy/.env.example for the unprefixed names).
|
|
#
|
|
# Config bind sources are prefixed with ${SCRABBLE_CONFIG_DIR:-.}: locally they bind
|
|
# straight from this directory, but CI seeds them to a stable host path and sets
|
|
# SCRABBLE_CONFIG_DIR to it, because the runner's checkout is ephemeral (act removes
|
|
# it after the job) and the bind mounts must outlive the job in the long-running
|
|
# containers (see .gitea/workflows/ci.yaml + deploy/README.md).
|
|
#
|
|
# Networking (mirrors ../galaxy-game):
|
|
# - `internal` (scrabble-internal): all inter-service traffic, project-private
|
|
# DNS so service names never collide on the shared `edge` network.
|
|
# - `edge` (external): the host caddy reaches this contour at `scrabble:80`
|
|
# (the in-compose caddy's alias). The in-compose caddy terminates only HTTP in
|
|
# the test contour; the host caddy terminates TLS and forwards. For prod
|
|
# (Stage 18, no host caddy) set CADDY_SITE_ADDRESS to the domain so the caddy
|
|
# does its own ACME — the contour is then self-contained.
|
|
# - The connector egresses to api.telegram.org through the `vpn` sidecar
|
|
# (network_mode: service:vpn); it answers internal gRPC at `telegram:9091`.
|
|
# Compose project name.
name: scrabble
|
|
|
|
services:
|
|
postgres:
  # Database of the contour; its data directory lives in the postgres-data
  # named volume.
  image: postgres:17-alpine
  container_name: scrabble-postgres
  restart: unless-stopped
  networks:
    - internal
  volumes:
    - postgres-data:/var/lib/postgresql/data
  environment:
    POSTGRES_USER: ${POSTGRES_USER:-scrabble}
    POSTGRES_DB: ${POSTGRES_DB:-scrabble}
    # Mandatory: there is no default, interpolation fails with this message.
    POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}
  healthcheck:
    # pg_isready against the configured user/database; the backend waits for
    # this check to report healthy before it starts.
    test:
      - CMD-SHELL
      - "pg_isready -U ${POSTGRES_USER:-scrabble} -d ${POSTGRES_DB:-scrabble}"
    retries: 30
    interval: 5s
    timeout: 3s
|
|
|
|
backend:
  # Built from backend/Dockerfile with the repository root as build context.
  # There is deliberately no container healthcheck: the image is distroless
  # (no shell, no wget), so readiness is verified by the CI post-deploy probe
  # (GET / through caddy) instead.
  image: scrabble-backend:latest
  container_name: scrabble-backend
  build:
    context: ..
    dockerfile: backend/Dockerfile
    args:
      DICT_VERSION: ${DICT_VERSION:-v1.0.0}
  restart: unless-stopped
  networks:
    - internal
  depends_on:
    # Start only once Postgres passes its healthcheck.
    postgres:
      condition: service_healthy
  environment:
    BACKEND_SERVICE_NAME: scrabble-backend
    BACKEND_LOG_LEVEL: ${LOG_LEVEL:-info}
    BACKEND_HTTP_ADDR: ':8080'
    BACKEND_GRPC_ADDR: ':9090'
    BACKEND_CONNECTOR_ADDR: telegram:9091
    # The migrations create the `backend` schema (00001), hence
    # search_path=backend in the DSN.
    BACKEND_POSTGRES_DSN: postgres://${POSTGRES_USER:-scrabble}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-scrabble}?sslmode=disable&search_path=backend
    # Telemetry goes to the in-compose OTel Collector over plaintext OTLP.
    BACKEND_OTEL_TRACES_EXPORTER: otlp
    BACKEND_OTEL_METRICS_EXPORTER: otlp
    OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
    OTEL_EXPORTER_OTLP_INSECURE: 'true'
|
|
|
|
gateway:
  # Built from gateway/Dockerfile with the repository root as build context;
  # the VITE_* values are passed as build args and default to empty.
  image: scrabble-gateway:latest
  container_name: scrabble-gateway
  build:
    context: ..
    dockerfile: gateway/Dockerfile
    args:
      VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
      VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
      VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
  restart: unless-stopped
  networks:
    - internal
  depends_on:
    - backend
  environment:
    # No GATEWAY_ADMIN_* on purpose: in the deployed contour the front caddy
    # owns the /_gm Basic-Auth and sends /_gm straight to the backend.
    GATEWAY_SERVICE_NAME: scrabble-gateway
    GATEWAY_LOG_LEVEL: ${LOG_LEVEL:-info}
    GATEWAY_HTTP_ADDR: ':8081'
    GATEWAY_BACKEND_HTTP_URL: http://backend:8080
    GATEWAY_BACKEND_GRPC_ADDR: backend:9090
    GATEWAY_CONNECTOR_ADDR: telegram:9091
    GATEWAY_DEFAULT_SUPPORTED_LANGUAGES: ${GATEWAY_DEFAULT_SUPPORTED_LANGUAGES:-en,ru}
    # Telemetry goes to the in-compose OTel Collector over plaintext OTLP.
    GATEWAY_OTEL_TRACES_EXPORTER: otlp
    GATEWAY_OTEL_METRICS_EXPORTER: otlp
    OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
    OTEL_EXPORTER_OTLP_INSECURE: 'true'
|
|
|
|
# --- Telegram connector (egress via the VPN sidecar) -----------------------
|
|
vpn:
  # VPN sidecar for the Telegram connector. The connector joins this
  # container's network namespace (network_mode: service:vpn), so the
  # `telegram` alias is declared here: it is what makes `telegram:9091`
  # resolvable on the internal network.
  image: docker.iliadenisov.ru/developer/amneziawg-sidecar:latest
  container_name: scrabble-telegram-vpn
  privileged: true
  restart: unless-stopped
  environment:
    # Mandatory: there is no default, interpolation fails with this message.
    AWG_CONF: ${AWG_CONF:?set AWG_CONF}
  networks:
    internal:
      aliases:
        - telegram
|
|
|
|
telegram:
  # Telegram connector. It has no network attachment of its own: it runs
  # inside the VPN sidecar's network namespace.
  image: scrabble-telegram:latest
  container_name: scrabble-telegram
  build:
    context: ..
    dockerfile: platform/telegram/Dockerfile
  restart: unless-stopped
  network_mode: 'service:vpn'
  depends_on:
    - vpn
  environment:
    TELEGRAM_SERVICE_NAME: scrabble-telegram
    TELEGRAM_LOG_LEVEL: ${LOG_LEVEL:-info}
    TELEGRAM_GRPC_ADDR: ':9091'
    # Bot tokens exist ONLY in this container (ARCHITECTURE.md §12). The
    # connector checks at boot that at least one of them is set.
    TELEGRAM_BOT_TOKEN_EN: ${TELEGRAM_BOT_TOKEN_EN:-}
    TELEGRAM_BOT_TOKEN_RU: ${TELEGRAM_BOT_TOKEN_RU:-}
    TELEGRAM_GAME_CHANNEL_ID_EN: ${TELEGRAM_GAME_CHANNEL_ID_EN:-}
    TELEGRAM_GAME_CHANNEL_ID_RU: ${TELEGRAM_GAME_CHANNEL_ID_RU:-}
    TELEGRAM_MINIAPP_URL: ${TELEGRAM_MINIAPP_URL:?set TELEGRAM_MINIAPP_URL}
    TELEGRAM_API_BASE_URL: ${TELEGRAM_API_BASE_URL:-}
    TELEGRAM_TEST_ENV: ${TELEGRAM_TEST_ENV:-false}
    # Because the netns is the sidecar's, name resolution is at the mercy of
    # the sidecar's DNS setup. Traffic to the collector's internal IP stays off
    # the tunnel (connected route), but a `DNS=` directive in AWG_CONF takes
    # over resolution and `otelcol` then fails to resolve ("produced zero
    # addresses"). AWG_CONF must therefore NOT contain `DNS=`: Docker's
    # resolver is used instead and resolves both otelcol and api.telegram.org
    # (see deploy/README.md).
    TELEGRAM_OTEL_TRACES_EXPORTER: otlp
    TELEGRAM_OTEL_METRICS_EXPORTER: otlp
    OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
    OTEL_EXPORTER_OTLP_INSECURE: 'true'
|
|
|
|
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway) --
|
|
caddy:
  # In-compose reverse proxy. It is attached to both networks and is known
  # as `scrabble` on the shared `edge` network.
  image: caddy:2-alpine
  container_name: scrabble-caddy
  restart: unless-stopped
  depends_on:
    - gateway
    - backend
    - grafana
  networks:
    edge:
      aliases:
        - scrabble
    internal: {}
  volumes:
    # Caddyfile is bound read-only from the config dir (the current directory
    # when SCRABBLE_CONFIG_DIR is unset).
    - ${SCRABBLE_CONFIG_DIR:-.}/caddy/Caddyfile:/etc/caddy/Caddyfile:ro
    - caddy-data:/data
  environment:
    # ":80" for the test contour, where the host caddy terminates TLS; for
    # prod set a domain so this caddy performs its own ACME.
    CADDY_SITE_ADDRESS: ${CADDY_SITE_ADDRESS:-:80}
    GM_BASICAUTH_USER: ${GM_BASICAUTH_USER:-gm}
    # Mandatory: there is no default, interpolation fails with this message.
    GM_BASICAUTH_HASH: ${GM_BASICAUTH_HASH:?set GM_BASICAUTH_HASH}
|
|
|
|
# --- Observability ---------------------------------------------------------
|
|
otelcol:
  # OpenTelemetry Collector; the other services export OTLP to otelcol:4317.
  image: otel/opentelemetry-collector-contrib:0.119.0
  container_name: scrabble-otelcol
  restart: unless-stopped
  networks:
    - internal
  volumes:
    # Read-only bind from the config dir (the current directory when
    # SCRABBLE_CONFIG_DIR is unset).
    - ${SCRABBLE_CONFIG_DIR:-.}/otelcol/config.yaml:/etc/otelcol/config.yaml:ro
  command:
    - "--config=/etc/otelcol/config.yaml"
|
|
|
|
prometheus:
  # Metrics store; samples are retained for 15 days.
  image: prom/prometheus:v2.55.1
  container_name: scrabble-prometheus
  restart: unless-stopped
  networks:
    - internal
  command: ["--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.retention.time=15d"]
  volumes:
    # Read-only bind from the config dir (the current directory when
    # SCRABBLE_CONFIG_DIR is unset).
    - ${SCRABBLE_CONFIG_DIR:-.}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - prometheus-data:/prometheus
|
|
|
|
tempo:
  # Trace store of the observability stack.
  image: grafana/tempo:2.7.1
  container_name: scrabble-tempo
  restart: unless-stopped
  networks:
    - internal
  command:
    - "-config.file=/etc/tempo/tempo.yaml"
  volumes:
    # Read-only bind from the config dir (the current directory when
    # SCRABBLE_CONFIG_DIR is unset).
    - ${SCRABBLE_CONFIG_DIR:-.}/tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
    - tempo-data:/var/tempo
|
|
|
|
grafana:
  image: grafana/grafana:11.4.0
  container_name: scrabble-grafana
  restart: unless-stopped
  depends_on:
    - prometheus
    - tempo
  networks:
    - internal
  environment:
    # Grafana is published under /_gm/grafana and protected by caddy's
    # Basic-Auth. Anonymous access is granted the Admin role, so the one
    # shared caddy login is the only gate and no per-user Grafana accounts
    # are needed.
    GF_SERVER_ROOT_URL: ${GRAFANA_ROOT_URL:-/_gm/grafana/}
    GF_SERVER_SERVE_FROM_SUB_PATH: 'true'
    GF_AUTH_ANONYMOUS_ENABLED: 'true'
    GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
    GF_AUTH_BASIC_ENABLED: 'false'
    GF_AUTH_DISABLE_LOGIN_FORM: 'true'
    GF_USERS_ALLOW_SIGN_UP: 'false'
    GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
  volumes:
    - ${SCRABBLE_CONFIG_DIR:-.}/grafana/provisioning:/etc/grafana/provisioning:ro
    # Keep dashboards under /etc/grafana, NOT under /var/lib/grafana: the
    # grafana-data volume is mounted over that path, so a bind nested inside
    # it gets shadowed and the provider logs "no such file or directory".
    - ${SCRABBLE_CONFIG_DIR:-.}/grafana/dashboards:/etc/grafana/dashboards:ro
    - grafana-data:/var/lib/grafana
|
|
|
|
networks:
  # Pre-existing network that Compose does not create; the in-compose caddy
  # joins it under the alias `scrabble`.
  edge:
    external: true
  # Inter-service network, given the fixed name scrabble-internal.
  internal:
    name: scrabble-internal
|
|
|
|
volumes:
  # Named volumes with default settings, one per stateful service.
  caddy-data: {}
  grafana-data: {}
  postgres-data: {}
  prometheus-data: {}
  tempo-data: {}
|