From 00c79064fcaa24220db7e158a9b470e80d4e62e7 Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Wed, 13 May 2026 23:26:35 +0200 Subject: [PATCH 1/4] tools/dev-deploy: long-lived dev environment behind host Caddy A docker-compose stack that hosts postgres, redis, mailpit, backend, gateway, and an app-routing Caddy. Reachable through the host Caddy at https://www.galaxy.lan (static SPA) and https://api.galaxy.lan (REST + gRPC). Coexists with tools/local-dev/ and tools/local-ci/ by giving every name (compose project, container, network, volume) a distinct galaxy-dev-* prefix. State is persisted in named volumes; game-state lives under ${GALAXY_DEV_GAME_STATE_DIR:-$HOME/.galaxy-dev/game-state} so the default works for a non-root runner without sudo. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/dev-deploy/.env.example | 21 +++ tools/dev-deploy/Caddyfile.dev | 25 ++++ tools/dev-deploy/Caddyfile.prod | 15 ++ tools/dev-deploy/Makefile | 105 +++++++++++++ tools/dev-deploy/README.md | 188 +++++++++++++++++++++++ tools/dev-deploy/docker-compose.yml | 223 ++++++++++++++++++++++++++++ 6 files changed, 577 insertions(+) create mode 100644 tools/dev-deploy/.env.example create mode 100644 tools/dev-deploy/Caddyfile.dev create mode 100644 tools/dev-deploy/Caddyfile.prod create mode 100644 tools/dev-deploy/Makefile create mode 100644 tools/dev-deploy/README.md create mode 100644 tools/dev-deploy/docker-compose.yml diff --git a/tools/dev-deploy/.env.example b/tools/dev-deploy/.env.example new file mode 100644 index 0000000..12c3a19 --- /dev/null +++ b/tools/dev-deploy/.env.example @@ -0,0 +1,21 @@ +# Defaults for the long-lived dev stack. Copy to `.env` and edit +# per-environment overrides. Everything in this file is non-secret; +# real credentials would go through Gitea Actions secrets and never +# this file. 
+# +# The compose `${VAR:-default}` expansions fall back to the values +# baked into `docker-compose.yml`, so this file documents the knobs +# rather than driving them. + +# Auto-provisioned sandbox bootstrap. Empty disables the bootstrap. +BACKEND_DEV_SANDBOX_EMAIL=dev@galaxy.lan +BACKEND_DEV_SANDBOX_ENGINE_IMAGE=galaxy-engine:dev +BACKEND_DEV_SANDBOX_ENGINE_VERSION=0.1.0 +BACKEND_DEV_SANDBOX_PLAYER_COUNT=20 + +# `123456` short-circuits the email-code path for the dev account. +# Leave empty in environments where real Mailpit codes must be used. +BACKEND_AUTH_DEV_FIXED_CODE=123456 + +# Name of the external Docker bridge the host Caddy is attached to. +GALAXY_EDGE_NETWORK=edge diff --git a/tools/dev-deploy/Caddyfile.dev b/tools/dev-deploy/Caddyfile.dev new file mode 100644 index 0000000..64758b7 --- /dev/null +++ b/tools/dev-deploy/Caddyfile.dev @@ -0,0 +1,25 @@ +# Application-routing Caddy for the long-lived dev environment. +# Listens only on the `edge` Docker network; TLS termination and the +# real `:80`/`:443` listeners belong to the host Caddy in front of us. +# +# `/srv/galaxy-ui` is mounted from the `galaxy-dev-ui-dist` named volume, +# refreshed on every dev-deploy run. + +{ + auto_https off +} + +:80 { + @frontend host www.galaxy.lan + handle @frontend { + root * /srv/galaxy-ui + try_files {path} /index.html + file_server + encode zstd gzip + } + + @api host api.galaxy.lan + handle @api { + reverse_proxy galaxy-api:8080 + } +} diff --git a/tools/dev-deploy/Caddyfile.prod b/tools/dev-deploy/Caddyfile.prod new file mode 100644 index 0000000..5784c28 --- /dev/null +++ b/tools/dev-deploy/Caddyfile.prod @@ -0,0 +1,15 @@ +# Production placeholder. Mirrors `Caddyfile.dev` but uses real +# hostnames and lets Caddy auto-provision TLS certificates. Not used +# until prod-deploy plumbing exists; kept under version control so the +# dev/prod surface stays symmetric. 
+ +www.galaxy.com { + root * /srv/galaxy-ui + try_files {path} /index.html + file_server + encode zstd gzip +} + +api.galaxy.com { + reverse_proxy galaxy-api:8080 +} diff --git a/tools/dev-deploy/Makefile b/tools/dev-deploy/Makefile new file mode 100644 index 0000000..403054b --- /dev/null +++ b/tools/dev-deploy/Makefile @@ -0,0 +1,105 @@ +.PHONY: help up down rebuild logs status clean-data health psql build-engine seed-ui + +.DEFAULT_GOAL := help + +REPO_ROOT := $(realpath $(CURDIR)/../..) +ENGINE_IMAGE := galaxy-engine:dev +ENGINE_LABEL := org.opencontainers.image.title=galaxy-game-engine +# Game-state root lives under the invoking user's home by default so +# `make up` works without sudo. Override `GALAXY_DEV_GAME_STATE_DIR` +# in the environment or on the `make` command line to relocate (e.g. +# /var/lib/galaxy-dev/game-state on a production-shaped host); a value +# in compose's `.env` is ignored because the export below wins. The +# value flows through to the compose bind-mount and `BACKEND_GAME_STATE_ROOT`. +export GALAXY_DEV_GAME_STATE_DIR ?= $(HOME)/.galaxy-dev/game-state + +COMPOSE := docker compose + +help: + @echo "Long-lived Galaxy dev environment (https://*.galaxy.lan):" + @echo " make up Build images, ensure engine image, bring stack up" + @echo " make rebuild Force rebuild of backend / gateway images and bring up" + @echo " make build-engine Build $(ENGINE_IMAGE) from game/Dockerfile (no-op if present)" + @echo " make seed-ui Build ui/frontend and load into galaxy-dev-ui-dist volume" + @echo " make down Stop containers, keep named volumes" + @echo " make logs Tail all logs" + @echo " make status docker compose ps" + @echo " make health Probe the stack through the host Caddy" + @echo " make psql Open a psql shell as galaxy@galaxy_backend" + @echo " make clean-data Stop everything and wipe named volumes + game-state" + @echo "" + @echo "Requires:" + @echo " - external Docker network '$${GALAXY_EDGE_NETWORK:-edge}'" + @echo " (docker network create edge)" + @echo " - host Caddy proxying *.galaxy.lan into that network" +
@echo " - game-state dir: $(GALAXY_DEV_GAME_STATE_DIR) (auto-created)" + +up: build-engine + mkdir -p "$(GALAXY_DEV_GAME_STATE_DIR)" + $(COMPOSE) up -d --wait + +rebuild: build-engine + $(COMPOSE) build --no-cache galaxy-backend galaxy-api + mkdir -p "$(GALAXY_DEV_GAME_STATE_DIR)" + $(COMPOSE) up -d --wait + +build-engine: + @if docker image inspect $(ENGINE_IMAGE) >/dev/null 2>&1; then \ + echo "$(ENGINE_IMAGE) already built; skipping (use 'docker rmi $(ENGINE_IMAGE)' to force a rebuild)."; \ + else \ + echo "building $(ENGINE_IMAGE)…"; \ + docker build -t $(ENGINE_IMAGE) -f $(REPO_ROOT)/game/Dockerfile $(REPO_ROOT); \ + fi + +# Build the UI frontend and load the resulting build/ directory into +# the named volume Caddy serves from. Used by the dev-deploy workflow +# and by anyone bringing the stack up by hand. +seed-ui: + @if [ ! -d $(REPO_ROOT)/ui/frontend/node_modules ]; then \ + echo "installing UI dependencies…"; \ + (cd $(REPO_ROOT)/ui && pnpm install --frozen-lockfile); \ + fi + @echo "building UI (vite build)…" + (cd $(REPO_ROOT)/ui/frontend && \ + VITE_GATEWAY_BASE_URL=https://api.galaxy.lan \ + VITE_GATEWAY_RESPONSE_PUBLIC_KEY=$$(cat $(REPO_ROOT)/ui/frontend/.env.development \ + | sed -n 's/^VITE_GATEWAY_RESPONSE_PUBLIC_KEY=//p') \ + pnpm build) + @echo "loading build/ into galaxy-dev-ui-dist volume…" + docker volume create galaxy-dev-ui-dist >/dev/null + docker run --rm \ + -v galaxy-dev-ui-dist:/dst \ + -v $(REPO_ROOT)/ui/frontend/build:/src:ro \ + alpine sh -c 'rm -rf /dst/* /dst/.??* 2>/dev/null; cp -a /src/. 
/dst/' + +down: + $(COMPOSE) down + +logs: + $(COMPOSE) logs -f --tail=100 + +status: + $(COMPOSE) ps + +health: + @echo "Frontend (https://www.galaxy.lan):" + @curl -sS -o /dev/null -w " HTTP %{http_code}\n" https://www.galaxy.lan/ || echo " unreachable" + @echo "API healthz (https://api.galaxy.lan/healthz):" + @curl -sS -o /dev/null -w " HTTP %{http_code}\n" https://api.galaxy.lan/healthz || echo " unreachable" + +psql: + $(COMPOSE) exec galaxy-postgres psql -U galaxy -d galaxy_backend + +clean-data: + @echo "Stopping containers and engines, then wiping volumes + game-state…" + @ids=$$(docker ps -aq --filter label=$(ENGINE_LABEL)); \ + if [ -n "$$ids" ]; then \ + echo "stopping engine containers…"; \ + docker rm -f $$ids >/dev/null; \ + fi + $(COMPOSE) down -v + @if [ -d "$(GALAXY_DEV_GAME_STATE_DIR)" ]; then \ + echo "wiping $(GALAXY_DEV_GAME_STATE_DIR)…"; \ + docker run --rm -v "$(GALAXY_DEV_GAME_STATE_DIR):/state" alpine sh -c 'rm -rf /state/*' 2>/dev/null \ + || rm -rf "$(GALAXY_DEV_GAME_STATE_DIR)"/* 2>/dev/null || true; \ + fi diff --git a/tools/dev-deploy/README.md b/tools/dev-deploy/README.md new file mode 100644 index 0000000..0728d1b --- /dev/null +++ b/tools/dev-deploy/README.md @@ -0,0 +1,188 @@ +# `tools/dev-deploy/` — long-lived Galaxy dev environment + +A docker-compose stack that runs the Galaxy backend, gateway, supporting +services, and a small Caddy in front of them, reachable through the host +Caddy at `https://www.galaxy.lan` and `https://api.galaxy.lan`. Used by +the `dev-deploy.yaml` Gitea Actions workflow as the canonical dev target +on every merge into the `development` branch, and runnable by hand +through this Makefile for local debugging of the deploy plumbing +itself. + +This stack is **not** the developer's primary playground for UI work — +that role still belongs to [`tools/local-dev/`](../local-dev/README.md), +which is faster (Vite HMR, host-side dev server) and isolated to one +developer. 
The two stacks coexist on the same host because every name +is distinct: + +| | `tools/local-dev/` | `tools/dev-deploy/` | +|------------------|------------------------------|-----------------------------| +| Compose project | `local-dev` | `galaxy-dev` | +| Container prefix | `galaxy-local-dev-*` | `galaxy-dev-*` | +| Network | `galaxy-local-dev-net` | `galaxy-dev-internal`, `edge` | +| Volumes | `galaxy-local-dev-*` | `galaxy-dev-*` | +| Host ports | 5433/6380/8025/8080/9090 | none (only `edge` network) | +| Game state | `/tmp/galaxy-game-state` | `${HOME}/.galaxy-dev/game-state` | +| Engine image | `galaxy-engine:local-dev` | `galaxy-engine:dev` | + +## Prerequisites + +The host must already provide: + +- Docker daemon reachable as the user running `make` (member of the + `docker` group, no sudo). +- An external bridge network named `edge` (or whatever + `GALAXY_EDGE_NETWORK` overrides to): + + ```sh + docker network create edge + ``` + +- A host Caddy listening on `:80`/`:443`, attached to the `edge` + network, and proxying `www.galaxy.lan` and `api.galaxy.lan` to + `galaxy-caddy:80`. Example fragment for the host Caddyfile: + + ```caddy + www.galaxy.lan, api.galaxy.lan { + tls internal + reverse_proxy galaxy-caddy:80 + } + ``` + +- Game-state directory writable by the user running `make`. Default + is `${HOME}/.galaxy-dev/game-state`; `make up` creates it on demand. + Override by exporting `GALAXY_DEV_GAME_STATE_DIR` (e.g. to + `/var/lib/galaxy-dev/game-state` once the host is provisioned for + it). + +## Bring it up + +```sh +make -C tools/dev-deploy up +``` + +`up` (re)builds the local-dev backend and gateway images, makes sure the +engine image `galaxy-engine:dev` exists, and waits for healthchecks. It +does **not** seed the UI volume — that is normally done by CI. 
The first +time you run by hand: + +```sh +make -C tools/dev-deploy seed-ui +make -C tools/dev-deploy up +make -C tools/dev-deploy health +``` + +`seed-ui` runs `pnpm build` in `ui/frontend/`, then copies the resulting +`build/` tree into the `galaxy-dev-ui-dist` volume. Subsequent CI deploys +overwrite this volume automatically. + +## Daily flow + +```sh +make -C tools/dev-deploy rebuild # rebuild backend/gateway images + up +make -C tools/dev-deploy logs # tail compose logs +make -C tools/dev-deploy health # probe https://*.galaxy.lan +make -C tools/dev-deploy down # stop, keep state +``` + +State persists in named volumes between `up`/`down` cycles. The +`development` branch keeps the dev environment continuously usable — +games created last week survive into this week unless somebody +calls `make clean-data`. + +## Logging in + +The same dev-mode email-code override as `tools/local-dev/` applies: + +1. Enter `dev@galaxy.lan` (or whatever `BACKEND_DEV_SANDBOX_EMAIL` + resolves to) in the login form. +2. Submit `123456` as the code if `BACKEND_AUTH_DEV_FIXED_CODE` is + non-empty. Otherwise open Mailpit at + `http://galaxy-mailpit:8025/` from inside the network or proxy it + through the host Caddy when needed. + +The fixed-code override is rejected by production env loaders, so it +cannot leak into the prod environment. 
+ +## Networking + +``` +Browser + │ https://www.galaxy.lan, https://api.galaxy.lan + ▼ +host-Caddy (:80, :443, TLS, attached to `edge` network) + │ reverse_proxy *.galaxy.lan → galaxy-caddy:80 + ▼ +galaxy-caddy (networks: edge + galaxy-dev-internal) + │ www.galaxy.lan → file_server /srv/galaxy-ui (volume galaxy-dev-ui-dist) + │ api.galaxy.lan → reverse_proxy galaxy-api:8080 + ▼ +galaxy-dev-internal + ├─ galaxy-api (gateway: :8080 REST, :9090 gRPC) + ├─ galaxy-backend (backend: :8080 HTTP, :8081 gRPC push) + ├─ galaxy-postgres (postgres: :5432) + ├─ galaxy-redis (redis: :6379) + ├─ galaxy-mailpit (mailpit: :8025 UI, :1025 SMTP) + └─ engine containers (spawned by backend on demand) +``` + +The compose project deliberately exposes no host ports. Diagnostics +that used to go through `localhost:8025` etc. now go through the +container network: `docker compose -f tools/dev-deploy/docker-compose.yml +exec galaxy-mailpit wget -qO- localhost:8025/messages` and similar. + +## Persistent state and schema changes + +The dev Postgres volume `galaxy-dev-postgres-data` survives redeploys. +Until the pre-production migration rule is lifted, every +backward-incompatible change to `backend/internal/postgres/migrations/00001_init.sql` +needs a manual wipe before the next deploy succeeds: + +```sh +make -C tools/dev-deploy clean-data +make -C tools/dev-deploy up +``` + +This is the same caveat as `tools/local-dev/`, just with a different +volume name. 
+ +## Make targets + +```text +make up Build images, ensure engine image, bring stack up (waits for health) +make rebuild Rebuild backend / gateway images (ignores cache), then up +make seed-ui pnpm build + load build/ into galaxy-dev-ui-dist volume +make build-engine Build galaxy-engine:dev (no-op if image already present) +make down Stop containers, keep named volumes +make logs Tail compose logs +make status docker compose ps +make health curl https://www.galaxy.lan + https://api.galaxy.lan/healthz +make psql psql as galaxy@galaxy_backend +make clean-data Stop everything and wipe volumes + game-state dir +``` + +## Files + +- `docker-compose.yml` — six services: postgres, redis, mailpit, + galaxy-backend, galaxy-api, galaxy-caddy. Reuses the alpine-runtime + Dockerfiles from `../local-dev/` so the backend healthcheck can run + `wget`. Reuses the dev keypair from `../local-dev/keys/`. +- `Caddyfile.dev` — the application-routing Caddy config, mounted into + `galaxy-caddy` at `/etc/caddy/Caddyfile`. +- `Caddyfile.prod` — placeholder for a future prod deployment; not used + by this compose. +- `Makefile` — wrapper over `docker compose` with helpers for engine, + UI seeding, health probes, and full wipe. +- `.env.example` — non-secret defaults for the compose `${VAR:-}` + expansions. Copy to `.env` if you want host-local overrides. + +## Relationship to other infrastructure + +- `tools/local-dev/` — single-developer playground, host-port mapped, + Vite dev server on the side. Recommended for active UI work. +- `tools/local-ci/` — Gitea + act runner for **fallback** workflow + testing without `gitea.lan`. Optional, not part of the per-stage CI + gate anymore. +- `.gitea/workflows/dev-deploy.yaml` — the CI side of this stack: + builds images, seeds the UI volume, runs `docker compose up -d` on + every merge into `development`. The Makefile in this directory is + what that workflow ultimately calls into. 
diff --git a/tools/dev-deploy/docker-compose.yml b/tools/dev-deploy/docker-compose.yml new file mode 100644 index 0000000..5a449df --- /dev/null +++ b/tools/dev-deploy/docker-compose.yml @@ -0,0 +1,223 @@ +# Long-lived dev environment for the Galaxy stack, deployed by the +# `dev-deploy.yaml` Gitea Actions workflow on every merge into the +# `development` branch and (optionally) by `make -C tools/dev-deploy up` +# from a developer shell on the same host. +# +# The stack is reachable from a browser only through the host Caddy on +# the machine, which terminates TLS and forwards `*.galaxy.lan` into the +# external `edge` Docker network where `galaxy-caddy` does app-routing. +# No service in this compose project binds a host port — coexistence +# with `tools/local-dev/` (which listens on localhost:5433/6380/8025/...) +# is achieved by distinct names, networks, and volumes. +# +# Browser → host-Caddy (:80/:443) → galaxy-caddy → {galaxy-api, /srv/galaxy-ui} +# +# Persistent state lives in named volumes under the `galaxy-dev-*` +# prefix; surviving redeploys across compose rebuilds. 
+ +name: galaxy-dev + +services: + galaxy-postgres: + image: postgres:16-alpine + container_name: galaxy-dev-postgres + restart: unless-stopped + environment: + POSTGRES_USER: galaxy + POSTGRES_PASSWORD: galaxy + POSTGRES_DB: galaxy_backend + volumes: + - galaxy-dev-postgres-data:/var/lib/postgresql/data + networks: + - galaxy-internal + healthcheck: + test: ["CMD-SHELL", "pg_isready -U galaxy -d galaxy_backend"] + interval: 3s + timeout: 3s + retries: 30 + start_period: 5s + + galaxy-redis: + image: redis:7-alpine + container_name: galaxy-dev-redis + restart: unless-stopped + command: + - redis-server + - --requirepass + - galaxy-dev + - --appendonly + - "no" + - --save + - "" + networks: + - galaxy-internal + healthcheck: + test: ["CMD", "redis-cli", "-a", "galaxy-dev", "PING"] + interval: 3s + timeout: 3s + retries: 30 + start_period: 3s + + galaxy-mailpit: + image: axllent/mailpit:v1.21 + container_name: galaxy-dev-mailpit + restart: unless-stopped + networks: + - galaxy-internal + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://localhost:8025/livez"] + interval: 3s + timeout: 3s + retries: 30 + start_period: 3s + + galaxy-backend: + build: + context: ../.. 
+ dockerfile: tools/local-dev/backend.Dockerfile + image: galaxy/backend:dev + container_name: galaxy-dev-backend + restart: unless-stopped + user: "0:0" + depends_on: + galaxy-postgres: + condition: service_healthy + galaxy-mailpit: + condition: service_healthy + environment: + BACKEND_LOGGING_LEVEL: info + BACKEND_HTTP_LISTEN_ADDR: ":8080" + BACKEND_GRPC_PUSH_LISTEN_ADDR: ":8081" + BACKEND_POSTGRES_DSN: "postgres://galaxy:galaxy@galaxy-postgres:5432/galaxy_backend?search_path=backend&sslmode=disable" + BACKEND_SMTP_HOST: galaxy-mailpit + BACKEND_SMTP_PORT: "1025" + BACKEND_SMTP_FROM: "galaxy-backend@galaxy.lan" + BACKEND_SMTP_TLS_MODE: none + BACKEND_DOCKER_NETWORK: galaxy-dev-internal + BACKEND_GAME_STATE_ROOT: ${GALAXY_DEV_GAME_STATE_DIR:-${HOME}/.galaxy-dev/game-state} + BACKEND_GEOIP_DB_PATH: /var/lib/galaxy/geoip.mmdb + BACKEND_NOTIFICATION_ADMIN_EMAIL: admin@galaxy.lan + BACKEND_MAIL_WORKER_INTERVAL: 500ms + BACKEND_NOTIFICATION_WORKER_INTERVAL: 500ms + BACKEND_OTEL_TRACES_EXPORTER: none + BACKEND_OTEL_METRICS_EXPORTER: none + BACKEND_AUTH_DEV_FIXED_CODE: ${BACKEND_AUTH_DEV_FIXED_CODE:-} + BACKEND_DEV_SANDBOX_EMAIL: ${BACKEND_DEV_SANDBOX_EMAIL:-} + BACKEND_DEV_SANDBOX_ENGINE_IMAGE: ${BACKEND_DEV_SANDBOX_ENGINE_IMAGE:-galaxy-engine:dev} + BACKEND_DEV_SANDBOX_ENGINE_VERSION: ${BACKEND_DEV_SANDBOX_ENGINE_VERSION:-0.1.0} + BACKEND_DEV_SANDBOX_PLAYER_COUNT: ${BACKEND_DEV_SANDBOX_PLAYER_COUNT:-20} + volumes: + - /var/run/docker.sock:/var/run/docker.sock + # Per-game state directories live under the same absolute path + # both inside the backend container and on the Docker daemon host, + # so the bind-mount source the backend hands to the daemon + # resolves correctly when spawning engine containers. The dev + # environment uses a distinct prefix from `tools/local-dev/` so + # the two stacks do not collide on the same host. 
+ # Game-state root must resolve to the same absolute path inside + # the backend container and on the Docker daemon host, because + # backend hands that path to the daemon when it spawns engine + # containers. Defaults to `${HOME}/.galaxy-dev/game-state` (the + # same default the Makefile exports) so a non-root runner can + # write to it and bare `docker compose` calls resolve it too. + - type: bind + source: ${GALAXY_DEV_GAME_STATE_DIR:-${HOME}/.galaxy-dev/game-state} + target: ${GALAXY_DEV_GAME_STATE_DIR:-${HOME}/.galaxy-dev/game-state} + bind: + create_host_path: true + - ../../pkg/geoip/test-data/test-data/GeoIP2-Country-Test.mmdb:/var/lib/galaxy/geoip.mmdb:ro + networks: + - galaxy-internal + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://localhost:8080/healthz"] + interval: 3s + timeout: 3s + retries: 60 + start_period: 10s + + galaxy-api: + build: + context: ../.. + dockerfile: tools/local-dev/gateway.Dockerfile + image: galaxy/gateway:dev + container_name: galaxy-dev-api + restart: unless-stopped + depends_on: + galaxy-backend: + condition: service_healthy + galaxy-redis: + condition: service_healthy + environment: + GATEWAY_LOG_LEVEL: info + GATEWAY_PUBLIC_HTTP_ADDR: ":8080" + GATEWAY_AUTHENTICATED_GRPC_ADDR: ":9090" + GATEWAY_BACKEND_HTTP_URL: "http://galaxy-backend:8080" + GATEWAY_BACKEND_GRPC_PUSH_URL: "galaxy-backend:8081" + GATEWAY_BACKEND_GATEWAY_CLIENT_ID: dev-gateway-1 + GATEWAY_RESPONSE_SIGNER_PRIVATE_KEY_PEM_PATH: /run/secrets/gateway-response.pem + GATEWAY_REDIS_MASTER_ADDR: "galaxy-redis:6379" + GATEWAY_REDIS_PASSWORD: galaxy-dev + # Anti-abuse defaults are looser than production: the dev + # environment is shared by a handful of trusted testers who + # frequently hammer the same identity to reproduce flows. 
+ GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_AUTH_RATE_LIMIT_BURST: "1000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_SEND_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST: "1000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_CONFIRM_EMAIL_CODE_IDENTITY_RATE_LIMIT_BURST: "1000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_MISC_MAX_BODY_BYTES: "131072" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_MISC_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_PUBLIC_MISC_RATE_LIMIT_BURST: "1000" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_BROWSER_BOOTSTRAP_MAX_BODY_BYTES: "65536" + GATEWAY_PUBLIC_HTTP_ANTI_ABUSE_BROWSER_ASSET_MAX_BODY_BYTES: "65536" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_IP_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_IP_RATE_LIMIT_BURST: "1000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_SESSION_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_SESSION_RATE_LIMIT_BURST: "1000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_USER_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_USER_RATE_LIMIT_BURST: "1000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_MESSAGE_CLASS_RATE_LIMIT_REQUESTS: "10000" + GATEWAY_AUTHENTICATED_GRPC_ANTI_ABUSE_MESSAGE_CLASS_RATE_LIMIT_BURST: "1000" + volumes: + - ../local-dev/keys/gateway-response.pem:/run/secrets/gateway-response.pem:ro + networks: + - galaxy-internal + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://localhost:8080/healthz"] + interval: 3s + timeout: 3s + retries: 30 + start_period: 5s + + galaxy-caddy: + image: caddy:2.11.2-alpine + container_name: galaxy-dev-caddy + restart: unless-stopped + depends_on: + galaxy-api: + condition: service_healthy + volumes: + - ./Caddyfile.dev:/etc/caddy/Caddyfile:ro + - galaxy-dev-caddy-data:/data + - 
galaxy-dev-ui-dist:/srv/galaxy-ui:ro + networks: + - galaxy-internal + - edge + +networks: + galaxy-internal: + name: galaxy-dev-internal + driver: bridge + internal: false + edge: + name: ${GALAXY_EDGE_NETWORK:-edge} + external: true + +volumes: + galaxy-dev-postgres-data: + name: galaxy-dev-postgres-data + galaxy-dev-caddy-data: + name: galaxy-dev-caddy-data + galaxy-dev-ui-dist: + name: galaxy-dev-ui-dist -- 2.52.0 From f316952c1245cde99bfed7f126700538d4be29ae Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Wed, 13 May 2026 23:26:46 +0200 Subject: [PATCH 2/4] ci: split workflows for linear development flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reshapes .gitea/workflows/ around the new main ← development ← feature/* branching model: - go-unit.yaml — Go unit tests, runs on push/PR matching Go paths - ui-test.yaml — narrowed to Vitest + Playwright only (Go tests now live in go-unit.yaml) - integration.yaml — testcontainers suite, fires on PR to development/main and on push to development - dev-deploy.yaml — builds the stack and (re)deploys tools/dev-deploy/ on every merge into development - prod-build.yaml — builds prod images on push to main and uploads docker save bundles as artifacts (30-day retention) - deploy-prod.yaml — workflow_dispatch placeholder for the future SSH-based rollout ui-release.yaml is removed; its v* tag trigger is superseded by prod-build.yaml plus the manual deploy-prod entry point. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/deploy-prod.yaml | 31 +++++++ .gitea/workflows/dev-deploy.yaml | 116 +++++++++++++++++++++++ .gitea/workflows/go-unit.yaml | 78 ++++++++++++++++ .gitea/workflows/integration.yaml | 65 +++++++++++++ .gitea/workflows/prod-build.yaml | 116 +++++++++++++++++++++++ .gitea/workflows/ui-release.yaml | 148 ------------------------------ .gitea/workflows/ui-test.yaml | 61 +----------- 7 files changed, 409 insertions(+), 206 deletions(-) create mode 100644 .gitea/workflows/deploy-prod.yaml create mode 100644 .gitea/workflows/dev-deploy.yaml create mode 100644 .gitea/workflows/go-unit.yaml create mode 100644 .gitea/workflows/integration.yaml create mode 100644 .gitea/workflows/prod-build.yaml delete mode 100644 .gitea/workflows/ui-release.yaml diff --git a/.gitea/workflows/deploy-prod.yaml b/.gitea/workflows/deploy-prod.yaml new file mode 100644 index 0000000..7514a3c --- /dev/null +++ b/.gitea/workflows/deploy-prod.yaml @@ -0,0 +1,31 @@ +name: deploy-prod + +# Placeholder for the production rollout workflow. Today it only proves +# the manual entry point works; the actual `docker save | ssh prod +# docker load` + remote `docker compose up -d` pipeline is wired in +# once the production host, SSH credentials, and DNS are decided. + +on: + workflow_dispatch: + inputs: + image_tag: + description: "Image tag to deploy (commit-, produced by prod-build.yaml)" + required: true + type: string + +jobs: + deploy: + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Announce target + run: | + echo "Would deploy image tag: ${{ inputs.image_tag }}" + echo "TODO:" + echo " 1. Download galaxy-images-${{ inputs.image_tag }} from prod-build artifacts." + echo " 2. scp the .tar.gz bundles to the production host." + echo " 3. ssh prod 'docker load -i ...' for backend / gateway / engine." + echo " 4. ssh prod 'docker compose -f /opt/galaxy/docker-compose.yml up -d'." + echo " 5. 
Probe https://api.galaxy.com/healthz and roll back on failure." diff --git a/.gitea/workflows/dev-deploy.yaml b/.gitea/workflows/dev-deploy.yaml new file mode 100644 index 0000000..91f7dce --- /dev/null +++ b/.gitea/workflows/dev-deploy.yaml @@ -0,0 +1,116 @@ +name: dev-deploy + +# Builds the Galaxy stack and (re)deploys it into the long-lived dev +# environment on the host running this Gitea Actions runner. Triggered +# on every merge into `development`. Branch protections on `development` +# guarantee the commit already passed `go-unit`, `ui-test`, and +# `integration` as part of the PR that produced this push, so this +# workflow does not re-run those tests — it focuses on packaging and +# rollout. + +on: + push: + branches: + - development + paths: + - 'backend/**' + - 'gateway/**' + - 'game/**' + - 'pkg/**' + - 'ui/**' + - 'go.work' + - 'go.work.sum' + - 'tools/dev-deploy/**' + - '.gitea/workflows/dev-deploy.yaml' + - '!**/*.md' + +jobs: + deploy: + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.work + cache: true + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + with: + version: 11.0.7 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + cache-dependency-path: ui/pnpm-lock.yaml + + - name: Install UI dependencies + working-directory: ui + run: pnpm install --frozen-lockfile + + - name: Build UI frontend + working-directory: ui/frontend + env: + VITE_GATEWAY_BASE_URL: https://api.galaxy.lan + run: | + # The response-signing public key is committed in + # `.env.development` alongside its private counterpart in + # `tools/local-dev/keys/`. Pull it from there at build time so + # the production-mode bundle ships the same key the dev + # gateway uses to sign. 
+ export VITE_GATEWAY_RESPONSE_PUBLIC_KEY="$(grep -E '^VITE_GATEWAY_RESPONSE_PUBLIC_KEY=' .env.development | cut -d= -f2-)" + pnpm build + + - name: Build galaxy-engine image + working-directory: ${{ gitea.workspace }} + run: | + docker build \ + -t galaxy-engine:dev \ + -f game/Dockerfile \ + . + + - name: Build backend + gateway images + working-directory: tools/dev-deploy + run: | + docker compose build galaxy-backend galaxy-api + + - name: Seed UI volume + run: | + docker volume create galaxy-dev-ui-dist >/dev/null + docker run --rm \ + -v galaxy-dev-ui-dist:/dst \ + -v "${{ gitea.workspace }}/ui/frontend/build:/src:ro" \ + alpine sh -c 'rm -rf /dst/* /dst/.??* 2>/dev/null; cp -a /src/. /dst/' + + - name: Bring up the stack + working-directory: tools/dev-deploy + run: | + # HOME is not part of the `env` expression context; read it in the shell. + export GALAXY_DEV_GAME_STATE_DIR="$HOME/.galaxy-dev/game-state" + mkdir -p "$GALAXY_DEV_GAME_STATE_DIR" + docker compose up -d --wait --remove-orphans + + - name: Probe the stack + run: | + set -e + # Probe by hostname so the request takes the same route as a + # browser on the host: the host Caddy on :443 (`tls internal`) + # terminates TLS and forwards into the edge network. -k accepts + # the host's internal CA, which the runner image does not trust; + # -f turns a non-2xx healthz response into a step failure. + curl -fsk --max-time 10 https://api.galaxy.lan/healthz \ + | tee /tmp/healthz + test -s /tmp/healthz + curl -sk --max-time 10 -o /dev/null -w '%{http_code}\n' \ + https://www.galaxy.lan/ | tee /tmp/www_status + grep -qE '^(200|304)$' /tmp/www_status diff --git a/.gitea/workflows/go-unit.yaml b/.gitea/workflows/go-unit.yaml new file mode 100644 index 0000000..c33d1dd --- /dev/null +++ b/.gitea/workflows/go-unit.yaml @@ -0,0 +1,78 @@ +name: go-unit + +# Fast unit tests for the Go side of the monorepo. Runs on every push +# and pull request whose path filter matches a Go source directory. 
+# The integration suite (testcontainers-driven, slow) lives in +# `integration.yaml` and only fires for PRs into `development`/`main` +# and pushes to `development`. + +on: + push: + paths: + - 'backend/**' + - 'gateway/**' + - 'game/**' + - 'pkg/**' + - 'ui/core/**' + - 'go.work' + - 'go.work.sum' + - '.gitea/workflows/go-unit.yaml' + - '!**/*.md' + pull_request: + paths: + - 'backend/**' + - 'gateway/**' + - 'game/**' + - 'pkg/**' + - 'ui/core/**' + - 'go.work' + - 'go.work.sum' + - '.gitea/workflows/go-unit.yaml' + - '!**/*.md' + +jobs: + test: + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.work + cache: true + + - name: Run Go tests + # client/ is the deprecated Fyne client; excluded from CI per + # ui/PLAN.md §74. -count=1 disables Go's test cache so a green + # run never depends on a previous runner's cached state. The + # backend suite is run with -p 1 because most backend packages + # spawn their own Postgres testcontainer, and parallel + # Postgres bootstraps starve each other on a constrained + # runner. pkg modules are listed one by one because ./pkg/... + # does not recurse across the independent go.work modules + # under pkg/. + run: | + go test -count=1 -p 1 ./backend/... + go test -count=1 \ + ./gateway/... \ + ./game/... \ + ./ui/core/... \ + ./pkg/calc/... \ + ./pkg/connector/... \ + ./pkg/cronutil/... \ + ./pkg/error/... \ + ./pkg/geoip/... \ + ./pkg/model/... \ + ./pkg/postgres/... \ + ./pkg/redisconn/... \ + ./pkg/schema/... \ + ./pkg/storage/... \ + ./pkg/transcoder/... \ + ./pkg/util/... diff --git a/.gitea/workflows/integration.yaml b/.gitea/workflows/integration.yaml new file mode 100644 index 0000000..1f94fa8 --- /dev/null +++ b/.gitea/workflows/integration.yaml @@ -0,0 +1,65 @@ +name: integration + +# Full integration suite (testcontainers-driven, ~5–10 minutes). 
Heavy +# enough that we do not run it on every push to a feature branch — only +# when there is an open PR aimed at `development`/`main`, or after a +# merge into `development`. The unit jobs (`go-unit.yaml`, +# `ui-test.yaml`) keep guarding fast feedback on every push. + +on: + pull_request: + branches: + - development + - main + paths: + - 'backend/**' + - 'gateway/**' + - 'game/**' + - 'pkg/**' + - 'ui/core/**' + - 'integration/**' + - 'go.work' + - 'go.work.sum' + - '.gitea/workflows/integration.yaml' + - '!**/*.md' + push: + branches: + - development + paths: + - 'backend/**' + - 'gateway/**' + - 'game/**' + - 'pkg/**' + - 'ui/core/**' + - 'integration/**' + - 'go.work' + - 'go.work.sum' + - '.gitea/workflows/integration.yaml' + - '!**/*.md' + +jobs: + integration: + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.work + cache: true + + - name: Run integration suite + # `make integration` precleans leftover docker-compose state and + # then runs every test under integration/ serially (-p=1 + # -parallel=1, 15-minute per-test timeout). Testcontainers + # reaches the host's docker daemon via the socket Gitea exposes + # to the runner; the workflow inherits the same access the + # runner has. + run: make -C integration integration diff --git a/.gitea/workflows/prod-build.yaml b/.gitea/workflows/prod-build.yaml new file mode 100644 index 0000000..8018625 --- /dev/null +++ b/.gitea/workflows/prod-build.yaml @@ -0,0 +1,116 @@ +name: prod-build + +# Builds the production-grade Docker images and the UI bundle on every +# merge into `main`, then saves the artifacts so a future +# `deploy-prod.yaml` run can ship them to the production host. This +# workflow does not deploy anything by itself — production rollout is +# strictly manual (workflow_dispatch on `deploy-prod.yaml`). 
+ +on: + push: + branches: + - main + paths: + - 'backend/**' + - 'gateway/**' + - 'game/**' + - 'pkg/**' + - 'ui/**' + - 'go.work' + - 'go.work.sum' + - '.gitea/workflows/prod-build.yaml' + - '!**/*.md' + +jobs: + build: + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.work + cache: true + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + with: + version: 11.0.7 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + cache-dependency-path: ui/pnpm-lock.yaml + + - name: Resolve image tag + id: tag + run: | + short_sha=$(git rev-parse --short=12 HEAD) + echo "tag=commit-${short_sha}" >>"$GITHUB_OUTPUT" + + - name: Build backend image + run: | + docker build \ + -t "galaxy/backend:${{ steps.tag.outputs.tag }}" \ + -f backend/Dockerfile \ + . + + - name: Build gateway image + run: | + docker build \ + -t "galaxy/gateway:${{ steps.tag.outputs.tag }}" \ + -f gateway/Dockerfile \ + . + + - name: Build engine image + run: | + docker build \ + -t "galaxy/game-engine:${{ steps.tag.outputs.tag }}" \ + -f game/Dockerfile \ + . + + - name: Install UI dependencies + working-directory: ui + run: pnpm install --frozen-lockfile + + - name: Build UI bundle + working-directory: ui/frontend + env: + VITE_GATEWAY_BASE_URL: https://api.galaxy.com + run: | + # Production response-signing public key is not in the repo + # yet (the dev key in `tools/local-dev/keys/` is for dev + # only). When real prod keys exist, source them from a Gitea + # Actions secret and set VITE_GATEWAY_RESPONSE_PUBLIC_KEY + # here. Until then the prod bundle compiles with the dev + # key as a placeholder so the artifact exists. 
+ export VITE_GATEWAY_RESPONSE_PUBLIC_KEY="$(grep -E '^VITE_GATEWAY_RESPONSE_PUBLIC_KEY=' .env.development | cut -d= -f2)" + pnpm build + + - name: Save images as artifact bundles + run: | + mkdir -p artifacts + docker save "galaxy/backend:${{ steps.tag.outputs.tag }}" \ + | gzip >"artifacts/backend-${{ steps.tag.outputs.tag }}.tar.gz" + docker save "galaxy/gateway:${{ steps.tag.outputs.tag }}" \ + | gzip >"artifacts/gateway-${{ steps.tag.outputs.tag }}.tar.gz" + docker save "galaxy/game-engine:${{ steps.tag.outputs.tag }}" \ + | gzip >"artifacts/game-engine-${{ steps.tag.outputs.tag }}.tar.gz" + tar -C ui/frontend -czf \ + "artifacts/ui-dist-${{ steps.tag.outputs.tag }}.tar.gz" build + + - name: Upload images + uses: actions/upload-artifact@v4 + with: + name: galaxy-images-${{ steps.tag.outputs.tag }} + path: artifacts/*.tar.gz + retention-days: 30 diff --git a/.gitea/workflows/ui-release.yaml b/.gitea/workflows/ui-release.yaml deleted file mode 100644 index f772f86..0000000 --- a/.gitea/workflows/ui-release.yaml +++ /dev/null @@ -1,148 +0,0 @@ -name: ui-release - -# Tier 2 (release) workflow. Runs on tag push. -# -# Currently mirrors the Tier 1 step set. Visual regression baseline -# checks and the macOS-runner iOS smoke job are landed in later phases -# of ui/PLAN.md and live as commented sections at the end of this file -# until those phases ship. - -on: - push: - tags: - - 'v*' - -jobs: - test: - runs-on: ubuntu-latest - defaults: - run: - shell: bash - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.work - cache: true - - - name: Run Go tests - # client/ is the deprecated Fyne client; excluded from CI per - # ui/PLAN.md §74. -count=1 disables Go's test cache so a green - # run never depends on a previous runner's cached state. 
The - # backend suite is run with -p 1 because most backend packages - # spawn their own Postgres testcontainer, and parallel - # Postgres bootstraps starve each other on a constrained - # runner. pkg modules are listed one by one because ./pkg/... - # does not recurse across the independent go.work modules - # under pkg/. - run: | - go test -count=1 -p 1 ./backend/... - go test -count=1 \ - ./gateway/... \ - ./game/... \ - ./ui/core/... \ - ./pkg/calc/... \ - ./pkg/connector/... \ - ./pkg/cronutil/... \ - ./pkg/error/... \ - ./pkg/geoip/... \ - ./pkg/model/... \ - ./pkg/postgres/... \ - ./pkg/redisconn/... \ - ./pkg/schema/... \ - ./pkg/storage/... \ - ./pkg/transcoder/... \ - ./pkg/util/... - - - name: Set up pnpm - uses: pnpm/action-setup@v4 - with: - version: 11.0.7 - - - name: Set up Node - uses: actions/setup-node@v4 - with: - node-version: 22 - cache: pnpm - cache-dependency-path: ui/pnpm-lock.yaml - - - name: Install npm dependencies - working-directory: ui - run: pnpm install --frozen-lockfile - - - name: Install Playwright browsers - working-directory: ui/frontend - run: pnpm exec playwright install --with-deps - - - name: Run Vitest - working-directory: ui/frontend - run: pnpm test - - - name: Run Playwright - working-directory: ui/frontend - run: pnpm exec playwright test - - - name: Upload Playwright report on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: playwright-report - path: ui/frontend/playwright-report/ - retention-days: 14 - - - name: Upload Playwright traces on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: playwright-traces - path: ui/frontend/test-results/ - retention-days: 14 - -# visual-regression: enabled in Phase 33 of ui/PLAN.md, once the PWA -# shell and service worker land and a snapshot baseline is committed -# under ui/frontend/tests/__snapshots__/. 
-# -# visual-regression: -# runs-on: ubuntu-latest -# needs: test -# steps: -# - uses: actions/checkout@v4 -# - uses: pnpm/action-setup@v4 -# with: { version: 11.0.7 } -# - uses: actions/setup-node@v4 -# with: -# node-version: 22 -# cache: pnpm -# cache-dependency-path: ui/pnpm-lock.yaml -# - working-directory: ui -# run: pnpm install --frozen-lockfile -# - working-directory: ui/frontend -# run: pnpm exec playwright install --with-deps -# - working-directory: ui/frontend -# run: pnpm exec playwright test --grep @visual - -# ios-smoke: enabled in Phase 32 of ui/PLAN.md, once the Capacitor -# wrapper lands. Runs a Capacitor + Appium smoke against an iOS -# simulator on a macOS runner. -# -# ios-smoke: -# runs-on: macos-13 -# needs: test -# steps: -# - uses: actions/checkout@v4 -# - uses: pnpm/action-setup@v4 -# with: { version: 11.0.7 } -# - uses: actions/setup-node@v4 -# with: -# node-version: 22 -# cache: pnpm -# cache-dependency-path: ui/pnpm-lock.yaml -# - working-directory: ui -# run: pnpm install --frozen-lockfile -# - working-directory: ui/mobile -# run: pnpm exec cap sync ios && pnpm exec appium-smoke ios diff --git a/.gitea/workflows/ui-test.yaml b/.gitea/workflows/ui-test.yaml index b48dc6d..6c923aa 100644 --- a/.gitea/workflows/ui-test.yaml +++ b/.gitea/workflows/ui-test.yaml @@ -1,38 +1,18 @@ name: ui-test -# Tier 1 (per-PR) workflow. Runs Vitest + Playwright for the UI client and -# the monorepo Go service tests (everything except the integration suite, -# which lives behind `make -C integration integration` and needs a Docker -# daemon set up for testcontainers). -# -# The path filter is intentionally broad until a dedicated go-test -# workflow is introduced; this is the only CI gate today. +# UI-side unit and end-to-end tests (Vitest + Playwright). The Go side +# of the workspace is tested in `go-unit.yaml`. Both workflows can run +# in parallel for a push that touches Go and UI together. 
on: push: paths: - 'ui/**' - - 'backend/**' - - 'gateway/**' - - 'game/**' - - 'pkg/**' - - 'go.work' - - 'go.work.sum' - '.gitea/workflows/ui-test.yaml' - # Skip docs-only commits. Negation removes pure markdown changes; - # mixed commits (code + .md) still match a positive pattern above - # and trigger the workflow. Image and other binary asset paths - # are already outside the positive list. - '!**/*.md' pull_request: paths: - 'ui/**' - - 'backend/**' - - 'gateway/**' - - 'game/**' - - 'pkg/**' - - 'go.work' - - 'go.work.sum' - '.gitea/workflows/ui-test.yaml' - '!**/*.md' @@ -48,41 +28,6 @@ jobs: with: submodules: recursive - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.work - cache: true - - - name: Run Go tests - # client/ is the deprecated Fyne client; excluded from CI per - # ui/PLAN.md §74. -count=1 disables Go's test cache so a green - # run never depends on a previous runner's cached state. The - # backend suite is run with -p 1 because most backend packages - # spawn their own Postgres testcontainer, and parallel - # Postgres bootstraps starve each other on a constrained - # runner. pkg modules are listed one by one because ./pkg/... - # does not recurse across the independent go.work modules - # under pkg/. - run: | - go test -count=1 -p 1 ./backend/... - go test -count=1 \ - ./gateway/... \ - ./game/... \ - ./ui/core/... \ - ./pkg/calc/... \ - ./pkg/connector/... \ - ./pkg/cronutil/... \ - ./pkg/error/... \ - ./pkg/geoip/... \ - ./pkg/model/... \ - ./pkg/postgres/... \ - ./pkg/redisconn/... \ - ./pkg/schema/... \ - ./pkg/storage/... \ - ./pkg/transcoder/... \ - ./pkg/util/... 
- - name: Set up pnpm uses: pnpm/action-setup@v4 with: -- 2.52.0 From f00c8efd18df591a096ad323c9630a59f6a7b41f Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Wed, 13 May 2026 23:26:57 +0200 Subject: [PATCH 3/4] docs: sync project guides to the new CI flow Aligns the project guides with the branching/CI/environment changes landed in the previous commits: - CLAUDE.md: per-stage CI gate now closes against gitea.lan; describes the main/development/feature/* flow and the workflow surface - docs/ARCHITECTURE.md: new section 18 "CI and Environments" covering branches, workflows, and the local-dev / dev-deploy / local-ci triad; section numbering shifted accordingly - tools/local-ci/README.md: marked as fallback (offline / runner isolation only) - tools/local-dev/README.md and ui/README.md: cross-link to tools/dev-deploy/ for production-shaped testing Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 55 +++++++++++++++++++++++++-------------- docs/ARCHITECTURE.md | 47 +++++++++++++++++++++++++++++++-- tools/local-ci/README.md | 16 +++++++++--- tools/local-dev/README.md | 14 ++++++---- ui/README.md | 6 +++++ 5 files changed, 107 insertions(+), 31 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index eb0def0..a58e51e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,32 +34,47 @@ This repository hosts the Galaxy Game project. deeper than what fits in `README.md` (per-feature design notes, protocol specs, runbooks). Not stage-by-stage history. +## Branching and CI flow + +Branches: + +- `main` — production-track. Direct pushes are disallowed; the only + way in is a PR merge from `development`. A merge fires + `prod-build.yaml` which packages the artifacts; production rollout + is manual through `deploy-prod.yaml`. +- `development` — long-lived dev integration branch. Every merge into + it auto-deploys to the dev environment via `dev-deploy.yaml` + (reachable at `https://www.galaxy.lan` / `https://api.galaxy.lan`). 
+- `feature/*` — short-lived branches off `development`. Merged back + via PR; only then do they reach the dev environment. + +Workflows in `.gitea/workflows/`: + +| File | Trigger | What it does | +|------|---------|--------------| +| `go-unit.yaml` | push + PR matching Go paths | Fast Go unit tests. | +| `ui-test.yaml` | push + PR matching `ui/**` | Vitest + Playwright. | +| `integration.yaml` | PR to `development`/`main`; push to `development` | testcontainers integration suite. | +| `dev-deploy.yaml` | push to `development` | Build images + (re)deploy to `tools/dev-deploy/`. | +| `prod-build.yaml` | push to `main` | Build prod images and `docker save` into artifacts. | +| `deploy-prod.yaml` | `workflow_dispatch` | Manual rollout (placeholder until prod host exists). | + ## Per-stage CI gate Every completed stage from any `PLAN.md` (per-service or `ui/PLAN.md`) -must be exercised on the local Gitea Actions runner before being -declared done. The runbook lives in `tools/local-ci/README.md`; the -short version is: +must be exercised on `gitea.lan` before being declared done. The +short version: -1. Commit the stage changes. -2. `make -C tools/local-ci push` — pushes `HEAD` to the local Gitea - instance and triggers every workflow that matches the changed - paths. -3. Poll the latest run via the API snippet in `ui/docs/testing.md` - (or the Gitea UI on `http://localhost:3000`) until it leaves +1. Commit the stage changes on the feature branch. +2. `git push gitea …` to publish the branch. +3. Poll the latest run in the Gitea UI (or the API) until it leaves `running`. Inspect the log on failure. -4. Only after the run is `success` may the stage be marked done in - the corresponding `PLAN.md`. +4. Only after every workflow that fired is `success` may the stage be + marked done in the corresponding `PLAN.md`. 
-This applies even when the local unit-test suite is green — -workflow-only failures (path filters, action-version mismatches, -missing secrets, runner-only environment differences) are cheap to -catch here and expensive to catch on a remote PR. The push step is -implicitly authorised: do not ask for confirmation on every stage. - -If `tools/local-ci` is not running, bring it up first -(`make -C tools/local-ci up`); do not skip this gate. The single -exception is when the user explicitly waives it for a stage. +`tools/local-ci/` is now an opt-in fallback for testing workflow +changes without `gitea.lan` (offline iterations, runner-isolation +debugging). It is no longer required for the per-stage gate. ## Decisions during stage implementation diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index a0e3598..6ed6df5 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -751,7 +751,50 @@ addition. `GET /readyz` (Postgres reachable, migrations applied, gRPC listener bound). Probes are excluded from anti-replay and rate limiting. -## 18. Deployment Topology (informational) +## 18. CI and Environments + +The repository is a monorepo, and intentionally so — semver tags and +per-service rollouts are achievable without splitting the code into +multiple repositories. + +Branches: + +- `main` — production-track. Direct pushes are disallowed; the only + way in is a PR merge from `development`. +- `development` — long-lived dev integration branch. Every merge + triggers an auto-deploy into the long-lived dev environment on the + CI host, reachable through the host Caddy at + `https://www.galaxy.lan` and `https://api.galaxy.lan`. +- `feature/*` — short-lived branches off `development`. Merged back + via PR; PRs run unit + integration checks before merge. + +Workflows under `.gitea/workflows/`: + +| File | Trigger | Purpose | +|------|---------|---------| +| `go-unit.yaml` | push + PR matching Go paths | Fast Go unit tests.
| +| `ui-test.yaml` | push + PR matching `ui/**` | Vitest + Playwright. | +| `integration.yaml` | PR to `development` / `main`; push to `development` | testcontainers integration suite. | +| `dev-deploy.yaml` | push to `development` | Build images, seed UI volume, `compose up` against `tools/dev-deploy/`. | +| `prod-build.yaml` | push to `main` | Build production images and persist `docker save` bundles as artifacts. | +| `deploy-prod.yaml` | manual `workflow_dispatch` | Placeholder for the future SSH-based production rollout. | + +Environments: + +- **`tools/local-dev/`** — single-developer playground. Bound to + host ports, Vite dev server runs on the host. Not driven by CI. +- **`tools/dev-deploy/`** — long-lived dev environment behind + `*.galaxy.lan`, redeployed on every merge into `development`. +- **production** — future. Images come from the + `galaxy-images-commit-{short_sha}` artifact produced by `prod-build.yaml` + and are shipped to the production host via `docker save` → + `ssh prod docker load` → `docker compose up -d`. + +`tools/local-ci/` remains as an opt-in fallback runner for testing +workflow changes without `gitea.lan`. It is no longer part of the +per-stage CI gate; see `CLAUDE.md` for the gate definition. + +## 19. Deployment Topology (informational) - MVP runs three executables: one `gateway` instance, one `backend` instance, and N `galaxy-game-{game_id}` containers managed by backend. @@ -770,7 +813,7 @@ Future scale-out hooks (not in MVP): - mTLS between gateway and backend. - Docker-socket-proxy sidecar fronting Docker daemon access. -## 19. Glossary +## 20. Glossary - **device_session_id** — opaque identifier of an authenticated client device; primary key of the device session record. diff --git a/tools/local-ci/README.md b/tools/local-ci/README.md index 93990a9..1115f01 100644 --- a/tools/local-ci/README.md +++ b/tools/local-ci/README.md @@ -1,9 +1,17 @@ -# Local Gitea CI +# Local Gitea CI (fallback) + +> **Status:** fallback / opt-in.
The primary CI target is now +> `gitea.lan` with its host-mode `act_runner`. The per-stage CI gate +> closes against `gitea.lan`, not against this stack. Use this +> directory when you want to validate `.gitea/workflows/*` without +> reaching `gitea.lan` — for example, when iterating on a workflow +> file from a flight without LAN access — or when isolating a runner +> issue from production-shaped infrastructure. Self-contained Gitea + Actions runner for verifying -`.gitea/workflows/*` honestly before pushing to a real Gitea instance. -Runs natively on arm64 (Apple Silicon) — every image below has an -arm64 variant, so Docker pulls the right architecture and the runner +`.gitea/workflows/*` honestly before pushing to `gitea.lan`. Runs +natively on arm64 (Apple Silicon) — every image below has an arm64 +variant, so Docker pulls the right architecture and the runner executes workflow steps without QEMU emulation. ## Prerequisites diff --git a/tools/local-dev/README.md b/tools/local-dev/README.md index ee78639..d15a405 100644 --- a/tools/local-dev/README.md +++ b/tools/local-dev/README.md @@ -10,11 +10,15 @@ FlatBuffers wire, every authenticated call verifies the response signature against the dev keypair, and every email passes through Mailpit's web UI for inspection. -This stack is **not** a CI gate (that role belongs to -[`tools/local-ci/`](../local-ci/README.md), which boots a Gitea + -Actions runner and replays workflow files). The two stacks are -independent and can coexist on the same machine; they bind different -ports and use different networks. +This stack is **not** a CI gate (the per-stage CI gate now lives on +`gitea.lan`; see project-level `CLAUDE.md`). It is also distinct from +the **long-lived dev environment** at +[`tools/dev-deploy/`](../dev-deploy/README.md), which is redeployed on +every merge into `development` and is reachable as +`https://www.galaxy.lan` / `https://api.galaxy.lan`. 
The three stacks +(`tools/local-dev/`, `tools/dev-deploy/`, and the fallback +`tools/local-ci/`) coexist on the same host because every name — +compose project, container, network, volume — is distinct. ## Bring it up diff --git a/ui/README.md b/ui/README.md index 7d649a2..12abcde 100644 --- a/ui/README.md +++ b/ui/README.md @@ -153,6 +153,12 @@ The stack accepts a fixed dev code (`123456`) in addition to the real Mailpit-delivered one. Full runbook in [`../tools/local-dev/README.md`](../tools/local-dev/README.md). +For testing the production-shaped surface — Caddy in front of the +gateway, statically served UI bundle, real `https://*.galaxy.lan` +hostnames — use the long-lived dev environment at +[`../tools/dev-deploy/`](../tools/dev-deploy/README.md). It is +redeployed by Gitea Actions on every merge into `development`. + ## Per-phase docs Topic docs live under `ui/docs/` and are added per phase as they're -- 2.52.0 From c6c5f3c8dd1b0e39a5fcefbc47e601b70dd4f034 Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Wed, 13 May 2026 23:43:51 +0200 Subject: [PATCH 4/4] ci: skip TLS verify for actions/checkout on LAN Gitea MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Gitea host serves https://gitea.iliadenisov.ru with a cert signed by host-Caddy's internal CA, which the runner-image's CA bundle does not trust. actions/checkout@v4 fails on `git fetch` as a result, so every workflow on gitea.lan has been failing — visible only now that we made gitea.lan the primary CI target. Sets GIT_SSL_NO_VERIFY=true on every workflow as a quick fix. Safe in practice because both endpoints sit on the same LAN. The long-term fix is to bake the Caddy root CA into the runner image and drop this env. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/dev-deploy.yaml | 6 ++++++ .gitea/workflows/go-unit.yaml | 9 +++++++++ .gitea/workflows/integration.yaml | 6 ++++++ .gitea/workflows/prod-build.yaml | 6 ++++++ .gitea/workflows/ui-test.yaml | 6 ++++++ 5 files changed, 33 insertions(+) diff --git a/.gitea/workflows/dev-deploy.yaml b/.gitea/workflows/dev-deploy.yaml index 91f7dce..13c4a92 100644 --- a/.gitea/workflows/dev-deploy.yaml +++ b/.gitea/workflows/dev-deploy.yaml @@ -24,6 +24,12 @@ on: - '.gitea/workflows/dev-deploy.yaml' - '!**/*.md' +env: + # See go-unit.yaml for the rationale; this disables TLS verify for + # actions/checkout against the LAN Gitea host signed by host-Caddy's + # internal CA. + GIT_SSL_NO_VERIFY: "true" + jobs: deploy: runs-on: ubuntu-latest diff --git a/.gitea/workflows/go-unit.yaml b/.gitea/workflows/go-unit.yaml index c33d1dd..200d15b 100644 --- a/.gitea/workflows/go-unit.yaml +++ b/.gitea/workflows/go-unit.yaml @@ -30,6 +30,15 @@ on: - '.gitea/workflows/go-unit.yaml' - '!**/*.md' +env: + # The Gitea host serves https://gitea.iliadenisov.ru with a cert + # signed by host-Caddy's internal CA. The runner-image's CA bundle + # does not include that root, so actions/checkout fails on `git + # fetch`. Disabling SSL verify is acceptable for this LAN-only + # infrastructure; the long-term fix is to mount the Caddy root CA + # into the runner image. + GIT_SSL_NO_VERIFY: "true" + jobs: test: runs-on: ubuntu-latest diff --git a/.gitea/workflows/integration.yaml b/.gitea/workflows/integration.yaml index 1f94fa8..240f244 100644 --- a/.gitea/workflows/integration.yaml +++ b/.gitea/workflows/integration.yaml @@ -37,6 +37,12 @@ on: - '.gitea/workflows/integration.yaml' - '!**/*.md' +env: + # See go-unit.yaml for the rationale; this disables TLS verify for + # actions/checkout against the LAN Gitea host signed by host-Caddy's + # internal CA. 
+ GIT_SSL_NO_VERIFY: "true" + jobs: integration: runs-on: ubuntu-latest diff --git a/.gitea/workflows/prod-build.yaml b/.gitea/workflows/prod-build.yaml index 8018625..f511e81 100644 --- a/.gitea/workflows/prod-build.yaml +++ b/.gitea/workflows/prod-build.yaml @@ -21,6 +21,12 @@ on: - '.gitea/workflows/prod-build.yaml' - '!**/*.md' +env: + # See go-unit.yaml for the rationale; this disables TLS verify for + # actions/checkout against the LAN Gitea host signed by host-Caddy's + # internal CA. + GIT_SSL_NO_VERIFY: "true" + jobs: build: runs-on: ubuntu-latest diff --git a/.gitea/workflows/ui-test.yaml b/.gitea/workflows/ui-test.yaml index 6c923aa..5eff4ff 100644 --- a/.gitea/workflows/ui-test.yaml +++ b/.gitea/workflows/ui-test.yaml @@ -16,6 +16,12 @@ on: - '.gitea/workflows/ui-test.yaml' - '!**/*.md' +env: + # See go-unit.yaml for the rationale; this disables TLS verify for + # actions/checkout against the LAN Gitea host signed by host-Caddy's + # internal CA. + GIT_SSL_NO_VERIFY: "true" + jobs: test: runs-on: ubuntu-latest -- 2.52.0