Files
Ilia Denisov ff53cc0ad3 local-dev: prune broken engines on rebuild + document one-time bake
`make rebuild` runs `compose build --no-cache backend gateway` plus
a fresh `up -d --wait`. It must therefore also reap any engine
container whose bind-mount source went away during host downtime,
otherwise the new backend image boots into a stack with the same
orphan that triggered the heal flow in the first place.

Also extend the troubleshooting note: pulling the heal-cycle fix
requires one explicit `make rebuild` so the backend image picks up
the pre-bootstrap reconciler tick. Without that, `make up` runs
the new Makefile target but the legacy backend cannot follow
through, and the developer is left staring at a `cancelled`
sandbox with no running replacement.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 22:40:27 +02:00

119 lines
4.7 KiB
Makefile

# Targets declared phony so that a same-named file on disk can never
# shadow them; grouped by purpose.
.PHONY: help status wait
.PHONY: up down rebuild clean
.PHONY: build-engine stop-engines prune-broken-engines
.PHONY: logs logs-backend logs-gateway logs-mail psql

# A bare `make` prints the command reference.
.DEFAULT_GOAL := help

# Compose CLI entry point shared by the stack-level recipes.
COMPOSE := docker compose
# Canonical path two directory levels above make's working directory;
# used as the engine image build context.
REPO_ROOT := $(realpath $(CURDIR)/../..)
# Tag given to the locally built engine image.
ENGINE_IMAGE := galaxy-engine:local-dev
# Label set by the engine `Dockerfile` runtime stage; used to find
# engine containers spawned by backend's runtime that fall outside
# `docker compose down`'s scope.
ENGINE_LABEL := org.opencontainers.image.title=galaxy-game-engine
# Print the command reference for the local development stack.
# The command table goes through a single printf whose format is
# reused for every (command, description) pair, so the descriptions
# line up in one column regardless of command length. The column
# width (26) fits the longest entry, `make prune-broken-engines`.
help:
	@echo "Local development stack for the Galaxy UI:"
	@printf '  %-26s %s\n' \
		"make up" "Build (if needed) and bring up the stack, wait until healthy" \
		"make down" "Stop compose containers, leave engines + volumes intact" \
		"make rebuild" "Force rebuild of backend / gateway images and bring up" \
		"make build-engine" "Build the engine image $(ENGINE_IMAGE) used by the dev sandbox" \
		"make stop-engines" "Stop and remove only the per-game engine containers" \
		"make prune-broken-engines" "Remove non-running engine containers Docker can't heal (run inside 'up' and 'rebuild')" \
		"make clean" "Stop everything (incl. engines) and wipe volumes + game state" \
		"make logs" "Tail all logs" \
		"make logs-backend" "Tail only the backend logs" \
		"make logs-gateway" "Tail only the gateway logs" \
		"make logs-mail" "Tail only the mailpit logs" \
		"make status" "docker compose ps" \
		"make psql" "Open a psql shell as galaxy@galaxy_backend"
	@echo ""
	@echo "After 'make up', point the UI at the stack with:"
	@echo "  pnpm -C ui/frontend dev"
	@echo "and open http://localhost:5173 (UI) plus http://localhost:8025 (Mailpit)."
	@echo ""
	@echo "Default login for the auto-provisioned dev sandbox: dev@local.test"
	@echo "(see BACKEND_DEV_SANDBOX_EMAIL in .env). Login code: 123456."
# Bring the compose stack up in the background and wait for it to
# become healthy. The engine image is built if missing and broken
# engine containers are pruned beforehand via the prerequisites.
up: build-engine prune-broken-engines
	$(COMPOSE) up --detach --wait
# Rebuild the backend and gateway images without the build cache,
# then start the stack in the background and wait for it. Runs the
# engine image build and the broken-engine prune first.
rebuild: build-engine prune-broken-engines
	$(COMPOSE) build --no-cache backend gateway
	$(COMPOSE) up --detach --wait
# Build the engine image from game/Dockerfile with the repository
# root as build context, but only when no image tagged
# $(ENGINE_IMAGE) exists locally. An existing image is never
# refreshed; remove it with `docker rmi` to force a new build.
build-engine:
	@if ! docker image inspect $(ENGINE_IMAGE) >/dev/null 2>&1; then \
		echo "building $(ENGINE_IMAGE)"; \
		docker build --tag $(ENGINE_IMAGE) --file $(REPO_ROOT)/game/Dockerfile $(REPO_ROOT); \
	else \
		echo "$(ENGINE_IMAGE) already built; skipping (use 'docker rmi $(ENGINE_IMAGE)' to force a rebuild)."; \
	fi
# Stop the compose-managed containers only. No volume flag is passed
# and engine containers are not touched by this recipe.
down:
	$(COMPOSE) down
# Full reset: remove the engine containers (prerequisite), take the
# compose stack down together with its volumes, then empty the
# game-state directory under /tmp if it exists.
# The wipe is best-effort and never fails the target: it first tries
# `rm` from inside a throwaway alpine container with the directory
# mounted, then falls back to a plain host-side `rm`.
clean: stop-engines
	$(COMPOSE) down --volumes
	@[ ! -d /tmp/galaxy-game-state ] || { \
		echo "wiping /tmp/galaxy-game-state…"; \
		docker run --rm --volume /tmp/galaxy-game-state:/state alpine sh -c 'rm -rf /state/*' 2>/dev/null \
			|| rm -rf /tmp/galaxy-game-state/* 2>/dev/null \
			|| true; \
	}
# Engine containers are created on demand by the backend's runtime,
# outside the compose project. They are deliberately left alone by
# `make down` so the runtime reconciler can reattach to them on the
# next `make up`; removing them out of band makes the runtime cascade
# the game to `cancelled`. This target is therefore only wired into
# `clean`, where the whole DB is wiped anyway.
#
# Force-removes every container (running or not) that carries the
# engine label; does nothing when there are none.
stop-engines:
	@engine_ids=$$(docker ps --all --quiet --filter label=$(ENGINE_LABEL)); \
	[ -z "$$engine_ids" ] || { \
		echo "stopping engine containers…"; \
		docker rm --force $$engine_ids >/dev/null; \
	}
# Remove engine containers Docker can no longer heal on its own.
# After a host reboot, the per-game bind-mount source under
# /tmp/galaxy-game-state/<uuid> may have been wiped (macOS clears
# /private/tmp on reboot), so `restart: unless-stopped` cannot
# revive the container — Docker refuses to start it with a missing
# bind-mount source and leaves it stuck in `exited` / `created`
# state. This target prunes the husks before `compose up`; the
# backend's pre-bootstrap reconciler tick (`backend/cmd/backend/main.go`)
# then cascades the orphan runtime row to `removed`, the lobby
# cancels the game, and the dev-sandbox bootstrap purges the
# cancelled tile and provisions a fresh sandbox in the same
# `make up` cycle. Healthy `running` / `restarting` containers are
# left intact so a long-lived sandbox survives normal up/down
# cycles.
#
# A container whose state cannot be read (for example, it vanished
# between the `docker ps` listing and the `docker inspect` call) is
# skipped rather than queued for removal, so a failed inspect cannot
# make the later `docker rm` — and with it `up` / `rebuild` — fail.
prune-broken-engines:
	@ids=""; \
	for cid in $$(docker ps -aq --filter label=$(ENGINE_LABEL) 2>/dev/null); do \
		state=$$(docker inspect -f '{{.State.Status}}' "$$cid" 2>/dev/null) || continue; \
		case "$$state" in \
			""|running|restarting) ;; \
			*) ids="$$ids $$cid";; \
		esac; \
	done; \
	if [ -n "$$ids" ]; then \
		echo "removing non-running engine containers (post-reboot cleanup):$$ids"; \
		docker rm -f $$ids >/dev/null; \
	fi
# Follow the logs of every compose service, starting from the last
# 100 lines of each.
logs:
	$(COMPOSE) logs --follow --tail=100
# Follow only the `backend` service logs, starting from the last 200
# lines.
logs-backend:
	$(COMPOSE) logs --follow --tail=200 backend
# Follow only the `gateway` service logs, starting from the last 200
# lines.
logs-gateway:
	$(COMPOSE) logs --follow --tail=200 gateway
# Follow only the `mailpit` service logs, starting from the last 200
# lines. Note the target is named `logs-mail`, the service `mailpit`.
logs-mail:
	$(COMPOSE) logs --follow --tail=200 mailpit
# List the compose project's containers and their current state.
status:
	$(COMPOSE) ps
# Open an interactive psql session inside the `postgres` service
# container, connecting as user `galaxy` to database `galaxy_backend`.
psql:
	$(COMPOSE) exec postgres psql --username=galaxy --dbname=galaxy_backend