From daed2690c1e43e0b58d79d8afc8dd454b58f7dad Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Tue, 19 May 2026 01:00:21 +0200 Subject: [PATCH] fix(compose): keep galaxy.stack label on containers only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit stamped `galaxy.stack=<stack>` on services, volumes, and networks. Putting it on volumes/networks changes their compose config-hash on every label revision, so `docker compose up` tries to recreate them — which on the long-lived dev environment either destroys the postgres data volume or deadlocks while trying to remove `galaxy-dev-internal` with containers still bound to it. Observed live: run #184 hung in compose recreate after the three stateful services were stopped, with no recovery. Containers alone are sufficient for the cleanup contract (we filter containers, not volumes or networks). Roll back the label on volumes and networks in both compose files and capture the rule in docs/ARCHITECTURE.md so the next contributor does not reintroduce it. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/ARCHITECTURE.md | 19 ++++++++++++++----- tools/dev-deploy/docker-compose.yml | 17 +++++++++-------- tools/local-dev/docker-compose.yml | 7 +++---- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index a2dec3e..1947197 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -832,14 +832,14 @@ Environments: ### Container labels -Every Docker resource Galaxy creates carries an opinionated label so -that host-side tooling (Makefiles, CI workflows, `preclean.sh`) can -scope its operations to Galaxy-owned objects and never touch unrelated -workloads on the shared daemon. 
+Every Galaxy-managed Docker **container** carries an opinionated +label so that host-side tooling (Makefiles, CI workflows, +`preclean.sh`) can scope its operations to Galaxy-owned containers +and never touch unrelated workloads on the shared daemon. | Label | Values | Set by | Used by | |-------|--------|--------|---------| -| `galaxy.stack` | `local-dev`, `dev-deploy`, `integration` | `tools/{local-dev,dev-deploy}/docker-compose.yml` for compose-managed resources; backend reads `BACKEND_STACK_LABEL` and stamps engines it spawns. | `tools/{local-dev,dev-deploy}/Makefile`, `.gitea/workflows/dev-deploy.yaml`. | +| `galaxy.stack` | `local-dev`, `dev-deploy`, `integration` | `tools/{local-dev,dev-deploy}/docker-compose.yml` for compose-managed services; backend reads `BACKEND_STACK_LABEL` and stamps engines it spawns. | `tools/{local-dev,dev-deploy}/Makefile`, `.gitea/workflows/dev-deploy.yaml`. | | `galaxy.backend` | `1` | `backend/internal/dockerclient` adapter on every engine container. | `integration/scripts/preclean.sh`. | | `galaxy.game_id` | `<game-id>` | Backend on engine create. | Reconciler reattach loop. | | `galaxy.engine_version` | `<version>` | Backend on engine create. | Reconciler version checks. | @@ -853,6 +853,15 @@ scoped by their compose project name (`galaxy-dev`, `galaxy-local-dev`), which Compose enforces on `docker compose up/down`; the labels make the contract explicit and survive hand-rolled cleanup commands as well. +**Scope deliberately limited to containers.** Labels are NOT stamped +on named volumes or user-defined networks. Adding labels there would +change the compose config-hash for the volume/network on every label +revision and force `docker compose up` to recreate them — which for a +postgres data volume means destroying the database, and for a shared +network can deadlock if any container is still attached. Containers +alone are sufficient for the cleanup contract; stateful resources stay +untouched by compose between deploys. + ## 19. 
Deployment Topology (informational) - MVP runs three executables: one `gateway` instance, one `backend` diff --git a/tools/dev-deploy/docker-compose.yml b/tools/dev-deploy/docker-compose.yml index dbc0cc1..8fdfc2b 100644 --- a/tools/dev-deploy/docker-compose.yml +++ b/tools/dev-deploy/docker-compose.yml @@ -238,22 +238,23 @@ networks: name: galaxy-dev-internal driver: bridge internal: false - labels: - galaxy.stack: dev-deploy edge: name: ${GALAXY_EDGE_NETWORK:-edge} external: true +# Note: `galaxy.stack=dev-deploy` is intentionally stamped only on +# services (containers). Stamping it on networks or named volumes +# changes the compose config-hash for those resources, and on a +# subsequent `compose up` compose tries to recreate them — for the +# `galaxy-dev-postgres-data` volume that means destroying the +# database, and for `galaxy-dev-internal` it can deadlock if any +# container is still attached. Per-container labels are sufficient +# for the CI/cleanup contract; we filter containers, not volumes or +# networks. volumes: galaxy-dev-postgres-data: name: galaxy-dev-postgres-data - labels: - galaxy.stack: dev-deploy galaxy-dev-caddy-data: name: galaxy-dev-caddy-data - labels: - galaxy.stack: dev-deploy galaxy-dev-ui-dist: name: galaxy-dev-ui-dist - labels: - galaxy.stack: dev-deploy diff --git a/tools/local-dev/docker-compose.yml b/tools/local-dev/docker-compose.yml index 9e52c39..5a7db40 100644 --- a/tools/local-dev/docker-compose.yml +++ b/tools/local-dev/docker-compose.yml @@ -218,11 +218,10 @@ services: networks: galaxy-net: name: galaxy-local-dev-net - labels: - galaxy.stack: local-dev +# See note in tools/dev-deploy/docker-compose.yml — labels live only +# on services (containers), not on volumes or networks, to keep the +# compose config-hash for stateful resources stable across deploys. volumes: postgres-data: name: galaxy-local-dev-postgres-data - labels: - galaxy.stack: local-dev