a338ebf058
Tests · Integration / integration (pull_request) Successful in 1m37s
Root cause for the long-standing "Dev Sandbox flips to cancelled after dev-deploy" symptom in push-triggered cycles: when `integration.yaml` runs in parallel with `dev-deploy.yaml`, its `integration/scripts/preclean.sh` issues a `docker rm -f` over every container labelled `galaxy.backend=1`. That label is stamped by the backend's runtime adapter on every engine it spawns — including the engines living in the long-lived dev-deploy environment on the same Docker daemon. Each post-merge auto-deploy therefore had the integration preclean wipe the dev-sandbox engine, and the new backend's reconciler tick observed `container disappeared` and cascaded the sandbox into `cancelled`. Fix: - `integration/testenv/backend.go` now sets `BACKEND_STACK_LABEL=integration` on every backend-under-test, so the engines spawned by integration carry `galaxy.stack=integration` in addition to `galaxy.backend=1`. The backend support for this env was added in the previous CI tidy-up PR (#13). - `integration/scripts/preclean.sh` gains a multi-label AND filter helper and uses it to scope engine cleanup to the combination `galaxy.backend=1 AND galaxy.stack=integration`. dev-deploy and local-dev engines carry different `galaxy.stack` values, so the AND match leaves them alone. - `docs/ARCHITECTURE.md` "Container labels" — refreshed to call out the AND-scoping rule and the new integration backend stamp. - `tools/dev-deploy/KNOWN-ISSUES.md` — the sandbox-cancel entry gets an "Update" section recording the root cause and the fix; the status is downgraded to "partially fixed" because the solo `workflow_dispatch` reproduction (which does NOT trigger integration) remains unexplained. - `tools/dev-deploy/KNOWN-ISSUES.md` — separately, document the `docker restart galaxy-dev-backend` failure caused by the runner-workspace bind-mount that surfaced while diagnosing this issue. Workaround: `make -C tools/dev-deploy up` from the persistent checkout. 
Real fix is a follow-up (bake fixture into image or copy to named volume). Verification: - `go build ./backend/... ./integration/...` — clean. - `bash -n integration/scripts/preclean.sh` — syntax OK. - Live AND-filter check on the dev host: `docker ps -aq --filter label=galaxy.backend=1 --filter label=galaxy.stack=integration` returns nothing while the dev-deploy engine `galaxy-game-80f3ce86-...` keeps running. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
189 lines
6.2 KiB
Go
189 lines
6.2 KiB
Go
package testenv
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/moby/moby/api/types/container"
|
|
"github.com/moby/moby/api/types/mount"
|
|
"github.com/testcontainers/testcontainers-go"
|
|
tcnetwork "github.com/testcontainers/testcontainers-go/network"
|
|
"github.com/testcontainers/testcontainers-go/wait"
|
|
)
|
|
|
|
// BackendContainer wraps a running galaxy/backend:integration
|
|
// container reachable from the host (HTTPHost, GRPCPushHost) and
|
|
// from the shared Docker network at the alias "backend".
|
|
type BackendContainer struct {
|
|
Container testcontainers.Container
|
|
HTTPHost string
|
|
HTTPPort int
|
|
HTTPURL string
|
|
GRPCHost string
|
|
GRPCPort int
|
|
GRPCURL string
|
|
|
|
// AdminUser/AdminPassword are the bootstrap admin credentials this
|
|
// container started with. Tests that exercise the admin surface
|
|
// reuse them directly.
|
|
AdminUser string
|
|
AdminPassword string
|
|
}
|
|
|
|
// BackendOptions carries the per-test settings StartBackend applies to
// a backend container before it boots.
type BackendOptions struct {
	// NetworkAlias is the alias the container receives on NetworkName.
	// StartBackend substitutes "backend" when it is empty.
	NetworkAlias string
	// NetworkName is the Docker network the container is attached to;
	// it is also handed to the backend as BACKEND_DOCKER_NETWORK. When
	// it is empty StartBackend skips the network attachment.
	NetworkName string
	// PostgresDSN is passed to the backend as BACKEND_POSTGRES_DSN.
	PostgresDSN string
	// MailpitHost and MailpitPort are passed to the backend as
	// BACKEND_SMTP_HOST and BACKEND_SMTP_PORT.
	MailpitHost string
	MailpitPort int
	// GeoIPHostPath is a file on the test host that is copied into the
	// container at the path the backend reads its GeoIP database from.
	GeoIPHostPath string
	// AdminEmail is passed as BACKEND_NOTIFICATION_ADMIN_EMAIL.
	// StartBackend substitutes "admin@galaxy.test" when it is empty.
	AdminEmail string
	// Extra holds additional environment variables. They are applied
	// after the built-in ones, so an entry here overrides a default
	// with the same key.
	Extra map[string]string
}
|
|
|
|
// StartBackend boots galaxy/backend:integration with the supplied
|
|
// options.
|
|
func StartBackend(t *testing.T, opts BackendOptions) *BackendContainer {
|
|
t.Helper()
|
|
EnsureBackendImage(t)
|
|
|
|
if opts.NetworkAlias == "" {
|
|
opts.NetworkAlias = "backend"
|
|
}
|
|
if opts.AdminEmail == "" {
|
|
opts.AdminEmail = "admin@galaxy.test"
|
|
}
|
|
|
|
geoIPInContainer := "/var/lib/galaxy/geoip.mmdb"
|
|
// Use a unique daemon-side path for each test so concurrent
|
|
// runs cannot collide. Docker creates the source directory at
|
|
// container start because BindOptions.CreateMountpoint=true.
|
|
stateRoot := "/tmp/galaxy-state-" + uuid.NewString()
|
|
|
|
env := map[string]string{
|
|
"BACKEND_HTTP_LISTEN_ADDR": ":8080",
|
|
"BACKEND_GRPC_PUSH_LISTEN_ADDR": ":8081",
|
|
"BACKEND_LOGGING_LEVEL": "info",
|
|
"BACKEND_POSTGRES_DSN": opts.PostgresDSN,
|
|
"BACKEND_SMTP_HOST": opts.MailpitHost,
|
|
"BACKEND_SMTP_PORT": fmt.Sprintf("%d", opts.MailpitPort),
|
|
"BACKEND_SMTP_FROM": "galaxy-backend@galaxy.test",
|
|
"BACKEND_SMTP_TLS_MODE": "none",
|
|
"BACKEND_DOCKER_NETWORK": opts.NetworkName,
|
|
"BACKEND_GAME_STATE_ROOT": stateRoot,
|
|
"BACKEND_ADMIN_BOOTSTRAP_USER": "bootstrap",
|
|
"BACKEND_ADMIN_BOOTSTRAP_PASSWORD": "bootstrap-secret",
|
|
"BACKEND_GEOIP_DB_PATH": geoIPInContainer,
|
|
"BACKEND_OTEL_TRACES_EXPORTER": "none",
|
|
"BACKEND_OTEL_METRICS_EXPORTER": "none",
|
|
"BACKEND_NOTIFICATION_ADMIN_EMAIL": opts.AdminEmail,
|
|
"BACKEND_AUTH_CHALLENGE_THROTTLE_MAX": "100",
|
|
"BACKEND_MAIL_WORKER_INTERVAL": "500ms",
|
|
"BACKEND_NOTIFICATION_WORKER_INTERVAL": "500ms",
|
|
// Stamp galaxy.stack=integration on every engine container the
|
|
// backend-under-test spawns so the post-run preclean.sh can
|
|
// scope its cleanup to integration-owned engines and leave
|
|
// dev-deploy / local-dev stacks running on the same daemon
|
|
// untouched. See `integration/scripts/preclean.sh` and the
|
|
// "Container labels" section in `docs/ARCHITECTURE.md`.
|
|
"BACKEND_STACK_LABEL": "integration",
|
|
}
|
|
for k, v := range opts.Extra {
|
|
env[k] = v
|
|
}
|
|
|
|
dockerSocket := DockerSocketPath()
|
|
req := testcontainers.ContainerRequest{
|
|
Image: BackendImage,
|
|
ExposedPorts: []string{"8080/tcp", "8081/tcp"},
|
|
Env: env,
|
|
WaitingFor: wait.ForHTTP("/healthz").
|
|
WithPort("8080/tcp").
|
|
WithStartupTimeout(60 * time.Second),
|
|
Files: []testcontainers.ContainerFile{
|
|
{
|
|
HostFilePath: opts.GeoIPHostPath,
|
|
ContainerFilePath: geoIPInContainer,
|
|
FileMode: 0o644,
|
|
},
|
|
},
|
|
HostConfigModifier: func(hc *container.HostConfig) {
|
|
hc.Binds = append(hc.Binds, dockerSocket+":/var/run/docker.sock")
|
|
// Bind a unique daemon-side directory at the same path
|
|
// inside the backend container. CreateMountpoint=true
|
|
// asks the daemon to create the source directory if it
|
|
// is missing, so we do not need a second container just
|
|
// to mkdir on the daemon host. Per-game subdirectories
|
|
// are created by backend's runtime via os.MkdirAll
|
|
// before each engine container start.
|
|
hc.Mounts = append(hc.Mounts, mount.Mount{
|
|
Type: mount.TypeBind,
|
|
Source: stateRoot,
|
|
Target: stateRoot,
|
|
BindOptions: &mount.BindOptions{
|
|
CreateMountpoint: true,
|
|
},
|
|
})
|
|
},
|
|
// The distroless `nonroot` user (uid 65532) cannot reach the
|
|
// Docker daemon socket that backend mounts to manage engine
|
|
// containers. In integration tests we run as root so the
|
|
// dockerclient.EnsureNetwork startup probe succeeds; the
|
|
// production deployment will rely on a docker-socket-proxy
|
|
// sidecar (see ARCHITECTURE.md §13).
|
|
User: "0:0",
|
|
}
|
|
|
|
gcr := &testcontainers.GenericContainerRequest{ContainerRequest: req}
|
|
if opts.NetworkName != "" {
|
|
_ = tcnetwork.WithNetwork([]string{opts.NetworkAlias}, &testcontainers.DockerNetwork{Name: opts.NetworkName}).Customize(gcr)
|
|
}
|
|
gcr.Started = true
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
|
defer cancel()
|
|
container, err := testcontainers.GenericContainer(ctx, *gcr)
|
|
if err != nil {
|
|
t.Fatalf("start backend container: %v", err)
|
|
}
|
|
t.Cleanup(func() {
|
|
if err := testcontainers.TerminateContainer(container); err != nil {
|
|
t.Logf("terminate backend: %v", err)
|
|
}
|
|
})
|
|
|
|
host, err := container.Host(ctx)
|
|
if err != nil {
|
|
t.Fatalf("backend host: %v", err)
|
|
}
|
|
httpPort, err := container.MappedPort(ctx, "8080/tcp")
|
|
if err != nil {
|
|
t.Fatalf("backend http port: %v", err)
|
|
}
|
|
grpcPort, err := container.MappedPort(ctx, "8081/tcp")
|
|
if err != nil {
|
|
t.Fatalf("backend grpc port: %v", err)
|
|
}
|
|
|
|
return &BackendContainer{
|
|
Container: container,
|
|
HTTPHost: host,
|
|
HTTPPort: int(httpPort.Num()),
|
|
HTTPURL: fmt.Sprintf("http://%s:%d", host, httpPort.Num()),
|
|
GRPCHost: host,
|
|
GRPCPort: int(grpcPort.Num()),
|
|
GRPCURL: fmt.Sprintf("%s:%d", host, grpcPort.Num()),
|
|
AdminUser: env["BACKEND_ADMIN_BOOTSTRAP_USER"],
|
|
AdminPassword: env["BACKEND_ADMIN_BOOTSTRAP_PASSWORD"],
|
|
}
|
|
}
|
|
|
|
// Blank reference that keeps the path/filepath import in use: the code
// shown in this file does not otherwise call into the package, and the
// import is retained for helpers expected to be added here later.
var _ = filepath.Separator
|