feat: backend service

This commit is contained in:
Ilia Denisov
2026-05-06 10:14:55 +03:00
committed by GitHub
parent 3e2622757e
commit f446c6a2ac
1486 changed files with 49720 additions and 266401 deletions
+62
View File
@@ -0,0 +1,62 @@
# syntax=docker/dockerfile:1.7
# Build context is the workspace root (galaxy/), not the backend/
# subdirectory, because the backend module pulls galaxy/{cronutil,error,
# geoip,model,postgres,util} through the go.work replace directives.
# Build with:
#
# docker build -t galaxy/backend:integration -f backend/Dockerfile .
# Stage 1 (builder): compiles the backend binary from the workspace sources.
FROM golang:1.26.2-alpine AS builder
WORKDIR /src
# CGO is disabled and -trimpath is passed to every go invocation in this
# stage; -trimpath removes local file system paths from the compiled binary.
ENV CGO_ENABLED=0 GOFLAGS=-trimpath
# Copy only the shared pkg modules that the go.work generated below lists,
# followed by the backend module itself.
COPY pkg/cronutil/ ./pkg/cronutil/
COPY pkg/error/ ./pkg/error/
COPY pkg/geoip/ ./pkg/geoip/
COPY pkg/model/ ./pkg/model/
COPY pkg/postgres/ ./pkg/postgres/
COPY pkg/util/ ./pkg/util/
COPY backend/ ./backend/
# Synthesise a minimal go.work tailored to the backend binary so the
# repository-level workspace (which lists every module) does not need
# to be copied into the build context.
RUN <<'EOF' cat > go.work
go 1.26.2
use (
./backend
./pkg/cronutil
./pkg/error
./pkg/geoip
./pkg/model
./pkg/postgres
./pkg/util
)
replace (
galaxy/cronutil v0.0.0 => ./pkg/cronutil
galaxy/error v0.0.0 => ./pkg/error
galaxy/geoip v0.0.0 => ./pkg/geoip
galaxy/model v0.0.0 => ./pkg/model
galaxy/postgres v0.0.0 => ./pkg/postgres
galaxy/util v0.0.0 => ./pkg/util
)
EOF
# Build the binary with BuildKit cache mounts for the Go build cache and the
# module cache so repeated builds can reuse them. -ldflags="-s -w" omits the
# symbol table and DWARF debug information from the output binary.
RUN --mount=type=cache,target=/root/.cache/go-build \
--mount=type=cache,target=/go/pkg/mod \
go build -ldflags="-s -w" -o /out/backend ./backend/cmd/backend
# Stage 2 (runtime): receives only the compiled binary from the builder stage.
FROM gcr.io/distroless/static-debian12:nonroot AS runtime
LABEL org.opencontainers.image.title="galaxy-backend"
# NOTE(review): presumably the HTTP listener and the gRPC push listener —
# confirm the port assignment against the service configuration.
EXPOSE 8080
EXPOSE 8081
# Run as the unprivileged nonroot user rather than root.
USER nonroot:nonroot
COPY --from=builder /out/backend /usr/local/bin/backend
ENTRYPOINT ["/usr/local/bin/backend"]
+25
View File
@@ -0,0 +1,25 @@
# Every target below is a command name, not a file to be produced.
.PHONY: jet proto build vet test tidy
# jet regenerates the go-jet query packages under internal/postgres/jet by
# spinning up a transient Postgres container, applying the embedded
# migrations, and running the generator against the `backend` schema.
jet:
go run ./cmd/jetgen
# proto regenerates the gRPC bindings under proto/ from the .proto files
# in the same directory using the buf toolchain (`buf generate`). The
# generated *.pb.go and *_grpc.pb.go files are committed to the repo.
proto:
buf generate
# build compiles every package in the module.
build:
go build ./...
# vet runs `go vet` over every package in the module.
vet:
go vet ./...
# test runs the tests of every package in the module.
test:
go test ./...
# tidy brings go.mod and go.sum in line with the module's imports.
tidy:
go mod tidy
+868
View File
@@ -0,0 +1,868 @@
# backend — Implementation Plan
This plan has already been implemented and is kept here for historical reasons.
It should NOT be treated as the source of truth for service functionality.
---
## Summary
This plan is the technical specification for implementing the
consolidated Galaxy `backend` service. It is read together with
`../ARCHITECTURE.md` (architecture and security model) and
`README.md` (module layout, configuration, operations).
After reading those two documents and this plan, an implementing
engineer should not need to ask architectural questions. Every stage is
self-contained inside its domain area; stages run in order; each stage
has explicit Critical files.
The plan does not invent new domain concepts. It catalogues the work
required to assemble what the architecture document already defines.
## ~~Stage 1~~ — Repository cleanup
This stage was implemented and marked as done.
Goal: remove every module whose responsibility moves into `backend`,
and prepare the workspace for the new module.
Actions:
1. `git rm -r authsession/ lobby/ mail/ notification/ gamemaster/
rtmanager/ geoprofile/ user/ integration/ pkg/redisconn/
pkg/notificationintent/`.
2. Edit `go.work`:
- Remove `use` lines for the deleted modules.
- Remove `replace` lines for `galaxy/redisconn` and
`galaxy/notificationintent`.
- Do not add `./backend` yet — the module is created in Stage 2.
3. Confirm that surviving modules still build:
`go build ./gateway/... ./game/... ./client/... ./pkg/...`.
Any compile error here means a surviving module imported a
removed package and must be patched (the only realistic culprit is
`gateway`, which references `pkg/redisconn` and the deleted streams;
patches there belong to Stage 6, not Stage 1 — for Stage 1 it is
acceptable to leave gateway broken if and only if the only failures
come from imports of removed packages).
4. Run `go vet ./pkg/...` and confirm no diagnostic.
Out of scope: any code change inside surviving modules. Stage 1 is
purely deletion plus `go.work` edits.
Critical files:
- `go.work`
- the deletion of `authsession/`, `lobby/`, `mail/`, `notification/`,
`gamemaster/`, `rtmanager/`, `geoprofile/`, `user/`, `integration/`,
`pkg/redisconn/`, `pkg/notificationintent/`.
Done criteria:
- `git status` shows only deletions plus the `go.work` edit.
- `go build ./pkg/...` is clean.
- `go vet ./pkg/...` is clean.
## ~~Stage 2~~ — Backend skeleton & shared infrastructure
This stage was implemented and marked as done.
Goal: stand up the new module with its boot path, configuration,
telemetry, logger, HTTP listener, Postgres pool, and gRPC listener — all
with empty handlers. After this stage `go run ./backend/cmd/backend`
must boot to a state where probes return 200 and migrations run (with an
empty migration file).
Actions:
1. Create `backend/go.mod` with module path `galaxy/backend` and Go
version matching `go.work`. Add direct dependencies:
`github.com/gin-gonic/gin`, `github.com/jackc/pgx/v5`,
`github.com/go-jet/jet/v2`, `github.com/pressly/goose/v3`,
`go.uber.org/zap`, `go.opentelemetry.io/otel` and the OTLP
trace/metric exporters used by other services, and the `galaxy/*`
pkg modules (`postgres`, `model`, `geoip`, `cronutil`, `error`,
`util`).
2. Add `./backend` to `go.work` `use(...)`.
3. `backend/cmd/backend/main.go` — boot order:
1. Load `config.LoadFromEnv()`; `cfg.Validate()`.
2. Initialise telemetry (`telemetry.NewProcess(cfg.Telemetry)`). Set
global tracer and meter providers.
3. Construct the zap logger; inject trace fields helper.
4. Open Postgres pool. Apply embedded migrations with goose. Fail
fast on any error.
5. Construct module wiring (empty for now; populated in Stage 5).
6. Start the HTTP server (gin engine with empty route groups, plus
`/healthz` and `/readyz`).
7. Start the gRPC push server (no streams accepted yet — Stage 6).
8. Block on `signal.NotifyContext(ctx, SIGINT, SIGTERM)`; on signal,
drain in the order described in `README.md` §16.
4. `backend/internal/config/config.go` — env-loader following the
pattern used by surviving services. Cover every variable listed in
`README.md` §4. Provide `DefaultConfig()` and `Validate()`.
5. `backend/internal/telemetry/runtime.go` — port the existing service
pattern verbatim: configurable OTLP gRPC/HTTP exporter, optional
stdout exporter, Prometheus pull endpoint when configured. Expose
`TraceFieldsFromContext(ctx) []zap.Field`.
6. `backend/internal/server/server.go` — gin engine, three empty route
groups, request id middleware, panic recovery middleware, otel
middleware. Probe handlers in `server/probes.go`.
7. `backend/internal/postgres/pool.go` — pgx pool factory using the
shared `galaxy/postgres` helper.
8. `backend/internal/postgres/migrations/00001_init.sql` — placeholder
file containing only the `-- +goose Up` and `-- +goose Down` markers and a
single `CREATE SCHEMA IF NOT EXISTS backend;` statement so the
migration is non-empty and can be verified.
9. `backend/internal/postgres/migrations/embed.go` — `embed.FS` and
exported `Migrations() fs.FS` helper.
10. `backend/internal/push/server.go` — gRPC server skeleton bound to
`cfg.GRPCPushListenAddr`. No service registered yet.
11. `backend/Makefile` — at minimum a `jet` target stub that prints
"not generated yet"; will be filled in Stage 4.
Critical files:
- `backend/go.mod`, `go.work`
- `backend/cmd/backend/main.go`
- `backend/internal/config/config.go`
- `backend/internal/telemetry/runtime.go`
- `backend/internal/server/server.go`, `backend/internal/server/probes.go`
- `backend/internal/postgres/pool.go`,
`backend/internal/postgres/migrations/00001_init.sql`,
`backend/internal/postgres/migrations/embed.go`
- `backend/internal/push/server.go`
- `backend/Makefile`
Done criteria:
- `go build ./backend/...` is clean.
- `go run ./backend/cmd/backend` starts, applies the placeholder
migration, opens HTTP and gRPC listeners, and serves `/healthz` 200
and `/readyz` 200.
- Telemetry output (stdout exporter) shows trace and metric activity on
a probe hit.
## ~~Stage 3~~ — API contract & routing
This stage was implemented and marked as done.
Goal: define the entire backend REST contract in `openapi.yaml` and
register every handler as a placeholder that returns
`501 Not Implemented`. Wire the middleware stack for each route group.
The contract test suite must validate every endpoint round-trip against
the OpenAPI document and pass on the placeholders.
Actions:
1. Author `backend/openapi.yaml` — single document with three tags
(`Public`, `User`, `Admin`) and the endpoint set below. Reuse
schemas from `pkg/model` where possible; keep the rest under
`components/schemas/*`.
2. Implement middleware in `backend/internal/server/middleware/`:
- `requestid` — assigns and propagates a request id (Stage 2 may
have already done this; consolidate here).
- `logging` — emits an access log entry with trace fields.
- `metrics` — counters and histograms per route group.
- `panicrecovery` — converts panics to 500 with structured logging.
- `userid` — required on `/api/v1/user/*`. Reads `X-User-ID`,
parses as UUID, places it in the request context. Rejects with
400 if missing or malformed. Backend trusts the value (see
architecture trust note).
- `basicauth` — required on `/api/v1/admin/*`. Stage 3 uses a stub
verifier that accepts any non-empty username and a fixed password
read from a test-only env var so contract tests can pass; Stage
5.3 replaces the verifier with the real Postgres-backed one.
3. Implement handlers per endpoint in
`backend/internal/server/handlers_<group>_<topic>.go`. Every handler
returns `501 Not Implemented` with the standard error body
`{"error":{"code":"not_implemented","message":"..."}}`.
4. Implement the contract test:
`backend/internal/server/contract_test.go`. Loads
`backend/openapi.yaml` via `kin-openapi`, builds the gin engine,
walks every operation, sends a representative request, and
validates both the request and response against the OpenAPI
document.
5. Document `openapi.yaml` location and contract test pattern in
`backend/docs/api-contract.md` (a brief decision record).
### Endpoint inventory
Public (`/api/v1/public/*`):
- `POST /auth/send-email-code` — request body `{email, locale?}`;
response `{challenge_id}`.
- `POST /auth/confirm-email-code` — request body
`{challenge_id, code, client_public_key, time_zone}`; response
`{device_session_id}`.
Probes (root):
- `GET /healthz` — `200` always when the process is alive.
- `GET /readyz` — `200` once Postgres reachable, migrations applied,
gRPC listener bound; `503` otherwise.
User (`/api/v1/user/*`, all require `X-User-ID`):
- `GET /account` — current account view (profile + settings +
entitlements).
- `PATCH /account/profile` — update mutable profile fields
(`display_name`).
- `PATCH /account/settings` — update `preferred_language`, `time_zone`.
- `POST /account/delete` — soft delete; the cascade runs in-process.
- `GET /lobby/games` — public list with paging.
- `POST /lobby/games` — create.
- `GET /lobby/games/{game_id}`.
- `PATCH /lobby/games/{game_id}`.
- `POST /lobby/games/{game_id}/open-enrollment`.
- `POST /lobby/games/{game_id}/ready-to-start`.
- `POST /lobby/games/{game_id}/start`.
- `POST /lobby/games/{game_id}/pause`.
- `POST /lobby/games/{game_id}/resume`.
- `POST /lobby/games/{game_id}/cancel`.
- `POST /lobby/games/{game_id}/retry-start`.
- `POST /lobby/games/{game_id}/applications`.
- `POST /lobby/games/{game_id}/applications/{application_id}/approve`.
- `POST /lobby/games/{game_id}/applications/{application_id}/reject`.
- `POST /lobby/games/{game_id}/invites`.
- `POST /lobby/games/{game_id}/invites/{invite_id}/redeem`.
- `POST /lobby/games/{game_id}/invites/{invite_id}/decline`.
- `POST /lobby/games/{game_id}/invites/{invite_id}/revoke`.
- `GET /lobby/games/{game_id}/memberships`.
- `POST /lobby/games/{game_id}/memberships/{membership_id}/remove`.
- `POST /lobby/games/{game_id}/memberships/{membership_id}/block`.
- `GET /lobby/my/games`.
- `GET /lobby/my/applications`.
- `GET /lobby/my/invites`.
- `GET /lobby/my/race-names`.
- `POST /lobby/race-names/register` — promote a `pending_registration`
to `registered` within the 30-day window.
- `POST /games/{game_id}/commands` — proxy to engine command path.
- `POST /games/{game_id}/orders` — proxy to engine order validation.
- `GET /games/{game_id}/reports/{turn}` — proxy to engine report path.
Admin (`/api/v1/admin/*`, all require Basic Auth):
- `GET /admin-accounts`, `POST /admin-accounts`,
`GET /admin-accounts/{username}`,
`POST /admin-accounts/{username}/disable`,
`POST /admin-accounts/{username}/enable`,
`POST /admin-accounts/{username}/reset-password`.
- `GET /users`, `GET /users/{user_id}`,
`POST /users/{user_id}/sanctions`,
`POST /users/{user_id}/limits`,
`POST /users/{user_id}/entitlements`,
`POST /users/{user_id}/soft-delete`.
- `GET /games`, `GET /games/{game_id}`,
`POST /games/{game_id}/force-start`,
`POST /games/{game_id}/force-stop`,
`POST /games/{game_id}/ban-member`.
- `GET /runtimes/{game_id}`,
`POST /runtimes/{game_id}/restart`,
`POST /runtimes/{game_id}/patch`,
`POST /runtimes/{game_id}/force-next-turn`,
`GET /engine-versions`, `POST /engine-versions`,
`PATCH /engine-versions/{id}`,
`POST /engine-versions/{id}/disable`.
- `GET /mail/deliveries`,
`GET /mail/deliveries/{delivery_id}`,
`GET /mail/deliveries/{delivery_id}/attempts`,
`POST /mail/deliveries/{delivery_id}/resend`,
`GET /mail/dead-letters`.
- `GET /notifications`, `GET /notifications/{notification_id}`,
`GET /notifications/dead-letters`,
`GET /notifications/malformed`.
- `GET /geo/users/{user_id}/countries` — counter listing.
Internal (gateway-only, `/api/v1/internal/*`):
- `GET /sessions/{device_session_id}` — gateway session lookup.
- `POST /sessions/{device_session_id}/revoke` — admin or self revoke
passthrough; backend emits `session_invalidation`.
- `POST /sessions/users/{user_id}/revoke-all`.
- `GET /users/{user_id}/account-internal` — server-to-server fetch
used by gateway flows that need account state alongside the session.
The internal group is on `/api/v1/internal/*`. The trust model treats
it as part of the user surface (no extra auth in MVP).
Critical files:
- `backend/openapi.yaml`
- `backend/internal/server/router.go`
- `backend/internal/server/middleware/{requestid,logging,metrics,panicrecovery,userid,basicauth}.go`
- `backend/internal/server/handlers_*.go`
- `backend/internal/server/contract_test.go`
- `backend/docs/api-contract.md`
Done criteria:
- `go test ./backend/internal/server/...` is green; the contract test
exercises every endpoint and validates against `openapi.yaml`.
- Every endpoint returns `501 Not Implemented` with the standard error
body.
- gin route table at startup matches the OpenAPI inventory exactly.
## ~~Stage 4~~ — Persistence layer
This stage was implemented and marked as done.
Goal: define every `backend` schema table, generate jet code, and make
the wiring of the persistence layer ready for the domain modules.
Actions:
1. Replace `backend/internal/postgres/migrations/00001_init.sql` with
the full DDL. The schema is `backend`. The expected tables and
their primary purposes:
Auth:
- `device_sessions(device_session_id uuid pk, user_id uuid not null,
client_public_key bytea not null, status text not null,
created_at, revoked_at, last_seen_at)` plus indexes on
`user_id` and `status`.
- `auth_challenges(challenge_id uuid pk, email text not null,
code_hash bytea not null, created_at, expires_at, consumed_at,
attempts int not null default 0)`. Index on `email`.
- `blocked_emails(email text pk, blocked_at, reason text)`.
User:
- `accounts(user_id uuid pk, email text unique not null,
user_name text unique not null, display_name text not null,
preferred_language text not null, time_zone text not null,
declared_country text, permanent_block bool not null default false,
created_at, updated_at, deleted_at)`.
- `entitlement_records(record_id uuid pk, user_id uuid not null,
tier text not null, source text not null, created_at)`.
- `entitlement_snapshots(user_id uuid pk, tier text not null,
max_registered_race_names int not null, taken_at timestamptz)`.
Updated on every entitlement change.
- `sanction_records`, `sanction_active`, `limit_records`,
`limit_active` — same shape as the previous `user` service had
(record + active rollup pattern).
Admin:
- `admin_accounts(username text pk, password_hash bytea not null,
created_at, last_used_at, disabled_at)`.
Lobby:
- `games(game_id uuid pk, owner_user_id uuid not null,
visibility text not null, status text not null, ...)` covering
enrollment state machine fields documented in
`ARCHITECTURE_deprecated.md` § Game Lobby.
- `applications(application_id uuid pk, game_id uuid not null,
applicant_user_id uuid not null, status text not null, ...)`.
- `invites(invite_id uuid pk, game_id uuid not null,
invited_user_id uuid, code text unique, status text, ...)`.
- `memberships(membership_id uuid pk, game_id uuid not null,
user_id uuid not null, race_name text not null, status text,
...)` plus `unique(game_id, user_id)`.
- `race_names(name text not null, canonical text not null,
status text not null, owner_user_id uuid, game_id uuid,
expires_at, registered_at, ...)` plus
`unique(canonical) where status in ('registered','reservation','pending_registration')`.
Runtime:
- `runtime_records(game_id uuid pk, current_container_id text,
status text not null, image_ref text, started_at, last_observed_at,
...)`.
- `engine_versions(version text pk, image_ref text not null,
enabled bool not null default true, created_at, ...)`.
- `player_mappings(game_id uuid not null, user_id uuid not null,
race_name text not null, engine_player_uuid uuid not null,
primary key(game_id, user_id))`.
- `runtime_operation_log(operation_id uuid pk, game_id uuid,
op text, status text, started_at, finished_at, error text)`.
- `runtime_health_snapshots(snapshot_id uuid pk, game_id uuid,
observed_at, payload jsonb)`.
Mail:
- `mail_deliveries(delivery_id uuid pk, template_id text not null,
idempotency_key text not null, status text not null,
attempts int not null default 0, next_attempt_at timestamptz,
payload_id uuid not null, created_at, ...)` plus
`unique(template_id, idempotency_key)`.
- `mail_recipients(recipient_id uuid pk, delivery_id uuid not null,
address text not null, kind text not null)`.
- `mail_attempts(attempt_id uuid pk, delivery_id uuid, attempt_no int,
started_at, finished_at, outcome text, error text)`.
- `mail_dead_letters(dead_letter_id uuid pk, delivery_id uuid,
archived_at, reason text)`.
- `mail_payloads(payload_id uuid pk, content_type text not null,
subject text, body bytea not null)`.
Notification:
- `notifications(notification_id uuid pk, kind text not null,
idempotency_key text not null, user_id uuid, payload jsonb,
created_at)` plus `unique(kind, idempotency_key)`.
- `notification_routes(route_id uuid pk, notification_id uuid,
channel text not null, status text not null, last_attempt_at,
...)`.
- `notification_dead_letters(dead_letter_id uuid pk, notification_id
uuid, archived_at, reason text)`.
- `notification_malformed_intents(id uuid pk, received_at, payload
jsonb, reason text)`.
Geo:
- `user_country_counters(user_id uuid not null, country text not null,
count bigint not null default 0, last_seen_at timestamptz,
primary key(user_id, country))`.
2. Add `created_at TIMESTAMPTZ DEFAULT now()` to every table; add
`updated_at` and `deleted_at` where the domain reasons in
`ARCHITECTURE_deprecated.md` apply. UTC normalisation is performed
in Go on read and write (the existing `pkg/postgres` helpers cover
this).
3. `backend/cmd/jetgen/main.go` — port the existing pattern from a
surviving reference (the previous services' `cmd/jetgen` is a good
template; adjust import paths to `galaxy/backend`). The tool spins
up a transient Postgres container, applies the embedded migrations,
and runs `jet -dsn=...` writing into `internal/postgres/jet/`.
4. `backend/Makefile` — fill in the `jet` target.
5. Run `make jet` and commit `internal/postgres/jet/`.
6. Add `backend/internal/postgres/jet/jet.go` — package doc and
`//go:generate` comment pointing to `cmd/jetgen`.
7. Sanity test in `backend/internal/postgres/migrations_test.go`:
spin up a Postgres testcontainer, apply migrations, assert that
the `backend` schema exists and that every expected table is
present.
Critical files:
- `backend/internal/postgres/migrations/00001_init.sql`
- `backend/internal/postgres/jet/**`
- `backend/cmd/jetgen/main.go`
- `backend/Makefile`
- `backend/internal/postgres/migrations_test.go`
Done criteria:
- `go test ./backend/internal/postgres/...` is green.
- `make jet` regenerates without diff.
- All tables listed above exist after a fresh migration.
## ~~Stage 5~~ — Domain implementation
Goal: implement domain modules in dependency order. After each substage
the backend is functional for the substage's slice of behaviour. The
contract tests from Stage 3 progressively flip from `501` to actual
responses as each substage replaces placeholders.
Substages run strictly in order. Each substage:
- Implements package code in `backend/internal/<domain>/`.
- Replaces the corresponding `501` handler bodies in
`backend/internal/server/handlers_*.go` with real logic that calls
the domain package.
- Adds focused unit and contract coverage for the substage's
endpoints.
- Wires the new package into `backend/cmd/backend/main.go`.
### ~~5.1~~ — auth
This substage was implemented and marked as done. See
[`docs/stage05_1-auth.md`](docs/stage05_1-auth.md) for the decisions
taken during implementation.
Behaviour:
- `POST /api/v1/public/auth/send-email-code` — generates a challenge,
hashes the code, persists in `auth_challenges`, calls
`mail.EnqueueLoginCode(email, code)`. Returns `{challenge_id}` for
every non-blocked email (existing user, new user, throttled — all
return identical shape; blocked email rejects with 400 only when the
block is permanent).
- `POST /api/v1/public/auth/confirm-email-code` — looks up the
challenge, verifies the code (constant-time), enforces attempt
ceiling, marks consumed, calls `user.EnsureByEmail(email,
preferred_language, time_zone)` to obtain the user_id, stores the
Ed25519 public key, creates a `device_session` row, populates the
in-memory cache, calls
`geo.SetDeclaredCountryAtRegistration(user_id, source_ip)`, and
returns `{device_session_id}`.
- `GET /api/v1/internal/sessions/{device_session_id}` — sync session
lookup for gateway.
- `POST /api/v1/internal/sessions/{device_session_id}/revoke` and
`POST /api/v1/internal/sessions/users/{user_id}/revoke-all` — mark
sessions revoked, evict from in-memory cache, emit
`session_invalidation` push event (Stage 6 wires the actual
emission; until then `auth` calls a no-op publisher injected at
wiring).
Cache: full session table read at startup; write-through on every
mutation.
### ~~5.2~~ — user
This substage was implemented and marked as done. See
[`docs/stage05_2-user.md`](docs/stage05_2-user.md) for the decisions
taken during implementation.
Behaviour:
- Account CRUD limited to allowed mutations on profile and settings.
- `EnsureByEmail` and `ResolveByEmail` for `auth`.
- Entitlement records and snapshots; tier downgrades never revoke
already-registered race names.
- Sanctions and limits using the record + active rollup pattern.
- Soft delete: writes `deleted_at` and triggers in-process cascade —
`lobby.OnUserDeleted(user_id)`, `notification.OnUserDeleted(user_id)`,
`geo.OnUserDeleted(user_id)`. Permanent block triggers
`lobby.OnUserBlocked(user_id)`.
- Cache: latest entitlement snapshot per user; warmed on startup;
write-through on entitlement mutation.
### ~~5.3~~ — admin
This substage was implemented and marked as done. See
[`docs/stage05_3-admin.md`](docs/stage05_3-admin.md) for the decisions
taken during implementation.
Behaviour:
- `admin_accounts` CRUD with bcrypt hashing.
- Bootstrap on startup via env vars (`BACKEND_ADMIN_BOOTSTRAP_USER`,
`BACKEND_ADMIN_BOOTSTRAP_PASSWORD`); idempotent.
- Replace the Stage 3 stub `basicauth` middleware with the real
Postgres-backed verifier. Constant-time comparison via bcrypt.
- Admin CRUD endpoints across users, games, runtime, mail,
notification, geo. Each admin endpoint delegates to the domain
package's admin-facing methods.
Cache: full admin table at startup; write-through on mutation.
### ~~5.4~~ — lobby
This substage was implemented and marked as done. See
[`docs/stage05_4-lobby.md`](docs/stage05_4-lobby.md) for the decisions
taken during implementation.
Behaviour:
- Games CRUD with the enrollment state machine.
- Applications and invites with their lifecycles.
- Memberships with race name binding.
- Race Name Directory (RND): registered, reservation, and
pending_registration tiers; canonical key via `disciplinedware/go-confusables`;
uniqueness across all three tiers; capability promotion based on
`max_planets > initial AND max_population > initial` from the
runtime snapshot.
- Pending-registration sweeper: scheduled job, releases entries past
the 30-day window; uses `pkg/cronutil`. The same sweeper auto-closes
enrollment-expired games whose `approved_count >= min_players`.
- Hooks consumed from other modules:
- `OnUserBlocked(user_id)` — release all RND/applications/invites/
memberships in one transaction.
- `OnUserDeleted(user_id)` — same.
- `OnRuntimeSnapshot(snapshot)` — update denormalised runtime view
on the game (current_turn, status, per-member max stats).
- `OnGameFinished(game_id)` — drive race name promotion logic and
move game to `finished`.
Cache: active games and memberships, RND canonical set; warmed on
startup; write-through on mutation.
### ~~5.5~~ — runtime (with dockerclient and engineclient)
This substage was implemented and marked as done. See
[`docs/stage05_5-runtime.md`](docs/stage05_5-runtime.md) for the
decisions taken during implementation.
Behaviour:
- Engine version registry CRUD.
- `engineclient` is a thin `net/http` client over `pkg/model` types,
one method per engine endpoint listed in `README.md` §8.
- `dockerclient` wraps `github.com/docker/docker` for: pull, create,
start, stop, remove, inspect, list (filtered by the
`galaxy.backend=1` label), patch (semver-only, validated against
`engine_versions`).
- Per-game serialisation: a `sync.Map` keyed by `game_id` and holding
`*sync.Mutex` values ensures concurrent ops on the same game are sequential.
- Worker pool for long-running operations: started in Stage 5.5; jobs
enqueued on a buffered channel; bounded concurrency.
- `runtime_operation_log` records every op (start time, finish time,
outcome, error).
- Reconciliation: on startup and on a `pkg/cronutil` schedule, list
containers labelled `galaxy.backend=1`, match against
`runtime_records`, adopt unrecorded labelled containers, mark
records whose container is missing as removed. Emit
`lobby.OnRuntimeJobResult` for each removed record.
- Snapshot publication: after every successful engine read or a
health-probe transition, synthesise a snapshot and call
`lobby.OnRuntimeSnapshot(snapshot)` synchronously.
- Turn scheduler: `pkg/cronutil` schedule per running game; each tick
invokes the engine `admin/turn`, on success snapshots and publishes;
force-next-turn sets a one-shot skip flag stored in
`runtime_records`.
Cache: active runtime records, engine version registry; warmed on
startup; write-through on mutation.
### ~~5.6~~ — mail
This substage was implemented and marked as done. See
[`docs/stage05_6-mail.md`](docs/stage05_6-mail.md) for the decisions
taken during implementation.
Behaviour:
- Outbox tables defined in Stage 4.
- Worker goroutine: scans `mail_deliveries` with
`SELECT ... FOR UPDATE SKIP LOCKED` ordered by `next_attempt_at`,
attempts SMTP delivery via `wneessen/go-mail`, records in
`mail_attempts`, updates status, schedules backoff with jitter, or
dead-letters past the configured maximum attempts.
- Drain on startup: replays all `pending` and `retrying` rows.
- Public API for producers: `EnqueueLoginCode(email, code, ttl)`,
`EnqueueTemplate(template_id, recipient, payload, idempotency_key)`.
- Admin endpoints implemented: list, view, resend.
### ~~5.7~~ — notification
This substage was implemented and marked as done. See
[`docs/stage05_7-notification.md`](docs/stage05_7-notification.md) for
the decisions taken during implementation.
Behaviour:
- `Submit(intent)` — validate intent shape, enforce idempotency,
persist `notifications`, materialise `notification_routes`, fan out
to push (Stage 6 wires the actual push emission; until then a no-op
publisher) and email (`mail.EnqueueTemplate`).
- Each kind has a fixed channel set documented in `README.md` §10.
- Malformed intents go to `notification_malformed_intents` and never
block the producer.
- Dead-letter handling: a failed route past max attempts moves to
`notification_dead_letters`.
- Producers (lobby, runtime, geo, auth) are wired via direct function
calls.
### ~~5.8~~ — geo
This substage was implemented and marked as done. See
[`docs/stage05_8-geo.md`](docs/stage05_8-geo.md) for the decisions
taken during implementation.
Behaviour:
- Load GeoLite2 Country DB at startup from `BACKEND_GEOIP_DB_PATH`.
- `SetDeclaredCountryAtRegistration(user_id, ip)` — sync; lookup,
update `accounts.declared_country`. No-op on lookup error.
- `IncrementCounterAsync(user_id, ip)` — fire-and-forget goroutine;
upsert `user_country_counters` with `count = count + 1`,
`last_seen_at = now()`.
- Middleware on `/api/v1/user/*` extracts the source IP from
`X-Forwarded-For` (or `RemoteAddr`) and calls
`IncrementCounterAsync` after the handler returns successfully.
- `OnUserDeleted(user_id)` — delete the user's counter rows.
Critical files (Stage 5 as a whole):
- `backend/internal/auth/**`
- `backend/internal/user/**`
- `backend/internal/admin/**`
- `backend/internal/lobby/**`
- `backend/internal/runtime/**`
- `backend/internal/dockerclient/**`
- `backend/internal/engineclient/**`
- `backend/internal/mail/**`
- `backend/internal/notification/**`
- `backend/internal/geo/**`
- `backend/internal/server/handlers_*.go` (replacing 501 stubs)
- `backend/cmd/backend/main.go` (wiring expansion)
Done criteria:
- All Stage 3 contract tests pass against real responses.
- Each substage adds focused unit tests (`testify`, mocks where
external boundaries justify them).
- `go run ./backend/cmd/backend` boots, all caches warm, all workers
start.
## ~~Stage 6~~ — Push gRPC interface and gateway adaptation
Goal: stand up the bidirectional control channel between backend and
gateway. Backend pushes `client_event` and `session_invalidation`;
gateway opens the stream, signs and forwards client events, immediately
acts on session invalidations. Remove every Redis dependency from
gateway except anti-replay reservations.
### ~~6.1~~ — Backend push server
This substage was implemented and marked as done. See
[`docs/stage06_1-push.md`](docs/stage06_1-push.md) for the decisions
taken during implementation.
Actions:
1. Author `backend/proto/push/v1/push.proto` with
`service Push { rpc SubscribePush(GatewaySubscribeRequest) returns
(stream PushEvent); }` and the message types defined in
`README.md` §7. Include a `cursor` field (string).
2. `backend/buf.yaml`, `backend/buf.gen.yaml` mirroring the gateway
pattern; generate Go bindings into `backend/proto/push/v1/`.
3. `backend/internal/push/server.go` — gRPC service implementation:
- Maintains a connection registry keyed by gateway client id (the
`GatewaySubscribeRequest` provides one; if multiple gateway
instances connect, each gets its own queue).
- Holds an in-memory ring buffer keyed by cursor, with TTL equal to
`BACKEND_FRESHNESS_WINDOW`. Cursors past TTL are discarded.
- Resume: if the client's cursor is still in the buffer, replay
from there; otherwise replay nothing and start fresh.
- Backpressure: per-connection buffered channel; on overflow, drop
the oldest events for that connection and log.
4. Provide a publisher API consumed by `auth`, `lobby`, `notification`,
and `runtime`:
- `push.PublishClientEvent(user_id, device_session_id?, payload, kind)`.
- `push.PublishSessionInvalidation(device_session_id|user_id, reason)`.
### ~~6.2~~ — Gateway adaptation
This substage was implemented and marked as done. See
[`docs/stage06_2-gateway.md`](docs/stage06_2-gateway.md) for the
decisions taken during implementation.
Actions:
1. Remove `redisconn` usage for session projection and for the two
stream consumers. Keep `redisconn` only for anti-replay
reservations.
2. Remove `gateway/internal/config` env vars
`GATEWAY_SESSION_EVENTS_REDIS_STREAM` and
`GATEWAY_CLIENT_EVENTS_REDIS_STREAM`. Add
`GATEWAY_BACKEND_HTTP_URL` and `GATEWAY_BACKEND_GRPC_PUSH_URL`.
3. Add `gateway/internal/backendclient/` with:
- `RESTClient` — HTTP client for `/api/v1/internal/sessions/...` and
for forwarding public/user requests.
- `PushClient` — gRPC client to `SubscribePush` with reconnect
loop, exponential backoff with jitter, and cursor persistence in
process memory.
4. Replace gateway session validation with a sync REST call to
backend per request.
5. Replace gateway client-events Redis consumer with the
`SubscribePush` consumer. On `client_event`: sign envelope (Ed25519)
and deliver to the matching client subscription. On
`session_invalidation`: look up active subscriptions for the target
sessions, close them, and reject any in-flight authenticated
request bound to those sessions.
6. Anti-replay request_id reservations remain in Redis (unchanged).
7. Update gateway tests to use a mocked backend HTTP and gRPC server.
Critical files:
- `backend/proto/push/v1/push.proto`
- `backend/buf.yaml`, `backend/buf.gen.yaml`
- `backend/internal/push/server.go`,
`backend/internal/push/publisher.go`
- `gateway/internal/backendclient/*.go`
- `gateway/internal/config/config.go` (env var changes)
- `gateway/internal/handlers/*.go` (route forwarding to backend)
- `gateway/internal/auth/*.go` (session lookup → REST)
- `gateway/internal/eventfanout/*.go` (replace Redis consumer with
gRPC consumer; rename if helpful)
Done criteria:
- `go run ./backend/cmd/backend` and `go run ./gateway/cmd/gateway`
cooperate end-to-end with no Redis stream usage.
- A revocation through the admin surface causes immediate stream
closure on the affected client.
- Gateway anti-replay still rejects duplicates.
- The gateway test suite is green.
## ~~Stage 7~~ — Integration testing
This stage was implemented and marked as done. See
[`docs/stage07-integration.md`](docs/stage07-integration.md) for the
decisions taken during implementation, including the testenv layout,
the signed-envelope gRPC client, and the per-scenario coverage notes.
Goal: end-to-end coverage of the platform with real binaries and real
infrastructure where practical.
Actions:
1. Recreate the top-level `integration/` module, registered in
`go.work`. The module hosts black-box test suites that drive
`gateway` from outside and verify behaviour at the public boundary
(with `backend` and `game` running in containers).
2. Add testcontainers fixtures: Postgres, an SMTP capture server (for
example `axllent/mailpit`), the `galaxy/game` engine image, the
`galaxy/backend` image (built from this repo), and the
`galaxy/gateway` image. The Docker daemon used by testcontainers
is the same one backend will use to manage engines.
3. Add a synthetic GeoLite2 mmdb (use `pkg/geoip/test-data/`).
4. Cover scenarios:
- Registration flow: send-email-code → confirm-email-code →
`declared_country` populated from synthetic mmdb.
- User account fetch: `X-User-ID` path returns the expected
account; geo counter increments per request.
- Lobby flow: create game → invite → application → ready-to-start
→ start (engine container starts, healthz green, status read) →
command → force-next-turn → finish → race name promotion.
- Mail flow: trigger an email-bound notification → SMTP capture
receives it → admin resend works.
- Notification flow: lobby invite triggers a push event reaching
the test client's gateway subscription, plus an email captured
by SMTP.
- Admin flow: bootstrap admin authenticates; CRUD admin creates a
second admin; second admin disables the first.
- Soft delete flow: user soft-delete cascades; their RND entries,
memberships, applications, invites, geo counters are released
or removed.
- Session revocation: admin revokes a session → push
`session_invalidation` arrives at gateway → active subscription
closes; subsequent requests with that `device_session_id`
rejected by gateway.
- Anti-replay: same `request_id` replayed within freshness window
is rejected by gateway.
5. CI: run `go test ./integration/... -tags=integration` (or whichever
flag the team prefers). Tests requiring real Docker run only when
a Docker daemon is available; otherwise they skip with a clear
message.
Critical files:
- `integration/go.mod`
- `integration/auth_flow_test.go`
- `integration/lobby_flow_test.go`
- `integration/mail_flow_test.go`
- `integration/notification_flow_test.go`
- `integration/admin_flow_test.go`
- `integration/soft_delete_test.go`
- `integration/session_revoke_test.go`
- `integration/anti_replay_test.go`
- `integration/testenv/*.go` (shared fixtures)
Done criteria:
- `go test ./integration/...` runs the full suite.
- All listed scenarios pass green on a developer machine with Docker
available.
- Failures produce actionable diagnostics (logs from each component
attached to the test report).
## Stage acceptance and decision records
After each stage, the implementing engineer writes a short decision
record under `backend/docs/stage<NN>-<topic>.md` capturing any
non-trivial choice made during implementation that is not obvious from
the code or from this plan. Records that contradict this plan must be
brought to the architecture conversation before merge — the plan and
the architecture document are the agreed contract.
+472
View File
@@ -0,0 +1,472 @@
# backend
`backend` is the consolidated business service of the Galaxy platform. It
owns identity, sessions, lobby, game runtime, mail, notifications, geo
signals, and administration. It is reachable only from `gateway` over
the trusted network. See `../ARCHITECTURE.md` for the platform-level
context, security model, and decision rationale.
## 1. Purpose
A single Go binary that:
- Serves three HTTP route groups (`/api/v1/public/*`, `/api/v1/user/*`,
`/api/v1/admin/*`) plus health probes.
- Hosts a gRPC `SubscribePush` server consumed by `gateway`.
- Owns one Postgres schema (`backend`).
- Talks to the Docker daemon to run game engine containers.
- Talks to an SMTP relay to send mail through a durable outbox.
- Reads the GeoLite2 country database for source-IP country lookup.
This README describes how the binary is laid out, configured, and run.
The implementation specification lives in `PLAN.md`.
## 2. API Surfaces
| Prefix | Auth | Audience |
| ------------------ | ----------------------------------------------- | ------------------------------------- |
| `/api/v1/public/*` | none | Registration, code confirmation |
| `/api/v1/user/*` | `X-User-ID` injected by gateway | Authenticated end users |
| `/api/v1/admin/*` | HTTP Basic Auth against `admin_accounts` | Platform administrators |
| `/healthz` | none | Liveness probe |
| `/readyz` | none | Readiness probe |
The full contract is documented in `openapi.yaml`; the contract tests
under `internal/server/` validate requests and responses against it.
## 3. Module Layout
```text
backend/
├── cmd/
│ ├── backend/ # main.go: process entrypoint
│ └── jetgen/ # jet code generator runner
├── internal/
│ ├── admin/ # admin_accounts, Basic Auth verifier, admin operations
│ ├── auth/ # email-code challenges, device sessions, Ed25519 keys
│ ├── config/ # env-var loader, Validate
│ ├── dockerclient/ # docker/docker wrapper for container ops
│ ├── engineclient/ # net/http client to galaxy-game containers
│ ├── geo/ # geoip lookup, declared_country, per-user counters
│ ├── lobby/ # games, applications, invites, memberships, RND
│ ├── mail/ # outbox worker, SMTP delivery, dead letters
│ ├── notification/ # intent normalisation, push + email fan-out
│ ├── postgres/ # pgx pool, embedded migrations, jet/
│ ├── push/ # gRPC SubscribePush server
│ ├── runtime/ # engine version registry, container lifecycle, scheduler
│ ├── server/ # gin engine, route groups, middleware, handlers
│ ├── telemetry/ # otel runtime, zap factory
│ └── user/ # accounts, settings, entitlements, sanctions, soft delete
├── proto/
│ └── push/v1/ # push.proto and generated gRPC code
├── docs/ # per-stage decision records (one file per decision)
├── openapi.yaml # full REST contract (public + user + admin)
├── go.mod
├── Makefile # `make jet` regenerates jet code
└── README.md
```
## 4. Configuration
All configuration is environment-based; there are no flags or files.
`Validate()` is called once at startup; missing required values fail
fast.
| Variable | Required | Default | Purpose |
| --------------------------------------- | -------- | ------------------------ | --------------------------------------------------- |
| `BACKEND_HTTP_LISTEN_ADDR` | no | `:8080` | HTTP listener for REST surfaces and probes. |
| `BACKEND_HTTP_READ_TIMEOUT` | no | `30s` | HTTP read timeout. |
| `BACKEND_HTTP_WRITE_TIMEOUT` | no | `30s` | HTTP write timeout. |
| `BACKEND_HTTP_SHUTDOWN_TIMEOUT` | no | `15s` | Graceful shutdown budget for HTTP server. |
| `BACKEND_SHUTDOWN_TIMEOUT` | no | `30s` | Process-wide cap applied to each component shutdown. |
| `BACKEND_GRPC_PUSH_LISTEN_ADDR` | no | `:8081` | gRPC listener for the push interface. |
| `BACKEND_GRPC_PUSH_SHUTDOWN_TIMEOUT` | no | `10s` | Graceful shutdown budget for the gRPC server. |
| `BACKEND_LOGGING_LEVEL` | no | `info` | zap log level. |
| `BACKEND_POSTGRES_DSN` | yes | — | pgx-style Postgres DSN. Must include `search_path=backend` so unqualified reads and writes resolve to the service-owned schema. |
| `BACKEND_POSTGRES_MAX_CONNS` | no | `25` | Pool max connections. |
| `BACKEND_POSTGRES_MIN_CONNS` | no | `2` | Pool min connections. |
| `BACKEND_POSTGRES_OPERATION_TIMEOUT` | no | `5s` | Default per-statement timeout. |
| `BACKEND_SMTP_HOST` | yes | — | SMTP relay host. |
| `BACKEND_SMTP_PORT` | no | `587` | SMTP relay port. |
| `BACKEND_SMTP_USERNAME` | no | — | SMTP auth username (omit for anonymous). |
| `BACKEND_SMTP_PASSWORD` | no | — | SMTP auth password. |
| `BACKEND_SMTP_FROM` | yes | — | RFC-5321 From address. |
| `BACKEND_SMTP_TLS_MODE` | no | `starttls` | `none`, `starttls`, or `tls`. |
| `BACKEND_MAIL_WORKER_INTERVAL` | no | `2s` | How often the outbox worker scans for new work. |
| `BACKEND_MAIL_MAX_ATTEMPTS` | no | `8` | Maximum delivery attempts before dead-lettering. |
| `BACKEND_DOCKER_HOST` | no | `unix:///var/run/docker.sock` | Docker daemon endpoint. |
| `BACKEND_DOCKER_NETWORK` | yes | — | User-defined Docker bridge network for engines. |
| `BACKEND_GAME_STATE_ROOT` | yes | — | Host directory bind-mounted into engine containers. |
| `BACKEND_ADMIN_BOOTSTRAP_USER` | no | — | Initial admin username; idempotent insert. |
| `BACKEND_ADMIN_BOOTSTRAP_PASSWORD` | no | — | Initial admin password; required if user is set. |
| `BACKEND_GEOIP_DB_PATH` | yes | — | Filesystem path to GeoLite2 Country `.mmdb`. |
| `BACKEND_OTEL_TRACES_EXPORTER` | no | `otlp` | `none`, `otlp`, `stdout`. |
| `BACKEND_OTEL_METRICS_EXPORTER` | no | `otlp` | `none`, `otlp`, `stdout`, `prometheus`. |
| `BACKEND_OTEL_PROTOCOL` | no | `grpc` | `grpc` or `http/protobuf`. OTLP only. |
| `BACKEND_OTEL_ENDPOINT` | no | provider default | OTLP endpoint URL. |
| `BACKEND_OTEL_PROMETHEUS_LISTEN_ADDR` | no | `:9100` | When `BACKEND_OTEL_METRICS_EXPORTER=prometheus`. |
| `BACKEND_SERVICE_NAME` | no | `galaxy-backend` | Resource attribute for telemetry. |
| `BACKEND_FRESHNESS_WINDOW` | no | `5m` | Mirrors gateway freshness window for push cursor TTL. |
| `BACKEND_AUTH_CHALLENGE_TTL` | no | `10m` | Lifetime of an issued `auth_challenges` row. |
| `BACKEND_AUTH_CHALLENGE_MAX_ATTEMPTS` | no | `5` | Maximum confirm-email-code attempts per challenge. |
| `BACKEND_AUTH_CHALLENGE_THROTTLE_WINDOW`| no | `60s` | Rolling window over which challenges are counted toward throttle. |
| `BACKEND_AUTH_CHALLENGE_THROTTLE_MAX` | no | `3` | Max un-consumed, non-expired challenges per email per window before reuse kicks in. |
| `BACKEND_AUTH_USERNAME_MAX_RETRIES` | no | `10` | Retry budget for synthesising a unique placeholder `accounts.user_name` at registration. |
| `BACKEND_LOBBY_SWEEPER_INTERVAL` | no | `60s` | How often the lobby sweeper releases expired pending_registrations and auto-closes enrollment-expired games. |
| `BACKEND_LOBBY_PENDING_REGISTRATION_TTL`| no | `720h` (30 days) | Lifetime of a `pending_registration` Race Name Directory entry awaiting promotion. |
| `BACKEND_LOBBY_INVITE_DEFAULT_TTL` | no | `168h` (7 days) | Default expiry applied to invites whose request body omits `expires_at`. |
| `BACKEND_ENGINE_CALL_TIMEOUT` | no | `60s` | Per-call timeout for engine writes (init, turn, banish, command, order). |
| `BACKEND_ENGINE_PROBE_TIMEOUT` | no | `5s` | Per-call timeout for engine reads (status, report, healthz). |
| `BACKEND_RUNTIME_WORKER_POOL_SIZE` | no | `4` | Long-running runtime job concurrency. |
| `BACKEND_RUNTIME_JOB_QUEUE_SIZE` | no | `64` | Buffered runtime-job channel depth. |
| `BACKEND_RUNTIME_RECONCILE_INTERVAL` | no | `60s` | Interval between reconciler passes against the Docker daemon. |
| `BACKEND_RUNTIME_IMAGE_PULL_POLICY` | no | `if_missing` | Engine image pull policy: `if_missing`, `always`, `never`. |
| `BACKEND_RUNTIME_CONTAINER_LOG_DRIVER` | no | `json-file` | Docker log driver applied to engine containers. |
| `BACKEND_RUNTIME_CONTAINER_LOG_OPTS` | no | — | Comma-separated `key=value` pairs forwarded to the log driver. |
| `BACKEND_RUNTIME_CONTAINER_CPU_QUOTA` | no | `2.0` | Engine container `--cpus`. |
| `BACKEND_RUNTIME_CONTAINER_MEMORY` | no | `512m` | Engine container `--memory`. |
| `BACKEND_RUNTIME_CONTAINER_PIDS_LIMIT` | no | `256` | Engine container `--pids-limit`. |
| `BACKEND_RUNTIME_CONTAINER_STATE_MOUNT` | no | `/var/lib/galaxy-game` | Absolute in-container path for the per-game state bind mount. |
| `BACKEND_RUNTIME_STOP_GRACE_PERIOD` | no | `10s` | SIGTERM-to-SIGKILL grace period for engine container stop. |
| `BACKEND_NOTIFICATION_ADMIN_EMAIL` | no | — | Recipient address for admin-channel notifications (`runtime.*` kinds). When empty, admin-channel routes are recorded as `skipped` and the catalog is partially silenced. |
| `BACKEND_NOTIFICATION_WORKER_INTERVAL` | no | `5s` | Notification route worker scan interval. |
| `BACKEND_NOTIFICATION_MAX_ATTEMPTS` | no | `8` | Notification route delivery attempts before dead-lettering. |
If `BACKEND_ADMIN_BOOTSTRAP_USER` is set without
`BACKEND_ADMIN_BOOTSTRAP_PASSWORD`, `Validate()` fails. If neither is
set, no bootstrap insert happens and operators are expected to have
seeded `admin_accounts` ahead of time.
## 5. Persistence
- One Postgres database, schema `backend`. The role used by `backend`
must own the schema (or be granted `CREATE` on it for migrations).
- Migrations live in `internal/postgres/migrations/`, are embedded into
the binary via `embed.FS`, and are applied with `pressly/goose/v3`
before the HTTP listener opens. The startup path also issues a
`CREATE SCHEMA IF NOT EXISTS backend` so a fresh database does not
trip goose's bookkeeping table on the first migration.
- Pre-production uses one migration file (`00001_init.sql`) covering
every backend domain (auth, user, admin, lobby, runtime, mail,
notification, geo). Future migrations are sequence-numbered and
additive.
- Queries are written through `go-jet/jet/v2`. The generated code is in
`internal/postgres/jet/backend/` and is committed; `internal/postgres/jet/jet.go`
carries package metadata that survives regeneration.
- `make jet` regenerates the jet code: it spins up a transient Postgres
container, applies the migrations, runs `cmd/jetgen`, and writes the
output back into `internal/postgres/jet/backend/`. Goose's
bookkeeping table is dropped before generation so it does not leak
into the generated package.
- `BACKEND_POSTGRES_DSN` must include `search_path=backend`; the runtime
pool relies on this so unqualified reads and writes resolve to the
service-owned schema.
Idempotency is enforced through UNIQUE indexes on durable tables; there
is no separate idempotency-key table. Worker pickup uses `SELECT ...
FOR UPDATE SKIP LOCKED` ordered by `next_attempt_at`.
## 6. In-Memory Cache
`backend` warms the following caches at startup before the HTTP listener
opens:
- Active device sessions (lookup by `device_session_id`).
- User entitlement snapshots (lookup by `user_id`).
- Engine version registry (lookup by version label, populated by `internal/runtime`).
- Active runtime records (lookup by `game_id`, populated by `internal/runtime`).
- Active games and their memberships.
- Race Name Directory canonical keys.
- Admin accounts.
Each cache is updated write-through in the same domain transaction
that touches Postgres. Caches are bounded to MVP-scale data sets; if any
cache grows beyond the budget, the architecture document mandates a
discussion before moving the cache out of process.
## 7. gRPC Push Interface
The push interface is the only gRPC server hosted by `backend`. The
contract is in `proto/push/v1/push.proto`:
```proto
service Push {
rpc SubscribePush(GatewaySubscribeRequest) returns (stream PushEvent);
}
message PushEvent {
oneof kind {
ClientEvent client_event = 1;
SessionInvalidation session_invalidation = 2;
}
string cursor = 3;
}
```
- `ClientEvent` carries an opaque payload addressed to a `(user_id [,
device_session_id])`. Gateway signs and forwards it to active client
subscriptions. The frame also carries `event_id`, `request_id`, and
`trace_id` correlation strings populated by backend producers
(notification dispatcher fills `event_id` from `route_id`,
`request_id` from the originating intent's `idempotency_key`, and
`trace_id` from the active span); gateway re-emits the values inside
the signed client envelope without re-interpreting them.
- `SessionInvalidation` instructs gateway to close active subscriptions
and reject in-flight requests for the affected sessions.
- `cursor` is a monotonically increasing string. Gateway stores the last
consumed cursor and uses it on reconnect. The format is opaque to
gateway; backend only guarantees lexicographic monotonicity within a
process lifetime, and resets the sequence after a restart.
- Backend keeps an in-memory ring buffer of recent events with a TTL of
`BACKEND_FRESHNESS_WINDOW`. A reconnect whose cursor has aged out of
the buffer replays nothing and starts fresh from newly published events.
- A gateway reconnect with the same `gateway_client_id` replaces the
previous subscription (`codes.Aborted` is returned to the older
stream). Distinct ids fan out as separate broadcast targets.
- Cursor format is a zero-padded decimal `uint64` string emitted by an
in-process counter; gateway treats it as opaque.
- Ring buffer eviction is by TTL plus a fixed capacity ceiling.
Backpressure is per-connection drop-oldest: if the buffered channel
for a subscriber overflows, the oldest event for that connection is
discarded and the loss is logged so operators can correlate the gap
on the gateway side.
## 8. Engine Client
`internal/engineclient` is a thin `net/http`-based client that targets
running engine containers at `http://galaxy-game-{game_id}:8080`. It
uses the DTOs in `pkg/model/{order,report,rest}` directly; it does not
introduce its own request/response types.
Endpoints used:
- `POST /api/v1/admin/init`
- `GET /api/v1/admin/status`
- `PUT /api/v1/admin/turn`
- `POST /api/v1/admin/race/banish`
- `PUT /api/v1/command`
- `PUT /api/v1/order`
- `GET /api/v1/report`
- `GET /healthz`
Engine-version arbitration lives in `internal/runtime`. Patch updates
are semver-patch-only inside the same major/minor line; major or minor
changes require explicit stop and start. Reconciliation adopts
unrecorded containers tagged with the `galaxy.backend=1` label and
marks recorded containers that are missing as removed.
## 9. Mail Outbox
Tables in schema `backend`:
- `mail_deliveries` — one row per logical delivery, keyed by
`(template_id, idempotency_key)`.
- `mail_recipients` — `(delivery_id, address)`.
- `mail_attempts` — append-only attempt log.
- `mail_dead_letters` — terminal failure mirror with the latest payload
pointer for forensics and resend.
- `mail_payloads` — opaque rendered payload bytes.
Lifecycle:
1. Producer writes the delivery and payload rows in one transaction.
2. The worker picks the row with `SELECT ... FOR UPDATE SKIP LOCKED`,
sends through SMTP using `wneessen/go-mail`, records the attempt,
and either marks `sent` or schedules `next_attempt_at` with
exponential backoff and jitter.
3. After `BACKEND_MAIL_MAX_ATTEMPTS` the delivery moves to
`mail_dead_letters`. An admin notification intent is emitted.
4. Operators can resend a `pending`, `retrying`, or `dead_lettered`
delivery via `POST /api/v1/admin/mail/{delivery_id}/resend`. Resend
on a `sent` delivery returns `409 Conflict` so operators cannot
accidentally redeliver an email that already left the relay.
On startup the worker drains every row in `pending` or `retrying`
state. There is no separate recovery flow.
`mail_attempts.attempt_no` is monotonic across the entire history of a
single `delivery_id` — a resend keeps the previous attempts and appends
new ones rather than restarting the counter. `EnqueueLoginCode` uses a
server-side UUID as `idempotency_key` so callers cannot collide; other
template producers (notification routes, future direct callers) supply
a stable key, and the UNIQUE on `(template_id, idempotency_key)`
prevents duplicate delivery rows.
## 10. Notification Catalog
The catalog is the closed set of `notification_kind` values understood
by `internal/notification`. Each kind specifies the channels it fans
out to and the payload fields used by templates and clients. The
`auth.login_code` row is delivered directly through the mail outbox
from `internal/auth` and is not materialised inside
`notification_routes` — the auth flow needs the delivery row to commit
synchronously with the challenge, which the notification dispatcher
cannot guarantee.
| Kind | Channels | Payload essentials |
| ----------------------------------- | ------------- | -------------------------------------------------------- |
| `auth.login_code` *(direct mail)* | email | `code`, `ttl` |
| `lobby.invite.received` | push, email | `game_id`, `inviter_user_id` |
| `lobby.invite.revoked` | push | `game_id` |
| `lobby.application.submitted` | push | `game_id`, `application_id` |
| `lobby.application.approved` | push, email | `game_id` |
| `lobby.application.rejected` | push, email | `game_id` |
| `lobby.membership.removed` | push, email | `game_id`, `reason` |
| `lobby.membership.blocked` | push, email | `game_id` |
| `lobby.race_name.registered` | push | `race_name` |
| `lobby.race_name.pending` | push, email | `race_name`, `expires_at` |
| `lobby.race_name.expired` | push | `race_name` |
| `runtime.image_pull_failed` | admin email | `game_id`, `image_ref` |
| `runtime.container_start_failed` | admin email | `game_id` |
| `runtime.start_config_invalid` | admin email | `game_id`, `reason` |
Admin-channel kinds (`runtime.*`) deliver email to
`BACKEND_NOTIFICATION_ADMIN_EMAIL`; when the variable is empty, those
routes land in `notification_routes` with `status='skipped'` and the
operator log line records the configuration miss.
`game.*` (`game.started`, `game.turn.ready`, `game.generation.failed`,
`game.finished`) and `mail.dead_lettered` are reserved kinds without a
producer in the catalog; adding them is an additive change to the
catalog vocabulary and the migration CHECK constraint.
Templates ship in English only; localisation belongs to clients that
render the push payload, not to the backend mail body. Per-route mail
idempotency uses the `route_id` UUID as `idempotency_key`, so retried
notifications and partial failures cannot fan out a duplicate email.
## 11. Geo Profile
`internal/geo` operates on the GeoLite2 Country database loaded from
`BACKEND_GEOIP_DB_PATH` at startup.
- `SetDeclaredCountryAtRegistration(user_id, ip)` is called from
`auth.confirmEmailCode`. It looks up the country and writes it to
`accounts.declared_country`. The value is never updated afterwards.
- `IncrementCounterAsync(user_id, ip)` is called from the user-surface
middleware. It launches a goroutine that looks up the country and
upserts `(user_id, country, count)` in `user_country_counters`. The
caller does not block.
- Lookup errors are logged and ignored; geo work never blocks the user.
There is no aggregation, no automatic flagging, no version history of
declared country, no admin-side review workflow. Counter rows are
exposed to operators via the admin surface for manual inspection only.
## 12. Admin Surface
- HTTP Basic Auth credentials are checked against `admin_accounts`
(Postgres). Passwords are hashed with bcrypt cost 12.
- Bootstrap on startup: if `BACKEND_ADMIN_BOOTSTRAP_USER` is configured
and no row with that username exists, insert one with the hashed
bootstrap password. The insert is idempotent.
- Admin endpoints are grouped by domain:
- `POST/GET /api/v1/admin/admin-accounts/*` — manage admins.
- `GET/POST /api/v1/admin/users/*` — list, lookup, sanction, limit, soft delete.
- `GET/POST /api/v1/admin/games/*` — list, create (public-game), inspect, force start/stop, ban member.
- `GET/POST /api/v1/admin/runtimes/*` — inspect runtime, restart, patch.
- `GET/POST /api/v1/admin/mail/*` — list deliveries, resend, view attempts.
- `GET /api/v1/admin/notifications/*` — inspect notifications and dead letters.
- Failed Basic Auth returns `401` with `WWW-Authenticate: Basic realm="galaxy-admin"`.
## 13. Local Run
Prerequisites:
- Go toolchain matching `go.work`.
- Postgres reachable via `BACKEND_POSTGRES_DSN` (a local container is
fine).
- An SMTP server (`mailhog`, `mailpit`, or any other dev relay) reachable
via `BACKEND_SMTP_HOST`/`BACKEND_SMTP_PORT`.
- Docker daemon reachable via `BACKEND_DOCKER_HOST` (the local socket is
the default; running engines through this requires the user-defined
bridge named in `BACKEND_DOCKER_NETWORK`).
- A GeoLite2 Country `.mmdb` file at `BACKEND_GEOIP_DB_PATH`. For tests,
use the synthetic mmdb generator under `pkg/geoip/test-data/`.
Run:
```bash
go run ./backend/cmd/backend
```
Migrations are embedded and applied at startup. Bootstrapping the first
admin happens on the first run if the env vars are set. Subsequent
restarts are idempotent.
## 14. Testing
Three levels:
- **Unit tests** colocated with the implementation (`*_test.go` next to
the file under test). Use `testify` for assertions, `go.uber.org/mock`
for interface mocking when an external boundary justifies it.
- **Contract tests** under `internal/server/`. Validate every request
and response against `openapi.yaml` at runtime via `kin-openapi`. New
endpoints must be added to `openapi.yaml` first; the contract test
fails until the implementation matches.
- **Integration tests** under `../integration/` (top-level repo
module). Use `testcontainers-go` for Postgres and optionally for an
SMTP capture container. Cover the user flows end to end through the
real backend binary.
`make test` runs unit and contract tests. `make integration-test` runs
the integration suite (requires Docker).
## 15. Telemetry
Required minimum signals:
- `http_requests_total{group, method, path, status}` and
`http_request_duration_seconds{...}` for each route group.
- `grpc_push_subscribers` (gauge), `grpc_push_events_total{kind}`,
`grpc_push_dropped_total{gateway_client_id}`.
- `mail_outbox_depth{state}` (gauge), `mail_attempts_total{outcome}`,
`mail_dead_letters_total`.
- `notification_intents_total{kind, outcome}`,
`notification_routes_total{channel}`.
- `runtime_container_ops_total{op, outcome}`,
`runtime_health_probes_total{outcome}`.
- `geo_lookups_total{outcome}`.
- `db_pool_acquires_total`, `db_pool_in_use{...}`, `db_pool_waits_total`.
Tracing covers HTTP request → domain operation → Postgres calls →
external client calls (SMTP, Docker, engine). Every span is linked to
the request id.
Logs are JSON, written to stdout, with `otel_trace_id` and
`otel_span_id` injected when a span context is available. The minimum
fields are `ts`, `level`, `caller`, `service`, `msg`, plus per-call
context.
## 16. Operational Notes
- Graceful shutdown drains in this order on SIGTERM/SIGINT: stop
accepting new HTTP and gRPC traffic → wait for in-flight requests
(bounded by `BACKEND_HTTP_SHUTDOWN_TIMEOUT` and the gRPC counterpart)
→ flush mail outbox writes that have already started → drain push
events to gateway → close the Docker client → close the Postgres pool.
- `/healthz` returns 200 unconditionally as long as the process is
alive.
- `/readyz` checks: Postgres reachable, migrations applied, gRPC
listener bound. Returns 503 until all hold.
- Logs are JSON to stdout. Crash dumps go to stderr.
- Configuration changes require a restart; there is no live reload.
- Bootstrap admin password should be rotated through the admin surface
immediately after the first deploy.
## 17. Service Documentation
Extended service-local documentation lives in [`docs/`](docs/):
- [Documentation index](docs/README.md)
- [Runtime and components](docs/runtime.md)
- [Domain and protocol flows](docs/flows.md)
- [Operator runbook](docs/runbook.md)
- [Configuration and OpenAPI examples](docs/examples.md)
Primary references:
- [`PLAN.md`](PLAN.md) — historical staged build-up of the service.
- [`openapi.yaml`](openapi.yaml) — REST contract.
- [`../ARCHITECTURE.md`](../ARCHITECTURE.md) — workspace-level architecture.
+11
View File
@@ -0,0 +1,11 @@
version: v2
plugins:
- remote: buf.build/protocolbuffers/go:v1.36.11
out: proto
opt:
- paths=source_relative
- remote: buf.build/grpc/go:v1.6.1
out: proto
opt:
- paths=source_relative
+12
View File
@@ -0,0 +1,12 @@
version: v2
modules:
- path: proto
lint:
use:
- STANDARD
breaking:
use:
- FILE
+544
View File
@@ -0,0 +1,544 @@
// Command backend boots the Galaxy backend process. It loads configuration,
// initialises telemetry and the structured logger, opens the Postgres pool,
// applies embedded migrations, and runs the HTTP, gRPC push, and (optional)
// Prometheus metrics listeners until SIGINT or SIGTERM triggers an orderly
// shutdown.
package main
import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/signal"
	"syscall"

	"galaxy/backend/internal/admin"
	"galaxy/backend/internal/app"
	"galaxy/backend/internal/auth"
	"galaxy/backend/internal/config"
	"galaxy/backend/internal/dockerclient"
	"galaxy/backend/internal/engineclient"
	"galaxy/backend/internal/geo"
	"galaxy/backend/internal/lobby"
	"galaxy/backend/internal/logging"
	"galaxy/backend/internal/mail"
	"galaxy/backend/internal/metricsapi"
	"galaxy/backend/internal/notification"
	backendpostgres "galaxy/backend/internal/postgres"
	"galaxy/backend/internal/push"
	"galaxy/backend/internal/runtime"
	backendserver "galaxy/backend/internal/server"
	"galaxy/backend/internal/telemetry"
	"galaxy/backend/internal/user"

	"github.com/google/uuid"
	mobyclient "github.com/moby/moby/client"
	"go.uber.org/zap"
)
// main is the process entrypoint. It derives a context that is cancelled on
// SIGINT or SIGTERM, hands it to run, and maps a non-nil error to exit code 1
// after printing it to stderr.
func main() {
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)

	err := run(ctx)

	// Release the signal registration explicitly instead of deferring it:
	// os.Exit terminates the process without running deferred functions, so
	// a deferred stop would be skipped on the error path.
	stop()

	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// run builds every backend dependency in order and then blocks inside
// app.Run until ctx is cancelled or the application returns.
//
// The named result err is deliberate: each deferred cleanup joins its own
// error onto it with errors.Join, so a failure during teardown is reported
// alongside (not instead of) the primary error. Deferred functions run in
// reverse registration order, which means the engine client is closed
// first, then the geo service is drained and closed, then the Postgres
// pool, then telemetry, and finally the logger is synced.
//
// Several services depend on each other cyclically. Those cycles are
// broken with the small adapter structs declared below this function: an
// empty adapter is handed to the first service, and its svc pointer is
// patched once the second service exists.
func run(ctx context.Context) (err error) {
	// --- configuration, logging, telemetry -------------------------------
	cfg, err := config.LoadFromEnv()
	if err != nil {
		return fmt.Errorf("load backend config: %w", err)
	}
	logger, err := logging.New(cfg.Logging)
	if err != nil {
		return fmt.Errorf("build backend logger: %w", err)
	}
	defer func() {
		err = errors.Join(err, logging.Sync(logger))
	}()
	telemetryRT, err := telemetry.New(ctx, logger, cfg.Telemetry)
	if err != nil {
		return fmt.Errorf("build backend telemetry: %w", err)
	}
	defer func() {
		// ctx is typically already cancelled when this runs, so the
		// shutdown gets its own background-derived, time-boxed context.
		shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
		defer shutdownCancel()
		err = errors.Join(err, telemetryRT.Shutdown(shutdownCtx))
	}()

	// --- Postgres pool and schema migrations ------------------------------
	db, err := backendpostgres.Open(ctx, cfg.Postgres, telemetryRT)
	if err != nil {
		return fmt.Errorf("open backend postgres pool: %w", err)
	}
	defer func() {
		err = errors.Join(err, db.Close())
	}()
	if err := backendpostgres.ApplyMigrations(ctx, db); err != nil {
		return fmt.Errorf("apply backend migrations: %w", err)
	}

	// --- push and geo services --------------------------------------------
	pushSvc, err := push.NewService(push.ServiceConfig{FreshnessWindow: cfg.FreshnessWindow}, logger, telemetryRT)
	if err != nil {
		return fmt.Errorf("build backend push service: %w", err)
	}
	geoSvc, err := geo.NewService(cfg.GeoIP.DBPath, db)
	if err != nil {
		return fmt.Errorf("build backend geo service: %w", err)
	}
	geoSvc.SetLogger(logger)
	defer func() {
		// Drain pending counter goroutines first so their upserts run
		// against a still-open Postgres pool, then release the
		// GeoLite2 resolver. Drain is bounded by cfg.ShutdownTimeout
		// so a stuck DB cannot indefinitely delay process exit.
		drainCtx, drainCancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
		defer drainCancel()
		geoSvc.Drain(drainCtx)
		err = errors.Join(err, geoSvc.Close())
	}()

	// --- user and auth stores / caches ------------------------------------
	userStore := user.NewStore(db)
	userCache := user.NewCache()
	authStore := auth.NewStore(db)
	authCache := auth.NewCache()
	if err := authCache.Warm(ctx, authStore); err != nil {
		return fmt.Errorf("warm backend auth cache: %w", err)
	}
	logger.Info("auth cache warmed", zap.Int("active_sessions", authCache.Size()))

	// user.Service is built first but needs collaborators that do not
	// exist yet: auth.Service (through SessionRevoker), lobby.Service
	// (through the Lobby cascade) and notification.Service (through the
	// Notification cascade). auth.Service in turn takes user.Service as
	// its User dependency, and lobby / runtime need notification
	// publishers. Each of these forward references is resolved with a
	// tiny adapter struct whose svc pointer is patched once the target
	// service has been constructed.
	revoker := &authSessionRevoker{}
	lobbyCascade := &lobbyCascadeAdapter{}
	userNotifyCascade := &userNotificationCascadeAdapter{}
	lobbyNotifyPublisher := &lobbyNotificationPublisherAdapter{}
	runtimeNotifyPublisher := &runtimeNotificationPublisherAdapter{}
	userSvc := user.NewService(user.Deps{
		Store: userStore,
		Cache: userCache,
		Lobby: lobbyCascade,
		Notification: userNotifyCascade,
		Geo: geoSvc,
		SessionRevoker: revoker,
		UserNameMaxRetries: cfg.Auth.UserNameMaxRetries,
		Logger: logger,
	})
	if err := userCache.Warm(ctx, userStore); err != nil {
		return fmt.Errorf("warm backend user entitlement cache: %w", err)
	}
	logger.Info("user entitlement cache warmed", zap.Int("snapshots", userCache.Size()))

	// --- mail --------------------------------------------------------------
	mailStore := mail.NewStore(db)
	mailSender, err := mail.NewSMTPSender(cfg.SMTP, logger)
	if err != nil {
		return fmt.Errorf("build mail smtp sender: %w", err)
	}
	mailSvc := mail.NewService(mail.Deps{
		Store: mailStore,
		SMTP: mailSender,
		Admin: mail.NewNoopAdminNotifier(logger),
		Config: cfg.Mail,
		Logger: logger,
	})

	// --- auth service (closes the user <-> auth cycle) ---------------------
	authSvc := auth.NewService(auth.Deps{
		Store: authStore,
		Cache: authCache,
		User: userSvc,
		Geo: geoSvc,
		Mail: mailSvc,
		Push: pushSvc,
		Config: cfg.Auth,
		Logger: logger,
	})
	revoker.svc = authSvc

	// --- admin --------------------------------------------------------------
	// Bootstrap runs before the admin cache is warmed, so whatever it
	// writes to the store is visible to the subsequent Warm call.
	adminStore := admin.NewStore(db)
	adminCache := admin.NewCache()
	if err := admin.Bootstrap(ctx, adminStore, cfg.Admin, logger); err != nil {
		return fmt.Errorf("admin bootstrap: %w", err)
	}
	adminSvc := admin.NewService(admin.Deps{
		Store: adminStore,
		Cache: adminCache,
		Logger: logger,
	})
	if err := adminCache.Warm(ctx, adminStore); err != nil {
		return fmt.Errorf("warm backend admin cache: %w", err)
	}
	logger.Info("admin cache warmed", zap.Int("admins", adminCache.Size()))

	// --- lobby --------------------------------------------------------------
	// runtimeGateway is another forward reference: lobby needs a runtime
	// gateway, but runtime.Service is only built further down.
	runtimeGateway := &runtimeGatewayAdapter{}
	lobbyStore := lobby.NewStore(db)
	lobbyCache := lobby.NewCache()
	lobbySvc, err := lobby.NewService(lobby.Deps{
		Store: lobbyStore,
		Cache: lobbyCache,
		Runtime: runtimeGateway,
		Notification: lobbyNotifyPublisher,
		Entitlement: &userEntitlementAdapter{svc: userSvc},
		Config: cfg.Lobby,
		Logger: logger,
	})
	if err != nil {
		return fmt.Errorf("build backend lobby service: %w", err)
	}
	if err := lobbyCache.Warm(ctx, lobbyStore); err != nil {
		return fmt.Errorf("warm backend lobby cache: %w", err)
	}
	games, members, raceNames := lobbyCache.Sizes()
	logger.Info("lobby cache warmed",
		zap.Int("games", games),
		zap.Int("memberships", members),
		zap.Int("race_names", raceNames),
	)
	lobbyCascade.svc = lobbySvc

	// --- docker and engine clients -----------------------------------------
	dockerCli, err := mobyclient.New(mobyclient.WithHost(cfg.Docker.Host))
	if err != nil {
		return fmt.Errorf("build docker client: %w", err)
	}
	dockerAdapter, err := dockerclient.NewAdapter(dockerclient.AdapterConfig{Docker: dockerCli})
	if err != nil {
		return fmt.Errorf("build docker adapter: %w", err)
	}
	if err := dockerAdapter.EnsureNetwork(ctx, cfg.Docker.Network); err != nil {
		return fmt.Errorf("docker network %q: %w", cfg.Docker.Network, err)
	}
	engineCli, err := engineclient.NewClient(engineclient.Config{
		CallTimeout: cfg.Engine.CallTimeout,
		ProbeTimeout: cfg.Engine.ProbeTimeout,
	})
	if err != nil {
		return fmt.Errorf("build engine client: %w", err)
	}
	defer func() {
		err = errors.Join(err, engineCli.Close())
	}()

	// --- runtime (closes the lobby <-> runtime cycle) -----------------------
	runtimeStore := runtime.NewStore(db)
	runtimeCache := runtime.NewCache()
	// NOTE(review): the third argument is passed as nil; its meaning is
	// not visible from this file — confirm against NewEngineVersionService.
	engineVersionSvc := runtime.NewEngineVersionService(runtimeStore, runtimeCache, nil)
	runtimeSvc, err := runtime.NewService(runtime.Deps{
		Store: runtimeStore,
		Cache: runtimeCache,
		EngineVersions: engineVersionSvc,
		Docker: dockerAdapter,
		Engine: engineCli,
		Lobby: &lobbyConsumerAdapter{svc: lobbySvc},
		Notification: runtimeNotifyPublisher,
		DockerNetwork: cfg.Docker.Network,
		HostStateRoot: cfg.Game.StateRoot,
		Config: cfg.Runtime,
		Logger: logger,
	})
	if err != nil {
		return fmt.Errorf("build runtime service: %w", err)
	}
	if err := runtimeCache.Warm(ctx, runtimeStore); err != nil {
		return fmt.Errorf("warm backend runtime cache: %w", err)
	}
	rtRecords, rtVersions := runtimeCache.Sizes()
	logger.Info("runtime cache warmed",
		zap.Int("active_runtimes", rtRecords),
		zap.Int("engine_versions", rtVersions),
	)
	runtimeGateway.svc = runtimeSvc

	// --- notification (patches the three remaining adapters) ----------------
	notifStore := notification.NewStore(db)
	notifSvc := notification.NewService(notification.Deps{
		Store: notifStore,
		Mail: mailSvc,
		Push: pushSvc,
		Accounts: userSvc,
		Config: cfg.Notification,
		Logger: logger,
	})
	userNotifyCascade.svc = notifSvc
	lobbyNotifyPublisher.svc = notifSvc
	runtimeNotifyPublisher.svc = notifSvc
	if email := cfg.Notification.AdminEmail; email == "" {
		logger.Info("notification admin email not configured (BACKEND_NOTIFICATION_ADMIN_EMAIL); admin-channel routes will be skipped")
	} else {
		logger.Info("notification admin email configured", zap.String("admin_email", email))
	}

	// --- HTTP handler groups -------------------------------------------------
	publicAuthHandlers := backendserver.NewPublicAuthHandlers(authSvc, logger)
	internalSessionsHandlers := backendserver.NewInternalSessionsHandlers(authSvc, logger)
	userAccountHandlers := backendserver.NewUserAccountHandlers(userSvc, logger)
	adminUsersHandlers := backendserver.NewAdminUsersHandlers(userSvc, logger)
	adminAdminAccountsHandlers := backendserver.NewAdminAdminAccountsHandlers(adminSvc, logger)
	internalUsersHandlers := backendserver.NewInternalUsersHandlers(userSvc, logger)
	userLobbyGamesHandlers := backendserver.NewUserLobbyGamesHandlers(lobbySvc, logger)
	userLobbyApplicationsHandlers := backendserver.NewUserLobbyApplicationsHandlers(lobbySvc, logger)
	userLobbyInvitesHandlers := backendserver.NewUserLobbyInvitesHandlers(lobbySvc, logger)
	userLobbyMembershipsHandlers := backendserver.NewUserLobbyMembershipsHandlers(lobbySvc, logger)
	userLobbyMyHandlers := backendserver.NewUserLobbyMyHandlers(lobbySvc, logger)
	userLobbyRaceNamesHandlers := backendserver.NewUserLobbyRaceNamesHandlers(lobbySvc, logger)
	adminGamesHandlers := backendserver.NewAdminGamesHandlers(lobbySvc, logger)
	adminEngineVersionsHandlers := backendserver.NewAdminEngineVersionsHandlers(engineVersionSvc, logger)
	adminRuntimesHandlers := backendserver.NewAdminRuntimesHandlers(runtimeSvc, logger)
	adminMailHandlers := backendserver.NewAdminMailHandlers(mailSvc, logger)
	adminNotificationsHandlers := backendserver.NewAdminNotificationsHandlers(notifSvc, logger)
	adminGeoHandlers := backendserver.NewAdminGeoHandlers(geoSvc, logger)
	userGamesHandlers := backendserver.NewUserGamesHandlers(runtimeSvc, engineCli, logger)

	// ready reports true only when every warmed cache reports Ready; it is
	// handed to the router as its readiness probe.
	ready := func() bool {
		return authCache.Ready() && userCache.Ready() && adminCache.Ready() && lobbyCache.Ready() && runtimeCache.Ready()
	}
	handler, err := backendserver.NewRouter(backendserver.RouterDependencies{
		Logger: logger,
		Telemetry: telemetryRT,
		Ready: ready,
		AdminVerifier: adminSvc,
		GeoCounter: geoSvc,
		PublicAuth: publicAuthHandlers,
		InternalSessions: internalSessionsHandlers,
		UserAccount: userAccountHandlers,
		AdminUsers: adminUsersHandlers,
		AdminAdminAccounts: adminAdminAccountsHandlers,
		InternalUsers: internalUsersHandlers,
		UserLobbyGames: userLobbyGamesHandlers,
		UserLobbyApplications: userLobbyApplicationsHandlers,
		UserLobbyInvites: userLobbyInvitesHandlers,
		UserLobbyMemberships: userLobbyMembershipsHandlers,
		UserLobbyMy: userLobbyMyHandlers,
		UserLobbyRaceNames: userLobbyRaceNamesHandlers,
		AdminGames: adminGamesHandlers,
		AdminRuntimes: adminRuntimesHandlers,
		AdminEngineVersions: adminEngineVersionsHandlers,
		AdminMail: adminMailHandlers,
		AdminNotifications: adminNotificationsHandlers,
		AdminGeo: adminGeoHandlers,
		UserGames: userGamesHandlers,
	})
	if err != nil {
		return fmt.Errorf("build backend router: %w", err)
	}

	// --- long-running components --------------------------------------------
	httpServer := backendserver.NewServer(cfg.HTTP, handler, logger)
	pushServer := push.NewServer(cfg.GRPCPush, pushSvc, logger, telemetryRT)
	metricsServer := metricsapi.NewServer(telemetryRT.PrometheusListenAddr(), telemetryRT.Handler(), logger)
	lobbySweeper := lobby.NewSweeper(lobbySvc)
	mailWorker := mail.NewWorker(mailSvc)
	notifWorker := notification.NewWorker(notifSvc)
	runtimeWorkers := runtimeSvc.Workers()
	runtimeScheduler := runtimeSvc.SchedulerComponent()
	runtimeReconciler := runtimeSvc.Reconciler()
	components := []app.Component{httpServer, pushServer, mailWorker, notifWorker, lobbySweeper, runtimeWorkers, runtimeScheduler, runtimeReconciler}
	// The Prometheus listener is optional and only joins the component
	// set when the metrics server reports itself enabled.
	if metricsServer.Enabled() {
		components = append(components, metricsServer)
	}
	logger.Info("backend application starting",
		zap.String("http_addr", cfg.HTTP.Addr),
		zap.String("grpc_push_addr", cfg.GRPCPush.Addr),
		zap.String("traces_exporter", cfg.Telemetry.TracesExporter),
		zap.String("metrics_exporter", cfg.Telemetry.MetricsExporter),
		zap.String("prometheus_addr", telemetryRT.PrometheusListenAddr()),
	)
	return app.New(cfg.ShutdownTimeout, components...).Run(ctx)
}
// authSessionRevoker bridges `*auth.Service` to the `user.SessionRevoker`
// interface. auth's RevokeAllForUser returns the revoked sessions plus an
// error, whereas the user-side interface wants the error only.
//
// The svc field starts out nil and is assigned by the caller once both
// services exist: auth.Service takes user.Service as a dependency and
// user.Service takes this revoker, so one side has to be wired late.
// Patching a pointer inside an adapter breaks that cycle without a third
// package.
type authSessionRevoker struct {
	svc *auth.Service
}

// RevokeAllForUser forwards to the wrapped auth service and keeps only the
// error. While the adapter is unwired (nil receiver or nil svc) it does
// nothing and reports success.
func (r *authSessionRevoker) RevokeAllForUser(ctx context.Context, userID uuid.UUID) error {
	if r != nil && r.svc != nil {
		_, revokeErr := r.svc.RevokeAllForUser(ctx, userID)
		return revokeErr
	}
	return nil
}
// lobbyCascadeAdapter exposes `*lobby.Service` through the
// `user.LobbyCascade` interface. Like authSessionRevoker, its svc field is
// assigned after both services have been built, because user.Service is
// constructed before lobby.Service.
type lobbyCascadeAdapter struct {
	svc *lobby.Service
}

// OnUserBlocked relays the event to the lobby service; it is a successful
// no-op while the adapter is still unwired.
func (a *lobbyCascadeAdapter) OnUserBlocked(ctx context.Context, userID uuid.UUID) error {
	if a != nil && a.svc != nil {
		return a.svc.OnUserBlocked(ctx, userID)
	}
	return nil
}

// OnUserDeleted relays the event to the lobby service; it is a successful
// no-op while the adapter is still unwired.
func (a *lobbyCascadeAdapter) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
	if a != nil && a.svc != nil {
		return a.svc.OnUserDeleted(ctx, userID)
	}
	return nil
}
// userEntitlementAdapter exposes `*user.Service.GetEntitlementSnapshot`
// through the `lobby.EntitlementProvider` interface. Lobby consults the
// snapshot's `MaxRegisteredRaceNames` field when a race name is
// registered, to enforce the per-tier quota.
type userEntitlementAdapter struct {
	svc *user.Service
}

// GetMaxRegisteredRaceNames returns the quota from the user's entitlement
// snapshot. A lookup failure yields (0, err). When the adapter has no
// service attached it answers with a quota of 1 and no error.
func (a *userEntitlementAdapter) GetMaxRegisteredRaceNames(ctx context.Context, userID uuid.UUID) (int32, error) {
	if a != nil && a.svc != nil {
		snapshot, lookupErr := a.svc.GetEntitlementSnapshot(ctx, userID)
		if lookupErr != nil {
			return 0, lookupErr
		}
		return snapshot.MaxRegisteredRaceNames, nil
	}
	return 1, nil
}
// runtimeGatewayAdapter satisfies `lobby.RuntimeGateway` by forwarding
// every call to `*runtime.Service`. runtime depends on lobby (through
// LobbyConsumer), so lobby is built first with an empty adapter and the
// svc pointer is filled in once runtimeSvc exists.
type runtimeGatewayAdapter struct {
	svc *runtime.Service
}

// wired reports whether a runtime service has been attached yet. It is
// safe to call on a nil receiver.
func (a *runtimeGatewayAdapter) wired() bool {
	return a != nil && a.svc != nil
}

// StartGame forwards to the runtime service; no-op (nil) while unwired.
func (a *runtimeGatewayAdapter) StartGame(ctx context.Context, gameID uuid.UUID) error {
	if !a.wired() {
		return nil
	}
	return a.svc.StartGame(ctx, gameID)
}

// StopGame forwards to the runtime service; no-op (nil) while unwired.
func (a *runtimeGatewayAdapter) StopGame(ctx context.Context, gameID uuid.UUID) error {
	if !a.wired() {
		return nil
	}
	return a.svc.StopGame(ctx, gameID)
}

// PauseGame forwards to the runtime service; no-op (nil) while unwired.
func (a *runtimeGatewayAdapter) PauseGame(ctx context.Context, gameID uuid.UUID) error {
	if !a.wired() {
		return nil
	}
	return a.svc.PauseGame(ctx, gameID)
}

// ResumeGame forwards to the runtime service; no-op (nil) while unwired.
func (a *runtimeGatewayAdapter) ResumeGame(ctx context.Context, gameID uuid.UUID) error {
	if !a.wired() {
		return nil
	}
	return a.svc.ResumeGame(ctx, gameID)
}
// lobbyConsumerAdapter satisfies `runtime.LobbyConsumer`. It converts the
// runtime package's DTOs into their lobby-package counterparts before
// delegating to `*lobby.Service`.
type lobbyConsumerAdapter struct {
	svc *lobby.Service
}

// OnRuntimeSnapshot copies the runtime snapshot field by field into a
// lobby.RuntimeSnapshot (including one lobby.PlayerTurnStats per player)
// and passes it on. It is a successful no-op while no lobby service is
// attached.
func (a *lobbyConsumerAdapter) OnRuntimeSnapshot(ctx context.Context, gameID uuid.UUID, snapshot runtime.LobbySnapshot) error {
	if a == nil || a.svc == nil {
		return nil
	}
	converted := make([]lobby.PlayerTurnStats, len(snapshot.PlayerStats))
	for i, src := range snapshot.PlayerStats {
		converted[i] = lobby.PlayerTurnStats{
			UserID:            src.UserID,
			InitialPlanets:    src.InitialPlanets,
			InitialPopulation: src.InitialPopulation,
			CurrentPlanets:    src.CurrentPlanets,
			CurrentPopulation: src.CurrentPopulation,
			MaxPlanets:        src.MaxPlanets,
			MaxPopulation:     src.MaxPopulation,
		}
	}
	lobbySnapshot := lobby.RuntimeSnapshot{
		CurrentTurn:   snapshot.CurrentTurn,
		RuntimeStatus: snapshot.RuntimeStatus,
		EngineHealth:  snapshot.EngineHealth,
		ObservedAt:    snapshot.ObservedAt,
		PlayerStats:   converted,
	}
	return a.svc.OnRuntimeSnapshot(ctx, gameID, lobbySnapshot)
}

// OnRuntimeJobResult maps a runtime.JobResult onto lobby.RuntimeJobResult
// and passes it on. It is a successful no-op while no lobby service is
// attached.
func (a *lobbyConsumerAdapter) OnRuntimeJobResult(ctx context.Context, gameID uuid.UUID, result runtime.JobResult) error {
	if a == nil || a.svc == nil {
		return nil
	}
	lobbyResult := lobby.RuntimeJobResult{
		Op:      result.Op,
		Status:  result.Status,
		Message: result.Message,
	}
	return a.svc.OnRuntimeJobResult(ctx, gameID, lobbyResult)
}
// userNotificationCascadeAdapter satisfies `user.NotificationCascade` by
// forwarding to `*notification.Service`. user.Service needs the cascade at
// construction time but is built before notification.Service, so the svc
// pointer is assigned later, once notifSvc exists.
type userNotificationCascadeAdapter struct {
	svc *notification.Service
}

// OnUserDeleted relays the deletion to the notification service; it is a
// successful no-op while the adapter is still unwired.
func (a *userNotificationCascadeAdapter) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
	if a != nil && a.svc != nil {
		return a.svc.OnUserDeleted(ctx, userID)
	}
	return nil
}
// lobbyNotificationPublisherAdapter satisfies
// `lobby.NotificationPublisher`. Each LobbyNotification is handed to the
// lobby-facing publisher that notification.Service exposes through
// LobbyAdapter(), which turns it into a notification.Intent.
type lobbyNotificationPublisherAdapter struct {
	svc *notification.Service
}

// PublishLobbyEvent forwards ev to notification.Service's lobby adapter.
// It is a successful no-op while no notification service is attached.
func (a *lobbyNotificationPublisherAdapter) PublishLobbyEvent(ctx context.Context, ev lobby.LobbyNotification) error {
	if a != nil && a.svc != nil {
		publisher := a.svc.LobbyAdapter()
		return publisher.PublishLobbyEvent(ctx, ev)
	}
	return nil
}
// runtimeNotificationPublisherAdapter satisfies
// `runtime.NotificationPublisher` by handing events to the runtime-facing
// publisher that notification.Service exposes through RuntimeAdapter().
type runtimeNotificationPublisherAdapter struct {
	svc *notification.Service
}

// PublishRuntimeEvent forwards the event unchanged to notification.Service's
// runtime adapter. It is a successful no-op while no notification service
// is attached.
func (a *runtimeNotificationPublisherAdapter) PublishRuntimeEvent(ctx context.Context, kind, idempotencyKey string, payload map[string]any) error {
	if a != nil && a.svc != nil {
		publisher := a.svc.RuntimeAdapter()
		return publisher.PublishRuntimeEvent(ctx, kind, idempotencyKey, payload)
	}
	return nil
}
+199
View File
@@ -0,0 +1,199 @@
// Command jetgen regenerates the go-jet/v2 query-builder code under
// galaxy/backend/internal/postgres/jet/ against a transient PostgreSQL
// instance.
//
// Invoke as `go run ./cmd/jetgen` (or via the `make jet` target) from inside
// `galaxy/backend`. The tool is not part of the runtime binary.
//
// Steps:
//
// 1. start a postgres:16-alpine container via testcontainers-go
// 2. open it through galaxy/postgres with search_path=backend
// 3. ensure the backend schema exists, then apply the embedded goose
// migrations
// 4. run jet's PostgreSQL generator against schema=backend, writing into
// internal/postgres/jet under galaxy/backend
package main
import (
"context"
"database/sql"
"errors"
"fmt"
"log"
"net/url"
"os"
"path/filepath"
"runtime"
"strings"
"time"
"galaxy/backend/internal/postgres/migrations"
"galaxy/postgres"
jetpostgres "github.com/go-jet/jet/v2/generator/postgres"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
const (
	// postgresImage is the container image started for the transient
	// database.
	postgresImage = "postgres:16-alpine"
	// superuserName, superuserPassword and superuserDatabase are the
	// credentials and database name the container is created with.
	superuserName = "galaxy"
	superuserPassword = "galaxy"
	superuserDatabase = "galaxy_backend"
	// backendSchema is the schema that is created, migrated, pinned via
	// search_path, and finally handed to the jet generator.
	backendSchema = "backend"
	// containerStartup bounds how long the wait strategy waits for the
	// container to become ready.
	containerStartup = 90 * time.Second
	// defaultOpTimeout is used both as the pool's OperationTimeout and as
	// the ping timeout.
	defaultOpTimeout = 10 * time.Second
	// jetOutputDirSuffix is the output directory relative to the
	// galaxy/backend module root.
	jetOutputDirSuffix = "internal/postgres/jet"
)
// main runs the generator once with a background context and aborts the
// process through log.Fatalf when run reports an error.
func main() {
	runErr := run(context.Background())
	if runErr == nil {
		return
	}
	log.Fatalf("jetgen: %v", runErr)
}
// run performs one full generation pass: it resolves the output directory,
// starts a throwaway PostgreSQL container, applies the embedded migrations
// with search_path pinned to backendSchema, and then runs jet's generator
// against that schema. The container is terminated when run returns;
// a termination failure is only logged and does not change the result.
func run(ctx context.Context) error {
	// Resolve the output path first so a failure here costs nothing: no
	// container has been started yet.
	outputDir, err := jetOutputDir()
	if err != nil {
		return err
	}
	// The wait strategy waits for the readiness log line to appear twice
	// (WithOccurrence(2)) within containerStartup.
	container, err := tcpostgres.Run(ctx, postgresImage,
		tcpostgres.WithDatabase(superuserDatabase),
		tcpostgres.WithUsername(superuserName),
		tcpostgres.WithPassword(superuserPassword),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(containerStartup),
		),
	)
	if err != nil {
		return fmt.Errorf("start postgres container: %w", err)
	}
	defer func() {
		if termErr := testcontainers.TerminateContainer(container); termErr != nil {
			log.Printf("jetgen: terminate container: %v", termErr)
		}
	}()
	baseDSN, err := container.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		return fmt.Errorf("resolve container dsn: %w", err)
	}
	// Every connection opened from scopedDSN has search_path set to the
	// backend schema.
	scopedDSN, err := dsnWithSearchPath(baseDSN, backendSchema)
	if err != nil {
		return err
	}
	if err := applyMigrations(ctx, scopedDSN); err != nil {
		return err
	}
	// jet's ProcessSchema wipes <outputDir>/<schema> on every run, so package
	// metadata kept directly under outputDir (e.g. jet.go) survives. We only
	// ensure the parent directory exists so the first run on a fresh
	// checkout does not fail with ENOENT.
	if err := os.MkdirAll(outputDir, 0o755); err != nil {
		return fmt.Errorf("ensure jet output dir: %w", err)
	}
	// applyMigrations closed its own pool; open a fresh one for generation.
	jetDB, err := openScoped(ctx, scopedDSN)
	if err != nil {
		return fmt.Errorf("open scoped pool for jet generation: %w", err)
	}
	// Close errors are deliberately ignored: the database is disposable.
	defer func() { _ = jetDB.Close() }()
	// Drop goose's bookkeeping table inside the schema-scoped connection so
	// jet does not generate code for it. The table is recreated on the next
	// migration run; jetgen never reuses the container.
	if _, err := jetDB.ExecContext(ctx, "DROP TABLE IF EXISTS goose_db_version"); err != nil {
		return fmt.Errorf("drop goose_db_version: %w", err)
	}
	if err := jetpostgres.GenerateDB(jetDB, backendSchema, outputDir); err != nil {
		return fmt.Errorf("jet generate: %w", err)
	}
	log.Printf("jetgen: generated jet code into %s (schema=%s)", outputDir, backendSchema)
	return nil
}
// dsnWithSearchPath returns baseDSN with its query string adjusted so that
// search_path is set to schema (replacing any existing value) and sslmode
// defaults to "disable" when it is absent or empty. The query parameters
// are re-encoded, which sorts them by key.
//
// A DSN that url.Parse rejects yields an empty string and a wrapped error.
// The schema itself is not created here; it has to exist before the first
// query that relies on search_path resolution.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	u, parseErr := url.Parse(baseDSN)
	if parseErr != nil {
		return "", fmt.Errorf("parse base dsn: %w", parseErr)
	}

	params := u.Query()
	if params.Get("sslmode") == "" {
		params.Set("sslmode", "disable")
	}
	params.Set("search_path", schema)

	u.RawQuery = params.Encode()
	return u.String(), nil
}
// applyMigrations opens a short-lived pool on dsn, pings it, makes sure
// backendSchema exists, and then runs the embedded migrations. The pool is
// closed before returning; its Close error is ignored because the
// database is disposable.
func applyMigrations(ctx context.Context, dsn string) error {
	pool, err := openScoped(ctx, dsn)
	if err != nil {
		return fmt.Errorf("open scoped pool: %w", err)
	}
	defer func() { _ = pool.Close() }()

	if err = postgres.Ping(ctx, pool, defaultOpTimeout); err != nil {
		return err
	}
	// The schema must exist before the migration run creates tables in it.
	if err = ensureSchema(ctx, pool, backendSchema); err != nil {
		return err
	}
	if err = postgres.RunMigrations(ctx, pool, migrations.Migrations(), "."); err != nil {
		return fmt.Errorf("run migrations: %w", err)
	}
	return nil
}
// ensureSchema issues CREATE SCHEMA IF NOT EXISTS for the given schema,
// quoting the name as an identifier. The statement is idempotent. It has
// to run before goose creates its bookkeeping table through the
// schema-scoped connection.
func ensureSchema(ctx context.Context, db *sql.DB, schema string) error {
	_, execErr := db.ExecContext(ctx, "CREATE SCHEMA IF NOT EXISTS "+quoteIdent(schema))
	if execErr == nil {
		return nil
	}
	return fmt.Errorf("ensure schema %q: %w", schema, execErr)
}
// openScoped opens a primary pool through galaxy/postgres, starting from
// the package's default config and overriding only the DSN and the
// operation timeout.
func openScoped(ctx context.Context, dsn string) (*sql.DB, error) {
	poolCfg := postgres.DefaultConfig()
	poolCfg.PrimaryDSN, poolCfg.OperationTimeout = dsn, defaultOpTimeout
	return postgres.OpenPrimary(ctx, poolCfg)
}
// jetOutputDir computes the directory jet should write into. The path is
// derived from this source file's location as reported by runtime.Caller,
// not from the working directory, so the tool can be started from
// anywhere.
func jetOutputDir() (string, error) {
	_, thisFile, _, ok := runtime.Caller(0)
	if !ok {
		return "", errors.New("resolve runtime caller for jet output path")
	}
	// thisFile lives in .../galaxy/backend/cmd/jetgen, so two levels up is
	// the module root; Join also cleans the resulting path.
	return filepath.Join(filepath.Dir(thisFile), "..", "..", jetOutputDirSuffix), nil
}
// quoteIdent wraps name in double quotes and doubles every double quote
// already inside it, producing a quoted SQL identifier. jetgen only ever
// passes a fixed schema name, but quoting keeps the helper safe should
// that name become configurable.
func quoteIdent(name string) string {
	var quoted strings.Builder
	quoted.Grow(len(name) + 2)
	quoted.WriteByte('"')
	for i := 0; i < len(name); i++ {
		if name[i] == '"' {
			// Escape an embedded quote by emitting it twice.
			quoted.WriteByte('"')
		}
		quoted.WriteByte(name[i])
	}
	quoted.WriteByte('"')
	return quoted.String()
}
+22
View File
@@ -0,0 +1,22 @@
# Backend Service Docs
This directory keeps service-local documentation that is too detailed for
the workspace-level architecture document and too diagram-heavy for the
module README.
Sections:
- [Runtime and components](runtime.md)
- [Domain and protocol flows](flows.md)
- [Operator runbook](runbook.md)
- [Configuration and contract examples](examples.md)
Primary references:
- [`../README.md`](../README.md) — service scope, contracts,
configuration, operational behaviour.
- [`../openapi.yaml`](../openapi.yaml) — REST contract.
- [`../PLAN.md`](../PLAN.md) — historical staged build-up; kept for
archaeology, not as a source of truth.
- [`../../ARCHITECTURE.md`](../../ARCHITECTURE.md) — workspace-level
architecture.
+165
View File
@@ -0,0 +1,165 @@
# Configuration and Contract Examples
Example values that complement `../README.md` §4 and the OpenAPI
contract.
## Local `.env`
```dotenv
# HTTP and gRPC listeners
BACKEND_HTTP_LISTEN_ADDR=:8080
BACKEND_GRPC_PUSH_LISTEN_ADDR=:8081
# Postgres
BACKEND_POSTGRES_DSN=postgres://galaxy:galaxy@localhost:5432/galaxy_backend?sslmode=disable&search_path=backend
# SMTP relay (mailpit by default for dev)
BACKEND_SMTP_HOST=localhost
BACKEND_SMTP_PORT=1025
BACKEND_SMTP_FROM=galaxy-backend@galaxy.test
BACKEND_SMTP_TLS_MODE=none
# Docker
BACKEND_DOCKER_HOST=unix:///var/run/docker.sock
BACKEND_DOCKER_NETWORK=galaxy-dev
# Game engine
BACKEND_GAME_STATE_ROOT=/var/lib/galaxy-game
# Admin bootstrap
BACKEND_ADMIN_BOOTSTRAP_USER=bootstrap
BACKEND_ADMIN_BOOTSTRAP_PASSWORD=change-me-immediately
# GeoLite2
BACKEND_GEOIP_DB_PATH=/var/lib/galaxy/geoip.mmdb
# Telemetry (stdout for dev)
BACKEND_OTEL_TRACES_EXPORTER=stdout
BACKEND_OTEL_METRICS_EXPORTER=stdout
```
The above is enough for `go run ./backend/cmd/backend` to boot
locally. Required-but-empty admin variables can be set to `bootstrap`
and any non-empty password; rotate immediately after first sign-in.
## Public REST examples
### `POST /api/v1/public/auth/send-email-code`
```http
POST /api/v1/public/auth/send-email-code HTTP/1.1
Host: backend.internal
Content-Type: application/json
Accept-Language: en-US
```
```http
HTTP/1.1 200 OK
Content-Type: application/json
```
The `Accept-Language` header is captured as `preferred_language` for
the new account; the body schema rejects unknown fields, so locale
must travel through the header.
### `POST /api/v1/public/auth/confirm-email-code`
```http
POST /api/v1/public/auth/confirm-email-code HTTP/1.1
Host: backend.internal
Content-Type: application/json
```
```http
HTTP/1.1 200 OK
Content-Type: application/json
```
## Internal REST examples (gateway-only)
```http
GET /api/v1/internal/sessions/5e7ae3e6-3f4f-4d59-9b9b-2f2c3d2e0a91 HTTP/1.1
Host: backend.internal
```
```http
HTTP/1.1 200 OK
Content-Type: application/json
```
```http
POST /api/v1/internal/sessions/5e7ae3e6-.../revoke HTTP/1.1
Host: backend.internal
```
## Admin REST examples
```http
GET /api/v1/admin/mail/deliveries?page=1&page_size=10 HTTP/1.1
Host: backend.internal
Authorization: Basic <base64 of bootstrap:secret>
```
```http
HTTP/1.1 200 OK
Content-Type: application/json
```
Resend on a `sent` row returns `409 Conflict`:
```http
POST /api/v1/admin/mail/deliveries/{id}/resend HTTP/1.1
Authorization: Basic ...
```
```http
HTTP/1.1 409 Conflict
Content-Type: application/json
```
## Standard error envelope
Every error response across the four route groups uses:
```json
{"error": {"code": "<machine_readable>", "message": "<human_readable>"}}
```
The closed set of `code` values lives in
`components/schemas/ErrorBody` of `../openapi.yaml`.
],
"total": 1
}
```
Resend on a `sent` row returns `409 Conflict`:
```http
POST /api/v1/admin/mail/deliveries/{id}/resend HTTP/1.1
Authorization: Basic ...
```
```http
HTTP/1.1 409 Conflict
Content-Type: application/json
{"error": {"code": "conflict", "message": "delivery already sent"}}
```
## Standard error envelope
Every error response across the four route groups uses:
```json
{"error": {"code": "<machine_readable>", "message": "<human_readable>"}}
```
The closed set of `code` values lives in
`components/schemas/ErrorBody` of `../openapi.yaml`.
+277
View File
@@ -0,0 +1,277 @@
# Domain and Protocol Flows
This document collects the multi-step interactions inside `backend`
that span domain modules. Each section assumes the reader is familiar
with `../README.md` and `../../ARCHITECTURE.md`.
## Registration (send + confirm)
```mermaid
sequenceDiagram
participant Client
participant Gateway
participant Auth
participant User
participant Geo
participant Mail
participant Mailpit as SMTP relay
Client->>Gateway: POST /api/v1/public/auth/send-email-code<br/>body: {email}, header Accept-Language
Gateway->>Auth: forward + Accept-Language
Auth->>Auth: hash code (bcrypt cost 10)
Auth->>Auth: persist auth_challenges row<br/>(stores preferred_language)
Auth->>Mail: EnqueueLoginCode(email, code, ttl)
Mail-->>Auth: delivery_id
Auth-->>Gateway: 200 {challenge_id}
Gateway-->>Client: 200 {challenge_id}
Mail->>Mailpit: SMTP delivery (worker)
Client->>Gateway: POST /api/v1/public/auth/confirm-email-code<br/>body: {challenge_id, code, client_public_key, time_zone}
Gateway->>Auth: forward
Auth->>Auth: SELECT FOR UPDATE auth_challenges<br/>(increment attempts, enforce ceiling)
Auth->>Auth: bcrypt verify
Auth->>User: EnsureByEmail(email, preferred_language, time_zone, source_ip)
User->>User: insert account if missing<br/>(synth Player-XXXXXXXX)
User->>Geo: SetDeclaredCountryAtRegistration(user_id, source_ip)
User-->>Auth: user_id
Auth->>Auth: SELECT FOR UPDATE again,<br/>mark consumed,<br/>insert device_session,<br/>cache write-through
Auth-->>Gateway: 200 {device_session_id}
Gateway-->>Client: 200 {device_session_id}
```
Re-confirming the same `challenge_id` returns the existing session and
clears the throttle window (the throttle reuses the latest un-consumed
challenge rather than dropping the request). `accounts.user_name` is
synthesised once and never overwritten on subsequent sign-ins, so an
account always keeps the same handle.
## Authenticated request lifecycle
```mermaid
sequenceDiagram
participant Client
participant Gateway
participant Backend HTTP
participant Cache
participant Domain
participant Postgres
Client->>Gateway: signed gRPC ExecuteCommand
Gateway->>Gateway: verify signature, payload_hash,<br/>freshness, anti-replay
Gateway->>Backend HTTP: GET /api/v1/internal/sessions/{id}
Backend HTTP-->>Gateway: 200 {user_id, status:active}
Gateway->>Backend HTTP: forward command<br/>as REST + X-User-ID
Backend HTTP->>Cache: lookup
Cache-->>Backend HTTP: hit / miss
alt cache miss
Backend HTTP->>Postgres: read
Postgres-->>Backend HTTP: row
Backend HTTP->>Cache: warm
end
Backend HTTP->>Domain: business logic
Domain->>Postgres: write
Domain->>Cache: write-through after commit
Domain-->>Backend HTTP: result
Backend HTTP-->>Gateway: JSON
Gateway->>Gateway: encode FlatBuffers,<br/>sign response envelope
Gateway-->>Client: signed gRPC response
```
`X-User-ID` is the sole identity input on the user surface. The geo
counter middleware fires off `geo.IncrementCounterAsync` after the
handler returns successfully; the request itself does not block on
that.
## Lobby state machine and Race Name Directory
The lobby state machine is the closed transition graph below. Owner
endpoints (or admin overrides for public games owned by NULL) drive
forward transitions; the runtime callback is the only path that flips
`starting → running`. Every transition checks ownership, target state,
and idempotency.
```mermaid
stateDiagram-v2
[*] --> draft
draft --> enrollment_open: open-enrollment
enrollment_open --> ready_to_start: ready-to-start (auto on min_players)
ready_to_start --> starting: start
starting --> running: runtime ack
starting --> start_failed: runtime error
start_failed --> ready_to_start: retry-start
running --> paused: pause
paused --> running: resume
running --> finished: engine finish callback
running --> cancelled: cancel
paused --> cancelled: cancel
starting --> cancelled: cancel
enrollment_open --> cancelled: cancel
ready_to_start --> cancelled: cancel
draft --> cancelled: cancel
cancelled --> [*]
finished --> [*]
```
The Race Name Directory has three tiers:
- **registered** — platform-unique. Single live binding per canonical
key.
- **reservation** — per-game; a user can hold the same canonical key
in multiple active games concurrently.
- **pending_registration** — issued after a "capable finish"
(`max_planets > initial AND max_population > initial`). The pending
entry is auto-promoted to `registered` if the user calls
`POST /api/v1/user/lobby/race-names/register` within
`BACKEND_LOBBY_PENDING_REGISTRATION_TTL` (default 30 days);
otherwise the sweeper releases it.
Canonicalisation goes through
[`disciplinedware/go-confusables`](https://github.com/disciplinedware/go-confusables)
plus a small anti-fraud map (digit-letter substitution for common
look-alikes). Cross-user uniqueness across reservations and pending
registrations is enforced with a per-canonical advisory lock at write
time, since `race_names` is a composite PK that does not express that
invariant alone.
## Mail outbox
```mermaid
sequenceDiagram
participant Producer
participant Mail
participant Postgres
participant Worker
participant SMTP
participant Admin
Producer->>Mail: EnqueueLoginCode / EnqueueTemplate
Mail->>Postgres: insert mail_payloads + mail_deliveries<br/>(unique on template_id, idempotency_key)
Mail-->>Producer: delivery_id
loop every BACKEND_MAIL_WORKER_INTERVAL
Worker->>Postgres: SELECT FOR UPDATE SKIP LOCKED
Postgres-->>Worker: row
Worker->>SMTP: send via wneessen/go-mail
alt success
Worker->>Postgres: insert mail_attempts(success),<br/>mark delivery sent
else transient
Worker->>Postgres: insert mail_attempts(transient),<br/>schedule next_attempt_at + jitter
else permanent or attempts >= MAX
Worker->>Postgres: insert mail_attempts(permanent),<br/>move to mail_dead_letters
Worker->>Admin: notification intent (mail.dead_lettered)
end
end
```
`mail_attempts.attempt_no` is monotonic across the entire history of a
single delivery. Resend on a `pending` / `retrying` / `dead_lettered`
row re-arms the row; resend on `sent` returns `409 Conflict`.
## Notification fan-out
```mermaid
sequenceDiagram
participant Producer
participant Notif
participant Postgres
participant Push
participant Mail
Producer->>Notif: Submit(intent)
Notif->>Notif: validate kind + payload
Notif->>Postgres: INSERT notifications ON CONFLICT (kind, idempotency_key) DO NOTHING
Notif->>Postgres: materialise notification_routes<br/>per channel from catalog
Notif->>Push: PublishClientEvent(user_id, payload)
Notif->>Mail: EnqueueTemplate(template_id, recipient,<br/>payload, route_id)
Notif-->>Producer: ok (best-effort dispatch)
loop every BACKEND_NOTIFICATION_WORKER_INTERVAL
Postgres-->>Notif: routes still in pending / retrying
Notif->>Push: retry push (or)
Notif->>Mail: re-arm mail row
end
```
`auth.login_code` bypasses notification entirely: auth writes the
delivery row directly so the challenge commit is atomic with the mail
queue insert. Catalog entries that target administrators land email
on `BACKEND_NOTIFICATION_ADMIN_EMAIL`; if the variable is empty the
route lands with `status='skipped'` and an operator log line records
the configuration miss.
## Runtime job lifecycle
```mermaid
sequenceDiagram
participant Lobby
participant Runtime
participant Workers
participant Docker
participant Engine
participant Reconciler
Lobby->>Runtime: StartGame(game_id)
Runtime->>Workers: enqueue start job
Runtime-->>Lobby: ack
Workers->>Docker: pull / create / start engine container
Docker-->>Workers: container id
Workers->>Engine: POST /api/v1/admin/init
Engine-->>Workers: ok / error
Workers->>Runtime: write runtime_records (running or start_failed)
Workers->>Lobby: OnRuntimeJobResult
loop scheduler tick
Workers->>Engine: PUT /api/v1/admin/turn
Engine-->>Workers: snapshot
Workers->>Runtime: persist runtime_records
Workers->>Lobby: OnRuntimeSnapshot
end
Reconciler->>Docker: list containers labelled galaxy.backend=1
alt missing recorded container
Reconciler->>Runtime: mark removed
Reconciler->>Lobby: OnRuntimeJobResult(removed)
else unrecorded labelled container
Reconciler->>Runtime: adopt
end
```
Per-game serialisation is enforced by a `sync.Map` keyed by `game_id`
(values are `*sync.Mutex`) inside `runtime.Service`, so concurrent start / stop / patch attempts
on the same `game_id` cannot race. `runtime_operation_log` records
every operation for audit.
## Push gRPC
```mermaid
sequenceDiagram
participant Backend
participant Ring
participant Gateway
loop domain emits client_event / session_invalidation
Backend->>Ring: append, allocate cursor
end
Gateway->>Backend: SubscribePush(GatewaySubscribeRequest{cursor?})
alt cursor present and within ring TTL
Backend->>Gateway: replay events newer than cursor
else cursor missing or aged out
Backend->>Gateway: stream from current head
end
loop event published
Backend->>Gateway: PushEvent
end
Gateway->>Backend: same gateway_client_id reconnects
Backend->>Backend: cancel previous stream (codes.Aborted)
Backend->>Gateway: stream again
```
The cursor is a zero-padded decimal `uint64` minted by an in-process
counter; backend resets the sequence after a restart, so cursors are
only meaningful within a single process lifetime. Per-connection
backpressure is drop-oldest, with a log line on each drop so the
gateway side can correlate gaps.
+163
View File
@@ -0,0 +1,163 @@
# Operator Runbook
Practical pointers for operating `galaxy/backend` and the integration
test stack. The list mirrors the steady-state behaviour documented in
`../README.md`; when in doubt, the README is canonical.
## Cold start
1. Provision Postgres and configure `BACKEND_POSTGRES_DSN` with
`?search_path=backend`.
2. Provision an SMTP relay reachable from the backend host. Use
`BACKEND_SMTP_TLS_MODE=none` only for local development.
3. Mount a GeoLite2 Country `.mmdb` and point
`BACKEND_GEOIP_DB_PATH` at it. The `pkg/geoip/test-data/` submodule
ships a fixture that is sufficient for synthetic IPs.
4. Mount the Docker daemon socket if the deployment is responsible
for engine containers. The MVP topology mounts
`/var/run/docker.sock` directly; future hardening introduces a
`tecnativa/docker-socket-proxy` sidecar.
5. Ensure the user-defined Docker bridge named in
`BACKEND_DOCKER_NETWORK` exists; backend's
`dockerclient.EnsureNetwork` creates it if missing on first boot.
6. Seed the bootstrap admin via `BACKEND_ADMIN_BOOTSTRAP_USER` and
`BACKEND_ADMIN_BOOTSTRAP_PASSWORD`; rotate the password immediately
after the first deploy through the admin surface. The insert is
idempotent.
## Migrations
`pressly/goose/v3` applies embedded migrations from
`internal/postgres/migrations/`. The pre-production set ships as
`00001_init.sql` plus additive numbered files. Backend always runs
`CREATE SCHEMA IF NOT EXISTS backend` before goose so a fresh database
does not trip the bookkeeping table on the first migration.
`internal/postgres/migrations_test.go` asserts that the migration
produces the expected table set; adding a table without updating the
expected list is a loud test failure.
## Probes
- `GET /healthz` — process liveness. Always `200` once the binary is
alive.
- `GET /readyz` — `200` once Postgres is reachable, migrations are
applied, every cache warm-up has finished, and the gRPC push
listener is bound. Returns `503` until all hold.
## Caches
Every cache (`auth`, `user`, `admin`, `lobby`, `runtime`,
`engineversion`) reads its full table at startup. Mutations write
through the cache *after* the matching Postgres mutation commits, so
a commit failure leaves the cache in sync with the previous database
state. To force a cache rebuild, restart the process; there is no
runtime invalidation endpoint.
## Mail outbox
- The worker scans every `BACKEND_MAIL_WORKER_INTERVAL` (default
`2s`) using `SELECT ... FOR UPDATE SKIP LOCKED`.
- A row reaches `dead_lettered` after `BACKEND_MAIL_MAX_ATTEMPTS`
(default `8`).
- Operators inspect the outbox via:
- `GET /api/v1/admin/mail/deliveries?page=N`
- `GET /api/v1/admin/mail/deliveries/{delivery_id}`
- `GET /api/v1/admin/mail/deliveries/{delivery_id}/attempts`
- `GET /api/v1/admin/mail/dead-letters`
- `POST /api/v1/admin/mail/deliveries/{delivery_id}/resend` re-arms a
delivery for another attempt cycle. Allowed states are `pending`,
`retrying`, and `dead_lettered`. Resend on a `sent` row returns
`409 Conflict`.
- `mail_attempts.attempt_no` is monotonic across the entire history
of a single delivery; a resend appends new attempts rather than
starting over.
## Notification pipeline
- `notification.Submit(intent)` validates the intent shape, enforces
idempotency via `UNIQUE (kind, idempotency_key)`, and materialises
per-route rows in `notification_routes`. Push routes go straight to
`push.Service`; email routes are inserted into `mail_deliveries`.
- The notification worker mirrors the mail worker pattern: `SELECT
... FOR UPDATE SKIP LOCKED` on `notification_routes`, scan every
`BACKEND_NOTIFICATION_WORKER_INTERVAL` (default `5s`), dead-letter
after `BACKEND_NOTIFICATION_MAX_ATTEMPTS` (default `8`).
- `OnUserDeleted` skips a user's pending routes rather than deleting
them so audit trails are preserved.
- Admin-channel kinds (`runtime.image_pull_failed`,
`runtime.container_start_failed`, `runtime.start_config_invalid`)
deliver email to `BACKEND_NOTIFICATION_ADMIN_EMAIL`. When that
variable is empty, routes land with `status='skipped'` so the
catalog never silently discards an admin-targeted intent.
## Runtime control plane
- `runtime_operation_log` records every container operation (start,
stop, patch, force-next-turn) with start/finish timestamps,
outcome, and error message.
- `BACKEND_RUNTIME_RECONCILE_INTERVAL` (default `60s`) governs the
reconciler. It walks `docker ps -f label=galaxy.backend=1` and
reconciles against `runtime_records`.
- `BACKEND_RUNTIME_IMAGE_PULL_POLICY` accepts `if_missing` (default),
`always`, `never`. `never` requires that the engine image be
pre-pulled on every host that may run a game.
- Force-next-turn flips a one-shot skip flag in `runtime_records`;
the next scheduled tick observes the flag and consumes it.
## Geo
- `accounts.declared_country` is set once at registration. There is
no version history; admins inspect the current value through the
user surface.
- `user_country_counters` is updated fire-and-forget per
authenticated request. Lookups are best-effort: any `pkg/geoip`
error is logged and ignored, never blocks the request.
- Source IP for both flows reads the leftmost `X-Forwarded-For` and
falls back to `RemoteAddr`. Backend trusts the value because the
trust boundary lives at gateway.
- Email PII never appears in logs verbatim. Modules emit a per-process
HMAC-SHA256-truncated `email_hash` instead.
## Telemetry
- `BACKEND_OTEL_TRACES_EXPORTER` and
`BACKEND_OTEL_METRICS_EXPORTER` accept `otlp` (default), `none`,
`stdout`, and (metrics only) `prometheus`. The Prometheus path
binds a separate listener at
`BACKEND_OTEL_PROMETHEUS_LISTEN_ADDR` so the scrape endpoint stays
off the public surface.
- Logs are JSON to stdout; crash dumps to stderr.
- `otel_trace_id` and `otel_span_id` are injected into every log line
written inside a request scope, so a single `request_id` correlates
across HTTP, gRPC, and the workers.
## Integration test suite
`integration/` boots the full stack (Postgres, Redis, mailpit,
backend, gateway, optionally a `galaxy-game` engine) through
`testcontainers-go`. Day-to-day commands:
```bash
# Run every scenario; first cold run builds the three Docker images.
go test ./integration/...
# Run a single scenario.
go test -count=1 -v -run TestAuthFlow ./integration/...
# Force a rebuild of the integration images.
docker rmi galaxy/backend:integration galaxy/gateway:integration galaxy/game:integration
go test ./integration/...
```
Each scenario calls `testenv.Bootstrap(t)` which spins up an isolated
stack and registers `t.Cleanup` for every container. On test failure,
backend and gateway container logs are dumped through `t.Logf`. The
backend container runs as uid 0 so it can read the Docker daemon
socket; production deployments run distroless `nonroot` and rely on a
docker-socket-proxy sidecar.
The integration suite is the only place that exercises the engine
container lifecycle end-to-end. Building `galaxy/game:integration`
adds ~30–60 seconds to a cold run; subsequent runs reuse the
BuildKit layer cache.
+169
View File
@@ -0,0 +1,169 @@
# Runtime and Components
The diagram below focuses on the deployed `galaxy/backend` process and
its runtime dependencies. Every component is wired in
`backend/cmd/backend/main.go`.
```mermaid
flowchart LR
subgraph Inbound
Gateway["Gateway<br/>HTTP + gRPC push subscriber"]
Probes["Liveness / readiness<br/>probes"]
end
subgraph BackendProcess["Backend process"]
HTTP["HTTP listener<br/>:8080<br/>/api/v1/{public,user,internal,admin}"]
Push["gRPC push listener<br/>:8081<br/>Push.SubscribePush"]
Metrics["Optional Prometheus<br/>metrics listener"]
AuthSvc["auth.Service"]
UserSvc["user.Service"]
AdminSvc["admin.Service"]
LobbySvc["lobby.Service"]
RuntimeSvc["runtime.Service"]
MailSvc["mail.Service"]
NotifSvc["notification.Service"]
GeoSvc["geo.Service"]
PushSvc["push.Service<br/>(ring buffer + cursor)"]
Caches["Write-through caches<br/>auth / user / admin /<br/>lobby / runtime"]
MailWorker["mail worker"]
NotifWorker["notification worker"]
Sweeper["lobby sweeper"]
RuntimeWorkers["runtime worker pool +<br/>scheduler + reconciler"]
Telemetry["zap + OpenTelemetry"]
end
Postgres[(Postgres<br/>backend schema)]
Docker[(Docker daemon)]
SMTP[(SMTP relay)]
GeoDB[(GeoLite2 mmdb)]
Game[("galaxy-game-{id}<br/>engine containers")]
Gateway --> HTTP
Gateway --> Push
Probes --> HTTP
HTTP --> AuthSvc & UserSvc & AdminSvc & LobbySvc & RuntimeSvc & MailSvc & NotifSvc & GeoSvc
Push --> PushSvc
AuthSvc & UserSvc & AdminSvc & LobbySvc & RuntimeSvc & MailSvc & NotifSvc --> Caches
AuthSvc & UserSvc & AdminSvc & LobbySvc & RuntimeSvc & MailSvc & NotifSvc & GeoSvc --> Postgres
MailWorker --> Postgres
MailWorker --> SMTP
NotifWorker --> Postgres
NotifWorker --> MailSvc & PushSvc
Sweeper --> LobbySvc
RuntimeWorkers --> Docker
RuntimeWorkers --> Game
RuntimeWorkers --> RuntimeSvc
GeoSvc --> GeoDB
HTTP & Push & MailWorker & NotifWorker & Sweeper & RuntimeWorkers --> Telemetry
```
## Process lifecycle
`internal/app.App` orchestrates startup and shutdown. The start order
is fixed:
1. Load configuration with `internal/config.LoadFromEnv` and validate.
2. Build the zap logger and OpenTelemetry runtime.
3. Open the Postgres pool through `internal/postgres.Open`.
4. Apply embedded migrations with `pressly/goose/v3` before any
listener binds.
5. Build the push service (no listener yet) so domain modules can be
given a real publisher.
6. Build domain services in dependency order: geo → user (uses geo)
→ mail → auth (uses user, mail, push) → admin → lobby (uses runtime
adapter, notification adapter, user-entitlement adapter) → runtime
(uses lobby consumer) → notification (uses mail, push, accounts).
7. Warm every cache (`auth`, `user`, `admin`, `lobby`, `runtime`).
Each cache exposes `Ready()`; `/readyz` waits on every flag.
8. Wire HTTP handlers and the gin engine.
9. Start the HTTP server, the gRPC push server, the mail worker, the
notification worker, the lobby sweeper, the runtime worker pool,
the runtime scheduler, and the reconciler. The optional
Prometheus metrics server is added only when configured.
`app.New` accepts a `shutdownTimeout` (`BACKEND_SHUTDOWN_TIMEOUT`,
default `30s`). On `SIGINT`/`SIGTERM`, components are stopped in
reverse order:
1. Refuse new HTTP and gRPC traffic.
2. Drain in-flight requests (`BACKEND_HTTP_SHUTDOWN_TIMEOUT`,
`BACKEND_GRPC_PUSH_SHUTDOWN_TIMEOUT`).
3. Flush the mail worker's currently-running attempt; pending rows
stay in the database for the next process to pick up.
4. Flush push events that already left domain services to the gateway
buffer.
5. Drain pending geo counter goroutines.
6. Close the Docker client and the runtime engine HTTP client.
7. Close the Postgres pool.
8. Shut down telemetry, flushing any buffered traces.
The smaller of `BACKEND_SHUTDOWN_TIMEOUT` and the per-component
deadline always wins.
## Cyclic dependency adapters
Several domain pairs are mutually dependent (auth↔user for session
revoke on permanent block; lobby↔runtime for start/stop calls and
snapshot push-back; user/lobby/runtime↔notification for fan-out
publishers). The wiring code in `cmd/backend/main.go` constructs a
small adapter struct first, then patches its inner pointer once the
real service exists. The adapters live next to the wiring code and
never grow domain logic; they are pure forwarders that fall back to a
no-op when the inner pointer is still `nil` (the initial state during
boot).
## Worker pools
- **Mail worker** (`internal/mail.Worker`) — single goroutine that
scans `mail_deliveries` with `SELECT ... FOR UPDATE SKIP LOCKED`,
sends through SMTP, records the attempt, and either marks `sent` or
schedules `next_attempt_at` with backoff plus jitter. Drains pending
and retrying rows on startup.
- **Notification worker** (`internal/notification.Worker`) — same
pattern over `notification_routes`: pulls a route, dispatches push
or email, writes the outcome, and either marks delivered or moves
the route into `notification_dead_letters` after the configured
attempt budget.
- **Lobby sweeper** (`internal/lobby.Sweeper`) — `pkg/cronutil` job
that releases `pending_registration` Race Name Directory entries
past `BACKEND_LOBBY_PENDING_REGISTRATION_TTL` and auto-closes
enrollment-expired games whose `approved_count >= min_players`.
- **Runtime worker pool** (`internal/runtime.Workers`) — bounded
concurrency (`BACKEND_RUNTIME_WORKER_POOL_SIZE`) over a buffered
channel (`BACKEND_RUNTIME_JOB_QUEUE_SIZE`). Long-running pulls and
starts execute here; the calling path returns as soon as the job is
queued. After Docker reports the container running, the worker
polls the engine `/healthz` until the listener is bound (Docker
marks a container running as soon as the entrypoint starts; the
Go binary inside takes a moment to bind its TCP port). Only after
`/healthz` succeeds does the worker call `/admin/init`.
- **Runtime scheduler** (`internal/runtime.SchedulerComponent`) —
`pkg/cronutil` schedule per running game; each tick invokes the
engine `admin/turn`. Force-next-turn flips a one-shot skip flag in
`runtime_records`; the next scheduled tick observes the flag and
consumes it.
- **Runtime reconciler** (`internal/runtime.Reconciler`) — periodic
list of containers labelled `galaxy.backend=1`, matched against
`runtime_records`. Adopts unrecorded labelled containers, marks
recorded but missing as `removed`, and emits
`lobby.OnRuntimeJobResult` for the latter.
## Telemetry
Tracing covers `HTTP request → domain operation → Postgres call →
external client (SMTP, Docker, engine)`. zap injects `otel_trace_id`
and `otel_span_id` into every log entry written inside a request
scope. OTel exporters honour `BACKEND_OTEL_TRACES_EXPORTER` and
`BACKEND_OTEL_METRICS_EXPORTER`; both default to `otlp` and accept
`none`, `stdout`, and (for metrics) `prometheus`.
`TraceFieldsFromContext(ctx)` is exposed by
`internal/telemetry.Runtime` rather than the logger package because
the helper is used by middleware and depends on the OTel runtime, not
the logger configuration. Keeping it next to the runtime keeps the
`server → telemetry` import direction one-way.
+167
View File
@@ -0,0 +1,167 @@
module galaxy/backend
go 1.26.1
require (
galaxy/cronutil v0.0.0
galaxy/model v0.0.0
galaxy/postgres v0.0.0
galaxy/util v0.0.0-00010101000000-000000000000
github.com/disciplinedware/go-confusables v0.1.1
github.com/getkin/kin-openapi v0.135.0
github.com/gin-gonic/gin v1.12.0
github.com/go-jet/jet/v2 v2.14.1
github.com/google/uuid v1.6.0
github.com/jackc/pgx/v5 v5.9.2
github.com/prometheus/client_golang v1.23.2
github.com/testcontainers/testcontainers-go v0.42.0
github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0
github.com/wneessen/go-mail v0.7.2
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.68.0
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0
go.opentelemetry.io/otel v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0
go.opentelemetry.io/otel/exporters/prometheus v0.65.0
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0
go.opentelemetry.io/otel/metric v1.43.0
go.opentelemetry.io/otel/sdk v1.43.0
go.opentelemetry.io/otel/sdk/metric v1.43.0
go.opentelemetry.io/otel/trace v1.43.0
go.uber.org/zap v1.27.1
google.golang.org/grpc v1.80.0
)
require (
github.com/oschwald/geoip2-golang/v2 v2.1.0 // indirect
github.com/oschwald/maxminddb-golang/v2 v2.1.1 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
)
require (
dario.cat/mergo v1.0.2 // indirect
galaxy/geoip v0.0.0
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/XSAM/otelsql v0.42.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bytedance/gopkg v0.1.4 // indirect
github.com/bytedance/sonic v1.15.0 // indirect
github.com/bytedance/sonic/loader v0.5.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cloudwego/base64x v0.1.6 // indirect
github.com/containerd/errdefs v1.0.0
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/containerd/platforms v0.2.1 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/go-connections v0.7.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/ebitengine/purego v0.10.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/gabriel-vasile/mimetype v1.4.13 // indirect
github.com/gin-contrib/sse v1.1.1 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.30.2 // indirect
github.com/goccy/go-json v0.10.6 // indirect
github.com/goccy/go-yaml v1.19.2 // indirect
github.com/gorilla/mux v1.8.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgconn v1.14.3 // indirect
github.com/jackc/pgio v1.0.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgproto3/v2 v2.3.3 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/pgtype v1.14.4 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.18.5 // indirect
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/lib/pq v1.10.9 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
github.com/magiconair/properties v1.8.10 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-isatty v0.0.21 // indirect
github.com/mfridman/interpolate v0.0.2 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/moby/go-archive v0.2.0 // indirect
github.com/moby/moby/api v1.54.2
github.com/moby/moby/client v0.4.1
github.com/moby/patternmatcher v0.6.1 // indirect
github.com/moby/sys/sequential v0.6.0 // indirect
github.com/moby/sys/user v0.4.0 // indirect
github.com/moby/sys/userns v0.1.0 // indirect
github.com/moby/term v0.5.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/oasdiff/yaml v0.0.9 // indirect
github.com/oasdiff/yaml3 v0.0.12 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/pelletier/go-toml/v2 v2.3.0 // indirect
github.com/perimeterx/marshmallow v1.1.5 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/pressly/goose/v3 v3.27.1 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.5 // indirect
github.com/prometheus/otlptranslator v1.0.0 // indirect
github.com/prometheus/procfs v0.20.1 // indirect
github.com/quic-go/qpack v0.6.0 // indirect
github.com/quic-go/quic-go v0.59.0 // indirect
github.com/sethvargo/go-retry v0.3.0 // indirect
github.com/shirou/gopsutil/v4 v4.26.3 // indirect
github.com/sirupsen/logrus v1.9.4 // indirect
github.com/stretchr/testify v1.11.1
github.com/tklauser/go-sysconf v0.3.16 // indirect
github.com/tklauser/numcpus v0.11.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.3.1 // indirect
github.com/woodsbury/decimal128 v1.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.yaml.in/yaml/v2 v2.4.4 // indirect
golang.org/x/arch v0.25.0 // indirect
golang.org/x/crypto v0.50.0
golang.org/x/net v0.53.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/text v0.36.0
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260420184626-e10c466a9529 // indirect
google.golang.org/protobuf v1.36.11
gopkg.in/yaml.v3 v3.0.1 // indirect
)
replace galaxy/postgres => ../pkg/postgres
replace galaxy/geoip => ../pkg/geoip
replace galaxy/model => ../pkg/model
replace galaxy/cronutil => ../pkg/cronutil
replace galaxy/util => ../pkg/util
+532
View File
@@ -0,0 +1,532 @@
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/XSAM/otelsql v0.42.0 h1:Li0xF4eJUxG2e0x3D4rvRlys1f27yJKvjTh7ljkUP5o=
github.com/XSAM/otelsql v0.42.0/go.mod h1:4mOrEv+cS1KmKzrvTktvJnstr5GtKSAK+QHvFR9OcpI=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bytedance/gopkg v0.1.4 h1:oZnQwnX82KAIWb7033bEwtxvTqXcYMxDBaQxo5JJHWM=
github.com/bytedance/gopkg v0.1.4/go.mod h1:v1zWfPm21Fb+OsyXN2VAHdL6TBb2L88anLQgdyje6R4=
github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
github.com/bytedance/sonic/loader v0.5.1 h1:Ygpfa9zwRCCKSlrp5bBP/b/Xzc3VxsAW+5NIYXrOOpI=
github.com/bytedance/sonic/loader v0.5.1/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/disciplinedware/go-confusables v0.1.1 h1:l/JVOsdrEDHo7nvL+tQfRO1F14UyuuDm1Uvv3Nqmq9Q=
github.com/disciplinedware/go-confusables v0.1.1/go.mod h1:2hAXIAtpSqx+tMKdCzgRNv4J/kmz/oGfSHTBGJjVgfc=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/go-connections v0.7.0 h1:6SsRfJddP22WMrCkj19x9WKjEDTB+ahsdiGYf0mN39c=
github.com/docker/go-connections v0.7.0/go.mod h1:no1qkHdjq7kLMGUXYAduOhYPSJxxvgWBh7ogVvptn3Q=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU=
github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM=
github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/getkin/kin-openapi v0.135.0 h1:751SjYfbiwqukYuVjwYEIKNfrSwS5YpA7DZnKSwQgtg=
github.com/getkin/kin-openapi v0.135.0/go.mod h1:6dd5FJl6RdX4usBtFBaQhk9q62Yb2J0Mk5IhUO/QqFI=
github.com/gin-contrib/sse v1.1.1 h1:uGYpNwTacv5R68bSGMapo62iLTRa9l5zxGCps4hK6ko=
github.com/gin-contrib/sse v1.1.1/go.mod h1:QXzuVkA0YO7o/gun03UI1Q+FTI8ZV/n5t03kIQAI89s=
github.com/gin-gonic/gin v1.12.0 h1:b3YAbrZtnf8N//yjKeU2+MQsh2mY5htkZidOM7O0wG8=
github.com/gin-gonic/gin v1.12.0/go.mod h1:VxccKfsSllpKshkBWgVgRniFFAzFb9csfngsqANjnLc=
github.com/go-jet/jet/v2 v2.14.1 h1:wsfD9e7CGP9h46+IFNlftfncBcmVnKddikbTtapQM3M=
github.com/go-jet/jet/v2 v2.14.1/go.mod h1:dqTAECV2Mo3S2NFjbm4vJ1aDruZjhaJ1RAAR8rGUkkc=
github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=
github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.30.2 h1:JiFIMtSSHb2/XBUbWM4i/MpeQm9ZK2xqPNk8vgvu5JQ=
github.com/go-playground/validator/v10 v10.30.2/go.mod h1:mAf2pIOVXjTEBrwUMGKkCWKKPs9NheYGabeB04txQSc=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM=
github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE=
github.com/goccy/go-json v0.10.6 h1:p8HrPJzOakx/mn/bQtjgNjdTcN+/S6FcG2CTtQOrHVU=
github.com/goccy/go-json v0.10.6/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8=
github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
github.com/jackc/pgconn v0.0.0-20190420214824-7e0022ef6ba3/go.mod h1:jkELnwuX+w9qN5YIfX0fl88Ehu4XC3keFuOJJk9pcnA=
github.com/jackc/pgconn v0.0.0-20190824142844-760dd75542eb/go.mod h1:lLjNuW/+OfW9/pnVKPazfWOgNfH2aPem8YQ7ilXGvJE=
github.com/jackc/pgconn v0.0.0-20190831204454-2fabfa3c18b7/go.mod h1:ZJKsE/KZfsUgOEh9hBm+xYTstcNHg7UPMVJqRfQxq4s=
github.com/jackc/pgconn v1.8.0/go.mod h1:1C2Pb36bGIP9QHGBYCjnyhqu7Rv3sGshaQUvmfGIB/o=
github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8/2JY=
github.com/jackc/pgconn v1.9.1-0.20210724152538-d89c8390a530/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI=
github.com/jackc/pgconn v1.14.3 h1:bVoTr12EGANZz66nZPkMInAV/KHD2TxH9npjXXgiB3w=
github.com/jackc/pgconn v1.14.3/go.mod h1:RZbme4uasqzybK2RK5c65VsHxoyaml09lx3tXOcO/VM=
github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE=
github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8=
github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE=
github.com/jackc/pgmock v0.0.0-20201204152224-4fe30f7445fd/go.mod h1:hrBW0Enj2AZTNpt/7Y5rr2xe/9Mn757Wtb2xeBzPv2c=
github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65 h1:DadwsjnMwFjfWc9y5Wi/+Zz7xoE5ALHsRQlOctkOiHc=
github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78=
github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA=
github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg=
github.com/jackc/pgproto3/v2 v2.0.0-rc3/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM=
github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM=
github.com/jackc/pgproto3/v2 v2.0.6/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA=
github.com/jackc/pgproto3/v2 v2.1.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA=
github.com/jackc/pgproto3/v2 v2.3.3 h1:1HLSx5H+tXR9pW3in3zaztoEwQYRC9SQaYUHjTSUOag=
github.com/jackc/pgproto3/v2 v2.3.3/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA=
github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg=
github.com/jackc/pgtype v0.0.0-20190824184912-ab885b375b90/go.mod h1:KcahbBH1nCMSo2DXpzsoWOAfFkdEtEJpPbVLq8eE+mc=
github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrUS8lot6TQqcg7mtthZ9T0EoIBFiJcmcyw=
github.com/jackc/pgtype v1.8.1-0.20210724151600-32e20a603178/go.mod h1:C516IlIV9NKqfsMCXTdChteoXmwgUceqaLfjg2e3NlM=
github.com/jackc/pgtype v1.14.0/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4=
github.com/jackc/pgtype v1.14.4 h1:fKuNiCumbKTAIxQwXfB/nsrnkEI6bPJrrSiMKgbJ2j8=
github.com/jackc/pgtype v1.14.4/go.mod h1:aKeozOde08iifGosdJpz9MBZonJOUJxqNpPBcMJTlVA=
github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y=
github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM=
github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc=
github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs=
github.com/jackc/pgx/v4 v4.18.2/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw=
github.com/jackc/pgx/v4 v4.18.3 h1:dE2/TrEsGX3RBprb3qryqSV9Y60iZN1C6i8IrmW9/BA=
github.com/jackc/pgx/v4 v4.18.3/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw=
github.com/jackc/pgx/v5 v5.9.2 h1:3ZhOzMWnR4yJ+RW1XImIPsD1aNSz4T4fyP7zlQb56hw=
github.com/jackc/pgx/v5 v5.9.2/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle v1.3.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.21 h1:xYae+lCNBP7QuW4PUnNG61ffM4hVIfm+zUzDuSzYLGs=
github.com/mattn/go-isatty v0.0.21/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI=
github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o=
github.com/mfridman/interpolate v0.0.2 h1:pnuTK7MQIxxFz1Gr+rjSIx9u7qVjf5VOoM/u6BbAxPY=
github.com/mfridman/interpolate v0.0.2/go.mod h1:p+7uk6oE07mpE/Ik1b8EckO0O4ZXiGAfshKBWLUM9Xg=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8=
github.com/moby/go-archive v0.2.0/go.mod h1:mNeivT14o8xU+5q1YnNrkQVpK+dnNe/K6fHqnTg4qPU=
github.com/moby/moby/api v1.54.2 h1:wiat9QAhnDQjA7wk1kh/TqHz2I1uUA7M7t9SAl/JNXg=
github.com/moby/moby/api v1.54.2/go.mod h1:+RQ6wluLwtYaTd1WnPLykIDPekkuyD/ROWQClE83pzs=
github.com/moby/moby/client v0.4.1 h1:DMQgisVoMkmMs7fp3ROSdiBnoAu8+vo3GggFl06M/wY=
github.com/moby/moby/client v0.4.1/go.mod h1:z52C9O2POPOsnxZAy//WtKcQ32P+jT/NGeXu/7nfjGQ=
github.com/moby/patternmatcher v0.6.1 h1:qlhtafmr6kgMIJjKJMDmMWq7WLkKIo23hsrpR3x084U=
github.com/moby/patternmatcher v0.6.1/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/oasdiff/yaml v0.0.9 h1:zQOvd2UKoozsSsAknnWoDJlSK4lC0mpmjfDsfqNwX48=
github.com/oasdiff/yaml v0.0.9/go.mod h1:8lvhgJG4xiKPj3HN5lDow4jZHPlx1i7dIwzkdAo6oAM=
github.com/oasdiff/yaml3 v0.0.12 h1:75urAtPeDg2/iDEWwzNrLOWxI9N/dCh81nTTJtokt2M=
github.com/oasdiff/yaml3 v0.0.12/go.mod h1:y5+oSEHCPT/DGrS++Wc/479ERge0zTFxaF8PbGKcg2o=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/oschwald/geoip2-golang/v2 v2.1.0 h1:DjnLhNJu9WHwTrmoiQFvgmyJoczhdnm7LB23UBI2Amo=
github.com/oschwald/geoip2-golang/v2 v2.1.0/go.mod h1:qdVmcPgrTJ4q2eP9tHq/yldMTdp2VMr33uVdFbHBiBc=
github.com/oschwald/maxminddb-golang/v2 v2.1.1 h1:lA8FH0oOrM4u7mLvowq8IT6a3Q/qEnqRzLQn9eH5ojc=
github.com/oschwald/maxminddb-golang/v2 v2.1.1/go.mod h1:PLdx6PR+siSIoXqqy7C7r3SB3KZnhxWr1Dp6g0Hacl8=
github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s=
github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/pressly/goose/v3 v3.27.1 h1:6uEvcprBybDmW4hcz3gYujhARhye+GoWKhEWyzD5sh4=
github.com/pressly/goose/v3 v3.27.1/go.mod h1:maruOxsPnIG2yHHyo8UqKWXYKFcH7Q76csUV7+7KYoM=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=
github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=
github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos=
github.com/prometheus/otlptranslator v1.0.0/go.mod h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM=
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw=
github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE=
github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas=
github.com/shirou/gopsutil/v4 v4.26.3 h1:2ESdQt90yU3oXF/CdOlRCJxrP+Am1aBYubTMTfxJ1qc=
github.com/shirou/gopsutil/v4 v4.26.3/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ=
github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4=
github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/testcontainers/testcontainers-go v0.42.0 h1:He3IhTzTZOygSXLJPMX7n44XtK+qhjat1nI9cneBbUY=
github.com/testcontainers/testcontainers-go v0.42.0/go.mod h1:vZjdY1YmUA1qEForxOIOazfsrdyORJAbhi0bp8plN30=
github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0 h1:GCbb1ndrF7OTDiIvxXyItaDab4qkzTFJ48LKFdM7EIo=
github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0/go.mod h1:IRPBaI8jXdrNfD0e4Zm7Fbcgaz5shKxOQv4axiL09xs=
github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA=
github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI=
github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw=
github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.3.1 h1:waO7eEiFDwidsBN6agj1vJQ4AG7lh2yqXyOXqhgQuyY=
github.com/ugorji/go/codec v1.3.1/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
github.com/wneessen/go-mail v0.7.2 h1:xxPnhZ6IZLSgxShebmZ6DPKh1b6OJcoHfzy7UjOkzS8=
github.com/wneessen/go-mail v0.7.2/go.mod h1:+TkW6QP3EVkgTEqHtVmnAE/1MRhmzb8Y9/W3pweuS+k=
github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0=
github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
go.mongodb.org/mongo-driver/v2 v2.5.0 h1:yXUhImUjjAInNcpTcAlPHiT7bIXhshCTL3jVBkF3xaE=
go.mongodb.org/mongo-driver/v2 v2.5.0/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.68.0 h1:5FXSL2s6afUC1bzNzl1iedZZ8yqR7GOhbCoEXtyeK6Q=
go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.68.0/go.mod h1:MdHW7tLtkeGJnR4TyOrnd5D0zUGZQB1l84uHCe8hRpE=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 h1:yI1/OhfEPy7J9eoa6Sj051C7n5dvpj0QX8g4sRchg04=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0/go.mod h1:NoUCKYWK+3ecatC4HjkRktREheMeEtrXoQxrqYFeHSc=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo=
go.opentelemetry.io/contrib/propagators/b3 v1.43.0 h1:CETqV3QLLPTy5yNrqyMr41VnAOOD4lsRved7n4QG00A=
go.opentelemetry.io/contrib/propagators/b3 v1.43.0/go.mod h1:Q4mCiCdziYzpNR0g+6UqVotAlCDZdzz6L8jwY4knOrw=
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 h1:8UQVDcZxOJLtX6gxtDt3vY2WTgvZqMQRzjsqiIHQdkc=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0/go.mod h1:2lmweYCiHYpEjQ/lSJBYhj9jP1zvCvQW4BqL9dnT7FQ=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 h1:w1K+pCJoPpQifuVpsKamUdn9U0zM3xUziVOqsGksUrY=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0/go.mod h1:HBy4BjzgVE8139ieRI75oXm3EcDN+6GhD88JT1Kjvxg=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak=
go.opentelemetry.io/otel/exporters/prometheus v0.65.0 h1:jOveH/b4lU9HT7y+Gfamf18BqlOuz2PWEvs8yM7Q6XE=
go.opentelemetry.io/otel/exporters/prometheus v0.65.0/go.mod h1:i1P8pcumauPtUI4YNopea1dhzEMuEqWP1xoUZDylLHo=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0 h1:TC+BewnDpeiAmcscXbGMfxkO+mwYUwE/VySwvw88PfA=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.43.0/go.mod h1:J/ZyF4vfPwsSr9xJSPyQ4LqtcTPULFR64KwTikGLe+A=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 h1:mS47AX77OtFfKG4vtp+84kuGSFZHTyxtXIN269vChY0=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0/go.mod h1:PJnsC41lAGncJlPUniSwM81gc80GkgWJWr3cu2nKEtU=
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g=
go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y=
go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4=
go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM=
go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc=
go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ=
go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ=
golang.org/x/arch v0.25.0 h1:qnk6Ksugpi5Bz32947rkUgDt9/s5qvqDPl/gBKdMJLE=
golang.org/x/arch v0.25.0/go.mod h1:0X+GdSIP+kL5wPmpK7sdkEVTt2XoYP0cSjQSbZBwOi8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA=
golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY=
golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260420184626-e10c466a9529 h1:XF8+t6QQiS0o9ArVan/HW8Q7cycNPGsJf6GA2nXxYAg=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260420184626-e10c466a9529/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
modernc.org/libc v1.72.1 h1:db1xwJ6u1kE3KHTFTTbe2GCrczHPKzlURP0aDC4NGD0=
modernc.org/libc v1.72.1/go.mod h1:HRMiC/PhPGLIPM7GzAFCbI+oSgE3dhZ8FWftmRrHVlY=
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
modernc.org/sqlite v1.49.1 h1:dYGHTKcX1sJ+EQDnUzvz4TJ5GbuvhNJa8Fg6ElGx73U=
modernc.org/sqlite v1.49.1/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew=
pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk=
pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04=
+236
View File
@@ -0,0 +1,236 @@
// Package admin owns the platform's administrator records inside the
// `backend.admin_accounts` table together with the Basic Auth verifier
// consumed by `backend/internal/server/middleware/basicauth`.
//
// The package is introduced on top of the existing user surface.
// The previous placeholder verifier
// (`basicauth.StaticVerifier`) is retired from production wiring; the
// admin-account CRUD endpoints under `/api/v1/admin/admin-accounts/*`
// flip from 501 placeholders to real implementations backed by
// `*admin.Service`.
//
// The package is intentionally narrow: it owns its own table, exposes
// a Verifier-shaped surface, and ships an idempotent env-driven
// bootstrap so a fresh deploy can authenticate the first operator
// without manual SQL. Cross-domain admin handlers (users, games,
// runtime, mail, notification, geo) live in their respective module
// packages; this package only owns the credential gate.
package admin
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/jackc/pgx/v5/pgconn"
"go.uber.org/zap"
"golang.org/x/crypto/bcrypt"
)
// bootstrapBcryptCost is the cost factor used for every admin password
// hash. It matches `ARCHITECTURE.md` §14 and `backend/README.md` §12.
//
// Despite the "bootstrap" prefix the constant is not bootstrap-only:
// Service.Create and Service.ResetPassword in this file pass it to
// bcrypt.GenerateFromPassword as well.
//
// The Stage-5.1 auth code uses `bcrypt.DefaultCost` (10) for one-time
// login codes; admin passwords stay separate at cost 12 so the
// stronger hashing covers reused secrets.
const bootstrapBcryptCost = 12
// pgErrCodeUniqueViolation is the SQLSTATE value emitted by Postgres
// when a UNIQUE constraint is violated. The pgx driver surfaces the
// value on `*pgconn.PgError`; isUniqueViolation compares it against
// the error's Code field. The constant is duplicated from
// `internal/user/user.go` so the two packages stay decoupled.
const pgErrCodeUniqueViolation = "23505"
// Admin is the read-side aggregate served to handlers and the
// in-memory cache. It mirrors the OpenAPI `AdminAccount` schema; the
// password hash is intentionally absent so handlers cannot accidentally
// surface it.
type Admin struct {
// Username identifies the account; every Service method that targets
// a single account looks it up by this value.
Username string
// CreatedAt is presumably the row's creation time — confirm against
// the store, which populates it.
CreatedAt time.Time
// LastUsedAt is optional. NOTE(review): presumably nil until the
// account has been used at least once — confirm against the store.
LastUsedAt *time.Time
// DisabledAt is nil while the account is enabled. Service.Disable
// hands the store a timestamp for it and Service.Enable hands it nil.
DisabledAt *time.Time
}
// Deps aggregates every collaborator the Service depends on.
// Constructing the Service through Deps (rather than positional args)
// keeps wiring patches small when new dependencies are added.
//
// Store and Cache are required; Logger and Now are optional and are
// defaulted by NewService.
type Deps struct {
// Store must be non-nil. It owns every Postgres query against
// `backend.admin_accounts`.
Store *Store
// Cache must be non-nil. The Verifier consults it on the request
// path; mutation methods write through after a successful commit.
Cache *Cache
// Logger is named under "admin" by NewService. Nil falls back to
// zap.NewNop.
Logger *zap.Logger
// Now overrides time.Now for deterministic tests. A nil Now defaults
// to time.Now in NewService.
Now func() time.Time
}
// Service is the admin-domain entry point. Concurrency safety is
// delegated to Postgres for persisted state and to the embedded Cache
// for the in-memory projection.
//
// Build instances with NewService so the optional dependencies are
// defaulted before any method runs.
type Service struct {
// deps holds the collaborators as stored by NewService, i.e. after
// the Now and Logger defaults have been applied.
deps Deps
}
// NewService builds a Service around deps. Optional collaborators are
// defaulted first: a nil Now becomes time.Now and a nil Logger becomes
// zap.NewNop. The resulting logger is then scoped under the "admin"
// name. Store and Cache are not checked here and must be non-nil;
// Service methods dereference them and will panic on first use
// otherwise.
func NewService(deps Deps) *Service {
	clock := deps.Now
	if clock == nil {
		clock = time.Now
	}

	logger := deps.Logger
	if logger == nil {
		logger = zap.NewNop()
	}

	deps.Now = clock
	deps.Logger = logger.Named("admin")
	return &Service{deps: deps}
}
// CreateInput is the parameter struct for Service.Create.
type CreateInput struct {
// Username is the requested account name. Validate trims surrounding
// whitespace in place and rejects an empty result.
Username string
// Password is the plaintext secret. Validate rejects an empty value;
// Service.Create bcrypt-hashes it before it reaches the store.
Password string
}
// Validate trims surrounding whitespace from Username in place and then
// checks that both fields are populated. The username is checked
// first. Each failure wraps ErrInvalidInput so callers can detect it
// with errors.Is. The password is compared as-is, without trimming.
func (in *CreateInput) Validate() error {
	in.Username = strings.TrimSpace(in.Username)

	switch {
	case in.Username == "":
		return fmt.Errorf("%w: username must not be empty", ErrInvalidInput)
	case in.Password == "":
		return fmt.Errorf("%w: password must not be empty", ErrInvalidInput)
	default:
		return nil
	}
}
// List returns the admin rows produced by Store.ListAll, which is
// documented as ordering them by username ascending. The second value
// returned by the store is discarded. Store failures are wrapped with
// the "admin list" prefix.
func (s *Service) List(ctx context.Context) ([]Admin, error) {
	admins, _, listErr := s.deps.Store.ListAll(ctx)
	if listErr == nil {
		return admins, nil
	}
	return nil, fmt.Errorf("admin list: %w", listErr)
}
// Get looks up a single admin by username. The name is trimmed first;
// a blank name short-circuits to ErrNotFound without touching the
// store. Store errors are returned unwrapped, so a not-found result
// from the store reaches the caller unchanged.
func (s *Service) Get(ctx context.Context, username string) (Admin, error) {
	name := strings.TrimSpace(username)
	if len(name) == 0 {
		return Admin{}, ErrNotFound
	}

	found, _, lookupErr := s.deps.Store.Lookup(ctx, name)
	if lookupErr != nil {
		return Admin{}, lookupErr
	}
	return found, nil
}
// Create persists a fresh admin row with the bcrypt-hashed password,
// refreshes the in-memory cache, and returns the persisted aggregate.
//
// Errors:
//   - ErrInvalidInput (wrapped) when the username or password is empty,
//     or when the password exceeds bcrypt's 72-byte input limit. In the
//     latter case bcrypt.ErrPasswordTooLong stays in the error chain.
//   - ErrUsernameTaken when the username already exists; it is passed
//     through as returned by the store.
//   - any other hashing or store failure, wrapped with "admin create".
func (s *Service) Create(ctx context.Context, in CreateInput) (Admin, error) {
	if err := in.Validate(); err != nil {
		return Admin{}, err
	}

	hash, err := bcrypt.GenerateFromPassword([]byte(in.Password), bootstrapBcryptCost)
	if err != nil {
		// An over-long password is the caller's mistake, not an internal
		// failure; classify it so handlers can answer with a client error.
		if errors.Is(err, bcrypt.ErrPasswordTooLong) {
			return Admin{}, fmt.Errorf("%w: %w", ErrInvalidInput, err)
		}
		return Admin{}, fmt.Errorf("admin create: hash password: %w", err)
	}

	admin, err := s.deps.Store.Insert(ctx, in.Username, hash)
	if err != nil {
		if errors.Is(err, ErrUsernameTaken) {
			return Admin{}, err
		}
		return Admin{}, fmt.Errorf("admin create: %w", err)
	}

	// Write through only after the row has been persisted.
	s.deps.Cache.Put(admin, hash)
	return admin, nil
}
// Disable marks the account as disabled, stamping it with the Service
// clock converted to UTC, and writes the updated row through to the
// cache. A blank username (after trimming) yields ErrNotFound without
// touching the store. Store errors are wrapped with "admin disable".
// The documented contract is idempotent: an already-disabled account
// is returned unchanged, and ErrNotFound is reported when no row
// matches. Both behaviours are implemented by Store.SetDisabledAt.
func (s *Service) Disable(ctx context.Context, username string) (Admin, error) {
	name := strings.TrimSpace(username)
	if len(name) == 0 {
		return Admin{}, ErrNotFound
	}

	disabledAt := s.deps.Now().UTC()
	updated, hash, err := s.deps.Store.SetDisabledAt(ctx, name, &disabledAt)
	if err != nil {
		return Admin{}, fmt.Errorf("admin disable: %w", err)
	}

	s.deps.Cache.Put(updated, hash)
	return updated, nil
}
// Enable re-activates an account by asking the store to clear
// `disabled_at`, then writes the updated row through to the cache. A
// blank username (after trimming) yields ErrNotFound without touching
// the store. Store errors are wrapped with "admin enable". The
// documented contract is idempotent: an already-enabled account is
// returned unchanged, and ErrNotFound is reported when no row matches.
// Both behaviours are implemented by Store.SetDisabledAt.
func (s *Service) Enable(ctx context.Context, username string) (Admin, error) {
	name := strings.TrimSpace(username)
	if len(name) == 0 {
		return Admin{}, ErrNotFound
	}

	// A nil timestamp is the "clear disabled_at" request.
	updated, hash, err := s.deps.Store.SetDisabledAt(ctx, name, nil)
	if err != nil {
		return Admin{}, fmt.Errorf("admin enable: %w", err)
	}

	s.deps.Cache.Put(updated, hash)
	return updated, nil
}
// ResetPassword bcrypt-hashes newPassword and replaces the stored
// password_hash, then writes the updated row and hash through to the
// cache. The new password itself is not returned per the OpenAPI
// contract ("delivered out-of-band").
//
// Errors:
//   - ErrNotFound when username is blank after trimming.
//   - ErrInvalidInput (wrapped) when newPassword is empty or exceeds
//     bcrypt's 72-byte input limit. In the latter case
//     bcrypt.ErrPasswordTooLong stays in the error chain.
//   - any other hashing or store failure, wrapped with
//     "admin reset password".
func (s *Service) ResetPassword(ctx context.Context, username, newPassword string) (Admin, error) {
	username = strings.TrimSpace(username)
	if username == "" {
		return Admin{}, ErrNotFound
	}
	if newPassword == "" {
		return Admin{}, fmt.Errorf("%w: password must not be empty", ErrInvalidInput)
	}

	hash, err := bcrypt.GenerateFromPassword([]byte(newPassword), bootstrapBcryptCost)
	if err != nil {
		// An over-long password is the caller's mistake, not an internal
		// failure; classify it so handlers can answer with a client error.
		if errors.Is(err, bcrypt.ErrPasswordTooLong) {
			return Admin{}, fmt.Errorf("%w: %w", ErrInvalidInput, err)
		}
		return Admin{}, fmt.Errorf("admin reset password: hash: %w", err)
	}

	admin, err := s.deps.Store.UpdatePasswordHash(ctx, username, hash)
	if err != nil {
		return Admin{}, fmt.Errorf("admin reset password: %w", err)
	}

	// Write through only after the row has been persisted.
	s.deps.Cache.Put(admin, hash)
	return admin, nil
}
// isUniqueViolation reports whether err carries a *pgconn.PgError whose
// SQLSTATE is the UNIQUE-violation code. When constraintName is
// non-empty the violated constraint must also match it exactly; an
// empty constraintName accepts any UNIQUE violation.
func isUniqueViolation(err error, constraintName string) bool {
	var pgErr *pgconn.PgError
	if !errors.As(err, &pgErr) || pgErr.Code != pgErrCodeUniqueViolation {
		return false
	}
	return constraintName == "" || pgErr.ConstraintName == constraintName
}
+398
View File
@@ -0,0 +1,398 @@
package admin_test
import (
"context"
"database/sql"
"errors"
"net/url"
"testing"
"time"
"galaxy/backend/internal/admin"
"galaxy/backend/internal/config"
backendpg "galaxy/backend/internal/postgres"
pgshared "galaxy/postgres"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
"go.uber.org/zap"
"golang.org/x/crypto/bcrypt"
)
// Settings for the throwaway Postgres instance that startPostgres
// launches for each test.
const (
// pgImage is the container image handed to tcpostgres.Run.
pgImage = "postgres:16-alpine"
// pgUser, pgPassword and pgDatabase are the credentials and database
// name the container is created with.
pgUser = "galaxy"
pgPassword = "galaxy"
pgDatabase = "galaxy_backend"
// pgSchema is written into the DSN as the connection's search_path.
pgSchema = "backend"
// pgStartup bounds how long the wait strategy waits for the
// container's readiness log line.
pgStartup = 90 * time.Second
// pgOpTO is the operation timeout set on the shared Postgres config
// and used for the initial ping.
pgOpTO = 10 * time.Second
)
// startPostgres spins up a Postgres testcontainer, opens a connection
// whose search_path is scoped to the backend schema, and applies the
// backend migrations. The returned *sql.DB is closed and the container
// terminated by t.Cleanup hooks. When the container cannot be started
// (typically because Docker is unavailable) the test is skipped rather
// than failed; any later setup error fails the test.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()

	// One deadline covers container start-up, connection and migrations.
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)

	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			// Postgres logs readiness twice: once for the init run and
			// once for the real server start.
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	// Register termination before inspecting err: Run can hand back a
	// created container together with an error (for example when the
	// wait strategy times out), and that container must not outlive the
	// test. TerminateContainer does nothing for a nil container.
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}

	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	scopedDSN, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}

	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pgOpTO

	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	// Close errors are irrelevant during teardown of a throwaway database.
	t.Cleanup(func() { _ = db.Close() })

	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// dsnWithSearchPath returns baseDSN with its `search_path` query
// parameter set to schema, replacing any existing value. If the DSN
// carries no `sslmode`, or an empty one, it is set to "disable"; an
// explicit non-empty sslmode is kept. The query string is re-encoded,
// so parameters come out sorted by key. A DSN that does not parse as a
// URL yields an empty string and the parse error.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	dsn, parseErr := url.Parse(baseDSN)
	if parseErr != nil {
		return "", parseErr
	}

	query := dsn.Query()
	query.Set("search_path", schema)
	if mode := query.Get("sslmode"); len(mode) == 0 {
		query.Set("sslmode", "disable")
	}

	dsn.RawQuery = query.Encode()
	return dsn.String(), nil
}
// buildService wires an admin.Service on top of db: it creates a Store
// and a Cache, warms the cache from the store (failing the test if that
// errors), and returns the service together with both collaborators so
// tests can inspect them directly. The service logs to a no-op logger.
func buildService(t *testing.T, db *sql.DB) (*admin.Service, *admin.Store, *admin.Cache) {
	t.Helper()

	var (
		store = admin.NewStore(db)
		cache = admin.NewCache()
	)
	if warmErr := cache.Warm(context.Background(), store); warmErr != nil {
		t.Fatalf("warm admin cache: %v", warmErr)
	}

	deps := admin.Deps{Store: store, Cache: cache, Logger: zap.NewNop()}
	return admin.NewService(deps), store, cache
}
func TestBootstrapInsertsThenSkips(t *testing.T) {
t.Parallel()
db := startPostgres(t)
store := admin.NewStore(db)
cfg := config.AdminBootstrapConfig{User: "root", Password: "root-secret"}
logger := zap.NewNop()
if err := admin.Bootstrap(context.Background(), store, cfg, logger); err != nil {
t.Fatalf("first bootstrap: %v", err)
}
first, hash, err := store.Lookup(context.Background(), "root")
if err != nil {
t.Fatalf("lookup after first bootstrap: %v", err)
}
if first.Username != "root" {
t.Fatalf("Username = %q, want root", first.Username)
}
if err := bcrypt.CompareHashAndPassword(hash, []byte("root-secret")); err != nil {
t.Fatalf("CompareHashAndPassword: %v", err)
}
// Second call must not modify the row even when the password value
// supplied via env vars differs.
cfg.Password = "different"
if err := admin.Bootstrap(context.Background(), store, cfg, logger); err != nil {
t.Fatalf("second bootstrap: %v", err)
}
_, sameHash, err := store.Lookup(context.Background(), "root")
if err != nil {
t.Fatalf("lookup after second bootstrap: %v", err)
}
if string(hash) != string(sameHash) {
t.Fatalf("password_hash mutated by idempotent bootstrap")
}
}
func TestBootstrapSkipsWhenUserEmpty(t *testing.T) {
t.Parallel()
db := startPostgres(t)
store := admin.NewStore(db)
if err := admin.Bootstrap(context.Background(), store, config.AdminBootstrapConfig{}, zap.NewNop()); err != nil {
t.Fatalf("bootstrap: %v", err)
}
admins, _, err := store.ListAll(context.Background())
if err != nil {
t.Fatalf("list: %v", err)
}
if len(admins) != 0 {
t.Fatalf("ListAll = %d rows, want 0", len(admins))
}
}
func TestVerifyHappyPath(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
created, err := svc.Create(context.Background(), admin.CreateInput{
Username: "alice",
Password: "alice-secret",
})
if err != nil {
t.Fatalf("create: %v", err)
}
if created.Username != "alice" {
t.Fatalf("Username = %q, want alice", created.Username)
}
ok, err := svc.Verify(context.Background(), "alice", "alice-secret")
if err != nil || !ok {
t.Fatalf("Verify(correct) = (%v, %v), want (true, nil)", ok, err)
}
}
func TestVerifyRejectsWrongPassword(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
_, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "good"})
if err != nil {
t.Fatalf("create: %v", err)
}
ok, err := svc.Verify(context.Background(), "alice", "bad")
if err != nil {
t.Fatalf("Verify returned error: %v", err)
}
if ok {
t.Fatalf("Verify(wrong) = true, want false")
}
}
func TestVerifyRejectsUnknownUser(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
ok, err := svc.Verify(context.Background(), "ghost", "x")
if err != nil || ok {
t.Fatalf("Verify(ghost) = (%v, %v), want (false, nil)", ok, err)
}
}
func TestVerifyRejectsDisabledAccount(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "good"}); err != nil {
t.Fatalf("create: %v", err)
}
if _, err := svc.Disable(context.Background(), "alice"); err != nil {
t.Fatalf("disable: %v", err)
}
ok, err := svc.Verify(context.Background(), "alice", "good")
if err != nil || ok {
t.Fatalf("Verify(disabled) = (%v, %v), want (false, nil)", ok, err)
}
}
func TestEnableReversesDisable(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "good"}); err != nil {
t.Fatalf("create: %v", err)
}
if _, err := svc.Disable(context.Background(), "alice"); err != nil {
t.Fatalf("disable: %v", err)
}
got, err := svc.Enable(context.Background(), "alice")
if err != nil {
t.Fatalf("enable: %v", err)
}
if got.DisabledAt != nil {
t.Fatalf("DisabledAt = %v, want nil after enable", got.DisabledAt)
}
ok, err := svc.Verify(context.Background(), "alice", "good")
if err != nil || !ok {
t.Fatalf("Verify after enable = (%v, %v), want (true, nil)", ok, err)
}
}
func TestCreateRejectsDuplicateUsername(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "x"}); err != nil {
t.Fatalf("create #1: %v", err)
}
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "y"}); !errors.Is(err, admin.ErrUsernameTaken) {
t.Fatalf("Create #2 err = %v, want ErrUsernameTaken", err)
}
}
func TestCreateRejectsEmptyFields(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "", Password: "x"}); !errors.Is(err, admin.ErrInvalidInput) {
t.Fatalf("Create(empty username) err = %v, want ErrInvalidInput", err)
}
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: ""}); !errors.Is(err, admin.ErrInvalidInput) {
t.Fatalf("Create(empty password) err = %v, want ErrInvalidInput", err)
}
}
// TestResetPasswordReplacesHash checks that after ResetPassword the old
// password is rejected and the new one is accepted.
func TestResetPasswordReplacesHash(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	db := startPostgres(t)
	svc, _, _ := buildService(t, db)

	if _, err := svc.Create(ctx, admin.CreateInput{Username: "alice", Password: "old"}); err != nil {
		t.Fatalf("create: %v", err)
	}
	if _, err := svc.ResetPassword(ctx, "alice", "new-secret"); err != nil {
		t.Fatalf("reset: %v", err)
	}

	// The error must be checked: a failed lookup also yields ok == false and
	// would otherwise be mistaken for "old password correctly rejected".
	ok, err := svc.Verify(ctx, "alice", "old")
	if err != nil {
		t.Fatalf("Verify(old) returned error: %v", err)
	}
	if ok {
		t.Fatalf("Verify(old) = true after reset")
	}

	if ok, err := svc.Verify(ctx, "alice", "new-secret"); err != nil || !ok {
		t.Fatalf("Verify(new) = (%v, %v), want (true, nil)", ok, err)
	}
}
func TestResetPasswordOnUnknownUser(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.ResetPassword(context.Background(), "ghost", "x"); !errors.Is(err, admin.ErrNotFound) {
t.Fatalf("ResetPassword(ghost) err = %v, want ErrNotFound", err)
}
}
func TestListReturnsAllRows(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
for _, u := range []string{"alice", "bob", "carol"} {
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: u, Password: "x"}); err != nil {
t.Fatalf("create %s: %v", u, err)
}
}
got, err := svc.List(context.Background())
if err != nil {
t.Fatalf("list: %v", err)
}
if len(got) != 3 {
t.Fatalf("List = %d rows, want 3", len(got))
}
// Order is by username ASC at the SQL level.
if got[0].Username != "alice" || got[1].Username != "bob" || got[2].Username != "carol" {
t.Fatalf("List order = %v, want [alice bob carol]", []string{got[0].Username, got[1].Username, got[2].Username})
}
}
func TestVerifyTouchesLastUsedAt(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, store, _ := buildService(t, db)
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "good"}); err != nil {
t.Fatalf("create: %v", err)
}
if ok, err := svc.Verify(context.Background(), "alice", "good"); err != nil || !ok {
t.Fatalf("Verify: (%v, %v)", ok, err)
}
// last_used_at is updated by a fire-and-forget goroutine. Poll until
// it lands or the deadline passes.
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
got, _, err := store.Lookup(context.Background(), "alice")
if err != nil {
t.Fatalf("lookup: %v", err)
}
if got.LastUsedAt != nil {
return
}
time.Sleep(20 * time.Millisecond)
}
t.Fatalf("LastUsedAt not populated after Verify")
}
func TestDisableIsIdempotent(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.Create(context.Background(), admin.CreateInput{Username: "alice", Password: "x"}); err != nil {
t.Fatalf("create: %v", err)
}
first, err := svc.Disable(context.Background(), "alice")
if err != nil {
t.Fatalf("disable #1: %v", err)
}
if first.DisabledAt == nil {
t.Fatalf("DisabledAt = nil after disable")
}
second, err := svc.Disable(context.Background(), "alice")
if err != nil {
t.Fatalf("disable #2: %v", err)
}
if second.DisabledAt == nil {
t.Fatalf("DisabledAt = nil on second disable")
}
}
func TestDisableUnknownUser(t *testing.T) {
t.Parallel()
db := startPostgres(t)
svc, _, _ := buildService(t, db)
if _, err := svc.Disable(context.Background(), "ghost"); !errors.Is(err, admin.ErrNotFound) {
t.Fatalf("Disable(ghost) err = %v, want ErrNotFound", err)
}
}
+56
View File
@@ -0,0 +1,56 @@
package admin
import (
"context"
"fmt"
"galaxy/backend/internal/config"
"go.uber.org/zap"
"golang.org/x/crypto/bcrypt"
)
// Bootstrap inserts the seed admin row when the env-driven
// `BACKEND_ADMIN_BOOTSTRAP_USER` / `BACKEND_ADMIN_BOOTSTRAP_PASSWORD`
// values are supplied and no row with that username exists yet. The
// insert goes through Store.BootstrapInsert, which skips existing rows,
// so operators can leave the env vars set after the first deploy
// without re-creating the row on every boot.
//
// Bootstrap runs *before* `Cache.Warm` so the warm read picks up the
// seed row. Errors are returned to the caller; the boot path is
// expected to abort startup when Bootstrap fails.
//
// Behaviour by configuration:
//
//   - cfg.User empty: logs "skipped (no env vars)" and returns nil.
//   - cfg.User set, cfg.Password empty: returns an error wrapping
//     ErrInvalidInput. `config.Validate()` is documented to reject this
//     combination already; the check here keeps Bootstrap consistent
//     with Service.Create and guarantees that a caller which bypasses
//     validation can never seed an admin whose password is empty.
//   - both set: hashes the password with bcrypt and inserts the row if
//     the username is not present yet.
//
// A nil logger is replaced with a no-op logger.
func Bootstrap(ctx context.Context, store *Store, cfg config.AdminBootstrapConfig, logger *zap.Logger) error {
	if logger == nil {
		logger = zap.NewNop()
	}
	logger = logger.Named("admin.bootstrap")

	if cfg.User == "" {
		logger.Info("skipped (no env vars)")
		return nil
	}
	if cfg.Password == "" {
		// bcrypt happily hashes an empty password; refuse it so the seed
		// account cannot be logged into with a blank credential.
		return fmt.Errorf("admin bootstrap: empty password for %q: %w", cfg.User, ErrInvalidInput)
	}

	hash, err := bcrypt.GenerateFromPassword([]byte(cfg.Password), bootstrapBcryptCost)
	if err != nil {
		return fmt.Errorf("admin bootstrap: hash password: %w", err)
	}
	inserted, err := store.BootstrapInsert(ctx, cfg.User, hash)
	if err != nil {
		return fmt.Errorf("admin bootstrap: %w", err)
	}

	if inserted {
		logger.Info("inserted seed admin", zap.String("admin_username", cfg.User))
	} else {
		logger.Info("skipped (admin exists)", zap.String("admin_username", cfg.User))
	}
	return nil
}
+128
View File
@@ -0,0 +1,128 @@
package admin
import (
"context"
"fmt"
"sync"
"sync/atomic"
)
// cacheEntry pairs the admin aggregate with its bcrypt hash. The
// hash is private to the admin package: Cache.Get hands it back next
// to the Admin value, and Verify feeds it to bcrypt.
type cacheEntry struct {
	// admin is the cached account snapshot, keyed in Cache by admin.Username.
	admin Admin
	// passwordHash is stored by reference (see Cache.Put); it must not be
	// mutated after it has been handed to the cache.
	passwordHash []byte
}
// Cache is the in-memory write-through projection of the rows in
// `backend.admin_accounts`. Reads (Get, Size) are RLocked; writes (Warm,
// Put, Remove) are Locked.
//
// The cache mirrors the `auth.Cache` and `user.Cache` idioms: callers
// commit to Postgres first, then update the cache. A commit failure
// leaves the cache untouched, matching the previous DB state.
//
// Every method accepts a nil receiver and treats it as an empty,
// never-ready cache.
type Cache struct {
	// mu guards byName.
	mu sync.RWMutex
	// byName maps Admin.Username to the cached entry.
	byName map[string]cacheEntry
	// ready is set to true by the first successful Warm and never reset.
	ready atomic.Bool
}
// NewCache returns a Cache with no entries. Ready() stays false until a
// call to Warm succeeds.
func NewCache() *Cache {
	c := new(Cache)
	c.byName = map[string]cacheEntry{}
	return c
}
// Warm reloads the cache from store.ListAll and, on success, marks the
// cache ready. When the load fails the error is returned wrapped and both
// the current contents and the ready flag are left as they were. Warm may
// be called again to reload the cache. A nil receiver is a no-op that
// returns nil.
func (c *Cache) Warm(ctx context.Context, store *Store) error {
	if c == nil {
		return nil
	}

	admins, hashes, err := store.ListAll(ctx)
	if err != nil {
		return fmt.Errorf("admin cache warm: %w", err)
	}

	// Build the replacement map first; ListAll returns the hashes in the
	// same order as the admins, so index i pairs them up.
	fresh := make(map[string]cacheEntry, len(admins))
	for i := range admins {
		fresh[admins[i].Username] = cacheEntry{admin: admins[i], passwordHash: hashes[i]}
	}

	c.mu.Lock()
	c.byName = fresh
	c.ready.Store(true)
	c.mu.Unlock()
	return nil
}
// Ready reports whether Warm has succeeded at least once. A nil receiver
// is never ready.
func (c *Cache) Ready() bool {
	return c != nil && c.ready.Load()
}
// Size reports how many admin accounts are currently cached. A nil
// receiver has size 0.
func (c *Cache) Size() int {
	if c == nil {
		return 0
	}
	c.mu.RLock()
	n := len(c.byName)
	c.mu.RUnlock()
	return n
}
// Get looks up username and returns the cached Admin, its bcrypt hash and
// whether an entry was found. On a miss (or a nil receiver) it returns the
// zero Admin, a nil hash and false. The hash is the cached slice itself,
// not a copy.
func (c *Cache) Get(username string) (Admin, []byte, bool) {
	if c == nil {
		return Admin{}, nil, false
	}
	c.mu.RLock()
	entry, found := c.byName[username]
	c.mu.RUnlock()
	// A missing key yields the zero cacheEntry, i.e. Admin{} and a nil hash.
	return entry.admin, entry.passwordHash, found
}
// Put stores admin and its bcrypt hash in the cache, keyed by
// admin.Username. It is safe to call on an existing entry — the value is
// overwritten with the latest snapshot. The slice is stored by
// reference; callers must not mutate it after handing it to Put.
//
// Put also works on a zero-value Cache (one not built by NewCache): the
// backing map is created on first use, matching Get, Size and Remove,
// which already tolerate a nil map. A nil receiver is a no-op.
func (c *Cache) Put(admin Admin, passwordHash []byte) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.byName == nil {
		// Writing to a nil map panics; allocate lazily instead.
		c.byName = make(map[string]cacheEntry)
	}
	c.byName[admin.Username] = cacheEntry{
		admin:        admin,
		passwordHash: passwordHash,
	}
}
// Remove drops the entry for username from the cache. Removing a username
// that is not cached, or calling Remove on a nil receiver, does nothing.
// The original note states that no Delete operation ships yet and that the
// helper exists for symmetry with `auth.Cache` / `user.Cache` and for a
// future hard-delete flow.
func (c *Cache) Remove(username string) {
	if c == nil {
		return
	}
	c.mu.Lock()
	delete(c.byName, username)
	c.mu.Unlock()
}
+98
View File
@@ -0,0 +1,98 @@
package admin_test
import (
"context"
"database/sql"
"testing"
"time"
"galaxy/backend/internal/admin"
_ "github.com/jackc/pgx/v5/stdlib"
)
// TestCacheGetReturnsFalseUntilPut checks that Get on a freshly built
// cache reports a miss.
func TestCacheGetReturnsFalseUntilPut(t *testing.T) {
	t.Parallel()
	_, _, found := admin.NewCache().Get("alice")
	if found {
		t.Fatalf("Get on empty cache returned ok=true")
	}
}
// TestCacheReadyFlipsAfterWarm checks that Ready is false before Warm is
// called and stays false after a Warm that fails.
func TestCacheReadyFlipsAfterWarm(t *testing.T) {
	t.Parallel()
	c := admin.NewCache()
	if c.Ready() {
		t.Fatalf("Ready() = true before Warm")
	}

	err := c.Warm(context.Background(), admin.NewStore(stubDB(t)))
	if err == nil {
		t.Fatalf("Warm against an empty stub DB unexpectedly succeeded")
	}
	if c.Ready() {
		t.Fatalf("Ready() flipped after a failed Warm")
	}
}
// TestCachePutIsVisibleToReader checks that an entry stored with Put is
// returned by Get with the same username and hash, and counted by Size.
func TestCachePutIsVisibleToReader(t *testing.T) {
	t.Parallel()
	c := admin.NewCache()
	createdAt := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
	account := admin.Admin{Username: "alice", CreatedAt: createdAt}
	c.Put(account, []byte("hash-bytes"))

	got, hash, found := c.Get("alice")
	switch {
	case !found:
		t.Fatalf("Get after Put returned ok=false")
	case got.Username != "alice":
		t.Fatalf("Get returned username %q, want alice", got.Username)
	case string(hash) != "hash-bytes":
		t.Fatalf("Get returned hash %q, want hash-bytes", string(hash))
	}
	if n := c.Size(); n != 1 {
		t.Fatalf("Size = %d, want 1", n)
	}
}
// TestCachePutOverwrites checks that a second Put for the same username
// replaces the stored hash without adding a second entry.
func TestCachePutOverwrites(t *testing.T) {
	t.Parallel()
	c := admin.NewCache()
	for _, h := range []string{"old", "new"} {
		c.Put(admin.Admin{Username: "alice"}, []byte(h))
	}

	_, hash, found := c.Get("alice")
	if !found || string(hash) != "new" {
		t.Fatalf("Get after overwrite returned ok=%v hash=%q, want ok=true hash=new", found, string(hash))
	}
	if n := c.Size(); n != 1 {
		t.Fatalf("Size after overwrite = %d, want 1", n)
	}
}
// TestCacheRemove checks that Remove evicts an entry and that removing it
// a second time is harmless.
func TestCacheRemove(t *testing.T) {
	t.Parallel()
	c := admin.NewCache()
	c.Put(admin.Admin{Username: "alice"}, []byte("hash"))

	c.Remove("alice")
	_, _, found := c.Get("alice")
	if found {
		t.Fatalf("Get after Remove returned ok=true")
	}

	// Second removal of the same key must not panic.
	c.Remove("alice")
}
// stubDB opens a *sql.DB through the "pgx" driver against a host under
// `.invalid` with a one-second connect timeout, so queries issued on it are
// expected to fail. It backs the "failed Warm does not flip Ready" test,
// which only needs the failure, not any particular driver behaviour. The
// handle is closed by a t.Cleanup hook.
func stubDB(t *testing.T) *sql.DB {
	t.Helper()
	const dsn = "postgres://disabled.invalid:5432/none?sslmode=disable&connect_timeout=1"
	handle, err := sql.Open("pgx", dsn)
	if err != nil {
		t.Fatalf("sql.Open: %v", err)
	}
	t.Cleanup(func() {
		// Best-effort close; the test has already finished.
		_ = handle.Close()
	})
	return handle
}
+21
View File
@@ -0,0 +1,21 @@
package admin
import "errors"
// Sentinel errors emitted by Service methods. Handlers translate them
// into HTTP responses; callers in tests can match on them with
// errors.Is.
var (
	// ErrNotFound is returned when a lookup against `backend.admin_accounts`
	// matches no row. Store.Lookup, Store.UpdatePasswordHash and
	// Store.SetDisabledAt return it when the query yields no rows.
	// Handlers map it to HTTP 404.
	ErrNotFound = errors.New("admin: account not found")
	// ErrUsernameTaken is returned by Create when the supplied username
	// already exists. Store.Insert produces it when the primary-key
	// constraint is violated. Handlers map it to HTTP 409 with code
	// "conflict".
	ErrUsernameTaken = errors.New("admin: username already in use")
	// ErrInvalidInput is returned when a request is syntactically valid
	// but semantically rejected (empty username, empty password). Handlers
	// map it to HTTP 400.
	ErrInvalidInput = errors.New("admin: invalid input")
)
+214
View File
@@ -0,0 +1,214 @@
package admin
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/go-jet/jet/v2/qrm"
)
// adminAccountsPrimaryKey is the constraint name surfaced on the
// primary-key UNIQUE violation when a duplicate username is inserted.
// Postgres synthesises the constraint name as `<table>_pkey` for
// primary-key constraints, which matches the migration in
// `backend/internal/postgres/migrations/00001_init.sql:199`.
// Store.Insert passes it to isUniqueViolation to recognise a duplicate
// username and translate it into ErrUsernameTaken.
const adminAccountsPrimaryKey = "admin_accounts_pkey"
// Store is the Postgres-backed query surface for the admin package.
// Queries are built through go-jet against the generated table
// bindings under `backend/internal/postgres/jet/backend/table`.
type Store struct {
	// db is the handle every Store method executes its statement against.
	db *sql.DB
}
// NewStore returns a Store that runs its queries against db.
func NewStore(db *sql.DB) *Store {
	s := new(Store)
	s.db = db
	return s
}
// adminColumnList returns the projection shared by every SELECT and
// RETURNING clause in this file: username, password hash and the three
// timestamps of `admin_accounts`, in that order.
func adminColumnList() postgres.ColumnList {
	accounts := table.AdminAccounts
	columns := postgres.ColumnList{
		accounts.Username,
		accounts.PasswordHash,
		accounts.CreatedAt,
		accounts.LastUsedAt,
		accounts.DisabledAt,
	}
	return columns
}
// Lookup fetches the admin row for username together with its bcrypt
// hash. It returns ErrNotFound when the query yields no row; any other
// query failure is wrapped with the username for context.
func (s *Store) Lookup(ctx context.Context, username string) (Admin, []byte, error) {
	byUsername := table.AdminAccounts.Username.EQ(postgres.String(username))
	query := postgres.SELECT(adminColumnList()).
		FROM(table.AdminAccounts).
		WHERE(byUsername).
		LIMIT(1)

	var record model.AdminAccounts
	err := query.QueryContext(ctx, s.db, &record)
	switch {
	case err == nil:
		found, hash := modelToAdmin(record)
		return found, hash, nil
	case errors.Is(err, qrm.ErrNoRows):
		return Admin{}, nil, ErrNotFound
	default:
		return Admin{}, nil, fmt.Errorf("admin store: lookup %q: %w", username, err)
	}
}
// ListAll loads every admin row ordered by username ASC. The two returned
// slices are parallel: hashes[i] is the bcrypt hash of admins[i].
// Cache.Warm consumes both so Verify can match passwords without another
// query. Query failures are returned wrapped.
func (s *Store) ListAll(ctx context.Context) ([]Admin, [][]byte, error) {
	query := postgres.SELECT(adminColumnList()).
		FROM(table.AdminAccounts).
		ORDER_BY(table.AdminAccounts.Username.ASC())

	var records []model.AdminAccounts
	if err := query.QueryContext(ctx, s.db, &records); err != nil {
		return nil, nil, fmt.Errorf("admin store: list: %w", err)
	}

	admins := make([]Admin, len(records))
	hashes := make([][]byte, len(records))
	for i := range records {
		admins[i], hashes[i] = modelToAdmin(records[i])
	}
	return admins, hashes, nil
}
// Insert writes a new admin row and returns it as read back through
// RETURNING. A violation of the primary-key constraint (the username
// already exists) is reported as ErrUsernameTaken; other failures are
// wrapped with the username.
func (s *Store) Insert(ctx context.Context, username string, passwordHash []byte) (Admin, error) {
	accounts := table.AdminAccounts
	query := accounts.
		INSERT(accounts.Username, accounts.PasswordHash).
		VALUES(username, passwordHash).
		RETURNING(adminColumnList())

	var record model.AdminAccounts
	err := query.QueryContext(ctx, s.db, &record)
	if err == nil {
		created, _ := modelToAdmin(record)
		return created, nil
	}
	if isUniqueViolation(err, adminAccountsPrimaryKey) {
		return Admin{}, ErrUsernameTaken
	}
	return Admin{}, fmt.Errorf("admin store: insert %q: %w", username, err)
}
// UpdatePasswordHash overwrites the bcrypt hash stored for username and
// returns the updated row. It returns ErrNotFound when no row matches;
// other failures are wrapped with the username.
func (s *Store) UpdatePasswordHash(ctx context.Context, username string, passwordHash []byte) (Admin, error) {
	accounts := table.AdminAccounts
	query := accounts.
		UPDATE(accounts.PasswordHash).
		SET(passwordHash).
		WHERE(accounts.Username.EQ(postgres.String(username))).
		RETURNING(adminColumnList())

	var record model.AdminAccounts
	err := query.QueryContext(ctx, s.db, &record)
	switch {
	case err == nil:
		updated, _ := modelToAdmin(record)
		return updated, nil
	case errors.Is(err, qrm.ErrNoRows):
		return Admin{}, ErrNotFound
	default:
		return Admin{}, fmt.Errorf("admin store: update password for %q: %w", username, err)
	}
}
// SetDisabledAt writes `disabled_at` for username: a non-nil disabledAt
// stores that timestamp (disable), nil stores NULL (re-enable). The
// updated row is returned together with its bcrypt hash so the caller can
// refresh the cache from a single round trip. It returns ErrNotFound when
// no row matches; other failures are wrapped with the username.
func (s *Store) SetDisabledAt(ctx context.Context, username string, disabledAt *time.Time) (Admin, []byte, error) {
	// Default to NULL and switch to the concrete timestamp when one is given.
	var value postgres.Expression = postgres.TimestampzExp(postgres.NULL)
	if disabledAt != nil {
		value = postgres.TimestampzT(*disabledAt)
	}

	accounts := table.AdminAccounts
	query := accounts.
		UPDATE(accounts.DisabledAt).
		SET(value).
		WHERE(accounts.Username.EQ(postgres.String(username))).
		RETURNING(adminColumnList())

	var record model.AdminAccounts
	err := query.QueryContext(ctx, s.db, &record)
	switch {
	case err == nil:
		updated, hash := modelToAdmin(record)
		return updated, hash, nil
	case errors.Is(err, qrm.ErrNoRows):
		return Admin{}, nil, ErrNotFound
	default:
		return Admin{}, nil, fmt.Errorf("admin store: set disabled_at for %q: %w", username, err)
	}
}
// TouchLastUsed sets `last_used_at` to now for username. It is called
// after a successful Verify from a background goroutine; the returned
// error is meant for logging. A username that matches no row is not
// reported as an error.
func (s *Store) TouchLastUsed(ctx context.Context, username string, now time.Time) error {
	accounts := table.AdminAccounts
	query := accounts.
		UPDATE(accounts.LastUsedAt).
		SET(postgres.TimestampzT(now)).
		WHERE(accounts.Username.EQ(postgres.String(username)))

	_, err := query.ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("admin store: touch last_used_at for %q: %w", username, err)
	}
	return nil
}
// BootstrapInsert inserts the seed admin row unless a row with the same
// username already exists (INSERT ... ON CONFLICT (username) DO NOTHING).
// The boolean is true when a row was written and false when the insert
// was skipped.
//
// Because conflicts are ignored, repeated calls with the same username
// never touch the stored password hash; per the original note, the seed
// admin's password is rotated through `ResetPassword`, not by editing
// env vars and restarting.
func (s *Store) BootstrapInsert(ctx context.Context, username string, passwordHash []byte) (bool, error) {
	accounts := table.AdminAccounts
	query := accounts.
		INSERT(accounts.Username, accounts.PasswordHash).
		VALUES(username, passwordHash).
		ON_CONFLICT(accounts.Username).
		DO_NOTHING()

	result, err := query.ExecContext(ctx, s.db)
	if err != nil {
		return false, fmt.Errorf("admin store: bootstrap insert %q: %w", username, err)
	}
	written, err := result.RowsAffected()
	if err != nil {
		return false, fmt.Errorf("admin store: bootstrap rows-affected: %w", err)
	}
	return written > 0, nil
}
// modelToAdmin converts a generated model row into the public Admin value
// and returns the row's password hash alongside it. The nullable
// timestamps are copied into fresh variables so the Admin does not share
// pointers with the row.
func modelToAdmin(row model.AdminAccounts) (Admin, []byte) {
	var out Admin
	out.Username = row.Username
	out.CreatedAt = row.CreatedAt
	if src := row.LastUsedAt; src != nil {
		lastUsed := *src
		out.LastUsedAt = &lastUsed
	}
	if src := row.DisabledAt; src != nil {
		disabled := *src
		out.DisabledAt = &disabled
	}
	return out, row.PasswordHash
}
+132
View File
@@ -0,0 +1,132 @@
package admin
import (
"context"
"errors"
"strings"
"go.uber.org/zap"
"golang.org/x/crypto/bcrypt"
)
// Verify implements `basicauth.Verifier` and decides whether the supplied
// credentials belong to an enabled admin account.
//
// Steps, in order:
//
//  1. The username is whitespace-trimmed; an empty result is rejected
//     with (false, nil).
//  2. The account is resolved through lookupForVerify (cache first, then
//     Postgres). An unknown username yields (false, nil), the same
//     result a wrong password produces.
//  3. An account with a non-nil DisabledAt is rejected with (false, nil)
//     before bcrypt runs.
//  4. The password is checked with bcrypt.CompareHashAndPassword. A
//     mismatch yields (false, nil).
//  5. On a match, touchLastUsedAsync is started in the background and
//     (true, nil) is returned without waiting for it.
//  6. Any other error from the lookup or from bcrypt is returned to the
//     caller as (false, err).
func (s *Service) Verify(ctx context.Context, username, password string) (bool, error) {
	username = strings.TrimSpace(username)
	if username == "" {
		return false, nil
	}

	account, hash, err := s.lookupForVerify(ctx, username)
	if errors.Is(err, ErrNotFound) {
		return false, nil
	}
	if err != nil {
		return false, err
	}
	if account.DisabledAt != nil {
		return false, nil
	}

	cmpErr := bcrypt.CompareHashAndPassword(hash, []byte(password))
	if cmpErr == nil {
		s.touchLastUsedAsync(username, hash)
		return true, nil
	}
	if errors.Is(cmpErr, bcrypt.ErrMismatchedHashAndPassword) {
		return false, nil
	}
	return false, cmpErr
}
// lookupForVerify resolves username from the cache and, on a miss, from
// Postgres. A row found in Postgres is written into the cache before it
// is returned, so later calls are served from memory. Store errors
// (including ErrNotFound) are returned unchanged. The returned hash may be
// the slice held by the cache; callers must not mutate it.
func (s *Service) lookupForVerify(ctx context.Context, username string) (Admin, []byte, error) {
	cached, cachedHash, hit := s.deps.Cache.Get(username)
	if hit {
		return cached, cachedHash, nil
	}

	stored, storedHash, err := s.deps.Store.Lookup(ctx, username)
	if err != nil {
		return Admin{}, nil, err
	}
	s.deps.Cache.Put(stored, storedHash)
	return stored, storedHash, nil
}
// touchLastUsedAsync schedules a fire-and-forget UPDATE on
// `last_used_at`. The update uses a fresh background context so the
// write survives the request lifecycle even when the caller
// disconnects mid-response. On success the cached entry is refreshed
// so subsequent reads see the new timestamp; failures are logged at
// warn level and the cache stays at the old value.
//
// The timestamp is captured before the goroutine starts. Goroutines
// spawned by overlapping Verify calls may finish in any order, so the
// cache refresh is skipped when the cached LastUsedAt is already at or
// after this call's timestamp; the cached value therefore never moves
// backwards.
//
// `last_used_at` is observability-only: it never gates authentication.
//
// NOTE(review): the store call is issued with context.Background() and
// carries no deadline of its own — confirm that the driver or pool
// bounds it, otherwise a stalled connection keeps this goroutine alive.
//
// NOTE(review): the refresh is a Cache.Get followed by a Cache.Put and
// is not atomic with respect to other cache writers; a disable or
// password reset that lands between the two calls could be overwritten
// by the snapshot read here. Closing that window needs an atomic
// update on Cache.
func (s *Service) touchLastUsedAsync(username string, hash []byte) {
	now := s.deps.Now().UTC()
	go func() {
		// Background context — the request may complete before the
		// goroutine reaches Postgres.
		ctx := context.Background()
		if err := s.deps.Store.TouchLastUsed(ctx, username, now); err != nil {
			s.deps.Logger.Warn("touch last_used_at failed",
				zap.String("admin_username", username),
				zap.Error(err),
			)
			return
		}

		// Re-read the entry so the refresh starts from the latest cached
		// state rather than from the snapshot Verify worked with.
		admin, freshHash, ok := s.deps.Cache.Get(username)
		if !ok {
			return
		}
		// A later Verify may already have recorded a newer timestamp;
		// do not regress it.
		if admin.LastUsedAt != nil && !admin.LastUsedAt.Before(now) {
			return
		}
		usedAt := now
		admin.LastUsedAt = &usedAt

		// Prefer the slice that was just verified; if the cache
		// rotated to a different hash (concurrent password
		// reset), keep the cached one to avoid clobbering it.
		if hashesEqual(freshHash, hash) {
			s.deps.Cache.Put(admin, hash)
		} else {
			s.deps.Cache.Put(admin, freshHash)
		}
	}()
}
// hashesEqual reports whether a and b hold the same bytes. Nil and empty
// slices compare equal. It is used only to detect a stale hash, so an
// ordinary (non-constant-time) comparison is sufficient.
func hashesEqual(a, b []byte) bool {
	// Comparing the string conversions checks length and content in one
	// step; the compiler does not copy the slices for this pattern.
	return string(a) == string(b)
}
+178
View File
@@ -0,0 +1,178 @@
// Package app wires the backend process lifecycle and coordinates component
// startup and graceful shutdown.
package app
import (
"context"
"errors"
"fmt"
"sync"
"time"
)
// Component is a long-lived backend subsystem that participates in coordinated
// startup and graceful shutdown.
//
// App.Run calls Run on a dedicated goroutine with a context that is cancelled
// once shutdown begins, and calls Shutdown with a context bounded by the App's
// shutdown timeout.
type Component interface {
	// Run starts the component and blocks until it stops. App.Run treats a
	// nil return that arrives while its parent context is still live as a
	// failure ("exited without error before shutdown").
	Run(context.Context) error
	// Shutdown stops the component within the provided timeout-bounded context.
	Shutdown(context.Context) error
}
// App owns the process-level lifecycle of the backend and its registered
// components.
type App struct {
	// shutdownTimeout bounds each component's Shutdown call and, separately,
	// the wait for Run goroutines to return. validate requires it to be
	// positive.
	shutdownTimeout time.Duration
	// components is the App's own copy of the slice passed to New; error
	// messages identify components by their index in it.
	components []Component
}
// New builds an App that uses shutdownTimeout as its shutdown budget. The
// components are copied into a slice owned by the App, so later changes to
// the caller's slice do not affect it. Nothing is validated here; Run
// rejects a non-positive timeout or a nil component.
func New(shutdownTimeout time.Duration, components ...Component) *App {
	var owned []Component
	if len(components) > 0 {
		owned = make([]Component, len(components))
		copy(owned, components)
	}
	app := &App{shutdownTimeout: shutdownTimeout}
	app.components = owned
	return app
}
// Run starts all configured components, waits for cancellation or the first
// component failure, and then executes best-effort graceful shutdown for every
// component.
//
// Run returns an error when ctx is nil or when validate rejects the App. With
// no components it blocks until ctx is done and returns nil. Otherwise the
// result joins three parts: the classified first component exit (nil when the
// wait ended because ctx was done), the errors from Shutdown, and the error
// from waiting for the Run goroutines to return.
func (a *App) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run backend app: nil context")
	}
	if err := a.validate(); err != nil {
		return err
	}
	if len(a.components) == 0 {
		<-ctx.Done()
		return nil
	}
	// runCtx is the context components observe; cancelling it asks them to
	// stop without touching the caller's ctx.
	runCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	// One buffer slot per component, so every goroutine can deliver its
	// result without blocking even though only the first one is received.
	results := make(chan componentResult, len(a.components))
	var runWG sync.WaitGroup
	for idx, component := range a.components {
		runWG.Add(1)
		go func(index int, component Component) {
			defer runWG.Done()
			results <- componentResult{
				index: index,
				err:   component.Run(runCtx),
			}
		}(idx, component)
	}
	var runErr error
	select {
	case <-ctx.Done():
		// Caller-initiated shutdown: runErr stays nil.
	case result := <-results:
		runErr = classifyComponentResult(ctx, result)
	}
	// Signal the remaining components, then shut them down and wait for
	// their Run calls to return.
	cancel()
	shutdownErr := a.shutdownComponents()
	waitErr := a.waitForComponents(&runWG)
	return errors.Join(runErr, shutdownErr, waitErr)
}
// componentResult carries the outcome of one component's Run call. Every
// component goroutine sends exactly one; App.Run only inspects the first
// one it receives.
type componentResult struct {
	// index is the component's position in App.components.
	index int
	// err is the value returned by Component.Run.
	err error
}
// validate checks the App before any goroutine is started: the shutdown
// timeout must be positive and no registered component may be nil. It
// returns the first violation it finds, or nil.
func (a *App) validate() error {
	if timeout := a.shutdownTimeout; timeout <= 0 {
		return fmt.Errorf("run backend app: shutdown timeout must be positive, got %s", timeout)
	}
	for i := range a.components {
		if a.components[i] == nil {
			return fmt.Errorf("run backend app: component %d is nil", i)
		}
	}
	return nil
}
// classifyComponentResult turns the first component exit into the error
// Run should report:
//
//   - once parentCtx is done, a nil error or a context.Canceled error is
//     part of a normal shutdown and maps to nil;
//   - a nil error while parentCtx is still live means the component
//     stopped on its own and is reported as a failure;
//   - every other error is wrapped with the component index.
func classifyComponentResult(parentCtx context.Context, result componentResult) error {
	parentDone := parentCtx.Err() != nil

	if result.err == nil {
		if parentDone {
			return nil
		}
		return fmt.Errorf("run backend app: component %d exited without error before shutdown", result.index)
	}
	if parentDone && errors.Is(result.err, context.Canceled) {
		return nil
	}
	return fmt.Errorf("run backend app: component %d: %w", result.index, result.err)
}
// shutdownComponents calls Shutdown on every registered component using a fresh
// timeout-bounded context per component and joins any shutdown failures.
// The calls run concurrently, one goroutine per component, and the method
// returns only after all of them have returned. It returns nil when every
// Shutdown succeeded.
func (a *App) shutdownComponents() error {
	var shutdownWG sync.WaitGroup
	// One slot per component, so a failing Shutdown never blocks on send.
	errs := make(chan error, len(a.components))
	for idx, component := range a.components {
		shutdownWG.Add(1)
		go func(index int, component Component) {
			defer shutdownWG.Done()
			// Derived from Background, not from the run context, which is
			// already cancelled by the time shutdown starts.
			shutdownCtx, cancel := context.WithTimeout(context.Background(), a.shutdownTimeout)
			defer cancel()
			if err := component.Shutdown(shutdownCtx); err != nil {
				errs <- fmt.Errorf("shutdown backend component %d: %w", index, err)
			}
		}(idx, component)
	}
	shutdownWG.Wait()
	// All senders are done, so closing lets the range below terminate.
	close(errs)
	var joined error
	for err := range errs {
		joined = errors.Join(joined, err)
	}
	return joined
}
// waitForComponents blocks until every Run goroutine tracked by runWG has
// returned or the shutdown timeout elapses, whichever comes first. On
// timeout it returns an error wrapping context.DeadlineExceeded; the
// helper goroutine that waits on runWG is left running in that case.
func (a *App) waitForComponents(runWG *sync.WaitGroup) error {
	finished := make(chan struct{})
	go func() {
		defer close(finished)
		runWG.Wait()
	}()

	budget := time.NewTimer(a.shutdownTimeout)
	defer budget.Stop()

	select {
	case <-finished:
		return nil
	case <-budget.C:
		return fmt.Errorf("wait for backend components: %w", context.DeadlineExceeded)
	}
}
+93
View File
@@ -0,0 +1,93 @@
// Package auth implements the email-code authentication flow and the
// active-session bookkeeping consumed by gateway. The package is
// described end-to-end in `backend/PLAN.md` §5.1.
//
// External dependencies that have not landed yet (mail in 5.6, push
// session_invalidation in 6) are injected through the LoginCodeMailer
// and SessionInvalidator interfaces; auth ships no-op implementations
// that satisfy the contract until the real services arrive.
package auth
import (
"crypto/hmac"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"time"
"galaxy/backend/internal/config"
"go.uber.org/zap"
)
// Deps aggregates every collaborator the Service depends on.
// Constructing the Service through Deps (rather than positional args)
// keeps wiring patches small when new dependencies are added.
//
// Cache and Store must be non-nil: GetSession reads through Cache,
// SendEmailCode and ConfirmEmailCode mutate Store. User, Geo, Mail and
// Push are tested-in-isolation interfaces; production wires the real
// `*user.Service`, `*geo.Service`, mail, and push implementations.
//
// NewService fills in defaults for Now and Logger only; every other
// field is used as supplied.
type Deps struct {
	Store *Store
	Cache *Cache
	User  UserEnsurer
	Geo   GeoService
	Mail  LoginCodeMailer
	Push  SessionInvalidator
	// Config is the auth section of the backend configuration.
	Config config.AuthConfig
	// Now overrides time.Now for deterministic tests. A nil Now defaults
	// to time.Now in NewService.
	Now func() time.Time
	// Logger is named under "auth" by NewService. Nil falls back to
	// zap.NewNop.
	Logger *zap.Logger
}
// Service is the auth-domain entry point. Build it with NewService so the
// defaults are applied and the email-hash key is generated.
type Service struct {
	// deps holds the collaborators after NewService applied its defaults
	// (Now, Logger).
	deps Deps
	// emailHashKey keys the HMAC used to derive `email_hash` log fields.
	// A per-boot random key keeps email PII out of structured logs while
	// still letting operators correlate log entries within a single
	// process lifetime.
	emailHashKey []byte
}
// NewService builds a Service from deps. A nil Logger is replaced with
// zap.NewNop and the logger is then named "auth"; a nil Now is replaced
// with time.Now. No other field is checked, so a missing Cache, Store,
// User, Geo, Mail or Push surfaces as a panic at first use.
//
// A 32-byte key for hashEmail is drawn from crypto/rand on every call. If
// reading it fails, a fixed fallback key is copied in instead; the hash is
// a log-scoping aid rather than a security primitive.
func NewService(deps Deps) *Service {
	if deps.Logger == nil {
		deps.Logger = zap.NewNop()
	}
	deps.Logger = deps.Logger.Named("auth")
	if deps.Now == nil {
		deps.Now = time.Now
	}

	hashKey := make([]byte, 32)
	if _, err := rand.Read(hashKey); err != nil {
		// Deterministic fallback so log hashing keeps working.
		copy(hashKey, "galaxy-backend-auth-fallback-key")
	}

	svc := &Service{emailHashKey: hashKey}
	svc.deps = deps
	return svc
}
// hashEmail derives the `email_hash` value used in structured logs: the
// first eight bytes of HMAC-SHA256(emailHashKey, email), hex-encoded
// (sixteen characters).
//
// Because the key is generated per process, equal emails hash equally
// within one process lifetime but not across restarts. The truncated
// digest is long enough for ad-hoc log correlation and needs no offline
// key store.
func (s *Service) hashEmail(email string) string {
	const prefixBytes = 8

	digest := hmac.New(sha256.New, s.emailHashKey)
	// hash.Hash.Write is documented never to return an error.
	_, _ = digest.Write([]byte(email))
	return hex.EncodeToString(digest.Sum(nil)[:prefixBytes])
}
+511
View File
@@ -0,0 +1,511 @@
package auth_test
import (
"context"
"crypto/rand"
"database/sql"
"errors"
"net/url"
"sync"
"testing"
"time"
"galaxy/backend/internal/auth"
"galaxy/backend/internal/config"
backendpg "galaxy/backend/internal/postgres"
"galaxy/backend/internal/user"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
// Settings for the throw-away Postgres testcontainer used by the
// integration tests in this file.
const (
// pgImage is the container image the tests run against.
pgImage = "postgres:16-alpine"
// pgUser, pgPassword and pgDatabase are the credentials and database
// name the container is created with.
pgUser = "galaxy"
pgPassword = "galaxy"
pgDatabase = "galaxy_backend"
// pgSchema is set as the connection's search_path.
pgSchema = "backend"
// pgStartup bounds how long the wait strategy waits for the container.
pgStartup = 90 * time.Second
// pgOpTO is the per-operation timeout handed to the shared postgres
// helper.
pgOpTO = 10 * time.Second
)
// startPostgres spins up a Postgres testcontainer, opens a connection
// whose search_path is the backend schema, and applies the backend
// migrations. The test is skipped (not failed) when the container
// cannot be started, e.g. because no container runtime is available.
//
// Cleanup is registered on t: the returned *sql.DB is closed, the
// container is terminated, and the setup context is cancelled when the
// test finishes.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)

	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	// Register termination before looking at err: Run may hand back a
	// created container together with an error (for instance when the
	// wait strategy times out), and that container must still be removed.
	// TerminateContainer is a no-op for a nil container.
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}

	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	scopedDSN, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}

	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pgOpTO

	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	// Closing is best-effort; the container is torn down right after.
	t.Cleanup(func() { _ = db.Close() })

	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// dsnWithSearchPath returns baseDSN with its `search_path` query
// parameter set to schema. When the DSN carries no (or an empty)
// `sslmode`, it is set to "disable". An unparsable baseDSN yields the
// url.Parse error unchanged.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	dsn, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}

	params := dsn.Query()
	params.Set("search_path", schema)
	if mode := params.Get("sslmode"); mode == "" {
		params.Set("sslmode", "disable")
	}

	dsn.RawQuery = params.Encode()
	return dsn.String(), nil
}
// recordingMailer is a test double for auth.LoginCodeMailer. It never
// sends anything; it keeps the recipient and code of the most recent
// enqueue plus a running call count, all guarded by mu.
type recordingMailer struct {
	mu       sync.Mutex
	lastCode string
	lastTo   string
	calls    int
}

// newRecordingMailer returns a mailer with no recorded calls.
func newRecordingMailer() *recordingMailer {
	return new(recordingMailer)
}

// EnqueueLoginCode records email and code as the latest enqueue, bumps
// the call counter, and always succeeds. The context and TTL are
// ignored.
func (m *recordingMailer) EnqueueLoginCode(_ context.Context, email, code string, _ time.Duration) error {
	m.mu.Lock()
	m.lastTo, m.lastCode = email, code
	m.calls++
	m.mu.Unlock()
	return nil
}

// snapshot returns the last recipient, the last code, and the number
// of enqueues seen so far.
func (m *recordingMailer) snapshot() (string, string, int) {
	m.mu.Lock()
	to, code, n := m.lastTo, m.lastCode, m.calls
	m.mu.Unlock()
	return to, code, n
}
// recordingPush is a test double for auth.SessionInvalidator that
// stores every emission, in order, for later inspection.
type recordingPush struct {
	mu    sync.Mutex
	calls []recordedPush
}

// recordedPush is one captured PublishSessionInvalidation call.
type recordedPush struct {
	deviceSessionID, userID uuid.UUID
	reason                  string
}

// newRecordingPush returns a publisher with no recorded emissions.
func newRecordingPush() *recordingPush {
	return new(recordingPush)
}

// PublishSessionInvalidation appends the call's arguments to the
// recorded list. The context is ignored.
func (p *recordingPush) PublishSessionInvalidation(_ context.Context, dsID, uid uuid.UUID, reason string) {
	entry := recordedPush{deviceSessionID: dsID, userID: uid, reason: reason}

	p.mu.Lock()
	p.calls = append(p.calls, entry)
	p.mu.Unlock()
}

// snapshot returns a copy of the emissions recorded so far; the caller
// may keep or modify it freely.
func (p *recordingPush) snapshot() []recordedPush {
	p.mu.Lock()
	defer p.mu.Unlock()

	dup := make([]recordedPush, len(p.calls))
	copy(dup, p.calls)
	return dup
}
// stubGeo is a test double for auth.GeoService that performs no real
// lookups. LookupCountry answers from the countryByIP table (unknown
// IPs yield ""), LanguageForIP always returns "" so the auth flow
// exercises its "en" fallback, and the declared-country backfill is a
// no-op.
type stubGeo struct {
	countryByIP map[string]string
}

// newStubGeo returns a stub with an empty, writable country table.
func newStubGeo() *stubGeo {
	return &stubGeo{countryByIP: make(map[string]string)}
}

// LookupCountry returns the country configured for sourceIP, or "".
func (g *stubGeo) LookupCountry(sourceIP string) string {
	country := g.countryByIP[sourceIP]
	return country
}

// LanguageForIP reports no language for any IP.
func (g *stubGeo) LanguageForIP(_ string) string {
	return ""
}

// SetDeclaredCountryAtRegistration does nothing and always succeeds.
func (g *stubGeo) SetDeclaredCountryAtRegistration(_ context.Context, _ uuid.UUID, _ string) error {
	return nil
}
// authConfig returns the AuthConfig shared by the integration tests:
// five-minute challenges, three confirm attempts per challenge, at most
// three challenges per email per minute, and ten user-name retries.
func authConfig() config.AuthConfig {
	var cfg config.AuthConfig
	cfg.ChallengeTTL = 5 * time.Minute
	cfg.ChallengeMaxAttempts = 3
	cfg.ChallengeThrottle = config.AuthChallengeThrottleConfig{
		Window: time.Minute,
		Max:    3,
	}
	cfg.UserNameMaxRetries = 10
	return cfg
}
// buildService assembles an auth.Service backed by db. The store and a
// warmed cache are real; mail, push and geo are the recording/stub
// doubles, which are returned alongside the service so tests can assert
// on them. The user dependency is a real user.Service on the same db.
func buildService(t *testing.T, db *sql.DB) (*auth.Service, *recordingMailer, *recordingPush, *stubGeo) {
	t.Helper()

	authStore := auth.NewStore(db)
	sessions := auth.NewCache()
	if err := sessions.Warm(context.Background(), authStore); err != nil {
		t.Fatalf("warm cache: %v", err)
	}

	var (
		mailer = newRecordingMailer()
		pusher = newRecordingPush()
		geo    = newStubGeo()
	)

	users := user.NewService(user.Deps{
		Store:              user.NewStore(db),
		Cache:              user.NewCache(),
		UserNameMaxRetries: 10,
		Now:                time.Now,
	})

	deps := auth.Deps{
		Store:  authStore,
		Cache:  sessions,
		User:   users,
		Geo:    geo,
		Mail:   mailer,
		Push:   pusher,
		Config: authConfig(),
		Now:    time.Now,
	}
	return auth.NewService(deps), mailer, pusher, geo
}
// randomKey returns 32 cryptographically random bytes, used by the
// tests as a stand-in client public key. It fails the test immediately
// if the system RNG errors.
func randomKey(t *testing.T) []byte {
	t.Helper()

	const keyLen = 32
	buf := make([]byte, keyLen)
	_, err := rand.Read(buf)
	if err != nil {
		t.Fatalf("rand: %v", err)
	}
	return buf
}
// TestAuthEndToEnd walks the whole happy path against a real database:
// issue a challenge, confirm it, read the session back, revoke it, and
// revoke it again to check idempotency. Along the way it asserts the
// email is lower-cased before mailing, the code has CodeLength
// characters, a revoked session is no longer returned by GetSession,
// and exactly one invalidation push is emitted.
func TestAuthEndToEnd(t *testing.T) {
	db := startPostgres(t)
	svc, mailer, pusher, _ := buildService(t, db)
	ctx := context.Background()

	// Step 1: issue the challenge.
	challengeID, err := svc.SendEmailCode(ctx, "Alice@Example.Test", "ru", "", "")
	switch {
	case err != nil:
		t.Fatalf("SendEmailCode: %v", err)
	case challengeID == uuid.Nil:
		t.Fatalf("SendEmailCode returned nil challenge_id")
	}

	mailedTo, mailedCode, mailCalls := mailer.snapshot()
	switch {
	case mailedTo != "alice@example.test":
		t.Fatalf("mailer email = %q, want lower-cased", mailedTo)
	case len(mailedCode) != auth.CodeLength:
		t.Fatalf("mailer code = %q (len %d), want length %d", mailedCode, len(mailedCode), auth.CodeLength)
	case mailCalls != 1:
		t.Fatalf("mailer calls = %d, want 1", mailCalls)
	}

	// Step 2: confirm with the mailed code.
	confirm := auth.ConfirmInputs{
		ChallengeID:     challengeID,
		Code:            mailedCode,
		ClientPublicKey: randomKey(t),
		TimeZone:        "Europe/Moscow",
		SourceIP:        "",
	}
	session, err := svc.ConfirmEmailCode(ctx, confirm)
	switch {
	case err != nil:
		t.Fatalf("ConfirmEmailCode: %v", err)
	case session.UserID == uuid.Nil:
		t.Fatalf("session has nil user_id")
	case session.Status != auth.SessionStatusActive:
		t.Fatalf("session.Status = %q, want %q", session.Status, auth.SessionStatusActive)
	}
	sessionID := session.DeviceSessionID

	// Step 3: the session is readable while active.
	fetched, err := svc.GetSession(ctx, sessionID)
	if err != nil {
		t.Fatalf("GetSession: %v", err)
	}
	if fetched.UserID != session.UserID {
		t.Fatalf("GetSession user_id = %s, want %s", fetched.UserID, session.UserID)
	}

	// Step 4: revoke and check the revoked shape.
	revoked, err := svc.RevokeSession(ctx, sessionID)
	switch {
	case err != nil:
		t.Fatalf("RevokeSession: %v", err)
	case revoked.Status != auth.SessionStatusRevoked:
		t.Fatalf("revoked.Status = %q, want %q", revoked.Status, auth.SessionStatusRevoked)
	case revoked.RevokedAt == nil:
		t.Fatalf("revoked.RevokedAt nil after revoke")
	}

	_, err = svc.GetSession(ctx, sessionID)
	if !errors.Is(err, auth.ErrSessionNotFound) {
		t.Fatalf("GetSession after revoke = %v, want ErrSessionNotFound", err)
	}

	// Step 5: a second revoke succeeds and reports the same session.
	again, err := svc.RevokeSession(ctx, sessionID)
	if err != nil {
		t.Fatalf("idempotent RevokeSession: %v", err)
	}
	sameSession := again.DeviceSessionID == sessionID
	stillRevoked := again.Status == auth.SessionStatusRevoked
	if !sameSession || !stillRevoked {
		t.Fatalf("idempotent revoke shape mismatch: %+v", again)
	}

	// Step 6: only the first revoke emitted a push.
	emitted := pusher.snapshot()
	if len(emitted) != 1 {
		t.Fatalf("push emissions = %d, want 1", len(emitted))
	}
	if emitted[0].deviceSessionID != sessionID {
		t.Fatalf("push device_session_id mismatch")
	}
}
// TestSendEmailCodePermanentlyBlocked seeds an account row with
// permanent_block = true and checks that requesting a code for its
// email is rejected with ErrEmailPermanentlyBlocked.
func TestSendEmailCodePermanentlyBlocked(t *testing.T) {
	db := startPostgres(t)
	svc, _, _, _ := buildService(t, db)

	// The account is inserted directly; no service API creates a
	// blocked account in this test.
	const seedBlockedAccount = `
INSERT INTO backend.accounts (
user_id, email, user_name, preferred_language, time_zone, permanent_block
) VALUES ($1, $2, $3, $4, $5, true)
`
	_, seedErr := db.Exec(seedBlockedAccount,
		uuid.New(), "blocked@example.test", "Player-XXBLOCK1", "en", "UTC")
	if seedErr != nil {
		t.Fatalf("seed account: %v", seedErr)
	}

	_, err := svc.SendEmailCode(context.Background(), "blocked@example.test", "", "", "")
	if blocked := errors.Is(err, auth.ErrEmailPermanentlyBlocked); !blocked {
		t.Fatalf("SendEmailCode for blocked email = %v, want ErrEmailPermanentlyBlocked", err)
	}
}
// TestSendEmailCodeThrottleReusesChallenge fills the throttle window
// with ChallengeThrottle.Max challenges and then asks for one more. The
// extra request must succeed without enqueuing mail and must hand back
// an existing challenge that is not the first one issued.
func TestSendEmailCodeThrottleReusesChallenge(t *testing.T) {
	db := startPostgres(t)
	svc, mailer, _, _ := buildService(t, db)
	ctx := context.Background()

	const email = "throttle@example.test"
	limit := authConfig().ChallengeThrottle.Max

	// Exhaust the throttle budget, remembering the very first id.
	var firstID uuid.UUID
	for attempt := range limit {
		issued, err := svc.SendEmailCode(ctx, email, "", "", "")
		if err != nil {
			t.Fatalf("SendEmailCode #%d: %v", attempt, err)
		}
		if attempt == 0 {
			firstID = issued
		}
	}
	_, _, callsBefore := mailer.snapshot()

	// The throttled request: reuses a challenge_id and skips mail.
	reused, err := svc.SendEmailCode(ctx, email, "", "", "")
	if err != nil {
		t.Fatalf("SendEmailCode (throttled): %v", err)
	}
	_, _, callsAfter := mailer.snapshot()

	switch {
	case callsAfter != callsBefore:
		t.Fatalf("mail enqueue should be skipped on throttle: before=%d after=%d", callsBefore, callsAfter)
	case reused == uuid.Nil:
		t.Fatalf("throttled call returned nil challenge_id")
	case reused == firstID:
		t.Fatalf("throttled call returned the FIRST challenge — expected the latest")
	}
}
// TestConfirmEmailCodeWrongCode confirms a freshly issued challenge
// with a code that differs from the mailed one in its first digit and
// expects ErrCodeMismatch.
func TestConfirmEmailCodeWrongCode(t *testing.T) {
	db := startPostgres(t)
	svc, mailer, _, _ := buildService(t, db)
	ctx := context.Background()

	challengeID, err := svc.SendEmailCode(ctx, "wrong@example.test", "en", "", "")
	if err != nil {
		t.Fatalf("send: %v", err)
	}
	_, mailedCode, _ := mailer.snapshot()

	attempt := auth.ConfirmInputs{
		ChallengeID:     challengeID,
		Code:            flipDigit(mailedCode),
		ClientPublicKey: randomKey(t),
		TimeZone:        "UTC",
	}
	if _, err = svc.ConfirmEmailCode(ctx, attempt); !errors.Is(err, auth.ErrCodeMismatch) {
		t.Fatalf("ConfirmEmailCode wrong code = %v, want ErrCodeMismatch", err)
	}
}
// TestConfirmEmailCodeAttemptsCeiling spends every allowed attempt on a
// wrong code (each must report ErrCodeMismatch) and then verifies that
// one further attempt is refused with ErrTooManyAttempts even though it
// carries the correct code.
func TestConfirmEmailCodeAttemptsCeiling(t *testing.T) {
	db := startPostgres(t)
	svc, mailer, _, _ := buildService(t, db)
	ctx := context.Background()

	challengeID, err := svc.SendEmailCode(ctx, "ceiling@example.test", "en", "", "")
	if err != nil {
		t.Fatalf("send: %v", err)
	}
	_, rightCode, _ := mailer.snapshot()
	wrongCode := flipDigit(rightCode)

	// confirmWith submits code against the challenge with a fresh key.
	confirmWith := func(code string) error {
		_, confirmErr := svc.ConfirmEmailCode(ctx, auth.ConfirmInputs{
			ChallengeID:     challengeID,
			Code:            code,
			ClientPublicKey: randomKey(t),
			TimeZone:        "UTC",
		})
		return confirmErr
	}

	// Burn the whole attempt budget with the wrong code.
	for n := range authConfig().ChallengeMaxAttempts {
		if got := confirmWith(wrongCode); !errors.Is(got, auth.ErrCodeMismatch) {
			t.Fatalf("attempt %d: %v, want ErrCodeMismatch", n, got)
		}
	}

	// One past the ceiling: the right code no longer helps.
	if got := confirmWith(rightCode); !errors.Is(got, auth.ErrTooManyAttempts) {
		t.Fatalf("post-ceiling = %v, want ErrTooManyAttempts", got)
	}
}
// TestConfirmEmailCodeChallengeNotFound confirms against a random
// challenge id that was never issued and expects ErrChallengeNotFound.
func TestConfirmEmailCodeChallengeNotFound(t *testing.T) {
	db := startPostgres(t)
	svc, _, _, _ := buildService(t, db)

	unknown := auth.ConfirmInputs{
		ChallengeID:     uuid.New(),
		Code:            "000000",
		ClientPublicKey: randomKey(t),
		TimeZone:        "UTC",
	}
	_, err := svc.ConfirmEmailCode(context.Background(), unknown)
	if notFound := errors.Is(err, auth.ErrChallengeNotFound); !notFound {
		t.Fatalf("unknown challenge = %v, want ErrChallengeNotFound", err)
	}
}
// TestRevokeAllForUser signs the same email in three times (three
// device sessions), revokes everything for that user, and checks that
// all three sessions are reported revoked, none is readable afterwards,
// one push is emitted per session, and a second bulk revoke returns
// nothing.
func TestRevokeAllForUser(t *testing.T) {
	db := startPostgres(t)
	svc, mailer, pusher, _ := buildService(t, db)
	ctx := context.Background()

	const (
		email            = "many@example.test"
		sessionsToCreate = 3
	)

	var owner uuid.UUID
	created := make([]uuid.UUID, 0, sessionsToCreate)
	for range sessionsToCreate {
		challengeID, err := svc.SendEmailCode(ctx, email, "en", "", "")
		if err != nil {
			t.Fatalf("send: %v", err)
		}
		_, mailedCode, _ := mailer.snapshot()

		session, err := svc.ConfirmEmailCode(ctx, auth.ConfirmInputs{
			ChallengeID:     challengeID,
			Code:            mailedCode,
			ClientPublicKey: randomKey(t),
			TimeZone:        "UTC",
		})
		if err != nil {
			t.Fatalf("confirm: %v", err)
		}
		owner = session.UserID
		created = append(created, session.DeviceSessionID)
	}

	revoked, err := svc.RevokeAllForUser(ctx, owner)
	if err != nil {
		t.Fatalf("RevokeAllForUser: %v", err)
	}
	if n := len(revoked); n != sessionsToCreate {
		t.Fatalf("revoked count = %d, want %d", n, sessionsToCreate)
	}

	for _, dsID := range created {
		_, getErr := svc.GetSession(ctx, dsID)
		if !errors.Is(getErr, auth.ErrSessionNotFound) {
			t.Fatalf("session %s still in cache: %v", dsID, getErr)
		}
	}

	if got := len(pusher.snapshot()); got != sessionsToCreate {
		t.Fatalf("push emissions = %d, want %d", got, sessionsToCreate)
	}

	// Idempotent: nothing is left to revoke the second time.
	second, err := svc.RevokeAllForUser(ctx, owner)
	if err != nil {
		t.Fatalf("idempotent RevokeAllForUser: %v", err)
	}
	if n := len(second); n != 0 {
		t.Fatalf("idempotent RevokeAllForUser = %d sessions, want 0", n)
	}
}
// flipDigit produces a code guaranteed to differ from code while
// keeping its length: a leading decimal digit d becomes (d+1) mod 10,
// and any other leading byte becomes '0'. An empty input yields "0".
func flipDigit(code string) string {
	if len(code) == 0 {
		return "0"
	}

	out := []byte(code)
	switch lead := out[0]; {
	case lead >= '0' && lead <= '9':
		out[0] = '0' + (lead-'0'+1)%10
	default:
		out[0] = '0'
	}
	return string(out)
}
+159
View File
@@ -0,0 +1,159 @@
package auth
import (
"context"
"sync"
"sync/atomic"
"github.com/google/uuid"
)
// Cache is the in-memory write-through projection of the active rows in
// `backend.device_sessions`. Reads (Get) are RLocked; writes (Add,
// Remove, RemoveByUser) are Locked. The cache holds two maps:
//
// - byID maps device_session_id → Session.
// - byUser maps user_id → set of device_session_ids belonging to that
// user, used to satisfy bulk revoke without scanning byID.
//
// Both maps are updated atomically inside one Lock per mutation. The
// caller is expected to commit the corresponding database write *before*
// invoking Add or Remove so that the cache stays consistent under crash:
// a Postgres commit failure leaves the cache untouched, matching the
// previous DB state.
//
// A Cache must not be copied after first use: it embeds a mutex and an
// atomic flag. Use NewCache and pass the pointer around.
type Cache struct {
// mu guards byID and byUser.
mu sync.RWMutex
// byID is the primary index: device_session_id → Session.
byID map[uuid.UUID]Session
// byUser is the secondary index: user_id → set of device_session_ids.
byUser map[uuid.UUID]map[uuid.UUID]struct{}
// ready is set once Warm has completed successfully; read by Ready.
ready atomic.Bool
}
// NewCache returns an empty Cache with both indexes initialised. Ready
// reports false on the result until Warm has completed successfully.
func NewCache() *Cache {
	c := new(Cache)
	c.byID = map[uuid.UUID]Session{}
	c.byUser = map[uuid.UUID]map[uuid.UUID]struct{}{}
	return c
}
// Warm loads every active session from store and replaces the cache
// contents with them, rebuilding both indexes from scratch. On success
// the cache is marked ready. It is meant to run once at boot, before
// the HTTP listener accepts traffic; calling it again simply re-warms
// the cache (handy in tests). If the store query fails, the error is
// returned and the cache is left exactly as it was.
func (c *Cache) Warm(ctx context.Context, store *Store) error {
	active, err := store.ListActiveSessions(ctx)
	if err != nil {
		return err
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	byID := make(map[uuid.UUID]Session, len(active))
	byUser := make(map[uuid.UUID]map[uuid.UUID]struct{})
	for _, sess := range active {
		byID[sess.DeviceSessionID] = sess

		owned := byUser[sess.UserID]
		if owned == nil {
			owned = make(map[uuid.UUID]struct{})
			byUser[sess.UserID] = owned
		}
		owned[sess.DeviceSessionID] = struct{}{}
	}

	c.byID, c.byUser = byID, byUser
	c.ready.Store(true)
	return nil
}
// Ready reports whether Warm has succeeded at least once. The HTTP
// readiness probe is wired through it, so `/readyz` only turns 200 once
// the cache has been hydrated. A nil Cache is never ready.
func (c *Cache) Ready() bool {
	return c != nil && c.ready.Load()
}
// Size reports how many active sessions are currently cached; a nil
// Cache has size 0. It is used by startup logging ("auth cache warmed:
// N sessions") and by tests.
func (c *Cache) Size() int {
	if c == nil {
		return 0
	}

	c.mu.RLock()
	n := len(c.byID)
	c.mu.RUnlock()
	return n
}
// Get looks up the cached session for deviceSessionID. The boolean is
// true on a hit. On a miss (or on a nil Cache) it returns the zero
// Session and false, and the Session value must not be inspected.
func (c *Cache) Get(deviceSessionID uuid.UUID) (Session, bool) {
	if c == nil {
		return Session{}, false
	}

	c.mu.RLock()
	defer c.mu.RUnlock()

	if cached, hit := c.byID[deviceSessionID]; hit {
		return cached, true
	}
	return Session{}, false
}
// Add stores s in the cache, inserting it or replacing the entry that
// already exists for s.DeviceSessionID. Both the primary map and the
// per-user index end up reflecting the latest snapshot: if the replaced
// entry belonged to a different user, the id is first dropped from that
// user's index so no stale reference survives (a stale reference would
// let RemoveByUser on the old user evict the new owner's session).
// Calling Add on a nil Cache is a no-op.
func (c *Cache) Add(s Session) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	// Un-index the previous owner when the session changes hands.
	if prev, ok := c.byID[s.DeviceSessionID]; ok && prev.UserID != s.UserID {
		if old := c.byUser[prev.UserID]; old != nil {
			delete(old, s.DeviceSessionID)
			if len(old) == 0 {
				delete(c.byUser, prev.UserID)
			}
		}
	}

	c.byID[s.DeviceSessionID] = s
	set, ok := c.byUser[s.UserID]
	if !ok {
		set = make(map[uuid.UUID]struct{})
		c.byUser[s.UserID] = set
	}
	set[s.DeviceSessionID] = struct{}{}
}
// Remove drops deviceSessionID from the primary map and from its
// owner's index, deleting the owner's index entry altogether once it
// becomes empty. Removing an id that is not cached, or calling Remove
// on a nil Cache, does nothing.
func (c *Cache) Remove(deviceSessionID uuid.UUID) {
	if c == nil {
		return
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	victim, found := c.byID[deviceSessionID]
	if !found {
		return
	}
	delete(c.byID, deviceSessionID)

	owned := c.byUser[victim.UserID]
	if owned == nil {
		return
	}
	delete(owned, deviceSessionID)
	if len(owned) == 0 {
		delete(c.byUser, victim.UserID)
	}
}
// RemoveByUser drops every cached session owned by userID and returns
// the ids it evicted, in no particular order. The slice is freshly
// allocated, so the caller may keep it. It returns nil when the user
// has no cached sessions or the Cache itself is nil.
func (c *Cache) RemoveByUser(userID uuid.UUID) []uuid.UUID {
	if c == nil {
		return nil
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	owned, found := c.byUser[userID]
	if !found {
		return nil
	}
	delete(c.byUser, userID)

	evicted := make([]uuid.UUID, 0, len(owned))
	for dsID := range owned {
		delete(c.byID, dsID)
		evicted = append(evicted, dsID)
	}
	return evicted
}
+141
View File
@@ -0,0 +1,141 @@
package auth
import (
	"context"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/google/uuid"
)
// TestCacheGetAddRemove covers the single-entry life cycle: a fresh
// cache is empty and not ready, Add makes the entry visible to Get and
// Size, Remove takes it away again, and a repeated Remove is harmless.
func TestCacheGetAddRemove(t *testing.T) {
	cache := NewCache()
	if cache.Ready() {
		t.Fatalf("fresh cache should not be Ready before Warm")
	}
	if n := cache.Size(); n != 0 {
		t.Fatalf("fresh cache size = %d, want 0", n)
	}

	sessionID, userID := uuid.New(), uuid.New()
	cache.Add(Session{
		DeviceSessionID: sessionID,
		UserID:          userID,
		Status:          SessionStatusActive,
	})
	if n := cache.Size(); n != 1 {
		t.Fatalf("size after Add = %d, want 1", n)
	}
	if got, ok := cache.Get(sessionID); !ok || got.DeviceSessionID != sessionID {
		t.Fatalf("Get after Add: ok=%v session=%+v", ok, got)
	}

	cache.Remove(sessionID)
	if n := cache.Size(); n != 0 {
		t.Fatalf("size after Remove = %d, want 0", n)
	}
	if _, hit := cache.Get(sessionID); hit {
		t.Fatalf("Get after Remove returned a hit")
	}

	// Removing an entry that is already gone must not panic.
	cache.Remove(sessionID)
}
// TestCacheRemoveByUser caches two sessions for one user and one for
// another, then checks that RemoveByUser evicts exactly the first
// user's two sessions and returns nil when called for that user again.
func TestCacheRemoveByUser(t *testing.T) {
	cache := NewCache()
	target, bystander := uuid.New(), uuid.New()

	for _, owner := range []uuid.UUID{target, target, bystander} {
		cache.Add(Session{
			DeviceSessionID: uuid.New(),
			UserID:          owner,
			Status:          SessionStatusActive,
		})
	}

	if evicted := cache.RemoveByUser(target); len(evicted) != 2 {
		t.Fatalf("RemoveByUser removed %d, want 2", len(evicted))
	}
	if n := cache.Size(); n != 1 {
		t.Fatalf("size after RemoveByUser = %d, want 1", n)
	}
	if leftover := cache.RemoveByUser(target); leftover != nil {
		t.Fatalf("RemoveByUser on empty user returned %v, want nil", leftover)
	}
}
// TestCacheWarmFlipsReady checks that Ready follows the internal ready
// flag. Calling Warm needs a Store backed by a real database, which the
// end-to-end test covers; here the flag is set directly to stand in for
// a successful Warm.
func TestCacheWarmFlipsReady(t *testing.T) {
	cache := NewCache()
	if cache.Ready() {
		t.Fatalf("Ready before Warm")
	}

	// Stand-in for a successful Warm.
	cache.ready.Store(true)

	if ready := cache.Ready(); !ready {
		t.Fatalf("Ready did not flip after store")
	}
}
// TestCacheConcurrentGetAddRemove hammers one Cache from several writer
// goroutines (Add immediately followed by Remove of the same id) and
// several reader goroutines (Get), and is primarily useful under
// `go test -race`. Every Add is paired with a later Remove of the same
// id, so the cache must be empty once all workers are done.
//
// The run is bounded by a real deadline: if the workers have not
// finished in time they are asked to stop and the test fails, instead
// of silently relying on the global `go test` timeout.
func TestCacheConcurrentGetAddRemove(t *testing.T) {
	c := NewCache()

	const (
		writers      = 4
		readers      = 4
		opsPerWorker = 1000
		deadline     = 30 * time.Second
	)

	uid := uuid.New()
	ids := make([]uuid.UUID, opsPerWorker)
	for i := range ids {
		ids[i] = uuid.New()
	}

	ctx, cancel := context.WithTimeout(context.Background(), deadline)
	defer cancel()

	var stop atomic.Bool
	var wg sync.WaitGroup

	for range writers {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range opsPerWorker {
				if stop.Load() {
					return
				}
				c.Add(Session{DeviceSessionID: ids[i], UserID: uid, Status: SessionStatusActive})
				c.Remove(ids[i])
			}
		}()
	}

	for range readers {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range opsPerWorker {
				if stop.Load() {
					return
				}
				_, _ = c.Get(ids[i%len(ids)])
			}
		}()
	}

	done := make(chan struct{})
	go func() { wg.Wait(); close(done) }()

	select {
	case <-done:
	case <-ctx.Done():
		// Deadline hit: tell the workers to bail out at their next
		// iteration, wait for them, then report the failure.
		stop.Store(true)
		<-done
		t.Fatalf("cache concurrency test timed out")
	}

	// After all goroutines finish, the cache must be empty (every Add
	// is paired with a Remove).
	if c.Size() != 0 {
		t.Fatalf("cache size after concurrent run = %d, want 0", c.Size())
	}
}
+262
View File
@@ -0,0 +1,262 @@
package auth
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"github.com/google/uuid"
"go.uber.org/zap"
)
// SendEmailCode issues an email login challenge for email and returns
// its challenge_id.
//
// The response shape is deliberately the same for new users, existing
// users and throttled requesters. The one distinguishable outcome is
// ErrEmailPermanentlyBlocked, returned when email belongs to an account
// whose `permanent_block` column is true (the handler maps it to 400
// invalid_request). An email that is empty after normalisation is
// rejected with an error.
//
// Throttling: when the number of un-consumed, non-expired challenges
// created for email within ChallengeThrottle.Window has reached
// ChallengeThrottle.Max, no new row is written and no mail is enqueued;
// the most recent existing challenge_id is returned instead. This
// prevents an attacker who controls their own SMTP server from
// correlating "row created without mail" with the platform's throttle
// state.
//
// Language capture: locale (request body, BCP 47) wins over
// acceptLanguage (the HTTP header forwarded by the gateway) when both
// are present. The captured value is stored on the challenge row as
// `preferred_language`, replayed by ConfirmEmailCode, and applied only
// to newly registered accounts; existing accounts keep their language.
func (s *Service) SendEmailCode(
	ctx context.Context,
	email, locale, acceptLanguage, sourceIP string,
) (uuid.UUID, error) {
	addr := normaliseEmail(email)
	if addr == "" {
		return uuid.Nil, fmt.Errorf("auth: email is empty")
	}

	// emailHash builds the PII-free log field on demand.
	emailHash := func() zap.Field {
		return zap.String("email_hash", s.hashEmail(addr))
	}

	blocked, err := s.deps.Store.IsEmailPermanentlyBlocked(ctx, addr)
	if err != nil {
		return uuid.Nil, err
	}
	if blocked {
		return uuid.Nil, ErrEmailPermanentlyBlocked
	}

	language := pickCapturedLocale(locale, acceptLanguage)
	issuedAt := s.deps.Now()
	since := issuedAt.Add(-s.deps.Config.ChallengeThrottle.Window)

	recent, err := s.deps.Store.CountRecentChallenges(ctx, addr, since)
	if err != nil {
		return uuid.Nil, err
	}
	if recent >= s.deps.Config.ChallengeThrottle.Max {
		latest, lookupErr := s.deps.Store.LatestUnconsumedChallenge(ctx, addr, since)
		switch {
		case lookupErr == nil:
			s.deps.Logger.Info("auth challenge reused (throttled)",
				emailHash(),
				zap.String("challenge_id", latest.ChallengeID.String()),
				zap.Int("recent_count", recent),
			)
			return latest.ChallengeID, nil
		case !errors.Is(lookupErr, sql.ErrNoRows):
			return uuid.Nil, lookupErr
		}
		// sql.ErrNoRows means a concurrent confirm consumed the row
		// between the count and the select; issue a fresh challenge.
	}

	code, err := generateCode()
	if err != nil {
		return uuid.Nil, err
	}
	codeHash, err := hashCode(code)
	if err != nil {
		return uuid.Nil, fmt.Errorf("auth: hash code: %w", err)
	}

	issued := Challenge{
		ChallengeID:       uuid.New(),
		Email:             addr,
		CodeHash:          codeHash,
		ExpiresAt:         issuedAt.Add(s.deps.Config.ChallengeTTL),
		PreferredLanguage: language,
	}
	if err := s.deps.Store.InsertChallenge(ctx, issued); err != nil {
		return uuid.Nil, err
	}
	challengeField := zap.String("challenge_id", issued.ChallengeID.String())

	if err := s.deps.Mail.EnqueueLoginCode(ctx, addr, code, s.deps.Config.ChallengeTTL); err != nil {
		// A failed enqueue is logged, not returned: the challenge row
		// already exists and the user can simply request another code.
		s.deps.Logger.Warn("auth: enqueue login code failed",
			emailHash(),
			challengeField,
			zap.Error(err),
		)
	}

	s.deps.Logger.Info("auth challenge issued", emailHash(), challengeField)
	return issued.ChallengeID, nil
}
// ConfirmInputs is the parsed-and-validated input to ConfirmEmailCode.
// Wire-format validation (base64 decode, 32-byte length, IANA time-zone
// parse, source-IP extraction) happens at the handler boundary so the
// service operates on already-typed values.
type ConfirmInputs struct {
// ChallengeID identifies the challenge being redeemed; uuid.Nil is
// rejected with ErrChallengeNotFound.
ChallengeID uuid.UUID
// Code is the login code supplied by the client.
Code string
// ClientPublicKey must be exactly 32 bytes; a copy is stored on the
// created session.
ClientPublicKey []byte
// TimeZone must be non-blank; it is passed on when ensuring the account.
TimeZone string
// SourceIP feeds the geo lookups (language fallback, declared country).
SourceIP string
}
// ConfirmEmailCode redeems a challenge_id, makes sure the matching
// `accounts` row exists, and creates an active `device_sessions` row.
// The returned Session is the row as re-read from the database, so it
// carries the server-assigned timestamps.
//
// Cheap input checks come first: a nil ChallengeID yields
// ErrChallengeNotFound; a public key that is not 32 bytes or a blank
// time zone yields an error. The flow then uses two store transactions
// with non-transactional work between them:
//
//  1. LoadAndIncrementChallenge bumps the attempts counter under
//     SELECT FOR UPDATE, so concurrent attempts cannot slip past the
//     ceiling.
//  2. Between the transactions: attempt-ceiling check
//     (ErrTooManyAttempts), bcrypt verification (ErrCodeMismatch),
//     language/country resolution, and EnsureByEmail.
//  3. MarkConsumedAndInsertSession marks the challenge consumed and
//     inserts the device session atomically, which upholds the
//     "one challenge → at most one session" invariant.
//
// The language used for a new account is the one captured on the
// challenge, else the geo-derived language for SourceIP, else
// defaultLanguage.
//
// After the session row is re-read it is written through to the cache.
// The declared-country backfill that follows is best-effort: a failure
// is logged and does not undo the registration.
func (s *Service) ConfirmEmailCode(ctx context.Context, in ConfirmInputs) (Session, error) {
	switch {
	case in.ChallengeID == uuid.Nil:
		return Session{}, ErrChallengeNotFound
	case len(in.ClientPublicKey) != 32:
		return Session{}, fmt.Errorf("auth: client public key must be 32 bytes, got %d", len(in.ClientPublicKey))
	case strings.TrimSpace(in.TimeZone) == "":
		return Session{}, fmt.Errorf("auth: time_zone must not be empty")
	}

	log := s.deps.Logger
	challengeField := zap.String("challenge_id", in.ChallengeID.String())

	challenge, err := s.deps.Store.LoadAndIncrementChallenge(ctx, in.ChallengeID)
	if err != nil {
		return Session{}, err
	}

	// The counter was already incremented for this attempt, so the
	// attempt is allowed only while it does not exceed the ceiling.
	if int(challenge.Attempts) > s.deps.Config.ChallengeMaxAttempts {
		log.Info("auth challenge attempts exhausted",
			challengeField,
			zap.Int32("attempts", challenge.Attempts),
		)
		return Session{}, ErrTooManyAttempts
	}

	switch verifyErr := verifyCode(challenge.CodeHash, in.Code); {
	case verifyErr == nil:
		// Code accepted.
	case errors.Is(verifyErr, ErrCodeMismatch):
		log.Info("auth challenge code mismatch",
			challengeField,
			zap.Int32("attempts", challenge.Attempts),
		)
		return Session{}, ErrCodeMismatch
	default:
		return Session{}, verifyErr
	}

	// Language precedence: captured on the challenge, then geo, then
	// the package default.
	language := challenge.PreferredLanguage
	if language == "" {
		if language = s.deps.Geo.LanguageForIP(in.SourceIP); language == "" {
			language = defaultLanguage
		}
	}
	country := s.deps.Geo.LookupCountry(in.SourceIP)

	userID, err := s.deps.User.EnsureByEmail(ctx, challenge.Email, language, in.TimeZone, country)
	if err != nil {
		return Session{}, fmt.Errorf("auth: ensure account by email: %w", err)
	}

	sessionID := uuid.New()
	draft := Session{
		DeviceSessionID: sessionID,
		UserID:          userID,
		Status:          SessionStatusActive,
		ClientPublicKey: cloneBytes(in.ClientPublicKey),
	}
	if err := s.deps.Store.MarkConsumedAndInsertSession(ctx, in.ChallengeID, draft); err != nil {
		return Session{}, err
	}

	// Re-read so the caller and the cache see server-assigned fields.
	stored, err := s.deps.Store.LoadSession(ctx, sessionID)
	if err != nil {
		return Session{}, fmt.Errorf("auth: reload created session: %w", err)
	}
	s.deps.Cache.Add(stored)

	userField := zap.String("user_id", userID.String())
	if backfillErr := s.deps.Geo.SetDeclaredCountryAtRegistration(ctx, userID, in.SourceIP); backfillErr != nil {
		log.Warn("auth: declared country backfill failed",
			userField,
			zap.Error(backfillErr),
		)
	}

	log.Info("auth session created",
		userField,
		zap.String("device_session_id", sessionID.String()),
	)
	return stored, nil
}
// defaultLanguage is the fallback locale written when neither the body
// nor the Accept-Language header nor the geoip-derived language produce
// a value. ConfirmEmailCode applies it as the last step of its language
// resolution.
const defaultLanguage = "en"
// normaliseEmail returns the canonical form of email used for storage
// and lookups: surrounding whitespace removed and all letters
// lower-cased. A blank input yields "".
func normaliseEmail(email string) string {
	trimmed := strings.TrimSpace(email)
	return strings.ToLower(trimmed)
}
// pickCapturedLocale picks the locale to persist on the challenge row.
//
// A non-blank locale (the request-body field) always wins. Otherwise
// the first entry of acceptLanguage is used: everything up to the first
// ',' (next entry) or ';' (quality parameter), trimmed. The header
// parsing is intentionally minimal — auth only stores the value and
// user.Service treats it as opaque.
//
// The Accept-Language wildcard "*" means "any language" and names no
// locale, so it is treated as no preference and "" is returned, which
// lets the caller fall through to its own defaults. "" is also returned
// when neither input carries a value.
func pickCapturedLocale(locale, acceptLanguage string) string {
	if v := strings.TrimSpace(locale); v != "" {
		return v
	}

	first := acceptLanguage
	if idx := strings.IndexAny(first, ",;"); idx >= 0 {
		first = first[:idx]
	}
	first = strings.TrimSpace(first)

	if first == "*" {
		// A wildcard is not a language tag; never persist it.
		return ""
	}
	return first
}
// cloneBytes returns an independent copy of b so later changes to the
// caller's slice cannot affect the stored value. A nil input stays nil;
// an empty non-nil input yields an empty non-nil slice.
func cloneBytes(b []byte) []byte {
	if b == nil {
		return nil
	}
	return append(make([]byte, 0, len(b)), b...)
}
+61
View File
@@ -0,0 +1,61 @@
package auth
import (
"crypto/rand"
"errors"
"fmt"
"strings"
"golang.org/x/crypto/bcrypt"
)
// CodeLength is the fixed length, in decimal digits, of the code
// delivered by SendEmailCode. The OpenAPI description ("six-digit")
// locks the value at six; tests cannot lower it without breaking the
// contract test against the schema.
const CodeLength = 6
// codeBcryptCost is the bcrypt cost used to store the hashed code in
// auth_challenges.code_hash. bcrypt.DefaultCost is 10, which matches the
// convention documented for admin password storage in
// `backend/README.md` §12. A six-digit code has only 10^6 possible
// values (about 20 bits of entropy), so hashing alone cannot protect
// it; what bounds online guessing is the bcrypt slowdown combined with
// the per-challenge attempt ceiling.
const codeBcryptCost = bcrypt.DefaultCost
// generateCode returns a random CodeLength-character decimal string
// drawn from crypto/rand.
//
// Digits are produced by rejection sampling: a random byte is used only
// when it is below 250 (the largest multiple of ten that fits in a
// byte), so `b % 10` is exactly uniform over 0-9 instead of slightly
// favouring the low digits. About 2% of bytes are discarded, so a
// second read from the RNG is needed only occasionally.
//
// An error is returned only if the system RNG fails.
func generateCode() (string, error) {
	// unbiasedLimit is the exclusive upper bound of bytes that map
	// evenly onto the ten digits.
	const unbiasedLimit = 250

	var sb strings.Builder
	sb.Grow(CodeLength)

	buf := make([]byte, CodeLength)
	for sb.Len() < CodeLength {
		if _, err := rand.Read(buf); err != nil {
			return "", fmt.Errorf("auth: generate code: %w", err)
		}
		for _, b := range buf {
			if b >= unbiasedLimit {
				continue // would skew the distribution; draw again
			}
			sb.WriteByte('0' + b%10)
			if sb.Len() == CodeLength {
				break
			}
		}
	}
	return sb.String(), nil
}
// hashCode hashes code with bcrypt at the package-level cost
// (codeBcryptCost) and returns the encoded hash, or the bcrypt error.
func hashCode(code string) ([]byte, error) {
	plaintext := []byte(code)
	return bcrypt.GenerateFromPassword(plaintext, codeBcryptCost)
}
// verifyCode checks code against the bcrypt hash. It delegates to
// bcrypt.CompareHashAndPassword, so the comparison on the matching path
// is constant-time. The result is:
//
//   - nil when code matches hash;
//   - ErrCodeMismatch when bcrypt reports a mismatch;
//   - a wrapped error for anything else, e.g. a malformed hash.
func verifyCode(hash []byte, code string) error {
	switch err := bcrypt.CompareHashAndPassword(hash, []byte(code)); {
	case err == nil:
		return nil
	case errors.Is(err, bcrypt.ErrMismatchedHashAndPassword):
		return ErrCodeMismatch
	default:
		return fmt.Errorf("auth: verify code: %w", err)
	}
}
+76
View File
@@ -0,0 +1,76 @@
package auth
import (
"strings"
"testing"
"errors"
)
func TestGenerateCodeShape(t *testing.T) {
for range 100 {
code, err := generateCode()
if err != nil {
t.Fatalf("generateCode: %v", err)
}
if len(code) != CodeLength {
t.Fatalf("len(code) = %d, want %d (got %q)", len(code), CodeLength, code)
}
for _, r := range code {
if r < '0' || r > '9' {
t.Fatalf("non-digit rune %q in code %q", r, code)
}
}
}
}
// TestGenerateCodeRandomness is a coarse sanity check that consecutive
// generateCode calls do not keep returning the same value. It tolerates a
// single collision among the trials and fails on two or more.
func TestGenerateCodeRandomness(t *testing.T) {
	seen := make(map[string]struct{})
	const trials = 50
	for range trials {
		code, err := generateCode()
		if err != nil {
			t.Fatalf("generateCode: %v", err)
		}
		seen[code] = struct{}{}
	}
	// 50 trials over a 10^6 space: by the birthday bound a single duplicate
	// has roughly a 0.1% chance, so one collision is tolerated. Two or more
	// collisions are on the order of one in a million and point at a broken
	// generator.
	if len(seen) < trials-1 {
		t.Fatalf("generateCode produced too many duplicates: %d/%d unique", len(seen), trials)
	}
}
// TestHashAndVerifyCodeRoundTrip hashes a fixed code, checks that the
// result carries the "$2" bcrypt prefix, and confirms verifyCode accepts
// the same code against that hash.
func TestHashAndVerifyCodeRoundTrip(t *testing.T) {
	const plaintext = "654321"
	stored, hashErr := hashCode(plaintext)
	if hashErr != nil {
		t.Fatalf("hashCode: %v", hashErr)
	}
	if text := string(stored); !strings.HasPrefix(text, "$2") {
		t.Fatalf("hash does not look like bcrypt: %q", text)
	}
	if verifyErr := verifyCode(stored, plaintext); verifyErr != nil {
		t.Fatalf("verifyCode on matching code: %v", verifyErr)
	}
}
// TestVerifyCodeMismatch hashes one code and verifies a different one,
// expecting the ErrCodeMismatch sentinel.
func TestVerifyCodeMismatch(t *testing.T) {
	stored, hashErr := hashCode("111111")
	if hashErr != nil {
		t.Fatalf("hashCode: %v", hashErr)
	}
	if got := verifyCode(stored, "222222"); !errors.Is(got, ErrCodeMismatch) {
		t.Fatalf("verifyCode mismatch returned %v, want ErrCodeMismatch", got)
	}
}
// TestVerifyCodeMalformedHash feeds verifyCode a byte slice that is not a
// bcrypt hash and expects a non-nil error that is not ErrCodeMismatch.
func TestVerifyCodeMalformedHash(t *testing.T) {
	got := verifyCode([]byte("not-a-hash"), "111111")
	switch {
	case got == nil:
		t.Fatalf("verifyCode with garbage hash returned nil")
	case errors.Is(got, ErrCodeMismatch):
		t.Fatalf("malformed hash classified as mismatch: %v", got)
	}
}
+90
View File
@@ -0,0 +1,90 @@
package auth
import (
"context"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// LoginCodeMailer is the publisher contract auth uses to deliver a
// one-time login code to a user's mailbox. The canonical
// implementation lives in `backend/internal/mail`; tests can use
// `NewNoopLoginCodeMailer`, which logs the outbound code instead of
// wiring SMTP.
type LoginCodeMailer interface {
	// EnqueueLoginCode hands the plaintext code for email to the mailer
	// together with the code's time-to-live. A non-nil error reports that
	// the code could not be enqueued.
	EnqueueLoginCode(ctx context.Context, email, code string, ttl time.Duration) error
}
// SessionInvalidator emits the gRPC push session_invalidation event
// when auth revokes one or more device sessions. The canonical
// implementation lives in `backend/internal/push`; tests can use
// `NewNoopSessionInvalidator` for an in-memory log-only fallback.
type SessionInvalidator interface {
	// PublishSessionInvalidation announces that deviceSessionID, owned by
	// userID, is no longer valid; reason is a short machine-readable tag
	// naming the caller. The method returns nothing, so implementations
	// have no way to report a delivery failure to auth.
	PublishSessionInvalidation(ctx context.Context, deviceSessionID, userID uuid.UUID, reason string)
}
// UserEnsurer binds a confirmed email to an `accounts.user_id`. The
// canonical implementation is `*user.Service`; tests can swap in a
// recording fake.
type UserEnsurer interface {
	// EnsureByEmail returns the user ID associated with email.
	// preferredLanguage, timeZone and declaredCountry are passed through to
	// the implementation; presumably they only seed a newly created
	// account — confirm against `*user.Service`.
	EnsureByEmail(ctx context.Context, email, preferredLanguage, timeZone, declaredCountry string) (uuid.UUID, error)
}
// GeoService provides the geo helpers auth needs at confirm-email-code:
// a country lookup, a language lookup for the `preferred_language`
// fallback, and a post-commit write of `accounts.declared_country`. All
// three methods are best-effort — auth never blocks the registration
// flow on geo failures.
type GeoService interface {
	// LookupCountry maps sourceIP to a country string. It has no error
	// return; presumably an empty string means "unknown" — confirm against
	// the implementation.
	LookupCountry(sourceIP string) string
	// LanguageForIP maps sourceIP to a language string used as the
	// `preferred_language` fallback. It has no error return either.
	LanguageForIP(sourceIP string) string
	// SetDeclaredCountryAtRegistration records the country derived from
	// sourceIP on the account identified by userID.
	SetDeclaredCountryAtRegistration(ctx context.Context, userID uuid.UUID, sourceIP string) error
}
// NewNoopLoginCodeMailer builds a LoginCodeMailer that only logs the
// outbound code at info level and always succeeds. A nil logger is
// replaced with a no-op logger; otherwise the logger is scoped under the
// name "auth.mail.noop". Production wiring uses the real `mail.Service`;
// this constructor is meant for tests and for local smoke runs that do
// not want to bring up an SMTP relay.
func NewNoopLoginCodeMailer(logger *zap.Logger) LoginCodeMailer {
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	return &noopLoginCodeMailer{logger: base.Named("auth.mail.noop")}
}
// noopLoginCodeMailer is the log-only LoginCodeMailer returned by
// NewNoopLoginCodeMailer.
type noopLoginCodeMailer struct {
	// logger receives one info-level entry per EnqueueLoginCode call.
	logger *zap.Logger
}
// EnqueueLoginCode logs the email, the plaintext code and the TTL at info
// level and returns nil. The context is ignored. Because the code is
// written to the log in clear text, this implementation is only suitable
// for tests and local runs.
func (m *noopLoginCodeMailer) EnqueueLoginCode(_ context.Context, email, code string, ttl time.Duration) error {
	fields := []zap.Field{
		zap.String("email", email),
		zap.String("code", code),
		zap.Duration("ttl", ttl),
	}
	m.logger.Info("auth login code (noop publisher)", fields...)
	return nil
}
// NewNoopSessionInvalidator builds a SessionInvalidator that logs every
// invalidation at info level and does nothing else. A nil logger is
// replaced with a no-op logger; otherwise the logger is scoped under the
// name "auth.push.noop". Production wiring uses the real `push.Service`;
// this constructor is meant for tests that need a callable surface
// without bringing up gRPC.
func NewNoopSessionInvalidator(logger *zap.Logger) SessionInvalidator {
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	return &noopSessionInvalidator{logger: base.Named("auth.push.noop")}
}
// noopSessionInvalidator is the log-only SessionInvalidator returned by
// NewNoopSessionInvalidator.
type noopSessionInvalidator struct {
	// logger receives one info-level entry per published invalidation.
	logger *zap.Logger
}
// PublishSessionInvalidation logs the device session ID, the user ID and
// the reason at info level. The context is ignored and nothing is sent
// anywhere else.
func (p *noopSessionInvalidator) PublishSessionInvalidation(_ context.Context, deviceSessionID, userID uuid.UUID, reason string) {
	fields := []zap.Field{
		zap.String("device_session_id", deviceSessionID.String()),
		zap.String("user_id", userID.String()),
		zap.String("reason", reason),
	}
	p.logger.Info("session invalidation (noop publisher)", fields...)
}
+39
View File
@@ -0,0 +1,39 @@
package auth
import "errors"
// Sentinel errors emitted by Service methods. Handlers translate them
// into HTTP responses; callers (including tests) match on them with
// errors.Is rather than comparing message text.
var (
	// ErrChallengeNotFound is returned when a confirm-email-code request
	// references a challenge_id that does not exist, has already been
	// consumed, or has expired. A single sentinel covers all three cases
	// because the API surface deliberately does not differentiate between
	// them — distinguishing them would leak whether a challenge_id was
	// ever valid, which is a signal an attacker should not have.
	ErrChallengeNotFound = errors.New("auth: challenge is not redeemable")
	// ErrTooManyAttempts is returned when confirm-email-code increments
	// the attempts counter past the configured ceiling. The challenge row
	// remains in the database with its incremented counter so further
	// attempts on the same challenge_id continue to fail with the same
	// error until the row expires.
	ErrTooManyAttempts = errors.New("auth: too many attempts")
	// ErrCodeMismatch is returned when the supplied code does not match
	// the stored bcrypt hash. The challenge stays un-consumed so the user
	// can try again — bounded by ErrTooManyAttempts.
	ErrCodeMismatch = errors.New("auth: code is incorrect")
	// ErrEmailPermanentlyBlocked is returned by SendEmailCode when the
	// supplied email maps to an existing account whose `permanent_block`
	// column is true. This is the only SendEmailCode path that does not
	// return an opaque success shape.
	ErrEmailPermanentlyBlocked = errors.New("auth: email is permanently blocked")
	// ErrSessionNotFound is returned by GetSession (and by the revoke
	// helpers in their look-it-up-after-zero-rows fallback) when the
	// device_session_id does not name a row in `device_sessions`.
	ErrSessionNotFound = errors.New("auth: session not found")
)
+90
View File
@@ -0,0 +1,90 @@
package auth
import (
"context"
"errors"
"github.com/google/uuid"
"go.uber.org/zap"
)
// GetSession looks up the active session for deviceSessionID in the
// cache only; the database is never consulted and the context is unused.
// The cache is the write-through projection of
// `device_sessions WHERE status='active'`, so a miss means the session is
// either revoked or absent. Both a nil UUID and a cache miss yield
// ErrSessionNotFound, which the gateway treats as "unauthenticated".
func (s *Service) GetSession(_ context.Context, deviceSessionID uuid.UUID) (Session, error) {
	if deviceSessionID != uuid.Nil {
		if cached, found := s.deps.Cache.Get(deviceSessionID); found {
			return cached, nil
		}
	}
	return Session{}, ErrSessionNotFound
}
// RevokeSession marks deviceSessionID revoked, evicts it from the cache
// and emits a session_invalidation push event.
//
// The call is idempotent: revoking an already-revoked session returns the
// existing row (status='revoked', HTTP 200) instead of
// ErrSessionNotFound. A nil or unknown device_session_id yields
// ErrSessionNotFound.
//
// Cache eviction and the push emission happen only after the store
// reports a successful UPDATE, so a failed UPDATE leaves both the cache
// and the gateway view intact.
func (s *Service) RevokeSession(ctx context.Context, deviceSessionID uuid.UUID) (Session, error) {
	if deviceSessionID == uuid.Nil {
		return Session{}, ErrSessionNotFound
	}
	revoked, changed, err := s.deps.Store.RevokeSession(ctx, deviceSessionID)
	if err != nil {
		return Session{}, err
	}
	if !changed {
		// The UPDATE matched no rows: the session was revoked earlier or
		// never existed. Read the row to tell the two apart so an earlier
		// revoke yields the idempotent revoked shape rather than a 404.
		existing, loadErr := s.deps.Store.LoadSession(ctx, deviceSessionID)
		switch {
		case loadErr == nil:
			return existing, nil
		case errors.Is(loadErr, ErrSessionNotFound):
			return Session{}, ErrSessionNotFound
		default:
			return Session{}, loadErr
		}
	}
	s.deps.Cache.Remove(deviceSessionID)
	s.deps.Push.PublishSessionInvalidation(ctx, deviceSessionID, revoked.UserID, "auth.revoke_session")
	s.deps.Logger.Info("auth session revoked",
		zap.String("device_session_id", deviceSessionID.String()),
		zap.String("user_id", revoked.UserID.String()),
	)
	return revoked, nil
}
// RevokeAllForUser revokes every active session owned by userID. For each
// revoked row it evicts the cache entry and emits one
// session_invalidation push event. The revoked sessions are returned in
// the order the store produced them.
//
// A nil userID short-circuits to (nil, nil). An empty result is a
// successful idempotent call (the handler reports revoked_count=0); the
// bulk log line is written only when at least one session was revoked.
func (s *Service) RevokeAllForUser(ctx context.Context, userID uuid.UUID) ([]Session, error) {
	if userID == uuid.Nil {
		return nil, nil
	}
	revoked, err := s.deps.Store.RevokeAllForUser(ctx, userID)
	if err != nil {
		return nil, err
	}
	for i := range revoked {
		sessionID, owner := revoked[i].DeviceSessionID, revoked[i].UserID
		s.deps.Cache.Remove(sessionID)
		s.deps.Push.PublishSessionInvalidation(ctx, sessionID, owner, "auth.revoke_all_for_user")
	}
	if count := len(revoked); count > 0 {
		s.deps.Logger.Info("auth sessions revoked (bulk)",
			zap.String("user_id", userID.String()),
			zap.Int("count", count),
		)
	}
	return revoked, nil
}
+444
View File
@@ -0,0 +1,444 @@
package auth
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/go-jet/jet/v2/qrm"
"github.com/google/uuid"
)
// Challenge mirrors a row in `backend.auth_challenges` enriched with the
// PreferredLanguage column added by migration 00002. The CodeHash slice
// is the raw bcrypt hash; verifyCode wraps the comparison.
type Challenge struct {
	// ChallengeID is the primary key; InsertChallenge expects the caller
	// to supply it.
	ChallengeID uuid.UUID
	// Email is the address the challenge was issued for.
	Email string
	// CodeHash is the bcrypt hash of the one-time code.
	CodeHash []byte
	// Attempts counts confirm attempts; LoadAndIncrementChallenge bumps it.
	Attempts int32
	// CreatedAt is defaulted at the schema level on insert.
	CreatedAt time.Time
	// ExpiresAt is the instant after which the challenge is not redeemable.
	ExpiresAt time.Time
	// ConsumedAt is nil until the challenge has been redeemed.
	ConsumedAt *time.Time
	// PreferredLanguage is the locale captured when the challenge was issued.
	PreferredLanguage string
}
// Session mirrors a row in `backend.device_sessions`. The
// ClientPublicKey slice is the raw 32-byte Ed25519 key; the handler
// layer is responsible for base64 encoding/decoding on the wire.
type Session struct {
	// DeviceSessionID is the primary key of the session row.
	DeviceSessionID uuid.UUID
	// UserID is the account that owns the session.
	UserID uuid.UUID
	// Status is the row's status column; auth itself writes
	// SessionStatusActive and SessionStatusRevoked.
	Status string
	// ClientPublicKey is the raw Ed25519 public key of the device.
	ClientPublicKey []byte
	// CreatedAt is not supplied by auth's INSERT, so it is presumably
	// defaulted by the schema — confirm against the migration.
	CreatedAt time.Time
	// RevokedAt is nil while the session is active; the revoke queries set
	// it to the database's NOW().
	RevokedAt *time.Time
	// LastSeenAt is nil when the column is NULL. Nothing in the store
	// methods visible here writes it.
	LastSeenAt *time.Time
}
// SessionStatusActive and SessionStatusRevoked enumerate the values
// auth writes. The CHECK constraint on `device_sessions.status` also
// allows 'blocked', which the user package emits when applying a
// `permanent_block` sanction.
const (
	// SessionStatusActive marks a session that is usable; it is the value
	// written when a session row is inserted.
	SessionStatusActive = "active"
	// SessionStatusRevoked marks a session that the revoke queries have
	// transitioned out of the active state.
	SessionStatusRevoked = "revoked"
)
// Store is the Postgres-backed query surface for `backend.auth_challenges`,
// `backend.device_sessions` and the read-side `backend.accounts` lookup
// auth needs to detect permanently-blocked emails.
type Store struct {
	// db is the connection pool every query and transaction runs against.
	db *sql.DB
}
// NewStore returns a Store whose queries run against db. The handle is
// stored as-is; no connection is opened or validated here.
func NewStore(db *sql.DB) *Store {
	store := new(Store)
	store.db = db
	return store
}
// challengeColumns is the projection shared by every read of
// `auth_challenges`. The columns are listed in the same order as the
// model.AuthChallenges fields that QueryContext scans into.
func challengeColumns() postgres.ColumnList {
	challenges := table.AuthChallenges
	return postgres.ColumnList{
		challenges.ChallengeID,
		challenges.Email,
		challenges.CodeHash,
		challenges.Attempts,
		challenges.CreatedAt,
		challenges.ExpiresAt,
		challenges.ConsumedAt,
		challenges.PreferredLanguage,
	}
}
// sessionColumns is the projection shared by every read of
// `device_sessions`, including the RETURNING clause of the revoke
// statements.
func sessionColumns() postgres.ColumnList {
	sessions := table.DeviceSessions
	return postgres.ColumnList{
		sessions.DeviceSessionID,
		sessions.UserID,
		sessions.ClientPublicKey,
		sessions.Status,
		sessions.CreatedAt,
		sessions.RevokedAt,
		sessions.LastSeenAt,
	}
}
// IsEmailPermanentlyBlocked reports whether email belongs to a live
// (deleted_at IS NULL) `accounts` row whose permanent_block column is
// true. The match is case-sensitive, so callers must pass an
// already-normalised (lowercase, trimmed) email.
//
// When no such account exists the result is (false, nil): the auth flow
// treats the email as eligible for fresh registration. Any other query
// failure is wrapped with the auth store prefix.
func (s *Store) IsEmailPermanentlyBlocked(ctx context.Context, email string) (bool, error) {
	accounts := table.Accounts
	liveAccount := accounts.Email.EQ(postgres.String(email)).
		AND(accounts.DeletedAt.IS_NULL())
	query := postgres.SELECT(accounts.PermanentBlock).
		FROM(accounts).
		WHERE(liveAccount).
		LIMIT(1)
	var account model.Accounts
	err := query.QueryContext(ctx, s.db, &account)
	switch {
	case err == nil:
		return account.PermanentBlock, nil
	case errors.Is(err, qrm.ErrNoRows):
		return false, nil
	default:
		return false, fmt.Errorf("auth store: query permanent_block for %q: %w", email, err)
	}
}
// LatestUnconsumedChallenge returns the most recently issued
// un-consumed, non-expired challenge for email created at or after
// since. The throttle path uses this method to reuse the existing
// challenge_id rather than emit a fresh row.
//
// Returns a bare sql.ErrNoRows when no such challenge exists. Any other
// query failure is wrapped with the auth store prefix, like the errors of
// the other Store methods, and can still be inspected with
// errors.Is/errors.As.
func (s *Store) LatestUnconsumedChallenge(ctx context.Context, email string, since time.Time) (Challenge, error) {
	stmt := postgres.SELECT(challengeColumns()).
		FROM(table.AuthChallenges).
		WHERE(
			table.AuthChallenges.Email.EQ(postgres.String(email)).
				AND(table.AuthChallenges.ConsumedAt.IS_NULL()).
				// Expiry is evaluated against the database clock.
				AND(table.AuthChallenges.ExpiresAt.GT(postgres.NOW())).
				AND(table.AuthChallenges.CreatedAt.GT_EQ(postgres.TimestampzT(since))),
		).
		ORDER_BY(table.AuthChallenges.CreatedAt.DESC()).
		LIMIT(1)
	var row model.AuthChallenges
	if err := stmt.QueryContext(ctx, s.db, &row); err != nil {
		if errors.Is(err, qrm.ErrNoRows) {
			return Challenge{}, sql.ErrNoRows
		}
		return Challenge{}, fmt.Errorf("auth store: latest unconsumed challenge: %w", err)
	}
	return modelToChallenge(row), nil
}
// CountRecentChallenges counts the un-consumed, non-expired challenges
// issued for email at or after since. The throttle gate in SendEmailCode
// relies on it. Query failures are wrapped with the auth store prefix.
func (s *Store) CountRecentChallenges(ctx context.Context, email string, since time.Time) (int, error) {
	challenges := table.AuthChallenges
	stillOpen := challenges.Email.EQ(postgres.String(email)).
		AND(challenges.ConsumedAt.IS_NULL()).
		AND(challenges.ExpiresAt.GT(postgres.NOW())).
		AND(challenges.CreatedAt.GT_EQ(postgres.TimestampzT(since)))
	query := postgres.SELECT(postgres.COUNT(postgres.STAR).AS("count")).
		FROM(challenges).
		WHERE(stillOpen)
	var result struct {
		Count int64 `alias:"count"`
	}
	err := query.QueryContext(ctx, s.db, &result)
	if err != nil {
		return 0, fmt.Errorf("auth store: count recent challenges: %w", err)
	}
	return int(result.Count), nil
}
// InsertChallenge writes a new `auth_challenges` row. The caller supplies
// the primary key, the bcrypt hash, the expires_at timestamp and the
// captured locale; created_at and attempts are left to their schema
// defaults. Failures are wrapped with the auth store prefix.
func (s *Store) InsertChallenge(ctx context.Context, c Challenge) error {
	challenges := table.AuthChallenges
	insert := challenges.INSERT(
		challenges.ChallengeID,
		challenges.Email,
		challenges.CodeHash,
		challenges.ExpiresAt,
		challenges.PreferredLanguage,
	).VALUES(c.ChallengeID, c.Email, c.CodeHash, c.ExpiresAt, c.PreferredLanguage)
	_, err := insert.ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("auth store: insert challenge: %w", err)
	}
	return nil
}
// LoadAndIncrementChallenge atomically locks the challenge row,
// validates that it is still un-consumed and non-expired, and increments
// its `attempts` counter. The returned Challenge carries the
// post-increment counter so the caller can compare it against the
// configured ceiling without a second query.
//
// Returns ErrChallengeNotFound (unwrapped) when the row does not exist,
// has been consumed, or has expired; in those cases the transaction is
// rolled back and the counter is not incremented. Any other error is
// wrapped with the auth store prefix. On every error path the returned
// Challenge is the zero value.
func (s *Store) LoadAndIncrementChallenge(ctx context.Context, challengeID uuid.UUID) (Challenge, error) {
	var loaded Challenge
	err := withTx(ctx, s.db, func(tx *sql.Tx) error {
		// SELECT ... FOR UPDATE holds a row lock until the transaction
		// ends, so concurrent callers for the same challenge serialise.
		selectStmt := postgres.SELECT(challengeColumns()).
			FROM(table.AuthChallenges).
			WHERE(table.AuthChallenges.ChallengeID.EQ(postgres.UUID(challengeID))).
			FOR(postgres.UPDATE())
		var row model.AuthChallenges
		if err := selectStmt.QueryContext(ctx, tx, &row); err != nil {
			if errors.Is(err, qrm.ErrNoRows) {
				return ErrChallengeNotFound
			}
			return err
		}
		loaded = modelToChallenge(row)
		if loaded.ConsumedAt != nil {
			return ErrChallengeNotFound
		}
		// NOTE(review): expiry is checked against the application clock
		// here, while the read-only challenge queries in this file compare
		// against the database's NOW(); clock skew between the two could
		// make the checks disagree near the expiry instant.
		if !loaded.ExpiresAt.After(time.Now()) {
			return ErrChallengeNotFound
		}
		updateStmt := table.AuthChallenges.
			UPDATE(table.AuthChallenges.Attempts).
			SET(table.AuthChallenges.Attempts.ADD(postgres.Int(1))).
			WHERE(table.AuthChallenges.ChallengeID.EQ(postgres.UUID(challengeID)))
		if _, err := updateStmt.ExecContext(ctx, tx); err != nil {
			return err
		}
		// Mirror the SQL increment locally instead of re-reading the row.
		loaded.Attempts++
		return nil
	})
	if err != nil {
		// The sentinel is passed through bare so callers can match it and
		// handlers do not see a store-level prefix on an expected outcome.
		if errors.Is(err, ErrChallengeNotFound) {
			return Challenge{}, err
		}
		return Challenge{}, fmt.Errorf("auth store: load and increment challenge: %w", err)
	}
	return loaded, nil
}
// MarkConsumedAndInsertSession atomically:
//
//  1. Locks the challenge row.
//  2. Validates that it is still un-consumed and non-expired.
//  3. Sets consumed_at = now().
//  4. Inserts the supplied Session into device_sessions with status =
//     'active'.
//
// The two writes are committed together so a single challenge yields at
// most one device session even under concurrent confirm-email-code
// callers.
//
// Only DeviceSessionID, UserID and ClientPublicKey are taken from
// session; the status is always written as SessionStatusActive and the
// remaining columns are not supplied by this INSERT.
//
// Returns ErrChallengeNotFound (unwrapped) when the challenge does not
// exist, has been consumed (by a concurrent caller) or has expired in the
// gap between the LoadAndIncrementChallenge call and this one. Any other
// error is wrapped with the auth store prefix.
func (s *Store) MarkConsumedAndInsertSession(ctx context.Context, challengeID uuid.UUID, session Session) error {
	err := withTx(ctx, s.db, func(tx *sql.Tx) error {
		// Only the two columns needed for validation are fetched; the
		// FOR UPDATE lock is what serialises concurrent redeemers.
		lockStmt := postgres.SELECT(table.AuthChallenges.ConsumedAt, table.AuthChallenges.ExpiresAt).
			FROM(table.AuthChallenges).
			WHERE(table.AuthChallenges.ChallengeID.EQ(postgres.UUID(challengeID))).
			FOR(postgres.UPDATE())
		var locked model.AuthChallenges
		if err := lockStmt.QueryContext(ctx, tx, &locked); err != nil {
			if errors.Is(err, qrm.ErrNoRows) {
				return ErrChallengeNotFound
			}
			return err
		}
		// NOTE(review): expiry is checked against the application clock,
		// whereas consumed_at below is stamped with the database's NOW().
		if locked.ConsumedAt != nil || !locked.ExpiresAt.After(time.Now()) {
			return ErrChallengeNotFound
		}
		consumeStmt := table.AuthChallenges.
			UPDATE(table.AuthChallenges.ConsumedAt).
			SET(postgres.NOW()).
			WHERE(table.AuthChallenges.ChallengeID.EQ(postgres.UUID(challengeID)))
		if _, err := consumeStmt.ExecContext(ctx, tx); err != nil {
			return err
		}
		insertStmt := table.DeviceSessions.INSERT(
			table.DeviceSessions.DeviceSessionID,
			table.DeviceSessions.UserID,
			table.DeviceSessions.ClientPublicKey,
			table.DeviceSessions.Status,
		).VALUES(session.DeviceSessionID, session.UserID, session.ClientPublicKey, SessionStatusActive)
		// A failed INSERT rolls the consume UPDATE back with it, leaving
		// the challenge redeemable.
		if _, err := insertStmt.ExecContext(ctx, tx); err != nil {
			return err
		}
		return nil
	})
	if err != nil {
		// Pass the sentinel through bare so callers can match it directly.
		if errors.Is(err, ErrChallengeNotFound) {
			return err
		}
		return fmt.Errorf("auth store: mark consumed and insert session: %w", err)
	}
	return nil
}
// ListActiveSessions returns every device_sessions row whose status is
// 'active'. Cache.Warm calls it at process boot. The result is a non-nil
// slice even when no rows match; query failures are wrapped with the auth
// store prefix.
func (s *Store) ListActiveSessions(ctx context.Context) ([]Session, error) {
	onlyActive := table.DeviceSessions.Status.EQ(postgres.String(SessionStatusActive))
	query := postgres.SELECT(sessionColumns()).
		FROM(table.DeviceSessions).
		WHERE(onlyActive)
	var rows []model.DeviceSessions
	err := query.QueryContext(ctx, s.db, &rows)
	if err != nil {
		return nil, fmt.Errorf("auth store: list active sessions: %w", err)
	}
	sessions := make([]Session, len(rows))
	for i := range rows {
		sessions[i] = modelToSession(rows[i])
	}
	return sessions, nil
}
// LoadSession fetches the device_sessions row for deviceSessionID
// whatever its status. A missing row yields ErrSessionNotFound; any other
// query failure is wrapped with the auth store prefix.
func (s *Store) LoadSession(ctx context.Context, deviceSessionID uuid.UUID) (Session, error) {
	byID := table.DeviceSessions.DeviceSessionID.EQ(postgres.UUID(deviceSessionID))
	query := postgres.SELECT(sessionColumns()).
		FROM(table.DeviceSessions).
		WHERE(byID).
		LIMIT(1)
	var found model.DeviceSessions
	err := query.QueryContext(ctx, s.db, &found)
	switch {
	case err == nil:
		return modelToSession(found), nil
	case errors.Is(err, qrm.ErrNoRows):
		return Session{}, ErrSessionNotFound
	default:
		return Session{}, fmt.Errorf("auth store: load session %s: %w", deviceSessionID, err)
	}
}
// RevokeSession flips an active row to status='revoked', stamps
// revoked_at with the database's NOW(), and returns the row as it stands
// after the update. The boolean is true only when the UPDATE changed a
// row; false (with a nil error) means the row was already revoked or does
// not exist, in which case the auth Service falls back to LoadSession to
// build an idempotent-revoke response.
func (s *Store) RevokeSession(ctx context.Context, deviceSessionID uuid.UUID) (Session, bool, error) {
	sessions := table.DeviceSessions
	activeTarget := sessions.DeviceSessionID.EQ(postgres.UUID(deviceSessionID)).
		AND(sessions.Status.EQ(postgres.String(SessionStatusActive)))
	update := sessions.
		UPDATE(sessions.Status, sessions.RevokedAt).
		SET(postgres.String(SessionStatusRevoked), postgres.NOW()).
		WHERE(activeTarget).
		RETURNING(sessionColumns())
	var updated model.DeviceSessions
	err := update.QueryContext(ctx, s.db, &updated)
	switch {
	case err == nil:
		return modelToSession(updated), true, nil
	case errors.Is(err, qrm.ErrNoRows):
		return Session{}, false, nil
	default:
		return Session{}, false, fmt.Errorf("auth store: revoke session %s: %w", deviceSessionID, err)
	}
}
// RevokeAllForUser flips every active row owned by userID to
// status='revoked', stamps revoked_at with the database's NOW(), and
// returns the rows as they stand after the update. When the user owned no
// active sessions the result is an empty slice with a nil error; callers
// must treat that as a successful idempotent revoke (the API surface
// reports revoked_count=0).
func (s *Store) RevokeAllForUser(ctx context.Context, userID uuid.UUID) ([]Session, error) {
	sessions := table.DeviceSessions
	activeForUser := sessions.UserID.EQ(postgres.UUID(userID)).
		AND(sessions.Status.EQ(postgres.String(SessionStatusActive)))
	update := sessions.
		UPDATE(sessions.Status, sessions.RevokedAt).
		SET(postgres.String(SessionStatusRevoked), postgres.NOW()).
		WHERE(activeForUser).
		RETURNING(sessionColumns())
	var rows []model.DeviceSessions
	err := update.QueryContext(ctx, s.db, &rows)
	if err != nil {
		return nil, fmt.Errorf("auth store: revoke all for user %s: %w", userID, err)
	}
	revoked := make([]Session, len(rows))
	for i := range rows {
		revoked[i] = modelToSession(rows[i])
	}
	return revoked, nil
}
// modelToChallenge converts a generated model row into the public
// Challenge struct. The ConsumedAt timestamp is copied into a fresh
// allocation so the result does not share that pointer with the scanned
// row; CodeHash is carried over as the same slice.
func modelToChallenge(row model.AuthChallenges) Challenge {
	var consumedAt *time.Time
	if src := row.ConsumedAt; src != nil {
		copied := *src
		consumedAt = &copied
	}
	return Challenge{
		ChallengeID:       row.ChallengeID,
		Email:             row.Email,
		CodeHash:          row.CodeHash,
		Attempts:          row.Attempts,
		CreatedAt:         row.CreatedAt,
		ExpiresAt:         row.ExpiresAt,
		ConsumedAt:        consumedAt,
		PreferredLanguage: row.PreferredLanguage,
	}
}
// modelToSession converts a generated model row into the public Session
// struct. The optional RevokedAt and LastSeenAt timestamps are copied
// into fresh allocations; nil stays nil.
func modelToSession(row model.DeviceSessions) Session {
	cloneTime := func(src *time.Time) *time.Time {
		if src == nil {
			return nil
		}
		copied := *src
		return &copied
	}
	return Session{
		DeviceSessionID: row.DeviceSessionID,
		UserID:          row.UserID,
		Status:          row.Status,
		ClientPublicKey: row.ClientPublicKey,
		CreatedAt:       row.CreatedAt,
		RevokedAt:       cloneTime(row.RevokedAt),
		LastSeenAt:      cloneTime(row.LastSeenAt),
	}
}
// withTx runs fn inside a Postgres transaction started with the default
// options.
//
//   - fn returns nil: the transaction is committed; a commit failure is
//     returned wrapped with the auth store prefix.
//   - fn returns an error: the transaction is rolled back and fn's error
//     is returned unchanged. The rollback error is dropped because fn's
//     error is the more actionable one.
//   - fn panics: the transaction is rolled back and the panic continues
//     to propagate, so the connection is not left holding an open
//     transaction.
//
// A failure to begin the transaction is returned wrapped and fn is not
// called.
func withTx(ctx context.Context, db *sql.DB, fn func(tx *sql.Tx) error) error {
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("auth store: begin tx: %w", err)
	}
	// Always attempt a rollback on the way out. Once Commit has run this
	// is a no-op that returns sql.ErrTxDone; on an fn error or a panic it
	// is what releases the transaction.
	defer func() { _ = tx.Rollback() }()
	if err := fn(tx); err != nil {
		return err
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("auth store: commit tx: %w", err)
	}
	return nil
}
+874
View File
@@ -0,0 +1,874 @@
// Package config loads process-level backend configuration from environment
// variables.
//
// The variable set is the canonical inventory documented in
// `backend/README.md` §4. LoadFromEnv populates a Config from environment,
// applies the documented defaults, then runs Validate. Validate fails fast on
// any required-but-missing variable so the process never starts in a partially
// configured state.
package config
import (
"fmt"
netmail "net/mail"
"os"
"slices"
"strconv"
"strings"
"time"
)
// Environment variable names. The exhaustive set follows README §4.
// Every name carries the BACKEND_ prefix; the constants are grouped below
// by the subsystem their name refers to.
const (
	// Process lifecycle and logging.
	envShutdownTimeout = "BACKEND_SHUTDOWN_TIMEOUT"
	envLoggingLevel = "BACKEND_LOGGING_LEVEL"
	// Public HTTP listener.
	envHTTPListenAddr = "BACKEND_HTTP_LISTEN_ADDR"
	envHTTPReadTimeout = "BACKEND_HTTP_READ_TIMEOUT"
	envHTTPWriteTimeout = "BACKEND_HTTP_WRITE_TIMEOUT"
	envHTTPShutdownTimeout = "BACKEND_HTTP_SHUTDOWN_TIMEOUT"
	// gRPC push listener.
	envGRPCPushListenAddr = "BACKEND_GRPC_PUSH_LISTEN_ADDR"
	envGRPCPushShutdownTimeout = "BACKEND_GRPC_PUSH_SHUTDOWN_TIMEOUT"
	// Postgres pool.
	envPostgresDSN = "BACKEND_POSTGRES_DSN"
	envPostgresMaxConns = "BACKEND_POSTGRES_MAX_CONNS"
	envPostgresMinConns = "BACKEND_POSTGRES_MIN_CONNS"
	envPostgresOperationTimeout = "BACKEND_POSTGRES_OPERATION_TIMEOUT"
	// SMTP relay and mail outbox worker.
	envSMTPHost = "BACKEND_SMTP_HOST"
	envSMTPPort = "BACKEND_SMTP_PORT"
	envSMTPUsername = "BACKEND_SMTP_USERNAME"
	envSMTPPassword = "BACKEND_SMTP_PASSWORD"
	envSMTPFrom = "BACKEND_SMTP_FROM"
	envSMTPTLSMode = "BACKEND_SMTP_TLS_MODE"
	envMailWorkerInterval = "BACKEND_MAIL_WORKER_INTERVAL"
	envMailMaxAttempts = "BACKEND_MAIL_MAX_ATTEMPTS"
	// Docker client and game state location.
	envDockerHost = "BACKEND_DOCKER_HOST"
	envDockerNetwork = "BACKEND_DOCKER_NETWORK"
	envGameStateRoot = "BACKEND_GAME_STATE_ROOT"
	// First-admin bootstrap.
	envAdminBootstrapUser = "BACKEND_ADMIN_BOOTSTRAP_USER"
	envAdminBootstrapPassword = "BACKEND_ADMIN_BOOTSTRAP_PASSWORD"
	// GeoIP database.
	envGeoIPDBPath = "BACKEND_GEOIP_DB_PATH"
	// OpenTelemetry and service identity.
	envOTelTracesExporter = "BACKEND_OTEL_TRACES_EXPORTER"
	envOTelMetricsExporter = "BACKEND_OTEL_METRICS_EXPORTER"
	envOTelProtocol = "BACKEND_OTEL_PROTOCOL"
	envOTelEndpoint = "BACKEND_OTEL_ENDPOINT"
	envOTelPrometheusListenAddr = "BACKEND_OTEL_PROMETHEUS_LISTEN_ADDR"
	envServiceName = "BACKEND_SERVICE_NAME"
	// Gateway freshness window.
	envFreshnessWindow = "BACKEND_FRESHNESS_WINDOW"
	// Email-code authentication.
	envAuthChallengeTTL = "BACKEND_AUTH_CHALLENGE_TTL"
	envAuthChallengeMaxAttempts = "BACKEND_AUTH_CHALLENGE_MAX_ATTEMPTS"
	envAuthChallengeThrottleWindow = "BACKEND_AUTH_CHALLENGE_THROTTLE_WINDOW"
	envAuthChallengeThrottleMax = "BACKEND_AUTH_CHALLENGE_THROTTLE_MAX"
	envAuthUserNameMaxRetries = "BACKEND_AUTH_USERNAME_MAX_RETRIES"
	// Lobby.
	envLobbySweeperInterval = "BACKEND_LOBBY_SWEEPER_INTERVAL"
	envLobbyPendingRegistrationTTL = "BACKEND_LOBBY_PENDING_REGISTRATION_TTL"
	envLobbyInviteDefaultTTL = "BACKEND_LOBBY_INVITE_DEFAULT_TTL"
	// Engine calls.
	envEngineCallTimeout = "BACKEND_ENGINE_CALL_TIMEOUT"
	envEngineProbeTimeout = "BACKEND_ENGINE_PROBE_TIMEOUT"
	// Runtime module (workers and engine containers).
	envRuntimeWorkerPoolSize = "BACKEND_RUNTIME_WORKER_POOL_SIZE"
	envRuntimeJobQueueSize = "BACKEND_RUNTIME_JOB_QUEUE_SIZE"
	envRuntimeReconcileInterval = "BACKEND_RUNTIME_RECONCILE_INTERVAL"
	envRuntimeImagePullPolicy = "BACKEND_RUNTIME_IMAGE_PULL_POLICY"
	envRuntimeContainerLogDriver = "BACKEND_RUNTIME_CONTAINER_LOG_DRIVER"
	envRuntimeContainerLogOpts = "BACKEND_RUNTIME_CONTAINER_LOG_OPTS"
	envRuntimeContainerCPUQuota = "BACKEND_RUNTIME_CONTAINER_CPU_QUOTA"
	envRuntimeContainerMemory = "BACKEND_RUNTIME_CONTAINER_MEMORY"
	envRuntimeContainerPIDsLimit = "BACKEND_RUNTIME_CONTAINER_PIDS_LIMIT"
	envRuntimeContainerStateMount = "BACKEND_RUNTIME_CONTAINER_STATE_MOUNT"
	envRuntimeStopGracePeriod = "BACKEND_RUNTIME_STOP_GRACE_PERIOD"
	// Notifications.
	envNotificationAdminEmail = "BACKEND_NOTIFICATION_ADMIN_EMAIL"
	envNotificationWorkerInterval = "BACKEND_NOTIFICATION_WORKER_INTERVAL"
	envNotificationMaxAttempts = "BACKEND_NOTIFICATION_MAX_ATTEMPTS"
)
// Default values applied when an environment variable is absent.
// Variables without a default constant here (for example the Postgres
// DSN, the SMTP host and credentials, and the OTel endpoint) have no
// built-in fallback in this block.
const (
	// Process lifecycle and logging.
	defaultShutdownTimeout = 30 * time.Second
	defaultLoggingLevel = "info"
	// Public HTTP listener.
	defaultHTTPListenAddr = ":8080"
	defaultHTTPReadTimeout = 30 * time.Second
	defaultHTTPWriteTimeout = 30 * time.Second
	defaultHTTPShutdownTimeout = 15 * time.Second
	// gRPC push listener.
	defaultGRPCPushListenAddr = ":8081"
	defaultGRPCPushShutdownTimeout = 10 * time.Second
	// Postgres pool.
	defaultPostgresMaxConns = 25
	defaultPostgresMinConns = 2
	defaultPostgresOperationTimeout = 5 * time.Second
	// SMTP relay and mail outbox worker.
	defaultSMTPPort = 587
	defaultSMTPTLSMode = "starttls"
	defaultMailWorkerInterval = 2 * time.Second
	defaultMailMaxAttempts = 8
	// Docker client.
	defaultDockerHost = "unix:///var/run/docker.sock"
	// OpenTelemetry and service identity.
	defaultOTelTracesExporter = "otlp"
	defaultOTelMetricsExporter = "otlp"
	defaultOTelProtocol = "grpc"
	defaultOTelPrometheusListenAddr = ":9100"
	defaultServiceName = "galaxy-backend"
	// Gateway freshness window.
	defaultFreshnessWindow = 5 * time.Minute
	// Email-code authentication.
	defaultAuthChallengeTTL = 10 * time.Minute
	defaultAuthChallengeMaxAttempts = 5
	defaultAuthChallengeThrottleWindow = 60 * time.Second
	defaultAuthChallengeThrottleMax = 3
	defaultAuthUserNameMaxRetries = 10
	// Lobby: the pending-registration TTL is 30 days and the invite TTL is
	// 7 days.
	defaultLobbySweeperInterval = 60 * time.Second
	defaultLobbyPendingRegistrationTTL = 30 * 24 * time.Hour
	defaultLobbyInviteDefaultTTL = 7 * 24 * time.Hour
	// Engine calls.
	defaultEngineCallTimeout = 60 * time.Second
	defaultEngineProbeTimeout = 5 * time.Second
	// Runtime module (workers and engine containers).
	defaultRuntimeWorkerPoolSize = 4
	defaultRuntimeJobQueueSize = 64
	defaultRuntimeReconcileInterval = 60 * time.Second
	defaultRuntimeImagePullPolicy = "if_missing"
	defaultRuntimeContainerLogDriver = "json-file"
	defaultRuntimeContainerCPUQuota = 2.0
	defaultRuntimeContainerMemory = "512m"
	defaultRuntimeContainerPIDsLimit = 256
	defaultRuntimeContainerStateMount = "/var/lib/galaxy-game"
	defaultRuntimeStopGracePeriod = 10 * time.Second
	// Notifications.
	defaultNotificationWorkerInterval = 5 * time.Second
	defaultNotificationMaxAttempts = 8
)
// Allowed values for the closed-set string options. Each slice lists the
// complete set of accepted spellings for one option.
var (
	// allowedTracesExporters lists the accepted trace exporter names.
	allowedTracesExporters = []string{"none", "otlp", "stdout"}
	// allowedMetricsExporters lists the accepted metrics exporter names;
	// unlike traces it additionally accepts "prometheus".
	allowedMetricsExporters = []string{"none", "otlp", "stdout", "prometheus"}
	// allowedOTelProtocols lists the accepted OTel protocol names.
	allowedOTelProtocols = []string{"grpc", "http/protobuf"}
	// allowedSMTPTLSModes lists the accepted SMTP TLS modes.
	allowedSMTPTLSModes = []string{"none", "starttls", "tls"}
	// allowedPullPolicies lists the accepted runtime image pull policies.
	allowedPullPolicies = []string{"if_missing", "always", "never"}
)
// Config is the top-level backend configuration assembled from environment
// variables. The zero value is not usable; callers must obtain a Config via
// DefaultConfig or LoadFromEnv.
//
// Each nested struct groups the settings of one subsystem; see the
// documentation on the respective type for details.
type Config struct {
	// ShutdownTimeout bounds each component's Shutdown call coordinated by
	// the process App lifecycle. Per-listener timeouts (HTTP, gRPC) bound the
	// inner server stop and may be smaller than ShutdownTimeout.
	ShutdownTimeout time.Duration
	// Logging holds the structured logger settings.
	Logging LoggingConfig
	// HTTP holds the public HTTP listener settings.
	HTTP HTTPConfig
	// GRPCPush holds the gRPC push listener settings.
	GRPCPush GRPCPushConfig
	// Postgres holds the primary Postgres pool settings.
	Postgres PostgresConfig
	// SMTP holds the SMTP relay settings used by the mail outbox.
	SMTP SMTPConfig
	// Mail holds the mail outbox worker settings.
	Mail MailConfig
	// Docker holds the Docker client settings used by the runtime module.
	Docker DockerConfig
	// Game holds the runtime engine container layout settings.
	Game GameConfig
	// Admin holds the optional first-admin bootstrap settings.
	Admin AdminBootstrapConfig
	// GeoIP holds the GeoLite2 country database settings.
	GeoIP GeoIPConfig
	// Telemetry holds the OpenTelemetry runtime settings.
	Telemetry TelemetryConfig
	// Auth holds the email-code authentication flow settings.
	Auth AuthConfig
	// Lobby, Engine, Runtime and Notification hold the settings of the
	// correspondingly named subsystems.
	Lobby LobbyConfig
	Engine EngineConfig
	Runtime RuntimeConfig
	Notification NotificationConfig
	// FreshnessWindow mirrors the gateway freshness window and is used by the
	// push server to bound the cursor TTL.
	FreshnessWindow time.Duration
}
// LoggingConfig stores the parameters used by the structured logger.
type LoggingConfig struct {
	// Level is the zap level name (e.g. "debug", "info", "warn", "error").
	// The default constant for the logging level is "info".
	Level string
}
// HTTPConfig configures the public HTTP listener.
//
// NOTE(review): the variable names cited below are inferred from the
// BACKEND_HTTP_* constants; the loader that performs the mapping is not
// visible from this part of the file.
type HTTPConfig struct {
	// Addr is the listen address (presumably BACKEND_HTTP_LISTEN_ADDR).
	Addr string
	// ReadTimeout bounds reading a request (presumably
	// BACKEND_HTTP_READ_TIMEOUT).
	ReadTimeout time.Duration
	// WriteTimeout bounds writing a response (presumably
	// BACKEND_HTTP_WRITE_TIMEOUT).
	WriteTimeout time.Duration
	// ShutdownTimeout bounds the HTTP server stop (presumably
	// BACKEND_HTTP_SHUTDOWN_TIMEOUT).
	ShutdownTimeout time.Duration
}
// GRPCPushConfig configures the gRPC push listener.
//
// NOTE(review): the variable names cited below are inferred from the
// BACKEND_GRPC_PUSH_* constants; the loader is not visible here.
type GRPCPushConfig struct {
	// Addr is the listen address (presumably
	// BACKEND_GRPC_PUSH_LISTEN_ADDR).
	Addr string
	// ShutdownTimeout bounds the gRPC server stop (presumably
	// BACKEND_GRPC_PUSH_SHUTDOWN_TIMEOUT).
	ShutdownTimeout time.Duration
}
// PostgresConfig configures the primary Postgres pool.
//
// MinConns mirrors README §4 BACKEND_POSTGRES_MIN_CONNS and is interpreted as
// the maximum number of idle connections kept warm in the pool — database/sql
// has no real minimum-pool concept, so this is the closest equivalent. The
// mapping is documented in `backend/README.md` and `backend/docs/`.
type PostgresConfig struct {
	// DSN is the connection string. It has no default and Validate requires
	// it to be non-blank.
	DSN string
	// MaxConns must be positive.
	MaxConns int
	// MinConns must be non-negative and must not exceed MaxConns.
	MinConns int
	// OperationTimeout must be positive.
	// NOTE(review): presumably bounds individual database operations —
	// confirm against the consuming postgres package.
	OperationTimeout time.Duration
}
// SMTPConfig configures the SMTP relay used by the mail outbox.
type SMTPConfig struct {
	// Host has no default; Validate requires it to be non-blank.
	Host string
	// Port must be a valid TCP port in the range 1..65535.
	Port int
	// Username is optional; Validate places no constraint on it.
	Username string
	// Password is optional; Validate places no constraint on it.
	Password string
	// From has no default; Validate requires it to be non-blank.
	From string
	// TLSMode must be one of allowedSMTPTLSModes ("none", "starttls", "tls").
	TLSMode string
}
// MailConfig configures the mail outbox worker.
type MailConfig struct {
	// WorkerInterval must be positive.
	// NOTE(review): presumably the polling cadence of the outbox worker —
	// confirm against the mail module.
	WorkerInterval time.Duration
	// MaxAttempts must be positive.
	// NOTE(review): presumably the delivery retry cap — confirm against the
	// mail module.
	MaxAttempts int
}
// DockerConfig configures the Docker client used by the runtime module.
type DockerConfig struct {
	// Host has a default supplied by DefaultConfig; Validate requires it to
	// be non-blank.
	Host string
	// Network has no default; Validate requires it to be non-blank.
	Network string
}
// GameConfig configures the runtime engine container layout.
type GameConfig struct {
	// StateRoot has no default; Validate requires it to be non-blank.
	// NOTE(review): presumably the host directory under which per-game state
	// directories live — confirm against the runtime module.
	StateRoot string
}
// AdminBootstrapConfig configures the optional first-admin bootstrap.
// At startup the admin module inserts a row in `backend.admin_accounts`
// when User is non-empty and no row with that username exists yet; the
// insert is idempotent across restarts.
type AdminBootstrapConfig struct {
	// User is optional; leaving it empty disables the bootstrap.
	User string
	// Password is required by Validate whenever User is non-empty.
	Password string
}
// GeoIPConfig configures the GeoLite2 country database used by geo lookups.
type GeoIPConfig struct {
	// DBPath has no default; Validate requires it to be non-blank. Validate
	// does not check that the path exists.
	DBPath string
}
// TelemetryConfig configures the OpenTelemetry runtime.
//
// LoadFromEnv lower-cases TracesExporter, MetricsExporter and Protocol before
// validation, so those environment values are case-insensitive.
type TelemetryConfig struct {
	// ServiceName must be non-blank.
	ServiceName string
	// TracesExporter must be one of allowedTracesExporters.
	TracesExporter string
	// MetricsExporter must be one of allowedMetricsExporters.
	MetricsExporter string
	// Protocol must be one of allowedOTelProtocols, but Validate only checks
	// it when TracesExporter or MetricsExporter is "otlp".
	Protocol string
	// Endpoint is optional; Validate places no constraint on it.
	Endpoint string
	// PrometheusListenAddr must be non-blank when MetricsExporter is
	// "prometheus".
	PrometheusListenAddr string
}
// AuthConfig configures the email-code authentication flow implemented in
// `backend/internal/auth`. ChallengeTTL bounds the lifetime of an issued
// `auth_challenges` row, ChallengeMaxAttempts caps confirm-email-code
// attempts per challenge, ChallengeThrottle bounds new-challenge issuance
// per email, and UserNameMaxRetries caps the retry budget for synthesising
// a unique `accounts.user_name` at registration.
//
// Validate requires every numeric and duration field, including both
// ChallengeThrottle members, to be positive.
type AuthConfig struct {
	// ChallengeTTL is the challenge lifetime; must be positive.
	ChallengeTTL time.Duration
	// ChallengeMaxAttempts is the per-challenge confirm cap; must be positive.
	ChallengeMaxAttempts int
	// ChallengeThrottle is the per-email issuance throttle.
	ChallengeThrottle AuthChallengeThrottleConfig
	// UserNameMaxRetries is the user-name synthesis retry budget; must be
	// positive.
	UserNameMaxRetries int
}
// AuthChallengeThrottleConfig bounds how many un-consumed, non-expired
// challenges a single email may hold inside a sliding window before the
// auth service starts reusing the most recent existing challenge instead
// of issuing a new one.
type AuthChallengeThrottleConfig struct {
	// Window is the sliding-window length; Validate requires it to be positive.
	Window time.Duration
	// Max is the challenge cap inside Window; Validate requires it to be
	// positive.
	Max int
}
// EngineConfig configures the per-call timeouts of `engineclient` against
// running game-engine containers. CallTimeout bounds turn-generation-class
// operations (init, turn, banish, command, order); ProbeTimeout bounds
// inspect-style reads (status, report, healthz).
type EngineConfig struct {
	// CallTimeout must be positive.
	CallTimeout time.Duration
	// ProbeTimeout must be positive.
	ProbeTimeout time.Duration
}
// RuntimeConfig configures the runtime module: worker pool, reconciliation
// cadence, image-pull policy, and per-container resource defaults applied
// at engine container creation time.
type RuntimeConfig struct {
	// WorkerPoolSize bounds the number of concurrent long-running runtime
	// jobs (image pull, container start, restart, patch). Must be positive.
	WorkerPoolSize int
	// JobQueueSize is the buffered job channel capacity. Once full, new
	// runtime requests block briefly until a worker frees a slot. Must be
	// positive.
	JobQueueSize int
	// ReconcileInterval bounds how often the runtime reconciler reads the
	// Docker daemon's labelled containers and reconciles them against
	// `runtime_records`. Must be positive.
	ReconcileInterval time.Duration
	// ImagePullPolicy selects the dockerclient pull behaviour:
	// `if_missing`, `always`, or `never`. LoadFromEnv lower-cases the
	// environment value before Validate checks it.
	ImagePullPolicy string
	// ContainerLogDriver is the Docker log driver applied to every engine
	// container created by the runtime (e.g., `json-file`). Must be
	// non-blank.
	ContainerLogDriver string
	// ContainerLogOpts is the comma-separated `key=value` list passed to
	// the log driver. May be empty.
	ContainerLogOpts string
	// ContainerCPUQuota is the `--cpus` value applied as a resource limit
	// on each engine container. Must be positive.
	ContainerCPUQuota float64
	// ContainerMemory is the `--memory` value (e.g. `512m`). Must be
	// non-blank; Validate does not parse the value itself.
	ContainerMemory string
	// ContainerPIDsLimit is the `--pids-limit` value. Must be positive.
	ContainerPIDsLimit int
	// ContainerStateMount is the absolute in-container path the per-game
	// state directory is bind-mounted at. Validate requires a leading "/".
	ContainerStateMount string
	// StopGracePeriod is the docker stop SIGTERM-to-SIGKILL grace period
	// applied during stop / cancel / restart / patch. Must be positive.
	StopGracePeriod time.Duration
}
// NotificationConfig configures the notification fan-out module
// implemented in `backend/internal/notification`. AdminEmail receives
// admin-channel kinds (the `runtime.*` set in `backend/README.md` §10);
// when empty, admin-email routes are recorded as `skipped`. WorkerInterval
// bounds how often the route worker scans for due rows; MaxAttempts caps
// route delivery retries before dead-lettering.
type NotificationConfig struct {
	// AdminEmail is optional; when non-blank Validate requires it to parse
	// as an RFC 5322 address.
	AdminEmail string
	// WorkerInterval must be positive.
	WorkerInterval time.Duration
	// MaxAttempts must be positive.
	MaxAttempts int
}
// LobbyConfig configures the lobby module: the periodic sweeper interval,
// the lifetime of `pending_registration` Race Name Directory entries, and
// the default expiry applied to invites that omit `expires_at`.
//
// Validate requires all three durations to be positive.
type LobbyConfig struct {
	// SweeperInterval bounds how often the lobby sweeper goroutine wakes
	// up to release expired pending_registration rows and to auto-close
	// enrollment-expired games.
	SweeperInterval time.Duration
	// PendingRegistrationTTL bounds how long a `pending_registration`
	// Race Name Directory row stays available for promotion via
	// `lobby.race_name.register` before the sweeper releases it.
	PendingRegistrationTTL time.Duration
	// InviteDefaultTTL is the expiry applied to invites whose request body
	// omits an explicit `expires_at`.
	InviteDefaultTTL time.Duration
}
// DefaultConfig builds a Config carrying every default documented in
// README §4. Fields that have no default — the required strings
// Postgres.DSN, SMTP.Host, SMTP.From, Docker.Network, Game.StateRoot and
// GeoIP.DBPath, plus the optional ones — are left at their zero value, so
// the result does not pass Validate until the caller (or LoadFromEnv) fills
// in the required strings.
func DefaultConfig() Config {
	var cfg Config

	// Process-wide settings.
	cfg.ShutdownTimeout = defaultShutdownTimeout
	cfg.FreshnessWindow = defaultFreshnessWindow
	cfg.Logging.Level = defaultLoggingLevel

	// Listeners.
	cfg.HTTP.Addr = defaultHTTPListenAddr
	cfg.HTTP.ReadTimeout = defaultHTTPReadTimeout
	cfg.HTTP.WriteTimeout = defaultHTTPWriteTimeout
	cfg.HTTP.ShutdownTimeout = defaultHTTPShutdownTimeout
	cfg.GRPCPush.Addr = defaultGRPCPushListenAddr
	cfg.GRPCPush.ShutdownTimeout = defaultGRPCPushShutdownTimeout

	// Storage and mail.
	cfg.Postgres.MaxConns = defaultPostgresMaxConns
	cfg.Postgres.MinConns = defaultPostgresMinConns
	cfg.Postgres.OperationTimeout = defaultPostgresOperationTimeout
	cfg.SMTP.Port = defaultSMTPPort
	cfg.SMTP.TLSMode = defaultSMTPTLSMode
	cfg.Mail.WorkerInterval = defaultMailWorkerInterval
	cfg.Mail.MaxAttempts = defaultMailMaxAttempts

	// Docker and telemetry.
	cfg.Docker.Host = defaultDockerHost
	cfg.Telemetry.ServiceName = defaultServiceName
	cfg.Telemetry.TracesExporter = defaultOTelTracesExporter
	cfg.Telemetry.MetricsExporter = defaultOTelMetricsExporter
	cfg.Telemetry.Protocol = defaultOTelProtocol
	cfg.Telemetry.PrometheusListenAddr = defaultOTelPrometheusListenAddr

	// Auth, lobby and engine client.
	cfg.Auth.ChallengeTTL = defaultAuthChallengeTTL
	cfg.Auth.ChallengeMaxAttempts = defaultAuthChallengeMaxAttempts
	cfg.Auth.ChallengeThrottle.Window = defaultAuthChallengeThrottleWindow
	cfg.Auth.ChallengeThrottle.Max = defaultAuthChallengeThrottleMax
	cfg.Auth.UserNameMaxRetries = defaultAuthUserNameMaxRetries
	cfg.Lobby.SweeperInterval = defaultLobbySweeperInterval
	cfg.Lobby.PendingRegistrationTTL = defaultLobbyPendingRegistrationTTL
	cfg.Lobby.InviteDefaultTTL = defaultLobbyInviteDefaultTTL
	cfg.Engine.CallTimeout = defaultEngineCallTimeout
	cfg.Engine.ProbeTimeout = defaultEngineProbeTimeout

	// Notification worker.
	cfg.Notification.WorkerInterval = defaultNotificationWorkerInterval
	cfg.Notification.MaxAttempts = defaultNotificationMaxAttempts

	// Runtime module and per-container resource defaults.
	cfg.Runtime.WorkerPoolSize = defaultRuntimeWorkerPoolSize
	cfg.Runtime.JobQueueSize = defaultRuntimeJobQueueSize
	cfg.Runtime.ReconcileInterval = defaultRuntimeReconcileInterval
	cfg.Runtime.ImagePullPolicy = defaultRuntimeImagePullPolicy
	cfg.Runtime.ContainerLogDriver = defaultRuntimeContainerLogDriver
	cfg.Runtime.ContainerCPUQuota = defaultRuntimeContainerCPUQuota
	cfg.Runtime.ContainerMemory = defaultRuntimeContainerMemory
	cfg.Runtime.ContainerPIDsLimit = defaultRuntimeContainerPIDsLimit
	cfg.Runtime.ContainerStateMount = defaultRuntimeContainerStateMount
	cfg.Runtime.StopGracePeriod = defaultRuntimeStopGracePeriod

	return cfg
}
// LoadFromEnv loads Config from environment variables, applying the
// DefaultConfig values for any variable that is unset or blank, and validates
// the result. The returned Config is safe to use without further
// modification.
//
// Closed-set string options (telemetry exporters, OTLP protocol, SMTP TLS
// mode, image pull policy) are lower-cased before validation so their
// environment values are case-insensitive.
//
// It returns the zero Config and an error when a numeric or duration
// variable cannot be parsed (the error is prefixed with the variable name)
// or when Validate rejects the assembled configuration.
func LoadFromEnv() (Config, error) {
	cfg := DefaultConfig()
	var err error

	if cfg.ShutdownTimeout, err = loadDuration(envShutdownTimeout, cfg.ShutdownTimeout); err != nil {
		return Config{}, err
	}
	cfg.Logging.Level = loadString(envLoggingLevel, cfg.Logging.Level)

	// HTTP listener.
	cfg.HTTP.Addr = loadString(envHTTPListenAddr, cfg.HTTP.Addr)
	if cfg.HTTP.ReadTimeout, err = loadDuration(envHTTPReadTimeout, cfg.HTTP.ReadTimeout); err != nil {
		return Config{}, err
	}
	if cfg.HTTP.WriteTimeout, err = loadDuration(envHTTPWriteTimeout, cfg.HTTP.WriteTimeout); err != nil {
		return Config{}, err
	}
	if cfg.HTTP.ShutdownTimeout, err = loadDuration(envHTTPShutdownTimeout, cfg.HTTP.ShutdownTimeout); err != nil {
		return Config{}, err
	}

	// gRPC push listener.
	cfg.GRPCPush.Addr = loadString(envGRPCPushListenAddr, cfg.GRPCPush.Addr)
	if cfg.GRPCPush.ShutdownTimeout, err = loadDuration(envGRPCPushShutdownTimeout, cfg.GRPCPush.ShutdownTimeout); err != nil {
		return Config{}, err
	}

	// Postgres pool.
	cfg.Postgres.DSN = loadString(envPostgresDSN, cfg.Postgres.DSN)
	if cfg.Postgres.MaxConns, err = loadInt(envPostgresMaxConns, cfg.Postgres.MaxConns); err != nil {
		return Config{}, err
	}
	if cfg.Postgres.MinConns, err = loadInt(envPostgresMinConns, cfg.Postgres.MinConns); err != nil {
		return Config{}, err
	}
	if cfg.Postgres.OperationTimeout, err = loadDuration(envPostgresOperationTimeout, cfg.Postgres.OperationTimeout); err != nil {
		return Config{}, err
	}

	// SMTP relay and mail outbox worker.
	cfg.SMTP.Host = loadString(envSMTPHost, cfg.SMTP.Host)
	if cfg.SMTP.Port, err = loadInt(envSMTPPort, cfg.SMTP.Port); err != nil {
		return Config{}, err
	}
	cfg.SMTP.Username = loadString(envSMTPUsername, cfg.SMTP.Username)
	cfg.SMTP.Password = loadString(envSMTPPassword, cfg.SMTP.Password)
	cfg.SMTP.From = loadString(envSMTPFrom, cfg.SMTP.From)
	// Normalise case like every other closed-set option, so e.g. "STARTTLS"
	// is accepted just as "OTLP" is for the exporters.
	cfg.SMTP.TLSMode = strings.ToLower(loadString(envSMTPTLSMode, cfg.SMTP.TLSMode))
	if cfg.Mail.WorkerInterval, err = loadDuration(envMailWorkerInterval, cfg.Mail.WorkerInterval); err != nil {
		return Config{}, err
	}
	if cfg.Mail.MaxAttempts, err = loadInt(envMailMaxAttempts, cfg.Mail.MaxAttempts); err != nil {
		return Config{}, err
	}

	// Docker, game layout, admin bootstrap, GeoIP.
	cfg.Docker.Host = loadString(envDockerHost, cfg.Docker.Host)
	cfg.Docker.Network = loadString(envDockerNetwork, cfg.Docker.Network)
	cfg.Game.StateRoot = loadString(envGameStateRoot, cfg.Game.StateRoot)
	cfg.Admin.User = loadString(envAdminBootstrapUser, cfg.Admin.User)
	cfg.Admin.Password = loadString(envAdminBootstrapPassword, cfg.Admin.Password)
	cfg.GeoIP.DBPath = loadString(envGeoIPDBPath, cfg.GeoIP.DBPath)

	// Telemetry.
	cfg.Telemetry.TracesExporter = strings.ToLower(loadString(envOTelTracesExporter, cfg.Telemetry.TracesExporter))
	cfg.Telemetry.MetricsExporter = strings.ToLower(loadString(envOTelMetricsExporter, cfg.Telemetry.MetricsExporter))
	cfg.Telemetry.Protocol = strings.ToLower(loadString(envOTelProtocol, cfg.Telemetry.Protocol))
	cfg.Telemetry.Endpoint = loadString(envOTelEndpoint, cfg.Telemetry.Endpoint)
	cfg.Telemetry.PrometheusListenAddr = loadString(envOTelPrometheusListenAddr, cfg.Telemetry.PrometheusListenAddr)
	cfg.Telemetry.ServiceName = loadString(envServiceName, cfg.Telemetry.ServiceName)

	if cfg.FreshnessWindow, err = loadDuration(envFreshnessWindow, cfg.FreshnessWindow); err != nil {
		return Config{}, err
	}

	// Auth.
	if cfg.Auth.ChallengeTTL, err = loadDuration(envAuthChallengeTTL, cfg.Auth.ChallengeTTL); err != nil {
		return Config{}, err
	}
	if cfg.Auth.ChallengeMaxAttempts, err = loadInt(envAuthChallengeMaxAttempts, cfg.Auth.ChallengeMaxAttempts); err != nil {
		return Config{}, err
	}
	if cfg.Auth.ChallengeThrottle.Window, err = loadDuration(envAuthChallengeThrottleWindow, cfg.Auth.ChallengeThrottle.Window); err != nil {
		return Config{}, err
	}
	if cfg.Auth.ChallengeThrottle.Max, err = loadInt(envAuthChallengeThrottleMax, cfg.Auth.ChallengeThrottle.Max); err != nil {
		return Config{}, err
	}
	if cfg.Auth.UserNameMaxRetries, err = loadInt(envAuthUserNameMaxRetries, cfg.Auth.UserNameMaxRetries); err != nil {
		return Config{}, err
	}

	// Lobby.
	if cfg.Lobby.SweeperInterval, err = loadDuration(envLobbySweeperInterval, cfg.Lobby.SweeperInterval); err != nil {
		return Config{}, err
	}
	if cfg.Lobby.PendingRegistrationTTL, err = loadDuration(envLobbyPendingRegistrationTTL, cfg.Lobby.PendingRegistrationTTL); err != nil {
		return Config{}, err
	}
	if cfg.Lobby.InviteDefaultTTL, err = loadDuration(envLobbyInviteDefaultTTL, cfg.Lobby.InviteDefaultTTL); err != nil {
		return Config{}, err
	}

	// Engine client.
	if cfg.Engine.CallTimeout, err = loadDuration(envEngineCallTimeout, cfg.Engine.CallTimeout); err != nil {
		return Config{}, err
	}
	if cfg.Engine.ProbeTimeout, err = loadDuration(envEngineProbeTimeout, cfg.Engine.ProbeTimeout); err != nil {
		return Config{}, err
	}

	// Runtime module.
	if cfg.Runtime.WorkerPoolSize, err = loadInt(envRuntimeWorkerPoolSize, cfg.Runtime.WorkerPoolSize); err != nil {
		return Config{}, err
	}
	if cfg.Runtime.JobQueueSize, err = loadInt(envRuntimeJobQueueSize, cfg.Runtime.JobQueueSize); err != nil {
		return Config{}, err
	}
	if cfg.Runtime.ReconcileInterval, err = loadDuration(envRuntimeReconcileInterval, cfg.Runtime.ReconcileInterval); err != nil {
		return Config{}, err
	}
	cfg.Runtime.ImagePullPolicy = strings.ToLower(loadString(envRuntimeImagePullPolicy, cfg.Runtime.ImagePullPolicy))
	cfg.Runtime.ContainerLogDriver = loadString(envRuntimeContainerLogDriver, cfg.Runtime.ContainerLogDriver)
	cfg.Runtime.ContainerLogOpts = loadString(envRuntimeContainerLogOpts, cfg.Runtime.ContainerLogOpts)
	if cfg.Runtime.ContainerCPUQuota, err = loadFloat(envRuntimeContainerCPUQuota, cfg.Runtime.ContainerCPUQuota); err != nil {
		return Config{}, err
	}
	cfg.Runtime.ContainerMemory = loadString(envRuntimeContainerMemory, cfg.Runtime.ContainerMemory)
	if cfg.Runtime.ContainerPIDsLimit, err = loadInt(envRuntimeContainerPIDsLimit, cfg.Runtime.ContainerPIDsLimit); err != nil {
		return Config{}, err
	}
	cfg.Runtime.ContainerStateMount = loadString(envRuntimeContainerStateMount, cfg.Runtime.ContainerStateMount)
	if cfg.Runtime.StopGracePeriod, err = loadDuration(envRuntimeStopGracePeriod, cfg.Runtime.StopGracePeriod); err != nil {
		return Config{}, err
	}

	// Notification fan-out.
	cfg.Notification.AdminEmail = loadString(envNotificationAdminEmail, cfg.Notification.AdminEmail)
	if cfg.Notification.WorkerInterval, err = loadDuration(envNotificationWorkerInterval, cfg.Notification.WorkerInterval); err != nil {
		return Config{}, err
	}
	if cfg.Notification.MaxAttempts, err = loadInt(envNotificationMaxAttempts, cfg.Notification.MaxAttempts); err != nil {
		return Config{}, err
	}

	if err := cfg.Validate(); err != nil {
		return Config{}, err
	}
	return cfg, nil
}
// Validate enforces the documented invariants from README §4. Required string
// fields must be non-empty; closed-set string options must match the allowed
// values; numeric and duration fields must be positive.
//
// Checks run in a fixed order and Validate returns as soon as one fails, so
// the error always describes the first violated invariant only. Every error
// message starts with the name of the environment variable that controls the
// offending field. Validate returns nil when all invariants hold.
func (c Config) Validate() error {
	// Process lifecycle and logging.
	if c.ShutdownTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envShutdownTimeout)
	}
	if strings.TrimSpace(c.Logging.Level) == "" {
		return fmt.Errorf("%s must not be empty", envLoggingLevel)
	}
	// HTTP listener.
	if strings.TrimSpace(c.HTTP.Addr) == "" {
		return fmt.Errorf("%s must not be empty", envHTTPListenAddr)
	}
	if c.HTTP.ReadTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envHTTPReadTimeout)
	}
	if c.HTTP.WriteTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envHTTPWriteTimeout)
	}
	if c.HTTP.ShutdownTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envHTTPShutdownTimeout)
	}
	// gRPC push listener.
	if strings.TrimSpace(c.GRPCPush.Addr) == "" {
		return fmt.Errorf("%s must not be empty", envGRPCPushListenAddr)
	}
	if c.GRPCPush.ShutdownTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envGRPCPushShutdownTimeout)
	}
	// Postgres pool: DSN is required, MinConns may be zero but never above
	// MaxConns.
	if strings.TrimSpace(c.Postgres.DSN) == "" {
		return fmt.Errorf("%s must be set", envPostgresDSN)
	}
	if c.Postgres.MaxConns <= 0 {
		return fmt.Errorf("%s must be positive", envPostgresMaxConns)
	}
	if c.Postgres.MinConns < 0 {
		return fmt.Errorf("%s must not be negative", envPostgresMinConns)
	}
	if c.Postgres.MinConns > c.Postgres.MaxConns {
		return fmt.Errorf("%s must not exceed %s", envPostgresMinConns, envPostgresMaxConns)
	}
	if c.Postgres.OperationTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envPostgresOperationTimeout)
	}
	// SMTP relay and mail outbox worker.
	if strings.TrimSpace(c.SMTP.Host) == "" {
		return fmt.Errorf("%s must be set", envSMTPHost)
	}
	if c.SMTP.Port <= 0 || c.SMTP.Port > 65535 {
		return fmt.Errorf("%s must be a valid TCP port (got %d)", envSMTPPort, c.SMTP.Port)
	}
	if strings.TrimSpace(c.SMTP.From) == "" {
		return fmt.Errorf("%s must be set", envSMTPFrom)
	}
	if !containsString(allowedSMTPTLSModes, c.SMTP.TLSMode) {
		return fmt.Errorf("%s must be one of %v (got %q)", envSMTPTLSMode, allowedSMTPTLSModes, c.SMTP.TLSMode)
	}
	if c.Mail.WorkerInterval <= 0 {
		return fmt.Errorf("%s must be positive", envMailWorkerInterval)
	}
	if c.Mail.MaxAttempts <= 0 {
		return fmt.Errorf("%s must be positive", envMailMaxAttempts)
	}
	// Docker client and game state layout.
	if strings.TrimSpace(c.Docker.Host) == "" {
		return fmt.Errorf("%s must not be empty", envDockerHost)
	}
	if strings.TrimSpace(c.Docker.Network) == "" {
		return fmt.Errorf("%s must be set", envDockerNetwork)
	}
	if strings.TrimSpace(c.Game.StateRoot) == "" {
		return fmt.Errorf("%s must be set", envGameStateRoot)
	}
	// Admin bootstrap is optional, but a user without a password is rejected.
	if c.Admin.User != "" && c.Admin.Password == "" {
		return fmt.Errorf("%s requires %s", envAdminBootstrapUser, envAdminBootstrapPassword)
	}
	if strings.TrimSpace(c.GeoIP.DBPath) == "" {
		return fmt.Errorf("%s must be set", envGeoIPDBPath)
	}
	// Telemetry: the protocol only matters when an OTLP exporter is selected,
	// and the Prometheus listen address only when the Prometheus exporter is.
	if !containsString(allowedTracesExporters, c.Telemetry.TracesExporter) {
		return fmt.Errorf("%s must be one of %v (got %q)", envOTelTracesExporter, allowedTracesExporters, c.Telemetry.TracesExporter)
	}
	if !containsString(allowedMetricsExporters, c.Telemetry.MetricsExporter) {
		return fmt.Errorf("%s must be one of %v (got %q)", envOTelMetricsExporter, allowedMetricsExporters, c.Telemetry.MetricsExporter)
	}
	if c.Telemetry.TracesExporter == "otlp" || c.Telemetry.MetricsExporter == "otlp" {
		if !containsString(allowedOTelProtocols, c.Telemetry.Protocol) {
			return fmt.Errorf("%s must be one of %v (got %q)", envOTelProtocol, allowedOTelProtocols, c.Telemetry.Protocol)
		}
	}
	if c.Telemetry.MetricsExporter == "prometheus" && strings.TrimSpace(c.Telemetry.PrometheusListenAddr) == "" {
		return fmt.Errorf("%s must be set when %s is %q", envOTelPrometheusListenAddr, envOTelMetricsExporter, "prometheus")
	}
	if strings.TrimSpace(c.Telemetry.ServiceName) == "" {
		return fmt.Errorf("%s must not be empty", envServiceName)
	}
	if c.FreshnessWindow <= 0 {
		return fmt.Errorf("%s must be positive", envFreshnessWindow)
	}
	// Auth.
	if c.Auth.ChallengeTTL <= 0 {
		return fmt.Errorf("%s must be positive", envAuthChallengeTTL)
	}
	if c.Auth.ChallengeMaxAttempts <= 0 {
		return fmt.Errorf("%s must be positive", envAuthChallengeMaxAttempts)
	}
	if c.Auth.ChallengeThrottle.Window <= 0 {
		return fmt.Errorf("%s must be positive", envAuthChallengeThrottleWindow)
	}
	if c.Auth.ChallengeThrottle.Max <= 0 {
		return fmt.Errorf("%s must be positive", envAuthChallengeThrottleMax)
	}
	if c.Auth.UserNameMaxRetries <= 0 {
		return fmt.Errorf("%s must be positive", envAuthUserNameMaxRetries)
	}
	// Lobby.
	if c.Lobby.SweeperInterval <= 0 {
		return fmt.Errorf("%s must be positive", envLobbySweeperInterval)
	}
	if c.Lobby.PendingRegistrationTTL <= 0 {
		return fmt.Errorf("%s must be positive", envLobbyPendingRegistrationTTL)
	}
	if c.Lobby.InviteDefaultTTL <= 0 {
		return fmt.Errorf("%s must be positive", envLobbyInviteDefaultTTL)
	}
	// Engine client timeouts.
	if c.Engine.CallTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envEngineCallTimeout)
	}
	if c.Engine.ProbeTimeout <= 0 {
		return fmt.Errorf("%s must be positive", envEngineProbeTimeout)
	}
	// Runtime module and per-container resource defaults. ContainerLogOpts is
	// intentionally not checked here because it may be empty.
	if c.Runtime.WorkerPoolSize <= 0 {
		return fmt.Errorf("%s must be positive", envRuntimeWorkerPoolSize)
	}
	if c.Runtime.JobQueueSize <= 0 {
		return fmt.Errorf("%s must be positive", envRuntimeJobQueueSize)
	}
	if c.Runtime.ReconcileInterval <= 0 {
		return fmt.Errorf("%s must be positive", envRuntimeReconcileInterval)
	}
	if !containsString(allowedPullPolicies, c.Runtime.ImagePullPolicy) {
		return fmt.Errorf("%s must be one of %v (got %q)", envRuntimeImagePullPolicy, allowedPullPolicies, c.Runtime.ImagePullPolicy)
	}
	if strings.TrimSpace(c.Runtime.ContainerLogDriver) == "" {
		return fmt.Errorf("%s must not be empty", envRuntimeContainerLogDriver)
	}
	if c.Runtime.ContainerCPUQuota <= 0 {
		return fmt.Errorf("%s must be positive", envRuntimeContainerCPUQuota)
	}
	if strings.TrimSpace(c.Runtime.ContainerMemory) == "" {
		return fmt.Errorf("%s must not be empty", envRuntimeContainerMemory)
	}
	if c.Runtime.ContainerPIDsLimit <= 0 {
		return fmt.Errorf("%s must be positive", envRuntimeContainerPIDsLimit)
	}
	// An absolute path is recognised purely by its leading "/"; an empty
	// value fails this check as well.
	if !strings.HasPrefix(strings.TrimSpace(c.Runtime.ContainerStateMount), "/") {
		return fmt.Errorf("%s must be an absolute path (got %q)", envRuntimeContainerStateMount, c.Runtime.ContainerStateMount)
	}
	if c.Runtime.StopGracePeriod <= 0 {
		return fmt.Errorf("%s must be positive", envRuntimeStopGracePeriod)
	}
	// Notification fan-out. AdminEmail is optional and only parsed when it is
	// non-blank.
	if c.Notification.WorkerInterval <= 0 {
		return fmt.Errorf("%s must be positive", envNotificationWorkerInterval)
	}
	if c.Notification.MaxAttempts <= 0 {
		return fmt.Errorf("%s must be positive", envNotificationMaxAttempts)
	}
	if email := strings.TrimSpace(c.Notification.AdminEmail); email != "" {
		if _, err := netmail.ParseAddress(email); err != nil {
			return fmt.Errorf("%s must be a valid RFC 5322 address: %w", envNotificationAdminEmail, err)
		}
	}
	return nil
}
// loadString returns the whitespace-trimmed value of the environment
// variable name. When the variable is unset, or set to an empty or
// whitespace-only value, fallback is returned unchanged.
func loadString(name, fallback string) string {
	if raw, ok := os.LookupEnv(name); ok {
		if value := strings.TrimSpace(raw); value != "" {
			return value
		}
	}
	return fallback
}
// loadInt parses the environment variable name as a base-10 int. An unset,
// empty, or whitespace-only variable yields fallback with a nil error. A
// value that strconv.Atoi rejects yields 0 and an error prefixed with the
// variable name.
func loadInt(name string, fallback int) (int, error) {
	raw, ok := os.LookupEnv(name)
	value := strings.TrimSpace(raw)
	if !ok || value == "" {
		return fallback, nil
	}

	n, err := strconv.Atoi(value)
	if err != nil {
		return 0, fmt.Errorf("%s: %w", name, err)
	}
	return n, nil
}
// loadFloat parses the environment variable name as a finite float64.
//
// An unset, empty, or whitespace-only variable yields fallback with a nil
// error. A value that strconv.ParseFloat rejects yields 0 and an error
// prefixed with the variable name. Values that parse to NaN or ±Inf
// ("nan", "inf", "infinity", in any case) are rejected the same way:
// ParseFloat accepts them without error, but NaN compares false against
// every bound and so would pass a later "must be positive" check, and an
// infinite quota cannot be converted to an integer resource limit.
func loadFloat(name string, fallback float64) (float64, error) {
	raw, ok := os.LookupEnv(name)
	if !ok {
		return fallback, nil
	}
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return fallback, nil
	}
	parsed, err := strconv.ParseFloat(trimmed, 64)
	if err != nil {
		return 0, fmt.Errorf("%s: %w", name, err)
	}
	// x*0 is 0 for every finite x and NaN for NaN and ±Inf, so this detects
	// all non-finite values without pulling in the math package.
	if parsed*0 != 0 {
		return 0, fmt.Errorf("%s: value must be a finite number (got %q)", name, trimmed)
	}
	return parsed, nil
}
// loadDuration parses the environment variable name with time.ParseDuration
// (e.g. "5s", "1500ms"). An unset, empty, or whitespace-only variable yields
// fallback with a nil error. An unparseable value yields 0 and an error
// prefixed with the variable name.
func loadDuration(name string, fallback time.Duration) (time.Duration, error) {
	raw, ok := os.LookupEnv(name)
	value := strings.TrimSpace(raw)
	if !ok || value == "" {
		return fallback, nil
	}

	d, err := time.ParseDuration(value)
	if err != nil {
		return 0, fmt.Errorf("%s: %w", name, err)
	}
	return d, nil
}
// containsString reports whether value is an exact (case-sensitive) element
// of set. A nil or empty set never contains anything.
func containsString(set []string, value string) bool {
	return slices.Index(set, value) >= 0
}
+94
View File
@@ -0,0 +1,94 @@
package config
import (
"strings"
"testing"
)
// validEnv returns a fresh map holding the smallest set of environment
// variables that lets LoadFromEnv pass Validate: exactly the required
// settings that have no default. Tests copy this baseline and adjust
// individual entries.
func validEnv() map[string]string {
	env := make(map[string]string, 6)
	env["BACKEND_POSTGRES_DSN"] = "postgres://galaxy:galaxy@127.0.0.1:5432/galaxy?sslmode=disable"
	env["BACKEND_SMTP_HOST"] = "smtp.example.test"
	env["BACKEND_SMTP_FROM"] = "noreply@example.test"
	env["BACKEND_DOCKER_NETWORK"] = "galaxy"
	env["BACKEND_GAME_STATE_ROOT"] = "/tmp/galaxy"
	env["BACKEND_GEOIP_DB_PATH"] = "/tmp/geoip.mmdb"
	return env
}
// setEnv applies every entry of env through t.Setenv, so each variable is
// restored automatically when the test finishes.
func setEnv(t *testing.T, env map[string]string) {
	t.Helper()
	for key := range env {
		t.Setenv(key, env[key])
	}
}
// TestLoadFromEnvAcceptsValidEnv checks that the minimal valid environment
// loads without error and that settings not present in the environment keep
// their defaults.
func TestLoadFromEnvAcceptsValidEnv(t *testing.T) {
	setEnv(t, validEnv())

	cfg, err := LoadFromEnv()

	// The cases are evaluated top to bottom, so only the first failing
	// expectation is reported.
	switch {
	case err != nil:
		t.Fatalf("LoadFromEnv returned error: %v", err)
	case cfg.HTTP.Addr != defaultHTTPListenAddr:
		t.Fatalf("HTTP.Addr = %q, want %q", cfg.HTTP.Addr, defaultHTTPListenAddr)
	case cfg.GRPCPush.Addr != defaultGRPCPushListenAddr:
		t.Fatalf("GRPCPush.Addr = %q, want %q", cfg.GRPCPush.Addr, defaultGRPCPushListenAddr)
	case cfg.Postgres.DSN == "":
		t.Fatalf("Postgres.DSN must be populated from env")
	case cfg.Telemetry.TracesExporter != defaultOTelTracesExporter:
		t.Fatalf("Telemetry.TracesExporter = %q, want %q", cfg.Telemetry.TracesExporter, defaultOTelTracesExporter)
	}
}
// TestLoadFromEnvFailsWithoutPostgresDSN checks that LoadFromEnv rejects a
// configuration with no Postgres DSN and names the variable in the error.
func TestLoadFromEnvFailsWithoutPostgresDSN(t *testing.T) {
	env := validEnv()
	// Blank the variable explicitly instead of merely leaving it out of the
	// map: an omitted key would let a BACKEND_POSTGRES_DSN inherited from the
	// process environment leak in and make the test pass or fail depending on
	// where it runs. loadString treats an empty value exactly like an unset
	// one, so this still exercises the "DSN missing" path.
	env["BACKEND_POSTGRES_DSN"] = ""
	setEnv(t, env)
	if _, err := LoadFromEnv(); err == nil || !strings.Contains(err.Error(), "BACKEND_POSTGRES_DSN") {
		t.Fatalf("expected BACKEND_POSTGRES_DSN error, got %v", err)
	}
}
// TestValidateRejectsAdminUserWithoutPassword checks that configuring a
// bootstrap admin user without a password is rejected and that the error
// names the missing password variable.
func TestValidateRejectsAdminUserWithoutPassword(t *testing.T) {
	env := validEnv()
	env["BACKEND_ADMIN_BOOTSTRAP_USER"] = "root"
	// Blank the password explicitly so a BACKEND_ADMIN_BOOTSTRAP_PASSWORD
	// inherited from the process environment cannot satisfy the check;
	// loadString treats an empty value exactly like an unset one.
	env["BACKEND_ADMIN_BOOTSTRAP_PASSWORD"] = ""
	setEnv(t, env)
	if _, err := LoadFromEnv(); err == nil || !strings.Contains(err.Error(), "BACKEND_ADMIN_BOOTSTRAP_PASSWORD") {
		t.Fatalf("expected admin password requirement, got %v", err)
	}
}
// TestValidateRejectsUnknownTracesExporter checks that a traces exporter
// outside the allowed set is rejected and that the error names the variable.
func TestValidateRejectsUnknownTracesExporter(t *testing.T) {
	env := validEnv()
	env["BACKEND_OTEL_TRACES_EXPORTER"] = "kafka"
	setEnv(t, env)

	_, err := LoadFromEnv()
	if err != nil && strings.Contains(err.Error(), "BACKEND_OTEL_TRACES_EXPORTER") {
		return
	}
	t.Fatalf("expected traces-exporter validation error, got %v", err)
}
// TestValidateRejectsPrometheusWithoutAddr checks that selecting the
// Prometheus metrics exporter with an empty listen address is rejected. It
// calls Validate directly on a hand-built Config because LoadFromEnv would
// replace an empty listen address with its default.
func TestValidateRejectsPrometheusWithoutAddr(t *testing.T) {
	cfg := DefaultConfig()

	// Fill in the required fields that have no default.
	cfg.Postgres.DSN = "postgres://x:y@127.0.0.1/galaxy"
	cfg.SMTP.Host, cfg.SMTP.From = "smtp", "from@x"
	cfg.Docker.Network = "galaxy"
	cfg.Game.StateRoot = "/tmp/galaxy"
	cfg.GeoIP.DBPath = "/tmp/geo"

	// The combination under test.
	cfg.Telemetry.MetricsExporter = "prometheus"
	cfg.Telemetry.PrometheusListenAddr = ""

	err := cfg.Validate()
	if err != nil && strings.Contains(err.Error(), "BACKEND_OTEL_PROMETHEUS_LISTEN_ADDR") {
		return
	}
	t.Fatalf("expected prometheus address requirement, got %v", err)
}
+427
View File
@@ -0,0 +1,427 @@
package dockerclient
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"

	cerrdefs "github.com/containerd/errdefs"
	"github.com/moby/moby/api/types/container"
	"github.com/moby/moby/api/types/mount"
	"github.com/moby/moby/api/types/network"
	mobyclient "github.com/moby/moby/client"
)
// enginePort is the in-container HTTP port the engine listens on. Galaxy
// never publishes the port to the host; it is reachable only through
// Docker DNS on the user-defined network. Consistent with that, the
// HostConfig assembled in Run carries no port bindings.
const enginePort = 8080
// Adapter is the production *Client implementation backed by
// `github.com/moby/moby/client`. Use NewAdapter to construct it.
type Adapter struct {
	// docker is the underlying Moby client every daemon call goes through.
	docker *mobyclient.Client
	// clock supplies the current time; NewAdapter guarantees it is non-nil.
	clock func() time.Time
}
// AdapterConfig configures an Adapter. NewAdapter rejects a nil Docker and
// substitutes time.Now for a nil Clock.
type AdapterConfig struct {
	// Docker is the underlying Moby client. Must be non-nil.
	Docker *mobyclient.Client
	// Clock supplies the wall-clock used when the daemon does not
	// return a parseable started_at value. Defaults to time.Now.
	Clock func() time.Time
}
// NewAdapter builds an Adapter around the Moby client in cfg. It returns an
// error when cfg.Docker is nil. A nil cfg.Clock is replaced by time.Now.
func NewAdapter(cfg AdapterConfig) (*Adapter, error) {
	if cfg.Docker == nil {
		return nil, errors.New("dockerclient: nil moby client")
	}

	adapter := &Adapter{docker: cfg.Docker, clock: time.Now}
	if cfg.Clock != nil {
		adapter.clock = cfg.Clock
	}
	return adapter, nil
}
// EnsureNetwork verifies that the user-defined network name exists on the
// daemon. It returns nil when the network is found, ErrNetworkMissing when
// the daemon reports "not found", and a wrapped error for any other inspect
// failure. The Adapter never creates networks itself — operators provision
// the network ahead of time.
func (a *Adapter) EnsureNetwork(ctx context.Context, name string) error {
	_, err := a.docker.NetworkInspect(ctx, name, mobyclient.NetworkInspectOptions{})
	switch {
	case err == nil:
		return nil
	case cerrdefs.IsNotFound(err):
		return ErrNetworkMissing
	default:
		return fmt.Errorf("dockerclient: inspect network %q: %w", name, err)
	}
}
// PullImage pulls ref according to policy. The pull stream is fully
// consumed synchronously so callers know the image is ready when this
// returns nil.
//
// Policy handling:
//   - an unknown policy returns ErrInvalidPullPolicy;
//   - PullPolicyNever only inspects the image and returns the inspect error
//     (ErrImageNotFound when the image is absent);
//   - PullPolicyIfMissing returns nil when the image is already present and
//     pulls only when the inspect reports ErrImageNotFound;
//   - any other known policy always pulls.
//
// Once the daemon has started streaming progress, it reports failures as
// JSON messages inside the stream rather than through the error returned by
// ImagePull. The stream is therefore decoded, not merely discarded, and the
// first reported error is returned wrapped in ErrImagePullFailed. Read,
// decode and close failures are wrapped in ErrImagePullFailed as well.
func (a *Adapter) PullImage(ctx context.Context, ref string, policy PullPolicy) error {
	if !policy.IsKnown() {
		return ErrInvalidPullPolicy
	}
	switch policy {
	case PullPolicyNever:
		_, err := a.InspectImage(ctx, ref)
		return err
	case PullPolicyIfMissing:
		if _, err := a.InspectImage(ctx, ref); err == nil {
			return nil
		} else if !errors.Is(err, ErrImageNotFound) {
			return err
		}
	}

	resp, err := a.docker.ImagePull(ctx, ref, mobyclient.ImagePullOptions{})
	if err != nil {
		return fmt.Errorf("%w: pull %q: %v", ErrImagePullFailed, ref, err)
	}

	reported, drainErr := firstPullStreamError(resp)
	if drainErr != nil {
		_ = resp.Close()
		return fmt.Errorf("%w: drain %q: %v", ErrImagePullFailed, ref, drainErr)
	}
	if reported != "" {
		// The pull already failed; a close error would add nothing useful.
		_ = resp.Close()
		return fmt.Errorf("%w: pull %q: %s", ErrImagePullFailed, ref, reported)
	}
	if closeErr := resp.Close(); closeErr != nil {
		return fmt.Errorf("%w: close %q: %v", ErrImagePullFailed, ref, closeErr)
	}
	return nil
}

// firstPullStreamError reads the daemon's pull progress stream (a sequence of
// JSON objects) to its end. It returns the first error message the daemon
// reported in the stream, or "" when the stream finished without one. The
// error result is non-nil only when the stream itself cannot be read or
// decoded.
func firstPullStreamError(r io.Reader) (string, error) {
	dec := json.NewDecoder(r)
	for {
		// Only the error-carrying fields matter; progress fields are ignored.
		var msg struct {
			Error       string `json:"error"`
			ErrorDetail *struct {
				Message string `json:"message"`
			} `json:"errorDetail"`
		}
		if err := dec.Decode(&msg); err != nil {
			if errors.Is(err, io.EOF) {
				return "", nil
			}
			return "", err
		}
		if msg.ErrorDetail != nil && msg.ErrorDetail.Message != "" {
			return msg.ErrorDetail.Message, nil
		}
		if msg.Error != "" {
			return msg.Error, nil
		}
	}
}
// InspectImage looks ref up on the daemon and returns its reference together
// with a copy of the image config labels. A daemon "not found" answer is
// mapped to ErrImageNotFound; any other failure is returned wrapped. Labels
// stay unset when the daemon returns no image config.
func (a *Adapter) InspectImage(ctx context.Context, ref string) (ImageInspect, error) {
	res, err := a.docker.ImageInspect(ctx, ref)
	if err != nil {
		if !cerrdefs.IsNotFound(err) {
			return ImageInspect{}, fmt.Errorf("dockerclient: inspect image %q: %w", ref, err)
		}
		return ImageInspect{}, ErrImageNotFound
	}

	info := ImageInspect{Ref: ref}
	if imageCfg := res.Config; imageCfg != nil {
		info.Labels = cloneStringMap(imageCfg.Labels)
	}
	return info, nil
}
// InspectContainer looks idOrName up on the daemon and returns its metadata
// converted by mapContainerInspect. A daemon "not found" answer is mapped to
// ErrContainerNotFound; any other failure is returned wrapped.
func (a *Adapter) InspectContainer(ctx context.Context, idOrName string) (ContainerInspect, error) {
	res, err := a.docker.ContainerInspect(ctx, idOrName, mobyclient.ContainerInspectOptions{})
	switch {
	case err == nil:
		return mapContainerInspect(res.Container), nil
	case cerrdefs.IsNotFound(err):
		return ContainerInspect{}, ErrContainerNotFound
	default:
		return ContainerInspect{}, fmt.Errorf("dockerclient: inspect container %q: %w", idOrName, err)
	}
}
// Run pulls the image (per spec.PullPolicy), creates the container with
// the documented label set, attaches it to spec.Network, starts it, and
// returns the canonical engine endpoint URL.
//
// spec.Name, spec.Image, spec.Network and spec.Hostname are required; a
// blank value is rejected before any daemon call. An empty
// spec.PullPolicy falls back to PullPolicyIfMissing. Errors from
// PullImage, parseMemoryString and parseLogOpts are returned unchanged;
// create and start failures are wrapped with the container name.
//
// When the start call fails, the freshly created container is removed
// on a best-effort basis. The removal runs on a context detached from
// ctx's cancellation and bounded by its own short timeout, so that a
// cancelled or expired ctx does not also abort the cleanup and leak the
// container.
//
// RunResult.StartedAt is the daemon-reported State.StartedAt when the
// follow-up inspect succeeds and the value parses as RFC 3339;
// otherwise it is the adapter clock reading taken right after start.
func (a *Adapter) Run(ctx context.Context, spec RunSpec) (RunResult, error) {
	// cleanupTimeout bounds the best-effort removal after a failed start.
	const cleanupTimeout = 10 * time.Second

	if strings.TrimSpace(spec.Name) == "" {
		return RunResult{}, errors.New("dockerclient: run: name must not be empty")
	}
	if strings.TrimSpace(spec.Image) == "" {
		return RunResult{}, errors.New("dockerclient: run: image must not be empty")
	}
	if strings.TrimSpace(spec.Network) == "" {
		return RunResult{}, errors.New("dockerclient: run: network must not be empty")
	}
	if strings.TrimSpace(spec.Hostname) == "" {
		return RunResult{}, errors.New("dockerclient: run: hostname must not be empty")
	}

	policy := spec.PullPolicy
	if policy == "" {
		policy = PullPolicyIfMissing
	}
	if err := a.PullImage(ctx, spec.Image, policy); err != nil {
		return RunResult{}, err
	}

	envSlice := make([]string, 0, len(spec.Env))
	for k, v := range spec.Env {
		envSlice = append(envSlice, k+"="+v)
	}

	labels := make(map[string]string, len(spec.Labels)+1)
	for k, v := range spec.Labels {
		labels[k] = v
	}
	// Stamped last so the managed marker wins over a caller-supplied
	// value for the same key.
	labels[ManagedLabel] = ManagedLabelValue

	mounts := make([]mount.Mount, 0, len(spec.BindMounts))
	for _, b := range spec.BindMounts {
		mounts = append(mounts, mount.Mount{
			Type:     mount.TypeBind,
			Source:   b.HostPath,
			Target:   b.MountPath,
			ReadOnly: b.ReadOnly,
		})
	}

	resources := container.Resources{}
	if spec.CPUQuota > 0 {
		// Convert decimal cpus into NanoCPUs (1.0 = 1e9).
		resources.NanoCPUs = int64(spec.CPUQuota * 1e9)
	}
	mem, err := parseMemoryString(spec.Memory)
	if err != nil {
		return RunResult{}, err
	}
	if mem > 0 {
		resources.Memory = mem
	}
	if spec.PIDsLimit > 0 {
		pl := int64(spec.PIDsLimit)
		resources.PidsLimit = &pl
	}

	logConfig := container.LogConfig{}
	if spec.LogDriver != "" {
		logConfig.Type = spec.LogDriver
	}
	if spec.LogOpts != "" {
		opts, err := parseLogOpts(spec.LogOpts)
		if err != nil {
			return RunResult{}, err
		}
		logConfig.Config = opts
	}

	hostCfg := &container.HostConfig{
		NetworkMode:    container.NetworkMode(spec.Network),
		Mounts:         mounts,
		LogConfig:      logConfig,
		Resources:      resources,
		AutoRemove:     false,
		ReadonlyRootfs: false,
		RestartPolicy: container.RestartPolicy{
			Name: container.RestartPolicyOnFailure,
		},
	}
	// Register the hostname as a network alias on spec.Network.
	netCfg := &network.NetworkingConfig{
		EndpointsConfig: map[string]*network.EndpointSettings{
			spec.Network: {
				Aliases: []string{spec.Hostname},
			},
		},
	}

	created, err := a.docker.ContainerCreate(ctx, mobyclient.ContainerCreateOptions{
		Name: spec.Name,
		Config: &container.Config{
			Hostname: spec.Hostname,
			Image:    spec.Image,
			Env:      envSlice,
			Cmd:      spec.Cmd,
			Labels:   labels,
		},
		HostConfig:       hostCfg,
		NetworkingConfig: netCfg,
	})
	if err != nil {
		return RunResult{}, fmt.Errorf("dockerclient: create container %q: %w", spec.Name, err)
	}

	if _, err := a.docker.ContainerStart(ctx, created.ID, mobyclient.ContainerStartOptions{}); err != nil {
		// Best-effort: try to remove the freshly-created container so we
		// do not leak a half-started one. The removal must not inherit
		// ctx's cancellation: if ctx is already done (often the very
		// reason start failed) the remove call would fail immediately.
		// The removal error is deliberately ignored; the start error is
		// the one the caller needs.
		cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), cleanupTimeout)
		_, _ = a.docker.ContainerRemove(cleanupCtx, created.ID, mobyclient.ContainerRemoveOptions{Force: true})
		cancel()
		return RunResult{}, fmt.Errorf("dockerclient: start container %q: %w", spec.Name, err)
	}

	// Prefer the daemon's own start timestamp; keep the local clock
	// reading when the inspect fails or the value does not parse.
	startedAt := a.clock()
	if inspect, err := a.docker.ContainerInspect(ctx, created.ID, mobyclient.ContainerInspectOptions{}); err == nil {
		if inspect.Container.State != nil && inspect.Container.State.StartedAt != "" {
			if parsed, perr := time.Parse(time.RFC3339Nano, inspect.Container.State.StartedAt); perr == nil {
				startedAt = parsed
			}
		}
	}

	return RunResult{
		ContainerID:    created.ID,
		EngineEndpoint: fmt.Sprintf("http://%s:%d", spec.Hostname, enginePort),
		StartedAt:      startedAt,
	}, nil
}
// Stop asks the daemon to stop idOrName (SIGTERM first, forced kill
// once the grace period elapses). A non-negative timeoutSeconds is
// passed as the stop timeout; a negative value leaves the timeout
// option unset. A daemon `not found` response is reported as
// ErrContainerNotFound; other failures are wrapped with the id or name.
func (a *Adapter) Stop(ctx context.Context, idOrName string, timeoutSeconds int) error {
	var stopOpts mobyclient.ContainerStopOptions
	if timeoutSeconds >= 0 {
		grace := timeoutSeconds
		stopOpts.Timeout = &grace
	}

	_, err := a.docker.ContainerStop(ctx, idOrName, stopOpts)
	switch {
	case err == nil:
		return nil
	case cerrdefs.IsNotFound(err):
		return ErrContainerNotFound
	default:
		return fmt.Errorf("dockerclient: stop %q: %w", idOrName, err)
	}
}
// Remove force-deletes the container identified by idOrName. The call
// is idempotent: a daemon `not found` response counts as success and
// yields nil. Any other failure is wrapped with the id or name.
func (a *Adapter) Remove(ctx context.Context, idOrName string) error {
	_, err := a.docker.ContainerRemove(ctx, idOrName, mobyclient.ContainerRemoveOptions{Force: true})
	if err == nil || cerrdefs.IsNotFound(err) {
		return nil
	}
	return fmt.Errorf("dockerclient: remove %q: %w", idOrName, err)
}
// List returns summaries of the containers (running or not) that match
// filter. Each entry of filter.Labels becomes one daemon-side `label`
// filter term: `key=value` when the value is non-empty, or the bare
// key when the value is empty.
func (a *Adapter) List(ctx context.Context, filter ListFilter) ([]ContainerSummary, error) {
	labelFilters := mobyclient.Filters{}
	for key, value := range filter.Labels {
		term := key
		if value != "" {
			term = key + "=" + value
		}
		labelFilters.Add("label", term)
	}

	listed, err := a.docker.ContainerList(ctx, mobyclient.ContainerListOptions{
		All:     true,
		Filters: labelFilters,
	})
	if err != nil {
		return nil, fmt.Errorf("dockerclient: list: %w", err)
	}

	summaries := make([]ContainerSummary, len(listed.Items))
	for i := range listed.Items {
		summaries[i] = mapContainerSummary(listed.Items[i])
	}
	return summaries, nil
}
// mapContainerInspect converts the daemon inspect payload into the
// package-level ContainerInspect. The leading `/` is stripped from the
// name, the image reference falls back to Config.Image when the
// top-level Image field is empty, and the start/finish timestamps are
// only set when they parse as RFC 3339 and are non-zero.
func mapContainerInspect(c container.InspectResponse) ContainerInspect {
	result := ContainerInspect{
		ID:       c.ID,
		Name:     strings.TrimPrefix(c.Name, "/"),
		ImageRef: c.Image,
	}

	if cfg := c.Config; cfg != nil {
		result.Hostname = cfg.Hostname
		result.Labels = cloneStringMap(cfg.Labels)
		if result.ImageRef == "" {
			result.ImageRef = cfg.Image
		}
	}

	state := c.State
	if state == nil {
		return result
	}

	result.Status = string(state.Status)
	result.ExitCode = state.ExitCode
	if started, err := time.Parse(time.RFC3339Nano, state.StartedAt); err == nil && !started.IsZero() {
		result.StartedAt = started
	}
	if finished, err := time.Parse(time.RFC3339Nano, state.FinishedAt); err == nil && !finished.IsZero() {
		result.FinishedAt = finished
	}
	if health := state.Health; health != nil {
		result.Health = string(health.Status)
	}
	return result
}
// mapContainerSummary converts one daemon list entry into a
// ContainerSummary. Name is the first reported name without its leading
// `/` (empty when the daemon reports none). StartedAt is derived from
// the entry's Created Unix timestamp, expressed in UTC.
func mapContainerSummary(s container.Summary) ContainerSummary {
	var name string
	if len(s.Names) > 0 {
		name = strings.TrimPrefix(s.Names[0], "/")
	}
	return ContainerSummary{
		ID:        s.ID,
		Name:      name,
		ImageRef:  s.Image,
		Status:    string(s.State),
		Labels:    cloneStringMap(s.Labels),
		StartedAt: time.Unix(s.Created, 0).UTC(),
	}
}
// cloneStringMap returns an independent copy of in so callers can
// mutate the result without touching the source. A nil or empty input
// yields nil.
func cloneStringMap(in map[string]string) map[string]string {
	size := len(in)
	if size == 0 {
		return nil
	}
	clone := make(map[string]string, size)
	for key := range in {
		clone[key] = in[key]
	}
	return clone
}
// parseMemoryString accepts the docker `--memory` short forms (e.g.
// `512m`, `1g`) and returns the corresponding byte count.
//
// The input is whitespace-trimmed first. An empty string yields 0 (no
// memory limit). A trailing `b`, `k`, `m` or `g` (either case) selects
// a multiplier of 1, 1024, 1024² or 1024³; a value ending in a digit is
// taken as plain bytes. Any other suffix, a non-integer number, a
// negative number, or a product that does not fit in int64 produces an
// error.
func parseMemoryString(raw string) (int64, error) {
	const maxInt64 = int64(1<<63 - 1)

	raw = strings.TrimSpace(raw)
	if raw == "" {
		return 0, nil
	}

	multiplier := int64(1)
	digits := raw
	switch last := raw[len(raw)-1]; last {
	case 'b', 'B':
		digits = raw[:len(raw)-1]
	case 'k', 'K':
		multiplier = 1024
		digits = raw[:len(raw)-1]
	case 'm', 'M':
		multiplier = 1024 * 1024
		digits = raw[:len(raw)-1]
	case 'g', 'G':
		multiplier = 1024 * 1024 * 1024
		digits = raw[:len(raw)-1]
	default:
		if last < '0' || last > '9' {
			return 0, fmt.Errorf("dockerclient: invalid memory suffix in %q", raw)
		}
	}

	n, err := strconv.ParseInt(digits, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("dockerclient: parse memory %q: %w", raw, err)
	}
	if n < 0 {
		return 0, fmt.Errorf("dockerclient: memory must be non-negative, got %q", raw)
	}
	// Go integer multiplication wraps silently, so reject values whose
	// byte count would overflow instead of returning a bogus limit.
	if n > maxInt64/multiplier {
		return 0, fmt.Errorf("dockerclient: memory %q overflows int64", raw)
	}
	return n * multiplier, nil
}
// parseLogOpts turns a comma-separated list of `key=value` entries into
// a map. Entries and their keys/values are whitespace-trimmed, blank
// entries are skipped, and only the first `=` of an entry separates key
// from value. An entry without `=` or with an empty key is an error.
// The returned map is never nil on success.
func parseLogOpts(raw string) (map[string]string, error) {
	opts := map[string]string{}
	for _, entry := range strings.Split(raw, ",") {
		entry = strings.TrimSpace(entry)
		if entry == "" {
			continue
		}
		sep := strings.Index(entry, "=")
		if sep < 0 {
			return nil, fmt.Errorf("dockerclient: log opt %q must be key=value", entry)
		}
		key := strings.TrimSpace(entry[:sep])
		if key == "" {
			return nil, fmt.Errorf("dockerclient: log opt %q has empty key", entry)
		}
		opts[key] = strings.TrimSpace(entry[sep+1:])
	}
	return opts, nil
}
@@ -0,0 +1,84 @@
package dockerclient
import (
"strings"
"testing"
)
// TestPullPolicyIsKnown checks that IsKnown accepts exactly the three
// declared policies and rejects the empty and an arbitrary value.
func TestPullPolicyIsKnown(t *testing.T) {
	expectations := map[PullPolicy]bool{
		PullPolicyIfMissing: true,
		PullPolicyAlways:    true,
		PullPolicyNever:     true,
		PullPolicy(""):      false,
		PullPolicy("other"): false,
	}
	for policy, want := range expectations {
		got := policy.IsKnown()
		if got == want {
			continue
		}
		t.Errorf("PullPolicy(%q).IsKnown() = %v, want %v", policy, got, want)
	}
}
// TestParseMemoryString covers the accepted inputs: blank strings,
// plain byte counts, and each size suffix in either case.
func TestParseMemoryString(t *testing.T) {
	type testCase struct {
		raw  string
		want int64
	}
	table := []testCase{
		{raw: "", want: 0},
		{raw: "  ", want: 0},
		{raw: "512", want: 512},
		{raw: "512b", want: 512},
		{raw: "4k", want: 4 * 1024},
		{raw: "1m", want: 1 * 1024 * 1024},
		{raw: "512M", want: 512 * 1024 * 1024},
		{raw: "2g", want: 2 * 1024 * 1024 * 1024},
	}
	for _, tc := range table {
		switch got, err := parseMemoryString(tc.raw); {
		case err != nil:
			t.Errorf("parseMemoryString(%q) returned error: %v", tc.raw, err)
		case got != tc.want:
			t.Errorf("parseMemoryString(%q) = %d, want %d", tc.raw, got, tc.want)
		}
	}
}
// TestParseMemoryStringRejectsInvalid checks that non-numeric input, an
// unknown suffix, and a negative amount are all reported as errors.
func TestParseMemoryStringRejectsInvalid(t *testing.T) {
	for _, raw := range []string{"abc", "1x", "-1m"} {
		_, err := parseMemoryString(raw)
		if err != nil {
			continue
		}
		t.Errorf("parseMemoryString(%q) expected error, got nil", raw)
	}
}
// TestParseLogOpts checks that a two-entry option list is split into
// the expected key/value pairs.
func TestParseLogOpts(t *testing.T) {
	opts, err := parseLogOpts("max-size=10m,max-file=3")
	if err != nil {
		t.Fatalf("parseLogOpts unexpected error: %v", err)
	}
	sizeOK := opts["max-size"] == "10m"
	fileOK := opts["max-file"] == "3"
	if !sizeOK || !fileOK {
		t.Errorf("parseLogOpts produced %v", opts)
	}
}
// TestParseLogOptsRejectsMissingValue checks that an entry without `=`
// is rejected with the `key=value` error message.
func TestParseLogOptsRejectsMissingValue(t *testing.T) {
	_, err := parseLogOpts("solo,foo=bar")
	if err != nil && strings.Contains(err.Error(), "key=value") {
		return
	}
	t.Errorf("expected key=value error, got %v", err)
}
// TestCloneStringMapNilSafe checks that a nil input yields nil and that
// mutating the clone leaves the source map untouched.
func TestCloneStringMapNilSafe(t *testing.T) {
	if fromNil := cloneStringMap(nil); fromNil != nil {
		t.Errorf("cloneStringMap(nil) = %v, want nil", fromNil)
	}

	original := map[string]string{"a": "1"}
	clone := cloneStringMap(original)
	clone["a"] = "mutated"
	if value := original["a"]; value != "1" {
		t.Errorf("cloneStringMap leaks mutation: %v", original)
	}
}
+37
View File
@@ -0,0 +1,37 @@
package dockerclient
import "context"
// Client is the narrow Docker port consumed by `internal/runtime`. The
// production adapter is *Adapter (see adapter.go); tests substitute a
// hand-rolled stub or generated mock.
//
// Method semantics:
//
//   - EnsureNetwork verifies a user-defined Docker network exists on
//     the daemon. Adapter never creates networks.
//   - PullImage pulls ref according to policy. Implementations must
//     honour PullPolicyNever by skipping the pull and returning nil
//     when the image is already present, or ErrImageNotFound otherwise.
//   - InspectImage / InspectContainer return ErrImageNotFound /
//     ErrContainerNotFound for missing inputs.
//   - Run creates and starts one container. The returned RunResult
//     carries the container id, the stable engine endpoint URL, and
//     the wall-clock observed by the daemon (Adapter substitutes its
//     own clock reading when the post-start inspect is unavailable).
//   - Stop sends SIGTERM and waits up to timeoutSeconds before
//     SIGKILL. Adapter leaves the stop timeout unset when
//     timeoutSeconds is negative. Returns ErrContainerNotFound when
//     the target is already gone.
//   - Remove deletes the container. Idempotent: nil when already
//     removed.
//   - List returns container summaries that match filter. Adapter
//     translates filter.Labels into the daemon-side filters argument;
//     a label with an empty value matches on the key alone.
type Client interface {
EnsureNetwork(ctx context.Context, name string) error
PullImage(ctx context.Context, ref string, policy PullPolicy) error
InspectImage(ctx context.Context, ref string) (ImageInspect, error)
InspectContainer(ctx context.Context, idOrName string) (ContainerInspect, error)
Run(ctx context.Context, spec RunSpec) (RunResult, error)
Stop(ctx context.Context, idOrName string, timeoutSeconds int) error
Remove(ctx context.Context, idOrName string) error
List(ctx context.Context, filter ListFilter) ([]ContainerSummary, error)
}
+36
View File
@@ -0,0 +1,36 @@
package dockerclient
import "errors"
// Sentinel errors returned by the production adapter and consumed by
// `internal/runtime`. Tests substitute their own implementations of
// Client and may return these sentinels verbatim or wrap them with
// extra context via fmt.Errorf("...: %w", ...). Callers should match
// them with errors.Is rather than by direct comparison.
var (
// ErrNetworkMissing is returned by EnsureNetwork when the configured
// user-defined Docker network does not exist on the daemon.
// `internal/runtime` treats this as a fatal startup error — Galaxy
// never creates Docker networks itself.
ErrNetworkMissing = errors.New("dockerclient: network missing")
// ErrImageNotFound is returned by InspectImage / PullImage(never)
// when the image is absent locally and the active pull policy
// forbids fetching it.
ErrImageNotFound = errors.New("dockerclient: image not found")
// ErrContainerNotFound is returned by InspectContainer / Stop when
// no container with the supplied id or name exists. The production
// adapter's Remove is idempotent and reports nil for a missing
// container instead of this sentinel; other Client implementations
// may still surface it from Remove.
// `internal/runtime` treats this as an idempotent miss for Stop and
// Remove and as a removed-container signal for InspectContainer.
ErrContainerNotFound = errors.New("dockerclient: container not found")
// ErrInvalidPullPolicy is returned by Run / PullImage when the
// supplied PullPolicy is not part of the closed vocabulary.
ErrInvalidPullPolicy = errors.New("dockerclient: invalid pull policy")
// ErrImagePullFailed wraps every PullImage failure path returned to
// the caller so `internal/runtime` can attribute the failure to the
// pull stage rather than to container creation. The unwrap chain
// preserves the underlying daemon error for logs and metrics.
ErrImagePullFailed = errors.New("dockerclient: image pull failed")
)
+223
View File
@@ -0,0 +1,223 @@
// Package dockerclient is the narrow Docker API surface consumed by
// `internal/runtime`. Its sole responsibility is to translate between the
// runtime domain and the Moby SDK; no orchestration, persistence, or
// notification logic lives in this package.
//
// The package is intentionally small. The implementation only surfaces the
// container-lifecycle calls the runtime module needs (`EnsureNetwork`,
// `PullImage`, `InspectImage`, `InspectContainer`, `Run`, `Stop`,
// `Remove`, `List`); any future functionality is introduced as an
// additive method on the `Client` interface so the runtime package can
// adopt it without round-tripping through Moby SDK type imports.
//
// Production wiring uses *Adapter, which delegates to
// `github.com/moby/moby/client`. Unit tests in `internal/runtime` and
// elsewhere should mock the `Client` interface directly rather than
// reaching into Moby types.
package dockerclient
import (
"time"
)
// PullPolicy enumerates the supported image-pull behaviours documented in
// `backend/README.md` §4 under `BACKEND_RUNTIME_IMAGE_PULL_POLICY`.
//
// The zero value ("") is not a member of the vocabulary (IsKnown
// reports false for it); Adapter.Run substitutes PullPolicyIfMissing
// when it receives an empty policy.
type PullPolicy string
const (
// PullPolicyIfMissing pulls the image only when it is absent from the
// local Docker daemon.
PullPolicyIfMissing PullPolicy = "if_missing"
// PullPolicyAlways pulls the image on every Run.
PullPolicyAlways PullPolicy = "always"
// PullPolicyNever skips the pull and fails Run when the image is
// absent locally.
PullPolicyNever PullPolicy = "never"
)
// IsKnown reports whether p is one of the three declared PullPolicy
// values. The empty policy and any other string are unknown.
func (p PullPolicy) IsKnown() bool {
	return p == PullPolicyIfMissing ||
		p == PullPolicyAlways ||
		p == PullPolicyNever
}
// ManagedLabel is the Docker container label runtime stamps on every
// engine container so the reconciler and the events listener can
// identify Galaxy-managed containers from unrelated workloads sharing
// the daemon. Adapter.Run sets it on every container it creates,
// overriding any caller-supplied value for the same key.
const ManagedLabel = "galaxy.backend"
// ManagedLabelValue is the string value paired with `ManagedLabel`.
const ManagedLabelValue = "1"
// RunSpec is the request shape used by Client.Run. Producers populate
// it inside `runtime.Service.StartGame`.
//
// Adapter.Run requires Name, Image, Hostname and Network to be
// non-blank; all other fields are optional.
type RunSpec struct {
// Name is the container name (typically `galaxy-game-{game_id}`).
// Required.
Name string
// Image is the resolved image reference (e.g.
// `galaxy-game:0.1.0`). Required.
Image string
// Hostname is the container hostname; the engine endpoint URL
// `http://galaxy-game-{game_id}:8080` resolves through Docker DNS
// against this name on the user-defined network. Adapter.Run also
// registers it as a network alias on Network. Required.
Hostname string
// Network is the user-defined Docker network name the container
// attaches to. Required.
Network string
// Env lists the environment variables forwarded to the container.
// Each entry is passed as `KEY=value`.
Env map[string]string
// Cmd overrides the entrypoint arguments. Production callers leave
// it nil so the engine image's CMD runs.
Cmd []string
// Labels are applied at create time. The adapter merges
// `ManagedLabel=ManagedLabelValue` into this map automatically;
// callers may add more entries. The adapter copies the map and
// does not modify the caller's value.
Labels map[string]string
// BindMounts describe the host-to-container bind mounts. Galaxy
// uses exactly one in MVP (the per-game state directory).
BindMounts []BindMount
// LogDriver is the Docker log-driver name (e.g. `json-file`).
// Empty leaves the log driver unset in the create request.
LogDriver string
// LogOpts is the comma-separated `key=value` list forwarded to the
// log driver. May be empty. A malformed list makes Run fail before
// the container is created.
LogOpts string
// CPUQuota is the `--cpus` value applied as a resource limit
// (1.0 = one CPU). Values <= 0 apply no CPU limit.
CPUQuota float64
// Memory is the `--memory` value (e.g. `512m`) applied as a
// resource limit. Accepts an integer with an optional b/k/m/g
// suffix; empty applies no memory limit.
Memory string
// PIDsLimit is the `--pids-limit` value. Values <= 0 apply no
// limit.
PIDsLimit int
// PullPolicy selects how Run resolves a missing image. Defaults to
// PullPolicyIfMissing when zero.
PullPolicy PullPolicy
}
// BindMount stores one host-to-container bind mount. Adapter.Run
// forwards each entry to the daemon as a bind-type mount.
type BindMount struct {
// HostPath is the absolute host path bound into the container
// (the mount source).
HostPath string
// MountPath is the absolute in-container path the host directory
// is mounted at (the mount target).
MountPath string
// ReadOnly mounts the host path read-only when true.
ReadOnly bool
}
// RunResult is the response shape returned by Client.Run.
type RunResult struct {
// ContainerID identifies the created container.
ContainerID string
// EngineEndpoint is the URL Galaxy uses to reach the engine. The
// adapter synthesises it as `http://{Hostname}:8080`.
// NOTE(review): the port comes from the adapter's enginePort
// constant — presumably 8080; confirm against its definition.
EngineEndpoint string
// StartedAt is the wall-clock observed by the daemon for the start
// event. The adapter falls back to its own clock reading, taken
// right after the start call, when the follow-up inspect fails or
// reports an unparsable timestamp.
StartedAt time.Time
}
// ImageInspect carries the subset of `docker image inspect` fields the
// runtime reads. It is the result type of Client.InspectImage.
type ImageInspect struct {
// Ref is the image reference the inspection was scoped to.
Ref string
// Labels are the image-level labels (e.g. `com.galaxy.cpu_quota`).
Labels map[string]string
}
// ContainerInspect carries the subset of `docker inspect` fields the
// runtime reads from a running or exited container. It is the result
// type of Client.InspectContainer.
type ContainerInspect struct {
// ID identifies the container.
ID string
// Name is the container name (without leading `/`).
Name string
// ImageRef is the image reference the container was started from.
// The adapter takes the daemon's top-level image field and falls
// back to the container config's image when that field is empty.
ImageRef string
// Hostname is the container hostname.
Hostname string
// Labels are the container labels assigned at create time. Nil
// when the container carries no labels.
Labels map[string]string
// Status is the verbatim Docker `State.Status` value (e.g.
// `running`, `exited`).
Status string
// Health is the verbatim Docker `State.Health.Status` value
// (e.g. `healthy`, `unhealthy`). Empty when the image declares no
// HEALTHCHECK.
Health string
// StartedAt is the daemon-observed start wall-clock. Left zero
// when the daemon value is missing or does not parse as RFC 3339.
StartedAt time.Time
// FinishedAt is the daemon-observed exit wall-clock. Zero when the
// container is still running.
FinishedAt time.Time
// ExitCode is the exit code reported by the daemon. Zero when the
// container is still running.
ExitCode int
}
// ContainerSummary carries the subset of `docker ps` fields the runtime
// reads. It is the element type returned by Client.List.
type ContainerSummary struct {
// ID identifies the container.
ID string
// Name is the container name (without leading `/`). The adapter
// uses the first name the daemon reports; empty when there is none.
Name string
// ImageRef is the image reference.
ImageRef string
// Hostname is the container hostname.
// NOTE(review): the adapter's summary mapping does not populate
// this field, so it is empty in results from Adapter.List.
Hostname string
// Labels are the container labels assigned at create time.
Labels map[string]string
// Status is the verbatim Docker `State.Status` value.
Status string
// StartedAt is the daemon-observed start wall-clock.
// NOTE(review): the adapter derives it from the summary's Created
// Unix timestamp (UTC), i.e. the creation time rather than the
// most recent start — confirm this is what consumers expect.
StartedAt time.Time
}
// ListFilter narrows the ContainerList result. Empty fields match
// everything.
type ListFilter struct {
// Labels lists `key=value` label pairs that must all be present on
// the container. Empty matches every container. An entry whose
// value is the empty string is sent to the daemon as a bare key
// filter, i.e. it matches on the label key alone.
Labels map[string]string
}
+328
View File
@@ -0,0 +1,328 @@
package engineclient
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"galaxy/model/rest"
)
// Engine HTTP routes. Each constant is appended verbatim to the
// per-call baseURL, so it starts with `/` and carries no query string.
const (
// Admin endpoints.
pathAdminInit = "/api/v1/admin/init"
pathAdminStatus = "/api/v1/admin/status"
pathAdminTurn = "/api/v1/admin/turn"
pathAdminRaceBanish = "/api/v1/admin/race/banish"
// Player endpoints.
pathPlayerCommand = "/api/v1/command"
pathPlayerOrder = "/api/v1/order"
pathPlayerReport = "/api/v1/report"
// Liveness probe.
pathHealthz = "/healthz"
)
// Config configures one HTTP-backed engine client. Both timeouts must
// be positive; the constructors reject a zero or negative value.
type Config struct {
// CallTimeout bounds turn-generation-class operations: init, turn,
// banish, command, order. Mirrors `BACKEND_ENGINE_CALL_TIMEOUT`.
CallTimeout time.Duration
// ProbeTimeout bounds inspect-style reads: status, report, healthz.
// Mirrors `BACKEND_ENGINE_PROBE_TIMEOUT`.
ProbeTimeout time.Duration
}
// Client is the engine HTTP client. The zero value is not usable — use
// NewClient.
type Client struct {
// callTimeout is the per-request deadline for init, turn, banish,
// command and order calls.
callTimeout time.Duration
// probeTimeout is the per-request deadline for status, report and
// healthz calls.
probeTimeout time.Duration
// httpClient performs every request.
httpClient *http.Client
// closeIdleConnections is invoked by Close. It is nil when the
// client was built around a caller-supplied *http.Client, in which
// case Close is a no-op.
closeIdleConnections func()
}
// NewClient builds a Client whose transport is a private clone of
// `http.DefaultTransport` wrapped with `otelhttp` instrumentation.
// Close releases the idle connections held by that clone. It fails
// when the default transport is not an *http.Transport or when cfg
// carries a non-positive timeout.
func NewClient(cfg Config) (*Client, error) {
	base, isTransport := http.DefaultTransport.(*http.Transport)
	if !isTransport {
		return nil, errors.New("engineclient: default transport is not *http.Transport")
	}
	owned := base.Clone()
	instrumented := &http.Client{Transport: otelhttp.NewTransport(owned)}
	return newClient(cfg, instrumented, owned.CloseIdleConnections)
}
// NewClientWithHTTP builds a Client on top of a caller-supplied
// `*http.Client`, e.g. an `httptest`-backed one in tests. The caller
// keeps ownership of hc: Close on the returned Client does nothing.
func NewClientWithHTTP(cfg Config, hc *http.Client) (*Client, error) {
	var noIdleCloser func()
	return newClient(cfg, hc, noIdleCloser)
}
// newClient validates the configuration and assembles the Client.
// Checks run in a fixed order: call timeout, probe timeout, then the
// HTTP client. closeIdle may be nil.
func newClient(cfg Config, hc *http.Client, closeIdle func()) (*Client, error) {
	if cfg.CallTimeout <= 0 {
		return nil, errors.New("engineclient: call timeout must be positive")
	}
	if cfg.ProbeTimeout <= 0 {
		return nil, errors.New("engineclient: probe timeout must be positive")
	}
	if hc == nil {
		return nil, errors.New("engineclient: http client must not be nil")
	}

	client := &Client{
		httpClient:           hc,
		closeIdleConnections: closeIdle,
	}
	client.callTimeout = cfg.CallTimeout
	client.probeTimeout = cfg.ProbeTimeout
	return client, nil
}
// Close drops the idle HTTP connections of the transport this Client
// owns, if any. It always returns nil, tolerates a nil receiver, and
// may be called repeatedly.
func (c *Client) Close() error {
	if c != nil && c.closeIdleConnections != nil {
		c.closeIdleConnections()
	}
	return nil
}
// Init sends `POST /api/v1/admin/init` with request as the JSON body,
// bounded by the call timeout.
//
// 200 and 201 responses are decoded into a StateResponse. A 400 maps to
// ErrEngineValidation; transport failures and every other status map to
// ErrEngineUnreachable. An invalid baseURL or an unencodable request is
// reported before any network traffic.
func (c *Client) Init(ctx context.Context, baseURL string, request rest.InitRequest) (rest.StateResponse, error) {
	if err := validateBaseURL(baseURL); err != nil {
		return rest.StateResponse{}, err
	}
	encoded, err := json.Marshal(request)
	if err != nil {
		return rest.StateResponse{}, fmt.Errorf("engineclient init: encode request: %w", err)
	}

	respBody, code, err := c.doRequest(ctx, http.MethodPost, baseURL+pathAdminInit, encoded, c.callTimeout)
	if err != nil {
		return rest.StateResponse{}, fmt.Errorf("%w: engine init: %w", ErrEngineUnreachable, err)
	}
	if code == http.StatusOK || code == http.StatusCreated {
		return decodeStateResponse(respBody, "engine init")
	}
	if code == http.StatusBadRequest {
		return rest.StateResponse{}, fmt.Errorf("%w: engine init: %s", ErrEngineValidation, summariseEngineError(respBody, code))
	}
	return rest.StateResponse{}, fmt.Errorf("%w: engine init: %s", ErrEngineUnreachable, summariseEngineError(respBody, code))
}
// Status sends `GET /api/v1/admin/status`, bounded by the probe
// timeout. A 200 response is decoded into a StateResponse. A 400 maps
// to ErrEngineValidation; transport failures and every other status map
// to ErrEngineUnreachable.
func (c *Client) Status(ctx context.Context, baseURL string) (rest.StateResponse, error) {
	if err := validateBaseURL(baseURL); err != nil {
		return rest.StateResponse{}, err
	}

	respBody, code, err := c.doRequest(ctx, http.MethodGet, baseURL+pathAdminStatus, nil, c.probeTimeout)
	if err != nil {
		return rest.StateResponse{}, fmt.Errorf("%w: engine status: %w", ErrEngineUnreachable, err)
	}
	if code == http.StatusOK {
		return decodeStateResponse(respBody, "engine status")
	}
	if code == http.StatusBadRequest {
		return rest.StateResponse{}, fmt.Errorf("%w: engine status: %s", ErrEngineValidation, summariseEngineError(respBody, code))
	}
	return rest.StateResponse{}, fmt.Errorf("%w: engine status: %s", ErrEngineUnreachable, summariseEngineError(respBody, code))
}
// Turn sends `PUT /api/v1/admin/turn` without a body, bounded by the
// call timeout. A 200 response is decoded into a StateResponse. A 400
// maps to ErrEngineValidation; transport failures and every other
// status map to ErrEngineUnreachable.
func (c *Client) Turn(ctx context.Context, baseURL string) (rest.StateResponse, error) {
	if err := validateBaseURL(baseURL); err != nil {
		return rest.StateResponse{}, err
	}

	respBody, code, err := c.doRequest(ctx, http.MethodPut, baseURL+pathAdminTurn, nil, c.callTimeout)
	if err != nil {
		return rest.StateResponse{}, fmt.Errorf("%w: engine turn: %w", ErrEngineUnreachable, err)
	}
	if code == http.StatusOK {
		return decodeStateResponse(respBody, "engine turn")
	}
	if code == http.StatusBadRequest {
		return rest.StateResponse{}, fmt.Errorf("%w: engine turn: %s", ErrEngineValidation, summariseEngineError(respBody, code))
	}
	return rest.StateResponse{}, fmt.Errorf("%w: engine turn: %s", ErrEngineUnreachable, summariseEngineError(respBody, code))
}
// BanishRace sends `POST /api/v1/admin/race/banish` with a JSON body
// built from rest.BanishRequest carrying raceName, bounded by the call
// timeout. Both 204 and 200 count as success. A blank raceName is
// rejected locally. A 400 maps to ErrEngineValidation; transport
// failures and every other status map to ErrEngineUnreachable.
func (c *Client) BanishRace(ctx context.Context, baseURL, raceName string) error {
	if err := validateBaseURL(baseURL); err != nil {
		return err
	}
	if strings.TrimSpace(raceName) == "" {
		return errors.New("engineclient banish: race name must not be empty")
	}
	encoded, err := json.Marshal(rest.BanishRequest{RaceName: raceName})
	if err != nil {
		return fmt.Errorf("engineclient banish: encode: %w", err)
	}

	respBody, code, err := c.doRequest(ctx, http.MethodPost, baseURL+pathAdminRaceBanish, encoded, c.callTimeout)
	if err != nil {
		return fmt.Errorf("%w: engine banish: %w", ErrEngineUnreachable, err)
	}
	if code == http.StatusNoContent || code == http.StatusOK {
		return nil
	}
	if code == http.StatusBadRequest {
		return fmt.Errorf("%w: engine banish: %s", ErrEngineValidation, summariseEngineError(respBody, code))
	}
	return fmt.Errorf("%w: engine banish: %s", ErrEngineUnreachable, summariseEngineError(respBody, code))
}
// ExecuteCommands sends `PUT /api/v1/command` with payload as the
// request body, unchanged. On 200 or 202 the engine response body is
// returned as-is. On 400 or 409 the body is returned together with an
// ErrEngineValidation error so callers can forward the per-command
// failure. Everything else maps to ErrEngineUnreachable.
func (c *Client) ExecuteCommands(ctx context.Context, baseURL string, payload json.RawMessage) (json.RawMessage, error) {
	const opLabel = "engine command"
	return c.forwardPlayerWrite(ctx, baseURL, pathPlayerCommand, payload, opLabel)
}
// PutOrders sends `PUT /api/v1/order` with payload as the request
// body, unchanged. Status handling and return values follow the same
// forwarding rules as ExecuteCommands.
func (c *Client) PutOrders(ctx context.Context, baseURL string, payload json.RawMessage) (json.RawMessage, error) {
	const opLabel = "engine order"
	return c.forwardPlayerWrite(ctx, baseURL, pathPlayerOrder, payload, opLabel)
}
// GetReport sends `GET /api/v1/report?player=<raceName>&turn=<turn>`,
// bounded by the probe timeout, and returns the engine response body
// unchanged.
//
// A blank raceName or a negative turn is rejected locally. A 200 with
// an empty body is an ErrEngineProtocolViolation. A 400 returns the
// body together with ErrEngineValidation. Transport failures and every
// other status map to ErrEngineUnreachable with a nil body.
func (c *Client) GetReport(ctx context.Context, baseURL, raceName string, turn int) (json.RawMessage, error) {
	if err := validateBaseURL(baseURL); err != nil {
		return nil, err
	}
	if strings.TrimSpace(raceName) == "" {
		return nil, errors.New("engineclient report: race name must not be empty")
	}
	if turn < 0 {
		return nil, fmt.Errorf("engineclient report: turn must not be negative, got %d", turn)
	}

	query := url.Values{
		"player": {raceName},
		"turn":   {strconv.Itoa(turn)},
	}
	reportURL := baseURL + pathPlayerReport + "?" + query.Encode()

	raw, code, err := c.doRequest(ctx, http.MethodGet, reportURL, nil, c.probeTimeout)
	if err != nil {
		return nil, fmt.Errorf("%w: engine report: %w", ErrEngineUnreachable, err)
	}
	if code == http.StatusBadRequest {
		return json.RawMessage(raw), fmt.Errorf("%w: engine report: %s", ErrEngineValidation, summariseEngineError(raw, code))
	}
	if code != http.StatusOK {
		return nil, fmt.Errorf("%w: engine report: %s", ErrEngineUnreachable, summariseEngineError(raw, code))
	}
	if len(raw) == 0 {
		return nil, fmt.Errorf("%w: engine report: empty response body", ErrEngineProtocolViolation)
	}
	return json.RawMessage(raw), nil
}
// Healthz sends `GET /healthz`, bounded by the probe timeout. Any 2xx
// status yields nil; transport failures and every other status map to
// ErrEngineUnreachable.
func (c *Client) Healthz(ctx context.Context, baseURL string) error {
	if err := validateBaseURL(baseURL); err != nil {
		return err
	}

	respBody, code, err := c.doRequest(ctx, http.MethodGet, baseURL+pathHealthz, nil, c.probeTimeout)
	if err != nil {
		return fmt.Errorf("%w: engine healthz: %w", ErrEngineUnreachable, err)
	}
	if code >= 200 && code < 300 {
		return nil
	}
	return fmt.Errorf("%w: engine healthz: %s", ErrEngineUnreachable, summariseEngineError(respBody, code))
}
// forwardPlayerWrite is the shared implementation behind the player
// write endpoints. It PUTs payload unchanged to baseURL+requestPath
// under the call timeout and labels every error with opLabel.
//
// A blank payload is rejected locally. On 200 or 202 the response body
// is returned. On 400 or 409 the body is returned together with
// ErrEngineValidation. Transport failures and every other status map
// to ErrEngineUnreachable with a nil body.
func (c *Client) forwardPlayerWrite(ctx context.Context, baseURL, requestPath string, payload json.RawMessage, opLabel string) (json.RawMessage, error) {
	if err := validateBaseURL(baseURL); err != nil {
		return nil, err
	}
	if len(bytes.TrimSpace(payload)) == 0 {
		return nil, fmt.Errorf("%s: payload must not be empty", opLabel)
	}

	raw, code, err := c.doRequest(ctx, http.MethodPut, baseURL+requestPath, []byte(payload), c.callTimeout)
	if err != nil {
		return nil, fmt.Errorf("%w: %s: %w", ErrEngineUnreachable, opLabel, err)
	}
	if code == http.StatusOK || code == http.StatusAccepted {
		return json.RawMessage(raw), nil
	}
	if code == http.StatusBadRequest || code == http.StatusConflict {
		return json.RawMessage(raw), fmt.Errorf("%w: %s: %s", ErrEngineValidation, opLabel, summariseEngineError(raw, code))
	}
	return nil, fmt.Errorf("%w: %s: %s", ErrEngineUnreachable, opLabel, summariseEngineError(raw, code))
}
// doRequest performs one HTTP exchange under a deadline of timeout
// layered on ctx and returns the full response body with the status
// code.
//
// A nil body sends no request body and no Content-Type header;
// otherwise the body is sent as `application/json`. `Accept:
// application/json` is always set. The status code is 0 when the
// request could not be built or sent; when reading the body fails, the
// status code is still returned alongside the error.
func (c *Client) doRequest(ctx context.Context, method, target string, body []byte, timeout time.Duration) ([]byte, int, error) {
	boundedCtx, release := context.WithTimeout(ctx, timeout)
	defer release()

	hasBody := body != nil
	var src io.Reader
	if hasBody {
		src = bytes.NewReader(body)
	}

	request, err := http.NewRequestWithContext(boundedCtx, method, target, src)
	if err != nil {
		return nil, 0, fmt.Errorf("build request: %w", err)
	}
	request.Header.Set("Accept", "application/json")
	if hasBody {
		request.Header.Set("Content-Type", "application/json")
	}

	response, err := c.httpClient.Do(request)
	if err != nil {
		return nil, 0, err
	}
	// The close error carries no information once the body has been read.
	defer func() { _ = response.Body.Close() }()

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, response.StatusCode, fmt.Errorf("read body: %w", err)
	}
	return raw, response.StatusCode, nil
}
// validateBaseURL rejects a blank baseURL and any value that does not
// begin with `http://` or `https://`. The prefix check is applied to
// the untrimmed string, so leading whitespace is also rejected.
func validateBaseURL(baseURL string) error {
	switch {
	case strings.TrimSpace(baseURL) == "":
		return errors.New("engineclient: baseURL must not be empty")
	case strings.HasPrefix(baseURL, "http://"), strings.HasPrefix(baseURL, "https://"):
		return nil
	default:
		return fmt.Errorf("engineclient: baseURL %q must start with http:// or https://", baseURL)
	}
}
// decodeStateResponse parses body as a rest.StateResponse. A blank
// body or malformed JSON is reported as ErrEngineProtocolViolation,
// labelled with op, and the zero StateResponse is returned.
func decodeStateResponse(body []byte, op string) (rest.StateResponse, error) {
	if meaningful := bytes.TrimSpace(body); len(meaningful) == 0 {
		return rest.StateResponse{}, fmt.Errorf("%w: %s: empty body", ErrEngineProtocolViolation, op)
	}

	var decoded rest.StateResponse
	err := json.Unmarshal(body, &decoded)
	if err != nil {
		return rest.StateResponse{}, fmt.Errorf("%w: %s: %v", ErrEngineProtocolViolation, op, err)
	}
	return decoded, nil
}
// summariseEngineError renders a short, log-friendly description of a
// non-success engine response.
//
// An empty body yields `status=<code>`. Otherwise the body is
// whitespace-trimmed and appended as `body=<text>`. Text longer than
// 256 bytes is cut to at most 256 bytes and suffixed with `…`. The cut
// is moved back to the nearest UTF-8 rune boundary so a multi-byte
// character is never split and the summary stays valid UTF-8 whenever
// the body is.
func summariseEngineError(body []byte, status int) string {
	const maxBodyBytes = 256

	if len(body) == 0 {
		return fmt.Sprintf("status=%d", status)
	}
	trimmed := strings.TrimSpace(string(body))
	if len(trimmed) > maxBodyBytes {
		cut := maxBodyBytes
		// A UTF-8 rune has at most three continuation bytes (10xxxxxx),
		// so stepping back up to three bytes always reaches the start
		// of the rune straddling the limit. The bound keeps malformed
		// input from shrinking the summary any further.
		for back := 0; back < 3 && trimmed[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		trimmed = trimmed[:cut] + "…"
	}
	return fmt.Sprintf("status=%d body=%s", status, trimmed)
}
@@ -0,0 +1,236 @@
package engineclient
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"galaxy/model/rest"
"github.com/google/uuid"
)
// newTestClient returns a Client wired to srv's HTTP client with short
// test timeouts (2s for calls, 1s for probes). It fails the test
// immediately when construction fails.
func newTestClient(t *testing.T, srv *httptest.Server) *Client {
	t.Helper()

	cfg := Config{
		CallTimeout:  2 * time.Second,
		ProbeTimeout: 1 * time.Second,
	}
	client, err := NewClientWithHTTP(cfg, srv.Client())
	if err != nil {
		t.Fatalf("NewClientWithHTTP: %v", err)
	}
	return client
}
// TestClientInitSuccess checks that Init POSTs to the admin init path
// and decodes the StateResponse returned by the engine.
func TestClientInitSuccess(t *testing.T) {
	wantID := uuid.New()
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on a server goroutine, where t.Fatalf
		// (FailNow) is not allowed. Record the failure with t.Errorf
		// and answer with an error status so the client call fails too.
		if r.URL.Path != pathAdminInit {
			t.Errorf("unexpected path: %s", r.URL.Path)
			http.Error(w, "unexpected path", http.StatusInternalServerError)
			return
		}
		if r.Method != http.MethodPost {
			t.Errorf("unexpected method: %s", r.Method)
			http.Error(w, "unexpected method", http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(rest.StateResponse{ID: wantID, Turn: 1, Players: []rest.PlayerState{{ID: uuid.New(), RaceName: "alpha"}}})
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	got, err := cli.Init(context.Background(), srv.URL, rest.InitRequest{Races: []rest.InitRace{{RaceName: "alpha"}}})
	if err != nil {
		t.Fatalf("Init returned error: %v", err)
	}
	if got.ID != wantID {
		t.Fatalf("ID = %s, want %s", got.ID, wantID)
	}
	if got.Turn != 1 {
		t.Fatalf("Turn = %d, want 1", got.Turn)
	}
}
func TestClientInitValidationError(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, `{"reason":"races empty"}`, http.StatusBadRequest)
}))
t.Cleanup(srv.Close)
cli := newTestClient(t, srv)
_, err := cli.Init(context.Background(), srv.URL, rest.InitRequest{Races: []rest.InitRace{{RaceName: "x"}}})
if !errors.Is(err, ErrEngineValidation) {
t.Fatalf("expected ErrEngineValidation, got %v", err)
}
}
// TestClientInitUnreachableOn5xx checks that a 500 response from the engine
// is reported as ErrEngineUnreachable.
func TestClientInitUnreachableOn5xx(t *testing.T) {
	fail := func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "boom", http.StatusInternalServerError)
	}
	srv := httptest.NewServer(http.HandlerFunc(fail))
	t.Cleanup(srv.Close)

	client := newTestClient(t, srv)
	req := rest.InitRequest{Races: []rest.InitRace{{RaceName: "x"}}}
	if _, err := client.Init(context.Background(), srv.URL, req); !errors.Is(err, ErrEngineUnreachable) {
		t.Fatalf("expected ErrEngineUnreachable, got %v", err)
	}
}
// TestClientInitProtocolViolation checks that a 200 response whose body is
// not JSON is reported as ErrEngineProtocolViolation.
func TestClientInitProtocolViolation(t *testing.T) {
	garbage := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte("not-json"))
	}
	srv := httptest.NewServer(http.HandlerFunc(garbage))
	t.Cleanup(srv.Close)

	client := newTestClient(t, srv)
	req := rest.InitRequest{Races: []rest.InitRace{{RaceName: "x"}}}
	if _, err := client.Init(context.Background(), srv.URL, req); !errors.Is(err, ErrEngineProtocolViolation) {
		t.Fatalf("expected ErrEngineProtocolViolation, got %v", err)
	}
}
// TestClientStatusOK checks that Status issues a GET to the admin status
// path and decodes the returned StateResponse.
func TestClientStatusOK(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf
		// (FailNow) must not be called; use t.Errorf and an error status.
		if r.URL.Path != pathAdminStatus || r.Method != http.MethodGet {
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.Path)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		if err := json.NewEncoder(w).Encode(rest.StateResponse{Turn: 5}); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	got, err := cli.Status(context.Background(), srv.URL)
	if err != nil {
		t.Fatalf("Status: %v", err)
	}
	if got.Turn != 5 {
		t.Fatalf("Turn = %d, want 5", got.Turn)
	}
}
// TestClientTurnOK checks that Turn issues a PUT to the admin turn path and
// surfaces the Finished flag from the engine's StateResponse.
func TestClientTurnOK(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf
		// (FailNow) must not be called; use t.Errorf and an error status.
		if r.URL.Path != pathAdminTurn || r.Method != http.MethodPut {
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.Path)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		if err := json.NewEncoder(w).Encode(rest.StateResponse{Turn: 6, Finished: true}); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	got, err := cli.Turn(context.Background(), srv.URL)
	if err != nil {
		t.Fatalf("Turn: %v", err)
	}
	if !got.Finished {
		t.Fatalf("expected finished=true")
	}
}
// TestClientBanishRace checks that BanishRace POSTs a BanishRequest carrying
// the race name to the admin banish path and accepts a 204 reply.
func TestClientBanishRace(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf
		// (FailNow) must not be called; use t.Errorf and an error status.
		if r.URL.Path != pathAdminRaceBanish || r.Method != http.MethodPost {
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.Path)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		var got rest.BanishRequest
		if err := json.NewDecoder(r.Body).Decode(&got); err != nil {
			t.Errorf("decode banish request: %v", err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		if got.RaceName != "loser" {
			t.Errorf("got race name %q", got.RaceName)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		w.WriteHeader(http.StatusNoContent)
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	if err := cli.BanishRace(context.Background(), srv.URL, "loser"); err != nil {
		t.Fatalf("BanishRace: %v", err)
	}
}
// TestClientCommandsForwardsBody checks that ExecuteCommands PUTs the
// caller's JSON payload to the player command path and returns the engine's
// response body unchanged.
func TestClientCommandsForwardsBody(t *testing.T) {
	want := json.RawMessage(`{"actor":"alpha","cmd":[{"@type":"raceQuit"}]}`)
	gotResp := json.RawMessage(`{"applied":true}`)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf
		// (FailNow) must not be called; use t.Errorf and an error status.
		if r.URL.Path != pathPlayerCommand || r.Method != http.MethodPut {
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.Path)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		// Verify the request payload so the test covers what its name
		// promises. Decoding into RawMessage keeps the JSON value as sent,
		// minus surrounding whitespace.
		var forwarded json.RawMessage
		if err := json.NewDecoder(r.Body).Decode(&forwarded); err != nil {
			t.Errorf("decode forwarded body: %v", err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		if string(forwarded) != string(want) {
			t.Errorf("forwarded body = %s, want %s", forwarded, want)
		}
		_, _ = w.Write(gotResp)
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	resp, err := cli.ExecuteCommands(context.Background(), srv.URL, want)
	if err != nil {
		t.Fatalf("ExecuteCommands: %v", err)
	}
	if string(resp) != string(gotResp) {
		t.Fatalf("response = %s, want %s", string(resp), string(gotResp))
	}
}
// TestClientReportsForwardsQuery checks that GetReport hits the player
// report path with the player and turn query parameters and returns the
// engine's body.
func TestClientReportsForwardsQuery(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf
		// (FailNow) must not be called; use t.Errorf and an error status.
		if r.URL.Path != pathPlayerReport {
			t.Errorf("unexpected path: %s", r.URL.Path)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		query := r.URL.Query()
		if player := query.Get("player"); player != "alpha" {
			t.Errorf("player = %q", player)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		if turn := query.Get("turn"); turn != "3" {
			t.Errorf("turn = %q", turn)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		_, _ = w.Write([]byte(`{"turn":3}`))
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	body, err := cli.GetReport(context.Background(), srv.URL, "alpha", 3)
	if err != nil {
		t.Fatalf("GetReport: %v", err)
	}
	if !strings.Contains(string(body), `"turn":3`) {
		t.Fatalf("body = %s", body)
	}
}
// TestClientHealthzSuccess checks that Healthz probes the health path and
// reports no error on a 200 reply.
func TestClientHealthzSuccess(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf
		// (FailNow) must not be called; use t.Errorf and an error status.
		if r.URL.Path != pathHealthz {
			t.Errorf("unexpected path: %s", r.URL.Path)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		_, _ = w.Write([]byte(`{"status":"ok"}`))
	}))
	t.Cleanup(srv.Close)

	cli := newTestClient(t, srv)
	if err := cli.Healthz(context.Background(), srv.URL); err != nil {
		t.Fatalf("Healthz: %v", err)
	}
}
// TestClientHealthzFailure checks that a 503 health reply is reported as
// ErrEngineUnreachable.
func TestClientHealthzFailure(t *testing.T) {
	down := func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "down", http.StatusServiceUnavailable)
	}
	srv := httptest.NewServer(http.HandlerFunc(down))
	t.Cleanup(srv.Close)

	client := newTestClient(t, srv)
	err := client.Healthz(context.Background(), srv.URL)
	if !errors.Is(err, ErrEngineUnreachable) {
		t.Fatalf("expected ErrEngineUnreachable, got %v", err)
	}
}
func TestClientRejectsInvalidBaseURL(t *testing.T) {
cli, err := NewClientWithHTTP(Config{CallTimeout: time.Second, ProbeTimeout: time.Second}, http.DefaultClient)
if err != nil {
t.Fatalf("NewClientWithHTTP: %v", err)
}
if _, err := cli.Status(context.Background(), ""); err == nil {
t.Fatalf("expected error on empty base URL")
}
if _, err := cli.Status(context.Background(), "ftp://example.test"); err == nil {
t.Fatalf("expected error on non-http base URL")
}
}
+43
View File
@@ -0,0 +1,43 @@
// Package engineclient is the trusted-internal HTTP client `internal/runtime`
// uses to talk to a running `galaxy-game` engine container. The engine
// contract is the OpenAPI document shipped with the engine module
// (`galaxy/game/openapi.yaml`); this package reuses the existing typed
// DTOs in `pkg/model/{rest,order,report}` rather than introducing its
// own request/response types.
//
// The engine endpoint URL is per-call: the runtime stores it on
// `runtime_records.engine_endpoint` (the value the dockerclient adapter
// returns from Run). The client therefore does not bind a base URL at
// construction time — only the per-call timeouts are wired through
// `Config`.
//
// Error model:
//
// - ErrEngineUnreachable — network failure, 5xx, or timeout. The
// caller transitions the runtime record to `engine_unreachable`
// and re-tries on the next snapshot tick.
// - ErrEngineValidation — engine rejected the request (HTTP 4xx).
// The caller surfaces the engine's body verbatim through to the
// user.
// - ErrEngineProtocolViolation — engine returned an empty body or a
// malformed JSON response on a path that requires one.
package engineclient
import "errors"
var (
// ErrEngineUnreachable means the engine call failed because of a
// transport error (network, DNS, connect refused, timeout) or a 5xx
// response. Callers map this to a runtime status of
// `engine_unreachable` after a snapshot read.
ErrEngineUnreachable = errors.New("engineclient: engine unreachable")
// ErrEngineValidation means the engine returned a 4xx response.
// Callers forward the engine body so end users see the engine's
// per-command error reason verbatim.
ErrEngineValidation = errors.New("engineclient: engine validation failed")
// ErrEngineProtocolViolation means the engine returned an empty or
// malformed body on a path that contractually requires one. Match
// all three sentinels with errors.Is.
ErrEngineProtocolViolation = errors.New("engineclient: engine protocol violation")
)
+36
View File
@@ -0,0 +1,36 @@
package geo
import (
"context"
"errors"
"fmt"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/google/uuid"
)
// OnUserDeleted is the geo-side leg of the soft-delete cascade documented
// in `backend/PLAN.md` §5.2 / §5.8: it deletes every
// `backend.user_country_counters` row that belongs to userID. It is
// invoked from `backend/internal/user.Service.SoftDelete` after the
// `accounts.deleted_at` write commits.
//
// A nil receiver or a zero userID is rejected with an error. Deleting
// for a user without counters succeeds as a no-op, so the call can be
// repeated safely. Database errors are wrapped with the geo prefix so
// caller logs identify the source.
func (s *Service) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
	switch {
	case s == nil:
		return errors.New("geo: nil service")
	case userID == uuid.Nil:
		return errors.New("geo: nil user id")
	}

	counters := table.UserCountryCounters
	ownedByUser := counters.UserID.EQ(postgres.UUID(userID))
	_, err := counters.DELETE().WHERE(ownedByUser).ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("geo: delete counters for %s: %w", userID, err)
	}
	return nil
}
+136
View File
@@ -0,0 +1,136 @@
package geo
import (
"context"
"errors"
"fmt"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/google/uuid"
"go.uber.org/zap"
)
// counterUpsertTimeout bounds the database call performed by a single
// fire-and-forget counter goroutine. The upsert is a single statement
// and is expected to complete well within this limit; the timeout
// exists to keep one slow Postgres node from accumulating leaked
// goroutines under load. It is applied through context.WithTimeout by
// IncrementCounterAsync and by the IncrementCounterTestSync test seam.
const counterUpsertTimeout = 5 * time.Second
// CountryCounter is one row from `backend.user_country_counters` exposed
// to the admin surface (`GET /api/v1/admin/geo/users/{user_id}/countries`).
//
// Country is the uppercase ISO 3166-1 alpha-2 code stored alongside the
// running count. LastSeenAt is nullable on the table and therefore
// optional; the admin response surfaces null when it is unset.
type CountryCounter struct {
// Country is the country code stored on the counter row.
Country string
// Count is the running total for the (user, country) pair; every
// upsert adds one to it.
Count int64
// LastSeenAt is the time of the most recent upsert, or nil when the
// column is NULL. ListUserCounters converts it to UTC.
LastSeenAt *time.Time
}
// IncrementCounterAsync bumps the per-country counter for userID without
// making the caller wait for the database. The country is resolved
// synchronously through LookupCountry; the upsert then runs on a goroutine
// tracked by the Service WaitGroup and bounded by counterUpsertTimeout.
// Errors never reach the caller: they are written to the Service logger,
// except context cancellation and deadline expiry, which are dropped
// silently.
//
// No goroutine is started when the receiver is nil, userID is zero,
// sourceIP is empty, the country lookup yields "", or the Service
// background context is missing or already cancelled (which Drain and
// Close both do).
//
// The ctx argument is deliberately ignored. The request-scoped context is
// cancelled as soon as the response is flushed, which would race with the
// upsert, so the goroutine derives its context from the Service-internal
// background context instead.
func (s *Service) IncrementCounterAsync(_ context.Context, userID uuid.UUID, sourceIP string) {
	switch {
	case s == nil, userID == uuid.Nil, sourceIP == "":
		return
	case s.bgCtx == nil, s.bgCtx.Err() != nil:
		return
	}

	country := s.LookupCountry(sourceIP)
	if country == "" {
		return
	}

	upsert := func() {
		dbCtx, release := context.WithTimeout(s.bgCtx, counterUpsertTimeout)
		defer release()

		err := s.upsertCounter(dbCtx, userID, country)
		if err == nil || errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
			return
		}
		s.logger.Warn("counter upsert failed",
			zap.String("user_id", userID.String()),
			zap.String("country", country),
			zap.Error(err),
		)
	}
	s.wg.Go(upsert)
}
// upsertCounter runs a single INSERT ... ON CONFLICT statement against
// `backend.user_country_counters`. A new (user_id, country) pair is
// inserted with count 1 and last_seen_at = NOW(); an existing pair gets
// count+1 and a refreshed last_seen_at. The compound primary key
// `(user_id, country)` is the conflict target, which keeps concurrent
// upserts for the same pair race-safe. Database errors are wrapped with
// the geo prefix.
func (s *Service) upsertCounter(ctx context.Context, userID uuid.UUID, country string) error {
	counters := table.UserCountryCounters

	// Applied only when the (user_id, country) row already exists.
	bump := postgres.SET(
		counters.Count.SET(counters.Count.ADD(postgres.Int(1))),
		counters.LastSeenAt.SET(postgres.TimestampzExp(postgres.NOW())),
	)
	upsert := counters.
		INSERT(counters.UserID, counters.Country, counters.Count, counters.LastSeenAt).
		VALUES(userID, country, postgres.Int(1), postgres.NOW()).
		ON_CONFLICT(counters.UserID, counters.Country).
		DO_UPDATE(bump)

	_, err := upsert.ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("geo: upsert counter for %s/%s: %w", userID, country, err)
	}
	return nil
}
// ListUserCounters loads every per-country counter stored for userID,
// sorted by country in ascending order. A user without rows yields an
// empty, non-nil slice and a nil error. The method does not check that the
// user exists in `backend.accounts`; the admin surface gates existence
// through a separate listing endpoint.
//
// A nil receiver or a zero userID is rejected with an error. Non-null
// last_seen_at values are returned in UTC.
func (s *Service) ListUserCounters(ctx context.Context, userID uuid.UUID) ([]CountryCounter, error) {
	switch {
	case s == nil:
		return nil, errors.New("geo: nil service")
	case userID == uuid.Nil:
		return nil, errors.New("geo: nil user id")
	}

	counters := table.UserCountryCounters
	query := postgres.
		SELECT(counters.Country, counters.Count, counters.LastSeenAt).
		FROM(counters).
		WHERE(counters.UserID.EQ(postgres.UUID(userID))).
		ORDER_BY(counters.Country.ASC())

	var rows []model.UserCountryCounters
	if err := query.QueryContext(ctx, s.db, &rows); err != nil {
		return nil, fmt.Errorf("geo: list counters for %s: %w", userID, err)
	}

	result := make([]CountryCounter, len(rows))
	for i := range rows {
		result[i] = CountryCounter{Country: rows[i].Country, Count: rows[i].Count}
		if seen := rows[i].LastSeenAt; seen != nil {
			utc := seen.UTC()
			result[i].LastSeenAt = &utc
		}
	}
	return result, nil
}
+320
View File
@@ -0,0 +1,320 @@
package geo_test
import (
"context"
"database/sql"
"net/url"
"testing"
"time"
"galaxy/backend/internal/geo"
backendpg "galaxy/backend/internal/postgres"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
"go.uber.org/zap/zaptest"
)
// Settings for the Postgres testcontainer used by the geo tests.
const (
// pgImage is the container image started by startPostgres.
pgImage = "postgres:16-alpine"
// pgUser and pgPassword are the credentials the container is created with.
pgUser = "galaxy"
pgPassword = "galaxy"
// pgDatabase is the database created inside the container.
pgDatabase = "galaxy_backend"
// pgSchema is set as the connection's search_path by dsnWithSearchPath.
pgSchema = "backend"
// pgStartup bounds how long the wait strategy waits for the container
// to report readiness.
pgStartup = 90 * time.Second
// pgOpTO is the per-operation timeout: it is used as the pool's
// OperationTimeout and for the contexts of individual test queries.
pgOpTO = 10 * time.Second
)
// startPostgres mirrors the auth/notification test scaffolding: spin up
// a Postgres testcontainer, apply backend migrations, return *sql.DB.
func startPostgres(t *testing.T) *sql.DB {
t.Helper()
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
t.Cleanup(cancel)
pgContainer, err := tcpostgres.Run(ctx, pgImage,
tcpostgres.WithDatabase(pgDatabase),
tcpostgres.WithUsername(pgUser),
tcpostgres.WithPassword(pgPassword),
testcontainers.WithWaitStrategy(
wait.ForLog("database system is ready to accept connections").
WithOccurrence(2).
WithStartupTimeout(pgStartup),
),
)
if err != nil {
t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
}
t.Cleanup(func() {
if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
t.Errorf("terminate postgres container: %v", termErr)
}
})
baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
if err != nil {
t.Fatalf("connection string: %v", err)
}
scoped, err := dsnWithSearchPath(baseDSN, pgSchema)
if err != nil {
t.Fatalf("scope dsn: %v", err)
}
cfg := pgshared.DefaultConfig()
cfg.PrimaryDSN = scoped
cfg.OperationTimeout = pgOpTO
db, err := pgshared.OpenPrimary(ctx, cfg)
if err != nil {
t.Fatalf("open primary: %v", err)
}
t.Cleanup(func() { _ = db.Close() })
if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
t.Fatalf("ping: %v", err)
}
if err := backendpg.ApplyMigrations(ctx, db); err != nil {
t.Fatalf("apply migrations: %v", err)
}
return db
}
// dsnWithSearchPath returns baseDSN with its search_path query parameter
// set to schema. When the DSN carries no (or an empty) sslmode parameter,
// sslmode=disable is added. A DSN that does not parse as a URL yields the
// parse error.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	dsn, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}

	params := dsn.Query()
	if params.Get("sslmode") == "" {
		params.Set("sslmode", "disable")
	}
	params.Set("search_path", schema)

	dsn.RawQuery = params.Encode()
	return dsn.String(), nil
}
// fixtureService builds a Service around the injected database pool via
// geo.NewServiceForTest, i.e. without the GeoLite2 resolver. The resolver
// is exercised by the `pkg/geoip` tests, and the counter path under test
// does not depend on the lookup. A zaptest logger bound to t is attached.
// Calling Drain/Close is left to the caller.
func fixtureService(t *testing.T, db *sql.DB) *geo.Service {
	t.Helper()

	service, err := geo.NewServiceForTest(db)
	if err != nil {
		t.Fatalf("new service: %v", err)
	}
	service.SetLogger(zaptest.NewLogger(t))
	return service
}
// TestIncrementCounterAsyncCreatesRow checks that the first upsert for a
// (user, country) pair creates a row with count 1 and a non-null
// last_seen_at.
func TestIncrementCounterAsyncCreatesRow(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)
	t.Cleanup(func() {
		drainCtx, stop := context.WithTimeout(context.Background(), pgOpTO)
		defer stop()
		svc.Drain(drainCtx)
		_ = svc.Close()
	})

	const country = "DE"
	user := uuid.New()
	svc.IncrementCounterTestSync(t, user, country)

	hits, seenAt := readCounter(t, db, user, country)
	if hits != 1 {
		t.Fatalf("count: want 1, got %d", hits)
	}
	if seenAt == nil {
		t.Fatal("last_seen_at: want non-null, got null")
	}
}
// TestIncrementCounterAsyncIncrementsExistingRow checks that a second
// upsert for the same (user, country) pair raises the count to 2 and moves
// last_seen_at forward.
func TestIncrementCounterAsyncIncrementsExistingRow(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)
	t.Cleanup(func() {
		drainCtx, stop := context.WithTimeout(context.Background(), pgOpTO)
		defer stop()
		svc.Drain(drainCtx)
		_ = svc.Close()
	})

	const country = "DE"
	user := uuid.New()

	svc.IncrementCounterTestSync(t, user, country)
	_, firstSeen := readCounter(t, db, user, country)
	if firstSeen == nil {
		t.Fatal("first last_seen_at: want non-null")
	}

	// Give now() time to advance beyond Postgres timestamp resolution
	// (microseconds in practice) before the second upsert.
	time.Sleep(2 * time.Millisecond)

	svc.IncrementCounterTestSync(t, user, country)
	hits, secondSeen := readCounter(t, db, user, country)
	if hits != 2 {
		t.Fatalf("count: want 2, got %d", hits)
	}
	movedForward := secondSeen != nil && secondSeen.After(*firstSeen)
	if !movedForward {
		t.Fatalf("last_seen_at: want strictly later than %v, got %v", firstSeen, secondSeen)
	}
}
// TestIncrementCounterAsyncShortCircuits checks that calls with a zero
// user ID or an empty source IP leave the counters table empty.
func TestIncrementCounterAsyncShortCircuits(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)
	t.Cleanup(func() {
		drainCtx, stop := context.WithTimeout(context.Background(), pgOpTO)
		defer stop()
		svc.Drain(drainCtx)
		_ = svc.Close()
	})

	// Zero user / empty source IP: drive the synchronous validation path
	// through the public API to confirm that nothing is written.
	background := context.Background()
	svc.IncrementCounterAsync(background, uuid.Nil, "1.2.3.4")
	svc.IncrementCounterAsync(background, uuid.New(), "")

	if total := totalCounterRows(t, db); total != 0 {
		t.Fatalf("expected zero counter rows after short-circuit calls, got %d", total)
	}
}
// TestListUserCountersOrdered checks that ListUserCounters returns one
// entry per country, sorted by country code, with the accumulated count
// and a non-nil LastSeenAt.
func TestListUserCountersOrdered(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)
	t.Cleanup(func() {
		drainCtx, stop := context.WithTimeout(context.Background(), pgOpTO)
		defer stop()
		svc.Drain(drainCtx)
		_ = svc.Close()
	})

	user := uuid.New()
	for _, country := range []string{"PL", "DE", "DE", "AU"} {
		svc.IncrementCounterTestSync(t, user, country)
	}

	ctx, cancel := context.WithTimeout(context.Background(), pgOpTO)
	defer cancel()

	entries, err := svc.ListUserCounters(ctx, user)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if len(entries) != 3 {
		t.Fatalf("entries: want 3, got %d (%+v)", len(entries), entries)
	}

	expected := []string{"AU", "DE", "PL"}
	for i := range entries {
		if got := entries[i].Country; got != expected[i] {
			t.Errorf("entries[%d].Country = %q, want %q", i, got, expected[i])
		}
		if entries[i].LastSeenAt == nil {
			t.Errorf("entries[%d].LastSeenAt: want non-nil", i)
		}
	}
	if got := entries[1].Count; got != 2 {
		t.Errorf("entries[1].Count: want 2, got %d", got)
	}
}
// TestListUserCountersEmpty checks that listing counters for a user with
// no rows succeeds and returns no entries.
func TestListUserCountersEmpty(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)
	t.Cleanup(func() {
		drainCtx, stop := context.WithTimeout(context.Background(), pgOpTO)
		defer stop()
		svc.Drain(drainCtx)
		_ = svc.Close()
	})

	ctx, cancel := context.WithTimeout(context.Background(), pgOpTO)
	defer cancel()

	unknownUser := uuid.New()
	entries, err := svc.ListUserCounters(ctx, unknownUser)
	if err != nil {
		t.Fatalf("list unknown user: %v", err)
	}
	if len(entries) > 0 {
		t.Fatalf("entries: want empty, got %+v", entries)
	}
}
// TestListUserCountersNilArguments checks that ListUserCounters returns an
// error for a zero user ID and for a nil receiver.
func TestListUserCountersNilArguments(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)
	t.Cleanup(func() { _ = svc.Close() })

	ctx, cancel := context.WithTimeout(context.Background(), pgOpTO)
	defer cancel()

	_, err := svc.ListUserCounters(ctx, uuid.Nil)
	if err == nil {
		t.Fatal("ListUserCounters(uuid.Nil): want error")
	}

	var missing *geo.Service
	_, err = missing.ListUserCounters(ctx, uuid.New())
	if err == nil {
		t.Fatal("nil receiver ListUserCounters: want error")
	}
}
// TestDrainAwaitsInFlightCounters checks that a counter written through
// the synchronous test seam is still present after Drain followed by
// Close.
func TestDrainAwaitsInFlightCounters(t *testing.T) {
	db := startPostgres(t)
	svc := fixtureService(t, db)

	// The test seam takes the country directly, so the write does not
	// depend on the resolver, which this fixture leaves unset.
	const country = "FR"
	user := uuid.New()
	svc.IncrementCounterTestSync(t, user, country)

	drainCtx, cancel := context.WithTimeout(context.Background(), pgOpTO)
	defer cancel()
	svc.Drain(drainCtx)

	if closeErr := svc.Close(); closeErr != nil {
		t.Fatalf("close: %v", closeErr)
	}

	if hits, _ := readCounter(t, db, user, country); hits != 1 {
		t.Fatalf("count after drain+close: want 1, got %d", hits)
	}
}
// readCounter fetches the count and last_seen_at of the counter row for
// (userID, country) directly from the database. The timestamp is returned
// in UTC, or as nil when the column is NULL. A missing row or any query
// error fails the test.
func readCounter(t *testing.T, db *sql.DB, userID uuid.UUID, country string) (int64, *time.Time) {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), pgOpTO)
	defer cancel()

	var (
		hits int64
		seen sql.NullTime
	)
	row := db.QueryRowContext(ctx, `
SELECT count, last_seen_at FROM backend.user_country_counters
WHERE user_id = $1 AND country = $2
`, userID, country)
	if err := row.Scan(&hits, &seen); err != nil {
		t.Fatalf("read counter (%s/%s): %v", userID, country, err)
	}

	if seen.Valid {
		utc := seen.Time.UTC()
		return hits, &utc
	}
	return hits, nil
}
// totalCounterRows returns the number of rows currently stored in
// backend.user_country_counters, failing the test on a query error.
func totalCounterRows(t *testing.T, db *sql.DB) int {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), pgOpTO)
	defer cancel()

	var total int
	row := db.QueryRowContext(ctx, `
SELECT count(*) FROM backend.user_country_counters
`)
	if err := row.Scan(&total); err != nil {
		t.Fatalf("count rows: %v", err)
	}
	return total
}
+63
View File
@@ -0,0 +1,63 @@
package geo
import "strings"
// countryToLanguage maps an uppercase ISO 3166-1 alpha-2 country code to
// an ISO 639-1 lowercase language code. The set is intentionally minimal
// — covering the top-traffic Galaxy locales — and is consulted as a
// fallback when neither the request body nor the Accept-Language header
// supplied a locale at send-email-code. Unknown countries map to the
// empty string so the auth flow can default to "en".
//
// The mapping is intentionally hard-coded rather than derived from the
// GeoLite2 database: countries with multiple official languages collapse
// to the single most common UI locale to keep the registration path
// deterministic. The table may be revised without changing the surface
// auth depends on.
var countryToLanguage = map[string]string{
// English-default territories and the platform fallback.
"US": "en", "GB": "en", "AU": "en", "NZ": "en", "IE": "en", "CA": "en",
// Western European languages, including the Latin American countries
// that share Spanish and Portuguese.
"DE": "de", "AT": "de", "CH": "de",
"FR": "fr", "BE": "fr", "LU": "fr",
"ES": "es", "MX": "es", "AR": "es", "CL": "es", "CO": "es",
"IT": "it",
"PT": "pt", "BR": "pt",
"NL": "nl",
// Central / Eastern Europe (Kazakhstan is grouped here for its "ru" locale).
"PL": "pl",
"RU": "ru", "BY": "ru", "KZ": "ru",
"UA": "uk",
"CZ": "cs",
"SK": "sk",
"HU": "hu",
"RO": "ro",
"BG": "bg",
// Northern Europe.
"SE": "sv",
"NO": "no",
"DK": "da",
"FI": "fi",
// Asia, including Israel and Turkey.
"JP": "ja",
"KR": "ko",
"CN": "zh", "TW": "zh", "HK": "zh", "SG": "zh",
"VN": "vi",
"TH": "th",
"ID": "id",
"IN": "en",
"IL": "he",
"TR": "tr",
// Arabic-speaking Middle East and North Africa.
"SA": "ar", "AE": "ar", "EG": "ar",
}
// languageForCountry looks country up in countryToLanguage and returns the
// mapped ISO 639-1 language code. The input is trimmed and upper-cased
// before the lookup; "" is returned when country is empty or has no
// mapping.
func languageForCountry(country string) string {
	key := strings.ToUpper(strings.TrimSpace(country))
	if language, known := countryToLanguage[key]; known {
		return language
	}
	return ""
}
+43
View File
@@ -0,0 +1,43 @@
package geo
import (
"context"
"errors"
"fmt"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/google/uuid"
)
// SetDeclaredCountryAtRegistration stores the geoip-derived country of
// sourceIP in `accounts.declared_country` for userID. The UPDATE only
// matches an account that is not soft-deleted and whose declared_country
// is still NULL, so repeated calls on the same account are no-ops. This
// matches PLAN.md §5.8: the value is captured at first registration and
// never updated afterwards. updated_at is refreshed in the same statement.
//
// The lookup is best-effort: when no country can be resolved, nil is
// returned without touching the database, so the auth flow is never
// blocked. A nil receiver and any database error are returned to the
// caller, which decides whether to log or escalate.
func (s *Service) SetDeclaredCountryAtRegistration(ctx context.Context, userID uuid.UUID, sourceIP string) error {
	if s == nil {
		return errors.New("geo: nil service")
	}

	country := s.LookupCountry(sourceIP)
	if country == "" {
		return nil
	}

	accounts := table.Accounts
	unsetAndLive := accounts.UserID.EQ(postgres.UUID(userID)).
		AND(accounts.DeclaredCountry.IS_NULL()).
		AND(accounts.DeletedAt.IS_NULL())
	update := accounts.
		UPDATE(accounts.DeclaredCountry, accounts.UpdatedAt).
		SET(postgres.String(country), postgres.NOW()).
		WHERE(unsetAndLive)

	_, err := update.ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("geo: set declared_country for %s: %w", userID, err)
	}
	return nil
}
+43
View File
@@ -0,0 +1,43 @@
package geo
import (
"context"
"database/sql"
"errors"
"testing"
"github.com/google/uuid"
"go.uber.org/zap"
)
// NewServiceForTest returns a Service that has no GeoLite2 resolver. It is
// the entry point for external tests that exercise the counter / admin
// paths without a real mmdb file. The Service gets its own cancellable
// background context and a no-op logger, as NewService does. A nil db is
// rejected.
func NewServiceForTest(db *sql.DB) (*Service, error) {
	if db == nil {
		return nil, errors.New("geo: db must not be nil")
	}

	svc := &Service{
		db:     db,
		logger: zap.NewNop(),
	}
	svc.bgCtx, svc.bgCancel = context.WithCancel(context.Background())
	return svc, nil
}
// IncrementCounterTestSync performs the package-private counter upsert on
// the calling goroutine, bounded by counterUpsertTimeout, so external
// tests can assert on counter rows without dealing with goroutine
// scheduling. An upsert error fails the test instead of being logged.
func (s *Service) IncrementCounterTestSync(t *testing.T, userID uuid.UUID, country string) {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), counterUpsertTimeout)
	defer cancel()

	err := s.upsertCounter(ctx, userID, country)
	if err == nil {
		return
	}
	t.Fatalf("upsert counter (%s/%s): %v", userID, country, err)
}
+159
View File
@@ -0,0 +1,159 @@
// Package geo wraps the GeoLite2 country resolver and exposes the
// platform-level geo helpers consumed by `backend/internal/auth` at user
// registration time and by the user-surface middleware on every
// authenticated request.
//
// The package provides `LookupCountry`, `LanguageForIP` and
// `SetDeclaredCountryAtRegistration`, the `OnUserDeleted` cascade leg,
// and `IncrementCounterAsync` / `ListUserCounters`. The latter two are
// layered on the same Service together with the background-goroutine
// machinery (cancellable context and WaitGroup) needed to drain pending
// counter upserts on shutdown.
package geo
import (
"context"
"database/sql"
"errors"
"fmt"
"sync"
"sync/atomic"
"galaxy/geoip"
"go.uber.org/zap"
)
// Service is the geo-domain entry point. It is safe for concurrent use.
type Service struct {
// db is the caller-owned Postgres pool used for counter and account
// statements.
db *sql.DB
// resolver is the GeoLite2 country resolver. It is nil for Services
// built by NewServiceForTest and after Close.
resolver *geoip.Resolver
// logger receives diagnostics from the asynchronous counter path.
logger *zap.Logger
// bgCtx is the lifetime context passed to fire-and-forget goroutines
// launched by IncrementCounterAsync. It is cancelled by Drain and by
// Close so that in-flight counter upserts observe shutdown promptly.
// The matching WaitGroup tracks live goroutines so Drain can wait for
// them; Close does not wait.
bgCtx context.Context
bgCancel context.CancelFunc
wg sync.WaitGroup
// closed makes Close idempotent: only the first call proceeds.
closed atomic.Bool
}
// NewService builds a Service on top of the GeoLite2 country database at
// databasePath and the supplied Postgres pool. Closing the returned
// Service releases the resolver; the database pool stays owned by the
// caller.
//
// A nil db is rejected. Any error from geoip.Open is wrapped and returned
// so that boot fails fast instead of hiding lookups behind a permanent
// failure path. Callers that explicitly want a no-op Service should inject
// their own implementation via the auth-level interfaces.
//
// The Service starts with a no-op zap logger; callers that want
// diagnostics from the asynchronous counter path install one with
// SetLogger.
func NewService(databasePath string, db *sql.DB) (*Service, error) {
	if db == nil {
		return nil, errors.New("geo: db must not be nil")
	}

	resolver, err := geoip.Open(databasePath)
	if err != nil {
		return nil, fmt.Errorf("geo: open resolver: %w", err)
	}

	svc := &Service{
		db:       db,
		resolver: resolver,
		logger:   zap.NewNop(),
	}
	svc.bgCtx, svc.bgCancel = context.WithCancel(context.Background())
	return svc, nil
}
// SetLogger installs the diagnostic logger used by the asynchronous
// counter path; the stored logger is named "geo". Passing nil resets it to
// a no-op logger, so production wiring can supply a real logger after
// construction without the test paths having to thread one through.
// Calling SetLogger on a nil Service does nothing.
func (s *Service) SetLogger(logger *zap.Logger) {
	if s == nil {
		return
	}
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	s.logger = base.Named("geo")
}
// Drain cancels the Service-internal background context, so live counter
// goroutines observe shutdown and stop waiting on the database, and then
// blocks until every goroutine tracked by the WaitGroup has finished or
// ctx is done, whichever happens first. It is safe on a nil Service and
// may be called more than once.
//
// Drain does not close the GeoLite2 resolver; Close does. The split lets
// the boot orchestrator wait for in-flight writes within the shutdown
// deadline before the resolver and database pool are torn down.
func (s *Service) Drain(ctx context.Context) {
	if s == nil {
		return
	}
	if cancelBackground := s.bgCancel; cancelBackground != nil {
		cancelBackground()
	}

	finished := make(chan struct{})
	go func() {
		defer close(finished)
		s.wg.Wait()
	}()

	select {
	case <-ctx.Done():
	case <-finished:
	}
}
// Close releases the underlying GeoLite2 database resources. Pending
// counter goroutines launched through IncrementCounterAsync are signalled
// to stop via the internal background context but are NOT awaited;
// callers that need to wait must invoke Drain first. Close is idempotent
// and nil-safe: only the first call does any work, later calls return nil.
//
// After Close, lookups return the empty country / language ("" treated as
// no data). This holds even when closing the resolver fails: the resolver
// reference is dropped before Close is called on it, and the failure is
// returned wrapped with the geo prefix.
func (s *Service) Close() error {
	if s == nil {
		return nil
	}
	if !s.closed.CompareAndSwap(false, true) {
		return nil
	}
	if s.bgCancel != nil {
		s.bgCancel()
	}

	resolver := s.resolver
	if resolver == nil {
		return nil
	}
	// Detach the resolver before closing it. The closed flag is already
	// set, so a failed close cannot be retried; keeping the pointer would
	// let later lookups reach a resolver in an unknown state.
	s.resolver = nil
	if err := resolver.Close(); err != nil {
		return fmt.Errorf("geo: close resolver: %w", err)
	}
	return nil
}
// LookupCountry resolves the ISO 3166-1 alpha-2 country code for sourceIP
// through the GeoLite2 resolver. The lookup is best-effort and never
// reports an error: "" is returned for a nil Service, a missing or closed
// resolver, an empty sourceIP, or any resolver failure. Callers that need
// diagnostic detail should query the geoip resolver directly.
func (s *Service) LookupCountry(sourceIP string) string {
	if s != nil && s.resolver != nil && sourceIP != "" {
		if code, err := s.resolver.CountryString(sourceIP); err == nil {
			return code
		}
	}
	return ""
}
+82
View File
@@ -0,0 +1,82 @@
package geo
import (
"context"
"testing"
"time"
"go.uber.org/zap"
)
// TestLanguageForCountry checks known, lower-case, empty and unknown
// country codes against the static language table.
func TestLanguageForCountry(t *testing.T) {
	cases := []struct {
		country string
		want    string
	}{
		{country: "DE", want: "de"},
		{country: "de", want: "de"}, // case-insensitive input
		{country: "RU", want: "ru"},
		{country: "BR", want: "pt"},
		{country: "", want: ""},
		{country: "ZZ", want: ""},
	}
	for _, tc := range cases {
		got := languageForCountry(tc.country)
		if got != tc.want {
			t.Errorf("languageForCountry(%q) = %q, want %q", tc.country, got, tc.want)
		}
	}
}
// TestLookupCountryNilSafety checks that LookupCountry on a nil Service
// returns the empty string instead of panicking.
func TestLookupCountryNilSafety(t *testing.T) {
	var missing *Service
	country := missing.LookupCountry("8.8.8.8")
	if country != "" {
		t.Errorf("nil Service LookupCountry = %q, want empty", country)
	}
}
// TestLanguageForIPNilSafety checks that LanguageForIP on a nil Service
// returns the empty string instead of panicking.
func TestLanguageForIPNilSafety(t *testing.T) {
	var missing *Service
	language := missing.LanguageForIP("8.8.8.8")
	if language != "" {
		t.Errorf("nil Service LanguageForIP = %q, want empty", language)
	}
}
// TestSetLoggerNilSafety calls SetLogger on a nil receiver (with and
// without a logger) and on a zero-value Service with a nil logger; the
// test passes as long as none of the calls panics.
func TestSetLoggerNilSafety(t *testing.T) {
	var absent *Service
	absent.SetLogger(zap.NewNop())
	absent.SetLogger(nil)

	zeroValue := new(Service)
	zeroValue.SetLogger(nil) // must not panic on a nil logger.
}
// TestDrainNilSafety calls Drain on a nil *Service; the test passes as
// long as the call returns without panicking.
func TestDrainNilSafety(t *testing.T) {
var s *Service
s.Drain(context.Background())
}
// TestDrainReturnsWhenContextDone verifies that Drain gives up once the
// supplied context is done even though background work is still
// outstanding.
func TestDrainReturnsWhenContextDone(t *testing.T) {
	live := &Service{}
	live.bgCtx, live.bgCancel = context.WithCancel(context.Background())
	defer live.bgCancel()

	// Keep one unit of work pending so Drain cannot return through the
	// WaitGroup branch; without it the context path is never exercised.
	// The deferred Done releases Drain's helper goroutine at test end.
	live.wg.Add(1)
	defer live.wg.Done()

	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	start := time.Now()
	live.Drain(ctx)
	if elapsed := time.Since(start); elapsed > 5*time.Second {
		t.Fatalf("Drain blocked too long: %s", elapsed)
	}
	if ctx.Err() == nil {
		t.Fatalf("Drain returned before the context was done")
	}
}
// TestCloseIdempotent closes a Service twice and then closes a nil
// *Service; every call must return a nil error.
func TestCloseIdempotent(t *testing.T) {
	svc := new(Service)
	svc.bgCtx, svc.bgCancel = context.WithCancel(context.Background())

	for _, attempt := range []string{"first", "second"} {
		if err := svc.Close(); err != nil {
			t.Fatalf("%s Close: %v", attempt, err)
		}
	}

	var absent *Service
	err := absent.Close()
	if err != nil {
		t.Fatalf("nil Service Close: %v", err)
	}
}
+14
View File
@@ -0,0 +1,14 @@
package geo
// LanguageForIP derives an ISO 639-1 language code from sourceIP by
// resolving the country through LookupCountry and mapping it with
// languageForCountry. The result is "" when the country lookup yields
// nothing or the country has no language mapping.
//
// Auth uses LanguageForIP as a fallback after the client-supplied locale
// (request body or Accept-Language header). The empty string signals
// "fall through to the platform default 'en'".
func (s *Service) LanguageForIP(sourceIP string) string {
	return languageForCountry(s.LookupCountry(sourceIP))
}
+226
View File
@@ -0,0 +1,226 @@
package lobby
import (
"context"
"fmt"
"github.com/google/uuid"
"go.uber.org/zap"
)
// SubmitApplicationInput is the parameter struct for
// Service.SubmitApplication.
type SubmitApplicationInput struct {
// GameID identifies the game the application targets.
GameID uuid.UUID
// ApplicantUserID is the user recorded as the applicant.
ApplicantUserID uuid.UUID
// RaceName is the requested race name; SubmitApplication passes it
// through ValidateDisplayName before storing it.
RaceName string
}
// SubmitApplication creates a new application bound to (gameID,
// applicantUserID, raceName). The race name must pass
// ValidateDisplayName, and the game must be public and in
// `enrollment_open`; otherwise an ErrConflict-wrapped error is returned.
// The race name is recorded for context but the per-game canonical
// reservation is created at approval time.
//
// After the insert a NotificationLobbyApplicationSubmitted intent is
// published, addressed to the game owner when the game has one. A
// publish failure is only logged.
func (s *Service) SubmitApplication(ctx context.Context, in SubmitApplicationInput) (Application, error) {
	var none Application

	raceName, err := ValidateDisplayName(in.RaceName)
	if err != nil {
		return none, err
	}
	game, err := s.GetGame(ctx, in.GameID)
	if err != nil {
		return none, err
	}
	switch {
	case game.Visibility != VisibilityPublic:
		return none, fmt.Errorf("%w: only public games accept applications", ErrConflict)
	case game.Status != GameStatusEnrollmentOpen:
		return none, fmt.Errorf("%w: game is not in enrollment_open", ErrConflict)
	}

	created, err := s.deps.Store.InsertApplication(ctx, applicationInsert{
		ApplicationID:   uuid.New(),
		GameID:          in.GameID,
		ApplicantUserID: in.ApplicantUserID,
		RaceName:        raceName,
	})
	if err != nil {
		return none, err
	}

	appID := created.ApplicationID.String()
	var recipients []uuid.UUID
	if owner := game.OwnerUserID; owner != nil {
		recipients = []uuid.UUID{*owner}
	}
	pubErr := s.deps.Notification.PublishLobbyEvent(ctx, LobbyNotification{
		Kind:           NotificationLobbyApplicationSubmitted,
		IdempotencyKey: "application:" + appID,
		Recipients:     recipients,
		Payload: map[string]any{
			"game_id":        game.GameID.String(),
			"application_id": appID,
		},
	})
	if pubErr != nil {
		// Notification failures never roll back the canonical write.
		s.deps.Logger.Warn("application submitted notification failed",
			zap.String("application_id", appID),
			zap.Error(pubErr))
	}
	return created, nil
}
// ApproveApplication transitions a pending application to `approved`,
// creates the matching membership, and reserves the race-name canonical
// in the Race Name Directory.
//
// The application must belong to gameID (ErrNotFound otherwise), the
// caller must pass checkGameAdminOrOwner, the application must still be
// pending, and the game must be in `enrollment_open` (ErrConflict
// otherwise). The race name must be free per assertRaceNameAvailable.
//
// The three writes are separate store calls issued in order: race-name
// reservation, membership, application status. When the membership
// insert fails the reservation is removed on a best-effort basis. The
// cache is updated as soon as the reservation and membership rows exist,
// so it keeps mirroring the store even if the final status update fails.
// A failure to publish the approval notification is only logged.
func (s *Service) ApproveApplication(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID, applicationID uuid.UUID) (Application, error) {
	app, err := s.deps.Store.LoadApplication(ctx, applicationID)
	if err != nil {
		return Application{}, err
	}
	// An application addressed through the wrong game is reported as
	// missing rather than as a conflict.
	if app.GameID != gameID {
		return Application{}, ErrNotFound
	}
	game, err := s.GetGame(ctx, gameID)
	if err != nil {
		return Application{}, err
	}
	if err := s.checkGameAdminOrOwner(game, callerUserID, callerIsAdmin); err != nil {
		return Application{}, err
	}
	if app.Status != ApplicationStatusPending {
		return Application{}, fmt.Errorf("%w: application status is %q", ErrConflict, app.Status)
	}
	if game.Status != GameStatusEnrollmentOpen {
		return Application{}, fmt.Errorf("%w: game is not in enrollment_open", ErrConflict)
	}
	canonical, err := s.deps.Policy.Canonical(app.RaceName)
	if err != nil {
		return Application{}, err
	}
	if err := s.assertRaceNameAvailable(ctx, canonical, app.ApplicantUserID, gameID); err != nil {
		return Application{}, err
	}
	now := s.deps.Now().UTC()
	if _, err := s.deps.Store.InsertRaceName(ctx, raceNameInsert{
		Name:        app.RaceName,
		Canonical:   canonical,
		Status:      RaceNameStatusReservation,
		OwnerUserID: app.ApplicantUserID,
		GameID:      gameID,
		ReservedAt:  &now,
	}); err != nil {
		return Application{}, err
	}
	membership, err := s.deps.Store.InsertMembership(ctx, membershipInsert{
		MembershipID: uuid.New(),
		GameID:       gameID,
		UserID:       app.ApplicantUserID,
		RaceName:     app.RaceName,
		CanonicalKey: canonical,
	})
	if err != nil {
		// Best-effort cleanup of the race-name reservation if the
		// membership insert lost the race. The insert error is what the
		// caller sees; a cleanup failure is logged so a leftover
		// reservation can be traced.
		if delErr := s.deps.Store.DeleteRaceName(ctx, canonical, gameID); delErr != nil {
			s.deps.Logger.Warn("race name reservation cleanup failed",
				zap.String("application_id", applicationID.String()),
				zap.String("game_id", gameID.String()),
				zap.Error(delErr))
		}
		return Application{}, err
	}
	// Both rows now exist in the store, so project them into the cache
	// before the status update: if that update fails the cache must not
	// lag behind the persisted membership and reservation.
	s.deps.Cache.PutMembership(membership)
	s.deps.Cache.PutRaceName(RaceNameEntry{
		Name:        app.RaceName,
		Canonical:   canonical,
		Status:      RaceNameStatusReservation,
		OwnerUserID: app.ApplicantUserID,
		GameID:      gameID,
		ReservedAt:  &now,
	})
	updated, err := s.deps.Store.UpdateApplicationStatus(ctx, applicationID, ApplicationStatusApproved, now)
	if err != nil {
		return Application{}, err
	}
	intent := LobbyNotification{
		Kind:           NotificationLobbyApplicationApproved,
		IdempotencyKey: "application-approved:" + applicationID.String(),
		Recipients:     []uuid.UUID{app.ApplicantUserID},
		Payload: map[string]any{
			"game_id": gameID.String(),
		},
	}
	if pubErr := s.deps.Notification.PublishLobbyEvent(ctx, intent); pubErr != nil {
		// Notification failures never roll back the canonical write.
		s.deps.Logger.Warn("application approved notification failed",
			zap.String("application_id", updated.ApplicationID.String()),
			zap.Error(pubErr))
	}
	return updated, nil
}
// RejectApplication transitions a pending application to `rejected`.
//
// The application must belong to gameID (ErrNotFound otherwise), the
// caller must pass checkGameAdminOrOwner, and the application must still
// be pending (ErrConflict otherwise). The applicant is then sent a
// NotificationLobbyApplicationRejected intent; a publish failure is only
// logged.
func (s *Service) RejectApplication(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID, applicationID uuid.UUID) (Application, error) {
	var none Application

	pending, err := s.deps.Store.LoadApplication(ctx, applicationID)
	if err != nil {
		return none, err
	}
	if pending.GameID != gameID {
		return none, ErrNotFound
	}
	game, err := s.GetGame(ctx, gameID)
	if err != nil {
		return none, err
	}
	if authErr := s.checkGameAdminOrOwner(game, callerUserID, callerIsAdmin); authErr != nil {
		return none, authErr
	}
	if pending.Status != ApplicationStatusPending {
		return none, fmt.Errorf("%w: application status is %q", ErrConflict, pending.Status)
	}

	rejectedAt := s.deps.Now().UTC()
	rejected, err := s.deps.Store.UpdateApplicationStatus(ctx, applicationID, ApplicationStatusRejected, rejectedAt)
	if err != nil {
		return none, err
	}

	pubErr := s.deps.Notification.PublishLobbyEvent(ctx, LobbyNotification{
		Kind:           NotificationLobbyApplicationRejected,
		IdempotencyKey: "application-rejected:" + applicationID.String(),
		Recipients:     []uuid.UUID{pending.ApplicantUserID},
		Payload: map[string]any{
			"game_id": gameID.String(),
		},
	})
	if pubErr != nil {
		s.deps.Logger.Warn("application rejected notification failed",
			zap.String("application_id", rejected.ApplicationID.String()),
			zap.Error(pubErr))
	}
	return rejected, nil
}
// ListMyApplications returns every application owned by userID. It is a
// direct pass-through to the store's ListMyApplications; no filtering or
// caching happens at this layer.
func (s *Service) ListMyApplications(ctx context.Context, userID uuid.UUID) ([]Application, error) {
return s.deps.Store.ListMyApplications(ctx, userID)
}
// checkGameAdminOrOwner enforces that the caller is either an admin or
// (for private games) the owner. Public games are admin-only — the same
// rule as transition(). The check itself is delegated unchanged to
// checkOwner; this method only gives the application handlers a
// descriptive name.
func (s *Service) checkGameAdminOrOwner(game GameRecord, callerUserID *uuid.UUID, callerIsAdmin bool) error {
return s.checkOwner(game, callerUserID, callerIsAdmin)
}
// assertRaceNameAvailable returns nil when canonical is free for userID.
// Free means that no other user holds a row for canonical in status
// `registered`, `reservation`, or `pending_registration`; otherwise an
// ErrRaceNameTaken-wrapped error is returned. gameID is accepted for
// signature symmetry but is not consulted.
func (s *Service) assertRaceNameAvailable(ctx context.Context, canonical CanonicalKey, userID, gameID uuid.UUID) error {
	_ = gameID
	holders, err := s.deps.Store.FindRaceNameByCanonical(ctx, canonical)
	if err != nil {
		return err
	}
	for _, holder := range holders {
		// Rows owned by the same user never block: the per-game PK
		// handles same-game collisions, and a user is allowed to hold
		// the same canonical across multiple active games.
		if holder.OwnerUserID == userID {
			continue
		}
		blocking := holder.Status == RaceNameStatusRegistered ||
			holder.Status == RaceNameStatusReservation ||
			holder.Status == RaceNameStatusPendingRegistration
		if blocking {
			return fmt.Errorf("%w: race name held by another user", ErrRaceNameTaken)
		}
	}
	return nil
}
+285
View File
@@ -0,0 +1,285 @@
package lobby
import (
"context"
"fmt"
"sync"
"sync/atomic"
"github.com/google/uuid"
)
// Cache is the in-memory write-through projection of the active lobby
// state: games (any non-finished/non-cancelled status), per-game
// memberships, and the Race Name Directory canonical map.
//
// Reads (Get*) take RLocks; writes (Put*, Remove*) take Locks. The cache
// mirrors the `internal/auth.Cache`, `internal/user.Cache`, and
// `internal/admin.Cache` idioms — Postgres is the source of truth, the
// cache is updated only after a successful commit.
//
// All methods are safe to call on a nil *Cache: reads report a miss and
// writes are no-ops.
type Cache struct {
// mu guards games, memberships, and rnd.
mu sync.RWMutex
// games holds only games whose status passes isCacheableStatus.
games map[uuid.UUID]GameRecord
memberships map[uuid.UUID]map[uuid.UUID]Membership // game_id -> membership_id -> Membership
rnd map[CanonicalKey]RaceNameEntry // canonical -> latest entry (most recent write wins)
// ready flips to true once Warm has completed successfully.
ready atomic.Bool
}
// NewCache returns a Cache whose three projections are allocated and
// empty. Ready reports false until Warm has run.
func NewCache() *Cache {
	c := new(Cache)
	c.games = map[uuid.UUID]GameRecord{}
	c.memberships = map[uuid.UUID]map[uuid.UUID]Membership{}
	c.rnd = map[CanonicalKey]RaceNameEntry{}
	return c
}
// Warm fills the cache from store. Must be called once at process boot
// before the HTTP listener accepts traffic. Subsequent calls re-warm by
// replacing all three projections.
//
// All three listings are loaded before the lock is taken, so a store
// failure leaves the previous cache contents untouched and returns the
// wrapped error. Only games that pass isCacheableStatus are kept.
// Memberships are kept only when they are active and belong to a kept
// game, matching the active-only rule PutMembership applies to later
// writes. Warm on a nil *Cache is a no-op.
func (c *Cache) Warm(ctx context.Context, store *Store) error {
	if c == nil {
		return nil
	}
	games, err := store.ListAllGames(ctx)
	if err != nil {
		return fmt.Errorf("lobby cache warm: games: %w", err)
	}
	memberships, err := store.ListAllMemberships(ctx)
	if err != nil {
		return fmt.Errorf("lobby cache warm: memberships: %w", err)
	}
	raceNames, err := store.ListAllRaceNames(ctx)
	if err != nil {
		return fmt.Errorf("lobby cache warm: race names: %w", err)
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	c.games = make(map[uuid.UUID]GameRecord, len(games))
	for _, g := range games {
		if isCacheableStatus(g.Status) {
			c.games[g.GameID] = g
		}
	}

	c.memberships = make(map[uuid.UUID]map[uuid.UUID]Membership, len(c.games))
	for _, m := range memberships {
		// Mirror PutMembership: the cache projects active memberships
		// only, so blocked or removed rows must not be resurrected by a
		// warm-up.
		if m.Status != MembershipStatusActive {
			continue
		}
		if _, ok := c.games[m.GameID]; !ok {
			continue
		}
		bucket := c.memberships[m.GameID]
		if bucket == nil {
			bucket = make(map[uuid.UUID]Membership)
			c.memberships[m.GameID] = bucket
		}
		bucket[m.MembershipID] = m
	}

	c.rnd = make(map[CanonicalKey]RaceNameEntry, len(raceNames))
	for _, r := range raceNames {
		// Later rows overwrite earlier ones for the same canonical.
		c.rnd[r.Canonical] = r
	}

	c.ready.Store(true)
	return nil
}
// Ready reports whether Warm has completed at least once. A nil *Cache
// is never ready.
func (c *Cache) Ready() bool {
	return c != nil && c.ready.Load()
}
// Sizes reports how many games, memberships (summed over all games), and
// race names the cache currently holds. Useful for the startup log line
// and tests. A nil *Cache reports zeros.
func (c *Cache) Sizes() (games int, memberships int, raceNames int) {
	if c == nil {
		return 0, 0, 0
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	games, raceNames = len(c.games), len(c.rnd)
	for gameID := range c.memberships {
		memberships += len(c.memberships[gameID])
	}
	return games, memberships, raceNames
}
// GetGame looks gameID up in the cache and returns the record together
// with a presence flag. A miss, including any call on a nil *Cache,
// yields the zero record and false. Finished and cancelled games are
// never held (PutGame evicts them), so a miss does not by itself prove
// that the game is absent from the store.
func (c *Cache) GetGame(gameID uuid.UUID) (GameRecord, bool) {
	if c == nil {
		return GameRecord{}, false
	}
	c.mu.RLock()
	rec, found := c.games[gameID]
	c.mu.RUnlock()
	return rec, found
}
// PutGame writes game into the cache when its status is cacheable. A
// terminal status (finished, cancelled) instead evicts the game and every
// membership cached under it. No-op on a nil *Cache.
func (c *Cache) PutGame(game GameRecord) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	if isCacheableStatus(game.Status) {
		c.games[game.GameID] = game
		return
	}
	delete(c.games, game.GameID)
	delete(c.memberships, game.GameID)
}
// RemoveGame drops gameID from the cache together with every membership
// cached under it. No-op on a nil *Cache or an unknown gameID.
func (c *Cache) RemoveGame(gameID uuid.UUID) {
	if c != nil {
		c.mu.Lock()
		delete(c.memberships, gameID)
		delete(c.games, gameID)
		c.mu.Unlock()
	}
}
// PutMembership stores or replaces an active membership under its game.
// A membership whose status is not active is removed instead, and the
// per-game bucket is dropped once it becomes empty. No-op on a nil
// *Cache.
func (c *Cache) PutMembership(m Membership) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	bucket := c.memberships[m.GameID]
	if m.Status == MembershipStatusActive {
		if bucket == nil {
			bucket = make(map[uuid.UUID]Membership)
			c.memberships[m.GameID] = bucket
		}
		bucket[m.MembershipID] = m
		return
	}

	// Non-active: evict, and tidy up the bucket if that emptied it.
	if bucket == nil {
		return
	}
	delete(bucket, m.MembershipID)
	if len(bucket) == 0 {
		delete(c.memberships, m.GameID)
	}
}
// MembershipsForGame returns a freshly allocated slice holding the
// memberships cached for gameID, in unspecified order. The result is nil
// when the cache is nil, the game has no bucket, or the bucket is empty.
func (c *Cache) MembershipsForGame(gameID uuid.UUID) []Membership {
	if c == nil {
		return nil
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	bucket := c.memberships[gameID]
	count := len(bucket)
	if count == 0 {
		return nil
	}
	members := make([]Membership, count)
	next := 0
	for _, member := range bucket {
		members[next] = member
		next++
	}
	return members
}
// PutRaceName records entry under its canonical key, replacing whatever
// was cached for that key. The cache is best-effort — it serves
// uniqueness fast-paths but Postgres is the authoritative reader on
// contention. No-op on a nil *Cache.
func (c *Cache) PutRaceName(entry RaceNameEntry) {
	if c != nil {
		c.mu.Lock()
		defer c.mu.Unlock()
		c.rnd[entry.Canonical] = entry
	}
}
// RemoveRaceName drops the entry cached under canonical, if any. No-op
// on a nil *Cache.
func (c *Cache) RemoveRaceName(canonical CanonicalKey) {
	if c != nil {
		c.mu.Lock()
		delete(c.rnd, canonical)
		c.mu.Unlock()
	}
}
// GetRaceName looks canonical up in the race-name projection and returns
// the entry plus a presence flag. A miss, including any call on a nil
// *Cache, yields the zero entry and false.
func (c *Cache) GetRaceName(canonical CanonicalKey) (RaceNameEntry, bool) {
	if c == nil {
		return RaceNameEntry{}, false
	}
	c.mu.RLock()
	entry, found := c.rnd[canonical]
	c.mu.RUnlock()
	return entry, found
}
// EvictUserMemberships drops every cached membership whose UserID is
// userID and removes any per-game bucket left empty. Used by
// `OnUserBlocked` / `OnUserDeleted` after the cascade commits so the
// cache reflects the new persisted state. No-op on a nil *Cache.
func (c *Cache) EvictUserMemberships(userID uuid.UUID) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	for gameID := range c.memberships {
		bucket := c.memberships[gameID]
		for membershipID := range bucket {
			if bucket[membershipID].UserID != userID {
				continue
			}
			delete(bucket, membershipID)
		}
		if len(bucket) > 0 {
			continue
		}
		delete(c.memberships, gameID)
	}
}
// EvictUserRaceNames drops every cached race-name entry whose
// OwnerUserID is userID. No-op on a nil *Cache.
func (c *Cache) EvictUserRaceNames(userID uuid.UUID) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	for canonical := range c.rnd {
		if c.rnd[canonical].OwnerUserID != userID {
			continue
		}
		delete(c.rnd, canonical)
	}
}
// EvictOwnerGames drops every cached game owned by userID together with
// the memberships cached under it. Games without an owner are left
// alone. Used after the cascade cancels the user's owned games. No-op on
// a nil *Cache.
func (c *Cache) EvictOwnerGames(userID uuid.UUID) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	for gameID, rec := range c.games {
		owner := rec.OwnerUserID
		if owner == nil || *owner != userID {
			continue
		}
		delete(c.games, gameID)
		delete(c.memberships, gameID)
	}
}
// isCacheableStatus reports whether a game with the supplied status
// belongs in the cache. Only the terminal statuses (finished, cancelled)
// are excluded; every other value, including unknown ones, is cacheable.
func isCacheableStatus(status string) bool {
	return status != GameStatusFinished && status != GameStatusCancelled
}
+122
View File
@@ -0,0 +1,122 @@
package lobby
import (
"testing"
"time"
"github.com/google/uuid"
)
// TestCachePutGetRemoveGame walks one game through the cache: absent on
// an empty cache, present after PutGame, absent again after RemoveGame.
func TestCachePutGetRemoveGame(t *testing.T) {
	cache := NewCache()
	game := GameRecord{
		GameID:    uuid.New(),
		Status:    GameStatusEnrollmentOpen,
		GameName:  "Test Game",
		CreatedAt: time.Now(),
	}

	_, present := cache.GetGame(game.GameID)
	if present {
		t.Fatalf("GetGame on empty cache returned ok=true")
	}

	cache.PutGame(game)
	got, ok := cache.GetGame(game.GameID)
	if !(ok && got.GameID == game.GameID) {
		t.Fatalf("GetGame after PutGame: ok=%v, got=%v", ok, got)
	}

	cache.RemoveGame(game.GameID)
	_, present = cache.GetGame(game.GameID)
	if present {
		t.Fatalf("GetGame after RemoveGame: ok=true")
	}
}
// TestCachePutGameEvictsOnTerminalStatus verifies that re-putting a
// cached game with status finished removes it from the cache.
func TestCachePutGameEvictsOnTerminalStatus(t *testing.T) {
	cache := NewCache()
	game := GameRecord{
		GameID:   uuid.New(),
		Status:   GameStatusEnrollmentOpen,
		GameName: "Test Game",
	}

	cache.PutGame(game)
	_, cached := cache.GetGame(game.GameID)
	if !cached {
		t.Fatalf("PutGame did not insert")
	}

	game.Status = GameStatusFinished
	cache.PutGame(game)
	_, cached = cache.GetGame(game.GameID)
	if cached {
		t.Fatalf("PutGame with finished did not evict")
	}
}
// TestCachePutMembershipEvictsOnNonActive verifies that an active
// membership is listed for its game and that re-putting it with status
// removed takes it out again.
func TestCachePutMembershipEvictsOnNonActive(t *testing.T) {
	cache := NewCache()
	game := uuid.New()
	cache.PutGame(GameRecord{GameID: game, Status: GameStatusEnrollmentOpen})

	member := Membership{
		MembershipID: uuid.New(),
		GameID:       game,
		UserID:       uuid.New(),
		Status:       MembershipStatusActive,
	}
	cache.PutMembership(member)
	listed := cache.MembershipsForGame(game)
	if len(listed) != 1 {
		t.Fatalf("MembershipsForGame after add = %d, want 1", len(listed))
	}

	member.Status = MembershipStatusRemoved
	cache.PutMembership(member)
	listed = cache.MembershipsForGame(game)
	if len(listed) != 0 {
		t.Fatalf("MembershipsForGame after remove = %d, want 0", len(listed))
	}
}
// TestCachePutRaceNameAndEvict verifies that a race name is retrievable
// after PutRaceName and gone after EvictUserRaceNames for its owner.
func TestCachePutRaceNameAndEvict(t *testing.T) {
	cache := NewCache()
	ownerID := uuid.New()
	reservation := RaceNameEntry{
		Name:        "Andromeda",
		Canonical:   CanonicalKey("andromeda"),
		Status:      RaceNameStatusReservation,
		OwnerUserID: ownerID,
		GameID:      uuid.New(),
	}

	cache.PutRaceName(reservation)
	got, ok := cache.GetRaceName(reservation.Canonical)
	if !(ok && got.Canonical == reservation.Canonical) {
		t.Fatalf("GetRaceName: ok=%v, got=%v", ok, got)
	}

	cache.EvictUserRaceNames(ownerID)
	_, stillCached := cache.GetRaceName(reservation.Canonical)
	if stillCached {
		t.Fatalf("EvictUserRaceNames did not evict")
	}
}
// TestCacheReadyDefaultsFalse verifies that a freshly constructed cache
// reports Ready() == false before Warm has been called.
func TestCacheReadyDefaultsFalse(t *testing.T) {
c := NewCache()
if c.Ready() {
t.Fatalf("Ready() before Warm = true, want false")
}
}
// TestCacheSizesZero verifies that Sizes reports zero for all three
// projections on a freshly constructed cache.
func TestCacheSizesZero(t *testing.T) {
	games, members, raceNames := NewCache().Sizes()
	allZero := games == 0 && members == 0 && raceNames == 0
	if !allZero {
		t.Fatalf("Sizes() on empty = (%d,%d,%d), want (0,0,0)", games, members, raceNames)
	}
}
// TestCacheEvictOwnerGames verifies that EvictOwnerGames removes the
// game owned by the given user and leaves another owner's game cached.
func TestCacheEvictOwnerGames(t *testing.T) {
	cache := NewCache()
	evictee, bystander := uuid.New(), uuid.New()
	mine := GameRecord{GameID: uuid.New(), Status: GameStatusEnrollmentOpen, OwnerUserID: &evictee}
	theirs := GameRecord{GameID: uuid.New(), Status: GameStatusEnrollmentOpen, OwnerUserID: &bystander}
	cache.PutGame(mine)
	cache.PutGame(theirs)

	cache.EvictOwnerGames(evictee)

	_, mineCached := cache.GetGame(mine.GameID)
	if mineCached {
		t.Fatalf("EvictOwnerGames did not evict owned game")
	}
	_, theirsCached := cache.GetGame(theirs.GameID)
	if !theirsCached {
		t.Fatalf("EvictOwnerGames evicted unrelated game")
	}
}
+81
View File
@@ -0,0 +1,81 @@
package lobby
import (
"context"
"errors"
"fmt"
"github.com/google/uuid"
"go.uber.org/zap"
)
// OnUserBlocked releases every lobby binding owned by the user under
// the `blocked` semantics: active memberships flip to `blocked`,
// pending applications get rejected, pending incoming invites get
// declined and outgoing ones get revoked, race-name entries are deleted,
// and owned games in non-running statuses are cancelled.
//
// Implements `internal/user.LobbyCascade.OnUserBlocked`. Errors during
// the cascade are joined and returned but never roll back the
// already-committed user write — the canonical state is the row in
// Postgres.
//
// The work is done by runCascade with MembershipStatusBlocked as the
// target membership status.
func (s *Service) OnUserBlocked(ctx context.Context, userID uuid.UUID) error {
return s.runCascade(ctx, userID, MembershipStatusBlocked)
}
// OnUserDeleted runs the same cascade as OnUserBlocked but transitions
// memberships to `removed` instead of `blocked`. Implements
// `internal/user.LobbyCascade.OnUserDeleted`.
//
// The work is done by runCascade with MembershipStatusRemoved as the
// target membership status.
func (s *Service) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
return s.runCascade(ctx, userID, MembershipStatusRemoved)
}
// runCascade is the shared body of OnUserBlocked and OnUserDeleted. It
// loads the user's cascade snapshot and returns nil straight away when
// the snapshot is empty. Otherwise it asks the store to apply the
// cascade with membershipStatus as the new membership status, evicts the
// user's memberships, race names, and owned games from the cache, and —
// when the user had active memberships — publishes one
// NotificationLobbyMembershipRemoved intent carrying membershipStatus as
// the reason.
//
// Store failures are returned wrapped. Notification failures are logged
// and returned joined; the store write is not undone.
func (s *Service) runCascade(ctx context.Context, userID uuid.UUID, membershipStatus string) error {
	snap, err := s.deps.Store.LoadCascadeSnapshot(ctx, userID)
	if err != nil {
		return fmt.Errorf("lobby cascade: load snapshot: %w", err)
	}
	if snap.empty() {
		return nil
	}

	appliedAt := s.deps.Now().UTC()
	err = s.deps.Store.CascadeUser(ctx, userID, snap, membershipStatus, appliedAt)
	if err != nil {
		return fmt.Errorf("lobby cascade: write: %w", err)
	}

	// Bring the cache in line with what was just persisted.
	cache := s.deps.Cache
	cache.EvictUserMemberships(userID)
	cache.EvictUserRaceNames(userID)
	cache.EvictOwnerGames(userID)
	for _, ownedGameID := range snap.OwnedGameIDs {
		cache.RemoveGame(ownedGameID)
	}

	var notifyErrs []error
	if len(snap.ActiveMembershipIDs) > 0 {
		pubErr := s.deps.Notification.PublishLobbyEvent(ctx, LobbyNotification{
			Kind:           NotificationLobbyMembershipRemoved,
			IdempotencyKey: "user-cascade-membership:" + userID.String(),
			Recipients:     []uuid.UUID{userID},
			Payload: map[string]any{
				"reason": membershipStatus,
			},
		})
		if pubErr != nil {
			notifyErrs = append(notifyErrs, pubErr)
		}
	}
	if failed := len(notifyErrs); failed > 0 {
		s.deps.Logger.Warn("lobby cascade notification failures",
			zap.String("user_id", userID.String()),
			zap.Int("notify_errors", failed))
	}
	return errors.Join(notifyErrs...)
}
// empty reports whether the snapshot lists nothing to cascade: no owned
// games, active memberships, pending applications, incoming or outgoing
// invites, or race-name keys.
func (snap CascadeUserSnapshot) empty() bool {
	pending := len(snap.OwnedGameIDs) +
		len(snap.ActiveMembershipIDs) +
		len(snap.PendingApplications) +
		len(snap.IncomingInvites) +
		len(snap.OutgoingInvites) +
		len(snap.RaceNameKeys)
	return pending == 0
}
+125
View File
@@ -0,0 +1,125 @@
package lobby
import (
"context"
"github.com/google/uuid"
"go.uber.org/zap"
)
// EntitlementProvider is the read-only view the lobby needs over the
// user-domain entitlement snapshot. The canonical implementation is
// `*user.Service` exposing `GetEntitlement(ctx, userID)`; tests substitute
// a fake.
//
// `MaxRegisteredRaceNames` is the only field the lobby consumes: when
// the caller attempts to register a `pending_registration` row, the lobby
// counts already-`registered` rows for that user against this limit.
type EntitlementProvider interface {
// GetMaxRegisteredRaceNames returns the registered-race-name limit
// for userID.
GetMaxRegisteredRaceNames(ctx context.Context, userID uuid.UUID) (int32, error)
}
// RuntimeGateway is the outbound surface the lobby uses to ask the runtime
// module to start, pause, resume, or stop an engine container. The real
// implementation lives in `backend/internal/runtime`; until it is wired
// in, `NewNoopRuntimeGateway` ships a logger-only stub that pretends the
// request was accepted so the lobby state machine stays exercisable
// end-to-end.
type RuntimeGateway interface {
// StartGame asks the runtime to start the engine for gameID.
StartGame(ctx context.Context, gameID uuid.UUID) error
// StopGame asks the runtime to stop the engine for gameID.
StopGame(ctx context.Context, gameID uuid.UUID) error
// PauseGame asks the runtime to pause the engine for gameID.
PauseGame(ctx context.Context, gameID uuid.UUID) error
// ResumeGame asks the runtime to resume the engine for gameID.
ResumeGame(ctx context.Context, gameID uuid.UUID) error
}
// RuntimeJobResult is the inbound shape used by the runtime reconciler
// when a labelled container that lobby believes is alive has
// disappeared. The wiring connects `Service.OnRuntimeJobResult` against
// this type; the no-op consumer logs the event at debug level.
//
// NOTE(review): the accepted values of Op and Status are not defined in
// this file — confirm against the runtime module before branching on
// them.
type RuntimeJobResult struct {
Op string
Status string
Message string
}
// NotificationPublisher is the outbound surface the lobby uses to fan out
// notification intents (invite received, application submitted, race name
// promoted, etc.). The real implementation lives in
// `backend/internal/notification`; until it is wired in,
// `NewNoopNotificationPublisher` ships a logger-only stub.
type NotificationPublisher interface {
// PublishLobbyEvent hands one notification intent to the publisher.
PublishLobbyEvent(ctx context.Context, intent LobbyNotification) error
}
// LobbyNotification is the open shape carried by a notification intent.
// The implementation emits a small set of `Kind` values matching the catalog in
// `backend/README.md` §10. The `Payload` map is the kind-specific data
// blob; recipients are the user_ids the intent should reach.
//
// The struct lives in the lobby package on purpose: it is the producer
// vocabulary. The implementation will reuse it as the notification.Submit input
// (or wrap it in a domain-side type, if more channels show up).
type LobbyNotification struct {
// Kind names the notification type.
Kind string
// IdempotencyKey identifies the intent; producers in this package
// build it from a fixed prefix plus the id of the affected record.
IdempotencyKey string
// Recipients lists the user_ids the intent is addressed to; it may
// be empty.
Recipients []uuid.UUID
// Payload carries the kind-specific data.
Payload map[string]any
}
// NewNoopRuntimeGateway builds a RuntimeGateway whose methods log each
// call at debug level and return nil. A nil logger is replaced by a nop
// logger; the logger actually used is named "lobby.runtime.noop". The
// lobby state machine treats the no-op as "request was accepted
// asynchronously" — the game stays in `starting` until the canonical
// implementation wires real `runtime` / `OnRuntimeSnapshot` interactions.
func NewNoopRuntimeGateway(logger *zap.Logger) RuntimeGateway {
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	gateway := new(noopRuntimeGateway)
	gateway.logger = base.Named("lobby.runtime.noop")
	return gateway
}
// noopRuntimeGateway is the logger-only RuntimeGateway returned by
// NewNoopRuntimeGateway.
type noopRuntimeGateway struct {
// logger receives one debug entry per gateway call.
logger *zap.Logger
}
// StartGame logs the start request for gameID at debug level and
// reports success; the context is ignored.
func (g *noopRuntimeGateway) StartGame(_ context.Context, gameID uuid.UUID) error {
g.logger.Debug("noop start-game", zap.String("game_id", gameID.String()))
return nil
}
// StopGame logs the stop request for gameID at debug level and reports
// success; the context is ignored.
func (g *noopRuntimeGateway) StopGame(_ context.Context, gameID uuid.UUID) error {
g.logger.Debug("noop stop-game", zap.String("game_id", gameID.String()))
return nil
}
// PauseGame logs the pause request for gameID at debug level and
// reports success; the context is ignored.
func (g *noopRuntimeGateway) PauseGame(_ context.Context, gameID uuid.UUID) error {
g.logger.Debug("noop pause-game", zap.String("game_id", gameID.String()))
return nil
}
// ResumeGame logs the resume request for gameID at debug level and
// reports success; the context is ignored.
func (g *noopRuntimeGateway) ResumeGame(_ context.Context, gameID uuid.UUID) error {
g.logger.Debug("noop resume-game", zap.String("game_id", gameID.String()))
return nil
}
// NewNoopNotificationPublisher builds a NotificationPublisher that logs
// every intent at debug level and returns nil. A nil logger is replaced
// by a nop logger; the logger actually used is named
// "lobby.notify.noop". The implementation will swap in a real publisher
// backed by `notification.Submit`.
func NewNoopNotificationPublisher(logger *zap.Logger) NotificationPublisher {
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	publisher := new(noopNotificationPublisher)
	publisher.logger = base.Named("lobby.notify.noop")
	return publisher
}
// noopNotificationPublisher is the logger-only NotificationPublisher
// returned by NewNoopNotificationPublisher.
type noopNotificationPublisher struct {
// logger receives one debug entry per published intent.
logger *zap.Logger
}
// PublishLobbyEvent logs the intent's kind, idempotency key, and
// recipient count at debug level and reports success. Nothing is
// delivered, the payload is not logged, and the context is ignored.
func (p *noopNotificationPublisher) PublishLobbyEvent(_ context.Context, intent LobbyNotification) error {
p.logger.Debug("noop notification",
zap.String("kind", intent.Kind),
zap.String("idempotency_key", intent.IdempotencyKey),
zap.Int("recipients", len(intent.Recipients)),
)
return nil
}
+54
View File
@@ -0,0 +1,54 @@
package lobby
import "errors"
// Sentinel errors surface common rejection reasons across the lobby
// package. Handlers map them to HTTP envelopes through `respondLobbyError`
// in `internal/server/handlers_user_lobby_helpers.go`.
//
// Call sites in this package wrap the sentinels with `%w` plus a detail
// message, so match them with errors.Is rather than with ==.
//
// Adding a new sentinel here is a deliberate API change: it appears in the
// handler error map and may surface as a new wire `code` value. Reuse the
// existing set when the behaviour overlaps.
var (
// ErrInvalidInput reports request-level validation failures (empty
// fields, malformed cron expressions, unknown enum values, race-name
// policy rejections). Maps to 400 invalid_request.
ErrInvalidInput = errors.New("lobby: invalid input")
// ErrNotFound reports that the requested record (game, application,
// invite, membership, race name) does not exist or is not visible to
// the caller. Maps to 404 not_found.
ErrNotFound = errors.New("lobby: not found")
// ErrForbidden reports that the caller is authenticated but not
// authorised for the requested action — most commonly "not the owner
// of this private game". Maps to 403 forbidden.
ErrForbidden = errors.New("lobby: forbidden")
// ErrConflict reports that the requested action conflicts with the
// current persisted state (illegal status transition, duplicate
// application, race-name canonical taken, invite already redeemed).
// Maps to 409 conflict.
ErrConflict = errors.New("lobby: conflict")
// ErrInvalidStatus reports a state-machine transition rejected by the
// game/application/invite/membership status. Treated as ErrConflict
// at the wire boundary; carried as a separate sentinel so transition
// callers can branch on it without parsing the wrapped message.
ErrInvalidStatus = errors.New("lobby: invalid status transition")
// ErrRaceNameTaken reports that a race-name canonical key is already
// claimed by a different user (registered, reserved, or
// pending_registration). Treated as ErrConflict at the wire boundary.
ErrRaceNameTaken = errors.New("lobby: race name is taken")
// ErrEntitlementExceeded reports that the caller already holds the
// maximum number of registered race names allowed by their tier.
// Treated as ErrConflict at the wire boundary.
ErrEntitlementExceeded = errors.New("lobby: entitlement quota exceeded")
// ErrPendingExpired reports that the pending_registration window
// passed before the user attempted to promote it to registered.
// Treated as ErrConflict at the wire boundary.
ErrPendingExpired = errors.New("lobby: pending registration expired")
)
+446
View File
@@ -0,0 +1,446 @@
package lobby
import (
"context"
"fmt"
"slices"
"strings"
"time"
"galaxy/cronutil"
"github.com/google/uuid"
)
// CreateGameInput is the parameter struct for Service.CreateGame. The
// constraints noted on the fields are the ones Validate enforces.
type CreateGameInput struct {
// OwnerUserID must be set for private games and nil for public ones.
OwnerUserID *uuid.UUID
// Visibility must be VisibilityPublic or VisibilityPrivate.
Visibility string
// GameName is trimmed and must not be empty.
GameName string
// Description is stored as supplied; Validate does not inspect it.
Description string
// MinPlayers and MaxPlayers must be positive, with
// MinPlayers <= MaxPlayers.
MinPlayers int32
MaxPlayers int32
// StartGapHours and StartGapPlayers must be non-negative.
StartGapHours int32
StartGapPlayers int32
// EnrollmentEndsAt must not lie before the validation time.
EnrollmentEndsAt time.Time
// TurnSchedule is trimmed, must not be empty, and must be accepted
// by cronutil.Parse.
TurnSchedule string
// TargetEngineVersion is trimmed and must not be empty.
TargetEngineVersion string
}
// Validate normalises the request in place (GameName, TurnSchedule, and
// TargetEngineVersion are whitespace-trimmed) and rejects malformed
// values. It is called by Service.CreateGame before any Postgres write.
//
// Checks run in a fixed order and the first failure is returned wrapped
// in ErrInvalidInput. now is the reference time EnrollmentEndsAt is
// compared against.
func (in *CreateGameInput) Validate(now time.Time) error {
	in.GameName = strings.TrimSpace(in.GameName)
	in.TurnSchedule = strings.TrimSpace(in.TurnSchedule)
	in.TargetEngineVersion = strings.TrimSpace(in.TargetEngineVersion)

	// reject wraps ErrInvalidInput with a fixed detail message.
	reject := func(detail string) error {
		return fmt.Errorf("%w: %s", ErrInvalidInput, detail)
	}

	private := in.Visibility == VisibilityPrivate
	public := in.Visibility == VisibilityPublic
	switch {
	case in.GameName == "":
		return reject("game_name must not be empty")
	case !(public || private):
		return reject("visibility must be 'public' or 'private'")
	case private && in.OwnerUserID == nil:
		return reject("private games require owner_user_id")
	case public && in.OwnerUserID != nil:
		return reject("public games must not carry an owner_user_id")
	case in.MinPlayers <= 0 || in.MaxPlayers <= 0:
		return reject("min_players and max_players must be positive")
	case in.MinPlayers > in.MaxPlayers:
		return reject("min_players must not exceed max_players")
	case in.StartGapHours < 0 || in.StartGapPlayers < 0:
		return reject("start_gap_hours and start_gap_players must be non-negative")
	case in.EnrollmentEndsAt.Before(now):
		return reject("enrollment_ends_at must be in the future")
	case in.TurnSchedule == "":
		return reject("turn_schedule must not be empty")
	}

	// The parser's own error is appended as text only, so ErrInvalidInput
	// stays the single wrapped sentinel.
	if _, parseErr := cronutil.Parse(in.TurnSchedule); parseErr != nil {
		return fmt.Errorf("%w: turn_schedule must parse as a five-field cron expression: %v", ErrInvalidInput, parseErr)
	}
	if in.TargetEngineVersion == "" {
		return reject("target_engine_version must not be empty")
	}
	return nil
}
// CreateGame persists a fresh `draft` game and returns it. The input is
// validated against the current UTC time first; on success a new game id
// is generated, the row is inserted through the store, and the stored
// record is put into the cache. The caller is responsible for setting
// OwnerUserID = nil (public games) or the authenticated user_id (private
// games).
func (s *Service) CreateGame(ctx context.Context, in CreateGameInput) (GameRecord, error) {
	if err := in.Validate(s.deps.Now().UTC()); err != nil {
		return GameRecord{}, err
	}

	row := gameInsert{
		GameID:              uuid.New(),
		OwnerUserID:         in.OwnerUserID,
		Visibility:          in.Visibility,
		GameName:            in.GameName,
		Description:         in.Description,
		MinPlayers:          in.MinPlayers,
		MaxPlayers:          in.MaxPlayers,
		StartGapHours:       in.StartGapHours,
		StartGapPlayers:     in.StartGapPlayers,
		EnrollmentEndsAt:    in.EnrollmentEndsAt.UTC(),
		TurnSchedule:        in.TurnSchedule,
		TargetEngineVersion: in.TargetEngineVersion,
	}
	created, err := s.deps.Store.InsertGame(ctx, row)
	if err != nil {
		return GameRecord{}, err
	}
	s.deps.Cache.PutGame(created)
	return created, nil
}
// UpdateGameInput is the parameter struct for Service.UpdateGame. Nil
// pointers leave the corresponding column alone.
//
// Service.UpdateGame trims GameName, TurnSchedule and TargetEngineVersion
// and rejects them when empty after trimming; EnrollmentEndsAt is
// converted to UTC before it is written.
type UpdateGameInput struct {
GameName *string
Description *string
EnrollmentEndsAt *time.Time
TurnSchedule *string
TargetEngineVersion *string
MinPlayers *int32
MaxPlayers *int32
StartGapHours *int32
StartGapPlayers *int32
}
// UpdateGame patches the supplied fields on a game. Only the owner of a
// private game (or admin via callerIsAdmin=true) can run this.
//
// Nil fields of in are left untouched. String fields are trimmed and must
// not be empty once trimmed; TurnSchedule must also parse as a cron
// expression. Numeric fields obey the same rules CreateGameInput.Validate
// applies on create: player counts are positive, start gaps are
// non-negative, and min_players never exceeds max_players (a one-sided
// patch is compared against the value currently stored on the game).
// Validation failures wrap ErrInvalidInput. On success the updated record
// is written to the cache and returned.
func (s *Service) UpdateGame(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID, in UpdateGameInput) (GameRecord, error) {
	game, err := s.GetGame(ctx, gameID)
	if err != nil {
		return GameRecord{}, err
	}
	if err := s.checkOwner(game, callerUserID, callerIsAdmin); err != nil {
		return GameRecord{}, err
	}
	patch := gameUpdate{
		Description:     in.Description,
		MinPlayers:      in.MinPlayers,
		MaxPlayers:      in.MaxPlayers,
		StartGapHours:   in.StartGapHours,
		StartGapPlayers: in.StartGapPlayers,
	}
	if in.GameName != nil {
		trimmed := strings.TrimSpace(*in.GameName)
		if trimmed == "" {
			return GameRecord{}, fmt.Errorf("%w: game_name must not be empty", ErrInvalidInput)
		}
		patch.GameName = &trimmed
	}
	if in.TurnSchedule != nil {
		trimmed := strings.TrimSpace(*in.TurnSchedule)
		if trimmed == "" {
			return GameRecord{}, fmt.Errorf("%w: turn_schedule must not be empty", ErrInvalidInput)
		}
		if _, err := cronutil.Parse(trimmed); err != nil {
			return GameRecord{}, fmt.Errorf("%w: turn_schedule must parse: %v", ErrInvalidInput, err)
		}
		patch.TurnSchedule = &trimmed
	}
	if in.TargetEngineVersion != nil {
		trimmed := strings.TrimSpace(*in.TargetEngineVersion)
		if trimmed == "" {
			return GameRecord{}, fmt.Errorf("%w: target_engine_version must not be empty", ErrInvalidInput)
		}
		patch.TargetEngineVersion = &trimmed
	}
	if in.EnrollmentEndsAt != nil {
		t := in.EnrollmentEndsAt.UTC()
		patch.EnrollmentEndsAt = &t
	}

	// Range checks mirror CreateGameInput.Validate so a patch cannot move
	// a game into a shape that create would have rejected.
	if (patch.MinPlayers != nil && *patch.MinPlayers <= 0) || (patch.MaxPlayers != nil && *patch.MaxPlayers <= 0) {
		return GameRecord{}, fmt.Errorf("%w: min_players and max_players must be positive", ErrInvalidInput)
	}
	if (patch.StartGapHours != nil && *patch.StartGapHours < 0) || (patch.StartGapPlayers != nil && *patch.StartGapPlayers < 0) {
		return GameRecord{}, fmt.Errorf("%w: start_gap_hours and start_gap_players must be non-negative", ErrInvalidInput)
	}

	// min/max relationship: compare against the stored value when only
	// one side of the pair is being patched.
	switch {
	case patch.MinPlayers != nil && patch.MaxPlayers != nil:
		if *patch.MinPlayers > *patch.MaxPlayers {
			return GameRecord{}, fmt.Errorf("%w: min_players must not exceed max_players", ErrInvalidInput)
		}
	case patch.MinPlayers != nil:
		if *patch.MinPlayers > game.MaxPlayers {
			return GameRecord{}, fmt.Errorf("%w: min_players must not exceed max_players", ErrInvalidInput)
		}
	case patch.MaxPlayers != nil:
		if *patch.MaxPlayers < game.MinPlayers {
			return GameRecord{}, fmt.Errorf("%w: max_players must not be less than min_players", ErrInvalidInput)
		}
	}

	updated, err := s.deps.Store.UpdateGame(ctx, gameID, patch, s.deps.Now().UTC())
	if err != nil {
		return GameRecord{}, err
	}
	s.deps.Cache.PutGame(updated)
	return updated, nil
}
// GetGame returns the game record for gameID. The cache is consulted
// first; on a miss the record is loaded from the store and written back
// to the cache before it is returned.
func (s *Service) GetGame(ctx context.Context, gameID uuid.UUID) (GameRecord, error) {
	cached, hit := s.deps.Cache.GetGame(gameID)
	if hit {
		return cached, nil
	}
	loaded, err := s.deps.Store.LoadGame(ctx, gameID)
	if err != nil {
		return GameRecord{}, err
	}
	s.deps.Cache.PutGame(loaded)
	return loaded, nil
}
// GamePage is one page of a game listing as returned by
// Service.ListPublicGames and Service.ListAdminGames.
type GamePage struct {
// Items holds the records the store returned for this page.
Items []GameRecord
// Page is the page number after defaulting (non-positive requests
// become 1).
Page int
// PageSize is the page size after defaulting (non-positive requests
// become 50).
PageSize int
// Total is the count reported by the store alongside Items.
Total int
}
// ListPublicGames returns the requested page of public games. A
// non-positive page is treated as 1 and a non-positive pageSize as 50;
// the returned GamePage echoes the values actually used.
func (s *Service) ListPublicGames(ctx context.Context, page, pageSize int) (GamePage, error) {
	page = max(page, 1)
	if pageSize < 1 {
		pageSize = 50
	}
	items, count, err := s.deps.Store.ListPublicGames(ctx, page, pageSize)
	if err != nil {
		return GamePage{}, err
	}
	result := GamePage{
		Items:    items,
		Page:     page,
		PageSize: pageSize,
		Total:    count,
	}
	return result, nil
}
// ListAdminGames returns the requested page of every game (admin view).
// A non-positive page is treated as 1 and a non-positive pageSize as 50;
// the returned GamePage echoes the values actually used.
func (s *Service) ListAdminGames(ctx context.Context, page, pageSize int) (GamePage, error) {
	page = max(page, 1)
	if pageSize < 1 {
		pageSize = 50
	}
	items, count, err := s.deps.Store.ListAdminGames(ctx, page, pageSize)
	if err != nil {
		return GamePage{}, err
	}
	result := GamePage{
		Items:    items,
		Page:     page,
		PageSize: pageSize,
		Total:    count,
	}
	return result, nil
}
// ListMyGames returns the games where the caller has an active
// membership. The call is delegated to the store unchanged; the cache is
// not consulted here.
func (s *Service) ListMyGames(ctx context.Context, userID uuid.UUID) ([]GameRecord, error) {
return s.deps.Store.ListMyGames(ctx, userID)
}
// State-machine transition handlers below take the same shape: load the
// game (cache or store), check owner, validate the current status, run
// the transition write, refresh the cache, optionally tell the runtime
// gateway, and return the updated record.
// OpenEnrollment moves a `draft` game to `enrollment_open`. Ownership and
// status checks are performed by Service.transition.
func (s *Service) OpenEnrollment(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	rule := transitionRule{
		From:   []string{GameStatusDraft},
		To:     GameStatusEnrollmentOpen,
		Reason: "open enrollment",
	}
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, rule)
}
// ReadyToStart moves an `enrollment_open` game to `ready_to_start`. The
// transition succeeds only when the store reports at least `min_players`
// active memberships for the game; otherwise ErrConflict is returned.
func (s *Service) ReadyToStart(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	// enoughPlayers is the precondition evaluated against the
	// pre-transition record.
	enoughPlayers := func(ctx context.Context, game GameRecord) error {
		active, err := s.deps.Store.CountActiveMemberships(ctx, game.GameID)
		if err != nil {
			return err
		}
		if int32(active) >= game.MinPlayers {
			return nil
		}
		return fmt.Errorf("%w: approved_count (%d) must be >= min_players (%d)", ErrConflict, active, game.MinPlayers)
	}
	rule := transitionRule{
		From:         []string{GameStatusEnrollmentOpen},
		To:           GameStatusReadyToStart,
		Reason:       "ready to start",
		Precondition: enoughPlayers,
	}
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, rule)
}
// Start flips a `ready_to_start` game to `starting` and then asks the
// RuntimeGateway to start it. The later move to `running` is not done
// here; per the original note it is expected to arrive through
// OnRuntimeSnapshot. A gateway failure is reported as a wrapped
// "runtime start" error alongside the already-updated record.
func (s *Service) Start(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	launch := func(ctx context.Context, game GameRecord) error {
		err := s.deps.Runtime.StartGame(ctx, game.GameID)
		if err == nil {
			return nil
		}
		return fmt.Errorf("runtime start: %w", err)
	}
	rule := transitionRule{
		From:       []string{GameStatusReadyToStart},
		To:         GameStatusStarting,
		Reason:     "start",
		PostCommit: launch,
	}
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, rule)
}
// Pause moves a `running` game to `paused` and, once the status write
// has succeeded, asks the runtime gateway to pause the game.
func (s *Service) Pause(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	rule := transitionRule{
		From:   []string{GameStatusRunning},
		To:     GameStatusPaused,
		Reason: "pause",
	}
	rule.PostCommit = func(ctx context.Context, paused GameRecord) error {
		return s.deps.Runtime.PauseGame(ctx, paused.GameID)
	}
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, rule)
}
// Resume moves a `paused` game back to `running` and, once the status
// write has succeeded, asks the runtime gateway to resume the game.
func (s *Service) Resume(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	rule := transitionRule{
		From:   []string{GameStatusPaused},
		To:     GameStatusRunning,
		Reason: "resume",
	}
	rule.PostCommit = func(ctx context.Context, resumed GameRecord) error {
		return s.deps.Runtime.ResumeGame(ctx, resumed.GameID)
	}
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, rule)
}
// Cancel moves any non-terminal game to `cancelled`. When the game was
// `starting`, `running` or `paused` immediately before the transition,
// the runtime gateway is asked to stop it after the status write.
func (s *Service) Cancel(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	// Service.transition hands PostCommit the record returned by the
	// status write, i.e. the post-transition row, whose Status is expected
	// to be `cancelled` already. Deciding on that value would never reach
	// StopGame, so the pre-transition status is captured in Precondition,
	// which runs against the record as it was before the write.
	var previousStatus string
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, transitionRule{
		From: []string{
			GameStatusDraft, GameStatusEnrollmentOpen, GameStatusReadyToStart,
			GameStatusStarting, GameStatusStartFailed, GameStatusRunning, GameStatusPaused,
		},
		To:     GameStatusCancelled,
		Reason: "cancel",
		Precondition: func(_ context.Context, game GameRecord) error {
			previousStatus = game.Status
			return nil
		},
		PostCommit: func(ctx context.Context, game GameRecord) error {
			switch previousStatus {
			case GameStatusRunning, GameStatusPaused, GameStatusStarting:
				return s.deps.Runtime.StopGame(ctx, game.GameID)
			}
			return nil
		},
	})
}
// RetryStart moves a `start_failed` game back to `ready_to_start` so a
// subsequent /start call can re-attempt the runtime job. No runtime call
// is made by this transition itself.
func (s *Service) RetryStart(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID) (GameRecord, error) {
	rule := transitionRule{
		From:   []string{GameStatusStartFailed},
		To:     GameStatusReadyToStart,
		Reason: "retry start",
	}
	return s.transition(ctx, callerUserID, callerIsAdmin, gameID, rule)
}
// AdminForceStart moves a `draft`, `enrollment_open`, `ready_to_start`
// or `start_failed` game straight to `starting` and then asks the runtime
// gateway to start it. It runs with admin authority and declares no
// precondition, so the owner-only and min_players checks are bypassed.
func (s *Service) AdminForceStart(ctx context.Context, gameID uuid.UUID) (GameRecord, error) {
	preRunning := []string{
		GameStatusDraft, GameStatusEnrollmentOpen, GameStatusReadyToStart,
		GameStatusStartFailed,
	}
	rule := transitionRule{
		From:   preRunning,
		To:     GameStatusStarting,
		Reason: "admin force-start",
	}
	rule.PostCommit = func(ctx context.Context, started GameRecord) error {
		return s.deps.Runtime.StartGame(ctx, started.GameID)
	}
	return s.transition(ctx, nil, true, gameID, rule)
}
// AdminForceStop moves a `running`, `paused` or `starting` game to
// `cancelled` with admin authority and then asks the runtime gateway to
// stop it.
func (s *Service) AdminForceStop(ctx context.Context, gameID uuid.UUID) (GameRecord, error) {
	rule := transitionRule{
		From:   []string{GameStatusRunning, GameStatusPaused, GameStatusStarting},
		To:     GameStatusCancelled,
		Reason: "admin force-stop",
	}
	rule.PostCommit = func(ctx context.Context, stopped GameRecord) error {
		return s.deps.Runtime.StopGame(ctx, stopped.GameID)
	}
	return s.transition(ctx, nil, true, gameID, rule)
}
// transitionRule captures the inputs to Service.transition so the
// per-handler code stays declarative. From is the set of statuses the
// transition accepts; To is the target status. Precondition runs
// before the write (e.g., approved_count >= min_players) and receives
// the pre-transition record; PostCommit runs after a successful
// write/cache update (e.g., RuntimeGateway) and receives the record
// returned by that write.
// An error from PostCommit is wrapped as "post-commit <Reason>: ..." and
// returned together with the updated record so the caller can decide
// whether to surface it; the canonical state remains the post-commit row.
type transitionRule struct {
// From lists the statuses the game may currently be in.
From []string
// To is the status written by the transition.
To string
// Reason is a short label used in conflict and post-commit error text.
Reason string
// Precondition, when non-nil, can veto the transition before the write.
Precondition func(ctx context.Context, game GameRecord) error
// PostCommit, when non-nil, runs after the write and cache update.
PostCommit func(ctx context.Context, game GameRecord) error
// Notification is not read by Service.transition.
// NOTE(review): appears to be reserved for a later publishing step — confirm.
Notification *LobbyNotification
}
// transition is the shared state-machine step behind the public
// lifecycle handlers. It loads the game, enforces ownership, checks that
// the current status is listed in rule.From, runs the optional
// precondition, writes the new status, refreshes the cache and finally
// runs the optional post-commit hook.
//
// Moving to `running` stamps StartedAt only when the game has none yet;
// moving to `finished` always stamps FinishedAt. When the post-commit
// hook fails, the updated record is still returned together with the
// wrapped hook error.
func (s *Service) transition(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID uuid.UUID, rule transitionRule) (GameRecord, error) {
	current, err := s.GetGame(ctx, gameID)
	if err != nil {
		return GameRecord{}, err
	}
	if err = s.checkOwner(current, callerUserID, callerIsAdmin); err != nil {
		return GameRecord{}, err
	}
	if !slices.Contains(rule.From, current.Status) {
		return GameRecord{}, fmt.Errorf("%w: cannot %s game in status %q", ErrConflict, rule.Reason, current.Status)
	}
	if check := rule.Precondition; check != nil {
		if err = check(ctx, current); err != nil {
			return GameRecord{}, err
		}
	}

	stamp := s.deps.Now().UTC()
	change := statusUpdate{NewStatus: rule.To, UpdatedAt: stamp}
	if rule.To == GameStatusRunning {
		// A resumed game keeps its original start time.
		if current.StartedAt == nil {
			change.SetStarted = true
			change.StartedAt = stamp
		}
	} else if rule.To == GameStatusFinished {
		change.SetFinished = true
		change.FinishedAt = stamp
	}

	updated, err := s.deps.Store.UpdateGameStatus(ctx, gameID, change)
	if err != nil {
		return GameRecord{}, err
	}
	s.deps.Cache.PutGame(updated)

	if hook := rule.PostCommit; hook != nil {
		if hookErr := hook(ctx, updated); hookErr != nil {
			return updated, fmt.Errorf("post-commit %s: %w", rule.Reason, hookErr)
		}
	}
	return updated, nil
}
// checkOwner enforces ownership semantics:
//
//   - callerIsAdmin == true → always allowed (admin force-start, etc.).
//   - public games → callerIsAdmin is required.
//   - private games → callerUserID must equal game.OwnerUserID; a nil
//     caller or a game without an owner is rejected.
//
// Rejections wrap ErrForbidden.
func (s *Service) checkOwner(game GameRecord, callerUserID *uuid.UUID, callerIsAdmin bool) error {
	switch {
	case callerIsAdmin:
		return nil
	case game.Visibility == VisibilityPublic:
		return fmt.Errorf("%w: public games require admin authority", ErrForbidden)
	case callerUserID != nil && game.OwnerUserID != nil && *game.OwnerUserID == *callerUserID:
		return nil
	default:
		return fmt.Errorf("%w: caller is not the owner", ErrForbidden)
	}
}
+243
View File
@@ -0,0 +1,243 @@
package lobby
import (
"context"
"fmt"
"strings"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// IssueInviteInput is the parameter struct for Service.IssueInvite.
type IssueInviteInput struct {
// GameID identifies the game the invite is issued for.
GameID uuid.UUID
// InviterUserID is the caller; IssueInvite requires it to own the game.
InviterUserID uuid.UUID
// InvitedUserID binds the invite to one recipient. When nil, IssueInvite
// generates a redemption code instead.
InvitedUserID *uuid.UUID
// RaceName is trimmed and, when non-empty, passed through
// ValidateDisplayName before it is stored.
RaceName string
// ExpiresAt overrides the configured default TTL when non-nil; it must
// be after the service clock at issue time.
ExpiresAt *time.Time
}
// IssueInvite creates a new pending invite. When InvitedUserID is set
// the invite is user-bound and the recipient is notified (a failed
// notification is only logged); otherwise the service generates a hex
// code for code-based redemption. The game must be a private game owned
// by InviterUserID and be in `draft`, `enrollment_open` or
// `ready_to_start`.
func (s *Service) IssueInvite(ctx context.Context, in IssueInviteInput) (Invite, error) {
	game, err := s.GetGame(ctx, in.GameID)
	if err != nil {
		return Invite{}, err
	}
	if game.Visibility != VisibilityPrivate {
		return Invite{}, fmt.Errorf("%w: only private games accept invites", ErrConflict)
	}
	if err = s.checkOwner(game, &in.InviterUserID, false); err != nil {
		return Invite{}, err
	}
	if st := game.Status; st != GameStatusDraft && st != GameStatusEnrollmentOpen && st != GameStatusReadyToStart {
		return Invite{}, fmt.Errorf("%w: cannot issue invite while game is %q", ErrConflict, game.Status)
	}

	// An empty race name is stored as-is; only a non-empty one is validated.
	raceName := strings.TrimSpace(in.RaceName)
	if raceName != "" {
		if raceName, err = ValidateDisplayName(raceName); err != nil {
			return Invite{}, err
		}
	}

	now := s.deps.Now().UTC()
	var expires time.Time
	if in.ExpiresAt != nil {
		expires = in.ExpiresAt.UTC()
	} else {
		expires = now.Add(s.deps.Config.InviteDefaultTTL)
	}
	if !expires.After(now) {
		return Invite{}, fmt.Errorf("%w: expires_at must be in the future", ErrInvalidInput)
	}

	// Only invites without a bound recipient carry a redemption code.
	code := ""
	if in.InvitedUserID == nil {
		if code, err = generateInviteCode(); err != nil {
			return Invite{}, err
		}
	}

	row := inviteInsert{
		InviteID:      uuid.New(),
		GameID:        in.GameID,
		InviterUserID: in.InviterUserID,
		InvitedUserID: in.InvitedUserID,
		Code:          code,
		RaceName:      raceName,
		ExpiresAt:     expires,
	}
	invite, err := s.deps.Store.InsertInvite(ctx, row)
	if err != nil {
		return Invite{}, err
	}
	if in.InvitedUserID == nil {
		return invite, nil
	}

	event := LobbyNotification{
		Kind:           NotificationLobbyInviteReceived,
		IdempotencyKey: "invite-received:" + invite.InviteID.String(),
		Recipients:     []uuid.UUID{*in.InvitedUserID},
		Payload: map[string]any{
			"game_id":         game.GameID.String(),
			"inviter_user_id": in.InviterUserID.String(),
		},
	}
	// Best effort: the invite is already persisted, so a publish failure
	// is logged rather than returned.
	if pubErr := s.deps.Notification.PublishLobbyEvent(ctx, event); pubErr != nil {
		s.deps.Logger.Warn("invite issued notification failed",
			zap.String("invite_id", invite.InviteID.String()),
			zap.Error(pubErr))
	}
	return invite, nil
}
// RedeemInvite turns a pending invite into a membership for
// redeemerUserID. User-bound invites require the recipient to match
// `invited_user_id`; code-based invites accept any caller.
//
// The invite must belong to gameID (otherwise ErrNotFound), be pending
// and unexpired, and the game must be in `draft`, `enrollment_open` or
// `ready_to_start`. The invite's race name is reserved for the redeemer,
// a membership is inserted, and the invite is marked `redeemed`.
//
// The three writes are issued as separate store calls. If the membership
// insert fails, the race-name reservation is deleted again as
// compensation. The cache is refreshed as soon as the membership and
// race-name rows exist, so it matches the persisted rows even when the
// final invite-status write fails.
func (s *Service) RedeemInvite(ctx context.Context, redeemerUserID uuid.UUID, gameID, inviteID uuid.UUID) (Invite, error) {
	invite, err := s.deps.Store.LoadInvite(ctx, inviteID)
	if err != nil {
		return Invite{}, err
	}
	if invite.GameID != gameID {
		return Invite{}, ErrNotFound
	}
	if invite.Status != InviteStatusPending {
		return Invite{}, fmt.Errorf("%w: invite is %q", ErrConflict, invite.Status)
	}
	now := s.deps.Now().UTC()
	if !invite.ExpiresAt.After(now) {
		return Invite{}, fmt.Errorf("%w: invite expired at %s", ErrConflict, invite.ExpiresAt.UTC().Format(time.RFC3339))
	}
	if invite.InvitedUserID != nil && *invite.InvitedUserID != redeemerUserID {
		return Invite{}, fmt.Errorf("%w: invite is bound to a different user", ErrForbidden)
	}
	game, err := s.GetGame(ctx, gameID)
	if err != nil {
		return Invite{}, err
	}
	switch game.Status {
	case GameStatusDraft, GameStatusEnrollmentOpen, GameStatusReadyToStart:
	default:
		return Invite{}, fmt.Errorf("%w: cannot redeem invite while game is %q", ErrConflict, game.Status)
	}
	displayName := invite.RaceName
	if displayName == "" {
		return Invite{}, fmt.Errorf("%w: invite carries no race_name; ask issuer to re-issue", ErrInvalidInput)
	}
	canonical, err := s.deps.Policy.Canonical(displayName)
	if err != nil {
		return Invite{}, err
	}
	if err := s.assertRaceNameAvailable(ctx, canonical, redeemerUserID, gameID); err != nil {
		return Invite{}, err
	}
	if _, err := s.deps.Store.InsertRaceName(ctx, raceNameInsert{
		Name:        displayName,
		Canonical:   canonical,
		Status:      RaceNameStatusReservation,
		OwnerUserID: redeemerUserID,
		GameID:      gameID,
		ReservedAt:  &now,
	}); err != nil {
		return Invite{}, err
	}
	membership, err := s.deps.Store.InsertMembership(ctx, membershipInsert{
		MembershipID: uuid.New(),
		GameID:       gameID,
		UserID:       redeemerUserID,
		RaceName:     displayName,
		CanonicalKey: canonical,
	})
	if err != nil {
		// Compensate for the reservation written above. The membership
		// error is the one the caller needs; a failed rollback is logged
		// so the orphaned reservation is visible to operators.
		if delErr := s.deps.Store.DeleteRaceName(ctx, canonical, gameID); delErr != nil {
			s.deps.Logger.Warn("invite redeem race name rollback failed",
				zap.String("invite_id", inviteID.String()),
				zap.Error(delErr))
		}
		return Invite{}, err
	}

	// Both rows are persisted from here on, so the projection is updated
	// before the remaining write rather than after it.
	s.deps.Cache.PutMembership(membership)
	s.deps.Cache.PutRaceName(RaceNameEntry{
		Name:        displayName,
		Canonical:   canonical,
		Status:      RaceNameStatusReservation,
		OwnerUserID: redeemerUserID,
		GameID:      gameID,
		ReservedAt:  &now,
	})

	updated, err := s.deps.Store.UpdateInviteStatus(ctx, inviteID, InviteStatusRedeemed, now)
	if err != nil {
		return Invite{}, err
	}
	return updated, nil
}
// DeclineInvite transitions a pending recipient-bound invite to
// `declined`. Code-based invites cannot be declined (the code holder
// just never redeems them). The invite must belong to gameID, be bound
// to callerUserID and still be pending.
func (s *Service) DeclineInvite(ctx context.Context, callerUserID uuid.UUID, gameID, inviteID uuid.UUID) (Invite, error) {
	invite, err := s.deps.Store.LoadInvite(ctx, inviteID)
	if err != nil {
		return Invite{}, err
	}
	recipient := invite.InvitedUserID
	switch {
	case invite.GameID != gameID:
		return Invite{}, ErrNotFound
	case recipient == nil:
		return Invite{}, fmt.Errorf("%w: code-based invites cannot be declined", ErrConflict)
	case *recipient != callerUserID:
		return Invite{}, fmt.Errorf("%w: caller is not the invite recipient", ErrForbidden)
	case invite.Status != InviteStatusPending:
		return Invite{}, fmt.Errorf("%w: invite is %q", ErrConflict, invite.Status)
	}
	declinedAt := s.deps.Now().UTC()
	return s.deps.Store.UpdateInviteStatus(ctx, inviteID, InviteStatusDeclined, declinedAt)
}
// RevokeInvite transitions a pending invite to `revoked`. Only the
// inviter (or admin) may revoke. For user-bound invites the recipient is
// notified afterwards; a failed notification is logged and does not fail
// the call.
func (s *Service) RevokeInvite(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID, inviteID uuid.UUID) (Invite, error) {
	invite, err := s.deps.Store.LoadInvite(ctx, inviteID)
	if err != nil {
		return Invite{}, err
	}
	if invite.GameID != gameID {
		return Invite{}, ErrNotFound
	}
	if !callerIsAdmin && (callerUserID == nil || invite.InviterUserID != *callerUserID) {
		return Invite{}, fmt.Errorf("%w: caller is not the inviter", ErrForbidden)
	}
	if invite.Status != InviteStatusPending {
		return Invite{}, fmt.Errorf("%w: invite is %q", ErrConflict, invite.Status)
	}

	revoked, err := s.deps.Store.UpdateInviteStatus(ctx, inviteID, InviteStatusRevoked, s.deps.Now().UTC())
	if err != nil {
		return Invite{}, err
	}
	// Code-based invites have nobody to notify.
	if invite.InvitedUserID == nil {
		return revoked, nil
	}

	event := LobbyNotification{
		Kind:           NotificationLobbyInviteRevoked,
		IdempotencyKey: "invite-revoked:" + inviteID.String(),
		Recipients:     []uuid.UUID{*invite.InvitedUserID},
		Payload: map[string]any{
			"game_id": gameID.String(),
		},
	}
	if pubErr := s.deps.Notification.PublishLobbyEvent(ctx, event); pubErr != nil {
		s.deps.Logger.Warn("invite revoked notification failed",
			zap.String("invite_id", inviteID.String()),
			zap.Error(pubErr))
	}
	return revoked, nil
}
// ListMyInvites returns every invite where userID is the recipient. The
// call is delegated to the store unchanged.
func (s *Service) ListMyInvites(ctx context.Context, userID uuid.UUID) ([]Invite, error) {
return s.deps.Store.ListMyInvites(ctx, userID)
}
+246
View File
@@ -0,0 +1,246 @@
// Package lobby owns the platform-side game lifecycle of the Galaxy
// `backend` service. It implements the substage 5.4 surface documented in
// `backend/PLAN.md` §5.4 and `backend/README.md`:
//
// - Games CRUD with the enrollment/start/finish state machine.
// - Applications, invites, and memberships with their lifecycles.
// - Race Name Directory: registered, reservation, pending_registration
// tiers with platform-wide canonical-key uniqueness.
// - User-blocked and user-deleted cascades wired into `internal/user`
// through the `LobbyCascade` interface.
// - Inbound runtime hooks (`OnRuntimeSnapshot`, `OnGameFinished`) called
// by `internal/runtime` once the runtime implementation lands.
// - A periodic sweeper goroutine that releases expired
// `pending_registration` rows and auto-closes enrollment-expired
// games.
//
// Stages 5.5 / 5.7 inject the real RuntimeGateway and
// NotificationPublisher; until then `NewNoopRuntimeGateway` and
// `NewNoopNotificationPublisher` keep the package callable end-to-end.
package lobby
import (
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"time"
"galaxy/backend/internal/config"
"github.com/jackc/pgx/v5/pgconn"
"go.uber.org/zap"
)
// pgErrCodeUniqueViolation is the SQLSTATE value Postgres emits on a
// UNIQUE constraint violation. Duplicated from `internal/user` and
// `internal/admin` so the lobby package does not import either.
const pgErrCodeUniqueViolation = "23505"
// pgErrCodeCheckViolation is the SQLSTATE value Postgres emits when a
// CHECK constraint rejects a row. Used to map invalid status writes to
// ErrInvalidInput at the boundary.
const pgErrCodeCheckViolation = "23514"
// inviteCodeBytes is the number of random bytes in a generated invite
// code. Each byte yields two hex characters, so the wire string is 16
// chars.
const inviteCodeBytes = 8
// Visibility values stored verbatim in `games.visibility`.
const (
VisibilityPublic = "public"
VisibilityPrivate = "private"
)
// Game status vocabulary mirrors `games_status_chk` in
// `backend/internal/postgres/migrations/00001_init.sql`.
const (
GameStatusDraft = "draft"
GameStatusEnrollmentOpen = "enrollment_open"
GameStatusReadyToStart = "ready_to_start"
GameStatusStarting = "starting"
GameStatusStartFailed = "start_failed"
GameStatusRunning = "running"
GameStatusPaused = "paused"
GameStatusFinished = "finished"
GameStatusCancelled = "cancelled"
)
// Application status vocabulary mirrors `applications_status_chk`.
const (
ApplicationStatusPending = "pending"
ApplicationStatusApproved = "approved"
ApplicationStatusRejected = "rejected"
)
// Invite status vocabulary mirrors `invites_status_chk`. Every invite
// transition in this package starts from `pending`.
const (
InviteStatusPending = "pending"
InviteStatusRedeemed = "redeemed"
InviteStatusDeclined = "declined"
InviteStatusRevoked = "revoked"
InviteStatusExpired = "expired"
)
// Membership status vocabulary mirrors `memberships_status_chk`.
const (
MembershipStatusActive = "active"
MembershipStatusRemoved = "removed"
MembershipStatusBlocked = "blocked"
)
// Race-name status vocabulary mirrors `race_names_status_chk`.
const (
RaceNameStatusRegistered = "registered"
RaceNameStatusReservation = "reservation"
RaceNameStatusPendingRegistration = "pending_registration"
)
// Notification kinds emitted by lobby. Mirrors
// `backend/README.md` §10, where the channel mapping is documented.
const (
NotificationLobbyInviteReceived = "lobby.invite.received"
NotificationLobbyInviteRevoked = "lobby.invite.revoked"
NotificationLobbyApplicationSubmitted = "lobby.application.submitted"
NotificationLobbyApplicationApproved = "lobby.application.approved"
NotificationLobbyApplicationRejected = "lobby.application.rejected"
NotificationLobbyMembershipRemoved = "lobby.membership.removed"
NotificationLobbyMembershipBlocked = "lobby.membership.blocked"
NotificationLobbyRaceNameRegistered = "lobby.race_name.registered"
NotificationLobbyRaceNamePending = "lobby.race_name.pending"
NotificationLobbyRaceNameExpired = "lobby.race_name.expired"
)
// Deps aggregates every collaborator the lobby Service depends on.
//
// Store and Cache are required. Logger and Now default to zap.NewNop /
// time.Now when nil. NewService replaces a nil Runtime or Notification
// with the no-op implementations and a nil Policy with NewPolicy(), and
// substitutes defaults for non-positive Config durations.
// NOTE(review): Entitlement is not defaulted by NewService; confirm that
// callers which reach entitlement checks always supply it.
type Deps struct {
Store *Store
Cache *Cache
Runtime RuntimeGateway
Notification NotificationPublisher
Entitlement EntitlementProvider
Policy *Policy
Config config.LobbyConfig
Logger *zap.Logger
Now func() time.Time
}
// Service is the lobby-domain entry point. Every public method is
// goroutine-safe; concurrency safety is delegated to Postgres for
// persisted state and to `*Cache` for the in-memory projection.
type Service struct {
// deps holds the collaborators after NewService has applied defaults.
deps Deps
}
// NewService constructs a Service from deps, filling in defaults for the
// optional collaborators: a no-op logger (always namespaced "lobby"),
// time.Now, the no-op runtime gateway and notification publisher, a
// default race-name Policy, and fallback durations for non-positive
// Config values. Store and Cache must be non-nil — calling any method
// with a nil Store/Cache will panic at first use (matching how main.go
// signals missing wiring). The only error returned comes from building
// the default Policy.
func NewService(deps Deps) (*Service, error) {
	logger := deps.Logger
	if logger == nil {
		logger = zap.NewNop()
	}
	deps.Logger = logger.Named("lobby")

	if deps.Now == nil {
		deps.Now = time.Now
	}
	if deps.Runtime == nil {
		deps.Runtime = NewNoopRuntimeGateway(deps.Logger)
	}
	if deps.Notification == nil {
		deps.Notification = NewNoopNotificationPublisher(deps.Logger)
	}
	if deps.Policy == nil {
		defaultPolicy, err := NewPolicy()
		if err != nil {
			return nil, fmt.Errorf("lobby: build default race-name policy: %w", err)
		}
		deps.Policy = defaultPolicy
	}

	// Non-positive durations mean "unset" and fall back to the defaults.
	cfg := &deps.Config
	if cfg.SweeperInterval <= 0 {
		cfg.SweeperInterval = 60 * time.Second
	}
	if cfg.PendingRegistrationTTL <= 0 {
		cfg.PendingRegistrationTTL = 30 * 24 * time.Hour
	}
	if cfg.InviteDefaultTTL <= 0 {
		cfg.InviteDefaultTTL = 7 * 24 * time.Hour
	}

	svc := &Service{deps: deps}
	return svc, nil
}
// Logger exposes the named logger used by the service. Mainly useful for
// tests asserting on log output. A nil receiver yields a no-op logger.
func (s *Service) Logger() *zap.Logger {
	if s != nil {
		return s.deps.Logger
	}
	return zap.NewNop()
}
// Cache returns the in-memory projection. Used by main.go for the
// readiness probe and by tests. A nil receiver yields nil.
func (s *Service) Cache() *Cache {
	if s != nil {
		return s.deps.Cache
	}
	return nil
}
// Config returns the lobby-side runtime configuration, including the
// defaults applied by NewService. Used by the sweeper to read the tick
// interval and by tests to assert the pending-registration TTL. A nil
// receiver yields the zero LobbyConfig.
func (s *Service) Config() config.LobbyConfig {
	if s != nil {
		return s.deps.Config
	}
	return config.LobbyConfig{}
}
// generateInviteCode produces an `inviteCodeBytes`-byte hex code used
// for code-based invites. The function uses `crypto/rand`; a failure to
// read entropy is propagated to the caller.
func generateInviteCode() (string, error) {
buf := make([]byte, inviteCodeBytes)
if _, err := rand.Read(buf); err != nil {
return "", fmt.Errorf("lobby: generate invite code: %w", err)
}
return hex.EncodeToString(buf), nil
}
// isUniqueViolation reports whether err wraps a Postgres error with the
// UNIQUE-violation SQLSTATE. A non-empty constraintName additionally
// requires the violated constraint to have exactly that name; an empty
// constraintName matches any UNIQUE violation.
func isUniqueViolation(err error, constraintName string) bool {
	var pgErr *pgconn.PgError
	if !errors.As(err, &pgErr) || pgErr.Code != pgErrCodeUniqueViolation {
		return false
	}
	return constraintName == "" || pgErr.ConstraintName == constraintName
}
// isCheckViolation reports whether err wraps a Postgres error with the
// CHECK-violation SQLSTATE. A non-empty constraintName additionally
// requires the violated constraint to have exactly that name; an empty
// constraintName matches any CHECK violation.
func isCheckViolation(err error, constraintName string) bool {
	var pgErr *pgconn.PgError
	if !errors.As(err, &pgErr) || pgErr.Code != pgErrCodeCheckViolation {
		return false
	}
	return constraintName == "" || pgErr.ConstraintName == constraintName
}
+374
View File
@@ -0,0 +1,374 @@
package lobby_test
import (
"context"
"database/sql"
"errors"
"net/url"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/lobby"
backendpg "galaxy/backend/internal/postgres"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
// Settings for the throwaway Postgres container used by these tests.
const (
// testImage is the container image passed to the Postgres module.
testImage = "postgres:16-alpine"
testUser = "galaxy"
testPassword = "galaxy"
testDatabase = "galaxy_backend"
// testSchema is set as the connection's search_path.
testSchema = "backend"
// testStartup bounds how long the wait strategy waits for the container.
testStartup = 90 * time.Second
// testOpTimeout is used as the shared Postgres config's OperationTimeout.
testOpTimeout = 10 * time.Second
)
// startPostgres starts a Postgres testcontainer, opens a connection
// scoped to testSchema, pings it, applies the backend migrations and
// returns the handle. The test is skipped when the container cannot be
// started; any later setup failure is fatal. Container termination and
// db.Close are registered with t.Cleanup.
func startPostgres(t *testing.T) *sql.DB {
t.Helper()
// One context bounds the whole setup; it is cancelled at test cleanup.
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
t.Cleanup(cancel)
pgContainer, err := tcpostgres.Run(ctx, testImage,
tcpostgres.WithDatabase(testDatabase),
tcpostgres.WithUsername(testUser),
tcpostgres.WithPassword(testPassword),
testcontainers.WithWaitStrategy(
// Wait for the second occurrence of the readiness line.
// NOTE(review): presumably the first one comes from the image's
// bootstrap phase — confirm against the image's startup log.
wait.ForLog("database system is ready to accept connections").
WithOccurrence(2).
WithStartupTimeout(testStartup),
),
)
if err != nil {
// No container runtime available: skip rather than fail.
t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
}
t.Cleanup(func() {
if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
t.Errorf("terminate postgres container: %v", termErr)
}
})
baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
if err != nil {
t.Fatalf("connection string: %v", err)
}
// Point the connection's search_path at the backend schema.
scopedDSN, err := dsnWithSearchPath(baseDSN, testSchema)
if err != nil {
t.Fatalf("scope dsn: %v", err)
}
cfg := pgshared.DefaultConfig()
cfg.PrimaryDSN = scopedDSN
cfg.OperationTimeout = testOpTimeout
db, err := pgshared.OpenPrimary(ctx, cfg)
if err != nil {
t.Fatalf("open primary: %v", err)
}
t.Cleanup(func() {
if err := db.Close(); err != nil {
t.Errorf("close db: %v", err)
}
})
if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
t.Fatalf("ping: %v", err)
}
if err := backendpg.ApplyMigrations(ctx, db); err != nil {
t.Fatalf("apply migrations: %v", err)
}
return db
}
// dsnWithSearchPath returns baseDSN with its `search_path` query
// parameter set to schema. When the DSN carries no (or an empty)
// `sslmode`, `sslmode=disable` is added. The query string is re-encoded,
// so parameters come out sorted by key. A DSN that does not parse as a
// URL yields the parse error.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	u, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	query := u.Query()
	query.Set("search_path", schema)
	if mode := query.Get("sslmode"); mode == "" {
		query.Set("sslmode", "disable")
	}
	u.RawQuery = query.Encode()
	return u.String(), nil
}
// stubEntitlement is a fixed-answer entitlement provider for tests: it
// reports the same limit for every user.
type stubEntitlement struct {
// max is the value returned by GetMaxRegisteredRaceNames.
max int32
}
// GetMaxRegisteredRaceNames returns the configured limit regardless of
// the context or user passed in, and never fails.
func (s stubEntitlement) GetMaxRegisteredRaceNames(_ context.Context, _ uuid.UUID) (int32, error) {
return s.max, nil
}
// newServiceForTest builds a lobby.Service on top of db with a freshly
// warmed cache, a stub entitlement provider that always answers max, the
// supplied clock, and short fixed Config durations. Any setup failure
// aborts the test.
func newServiceForTest(t *testing.T, db *sql.DB, now func() time.Time, max int32) *lobby.Service {
	t.Helper()
	deps := lobby.Deps{
		Store:       lobby.NewStore(db),
		Cache:       lobby.NewCache(),
		Entitlement: stubEntitlement{max: max},
		Config: config.LobbyConfig{
			SweeperInterval:        time.Second,
			PendingRegistrationTTL: time.Hour,
			InviteDefaultTTL:       time.Hour,
		},
		Now: now,
	}
	// Load whatever the database already holds before the service is used.
	if err := deps.Cache.Warm(context.Background(), deps.Store); err != nil {
		t.Fatalf("warm cache: %v", err)
	}
	service, err := lobby.NewService(deps)
	if err != nil {
		t.Fatalf("new service: %v", err)
	}
	return service
}
// seedAccount inserts a minimal accounts row for userID so games and
// memberships referencing that user_id can be created without violating
// any FK. The email and user name are derived from the UUID string; the
// language and time zone are fixed to 'en' / 'UTC'.
func seedAccount(t *testing.T, db *sql.DB, userID uuid.UUID) {
	t.Helper()
	id := userID.String()
	const insertAccount = `
INSERT INTO backend.accounts (
user_id, email, user_name, preferred_language, time_zone
) VALUES ($1, $2, $3, 'en', 'UTC')
`
	email := id + "@test.local"
	userName := "user-" + id[:8]
	if _, err := db.ExecContext(context.Background(), insertAccount, userID, email, userName); err != nil {
		t.Fatalf("seed account %s: %v", userID, err)
	}
}
// TestEndToEndPrivateGameFlow walks a private game through its lifecycle
// against a real Postgres instance: create (draft) → open enrollment →
// ready-to-start → start (starting) → runtime snapshot (running) →
// pause → resume → cancel (cancelled). Every step is performed by the
// game owner without admin rights.
func TestEndToEndPrivateGameFlow(t *testing.T) {
db := startPostgres(t)
// The clock is frozen so every timestamp the service produces is `now`.
now := time.Now().UTC()
clock := func() time.Time { return now }
svc := newServiceForTest(t, db, clock, 5)
owner := uuid.New()
seedAccount(t, db, owner)
game, err := svc.CreateGame(context.Background(), lobby.CreateGameInput{
OwnerUserID: &owner,
Visibility: lobby.VisibilityPrivate,
GameName: "End-to-End Game",
MinPlayers: 1,
MaxPlayers: 4,
StartGapHours: 1,
StartGapPlayers: 1,
EnrollmentEndsAt: now.Add(time.Hour),
TurnSchedule: "0 0 * * *",
TargetEngineVersion: "1.0.0",
})
if err != nil {
t.Fatalf("create game: %v", err)
}
if game.Status != lobby.GameStatusDraft {
t.Fatalf("create game status = %q, want draft", game.Status)
}
// A freshly created game must be visible through the service cache.
if got, ok := svc.Cache().GetGame(game.GameID); !ok || got.GameID != game.GameID {
t.Fatalf("game not cached after create")
}
if _, err := svc.OpenEnrollment(context.Background(), &owner, false, game.GameID); err != nil {
t.Fatalf("open enrollment: %v", err)
}
// Seed one active member so the game can clear min_players.
applicant := uuid.New()
seedAccount(t, db, applicant)
game = mustGet(t, svc, game.GameID)
// The public-only application handler does not run on private games,
// so the membership row is inserted directly with SQL to keep the test
// focused on the state machine.
store := lobby.NewStore(db)
canonicalPolicy, err := lobby.NewPolicy()
if err != nil {
t.Fatalf("new policy: %v", err)
}
canonical, err := canonicalPolicy.Canonical("PrivateRace")
if err != nil {
t.Fatalf("canonical: %v", err)
}
if _, err := db.ExecContext(context.Background(), `
INSERT INTO backend.memberships (
membership_id, game_id, user_id, race_name, canonical_key, status
) VALUES ($1, $2, $3, $4, $5, 'active')
`, uuid.New(), game.GameID, applicant, "PrivateRace", string(canonical)); err != nil {
t.Fatalf("seed membership: %v", err)
}
// Re-warm cache so the new membership flows through MembershipsForGame.
if err := svc.Cache().Warm(context.Background(), store); err != nil {
t.Fatalf("re-warm cache: %v", err)
}
if _, err := svc.ReadyToStart(context.Background(), &owner, false, game.GameID); err != nil {
t.Fatalf("ready-to-start: %v", err)
}
if _, err := svc.Start(context.Background(), &owner, false, game.GameID); err != nil {
t.Fatalf("start: %v", err)
}
game = mustGet(t, svc, game.GameID)
if game.Status != lobby.GameStatusStarting {
t.Fatalf("after start status = %q, want starting", game.Status)
}
// Simulate the runtime reporting that the engine is up: starting → running.
if err := svc.OnRuntimeSnapshot(context.Background(), game.GameID, lobby.RuntimeSnapshot{
CurrentTurn: 1,
RuntimeStatus: "running",
}); err != nil {
t.Fatalf("on-runtime-snapshot running: %v", err)
}
game = mustGet(t, svc, game.GameID)
if game.Status != lobby.GameStatusRunning {
t.Fatalf("after runtime snapshot status = %q, want running", game.Status)
}
// Only the absence of an error is asserted for pause / resume; the
// final status is checked once after cancel.
if _, err := svc.Pause(context.Background(), &owner, false, game.GameID); err != nil {
t.Fatalf("pause: %v", err)
}
if _, err := svc.Resume(context.Background(), &owner, false, game.GameID); err != nil {
t.Fatalf("resume: %v", err)
}
if _, err := svc.Cancel(context.Background(), &owner, false, game.GameID); err != nil {
t.Fatalf("cancel: %v", err)
}
game, err = svc.GetGame(context.Background(), game.GameID)
if err != nil {
t.Fatalf("get cancelled: %v", err)
}
if game.Status != lobby.GameStatusCancelled {
t.Fatalf("after cancel status = %q, want cancelled", game.Status)
}
}
// TestEndToEndPublicGameApplicationApproval covers the application flow
// of an ownerless public game driven by an admin caller: an application
// is submitted and approved, the approval yields exactly one membership
// for the applicant, and approving a second application that uses the
// same race name fails with lobby.ErrRaceNameTaken.
func TestEndToEndPublicGameApplicationApproval(t *testing.T) {
db := startPostgres(t)
// The clock is frozen so every timestamp the service produces is `now`.
now := time.Now().UTC()
clock := func() time.Time { return now }
svc := newServiceForTest(t, db, clock, 5)
game, err := svc.CreateGame(context.Background(), lobby.CreateGameInput{
OwnerUserID: nil,
Visibility: lobby.VisibilityPublic,
GameName: "Public Game",
MinPlayers: 1,
MaxPlayers: 8,
StartGapHours: 1,
StartGapPlayers: 1,
EnrollmentEndsAt: now.Add(time.Hour),
TurnSchedule: "0 0 * * *",
TargetEngineVersion: "1.0.0",
})
if err != nil {
t.Fatalf("create public game: %v", err)
}
// The game has no owner, so the transition to open enrollment is made
// as an admin: OpenEnrollment is called with a nil caller and
// callerIsAdmin=true (the admin force-start path is not used here).
if _, err := svc.OpenEnrollment(context.Background(), nil, true, game.GameID); err != nil {
t.Fatalf("open enrollment (admin): %v", err)
}
applicant := uuid.New()
seedAccount(t, db, applicant)
app, err := svc.SubmitApplication(context.Background(), lobby.SubmitApplicationInput{
GameID: game.GameID,
ApplicantUserID: applicant,
RaceName: "AlphaCentauri",
})
if err != nil {
t.Fatalf("submit application: %v", err)
}
if app.Status != lobby.ApplicationStatusPending {
t.Fatalf("application status = %q, want pending", app.Status)
}
approved, err := svc.ApproveApplication(context.Background(), nil, true, game.GameID, app.ApplicationID)
if err != nil {
t.Fatalf("approve application: %v", err)
}
if approved.Status != lobby.ApplicationStatusApproved {
t.Fatalf("approved status = %q, want approved", approved.Status)
}
// Approval must have produced exactly one membership, for the applicant.
memberships, err := svc.ListMembershipsForGame(context.Background(), game.GameID)
if err != nil {
t.Fatalf("list memberships: %v", err)
}
if len(memberships) != 1 || memberships[0].UserID != applicant {
t.Fatalf("memberships = %+v, want one for %s", memberships, applicant)
}
// A different user may still submit an application with the same race
// name; the conflict must surface when that application is approved.
other := uuid.New()
seedAccount(t, db, other)
_, err = svc.SubmitApplication(context.Background(), lobby.SubmitApplicationInput{
GameID: game.GameID,
ApplicantUserID: other,
RaceName: "AlphaCentauri",
})
if err != nil {
t.Fatalf("second application setup: %v", err)
}
// The second application's id is looked up in the database because the
// SubmitApplication result was discarded above.
if _, err := svc.ApproveApplication(context.Background(), nil, true, game.GameID, secondApplication(t, db, game.GameID, other)); err == nil {
t.Fatal("approving second application with same race name should conflict")
} else if !errors.Is(err, lobby.ErrRaceNameTaken) {
t.Fatalf("approve second application: err = %v, want ErrRaceNameTaken", err)
}
}
// TestSweeperReleasesExpiredPendingRegistrations seeds a
// pending_registration race-name row whose expires_at lies one hour
// before the frozen service clock and checks that a single sweeper
// tick leaves no row behind for that canonical key.
func TestSweeperReleasesExpiredPendingRegistrations(t *testing.T) {
	ctx := context.Background()
	db := startPostgres(t)

	frozen := time.Now().UTC()
	svc := newServiceForTest(t, db, func() time.Time { return frozen }, 5)

	owner := uuid.New()
	seedAccount(t, db, owner)

	var (
		gameID    = uuid.New()
		expiresAt = frozen.Add(-time.Hour)
	)
	_, err := db.ExecContext(ctx, `
INSERT INTO backend.race_names (
name, canonical, status, owner_user_id, game_id, expires_at
) VALUES ('Vega', 'vega', 'pending_registration', $1, $2, $3)
`, owner, gameID, expiresAt)
	if err != nil {
		t.Fatalf("seed pending row: %v", err)
	}

	if err := lobby.NewSweeper(svc).Tick(ctx); err != nil {
		t.Fatalf("sweeper tick: %v", err)
	}

	remaining, err := lobby.NewStore(db).FindRaceNameByCanonical(ctx, "vega")
	if err != nil {
		t.Fatalf("find canonical after sweep: %v", err)
	}
	if n := len(remaining); n != 0 {
		t.Fatalf("expected pending row to be released, got %d rows", n)
	}
}
// mustGet fetches the game identified by gameID through the service and
// aborts the calling test when the lookup fails.
func mustGet(t *testing.T, svc *lobby.Service, gameID uuid.UUID) lobby.GameRecord {
	t.Helper()

	ctx := context.Background()
	record, lookupErr := svc.GetGame(ctx, gameID)
	if lookupErr != nil {
		t.Fatalf("get game %s: %v", gameID, lookupErr)
	}
	return record
}
// secondApplication returns the id of the application that userID
// submitted on gameID, read straight from backend.applications.
// TestEndToEndPublicGameApplicationApproval uses it to obtain the
// not-yet-decided application without depending on insert order. The
// calling test is aborted when the row cannot be read.
func secondApplication(t *testing.T, db *sql.DB, gameID, userID uuid.UUID) uuid.UUID {
	t.Helper()

	row := db.QueryRowContext(context.Background(), `
SELECT application_id FROM backend.applications
WHERE game_id = $1 AND applicant_user_id = $2
`, gameID, userID)

	var applicationID uuid.UUID
	if err := row.Scan(&applicationID); err != nil {
		t.Fatalf("lookup second application: %v", err)
	}
	return applicationID
}
+160
View File
@@ -0,0 +1,160 @@
package lobby
import (
"context"
"fmt"
"github.com/google/uuid"
"go.uber.org/zap"
)
// ListMembershipsForGame returns the membership rows the store holds
// for gameID. The game is resolved first, so an unknown game surfaces
// the GetGame error instead of an empty list. The rows are read from
// the store rather than the cache (per the original note, the cache
// holds only active rows and would miss removed/blocked entries; the
// store is documented to order by joined_at ASC).
func (s *Service) ListMembershipsForGame(ctx context.Context, gameID uuid.UUID) ([]Membership, error) {
	_, err := s.GetGame(ctx, gameID)
	if err != nil {
		return nil, err
	}
	memberships, err := s.deps.Store.ListMembershipsForGame(ctx, gameID)
	return memberships, err
}
// RemoveMembership transitions an active membership to `removed`.
// Besides admins, the owner of the game (owner removal) and the
// membership's own user (self-leave) are authorised. The shared
// status-change flow also releases the membership's race-name
// reservation for this game.
func (s *Service) RemoveMembership(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID, membershipID uuid.UUID) (Membership, error) {
	// Leaving a game is a removal of one's own membership.
	const allowSelfLeave = true
	return s.changeMembershipStatus(
		ctx, callerUserID, callerIsAdmin,
		gameID, membershipID,
		MembershipStatusRemoved, NotificationLobbyMembershipRemoved,
		allowSelfLeave,
	)
}
// BlockMembership transitions an active membership to `blocked`. Only
// the owner of the game or an admin may block; a member cannot block
// their own membership through this call.
func (s *Service) BlockMembership(ctx context.Context, callerUserID *uuid.UUID, callerIsAdmin bool, gameID, membershipID uuid.UUID) (Membership, error) {
	// Blocking is never a self-service operation.
	const allowSelf = false
	return s.changeMembershipStatus(
		ctx, callerUserID, callerIsAdmin,
		gameID, membershipID,
		MembershipStatusBlocked, NotificationLobbyMembershipBlocked,
		allowSelf,
	)
}
// AdminBanMember is the admin-only variant of BlockMembership: it
// targets a user_id directly (the request body carries it instead of a
// membership_id), moves that user's active membership on gameID to
// `blocked` and emits the same notification kind as BlockMembership,
// with reason added to the payload.
//
// Like the owner-driven block path, the ban releases the member's
// race-name reservation for this game; without that the reservation
// would outlive the membership, because the game-finish flow only
// visits active memberships. The release and the notification are
// best-effort: failures are logged and do not fail the ban.
//
// Returns the GetGame error when the game cannot be resolved and
// ErrNotFound when the user has no active membership on the game.
func (s *Service) AdminBanMember(ctx context.Context, gameID, userID uuid.UUID, reason string) (Membership, error) {
	// Existence check only: the game record itself is not needed below.
	if _, err := s.GetGame(ctx, gameID); err != nil {
		return Membership{}, err
	}
	memberships, err := s.deps.Store.ListMembershipsForGame(ctx, gameID)
	if err != nil {
		return Membership{}, err
	}

	var (
		target Membership
		found  bool
	)
	for _, m := range memberships {
		if m.UserID == userID && m.Status == MembershipStatusActive {
			target, found = m, true
			break
		}
	}
	if !found {
		return Membership{}, ErrNotFound
	}

	now := s.deps.Now().UTC()
	updated, err := s.deps.Store.UpdateMembershipStatus(ctx, target.MembershipID, MembershipStatusBlocked, now)
	if err != nil {
		return Membership{}, err
	}
	s.deps.Cache.PutMembership(updated)

	// Release the race-name reservation tied to this game, mirroring
	// what the shared Remove / Block flow does.
	canonical := CanonicalKey(target.CanonicalKey)
	if err := s.deps.Store.DeleteRaceName(ctx, canonical, gameID); err != nil {
		s.deps.Logger.Warn("release race name on admin ban failed",
			zap.String("membership_id", updated.MembershipID.String()),
			zap.String("canonical_key", target.CanonicalKey),
			zap.Error(err))
	} else {
		s.deps.Cache.RemoveRaceName(canonical)
	}

	intent := LobbyNotification{
		Kind:           NotificationLobbyMembershipBlocked,
		IdempotencyKey: "membership-blocked:" + updated.MembershipID.String(),
		Recipients:     []uuid.UUID{userID},
		Payload: map[string]any{
			"game_id": gameID.String(),
			"reason":  reason,
		},
	}
	if pubErr := s.deps.Notification.PublishLobbyEvent(ctx, intent); pubErr != nil {
		s.deps.Logger.Warn("admin ban notification failed",
			zap.String("membership_id", updated.MembershipID.String()),
			zap.Error(pubErr))
	}
	return updated, nil
}
// changeMembershipStatus is the shared implementation behind
// RemoveMembership and BlockMembership.
//
// The membership must belong to gameID (ErrNotFound otherwise) and be
// active (ErrConflict otherwise). Non-admin callers must pass
// canManageMembership (ErrForbidden otherwise); allowSelf controls
// whether the caller's own membership is an authorised target (true
// for Remove → "leave the game"; false for Block → owner-only).
//
// After the status is persisted and cached, the race-name reservation
// for this game is released and a notification of notificationKind is
// published to the affected user. Both follow-ups are best-effort:
// failures are logged and the updated membership is still returned.
func (s *Service) changeMembershipStatus(
	ctx context.Context,
	callerUserID *uuid.UUID,
	callerIsAdmin bool,
	gameID, membershipID uuid.UUID,
	newStatus, notificationKind string,
	allowSelf bool,
) (Membership, error) {
	current, err := s.deps.Store.LoadMembership(ctx, membershipID)
	if err != nil {
		return Membership{}, err
	}
	switch {
	case current.GameID != gameID:
		return Membership{}, ErrNotFound
	case current.Status != MembershipStatusActive:
		return Membership{}, fmt.Errorf("%w: membership is %q", ErrConflict, current.Status)
	}

	game, err := s.GetGame(ctx, gameID)
	if err != nil {
		return Membership{}, err
	}
	if !callerIsAdmin && !s.canManageMembership(game, current, callerUserID, allowSelf) {
		return Membership{}, fmt.Errorf("%w: caller is not authorised to manage this membership", ErrForbidden)
	}

	updated, err := s.deps.Store.UpdateMembershipStatus(ctx, membershipID, newStatus, s.deps.Now().UTC())
	if err != nil {
		return Membership{}, err
	}
	s.deps.Cache.PutMembership(updated)

	if newStatus != MembershipStatusActive {
		// The membership no longer holds its seat, so the race-name
		// reservation tied to this game is released as well.
		key := CanonicalKey(current.CanonicalKey)
		releaseErr := s.deps.Store.DeleteRaceName(ctx, key, gameID)
		if releaseErr == nil {
			s.deps.Cache.RemoveRaceName(key)
		} else {
			s.deps.Logger.Warn("release race name on membership change failed",
				zap.String("membership_id", membershipID.String()),
				zap.String("canonical_key", current.CanonicalKey),
				zap.Error(releaseErr))
		}
	}

	pubErr := s.deps.Notification.PublishLobbyEvent(ctx, LobbyNotification{
		Kind:           notificationKind,
		IdempotencyKey: notificationKind + ":" + updated.MembershipID.String(),
		Recipients:     []uuid.UUID{updated.UserID},
		Payload: map[string]any{
			"game_id": gameID.String(),
		},
	})
	if pubErr != nil {
		s.deps.Logger.Warn("membership notification failed",
			zap.String("membership_id", updated.MembershipID.String()),
			zap.String("kind", notificationKind),
			zap.Error(pubErr))
	}
	return updated, nil
}
// canManageMembership reports whether a non-admin caller may change
// membership on game. Public games are never manageable here
// (admin-only), and an anonymous caller never qualifies. Otherwise the
// caller must be the game owner or, when allowSelf is set, the user
// the membership belongs to.
func (s *Service) canManageMembership(game GameRecord, membership Membership, callerUserID *uuid.UUID, allowSelf bool) bool {
	if game.Visibility == VisibilityPublic || callerUserID == nil {
		return false
	}
	caller := *callerUserID
	isOwner := game.OwnerUserID != nil && *game.OwnerUserID == caller
	isSelf := allowSelf && membership.UserID == caller
	return isOwner || isSelf
}
+139
View File
@@ -0,0 +1,139 @@
package lobby
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
confusables "github.com/disciplinedware/go-confusables"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// raceNameMaxRuneLen bounds the display length of a race name, counted
// in runes (not bytes). It must match the documented user-facing
// limit. The limit is applied in code through
// `utf8.RuneCountInString` checks in `Policy.Canonical` and
// `ValidateDisplayName` rather than enforced at the storage boundary
// so migrations stay simple.
const raceNameMaxRuneLen = 32
// CanonicalKey is the platform-wide uniqueness key for race names, as
// produced by `Policy.Canonical`. Display names that map to the same
// CanonicalKey count as the "same" race name for ownership purposes,
// regardless of casing or visually-confusable substitutions.
type CanonicalKey string

// String returns the key as a plain string.
func (k CanonicalKey) String() string {
	return string(k)
}

// IsZero reports whether the key is empty or consists of whitespace
// only, i.e. carries no usable value.
func (k CanonicalKey) IsZero() bool {
	return len(strings.TrimSpace(k.String())) == 0
}
// confusableSkeletoner is the one-method seam behind the confusable
// step of the canonicalisation pipeline. The value returned by
// `confusables.Default()` (package `disciplinedware/go-confusables`)
// is stored behind it in `NewPolicy`; tests substitute a deterministic
// stub via `WithSkeletoner`.
type confusableSkeletoner interface {
// Skeleton maps its input to the form used for comparison;
// `Policy.Canonical` uses the result as the canonical key.
Skeleton(string) string
}
// Policy holds the canonicalisation pipeline used by the Race Name
// Directory. The pipeline is `case-fold → anti-fraud digit-letter
// replace → confusable skeleton`. Each step is idempotent.
//
// NOTE(review): the golang.org/x/text/cases documentation states that
// a Caser may be stateful and should not be shared between
// goroutines — confirm that a Policy is never used concurrently, or
// build the folder per call.
type Policy struct {
// caseFolder performs the case-folding step.
caseFolder cases.Caser
// skeletoner performs the confusable-skeleton step; replaceable via
// WithSkeletoner.
skeletoner confusableSkeletoner
}
// antiFraudReplacer collapses the documented ASCII digit-to-letter
// pairs (1→i, 0→o, 8→b) so `P1lot` and `Pilot` canonicalise to the
// same key. `Policy.Canonical` applies it to the case-folded name,
// which is why only lowercase replacement letters are listed. The set
// is intentionally small — adding entries broadens the equivalence
// classes platform-wide and is a deliberate policy decision.
var antiFraudReplacer = strings.NewReplacer(
"1", "i",
"0", "o",
"8", "b",
)
// NewPolicy builds the default race-name canonicalisation policy: a
// compact case folder plus the default skeletoner from
// `disciplinedware/go-confusables`. An error is returned when that
// default skeletoner is nil — not expected in practice, but surfaced
// explicitly so tests can assert on the failure.
//
// NOTE(review): the nil comparison is made on the interface value; if
// confusables.Default() returns a typed nil pointer the check would
// not fire — confirm against the library's return type.
func NewPolicy() (*Policy, error) {
	var skeletoner confusableSkeletoner = confusables.Default()
	if skeletoner == nil {
		return nil, fmt.Errorf("lobby: build race-name policy: confusables.Default() returned nil")
	}
	return &Policy{
		caseFolder: cases.Fold(cases.Compact),
		skeletoner: skeletoner,
	}, nil
}
// WithSkeletoner returns a copy of the policy that uses s for the
// confusable-skeleton step; the receiver itself is not modified. A nil
// receiver yields nil and a nil s yields the receiver unchanged. Tests
// use this to plug in a deterministic stub; production wiring keeps
// the default obtained from `NewPolicy`.
func (p *Policy) WithSkeletoner(s confusableSkeletoner) *Policy {
	switch {
	case p == nil:
		return nil
	case s == nil:
		return p
	}
	clone := *p
	clone.skeletoner = s
	return &clone
}
// Canonical computes the canonical key for raceName. Surrounding
// whitespace is trimmed, then the name is case-folded, passed through
// the anti-fraud digit-to-letter replacer and finally reduced to its
// confusable skeleton.
//
// ErrInvalidInput is returned when the policy is not initialised, when
// raceName is empty after trimming, when it is longer than
// raceNameMaxRuneLen runes, or when the resulting key is blank.
//
// Case folding is intentionally locale-independent — two players from
// different locales must agree on which names collide. The folder is
// therefore the one built by `NewPolicy`, which configures it with
// options only and no language tag.
func (p *Policy) Canonical(raceName string) (CanonicalKey, error) {
	if p == nil || p.skeletoner == nil {
		return "", fmt.Errorf("%w: lobby policy not initialised", ErrInvalidInput)
	}

	name := strings.TrimSpace(raceName)
	switch {
	case name == "":
		return "", fmt.Errorf("%w: race name must not be empty", ErrInvalidInput)
	case utf8.RuneCountInString(name) > raceNameMaxRuneLen:
		return "", fmt.Errorf("%w: race name exceeds %d characters", ErrInvalidInput, raceNameMaxRuneLen)
	}

	// fold → anti-fraud replace → skeleton, in that order.
	key := p.skeletoner.Skeleton(antiFraudReplacer.Replace(p.caseFolder.String(name)))
	if strings.TrimSpace(key) == "" {
		return "", fmt.Errorf("%w: race name canonical key is empty", ErrInvalidInput)
	}
	return CanonicalKey(key), nil
}
// ValidateDisplayName checks the structural invariants of a
// caller-supplied display name and returns its trimmed form. The name
// must be non-empty after trimming, at most raceNameMaxRuneLen runes
// long and free of control characters; any violation yields an error
// wrapping ErrInvalidInput.
func ValidateDisplayName(raceName string) (string, error) {
	name := strings.TrimSpace(raceName)
	switch {
	case name == "":
		return "", fmt.Errorf("%w: race name must not be empty", ErrInvalidInput)
	case utf8.RuneCountInString(name) > raceNameMaxRuneLen:
		return "", fmt.Errorf("%w: race name exceeds %d characters", ErrInvalidInput, raceNameMaxRuneLen)
	case strings.IndexFunc(name, unicode.IsControl) >= 0:
		return "", fmt.Errorf("%w: race name must not contain control characters", ErrInvalidInput)
	}
	return name, nil
}
// languageForFolder names the undetermined (locale-independent)
// language tag associated with race-name case folding.
//
// NOTE(review): nothing in the visible code passes this value to
// `cases.Fold`, which `NewPolicy` calls with options only; the
// variable appears to exist so the `language` import stays referenced
// and, per the original note, so tests can refer to it — confirm
// before removing.
var languageForFolder = language.Und
+98
View File
@@ -0,0 +1,98 @@
package lobby
import (
"errors"
"strings"
"testing"
)
// TestPolicyCanonicalCaseFold checks that casing and surrounding
// whitespace do not influence the canonical key: every variant must
// canonicalise to the key of the baseline spelling.
func TestPolicyCanonicalCaseFold(t *testing.T) {
	policy := mustPolicy(t)

	want, err := policy.Canonical("Andromeda")
	if err != nil {
		t.Fatalf("baseline canonical: %v", err)
	}

	variants := []string{
		"andromeda",
		"ANDROMEDA",
		" Andromeda ",
	}
	for _, variant := range variants {
		got, err := policy.Canonical(variant)
		if err != nil {
			t.Fatalf("canonical %q: %v", variant, err)
		}
		if got != want {
			t.Errorf("canonical %q = %q, want %q", variant, got, want)
		}
	}
}
// TestPolicyCanonicalAntiFraud checks that digit look-alikes (1 for i,
// 0 for o) collapse onto the key of the plain spelling "pilot".
func TestPolicyCanonicalAntiFraud(t *testing.T) {
	policy := mustPolicy(t)

	want, err := policy.Canonical("pilot")
	if err != nil {
		t.Fatalf("baseline canonical: %v", err)
	}

	lookAlikes := []string{"P1lot", "p1lot", "p1L0T", "P1L0t"}
	for _, spelling := range lookAlikes {
		got, err := policy.Canonical(spelling)
		if err != nil {
			t.Fatalf("canonical %q: %v", spelling, err)
		}
		if got != want {
			t.Errorf("canonical %q = %q, want %q", spelling, got, want)
		}
	}
}
// TestPolicyCanonicalRejectsEmpty checks that a whitespace-only name is
// rejected with ErrInvalidInput.
func TestPolicyCanonicalRejectsEmpty(t *testing.T) {
	if _, err := mustPolicy(t).Canonical(" "); !errors.Is(err, ErrInvalidInput) {
		t.Fatalf("canonical empty: err = %v, want ErrInvalidInput", err)
	}
}
// TestPolicyCanonicalRejectsTooLong checks that a 50-rune name, which
// exceeds the race-name length limit, is rejected with ErrInvalidInput.
func TestPolicyCanonicalRejectsTooLong(t *testing.T) {
	tooLong := strings.Repeat("a", 50)
	if _, err := mustPolicy(t).Canonical(tooLong); !errors.Is(err, ErrInvalidInput) {
		t.Fatalf("canonical too long: err = %v, want ErrInvalidInput", err)
	}
}
// TestValidateDisplayNameRejectsControlChars checks that a name with an
// embedded NUL is rejected with ErrInvalidInput while an ordinary name
// containing a space is accepted.
func TestValidateDisplayNameRejectsControlChars(t *testing.T) {
	_, err := ValidateDisplayName("bad\x00name")
	if !errors.Is(err, ErrInvalidInput) {
		t.Fatalf("ValidateDisplayName control: err = %v, want ErrInvalidInput", err)
	}

	_, err = ValidateDisplayName("good name")
	if err != nil {
		t.Fatalf("ValidateDisplayName valid: err = %v", err)
	}
}
// TestPolicyWithSkeletonerOverrides checks that a skeletoner installed
// through WithSkeletoner determines the canonical key: a stub that
// always answers "fixed" must make Canonical return "fixed".
func TestPolicyWithSkeletonerOverrides(t *testing.T) {
	constant := stubSkeletoner(func(string) string { return "fixed" })

	got, err := mustPolicy(t).WithSkeletoner(constant).Canonical("Andromeda")
	if err != nil {
		t.Fatalf("canonical with stub: %v", err)
	}
	if got != CanonicalKey("fixed") {
		t.Errorf("canonical with stub = %q, want %q", got, "fixed")
	}
}
// mustPolicy returns the default policy from NewPolicy and aborts the
// calling test when it cannot be built.
func mustPolicy(t *testing.T) *Policy {
	t.Helper()

	policy, buildErr := NewPolicy()
	if buildErr != nil {
		t.Fatalf("NewPolicy: %v", buildErr)
	}
	return policy
}
// stubSkeletoner adapts a plain function to the skeletoner seam so
// tests can supply a deterministic skeleton.
type stubSkeletoner func(string) string

// Skeleton calls the wrapped function with input and returns its
// result.
func (fn stubSkeletoner) Skeleton(input string) string {
	return fn(input)
}
@@ -0,0 +1,101 @@
package lobby
import (
"context"
"errors"
"fmt"
"github.com/google/uuid"
"go.uber.org/zap"
)
// RegisterRaceName promotes a `pending_registration` row owned by
// userID into a `registered` row. The promotion succeeds when:
//
//   - the user has a `pending_registration` row matching the supplied
//     display name (canonical key);
//   - the row is still inside its registration window
//     (expires_at > now; a row without expires_at never expires here);
//   - the user owns fewer than `entitlement.max_registered_race_names`
//     `registered` rows (a limit of 1 applies when no entitlement
//     provider is configured).
//
// Errors, in the order they are checked:
//
//   - ErrInvalidInput (from validation / canonicalisation) for a
//     malformed display name;
//   - ErrRaceNameTaken when another user holds a registered,
//     reservation or pending_registration row for the canonical key;
//   - ErrConflict when the caller has already registered the name, or
//     when the pending row disappears before it can be promoted;
//   - ErrNotFound when the caller has no pending_registration row;
//   - ErrPendingExpired when the registration window has closed;
//   - ErrEntitlementExceeded when the caller is at their limit.
//
// On success the cache entry for the canonical key is replaced and a
// notification is published; a failed publish is only logged.
func (s *Service) RegisterRaceName(ctx context.Context, userID uuid.UUID, displayName string) (RaceNameEntry, error) {
displayName, err := ValidateDisplayName(displayName)
if err != nil {
return RaceNameEntry{}, err
}
canonical, err := s.deps.Policy.Canonical(displayName)
if err != nil {
return RaceNameEntry{}, err
}
rows, err := s.deps.Store.FindRaceNameByCanonical(ctx, canonical)
if err != nil {
return RaceNameEntry{}, err
}
// Scan every row for the canonical key: any live row held by another
// user blocks the registration; among the caller's own rows only the
// pending_registration one is eligible for promotion.
var pending *RaceNameEntry
for i := range rows {
row := rows[i]
if row.OwnerUserID != userID {
if row.Status == RaceNameStatusRegistered ||
row.Status == RaceNameStatusReservation ||
row.Status == RaceNameStatusPendingRegistration {
return RaceNameEntry{}, fmt.Errorf("%w: race name held by another user", ErrRaceNameTaken)
}
continue
}
if row.Status == RaceNameStatusRegistered {
return RaceNameEntry{}, fmt.Errorf("%w: race name already registered by caller", ErrConflict)
}
if row.Status == RaceNameStatusPendingRegistration {
// Point into the slice, not at the loop-local copy.
pending = &rows[i]
}
}
if pending == nil {
return RaceNameEntry{}, fmt.Errorf("%w: no pending_registration row for caller", ErrNotFound)
}
now := s.deps.Now().UTC()
// The window is closed once expires_at is at or before now.
if pending.ExpiresAt != nil && !pending.ExpiresAt.After(now) {
return RaceNameEntry{}, fmt.Errorf("%w: pending_registration window closed at %s", ErrPendingExpired, pending.ExpiresAt.UTC().Format("2006-01-02T15:04:05Z07:00"))
}
// Default limit when no entitlement provider is wired in.
maxAllowed := int32(1)
if s.deps.Entitlement != nil {
got, eerr := s.deps.Entitlement.GetMaxRegisteredRaceNames(ctx, userID)
if eerr != nil {
return RaceNameEntry{}, fmt.Errorf("lobby: read entitlement: %w", eerr)
}
maxAllowed = got
}
currentCount, err := s.deps.Store.CountRegisteredRaceNamesByUser(ctx, userID)
if err != nil {
return RaceNameEntry{}, err
}
if int32(currentCount) >= maxAllowed {
return RaceNameEntry{}, fmt.Errorf("%w: %d registered race names of %d allowed", ErrEntitlementExceeded, currentCount, maxAllowed)
}
entry, err := s.deps.Store.PromotePendingToRegistered(ctx, canonical, userID, pending.GameID, displayName, now)
if err != nil {
// The row was seen above, so a not-found here means it was removed
// in the meantime; report that as a conflict.
if errors.Is(err, ErrNotFound) {
return RaceNameEntry{}, fmt.Errorf("%w: pending row vanished concurrently", ErrConflict)
}
return RaceNameEntry{}, err
}
// Replace the cached entry for this canonical key with the promoted one.
s.deps.Cache.RemoveRaceName(canonical)
s.deps.Cache.PutRaceName(entry)
intent := LobbyNotification{
Kind: NotificationLobbyRaceNameRegistered,
IdempotencyKey: "racename-registered:" + string(canonical),
Recipients: []uuid.UUID{userID},
Payload: map[string]any{
"race_name": displayName,
},
}
// Best-effort: a failed notification does not undo the registration.
if pubErr := s.deps.Notification.PublishLobbyEvent(ctx, intent); pubErr != nil {
s.deps.Logger.Warn("race-name registered notification failed",
zap.String("canonical", string(canonical)),
zap.Error(pubErr))
}
return entry, nil
}
// ListMyRaceNames returns the race-name rows the store reports for
// userID, together with any store error.
func (s *Service) ListMyRaceNames(ctx context.Context, userID uuid.UUID) ([]RaceNameEntry, error) {
	entries, err := s.deps.Store.ListRaceNamesForUser(ctx, userID)
	return entries, err
}
+275
View File
@@ -0,0 +1,275 @@
package lobby
import (
"context"
"errors"
"fmt"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// OnRuntimeSnapshot updates the denormalised runtime view on the game
// row from a snapshot reported by the runtime module. The lobby
// transitions the game's lifecycle status when the snapshot reports a
// state change relevant to the lobby state machine:
//
//   - `running` → `running` (only while the game is `starting`).
//   - `engine_unreachable` / `start_failed` / `generation_failed` →
//     `start_failed` (only while the game is `starting`).
//   - `finished` → triggers `OnGameFinished` (unless the game is
//     already `finished` or `cancelled`).
//   - `stopped` → triggers `OnGameFinished` while the game is
//     `running` or `paused`.
//
// Per-player MaxPlanets / MaxPopulation are accumulated across the
// game lifetime so the capable-finish evaluation in `OnGameFinished`
// has the data it needs.
//
// This function provides the entry point and the state-machine logic;
// the runtime module is expected to wire the actual call site.
func (s *Service) OnRuntimeSnapshot(ctx context.Context, gameID uuid.UUID, snapshot RuntimeSnapshot) error {
game, err := s.GetGame(ctx, gameID)
if err != nil {
return err
}
// Fold the new observation into the stored one before persisting, so
// running maxima are kept.
merged := mergeRuntimeSnapshot(game.RuntimeSnapshot, snapshot)
now := s.deps.Now().UTC()
updated, err := s.deps.Store.UpdateGameRuntimeSnapshot(ctx, gameID, merged, now)
if err != nil {
return err
}
// The transition is derived from the incoming snapshot, not the merged one.
if next, transition := nextStatusFromSnapshot(updated.Status, snapshot); transition {
switch next {
case GameStatusFinished:
// Cache the merged snapshot first, then let OnGameFinished perform
// the status change and the race-name follow-up.
s.deps.Cache.PutGame(updated)
return s.OnGameFinished(ctx, gameID)
default:
rec, err := s.deps.Store.UpdateGameStatus(ctx, gameID, statusUpdate{
NewStatus: next,
UpdatedAt: now,
// started_at is only stamped on the first transition into running.
SetStarted: next == GameStatusRunning && updated.StartedAt == nil,
StartedAt: now,
})
if err != nil {
return err
}
updated = rec
}
}
s.deps.Cache.PutGame(updated)
return nil
}
// OnGameFinished completes the game lifecycle: marks the game as
// `finished` (unless it already is), evaluates capable-finish per
// active member, and transitions reservation rows to either
// `pending_registration` (capable) or deletes them (non-capable).
//
// Members that are not active are skipped. Failures for individual
// members do not stop the loop; they are collected and returned
// joined, after the game has been put into the cache. Notification
// failures are only logged.
func (s *Service) OnGameFinished(ctx context.Context, gameID uuid.UUID) error {
game, err := s.GetGame(ctx, gameID)
if err != nil {
return err
}
now := s.deps.Now().UTC()
if game.Status != GameStatusFinished {
updated, err := s.deps.Store.UpdateGameStatus(ctx, gameID, statusUpdate{
NewStatus: GameStatusFinished,
UpdatedAt: now,
SetFinished: true,
FinishedAt: now,
})
if err != nil {
return err
}
game = updated
}
memberships, err := s.deps.Store.ListMembershipsForGame(ctx, gameID)
if err != nil {
return err
}
// Index the accumulated per-player stats by user for the capable-finish check.
statsByUser := make(map[uuid.UUID]PlayerTurnStats, len(game.RuntimeSnapshot.PlayerStats))
for _, st := range game.RuntimeSnapshot.PlayerStats {
statsByUser[st.UserID] = st
}
// Every pending_registration row created below shares the same expiry.
expiry := now.Add(s.deps.Config.PendingRegistrationTTL)
var promoteErrs []error
for _, m := range memberships {
if m.Status != MembershipStatusActive {
continue
}
stats, hasStats := statsByUser[m.UserID]
canonical := CanonicalKey(m.CanonicalKey)
if hasStats && capableFinish(stats) {
// Best-effort: drop the existing reservation row before
// inserting the pending_registration so the per-game PK
// does not block the transition.
if err := s.deps.Store.DeleteRaceName(ctx, canonical, gameID); err != nil {
promoteErrs = append(promoteErrs, fmt.Errorf("delete reservation %s: %w", canonical, err))
continue
}
s.deps.Cache.RemoveRaceName(canonical)
// NOTE(review): if this insert fails the reservation deleted above is
// not restored — confirm that is acceptable or wrap both in one
// store-level transaction.
entry, err := s.deps.Store.InsertRaceName(ctx, raceNameInsert{
Name: m.RaceName,
Canonical: canonical,
Status: RaceNameStatusPendingRegistration,
OwnerUserID: m.UserID,
GameID: gameID,
SourceGameID: ptrUUID(gameID),
ExpiresAt: &expiry,
})
if err != nil {
promoteErrs = append(promoteErrs, fmt.Errorf("promote pending %s: %w", canonical, err))
continue
}
s.deps.Cache.PutRaceName(entry)
intent := LobbyNotification{
Kind: NotificationLobbyRaceNamePending,
IdempotencyKey: "racename-pending:" + string(canonical) + ":" + gameID.String(),
Recipients: []uuid.UUID{m.UserID},
Payload: map[string]any{
"race_name": m.RaceName,
"expires_at": expiry.Format(time.RFC3339),
},
}
if pubErr := s.deps.Notification.PublishLobbyEvent(ctx, intent); pubErr != nil {
s.deps.Logger.Warn("race-name pending notification failed",
zap.String("canonical", string(canonical)),
zap.Error(pubErr))
}
continue
}
// Non-capable (or unobserved) member: the reservation is simply released.
if err := s.deps.Store.DeleteRaceName(ctx, canonical, gameID); err != nil {
promoteErrs = append(promoteErrs, fmt.Errorf("delete non-capable reservation %s: %w", canonical, err))
continue
}
s.deps.Cache.RemoveRaceName(canonical)
}
s.deps.Cache.PutGame(game)
// errors.Join returns nil when no per-member error was collected.
return errors.Join(promoteErrs...)
}
// OnRuntimeJobResult consumes adoption / removal events emitted by the
// runtime reconciler; the runtime → lobby callback is wired through
// this entry point. The mapping implemented here is:
//
//   - result status `removed` or `stopped` → the lobby cancels the
//     game (the engine container is gone). Games already in
//     `cancelled` or `finished` are left alone.
//   - any other result status is ignored for forward compatibility.
//
// A nil receiver and an unknown game are both treated as a no-op.
// Future job paths (start, stop, restart) may reuse the same shape.
func (s *Service) OnRuntimeJobResult(ctx context.Context, gameID uuid.UUID, result RuntimeJobResult) error {
	if s == nil {
		return nil
	}

	game, err := s.GetGame(ctx, gameID)
	switch {
	case errors.Is(err, ErrNotFound):
		return nil
	case err != nil:
		return err
	}

	// Terminal games are never touched again.
	switch game.Status {
	case GameStatusCancelled, GameStatusFinished:
		return nil
	}

	switch result.Status {
	case "removed", "stopped":
		// Handled below.
	default:
		// Unknown status — ignore for forward compatibility.
		return nil
	}

	cancelled, err := s.deps.Store.UpdateGameStatus(ctx, gameID, statusUpdate{
		NewStatus: GameStatusCancelled,
		UpdatedAt: s.deps.Now().UTC(),
	})
	if err != nil {
		return err
	}
	s.deps.Cache.PutGame(cancelled)

	s.deps.Logger.Info("game cancelled by runtime reconciler",
		zap.String("game_id", gameID.String()),
		zap.String("op", result.Op),
		zap.String("status", result.Status),
		zap.String("message", result.Message),
	)
	return nil
}
// mergeRuntimeSnapshot merges the incoming snapshot into the previous
// one, preserving running maxima of per-player planets and population
// across the game lifetime.
//
// The scalar fields (turn, runtime status, engine health, observation
// time) are taken from next. Per-player stats are merged by user:
//
//   - a player seen for the first time keeps the incoming stats, with
//     the maxima raised to at least the current values;
//   - a known player takes the incoming stats but keeps the previously
//     recorded initial values and the highest maxima seen so far;
//   - a player present only in prev is carried over unchanged.
//
// The resulting PlayerStats order is deterministic: players keep the
// position they had in prev, and new players follow in the order they
// appear in next. PlayerStats stays nil when neither snapshot carries
// any player.
func mergeRuntimeSnapshot(prev, next RuntimeSnapshot) RuntimeSnapshot {
	out := RuntimeSnapshot{
		CurrentTurn:   next.CurrentTurn,
		RuntimeStatus: next.RuntimeStatus,
		EngineHealth:  next.EngineHealth,
		ObservedAt:    next.ObservedAt,
	}

	total := len(prev.PlayerStats) + len(next.PlayerStats)
	if total == 0 {
		return out
	}

	// merged holds the result in a stable order; indexByUser maps a
	// user to its position in merged.
	merged := make([]PlayerTurnStats, 0, total)
	indexByUser := make(map[uuid.UUID]int, total)

	for _, st := range prev.PlayerStats {
		if i, ok := indexByUser[st.UserID]; ok {
			// Duplicate user in prev: the later entry wins.
			merged[i] = st
			continue
		}
		indexByUser[st.UserID] = len(merged)
		merged = append(merged, st)
	}

	for _, st := range next.PlayerStats {
		i, known := indexByUser[st.UserID]
		if !known {
			st.MaxPlanets = max32(st.MaxPlanets, st.CurrentPlanets)
			st.MaxPopulation = max32(st.MaxPopulation, st.CurrentPopulation)
			indexByUser[st.UserID] = len(merged)
			merged = append(merged, st)
			continue
		}
		existing := merged[i]
		st.InitialPlanets = existing.InitialPlanets
		st.InitialPopulation = existing.InitialPopulation
		st.MaxPlanets = max32(existing.MaxPlanets, max32(st.MaxPlanets, st.CurrentPlanets))
		st.MaxPopulation = max32(existing.MaxPopulation, max32(st.MaxPopulation, st.CurrentPopulation))
		merged[i] = st
	}

	out.PlayerStats = merged
	return out
}
// nextStatusFromSnapshot translates the runtime status carried by
// snapshot into a lobby status transition. It returns (next, true)
// when the lobby status must change and (currentStatus, false)
// otherwise:
//
//   - `running` moves a `starting` game to `running`;
//   - `engine_unreachable`, `start_failed` and `generation_failed`
//     move a `starting` game to `start_failed`;
//   - `finished` finishes any game that is not already `finished` or
//     `cancelled`;
//   - `stopped` finishes a `running` or `paused` game.
func nextStatusFromSnapshot(currentStatus string, snapshot RuntimeSnapshot) (string, bool) {
	reported := snapshot.RuntimeStatus
	starting := currentStatus == GameStatusStarting

	switch {
	case reported == "running" && starting:
		return GameStatusRunning, true
	case (reported == "engine_unreachable" || reported == "start_failed" || reported == "generation_failed") && starting:
		return GameStatusStartFailed, true
	case reported == "finished" && currentStatus != GameStatusFinished && currentStatus != GameStatusCancelled:
		return GameStatusFinished, true
	case reported == "stopped" && (currentStatus == GameStatusRunning || currentStatus == GameStatusPaused):
		return GameStatusFinished, true
	}
	return currentStatus, false
}
// capableFinish reports whether a per-player observation satisfies the
// "capable finish" criterion documented in `backend/PLAN.md` §5.4:
// the recorded maxima of both planets and population must exceed the
// initial values. A zero initial value (no observation) for either
// metric makes the player non-capable.
func capableFinish(stats PlayerTurnStats) bool {
	observed := stats.InitialPlanets != 0 && stats.InitialPopulation != 0
	grewPlanets := stats.MaxPlanets > stats.InitialPlanets
	grewPopulation := stats.MaxPopulation > stats.InitialPopulation
	return observed && grewPlanets && grewPopulation
}
// max32 returns the larger of two int32 values.
func max32(a, b int32) int32 {
	if b >= a {
		return b
	}
	return a
}
// ptrUUID returns a pointer to a private copy of u.
func ptrUUID(u uuid.UUID) *uuid.UUID {
	// u is already a by-value copy of the caller's UUID, so taking its
	// address never aliases the caller's variable.
	return &u
}
File diff suppressed because it is too large Load Diff
+142
View File
@@ -0,0 +1,142 @@
package lobby
import (
"context"
"errors"
"fmt"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// Sweeper is the periodic lobby maintenance worker. Each tick it
// releases expired `pending_registration` race-name rows and
// auto-closes enrollment windows whose `enrollment_ends_at` has passed.
//
// Implements `internal/app.Component`. The sweeper Run loop terminates
// on the parent context cancellation; Shutdown is a no-op because
// every tick already completes synchronously inside Run.
type Sweeper struct {
	// svc is the lobby service whose store, cache and notification
	// dependencies the ticks operate on.
	svc *Service
	// interval is the period of the ticker created by Run.
	interval time.Duration
	// logger receives tick failures and per-game sweep events.
	logger *zap.Logger
	// now is the clock used to timestamp each tick; its result is
	// converted to UTC before use.
	now func() time.Time
}
// NewSweeper constructs the sweeper for svc. The tick interval is read
// from the service config (SweeperInterval), the logger is the service
// logger named "sweeper", and the clock is the service's Now
// dependency.
func NewSweeper(svc *Service) *Sweeper {
	sweeper := new(Sweeper)
	sweeper.svc = svc
	sweeper.interval = svc.Config().SweeperInterval
	sweeper.logger = svc.Logger().Named("sweeper")
	sweeper.now = svc.deps.Now
	return sweeper
}
// Run drives the sweeper goroutine until ctx is done.
//
// One sweep is executed immediately, then one per interval tick. Tick
// failures are logged at warn level and never stop the loop. Run
// returns nil once ctx is cancelled. A non-positive interval is
// reported as an error up front because time.NewTicker panics on such
// a duration.
func (s *Sweeper) Run(ctx context.Context) error {
	if s.interval <= 0 {
		return fmt.Errorf("lobby sweeper: interval must be positive, got %s", s.interval)
	}
	ticker := time.NewTicker(s.interval)
	defer ticker.Stop()
	// Run one tick immediately so a fresh process catches up on missed
	// work without waiting for the first interval. Tests rely on this
	// for deterministic e2e flows.
	if err := s.tick(ctx); err != nil {
		s.logger.Warn("lobby sweeper tick failed", zap.Error(err))
	}
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-ticker.C:
			if err := s.tick(ctx); err != nil {
				s.logger.Warn("lobby sweeper tick failed", zap.Error(err))
			}
		}
	}
}
// Shutdown does nothing and always returns nil: each tick runs
// synchronously inside Run, so there is no background work to drain.
func (s *Sweeper) Shutdown(_ context.Context) error {
	return nil
}
// Tick performs exactly one sweep iteration and returns its error. It
// is exported so tests can drive the sweeper without depending on
// timers.
func (s *Sweeper) Tick(ctx context.Context) error {
	err := s.tick(ctx)
	return err
}
// tick runs both maintenance passes against a single UTC timestamp.
// The enrollment pass runs even when the race-name pass fails; the
// errors of both passes are joined into the returned error.
func (s *Sweeper) tick(ctx context.Context) error {
	at := s.now().UTC()
	// Call arguments are evaluated left to right, so the race-name
	// release still runs before the enrollment auto-close.
	return errors.Join(
		s.releaseExpiredPending(ctx, at),
		s.autoCloseEnrollment(ctx, at),
	)
}
// releaseExpiredPending deletes every pending-registration race name
// the store reports as expired at now, evicts it from the cache and
// publishes a race-name-expired notification to its owner.
//
// A failed listing aborts the pass. A failed delete is collected and
// the row is skipped; a failed notification is only logged. The
// collected delete errors are returned joined.
func (s *Sweeper) releaseExpiredPending(ctx context.Context, now time.Time) error {
	expired, err := s.svc.deps.Store.ListPendingRegistrationsExpired(ctx, now)
	if err != nil {
		return fmt.Errorf("lobby sweeper: list expired pending: %w", err)
	}
	var failures []error
	for _, entry := range expired {
		delErr := s.svc.deps.Store.DeleteRaceName(ctx, entry.Canonical, entry.GameID)
		if delErr != nil {
			failures = append(failures, fmt.Errorf("delete pending %s: %w", entry.Canonical, delErr))
			continue
		}
		s.svc.deps.Cache.RemoveRaceName(entry.Canonical)

		canonical := string(entry.Canonical)
		pubErr := s.svc.deps.Notification.PublishLobbyEvent(ctx, LobbyNotification{
			Kind:           NotificationLobbyRaceNameExpired,
			IdempotencyKey: "racename-expired:" + canonical + ":" + entry.GameID.String(),
			Recipients:     []uuid.UUID{entry.OwnerUserID},
			Payload: map[string]any{
				"race_name": entry.Name,
			},
		})
		if pubErr == nil {
			continue
		}
		// The row is already released; a lost notification must not
		// fail the sweep.
		s.logger.Warn("expired notification failed",
			zap.String("canonical", canonical),
			zap.Error(pubErr))
	}
	return errors.Join(failures...)
}
// autoCloseEnrollment moves every game whose enrollment window has
// expired at now into ready_to_start, provided it has at least
// MinPlayers active memberships. Games below quorum are left
// untouched.
//
// A failed listing aborts the pass. Per-game failures (membership
// count or status update) are collected, the game is skipped, and the
// collected errors are returned joined.
func (s *Sweeper) autoCloseEnrollment(ctx context.Context, now time.Time) error {
	games, err := s.svc.deps.Store.ListEnrollmentExpiredGames(ctx, now)
	if err != nil {
		return fmt.Errorf("lobby sweeper: list expired enrollments: %w", err)
	}
	var errs []error
	for _, game := range games {
		active, err := s.svc.deps.Store.CountActiveMemberships(ctx, game.GameID)
		if err != nil {
			errs = append(errs, fmt.Errorf("count memberships %s: %w", game.GameID, err))
			continue
		}
		// Widen MinPlayers to int rather than narrowing the count to
		// int32, which could wrap for an out-of-range value.
		if active < int(game.MinPlayers) {
			// Below quorum — leave the game in enrollment_open. Admins
			// can extend `enrollment_ends_at` or cancel manually.
			s.logger.Debug("enrollment expired below quorum, leaving",
				zap.String("game_id", game.GameID.String()),
				zap.Int32("min_players", game.MinPlayers),
				zap.Int("active", active))
			continue
		}
		updated, err := s.svc.deps.Store.UpdateGameStatus(ctx, game.GameID, statusUpdate{
			NewStatus: GameStatusReadyToStart,
			UpdatedAt: now,
		})
		if err != nil {
			errs = append(errs, fmt.Errorf("transition %s to ready_to_start: %w", game.GameID, err))
			continue
		}
		// Refresh the cache with the row returned by the store.
		s.svc.deps.Cache.PutGame(updated)
		s.logger.Info("enrollment auto-closed",
			zap.String("game_id", game.GameID.String()),
			zap.Int32("min_players", game.MinPlayers),
			zap.Int("active", active))
	}
	return errors.Join(errs...)
}
+137
View File
@@ -0,0 +1,137 @@
package lobby
import (
"time"
"github.com/google/uuid"
)
// GameRecord mirrors a row in `backend.games` enriched with the
// denormalised runtime snapshot fields persisted in the same row. The
// JSON-encoded `runtime_snapshot` column is decoded into RuntimeSnapshot
// before reaching this struct.
type GameRecord struct {
	GameID uuid.UUID
	// OwnerUserID is optional (pointer).
	// NOTE(review): the meaning of a nil owner is not visible here — confirm.
	OwnerUserID *uuid.UUID
	Visibility  string
	// Status is the lobby status of the game.
	Status      string
	GameName    string
	Description string
	// MinPlayers is the quorum: the sweeper only auto-closes enrollment
	// when at least this many memberships are active.
	MinPlayers      int32
	MaxPlayers      int32
	StartGapHours   int32
	StartGapPlayers int32
	// EnrollmentEndsAt is the end of the enrollment window.
	EnrollmentEndsAt    time.Time
	TurnSchedule        string
	TargetEngineVersion string
	// RuntimeSnapshot is the decoded `runtime_snapshot` column.
	RuntimeSnapshot RuntimeSnapshot
	CreatedAt       time.Time
	UpdatedAt       time.Time
	// StartedAt and FinishedAt are optional timestamps (pointers).
	StartedAt  *time.Time
	FinishedAt *time.Time
}
// RuntimeSnapshot is the lobby's denormalised view of the runtime state
// reported by the runtime module. Until `OnRuntimeSnapshot` is wired
// up, the snapshot carries placeholder values (zero CurrentTurn, empty
// RuntimeStatus).
type RuntimeSnapshot struct {
	CurrentTurn   int32             `json:"current_turn"`
	RuntimeStatus string            `json:"runtime_status,omitempty"`
	EngineHealth  string            `json:"engine_health,omitempty"`
	PlayerStats   []PlayerTurnStats `json:"player_stats,omitempty"`
	// NOTE(review): encoding/json's omitempty never omits a struct value
	// such as time.Time, so a zero ObservedAt is still encoded. Confirm
	// whether `omitzero` was intended.
	ObservedAt time.Time `json:"observed_at,omitempty"`
}
// PlayerTurnStats is the per-player observation read from a runtime
// snapshot. Lobby aggregates `MaxPlanets` / `MaxPopulation` across the
// game lifetime to evaluate capable-finish at `OnGameFinished`.
type PlayerTurnStats struct {
	// UserID identifies the player the observation belongs to.
	UserID uuid.UUID `json:"user_id"`
	// InitialPlanets and InitialPopulation are the baseline values.
	InitialPlanets    int32 `json:"initial_planets"`
	InitialPopulation int32 `json:"initial_population"`
	// CurrentPlanets and CurrentPopulation are the values of the latest
	// observation.
	CurrentPlanets    int32 `json:"current_planets"`
	CurrentPopulation int32 `json:"current_population"`
	// MaxPlanets and MaxPopulation are the running maxima aggregated
	// across the game lifetime.
	MaxPlanets    int32 `json:"max_planets"`
	MaxPopulation int32 `json:"max_population"`
}
// Application mirrors a row in `backend.applications`: one user's
// request, under a chosen race name, to join a game.
type Application struct {
	ApplicationID   uuid.UUID
	GameID          uuid.UUID
	ApplicantUserID uuid.UUID
	RaceName        string
	Status          string
	CreatedAt       time.Time
	// DecidedAt is optional (pointer).
	// NOTE(review): presumably nil until the application is decided — confirm.
	DecidedAt *time.Time
}
// Invite mirrors a row in `backend.invites`. `InvitedUserID` is nil for
// code-based invites; `Code` is non-empty for those.
type Invite struct {
	InviteID      uuid.UUID
	GameID        uuid.UUID
	InviterUserID uuid.UUID
	// InvitedUserID is nil for code-based invites.
	InvitedUserID *uuid.UUID
	// Code is non-empty for code-based invites.
	Code      string
	Status    string
	RaceName  string
	CreatedAt time.Time
	ExpiresAt time.Time
	// DecidedAt is optional (pointer).
	// NOTE(review): presumably nil until the invite is decided — confirm.
	DecidedAt *time.Time
}
// Membership mirrors a row in `backend.memberships`. `CanonicalKey` is
// the canonical form of `RaceName` produced by the Race Name Directory
// policy at write time.
type Membership struct {
	MembershipID uuid.UUID
	GameID       uuid.UUID
	UserID       uuid.UUID
	RaceName     string
	// CanonicalKey is the canonical form of RaceName computed at write
	// time.
	CanonicalKey string
	Status       string
	JoinedAt     time.Time
	// RemovedAt is optional (pointer).
	// NOTE(review): presumably nil while the membership is active — confirm.
	RemovedAt *time.Time
}
// RaceNameEntry mirrors a row in `backend.race_names`.
//
// Status `registered` rows store the all-zero sentinel UUID in `GameID`
// so the partial UNIQUE index `race_names_registered_uidx` covers the
// uniqueness rule. Status `reservation` and `pending_registration` rows
// store the originating `game_id`.
type RaceNameEntry struct {
	// Name is the race name as stored in the row.
	Name string
	// Canonical is the canonical key of the race name.
	Canonical CanonicalKey
	// Status is compared against the RaceNameStatus* values by the
	// IsRegistered / IsReservation / IsPending predicates.
	Status      string
	OwnerUserID uuid.UUID
	// GameID is the all-zero sentinel for `registered` rows and the
	// originating game for `reservation` / `pending_registration` rows.
	GameID       uuid.UUID
	SourceGameID *uuid.UUID
	ReservedAt   *time.Time
	ExpiresAt    *time.Time
	RegisteredAt *time.Time
}
// IsRegistered reports whether the entry carries the `registered`
// status, i.e. the name is platform-permanent.
func (e RaceNameEntry) IsRegistered() bool {
	status := e.Status
	return status == RaceNameStatusRegistered
}
// IsReservation reports whether the entry carries the `reservation`
// status: the canonical key is bound to a concrete game without
// permanent ownership.
func (e RaceNameEntry) IsReservation() bool {
	status := e.Status
	return status == RaceNameStatusReservation
}
// IsPending reports whether the entry carries the
// `pending_registration` status, i.e. it is awaiting capable-finish
// registration.
func (e RaceNameEntry) IsPending() bool {
	status := e.Status
	return status == RaceNameStatusPendingRegistration
}
// raceNameRegisteredGameSentinel is the all-zero UUID stored in
// `race_names.game_id` for `registered` rows. It mirrors the
// migration's `DEFAULT '00000000-0000-0000-0000-000000000000'` clause;
// the zero value of uuid.UUID is exactly that sentinel.
var raceNameRegisteredGameSentinel uuid.UUID
+63
View File
@@ -0,0 +1,63 @@
// Package logging configures the backend structured logger.
package logging
import (
"strings"
"galaxy/backend/internal/config"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
// New builds the process-wide JSON logger described by cfg.
//
// cfg.Level is trimmed and parsed as a zap level; a value zap cannot
// parse is returned as an error. The logger starts from zap's
// production preset with sampling disabled, ISO 8601 timestamps under
// the "timestamp" key, regular output on stdout and internal logger
// errors on stderr.
func New(cfg config.LoggingConfig) (*zap.Logger, error) {
	lvl := zap.NewAtomicLevel()
	rawLevel := strings.TrimSpace(cfg.Level)
	if err := lvl.UnmarshalText([]byte(rawLevel)); err != nil {
		return nil, err
	}

	settings := zap.NewProductionConfig()
	settings.Encoding = "json"
	settings.Level = lvl
	settings.Sampling = nil
	settings.OutputPaths = []string{"stdout"}
	settings.ErrorOutputPaths = []string{"stderr"}

	enc := &settings.EncoderConfig
	enc.TimeKey = "timestamp"
	enc.EncodeTime = zapcore.ISO8601TimeEncoder

	return settings.Build()
}
// Sync flushes logger. A nil logger is a no-op. The benign stdout or
// stderr sync errors commonly returned by containerized or redirected
// process outputs are swallowed; any other flush error is returned.
func Sync(logger *zap.Logger) error {
	if logger == nil {
		return nil
	}
	if err := logger.Sync(); err != nil && !isIgnorableSyncError(err) {
		return err
	}
	return nil
}
// isIgnorableSyncError reports whether err's message contains, in any
// letter case, one of the known benign sync failure texts. A nil error
// is never ignorable.
func isIgnorableSyncError(err error) bool {
	if err == nil {
		return false
	}
	lowered := strings.ToLower(err.Error())
	for _, marker := range [...]string{
		"invalid argument",
		"bad file descriptor",
		"inappropriate ioctl for device",
	} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
+101
View File
@@ -0,0 +1,101 @@
package mail
import (
"context"
"github.com/google/uuid"
)
// AdminListDeliveriesPage bundles the pagination metadata returned to
// the admin API. The same shape is reused by AdminListDeadLettersPage
// — keeping it explicit clarifies the wire contract for handlers.
type AdminListDeliveriesPage struct {
	// Items holds the deliveries of the requested page.
	Items []Delivery
	// Page is the 1-indexed page number after normalisation.
	Page int
	// PageSize is the page size after normalisation.
	PageSize int
	// Total is the total reported by the store alongside the page.
	Total int64
}
// AdminListDeadLettersPage mirrors AdminListDeliveriesPage for the
// dead-letter listing.
type AdminListDeadLettersPage struct {
	// Items holds the dead letters of the requested page.
	Items []DeadLetter
	// Page is the 1-indexed page number after normalisation.
	Page int
	// PageSize is the page size after normalisation.
	PageSize int
	// Total is the total reported by the store alongside the page.
	Total int64
}
// AdminListDeliveries returns one page of deliveries. page is
// 1-indexed; both page and pageSize are normalised first, and the
// normalised values are echoed back in the result. Store errors are
// returned unchanged together with a zero page.
func (s *Service) AdminListDeliveries(ctx context.Context, page, pageSize int) (AdminListDeliveriesPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	rows, count, err := s.deps.Store.ListDeliveries(ctx, (page-1)*pageSize, pageSize)
	if err != nil {
		return AdminListDeliveriesPage{}, err
	}
	var result AdminListDeliveriesPage
	result.Items = rows
	result.Page = page
	result.PageSize = pageSize
	result.Total = count
	return result, nil
}
// AdminGetDelivery looks the delivery up by id in the store and
// returns the store's result unchanged; a missing row is reported as
// ErrDeliveryNotFound.
func (s *Service) AdminGetDelivery(ctx context.Context, deliveryID uuid.UUID) (Delivery, error) {
	delivery, err := s.deps.Store.GetDelivery(ctx, deliveryID)
	return delivery, err
}
// AdminListAttempts returns every attempt recorded for the delivery in
// attempt_no order. The delivery row is fetched first so a missing
// delivery yields ErrDeliveryNotFound rather than an empty list; an
// existing delivery without attempts yields an empty result and no
// error.
func (s *Service) AdminListAttempts(ctx context.Context, deliveryID uuid.UUID) ([]Attempt, error) {
	_, err := s.deps.Store.GetDelivery(ctx, deliveryID)
	if err != nil {
		return nil, err
	}
	return s.deps.Store.ListAttempts(ctx, deliveryID)
}
// AdminResendDelivery re-arms the targeted row for another delivery
// cycle. Contract: ErrDeliveryNotFound when the row is missing,
// ErrResendOnSent when the row is in the terminal `sent` state.
// Otherwise the row is reset to status='pending' with attempts=0 and
// next_attempt_at set to the service clock's current time; the worker
// picks it up on its next tick.
func (s *Service) AdminResendDelivery(ctx context.Context, deliveryID uuid.UUID) (Delivery, error) {
	now := s.deps.Now()
	return s.deps.Store.ResendNonSent(ctx, deliveryID, now)
}
// AdminListDeadLetters returns one page of dead letters, newest first.
// page and pageSize are normalised first, and the normalised values
// are echoed back in the result. Store errors are returned unchanged
// together with a zero page.
func (s *Service) AdminListDeadLetters(ctx context.Context, page, pageSize int) (AdminListDeadLettersPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	rows, count, err := s.deps.Store.ListDeadLetters(ctx, (page-1)*pageSize, pageSize)
	if err != nil {
		return AdminListDeadLettersPage{}, err
	}
	var result AdminListDeadLettersPage
	result.Items = rows
	result.Page = page
	result.PageSize = pageSize
	result.Total = count
	return result, nil
}
// normalisePaging clamps page and pageSize to values that are safe to
// hand to the store: page is at least 1, and pageSize defaults to 25
// when not positive and is capped at 200. The defaults match what the
// existing admin endpoints use elsewhere in `internal/server`.
func normalisePaging(page, pageSize int) (int, int) {
	const (
		defaultPageSize = 25
		maxPageSize     = 200
	)
	if page < 1 {
		page = 1
	}
	switch {
	case pageSize < 1:
		pageSize = defaultPageSize
	case pageSize > maxPageSize:
		pageSize = maxPageSize
	}
	return page, pageSize
}
+168
View File
@@ -0,0 +1,168 @@
package mail_test
import (
"context"
"errors"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/mail"
"github.com/google/uuid"
"go.uber.org/zap/zaptest"
)
// TestAdminListPagination enqueues seven deliveries and checks that
// the first page of three is full and reports the overall total, and
// that the third page holds the single remaining row.
func TestAdminListPagination(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	db := startPostgres(t)
	deps := mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   newRecordingSender(),
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 3},
		Logger: zaptest.NewLogger(t),
	}
	svc := mail.NewService(deps)

	const total = 7
	for i := 0; i < total; i++ {
		code := "1234" + string(rune('0'+i))
		if err := svc.EnqueueLoginCode(ctx, "a@example.test", code, 5*time.Minute); err != nil {
			t.Fatalf("enqueue %d: %v", i, err)
		}
	}

	first, err := svc.AdminListDeliveries(ctx, 1, 3)
	if err != nil {
		t.Fatalf("list page 1: %v", err)
	}
	if got := len(first.Items); got != 3 {
		t.Fatalf("page1 size=%d want 3", got)
	}
	if first.Total != total {
		t.Fatalf("page1 total=%d want %d", first.Total, total)
	}

	last, err := svc.AdminListDeliveries(ctx, 3, 3)
	if err != nil {
		t.Fatalf("list page 3: %v", err)
	}
	if got := len(last.Items); got != 1 {
		t.Fatalf("page3 size=%d want 1", got)
	}
}
// TestAdminGetDeliveryNotFound checks that looking up a random,
// never-inserted delivery id yields ErrDeliveryNotFound.
func TestAdminGetDeliveryNotFound(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	deps := mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   newRecordingSender(),
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 3},
		Logger: zaptest.NewLogger(t),
	}
	svc := mail.NewService(deps)

	_, err := svc.AdminGetDelivery(context.Background(), uuid.New())
	if !errors.Is(err, mail.ErrDeliveryNotFound) {
		t.Fatalf("get missing: want ErrDeliveryNotFound, got %v", err)
	}
}
// TestAdminResendStateMatrix walks one delivery through the resend
// state matrix: two failed sends dead-letter it, a resend re-arms it
// as pending with zero attempts, a third (successful) send marks it
// sent, resending a sent row is rejected with ErrResendOnSent, and
// resending an unknown id is rejected with ErrDeliveryNotFound.
func TestAdminResendStateMatrix(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	sender := newRecordingSender()
	// One scripted behaviour per Send call the scenario triggers: two
	// transient failures that dead-letter the row, then the successful
	// send after the resend.
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return errors.New("transient #1") },
		func(mail.OutboundMessage) error { return errors.New("transient #2") },
		func(mail.OutboundMessage) error { return nil }, // sent path
	}
	clock := time.Now().UTC().Add(-2 * time.Hour) // bring next_attempt_at into the past
	svc := mail.NewService(mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   sender,
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 2},
		Now:    func() time.Time { return clock },
		Logger: zaptest.NewLogger(t),
	})
	worker := mail.NewWorker(svc)
	// 1. Drive a row to dead-lettered (two failures with MaxAttempts=2).
	if err := svc.EnqueueLoginCode(context.Background(), "dead@example.test", "111111", 5*time.Minute); err != nil {
		t.Fatalf("enqueue dead: %v", err)
	}
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #1: %v", err)
	}
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #2: %v", err)
	}
	deadList, err := svc.AdminListDeliveries(context.Background(), 1, 5)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if len(deadList.Items) != 1 || deadList.Items[0].Status != mail.StatusDeadLettered {
		t.Fatalf("want 1 dead-lettered row, got %+v", deadList.Items)
	}
	deadID := deadList.Items[0].DeliveryID
	// 2. Resend the dead-lettered row: it succeeds, the status flips to
	// pending and attempts is reset to 0.
	resent, err := svc.AdminResendDelivery(context.Background(), deadID)
	if err != nil {
		t.Fatalf("resend dead: %v", err)
	}
	if resent.Status != mail.StatusPending {
		t.Fatalf("status after resend=%q want pending", resent.Status)
	}
	if resent.Attempts != 0 {
		t.Fatalf("attempts after resend=%d want 0", resent.Attempts)
	}
	// 3. Drive the worker once more — third Send call returns nil so
	// the row transitions to sent.
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick post-resend: %v", err)
	}
	d, err := svc.AdminGetDelivery(context.Background(), deadID)
	if err != nil {
		t.Fatalf("get after send: %v", err)
	}
	if d.Status != mail.StatusSent {
		t.Fatalf("status=%q want sent", d.Status)
	}
	// 4. Resend on `sent` -> ErrResendOnSent.
	if _, err := svc.AdminResendDelivery(context.Background(), deadID); !errors.Is(err, mail.ErrResendOnSent) {
		t.Fatalf("resend on sent: want ErrResendOnSent, got %v", err)
	}
	// 5. Resend on missing -> ErrDeliveryNotFound.
	if _, err := svc.AdminResendDelivery(context.Background(), uuid.New()); !errors.Is(err, mail.ErrDeliveryNotFound) {
		t.Fatalf("resend missing: want ErrDeliveryNotFound, got %v", err)
	}
}
// TestServiceStats enqueues three deliveries and checks that Stats
// counts them as pending and still exposes a bucket for the sent
// status even though nothing has been sent.
func TestServiceStats(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	db := startPostgres(t)
	deps := mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   newRecordingSender(),
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 3},
		Logger: zaptest.NewLogger(t),
	}
	svc := mail.NewService(deps)

	for i := 0; i < 3; i++ {
		code := "55555" + string(rune('0'+i))
		if err := svc.EnqueueLoginCode(ctx, "stats@example.test", code, 5*time.Minute); err != nil {
			t.Fatalf("enqueue: %v", err)
		}
	}

	stats, err := svc.Stats(ctx)
	if err != nil {
		t.Fatalf("stats: %v", err)
	}
	if pending := stats[mail.StatusPending]; pending != 3 {
		t.Fatalf("pending=%d want 3", pending)
	}
	if _, present := stats[mail.StatusSent]; !present {
		t.Fatal("Stats must always return all four buckets")
	}
}
+121
View File
@@ -0,0 +1,121 @@
package mail
import (
"context"
"errors"
"time"
"galaxy/backend/internal/config"
"github.com/google/uuid"
"go.uber.org/zap"
)
// SMTPSender is the wire-level boundary the worker uses to deliver an
// outbox row through SMTP. Implementations are expected to be
// concurrency-safe and to honour ctx cancellation: the worker passes a
// per-row context bounded by the configured operation timeout.
//
// `Send` is the single point where transient-vs-permanent classification
// happens; the returned error carries IsPermanent to let the worker
// decide between schedule-a-retry and dead-letter.
type SMTPSender interface {
	// Send delivers msg. A nil error means the message was accepted; a
	// non-nil error is classified with IsPermanent.
	Send(ctx context.Context, msg OutboundMessage) error
}
// OutboundMessage is the rendered, recipient-addressed payload handed
// to SMTPSender. From is taken from BACKEND_SMTP_FROM at construction
// time, so producers and the worker never set it directly.
type OutboundMessage struct {
	// To lists the recipient addresses.
	To []string
	// Subject is the rendered subject line.
	Subject string
	// ContentType is the MIME type of Body.
	ContentType string
	// Body is the rendered message body.
	Body []byte
}
// SendError pairs an underlying error with a permanence flag.
// Permanent errors (RFC 5321 5xx, malformed addresses, oversize body)
// dead-letter the row immediately on the next attempt; transient ones
// (4xx, network) trigger the backoff schedule.
type SendError struct {
	// Err is the wrapped cause; it may be nil.
	Err error
	// Permanent marks the failure as not worth retrying.
	Permanent bool
}

// Error returns the message of the wrapped error, or the empty string
// when the receiver or its wrapped error is nil.
func (e *SendError) Error() string {
	if e != nil && e.Err != nil {
		return e.Err.Error()
	}
	return ""
}

// Unwrap exposes the wrapped error to errors.Is / errors.As. It
// returns nil for a nil receiver.
func (e *SendError) Unwrap() error {
	if e != nil {
		return e.Err
	}
	return nil
}

// IsPermanent reports whether err is, or wraps, a non-nil *SendError
// marked Permanent. Nil errors and errors without a *SendError in
// their chain are treated as transient, so the worker keeps retrying
// them until MaxAttempts.
func IsPermanent(err error) bool {
	var sendErr *SendError
	matched := err != nil && errors.As(err, &sendErr)
	if !matched || sendErr == nil {
		return false
	}
	return sendErr.Permanent
}
// AdminNotifier is the outbound surface mail uses to flag a dead-letter
// to operators. The canonical notification publisher is wired in
// `cmd/backend/main.go`; until then NewNoopAdminNotifier ships a
// logger-only stub matching the pattern used elsewhere in
// `backend/internal/*`.
type AdminNotifier interface {
	// OnDeadLetter is called with the delivery id, its template id and
	// a reason string when a delivery is dead-lettered.
	OnDeadLetter(ctx context.Context, deliveryID uuid.UUID, templateID, reason string)
}
// Deps aggregates every collaborator the Service depends on.
//
// Store and SMTP must be non-nil. Admin defaults to a no-op publisher
// when omitted; Now defaults to time.Now; Logger defaults to
// zap.NewNop. Config carries the worker interval and max-attempts
// derived from `BACKEND_MAIL_*`.
type Deps struct {
	// Store is the persistence layer for deliveries; required.
	Store *Store
	// SMTP delivers rendered messages; required.
	SMTP SMTPSender
	// Admin is notified about dead-lettered deliveries; optional.
	Admin AdminNotifier
	// Config carries the mail worker settings.
	Config config.MailConfig
	// Now overrides time.Now for deterministic tests. A nil Now defaults
	// to time.Now in NewService.
	Now func() time.Time
	// Logger is named under "mail" by NewService. Nil falls back to
	// zap.NewNop.
	Logger *zap.Logger
}
// NewNoopAdminNotifier returns an AdminNotifier that only logs each
// dead-letter event at warn level, under the "notify.noop" logger
// name. A nil logger is replaced with zap.NewNop. The canonical
// implementation replaces it with the real notification publisher.
func NewNoopAdminNotifier(logger *zap.Logger) AdminNotifier {
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	return &noopAdminNotifier{logger: base.Named("notify.noop")}
}
// noopAdminNotifier is the logger-only AdminNotifier returned by
// NewNoopAdminNotifier.
type noopAdminNotifier struct {
	// logger receives one warn entry per dead-letter event.
	logger *zap.Logger
}
// OnDeadLetter logs the dead-letter event at warn level with the
// delivery id, template id and reason. The context is unused.
func (n *noopAdminNotifier) OnDeadLetter(_ context.Context, deliveryID uuid.UUID, templateID, reason string) {
	fields := []zap.Field{
		zap.String("delivery_id", deliveryID.String()),
		zap.String("template_id", templateID),
		zap.String("reason", reason),
	}
	n.logger.Warn("mail dead-letter (noop publisher)", fields...)
}
+243
View File
@@ -0,0 +1,243 @@
package mail
import (
"context"
"fmt"
netmail "net/mail"
"strings"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// contentTypeTextPlain is the RFC 2046 text/plain MIME type stored in
// `mail_payloads.content_type` for plain-text bodies. Every producer in
// this file enqueues with this content type.
const contentTypeTextPlain = "text/plain"
// TemplateLoginCode is the template_id stored in `mail_deliveries` for
// the auth-issued login code. The value matches the kind in the
// notification catalog (`README.md` §10) so future cross-reporting
// stays consistent. It is also the key of the login-code renderer in
// templateRenderers.
const TemplateLoginCode = "auth.login_code"
// EnqueueLoginCode renders the auth login-code email for email and
// inserts the outbox row.
//
// The recipient is normalised first; an invalid address is returned as
// ErrInvalidRecipient. Each call uses a fresh random idempotency_key
// so the unique constraint cannot suppress a legitimate re-issue;
// double-enqueue protection lives in the auth challenge throttle (see
// `auth.Service.SendEmailCode`). Store failures are wrapped and
// returned.
func (s *Service) EnqueueLoginCode(ctx context.Context, email, code string, ttl time.Duration) error {
	recipient, err := normaliseRecipient(email)
	if err != nil {
		return err
	}
	subject, text := renderLoginCode(code, ttl)
	deliveryID := uuid.New()
	inserted, err := s.deps.Store.InsertEnqueue(ctx, EnqueueArgs{
		DeliveryID:     deliveryID,
		TemplateID:     TemplateLoginCode,
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{recipient},
		ContentType:    contentTypeTextPlain,
		Subject:        subject,
		Body:           []byte(text),
	})
	switch {
	case err != nil:
		return fmt.Errorf("mail: enqueue login code: %w", err)
	case !inserted:
		// Cannot happen given the random key, but keeps the invariant
		// explicit for readers grep-ing for unexpected paths.
		s.deps.Logger.Warn("login-code enqueue collided on random idempotency key",
			zap.String("delivery_id", deliveryID.String()))
	}
	return nil
}
// EnqueueTemplate is the generic producer surface used by notification
// fan-out. The caller supplies a stable idempotencyKey so
// re-deliveries of the same logical event are deduplicated by the
// (template_id, idempotency_key) UNIQUE constraint.
//
// Validation runs in this order: a blank idempotencyKey is rejected,
// the recipient is normalised (ErrInvalidRecipient on failure), the
// template is looked up (ErrUnknownTemplate when missing) and the
// payload is rendered. Render and store failures are wrapped and
// returned.
func (s *Service) EnqueueTemplate(ctx context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error {
	if key := strings.TrimSpace(idempotencyKey); key == "" {
		return fmt.Errorf("mail: idempotency_key must not be empty")
	}
	to, err := normaliseRecipient(recipient)
	if err != nil {
		return err
	}
	renderer, known := templateRenderers[templateID]
	if !known {
		return fmt.Errorf("%w: %q", ErrUnknownTemplate, templateID)
	}
	subject, text, renderErr := renderer(payload)
	if renderErr != nil {
		return fmt.Errorf("mail: render template %q: %w", templateID, renderErr)
	}
	// The insert's "inserted" flag is ignored on purpose here: a
	// duplicate key is the deduplication path, not a failure.
	_, err = s.deps.Store.InsertEnqueue(ctx, EnqueueArgs{
		DeliveryID:     uuid.New(),
		TemplateID:     templateID,
		IdempotencyKey: idempotencyKey,
		Recipients:     []string{to},
		ContentType:    contentTypeTextPlain,
		Subject:        subject,
		Body:           []byte(text),
	})
	if err != nil {
		return fmt.Errorf("mail: enqueue template: %w", err)
	}
	return nil
}
// normaliseRecipient trims surrounding whitespace from addr and
// validates it with the stdlib RFC 5322 address parser. Empty or
// malformed input yields ErrInvalidRecipient. On success the bare
// address (`mail.Address.Address`) is returned, without any display
// name.
func normaliseRecipient(addr string) (string, error) {
	candidate := strings.TrimSpace(addr)
	if candidate != "" {
		if parsed, err := netmail.ParseAddress(candidate); err == nil {
			return parsed.Address, nil
		}
	}
	return "", ErrInvalidRecipient
}
// templateRenderers is the inline catalog of mail templates the
// notification module dispatches against, keyed by template id. It
// covers `auth.login_code` plus the other email-bearing kinds
// enumerated in `README.md` §10. Each renderer takes the
// producer-supplied payload map and returns (subject, body) or an
// error when required fields are missing or wrongly typed.
//
// Only the login-code renderer rejects a payload (missing or non-string
// `code`). The remaining renderers read their fields with
// payloadString, so a missing or wrongly-typed field renders as an
// empty string or as the fallback text passed to fallbackString.
var templateRenderers = map[string]func(map[string]any) (string, string, error){
	TemplateLoginCode: func(payload map[string]any) (string, string, error) {
		code, _ := payload["code"].(string)
		if code == "" {
			return "", "", fmt.Errorf("payload.code must be a non-empty string")
		}
		// A missing or non-Duration ttl leaves ttl at zero, which
		// renderLoginCode reports as one minute.
		ttl, _ := payload["ttl"].(time.Duration)
		subject, body := renderLoginCode(code, ttl)
		return subject, body, nil
	},
	"lobby.invite.received": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		inviter := payloadString(payload, "inviter_user_id")
		subject := "You have a new Galaxy game invite"
		body := fmt.Sprintf(
			"You have been invited to a Galaxy game.\n\nGame: %s\nInviter: %s\n\nOpen the Galaxy client to accept or decline.\n",
			gameID, inviter,
		)
		return subject, body, nil
	},
	"lobby.application.approved": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "Your Galaxy application was approved"
		body := fmt.Sprintf(
			"Your application to join the Galaxy game %s has been approved. The game owner will start the match when ready.\n",
			gameID,
		)
		return subject, body, nil
	},
	"lobby.application.rejected": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "Your Galaxy application was rejected"
		body := fmt.Sprintf(
			"Your application to join the Galaxy game %s has been rejected. You can apply to other public games from the lobby.\n",
			gameID,
		)
		return subject, body, nil
	},
	"lobby.membership.removed": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		reason := payloadString(payload, "reason")
		subject := "You were removed from a Galaxy game"
		body := fmt.Sprintf(
			"Your membership in the Galaxy game %s has been removed.\n\nReason: %s\n",
			gameID, fallbackString(reason, "no reason provided"),
		)
		return subject, body, nil
	},
	"lobby.membership.blocked": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "You were blocked from a Galaxy game"
		body := fmt.Sprintf(
			"Your membership in the Galaxy game %s has been blocked. Please contact the game owner if this is unexpected.\n",
			gameID,
		)
		return subject, body, nil
	},
	"lobby.race_name.pending": func(payload map[string]any) (string, string, error) {
		raceName := payloadString(payload, "race_name")
		expiresAt := payloadString(payload, "expires_at")
		subject := "Your Galaxy race name is awaiting registration"
		body := fmt.Sprintf(
			"Congratulations — your Galaxy race name %q has reached pending registration. Confirm registration before %s to lock it permanently.\n",
			raceName, fallbackString(expiresAt, "the listed deadline"),
		)
		return subject, body, nil
	},
	"runtime.image_pull_failed": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		imageRef := payloadString(payload, "image_ref")
		subject := "Galaxy runtime: image pull failed"
		body := fmt.Sprintf(
			"Image pull failed while preparing engine container for game %s.\n\nimage_ref: %s\n\nReview the runtime operation log for details.\n",
			gameID, fallbackString(imageRef, "unknown"),
		)
		return subject, body, nil
	},
	"runtime.container_start_failed": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "Galaxy runtime: container start failed"
		body := fmt.Sprintf(
			"Engine container start failed for game %s.\n\nReview the runtime operation log and Docker daemon logs for details.\n",
			gameID,
		)
		return subject, body, nil
	},
	"runtime.start_config_invalid": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		reason := payloadString(payload, "reason")
		subject := "Galaxy runtime: start config invalid"
		body := fmt.Sprintf(
			"Engine container start was rejected by configuration validation for game %s.\n\nReason: %s\n",
			gameID, fallbackString(reason, "no reason provided"),
		)
		return subject, body, nil
	},
}
// payloadString reads the string stored under key in a notification
// payload. A missing key, a value of another type, or a nil payload
// all yield the empty string; the function never panics.
func payloadString(payload map[string]any, key string) string {
	if s, ok := payload[key].(string); ok {
		return s
	}
	return ""
}
// fallbackString returns value unchanged unless it is empty or
// whitespace-only, in which case alt is returned.
func fallbackString(value, alt string) string {
	if strings.TrimSpace(value) != "" {
		return value
	}
	return alt
}
// renderLoginCode builds the English plain-text subject and body for
// the `auth.login_code` template. ttl is rounded to the nearest whole
// minute and reported as at least one minute. Localisation is deferred
// to a future stage (see `backend/README.md` and `backend/docs/`).
func renderLoginCode(code string, ttl time.Duration) (subject, body string) {
	wholeMinutes := int(ttl.Round(time.Minute) / time.Minute)
	if wholeMinutes < 1 {
		wholeMinutes = 1
	}
	subject = "Galaxy login code: " + code
	body = fmt.Sprintf(
		"Your one-time Galaxy login code is %s.\n\nThe code expires in %d minutes. If you did not request it, you can ignore this email.\n",
		code, wholeMinutes,
	)
	return subject, body
}
+147
View File
@@ -0,0 +1,147 @@
package mail
import (
"strings"
"testing"
"time"
)
// TestRenderLoginCode checks that the rendered subject and body both
// carry the login code and that the body states the TTL in minutes.
func TestRenderLoginCode(t *testing.T) {
	t.Parallel()
	const code = "123456"
	subject, body := renderLoginCode(code, 10*time.Minute)
	if !strings.Contains(subject, code) {
		t.Fatalf("subject must include code, got %q", subject)
	}
	if !strings.Contains(body, code) {
		t.Fatalf("body must include code, got %q", body)
	}
	if !strings.Contains(body, "10 minutes") {
		t.Fatalf("body must include human-readable TTL, got %q", body)
	}
}
// TestRenderLoginCode_RoundsTTL pins how the TTL is presented: it is
// rounded to the nearest minute and never shown as less than one.
func TestRenderLoginCode_RoundsTTL(t *testing.T) {
	t.Parallel()

	type ttlCase struct {
		ttl    time.Duration
		expect string
	}
	scenarios := map[string]ttlCase{
		"sub-minute": {ttl: 30 * time.Second, expect: "1 minutes"},
		"exact":      {ttl: 10 * time.Minute, expect: "10 minutes"},
		"with secs":  {ttl: 5*time.Minute + 29*time.Second, expect: "5 minutes"},
	}

	for label, scenario := range scenarios {
		t.Run(label, func(t *testing.T) {
			t.Parallel()
			if _, body := renderLoginCode("000000", scenario.ttl); !strings.Contains(body, scenario.expect) {
				t.Fatalf("body missing %q for ttl=%s, got %q", scenario.expect, scenario.ttl, body)
			}
		})
	}
}
// TestNormaliseRecipient covers accepted inputs (plain, padded, with a
// display name) and rejected ones (empty, blank, malformed, embedded
// spaces) for normaliseRecipient.
func TestNormaliseRecipient(t *testing.T) {
	t.Parallel()

	type recipientCase struct {
		input   string
		want    string
		wantErr bool
	}
	scenarios := map[string]recipientCase{
		"plain":            {input: "alice@example.test", want: "alice@example.test"},
		"trims":            {input: "  bob@example.test  ", want: "bob@example.test"},
		"display-stripped": {input: "Alice <alice@example.test>", want: "alice@example.test"},
		"empty":            {input: "", wantErr: true},
		"whitespace":       {input: "   ", wantErr: true},
		"malformed":        {input: "not-an-email", wantErr: true},
		"with-spaces":      {input: "ali ce@example.test", wantErr: true},
	}

	for label, scenario := range scenarios {
		t.Run(label, func(t *testing.T) {
			t.Parallel()
			got, err := normaliseRecipient(scenario.input)
			switch {
			case scenario.wantErr && err == nil:
				t.Fatalf("expected error, got %q", got)
			case scenario.wantErr:
				// Rejected as expected; the returned value is irrelevant.
				return
			case err != nil:
				t.Fatalf("unexpected error: %v", err)
			case got != scenario.want:
				t.Fatalf("got %q want %q", got, scenario.want)
			}
		})
	}
}
// TestTemplateRendererLoginCode verifies that the login-code renderer
// is registered in templateRenderers, embeds the code in both subject
// and body, and fails when the payload has no code.
func TestTemplateRendererLoginCode(t *testing.T) {
	t.Parallel()

	render := templateRenderers[TemplateLoginCode]
	if render == nil {
		t.Fatal("TemplateLoginCode renderer must be registered")
	}

	const code = "654321"
	ttl := 7 * time.Minute

	subject, body, err := render(map[string]any{"code": code, "ttl": ttl})
	if err != nil {
		t.Fatalf("render: %v", err)
	}
	mentionsCode := strings.Contains(subject, code) && strings.Contains(body, code)
	if !mentionsCode {
		t.Fatalf("subject=%q body=%q must mention code", subject, body)
	}

	_, _, err = render(map[string]any{"ttl": ttl})
	if err == nil {
		t.Fatal("missing code must error")
	}
}
func TestNextBackoffMonotonicAndCapped(t *testing.T) {
t.Parallel()
// Sample many runs per attempt so jitter does not flake the
// invariant: median of attempt N is below median of attempt N+1
// up to the cap.
prev := time.Duration(0)
for n := 1; n <= 12; n++ {
var sum time.Duration
runs := 32
for range runs {
sum += nextBackoff(n)
}
avg := sum / time.Duration(runs)
if avg > backoffMax+backoffMax/4 { // generous upper bound
t.Fatalf("attempt %d avg %s exceeds capped budget", n, avg)
}
if avg < backoffBase/2 {
t.Fatalf("attempt %d avg %s below base/2", n, avg)
}
if n > 1 && avg < prev/2 {
t.Fatalf("backoff decreased dramatically between attempts %d and %d (%s vs %s)", n-1, n, prev, avg)
}
prev = avg
}
}
// TestIsPermanent checks IsPermanent against a nil error, a SendError
// with the default Permanent flag, and a SendError marked Permanent.
func TestIsPermanent(t *testing.T) {
	t.Parallel()

	if IsPermanent(nil) {
		t.Fatal("nil must not be permanent")
	}

	if IsPermanent(&SendError{Err: errSentinel("transient")}) {
		t.Fatal("default SendError must not be permanent")
	}

	marked := &SendError{Err: errSentinel("permanent"), Permanent: true}
	if reported := IsPermanent(marked); !reported {
		t.Fatal("Permanent=true must report true")
	}
}
// errSentinel is a tiny string-backed error type used only in tests to
// build distinguishable error values.
type errSentinel string
// Error returns the underlying string, satisfying the error interface.
func (e errSentinel) Error() string { return string(e) }
+27
View File
@@ -0,0 +1,27 @@
package mail
import "errors"
// Sentinel errors emitted by Service methods. Handlers translate them
// into HTTP responses; tests match on them with errors.Is.
var (
// ErrDeliveryNotFound is returned by AdminGetDelivery and AdminResend
// when the supplied delivery_id does not name a row.
ErrDeliveryNotFound = errors.New("mail: delivery not found")
// ErrResendOnSent is returned by AdminResend when the targeted row
// is in the terminal `sent` state. The admin contract maps this to
// 409 Conflict; resending an already-delivered mail would push a
// duplicate copy to the recipient.
ErrResendOnSent = errors.New("mail: cannot resend a sent delivery")
// ErrUnknownTemplate is returned by EnqueueTemplate when the
// supplied template_id is not registered in the inline template
// catalog. A typo at the producer is the typical cause.
ErrUnknownTemplate = errors.New("mail: unknown template")
// ErrInvalidRecipient is returned by EnqueueLoginCode and
// EnqueueTemplate when the supplied recipient address is empty or
// fails go-mail's RFC 5322 validation.
ErrInvalidRecipient = errors.New("mail: invalid recipient address")
)
+94
View File
@@ -0,0 +1,94 @@
// Package mail implements the durable mail outbox documented in
// `backend/PLAN.md` §5.6 and `backend/README.md` §9. Producers call
// EnqueueLoginCode or EnqueueTemplate; the rows land in
// `backend.mail_deliveries` together with their payload and recipients.
// A single Worker goroutine claims due rows with
// `SELECT … FOR UPDATE SKIP LOCKED`, sends them through SMTP via the
// `wneessen/go-mail` library, records every attempt, and dead-letters
// rows that exceed the configured maximum.
//
// Until the notification module lands, the AdminNotifier dependency is
// satisfied by NewNoopAdminNotifier — the same pattern auth uses for
// LoginCodeMailer and SessionInvalidator.
package mail
import (
"context"
"galaxy/backend/internal/config"
"time"
"go.uber.org/zap"
)
// Service is the mail-domain entry point. It wires the persistence
// store, the SMTP sender, the admin-notification publisher used on
// dead-letter, the runtime configuration, and a structured logger.
type Service struct {
// deps holds the collaborators passed to NewService after its
// defaults (Now, Logger, Admin) have been filled in.
deps Deps
}
// NewService builds a Service around deps, filling in defaults for the
// optional collaborators: a nil Now becomes time.Now, a nil Logger
// becomes a no-op zap logger, and a nil Admin becomes
// NewNoopAdminNotifier. The logger is always re-scoped under the
// "mail" name. Store and SMTP have no default — calling Service methods
// with either nil panics on first use, matching how the rest of
// `internal/*` signals missing wiring.
func NewService(deps Deps) *Service {
	if deps.Now == nil {
		deps.Now = time.Now
	}

	logger := deps.Logger
	if logger == nil {
		logger = zap.NewNop()
	}
	deps.Logger = logger.Named("mail")

	// The no-op notifier receives the already-named logger.
	if deps.Admin == nil {
		deps.Admin = NewNoopAdminNotifier(deps.Logger)
	}

	svc := &Service{deps: deps}
	return svc
}
// Backoff parameters for the worker retry schedule. The values match
// the trade-off documented in `backend/README.md` and `backend/docs/`: a 5
// second base, ×2 growth, capped at one hour, with ±25% jitter.
const (
// backoffBase is the base delay of the schedule.
backoffBase = 5 * time.Second
// backoffFactor is the growth multiplier (×2).
backoffFactor = 2.0
// backoffMax is the cap on the delay.
backoffMax = time.Hour
// backoffJitter is the jitter fraction (±25%).
backoffJitter = 0.25
)
// Status values stored in `mail_deliveries.status`. Mirrored by the
// CHECK constraint added in migration 00001.
const (
// StatusPending is written by Store.InsertEnqueue for a fresh row and
// by Store.ResendNonSent when a row is re-armed.
StatusPending = "pending"
// StatusRetrying is written by Store.ScheduleRetry together with the
// next attempt time.
StatusRetrying = "retrying"
// StatusSent is written by Store.MarkSent; Store.ResendNonSent refuses
// rows in this state.
StatusSent = "sent"
// StatusDeadLettered is written by Store.MarkDeadLettered.
StatusDeadLettered = "dead_lettered"
)
// Outcome values stored in `mail_attempts.outcome`. Mirrored by the
// CHECK constraint added in migration 00001.
const (
OutcomeSuccess = "success"
OutcomeTransientError = "transient_error"
OutcomePermanentError = "permanent_error"
)
// Recipient kinds stored in `mail_recipients.kind`. The 5.6
// implementation only emits 'to'; cc/bcc/reply_to remain available
// for future producers.
const (
RecipientKindTo = "to"
)
// Config exposes the mail configuration the Service was built with.
// Worker uses it to schedule the scan loop and bound retries.
func (s *Service) Config() config.MailConfig {
	cfg := s.deps.Config
	return cfg
}
// Stats reports the current number of `mail_deliveries` rows per
// status by delegating to the store's CountByStatus. The metricsapi
// server reads this through the Service so `mail_outbox_depth{state}`
// (README §15) does not require the worker to publish gauges from
// inside its hot path.
func (s *Service) Stats(ctx context.Context) (map[string]int64, error) {
	counts, err := s.deps.Store.CountByStatus(ctx)
	return counts, err
}
+131
View File
@@ -0,0 +1,131 @@
package mail
import (
"context"
"errors"
"fmt"
"galaxy/backend/internal/config"
gomail "github.com/wneessen/go-mail"
"go.uber.org/zap"
)
// SMTPClient is the abstraction surface over `wneessen/go-mail` so
// tests can stub the wire layer without dialling. Production wires
// realSMTPClient.
type SMTPClient interface {
// DialAndSendWithContext delivers msg. smtpSender.Send calls it once
// per outbound message and classifies a non-nil error with
// classifySMTPError.
DialAndSendWithContext(ctx context.Context, msg *gomail.Msg) error
}
// realSMTPClient adapts *gomail.Client to SMTPClient. The variadic
// nature of DialAndSendWithContext is hidden because the worker only
// ever sends one message per call.
type realSMTPClient struct {
// inner is the go-mail client built by NewSMTPSender.
inner *gomail.Client
}
// DialAndSendWithContext forwards the single message to the wrapped
// go-mail client and returns its error unchanged.
func (c *realSMTPClient) DialAndSendWithContext(ctx context.Context, msg *gomail.Msg) error {
return c.inner.DialAndSendWithContext(ctx, msg)
}
// smtpSender implements SMTPSender on top of an SMTPClient. The
// `from` address is captured at construction time from
// `BACKEND_SMTP_FROM`.
type smtpSender struct {
// client performs the actual dial-and-send.
client SMTPClient
// from is applied as the FROM address of every message built by Send.
from string
// logger is scoped under "mail.smtp" by NewSMTPSender.
logger *zap.Logger
}
// NewSMTPSender builds the production SMTPSender for the relay
// described by cfg. cfg.TLSMode selects the transport security:
//
//   - "none"     → plain TCP, no TLS;
//   - "starttls" → STARTTLS required (TLSMandatory);
//   - "tls"      → implicit TLS at the configured port (WithSSL).
//
// Any other mode is rejected with an error. PLAIN authentication is
// enabled only when both Username and Password are non-empty. A nil
// logger is replaced by a no-op logger; the logger is scoped under
// "mail.smtp".
func NewSMTPSender(cfg config.SMTPConfig, logger *zap.Logger) (SMTPSender, error) {
	var transport gomail.Option
	switch cfg.TLSMode {
	case "none":
		transport = gomail.WithTLSPolicy(gomail.NoTLS)
	case "starttls":
		transport = gomail.WithTLSPolicy(gomail.TLSMandatory)
	case "tls":
		transport = gomail.WithSSL()
	default:
		return nil, fmt.Errorf("mail: unsupported SMTP TLS mode %q", cfg.TLSMode)
	}

	options := []gomail.Option{gomail.WithPort(cfg.Port), transport}
	if haveCredentials := cfg.Username != "" && cfg.Password != ""; haveCredentials {
		options = append(options,
			gomail.WithSMTPAuth(gomail.SMTPAuthPlain),
			gomail.WithUsername(cfg.Username),
			gomail.WithPassword(cfg.Password),
		)
	}

	inner, err := gomail.NewClient(cfg.Host, options...)
	if err != nil {
		return nil, fmt.Errorf("mail: build smtp client: %w", err)
	}

	if logger == nil {
		logger = zap.NewNop()
	}
	sender := &smtpSender{
		client: &realSMTPClient{inner: inner},
		from:   cfg.From,
		logger: logger.Named("mail.smtp"),
	}
	return sender, nil
}
// Send converts msg into a *gomail.Msg and hands it to the SMTP
// client. Every failure is returned as a *SendError:
//
//   - an empty recipient list, a rejected FROM address, or a rejected
//     TO address is marked Permanent. Address validation is
//     intentional: a malformed TO here means the producer slipped past
//     normaliseRecipient, which is a programming error, so the worker
//     should dead-letter immediately;
//   - a failure from the SMTP client is classified by
//     classifySMTPError.
//
// An empty msg.ContentType falls back to plain text.
func (s *smtpSender) Send(ctx context.Context, msg OutboundMessage) error {
	// permanent wraps cause as a non-retryable SendError.
	permanent := func(cause error) error {
		return &SendError{Err: cause, Permanent: true}
	}

	if len(msg.To) == 0 {
		return permanent(errors.New("mail: outbound message has no recipients"))
	}

	mail := gomail.NewMsg()
	if err := mail.From(s.from); err != nil {
		return permanent(fmt.Errorf("set FROM: %w", err))
	}
	for _, recipient := range msg.To {
		if err := mail.AddTo(recipient); err != nil {
			return permanent(fmt.Errorf("add TO %q: %w", recipient, err))
		}
	}
	mail.Subject(msg.Subject)

	bodyType := gomail.TypeTextPlain
	if msg.ContentType != "" {
		bodyType = gomail.ContentType(msg.ContentType)
	}
	mail.SetBodyString(bodyType, string(msg.Body))

	err := s.client.DialAndSendWithContext(ctx, mail)
	if err == nil {
		return nil
	}
	return &SendError{Err: err, Permanent: classifySMTPError(err)}
}
// classifySMTPError reports whether err should be treated as a
// permanent failure. Only a *gomail.SendError found in the error chain
// can be permanent, and only when its IsTemp method returns false.
// Everything else — nil, dial failures, context errors, generic I/O —
// counts as transient so the worker retries until MaxAttempts.
func classifySMTPError(err error) bool {
	var smtpErr *gomail.SendError
	if err == nil || !errors.As(err, &smtpErr) || smtpErr == nil {
		return false
	}
	return !smtpErr.IsTemp()
}
+665
View File
@@ -0,0 +1,665 @@
package mail
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/go-jet/jet/v2/qrm"
"github.com/google/uuid"
)
// Store is the Postgres-backed query surface for the mail outbox
// (`mail_deliveries`, `mail_recipients`, `mail_attempts`,
// `mail_dead_letters`, `mail_payloads`). All queries are built through
// go-jet against the generated table bindings under
// `backend/internal/postgres/jet/backend/table`.
type Store struct {
// db is the handle used for standalone reads and for opening the
// transactions that wrap multi-statement writes.
db *sql.DB
}
// NewStore returns a Store that runs its queries against db.
func NewStore(db *sql.DB) *Store {
	store := new(Store)
	store.db = db
	return store
}
// Delivery mirrors a row in `backend.mail_deliveries`. Tests and
// admin endpoints work against this struct directly.
type Delivery struct {
// DeliveryID is the primary key.
DeliveryID uuid.UUID
// TemplateID and IdempotencyKey together form the UNIQUE key that
// InsertEnqueue relies on to detect duplicate enqueues.
TemplateID string
IdempotencyKey string
// Status holds one of the Status* constants.
Status string
// Attempts is incremented by MarkSent, ScheduleRetry and
// MarkDeadLettered and reset to 0 by ResendNonSent.
Attempts int32
// NextAttemptAt is when the row becomes due; MarkSent and
// MarkDeadLettered set it to NULL.
NextAttemptAt *time.Time
// PayloadID references the `mail_payloads` row with the rendered mail.
PayloadID uuid.UUID
// LastError is the message recorded by ScheduleRetry or
// MarkDeadLettered; MarkSent and ResendNonSent clear it.
LastError string
CreatedAt time.Time
UpdatedAt time.Time
// SentAt is stamped by MarkSent.
SentAt *time.Time
// DeadLetteredAt is stamped by MarkDeadLettered and cleared by
// ResendNonSent.
DeadLetteredAt *time.Time
}
// Attempt mirrors a row in `backend.mail_attempts`.
type Attempt struct {
AttemptID uuid.UUID
// DeliveryID is the delivery this attempt belongs to.
DeliveryID uuid.UUID
// AttemptNo is assigned by RecordAttempt as MAX(attempt_no)+1 for the
// delivery, starting at 1.
AttemptNo int32
StartedAt time.Time
// FinishedAt is a pointer because the generated model exposes it as one.
FinishedAt *time.Time
// Outcome is the value passed to RecordAttempt; see the Outcome*
// constants.
Outcome string
// Error is the error message passed to RecordAttempt.
Error string
}
// DeadLetter mirrors a row in `backend.mail_dead_letters`. Rows are
// inserted by MarkDeadLettered in the same transaction that flips the
// delivery status.
type DeadLetter struct {
DeadLetterID uuid.UUID
// DeliveryID is the delivery that was dead-lettered.
DeliveryID uuid.UUID
// ArchivedAt is the timestamp passed to MarkDeadLettered.
ArchivedAt time.Time
// Reason is the same text MarkDeadLettered stores in the delivery's
// last_error column.
Reason string
}
// Payload mirrors a row in `backend.mail_payloads`. Body is the raw
// rendered bytes; Subject is nullable in the schema and is therefore a
// pointer here.
type Payload struct {
// PayloadID is generated by InsertEnqueue and stored on the delivery row.
PayloadID uuid.UUID
// ContentType is the content type supplied at enqueue time.
ContentType string
Subject *string
Body []byte
CreatedAt time.Time
}
// Recipient mirrors a row in `backend.mail_recipients`.
type Recipient struct {
RecipientID uuid.UUID
// DeliveryID is the delivery this address belongs to.
DeliveryID uuid.UUID
// Address is stored as supplied in EnqueueArgs.Recipients.
Address string
// Kind is the recipient kind; InsertEnqueue always writes
// RecipientKindTo.
Kind string
}
// EnqueueArgs aggregates the inputs to InsertEnqueue. Constructing the
// struct by name keeps the call site readable when the Service grows
// new optional fields (locale, headers, etc.).
type EnqueueArgs struct {
// DeliveryID is the caller-chosen primary key of the new delivery.
DeliveryID uuid.UUID
// TemplateID and IdempotencyKey form the deduplication key.
TemplateID string
IdempotencyKey string
// Recipients are inserted one row each with kind RecipientKindTo.
Recipients []string
// ContentType, Subject and Body are written to `mail_payloads`.
ContentType string
Subject string
Body []byte
}
// deliveryColumns returns the column projection shared by every read
// of `mail_deliveries`. The order matches the model.MailDeliveries
// field layout for direct QRM scanning.
func deliveryColumns() postgres.ColumnList {
	deliveries := table.MailDeliveries
	projection := postgres.ColumnList{
		deliveries.DeliveryID,
		deliveries.TemplateID,
		deliveries.IdempotencyKey,
		deliveries.Status,
		deliveries.Attempts,
		deliveries.NextAttemptAt,
		deliveries.PayloadID,
		deliveries.LastError,
		deliveries.CreatedAt,
		deliveries.UpdatedAt,
		deliveries.SentAt,
		deliveries.DeadLetteredAt,
	}
	return projection
}
// InsertEnqueue stores a new delivery together with its payload and
// recipients inside one transaction. Duplicate enqueues are detected
// through the (template_id, idempotency_key) UNIQUE constraint: when
// the delivery insert hits the conflict, the whole transaction —
// including the payload row written just before — is rolled back, so no
// orphaned payload survives, and the method reports (false, nil). A
// committed insert yields (true, nil); any other failure is returned
// as an error with inserted=false.
func (s *Store) InsertEnqueue(ctx context.Context, args EnqueueArgs) (bool, error) {
	err := withTx(ctx, s.db, func(tx *sql.Tx) error {
		payloads := table.MailPayloads
		deliveries := table.MailDeliveries
		recipients := table.MailRecipients

		// The payload row is written first; its id is stored on the
		// delivery row below.
		payloadID := uuid.New()
		insertPayload := payloads.
			INSERT(payloads.PayloadID, payloads.ContentType, payloads.Subject, payloads.Body).
			VALUES(payloadID, args.ContentType, args.Subject, args.Body)
		if _, err := insertPayload.ExecContext(ctx, tx); err != nil {
			return fmt.Errorf("insert payload: %w", err)
		}

		insertDelivery := deliveries.
			INSERT(
				deliveries.DeliveryID,
				deliveries.TemplateID,
				deliveries.IdempotencyKey,
				deliveries.Status,
				deliveries.NextAttemptAt,
				deliveries.PayloadID,
			).
			VALUES(
				args.DeliveryID, args.TemplateID, args.IdempotencyKey, StatusPending,
				postgres.NOW(), payloadID,
			).
			ON_CONFLICT(deliveries.TemplateID, deliveries.IdempotencyKey).
			DO_NOTHING().
			RETURNING(deliveries.DeliveryID)
		var created model.MailDeliveries
		switch err := insertDelivery.QueryContext(ctx, tx, &created); {
		case errors.Is(err, qrm.ErrNoRows):
			// Idempotent re-enqueue: DO NOTHING returned no row. The
			// sentinel makes withTx roll back the payload insert.
			return errIdempotentNoop
		case err != nil:
			return fmt.Errorf("insert delivery: %w", err)
		}

		for _, address := range args.Recipients {
			insertRecipient := recipients.
				INSERT(recipients.RecipientID, recipients.DeliveryID, recipients.Address, recipients.Kind).
				VALUES(uuid.New(), args.DeliveryID, address, RecipientKindTo)
			if _, err := insertRecipient.ExecContext(ctx, tx); err != nil {
				return fmt.Errorf("insert recipient %q: %w", address, err)
			}
		}
		return nil
	})

	switch {
	case errors.Is(err, errIdempotentNoop):
		return false, nil
	case err != nil:
		return false, err
	}
	return true, nil
}
// errIdempotentNoop is an internal sentinel that tells withTx to roll
// back the transaction without surfacing an error to the caller. It
// must never escape this package — InsertEnqueue catches it on the
// way out.
var errIdempotentNoop = errors.New("mail store: idempotent noop")
// ClaimDue selects up to `limit` deliveries that are pending or
// retrying and whose next_attempt_at is NULL or not in the future,
// locking them with FOR UPDATE SKIP LOCKED, oldest due first. Each
// claimed row is returned together with its payload and recipients,
// which are loaded inside the same transaction.
//
// tx must be the worker's transaction; the caller finishes the work
// and commits. exclude lists delivery_ids already handled in the
// current tick — they are filtered out so a row whose retry lands at
// next_attempt_at <= now() is not re-claimed inside the same tick loop.
func (s *Store) ClaimDue(ctx context.Context, tx *sql.Tx, limit int, exclude ...uuid.UUID) ([]ClaimedDelivery, error) {
	deliveries := table.MailDeliveries

	claimable := deliveries.Status.IN(postgres.String(StatusPending), postgres.String(StatusRetrying))
	due := deliveries.NextAttemptAt.IS_NULL().OR(deliveries.NextAttemptAt.LT_EQ(postgres.NOW()))
	filter := claimable.AND(due)
	if len(exclude) > 0 {
		handled := make([]postgres.Expression, len(exclude))
		for i, id := range exclude {
			handled[i] = postgres.UUID(id)
		}
		filter = filter.AND(deliveries.DeliveryID.NOT_IN(handled...))
	}

	query := postgres.SELECT(deliveryColumns()).
		FROM(deliveries).
		WHERE(filter).
		ORDER_BY(postgres.COALESCE(deliveries.NextAttemptAt, deliveries.CreatedAt).ASC()).
		LIMIT(int64(limit)).
		FOR(postgres.UPDATE().SKIP_LOCKED())

	var locked []model.MailDeliveries
	if err := query.QueryContext(ctx, tx, &locked); err != nil {
		return nil, fmt.Errorf("claim due: %w", err)
	}

	result := make([]ClaimedDelivery, 0, len(locked))
	for i := range locked {
		entry := ClaimedDelivery{Delivery: modelToDelivery(locked[i])}

		payload, err := s.loadPayloadTx(ctx, tx, entry.Delivery.PayloadID)
		if err != nil {
			return nil, err
		}
		entry.Payload = payload

		recipients, err := s.listRecipientsTx(ctx, tx, entry.Delivery.DeliveryID)
		if err != nil {
			return nil, err
		}
		entry.Recipients = recipients

		result = append(result, entry)
	}
	return result, nil
}
// ClaimedDelivery bundles a locked delivery row with its payload and
// recipients so the worker has everything it needs in one structure.
type ClaimedDelivery struct {
// Delivery is the row locked by ClaimDue.
Delivery Delivery
// Payload is the `mail_payloads` row referenced by Delivery.PayloadID.
Payload Payload
// Recipients are the delivery's `mail_recipients` rows, ordered by
// recipient_id.
Recipients []Recipient
}
// RecordAttempt appends a row to `mail_attempts` for deliveryID and
// returns the attempt_no it was given. The number is MAX(attempt_no)+1
// over the delivery's existing attempts (1 when there are none), which
// keeps the column monotonic across resend cycles — the delivery's
// wire-visible `attempts` field counts only the current cycle (and
// resets on resend), while `mail_attempts` stays append-only forensic
// history.
//
// The maximum is read and the row inserted as two statements. That is
// sufficient because the surrounding worker transaction guarantees no
// concurrent inserts on the same delivery_id, and it avoids a
// correlated subquery inside INSERT...VALUES that jet does not
// parenthesise.
func (s *Store) RecordAttempt(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, startedAt time.Time, finishedAt time.Time, outcome string, errMsg string) (int32, error) {
	attempts := table.MailAttempts

	var latest struct {
		Max *int32 `alias:"max"`
	}
	latestQuery := postgres.SELECT(postgres.MAXi(attempts.AttemptNo).AS("max")).
		FROM(attempts).
		WHERE(attempts.DeliveryID.EQ(postgres.UUID(deliveryID)))
	if err := latestQuery.QueryContext(ctx, tx, &latest); err != nil {
		return 0, fmt.Errorf("record attempt: read max attempt_no: %w", err)
	}

	// A nil Max means the delivery has no attempts yet.
	var attemptNo int32 = 1
	if latest.Max != nil {
		attemptNo = *latest.Max + 1
	}

	insert := attempts.
		INSERT(
			attempts.AttemptID, attempts.DeliveryID, attempts.AttemptNo,
			attempts.StartedAt, attempts.FinishedAt, attempts.Outcome, attempts.Error,
		).
		VALUES(
			uuid.New(), deliveryID, attemptNo,
			startedAt, finishedAt, outcome, errMsg,
		).
		RETURNING(attempts.AttemptNo)
	var stored model.MailAttempts
	if err := insert.QueryContext(ctx, tx, &stored); err != nil {
		return 0, fmt.Errorf("record attempt: %w", err)
	}
	return stored.AttemptNo, nil
}
// MarkSent moves the delivery to status='sent' inside tx. It bumps
// attempts by one, stamps sent_at and updated_at with at, clears
// next_attempt_at and resets last_error to the empty string.
func (s *Store) MarkSent(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, at time.Time) error {
	deliveries := table.MailDeliveries
	target := deliveries.DeliveryID.EQ(postgres.UUID(deliveryID))

	update := deliveries.UPDATE().
		SET(
			deliveries.Status.SET(postgres.String(StatusSent)),
			deliveries.Attempts.SET(deliveries.Attempts.ADD(postgres.Int(1))),
			deliveries.SentAt.SET(postgres.TimestampzT(at)),
			deliveries.UpdatedAt.SET(postgres.TimestampzT(at)),
			deliveries.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			deliveries.LastError.SET(postgres.String("")),
		).
		WHERE(target)

	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("mark sent: %w", err)
	}
	return nil
}
// ScheduleRetry moves the delivery to status='retrying' inside tx. It
// bumps attempts by one, arms next_attempt_at with nextAt, stamps
// updated_at with at and records errMsg as last_error.
func (s *Store) ScheduleRetry(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, at time.Time, nextAt time.Time, errMsg string) error {
	deliveries := table.MailDeliveries
	target := deliveries.DeliveryID.EQ(postgres.UUID(deliveryID))

	update := deliveries.UPDATE().
		SET(
			deliveries.Status.SET(postgres.String(StatusRetrying)),
			deliveries.Attempts.SET(deliveries.Attempts.ADD(postgres.Int(1))),
			deliveries.NextAttemptAt.SET(postgres.TimestampzT(nextAt)),
			deliveries.UpdatedAt.SET(postgres.TimestampzT(at)),
			deliveries.LastError.SET(postgres.String(errMsg)),
		).
		WHERE(target)

	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("schedule retry: %w", err)
	}
	return nil
}
// MarkDeadLettered moves the delivery to the terminal `dead_lettered`
// state and archives it in `mail_dead_letters`, both inside tx. The
// delivery update bumps attempts by one, stamps dead_lettered_at and
// updated_at with at, clears next_attempt_at and stores reason as
// last_error; the archive row carries the same at and reason.
func (s *Store) MarkDeadLettered(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, at time.Time, reason string) error {
	deliveries := table.MailDeliveries
	flip := deliveries.UPDATE().
		SET(
			deliveries.Status.SET(postgres.String(StatusDeadLettered)),
			deliveries.Attempts.SET(deliveries.Attempts.ADD(postgres.Int(1))),
			deliveries.DeadLetteredAt.SET(postgres.TimestampzT(at)),
			deliveries.UpdatedAt.SET(postgres.TimestampzT(at)),
			deliveries.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			deliveries.LastError.SET(postgres.String(reason)),
		).
		WHERE(deliveries.DeliveryID.EQ(postgres.UUID(deliveryID)))
	if _, err := flip.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("mark dead-lettered: %w", err)
	}

	archive := table.MailDeadLetters
	insert := archive.
		INSERT(archive.DeadLetterID, archive.DeliveryID, archive.ArchivedAt, archive.Reason).
		VALUES(uuid.New(), deliveryID, at, reason)
	if _, err := insert.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("insert dead-letter: %w", err)
	}
	return nil
}
// CountByStatus returns the number of `mail_deliveries` rows per
// status, computed with a single GROUP BY query. The result always
// contains the four known Status* keys, with 0 for a status that has no
// rows, so `mail_outbox_depth{state}` can be published for every state.
func (s *Store) CountByStatus(ctx context.Context) (map[string]int64, error) {
	deliveries := table.MailDeliveries
	query := postgres.
		SELECT(deliveries.Status, postgres.COUNT(postgres.STAR).AS("count")).
		FROM(deliveries).
		GROUP_BY(deliveries.Status)

	var groups []struct {
		MailDeliveries model.MailDeliveries
		Count          int64 `alias:"count"`
	}
	if err := query.QueryContext(ctx, s.db, &groups); err != nil {
		return nil, fmt.Errorf("count by status: %w", err)
	}

	// Seed every known status so absent groups still report zero.
	counts := make(map[string]int64, 4)
	for _, status := range []string{StatusPending, StatusRetrying, StatusSent, StatusDeadLettered} {
		counts[status] = 0
	}
	for i := range groups {
		counts[groups[i].MailDeliveries.Status] = groups[i].Count
	}
	return counts, nil
}
// GetDelivery fetches the delivery with the given primary key. It
// returns ErrDeliveryNotFound when no row matches and a wrapped error
// for any other query failure.
func (s *Store) GetDelivery(ctx context.Context, deliveryID uuid.UUID) (Delivery, error) {
	deliveries := table.MailDeliveries
	query := postgres.SELECT(deliveryColumns()).
		FROM(deliveries).
		WHERE(deliveries.DeliveryID.EQ(postgres.UUID(deliveryID))).
		LIMIT(1)

	var found model.MailDeliveries
	err := query.QueryContext(ctx, s.db, &found)
	switch {
	case err == nil:
		return modelToDelivery(found), nil
	case errors.Is(err, qrm.ErrNoRows):
		return Delivery{}, ErrDeliveryNotFound
	default:
		return Delivery{}, fmt.Errorf("get delivery: %w", err)
	}
}
// ListDeliveries returns one page of deliveries, newest first
// (created_at, then delivery_id, both descending), together with the
// total number of rows in the table. The total is counted before the
// page is read.
func (s *Store) ListDeliveries(ctx context.Context, offset, limit int) ([]Delivery, int64, error) {
	deliveries := table.MailDeliveries

	total, err := countAll(ctx, s.db, deliveries)
	if err != nil {
		return nil, 0, fmt.Errorf("count deliveries: %w", err)
	}

	query := postgres.SELECT(deliveryColumns()).
		FROM(deliveries).
		ORDER_BY(deliveries.CreatedAt.DESC(), deliveries.DeliveryID.DESC()).
		LIMIT(int64(limit)).
		OFFSET(int64(offset))
	var page []model.MailDeliveries
	if err := query.QueryContext(ctx, s.db, &page); err != nil {
		return nil, 0, fmt.Errorf("list deliveries: %w", err)
	}

	result := make([]Delivery, len(page))
	for i := range page {
		result[i] = modelToDelivery(page[i])
	}
	return result, total, nil
}
// ListAttempts returns all recorded attempts of the given delivery in
// ascending attempt_no order. A delivery without attempts yields an
// empty, non-nil slice.
func (s *Store) ListAttempts(ctx context.Context, deliveryID uuid.UUID) ([]Attempt, error) {
	attempts := table.MailAttempts
	query := postgres.
		SELECT(
			attempts.AttemptID, attempts.DeliveryID, attempts.AttemptNo,
			attempts.StartedAt, attempts.FinishedAt, attempts.Outcome, attempts.Error,
		).
		FROM(attempts).
		WHERE(attempts.DeliveryID.EQ(postgres.UUID(deliveryID))).
		ORDER_BY(attempts.AttemptNo.ASC())

	var found []model.MailAttempts
	if err := query.QueryContext(ctx, s.db, &found); err != nil {
		return nil, fmt.Errorf("list attempts: %w", err)
	}

	result := make([]Attempt, len(found))
	for i := range found {
		result[i] = modelToAttempt(found[i])
	}
	return result, nil
}
// ListDeadLetters returns one page of dead-letter rows, newest first
// (archived_at, then dead_letter_id, both descending), together with
// the total number of rows in `mail_dead_letters`.
func (s *Store) ListDeadLetters(ctx context.Context, offset, limit int) ([]DeadLetter, int64, error) {
	archive := table.MailDeadLetters

	total, err := countAll(ctx, s.db, archive)
	if err != nil {
		return nil, 0, fmt.Errorf("count dead-letters: %w", err)
	}

	query := postgres.
		SELECT(archive.DeadLetterID, archive.DeliveryID, archive.ArchivedAt, archive.Reason).
		FROM(archive).
		ORDER_BY(archive.ArchivedAt.DESC(), archive.DeadLetterID.DESC()).
		LIMIT(int64(limit)).
		OFFSET(int64(offset))
	var page []model.MailDeadLetters
	if err := query.QueryContext(ctx, s.db, &page); err != nil {
		return nil, 0, fmt.Errorf("list dead-letters: %w", err)
	}

	result := make([]DeadLetter, len(page))
	for i := range page {
		result[i] = DeadLetter{
			DeadLetterID: page[i].DeadLetterID,
			DeliveryID:   page[i].DeliveryID,
			ArchivedAt:   page[i].ArchivedAt,
			Reason:       page[i].Reason,
		}
	}
	return result, total, nil
}
// ResendNonSent re-arms a delivery for a fresh attempt cycle and
// returns the updated row. Inside one transaction it locks the row with
// SELECT ... FOR UPDATE, refuses rows that are already `sent`, then
// resets the row to pending: attempts=0, next_attempt_at=at,
// dead_lettered_at=NULL, last_error="", updated_at=at.
//
// ErrDeliveryNotFound is returned when no row matches deliveryID and
// ErrResendOnSent when the row is in the terminal sent state; in both
// cases the transaction is rolled back and the row is left untouched.
func (s *Store) ResendNonSent(ctx context.Context, deliveryID uuid.UUID, at time.Time) (Delivery, error) {
	deliveries := table.MailDeliveries
	// target builds a fresh primary-key predicate for each statement.
	target := func() postgres.BoolExpression {
		return deliveries.DeliveryID.EQ(postgres.UUID(deliveryID))
	}

	var rearmed Delivery
	txErr := withTx(ctx, s.db, func(tx *sql.Tx) error {
		// Lock the row so the status check and the update see the same state.
		var current model.MailDeliveries
		lock := postgres.SELECT(deliveries.Status).
			FROM(deliveries).
			WHERE(target()).
			FOR(postgres.UPDATE())
		switch err := lock.QueryContext(ctx, tx, &current); {
		case errors.Is(err, qrm.ErrNoRows):
			return ErrDeliveryNotFound
		case err != nil:
			return fmt.Errorf("lock delivery: %w", err)
		}
		if current.Status == StatusSent {
			return ErrResendOnSent
		}

		reset := deliveries.UPDATE().
			SET(
				deliveries.Status.SET(postgres.String(StatusPending)),
				deliveries.Attempts.SET(postgres.Int(0)),
				deliveries.NextAttemptAt.SET(postgres.TimestampzT(at)),
				deliveries.DeadLetteredAt.SET(postgres.TimestampzExp(postgres.NULL)),
				deliveries.LastError.SET(postgres.String("")),
				deliveries.UpdatedAt.SET(postgres.TimestampzT(at)),
			).
			WHERE(target())
		if _, err := reset.ExecContext(ctx, tx); err != nil {
			return fmt.Errorf("re-arm delivery: %w", err)
		}

		// Read the row back so the caller sees the stored values.
		var fresh model.MailDeliveries
		reload := postgres.SELECT(deliveryColumns()).
			FROM(deliveries).
			WHERE(target()).
			LIMIT(1)
		if err := reload.QueryContext(ctx, tx, &fresh); err != nil {
			return fmt.Errorf("reload delivery: %w", err)
		}
		rearmed = modelToDelivery(fresh)
		return nil
	})
	if txErr != nil {
		return Delivery{}, txErr
	}
	return rearmed, nil
}
// loadPayloadTx reads the `mail_payloads` row with the given id inside
// tx and converts it to a Payload. Any query failure, including a
// missing row, is returned wrapped as "load payload".
func (s *Store) loadPayloadTx(ctx context.Context, tx *sql.Tx, payloadID uuid.UUID) (Payload, error) {
	payloads := table.MailPayloads
	query := postgres.
		SELECT(payloads.PayloadID, payloads.ContentType, payloads.Subject, payloads.Body, payloads.CreatedAt).
		FROM(payloads).
		WHERE(payloads.PayloadID.EQ(postgres.UUID(payloadID))).
		LIMIT(1)

	var stored model.MailPayloads
	if err := query.QueryContext(ctx, tx, &stored); err != nil {
		return Payload{}, fmt.Errorf("load payload: %w", err)
	}

	var payload Payload
	payload.PayloadID = stored.PayloadID
	payload.ContentType = stored.ContentType
	payload.Subject = stored.Subject
	payload.Body = stored.Body
	payload.CreatedAt = stored.CreatedAt
	return payload, nil
}
// listRecipientsTx reads every `mail_recipients` row of the given
// delivery inside tx, ordered by recipient_id, and converts them to
// Recipient values. A delivery without recipients yields an empty,
// non-nil slice.
func (s *Store) listRecipientsTx(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID) ([]Recipient, error) {
	recipients := table.MailRecipients
	query := postgres.
		SELECT(recipients.RecipientID, recipients.DeliveryID, recipients.Address, recipients.Kind).
		FROM(recipients).
		WHERE(recipients.DeliveryID.EQ(postgres.UUID(deliveryID))).
		ORDER_BY(recipients.RecipientID.ASC())

	var stored []model.MailRecipients
	if err := query.QueryContext(ctx, tx, &stored); err != nil {
		return nil, fmt.Errorf("list recipients: %w", err)
	}

	result := make([]Recipient, len(stored))
	for i := range stored {
		result[i] = Recipient{
			RecipientID: stored[i].RecipientID,
			DeliveryID:  stored[i].DeliveryID,
			Address:     stored[i].Address,
			Kind:        stored[i].Kind,
		}
	}
	return result, nil
}
// withTx runs fn inside a database transaction opened on db.
//
// The transaction is committed when fn returns nil and rolled back when
// fn returns an error; in the latter case fn's error is returned
// unchanged and any rollback error is discarded, since fn's error is
// the more actionable one. Failures to begin or commit are returned
// wrapped with a "mail store:" prefix.
//
// The rollback is deferred so that it also runs when fn panics.
// Without it a panicking fn would leave the transaction open and its
// connection checked out of the pool while the panic propagates.
func withTx(ctx context.Context, db *sql.DB, fn func(tx *sql.Tx) error) error {
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("mail store: begin tx: %w", err)
	}
	// After a successful Commit this Rollback is a no-op that returns
	// sql.ErrTxDone, which is safe to ignore.
	defer func() { _ = tx.Rollback() }()

	if err := fn(tx); err != nil {
		return err
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("mail store: commit tx: %w", err)
	}
	return nil
}
// BeginTx opens a transaction on the store's database handle with the
// default options and hands it to the caller, who owns committing or
// rolling it back. The worker uses it to scope ClaimDue, RecordAttempt
// and the Mark* calls under a single commit boundary.
func (s *Store) BeginTx(ctx context.Context) (*sql.Tx, error) {
	tx, err := s.db.BeginTx(ctx, nil)
	return tx, err
}
// modelToDelivery converts a generated model row into the public
// Delivery struct. The nullable timestamps are copied into fresh
// allocations so the result never aliases the scanned row.
func modelToDelivery(row model.MailDeliveries) Delivery {
	// detach returns a pointer to a private copy of *src, or nil.
	detach := func(src *time.Time) *time.Time {
		if src == nil {
			return nil
		}
		copied := *src
		return &copied
	}

	return Delivery{
		DeliveryID:     row.DeliveryID,
		TemplateID:     row.TemplateID,
		IdempotencyKey: row.IdempotencyKey,
		Status:         row.Status,
		Attempts:       row.Attempts,
		NextAttemptAt:  detach(row.NextAttemptAt),
		PayloadID:      row.PayloadID,
		LastError:      row.LastError,
		CreatedAt:      row.CreatedAt,
		UpdatedAt:      row.UpdatedAt,
		SentAt:         detach(row.SentAt),
		DeadLetteredAt: detach(row.DeadLetteredAt),
	}
}
// modelToAttempt converts a generated model row into the public
// Attempt struct. FinishedAt, when set, is copied into a fresh
// allocation so the result does not alias the scanned row.
func modelToAttempt(row model.MailAttempts) Attempt {
	var finished *time.Time
	if row.FinishedAt != nil {
		copied := *row.FinishedAt
		finished = &copied
	}

	return Attempt{
		AttemptID:  row.AttemptID,
		DeliveryID: row.DeliveryID,
		AttemptNo:  row.AttemptNo,
		StartedAt:  row.StartedAt,
		FinishedAt: finished,
		Outcome:    row.Outcome,
		Error:      row.Error,
	}
}
// countAll executes `SELECT COUNT(*) FROM <tbl>` through jet and
// returns the count. The destination is an alias-tagged scalar so QRM
// can map the un-prefixed alias produced by AS("count"). Query errors
// are returned unwrapped, for the caller to annotate.
func countAll(ctx context.Context, db qrm.DB, tbl postgres.ReadableTable) (int64, error) {
	var result struct {
		Count int64 `alias:"count"`
	}

	query := postgres.SELECT(postgres.COUNT(postgres.STAR).AS("count")).FROM(tbl)
	err := query.QueryContext(ctx, db, &result)
	if err != nil {
		return 0, err
	}
	return result.Count, nil
}
+350
View File
@@ -0,0 +1,350 @@
package mail_test
import (
"context"
"database/sql"
"errors"
"net/url"
"testing"
"time"
"galaxy/backend/internal/mail"
backendpg "galaxy/backend/internal/postgres"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
// Connection parameters and timeouts shared by the Postgres-backed tests
// in this package; all of them are consumed by startPostgres.
const (
// pgImage is the container image passed to tcpostgres.Run.
pgImage = "postgres:16-alpine"
// pgUser, pgPassword and pgDatabase configure the container's
// initial role and database.
pgUser = "galaxy"
pgPassword = "galaxy"
pgDatabase = "galaxy_backend"
// pgSchema is written into the DSN as search_path.
pgSchema = "backend"
// pgStartup is the startup timeout of the container wait strategy.
pgStartup = 90 * time.Second
// pgOpTO is assigned to the shared Postgres config's
// OperationTimeout and reused for the initial ping.
pgOpTO = 10 * time.Second
)
// startPostgres mirrors the auth_e2e_test scaffolding: it starts a
// Postgres testcontainer, opens a primary connection whose search_path is
// scoped to pgSchema, pings it, applies the backend migrations and returns
// the ready *sql.DB.
//
// The test is skipped (not failed) when the container cannot be started,
// so machines without a container runtime still pass. Every resource is
// released through t.Cleanup; the whole setup shares one 3-minute context.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)

	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	// Register termination before inspecting err: Run may hand back a
	// created container together with an error (for example when the wait
	// strategy times out), and that container would otherwise be leaked.
	// TerminateContainer is a no-op for a nil container.
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}

	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	scopedDSN, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}

	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pgOpTO
	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	// Close errors are irrelevant once the test has finished.
	t.Cleanup(func() { _ = db.Close() })

	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// dsnWithSearchPath returns baseDSN with its search_path query parameter
// set to schema. An sslmode parameter that is missing or empty is filled
// in as "disable"; any other existing value is kept. The query string is
// re-encoded, so parameters come back sorted by key. A DSN that does not
// parse as a URL yields an empty string and the parse error.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	u, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	query := u.Query()
	if query.Get("sslmode") == "" {
		query.Set("sslmode", "disable")
	}
	query.Set("search_path", schema)
	u.RawQuery = query.Encode()
	return u.String(), nil
}
func TestStoreInsertEnqueueRoundTrip(t *testing.T) {
t.Parallel()
db := startPostgres(t)
store := mail.NewStore(db)
ctx := context.Background()
args := mail.EnqueueArgs{
DeliveryID: uuid.New(),
TemplateID: mail.TemplateLoginCode,
IdempotencyKey: uuid.NewString(),
Recipients: []string{"alice@example.test"},
ContentType: "text/plain",
Subject: "hello",
Body: []byte("hi"),
}
inserted, err := store.InsertEnqueue(ctx, args)
if err != nil {
t.Fatalf("insert: %v", err)
}
if !inserted {
t.Fatal("first insert must report inserted=true")
}
// Same idempotency key must dedupe.
args2 := args
args2.DeliveryID = uuid.New()
inserted2, err := store.InsertEnqueue(ctx, args2)
if err != nil {
t.Fatalf("insert retry: %v", err)
}
if inserted2 {
t.Fatal("re-enqueue with same key must report inserted=false")
}
d, err := store.GetDelivery(ctx, args.DeliveryID)
if err != nil {
t.Fatalf("get delivery: %v", err)
}
if d.Status != mail.StatusPending {
t.Fatalf("status=%q want pending", d.Status)
}
if d.NextAttemptAt == nil {
t.Fatal("next_attempt_at must be set on insert")
}
}
// TestStoreClaimDueAndMarkSent drives one delivery through the success
// path at the store level: enqueue, claim inside a transaction, record a
// successful attempt, mark the delivery sent, commit, then read the
// delivery and its attempt list back.
func TestStoreClaimDueAndMarkSent(t *testing.T) {
t.Parallel()
db := startPostgres(t)
store := mail.NewStore(db)
ctx := context.Background()
deliveryID := uuid.New()
if _, err := store.InsertEnqueue(ctx, mail.EnqueueArgs{
DeliveryID: deliveryID,
TemplateID: mail.TemplateLoginCode,
IdempotencyKey: uuid.NewString(),
Recipients: []string{"bob@example.test"},
ContentType: "text/plain",
Subject: "hello",
Body: []byte("hi"),
}); err != nil {
t.Fatalf("insert: %v", err)
}
tx, err := store.BeginTx(ctx)
if err != nil {
t.Fatalf("begin: %v", err)
}
// Release the transaction if an assertion below aborts the test before
// Commit; the rollback error is deliberately ignored.
t.Cleanup(func() { _ = tx.Rollback() })
// Ask for up to 5 rows; only the one enqueued above exists.
claimed, err := store.ClaimDue(ctx, tx, 5)
if err != nil {
t.Fatalf("claim: %v", err)
}
if len(claimed) != 1 {
t.Fatalf("got %d claimed, want 1", len(claimed))
}
// The claim must carry the delivery together with its payload and
// recipients.
if claimed[0].Delivery.DeliveryID != deliveryID {
t.Fatalf("claimed wrong delivery: %s", claimed[0].Delivery.DeliveryID)
}
if string(claimed[0].Payload.Body) != "hi" {
t.Fatalf("payload body lost in round trip: %q", claimed[0].Payload.Body)
}
if len(claimed[0].Recipients) != 1 || claimed[0].Recipients[0].Address != "bob@example.test" {
t.Fatalf("recipient lost: %+v", claimed[0].Recipients)
}
// Attempt row and status change share the claiming transaction.
now := time.Now().UTC()
if _, err := store.RecordAttempt(ctx, tx, deliveryID, now, now, mail.OutcomeSuccess, ""); err != nil {
t.Fatalf("record attempt: %v", err)
}
if err := store.MarkSent(ctx, tx, deliveryID, now); err != nil {
t.Fatalf("mark sent: %v", err)
}
if err := tx.Commit(); err != nil {
t.Fatalf("commit: %v", err)
}
// Verify the committed state outside the transaction.
d, err := store.GetDelivery(ctx, deliveryID)
if err != nil {
t.Fatalf("get delivery: %v", err)
}
if d.Status != mail.StatusSent {
t.Fatalf("status=%q want sent", d.Status)
}
if d.SentAt == nil {
t.Fatal("sent_at must be set after MarkSent")
}
if d.Attempts != 1 {
t.Fatalf("attempts=%d want 1", d.Attempts)
}
attempts, err := store.ListAttempts(ctx, deliveryID)
if err != nil {
t.Fatalf("list attempts: %v", err)
}
if len(attempts) != 1 || attempts[0].Outcome != mail.OutcomeSuccess {
t.Fatalf("attempts=%+v", attempts)
}
}
// TestStoreScheduleRetryThenDeadLetter walks one delivery through the
// failure path at the store level. The first transaction claims the row,
// records a transient failure and schedules a retry; the second one
// dead-letters it. After each commit the delivery is read back and its
// status is checked, and the dead-letter listing must report one row.
func TestStoreScheduleRetryThenDeadLetter(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	store := mail.NewStore(db)
	ctx := context.Background()

	deliveryID := uuid.New()
	if _, err := store.InsertEnqueue(ctx, mail.EnqueueArgs{
		DeliveryID:     deliveryID,
		TemplateID:     "test.template",
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{"carol@example.test"},
		ContentType:    "text/plain",
		Subject:        "hi",
		Body:           []byte("body"),
	}); err != nil {
		t.Fatalf("insert: %v", err)
	}

	tx, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin tx 1: %v", err)
	}
	// Release the transaction if a check below aborts the test before
	// Commit; after a successful Commit the rollback error is ignored.
	t.Cleanup(func() { _ = tx.Rollback() })
	if _, err := store.ClaimDue(ctx, tx, 1); err != nil {
		t.Fatalf("claim 1: %v", err)
	}
	now := time.Now().UTC()
	if _, err := store.RecordAttempt(ctx, tx, deliveryID, now, now, mail.OutcomeTransientError, "boom"); err != nil {
		t.Fatalf("record attempt: %v", err)
	}
	if err := store.ScheduleRetry(ctx, tx, deliveryID, now, now.Add(2*time.Second), "boom"); err != nil {
		t.Fatalf("schedule retry: %v", err)
	}
	if err := tx.Commit(); err != nil {
		t.Fatalf("commit 1: %v", err)
	}

	d, err := store.GetDelivery(ctx, deliveryID)
	if err != nil {
		t.Fatalf("get delivery: %v", err)
	}
	if d.Status != mail.StatusRetrying {
		t.Fatalf("status=%q want retrying", d.Status)
	}
	if d.LastError != "boom" {
		t.Fatalf("last_error=%q want boom", d.LastError)
	}

	tx2, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin tx 2: %v", err)
	}
	// Same safety net as for the first transaction.
	t.Cleanup(func() { _ = tx2.Rollback() })
	if err := store.MarkDeadLettered(ctx, tx2, deliveryID, now, "max attempts"); err != nil {
		t.Fatalf("mark dead-lettered: %v", err)
	}
	if err := tx2.Commit(); err != nil {
		t.Fatalf("commit 2: %v", err)
	}

	d, err = store.GetDelivery(ctx, deliveryID)
	if err != nil {
		t.Fatalf("get delivery 2: %v", err)
	}
	if d.Status != mail.StatusDeadLettered {
		t.Fatalf("status=%q want dead_lettered", d.Status)
	}
	if d.DeadLetteredAt == nil {
		t.Fatal("dead_lettered_at must be set")
	}

	_, total, err := store.ListDeadLetters(ctx, 0, 25)
	if err != nil {
		t.Fatalf("list dead letters: %v", err)
	}
	if total != 1 {
		t.Fatalf("dead-letter total=%d want 1", total)
	}
}
// TestStoreResendNonSent covers the three outcomes of ResendNonSent:
// re-arming a pending delivery succeeds, re-sending a delivery that has
// already been marked sent fails with ErrResendOnSent, and an unknown
// delivery id fails with ErrDeliveryNotFound.
func TestStoreResendNonSent(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	store := mail.NewStore(db)
	ctx := context.Background()

	deliveryID := uuid.New()
	if _, err := store.InsertEnqueue(ctx, mail.EnqueueArgs{
		DeliveryID:     deliveryID,
		TemplateID:     "test.template",
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{"d@example.test"},
		ContentType:    "text/plain",
		Subject:        "hi",
		Body:           []byte("b"),
	}); err != nil {
		t.Fatalf("insert: %v", err)
	}

	// re-arm pending row -> ok.
	if _, err := store.ResendNonSent(ctx, deliveryID, time.Now().UTC()); err != nil {
		t.Fatalf("resend pending: %v", err)
	}

	// flip to sent and verify resend now errors.
	tx, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin: %v", err)
	}
	// Release the transaction if a check below aborts the test before
	// Commit; after a successful Commit the rollback error is ignored.
	t.Cleanup(func() { _ = tx.Rollback() })
	if _, err := store.ClaimDue(ctx, tx, 1); err != nil {
		t.Fatalf("claim: %v", err)
	}
	now := time.Now().UTC()
	if _, err := store.RecordAttempt(ctx, tx, deliveryID, now, now, mail.OutcomeSuccess, ""); err != nil {
		t.Fatalf("record attempt: %v", err)
	}
	if err := store.MarkSent(ctx, tx, deliveryID, now); err != nil {
		t.Fatalf("mark sent: %v", err)
	}
	if err := tx.Commit(); err != nil {
		t.Fatalf("commit: %v", err)
	}

	if _, err := store.ResendNonSent(ctx, deliveryID, time.Now().UTC()); !errors.Is(err, mail.ErrResendOnSent) {
		t.Fatalf("resend on sent: want ErrResendOnSent, got %v", err)
	}
	if _, err := store.ResendNonSent(ctx, uuid.New(), time.Now().UTC()); !errors.Is(err, mail.ErrDeliveryNotFound) {
		t.Fatalf("resend on missing: want ErrDeliveryNotFound, got %v", err)
	}
}
+230
View File
@@ -0,0 +1,230 @@
package mail
import (
"context"
"errors"
"math"
"math/rand/v2"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// Worker drains the mail outbox: per tick it walks due rows under
// `SELECT … FOR UPDATE SKIP LOCKED`, dispatches each through the SMTP
// sender, and atomically updates the delivery + attempt rows.
// Implements `internal/app.Component`.
type Worker struct {
// svc is the Service whose deps (Store, SMTP, Admin, Config, Now,
// Logger) the worker reads on every tick.
svc *Service
}
// NewWorker returns a Worker that operates on the dependencies held by
// svc.
func NewWorker(svc *Service) *Worker {
	w := new(Worker)
	w.svc = svc
	return w
}
// claimBatchSize bounds how many rows the worker processes per tick.
// 16 keeps each tick under a second on a developer machine while
// leaving headroom for transient SMTP back-pressure.
// tick uses it both as its loop bound and as the initial capacity of
// the slice of already-handled delivery ids.
const claimBatchSize = 16
// Run executes the outbox scan loop until ctx is cancelled and then
// returns nil. It performs one tick immediately (the startup-drain pass
// mandated by `PLAN.md` §5.6) and afterwards one tick per
// Config.WorkerInterval. A failing tick is logged as a warning and never
// stops the loop; context.Canceled is not logged. A nil Worker returns
// nil right away.
func (w *Worker) Run(ctx context.Context) error {
	if w == nil {
		return nil
	}
	log := w.svc.deps.Logger.Named("worker")
	// runTick performs one tick and reports a failure under failureMsg.
	runTick := func(failureMsg string) {
		err := w.tick(ctx)
		if err == nil || errors.Is(err, context.Canceled) {
			return
		}
		log.Warn(failureMsg, zap.Error(err))
	}

	runTick("initial mail outbox drain failed")

	interval := time.NewTicker(w.svc.deps.Config.WorkerInterval)
	defer interval.Stop()
	for {
		select {
		case <-interval.C:
			runTick("mail outbox tick failed")
		case <-ctx.Done():
			return nil
		}
	}
}
// Shutdown does nothing and always returns nil. Each row is handled in
// its own transaction, so cancelling the context passed to Run is what
// stops the loop; a row that is mid-send either commits or is rolled
// back before the worker returns.
func (w *Worker) Shutdown(_ context.Context) error {
	return nil
}
// Tick runs exactly one scan pass. It is exported so tests can drive the
// worker step by step instead of waiting on the ticker.
func (w *Worker) Tick(ctx context.Context) error {
	err := w.tick(ctx)
	return err
}
// tick handles at most claimBatchSize deliveries, one transaction per
// delivery so that a slow SMTP send holds a single row lock. It stops
// early when nothing is due, when a row fails with an error, or when ctx
// is done (returning ctx.Err()).
//
// The ids handled so far are passed to every further claim as an
// exclusion list. Without that, a retry whose next_attempt_at already
// lies in the past could be re-claimed repeatedly and burn a delivery's
// whole MaxAttempts budget within one tick.
func (w *Worker) tick(ctx context.Context) error {
	handled := make([]uuid.UUID, 0, claimBatchSize)
	for len(handled) < claimBatchSize {
		if err := ctx.Err(); err != nil {
			return err
		}
		more, id, err := w.processOne(ctx, handled)
		switch {
		case err != nil:
			return err
		case !more:
			return nil
		}
		handled = append(handled, id)
	}
	return nil
}
// processOne handles at most one due delivery inside its own
// transaction: claim a row (skipping the ids in exclude), send it through
// the SMTP sender, record the attempt, move the delivery to its next
// state and commit.
//
// State transitions:
//   - send succeeded: attempt recorded as success, delivery marked sent;
//   - permanent error, or the attempt budget (Config.MaxAttempts) is
//     used up: delivery dead-lettered and, after the commit, the admin
//     notifier is informed;
//   - any other error: retry scheduled at finishedAt + nextBackoff.
//
// It returns more=false with uuid.Nil when no row was due (or on error),
// and more=true with the delivery id once a row has been committed, so
// the tick loop can exclude that id from later claims.
func (w *Worker) processOne(ctx context.Context, exclude []uuid.UUID) (bool, uuid.UUID, error) {
	tx, err := w.svc.deps.Store.BeginTx(ctx)
	if err != nil {
		return false, uuid.Nil, err
	}
	// Safety net for every early return below; once Commit has succeeded
	// there is nothing left to undo and the result is ignored.
	defer func() { _ = tx.Rollback() }()

	batch, err := w.svc.deps.Store.ClaimDue(ctx, tx, 1, exclude...)
	switch {
	case err != nil:
		return false, uuid.Nil, err
	case len(batch) == 0:
		return false, uuid.Nil, nil
	}
	item := batch[0]
	deliveryID := item.Delivery.DeliveryID
	templateID := item.Delivery.TemplateID
	log := w.svc.deps.Logger.Named("worker").With(
		zap.String("delivery_id", deliveryID.String()),
		zap.String("template_id", templateID),
	)

	startedAt := w.svc.deps.Now()

	recipients := make([]string, len(item.Recipients))
	for i := range item.Recipients {
		recipients[i] = item.Recipients[i].Address
	}
	// A missing subject is sent as the empty string.
	var subject string
	if s := item.Payload.Subject; s != nil {
		subject = *s
	}
	sendErr := w.svc.deps.SMTP.Send(ctx, OutboundMessage{
		To:          recipients,
		Subject:     subject,
		ContentType: item.Payload.ContentType,
		Body:        item.Payload.Body,
	})
	finishedAt := w.svc.deps.Now()
	// cycleAttempt counts this send within the delivery's current budget.
	cycleAttempt := item.Delivery.Attempts + 1

	// deadLettered remembers the give-up decision so the admin
	// notification can be issued after the commit.
	deadLettered := false
	if sendErr != nil {
		cause := sendErr.Error()
		permanent := IsPermanent(sendErr)
		outcome := OutcomeTransientError
		if permanent {
			outcome = OutcomePermanentError
		}
		attemptNo, err := w.svc.deps.Store.RecordAttempt(ctx, tx, deliveryID, startedAt, finishedAt, outcome, cause)
		if err != nil {
			return false, uuid.Nil, err
		}
		maxAttempts := int32(w.svc.deps.Config.MaxAttempts)
		deadLettered = permanent || cycleAttempt >= maxAttempts
		if deadLettered {
			reason := cause
			if permanent {
				reason = "permanent: " + reason
			}
			if err := w.svc.deps.Store.MarkDeadLettered(ctx, tx, deliveryID, finishedAt, reason); err != nil {
				return false, uuid.Nil, err
			}
			log.Warn("mail delivery dead-lettered",
				zap.Int32("cycle_attempt", cycleAttempt),
				zap.Int32("attempt_no", attemptNo),
				zap.Int32("max_attempts", maxAttempts),
				zap.Bool("permanent", permanent),
				zap.String("reason", reason),
			)
		} else {
			nextAt := finishedAt.Add(nextBackoff(int(cycleAttempt)))
			if err := w.svc.deps.Store.ScheduleRetry(ctx, tx, deliveryID, finishedAt, nextAt, cause); err != nil {
				return false, uuid.Nil, err
			}
			log.Info("mail delivery retry scheduled",
				zap.Int32("cycle_attempt", cycleAttempt),
				zap.Int32("attempt_no", attemptNo),
				zap.Time("next_attempt_at", nextAt),
			)
		}
	} else {
		attemptNo, err := w.svc.deps.Store.RecordAttempt(ctx, tx, deliveryID, startedAt, finishedAt, OutcomeSuccess, "")
		if err != nil {
			return false, uuid.Nil, err
		}
		if err := w.svc.deps.Store.MarkSent(ctx, tx, deliveryID, finishedAt); err != nil {
			return false, uuid.Nil, err
		}
		log.Info("mail delivery sent",
			zap.Int32("cycle_attempt", cycleAttempt),
			zap.Int32("attempt_no", attemptNo),
		)
	}

	if err := tx.Commit(); err != nil {
		return false, uuid.Nil, err
	}
	// Notify only after the dead-letter state is durable.
	if deadLettered {
		w.svc.deps.Admin.OnDeadLetter(ctx, deliveryID, templateID, sendErr.Error())
	}
	return true, deliveryID, nil
}
// nextBackoff computes how long to wait after attempt number `attempt`
// (1-indexed; values below 1 are treated as 1) before the next try.
//
// The delay is backoffBase * backoffFactor^(attempt-1), capped at
// backoffMax, then multiplied by a random factor drawn uniformly from
// [1-backoffJitter, 1+backoffJitter) to spread retries out. The jittered
// result is never allowed to drop below backoffBase.
func nextBackoff(attempt int) time.Duration {
	n := attempt
	if n < 1 {
		n = 1
	}
	floor, ceiling := float64(backoffBase), float64(backoffMax)

	wait := floor * math.Pow(backoffFactor, float64(n-1))
	if wait > ceiling {
		wait = ceiling
	}
	// shake is uniform in [-backoffJitter, +backoffJitter).
	shake := (rand.Float64()*2 - 1) * backoffJitter
	wait = wait * (1 + shake)
	if wait < floor {
		wait = floor
	}
	return time.Duration(wait)
}
// Compile-time check that Worker satisfies the lifecycle interface
// shape used elsewhere (Run + Shutdown). The interface is spelled out
// inline, and the typed nil pointer means nothing is allocated for the
// check.
var _ interface {
Run(context.Context) error
Shutdown(context.Context) error
} = (*Worker)(nil)
+247
View File
@@ -0,0 +1,247 @@
package mail_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/mail"
"github.com/google/uuid"
"go.uber.org/zap/zaptest"
)
// recordingSender is an SMTPSender test double. It remembers every
// message it is asked to send and answers each call with the next
// scripted behaviour; once the script is exhausted every Send succeeds.
type recordingSender struct {
	mu        sync.Mutex
	sent      []mail.OutboundMessage
	behaviour []func(mail.OutboundMessage) error
}

// newRecordingSender returns a sender with no recorded messages and an
// empty script.
func newRecordingSender() *recordingSender {
	return new(recordingSender)
}

// Send records msg and then consumes the head of the behaviour queue,
// returning whatever that function returns. With an empty queue it
// returns nil.
func (r *recordingSender) Send(_ context.Context, msg mail.OutboundMessage) error {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.sent = append(r.sent, msg)
	if len(r.behaviour) > 0 {
		next := r.behaviour[0]
		r.behaviour = r.behaviour[1:]
		return next(msg)
	}
	return nil
}

// snapshot returns a copy of the messages recorded so far, so callers can
// inspect it without holding the lock.
func (r *recordingSender) snapshot() []mail.OutboundMessage {
	r.mu.Lock()
	defer r.mu.Unlock()

	seen := make([]mail.OutboundMessage, len(r.sent))
	copy(seen, r.sent)
	return seen
}
// recordingAdminNotifier is an admin-notifier test double that only
// counts how many dead-letter notifications it has received.
type recordingAdminNotifier struct {
	mu    sync.Mutex
	calls int
}

// OnDeadLetter ignores its arguments and increments the call counter.
func (r *recordingAdminNotifier) OnDeadLetter(_ context.Context, _ uuid.UUID, _, _ string) {
	r.mu.Lock()
	r.calls++
	r.mu.Unlock()
}

// count returns the number of OnDeadLetter calls observed so far.
func (r *recordingAdminNotifier) count() int {
	r.mu.Lock()
	n := r.calls
	r.mu.Unlock()
	return n
}
// buildService assembles a mail.Service on top of a real Postgres
// testcontainer. The sender, admin notifier, attempt budget and clock are
// injected so tests can reach the retry and dead-letter branches without
// waiting on real time. WorkerInterval is set to one hour.
func buildService(t *testing.T, sender mail.SMTPSender, admin mail.AdminNotifier, maxAttempts int, now func() time.Time) *mail.Service {
	t.Helper()
	cfg := config.MailConfig{
		WorkerInterval: time.Hour,
		MaxAttempts:    maxAttempts,
	}
	deps := mail.Deps{
		Store:  mail.NewStore(startPostgres(t)),
		SMTP:   sender,
		Admin:  admin,
		Config: cfg,
		Now:    now,
		Logger: zaptest.NewLogger(t),
	}
	return mail.NewService(deps)
}
// TestWorkerSuccessFirstAttempt enqueues one login-code mail, runs a
// single tick with a sender that always succeeds and expects: exactly one
// message handed to the sender with subject and body populated, one
// delivery in status sent with one attempt, and no admin notification.
func TestWorkerSuccessFirstAttempt(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	sender, admin := newRecordingSender(), &recordingAdminNotifier{}
	svc := buildService(t, sender, admin, 3, time.Now)

	if err := svc.EnqueueLoginCode(ctx, "alice@example.test", "111111", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	if err := mail.NewWorker(svc).Tick(ctx); err != nil {
		t.Fatalf("tick: %v", err)
	}

	delivered := sender.snapshot()
	if n := len(delivered); n != 1 {
		t.Fatalf("got %d sent messages, want 1", n)
	}
	if first := delivered[0]; first.Subject == "" || len(first.Body) == 0 {
		t.Fatalf("sent message missing fields: %+v", first)
	}

	listing, err := svc.AdminListDeliveries(ctx, 1, 10)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if n := len(listing.Items); n != 1 {
		t.Fatalf("want 1 delivery, got %d", n)
	}
	stored := listing.Items[0]
	if stored.Status != mail.StatusSent {
		t.Fatalf("status=%q want sent", stored.Status)
	}
	if stored.Attempts != 1 {
		t.Fatalf("attempts=%d want 1", stored.Attempts)
	}
	if calls := admin.count(); calls != 0 {
		t.Fatalf("admin notifier must not fire on success, got %d", calls)
	}
}
// TestWorkerTransientThenDeadLetter runs a delivery with an attempt
// budget of 2 against a sender that fails transiently twice. The first
// tick must leave the delivery retrying; the second must dead-letter it
// with two attempts recorded, fire the admin notifier once and make the
// row visible in the dead-letter listing.
func TestWorkerTransientThenDeadLetter(t *testing.T) {
	t.Parallel()
	sender := newRecordingSender()
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return errors.New("smtp transient #1") },
		func(mail.OutboundMessage) error { return errors.New("smtp transient #2") },
	}
	admin := &recordingAdminNotifier{}
	// Start the fake clock 2 hours behind wall-clock so the
	// `finishedAt + backoff` computed by ScheduleRetry lands in the
	// past relative to DB `now()` and the second tick re-claims the
	// row immediately.
	clock := time.Now().UTC().Add(-2 * time.Hour)
	svc := buildService(t, sender, admin, 2, func() time.Time { return clock })

	if err := svc.EnqueueLoginCode(context.Background(), "bob@example.test", "222222", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	worker := mail.NewWorker(svc)

	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #1: %v", err)
	}
	page, err := svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	// Guard the index below: an empty page must fail the test with a
	// message, not panic it.
	if len(page.Items) != 1 {
		t.Fatalf("after first failure want 1 delivery, got %d", len(page.Items))
	}
	if got := page.Items[0].Status; got != mail.StatusRetrying {
		t.Fatalf("after first failure status=%q want retrying", got)
	}

	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #2: %v", err)
	}
	page, err = svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list 2: %v", err)
	}
	if len(page.Items) != 1 {
		t.Fatalf("after second failure want 1 delivery, got %d", len(page.Items))
	}
	if got := page.Items[0].Status; got != mail.StatusDeadLettered {
		t.Fatalf("after second failure status=%q want dead_lettered", got)
	}
	if page.Items[0].Attempts != 2 {
		t.Fatalf("attempts=%d want 2", page.Items[0].Attempts)
	}
	if admin.count() != 1 {
		t.Fatalf("admin notifier calls=%d want 1", admin.count())
	}

	// Check dead-letter row exists.
	dl, err := svc.AdminListDeadLetters(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list dead-letters: %v", err)
	}
	if dl.Total != 1 {
		t.Fatalf("dead-letter total=%d want 1", dl.Total)
	}
}
// TestWorkerPermanentDeadLettersImmediately scripts a single permanent
// send error with an attempt budget of 5 and expects the first tick to
// dead-letter the delivery and notify the admin exactly once, without
// using up the remaining attempts.
func TestWorkerPermanentDeadLettersImmediately(t *testing.T) {
	t.Parallel()
	sender := newRecordingSender()
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return &mail.SendError{Err: errors.New("rejected"), Permanent: true} },
	}
	admin := &recordingAdminNotifier{}
	svc := buildService(t, sender, admin, 5, time.Now)

	if err := svc.EnqueueLoginCode(context.Background(), "e@example.test", "333333", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	worker := mail.NewWorker(svc)
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick: %v", err)
	}

	page, err := svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	// Guard the index below: an empty page must fail the test with a
	// message, not panic it.
	if len(page.Items) != 1 {
		t.Fatalf("want 1 delivery, got %d", len(page.Items))
	}
	if got := page.Items[0].Status; got != mail.StatusDeadLettered {
		t.Fatalf("status=%q want dead_lettered after permanent error", got)
	}
	if admin.count() != 1 {
		t.Fatalf("admin notifier calls=%d want 1", admin.count())
	}
}
// TestWorkerRespectsNextAttemptAt verifies that a delivery waiting for
// its retry time is left alone: after one transient failure a second tick
// must not hand anything further to the sender.
func TestWorkerRespectsNextAttemptAt(t *testing.T) {
	t.Parallel()
	ctx := context.Background()

	sender := newRecordingSender()
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return errors.New("transient") },
	}
	// The fake clock sits a full day ahead of wall-clock time, so the
	// next_attempt_at written after the failure is far beyond the
	// database's now() and the second tick deterministically skips it.
	frozen := time.Now().UTC().Add(24 * time.Hour)
	admin := &recordingAdminNotifier{}
	svc := buildService(t, sender, admin, 5, func() time.Time { return frozen })

	if err := svc.EnqueueLoginCode(ctx, "f@example.test", "444444", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}

	worker := mail.NewWorker(svc)
	if err := worker.Tick(ctx); err != nil {
		t.Fatalf("tick #1: %v", err)
	}
	// The clock has not moved: next_attempt_at > now(), so this tick
	// must leave the row untouched.
	if err := worker.Tick(ctx); err != nil {
		t.Fatalf("tick #2: %v", err)
	}

	if seen := len(sender.snapshot()); seen != 1 {
		t.Fatalf("sender saw %d messages while still backing off, want 1", seen)
	}
}
+121
View File
@@ -0,0 +1,121 @@
// Package metricsapi hosts the optional Prometheus scrape listener.
//
// The listener is enabled only when BACKEND_OTEL_METRICS_EXPORTER=prometheus
// and the configured listen address is non-empty. main.go wires this server
// into the application lifecycle only when Enabled returns true.
package metricsapi
import (
	"context"
	"errors"
	"fmt"
	"net"
	"net/http"
	"sync"
	"time"

	"go.uber.org/zap"
)
// Server owns the optional Prometheus HTTP listener.
type Server struct {
// addr is the TCP listen address; an empty addr disables the server.
addr string
// handler serves every request; NewServer never leaves it nil.
handler http.Handler
// logger is the "metricsapi"-named logger set up by NewServer.
logger *zap.Logger
// stateMu guards server and listener, which Run sets while it is
// serving and resets to nil when it returns.
stateMu sync.RWMutex
server *http.Server
listener net.Listener
}
// NewServer builds a Prometheus scrape server for addr. A nil handler is
// replaced by http.NotFoundHandler so an unconfigured deployment still
// answers with 404s, and a nil logger is replaced by a no-op logger. The
// logger is scoped under the name "metricsapi".
func NewServer(addr string, handler http.Handler, logger *zap.Logger) *Server {
	srv := &Server{addr: addr, handler: handler}
	if srv.handler == nil {
		srv.handler = http.NotFoundHandler()
	}
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	srv.logger = base.Named("metricsapi")
	return srv
}
// Enabled reports whether the metrics listener should run: the receiver
// must be non-nil and have a non-empty listen address.
func (s *Server) Enabled() bool {
	if s == nil {
		return false
	}
	return s.addr != ""
}
// Run binds the listener and serves the scrape surface until the server
// is stopped through Shutdown. A disabled server blocks until ctx is
// cancelled so the App lifecycle can still treat it as a regular
// Component.
//
// It returns an error for a nil ctx, ctx.Err() when ctx is already done
// on entry, and a wrapped error when listening or serving fails. A stop
// caused by Shutdown (http.ErrServerClosed) is reported as nil.
func (s *Server) Run(ctx context.Context) error {
	// readHeaderTimeout bounds how long a client may take to send its
	// request headers. Without it a peer that never finishes the headers
	// keeps its connection open indefinitely.
	const readHeaderTimeout = 10 * time.Second

	if ctx == nil {
		return errors.New("run backend metrics server: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	if !s.Enabled() {
		<-ctx.Done()
		return nil
	}

	listener, err := net.Listen("tcp", s.addr)
	if err != nil {
		return fmt.Errorf("run backend metrics server: listen on %q: %w", s.addr, err)
	}
	server := &http.Server{
		Handler:           s.handler,
		ReadHeaderTimeout: readHeaderTimeout,
	}

	// Publish the running server so Shutdown can reach it.
	s.stateMu.Lock()
	s.server = server
	s.listener = listener
	s.stateMu.Unlock()
	s.logger.Info("backend metrics server started", zap.String("addr", listener.Addr().String()))

	// Clear the published state once serving ends, whatever the reason.
	defer func() {
		s.stateMu.Lock()
		s.server = nil
		s.listener = nil
		s.stateMu.Unlock()
	}()

	err = server.Serve(listener)
	switch {
	case err == nil:
		return nil
	case errors.Is(err, http.ErrServerClosed):
		s.logger.Info("backend metrics server stopped")
		return nil
	default:
		return fmt.Errorf("run backend metrics server: serve on %q: %w", s.addr, err)
	}
}
// Shutdown gracefully stops the metrics listener within ctx.
//
// It returns an error for a nil ctx. A nil receiver, or a server that is
// not currently serving, is a no-op that returns nil; this matches
// Enabled and Run, which also accept a nil *Server. Any other shutdown
// failure is returned wrapped, except http.ErrServerClosed, which is
// treated as success.
func (s *Server) Shutdown(ctx context.Context) error {
	if ctx == nil {
		return errors.New("shutdown backend metrics server: nil context")
	}
	// Guard before touching stateMu: a nil receiver would panic there.
	if s == nil {
		return nil
	}

	s.stateMu.RLock()
	server := s.server
	s.stateMu.RUnlock()
	if server == nil {
		return nil
	}

	if err := server.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
		return fmt.Errorf("shutdown backend metrics server: %w", err)
	}
	return nil
}
+107
View File
@@ -0,0 +1,107 @@
package notification
import (
"context"
"github.com/google/uuid"
)
// AdminListNotificationsPage bundles the pagination metadata returned to
// the admin API. The shape mirrors `mail.AdminListDeliveriesPage` so
// handlers stay symmetric.
type AdminListNotificationsPage struct {
// Items holds the rows of the requested page.
Items []Notification
// Page is the 1-indexed page number after normalisation.
Page int
// PageSize is the page size after normalisation.
PageSize int
// Total is the total reported by the store alongside Items.
Total int64
}
// AdminListDeadLettersPage mirrors AdminListNotificationsPage for the
// dead-letter listing.
type AdminListDeadLettersPage struct {
// Items holds the dead-letter rows of the requested page.
Items []DeadLetter
// Page is the 1-indexed page number after normalisation.
Page int
// PageSize is the page size after normalisation.
PageSize int
// Total is the total reported by the store alongside Items.
Total int64
}
// AdminListMalformedPage mirrors AdminListNotificationsPage for the
// malformed-intent listing.
type AdminListMalformedPage struct {
// Items holds the malformed-intent rows of the requested page.
Items []MalformedIntent
// Page is the 1-indexed page number after normalisation.
Page int
// PageSize is the page size after normalisation.
PageSize int
// Total is the total reported by the store alongside Items.
Total int64
}
// AdminListNotifications returns one page of notifications,
// newest-first. page is 1-indexed; page and pageSize are first passed
// through normalisePaging, and the normalised values are echoed back in
// the result. A store error is returned unchanged with a zero page.
func (s *Service) AdminListNotifications(ctx context.Context, page, pageSize int) (AdminListNotificationsPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	listing, err := s.deps.Store.ListNotifications(ctx, (page-1)*pageSize, pageSize)
	if err != nil {
		return AdminListNotificationsPage{}, err
	}
	out := AdminListNotificationsPage{Page: page, PageSize: pageSize}
	out.Items = listing.Items
	out.Total = listing.Total
	return out, nil
}
// AdminGetNotification looks up one notification by id and passes the
// store's result through unchanged. The sentinel
// ErrNotificationNotFound surfaces as a 404 in the handler layer.
func (s *Service) AdminGetNotification(ctx context.Context, id uuid.UUID) (Notification, error) {
	found, err := s.deps.Store.GetNotification(ctx, id)
	return found, err
}
// AdminListDeadLetters returns one page of dead letters, newest-first.
// page and pageSize are normalised by normalisePaging before the store
// is queried; a store error is returned unchanged with a zero page.
func (s *Service) AdminListDeadLetters(ctx context.Context, page, pageSize int) (AdminListDeadLettersPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	listing, err := s.deps.Store.ListDeadLetters(ctx, (page-1)*pageSize, pageSize)
	if err != nil {
		return AdminListDeadLettersPage{}, err
	}
	out := AdminListDeadLettersPage{Page: page, PageSize: pageSize}
	out.Items = listing.Items
	out.Total = listing.Total
	return out, nil
}
// AdminListMalformed returns one page of malformed intents,
// newest-first. page and pageSize are normalised by normalisePaging
// before the store is queried; a store error is returned unchanged with
// a zero page.
func (s *Service) AdminListMalformed(ctx context.Context, page, pageSize int) (AdminListMalformedPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	listing, err := s.deps.Store.ListMalformed(ctx, (page-1)*pageSize, pageSize)
	if err != nil {
		return AdminListMalformedPage{}, err
	}
	out := AdminListMalformedPage{Page: page, PageSize: pageSize}
	out.Items = listing.Items
	out.Total = listing.Total
	return out, nil
}
// normalisePaging maps arbitrary page/pageSize input onto values that
// are safe to hand to the store: a page below 1 becomes 1, a pageSize
// below 1 becomes the default of 25, and a pageSize above 200 is capped
// at 200. The defaults match the existing admin endpoints (`mail`
// package).
func normalisePaging(page, pageSize int) (int, int) {
	const (
		defaultPageSize = 25
		maxPageSize     = 200
	)
	if page < 1 {
		page = 1
	}
	switch {
	case pageSize < 1:
		pageSize = defaultPageSize
	case pageSize > maxPageSize:
		pageSize = maxPageSize
	}
	return page, pageSize
}
+35
View File
@@ -0,0 +1,35 @@
package notification
import (
"context"
"github.com/google/uuid"
"go.uber.org/zap"
)
// OnUserDeleted is the cascade hook for a user soft-delete. Every
// pending or retrying route owned by userID is marked `skipped` so the
// worker stops delivering to a vanished account; published rows are left
// untouched as audit trail.
//
// The catalog (`backend/README.md` §10) has no `user.*` kind, so this is
// the only place where the notification module reacts to user lifecycle
// events directly. The hook is idempotent: a repeated call for the same
// user finds no pending rows. uuid.Nil is ignored and returns nil; the
// number of skipped routes is logged only when it is positive.
func (s *Service) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
	if userID == uuid.Nil {
		return nil
	}
	count, err := s.deps.Store.SkipPendingRoutesForUser(ctx, userID, s.nowUTC())
	switch {
	case err != nil:
		return err
	case count > 0:
		s.deps.Logger.Info("notification routes skipped on user delete",
			zap.String("user_id", userID.String()),
			zap.Int64("count", count),
		)
	}
	return nil
}
+127
View File
@@ -0,0 +1,127 @@
package notification
// Kind constants name every supported notification kind. The implementation
// trims the README §10 catalog to the set with active producers in
// the codebase; further kinds (`game.*`, `mail.dead_lettered`) require
// an additive change here together with a producer.
const (
KindLobbyInviteReceived = "lobby.invite.received"
KindLobbyInviteRevoked = "lobby.invite.revoked"
KindLobbyApplicationSubmitted = "lobby.application.submitted"
KindLobbyApplicationApproved = "lobby.application.approved"
KindLobbyApplicationRejected = "lobby.application.rejected"
KindLobbyMembershipRemoved = "lobby.membership.removed"
KindLobbyMembershipBlocked = "lobby.membership.blocked"
KindLobbyRaceNameRegistered = "lobby.race_name.registered"
KindLobbyRaceNamePending = "lobby.race_name.pending"
KindLobbyRaceNameExpired = "lobby.race_name.expired"
KindRuntimeImagePullFailed = "runtime.image_pull_failed"
KindRuntimeContainerStartFailed = "runtime.container_start_failed"
KindRuntimeStartConfigInvalid = "runtime.start_config_invalid"
)
// CatalogEntry describes the per-kind delivery policy: which channels
// a kind fans out to and whether the kind targets the platform admin
// recipient instead of per-user accounts.
type CatalogEntry struct {
// Channels lists the channels this kind fans out to, in the order
// rows are materialised in `notification_routes`. The closed set is
// {`push`, `email`}.
Channels []string
// Admin reports whether the email channel targets the configured
// admin recipient (`BACKEND_NOTIFICATION_ADMIN_EMAIL`) rather than
// per-user accounts. Admin-targeted kinds carry an empty Recipients
// slice on the producer side.
Admin bool
// MailTemplateID is the template_id passed to `mail.EnqueueTemplate`
// for email routes. The catalog uses the kind itself by convention,
// matching `mail.TemplateLoginCode`'s use of `auth.login_code`.
// Catalog entries without an email channel leave it empty.
MailTemplateID string
}
// catalog maps each supported kind to its delivery policy. The map is
// queried by Submit and by the dispatcher worker; producers do not
// inspect it directly.
//
// Every entry that lists ChannelEmail sets MailTemplateID to the kind
// itself; push-only entries leave it empty.
var catalog = map[string]CatalogEntry{
// Lobby kinds: none of them sets Admin.
KindLobbyInviteReceived: {
Channels: []string{ChannelPush, ChannelEmail},
MailTemplateID: KindLobbyInviteReceived,
},
KindLobbyInviteRevoked: {
Channels: []string{ChannelPush},
},
KindLobbyApplicationSubmitted: {
Channels: []string{ChannelPush},
},
KindLobbyApplicationApproved: {
Channels: []string{ChannelPush, ChannelEmail},
MailTemplateID: KindLobbyApplicationApproved,
},
KindLobbyApplicationRejected: {
Channels: []string{ChannelPush, ChannelEmail},
MailTemplateID: KindLobbyApplicationRejected,
},
KindLobbyMembershipRemoved: {
Channels: []string{ChannelPush, ChannelEmail},
MailTemplateID: KindLobbyMembershipRemoved,
},
KindLobbyMembershipBlocked: {
Channels: []string{ChannelPush, ChannelEmail},
MailTemplateID: KindLobbyMembershipBlocked,
},
KindLobbyRaceNameRegistered: {
Channels: []string{ChannelPush},
},
KindLobbyRaceNamePending: {
Channels: []string{ChannelPush, ChannelEmail},
MailTemplateID: KindLobbyRaceNamePending,
},
KindLobbyRaceNameExpired: {
Channels: []string{ChannelPush},
},
// Runtime kinds: email-only and admin-targeted.
KindRuntimeImagePullFailed: {
Channels: []string{ChannelEmail},
Admin: true,
MailTemplateID: KindRuntimeImagePullFailed,
},
KindRuntimeContainerStartFailed: {
Channels: []string{ChannelEmail},
Admin: true,
MailTemplateID: KindRuntimeContainerStartFailed,
},
KindRuntimeStartConfigInvalid: {
Channels: []string{ChannelEmail},
Admin: true,
MailTemplateID: KindRuntimeStartConfigInvalid,
},
}
// LookupCatalog reports the delivery policy registered for kind. The
// boolean result is false when kind is not part of the catalog, in
// which case the returned entry is the zero value. Callers (Submit,
// Worker) branch on the boolean instead of a sentinel error.
func LookupCatalog(kind string) (CatalogEntry, bool) {
	if policy, found := catalog[kind]; found {
		return policy, true
	}
	return CatalogEntry{}, false
}
// SupportedKinds lists every kind of the closed set in a fixed,
// deterministic order. Each call returns a freshly allocated slice, so
// callers may modify the result freely. It backs tests and the
// migration CHECK constraint audit and is not on the hot path.
func SupportedKinds() []string {
	kinds := [...]string{
		// Lobby kinds.
		KindLobbyInviteReceived,
		KindLobbyInviteRevoked,
		KindLobbyApplicationSubmitted,
		KindLobbyApplicationApproved,
		KindLobbyApplicationRejected,
		KindLobbyMembershipRemoved,
		KindLobbyMembershipBlocked,
		KindLobbyRaceNameRegistered,
		KindLobbyRaceNamePending,
		KindLobbyRaceNameExpired,
		// Runtime kinds.
		KindRuntimeImagePullFailed,
		KindRuntimeContainerStartFailed,
		KindRuntimeStartConfigInvalid,
	}
	return kinds[:]
}
@@ -0,0 +1,77 @@
package notification
import (
"testing"
)
// TestCatalogClosure asserts that SupportedKinds and the `catalog` map
// describe exactly the same kind set. Comparing lengths and checking
// membership in one direction is not enough: a duplicated entry in
// SupportedKinds would keep the lengths equal while hiding a kind that
// exists only in the catalog. The test therefore rejects duplicates and
// checks membership in both directions, which catches entries dropped
// from either side during catalog edits.
func TestCatalogClosure(t *testing.T) {
	t.Parallel()
	want := SupportedKinds()
	seen := make(map[string]struct{}, len(want))
	for _, k := range want {
		if _, dup := seen[k]; dup {
			t.Errorf("kind %q listed more than once by SupportedKinds", k)
			continue
		}
		seen[k] = struct{}{}
		if _, ok := catalog[k]; !ok {
			t.Errorf("kind %q listed by SupportedKinds but missing from catalog", k)
		}
	}
	for k := range catalog {
		if _, ok := seen[k]; !ok {
			t.Errorf("kind %q present in catalog but not listed by SupportedKinds", k)
		}
	}
	// Kept as a final cross-check; with the loops above it can only fail
	// together with a more specific message.
	if len(want) != len(catalog) {
		t.Fatalf("supported kinds=%d, catalog entries=%d", len(want), len(catalog))
	}
}
// TestCatalogChannels enforces the per-kind channel set documented in
// `backend/README.md` §10. A drift here means the README and the code
// disagree — either fix the table or fix the test.
//
// The expectation table must cover the whole catalog: a kind that is
// present in the catalog but absent from the table fails the test,
// otherwise a newly added kind would silently escape verification.
func TestCatalogChannels(t *testing.T) {
	t.Parallel()
	expect := map[string][]string{
		KindLobbyInviteReceived:         {ChannelPush, ChannelEmail},
		KindLobbyInviteRevoked:          {ChannelPush},
		KindLobbyApplicationSubmitted:   {ChannelPush},
		KindLobbyApplicationApproved:    {ChannelPush, ChannelEmail},
		KindLobbyApplicationRejected:    {ChannelPush, ChannelEmail},
		KindLobbyMembershipRemoved:      {ChannelPush, ChannelEmail},
		KindLobbyMembershipBlocked:      {ChannelPush, ChannelEmail},
		KindLobbyRaceNameRegistered:     {ChannelPush},
		KindLobbyRaceNamePending:        {ChannelPush, ChannelEmail},
		KindLobbyRaceNameExpired:        {ChannelPush},
		KindRuntimeImagePullFailed:      {ChannelEmail},
		KindRuntimeContainerStartFailed: {ChannelEmail},
		KindRuntimeStartConfigInvalid:   {ChannelEmail},
	}
	// Reverse coverage: every catalog kind needs an expectation.
	for kind := range catalog {
		if _, ok := expect[kind]; !ok {
			t.Errorf("kind %q present in catalog but has no expected channel set", kind)
		}
	}
	for kind, want := range expect {
		entry, ok := LookupCatalog(kind)
		if !ok {
			t.Errorf("kind %q missing from catalog", kind)
			continue
		}
		if len(entry.Channels) != len(want) {
			t.Errorf("kind %q channels=%v want %v", kind, entry.Channels, want)
			continue
		}
		// Order matters: Channels is documented as the order in which
		// route rows are materialised.
		for i, ch := range want {
			if entry.Channels[i] != ch {
				t.Errorf("kind %q channels[%d]=%s want %s", kind, i, entry.Channels[i], ch)
			}
		}
	}
}
// TestCatalogAdminOnlyForRuntime keeps the runtime kinds admin-only and
// every lobby kind user-facing.
func TestCatalogAdminOnlyForRuntime(t *testing.T) {
t.Parallel()
for kind, entry := range catalog {
switch kind {
case KindRuntimeImagePullFailed, KindRuntimeContainerStartFailed, KindRuntimeStartConfigInvalid:
if !entry.Admin {
t.Errorf("kind %q expected Admin=true", kind)
}
default:
if entry.Admin {
t.Errorf("kind %q expected Admin=false", kind)
}
}
}
}
+99
View File
@@ -0,0 +1,99 @@
package notification
import (
"context"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/user"
"github.com/google/uuid"
"go.uber.org/zap"
)
// PushPublisher is the publisher contract notification uses to emit a
// `client_event` push frame to gateway. The real implementation lives
// in `backend/internal/push`; NewNoopPushPublisher satisfies the
// interface for tests that do not exercise push behaviour.
//
// Implementations must be concurrency-safe. The deviceSessionID pointer
// narrows the event to a single device session when non-nil; nil means
// fan out to every active session of userID. eventID, requestID and
// traceID are correlation identifiers that gateway forwards verbatim
// into the signed client envelope; empty strings are forwarded
// unchanged.
type PushPublisher interface {
// PublishClientEvent emits one event of the given kind with payload
// for userID; a nil error reports that the event was accepted.
PublishClientEvent(ctx context.Context, userID uuid.UUID, deviceSessionID *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error
}
// Mailer is the email surface notification uses for outbound mail. The
// canonical implementation is `*mail.Service.EnqueueTemplate`; tests
// substitute a recording fake. The contract matches mail's existing
// signature so the wiring layer can pass the concrete service directly.
type Mailer interface {
// EnqueueTemplate queues the template identified by templateID for
// recipient, rendered with payload. idempotencyKey lets the mail
// side recognise a repeated enqueue of the same message.
EnqueueTemplate(ctx context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error
}
// AccountResolver looks up the recipient profile (email + preferred
// language) by user_id. The canonical implementation is
// `*user.Service.GetAccount`. The narrow interface keeps the Service
// from depending on every part of the user surface.
type AccountResolver interface {
// GetAccount returns the account stored for userID.
GetAccount(ctx context.Context, userID uuid.UUID) (user.Account, error)
}
// Deps aggregates every collaborator the Service depends on.
//
// Store, Mail, and Accounts must be non-nil. Push defaults to the no-op
// publisher when omitted; Now defaults to time.Now; Logger defaults to
// zap.NewNop. Config carries the worker interval, the max-attempts cap,
// and the optional admin-email destination from `BACKEND_NOTIFICATION_*`.
type Deps struct {
// Store is the persistence layer. Required.
Store *Store
// Mail receives email routes. Required.
Mail Mailer
// Push receives push routes. Optional; NewService substitutes the
// no-op publisher when it is nil.
Push PushPublisher
// Accounts resolves recipient profiles by user id. Required.
Accounts AccountResolver
// Config is the runtime notification configuration.
Config config.NotificationConfig
// Now overrides time.Now for deterministic tests. A nil Now defaults
// to time.Now in NewService.
Now func() time.Time
// Logger is named under "notification" by NewService. Nil falls back
// to zap.NewNop.
Logger *zap.Logger
}
// NewNoopPushPublisher builds a PushPublisher that only logs each event
// at debug level and always reports success. A nil logger is replaced
// by zap.NewNop; the logger actually used is named "push.noop". The
// canonical publisher lives in `backend/internal/push`; this
// constructor exists for tests.
func NewNoopPushPublisher(logger *zap.Logger) PushPublisher {
	base := logger
	if base == nil {
		base = zap.NewNop()
	}
	return &noopPushPublisher{logger: base.Named("push.noop")}
}
// noopPushPublisher is the PushPublisher returned by
// NewNoopPushPublisher: it logs events instead of delivering them.
type noopPushPublisher struct {
// logger receives one debug entry per published event.
logger *zap.Logger
}
// PublishClientEvent logs the event at debug level and returns nil. The
// entry always carries user_id, kind and the number of payload keys;
// device_session_id is added when deviceSessionID is non-nil, and
// event_id, request_id and trace_id are added only when non-empty. The
// context is ignored.
func (p *noopPushPublisher) PublishClientEvent(_ context.Context, userID uuid.UUID, deviceSessionID *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error {
	fields := make([]zap.Field, 0, 7)
	fields = append(fields,
		zap.String("user_id", userID.String()),
		zap.String("kind", kind),
		zap.Int("payload_keys", len(payload)),
	)
	if deviceSessionID != nil {
		fields = append(fields, zap.String("device_session_id", deviceSessionID.String()))
	}
	// Correlation identifiers are optional; empty ones are left out.
	optional := [...]struct{ key, value string }{
		{"event_id", eventID},
		{"request_id", requestID},
		{"trace_id", traceID},
	}
	for _, o := range optional {
		if o.value != "" {
			fields = append(fields, zap.String(o.key, o.value))
		}
	}
	p.logger.Debug("client event (noop publisher)", fields...)
	return nil
}
+175
View File
@@ -0,0 +1,175 @@
package notification
import (
"context"
"database/sql"
"errors"
"fmt"
"math/rand/v2"
"time"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)
// traceIDFromContext extracts the trace id of the span context stored
// in ctx and renders it as a string. It returns an empty string when
// ctx is nil or when the span context carries no trace id. The id is
// forwarded to gateway as ClientEvent.trace_id so push envelopes can be
// correlated to the producing trace.
func traceIDFromContext(ctx context.Context) string {
	if ctx != nil {
		if sc := trace.SpanContextFromContext(ctx); sc.HasTraceID() {
			return sc.TraceID().String()
		}
	}
	return ""
}
// finaliseDispatch persists the outcome of one delivery attempt inside
// tx. The status transitions mirror README §10 and the CHECK constraint
// on `notification_routes`:
//
//   - dispatchErr == nil           → published (next_attempt_at NULL)
//   - failure, attempt <  maximum  → retrying (next_attempt_at armed)
//   - failure, attempt >= maximum  → dead_lettered (plus a
//     notification_dead_letters row)
//
// attempt is the route's stored attempt count plus one. The maximum is
// the route's own MaxAttempts, falling back to the configured value
// when the route carries a non-positive one. at stamps the attempt.
//
// The function never commits tx: the caller (worker or Submit's
// best-effort path) owns the transaction so it can compose the dispatch
// with the preceding ClaimDueRoutes lock.
func (s *Service) finaliseDispatch(ctx context.Context, tx *sql.Tx, claim ClaimedRoute, dispatchErr error, at time.Time) error {
	store := s.deps.Store
	route := claim.Route
	if dispatchErr == nil {
		return store.MarkRoutePublished(ctx, tx, route.RouteID, at)
	}

	limit := route.MaxAttempts
	if limit <= 0 {
		limit = int32(s.deps.Config.MaxAttempts)
	}
	attempt := route.Attempts + 1
	reason := dispatchErr.Error()
	fields := []zap.Field{
		zap.String("kind", claim.Notification.Kind),
		zap.String("channel", route.Channel),
		zap.String("route_id", route.RouteID.String()),
		zap.Int32("attempt", attempt),
	}

	if attempt < limit {
		nextAt := at.Add(routeBackoff(attempt))
		fields = append(fields, zap.Time("next_attempt_at", nextAt), zap.Error(dispatchErr))
		s.deps.Logger.Info("notification route retry scheduled", fields...)
		return store.ScheduleRouteRetry(ctx, tx, route.RouteID, at, nextAt, reason)
	}

	// Retry budget exhausted: park the route in its terminal state.
	fields = append(fields, zap.Error(dispatchErr))
	s.deps.Logger.Warn("notification route dead-lettered", fields...)
	return store.MarkRouteDeadLettered(ctx, tx, claim.Notification.NotificationID, route.RouteID, at, reason)
}
// bestEffortDispatch makes one immediate delivery attempt for a route
// that Submit has just persisted. Routes whose status is not pending
// are ignored. The method opens its own short transaction, runs the
// channel call, and records the outcome through finaliseDispatch, the
// same path the worker uses. Nothing is returned: a failure to begin,
// finalise or commit is only logged at debug level, because the worker
// will retry on the next tick and the producer must never observe the
// synchronous failure.
func (s *Service) bestEffortDispatch(ctx context.Context, n Notification, route Route) {
	if route.Status != RouteStatusPending {
		return
	}
	logFailure := func(msg string, err error) {
		s.deps.Logger.Debug(msg,
			zap.String("route_id", route.RouteID.String()),
			zap.Error(err))
	}

	tx, err := s.deps.Store.BeginTx(ctx)
	if err != nil {
		logFailure("best-effort dispatch: begin tx failed", err)
		return
	}
	// Rollback after a successful Commit fails harmlessly, so its error
	// is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	claim := ClaimedRoute{Route: route, Notification: n}
	outcome := s.performDispatch(ctx, claim)
	if err := s.finaliseDispatch(ctx, tx, claim, outcome, s.nowUTC()); err != nil {
		logFailure("best-effort dispatch finalise failed", err)
		return
	}
	if err := tx.Commit(); err != nil {
		logFailure("best-effort dispatch commit failed", err)
	}
}
// performDispatch performs the channel-specific delivery for claim and
// returns nil on success. It fails fast with the context error when ctx
// is already done. Push routes require a user id; email routes require
// a catalog entry for the kind and a non-blank resolved recipient. Any
// other channel is rejected. The caller decides between retry and
// dead-letter based on the attempt counter and persisted state.
func (s *Service) performDispatch(ctx context.Context, claim ClaimedRoute) error {
	if err := ctx.Err(); err != nil {
		return err
	}
	route, notif := claim.Route, claim.Notification

	if route.Channel == ChannelPush {
		if route.UserID == nil {
			return errors.New("push route missing user_id")
		}
		// The route id serves as event id and the producer's
		// idempotency key as request id.
		return s.deps.Push.PublishClientEvent(
			ctx,
			*route.UserID,
			route.DeviceSessionID,
			notif.Kind,
			notif.Payload,
			route.RouteID.String(),
			notif.IdempotencyKey,
			traceIDFromContext(ctx),
		)
	}

	if route.Channel == ChannelEmail {
		entry, known := LookupCatalog(notif.Kind)
		if !known {
			return fmt.Errorf("unknown kind %q", notif.Kind)
		}
		if trimSpace(route.ResolvedEmail) == "" {
			return errors.New("email route missing resolved recipient")
		}
		// The route id doubles as the mail idempotency_key so the mail
		// outbox UNIQUE(template_id, idempotency_key) catches a
		// duplicate enqueue if the worker re-claims after a crash
		// before commit. Producers never need to know the route id.
		return s.deps.Mail.EnqueueTemplate(ctx, entry.MailTemplateID, route.ResolvedEmail, notif.Payload, route.RouteID.String())
	}

	return fmt.Errorf("unknown channel %q", route.Channel)
}
// routeBackoff computes the per-attempt delay using the package
// constants and ±backoffJitter randomisation. attempt is 1-indexed
// (the value the row will carry after Mark*); attempt==1 maps to
// `backoffBase × backoffFactor⁰`.
func routeBackoff(attempt int32) time.Duration {
if attempt <= 1 {
return jitter(backoffBase)
}
d := float64(backoffBase)
for i := int32(1); i < attempt; i++ {
d *= backoffFactor
if time.Duration(d) >= backoffMax {
return jitter(backoffMax)
}
}
return jitter(time.Duration(d))
}
// jitter spreads d by a random offset of at most ±backoffJitter times
// d, drawn from the top-level math/rand/v2 generator. d is returned
// unchanged when jitter is disabled (backoffJitter <= 0) or when the
// randomised value would be negative.
func jitter(d time.Duration) time.Duration {
	if backoffJitter <= 0 {
		return d
	}
	base := float64(d)
	swing := base * backoffJitter
	offset := (rand.Float64()*2 - 1) * swing
	if jittered := time.Duration(base + offset); jittered >= 0 {
		return jittered
	}
	return d
}
@@ -0,0 +1,45 @@
package notification
import (
"testing"
"time"
)
// TestRouteBackoffMonotonic locks the documented schedule:
// attempt 1 == ~backoffBase, each subsequent attempt grows by
// backoffFactor up to backoffMax. The check uses the lower bound of
// the jitter window so the assertion is robust under random output.
func TestRouteBackoffMonotonic(t *testing.T) {
t.Parallel()
lower := func(d time.Duration) time.Duration {
return time.Duration(float64(d) * (1 - backoffJitter))
}
upper := func(d time.Duration) time.Duration {
return time.Duration(float64(d) * (1 + backoffJitter))
}
cases := []struct {
attempt int32
want time.Duration
}{
{attempt: 1, want: backoffBase},
{attempt: 2, want: time.Duration(float64(backoffBase) * backoffFactor)},
{attempt: 3, want: time.Duration(float64(backoffBase) * backoffFactor * backoffFactor)},
}
for _, tc := range cases {
got := routeBackoff(tc.attempt)
if got < lower(tc.want) || got > upper(tc.want) {
t.Fatalf("attempt=%d got=%s want ~%s (±%.0f%%)", tc.attempt, got, tc.want, backoffJitter*100)
}
}
}
// TestRouteBackoffCap asserts the schedule clamps at backoffMax: for an
// attempt far beyond the point where the exponential growth reaches the
// cap, the delay must fall inside the jitter window around backoffMax.
// Both bounds are checked, so a delay that collapses below the cap
// (for example to zero) is caught as well as one that overshoots it.
func TestRouteBackoffCap(t *testing.T) {
	t.Parallel()
	lower := time.Duration(float64(backoffMax) * (1 - backoffJitter))
	upper := time.Duration(float64(backoffMax) * (1 + backoffJitter))
	got := routeBackoff(50)
	if got < lower || got > upper {
		t.Fatalf("attempt=50 got=%s outside capped window [%s, %s] (max=%s)", got, lower, upper, backoffMax)
	}
}
+22
View File
@@ -0,0 +1,22 @@
package notification
import "errors"
// ErrNotificationNotFound is returned by AdminGetNotification when no
// row matches the supplied identifier. Handlers map it to HTTP 404.
var ErrNotificationNotFound = errors.New("notification: notification not found")
// ErrUnknownKind marks an intent whose Kind is not in the catalog
// (`backend/README.md` §10). Submit does not surface it to the
// producer — it persists a malformed-intent record and returns nil.
// The exported sentinel exists so the package internals can branch on it.
var ErrUnknownKind = errors.New("notification: unknown kind")
// ErrEmptyIdempotencyKey marks an intent that does not carry an
// idempotency_key. Same surface treatment as ErrUnknownKind.
var ErrEmptyIdempotencyKey = errors.New("notification: idempotency_key must be non-empty")
// ErrNoRecipients marks an intent whose kind requires user recipients
// but which arrives without any. Same surface treatment as
// ErrUnknownKind.
var ErrNoRecipients = errors.New("notification: at least one recipient is required")
@@ -0,0 +1,35 @@
package notification
import (
"context"
"galaxy/backend/internal/lobby"
)
// LobbyAdapter exposes the Service as a `lobby.NotificationPublisher`.
// The adapter copies the producer-side event into a notification.Intent
// and hands it to Submit. It performs no validation of its own:
// malformed intents are dealt with by Submit. Per the original note,
// this is the same lobby-side interface that the wiring can otherwise
// connect to a no-op publisher.
func (s *Service) LobbyAdapter() lobby.NotificationPublisher {
	adapter := lobbyAdapter{svc: s}
	return &adapter
}
// lobbyAdapter is the value returned by Service.LobbyAdapter: it
// forwards lobby events to the Service it wraps.
type lobbyAdapter struct {
// svc is the Service that receives the translated intents.
svc *Service
}
// PublishLobbyEvent converts ev into an Intent (kind, idempotency key,
// recipients and payload are copied as-is) and submits it. Only the
// error from Submit is returned. A nil adapter, or one without a
// Service, is a no-op that reports success.
func (a *lobbyAdapter) PublishLobbyEvent(ctx context.Context, ev lobby.LobbyNotification) error {
	if a == nil {
		return nil
	}
	svc := a.svc
	if svc == nil {
		return nil
	}
	_, err := svc.Submit(ctx, Intent{
		Kind:           ev.Kind,
		IdempotencyKey: ev.IdempotencyKey,
		Recipients:     ev.Recipients,
		Payload:        ev.Payload,
	})
	return err
}
@@ -0,0 +1,117 @@
// Package notification implements the in-process notification pipeline
// described in `backend/PLAN.md` §5.7, `ARCHITECTURE.md` §12, and the
// catalog in `backend/README.md` §10. Producers (lobby, runtime) submit
// intents via Submit; the service persists each intent into
// `backend.notifications`, materialises one row per (recipient, channel)
// in `backend.notification_routes`, and attempts a synchronous best-effort
// dispatch. Failed routes are picked up by a background Worker that retries
// with exponential backoff and dead-letters past the configured maximum.
//
// Push routes are emitted via PushPublisher (the canonical
// implementation is `push.Service` over gRPC; the package also ships
// NewNoopPushPublisher for tests). Email routes call
// mail.EnqueueTemplate, which feeds the durable mail outbox.
package notification
import (
"time"
"galaxy/backend/internal/config"
"go.uber.org/zap"
)
// Status values stored in `notification_routes.status`. Mirrored by the
// CHECK constraint in migration 00001. Only pending and retrying routes
// are picked up by Store.ClaimDueRoutes.
const (
// RouteStatusPending is the only status bestEffortDispatch acts on.
RouteStatusPending = "pending"
// RouteStatusRetrying is written by Store.ScheduleRouteRetry.
RouteStatusRetrying = "retrying"
// RouteStatusPublished is written by Store.MarkRoutePublished.
RouteStatusPublished = "published"
// RouteStatusSkipped is never claimed for dispatch.
RouteStatusSkipped = "skipped"
// RouteStatusDeadLettered is written by Store.MarkRouteDeadLettered.
RouteStatusDeadLettered = "dead_lettered"
)
// Channel values stored in `notification_routes.channel`. The catalog in
// `backend/README.md` §10 documents the per-kind set.
const (
// ChannelPush routes are delivered through PushPublisher.
ChannelPush = "push"
// ChannelEmail routes are delivered through Mailer.
ChannelEmail = "email"
)
// Backoff parameters for the route worker. Mirrors the trade-off captured
// for the mail outbox in `backend/README.md`: exponential
// growth from a 10 second base, capped at 10 minutes, with ±25% jitter.
const (
// backoffBase is the nominal delay after the first failed attempt.
backoffBase = 10 * time.Second
// backoffFactor multiplies the delay for each further attempt.
backoffFactor = 2.0
// backoffMax caps the nominal delay before jitter is applied.
backoffMax = 10 * time.Minute
// backoffJitter is the maximum relative swing applied to a delay.
backoffJitter = 0.25
// claimBatchSize bounds the number of routes pulled out of Postgres
// per worker tick. Same logic as `mail.claimBatchSize`: each row is
// processed in its own short transaction so a slow channel does not
// block its peers.
claimBatchSize = 16
)
// Service is the notification entry point. It composes the persistence
// store, the push and mail dispatchers, the account resolver used for
// recipient email lookups, runtime configuration, and a structured
// logger. Construct it with NewService so optional dependencies receive
// their defaults.
type Service struct {
// deps holds the collaborators after NewService applied defaults.
deps Deps
}
// NewService builds a Service from deps, filling in defaults for the
// optional collaborators: a nil Now becomes time.Now, a nil Logger
// becomes zap.NewNop, and a nil Push becomes the no-op publisher used
// by tests that do not exercise the gRPC stream. The logger is always
// named "notification", and the default publisher derives its logger
// from that named logger. Store, Mail, and Accounts must be non-nil —
// calling Service methods with any of them nil panics on first use,
// matching how the rest of `internal/*` signals missing wiring.
func NewService(deps Deps) *Service {
	svc := &Service{deps: deps}
	d := &svc.deps
	if d.Now == nil {
		d.Now = time.Now
	}
	logger := d.Logger
	if logger == nil {
		logger = zap.NewNop()
	}
	d.Logger = logger.Named("notification")
	// Must run after the logger is named so the no-op publisher
	// inherits the "notification" scope.
	if d.Push == nil {
		d.Push = NewNoopPushPublisher(d.Logger)
	}
	return svc
}
// Config exposes the notification configuration the Service was built
// with. Worker uses it to schedule the scan loop and bound retries.
func (s *Service) Config() config.NotificationConfig {
	cfg := s.deps.Config
	return cfg
}
// Logger exposes the structured logger held by the Service (named
// "notification" by NewService). Worker and the admin handlers reuse it
// so scoped fields stay consistent.
func (s *Service) Logger() *zap.Logger {
	logger := s.deps.Logger
	return logger
}
// now reads the configured clock, falling back to time.Now when no
// clock is set. Centralising the fallback keeps the rest of the code
// free from `if s.deps.Now == nil` checks.
func (s *Service) now() time.Time {
	if clock := s.deps.Now; clock != nil {
		return clock()
	}
	return time.Now()
}
// nowUTC reads the configured clock and converts the result to UTC,
// matching the convention used by `time.Time` columns elsewhere in
// `backend`.
func (s *Service) nowUTC() time.Time {
	current := s.now()
	return current.UTC()
}
// adminEmail reports the configured admin recipient address after
// passing it through trimSpace. An empty result means that no admin
// recipient is configured.
func (s *Service) adminEmail() string {
	configured := s.deps.Config.AdminEmail
	return trimSpace(configured)
}
@@ -0,0 +1,35 @@
package notification
import (
"context"
"galaxy/backend/internal/runtime"
)
// RuntimeAdapter exposes the Service as a
// `runtime.NotificationPublisher`. The adapter turns runtime's narrow
// `(kind, idempotency_key, payload)` shape into a notification.Intent
// and hands it to Submit. No recipients are supplied: every runtime
// kind in the catalog is `Admin: true`, so the recipient comes from the
// configured `BACKEND_NOTIFICATION_ADMIN_EMAIL`.
func (s *Service) RuntimeAdapter() runtime.NotificationPublisher {
	adapter := runtimeAdapter{svc: s}
	return &adapter
}
// runtimeAdapter is the value returned by Service.RuntimeAdapter: it
// forwards runtime events to the Service it wraps.
type runtimeAdapter struct {
// svc is the Service that receives the translated intents.
svc *Service
}
// PublishRuntimeEvent wraps kind, idempotencyKey and payload into an
// Intent without recipients and submits it. Only the error from Submit
// is returned. A nil adapter, or one without a Service, is a no-op that
// reports success.
func (a *runtimeAdapter) PublishRuntimeEvent(ctx context.Context, kind, idempotencyKey string, payload map[string]any) error {
	if a == nil {
		return nil
	}
	svc := a.svc
	if svc == nil {
		return nil
	}
	_, err := svc.Submit(ctx, Intent{
		Kind:           kind,
		IdempotencyKey: idempotencyKey,
		Payload:        payload,
	})
	return err
}
+606
View File
@@ -0,0 +1,606 @@
package notification
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/go-jet/jet/v2/qrm"
"github.com/google/uuid"
)
// Store is the Postgres-backed query surface for notifications,
// notification_routes, notification_dead_letters, and
// notification_malformed_intents. All queries are built through go-jet
// against the generated table bindings under
// `backend/internal/postgres/jet/backend/table`.
type Store struct {
// db is the handle used for non-transactional queries and for
// opening transactions.
db *sql.DB
}
// NewStore returns a Store that runs its queries against db.
func NewStore(db *sql.DB) *Store {
	store := new(Store)
	store.db = db
	return store
}
// BeginTx opens a transaction on the underlying database with default
// options. It is exposed so the worker can keep the
// claim-dispatch-mark cycle within a single commit boundary.
func (s *Store) BeginTx(ctx context.Context) (*sql.Tx, error) {
	var defaults *sql.TxOptions
	return s.db.BeginTx(ctx, defaults)
}
// RouteSeed describes one freshly-materialised route destined for an
// `INSERT INTO notification_routes` inside InsertNotification.
//
// NOTE(review): InsertNotification writes every field below except
// UserID and DeviceSessionID; confirm that these two are meant to stay
// in memory only.
type RouteSeed struct {
RouteID uuid.UUID
// Channel is one of the Channel* constants.
Channel string
// Status is one of the RouteStatus* constants.
Status string
MaxAttempts int32
// NextAttemptAt may be nil; a NULL next_attempt_at is treated as due
// by ClaimDueRoutes.
NextAttemptAt *time.Time
ResolvedEmail string
ResolvedLocale string
UserID *uuid.UUID
DeviceSessionID *uuid.UUID
SkippedAt *time.Time
LastError string
}
// InsertNotificationArgs aggregates the inputs to InsertNotification.
type InsertNotificationArgs struct {
// NotificationID is the id used for the new row when no row with the
// same (Kind, IdempotencyKey) exists yet.
NotificationID uuid.UUID
// Kind and IdempotencyKey together form the idempotency identity.
Kind string
IdempotencyKey string
UserID *uuid.UUID
// Payload is encoded by encodePayload before it is stored.
Payload map[string]any
// Routes are inserted only when the notification row is new.
Routes []RouteSeed
}
// InsertNotification persists a notification row together with its
// route rows in a single transaction. The (kind, idempotency_key)
// UNIQUE constraint serves the idempotency contract: the second
// caller observes inserted=false and the existing notification_id is
// returned. On the duplicate path no route rows are inserted and the
// transaction rolls back so an orphan notification cannot exist.
//
// Returns the stored notification id, whether a new row was inserted,
// and an error. On error the id is uuid.Nil and inserted is false.
func (s *Store) InsertNotification(ctx context.Context, args InsertNotificationArgs) (uuid.UUID, bool, error) {
// Encode before opening the transaction so a bad payload costs no
// database round trip.
payload, err := encodePayload(args.Payload)
if err != nil {
return uuid.Nil, false, fmt.Errorf("encode payload: %w", err)
}
// Both variables are written by the closure below and read after
// withTx returns.
var (
storedID uuid.UUID
inserted bool
)
err = withTx(ctx, s.db, func(tx *sql.Tx) error {
insertStmt := table.Notifications.INSERT(
table.Notifications.NotificationID,
table.Notifications.Kind,
table.Notifications.IdempotencyKey,
table.Notifications.UserID,
table.Notifications.Payload,
).VALUES(
args.NotificationID, args.Kind, args.IdempotencyKey, args.UserID, string(payload),
).
ON_CONFLICT(table.Notifications.Kind, table.Notifications.IdempotencyKey).
DO_NOTHING().
RETURNING(table.Notifications.NotificationID)
var freshRow model.Notifications
err := insertStmt.QueryContext(ctx, tx, &freshRow)
switch {
case errors.Is(err, qrm.ErrNoRows):
// DO NOTHING suppressed the insert, so RETURNING produced no
// row: this is an idempotent re-submit. Look up the existing
// row id and bail.
lookupStmt := postgres.SELECT(table.Notifications.NotificationID).
FROM(table.Notifications).
WHERE(
table.Notifications.Kind.EQ(postgres.String(args.Kind)).
AND(table.Notifications.IdempotencyKey.EQ(postgres.String(args.IdempotencyKey))),
).
LIMIT(1)
var existing model.Notifications
if scanErr := lookupStmt.QueryContext(ctx, tx, &existing); scanErr != nil {
return fmt.Errorf("lookup existing notification: %w", scanErr)
}
storedID = existing.NotificationID
// The sentinel is translated into a nil error after withTx
// returns.
return errIdempotentNoop
case err != nil:
return fmt.Errorf("insert notification: %w", err)
}
storedID = freshRow.NotificationID
inserted = true
// One INSERT per route. UserID and DeviceSessionID of the seed are
// not part of the column list.
for _, r := range args.Routes {
routeStmt := table.NotificationRoutes.INSERT(
table.NotificationRoutes.RouteID,
table.NotificationRoutes.NotificationID,
table.NotificationRoutes.Channel,
table.NotificationRoutes.Status,
table.NotificationRoutes.MaxAttempts,
table.NotificationRoutes.NextAttemptAt,
table.NotificationRoutes.ResolvedEmail,
table.NotificationRoutes.ResolvedLocale,
table.NotificationRoutes.LastError,
table.NotificationRoutes.SkippedAt,
).VALUES(
r.RouteID, args.NotificationID, r.Channel, r.Status,
r.MaxAttempts, r.NextAttemptAt,
r.ResolvedEmail, r.ResolvedLocale, r.LastError,
r.SkippedAt,
)
if _, err := routeStmt.ExecContext(ctx, tx); err != nil {
return fmt.Errorf("insert route %s: %w", r.RouteID, err)
}
}
return nil
})
// Duplicate path: report the existing id without an error.
if errors.Is(err, errIdempotentNoop) {
return storedID, false, nil
}
if err != nil {
return uuid.Nil, false, err
}
return storedID, inserted, nil
}
// errIdempotentNoop tells withTx to roll back the transaction without
// surfacing an error to the caller. InsertNotification returns it from
// its transaction callback on the duplicate path and converts it back
// into a nil error afterwards. It must never escape this package.
var errIdempotentNoop = errors.New("notification store: idempotent noop")
// MarkRoutePublished records a successful delivery for routeID inside
// tx: status becomes 'published', attempts is incremented by one,
// published_at, last_attempt_at and updated_at are stamped with at,
// next_attempt_at is reset to NULL and last_error is emptied. The
// function does not commit tx.
func (s *Store) MarkRoutePublished(ctx context.Context, tx *sql.Tx, routeID uuid.UUID, at time.Time) error {
	routes := table.NotificationRoutes
	target := routes.RouteID.EQ(postgres.UUID(routeID))
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusPublished)),
			routes.Attempts.SET(routes.Attempts.ADD(postgres.Int(1))),
			routes.LastAttemptAt.SET(postgres.TimestampzT(at)),
			routes.PublishedAt.SET(postgres.TimestampzT(at)),
			routes.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			routes.LastError.SET(postgres.String("")),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(target)
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("mark route published: %w", err)
	}
	return nil
}
// ScheduleRouteRetry records a failed delivery that will be retried:
// inside tx it sets status to 'retrying', increments attempts by one,
// stamps last_attempt_at and updated_at with at, arms next_attempt_at
// with nextAt and stores errMsg as last_error. The function does not
// commit tx.
func (s *Store) ScheduleRouteRetry(ctx context.Context, tx *sql.Tx, routeID uuid.UUID, at time.Time, nextAt time.Time, errMsg string) error {
	routes := table.NotificationRoutes
	target := routes.RouteID.EQ(postgres.UUID(routeID))
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusRetrying)),
			routes.Attempts.SET(routes.Attempts.ADD(postgres.Int(1))),
			routes.LastAttemptAt.SET(postgres.TimestampzT(at)),
			routes.NextAttemptAt.SET(postgres.TimestampzT(nextAt)),
			routes.LastError.SET(postgres.String(errMsg)),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(target)
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("schedule route retry: %w", err)
	}
	return nil
}
// MarkRouteDeadLettered parks routeID in the terminal `dead_lettered`
// state and archives the failure, both inside tx. The route update
// increments attempts by one, stamps last_attempt_at, dead_lettered_at
// and updated_at with at, resets next_attempt_at to NULL and stores
// reason as last_error. A notification_dead_letters row with a freshly
// generated id is then inserted for (notificationID, routeID). The
// function does not commit tx.
func (s *Store) MarkRouteDeadLettered(ctx context.Context, tx *sql.Tx, notificationID, routeID uuid.UUID, at time.Time, reason string) error {
	routes := table.NotificationRoutes
	target := routes.RouteID.EQ(postgres.UUID(routeID))
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusDeadLettered)),
			routes.Attempts.SET(routes.Attempts.ADD(postgres.Int(1))),
			routes.LastAttemptAt.SET(postgres.TimestampzT(at)),
			routes.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			routes.DeadLetteredAt.SET(postgres.TimestampzT(at)),
			routes.LastError.SET(postgres.String(reason)),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(target)
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("mark route dead-lettered: %w", err)
	}

	letters := table.NotificationDeadLetters
	archive := letters.INSERT(
		letters.DeadLetterID,
		letters.NotificationID,
		letters.RouteID,
		letters.ArchivedAt,
		letters.Reason,
	).VALUES(uuid.New(), notificationID, routeID, at, reason)
	_, err = archive.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("insert notification dead-letter: %w", err)
	}
	return nil
}
// ClaimedRoute bundles a locked route row with its parent notification
// so the worker has every field it needs in one trip.
type ClaimedRoute struct {
// Route is the claimed notification_routes row.
Route Route
// Notification is the parent row (kind, idempotency key, payload).
Notification Notification
}
// ClaimDueRoutes locks up to `limit` due routes with FOR UPDATE SKIP
// LOCKED, joins the parent notification to surface kind/payload, and
// returns them. exclude is the list of route_ids already handled in
// the current tick — they are filtered out so the same row cannot
// chew through MaxAttempts inside a single tick when its retry
// schedule lands at <= now().
//
// A route is due when its status is pending or retrying and its
// next_attempt_at is NULL or not later than the database's NOW().
// Routes are returned ordered by COALESCE(next_attempt_at, created_at)
// ascending. The row locks are taken on the routes table only and live
// as long as tx. A payload that fails to decode aborts the whole call
// with an error.
func (s *Store) ClaimDueRoutes(ctx context.Context, tx *sql.Tx, limit int, exclude ...uuid.UUID) ([]ClaimedRoute, error) {
r := table.NotificationRoutes
n := table.Notifications
// Due-ness is judged against the database clock (NOW()), not the
// Service clock.
condition := r.Status.IN(postgres.String(RouteStatusPending), postgres.String(RouteStatusRetrying)).
AND(r.NextAttemptAt.IS_NULL().OR(r.NextAttemptAt.LT_EQ(postgres.NOW())))
if len(exclude) > 0 {
excludeExprs := make([]postgres.Expression, 0, len(exclude))
for _, id := range exclude {
excludeExprs = append(excludeExprs, postgres.UUID(id))
}
condition = condition.AND(r.RouteID.NOT_IN(excludeExprs...))
}
stmt := postgres.SELECT(
r.AllColumns,
n.Kind, n.IdempotencyKey, n.UserID, n.Payload, n.CreatedAt,
).
FROM(r.INNER_JOIN(n, n.NotificationID.EQ(r.NotificationID))).
WHERE(condition).
ORDER_BY(postgres.COALESCE(r.NextAttemptAt, r.CreatedAt).ASC()).
LIMIT(int64(limit)).
FOR(postgres.UPDATE().OF(r).SKIP_LOCKED())
// NOTE(review): the scan destination's shape (embedded route model plus
// a nested struct named Notifications) presumably drives go-jet's
// column-to-field mapping — confirm before renaming anything here.
var rows []struct {
model.NotificationRoutes
Notifications struct {
Kind string
IdempotencyKey string
UserID *uuid.UUID
Payload *string
CreatedAt time.Time
}
}
if err := stmt.QueryContext(ctx, tx, &rows); err != nil {
return nil, fmt.Errorf("claim due routes: %w", err)
}
out := make([]ClaimedRoute, 0, len(rows))
for _, row := range rows {
route := modelToRoute(row.NotificationRoutes)
// The route's user id is taken from the parent notification.
route.UserID = row.Notifications.UserID
notif := Notification{
NotificationID: row.NotificationRoutes.NotificationID,
Kind: row.Notifications.Kind,
IdempotencyKey: row.Notifications.IdempotencyKey,
UserID: row.Notifications.UserID,
CreatedAt: row.Notifications.CreatedAt,
}
decoded, err := decodePayload(payloadBytesFromPtr(row.Notifications.Payload))
if err != nil {
return nil, fmt.Errorf("decode notification payload: %w", err)
}
notif.Payload = decoded
out = append(out, ClaimedRoute{Route: route, Notification: notif})
}
return out, nil
}
// ListNotificationsResult bundles a page of notifications and the
// total-row count. Layout mirrors `mail.AdminListDeliveriesPage`.
type ListNotificationsResult struct {
	// Items holds the notifications that fall inside the requested page.
	Items []Notification
	// Total is the row count of the whole notifications table, not of
	// the page.
	Total int64
}
// ListNotifications fetches one page of notifications ordered by
// creation time descending (notification id breaks ties) together with
// the table-wide row count.
func (s *Store) ListNotifications(ctx context.Context, offset, limit int) (ListNotificationsResult, error) {
	var none ListNotificationsResult
	tbl := table.Notifications

	count, err := countAll(ctx, s.db, tbl)
	if err != nil {
		return none, fmt.Errorf("count notifications: %w", err)
	}

	query := postgres.SELECT(
		tbl.NotificationID, tbl.Kind, tbl.IdempotencyKey, tbl.UserID,
		tbl.Payload, tbl.CreatedAt,
	).
		FROM(tbl).
		ORDER_BY(tbl.CreatedAt.DESC(), tbl.NotificationID.DESC()).
		LIMIT(int64(limit)).
		OFFSET(int64(offset))

	var records []model.Notifications
	if err := query.QueryContext(ctx, s.db, &records); err != nil {
		return none, fmt.Errorf("list notifications: %w", err)
	}

	page := ListNotificationsResult{
		Items: make([]Notification, 0, len(records)),
		Total: count,
	}
	for i := range records {
		item, err := modelToNotification(records[i])
		if err != nil {
			return none, err
		}
		page.Items = append(page.Items, item)
	}
	return page, nil
}
// GetNotification looks a notification up by its primary key. When the
// query yields no row the sentinel ErrNotificationNotFound is returned;
// any other query failure is wrapped.
func (s *Store) GetNotification(ctx context.Context, id uuid.UUID) (Notification, error) {
	tbl := table.Notifications
	query := postgres.SELECT(
		tbl.NotificationID, tbl.Kind, tbl.IdempotencyKey, tbl.UserID,
		tbl.Payload, tbl.CreatedAt,
	).
		FROM(tbl).
		WHERE(tbl.NotificationID.EQ(postgres.UUID(id))).
		LIMIT(1)

	var record model.Notifications
	err := query.QueryContext(ctx, s.db, &record)
	switch {
	case err == nil:
		return modelToNotification(record)
	case errors.Is(err, qrm.ErrNoRows):
		return Notification{}, ErrNotificationNotFound
	default:
		return Notification{}, fmt.Errorf("get notification: %w", err)
	}
}
// ListDeadLettersResult bundles a page of dead-letters and the total
// row count.
type ListDeadLettersResult struct {
	// Items holds the dead-letter rows that fall inside the requested page.
	Items []DeadLetter
	// Total is the row count of the whole dead-letter table, not of
	// the page.
	Total int64
}
// ListDeadLetters fetches one page of dead-letter rows ordered by
// archive time descending (dead-letter id breaks ties) together with
// the table-wide row count.
func (s *Store) ListDeadLetters(ctx context.Context, offset, limit int) (ListDeadLettersResult, error) {
	var none ListDeadLettersResult
	tbl := table.NotificationDeadLetters

	count, err := countAll(ctx, s.db, tbl)
	if err != nil {
		return none, fmt.Errorf("count dead-letters: %w", err)
	}

	query := postgres.SELECT(
		tbl.DeadLetterID, tbl.NotificationID, tbl.RouteID, tbl.ArchivedAt, tbl.Reason,
	).
		FROM(tbl).
		ORDER_BY(tbl.ArchivedAt.DESC(), tbl.DeadLetterID.DESC()).
		LIMIT(int64(limit)).
		OFFSET(int64(offset))

	var records []model.NotificationDeadLetters
	if err := query.QueryContext(ctx, s.db, &records); err != nil {
		return none, fmt.Errorf("list dead-letters: %w", err)
	}

	page := ListDeadLettersResult{
		Items: make([]DeadLetter, 0, len(records)),
		Total: count,
	}
	for i := range records {
		rec := &records[i]
		page.Items = append(page.Items, DeadLetter{
			DeadLetterID:   rec.DeadLetterID,
			NotificationID: rec.NotificationID,
			RouteID:        rec.RouteID,
			ArchivedAt:     rec.ArchivedAt,
			Reason:         rec.Reason,
		})
	}
	return page, nil
}
// ListMalformedResult bundles a page of malformed intents and the
// total row count.
type ListMalformedResult struct {
	// Items holds the malformed intents that fall inside the requested page.
	Items []MalformedIntent
	// Total is the row count of the whole malformed-intents table, not
	// of the page.
	Total int64
}
// ListMalformed fetches one page of malformed intents ordered by
// receive time descending (id breaks ties) together with the
// table-wide row count. Each stored payload is decoded back into a map;
// a payload that fails to decode aborts the whole page.
func (s *Store) ListMalformed(ctx context.Context, offset, limit int) (ListMalformedResult, error) {
	var none ListMalformedResult
	tbl := table.NotificationMalformedIntents

	count, err := countAll(ctx, s.db, tbl)
	if err != nil {
		return none, fmt.Errorf("count malformed intents: %w", err)
	}

	query := postgres.SELECT(tbl.ID, tbl.ReceivedAt, tbl.Payload, tbl.Reason).
		FROM(tbl).
		ORDER_BY(tbl.ReceivedAt.DESC(), tbl.ID.DESC()).
		LIMIT(int64(limit)).
		OFFSET(int64(offset))

	var records []model.NotificationMalformedIntents
	if err := query.QueryContext(ctx, s.db, &records); err != nil {
		return none, fmt.Errorf("list malformed intents: %w", err)
	}

	page := ListMalformedResult{
		Items: make([]MalformedIntent, 0, len(records)),
		Total: count,
	}
	for i := range records {
		rec := &records[i]
		payload, err := decodePayload([]byte(rec.Payload))
		if err != nil {
			return none, fmt.Errorf("decode malformed payload: %w", err)
		}
		page.Items = append(page.Items, MalformedIntent{
			ID:         rec.ID,
			ReceivedAt: rec.ReceivedAt,
			Payload:    payload,
			Reason:     rec.Reason,
		})
	}
	return page, nil
}
// InsertMalformed stores an intent that failed validation, together
// with the reason it was rejected. The payload map is JSON-encoded here
// and written under a freshly generated id. Encoding and insert
// failures are wrapped and returned to the caller.
func (s *Store) InsertMalformed(ctx context.Context, payload map[string]any, reason string) error {
	raw, err := encodePayload(payload)
	if err != nil {
		return fmt.Errorf("encode malformed payload: %w", err)
	}

	tbl := table.NotificationMalformedIntents
	insert := tbl.INSERT(tbl.ID, tbl.Payload, tbl.Reason).
		VALUES(uuid.New(), string(raw), reason)

	_, err = insert.ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("insert malformed intent: %w", err)
	}
	return nil
}
// SkipPendingRoutesForUser marks every pending or retrying route whose
// parent notification carries userID as status='skipped', clears the
// retry schedule and stamps skipped_at / updated_at with at. Routes in
// any other status are left untouched. The `OnUserDeleted` cascade
// calls it so the worker stops delivering to a vanished account. It
// returns the number of rows changed.
func (s *Store) SkipPendingRoutesForUser(ctx context.Context, userID uuid.UUID, at time.Time) (int64, error) {
	routes, notifs := table.NotificationRoutes, table.Notifications

	// Notifications that belong to the user being removed.
	ownedByUser := postgres.SELECT(notifs.NotificationID).
		FROM(notifs).
		WHERE(notifs.UserID.EQ(postgres.UUID(userID)))

	stillLive := routes.Status.IN(postgres.String(RouteStatusPending), postgres.String(RouteStatusRetrying))

	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusSkipped)),
			routes.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			routes.SkippedAt.SET(postgres.TimestampzT(at)),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
			routes.LastError.SET(postgres.String("recipient soft-deleted")),
		).
		WHERE(stillLive.AND(routes.NotificationID.IN(ownedByUser)))

	outcome, err := update.ExecContext(ctx, s.db)
	if err != nil {
		return 0, fmt.Errorf("skip pending routes: %w", err)
	}
	changed, err := outcome.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("rows affected: %w", err)
	}
	return changed, nil
}
// withTx wraps fn in a Postgres transaction. fn's return value
// determines commit (nil) vs rollback (non-nil). Rollback errors are
// swallowed when fn already returned an error, since the latter is
// more actionable.
//
// The rollback is deferred, so it also runs when fn panics: without it
// a panic recovered further up the stack would leave the transaction
// open and its pooled connection pinned. The panic itself is not
// recovered here and keeps propagating to the caller.
func withTx(ctx context.Context, db *sql.DB, fn func(tx *sql.Tx) error) error {
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("notification store: begin tx: %w", err)
	}

	committed := false
	defer func() {
		if !committed {
			// Best-effort: the error is ignored because either fn's
			// error, the commit error or the in-flight panic is the
			// more useful signal. After a failed Commit this returns
			// sql.ErrTxDone and does nothing.
			_ = tx.Rollback()
		}
	}()

	if err := fn(tx); err != nil {
		return err
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("notification store: commit tx: %w", err)
	}
	committed = true
	return nil
}
// modelToRoute maps a generated notification_routes row onto the public
// Route value. The user id is not filled in here because it lives on
// the parent notification. Nullable timestamps are copied so the
// returned Route does not share pointers with the model row.
func modelToRoute(row model.NotificationRoutes) Route {
	// detach returns a pointer to a private copy of *src, or nil.
	detach := func(src *time.Time) *time.Time {
		if src == nil {
			return nil
		}
		v := *src
		return &v
	}

	return Route{
		RouteID:        row.RouteID,
		NotificationID: row.NotificationID,
		Channel:        row.Channel,
		Status:         row.Status,
		Attempts:       row.Attempts,
		MaxAttempts:    row.MaxAttempts,
		LastError:      row.LastError,
		ResolvedEmail:  row.ResolvedEmail,
		ResolvedLocale: row.ResolvedLocale,
		CreatedAt:      row.CreatedAt,
		UpdatedAt:      row.UpdatedAt,
		NextAttemptAt:  detach(row.NextAttemptAt),
		LastAttemptAt:  detach(row.LastAttemptAt),
		PublishedAt:    detach(row.PublishedAt),
		DeadLetteredAt: detach(row.DeadLetteredAt),
		SkippedAt:      detach(row.SkippedAt),
	}
}
// modelToNotification converts a generated notifications row into the
// public Notification value, decoding the stored JSON payload on the
// way. A payload that cannot be decoded yields a wrapped error and a
// zero Notification.
func modelToNotification(row model.Notifications) (Notification, error) {
	payload, err := decodePayload(payloadBytesFromPtr(row.Payload))
	if err != nil {
		return Notification{}, fmt.Errorf("decode payload: %w", err)
	}

	notif := Notification{
		NotificationID: row.NotificationID,
		Kind:           row.Kind,
		IdempotencyKey: row.IdempotencyKey,
		UserID:         row.UserID,
		CreatedAt:      row.CreatedAt,
	}
	notif.Payload = payload
	return notif, nil
}
// payloadBytesFromPtr turns the nullable string the generated model
// uses for a jsonb column into the byte slice decodePayload expects.
// A nil pointer yields a nil slice.
func payloadBytesFromPtr(p *string) []byte {
	var raw []byte
	if p != nil {
		raw = []byte(*p)
	}
	return raw
}
// encodePayload serialises a map[string]any to JSON for storage in a
// jsonb column. A nil map becomes the JSON literal null, which
// decodePayload reads back as nil.
func encodePayload(payload map[string]any) ([]byte, error) {
	if payload != nil {
		return json.Marshal(payload)
	}
	return []byte("null"), nil
}
// decodePayload turns the contents of a jsonb column back into the
// producer's map. An empty buffer and the literal null (matched
// case-insensitively, surrounding whitespace ignored) both yield a nil
// map with no error; anything else must be a JSON object.
func decodePayload(buf []byte) (map[string]any, error) {
	if len(buf) == 0 {
		return nil, nil
	}
	if literal := strings.TrimSpace(string(buf)); strings.EqualFold(literal, "null") {
		return nil, nil
	}

	decoded := make(map[string]any)
	err := json.Unmarshal(buf, &decoded)
	if err != nil {
		return nil, err
	}
	return decoded, nil
}
// countAll issues `SELECT COUNT(*) FROM <tbl>` through jet and returns
// the single value. The destination field carries an alias tag so QRM
// can map the un-prefixed column produced by AS("count"). Query errors
// are returned unwrapped; callers add their own context.
func countAll(ctx context.Context, db qrm.DB, tbl postgres.ReadableTable) (int64, error) {
	var result struct {
		Count int64 `alias:"count"`
	}

	query := postgres.SELECT(postgres.COUNT(postgres.STAR).AS("count")).FROM(tbl)
	err := query.QueryContext(ctx, db, &result)
	if err != nil {
		return 0, err
	}
	return result.Count, nil
}
+258
View File
@@ -0,0 +1,258 @@
package notification
import (
"context"
"errors"
"fmt"
"time"
"galaxy/backend/internal/user"
"github.com/google/uuid"
"go.uber.org/zap"
)
// Submit takes a producer intent through the whole intake pipeline:
// catalog validation, recipient resolution, route materialisation, a
// single persisting store call, and one synchronous best-effort
// dispatch attempt per pending route.
//
// Validation failures are never surfaced as errors: the intent is
// recorded in `notification_malformed_intents` and Submit returns
// (uuid.Nil, nil). Only real failures (route materialisation, store
// errors) are returned, wrapped where appropriate.
//
// When the store reports that the (kind, idempotency_key) pair already
// exists, the stored notification id is returned and no dispatch is
// attempted.
func (s *Service) Submit(ctx context.Context, intent Intent) (uuid.UUID, error) {
	entry, known := LookupCatalog(intent.Kind)

	// The first failing check wins, in the same order producers see
	// documented: unknown kind, blank idempotency key, no recipients.
	var rejection error
	switch {
	case !known:
		rejection = ErrUnknownKind
	case trimSpace(intent.IdempotencyKey) == "":
		rejection = ErrEmptyIdempotencyKey
	case !entry.Admin && len(intent.Recipients) == 0:
		rejection = ErrNoRecipients
	}
	if rejection != nil {
		s.recordMalformed(ctx, intent, rejection.Error())
		return uuid.Nil, nil
	}

	now := s.nowUTC()
	notificationID := uuid.New()

	// Only a per-user kind with exactly one recipient has an owner on
	// the notification row itself.
	var owner *uuid.UUID
	if !entry.Admin && len(intent.Recipients) == 1 {
		sole := intent.Recipients[0]
		owner = &sole
	}

	seeds, err := s.materialiseRoutes(ctx, notificationID, entry, intent, now)
	if err != nil {
		return uuid.Nil, err
	}

	storedID, inserted, err := s.deps.Store.InsertNotification(ctx, InsertNotificationArgs{
		NotificationID: notificationID,
		Kind:           intent.Kind,
		IdempotencyKey: intent.IdempotencyKey,
		UserID:         owner,
		Payload:        intent.Payload,
		Routes:         seeds,
	})
	switch {
	case err != nil:
		return uuid.Nil, fmt.Errorf("notification submit: %w", err)
	case !inserted:
		s.deps.Logger.Debug("idempotent submit, returning existing notification",
			zap.String("kind", intent.Kind),
			zap.String("idempotency_key", intent.IdempotencyKey),
			zap.String("notification_id", storedID.String()),
		)
		return storedID, nil
	}

	// Every pending route gets exactly one immediate attempt. Failures
	// remain on the row for the worker and are not reported back.
	notif := Notification{
		NotificationID: notificationID,
		Kind:           intent.Kind,
		IdempotencyKey: intent.IdempotencyKey,
		UserID:         owner,
		Payload:        intent.Payload,
		CreatedAt:      now,
	}
	for _, seed := range seeds {
		if seed.Status == RouteStatusPending {
			s.bestEffortDispatch(ctx, notif, routeFromSeed(notificationID, seed, now))
		}
	}
	return notificationID, nil
}
// materialiseRoutes produces the route seeds destined for
// `notification_routes`. Admin kinds get one seed per catalog channel;
// per-user kinds get one seed per (recipient, channel) pair. A seed is
// `pending` with an immediate next attempt unless its destination
// cannot be resolved, in which case it is created as `skipped` with the
// reason in LastError. notificationID is accepted for signature
// symmetry and is not used. The returned error is currently always nil.
func (s *Service) materialiseRoutes(ctx context.Context, notificationID uuid.UUID, entry CatalogEntry, intent Intent, now time.Time) ([]RouteSeed, error) {
	attempts := int32(s.deps.Config.MaxAttempts)
	if attempts <= 0 {
		attempts = 1
	}
	dueNow := timePtr(now.UTC())

	// skip turns a live seed into a terminal skipped one.
	skip := func(seed *RouteSeed, reason string) {
		seed.Status = RouteStatusSkipped
		seed.NextAttemptAt = nil
		seed.SkippedAt = timePtr(now.UTC())
		seed.LastError = reason
	}

	if !entry.Admin {
		// Per-user kinds: fan out across (recipient × channel).
		out := make([]RouteSeed, 0, len(intent.Recipients)*len(entry.Channels))
		for i := range intent.Recipients {
			recipient := intent.Recipients[i]
			// One lookup per recipient; the outcome is shared by all
			// of that recipient's channels.
			account, lookupErr := s.resolveAccount(ctx, recipient)
			for _, channel := range entry.Channels {
				seed := RouteSeed{
					RouteID:         uuid.New(),
					Channel:         channel,
					Status:          RouteStatusPending,
					MaxAttempts:     attempts,
					NextAttemptAt:   dueNow,
					UserID:          &recipient,
					DeviceSessionID: intent.DeviceSessionID,
				}
				switch {
				case channel == ChannelEmail && lookupErr != nil:
					skip(&seed, lookupErr.Error())
				case channel == ChannelEmail:
					seed.ResolvedEmail = account.Email
					seed.ResolvedLocale = account.PreferredLanguage
					if trimSpace(seed.ResolvedEmail) == "" {
						skip(&seed, "recipient has no email on file")
					}
				case channel == ChannelPush && lookupErr != nil:
					skip(&seed, lookupErr.Error())
				case channel == ChannelPush:
					if account.PreferredLanguage != "" {
						seed.ResolvedLocale = account.PreferredLanguage
					}
				}
				out = append(out, seed)
			}
		}
		return out, nil
	}

	// Admin-channel kinds: one row per channel, no per-user fan-out.
	out := make([]RouteSeed, 0, len(entry.Channels))
	for _, channel := range entry.Channels {
		seed := RouteSeed{
			RouteID:       uuid.New(),
			Channel:       channel,
			Status:        RouteStatusPending,
			MaxAttempts:   attempts,
			NextAttemptAt: dueNow,
		}
		if channel == ChannelEmail {
			seed.ResolvedEmail = s.adminEmail()
			if seed.ResolvedEmail == "" {
				skip(&seed, "BACKEND_NOTIFICATION_ADMIN_EMAIL not configured")
				s.deps.Logger.Warn("admin notification skipped: admin email not configured",
					zap.String("kind", intent.Kind),
					zap.String("idempotency_key", intent.IdempotencyKey),
				)
			}
		}
		out = append(out, seed)
	}
	return out, nil
}
// resolveAccount looks the recipient up through the configured
// AccountResolver. user.ErrAccountNotFound and soft-deleted accounts
// are reported as plain, sentinel-free errors whose text ends up on the
// skipped route; any other resolver failure is wrapped with the user
// id.
func (s *Service) resolveAccount(ctx context.Context, userID uuid.UUID) (user.Account, error) {
	account, err := s.deps.Accounts.GetAccount(ctx, userID)
	switch {
	case err == nil && account.DeletedAt != nil:
		return user.Account{}, errors.New("recipient account soft-deleted")
	case err == nil:
		return account, nil
	case errors.Is(err, user.ErrAccountNotFound):
		return user.Account{}, errors.New("recipient account not found")
	default:
		return user.Account{}, fmt.Errorf("resolve recipient %s: %w", userID, err)
	}
}
// recordMalformed persists an invalid intent on a best-effort basis.
// The stored record always carries kind and idempotency_key; payload,
// recipients and device_session_id are added only when present. A store
// failure is logged as a warning and never reaches the producer,
// matching the README §10 contract.
func (s *Service) recordMalformed(ctx context.Context, intent Intent, reason string) {
	record := map[string]any{
		"kind":            intent.Kind,
		"idempotency_key": intent.IdempotencyKey,
	}
	if len(intent.Payload) > 0 {
		record["payload"] = intent.Payload
	}
	if count := len(intent.Recipients); count > 0 {
		ids := make([]string, count)
		for i, id := range intent.Recipients {
			ids[i] = id.String()
		}
		record["recipients"] = ids
	}
	if session := intent.DeviceSessionID; session != nil {
		record["device_session_id"] = session.String()
	}

	err := s.deps.Store.InsertMalformed(ctx, record, reason)
	if err == nil {
		s.deps.Logger.Info("notification intent dropped as malformed",
			zap.String("kind", intent.Kind),
			zap.String("reason", reason),
		)
		return
	}
	s.deps.Logger.Warn("failed to persist malformed notification intent",
		zap.String("kind", intent.Kind),
		zap.String("reason", reason),
		zap.Error(err),
	)
}
// routeFromSeed builds the Route value handed to the dispatcher from a
// RouteSeed. The route starts with zero attempts, and both CreatedAt
// and UpdatedAt are set to now in UTC; every other field is carried
// over from the seed.
func routeFromSeed(notificationID uuid.UUID, seed RouteSeed, now time.Time) Route {
	stamp := now.UTC()
	return Route{
		RouteID:         seed.RouteID,
		NotificationID:  notificationID,
		Channel:         seed.Channel,
		Status:          seed.Status,
		Attempts:        0,
		MaxAttempts:     seed.MaxAttempts,
		NextAttemptAt:   seed.NextAttemptAt,
		ResolvedEmail:   seed.ResolvedEmail,
		ResolvedLocale:  seed.ResolvedLocale,
		UserID:          seed.UserID,
		DeviceSessionID: seed.DeviceSessionID,
		CreatedAt:       stamp,
		UpdatedAt:       stamp,
		SkippedAt:       seed.SkippedAt,
		LastError:       seed.LastError,
	}
}
@@ -0,0 +1,458 @@
package notification_test
import (
"context"
"database/sql"
"errors"
"net/url"
"sync"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/notification"
backendpg "galaxy/backend/internal/postgres"
"galaxy/backend/internal/user"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
"go.uber.org/zap/zaptest"
)
// Settings for the throwaway Postgres container used by these tests.
const (
	// pgImage is the container image started for each test database.
	pgImage = "postgres:16-alpine"
	// pgUser, pgPassword and pgDatabase are the credentials and database
	// name the container is created with.
	pgUser     = "galaxy"
	pgPassword = "galaxy"
	pgDatabase = "galaxy_backend"
	// pgSchema is injected into the DSN as search_path.
	pgSchema = "backend"
	// pgStartup bounds the wait for the container's readiness log line.
	pgStartup = 90 * time.Second
	// pgOpTO is the per-operation timeout passed to the shared postgres config.
	pgOpTO = 10 * time.Second
)
// startPostgres mirrors the mail/auth scaffolding: it starts a Postgres
// testcontainer, opens a pool whose search_path is scoped to pgSchema,
// pings it, applies the backend migrations and returns the *sql.DB.
//
// The test is skipped (not failed) when the container cannot be
// started. Container termination and pool closing are registered with
// t.Cleanup, so callers have nothing to release.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)

	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			// Postgres logs readiness twice: once during init and once
			// after the real server starts.
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	// Register termination before looking at err: Run may return a
	// created container together with an error (for example when the
	// wait strategy times out), and skipping first would leave that
	// container running. TerminateContainer is a no-op for a nil
	// container.
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}

	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	scoped, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}

	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scoped
	cfg.OperationTimeout = pgOpTO
	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	// Close errors are irrelevant at teardown; the container goes away next.
	t.Cleanup(func() { _ = db.Close() })

	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// dsnWithSearchPath returns baseDSN with its search_path query
// parameter set to schema, replacing any existing value. When the DSN
// carries no (or an empty) sslmode it is set to "disable". The query
// string is re-encoded, so parameters come out sorted by key.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	u, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}

	params := u.Query()
	params.Set("search_path", schema)
	if mode := params.Get("sslmode"); mode == "" {
		params.Set("sslmode", "disable")
	}

	u.RawQuery = params.Encode()
	return u.String(), nil
}
// recordingMailer is a test double for the mail dependency. Every
// successful EnqueueTemplate call is appended to calls; when err is
// set, the call is rejected with err and nothing is recorded. All
// access goes through mu.
type recordingMailer struct {
	mu    sync.Mutex
	calls []recordedEnqueue
	err   error
}

// recordedEnqueue is the argument snapshot of one EnqueueTemplate call.
type recordedEnqueue struct {
	TemplateID     string
	Recipient      string
	Payload        map[string]any
	IdempotencyKey string
}

// EnqueueTemplate records the call, or returns the configured error
// without recording anything.
func (r *recordingMailer) EnqueueTemplate(_ context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error {
	r.mu.Lock()
	defer r.mu.Unlock()

	if r.err == nil {
		r.calls = append(r.calls, recordedEnqueue{
			TemplateID:     templateID,
			Recipient:      recipient,
			Payload:        payload,
			IdempotencyKey: idempotencyKey,
		})
	}
	return r.err
}

// Calls returns a copy of the recorded calls, so callers can inspect
// it without holding the lock.
func (r *recordingMailer) Calls() []recordedEnqueue {
	r.mu.Lock()
	defer r.mu.Unlock()

	snapshot := make([]recordedEnqueue, len(r.calls))
	copy(snapshot, r.calls)
	return snapshot
}
// recordingPush is a test double for the push dependency. Every
// PublishClientEvent call is appended to calls under mu and always
// succeeds.
type recordingPush struct {
	mu    sync.Mutex
	calls []recordedPushEvent
}

// recordedPushEvent is the argument snapshot of one PublishClientEvent
// call. The device-session argument is not recorded.
type recordedPushEvent struct {
	UserID    uuid.UUID
	Kind      string
	Payload   map[string]any
	EventID   string
	RequestID string
	TraceID   string
}

// PublishClientEvent records the call and reports success.
func (r *recordingPush) PublishClientEvent(_ context.Context, userID uuid.UUID, _ *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error {
	event := recordedPushEvent{
		UserID:    userID,
		Kind:      kind,
		Payload:   payload,
		EventID:   eventID,
		RequestID: requestID,
		TraceID:   traceID,
	}

	r.mu.Lock()
	defer r.mu.Unlock()
	r.calls = append(r.calls, event)
	return nil
}

// Calls returns a copy of the recorded events, so callers can inspect
// it without holding the lock.
func (r *recordingPush) Calls() []recordedPushEvent {
	r.mu.Lock()
	defer r.mu.Unlock()

	snapshot := make([]recordedPushEvent, len(r.calls))
	copy(snapshot, r.calls)
	return snapshot
}
// stubAccounts answers every lookup with the same canned account (or
// the same canned error), so tests don't need to seed the accounts
// table.
type stubAccounts struct {
	account user.Account
	err     error
}

// GetAccount returns the configured error when one is set; otherwise it
// returns a copy of the canned account with UserID replaced by the
// requested id.
func (s *stubAccounts) GetAccount(_ context.Context, userID uuid.UUID) (user.Account, error) {
	if failure := s.err; failure != nil {
		return user.Account{}, failure
	}
	found := s.account
	found.UserID = userID
	return found, nil
}
// newService wires a notification.Service against db with the supplied
// test doubles. The config uses a 10ms worker interval and three
// attempts per route; adminEmail may be empty to leave the admin
// address unconfigured.
func newService(t *testing.T, db *sql.DB, mailer notification.Mailer, push notification.PushPublisher, accounts notification.AccountResolver, adminEmail string) *notification.Service {
	t.Helper()

	deps := notification.Deps{
		Store:    notification.NewStore(db),
		Mail:     mailer,
		Push:     push,
		Accounts: accounts,
		Config: config.NotificationConfig{
			AdminEmail:     adminEmail,
			WorkerInterval: 10 * time.Millisecond,
			MaxAttempts:    3,
		},
		Logger: zaptest.NewLogger(t),
	}
	return notification.NewService(deps)
}
// TestSubmitFansOutLobbyInviteToPushAndEmail submits a lobby invite for
// one recipient and expects the synchronous dispatch to have produced
// exactly one push event and one email addressed to the recipient.
func TestSubmitFansOutLobbyInviteToPushAndEmail(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer, push := &recordingMailer{}, &recordingPush{}
	accounts := &stubAccounts{account: user.Account{
		Email:             "alice@example.test",
		PreferredLanguage: "en",
	}}
	svc := newService(t, db, mailer, push, accounts, "")

	intent := notification.Intent{
		Kind:           notification.KindLobbyInviteReceived,
		IdempotencyKey: "invite:" + uuid.NewString(),
		Recipients:     []uuid.UUID{uuid.New()},
		Payload: map[string]any{
			"game_id":         uuid.NewString(),
			"inviter_user_id": uuid.NewString(),
		},
	}
	id, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id == uuid.Nil {
		t.Fatal("submit returned nil id")
	}

	// Best-effort dispatch ran synchronously; both channels should
	// have observed exactly one call.
	if pushed := len(push.Calls()); pushed != 1 {
		t.Errorf("push calls=%d, want 1", pushed)
	}
	mails := mailer.Calls()
	if len(mails) != 1 {
		t.Errorf("mail calls=%d, want 1", len(mails))
		return
	}
	if sent := mails[0]; sent.Recipient != "alice@example.test" {
		t.Errorf("mail recipient=%q", sent.Recipient)
	}
	if sent := mails[0]; sent.TemplateID != notification.KindLobbyInviteReceived {
		t.Errorf("mail template=%q", sent.TemplateID)
	}
}
// TestSubmitIsIdempotent submits the same intent twice and expects both
// calls to succeed and return the same notification id.
func TestSubmitIsIdempotent(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	accounts := &stubAccounts{account: user.Account{Email: "x@example.test"}}
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, accounts, "")

	intent := notification.Intent{
		Kind:           notification.KindLobbyApplicationSubmitted,
		IdempotencyKey: "dedupe-key",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString(), "application_id": uuid.NewString()},
	}

	ctx := context.Background()
	original, err := svc.Submit(ctx, intent)
	if err != nil {
		t.Fatalf("first submit: %v", err)
	}
	repeat, err := svc.Submit(ctx, intent)
	if err != nil {
		t.Fatalf("second submit: %v", err)
	}
	if original != repeat {
		t.Fatalf("idempotent submit must return same id: %s vs %s", original, repeat)
	}
}
// TestSubmitMalformedPersists submits an intent with an unknown kind
// and expects no error, a nil id, and at least one row in the
// malformed-intent listing.
func TestSubmitMalformedPersists(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{}, "")
	ctx := context.Background()

	bogus := notification.Intent{
		Kind:           "nonsense.kind",
		IdempotencyKey: "anything",
		Recipients:     []uuid.UUID{uuid.New()},
	}
	id, err := svc.Submit(ctx, bogus)
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id != uuid.Nil {
		t.Fatalf("malformed submit must return nil id, got %s", id)
	}

	listing, err := svc.AdminListMalformed(ctx, 1, 10)
	if err != nil {
		t.Fatalf("list malformed: %v", err)
	}
	if listing.Total < 1 {
		t.Fatalf("malformed total=%d, want >= 1", listing.Total)
	}
}
// TestSubmitAdminEmailSkipsWhenNotConfigured submits an admin-channel
// kind with no admin email configured. The notification must still be
// accepted, but no mail may be enqueued.
func TestSubmitAdminEmailSkipsWhenNotConfigured(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{}
	svc := newService(t, db, mailer, &recordingPush{}, &stubAccounts{}, "")

	intent := notification.Intent{
		Kind:           notification.KindRuntimeImagePullFailed,
		IdempotencyKey: "ipf-1",
		Payload:        map[string]any{"game_id": uuid.NewString(), "image_ref": "registry/img:tag"},
	}
	id, err := svc.Submit(context.Background(), intent)
	switch {
	case err != nil:
		t.Fatalf("submit: %v", err)
	case id == uuid.Nil:
		t.Fatal("admin submit returned nil id")
	}

	if sent := len(mailer.Calls()); sent != 0 {
		t.Errorf("mail calls=%d, want 0 (admin email unset)", sent)
	}
}
// TestSubmitAdminEmailDispatchesWhenConfigured submits an admin-channel
// kind with an admin email configured and expects exactly one mail,
// addressed to that admin email.
func TestSubmitAdminEmailDispatchesWhenConfigured(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{}
	svc := newService(t, db, mailer, &recordingPush{}, &stubAccounts{}, "ops@example.test")

	intent := notification.Intent{
		Kind:           notification.KindRuntimeContainerStartFailed,
		IdempotencyKey: "csf-1",
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}
	_, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("submit: %v", err)
	}

	sent := mailer.Calls()
	if len(sent) != 1 {
		t.Fatalf("mail calls=%d, want 1", len(sent))
	}
	if to := sent[0].Recipient; to != "ops@example.test" {
		t.Errorf("admin recipient=%q", to)
	}
}
// TestSubmitMissingAccountSkipsEmail submits for a recipient whose
// account lookup fails with user.ErrAccountNotFound. Submit must still
// succeed, and neither mail nor push may be dispatched.
func TestSubmitMissingAccountSkipsEmail(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer, push := &recordingMailer{}, &recordingPush{}
	svc := newService(t, db, mailer, push, &stubAccounts{err: user.ErrAccountNotFound}, "")

	intent := notification.Intent{
		Kind:           notification.KindLobbyApplicationApproved,
		IdempotencyKey: "missing-1",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}
	_, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("submit: %v", err)
	}

	if mails := len(mailer.Calls()); mails != 0 {
		t.Errorf("mail calls=%d want 0 when account missing", mails)
	}
	if pushes := len(push.Calls()); pushes != 0 {
		t.Errorf("push calls=%d want 0 when account missing", pushes)
	}
}
// TestWorkerRetryAndDeadLetter uses a mailer that always fails, then
// drives the worker while forcing the email route due before every
// tick. It expects at least one dead-letter row once the attempts are
// exhausted.
func TestWorkerRetryAndDeadLetter(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	brokenMailer := &recordingMailer{err: errors.New("smtp down")}
	accounts := &stubAccounts{account: user.Account{Email: "alice@example.test", PreferredLanguage: "en"}}
	svc := newService(t, db, brokenMailer, &recordingPush{}, accounts, "")
	ctx := context.Background()

	// MaxAttempts=3 from newService config. Submit fires one
	// best-effort attempt; subsequent Tick calls drive attempts 2 and
	// 3, the last one dead-letters.
	intent := notification.Intent{
		Kind:           notification.KindLobbyInviteReceived,
		IdempotencyKey: "fail-1",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString(), "inviter_user_id": uuid.NewString()},
	}
	if _, err := svc.Submit(ctx, intent); err != nil {
		t.Fatalf("submit: %v", err)
	}

	// Force every retry to be due immediately.
	_, err := db.Exec(`UPDATE backend.notification_routes SET next_attempt_at = now() WHERE channel = 'email'`)
	if err != nil {
		t.Fatalf("force due: %v", err)
	}

	worker := notification.NewWorker(svc)
	for round := 0; round < 5; round++ {
		if err := worker.Tick(ctx); err != nil {
			t.Fatalf("tick: %v", err)
		}
		// Pull the backoff forward so the next tick sees the route again.
		_, err := db.Exec(`UPDATE backend.notification_routes SET next_attempt_at = now() WHERE channel = 'email' AND status = 'retrying'`)
		if err != nil {
			t.Fatalf("force due: %v", err)
		}
	}

	listing, err := svc.AdminListDeadLetters(ctx, 1, 10)
	if err != nil {
		t.Fatalf("list dead-letters: %v", err)
	}
	if listing.Total < 1 {
		t.Fatalf("expected dead-letter row, got total=%d (mail attempts=%d)", listing.Total, len(brokenMailer.Calls()))
	}
}
// TestOnUserDeletedSkipsPendingRoutes submits a notification owned by a
// user while the mailer is failing, runs the user-deleted cascade, and
// expects at least one of that user's routes to be in status 'skipped'.
func TestOnUserDeletedSkipsPendingRoutes(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	brokenMailer := &recordingMailer{err: errors.New("smtp down")}
	owner := uuid.New()
	accounts := &stubAccounts{account: user.Account{Email: "alice@example.test", PreferredLanguage: "en"}}
	svc := newService(t, db, brokenMailer, &recordingPush{}, accounts, "")
	ctx := context.Background()

	// Submit something that owns user_id so the cascade picks it up.
	intent := notification.Intent{
		Kind:           notification.KindLobbyApplicationApproved,
		IdempotencyKey: "cascade-1",
		Recipients:     []uuid.UUID{owner},
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}
	if _, err := svc.Submit(ctx, intent); err != nil {
		t.Fatalf("submit: %v", err)
	}
	if err := svc.OnUserDeleted(ctx, owner); err != nil {
		t.Fatalf("OnUserDeleted: %v", err)
	}

	var skippedRoutes int
	err := db.QueryRow(`
		SELECT COUNT(*)
		FROM backend.notification_routes r
		JOIN backend.notifications n ON n.notification_id = r.notification_id
		WHERE n.user_id = $1 AND r.status = 'skipped'
	`, owner).Scan(&skippedRoutes)
	if err != nil {
		t.Fatalf("count skipped: %v", err)
	}
	if skippedRoutes == 0 {
		t.Fatal("expected at least one skipped route after cascade")
	}
}
// TestAdminGetMissing asks for a notification id that was never stored
// and expects the ErrNotificationNotFound sentinel.
func TestAdminGetMissing(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{}, "")

	_, err := svc.AdminGetNotification(context.Background(), uuid.New())
	if !errors.Is(err, notification.ErrNotificationNotFound) {
		t.Fatalf("got %v, want ErrNotificationNotFound", err)
	}
}

Some files were not shown because too many files have changed in this diff Show More