diff --git a/.gitea/workflows/ci.yaml b/.gitea/workflows/ci.yaml
index 3283130..00df876 100644
--- a/.gitea/workflows/ci.yaml
+++ b/.gitea/workflows/ci.yaml
@@ -298,17 +298,21 @@ jobs:
# pick up the fresh config.
docker compose --ansi never up -d --force-recreate --no-deps caddy otelcol prometheus tempo grafana
- - name: Probe the gateway through caddy
+ - name: Probe the landing and the gateway through caddy
run: |
set -u
+ # Two probes through the contour caddy (R3 split): "/" is the static
+ # landing container, "/app/" is the gateway-served SPA shell.
for i in $(seq 1 20); do
- if docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/; then
- echo "healthy: GET http://scrabble/"
+ if docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/ &&
+ docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/app/; then
+ echo "healthy: GET http://scrabble/ (landing) + /app/ (gateway)"
exit 0
fi
sleep 3
done
- echo "probe failed; recent gateway logs:"
+ echo "probe failed; recent landing + gateway logs:"
+ docker logs --tail 50 scrabble-landing || true
docker logs --tail 50 scrabble-gateway || true
exit 1
diff --git a/.gitignore b/.gitignore
index 259564b..912d4c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,6 @@
# Local, unstaged env overrides
**/.env.local
**/.env.*.local
+
+# Claude Code harness runtime artifacts
+.claude/scheduled_tasks.lock
diff --git a/CLAUDE.md b/CLAUDE.md
index ba32697..28a1502 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -127,8 +127,8 @@ docs/ .gitea/workflows/ PLAN.md CLAUDE.md README.md
gateway/ ui/ pkg/ # added by their stages
platform/telegram/ # Telegram connector side-service (Stage 9): bot + gRPC API
loadtest/ # module scrabble/loadtest: the pre-release stress harness (R2)
-backend/Dockerfile gateway/Dockerfile platform/telegram/Dockerfile loadtest/Dockerfile # multi-stage distroless (Stage 16; loadtest R2)
-deploy/ # docker-compose + caddy + otelcol/prometheus/tempo/grafana (+ cAdvisor/postgres_exporter, R2)
+backend/Dockerfile gateway/Dockerfile platform/telegram/Dockerfile loadtest/Dockerfile # multi-stage distroless (Stage 16; loadtest R2); gateway/Dockerfile also has the `landing` target (R3)
+deploy/ # docker-compose + caddy + landing + otelcol/prometheus/tempo/grafana (+ cAdvisor/postgres_exporter, R2)
```
## Build & test
@@ -144,8 +144,9 @@ go run ./backend/cmd/backend # /healthz, /readyz on :8080
cd ui && pnpm install && pnpm check && pnpm test:unit && pnpm build # the UI (Stage 7+)
pnpm start # UI mock mode: lobby -> game, no backend
-docker build -f backend/Dockerfile -t scrabble-backend . # images (Stage 16); gateway embeds the UI
-docker build -f gateway/Dockerfile -t scrabble-gateway .
+docker build -f backend/Dockerfile -t scrabble-backend . # images (Stage 16); gateway embeds the SPA
+docker build -f gateway/Dockerfile --target gateway -t scrabble-gateway .
+docker build -f gateway/Dockerfile --target landing -t scrabble-landing . # static landing (R3)
docker compose -f deploy/docker-compose.yml config # validate the full contour
```
diff --git a/PRERELEASE.md b/PRERELEASE.md
index 46cf842..51a2c75 100644
--- a/PRERELEASE.md
+++ b/PRERELEASE.md
@@ -19,7 +19,7 @@ the edge before prod. Each phase maps back to the owner's raw pre-release TODO l
|---|-------|-----------|--------|
| R1 | Schema & naming reset | 1 + 10 | **done** |
| R2 | Stress harness + contour observability + early run | 9a | **done** |
-| R3 | Edge hardening | 2 + 8 + 3 | todo |
+| R3 | Edge hardening | 2 + 8 + 3 | **done** |
| R4 | Push enrichment + kill the last poll | 4 + 5 | todo |
| R5 | Bundle slimming | 6 | todo |
| R6 | Refactor + docs reconciliation + de-staging | 7 | todo |
@@ -253,3 +253,31 @@ Then Stage 18.
it feeds R3 (h2c `MaxConcurrentStreams`/timeouts, body-size cap), R6 and R7 (per-player transports,
separate hardware, pool/limit sizing).
- **CI:** `./loadtest/...` added to the path filter + vet/build/test; `go.work.sum` carries the new deps.
+
+- **R3** (interview + implementation):
+ - **Locked decisions:** the flag column lands by **editing the R1 baseline** (+ a contour schema
+ wipe after merge — no migration chain accrues before prod); auto-flag defaults **1000 rejected /
+ 10 min** (`BACKEND_HIGHRATE_FLAG_THRESHOLD`/`_WINDOW`, rolling window, set-once, operator clears,
+ no auto-ban); landing image = **caddy:2-alpine**; throttle data flows **gateway → backend** (a
+ 30 s per-key summary POST to the new `/api/v1/internal/ratelimit/report`, the existing trusted
+ direction) with the episode window + flag rule in the backend (`internal/ratewatch`); rejection
+ logging = **Warn summary per key per window + Debug per rejection** — a deliberate deviation from
+ the phase's "structured log per rejection" (the R2 hammer would have logged ~522k lines in
+ minutes); all three R2-report tails included (explicit h2c sizing, the session-resolve failure
+ cause at Warn, reviving the admin limiter).
+ - **Body cap:** `GATEWAY_MAX_BODY_BYTES` (default 1 MiB) as both the Connect per-message read limit
+ and an `http.MaxBytesReader` wrap of the public mux; an oversized Execute is `resource_exhausted`.
+ - **Dead config found:** `AdminPerMinute`/`AdminBurst` were never wired — the gateway `/_gm` mount is
+ now 429-guarded per IP ahead of its Basic-Auth. The caddy-fronted contour path stays unlimited
+ (stock caddy has no limiter) — an accepted gap, recorded in `docs/ARCHITECTURE.md` §12.
+ - **Landing split:** a `landing` target in `gateway/Dockerfile` (the UI build stage is shared;
+ identical compose build args keep it one cached build); the gateway drops `landing.html` from the
+ embed and 308-redirects `/` → `/app/`; the contour caddy routes `/app/`, `/telegram/` and the
+ Connect path to the gateway and the catch-all to the landing container; the CI deploy probe now
+ checks both `/` (landing) and `/app/` (gateway).
+ - **Observability:** `gateway_rate_limited_total{class}` (user/public/email/admin, aggregate-only)
+ + a rate-vs-rejections panel on the Edge/UX dashboard; the admin console gains the **Throttled**
+ page (the in-memory episode window, reset-on-restart like `active_users`, plus the flagged-account
+ queue) and the flag badge / clear action on the user list / card.
+ - The jet regen also restored the previously missing `game_drafts`/`game_hidden` generated models
+ (their tables were added after the last jetgen run; no behaviour change).
diff --git a/backend/README.md b/backend/README.md
index 03453ce..724642f 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -99,6 +99,14 @@ durable owner — then the durable account wins and a fresh session is minted fo
The `accounts.paid_account`/`merged_into`/`merged_at` columns back this. This supersedes the
Stage 8 `email.bind.*` edge surface (the `RequestCode`/`ConfirmCode` primitives stay).
+**R3** adds rate-limit observability: the gateway posts its periodic rejection
+summaries to `POST /api/v1/internal/ratelimit/report`; `internal/ratewatch` keeps a
+bounded in-memory episode window for the console's **Throttled** page and applies the
+conservative auto-flag — an account sustaining `BACKEND_HIGHRATE_FLAG_THRESHOLD`
+rejected calls within `BACKEND_HIGHRATE_FLAG_WINDOW` gets the soft, reversible
+`accounts.flagged_high_rate_at` marker (set-once; a badge in the user list and a
+**Clear** action on the user card; never an automatic ban).
+
## Package layout
```
@@ -121,6 +129,7 @@ internal/lobby/ # in-memory matchmaking pool (+ robot substitution) + frien
internal/robot/ # human-like robot opponent: account pool, seed-derived strategy, move driver
internal/adminconsole/ # server-rendered admin console (Go templates + embedded CSS, view models), served at /_gm
internal/connector/ # backend gRPC client to the Telegram connector (operator broadcasts)
+internal/ratewatch/ # gateway rate-limit reports: episode window for the console + the high-rate auto-flag (R3)
```
## Configuration (environment)
@@ -153,6 +162,8 @@ internal/connector/ # backend gRPC client to the Telegram connector (operator b
| `BACKEND_CONNECTOR_ADDR` | — | Telegram connector gRPC address for admin-console operator broadcasts. Empty disables broadcasts. |
| `BACKEND_GUEST_REAP_INTERVAL` | `1h` | How often the abandoned-guest reaper sweeps. |
| `BACKEND_GUEST_RETENTION` | `720h` | Account age past which a guest with no game seat is deleted. |
+| `BACKEND_HIGHRATE_FLAG_THRESHOLD` | `1000` | Gateway-reported rejected calls within the window past which an account is soft-flagged (R3). |
+| `BACKEND_HIGHRATE_FLAG_WINDOW` | `10m` | The rolling window those rejections accumulate over. |
## Run
diff --git a/backend/cmd/backend/main.go b/backend/cmd/backend/main.go
index 0fca27e..0854508 100644
--- a/backend/cmd/backend/main.go
+++ b/backend/cmd/backend/main.go
@@ -28,6 +28,7 @@ import (
"scrabble/backend/internal/notify"
"scrabble/backend/internal/postgres"
"scrabble/backend/internal/pushgrpc"
+ "scrabble/backend/internal/ratewatch"
"scrabble/backend/internal/robot"
"scrabble/backend/internal/server"
"scrabble/backend/internal/session"
@@ -177,6 +178,13 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
invitations.SetNotifier(hub)
logger.Info("lobby and social domains ready", zap.Duration("robot_wait", cfg.Lobby.RobotWait))
+ // R3 rate-limit observability: ingest the gateway's rejection reports for the
+ // admin throttled view and the conservative high-rate auto-flag.
+ rateWatch := ratewatch.New(cfg.RateWatch, accounts, logger)
+ logger.Info("rate watch ready",
+ zap.Int("flag_threshold", cfg.RateWatch.FlagThreshold),
+ zap.Duration("flag_window", cfg.RateWatch.FlagWindow))
+
srv := server.New(cfg.HTTPAddr, server.Deps{
Logger: logger,
DB: db,
@@ -193,6 +201,7 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
Registry: registry,
DictDir: cfg.Game.DictDir,
Connector: conn,
+ RateWatch: rateWatch,
})
pushSrv := pushgrpc.NewServer(cfg.GRPCAddr, hub, logger)
diff --git a/backend/internal/account/account.go b/backend/internal/account/account.go
index 8fee67a..975484a 100644
--- a/backend/internal/account/account.go
+++ b/backend/internal/account/account.go
@@ -76,8 +76,13 @@ type Account struct {
// uuid.Nil for a live account. A tombstone keeps the row so the no-cascade
// foreign keys of a shared finished game stay valid (Stage 11).
MergedInto uuid.UUID
- CreatedAt time.Time
- UpdatedAt time.Time
+ // FlaggedHighRateAt is the soft, reversible "suspected high-rate" marker: the
+ // zero time for an unflagged account, otherwise when the gateway-reported
+ // rate-limiter rejections first crossed the sustained threshold (R3). An
+ // operator clears it in the admin console; it never gates any request.
+ FlaggedHighRateAt time.Time
+ CreatedAt time.Time
+ UpdatedAt time.Time
}
// Identity is one of an account's platform/email identities, surfaced on the
@@ -422,6 +427,43 @@ func (s *Store) SpendHint(ctx context.Context, id uuid.UUID) (bool, error) {
return n > 0, nil
}
+// FlagHighRate stamps the soft "suspected high-rate" marker with at, only when
+// the account is not already flagged — the first sustained episode wins, and a
+// re-flag after an operator clear starts a fresh timestamp. An infra marker, not
+// a profile edit, so updated_at is untouched; it never gates any request (R3).
+// It reports whether the flag was newly set.
+func (s *Store) FlagHighRate(ctx context.Context, id uuid.UUID, at time.Time) (bool, error) {
+ stmt := table.Accounts.
+ UPDATE(table.Accounts.FlaggedHighRateAt).
+ SET(postgres.TimestampzT(at.UTC())).
+ WHERE(
+ table.Accounts.AccountID.EQ(postgres.UUID(id)).
+ AND(table.Accounts.FlaggedHighRateAt.IS_NULL()),
+ )
+ res, err := stmt.ExecContext(ctx, s.db)
+ if err != nil {
+ return false, fmt.Errorf("account: flag high rate %s: %w", id, err)
+ }
+ n, err := res.RowsAffected()
+ if err != nil {
+ return false, fmt.Errorf("account: flag high rate rows %s: %w", id, err)
+ }
+ return n > 0, nil
+}
+
+// ClearHighRateFlag removes the high-rate marker — the operator's reversible
+// action in the admin console. Clearing an unflagged account is a no-op.
+func (s *Store) ClearHighRateFlag(ctx context.Context, id uuid.UUID) error {
+ stmt := table.Accounts.
+ UPDATE(table.Accounts.FlaggedHighRateAt).
+ SET(postgres.NULL).
+ WHERE(table.Accounts.AccountID.EQ(postgres.UUID(id)))
+ if _, err := stmt.ExecContext(ctx, s.db); err != nil {
+ return fmt.Errorf("account: clear high-rate flag %s: %w", id, err)
+ }
+ return nil
+}
+
// SetServiceLanguage records the service language (en/ru) of the bot a Telegram
// user authenticated through. It is called on every Telegram login — new and
// existing accounts — so it tracks the bot the user last came through (last-login-
@@ -452,6 +494,10 @@ func modelToAccount(row model.Accounts) Account {
if row.ServiceLanguage != nil {
serviceLanguage = *row.ServiceLanguage
}
+ var flaggedHighRateAt time.Time
+ if row.FlaggedHighRateAt != nil {
+ flaggedHighRateAt = *row.FlaggedHighRateAt
+ }
return Account{
ID: row.AccountID,
DisplayName: row.DisplayName,
@@ -467,6 +513,7 @@ func modelToAccount(row model.Accounts) Account {
NotificationsInAppOnly: row.NotificationsInAppOnly,
PaidAccount: row.PaidAccount,
MergedInto: mergedInto,
+ FlaggedHighRateAt: flaggedHighRateAt,
CreatedAt: row.CreatedAt,
UpdatedAt: row.UpdatedAt,
}
diff --git a/backend/internal/account/userlist.go b/backend/internal/account/userlist.go
index b2bd372..6f2d877 100644
--- a/backend/internal/account/userlist.go
+++ b/backend/internal/account/userlist.go
@@ -2,6 +2,7 @@ package account
import (
"context"
+ "database/sql"
"fmt"
"strings"
"time"
@@ -18,6 +19,9 @@ type UserListItem struct {
PreferredLanguage string
IsGuest bool
IsRobot bool
+ // FlaggedHighRateAt is the soft high-rate marker (zero when unflagged), shown
+ // as a badge in the console list (R3).
+ FlaggedHighRateAt time.Time
CreatedAt time.Time
}
@@ -65,7 +69,7 @@ func userListWhere(f UserFilter) (string, []any) {
// ListUsers returns the filtered admin user list, newest first, paginated.
func (s *Store) ListUsers(ctx context.Context, f UserFilter, limit, offset int) ([]UserListItem, error) {
where, args := userListWhere(f)
- q := `SELECT a.account_id, a.display_name, a.preferred_language, a.is_guest, a.created_at, ` + robotExists + ` AS is_robot
+ q := `SELECT a.account_id, a.display_name, a.preferred_language, a.is_guest, a.flagged_high_rate_at, a.created_at, ` + robotExists + ` AS is_robot
FROM backend.accounts a WHERE ` + where +
fmt.Sprintf(` ORDER BY a.created_at DESC LIMIT $%d OFFSET $%d`, len(args)+1, len(args)+2)
args = append(args, limit, offset)
@@ -77,14 +81,51 @@ FROM backend.accounts a WHERE ` + where +
var out []UserListItem
for rows.Next() {
var it UserListItem
- if err := rows.Scan(&it.ID, &it.DisplayName, &it.PreferredLanguage, &it.IsGuest, &it.CreatedAt, &it.IsRobot); err != nil {
+ var flagged sql.NullTime
+ if err := rows.Scan(&it.ID, &it.DisplayName, &it.PreferredLanguage, &it.IsGuest, &flagged, &it.CreatedAt, &it.IsRobot); err != nil {
return nil, fmt.Errorf("account: scan user: %w", err)
}
+ if flagged.Valid {
+ it.FlaggedHighRateAt = flagged.Time
+ }
out = append(out, it)
}
return out, rows.Err()
}
+// FlaggedAccount is one row of the console's high-rate review queue.
+type FlaggedAccount struct {
+ ID uuid.UUID
+ DisplayName string
+ FlaggedHighRateAt time.Time
+}
+
+// flaggedListCap bounds the console's flagged-account list; the operator clears
+// flags as they are reviewed, so the queue stays short in practice.
+const flaggedListCap = 200
+
+// ListFlaggedHighRate returns the accounts carrying the high-rate flag, most
+// recently flagged first (R3).
+func (s *Store) ListFlaggedHighRate(ctx context.Context) ([]FlaggedAccount, error) {
+ rows, err := s.db.QueryContext(ctx,
+ `SELECT account_id, display_name, flagged_high_rate_at
+FROM backend.accounts WHERE flagged_high_rate_at IS NOT NULL
+ORDER BY flagged_high_rate_at DESC LIMIT $1`, flaggedListCap)
+ if err != nil {
+ return nil, fmt.Errorf("account: list flagged: %w", err)
+ }
+ defer rows.Close()
+ var out []FlaggedAccount
+ for rows.Next() {
+ var fa FlaggedAccount
+ if err := rows.Scan(&fa.ID, &fa.DisplayName, &fa.FlaggedHighRateAt); err != nil {
+ return nil, fmt.Errorf("account: scan flagged: %w", err)
+ }
+ out = append(out, fa)
+ }
+ return out, rows.Err()
+}
+
// CountUsers counts the filtered admin user list, for pagination.
func (s *Store) CountUsers(ctx context.Context, f UserFilter) (int, error) {
where, args := userListWhere(f)
diff --git a/backend/internal/adminconsole/render_test.go b/backend/internal/adminconsole/render_test.go
index 4bdffad..eb6ab9c 100644
--- a/backend/internal/adminconsole/render_test.go
+++ b/backend/internal/adminconsole/render_test.go
@@ -21,8 +21,14 @@ func TestRendererRendersEveryPage(t *testing.T) {
want string
}{
{"dashboard", DashboardView{Accounts: 3, Variants: []VariantVersions{{Variant: "scrabble_en", Latest: "v1", Versions: []string{"v1"}}}}, "Dashboard"},
- {"users", UsersView{Items: []UserRow{{ID: "a1", DisplayName: "Kaya"}}, Pager: NewPager(1, 50, 1)}, "Kaya"},
+ {"users", UsersView{Items: []UserRow{{ID: "a1", DisplayName: "Kaya", FlaggedHighRate: true}}, Pager: NewPager(1, 50, 1)}, "high-rate"},
{"user_detail", UserDetailView{ID: "a1", DisplayName: "Kaya", HasStats: true, Stats: StatsRow{Wins: 2}, TelegramID: "123", ConnectorEnabled: true}, "Send Telegram message"},
+ {"user_detail", UserDetailView{ID: "a1", DisplayName: "Kaya", FlaggedHighRateAt: "2026-06-10 12:00"}, "Clear high-rate flag"},
+ {"throttled", ThrottledView{
+ Episodes: []ThrottleEpisodeRow{{Class: "user", Key: "a1", UserID: "a1", Rejected: 1234, FirstSeen: "2026-06-10 12:00", LastSeen: "2026-06-10 12:05"}},
+ Flagged: []FlaggedAccountRow{{ID: "a1", DisplayName: "Kaya", FlaggedAt: "2026-06-10 12:05"}},
+ FlagThreshold: 1000, FlagWindow: "10m0s",
+ }, "Recent episodes"},
{"games", GamesView{Items: []GameRow{{ID: "g1", Variant: "scrabble_en", Status: "active"}}, Status: "active", Pager: NewPager(1, 50, 1)}, "g1"},
{"game_detail", GameDetailView{ID: "g1", Variant: "scrabble_en", Seats: []SeatRow{{Seat: 0, DisplayName: "Kaya"}}}, "Seats"},
{"complaints", ComplaintsView{Items: []ComplaintRow{{ID: "c1", Word: "qi", Status: "open"}}, Status: "open", Pager: NewPager(1, 50, 1)}, "qi"},
diff --git a/backend/internal/adminconsole/templates/layout.gohtml b/backend/internal/adminconsole/templates/layout.gohtml
index 970d876..14e09d9 100644
--- a/backend/internal/adminconsole/templates/layout.gohtml
+++ b/backend/internal/adminconsole/templates/layout.gohtml
@@ -17,6 +17,7 @@
Games
Complaints
Messages
+ Throttled
Dictionary
Broadcast
Grafana ↗
diff --git a/backend/internal/adminconsole/templates/pages/throttled.gohtml b/backend/internal/adminconsole/templates/pages/throttled.gohtml
new file mode 100644
index 0000000..696b339
--- /dev/null
+++ b/backend/internal/adminconsole/templates/pages/throttled.gohtml
@@ -0,0 +1,39 @@
+{{define "content" -}}
+
Throttled
+{{with .Data}}
+Rate-limiter rejections reported periodically by the gateway. The episode
+list is in-memory and resets on a backend restart. An account sustaining
+{{.FlagThreshold}}+ rejected calls within {{.FlagWindow}} is soft-flagged for review
+below — never banned automatically; clear the flag on the user card.
+Recent episodes
+
+| Class | Key | Rejected | First seen | Last seen |
+
+{{range .Episodes}}
+
+| {{.Class}} |
+{{if .UserID}}{{.Key}}{{else}}{{.Key}}{{end}} |
+{{.Rejected}} |
+{{.FirstSeen}} |
+{{.LastSeen}} |
+
+{{else}}
+| nothing throttled recently |
+{{end}}
+
+
+
+Flagged accounts
+
+| Account | Display name | Flagged |
+
+{{range .Flagged}}
+| {{.ID}} | {{.DisplayName}} | {{.FlaggedAt}} |
+{{else}}
+| no flagged accounts |
+{{end}}
+
+
+
+{{end}}
+{{- end}}
diff --git a/backend/internal/adminconsole/templates/pages/user_detail.gohtml b/backend/internal/adminconsole/templates/pages/user_detail.gohtml
index 08b75b2..f4396d1 100644
--- a/backend/internal/adminconsole/templates/pages/user_detail.gohtml
+++ b/backend/internal/adminconsole/templates/pages/user_detail.gohtml
@@ -13,8 +13,14 @@
Paid {{if .PaidAccount}}yes{{else}}no{{end}}
Hint wallet {{.HintBalance}}
{{if .MergedInto}}Merged into {{.MergedInto}}{{end}}
+{{if .FlaggedHighRateAt}}High-rate flag {{.FlaggedHighRateAt}}{{end}}
Created {{.CreatedAt}}
+{{if .FlaggedHighRateAt}}
+
+{{end}}
Statistics
{{if .HasStats}}
diff --git a/backend/internal/adminconsole/templates/pages/users.gohtml b/backend/internal/adminconsole/templates/pages/users.gohtml
index ef8cf21..9a116a0 100644
--- a/backend/internal/adminconsole/templates/pages/users.gohtml
+++ b/backend/internal/adminconsole/templates/pages/users.gohtml
@@ -17,7 +17,7 @@
{{range .Items}}
| {{.ID}} |
-{{.DisplayName}}{{if .Guest}} guest{{end}} |
+{{.DisplayName}}{{if .Guest}} guest{{end}}{{if .FlaggedHighRate}} high-rate{{end}} |
{{.Kind}} |
{{.Language}} |
{{.CreatedAt}} |
diff --git a/backend/internal/adminconsole/views.go b/backend/internal/adminconsole/views.go
index 1ef4aed..8aef4c0 100644
--- a/backend/internal/adminconsole/views.go
+++ b/backend/internal/adminconsole/views.go
@@ -59,18 +59,20 @@ type UsersView struct {
}
// UserRow is one account row in the list. MoveMin/Avg/Max are the account's
-// pre-formatted move-duration summary (empty when it has no timed move).
+// pre-formatted move-duration summary (empty when it has no timed move);
+// FlaggedHighRate marks the soft high-rate badge (R3).
type UserRow struct {
- ID string
- DisplayName string
- Kind string
- Language string
- Guest bool
- CreatedAt string
- HasMoveStats bool
- MoveMin string
- MoveAvg string
- MoveMax string
+ ID string
+ DisplayName string
+ Kind string
+ Language string
+ Guest bool
+ FlaggedHighRate bool
+ CreatedAt string
+ HasMoveStats bool
+ MoveMin string
+ MoveAvg string
+ MoveMax string
}
// MessagesView is the paginated chat-message moderation list. NameMask/ExtMask are the
@@ -110,15 +112,18 @@ type UserDetailView struct {
PaidAccount bool
// MergedInto is the primary account id when this account has been retired by a
// merge (Stage 11), or empty for a live account.
- MergedInto string
- HintBalance int
- CreatedAt string
- HasStats bool
- Stats StatsRow
- Identities []IdentityRow
- Games []GameRow
- TelegramID string
- ConnectorEnabled bool
+ MergedInto string
+ // FlaggedHighRateAt is the pre-formatted soft high-rate marker timestamp,
+ // empty for an unflagged account; the card shows it with the Clear action (R3).
+ FlaggedHighRateAt string
+ HintBalance int
+ CreatedAt string
+ HasStats bool
+ Stats StatsRow
+ Identities []IdentityRow
+ Games []GameRow
+ TelegramID string
+ ConnectorEnabled bool
// MoveChart is the pre-rendered inline SVG of the account's per-move-number think
// time (min/mean/max), empty when the account has no timed move.
MoveChart template.HTML
@@ -247,6 +252,35 @@ type BroadcastView struct {
ConnectorEnabled bool
}
+// ThrottledView is the rate-limit observability page: the recent gateway-reported
+// throttle episodes (in-memory, reset on restart) and the accounts currently
+// carrying the high-rate flag. FlagThreshold and FlagWindow caption the active
+// auto-flag tuning.
+type ThrottledView struct {
+ Episodes []ThrottleEpisodeRow
+ Flagged []FlaggedAccountRow
+ FlagThreshold int
+ FlagWindow string
+}
+
+// ThrottleEpisodeRow is one recently throttled limiter key. UserID links to the
+// user card and is set only for the user class (the other classes key by IP).
+type ThrottleEpisodeRow struct {
+ Class string
+ Key string
+ UserID string
+ Rejected int
+ FirstSeen string
+ LastSeen string
+}
+
+// FlaggedAccountRow is one account carrying the high-rate flag.
+type FlaggedAccountRow struct {
+ ID string
+ DisplayName string
+ FlaggedAt string
+}
+
// MessageView is the result page shown after a POST action.
type MessageView struct {
Heading string
diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go
index 9cc69e0..063911f 100644
--- a/backend/internal/config/config.go
+++ b/backend/internal/config/config.go
@@ -12,6 +12,7 @@ import (
"scrabble/backend/internal/game"
"scrabble/backend/internal/lobby"
"scrabble/backend/internal/postgres"
+ "scrabble/backend/internal/ratewatch"
"scrabble/backend/internal/robot"
"scrabble/backend/internal/telemetry"
)
@@ -35,6 +36,9 @@ type Config struct {
Lobby lobby.Config
// Robot configures the robot opponent driver (scan cadence).
Robot robot.Config
+ // RateWatch tunes the conservative high-rate auto-flag applied to the
+ // gateway's rate-limiter rejection reports (R3).
+ RateWatch ratewatch.Config
// SMTP configures the email relay used for confirm-codes. An empty Host
// selects the development log mailer (the code is logged, not sent).
SMTP account.SMTPConfig
@@ -105,6 +109,14 @@ func Load() (Config, error) {
return Config{}, err
}
+ rw := ratewatch.DefaultConfig()
+ if rw.FlagThreshold, err = envInt("BACKEND_HIGHRATE_FLAG_THRESHOLD", rw.FlagThreshold); err != nil {
+ return Config{}, err
+ }
+ if rw.FlagWindow, err = envDuration("BACKEND_HIGHRATE_FLAG_WINDOW", rw.FlagWindow); err != nil {
+ return Config{}, err
+ }
+
guestReapInterval, err := envDuration("BACKEND_GUEST_REAP_INTERVAL", defaultGuestReapInterval)
if err != nil {
return Config{}, err
@@ -131,6 +143,7 @@ func Load() (Config, error) {
Game: gm,
Lobby: lb,
Robot: rb,
+ RateWatch: rw,
SMTP: smtp,
ConnectorAddr: os.Getenv("BACKEND_CONNECTOR_ADDR"),
GuestReapInterval: guestReapInterval,
@@ -170,6 +183,9 @@ func (c Config) validate() error {
if err := c.Robot.Validate(); err != nil {
return fmt.Errorf("config: %w", err)
}
+ if err := c.RateWatch.Validate(); err != nil {
+ return fmt.Errorf("config: %w", err)
+ }
if c.GuestReapInterval <= 0 {
return fmt.Errorf("config: BACKEND_GUEST_REAP_INTERVAL must be positive")
}
diff --git a/backend/internal/inttest/account_test.go b/backend/internal/inttest/account_test.go
index 1be5922..286b171 100644
--- a/backend/internal/inttest/account_test.go
+++ b/backend/internal/inttest/account_test.go
@@ -6,6 +6,7 @@ import (
"context"
"errors"
"testing"
+ "time"
"github.com/google/uuid"
@@ -195,6 +196,62 @@ func TestServiceLanguageRoundTrip(t *testing.T) {
}
}
+// TestHighRateFlagRoundTrip covers the R3 soft high-rate marker: a fresh account
+// is unflagged, FlagHighRate stamps it exactly once (a second sustained episode
+// never moves the timestamp), ClearHighRateFlag reverses it, and a re-flag after
+// the operator clear takes a fresh timestamp.
+func TestHighRateFlagRoundTrip(t *testing.T) {
+ ctx := context.Background()
+ store := account.NewStore(testDB)
+ acc, err := store.ProvisionTelegram(ctx, "tg-"+uuid.NewString(), "en", "", "Player")
+ if err != nil {
+ t.Fatalf("provision telegram: %v", err)
+ }
+ if !acc.FlaggedHighRateAt.IsZero() {
+ t.Fatalf("fresh FlaggedHighRateAt = %v, want zero", acc.FlaggedHighRateAt)
+ }
+
+ first := time.Date(2026, 6, 1, 12, 0, 0, 0, time.UTC)
+ set, err := store.FlagHighRate(ctx, acc.ID, first)
+ if err != nil {
+ t.Fatalf("flag: %v", err)
+ }
+ if !set {
+ t.Fatal("first FlagHighRate reported not set")
+ }
+ if set, err = store.FlagHighRate(ctx, acc.ID, first.Add(time.Hour)); err != nil {
+ t.Fatalf("re-flag: %v", err)
+ } else if set {
+ t.Fatal("second FlagHighRate must not overwrite the marker")
+ }
+ got, err := store.GetByID(ctx, acc.ID)
+ if err != nil {
+ t.Fatalf("get by id: %v", err)
+ }
+ if !got.FlaggedHighRateAt.Equal(first) {
+ t.Errorf("FlaggedHighRateAt = %v, want %v", got.FlaggedHighRateAt, first)
+ }
+
+ if err := store.ClearHighRateFlag(ctx, acc.ID); err != nil {
+ t.Fatalf("clear: %v", err)
+ }
+ if got, err = store.GetByID(ctx, acc.ID); err != nil {
+ t.Fatalf("get by id: %v", err)
+ } else if !got.FlaggedHighRateAt.IsZero() {
+ t.Errorf("cleared FlaggedHighRateAt = %v, want zero", got.FlaggedHighRateAt)
+ }
+
+ second := first.Add(24 * time.Hour)
+ if set, err = store.FlagHighRate(ctx, acc.ID, second); err != nil || !set {
+ t.Fatalf("re-flag after clear = (%v, %v), want (true, nil)", set, err)
+ }
+ if got, err = store.GetByID(ctx, acc.ID); err != nil {
+ t.Fatalf("get by id: %v", err)
+ } else if !got.FlaggedHighRateAt.Equal(second) {
+ t.Errorf("re-flagged FlaggedHighRateAt = %v, want %v", got.FlaggedHighRateAt, second)
+ }
+}
+
// TestIdentityExternalID covers the reverse identity lookup the push-target route
// uses: it returns the external_id for the matching kind and ErrNotFound otherwise,
// including for a guest that carries no identity.
diff --git a/backend/internal/inttest/admin_test.go b/backend/internal/inttest/admin_test.go
index 6b77c97..0171c54 100644
--- a/backend/internal/inttest/admin_test.go
+++ b/backend/internal/inttest/admin_test.go
@@ -16,6 +16,7 @@ import (
"scrabble/backend/internal/account"
"scrabble/backend/internal/engine"
"scrabble/backend/internal/game"
+ "scrabble/backend/internal/ratewatch"
"scrabble/backend/internal/server"
)
@@ -206,6 +207,72 @@ func TestConsoleGameDetailRobotSchedule(t *testing.T) {
}
}
+// TestConsoleThrottledViewAndFlagClear drives the R3 rate-limit surface end to
+// end against real stores: a gateway report past the threshold auto-flags the
+// account, the throttled view shows the episode and the flagged account, the
+// user card carries the marker, and the operator clear (a same-origin POST)
+// reverses it.
+func TestConsoleThrottledViewAndFlagClear(t *testing.T) {
+ ctx := context.Background()
+ accounts := account.NewStore(testDB)
+ acc, err := accounts.ProvisionTelegram(ctx, "tg-"+uuid.NewString(), "en", "", "Throttled Player")
+ if err != nil {
+ t.Fatalf("provision: %v", err)
+ }
+
+ watch := ratewatch.New(ratewatch.Config{FlagThreshold: 100, FlagWindow: 10 * time.Minute}, accounts, zap.NewNop())
+ srv := server.New(":0", server.Deps{
+ Logger: zap.NewNop(),
+ Accounts: accounts,
+ Games: newGameService(),
+ Registry: testRegistry,
+ DictDir: dictDir(),
+ RateWatch: watch,
+ })
+ h := srv.Handler()
+
+ report := `{"window_seconds":30,"entries":[` +
+ `{"class":"user","key":"` + acc.ID.String() + `","rejected":150},` +
+ `{"class":"public","key":"10.1.2.3","rejected":7}]}`
+ req := httptest.NewRequest(http.MethodPost, "http://admin.test/api/v1/internal/ratelimit/report", strings.NewReader(report))
+ req.Header.Set("Content-Type", "application/json")
+ rec := httptest.NewRecorder()
+ h.ServeHTTP(rec, req)
+ if rec.Code != http.StatusNoContent {
+ t.Fatalf("report = %d, want 204", rec.Code)
+ }
+
+ got, err := accounts.GetByID(ctx, acc.ID)
+ if err != nil {
+ t.Fatalf("get by id: %v", err)
+ }
+ if got.FlaggedHighRateAt.IsZero() {
+ t.Fatal("account not auto-flagged past the threshold")
+ }
+
+ base := "http://admin.test/_gm"
+ code, body := consoleDo(h, http.MethodGet, base+"/throttled", "", "")
+ if code != http.StatusOK || !strings.Contains(body, acc.ID.String()) ||
+ !strings.Contains(body, "10.1.2.3") || !strings.Contains(body, "Throttled Player") {
+ t.Fatalf("throttled view = %d, episode/flag shown = %v/%v",
+ code, strings.Contains(body, "10.1.2.3"), strings.Contains(body, "Throttled Player"))
+ }
+ if code, body = consoleDo(h, http.MethodGet, base+"/users/"+acc.ID.String(), "", ""); code != http.StatusOK || !strings.Contains(body, "Clear high-rate flag") {
+ t.Fatalf("user card = %d, has clear action = %v", code, strings.Contains(body, "Clear high-rate flag"))
+ }
+
+ // The clear POST is CSRF-guarded like every console action.
+ if code, _ = consoleDo(h, http.MethodPost, base+"/users/"+acc.ID.String()+"/clear-high-rate-flag", "", ""); code != http.StatusForbidden {
+ t.Fatalf("clear without origin = %d, want 403", code)
+ }
+ if code, body = consoleDo(h, http.MethodPost, base+"/users/"+acc.ID.String()+"/clear-high-rate-flag", "x=1", "http://admin.test"); code != http.StatusOK || !strings.Contains(body, "Cleared") {
+ t.Fatalf("clear with origin = %d, has Cleared = %v", code, strings.Contains(body, "Cleared"))
+ }
+ if got, err = accounts.GetByID(ctx, acc.ID); err != nil || !got.FlaggedHighRateAt.IsZero() {
+ t.Fatalf("flag survived the clear: %v (err %v)", got.FlaggedHighRateAt, err)
+ }
+}
+
// consoleDo issues a request to h, optionally with an Origin header, and returns
// the status and body. Form bodies are sent as application/x-www-form-urlencoded.
func consoleDo(h http.Handler, method, target, body, origin string) (int, string) {
diff --git a/backend/internal/postgres/jet/backend/model/accounts.go b/backend/internal/postgres/jet/backend/model/accounts.go
index 2501d3a..ae26769 100644
--- a/backend/internal/postgres/jet/backend/model/accounts.go
+++ b/backend/internal/postgres/jet/backend/model/accounts.go
@@ -30,4 +30,5 @@ type Accounts struct {
MergedInto *uuid.UUID
MergedAt *time.Time
ServiceLanguage *string
+ FlaggedHighRateAt *time.Time
}
diff --git a/backend/internal/postgres/jet/backend/model/game_drafts.go b/backend/internal/postgres/jet/backend/model/game_drafts.go
new file mode 100644
index 0000000..c9c32c2
--- /dev/null
+++ b/backend/internal/postgres/jet/backend/model/game_drafts.go
@@ -0,0 +1,21 @@
+//
+// Code generated by go-jet DO NOT EDIT.
+//
+// WARNING: Changes to this file may cause incorrect behavior
+// and will be lost if the code is regenerated
+//
+
+package model
+
+import (
+ "github.com/google/uuid"
+ "time"
+)
+
+type GameDrafts struct {
+ GameID uuid.UUID `sql:"primary_key"`
+ AccountID uuid.UUID `sql:"primary_key"`
+ RackOrder string
+ BoardTiles string
+ UpdatedAt time.Time
+}
diff --git a/backend/internal/postgres/jet/backend/model/game_hidden.go b/backend/internal/postgres/jet/backend/model/game_hidden.go
new file mode 100644
index 0000000..d64f4a1
--- /dev/null
+++ b/backend/internal/postgres/jet/backend/model/game_hidden.go
@@ -0,0 +1,19 @@
+//
+// Code generated by go-jet DO NOT EDIT.
+//
+// WARNING: Changes to this file may cause incorrect behavior
+// and will be lost if the code is regenerated
+//
+
+package model
+
+import (
+ "github.com/google/uuid"
+ "time"
+)
+
+type GameHidden struct {
+ AccountID uuid.UUID `sql:"primary_key"`
+ GameID uuid.UUID `sql:"primary_key"`
+ CreatedAt time.Time
+}
diff --git a/backend/internal/postgres/jet/backend/table/accounts.go b/backend/internal/postgres/jet/backend/table/accounts.go
index 906a396..021e7a4 100644
--- a/backend/internal/postgres/jet/backend/table/accounts.go
+++ b/backend/internal/postgres/jet/backend/table/accounts.go
@@ -34,6 +34,7 @@ type accountsTable struct {
MergedInto postgres.ColumnString
MergedAt postgres.ColumnTimestampz
ServiceLanguage postgres.ColumnString
+ FlaggedHighRateAt postgres.ColumnTimestampz
AllColumns postgres.ColumnList
MutableColumns postgres.ColumnList
@@ -92,8 +93,9 @@ func newAccountsTableImpl(schemaName, tableName, alias string) accountsTable {
MergedIntoColumn = postgres.StringColumn("merged_into")
MergedAtColumn = postgres.TimestampzColumn("merged_at")
ServiceLanguageColumn = postgres.StringColumn("service_language")
- allColumns = postgres.ColumnList{AccountIDColumn, DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn}
- mutableColumns = postgres.ColumnList{DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn}
+ FlaggedHighRateAtColumn = postgres.TimestampzColumn("flagged_high_rate_at")
+ allColumns = postgres.ColumnList{AccountIDColumn, DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn, FlaggedHighRateAtColumn}
+ mutableColumns = postgres.ColumnList{DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn, FlaggedHighRateAtColumn}
defaultColumns = postgres.ColumnList{DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn}
)
@@ -118,6 +120,7 @@ func newAccountsTableImpl(schemaName, tableName, alias string) accountsTable {
MergedInto: MergedIntoColumn,
MergedAt: MergedAtColumn,
ServiceLanguage: ServiceLanguageColumn,
+ FlaggedHighRateAt: FlaggedHighRateAtColumn,
AllColumns: allColumns,
MutableColumns: mutableColumns,
diff --git a/backend/internal/postgres/jet/backend/table/game_drafts.go b/backend/internal/postgres/jet/backend/table/game_drafts.go
new file mode 100644
index 0000000..18f099e
--- /dev/null
+++ b/backend/internal/postgres/jet/backend/table/game_drafts.go
@@ -0,0 +1,90 @@
+//
+// Code generated by go-jet DO NOT EDIT.
+//
+// WARNING: Changes to this file may cause incorrect behavior
+// and will be lost if the code is regenerated
+//
+
+package table
+
+import (
+ "github.com/go-jet/jet/v2/postgres"
+)
+
+var GameDrafts = newGameDraftsTable("backend", "game_drafts", "")
+
+type gameDraftsTable struct {
+ postgres.Table
+
+ // Columns
+ GameID postgres.ColumnString
+ AccountID postgres.ColumnString
+ RackOrder postgres.ColumnString
+ BoardTiles postgres.ColumnString
+ UpdatedAt postgres.ColumnTimestampz
+
+ AllColumns postgres.ColumnList
+ MutableColumns postgres.ColumnList
+ DefaultColumns postgres.ColumnList
+}
+
+type GameDraftsTable struct {
+ gameDraftsTable
+
+ EXCLUDED gameDraftsTable
+}
+
+// AS creates new GameDraftsTable with assigned alias
+func (a GameDraftsTable) AS(alias string) *GameDraftsTable {
+ return newGameDraftsTable(a.SchemaName(), a.TableName(), alias)
+}
+
+// Schema creates new GameDraftsTable with assigned schema name
+func (a GameDraftsTable) FromSchema(schemaName string) *GameDraftsTable {
+ return newGameDraftsTable(schemaName, a.TableName(), a.Alias())
+}
+
+// WithPrefix creates new GameDraftsTable with assigned table prefix
+func (a GameDraftsTable) WithPrefix(prefix string) *GameDraftsTable {
+ return newGameDraftsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
+}
+
+// WithSuffix creates new GameDraftsTable with assigned table suffix
+func (a GameDraftsTable) WithSuffix(suffix string) *GameDraftsTable {
+ return newGameDraftsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
+}
+
+func newGameDraftsTable(schemaName, tableName, alias string) *GameDraftsTable {
+ return &GameDraftsTable{
+ gameDraftsTable: newGameDraftsTableImpl(schemaName, tableName, alias),
+ EXCLUDED: newGameDraftsTableImpl("", "excluded", ""),
+ }
+}
+
+func newGameDraftsTableImpl(schemaName, tableName, alias string) gameDraftsTable {
+ var (
+ GameIDColumn = postgres.StringColumn("game_id")
+ AccountIDColumn = postgres.StringColumn("account_id")
+ RackOrderColumn = postgres.StringColumn("rack_order")
+ BoardTilesColumn = postgres.StringColumn("board_tiles")
+ UpdatedAtColumn = postgres.TimestampzColumn("updated_at")
+ allColumns = postgres.ColumnList{GameIDColumn, AccountIDColumn, RackOrderColumn, BoardTilesColumn, UpdatedAtColumn}
+ mutableColumns = postgres.ColumnList{RackOrderColumn, BoardTilesColumn, UpdatedAtColumn}
+ defaultColumns = postgres.ColumnList{RackOrderColumn, BoardTilesColumn, UpdatedAtColumn}
+ )
+
+ return gameDraftsTable{
+ Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
+
+ //Columns
+ GameID: GameIDColumn,
+ AccountID: AccountIDColumn,
+ RackOrder: RackOrderColumn,
+ BoardTiles: BoardTilesColumn,
+ UpdatedAt: UpdatedAtColumn,
+
+ AllColumns: allColumns,
+ MutableColumns: mutableColumns,
+ DefaultColumns: defaultColumns,
+ }
+}
diff --git a/backend/internal/postgres/jet/backend/table/game_hidden.go b/backend/internal/postgres/jet/backend/table/game_hidden.go
new file mode 100644
index 0000000..d55a2a9
--- /dev/null
+++ b/backend/internal/postgres/jet/backend/table/game_hidden.go
@@ -0,0 +1,84 @@
+//
+// Code generated by go-jet DO NOT EDIT.
+//
+// WARNING: Changes to this file may cause incorrect behavior
+// and will be lost if the code is regenerated
+//
+
+package table
+
+import (
+ "github.com/go-jet/jet/v2/postgres"
+)
+
+var GameHidden = newGameHiddenTable("backend", "game_hidden", "")
+
+type gameHiddenTable struct {
+ postgres.Table
+
+ // Columns
+ AccountID postgres.ColumnString
+ GameID postgres.ColumnString
+ CreatedAt postgres.ColumnTimestampz
+
+ AllColumns postgres.ColumnList
+ MutableColumns postgres.ColumnList
+ DefaultColumns postgres.ColumnList
+}
+
+type GameHiddenTable struct {
+ gameHiddenTable
+
+ EXCLUDED gameHiddenTable
+}
+
+// AS creates new GameHiddenTable with assigned alias
+func (a GameHiddenTable) AS(alias string) *GameHiddenTable {
+ return newGameHiddenTable(a.SchemaName(), a.TableName(), alias)
+}
+
+// Schema creates new GameHiddenTable with assigned schema name
+func (a GameHiddenTable) FromSchema(schemaName string) *GameHiddenTable {
+ return newGameHiddenTable(schemaName, a.TableName(), a.Alias())
+}
+
+// WithPrefix creates new GameHiddenTable with assigned table prefix
+func (a GameHiddenTable) WithPrefix(prefix string) *GameHiddenTable {
+ return newGameHiddenTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
+}
+
+// WithSuffix creates new GameHiddenTable with assigned table suffix
+func (a GameHiddenTable) WithSuffix(suffix string) *GameHiddenTable {
+ return newGameHiddenTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
+}
+
+func newGameHiddenTable(schemaName, tableName, alias string) *GameHiddenTable {
+ return &GameHiddenTable{
+ gameHiddenTable: newGameHiddenTableImpl(schemaName, tableName, alias),
+ EXCLUDED: newGameHiddenTableImpl("", "excluded", ""),
+ }
+}
+
+func newGameHiddenTableImpl(schemaName, tableName, alias string) gameHiddenTable {
+ var (
+ AccountIDColumn = postgres.StringColumn("account_id")
+ GameIDColumn = postgres.StringColumn("game_id")
+ CreatedAtColumn = postgres.TimestampzColumn("created_at")
+ allColumns = postgres.ColumnList{AccountIDColumn, GameIDColumn, CreatedAtColumn}
+ mutableColumns = postgres.ColumnList{CreatedAtColumn}
+ defaultColumns = postgres.ColumnList{CreatedAtColumn}
+ )
+
+ return gameHiddenTable{
+ Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
+
+ //Columns
+ AccountID: AccountIDColumn,
+ GameID: GameIDColumn,
+ CreatedAt: CreatedAtColumn,
+
+ AllColumns: allColumns,
+ MutableColumns: mutableColumns,
+ DefaultColumns: defaultColumns,
+ }
+}
diff --git a/backend/internal/postgres/jet/backend/table/table_use_schema.go b/backend/internal/postgres/jet/backend/table/table_use_schema.go
index edc7307..70d757d 100644
--- a/backend/internal/postgres/jet/backend/table/table_use_schema.go
+++ b/backend/internal/postgres/jet/backend/table/table_use_schema.go
@@ -18,6 +18,8 @@ func UseSchema(schema string) {
EmailConfirmations = EmailConfirmations.FromSchema(schema)
FriendCodes = FriendCodes.FromSchema(schema)
Friendships = Friendships.FromSchema(schema)
+ GameDrafts = GameDrafts.FromSchema(schema)
+ GameHidden = GameHidden.FromSchema(schema)
GameInvitationInvitees = GameInvitationInvitees.FromSchema(schema)
GameInvitations = GameInvitations.FromSchema(schema)
GameMoves = GameMoves.FromSchema(schema)
diff --git a/backend/internal/postgres/migrations/00001_baseline.sql b/backend/internal/postgres/migrations/00001_baseline.sql
index 7a63bef..f33592e 100644
--- a/backend/internal/postgres/migrations/00001_baseline.sql
+++ b/backend/internal/postgres/migrations/00001_baseline.sql
@@ -35,6 +35,10 @@ CREATE TABLE accounts (
merged_into uuid REFERENCES accounts (account_id) ON DELETE SET NULL,
merged_at timestamptz,
service_language text CHECK (service_language IN ('en', 'ru')),
+ -- Soft, reversible "suspected high-rate" marker (R3): set once when the gateway
+ -- reports sustained rate-limiter rejections past the threshold; an operator
+ -- clears it in the admin console. Never an automatic ban.
+ flagged_high_rate_at timestamptz,
CONSTRAINT accounts_preferred_language_chk CHECK (preferred_language IN ('en', 'ru')),
CONSTRAINT accounts_hint_balance_chk CHECK (hint_balance >= 0)
);
diff --git a/backend/internal/ratewatch/ratewatch.go b/backend/internal/ratewatch/ratewatch.go
new file mode 100644
index 0000000..f236f78
--- /dev/null
+++ b/backend/internal/ratewatch/ratewatch.go
@@ -0,0 +1,235 @@
+// Package ratewatch ingests the gateway's periodic rate-limiter rejection
+// reports (R3). It keeps an in-memory window of recent throttle episodes for
+// the admin console's view and applies the conservative high-rate auto-flag:
+// when one account's rejections within the rolling window cross the threshold,
+// the account store stamps the soft, reversible flagged_high_rate_at marker
+// (set-once; an operator clears it; never an automatic ban). Like the gateway's
+// active_users gauge it is single-instance and resets on restart by design —
+// the durable part is the account flag, not the episode window.
+package ratewatch
+
+import (
+ "context"
+ "fmt"
+ "sort"
+ "sync"
+ "time"
+
+ "github.com/google/uuid"
+ "go.uber.org/zap"
+)
+
+// ClassUser is the limiter class whose keys are account ids — the only class
+// the auto-flag applies to (the others are keyed by client IP).
+const ClassUser = "user"
+
+const (
+ // maxSeries bounds the distinct (class, key) series kept for the console
+ // view, so a key-spraying client cannot grow the map: past the bound the
+ // least-recently-throttled series is evicted.
+ maxSeries = 200
+ // minRetention keeps an episode visible in the console for at least an hour
+ // after its last rejection (longer when the flag window is longer).
+ minRetention = time.Hour
+)
+
+// Config tunes the conservative high-rate auto-flag.
+type Config struct {
+ // FlagThreshold is the rejected-call count within FlagWindow past which a
+ // user account is flagged.
+ FlagThreshold int
+ // FlagWindow is the rolling window the rejections accumulate over.
+ FlagWindow time.Duration
+}
+
+// DefaultConfig returns the agreed conservative defaults — 1000 rejected calls
+// within a rolling 10 minutes (~1.7/s sustained, far above the client's
+// capped-backoff retry noise yet a fraction of an abusive loop).
+func DefaultConfig() Config {
+ return Config{FlagThreshold: 1000, FlagWindow: 10 * time.Minute}
+}
+
+// Validate reports whether the configuration values are acceptable.
+func (c Config) Validate() error {
+ if c.FlagThreshold <= 0 {
+ return fmt.Errorf("ratewatch: flag threshold must be positive")
+ }
+ if c.FlagWindow <= 0 {
+ return fmt.Errorf("ratewatch: flag window must be positive")
+ }
+ return nil
+}
+
+// Flagger stamps the account-level high-rate marker; account.Store satisfies it.
+type Flagger interface {
+ FlagHighRate(ctx context.Context, id uuid.UUID, at time.Time) (bool, error)
+}
+
+// Entry is one reported aggregate: the rejections of one limiter key within one
+// gateway report window (the wire mirror of the gateway's rejection summary).
+type Entry struct {
+ Class string
+ Key string
+ Rejected int
+}
+
+// Episode is one key's recent-throttle aggregate for the admin view.
+type Episode struct {
+ Class string
+ Key string
+ Rejected int
+ FirstSeen time.Time
+ LastSeen time.Time
+}
+
+// Watch accumulates reports and applies the auto-flag rule.
+type Watch struct {
+ cfg Config
+ flagger Flagger
+ log *zap.Logger
+ now func() time.Time
+
+ mu sync.Mutex
+ series map[seriesKey]*series
+}
+
+type seriesKey struct{ class, key string }
+
+type series struct {
+ points []point // ascending by time
+}
+
+type point struct {
+ at time.Time
+ n int
+}
+
+// New constructs a Watch over flagger with cfg. A nil logger is replaced by a
+// no-op one; a nil flagger disables the auto-flag (the view still works).
+func New(cfg Config, flagger Flagger, log *zap.Logger) *Watch {
+ if log == nil {
+ log = zap.NewNop()
+ }
+ return &Watch{
+ cfg: cfg,
+ flagger: flagger,
+ log: log,
+ now: time.Now,
+ series: make(map[seriesKey]*series),
+ }
+}
+
+// Ingest records one gateway report. Entries with an empty class or key or a
+// non-positive count are skipped. When a user-class series crosses the flag
+// threshold within the flag window, the account is flagged (the store keeps it
+// set-once, so a sustained episode costs one no-op UPDATE per report).
+func (w *Watch) Ingest(ctx context.Context, entries []Entry) {
+ if len(entries) == 0 {
+ return
+ }
+ now := w.now()
+ var flag []uuid.UUID
+ w.mu.Lock()
+ for _, e := range entries {
+ if e.Class == "" || e.Key == "" || e.Rejected <= 0 {
+ continue
+ }
+ k := seriesKey{class: e.Class, key: e.Key}
+ s := w.series[k]
+ if s == nil {
+ s = &series{}
+ w.series[k] = s
+ }
+ s.points = append(s.points, point{at: now, n: e.Rejected})
+ if e.Class == ClassUser && s.sumSince(now.Add(-w.cfg.FlagWindow)) >= w.cfg.FlagThreshold {
+ if id, err := uuid.Parse(e.Key); err == nil {
+ flag = append(flag, id)
+ }
+ }
+ }
+ w.pruneLocked(now)
+ w.mu.Unlock()
+
+ if w.flagger == nil {
+ return
+ }
+ for _, id := range flag {
+ set, err := w.flagger.FlagHighRate(ctx, id, now)
+ switch {
+ case err != nil:
+ w.log.Warn("high-rate flag failed", zap.String("account_id", id.String()), zap.Error(err))
+ case set:
+ w.log.Info("account flagged high-rate",
+ zap.String("account_id", id.String()),
+ zap.Int("threshold", w.cfg.FlagThreshold),
+ zap.Duration("window", w.cfg.FlagWindow))
+ }
+ }
+}
+
+// Config returns the active auto-flag tuning (the admin console captions it).
+func (w *Watch) Config() Config { return w.cfg }
+
+// Recent returns the retained throttle episodes, most recently throttled first.
+func (w *Watch) Recent() []Episode {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ out := make([]Episode, 0, len(w.series))
+ for k, s := range w.series {
+ if len(s.points) == 0 {
+ continue
+ }
+ ep := Episode{
+ Class: k.class,
+ Key: k.key,
+ FirstSeen: s.points[0].at,
+ LastSeen: s.points[len(s.points)-1].at,
+ }
+ for _, p := range s.points {
+ ep.Rejected += p.n
+ }
+ out = append(out, ep)
+ }
+ sort.Slice(out, func(i, j int) bool { return out[i].LastSeen.After(out[j].LastSeen) })
+ return out
+}
+
+// sumSince totals the points at or after cutoff.
+func (s *series) sumSince(cutoff time.Time) int {
+ sum := 0
+ for i := len(s.points) - 1; i >= 0; i-- {
+ if s.points[i].at.Before(cutoff) {
+ break
+ }
+ sum += s.points[i].n
+ }
+ return sum
+}
+
+// pruneLocked drops points past retention, empty series, and — past maxSeries —
+// the least-recently-throttled series. The caller holds w.mu.
+func (w *Watch) pruneLocked(now time.Time) {
+ cutoff := now.Add(-max(minRetention, w.cfg.FlagWindow))
+ for k, s := range w.series {
+ i := 0
+ for i < len(s.points) && s.points[i].at.Before(cutoff) {
+ i++
+ }
+ s.points = s.points[i:]
+ if len(s.points) == 0 {
+ delete(w.series, k)
+ }
+ }
+ for len(w.series) > maxSeries {
+ var oldest seriesKey
+ var oldestAt time.Time
+ first := true
+ for k, s := range w.series {
+ last := s.points[len(s.points)-1].at
+ if first || last.Before(oldestAt) {
+ oldest, oldestAt, first = k, last, false
+ }
+ }
+ delete(w.series, oldest)
+ }
+}
diff --git a/backend/internal/ratewatch/ratewatch_test.go b/backend/internal/ratewatch/ratewatch_test.go
new file mode 100644
index 0000000..0bde791
--- /dev/null
+++ b/backend/internal/ratewatch/ratewatch_test.go
@@ -0,0 +1,140 @@
+package ratewatch
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/google/uuid"
+)
+
+// fakeFlagger records flag calls and reports them as newly set.
+type fakeFlagger struct {
+ calls []uuid.UUID
+}
+
+func (f *fakeFlagger) FlagHighRate(_ context.Context, id uuid.UUID, _ time.Time) (bool, error) {
+ f.calls = append(f.calls, id)
+ return true, nil
+}
+
+// watchAt returns a Watch with a controllable clock.
+func watchAt(cfg Config, flagger Flagger, at *time.Time) *Watch {
+ w := New(cfg, flagger, nil)
+ w.now = func() time.Time { return *at }
+ return w
+}
+
+// TestIngestAggregatesAndRecent verifies episodes accumulate per (class, key),
+// invalid entries are skipped, and Recent orders by last rejection.
+func TestIngestAggregatesAndRecent(t *testing.T) {
+ now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
+ w := watchAt(DefaultConfig(), nil, &now)
+ ctx := context.Background()
+
+ w.Ingest(ctx, []Entry{
+ {Class: "public", Key: "10.0.0.1", Rejected: 3},
+ {Class: "user", Key: "u-1", Rejected: 5},
+ {Class: "", Key: "x", Rejected: 1},
+ {Class: "user", Key: "", Rejected: 1},
+ {Class: "user", Key: "u-1", Rejected: 0},
+ })
+ now = now.Add(30 * time.Second)
+ w.Ingest(ctx, []Entry{{Class: "public", Key: "10.0.0.1", Rejected: 4}})
+
+ got := w.Recent()
+ if len(got) != 2 {
+ t.Fatalf("Recent returned %d episodes, want 2", len(got))
+ }
+ if got[0].Class != "public" || got[0].Key != "10.0.0.1" || got[0].Rejected != 7 {
+ t.Errorf("first episode = %+v, want public/10.0.0.1 rejected=7", got[0])
+ }
+ if !got[0].LastSeen.After(got[0].FirstSeen) {
+ t.Errorf("episode span = [%v, %v], want a positive span", got[0].FirstSeen, got[0].LastSeen)
+ }
+ if got[1].Class != "user" || got[1].Rejected != 5 {
+ t.Errorf("second episode = %+v, want user rejected=5", got[1])
+ }
+}
+
+// TestAutoFlagThreshold verifies the flag fires only for a user-class series
+// crossing the threshold within the window, with a parseable account id.
+func TestAutoFlagThreshold(t *testing.T) {
+ now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
+ flagged := &fakeFlagger{}
+ id := uuid.New()
+ w := watchAt(Config{FlagThreshold: 100, FlagWindow: 10 * time.Minute}, flagged, &now)
+ ctx := context.Background()
+
+ w.Ingest(ctx, []Entry{
+ {Class: "user", Key: id.String(), Rejected: 99},
+ {Class: "public", Key: "10.0.0.1", Rejected: 1000},
+ {Class: "user", Key: "not-a-uuid", Rejected: 1000},
+ })
+ if len(flagged.calls) != 0 {
+ t.Fatalf("flagged %v below the threshold", flagged.calls)
+ }
+
+ now = now.Add(30 * time.Second)
+ w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 1}})
+ if len(flagged.calls) != 1 || flagged.calls[0] != id {
+ t.Fatalf("flag calls = %v, want exactly [%s]", flagged.calls, id)
+ }
+}
+
+// TestAutoFlagWindowExpiry verifies rejections age out of the rolling window.
+func TestAutoFlagWindowExpiry(t *testing.T) {
+ now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
+ flagged := &fakeFlagger{}
+ id := uuid.New()
+ w := watchAt(Config{FlagThreshold: 100, FlagWindow: 10 * time.Minute}, flagged, &now)
+ ctx := context.Background()
+
+ w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 60}})
+ now = now.Add(11 * time.Minute)
+ w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 60}})
+ if len(flagged.calls) != 0 {
+ t.Fatalf("flagged %v across an expired window", flagged.calls)
+ }
+ now = now.Add(time.Minute)
+ w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 50}})
+ if len(flagged.calls) != 1 {
+ t.Fatalf("flag calls = %v, want one in-window crossing", flagged.calls)
+ }
+}
+
+// TestSeriesBound verifies the episode map stays bounded by evicting the
+// least-recently-throttled series.
+func TestSeriesBound(t *testing.T) {
+ now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
+ w := watchAt(DefaultConfig(), nil, &now)
+ ctx := context.Background()
+
+ for i := range maxSeries + 10 {
+ now = now.Add(time.Second)
+ w.Ingest(ctx, []Entry{{Class: "public", Key: fmt.Sprintf("10.0.%d.%d", i/256, i%256), Rejected: 1}})
+ }
+ got := w.Recent()
+ if len(got) != maxSeries {
+ t.Fatalf("retained %d series, want %d", len(got), maxSeries)
+ }
+ for _, ep := range got {
+ if ep.Key == "10.0.0.0" {
+ t.Fatal("the least-recently-throttled series survived the bound")
+ }
+ }
+}
+
+// TestConfigValidate covers the tuning guards.
+func TestConfigValidate(t *testing.T) {
+ if err := DefaultConfig().Validate(); err != nil {
+ t.Errorf("default config invalid: %v", err)
+ }
+ if err := (Config{FlagThreshold: 0, FlagWindow: time.Minute}).Validate(); err == nil {
+ t.Error("zero threshold passed validation")
+ }
+ if err := (Config{FlagThreshold: 1, FlagWindow: 0}).Validate(); err == nil {
+ t.Error("zero window passed validation")
+ }
+}
diff --git a/backend/internal/server/handlers.go b/backend/internal/server/handlers.go
index f760409..374df86 100644
--- a/backend/internal/server/handlers.go
+++ b/backend/internal/server/handlers.go
@@ -37,6 +37,11 @@ func (s *Server) registerRoutes() {
// before delivering an out-of-app notification.
in.POST("/push-target", s.handlePushTarget)
}
+ if s.ratewatch != nil {
+ // The gateway's periodic rate-limiter rejection summary (R3): feeds the
+ // admin console's throttled view and the high-rate auto-flag.
+ s.internal.POST("/ratelimit/report", s.handleRateLimitReport)
+ }
u := s.user
if s.accounts != nil {
u.GET("/profile", s.handleProfile)
@@ -120,10 +125,8 @@ func gameIDParam(c *gin.Context) (uuid.UUID, bool) {
// X-Forwarded-For (the first hop), falling back to the direct peer.
func clientIP(c *gin.Context) string {
if xff := c.GetHeader("X-Forwarded-For"); xff != "" {
- if i := strings.IndexByte(xff, ','); i >= 0 {
- return strings.TrimSpace(xff[:i])
- }
- return strings.TrimSpace(xff)
+ first, _, _ := strings.Cut(xff, ",")
+ return strings.TrimSpace(first)
}
return c.ClientIP()
}
diff --git a/backend/internal/server/handlers_admin_console.go b/backend/internal/server/handlers_admin_console.go
index bfa3758..e86302f 100644
--- a/backend/internal/server/handlers_admin_console.go
+++ b/backend/internal/server/handlers_admin_console.go
@@ -19,6 +19,7 @@ import (
"scrabble/backend/internal/adminconsole"
"scrabble/backend/internal/engine"
"scrabble/backend/internal/game"
+ "scrabble/backend/internal/ratewatch"
"scrabble/backend/internal/robot"
"scrabble/backend/internal/social"
)
@@ -48,6 +49,8 @@ func (s *Server) registerConsole(router *gin.Engine) {
gm.GET("/users", s.consoleUsers)
gm.GET("/users/:id", s.consoleUserDetail)
gm.POST("/users/:id/message", s.consoleUserMessage)
+ gm.POST("/users/:id/clear-high-rate-flag", s.consoleClearHighRateFlag)
+ gm.GET("/throttled", s.consoleThrottled)
gm.GET("/games", s.consoleGames)
gm.GET("/games/:id", s.consoleGameDetail)
gm.GET("/complaints", s.consoleComplaints)
@@ -117,7 +120,8 @@ func (s *Server) consoleUsers(c *gin.Context) {
}
view.Items = append(view.Items, adminconsole.UserRow{
ID: it.ID.String(), DisplayName: it.DisplayName, Kind: kind,
- Language: it.PreferredLanguage, Guest: it.IsGuest, CreatedAt: fmtTime(it.CreatedAt),
+ Language: it.PreferredLanguage, Guest: it.IsGuest,
+ FlaggedHighRate: !it.FlaggedHighRateAt.IsZero(), CreatedAt: fmtTime(it.CreatedAt),
})
ids = append(ids, it.ID)
}
@@ -257,6 +261,9 @@ func (s *Server) consoleUserDetail(c *gin.Context) {
if acc.MergedInto != uuid.Nil {
view.MergedInto = acc.MergedInto.String()
}
+ if !acc.FlaggedHighRateAt.IsZero() {
+ view.FlaggedHighRateAt = fmtTime(acc.FlaggedHighRateAt)
+ }
if view.HasStats {
if st, err := s.accounts.GetStats(ctx, id); err == nil {
view.Stats = adminconsole.StatsRow{Wins: st.Wins, Losses: st.Losses, Draws: st.Draws, MaxGamePoints: st.MaxGamePoints, MaxWordPoints: st.MaxWordPoints}
@@ -551,6 +558,56 @@ func (s *Server) consolePostBroadcast(c *gin.Context) {
}
}
+// consoleThrottled renders the rate-limit observability page: the recent
+// gateway-reported throttle episodes (in-memory, reset on a backend restart)
+// and the accounts currently carrying the soft high-rate flag (R3).
+func (s *Server) consoleThrottled(c *gin.Context) {
+ ctx := c.Request.Context()
+ var view adminconsole.ThrottledView
+ if s.ratewatch != nil {
+ cfg := s.ratewatch.Config()
+ view.FlagThreshold = cfg.FlagThreshold
+ view.FlagWindow = cfg.FlagWindow.String()
+ for _, ep := range s.ratewatch.Recent() {
+ row := adminconsole.ThrottleEpisodeRow{
+ Class: ep.Class, Key: ep.Key, Rejected: ep.Rejected,
+ FirstSeen: fmtTime(ep.FirstSeen), LastSeen: fmtTime(ep.LastSeen),
+ }
+ if ep.Class == ratewatch.ClassUser {
+ if id, err := uuid.Parse(ep.Key); err == nil {
+ row.UserID = id.String()
+ }
+ }
+ view.Episodes = append(view.Episodes, row)
+ }
+ }
+ flagged, err := s.accounts.ListFlaggedHighRate(ctx)
+ if err != nil {
+ s.consoleError(c, err)
+ return
+ }
+ for _, fa := range flagged {
+ view.Flagged = append(view.Flagged, adminconsole.FlaggedAccountRow{
+ ID: fa.ID.String(), DisplayName: fa.DisplayName, FlaggedAt: fmtTime(fa.FlaggedHighRateAt),
+ })
+ }
+ s.renderConsole(c, "throttled", "throttled", "Throttled", view)
+}
+
+// consoleClearHighRateFlag clears the soft high-rate marker — the operator's
+// reversible review action (R3).
+func (s *Server) consoleClearHighRateFlag(c *gin.Context) {
+ id, ok := s.consoleUUID(c, "/_gm/users")
+ if !ok {
+ return
+ }
+ if err := s.accounts.ClearHighRateFlag(c.Request.Context(), id); err != nil {
+ s.consoleError(c, err)
+ return
+ }
+ s.renderConsoleMessage(c, "Cleared", "high-rate flag cleared", "/_gm/users/"+id.String())
+}
+
// variantVersions builds the per-variant resident-version summary from the registry.
func (s *Server) variantVersions() []adminconsole.VariantVersions {
out := make([]adminconsole.VariantVersions, 0, len(engine.Variants()))
diff --git a/backend/internal/server/handlers_ratelimit.go b/backend/internal/server/handlers_ratelimit.go
new file mode 100644
index 0000000..512a9ae
--- /dev/null
+++ b/backend/internal/server/handlers_ratelimit.go
@@ -0,0 +1,41 @@
+package server
+
+import (
+ "net/http"
+
+ "github.com/gin-gonic/gin"
+
+ "scrabble/backend/internal/ratewatch"
+)
+
+// rateLimitReportRequest mirrors the gateway's periodic rejection summary: every
+// entry aggregates one limiter key (class + key) over the report window.
+type rateLimitReportRequest struct {
+ WindowSeconds int `json:"window_seconds"`
+ Entries []rateLimitReportEntry `json:"entries"`
+}
+
+// rateLimitReportEntry is one (class, key) aggregate of the report.
+type rateLimitReportEntry struct {
+ Class string `json:"class"`
+ Key string `json:"key"`
+ Rejected int `json:"rejected"`
+}
+
+// handleRateLimitReport ingests one gateway rejection report into the rate
+// watch — the admin console's throttled view and the high-rate auto-flag (R3).
+// Internal, gateway-only: like sessions/resolve it trusts the network segment.
+// Malformed individual entries are skipped by the watch itself.
+func (s *Server) handleRateLimitReport(c *gin.Context) {
+ var req rateLimitReportRequest
+ if err := c.ShouldBindJSON(&req); err != nil {
+ abortBadRequest(c, "invalid rate-limit report")
+ return
+ }
+ entries := make([]ratewatch.Entry, 0, len(req.Entries))
+ for _, e := range req.Entries {
+ entries = append(entries, ratewatch.Entry{Class: e.Class, Key: e.Key, Rejected: e.Rejected})
+ }
+ s.ratewatch.Ingest(c.Request.Context(), entries)
+ c.Status(http.StatusNoContent)
+}
diff --git a/backend/internal/server/handlers_test.go b/backend/internal/server/handlers_test.go
index d3257dd..c8e086a 100644
--- a/backend/internal/server/handlers_test.go
+++ b/backend/internal/server/handlers_test.go
@@ -10,6 +10,7 @@ import (
"scrabble/backend/internal/account"
"scrabble/backend/internal/game"
+ "scrabble/backend/internal/ratewatch"
"scrabble/backend/internal/session"
)
@@ -57,6 +58,23 @@ func TestResolveSessionRejectsEmptyToken(t *testing.T) {
}
}
+// TestRateLimitReportEndpoint covers the internal R3 report route: a malformed
+// body is a 400, a valid report lands in the rate watch with 204.
+func TestRateLimitReportEndpoint(t *testing.T) {
+ watch := ratewatch.New(ratewatch.DefaultConfig(), nil, nil)
+ s := New(":0", Deps{RateWatch: watch})
+ if rec := do(t, s, http.MethodPost, "/api/v1/internal/ratelimit/report", `{bad`, nil); rec.Code != http.StatusBadRequest {
+ t.Fatalf("malformed report = %d, want 400", rec.Code)
+ }
+ body := `{"window_seconds":30,"entries":[{"class":"user","key":"` + uuid.NewString() + `","rejected":7}]}`
+ if rec := do(t, s, http.MethodPost, "/api/v1/internal/ratelimit/report", body, nil); rec.Code != http.StatusNoContent {
+ t.Fatalf("report = %d, want 204", rec.Code)
+ }
+ if eps := watch.Recent(); len(eps) != 1 || eps[0].Rejected != 7 {
+ t.Fatalf("watch episodes = %+v, want one entry with rejected=7", eps)
+ }
+}
+
func TestSubmitPlayRejectsBadDirection(t *testing.T) {
headers := map[string]string{"X-User-ID": uuid.New().String()}
path := "/api/v1/user/games/" + uuid.New().String() + "/play"
diff --git a/backend/internal/server/server.go b/backend/internal/server/server.go
index 9c914f7..618efdc 100644
--- a/backend/internal/server/server.go
+++ b/backend/internal/server/server.go
@@ -24,6 +24,7 @@ import (
"scrabble/backend/internal/game"
"scrabble/backend/internal/link"
"scrabble/backend/internal/lobby"
+ "scrabble/backend/internal/ratewatch"
"scrabble/backend/internal/session"
"scrabble/backend/internal/social"
"scrabble/backend/internal/telemetry"
@@ -71,6 +72,10 @@ type Deps struct {
// nil when BACKEND_CONNECTOR_ADDR is unset (broadcasts show a "not configured"
// notice).
Connector *connector.Client
+ // RateWatch ingests the gateway's rate-limiter rejection reports (R3): the
+ // admin console's throttled view + the high-rate auto-flag. A nil RateWatch
+ // disables the internal report endpoint and the console view.
+ RateWatch *ratewatch.Watch
}
// Server owns the gin engine, the underlying HTTP server and the readiness
@@ -93,6 +98,7 @@ type Server struct {
registry *engine.Registry
dictDir string
connector *connector.Client
+ ratewatch *ratewatch.Watch
console *adminconsole.Renderer
public *gin.RouterGroup
@@ -133,6 +139,7 @@ func New(addr string, deps Deps) *Server {
registry: deps.Registry,
dictDir: deps.DictDir,
connector: deps.Connector,
+ ratewatch: deps.RateWatch,
http: &http.Server{Addr: addr, Handler: engine},
}
s.registerProbes(engine)
diff --git a/deploy/README.md b/deploy/README.md
index 9435c8a..5caa0e5 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -1,9 +1,9 @@
# deploy
-The full Scrabble contour: `backend` + `gateway` + Postgres + the Telegram
-connector (with a VPN sidecar) + the observability stack (OTel Collector →
-Prometheus + Tempo → Grafana), fronted by a **caddy** that owns a single `/_gm`
-Basic-Auth (the admin console + Grafana). Topology and the decision record are in
+The full Scrabble contour: `backend` + `gateway` + the static `landing` + Postgres +
+the Telegram connector (with a VPN sidecar) + the observability stack (OTel
+Collector → Prometheus + Tempo → Grafana), fronted by a **caddy** that owns a single
+`/_gm` Basic-Auth (the admin console + Grafana). Topology and the decision record are in
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §13; this file is the
operational reference for **every environment variable**.
@@ -11,8 +11,9 @@ operational reference for **every environment variable**.
| Service | Image | Role |
| --- | --- | --- |
-| `caddy` | `caddy:2-alpine` | Edge proxy (alias `scrabble` on `edge`): single `/_gm` Basic-Auth → admin console + Grafana; everything else → gateway. TLS per `CADDY_SITE_ADDRESS`. |
-| `gateway` | built (`gateway/Dockerfile`) | Public edge; serves the embedded landing at `/` and the game SPA at `/app/` + `/telegram/`; Connect-RPC edge. |
+| `caddy` | `caddy:2-alpine` | Edge proxy (alias `scrabble` on `edge`): single `/_gm` Basic-Auth → admin console + Grafana; `/app/`, `/telegram/` + the Connect path → gateway; the catch-all (incl. `/`) → landing. TLS per `CADDY_SITE_ADDRESS`. |
+| `gateway` | built (`gateway/Dockerfile`, target `gateway`) | Public edge; serves the embedded game SPA at `/app/` + `/telegram/`; Connect-RPC edge. `/` redirects to `/app/`. |
+| `landing` | built (`gateway/Dockerfile`, target `landing`) | Static landing page at `/` (caddy:2-alpine + the shared Vite build, `deploy/landing/Caddyfile`); absorbs stray public paths (R3). |
| `backend` | built (`backend/Dockerfile`) | Domain service; bakes in the DAWG dictionaries; runs migrations at boot. |
| `postgres` | `postgres:17-alpine` | Database (named volume, `pg_isready` healthcheck). |
| `vpn` + `telegram` | sidecar + built (`platform/telegram/Dockerfile`) | Telegram connector; egresses through the AmneziaWG sidecar; internal gRPC at `telegram:9091`. |
@@ -88,8 +89,9 @@ connector **fails at boot** if both are empty.
| `VITE_TELEGRAM_GAME_CHANNEL_NAME_RU` | variable | _(empty)_ | UI build-arg: the landing "Play in Telegram" link for the **Russian** bot (e.g. `https://t.me/Erudit_Game`). |
| `VITE_GATEWAY_URL` | variable | _(empty)_ | UI build-arg: gateway origin; empty = same-origin (the usual single-origin deploy). |
-The five `VITE_*` are **build-args** baked into the gateway image at build time, so
-changing them requires a rebuild (`--build`), not just a restart.
+The five `VITE_*` are **build-args** baked into the gateway and landing images at
+build time (both targets share one UI build stage — keep the args identical so it is
+built once), so changing them requires a rebuild (`--build`), not just a restart.
## Fixed internal wiring (not operator-set)
diff --git a/deploy/caddy/Caddyfile b/deploy/caddy/Caddyfile
index ba26fbe..8860263 100644
--- a/deploy/caddy/Caddyfile
+++ b/deploy/caddy/Caddyfile
@@ -1,7 +1,9 @@
# Edge reverse proxy for the Scrabble contour. A single Basic-Auth gate covers
# every operator surface under /_gm (the backend-rendered admin console and the
-# Grafana subpath); everything else (the SPA at / and /telegram/, plus the
-# Connect edge) goes to the gateway. Mirrors ../galaxy-game's /_gm model.
+# Grafana subpath); the game SPA (/app/, /telegram/) and the Connect edge go to
+# the gateway; the catch-all — notably the public landing at / — goes to the
+# static landing container (R3), so stray traffic never reaches the Go edge.
+# Mirrors ../galaxy-game's /_gm model.
#
# CADDY_SITE_ADDRESS is ":80" in the test contour (the host caddy terminates TLS
# and forwards); set it to a domain in prod (Stage 18) so this caddy does its own
@@ -36,8 +38,14 @@
}
}
- # The SPA (/, /telegram/) and the Connect edge are served by the gateway.
- handle {
+ # The game SPA and the Connect edge are served by the gateway.
+ @gateway path /app /app/* /telegram /telegram/* /scrabble.edge.v1.Gateway/*
+ handle @gateway {
reverse_proxy gateway:8081
}
+
+ # Everything else — the public landing at / and any stray path — is static.
+ handle {
+ reverse_proxy landing:80
+ }
}
diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml
index cb071f6..953825a 100644
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@@ -75,6 +75,7 @@ services:
build:
context: ..
dockerfile: gateway/Dockerfile
+ target: gateway
args:
VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
@@ -100,6 +101,28 @@ services:
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
networks: [internal]
+ # --- Landing (static) -------------------------------------------------------
+ # The public landing page in its own caddy container (R3): the contour caddy
+ # routes the catch-all (notably /) here, the gateway keeps only /app/,
+ # /telegram/ and the Connect edge. Shares the gateway Dockerfile's UI build
+ # stage — identical build args keep that stage a single cached build.
+ landing:
+ container_name: scrabble-landing
+ image: scrabble-landing:latest
+ build:
+ context: ..
+ dockerfile: gateway/Dockerfile
+ target: landing
+ args:
+ VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
+ VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
+ VITE_TELEGRAM_GAME_CHANNEL_NAME_EN: ${VITE_TELEGRAM_GAME_CHANNEL_NAME_EN:-}
+ VITE_TELEGRAM_GAME_CHANNEL_NAME_RU: ${VITE_TELEGRAM_GAME_CHANNEL_NAME_RU:-}
+ VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
+ VITE_APP_VERSION: ${APP_VERSION:-dev}
+ restart: unless-stopped
+ networks: [internal]
+
# --- Telegram connector (egress via the VPN sidecar) -----------------------
vpn:
container_name: scrabble-telegram-vpn
@@ -145,12 +168,13 @@ services:
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
OTEL_EXPORTER_OTLP_INSECURE: "true"
- # --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway) --
+ # --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway;
+ # the catch-all incl. the landing -> the static landing container) -------
caddy:
container_name: scrabble-caddy
image: caddy:2-alpine
restart: unless-stopped
- depends_on: [gateway, backend, grafana]
+ depends_on: [gateway, backend, grafana, landing]
environment:
# Test: ":80" (host caddy terminates TLS). Prod: a domain for own ACME.
CADDY_SITE_ADDRESS: ${CADDY_SITE_ADDRESS:-:80}
diff --git a/deploy/grafana/dashboards/edge-ux.json b/deploy/grafana/dashboards/edge-ux.json
index 44b49d6..413a268 100644
--- a/deploy/grafana/dashboards/edge-ux.json
+++ b/deploy/grafana/dashboards/edge-ux.json
@@ -34,6 +34,18 @@
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (result)", "legendFormat": "{{result}}" }]
+ },
+ {
+ "type": "timeseries",
+ "title": "Rate limiting — request rate vs rejections (R3)",
+ "description": "Aggregate only (no per-user labels, the Stage 12/17 discipline): total edge request rate against the limiter rejection rate by class. Per-key detail lives in the admin console's Throttled view.",
+ "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
+ "fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
+ "datasource": { "type": "prometheus", "uid": "prometheus" },
+ "targets": [
+ { "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m]))", "legendFormat": "requests" },
+ { "refId": "B", "expr": "sum(rate(gateway_rate_limited_total[5m])) by (class)", "legendFormat": "rejected · {{class}}" }
+ ]
}
]
}
diff --git a/deploy/landing/Caddyfile b/deploy/landing/Caddyfile
new file mode 100644
index 0000000..f14b522
--- /dev/null
+++ b/deploy/landing/Caddyfile
@@ -0,0 +1,27 @@
+# Static landing container (R3). Serves the public landing page and the built
+# assets it references at /; the game SPA (/app/, /telegram/) and the Connect
+# edge stay on the gateway. The contour caddy routes the catch-all here, so
+# stray public paths are absorbed by static file serving and never reach the Go
+# edge. This file is baked into the image at build time (gateway/Dockerfile,
+# target `landing`), not bind-mounted.
+{
+ admin off
+}
+
+:80 {
+ root * /srv
+ encode zstd gzip
+
+ # Mirror the gateway webui caching: hash-named build assets are immutable,
+ # every HTML shell is no-cache so a new deploy is picked up immediately.
+ header /assets/* Cache-Control "public, max-age=31536000, immutable"
+ @shell not path /assets/*
+ header @shell Cache-Control "no-cache"
+
+ # An unknown path falls back to the landing shell (the gateway's old "/"
+ # behaviour); "/" itself resolves through the index below.
+ try_files {path} /landing.html
+ file_server {
+ index landing.html
+ }
+}
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 6b6e0f2..d1546a5 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -98,6 +98,15 @@ dropped). Horizontal scaling is explicit future work.
response was lost — its button is disabled while offline and the player re-issues it on
reconnect). A reachability watcher (a lightweight `profile.get` probe) clears the signal when no
other traffic is in flight; the live `Subscribe` stream's drop/recovery feeds the same signal.
+ **Edge hardening (R3):** every request body on the public listener is capped at
+ `GATEWAY_MAX_BODY_BYTES` (default 1 MiB — far above any legitimate payload), both at the HTTP
+ layer (`http.MaxBytesReader`) and as the Connect per-message read limit, so an oversized
+ `Execute` is refused (`resource_exhausted`) without buffering. The h2c server carries explicit
+ sizing: `MaxConcurrentStreams` 250 (the x/net default made visible — a real client holds one
+ `Subscribe` stream plus a few unary calls) and a 3-minute connection `IdleTimeout` (a live
+ `Subscribe` stream keeps its connection active, so only abandoned connections are reaped); the
+ `http.Server` sets only `ReadHeaderTimeout` (10 s) — Read/WriteTimeout would kill the stream.
+ R7 revisits the exact values under load.
- **Alphabet on the wire (Stage 13)**: live play exchanges **alphabet indices**, not
concrete letters. The rack (`StateView.rack`), the `SubmitPlay`/`Evaluate` tiles, the
`Exchange` tiles and the `CheckWord` word are `ubyte` indices into the variant's alphabet
@@ -572,6 +581,21 @@ promotions) is future work and would deliver short markdown messages (text + lin
distinct accounts that performed an authenticated edge action in the window. The
gauge is single-process by design (single-instance MVP, §10): it is correct for one
gateway, resets on restart, and is a live operational figure, not a billing count.
+- **Rate-limit observability (R3):** every limiter rejection increments the gateway
+ counter `gateway_rate_limited_total` (`class` = user/public/email/admin — aggregate
+ only, honouring the no-per-user-label discipline above) and logs one **Debug** line;
+ a gateway reporter drains the per-key rejection tracker every 30 s, emits one **Warn**
+ summary per throttled key and posts the report to the backend
+ (`POST /api/v1/internal/ratelimit/report`, network-trusted like `sessions/resolve`).
+ The backend's `ratewatch` keeps a bounded in-memory episode window (single-instance,
+ resets on restart, like `active_users`) surfaced on the admin console's **Throttled**
+ page next to the flagged-account review queue, and applies the **conservative
+ auto-flag**: an account sustaining `BACKEND_HIGHRATE_FLAG_THRESHOLD` rejected calls
+ (default 1000) within `BACKEND_HIGHRATE_FLAG_WINDOW` (default 10 min) gets the soft,
+ reversible `accounts.flagged_high_rate_at` marker — set once, shown in the user
+ list/detail, cleared by the operator, **never an automatic ban** and never a request
+ gate. The Edge/UX dashboard graphs the aggregate request rate against the rejection
+ rate by class.
- Unauthenticated `GET /healthz` (liveness) and `GET /readyz` (readiness — the
database answers a bounded ping and the session cache is warmed).
- The backend serves a **second listener** — a gRPC server
@@ -582,12 +606,12 @@ promotions) is future work and would deliver short markdown messages (text + lin
| Concern | Enforced by |
| --- | --- |
-| Public rate limiting / anti-abuse | gateway |
+| Public rate limiting / anti-abuse | gateway (per-IP public/email/admin classes, per-user authenticated class; a request body cap of `GATEWAY_MAX_BODY_BYTES`; rejections are metered, summarised to the backend and surfaced in the admin console with a conservative reversible auto-flag — R3, §11) |
| Telegram initData validation (bot-token HMAC) | the Telegram connector; the gateway delegates it over gRPC, so the bot token lives only in the connector |
| Session minting; email-code / guest validation | gateway (with backend) |
| Session → `user_id` resolution, `X-User-ID` injection | gateway |
| Authorisation, ownership, state transitions | backend (`X-User-ID` is the sole identity input) |
-| Admin authentication | a single Basic-Auth gate on `/_gm/*`, forwarded **verbatim** to the backend's server-rendered admin console (and, in the deployed contour, routing `/_gm/grafana/*` to Grafana). In the deploy the **caddy** owns this gate (§13); a local non-caddy run uses the gateway's own `GATEWAY_ADMIN_*` proxy. The backend trusts the proxy (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
+| Admin authentication | a single Basic-Auth gate on `/_gm/*`, forwarded **verbatim** to the backend's server-rendered admin console (and, in the deployed contour, routing `/_gm/grafana/*` to Grafana). In the deploy the **caddy** owns this gate (§13); a local non-caddy run uses the gateway's own `GATEWAY_ADMIN_*` proxy, which the per-IP admin limiter class guards ahead of its Basic-Auth (R3) — the caddy-fronted path has no limiter (stock caddy), an accepted gap. The backend trusts the proxy (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
| backend ↔ gateway ↔ connector trust | the network (only gateway may reach backend; the connector serves unauthenticated gRPC on the internal segment) |
This is an explicit, accepted MVP risk: compromise of the gateway↔backend
@@ -597,7 +621,7 @@ mutual auth is a future hardening step.
**Short numeric codes** (email confirm-codes and Stage 8 friend codes) are stored
only as SHA-256 hashes and are short-lived and single-use. The unauthenticated
email path carries a tight per-IP sub-limit (5 / 10 min); the **friend-code redeem**
-is authenticated, so it rides the per-user limit (120 / min) and is further bounded
+is authenticated, so it rides the per-user limit (300 / min) and is further bounded
by the code's 12 h TTL, single use, and **one live code per issuer** (which caps the
valid-code population). Brute-forcing a 6-digit friend code within these limits is an
accepted MVP risk with low blast radius (an unwanted friendship is removable/blockable);
@@ -605,22 +629,27 @@ a dedicated redeem sub-limit or a longer code is the hardening step if abuse app
## 13. Deployment (informational)
-Single public origin, path-routed. The gateway **embeds** the static UI build
-(`go:embed`, baked in by a node stage in `gateway/Dockerfile`). The Vite build has two
-entries: a lightweight **landing page** served at `/`, and the game **SPA** served at
+Single public origin, path-routed. The Vite build has two entries: a lightweight
+**landing page** and the game **SPA**. The gateway **embeds** the SPA build
+(`go:embed`, baked in by a node stage in `gateway/Dockerfile`) and serves it at
`/app/` (web) and `/telegram/` (the Telegram Mini App; outside Telegram that path
-redirects to the root — the client-side guard). Hash-named `/assets/*` are served
+redirects to the root — the client-side guard); a stray hit on the gateway's `/`
+308-redirects to `/app/`. The **landing** ships in its own static container (R3): the
+`landing` target of `gateway/Dockerfile` (caddy:2-alpine + the same Vite build,
+`deploy/landing/Caddyfile`) serves it at `/`, so stray public traffic is absorbed by
+static file serving and never reaches the Go edge. Hash-named `/assets/*` are served
`immutable` (a relaunch is a cache hit, not a re-download); the HTML shells are
-`no-cache` so a new deploy is picked up. An in-compose **caddy** is the
-contour's edge: it owns a single `/_gm` Basic-Auth and routes `/_gm/grafana/*` to
-**Grafana** (anonymous-admin, so the one shared login gates it with no per-user
-Grafana accounts) and the rest of `/_gm/*` to the backend-rendered **admin console**;
-everything else (`/`, `/app/`, `/telegram/`, the Connect edge) goes to the gateway. The
+`no-cache` so a new deploy is picked up — both containers apply the same caching. An
+in-compose **caddy** is the contour's edge: it owns a single `/_gm` Basic-Auth and
+routes `/_gm/grafana/*` to **Grafana** (anonymous-admin, so the one shared login gates
+it with no per-user Grafana accounts) and the rest of `/_gm/*` to the backend-rendered
+**admin console**; `/app/`, `/telegram/` and the Connect path go to the gateway; the
+catch-all — notably the landing at `/` — goes to the landing container. The
**Telegram connector** runs as a separate container with **no public ingress** — it
long-polls Telegram and egresses through a VPN sidecar, answering only internal gRPC.
The full contour (`deploy/docker-compose.yml`) runs one `gateway`, one `backend`,
-one Postgres, the connector (+ its VPN sidecar) and the **observability stack** —
+one Postgres, the static `landing`, the connector (+ its VPN sidecar) and the **observability stack** —
OTel Collector (OTLP/gRPC ingest → Prometheus metrics + Tempo traces) and Grafana
with provisioned datasources and dashboards. All three services export OTLP to the
collector; the connector shares the VPN sidecar's netns, so its `AWG_CONF` must not
@@ -633,7 +662,8 @@ network (project-scoped DNS); only caddy joins the shared external `edge` networ
Two contours, two secret/variable prefixes (`TEST_` / `PROD_`):
- **Test** (Stage 16): auto-deploys on a PR into — or a push to — `development`
(`.gitea/workflows/ci.yaml` → `docker compose up -d --build` on the Gitea runner
- host, then a `GET /` probe through caddy). The host caddy terminates TLS and
+ host, then `GET /` + `GET /app/` probes through caddy — the landing container and
+ the gateway, R3). The host caddy terminates TLS and
forwards the domain to `scrabble:80`, so the in-compose caddy serves plain HTTP
(`CADDY_SITE_ADDRESS=:80`). The in-compose caddy **trusts X-Forwarded-For from
private-range upstreams** (`trusted_proxies private_ranges`), so the real client IP —
diff --git a/docs/FUNCTIONAL.md b/docs/FUNCTIONAL.md
index c425027..3399b89 100644
--- a/docs/FUNCTIONAL.md
+++ b/docs/FUNCTIONAL.md
@@ -171,3 +171,11 @@ applied after a reload). When a Telegram connector is configured an operator can
**message a user** (by their Telegram identity) or **post to the game channel**.
State-changing actions are protected by a same-origin check; the console tracks no
operator identity.
+
+The console also surfaces **rate-limit abuse** (R3): a **Throttled** page lists the
+recently throttled users/IPs the gateway reported (an in-memory window — it resets on
+a backend restart) and the accounts currently carrying the soft **high-rate flag**. An
+account sustaining rejections past a tunable threshold is flagged automatically —
+the marker is reversible, shown as a badge in the user list and on the user card, and
+**never blocks play**; the operator reviews and clears it from the user card. There is
+no automatic ban.
diff --git a/docs/FUNCTIONAL_ru.md b/docs/FUNCTIONAL_ru.md
index f82266b..ee0e7b3 100644
--- a/docs/FUNCTIONAL_ru.md
+++ b/docs/FUNCTIONAL_ru.md
@@ -175,3 +175,11 @@ identity, их игры) и **игры** (сводка + места), разби
подключён Telegram-коннектор, оператор также может **написать пользователю** (по его
Telegram-identity) или **отправить пост в игровой канал**. Изменяющие действия
защищены проверкой same-origin; личность оператора не отслеживается.
+
+Консоль также показывает **злоупотребление лимитами** (R3): страница **Throttled**
+перечисляет недавно затроттленных пользователей/IP по отчётам gateway (окно в памяти —
+сбрасывается при рестарте backend) и аккаунты с действующим мягким **high-rate
+флагом**. Аккаунт, устойчиво превышающий настраиваемый порог отказов, помечается
+автоматически — маркер обратим, виден бейджем в списке пользователей и на карточке
+аккаунта и **никогда не блокирует игру**; оператор рассматривает и снимает его с
+карточки пользователя. Автоматического бана нет.
diff --git a/docs/TESTING.md b/docs/TESTING.md
index 2319a8e..02dc510 100644
--- a/docs/TESTING.md
+++ b/docs/TESTING.md
@@ -76,7 +76,14 @@ tests or touching CI.
unsubscribe), the transcode round-trips (FlatBuffers↔JSON, X-User-ID
forwarding, nested GameView, domain-code surfacing), the admin Basic-Auth
reverse proxy (401 / forward), and a full Connect `Execute` path end to end
- (guest auth, unauthenticated rejection, unknown message type). The backend gains
+ (guest auth, unauthenticated rejection, unknown message type). **R3** adds the
+ edge-hardening cases: an oversized `Execute` payload is refused
+ (`resource_exhausted`, the `GATEWAY_MAX_BODY_BYTES` cap), a limiter rejection
+ lands in `gateway_rate_limited_total{class}` and the rejection tracker
+ (drain/aggregate unit tests), the report POST reaches
+ `/api/v1/internal/ratelimit/report` with the agreed JSON shape, the `/_gm`
+ mount is 429-guarded by the per-IP admin class, and the gateway's `/`
+ 308-redirects to `/app/` (the landing left the embed). The backend gains
the **guest** lifecycle (a guest plays an auto-match to a natural end yet accrues
no statistics) and the **email-as-login** flow (request/verify, returning user)
in `inttest`. Stage 8 adds gateway transcode round-trips for the new social/account
@@ -92,7 +99,12 @@ tests or touching CI.
404 when not). Postgres-backed `inttest` drives the **complaint resolution →
dictionary-change pipeline** (file → resolve with a disposition → pending change → mark
applied), the admin **list/count** read queries, and the **/_gm console over HTTP**
- (pages render; a resolve POST needs a same-origin header).
+ (pages render; a resolve POST needs a same-origin header). **R3** adds `ratewatch`
+ unit tests (window accumulation, the auto-flag threshold + expiry, the bounded
+ episode map), the account-store **high-rate flag round-trip** (set-once / clear /
+ re-flag) and a console flow in `inttest`: a gateway report auto-flags the account,
+ the **Throttled** page shows the episode and the flagged queue, the user card
+ carries the marker and the CSRF-guarded **Clear** reverses it.
- **Observability & performance** *(Stage 12)* — `pkg/telemetry` unit-tests the exporter
selection (`none`/`stdout`/`otlp` build providers; OTLP constructs with no collector;
the nil-runtime fallback). The domain metrics are exercised through a manual
diff --git a/gateway/Dockerfile b/gateway/Dockerfile
index 8a45a94..092fd6c 100644
--- a/gateway/Dockerfile
+++ b/gateway/Dockerfile
@@ -1,15 +1,18 @@
-# Multi-stage build for the gateway service. A node stage builds the static UI
-# (Vite), the result is embedded into the Go binary (gateway/internal/webui/dist),
-# and the Go stage — mirroring platform/telegram/Dockerfile — yields a static
-# binary shipped on distroless nonroot. So the single binary serves the SPA at /
-# and /telegram/ (docs/ARCHITECTURE.md §13) with no separate static container.
+# Multi-stage build for the gateway service and the landing image. A node stage
+# builds the static UI (Vite) once; the `landing` target serves that build from a
+# static caddy container (the public landing at /, R3), while the final gateway
+# target embeds it — minus landing.html — into the Go binary
+# (gateway/internal/webui/dist), which serves the game SPA at /app/ and
+# /telegram/ (docs/ARCHITECTURE.md §13). The Go stage mirrors
+# platform/telegram/Dockerfile and ships on distroless nonroot.
#
# The production UI build vars are image build-args, baked into the bundle.
-# Build from the repository root so go.work, pkg/, gateway/ and ui/ are all in the
-# Docker context:
+# Build from the repository root so go.work, pkg/, gateway/, ui/ and
+# deploy/landing/ are all in the Docker context:
# docker build -f gateway/Dockerfile \
# --build-arg VITE_GATEWAY_URL=https://example \
# -t scrabble-gateway .
+# docker build -f gateway/Dockerfile --target landing -t scrabble-landing .
# --- UI build ----------------------------------------------------------------
FROM node:22-alpine AS ui
@@ -38,6 +41,14 @@ RUN pnpm install --frozen-lockfile
COPY ui ./
RUN pnpm build
+# --- landing -------------------------------------------------------------------
+# The public landing page as its own static container (R3): the same Vite build
+# served by caddy at /, so stray public traffic is absorbed by static file
+# serving and never reaches the Go edge.
+FROM caddy:2-alpine AS landing
+COPY deploy/landing/Caddyfile /etc/caddy/Caddyfile
+COPY --from=ui /ui/dist /srv
+
# --- Go build ----------------------------------------------------------------
FROM golang:1.26.3-alpine AS build
WORKDIR /src
@@ -46,9 +57,11 @@ COPY pkg ./pkg
COPY gateway ./gateway
# Replace the committed placeholder with the freshly built UI before compiling, so
-# go:embed bakes the real bundle into the binary.
+# go:embed bakes the real bundle into the binary. The landing shell ships in the
+# landing image, not in the gateway (R3).
RUN rm -rf gateway/internal/webui/dist
COPY --from=ui /ui/dist gateway/internal/webui/dist
+RUN rm gateway/internal/webui/dist/landing.html
# Reduce the workspace to what the gateway needs: gateway + pkg (loadtest is not in
# this context; its scrabble/gateway replace targets ./gateway, which is present here).
@@ -56,6 +69,6 @@ RUN go work edit -dropuse=./backend -dropuse=./platform/telegram -dropuse=./load
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gateway ./gateway/cmd/gateway
# --- runtime -----------------------------------------------------------------
-FROM gcr.io/distroless/static-debian12:nonroot
+FROM gcr.io/distroless/static-debian12:nonroot AS gateway
COPY --from=build /out/gateway /usr/local/bin/gateway
ENTRYPOINT ["/usr/local/bin/gateway"]
diff --git a/gateway/README.md b/gateway/README.md
index af4201f..d162bc0 100644
--- a/gateway/README.md
+++ b/gateway/README.md
@@ -23,7 +23,7 @@ proto/edge/v1/ # Connect envelope contract (committed generated Go)
internal/config/ # GATEWAY_* env config
internal/backendclient/ # typed REST client (+ X-User-ID) and push gRPC client
internal/session/ # in-memory session cache (LRU/TTL, backend fallback)
-internal/ratelimit/ # token-bucket limiter (golang.org/x/time/rate)
+internal/ratelimit/ # token-bucket limiter (golang.org/x/time/rate) + the rejection tracker (R3)
internal/connector/ # gRPC client to the Telegram connector (initData validate, out-of-app push) + routing
internal/push/ # live-event fan-out hub (per-user client streams)
internal/transcode/ # FlatBuffers<->REST bridge + message_type registry
@@ -79,12 +79,21 @@ connector (`ValidateLoginWidget`) and forward the trusted `external_id`. These
| `GATEWAY_SESSION_TTL` | `10m` | cached session lifetime |
| `GATEWAY_SESSION_CACHE_MAX` | `50000` | cached session cap |
| `GATEWAY_PUSH_HEARTBEAT_INTERVAL` | `10s` | live-stream keep-alive (an immediate heartbeat also fires on open, under the ~15s edge idle timeout) |
+| `GATEWAY_MAX_BODY_BYTES` | `1048576` | caps one request body and one Connect message read; an oversized Execute is refused with `resource_exhausted` (R3) |
| `GATEWAY_SERVICE_NAME` | `scrabble-gateway` | OpenTelemetry `service.name` |
| `GATEWAY_OTEL_TRACES_EXPORTER` | `none` | `none`, `stdout` or `otlp` (gRPC; endpoint from `OTEL_EXPORTER_OTLP_*`) |
| `GATEWAY_OTEL_METRICS_EXPORTER` | `none` | `none`, `stdout` or `otlp` |
Rate-limit defaults (built-in): public 30/min·IP (burst 10), authenticated
-120/min·user (burst 40), admin 60/min·IP (burst 20), email-code 5/10 min·IP.
+300/min·user (burst 80, raised in Stage 17 for multi-device play), admin
+60/min·IP (burst 20, guarding the `/_gm` mount ahead of its Basic-Auth),
+email-code 5/10 min·IP (burst 2).
+
+Every rejection increments `gateway_rate_limited_total{class}`
+(`user`/`public`/`email`/`admin`) and logs one Debug line; a reporter drains the
+per-key rejection tracker every 30 s, emits a Warn summary per throttled key and
+posts the report to the backend (`/api/v1/internal/ratelimit/report`), feeding
+the admin console's throttled view and the high-rate auto-flag (R3).
## Run
diff --git a/gateway/cmd/gateway/main.go b/gateway/cmd/gateway/main.go
index 458b287..d6439f0 100644
--- a/gateway/cmd/gateway/main.go
+++ b/gateway/cmd/gateway/main.go
@@ -39,6 +39,14 @@ const (
pushReconnectDelay = 2 * time.Second
// gatewayID identifies this gateway instance to the backend push channel.
gatewayID = "gateway"
+ // readHeaderTimeout bounds reading one request's headers on the public
+ // listener (a slowloris guard). Bodies and long-lived streams are governed by
+ // the h2c settings in connectsrv — Read/WriteTimeout stay unset on purpose,
+ // they would kill the Subscribe stream (R3).
+ readHeaderTimeout = 10 * time.Second
+ // throttleReportInterval is the cadence of the rate-limiter rejection
+ // summary: the Warn log per throttled key and the report to the backend (R3).
+ throttleReportInterval = 30 * time.Second
)
func main() {
@@ -89,6 +97,7 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
sessions := session.NewCache(backend, cfg.SessionTTL, cfg.SessionCacheMax)
limiter := ratelimit.New()
+ tracker := ratelimit.NewTracker()
hub := push.NewHub(0)
var conn *connector.Client
@@ -119,22 +128,26 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
registry := transcode.NewRegistry(backend, validator, cfg.DefaultSupportedLanguages...)
edge := connectsrv.NewServer(connectsrv.Deps{
- Registry: registry,
- Sessions: sessions,
- Limiter: limiter,
- Hub: hub,
- RateLimit: cfg.RateLimit,
- Heartbeat: cfg.PushHeartbeatInterval,
- Logger: logger,
- AdminProxy: adminProxy,
- Meter: tel.MeterProvider().Meter("scrabble/gateway/edge"),
+ Registry: registry,
+ Sessions: sessions,
+ Limiter: limiter,
+ Tracker: tracker,
+ Hub: hub,
+ RateLimit: cfg.RateLimit,
+ Heartbeat: cfg.PushHeartbeatInterval,
+ Logger: logger,
+ AdminProxy: adminProxy,
+ Meter: tel.MeterProvider().Meter("scrabble/gateway/edge"),
+ MaxBodyBytes: cfg.MaxBodyBytes,
})
// Bridge the backend push stream into the fan-out hub (and the out-of-app
// channel via the connector).
go runPushPump(ctx, backend, hub, conn, logger)
+ // Periodically summarise rate-limiter rejections (Warn log + backend report).
+ go runThrottleReporter(ctx, tracker, backend, logger)
- public := &http.Server{Addr: cfg.HTTPAddr, Handler: edge.HTTPHandler()}
+ public := &http.Server{Addr: cfg.HTTPAddr, Handler: edge.HTTPHandler(), ReadHeaderTimeout: readHeaderTimeout}
servers := []*namedServer{{name: "public", srv: public}}
logger.Info("gateway starting",
@@ -182,6 +195,37 @@ func runServers(ctx context.Context, cancel context.CancelFunc, servers []*named
return first
}
+// runThrottleReporter drains the rate-limiter rejection tracker on a fixed
+// cadence, emits one Warn summary per throttled key and forwards the report to
+// the backend (which feeds the admin throttled view and the high-rate
+// auto-flag), until the context is done. A failed delivery is logged and
+// dropped — the next window reports fresh data anyway.
+func runThrottleReporter(ctx context.Context, tracker *ratelimit.Tracker, backend *backendclient.Client, logger *zap.Logger) {
+ ticker := time.NewTicker(throttleReportInterval)
+ defer ticker.Stop()
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ }
+ entries := tracker.Drain()
+ if len(entries) == 0 {
+ continue
+ }
+ for _, e := range entries {
+ logger.Warn("rate limited",
+ zap.String("class", e.Class),
+ zap.String("key", e.Key),
+ zap.Int("rejected", e.Rejected),
+ zap.Duration("window", throttleReportInterval))
+ }
+ if err := backend.ReportRateLimited(ctx, int(throttleReportInterval.Seconds()), entries); err != nil {
+ logger.Warn("rate-limit report failed", zap.Error(err))
+ }
+ }
+}
+
// runPushPump keeps a backend push subscription open, forwarding every event to
// the hub and re-subscribing after the stream ends, until the context is done. For
// the out-of-app push kinds it also routes events whose recipient has no live
diff --git a/gateway/internal/backendclient/client.go b/gateway/internal/backendclient/client.go
index c63f2fb..d1caeac 100644
--- a/gateway/internal/backendclient/client.go
+++ b/gateway/internal/backendclient/client.go
@@ -18,6 +18,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
+ "scrabble/gateway/internal/ratelimit"
pushv1 "scrabble/pkg/proto/push/v1"
)
@@ -124,3 +125,15 @@ func parseAPIError(status int, data []byte) *APIError {
func (c *Client) SubscribePush(ctx context.Context, gatewayID string) (grpc.ServerStreamingClient[pushv1.Event], error) {
return c.push.Subscribe(ctx, &pushv1.SubscribeRequest{GatewayId: gatewayID})
}
+
+// ReportRateLimited posts the gateway's periodic rate-limiter rejection summary
+// to the backend, which feeds the admin console's throttled view and the
+// high-rate auto-flag. The endpoint carries no user identity: like
+// sessions/resolve it rides the trusted internal segment (R3).
+func (c *Client) ReportRateLimited(ctx context.Context, windowSeconds int, entries []ratelimit.Rejection) error {
+ body := struct {
+ WindowSeconds int `json:"window_seconds"`
+ Entries []ratelimit.Rejection `json:"entries"`
+ }{WindowSeconds: windowSeconds, Entries: entries}
+ return c.do(ctx, http.MethodPost, "/api/v1/internal/ratelimit/report", "", "", body, nil)
+}
diff --git a/gateway/internal/backendclient/client_test.go b/gateway/internal/backendclient/client_test.go
new file mode 100644
index 0000000..3ad3612
--- /dev/null
+++ b/gateway/internal/backendclient/client_test.go
@@ -0,0 +1,48 @@
+package backendclient_test
+
+import (
+ "context"
+ "encoding/json"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+
+ "scrabble/gateway/internal/backendclient"
+ "scrabble/gateway/internal/ratelimit"
+)
+
+// TestReportRateLimited verifies the rejection report reaches the backend's
+// internal endpoint with the agreed JSON shape and no user identity.
+func TestReportRateLimited(t *testing.T) {
+ var got struct {
+ WindowSeconds int `json:"window_seconds"`
+ Entries []ratelimit.Rejection `json:"entries"`
+ }
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodPost || r.URL.Path != "/api/v1/internal/ratelimit/report" {
+ t.Errorf("call = %s %s, want POST /api/v1/internal/ratelimit/report", r.Method, r.URL.Path)
+ }
+ if uid := r.Header.Get("X-User-ID"); uid != "" {
+ t.Errorf("X-User-ID = %q, want empty", uid)
+ }
+ if err := json.NewDecoder(r.Body).Decode(&got); err != nil {
+ t.Errorf("decode report: %v", err)
+ }
+ }))
+ defer srv.Close()
+
+ c, err := backendclient.New(srv.URL, "localhost:9090", 2*time.Second)
+ if err != nil {
+ t.Fatalf("backendclient: %v", err)
+ }
+ defer func() { _ = c.Close() }()
+
+ entries := []ratelimit.Rejection{{Class: "user", Key: "u-1", Rejected: 5}}
+ if err := c.ReportRateLimited(context.Background(), 30, entries); err != nil {
+ t.Fatalf("ReportRateLimited: %v", err)
+ }
+ if got.WindowSeconds != 30 || len(got.Entries) != 1 || got.Entries[0] != entries[0] {
+ t.Fatalf("backend received %+v, want window 30 + %+v", got, entries[0])
+ }
+}
diff --git a/gateway/internal/config/config.go b/gateway/internal/config/config.go
index e035687..3800be6 100644
--- a/gateway/internal/config/config.go
+++ b/gateway/internal/config/config.go
@@ -44,6 +44,9 @@ type Config struct {
SessionCacheMax int
// PushHeartbeatInterval is the idle keep-alive cadence on a client live stream.
PushHeartbeatInterval time.Duration
+ // MaxBodyBytes caps one inbound request body on the public listener and one
+ // Connect message read; oversized requests are refused without buffering.
+ MaxBodyBytes int
// RateLimit configures the in-memory anti-abuse limiter.
RateLimit RateLimitConfig
// Telemetry configures the OpenTelemetry providers (shared bootstrap).
@@ -77,6 +80,11 @@ const (
defaultServiceName = "scrabble-gateway"
)
+// DefaultMaxBodyBytes is the default request-body cap (GATEWAY_MAX_BODY_BYTES):
+// 1 MiB — far above any legitimate edge payload (drafts and chat are a few KB)
+// yet small enough to stop a cheap memory-amplification upload (R3).
+const DefaultMaxBodyBytes = 1 << 20
+
// supportedLanguages is the set of game languages a service may declare for the
// New Game variant gating; defaultSupportedLanguages is the non-platform fallback.
var (
@@ -130,6 +138,9 @@ func Load() (Config, error) {
if c.PushHeartbeatInterval, err = envDuration("GATEWAY_PUSH_HEARTBEAT_INTERVAL", defaultPushHeartbeatInterval); err != nil {
return Config{}, err
}
+ if c.MaxBodyBytes, err = envInt("GATEWAY_MAX_BODY_BYTES", DefaultMaxBodyBytes); err != nil {
+ return Config{}, err
+ }
if c.DefaultSupportedLanguages, err = envLanguages("GATEWAY_DEFAULT_SUPPORTED_LANGUAGES", defaultSupportedLanguages); err != nil {
return Config{}, err
}
@@ -161,6 +172,9 @@ func (c Config) validate() error {
if c.BackendGRPCAddr == "" {
return fmt.Errorf("config: GATEWAY_BACKEND_GRPC_ADDR must not be empty")
}
+ if c.MaxBodyBytes <= 0 {
+ return fmt.Errorf("config: GATEWAY_MAX_BODY_BYTES must be positive")
+ }
if err := c.Telemetry.Validate(); err != nil {
return fmt.Errorf("config: %w", err)
}
diff --git a/gateway/internal/config/config_test.go b/gateway/internal/config/config_test.go
index 8f9afcd..9667ea4 100644
--- a/gateway/internal/config/config_test.go
+++ b/gateway/internal/config/config_test.go
@@ -29,3 +29,19 @@ func TestLoadRejectsUnsupportedExporter(t *testing.T) {
t.Fatal("Load: expected an error for an unsupported exporter, got nil")
}
}
+
+// TestLoadMaxBodyBytes verifies the body-cap default and that a non-positive
+// override fails validation.
+func TestLoadMaxBodyBytes(t *testing.T) {
+ c, err := Load()
+ if err != nil {
+ t.Fatalf("Load: %v", err)
+ }
+ if c.MaxBodyBytes != DefaultMaxBodyBytes {
+ t.Errorf("MaxBodyBytes = %d, want %d", c.MaxBodyBytes, DefaultMaxBodyBytes)
+ }
+ t.Setenv("GATEWAY_MAX_BODY_BYTES", "0")
+ if _, err := Load(); err == nil {
+ t.Fatal("Load: expected an error for a non-positive body cap, got nil")
+ }
+}
diff --git a/gateway/internal/connectsrv/metrics.go b/gateway/internal/connectsrv/metrics.go
index a08c9c2..ab18bf1 100644
--- a/gateway/internal/connectsrv/metrics.go
+++ b/gateway/internal/connectsrv/metrics.go
@@ -24,8 +24,9 @@ var activeUserWindows = []struct {
// serverMetrics holds the edge's operational instruments. It defaults to no-ops;
// NewServer installs the real meter when one is supplied in Deps.
type serverMetrics struct {
- edge metric.Float64Histogram
- active *activeUsers
+ edge metric.Float64Histogram
+ rateLimited metric.Int64Counter
+ active *activeUsers
}
// newServerMetrics builds the instruments on meter (nil selects a no-op meter),
@@ -42,7 +43,12 @@ func newServerMetrics(meter metric.Meter) *serverMetrics {
if err != nil {
h, _ = noop.NewMeterProvider().Meter(meterName).Float64Histogram("edge_request_duration")
}
- m := &serverMetrics{edge: h, active: newActiveUsers()}
+ c, err := meter.Int64Counter("gateway_rate_limited_total",
+ metric.WithDescription("Rate-limiter rejections at the edge, by limiter class (user, public, email or admin) — aggregate only, no per-user attributes."))
+ if err != nil {
+ c, _ = noop.NewMeterProvider().Meter(meterName).Int64Counter("gateway_rate_limited_total")
+ }
+ m := &serverMetrics{edge: h, rateLimited: c, active: newActiveUsers()}
gauge, err := meter.Int64ObservableGauge("active_users",
metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
@@ -75,3 +81,8 @@ func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string,
func (m *serverMetrics) recordActive(uid string) {
m.active.seen(uid)
}
+
+// recordRateLimited counts one limiter rejection under class.
+func (m *serverMetrics) recordRateLimited(ctx context.Context, class string) {
+ m.rateLimited.Add(ctx, 1, metric.WithAttributes(attribute.String("class", class)))
+}
diff --git a/gateway/internal/connectsrv/metrics_test.go b/gateway/internal/connectsrv/metrics_test.go
index 80987cc..2f0910c 100644
--- a/gateway/internal/connectsrv/metrics_test.go
+++ b/gateway/internal/connectsrv/metrics_test.go
@@ -52,3 +52,41 @@ func TestEdgeMetric(t *testing.T) {
t.Errorf("edge auth.guest/domain = %d, want 1", got)
}
}
+
+// TestRateLimitedMetric records limiter rejections through a manual reader and
+// asserts gateway_rate_limited_total splits by class.
+func TestRateLimitedMetric(t *testing.T) {
+ ctx := context.Background()
+ reader := sdkmetric.NewManualReader()
+ meter := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)).Meter("test")
+ m := newServerMetrics(meter)
+
+ m.recordRateLimited(ctx, "user")
+ m.recordRateLimited(ctx, "user")
+ m.recordRateLimited(ctx, "public")
+
+ var rm metricdata.ResourceMetrics
+ if err := reader.Collect(ctx, &rm); err != nil {
+ t.Fatalf("collect: %v", err)
+ }
+
+ counts := map[string]int64{}
+ for _, sm := range rm.ScopeMetrics {
+ for _, md := range sm.Metrics {
+ if md.Name != "gateway_rate_limited_total" {
+ continue
+ }
+ sum, ok := md.Data.(metricdata.Sum[int64])
+ if !ok {
+ t.Fatalf("gateway_rate_limited_total is not an int64 sum")
+ }
+ for _, dp := range sum.DataPoints {
+ class, _ := dp.Attributes.Value(attribute.Key("class"))
+ counts[class.AsString()] += dp.Value
+ }
+ }
+ }
+ if counts["user"] != 2 || counts["public"] != 1 {
+ t.Errorf("rate_limited counts = %v, want user=2 public=1", counts)
+ }
+}
diff --git a/gateway/internal/connectsrv/server.go b/gateway/internal/connectsrv/server.go
index 455ad3d..f1880d3 100644
--- a/gateway/internal/connectsrv/server.go
+++ b/gateway/internal/connectsrv/server.go
@@ -8,6 +8,7 @@ package connectsrv
import (
"context"
+ "errors"
"net"
"net/http"
"strings"
@@ -19,6 +20,7 @@ import (
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
+ "scrabble/gateway/internal/backendclient"
"scrabble/gateway/internal/config"
"scrabble/gateway/internal/push"
"scrabble/gateway/internal/ratelimit"
@@ -32,33 +34,68 @@ import (
// heartbeatKind is the live-stream keep-alive event kind.
const heartbeatKind = "heartbeat"
+// Limiter classes, the `class` attribute of gateway_rate_limited_total and the
+// class field of the periodic rejection report (R3).
+const (
+ classUser = "user"
+ classPublic = "public"
+ classEmail = "email"
+ classAdmin = "admin"
+)
+
+// Explicit h2c server sizing (R3, after the R2 stress run questioned the
+// implicit defaults).
+const (
+ // h2cMaxConcurrentStreams bounds the open streams per client connection — the
+ // x/net default made explicit. A real client holds one Subscribe stream plus a
+ // few unary calls; only a synthetic load multiplexing many players over one
+ // transport approaches it. R7 revisits the sizing.
+ h2cMaxConcurrentStreams = 250
+ // h2cIdleTimeout closes a connection with no open streams. A live Subscribe
+ // stream keeps its connection active, so long-lived clients are unaffected;
+ // only abandoned connections are reaped.
+ h2cIdleTimeout = 3 * time.Minute
+)
+
// Server implements edgev1connect.GatewayHandler.
type Server struct {
registry *transcode.Registry
sessions *session.Cache
limiter *ratelimit.Limiter
+ tracker *ratelimit.Tracker
hub *push.Hub
heartbeat time.Duration
log *zap.Logger
adminProxy http.Handler
metrics *serverMetrics
+ maxBodyBytes int
+
publicPolicy ratelimit.Policy
userPolicy ratelimit.Policy
emailPolicy ratelimit.Policy
+ adminPolicy ratelimit.Policy
}
-// Deps carries the Server's dependencies.
+// Deps carries the Server's dependencies. A nil Limiter, nil Tracker, zero
+// RateLimit and non-positive MaxBodyBytes each select a safe default.
type Deps struct {
- Registry *transcode.Registry
- Sessions *session.Cache
- Limiter *ratelimit.Limiter
+ Registry *transcode.Registry
+ Sessions *session.Cache
+ Limiter *ratelimit.Limiter
+ // Tracker accumulates limiter rejections for the periodic report; nil
+ // selects a private tracker (rejections are then only counted, never
+ // reported).
+ Tracker *ratelimit.Tracker
Hub *push.Hub
RateLimit config.RateLimitConfig
Heartbeat time.Duration
Logger *zap.Logger
AdminProxy http.Handler
Meter metric.Meter
+ // MaxBodyBytes caps one inbound request body and one Connect message read;
+ // zero or negative selects config.DefaultMaxBodyBytes.
+ MaxBodyBytes int
}
// NewServer constructs the edge service.
@@ -67,47 +104,85 @@ func NewServer(d Deps) *Server {
if log == nil {
log = zap.NewNop()
}
+ maxBody := d.MaxBodyBytes
+ if maxBody <= 0 {
+ maxBody = config.DefaultMaxBodyBytes
+ }
+ tracker := d.Tracker
+ if tracker == nil {
+ tracker = ratelimit.NewTracker()
+ }
+ limiter := d.Limiter
+ if limiter == nil {
+ limiter = ratelimit.New()
+ }
+ rl := d.RateLimit
+ if rl == (config.RateLimitConfig{}) {
+ rl = config.DefaultRateLimit()
+ }
return &Server{
registry: d.Registry,
sessions: d.Sessions,
- limiter: d.Limiter,
+ limiter: limiter,
+ tracker: tracker,
hub: d.Hub,
heartbeat: d.Heartbeat,
log: log,
adminProxy: d.AdminProxy,
metrics: newServerMetrics(d.Meter),
- publicPolicy: ratelimit.PerMinute(d.RateLimit.PublicPerMinute, d.RateLimit.PublicBurst),
- userPolicy: ratelimit.PerMinute(d.RateLimit.UserPerMinute, d.RateLimit.UserBurst),
- emailPolicy: ratelimit.Per(d.RateLimit.EmailPer10Min, 10*time.Minute, d.RateLimit.EmailBurst),
+ maxBodyBytes: maxBody,
+ publicPolicy: ratelimit.PerMinute(rl.PublicPerMinute, rl.PublicBurst),
+ userPolicy: ratelimit.PerMinute(rl.UserPerMinute, rl.UserBurst),
+ emailPolicy: ratelimit.Per(rl.EmailPer10Min, 10*time.Minute, rl.EmailBurst),
+ adminPolicy: ratelimit.PerMinute(rl.AdminPerMinute, rl.AdminBurst),
}
}
// HTTPHandler returns the h2c-wrapped Connect handler ready to serve.
func (s *Server) HTTPHandler() http.Handler {
mux := http.NewServeMux()
- path, h := edgev1connect.NewGatewayHandler(s)
+ // The Connect read cap mirrors the HTTP-level body cap below; an oversized
+ // Execute message is refused (resource_exhausted) instead of buffered.
+ path, h := edgev1connect.NewGatewayHandler(s, connect.WithReadMaxBytes(s.maxBodyBytes))
mux.Handle(path, h)
if s.adminProxy != nil {
// The admin console (backend /_gm) is served on the public listener behind
// the proxy's Basic-Auth, mounted below the h2c wrap so the Connect edge keeps
// working over h2c (docs/ARCHITECTURE.md §12). In the deployed contour the
// front caddy owns the /_gm Basic-Auth and Grafana routing; this mount serves
- // a non-caddy (local) setup.
- mux.Handle("/_gm/", s.adminProxy)
+ // a non-caddy (local) setup. The per-IP admin limiter class guards it —
+ // notably a Basic-Auth brute force (R3).
+ mux.Handle("/_gm/", s.limitAdmin(s.adminProxy))
} else {
// With the console disabled here, keep /_gm a 404 so the SPA catch-all below
// does not serve the app shell at the operator path.
mux.Handle("/_gm/", http.NotFoundHandler())
}
- // The embedded UI: the game SPA under /app/ (web) and /telegram/ (the Telegram Mini
- // App), with a separate landing page at the catch-all "/" — the single-origin model
- // (docs/ARCHITECTURE.md §13). All sit below the h2c wrap so the Connect edge (a more
- // specific prefix) keeps priority. Each SPA mount falls back to the app shell
- // (index.html) for the hash router; "/" falls back to the landing (landing.html).
+ // The embedded UI: the game SPA under /app/ (web) and /telegram/ (the Telegram
+ // Mini App) — the single-origin model (docs/ARCHITECTURE.md §13). Both sit below
+ // the h2c wrap so the Connect edge (a more specific prefix) keeps priority, and
+ // each mount falls back to the app shell (index.html) for the hash router. The
+ // public landing moved to its own static container behind the contour caddy
+ // (R3), so the catch-all redirects a stray root hit to the app shell — which
+ // keeps a local no-caddy run usable.
mux.Handle("/telegram/", webui.Handler("/telegram/", "index.html"))
mux.Handle("/app/", webui.Handler("/app/", "index.html"))
- mux.Handle("/", webui.Handler("", "landing.html"))
- return h2c.NewHandler(mux, &http2.Server{})
+ mux.Handle("/", http.RedirectHandler("/app/", http.StatusPermanentRedirect))
+ // Every request body on the public listener is capped (the admin proxy POSTs
+ // included); the h2c server carries explicit stream/idle sizing (R3).
+ return h2c.NewHandler(maxBodyHandler(s.maxBodyBytes, mux), &http2.Server{
+ MaxConcurrentStreams: h2cMaxConcurrentStreams,
+ IdleTimeout: h2cIdleTimeout,
+ })
+}
+
+// maxBodyHandler caps every inbound request body at limit bytes: a read past the
+// cap fails with *http.MaxBytesError and the connection is marked to close.
+func maxBodyHandler(limit int, next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ r.Body = http.MaxBytesReader(w, r.Body, int64(limit))
+ next.ServeHTTP(w, r)
+ })
}
// Execute runs one unary operation. Domain failures are returned in the envelope
@@ -138,17 +213,17 @@ func (s *Server) Execute(ctx context.Context, req *connect.Request[edgev1.Execut
s.metrics.recordActive(uid)
if !s.limiter.Allow("user:"+uid, s.userPolicy) {
result = "rate_limited"
- return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
+ return nil, s.rejectRateLimited(ctx, classUser, uid, msgType)
}
tr.UserID = uid
} else {
if !s.limiter.Allow("ip:"+clientIP, s.publicPolicy) {
result = "rate_limited"
- return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
+ return nil, s.rejectRateLimited(ctx, classPublic, clientIP, msgType)
}
if op.Email && !s.limiter.Allow("email:"+clientIP, s.emailPolicy) {
result = "rate_limited"
- return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
+ return nil, s.rejectRateLimited(ctx, classEmail, clientIP, msgType)
}
}
@@ -180,7 +255,7 @@ func (s *Server) Subscribe(ctx context.Context, req *connect.Request[edgev1.Subs
return err
}
if !s.limiter.Allow("user:"+uid, s.userPolicy) {
- return connect.NewError(connect.CodeResourceExhausted, errRateLimited)
+ return s.rejectRateLimited(ctx, classUser, uid, "subscribe")
}
events, cancel := s.hub.Subscribe(uid)
@@ -216,6 +291,43 @@ func (s *Server) Subscribe(ctx context.Context, req *connect.Request[edgev1.Subs
}
}
+// noteRateLimited accounts one limiter rejection: the aggregate counter, the
+// per-rejection Debug line and the periodic-report tracker. The operational
+// signal is the reporter's Warn summary; per-rejection logging stays at Debug so
+// a rejection flood cannot flood the log (R3).
+func (s *Server) noteRateLimited(ctx context.Context, class, key, msgType string) {
+ s.metrics.recordRateLimited(ctx, class)
+ s.tracker.Add(class, key)
+ s.log.Debug("rate limited",
+ zap.String("class", class),
+ zap.String("key", key),
+ zap.String("message_type", msgType))
+}
+
+// rejectRateLimited accounts one limiter rejection and returns the Connect error
+// for the caller.
+func (s *Server) rejectRateLimited(ctx context.Context, class, key, msgType string) error {
+ s.noteRateLimited(ctx, class, key, msgType)
+ return connect.NewError(connect.CodeResourceExhausted, errRateLimited)
+}
+
+// limitAdmin guards the admin proxy with the per-IP admin limiter class, ahead
+// of its Basic-Auth check (a credential brute force is exactly what it bounds).
+// It covers the gateway-fronted /_gm mount; in the deployed contour /_gm reaches
+// the backend through caddy, whose Basic-Auth has no limiter (stock caddy) — see
+// docs/ARCHITECTURE.md §12 (R3).
+func (s *Server) limitAdmin(next http.Handler) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ ip := peerIP(r.RemoteAddr, r.Header)
+ if !s.limiter.Allow("admin:"+ip, s.adminPolicy) {
+ s.noteRateLimited(r.Context(), classAdmin, ip, "admin")
+ http.Error(w, "rate limited", http.StatusTooManyRequests)
+ return
+ }
+ next.ServeHTTP(w, r)
+ })
+}
+
// resolve extracts and resolves the Authorization bearer token to an account id,
// returning a Connect Unauthenticated error when it is missing or unknown.
func (s *Server) resolve(ctx context.Context, h http.Header) (string, error) {
@@ -225,6 +337,15 @@ func (s *Server) resolve(ctx context.Context, h http.Header) (string, error) {
}
uid, err := s.sessions.Resolve(ctx, token)
if err != nil {
+ // An unknown or expired token (a backend 4xx) is the client's problem and
+ // stays silent; anything else — a resolve timeout, a refused connection, a
+ // backend 5xx — is an infra failure misread as "unauthenticated" by the
+ // client, so surface the cause (the transient resolves seen under load in
+ // the R2 stress run). The token itself is never logged.
+ var apiErr *backendclient.APIError
+ if !errors.As(err, &apiErr) || apiErr.Status >= http.StatusInternalServerError {
+ s.log.Warn("session resolve failed", zap.Error(err))
+ }
return "", connect.NewError(connect.CodeUnauthenticated, errInvalidSession)
}
return uid, nil
@@ -247,10 +368,8 @@ func bearerToken(header string) string {
// peer address (host part).
func peerIP(peerAddr string, h http.Header) string {
if xff := h.Get("X-Forwarded-For"); xff != "" {
- if i := strings.IndexByte(xff, ','); i >= 0 {
- return strings.TrimSpace(xff[:i])
- }
- return strings.TrimSpace(xff)
+ first, _, _ := strings.Cut(xff, ",")
+ return strings.TrimSpace(first)
}
if host, _, err := net.SplitHostPort(peerAddr); err == nil {
return host
diff --git a/gateway/internal/connectsrv/server_test.go b/gateway/internal/connectsrv/server_test.go
index 59fcadf..d5c8f52 100644
--- a/gateway/internal/connectsrv/server_test.go
+++ b/gateway/internal/connectsrv/server_test.go
@@ -83,6 +83,140 @@ func TestExecuteAuthedRequiresSession(t *testing.T) {
}
}
+// TestExecuteRateLimitedTracked verifies a limiter rejection returns
+// ResourceExhausted and lands in the rejection tracker under the public class,
+// keyed by the client IP (R3).
+func TestExecuteRateLimitedTracked(t *testing.T) {
+ backendSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte(`{"token":"tok","user_id":"u-1","is_guest":true,"display_name":"Guest"}`))
+ }))
+ defer backendSrv.Close()
+ backend, err := backendclient.New(backendSrv.URL, "localhost:9090", 2*time.Second)
+ if err != nil {
+ t.Fatalf("backendclient: %v", err)
+ }
+ defer func() { _ = backend.Close() }()
+
+ limits := config.DefaultRateLimit()
+ limits.PublicPerMinute, limits.PublicBurst = 1, 1
+ tracker := ratelimit.NewTracker()
+ edge := connectsrv.NewServer(connectsrv.Deps{
+ Registry: transcode.NewRegistry(backend, nil),
+ Sessions: session.NewCache(backend, time.Minute, 100),
+ Limiter: ratelimit.New(),
+ Tracker: tracker,
+ Hub: push.NewHub(0),
+ RateLimit: limits,
+ Heartbeat: 15 * time.Second,
+ })
+ edgeSrv := httptest.NewServer(edge.HTTPHandler())
+ defer edgeSrv.Close()
+ client := edgev1connect.NewGatewayClient(http.DefaultClient, edgeSrv.URL)
+
+ if _, err := client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
+ MessageType: transcode.MsgAuthGuest,
+ })); err != nil {
+ t.Fatalf("first execute: %v", err)
+ }
+ _, err = client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
+ MessageType: transcode.MsgAuthGuest,
+ }))
+ if connect.CodeOf(err) != connect.CodeResourceExhausted {
+ t.Fatalf("code = %v, want ResourceExhausted", connect.CodeOf(err))
+ }
+
+ entries := tracker.Drain()
+ if len(entries) != 1 {
+ t.Fatalf("tracker drained %d entries, want 1", len(entries))
+ }
+ if e := entries[0]; e.Class != "public" || e.Key != "127.0.0.1" || e.Rejected != 1 {
+ t.Fatalf("tracked %+v, want public/127.0.0.1 rejected=1", e)
+ }
+}
+
+// TestAdminMountRateLimited verifies the /_gm mount is guarded by the per-IP
+// admin limiter class ahead of the proxy's Basic-Auth (R3).
+func TestAdminMountRateLimited(t *testing.T) {
+ backendSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))
+ defer backendSrv.Close()
+ backend, err := backendclient.New(backendSrv.URL, "localhost:9090", 2*time.Second)
+ if err != nil {
+ t.Fatalf("backendclient: %v", err)
+ }
+ defer func() { _ = backend.Close() }()
+
+ limits := config.DefaultRateLimit()
+ limits.AdminPerMinute, limits.AdminBurst = 1, 1
+ edge := connectsrv.NewServer(connectsrv.Deps{
+ Registry: transcode.NewRegistry(backend, nil),
+ Sessions: session.NewCache(backend, time.Minute, 100),
+ Limiter: ratelimit.New(),
+ Hub: push.NewHub(0),
+ RateLimit: limits,
+ Heartbeat: 15 * time.Second,
+ AdminProxy: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ }),
+ })
+ edgeSrv := httptest.NewServer(edge.HTTPHandler())
+ defer edgeSrv.Close()
+
+ first, err := http.Get(edgeSrv.URL + "/_gm/")
+ if err != nil {
+ t.Fatalf("first /_gm: %v", err)
+ }
+ _ = first.Body.Close()
+ if first.StatusCode != http.StatusOK {
+ t.Fatalf("first /_gm = %d, want 200", first.StatusCode)
+ }
+ second, err := http.Get(edgeSrv.URL + "/_gm/")
+ if err != nil {
+ t.Fatalf("second /_gm: %v", err)
+ }
+ _ = second.Body.Close()
+ if second.StatusCode != http.StatusTooManyRequests {
+ t.Fatalf("second /_gm = %d, want 429", second.StatusCode)
+ }
+}
+
+// TestExecuteOversizedPayloadRejected verifies the request-body cap: an Execute
+// message above GATEWAY_MAX_BODY_BYTES is refused at the edge without reaching
+// the backend (R3).
+func TestExecuteOversizedPayloadRejected(t *testing.T) {
+ client, cleanup := newEdge(t, func(w http.ResponseWriter, r *http.Request) {
+ t.Error("backend must not be called for an oversized payload")
+ })
+ defer cleanup()
+
+ _, err := client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
+ MessageType: transcode.MsgAuthGuest,
+ Payload: make([]byte, config.DefaultMaxBodyBytes+1),
+ }))
+ if connect.CodeOf(err) != connect.CodeResourceExhausted {
+ t.Fatalf("code = %v, want ResourceExhausted", connect.CodeOf(err))
+ }
+}
+
+// TestRootRedirectsToApp verifies the gateway no longer serves a landing at "/"
+// (it lives in the landing container since R3): a stray root hit is redirected
+// to the app shell.
+func TestRootRedirectsToApp(t *testing.T) {
+ front := httptest.NewServer(connectsrv.NewServer(connectsrv.Deps{}).HTTPHandler())
+ defer front.Close()
+
+ client := &http.Client{CheckRedirect: func(*http.Request, []*http.Request) error {
+ return http.ErrUseLastResponse
+ }}
+ resp, err := client.Get(front.URL + "/")
+ if err != nil {
+ t.Fatalf("get /: %v", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+ if resp.StatusCode != http.StatusPermanentRedirect || resp.Header.Get("Location") != "/app/" {
+ t.Fatalf("GET / = %d -> %q, want 308 -> /app/", resp.StatusCode, resp.Header.Get("Location"))
+ }
+}
+
func TestExecuteUnknownMessageType(t *testing.T) {
client, cleanup := newEdge(t, func(w http.ResponseWriter, r *http.Request) {})
defer cleanup()
diff --git a/gateway/internal/ratelimit/tracker.go b/gateway/internal/ratelimit/tracker.go
new file mode 100644
index 0000000..92aa602
--- /dev/null
+++ b/gateway/internal/ratelimit/tracker.go
@@ -0,0 +1,52 @@
+package ratelimit
+
+import "sync"
+
+// Rejection aggregates the limiter rejections of one key within one report
+// window. Class is the limiter class (user, public, email or admin); Key is the
+// class-specific subject — an account id for the user class, a client IP for the
+// others. The JSON shape is the gateway→backend rate-limit report wire contract.
+type Rejection struct {
+ Class string `json:"class"`
+ Key string `json:"key"`
+ Rejected int `json:"rejected"`
+}
+
+// Tracker accumulates limiter rejections between drains. The gateway's periodic
+// reporter drains it to emit the per-key log summary and the backend report; the
+// per-rejection cost is one map increment under a mutex, safe on the hot path.
+type Tracker struct {
+ mu sync.Mutex
+ m map[trackerKey]int
+}
+
+type trackerKey struct{ class, key string }
+
+// NewTracker constructs an empty Tracker.
+func NewTracker() *Tracker {
+ return &Tracker{m: make(map[trackerKey]int)}
+}
+
+// Add counts one rejection of key under class.
+func (t *Tracker) Add(class, key string) {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ t.m[trackerKey{class: class, key: key}]++
+}
+
+// Drain returns the rejections accumulated since the previous drain, in
+// unspecified order, and resets the tracker. It returns nil when nothing was
+// rejected.
+func (t *Tracker) Drain() []Rejection {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ if len(t.m) == 0 {
+ return nil
+ }
+ out := make([]Rejection, 0, len(t.m))
+ for k, n := range t.m {
+ out = append(out, Rejection{Class: k.class, Key: k.key, Rejected: n})
+ }
+ clear(t.m)
+ return out
+}
diff --git a/gateway/internal/ratelimit/tracker_test.go b/gateway/internal/ratelimit/tracker_test.go
new file mode 100644
index 0000000..4f10f3d
--- /dev/null
+++ b/gateway/internal/ratelimit/tracker_test.go
@@ -0,0 +1,38 @@
+package ratelimit_test
+
+import (
+ "testing"
+
+ "scrabble/gateway/internal/ratelimit"
+)
+
+// TestTrackerDrain verifies rejections aggregate per (class, key) and that a
+// drain resets the tracker.
+func TestTrackerDrain(t *testing.T) {
+ tr := ratelimit.NewTracker()
+ if got := tr.Drain(); got != nil {
+ t.Fatalf("empty tracker drained %v, want nil", got)
+ }
+
+ tr.Add("user", "u-1")
+ tr.Add("user", "u-1")
+ tr.Add("public", "10.0.0.1")
+
+ got := map[string]ratelimit.Rejection{}
+ for _, r := range tr.Drain() {
+ got[r.Class+"/"+r.Key] = r
+ }
+ if len(got) != 2 {
+ t.Fatalf("drained %d entries, want 2", len(got))
+ }
+ if r := got["user/u-1"]; r.Rejected != 2 {
+ t.Errorf("user/u-1 rejected = %d, want 2", r.Rejected)
+ }
+ if r := got["public/10.0.0.1"]; r.Rejected != 1 {
+ t.Errorf("public/10.0.0.1 rejected = %d, want 1", r.Rejected)
+ }
+
+ if got := tr.Drain(); got != nil {
+ t.Fatalf("second drain = %v, want nil", got)
+ }
+}
diff --git a/gateway/internal/webui/dist/landing.html b/gateway/internal/webui/dist/landing.html
deleted file mode 100644
index f06ae67..0000000
--- a/gateway/internal/webui/dist/landing.html
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-
- Scrabble
-
-
-
-
- Landing build placeholder. The production gateway image embeds the real Vite
- build (see gateway/Dockerfile); seeing this page means the binary was built
- without a UI build.
-
-
-
diff --git a/gateway/internal/webui/webui.go b/gateway/internal/webui/webui.go
index 5bde63e..2336265 100644
--- a/gateway/internal/webui/webui.go
+++ b/gateway/internal/webui/webui.go
@@ -1,12 +1,13 @@
// Package webui serves the embedded static UI build over the public edge.
//
-// The committed dist/ holds only placeholder index.html / landing.html so the gateway
-// module compiles with a plain `go build` (and in CI) without a UI build. The production
-// gateway image replaces dist/ with the real Vite build before compiling (see
-// gateway/Dockerfile), so the binary ships the UI inside it. Because Vite is built with a
-// relative asset base, one build serves under any path: the game SPA is mounted at /app/
-// (web) and /telegram/ (the Telegram Mini App), with a separate landing page at / — the
-// single-origin model in docs/ARCHITECTURE.md §13.
+// The committed dist/ holds only a placeholder index.html so the gateway module
+// compiles with a plain `go build` (and in CI) without a UI build. The production
+// gateway image replaces dist/ with the real Vite build — minus landing.html, which
+// ships in the separate landing container since R3 — before compiling (see
+// gateway/Dockerfile), so the binary ships the UI inside it. Because Vite is built
+// with a relative asset base, one build serves under any path: the game SPA is
+// mounted at /app/ (web) and /telegram/ (the Telegram Mini App) — the single-origin
+// model in docs/ARCHITECTURE.md §13.
//
// Caching (Stage 17): Vite emits hash-named files under assets/, so those are immutable and
// cached hard (a reload/relaunch is a cache hit, not a re-download); the HTML shells carry
@@ -35,10 +36,10 @@ func distFS() fs.FS {
}
// Handler serves the embedded UI. An existing file is served directly (hash-named assets get
-// an immutable cache); every other path falls back to indexName (the SPA shell or the landing
-// page) so a client-side deep link still loads. When stripPrefix is non-empty it is removed
-// from the request path before lookup, so the same build serves under a sub-path (e.g.
-// "/app/" or "/telegram/").
+// an immutable cache); every other path falls back to indexName (the SPA shell) so a
+// client-side deep link still loads. When stripPrefix is non-empty it is removed from the
+// request path before lookup, so the same build serves under a sub-path (e.g. "/app/" or
+// "/telegram/").
func Handler(stripPrefix, indexName string) http.Handler {
content := distFS()
files := http.FileServer(http.FS(content))
diff --git a/gateway/internal/webui/webui_test.go b/gateway/internal/webui/webui_test.go
index c7a4d4a..c40d30f 100644
--- a/gateway/internal/webui/webui_test.go
+++ b/gateway/internal/webui/webui_test.go
@@ -22,20 +22,12 @@ func body(t *testing.T, resp *http.Response) string {
return string(b)
}
-// TestLandingMountServesLandingAndFallsBack: "/" serves the landing shell (no-cache) and
-// any unknown path falls back to it.
-func TestLandingMountServesLandingAndFallsBack(t *testing.T) {
- h := Handler("", "landing.html")
-
- resp := get(t, h, "/")
- if resp.StatusCode != http.StatusOK || !strings.Contains(body(t, resp), "scrabble-landing") {
- t.Fatalf("GET / did not serve the landing shell (status %d)", resp.StatusCode)
- }
- if cc := get(t, h, "/").Header.Get("Cache-Control"); cc != "no-cache" {
- t.Errorf("landing Cache-Control = %q, want no-cache", cc)
- }
- if resp := get(t, h, "/whatever"); resp.StatusCode != http.StatusOK {
- t.Fatalf("GET /whatever status = %d, want 200 (fallback)", resp.StatusCode)
+// TestShellNoCache: the served HTML shell carries no-cache so a new deploy's
+// shell (and the asset URLs it references) is fetched fresh.
+func TestShellNoCache(t *testing.T) {
+ h := Handler("/app/", "index.html")
+ if cc := get(t, h, "/app/").Header.Get("Cache-Control"); cc != "no-cache" {
+ t.Errorf("shell Cache-Control = %q, want no-cache", cc)
}
}