Merge pull request 'R3: edge hardening — body cap, rate-limit observability, auto-flag, landing split' (#34) from feature/r3-edge-hardening into development
This commit was merged in pull request #34.
This commit is contained in:
@@ -298,17 +298,21 @@ jobs:
|
||||
# pick up the fresh config.
|
||||
docker compose --ansi never up -d --force-recreate --no-deps caddy otelcol prometheus tempo grafana
|
||||
|
||||
- name: Probe the gateway through caddy
|
||||
- name: Probe the landing and the gateway through caddy
|
||||
run: |
|
||||
set -u
|
||||
# Two probes through the contour caddy (R3 split): "/" is the static
|
||||
# landing container, "/app/" is the gateway-served SPA shell.
|
||||
for i in $(seq 1 20); do
|
||||
if docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/; then
|
||||
echo "healthy: GET http://scrabble/"
|
||||
if docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/ &&
|
||||
docker run --rm --network edge alpine:3.20 wget -q -T 5 -O /dev/null http://scrabble/app/; then
|
||||
echo "healthy: GET http://scrabble/ (landing) + /app/ (gateway)"
|
||||
exit 0
|
||||
fi
|
||||
sleep 3
|
||||
done
|
||||
echo "probe failed; recent gateway logs:"
|
||||
echo "probe failed; recent landing + gateway logs:"
|
||||
docker logs --tail 50 scrabble-landing || true
|
||||
docker logs --tail 50 scrabble-gateway || true
|
||||
exit 1
|
||||
|
||||
|
||||
@@ -16,3 +16,6 @@
|
||||
# Local, unstaged env overrides
|
||||
**/.env.local
|
||||
**/.env.*.local
|
||||
|
||||
# Claude Code harness runtime artifacts
|
||||
.claude/scheduled_tasks.lock
|
||||
|
||||
@@ -127,8 +127,8 @@ docs/ .gitea/workflows/ PLAN.md CLAUDE.md README.md
|
||||
gateway/ ui/ pkg/ # added by their stages
|
||||
platform/telegram/ # Telegram connector side-service (Stage 9): bot + gRPC API
|
||||
loadtest/ # module scrabble/loadtest: the pre-release stress harness (R2)
|
||||
backend/Dockerfile gateway/Dockerfile platform/telegram/Dockerfile loadtest/Dockerfile # multi-stage distroless (Stage 16; loadtest R2)
|
||||
deploy/ # docker-compose + caddy + otelcol/prometheus/tempo/grafana (+ cAdvisor/postgres_exporter, R2)
|
||||
backend/Dockerfile gateway/Dockerfile platform/telegram/Dockerfile loadtest/Dockerfile # multi-stage distroless (Stage 16; loadtest R2); gateway/Dockerfile also has the `landing` target (R3)
|
||||
deploy/ # docker-compose + caddy + landing + otelcol/prometheus/tempo/grafana (+ cAdvisor/postgres_exporter, R2)
|
||||
```
|
||||
|
||||
## Build & test
|
||||
@@ -144,8 +144,9 @@ go run ./backend/cmd/backend # /healthz, /readyz on :8080
|
||||
cd ui && pnpm install && pnpm check && pnpm test:unit && pnpm build # the UI (Stage 7+)
|
||||
pnpm start # UI mock mode: lobby -> game, no backend
|
||||
|
||||
docker build -f backend/Dockerfile -t scrabble-backend . # images (Stage 16); gateway embeds the UI
|
||||
docker build -f gateway/Dockerfile -t scrabble-gateway .
|
||||
docker build -f backend/Dockerfile -t scrabble-backend . # images (Stage 16); gateway embeds the SPA
|
||||
docker build -f gateway/Dockerfile --target gateway -t scrabble-gateway .
|
||||
docker build -f gateway/Dockerfile --target landing -t scrabble-landing . # static landing (R3)
|
||||
docker compose -f deploy/docker-compose.yml config # validate the full contour
|
||||
```
|
||||
|
||||
|
||||
+29
-1
@@ -19,7 +19,7 @@ the edge before prod. Each phase maps back to the owner's raw pre-release TODO l
|
||||
|---|-------|-----------|--------|
|
||||
| R1 | Schema & naming reset | 1 + 10 | **done** |
|
||||
| R2 | Stress harness + contour observability + early run | 9a | **done** |
|
||||
| R3 | Edge hardening | 2 + 8 + 3 | todo |
|
||||
| R3 | Edge hardening | 2 + 8 + 3 | **done** |
|
||||
| R4 | Push enrichment + kill the last poll | 4 + 5 | todo |
|
||||
| R5 | Bundle slimming | 6 | todo |
|
||||
| R6 | Refactor + docs reconciliation + de-staging | 7 | todo |
|
||||
@@ -253,3 +253,31 @@ Then Stage 18.
|
||||
it feeds R3 (h2c `MaxConcurrentStreams`/timeouts, body-size cap), R6 and R7 (per-player transports,
|
||||
separate hardware, pool/limit sizing).
|
||||
- **CI:** `./loadtest/...` added to the path filter + vet/build/test; `go.work.sum` carries the new deps.
|
||||
|
||||
- **R3** (interview + implementation):
|
||||
- **Locked decisions:** the flag column lands by **editing the R1 baseline** (+ a contour schema
|
||||
wipe after merge — no migration chain accrues before prod); auto-flag defaults **1000 rejected /
|
||||
10 min** (`BACKEND_HIGHRATE_FLAG_THRESHOLD`/`_WINDOW`, rolling window, set-once, operator clears,
|
||||
no auto-ban); landing image = **caddy:2-alpine**; throttle data flows **gateway → backend** (a
|
||||
30 s per-key summary POST to the new `/api/v1/internal/ratelimit/report`, the existing trusted
|
||||
direction) with the episode window + flag rule in the backend (`internal/ratewatch`); rejection
|
||||
logging = **Warn summary per key per window + Debug per rejection** — a deliberate deviation from
|
||||
the phase's "structured log per rejection" (the R2 hammer would have logged ~522k lines in
|
||||
minutes); all three R2-report tails included (explicit h2c sizing, the session-resolve failure
|
||||
cause at Warn, reviving the admin limiter).
|
||||
- **Body cap:** `GATEWAY_MAX_BODY_BYTES` (default 1 MiB) as both the Connect per-message read limit
|
||||
and an `http.MaxBytesReader` wrap of the public mux; an oversized Execute is `resource_exhausted`.
|
||||
- **Dead config found:** `AdminPerMinute`/`AdminBurst` were never wired — the gateway `/_gm` mount is
|
||||
now 429-guarded per IP ahead of its Basic-Auth. The caddy-fronted contour path stays unlimited
|
||||
(stock caddy has no limiter) — an accepted gap, recorded in `docs/ARCHITECTURE.md` §12.
|
||||
- **Landing split:** a `landing` target in `gateway/Dockerfile` (the UI build stage is shared;
|
||||
identical compose build args keep it one cached build); the gateway drops `landing.html` from the
|
||||
embed and 308-redirects `/` → `/app/`; the contour caddy routes `/app/`, `/telegram/` and the
|
||||
Connect path to the gateway and the catch-all to the landing container; the CI deploy probe now
|
||||
checks both `/` (landing) and `/app/` (gateway).
|
||||
- **Observability:** `gateway_rate_limited_total{class}` (user/public/email/admin, aggregate-only)
|
||||
+ a rate-vs-rejections panel on the Edge/UX dashboard; the admin console gains the **Throttled**
|
||||
page (the in-memory episode window, reset-on-restart like `active_users`, plus the flagged-account
|
||||
queue) and the flag badge / clear action on the user list / card.
|
||||
- The jet regen also restored the previously missing `game_drafts`/`game_hidden` generated models
|
||||
(their tables were added after the last jetgen run; no behaviour change).
|
||||
|
||||
@@ -99,6 +99,14 @@ durable owner — then the durable account wins and a fresh session is minted fo
|
||||
The `accounts.paid_account`/`merged_into`/`merged_at` columns back this. This supersedes the
|
||||
Stage 8 `email.bind.*` edge surface (the `RequestCode`/`ConfirmCode` primitives stay).
|
||||
|
||||
**R3** adds rate-limit observability: the gateway posts its periodic rejection
|
||||
summaries to `POST /api/v1/internal/ratelimit/report`; `internal/ratewatch` keeps a
|
||||
bounded in-memory episode window for the console's **Throttled** page and applies the
|
||||
conservative auto-flag — an account sustaining `BACKEND_HIGHRATE_FLAG_THRESHOLD`
|
||||
rejected calls within `BACKEND_HIGHRATE_FLAG_WINDOW` gets the soft, reversible
|
||||
`accounts.flagged_high_rate_at` marker (set-once; a badge in the user list and a
|
||||
**Clear** action on the user card; never an automatic ban).
|
||||
|
||||
## Package layout
|
||||
|
||||
```
|
||||
@@ -121,6 +129,7 @@ internal/lobby/ # in-memory matchmaking pool (+ robot substitution) + frien
|
||||
internal/robot/ # human-like robot opponent: account pool, seed-derived strategy, move driver
|
||||
internal/adminconsole/ # server-rendered admin console (Go templates + embedded CSS, view models), served at /_gm
|
||||
internal/connector/ # backend gRPC client to the Telegram connector (operator broadcasts)
|
||||
internal/ratewatch/ # gateway rate-limit reports: episode window for the console + the high-rate auto-flag (R3)
|
||||
```
|
||||
|
||||
## Configuration (environment)
|
||||
@@ -153,6 +162,8 @@ internal/connector/ # backend gRPC client to the Telegram connector (operator b
|
||||
| `BACKEND_CONNECTOR_ADDR` | — | Telegram connector gRPC address for admin-console operator broadcasts. Empty disables broadcasts. |
|
||||
| `BACKEND_GUEST_REAP_INTERVAL` | `1h` | How often the abandoned-guest reaper sweeps. |
|
||||
| `BACKEND_GUEST_RETENTION` | `720h` | Account age past which a guest with no game seat is deleted. |
|
||||
| `BACKEND_HIGHRATE_FLAG_THRESHOLD` | `1000` | Gateway-reported rejected calls within the window past which an account is soft-flagged (R3). |
|
||||
| `BACKEND_HIGHRATE_FLAG_WINDOW` | `10m` | The rolling window those rejections accumulate over. |
|
||||
|
||||
## Run
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ import (
|
||||
"scrabble/backend/internal/notify"
|
||||
"scrabble/backend/internal/postgres"
|
||||
"scrabble/backend/internal/pushgrpc"
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
"scrabble/backend/internal/robot"
|
||||
"scrabble/backend/internal/server"
|
||||
"scrabble/backend/internal/session"
|
||||
@@ -177,6 +178,13 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
|
||||
invitations.SetNotifier(hub)
|
||||
logger.Info("lobby and social domains ready", zap.Duration("robot_wait", cfg.Lobby.RobotWait))
|
||||
|
||||
// R3 rate-limit observability: ingest the gateway's rejection reports for the
|
||||
// admin throttled view and the conservative high-rate auto-flag.
|
||||
rateWatch := ratewatch.New(cfg.RateWatch, accounts, logger)
|
||||
logger.Info("rate watch ready",
|
||||
zap.Int("flag_threshold", cfg.RateWatch.FlagThreshold),
|
||||
zap.Duration("flag_window", cfg.RateWatch.FlagWindow))
|
||||
|
||||
srv := server.New(cfg.HTTPAddr, server.Deps{
|
||||
Logger: logger,
|
||||
DB: db,
|
||||
@@ -193,6 +201,7 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
|
||||
Registry: registry,
|
||||
DictDir: cfg.Game.DictDir,
|
||||
Connector: conn,
|
||||
RateWatch: rateWatch,
|
||||
})
|
||||
pushSrv := pushgrpc.NewServer(cfg.GRPCAddr, hub, logger)
|
||||
|
||||
|
||||
@@ -76,8 +76,13 @@ type Account struct {
|
||||
// uuid.Nil for a live account. A tombstone keeps the row so the no-cascade
|
||||
// foreign keys of a shared finished game stay valid (Stage 11).
|
||||
MergedInto uuid.UUID
|
||||
CreatedAt time.Time
|
||||
UpdatedAt time.Time
|
||||
// FlaggedHighRateAt is the soft, reversible "suspected high-rate" marker: the
|
||||
// zero time for an unflagged account, otherwise when the gateway-reported
|
||||
// rate-limiter rejections first crossed the sustained threshold (R3). An
|
||||
// operator clears it in the admin console; it never gates any request.
|
||||
FlaggedHighRateAt time.Time
|
||||
CreatedAt time.Time
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
|
||||
// Identity is one of an account's platform/email identities, surfaced on the
|
||||
@@ -422,6 +427,43 @@ func (s *Store) SpendHint(ctx context.Context, id uuid.UUID) (bool, error) {
|
||||
return n > 0, nil
|
||||
}
|
||||
|
||||
// FlagHighRate stamps the soft "suspected high-rate" marker with at, only when
|
||||
// the account is not already flagged — the first sustained episode wins, and a
|
||||
// re-flag after an operator clear starts a fresh timestamp. An infra marker, not
|
||||
// a profile edit, so updated_at is untouched; it never gates any request (R3).
|
||||
// It reports whether the flag was newly set.
|
||||
func (s *Store) FlagHighRate(ctx context.Context, id uuid.UUID, at time.Time) (bool, error) {
|
||||
stmt := table.Accounts.
|
||||
UPDATE(table.Accounts.FlaggedHighRateAt).
|
||||
SET(postgres.TimestampzT(at.UTC())).
|
||||
WHERE(
|
||||
table.Accounts.AccountID.EQ(postgres.UUID(id)).
|
||||
AND(table.Accounts.FlaggedHighRateAt.IS_NULL()),
|
||||
)
|
||||
res, err := stmt.ExecContext(ctx, s.db)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("account: flag high rate %s: %w", id, err)
|
||||
}
|
||||
n, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("account: flag high rate rows %s: %w", id, err)
|
||||
}
|
||||
return n > 0, nil
|
||||
}
|
||||
|
||||
// ClearHighRateFlag removes the high-rate marker — the operator's reversible
|
||||
// action in the admin console. Clearing an unflagged account is a no-op.
|
||||
func (s *Store) ClearHighRateFlag(ctx context.Context, id uuid.UUID) error {
|
||||
stmt := table.Accounts.
|
||||
UPDATE(table.Accounts.FlaggedHighRateAt).
|
||||
SET(postgres.NULL).
|
||||
WHERE(table.Accounts.AccountID.EQ(postgres.UUID(id)))
|
||||
if _, err := stmt.ExecContext(ctx, s.db); err != nil {
|
||||
return fmt.Errorf("account: clear high-rate flag %s: %w", id, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetServiceLanguage records the service language (en/ru) of the bot a Telegram
|
||||
// user authenticated through. It is called on every Telegram login — new and
|
||||
// existing accounts — so it tracks the bot the user last came through (last-login-
|
||||
@@ -452,6 +494,10 @@ func modelToAccount(row model.Accounts) Account {
|
||||
if row.ServiceLanguage != nil {
|
||||
serviceLanguage = *row.ServiceLanguage
|
||||
}
|
||||
var flaggedHighRateAt time.Time
|
||||
if row.FlaggedHighRateAt != nil {
|
||||
flaggedHighRateAt = *row.FlaggedHighRateAt
|
||||
}
|
||||
return Account{
|
||||
ID: row.AccountID,
|
||||
DisplayName: row.DisplayName,
|
||||
@@ -467,6 +513,7 @@ func modelToAccount(row model.Accounts) Account {
|
||||
NotificationsInAppOnly: row.NotificationsInAppOnly,
|
||||
PaidAccount: row.PaidAccount,
|
||||
MergedInto: mergedInto,
|
||||
FlaggedHighRateAt: flaggedHighRateAt,
|
||||
CreatedAt: row.CreatedAt,
|
||||
UpdatedAt: row.UpdatedAt,
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package account
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -18,6 +19,9 @@ type UserListItem struct {
|
||||
PreferredLanguage string
|
||||
IsGuest bool
|
||||
IsRobot bool
|
||||
// FlaggedHighRateAt is the soft high-rate marker (zero when unflagged), shown
|
||||
// as a badge in the console list (R3).
|
||||
FlaggedHighRateAt time.Time
|
||||
CreatedAt time.Time
|
||||
}
|
||||
|
||||
@@ -65,7 +69,7 @@ func userListWhere(f UserFilter) (string, []any) {
|
||||
// ListUsers returns the filtered admin user list, newest first, paginated.
|
||||
func (s *Store) ListUsers(ctx context.Context, f UserFilter, limit, offset int) ([]UserListItem, error) {
|
||||
where, args := userListWhere(f)
|
||||
q := `SELECT a.account_id, a.display_name, a.preferred_language, a.is_guest, a.created_at, ` + robotExists + ` AS is_robot
|
||||
q := `SELECT a.account_id, a.display_name, a.preferred_language, a.is_guest, a.flagged_high_rate_at, a.created_at, ` + robotExists + ` AS is_robot
|
||||
FROM backend.accounts a WHERE ` + where +
|
||||
fmt.Sprintf(` ORDER BY a.created_at DESC LIMIT $%d OFFSET $%d`, len(args)+1, len(args)+2)
|
||||
args = append(args, limit, offset)
|
||||
@@ -77,14 +81,51 @@ FROM backend.accounts a WHERE ` + where +
|
||||
var out []UserListItem
|
||||
for rows.Next() {
|
||||
var it UserListItem
|
||||
if err := rows.Scan(&it.ID, &it.DisplayName, &it.PreferredLanguage, &it.IsGuest, &it.CreatedAt, &it.IsRobot); err != nil {
|
||||
var flagged sql.NullTime
|
||||
if err := rows.Scan(&it.ID, &it.DisplayName, &it.PreferredLanguage, &it.IsGuest, &flagged, &it.CreatedAt, &it.IsRobot); err != nil {
|
||||
return nil, fmt.Errorf("account: scan user: %w", err)
|
||||
}
|
||||
if flagged.Valid {
|
||||
it.FlaggedHighRateAt = flagged.Time
|
||||
}
|
||||
out = append(out, it)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// FlaggedAccount is one row of the console's high-rate review queue.
|
||||
type FlaggedAccount struct {
|
||||
ID uuid.UUID
|
||||
DisplayName string
|
||||
FlaggedHighRateAt time.Time
|
||||
}
|
||||
|
||||
// flaggedListCap bounds the console's flagged-account list; the operator clears
|
||||
// flags as they are reviewed, so the queue stays short in practice.
|
||||
const flaggedListCap = 200
|
||||
|
||||
// ListFlaggedHighRate returns the accounts carrying the high-rate flag, most
|
||||
// recently flagged first (R3).
|
||||
func (s *Store) ListFlaggedHighRate(ctx context.Context) ([]FlaggedAccount, error) {
|
||||
rows, err := s.db.QueryContext(ctx,
|
||||
`SELECT account_id, display_name, flagged_high_rate_at
|
||||
FROM backend.accounts WHERE flagged_high_rate_at IS NOT NULL
|
||||
ORDER BY flagged_high_rate_at DESC LIMIT $1`, flaggedListCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("account: list flagged: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
var out []FlaggedAccount
|
||||
for rows.Next() {
|
||||
var fa FlaggedAccount
|
||||
if err := rows.Scan(&fa.ID, &fa.DisplayName, &fa.FlaggedHighRateAt); err != nil {
|
||||
return nil, fmt.Errorf("account: scan flagged: %w", err)
|
||||
}
|
||||
out = append(out, fa)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// CountUsers counts the filtered admin user list, for pagination.
|
||||
func (s *Store) CountUsers(ctx context.Context, f UserFilter) (int, error) {
|
||||
where, args := userListWhere(f)
|
||||
|
||||
@@ -21,8 +21,14 @@ func TestRendererRendersEveryPage(t *testing.T) {
|
||||
want string
|
||||
}{
|
||||
{"dashboard", DashboardView{Accounts: 3, Variants: []VariantVersions{{Variant: "scrabble_en", Latest: "v1", Versions: []string{"v1"}}}}, "Dashboard"},
|
||||
{"users", UsersView{Items: []UserRow{{ID: "a1", DisplayName: "Kaya"}}, Pager: NewPager(1, 50, 1)}, "Kaya"},
|
||||
{"users", UsersView{Items: []UserRow{{ID: "a1", DisplayName: "Kaya", FlaggedHighRate: true}}, Pager: NewPager(1, 50, 1)}, "high-rate"},
|
||||
{"user_detail", UserDetailView{ID: "a1", DisplayName: "Kaya", HasStats: true, Stats: StatsRow{Wins: 2}, TelegramID: "123", ConnectorEnabled: true}, "Send Telegram message"},
|
||||
{"user_detail", UserDetailView{ID: "a1", DisplayName: "Kaya", FlaggedHighRateAt: "2026-06-10 12:00"}, "Clear high-rate flag"},
|
||||
{"throttled", ThrottledView{
|
||||
Episodes: []ThrottleEpisodeRow{{Class: "user", Key: "a1", UserID: "a1", Rejected: 1234, FirstSeen: "2026-06-10 12:00", LastSeen: "2026-06-10 12:05"}},
|
||||
Flagged: []FlaggedAccountRow{{ID: "a1", DisplayName: "Kaya", FlaggedAt: "2026-06-10 12:05"}},
|
||||
FlagThreshold: 1000, FlagWindow: "10m0s",
|
||||
}, "Recent episodes"},
|
||||
{"games", GamesView{Items: []GameRow{{ID: "g1", Variant: "scrabble_en", Status: "active"}}, Status: "active", Pager: NewPager(1, 50, 1)}, "g1"},
|
||||
{"game_detail", GameDetailView{ID: "g1", Variant: "scrabble_en", Seats: []SeatRow{{Seat: 0, DisplayName: "Kaya"}}}, "Seats"},
|
||||
{"complaints", ComplaintsView{Items: []ComplaintRow{{ID: "c1", Word: "qi", Status: "open"}}, Status: "open", Pager: NewPager(1, 50, 1)}, "qi"},
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
<a href="/_gm/games"{{if eq .ActiveNav "games"}} class="active"{{end}}>Games</a>
|
||||
<a href="/_gm/complaints"{{if eq .ActiveNav "complaints"}} class="active"{{end}}>Complaints</a>
|
||||
<a href="/_gm/messages"{{if eq .ActiveNav "messages"}} class="active"{{end}}>Messages</a>
|
||||
<a href="/_gm/throttled"{{if eq .ActiveNav "throttled"}} class="active"{{end}}>Throttled</a>
|
||||
<a href="/_gm/dictionary"{{if eq .ActiveNav "dictionary"}} class="active"{{end}}>Dictionary</a>
|
||||
<a href="/_gm/broadcast"{{if eq .ActiveNav "broadcast"}} class="active"{{end}}>Broadcast</a>
|
||||
<a href="/_gm/grafana/">Grafana ↗</a>
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
{{define "content" -}}
|
||||
<h1>Throttled</h1>
|
||||
{{with .Data}}
|
||||
<p class="note">Rate-limiter rejections reported periodically by the gateway. The episode
|
||||
list is in-memory and resets on a backend restart. An account sustaining
|
||||
{{.FlagThreshold}}+ rejected calls within {{.FlagWindow}} is soft-flagged for review
|
||||
below — never banned automatically; clear the flag on the user card.</p>
|
||||
<section class="panel"><h2>Recent episodes</h2>
|
||||
<table class="list">
|
||||
<thead><tr><th>Class</th><th>Key</th><th class="num">Rejected</th><th>First seen</th><th>Last seen</th></tr></thead>
|
||||
<tbody>
|
||||
{{range .Episodes}}
|
||||
<tr>
|
||||
<td>{{.Class}}</td>
|
||||
<td>{{if .UserID}}<a href="/_gm/users/{{.UserID}}">{{.Key}}</a>{{else}}<code>{{.Key}}</code>{{end}}</td>
|
||||
<td class="num">{{.Rejected}}</td>
|
||||
<td>{{.FirstSeen}}</td>
|
||||
<td>{{.LastSeen}}</td>
|
||||
</tr>
|
||||
{{else}}
|
||||
<tr><td colspan="5"><span class="note">nothing throttled recently</span></td></tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
<section class="panel"><h2>Flagged accounts</h2>
|
||||
<table class="list">
|
||||
<thead><tr><th>Account</th><th>Display name</th><th>Flagged</th></tr></thead>
|
||||
<tbody>
|
||||
{{range .Flagged}}
|
||||
<tr><td><a href="/_gm/users/{{.ID}}">{{.ID}}</a></td><td>{{.DisplayName}}</td><td>{{.FlaggedAt}}</td></tr>
|
||||
{{else}}
|
||||
<tr><td colspan="3"><span class="note">no flagged accounts</span></td></tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
{{end}}
|
||||
{{- end}}
|
||||
@@ -13,8 +13,14 @@
|
||||
<li><b>Paid</b> {{if .PaidAccount}}yes{{else}}no{{end}}</li>
|
||||
<li><b>Hint wallet</b> {{.HintBalance}}</li>
|
||||
{{if .MergedInto}}<li><b>Merged into</b> {{.MergedInto}}</li>{{end}}
|
||||
{{if .FlaggedHighRateAt}}<li><b>High-rate flag</b> <span class="warn">{{.FlaggedHighRateAt}}</span></li>{{end}}
|
||||
<li><b>Created</b> {{.CreatedAt}}</li>
|
||||
</ul>
|
||||
{{if .FlaggedHighRateAt}}
|
||||
<form class="form" method="post" action="/_gm/users/{{.ID}}/clear-high-rate-flag">
|
||||
<button type="submit">Clear high-rate flag</button>
|
||||
</form>
|
||||
{{end}}
|
||||
</section>
|
||||
<section class="panel"><h2>Statistics</h2>
|
||||
{{if .HasStats}}
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
{{range .Items}}
|
||||
<tr>
|
||||
<td><a href="/_gm/users/{{.ID}}">{{.ID}}</a></td>
|
||||
<td>{{.DisplayName}}{{if .Guest}} <span class="pill">guest</span>{{end}}</td>
|
||||
<td>{{.DisplayName}}{{if .Guest}} <span class="pill">guest</span>{{end}}{{if .FlaggedHighRate}} <span class="pill">high-rate</span>{{end}}</td>
|
||||
<td>{{.Kind}}</td>
|
||||
<td>{{.Language}}</td>
|
||||
<td>{{.CreatedAt}}</td>
|
||||
|
||||
@@ -59,18 +59,20 @@ type UsersView struct {
|
||||
}
|
||||
|
||||
// UserRow is one account row in the list. MoveMin/Avg/Max are the account's
|
||||
// pre-formatted move-duration summary (empty when it has no timed move).
|
||||
// pre-formatted move-duration summary (empty when it has no timed move);
|
||||
// FlaggedHighRate marks the soft high-rate badge (R3).
|
||||
type UserRow struct {
|
||||
ID string
|
||||
DisplayName string
|
||||
Kind string
|
||||
Language string
|
||||
Guest bool
|
||||
CreatedAt string
|
||||
HasMoveStats bool
|
||||
MoveMin string
|
||||
MoveAvg string
|
||||
MoveMax string
|
||||
ID string
|
||||
DisplayName string
|
||||
Kind string
|
||||
Language string
|
||||
Guest bool
|
||||
FlaggedHighRate bool
|
||||
CreatedAt string
|
||||
HasMoveStats bool
|
||||
MoveMin string
|
||||
MoveAvg string
|
||||
MoveMax string
|
||||
}
|
||||
|
||||
// MessagesView is the paginated chat-message moderation list. NameMask/ExtMask are the
|
||||
@@ -110,15 +112,18 @@ type UserDetailView struct {
|
||||
PaidAccount bool
|
||||
// MergedInto is the primary account id when this account has been retired by a
|
||||
// merge (Stage 11), or empty for a live account.
|
||||
MergedInto string
|
||||
HintBalance int
|
||||
CreatedAt string
|
||||
HasStats bool
|
||||
Stats StatsRow
|
||||
Identities []IdentityRow
|
||||
Games []GameRow
|
||||
TelegramID string
|
||||
ConnectorEnabled bool
|
||||
MergedInto string
|
||||
// FlaggedHighRateAt is the pre-formatted soft high-rate marker timestamp,
|
||||
// empty for an unflagged account; the card shows it with the Clear action (R3).
|
||||
FlaggedHighRateAt string
|
||||
HintBalance int
|
||||
CreatedAt string
|
||||
HasStats bool
|
||||
Stats StatsRow
|
||||
Identities []IdentityRow
|
||||
Games []GameRow
|
||||
TelegramID string
|
||||
ConnectorEnabled bool
|
||||
// MoveChart is the pre-rendered inline SVG of the account's per-move-number think
|
||||
// time (min/mean/max), empty when the account has no timed move.
|
||||
MoveChart template.HTML
|
||||
@@ -247,6 +252,35 @@ type BroadcastView struct {
|
||||
ConnectorEnabled bool
|
||||
}
|
||||
|
||||
// ThrottledView is the rate-limit observability page: the recent gateway-reported
|
||||
// throttle episodes (in-memory, reset on restart) and the accounts currently
|
||||
// carrying the high-rate flag. FlagThreshold and FlagWindow caption the active
|
||||
// auto-flag tuning.
|
||||
type ThrottledView struct {
|
||||
Episodes []ThrottleEpisodeRow
|
||||
Flagged []FlaggedAccountRow
|
||||
FlagThreshold int
|
||||
FlagWindow string
|
||||
}
|
||||
|
||||
// ThrottleEpisodeRow is one recently throttled limiter key. UserID links to the
|
||||
// user card and is set only for the user class (the other classes key by IP).
|
||||
type ThrottleEpisodeRow struct {
|
||||
Class string
|
||||
Key string
|
||||
UserID string
|
||||
Rejected int
|
||||
FirstSeen string
|
||||
LastSeen string
|
||||
}
|
||||
|
||||
// FlaggedAccountRow is one account carrying the high-rate flag.
|
||||
type FlaggedAccountRow struct {
|
||||
ID string
|
||||
DisplayName string
|
||||
FlaggedAt string
|
||||
}
|
||||
|
||||
// MessageView is the result page shown after a POST action.
|
||||
type MessageView struct {
|
||||
Heading string
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"scrabble/backend/internal/game"
|
||||
"scrabble/backend/internal/lobby"
|
||||
"scrabble/backend/internal/postgres"
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
"scrabble/backend/internal/robot"
|
||||
"scrabble/backend/internal/telemetry"
|
||||
)
|
||||
@@ -35,6 +36,9 @@ type Config struct {
|
||||
Lobby lobby.Config
|
||||
// Robot configures the robot opponent driver (scan cadence).
|
||||
Robot robot.Config
|
||||
// RateWatch tunes the conservative high-rate auto-flag applied to the
|
||||
// gateway's rate-limiter rejection reports (R3).
|
||||
RateWatch ratewatch.Config
|
||||
// SMTP configures the email relay used for confirm-codes. An empty Host
|
||||
// selects the development log mailer (the code is logged, not sent).
|
||||
SMTP account.SMTPConfig
|
||||
@@ -105,6 +109,14 @@ func Load() (Config, error) {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
rw := ratewatch.DefaultConfig()
|
||||
if rw.FlagThreshold, err = envInt("BACKEND_HIGHRATE_FLAG_THRESHOLD", rw.FlagThreshold); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if rw.FlagWindow, err = envDuration("BACKEND_HIGHRATE_FLAG_WINDOW", rw.FlagWindow); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
guestReapInterval, err := envDuration("BACKEND_GUEST_REAP_INTERVAL", defaultGuestReapInterval)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
@@ -131,6 +143,7 @@ func Load() (Config, error) {
|
||||
Game: gm,
|
||||
Lobby: lb,
|
||||
Robot: rb,
|
||||
RateWatch: rw,
|
||||
SMTP: smtp,
|
||||
ConnectorAddr: os.Getenv("BACKEND_CONNECTOR_ADDR"),
|
||||
GuestReapInterval: guestReapInterval,
|
||||
@@ -170,6 +183,9 @@ func (c Config) validate() error {
|
||||
if err := c.Robot.Validate(); err != nil {
|
||||
return fmt.Errorf("config: %w", err)
|
||||
}
|
||||
if err := c.RateWatch.Validate(); err != nil {
|
||||
return fmt.Errorf("config: %w", err)
|
||||
}
|
||||
if c.GuestReapInterval <= 0 {
|
||||
return fmt.Errorf("config: BACKEND_GUEST_REAP_INTERVAL must be positive")
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
@@ -195,6 +196,62 @@ func TestServiceLanguageRoundTrip(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestHighRateFlagRoundTrip covers the R3 soft high-rate marker: a fresh account
|
||||
// is unflagged, FlagHighRate stamps it exactly once (a second sustained episode
|
||||
// never moves the timestamp), ClearHighRateFlag reverses it, and a re-flag after
|
||||
// the operator clear takes a fresh timestamp.
|
||||
func TestHighRateFlagRoundTrip(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
store := account.NewStore(testDB)
|
||||
acc, err := store.ProvisionTelegram(ctx, "tg-"+uuid.NewString(), "en", "", "Player")
|
||||
if err != nil {
|
||||
t.Fatalf("provision telegram: %v", err)
|
||||
}
|
||||
if !acc.FlaggedHighRateAt.IsZero() {
|
||||
t.Fatalf("fresh FlaggedHighRateAt = %v, want zero", acc.FlaggedHighRateAt)
|
||||
}
|
||||
|
||||
first := time.Date(2026, 6, 1, 12, 0, 0, 0, time.UTC)
|
||||
set, err := store.FlagHighRate(ctx, acc.ID, first)
|
||||
if err != nil {
|
||||
t.Fatalf("flag: %v", err)
|
||||
}
|
||||
if !set {
|
||||
t.Fatal("first FlagHighRate reported not set")
|
||||
}
|
||||
if set, err = store.FlagHighRate(ctx, acc.ID, first.Add(time.Hour)); err != nil {
|
||||
t.Fatalf("re-flag: %v", err)
|
||||
} else if set {
|
||||
t.Fatal("second FlagHighRate must not overwrite the marker")
|
||||
}
|
||||
got, err := store.GetByID(ctx, acc.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get by id: %v", err)
|
||||
}
|
||||
if !got.FlaggedHighRateAt.Equal(first) {
|
||||
t.Errorf("FlaggedHighRateAt = %v, want %v", got.FlaggedHighRateAt, first)
|
||||
}
|
||||
|
||||
if err := store.ClearHighRateFlag(ctx, acc.ID); err != nil {
|
||||
t.Fatalf("clear: %v", err)
|
||||
}
|
||||
if got, err = store.GetByID(ctx, acc.ID); err != nil {
|
||||
t.Fatalf("get by id: %v", err)
|
||||
} else if !got.FlaggedHighRateAt.IsZero() {
|
||||
t.Errorf("cleared FlaggedHighRateAt = %v, want zero", got.FlaggedHighRateAt)
|
||||
}
|
||||
|
||||
second := first.Add(24 * time.Hour)
|
||||
if set, err = store.FlagHighRate(ctx, acc.ID, second); err != nil || !set {
|
||||
t.Fatalf("re-flag after clear = (%v, %v), want (true, nil)", set, err)
|
||||
}
|
||||
if got, err = store.GetByID(ctx, acc.ID); err != nil {
|
||||
t.Fatalf("get by id: %v", err)
|
||||
} else if !got.FlaggedHighRateAt.Equal(second) {
|
||||
t.Errorf("re-flagged FlaggedHighRateAt = %v, want %v", got.FlaggedHighRateAt, second)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIdentityExternalID covers the reverse identity lookup the push-target route
|
||||
// uses: it returns the external_id for the matching kind and ErrNotFound otherwise,
|
||||
// including for a guest that carries no identity.
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"scrabble/backend/internal/account"
|
||||
"scrabble/backend/internal/engine"
|
||||
"scrabble/backend/internal/game"
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
"scrabble/backend/internal/server"
|
||||
)
|
||||
|
||||
@@ -206,6 +207,72 @@ func TestConsoleGameDetailRobotSchedule(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsoleThrottledViewAndFlagClear drives the R3 rate-limit surface end to
|
||||
// end against real stores: a gateway report past the threshold auto-flags the
|
||||
// account, the throttled view shows the episode and the flagged account, the
|
||||
// user card carries the marker, and the operator clear (a same-origin POST)
|
||||
// reverses it.
|
||||
func TestConsoleThrottledViewAndFlagClear(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
accounts := account.NewStore(testDB)
|
||||
acc, err := accounts.ProvisionTelegram(ctx, "tg-"+uuid.NewString(), "en", "", "Throttled Player")
|
||||
if err != nil {
|
||||
t.Fatalf("provision: %v", err)
|
||||
}
|
||||
|
||||
watch := ratewatch.New(ratewatch.Config{FlagThreshold: 100, FlagWindow: 10 * time.Minute}, accounts, zap.NewNop())
|
||||
srv := server.New(":0", server.Deps{
|
||||
Logger: zap.NewNop(),
|
||||
Accounts: accounts,
|
||||
Games: newGameService(),
|
||||
Registry: testRegistry,
|
||||
DictDir: dictDir(),
|
||||
RateWatch: watch,
|
||||
})
|
||||
h := srv.Handler()
|
||||
|
||||
report := `{"window_seconds":30,"entries":[` +
|
||||
`{"class":"user","key":"` + acc.ID.String() + `","rejected":150},` +
|
||||
`{"class":"public","key":"10.1.2.3","rejected":7}]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "http://admin.test/api/v1/internal/ratelimit/report", strings.NewReader(report))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
h.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusNoContent {
|
||||
t.Fatalf("report = %d, want 204", rec.Code)
|
||||
}
|
||||
|
||||
got, err := accounts.GetByID(ctx, acc.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("get by id: %v", err)
|
||||
}
|
||||
if got.FlaggedHighRateAt.IsZero() {
|
||||
t.Fatal("account not auto-flagged past the threshold")
|
||||
}
|
||||
|
||||
base := "http://admin.test/_gm"
|
||||
code, body := consoleDo(h, http.MethodGet, base+"/throttled", "", "")
|
||||
if code != http.StatusOK || !strings.Contains(body, acc.ID.String()) ||
|
||||
!strings.Contains(body, "10.1.2.3") || !strings.Contains(body, "Throttled Player") {
|
||||
t.Fatalf("throttled view = %d, episode/flag shown = %v/%v",
|
||||
code, strings.Contains(body, "10.1.2.3"), strings.Contains(body, "Throttled Player"))
|
||||
}
|
||||
if code, body = consoleDo(h, http.MethodGet, base+"/users/"+acc.ID.String(), "", ""); code != http.StatusOK || !strings.Contains(body, "Clear high-rate flag") {
|
||||
t.Fatalf("user card = %d, has clear action = %v", code, strings.Contains(body, "Clear high-rate flag"))
|
||||
}
|
||||
|
||||
// The clear POST is CSRF-guarded like every console action.
|
||||
if code, _ = consoleDo(h, http.MethodPost, base+"/users/"+acc.ID.String()+"/clear-high-rate-flag", "", ""); code != http.StatusForbidden {
|
||||
t.Fatalf("clear without origin = %d, want 403", code)
|
||||
}
|
||||
if code, body = consoleDo(h, http.MethodPost, base+"/users/"+acc.ID.String()+"/clear-high-rate-flag", "x=1", "http://admin.test"); code != http.StatusOK || !strings.Contains(body, "Cleared") {
|
||||
t.Fatalf("clear with origin = %d, has Cleared = %v", code, strings.Contains(body, "Cleared"))
|
||||
}
|
||||
if got, err = accounts.GetByID(ctx, acc.ID); err != nil || !got.FlaggedHighRateAt.IsZero() {
|
||||
t.Fatalf("flag survived the clear: %v (err %v)", got.FlaggedHighRateAt, err)
|
||||
}
|
||||
}
|
||||
|
||||
// consoleDo issues a request to h, optionally with an Origin header, and returns
|
||||
// the status and body. Form bodies are sent as application/x-www-form-urlencoded.
|
||||
func consoleDo(h http.Handler, method, target, body, origin string) (int, string) {
|
||||
|
||||
@@ -30,4 +30,5 @@ type Accounts struct {
|
||||
MergedInto *uuid.UUID
|
||||
MergedAt *time.Time
|
||||
ServiceLanguage *string
|
||||
FlaggedHighRateAt *time.Time
|
||||
}
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
//
|
||||
// Code generated by go-jet DO NOT EDIT.
|
||||
//
|
||||
// WARNING: Changes to this file may cause incorrect behavior
|
||||
// and will be lost if the code is regenerated
|
||||
//
|
||||
|
||||
package model
|
||||
|
||||
import (
|
||||
"github.com/google/uuid"
|
||||
"time"
|
||||
)
|
||||
|
||||
type GameDrafts struct {
|
||||
GameID uuid.UUID `sql:"primary_key"`
|
||||
AccountID uuid.UUID `sql:"primary_key"`
|
||||
RackOrder string
|
||||
BoardTiles string
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Code generated by go-jet DO NOT EDIT.
|
||||
//
|
||||
// WARNING: Changes to this file may cause incorrect behavior
|
||||
// and will be lost if the code is regenerated
|
||||
//
|
||||
|
||||
package model
|
||||
|
||||
import (
|
||||
"github.com/google/uuid"
|
||||
"time"
|
||||
)
|
||||
|
||||
type GameHidden struct {
|
||||
AccountID uuid.UUID `sql:"primary_key"`
|
||||
GameID uuid.UUID `sql:"primary_key"`
|
||||
CreatedAt time.Time
|
||||
}
|
||||
@@ -34,6 +34,7 @@ type accountsTable struct {
|
||||
MergedInto postgres.ColumnString
|
||||
MergedAt postgres.ColumnTimestampz
|
||||
ServiceLanguage postgres.ColumnString
|
||||
FlaggedHighRateAt postgres.ColumnTimestampz
|
||||
|
||||
AllColumns postgres.ColumnList
|
||||
MutableColumns postgres.ColumnList
|
||||
@@ -92,8 +93,9 @@ func newAccountsTableImpl(schemaName, tableName, alias string) accountsTable {
|
||||
MergedIntoColumn = postgres.StringColumn("merged_into")
|
||||
MergedAtColumn = postgres.TimestampzColumn("merged_at")
|
||||
ServiceLanguageColumn = postgres.StringColumn("service_language")
|
||||
allColumns = postgres.ColumnList{AccountIDColumn, DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn}
|
||||
mutableColumns = postgres.ColumnList{DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn}
|
||||
FlaggedHighRateAtColumn = postgres.TimestampzColumn("flagged_high_rate_at")
|
||||
allColumns = postgres.ColumnList{AccountIDColumn, DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn, FlaggedHighRateAtColumn}
|
||||
mutableColumns = postgres.ColumnList{DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn, MergedIntoColumn, MergedAtColumn, ServiceLanguageColumn, FlaggedHighRateAtColumn}
|
||||
defaultColumns = postgres.ColumnList{DisplayNameColumn, PreferredLanguageColumn, TimeZoneColumn, BlockChatColumn, BlockFriendRequestsColumn, CreatedAtColumn, UpdatedAtColumn, AwayStartColumn, AwayEndColumn, HintBalanceColumn, IsGuestColumn, NotificationsInAppOnlyColumn, PaidAccountColumn}
|
||||
)
|
||||
|
||||
@@ -118,6 +120,7 @@ func newAccountsTableImpl(schemaName, tableName, alias string) accountsTable {
|
||||
MergedInto: MergedIntoColumn,
|
||||
MergedAt: MergedAtColumn,
|
||||
ServiceLanguage: ServiceLanguageColumn,
|
||||
FlaggedHighRateAt: FlaggedHighRateAtColumn,
|
||||
|
||||
AllColumns: allColumns,
|
||||
MutableColumns: mutableColumns,
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
//
|
||||
// Code generated by go-jet DO NOT EDIT.
|
||||
//
|
||||
// WARNING: Changes to this file may cause incorrect behavior
|
||||
// and will be lost if the code is regenerated
|
||||
//
|
||||
|
||||
package table
|
||||
|
||||
import (
|
||||
"github.com/go-jet/jet/v2/postgres"
|
||||
)
|
||||
|
||||
var GameDrafts = newGameDraftsTable("backend", "game_drafts", "")
|
||||
|
||||
type gameDraftsTable struct {
|
||||
postgres.Table
|
||||
|
||||
// Columns
|
||||
GameID postgres.ColumnString
|
||||
AccountID postgres.ColumnString
|
||||
RackOrder postgres.ColumnString
|
||||
BoardTiles postgres.ColumnString
|
||||
UpdatedAt postgres.ColumnTimestampz
|
||||
|
||||
AllColumns postgres.ColumnList
|
||||
MutableColumns postgres.ColumnList
|
||||
DefaultColumns postgres.ColumnList
|
||||
}
|
||||
|
||||
type GameDraftsTable struct {
|
||||
gameDraftsTable
|
||||
|
||||
EXCLUDED gameDraftsTable
|
||||
}
|
||||
|
||||
// AS creates new GameDraftsTable with assigned alias
|
||||
func (a GameDraftsTable) AS(alias string) *GameDraftsTable {
|
||||
return newGameDraftsTable(a.SchemaName(), a.TableName(), alias)
|
||||
}
|
||||
|
||||
// Schema creates new GameDraftsTable with assigned schema name
|
||||
func (a GameDraftsTable) FromSchema(schemaName string) *GameDraftsTable {
|
||||
return newGameDraftsTable(schemaName, a.TableName(), a.Alias())
|
||||
}
|
||||
|
||||
// WithPrefix creates new GameDraftsTable with assigned table prefix
|
||||
func (a GameDraftsTable) WithPrefix(prefix string) *GameDraftsTable {
|
||||
return newGameDraftsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
|
||||
}
|
||||
|
||||
// WithSuffix creates new GameDraftsTable with assigned table suffix
|
||||
func (a GameDraftsTable) WithSuffix(suffix string) *GameDraftsTable {
|
||||
return newGameDraftsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
|
||||
}
|
||||
|
||||
func newGameDraftsTable(schemaName, tableName, alias string) *GameDraftsTable {
|
||||
return &GameDraftsTable{
|
||||
gameDraftsTable: newGameDraftsTableImpl(schemaName, tableName, alias),
|
||||
EXCLUDED: newGameDraftsTableImpl("", "excluded", ""),
|
||||
}
|
||||
}
|
||||
|
||||
func newGameDraftsTableImpl(schemaName, tableName, alias string) gameDraftsTable {
|
||||
var (
|
||||
GameIDColumn = postgres.StringColumn("game_id")
|
||||
AccountIDColumn = postgres.StringColumn("account_id")
|
||||
RackOrderColumn = postgres.StringColumn("rack_order")
|
||||
BoardTilesColumn = postgres.StringColumn("board_tiles")
|
||||
UpdatedAtColumn = postgres.TimestampzColumn("updated_at")
|
||||
allColumns = postgres.ColumnList{GameIDColumn, AccountIDColumn, RackOrderColumn, BoardTilesColumn, UpdatedAtColumn}
|
||||
mutableColumns = postgres.ColumnList{RackOrderColumn, BoardTilesColumn, UpdatedAtColumn}
|
||||
defaultColumns = postgres.ColumnList{RackOrderColumn, BoardTilesColumn, UpdatedAtColumn}
|
||||
)
|
||||
|
||||
return gameDraftsTable{
|
||||
Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
|
||||
|
||||
//Columns
|
||||
GameID: GameIDColumn,
|
||||
AccountID: AccountIDColumn,
|
||||
RackOrder: RackOrderColumn,
|
||||
BoardTiles: BoardTilesColumn,
|
||||
UpdatedAt: UpdatedAtColumn,
|
||||
|
||||
AllColumns: allColumns,
|
||||
MutableColumns: mutableColumns,
|
||||
DefaultColumns: defaultColumns,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
//
|
||||
// Code generated by go-jet DO NOT EDIT.
|
||||
//
|
||||
// WARNING: Changes to this file may cause incorrect behavior
|
||||
// and will be lost if the code is regenerated
|
||||
//
|
||||
|
||||
package table
|
||||
|
||||
import (
|
||||
"github.com/go-jet/jet/v2/postgres"
|
||||
)
|
||||
|
||||
var GameHidden = newGameHiddenTable("backend", "game_hidden", "")
|
||||
|
||||
type gameHiddenTable struct {
|
||||
postgres.Table
|
||||
|
||||
// Columns
|
||||
AccountID postgres.ColumnString
|
||||
GameID postgres.ColumnString
|
||||
CreatedAt postgres.ColumnTimestampz
|
||||
|
||||
AllColumns postgres.ColumnList
|
||||
MutableColumns postgres.ColumnList
|
||||
DefaultColumns postgres.ColumnList
|
||||
}
|
||||
|
||||
type GameHiddenTable struct {
|
||||
gameHiddenTable
|
||||
|
||||
EXCLUDED gameHiddenTable
|
||||
}
|
||||
|
||||
// AS creates new GameHiddenTable with assigned alias
|
||||
func (a GameHiddenTable) AS(alias string) *GameHiddenTable {
|
||||
return newGameHiddenTable(a.SchemaName(), a.TableName(), alias)
|
||||
}
|
||||
|
||||
// Schema creates new GameHiddenTable with assigned schema name
|
||||
func (a GameHiddenTable) FromSchema(schemaName string) *GameHiddenTable {
|
||||
return newGameHiddenTable(schemaName, a.TableName(), a.Alias())
|
||||
}
|
||||
|
||||
// WithPrefix creates new GameHiddenTable with assigned table prefix
|
||||
func (a GameHiddenTable) WithPrefix(prefix string) *GameHiddenTable {
|
||||
return newGameHiddenTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
|
||||
}
|
||||
|
||||
// WithSuffix creates new GameHiddenTable with assigned table suffix
|
||||
func (a GameHiddenTable) WithSuffix(suffix string) *GameHiddenTable {
|
||||
return newGameHiddenTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
|
||||
}
|
||||
|
||||
func newGameHiddenTable(schemaName, tableName, alias string) *GameHiddenTable {
|
||||
return &GameHiddenTable{
|
||||
gameHiddenTable: newGameHiddenTableImpl(schemaName, tableName, alias),
|
||||
EXCLUDED: newGameHiddenTableImpl("", "excluded", ""),
|
||||
}
|
||||
}
|
||||
|
||||
func newGameHiddenTableImpl(schemaName, tableName, alias string) gameHiddenTable {
|
||||
var (
|
||||
AccountIDColumn = postgres.StringColumn("account_id")
|
||||
GameIDColumn = postgres.StringColumn("game_id")
|
||||
CreatedAtColumn = postgres.TimestampzColumn("created_at")
|
||||
allColumns = postgres.ColumnList{AccountIDColumn, GameIDColumn, CreatedAtColumn}
|
||||
mutableColumns = postgres.ColumnList{CreatedAtColumn}
|
||||
defaultColumns = postgres.ColumnList{CreatedAtColumn}
|
||||
)
|
||||
|
||||
return gameHiddenTable{
|
||||
Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),
|
||||
|
||||
//Columns
|
||||
AccountID: AccountIDColumn,
|
||||
GameID: GameIDColumn,
|
||||
CreatedAt: CreatedAtColumn,
|
||||
|
||||
AllColumns: allColumns,
|
||||
MutableColumns: mutableColumns,
|
||||
DefaultColumns: defaultColumns,
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,8 @@ func UseSchema(schema string) {
|
||||
EmailConfirmations = EmailConfirmations.FromSchema(schema)
|
||||
FriendCodes = FriendCodes.FromSchema(schema)
|
||||
Friendships = Friendships.FromSchema(schema)
|
||||
GameDrafts = GameDrafts.FromSchema(schema)
|
||||
GameHidden = GameHidden.FromSchema(schema)
|
||||
GameInvitationInvitees = GameInvitationInvitees.FromSchema(schema)
|
||||
GameInvitations = GameInvitations.FromSchema(schema)
|
||||
GameMoves = GameMoves.FromSchema(schema)
|
||||
|
||||
@@ -35,6 +35,10 @@ CREATE TABLE accounts (
|
||||
merged_into uuid REFERENCES accounts (account_id) ON DELETE SET NULL,
|
||||
merged_at timestamptz,
|
||||
service_language text CHECK (service_language IN ('en', 'ru')),
|
||||
-- Soft, reversible "suspected high-rate" marker (R3): set once when the gateway
|
||||
-- reports sustained rate-limiter rejections past the threshold; an operator
|
||||
-- clears it in the admin console. Never an automatic ban.
|
||||
flagged_high_rate_at timestamptz,
|
||||
CONSTRAINT accounts_preferred_language_chk CHECK (preferred_language IN ('en', 'ru')),
|
||||
CONSTRAINT accounts_hint_balance_chk CHECK (hint_balance >= 0)
|
||||
);
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
// Package ratewatch ingests the gateway's periodic rate-limiter rejection
|
||||
// reports (R3). It keeps an in-memory window of recent throttle episodes for
|
||||
// the admin console's view and applies the conservative high-rate auto-flag:
|
||||
// when one account's rejections within the rolling window cross the threshold,
|
||||
// the account store stamps the soft, reversible flagged_high_rate_at marker
|
||||
// (set-once; an operator clears it; never an automatic ban). Like the gateway's
|
||||
// active_users gauge it is single-instance and resets on restart by design —
|
||||
// the durable part is the account flag, not the episode window.
|
||||
package ratewatch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// ClassUser is the limiter class whose keys are account ids — the only class
|
||||
// the auto-flag applies to (the others are keyed by client IP).
|
||||
const ClassUser = "user"
|
||||
|
||||
const (
|
||||
// maxSeries bounds the distinct (class, key) series kept for the console
|
||||
// view, so a key-spraying client cannot grow the map: past the bound the
|
||||
// least-recently-throttled series is evicted.
|
||||
maxSeries = 200
|
||||
// minRetention keeps an episode visible in the console for at least an hour
|
||||
// after its last rejection (longer when the flag window is longer).
|
||||
minRetention = time.Hour
|
||||
)
|
||||
|
||||
// Config tunes the conservative high-rate auto-flag.
|
||||
type Config struct {
|
||||
// FlagThreshold is the rejected-call count within FlagWindow past which a
|
||||
// user account is flagged.
|
||||
FlagThreshold int
|
||||
// FlagWindow is the rolling window the rejections accumulate over.
|
||||
FlagWindow time.Duration
|
||||
}
|
||||
|
||||
// DefaultConfig returns the agreed conservative defaults — 1000 rejected calls
|
||||
// within a rolling 10 minutes (~1.7/s sustained, far above the client's
|
||||
// capped-backoff retry noise yet a fraction of an abusive loop).
|
||||
func DefaultConfig() Config {
|
||||
return Config{FlagThreshold: 1000, FlagWindow: 10 * time.Minute}
|
||||
}
|
||||
|
||||
// Validate reports whether the configuration values are acceptable.
|
||||
func (c Config) Validate() error {
|
||||
if c.FlagThreshold <= 0 {
|
||||
return fmt.Errorf("ratewatch: flag threshold must be positive")
|
||||
}
|
||||
if c.FlagWindow <= 0 {
|
||||
return fmt.Errorf("ratewatch: flag window must be positive")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flagger stamps the account-level high-rate marker; account.Store satisfies it.
|
||||
type Flagger interface {
|
||||
FlagHighRate(ctx context.Context, id uuid.UUID, at time.Time) (bool, error)
|
||||
}
|
||||
|
||||
// Entry is one reported aggregate: the rejections of one limiter key within one
|
||||
// gateway report window (the wire mirror of the gateway's rejection summary).
|
||||
type Entry struct {
|
||||
Class string
|
||||
Key string
|
||||
Rejected int
|
||||
}
|
||||
|
||||
// Episode is one key's recent-throttle aggregate for the admin view.
|
||||
type Episode struct {
|
||||
Class string
|
||||
Key string
|
||||
Rejected int
|
||||
FirstSeen time.Time
|
||||
LastSeen time.Time
|
||||
}
|
||||
|
||||
// Watch accumulates reports and applies the auto-flag rule.
|
||||
type Watch struct {
|
||||
cfg Config
|
||||
flagger Flagger
|
||||
log *zap.Logger
|
||||
now func() time.Time
|
||||
|
||||
mu sync.Mutex
|
||||
series map[seriesKey]*series
|
||||
}
|
||||
|
||||
type seriesKey struct{ class, key string }
|
||||
|
||||
type series struct {
|
||||
points []point // ascending by time
|
||||
}
|
||||
|
||||
type point struct {
|
||||
at time.Time
|
||||
n int
|
||||
}
|
||||
|
||||
// New constructs a Watch over flagger with cfg. A nil logger is replaced by a
|
||||
// no-op one; a nil flagger disables the auto-flag (the view still works).
|
||||
func New(cfg Config, flagger Flagger, log *zap.Logger) *Watch {
|
||||
if log == nil {
|
||||
log = zap.NewNop()
|
||||
}
|
||||
return &Watch{
|
||||
cfg: cfg,
|
||||
flagger: flagger,
|
||||
log: log,
|
||||
now: time.Now,
|
||||
series: make(map[seriesKey]*series),
|
||||
}
|
||||
}
|
||||
|
||||
// Ingest records one gateway report. Entries with an empty class or key or a
|
||||
// non-positive count are skipped. When a user-class series crosses the flag
|
||||
// threshold within the flag window, the account is flagged (the store keeps it
|
||||
// set-once, so a sustained episode costs one no-op UPDATE per report).
|
||||
func (w *Watch) Ingest(ctx context.Context, entries []Entry) {
|
||||
if len(entries) == 0 {
|
||||
return
|
||||
}
|
||||
now := w.now()
|
||||
var flag []uuid.UUID
|
||||
w.mu.Lock()
|
||||
for _, e := range entries {
|
||||
if e.Class == "" || e.Key == "" || e.Rejected <= 0 {
|
||||
continue
|
||||
}
|
||||
k := seriesKey{class: e.Class, key: e.Key}
|
||||
s := w.series[k]
|
||||
if s == nil {
|
||||
s = &series{}
|
||||
w.series[k] = s
|
||||
}
|
||||
s.points = append(s.points, point{at: now, n: e.Rejected})
|
||||
if e.Class == ClassUser && s.sumSince(now.Add(-w.cfg.FlagWindow)) >= w.cfg.FlagThreshold {
|
||||
if id, err := uuid.Parse(e.Key); err == nil {
|
||||
flag = append(flag, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
w.pruneLocked(now)
|
||||
w.mu.Unlock()
|
||||
|
||||
if w.flagger == nil {
|
||||
return
|
||||
}
|
||||
for _, id := range flag {
|
||||
set, err := w.flagger.FlagHighRate(ctx, id, now)
|
||||
switch {
|
||||
case err != nil:
|
||||
w.log.Warn("high-rate flag failed", zap.String("account_id", id.String()), zap.Error(err))
|
||||
case set:
|
||||
w.log.Info("account flagged high-rate",
|
||||
zap.String("account_id", id.String()),
|
||||
zap.Int("threshold", w.cfg.FlagThreshold),
|
||||
zap.Duration("window", w.cfg.FlagWindow))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Config returns the active auto-flag tuning (the admin console captions it).
|
||||
func (w *Watch) Config() Config { return w.cfg }
|
||||
|
||||
// Recent returns the retained throttle episodes, most recently throttled first.
|
||||
func (w *Watch) Recent() []Episode {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
out := make([]Episode, 0, len(w.series))
|
||||
for k, s := range w.series {
|
||||
if len(s.points) == 0 {
|
||||
continue
|
||||
}
|
||||
ep := Episode{
|
||||
Class: k.class,
|
||||
Key: k.key,
|
||||
FirstSeen: s.points[0].at,
|
||||
LastSeen: s.points[len(s.points)-1].at,
|
||||
}
|
||||
for _, p := range s.points {
|
||||
ep.Rejected += p.n
|
||||
}
|
||||
out = append(out, ep)
|
||||
}
|
||||
sort.Slice(out, func(i, j int) bool { return out[i].LastSeen.After(out[j].LastSeen) })
|
||||
return out
|
||||
}
|
||||
|
||||
// sumSince totals the points at or after cutoff.
|
||||
func (s *series) sumSince(cutoff time.Time) int {
|
||||
sum := 0
|
||||
for i := len(s.points) - 1; i >= 0; i-- {
|
||||
if s.points[i].at.Before(cutoff) {
|
||||
break
|
||||
}
|
||||
sum += s.points[i].n
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
// pruneLocked drops points past retention, empty series, and — past maxSeries —
|
||||
// the least-recently-throttled series. The caller holds w.mu.
|
||||
func (w *Watch) pruneLocked(now time.Time) {
|
||||
cutoff := now.Add(-max(minRetention, w.cfg.FlagWindow))
|
||||
for k, s := range w.series {
|
||||
i := 0
|
||||
for i < len(s.points) && s.points[i].at.Before(cutoff) {
|
||||
i++
|
||||
}
|
||||
s.points = s.points[i:]
|
||||
if len(s.points) == 0 {
|
||||
delete(w.series, k)
|
||||
}
|
||||
}
|
||||
for len(w.series) > maxSeries {
|
||||
var oldest seriesKey
|
||||
var oldestAt time.Time
|
||||
first := true
|
||||
for k, s := range w.series {
|
||||
last := s.points[len(s.points)-1].at
|
||||
if first || last.Before(oldestAt) {
|
||||
oldest, oldestAt, first = k, last, false
|
||||
}
|
||||
}
|
||||
delete(w.series, oldest)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
package ratewatch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// fakeFlagger records flag calls and reports them as newly set.
|
||||
type fakeFlagger struct {
|
||||
calls []uuid.UUID
|
||||
}
|
||||
|
||||
func (f *fakeFlagger) FlagHighRate(_ context.Context, id uuid.UUID, _ time.Time) (bool, error) {
|
||||
f.calls = append(f.calls, id)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// watchAt returns a Watch with a controllable clock.
|
||||
func watchAt(cfg Config, flagger Flagger, at *time.Time) *Watch {
|
||||
w := New(cfg, flagger, nil)
|
||||
w.now = func() time.Time { return *at }
|
||||
return w
|
||||
}
|
||||
|
||||
// TestIngestAggregatesAndRecent verifies episodes accumulate per (class, key),
|
||||
// invalid entries are skipped, and Recent orders by last rejection.
|
||||
func TestIngestAggregatesAndRecent(t *testing.T) {
|
||||
now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
|
||||
w := watchAt(DefaultConfig(), nil, &now)
|
||||
ctx := context.Background()
|
||||
|
||||
w.Ingest(ctx, []Entry{
|
||||
{Class: "public", Key: "10.0.0.1", Rejected: 3},
|
||||
{Class: "user", Key: "u-1", Rejected: 5},
|
||||
{Class: "", Key: "x", Rejected: 1},
|
||||
{Class: "user", Key: "", Rejected: 1},
|
||||
{Class: "user", Key: "u-1", Rejected: 0},
|
||||
})
|
||||
now = now.Add(30 * time.Second)
|
||||
w.Ingest(ctx, []Entry{{Class: "public", Key: "10.0.0.1", Rejected: 4}})
|
||||
|
||||
got := w.Recent()
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("Recent returned %d episodes, want 2", len(got))
|
||||
}
|
||||
if got[0].Class != "public" || got[0].Key != "10.0.0.1" || got[0].Rejected != 7 {
|
||||
t.Errorf("first episode = %+v, want public/10.0.0.1 rejected=7", got[0])
|
||||
}
|
||||
if !got[0].LastSeen.After(got[0].FirstSeen) {
|
||||
t.Errorf("episode span = [%v, %v], want a positive span", got[0].FirstSeen, got[0].LastSeen)
|
||||
}
|
||||
if got[1].Class != "user" || got[1].Rejected != 5 {
|
||||
t.Errorf("second episode = %+v, want user rejected=5", got[1])
|
||||
}
|
||||
}
|
||||
|
||||
// TestAutoFlagThreshold verifies the flag fires only for a user-class series
|
||||
// crossing the threshold within the window, with a parseable account id.
|
||||
func TestAutoFlagThreshold(t *testing.T) {
|
||||
now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
|
||||
flagged := &fakeFlagger{}
|
||||
id := uuid.New()
|
||||
w := watchAt(Config{FlagThreshold: 100, FlagWindow: 10 * time.Minute}, flagged, &now)
|
||||
ctx := context.Background()
|
||||
|
||||
w.Ingest(ctx, []Entry{
|
||||
{Class: "user", Key: id.String(), Rejected: 99},
|
||||
{Class: "public", Key: "10.0.0.1", Rejected: 1000},
|
||||
{Class: "user", Key: "not-a-uuid", Rejected: 1000},
|
||||
})
|
||||
if len(flagged.calls) != 0 {
|
||||
t.Fatalf("flagged %v below the threshold", flagged.calls)
|
||||
}
|
||||
|
||||
now = now.Add(30 * time.Second)
|
||||
w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 1}})
|
||||
if len(flagged.calls) != 1 || flagged.calls[0] != id {
|
||||
t.Fatalf("flag calls = %v, want exactly [%s]", flagged.calls, id)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAutoFlagWindowExpiry verifies rejections age out of the rolling window.
|
||||
func TestAutoFlagWindowExpiry(t *testing.T) {
|
||||
now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
|
||||
flagged := &fakeFlagger{}
|
||||
id := uuid.New()
|
||||
w := watchAt(Config{FlagThreshold: 100, FlagWindow: 10 * time.Minute}, flagged, &now)
|
||||
ctx := context.Background()
|
||||
|
||||
w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 60}})
|
||||
now = now.Add(11 * time.Minute)
|
||||
w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 60}})
|
||||
if len(flagged.calls) != 0 {
|
||||
t.Fatalf("flagged %v across an expired window", flagged.calls)
|
||||
}
|
||||
now = now.Add(time.Minute)
|
||||
w.Ingest(ctx, []Entry{{Class: "user", Key: id.String(), Rejected: 50}})
|
||||
if len(flagged.calls) != 1 {
|
||||
t.Fatalf("flag calls = %v, want one in-window crossing", flagged.calls)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSeriesBound verifies the episode map stays bounded by evicting the
|
||||
// least-recently-throttled series.
|
||||
func TestSeriesBound(t *testing.T) {
|
||||
now := time.Date(2026, 6, 10, 12, 0, 0, 0, time.UTC)
|
||||
w := watchAt(DefaultConfig(), nil, &now)
|
||||
ctx := context.Background()
|
||||
|
||||
for i := range maxSeries + 10 {
|
||||
now = now.Add(time.Second)
|
||||
w.Ingest(ctx, []Entry{{Class: "public", Key: fmt.Sprintf("10.0.%d.%d", i/256, i%256), Rejected: 1}})
|
||||
}
|
||||
got := w.Recent()
|
||||
if len(got) != maxSeries {
|
||||
t.Fatalf("retained %d series, want %d", len(got), maxSeries)
|
||||
}
|
||||
for _, ep := range got {
|
||||
if ep.Key == "10.0.0.0" {
|
||||
t.Fatal("the least-recently-throttled series survived the bound")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigValidate covers the tuning guards.
|
||||
func TestConfigValidate(t *testing.T) {
|
||||
if err := DefaultConfig().Validate(); err != nil {
|
||||
t.Errorf("default config invalid: %v", err)
|
||||
}
|
||||
if err := (Config{FlagThreshold: 0, FlagWindow: time.Minute}).Validate(); err == nil {
|
||||
t.Error("zero threshold passed validation")
|
||||
}
|
||||
if err := (Config{FlagThreshold: 1, FlagWindow: 0}).Validate(); err == nil {
|
||||
t.Error("zero window passed validation")
|
||||
}
|
||||
}
|
||||
@@ -37,6 +37,11 @@ func (s *Server) registerRoutes() {
|
||||
// before delivering an out-of-app notification.
|
||||
in.POST("/push-target", s.handlePushTarget)
|
||||
}
|
||||
if s.ratewatch != nil {
|
||||
// The gateway's periodic rate-limiter rejection summary (R3): feeds the
|
||||
// admin console's throttled view and the high-rate auto-flag.
|
||||
s.internal.POST("/ratelimit/report", s.handleRateLimitReport)
|
||||
}
|
||||
u := s.user
|
||||
if s.accounts != nil {
|
||||
u.GET("/profile", s.handleProfile)
|
||||
@@ -120,10 +125,8 @@ func gameIDParam(c *gin.Context) (uuid.UUID, bool) {
|
||||
// X-Forwarded-For (the first hop), falling back to the direct peer.
|
||||
func clientIP(c *gin.Context) string {
|
||||
if xff := c.GetHeader("X-Forwarded-For"); xff != "" {
|
||||
if i := strings.IndexByte(xff, ','); i >= 0 {
|
||||
return strings.TrimSpace(xff[:i])
|
||||
}
|
||||
return strings.TrimSpace(xff)
|
||||
first, _, _ := strings.Cut(xff, ",")
|
||||
return strings.TrimSpace(first)
|
||||
}
|
||||
return c.ClientIP()
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"scrabble/backend/internal/adminconsole"
|
||||
"scrabble/backend/internal/engine"
|
||||
"scrabble/backend/internal/game"
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
"scrabble/backend/internal/robot"
|
||||
"scrabble/backend/internal/social"
|
||||
)
|
||||
@@ -48,6 +49,8 @@ func (s *Server) registerConsole(router *gin.Engine) {
|
||||
gm.GET("/users", s.consoleUsers)
|
||||
gm.GET("/users/:id", s.consoleUserDetail)
|
||||
gm.POST("/users/:id/message", s.consoleUserMessage)
|
||||
gm.POST("/users/:id/clear-high-rate-flag", s.consoleClearHighRateFlag)
|
||||
gm.GET("/throttled", s.consoleThrottled)
|
||||
gm.GET("/games", s.consoleGames)
|
||||
gm.GET("/games/:id", s.consoleGameDetail)
|
||||
gm.GET("/complaints", s.consoleComplaints)
|
||||
@@ -117,7 +120,8 @@ func (s *Server) consoleUsers(c *gin.Context) {
|
||||
}
|
||||
view.Items = append(view.Items, adminconsole.UserRow{
|
||||
ID: it.ID.String(), DisplayName: it.DisplayName, Kind: kind,
|
||||
Language: it.PreferredLanguage, Guest: it.IsGuest, CreatedAt: fmtTime(it.CreatedAt),
|
||||
Language: it.PreferredLanguage, Guest: it.IsGuest,
|
||||
FlaggedHighRate: !it.FlaggedHighRateAt.IsZero(), CreatedAt: fmtTime(it.CreatedAt),
|
||||
})
|
||||
ids = append(ids, it.ID)
|
||||
}
|
||||
@@ -257,6 +261,9 @@ func (s *Server) consoleUserDetail(c *gin.Context) {
|
||||
if acc.MergedInto != uuid.Nil {
|
||||
view.MergedInto = acc.MergedInto.String()
|
||||
}
|
||||
if !acc.FlaggedHighRateAt.IsZero() {
|
||||
view.FlaggedHighRateAt = fmtTime(acc.FlaggedHighRateAt)
|
||||
}
|
||||
if view.HasStats {
|
||||
if st, err := s.accounts.GetStats(ctx, id); err == nil {
|
||||
view.Stats = adminconsole.StatsRow{Wins: st.Wins, Losses: st.Losses, Draws: st.Draws, MaxGamePoints: st.MaxGamePoints, MaxWordPoints: st.MaxWordPoints}
|
||||
@@ -551,6 +558,56 @@ func (s *Server) consolePostBroadcast(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// consoleThrottled renders the rate-limit observability page: the recent
|
||||
// gateway-reported throttle episodes (in-memory, reset on a backend restart)
|
||||
// and the accounts currently carrying the soft high-rate flag (R3).
|
||||
func (s *Server) consoleThrottled(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
var view adminconsole.ThrottledView
|
||||
if s.ratewatch != nil {
|
||||
cfg := s.ratewatch.Config()
|
||||
view.FlagThreshold = cfg.FlagThreshold
|
||||
view.FlagWindow = cfg.FlagWindow.String()
|
||||
for _, ep := range s.ratewatch.Recent() {
|
||||
row := adminconsole.ThrottleEpisodeRow{
|
||||
Class: ep.Class, Key: ep.Key, Rejected: ep.Rejected,
|
||||
FirstSeen: fmtTime(ep.FirstSeen), LastSeen: fmtTime(ep.LastSeen),
|
||||
}
|
||||
if ep.Class == ratewatch.ClassUser {
|
||||
if id, err := uuid.Parse(ep.Key); err == nil {
|
||||
row.UserID = id.String()
|
||||
}
|
||||
}
|
||||
view.Episodes = append(view.Episodes, row)
|
||||
}
|
||||
}
|
||||
flagged, err := s.accounts.ListFlaggedHighRate(ctx)
|
||||
if err != nil {
|
||||
s.consoleError(c, err)
|
||||
return
|
||||
}
|
||||
for _, fa := range flagged {
|
||||
view.Flagged = append(view.Flagged, adminconsole.FlaggedAccountRow{
|
||||
ID: fa.ID.String(), DisplayName: fa.DisplayName, FlaggedAt: fmtTime(fa.FlaggedHighRateAt),
|
||||
})
|
||||
}
|
||||
s.renderConsole(c, "throttled", "throttled", "Throttled", view)
|
||||
}
|
||||
|
||||
// consoleClearHighRateFlag clears the soft high-rate marker — the operator's
|
||||
// reversible review action (R3).
|
||||
func (s *Server) consoleClearHighRateFlag(c *gin.Context) {
|
||||
id, ok := s.consoleUUID(c, "/_gm/users")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if err := s.accounts.ClearHighRateFlag(c.Request.Context(), id); err != nil {
|
||||
s.consoleError(c, err)
|
||||
return
|
||||
}
|
||||
s.renderConsoleMessage(c, "Cleared", "high-rate flag cleared", "/_gm/users/"+id.String())
|
||||
}
|
||||
|
||||
// variantVersions builds the per-variant resident-version summary from the registry.
|
||||
func (s *Server) variantVersions() []adminconsole.VariantVersions {
|
||||
out := make([]adminconsole.VariantVersions, 0, len(engine.Variants()))
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
)
|
||||
|
||||
// rateLimitReportRequest mirrors the gateway's periodic rejection summary: every
|
||||
// entry aggregates one limiter key (class + key) over the report window.
|
||||
type rateLimitReportRequest struct {
|
||||
WindowSeconds int `json:"window_seconds"`
|
||||
Entries []rateLimitReportEntry `json:"entries"`
|
||||
}
|
||||
|
||||
// rateLimitReportEntry is one (class, key) aggregate of the report.
|
||||
type rateLimitReportEntry struct {
|
||||
Class string `json:"class"`
|
||||
Key string `json:"key"`
|
||||
Rejected int `json:"rejected"`
|
||||
}
|
||||
|
||||
// handleRateLimitReport ingests one gateway rejection report into the rate
|
||||
// watch — the admin console's throttled view and the high-rate auto-flag (R3).
|
||||
// Internal, gateway-only: like sessions/resolve it trusts the network segment.
|
||||
// Malformed individual entries are skipped by the watch itself.
|
||||
func (s *Server) handleRateLimitReport(c *gin.Context) {
|
||||
var req rateLimitReportRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
abortBadRequest(c, "invalid rate-limit report")
|
||||
return
|
||||
}
|
||||
entries := make([]ratewatch.Entry, 0, len(req.Entries))
|
||||
for _, e := range req.Entries {
|
||||
entries = append(entries, ratewatch.Entry{Class: e.Class, Key: e.Key, Rejected: e.Rejected})
|
||||
}
|
||||
s.ratewatch.Ingest(c.Request.Context(), entries)
|
||||
c.Status(http.StatusNoContent)
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
|
||||
"scrabble/backend/internal/account"
|
||||
"scrabble/backend/internal/game"
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
"scrabble/backend/internal/session"
|
||||
)
|
||||
|
||||
@@ -57,6 +58,23 @@ func TestResolveSessionRejectsEmptyToken(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRateLimitReportEndpoint covers the internal R3 report route: a malformed
|
||||
// body is a 400, a valid report lands in the rate watch with 204.
|
||||
func TestRateLimitReportEndpoint(t *testing.T) {
|
||||
watch := ratewatch.New(ratewatch.DefaultConfig(), nil, nil)
|
||||
s := New(":0", Deps{RateWatch: watch})
|
||||
if rec := do(t, s, http.MethodPost, "/api/v1/internal/ratelimit/report", `{bad`, nil); rec.Code != http.StatusBadRequest {
|
||||
t.Fatalf("malformed report = %d, want 400", rec.Code)
|
||||
}
|
||||
body := `{"window_seconds":30,"entries":[{"class":"user","key":"` + uuid.NewString() + `","rejected":7}]}`
|
||||
if rec := do(t, s, http.MethodPost, "/api/v1/internal/ratelimit/report", body, nil); rec.Code != http.StatusNoContent {
|
||||
t.Fatalf("report = %d, want 204", rec.Code)
|
||||
}
|
||||
if eps := watch.Recent(); len(eps) != 1 || eps[0].Rejected != 7 {
|
||||
t.Fatalf("watch episodes = %+v, want one entry with rejected=7", eps)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubmitPlayRejectsBadDirection(t *testing.T) {
|
||||
headers := map[string]string{"X-User-ID": uuid.New().String()}
|
||||
path := "/api/v1/user/games/" + uuid.New().String() + "/play"
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"scrabble/backend/internal/game"
|
||||
"scrabble/backend/internal/link"
|
||||
"scrabble/backend/internal/lobby"
|
||||
"scrabble/backend/internal/ratewatch"
|
||||
"scrabble/backend/internal/session"
|
||||
"scrabble/backend/internal/social"
|
||||
"scrabble/backend/internal/telemetry"
|
||||
@@ -71,6 +72,10 @@ type Deps struct {
|
||||
// nil when BACKEND_CONNECTOR_ADDR is unset (broadcasts show a "not configured"
|
||||
// notice).
|
||||
Connector *connector.Client
|
||||
// RateWatch ingests the gateway's rate-limiter rejection reports (R3): the
|
||||
// admin console's throttled view + the high-rate auto-flag. A nil RateWatch
|
||||
// disables the internal report endpoint and the console view.
|
||||
RateWatch *ratewatch.Watch
|
||||
}
|
||||
|
||||
// Server owns the gin engine, the underlying HTTP server and the readiness
|
||||
@@ -93,6 +98,7 @@ type Server struct {
|
||||
registry *engine.Registry
|
||||
dictDir string
|
||||
connector *connector.Client
|
||||
ratewatch *ratewatch.Watch
|
||||
console *adminconsole.Renderer
|
||||
|
||||
public *gin.RouterGroup
|
||||
@@ -133,6 +139,7 @@ func New(addr string, deps Deps) *Server {
|
||||
registry: deps.Registry,
|
||||
dictDir: deps.DictDir,
|
||||
connector: deps.Connector,
|
||||
ratewatch: deps.RateWatch,
|
||||
http: &http.Server{Addr: addr, Handler: engine},
|
||||
}
|
||||
s.registerProbes(engine)
|
||||
|
||||
+10
-8
@@ -1,9 +1,9 @@
|
||||
# deploy
|
||||
|
||||
The full Scrabble contour: `backend` + `gateway` + Postgres + the Telegram
|
||||
connector (with a VPN sidecar) + the observability stack (OTel Collector →
|
||||
Prometheus + Tempo → Grafana), fronted by a **caddy** that owns a single `/_gm`
|
||||
Basic-Auth (the admin console + Grafana). Topology and the decision record are in
|
||||
The full Scrabble contour: `backend` + `gateway` + the static `landing` + Postgres +
|
||||
the Telegram connector (with a VPN sidecar) + the observability stack (OTel
|
||||
Collector → Prometheus + Tempo → Grafana), fronted by a **caddy** that owns a single
|
||||
`/_gm` Basic-Auth (the admin console + Grafana). Topology and the decision record are in
|
||||
[`../docs/ARCHITECTURE.md`](../docs/ARCHITECTURE.md) §13; this file is the
|
||||
operational reference for **every environment variable**.
|
||||
|
||||
@@ -11,8 +11,9 @@ operational reference for **every environment variable**.
|
||||
|
||||
| Service | Image | Role |
|
||||
| --- | --- | --- |
|
||||
| `caddy` | `caddy:2-alpine` | Edge proxy (alias `scrabble` on `edge`): single `/_gm` Basic-Auth → admin console + Grafana; everything else → gateway. TLS per `CADDY_SITE_ADDRESS`. |
|
||||
| `gateway` | built (`gateway/Dockerfile`) | Public edge; serves the embedded landing at `/` and the game SPA at `/app/` + `/telegram/`; Connect-RPC edge. |
|
||||
| `caddy` | `caddy:2-alpine` | Edge proxy (alias `scrabble` on `edge`): single `/_gm` Basic-Auth → admin console + Grafana; `/app/`, `/telegram/` + the Connect path → gateway; the catch-all (incl. `/`) → landing. TLS per `CADDY_SITE_ADDRESS`. |
|
||||
| `gateway` | built (`gateway/Dockerfile`, target `gateway`) | Public edge; serves the embedded game SPA at `/app/` + `/telegram/`; Connect-RPC edge. `/` redirects to `/app/`. |
|
||||
| `landing` | built (`gateway/Dockerfile`, target `landing`) | Static landing page at `/` (caddy:2-alpine + the shared Vite build, `deploy/landing/Caddyfile`); absorbs stray public paths (R3). |
|
||||
| `backend` | built (`backend/Dockerfile`) | Domain service; bakes in the DAWG dictionaries; runs migrations at boot. |
|
||||
| `postgres` | `postgres:17-alpine` | Database (named volume, `pg_isready` healthcheck). |
|
||||
| `vpn` + `telegram` | sidecar + built (`platform/telegram/Dockerfile`) | Telegram connector; egresses through the AmneziaWG sidecar; internal gRPC at `telegram:9091`. |
|
||||
@@ -88,8 +89,9 @@ connector **fails at boot** if both are empty.
|
||||
| `VITE_TELEGRAM_GAME_CHANNEL_NAME_RU` | variable | _(empty)_ | UI build-arg: the landing "Play in Telegram" link for the **Russian** bot (e.g. `https://t.me/Erudit_Game`). |
|
||||
| `VITE_GATEWAY_URL` | variable | _(empty)_ | UI build-arg: gateway origin; empty = same-origin (the usual single-origin deploy). |
|
||||
|
||||
The five `VITE_*` are **build-args** baked into the gateway image at build time, so
|
||||
changing them requires a rebuild (`--build`), not just a restart.
|
||||
The five `VITE_*` are **build-args** baked into the gateway and landing images at
|
||||
build time (both targets share one UI build stage — keep the args identical so it is
|
||||
built once), so changing them requires a rebuild (`--build`), not just a restart.
|
||||
|
||||
## Fixed internal wiring (not operator-set)
|
||||
|
||||
|
||||
+12
-4
@@ -1,7 +1,9 @@
|
||||
# Edge reverse proxy for the Scrabble contour. A single Basic-Auth gate covers
|
||||
# every operator surface under /_gm (the backend-rendered admin console and the
|
||||
# Grafana subpath); everything else (the SPA at / and /telegram/, plus the
|
||||
# Connect edge) goes to the gateway. Mirrors ../galaxy-game's /_gm model.
|
||||
# Grafana subpath); the game SPA (/app/, /telegram/) and the Connect edge go to
|
||||
# the gateway; the catch-all — notably the public landing at / — goes to the
|
||||
# static landing container (R3), so stray traffic never reaches the Go edge.
|
||||
# Mirrors ../galaxy-game's /_gm model.
|
||||
#
|
||||
# CADDY_SITE_ADDRESS is ":80" in the test contour (the host caddy terminates TLS
|
||||
# and forwards); set it to a domain in prod (Stage 18) so this caddy does its own
|
||||
@@ -36,8 +38,14 @@
|
||||
}
|
||||
}
|
||||
|
||||
# The SPA (/, /telegram/) and the Connect edge are served by the gateway.
|
||||
handle {
|
||||
# The game SPA and the Connect edge are served by the gateway.
|
||||
@gateway path /app /app/* /telegram /telegram/* /scrabble.edge.v1.Gateway/*
|
||||
handle @gateway {
|
||||
reverse_proxy gateway:8081
|
||||
}
|
||||
|
||||
# Everything else — the public landing at / and any stray path — is static.
|
||||
handle {
|
||||
reverse_proxy landing:80
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,6 +75,7 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: gateway/Dockerfile
|
||||
target: gateway
|
||||
args:
|
||||
VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
|
||||
VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
|
||||
@@ -100,6 +101,28 @@ services:
|
||||
# caddy owns the /_gm Basic-Auth and routes /_gm to the backend directly.
|
||||
networks: [internal]
|
||||
|
||||
# --- Landing (static) -------------------------------------------------------
|
||||
# The public landing page in its own caddy container (R3): the contour caddy
|
||||
# routes the catch-all (notably /) here, the gateway keeps only /app/,
|
||||
# /telegram/ and the Connect edge. Shares the gateway Dockerfile's UI build
|
||||
# stage — identical build args keep that stage a single cached build.
|
||||
landing:
|
||||
container_name: scrabble-landing
|
||||
image: scrabble-landing:latest
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: gateway/Dockerfile
|
||||
target: landing
|
||||
args:
|
||||
VITE_TELEGRAM_BOT_ID: ${VITE_TELEGRAM_BOT_ID:-}
|
||||
VITE_TELEGRAM_LINK: ${VITE_TELEGRAM_LINK:-}
|
||||
VITE_TELEGRAM_GAME_CHANNEL_NAME_EN: ${VITE_TELEGRAM_GAME_CHANNEL_NAME_EN:-}
|
||||
VITE_TELEGRAM_GAME_CHANNEL_NAME_RU: ${VITE_TELEGRAM_GAME_CHANNEL_NAME_RU:-}
|
||||
VITE_GATEWAY_URL: ${VITE_GATEWAY_URL:-}
|
||||
VITE_APP_VERSION: ${APP_VERSION:-dev}
|
||||
restart: unless-stopped
|
||||
networks: [internal]
|
||||
|
||||
# --- Telegram connector (egress via the VPN sidecar) -----------------------
|
||||
vpn:
|
||||
container_name: scrabble-telegram-vpn
|
||||
@@ -145,12 +168,13 @@ services:
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4317
|
||||
OTEL_EXPORTER_OTLP_INSECURE: "true"
|
||||
|
||||
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway) --
|
||||
# --- Edge reverse proxy (single /_gm Basic-Auth; SPA + Connect -> gateway;
|
||||
# the catch-all incl. the landing -> the static landing container) -------
|
||||
caddy:
|
||||
container_name: scrabble-caddy
|
||||
image: caddy:2-alpine
|
||||
restart: unless-stopped
|
||||
depends_on: [gateway, backend, grafana]
|
||||
depends_on: [gateway, backend, grafana, landing]
|
||||
environment:
|
||||
# Test: ":80" (host caddy terminates TLS). Prod: a domain for own ACME.
|
||||
CADDY_SITE_ADDRESS: ${CADDY_SITE_ADDRESS:-:80}
|
||||
|
||||
@@ -34,6 +34,18 @@
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m])) by (result)", "legendFormat": "{{result}}" }]
|
||||
},
|
||||
{
|
||||
"type": "timeseries",
|
||||
"title": "Rate limiting — request rate vs rejections (R3)",
|
||||
"description": "Aggregate only (no per-user labels, the Stage 12/17 discipline): total edge request rate against the limiter rejection rate by class. Per-key detail lives in the admin console's Throttled view.",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
|
||||
"fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"targets": [
|
||||
{ "refId": "A", "expr": "sum(rate(edge_request_duration_count[5m]))", "legendFormat": "requests" },
|
||||
{ "refId": "B", "expr": "sum(rate(gateway_rate_limited_total[5m])) by (class)", "legendFormat": "rejected · {{class}}" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
# Static landing container (R3). Serves the public landing page and the built
|
||||
# assets it references at /; the game SPA (/app/, /telegram/) and the Connect
|
||||
# edge stay on the gateway. The contour caddy routes the catch-all here, so
|
||||
# stray public paths are absorbed by static file serving and never reach the Go
|
||||
# edge. This file is baked into the image at build time (gateway/Dockerfile,
|
||||
# target `landing`), not bind-mounted.
|
||||
{
|
||||
admin off
|
||||
}
|
||||
|
||||
:80 {
|
||||
root * /srv
|
||||
encode zstd gzip
|
||||
|
||||
# Mirror the gateway webui caching: hash-named build assets are immutable,
|
||||
# every HTML shell is no-cache so a new deploy is picked up immediately.
|
||||
header /assets/* Cache-Control "public, max-age=31536000, immutable"
|
||||
@shell not path /assets/*
|
||||
header @shell Cache-Control "no-cache"
|
||||
|
||||
# An unknown path falls back to the landing shell (the gateway's old "/"
|
||||
# behaviour); "/" itself resolves through the index below.
|
||||
try_files {path} /landing.html
|
||||
file_server {
|
||||
index landing.html
|
||||
}
|
||||
}
|
||||
+44
-14
@@ -98,6 +98,15 @@ dropped). Horizontal scaling is explicit future work.
|
||||
response was lost — its button is disabled while offline and the player re-issues it on
|
||||
reconnect). A reachability watcher (a lightweight `profile.get` probe) clears the signal when no
|
||||
other traffic is in flight; the live `Subscribe` stream's drop/recovery feeds the same signal.
|
||||
**Edge hardening (R3):** every request body on the public listener is capped at
|
||||
`GATEWAY_MAX_BODY_BYTES` (default 1 MiB — far above any legitimate payload), both at the HTTP
|
||||
layer (`http.MaxBytesReader`) and as the Connect per-message read limit, so an oversized
|
||||
`Execute` is refused (`resource_exhausted`) without buffering. The h2c server carries explicit
|
||||
sizing: `MaxConcurrentStreams` 250 (the x/net default made visible — a real client holds one
|
||||
`Subscribe` stream plus a few unary calls) and a 3-minute connection `IdleTimeout` (a live
|
||||
`Subscribe` stream keeps its connection active, so only abandoned connections are reaped); the
|
||||
`http.Server` sets only `ReadHeaderTimeout` (10 s) — Read/WriteTimeout would kill the stream.
|
||||
R7 revisits the exact values under load.
|
||||
- **Alphabet on the wire (Stage 13)**: live play exchanges **alphabet indices**, not
|
||||
concrete letters. The rack (`StateView.rack`), the `SubmitPlay`/`Evaluate` tiles, the
|
||||
`Exchange` tiles and the `CheckWord` word are `ubyte` indices into the variant's alphabet
|
||||
@@ -572,6 +581,21 @@ promotions) is future work and would deliver short markdown messages (text + lin
|
||||
distinct accounts that performed an authenticated edge action in the window. The
|
||||
gauge is single-process by design (single-instance MVP, §10): it is correct for one
|
||||
gateway, resets on restart, and is a live operational figure, not a billing count.
|
||||
- **Rate-limit observability (R3):** every limiter rejection increments the gateway
|
||||
counter `gateway_rate_limited_total` (`class` = user/public/email/admin — aggregate
|
||||
only, honouring the no-per-user-label discipline above) and logs one **Debug** line;
|
||||
a gateway reporter drains the per-key rejection tracker every 30 s, emits one **Warn**
|
||||
summary per throttled key and posts the report to the backend
|
||||
(`POST /api/v1/internal/ratelimit/report`, network-trusted like `sessions/resolve`).
|
||||
The backend's `ratewatch` keeps a bounded in-memory episode window (single-instance,
|
||||
resets on restart, like `active_users`) surfaced on the admin console's **Throttled**
|
||||
page next to the flagged-account review queue, and applies the **conservative
|
||||
auto-flag**: an account sustaining `BACKEND_HIGHRATE_FLAG_THRESHOLD` rejected calls
|
||||
(default 1000) within `BACKEND_HIGHRATE_FLAG_WINDOW` (default 10 min) gets the soft,
|
||||
reversible `accounts.flagged_high_rate_at` marker — set once, shown in the user
|
||||
list/detail, cleared by the operator, **never an automatic ban** and never a request
|
||||
gate. The Edge/UX dashboard graphs the aggregate request rate against the rejection
|
||||
rate by class.
|
||||
- Unauthenticated `GET /healthz` (liveness) and `GET /readyz` (readiness — the
|
||||
database answers a bounded ping and the session cache is warmed).
|
||||
- The backend serves a **second listener** — a gRPC server
|
||||
@@ -582,12 +606,12 @@ promotions) is future work and would deliver short markdown messages (text + lin
|
||||
|
||||
| Concern | Enforced by |
|
||||
| --- | --- |
|
||||
| Public rate limiting / anti-abuse | gateway |
|
||||
| Public rate limiting / anti-abuse | gateway (per-IP public/email/admin classes, per-user authenticated class; a request body cap of `GATEWAY_MAX_BODY_BYTES`; rejections are metered, summarised to the backend and surfaced in the admin console with a conservative reversible auto-flag — R3, §11) |
|
||||
| Telegram initData validation (bot-token HMAC) | the Telegram connector; the gateway delegates it over gRPC, so the bot token lives only in the connector |
|
||||
| Session minting; email-code / guest validation | gateway (with backend) |
|
||||
| Session → `user_id` resolution, `X-User-ID` injection | gateway |
|
||||
| Authorisation, ownership, state transitions | backend (`X-User-ID` is the sole identity input) |
|
||||
| Admin authentication | a single Basic-Auth gate on `/_gm/*`, forwarded **verbatim** to the backend's server-rendered admin console (and, in the deployed contour, routing `/_gm/grafana/*` to Grafana). In the deploy the **caddy** owns this gate (§13); a local non-caddy run uses the gateway's own `GATEWAY_ADMIN_*` proxy. The backend trusts the proxy (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
|
||||
| Admin authentication | a single Basic-Auth gate on `/_gm/*`, forwarded **verbatim** to the backend's server-rendered admin console (and, in the deployed contour, routing `/_gm/grafana/*` to Grafana). In the deploy the **caddy** owns this gate (§13); a local non-caddy run uses the gateway's own `GATEWAY_ADMIN_*` proxy, which the per-IP admin limiter class guards ahead of its Basic-Auth (R3) — the caddy-fronted path has no limiter (stock caddy), an accepted gap. The backend trusts the proxy (no admin principal) and guards its state-changing POSTs with a **same-origin** check — the console's CSRF defence. No operator identity is tracked |
|
||||
| backend ↔ gateway ↔ connector trust | the network (only gateway may reach backend; the connector serves unauthenticated gRPC on the internal segment) |
|
||||
|
||||
This is an explicit, accepted MVP risk: compromise of the gateway↔backend
|
||||
@@ -597,7 +621,7 @@ mutual auth is a future hardening step.
|
||||
**Short numeric codes** (email confirm-codes and Stage 8 friend codes) are stored
|
||||
only as SHA-256 hashes and are short-lived and single-use. The unauthenticated
|
||||
email path carries a tight per-IP sub-limit (5 / 10 min); the **friend-code redeem**
|
||||
is authenticated, so it rides the per-user limit (120 / min) and is further bounded
|
||||
is authenticated, so it rides the per-user limit (300 / min) and is further bounded
|
||||
by the code's 12 h TTL, single use, and **one live code per issuer** (which caps the
|
||||
valid-code population). Brute-forcing a 6-digit friend code within these limits is an
|
||||
accepted MVP risk with low blast radius (an unwanted friendship is removable/blockable);
|
||||
@@ -605,22 +629,27 @@ a dedicated redeem sub-limit or a longer code is the hardening step if abuse app
|
||||
|
||||
## 13. Deployment (informational)
|
||||
|
||||
Single public origin, path-routed. The gateway **embeds** the static UI build
|
||||
(`go:embed`, baked in by a node stage in `gateway/Dockerfile`). The Vite build has two
|
||||
entries: a lightweight **landing page** served at `/`, and the game **SPA** served at
|
||||
Single public origin, path-routed. The Vite build has two entries: a lightweight
|
||||
**landing page** and the game **SPA**. The gateway **embeds** the SPA build
|
||||
(`go:embed`, baked in by a node stage in `gateway/Dockerfile`) and serves it at
|
||||
`/app/` (web) and `/telegram/` (the Telegram Mini App; outside Telegram that path
|
||||
redirects to the root — the client-side guard). Hash-named `/assets/*` are served
|
||||
redirects to the root — the client-side guard); a stray hit on the gateway's `/`
|
||||
308-redirects to `/app/`. The **landing** ships in its own static container (R3): the
|
||||
`landing` target of `gateway/Dockerfile` (caddy:2-alpine + the same Vite build,
|
||||
`deploy/landing/Caddyfile`) serves it at `/`, so stray public traffic is absorbed by
|
||||
static file serving and never reaches the Go edge. Hash-named `/assets/*` are served
|
||||
`immutable` (a relaunch is a cache hit, not a re-download); the HTML shells are
|
||||
`no-cache` so a new deploy is picked up. An in-compose **caddy** is the
|
||||
contour's edge: it owns a single `/_gm` Basic-Auth and routes `/_gm/grafana/*` to
|
||||
**Grafana** (anonymous-admin, so the one shared login gates it with no per-user
|
||||
Grafana accounts) and the rest of `/_gm/*` to the backend-rendered **admin console**;
|
||||
everything else (`/`, `/app/`, `/telegram/`, the Connect edge) goes to the gateway. The
|
||||
`no-cache` so a new deploy is picked up — both containers apply the same caching. An
|
||||
in-compose **caddy** is the contour's edge: it owns a single `/_gm` Basic-Auth and
|
||||
routes `/_gm/grafana/*` to **Grafana** (anonymous-admin, so the one shared login gates
|
||||
it with no per-user Grafana accounts) and the rest of `/_gm/*` to the backend-rendered
|
||||
**admin console**; `/app/`, `/telegram/` and the Connect path go to the gateway; the
|
||||
catch-all — notably the landing at `/` — goes to the landing container. The
|
||||
**Telegram connector** runs as a separate container with **no public ingress** — it
|
||||
long-polls Telegram and egresses through a VPN sidecar, answering only internal gRPC.
|
||||
|
||||
The full contour (`deploy/docker-compose.yml`) runs one `gateway`, one `backend`,
|
||||
one Postgres, the connector (+ its VPN sidecar) and the **observability stack** —
|
||||
one Postgres, the static `landing`, the connector (+ its VPN sidecar) and the **observability stack** —
|
||||
OTel Collector (OTLP/gRPC ingest → Prometheus metrics + Tempo traces) and Grafana
|
||||
with provisioned datasources and dashboards. All three services export OTLP to the
|
||||
collector; the connector shares the VPN sidecar's netns, so its `AWG_CONF` must not
|
||||
@@ -633,7 +662,8 @@ network (project-scoped DNS); only caddy joins the shared external `edge` networ
|
||||
Two contours, two secret/variable prefixes (`TEST_` / `PROD_`):
|
||||
- **Test** (Stage 16): auto-deploys on a PR into — or a push to — `development`
|
||||
(`.gitea/workflows/ci.yaml` → `docker compose up -d --build` on the Gitea runner
|
||||
host, then a `GET /` probe through caddy). The host caddy terminates TLS and
|
||||
host, then `GET /` + `GET /app/` probes through caddy — the landing container and
|
||||
the gateway, R3). The host caddy terminates TLS and
|
||||
forwards the domain to `scrabble:80`, so the in-compose caddy serves plain HTTP
|
||||
(`CADDY_SITE_ADDRESS=:80`). The in-compose caddy **trusts X-Forwarded-For from
|
||||
private-range upstreams** (`trusted_proxies private_ranges`), so the real client IP —
|
||||
|
||||
@@ -171,3 +171,11 @@ applied after a reload). When a Telegram connector is configured an operator can
|
||||
**message a user** (by their Telegram identity) or **post to the game channel**.
|
||||
State-changing actions are protected by a same-origin check; the console tracks no
|
||||
operator identity.
|
||||
|
||||
The console also surfaces **rate-limit abuse** (R3): a **Throttled** page lists the
|
||||
recently throttled users/IPs the gateway reported (an in-memory window — it resets on
|
||||
a backend restart) and the accounts currently carrying the soft **high-rate flag**. An
|
||||
account sustaining rejections past a tunable threshold is flagged automatically —
|
||||
the marker is reversible, shown as a badge in the user list and on the user card, and
|
||||
**never blocks play**; the operator reviews and clears it from the user card. There is
|
||||
no automatic ban.
|
||||
|
||||
@@ -175,3 +175,11 @@ identity, их игры) и **игры** (сводка + места), разби
|
||||
подключён Telegram-коннектор, оператор также может **написать пользователю** (по его
|
||||
Telegram-identity) или **отправить пост в игровой канал**. Изменяющие действия
|
||||
защищены проверкой same-origin; личность оператора не отслеживается.
|
||||
|
||||
Консоль также показывает **злоупотребление лимитами** (R3): страница **Throttled**
|
||||
перечисляет недавно затроттленных пользователей/IP по отчётам gateway (окно в памяти —
|
||||
сбрасывается при рестарте backend) и аккаунты с действующим мягким **high-rate
|
||||
флагом**. Аккаунт, устойчиво превышающий настраиваемый порог отказов, помечается
|
||||
автоматически — маркер обратим, виден бейджем в списке пользователей и на карточке
|
||||
аккаунта и **никогда не блокирует игру**; оператор рассматривает и снимает его с
|
||||
карточки пользователя. Автоматического бана нет.
|
||||
|
||||
+14
-2
@@ -76,7 +76,14 @@ tests or touching CI.
|
||||
unsubscribe), the transcode round-trips (FlatBuffers↔JSON, X-User-ID
|
||||
forwarding, nested GameView, domain-code surfacing), the admin Basic-Auth
|
||||
reverse proxy (401 / forward), and a full Connect `Execute` path end to end
|
||||
(guest auth, unauthenticated rejection, unknown message type). The backend gains
|
||||
(guest auth, unauthenticated rejection, unknown message type). **R3** adds the
|
||||
edge-hardening cases: an oversized `Execute` payload is refused
|
||||
(`resource_exhausted`, the `GATEWAY_MAX_BODY_BYTES` cap), a limiter rejection
|
||||
lands in `gateway_rate_limited_total{class}` and the rejection tracker
|
||||
(drain/aggregate unit tests), the report POST reaches
|
||||
`/api/v1/internal/ratelimit/report` with the agreed JSON shape, the `/_gm`
|
||||
mount is 429-guarded by the per-IP admin class, and the gateway's `/`
|
||||
308-redirects to `/app/` (the landing left the embed). The backend gains
|
||||
the **guest** lifecycle (a guest plays an auto-match to a natural end yet accrues
|
||||
no statistics) and the **email-as-login** flow (request/verify, returning user)
|
||||
in `inttest`. Stage 8 adds gateway transcode round-trips for the new social/account
|
||||
@@ -92,7 +99,12 @@ tests or touching CI.
|
||||
404 when not). Postgres-backed `inttest` drives the **complaint resolution →
|
||||
dictionary-change pipeline** (file → resolve with a disposition → pending change → mark
|
||||
applied), the admin **list/count** read queries, and the **/_gm console over HTTP**
|
||||
(pages render; a resolve POST needs a same-origin header).
|
||||
(pages render; a resolve POST needs a same-origin header). **R3** adds `ratewatch`
|
||||
unit tests (window accumulation, the auto-flag threshold + expiry, the bounded
|
||||
episode map), the account-store **high-rate flag round-trip** (set-once / clear /
|
||||
re-flag) and a console flow in `inttest`: a gateway report auto-flags the account,
|
||||
the **Throttled** page shows the episode and the flagged queue, the user card
|
||||
carries the marker and the CSRF-guarded **Clear** reverses it.
|
||||
- **Observability & performance** *(Stage 12)* — `pkg/telemetry` unit-tests the exporter
|
||||
selection (`none`/`stdout`/`otlp` build providers; OTLP constructs with no collector;
|
||||
the nil-runtime fallback). The domain metrics are exercised through a manual
|
||||
|
||||
+22
-9
@@ -1,15 +1,18 @@
|
||||
# Multi-stage build for the gateway service. A node stage builds the static UI
|
||||
# (Vite), the result is embedded into the Go binary (gateway/internal/webui/dist),
|
||||
# and the Go stage — mirroring platform/telegram/Dockerfile — yields a static
|
||||
# binary shipped on distroless nonroot. So the single binary serves the SPA at /
|
||||
# and /telegram/ (docs/ARCHITECTURE.md §13) with no separate static container.
|
||||
# Multi-stage build for the gateway service and the landing image. A node stage
|
||||
# builds the static UI (Vite) once; the `landing` target serves that build from a
|
||||
# static caddy container (the public landing at /, R3), while the final gateway
|
||||
# target embeds it — minus landing.html — into the Go binary
|
||||
# (gateway/internal/webui/dist), which serves the game SPA at /app/ and
|
||||
# /telegram/ (docs/ARCHITECTURE.md §13). The Go stage mirrors
|
||||
# platform/telegram/Dockerfile and ships on distroless nonroot.
|
||||
#
|
||||
# The production UI build vars are image build-args, baked into the bundle.
|
||||
# Build from the repository root so go.work, pkg/, gateway/ and ui/ are all in the
|
||||
# Docker context:
|
||||
# Build from the repository root so go.work, pkg/, gateway/, ui/ and
|
||||
# deploy/landing/ are all in the Docker context:
|
||||
# docker build -f gateway/Dockerfile \
|
||||
# --build-arg VITE_GATEWAY_URL=https://example \
|
||||
# -t scrabble-gateway .
|
||||
# docker build -f gateway/Dockerfile --target landing -t scrabble-landing .
|
||||
|
||||
# --- UI build ----------------------------------------------------------------
|
||||
FROM node:22-alpine AS ui
|
||||
@@ -38,6 +41,14 @@ RUN pnpm install --frozen-lockfile
|
||||
COPY ui ./
|
||||
RUN pnpm build
|
||||
|
||||
# --- landing -------------------------------------------------------------------
|
||||
# The public landing page as its own static container (R3): the same Vite build
|
||||
# served by caddy at /, so stray public traffic is absorbed by static file
|
||||
# serving and never reaches the Go edge.
|
||||
FROM caddy:2-alpine AS landing
|
||||
COPY deploy/landing/Caddyfile /etc/caddy/Caddyfile
|
||||
COPY --from=ui /ui/dist /srv
|
||||
|
||||
# --- Go build ----------------------------------------------------------------
|
||||
FROM golang:1.26.3-alpine AS build
|
||||
WORKDIR /src
|
||||
@@ -46,9 +57,11 @@ COPY pkg ./pkg
|
||||
COPY gateway ./gateway
|
||||
|
||||
# Replace the committed placeholder with the freshly built UI before compiling, so
|
||||
# go:embed bakes the real bundle into the binary.
|
||||
# go:embed bakes the real bundle into the binary. The landing shell ships in the
|
||||
# landing image, not in the gateway (R3).
|
||||
RUN rm -rf gateway/internal/webui/dist
|
||||
COPY --from=ui /ui/dist gateway/internal/webui/dist
|
||||
RUN rm gateway/internal/webui/dist/landing.html
|
||||
|
||||
# Reduce the workspace to what the gateway needs: gateway + pkg (loadtest is not in
|
||||
# this context; its scrabble/gateway replace targets ./gateway, which is present here).
|
||||
@@ -56,6 +69,6 @@ RUN go work edit -dropuse=./backend -dropuse=./platform/telegram -dropuse=./load
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -o /out/gateway ./gateway/cmd/gateway
|
||||
|
||||
# --- runtime -----------------------------------------------------------------
|
||||
FROM gcr.io/distroless/static-debian12:nonroot
|
||||
FROM gcr.io/distroless/static-debian12:nonroot AS gateway
|
||||
COPY --from=build /out/gateway /usr/local/bin/gateway
|
||||
ENTRYPOINT ["/usr/local/bin/gateway"]
|
||||
|
||||
+11
-2
@@ -23,7 +23,7 @@ proto/edge/v1/ # Connect envelope contract (committed generated Go)
|
||||
internal/config/ # GATEWAY_* env config
|
||||
internal/backendclient/ # typed REST client (+ X-User-ID) and push gRPC client
|
||||
internal/session/ # in-memory session cache (LRU/TTL, backend fallback)
|
||||
internal/ratelimit/ # token-bucket limiter (golang.org/x/time/rate)
|
||||
internal/ratelimit/ # token-bucket limiter (golang.org/x/time/rate) + the rejection tracker (R3)
|
||||
internal/connector/ # gRPC client to the Telegram connector (initData validate, out-of-app push) + routing
|
||||
internal/push/ # live-event fan-out hub (per-user client streams)
|
||||
internal/transcode/ # FlatBuffers<->REST bridge + message_type registry
|
||||
@@ -79,12 +79,21 @@ connector (`ValidateLoginWidget`) and forward the trusted `external_id`. These
|
||||
| `GATEWAY_SESSION_TTL` | `10m` | cached session lifetime |
|
||||
| `GATEWAY_SESSION_CACHE_MAX` | `50000` | cached session cap |
|
||||
| `GATEWAY_PUSH_HEARTBEAT_INTERVAL` | `10s` | live-stream keep-alive (an immediate heartbeat also fires on open, under the ~15s edge idle timeout) |
|
||||
| `GATEWAY_MAX_BODY_BYTES` | `1048576` | caps one request body and one Connect message read; an oversized Execute is refused with `resource_exhausted` (R3) |
|
||||
| `GATEWAY_SERVICE_NAME` | `scrabble-gateway` | OpenTelemetry `service.name` |
|
||||
| `GATEWAY_OTEL_TRACES_EXPORTER` | `none` | `none`, `stdout` or `otlp` (gRPC; endpoint from `OTEL_EXPORTER_OTLP_*`) |
|
||||
| `GATEWAY_OTEL_METRICS_EXPORTER` | `none` | `none`, `stdout` or `otlp` |
|
||||
|
||||
Rate-limit defaults (built-in): public 30/min·IP (burst 10), authenticated
|
||||
120/min·user (burst 40), admin 60/min·IP (burst 20), email-code 5/10 min·IP.
|
||||
300/min·user (burst 80, raised in Stage 17 for multi-device play), admin
|
||||
60/min·IP (burst 20, guarding the `/_gm` mount ahead of its Basic-Auth),
|
||||
email-code 5/10 min·IP (burst 2).
|
||||
|
||||
Every rejection increments `gateway_rate_limited_total{class}`
|
||||
(`user`/`public`/`email`/`admin`) and logs one Debug line; a reporter drains the
|
||||
per-key rejection tracker every 30 s, emits a Warn summary per throttled key and
|
||||
posts the report to the backend (`/api/v1/internal/ratelimit/report`), feeding
|
||||
the admin console's throttled view and the high-rate auto-flag (R3).
|
||||
|
||||
## Run
|
||||
|
||||
|
||||
+54
-10
@@ -39,6 +39,14 @@ const (
|
||||
pushReconnectDelay = 2 * time.Second
|
||||
// gatewayID identifies this gateway instance to the backend push channel.
|
||||
gatewayID = "gateway"
|
||||
// readHeaderTimeout bounds reading one request's headers on the public
|
||||
// listener (a slowloris guard). Bodies and long-lived streams are governed by
|
||||
// the h2c settings in connectsrv — Read/WriteTimeout stay unset on purpose,
|
||||
// they would kill the Subscribe stream (R3).
|
||||
readHeaderTimeout = 10 * time.Second
|
||||
// throttleReportInterval is the cadence of the rate-limiter rejection
|
||||
// summary: the Warn log per throttled key and the report to the backend (R3).
|
||||
throttleReportInterval = 30 * time.Second
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -89,6 +97,7 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
|
||||
|
||||
sessions := session.NewCache(backend, cfg.SessionTTL, cfg.SessionCacheMax)
|
||||
limiter := ratelimit.New()
|
||||
tracker := ratelimit.NewTracker()
|
||||
hub := push.NewHub(0)
|
||||
|
||||
var conn *connector.Client
|
||||
@@ -119,22 +128,26 @@ func run(ctx context.Context, cfg config.Config, logger *zap.Logger) error {
|
||||
|
||||
registry := transcode.NewRegistry(backend, validator, cfg.DefaultSupportedLanguages...)
|
||||
edge := connectsrv.NewServer(connectsrv.Deps{
|
||||
Registry: registry,
|
||||
Sessions: sessions,
|
||||
Limiter: limiter,
|
||||
Hub: hub,
|
||||
RateLimit: cfg.RateLimit,
|
||||
Heartbeat: cfg.PushHeartbeatInterval,
|
||||
Logger: logger,
|
||||
AdminProxy: adminProxy,
|
||||
Meter: tel.MeterProvider().Meter("scrabble/gateway/edge"),
|
||||
Registry: registry,
|
||||
Sessions: sessions,
|
||||
Limiter: limiter,
|
||||
Tracker: tracker,
|
||||
Hub: hub,
|
||||
RateLimit: cfg.RateLimit,
|
||||
Heartbeat: cfg.PushHeartbeatInterval,
|
||||
Logger: logger,
|
||||
AdminProxy: adminProxy,
|
||||
Meter: tel.MeterProvider().Meter("scrabble/gateway/edge"),
|
||||
MaxBodyBytes: cfg.MaxBodyBytes,
|
||||
})
|
||||
|
||||
// Bridge the backend push stream into the fan-out hub (and the out-of-app
|
||||
// channel via the connector).
|
||||
go runPushPump(ctx, backend, hub, conn, logger)
|
||||
// Periodically summarise rate-limiter rejections (Warn log + backend report).
|
||||
go runThrottleReporter(ctx, tracker, backend, logger)
|
||||
|
||||
public := &http.Server{Addr: cfg.HTTPAddr, Handler: edge.HTTPHandler()}
|
||||
public := &http.Server{Addr: cfg.HTTPAddr, Handler: edge.HTTPHandler(), ReadHeaderTimeout: readHeaderTimeout}
|
||||
servers := []*namedServer{{name: "public", srv: public}}
|
||||
|
||||
logger.Info("gateway starting",
|
||||
@@ -182,6 +195,37 @@ func runServers(ctx context.Context, cancel context.CancelFunc, servers []*named
|
||||
return first
|
||||
}
|
||||
|
||||
// runThrottleReporter drains the rate-limiter rejection tracker on a fixed
|
||||
// cadence, emits one Warn summary per throttled key and forwards the report to
|
||||
// the backend (which feeds the admin throttled view and the high-rate
|
||||
// auto-flag), until the context is done. A failed delivery is logged and
|
||||
// dropped — the next window reports fresh data anyway.
|
||||
func runThrottleReporter(ctx context.Context, tracker *ratelimit.Tracker, backend *backendclient.Client, logger *zap.Logger) {
|
||||
ticker := time.NewTicker(throttleReportInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
entries := tracker.Drain()
|
||||
if len(entries) == 0 {
|
||||
continue
|
||||
}
|
||||
for _, e := range entries {
|
||||
logger.Warn("rate limited",
|
||||
zap.String("class", e.Class),
|
||||
zap.String("key", e.Key),
|
||||
zap.Int("rejected", e.Rejected),
|
||||
zap.Duration("window", throttleReportInterval))
|
||||
}
|
||||
if err := backend.ReportRateLimited(ctx, int(throttleReportInterval.Seconds()), entries); err != nil {
|
||||
logger.Warn("rate-limit report failed", zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// runPushPump keeps a backend push subscription open, forwarding every event to
|
||||
// the hub and re-subscribing after the stream ends, until the context is done. For
|
||||
// the out-of-app push kinds it also routes events whose recipient has no live
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
|
||||
"scrabble/gateway/internal/ratelimit"
|
||||
pushv1 "scrabble/pkg/proto/push/v1"
|
||||
)
|
||||
|
||||
@@ -124,3 +125,15 @@ func parseAPIError(status int, data []byte) *APIError {
|
||||
func (c *Client) SubscribePush(ctx context.Context, gatewayID string) (grpc.ServerStreamingClient[pushv1.Event], error) {
|
||||
return c.push.Subscribe(ctx, &pushv1.SubscribeRequest{GatewayId: gatewayID})
|
||||
}
|
||||
|
||||
// ReportRateLimited posts the gateway's periodic rate-limiter rejection summary
|
||||
// to the backend, which feeds the admin console's throttled view and the
|
||||
// high-rate auto-flag. The endpoint carries no user identity: like
|
||||
// sessions/resolve it rides the trusted internal segment (R3).
|
||||
func (c *Client) ReportRateLimited(ctx context.Context, windowSeconds int, entries []ratelimit.Rejection) error {
|
||||
body := struct {
|
||||
WindowSeconds int `json:"window_seconds"`
|
||||
Entries []ratelimit.Rejection `json:"entries"`
|
||||
}{WindowSeconds: windowSeconds, Entries: entries}
|
||||
return c.do(ctx, http.MethodPost, "/api/v1/internal/ratelimit/report", "", "", body, nil)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
package backendclient_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"scrabble/gateway/internal/backendclient"
|
||||
"scrabble/gateway/internal/ratelimit"
|
||||
)
|
||||
|
||||
// TestReportRateLimited verifies the rejection report reaches the backend's
|
||||
// internal endpoint with the agreed JSON shape and no user identity.
|
||||
func TestReportRateLimited(t *testing.T) {
|
||||
var got struct {
|
||||
WindowSeconds int `json:"window_seconds"`
|
||||
Entries []ratelimit.Rejection `json:"entries"`
|
||||
}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost || r.URL.Path != "/api/v1/internal/ratelimit/report" {
|
||||
t.Errorf("call = %s %s, want POST /api/v1/internal/ratelimit/report", r.Method, r.URL.Path)
|
||||
}
|
||||
if uid := r.Header.Get("X-User-ID"); uid != "" {
|
||||
t.Errorf("X-User-ID = %q, want empty", uid)
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&got); err != nil {
|
||||
t.Errorf("decode report: %v", err)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c, err := backendclient.New(srv.URL, "localhost:9090", 2*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("backendclient: %v", err)
|
||||
}
|
||||
defer func() { _ = c.Close() }()
|
||||
|
||||
entries := []ratelimit.Rejection{{Class: "user", Key: "u-1", Rejected: 5}}
|
||||
if err := c.ReportRateLimited(context.Background(), 30, entries); err != nil {
|
||||
t.Fatalf("ReportRateLimited: %v", err)
|
||||
}
|
||||
if got.WindowSeconds != 30 || len(got.Entries) != 1 || got.Entries[0] != entries[0] {
|
||||
t.Fatalf("backend received %+v, want window 30 + %+v", got, entries[0])
|
||||
}
|
||||
}
|
||||
@@ -44,6 +44,9 @@ type Config struct {
|
||||
SessionCacheMax int
|
||||
// PushHeartbeatInterval is the idle keep-alive cadence on a client live stream.
|
||||
PushHeartbeatInterval time.Duration
|
||||
// MaxBodyBytes caps one inbound request body on the public listener and one
|
||||
// Connect message read; oversized requests are refused without buffering.
|
||||
MaxBodyBytes int
|
||||
// RateLimit configures the in-memory anti-abuse limiter.
|
||||
RateLimit RateLimitConfig
|
||||
// Telemetry configures the OpenTelemetry providers (shared bootstrap).
|
||||
@@ -77,6 +80,11 @@ const (
|
||||
defaultServiceName = "scrabble-gateway"
|
||||
)
|
||||
|
||||
// DefaultMaxBodyBytes is the default request-body cap (GATEWAY_MAX_BODY_BYTES):
|
||||
// 1 MiB — far above any legitimate edge payload (drafts and chat are a few KB)
|
||||
// yet small enough to stop a cheap memory-amplification upload (R3).
|
||||
const DefaultMaxBodyBytes = 1 << 20
|
||||
|
||||
// supportedLanguages is the set of game languages a service may declare for the
|
||||
// New Game variant gating; defaultSupportedLanguages is the non-platform fallback.
|
||||
var (
|
||||
@@ -130,6 +138,9 @@ func Load() (Config, error) {
|
||||
if c.PushHeartbeatInterval, err = envDuration("GATEWAY_PUSH_HEARTBEAT_INTERVAL", defaultPushHeartbeatInterval); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if c.MaxBodyBytes, err = envInt("GATEWAY_MAX_BODY_BYTES", DefaultMaxBodyBytes); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if c.DefaultSupportedLanguages, err = envLanguages("GATEWAY_DEFAULT_SUPPORTED_LANGUAGES", defaultSupportedLanguages); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
@@ -161,6 +172,9 @@ func (c Config) validate() error {
|
||||
if c.BackendGRPCAddr == "" {
|
||||
return fmt.Errorf("config: GATEWAY_BACKEND_GRPC_ADDR must not be empty")
|
||||
}
|
||||
if c.MaxBodyBytes <= 0 {
|
||||
return fmt.Errorf("config: GATEWAY_MAX_BODY_BYTES must be positive")
|
||||
}
|
||||
if err := c.Telemetry.Validate(); err != nil {
|
||||
return fmt.Errorf("config: %w", err)
|
||||
}
|
||||
|
||||
@@ -29,3 +29,19 @@ func TestLoadRejectsUnsupportedExporter(t *testing.T) {
|
||||
t.Fatal("Load: expected an error for an unsupported exporter, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestLoadMaxBodyBytes verifies the body-cap default and that a non-positive
|
||||
// override fails validation.
|
||||
func TestLoadMaxBodyBytes(t *testing.T) {
|
||||
c, err := Load()
|
||||
if err != nil {
|
||||
t.Fatalf("Load: %v", err)
|
||||
}
|
||||
if c.MaxBodyBytes != DefaultMaxBodyBytes {
|
||||
t.Errorf("MaxBodyBytes = %d, want %d", c.MaxBodyBytes, DefaultMaxBodyBytes)
|
||||
}
|
||||
t.Setenv("GATEWAY_MAX_BODY_BYTES", "0")
|
||||
if _, err := Load(); err == nil {
|
||||
t.Fatal("Load: expected an error for a non-positive body cap, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,8 +24,9 @@ var activeUserWindows = []struct {
|
||||
// serverMetrics holds the edge's operational instruments. It defaults to no-ops;
|
||||
// NewServer installs the real meter when one is supplied in Deps.
|
||||
type serverMetrics struct {
|
||||
edge metric.Float64Histogram
|
||||
active *activeUsers
|
||||
edge metric.Float64Histogram
|
||||
rateLimited metric.Int64Counter
|
||||
active *activeUsers
|
||||
}
|
||||
|
||||
// newServerMetrics builds the instruments on meter (nil selects a no-op meter),
|
||||
@@ -42,7 +43,12 @@ func newServerMetrics(meter metric.Meter) *serverMetrics {
|
||||
if err != nil {
|
||||
h, _ = noop.NewMeterProvider().Meter(meterName).Float64Histogram("edge_request_duration")
|
||||
}
|
||||
m := &serverMetrics{edge: h, active: newActiveUsers()}
|
||||
c, err := meter.Int64Counter("gateway_rate_limited_total",
|
||||
metric.WithDescription("Rate-limiter rejections at the edge, by limiter class (user, public, email or admin) — aggregate only, no per-user attributes."))
|
||||
if err != nil {
|
||||
c, _ = noop.NewMeterProvider().Meter(meterName).Int64Counter("gateway_rate_limited_total")
|
||||
}
|
||||
m := &serverMetrics{edge: h, rateLimited: c, active: newActiveUsers()}
|
||||
|
||||
gauge, err := meter.Int64ObservableGauge("active_users",
|
||||
metric.WithDescription("Distinct accounts that performed an authenticated action within the window (in-memory, single gateway instance)."))
|
||||
@@ -75,3 +81,8 @@ func (m *serverMetrics) recordEdge(ctx context.Context, msgType, result string,
|
||||
func (m *serverMetrics) recordActive(uid string) {
|
||||
m.active.seen(uid)
|
||||
}
|
||||
|
||||
// recordRateLimited counts one limiter rejection under class.
|
||||
func (m *serverMetrics) recordRateLimited(ctx context.Context, class string) {
|
||||
m.rateLimited.Add(ctx, 1, metric.WithAttributes(attribute.String("class", class)))
|
||||
}
|
||||
|
||||
@@ -52,3 +52,41 @@ func TestEdgeMetric(t *testing.T) {
|
||||
t.Errorf("edge auth.guest/domain = %d, want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRateLimitedMetric records limiter rejections through a manual reader and
|
||||
// asserts gateway_rate_limited_total splits by class.
|
||||
func TestRateLimitedMetric(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
reader := sdkmetric.NewManualReader()
|
||||
meter := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)).Meter("test")
|
||||
m := newServerMetrics(meter)
|
||||
|
||||
m.recordRateLimited(ctx, "user")
|
||||
m.recordRateLimited(ctx, "user")
|
||||
m.recordRateLimited(ctx, "public")
|
||||
|
||||
var rm metricdata.ResourceMetrics
|
||||
if err := reader.Collect(ctx, &rm); err != nil {
|
||||
t.Fatalf("collect: %v", err)
|
||||
}
|
||||
|
||||
counts := map[string]int64{}
|
||||
for _, sm := range rm.ScopeMetrics {
|
||||
for _, md := range sm.Metrics {
|
||||
if md.Name != "gateway_rate_limited_total" {
|
||||
continue
|
||||
}
|
||||
sum, ok := md.Data.(metricdata.Sum[int64])
|
||||
if !ok {
|
||||
t.Fatalf("gateway_rate_limited_total is not an int64 sum")
|
||||
}
|
||||
for _, dp := range sum.DataPoints {
|
||||
class, _ := dp.Attributes.Value(attribute.Key("class"))
|
||||
counts[class.AsString()] += dp.Value
|
||||
}
|
||||
}
|
||||
}
|
||||
if counts["user"] != 2 || counts["public"] != 1 {
|
||||
t.Errorf("rate_limited counts = %v, want user=2 public=1", counts)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ package connectsrv
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
@@ -19,6 +20,7 @@ import (
|
||||
"golang.org/x/net/http2"
|
||||
"golang.org/x/net/http2/h2c"
|
||||
|
||||
"scrabble/gateway/internal/backendclient"
|
||||
"scrabble/gateway/internal/config"
|
||||
"scrabble/gateway/internal/push"
|
||||
"scrabble/gateway/internal/ratelimit"
|
||||
@@ -32,33 +34,68 @@ import (
|
||||
// heartbeatKind is the live-stream keep-alive event kind.
|
||||
const heartbeatKind = "heartbeat"
|
||||
|
||||
// Limiter classes, the `class` attribute of gateway_rate_limited_total and the
|
||||
// class field of the periodic rejection report (R3).
|
||||
const (
|
||||
classUser = "user"
|
||||
classPublic = "public"
|
||||
classEmail = "email"
|
||||
classAdmin = "admin"
|
||||
)
|
||||
|
||||
// Explicit h2c server sizing (R3, after the R2 stress run questioned the
|
||||
// implicit defaults).
|
||||
const (
|
||||
// h2cMaxConcurrentStreams bounds the open streams per client connection — the
|
||||
// x/net default made explicit. A real client holds one Subscribe stream plus a
|
||||
// few unary calls; only a synthetic load multiplexing many players over one
|
||||
// transport approaches it. R7 revisits the sizing.
|
||||
h2cMaxConcurrentStreams = 250
|
||||
// h2cIdleTimeout closes a connection with no open streams. A live Subscribe
|
||||
// stream keeps its connection active, so long-lived clients are unaffected;
|
||||
// only abandoned connections are reaped.
|
||||
h2cIdleTimeout = 3 * time.Minute
|
||||
)
|
||||
|
||||
// Server implements edgev1connect.GatewayHandler.
|
||||
type Server struct {
|
||||
registry *transcode.Registry
|
||||
sessions *session.Cache
|
||||
limiter *ratelimit.Limiter
|
||||
tracker *ratelimit.Tracker
|
||||
hub *push.Hub
|
||||
heartbeat time.Duration
|
||||
log *zap.Logger
|
||||
adminProxy http.Handler
|
||||
metrics *serverMetrics
|
||||
|
||||
maxBodyBytes int
|
||||
|
||||
publicPolicy ratelimit.Policy
|
||||
userPolicy ratelimit.Policy
|
||||
emailPolicy ratelimit.Policy
|
||||
adminPolicy ratelimit.Policy
|
||||
}
|
||||
|
||||
// Deps carries the Server's dependencies.
|
||||
// Deps carries the Server's dependencies. A nil Limiter, nil Tracker, zero
|
||||
// RateLimit and non-positive MaxBodyBytes each select a safe default.
|
||||
type Deps struct {
|
||||
Registry *transcode.Registry
|
||||
Sessions *session.Cache
|
||||
Limiter *ratelimit.Limiter
|
||||
Registry *transcode.Registry
|
||||
Sessions *session.Cache
|
||||
Limiter *ratelimit.Limiter
|
||||
// Tracker accumulates limiter rejections for the periodic report; nil
|
||||
// selects a private tracker (rejections are then only counted, never
|
||||
// reported).
|
||||
Tracker *ratelimit.Tracker
|
||||
Hub *push.Hub
|
||||
RateLimit config.RateLimitConfig
|
||||
Heartbeat time.Duration
|
||||
Logger *zap.Logger
|
||||
AdminProxy http.Handler
|
||||
Meter metric.Meter
|
||||
// MaxBodyBytes caps one inbound request body and one Connect message read;
|
||||
// zero or negative selects config.DefaultMaxBodyBytes.
|
||||
MaxBodyBytes int
|
||||
}
|
||||
|
||||
// NewServer constructs the edge service.
|
||||
@@ -67,47 +104,85 @@ func NewServer(d Deps) *Server {
|
||||
if log == nil {
|
||||
log = zap.NewNop()
|
||||
}
|
||||
maxBody := d.MaxBodyBytes
|
||||
if maxBody <= 0 {
|
||||
maxBody = config.DefaultMaxBodyBytes
|
||||
}
|
||||
tracker := d.Tracker
|
||||
if tracker == nil {
|
||||
tracker = ratelimit.NewTracker()
|
||||
}
|
||||
limiter := d.Limiter
|
||||
if limiter == nil {
|
||||
limiter = ratelimit.New()
|
||||
}
|
||||
rl := d.RateLimit
|
||||
if rl == (config.RateLimitConfig{}) {
|
||||
rl = config.DefaultRateLimit()
|
||||
}
|
||||
return &Server{
|
||||
registry: d.Registry,
|
||||
sessions: d.Sessions,
|
||||
limiter: d.Limiter,
|
||||
limiter: limiter,
|
||||
tracker: tracker,
|
||||
hub: d.Hub,
|
||||
heartbeat: d.Heartbeat,
|
||||
log: log,
|
||||
adminProxy: d.AdminProxy,
|
||||
metrics: newServerMetrics(d.Meter),
|
||||
publicPolicy: ratelimit.PerMinute(d.RateLimit.PublicPerMinute, d.RateLimit.PublicBurst),
|
||||
userPolicy: ratelimit.PerMinute(d.RateLimit.UserPerMinute, d.RateLimit.UserBurst),
|
||||
emailPolicy: ratelimit.Per(d.RateLimit.EmailPer10Min, 10*time.Minute, d.RateLimit.EmailBurst),
|
||||
maxBodyBytes: maxBody,
|
||||
publicPolicy: ratelimit.PerMinute(rl.PublicPerMinute, rl.PublicBurst),
|
||||
userPolicy: ratelimit.PerMinute(rl.UserPerMinute, rl.UserBurst),
|
||||
emailPolicy: ratelimit.Per(rl.EmailPer10Min, 10*time.Minute, rl.EmailBurst),
|
||||
adminPolicy: ratelimit.PerMinute(rl.AdminPerMinute, rl.AdminBurst),
|
||||
}
|
||||
}
|
||||
|
||||
// HTTPHandler returns the h2c-wrapped Connect handler ready to serve.
|
||||
func (s *Server) HTTPHandler() http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
path, h := edgev1connect.NewGatewayHandler(s)
|
||||
// The Connect read cap mirrors the HTTP-level body cap below; an oversized
|
||||
// Execute message is refused (resource_exhausted) instead of buffered.
|
||||
path, h := edgev1connect.NewGatewayHandler(s, connect.WithReadMaxBytes(s.maxBodyBytes))
|
||||
mux.Handle(path, h)
|
||||
if s.adminProxy != nil {
|
||||
// The admin console (backend /_gm) is served on the public listener behind
|
||||
// the proxy's Basic-Auth, mounted below the h2c wrap so the Connect edge keeps
|
||||
// working over h2c (docs/ARCHITECTURE.md §12). In the deployed contour the
|
||||
// front caddy owns the /_gm Basic-Auth and Grafana routing; this mount serves
|
||||
// a non-caddy (local) setup.
|
||||
mux.Handle("/_gm/", s.adminProxy)
|
||||
// a non-caddy (local) setup. The per-IP admin limiter class guards it —
|
||||
// notably a Basic-Auth brute force (R3).
|
||||
mux.Handle("/_gm/", s.limitAdmin(s.adminProxy))
|
||||
} else {
|
||||
// With the console disabled here, keep /_gm a 404 so the SPA catch-all below
|
||||
// does not serve the app shell at the operator path.
|
||||
mux.Handle("/_gm/", http.NotFoundHandler())
|
||||
}
|
||||
// The embedded UI: the game SPA under /app/ (web) and /telegram/ (the Telegram Mini
|
||||
// App), with a separate landing page at the catch-all "/" — the single-origin model
|
||||
// (docs/ARCHITECTURE.md §13). All sit below the h2c wrap so the Connect edge (a more
|
||||
// specific prefix) keeps priority. Each SPA mount falls back to the app shell
|
||||
// (index.html) for the hash router; "/" falls back to the landing (landing.html).
|
||||
// The embedded UI: the game SPA under /app/ (web) and /telegram/ (the Telegram
|
||||
// Mini App) — the single-origin model (docs/ARCHITECTURE.md §13). Both sit below
|
||||
// the h2c wrap so the Connect edge (a more specific prefix) keeps priority, and
|
||||
// each mount falls back to the app shell (index.html) for the hash router. The
|
||||
// public landing moved to its own static container behind the contour caddy
|
||||
// (R3), so the catch-all redirects a stray root hit to the app shell — which
|
||||
// keeps a local no-caddy run usable.
|
||||
mux.Handle("/telegram/", webui.Handler("/telegram/", "index.html"))
|
||||
mux.Handle("/app/", webui.Handler("/app/", "index.html"))
|
||||
mux.Handle("/", webui.Handler("", "landing.html"))
|
||||
return h2c.NewHandler(mux, &http2.Server{})
|
||||
mux.Handle("/", http.RedirectHandler("/app/", http.StatusPermanentRedirect))
|
||||
// Every request body on the public listener is capped (the admin proxy POSTs
|
||||
// included); the h2c server carries explicit stream/idle sizing (R3).
|
||||
return h2c.NewHandler(maxBodyHandler(s.maxBodyBytes, mux), &http2.Server{
|
||||
MaxConcurrentStreams: h2cMaxConcurrentStreams,
|
||||
IdleTimeout: h2cIdleTimeout,
|
||||
})
|
||||
}
|
||||
|
||||
// maxBodyHandler caps every inbound request body at limit bytes: a read past the
|
||||
// cap fails with *http.MaxBytesError and the connection is marked to close.
|
||||
func maxBodyHandler(limit int, next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
r.Body = http.MaxBytesReader(w, r.Body, int64(limit))
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
||||
// Execute runs one unary operation. Domain failures are returned in the envelope
|
||||
@@ -138,17 +213,17 @@ func (s *Server) Execute(ctx context.Context, req *connect.Request[edgev1.Execut
|
||||
s.metrics.recordActive(uid)
|
||||
if !s.limiter.Allow("user:"+uid, s.userPolicy) {
|
||||
result = "rate_limited"
|
||||
return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
|
||||
return nil, s.rejectRateLimited(ctx, classUser, uid, msgType)
|
||||
}
|
||||
tr.UserID = uid
|
||||
} else {
|
||||
if !s.limiter.Allow("ip:"+clientIP, s.publicPolicy) {
|
||||
result = "rate_limited"
|
||||
return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
|
||||
return nil, s.rejectRateLimited(ctx, classPublic, clientIP, msgType)
|
||||
}
|
||||
if op.Email && !s.limiter.Allow("email:"+clientIP, s.emailPolicy) {
|
||||
result = "rate_limited"
|
||||
return nil, connect.NewError(connect.CodeResourceExhausted, errRateLimited)
|
||||
return nil, s.rejectRateLimited(ctx, classEmail, clientIP, msgType)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,7 +255,7 @@ func (s *Server) Subscribe(ctx context.Context, req *connect.Request[edgev1.Subs
|
||||
return err
|
||||
}
|
||||
if !s.limiter.Allow("user:"+uid, s.userPolicy) {
|
||||
return connect.NewError(connect.CodeResourceExhausted, errRateLimited)
|
||||
return s.rejectRateLimited(ctx, classUser, uid, "subscribe")
|
||||
}
|
||||
|
||||
events, cancel := s.hub.Subscribe(uid)
|
||||
@@ -216,6 +291,43 @@ func (s *Server) Subscribe(ctx context.Context, req *connect.Request[edgev1.Subs
|
||||
}
|
||||
}
|
||||
|
||||
// noteRateLimited accounts one limiter rejection: the aggregate counter, the
|
||||
// per-rejection Debug line and the periodic-report tracker. The operational
|
||||
// signal is the reporter's Warn summary; per-rejection logging stays at Debug so
|
||||
// a rejection flood cannot flood the log (R3).
|
||||
func (s *Server) noteRateLimited(ctx context.Context, class, key, msgType string) {
|
||||
s.metrics.recordRateLimited(ctx, class)
|
||||
s.tracker.Add(class, key)
|
||||
s.log.Debug("rate limited",
|
||||
zap.String("class", class),
|
||||
zap.String("key", key),
|
||||
zap.String("message_type", msgType))
|
||||
}
|
||||
|
||||
// rejectRateLimited accounts one limiter rejection and returns the Connect error
|
||||
// for the caller.
|
||||
func (s *Server) rejectRateLimited(ctx context.Context, class, key, msgType string) error {
|
||||
s.noteRateLimited(ctx, class, key, msgType)
|
||||
return connect.NewError(connect.CodeResourceExhausted, errRateLimited)
|
||||
}
|
||||
|
||||
// limitAdmin guards the admin proxy with the per-IP admin limiter class, ahead
|
||||
// of its Basic-Auth check (a credential brute force is exactly what it bounds).
|
||||
// It covers the gateway-fronted /_gm mount; in the deployed contour /_gm reaches
|
||||
// the backend through caddy, whose Basic-Auth has no limiter (stock caddy) — see
|
||||
// docs/ARCHITECTURE.md §12 (R3).
|
||||
func (s *Server) limitAdmin(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
ip := peerIP(r.RemoteAddr, r.Header)
|
||||
if !s.limiter.Allow("admin:"+ip, s.adminPolicy) {
|
||||
s.noteRateLimited(r.Context(), classAdmin, ip, "admin")
|
||||
http.Error(w, "rate limited", http.StatusTooManyRequests)
|
||||
return
|
||||
}
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
||||
// resolve extracts and resolves the Authorization bearer token to an account id,
|
||||
// returning a Connect Unauthenticated error when it is missing or unknown.
|
||||
func (s *Server) resolve(ctx context.Context, h http.Header) (string, error) {
|
||||
@@ -225,6 +337,15 @@ func (s *Server) resolve(ctx context.Context, h http.Header) (string, error) {
|
||||
}
|
||||
uid, err := s.sessions.Resolve(ctx, token)
|
||||
if err != nil {
|
||||
// An unknown or expired token (a backend 4xx) is the client's problem and
|
||||
// stays silent; anything else — a resolve timeout, a refused connection, a
|
||||
// backend 5xx — is an infra failure misread as "unauthenticated" by the
|
||||
// client, so surface the cause (the transient resolves seen under load in
|
||||
// the R2 stress run). The token itself is never logged.
|
||||
var apiErr *backendclient.APIError
|
||||
if !errors.As(err, &apiErr) || apiErr.Status >= http.StatusInternalServerError {
|
||||
s.log.Warn("session resolve failed", zap.Error(err))
|
||||
}
|
||||
return "", connect.NewError(connect.CodeUnauthenticated, errInvalidSession)
|
||||
}
|
||||
return uid, nil
|
||||
@@ -247,10 +368,8 @@ func bearerToken(header string) string {
|
||||
// peer address (host part).
|
||||
func peerIP(peerAddr string, h http.Header) string {
|
||||
if xff := h.Get("X-Forwarded-For"); xff != "" {
|
||||
if i := strings.IndexByte(xff, ','); i >= 0 {
|
||||
return strings.TrimSpace(xff[:i])
|
||||
}
|
||||
return strings.TrimSpace(xff)
|
||||
first, _, _ := strings.Cut(xff, ",")
|
||||
return strings.TrimSpace(first)
|
||||
}
|
||||
if host, _, err := net.SplitHostPort(peerAddr); err == nil {
|
||||
return host
|
||||
|
||||
@@ -83,6 +83,140 @@ func TestExecuteAuthedRequiresSession(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestExecuteRateLimitedTracked verifies a limiter rejection returns
|
||||
// ResourceExhausted and lands in the rejection tracker under the public class,
|
||||
// keyed by the client IP (R3).
|
||||
func TestExecuteRateLimitedTracked(t *testing.T) {
|
||||
backendSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_, _ = w.Write([]byte(`{"token":"tok","user_id":"u-1","is_guest":true,"display_name":"Guest"}`))
|
||||
}))
|
||||
defer backendSrv.Close()
|
||||
backend, err := backendclient.New(backendSrv.URL, "localhost:9090", 2*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("backendclient: %v", err)
|
||||
}
|
||||
defer func() { _ = backend.Close() }()
|
||||
|
||||
limits := config.DefaultRateLimit()
|
||||
limits.PublicPerMinute, limits.PublicBurst = 1, 1
|
||||
tracker := ratelimit.NewTracker()
|
||||
edge := connectsrv.NewServer(connectsrv.Deps{
|
||||
Registry: transcode.NewRegistry(backend, nil),
|
||||
Sessions: session.NewCache(backend, time.Minute, 100),
|
||||
Limiter: ratelimit.New(),
|
||||
Tracker: tracker,
|
||||
Hub: push.NewHub(0),
|
||||
RateLimit: limits,
|
||||
Heartbeat: 15 * time.Second,
|
||||
})
|
||||
edgeSrv := httptest.NewServer(edge.HTTPHandler())
|
||||
defer edgeSrv.Close()
|
||||
client := edgev1connect.NewGatewayClient(http.DefaultClient, edgeSrv.URL)
|
||||
|
||||
if _, err := client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
|
||||
MessageType: transcode.MsgAuthGuest,
|
||||
})); err != nil {
|
||||
t.Fatalf("first execute: %v", err)
|
||||
}
|
||||
_, err = client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
|
||||
MessageType: transcode.MsgAuthGuest,
|
||||
}))
|
||||
if connect.CodeOf(err) != connect.CodeResourceExhausted {
|
||||
t.Fatalf("code = %v, want ResourceExhausted", connect.CodeOf(err))
|
||||
}
|
||||
|
||||
entries := tracker.Drain()
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("tracker drained %d entries, want 1", len(entries))
|
||||
}
|
||||
if e := entries[0]; e.Class != "public" || e.Key != "127.0.0.1" || e.Rejected != 1 {
|
||||
t.Fatalf("tracked %+v, want public/127.0.0.1 rejected=1", e)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAdminMountRateLimited verifies the /_gm mount is guarded by the per-IP
|
||||
// admin limiter class ahead of the proxy's Basic-Auth (R3).
|
||||
func TestAdminMountRateLimited(t *testing.T) {
|
||||
backendSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))
|
||||
defer backendSrv.Close()
|
||||
backend, err := backendclient.New(backendSrv.URL, "localhost:9090", 2*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("backendclient: %v", err)
|
||||
}
|
||||
defer func() { _ = backend.Close() }()
|
||||
|
||||
limits := config.DefaultRateLimit()
|
||||
limits.AdminPerMinute, limits.AdminBurst = 1, 1
|
||||
edge := connectsrv.NewServer(connectsrv.Deps{
|
||||
Registry: transcode.NewRegistry(backend, nil),
|
||||
Sessions: session.NewCache(backend, time.Minute, 100),
|
||||
Limiter: ratelimit.New(),
|
||||
Hub: push.NewHub(0),
|
||||
RateLimit: limits,
|
||||
Heartbeat: 15 * time.Second,
|
||||
AdminProxy: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
})
|
||||
edgeSrv := httptest.NewServer(edge.HTTPHandler())
|
||||
defer edgeSrv.Close()
|
||||
|
||||
first, err := http.Get(edgeSrv.URL + "/_gm/")
|
||||
if err != nil {
|
||||
t.Fatalf("first /_gm: %v", err)
|
||||
}
|
||||
_ = first.Body.Close()
|
||||
if first.StatusCode != http.StatusOK {
|
||||
t.Fatalf("first /_gm = %d, want 200", first.StatusCode)
|
||||
}
|
||||
second, err := http.Get(edgeSrv.URL + "/_gm/")
|
||||
if err != nil {
|
||||
t.Fatalf("second /_gm: %v", err)
|
||||
}
|
||||
_ = second.Body.Close()
|
||||
if second.StatusCode != http.StatusTooManyRequests {
|
||||
t.Fatalf("second /_gm = %d, want 429", second.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestExecuteOversizedPayloadRejected verifies the request-body cap: an Execute
|
||||
// message above GATEWAY_MAX_BODY_BYTES is refused at the edge without reaching
|
||||
// the backend (R3).
|
||||
func TestExecuteOversizedPayloadRejected(t *testing.T) {
|
||||
client, cleanup := newEdge(t, func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Error("backend must not be called for an oversized payload")
|
||||
})
|
||||
defer cleanup()
|
||||
|
||||
_, err := client.Execute(context.Background(), connect.NewRequest(&edgev1.ExecuteRequest{
|
||||
MessageType: transcode.MsgAuthGuest,
|
||||
Payload: make([]byte, config.DefaultMaxBodyBytes+1),
|
||||
}))
|
||||
if connect.CodeOf(err) != connect.CodeResourceExhausted {
|
||||
t.Fatalf("code = %v, want ResourceExhausted", connect.CodeOf(err))
|
||||
}
|
||||
}
|
||||
|
||||
// TestRootRedirectsToApp verifies the gateway no longer serves a landing at "/"
|
||||
// (it lives in the landing container since R3): a stray root hit is redirected
|
||||
// to the app shell.
|
||||
func TestRootRedirectsToApp(t *testing.T) {
|
||||
front := httptest.NewServer(connectsrv.NewServer(connectsrv.Deps{}).HTTPHandler())
|
||||
defer front.Close()
|
||||
|
||||
client := &http.Client{CheckRedirect: func(*http.Request, []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
}}
|
||||
resp, err := client.Get(front.URL + "/")
|
||||
if err != nil {
|
||||
t.Fatalf("get /: %v", err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != http.StatusPermanentRedirect || resp.Header.Get("Location") != "/app/" {
|
||||
t.Fatalf("GET / = %d -> %q, want 308 -> /app/", resp.StatusCode, resp.Header.Get("Location"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecuteUnknownMessageType(t *testing.T) {
|
||||
client, cleanup := newEdge(t, func(w http.ResponseWriter, r *http.Request) {})
|
||||
defer cleanup()
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
package ratelimit
|
||||
|
||||
import "sync"
|
||||
|
||||
// Rejection aggregates the limiter rejections of one key within one report
|
||||
// window. Class is the limiter class (user, public, email or admin); Key is the
|
||||
// class-specific subject — an account id for the user class, a client IP for the
|
||||
// others. The JSON shape is the gateway→backend rate-limit report wire contract.
|
||||
type Rejection struct {
|
||||
Class string `json:"class"`
|
||||
Key string `json:"key"`
|
||||
Rejected int `json:"rejected"`
|
||||
}
|
||||
|
||||
// Tracker accumulates limiter rejections between drains. The gateway's periodic
|
||||
// reporter drains it to emit the per-key log summary and the backend report; the
|
||||
// per-rejection cost is one map increment under a mutex, safe on the hot path.
|
||||
type Tracker struct {
|
||||
mu sync.Mutex
|
||||
m map[trackerKey]int
|
||||
}
|
||||
|
||||
type trackerKey struct{ class, key string }
|
||||
|
||||
// NewTracker constructs an empty Tracker.
|
||||
func NewTracker() *Tracker {
|
||||
return &Tracker{m: make(map[trackerKey]int)}
|
||||
}
|
||||
|
||||
// Add counts one rejection of key under class.
|
||||
func (t *Tracker) Add(class, key string) {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
t.m[trackerKey{class: class, key: key}]++
|
||||
}
|
||||
|
||||
// Drain returns the rejections accumulated since the previous drain, in
|
||||
// unspecified order, and resets the tracker. It returns nil when nothing was
|
||||
// rejected.
|
||||
func (t *Tracker) Drain() []Rejection {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
if len(t.m) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]Rejection, 0, len(t.m))
|
||||
for k, n := range t.m {
|
||||
out = append(out, Rejection{Class: k.class, Key: k.key, Rejected: n})
|
||||
}
|
||||
clear(t.m)
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package ratelimit_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"scrabble/gateway/internal/ratelimit"
|
||||
)
|
||||
|
||||
// TestTrackerDrain verifies rejections aggregate per (class, key) and that a
|
||||
// drain resets the tracker.
|
||||
func TestTrackerDrain(t *testing.T) {
|
||||
tr := ratelimit.NewTracker()
|
||||
if got := tr.Drain(); got != nil {
|
||||
t.Fatalf("empty tracker drained %v, want nil", got)
|
||||
}
|
||||
|
||||
tr.Add("user", "u-1")
|
||||
tr.Add("user", "u-1")
|
||||
tr.Add("public", "10.0.0.1")
|
||||
|
||||
got := map[string]ratelimit.Rejection{}
|
||||
for _, r := range tr.Drain() {
|
||||
got[r.Class+"/"+r.Key] = r
|
||||
}
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("drained %d entries, want 2", len(got))
|
||||
}
|
||||
if r := got["user/u-1"]; r.Rejected != 2 {
|
||||
t.Errorf("user/u-1 rejected = %d, want 2", r.Rejected)
|
||||
}
|
||||
if r := got["public/10.0.0.1"]; r.Rejected != 1 {
|
||||
t.Errorf("public/10.0.0.1 rejected = %d, want 1", r.Rejected)
|
||||
}
|
||||
|
||||
if got := tr.Drain(); got != nil {
|
||||
t.Fatalf("second drain = %v, want nil", got)
|
||||
}
|
||||
}
|
||||
-16
@@ -1,16 +0,0 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>Scrabble</title>
|
||||
</head>
|
||||
<body>
|
||||
<!-- scrabble-landing -->
|
||||
<p>
|
||||
Landing build placeholder. The production gateway image embeds the real Vite
|
||||
build (see gateway/Dockerfile); seeing this page means the binary was built
|
||||
without a UI build.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,12 +1,13 @@
|
||||
// Package webui serves the embedded static UI build over the public edge.
|
||||
//
|
||||
// The committed dist/ holds only placeholder index.html / landing.html so the gateway
|
||||
// module compiles with a plain `go build` (and in CI) without a UI build. The production
|
||||
// gateway image replaces dist/ with the real Vite build before compiling (see
|
||||
// gateway/Dockerfile), so the binary ships the UI inside it. Because Vite is built with a
|
||||
// relative asset base, one build serves under any path: the game SPA is mounted at /app/
|
||||
// (web) and /telegram/ (the Telegram Mini App), with a separate landing page at / — the
|
||||
// single-origin model in docs/ARCHITECTURE.md §13.
|
||||
// The committed dist/ holds only a placeholder index.html so the gateway module
|
||||
// compiles with a plain `go build` (and in CI) without a UI build. The production
|
||||
// gateway image replaces dist/ with the real Vite build — minus landing.html, which
|
||||
// ships in the separate landing container since R3 — before compiling (see
|
||||
// gateway/Dockerfile), so the binary ships the UI inside it. Because Vite is built
|
||||
// with a relative asset base, one build serves under any path: the game SPA is
|
||||
// mounted at /app/ (web) and /telegram/ (the Telegram Mini App) — the single-origin
|
||||
// model in docs/ARCHITECTURE.md §13.
|
||||
//
|
||||
// Caching (Stage 17): Vite emits hash-named files under assets/, so those are immutable and
|
||||
// cached hard (a reload/relaunch is a cache hit, not a re-download); the HTML shells carry
|
||||
@@ -35,10 +36,10 @@ func distFS() fs.FS {
|
||||
}
|
||||
|
||||
// Handler serves the embedded UI. An existing file is served directly (hash-named assets get
|
||||
// an immutable cache); every other path falls back to indexName (the SPA shell or the landing
|
||||
// page) so a client-side deep link still loads. When stripPrefix is non-empty it is removed
|
||||
// from the request path before lookup, so the same build serves under a sub-path (e.g.
|
||||
// "/app/" or "/telegram/").
|
||||
// an immutable cache); every other path falls back to indexName (the SPA shell) so a
|
||||
// client-side deep link still loads. When stripPrefix is non-empty it is removed from the
|
||||
// request path before lookup, so the same build serves under a sub-path (e.g. "/app/" or
|
||||
// "/telegram/").
|
||||
func Handler(stripPrefix, indexName string) http.Handler {
|
||||
content := distFS()
|
||||
files := http.FileServer(http.FS(content))
|
||||
|
||||
@@ -22,20 +22,12 @@ func body(t *testing.T, resp *http.Response) string {
|
||||
return string(b)
|
||||
}
|
||||
|
||||
// TestLandingMountServesLandingAndFallsBack: "/" serves the landing shell (no-cache) and
|
||||
// any unknown path falls back to it.
|
||||
func TestLandingMountServesLandingAndFallsBack(t *testing.T) {
|
||||
h := Handler("", "landing.html")
|
||||
|
||||
resp := get(t, h, "/")
|
||||
if resp.StatusCode != http.StatusOK || !strings.Contains(body(t, resp), "scrabble-landing") {
|
||||
t.Fatalf("GET / did not serve the landing shell (status %d)", resp.StatusCode)
|
||||
}
|
||||
if cc := get(t, h, "/").Header.Get("Cache-Control"); cc != "no-cache" {
|
||||
t.Errorf("landing Cache-Control = %q, want no-cache", cc)
|
||||
}
|
||||
if resp := get(t, h, "/whatever"); resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET /whatever status = %d, want 200 (fallback)", resp.StatusCode)
|
||||
// TestShellNoCache: the served HTML shell carries no-cache so a new deploy's
|
||||
// shell (and the asset URLs it references) is fetched fresh.
|
||||
func TestShellNoCache(t *testing.T) {
|
||||
h := Handler("/app/", "index.html")
|
||||
if cc := get(t, h, "/app/").Header.Get("Cache-Control"); cc != "no-cache" {
|
||||
t.Errorf("shell Cache-Control = %q, want no-cache", cc)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user