R2: load-test harness + contour resource observability
CI / changes (pull_request) Successful in 2s
CI / unit (pull_request) Successful in 9s
CI / integration (pull_request) Successful in 11s
CI / ui (pull_request) Successful in 38s
CI / gate (pull_request) Successful in 0s
CI / deploy (pull_request) Failing after 3s

New scrabble/loadtest module (the pre-release stress harness): seeds 1000 guest +
10000 durable accounts with pre-created sessions directly in Postgres (token hash
matches backend/internal/session), drives virtual players through the edge protocol
(real 2-4p games assembled via invitations, mid-ranked legal moves generated locally
by the embedded scrabble-solver — the edge carries no board, so the client replays
history), plus nudge/chat/check-word/draft/profile/stats and a gateway-hammer that
verifies the rate limiter. Prints a trip-report summary (per-op latency percentiles,
result codes, live-event tally). Go unit tests cover the pure pieces; the DAWG-backed
move test runs under BACKEND_DICT_DIR.

Contour: add cAdvisor + postgres_exporter + a 'Scrabble - Resources' Grafana
dashboard and the two Prometheus scrape jobs, for the R2/R7 stress-run resource
baseline.

CI: gate ./loadtest/... (path filter + vet/build/test). Docs: TESTING, ARCHITECTURE,
project CLAUDE repo layout.
This commit is contained in:
Ilia Denisov
2026-06-09 23:45:24 +02:00
parent bf3ee62711
commit aa137e3558
27 changed files with 2554 additions and 7 deletions
+170
View File
@@ -0,0 +1,170 @@
package scenario
import (
"context"
"fmt"
"math/rand"
"time"
"scrabble/loadtest/internal/edge"
"scrabble/loadtest/internal/moves"
"scrabble/loadtest/internal/seed"
)
// Game describes one assembled match as the load driver tracks it.
type Game struct {
	// ID is the game identifier.
	ID string
	// Variant is the rules variant the game was created with.
	Variant string
	// Members lists the participants in seat order: Members[0] is the inviter
	// and occupies seat 0.
	Members []seed.Account
}
// seatOf reports which seat accountID occupies in g, i.e. its position in
// g.Members. It returns -1 when the account is not a member of the game.
func (g *Game) seatOf(accountID string) int {
	for seat := 0; seat < len(g.Members); seat++ {
		if g.Members[seat].ID.String() == accountID {
			return seat
		}
	}
	return -1
}
// assembleCohort forms games among a cohort of active players via the invitation
// flow and returns the games it managed to start.
//
// Every cohort member takes a turn as inviter and keeps creating 2-4 player games
// until it participates in its target number of games: gamesPerPlayer when that is
// positive, otherwise a random 3..5. Games a player joined as an invitee count
// towards its target. The first failed assembly for an inviter is logged at debug
// level and ends that inviter's attempts, so a partial assembly still drives load.
//
// Assembly stops as soon as ctx is done; without that check every remaining
// inviter would still issue one doomed invitation.create call and record it as a
// failure. A cohort of fewer than two players, or an empty variant list, yields nil.
func (d *Driver) assembleCohort(ctx context.Context, cohort []seed.Account, gamesPerPlayer int, rng *rand.Rand) []*Game {
	if len(cohort) < 2 {
		return nil
	}
	// Fetch the variant list once; rng.Intn panics on an empty list.
	variants := moves.Variants()
	if len(variants) == 0 {
		d.log.Warn("assemble cohort: no variants available")
		return nil
	}

	gamesOf := make(map[string]int, len(cohort))
	var games []*Game
	for _, inviter := range cohort {
		if ctx.Err() != nil {
			break
		}
		inviterID := inviter.ID.String()
		target := 3 + rng.Intn(3) // 3..5
		if gamesPerPlayer > 0 {
			target = gamesPerPlayer
		}
		for gamesOf[inviterID] < target && ctx.Err() == nil {
			members := pickMembers(cohort, inviter, rng)
			if len(members) < 2 {
				break
			}
			variant := variants[rng.Intn(len(variants))]
			g, err := d.assemble(ctx, members, variant)
			if err != nil {
				d.log.Debug("assemble game", "err", err)
				break
			}
			games = append(games, g)
			// Every participant, not only the inviter, moves towards its target.
			for _, m := range members {
				gamesOf[m.ID.String()]++
			}
		}
	}
	return games
}
// pickMembers assembles a group led by inviter. It aims for a random size of 2-4
// and fills the remaining places with distinct accounts drawn at random from
// cohort. The number of draws is capped at four times the wanted size, so the
// result may be smaller than wanted (down to the inviter alone) when the cohort
// does not offer enough distinct candidates.
func pickMembers(cohort []seed.Account, inviter seed.Account, rng *rand.Rand) []seed.Account {
	want := 2 + rng.Intn(3) // 2..4
	budget := 4 * want

	group := []seed.Account{inviter}
	taken := map[string]bool{inviter.ID.String(): true}
	for tries := 0; tries < budget; tries++ {
		if len(group) >= want {
			break
		}
		pick := cohort[rng.Intn(len(cohort))]
		key := pick.ID.String()
		if taken[key] {
			continue
		}
		taken[key] = true
		group = append(group, pick)
	}
	return group
}
// assemble runs the invitation flow for one game: the inviter (members[0]) invites
// the rest, each invitee lists its invitations and accepts the pending one from the
// inviter, and the started game is then located in the inviter's game list by its
// seat set. Every edge call is recorded with its result code and latency.
//
// It returns an error naming the failed operation and its result code; when the
// edge call itself failed, the underlying error is wrapped so callers can inspect
// or log the cause. An empty members slice is rejected.
//
// NOTE(review): the game is matched by member set only, so if the same group
// already has an active game the lookup may return that older game — confirm
// whether duplicate groups need to be told apart.
func (d *Driver) assemble(ctx context.Context, members []seed.Account, variant string) (*Game, error) {
	if len(members) == 0 {
		return nil, fmt.Errorf("assemble: no members")
	}
	inviter := members[0]
	invitees := members[1:]

	inviteeIDs := make([]string, len(invitees))
	for i, m := range invitees {
		inviteeIDs[i] = m.ID.String()
	}

	t0 := time.Now()
	code, err := d.edge.CreateInvitation(ctx, inviter.Token, inviteeIDs, variant)
	d.rec.Record("invitation.create", code, time.Since(t0))
	if err != nil || code != "ok" {
		return nil, edgeCallError("invitation.create", code, err)
	}

	for _, invitee := range invitees {
		t0 = time.Now()
		list, lc, err := d.edge.ListInvitations(ctx, invitee.Token)
		d.rec.Record("invitation.list", lc, time.Since(t0))
		if err != nil || lc != "ok" {
			return nil, edgeCallError("invitation.list", lc, err)
		}
		invID := findPending(list, inviter.ID.String())
		if invID == "" {
			return nil, fmt.Errorf("no pending invitation from %s", inviter.ID)
		}

		t0 = time.Now()
		ac, err := d.edge.AcceptInvitation(ctx, invitee.Token, invID)
		d.rec.Record("invitation.accept", ac, time.Since(t0))
		if err != nil || ac != "ok" {
			return nil, edgeCallError("invitation.accept", ac, err)
		}
	}

	t0 = time.Now()
	games, gc, err := d.edge.GamesList(ctx, inviter.Token)
	d.rec.Record("games.list", gc, time.Since(t0))
	if err != nil || gc != "ok" {
		return nil, edgeCallError("games.list", gc, err)
	}

	ids := make([]string, len(members))
	for i, m := range members {
		ids[i] = m.ID.String()
	}
	gameID := findGame(games, ids)
	if gameID == "" {
		return nil, fmt.Errorf("started game not found for %d members", len(members))
	}
	return &Game{ID: gameID, Variant: variant, Members: members}, nil
}

// edgeCallError builds the error for a failed edge operation: it always names the
// operation and its result code, and wraps the call error when there is one.
func edgeCallError(op string, code any, err error) error {
	if err != nil {
		return fmt.Errorf("%s: %v: %w", op, code, err)
	}
	return fmt.Errorf("%s: %v", op, code)
}
// findPending scans list for the first invitation that was sent by inviterID and
// still has status "pending", and returns its id. It returns "" when there is none.
func findPending(list []edge.Invitation, inviterID string) string {
	for i := range list {
		inv := &list[i]
		if inv.InviterID != inviterID || inv.Status != "pending" {
			continue
		}
		return inv.ID
	}
	return ""
}
// findGame looks for an active game seating exactly the accounts in memberIDs and
// returns the id of the first one found, or "" when no game qualifies. A game
// qualifies when it is active, has as many seats as there are member ids, and
// every seat holds one of those ids.
func findGame(games []edge.Game, memberIDs []string) string {
	members := make(map[string]struct{}, len(memberIDs))
	for _, id := range memberIDs {
		members[id] = struct{}{}
	}

candidates:
	for _, g := range games {
		if !g.Active() || len(g.Seats) != len(memberIDs) {
			continue
		}
		for _, seat := range g.Seats {
			if _, ok := members[seat]; !ok {
				// A stranger occupies a seat: not our game.
				continue candidates
			}
		}
		return g.ID
	}
	return ""
}
+45
View File
@@ -0,0 +1,45 @@
package scenario
import (
"context"
"sync"
"time"
"scrabble/loadtest/internal/seed"
)
// HammerConfig configures the gateway-hammer, the run that deliberately exceeds
// the per-user rate limit from a single account.
type HammerConfig struct {
	// Workers is the number of concurrent callers.
	Workers int
	// Duration is how long the callers keep hammering.
	Duration time.Duration
}
// DefaultHammer returns the stock hammer settings: 20 workers for 15 seconds,
// chosen to comfortably exceed the 300/min per-user limit.
func DefaultHammer() HammerConfig {
	var cfg HammerConfig
	cfg.Workers = 20
	cfg.Duration = 15 * time.Second
	return cfg
}
// Hammer calls games.list from the single account acc with cfg.Workers concurrent
// callers for cfg.Duration (or until ctx ends, whichever comes first). The intent
// is to run far above the per-user rate limit so the limiter can be verified —
// rejections surface as the "rate_limited" code — and its cost measured. Each call
// is recorded under "hammer:games.list", which keeps the ok/rate_limited split and
// the rejection latency apart from the realistic traffic in the report. Hammer
// returns once every worker has stopped.
func (d *Driver) Hammer(ctx context.Context, acc seed.Account, cfg HammerConfig) {
	runCtx, cancel := context.WithTimeout(ctx, cfg.Duration)
	defer cancel()
	d.log.Info("gateway-hammer", "workers", cfg.Workers, "duration", cfg.Duration)

	// hammer issues back-to-back calls until the run context expires.
	hammer := func() {
		for {
			if runCtx.Err() != nil {
				return
			}
			began := time.Now()
			// Only the result code matters for the report; the call error is
			// deliberately not inspected.
			_, code, _ := d.edge.GamesList(runCtx, acc.Token)
			d.rec.Record("hammer:games.list", code, time.Since(began))
		}
	}

	var workers sync.WaitGroup
	for started := 0; started < cfg.Workers; started++ {
		workers.Add(1)
		go func() {
			defer workers.Done()
			hammer()
		}()
	}
	workers.Wait()
}
+241
View File
@@ -0,0 +1,241 @@
// Package scenario drives virtual players against the gateway edge protocol: it
// assembles real games through the invitation flow, then runs each player's turn
// loop (poll state, replay history, generate a legal move with the embedded solver,
// submit it) plus a fraction of secondary operations. It exposes the moderate
// realistic ramp agreed for the R2 early pass and a separate gateway-hammer.
package scenario
import (
"context"
"log/slog"
"math/rand"
"sync"
"time"
"scrabble/loadtest/internal/edge"
"scrabble/loadtest/internal/moves"
"scrabble/loadtest/internal/report"
"scrabble/loadtest/internal/seed"
)
// Driver bundles what a load run needs: the edge client, the local move
// generator, the run recorder and a logger. The first three are shared by many
// player goroutines and are expected to be safe for concurrent use.
type Driver struct {
	// edge is the client for the gateway edge protocol.
	edge *edge.Client
	// moves picks the next action for a player from a game's history and rack.
	moves *moves.Registry
	// rec collects per-operation results and live-event counts for the report.
	rec *report.Recorder
	// log receives progress and debug output.
	log *slog.Logger
}
// NewDriver returns a Driver wired to the given edge client, move registry,
// recorder and logger.
func NewDriver(c *edge.Client, m *moves.Registry, rec *report.Recorder, log *slog.Logger) *Driver {
	d := new(Driver)
	d.edge = c
	d.moves = m
	d.rec = rec
	d.log = log
	return d
}
// RealisticConfig configures the under-the-limit ramp.
type RealisticConfig struct {
	// Steps is the number of concurrently active players at each step. The
	// values are cumulative totals, not increments.
	Steps []int
	// StepDur is how long each step is held.
	StepDur time.Duration
	// GamesPerPlayer is the target number of concurrent games per player;
	// 0 means a random 3..5.
	GamesPerPlayer int
	// Tick is the per-player operation cadence; it keeps a player under the
	// per-user limit.
	Tick time.Duration
	// SecondaryProb is the chance, per tick, of running a non-move operation.
	SecondaryProb float64
}
// DefaultRealistic returns the moderate ramp agreed for the R2 early pass: steps
// of 50, 200 and 500 concurrent players held for 12 minutes each, an 800ms tick
// (roughly one operation per second per player) and an 8% chance per tick of a
// secondary operation. GamesPerPlayer is left at 0, i.e. a random 3..5.
func DefaultRealistic() RealisticConfig {
	var cfg RealisticConfig
	cfg.Steps = []int{50, 200, 500}
	cfg.StepDur = 12 * time.Minute
	cfg.Tick = 800 * time.Millisecond
	cfg.SecondaryProb = 0.08
	return cfg
}
// RunRealistic runs the staged ramp. Each step activates more players (drawn from
// the shuffled seeded pool), assembles a cohort of games for the newly activated
// players and starts their turn loops; the loops run until the whole ramp ends.
// Players from earlier steps keep playing, so load is cumulative.
//
// cfg.Steps holds cumulative totals. A step is capped at the pool size, and a step
// that does not exceed the number of players already active (including a negative
// one) activates nobody and is simply held for cfg.StepDur. A non-positive
// cfg.Tick is replaced by the default tick, because the player loops cannot run a
// ticker with a non-positive interval.
//
// It returns nil after the last step has been held, or ctx.Err() if ctx ends
// first. In both cases it stops the player loops and waits for them before
// returning.
func (d *Driver) RunRealistic(ctx context.Context, pool *seed.Pool, cfg RealisticConfig) error {
	if cfg.Tick <= 0 {
		fallback := DefaultRealistic().Tick
		d.log.Warn("non-positive tick; using default", "tick", cfg.Tick, "default", fallback)
		cfg.Tick = fallback
	}

	players := shuffledPool(pool)
	runCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	var wg sync.WaitGroup
	activated := 0
	for si, target := range cfg.Steps {
		if target > len(players) {
			target = len(players)
		}
		// Steps are cumulative; a step below the current total would make the
		// slice expression below panic, so treat it as "nobody new".
		if target < activated {
			target = activated
		}
		cohort := players[activated:target]
		activated = target

		if len(cohort) >= 2 {
			rng := rand.New(rand.NewSource(time.Now().UnixNano() + int64(si)))
			games := d.assembleCohort(runCtx, cohort, cfg.GamesPerPlayer, rng)
			byPlayer := gamesByPlayer(games)
			d.log.Info("ramp step", "step", si+1, "active", activated, "cohort", len(cohort), "games", len(games))
			for pi := range cohort {
				p := cohort[pi]
				wg.Add(1)
				// Each player gets its own rand.Rand, seeded distinctly.
				go func(p seed.Account, pg []*Game, sd int64) {
					defer wg.Done()
					d.playerLoop(runCtx, p, pg, cfg, rand.New(rand.NewSource(sd)))
				}(p, byPlayer[p.ID.String()], time.Now().UnixNano()+int64(pi))
			}
		} else {
			d.log.Warn("ramp step skipped: cohort too small", "step", si+1, "cohort", len(cohort))
		}

		// Hold the step, unless the caller gives up first.
		select {
		case <-time.After(cfg.StepDur):
		case <-ctx.Done():
			cancel()
			wg.Wait()
			return ctx.Err()
		}
	}

	cancel()
	wg.Wait()
	return nil
}
// playerLoop runs one virtual player until ctx is done: a live-event subscription
// (loads the push hub, counts events) plus a round-robin turn loop over the
// player's games. On every tick it takes the next game in rotation and, with
// probability cfg.SecondaryProb, runs a secondary operation on it; otherwise it
// tries to play a turn. A player without games only holds the subscription.
//
// The subscription runs in its own goroutine, and playerLoop does not return until
// that goroutine has exited. A caller that waits for playerLoop therefore also
// waits for the subscription, so no events are recorded after the run is declared
// finished. This relies on the subscription returning once ctx is cancelled.
//
// cfg.Tick must be positive.
func (d *Driver) playerLoop(ctx context.Context, p seed.Account, games []*Game, cfg RealisticConfig, rng *rand.Rand) {
	subDone := make(chan struct{})
	go func() {
		defer close(subDone)
		d.subscribeLoop(ctx, p)
	}()
	defer func() { <-subDone }()

	if len(games) == 0 {
		<-ctx.Done()
		return
	}

	ticker := time.NewTicker(cfg.Tick)
	defer ticker.Stop()
	gi := 0
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			g := games[gi%len(games)]
			gi++
			if rng.Float64() < cfg.SecondaryProb {
				d.secondaryOp(ctx, p, g, rng)
				continue
			}
			d.playTurn(ctx, p, g, rng)
		}
	}
}
// subscribeLoop keeps the player's live-event stream open until ctx is done. Each
// received event is counted by kind. When the stream ends while the run is still
// going, an error is counted as a stream error and the loop reconnects after a
// one-second pause.
func (d *Driver) subscribeLoop(ctx context.Context, p seed.Account) {
	countEvent := func(e edge.Event) { d.rec.Event(e.Kind) }
	for {
		if ctx.Err() != nil {
			return
		}
		err := d.edge.Subscribe(ctx, p.Token, countEvent)
		switch {
		case ctx.Err() != nil:
			// The run ended; a drop caused by shutdown is not a stream error.
			return
		case err != nil:
			d.rec.StreamErr()
		}
		// Brief backoff before reconnecting.
		select {
		case <-time.After(time.Second):
		case <-ctx.Done():
			return
		}
	}
}
// playTurn plays one turn for p in g, but only when the game is active and it is
// p's seat to move. It fetches the game state, then the history, asks the move
// registry for an action and submits it as a play, an exchange or, for any other
// action kind, a pass. Every edge call is recorded with its result code and
// latency. The function returns silently when p is not seated in g, when a fetch
// fails, when it is not p's turn, or when no action can be picked (logged at
// debug level).
func (d *Driver) playTurn(ctx context.Context, p seed.Account, g *Game, rng *rand.Rand) {
	seat := g.seatOf(p.ID.String())
	if seat < 0 {
		return
	}

	began := time.Now()
	st, stCode, stErr := d.edge.State(ctx, p.Token, g.ID)
	d.rec.Record("game.state", stCode, time.Since(began))
	if stErr != nil || stCode != "ok" {
		return
	}
	if !st.Game.Active() || st.Game.ToMove != seat {
		return
	}

	began = time.Now()
	hist, histCode, histErr := d.edge.History(ctx, p.Token, g.ID)
	d.rec.Record("game.history", histCode, time.Since(began))
	if histErr != nil || histCode != "ok" {
		return
	}

	action, pickErr := d.moves.Pick(g.Variant, hist, st.Rack, st.BagLen, rng)
	if pickErr != nil {
		d.log.Debug("pick move", "variant", g.Variant, "err", pickErr)
		return
	}

	// For the submissions only the result code is recorded; the response and
	// the call error are deliberately not inspected.
	if action.Kind == "play" {
		began = time.Now()
		_, code, _ := d.edge.SubmitPlay(ctx, p.Token, g.ID, action.Dir, action.Tiles)
		d.rec.Record("game.submit_play", code, time.Since(began))
		return
	}
	if action.Kind == "exchange" {
		began = time.Now()
		_, code, _ := d.edge.Exchange(ctx, p.Token, g.ID, action.Exchange)
		d.rec.Record("game.exchange", code, time.Since(began))
		return
	}
	began = time.Now()
	_, code, _ := d.edge.Pass(ctx, p.Token, g.ID)
	d.rec.Record("game.pass", code, time.Since(began))
}
// secondaryOp performs one randomly chosen non-move edge operation for p on g, so
// a run also touches nudge, chat, check-word, draft save/get, profile update and
// stats. The seven operations are equally likely. The call's result code and
// latency are recorded; the call error is deliberately not inspected.
func (d *Driver) secondaryOp(ctx context.Context, p seed.Account, g *Game, rng *rand.Rand) {
	began := time.Now()
	switch choice := rng.Intn(7); choice {
	case 0:
		code, _ := d.edge.Nudge(ctx, p.Token, g.ID)
		d.rec.Record("chat.nudge", code, time.Since(began))
	case 1:
		code, _ := d.edge.ChatPost(ctx, p.Token, g.ID, "gg")
		d.rec.Record("chat.post", code, time.Since(began))
	case 2:
		code, _ := d.edge.CheckWord(ctx, p.Token, g.ID, []byte{0, 1, 2})
		d.rec.Record("game.check_word", code, time.Since(began))
	case 3:
		code, _ := d.edge.DraftSave(ctx, p.Token, g.ID, `{"rack_order":[],"board_tiles":[]}`)
		d.rec.Record("draft.save", code, time.Since(began))
	case 4:
		code, _ := d.edge.DraftGet(ctx, p.Token, g.ID)
		d.rec.Record("draft.get", code, time.Since(began))
	case 5:
		// Flip between the two languages at random.
		langs := [2]string{"en", "ru"}
		lang := langs[rng.Intn(2)]
		code, _ := d.edge.ProfileUpdate(ctx, p.Token, p.Name, lang)
		d.rec.Record("profile.update", code, time.Since(began))
	default:
		code, _ := d.edge.Stats(ctx, p.Token)
		d.rec.Record("stats.get", code, time.Since(began))
	}
}
// shuffledPool returns all seeded accounts in a random, time-seeded order, so
// that any prefix used as the active set is a mix of durable and guest accounts.
//
// NOTE(review): the slice returned by pool.All() is shuffled in place — confirm
// that All returns a copy if the pool's own order matters.
func shuffledPool(pool *seed.Pool) []seed.Account {
	accounts := pool.All()
	swap := func(a, b int) {
		accounts[a], accounts[b] = accounts[b], accounts[a]
	}
	source := rand.NewSource(time.Now().UnixNano())
	rand.New(source).Shuffle(len(accounts), swap)
	return accounts
}
// gamesByPlayer builds a lookup from account id to the games that account is a
// member of. Each player's games appear in the order they occur in games.
func gamesByPlayer(games []*Game) map[string][]*Game {
	index := map[string][]*Game{}
	for i := range games {
		game := games[i]
		for j := range game.Members {
			key := game.Members[j].ID.String()
			index[key] = append(index[key], game)
		}
	}
	return index
}