Stage 5: robot opponent (pool, seed-derived strategy, move driver, matchmaker substitution)

- internal/robot: durable kind='robot' account pool (migration 00004); every per-game and per-turn choice derived deterministically from the game seed (restart-stable FNV mix); a background move driver; margin targeting (band 1-30, closest-to-band); right-skewed [2,90]min delays (median ~10m); opponent-anchored sleep with +/-3h drift; daytime nudge reply + proactive 12h nudge; friend/chat blocked via profile toggles. - engine.Candidates (decoded ranked plays); game.Candidates + RobotTurns; social.LastNudgeAt. - matchmaker: 10s wait then robot substitution (reaper) + Poll delivery seam. - config (BACKEND_ROBOT_DRIVE_INTERVAL, BACKEND_LOBBY_ROBOT_WAIT, BACKEND_LOBBY_REAPER_INTERVAL); main wiring + boot-time pool provisioning. - metrics: robot account_stats (authoritative balance) + robot_games_finished_total OTel counter + per-finish log. - docs: PLAN, ARCHITECTURE, FUNCTIONAL(+ru), TESTING, README; account.go comment. - tests: robot strategy units, matchmaker reaper/Poll, engine.Candidates; inttest robot full-game / substitution / proactive-nudge.
2026-06-02 21:02:20 +02:00
parent 12fc6e498e
commit 85baabe4ba
26 changed files with 1700 additions and 85 deletions
@@ -0,0 +1,201 @@
+package robot
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"github.com/google/uuid"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+	"go.uber.org/zap"
+
+	"scrabble/backend/internal/game"
+)
+
+// Run is the robot's background loop. It fires Drive with the service clock's
+// current time once per interval and returns as soon as ctx is cancelled. It is
+// meant to be started a single time from main, like the game turn-timeout
+// sweeper it mirrors.
+func (s *Service) Run(ctx context.Context, interval time.Duration) {
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+
+	done := ctx.Done()
+	for {
+		select {
+		case <-tick.C:
+			s.Drive(ctx, s.clock())
+		case <-done:
+			return
+		}
+	}
+}
+
+// Drive performs one scan: it handles every active game seating a pool robot as
+// of now. Run calls it on a timer; it takes now explicitly so tests and ops can
+// drive a single pass at a chosen instant (mirroring game.Service.SweepTimeouts).
+//
+// A failed scan is logged and ends the pass. A failure on one game is logged and
+// does not stop the remaining games. The pass stops early once ctx is cancelled.
+func (s *Service) Drive(ctx context.Context, now time.Time) {
+	turns, err := s.games.RobotTurns(ctx, s.poolIDs())
+	if err != nil {
+		s.log.Warn("robot scan failed", zap.Error(err))
+		return
+	}
+	for _, rt := range turns {
+		// A cancelled context would fail every remaining turn the same way, so
+		// stop here rather than log one warning per game during shutdown.
+		if ctx.Err() != nil {
+			return
+		}
+		if err := s.handle(ctx, rt, now); err != nil {
+			s.log.Warn("robot turn failed", zap.String("game", rt.GameID.String()), zap.Error(err))
+		}
+	}
+}
+
+// handle processes one scanned robot turn. It looks up the game's participants,
+// gives up quietly when the game is no longer active or no opponent seat exists,
+// and does nothing while the robot is inside its sleep window (anchored to the
+// opponent's timezone and shifted by the per-game drift). Otherwise it moves
+// when the scan says it is the robot's turn, and considers a proactive nudge
+// when it is not.
+//
+// The seat→account mapping is fixed for the game's life, so reading it at a
+// different instant than the scan is consistent; the turn cursor comes from the
+// scan snapshot (rt), and the submit/nudge calls re-validate against live state.
+func (s *Service) handle(ctx context.Context, rt game.RobotTurn, now time.Time) error {
+	seats, _, status, err := s.games.Participants(ctx, rt.GameID)
+	switch {
+	case err != nil:
+		return err
+	case status != game.StatusActive:
+		return nil
+	}
+
+	opponentID, found := opponentOf(seats, rt.RobotSeat)
+	if !found {
+		return nil
+	}
+	opponent, err := s.accounts.GetByID(ctx, opponentID)
+	if err != nil {
+		return err
+	}
+	if asleep(opponent.TimeZone, sleepDrift(rt.Seed), now) {
+		return nil
+	}
+
+	if rt.ToMove != rt.RobotSeat {
+		return s.maybeNudge(ctx, rt, now)
+	}
+	return s.maybeMove(ctx, rt, opponentID, now)
+}
+
+// maybeMove decides whether the robot should act now. Once the sampled think
+// time for this turn has elapsed it acts unconditionally. Before that, it acts
+// only if the opponent nudged after the turn started and the sampled reply
+// delay since that nudge has also elapsed; otherwise it keeps waiting.
+func (s *Service) maybeMove(ctx context.Context, rt game.RobotTurn, oppID uuid.UUID, now time.Time) error {
+	dueAt := rt.TurnStartedAt.Add(moveDelay(rt.Seed, rt.MoveCount))
+	if !now.Before(dueAt) {
+		return s.act(ctx, rt, now)
+	}
+
+	nudgedAt, nudged, err := s.social.LastNudgeAt(ctx, rt.GameID, oppID)
+	if err != nil {
+		return err
+	}
+	if !nudged || !nudgedAt.After(rt.TurnStartedAt) {
+		return nil // not yet due and no nudge this turn
+	}
+	replyAt := nudgedAt.Add(nudgeReplyDelay(rt.Seed, rt.MoveCount))
+	if now.Before(replyAt) {
+		return nil // within the reply window
+	}
+	return s.act(ctx, rt, now)
+}
+
+// maybeNudge proactively nudges the human once their turn has lasted at least
+// proactiveNudgeIdle. Any error from the social service is logged at debug level
+// and dropped: the rate limit and turn checks live there, so a rejection is an
+// expected outcome rather than a driver failure. It always returns nil.
+func (s *Service) maybeNudge(ctx context.Context, rt game.RobotTurn, now time.Time) error {
+	if idle := now.Sub(rt.TurnStartedAt); idle < proactiveNudgeIdle {
+		return nil
+	}
+	_, err := s.social.Nudge(ctx, rt.GameID, rt.RobotID)
+	if err != nil {
+		s.log.Debug("robot nudge skipped", zap.String("game", rt.GameID.String()), zap.Error(err))
+	}
+	return nil
+}
+
+// act makes the robot's move. It reads the robot's live state view and the
+// candidate plays, lets selectMove choose a play, an exchange or a pass from the
+// current scores and the game's play-to-win decision, and submits that choice.
+// Errors meaning the game moved on since the scan are dropped via skipBenign;
+// a successful submission is handed to recordFinish.
+func (s *Service) act(ctx context.Context, rt game.RobotTurn, now time.Time) error {
+	view, err := s.games.GameState(ctx, rt.GameID, rt.RobotID)
+	if err != nil {
+		return skipBenign(err)
+	}
+	candidates, err := s.games.Candidates(ctx, rt.GameID, rt.RobotID)
+	if err != nil {
+		return skipBenign(err)
+	}
+
+	own := view.Game.Seats[view.Seat].Score
+	rival := bestOpponentScore(view.Game.Seats, view.Seat)
+	choice := selectMove(candidates, own, rival, playToWin(rt.Seed), defaultBand, view.Rack, view.BagLen)
+
+	var outcome game.MoveResult
+	switch choice.kind {
+	case decideExchange:
+		outcome, err = s.games.Exchange(ctx, rt.GameID, rt.RobotID, choice.exchange)
+	case decidePlay:
+		outcome, err = s.games.SubmitPlay(ctx, rt.GameID, rt.RobotID, choice.move.Dir, choice.move.Tiles)
+	default:
+		outcome, err = s.games.Pass(ctx, rt.GameID, rt.RobotID)
+	}
+	if err != nil {
+		return skipBenign(err)
+	}
+
+	s.recordFinish(ctx, rt.GameID, rt.RobotID, outcome.Game)
+	return nil
+}
+
+// recordFinish counts and logs a game that the robot's own move has just ended;
+// it is a no-op for a game that is not finished. The result label is "win" when
+// the first winning seat belongs to the robot, "loss" when it belongs to someone
+// else, and "draw" when no seat is marked as winner. account_stats stays the
+// authoritative balance metric (it also covers games the human finishes); this
+// live counter only sees robot-finished games.
+func (s *Service) recordFinish(ctx context.Context, gameID, robotID uuid.UUID, g game.Game) {
+	if g.Status != game.StatusFinished {
+		return
+	}
+
+	outcome := "draw"
+	for i := range g.Seats {
+		winner := &g.Seats[i]
+		if !winner.IsWinner {
+			continue
+		}
+		outcome = "loss"
+		if winner.AccountID == robotID {
+			outcome = "win"
+		}
+		break
+	}
+
+	s.finished.Add(ctx, 1, metric.WithAttributes(attribute.String("result", outcome)))
+	s.log.Info("robot game finished",
+		zap.String("game", gameID.String()),
+		zap.String("result", outcome),
+		zap.String("reason", g.EndReason))
+}
+
+// opponentOf returns the account seated at the first index other than
+// robotSeat, which in a two-player auto-match is the lone opponent. The boolean
+// is false when no such seat exists.
+func opponentOf(seats []uuid.UUID, robotSeat int) (uuid.UUID, bool) {
+	for i := 0; i < len(seats); i++ {
+		if i == robotSeat {
+			continue
+		}
+		return seats[i], true
+	}
+	return uuid.Nil, false
+}
+
+// bestOpponentScore returns the largest score held by any seat other than
+// robotSeat. The running maximum starts at zero, so zero is also the result when
+// there is no other seat or none of them has a positive score.
+func bestOpponentScore(seats []game.Seat, robotSeat int) int {
+	top := 0
+	for i := range seats {
+		if seats[i].Seat == robotSeat {
+			continue
+		}
+		top = max(top, seats[i].Score)
+	}
+	return top
+}
+
+// skipBenign filters out the errors that only mean the scan snapshot went stale:
+// the game has finished, it is not the robot's turn, or the account is not a
+// player in the game. Those become nil so the driver just retries on a later
+// tick; every other error (and nil) is returned unchanged.
+func skipBenign(err error) error {
+	for _, benign := range []error{game.ErrFinished, game.ErrNotYourTurn, game.ErrNotAPlayer} {
+		if errors.Is(err, benign) {
+			return nil
+		}
+	}
+	return err
+}
@@ -0,0 +1,177 @@
+// Package robot is the human-like computer opponent. It substitutes for a missing
+// human in two-player auto-match: a pool of durable accounts (one robot identity
+// each) is provisioned at startup, and a background driver makes their moves with
+// human-like timing, a night sleep window and nudge behaviour
+// (docs/ARCHITECTURE.md §7).
+//
+// The robot consumes the public game API as an ordinary seated player and works
+// on decoded values only, so it never imports the solver (only internal/engine
+// does). All of a robot's per-game and per-turn choices are derived
+// deterministically from the game's bag seed (see strategy.go), so the driver
+// holds no per-game state and is restart-safe.
+package robot
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"math/rand/v2"
+	"sync"
+	"time"
+
+	"github.com/google/uuid"
+	"go.opentelemetry.io/otel/metric"
+	"go.opentelemetry.io/otel/metric/noop"
+	"go.uber.org/zap"
+
+	"scrabble/backend/internal/account"
+	"scrabble/backend/internal/engine"
+	"scrabble/backend/internal/game"
+	"scrabble/backend/internal/social"
+)
+
+// ErrNoRobotAvailable is the sentinel Pick returns while the pool holds no
+// accounts, i.e. before EnsurePool has populated it or after EnsurePool failed.
+var ErrNoRobotAvailable = errors.New("robot: no robot available in the pool")
+
+// GameDriver is the slice of the game domain the robot needs: scanning its active
+// games, reading a turn's candidates and state, and making moves as a seated
+// player. game.Service satisfies it. Defining it here keeps the driver testable
+// against a fake.
+type GameDriver interface {
+	// RobotTurns lists the turns of games that seat any of the given robot accounts.
+	RobotTurns(ctx context.Context, robotIDs []uuid.UUID) ([]game.RobotTurn, error)
+	// Participants: the driver reads the first result as the accounts indexed by
+	// seat, ignores the int, and compares the string against the game status.
+	Participants(ctx context.Context, gameID uuid.UUID) ([]uuid.UUID, int, string, error)
+	// Candidates returns the decoded plays the driver passes to selectMove.
+	Candidates(ctx context.Context, gameID, accountID uuid.UUID) ([]engine.MoveRecord, error)
+	// GameState returns the account's view of the game (seat, rack, bag size, scores).
+	GameState(ctx context.Context, gameID, accountID uuid.UUID) (game.StateView, error)
+	// SubmitPlay, Pass and Exchange are the three ways the robot ends its turn.
+	SubmitPlay(ctx context.Context, gameID, accountID uuid.UUID, dir engine.Direction, tiles []engine.TileRecord) (game.MoveResult, error)
+	Pass(ctx context.Context, gameID, accountID uuid.UUID) (game.MoveResult, error)
+	Exchange(ctx context.Context, gameID, accountID uuid.UUID, tiles []string) (game.MoveResult, error)
+}
+
+// Nudger is the slice of the social domain the robot needs: sending a proactive
+// nudge and reading the opponent's last nudge to answer it. social.Service
+// satisfies it.
+type Nudger interface {
+	// Nudge sends a nudge in the game on behalf of senderID.
+	Nudge(ctx context.Context, gameID, senderID uuid.UUID) (social.Message, error)
+	// LastNudgeAt reports when senderID last nudged in the game; the driver treats
+	// a false boolean as "no nudge".
+	LastNudgeAt(ctx context.Context, gameID, senderID uuid.UUID) (time.Time, bool, error)
+}
+
+// robotNames is the curated, human-like name pool. Each name backs one durable
+// robot account: the name is the account's display name, and its stable robot
+// identity is derived from the name by externalID.
+var robotNames = []string{
+	"Alex", "Sam", "Jordan", "Riley", "Casey", "Taylor", "Jamie", "Morgan",
+	"Robin", "Quinn", "Avery", "Drew", "Skyler", "Reese", "Harper", "Sage",
+}
+
+// Config configures the robot subsystem. Use DefaultConfig for the defaults and
+// Validate to check a populated value.
+type Config struct {
+	// DriveInterval is how often the driver scans for robot turns. Sourced from
+	// BACKEND_ROBOT_DRIVE_INTERVAL. Validate rejects a non-positive value.
+	DriveInterval time.Duration
+}
+
+// DefaultConfig returns the robot configuration defaults.
+func DefaultConfig() Config {
+	return Config{DriveInterval: 30 * time.Second}
+}
+
+// Validate checks that the configuration can be used: the drive interval must be
+// strictly positive. It returns nil when valid and a descriptive error otherwise.
+func (c Config) Validate() error {
+	if c.DriveInterval > 0 {
+		return nil
+	}
+	return fmt.Errorf("robot: drive interval must be positive, got %s", c.DriveInterval)
+}
+
+// Service owns the robot pool and the move driver. It is safe for concurrent use.
+type Service struct {
+	games    GameDriver          // game-domain seam: scans turns and submits moves
+	accounts *account.Store      // provisions pool accounts and loads opponent profiles
+	social   Nudger              // social-domain seam: sends and reads nudges
+	finished metric.Int64Counter // robot_games_finished_total
+	clock    func() time.Time    // time source for Run; NewService sets it to UTC now
+	log      *zap.Logger         // driver diagnostics; never nil after NewService
+
+	mu   sync.RWMutex // guards pool
+	pool []uuid.UUID  // robot account ids, replaced wholesale by EnsurePool
+}
+
+// NewService constructs a robot Service. games and social are the domain seams it
+// drives; accounts provisions the pool and resolves opponent timezones; meter
+// records the balance counter; log carries driver diagnostics.
+//
+// Both log and meter are optional: a nil log becomes a no-op logger and a nil
+// meter becomes a no-op meter. If the counter cannot be created the failure is
+// logged and a no-op counter is used, so construction never fails. The pool
+// starts empty until EnsurePool runs.
+func NewService(games GameDriver, accounts *account.Store, soc Nudger, meter metric.Meter, log *zap.Logger) *Service {
+	if log == nil {
+		log = zap.NewNop()
+	}
+	// Treat a missing meter like a missing logger; calling a method on a nil
+	// metric.Meter interface would panic.
+	if meter == nil {
+		meter = noop.NewMeterProvider().Meter("robot")
+	}
+	counter, err := meter.Int64Counter(
+		"robot_games_finished_total",
+		metric.WithDescription("Robot games finished, labelled by result from the robot's view (win/loss/draw)."),
+	)
+	if err != nil {
+		log.Warn("robot: create finished counter", zap.Error(err))
+		// Best effort: metrics are not worth failing the boot over.
+		counter, _ = noop.NewMeterProvider().Meter("robot").Int64Counter("robot_games_finished_total")
+	}
+	return &Service{
+		games:    games,
+		accounts: accounts,
+		social:   soc,
+		finished: counter,
+		clock:    func() time.Time { return time.Now().UTC() },
+		log:      log,
+	}
+}
+
+// EnsurePool provisions one durable robot account per entry in robotNames and
+// installs their ids as the pool. It is idempotent: an account whose display
+// name and chat/friend-request blocks are already correct is left alone, and any
+// other account has its profile rewritten with those three fields fixed and its
+// remaining fields carried over. The first provisioning or profile error aborts
+// the call and leaves the previous pool in place; the caller treats that as a
+// boot failure, like the dictionary registry (docs/ARCHITECTURE.md §7).
+func (s *Service) EnsurePool(ctx context.Context) error {
+	pool := make([]uuid.UUID, 0, len(robotNames))
+	for _, name := range robotNames {
+		acc, err := s.accounts.ProvisionByIdentity(ctx, account.KindRobot, externalID(name))
+		if err != nil {
+			return fmt.Errorf("robot: provision %q: %w", name, err)
+		}
+
+		upToDate := acc.DisplayName == name && acc.BlockChat && acc.BlockFriendRequests
+		if !upToDate {
+			update := account.ProfileUpdate{
+				DisplayName:         name,
+				PreferredLanguage:   acc.PreferredLanguage,
+				TimeZone:            acc.TimeZone,
+				AwayStart:           acc.AwayStart,
+				AwayEnd:             acc.AwayEnd,
+				BlockChat:           true,
+				BlockFriendRequests: true,
+			}
+			if _, err := s.accounts.UpdateProfile(ctx, acc.ID, update); err != nil {
+				return fmt.Errorf("robot: profile %q: %w", name, err)
+			}
+		}
+		pool = append(pool, acc.ID)
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.pool = pool
+	return nil
+}
+
+// Pick returns a uniformly random robot account id from the pool so the
+// matchmaker can seat it in an auto-match, or ErrNoRobotAvailable when the pool
+// is empty. It satisfies lobby.RobotProvider.
+func (s *Service) Pick() (uuid.UUID, error) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	size := len(s.pool)
+	if size == 0 {
+		return uuid.Nil, ErrNoRobotAvailable
+	}
+	return s.pool[rand.IntN(size)], nil
+}
+
+// poolIDs returns a snapshot of the pool for the driver scan.
+func (s *Service) poolIDs() []uuid.UUID {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return append([]uuid.UUID(nil), s.pool...)
+}
+
+// externalID is the stable robot identity for a pool name.
+func externalID(name string) string {
+	return "robot-" + name
+}
@@ -0,0 +1,201 @@
+package robot
+
+import (
+	"encoding/binary"
+	"hash/fnv"
+	"math"
+	"time"
+
+	"scrabble/backend/internal/engine"
+)
+
+// The robot's per-game and per-turn choices are derived deterministically from
+// the game's bag seed, so the scheduler keeps no extra state and recomputes the
+// same behaviour on every tick and after a restart (mirroring how the engine
+// replays a game from the same seed). The mixing must be stable across process
+// restarts, so it uses FNV-1a rather than hash/maphash (whose seed is process
+// random).
+
+// Tuning constants for the robot's behaviour. All of them feed the seed-derived
+// helpers in this file.
+const (
+	// playToWinPercent is the probability, in percent, that the robot decides at
+	// game start to play to win; the rest of the time it plays to lose, so the
+	// human wins about 60% of games (docs/ARCHITECTURE.md §7).
+	playToWinPercent = 40
+
+	// delayMinMinutes and delayMaxMinutes bound a move delay; delaySkew shapes the
+	// right-skewed distribution (short delays frequent). moveDelay computes
+	// min + (max-min)*u^skew for a uniform u, so with skew 3.5 the median is
+	// about 10 minutes and the mean about 20, with a tail out to the maximum.
+	delayMinMinutes = 2.0
+	delayMaxMinutes = 90.0
+	delaySkew       = 3.5
+
+	// nudgeReplyMinMinutes and nudgeReplyMaxMinutes bound how soon the robot
+	// answers a daytime nudge on its turn (sampled uniformly by nudgeReplyDelay).
+	nudgeReplyMinMinutes = 2.0
+	nudgeReplyMaxMinutes = 10.0
+
+	// sleepStartHour and sleepEndHour bound the robot's nightly sleep in its
+	// (opponent-anchored, drifted) local time: it makes no move and sends no nudge
+	// while the local hour is in [sleepStartHour, sleepEndHour). asleep compares
+	// the hour directly, so the window must not wrap past midnight.
+	sleepStartHour = 0
+	sleepEndHour   = 7
+
+	// sleepDriftHours is the half-width of the random drift applied to the robot's
+	// sleep window relative to the opponent's timezone, in whole hours.
+	sleepDriftHours = 3
+
+	// proactiveNudgeIdle is how long the robot waits on the human's turn before it
+	// proactively nudges (subject to the social once-per-hour-per-game limit).
+	proactiveNudgeIdle = 12 * time.Hour
+)
+
+// defaultBand is the target resulting score margin after the robot's move: when
+// playing to win it aims to lead by 1..30 points, when playing to lose it aims to
+// trail by 1..30 (selectMove negates and swaps the bounds). The robot picks the
+// candidate closest to the band rather than the maximum
+// (docs/ARCHITECTURE.md §7).
+var defaultBand = marginBand{lo: 1, hi: 30}
+
+// marginBand is an inclusive target range [lo, hi] for the resulting score
+// margin (own score after the move minus the opponent's).
+type marginBand struct{ lo, hi int }
+
+// decisionKind enumerates the kinds of move the robot can make on its turn.
+type decisionKind int
+
+const (
+	decidePlay     decisionKind = iota // place tiles (decision.move)
+	decideExchange                     // swap tiles with the bag (decision.exchange)
+	decidePass                         // forfeit the turn
+)
+
+// decision is the robot's chosen action for a turn: a play (move), an exchange of
+// the listed tiles, or a pass. Only the field matching kind is populated by
+// selectMove.
+type decision struct {
+	kind     decisionKind
+	move     engine.MoveRecord // set for decidePlay
+	exchange []string          // set for decideExchange
+}
+
+// mix folds the game seed and a salt (a label plus optional integers such as the
+// move index) into a stable 64-bit value. It is deterministic across process
+// restarts.
+func mix(seed int64, salt string, nums ...int) uint64 {
+	h := fnv.New64a()
+	var b [8]byte
+	binary.LittleEndian.PutUint64(b[:], uint64(seed))
+	_, _ = h.Write(b[:])
+	_, _ = h.Write([]byte(salt))
+	for _, n := range nums {
+		binary.LittleEndian.PutUint64(b[:], uint64(int64(n)))
+		_, _ = h.Write(b[:])
+	}
+	return h.Sum64()
+}
+
+// unitFloat maps a mixed value to a float in [0, 1).
+func unitFloat(v uint64) float64 {
+	return float64(v) / (float64(math.MaxUint64) + 1)
+}
+
+// playToWin reports whether the robot plays this game to win. The answer is a
+// pure function of the seed (a roll in 0..99 compared against
+// playToWinPercent), so it is fixed for the whole game.
+func playToWin(seed int64) bool {
+	roll := mix(seed, "win") % 100
+	return roll < playToWinPercent
+}
+
+// moveDelay is the robot's think time for the move at moveCount, sampled from the
+// right-skewed distribution and bounded to [delayMinMinutes, delayMaxMinutes).
+func moveDelay(seed int64, moveCount int) time.Duration {
+	u := unitFloat(mix(seed, "delay", moveCount))
+	mins := delayMinMinutes + (delayMaxMinutes-delayMinMinutes)*math.Pow(u, delaySkew)
+	return time.Duration(mins * float64(time.Minute))
+}
+
+// nudgeReplyDelay is how soon after a daytime nudge the robot answers the move at
+// moveCount, sampled uniformly from [nudgeReplyMinMinutes, nudgeReplyMaxMinutes).
+func nudgeReplyDelay(seed int64, moveCount int) time.Duration {
+	u := unitFloat(mix(seed, "nudge", moveCount))
+	mins := nudgeReplyMinMinutes + (nudgeReplyMaxMinutes-nudgeReplyMinMinutes)*u
+	return time.Duration(mins * float64(time.Minute))
+}
+
+// sleepDrift is the per-game shift of the robot's sleep window relative to the
+// opponent's timezone, in [-sleepDriftHours, +sleepDriftHours] hours.
+func sleepDrift(seed int64) time.Duration {
+	span := 2*sleepDriftHours + 1
+	h := int(mix(seed, "tz")%uint64(span)) - sleepDriftHours
+	return time.Duration(h) * time.Hour
+}
+
+// asleep reports whether now falls inside the robot's nightly sleep window. now
+// is viewed in the opponent's timezone (UTC when the name is empty or unknown)
+// and shifted by drift; the robot is asleep when the resulting hour lies in
+// [sleepStartHour, sleepEndHour).
+func asleep(opponentTZ string, drift time.Duration, now time.Time) bool {
+	shifted := now.In(loadLocation(opponentTZ)).Add(drift)
+	switch hour := shifted.Hour(); {
+	case hour < sleepStartHour:
+		return false
+	case hour >= sleepEndHour:
+		return false
+	default:
+		return true
+	}
+}
+
+// loadLocation resolves an IANA timezone name to a location. An empty name, or
+// one that fails to load, yields UTC, so a bad opponent profile never breaks the
+// driver.
+func loadLocation(name string) *time.Location {
+	if name != "" {
+		if loc, err := time.LoadLocation(name); err == nil {
+			return loc
+		}
+	}
+	return time.UTC
+}
+
+// selectMove chooses the robot's action for a turn.
+//
+// With no candidate play it exchanges a copy of the whole rack when the rack is
+// non-empty and the bag holds at least as many tiles, and passes otherwise.
+//
+// With candidates it targets band when win is true and the mirrored band
+// [-hi, -lo] when it is false, and returns the candidate whose resulting margin
+// (myScore + score - oppScore) is closest to that range. Among candidates at
+// equal distance it prefers the smaller margin when winning and the larger
+// margin when losing; candidates equal on both keep the earliest in the list.
+func selectMove(cands []engine.MoveRecord, myScore, oppScore int, win bool, band marginBand, rack []string, bagLen int) decision {
+	if len(cands) == 0 {
+		if n := len(rack); n == 0 || bagLen < n {
+			return decision{kind: decidePass}
+		}
+		return decision{kind: decideExchange, exchange: append([]string(nil), rack...)}
+	}
+
+	lo, hi := band.lo, band.hi
+	if !win {
+		lo, hi = -band.hi, -band.lo
+	}
+
+	// Margin before the move; each candidate adds its own score on top.
+	base := myScore - oppScore
+	pick := 0
+	pickMargin := base + cands[0].Score
+	pickDist := distanceToBand(pickMargin, lo, hi)
+	for i := 1; i < len(cands); i++ {
+		m := base + cands[i].Score
+		d := distanceToBand(m, lo, hi)
+
+		closer := d < pickDist
+		// Conservative tie-break: keep the lead (win) or the deficit (lose) small.
+		tieWon := d == pickDist && ((win && m < pickMargin) || (!win && m > pickMargin))
+		if closer || tieWon {
+			pick, pickMargin, pickDist = i, m, d
+		}
+	}
+	return decision{kind: decidePlay, move: cands[pick]}
+}
+
+// distanceToBand returns how far m lies outside the inclusive range [lo, hi]:
+// the gap to the nearer bound, or 0 when m is inside the range.
+func distanceToBand(m, lo, hi int) int {
+	if m < lo {
+		return lo - m
+	}
+	if m > hi {
+		return m - hi
+	}
+	return 0
+}
@@ -0,0 +1,190 @@
+package robot
+
+import (
+	"sort"
+	"testing"
+	"time"
+
+	"scrabble/backend/internal/engine"
+)
+
+// TestPlayToWinDistribution verifies that playToWin gives the same answer when
+// asked twice for a seed, and that across 20000 seeds the share of play-to-win
+// games stays close to the 40% target.
+func TestPlayToWinDistribution(t *testing.T) {
+	const games = 20000
+	var wins int
+	for seed := int64(1); seed <= games; seed++ {
+		first := playToWin(seed)
+		if first != playToWin(seed) {
+			t.Fatalf("playToWin not deterministic for seed %d", seed)
+		}
+		if first {
+			wins++
+		}
+	}
+	if pct := float64(wins) / float64(games) * 100; pct < 37 || pct > 43 {
+		t.Errorf("play-to-win rate = %.1f%%, want ~40%% (37-43)", pct)
+	}
+}
+
+// TestMoveDelayBoundsAndDeterminism checks every sampled delay stays in
+// [2min, 90min) and is reproducible for a (seed, moveCount).
+func TestMoveDelayBoundsAndDeterminism(t *testing.T) {
+	for seed := int64(1); seed <= 200; seed++ {
+		for mc := 0; mc < 50; mc++ {
+			d := moveDelay(seed, mc)
+			if d < 2*time.Minute || d >= 90*time.Minute {
+				t.Fatalf("delay %s out of [2m,90m) for seed=%d mc=%d", d, seed, mc)
+			}
+			if moveDelay(seed, mc) != d {
+				t.Fatalf("delay not deterministic for seed=%d mc=%d", seed, mc)
+			}
+		}
+	}
+}
+
+// TestMoveDelaySkew samples 20000 delays for one seed and verifies the shape of
+// the distribution: the median sits near the intended 10 minutes and the mean
+// lies above the median, as a right-skewed distribution requires.
+func TestMoveDelaySkew(t *testing.T) {
+	const n = 20000
+	samples := make([]float64, n)
+	total := 0.0
+	for mc := range samples {
+		samples[mc] = moveDelay(42, mc).Minutes()
+		total += samples[mc]
+	}
+	sort.Float64s(samples)
+
+	median, mean := samples[n/2], total/float64(n)
+	if median < 7 || median > 13 {
+		t.Errorf("median delay = %.1f min, want ~10 (7-13)", median)
+	}
+	if mean <= median {
+		t.Errorf("mean %.1f should exceed median %.1f (right skew)", mean, median)
+	}
+}
+
+// TestSelectMovePlayToWinKeepsLeadSmall verifies that, with several candidates
+// inside the band, a robot playing to win takes the one leaving the smallest
+// lead.
+func TestSelectMovePlayToWinKeepsLeadSmall(t *testing.T) {
+	// Scores are level at 100, so each candidate's margin equals its score:
+	// 50 overshoots, while 20, 5 and 2 are all inside [1, 30].
+	band := marginBand{1, 30}
+	got := selectMove(plays(50, 20, 5, 2), 100, 100, true, band, nil, 0)
+	if got.kind != decidePlay || got.move.Score != 2 {
+		t.Errorf("got kind=%d score=%d, want play score=2 (smallest in-band lead)", got.kind, got.move.Score)
+	}
+}
+
+// TestSelectMovePlayToLoseKeepsDeficitSmall verifies that, with several
+// candidates inside the mirrored band, a robot playing to lose takes the one
+// leaving the smallest deficit.
+func TestSelectMovePlayToLoseKeepsDeficitSmall(t *testing.T) {
+	// Robot 80, opponent 100: the margins are 30, 0, -5 and -15, and the
+	// mirrored band is [-30, -1], so -5 and -15 are inside it.
+	band := marginBand{1, 30}
+	got := selectMove(plays(50, 20, 15, 5), 80, 100, false, band, nil, 0)
+	if got.kind != decidePlay || got.move.Score != 15 {
+		t.Errorf("got kind=%d score=%d, want play score=15 (smallest deficit in band)", got.kind, got.move.Score)
+	}
+}
+
+// TestSelectMoveFallbackBehind verifies that when every play leaves the winning
+// robot below the band, it takes the highest-scoring one (the best catch-up).
+func TestSelectMoveFallbackBehind(t *testing.T) {
+	// Robot 50, opponent 100: margins -40 and -45, both under the band.
+	got := selectMove(plays(10, 5), 50, 100, true, marginBand{1, 30}, nil, 0)
+	if got.move.Score != 10 {
+		t.Errorf("got score=%d, want 10 (closest to band from below)", got.move.Score)
+	}
+}
+
+// TestSelectMoveFallbackOvershoot verifies that when every play leaves the
+// winning robot above the band, it takes the lowest-scoring one (keeping the
+// lead nearest the cap).
+func TestSelectMoveFallbackOvershoot(t *testing.T) {
+	// Robot 100, opponent 50: margins 90 and 60, both over the band.
+	got := selectMove(plays(40, 10), 100, 50, true, marginBand{1, 30}, nil, 0)
+	if got.move.Score != 10 {
+		t.Errorf("got score=%d, want 10 (closest to band from above)", got.move.Score)
+	}
+}
+
+// TestSelectMoveNoPlay verifies the fallback with no legal play: exchange the
+// whole rack when the bag can refill it, pass when the bag is too short, and
+// pass when the rack is empty.
+func TestSelectMoveNoPlay(t *testing.T) {
+	rack := []string{"A", "B", "C"}
+
+	refill := selectMove(nil, 0, 0, true, defaultBand, rack, 5)
+	if refill.kind != decideExchange || len(refill.exchange) != 3 {
+		t.Errorf("with a refillable bag want exchange of 3, got kind=%d n=%d", refill.kind, len(refill.exchange))
+	}
+
+	shortBag := selectMove(nil, 0, 0, true, defaultBand, rack, 2)
+	if shortBag.kind != decidePass {
+		t.Errorf("with a short bag want pass, got kind=%d", shortBag.kind)
+	}
+
+	emptyRack := selectMove(nil, 0, 0, true, defaultBand, nil, 9)
+	if emptyRack.kind != decidePass {
+		t.Errorf("with an empty rack want pass, got kind=%d", emptyRack.kind)
+	}
+}
+
+// TestSleepDriftBounds checks the drift stays within ±3h and is deterministic.
+func TestSleepDriftBounds(t *testing.T) {
+	for seed := int64(1); seed <= 5000; seed++ {
+		d := sleepDrift(seed)
+		if d < -3*time.Hour || d > 3*time.Hour {
+			t.Fatalf("drift %s out of ±3h for seed %d", d, seed)
+		}
+		if sleepDrift(seed) != d {
+			t.Fatalf("drift not deterministic for seed %d", seed)
+		}
+	}
+}
+
+// TestAsleep covers the window, the drift shift, a real timezone and the
+// midnight wrap.
+func TestAsleep(t *testing.T) {
+	at := func(tz string, y int, mo time.Month, d, h int) time.Time {
+		loc, err := time.LoadLocation(tz)
+		if err != nil {
+			t.Fatalf("load %s: %v", tz, err)
+		}
+		return time.Date(y, mo, d, h, 0, 0, 0, loc)
+	}
+	cases := []struct {
+		name  string
+		tz    string
+		drift time.Duration
+		now   time.Time
+		want  bool
+	}{
+		{"utc night", "UTC", 0, at("UTC", 2024, 1, 1, 3), true},
+		{"utc day", "UTC", 0, at("UTC", 2024, 1, 1, 12), false},
+		{"utc edge end", "UTC", 0, at("UTC", 2024, 1, 1, 7), false},
+		{"drift+3 shifts earlier", "UTC", 3 * time.Hour, at("UTC", 2024, 1, 1, 22), true},
+		{"drift+3 awake midday", "UTC", 3 * time.Hour, at("UTC", 2024, 1, 1, 5), false},
+		{"drift-3 shifts later", "UTC", -3 * time.Hour, at("UTC", 2024, 1, 1, 9), true},
+		{"tokyo asleep", "Asia/Tokyo", 0, at("UTC", 2024, 1, 1, 18), true}, // 03:00 JST
+		{"tokyo awake", "Asia/Tokyo", 0, at("UTC", 2024, 1, 1, 0), false},  // 09:00 JST
+		{"bad tz falls back to utc", "Nowhere/Bad", 0, at("UTC", 2024, 1, 1, 3), true},
+	}
+	for _, c := range cases {
+		if got := asleep(c.tz, c.drift, c.now); got != c.want {
+			t.Errorf("%s: asleep = %v, want %v", c.name, got, c.want)
+		}
+	}
+}
+
+// TestMixDeterministic verifies that mix returns the same value for the same
+// inputs and a different value when the salt or the move index changes.
+func TestMixDeterministic(t *testing.T) {
+	base := mix(7, "win")
+	if base != mix(7, "win") {
+		t.Error("mix not stable for the same inputs")
+	}
+	if base == mix(7, "delay") {
+		t.Error("mix should differ by salt")
+	}
+	if first, second := mix(7, "delay", 1), mix(7, "delay", 2); first == second {
+		t.Error("mix should differ by move index")
+	}
+}
+
+// plays returns one candidate play per given score, in the order passed. Only
+// the action and score are set on each record.
+func plays(scores ...int) []engine.MoveRecord {
+	records := make([]engine.MoveRecord, 0, len(scores))
+	for _, score := range scores {
+		records = append(records, engine.MoveRecord{Action: engine.ActionPlay, Score: score})
+	}
+	return records
+}