Stage 5: robot opponent (pool, seed-derived strategy, move driver, matchmaker substitution)

- internal/robot: durable kind='robot' account pool (migration 00004); every per-game and per-turn choice derived deterministically from the game seed (restart-stable FNV mix); a background move driver; margin targeting (band 1-30, closest-to-band); right-skewed [2,90]min delays (median ~10m); opponent-anchored sleep with +/-3h drift; daytime nudge reply + proactive 12h nudge; friend/chat blocked via profile toggles. - engine.Candidates (decoded ranked plays); game.Candidates + RobotTurns; social.LastNudgeAt. - matchmaker: 10s wait then robot substitution (reaper) + Poll delivery seam. - config (BACKEND_ROBOT_DRIVE_INTERVAL, BACKEND_LOBBY_ROBOT_WAIT, BACKEND_LOBBY_REAPER_INTERVAL); main wiring + boot-time pool provisioning. - metrics: robot account_stats (authoritative balance) + robot_games_finished_total OTel counter + per-finish log. - docs: PLAN, ARCHITECTURE, FUNCTIONAL(+ru), TESTING, README; account.go comment. - tests: robot strategy units, matchmaker reaper/Poll, engine.Candidates; inttest robot full-game / substitution / proactive-nudge.
2026-06-02 21:02:20 +02:00
parent 12fc6e498e
commit 85baabe4ba
26 changed files with 1700 additions and 85 deletions
@@ -0,0 +1,201 @@
+package robot
+
+import (
+	"encoding/binary"
+	"hash/fnv"
+	"math"
+	"time"
+
+	"scrabble/backend/internal/engine"
+)
+
+// The robot's per-game and per-turn choices are derived deterministically from
+// the game's bag seed, so the scheduler keeps no extra state and recomputes the
+// same behaviour on every tick and after a restart (mirroring how the engine
+// replays a game from the same seed). The mixing must be stable across process
+// restarts, so it uses FNV-1a rather than hash/maphash (whose seed is process
+// random).
+
+// playToWinPercent is the chance, in percent, that the robot commits at game
+// start to playing for the win. In the remaining games it plays to lose, so the
+// human ends up winning about 60% of games (docs/ARCHITECTURE.md §7).
+const playToWinPercent = 40
+
+// Think-time distribution for an ordinary move, in minutes. A delay lies between
+// delayMinMinutes and delayMaxMinutes; delaySkew is the exponent that makes the
+// distribution right-skewed, so short delays are the common case. At skew 3.5 the
+// median is roughly 10 minutes and the mean roughly 20, with a tail reaching the
+// maximum.
+const (
+	delayMinMinutes = 2.0
+	delayMaxMinutes = 90.0
+	delaySkew       = 3.5
+)
+
+// Reply window, in minutes, for answering a daytime nudge received while it is
+// the robot's turn.
+const (
+	nudgeReplyMinMinutes = 2.0
+	nudgeReplyMaxMinutes = 10.0
+)
+
+// Nightly sleep window. While the robot's local hour (opponent-anchored and
+// drifted) is in [sleepStartHour, sleepEndHour) it neither moves nor nudges.
+// sleepDriftHours is the half-width, in hours, of the random shift applied to
+// that window relative to the opponent's timezone.
+const (
+	sleepStartHour  = 0
+	sleepEndHour    = 7
+	sleepDriftHours = 3
+)
+
+// proactiveNudgeIdle is how long the human may sit on their turn before the
+// robot nudges them on its own initiative (still subject to the social
+// once-per-hour-per-game limit).
+const proactiveNudgeIdle = 12 * time.Hour
+
+// marginBand is an inclusive [lo, hi] target range for the score margin that
+// results from a move, where the margin is the mover's own score after the move
+// minus the opponent's score.
+type marginBand struct {
+	lo int
+	hi int
+}
+
+// defaultBand is the margin the robot aims to end up with after its move. A
+// robot playing to win targets a lead of 1..30 points; a robot playing to lose
+// targets a deficit of 1..30 points (the band is negated). The candidate closest
+// to the band is chosen, not the highest-scoring one (docs/ARCHITECTURE.md §7).
+var defaultBand = marginBand{lo: 1, hi: 30}
+
+// decisionKind identifies which kind of action the robot takes on its turn.
+type decisionKind int
+
+// The possible actions, numbered from zero in declaration order.
+const (
+	decidePlay = decisionKind(iota)
+	decideExchange
+	decidePass
+)
+
+// decision is the robot's chosen action for a turn: a play (Move), an exchange of
+// the listed tiles, or a pass. Only the field matching kind is populated by
+// selectMove; the others keep their zero values.
+type decision struct {
+	// kind selects which action this decision represents.
+	kind decisionKind
+	// move is the chosen candidate play; set when kind is decidePlay.
+	move engine.MoveRecord
+	// exchange lists the tiles to swap; set when kind is decideExchange.
+	exchange []string
+}
+
+// mix hashes the game seed together with a salt into a stable 64-bit value. The
+// salt is a label followed by any number of integers (for example the move
+// index). The input is fed to FNV-1a as the seed in 8 little-endian bytes, then
+// the label bytes, then each integer in 8 little-endian bytes, so the result is
+// the same in every process and after every restart.
+func mix(seed int64, salt string, nums ...int) uint64 {
+	hasher := fnv.New64a()
+	var word [8]byte
+
+	// feed appends one 64-bit value to the hash input in little-endian order.
+	feed := func(x uint64) {
+		binary.LittleEndian.PutUint64(word[:], x)
+		_, _ = hasher.Write(word[:]) // hash.Hash.Write never returns an error
+	}
+
+	feed(uint64(seed))
+	_, _ = hasher.Write([]byte(salt)) // hash.Hash.Write never returns an error
+	for i := range nums {
+		// Widen through int64 so negative values sign-extend consistently.
+		feed(uint64(int64(nums[i])))
+	}
+	return hasher.Sum64()
+}
+
+// unitFloat maps a mixed value to a float in [0, 1).
+//
+// Only the top 53 bits of v are used: a float64 mantissa holds exactly 53 bits,
+// so v>>11 converts without rounding and the quotient by 2^53 is always strictly
+// below 1. Dividing the full 64-bit value by 2^64 instead would round inputs
+// close to MaxUint64 up to 2^64 and yield exactly 1.0, breaking the half-open
+// range that callers rely on for their documented upper bounds.
+func unitFloat(v uint64) float64 {
+	return float64(v>>11) / (1 << 53)
+}
+
+// playToWin reports whether the robot plays this game to win. The answer is a
+// pure function of the seed, so it is decided once and stays fixed for the whole
+// game: a value in 0..99 is drawn from the seed and compared against
+// playToWinPercent.
+func playToWin(seed int64) bool {
+	roll := mix(seed, "win") % 100
+	return roll < playToWinPercent
+}
+
+// moveDelay is the robot's think time for the move at moveCount, sampled from the
+// right-skewed distribution and bounded to [delayMinMinutes, delayMaxMinutes).
+func moveDelay(seed int64, moveCount int) time.Duration {
+	u := unitFloat(mix(seed, "delay", moveCount))
+	mins := delayMinMinutes + (delayMaxMinutes-delayMinMinutes)*math.Pow(u, delaySkew)
+	return time.Duration(mins * float64(time.Minute))
+}
+
+// nudgeReplyDelay is how soon after a daytime nudge the robot answers the move at
+// moveCount, sampled uniformly from [nudgeReplyMinMinutes, nudgeReplyMaxMinutes).
+func nudgeReplyDelay(seed int64, moveCount int) time.Duration {
+	u := unitFloat(mix(seed, "nudge", moveCount))
+	mins := nudgeReplyMinMinutes + (nudgeReplyMaxMinutes-nudgeReplyMinMinutes)*u
+	return time.Duration(mins * float64(time.Minute))
+}
+
+// sleepDrift is the per-game shift of the robot's sleep window relative to the
+// opponent's timezone, in [-sleepDriftHours, +sleepDriftHours] hours.
+func sleepDrift(seed int64) time.Duration {
+	span := 2*sleepDriftHours + 1
+	h := int(mix(seed, "tz")%uint64(span)) - sleepDriftHours
+	return time.Duration(h) * time.Hour
+}
+
+// asleep reports whether now falls inside the robot's nightly sleep window. The
+// instant is converted to the opponent's timezone (UTC when the name is empty or
+// unknown) and shifted by drift; the robot is asleep when the resulting hour of
+// day is in [sleepStartHour, sleepEndHour).
+func asleep(opponentTZ string, drift time.Duration, now time.Time) bool {
+	loc := loadLocation(opponentTZ)
+	hour := now.In(loc).Add(drift).Hour()
+	if hour < sleepStartHour {
+		return false
+	}
+	return hour < sleepEndHour
+}
+
+// loadLocation turns an IANA timezone name into a *time.Location. An empty name
+// or one that cannot be loaded yields UTC, so a bad opponent profile never
+// breaks the driver.
+func loadLocation(name string) *time.Location {
+	if name != "" {
+		if loc, err := time.LoadLocation(name); err == nil {
+			return loc
+		}
+	}
+	return time.UTC
+}
+
+// selectMove picks the robot's action for this turn.
+//
+// cands are the ranked legal plays, myScore and oppScore the current scores, win
+// the game-long play-to-win decision, band the target margin range (negated when
+// playing to lose), rack the robot's tiles and bagLen the number of tiles left
+// in the bag.
+//
+// With no legal play the robot exchanges its whole rack if the bag holds at
+// least that many tiles, and passes otherwise. With legal plays it chooses the
+// candidate whose resulting margin (myScore + score - oppScore) lies closest to
+// the band. Equal distances are resolved toward the conservative side: the
+// smaller margin when playing to win, the larger margin when playing to lose.
+// Among fully equal candidates the earliest one in cands is kept.
+func selectMove(cands []engine.MoveRecord, myScore, oppScore int, win bool, band marginBand, rack []string, bagLen int) decision {
+	if len(cands) == 0 {
+		canRefill := len(rack) > 0 && bagLen >= len(rack)
+		if !canRefill {
+			return decision{kind: decidePass}
+		}
+		// Copy the rack so the decision does not alias the caller's slice.
+		return decision{kind: decideExchange, exchange: append([]string(nil), rack...)}
+	}
+
+	// Playing to lose mirrors the band around zero.
+	lo, hi := band.lo, band.hi
+	if !win {
+		lo, hi = -band.hi, -band.lo
+	}
+
+	// Seed the search with the first candidate, then scan the rest.
+	lead := myScore - oppScore
+	pick := 0
+	pickMargin := lead + cands[0].Score
+	pickDist := distanceToBand(pickMargin, lo, hi)
+	for i := 1; i < len(cands); i++ {
+		m := lead + cands[i].Score
+		d := distanceToBand(m, lo, hi)
+		if d > pickDist {
+			continue
+		}
+		if d == pickDist {
+			// Tie on distance: only switch when this candidate is the more
+			// conservative one for the current goal.
+			conservative := (win && m < pickMargin) || (!win && m > pickMargin)
+			if !conservative {
+				continue
+			}
+		}
+		pick, pickMargin, pickDist = i, m, d
+	}
+	return decision{kind: decidePlay, move: cands[pick]}
+}
+
+// distanceToBand returns how far m falls outside the inclusive range [lo, hi]:
+// the shortfall below lo, the excess above hi, or 0 when m is inside the range.
+func distanceToBand(m, lo, hi int) int {
+	if m < lo {
+		return lo - m
+	}
+	if m > hi {
+		return m - hi
+	}
+	return 0
+}