f916d5e0ca
CI / changes (pull_request) Successful in 2s
CI / unit (pull_request) Successful in 8s
CI / integration (pull_request) Successful in 11s
CI / ui (pull_request) Successful in 29s
CI / gate (pull_request) Successful in 1s
CI / deploy (pull_request) Successful in 1m14s
The admin game detail now shows, per robot seat, the game's deterministic play-to-win decision (from the bag seed) and — while it is that robot's turn — its scheduled next-move ETA (sampled think-time delay, deferred past the sleep window), plus a caption with the ~40% global target. Wiring: robot.PlayToWin/NextMoveAt/PlayToWinTargetPercent exports, account.IsRobot, game RobotSchedule (seed + turn-start). Tests: NextMoveAt invariants (never early, never in the sleep window), PlayToWin export, and an admin render integration test asserting the intent + ETA + target appear.
270 lines
10 KiB
Go
270 lines
10 KiB
Go
package robot
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"hash/fnv"
|
|
"math"
|
|
"time"
|
|
|
|
"scrabble/backend/internal/account"
|
|
"scrabble/backend/internal/engine"
|
|
)
|
|
|
|
// The robot's per-game and per-turn choices are derived deterministically from
|
|
// the game's bag seed, so the scheduler keeps no extra state and recomputes the
|
|
// same behaviour on every tick and after a restart (mirroring how the engine
|
|
// replays a game from the same seed). The mixing must be stable across process
|
|
// restarts, so it uses FNV-1a rather than hash/maphash (whose seed is process
|
|
// random).
|
|
|
|
// Game-outcome policy.
const (
	// playToWinPercent is the chance, in percent, that the robot commits at game
	// start to playing for a win. In every other game it plays to lose, which
	// leaves the human winning roughly 60% of games (docs/ARCHITECTURE.md §7).
	playToWinPercent = 40
)

// Think-time model. How long the robot "thinks" grows with game progress: the
// delay is sampled from a band that slides linearly, with the move count, from
// the early band at the first move to the late band at avgGameMoves. The sample
// is right-skewed by delaySkew and the result is clamped to the hard bounds.
// All figures are deliberate estimates, to be retuned once real play statistics
// are available (docs/ARCHITECTURE.md §7).
const (
	// delayEarlyLoMinutes and delayEarlyHiMinutes bound the band at the first move.
	delayEarlyLoMinutes = 3.0
	delayEarlyHiMinutes = 10.0

	// delayLateLoMinutes and delayLateHiMinutes bound the band from avgGameMoves on.
	delayLateLoMinutes = 10.0
	delayLateHiMinutes = 90.0

	// delaySkew is the exponent applied to the uniform sample; a larger value
	// concentrates delays near the band's floor (an active player).
	delaySkew = 4.0

	// avgGameMoves is the move count at which the band reaches its late bounds.
	avgGameMoves = 28.0

	// delayHardMinMinutes and delayHardMaxMinutes clamp every computed delay.
	delayHardMinMinutes = 1.0
	delayHardMaxMinutes = 90.0
)

// Nudge handling.
const (
	// nudgeReplySpreadMinutes is the width of the quick-reply window that starts
	// at the move's lower band (delayBand's lo). A robot nudged in the daytime on
	// its own turn answers inside that window, i.e. near the floor of its think
	// time.
	nudgeReplySpreadMinutes = 5.0

	// proactiveNudgeIdle is how long the robot lets the human's turn sit idle
	// before nudging on its own initiative (still subject to the social
	// once-per-hour-per-game limit).
	proactiveNudgeIdle = 12 * time.Hour
)

// Nightly sleep.
const (
	// sleepStartHour and sleepEndHour delimit the robot's night in its own
	// (opponent-anchored, drifted) local time: while the local hour lies in
	// [sleepStartHour, sleepEndHour) it neither moves nor nudges.
	sleepStartHour = 0
	sleepEndHour   = 7

	// sleepDriftHours is the half-width, in hours, of the random shift applied
	// to the sleep window relative to the opponent's timezone.
	sleepDriftHours = 3
)
|
|
|
|
// marginBand is an inclusive target range for the score margin that results
// from a move, where the margin is the mover's own score after the move minus
// the opponent's score.
type marginBand struct {
	lo int
	hi int
}

// defaultBand is the margin the robot aims for after its move. Playing to win,
// it wants to lead by 1..30 points; playing to lose, the band is negated and it
// wants to trail by 1..30. The robot picks the candidate closest to this band,
// not the highest-scoring one (docs/ARCHITECTURE.md §7).
var defaultBand = marginBand{lo: 1, hi: 30}
|
|
|
|
// decisionKind identifies which of the three possible actions the robot takes
// on its turn.
type decisionKind int

const (
	// decidePlay places tiles on the board.
	decidePlay = decisionKind(iota)
	// decideExchange swaps tiles with the bag.
	decideExchange
	// decidePass forfeits the turn.
	decidePass
)
|
|
|
|
// decision is the robot's chosen action for a turn: a play (Move), an exchange of
|
|
// the listed tiles, or a pass.
|
|
type decision struct {
|
|
kind decisionKind
|
|
move engine.MoveRecord
|
|
exchange []string
|
|
}
|
|
|
|
// mix derives a stable 64-bit value from the game seed and a salt. The salt is
// a label, optionally followed by integers such as the move index. The inputs
// are fed to FNV-1a in a fixed order and encoding (seed as a little-endian
// 64-bit word, the label's bytes, then each integer as a little-endian 64-bit
// word), so the result is the same in every process and after every restart.
func mix(seed int64, salt string, nums ...int) uint64 {
	hasher := fnv.New64a()

	// feed hashes one 64-bit word in little-endian byte order. Writes to a
	// hash.Hash never fail, so the results are discarded on purpose.
	var word [8]byte
	feed := func(v uint64) {
		binary.LittleEndian.PutUint64(word[:], v)
		_, _ = hasher.Write(word[:])
	}

	feed(uint64(seed))
	_, _ = hasher.Write([]byte(salt))
	for i := range nums {
		feed(uint64(int64(nums[i])))
	}
	return hasher.Sum64()
}
|
|
|
|
// unitFloat maps a mixed value to a float in the half-open interval [0, 1).
//
// The value is divided by 2^64. A float64 carries only 53 bits of precision, so
// converting a uint64 rounds it: inputs within about 1024 of math.MaxUint64
// round up to exactly 2^64 and the plain quotient would be 1.0, which breaks the
// half-open contract. Those inputs are pinned to the largest float64 below 1.
// Every other input maps to exactly the same result as the plain quotient.
func unitFloat(v uint64) float64 {
	// two64 is 2^64, which is exactly representable as a float64 (unlike
	// math.MaxUint64, which rounds up to it).
	const two64 = 1 << 64

	u := float64(v) / two64
	if u >= 1 {
		return math.Nextafter(1, 0)
	}
	return u
}
|
|
|
|
// playToWin reports the robot's once-per-game decision to play to win, derived
|
|
// from the seed so it is fixed for the whole game.
|
|
func playToWin(seed int64) bool {
|
|
return mix(seed, "win")%100 < playToWinPercent
|
|
}
|
|
|
|
// PlayToWin exposes the once-per-game play-to-win decision for a game's bag seed, for the
|
|
// admin console (it is deterministic and fixed for the whole game).
|
|
func PlayToWin(seed int64) bool { return playToWin(seed) }
|
|
|
|
// PlayToWinTargetPercent is the configured probability, in percent, that a robot plays to
|
|
// win in any given game (the admin console shows it alongside the per-game decision).
|
|
const PlayToWinTargetPercent = playToWinPercent
|
|
|
|
// NextMoveAt is the deterministic instant the robot is scheduled to play the move at
|
|
// moveCount, given when the turn started and the opponent's timezone (which anchors the
|
|
// robot's sleep window). It is the sampled think-time delay, deferred to the end of the
|
|
// sleep window when it would otherwise land while the robot is asleep. The driver acts on
|
|
// a scan tick, so the real move lands at the first scan at or after this instant. It is
|
|
// meaningful only on the robot's own turn; the admin console surfaces it as an ETA.
|
|
func NextMoveAt(seed int64, moveCount int, turnStartedAt time.Time, opponentTZ string) time.Time {
|
|
t := turnStartedAt.Add(moveDelay(seed, moveCount))
|
|
drift := sleepDrift(seed)
|
|
if asleep(opponentTZ, drift, t) {
|
|
t = wakeAfter(opponentTZ, drift, t)
|
|
}
|
|
return t
|
|
}
|
|
|
|
// wakeAfter returns the first instant at or after t when the robot is awake — the local
|
|
// hour reaches sleepEndHour in the opponent's drifted timezone — converted back to UTC.
|
|
func wakeAfter(opponentTZ string, drift time.Duration, t time.Time) time.Time {
|
|
local := t.In(loadLocation(opponentTZ)).Add(drift)
|
|
wake := time.Date(local.Year(), local.Month(), local.Day(), sleepEndHour, 0, 0, 0, local.Location())
|
|
if !wake.After(local) {
|
|
wake = wake.Add(24 * time.Hour)
|
|
}
|
|
return wake.Add(-drift).UTC()
|
|
}
|
|
|
|
// delayBand returns the lower and upper bounds, in minutes, of the move-delay band
|
|
// for the move at moveCount. It interpolates linearly with game progress (the move
|
|
// count over avgGameMoves, capped at 1): early moves sit in a short band and late
|
|
// moves in a long one.
|
|
func delayBand(moveCount int) (lo, hi float64) {
|
|
p := float64(moveCount) / avgGameMoves
|
|
if p > 1 {
|
|
p = 1
|
|
}
|
|
lo = delayEarlyLoMinutes + (delayLateLoMinutes-delayEarlyLoMinutes)*p
|
|
hi = delayEarlyHiMinutes + (delayLateHiMinutes-delayEarlyHiMinutes)*p
|
|
return lo, hi
|
|
}
|
|
|
|
// moveDelay is the robot's think time for the move at moveCount: a right-skewed
|
|
// sample from the move's delayBand, clamped to the hard bounds. The skew (delaySkew
|
|
// > 1) makes short delays frequent and long ones rare, with a tail to the band's top.
|
|
func moveDelay(seed int64, moveCount int) time.Duration {
|
|
lo, hi := delayBand(moveCount)
|
|
u := unitFloat(mix(seed, "delay", moveCount))
|
|
return clampMinutes(lo + (hi-lo)*math.Pow(u, delaySkew))
|
|
}
|
|
|
|
// nudgeReplyDelay is how soon after a daytime nudge the robot answers the move at
|
|
// moveCount: a uniform sample from the quick window [lo, lo+nudgeReplySpreadMinutes],
|
|
// where lo is the move's lower band — so a nudge pulls the move in near the floor of
|
|
// the robot's think time.
|
|
func nudgeReplyDelay(seed int64, moveCount int) time.Duration {
|
|
lo, _ := delayBand(moveCount)
|
|
u := unitFloat(mix(seed, "nudge", moveCount))
|
|
return clampMinutes(lo + nudgeReplySpreadMinutes*u)
|
|
}
|
|
|
|
// clampMinutes converts a minute count to a duration, clamping it to the hard delay
|
|
// bounds so an out-of-range band can never produce an absurd think time.
|
|
func clampMinutes(mins float64) time.Duration {
|
|
if mins < delayHardMinMinutes {
|
|
mins = delayHardMinMinutes
|
|
}
|
|
if mins > delayHardMaxMinutes {
|
|
mins = delayHardMaxMinutes
|
|
}
|
|
return time.Duration(mins * float64(time.Minute))
|
|
}
|
|
|
|
// sleepDrift is the per-game shift of the robot's sleep window relative to the
|
|
// opponent's timezone, in [-sleepDriftHours, +sleepDriftHours] hours.
|
|
func sleepDrift(seed int64) time.Duration {
|
|
span := 2*sleepDriftHours + 1
|
|
h := int(mix(seed, "tz")%uint64(span)) - sleepDriftHours
|
|
return time.Duration(h) * time.Hour
|
|
}
|
|
|
|
// asleep reports whether the robot is in its nightly sleep window at now. The
|
|
// window is [sleepStartHour, sleepEndHour) in the opponent's timezone shifted by
|
|
// drift; an unknown or empty timezone falls back to UTC.
|
|
func asleep(opponentTZ string, drift time.Duration, now time.Time) bool {
|
|
local := now.In(loadLocation(opponentTZ)).Add(drift)
|
|
h := local.Hour()
|
|
return h >= sleepStartHour && h < sleepEndHour
|
|
}
|
|
|
|
// loadLocation resolves a stored timezone (an IANA name or a "±HH:MM" offset),
|
|
// falling back to UTC when it is empty or unknown (so a bad opponent profile never
|
|
// breaks the driver). It defers to account.ResolveZone.
|
|
func loadLocation(name string) *time.Location {
|
|
return account.ResolveZone(name)
|
|
}
|
|
|
|
// selectMove chooses the robot's action given the ranked candidate plays, the
|
|
// current scores, the play-to-win decision and the target band. With at least one
|
|
// legal play it picks the candidate whose resulting margin (myScore + score -
|
|
// oppScore) is closest to the band, breaking ties toward the conservative edge
|
|
// (the smallest lead when winning, the smallest deficit when losing). With no
|
|
// legal play it exchanges the whole rack when the bag can refill it, else passes.
|
|
func selectMove(cands []engine.MoveRecord, myScore, oppScore int, win bool, band marginBand, rack []string, bagLen int) decision {
|
|
if len(cands) == 0 {
|
|
if len(rack) > 0 && bagLen >= len(rack) {
|
|
return decision{kind: decideExchange, exchange: append([]string(nil), rack...)}
|
|
}
|
|
return decision{kind: decidePass}
|
|
}
|
|
|
|
lo, hi := band.lo, band.hi
|
|
if !win {
|
|
lo, hi = -band.hi, -band.lo
|
|
}
|
|
|
|
margin := func(c engine.MoveRecord) int { return myScore + c.Score - oppScore }
|
|
best := 0
|
|
bestDist := math.MaxInt
|
|
for i, c := range cands {
|
|
m := margin(c)
|
|
dist := distanceToBand(m, lo, hi)
|
|
switch {
|
|
case dist < bestDist:
|
|
best, bestDist = i, dist
|
|
case dist == bestDist:
|
|
// Conservative tie-break inside the band: keep the lead (win) or the
|
|
// deficit (lose) small.
|
|
if win && m < margin(cands[best]) || !win && m > margin(cands[best]) {
|
|
best = i
|
|
}
|
|
}
|
|
}
|
|
return decision{kind: decidePlay, move: cands[best]}
|
|
}
|
|
|
|
// distanceToBand returns how far m lies outside the inclusive range [lo, hi]:
// the gap to lo when m is below it, the gap to hi when m is above it, and 0
// when m is inside the range.
func distanceToBand(m, lo, hi int) int {
	if m < lo {
		return lo - m
	}
	if m > hi {
		return m - hi
	}
	return 0
}
|