package robot

import (
	"encoding/binary"
	"hash/fnv"
	"math"
	"time"

	"scrabble/backend/internal/account"
	"scrabble/backend/internal/engine"
)

// Determinism: every per-game and per-turn choice below is a pure function of
// the game's bag seed. The scheduler therefore keeps no robot state of its own
// and arrives at the same behaviour on each tick and after a restart, much as
// the engine replays a game from its seed. That needs a hash whose output is
// stable between processes, hence FNV-1a and not hash/maphash (which is seeded
// randomly per process).

// playToWinPercent is the chance, in percent, that the robot commits at game
// start to playing for the win. In the remaining games it plays to lose, which
// leaves the human winning roughly 60% of games (docs/ARCHITECTURE.md §7).
const playToWinPercent = 40

// Think-time model. How long the robot "thinks" grows with game progress: the
// delay is sampled from a band that slides linearly with the move count from
// [delayEarlyLoMinutes, delayEarlyHiMinutes] on the first move to
// [delayLateLoMinutes, delayLateHiMinutes] once avgGameMoves moves have been
// played. The sample is right-skewed by delaySkew (a larger exponent packs more
// delays near the band's floor, like an active player) and finally clamped to
// [delayHardMinMinutes, delayHardMaxMinutes]. All figures are deliberate
// estimates awaiting real play statistics (docs/ARCHITECTURE.md §7).
const (
	delayEarlyLoMinutes = 1.0
	delayEarlyHiMinutes = 5.0
	delayLateLoMinutes  = 10.0
	delayLateHiMinutes  = 90.0

	delaySkew    = 4.0
	avgGameMoves = 28.0

	delayHardMinMinutes = 1.0
	delayHardMaxMinutes = 90.0
)

// nudgeReplySpreadMinutes is the width, in minutes, of the quick-reply window
// used when the robot is nudged in the daytime on its own turn. The window
// starts at the move's lower band (delayBand's lo), so a nudged robot answers
// close to the floor of its usual think time.
const nudgeReplySpreadMinutes = 5.0

// Nightly sleep. Times are in the robot's local clock, which is the opponent's
// timezone shifted by a per-game drift.
const (
	// sleepStartHour and sleepEndHour delimit the sleep window: while the local
	// hour lies in [sleepStartHour, sleepEndHour) the robot neither moves nor
	// nudges.
	sleepStartHour = 0
	sleepEndHour   = 7

	// sleepDriftHours is the half-width, in hours, of the random drift applied
	// to the sleep window relative to the opponent's timezone.
	sleepDriftHours = 3
)

// proactiveNudgeIdle is how long the human's turn may sit idle before the robot
// nudges on its own initiative (still subject to the social limit of one nudge
// per hour per game).
const proactiveNudgeIdle = 12 * time.Hour

type (
	// marginBand is an inclusive target range for the score margin that results
	// from a move: the mover's score after the move minus the opponent's.
	marginBand struct{ lo, hi int }

	// decisionKind identifies which sort of action the robot takes on its turn.
	decisionKind int

	// decision is the action chosen for one turn. kind selects the action; move
	// carries the play for decidePlay and exchange lists the tiles to swap for
	// decideExchange. A pass needs neither.
	decision struct {
		kind     decisionKind
		move     engine.MoveRecord
		exchange []string
	}
)

// The actions a decision can describe.
const (
	decidePlay decisionKind = iota
	decideExchange
	decidePass
)

// defaultBand is the margin the robot aims to leave after its move. Playing to
// win it targets a lead of 1..30 points; playing to lose the band is negated and
// it targets a deficit of 1..30. The candidate nearest the band is chosen, not
// the highest-scoring one (docs/ARCHITECTURE.md §7).
var defaultBand = marginBand{lo: 1, hi: 30}

// mix hashes the game seed together with a salt (a label followed by optional
// integers such as the move index) into a 64-bit value. The result is stable
// across process restarts.
func mix(seed int64, salt string, nums ...int) uint64 { h := fnv.New64a() var b [8]byte binary.LittleEndian.PutUint64(b[:], uint64(seed)) _, _ = h.Write(b[:]) _, _ = h.Write([]byte(salt)) for _, n := range nums { binary.LittleEndian.PutUint64(b[:], uint64(int64(n))) _, _ = h.Write(b[:]) } return h.Sum64() } // unitFloat maps a mixed value to a float in [0, 1). func unitFloat(v uint64) float64 { return float64(v) / (float64(math.MaxUint64) + 1) } // playToWin reports the robot's once-per-game decision to play to win, derived // from the seed so it is fixed for the whole game. func playToWin(seed int64) bool { return mix(seed, "win")%100 < playToWinPercent } // delayBand returns the lower and upper bounds, in minutes, of the move-delay band // for the move at moveCount. It interpolates linearly with game progress (the move // count over avgGameMoves, capped at 1): early moves sit in a short band and late // moves in a long one. func delayBand(moveCount int) (lo, hi float64) { p := float64(moveCount) / avgGameMoves if p > 1 { p = 1 } lo = delayEarlyLoMinutes + (delayLateLoMinutes-delayEarlyLoMinutes)*p hi = delayEarlyHiMinutes + (delayLateHiMinutes-delayEarlyHiMinutes)*p return lo, hi } // moveDelay is the robot's think time for the move at moveCount: a right-skewed // sample from the move's delayBand, clamped to the hard bounds. The skew (delaySkew // > 1) makes short delays frequent and long ones rare, with a tail to the band's top. func moveDelay(seed int64, moveCount int) time.Duration { lo, hi := delayBand(moveCount) u := unitFloat(mix(seed, "delay", moveCount)) return clampMinutes(lo + (hi-lo)*math.Pow(u, delaySkew)) } // nudgeReplyDelay is how soon after a daytime nudge the robot answers the move at // moveCount: a uniform sample from the quick window [lo, lo+nudgeReplySpreadMinutes], // where lo is the move's lower band — so a nudge pulls the move in near the floor of // the robot's think time. 
func nudgeReplyDelay(seed int64, moveCount int) time.Duration { lo, _ := delayBand(moveCount) u := unitFloat(mix(seed, "nudge", moveCount)) return clampMinutes(lo + nudgeReplySpreadMinutes*u) } // clampMinutes converts a minute count to a duration, clamping it to the hard delay // bounds so an out-of-range band can never produce an absurd think time. func clampMinutes(mins float64) time.Duration { if mins < delayHardMinMinutes { mins = delayHardMinMinutes } if mins > delayHardMaxMinutes { mins = delayHardMaxMinutes } return time.Duration(mins * float64(time.Minute)) } // sleepDrift is the per-game shift of the robot's sleep window relative to the // opponent's timezone, in [-sleepDriftHours, +sleepDriftHours] hours. func sleepDrift(seed int64) time.Duration { span := 2*sleepDriftHours + 1 h := int(mix(seed, "tz")%uint64(span)) - sleepDriftHours return time.Duration(h) * time.Hour } // asleep reports whether the robot is in its nightly sleep window at now. The // window is [sleepStartHour, sleepEndHour) in the opponent's timezone shifted by // drift; an unknown or empty timezone falls back to UTC. func asleep(opponentTZ string, drift time.Duration, now time.Time) bool { local := now.In(loadLocation(opponentTZ)).Add(drift) h := local.Hour() return h >= sleepStartHour && h < sleepEndHour } // loadLocation resolves a stored timezone (an IANA name or a "±HH:MM" offset), // falling back to UTC when it is empty or unknown (so a bad opponent profile never // breaks the driver). It defers to account.ResolveZone. func loadLocation(name string) *time.Location { return account.ResolveZone(name) } // selectMove chooses the robot's action given the ranked candidate plays, the // current scores, the play-to-win decision and the target band. 
With at least one // legal play it picks the candidate whose resulting margin (myScore + score - // oppScore) is closest to the band, breaking ties toward the conservative edge // (the smallest lead when winning, the smallest deficit when losing). With no // legal play it exchanges the whole rack when the bag can refill it, else passes. func selectMove(cands []engine.MoveRecord, myScore, oppScore int, win bool, band marginBand, rack []string, bagLen int) decision { if len(cands) == 0 { if len(rack) > 0 && bagLen >= len(rack) { return decision{kind: decideExchange, exchange: append([]string(nil), rack...)} } return decision{kind: decidePass} } lo, hi := band.lo, band.hi if !win { lo, hi = -band.hi, -band.lo } margin := func(c engine.MoveRecord) int { return myScore + c.Score - oppScore } best := 0 bestDist := math.MaxInt for i, c := range cands { m := margin(c) dist := distanceToBand(m, lo, hi) switch { case dist < bestDist: best, bestDist = i, dist case dist == bestDist: // Conservative tie-break inside the band: keep the lead (win) or the // deficit (lose) small. if win && m < margin(cands[best]) || !win && m > margin(cands[best]) { best = i } } } return decision{kind: decidePlay, move: cands[best]} } // distanceToBand is how far m lies outside [lo, hi], or 0 when inside. func distanceToBand(m, lo, hi int) int { switch { case m < lo: return lo - m case m > hi: return m - hi default: return 0 } }