scrabble-game/backend/internal/robot/strategy.go

package robot

import (
	"encoding/binary"
	"hash/fnv"
	"math"
	"time"

	"scrabble/backend/internal/engine"
)

// The robot's per-game and per-turn choices are derived deterministically from
// the game's bag seed, so the scheduler keeps no extra state and recomputes the
// same behaviour on every tick and after a restart (mirroring how the engine
// replays a game from the same seed). The mixing must be stable across process
// restarts, so it uses FNV-1a rather than hash/maphash (whose seeds are chosen
// randomly in each process).

const (
	// playToWinPercent is the probability, in percent, that the robot decides at
	// game start to play to win; the rest of the time it plays to lose, so the
	// human wins about 60% of games (docs/ARCHITECTURE.md §7). playToWin compares
	// it against a seed-derived roll in 0..99.
	playToWinPercent = 40

	// delayMinMinutes and delayMaxMinutes bound a move delay, in minutes.
	// delaySkew is the exponent moveDelay applies to a uniform sample u: the delay
	// is min + (max-min)*u^delaySkew, which gives a right-skewed distribution
	// (short delays frequent). With skew 3.5 the median is about 10 minutes and
	// the mean about 22, with a tail out to the maximum.
	delayMinMinutes = 2.0
	delayMaxMinutes = 90.0
	delaySkew       = 3.5

	// nudgeReplyMinMinutes and nudgeReplyMaxMinutes bound, in minutes, how soon
	// the robot answers a daytime nudge on its turn; nudgeReplyDelay samples
	// uniformly between them.
	nudgeReplyMinMinutes = 2.0
	nudgeReplyMaxMinutes = 10.0

	// sleepStartHour and sleepEndHour bound the robot's nightly sleep in its
	// (opponent-anchored, drifted) local time: it makes no move and sends no nudge
	// while the local hour is in [sleepStartHour, sleepEndHour). asleep tests the
	// hour with a plain start <= h < end comparison, so the window must not wrap
	// past midnight (sleepStartHour must stay below sleepEndHour).
	sleepStartHour = 0
	sleepEndHour   = 7

	// sleepDriftHours is the half-width of the random drift applied to the robot's
	// sleep window relative to the opponent's timezone, in hours. sleepDrift picks
	// a whole number of hours in [-sleepDriftHours, +sleepDriftHours] per game.
	sleepDriftHours = 3

	// proactiveNudgeIdle is how long the robot waits on the human's turn before it
	// proactively nudges (subject to the social once-per-hour-per-game limit).
	proactiveNudgeIdle = 12 * time.Hour
)

// marginBand is an inclusive target range [lo, hi] for the score margin that
// results from a move: the mover's own score after the move minus the
// opponent's score.
type marginBand struct {
	lo int // smallest margin inside the band
	hi int // largest margin inside the band
}

// defaultBand is the margin the robot aims for after its move. When playing to
// win it tries to lead by 1..30 points; when playing to lose the band is negated
// so it tries to trail by 1..30. The robot picks the candidate closest to this
// band rather than the highest-scoring one (docs/ARCHITECTURE.md §7).
var defaultBand = marginBand{lo: 1, hi: 30}

// decisionKind enumerates the kinds of move the robot can make on its turn. The
// values are assigned by iota in declaration order, so decidePlay is the zero
// value of the type.
type decisionKind int

const (
	// decidePlay: make the play held in decision.move.
	decidePlay decisionKind = iota
	// decideExchange: exchange the tiles listed in decision.exchange.
	decideExchange
	// decidePass: pass the turn.
	decidePass
)

// decision is the robot's chosen action for a turn: a play, an exchange of the
// listed tiles, or a pass. kind selects the action; move is the play to make
// when kind is decidePlay, and exchange lists the tiles to swap when kind is
// decideExchange. selectMove leaves the fields that do not apply to the chosen
// kind at their zero values.
type decision struct {
	kind     decisionKind
	move     engine.MoveRecord
	exchange []string
}

// mix derives a stable 64-bit value from the game seed and a salt. The salt is a
// label plus any number of integers (for example the move index), so different
// decisions in the same game draw independent values. The FNV-1a input is the
// seed as 8 little-endian bytes, then the raw bytes of salt, then each of nums
// as 8 little-endian bytes (sign-extended to 64 bits). The result depends only
// on the arguments, so it is identical across process restarts.
func mix(seed int64, salt string, nums ...int) uint64 {
	hasher := fnv.New64a()

	// feed hashes one 64-bit word in little-endian byte order, reusing a single
	// scratch buffer for every word.
	var word [8]byte
	feed := func(v uint64) {
		binary.LittleEndian.PutUint64(word[:], v)
		_, _ = hasher.Write(word[:]) // hash.Hash.Write never returns an error
	}

	feed(uint64(seed))
	_, _ = hasher.Write([]byte(salt)) // hash.Hash.Write never returns an error
	for i := range nums {
		feed(uint64(int64(nums[i])))
	}
	return hasher.Sum64()
}

// unitFloat maps a mixed value to a float in [0, 1).
//
// It keeps the top 53 bits of v (the precision of a float64 significand) and
// scales them by 2^-53, so the conversion and the division are both exact and
// the largest possible result is 1 - 2^-53. Dividing the full 64-bit value by
// 2^64 instead would round inputs near the top of the uint64 range up to 2^64
// and return exactly 1, breaking the half-open range that moveDelay and
// nudgeReplyDelay rely on for their exclusive upper bounds.
func unitFloat(v uint64) float64 {
	return float64(v>>11) / (1 << 53)
}

// playToWin reports whether the robot plays this game to win. The answer is a
// pure function of the seed, so it is decided once and stays fixed for the whole
// game: a seed-derived roll in 0..99 wins when it falls below playToWinPercent.
func playToWin(seed int64) bool {
	roll := mix(seed, "win") % 100
	return roll < playToWinPercent
}

// moveDelay returns the robot's think time for the move at index moveCount. A
// seed- and move-specific sample u from unitFloat is raised to delaySkew, which
// pushes most samples toward zero (right-skewed: short delays are frequent),
// and then scaled into the range from delayMinMinutes to delayMaxMinutes. The
// same seed and moveCount always give the same delay.
func moveDelay(seed int64, moveCount int) time.Duration {
	sample := unitFloat(mix(seed, "delay", moveCount))
	skewed := math.Pow(sample, delaySkew)
	minutes := delayMinMinutes + (delayMaxMinutes-delayMinMinutes)*skewed
	return time.Duration(minutes * float64(time.Minute))
}

// nudgeReplyDelay returns how long after a daytime nudge the robot waits before
// making the move at index moveCount. The delay is spread uniformly between
// nudgeReplyMinMinutes and nudgeReplyMaxMinutes using a seed- and move-specific
// sample from unitFloat, so the same seed and moveCount always give the same
// delay.
func nudgeReplyDelay(seed int64, moveCount int) time.Duration {
	frac := unitFloat(mix(seed, "nudge", moveCount))
	minutes := nudgeReplyMinMinutes + (nudgeReplyMaxMinutes-nudgeReplyMinMinutes)*frac
	return time.Duration(minutes * float64(time.Minute))
}

// sleepDrift returns the per-game shift of the robot's sleep window relative to
// the opponent's timezone: a whole number of hours chosen from the seed in
// [-sleepDriftHours, +sleepDriftHours], both ends included.
func sleepDrift(seed int64) time.Duration {
	// 2N+1 whole-hour offsets cover -N..+N inclusive.
	const choices = 2*sleepDriftHours + 1
	offset := int(mix(seed, "tz")%choices) - sleepDriftHours
	return time.Duration(offset) * time.Hour
}

// asleep reports whether now falls inside the robot's nightly sleep window. The
// instant is converted to the opponent's timezone (UTC when the name is empty or
// unknown), shifted by drift, and its hour of day is tested against the
// half-open range [sleepStartHour, sleepEndHour).
func asleep(opponentTZ string, drift time.Duration, now time.Time) bool {
	shifted := now.In(loadLocation(opponentTZ)).Add(drift)
	if hour := shifted.Hour(); hour < sleepStartHour || hour >= sleepEndHour {
		return false
	}
	return true
}

// loadLocation resolves an IANA timezone name to a *time.Location. An empty
// name, or one that time.LoadLocation cannot resolve, yields time.UTC, so a bad
// opponent profile never breaks the driver.
func loadLocation(name string) *time.Location {
	if name != "" {
		if loc, err := time.LoadLocation(name); err == nil {
			return loc
		}
	}
	// Empty or unresolvable name: fall back to UTC.
	return time.UTC
}

// selectMove picks the robot's action for the current turn.
//
// cands are the candidate plays, myScore and oppScore the current scores, win
// the once-per-game play-to-win decision, band the target margin range, rack
// the robot's tiles and bagLen the number of tiles left in the bag.
//
// With no candidate play, the robot exchanges its whole rack (a copy of rack)
// when the rack is non-empty and the bag holds at least that many tiles;
// otherwise it passes.
//
// With candidates, each play's resulting margin is myScore + Score - oppScore.
// The target range is band itself when playing to win and the negated band
// [-band.hi, -band.lo] when playing to lose. The play whose margin is closest
// to the target range wins; among plays at the same distance the robot prefers
// the smaller margin when playing to win and the larger margin when playing to
// lose, and the earliest such candidate is kept when margins are equal too.
func selectMove(cands []engine.MoveRecord, myScore, oppScore int, win bool, band marginBand, rack []string, bagLen int) decision {
	if len(cands) == 0 {
		if len(rack) == 0 || bagLen < len(rack) {
			return decision{kind: decidePass}
		}
		return decision{kind: decideExchange, exchange: append([]string(nil), rack...)}
	}

	// Playing to lose mirrors the band around zero.
	target := band
	if !win {
		target = marginBand{lo: -band.hi, hi: -band.lo}
	}

	// pickMargin always holds the margin of cands[pick]; pickDist starts at the
	// maximum so the first candidate with a smaller distance replaces it.
	pick := 0
	pickMargin := myScore + cands[0].Score - oppScore
	pickDist := math.MaxInt
	for i := range cands {
		m := myScore + cands[i].Score - oppScore
		d := distanceToBand(m, target.lo, target.hi)
		if d > pickDist {
			continue
		}
		// Strictly closer always wins. At equal distance take the conservative
		// side: a smaller lead when winning, a smaller deficit when losing.
		if d < pickDist || (win && m < pickMargin) || (!win && m > pickMargin) {
			pick, pickMargin, pickDist = i, m, d
		}
	}
	return decision{kind: decidePlay, move: cands[pick]}
}

// distanceToBand returns how far m lies outside the inclusive range [lo, hi]:
// lo - m when m is below the range, m - hi when it is above, and 0 when m is
// inside it (both bounds included).
func distanceToBand(m, lo, hi int) int {
	if m < lo {
		return lo - m
	}
	if m > hi {
		return m - hi
	}
	return 0
}