// Package selfplay drives greedy AI-vs-AI Scrabble games used to validate the move // generators (the same position is offered to both) and to benchmark them. package selfplay import ( "math/rand" "sort" "time" "gitea.iliadenisov.ru/developer/scrabble-solver/board" "gitea.iliadenisov.ru/developer/scrabble-solver/rack" "gitea.iliadenisov.ru/developer/scrabble-solver/rules" "gitea.iliadenisov.ru/developer/scrabble-solver/scrabble" ) // blankTile marks a blank in the bag and in a player's hand. const blankTile byte = 0xff // Bag is a shuffled draw pile of tiles. type Bag struct { tiles []byte } // NewBag fills a bag from the ruleset's tile counts and blanks and shuffles it with the // given seed (so games are reproducible). func NewBag(rs *rules.Ruleset, seed int64) *Bag { var tiles []byte for i, n := range rs.Counts { for range n { tiles = append(tiles, byte(i)) } } for range rs.Blanks { tiles = append(tiles, blankTile) } rng := rand.New(rand.NewSource(seed)) rng.Shuffle(len(tiles), func(i, j int) { tiles[i], tiles[j] = tiles[j], tiles[i] }) return &Bag{tiles: tiles} } // Len returns the number of tiles left in the bag. func (b *Bag) Len() int { return len(b.tiles) } // Draw removes up to n tiles from the bag and returns them. func (b *Bag) Draw(n int) []byte { if n > len(b.tiles) { n = len(b.tiles) } out := b.tiles[len(b.tiles)-n:] b.tiles = b.tiles[:len(b.tiles)-n] return out } // rackOf builds a generation rack from a hand of tiles. func rackOf(tiles []byte, size int) rack.Rack { r := rack.New(size) for _, t := range tiles { if t == blankTile { r.AddBlank() } else { r.Add(t) } } return r } // removeUsed returns the hand with the tiles consumed by m removed. func removeUsed(tiles []byte, m scrabble.Move) []byte { out := append([]byte(nil), tiles...) for _, p := range m.Tiles { want := p.Letter if p.Blank { want = blankTile } for i, t := range out { if t == want { out = append(out[:i], out[i+1:]...) break } } } return out } // greedy returns the highest-scoring move (ties broken by canonical key for // reproducibility), or ok=false if there is no legal move. func greedy(gen scrabble.Generator, b *board.Board, rk rack.Rack, mode scrabble.Mode) (scrabble.Move, int, bool) { moves := gen.GenerateMoves(b, rk, mode) if len(moves) == 0 { return scrabble.Move{}, 0, false } sort.Slice(moves, func(i, j int) bool { if moves[i].Score != moves[j].Score { return moves[i].Score > moves[j].Score } return moves[i].Key() < moves[j].Key() }) return moves[0], len(moves), true } // Result summarizes a finished game. type Result struct { Turns int // turns taken (plays plus passes) Plays int // scoring plays made Scores [2]int // final score per player MovesGenerated int // total legal moves generated across all turns GenTime time.Duration // time spent generating moves } // PlayGame plays one greedy AI-vs-AI game with generator gen and returns its result. If // observe is non-nil it is called before each turn with a clone of the board and the // player's rack, so a caller can compare generators on identical positions. func PlayGame(rs *rules.Ruleset, gen scrabble.Generator, mode scrabble.Mode, seed int64, observe func(b *board.Board, rk rack.Rack)) Result { const maxTurns = 300 bag := NewBag(rs, seed) b := board.New(rs.Rows, rs.Cols) hands := [2][]byte{bag.Draw(rs.RackSize), bag.Draw(rs.RackSize)} var res Result passes := 0 for turn := range maxTurns { p := turn % 2 rk := rackOf(hands[p], rs.Size()) if observe != nil { observe(b.Clone(), rk.Clone()) } res.Turns++ t0 := time.Now() m, n, ok := greedy(gen, b, rk, mode) res.GenTime += time.Since(t0) res.MovesGenerated += n if !ok { if passes++; passes >= 4 { break } continue } passes = 0 scrabble.Apply(b, m) res.Scores[p] += m.Score res.Plays++ hands[p] = removeUsed(hands[p], m) if need := rs.RackSize - len(hands[p]); need > 0 { hands[p] = append(hands[p], bag.Draw(need)...) } if len(hands[p]) == 0 && bag.Len() == 0 { break } } return res }