Implement Scrabble move generator (DAWG) with English and Russian rules
A Go library that returns every legal play ranked by score and scores or validates plays, using the Appel-Jacobson DAWG algorithm over github.com/iliadenisov/dafsa v1.1.0. - DAWG move generation (across / down / both), full tournament scoring with a per-tile breakdown; public Solver: GenerateMoves (ranked), ScorePlay, ValidatePlay. - Rulesets: English Scrabble, Russian Scrabble, Эрудит (parameterizable Ruleset). - cmd/builddict (build the DAWG from the dictionaries submodule), cmd/stress (self-play benchmark), selfplay engine; brute-force test oracle. - A GADDAG was implemented, benchmarked and removed (the DAWG was smaller and faster for a scoring solver); see RESULTS.md and ALGORITHM.md.
This commit is contained in:
@@ -0,0 +1,76 @@
|
||||
// Package dict loads the English test dictionary as a DAWG, preferring the serialized
|
||||
// cache under testdata and falling back to building from the dictionaries submodule.
|
||||
// Paths are resolved relative to the repository root so it works both from the repo root
|
||||
// (commands) and from a package directory (tests).
|
||||
package dict
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
dawg "github.com/iliadenisov/dafsa"
|
||||
|
||||
"scrabble-solver/internal/dictdawg"
|
||||
"scrabble-solver/internal/wordlist"
|
||||
)
|
||||
|
||||
// Word-length bounds applied when the English word list is read. A 15x15 board
// holds at most 15 letters in a line, which gives the upper bound.
const (
	// MinLen is the shortest word length kept.
	MinLen = 2
	// MaxLen is the longest word length kept.
	MaxLen = 15
)
|
||||
|
||||
// exists reports whether os.Stat succeeds for p. Any Stat error, not only
// "does not exist", is treated as the path being absent.
func exists(p string) bool {
	if _, err := os.Stat(p); err != nil {
		return false
	}
	return true
}
|
||||
|
||||
// Root returns the repository root by walking up from the working directory to the
|
||||
// directory containing go.mod, or "." if none is found.
|
||||
func Root() string {
|
||||
dir, err := os.Getwd()
|
||||
if err != nil {
|
||||
return "."
|
||||
}
|
||||
for {
|
||||
if exists(filepath.Join(dir, "go.mod")) {
|
||||
return dir
|
||||
}
|
||||
parent := filepath.Dir(dir)
|
||||
if parent == dir {
|
||||
return "."
|
||||
}
|
||||
dir = parent
|
||||
}
|
||||
}
|
||||
|
||||
// DAWGCache and WordlistPath locate the English cache file and source word list,
|
||||
// relative to the repository root.
|
||||
func DAWGCache() string { return filepath.Join(Root(), "testdata", "sowpods.dawg") }
|
||||
func WordlistPath() string { return filepath.Join(Root(), "dictionaries", "english", "sowpods.txt") }
|
||||
|
||||
// EnglishAvailable reports whether the English dictionary can be loaded (cache or source).
|
||||
func EnglishAvailable() bool {
|
||||
return exists(DAWGCache()) || exists(WordlistPath())
|
||||
}
|
||||
|
||||
// EnglishWords returns the encoded English word list (from the submodule source).
|
||||
func EnglishWords() ([][]byte, error) {
|
||||
return wordlist.Read(WordlistPath(), alphabet.Latin(), MinLen, MaxLen)
|
||||
}
|
||||
|
||||
// EnglishDAWG returns the English DAWG, loading the cache if present, otherwise building
|
||||
// it from the word list and caching it (best effort).
|
||||
func EnglishDAWG() (dawg.Finder, error) {
|
||||
if exists(DAWGCache()) {
|
||||
return dictdawg.Load(DAWGCache())
|
||||
}
|
||||
words, err := EnglishWords()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f, err := dictdawg.Build(alphabet.Latin(), words)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_ = dictdawg.Save(f, DAWGCache())
|
||||
return f, nil
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Package dictdawg builds a plain left-to-right DAWG of a dictionary, as used by the
|
||||
// Appel-Jacobson move generator.
|
||||
package dictdawg
|
||||
|
||||
import (
|
||||
"github.com/iliadenisov/alphabet"
|
||||
dawg "github.com/iliadenisov/dafsa"
|
||||
)
|
||||
|
||||
// Build returns a DAWG Finder over words, which must be alphabet-index slices sorted by
|
||||
// index order and de-duplicated (see wordlist.Encode).
|
||||
func Build(idx alphabet.Indexer, words [][]byte) (dawg.Finder, error) {
|
||||
d := dawg.New(idx)
|
||||
for _, w := range words {
|
||||
if err := d.AddB(w); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return d.Finish(), nil
|
||||
}
|
||||
|
||||
// Save writes the DAWG to filename. It requires an embedded alphabet (for example
|
||||
// alphabet.Latin()), so that Load can reconstruct it.
|
||||
func Save(f dawg.Finder, filename string) error {
|
||||
_, err := f.Save(filename)
|
||||
return err
|
||||
}
|
||||
|
||||
// Load reopens a DAWG saved with Save.
|
||||
func Load(filename string) (dawg.Finder, error) { return dawg.Load(filename) }
|
||||
@@ -0,0 +1,44 @@
|
||||
package dictdawg_test
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
|
||||
"scrabble-solver/internal/dictdawg"
|
||||
"scrabble-solver/internal/wordlist"
|
||||
)
|
||||
|
||||
func TestBuildAndQuery(t *testing.T) {
|
||||
words := wordlist.Encode([]string{"care", "cares", "cat"}, alphabet.Latin(), 2, 15)
|
||||
f, err := dictdawg.Build(alphabet.Latin(), words)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if f.NumAdded() != 3 {
|
||||
t.Fatalf("NumAdded = %d, want 3", f.NumAdded())
|
||||
}
|
||||
if i := f.IndexOfB([]byte{2, 0, 17, 4}); i != 0 { // care
|
||||
t.Errorf("IndexOf(care) = %d, want 0", i)
|
||||
}
|
||||
if i := f.IndexOfB([]byte{2, 0, 19}); i != 2 { // cat
|
||||
t.Errorf("IndexOf(cat) = %d, want 2", i)
|
||||
}
|
||||
if i := f.IndexOfB([]byte{2, 0, 17}); i != -1 { // car (absent)
|
||||
t.Errorf("IndexOf(car) = %d, want -1", i)
|
||||
}
|
||||
|
||||
path := filepath.Join(t.TempDir(), "d.dawg")
|
||||
if err := dictdawg.Save(f, path); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
g, err := dictdawg.Load(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer g.Close()
|
||||
if i := g.IndexOfB([]byte{2, 0, 17, 4, 18}); i != 1 { // cares
|
||||
t.Errorf("loaded IndexOf(cares) = %d, want 1", i)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// Package encoding defines the compact byte conventions shared by the board, rack,
|
||||
// move output and (for letters) the dictionary graph.
|
||||
//
|
||||
// One uniform "symbol byte" is used everywhere:
|
||||
//
|
||||
// bits 0..5 the alphabet letter index plus one (1..63); 0 means "empty / no tile"
|
||||
// bit 6 reserved (unused)
|
||||
// bit 7 Blank — the tile is a blank standing for that letter; it scores 0
|
||||
//
|
||||
// The +1 offset lets 0 mean an empty board square. The same byte represents a board
|
||||
// cell, a placed tile and a rack tile; the graph stores raw letter indexes (without the
|
||||
// +1).
|
||||
package encoding
|
||||
|
||||
const (
	// Blank is bit 7 of a symbol byte. When set, the tile is a blank standing for
	// its letter, and a blank scores 0.
	Blank byte = 1 << 7

	// Empty is the symbol byte of an unoccupied board square.
	Empty byte = 0

	// letterBits masks bits 0..5 of a symbol byte, which hold the letter index
	// plus one.
	letterBits byte = 1<<6 - 1
)
|
||||
|
||||
// Cell builds the byte for a tile of the given alphabet letter index. When blank is
|
||||
// true the tile is marked as a blank (it scores 0).
|
||||
func Cell(letter byte, blank bool) byte {
|
||||
c := (letter + 1) & letterBits
|
||||
if blank {
|
||||
c |= Blank
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// IsEmpty reports whether a board cell is unoccupied.
|
||||
func IsEmpty(cell byte) bool { return cell&letterBits == 0 }
|
||||
|
||||
// Letter returns the alphabet letter index of a non-empty cell or tile byte. The
|
||||
// result is meaningless for an empty cell.
|
||||
func Letter(cell byte) byte { return (cell & letterBits) - 1 }
|
||||
|
||||
// IsBlank reports whether a cell or tile byte is a blank (scores 0).
|
||||
func IsBlank(cell byte) bool { return cell&Blank != 0 }
|
||||
@@ -0,0 +1,39 @@
|
||||
package encoding
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestCellRoundTrip(t *testing.T) {
|
||||
for letter := range byte(26) {
|
||||
c := Cell(letter, false)
|
||||
if IsEmpty(c) {
|
||||
t.Errorf("Cell(%d,false) reports empty", letter)
|
||||
}
|
||||
if IsBlank(c) {
|
||||
t.Errorf("Cell(%d,false) reports blank", letter)
|
||||
}
|
||||
if got := Letter(c); got != letter {
|
||||
t.Errorf("Letter(Cell(%d,false)) = %d", letter, got)
|
||||
}
|
||||
|
||||
b := Cell(letter, true)
|
||||
if !IsBlank(b) {
|
||||
t.Errorf("Cell(%d,true) not blank", letter)
|
||||
}
|
||||
if got := Letter(b); got != letter {
|
||||
t.Errorf("Letter(Cell(%d,true)) = %d, want %d", letter, got, letter)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmpty(t *testing.T) {
|
||||
if !IsEmpty(Empty) {
|
||||
t.Error("IsEmpty(Empty) = false")
|
||||
}
|
||||
if IsEmpty(Cell(0, false)) {
|
||||
t.Error("IsEmpty(Cell('a')) = true")
|
||||
}
|
||||
// 'a' (index 0) must not collide with empty.
|
||||
if Cell(0, false) == Empty {
|
||||
t.Error("Cell('a') collides with Empty")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
// Package graph provides thin, reusable helpers over a dafsa Cursor that the move
|
||||
// generator builds on. It keeps the rest of the solver from depending on dafsa
|
||||
// traversal details directly.
|
||||
package graph
|
||||
|
||||
import dawg "github.com/iliadenisov/dafsa"
|
||||
|
||||
// Spell follows the given alphabet indices from the cursor's root. It returns the
|
||||
// state reached, whether that state is accepting, and whether the whole path exists.
|
||||
// When ok is false the path ran into a missing edge; n and final are meaningless.
|
||||
func Spell(c *dawg.Cursor, indices []byte) (n dawg.Node, final, ok bool) {
|
||||
n = c.Root()
|
||||
final = c.Final(n)
|
||||
for _, ix := range indices {
|
||||
n, final, ok = c.Next(n, ix)
|
||||
if !ok {
|
||||
return n, false, false
|
||||
}
|
||||
}
|
||||
return n, final, true
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package graph_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
dawg "github.com/iliadenisov/dafsa"
|
||||
|
||||
"scrabble-solver/internal/graph"
|
||||
)
|
||||
|
||||
// TestSpellSmoke also exercises the go.mod replace => ../dafsa wiring and the new
|
||||
// dafsa traversal API end-to-end from the solver module.
|
||||
func TestSpellSmoke(t *testing.T) {
|
||||
d := dawg.New(alphabet.Latin())
|
||||
for _, w := range []string{"cat", "cats", "dog"} {
|
||||
if err := d.Add(w); err != nil {
|
||||
t.Fatalf("Add(%q): %v", w, err)
|
||||
}
|
||||
}
|
||||
c, err := dawg.NewCursor(d.Finish())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
enc := func(s string) []byte {
|
||||
b, err := alphabet.Latin().Encode(s)
|
||||
if err != nil {
|
||||
t.Fatalf("Encode(%q): %v", s, err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
if _, final, ok := graph.Spell(c, enc("cat")); !ok || !final {
|
||||
t.Errorf("Spell(cat): ok=%v final=%v, want both true", ok, final)
|
||||
}
|
||||
if _, final, ok := graph.Spell(c, enc("ca")); !ok || final {
|
||||
t.Errorf("Spell(ca): ok=%v final=%v, want ok=true final=false", ok, final)
|
||||
}
|
||||
if _, _, ok := graph.Spell(c, enc("xyz")); ok {
|
||||
t.Errorf("Spell(xyz): ok=true, want false")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// Package wordlist reads dictionaries and encodes them into alphabet-index words,
|
||||
// ready to add to a DAWG.
|
||||
package wordlist
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
)
|
||||
|
||||
// Encode turns words into alphabet-index slices, keeping only those whose length is in
|
||||
// [minLen, maxLen] and whose characters all belong to idx's alphabet (case-folded).
|
||||
// The result is sorted by index order and de-duplicated, as a DAWG builder requires.
|
||||
func Encode(words []string, idx alphabet.Indexer, minLen, maxLen int) [][]byte {
|
||||
out := make([][]byte, 0, len(words))
|
||||
for _, w := range words {
|
||||
w = strings.TrimSpace(w)
|
||||
if w == "" {
|
||||
continue
|
||||
}
|
||||
b, err := idx.Encode(strings.ToLower(w))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if len(b) < minLen || len(b) > maxLen {
|
||||
continue
|
||||
}
|
||||
out = append(out, b)
|
||||
}
|
||||
sort.Slice(out, func(i, j int) bool { return bytes.Compare(out[i], out[j]) < 0 })
|
||||
return Dedupe(out)
|
||||
}
|
||||
|
||||
// Read is Encode applied to the lines (one word per line) of the file at path.
|
||||
func Read(path string, idx alphabet.Indexer, minLen, maxLen int) ([][]byte, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var words []string
|
||||
sc := bufio.NewScanner(f)
|
||||
sc.Buffer(make([]byte, 1<<20), 1<<20)
|
||||
for sc.Scan() {
|
||||
words = append(words, sc.Text())
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return Encode(words, idx, minLen, maxLen), nil
|
||||
}
|
||||
|
||||
// yoReplacer maps Ё/ё to Е/е. It is built once at package level rather than on
// every FoldYo call; a strings.Replacer is safe for concurrent use.
var yoReplacer = strings.NewReplacer("ё", "е", "Ё", "Е")

// FoldYo replaces Ё/ё with Е/е and leaves every other character unchanged. The
// Russian "Эрудит" variant has no Ё tile and treats Е and Ё as the same letter, so
// apply this when preparing an Эрудит dictionary (it is a dictionary-preparation
// step, not an engine behaviour).
func FoldYo(s string) string {
	return yoReplacer.Replace(s)
}
|
||||
|
||||
// Dedupe collapses runs of equal adjacent words in a sorted slice of index words.
// It works in place: kept words are moved to the front of s's backing array and
// the returned slice is the prefix of s that holds them. An empty or nil s is
// returned unchanged.
func Dedupe(s [][]byte) [][]byte {
	if len(s) == 0 {
		return s
	}
	kept := 1 // s[:kept] holds the words retained so far
	for _, word := range s[1:] {
		if bytes.Equal(word, s[kept-1]) {
			continue
		}
		s[kept] = word
		kept++
	}
	return s[:kept]
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package wordlist
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
)
|
||||
|
||||
func TestFoldYo(t *testing.T) {
|
||||
if got := FoldYo("ёлка"); got != "елка" {
|
||||
t.Errorf("FoldYo(ёлка) = %q, want елка", got)
|
||||
}
|
||||
if got := FoldYo("Ёжик"); got != "Ежик" {
|
||||
t.Errorf("FoldYo(Ёжик) = %q, want Ежик", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeFilterSortDedupe(t *testing.T) {
|
||||
got := Encode([]string{
|
||||
"cat", "CATS", "ab", "b", "abcdefghi", "cat", " do ", "qu1rk",
|
||||
}, alphabet.Latin(), 2, 8)
|
||||
|
||||
want := [][]byte{
|
||||
{0, 1}, // ab
|
||||
{2, 0, 19}, // cat
|
||||
{2, 0, 19, 18}, // cats (from CATS, case-folded)
|
||||
{3, 14}, // do (trimmed)
|
||||
}
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d words %v, want %d", len(got), got, len(want))
|
||||
}
|
||||
for i := range want {
|
||||
if string(got[i]) != string(want[i]) {
|
||||
t.Errorf("word %d = %v, want %v", i, got[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user