Implement Scrabble move generator (DAWG) with English and Russian rules
A Go library that returns every legal play ranked by score, and that scores or validates plays, using the Appel-Jacobson DAWG algorithm over github.com/iliadenisov/dafsa v1.1.0.
- DAWG move generation (across / down / both) and full tournament scoring with a per-tile breakdown; public Solver: GenerateMoves (ranked), ScorePlay, ValidatePlay.
- Rulesets: English Scrabble, Russian Scrabble, Эрудит (parameterizable Ruleset).
- cmd/builddict (builds the DAWG from the dictionaries submodule), cmd/stress (self-play benchmark), selfplay engine; brute-force test oracle.
- A GADDAG was implemented, benchmarked and removed (the DAWG was smaller and faster for a scoring solver); see RESULTS.md and ALGORITHM.md.
This commit is contained in:
@@ -0,0 +1,77 @@
|
||||
// Package wordlist reads dictionaries and encodes them into alphabet-index words,
|
||||
// ready to add to a DAWG.
|
||||
package wordlist
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
)
|
||||
|
||||
// Encode turns words into alphabet-index slices, keeping only those whose length is in
|
||||
// [minLen, maxLen] and whose characters all belong to idx's alphabet (case-folded).
|
||||
// The result is sorted by index order and de-duplicated, as a DAWG builder requires.
|
||||
func Encode(words []string, idx alphabet.Indexer, minLen, maxLen int) [][]byte {
|
||||
out := make([][]byte, 0, len(words))
|
||||
for _, w := range words {
|
||||
w = strings.TrimSpace(w)
|
||||
if w == "" {
|
||||
continue
|
||||
}
|
||||
b, err := idx.Encode(strings.ToLower(w))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if len(b) < minLen || len(b) > maxLen {
|
||||
continue
|
||||
}
|
||||
out = append(out, b)
|
||||
}
|
||||
sort.Slice(out, func(i, j int) bool { return bytes.Compare(out[i], out[j]) < 0 })
|
||||
return Dedupe(out)
|
||||
}
|
||||
|
||||
// Read is Encode applied to the lines (one word per line) of the file at path.
|
||||
func Read(path string, idx alphabet.Indexer, minLen, maxLen int) ([][]byte, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var words []string
|
||||
sc := bufio.NewScanner(f)
|
||||
sc.Buffer(make([]byte, 1<<20), 1<<20)
|
||||
for sc.Scan() {
|
||||
words = append(words, sc.Text())
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return Encode(words, idx, minLen, maxLen), nil
|
||||
}
|
||||
|
||||
// FoldYo returns s with every "ё" replaced by "е" and every "Ё" replaced by "Е";
// all other bytes are left untouched.
//
// The Russian "Эрудит" variant has no Ё tile and treats Е and Ё as the same
// letter, so apply this when preparing an Эрудит dictionary (it is a
// dictionary-preparation step, not an engine behaviour).
func FoldYo(s string) string {
	// Two plain passes instead of building a strings.Replacer on every call:
	// ReplaceAll returns s unchanged when there is nothing to replace, and
	// neither replacement can produce the other pattern, so the order of the
	// passes does not matter.
	s = strings.ReplaceAll(s, "ё", "е")
	return strings.ReplaceAll(s, "Ё", "Е")
}
|
||||
|
||||
// Dedupe removes adjacent duplicates from a sorted slice of index words in place.
//
// Words are compared with bytes.Equal, and only neighbouring entries are
// compared, so s must already be sorted for all duplicates to be removed. The
// returned slice shares s's backing array and keeps the first entry of each run
// of equal words. Elements of s past the returned length are set to nil so the
// dropped words are not kept reachable through the backing array.
func Dedupe(s [][]byte) [][]byte {
	if len(s) < 2 {
		return s
	}
	kept := 1 // s[:kept] holds the unique words found so far
	for _, w := range s[1:] {
		if bytes.Equal(w, s[kept-1]) {
			continue
		}
		s[kept] = w
		kept++
	}
	// Release the stale tail left behind by the compaction.
	for i := kept; i < len(s); i++ {
		s[i] = nil
	}
	return s[:kept]
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package wordlist
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/iliadenisov/alphabet"
|
||||
)
|
||||
|
||||
func TestFoldYo(t *testing.T) {
|
||||
if got := FoldYo("ёлка"); got != "елка" {
|
||||
t.Errorf("FoldYo(ёлка) = %q, want елка", got)
|
||||
}
|
||||
if got := FoldYo("Ёжик"); got != "Ежик" {
|
||||
t.Errorf("FoldYo(Ёжик) = %q, want Ежик", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeFilterSortDedupe(t *testing.T) {
|
||||
got := Encode([]string{
|
||||
"cat", "CATS", "ab", "b", "abcdefghi", "cat", " do ", "qu1rk",
|
||||
}, alphabet.Latin(), 2, 8)
|
||||
|
||||
want := [][]byte{
|
||||
{0, 1}, // ab
|
||||
{2, 0, 19}, // cat
|
||||
{2, 0, 19, 18}, // cats (from CATS, case-folded)
|
||||
{3, 14}, // do (trimmed)
|
||||
}
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("got %d words %v, want %d", len(got), got, len(want))
|
||||
}
|
||||
for i := range want {
|
||||
if string(got[i]) != string(want[i]) {
|
||||
t.Errorf("word %d = %v, want %v", i, got[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user