Files
scrabble-solver/internal/wordlist/wordlist.go
T
Ilia Denisov 15c7959d96 Implement Scrabble move generator (DAWG) with English and Russian rules
A Go library that returns every legal play ranked by score and scores or
validates plays, using the Appel-Jacobson DAWG algorithm over
github.com/iliadenisov/dafsa v1.1.0.

- DAWG move generation (across / down / both), full tournament scoring with a
  per-tile breakdown; public Solver: GenerateMoves (ranked), ScorePlay,
  ValidatePlay.
- Rulesets: English Scrabble, Russian Scrabble, Эрудит (parameterizable Ruleset).
- cmd/builddict (build the DAWG from the dictionaries submodule), cmd/stress
  (self-play benchmark), selfplay engine; brute-force test oracle.
- A GADDAG was implemented, benchmarked and removed (the DAWG was smaller and
  faster for a scoring solver); see RESULTS.md and ALGORITHM.md.
2026-06-01 16:07:32 +02:00

78 lines
2.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Package wordlist reads dictionaries and encodes them into alphabet-index words,
// ready to add to a DAWG.
package wordlist
import (
"bufio"
"bytes"
"os"
"sort"
"strings"
"github.com/iliadenisov/alphabet"
)
// Encode converts words into alphabet-index slices suitable for a DAWG builder.
//
// Each word is trimmed of surrounding whitespace and lower-cased before being passed to
// idx.Encode. A word is silently dropped when it is empty after trimming, when idx.Encode
// returns an error (i.e. the word cannot be expressed in idx's alphabet), or when its
// encoded length falls outside [minLen, maxLen].
//
// The returned slice is sorted in bytewise index order with duplicates removed.
func Encode(words []string, idx alphabet.Indexer, minLen, maxLen int) [][]byte {
	encoded := make([][]byte, 0, len(words))
	for _, raw := range words {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		word, err := idx.Encode(strings.ToLower(trimmed))
		if err != nil {
			// Not encodable with this alphabet: skip rather than fail the whole list.
			continue
		}
		// The length limits apply to the encoded form, not to the input string.
		if n := len(word); n >= minLen && n <= maxLen {
			encoded = append(encoded, word)
		}
	}

	byIndexOrder := func(a, b int) bool {
		return bytes.Compare(encoded[a], encoded[b]) < 0
	}
	sort.Slice(encoded, byIndexOrder)

	// Sorting made equal words adjacent, which is what Dedupe relies on.
	return Dedupe(encoded)
}
// Read loads the file at path, treating every line as one word, and returns the result
// of Encode over those lines with the given idx, minLen and maxLen.
//
// It returns an error if the file cannot be opened or if scanning fails. The scanner is
// given a 1 MiB buffer and limit, so an over-long line is reported as a scan error.
func Read(path string, idx alphabet.Indexer, minLen, maxLen int) ([][]byte, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	// Raise the scanner's line limit well above the bufio default.
	const maxLine = 1 << 20
	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, maxLine), maxLine)

	var lines []string
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}

	// Trimming, filtering, sorting and de-duplication all happen inside Encode.
	return Encode(lines, idx, minLen, maxLen), nil
}
// yoFolder rewrites Ё/ё to Е/е. It is built once at package initialisation instead of on
// every FoldYo call; a *strings.Replacer is safe for concurrent use by multiple goroutines.
var yoFolder = strings.NewReplacer("ё", "е", "Ё", "Е")

// FoldYo returns s with every Ё replaced by Е and every ё replaced by е; all other
// characters are left untouched.
//
// The Russian "Эрудит" variant has no Ё tile and treats Е and Ё as the same letter, so
// apply this when preparing an Эрудит dictionary (it is a dictionary-preparation step,
// not an engine behaviour).
func FoldYo(s string) string {
	return yoFolder.Replace(s)
}
// Dedupe collapses every run of equal adjacent index words to a single element and
// returns the shortened slice. The input is expected to be sorted so that duplicates are
// adjacent. The work is done in place: the result shares s's backing array, and an empty
// or nil s is returned unchanged.
func Dedupe(s [][]byte) [][]byte {
	if len(s) == 0 {
		return s
	}
	// kept is the number of unique words already compacted at the front of s.
	kept := 1
	for _, word := range s[1:] {
		if bytes.Equal(word, s[kept-1]) {
			continue
		}
		s[kept] = word
		kept++
	}
	return s[:kept]
}