Implement Scrabble move generator (DAWG) #1
@@ -0,0 +1,28 @@
|
|||||||
|
# Scrabble-solver build helpers.
#
# `make dawg` (re)builds the committed dictionary DAWGs under dawg/ from their word lists:
#   en_sowpods.dawg  — English SOWPODS (Latin alphabet)
#   ru_scrabble.dawg — Russian Scrabble nouns (Cyrillic, 33 letters)
#   ru_erudit.dawg   — Эрудит (the same list with Ё→Е folded and de-duped)
#
# `make clean-dawg` removes every *.dawg file under $(DAWG_DIR).

# GO and PYTHON may be overridden from the command line or the environment.
GO ?= go
PYTHON ?= python3
DAWG_DIR := dawg
BUILDDICT := $(GO) run ./cmd/builddict

.PHONY: dawg dawg-en dawg-ru dawg-erudit clean-dawg

# Aggregate target: rebuild all three dictionaries.
dawg: dawg-en dawg-ru dawg-erudit

dawg-en:
	$(BUILDDICT) -dict dictionaries/english/sowpods.txt -alphabet latin -name en_sowpods -out $(DAWG_DIR)

dawg-ru:
	$(BUILDDICT) -dict dictprep/russian/scrabble.txt -alphabet russian -name ru_scrabble -out $(DAWG_DIR)

# The folded word list is an intermediate artifact, so it goes to a private mktemp file
# (no collisions between concurrent runs) that the trap removes on success and failure.
# Everything runs in one shell invocation so that $$words and the trap are shared.
dawg-erudit:
	set -e; \
	words=$$(mktemp "$${TMPDIR:-/tmp}/ru_erudit_words.XXXXXX"); \
	trap 'rm -f "$$words"' EXIT; \
	$(PYTHON) dictprep/fold_yo.py dictprep/russian/scrabble.txt > "$$words"; \
	$(BUILDDICT) -dict "$$words" -alphabet russian -name ru_erudit -out $(DAWG_DIR)

clean-dawg:
	rm -f $(DAWG_DIR)/*.dawg
|
||||||
+12
-4
@@ -1,6 +1,5 @@
|
|||||||
// Command builddict converts a word list into a serialized DAWG, cached under testdata
|
// Command builddict converts a word list into a serialized DAWG. By default it reads the
|
||||||
// for the tests and the benchmark. By default it reads the English SOWPODS list from
|
// English SOWPODS list (Latin alphabet); pass -alphabet russian for the Cyrillic lists.
|
||||||
// the dictionaries submodule.
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -23,9 +22,18 @@ func main() {
|
|||||||
name := flag.String("name", "sowpods", "base name for the output file")
|
name := flag.String("name", "sowpods", "base name for the output file")
|
||||||
minLen := flag.Int("min", 2, "minimum word length")
|
minLen := flag.Int("min", 2, "minimum word length")
|
||||||
maxLen := flag.Int("max", 15, "maximum word length")
|
maxLen := flag.Int("max", 15, "maximum word length")
|
||||||
|
alpha := flag.String("alphabet", "latin", "alphabet: latin (English) or russian")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
idx := alphabet.Latin()
|
var idx alphabet.Indexer
|
||||||
|
switch *alpha {
|
||||||
|
case "latin":
|
||||||
|
idx = alphabet.Latin()
|
||||||
|
case "russian":
|
||||||
|
idx = alphabet.Embedded(alphabet.Langs.LangRu)
|
||||||
|
default:
|
||||||
|
log.Fatalf("unknown -alphabet %q (want latin or russian)", *alpha)
|
||||||
|
}
|
||||||
|
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
words, err := wordlist.Read(*dict, idx, *minLen, *maxLen)
|
words, err := wordlist.Read(*dict, idx, *minLen, *maxLen)
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env python3
"""Fold Ё/ё → Е/е in a word list and de-duplicate — the dictionary prep for "Эрудит".

The Эрудит ruleset has no Ё tile and treats Е/Ё as one letter, so its dictionary must be
folded before the DAWG is built. Folding merges pairs like ёж/еж, hence the de-dup. Output
is sorted (Russian order over the 32 folded letters) and LF-separated, one word per line.

Run:  python3 dictprep/fold_yo.py dictprep/russian/scrabble.txt > /tmp/ru_erudit_words.txt
"""
import sys

# Rank of every lowercase letter in Russian alphabetical order once ё has been folded away.
ORDER = {c: i for i, c in enumerate("абвгдежзийклмнопрстуфхцчшщъыьэюя")}  # 32 letters, no ё


def key(w):
    """Return the sort key of ``w``: the list of alphabet ranks of its characters.

    Characters that are not in ORDER (uppercase letters, hyphens, Latin letters, ...)
    all get rank 99, so they sort after every known letter.
    """
    return [ORDER.get(c, 99) for c in w]


def main():
    """Read a word list, fold Ё/ё into Е/е, de-duplicate, sort, and print it.

    The list is read as UTF-8 from the path in ``sys.argv[1]``, or from standard input
    when no argument is given. Each line is stripped of surrounding whitespace; blank
    lines are dropped. The result is written to standard output, one word per line.
    """
    if len(sys.argv) > 1:
        stream = open(sys.argv[1], encoding="utf-8")
    else:
        # Wrap the stdin descriptor rather than opening "/dev/stdin", which does not
        # exist on every platform; closefd=False leaves the descriptor itself open.
        stream = open(sys.stdin.fileno(), encoding="utf-8", closefd=False)

    # The context manager closes the input file deterministically.
    with stream:
        words = {line.strip().replace("ё", "е").replace("Ё", "Е") for line in stream}
    words.discard("")

    # Words whose keys tie (any character outside ORDER maps to the same rank) would
    # otherwise come out in set-iteration order, which varies between runs; the word
    # itself is the tie-breaker so the output is reproducible.
    ordered = sorted(words, key=lambda w: (key(w), w))

    # Force UTF-8 and bare LF line endings regardless of locale or platform. Stream
    # replacements without reconfigure() (e.g. io.StringIO) are left untouched.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8", newline="\n")

    # One line per word; an empty word list produces no output at all.
    sys.stdout.writelines(w + "\n" for w in ordered)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user