dawg: build committed dictionary DAWGs (en SOWPODS, ru Scrabble, ru Эрудит)
- cmd/builddict: add -alphabet latin|russian (russian = alphabet.Embedded(LangRu)).
- dictprep/fold_yo.py: fold Ё→Е and de-dup, the Эрудит dictionary prep.
- Makefile: `make dawg` rebuilds dawg/{en_sowpods,ru_scrabble,ru_erudit}.dawg.
- dawg/: committed DAWGs verified by enumeration — 267752 / 83385 / 83343 words.
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
# Scrabble-solver build helpers.
|
||||
#
|
||||
# `make dawg` (re)builds the committed dictionary DAWGs under dawg/ from their word lists:
|
||||
# en_sowpods.dawg — English SOWPODS (Latin alphabet)
|
||||
# ru_scrabble.dawg — Russian Scrabble nouns (Cyrillic, 33 letters)
|
||||
# ru_erudit.dawg — Эрудит (the same list with Ё→Е folded and de-duped)
|
||||
|
||||
# External tools; override on the command line, e.g. `make GO=/opt/go/bin/go dawg`.
GO        ?= go
PYTHON    ?= python3

# Directory passed to builddict as its -out location.
DAWG_DIR  := dawg

# The dictionary builder, run from source through the Go toolchain.
BUILDDICT := $(GO) run ./cmd/builddict
|
||||
|
||||
# Every target here is a command name, not a file it creates.
.PHONY: \
    dawg \
    dawg-en \
    dawg-ru \
    dawg-erudit \
    clean-dawg

# Aggregate target: rebuild all three dictionary DAWGs.
dawg: \
    dawg-en \
    dawg-ru \
    dawg-erudit
|
||||
|
||||
# English SOWPODS: run builddict on the word list with the Latin alphabet,
# naming the result en_sowpods and writing it under $(DAWG_DIR).
dawg-en:
	$(BUILDDICT) \
		-dict dictionaries/english/sowpods.txt \
		-alphabet latin \
		-name en_sowpods \
		-out $(DAWG_DIR)
|
||||
|
||||
# Russian Scrabble: run builddict on the word list with the Russian alphabet,
# naming the result ru_scrabble and writing it under $(DAWG_DIR).
dawg-ru:
	$(BUILDDICT) \
		-dict dictprep/russian/scrabble.txt \
		-alphabet russian \
		-name ru_scrabble \
		-out $(DAWG_DIR)
|
||||
|
||||
# Эрудит dictionary: pass the Russian Scrabble word list through
# dictprep/fold_yo.py (Ё→Е folding and de-duplication), then build ru_erudit
# from the folded list.
#
# The folded list is written to a private mktemp file rather than a fixed,
# predictable /tmp path, so concurrent runs and other users on the same host
# cannot clobber (or pre-plant) the intermediate file. The EXIT trap removes it
# whether the build succeeds or fails.
#
# All steps are chained with && in a single shell: each recipe line otherwise
# runs in its own shell and would lose the shell variable holding the temp
# path. The chain also stops before builddict runs if the fold step fails.
dawg-erudit:
	tmp=$$(mktemp "$${TMPDIR:-/tmp}/ru_erudit_words.XXXXXX") && \
	trap 'rm -f "$$tmp"' EXIT && \
	$(PYTHON) dictprep/fold_yo.py dictprep/russian/scrabble.txt > "$$tmp" && \
	$(BUILDDICT) -dict "$$tmp" -alphabet russian -name ru_erudit -out $(DAWG_DIR)
|
||||
|
||||
# Remove every built *.dawg file from $(DAWG_DIR).
#
# The first recipe line expands to nothing when DAWG_DIR is set; if it has been
# overridden to an empty value (e.g. `make DAWG_DIR= clean-dawg`) it aborts
# instead of letting the glob below become /*.dawg.
clean-dawg:
	$(if $(strip $(DAWG_DIR)),,$(error DAWG_DIR is empty; refusing to remove /*.dawg))
	rm -f $(DAWG_DIR)/*.dawg
|
||||
Reference in New Issue
Block a user