Consolidate the scattered build inputs (dictionaries/english/, dictprep/russian/) into one sources/ tree keyed by the variant labels (scrabble_en/scrabble_ru/erudit_ru), and move the Russian prep pipeline to tools/. The dawg outputs and their filenames are unchanged — rebuilt byte-identical (en_sowpods/ru_scrabble/ru_erudit) — so the release artifact and the backend are unaffected. ru_stage2.py OUT_DIR and the ruwords flag defaults are repointed to sources/scrabble_ru/; Makefile / CI / cmd/builddict default / README updated; pipeline intermediates git-ignored. Verified: make dawg byte-identical to the committed baseline, py_compile + go vet of the moved tools. The full Russian regeneration pipeline (pymorphy3/libmorph/orfo PDF) was not run here.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
# ru_erudit.dawg — Эрудит (the Ё→Е folded + de-duped list, committed as sources/erudit_ru/erudit.txt)
|
||||
#
|
||||
# CI builds the DAWGs as a validation gate; release artifacts are published from this output
|
||||
# (see README.md). Regenerate russian/erudit.txt from scrabble.txt with dictprep/fold_yo.py.
|
||||
# (see README.md). Regenerate sources/erudit_ru/erudit.txt from sources/scrabble_ru/scrabble.txt with tools/fold_yo.py.
|
||||
|
||||
export GOPRIVATE := gitea.iliadenisov.ru/*
|
||||
|
||||
@@ -21,13 +21,13 @@ BUILDDICT := $(GO) run ./cmd/builddict
|
||||
dawg: dawg-en dawg-ru dawg-erudit
|
||||
|
||||
dawg-en:
|
||||
$(BUILDDICT) -dict dictionaries/english/sowpods.txt -alphabet latin -name en_sowpods -out $(DAWG_DIR)
|
||||
$(BUILDDICT) -dict sources/scrabble_en/sowpods.txt -alphabet latin -name en_sowpods -out $(DAWG_DIR)
|
||||
|
||||
dawg-ru:
|
||||
$(BUILDDICT) -dict dictprep/russian/scrabble.txt -alphabet russian -name ru_scrabble -out $(DAWG_DIR)
|
||||
$(BUILDDICT) -dict sources/scrabble_ru/scrabble.txt -alphabet russian -name ru_scrabble -out $(DAWG_DIR)
|
||||
|
||||
dawg-erudit:
|
||||
$(BUILDDICT) -dict dictprep/russian/erudit.txt -alphabet russian -name ru_erudit -out $(DAWG_DIR)
|
||||
$(BUILDDICT) -dict sources/erudit_ru/erudit.txt -alphabet russian -name ru_erudit -out $(DAWG_DIR)
|
||||
|
||||
clean-dawg:
|
||||
rm -f $(DAWG_DIR)/*.dawg
|
||||
|
||||
Reference in New Issue
Block a user