Publish as versioned Gitea module; move dictionary pipeline out

- Rename module to gitea.iliadenisov.ru/developer/scrabble-solver so it can be
  consumed as a versioned dependency (no go.work replace / CI clone).
- Move wordlist and dictdawg out of internal/ so they become public packages.
- Remove cmd/builddict, dictprep/, the dictionaries submodule and the dawg
  Makefile: the word-list parsing and DAWG build now live in the separate
  scrabble-dictionary repository, which publishes the DAWG set as a release artifact.
- internal/dict loads the committed dawg/en_sowpods.dawg fixture for cmd/stress.
- Update README/CLAUDE docs accordingly.
This commit is contained in:
Ilia Denisov
2026-06-04 19:11:46 +02:00
parent 63a7c663bf
commit 256999b42c
41 changed files with 93 additions and 402477 deletions
-75
View File
@@ -1,75 +0,0 @@
// Command builddict converts a word list into a serialized DAWG. By default it reads the
// English SOWPODS list (Latin alphabet); pass -alphabet russian for the Cyrillic lists.
package main
import (
"flag"
"fmt"
"log"
"os"
"path/filepath"
"time"
"github.com/iliadenisov/alphabet"
"scrabble-solver/internal/dictdawg"
"scrabble-solver/internal/wordlist"
)
// main parses the command-line flags, reads the word list, builds a DAWG from
// it and writes the result to <out>/<name>.dawg, printing progress to stdout.
// Any failure terminates the process through the log package.
func main() {
	var (
		dictPath   string
		outDir     string
		baseName   string
		alphaName  string
		minWordLen int
		maxWordLen int
	)
	flag.StringVar(&dictPath, "dict", "dictionaries/english/sowpods.txt", "word list file (one word per line)")
	flag.StringVar(&outDir, "out", "testdata", "output directory")
	flag.StringVar(&baseName, "name", "sowpods", "base name for the output file")
	flag.IntVar(&minWordLen, "min", 2, "minimum word length")
	flag.IntVar(&maxWordLen, "max", 15, "maximum word length")
	flag.StringVar(&alphaName, "alphabet", "latin", "alphabet: latin (English) or russian")
	flag.Parse()

	// Resolve the -alphabet flag to the indexer handed to the reader and builder.
	var indexer alphabet.Indexer
	switch alphaName {
	case "latin":
		indexer = alphabet.Latin()
	case "russian":
		indexer = alphabet.Embedded(alphabet.Langs.LangRu)
	default:
		log.Fatalf("unknown -alphabet %q (want latin or russian)", alphaName)
	}

	// Stage 1: load the word list and report how long that took.
	loadStart := time.Now()
	words, err := wordlist.Read(dictPath, indexer, minWordLen, maxWordLen)
	if err != nil {
		log.Fatalf("read %s: %v", dictPath, err)
	}
	loadTime := time.Since(loadStart).Round(time.Millisecond)
	fmt.Printf("loaded %d words from %s in %s\n", len(words), dictPath, loadTime)

	if err := os.MkdirAll(outDir, 0o755); err != nil {
		log.Fatal(err)
	}

	// Stage 2: build and save the DAWG; the timer covers both steps.
	buildStart := time.Now()
	graph, err := dictdawg.Build(indexer, words)
	if err != nil {
		log.Fatalf("build dawg: %v", err)
	}
	dawgPath := filepath.Join(outDir, baseName+".dawg")
	if err := dictdawg.Save(graph, dawgPath); err != nil {
		log.Fatalf("save: %v", err)
	}

	// The size is informational only: if the file cannot be stat'ed it is
	// reported as 0 rather than failing the run.
	var dawgSize int64
	if info, statErr := os.Stat(dawgPath); statErr == nil {
		dawgSize = info.Size()
	}
	fmt.Printf("DAWG %d nodes, %s, built+saved in %s -> %s\n",
		graph.NumNodes(), humanBytes(dawgSize), time.Since(buildStart).Round(time.Millisecond), dawgPath)
}
// humanBytes formats a byte count for display. Counts of at least 1<<20 are
// shown in MB with two decimals, counts of at least 1<<10 in KB with one
// decimal, and anything smaller (including negative values) as a plain
// integer number of bytes.
func humanBytes(n int64) string {
	const (
		kib = 1 << 10
		mib = 1 << 20
	)
	if n >= mib {
		return fmt.Sprintf("%.2f MB", float64(n)/mib)
	}
	if n >= kib {
		return fmt.Sprintf("%.1f KB", float64(n)/kib)
	}
	return fmt.Sprintf("%d B", n)
}
+5 -5
View File
@@ -12,10 +12,10 @@ import (
"strings"
"time"
"scrabble-solver/internal/dict"
"scrabble-solver/rules"
"scrabble-solver/scrabble"
"scrabble-solver/selfplay"
"gitea.iliadenisov.ru/developer/scrabble-solver/internal/dict"
"gitea.iliadenisov.ru/developer/scrabble-solver/rules"
"gitea.iliadenisov.ru/developer/scrabble-solver/scrabble"
"gitea.iliadenisov.ru/developer/scrabble-solver/selfplay"
)
func main() {
@@ -24,7 +24,7 @@ func main() {
rs := rules.English()
if !dict.EnglishAvailable() {
log.Fatal("English dictionary not available; run `go run ./cmd/builddict` first")
log.Fatal("English dictionary not available: dawg/en_sowpods.dawg missing")
}
f, err := dict.EnglishDAWG()
if err != nil {