Files
Ilia Denisov 256999b42c Publish as versioned Gitea module; move dictionary pipeline out
- Rename module to gitea.iliadenisov.ru/developer/scrabble-solver so it can be
  consumed as a versioned dependency (no go.work replace / CI clone).
- De-internalize wordlist and dictdawg as public packages.
- Remove cmd/builddict, dictprep/, the dictionaries submodule and the dawg
  Makefile: the word-list parsing and DAWG build now live in the separate
  scrabble-dictionary repository, which publishes the DAWG set as a release artifact.
- internal/dict loads the committed dawg/en_sowpods.dawg fixture for cmd/stress.
- Update README/CLAUDE docs accordingly.
2026-06-04 19:11:46 +02:00

109 lines
3.0 KiB
Go

package scrabble
import (
dawg "github.com/iliadenisov/dafsa"
"gitea.iliadenisov.ru/developer/scrabble-solver/board"
"gitea.iliadenisov.ru/developer/scrabble-solver/internal/encoding"
)
// letterSet is a bit set over alphabet letter indexes (alphabets are at most 63
// letters, so a uint64 suffices). It encodes a square's cross-set: the letters that,
// placed on the square, form a legal perpendicular word.
type letterSet uint64
func (s letterSet) has(l byte) bool { return s&(letterSet(1)<<l) != 0 }
// fullSet is the cross-set of a square with no perpendicular neighbours: every letter
// is allowed.
func fullSet(size int) letterSet { return letterSet(uint64(1)<<uint(size)) - 1 }
// columnContext returns the contiguous run of filled cells immediately above and below
// the empty square (r, c), each read top to bottom, as alphabet letter indexes. These
// are the tiles a perpendicular (vertical) word through (r, c) would include.
func columnContext(b *board.Board, r, c int) (above, below []byte) {
start := r
for start-1 >= 0 && b.Filled(start-1, c) {
start--
}
for rr := start; rr < r; rr++ {
above = append(above, encoding.Letter(b.At(rr, c)))
}
end := r
for end+1 < b.Rows() && b.Filled(end+1, c) {
end++
}
for rr := r + 1; rr <= end; rr++ {
below = append(below, encoding.Letter(b.At(rr, c)))
}
return above, below
}
// completers returns the letters X (< size) that complete a word when followed from
// state: those whose arc leads directly to an accepting node. It is a single arc
// enumeration — the deterministic cross-set primitive.
func completers(cur *dawg.Cursor, state dawg.Node, size int) letterSet {
var set letterSet
lim := byte(size)
cur.Arcs(state, func(a dawg.Arc) bool {
if a.Final && a.Label < lim {
set |= letterSet(1) << a.Label
}
return true
})
return set
}
// walk follows word left to right from the cursor's root.
func walk(cur *dawg.Cursor, word []byte) (dawg.Node, bool) {
n := cur.Root()
for _, l := range word {
var ok bool
if n, _, ok = cur.Next(n, l); !ok {
return n, false
}
}
return n, true
}
// dawgCrossSet returns the letters X for which above·X·below is a stored word. A right
// extension (no tiles below) is deterministic — X just completes the prefix above. A
// left extension (tiles below) is non-deterministic and must probe each X.
func dawgCrossSet(cur *dawg.Cursor, above, below []byte, size int) letterSet {
switch {
case len(above) == 0 && len(below) == 0:
return fullSet(size)
case len(below) == 0:
node, ok := walk(cur, above)
if !ok {
return 0
}
return completers(cur, node, size)
default:
node := cur.Root()
if len(above) > 0 {
var ok bool
if node, ok = walk(cur, above); !ok {
return 0
}
}
var set letterSet
for x := range size {
m, final, ok := cur.Next(node, byte(x))
if !ok {
continue
}
for _, l := range below {
if m, final, ok = cur.Next(m, l); !ok {
break
}
}
if ok && final {
set |= letterSet(1) << uint(x)
}
}
return set
}
}