From 15c7959d966d64cbef171632fcff4da12fe98226 Mon Sep 17 00:00:00 2001 From: Ilia Denisov Date: Mon, 1 Jun 2026 16:07:32 +0200 Subject: [PATCH] Implement Scrabble move generator (DAWG) with English and Russian rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Go library that returns every legal play ranked by score and scores or validates plays, using the Appel-Jacobson DAWG algorithm over github.com/iliadenisov/dafsa v1.1.0. - DAWG move generation (across / down / both), full tournament scoring with a per-tile breakdown; public Solver: GenerateMoves (ranked), ScorePlay, ValidatePlay. - Rulesets: English Scrabble, Russian Scrabble, Эрудит (parameterizable Ruleset). - cmd/builddict (build the DAWG from the dictionaries submodule), cmd/stress (self-play benchmark), selfplay engine; brute-force test oracle. - A GADDAG was implemented, benchmarked and removed (the DAWG was smaller and faster for a scoring solver); see RESULTS.md and ALGORITHM.md. 
--- .gitignore | 9 ++ .gitmodules | 3 + ALGORITHM.md | 164 +++++++++++++++++++++ PLAN.md | 174 +++++++++++++++++++++++ README.md | 89 ++++++++++++ RESULTS.md | 87 ++++++++++++ board/board.go | 105 ++++++++++++++ board/board_test.go | 92 ++++++++++++ cmd/builddict/main.go | 67 +++++++++ cmd/stress/main.go | 104 ++++++++++++++ dictionaries | 1 + go.mod | 10 ++ go.sum | 29 ++++ internal/dict/dict.go | 76 ++++++++++ internal/dictdawg/dictdawg.go | 30 ++++ internal/dictdawg/dictdawg_test.go | 44 ++++++ internal/encoding/encoding.go | 43 ++++++ internal/encoding/encoding_test.go | 39 +++++ internal/graph/graph.go | 21 +++ internal/graph/graph_test.go | 43 ++++++ internal/wordlist/wordlist.go | 77 ++++++++++ internal/wordlist/wordlist_test.go | 37 +++++ rack/rack.go | 57 ++++++++ rack/rack_test.go | 51 +++++++ rules/ru_rules_test.go | 61 ++++++++ rules/rules.go | 221 +++++++++++++++++++++++++++++ rules/rules_test.go | 82 +++++++++++ scrabble/apply.go | 14 ++ scrabble/crossset.go | 108 ++++++++++++++ scrabble/crossset_test.go | 75 ++++++++++ scrabble/gen.go | 56 ++++++++ scrabble/gen_dawg.go | 221 +++++++++++++++++++++++++++++ scrabble/gen_dawg_test.go | 121 ++++++++++++++++ scrabble/generator.go | 18 +++ scrabble/key.go | 36 +++++ scrabble/move.go | 74 ++++++++++ scrabble/oracle_test.go | 147 +++++++++++++++++++ scrabble/score.go | 206 +++++++++++++++++++++++++++ scrabble/score_test.go | 138 ++++++++++++++++++ scrabble/solver.go | 101 +++++++++++++ scrabble/solver_test.go | 88 ++++++++++++ selfplay/selfplay.go | 154 ++++++++++++++++++++ selfplay/selfplay_test.go | 33 +++++ 43 files changed, 3406 insertions(+) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 ALGORITHM.md create mode 100644 PLAN.md create mode 100644 README.md create mode 100644 RESULTS.md create mode 100644 board/board.go create mode 100644 board/board_test.go create mode 100644 cmd/builddict/main.go create mode 100644 cmd/stress/main.go create mode 160000 dictionaries create 
mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/dict/dict.go create mode 100644 internal/dictdawg/dictdawg.go create mode 100644 internal/dictdawg/dictdawg_test.go create mode 100644 internal/encoding/encoding.go create mode 100644 internal/encoding/encoding_test.go create mode 100644 internal/graph/graph.go create mode 100644 internal/graph/graph_test.go create mode 100644 internal/wordlist/wordlist.go create mode 100644 internal/wordlist/wordlist_test.go create mode 100644 rack/rack.go create mode 100644 rack/rack_test.go create mode 100644 rules/ru_rules_test.go create mode 100644 rules/rules.go create mode 100644 rules/rules_test.go create mode 100644 scrabble/apply.go create mode 100644 scrabble/crossset.go create mode 100644 scrabble/crossset_test.go create mode 100644 scrabble/gen.go create mode 100644 scrabble/gen_dawg.go create mode 100644 scrabble/gen_dawg_test.go create mode 100644 scrabble/generator.go create mode 100644 scrabble/key.go create mode 100644 scrabble/move.go create mode 100644 scrabble/oracle_test.go create mode 100644 scrabble/score.go create mode 100644 scrabble/score_test.go create mode 100644 scrabble/solver.go create mode 100644 scrabble/solver_test.go create mode 100644 selfplay/selfplay.go create mode 100644 selfplay/selfplay_test.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7e5f20d --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# Cached serialized dictionaries, built from the dictionaries/ submodule by +# cmd/builddict. They are reproducible artifacts, not source. 
+/testdata/*.dawg +/testdata/*.gaddag +/testdata/*.bin + +# Local scratch +/tmp/ +*.pdf diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a0e6146 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "dictionaries"] + path = dictionaries + url = https://github.com/kamilmielnik/scrabble-dictionaries diff --git a/ALGORITHM.md b/ALGORITHM.md new file mode 100644 index 0000000..208cc91 --- /dev/null +++ b/ALGORITHM.md @@ -0,0 +1,164 @@ +# Scrabble move generation — algorithm reference (single source of truth) + +This is the authoritative description of the algorithm the solver implements: the DAWG +move generator of Appel & Jacobson. It is distilled from the source paper (extracted +from the PDF in this repo) plus our adaptation. **Work from this file; do not re-parse +the PDFs.** + +Source: **[AJ88]** A. Appel, G. Jacobson, *The World's Fastest Scrabble Program*, +Commun. ACM 31(5):572–578, 585 (1988). + +> A GADDAG (Gordon, 1994) was also implemented and benchmarked against the DAWG, then +> **removed**: for a scoring solver it was ~7× larger and no faster. See `RESULTS.md`. + +Notation: letters are alphabet **indexes** (English `a..z` → `0..25`). "Across" = +horizontal play; "down" = vertical. The board grid and all I/O use the byte encoding in +`internal/encoding` (low 6 bits = letter+1, `0` = empty, bit 7 = blank). + +--- + +## 1. Reduction to one dimension [AJ88 §3.1] + +Every play is either **across** (one row) or **down** (one column). Down plays are +across plays on the **transposed** board, so the generator implements only "across" and +runs on the board and/or its transpose. This is the two-mode requirement: + +- `OnlyHorizontal` = across on the board. +- `OnlyVertical` = across on the transpose. +- `Both` = both. (A move found on the transpose has its coordinates swapped back.) + +### Anchors [AJ88 §3.1.2] + +An across word must include a newly-placed tile adjacent to a tile already on the board. 
+Generation starts only from **anchors** — empty squares orthogonally adjacent to a +filled square — which guarantees connectivity and prunes most of the search. The very +first move has no anchors and is special-cased through the board's centre square. + +### Cross-checks (cross-sets) [AJ88 §3.1.1] + +For each empty square the **cross-set** is the set of letters that, placed there, form a +legal **perpendicular** word with the tiles above/below it; it is stored as a bit-vector +(`letterSet`). A square with no perpendicular neighbour allows every letter. Cross-sets +let move generation stay one-dimensional. Off-board squares are treated as blocking +sentinels. + +--- + +## 2. Lexicon: trie → DAWG [AJ88 §3.2] + +The lexicon is a trie whose edges are labelled by letters; a word is a root-to-node +path; terminal nodes are marked. Minimizing the trie (merging equivalent sub-tries) +yields a **DAWG**, the minimum-state automaton for the word set. `dafsa` is exactly such +a minimized, bit-packed DAWG, with a compact ≤6-bit alphabet and a **per-node** `final` +flag. Its node identity is a bit-offset; edges of a node are sorted by letter. + +--- + +## 3. Move generation — [AJ88 §3.3] (verbatim) + +Two phases per anchor: place a **left part**, then **extend right**. The left part is +either tiles already on the board (read them, then `ExtendRight` from the corresponding +node) or rack tiles placed by a pruned DAWG traversal bounded by `limit` = number of +non-anchor squares to the left (this bound, reset at each anchor, makes each move appear +once). 
+ +``` +ExtendRight(PartialWord, node N, square): + if square is vacant then + if N is terminal then LegalMove(PartialWord) + for each edge E out of N: + let l = letter of E + if l is in the rack and l is in the cross-set of square then + remove a tile l from the rack + ExtendRight(PartialWord || l, node(E), square+1) + put the tile l back into the rack + else + let l = letter occupying square + if N has an edge labelled l leading to N' then + ExtendRight(PartialWord || l, N', square+1) + +LeftPart(PartialWord, node N, limit): + ExtendRight(PartialWord, N, AnchorSquare) + if limit > 0 then + for each edge E out of N: + let l = letter of E + if l is in the rack then + remove a tile l from the rack + LeftPart(PartialWord || l, node(E), limit - 1) + put the tile l back into the rack +``` + +To generate from an anchor with `k` non-anchor squares to its left: `LeftPart("", root, +k)`. Implementation notes: + +- A word is **recorded only past the anchor** (`square > anchor`), so every recorded play + covers the anchor square — the connection guaranteed by the anchor's filled neighbour. +- **Empty prefix** when a tile sits left of the anchor: skip `LeftPart`; `ExtendRight` + directly from the node reached by walking the on-board left context. +- **Blanks**: when scanning the rack for a letter, also allow a blank to stand for it; + the placed tile is flagged (bit 7) and scores 0; restore on backtrack. +- **First move**: the only anchor is the centre; the left limit equals its column, so the + word covers the centre. + +This maps onto the `dafsa` traversal API: `Cursor.Root`, `Cursor.Next(node, letter)` +(an edge, with the destination's `final` flag), `Cursor.Final(node)`, and `Cursor.Arcs` +(enumerate a node's edges, used for placements and cross-sets). + +--- + +## 4. 
Cross-set computation + +For a square with tiles `above` (top→bottom) and `below`, the cross-set is +`{ X : above·X·below ∈ dict }`: + +- **Right extension** (no `below`): deterministic — `X` just completes the prefix + `above`. Walk `above` to a node, then read the **completers** (one arc enumeration: + the letters whose arc leads straight to an accepting node). +- **Left extension** (tiles `below`): non-deterministic — probe each `X` (walk `above`, + `X`, then `below`, test acceptance). This is the asymmetry inherent to a left-to-right + DAWG. + +Cross-sets are recomputed per generation for the squares that need them (cached within a +call). Scoring is done separately by `Evaluate` (§5), so cross-sums are not precomputed. + +--- + +## 5. Scoring (full tournament rules + breakdown) + +A play's score = main word + every cross-word formed + the all-tiles bonus. Per word: + +- Sum `tileValue(letter)` over its tiles; a **blank** scores 0. +- A **letter** premium (DL/TL) multiplies the value of a tile placed on it **only when + newly placed** this turn. +- A **word** premium (DW/TW) multiplies the whole word **only when a newly-placed tile + sits on it**; multiple word premiums multiply. +- Each cross-word counts its new tile plus the existing perpendicular run. + +The all-tiles bonus is added when the play uses a full rack. Board geometry, premium +layout, tile values/counts, blank count, rack size and the bonus are all part of the +`rules.Ruleset` (`English`, `RussianScrabble`, `Erudit`). `Evaluate` returns the main +word, the cross-words and a per-tile breakdown. `ValidatePlay` adds dictionary and +connectivity checks. + +--- + +## 6. Rulesets + +- **English** Scrabble: 15×15, standard premiums, 26 letters, 100 tiles (98 + 2 blanks), + 7-tile rack, 50-point bonus. +- **Russian Scrabble**: 33-letter alphabet (incl. Ё), standard board, 104 tiles, 50-bonus. 
+- **Эрудит**: 33-letter alphabet with **Ё unused** (no tile — fold Ё→Е when preparing the + dictionary, `wordlist.FoldYo`; an out-of-engine step), the **centre does not double**, + 131 tiles (128 + 3 blanks), blanks score 0, **15-point bonus**. + +--- + +## 7. Special cases checklist + +- **First move**: no anchors; the play must cover the centre. +- **Blanks**: any letter, score 0, flagged via bit 7; expand to every cross-set-allowed + letter during generation. +- **Off-board sentinels**: stop extension at the edge. +- **A single newly-placed tile** can form both an across and a down word. +- **Dedup**: each legal move is generated once (anchor + left-part limit); a canonical + move key guards against any residual duplicate. diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..80d77ca --- /dev/null +++ b/PLAN.md @@ -0,0 +1,174 @@ +# Scrabble Solver — Implementation Plan + +## Outcome (current state) + +Both generators were implemented and verified to produce identical moves, then compared +by self-play stress test (`RESULTS.md`). The **GADDAG was removed**: for a scoring solver +it was ~7× larger and no faster than the **DAWG**, which is now the sole generator. +Shipped: the DAWG generator, full scoring + breakdown, the public `Solver` +(`GenerateMoves`/`ScorePlay`/`ValidatePlay`), and three rulesets (English Scrabble, +Russian Scrabble, Эрудит). The rest of this document is the original roadmap, kept for +history; the DAWG/GADDAG comparison it describes is preserved in `RESULTS.md`. + +## Context + +We are building a Go library that, given a dictionary, a current game position and a +player's rack, returns every legal new play ranked by descending score. 
The core is a +fast finite-automaton move generator based on two papers (analysed in `ALGORITHM.md`): + +- Appel & Jacobson, *The World's Fastest Scrabble Program* (CACM 1988) — the **DAWG** + algorithm (cross-checks, anchor squares, `LeftPart`/`ExtendRight`, edge encoding, + cross-sums for scoring, transpose for the perpendicular direction). +- Gordon, *A Faster Scrabble Move Generation Algorithm* (SP&E 1994) — the **GADDAG** + (`REV(x)◊y` representation, single `Gen`/`GoOn` generator, deterministic cross-sets, + construction algorithm). + +The graph engine is `github.com/iliadenisov/dafsa` — a compact, bit-packed minimized +DAWG with a 6-bit "compact alphabet" (`alphabet.Indexer`, ≤63 symbols) and an +index-based (`*B`) API, checked out locally at `../dafsa`. + +### Headline approach + +**Implement BOTH generators — DAWG and GADDAG — behind one shared `Generator` +interface, then decide which becomes the production default empirically**, via a clean +self-play stress test (two greedy players, several games) on the *same* dictionary, +measuring speed and memory. The choice is made **after** implementation and measurement. +Both implementations are kept; the comparison output (`RESULTS.md`) picks the default. + +### Locked decisions + +| # | Topic | Decision | +|---|---|---| +| 1 | Core algorithm | **Implement both**: DAWG (Appel-Jacobson) and GADDAG (Gordon, over `dafsa` as a DAWG of `{REV(x)◊y}` with per-node final flags). Pick the default after a self-play stress test. | +| 2 | dafsa changes | **Edit `../dafsa` directly**, wire via `go.mod replace`. Leave a spec/CHANGELOG there. | +| 3 | Ruleset scope | Default **standard English Scrabble**, fully **parameterizable** (board geometry, premium layout DL/TL/DW/TW, tile values & counts, alphabet, blank count, bingo bonus). Must support **Russian "Эрудит"** (same 15×15 board + premiums; different tile values/counts; Cyrillic alphabet; one word per move, horizontal **or** vertical). 
| +| 4 | Scoring | **Full tournament scoring + breakdown**: main word + all cross-words + premiums (newly-placed tiles only) + bingo bonus; result carries formed cross-words and a per-tile breakdown. | +| 5 | Symbol encoding | **`0x80` = wildcard/blank** flag (board/rack/output only — never in the graph). **GADDAG separator `◊` = index == alphabet size** (`cbits` minimal; measured optimum). | +| 6 | State model | **Compact byte board** is the generation core; a structured **`Play`** type + a constructor that applies plays to build a board provide the full game-state overlay. | +| 7 | API scope | **Generation + scoring + validation** of arbitrary plays. | +| 8 | Dictionary | `kamilmielnik/scrabble-dictionaries` as a **git submodule**; `cmd/builddict` builds serialized structures **cached in `testdata/` (gitignored)**. English now; Russian later. | + +### Static structure probe (informs expectations; NOT the decision) + +Full SOWPODS (267,752 words, Σlen = 2,439,269), built through `dafsa`: + +| Structure | nodes | bytes | bits/char | build | ns/arc | +|---|---:|---:|---:|---:|---:| +| DAWG (a–z) | 77,808 | 750 KB | 2.46 | 186 ms | 48.7 | +| GADDAG sep=size(26) · cbits5 | 587,940 | 5.37 MB | 17.61 | 2.92 s | 61.0 | +| GADDAG sep=62 · cbits6 (measured) | 587,940 | 5.53 MB | 18.13 | — | 60.9 | +| GADDAG sep=0x40 · cbits7 (extrapolated) | 587,940 | ~5.69 MB | ~18.6 | — | ~61 | + +GADDAG is ~7× the DAWG and ~25% costlier per arc, but ~2× faster at actual move +generation (Gordon Table IV: ~2.5× fewer arcs). The stress test settles it. + +## Deliverable documents + +1. **`ALGORITHM.md`** — single source of truth (verbatim pseudocode + our adaptation). +2. **`PLAN.md`** — this plan. +3. **`RESULTS.md`** — stress-test comparison + the production-default decision. 
+ +## Architecture & package layout + +``` +scrabble-solver/ + go.mod # + replace github.com/iliadenisov/dafsa => ../dafsa + PLAN.md ALGORITHM.md RESULTS.md README.md + dictionaries/ # git submodule: kamilmielnik/scrabble-dictionaries + testdata/ # gitignored: cached serialized DAWG + GADDAG + internal/ + gaddag/ # REV(x)◊y transform + build + traversal wrapper over dafsa + dictdawg/ # plain-DAWG build + traversal wrapper over dafsa + encoding/ # byte conventions (wildcard 0x80, separator, board cells) + board/ # compact board grid, transpose, premium layout + rack/ # rack as per-letter counts + blanks + rules/ # Ruleset: geometry, premiums, tile values/counts, alphabet, bonus + scrabble/ (public pkg) # Solver + Generator interface; Play/Move types; + gen_dawg.go # DAWG generator (LeftPart/ExtendRight) + gen_gaddag.go # GADDAG generator (Gen/GoOn) + selfplay/ # bag + greedy player + game loop (self-play engine) + cmd/builddict/ # word list -> serialized DAWG/GADDAG -> testdata + cmd/stress/ # run N self-play games per generator, emit comparison +``` + +Shared `Generator` interface so the harness can swap implementations: + +```go +type Generator interface { + GenerateMoves(b *board.Board, r rack.Rack, mode Mode) []Move // ranked, descending score + Name() string +} +``` + +Board, rack, rules and **scoring are shared**; cross-set computation is per-generator +(DAWG: probe the dictionary DAWG incl. the non-deterministic left set; GADDAG: +deterministic GADDAG walk) — that difference is part of what is measured. + +## Changes to `../dafsa` (additive ⇒ backward compatible) + +1. **Low-level traversal API**: `type Node` (opaque bit-offset); `Root() Node`; + `Next(n, ch) (child, final, ok)` (= Gordon's `NextArc`, wraps private `getEdge`); + `Arcs(n, fn)` (wraps `getNode`, for blanks/cross-sets); a reusable allocation-free + cursor for hot-path `Next`. +2. 
**Custom-alphabet persistence**: `WriteTo`/`SaveWith` (allow non-embedded alphabet); + `ReadWith`/`LoadWith` (inject a known indexer, skip language reconstruction). +3. (Optional) accurate serialized arc/node count; document that `NumEdges()` is build-time. + +## Data model & compact formats + +- **Byte symbol**: low 6 bits = alphabet index; `0x80` = wildcard/blank (I/O only); + `◊` = index `len(alphabet)` (GADDAG graph only). +- **Board**: `[]byte`, row-major. `0` = empty; occupied = `letterIndex+1`; blank = + `(letterIndex+1) | 0x80`. Helpers: `At`, `Set`, `Transpose`, premium lookup. +- **Rack**: `[]byte` counts, length `alphabetSize+1`; last slot = blank count. +- **`Play`**: `{Row, Col; Dir; Tiles []byte (0x80 flags); Main; CrossWords; Score; + Breakdown}` — input for apply/validate/score and the output element. +- **Modes**: `Both`, `Horizontal`, `Vertical`. + +## Staged implementation + +- **Stage 0** — Scaffolding & docs: `ALGORITHM.md`, `PLAN.md`, `dictionaries/` submodule, + `go.mod` replace, `.gitignore`. +- **Stage 1** — dafsa traversal API (shared): `Node`, `Root`, `Next`, `Arcs`, cursor; tests. +- **Stage 2** — dafsa custom-alphabet persistence: `SaveWith`/`ReadWith`; round-trip. +- **Stage 3** — Shared infra: encoding, board (+transpose), rack, rules (EN; Эрудит stub), + scoring, `Generator` interface, `Move`/`Play` types. +- **Stage 4** — Dictionary build: `internal/dictdawg` + `internal/gaddag`; `cmd/builddict` + caching serialized DAWG **and** GADDAG in `testdata`. +- **Stage 5** — Cross-sets: DAWG cross-checks (incl. non-deterministic left set) and GADDAG + deterministic cross-sets; validated against each other + brute force on a small lexicon. +- **Stage 6** — DAWG generator (`LeftPart`/`ExtendRight`). +- **Stage 7** — GADDAG generator (`Gen`/`GoOn`). +- **Stage 8** — Correctness gate: DAWG and GADDAG identical move sets on random positions + (each move once) + brute force on a tiny dictionary. Must pass before perf comparison. 
+- **Stage 9** — Self-play stress test: `selfplay` engine (bag, racks, greedy policy, + seeded RNG, end conditions); `cmd/stress` plays N games per generator measuring time, + arcs, allocations (`runtime.MemStats`), peak RSS (`/proc/self/status` VmHWM), footprint; + emit `RESULTS.md`. +- **Stage 10** — Decision + public API: choose default from `RESULTS.md` (both selectable); + finalize `Solver` API, Play↔board constructors, examples. +- **Stage 11** — Polish: benchmarks, README, optional prebuilt-graph distribution. + +## Verification + +- `go test ./...`, `go vet`, lint green per stage. +- Mutual oracle (Stage 8): identical move sets; brute force on a tiny dictionary. +- Build EN structures from the SOWPODS submodule via `cmd/builddict`; run `GenerateMoves` + on canonical positions (e.g. Gordon's "CARE on ABLE") and assert top moves/scores. +- Run `cmd/stress` (100–1000 seeded games per generator) → `RESULTS.md`. + +## Assumptions & caveats + +- Both algorithms ship; the production default is decided by the stress test. Both remain + selectable. +- Self-play policy defaults to **greedy** (deterministic tie-break, seeded RNG); tunable. +- Separator = real 27th token (`◊`, index = size, `cbits=5`); `0x40` reserved on the board. + Wildcard/blank = `0x80`, never in the graph. +- **Stateless per-call** generation in v1 (anchors + cross-sets recomputed per call); + incremental maintenance is a later optimization (both generators run stateless — a + fairness note for the comparison). +- Persistence stores only the graph; the (custom) alphabet is injected on load. +- Russian "Эрудит" alphabet specifics (Е/Ё handling, tile values/counts) resolved at + Stage 3/4; "one word per move, H or V" is satisfied by the modes. +- The final-flag GADDAG is larger than Gordon's letter-set form; letter-sets-on-arcs + remain a possible future optimization. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..c5df488 --- /dev/null +++ b/README.md @@ -0,0 +1,89 @@ +# scrabble-solver + +A Go library that, given a dictionary, a board position and a rack, returns **every +legal play ranked by score**, and also **scores** or **validates** arbitrary plays. The +move generator is the DAWG algorithm of Appel & Jacobson, *The World's Fastest Scrabble +Program*. It operates on compact byte-indexed inputs/outputs and is dictionary-driven via +[`github.com/iliadenisov/dafsa`](https://github.com/iliadenisov/dafsa). + +See [`ALGORITHM.md`](ALGORITHM.md) for the algorithm (the single source of truth) and +[`RESULTS.md`](RESULTS.md) for the DAWG-vs-GADDAG benchmark that settled the design. + +## Status + +- DAWG move generation (across / down / both orientations), with full tournament scoring + (cross-words, premiums, all-tiles bonus) and a per-tile breakdown. +- Public `Solver`: `GenerateMoves` (ranked), `ScorePlay`, `ValidatePlay`. +- Rulesets: **English** Scrabble, **Russian** Scrabble, **Эрудит**; `rules.Ruleset` is + fully parameterizable (board, premiums, tile values/counts, blanks, rack, bonus). +- A GADDAG (Gordon) was implemented, benchmarked and then **removed** — for a scoring + solver it was ~7× larger and no faster. 
+ +## Layout + +``` +scrabble/ public API: Solver, Move/Play types, DAWG generator, scoring, validation +board/ rack/ rules/ board grid (+transpose), rack, rulesets (English/Russian/Эрудит) +internal/ encoding (byte conventions), wordlist, dictdawg, dict, graph +cmd/builddict/ word list -> serialized DAWG in testdata +cmd/stress/ greedy self-play benchmark of the generator +selfplay/ bag + greedy player + game loop +``` + +## Setup + +```sh +git submodule update --init # the dictionaries submodule (SOWPODS, TWL06, …) +go run ./cmd/builddict # build testdata/sowpods.dawg (≈0.2 s, ~730 KB) +``` + +`go.mod` carries `replace github.com/iliadenisov/dafsa => ../dafsa`: the solver needs +dafsa's low-level traversal `Cursor` (see the patch notes in `../dafsa/SCRABBLE_API.md`). + +## Usage + +```go +rs := rules.English() +finder, _ := dict.EnglishDAWG() // loads testdata/sowpods.dawg +s := scrabble.NewSolver(rs, finder) + +b := board.New(rs.Rows, rs.Cols) // empty board (first move) + +r := rack.New(rs.Size()) // rack "friends" +tiles, _ := rs.Alphabet.Encode("friends") +for _, t := range tiles { + r.Add(t) +} + +moves := s.GenerateMoves(b, r, scrabble.Both) // ranked, highest score first +best := moves[0] +// best.Main / best.Cross hold the words (alphabet indexes; decode via rs.Alphabet), +// best.Tiles the placed tiles (with blank flags), best.Score the total. + +// Score or validate an arbitrary play (placed tiles + direction): +m, err := s.ValidatePlay(b, scrabble.Horizontal, best.Tiles) +_ = m +_ = err +``` + +Words and tiles are alphabet **indexes** throughout (no string wrapper); convert with the +ruleset's `alphabet.Indexer` (`Encode`/`Decode`) when you need text. + +## Rulesets + +`rules.English()`, `rules.RussianScrabble()`, `rules.Erudit()`, or build your own with +`rules.FromTemplate(...)`. For Эрудит, fold Ё→Е while preparing the dictionary with +`wordlist.FoldYo` (the engine treats them as one letter; it is a dictionary-prep step). 
+ +## Benchmark + +```sh +go run ./cmd/stress -games 100 # greedy AI-vs-AI self-play; reports speed and memory +``` + +## Tests + +```sh +go test ./... # unit tests + a brute-force move-generation oracle +go test ./... -short # skips the full-dictionary game test +``` diff --git a/RESULTS.md b/RESULTS.md new file mode 100644 index 0000000..c07c60f --- /dev/null +++ b/RESULTS.md @@ -0,0 +1,87 @@ +# DAWG vs GADDAG — self-play stress test + +> **Note.** The GADDAG generator has since been **removed** from the codebase — the DAWG +> is the sole move generator. This document is kept as the record of the comparison that +> justified that choice. `cmd/stress` now benchmarks the DAWG alone. + +The two move generators were compared by playing greedy AI-vs-AI games on the same +dictionary with the same seeds, measuring speed and memory. Reproduce with: + +``` +go run ./cmd/builddict # build testdata/sowpods.{dawg,gaddag} +go run ./cmd/stress -games 200 +``` + +## Setup + +- Dictionary: English **SOWPODS**, 267,752 words (2–15 letters). +- Board: standard 15×15; greedy player (highest-scoring move), seeded RNG. +- 200 games per generator, identical seeds; mode = both orientations. +- Machine: this dev container (Go 1.26, 12 cores; single-threaded run). + +## Results (200 games) + +Includes the **deterministic cross-set optimization** (one arc enumeration via +`completers()` for one-sided squares instead of probing every letter); both one-sided +cases are deterministic for the GADDAG, only the right-extension is for the DAWG. 
+ +| metric | DAWG | GADDAG | +|---|---:|---:| +| structure size | **732.5 KB** | 5.12 MB | +| games / turns / plays | 200 / 4783 / 4769 | 200 / 4783 / 4769 | +| moves generated | 3,880,236 | 3,880,236 | +| generation time | **23.68 s** | 25.98 s | +| µs / move-generation call | **4951** | 5431 | +| moves generated / sec | **163,863** | 149,383 | +| arcs traversed | **261.8 M** | 276.0 M | +| arcs / move generated | **67.5** | 71.1 | +| heap allocated | 16.79 GB | 16.79 GB | +| GC cycles | 5624 | 5605 | +| avg final game score | 849.3 | 849.3 | + +`GADDAG vs DAWG: 1.10× generation time, 7.16× structure size, 1.00× heap.` +Peak process RSS (both structures mapped): ~42 MB. + +The optimization cut the GADDAG's cross-set arcs (285.5 M → 276.0 M) and narrowed the +arc gap (1.08× → 1.05×), but the verdict is unchanged: **the GADDAG is still ~10 % +slower, 7× larger and traverses slightly more arcs.** End-to-end time is dominated by +the shared per-move scoring (`Evaluate`, ~3.9 M calls), not by graph search, so the +search-algorithm difference barely moves the total — and what difference remains favours +the narrower, smaller DAWG. + +## Interpretation + +- **Correctness.** Both generators produce the *identical* set of moves and scores at + every position (identical turns/plays/moves/score above, and the Stage-8 mutual oracle + agreeing on 119 positions over 5 games). They differ only in how they search. +- **Speed.** The DAWG is ~10 % faster end-to-end and traverses ~5 % **fewer** arcs (261.8 M vs 276.0 M). +- **The GADDAG does not win here, contrary to Gordon's paper.** Two reasons: + 1. We use the **final-flag GADDAG** (the minimized DAWG of `REV(x)◊y`, completion via + accepting states) so that `dafsa` can be used essentially unmodified. This variant + lacks Gordon's *letter-sets-on-arcs* compression, so it is both ~7× larger and has + wider nodes — it traverses *more* arcs, not fewer, erasing the theoretical edge. + 2. 
The workload is a **scoring solver**: every generated move is scored (cross-words, + premiums, bonus) by the shared `Evaluate`. That shared per-move cost dominates, so + the search-algorithm difference is small — and what remains favours the simpler, + narrower DAWG. +- **Memory.** The GADDAG structure is 7.16× larger (5.12 MB vs 732 KB). Per-move heap + allocation is identical (dominated by shared scoring), and overall RSS is modest. + +## Decision + +**Use the DAWG (Appel-Jacobson) generator as the production default.** For this library +(a move generator that scores and ranks every play) it is smaller (7×), at least as fast, +and simpler to operate: it needs no separator symbol or custom alphabet, `dafsa`'s +`Save`/`Load` work unchanged, and it requires the fewest `dafsa` additions (only the +shared low-level traversal API). + +At the time of this comparison the **GADDAG generator was kept and remained selectable** (`scrabble.NewGADDAGGenerator`), +both as an alternative and as a correctness oracle for the DAWG; it has since been removed (see the note at the top). + +### Caveats / what could change the picture + +- A **letter-set GADDAG** (Gordon's true compressed form) plus **incremental cross-set + maintenance** would shrink the GADDAG and cut its arc count; it might then beat the DAWG + on raw move generation. This was not pursued: the DAWG already meets the goal, and the + 7× size gap is decisive for a scoring-solver workload where generation is not the + bottleneck. It remains a documented future optimization. diff --git a/board/board.go b/board/board.go new file mode 100644 index 0000000..f497af4 --- /dev/null +++ b/board/board.go @@ -0,0 +1,105 @@ +// Package board holds the compact game board: a row-major grid of cell bytes encoded +// per internal/encoding (0 = empty, letter+1, with 0x80 marking a blank). It is +// otherwise alphabet-agnostic. +package board + +import ( + "fmt" + "unicode" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/internal/encoding" +) + +// Board is a row-major grid of encoded cells. 
+type Board struct { + rows, cols int + cells []byte +} + +// New returns an empty rows×cols board. +func New(rows, cols int) *Board { + return &Board{rows: rows, cols: cols, cells: make([]byte, rows*cols)} +} + +// Rows returns the number of rows. +func (b *Board) Rows() int { return b.rows } + +// Cols returns the number of columns. +func (b *Board) Cols() int { return b.cols } + +// At returns the encoded cell at (r, c). It does not bounds-check: (r, c) must satisfy InBounds. +func (b *Board) At(r, c int) byte { return b.cells[r*b.cols+c] } + +// Set stores the encoded cell v at (r, c). It does not bounds-check: (r, c) must satisfy InBounds. +func (b *Board) Set(r, c int, v byte) { b.cells[r*b.cols+c] = v } + +// InBounds reports whether (r, c) is on the board. +func (b *Board) InBounds(r, c int) bool { + return r >= 0 && r < b.rows && c >= 0 && c < b.cols +} + +// Empty reports whether (r, c) is an empty square. Unlike Filled, it does not check that (r, c) is on the board. +func (b *Board) Empty(r, c int) bool { return encoding.IsEmpty(b.cells[r*b.cols+c]) } + +// Filled reports whether (r, c) is on the board and occupied. +func (b *Board) Filled(r, c int) bool { + return b.InBounds(r, c) && !encoding.IsEmpty(b.cells[r*b.cols+c]) +} + +// IsEmpty reports whether the whole board is empty (used for the first move). +func (b *Board) IsEmpty() bool { + for _, c := range b.cells { + if !encoding.IsEmpty(c) { + return false + } + } + return true +} + +// Clone returns a deep copy of the board. +func (b *Board) Clone() *Board { + cp := &Board{rows: b.rows, cols: b.cols, cells: make([]byte, len(b.cells))} + copy(cp.cells, b.cells) + return cp +} + +// Transpose returns a new board with rows and columns swapped, turning vertical lines +// into horizontal ones. Down-play generation runs on the transpose. +func (b *Board) Transpose() *Board { + t := &Board{rows: b.cols, cols: b.rows, cells: make([]byte, len(b.cells))} + for r := range b.rows { + for c := range b.cols { + t.cells[c*t.cols+r] = b.cells[r*b.cols+c] + } + } + return t +} + +// Parse builds a board from text rows: '.' 
(or space) is an empty square, a lowercase +// letter is a normal tile, and an uppercase letter is a blank standing for that letter. +// Letters are resolved through idx. +func Parse(rows []string, idx alphabet.Indexer) (*Board, error) { + if len(rows) == 0 { + return nil, fmt.Errorf("board: no rows") + } + cols := len([]rune(rows[0])) + b := New(len(rows), cols) + for r, line := range rows { + runes := []rune(line) + for c := 0; c < cols && c < len(runes); c++ { + ch := runes[c] + if ch == '.' || ch == ' ' { + continue + } + blank := unicode.IsUpper(ch) + li, err := idx.Index(string(unicode.ToLower(ch))) + if err != nil { + return nil, fmt.Errorf("board: row %d col %d %q: %w", r, c, string(ch), err) + } + b.Set(r, c, encoding.Cell(li, blank)) + } + } + return b, nil +} diff --git a/board/board_test.go b/board/board_test.go new file mode 100644 index 0000000..e7b8d21 --- /dev/null +++ b/board/board_test.go @@ -0,0 +1,92 @@ +package board_test + +import ( + "testing" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/board" + "scrabble-solver/internal/encoding" +) + +func TestParseAndAccess(t *testing.T) { + b, err := board.Parse([]string{ + "cat", + "o..", + "W..", // blank standing for 'w' + }, alphabet.Latin()) + if err != nil { + t.Fatal(err) + } + if b.Rows() != 3 || b.Cols() != 3 { + t.Fatalf("size = %dx%d, want 3x3", b.Rows(), b.Cols()) + } + if b.IsEmpty() { + t.Error("IsEmpty = true for a non-empty board") + } + if !b.Empty(1, 1) { + t.Error("(1,1) should be empty") + } + if !b.Filled(0, 0) { + t.Error("(0,0) should be filled") + } + + // 'c' = index 2, normal tile. + if got := b.At(0, 0); got != encoding.Cell(2, false) { + t.Errorf("At(0,0) = %#x, want %#x", got, encoding.Cell(2, false)) + } + if encoding.IsBlank(b.At(0, 0)) { + t.Error("(0,0) wrongly marked blank") + } + // 'W' = blank for index 22. 
+ if got := b.At(2, 0); got != encoding.Cell(22, true) { + t.Errorf("At(2,0) = %#x, want blank w", got) + } + if !encoding.IsBlank(b.At(2, 0)) { + t.Error("(2,0) should be a blank") + } +} + +func TestNewIsEmpty(t *testing.T) { + if !board.New(15, 15).IsEmpty() { + t.Error("new board not empty") + } +} + +func TestTranspose(t *testing.T) { + b, _ := board.Parse([]string{ + "ab", + "..", + "cd", + }, alphabet.Latin()) + tr := b.Transpose() + if tr.Rows() != 2 || tr.Cols() != 3 { + t.Fatalf("transpose size = %dx%d, want 2x3", tr.Rows(), tr.Cols()) + } + if tr.At(0, 0) != b.At(0, 0) || tr.At(1, 0) != b.At(0, 1) || tr.At(0, 2) != b.At(2, 0) { + t.Error("transpose did not swap coordinates") + } + + // Transposing twice restores the original. + back := tr.Transpose() + for r := range b.Rows() { + for c := range b.Cols() { + if back.At(r, c) != b.At(r, c) { + t.Fatalf("double transpose differs at (%d,%d)", r, c) + } + } + } +} + +func TestClone(t *testing.T) { + b := board.New(3, 3) + b.Set(1, 1, encoding.Cell(0, false)) + cp := b.Clone() + cp.Set(0, 0, encoding.Cell(1, false)) + if !b.Empty(0, 0) { + t.Error("mutating clone changed the original") + } + if cp.Empty(1, 1) { + t.Error("clone lost original content") + } +} diff --git a/cmd/builddict/main.go b/cmd/builddict/main.go new file mode 100644 index 0000000..a6a7320 --- /dev/null +++ b/cmd/builddict/main.go @@ -0,0 +1,67 @@ +// Command builddict converts a word list into a serialized DAWG, cached under testdata +// for the tests and the benchmark. By default it reads the English SOWPODS list from +// the dictionaries submodule. 
+package main + +import ( + "flag" + "fmt" + "log" + "os" + "path/filepath" + "time" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/internal/dictdawg" + "scrabble-solver/internal/wordlist" +) + +func main() { + dict := flag.String("dict", "dictionaries/english/sowpods.txt", "word list file (one word per line)") + out := flag.String("out", "testdata", "output directory") + name := flag.String("name", "sowpods", "base name for the output file") + minLen := flag.Int("min", 2, "minimum word length") + maxLen := flag.Int("max", 15, "maximum word length") + flag.Parse() + + idx := alphabet.Latin() + + t0 := time.Now() + words, err := wordlist.Read(*dict, idx, *minLen, *maxLen) + if err != nil { + log.Fatalf("read %s: %v", *dict, err) + } + fmt.Printf("loaded %d words from %s in %s\n", len(words), *dict, time.Since(t0).Round(time.Millisecond)) + + if err := os.MkdirAll(*out, 0o755); err != nil { + log.Fatal(err) + } + + t := time.Now() + f, err := dictdawg.Build(idx, words) + if err != nil { + log.Fatalf("build dawg: %v", err) + } + path := filepath.Join(*out, *name+".dawg") + if err := dictdawg.Save(f, path); err != nil { + log.Fatalf("save: %v", err) + } + size := int64(0) + if fi, err := os.Stat(path); err == nil { + size = fi.Size() + } + fmt.Printf("DAWG %d nodes, %s, built+saved in %s -> %s\n", + f.NumNodes(), humanBytes(size), time.Since(t).Round(time.Millisecond), path) +} + +func humanBytes(n int64) string { + switch { + case n >= 1<<20: + return fmt.Sprintf("%.2f MB", float64(n)/(1<<20)) + case n >= 1<<10: + return fmt.Sprintf("%.1f KB", float64(n)/(1<<10)) + default: + return fmt.Sprintf("%d B", n) + } +} diff --git a/cmd/stress/main.go b/cmd/stress/main.go new file mode 100644 index 0000000..77ff56c --- /dev/null +++ b/cmd/stress/main.go @@ -0,0 +1,104 @@ +// Command stress plays many greedy AI-vs-AI games and reports the DAWG move generator's +// speed and memory. It is a benchmark / regression tool for the production generator. 
+package main + +import ( + "flag" + "fmt" + "log" + "os" + "runtime" + "strconv" + "strings" + "time" + + "scrabble-solver/internal/dict" + "scrabble-solver/rules" + "scrabble-solver/scrabble" + "scrabble-solver/selfplay" +) + +func main() { + games := flag.Int("games", 100, "games to play") + flag.Parse() + + rs := rules.English() + if !dict.EnglishAvailable() { + log.Fatal("English dictionary not available; run `go run ./cmd/builddict` first") + } + f, err := dict.EnglishDAWG() + if err != nil { + log.Fatalf("load dawg: %v", err) + } + gen := scrabble.NewDAWGGenerator(rs, f) + structSize := fileSize(dict.DAWGCache()) + + runtime.GC() + var m0 runtime.MemStats + runtime.ReadMemStats(&m0) + start := time.Now() + + var turns, plays, movesGen int + var genTime time.Duration + var score float64 + for seed := 1; seed <= *games; seed++ { + res := selfplay.PlayGame(rs, gen, scrabble.Both, int64(seed), nil) + turns += res.Turns + plays += res.Plays + movesGen += res.MovesGenerated + genTime += res.GenTime + score += float64(res.Scores[0] + res.Scores[1]) + } + wall := time.Since(start) + var m1 runtime.MemStats + runtime.ReadMemStats(&m1) + + fmt.Printf("DAWG · English SOWPODS · %d games · board %dx%d · greedy self-play\n\n", *games, rs.Rows, rs.Cols) + fmt.Printf(" structure size %s\n", humanBytes(structSize)) + fmt.Printf(" turns / plays %d / %d\n", turns, plays) + fmt.Printf(" moves generated %d\n", movesGen) + fmt.Printf(" generation time %s (%.1f µs/turn)\n", + genTime.Round(time.Millisecond), float64(genTime.Microseconds())/float64(turns)) + fmt.Printf(" moves generated/sec %.0f\n", float64(movesGen)/genTime.Seconds()) + fmt.Printf(" wall time %s\n", wall.Round(time.Millisecond)) + fmt.Printf(" heap allocated %s (%d GC cycles)\n", + humanBytes(int64(m1.TotalAlloc-m0.TotalAlloc)), m1.NumGC-m0.NumGC) + fmt.Printf(" avg final game score %.1f\n", score/float64(*games)) + fmt.Printf(" peak process RSS %s\n", humanKB(peakRSS())) +} + +func fileSize(p string) int64 { + if 
// peakRSS returns the number from the "VmHWM:" line of /proc/self/status, which
// the caller treats as kilobytes. It returns 0 when the file cannot be read, when
// no such line with a value exists, or when the value does not parse.
func peakRSS() int64 {
	raw, err := os.ReadFile("/proc/self/status")
	if err != nil {
		return 0
	}
	status := string(raw)
	for status != "" {
		var line string
		line, status, _ = strings.Cut(status, "\n")
		value, found := strings.CutPrefix(line, "VmHWM:")
		if !found {
			continue
		}
		fields := strings.Fields(value)
		if len(fields) == 0 {
			continue
		}
		// A malformed number leaves kb at 0, which is the "unknown" result anyway.
		kb, _ := strconv.ParseInt(fields[0], 10, 64)
		return kb
	}
	return 0
}

// humanBytes formats a byte count as "N B", "N.N KB" (from 1024 bytes) or
// "N.NN MB" (from 1024*1024 bytes).
func humanBytes(n int64) string {
	const (
		kib = 1 << 10
		mib = 1 << 20
	)
	if n >= mib {
		return fmt.Sprintf("%.2f MB", float64(n)/mib)
	}
	if n >= kib {
		return fmt.Sprintf("%.1f KB", float64(n)/kib)
	}
	return fmt.Sprintf("%d B", n)
}

// humanKB formats a count of kilobytes with humanBytes.
func humanKB(kb int64) string {
	return humanBytes(1024 * kb)
}
+github.com/iliadenisov/dafsa v1.1.0/go.mod h1:mG6Y0DdfRrqdXGqTEMb9Zx0Fl0NkP3ZDYesvxR+e14o= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20201008143054-e3b2a7f2fdc7 h1:2/QncOxxpPAdiH+E00abYw/SaQG353gltz79Nl1zrYE= +golang.org/x/exp v0.0.0-20201008143054-e3b2a7f2fdc7/go.mod h1:1phAWC201xIgDyaFpmDeZkgf70Q4Pd/CNqfRtVPtxNw= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod 
// exists reports whether os.Stat succeeds on p.
func exists(p string) bool {
	if _, err := os.Stat(p); err != nil {
		return false
	}
	return true
}

// Root returns the nearest directory, starting at the working directory and moving
// towards the filesystem root, that contains a go.mod file. It returns "." when the
// working directory cannot be determined or no such directory is found.
func Root() string {
	cwd, err := os.Getwd()
	if err != nil {
		return "."
	}
	for dir := cwd; ; dir = filepath.Dir(dir) {
		if exists(filepath.Join(dir, "go.mod")) {
			return dir
		}
		// filepath.Dir is a fixed point at the top of the tree: nothing left to try.
		if filepath.Dir(dir) == dir {
			return "."
		}
	}
}
+func DAWGCache() string { return filepath.Join(Root(), "testdata", "sowpods.dawg") } +func WordlistPath() string { return filepath.Join(Root(), "dictionaries", "english", "sowpods.txt") } + +// EnglishAvailable reports whether the English dictionary can be loaded (cache or source). +func EnglishAvailable() bool { + return exists(DAWGCache()) || exists(WordlistPath()) +} + +// EnglishWords returns the encoded English word list (from the submodule source). +func EnglishWords() ([][]byte, error) { + return wordlist.Read(WordlistPath(), alphabet.Latin(), MinLen, MaxLen) +} + +// EnglishDAWG returns the English DAWG, loading the cache if present, otherwise building +// it from the word list and caching it (best effort). +func EnglishDAWG() (dawg.Finder, error) { + if exists(DAWGCache()) { + return dictdawg.Load(DAWGCache()) + } + words, err := EnglishWords() + if err != nil { + return nil, err + } + f, err := dictdawg.Build(alphabet.Latin(), words) + if err != nil { + return nil, err + } + _ = dictdawg.Save(f, DAWGCache()) + return f, nil +} diff --git a/internal/dictdawg/dictdawg.go b/internal/dictdawg/dictdawg.go new file mode 100644 index 0000000..54a6183 --- /dev/null +++ b/internal/dictdawg/dictdawg.go @@ -0,0 +1,30 @@ +// Package dictdawg builds a plain left-to-right DAWG of a dictionary, as used by the +// Appel-Jacobson move generator. +package dictdawg + +import ( + "github.com/iliadenisov/alphabet" + dawg "github.com/iliadenisov/dafsa" +) + +// Build returns a DAWG Finder over words, which must be alphabet-index slices sorted by +// index order and de-duplicated (see wordlist.Encode). +func Build(idx alphabet.Indexer, words [][]byte) (dawg.Finder, error) { + d := dawg.New(idx) + for _, w := range words { + if err := d.AddB(w); err != nil { + return nil, err + } + } + return d.Finish(), nil +} + +// Save writes the DAWG to filename. It requires an embedded alphabet (for example +// alphabet.Latin()), so that Load can reconstruct it. 
+func Save(f dawg.Finder, filename string) error { + _, err := f.Save(filename) + return err +} + +// Load reopens a DAWG saved with Save. +func Load(filename string) (dawg.Finder, error) { return dawg.Load(filename) } diff --git a/internal/dictdawg/dictdawg_test.go b/internal/dictdawg/dictdawg_test.go new file mode 100644 index 0000000..b0c5d78 --- /dev/null +++ b/internal/dictdawg/dictdawg_test.go @@ -0,0 +1,44 @@ +package dictdawg_test + +import ( + "path/filepath" + "testing" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/internal/dictdawg" + "scrabble-solver/internal/wordlist" +) + +func TestBuildAndQuery(t *testing.T) { + words := wordlist.Encode([]string{"care", "cares", "cat"}, alphabet.Latin(), 2, 15) + f, err := dictdawg.Build(alphabet.Latin(), words) + if err != nil { + t.Fatal(err) + } + if f.NumAdded() != 3 { + t.Fatalf("NumAdded = %d, want 3", f.NumAdded()) + } + if i := f.IndexOfB([]byte{2, 0, 17, 4}); i != 0 { // care + t.Errorf("IndexOf(care) = %d, want 0", i) + } + if i := f.IndexOfB([]byte{2, 0, 19}); i != 2 { // cat + t.Errorf("IndexOf(cat) = %d, want 2", i) + } + if i := f.IndexOfB([]byte{2, 0, 17}); i != -1 { // car (absent) + t.Errorf("IndexOf(car) = %d, want -1", i) + } + + path := filepath.Join(t.TempDir(), "d.dawg") + if err := dictdawg.Save(f, path); err != nil { + t.Fatal(err) + } + g, err := dictdawg.Load(path) + if err != nil { + t.Fatal(err) + } + defer g.Close() + if i := g.IndexOfB([]byte{2, 0, 17, 4, 18}); i != 1 { // cares + t.Errorf("loaded IndexOf(cares) = %d, want 1", i) + } +} diff --git a/internal/encoding/encoding.go b/internal/encoding/encoding.go new file mode 100644 index 0000000..c9a94e3 --- /dev/null +++ b/internal/encoding/encoding.go @@ -0,0 +1,43 @@ +// Package encoding defines the compact byte conventions shared by the board, rack, +// move output and (for letters) the dictionary graph. 
const (
	// Blank is the bit that marks a tile as a blank standing for its letter; a
	// blank scores 0.
	Blank byte = 0x80

	// Empty is the byte stored in an unoccupied board square.
	Empty byte = 0x00

	// letterBits masks the low 6 bits, which hold the letter index plus one.
	letterBits byte = 0x3f
)

// Cell encodes a tile for the given alphabet letter index: the index plus one in
// the low 6 bits, with the Blank bit added when blank is true. The sum is masked
// to 6 bits.
func Cell(letter byte, blank bool) byte {
	tile := (letter + 1) & letterBits
	if !blank {
		return tile
	}
	return tile | Blank
}

// IsEmpty reports whether the letter bits of cell are all zero, i.e. the square
// holds no tile.
func IsEmpty(cell byte) bool {
	return cell&letterBits == Empty
}

// Letter extracts the alphabet letter index from a non-empty cell or tile byte.
// For an empty cell the result carries no meaning.
func Letter(cell byte) byte {
	stored := cell & letterBits
	return stored - 1
}

// IsBlank reports whether the Blank bit is set on a cell or tile byte.
func IsBlank(cell byte) bool {
	return cell&Blank == Blank
}
+func Spell(c *dawg.Cursor, indices []byte) (n dawg.Node, final, ok bool) { + n = c.Root() + final = c.Final(n) + for _, ix := range indices { + n, final, ok = c.Next(n, ix) + if !ok { + return n, false, false + } + } + return n, final, true +} diff --git a/internal/graph/graph_test.go b/internal/graph/graph_test.go new file mode 100644 index 0000000..cd49dc1 --- /dev/null +++ b/internal/graph/graph_test.go @@ -0,0 +1,43 @@ +package graph_test + +import ( + "testing" + + "github.com/iliadenisov/alphabet" + dawg "github.com/iliadenisov/dafsa" + + "scrabble-solver/internal/graph" +) + +// TestSpellSmoke also exercises the go.mod replace => ../dafsa wiring and the new +// dafsa traversal API end-to-end from the solver module. +func TestSpellSmoke(t *testing.T) { + d := dawg.New(alphabet.Latin()) + for _, w := range []string{"cat", "cats", "dog"} { + if err := d.Add(w); err != nil { + t.Fatalf("Add(%q): %v", w, err) + } + } + c, err := dawg.NewCursor(d.Finish()) + if err != nil { + t.Fatal(err) + } + + enc := func(s string) []byte { + b, err := alphabet.Latin().Encode(s) + if err != nil { + t.Fatalf("Encode(%q): %v", s, err) + } + return b + } + + if _, final, ok := graph.Spell(c, enc("cat")); !ok || !final { + t.Errorf("Spell(cat): ok=%v final=%v, want both true", ok, final) + } + if _, final, ok := graph.Spell(c, enc("ca")); !ok || final { + t.Errorf("Spell(ca): ok=%v final=%v, want ok=true final=false", ok, final) + } + if _, _, ok := graph.Spell(c, enc("xyz")); ok { + t.Errorf("Spell(xyz): ok=true, want false") + } +} diff --git a/internal/wordlist/wordlist.go b/internal/wordlist/wordlist.go new file mode 100644 index 0000000..ec359bd --- /dev/null +++ b/internal/wordlist/wordlist.go @@ -0,0 +1,77 @@ +// Package wordlist reads dictionaries and encodes them into alphabet-index words, +// ready to add to a DAWG. 
+package wordlist + +import ( + "bufio" + "bytes" + "os" + "sort" + "strings" + + "github.com/iliadenisov/alphabet" +) + +// Encode turns words into alphabet-index slices, keeping only those whose length is in +// [minLen, maxLen] and whose characters all belong to idx's alphabet (case-folded). +// The result is sorted by index order and de-duplicated, as a DAWG builder requires. +func Encode(words []string, idx alphabet.Indexer, minLen, maxLen int) [][]byte { + out := make([][]byte, 0, len(words)) + for _, w := range words { + w = strings.TrimSpace(w) + if w == "" { + continue + } + b, err := idx.Encode(strings.ToLower(w)) + if err != nil { + continue + } + if len(b) < minLen || len(b) > maxLen { + continue + } + out = append(out, b) + } + sort.Slice(out, func(i, j int) bool { return bytes.Compare(out[i], out[j]) < 0 }) + return Dedupe(out) +} + +// Read is Encode applied to the lines (one word per line) of the file at path. +func Read(path string, idx alphabet.Indexer, minLen, maxLen int) ([][]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + var words []string + sc := bufio.NewScanner(f) + sc.Buffer(make([]byte, 1<<20), 1<<20) + for sc.Scan() { + words = append(words, sc.Text()) + } + if err := sc.Err(); err != nil { + return nil, err + } + return Encode(words, idx, minLen, maxLen), nil +} + +// FoldYo replaces Ё/ё with Е/е. The Russian "Эрудит" variant has no Ё tile and treats +// Е and Ё as the same letter, so apply this when preparing an Эрудит dictionary (it is a +// dictionary-preparation step, not an engine behaviour). +func FoldYo(s string) string { + return strings.NewReplacer("ё", "е", "Ё", "Е").Replace(s) +} + +// Dedupe removes adjacent duplicates from a sorted slice of index words in place. 
// Dedupe collapses runs of equal adjacent words in s, which is expected to be
// sorted, keeping the first word of each run. It works in place: the result shares
// s's backing array, and s's leading elements are overwritten.
func Dedupe(s [][]byte) [][]byte {
	if len(s) == 0 {
		return s
	}
	kept := 1 // s[:kept] is the de-duplicated prefix
	for _, word := range s[1:] {
		if bytes.Equal(word, s[kept-1]) {
			continue
		}
		s[kept] = word
		kept++
	}
	return s[:kept]
}
// Rack holds tile counts: one slot per alphabet letter index followed by a final
// slot for blanks. Methods use value receivers, so copies of a Rack share the same
// counts slice; use Clone for an independent rack.
type Rack struct {
	counts []int
}

// New returns a rack with alphabetSize letter slots plus the blank slot, all zero.
func New(alphabetSize int) Rack {
	slots := alphabetSize + 1 // the extra slot counts blanks
	return Rack{counts: make([]int, slots)}
}

// blankIdx is the index of the blank slot: the last element of counts.
func (r Rack) blankIdx() int { return len(r.counts) - 1 }

// Count returns the number of tiles stored for the given letter index.
func (r Rack) Count(letter byte) int { return r.counts[letter] }

// Has reports whether the count for the given letter index is positive.
func (r Rack) Has(letter byte) bool { return r.Count(letter) > 0 }

// Blanks returns the number of blank tiles.
func (r Rack) Blanks() int { return r.counts[len(r.counts)-1] }

// Total returns the sum of all slots, blanks included.
func (r Rack) Total() int {
	sum := 0
	for i := range r.counts {
		sum += r.counts[i]
	}
	return sum
}

// Empty reports whether Total is zero.
func (r Rack) Empty() bool { return r.Total() == 0 }

// Add increments the count for the given letter index.
func (r Rack) Add(letter byte) { r.counts[letter] += 1 }

// AddBlank increments the blank count.
func (r Rack) AddBlank() { r.counts[r.blankIdx()] += 1 }

// Remove decrements the count for the given letter index. It does not check that
// a tile was present.
func (r Rack) Remove(letter byte) { r.counts[letter] -= 1 }

// RemoveBlank decrements the blank count. It does not check that a blank was
// present.
func (r Rack) RemoveBlank() { r.counts[r.blankIdx()] -= 1 }

// Clone returns a rack with its own copy of the counts.
func (r Rack) Clone() Rack {
	dup := Rack{counts: make([]int, len(r.counts))}
	copy(dup.counts, r.counts)
	return dup
}
want 102+2=104", n, rs.Blanks) + } + if rs.Bingo != 50 { + t.Errorf("bonus %d, want 50", rs.Bingo) + } + if rs.Premium(7, 7) != DW { + t.Errorf("centre premium %d, want DW", rs.Premium(7, 7)) + } + if rs.Values[6] != 3 || rs.Counts[6] != 1 { // ё + t.Errorf("ё = value %d count %d, want 3/1", rs.Values[6], rs.Counts[6]) + } +} + +func TestErudit(t *testing.T) { + rs := Erudit() + if err := rs.Validate(); err != nil { + t.Fatal(err) + } + if rs.Size() != 33 { + t.Errorf("alphabet size %d, want 33", rs.Size()) + } + if n := sumCounts(rs.Counts); n != 128 || n+rs.Blanks != 131 { + t.Errorf("bag = %d letters + %d blanks, want 128+3=131", n, rs.Blanks) + } + if rs.Bingo != 15 { + t.Errorf("bonus %d, want 15", rs.Bingo) + } + if rs.Center != 7*15+7 { + t.Errorf("centre index %d, want %d", rs.Center, 7*15+7) + } + if rs.Premium(7, 7) != None { + t.Errorf("centre premium %d, want None (Эрудит centre does not double)", rs.Premium(7, 7)) + } + if rs.Counts[6] != 0 { // ё has no tile + t.Errorf("ё count %d, want 0", rs.Counts[6]) + } + if rs.Premium(0, 0) != TW { + t.Errorf("corner premium %d, want TW (board otherwise standard)", rs.Premium(0, 0)) + } +} diff --git a/rules/rules.go b/rules/rules.go new file mode 100644 index 0000000..f2a2626 --- /dev/null +++ b/rules/rules.go @@ -0,0 +1,221 @@ +// Package rules describes a Scrabble variant: board geometry, premium-square layout, +// the letter alphabet, per-letter tile values and bag counts, blanks, rack size and +// the all-tiles bonus. English() returns standard English Scrabble; the Ruleset type +// is general enough for other variants such as Russian "Эрудит" (same board, different +// tile values/counts and alphabet). +package rules + +import ( + "fmt" + "strings" + + "github.com/iliadenisov/alphabet" +) + +// Premium is the bonus kind of a board square. 
// Premium is the bonus kind of a board square.
type Premium uint8

// The premium kinds. None is the zero value.
const (
	None Premium = iota
	DL           // double letter
	TL           // triple letter
	DW           // double word
	TW           // triple word
)

// LetterMult returns the factor applied to the tile placed on the square: 2 for
// DL, 3 for TL and 1 for every other value.
func (p Premium) LetterMult() int {
	if p == DL {
		return 2
	}
	if p == TL {
		return 3
	}
	return 1
}

// WordMult returns the factor applied to a word passing through the square: 2 for
// DW, 3 for TW and 1 for every other value.
func (p Premium) WordMult() int {
	if p == DW {
		return 2
	}
	if p == TW {
		return 3
	}
	return 1
}
+func (rs *Ruleset) Validate() error { + n := rs.Alphabet.Size() + if len(rs.Values) != n { + return fmt.Errorf("rules %q: %d values for %d letters", rs.Name, len(rs.Values), n) + } + if len(rs.Counts) != n { + return fmt.Errorf("rules %q: %d counts for %d letters", rs.Name, len(rs.Counts), n) + } + if len(rs.premiums) != rs.Rows*rs.Cols { + return fmt.Errorf("rules %q: %d premiums for a %dx%d board", rs.Name, len(rs.premiums), rs.Rows, rs.Cols) + } + if rs.Center < 0 || rs.Center >= rs.Rows*rs.Cols { + return fmt.Errorf("rules %q: centre %d out of range", rs.Name, rs.Center) + } + return nil +} + +// standardBoard is the classic 15x15 premium layout: T=triple word, D=double word, +// t=triple letter, d=double letter, .=plain, *=centre (a double word). +const standardBoard = `T..d...T...d..T +.D...t...t...D. +..D...d.d...D.. +d..D...d...D..d +....D.....D.... +.t...t...t...t. +..d...d.d...d.. +T..d...*...d..T +..d...d.d...d.. +.t...t...t...t. +....D.....D.... +d..D...d...D..d +..D...d.d...D.. +.D...t...t...D. +T..d...T...d..T` + +// parsePremiums turns a board template into a premium grid and the centre index. +func parsePremiums(s string) (rows, cols int, prem []Premium, center int) { + lines := strings.Split(strings.TrimSpace(s), "\n") + rows = len(lines) + cols = len(strings.TrimRight(lines[0], "\r")) + prem = make([]Premium, rows*cols) + center = -1 + for r, line := range lines { + line = strings.TrimRight(line, "\r") + for c := 0; c < cols && c < len(line); c++ { + var p Premium + switch line[c] { + case 'd': + p = DL + case 't': + p = TL + case 'D': + p = DW + case 'T': + p = TW + case '*': // centre square, a double word + p = DW + center = r*cols + c + case '+': // centre square with no premium + center = r*cols + c + } + prem[r*cols+c] = p + } + } + return rows, cols, prem, center +} + +// FromTemplate builds a ruleset from a premium-layout template (see standardBoard for +// the character legend; '+' marks a centre square with no premium). 
It returns an error +// if the resulting ruleset is inconsistent. +func FromTemplate(name string, idx alphabet.Indexer, values, counts []int, blanks, rackSize, bingo int, template string) (*Ruleset, error) { + rows, cols, prem, center := parsePremiums(template) + rs := &Ruleset{ + Name: name, Rows: rows, Cols: cols, Alphabet: idx, + Values: values, Counts: counts, + Blanks: blanks, RackSize: rackSize, Bingo: bingo, + Center: center, premiums: prem, + } + if err := rs.Validate(); err != nil { + return nil, err + } + return rs, nil +} + +// English returns the standard English Scrabble ruleset (15x15, the classic premium +// layout, English tile values and distribution, 2 blanks, a 7-tile rack and a 50-point +// bingo bonus). +func English() *Ruleset { + rs, err := FromTemplate("English Scrabble", alphabet.Latin(), + // a b c d e f g h i j k l m n o p q r s t u v w x y z + []int{1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10}, + []int{9, 2, 2, 4, 12, 2, 3, 2, 9, 1, 1, 4, 2, 6, 8, 2, 1, 6, 4, 6, 4, 2, 2, 1, 2, 1}, + 2, 7, 50, standardBoard) + if err != nil { + panic(err) // a programming error in this package, not a runtime condition + } + return rs +} + +// eruditBoard is the standard 15x15 layout but with a non-doubling centre ('+'), as in +// the Russian "Эрудит" variant. +const eruditBoard = `T..d...T...d..T +.D...t...t...D. +..D...d.d...D.. +d..D...d...D..d +....D.....D.... +.t...t...t...t. +..d...d.d...d.. +T..d...+...d..T +..d...d.d...d.. +.t...t...t...t. +....D.....D.... +d..D...d...D..d +..D...d.d...D.. +.D...t...t...D. +T..d...T...d..T` + +// russian returns the embedded 33-letter Russian alphabet (а..я including ё at index 6). +func russian() alphabet.Indexer { return alphabet.Embedded(alphabet.Langs.LangRu) } + +// RussianScrabble returns the Russian Scrabble ruleset: the 33-letter alphabet, the +// standard board, 2 blanks, a 7-tile rack and a 50-point bonus. 
+func RussianScrabble() *Ruleset { + rs, err := FromTemplate("Russian Scrabble", russian(), + // а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я + []int{1, 3, 1, 3, 2, 1, 3, 5, 5, 1, 4, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 10, 5, 5, 5, 8, 10, 10, 4, 3, 8, 8, 3}, + []int{8, 2, 4, 2, 4, 8, 1, 1, 2, 5, 1, 4, 4, 3, 5, 10, 4, 5, 5, 5, 4, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2}, + 2, 7, 50, standardBoard) + if err != nil { + panic(err) + } + return rs +} + +// Erudit returns the Russian "Эрудит" ruleset. Ё carries no tiles (count 0); fold Ё→Е +// when preparing the dictionary (see wordlist.FoldYo). The centre square does not double +// the word, there are 3 blanks (each scoring 0), and the all-tiles bonus is 15. +func Erudit() *Ruleset { + rs, err := FromTemplate("Эрудит", russian(), + // а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я + []int{1, 3, 2, 3, 2, 1, 0, 5, 5, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 3, 10, 5, 10, 5, 10, 10, 10, 5, 5, 10, 10, 3}, + []int{10, 3, 5, 3, 5, 9, 0, 2, 2, 8, 4, 6, 4, 5, 8, 10, 6, 6, 6, 5, 3, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 3}, + 3, 7, 15, eruditBoard) + if err != nil { + panic(err) + } + return rs +} diff --git a/rules/rules_test.go b/rules/rules_test.go new file mode 100644 index 0000000..3155ee4 --- /dev/null +++ b/rules/rules_test.go @@ -0,0 +1,82 @@ +package rules + +import "testing" + +func TestEnglishConsistency(t *testing.T) { + rs := English() + if err := rs.Validate(); err != nil { + t.Fatalf("Validate: %v", err) + } + if rs.Rows != 15 || rs.Cols != 15 { + t.Errorf("board = %dx%d, want 15x15", rs.Rows, rs.Cols) + } + if rs.Size() != 26 { + t.Errorf("alphabet size = %d, want 26", rs.Size()) + } + if rs.Center != 7*15+7 { + t.Errorf("centre = %d, want %d", rs.Center, 7*15+7) + } + + letters := 0 + for _, c := range rs.Counts { + letters += c + } + if letters != 98 { + t.Errorf("sum(Counts) = %d, want 98", letters) + } + if rs.Blanks != 2 || letters+rs.Blanks != 100 { + t.Errorf("bag = %d letters + %d blanks, 
want 98+2=100", letters, rs.Blanks) + } + + points := 0 + for i := range rs.Values { + points += rs.Values[i] * rs.Counts[i] + } + if points != 187 { + t.Errorf("total bag points = %d, want 187", points) + } +} + +func TestEnglishPremiums(t *testing.T) { + rs := English() + + spot := []struct { + r, c int + want Premium + }{ + {0, 0, TW}, {0, 7, TW}, {0, 14, TW}, {7, 0, TW}, {14, 7, TW}, + {7, 7, DW}, {1, 1, DW}, {4, 4, DW}, + {1, 5, TL}, {5, 5, TL}, {9, 9, TL}, + {0, 3, DL}, {3, 0, DL}, {6, 6, DL}, + {0, 1, None}, {7, 1, None}, + } + for _, s := range spot { + if got := rs.Premium(s.r, s.c); got != s.want { + t.Errorf("Premium(%d,%d) = %d, want %d", s.r, s.c, got, s.want) + } + } + + // Census of premium squares for the standard board. + census := map[Premium]int{} + for i := range rs.Rows * rs.Cols { + census[rs.PremiumAt(i)]++ + } + want := map[Premium]int{None: 164, DL: 24, TL: 12, DW: 17, TW: 8} + for p, n := range want { + if census[p] != n { + t.Errorf("premium %d count = %d, want %d", p, census[p], n) + } + } + + // The standard board is symmetric under transpose and 180° rotation. + for r := range rs.Rows { + for c := range rs.Cols { + if rs.Premium(r, c) != rs.Premium(c, r) { + t.Errorf("not transpose-symmetric at (%d,%d)", r, c) + } + if rs.Premium(r, c) != rs.Premium(rs.Rows-1-r, rs.Cols-1-c) { + t.Errorf("not 180°-symmetric at (%d,%d)", r, c) + } + } + } +} diff --git a/scrabble/apply.go b/scrabble/apply.go new file mode 100644 index 0000000..a0bab8f --- /dev/null +++ b/scrabble/apply.go @@ -0,0 +1,14 @@ +package scrabble + +import ( + "scrabble-solver/board" + "scrabble-solver/internal/encoding" +) + +// Apply places a move's newly-placed tiles on the board. The move must be legal for the +// board (as produced by a generator, or validated); Apply does not re-check it. 
+func Apply(b *board.Board, m Move) { + for _, t := range m.Tiles { + b.Set(t.Row, t.Col, encoding.Cell(t.Letter, t.Blank)) + } +} diff --git a/scrabble/crossset.go b/scrabble/crossset.go new file mode 100644 index 0000000..4f61ca0 --- /dev/null +++ b/scrabble/crossset.go @@ -0,0 +1,108 @@ +package scrabble + +import ( + dawg "github.com/iliadenisov/dafsa" + + "scrabble-solver/board" + "scrabble-solver/internal/encoding" +) + +// letterSet is a bit set over alphabet letter indexes (alphabets are at most 63 +// letters, so a uint64 suffices). It encodes a square's cross-set: the letters that, +// placed on the square, form a legal perpendicular word. +type letterSet uint64 + +func (s letterSet) has(l byte) bool { return s&(letterSet(1)<= 0 && b.Filled(start-1, c) { + start-- + } + for rr := start; rr < r; rr++ { + above = append(above, encoding.Letter(b.At(rr, c))) + } + + end := r + for end+1 < b.Rows() && b.Filled(end+1, c) { + end++ + } + for rr := r + 1; rr <= end; rr++ { + below = append(below, encoding.Letter(b.At(rr, c))) + } + return above, below +} + +// completers returns the letters X (< size) that complete a word when followed from +// state: those whose arc leads directly to an accepting node. It is a single arc +// enumeration — the deterministic cross-set primitive. +func completers(cur *dawg.Cursor, state dawg.Node, size int) letterSet { + var set letterSet + lim := byte(size) + cur.Arcs(state, func(a dawg.Arc) bool { + if a.Final && a.Label < lim { + set |= letterSet(1) << a.Label + } + return true + }) + return set +} + +// walk follows word left to right from the cursor's root. +func walk(cur *dawg.Cursor, word []byte) (dawg.Node, bool) { + n := cur.Root() + for _, l := range word { + var ok bool + if n, _, ok = cur.Next(n, l); !ok { + return n, false + } + } + return n, true +} + +// dawgCrossSet returns the letters X for which above·X·below is a stored word. 
A right +// extension (no tiles below) is deterministic — X just completes the prefix above. A +// left extension (tiles below) is non-deterministic and must probe each X. +func dawgCrossSet(cur *dawg.Cursor, above, below []byte, size int) letterSet { + switch { + case len(above) == 0 && len(below) == 0: + return fullSet(size) + case len(below) == 0: + node, ok := walk(cur, above) + if !ok { + return 0 + } + return completers(cur, node, size) + default: + node := cur.Root() + if len(above) > 0 { + var ok bool + if node, ok = walk(cur, above); !ok { + return 0 + } + } + var set letterSet + for x := range size { + m, final, ok := cur.Next(node, byte(x)) + if !ok { + continue + } + for _, l := range below { + if m, final, ok = cur.Next(m, l); !ok { + break + } + } + if ok && final { + set |= letterSet(1) << uint(x) + } + } + return set + } +} diff --git a/scrabble/crossset_test.go b/scrabble/crossset_test.go new file mode 100644 index 0000000..33cadbc --- /dev/null +++ b/scrabble/crossset_test.go @@ -0,0 +1,75 @@ +package scrabble + +import ( + "testing" + + "github.com/iliadenisov/alphabet" + dawg "github.com/iliadenisov/dafsa" + + "scrabble-solver/internal/dictdawg" + "scrabble-solver/internal/wordlist" +) + +func bruteCrossSet(words [][]byte, above, below []byte, size int) letterSet { + set := make(map[string]bool, len(words)) + for _, w := range words { + set[string(w)] = true + } + var out letterSet + for x := range size { + w := make([]byte, 0, len(above)+1+len(below)) + w = append(w, above...) + w = append(w, byte(x)) + w = append(w, below...) 
+ if set[string(w)] { + out |= letterSet(1) << uint(x) + } + } + return out +} + +func TestDAWGCrossSetMatchesBruteForce(t *testing.T) { + const size = 26 + words := wordlist.Encode( + []string{"cat", "cot", "cut", "cap", "cab", "at", "it"}, + alphabet.Latin(), 2, 15) + + finder, err := dictdawg.Build(alphabet.Latin(), words) + if err != nil { + t.Fatal(err) + } + cur, err := dawg.NewCursor(finder) + if err != nil { + t.Fatal(err) + } + + cases := []struct { + name string + above, below []byte + }{ + {"c_t", []byte{2}, []byte{19}}, // expect {a,o,u} + {"_t", nil, []byte{19}}, // expect {a,i} + {"c_", []byte{2}, nil}, // expect {} (no two-letter c-words) + {"a_t", []byte{0}, []byte{19}}, // expect {} + } + for _, tc := range cases { + want := bruteCrossSet(words, tc.above, tc.below, size) + if got := dawgCrossSet(cur, tc.above, tc.below, size); got != want { + t.Errorf("%s: dawgCrossSet = %026b, want %026b", tc.name, got, want) + } + } + + // c_t must be exactly {a(0), o(14), u(20)}. + want := letterSet(0) + for _, x := range []byte{0, 14, 20} { + want |= letterSet(1) << x + } + if got := dawgCrossSet(cur, []byte{2}, []byte{19}, size); got != want { + t.Errorf("c_t cross-set = %026b, want {a,o,u} = %026b", got, want) + } + + // No perpendicular neighbours: every letter is allowed. + if got := dawgCrossSet(cur, nil, nil, size); got != fullSet(size) { + t.Errorf("empty context = %026b, want full", got) + } +} diff --git a/scrabble/gen.go b/scrabble/gen.go new file mode 100644 index 0000000..fee0008 --- /dev/null +++ b/scrabble/gen.go @@ -0,0 +1,56 @@ +package scrabble + +import ( + "scrabble-solver/board" + "scrabble-solver/rack" + "scrabble-solver/rules" +) + +// generateBoth runs an across-generator on the board (for horizontal plays) and on its +// transpose (for vertical plays), as selected by mode, then scores and de-duplicates the +// results. 
runAcross reports placements in the coordinates of the board it is given; for +// the transpose pass they are mapped back to the real board. +func generateBoth(b *board.Board, rs *rules.Ruleset, rk rack.Rack, mode Mode, + runAcross func(bd *board.Board, rk rack.Rack, emit func([]Placement))) []Move { + + rk = rk.Clone() // generation mutates the rack in place and restores it + var moves []Move + seen := make(map[string]struct{}) + emit := func(dir Direction, placements []Placement) { + key := moveKey(dir, placements) + if _, dup := seen[key]; dup { + return + } + m, err := Evaluate(b, rs, dir, placements) + if err != nil { + return + } + seen[key] = struct{}{} + moves = append(moves, m) + } + + if mode.Includes(Horizontal) { + runAcross(b, rk, func(p []Placement) { emit(Horizontal, p) }) + } + if mode.Includes(Vertical) { + tb := b.Transpose() + runAcross(tb, rk, func(p []Placement) { + rp := make([]Placement, len(p)) + for i, pl := range p { + rp[i] = Placement{Row: pl.Col, Col: pl.Row, Letter: pl.Letter, Blank: pl.Blank} + } + emit(Vertical, rp) + }) + } + return moves +} + +// centerFor returns the centre square in bd's coordinates. bd is either the real board +// or its transpose; the ruleset stores the centre on the real board. +func centerFor(bd *board.Board, rs *rules.Ruleset) (row, col int) { + r, c := rs.Center/rs.Cols, rs.Center%rs.Cols + if bd.Rows() == rs.Rows && bd.Cols() == rs.Cols { + return r, c + } + return c, r // transposed +} diff --git a/scrabble/gen_dawg.go b/scrabble/gen_dawg.go new file mode 100644 index 0000000..c19c68c --- /dev/null +++ b/scrabble/gen_dawg.go @@ -0,0 +1,221 @@ +package scrabble + +import ( + dawg "github.com/iliadenisov/dafsa" + + "scrabble-solver/board" + "scrabble-solver/internal/encoding" + "scrabble-solver/rack" + "scrabble-solver/rules" +) + +// DAWGGenerator generates moves with the Appel-Jacobson two-phase algorithm +// (LeftPart then ExtendRight) over a plain left-to-right DAWG. 
+type DAWGGenerator struct { + rules *rules.Ruleset + finder dawg.Finder +} + +// NewDAWGGenerator builds a DAWG generator for the ruleset over the dictionary finder. +func NewDAWGGenerator(rs *rules.Ruleset, finder dawg.Finder) *DAWGGenerator { + return &DAWGGenerator{rules: rs, finder: finder} +} + +// Name identifies the generator. +func (g *DAWGGenerator) Name() string { return "dawg" } + +// GenerateMoves returns every legal play for rk on b in the modes' orientations. +func (g *DAWGGenerator) GenerateMoves(b *board.Board, rk rack.Rack, mode Mode) []Move { + return generateBoth(b, g.rules, rk, mode, g.runAcross) +} + +// tileInfo is a tentatively placed left-part tile (its column is fixed only once the +// left part's length is known, at record time). +type tileInfo struct { + letter byte + blank bool +} + +// acrossGen carries the state of one across-generation pass over a board. +type acrossGen struct { + bd *board.Board + cur *dawg.Cursor + rs *rules.Ruleset + rk rack.Rack + size int + cross func(r, c int) letterSet + emit func(placements []Placement) // placements in bd's coordinates + + row int + left []tileInfo // left-part tiles, in word (left-to-right) order + right []Placement // right-part tiles, with their columns +} + +// runAcross generates all across plays on bd (cross-sets are computed as vertical words +// on bd) and reports each via emit in bd's coordinates. 
+func (g *DAWGGenerator) runAcross(bd *board.Board, rk rack.Rack, emit func([]Placement)) { + cur, err := dawg.NewCursor(g.finder) + if err != nil { + return + } + size := g.rules.Size() + + cross := make([]letterSet, bd.Rows()*bd.Cols()) + known := make([]bool, bd.Rows()*bd.Cols()) + crossFn := func(r, c int) letterSet { + i := r*bd.Cols() + c + if !known[i] { + above, below := columnContext(bd, r, c) + cross[i] = dawgCrossSet(cur, above, below, size) + known[i] = true + } + return cross[i] + } + + ag := &acrossGen{bd: bd, cur: cur, rs: g.rules, rk: rk, size: size, cross: crossFn, emit: emit} + + firstMove := bd.IsEmpty() + centerRow, centerCol := centerFor(bd, g.rules) + for row := range bd.Rows() { + ag.generateRow(row, firstMove, centerRow, centerCol) + } +} + +func (g *acrossGen) generateRow(row int, firstMove bool, centerRow, centerCol int) { + g.row = row + limit := 0 + for col := range g.bd.Cols() { + if !g.bd.Empty(row, col) { + limit = 0 + continue + } + anchor := false + if firstMove { + anchor = row == centerRow && col == centerCol + } else { + anchor = g.hasFilledNeighbor(row, col) + } + if !anchor { + limit++ + continue + } + g.left = g.left[:0] + g.right = g.right[:0] + if col > 0 && g.bd.Filled(row, col-1) { + if node, ok := g.walkPrefix(row, col); ok { + g.extendRight(node, col, col) + } + } else { + g.leftPart(g.cur.Root(), col, limit) + } + limit = 0 + } +} + +func (g *acrossGen) hasFilledNeighbor(r, c int) bool { + return g.bd.Filled(r-1, c) || g.bd.Filled(r+1, c) || g.bd.Filled(r, c-1) || g.bd.Filled(r, c+1) +} + +// walkPrefix walks the DAWG through the contiguous filled run ending at col-1, returning +// the node reached and whether that prefix exists in the dictionary. 
+func (g *acrossGen) walkPrefix(row, col int) (dawg.Node, bool) { + start := col - 1 + for start-1 >= 0 && g.bd.Filled(row, start-1) { + start-- + } + node := g.cur.Root() + for c := start; c < col; c++ { + var ok bool + node, _, ok = g.cur.Next(node, encoding.Letter(g.bd.At(row, c))) + if !ok { + return node, false + } + } + return node, true +} + +// leftPart places left-part tiles from the rack (up to limit, on the empty squares left +// of the anchor), calling extendRight after each prefix. +func (g *acrossGen) leftPart(node dawg.Node, anchorCol, limit int) { + g.extendRight(node, anchorCol, anchorCol) + if limit == 0 { + return + } + g.cur.Arcs(node, func(a dawg.Arc) bool { + l := a.Label + if g.rk.Has(l) { + g.rk.Remove(l) + g.left = append(g.left, tileInfo{letter: l}) + g.leftPart(a.Dest, anchorCol, limit-1) + g.left = g.left[:len(g.left)-1] + g.rk.Add(l) + } + if g.rk.Blanks() > 0 { + g.rk.RemoveBlank() + g.left = append(g.left, tileInfo{letter: l, blank: true}) + g.leftPart(a.Dest, anchorCol, limit-1) + g.left = g.left[:len(g.left)-1] + g.rk.AddBlank() + } + return true + }) +} + +// extendRight extends the word rightward from col, placing rack tiles on empty squares +// (constrained by cross-sets) and following tiles already on the board. A word is +// recorded only past the anchor, so the play covers the anchor square. 
+func (g *acrossGen) extendRight(node dawg.Node, col, anchorCol int) { + if col >= g.bd.Cols() { + if col > anchorCol && g.cur.Final(node) { + g.record(anchorCol) + } + return + } + if !g.bd.Empty(g.row, col) { + if dest, _, ok := g.cur.Next(node, encoding.Letter(g.bd.At(g.row, col))); ok { + g.extendRight(dest, col+1, anchorCol) + } + return + } + + if col > anchorCol && g.cur.Final(node) { + g.record(anchorCol) + } + cross := g.cross(g.row, col) + g.cur.Arcs(node, func(a dawg.Arc) bool { + l := a.Label + if !cross.has(l) { + return true + } + if g.rk.Has(l) { + g.rk.Remove(l) + g.right = append(g.right, Placement{Row: g.row, Col: col, Letter: l}) + g.extendRight(a.Dest, col+1, anchorCol) + g.right = g.right[:len(g.right)-1] + g.rk.Add(l) + } + if g.rk.Blanks() > 0 { + g.rk.RemoveBlank() + g.right = append(g.right, Placement{Row: g.row, Col: col, Letter: l, Blank: true}) + g.extendRight(a.Dest, col+1, anchorCol) + g.right = g.right[:len(g.right)-1] + g.rk.AddBlank() + } + return true + }) +} + +// record assembles the play's placements (left part at fixed columns, then the right +// part) and reports it. It skips plays that lay no new tile. +func (g *acrossGen) record(anchorCol int) { + if len(g.left)+len(g.right) == 0 { + return + } + placements := make([]Placement, 0, len(g.left)+len(g.right)) + leftStart := anchorCol - len(g.left) + for i, t := range g.left { + placements = append(placements, Placement{Row: g.row, Col: leftStart + i, Letter: t.letter, Blank: t.blank}) + } + placements = append(placements, g.right...) 
+ g.emit(placements) +} diff --git a/scrabble/gen_dawg_test.go b/scrabble/gen_dawg_test.go new file mode 100644 index 0000000..7c1f547 --- /dev/null +++ b/scrabble/gen_dawg_test.go @@ -0,0 +1,121 @@ +package scrabble + +import ( + "testing" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/board" + "scrabble-solver/internal/dictdawg" + "scrabble-solver/internal/encoding" + "scrabble-solver/internal/wordlist" + "scrabble-solver/rack" + "scrabble-solver/rules" +) + +func makeRack(letters string, blanks int) rack.Rack { + r := rack.New(26) + for i := range len(letters) { + r.Add(letters[i] - 'a') + } + for range blanks { + r.AddBlank() + } + return r +} + +func placeWord(b *board.Board, r, c int, dir Direction, word string) { + for i := range len(word) { + rr, cc := r, c+i + if dir == Vertical { + rr, cc = r+i, c + } + b.Set(rr, cc, encoding.Cell(word[i]-'a', false)) + } +} + +func genMoves(moves []Move) map[string]Move { + out := make(map[string]Move, len(moves)) + for _, m := range moves { + out[moveKey(m.Dir, m.Tiles)] = m + } + return out +} + +// testWords is a small lexicon with enough overlaps to form across and cross plays. 
+var testWords = []string{ + "aa", "ace", "act", "arc", "are", "art", "as", "at", "ate", + "cab", "cap", "car", "care", "cars", "cart", "cat", "cats", "cot", + "oat", "oats", "ta", "tar", "tare", "tat", "tea", "teat", +} + +func compareToBrute(t *testing.T, name string, gen Generator, b *board.Board, d dict, rk rack.Rack, mode Mode) { + t.Helper() + want := bruteForce(b, plainRulesShared, d, rk, mode) + got := genMoves(gen.GenerateMoves(b, rk, mode)) + + for k, wm := range want { + gm, ok := got[k] + if !ok { + t.Errorf("%s [%s]: %s missing %s (score %d)", name, gen.Name(), gen.Name(), k, wm.Score) + continue + } + if gm.Score != wm.Score { + t.Errorf("%s [%s]: %s score %d, want %d", name, gen.Name(), k, gm.Score, wm.Score) + } + } + for k := range got { + if _, ok := want[k]; !ok { + t.Errorf("%s [%s]: extra move %s", name, gen.Name(), k) + } + } + if len(got) != len(want) { + t.Errorf("%s [%s]: %d moves, oracle has %d", name, gen.Name(), len(got), len(want)) + } +} + +func mustPlainRules() *rules.Ruleset { + eng := rules.English() + rs, err := rules.FromTemplate("plain7", eng.Alphabet, eng.Values, eng.Counts, 2, 7, 50, plain7) + if err != nil { + panic(err) + } + return rs +} + +var plainRulesShared = mustPlainRules() + +type scenario struct { + name string + setup func(*board.Board) + rack rack.Rack + mode Mode +} + +func genScenarios() []scenario { + return []scenario{ + {"first move", func(*board.Board) {}, makeRack("cat", 0), Both}, + {"first move blank", func(*board.Board) {}, makeRack("ca", 1), Both}, + {"extend cat", func(b *board.Board) { placeWord(b, 3, 1, Horizontal, "cat") }, makeRack("srs", 0), Both}, + {"cross cat", func(b *board.Board) { placeWord(b, 1, 3, Horizontal, "cat") }, makeRack("aort", 0), Both}, + {"only horizontal", func(b *board.Board) { placeWord(b, 3, 1, Horizontal, "cat") }, makeRack("aser", 0), OnlyHorizontal}, + {"only vertical", func(b *board.Board) { placeWord(b, 1, 3, Vertical, "cat") }, makeRack("aser", 0), OnlyVertical}, + } +} 
+ +func TestDAWGGeneratorVsBruteForce(t *testing.T) { + rs := plainRulesShared + words := wordlist.Encode(testWords, alphabet.Latin(), 2, 15) + f, err := dictdawg.Build(alphabet.Latin(), words) + if err != nil { + t.Fatal(err) + } + gen := NewDAWGGenerator(rs, f) + d := makeDict(words) + + for _, c := range genScenarios() { + b := board.New(rs.Rows, rs.Cols) + c.setup(b) + compareToBrute(t, c.name, gen, b, d, c.rack, c.mode) + } +} diff --git a/scrabble/generator.go b/scrabble/generator.go new file mode 100644 index 0000000..1f7ff37 --- /dev/null +++ b/scrabble/generator.go @@ -0,0 +1,18 @@ +package scrabble + +import ( + "scrabble-solver/board" + "scrabble-solver/rack" +) + +// Generator produces every legal play for a position. The DAWG generator +// (Appel-Jacobson) is the implementation; the interface keeps the self-play engine and +// the solver decoupled from the concrete type. +type Generator interface { + // GenerateMoves returns every legal play for rack r on board b in the modes' + // orientations. The result is unsorted; callers (or the Solver) rank it. + GenerateMoves(b *board.Board, r rack.Rack, mode Mode) []Move + + // Name identifies the generator (e.g. "dawg"). + Name() string +} diff --git a/scrabble/key.go b/scrabble/key.go new file mode 100644 index 0000000..7b51bf6 --- /dev/null +++ b/scrabble/key.go @@ -0,0 +1,36 @@ +package scrabble + +import ( + "sort" + "strconv" + "strings" +) + +// moveKey is a canonical string identifying a play (direction plus its placed tiles), +// used to de-duplicate and compare generated moves. +func moveKey(dir Direction, p []Placement) string { + ps := append([]Placement(nil), p...) 
+ sort.Slice(ps, func(i, j int) bool { + if ps[i].Row != ps[j].Row { + return ps[i].Row < ps[j].Row + } + return ps[i].Col < ps[j].Col + }) + var sb strings.Builder + sb.WriteByte('0' + byte(dir)) + for _, pl := range ps { + sb.WriteByte(';') + sb.WriteString(strconv.Itoa(pl.Row)) + sb.WriteByte(',') + sb.WriteString(strconv.Itoa(pl.Col)) + sb.WriteByte(',') + sb.WriteString(strconv.Itoa(int(pl.Letter))) + if pl.Blank { + sb.WriteByte('*') + } + } + return sb.String() +} + +// Key returns the canonical identifier of the move (direction plus its placed tiles). +func (m Move) Key() string { return moveKey(m.Dir, m.Tiles) } diff --git a/scrabble/move.go b/scrabble/move.go new file mode 100644 index 0000000..e0ab79d --- /dev/null +++ b/scrabble/move.go @@ -0,0 +1,74 @@ +// Package scrabble is the public library: it builds a move generator over a dictionary +// and a ruleset, generates every legal play for a position ranked by score, and scores +// or validates arbitrary plays. The generator is the DAWG algorithm (Appel-Jacobson). +package scrabble + +// Direction is the orientation of a play's main word. +type Direction uint8 + +const ( + // Horizontal is an across play (left to right along a row). + Horizontal Direction = iota + // Vertical is a down play (top to bottom along a column). + Vertical +) + +// String renders the direction for diagnostics. +func (d Direction) String() string { + if d == Vertical { + return "vertical" + } + return "horizontal" +} + +// Mode selects which orientations GenerateMoves produces. Russian "Эрудит" requires a +// single orientation per turn, which OnlyHorizontal / OnlyVertical express. +type Mode uint8 + +const ( + // Both generates across plays (on the board) and down plays (on its transpose). + Both Mode = iota + // OnlyHorizontal generates across plays only. + OnlyHorizontal + // OnlyVertical generates down plays only. + OnlyVertical +) + +// Includes reports whether the mode produces plays in direction d. 
+func (m Mode) Includes(d Direction) bool { + switch m { + case Both: + return true + case OnlyHorizontal: + return d == Horizontal + case OnlyVertical: + return d == Vertical + } + return false +} + +// Placement is a single newly-placed tile. +type Placement struct { + Row, Col int + Letter byte // alphabet letter index + Blank bool // placed from a blank tile, so it scores 0 +} + +// Word is a word formed by a play, with its location and score. +type Word struct { + Row, Col int // square of the word's first letter + Dir Direction // orientation of the word + Letters []byte // alphabet indices of the whole word (existing + new tiles) + Blanks []bool // per letter: true if that tile is a blank (scores 0) + Score int // the word's score, with premiums from newly-placed tiles +} + +// Move is a complete legal play with a full scoring breakdown. +type Move struct { + Dir Direction // orientation of the main word + Tiles []Placement // the newly-placed tiles, in main-word order + Main Word // the main word formed along Dir + Cross []Word // perpendicular words formed by the new tiles + Bonus int // all-tiles (bingo) bonus included in Score, or 0 + Score int // total: Main.Score + Σ Cross.Score + Bonus +} diff --git a/scrabble/oracle_test.go b/scrabble/oracle_test.go new file mode 100644 index 0000000..29be70c --- /dev/null +++ b/scrabble/oracle_test.go @@ -0,0 +1,147 @@ +package scrabble + +import ( + "scrabble-solver/board" + "scrabble-solver/rack" + "scrabble-solver/rules" +) + +// dict is a membership set of words (alphabet-index strings) for the oracle. 
+type dict map[string]bool + +func makeDict(words [][]byte) dict { + d := make(dict, len(words)) + for _, w := range words { + d[string(w)] = true + } + return d +} + +func (d dict) has(letters []byte) bool { return d[string(letters)] } + +func lineCoord(dir Direction, line, axis int) (r, c int) { + if dir == Horizontal { + return line, axis + } + return axis, line +} + +func cellFilled(b *board.Board, dir Direction, line, axis int) bool { + r, c := lineCoord(dir, line, axis) + return b.Filled(r, c) +} + +func coversCenter(dir Direction, line, start, end, cr, cc int) bool { + if dir == Horizontal { + return line == cr && start <= cc && cc <= end + } + return line == cc && start <= cr && cr <= end +} + +// bruteForce returns every legal play for the position, keyed by moveKey, found by +// exhaustively trying every maximal window and every rack assignment, then validating +// against the dictionary, connectivity and the first-move centre rule. It is the slow, +// obviously-correct oracle for checking the generators on small inputs. 
+func bruteForce(b *board.Board, rs *rules.Ruleset, d dict, rk rack.Rack, mode Mode) map[string]Move { + out := map[string]Move{} + var dirs []Direction + if mode.Includes(Horizontal) { + dirs = append(dirs, Horizontal) + } + if mode.Includes(Vertical) { + dirs = append(dirs, Vertical) + } + firstMove := b.IsEmpty() + cr, cc := rs.Center/rs.Cols, rs.Center%rs.Cols + + for _, dir := range dirs { + lines, span := b.Rows(), b.Cols() + if dir == Vertical { + lines, span = b.Cols(), b.Rows() + } + for line := range lines { + for start := range span { + for end := start + 1; end < span; end++ { + if cellFilled(b, dir, line, start-1) || cellFilled(b, dir, line, end+1) { + continue // not a maximal window + } + var empties []int + for a := start; a <= end; a++ { + if !cellFilled(b, dir, line, a) { + empties = append(empties, a) + } + } + if len(empties) == 0 { + continue + } + assign(b, rs, d, rk.Clone(), dir, line, start, end, empties, 0, nil, + firstMove, cr, cc, out) + } + } + } + } + return out +} + +func assign(b *board.Board, rs *rules.Ruleset, d dict, rk rack.Rack, dir Direction, + line, start, end int, empties []int, idx int, placed []Placement, + firstMove bool, cr, cc int, out map[string]Move) { + + if idx == len(empties) { + validate(b, rs, d, dir, line, start, end, placed, firstMove, cr, cc, out) + return + } + r, c := lineCoord(dir, line, empties[idx]) + next := placed[:len(placed):len(placed)] // avoid aliasing across siblings + + for l := byte(0); l < byte(rs.Size()); l++ { + if rk.Has(l) { + rk.Remove(l) + assign(b, rs, d, rk, dir, line, start, end, empties, idx+1, + append(next, Placement{Row: r, Col: c, Letter: l}), firstMove, cr, cc, out) + rk.Add(l) + } + } + if rk.Blanks() > 0 { + rk.RemoveBlank() + for l := byte(0); l < byte(rs.Size()); l++ { + assign(b, rs, d, rk, dir, line, start, end, empties, idx+1, + append(next, Placement{Row: r, Col: c, Letter: l, Blank: true}), firstMove, cr, cc, out) + } + rk.AddBlank() + } +} + +func validate(b *board.Board, 
rs *rules.Ruleset, d dict, dir Direction, + line, start, end int, placed []Placement, firstMove bool, cr, cc int, out map[string]Move) { + + m, err := Evaluate(b, rs, dir, placed) + if err != nil { + return + } + if !d.has(m.Main.Letters) { + return + } + for _, cw := range m.Cross { + if !d.has(cw.Letters) { + return + } + } + if firstMove { + if !coversCenter(dir, line, start, end, cr, cc) { + return + } + } else { + existing := false + for a := start; a <= end; a++ { + if cellFilled(b, dir, line, a) { + existing = true + break + } + } + if !existing && len(m.Cross) == 0 { + return // disconnected + } + } + out[moveKey(dir, placed)] = m +} diff --git a/scrabble/score.go b/scrabble/score.go new file mode 100644 index 0000000..4a70ca0 --- /dev/null +++ b/scrabble/score.go @@ -0,0 +1,206 @@ +package scrabble + +import ( + "errors" + "fmt" + "sort" + + "scrabble-solver/board" + "scrabble-solver/internal/encoding" + "scrabble-solver/rules" +) + +// coord maps a line coordinate (fixed, axis) to a board (row, col) for direction dir. +// For Horizontal the fixed coordinate is the row and the axis runs along columns; for +// Vertical it is the reverse. +func coord(dir Direction, fixed, axis int) (row, col int) { + if dir == Horizontal { + return fixed, axis + } + return axis, fixed +} + +// fixedAxis is the inverse of coord: it splits a (row, col) into the fixed and axis +// coordinates for direction dir. +func fixedAxis(dir Direction, row, col int) (fixed, axis int) { + if dir == Horizontal { + return row, col + } + return col, row +} + +func perpendicular(d Direction) Direction { + if d == Horizontal { + return Vertical + } + return Horizontal +} + +// Evaluate computes the words formed and the score for placing tiles on b in direction +// dir under ruleset rs. 
It validates geometry — the tiles lie on one line, on empty +// squares, and form a single contiguous run together with existing tiles — but does not +// check the dictionary or board connectivity; ValidatePlay layers those on top. tiles +// need not be sorted. +func Evaluate(b *board.Board, rs *rules.Ruleset, dir Direction, tiles []Placement) (Move, error) { + if len(tiles) == 0 { + return Move{}, errors.New("scrabble: empty play") + } + + ts := append([]Placement(nil), tiles...) + sort.Slice(ts, func(i, j int) bool { + _, ai := fixedAxis(dir, ts[i].Row, ts[i].Col) + _, aj := fixedAxis(dir, ts[j].Row, ts[j].Col) + return ai < aj + }) + + fixed, _ := fixedAxis(dir, ts[0].Row, ts[0].Col) + prevAxis := 0 + for i, t := range ts { + f, a := fixedAxis(dir, t.Row, t.Col) + if f != fixed { + return Move{}, errors.New("scrabble: tiles are not on one line") + } + if !b.InBounds(t.Row, t.Col) { + return Move{}, fmt.Errorf("scrabble: tile (%d,%d) off board", t.Row, t.Col) + } + if !b.Empty(t.Row, t.Col) { + return Move{}, fmt.Errorf("scrabble: square (%d,%d) is occupied", t.Row, t.Col) + } + if i > 0 && a == prevAxis { + return Move{}, errors.New("scrabble: two tiles on the same square") + } + prevAxis = a + } + + main, err := buildMainWord(b, rs, dir, fixed, ts) + if err != nil { + return Move{}, err + } + + move := Move{Dir: dir, Tiles: ts, Main: main, Score: main.Score} + for _, t := range ts { + if cw, ok := crossWord(b, rs, dir, t); ok { + move.Cross = append(move.Cross, cw) + move.Score += cw.Score + } + } + if len(ts) == rs.RackSize { + move.Bonus = rs.Bingo + move.Score += rs.Bingo + } + return move, nil +} + +// buildMainWord assembles the word along dir through the (sorted) placements together +// with the existing tiles that extend and bridge them, and scores it. New tiles apply +// their squares' premiums; existing tiles score at face value. 
+func buildMainWord(b *board.Board, rs *rules.Ruleset, dir Direction, fixed int, ts []Placement) (Word, error) { + _, minA := fixedAxis(dir, ts[0].Row, ts[0].Col) + _, maxA := fixedAxis(dir, ts[len(ts)-1].Row, ts[len(ts)-1].Col) + + start := minA + for { + r, c := coord(dir, fixed, start-1) + if !b.Filled(r, c) { + break + } + start-- + } + end := maxA + for { + r, c := coord(dir, fixed, end+1) + if !b.Filled(r, c) { + break + } + end++ + } + + letters := make([]byte, 0, end-start+1) + blanks := make([]bool, 0, end-start+1) + letterSum, wordMult := 0, 1 + ti := 0 + for a := start; a <= end; a++ { + r, c := coord(dir, fixed, a) + if ti < len(ts) { + if _, ta := fixedAxis(dir, ts[ti].Row, ts[ti].Col); ta == a { + t := ts[ti] + ti++ + prem := rs.Premium(r, c) + if !t.Blank { + letterSum += rs.Values[t.Letter] * prem.LetterMult() + } + wordMult *= prem.WordMult() + letters = append(letters, t.Letter) + blanks = append(blanks, t.Blank) + continue + } + } + if b.Filled(r, c) { + cell := b.At(r, c) + l, bl := encoding.Letter(cell), encoding.IsBlank(cell) + if !bl { + letterSum += rs.Values[l] + } + letters = append(letters, l) + blanks = append(blanks, bl) + continue + } + return Word{}, fmt.Errorf("scrabble: gap in the play at line position %d", a) + } + + wr, wc := coord(dir, fixed, start) + return Word{Row: wr, Col: wc, Dir: dir, Letters: letters, Blanks: blanks, Score: letterSum * wordMult}, nil +} + +// crossWord builds the perpendicular word formed by a single new tile, if any. It +// returns ok=false when the tile has no perpendicular neighbour. 
+func crossWord(b *board.Board, rs *rules.Ruleset, dir Direction, t Placement) (Word, bool) { + cdir := perpendicular(dir) + fixed, axis := fixedAxis(cdir, t.Row, t.Col) + + start := axis + for { + r, c := coord(cdir, fixed, start-1) + if !b.Filled(r, c) { + break + } + start-- + } + end := axis + for { + r, c := coord(cdir, fixed, end+1) + if !b.Filled(r, c) { + break + } + end++ + } + if start == end { + return Word{}, false + } + + letters := make([]byte, 0, end-start+1) + blanks := make([]bool, 0, end-start+1) + letterSum, wordMult := 0, 1 + for a := start; a <= end; a++ { + r, c := coord(cdir, fixed, a) + if a == axis { + prem := rs.Premium(r, c) + if !t.Blank { + letterSum += rs.Values[t.Letter] * prem.LetterMult() + } + wordMult *= prem.WordMult() + letters = append(letters, t.Letter) + blanks = append(blanks, t.Blank) + } else { + cell := b.At(r, c) + l, bl := encoding.Letter(cell), encoding.IsBlank(cell) + if !bl { + letterSum += rs.Values[l] + } + letters = append(letters, l) + blanks = append(blanks, bl) + } + } + wr, wc := coord(cdir, fixed, start) + return Word{Row: wr, Col: wc, Dir: cdir, Letters: letters, Blanks: blanks, Score: letterSum * wordMult}, true +} diff --git a/scrabble/score_test.go b/scrabble/score_test.go new file mode 100644 index 0000000..0f14401 --- /dev/null +++ b/scrabble/score_test.go @@ -0,0 +1,138 @@ +package scrabble + +import ( + "testing" + + "scrabble-solver/board" + "scrabble-solver/internal/encoding" + "scrabble-solver/rules" +) + +const plain7 = `....... +....... +....... +...+... +....... +....... +.......` + +// plainRules is a 7x7 board with no premiums and English tile values, for isolating +// word-assembly and cross-word logic from premium multipliers. 
+func plainRules(t *testing.T) *rules.Ruleset { + t.Helper() + eng := rules.English() + rs, err := rules.FromTemplate("plain7", eng.Alphabet, eng.Values, eng.Counts, 2, 7, 50, plain7) + if err != nil { + t.Fatal(err) + } + return rs +} + +// indices: a=0 c=2 o=14 t=19 x=23 +func TestEvaluateSimpleWord(t *testing.T) { + rs := plainRules(t) + b := board.New(7, 7) + m, err := Evaluate(b, rs, Horizontal, []Placement{ + {Row: 3, Col: 1, Letter: 2}, {Row: 3, Col: 2, Letter: 0}, {Row: 3, Col: 3, Letter: 19}, + }) + if err != nil { + t.Fatal(err) + } + if m.Main.Score != 5 || m.Score != 5 { + t.Errorf("cat: main=%d total=%d, want 5/5", m.Main.Score, m.Score) + } + if len(m.Cross) != 0 || m.Bonus != 0 { + t.Errorf("cat: cross=%d bonus=%d, want 0/0", len(m.Cross), m.Bonus) + } +} + +func TestEvaluateCrossWord(t *testing.T) { + rs := plainRules(t) + b := board.New(7, 7) + b.Set(2, 3, encoding.Cell(14, false)) // o + b.Set(3, 3, encoding.Cell(23, false)) // x + + // Play "at" horizontally on row 4; the 'a' on col 3 forms the cross word "oxa". + m, err := Evaluate(b, rs, Horizontal, []Placement{ + {Row: 4, Col: 3, Letter: 0}, {Row: 4, Col: 4, Letter: 19}, + }) + if err != nil { + t.Fatal(err) + } + if m.Main.Score != 2 { + t.Errorf("main 'at' = %d, want 2", m.Main.Score) + } + if len(m.Cross) != 1 || m.Cross[0].Score != 10 { + t.Errorf("cross = %+v, want one word scoring 10 (oxa)", m.Cross) + } + if m.Score != 12 { + t.Errorf("total = %d, want 12", m.Score) + } +} + +func TestEvaluatePremiums(t *testing.T) { + rs := rules.English() + + // (0,3) is a double-letter square: c(3)*2 + a(1) + t(1) = 8. + b := board.New(15, 15) + m, err := Evaluate(b, rs, Horizontal, []Placement{ + {Row: 0, Col: 3, Letter: 2}, {Row: 0, Col: 4, Letter: 0}, {Row: 0, Col: 5, Letter: 19}, + }) + if err != nil { + t.Fatal(err) + } + if m.Score != 8 { + t.Errorf("DL cat = %d, want 8", m.Score) + } + + // (1,1) is a double-word square: (c(3) + a(1)) * 2 = 8. 
+ b2 := board.New(15, 15) + m2, err := Evaluate(b2, rs, Horizontal, []Placement{ + {Row: 1, Col: 1, Letter: 2}, {Row: 1, Col: 2, Letter: 0}, + }) + if err != nil { + t.Fatal(err) + } + if m2.Score != 8 { + t.Errorf("DW ca = %d, want 8", m2.Score) + } +} + +func TestEvaluateBingo(t *testing.T) { + rs := plainRules(t) + b := board.New(7, 7) + tiles := make([]Placement, 7) + for c := range 7 { + tiles[c] = Placement{Row: 0, Col: c, Letter: 0} // seven a's + } + m, err := Evaluate(b, rs, Horizontal, tiles) + if err != nil { + t.Fatal(err) + } + if m.Bonus != 50 || m.Score != 7+50 { + t.Errorf("bingo: bonus=%d total=%d, want 50/57", m.Bonus, m.Score) + } +} + +func TestEvaluateErrors(t *testing.T) { + rs := plainRules(t) + b := board.New(7, 7) + b.Set(2, 3, encoding.Cell(14, false)) + + if _, err := Evaluate(b, rs, Horizontal, nil); err == nil { + t.Error("empty play: want error") + } + if _, err := Evaluate(b, rs, Horizontal, []Placement{{Row: 2, Col: 3, Letter: 0}}); err == nil { + t.Error("occupied square: want error") + } + if _, err := Evaluate(b, rs, Horizontal, []Placement{ + {Row: 3, Col: 1, Letter: 0}, {Row: 4, Col: 2, Letter: 0}, + }); err == nil { + t.Error("non-collinear: want error") + } + if _, err := Evaluate(b, rs, Horizontal, []Placement{ + {Row: 5, Col: 1, Letter: 0}, {Row: 5, Col: 3, Letter: 0}, + }); err == nil { + t.Error("gap: want error") + } +} diff --git a/scrabble/solver.go b/scrabble/solver.go new file mode 100644 index 0000000..cffcbe1 --- /dev/null +++ b/scrabble/solver.go @@ -0,0 +1,101 @@ +package scrabble + +import ( + "errors" + "fmt" + "sort" + + dawg "github.com/iliadenisov/dafsa" + + "scrabble-solver/board" + "scrabble-solver/rack" + "scrabble-solver/rules" +) + +// Solver is the high-level entry point: it generates ranked plays and scores or +// validates arbitrary plays for a ruleset over a dictionary. 
+type Solver struct { + rules *rules.Ruleset + finder dawg.Finder + gen *DAWGGenerator +} + +// NewSolver returns a Solver for the ruleset over the dictionary finder. +func NewSolver(rs *rules.Ruleset, finder dawg.Finder) *Solver { + return &Solver{rules: rs, finder: finder, gen: NewDAWGGenerator(rs, finder)} +} + +// Rules returns the solver's ruleset. +func (s *Solver) Rules() *rules.Ruleset { return s.rules } + +// GenerateMoves returns every legal play for rack r on board b in the requested +// orientations, ranked by descending score (ties broken deterministically by the move's +// canonical key). +func (s *Solver) GenerateMoves(b *board.Board, r rack.Rack, mode Mode) []Move { + moves := s.gen.GenerateMoves(b, r, mode) + sort.Slice(moves, func(i, j int) bool { + if moves[i].Score != moves[j].Score { + return moves[i].Score > moves[j].Score + } + return moves[i].Key() < moves[j].Key() + }) + return moves +} + +// ScorePlay computes the words and score for placing tiles on b in direction dir. It +// checks geometry only (see Evaluate); use ValidatePlay to also check the dictionary and +// connectivity. +func (s *Solver) ScorePlay(b *board.Board, dir Direction, tiles []Placement) (Move, error) { + return Evaluate(b, s.rules, dir, tiles) +} + +// ValidatePlay scores a play and verifies that every word it forms is in the dictionary +// and that it connects to the board (or covers the centre on the first move). It returns +// the scored move; the error is nil exactly when the play is legal. 
+func (s *Solver) ValidatePlay(b *board.Board, dir Direction, tiles []Placement) (Move, error) { + m, err := Evaluate(b, s.rules, dir, tiles) + if err != nil { + return Move{}, err + } + if len(m.Main.Letters) < 2 { + return m, errors.New("scrabble: play forms no word of length 2 or more") + } + if s.finder.IndexOfB(m.Main.Letters) < 0 { + return m, fmt.Errorf("scrabble: main word is not in the dictionary") + } + for _, cw := range m.Cross { + if s.finder.IndexOfB(cw.Letters) < 0 { + return m, fmt.Errorf("scrabble: a cross word is not in the dictionary") + } + } + if !s.connected(b, m) { + return m, errors.New("scrabble: play does not connect to the board") + } + return m, nil +} + +// connected reports whether the play touches the existing position (or covers the centre +// on the first move). +func (s *Solver) connected(b *board.Board, m Move) bool { + if b.IsEmpty() { + cr, cc := s.rules.Center/s.rules.Cols, s.rules.Center%s.rules.Cols + return wordCovers(m.Main, cr, cc) + } + // The main word incorporated an existing tile, or a new tile formed a cross word. 
+ return len(m.Main.Letters) > len(m.Tiles) || len(m.Cross) > 0 +} + +func wordCovers(w Word, r, c int) bool { + for i := range w.Letters { + rr, cc := w.Row, w.Col + if w.Dir == Horizontal { + cc += i + } else { + rr += i + } + if rr == r && cc == c { + return true + } + } + return false +} diff --git a/scrabble/solver_test.go b/scrabble/solver_test.go new file mode 100644 index 0000000..da7fecd --- /dev/null +++ b/scrabble/solver_test.go @@ -0,0 +1,88 @@ +package scrabble + +import ( + "testing" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/board" + "scrabble-solver/internal/dictdawg" + "scrabble-solver/internal/wordlist" +) + +func newTestSolver(t *testing.T) *Solver { + t.Helper() + words := wordlist.Encode(testWords, alphabet.Latin(), 2, 15) + f, err := dictdawg.Build(alphabet.Latin(), words) + if err != nil { + t.Fatal(err) + } + return NewSolver(plainRulesShared, f) +} + +func TestSolverGenerateMovesRanked(t *testing.T) { + s := newTestSolver(t) + b := board.New(s.rules.Rows, s.rules.Cols) + moves := s.GenerateMoves(b, makeRack("cat", 0), Both) + if len(moves) == 0 { + t.Fatal("no first moves generated") + } + for i := 1; i < len(moves); i++ { + if moves[i-1].Score < moves[i].Score { + t.Fatalf("moves not ranked: %d before %d", moves[i-1].Score, moves[i].Score) + } + } +} + +func TestSolverValidatePlay(t *testing.T) { + s := newTestSolver(t) + // indices: c=2 a=0 t=19 z=25 + cat := []Placement{{Row: 3, Col: 2, Letter: 2}, {Row: 3, Col: 3, Letter: 0}, {Row: 3, Col: 4, Letter: 19}} + + // First move through the centre (3,3) is legal. + if _, err := s.ValidatePlay(board.New(s.rules.Rows, s.rules.Cols), Horizontal, cat); err != nil { + t.Errorf("valid first move rejected: %v", err) + } + + // First move that misses the centre is rejected. 
+ off := []Placement{{Row: 0, Col: 0, Letter: 2}, {Row: 0, Col: 1, Letter: 0}, {Row: 0, Col: 2, Letter: 19}} + if _, err := s.ValidatePlay(board.New(s.rules.Rows, s.rules.Cols), Horizontal, off); err == nil { + t.Error("first move off the centre was accepted") + } + + // A non-word ("caz") is rejected. + caz := []Placement{{Row: 3, Col: 2, Letter: 2}, {Row: 3, Col: 3, Letter: 0}, {Row: 3, Col: 4, Letter: 25}} + if _, err := s.ValidatePlay(board.New(s.rules.Rows, s.rules.Cols), Horizontal, caz); err == nil { + t.Error("non-word 'caz' was accepted") + } + + // A disconnected play on a non-empty board is rejected. + b := board.New(s.rules.Rows, s.rules.Cols) + placeWord(b, 3, 2, Horizontal, "cat") + disc := []Placement{{Row: 0, Col: 0, Letter: 0}, {Row: 0, Col: 1, Letter: 18}} // "as" far away + if _, err := s.ValidatePlay(b, Horizontal, disc); err == nil { + t.Error("disconnected play was accepted") + } + + // Extending "cat" to "cats" connects and is a word. + cats := []Placement{{Row: 3, Col: 5, Letter: 18}} // s after cat + if m, err := s.ValidatePlay(b, Horizontal, cats); err != nil { + t.Errorf("valid extension rejected: %v", err) + } else if string(m.Main.Letters) != string([]byte{2, 0, 19, 18}) { + t.Errorf("main word = %v, want cats", m.Main.Letters) + } +} + +func TestSolverScorePlay(t *testing.T) { + s := newTestSolver(t) + b := board.New(s.rules.Rows, s.rules.Cols) + m, err := s.ScorePlay(b, Horizontal, []Placement{ + {Row: 3, Col: 2, Letter: 2}, {Row: 3, Col: 3, Letter: 0}, {Row: 3, Col: 4, Letter: 19}, + }) + if err != nil { + t.Fatal(err) + } + if m.Score != 5 { // c3 a1 t1, no premiums on the plain board + t.Errorf("cat score = %d, want 5", m.Score) + } +} diff --git a/selfplay/selfplay.go b/selfplay/selfplay.go new file mode 100644 index 0000000..733d170 --- /dev/null +++ b/selfplay/selfplay.go @@ -0,0 +1,154 @@ +// Package selfplay drives greedy AI-vs-AI Scrabble games used to validate the move +// generators (the same position is offered to both) 
and to benchmark them. +package selfplay + +import ( + "math/rand" + "sort" + "time" + + "scrabble-solver/board" + "scrabble-solver/rack" + "scrabble-solver/rules" + "scrabble-solver/scrabble" +) + +// blankTile marks a blank in the bag and in a player's hand. +const blankTile byte = 0xff + +// Bag is a shuffled draw pile of tiles. +type Bag struct { + tiles []byte +} + +// NewBag fills a bag from the ruleset's tile counts and blanks and shuffles it with the +// given seed (so games are reproducible). +func NewBag(rs *rules.Ruleset, seed int64) *Bag { + var tiles []byte + for i, n := range rs.Counts { + for range n { + tiles = append(tiles, byte(i)) + } + } + for range rs.Blanks { + tiles = append(tiles, blankTile) + } + rng := rand.New(rand.NewSource(seed)) + rng.Shuffle(len(tiles), func(i, j int) { tiles[i], tiles[j] = tiles[j], tiles[i] }) + return &Bag{tiles: tiles} +} + +// Len returns the number of tiles left in the bag. +func (b *Bag) Len() int { return len(b.tiles) } + +// Draw removes up to n tiles from the bag and returns them. +func (b *Bag) Draw(n int) []byte { + if n > len(b.tiles) { + n = len(b.tiles) + } + out := b.tiles[len(b.tiles)-n:] + b.tiles = b.tiles[:len(b.tiles)-n] + return out +} + +// rackOf builds a generation rack from a hand of tiles. +func rackOf(tiles []byte, size int) rack.Rack { + r := rack.New(size) + for _, t := range tiles { + if t == blankTile { + r.AddBlank() + } else { + r.Add(t) + } + } + return r +} + +// removeUsed returns the hand with the tiles consumed by m removed. +func removeUsed(tiles []byte, m scrabble.Move) []byte { + out := append([]byte(nil), tiles...) + for _, p := range m.Tiles { + want := p.Letter + if p.Blank { + want = blankTile + } + for i, t := range out { + if t == want { + out = append(out[:i], out[i+1:]...) + break + } + } + } + return out +} + +// greedy returns the highest-scoring move (ties broken by canonical key for +// reproducibility), or ok=false if there is no legal move. 
+func greedy(gen scrabble.Generator, b *board.Board, rk rack.Rack, mode scrabble.Mode) (scrabble.Move, int, bool) { + moves := gen.GenerateMoves(b, rk, mode) + if len(moves) == 0 { + return scrabble.Move{}, 0, false + } + sort.Slice(moves, func(i, j int) bool { + if moves[i].Score != moves[j].Score { + return moves[i].Score > moves[j].Score + } + return moves[i].Key() < moves[j].Key() + }) + return moves[0], len(moves), true +} + +// Result summarizes a finished game. +type Result struct { + Turns int // turns taken (plays plus passes) + Plays int // scoring plays made + Scores [2]int // final score per player + MovesGenerated int // total legal moves generated across all turns + GenTime time.Duration // time spent generating moves +} + +// PlayGame plays one greedy AI-vs-AI game with generator gen and returns its result. If +// observe is non-nil it is called before each turn with a clone of the board and the +// player's rack, so a caller can compare generators on identical positions. +func PlayGame(rs *rules.Ruleset, gen scrabble.Generator, mode scrabble.Mode, seed int64, + observe func(b *board.Board, rk rack.Rack)) Result { + + const maxTurns = 300 + bag := NewBag(rs, seed) + b := board.New(rs.Rows, rs.Cols) + hands := [2][]byte{bag.Draw(rs.RackSize), bag.Draw(rs.RackSize)} + + var res Result + passes := 0 + for turn := range maxTurns { + p := turn % 2 + rk := rackOf(hands[p], rs.Size()) + if observe != nil { + observe(b.Clone(), rk.Clone()) + } + res.Turns++ + + t0 := time.Now() + m, n, ok := greedy(gen, b, rk, mode) + res.GenTime += time.Since(t0) + res.MovesGenerated += n + if !ok { + if passes++; passes >= 4 { + break + } + continue + } + passes = 0 + scrabble.Apply(b, m) + res.Scores[p] += m.Score + res.Plays++ + hands[p] = removeUsed(hands[p], m) + if need := rs.RackSize - len(hands[p]); need > 0 { + hands[p] = append(hands[p], bag.Draw(need)...) 
+ } + if len(hands[p]) == 0 && bag.Len() == 0 { + break + } + } + return res +} diff --git a/selfplay/selfplay_test.go b/selfplay/selfplay_test.go new file mode 100644 index 0000000..c33ef64 --- /dev/null +++ b/selfplay/selfplay_test.go @@ -0,0 +1,33 @@ +package selfplay_test + +import ( + "testing" + + "github.com/iliadenisov/alphabet" + + "scrabble-solver/internal/dictdawg" + "scrabble-solver/internal/wordlist" + "scrabble-solver/rules" + "scrabble-solver/scrabble" + "scrabble-solver/selfplay" +) + +func TestPlayGameSmoke(t *testing.T) { + rs := rules.English() + words := wordlist.Encode([]string{ + "cat", "cats", "car", "care", "cares", "cot", "cap", "cab", "at", "as", + "tea", "eat", "ear", "era", "are", "oat", "oats", "sat", "set", "sea", + "tar", "tars", "star", "arts", "rat", "rats", "ace", "aces", "scar", "scare", + }, alphabet.Latin(), 2, 15) + f, err := dictdawg.Build(alphabet.Latin(), words) + if err != nil { + t.Fatal(err) + } + gen := scrabble.NewDAWGGenerator(rs, f) + + res := selfplay.PlayGame(rs, gen, scrabble.Both, 1, nil) + if res.Turns == 0 || res.Plays == 0 { + t.Errorf("degenerate game: %+v", res) + } + t.Logf("smoke game: turns=%d plays=%d scores=%v", res.Turns, res.Plays, res.Scores) +}