feat: gamemaster

This commit is contained in:
Ilia Denisov
2026-05-03 07:59:03 +02:00
committed by GitHub
parent a7cee15115
commit 3e2622757e
229 changed files with 41521 additions and 1098 deletions
@@ -0,0 +1,280 @@
// Package membership implements the in-process membership cache that
// authorises every hot-path call (commandexecute, orderput, reportget)
// owned by Game Master.
//
// The cache is a per-game TTL projection of Lobby's
// `/api/v1/internal/games/{game_id}/memberships` view. Lobby invokes the
// invalidation hook (`POST /api/v1/internal/games/{game_id}/memberships/invalidate`)
// post-commit on every roster mutation; the TTL is the safety net for any
// missed invalidation. Cache rules and trade-offs are documented in
// `gamemaster/README.md §Hot Path → Membership cache and invalidation` and
// `gamemaster/docs/stage16-membership-cache-and-invalidation.md`.
package membership
import (
"container/list"
"context"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"galaxy/gamemaster/internal/logging"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/telemetry"
)
// Result labels passed to `telemetry.Runtime.RecordMembershipCacheResult`.
const (
	// resultHit is recorded when Resolve is served from a fresh cached entry.
	resultHit = "hit"
	// resultMiss is recorded whenever Resolve goes through the fetch path,
	// regardless of whether that fetch succeeds.
	resultMiss = "miss"
	// resultInvalidate is recorded on every Invalidate call, including
	// calls for games that hold no cached entry.
	resultInvalidate = "invalidate"
)
// Dependencies groups the collaborators and bounds required by Cache.
// NewCache rejects a nil Lobby, a nil Telemetry, and non-positive TTL or
// MaxGames; Logger and Clock are optional.
type Dependencies struct {
	// Lobby loads the per-game membership projection on cache miss.
	// Required.
	Lobby ports.LobbyClient
	// Telemetry records `gamemaster.membership_cache.hits` outcomes
	// (hit, miss, invalidate). Required.
	Telemetry *telemetry.Runtime
	// Logger records structured cache events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger
	// Clock supplies the wall-clock used for entry freshness. Defaults
	// to `time.Now` when nil.
	Clock func() time.Time
	// TTL bounds the freshness of one cached entry; expired entries are
	// re-fetched from Lobby. Must be positive.
	TTL time.Duration
	// MaxGames bounds the cache size in number of games. The
	// least-recently-used entry is evicted when an insert overflows the
	// bound. Must be positive.
	MaxGames int
}
// Cache stores the per-game membership projection used by hot-path
// services. The zero value is not usable; construct with NewCache.
//
// The first six fields are set once by NewCache. `mu` guards the three
// mutable structures below it (`entries`, `lru`, `inflight`).
type Cache struct {
	lobby ports.LobbyClient
	telemetry *telemetry.Runtime
	logger *slog.Logger
	clock func() time.Time
	ttl time.Duration
	maxGames int
	mu sync.Mutex
	entries map[string]*list.Element // gameID → lru element whose Value is a *cacheEntry
	lru *list.List // elements hold *cacheEntry; most recently used at the front
	inflight map[string]*flight // gameID → Lobby fetch currently in progress
}
// cacheEntry stores one per-game membership projection together with the
// clock reading taken when it was installed.
type cacheEntry struct {
	gameID string // key under which the entry is indexed; used to unlink it on LRU eviction
	members map[string]string // user_id → status ("active"|"removed"|"blocked")
	loadedAt time.Time // compared against the TTL to decide freshness
}
// flight coordinates concurrent misses on the same gameID so only one
// Lobby fetch is issued. Joiners wait on `done`; the leader populates
// `members` (or `err`) before closing the channel, so joiners must read
// those fields only after `done` is closed.
type flight struct {
	done chan struct{} // closed by the leader once the outcome is recorded
	members map[string]string // user_id → status; set on success
	err error // set on failure
}
// NewCache validates deps and builds a ready-to-use Cache. It returns a
// plain Go error when the Lobby client or the telemetry runtime is
// missing, or when TTL or MaxGames is zero or negative. A nil Logger
// falls back to `slog.Default()` and a nil Clock to `time.Now`.
func NewCache(deps Dependencies) (*Cache, error) {
	// Required collaborators and bounds, checked in a fixed order.
	if deps.Lobby == nil {
		return nil, errors.New("new membership cache: nil lobby client")
	}
	if deps.Telemetry == nil {
		return nil, errors.New("new membership cache: nil telemetry runtime")
	}
	if deps.TTL <= 0 {
		return nil, fmt.Errorf("new membership cache: ttl must be positive, got %s", deps.TTL)
	}
	if deps.MaxGames <= 0 {
		return nil, fmt.Errorf("new membership cache: max games must be positive, got %d", deps.MaxGames)
	}

	// Optional collaborators with their defaults.
	now := deps.Clock
	if now == nil {
		now = time.Now
	}
	base := deps.Logger
	if base == nil {
		base = slog.Default()
	}

	built := &Cache{
		lobby:     deps.Lobby,
		telemetry: deps.Telemetry,
		logger:    base.With("component", "gamemaster.membership_cache"),
		clock:     now,
		ttl:       deps.TTL,
		maxGames:  deps.MaxGames,
		entries:   make(map[string]*list.Element),
		lru:       list.New(),
		inflight:  make(map[string]*flight),
	}
	return built, nil
}
// Resolve reports the membership status of userID within gameID using
// the raw Lobby vocabulary (`"active"`, `"removed"`, `"blocked"`). A user
// that is absent from the roster yields the empty string, so callers
// authorise a hot-path call only when the result equals `"active"`.
//
// A fresh cached entry is served directly and counted as a hit. In every
// other case (no entry, expired entry, entry purged by Invalidate) the
// call goes through the fetch path and is counted as a miss, whether or
// not the fetch succeeds. Concurrent misses on one gameID share a single
// Lobby call. A fetch failure is logged at warn level and returned to the
// caller; failures are never cached.
//
// A nil receiver or nil ctx is rejected with an error.
func (cache *Cache) Resolve(ctx context.Context, gameID, userID string) (string, error) {
	switch {
	case cache == nil:
		return "", errors.New("membership cache: nil receiver")
	case ctx == nil:
		return "", errors.New("membership cache: nil context")
	}

	if cached, fresh := cache.lookupFresh(gameID); fresh {
		cache.telemetry.RecordMembershipCacheResult(ctx, resultHit)
		return cached.members[userID], nil
	}

	roster, fetchErr := cache.fetch(ctx, gameID)
	cache.telemetry.RecordMembershipCacheResult(ctx, resultMiss)
	if fetchErr == nil {
		return roster[userID], nil
	}

	attrs := []any{"game_id", gameID, "err", fetchErr.Error()}
	attrs = append(attrs, logging.ContextAttrs(ctx)...)
	cache.logger.WarnContext(ctx, "lobby fetch failed", attrs...)
	return "", fetchErr
}
// Invalidate purges the cache entry for gameID, if any, and unregisters
// any Lobby fetch still in flight for that game. Subsequent Resolve calls
// fetch from Lobby. Safe to call from the invalidation hook handler
// (Stage 19) at any time; a nil receiver is a no-op.
//
// Unregistering the in-flight fetch matters because that fetch may have
// read the roster before the mutation that triggered this invalidation.
// Its result is still handed to the callers already waiting on it, but
// fetch refuses to cache it, and callers arriving after Invalidate start
// a fresh Lobby call instead of joining the stale one.
func (cache *Cache) Invalidate(gameID string) {
	if cache == nil {
		return
	}
	cache.mu.Lock()
	if element, ok := cache.entries[gameID]; ok {
		cache.lru.Remove(element)
		delete(cache.entries, gameID)
	}
	delete(cache.inflight, gameID)
	cache.mu.Unlock()
	cache.telemetry.RecordMembershipCacheResult(context.Background(), resultInvalidate)
}

// lookupFresh returns the cached entry for gameID when it exists and is
// still fresh, i.e. less than one TTL has elapsed since it was loaded.
// The MRU position is updated under the lock. An expired entry is left
// in place; it is replaced by the next successful fetch.
func (cache *Cache) lookupFresh(gameID string) (*cacheEntry, bool) {
	cache.mu.Lock()
	defer cache.mu.Unlock()
	element, ok := cache.entries[gameID]
	if !ok {
		return nil, false
	}
	entry := element.Value.(*cacheEntry)
	if cache.clock().Sub(entry.loadedAt) >= cache.ttl {
		return nil, false
	}
	cache.lru.MoveToFront(element)
	return entry, true
}

// fetch loads the membership projection from Lobby, deduplicating
// concurrent misses on the same gameID through the inflight map. The
// successful result is cached; failures are not.
//
// The call proceeds in three steps under cache.mu:
//  1. Re-check the cache. The caller's earlier miss was observed without
//     holding the lock across both calls, so another leader may have
//     installed a fresh entry in between; serving it avoids a duplicate
//     Lobby call.
//  2. If a fetch is already registered for gameID, wait for it (or for
//     ctx to end, in which case ctx.Err() is returned unwrapped).
//  3. Otherwise become the leader: register a flight, call Lobby outside
//     the lock, then publish the outcome.
//
// The leader publishes from a deferred function so that the inflight slot
// is released and waiters are woken even if the Lobby client panics. The
// result is installed only while this flight is still the one registered
// for gameID; if the slot was cleared or replaced during the load, the
// result is returned to the callers of this flight but not cached.
func (cache *Cache) fetch(ctx context.Context, gameID string) (map[string]string, error) {
	cache.mu.Lock()
	if element, ok := cache.entries[gameID]; ok {
		entry := element.Value.(*cacheEntry)
		if cache.clock().Sub(entry.loadedAt) < cache.ttl {
			members := entry.members
			cache.lru.MoveToFront(element)
			cache.mu.Unlock()
			return members, nil
		}
	}
	if existing, ok := cache.inflight[gameID]; ok {
		cache.mu.Unlock()
		select {
		case <-existing.done:
			if existing.err != nil {
				return nil, existing.err
			}
			return existing.members, nil
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	// The flight starts out failed so that waiters never observe a
	// (nil, nil) outcome if the load below does not return normally.
	current := &flight{
		done: make(chan struct{}),
		err:  fmt.Errorf("%w: lobby fetch did not complete", ErrLobbyUnavailable),
	}
	cache.inflight[gameID] = current
	cache.mu.Unlock()

	defer func() {
		cache.mu.Lock()
		if cache.inflight[gameID] == current {
			delete(cache.inflight, gameID)
			if current.err == nil {
				cache.installLocked(gameID, current.members)
			}
		}
		cache.mu.Unlock()
		// Outcome fields are final at this point; closing the channel
		// publishes them to the waiters.
		close(current.done)
	}()

	members, err := cache.loadFromLobby(ctx, gameID)
	if err != nil {
		current.err = err
		return nil, err
	}
	current.members, current.err = members, nil
	return members, nil
}
// loadFromLobby asks the LobbyClient for the roster of gameID and
// flattens it into the user_id → status map the cache keeps. Any error
// from the client is wrapped together with ErrLobbyUnavailable, so both
// the sentinel and the original cause remain matchable by callers.
func (cache *Cache) loadFromLobby(ctx context.Context, gameID string) (map[string]string, error) {
	memberships, lobbyErr := cache.lobby.GetMemberships(ctx, gameID)
	if lobbyErr != nil {
		return nil, fmt.Errorf("%w: %w", ErrLobbyUnavailable, lobbyErr)
	}

	statusByUser := make(map[string]string, len(memberships))
	for i := range memberships {
		statusByUser[memberships[i].UserID] = memberships[i].Status
	}
	return statusByUser, nil
}
// installLocked stores members under gameID, stamped with the current
// clock reading, and moves the game to the MRU position. When the insert
// pushes the cache past maxGames, least-recently-used entries are evicted
// until the bound holds again. Caller must hold cache.mu.
//
// A refresh of an already-present game swaps in a brand-new cacheEntry
// rather than mutating the existing one. Resolve dereferences the entry
// it got from lookupFresh after the mutex has been released, so a
// published entry must never be written again; treating entries as
// immutable keeps that read free of data races.
func (cache *Cache) installLocked(gameID string, members map[string]string) {
	fresh := &cacheEntry{gameID: gameID, members: members, loadedAt: cache.clock()}

	if element, ok := cache.entries[gameID]; ok {
		element.Value = fresh
		cache.lru.MoveToFront(element)
		return
	}

	cache.entries[gameID] = cache.lru.PushFront(fresh)
	for cache.lru.Len() > cache.maxGames {
		oldest := cache.lru.Back()
		if oldest == nil {
			break
		}
		evicted := oldest.Value.(*cacheEntry)
		cache.lru.Remove(oldest)
		delete(cache.entries, evicted.gameID)
	}
}
@@ -0,0 +1,376 @@
package membership_test
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"testing"
"time"
"galaxy/gamemaster/internal/ports"
"galaxy/gamemaster/internal/service/membership"
"galaxy/gamemaster/internal/telemetry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// fakeLobby is a hand-rolled LobbyClient stub used by membership tests.
// It mirrors the test-double style used elsewhere in the gamemaster
// service tree.
type fakeLobby struct {
	mu sync.Mutex // guards answers and errs
	calls atomic.Int32 // incremented at the start of every GetMemberships call
	answers map[string][]ports.Membership // gameID → canned roster
	errs map[string]error // gameID → canned error; checked before answers
	delay time.Duration // when positive, GetMemberships waits this long or until ctx ends
	released chan struct{} // when non-nil, GetMemberships blocks until it is closed or ctx ends
}
// newFakeLobby returns a stub with empty answer and error tables, no
// artificial delay and no release gate.
func newFakeLobby() *fakeLobby {
	stub := new(fakeLobby)
	stub.answers = make(map[string][]ports.Membership)
	stub.errs = make(map[string]error)
	return stub
}
// seed registers the roster GetMemberships returns for gameID.
func (f *fakeLobby) seed(gameID string, members []ports.Membership) {
	f.mu.Lock()
	f.answers[gameID] = members
	f.mu.Unlock()
}
// seedErr registers the error GetMemberships returns for gameID; a seeded
// error wins over a seeded roster for the same game.
func (f *fakeLobby) seedErr(gameID string, err error) {
	f.mu.Lock()
	f.errs[gameID] = err
	f.mu.Unlock()
}
// GetMemberships counts the call, then honours the optional delay and
// release gate (either wait is abandoned with ctx.Err() when ctx ends)
// before answering. A seeded error is returned first; otherwise a copy of
// the seeded roster; otherwise an empty, non-nil roster.
func (f *fakeLobby) GetMemberships(ctx context.Context, gameID string) ([]ports.Membership, error) {
	f.calls.Add(1)

	if f.delay > 0 {
		elapsed := time.After(f.delay)
		select {
		case <-elapsed:
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	if gate := f.released; gate != nil {
		select {
		case <-gate:
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}

	f.mu.Lock()
	defer f.mu.Unlock()

	if seeded, failing := f.errs[gameID]; failing {
		return nil, seeded
	}
	roster, known := f.answers[gameID]
	if !known {
		return []ports.Membership{}, nil
	}
	// Hand out a copy so callers cannot alter the seeded slice.
	snapshot := make([]ports.Membership, len(roster))
	copy(snapshot, roster)
	return snapshot, nil
}
// GetGameSummary is present on the stub but not exercised by the cache
// tests; it always fails.
func (f *fakeLobby) GetGameSummary(_ context.Context, _ string) (ports.GameSummary, error) {
	var none ports.GameSummary
	return none, errors.New("not used in cache tests")
}
// newTelemetry builds a telemetry runtime with nil providers and fails
// the test immediately if construction reports an error.
func newTelemetry(t *testing.T) *telemetry.Runtime {
	t.Helper()
	runtime, buildErr := telemetry.NewWithProviders(nil, nil)
	require.NoError(t, buildErr)
	return runtime
}
// active builds a membership record with status "active" and a fixed
// JoinedAt (Unix epoch, UTC) for the given user and race.
func active(userID, raceName string) ports.Membership {
	member := ports.Membership{Status: "active", JoinedAt: time.Unix(0, 0).UTC()}
	member.UserID = userID
	member.RaceName = raceName
	return member
}
// newCacheForTest wires a Cache around lobby with a fresh telemetry
// runtime and the given TTL, capacity and clock, failing the test when
// construction is rejected.
func newCacheForTest(t *testing.T, lobby ports.LobbyClient, ttl time.Duration, maxGames int, clock func() time.Time) *membership.Cache {
	t.Helper()

	deps := membership.Dependencies{
		Lobby:     lobby,
		Telemetry: newTelemetry(t),
		Clock:     clock,
		TTL:       ttl,
		MaxGames:  maxGames,
	}
	built, buildErr := membership.NewCache(deps)
	require.NoError(t, buildErr)
	return built
}
// TestNewCacheRejectsBadDependencies checks that NewCache returns an
// error and a nil cache for each missing collaborator and each
// non-positive bound.
func TestNewCacheRejectsBadDependencies(t *testing.T) {
	tel := newTelemetry(t)

	// Each case starts from a valid dependency set and breaks one field.
	scenarios := []struct {
		name   string
		broken func(deps *membership.Dependencies)
	}{
		{"nil lobby", func(deps *membership.Dependencies) { deps.Lobby = nil }},
		{"nil telemetry", func(deps *membership.Dependencies) { deps.Telemetry = nil }},
		{"zero ttl", func(deps *membership.Dependencies) { deps.TTL = 0 }},
		{"negative ttl", func(deps *membership.Dependencies) { deps.TTL = -time.Second }},
		{"zero max games", func(deps *membership.Dependencies) { deps.MaxGames = 0 }},
		{"negative max games", func(deps *membership.Dependencies) { deps.MaxGames = -1 }},
	}

	for _, scenario := range scenarios {
		deps := membership.Dependencies{
			Lobby:     newFakeLobby(),
			Telemetry: tel,
			TTL:       time.Second,
			MaxGames:  1,
		}
		scenario.broken(&deps)

		t.Run(scenario.name, func(t *testing.T) {
			built, buildErr := membership.NewCache(deps)
			require.Error(t, buildErr)
			assert.Nil(t, built)
		})
	}
}
// TestResolveHitServesCachedEntry checks that two lookups for different
// users of the same game cost a single Lobby call.
func TestResolveHitServesCachedEntry(t *testing.T) {
	ctx := context.Background()
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)

	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{
		active("user-1", "Aelinari"),
		active("user-2", "Drazi"),
	})
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })

	// The first lookup loads the roster; the second is served from it.
	for _, userID := range []string{"user-1", "user-2"} {
		status, err := cache.Resolve(ctx, "game-1", userID)
		require.NoError(t, err)
		assert.Equal(t, "active", status)
	}
	assert.Equal(t, int32(1), lobby.calls.Load())
}
// TestResolveUnknownUserReturnsEmptyString checks that a user absent
// from the roster resolves to the empty status without an error.
func TestResolveUnknownUserReturnsEmptyString(t *testing.T) {
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)

	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })

	got, resolveErr := cache.Resolve(context.Background(), "game-1", "ghost")
	require.NoError(t, resolveErr)
	assert.Empty(t, got)
}
// TestResolveTTLExpiryRefetches checks that an entry is reused while
// younger than the TTL and reloaded from Lobby once it is older.
func TestResolveTTLExpiryRefetches(t *testing.T) {
	ctx := context.Background()
	start := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	current := start

	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	cache := newCacheForTest(t, lobby, 30*time.Second, 8, func() time.Time { return current })

	// resolveAt moves the fake clock to start+offset and resolves once.
	resolveAt := func(offset time.Duration) {
		t.Helper()
		current = start.Add(offset)
		_, err := cache.Resolve(ctx, "game-1", "user-1")
		require.NoError(t, err)
	}

	resolveAt(0)
	assert.Equal(t, int32(1), lobby.calls.Load())

	resolveAt(20 * time.Second)
	assert.Equal(t, int32(1), lobby.calls.Load(), "fresh entry must not refetch")

	resolveAt(31 * time.Second)
	assert.Equal(t, int32(2), lobby.calls.Load(), "expired entry must refetch")
}
// TestInvalidatePurgesEntry checks that a lookup after Invalidate goes
// back to Lobby even though the TTL has not elapsed.
func TestInvalidatePurgesEntry(t *testing.T) {
	ctx := context.Background()
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)

	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })

	resolveOnce := func() {
		t.Helper()
		_, err := cache.Resolve(ctx, "game-1", "user-1")
		require.NoError(t, err)
	}

	resolveOnce()
	assert.Equal(t, int32(1), lobby.calls.Load())

	cache.Invalidate("game-1")

	resolveOnce()
	assert.Equal(t, int32(2), lobby.calls.Load())
}
func TestInvalidateOnAbsentGameIsNoop(t *testing.T) {
lobby := newFakeLobby()
clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
cache.Invalidate("missing")
}
// TestLRUEvictsOldestEntry checks that a cache bounded to two games
// drops the least-recently-used game when a third one is loaded.
func TestLRUEvictsOldestEntry(t *testing.T) {
	ctx := context.Background()
	start := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)
	current := start

	lobby := newFakeLobby()
	for i := 0; i < 4; i++ {
		lobby.seed(fmt.Sprintf("game-%d", i), []ports.Membership{active("user-1", "Aelinari")})
	}
	cache := newCacheForTest(t, lobby, time.Minute, 2, func() time.Time { return current })

	// resolveAt moves the fake clock to start+offset and resolves gameID.
	resolveAt := func(offset time.Duration, gameID string) {
		t.Helper()
		current = start.Add(offset)
		_, err := cache.Resolve(ctx, gameID, "user-1")
		require.NoError(t, err)
	}

	// Three distinct games, one second apart; capacity is two, so the
	// first one loaded has to make room for the third.
	for i := 0; i < 3; i++ {
		resolveAt(time.Duration(i)*time.Second, fmt.Sprintf("game-%d", i))
	}
	require.Equal(t, int32(3), lobby.calls.Load())

	// game-1 survived the eviction and is served without a Lobby call.
	resolveAt(3*time.Second, "game-1")
	assert.Equal(t, int32(3), lobby.calls.Load(), "game-1 must still be cached")

	// game-0 was the victim and has to be loaded again.
	resolveAt(4*time.Second, "game-0")
	assert.Equal(t, int32(4), lobby.calls.Load(), "game-0 must have been evicted")
}
// TestResolveLobbyUnavailableSurfacesAndDoesNotCache checks that a Lobby
// failure matches both the cache sentinel and the ports sentinel, and
// that the next lookup calls Lobby again instead of replaying the error.
func TestResolveLobbyUnavailableSurfacesAndDoesNotCache(t *testing.T) {
	ctx := context.Background()
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)

	lobby := newFakeLobby()
	lobby.seedErr("game-1", fmt.Errorf("dial: %w", ports.ErrLobbyUnavailable))
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })

	_, firstErr := cache.Resolve(ctx, "game-1", "user-1")
	require.Error(t, firstErr)
	assert.True(t, errors.Is(firstErr, membership.ErrLobbyUnavailable))
	assert.True(t, errors.Is(firstErr, ports.ErrLobbyUnavailable))

	_, secondErr := cache.Resolve(ctx, "game-1", "user-1")
	require.Error(t, secondErr)
	assert.Equal(t, int32(2), lobby.calls.Load(), "failed fetch must not be cached")
}
// TestResolveUnwrappedLobbyErrorIsStillSurfacedAsLobbyUnavailable checks
// that a Lobby error unrelated to the ports sentinel still matches the
// cache's own ErrLobbyUnavailable.
func TestResolveUnwrappedLobbyErrorIsStillSurfacedAsLobbyUnavailable(t *testing.T) {
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)

	lobby := newFakeLobby()
	lobby.seedErr("game-1", errors.New("transport"))
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })

	_, resolveErr := cache.Resolve(context.Background(), "game-1", "user-1")
	require.Error(t, resolveErr)
	assert.True(t, errors.Is(resolveErr, membership.ErrLobbyUnavailable))
}
// TestResolveDeduplicatesConcurrentMisses starts 16 concurrent lookups
// for the same uncached game while the fake Lobby is held at a gate, and
// checks that all of them succeed on the strength of one Lobby call.
func TestResolveDeduplicatesConcurrentMisses(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	// GetMemberships blocks until this channel is closed.
	gate := make(chan struct{})
	lobby.released = gate
	clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
	cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
	const callers = 16
	var wg sync.WaitGroup
	// Each goroutine writes only its own slot, so the slices need no lock.
	results := make([]string, callers)
	errs := make([]error, callers)
	wg.Add(callers)
	for index := range callers {
		go func(slot int) {
			defer wg.Done()
			results[slot], errs[slot] = cache.Resolve(context.Background(), "game-1", "user-1")
		}(index)
	}
	// Give all goroutines a moment to register on the inflight map
	// before releasing the Lobby fetch. This is a best-effort pause, not
	// a synchronisation point.
	time.Sleep(10 * time.Millisecond)
	close(gate)
	wg.Wait()
	for index := range callers {
		require.NoError(t, errs[index])
		assert.Equal(t, "active", results[index])
	}
	assert.Equal(t, int32(1), lobby.calls.Load(), "concurrent misses must collapse to one Lobby call")
}
// TestResolveRespectsContextCancellation checks that a lookup made with
// an already-cancelled context returns context.Canceled while another
// lookup for the same game is still blocked inside the fake Lobby.
func TestResolveRespectsContextCancellation(t *testing.T) {
	lobby := newFakeLobby()
	lobby.seed("game-1", []ports.Membership{active("user-1", "Aelinari")})
	// GetMemberships blocks until this channel is closed.
	gate := make(chan struct{})
	lobby.released = gate
	clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
	cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
	leaderDone := make(chan struct{})
	go func() {
		defer close(leaderDone)
		// The leader's outcome is irrelevant here; it only has to hold
		// the fetch open while the cancelled caller runs.
		_, _ = cache.Resolve(context.Background(), "game-1", "user-1")
	}()
	// Wait for leader to register the inflight slot (best-effort pause).
	time.Sleep(10 * time.Millisecond)
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	_, err := cache.Resolve(ctx, "game-1", "user-1")
	require.Error(t, err)
	assert.True(t, errors.Is(err, context.Canceled))
	// Release the leader and wait for it so no goroutine outlives the test.
	close(gate)
	<-leaderDone
}
// TestResolveRefreshAfterErrorReturnsSuccess checks that once Lobby
// recovers, the lookup following a failed one succeeds.
func TestResolveRefreshAfterErrorReturnsSuccess(t *testing.T) {
	ctx := context.Background()
	frozen := time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC)

	lobby := newFakeLobby()
	lobby.seedErr("game-1", errors.New("transport"))
	cache := newCacheForTest(t, lobby, time.Minute, 8, func() time.Time { return frozen })

	_, failedErr := cache.Resolve(ctx, "game-1", "user-1")
	require.Error(t, failedErr)

	// Lobby recovers: drop the seeded error and publish a roster.
	func() {
		lobby.mu.Lock()
		defer lobby.mu.Unlock()
		delete(lobby.errs, "game-1")
		lobby.answers["game-1"] = []ports.Membership{active("user-1", "Aelinari")}
	}()

	got, recoveredErr := cache.Resolve(ctx, "game-1", "user-1")
	require.NoError(t, recoveredErr)
	assert.Equal(t, "active", got)
}
func TestResolveRejectsNilContextAndReceiver(t *testing.T) {
lobby := newFakeLobby()
clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
var nilCtx context.Context
_, err := cache.Resolve(nilCtx, "game-1", "user-1")
require.Error(t, err)
var nilCache *membership.Cache
_, err = nilCache.Resolve(context.Background(), "game-1", "user-1")
require.Error(t, err)
}
func TestStatusFromLobbyIsPreserved(t *testing.T) {
lobby := newFakeLobby()
lobby.seed("game-1", []ports.Membership{
{UserID: "user-1", RaceName: "Aelinari", Status: "active", JoinedAt: time.Unix(0, 0).UTC()},
{UserID: "user-2", RaceName: "Drazi", Status: "removed", JoinedAt: time.Unix(0, 0).UTC()},
{UserID: "user-3", RaceName: "Vorlons", Status: "blocked", JoinedAt: time.Unix(0, 0).UTC()},
})
clock := func() time.Time { return time.Date(2026, 5, 1, 12, 0, 0, 0, time.UTC) }
cache := newCacheForTest(t, lobby, time.Minute, 8, clock)
for userID, expected := range map[string]string{"user-1": "active", "user-2": "removed", "user-3": "blocked"} {
status, err := cache.Resolve(context.Background(), "game-1", userID)
require.NoError(t, err)
assert.Equal(t, expected, status, "user %s", userID)
}
}
@@ -0,0 +1,13 @@
package membership
import "errors"
// ErrLobbyUnavailable signals that a Resolve call could not be completed
// because the Lobby fetch failed. The cache wraps every error returned by
// the LobbyClient (including `ports.ErrLobbyUnavailable` from the
// adapter) together with this sentinel, so both this sentinel and the
// underlying cause match under errors.Is; hot-path services map this
// sentinel to `service_unavailable`.
//
// Callers branch with errors.Is. Returned only on cache miss / TTL expiry
// when the Lobby fetch fails; cached entries are served regardless of
// upstream availability until the TTL elapses.
var ErrLobbyUnavailable = errors.New("membership cache: lobby unavailable")