feat: game lobby service

Ilia Denisov
2026-04-25 23:20:55 +02:00
committed by GitHub
parent 32dc29359a
commit 48b0056b49
336 changed files with 57074 additions and 1418 deletions
@@ -0,0 +1,287 @@
// Package userlifecycle implements the Redis Streams consumer for the
// `user:lifecycle_events` stream. It wires the consumer behind the
// `ports.UserLifecycleConsumer` interface so the cascade worker can
// register a handler without depending on Redis directly.
//
// The consumer mirrors the reliability shape used by `worker/gmevents`:
// XREAD blocks for `BlockTimeout`, decoded events are dispatched to the
// registered handler, and the persisted offset advances only after the
// handler returns nil. Decoding errors and unknown event kinds are
// logged and absorbed (the offset advances) so a malformed entry never
// stalls the stream. Handler errors hold the offset on the current
// entry so the next loop iteration retries.
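//
// A minimal wiring sketch (redisClient, offsetStore, cascadeHandler,
// and ctx are placeholders supplied by the caller):
//
//	consumer, err := userlifecycle.NewConsumer(userlifecycle.Config{
//		Client:       redisClient,
//		Stream:       "user:lifecycle_events",
//		BlockTimeout: 5 * time.Second,
//		OffsetStore:  offsetStore,
//	})
//	if err != nil {
//		return err
//	}
//	consumer.OnEvent(cascadeHandler)
//	go func() { _ = consumer.Run(ctx) }()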
package userlifecycle
import (
"context"
"errors"
"fmt"
"log/slog"
"strconv"
"strings"
"sync"
"time"
"galaxy/lobby/internal/ports"
"github.com/redis/go-redis/v9"
)
// streamOffsetLabel identifies the user-lifecycle consumer in the
// stream-offset store. It stays stable when the underlying stream key
// is renamed via configuration.
const streamOffsetLabel = "user_lifecycle"
// Config groups the dependencies used by Consumer.
type Config struct {
// Client provides XREAD access to the user-lifecycle stream.
Client *redis.Client
// Stream stores the Redis Streams key consumed by the worker. The
// production default is `user:lifecycle_events`.
Stream string
// BlockTimeout bounds the blocking XREAD window.
BlockTimeout time.Duration
// OffsetStore persists the last successfully processed entry id under
// the `user_lifecycle` label.
OffsetStore ports.StreamOffsetStore
	// Clock supplies the worker's wall-clock source. Defaults to
	// time.Now when nil.
Clock func() time.Time
// Logger receives structured worker-level events. Defaults to
// slog.Default when nil.
Logger *slog.Logger
}
// Consumer drives the user-lifecycle processing loop.
type Consumer struct {
client *redis.Client
stream string
blockTimeout time.Duration
offsetStore ports.StreamOffsetStore
clock func() time.Time
logger *slog.Logger
mu sync.Mutex
handler ports.UserLifecycleHandler
}
// NewConsumer validates cfg and constructs a Consumer.
func NewConsumer(cfg Config) (*Consumer, error) {
switch {
case cfg.Client == nil:
return nil, errors.New("new user lifecycle consumer: nil redis client")
case strings.TrimSpace(cfg.Stream) == "":
return nil, errors.New("new user lifecycle consumer: stream must not be empty")
case cfg.BlockTimeout <= 0:
return nil, errors.New("new user lifecycle consumer: block timeout must be positive")
case cfg.OffsetStore == nil:
return nil, errors.New("new user lifecycle consumer: nil offset store")
}
clock := cfg.Clock
if clock == nil {
clock = time.Now
}
logger := cfg.Logger
if logger == nil {
logger = slog.Default()
}
return &Consumer{
client: cfg.Client,
stream: cfg.Stream,
blockTimeout: cfg.BlockTimeout,
offsetStore: cfg.OffsetStore,
clock: clock,
logger: logger.With("worker", "lobby.userlifecycle", "stream", cfg.Stream),
}, nil
}
// OnEvent installs handler as the sole dispatcher for decoded events.
// A second call replaces the previous handler. Calling OnEvent
// concurrently with Run is safe.
func (consumer *Consumer) OnEvent(handler ports.UserLifecycleHandler) {
if consumer == nil {
return
}
consumer.mu.Lock()
consumer.handler = handler
consumer.mu.Unlock()
}
// Run drives the XREAD loop until ctx is cancelled. The offset advances
// only after a successful handler return so a transient failure replays
// the same entry on the next iteration.
func (consumer *Consumer) Run(ctx context.Context) error {
if consumer == nil || consumer.client == nil {
return errors.New("run user lifecycle consumer: nil consumer")
}
if ctx == nil {
return errors.New("run user lifecycle consumer: nil context")
}
if err := ctx.Err(); err != nil {
return err
}
lastID, found, err := consumer.offsetStore.Load(ctx, streamOffsetLabel)
if err != nil {
return fmt.Errorf("run user lifecycle consumer: load offset: %w", err)
}
if !found {
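		// "0-0" sorts before every real entry id, so a consumer with no
		// persisted offset reads the stream from the beginning.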
lastID = "0-0"
}
consumer.logger.Info("user lifecycle consumer started",
"block_timeout", consumer.blockTimeout.String(),
"start_entry_id", lastID,
)
defer consumer.logger.Info("user lifecycle consumer stopped")
for {
streams, err := consumer.client.XRead(ctx, &redis.XReadArgs{
Streams: []string{consumer.stream, lastID},
Count: 1,
Block: consumer.blockTimeout,
}).Result()
switch {
case err == nil:
for _, stream := range streams {
for _, message := range stream.Messages {
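					// A false return means the handler failed: skip the
					// offset save so the unchanged lastID replays this entry
					// on the next XRead. With Count fixed at 1 there is at
					// most one message per batch, so no later entry can be
					// processed ahead of the retry.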
if !consumer.handleMessage(ctx, message) {
continue
}
if err := consumer.offsetStore.Save(ctx, streamOffsetLabel, message.ID); err != nil {
return fmt.Errorf("run user lifecycle consumer: save offset: %w", err)
}
lastID = message.ID
}
}
		case errors.Is(err, redis.Nil):
			// Block timeout elapsed with no new entries; poll again.
			continue
		case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, redis.ErrClosed)):
			// Shutdown path: return the context error unwrapped so callers
			// can match it with errors.Is(err, context.Canceled).
			return ctx.Err()
default:
return fmt.Errorf("run user lifecycle consumer: %w", err)
}
}
}
// Shutdown is a no-op; the consumer relies on context cancellation.
func (consumer *Consumer) Shutdown(ctx context.Context) error {
if ctx == nil {
return errors.New("shutdown user lifecycle consumer: nil context")
}
return nil
}
// handleMessage decodes one Redis Stream entry and dispatches it to the
// registered handler. It returns true when the offset is allowed to
// advance, false when the consumer must hold the offset and retry on
// the next iteration. Decoding errors and unknown event kinds advance
// the offset so a malformed entry never stalls the stream.
func (consumer *Consumer) handleMessage(ctx context.Context, message redis.XMessage) bool {
event, err := decodeUserLifecycleEvent(message)
if err != nil {
consumer.logger.WarnContext(ctx, "decode user lifecycle event",
"stream_entry_id", message.ID,
"err", err.Error(),
)
return true
}
if !event.EventType.IsKnown() {
consumer.logger.InfoContext(ctx, "unknown user lifecycle event type",
"stream_entry_id", message.ID,
"event_type", event.EventType,
)
return true
}
consumer.mu.Lock()
handler := consumer.handler
consumer.mu.Unlock()
if handler == nil {
consumer.logger.WarnContext(ctx, "no user lifecycle handler registered; entry dropped",
"stream_entry_id", message.ID,
)
return true
}
if err := handler(ctx, event); err != nil {
consumer.logger.WarnContext(ctx, "handle user lifecycle event",
"stream_entry_id", message.ID,
"event_type", event.EventType,
"user_id", event.UserID,
"err", err.Error(),
)
return false
}
consumer.logger.InfoContext(ctx, "user lifecycle event processed",
"stream_entry_id", message.ID,
"event_type", event.EventType,
"user_id", event.UserID,
)
return true
}
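// decodeUserLifecycleEvent maps one raw stream entry onto the
// port-level event. event_type, user_id, and occurred_at_ms are
// required; source, actor_type, actor_id, reason_code, and trace_id
// are optional metadata. A representative producer entry, with
// illustrative field values (the canonical event_type strings live
// in ports):
//
//	XADD user:lifecycle_events * \
//		event_type user.lifecycle.deleted \
//		user_id user-42 \
//		occurred_at_ms 1775200000000 \
//		reason_code user_request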
func decodeUserLifecycleEvent(message redis.XMessage) (ports.UserLifecycleEvent, error) {
	eventType := strings.TrimSpace(optionalString(message.Values, "event_type"))
	userID := strings.TrimSpace(optionalString(message.Values, "user_id"))
	occurredAtRaw := strings.TrimSpace(optionalString(message.Values, "occurred_at_ms"))
	if eventType == "" {
		return ports.UserLifecycleEvent{}, errors.New("missing event_type")
	}
	if userID == "" {
		return ports.UserLifecycleEvent{}, errors.New("missing user_id")
	}
	if occurredAtRaw == "" {
		return ports.UserLifecycleEvent{}, errors.New("missing occurred_at_ms")
	}
	ms, err := strconv.ParseInt(occurredAtRaw, 10, 64)
	if err != nil {
		return ports.UserLifecycleEvent{}, fmt.Errorf("invalid occurred_at_ms: %w", err)
	}
	if ms <= 0 {
		return ports.UserLifecycleEvent{}, errors.New("invalid occurred_at_ms: must be positive")
	}
return ports.UserLifecycleEvent{
EntryID: message.ID,
EventType: ports.UserLifecycleEventType(eventType),
		UserID:     userID,
OccurredAt: time.UnixMilli(ms).UTC(),
Source: optionalString(message.Values, "source"),
ActorType: optionalString(message.Values, "actor_type"),
ActorID: optionalString(message.Values, "actor_id"),
ReasonCode: optionalString(message.Values, "reason_code"),
TraceID: optionalString(message.Values, "trace_id"),
}, nil
}
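// optionalString returns the named field from a stream entry's value
// map, accepting both string and []byte payloads. Missing keys and
// other value types decode to the empty string.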
func optionalString(values map[string]any, key string) string {
raw, ok := values[key]
if !ok {
return ""
}
switch typed := raw.(type) {
case string:
return typed
case []byte:
return string(typed)
default:
return ""
}
}
// Compile-time assertion: Consumer satisfies the port interface.
var _ ports.UserLifecycleConsumer = (*Consumer)(nil)
@@ -0,0 +1,323 @@
package userlifecycle_test
import (
"context"
"io"
"log/slog"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"galaxy/lobby/internal/adapters/streamoffsetstub"
"galaxy/lobby/internal/adapters/userlifecycle"
"galaxy/lobby/internal/ports"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
	testStream    = "user:lifecycle_events"
	offsetLabel   = "user_lifecycle"
	occurredAtMs  = int64(1775200000000)
	defaultUserID = "user-1"
)
func silentLogger() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
type harness struct {
server *miniredis.Miniredis
client *redis.Client
offsets *streamoffsetstub.Store
consumer *userlifecycle.Consumer
}
func newHarness(t *testing.T) *harness {
t.Helper()
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
offsets := streamoffsetstub.NewStore()
consumer, err := userlifecycle.NewConsumer(userlifecycle.Config{
Client: client,
Stream: testStream,
BlockTimeout: 50 * time.Millisecond,
OffsetStore: offsets,
Clock: func() time.Time { return time.UnixMilli(occurredAtMs).UTC() },
Logger: silentLogger(),
})
require.NoError(t, err)
return &harness{
server: server,
client: client,
offsets: offsets,
consumer: consumer,
}
}
func TestNewConsumerRejectsMissingDeps(t *testing.T) {
server := miniredis.RunT(t)
client := redis.NewClient(&redis.Options{Addr: server.Addr()})
t.Cleanup(func() { _ = client.Close() })
_, err := userlifecycle.NewConsumer(userlifecycle.Config{
Stream: testStream,
BlockTimeout: time.Second,
OffsetStore: streamoffsetstub.NewStore(),
})
require.Error(t, err)
_, err = userlifecycle.NewConsumer(userlifecycle.Config{
Client: client,
BlockTimeout: time.Second,
OffsetStore: streamoffsetstub.NewStore(),
})
require.Error(t, err)
_, err = userlifecycle.NewConsumer(userlifecycle.Config{
Client: client,
Stream: testStream,
OffsetStore: streamoffsetstub.NewStore(),
})
require.Error(t, err)
_, err = userlifecycle.NewConsumer(userlifecycle.Config{
Client: client,
Stream: testStream,
BlockTimeout: time.Second,
})
require.Error(t, err)
}
func TestRunDispatchesPermanentBlockedAndAdvancesOffset(t *testing.T) {
h := newHarness(t)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
var (
mu sync.Mutex
seen []ports.UserLifecycleEvent
ready = make(chan struct{}, 4)
)
h.consumer.OnEvent(func(_ context.Context, event ports.UserLifecycleEvent) error {
mu.Lock()
seen = append(seen, event)
mu.Unlock()
ready <- struct{}{}
return nil
})
doneCh := make(chan error, 1)
go func() { doneCh <- h.consumer.Run(ctx) }()
publishEvent(t, h, ports.UserLifecycleEventTypePermanentBlocked, defaultUserID,
map[string]any{"actor_id": "admin-1", "reason_code": "abuse"})
awaitDeliveries(t, ready, 1)
publishEvent(t, h, ports.UserLifecycleEventTypeDeleted, "user-2",
map[string]any{"reason_code": "user_request"})
awaitDeliveries(t, ready, 1)
cancel()
require.ErrorIs(t, <-doneCh, context.Canceled)
mu.Lock()
defer mu.Unlock()
require.Len(t, seen, 2)
first := seen[0]
assert.Equal(t, ports.UserLifecycleEventTypePermanentBlocked, first.EventType)
assert.Equal(t, defaultUserID, first.UserID)
assert.Equal(t, "admin-1", first.ActorID)
assert.Equal(t, "abuse", first.ReasonCode)
assert.False(t, first.OccurredAt.IsZero())
assert.Equal(t, time.UTC, first.OccurredAt.Location())
second := seen[1]
assert.Equal(t, ports.UserLifecycleEventTypeDeleted, second.EventType)
assert.Equal(t, "user-2", second.UserID)
stored, ok, err := h.offsets.Load(context.Background(), offsetLabel)
require.NoError(t, err)
require.True(t, ok)
require.Equal(t, second.EntryID, stored)
}
func TestRunHoldsOffsetWhenHandlerErrors(t *testing.T) {
h := newHarness(t)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
var attempts atomic.Int32
releaseHandler := make(chan struct{}, 1)
h.consumer.OnEvent(func(_ context.Context, event ports.UserLifecycleEvent) error {
attempt := attempts.Add(1)
if attempt == 1 {
releaseHandler <- struct{}{}
return assertErr{message: "transient"}
}
releaseHandler <- struct{}{}
return nil
})
doneCh := make(chan error, 1)
go func() { doneCh <- h.consumer.Run(ctx) }()
entryID := publishEvent(t, h, ports.UserLifecycleEventTypePermanentBlocked, defaultUserID, nil)
awaitDeliveries(t, releaseHandler, 2)
cancel()
require.ErrorIs(t, <-doneCh, context.Canceled)
require.GreaterOrEqual(t, int(attempts.Load()), 2)
stored, ok, err := h.offsets.Load(context.Background(), offsetLabel)
require.NoError(t, err)
require.True(t, ok)
require.Equal(t, entryID, stored)
}
func TestRunSkipsMalformedEntries(t *testing.T) {
h := newHarness(t)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
var dispatched atomic.Int32
called := make(chan struct{}, 4)
h.consumer.OnEvent(func(_ context.Context, _ ports.UserLifecycleEvent) error {
dispatched.Add(1)
called <- struct{}{}
return nil
})
doneCh := make(chan error, 1)
go func() { doneCh <- h.consumer.Run(ctx) }()
// Missing required user_id field.
require.NoError(t, h.client.XAdd(ctx, &redis.XAddArgs{
Stream: testStream,
Values: map[string]any{
"event_type": string(ports.UserLifecycleEventTypePermanentBlocked),
"occurred_at_ms": strconv.FormatInt(occurredAtMs, 10),
},
}).Err())
// Unknown event_type.
require.NoError(t, h.client.XAdd(ctx, &redis.XAddArgs{
Stream: testStream,
Values: map[string]any{
"event_type": "user.lifecycle.misnamed",
"user_id": defaultUserID,
"occurred_at_ms": strconv.FormatInt(occurredAtMs, 10),
},
}).Err())
// Valid event after the malformed ones.
validID := publishEvent(t, h, ports.UserLifecycleEventTypeDeleted, defaultUserID, nil)
awaitDeliveries(t, called, 1)
cancel()
require.ErrorIs(t, <-doneCh, context.Canceled)
assert.Equal(t, int32(1), dispatched.Load())
stored, ok, err := h.offsets.Load(context.Background(), offsetLabel)
require.NoError(t, err)
require.True(t, ok)
require.Equal(t, validID, stored)
}
func TestRunResumesFromPersistedOffset(t *testing.T) {
h := newHarness(t)
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
// Pre-publish a first event, then mark it as already processed via
// the offset store.
skippedID := publishEvent(t, h, ports.UserLifecycleEventTypePermanentBlocked, "user-skipped", nil)
	h.offsets.Set(offsetLabel, skippedID)
var (
mu sync.Mutex
seen []ports.UserLifecycleEvent
)
delivered := make(chan struct{}, 4)
h.consumer.OnEvent(func(_ context.Context, event ports.UserLifecycleEvent) error {
mu.Lock()
seen = append(seen, event)
mu.Unlock()
delivered <- struct{}{}
return nil
})
doneCh := make(chan error, 1)
go func() { doneCh <- h.consumer.Run(ctx) }()
wantID := publishEvent(t, h, ports.UserLifecycleEventTypeDeleted, "user-after", nil)
awaitDeliveries(t, delivered, 1)
cancel()
require.ErrorIs(t, <-doneCh, context.Canceled)
mu.Lock()
defer mu.Unlock()
require.Len(t, seen, 1)
require.Equal(t, "user-after", seen[0].UserID)
require.Equal(t, wantID, seen[0].EntryID)
}
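// publishEvent XADDs a well-formed lifecycle entry with default
// metadata fields and returns the generated entry id. Keys in extra
// override or extend the defaults.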
func publishEvent(
t *testing.T,
h *harness,
eventType ports.UserLifecycleEventType,
userID string,
extra map[string]any,
) string {
t.Helper()
values := map[string]any{
"event_type": string(eventType),
"user_id": userID,
"occurred_at_ms": strconv.FormatInt(occurredAtMs, 10),
"source": "admin_internal_api",
"actor_type": "admin_user",
"reason_code": "policy_violation",
}
for key, value := range extra {
values[key] = value
}
id, err := h.client.XAdd(context.Background(), &redis.XAddArgs{
Stream: testStream,
Values: values,
}).Result()
require.NoError(t, err)
return id
}
func awaitDeliveries(t *testing.T, ch <-chan struct{}, count int) {
t.Helper()
deadline := time.After(2 * time.Second)
for i := 0; i < count; i++ {
select {
case <-ch:
case <-deadline:
t.Fatalf("timed out waiting for delivery %d/%d", i+1, count)
}
}
}
type assertErr struct{ message string }
func (e assertErr) Error() string { return e.message }