feat: backend service

This commit is contained in:
Ilia Denisov
2026-05-06 10:14:55 +03:00
committed by GitHub
parent 3e2622757e
commit f446c6a2ac
1486 changed files with 49720 additions and 266401 deletions
+107
View File
@@ -0,0 +1,107 @@
package notification
import (
"context"
"github.com/google/uuid"
)
// AdminListNotificationsPage bundles the pagination metadata returned to
// the admin API. The shape mirrors `mail.AdminListDeliveriesPage` so
// handlers stay symmetric.
type AdminListNotificationsPage struct {
	// Items holds the notifications for the requested page, newest-first.
	Items []Notification
	// Page is the 1-indexed page number actually served (after clamping).
	Page int
	// PageSize is the clamped page size used for the store query.
	PageSize int
	// Total is the full row count across all pages, for pagination UIs.
	Total int64
}
// AdminListDeadLettersPage mirrors AdminListNotificationsPage for the
// dead-letter listing.
type AdminListDeadLettersPage struct {
	// Items holds the dead-letter rows for the requested page.
	Items []DeadLetter
	// Page is the 1-indexed page number actually served (after clamping).
	Page int
	// PageSize is the clamped page size used for the store query.
	PageSize int
	// Total is the full row count across all pages.
	Total int64
}
// AdminListMalformedPage mirrors AdminListNotificationsPage for the
// malformed-intent listing.
type AdminListMalformedPage struct {
	// Items holds the malformed-intent rows for the requested page.
	Items []MalformedIntent
	// Page is the 1-indexed page number actually served (after clamping).
	Page int
	// PageSize is the clamped page size used for the store query.
	PageSize int
	// Total is the full row count across all pages.
	Total int64
}
// AdminListNotifications returns the notification page newest-first.
// page is 1-indexed; pageSize is bounded by normalisePaging.
// AdminListNotifications returns one page of notifications, newest
// first. page is 1-indexed; both paging inputs are clamped by
// normalisePaging before the store query runs.
func (s *Service) AdminListNotifications(ctx context.Context, page, pageSize int) (AdminListNotificationsPage, error) {
	p, size := normalisePaging(page, pageSize)
	res, err := s.deps.Store.ListNotifications(ctx, (p-1)*size, size)
	if err != nil {
		return AdminListNotificationsPage{}, err
	}
	out := AdminListNotificationsPage{
		Items:    res.Items,
		Page:     p,
		PageSize: size,
		Total:    res.Total,
	}
	return out, nil
}
// AdminGetNotification returns a single notification by id; the
// sentinel ErrNotificationNotFound surfaces a 404 in the handler
// layer.
// AdminGetNotification fetches one notification by id. The store's
// ErrNotificationNotFound sentinel passes through for the handler
// layer to translate into a 404.
func (s *Service) AdminGetNotification(ctx context.Context, id uuid.UUID) (Notification, error) {
	n, err := s.deps.Store.GetNotification(ctx, id)
	return n, err
}
// AdminListDeadLetters returns the dead-letter page newest-first.
// AdminListDeadLetters returns one page of dead-letter rows, newest
// first, with paging inputs clamped by normalisePaging.
func (s *Service) AdminListDeadLetters(ctx context.Context, page, pageSize int) (AdminListDeadLettersPage, error) {
	p, size := normalisePaging(page, pageSize)
	res, err := s.deps.Store.ListDeadLetters(ctx, (p-1)*size, size)
	if err != nil {
		return AdminListDeadLettersPage{}, err
	}
	out := AdminListDeadLettersPage{
		Items:    res.Items,
		Page:     p,
		PageSize: size,
		Total:    res.Total,
	}
	return out, nil
}
// AdminListMalformed returns the malformed-intent page newest-first.
// AdminListMalformed returns one page of malformed-intent rows, newest
// first, with paging inputs clamped by normalisePaging.
func (s *Service) AdminListMalformed(ctx context.Context, page, pageSize int) (AdminListMalformedPage, error) {
	p, size := normalisePaging(page, pageSize)
	res, err := s.deps.Store.ListMalformed(ctx, (p-1)*size, size)
	if err != nil {
		return AdminListMalformedPage{}, err
	}
	out := AdminListMalformedPage{
		Items:    res.Items,
		Page:     p,
		PageSize: size,
		Total:    res.Total,
	}
	return out, nil
}
// normalisePaging clamps page and pageSize to the values handlers can
// safely pass through to the store. Defaults match the existing admin
// endpoints (`mail` package); pageSize is capped at 200.
// normalisePaging clamps page and pageSize to values safe to hand to
// the store. page defaults to 1; pageSize defaults to 25 and is capped
// at 200, matching the existing admin endpoints in the `mail` package.
func normalisePaging(page, pageSize int) (int, int) {
	if page < 1 {
		page = 1
	}
	switch {
	case pageSize < 1:
		pageSize = 25
	case pageSize > 200:
		pageSize = 200
	}
	return page, pageSize
}
+35
View File
@@ -0,0 +1,35 @@
package notification
import (
"context"
"github.com/google/uuid"
"go.uber.org/zap"
)
// OnUserDeleted is the user-side soft-delete cascade hook. It marks
// every pending or retrying route owned by userID as `skipped` so the
// worker stops trying to deliver to a vanished account; published
// rows stay intact as audit trail.
//
// The catalog (`backend/README.md` §10) does not include a
// `user.*` kind, so this is the only place where the notification
// module reacts to user lifecycle events directly. The cascade is
// idempotent — repeated invocations on the same user simply find no
// pending rows.
// OnUserDeleted marks every pending or retrying route owned by userID
// as `skipped` so the worker stops delivering to a deleted account;
// published rows are untouched. A nil UUID is a no-op, and repeated
// calls are idempotent — later invocations find no pending rows.
func (s *Service) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
	if userID == uuid.Nil {
		return nil
	}
	count, err := s.deps.Store.SkipPendingRoutesForUser(ctx, userID, s.nowUTC())
	if err != nil {
		return err
	}
	if count > 0 {
		s.deps.Logger.Info("notification routes skipped on user delete",
			zap.String("user_id", userID.String()),
			zap.Int64("count", count),
		)
	}
	return nil
}
+127
View File
@@ -0,0 +1,127 @@
package notification
// Kind constants name every supported notification kind. The
// implementation trims the README §10 catalog to the set with active
// producers in the codebase; further kinds (`game.*`,
// `mail.dead_lettered`) require an additive change here together with
// a producer.
const (
	// Lobby lifecycle kinds, emitted by the lobby producer.
	KindLobbyInviteReceived         = "lobby.invite.received"
	KindLobbyInviteRevoked          = "lobby.invite.revoked"
	KindLobbyApplicationSubmitted   = "lobby.application.submitted"
	KindLobbyApplicationApproved    = "lobby.application.approved"
	KindLobbyApplicationRejected    = "lobby.application.rejected"
	KindLobbyMembershipRemoved      = "lobby.membership.removed"
	KindLobbyMembershipBlocked      = "lobby.membership.blocked"
	KindLobbyRaceNameRegistered     = "lobby.race_name.registered"
	KindLobbyRaceNamePending        = "lobby.race_name.pending"
	KindLobbyRaceNameExpired        = "lobby.race_name.expired"
	// Runtime failure kinds; per the catalog below these are
	// admin-targeted, email-only.
	KindRuntimeImagePullFailed      = "runtime.image_pull_failed"
	KindRuntimeContainerStartFailed = "runtime.container_start_failed"
	KindRuntimeStartConfigInvalid   = "runtime.start_config_invalid"
)
// CatalogEntry describes the per-kind delivery policy: which channels
// fan out and whether the kind targets the platform admin recipient
// instead of per-user accounts.
type CatalogEntry struct {
	// Channels lists the channels this kind fans out to, in the order
	// rows are materialised in `notification_routes`. The closed set is
	// {`push`, `email`}.
	Channels []string
	// Admin reports whether the email channel targets the configured
	// admin recipient (`BACKEND_NOTIFICATION_ADMIN_EMAIL`) rather than
	// per-user accounts. Admin-targeted kinds carry an empty Recipients
	// slice on the producer side.
	Admin bool
	// MailTemplateID is the template_id passed to `mail.EnqueueTemplate`
	// for email routes. The catalog uses the kind itself by convention,
	// matching `mail.TemplateLoginCode`'s use of `auth.login_code`.
	// Empty for push-only kinds.
	MailTemplateID string
}
// catalog maps each supported kind to its delivery policy. The map is
// queried by Submit and by the dispatcher worker; producers do not
// inspect it directly.
// Push-only kinds leave MailTemplateID empty; admin-targeted kinds set
// Admin and are email-only.
var catalog = map[string]CatalogEntry{
	KindLobbyInviteReceived: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyInviteReceived,
	},
	KindLobbyInviteRevoked: {
		Channels: []string{ChannelPush},
	},
	KindLobbyApplicationSubmitted: {
		Channels: []string{ChannelPush},
	},
	KindLobbyApplicationApproved: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyApplicationApproved,
	},
	KindLobbyApplicationRejected: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyApplicationRejected,
	},
	KindLobbyMembershipRemoved: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyMembershipRemoved,
	},
	KindLobbyMembershipBlocked: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyMembershipBlocked,
	},
	KindLobbyRaceNameRegistered: {
		Channels: []string{ChannelPush},
	},
	KindLobbyRaceNamePending: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyRaceNamePending,
	},
	KindLobbyRaceNameExpired: {
		Channels: []string{ChannelPush},
	},
	KindRuntimeImagePullFailed: {
		Channels:       []string{ChannelEmail},
		Admin:          true,
		MailTemplateID: KindRuntimeImagePullFailed,
	},
	KindRuntimeContainerStartFailed: {
		Channels:       []string{ChannelEmail},
		Admin:          true,
		MailTemplateID: KindRuntimeContainerStartFailed,
	},
	KindRuntimeStartConfigInvalid: {
		Channels:       []string{ChannelEmail},
		Admin:          true,
		MailTemplateID: KindRuntimeStartConfigInvalid,
	},
}
// LookupCatalog returns the per-kind policy and a boolean reporting
// whether the kind exists. Callers (Submit, Worker) branch on the
// boolean rather than receiving a sentinel error.
// LookupCatalog returns the per-kind delivery policy and whether the
// kind exists. Callers (Submit, Worker) branch on the boolean rather
// than on a sentinel error.
func LookupCatalog(kind string) (CatalogEntry, bool) {
	if entry, ok := catalog[kind]; ok {
		return entry, true
	}
	return CatalogEntry{}, false
}
// SupportedKinds returns the closed kind set in deterministic order.
// The function exists to back tests and the migration CHECK constraint
// audit; it is not on the hot path.
// SupportedKinds returns the closed kind set in a fixed, deterministic
// order. It backs tests and the migration CHECK constraint audit and is
// not on the hot path.
func SupportedKinds() []string {
	kinds := make([]string, 0, 13)
	kinds = append(kinds,
		KindLobbyInviteReceived,
		KindLobbyInviteRevoked,
		KindLobbyApplicationSubmitted,
		KindLobbyApplicationApproved,
		KindLobbyApplicationRejected,
		KindLobbyMembershipRemoved,
		KindLobbyMembershipBlocked,
		KindLobbyRaceNameRegistered,
		KindLobbyRaceNamePending,
		KindLobbyRaceNameExpired,
		KindRuntimeImagePullFailed,
		KindRuntimeContainerStartFailed,
		KindRuntimeStartConfigInvalid,
	)
	return kinds
}
@@ -0,0 +1,77 @@
package notification
import (
"testing"
)
// TestCatalogClosure asserts that the SupportedKinds slice and the
// `catalog` map agree on the kind set. This catches dropped entries
// during catalog edits.
// TestCatalogClosure asserts that SupportedKinds and the `catalog` map
// agree on the kind set, catching entries dropped during catalog edits.
func TestCatalogClosure(t *testing.T) {
	t.Parallel()
	kinds := SupportedKinds()
	if len(kinds) != len(catalog) {
		t.Fatalf("supported kinds=%d, catalog entries=%d", len(kinds), len(catalog))
	}
	for _, kind := range kinds {
		if _, present := catalog[kind]; !present {
			t.Errorf("kind %q listed by SupportedKinds but missing from catalog", kind)
		}
	}
}
// TestCatalogChannels enforces the per-kind channel set documented in
// `backend/README.md` §10. A drift here means the README and the code
// disagree — either fix the table or fix the test.
// TestCatalogChannels enforces the per-kind channel set documented in
// `backend/README.md` §10. A failure means the README and the code
// disagree — fix whichever is wrong.
func TestCatalogChannels(t *testing.T) {
	t.Parallel()
	expect := map[string][]string{
		KindLobbyInviteReceived:         {ChannelPush, ChannelEmail},
		KindLobbyInviteRevoked:          {ChannelPush},
		KindLobbyApplicationSubmitted:   {ChannelPush},
		KindLobbyApplicationApproved:    {ChannelPush, ChannelEmail},
		KindLobbyApplicationRejected:    {ChannelPush, ChannelEmail},
		KindLobbyMembershipRemoved:      {ChannelPush, ChannelEmail},
		KindLobbyMembershipBlocked:      {ChannelPush, ChannelEmail},
		KindLobbyRaceNameRegistered:     {ChannelPush},
		KindLobbyRaceNamePending:        {ChannelPush, ChannelEmail},
		KindLobbyRaceNameExpired:        {ChannelPush},
		KindRuntimeImagePullFailed:      {ChannelEmail},
		KindRuntimeContainerStartFailed: {ChannelEmail},
		KindRuntimeStartConfigInvalid:   {ChannelEmail},
	}
	for kind, want := range expect {
		entry, ok := LookupCatalog(kind)
		if !ok {
			t.Errorf("kind %q missing from catalog", kind)
			continue
		}
		got := entry.Channels
		if len(got) != len(want) {
			t.Errorf("kind %q channels=%v want %v", kind, got, want)
			continue
		}
		for i := range want {
			if got[i] != want[i] {
				t.Errorf("kind %q channels[%d]=%s want %s", kind, i, got[i], want[i])
			}
		}
	}
}
// TestCatalogAdminOnlyForRuntime keeps the runtime kinds admin-only and
// every lobby kind user-facing.
func TestCatalogAdminOnlyForRuntime(t *testing.T) {
t.Parallel()
for kind, entry := range catalog {
switch kind {
case KindRuntimeImagePullFailed, KindRuntimeContainerStartFailed, KindRuntimeStartConfigInvalid:
if !entry.Admin {
t.Errorf("kind %q expected Admin=true", kind)
}
default:
if entry.Admin {
t.Errorf("kind %q expected Admin=false", kind)
}
}
}
}
+99
View File
@@ -0,0 +1,99 @@
package notification
import (
"context"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/user"
"github.com/google/uuid"
"go.uber.org/zap"
)
// PushPublisher is the publisher contract notification uses to emit a
// `client_event` push frame to gateway. The real implementation lives
// in `backend/internal/push`; NewNoopPushPublisher satisfies the
// interface for tests that do not exercise push behaviour.
//
// Implementations must be concurrency-safe. The deviceSessionID pointer
// narrows the event to a single device session when non-nil; nil means
// fan out to every active session of userID. eventID, requestID and
// traceID are correlation identifiers that gateway forwards verbatim
// into the signed client envelope; empty strings are forwarded
// unchanged.
type PushPublisher interface {
	// PublishClientEvent emits one client event. A nil deviceSessionID
	// means fan out to every active session of userID; non-nil narrows
	// to that session. eventID, requestID and traceID are correlation
	// ids forwarded as-is (empty strings included).
	PublishClientEvent(ctx context.Context, userID uuid.UUID, deviceSessionID *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error
}
// Mailer is the email surface notification uses for outbound mail. The
// canonical implementation is `*mail.Service.EnqueueTemplate`; tests
// substitute a recording fake. The contract matches mail's existing
// signature so the wiring layer can pass the concrete service directly.
type Mailer interface {
	// EnqueueTemplate queues a templated email for recipient;
	// idempotencyKey de-duplicates repeated enqueues of the same route
	// (the dispatcher passes the route id — see performDispatch).
	EnqueueTemplate(ctx context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error
}
// AccountResolver looks up the recipient profile (email + preferred
// language) by user_id. The canonical implementation is
// `*user.Service.GetAccount`. The narrow interface keeps the Service
// from depending on every part of the user surface.
type AccountResolver interface {
	// GetAccount returns the account record for userID; the service
	// uses it to resolve recipient email and locale.
	GetAccount(ctx context.Context, userID uuid.UUID) (user.Account, error)
}
// Deps aggregates every collaborator the Service depends on.
//
// Store, Mail, and Accounts must be non-nil. Push defaults to the no-op
// publisher when omitted; Now defaults to time.Now; Logger defaults to
// zap.NewNop. Config carries the worker interval, the max-attempts cap,
// and the optional admin-email destination from `BACKEND_NOTIFICATION_*`.
type Deps struct {
	// Store is the Postgres-backed persistence layer. Must be non-nil.
	Store *Store
	// Mail sends templated emails; canonically *mail.Service. Must be
	// non-nil.
	Mail Mailer
	// Push emits client events; nil defaults to the no-op publisher in
	// NewService.
	Push PushPublisher
	// Accounts resolves recipient email/locale by user id. Must be
	// non-nil.
	Accounts AccountResolver
	// Config carries worker interval, max-attempts cap, and the
	// optional admin email from `BACKEND_NOTIFICATION_*`.
	Config config.NotificationConfig
	// Now overrides time.Now for deterministic tests. A nil Now defaults
	// to time.Now in NewService.
	Now func() time.Time
	// Logger is named under "notification" by NewService. Nil falls back
	// to zap.NewNop.
	Logger *zap.Logger
}
// NewNoopPushPublisher returns a PushPublisher that logs every event
// at debug level and returns nil. The canonical publisher lives in
// `backend/internal/push`; this constructor exists for tests.
// NewNoopPushPublisher returns a PushPublisher that only logs each
// event at debug level and always succeeds. The canonical publisher
// lives in `backend/internal/push`; this one exists for tests.
func NewNoopPushPublisher(logger *zap.Logger) PushPublisher {
	l := logger
	if l == nil {
		l = zap.NewNop()
	}
	return &noopPushPublisher{logger: l.Named("push.noop")}
}
// noopPushPublisher is the debug-logging no-op implementation returned
// by NewNoopPushPublisher.
type noopPushPublisher struct {
	logger *zap.Logger
}
// PublishClientEvent logs the would-be event at debug level and returns
// nil. Optional correlation ids are only logged when non-empty.
func (p *noopPushPublisher) PublishClientEvent(_ context.Context, userID uuid.UUID, deviceSessionID *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error {
	fields := make([]zap.Field, 0, 7)
	fields = append(fields,
		zap.String("user_id", userID.String()),
		zap.String("kind", kind),
		zap.Int("payload_keys", len(payload)),
	)
	if deviceSessionID != nil {
		fields = append(fields, zap.String("device_session_id", deviceSessionID.String()))
	}
	appendIfSet := func(key, val string) {
		if val != "" {
			fields = append(fields, zap.String(key, val))
		}
	}
	appendIfSet("event_id", eventID)
	appendIfSet("request_id", requestID)
	appendIfSet("trace_id", traceID)
	p.logger.Debug("client event (noop publisher)", fields...)
	return nil
}
+175
View File
@@ -0,0 +1,175 @@
package notification
import (
"context"
"database/sql"
"errors"
"fmt"
"math/rand/v2"
"time"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)
// traceIDFromContext returns the W3C trace id of the active span as a
// hex string, or an empty string when ctx carries no recording span.
// The id is forwarded to gateway as ClientEvent.trace_id so push
// envelopes can be correlated to the producing trace.
// traceIDFromContext extracts the W3C trace id of the active span as a
// hex string; it returns "" when ctx is nil or carries no trace id. The
// value becomes ClientEvent.trace_id so push envelopes correlate back
// to the producing trace.
func traceIDFromContext(ctx context.Context) string {
	if ctx == nil {
		return ""
	}
	if sc := trace.SpanContextFromContext(ctx); sc.HasTraceID() {
		return sc.TraceID().String()
	}
	return ""
}
// finaliseDispatch records the outcome of a single delivery attempt
// inside tx. The status transition table mirrors README §10 and the
// `notification_routes`'s CHECK constraint:
//
// - success → published (next_attempt_at NULL)
// - failure with attempt < max → retrying (next_attempt_at armed)
// - failure with attempt >= max → dead_lettered (+ insert
// notification_dead_letters row)
//
// The function does not commit tx: the caller (worker / Submit best-
// effort) owns the transaction so it can compose the dispatch with the
// preceding ClaimDueRoutes lock.
func (s *Service) finaliseDispatch(ctx context.Context, tx *sql.Tx, claim ClaimedRoute, dispatchErr error, at time.Time) error {
	if dispatchErr == nil {
		// Success path: route flips straight to published.
		return s.deps.Store.MarkRoutePublished(ctx, tx, claim.Route.RouteID, at)
	}
	// attempt is the 1-indexed count the row will carry after the
	// Mark*/Schedule* update below runs.
	attempt := claim.Route.Attempts + 1
	reason := dispatchErr.Error()
	maxAttempts := claim.Route.MaxAttempts
	if maxAttempts <= 0 {
		// Row carries no per-route cap; fall back to the configured
		// package-wide maximum.
		maxAttempts = int32(s.deps.Config.MaxAttempts)
	}
	if attempt >= maxAttempts {
		// Terminal failure: dead-letter the route (Warn so operators
		// notice; a dead_letters row is inserted alongside the status
		// flip).
		s.deps.Logger.Warn("notification route dead-lettered",
			zap.String("kind", claim.Notification.Kind),
			zap.String("channel", claim.Route.Channel),
			zap.String("route_id", claim.Route.RouteID.String()),
			zap.Int32("attempt", attempt),
			zap.Error(dispatchErr),
		)
		return s.deps.Store.MarkRouteDeadLettered(ctx, tx, claim.Notification.NotificationID, claim.Route.RouteID, at, reason)
	}
	// Retryable failure: arm next_attempt_at with exponential backoff
	// keyed off the new attempt count.
	nextAt := at.Add(routeBackoff(attempt))
	s.deps.Logger.Info("notification route retry scheduled",
		zap.String("kind", claim.Notification.Kind),
		zap.String("channel", claim.Route.Channel),
		zap.String("route_id", claim.Route.RouteID.String()),
		zap.Int32("attempt", attempt),
		zap.Time("next_attempt_at", nextAt),
		zap.Error(dispatchErr),
	)
	return s.deps.Store.ScheduleRouteRetry(ctx, tx, claim.Route.RouteID, at, nextAt, reason)
}
// bestEffortDispatch is invoked from Submit immediately after a route
// is durably persisted. It opens its own short transaction, runs the
// channel call, and writes the outcome with the same Mark* helpers
// the worker uses. Failures here are logged at debug level — the
// worker will retry on the next tick, so the producer never sees the
// synchronous failure.
// bestEffortDispatch runs one synchronous delivery attempt for a route
// immediately after Submit persisted it. It opens its own short
// transaction and records the outcome via finaliseDispatch. Every
// failure here is logged at debug level only — the background worker
// retries on its next tick, so the producer never observes the error.
func (s *Service) bestEffortDispatch(ctx context.Context, n Notification, route Route) {
	if route.Status != RouteStatusPending {
		return
	}
	tx, err := s.deps.Store.BeginTx(ctx)
	if err != nil {
		s.deps.Logger.Debug("best-effort dispatch: begin tx failed",
			zap.String("route_id", route.RouteID.String()),
			zap.Error(err))
		return
	}
	// Rollback is a no-op after a successful Commit.
	defer func() { _ = tx.Rollback() }()
	claim := ClaimedRoute{Route: route, Notification: n}
	dispatchErr := s.performDispatch(ctx, claim)
	if finErr := s.finaliseDispatch(ctx, tx, claim, dispatchErr, s.nowUTC()); finErr != nil {
		s.deps.Logger.Debug("best-effort dispatch finalise failed",
			zap.String("route_id", route.RouteID.String()),
			zap.Error(finErr))
		return
	}
	if commitErr := tx.Commit(); commitErr != nil {
		s.deps.Logger.Debug("best-effort dispatch commit failed",
			zap.String("route_id", route.RouteID.String()),
			zap.Error(commitErr))
	}
}
// performDispatch runs the channel-specific delivery. Returns nil on
// success and any error otherwise. The caller decides between retry
// and dead-letter based on the attempt counter and persisted state.
// performDispatch runs the channel-specific delivery for one claimed
// route. A nil return means success; the caller decides retry vs
// dead-letter from the persisted attempt counter.
func (s *Service) performDispatch(ctx context.Context, claim ClaimedRoute) error {
	if err := ctx.Err(); err != nil {
		return err
	}
	route := claim.Route
	switch route.Channel {
	case ChannelPush:
		if route.UserID == nil {
			return errors.New("push route missing user_id")
		}
		return s.deps.Push.PublishClientEvent(
			ctx,
			*route.UserID,
			route.DeviceSessionID,
			claim.Notification.Kind,
			claim.Notification.Payload,
			route.RouteID.String(),            // eventID
			claim.Notification.IdempotencyKey, // requestID
			traceIDFromContext(ctx),           // traceID
		)
	case ChannelEmail:
		entry, known := LookupCatalog(claim.Notification.Kind)
		if !known {
			return fmt.Errorf("unknown kind %q", claim.Notification.Kind)
		}
		if trimSpace(route.ResolvedEmail) == "" {
			return errors.New("email route missing resolved recipient")
		}
		// The route id doubles as the mail idempotency_key so the mail
		// outbox UNIQUE(template_id, idempotency_key) de-duplicates a
		// re-enqueue if the worker re-claims after a crash before
		// commit. Producers never need to know the route id.
		return s.deps.Mail.EnqueueTemplate(ctx, entry.MailTemplateID, route.ResolvedEmail, claim.Notification.Payload, route.RouteID.String())
	default:
		return fmt.Errorf("unknown channel %q", route.Channel)
	}
}
// routeBackoff computes the per-attempt delay using the package
// constants and ±backoffJitter randomisation. attempt is 1-indexed
// (the value the row will carry after Mark*); attempt==1 maps to
// `backoffBase × backoffFactor⁰`.
func routeBackoff(attempt int32) time.Duration {
if attempt <= 1 {
return jitter(backoffBase)
}
d := float64(backoffBase)
for i := int32(1); i < attempt; i++ {
d *= backoffFactor
if time.Duration(d) >= backoffMax {
return jitter(backoffMax)
}
}
return jitter(time.Duration(d))
}
// jitter applies the package-standard ±backoffJitter swing using the
// new global v2 rand source.
// jitter applies the package-standard ±backoffJitter swing to d using
// the math/rand/v2 global source. A non-positive jitter fraction or a
// negative jittered result falls back to d unchanged.
func jitter(d time.Duration) time.Duration {
	if backoffJitter <= 0 {
		return d
	}
	delta := (rand.Float64()*2 - 1) * float64(d) * backoffJitter
	if out := time.Duration(float64(d) + delta); out >= 0 {
		return out
	}
	return d
}
@@ -0,0 +1,45 @@
package notification
import (
"testing"
"time"
)
// TestRouteBackoffMonotonic locks the documented schedule:
// attempt 1 == ~backoffBase, each subsequent attempt grows by
// backoffFactor up to backoffMax. The check uses the lower bound of
// the jitter window so the assertion is robust under random output.
// TestRouteBackoffMonotonic locks the documented schedule: attempt 1 is
// ~backoffBase and each subsequent attempt grows by backoffFactor up to
// backoffMax. Assertions use the jitter window bounds so they are
// robust under random output.
func TestRouteBackoffMonotonic(t *testing.T) {
	t.Parallel()
	bounds := func(d time.Duration) (time.Duration, time.Duration) {
		return time.Duration(float64(d) * (1 - backoffJitter)),
			time.Duration(float64(d) * (1 + backoffJitter))
	}
	base := float64(backoffBase)
	cases := []struct {
		attempt int32
		want    time.Duration
	}{
		{attempt: 1, want: backoffBase},
		{attempt: 2, want: time.Duration(base * backoffFactor)},
		{attempt: 3, want: time.Duration(base * backoffFactor * backoffFactor)},
	}
	for _, tc := range cases {
		got := routeBackoff(tc.attempt)
		lo, hi := bounds(tc.want)
		if got < lo || got > hi {
			t.Fatalf("attempt=%d got=%s want ~%s (±%.0f%%)", tc.attempt, got, tc.want, backoffJitter*100)
		}
	}
}
// TestRouteBackoffCap asserts the schedule clamps at backoffMax.
// TestRouteBackoffCap asserts the schedule clamps at backoffMax.
func TestRouteBackoffCap(t *testing.T) {
	t.Parallel()
	limit := time.Duration(float64(backoffMax) * (1 + backoffJitter))
	if got := routeBackoff(50); got > limit {
		t.Fatalf("attempt=50 got=%s exceeds cap (max=%s)", got, backoffMax)
	}
}
+22
View File
@@ -0,0 +1,22 @@
package notification
import "errors"
// ErrNotificationNotFound is returned by AdminGetNotification when no
// row matches the supplied identifier. Handlers map it to HTTP 404.
// Callers should compare with errors.Is, not ==.
var ErrNotificationNotFound = errors.New("notification: notification not found")

// ErrUnknownKind is returned by Submit when the intent's Kind is not in
// the catalog (`backend/README.md` §10). Submit does not surface it to
// the producer — it persists a malformed-intent record and returns nil.
// The exported sentinel exists so the package internals can branch on it.
var ErrUnknownKind = errors.New("notification: unknown kind")

// ErrEmptyIdempotencyKey is returned by Submit when the intent does not
// carry an idempotency_key. Same surface treatment as ErrUnknownKind:
// recorded as malformed, nil returned to the producer.
var ErrEmptyIdempotencyKey = errors.New("notification: idempotency_key must be non-empty")

// ErrNoRecipients is returned by Submit when a kind that requires user
// recipients arrives without any. Same surface treatment as
// ErrUnknownKind.
var ErrNoRecipients = errors.New("notification: at least one recipient is required")
@@ -0,0 +1,35 @@
package notification
import (
"context"
"galaxy/backend/internal/lobby"
)
// LobbyAdapter returns an implementation of `lobby.NotificationPublisher`
// backed by *Service. The adapter copies the producer-side intent shape
// into notification.Intent and calls Submit; Submit's own malformed
// fallback handles invalid payloads, so the adapter never blocks the
// caller. It implements the same interface the wiring otherwise
// connects to the no-op publisher.
func (s *Service) LobbyAdapter() lobby.NotificationPublisher {
	adapter := &lobbyAdapter{svc: s}
	return adapter
}
// lobbyAdapter bridges lobby.NotificationPublisher onto *Service.
type lobbyAdapter struct {
	svc *Service
}
// PublishLobbyEvent translates a lobby notification into an Intent and
// submits it. A nil adapter or missing service is a silent no-op so an
// unwired lobby producer never fails.
func (a *lobbyAdapter) PublishLobbyEvent(ctx context.Context, ev lobby.LobbyNotification) error {
	if a == nil || a.svc == nil {
		return nil
	}
	_, err := a.svc.Submit(ctx, Intent{
		Kind:           ev.Kind,
		IdempotencyKey: ev.IdempotencyKey,
		Recipients:     ev.Recipients,
		Payload:        ev.Payload,
	})
	return err
}
@@ -0,0 +1,117 @@
// Package notification implements the in-process notification pipeline
// described in `backend/PLAN.md` §5.7, `ARCHITECTURE.md` §12, and the
// catalog in `backend/README.md` §10. Producers (lobby, runtime) submit
// intents via Submit; the service persists each intent into
// `backend.notifications`, materialises one row per (recipient, channel)
// in `backend.notification_routes`, and attempts a synchronous best-effort
// dispatch. Failed routes are picked up by a background Worker that retries
// with exponential backoff and dead-letters past the configured maximum.
//
// Push routes are emitted via PushPublisher (the canonical
// `push.Service` over gRPC; the package also ships a
// NoopPushPublisher for tests). Email routes call
// mail.EnqueueTemplate, which feeds the durable mail outbox.
package notification
import (
"time"
"galaxy/backend/internal/config"
"go.uber.org/zap"
)
// Status values stored in `notification_routes.status`. Mirrored by the
// CHECK constraint in migration 00001.
const (
	// RouteStatusPending: freshly materialised, awaiting first attempt.
	RouteStatusPending = "pending"
	// RouteStatusRetrying: at least one failed attempt; next_attempt_at armed.
	RouteStatusRetrying = "retrying"
	// RouteStatusPublished: delivered successfully; terminal.
	RouteStatusPublished = "published"
	// RouteStatusSkipped: cancelled (e.g. owner deleted); terminal.
	RouteStatusSkipped = "skipped"
	// RouteStatusDeadLettered: failed past max attempts; terminal.
	RouteStatusDeadLettered = "dead_lettered"
)
// Channel values stored in `notification_routes.channel`. The catalog in
// `backend/README.md` §10 documents the per-kind set.
const (
	// ChannelPush fans out via the gateway push stream.
	ChannelPush = "push"
	// ChannelEmail fans out via the durable mail outbox.
	ChannelEmail = "email"
)
// Backoff parameters for the route worker. Mirrors the trade-off captured
// for the mail outbox in `backend/README.md`: exponential
// growth from a 10 second base, capped at 10 minutes, with ±25% jitter.
const (
	// backoffBase is the delay before the first retry.
	backoffBase = 10 * time.Second
	// backoffFactor is the exponential growth multiplier per attempt.
	backoffFactor = 2.0
	// backoffMax caps the computed delay.
	backoffMax = 10 * time.Minute
	// backoffJitter is the ± fractional swing applied to every delay.
	backoffJitter = 0.25
	// claimBatchSize bounds the number of routes pulled out of Postgres
	// per worker tick. Same logic as `mail.claimBatchSize`: each row is
	// processed in its own short transaction so a slow channel does not
	// block its peers.
	claimBatchSize = 16
)
// Service is the notification entry point. It composes the persistence
// store, the push and mail dispatchers, the account resolver used for
// recipient email lookups, runtime configuration, and a structured
// logger.
type Service struct {
	// deps holds every collaborator (store, mail, push, accounts,
	// config, clock, logger); normalised by NewService.
	deps Deps
}
// NewService constructs a Service from deps. Nil Logger defaults to
// zap.NewNop; nil Now defaults to time.Now. Store, Mail, and Accounts
// must be non-nil — calling Service methods with either nil panics on
// first use, matching how the rest of `internal/*` signals missing
// wiring. A nil Push defaults to the no-op publisher used by tests
// that do not exercise the gRPC stream.
// NewService constructs a Service from deps, filling defaults: nil Now
// becomes time.Now, nil Logger becomes zap.NewNop (then named
// "notification"), nil Push becomes the no-op publisher. Store, Mail,
// and Accounts must be non-nil — first use panics otherwise, matching
// how the rest of `internal/*` signals missing wiring.
func NewService(deps Deps) *Service {
	d := deps
	if d.Now == nil {
		d.Now = time.Now
	}
	logger := d.Logger
	if logger == nil {
		logger = zap.NewNop()
	}
	d.Logger = logger.Named("notification")
	if d.Push == nil {
		d.Push = NewNoopPushPublisher(d.Logger)
	}
	return &Service{deps: d}
}
// Config returns the runtime notification configuration. The Worker
// uses it to schedule the scan loop and bound retries; the value is the
// one supplied in Deps, unmodified.
func (s *Service) Config() config.NotificationConfig {
	return s.deps.Config
}
// Logger returns the "notification"-named structured logger set up by
// NewService. Worker and the admin handlers reuse it so scoped fields
// stay consistent.
func (s *Service) Logger() *zap.Logger {
	return s.deps.Logger
}
// now returns the package-configured clock; the helper keeps the rest
// of the code free from `if s.deps.Now == nil` checks.
// now returns the configured clock's current time, falling back to
// time.Now when no clock was injected. The nil check is defensive —
// NewService already defaults Now — so callers never need it.
func (s *Service) now() time.Time {
	if clock := s.deps.Now; clock != nil {
		return clock()
	}
	return time.Now()
}
// nowUTC returns the configured clock normalised to UTC, matching the
// convention used by `time.Time` columns elsewhere in `backend`.
func (s *Service) nowUTC() time.Time {
	return s.now().UTC()
}
// adminEmail returns the configured admin recipient address with
// surrounding whitespace removed. The empty string means no admin
// recipient is configured (admin-targeted kinds then have nowhere to
// deliver).
func (s *Service) adminEmail() string {
	return trimSpace(s.deps.Config.AdminEmail)
}
@@ -0,0 +1,35 @@
package notification
import (
"context"
"galaxy/backend/internal/runtime"
)
// RuntimeAdapter returns an implementation of
// `runtime.NotificationPublisher` backed by *Service. The adapter
// translates runtime's narrow `(kind, idempotency_key, payload)` shape
// into a notification.Intent and calls Submit. Recipient resolution is
// handled by Submit's catalog lookup: every kind runtime emits is
// `Admin: true`, so the recipient comes from the configured
// `BACKEND_NOTIFICATION_ADMIN_EMAIL`.
func (s *Service) RuntimeAdapter() runtime.NotificationPublisher {
	adapter := &runtimeAdapter{svc: s}
	return adapter
}
// runtimeAdapter bridges runtime.NotificationPublisher onto *Service.
type runtimeAdapter struct {
	svc *Service
}
// PublishRuntimeEvent wraps runtime's narrow (kind, idempotency_key,
// payload) shape in an Intent and submits it; recipients are left empty
// because runtime kinds resolve to the configured admin email. A nil
// adapter or missing service is a silent no-op.
func (a *runtimeAdapter) PublishRuntimeEvent(ctx context.Context, kind, idempotencyKey string, payload map[string]any) error {
	if a == nil || a.svc == nil {
		return nil
	}
	_, err := a.svc.Submit(ctx, Intent{
		Kind:           kind,
		IdempotencyKey: idempotencyKey,
		Payload:        payload,
	})
	return err
}
+606
View File
@@ -0,0 +1,606 @@
package notification
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/go-jet/jet/v2/qrm"
"github.com/google/uuid"
)
// Store is the Postgres-backed query surface for notifications,
// notification_routes, notification_dead_letters, and
// notification_malformed_intents. All queries are built through go-jet
// against the generated table bindings under
// `backend/internal/postgres/jet/backend/table`.
type Store struct {
	// db is the shared connection pool; per-operation transactions are
	// opened via BeginTx or the internal withTx helper.
	db *sql.DB
}
// NewStore constructs a Store wrapping db.
// NewStore constructs a Store wrapping db.
func NewStore(db *sql.DB) *Store {
	store := &Store{db: db}
	return store
}
// BeginTx exposes the transaction handle to the worker so the
// claim-dispatch-mark cycle stays within a single commit boundary.
// BeginTx exposes the transaction handle (default isolation options) to
// the worker so the claim-dispatch-mark cycle stays within a single
// commit boundary. The caller owns Commit/Rollback.
func (s *Store) BeginTx(ctx context.Context) (*sql.Tx, error) {
	return s.db.BeginTx(ctx, nil)
}
// RouteSeed describes one freshly-materialised route destined for an
// `INSERT INTO notification_routes` inside InsertNotification.
type RouteSeed struct {
	// RouteID is the caller-generated primary key for the route row.
	RouteID uuid.UUID
	// Channel is ChannelPush or ChannelEmail.
	Channel string
	// Status is the initial Route status (e.g. RouteStatusPending).
	Status string
	// MaxAttempts caps retries for this route; presumably <=0 defers to
	// the configured default (see finaliseDispatch) — confirm.
	MaxAttempts int32
	// NextAttemptAt arms the worker schedule; nil leaves it NULL.
	NextAttemptAt *time.Time
	// ResolvedEmail / ResolvedLocale are the recipient snapshot for
	// email routes; empty for push-only routes.
	ResolvedEmail string
	ResolvedLocale string
	// UserID / DeviceSessionID target push delivery; both nullable.
	UserID *uuid.UUID
	DeviceSessionID *uuid.UUID
	// SkippedAt / LastError pre-populate terminal bookkeeping columns.
	SkippedAt *time.Time
	LastError string
}
// InsertNotificationArgs aggregates the inputs to InsertNotification.
type InsertNotificationArgs struct {
	// NotificationID is the caller-generated primary key.
	NotificationID uuid.UUID
	// Kind + IdempotencyKey form the UNIQUE pair enforcing idempotency.
	Kind string
	IdempotencyKey string
	// UserID is the owning user, nil for admin-targeted kinds.
	UserID *uuid.UUID
	// Payload is JSON-encoded by encodePayload before insert.
	Payload map[string]any
	// Routes are the per-(recipient, channel) rows inserted alongside.
	Routes []RouteSeed
}
// InsertNotification persists a notification row together with its
// route rows in a single transaction. The (kind, idempotency_key)
// UNIQUE constraint serves the idempotency contract: the second
// caller observes inserted=false and the existing notification_id is
// returned. On the duplicate path no route rows are inserted and the
// transaction rolls back so an orphan notification cannot exist.
func (s *Store) InsertNotification(ctx context.Context, args InsertNotificationArgs) (uuid.UUID, bool, error) {
	// Encode outside the transaction so a bad payload never opens one.
	payload, err := encodePayload(args.Payload)
	if err != nil {
		return uuid.Nil, false, fmt.Errorf("encode payload: %w", err)
	}
	var (
		storedID uuid.UUID
		inserted bool
	)
	err = withTx(ctx, s.db, func(tx *sql.Tx) error {
		// ON CONFLICT DO NOTHING + RETURNING: a duplicate insert
		// returns no row, which surfaces as qrm.ErrNoRows below.
		insertStmt := table.Notifications.INSERT(
			table.Notifications.NotificationID,
			table.Notifications.Kind,
			table.Notifications.IdempotencyKey,
			table.Notifications.UserID,
			table.Notifications.Payload,
		).VALUES(
			args.NotificationID, args.Kind, args.IdempotencyKey, args.UserID, string(payload),
		).
			ON_CONFLICT(table.Notifications.Kind, table.Notifications.IdempotencyKey).
			DO_NOTHING().
			RETURNING(table.Notifications.NotificationID)
		var freshRow model.Notifications
		err := insertStmt.QueryContext(ctx, tx, &freshRow)
		switch {
		case errors.Is(err, qrm.ErrNoRows):
			// Idempotent re-submit. Look up the existing row id and bail
			// with errIdempotentNoop so withTx rolls back without
			// inserting route rows.
			lookupStmt := postgres.SELECT(table.Notifications.NotificationID).
				FROM(table.Notifications).
				WHERE(
					table.Notifications.Kind.EQ(postgres.String(args.Kind)).
						AND(table.Notifications.IdempotencyKey.EQ(postgres.String(args.IdempotencyKey))),
				).
				LIMIT(1)
			var existing model.Notifications
			if scanErr := lookupStmt.QueryContext(ctx, tx, &existing); scanErr != nil {
				return fmt.Errorf("lookup existing notification: %w", scanErr)
			}
			storedID = existing.NotificationID
			return errIdempotentNoop
		case err != nil:
			return fmt.Errorf("insert notification: %w", err)
		}
		storedID = freshRow.NotificationID
		inserted = true
		// Fresh insert: materialise one row per route seed in the same
		// transaction so the notification and its routes are atomic.
		for _, r := range args.Routes {
			routeStmt := table.NotificationRoutes.INSERT(
				table.NotificationRoutes.RouteID,
				table.NotificationRoutes.NotificationID,
				table.NotificationRoutes.Channel,
				table.NotificationRoutes.Status,
				table.NotificationRoutes.MaxAttempts,
				table.NotificationRoutes.NextAttemptAt,
				table.NotificationRoutes.ResolvedEmail,
				table.NotificationRoutes.ResolvedLocale,
				table.NotificationRoutes.LastError,
				table.NotificationRoutes.SkippedAt,
			).VALUES(
				r.RouteID, args.NotificationID, r.Channel, r.Status,
				r.MaxAttempts, r.NextAttemptAt,
				r.ResolvedEmail, r.ResolvedLocale, r.LastError,
				r.SkippedAt,
			)
			if _, err := routeStmt.ExecContext(ctx, tx); err != nil {
				return fmt.Errorf("insert route %s: %w", r.RouteID, err)
			}
		}
		return nil
	})
	// Duplicate path: report (existing id, inserted=false, no error).
	if errors.Is(err, errIdempotentNoop) {
		return storedID, false, nil
	}
	if err != nil {
		return uuid.Nil, false, err
	}
	return storedID, inserted, nil
}
// errIdempotentNoop tells withTx to roll back the transaction without
// surfacing an error to the caller. It must never escape this package:
// InsertNotification translates it into (storedID, inserted=false, nil).
var errIdempotentNoop = errors.New("notification store: idempotent noop")
// MarkRoutePublished transitions a route into the published state:
// attempts is bumped by one, last_attempt_at and published_at are
// stamped with `at`, the retry schedule (next_attempt_at) is cleared,
// and last_error is reset to the empty string. The statement runs on
// the caller-supplied transaction.
func (s *Store) MarkRoutePublished(ctx context.Context, tx *sql.Tx, routeID uuid.UUID, at time.Time) error {
	routes := table.NotificationRoutes
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusPublished)),
			routes.Attempts.SET(routes.Attempts.ADD(postgres.Int(1))),
			routes.LastAttemptAt.SET(postgres.TimestampzT(at)),
			routes.PublishedAt.SET(postgres.TimestampzT(at)),
			routes.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			routes.LastError.SET(postgres.String("")),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(routes.RouteID.EQ(postgres.UUID(routeID)))
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("mark route published: %w", err)
	}
	return nil
}
// ScheduleRouteRetry moves a route into the retrying state: attempts
// is bumped by one, last_attempt_at is stamped with `at`,
// next_attempt_at is armed for `nextAt`, and the diagnostic message
// lands in last_error. The statement runs on the caller's transaction.
func (s *Store) ScheduleRouteRetry(ctx context.Context, tx *sql.Tx, routeID uuid.UUID, at time.Time, nextAt time.Time, errMsg string) error {
	routes := table.NotificationRoutes
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusRetrying)),
			routes.Attempts.SET(routes.Attempts.ADD(postgres.Int(1))),
			routes.LastAttemptAt.SET(postgres.TimestampzT(at)),
			routes.NextAttemptAt.SET(postgres.TimestampzT(nextAt)),
			routes.LastError.SET(postgres.String(errMsg)),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(routes.RouteID.EQ(postgres.UUID(routeID)))
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("schedule route retry: %w", err)
	}
	return nil
}
// MarkRouteDeadLettered moves the route to the terminal dead_lettered
// state and records a matching notification_dead_letters row. Both
// statements run on the caller-supplied transaction so they commit
// (or roll back) atomically.
func (s *Store) MarkRouteDeadLettered(ctx context.Context, tx *sql.Tx, notificationID, routeID uuid.UUID, at time.Time, reason string) error {
	routes := table.NotificationRoutes
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusDeadLettered)),
			routes.Attempts.SET(routes.Attempts.ADD(postgres.Int(1))),
			routes.LastAttemptAt.SET(postgres.TimestampzT(at)),
			routes.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			routes.DeadLetteredAt.SET(postgres.TimestampzT(at)),
			routes.LastError.SET(postgres.String(reason)),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(routes.RouteID.EQ(postgres.UUID(routeID)))
	if _, err := update.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("mark route dead-lettered: %w", err)
	}
	letters := table.NotificationDeadLetters
	archive := letters.INSERT(
		letters.DeadLetterID, letters.NotificationID, letters.RouteID, letters.ArchivedAt, letters.Reason,
	).VALUES(uuid.New(), notificationID, routeID, at, reason)
	if _, err := archive.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("insert notification dead-letter: %w", err)
	}
	return nil
}
// ClaimedRoute bundles a locked route row with its parent notification
// so the worker has every field it needs in one trip.
type ClaimedRoute struct {
	Route        Route        // the locked notification_routes row
	Notification Notification // parent row supplying kind / payload / user-id
}
// ClaimDueRoutes locks up to `limit` due routes with FOR UPDATE SKIP
// LOCKED, joins the parent notification to surface kind/payload, and
// returns them. exclude is the list of route_ids already handled in
// the current tick — they are filtered out so the same row cannot
// chew through MaxAttempts inside a single tick when its retry
// schedule lands at <= now().
func (s *Store) ClaimDueRoutes(ctx context.Context, tx *sql.Tx, limit int, exclude ...uuid.UUID) ([]ClaimedRoute, error) {
	r := table.NotificationRoutes
	n := table.Notifications
	// "Due" = pending/retrying with no schedule or a schedule at/before now.
	condition := r.Status.IN(postgres.String(RouteStatusPending), postgres.String(RouteStatusRetrying)).
		AND(r.NextAttemptAt.IS_NULL().OR(r.NextAttemptAt.LT_EQ(postgres.NOW())))
	if len(exclude) > 0 {
		excludeExprs := make([]postgres.Expression, 0, len(exclude))
		for _, id := range exclude {
			excludeExprs = append(excludeExprs, postgres.UUID(id))
		}
		condition = condition.AND(r.RouteID.NOT_IN(excludeExprs...))
	}
	stmt := postgres.SELECT(
		r.AllColumns,
		n.Kind, n.IdempotencyKey, n.UserID, n.Payload, n.CreatedAt,
	).
		FROM(r.INNER_JOIN(n, n.NotificationID.EQ(r.NotificationID))).
		WHERE(condition).
		ORDER_BY(postgres.COALESCE(r.NextAttemptAt, r.CreatedAt).ASC()).
		LIMIT(int64(limit)).
		// Lock only the route rows (OF r); rows already locked by a
		// concurrent worker are skipped rather than waited on.
		FOR(postgres.UPDATE().OF(r).SKIP_LOCKED())
	// Anonymous destination: the embedded route model plus the handful
	// of joined notification columns.
	var rows []struct {
		model.NotificationRoutes
		Notifications struct {
			Kind           string
			IdempotencyKey string
			UserID         *uuid.UUID
			Payload        *string
			CreatedAt      time.Time
		}
	}
	if err := stmt.QueryContext(ctx, tx, &rows); err != nil {
		return nil, fmt.Errorf("claim due routes: %w", err)
	}
	out := make([]ClaimedRoute, 0, len(rows))
	for _, row := range rows {
		route := modelToRoute(row.NotificationRoutes)
		// The route model carries no user column of its own — the
		// user-id lives on the parent notification.
		route.UserID = row.Notifications.UserID
		notif := Notification{
			NotificationID: row.NotificationRoutes.NotificationID,
			Kind:           row.Notifications.Kind,
			IdempotencyKey: row.Notifications.IdempotencyKey,
			UserID:         row.Notifications.UserID,
			CreatedAt:      row.Notifications.CreatedAt,
		}
		decoded, err := decodePayload(payloadBytesFromPtr(row.Notifications.Payload))
		if err != nil {
			return nil, fmt.Errorf("decode notification payload: %w", err)
		}
		notif.Payload = decoded
		out = append(out, ClaimedRoute{Route: route, Notification: notif})
	}
	return out, nil
}
// ListNotificationsResult bundles a page of notifications and the
// total-row count. Layout mirrors `mail.AdminListDeliveriesPage`.
type ListNotificationsResult struct {
	Items []Notification // one page, newest-first
	Total int64          // count over the whole table, for pagination metadata
}
// ListNotifications returns one page of notifications ordered
// newest-first (created_at, then notification_id, descending) together
// with the total row count for pagination.
func (s *Store) ListNotifications(ctx context.Context, offset, limit int) (ListNotificationsResult, error) {
	total, err := countAll(ctx, s.db, table.Notifications)
	if err != nil {
		return ListNotificationsResult{}, fmt.Errorf("count notifications: %w", err)
	}
	cols := table.Notifications
	query := postgres.SELECT(
		cols.NotificationID, cols.Kind, cols.IdempotencyKey, cols.UserID,
		cols.Payload, cols.CreatedAt,
	).
		FROM(cols).
		ORDER_BY(cols.CreatedAt.DESC(), cols.NotificationID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))
	var page []model.Notifications
	if err := query.QueryContext(ctx, s.db, &page); err != nil {
		return ListNotificationsResult{}, fmt.Errorf("list notifications: %w", err)
	}
	out := make([]Notification, 0, len(page))
	for _, row := range page {
		item, convErr := modelToNotification(row)
		if convErr != nil {
			return ListNotificationsResult{}, convErr
		}
		out = append(out, item)
	}
	return ListNotificationsResult{Items: out, Total: total}, nil
}
// GetNotification loads a single notification by primary key,
// translating qrm.ErrNoRows into the package sentinel
// ErrNotificationNotFound so the handler layer can answer 404.
func (s *Store) GetNotification(ctx context.Context, id uuid.UUID) (Notification, error) {
	cols := table.Notifications
	query := postgres.SELECT(
		cols.NotificationID, cols.Kind, cols.IdempotencyKey, cols.UserID,
		cols.Payload, cols.CreatedAt,
	).
		FROM(cols).
		WHERE(cols.NotificationID.EQ(postgres.UUID(id))).
		LIMIT(1)
	var row model.Notifications
	err := query.QueryContext(ctx, s.db, &row)
	switch {
	case errors.Is(err, qrm.ErrNoRows):
		return Notification{}, ErrNotificationNotFound
	case err != nil:
		return Notification{}, fmt.Errorf("get notification: %w", err)
	}
	return modelToNotification(row)
}
// ListDeadLettersResult bundles a page of dead-letters and the total
// row count.
type ListDeadLettersResult struct {
	Items []DeadLetter // one page, newest-first
	Total int64        // count over the whole table, for pagination metadata
}
// ListDeadLetters returns one page of dead-letters ordered newest-first
// (archived_at, then dead_letter_id, descending) together with the
// total row count.
func (s *Store) ListDeadLetters(ctx context.Context, offset, limit int) (ListDeadLettersResult, error) {
	total, err := countAll(ctx, s.db, table.NotificationDeadLetters)
	if err != nil {
		return ListDeadLettersResult{}, fmt.Errorf("count dead-letters: %w", err)
	}
	letters := table.NotificationDeadLetters
	query := postgres.SELECT(
		letters.DeadLetterID, letters.NotificationID, letters.RouteID, letters.ArchivedAt, letters.Reason,
	).
		FROM(letters).
		ORDER_BY(letters.ArchivedAt.DESC(), letters.DeadLetterID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))
	var page []model.NotificationDeadLetters
	if err := query.QueryContext(ctx, s.db, &page); err != nil {
		return ListDeadLettersResult{}, fmt.Errorf("list dead-letters: %w", err)
	}
	out := make([]DeadLetter, 0, len(page))
	for _, row := range page {
		out = append(out, DeadLetter{
			DeadLetterID:   row.DeadLetterID,
			NotificationID: row.NotificationID,
			RouteID:        row.RouteID,
			ArchivedAt:     row.ArchivedAt,
			Reason:         row.Reason,
		})
	}
	return ListDeadLettersResult{Items: out, Total: total}, nil
}
// ListMalformedResult bundles a page of malformed intents and the
// total row count.
type ListMalformedResult struct {
	Items []MalformedIntent // one page, newest-first
	Total int64             // count over the whole table, for pagination metadata
}
// ListMalformed returns one page of malformed intents ordered
// newest-first (received_at, then id, descending) together with the
// total row count.
func (s *Store) ListMalformed(ctx context.Context, offset, limit int) (ListMalformedResult, error) {
	total, err := countAll(ctx, s.db, table.NotificationMalformedIntents)
	if err != nil {
		return ListMalformedResult{}, fmt.Errorf("count malformed intents: %w", err)
	}
	intents := table.NotificationMalformedIntents
	query := postgres.SELECT(intents.ID, intents.ReceivedAt, intents.Payload, intents.Reason).
		FROM(intents).
		ORDER_BY(intents.ReceivedAt.DESC(), intents.ID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))
	var page []model.NotificationMalformedIntents
	if err := query.QueryContext(ctx, s.db, &page); err != nil {
		return ListMalformedResult{}, fmt.Errorf("list malformed intents: %w", err)
	}
	out := make([]MalformedIntent, 0, len(page))
	for _, row := range page {
		payload, decodeErr := decodePayload([]byte(row.Payload))
		if decodeErr != nil {
			return ListMalformedResult{}, fmt.Errorf("decode malformed payload: %w", decodeErr)
		}
		out = append(out, MalformedIntent{
			ID:         row.ID,
			ReceivedAt: row.ReceivedAt,
			Payload:    payload,
			Reason:     row.Reason,
		})
	}
	return ListMalformedResult{Items: out, Total: total}, nil
}
// InsertMalformed records a producer-supplied intent that failed
// validation. The payload is best-effort JSON-encoded; the row is
// purely diagnostic and never blocks the producer. received_at is not
// set here — presumably the column default stamps it; confirm schema.
func (s *Store) InsertMalformed(ctx context.Context, payload map[string]any, reason string) error {
	body, err := encodePayload(payload)
	if err != nil {
		return fmt.Errorf("encode malformed payload: %w", err)
	}
	intents := table.NotificationMalformedIntents
	insert := intents.INSERT(intents.ID, intents.Payload, intents.Reason).
		VALUES(uuid.New(), string(body), reason)
	_, err = insert.ExecContext(ctx, s.db)
	if err != nil {
		return fmt.Errorf("insert malformed intent: %w", err)
	}
	return nil
}
// SkipPendingRoutesForUser flips every pending or retrying route whose
// parent notification belongs to userID into status='skipped' and
// reports how many rows changed. The `OnUserDeleted` cascade calls it
// so the worker stops delivering to a vanished account; published rows
// are untouched and survive as audit trail.
func (s *Store) SkipPendingRoutesForUser(ctx context.Context, userID uuid.UUID, at time.Time) (int64, error) {
	routes := table.NotificationRoutes
	notifs := table.Notifications
	// Subquery: every notification owned by the deleted user.
	ownedByUser := postgres.SELECT(notifs.NotificationID).
		FROM(notifs).
		WHERE(notifs.UserID.EQ(postgres.UUID(userID)))
	update := routes.UPDATE().
		SET(
			routes.Status.SET(postgres.String(RouteStatusSkipped)),
			routes.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			routes.SkippedAt.SET(postgres.TimestampzT(at)),
			routes.UpdatedAt.SET(postgres.TimestampzT(at)),
			routes.LastError.SET(postgres.String("recipient soft-deleted")),
		).
		WHERE(
			routes.Status.IN(postgres.String(RouteStatusPending), postgres.String(RouteStatusRetrying)).
				AND(routes.NotificationID.IN(ownedByUser)),
		)
	res, err := update.ExecContext(ctx, s.db)
	if err != nil {
		return 0, fmt.Errorf("skip pending routes: %w", err)
	}
	affected, err := res.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("rows affected: %w", err)
	}
	return affected, nil
}
// withTx wraps fn in a Postgres transaction: a nil return from fn
// commits, a non-nil return rolls back and is passed through. A
// rollback failure is deliberately swallowed because fn's own error is
// the more actionable one.
func withTx(ctx context.Context, db *sql.DB, fn func(tx *sql.Tx) error) error {
	tx, beginErr := db.BeginTx(ctx, nil)
	if beginErr != nil {
		return fmt.Errorf("notification store: begin tx: %w", beginErr)
	}
	if fnErr := fn(tx); fnErr != nil {
		_ = tx.Rollback() // fn's error wins; rollback failure is not actionable
		return fnErr
	}
	if commitErr := tx.Commit(); commitErr != nil {
		return fmt.Errorf("notification store: commit tx: %w", commitErr)
	}
	return nil
}
// modelToRoute projects a generated model row onto the public Route
// struct (without the user-id, which lives on the parent
// notification). Optional timestamps are deep-copied so the Route
// never aliases the model row's pointers.
func modelToRoute(row model.NotificationRoutes) Route {
	// clone copies an optional timestamp into fresh storage.
	clone := func(src *time.Time) *time.Time {
		if src == nil {
			return nil
		}
		c := *src
		return &c
	}
	return Route{
		RouteID:        row.RouteID,
		NotificationID: row.NotificationID,
		Channel:        row.Channel,
		Status:         row.Status,
		Attempts:       row.Attempts,
		MaxAttempts:    row.MaxAttempts,
		LastError:      row.LastError,
		ResolvedEmail:  row.ResolvedEmail,
		ResolvedLocale: row.ResolvedLocale,
		CreatedAt:      row.CreatedAt,
		UpdatedAt:      row.UpdatedAt,
		NextAttemptAt:  clone(row.NextAttemptAt),
		LastAttemptAt:  clone(row.LastAttemptAt),
		PublishedAt:    clone(row.PublishedAt),
		DeadLetteredAt: clone(row.DeadLetteredAt),
		SkippedAt:      clone(row.SkippedAt),
	}
}
// modelToNotification converts a generated model row into the public
// Notification value, decoding the stored jsonb payload on the way.
func modelToNotification(row model.Notifications) (Notification, error) {
	payload, err := decodePayload(payloadBytesFromPtr(row.Payload))
	if err != nil {
		return Notification{}, fmt.Errorf("decode payload: %w", err)
	}
	out := Notification{
		NotificationID: row.NotificationID,
		Kind:           row.Kind,
		IdempotencyKey: row.IdempotencyKey,
		UserID:         row.UserID,
		Payload:        payload,
		CreatedAt:      row.CreatedAt,
	}
	return out, nil
}
// payloadBytesFromPtr converts the nullable jsonb-as-text column value
// into the []byte form decodePayload expects. A NULL (nil pointer)
// maps to a nil slice.
func payloadBytesFromPtr(p *string) []byte {
	var out []byte
	if p != nil {
		out = []byte(*p)
	}
	return out
}
// encodePayload renders a producer payload to JSON for storage in a
// jsonb column. A nil map is stored as JSON null, which is harmless on
// the read path because decodePayload maps it back to nil.
func encodePayload(payload map[string]any) ([]byte, error) {
	if payload != nil {
		return json.Marshal(payload)
	}
	return []byte("null"), nil
}
// decodePayload parses a jsonb column back into the producer's map.
// An empty buffer or a (case-insensitive, whitespace-padded) "null"
// round-trips to a nil map.
func decodePayload(buf []byte) (map[string]any, error) {
	if len(buf) == 0 {
		return nil, nil
	}
	if strings.EqualFold(strings.TrimSpace(string(buf)), "null") {
		return nil, nil
	}
	out := map[string]any{}
	if err := json.Unmarshal(buf, &out); err != nil {
		return nil, err
	}
	return out, nil
}
// countAll executes `SELECT COUNT(*) FROM <tbl>` through jet and
// returns the value. The destination field carries an alias tag so QRM
// can map the bare "count" alias produced by AS("count").
func countAll(ctx context.Context, db qrm.DB, tbl postgres.ReadableTable) (int64, error) {
	query := postgres.SELECT(postgres.COUNT(postgres.STAR).AS("count")).FROM(tbl)
	var out struct {
		Count int64 `alias:"count"`
	}
	if err := query.QueryContext(ctx, db, &out); err != nil {
		return 0, err
	}
	return out.Count, nil
}
+258
View File
@@ -0,0 +1,258 @@
package notification
import (
"context"
"errors"
"fmt"
"time"
"galaxy/backend/internal/user"
"github.com/google/uuid"
"go.uber.org/zap"
)
// Submit accepts a producer intent, validates it against the catalog,
// resolves recipients, materialises route rows, persists everything in
// one transaction, and best-effort dispatches the routes synchronously.
//
// The contract: producers never block on Submit, and Submit never
// surfaces a validation failure as an error — malformed intents go to
// `notification_malformed_intents` and the call returns nil. Real
// errors (encoder failure, Postgres trouble) are wrapped and returned.
//
// On idempotent re-submit (same kind + idempotency_key) the existing
// notification id is honoured and route materialisation is skipped.
func (s *Service) Submit(ctx context.Context, intent Intent) (uuid.UUID, error) {
	// Validation gate: each failure is recorded as malformed and the
	// call returns (uuid.Nil, nil) per the no-error contract above.
	entry, ok := LookupCatalog(intent.Kind)
	if !ok {
		s.recordMalformed(ctx, intent, ErrUnknownKind.Error())
		return uuid.Nil, nil
	}
	if trimSpace(intent.IdempotencyKey) == "" {
		s.recordMalformed(ctx, intent, ErrEmptyIdempotencyKey.Error())
		return uuid.Nil, nil
	}
	if !entry.Admin && len(intent.Recipients) == 0 {
		s.recordMalformed(ctx, intent, ErrNoRecipients.Error())
		return uuid.Nil, nil
	}
	now := s.nowUTC()
	notificationID := uuid.New()
	// The notifications.user_id column only holds a value for the
	// single-recipient, non-admin case; fan-out recipients live on the
	// route rows instead.
	var primaryUserID *uuid.UUID
	if !entry.Admin && len(intent.Recipients) == 1 {
		uid := intent.Recipients[0]
		primaryUserID = &uid
	}
	// NOTE(review): routes are materialised (and recipients resolved)
	// before the insert, so on an idempotent duplicate this work is
	// done and then discarded.
	routes, err := s.materialiseRoutes(ctx, notificationID, entry, intent, now)
	if err != nil {
		return uuid.Nil, err
	}
	storedID, inserted, err := s.deps.Store.InsertNotification(ctx, InsertNotificationArgs{
		NotificationID: notificationID,
		Kind:           intent.Kind,
		IdempotencyKey: intent.IdempotencyKey,
		UserID:         primaryUserID,
		Payload:        intent.Payload,
		Routes:         routes,
	})
	if err != nil {
		return uuid.Nil, fmt.Errorf("notification submit: %w", err)
	}
	if !inserted {
		// Duplicate (kind, idempotency_key): hand back the id of the
		// previously stored notification without dispatching anything.
		s.deps.Logger.Debug("idempotent submit, returning existing notification",
			zap.String("kind", intent.Kind),
			zap.String("idempotency_key", intent.IdempotencyKey),
			zap.String("notification_id", storedID.String()),
		)
		return storedID, nil
	}
	// Best-effort synchronous dispatch: any pending route gets a single
	// attempt right now. Failures stay on the row for the worker to
	// retry; they are not surfaced to producers.
	for i := range routes {
		if routes[i].Status != RouteStatusPending {
			continue
		}
		s.bestEffortDispatch(ctx, Notification{
			NotificationID: notificationID,
			Kind:           intent.Kind,
			IdempotencyKey: intent.IdempotencyKey,
			UserID:         primaryUserID,
			Payload:        intent.Payload,
			CreatedAt:      now,
		}, routeFromSeed(notificationID, routes[i], now))
	}
	return notificationID, nil
}
// materialiseRoutes builds the per-(recipient, channel) seeds that
// land in `notification_routes`. The function performs recipient
// resolution and the catalog-aware channel fan-out. Each seed already
// carries its terminal status (`pending` for live routes, `skipped`
// for cases where the destination cannot be resolved).
func (s *Service) materialiseRoutes(ctx context.Context, notificationID uuid.UUID, entry CatalogEntry, intent Intent, now time.Time) ([]RouteSeed, error) {
	_ = notificationID // currently unused; kept so call sites read naturally
	maxAttempts := int32(s.deps.Config.MaxAttempts)
	if maxAttempts <= 0 {
		// Guard against a zero/negative config: every route gets at
		// least one delivery attempt.
		maxAttempts = 1
	}
	// Pending routes are immediately due: next_attempt_at = now.
	pendingNext := timePtr(now.UTC())
	if entry.Admin {
		// Admin-channel kinds: one row per channel, no per-user fan-out.
		seeds := make([]RouteSeed, 0, len(entry.Channels))
		for _, ch := range entry.Channels {
			seed := RouteSeed{
				RouteID:       uuid.New(),
				Channel:       ch,
				Status:        RouteStatusPending,
				MaxAttempts:   maxAttempts,
				NextAttemptAt: pendingNext,
			}
			if ch == ChannelEmail {
				seed.ResolvedEmail = s.adminEmail()
				if seed.ResolvedEmail == "" {
					// No configured admin address: skip rather than fail.
					seed.Status = RouteStatusSkipped
					seed.NextAttemptAt = nil
					seed.SkippedAt = timePtr(now.UTC())
					seed.LastError = "BACKEND_NOTIFICATION_ADMIN_EMAIL not configured"
					s.deps.Logger.Warn("admin notification skipped: admin email not configured",
						zap.String("kind", intent.Kind),
						zap.String("idempotency_key", intent.IdempotencyKey),
					)
				}
			}
			seeds = append(seeds, seed)
		}
		return seeds, nil
	}
	// Per-user kinds: fan out across (recipient × channel).
	seeds := make([]RouteSeed, 0, len(intent.Recipients)*len(entry.Channels))
	for _, userID := range intent.Recipients {
		uid := userID
		// Resolve once per recipient; both channels inspect the same err.
		account, err := s.resolveAccount(ctx, userID)
		for _, ch := range entry.Channels {
			seed := RouteSeed{
				RouteID:         uuid.New(),
				Channel:         ch,
				Status:          RouteStatusPending,
				MaxAttempts:     maxAttempts,
				NextAttemptAt:   pendingNext,
				UserID:          &uid,
				DeviceSessionID: intent.DeviceSessionID,
			}
			switch ch {
			case ChannelEmail:
				if err != nil {
					// Unresolvable recipient: skip the email route.
					seed.Status = RouteStatusSkipped
					seed.NextAttemptAt = nil
					seed.SkippedAt = timePtr(now.UTC())
					seed.LastError = err.Error()
				} else {
					seed.ResolvedEmail = account.Email
					seed.ResolvedLocale = account.PreferredLanguage
					if trimSpace(seed.ResolvedEmail) == "" {
						// Account exists but has no address on file.
						seed.Status = RouteStatusSkipped
						seed.NextAttemptAt = nil
						seed.SkippedAt = timePtr(now.UTC())
						seed.LastError = "recipient has no email on file"
					}
				}
			case ChannelPush:
				if err != nil {
					seed.Status = RouteStatusSkipped
					seed.NextAttemptAt = nil
					seed.SkippedAt = timePtr(now.UTC())
					seed.LastError = err.Error()
				} else if account.PreferredLanguage != "" {
					seed.ResolvedLocale = account.PreferredLanguage
				}
			}
			seeds = append(seeds, seed)
		}
	}
	return seeds, nil
}
// resolveAccount fetches the recipient profile through the configured
// AccountResolver. A missing account (user.ErrAccountNotFound) and a
// soft-deleted account are both mapped to plain, sentinel-free error
// strings so the route is skipped without a stack-trace log; any other
// lookup failure is wrapped with the recipient id.
func (s *Service) resolveAccount(ctx context.Context, userID uuid.UUID) (user.Account, error) {
	account, err := s.deps.Accounts.GetAccount(ctx, userID)
	switch {
	case errors.Is(err, user.ErrAccountNotFound):
		return user.Account{}, errors.New("recipient account not found")
	case err != nil:
		return user.Account{}, fmt.Errorf("resolve recipient %s: %w", userID, err)
	case account.DeletedAt != nil:
		return user.Account{}, errors.New("recipient account soft-deleted")
	}
	return account, nil
}
// recordMalformed best-effort persists an invalid intent for later
// admin inspection. A Postgres failure here is logged and swallowed —
// it never bubbles up to the producer, matching the README §10
// contract.
func (s *Service) recordMalformed(ctx context.Context, intent Intent, reason string) {
	record := map[string]any{
		"kind":            intent.Kind,
		"idempotency_key": intent.IdempotencyKey,
	}
	if len(intent.Payload) > 0 {
		record["payload"] = intent.Payload
	}
	if count := len(intent.Recipients); count > 0 {
		ids := make([]string, 0, count)
		for _, recipient := range intent.Recipients {
			ids = append(ids, recipient.String())
		}
		record["recipients"] = ids
	}
	if intent.DeviceSessionID != nil {
		record["device_session_id"] = intent.DeviceSessionID.String()
	}
	err := s.deps.Store.InsertMalformed(ctx, record, reason)
	if err != nil {
		s.deps.Logger.Warn("failed to persist malformed notification intent",
			zap.String("kind", intent.Kind),
			zap.String("reason", reason),
			zap.Error(err),
		)
		return
	}
	s.deps.Logger.Info("notification intent dropped as malformed",
		zap.String("kind", intent.Kind),
		zap.String("reason", reason),
	)
}
// routeFromSeed projects a RouteSeed — the pre-insert snapshot the
// dispatcher needs — onto the Route value the worker / dispatcher
// exchange once the row is durably persisted. Attempts always starts
// at zero; created_at/updated_at are both stamped with now in UTC.
func routeFromSeed(notificationID uuid.UUID, seed RouteSeed, now time.Time) Route {
	stamp := now.UTC()
	return Route{
		RouteID:         seed.RouteID,
		NotificationID:  notificationID,
		Channel:         seed.Channel,
		Status:          seed.Status,
		Attempts:        0,
		MaxAttempts:     seed.MaxAttempts,
		NextAttemptAt:   seed.NextAttemptAt,
		ResolvedEmail:   seed.ResolvedEmail,
		ResolvedLocale:  seed.ResolvedLocale,
		UserID:          seed.UserID,
		DeviceSessionID: seed.DeviceSessionID,
		CreatedAt:       stamp,
		UpdatedAt:       stamp,
		SkippedAt:       seed.SkippedAt,
		LastError:       seed.LastError,
	}
}
@@ -0,0 +1,458 @@
package notification_test
import (
"context"
"database/sql"
"errors"
"net/url"
"sync"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/notification"
backendpg "galaxy/backend/internal/postgres"
"galaxy/backend/internal/user"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
"go.uber.org/zap/zaptest"
)
// Shared fixtures for the Postgres testcontainer scaffolding.
const (
	pgImage    = "postgres:16-alpine" // container image under test
	pgUser     = "galaxy"
	pgPassword = "galaxy"
	pgDatabase = "galaxy_backend"
	pgSchema   = "backend"        // search_path applied to every DSN
	pgStartup  = 90 * time.Second // max wait for the container readiness log
	pgOpTO     = 10 * time.Second // per-operation database timeout
)
// startPostgres mirrors the mail/auth scaffolding: spin up Postgres,
// apply migrations, return *sql.DB. The whole setup is bounded by a
// 3-minute context; when the testcontainer cannot start (e.g. no
// Docker on the host) the test is skipped rather than failed.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)
	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			// Wait for the second occurrence of the readiness line —
			// presumably the image logs it twice during init; confirm
			// against the postgres image startup sequence.
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	// Scope every connection to the backend schema via search_path.
	scoped, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}
	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scoped
	cfg.OperationTimeout = pgOpTO
	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	t.Cleanup(func() { _ = db.Close() })
	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// dsnWithSearchPath returns baseDSN with `search_path=<schema>` set on
// the query string; when the DSN carries no sslmode yet, sslmode=disable
// is added as well.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	u, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	q := u.Query()
	q.Set("search_path", schema)
	if q.Get("sslmode") == "" {
		q.Set("sslmode", "disable")
	}
	u.RawQuery = q.Encode()
	return u.String(), nil
}
// recordingMailer captures every EnqueueTemplate call.
type recordingMailer struct {
	mu    sync.Mutex        // guards calls and err
	calls []recordedEnqueue // every successfully recorded invocation
	err   error             // when non-nil, EnqueueTemplate returns it and records nothing
}

// recordedEnqueue is one captured EnqueueTemplate invocation.
type recordedEnqueue struct {
	TemplateID     string
	Recipient      string
	Payload        map[string]any
	IdempotencyKey string
}
// EnqueueTemplate records the call under the mutex, or returns the
// configured error without recording when one is set.
func (r *recordingMailer) EnqueueTemplate(_ context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.err != nil {
		return r.err
	}
	call := recordedEnqueue{
		TemplateID:     templateID,
		Recipient:      recipient,
		Payload:        payload,
		IdempotencyKey: idempotencyKey,
	}
	r.calls = append(r.calls, call)
	return nil
}
// Calls returns a defensive copy of everything recorded so far.
func (r *recordingMailer) Calls() []recordedEnqueue {
	r.mu.Lock()
	defer r.mu.Unlock()
	snapshot := make([]recordedEnqueue, len(r.calls))
	copy(snapshot, r.calls)
	return snapshot
}
// recordingPush captures every PublishClientEvent call.
type recordingPush struct {
	mu    sync.Mutex          // guards calls
	calls []recordedPushEvent // every captured invocation
}

// recordedPushEvent is one captured PublishClientEvent invocation.
// (The device-session argument is not recorded.)
type recordedPushEvent struct {
	UserID    uuid.UUID
	Kind      string
	Payload   map[string]any
	EventID   string
	RequestID string
	TraceID   string
}
// PublishClientEvent appends one captured push event under the mutex;
// the device-session pointer is accepted but not recorded.
func (r *recordingPush) PublishClientEvent(_ context.Context, userID uuid.UUID, _ *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	event := recordedPushEvent{
		UserID:    userID,
		Kind:      kind,
		Payload:   payload,
		EventID:   eventID,
		RequestID: requestID,
		TraceID:   traceID,
	}
	r.calls = append(r.calls, event)
	return nil
}
// Calls returns a defensive copy of everything recorded so far.
func (r *recordingPush) Calls() []recordedPushEvent {
	r.mu.Lock()
	defer r.mu.Unlock()
	snapshot := make([]recordedPushEvent, len(r.calls))
	copy(snapshot, r.calls)
	return snapshot
}
// stubAccounts hands back a fixed account record for any user_id, so
// tests don't need to seed the accounts table.
type stubAccounts struct {
	account user.Account // template returned for every lookup (UserID is overwritten per call)
	err     error        // when non-nil, GetAccount fails with it instead
}
// GetAccount returns the stubbed account with UserID set to the
// requested id, or the configured error when one is set.
func (s *stubAccounts) GetAccount(_ context.Context, userID uuid.UUID) (user.Account, error) {
	if s.err != nil {
		return user.Account{}, s.err
	}
	account := s.account
	account.UserID = userID
	return account, nil
}
// newService wires a notification.Service around the supplied fakes
// with a small, test-friendly config: 3 max attempts and a 10ms
// worker interval.
func newService(t *testing.T, db *sql.DB, mailer notification.Mailer, push notification.PushPublisher, accounts notification.AccountResolver, adminEmail string) *notification.Service {
	t.Helper()
	deps := notification.Deps{
		Store:    notification.NewStore(db),
		Mail:     mailer,
		Push:     push,
		Accounts: accounts,
		Config: config.NotificationConfig{
			AdminEmail:     adminEmail,
			WorkerInterval: 10 * time.Millisecond,
			MaxAttempts:    3,
		},
		Logger: zaptest.NewLogger(t),
	}
	return notification.NewService(deps)
}
// TestSubmitFansOutLobbyInviteToPushAndEmail submits a per-user kind
// and asserts the synchronous best-effort dispatch hit the push and
// mail fakes exactly once each, with the resolved recipient address
// and the kind as the mail template id.
func TestSubmitFansOutLobbyInviteToPushAndEmail(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	mailer := &recordingMailer{}
	push := &recordingPush{}
	accounts := &stubAccounts{account: user.Account{
		Email:             "alice@example.test",
		PreferredLanguage: "en",
	}}
	svc := newService(t, db, mailer, push, accounts, "")
	recipient := uuid.New()
	id, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyInviteReceived,
		IdempotencyKey: "invite:" + uuid.NewString(),
		Recipients:     []uuid.UUID{recipient},
		Payload: map[string]any{
			"game_id":         uuid.NewString(),
			"inviter_user_id": uuid.NewString(),
		},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id == uuid.Nil {
		t.Fatal("submit returned nil id")
	}
	// Best-effort dispatch ran synchronously; both channels should
	// have observed exactly one call.
	if got := len(push.Calls()); got != 1 {
		t.Errorf("push calls=%d, want 1", got)
	}
	if got := len(mailer.Calls()); got != 1 {
		t.Errorf("mail calls=%d, want 1", got)
	} else {
		call := mailer.Calls()[0]
		if call.Recipient != "alice@example.test" {
			t.Errorf("mail recipient=%q", call.Recipient)
		}
		if call.TemplateID != notification.KindLobbyInviteReceived {
			t.Errorf("mail template=%q", call.TemplateID)
		}
	}
}
// TestSubmitIsIdempotent verifies that two Submits carrying the same
// (kind, idempotency_key) pair return the same notification id.
func TestSubmitIsIdempotent(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{account: user.Account{Email: "x@example.test"}}, "")
	intent := notification.Intent{
		Kind:           notification.KindLobbyApplicationSubmitted,
		IdempotencyKey: "dedupe-key",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString(), "application_id": uuid.NewString()},
	}
	first, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("first submit: %v", err)
	}
	// Identical intent: must surface the id stored by the first call.
	second, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("second submit: %v", err)
	}
	if first != second {
		t.Fatalf("idempotent submit must return same id: %s vs %s", first, second)
	}
}
// TestSubmitMalformedPersists verifies the malformed-intent contract:
// an unknown kind returns (uuid.Nil, nil) — no error — and lands in
// the malformed listing the admin API exposes.
func TestSubmitMalformedPersists(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{}, "")
	id, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           "nonsense.kind",
		IdempotencyKey: "anything",
		Recipients:     []uuid.UUID{uuid.New()},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id != uuid.Nil {
		t.Fatalf("malformed submit must return nil id, got %s", id)
	}
	page, err := svc.AdminListMalformed(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list malformed: %v", err)
	}
	if page.Total < 1 {
		t.Fatalf("malformed total=%d, want >= 1", page.Total)
	}
}
// TestSubmitAdminEmailSkipsWhenNotConfigured: with an empty admin
// address, an admin-channel kind must persist a notification but
// produce no outbound mail.
func TestSubmitAdminEmailSkipsWhenNotConfigured(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{}
	svc := newService(t, db, mailer, &recordingPush{}, &stubAccounts{}, "")

	id, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindRuntimeImagePullFailed,
		IdempotencyKey: "ipf-1",
		Payload:        map[string]any{"game_id": uuid.NewString(), "image_ref": "registry/img:tag"},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id == uuid.Nil {
		t.Fatal("admin submit returned nil id")
	}
	if calls := mailer.Calls(); len(calls) != 0 {
		t.Errorf("mail calls=%d, want 0 (admin email unset)", len(calls))
	}
}
// TestSubmitAdminEmailDispatchesWhenConfigured: with an admin address
// set, an admin-channel kind must produce exactly one mail addressed
// to that address.
func TestSubmitAdminEmailDispatchesWhenConfigured(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{}
	svc := newService(t, db, mailer, &recordingPush{}, &stubAccounts{}, "ops@example.test")

	_, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindRuntimeContainerStartFailed,
		IdempotencyKey: "csf-1",
		Payload:        map[string]any{"game_id": uuid.NewString()},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}

	calls := mailer.Calls()
	if len(calls) != 1 {
		t.Fatalf("mail calls=%d, want 1", len(calls))
	}
	if got := calls[0].Recipient; got != "ops@example.test" {
		t.Errorf("admin recipient=%q", got)
	}
}
// TestSubmitMissingAccountSkipsEmail: when the account lookup reports
// the recipient missing, neither the mail nor the push channel may be
// attempted.
func TestSubmitMissingAccountSkipsEmail(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{}
	push := &recordingPush{}
	accounts := &stubAccounts{err: user.ErrAccountNotFound}
	svc := newService(t, db, mailer, push, accounts, "")

	_, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyApplicationApproved,
		IdempotencyKey: "missing-1",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString()},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}

	if got := len(mailer.Calls()); got != 0 {
		t.Errorf("mail calls=%d want 0 when account missing", got)
	}
	if got := len(push.Calls()); got != 0 {
		t.Errorf("push calls=%d want 0 when account missing", got)
	}
}
// TestWorkerRetryAndDeadLetter drives a route whose mailer always
// fails through every retry until the route dead-letters.
func TestWorkerRetryAndDeadLetter(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{err: errors.New("smtp down")}
	push := &recordingPush{}
	accounts := &stubAccounts{account: user.Account{Email: "alice@example.test", PreferredLanguage: "en"}}
	svc := newService(t, db, mailer, push, accounts, "")

	// MaxAttempts=3 from newService config. Submit fires one
	// best-effort attempt; subsequent Tick calls drive attempts 2 and
	// 3, the last one dead-letters.
	intent := notification.Intent{
		Kind:           notification.KindLobbyInviteReceived,
		IdempotencyKey: "fail-1",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString(), "inviter_user_id": uuid.NewString()},
	}
	if _, err := svc.Submit(context.Background(), intent); err != nil {
		t.Fatalf("submit: %v", err)
	}

	// Force every retry to be due immediately.
	if _, err := db.Exec(`UPDATE backend.notification_routes SET next_attempt_at = now() WHERE channel = 'email'`); err != nil {
		t.Fatalf("force due: %v", err)
	}

	worker := notification.NewWorker(svc)
	for i := 0; i < 5; i++ {
		if err := worker.Tick(context.Background()); err != nil {
			t.Fatalf("tick: %v", err)
		}
		// Re-arm any route the tick pushed back into retrying.
		if _, err := db.Exec(`UPDATE backend.notification_routes SET next_attempt_at = now() WHERE channel = 'email' AND status = 'retrying'`); err != nil {
			t.Fatalf("force due: %v", err)
		}
	}

	dead, err := svc.AdminListDeadLetters(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list dead-letters: %v", err)
	}
	if dead.Total < 1 {
		t.Fatalf("expected dead-letter row, got total=%d (mail attempts=%d)", dead.Total, len(mailer.Calls()))
	}
}
// TestOnUserDeletedSkipsPendingRoutes asserts the user-deletion
// cascade marks the deleted user's pending routes as skipped.
func TestOnUserDeletedSkipsPendingRoutes(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	mailer := &recordingMailer{err: errors.New("smtp down")}
	push := &recordingPush{}
	victim := uuid.New()
	accounts := &stubAccounts{account: user.Account{Email: "alice@example.test", PreferredLanguage: "en"}}
	svc := newService(t, db, mailer, push, accounts, "")

	// Submit something that owns user_id so the cascade picks it up.
	if _, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyApplicationApproved,
		IdempotencyKey: "cascade-1",
		Recipients:     []uuid.UUID{victim},
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}); err != nil {
		t.Fatalf("submit: %v", err)
	}

	if err := svc.OnUserDeleted(context.Background(), victim); err != nil {
		t.Fatalf("OnUserDeleted: %v", err)
	}

	// Count the victim's routes that ended up skipped.
	var skipped int
	if err := db.QueryRow(`
SELECT COUNT(*)
FROM backend.notification_routes r
JOIN backend.notifications n ON n.notification_id = r.notification_id
WHERE n.user_id = $1 AND r.status = 'skipped'
`, victim).Scan(&skipped); err != nil {
		t.Fatalf("count skipped: %v", err)
	}
	if skipped == 0 {
		t.Fatal("expected at least one skipped route after cascade")
	}
}
// TestAdminGetMissing: looking up an id that was never created must
// surface the ErrNotificationNotFound sentinel.
func TestAdminGetMissing(t *testing.T) {
	t.Parallel()

	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{}, "")

	_, err := svc.AdminGetNotification(context.Background(), uuid.New())
	if !errors.Is(err, notification.ErrNotificationNotFound) {
		t.Fatalf("got %v, want ErrNotificationNotFound", err)
	}
}
+97
View File
@@ -0,0 +1,97 @@
package notification
import (
"strings"
"time"
"github.com/google/uuid"
)
// Intent is the open shape every producer submits to Submit. Producer
// packages (lobby, runtime) construct values from their own vocabulary
// and let the wiring layer translate them with a thin adapter.
//
// Kind must be a value from the catalog (`backend/README.md` §10).
// IdempotencyKey is required and feeds the UNIQUE constraint on
// `notifications (kind, idempotency_key)`. Recipients lists user_ids
// for kinds that fan out per user; admin-channel kinds carry an empty
// slice. Payload is the kind-specific data blob; it is persisted as
// JSON on `notifications.payload` and forwarded to email templates.
// DeviceSessionID, when non-nil, narrows a push route to a single
// device session — the runtime / auth callers may use it to target
// specific clients.
type Intent struct {
	Kind           string         // catalog kind identifier (backend/README.md §10)
	IdempotencyKey string         // required; dedupe key, unique per kind
	Recipients     []uuid.UUID    // per-user fan-out targets; empty for admin-channel kinds
	Payload        map[string]any // kind-specific blob, persisted as JSON
	DeviceSessionID *uuid.UUID    // optional: restrict push delivery to one device session
}
// Notification mirrors a row in `backend.notifications`. The admin API
// returns it directly; Worker keeps a copy alongside each route to
// avoid a re-fetch per dispatch.
type Notification struct {
	NotificationID uuid.UUID      // primary key
	Kind           string         // catalog kind the row was created for
	IdempotencyKey string         // producer-supplied dedupe key
	UserID         *uuid.UUID     // nullable recipient; presumably nil for admin-channel kinds — confirm against Submit
	Payload        map[string]any // kind-specific JSON blob
	CreatedAt      time.Time      // row insertion time
}
// Route mirrors a row in `backend.notification_routes`. Status keeps
// the value documented in `backend/README.md` §10; channel is `push`
// or `email`. ResolvedEmail / ResolvedLocale capture the recipient
// address pinned at materialisation time so a downstream account edit
// cannot retarget an in-flight email.
type Route struct {
	RouteID         uuid.UUID  // primary key
	NotificationID  uuid.UUID  // owning notification
	Channel         string     // `push` or `email`
	Status          string     // lifecycle state (README §10)
	Attempts        int32      // dispatch attempts made so far
	MaxAttempts     int32      // attempts allowed before dead-lettering
	NextAttemptAt   *time.Time // nullable: when the route becomes due again
	LastAttemptAt   *time.Time // nullable: most recent dispatch attempt
	LastError       string     // error text from the last failed attempt
	ResolvedEmail   string     // recipient address pinned at materialisation
	ResolvedLocale  string     // recipient locale pinned at materialisation
	UserID          *uuid.UUID // nullable recipient user
	DeviceSessionID *uuid.UUID // nullable: push target narrowed to one session
	CreatedAt       time.Time
	UpdatedAt       time.Time
	PublishedAt     *time.Time // set when the route was delivered
	DeadLetteredAt  *time.Time // set when retries were exhausted
	SkippedAt       *time.Time // set when the route was skipped (e.g. user deleted)
}
// DeadLetter mirrors a row in `backend.notification_dead_letters`:
// the archive record written when a route exhausts its attempts.
type DeadLetter struct {
	DeadLetterID   uuid.UUID // primary key
	NotificationID uuid.UUID // notification the failed route belonged to
	RouteID        uuid.UUID // route that was dead-lettered
	ArchivedAt     time.Time // when the route was archived
	Reason         string    // terminal failure description
}
// MalformedIntent mirrors a row in
// `backend.notification_malformed_intents`. Payload is the producer-
// supplied blob (best effort JSON-encoded by Submit); Reason records
// the validation failure that diverted it.
type MalformedIntent struct {
	ID         uuid.UUID      // primary key
	ReceivedAt time.Time      // when the malformed intent arrived
	Payload    map[string]any // producer-supplied blob, best-effort JSON
	Reason     string         // validation failure description
}
// trimSpace returns s with leading and trailing Unicode whitespace
// removed; a package-local shorthand for strings.TrimSpace so tiny
// files avoid duplicate import chains.
func trimSpace(s string) string {
	return strings.TrimSpace(s)
}
// timePtr returns a pointer to the supplied time. Useful in struct
// literals where Postgres-nullable timestamptz fields are pointers.
func timePtr(t time.Time) *time.Time { return &t }
+118
View File
@@ -0,0 +1,118 @@
package notification
import (
"context"
"errors"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// Worker drains the notification routes table: per tick it walks due
// rows under `SELECT … FOR UPDATE SKIP LOCKED`, dispatches each through
// the matching channel, and atomically updates the route status.
// Implements `internal/app.Component`.
type Worker struct {
	svc *Service // owning service; supplies store access, dispatch, and config
}
// NewWorker constructs a Worker bound to svc.
func NewWorker(svc *Service) *Worker {
	w := Worker{svc: svc}
	return &w
}
// Run drives the scan loop until ctx is cancelled. The first tick is
// the startup-drain pass: rows queued before the process restart get
// retried immediately rather than waiting for the first interval.
func (w *Worker) Run(ctx context.Context) error {
	if w == nil {
		return nil
	}
	logger := w.svc.deps.Logger.Named("worker")

	// drain runs one tick and logs (but never propagates) failures;
	// context cancellation is expected during shutdown and stays quiet.
	drain := func(msg string) {
		if err := w.tick(ctx); err != nil && !errors.Is(err, context.Canceled) {
			logger.Warn(msg, zap.Error(err))
		}
	}

	// Startup-drain pass: retry rows queued before this process started.
	drain("initial notification routes drain failed")

	interval := w.svc.deps.Config.WorkerInterval
	if interval <= 0 {
		interval = 5 * time.Second // fallback when config leaves the interval unset
	}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-ticker.C:
			drain("notification routes tick failed")
		}
	}
}
// Shutdown is a no-op: each per-row transaction is self-contained, so
// cancelling the ctx that drives Run is enough to stop the worker.
func (w *Worker) Shutdown(_ context.Context) error {
	return nil
}
// Tick is exposed for tests so they can drive the worker without
// timing dependencies; it delegates to the internal tick.
func (w *Worker) Tick(ctx context.Context) error {
	return w.tick(ctx)
}
// tick processes up to claimBatchSize rows. Each row is handled in its
// own transaction so a slow channel only holds one row lock at a time.
// The loop exits as soon as a claim returns zero rows or ctx is
// cancelled. Rows already handled in this tick are tracked in
// `handled` and excluded from subsequent claims so a transient retry
// scheduled with next_attempt_at <= now() does not chew through
// MaxAttempts in a single tick (mirrors the mail-worker pattern).
func (w *Worker) tick(ctx context.Context) error {
	handled := make([]uuid.UUID, 0, claimBatchSize)
	for i := 0; i < claimBatchSize; i++ {
		if err := ctx.Err(); err != nil {
			return err
		}
		more, routeID, err := w.processOne(ctx, handled)
		if err != nil {
			return err
		}
		if !more {
			// Nothing due: stop early rather than spinning the batch out.
			break
		}
		handled = append(handled, routeID)
	}
	return nil
}
// processOne claims a single due route, dispatches it, and commits the
// state transition in one transaction. Returns more=false when no row
// was due; on success the claimed route's id is returned so the caller
// can exclude it from further claims within the same tick.
func (w *Worker) processOne(ctx context.Context, exclude []uuid.UUID) (bool, uuid.UUID, error) {
	store := w.svc.deps.Store
	tx, err := store.BeginTx(ctx)
	if err != nil {
		return false, uuid.Nil, err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	claimed, err := store.ClaimDueRoutes(ctx, tx, 1, exclude...)
	if err != nil {
		return false, uuid.Nil, err
	}
	if len(claimed) == 0 {
		return false, uuid.Nil, nil
	}
	claim := claimed[0]

	// Dispatch happens while the row is locked; the outcome (success or
	// error) is folded into the route's state before the tx commits.
	dispatchErr := w.svc.performDispatch(ctx, claim)
	now := w.svc.nowUTC()
	if err := w.svc.finaliseDispatch(ctx, tx, claim, dispatchErr, now); err != nil {
		return false, uuid.Nil, err
	}
	if err := tx.Commit(); err != nil {
		return false, uuid.Nil, err
	}
	return true, claim.Route.RouteID, nil
}
// Compile-time check that Worker satisfies the lifecycle interface
// shape used elsewhere (Run + Shutdown); a signature drift becomes a
// build error here rather than a wiring failure at runtime.
var _ interface {
	Run(context.Context) error
	Shutdown(context.Context) error
} = (*Worker)(nil)