feat: backend notification service
@@ -0,0 +1,107 @@
package notification

import (
	"context"

	"github.com/google/uuid"
)

// AdminListNotificationsPage bundles the pagination metadata returned to
// the admin API. The shape mirrors `mail.AdminListDeliveriesPage` so
// handlers stay symmetric.
type AdminListNotificationsPage struct {
	Items    []Notification
	Page     int
	PageSize int
	Total    int64
}

// AdminListDeadLettersPage mirrors AdminListNotificationsPage for the
// dead-letter listing.
type AdminListDeadLettersPage struct {
	Items    []DeadLetter
	Page     int
	PageSize int
	Total    int64
}

// AdminListMalformedPage mirrors AdminListNotificationsPage for the
// malformed-intent listing.
type AdminListMalformedPage struct {
	Items    []MalformedIntent
	Page     int
	PageSize int
	Total    int64
}

// AdminListNotifications returns the notification page newest-first.
// page is 1-indexed; pageSize is bounded by normalisePaging.
func (s *Service) AdminListNotifications(ctx context.Context, page, pageSize int) (AdminListNotificationsPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	offset := (page - 1) * pageSize
	res, err := s.deps.Store.ListNotifications(ctx, offset, pageSize)
	if err != nil {
		return AdminListNotificationsPage{}, err
	}
	return AdminListNotificationsPage{
		Items:    res.Items,
		Page:     page,
		PageSize: pageSize,
		Total:    res.Total,
	}, nil
}

// AdminGetNotification returns a single notification by id; the
// sentinel ErrNotificationNotFound surfaces a 404 in the handler
// layer.
func (s *Service) AdminGetNotification(ctx context.Context, id uuid.UUID) (Notification, error) {
	return s.deps.Store.GetNotification(ctx, id)
}

// AdminListDeadLetters returns the dead-letter page newest-first.
func (s *Service) AdminListDeadLetters(ctx context.Context, page, pageSize int) (AdminListDeadLettersPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	offset := (page - 1) * pageSize
	res, err := s.deps.Store.ListDeadLetters(ctx, offset, pageSize)
	if err != nil {
		return AdminListDeadLettersPage{}, err
	}
	return AdminListDeadLettersPage{
		Items:    res.Items,
		Page:     page,
		PageSize: pageSize,
		Total:    res.Total,
	}, nil
}

// AdminListMalformed returns the malformed-intent page newest-first.
func (s *Service) AdminListMalformed(ctx context.Context, page, pageSize int) (AdminListMalformedPage, error) {
	page, pageSize = normalisePaging(page, pageSize)
	offset := (page - 1) * pageSize
	res, err := s.deps.Store.ListMalformed(ctx, offset, pageSize)
	if err != nil {
		return AdminListMalformedPage{}, err
	}
	return AdminListMalformedPage{
		Items:    res.Items,
		Page:     page,
		PageSize: pageSize,
		Total:    res.Total,
	}, nil
}

// normalisePaging clamps page and pageSize to the values handlers can
// safely pass through to the store. Defaults match the existing admin
// endpoints (`mail` package); pageSize is capped at 200.
func normalisePaging(page, pageSize int) (int, int) {
	if page <= 0 {
		page = 1
	}
	if pageSize <= 0 {
		pageSize = 25
	}
	if pageSize > 200 {
		pageSize = 200
	}
	return page, pageSize
}
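
// Illustrative sketch (not part of the original commit): the clamp
// behaviour of normalisePaging at its boundaries, derived from the
// defaults above (page 1, size 25, cap 200).
//
//	normalisePaging(0, 0)    // -> 1, 25  (both defaulted)
//	normalisePaging(-3, 50)  // -> 1, 50  (page floored to 1)
//	normalisePaging(4, 1000) // -> 4, 200 (pageSize capped)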
@@ -0,0 +1,35 @@
package notification

import (
	"context"

	"github.com/google/uuid"
	"go.uber.org/zap"
)

// OnUserDeleted is the user-side soft-delete cascade hook. It marks
// every pending or retrying route owned by userID as `skipped` so the
// worker stops trying to deliver to a vanished account; published
// rows stay intact as an audit trail.
//
// The catalog (`backend/README.md` §10) does not include a
// `user.*` kind, so this is the only place where the notification
// module reacts to user lifecycle events directly. The cascade is
// idempotent: repeated invocations on the same user simply find no
// pending rows.
func (s *Service) OnUserDeleted(ctx context.Context, userID uuid.UUID) error {
	if userID == uuid.Nil {
		return nil
	}
	skipped, err := s.deps.Store.SkipPendingRoutesForUser(ctx, userID, s.nowUTC())
	if err != nil {
		return err
	}
	if skipped > 0 {
		s.deps.Logger.Info("notification routes skipped on user delete",
			zap.String("user_id", userID.String()),
			zap.Int64("count", skipped),
		)
	}
	return nil
}
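
// Illustrative sketch (not part of the original commit; the caller name
// is hypothetical): how a user-deletion flow would invoke the cascade.
// Because the cascade is idempotent, retrying the surrounding job after
// a crash is safe.
//
//	if err := notificationSvc.OnUserDeleted(ctx, userID); err != nil {
//		return fmt.Errorf("notification cascade: %w", err)
//	}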
@@ -0,0 +1,127 @@
package notification

// Kind constants name every supported notification kind. The
// implementation trims the README §10 catalog to the set with active
// producers in the codebase; further kinds (`game.*`,
// `mail.dead_lettered`) require an additive change here together with
// a producer.
const (
	KindLobbyInviteReceived         = "lobby.invite.received"
	KindLobbyInviteRevoked          = "lobby.invite.revoked"
	KindLobbyApplicationSubmitted   = "lobby.application.submitted"
	KindLobbyApplicationApproved    = "lobby.application.approved"
	KindLobbyApplicationRejected    = "lobby.application.rejected"
	KindLobbyMembershipRemoved      = "lobby.membership.removed"
	KindLobbyMembershipBlocked      = "lobby.membership.blocked"
	KindLobbyRaceNameRegistered     = "lobby.race_name.registered"
	KindLobbyRaceNamePending        = "lobby.race_name.pending"
	KindLobbyRaceNameExpired        = "lobby.race_name.expired"
	KindRuntimeImagePullFailed      = "runtime.image_pull_failed"
	KindRuntimeContainerStartFailed = "runtime.container_start_failed"
	KindRuntimeStartConfigInvalid   = "runtime.start_config_invalid"
)

// CatalogEntry describes the per-kind delivery policy: which channels
// fan out and whether the kind targets the platform admin recipient
// instead of per-user accounts.
type CatalogEntry struct {
	// Channels lists the channels this kind fans out to, in the order
	// rows are materialised in `notification_routes`. The closed set is
	// {`push`, `email`}.
	Channels []string

	// Admin reports whether the email channel targets the configured
	// admin recipient (`BACKEND_NOTIFICATION_ADMIN_EMAIL`) rather than
	// per-user accounts. Admin-targeted kinds carry an empty Recipients
	// slice on the producer side.
	Admin bool

	// MailTemplateID is the template_id passed to `mail.EnqueueTemplate`
	// for email routes. The catalog uses the kind itself by convention,
	// matching `mail.TemplateLoginCode`'s use of `auth.login_code`.
	MailTemplateID string
}

// catalog maps each supported kind to its delivery policy. The map is
// queried by Submit and by the dispatcher worker; producers do not
// inspect it directly.
var catalog = map[string]CatalogEntry{
	KindLobbyInviteReceived: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyInviteReceived,
	},
	KindLobbyInviteRevoked: {
		Channels: []string{ChannelPush},
	},
	KindLobbyApplicationSubmitted: {
		Channels: []string{ChannelPush},
	},
	KindLobbyApplicationApproved: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyApplicationApproved,
	},
	KindLobbyApplicationRejected: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyApplicationRejected,
	},
	KindLobbyMembershipRemoved: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyMembershipRemoved,
	},
	KindLobbyMembershipBlocked: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyMembershipBlocked,
	},
	KindLobbyRaceNameRegistered: {
		Channels: []string{ChannelPush},
	},
	KindLobbyRaceNamePending: {
		Channels:       []string{ChannelPush, ChannelEmail},
		MailTemplateID: KindLobbyRaceNamePending,
	},
	KindLobbyRaceNameExpired: {
		Channels: []string{ChannelPush},
	},
	KindRuntimeImagePullFailed: {
		Channels:       []string{ChannelEmail},
		Admin:          true,
		MailTemplateID: KindRuntimeImagePullFailed,
	},
	KindRuntimeContainerStartFailed: {
		Channels:       []string{ChannelEmail},
		Admin:          true,
		MailTemplateID: KindRuntimeContainerStartFailed,
	},
	KindRuntimeStartConfigInvalid: {
		Channels:       []string{ChannelEmail},
		Admin:          true,
		MailTemplateID: KindRuntimeStartConfigInvalid,
	},
}

// LookupCatalog returns the per-kind policy and a boolean reporting
// whether the kind exists. Callers (Submit, Worker) branch on the
// boolean rather than receiving a sentinel error.
func LookupCatalog(kind string) (CatalogEntry, bool) {
	entry, ok := catalog[kind]
	return entry, ok
}

// SupportedKinds returns the closed kind set in deterministic order.
// The function exists to back tests and the migration CHECK constraint
// audit; it is not on the hot path.
func SupportedKinds() []string {
	return []string{
		KindLobbyInviteReceived,
		KindLobbyInviteRevoked,
		KindLobbyApplicationSubmitted,
		KindLobbyApplicationApproved,
		KindLobbyApplicationRejected,
		KindLobbyMembershipRemoved,
		KindLobbyMembershipBlocked,
		KindLobbyRaceNameRegistered,
		KindLobbyRaceNamePending,
		KindLobbyRaceNameExpired,
		KindRuntimeImagePullFailed,
		KindRuntimeContainerStartFailed,
		KindRuntimeStartConfigInvalid,
	}
}
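
// Illustrative sketch (not part of the original commit): the boolean
// branching Submit and the worker use instead of a sentinel error.
//
//	entry, ok := LookupCatalog(KindLobbyInviteReceived)
//	if !ok {
//		// unknown kind: record a malformed intent, never fail the producer
//	}
//	for _, ch := range entry.Channels {
//		_ = ch // materialise one notification_routes row per channel, in catalog order
//	}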
@@ -0,0 +1,77 @@
package notification

import (
	"testing"
)

// TestCatalogClosure asserts that the SupportedKinds slice and the
// `catalog` map agree on the kind set. This catches dropped entries
// during catalog edits.
func TestCatalogClosure(t *testing.T) {
	t.Parallel()
	want := SupportedKinds()
	if len(want) != len(catalog) {
		t.Fatalf("supported kinds=%d, catalog entries=%d", len(want), len(catalog))
	}
	for _, k := range want {
		if _, ok := catalog[k]; !ok {
			t.Errorf("kind %q listed by SupportedKinds but missing from catalog", k)
		}
	}
}

// TestCatalogChannels enforces the per-kind channel set documented in
// `backend/README.md` §10. A drift here means the README and the code
// disagree; either fix the table or fix the test.
func TestCatalogChannels(t *testing.T) {
	t.Parallel()
	expect := map[string][]string{
		KindLobbyInviteReceived:         {ChannelPush, ChannelEmail},
		KindLobbyInviteRevoked:          {ChannelPush},
		KindLobbyApplicationSubmitted:   {ChannelPush},
		KindLobbyApplicationApproved:    {ChannelPush, ChannelEmail},
		KindLobbyApplicationRejected:    {ChannelPush, ChannelEmail},
		KindLobbyMembershipRemoved:      {ChannelPush, ChannelEmail},
		KindLobbyMembershipBlocked:      {ChannelPush, ChannelEmail},
		KindLobbyRaceNameRegistered:     {ChannelPush},
		KindLobbyRaceNamePending:        {ChannelPush, ChannelEmail},
		KindLobbyRaceNameExpired:        {ChannelPush},
		KindRuntimeImagePullFailed:      {ChannelEmail},
		KindRuntimeContainerStartFailed: {ChannelEmail},
		KindRuntimeStartConfigInvalid:   {ChannelEmail},
	}
	for kind, want := range expect {
		entry, ok := LookupCatalog(kind)
		if !ok {
			t.Errorf("kind %q missing from catalog", kind)
			continue
		}
		if len(entry.Channels) != len(want) {
			t.Errorf("kind %q channels=%v want %v", kind, entry.Channels, want)
			continue
		}
		for i, ch := range want {
			if entry.Channels[i] != ch {
				t.Errorf("kind %q channels[%d]=%s want %s", kind, i, entry.Channels[i], ch)
			}
		}
	}
}

// TestCatalogAdminOnlyForRuntime keeps the runtime kinds admin-only and
// every lobby kind user-facing.
func TestCatalogAdminOnlyForRuntime(t *testing.T) {
	t.Parallel()
	for kind, entry := range catalog {
		switch kind {
		case KindRuntimeImagePullFailed, KindRuntimeContainerStartFailed, KindRuntimeStartConfigInvalid:
			if !entry.Admin {
				t.Errorf("kind %q expected Admin=true", kind)
			}
		default:
			if entry.Admin {
				t.Errorf("kind %q expected Admin=false", kind)
			}
		}
	}
}
@@ -0,0 +1,99 @@
package notification

import (
	"context"
	"time"

	"galaxy/backend/internal/config"
	"galaxy/backend/internal/user"

	"github.com/google/uuid"
	"go.uber.org/zap"
)

// PushPublisher is the publisher contract notification uses to emit a
// `client_event` push frame to gateway. The real implementation lives
// in `backend/internal/push`; NewNoopPushPublisher satisfies
// the interface for tests that do not exercise push behaviour.
//
// Implementations must be concurrency-safe. The deviceSessionID pointer
// narrows the event to a single device session when non-nil; nil means
// fan out to every active session of userID. eventID, requestID and
// traceID are correlation identifiers that gateway forwards verbatim
// into the signed client envelope; empty strings are forwarded
// unchanged.
type PushPublisher interface {
	PublishClientEvent(ctx context.Context, userID uuid.UUID, deviceSessionID *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error
}

// Mailer is the email surface notification uses for outbound mail. The
// canonical implementation is `*mail.Service.EnqueueTemplate`; tests
// substitute a recording fake. The contract matches mail's existing
// signature so the wiring layer can pass the concrete service directly.
type Mailer interface {
	EnqueueTemplate(ctx context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error
}

// AccountResolver looks up the recipient profile (email + preferred
// language) by user_id. The canonical implementation is
// `*user.Service.GetAccount`. The narrow interface keeps the Service
// from depending on every part of the user surface.
type AccountResolver interface {
	GetAccount(ctx context.Context, userID uuid.UUID) (user.Account, error)
}

// Deps aggregates every collaborator the Service depends on.
//
// Store, Mail, and Accounts must be non-nil. Push defaults to the no-op
// publisher when omitted; Now defaults to time.Now; Logger defaults to
// zap.NewNop. Config carries the worker interval, the max-attempts cap,
// and the optional admin-email destination from `BACKEND_NOTIFICATION_*`.
type Deps struct {
	Store    *Store
	Mail     Mailer
	Push     PushPublisher
	Accounts AccountResolver
	Config   config.NotificationConfig
	// Now overrides time.Now for deterministic tests. A nil Now defaults
	// to time.Now in NewService.
	Now func() time.Time
	// Logger is named under "notification" by NewService. Nil falls back
	// to zap.NewNop.
	Logger *zap.Logger
}

// NewNoopPushPublisher returns a PushPublisher that logs every event
// at debug level and returns nil. The canonical publisher lives in
// `backend/internal/push`; this constructor exists for tests.
func NewNoopPushPublisher(logger *zap.Logger) PushPublisher {
	if logger == nil {
		logger = zap.NewNop()
	}
	return &noopPushPublisher{logger: logger.Named("push.noop")}
}

type noopPushPublisher struct {
	logger *zap.Logger
}

func (p *noopPushPublisher) PublishClientEvent(_ context.Context, userID uuid.UUID, deviceSessionID *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error {
	fields := []zap.Field{
		zap.String("user_id", userID.String()),
		zap.String("kind", kind),
		zap.Int("payload_keys", len(payload)),
	}
	if deviceSessionID != nil {
		fields = append(fields, zap.String("device_session_id", deviceSessionID.String()))
	}
	if eventID != "" {
		fields = append(fields, zap.String("event_id", eventID))
	}
	if requestID != "" {
		fields = append(fields, zap.String("request_id", requestID))
	}
	if traceID != "" {
		fields = append(fields, zap.String("trace_id", traceID))
	}
	p.logger.Debug("client event (noop publisher)", fields...)
	return nil
}
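
// Illustrative sketch (not part of the original commit; the type name is
// hypothetical): the kind of recording fake tests can substitute for
// Mailer. Only the EnqueueTemplate signature is taken from the interface
// above.
//
//	type fakeMailer struct {
//		enqueued []string
//	}
//
//	func (f *fakeMailer) EnqueueTemplate(ctx context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error {
//		f.enqueued = append(f.enqueued, templateID+" -> "+recipient)
//		return nil
//	}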
@@ -0,0 +1,175 @@
package notification

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"math/rand/v2"
	"time"

	"go.opentelemetry.io/otel/trace"
	"go.uber.org/zap"
)

// traceIDFromContext returns the W3C trace id of the active span as a
// hex string, or an empty string when ctx carries no recording span.
// The id is forwarded to gateway as ClientEvent.trace_id so push
// envelopes can be correlated to the producing trace.
func traceIDFromContext(ctx context.Context) string {
	if ctx == nil {
		return ""
	}
	spanCtx := trace.SpanContextFromContext(ctx)
	if !spanCtx.HasTraceID() {
		return ""
	}
	return spanCtx.TraceID().String()
}

// finaliseDispatch records the outcome of a single delivery attempt
// inside tx. The status transition table mirrors README §10 and the
// CHECK constraint on `notification_routes`:
//
//   - success → published (next_attempt_at NULL)
//   - failure with attempt < max → retrying (next_attempt_at armed)
//   - failure with attempt >= max → dead_lettered (+ insert
//     notification_dead_letters row)
//
// The function does not commit tx: the caller (worker / Submit best-
// effort) owns the transaction so it can compose the dispatch with the
// preceding ClaimDueRoutes lock.
func (s *Service) finaliseDispatch(ctx context.Context, tx *sql.Tx, claim ClaimedRoute, dispatchErr error, at time.Time) error {
	if dispatchErr == nil {
		return s.deps.Store.MarkRoutePublished(ctx, tx, claim.Route.RouteID, at)
	}
	attempt := claim.Route.Attempts + 1
	reason := dispatchErr.Error()
	maxAttempts := claim.Route.MaxAttempts
	if maxAttempts <= 0 {
		maxAttempts = int32(s.deps.Config.MaxAttempts)
	}
	if attempt >= maxAttempts {
		s.deps.Logger.Warn("notification route dead-lettered",
			zap.String("kind", claim.Notification.Kind),
			zap.String("channel", claim.Route.Channel),
			zap.String("route_id", claim.Route.RouteID.String()),
			zap.Int32("attempt", attempt),
			zap.Error(dispatchErr),
		)
		return s.deps.Store.MarkRouteDeadLettered(ctx, tx, claim.Notification.NotificationID, claim.Route.RouteID, at, reason)
	}
	nextAt := at.Add(routeBackoff(attempt))
	s.deps.Logger.Info("notification route retry scheduled",
		zap.String("kind", claim.Notification.Kind),
		zap.String("channel", claim.Route.Channel),
		zap.String("route_id", claim.Route.RouteID.String()),
		zap.Int32("attempt", attempt),
		zap.Time("next_attempt_at", nextAt),
		zap.Error(dispatchErr),
	)
	return s.deps.Store.ScheduleRouteRetry(ctx, tx, claim.Route.RouteID, at, nextAt, reason)
}

// bestEffortDispatch is invoked from Submit immediately after a route
// is durably persisted. It opens its own short transaction, runs the
// channel call, and writes the outcome with the same Mark* helpers
// the worker uses. Failures here are logged at debug level; the
// worker will retry on the next tick, so the producer never sees the
// synchronous failure.
func (s *Service) bestEffortDispatch(ctx context.Context, n Notification, route Route) {
	if route.Status != RouteStatusPending {
		return
	}
	claim := ClaimedRoute{Route: route, Notification: n}
	tx, err := s.deps.Store.BeginTx(ctx)
	if err != nil {
		s.deps.Logger.Debug("best-effort dispatch: begin tx failed",
			zap.String("route_id", route.RouteID.String()),
			zap.Error(err))
		return
	}
	defer func() { _ = tx.Rollback() }()

	dispatchErr := s.performDispatch(ctx, claim)
	at := s.nowUTC()
	if err := s.finaliseDispatch(ctx, tx, claim, dispatchErr, at); err != nil {
		s.deps.Logger.Debug("best-effort dispatch finalise failed",
			zap.String("route_id", route.RouteID.String()),
			zap.Error(err))
		return
	}
	if err := tx.Commit(); err != nil {
		s.deps.Logger.Debug("best-effort dispatch commit failed",
			zap.String("route_id", route.RouteID.String()),
			zap.Error(err))
	}
}

// performDispatch runs the channel-specific delivery. It returns nil on
// success and any error otherwise. The caller decides between retry
// and dead-letter based on the attempt counter and persisted state.
func (s *Service) performDispatch(ctx context.Context, claim ClaimedRoute) error {
	if ctx.Err() != nil {
		return ctx.Err()
	}
	switch claim.Route.Channel {
	case ChannelPush:
		if claim.Route.UserID == nil {
			return errors.New("push route missing user_id")
		}
		eventID := claim.Route.RouteID.String()
		requestID := claim.Notification.IdempotencyKey
		traceID := traceIDFromContext(ctx)
		return s.deps.Push.PublishClientEvent(ctx, *claim.Route.UserID, claim.Route.DeviceSessionID, claim.Notification.Kind, claim.Notification.Payload, eventID, requestID, traceID)
	case ChannelEmail:
		entry, ok := LookupCatalog(claim.Notification.Kind)
		if !ok {
			return fmt.Errorf("unknown kind %q", claim.Notification.Kind)
		}
		recipient := claim.Route.ResolvedEmail
		if trimSpace(recipient) == "" {
			return errors.New("email route missing resolved recipient")
		}
		// Use the route id as idempotency_key so the mail outbox
		// UNIQUE(template_id, idempotency_key) catches a duplicate
		// enqueue if the worker re-claims after a crash before
		// commit. Producers should never need to know the route id.
		return s.deps.Mail.EnqueueTemplate(ctx, entry.MailTemplateID, recipient, claim.Notification.Payload, claim.Route.RouteID.String())
	default:
		return fmt.Errorf("unknown channel %q", claim.Route.Channel)
	}
}

// routeBackoff computes the per-attempt delay using the package
// constants and ±backoffJitter randomisation. attempt is 1-indexed
// (the value the row will carry after Mark*); attempt==1 maps to
// `backoffBase × backoffFactor⁰`.
func routeBackoff(attempt int32) time.Duration {
	if attempt <= 1 {
		return jitter(backoffBase)
	}
	d := float64(backoffBase)
	for i := int32(1); i < attempt; i++ {
		d *= backoffFactor
		if time.Duration(d) >= backoffMax {
			return jitter(backoffMax)
		}
	}
	return jitter(time.Duration(d))
}

// jitter applies the package-standard ±backoffJitter swing using the
// global math/rand/v2 source.
func jitter(d time.Duration) time.Duration {
	if backoffJitter <= 0 {
		return d
	}
	span := float64(d) * backoffJitter
	delta := (rand.Float64()*2 - 1) * span
	out := time.Duration(float64(d) + delta)
	if out < 0 {
		return d
	}
	return out
}
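
// Illustrative sketch (not part of the original commit): the nominal
// schedule routeBackoff produces before jitter, computed from
// backoffBase=10s, backoffFactor=2, backoffMax=10m; each value then
// swings ±25%.
//
//	attempt 1 -> 10s
//	attempt 2 -> 20s
//	attempt 3 -> 40s
//	attempt 4 -> 1m20s
//	attempt 5 -> 2m40s
//	attempt 6 -> 5m20s
//	attempt 7 -> 10m40s, clamped to 10m (as is every later attempt)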
@@ -0,0 +1,45 @@
package notification

import (
	"testing"
	"time"
)

// TestRouteBackoffMonotonic locks the documented schedule:
// attempt 1 == ~backoffBase, each subsequent attempt grows by
// backoffFactor up to backoffMax. The check allows the full ±jitter
// window so the assertion is robust under random output.
func TestRouteBackoffMonotonic(t *testing.T) {
	t.Parallel()
	lower := func(d time.Duration) time.Duration {
		return time.Duration(float64(d) * (1 - backoffJitter))
	}
	upper := func(d time.Duration) time.Duration {
		return time.Duration(float64(d) * (1 + backoffJitter))
	}

	cases := []struct {
		attempt int32
		want    time.Duration
	}{
		{attempt: 1, want: backoffBase},
		{attempt: 2, want: time.Duration(float64(backoffBase) * backoffFactor)},
		{attempt: 3, want: time.Duration(float64(backoffBase) * backoffFactor * backoffFactor)},
	}
	for _, tc := range cases {
		got := routeBackoff(tc.attempt)
		if got < lower(tc.want) || got > upper(tc.want) {
			t.Fatalf("attempt=%d got=%s want ~%s (±%.0f%%)", tc.attempt, got, tc.want, backoffJitter*100)
		}
	}
}

// TestRouteBackoffCap asserts the schedule clamps at backoffMax.
func TestRouteBackoffCap(t *testing.T) {
	t.Parallel()
	upper := time.Duration(float64(backoffMax) * (1 + backoffJitter))
	got := routeBackoff(50)
	if got > upper {
		t.Fatalf("attempt=50 got=%s exceeds cap (max=%s)", got, backoffMax)
	}
}
@@ -0,0 +1,22 @@
package notification

import "errors"

// ErrNotificationNotFound is returned by AdminGetNotification when no
// row matches the supplied identifier. Handlers map it to HTTP 404.
var ErrNotificationNotFound = errors.New("notification: notification not found")

// ErrUnknownKind is returned by Submit when the intent's Kind is not in
// the catalog (`backend/README.md` §10). Submit does not surface it to
// the producer: it persists a malformed-intent record and returns nil.
// The exported sentinel exists so the package internals can branch on it.
var ErrUnknownKind = errors.New("notification: unknown kind")

// ErrEmptyIdempotencyKey is returned by Submit when the intent does not
// carry an idempotency_key. Same surface treatment as ErrUnknownKind.
var ErrEmptyIdempotencyKey = errors.New("notification: idempotency_key must be non-empty")

// ErrNoRecipients is returned by Submit when a kind that requires user
// recipients arrives without any. Same surface treatment as
// ErrUnknownKind.
var ErrNoRecipients = errors.New("notification: at least one recipient is required")
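
// Illustrative sketch (not part of the original commit): package
// internals branch on the sentinels with errors.Is, e.g.
//
//	if errors.Is(err, ErrUnknownKind) {
//		// record a malformed intent; the producer still sees err == nil
//	}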
@@ -0,0 +1,35 @@
package notification

import (
	"context"

	"galaxy/backend/internal/lobby"
)

// LobbyAdapter returns an implementation of `lobby.NotificationPublisher`
// backed by *Service. The adapter copies the producer-side intent shape
// into notification.Intent and calls Submit; Submit's own malformed
// fallback handles invalid payloads, so the adapter never blocks the
// caller. The adapter implements the same interface the wiring layer
// otherwise satisfies with the no-op publisher.
func (s *Service) LobbyAdapter() lobby.NotificationPublisher {
	return &lobbyAdapter{svc: s}
}

type lobbyAdapter struct {
	svc *Service
}

func (a *lobbyAdapter) PublishLobbyEvent(ctx context.Context, ev lobby.LobbyNotification) error {
	if a == nil || a.svc == nil {
		return nil
	}
	intent := Intent{
		Kind:           ev.Kind,
		IdempotencyKey: ev.IdempotencyKey,
		Recipients:     ev.Recipients,
		Payload:        ev.Payload,
	}
	_, err := a.svc.Submit(ctx, intent)
	return err
}
@@ -0,0 +1,117 @@
// Package notification implements the in-process notification pipeline
// described in `backend/PLAN.md` §5.7, `ARCHITECTURE.md` §12, and the
// catalog in `backend/README.md` §10. Producers (lobby, runtime) submit
// intents via Submit; the service persists each intent into
// `backend.notifications`, materialises one row per (recipient, channel)
// in `backend.notification_routes`, and attempts a synchronous best-effort
// dispatch. Failed routes are picked up by a background Worker that retries
// with exponential backoff and dead-letters past the configured maximum.
//
// Push routes are emitted via PushPublisher (the canonical
// `push.Service` over gRPC; the package also ships a
// NoopPushPublisher for tests). Email routes call
// mail.EnqueueTemplate, which feeds the durable mail outbox.
package notification

import (
	"time"

	"galaxy/backend/internal/config"

	"go.uber.org/zap"
)

// Status values stored in `notification_routes.status`. Mirrored by the
// CHECK constraint in migration 00001.
const (
	RouteStatusPending      = "pending"
	RouteStatusRetrying     = "retrying"
	RouteStatusPublished    = "published"
	RouteStatusSkipped      = "skipped"
	RouteStatusDeadLettered = "dead_lettered"
)

// Channel values stored in `notification_routes.channel`. The catalog in
// `backend/README.md` §10 documents the per-kind set.
const (
	ChannelPush  = "push"
	ChannelEmail = "email"
)

// Backoff parameters for the route worker. Mirrors the trade-off captured
// for the mail outbox in `backend/README.md`: exponential
// growth from a 10 second base, capped at 10 minutes, with ±25% jitter.
const (
	backoffBase   = 10 * time.Second
	backoffFactor = 2.0
	backoffMax    = 10 * time.Minute
	backoffJitter = 0.25

	// claimBatchSize bounds the number of routes pulled out of Postgres
	// per worker tick. Same logic as `mail.claimBatchSize`: each row is
	// processed in its own short transaction so a slow channel does not
	// block its peers.
	claimBatchSize = 16
)

// Service is the notification entry point. It composes the persistence
// store, the push and mail dispatchers, the account resolver used for
// recipient email lookups, runtime configuration, and a structured
// logger.
type Service struct {
	deps Deps
}

// NewService constructs a Service from deps. A nil Logger defaults to
// zap.NewNop; a nil Now defaults to time.Now. Store, Mail, and Accounts
// must be non-nil: calling Service methods with any of them nil panics
// on first use, matching how the rest of `internal/*` signals missing
// wiring. A nil Push defaults to the no-op publisher used by tests
// that do not exercise the gRPC stream.
func NewService(deps Deps) *Service {
	if deps.Now == nil {
		deps.Now = time.Now
	}
	if deps.Logger == nil {
		deps.Logger = zap.NewNop()
	}
	deps.Logger = deps.Logger.Named("notification")
	if deps.Push == nil {
		deps.Push = NewNoopPushPublisher(deps.Logger)
	}
	return &Service{deps: deps}
}

// Config returns the runtime notification configuration. Worker uses it
// to schedule the scan loop and bound retries.
func (s *Service) Config() config.NotificationConfig {
	return s.deps.Config
}

// Logger returns the package-named structured logger. Worker and the
// admin handlers reuse it so scoped fields stay consistent.
func (s *Service) Logger() *zap.Logger {
	return s.deps.Logger
}

// now returns the package-configured clock; the helper keeps the rest
// of the code free from `if s.deps.Now == nil` checks.
func (s *Service) now() time.Time {
	if s.deps.Now == nil {
		return time.Now()
	}
	return s.deps.Now()
}

// nowUTC returns the configured clock normalised to UTC, matching the
// convention used by `time.Time` columns elsewhere in `backend`.
func (s *Service) nowUTC() time.Time {
	return s.now().UTC()
}

// adminEmail returns the configured admin recipient address with
// surrounding whitespace removed; the empty string indicates no admin
// recipient is configured.
func (s *Service) adminEmail() string {
	return trimSpace(s.deps.Config.AdminEmail)
}
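
// Illustrative sketch (not part of the original commit; variable names
// and the cfg.Notification field are assumptions): minimal wiring with
// a *sql.DB, the mail service, and the user service already built. Push
// is omitted, so NewService falls back to the no-op publisher.
//
//	svc := notification.NewService(notification.Deps{
//		Store:    notification.NewStore(db),
//		Mail:     mailSvc, // satisfies Mailer
//		Accounts: userSvc, // satisfies AccountResolver
//		Config:   cfg.Notification,
//		Logger:   logger,
//	})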
@@ -0,0 +1,35 @@
package notification

import (
	"context"

	"galaxy/backend/internal/runtime"
)

// RuntimeAdapter returns an implementation of
// `runtime.NotificationPublisher` backed by *Service. The adapter
// translates runtime's narrow `(kind, idempotency_key, payload)` shape
// into a notification.Intent and calls Submit. Recipient resolution is
// handled by Submit's catalog lookup: every kind runtime emits is
// `Admin: true`, so the recipient comes from the configured
// `BACKEND_NOTIFICATION_ADMIN_EMAIL`.
func (s *Service) RuntimeAdapter() runtime.NotificationPublisher {
	return &runtimeAdapter{svc: s}
}

type runtimeAdapter struct {
	svc *Service
}

func (a *runtimeAdapter) PublishRuntimeEvent(ctx context.Context, kind, idempotencyKey string, payload map[string]any) error {
	if a == nil || a.svc == nil {
		return nil
	}
	intent := Intent{
		Kind:           kind,
		IdempotencyKey: idempotencyKey,
		Payload:        payload,
	}
	_, err := a.svc.Submit(ctx, intent)
	return err
}
@@ -0,0 +1,606 @@
package notification

import (
	"context"
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"time"

	"galaxy/backend/internal/postgres/jet/backend/model"
	"galaxy/backend/internal/postgres/jet/backend/table"

	"github.com/go-jet/jet/v2/postgres"
	"github.com/go-jet/jet/v2/qrm"
	"github.com/google/uuid"
)

// Store is the Postgres-backed query surface for notifications,
// notification_routes, notification_dead_letters, and
// notification_malformed_intents. All queries are built through go-jet
// against the generated table bindings under
// `backend/internal/postgres/jet/backend/table`.
type Store struct {
	db *sql.DB
}

// NewStore constructs a Store wrapping db.
func NewStore(db *sql.DB) *Store {
	return &Store{db: db}
}

// BeginTx exposes the transaction handle to the worker so the
// claim-dispatch-mark cycle stays within a single commit boundary.
func (s *Store) BeginTx(ctx context.Context) (*sql.Tx, error) {
	return s.db.BeginTx(ctx, nil)
}

// RouteSeed describes one freshly-materialised route destined for an
// `INSERT INTO notification_routes` inside InsertNotification.
type RouteSeed struct {
	RouteID         uuid.UUID
	Channel         string
	Status          string
	MaxAttempts     int32
	NextAttemptAt   *time.Time
	ResolvedEmail   string
	ResolvedLocale  string
	UserID          *uuid.UUID
	DeviceSessionID *uuid.UUID
	SkippedAt       *time.Time
	LastError       string
}

// InsertNotificationArgs aggregates the inputs to InsertNotification.
type InsertNotificationArgs struct {
	NotificationID uuid.UUID
	Kind           string
	IdempotencyKey string
	UserID         *uuid.UUID
	Payload        map[string]any
	Routes         []RouteSeed
}
// InsertNotification persists a notification row together with its
// route rows in a single transaction. The (kind, idempotency_key)
// UNIQUE constraint serves the idempotency contract: the second
// caller observes inserted=false and the existing notification_id is
// returned. On the duplicate path no route rows are inserted and the
// transaction rolls back so an orphan notification cannot exist.
func (s *Store) InsertNotification(ctx context.Context, args InsertNotificationArgs) (uuid.UUID, bool, error) {
	payload, err := encodePayload(args.Payload)
	if err != nil {
		return uuid.Nil, false, fmt.Errorf("encode payload: %w", err)
	}
	var (
		storedID uuid.UUID
		inserted bool
	)
	err = withTx(ctx, s.db, func(tx *sql.Tx) error {
		insertStmt := table.Notifications.INSERT(
			table.Notifications.NotificationID,
			table.Notifications.Kind,
			table.Notifications.IdempotencyKey,
			table.Notifications.UserID,
			table.Notifications.Payload,
		).VALUES(
			args.NotificationID, args.Kind, args.IdempotencyKey, args.UserID, string(payload),
		).
			ON_CONFLICT(table.Notifications.Kind, table.Notifications.IdempotencyKey).
			DO_NOTHING().
			RETURNING(table.Notifications.NotificationID)

		var freshRow model.Notifications
		err := insertStmt.QueryContext(ctx, tx, &freshRow)
		switch {
		case errors.Is(err, qrm.ErrNoRows):
			// Idempotent re-submit. Look up the existing row id and bail.
			lookupStmt := postgres.SELECT(table.Notifications.NotificationID).
				FROM(table.Notifications).
				WHERE(
					table.Notifications.Kind.EQ(postgres.String(args.Kind)).
						AND(table.Notifications.IdempotencyKey.EQ(postgres.String(args.IdempotencyKey))),
				).
				LIMIT(1)
			var existing model.Notifications
			if scanErr := lookupStmt.QueryContext(ctx, tx, &existing); scanErr != nil {
				return fmt.Errorf("lookup existing notification: %w", scanErr)
			}
			storedID = existing.NotificationID
			return errIdempotentNoop
		case err != nil:
			return fmt.Errorf("insert notification: %w", err)
		}
		storedID = freshRow.NotificationID
		inserted = true
		for _, r := range args.Routes {
			routeStmt := table.NotificationRoutes.INSERT(
				table.NotificationRoutes.RouteID,
				table.NotificationRoutes.NotificationID,
				table.NotificationRoutes.Channel,
				table.NotificationRoutes.Status,
				table.NotificationRoutes.MaxAttempts,
				table.NotificationRoutes.NextAttemptAt,
				table.NotificationRoutes.ResolvedEmail,
				table.NotificationRoutes.ResolvedLocale,
				table.NotificationRoutes.LastError,
				table.NotificationRoutes.SkippedAt,
			).VALUES(
				r.RouteID, args.NotificationID, r.Channel, r.Status,
				r.MaxAttempts, r.NextAttemptAt,
				r.ResolvedEmail, r.ResolvedLocale, r.LastError,
				r.SkippedAt,
			)
			if _, err := routeStmt.ExecContext(ctx, tx); err != nil {
				return fmt.Errorf("insert route %s: %w", r.RouteID, err)
			}
		}
		return nil
	})
	if errors.Is(err, errIdempotentNoop) {
		return storedID, false, nil
	}
	if err != nil {
		return uuid.Nil, false, err
	}
	return storedID, inserted, nil
}

// errIdempotentNoop tells withTx to roll back the transaction without
// surfacing an error to the caller. It must never escape this package.
var errIdempotentNoop = errors.New("notification store: idempotent noop")
// MarkRoutePublished flips a route to status='published', clears the
// retry schedule, stamps published_at and last_attempt_at, and clears
// last_error.
func (s *Store) MarkRoutePublished(ctx context.Context, tx *sql.Tx, routeID uuid.UUID, at time.Time) error {
	r := table.NotificationRoutes
	stmt := r.UPDATE().
		SET(
			r.Status.SET(postgres.String(RouteStatusPublished)),
			r.Attempts.SET(r.Attempts.ADD(postgres.Int(1))),
			r.LastAttemptAt.SET(postgres.TimestampzT(at)),
			r.PublishedAt.SET(postgres.TimestampzT(at)),
			r.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			r.LastError.SET(postgres.String("")),
			r.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(r.RouteID.EQ(postgres.UUID(routeID)))
	if _, err := stmt.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("mark route published: %w", err)
	}
	return nil
}

// ScheduleRouteRetry flips a route to status='retrying', bumps
// attempts, arms next_attempt_at, and stamps the diagnostic message.
func (s *Store) ScheduleRouteRetry(ctx context.Context, tx *sql.Tx, routeID uuid.UUID, at time.Time, nextAt time.Time, errMsg string) error {
	r := table.NotificationRoutes
	stmt := r.UPDATE().
		SET(
			r.Status.SET(postgres.String(RouteStatusRetrying)),
			r.Attempts.SET(r.Attempts.ADD(postgres.Int(1))),
			r.LastAttemptAt.SET(postgres.TimestampzT(at)),
			r.NextAttemptAt.SET(postgres.TimestampzT(nextAt)),
			r.LastError.SET(postgres.String(errMsg)),
			r.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(r.RouteID.EQ(postgres.UUID(routeID)))
	if _, err := stmt.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("schedule route retry: %w", err)
	}
	return nil
}

// MarkRouteDeadLettered moves the route to the terminal `dead_lettered`
// state and inserts a notification_dead_letters row under the same
// transaction.
func (s *Store) MarkRouteDeadLettered(ctx context.Context, tx *sql.Tx, notificationID, routeID uuid.UUID, at time.Time, reason string) error {
	r := table.NotificationRoutes
	updateStmt := r.UPDATE().
		SET(
			r.Status.SET(postgres.String(RouteStatusDeadLettered)),
			r.Attempts.SET(r.Attempts.ADD(postgres.Int(1))),
			r.LastAttemptAt.SET(postgres.TimestampzT(at)),
			r.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			r.DeadLetteredAt.SET(postgres.TimestampzT(at)),
			r.LastError.SET(postgres.String(reason)),
			r.UpdatedAt.SET(postgres.TimestampzT(at)),
		).
		WHERE(r.RouteID.EQ(postgres.UUID(routeID)))
	if _, err := updateStmt.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("mark route dead-lettered: %w", err)
	}

	dl := table.NotificationDeadLetters
	insertStmt := dl.INSERT(
		dl.DeadLetterID, dl.NotificationID, dl.RouteID, dl.ArchivedAt, dl.Reason,
	).VALUES(uuid.New(), notificationID, routeID, at, reason)
	if _, err := insertStmt.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("insert notification dead-letter: %w", err)
	}
	return nil
}

// ClaimedRoute bundles a locked route row with its parent notification
// so the worker has every field it needs in one trip.
type ClaimedRoute struct {
	Route        Route
	Notification Notification
}

// ClaimDueRoutes locks up to `limit` due routes with FOR UPDATE SKIP
// LOCKED, joins the parent notification to surface kind/payload, and
// returns them. exclude is the list of route_ids already handled in
// the current tick; they are filtered out so the same row cannot
// chew through MaxAttempts inside a single tick when its retry
// schedule lands at <= now().
func (s *Store) ClaimDueRoutes(ctx context.Context, tx *sql.Tx, limit int, exclude ...uuid.UUID) ([]ClaimedRoute, error) {
	r := table.NotificationRoutes
	n := table.Notifications

	condition := r.Status.IN(postgres.String(RouteStatusPending), postgres.String(RouteStatusRetrying)).
		AND(r.NextAttemptAt.IS_NULL().OR(r.NextAttemptAt.LT_EQ(postgres.NOW())))
	if len(exclude) > 0 {
		excludeExprs := make([]postgres.Expression, 0, len(exclude))
		for _, id := range exclude {
			excludeExprs = append(excludeExprs, postgres.UUID(id))
		}
		condition = condition.AND(r.RouteID.NOT_IN(excludeExprs...))
	}

	stmt := postgres.SELECT(
		r.AllColumns,
		n.Kind, n.IdempotencyKey, n.UserID, n.Payload, n.CreatedAt,
	).
		FROM(r.INNER_JOIN(n, n.NotificationID.EQ(r.NotificationID))).
		WHERE(condition).
		ORDER_BY(postgres.COALESCE(r.NextAttemptAt, r.CreatedAt).ASC()).
		LIMIT(int64(limit)).
		FOR(postgres.UPDATE().OF(r).SKIP_LOCKED())

	var rows []struct {
		model.NotificationRoutes
		Notifications struct {
			Kind           string
			IdempotencyKey string
			UserID         *uuid.UUID
			Payload        *string
			CreatedAt      time.Time
		}
	}
	if err := stmt.QueryContext(ctx, tx, &rows); err != nil {
		return nil, fmt.Errorf("claim due routes: %w", err)
	}

	out := make([]ClaimedRoute, 0, len(rows))
	for _, row := range rows {
		route := modelToRoute(row.NotificationRoutes)
		route.UserID = row.Notifications.UserID
		notif := Notification{
			NotificationID: row.NotificationRoutes.NotificationID,
			Kind:           row.Notifications.Kind,
			IdempotencyKey: row.Notifications.IdempotencyKey,
			UserID:         row.Notifications.UserID,
			CreatedAt:      row.Notifications.CreatedAt,
		}
		decoded, err := decodePayload(payloadBytesFromPtr(row.Notifications.Payload))
		if err != nil {
			return nil, fmt.Errorf("decode notification payload: %w", err)
		}
		notif.Payload = decoded
		out = append(out, ClaimedRoute{Route: route, Notification: notif})
	}
	return out, nil
}
// ListNotificationsResult bundles a page of notifications and the
// total-row count. Layout mirrors `mail.AdminListDeliveriesPage`.
type ListNotificationsResult struct {
	Items []Notification
	Total int64
}

// ListNotifications returns the page newest-first.
func (s *Store) ListNotifications(ctx context.Context, offset, limit int) (ListNotificationsResult, error) {
	total, err := countAll(ctx, s.db, table.Notifications)
	if err != nil {
		return ListNotificationsResult{}, fmt.Errorf("count notifications: %w", err)
	}
	n := table.Notifications
	stmt := postgres.SELECT(
		n.NotificationID, n.Kind, n.IdempotencyKey, n.UserID,
		n.Payload, n.CreatedAt,
	).
		FROM(n).
		ORDER_BY(n.CreatedAt.DESC(), n.NotificationID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))

	var rows []model.Notifications
	if err := stmt.QueryContext(ctx, s.db, &rows); err != nil {
		return ListNotificationsResult{}, fmt.Errorf("list notifications: %w", err)
	}
	items := make([]Notification, 0, len(rows))
	for _, row := range rows {
		notif, err := modelToNotification(row)
		if err != nil {
			return ListNotificationsResult{}, err
		}
		items = append(items, notif)
	}
	return ListNotificationsResult{Items: items, Total: total}, nil
}

// GetNotification loads a notification by primary key. The sentinel
// ErrNotificationNotFound is returned when no row matches.
func (s *Store) GetNotification(ctx context.Context, id uuid.UUID) (Notification, error) {
	n := table.Notifications
	stmt := postgres.SELECT(
		n.NotificationID, n.Kind, n.IdempotencyKey, n.UserID,
		n.Payload, n.CreatedAt,
	).
		FROM(n).
		WHERE(n.NotificationID.EQ(postgres.UUID(id))).
		LIMIT(1)

	var row model.Notifications
	if err := stmt.QueryContext(ctx, s.db, &row); err != nil {
		if errors.Is(err, qrm.ErrNoRows) {
			return Notification{}, ErrNotificationNotFound
		}
		return Notification{}, fmt.Errorf("get notification: %w", err)
	}
	return modelToNotification(row)
}

// ListDeadLettersResult bundles a page of dead-letters and the total
// row count.
type ListDeadLettersResult struct {
	Items []DeadLetter
	Total int64
}

// ListDeadLetters returns the dead-letter page newest-first.
func (s *Store) ListDeadLetters(ctx context.Context, offset, limit int) (ListDeadLettersResult, error) {
	total, err := countAll(ctx, s.db, table.NotificationDeadLetters)
	if err != nil {
		return ListDeadLettersResult{}, fmt.Errorf("count dead-letters: %w", err)
	}
	dl := table.NotificationDeadLetters
	stmt := postgres.SELECT(
		dl.DeadLetterID, dl.NotificationID, dl.RouteID, dl.ArchivedAt, dl.Reason,
	).
		FROM(dl).
		ORDER_BY(dl.ArchivedAt.DESC(), dl.DeadLetterID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))

	var rows []model.NotificationDeadLetters
	if err := stmt.QueryContext(ctx, s.db, &rows); err != nil {
		return ListDeadLettersResult{}, fmt.Errorf("list dead-letters: %w", err)
	}
	items := make([]DeadLetter, 0, len(rows))
	for _, row := range rows {
		items = append(items, DeadLetter{
			DeadLetterID:   row.DeadLetterID,
			NotificationID: row.NotificationID,
			RouteID:        row.RouteID,
			ArchivedAt:     row.ArchivedAt,
			Reason:         row.Reason,
		})
	}
	return ListDeadLettersResult{Items: items, Total: total}, nil
}

// ListMalformedResult bundles a page of malformed intents and the
// total row count.
type ListMalformedResult struct {
	Items []MalformedIntent
	Total int64
}

// ListMalformed returns the malformed page newest-first.
func (s *Store) ListMalformed(ctx context.Context, offset, limit int) (ListMalformedResult, error) {
	total, err := countAll(ctx, s.db, table.NotificationMalformedIntents)
	if err != nil {
		return ListMalformedResult{}, fmt.Errorf("count malformed intents: %w", err)
	}
	m := table.NotificationMalformedIntents
	stmt := postgres.SELECT(m.ID, m.ReceivedAt, m.Payload, m.Reason).
		FROM(m).
		ORDER_BY(m.ReceivedAt.DESC(), m.ID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))

	var rows []model.NotificationMalformedIntents
	if err := stmt.QueryContext(ctx, s.db, &rows); err != nil {
		return ListMalformedResult{}, fmt.Errorf("list malformed intents: %w", err)
	}
	items := make([]MalformedIntent, 0, len(rows))
	for _, row := range rows {
		decoded, err := decodePayload([]byte(row.Payload))
		if err != nil {
			return ListMalformedResult{}, fmt.Errorf("decode malformed payload: %w", err)
		}
		items = append(items, MalformedIntent{
			ID:         row.ID,
			ReceivedAt: row.ReceivedAt,
			Payload:    decoded,
			Reason:     row.Reason,
		})
	}
	return ListMalformedResult{Items: items, Total: total}, nil
}

// InsertMalformed records a producer-supplied intent that failed
// validation. The payload is best-effort JSON-encoded by the caller;
// the row never blocks the producer.
func (s *Store) InsertMalformed(ctx context.Context, payload map[string]any, reason string) error {
	encoded, err := encodePayload(payload)
	if err != nil {
		return fmt.Errorf("encode malformed payload: %w", err)
	}
	m := table.NotificationMalformedIntents
	stmt := m.INSERT(m.ID, m.Payload, m.Reason).
		VALUES(uuid.New(), string(encoded), reason)
	if _, err := stmt.ExecContext(ctx, s.db); err != nil {
		return fmt.Errorf("insert malformed intent: %w", err)
	}
	return nil
}

// SkipPendingRoutesForUser flips every pending or retrying route owned
// by userID to status='skipped'. The `OnUserDeleted` cascade calls it so
// the worker stops trying to deliver notifications to a vanished
// account; published rows are kept as an audit trail.
func (s *Store) SkipPendingRoutesForUser(ctx context.Context, userID uuid.UUID, at time.Time) (int64, error) {
	r := table.NotificationRoutes
	n := table.Notifications

	notifSubquery := postgres.SELECT(n.NotificationID).
		FROM(n).
		WHERE(n.UserID.EQ(postgres.UUID(userID)))

	stmt := r.UPDATE().
		SET(
			r.Status.SET(postgres.String(RouteStatusSkipped)),
			r.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			r.SkippedAt.SET(postgres.TimestampzT(at)),
			r.UpdatedAt.SET(postgres.TimestampzT(at)),
			r.LastError.SET(postgres.String("recipient soft-deleted")),
		).
		WHERE(
			r.Status.IN(postgres.String(RouteStatusPending), postgres.String(RouteStatusRetrying)).
				AND(r.NotificationID.IN(notifSubquery)),
		)
	res, err := stmt.ExecContext(ctx, s.db)
	if err != nil {
		return 0, fmt.Errorf("skip pending routes: %w", err)
	}
	affected, err := res.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("rows affected: %w", err)
	}
	return affected, nil
}
// withTx wraps fn in a Postgres transaction. fn's return value
|
||||
// determines commit (nil) vs rollback (non-nil). Rollback errors are
|
||||
// swallowed when fn already returned an error, since the latter is
|
||||
// more actionable.
|
||||
func withTx(ctx context.Context, db *sql.DB, fn func(tx *sql.Tx) error) error {
|
||||
tx, err := db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("notification store: begin tx: %w", err)
|
||||
}
|
||||
if err := fn(tx); err != nil {
|
||||
_ = tx.Rollback()
|
||||
return err
|
||||
}
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("notification store: commit tx: %w", err)
|
||||
}
|
||||
return nil
|
||||
}

// modelToRoute projects a generated model row onto the public Route
// struct (without the user-id which lives on the parent notification).
func modelToRoute(row model.NotificationRoutes) Route {
	r := Route{
		RouteID:        row.RouteID,
		NotificationID: row.NotificationID,
		Channel:        row.Channel,
		Status:         row.Status,
		Attempts:       row.Attempts,
		MaxAttempts:    row.MaxAttempts,
		LastError:      row.LastError,
		ResolvedEmail:  row.ResolvedEmail,
		ResolvedLocale: row.ResolvedLocale,
		CreatedAt:      row.CreatedAt,
		UpdatedAt:      row.UpdatedAt,
	}
	if row.NextAttemptAt != nil {
		t := *row.NextAttemptAt
		r.NextAttemptAt = &t
	}
	if row.LastAttemptAt != nil {
		t := *row.LastAttemptAt
		r.LastAttemptAt = &t
	}
	if row.PublishedAt != nil {
		t := *row.PublishedAt
		r.PublishedAt = &t
	}
	if row.DeadLetteredAt != nil {
		t := *row.DeadLetteredAt
		r.DeadLetteredAt = &t
	}
	if row.SkippedAt != nil {
		t := *row.SkippedAt
		r.SkippedAt = &t
	}
	return r
}

// modelToNotification decodes a generated model row into the public
// Notification struct, including the JSON payload.
func modelToNotification(row model.Notifications) (Notification, error) {
	decoded, err := decodePayload(payloadBytesFromPtr(row.Payload))
	if err != nil {
		return Notification{}, fmt.Errorf("decode payload: %w", err)
	}
	return Notification{
		NotificationID: row.NotificationID,
		Kind:           row.Kind,
		IdempotencyKey: row.IdempotencyKey,
		UserID:         row.UserID,
		Payload:        decoded,
		CreatedAt:      row.CreatedAt,
	}, nil
}

// payloadBytesFromPtr converts the nullable string from the generated
// jsonb-as-text model into the byte slice expected by decodePayload.
func payloadBytesFromPtr(p *string) []byte {
	if p == nil {
		return nil
	}
	return []byte(*p)
}

// encodePayload renders a map[string]any to JSON for storage in
// jsonb columns. A nil map encodes as JSON null; this is harmless on
// the read path because decodePayload returns nil for it.
func encodePayload(payload map[string]any) ([]byte, error) {
	if payload == nil {
		return []byte("null"), nil
	}
	return json.Marshal(payload)
}

// decodePayload parses a jsonb column back into the producer's map.
// A NULL or empty buffer round-trips to nil.
func decodePayload(buf []byte) (map[string]any, error) {
	if len(buf) == 0 || strings.EqualFold(strings.TrimSpace(string(buf)), "null") {
		return nil, nil
	}
	out := map[string]any{}
	if err := json.Unmarshal(buf, &out); err != nil {
		return nil, err
	}
	return out, nil
}
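
// Round-trip sketch (illustrative, not part of the original commit):
//
//	buf, _ := encodePayload(nil)                   // []byte("null")
//	m, _ := decodePayload(buf)                     // nil, nil
//	buf, _ = encodePayload(map[string]any{"a": 1}) // {"a":1}
//	m, _ = decodePayload(buf)                      // map[a:1] (JSON numbers decode as float64)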

// countAll runs `SELECT COUNT(*) FROM <tbl>` through jet and returns
// the result. The destination uses an alias-tagged scalar so QRM can
// map the un-prefixed alias produced by AS("count").
func countAll(ctx context.Context, db qrm.DB, tbl postgres.ReadableTable) (int64, error) {
	stmt := postgres.SELECT(postgres.COUNT(postgres.STAR).AS("count")).FROM(tbl)
	var dest struct {
		Count int64 `alias:"count"`
	}
	if err := stmt.QueryContext(ctx, db, &dest); err != nil {
		return 0, err
	}
	return dest.Count, nil
}
@@ -0,0 +1,258 @@
package notification

import (
	"context"
	"errors"
	"fmt"
	"time"

	"galaxy/backend/internal/user"

	"github.com/google/uuid"
	"go.uber.org/zap"
)

// Submit accepts a producer intent, validates it against the catalog,
// resolves recipients, materialises route rows, persists everything in
// one transaction, and best-effort dispatches the routes synchronously.
//
// The contract: producers never block on Submit, and Submit never
// surfaces a validation failure as an error — malformed intents go to
// `notification_malformed_intents` and the call returns nil. Real
// errors (encoder failure, Postgres trouble) are wrapped and returned.
//
// On idempotent re-submit (same kind + idempotency_key) the existing
// notification id is honoured and route materialisation is skipped.
func (s *Service) Submit(ctx context.Context, intent Intent) (uuid.UUID, error) {
	entry, ok := LookupCatalog(intent.Kind)
	if !ok {
		s.recordMalformed(ctx, intent, ErrUnknownKind.Error())
		return uuid.Nil, nil
	}
	if trimSpace(intent.IdempotencyKey) == "" {
		s.recordMalformed(ctx, intent, ErrEmptyIdempotencyKey.Error())
		return uuid.Nil, nil
	}
	if !entry.Admin && len(intent.Recipients) == 0 {
		s.recordMalformed(ctx, intent, ErrNoRecipients.Error())
		return uuid.Nil, nil
	}

	now := s.nowUTC()
	notificationID := uuid.New()
	var primaryUserID *uuid.UUID
	if !entry.Admin && len(intent.Recipients) == 1 {
		uid := intent.Recipients[0]
		primaryUserID = &uid
	}

	routes, err := s.materialiseRoutes(ctx, notificationID, entry, intent, now)
	if err != nil {
		return uuid.Nil, err
	}

	storedID, inserted, err := s.deps.Store.InsertNotification(ctx, InsertNotificationArgs{
		NotificationID: notificationID,
		Kind:           intent.Kind,
		IdempotencyKey: intent.IdempotencyKey,
		UserID:         primaryUserID,
		Payload:        intent.Payload,
		Routes:         routes,
	})
	if err != nil {
		return uuid.Nil, fmt.Errorf("notification submit: %w", err)
	}
	if !inserted {
		s.deps.Logger.Debug("idempotent submit, returning existing notification",
			zap.String("kind", intent.Kind),
			zap.String("idempotency_key", intent.IdempotencyKey),
			zap.String("notification_id", storedID.String()),
		)
		return storedID, nil
	}

	// Best-effort synchronous dispatch: any pending route gets a single
	// attempt right now. Failures stay on the row for the worker to
	// retry; they are not surfaced to producers.
	for i := range routes {
		if routes[i].Status != RouteStatusPending {
			continue
		}
		s.bestEffortDispatch(ctx, Notification{
			NotificationID: notificationID,
			Kind:           intent.Kind,
			IdempotencyKey: intent.IdempotencyKey,
			UserID:         primaryUserID,
			Payload:        intent.Payload,
			CreatedAt:      now,
		}, routeFromSeed(notificationID, routes[i], now))
	}

	return notificationID, nil
}
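
// Producer-side sketch (illustrative; the payload fields and id
// variables are hypothetical):
//
//	id, err := svc.Submit(ctx, notification.Intent{
//		Kind:           notification.KindLobbyInviteReceived,
//		IdempotencyKey: "invite:" + inviteID.String(),
//		Recipients:     []uuid.UUID{inviteeID},
//		Payload:        map[string]any{"game_id": gameID.String()},
//	})
//	// err != nil only for infrastructure failures; a malformed intent
//	// yields (uuid.Nil, nil) and lands in the malformed-intents table.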

// materialiseRoutes builds the per-(recipient, channel) seeds that
// land in `notification_routes`. The function performs recipient
// resolution and the catalog-aware channel fan-out. Each seed already
// carries its terminal status (`pending` for live routes, `skipped`
// for cases where the destination cannot be resolved).
func (s *Service) materialiseRoutes(ctx context.Context, notificationID uuid.UUID, entry CatalogEntry, intent Intent, now time.Time) ([]RouteSeed, error) {
	_ = notificationID
	maxAttempts := int32(s.deps.Config.MaxAttempts)
	if maxAttempts <= 0 {
		maxAttempts = 1
	}
	pendingNext := timePtr(now.UTC())

	if entry.Admin {
		// Admin-channel kinds: one row per channel, no per-user fan-out.
		seeds := make([]RouteSeed, 0, len(entry.Channels))
		for _, ch := range entry.Channels {
			seed := RouteSeed{
				RouteID:       uuid.New(),
				Channel:       ch,
				Status:        RouteStatusPending,
				MaxAttempts:   maxAttempts,
				NextAttemptAt: pendingNext,
			}
			if ch == ChannelEmail {
				seed.ResolvedEmail = s.adminEmail()
				if seed.ResolvedEmail == "" {
					seed.Status = RouteStatusSkipped
					seed.NextAttemptAt = nil
					seed.SkippedAt = timePtr(now.UTC())
					seed.LastError = "BACKEND_NOTIFICATION_ADMIN_EMAIL not configured"
					s.deps.Logger.Warn("admin notification skipped: admin email not configured",
						zap.String("kind", intent.Kind),
						zap.String("idempotency_key", intent.IdempotencyKey),
					)
				}
			}
			seeds = append(seeds, seed)
		}
		return seeds, nil
	}

	// Per-user kinds: fan out across (recipient × channel).
	seeds := make([]RouteSeed, 0, len(intent.Recipients)*len(entry.Channels))
	for _, userID := range intent.Recipients {
		uid := userID
		account, err := s.resolveAccount(ctx, userID)
		for _, ch := range entry.Channels {
			seed := RouteSeed{
				RouteID:         uuid.New(),
				Channel:         ch,
				Status:          RouteStatusPending,
				MaxAttempts:     maxAttempts,
				NextAttemptAt:   pendingNext,
				UserID:          &uid,
				DeviceSessionID: intent.DeviceSessionID,
			}
			switch ch {
			case ChannelEmail:
				if err != nil {
					seed.Status = RouteStatusSkipped
					seed.NextAttemptAt = nil
					seed.SkippedAt = timePtr(now.UTC())
					seed.LastError = err.Error()
				} else {
					seed.ResolvedEmail = account.Email
					seed.ResolvedLocale = account.PreferredLanguage
					if trimSpace(seed.ResolvedEmail) == "" {
						seed.Status = RouteStatusSkipped
						seed.NextAttemptAt = nil
						seed.SkippedAt = timePtr(now.UTC())
						seed.LastError = "recipient has no email on file"
					}
				}
			case ChannelPush:
				if err != nil {
					seed.Status = RouteStatusSkipped
					seed.NextAttemptAt = nil
					seed.SkippedAt = timePtr(now.UTC())
					seed.LastError = err.Error()
				} else if account.PreferredLanguage != "" {
					seed.ResolvedLocale = account.PreferredLanguage
				}
			}
			seeds = append(seeds, seed)
		}
	}
	return seeds, nil
}
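
// Fan-out arithmetic (illustrative): a per-user kind with channels
// [push, email] submitted to three recipients materialises 3 × 2 = 6
// route rows, each with its own route_id, status, and retry budget.
// An unresolvable recipient only skips that user's rows, never the
// whole notification.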

// resolveAccount fetches the recipient profile through the configured
// AccountResolver. user.ErrAccountNotFound is mapped to a plain error
// string so the route is skipped without surfacing the sentinel or a
// stack-trace log.
func (s *Service) resolveAccount(ctx context.Context, userID uuid.UUID) (user.Account, error) {
	account, err := s.deps.Accounts.GetAccount(ctx, userID)
	if err != nil {
		if errors.Is(err, user.ErrAccountNotFound) {
			return user.Account{}, errors.New("recipient account not found")
		}
		return user.Account{}, fmt.Errorf("resolve recipient %s: %w", userID, err)
	}
	if account.DeletedAt != nil {
		return user.Account{}, errors.New("recipient account soft-deleted")
	}
	return account, nil
}

// recordMalformed persists an invalid intent on a best-effort basis.
// Logging here is informational; a Postgres failure is logged but
// never bubbles up to the producer, matching the README §10 contract.
func (s *Service) recordMalformed(ctx context.Context, intent Intent, reason string) {
	payload := map[string]any{
		"kind":            intent.Kind,
		"idempotency_key": intent.IdempotencyKey,
	}
	if len(intent.Payload) > 0 {
		payload["payload"] = intent.Payload
	}
	if len(intent.Recipients) > 0 {
		recipients := make([]string, 0, len(intent.Recipients))
		for _, r := range intent.Recipients {
			recipients = append(recipients, r.String())
		}
		payload["recipients"] = recipients
	}
	if intent.DeviceSessionID != nil {
		payload["device_session_id"] = intent.DeviceSessionID.String()
	}
	if err := s.deps.Store.InsertMalformed(ctx, payload, reason); err != nil {
		s.deps.Logger.Warn("failed to persist malformed notification intent",
			zap.String("kind", intent.Kind),
			zap.String("reason", reason),
			zap.Error(err),
		)
		return
	}
	s.deps.Logger.Info("notification intent dropped as malformed",
		zap.String("kind", intent.Kind),
		zap.String("reason", reason),
	)
}
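
// Persisted shape sketch (illustrative; values are hypothetical): an
// intent rejected for an unknown kind would be stored roughly as
//
//	{
//	  "kind": "nonsense.kind",
//	  "idempotency_key": "anything",
//	  "recipients": ["<recipient uuid>"]
//	}
//
// with reason = ErrUnknownKind.Error().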

// routeFromSeed converts a RouteSeed (the pre-insert snapshot the
// dispatcher needs) into the Route value the worker and dispatcher
// exchange once the row is durably persisted.
func routeFromSeed(notificationID uuid.UUID, seed RouteSeed, now time.Time) Route {
	r := Route{
		RouteID:         seed.RouteID,
		NotificationID:  notificationID,
		Channel:         seed.Channel,
		Status:          seed.Status,
		Attempts:        0,
		MaxAttempts:     seed.MaxAttempts,
		NextAttemptAt:   seed.NextAttemptAt,
		ResolvedEmail:   seed.ResolvedEmail,
		ResolvedLocale:  seed.ResolvedLocale,
		UserID:          seed.UserID,
		DeviceSessionID: seed.DeviceSessionID,
		CreatedAt:       now.UTC(),
		UpdatedAt:       now.UTC(),
		SkippedAt:       seed.SkippedAt,
		LastError:       seed.LastError,
	}
	return r
}
@@ -0,0 +1,458 @@
package notification_test

import (
	"context"
	"database/sql"
	"errors"
	"net/url"
	"sync"
	"testing"
	"time"

	"galaxy/backend/internal/config"
	"galaxy/backend/internal/notification"
	backendpg "galaxy/backend/internal/postgres"
	"galaxy/backend/internal/user"
	pgshared "galaxy/postgres"

	"github.com/google/uuid"
	testcontainers "github.com/testcontainers/testcontainers-go"
	tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
	"github.com/testcontainers/testcontainers-go/wait"
	"go.uber.org/zap/zaptest"
)

const (
	pgImage    = "postgres:16-alpine"
	pgUser     = "galaxy"
	pgPassword = "galaxy"
	pgDatabase = "galaxy_backend"
	pgSchema   = "backend"
	pgStartup  = 90 * time.Second
	pgOpTO     = 10 * time.Second
)

// startPostgres mirrors the mail/auth scaffolding: spin up Postgres,
// apply migrations, return *sql.DB.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)

	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})

	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	scoped, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}
	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scoped
	cfg.OperationTimeout = pgOpTO
	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	t.Cleanup(func() { _ = db.Close() })
	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}

func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	parsed, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	values := parsed.Query()
	values.Set("search_path", schema)
	if values.Get("sslmode") == "" {
		values.Set("sslmode", "disable")
	}
	parsed.RawQuery = values.Encode()
	return parsed.String(), nil
}
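
// Example transformation (illustrative; the host and port vary per
// container run):
//
//	postgres://galaxy:galaxy@localhost:55432/galaxy_backend?sslmode=disable
//	→ postgres://galaxy:galaxy@localhost:55432/galaxy_backend?search_path=backend&sslmode=disable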

// recordingMailer captures every EnqueueTemplate call.
type recordingMailer struct {
	mu    sync.Mutex
	calls []recordedEnqueue
	err   error
}

type recordedEnqueue struct {
	TemplateID     string
	Recipient      string
	Payload        map[string]any
	IdempotencyKey string
}

func (r *recordingMailer) EnqueueTemplate(_ context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.err != nil {
		return r.err
	}
	r.calls = append(r.calls, recordedEnqueue{
		TemplateID:     templateID,
		Recipient:      recipient,
		Payload:        payload,
		IdempotencyKey: idempotencyKey,
	})
	return nil
}

func (r *recordingMailer) Calls() []recordedEnqueue {
	r.mu.Lock()
	defer r.mu.Unlock()
	out := make([]recordedEnqueue, len(r.calls))
	copy(out, r.calls)
	return out
}

// recordingPush captures every PublishClientEvent call.
type recordingPush struct {
	mu    sync.Mutex
	calls []recordedPushEvent
}

type recordedPushEvent struct {
	UserID    uuid.UUID
	Kind      string
	Payload   map[string]any
	EventID   string
	RequestID string
	TraceID   string
}

func (r *recordingPush) PublishClientEvent(_ context.Context, userID uuid.UUID, _ *uuid.UUID, kind string, payload map[string]any, eventID, requestID, traceID string) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.calls = append(r.calls, recordedPushEvent{
		UserID:    userID,
		Kind:      kind,
		Payload:   payload,
		EventID:   eventID,
		RequestID: requestID,
		TraceID:   traceID,
	})
	return nil
}

func (r *recordingPush) Calls() []recordedPushEvent {
	r.mu.Lock()
	defer r.mu.Unlock()
	out := make([]recordedPushEvent, len(r.calls))
	copy(out, r.calls)
	return out
}

// stubAccounts hands back a fixed account record for any user_id, so
// tests don't need to seed the accounts table.
type stubAccounts struct {
	account user.Account
	err     error
}

func (s *stubAccounts) GetAccount(_ context.Context, userID uuid.UUID) (user.Account, error) {
	if s.err != nil {
		return user.Account{}, s.err
	}
	out := s.account
	out.UserID = userID
	return out, nil
}

func newService(t *testing.T, db *sql.DB, mailer notification.Mailer, push notification.PushPublisher, accounts notification.AccountResolver, adminEmail string) *notification.Service {
	t.Helper()
	cfg := config.NotificationConfig{
		AdminEmail:     adminEmail,
		WorkerInterval: 10 * time.Millisecond,
		MaxAttempts:    3,
	}
	return notification.NewService(notification.Deps{
		Store:    notification.NewStore(db),
		Mail:     mailer,
		Push:     push,
		Accounts: accounts,
		Config:   cfg,
		Logger:   zaptest.NewLogger(t),
	})
}

func TestSubmitFansOutLobbyInviteToPushAndEmail(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	mailer := &recordingMailer{}
	push := &recordingPush{}
	accounts := &stubAccounts{account: user.Account{
		Email:             "alice@example.test",
		PreferredLanguage: "en",
	}}
	svc := newService(t, db, mailer, push, accounts, "")

	recipient := uuid.New()
	id, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyInviteReceived,
		IdempotencyKey: "invite:" + uuid.NewString(),
		Recipients:     []uuid.UUID{recipient},
		Payload: map[string]any{
			"game_id":         uuid.NewString(),
			"inviter_user_id": uuid.NewString(),
		},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id == uuid.Nil {
		t.Fatal("submit returned nil id")
	}

	// Best-effort dispatch ran synchronously; both channels should
	// have observed exactly one call.
	if got := len(push.Calls()); got != 1 {
		t.Errorf("push calls=%d, want 1", got)
	}
	if got := len(mailer.Calls()); got != 1 {
		t.Errorf("mail calls=%d, want 1", got)
	} else {
		call := mailer.Calls()[0]
		if call.Recipient != "alice@example.test" {
			t.Errorf("mail recipient=%q", call.Recipient)
		}
		if call.TemplateID != notification.KindLobbyInviteReceived {
			t.Errorf("mail template=%q", call.TemplateID)
		}
	}
}

func TestSubmitIsIdempotent(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{account: user.Account{Email: "x@example.test"}}, "")

	intent := notification.Intent{
		Kind:           notification.KindLobbyApplicationSubmitted,
		IdempotencyKey: "dedupe-key",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString(), "application_id": uuid.NewString()},
	}
	first, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("first submit: %v", err)
	}
	second, err := svc.Submit(context.Background(), intent)
	if err != nil {
		t.Fatalf("second submit: %v", err)
	}
	if first != second {
		t.Fatalf("idempotent submit must return same id: %s vs %s", first, second)
	}
}

func TestSubmitMalformedPersists(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{}, "")

	id, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           "nonsense.kind",
		IdempotencyKey: "anything",
		Recipients:     []uuid.UUID{uuid.New()},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id != uuid.Nil {
		t.Fatalf("malformed submit must return nil id, got %s", id)
	}

	page, err := svc.AdminListMalformed(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list malformed: %v", err)
	}
	if page.Total < 1 {
		t.Fatalf("malformed total=%d, want >= 1", page.Total)
	}
}

func TestSubmitAdminEmailSkipsWhenNotConfigured(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	mailer := &recordingMailer{}
	svc := newService(t, db, mailer, &recordingPush{}, &stubAccounts{}, "")

	id, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindRuntimeImagePullFailed,
		IdempotencyKey: "ipf-1",
		Payload:        map[string]any{"game_id": uuid.NewString(), "image_ref": "registry/img:tag"},
	})
	if err != nil {
		t.Fatalf("submit: %v", err)
	}
	if id == uuid.Nil {
		t.Fatal("admin submit returned nil id")
	}
	if got := len(mailer.Calls()); got != 0 {
		t.Errorf("mail calls=%d, want 0 (admin email unset)", got)
	}
}

func TestSubmitAdminEmailDispatchesWhenConfigured(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	mailer := &recordingMailer{}
	svc := newService(t, db, mailer, &recordingPush{}, &stubAccounts{}, "ops@example.test")

	if _, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindRuntimeContainerStartFailed,
		IdempotencyKey: "csf-1",
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}); err != nil {
		t.Fatalf("submit: %v", err)
	}
	calls := mailer.Calls()
	if len(calls) != 1 {
		t.Fatalf("mail calls=%d, want 1", len(calls))
	}
	if calls[0].Recipient != "ops@example.test" {
		t.Errorf("admin recipient=%q", calls[0].Recipient)
	}
}

func TestSubmitMissingAccountSkipsEmail(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	mailer := &recordingMailer{}
	push := &recordingPush{}
	accounts := &stubAccounts{err: user.ErrAccountNotFound}
	svc := newService(t, db, mailer, push, accounts, "")

	if _, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyApplicationApproved,
		IdempotencyKey: "missing-1",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}); err != nil {
		t.Fatalf("submit: %v", err)
	}
	if got := len(mailer.Calls()); got != 0 {
		t.Errorf("mail calls=%d want 0 when account missing", got)
	}
	if got := len(push.Calls()); got != 0 {
		t.Errorf("push calls=%d want 0 when account missing", got)
	}
}

func TestWorkerRetryAndDeadLetter(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	failingMailer := &recordingMailer{err: errors.New("smtp down")}
	push := &recordingPush{}
	accounts := &stubAccounts{account: user.Account{Email: "alice@example.test", PreferredLanguage: "en"}}
	svc := newService(t, db, failingMailer, push, accounts, "")

	// MaxAttempts=3 from newService config. Submit fires one
	// best-effort attempt; subsequent Tick calls drive attempts 2 and
	// 3, the last one dead-letters.
	if _, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyInviteReceived,
		IdempotencyKey: "fail-1",
		Recipients:     []uuid.UUID{uuid.New()},
		Payload:        map[string]any{"game_id": uuid.NewString(), "inviter_user_id": uuid.NewString()},
	}); err != nil {
		t.Fatalf("submit: %v", err)
	}

	// Force every retry to be due immediately.
	if _, err := db.Exec(`UPDATE backend.notification_routes SET next_attempt_at = now() WHERE channel = 'email'`); err != nil {
		t.Fatalf("force due: %v", err)
	}
	worker := notification.NewWorker(svc)
	for range 5 {
		if err := worker.Tick(context.Background()); err != nil {
			t.Fatalf("tick: %v", err)
		}
		if _, err := db.Exec(`UPDATE backend.notification_routes SET next_attempt_at = now() WHERE channel = 'email' AND status = 'retrying'`); err != nil {
			t.Fatalf("force due: %v", err)
		}
	}

	dead, err := svc.AdminListDeadLetters(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list dead-letters: %v", err)
	}
	if dead.Total < 1 {
		t.Fatalf("expected dead-letter row, got total=%d (mail attempts=%d)", dead.Total, len(failingMailer.Calls()))
	}
}

func TestOnUserDeletedSkipsPendingRoutes(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	failingMailer := &recordingMailer{err: errors.New("smtp down")}
	push := &recordingPush{}
	userID := uuid.New()
	accounts := &stubAccounts{account: user.Account{Email: "alice@example.test", PreferredLanguage: "en"}}
	svc := newService(t, db, failingMailer, push, accounts, "")

	// Submit something that owns user_id so the cascade picks it up.
	if _, err := svc.Submit(context.Background(), notification.Intent{
		Kind:           notification.KindLobbyApplicationApproved,
		IdempotencyKey: "cascade-1",
		Recipients:     []uuid.UUID{userID},
		Payload:        map[string]any{"game_id": uuid.NewString()},
	}); err != nil {
		t.Fatalf("submit: %v", err)
	}

	if err := svc.OnUserDeleted(context.Background(), userID); err != nil {
		t.Fatalf("OnUserDeleted: %v", err)
	}

	var skipped int
	if err := db.QueryRow(`
		SELECT COUNT(*)
		FROM backend.notification_routes r
		JOIN backend.notifications n ON n.notification_id = r.notification_id
		WHERE n.user_id = $1 AND r.status = 'skipped'
	`, userID).Scan(&skipped); err != nil {
		t.Fatalf("count skipped: %v", err)
	}
	if skipped == 0 {
		t.Fatal("expected at least one skipped route after cascade")
	}
}

func TestAdminGetMissing(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := newService(t, db, &recordingMailer{}, &recordingPush{}, &stubAccounts{}, "")
	if _, err := svc.AdminGetNotification(context.Background(), uuid.New()); !errors.Is(err, notification.ErrNotificationNotFound) {
		t.Fatalf("got %v, want ErrNotificationNotFound", err)
	}
}
@@ -0,0 +1,97 @@
package notification

import (
	"strings"
	"time"

	"github.com/google/uuid"
)

// Intent is the open shape every producer submits to Submit. The
// package keeps the shape free of producer-specific names so that
// producer packages (lobby, runtime) can construct values from their
// own vocabulary and let the wiring layer translate them with a thin
// adapter.
//
// Kind must be a value from the catalog (`backend/README.md` §10).
// IdempotencyKey is required and feeds the UNIQUE constraint on
// `notifications (kind, idempotency_key)`. Recipients lists user_ids
// for kinds that fan out per user; admin-channel kinds carry an empty
// slice. Payload is the kind-specific data blob; it is persisted as
// JSON on `notifications.payload` and forwarded to email templates.
// DeviceSessionID, when non-nil, narrows a push route to a single
// device session — the runtime / auth callers may use it to target
// specific clients.
type Intent struct {
	Kind            string
	IdempotencyKey  string
	Recipients      []uuid.UUID
	Payload         map[string]any
	DeviceSessionID *uuid.UUID
}
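
// Illustrative admin-channel intent (values hypothetical): no
// Recipients, because the catalog entry routes to the configured
// admin email rather than fanning out per user.
//
//	intent := Intent{
//		Kind:           KindRuntimeImagePullFailed,
//		IdempotencyKey: "image-pull:" + imageRef,
//		Payload:        map[string]any{"image_ref": imageRef},
//	}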

// Notification mirrors a row in `backend.notifications`. The admin API
// returns it directly; Worker keeps a copy alongside each route to
// avoid a re-fetch per dispatch.
type Notification struct {
	NotificationID uuid.UUID
	Kind           string
	IdempotencyKey string
	UserID         *uuid.UUID
	Payload        map[string]any
	CreatedAt      time.Time
}

// Route mirrors a row in `backend.notification_routes`. Status keeps
// the value documented in `backend/README.md` §10; channel is `push`
// or `email`. ResolvedEmail / ResolvedLocale capture the recipient
// address pinned at materialisation time so a downstream account edit
// cannot retarget an in-flight email.
type Route struct {
	RouteID         uuid.UUID
	NotificationID  uuid.UUID
	Channel         string
	Status          string
	Attempts        int32
	MaxAttempts     int32
	NextAttemptAt   *time.Time
	LastAttemptAt   *time.Time
	LastError       string
	ResolvedEmail   string
	ResolvedLocale  string
	UserID          *uuid.UUID
	DeviceSessionID *uuid.UUID
	CreatedAt       time.Time
	UpdatedAt       time.Time
	PublishedAt     *time.Time
	DeadLetteredAt  *time.Time
	SkippedAt       *time.Time
}

// DeadLetter mirrors a row in `backend.notification_dead_letters`.
type DeadLetter struct {
	DeadLetterID   uuid.UUID
	NotificationID uuid.UUID
	RouteID        uuid.UUID
	ArchivedAt     time.Time
	Reason         string
}

// MalformedIntent mirrors a row in
// `backend.notification_malformed_intents`. Payload is the producer-
// supplied blob (best-effort JSON-encoded by Submit); Reason records
// the validation failure that diverted it.
type MalformedIntent struct {
	ID         uuid.UUID
	ReceivedAt time.Time
	Payload    map[string]any
	Reason     string
}

// trimSpace is a small helper used across the package; pulling it out
// avoids duplicate `strings.TrimSpace` import chains in tiny files.
func trimSpace(s string) string { return strings.TrimSpace(s) }

// timePtr returns a pointer to the supplied time. Useful in struct
// literals where Postgres-nullable timestamptz fields are pointers.
func timePtr(t time.Time) *time.Time { return &t }
@@ -0,0 +1,118 @@
package notification

import (
	"context"
	"errors"
	"time"

	"github.com/google/uuid"
	"go.uber.org/zap"
)

// Worker drains the notification routes table: per tick it walks due
// rows under `SELECT … FOR UPDATE SKIP LOCKED`, dispatches each through
// the matching channel, and atomically updates the route status.
// Implements `internal/app.Component`.
type Worker struct {
	svc *Service
}

// NewWorker constructs a Worker bound to svc.
func NewWorker(svc *Service) *Worker { return &Worker{svc: svc} }

// Run drives the scan loop until ctx is cancelled. The first tick is
// the startup-drain pass: rows queued before the process restart get
// retried immediately rather than waiting for the first interval.
func (w *Worker) Run(ctx context.Context) error {
	if w == nil {
		return nil
	}
	logger := w.svc.deps.Logger.Named("worker")
	if err := w.tick(ctx); err != nil && !errors.Is(err, context.Canceled) {
		logger.Warn("initial notification routes drain failed", zap.Error(err))
	}
	interval := w.svc.deps.Config.WorkerInterval
	if interval <= 0 {
		interval = 5 * time.Second
	}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-ticker.C:
			if err := w.tick(ctx); err != nil && !errors.Is(err, context.Canceled) {
				logger.Warn("notification routes tick failed", zap.Error(err))
			}
		}
	}
}
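
// Wiring sketch (illustrative; the surrounding supervisor is
// hypothetical — any errgroup-style lifecycle runner works):
//
//	worker := NewWorker(svc)
//	go func() { _ = worker.Run(ctx) }() // returns nil once ctx is cancelled
//	// ...
//	cancel()                            // stops the scan loop
//	_ = worker.Shutdown(context.Background())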

// Shutdown is a no-op: each per-row transaction is self-contained, so
// a cancelled ctx above the loop is enough to stop the worker.
func (w *Worker) Shutdown(_ context.Context) error { return nil }

// Tick is exposed for tests so they can drive the worker without
// timing dependencies.
func (w *Worker) Tick(ctx context.Context) error { return w.tick(ctx) }

// tick processes up to claimBatchSize rows. Each row is handled in its
// own transaction so a slow channel only holds one row lock at a time.
// The loop exits as soon as a claim returns zero rows or ctx is
// cancelled. Rows already handled in this tick are tracked in `seen`
// and excluded from subsequent claims so a transient retry scheduled
// with next_attempt_at <= now() does not chew through MaxAttempts in a
// single tick (mirrors the mail-worker pattern).
func (w *Worker) tick(ctx context.Context) error {
	seen := make([]uuid.UUID, 0, claimBatchSize)
	for range claimBatchSize {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		more, processed, err := w.processOne(ctx, seen)
		if err != nil {
			return err
		}
		if !more {
			return nil
		}
		seen = append(seen, processed)
	}
	return nil
}

// processOne claims a single due route, dispatches it, and commits the
// state transition. Returns more=false when no row was due.
func (w *Worker) processOne(ctx context.Context, exclude []uuid.UUID) (bool, uuid.UUID, error) {
	tx, err := w.svc.deps.Store.BeginTx(ctx)
	if err != nil {
		return false, uuid.Nil, err
	}
	defer func() { _ = tx.Rollback() }()

	claimed, err := w.svc.deps.Store.ClaimDueRoutes(ctx, tx, 1, exclude...)
	if err != nil {
		return false, uuid.Nil, err
	}
	if len(claimed) == 0 {
		return false, uuid.Nil, nil
	}
	c := claimed[0]
	dispatchErr := w.svc.performDispatch(ctx, c)
	at := w.svc.nowUTC()
	if err := w.svc.finaliseDispatch(ctx, tx, c, dispatchErr, at); err != nil {
		return false, uuid.Nil, err
	}
	if err := tx.Commit(); err != nil {
		return false, uuid.Nil, err
	}
	return true, c.Route.RouteID, nil
}

// Compile-time check that Worker satisfies the lifecycle interface
// shape used elsewhere (Run + Shutdown).
var _ interface {
	Run(context.Context) error
	Shutdown(context.Context) error
} = (*Worker)(nil)