feat: backend service

This commit is contained in:
Ilia Denisov
2026-05-06 10:14:55 +03:00
committed by GitHub
parent 3e2622757e
commit f446c6a2ac
1486 changed files with 49720 additions and 266401 deletions
+101
View File
@@ -0,0 +1,101 @@
package mail
import (
"context"
"github.com/google/uuid"
)
// AdminListDeliveriesPage bundles the pagination metadata returned to
// the admin API. The same shape is reused by AdminListDeadLettersPage
// — keeping it explicit clarifies the wire contract for handlers.
type AdminListDeliveriesPage struct {
	Items    []Delivery // rows for the requested page
	Page     int        // 1-indexed page actually served (after normalisePaging clamping)
	PageSize int        // page size actually served (after normalisePaging clamping)
	Total    int64      // total delivery count across all pages
}

// AdminListDeadLettersPage mirrors AdminListDeliveriesPage for the
// dead-letter listing.
type AdminListDeadLettersPage struct {
	Items    []DeadLetter // dead-letter rows for the requested page
	Page     int          // 1-indexed page actually served (after normalisePaging clamping)
	PageSize int          // page size actually served (after normalisePaging clamping)
	Total    int64        // total dead-letter count across all pages
}
// AdminListDeliveries returns the requested delivery page. page is
// 1-indexed; pageSize is bounded by the caller (handler defaults).
func (s *Service) AdminListDeliveries(ctx context.Context, page, pageSize int) (AdminListDeliveriesPage, error) {
	p, size := normalisePaging(page, pageSize)
	skip := (p - 1) * size
	rows, count, err := s.deps.Store.ListDeliveries(ctx, skip, size)
	if err != nil {
		return AdminListDeliveriesPage{}, err
	}
	out := AdminListDeliveriesPage{
		Items:    rows,
		Page:     p,
		PageSize: size,
		Total:    count,
	}
	return out, nil
}
// AdminGetDelivery returns the delivery row by id, or
// ErrDeliveryNotFound when the row does not exist.
func (s *Service) AdminGetDelivery(ctx context.Context, deliveryID uuid.UUID) (Delivery, error) {
	row, err := s.deps.Store.GetDelivery(ctx, deliveryID)
	return row, err
}
// AdminListAttempts returns every attempt for the delivery in
// attempt_no order. ErrDeliveryNotFound is returned when the delivery
// row itself does not exist; an empty list (no rows yet) returns nil
// without error.
func (s *Service) AdminListAttempts(ctx context.Context, deliveryID uuid.UUID) ([]Attempt, error) {
	// Existence check first so a missing delivery surfaces as
	// ErrDeliveryNotFound rather than an empty attempt list.
	_, err := s.deps.Store.GetDelivery(ctx, deliveryID)
	if err != nil {
		return nil, err
	}
	return s.deps.Store.ListAttempts(ctx, deliveryID)
}
// AdminResendDelivery re-arms the targeted row for another delivery
// cycle. The contract: ErrDeliveryNotFound when the row is missing,
// ErrResendOnSent when the row is in the terminal `sent` state.
// Otherwise the row is reset to status='pending' with attempts=0 and
// next_attempt_at=now(); the worker picks it up on the next tick.
func (s *Service) AdminResendDelivery(ctx context.Context, deliveryID uuid.UUID) (Delivery, error) {
	now := s.deps.Now()
	return s.deps.Store.ResendNonSent(ctx, deliveryID, now)
}
// AdminListDeadLetters returns the dead-letter page newest-first.
func (s *Service) AdminListDeadLetters(ctx context.Context, page, pageSize int) (AdminListDeadLettersPage, error) {
	p, size := normalisePaging(page, pageSize)
	skip := (p - 1) * size
	rows, count, err := s.deps.Store.ListDeadLetters(ctx, skip, size)
	if err != nil {
		return AdminListDeadLettersPage{}, err
	}
	out := AdminListDeadLettersPage{
		Items:    rows,
		Page:     p,
		PageSize: size,
		Total:    count,
	}
	return out, nil
}
// normalisePaging clamps page and pageSize to the values handlers can
// safely pass through to the store. The defaults match what the
// existing admin endpoints use elsewhere in `internal/server`:
// page < 1 becomes 1, pageSize < 1 becomes 25, and pageSize is capped
// at 200.
func normalisePaging(page, pageSize int) (int, int) {
	if page < 1 {
		page = 1
	}
	switch {
	case pageSize < 1:
		pageSize = 25
	case pageSize > 200:
		pageSize = 200
	}
	return page, pageSize
}
+168
View File
@@ -0,0 +1,168 @@
package mail_test
import (
"context"
"errors"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/mail"
"github.com/google/uuid"
"go.uber.org/zap/zaptest"
)
// TestAdminListPagination enqueues seven deliveries and checks that a
// page size of three yields a full first page and a one-row third page,
// with Total reporting the overall count on every page.
func TestAdminListPagination(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := mail.NewService(mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   newRecordingSender(),
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 3},
		Logger: zaptest.NewLogger(t),
	})
	const total = 7
	ctx := context.Background()
	for i := 0; i < total; i++ {
		if err := svc.EnqueueLoginCode(ctx, "a@example.test", "1234"+string(rune('0'+i)), 5*time.Minute); err != nil {
			t.Fatalf("enqueue %d: %v", i, err)
		}
	}
	first, err := svc.AdminListDeliveries(ctx, 1, 3)
	if err != nil {
		t.Fatalf("list page 1: %v", err)
	}
	if got := len(first.Items); got != 3 {
		t.Fatalf("page1 size=%d want 3", got)
	}
	if first.Total != total {
		t.Fatalf("page1 total=%d want %d", first.Total, total)
	}
	last, err := svc.AdminListDeliveries(ctx, 3, 3)
	if err != nil {
		t.Fatalf("list page 3: %v", err)
	}
	if got := len(last.Items); got != 1 {
		t.Fatalf("page3 size=%d want 1", got)
	}
}
// TestAdminGetDeliveryNotFound checks that looking up a random id
// yields ErrDeliveryNotFound.
func TestAdminGetDeliveryNotFound(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := mail.NewService(mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   newRecordingSender(),
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 3},
		Logger: zaptest.NewLogger(t),
	})
	_, err := svc.AdminGetDelivery(context.Background(), uuid.New())
	if !errors.Is(err, mail.ErrDeliveryNotFound) {
		t.Fatalf("get missing: want ErrDeliveryNotFound, got %v", err)
	}
}
// TestAdminResendStateMatrix drives a delivery through the full resend
// state machine: dead-letter it via two transient failures, resend it
// back to pending, let the worker send it, then verify the two error
// branches (resend-on-sent, resend-on-missing).
func TestAdminResendStateMatrix(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	sender := newRecordingSender()
	// Match the number of Send calls the matrix triggers (initial
	// success path + resend re-send for the dead-lettered row):
	// two transient failures exhaust MaxAttempts=2, the third call
	// succeeds after the admin resend.
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return errors.New("transient #1") },
		func(mail.OutboundMessage) error { return errors.New("transient #2") },
		func(mail.OutboundMessage) error { return nil }, // sent path
	}
	clock := time.Now().UTC().Add(-2 * time.Hour) // bring next_attempt_at into the past
	svc := mail.NewService(mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   sender,
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 2},
		Now:    func() time.Time { return clock },
		Logger: zaptest.NewLogger(t),
	})
	worker := mail.NewWorker(svc)
	// 1. Drive a row to dead-lettered (two failures with MaxAttempts=2).
	if err := svc.EnqueueLoginCode(context.Background(), "dead@example.test", "111111", 5*time.Minute); err != nil {
		t.Fatalf("enqueue dead: %v", err)
	}
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #1: %v", err)
	}
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #2: %v", err)
	}
	deadList, err := svc.AdminListDeliveries(context.Background(), 1, 5)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if len(deadList.Items) != 1 || deadList.Items[0].Status != mail.StatusDeadLettered {
		t.Fatalf("want 1 dead-lettered row, got %+v", deadList.Items)
	}
	deadID := deadList.Items[0].DeliveryID
	// 2. Resend the dead-lettered row -> 200, status flips to pending,
	// attempts=0.
	resent, err := svc.AdminResendDelivery(context.Background(), deadID)
	if err != nil {
		t.Fatalf("resend dead: %v", err)
	}
	if resent.Status != mail.StatusPending {
		t.Fatalf("status after resend=%q want pending", resent.Status)
	}
	if resent.Attempts != 0 {
		t.Fatalf("attempts after resend=%d want 0", resent.Attempts)
	}
	// 3. Drive the worker once more — third Send call returns nil so
	// the row transitions to sent.
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick post-resend: %v", err)
	}
	d, err := svc.AdminGetDelivery(context.Background(), deadID)
	if err != nil {
		t.Fatalf("get after send: %v", err)
	}
	if d.Status != mail.StatusSent {
		t.Fatalf("status=%q want sent", d.Status)
	}
	// 4. Resend on `sent` -> ErrResendOnSent.
	if _, err := svc.AdminResendDelivery(context.Background(), deadID); !errors.Is(err, mail.ErrResendOnSent) {
		t.Fatalf("resend on sent: want ErrResendOnSent, got %v", err)
	}
	// 5. Resend on missing -> ErrDeliveryNotFound.
	if _, err := svc.AdminResendDelivery(context.Background(), uuid.New()); !errors.Is(err, mail.ErrDeliveryNotFound) {
		t.Fatalf("resend missing: want ErrDeliveryNotFound, got %v", err)
	}
}
// TestServiceStats enqueues three deliveries and checks Stats reports
// them as pending while still exposing a bucket for every status.
func TestServiceStats(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	svc := mail.NewService(mail.Deps{
		Store:  mail.NewStore(db),
		SMTP:   newRecordingSender(),
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: 3},
		Logger: zaptest.NewLogger(t),
	})
	ctx := context.Background()
	for i := 0; i < 3; i++ {
		if err := svc.EnqueueLoginCode(ctx, "stats@example.test", "55555"+string(rune('0'+i)), 5*time.Minute); err != nil {
			t.Fatalf("enqueue: %v", err)
		}
	}
	stats, err := svc.Stats(ctx)
	if err != nil {
		t.Fatalf("stats: %v", err)
	}
	if got := stats[mail.StatusPending]; got != 3 {
		t.Fatalf("pending=%d want 3", got)
	}
	if _, ok := stats[mail.StatusSent]; !ok {
		t.Fatal("Stats must always return all four buckets")
	}
}
+121
View File
@@ -0,0 +1,121 @@
package mail
import (
"context"
"errors"
"time"
"galaxy/backend/internal/config"
"github.com/google/uuid"
"go.uber.org/zap"
)
// SMTPSender is the wire-level boundary the worker uses to deliver an
// outbox row through SMTP. Implementations are expected to be
// concurrency-safe and to honour ctx cancellation: the worker passes a
// per-row context bounded by the configured operation timeout.
//
// `Send` is the single point where transient-vs-permanent classification
// happens; the returned error carries IsPermanent to let the worker
// decide between schedule-a-retry and dead-letter.
type SMTPSender interface {
	Send(ctx context.Context, msg OutboundMessage) error
}

// OutboundMessage is the rendered, recipient-addressed payload handed
// to SMTPSender. From is taken from BACKEND_SMTP_FROM at construction
// time, so producers and the worker never set it directly.
type OutboundMessage struct {
	To          []string // recipient addresses, already normalised by the producer
	Subject     string   // rendered subject line
	ContentType string   // MIME type of Body, e.g. "text/plain"
	Body        []byte   // rendered message body
}

// SendError augments a regular error with a permanence classification.
// Permanent errors (RFC 5321 5xx, malformed addresses, oversize body)
// dead-letter the row immediately on the next attempt; transient ones
// (4xx, network) trigger the backoff schedule.
type SendError struct {
	Err       error // the underlying send failure
	Permanent bool  // true when retrying cannot succeed
}
// Error returns the underlying error string; a nil receiver or nil
// wrapped error yields "".
func (e *SendError) Error() string {
	if e != nil && e.Err != nil {
		return e.Err.Error()
	}
	return ""
}
// Unwrap exposes the underlying error for errors.Is / errors.As; a nil
// receiver unwraps to nil.
func (e *SendError) Unwrap() error {
	if e != nil {
		return e.Err
	}
	return nil
}
// IsPermanent reports whether err is a *SendError marked Permanent.
// Non-SendError values (and nil) are treated as transient by default —
// the worker will retry until MaxAttempts.
func IsPermanent(err error) bool {
	var se *SendError
	switch {
	case err == nil:
		return false
	case errors.As(err, &se) && se != nil:
		return se.Permanent
	default:
		return false
	}
}
// AdminNotifier is the outbound surface mail uses to flag a dead-letter
// to operators. The canonical notification wiring (the real publisher)
// lives in `cmd/backend/main.go`; until it lands, NewNoopAdminNotifier
// ships a logger-only stub matching the pattern used elsewhere in
// `backend/internal/*`.
type AdminNotifier interface {
	// OnDeadLetter is invoked once per dead-lettered delivery.
	OnDeadLetter(ctx context.Context, deliveryID uuid.UUID, templateID, reason string)
}
// Deps aggregates every collaborator the Service depends on.
//
// Store and SMTP must be non-nil. Admin defaults to a no-op publisher
// when omitted; Now defaults to time.Now; Logger defaults to
// zap.NewNop. Config carries the worker interval and max-attempts
// derived from `BACKEND_MAIL_*`.
type Deps struct {
	Store  *Store        // persistence layer; must be non-nil
	SMTP   SMTPSender    // outbound SMTP boundary; must be non-nil
	Admin  AdminNotifier // dead-letter publisher; nil defaults to the no-op notifier
	Config config.MailConfig
	// Now overrides time.Now for deterministic tests. A nil Now defaults
	// to time.Now in NewService.
	Now func() time.Time
	// Logger is named under "mail" by NewService. Nil falls back to
	// zap.NewNop.
	Logger *zap.Logger
}
// NewNoopAdminNotifier returns an AdminNotifier that logs every
// dead-letter event at warn level and never blocks. The real
// notification publisher replaces it once the canonical wiring lands.
func NewNoopAdminNotifier(logger *zap.Logger) AdminNotifier {
	if logger == nil {
		logger = zap.NewNop()
	}
	return &noopAdminNotifier{logger: logger.Named("notify.noop")}
}

// noopAdminNotifier is the logger-only AdminNotifier stub returned by
// NewNoopAdminNotifier.
type noopAdminNotifier struct {
	logger *zap.Logger
}

// OnDeadLetter logs the dead-letter event at warn level and returns
// immediately; it performs no external I/O.
func (n *noopAdminNotifier) OnDeadLetter(_ context.Context, deliveryID uuid.UUID, templateID, reason string) {
	n.logger.Warn("mail dead-letter (noop publisher)",
		zap.String("delivery_id", deliveryID.String()),
		zap.String("template_id", templateID),
		zap.String("reason", reason),
	)
}
+243
View File
@@ -0,0 +1,243 @@
package mail
import (
"context"
"fmt"
netmail "net/mail"
"strings"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// contentTypeTextPlain is the RFC 2046 text/plain MIME type stored in
// `mail_payloads.content_type` for plain-text bodies. Every renderer in
// this file emits plain text, so it is the only content type producers
// currently write.
const contentTypeTextPlain = "text/plain"

// TemplateLoginCode is the template_id stored in `mail_deliveries` for
// the auth-issued login code. The value matches the kind in the
// notification catalog (`README.md` §10) so future cross-reporting
// stays consistent.
const TemplateLoginCode = "auth.login_code"
// EnqueueLoginCode renders the auth login-code email and inserts the
// outbox row. Each call gets a fresh server-side idempotency_key so
// the unique constraint cannot accidentally suppress a legitimate
// re-issue; double-enqueue protection lives in the auth challenge
// throttle (see `auth.Service.SendEmailCode`).
func (s *Service) EnqueueLoginCode(ctx context.Context, email, code string, ttl time.Duration) error {
	recipient, err := normaliseRecipient(email)
	if err != nil {
		return err
	}
	subject, body := renderLoginCode(code, ttl)
	row := EnqueueArgs{
		DeliveryID:     uuid.New(),
		TemplateID:     TemplateLoginCode,
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{recipient},
		ContentType:    contentTypeTextPlain,
		Subject:        subject,
		Body:           []byte(body),
	}
	inserted, err := s.deps.Store.InsertEnqueue(ctx, row)
	if err != nil {
		return fmt.Errorf("mail: enqueue login code: %w", err)
	}
	if !inserted {
		// Cannot happen given the random key, but keeps the invariant
		// explicit for readers grep-ing for unexpected paths.
		s.deps.Logger.Warn("login-code enqueue collided on random idempotency key",
			zap.String("delivery_id", row.DeliveryID.String()))
	}
	return nil
}
// EnqueueTemplate is the generic producer surface used by the future
// notification fan-out. The caller supplies a stable idempotencyKey so
// re-deliveries of the same logical event are deduplicated by the
// (template_id, idempotency_key) UNIQUE constraint.
func (s *Service) EnqueueTemplate(ctx context.Context, templateID, recipient string, payload map[string]any, idempotencyKey string) error {
	if strings.TrimSpace(idempotencyKey) == "" {
		return fmt.Errorf("mail: idempotency_key must not be empty")
	}
	to, err := normaliseRecipient(recipient)
	if err != nil {
		return err
	}
	render, known := templateRenderers[templateID]
	if !known {
		return fmt.Errorf("%w: %q", ErrUnknownTemplate, templateID)
	}
	subject, body, err := render(payload)
	if err != nil {
		return fmt.Errorf("mail: render template %q: %w", templateID, err)
	}
	_, err = s.deps.Store.InsertEnqueue(ctx, EnqueueArgs{
		DeliveryID:     uuid.New(),
		TemplateID:     templateID,
		IdempotencyKey: idempotencyKey,
		Recipients:     []string{to},
		ContentType:    contentTypeTextPlain,
		Subject:        subject,
		Body:           []byte(body),
	})
	if err != nil {
		return fmt.Errorf("mail: enqueue template: %w", err)
	}
	return nil
}
// normaliseRecipient trims whitespace and validates the address with
// stdlib RFC 5322 parsing (net/mail). Empty / malformed addresses are
// rejected with ErrInvalidRecipient. The returned string is the
// canonical form (`mail.Address.Address`) without any display name.
func normaliseRecipient(addr string) (string, error) {
	if trimmed := strings.TrimSpace(addr); trimmed != "" {
		if parsed, err := netmail.ParseAddress(trimmed); err == nil {
			return parsed.Address, nil
		}
	}
	return "", ErrInvalidRecipient
}
// templateRenderers is the inline catalog of mail templates the
// notification module dispatches against. It started with
// `auth.login_code` and now covers the rest of the email-bearing
// kinds enumerated in `README.md` §10. Each renderer takes the
// producer-supplied payload map and returns (subject, body) or an
// error when required fields are missing or wrongly typed.
var templateRenderers = map[string]func(map[string]any) (string, string, error){
	// auth.login_code requires payload.code; payload.ttl is optional
	// (a missing ttl renders as the one-minute minimum).
	TemplateLoginCode: func(payload map[string]any) (string, string, error) {
		code, _ := payload["code"].(string)
		if code == "" {
			return "", "", fmt.Errorf("payload.code must be a non-empty string")
		}
		ttl, _ := payload["ttl"].(time.Duration)
		subject, body := renderLoginCode(code, ttl)
		return subject, body, nil
	},
	// Lobby lifecycle notifications. All fields are best-effort: a
	// missing payload key renders as an empty string (or the
	// fallbackString default where one is supplied).
	"lobby.invite.received": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		inviter := payloadString(payload, "inviter_user_id")
		subject := "You have a new Galaxy game invite"
		body := fmt.Sprintf(
			"You have been invited to a Galaxy game.\n\nGame: %s\nInviter: %s\n\nOpen the Galaxy client to accept or decline.\n",
			gameID, inviter,
		)
		return subject, body, nil
	},
	"lobby.application.approved": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "Your Galaxy application was approved"
		body := fmt.Sprintf(
			"Your application to join the Galaxy game %s has been approved. The game owner will start the match when ready.\n",
			gameID,
		)
		return subject, body, nil
	},
	"lobby.application.rejected": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "Your Galaxy application was rejected"
		body := fmt.Sprintf(
			"Your application to join the Galaxy game %s has been rejected. You can apply to other public games from the lobby.\n",
			gameID,
		)
		return subject, body, nil
	},
	"lobby.membership.removed": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		reason := payloadString(payload, "reason")
		subject := "You were removed from a Galaxy game"
		body := fmt.Sprintf(
			"Your membership in the Galaxy game %s has been removed.\n\nReason: %s\n",
			gameID, fallbackString(reason, "no reason provided"),
		)
		return subject, body, nil
	},
	"lobby.membership.blocked": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "You were blocked from a Galaxy game"
		body := fmt.Sprintf(
			"Your membership in the Galaxy game %s has been blocked. Please contact the game owner if this is unexpected.\n",
			gameID,
		)
		return subject, body, nil
	},
	"lobby.race_name.pending": func(payload map[string]any) (string, string, error) {
		raceName := payloadString(payload, "race_name")
		expiresAt := payloadString(payload, "expires_at")
		subject := "Your Galaxy race name is awaiting registration"
		body := fmt.Sprintf(
			"Congratulations — your Galaxy race name %q has reached pending registration. Confirm registration before %s to lock it permanently.\n",
			raceName, fallbackString(expiresAt, "the listed deadline"),
		)
		return subject, body, nil
	},
	// Runtime/operator notifications for engine-container failures.
	"runtime.image_pull_failed": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		imageRef := payloadString(payload, "image_ref")
		subject := "Galaxy runtime: image pull failed"
		body := fmt.Sprintf(
			"Image pull failed while preparing engine container for game %s.\n\nimage_ref: %s\n\nReview the runtime operation log for details.\n",
			gameID, fallbackString(imageRef, "unknown"),
		)
		return subject, body, nil
	},
	"runtime.container_start_failed": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		subject := "Galaxy runtime: container start failed"
		body := fmt.Sprintf(
			"Engine container start failed for game %s.\n\nReview the runtime operation log and Docker daemon logs for details.\n",
			gameID,
		)
		return subject, body, nil
	},
	"runtime.start_config_invalid": func(payload map[string]any) (string, string, error) {
		gameID := payloadString(payload, "game_id")
		reason := payloadString(payload, "reason")
		subject := "Galaxy runtime: start config invalid"
		body := fmt.Sprintf(
			"Engine container start was rejected by configuration validation for game %s.\n\nReason: %s\n",
			gameID, fallbackString(reason, "no reason provided"),
		)
		return subject, body, nil
	},
}
// payloadString fetches a string field from a notification payload
// without panicking on missing or wrongly-typed entries; an empty
// string is the documented fallback.
func payloadString(payload map[string]any, key string) string {
	if v, ok := payload[key].(string); ok {
		return v
	}
	return ""
}
// fallbackString returns value unless it is empty or whitespace-only,
// in which case alt is returned.
func fallbackString(value, alt string) string {
	if strings.TrimSpace(value) != "" {
		return value
	}
	return alt
}
// renderLoginCode builds the English plain-text body used for the
// `auth.login_code` template. Localisation is deferred to a future
// stage (see `backend/README.md` and `backend/docs/`).
func renderLoginCode(code string, ttl time.Duration) (subject, body string) {
subject = fmt.Sprintf("Galaxy login code: %s", code)
minutes := int(ttl.Round(time.Minute) / time.Minute)
if minutes <= 0 {
minutes = 1
}
body = fmt.Sprintf(
"Your one-time Galaxy login code is %s.\n\nThe code expires in %d minutes. If you did not request it, you can ignore this email.\n",
code, minutes,
)
return subject, body
}
+147
View File
@@ -0,0 +1,147 @@
package mail
import (
"strings"
"testing"
"time"
)
// TestRenderLoginCode checks the rendered subject and body both carry
// the code and the body spells out the TTL in minutes.
func TestRenderLoginCode(t *testing.T) {
	t.Parallel()
	subject, body := renderLoginCode("123456", 10*time.Minute)
	switch {
	case !strings.Contains(subject, "123456"):
		t.Fatalf("subject must include code, got %q", subject)
	case !strings.Contains(body, "123456"):
		t.Fatalf("body must include code, got %q", body)
	case !strings.Contains(body, "10 minutes"):
		t.Fatalf("body must include human-readable TTL, got %q", body)
	}
}
// TestRenderLoginCode_RoundsTTL pins the TTL rounding behaviour:
// sub-minute TTLs clamp to one minute, seconds round to the nearest
// whole minute.
func TestRenderLoginCode_RoundsTTL(t *testing.T) {
	t.Parallel()
	cases := map[string]struct {
		ttl    time.Duration
		expect string
	}{
		"sub-minute": {ttl: 30 * time.Second, expect: "1 minutes"},
		"exact":      {ttl: 10 * time.Minute, expect: "10 minutes"},
		"with secs":  {ttl: 5*time.Minute + 29*time.Second, expect: "5 minutes"},
	}
	for name, tc := range cases {
		tc := tc
		t.Run(name, func(t *testing.T) {
			t.Parallel()
			_, body := renderLoginCode("000000", tc.ttl)
			if strings.Contains(body, tc.expect) {
				return
			}
			t.Fatalf("body missing %q for ttl=%s, got %q", tc.expect, tc.ttl, body)
		})
	}
}
// TestNormaliseRecipient covers trimming, display-name stripping, and
// rejection of empty / malformed addresses.
func TestNormaliseRecipient(t *testing.T) {
	t.Parallel()
	cases := map[string]struct {
		input string
		want  string
		err   bool
	}{
		"plain":            {input: "alice@example.test", want: "alice@example.test"},
		"trims":            {input: " bob@example.test ", want: "bob@example.test"},
		"display-stripped": {input: "Alice <alice@example.test>", want: "alice@example.test"},
		"empty":            {input: "", err: true},
		"whitespace":       {input: " ", err: true},
		"malformed":        {input: "not-an-email", err: true},
		"with-spaces":      {input: "ali ce@example.test", err: true},
	}
	for name, tc := range cases {
		tc := tc
		t.Run(name, func(t *testing.T) {
			t.Parallel()
			got, err := normaliseRecipient(tc.input)
			switch {
			case tc.err && err == nil:
				t.Fatalf("expected error, got %q", got)
			case tc.err:
				// Wanted an error and got one — nothing more to check.
			case err != nil:
				t.Fatalf("unexpected error: %v", err)
			case got != tc.want:
				t.Fatalf("got %q want %q", got, tc.want)
			}
		})
	}
}
// TestTemplateRendererLoginCode checks the catalog entry for the login
// code: registered, mentions the code in subject and body, and rejects
// a missing code.
func TestTemplateRendererLoginCode(t *testing.T) {
	t.Parallel()
	render, ok := templateRenderers[TemplateLoginCode]
	if !ok || render == nil {
		t.Fatal("TemplateLoginCode renderer must be registered")
	}
	subject, body, err := render(map[string]any{"code": "654321", "ttl": 7 * time.Minute})
	if err != nil {
		t.Fatalf("render: %v", err)
	}
	if !strings.Contains(subject, "654321") || !strings.Contains(body, "654321") {
		t.Fatalf("subject=%q body=%q must mention code", subject, body)
	}
	_, _, err = render(map[string]any{"ttl": 7 * time.Minute})
	if err == nil {
		t.Fatal("missing code must error")
	}
}
// TestNextBackoffMonotonicAndCapped checks the jittered backoff stays
// within the configured budget and does not collapse between attempts.
func TestNextBackoffMonotonicAndCapped(t *testing.T) {
	t.Parallel()
	// Sample many runs per attempt so jitter does not flake the
	// invariant: median of attempt N is below median of attempt N+1
	// up to the cap.
	prev := time.Duration(0)
	for n := 1; n <= 12; n++ {
		var sum time.Duration
		runs := 32
		for range runs {
			sum += nextBackoff(n)
		}
		// Average over 32 samples smooths the ±jitter enough for the
		// bounds below to be stable.
		avg := sum / time.Duration(runs)
		if avg > backoffMax+backoffMax/4 { // generous upper bound
			t.Fatalf("attempt %d avg %s exceeds capped budget", n, avg)
		}
		if avg < backoffBase/2 {
			t.Fatalf("attempt %d avg %s below base/2", n, avg)
		}
		// Allow noise between neighbouring attempts, but a halving
		// would indicate the schedule went backwards.
		if n > 1 && avg < prev/2 {
			t.Fatalf("backoff decreased dramatically between attempts %d and %d (%s vs %s)", n-1, n, prev, avg)
		}
		prev = avg
	}
}
func TestIsPermanent(t *testing.T) {
t.Parallel()
if IsPermanent(nil) {
t.Fatal("nil must not be permanent")
}
transient := &SendError{Err: errSentinel("transient")}
if IsPermanent(transient) {
t.Fatal("default SendError must not be permanent")
}
permanent := &SendError{Err: errSentinel("permanent"), Permanent: true}
if !IsPermanent(permanent) {
t.Fatal("Permanent=true must report true")
}
}
// errSentinel is a tiny sentinel error helper used only in tests. A
// string value implements error directly, letting tests declare
// distinct sentinels without errors.New.
type errSentinel string

// Error returns the sentinel's text.
func (e errSentinel) Error() string { return string(e) }
+27
View File
@@ -0,0 +1,27 @@
package mail
import "errors"
// Sentinel errors emitted by Service methods. Handlers translate them
// into HTTP responses; tests match on them with errors.Is.
var (
	// ErrDeliveryNotFound is returned by AdminGetDelivery and AdminResend
	// when the supplied delivery_id does not name a row.
	ErrDeliveryNotFound = errors.New("mail: delivery not found")
	// ErrResendOnSent is returned by AdminResend when the targeted row
	// is in the terminal `sent` state. The admin contract maps this to
	// 409 Conflict; resending an already-delivered mail would push a
	// duplicate copy to the recipient.
	ErrResendOnSent = errors.New("mail: cannot resend a sent delivery")
	// ErrUnknownTemplate is returned by EnqueueTemplate when the
	// supplied template_id is not registered in the inline template
	// catalog. A typo at the producer is the typical cause.
	ErrUnknownTemplate = errors.New("mail: unknown template")
	// ErrInvalidRecipient is returned by EnqueueLoginCode and
	// EnqueueTemplate when the supplied recipient address is empty or
	// fails stdlib net/mail RFC 5322 parsing (see normaliseRecipient).
	ErrInvalidRecipient = errors.New("mail: invalid recipient address")
)
+94
View File
@@ -0,0 +1,94 @@
// Package mail implements the durable mail outbox documented in
// `backend/PLAN.md` §5.6 and `backend/README.md` §9. Producers call
// EnqueueLoginCode or EnqueueTemplate; the rows land in
// `backend.mail_deliveries` together with their payload and recipients.
// A single Worker goroutine claims due rows with
// `SELECT … FOR UPDATE SKIP LOCKED`, sends them through SMTP via the
// `wneessen/go-mail` library, records every attempt, and dead-letters
// rows that exceed the configured maximum.
//
// Until the notification module lands, the AdminNotifier dependency is
// satisfied by NewNoopAdminNotifier — the same pattern auth uses for
// LoginCodeMailer and SessionInvalidator.
package mail
import (
"context"
"galaxy/backend/internal/config"
"time"
"go.uber.org/zap"
)
// Service is the mail-domain entry point. It wires the persistence
// store, the SMTP sender, the admin-notification publisher used on
// dead-letter, the runtime configuration, and a structured logger.
type Service struct {
	// deps holds the collaborators supplied to NewService, after the
	// defaulting NewService applies (Now, Logger, Admin).
	deps Deps
}
// NewService constructs a Service from deps. A nil Now defaults to
// time.Now; a nil Logger defaults to zap.NewNop. Store and SMTP must be
// supplied — calling Service methods with either nil panics on first
// use, matching how the rest of `internal/*` signals missing wiring.
func NewService(deps Deps) *Service {
	if deps.Now == nil {
		deps.Now = time.Now
	}
	logger := deps.Logger
	if logger == nil {
		logger = zap.NewNop()
	}
	// Name the logger before handing it to the default notifier so
	// both log under "mail".
	deps.Logger = logger.Named("mail")
	if deps.Admin == nil {
		deps.Admin = NewNoopAdminNotifier(deps.Logger)
	}
	return &Service{deps: deps}
}
// Backoff parameters for the worker retry schedule. The values match
// the trade-off documented in `backend/README.md` and `backend/docs/`: a 5
// second base, ×2 growth, capped at one hour, with ±25% jitter.
const (
	backoffBase   = 5 * time.Second // delay before the first retry
	backoffFactor = 2.0             // exponential growth per attempt
	backoffMax    = time.Hour       // hard cap on any single delay
	backoffJitter = 0.25            // ±25% randomisation applied to each delay
)

// Status values stored in `mail_deliveries.status`. Mirrored by the
// CHECK constraint added in migration 00001.
const (
	StatusPending      = "pending"       // enqueued, not yet attempted
	StatusRetrying     = "retrying"      // failed at least once, retry scheduled
	StatusSent         = "sent"          // terminal: delivered successfully
	StatusDeadLettered = "dead_lettered" // terminal: gave up (max attempts / permanent error)
)

// Outcome values stored in `mail_attempts.outcome`. Mirrored by the
// CHECK constraint added in migration 00001.
const (
	OutcomeSuccess        = "success"
	OutcomeTransientError = "transient_error"
	OutcomePermanentError = "permanent_error"
)

// Recipient kinds stored in `mail_recipients.kind`. The 5.6
// implementation only emits 'to'; cc/bcc/reply_to remain available
// for future producers.
const (
	RecipientKindTo = "to"
)
// Config returns the runtime mail configuration. Worker uses it to
// schedule the scan loop and bound retries.
func (s *Service) Config() config.MailConfig {
	cfg := s.deps.Config
	return cfg
}
// Stats returns the live count of `mail_deliveries` rows grouped by
// status. The metricsapi server reads this through the Service so
// `mail_outbox_depth{state}` (README §15) does not require the worker
// to publish gauges from inside its hot path.
func (s *Service) Stats(ctx context.Context) (map[string]int64, error) {
	counts, err := s.deps.Store.CountByStatus(ctx)
	return counts, err
}
+131
View File
@@ -0,0 +1,131 @@
package mail
import (
"context"
"errors"
"fmt"
"galaxy/backend/internal/config"
gomail "github.com/wneessen/go-mail"
"go.uber.org/zap"
)
// SMTPClient is the abstraction surface over `wneessen/go-mail` so
// tests can stub the wire layer without dialling. Production wires
// realSMTPClient.
type SMTPClient interface {
	// DialAndSendWithContext connects to the relay and transmits msg,
	// honouring ctx cancellation.
	DialAndSendWithContext(ctx context.Context, msg *gomail.Msg) error
}
// realSMTPClient adapts *gomail.Client to SMTPClient. The variadic
// nature of DialAndSendWithContext is hidden because the worker only
// ever sends one message per call.
type realSMTPClient struct {
	inner *gomail.Client // the configured go-mail client built in NewSMTPSender
}

// DialAndSendWithContext delegates the single-message send to the
// wrapped go-mail client.
func (c *realSMTPClient) DialAndSendWithContext(ctx context.Context, msg *gomail.Msg) error {
	return c.inner.DialAndSendWithContext(ctx, msg)
}
// smtpSender implements SMTPSender on top of an SMTPClient. The
// `from` address is captured at construction time from
// `BACKEND_SMTP_FROM`.
type smtpSender struct {
	client SMTPClient  // wire layer; a stub in tests, realSMTPClient in production
	from   string      // sender address applied to every outbound message
	logger *zap.Logger // named "mail.smtp" by NewSMTPSender
}
// NewSMTPSender constructs the production sender bound to the SMTP
// relay configured in cfg. The TLS-mode mapping is:
//
//   - "none" → plain TCP, no TLS;
//   - "starttls" → STARTTLS required (TLSMandatory);
//   - "tls" → implicit TLS at the configured port (WithSSL).
//
// PLAIN authentication is enabled when both Username and Password are
// non-empty.
func NewSMTPSender(cfg config.SMTPConfig, logger *zap.Logger) (SMTPSender, error) {
	if logger == nil {
		logger = zap.NewNop()
	}
	named := logger.Named("mail.smtp")
	options := []gomail.Option{gomail.WithPort(cfg.Port)}
	switch cfg.TLSMode {
	case "none":
		options = append(options, gomail.WithTLSPolicy(gomail.NoTLS))
	case "starttls":
		options = append(options, gomail.WithTLSPolicy(gomail.TLSMandatory))
	case "tls":
		options = append(options, gomail.WithSSL())
	default:
		return nil, fmt.Errorf("mail: unsupported SMTP TLS mode %q", cfg.TLSMode)
	}
	// PLAIN auth only when both credentials are present.
	if cfg.Username != "" && cfg.Password != "" {
		options = append(options,
			gomail.WithSMTPAuth(gomail.SMTPAuthPlain),
			gomail.WithUsername(cfg.Username),
			gomail.WithPassword(cfg.Password),
		)
	}
	client, err := gomail.NewClient(cfg.Host, options...)
	if err != nil {
		return nil, fmt.Errorf("mail: build smtp client: %w", err)
	}
	sender := &smtpSender{
		client: &realSMTPClient{inner: client},
		from:   cfg.From,
		logger: named,
	}
	return sender, nil
}
// Send renders the OutboundMessage as a *gomail.Msg and dispatches it
// through the SMTP client. Address validation is intentional: a
// malformed To here means the producer slipped past
// normaliseRecipient, which is a programming error and gets wrapped
// as Permanent so the worker dead-letters immediately.
func (s *smtpSender) Send(ctx context.Context, msg OutboundMessage) error {
	if len(msg.To) == 0 {
		return &SendError{Err: errors.New("mail: outbound message has no recipients"), Permanent: true}
	}
	out := gomail.NewMsg()
	if err := out.From(s.from); err != nil {
		return &SendError{Err: fmt.Errorf("set FROM: %w", err), Permanent: true}
	}
	for _, rcpt := range msg.To {
		if err := out.AddTo(rcpt); err != nil {
			return &SendError{Err: fmt.Errorf("add TO %q: %w", rcpt, err), Permanent: true}
		}
	}
	out.Subject(msg.Subject)
	// An empty ContentType defaults to text/plain.
	bodyType := gomail.TypeTextPlain
	if msg.ContentType != "" {
		bodyType = gomail.ContentType(msg.ContentType)
	}
	out.SetBodyString(bodyType, string(msg.Body))
	err := s.client.DialAndSendWithContext(ctx, out)
	if err == nil {
		return nil
	}
	return &SendError{Err: err, Permanent: classifySMTPError(err)}
}
// classifySMTPError decides whether err is permanent. A *gomail.SendError
// reports its permanence through IsTemp; everything else (dial
// failures, context errors, generic I/O) is treated as transient so the
// worker retries until MaxAttempts.
func classifySMTPError(err error) bool {
	if err == nil {
		return false
	}
	var smtpErr *gomail.SendError
	if !errors.As(err, &smtpErr) || smtpErr == nil {
		// Not a protocol-level send error → assume transient.
		return false
	}
	return !smtpErr.IsTemp()
}
+665
View File
@@ -0,0 +1,665 @@
package mail
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"galaxy/backend/internal/postgres/jet/backend/model"
"galaxy/backend/internal/postgres/jet/backend/table"
"github.com/go-jet/jet/v2/postgres"
"github.com/go-jet/jet/v2/qrm"
"github.com/google/uuid"
)
// Store is the Postgres-backed query surface for the mail outbox
// (`mail_deliveries`, `mail_recipients`, `mail_attempts`,
// `mail_dead_letters`, `mail_payloads`). All queries are built through
// go-jet against the generated table bindings under
// `backend/internal/postgres/jet/backend/table`.
type Store struct {
	db *sql.DB // shared connection pool; Store carries no other state
}

// NewStore constructs a Store wrapping db. db is expected to be an
// open pool; Store does not manage its lifecycle.
func NewStore(db *sql.DB) *Store {
	return &Store{db: db}
}
// Delivery mirrors a row in `backend.mail_deliveries`. Tests and
// admin endpoints work against this struct directly.
type Delivery struct {
	DeliveryID     uuid.UUID
	TemplateID     string
	IdempotencyKey string
	Status         string
	Attempts       int32
	NextAttemptAt  *time.Time // nil when no retry is armed (e.g. after MarkSent)
	PayloadID      uuid.UUID
	LastError      string
	CreatedAt      time.Time
	UpdatedAt      time.Time
	SentAt         *time.Time // nil until MarkSent stamps it
	DeadLetteredAt *time.Time // nil unless MarkDeadLettered ran
}

// Attempt mirrors a row in `backend.mail_attempts`.
type Attempt struct {
	AttemptID  uuid.UUID
	DeliveryID uuid.UUID
	AttemptNo  int32 // monotonic per delivery; see RecordAttempt
	StartedAt  time.Time
	FinishedAt *time.Time // nullable in the schema
	Outcome    string
	Error      string
}

// DeadLetter mirrors a row in `backend.mail_dead_letters`.
type DeadLetter struct {
	DeadLetterID uuid.UUID
	DeliveryID   uuid.UUID
	ArchivedAt   time.Time
	Reason       string
}

// Payload mirrors a row in `backend.mail_payloads`. Body is the raw
// rendered bytes; Subject is nullable in the schema and is therefore a
// pointer here.
type Payload struct {
	PayloadID   uuid.UUID
	ContentType string
	Subject     *string
	Body        []byte
	CreatedAt   time.Time
}

// Recipient mirrors a row in `backend.mail_recipients`.
type Recipient struct {
	RecipientID uuid.UUID
	DeliveryID  uuid.UUID
	Address     string
	Kind        string // InsertEnqueue writes RecipientKindTo
}

// EnqueueArgs aggregates the inputs to InsertEnqueue. Constructing the
// struct by name keeps the call site readable when the Service grows
// new optional fields (locale, headers, etc.).
type EnqueueArgs struct {
	DeliveryID     uuid.UUID
	TemplateID     string
	IdempotencyKey string // unique together with TemplateID
	Recipients     []string
	ContentType    string
	Subject        string
	Body           []byte
}
// deliveryColumns lists the projection used by every read of
// `mail_deliveries`. The order matches model.MailDeliveries field
// layout for direct QRM scanning.
func deliveryColumns() postgres.ColumnList {
	md := table.MailDeliveries
	return postgres.ColumnList{
		md.DeliveryID, md.TemplateID, md.IdempotencyKey, md.Status,
		md.Attempts, md.NextAttemptAt, md.PayloadID, md.LastError,
		md.CreatedAt, md.UpdatedAt, md.SentAt, md.DeadLetteredAt,
	}
}
// InsertEnqueue persists a fresh delivery row together with its payload
// and recipients in a single transaction. The (template_id,
// idempotency_key) UNIQUE constraint handles duplicate enqueue: when
// the conflict triggers, the transaction rolls back the payload insert
// (so we do not leak orphaned payloads) and reports `inserted=false`
// to the caller. The returned bool is therefore "a new delivery was
// created"; a nil error with false means an idempotent no-op.
func (s *Store) InsertEnqueue(ctx context.Context, args EnqueueArgs) (bool, error) {
	var inserted bool
	err := withTx(ctx, s.db, func(tx *sql.Tx) error {
		// Payload goes first because the delivery row references it.
		payloadID := uuid.New()
		payloadStmt := table.MailPayloads.INSERT(
			table.MailPayloads.PayloadID,
			table.MailPayloads.ContentType,
			table.MailPayloads.Subject,
			table.MailPayloads.Body,
		).VALUES(payloadID, args.ContentType, args.Subject, args.Body)
		if _, err := payloadStmt.ExecContext(ctx, tx); err != nil {
			return fmt.Errorf("insert payload: %w", err)
		}
		// ON CONFLICT DO NOTHING + RETURNING: on a duplicate key the
		// statement returns zero rows, which QRM surfaces as ErrNoRows.
		deliveryStmt := table.MailDeliveries.INSERT(
			table.MailDeliveries.DeliveryID,
			table.MailDeliveries.TemplateID,
			table.MailDeliveries.IdempotencyKey,
			table.MailDeliveries.Status,
			table.MailDeliveries.NextAttemptAt,
			table.MailDeliveries.PayloadID,
		).VALUES(
			args.DeliveryID, args.TemplateID, args.IdempotencyKey, StatusPending,
			postgres.NOW(), payloadID,
		).
			ON_CONFLICT(table.MailDeliveries.TemplateID, table.MailDeliveries.IdempotencyKey).
			DO_NOTHING().
			RETURNING(table.MailDeliveries.DeliveryID)
		var stored model.MailDeliveries
		if err := deliveryStmt.QueryContext(ctx, tx, &stored); err != nil {
			if errors.Is(err, qrm.ErrNoRows) {
				// Idempotent re-enqueue. Roll back the transaction so the
				// orphan payload insert does not survive.
				return errIdempotentNoop
			}
			return fmt.Errorf("insert delivery: %w", err)
		}
		// Recipients only exist for genuinely new deliveries.
		for _, addr := range args.Recipients {
			recipientStmt := table.MailRecipients.INSERT(
				table.MailRecipients.RecipientID,
				table.MailRecipients.DeliveryID,
				table.MailRecipients.Address,
				table.MailRecipients.Kind,
			).VALUES(uuid.New(), args.DeliveryID, addr, RecipientKindTo)
			if _, err := recipientStmt.ExecContext(ctx, tx); err != nil {
				return fmt.Errorf("insert recipient %q: %w", addr, err)
			}
		}
		inserted = true
		return nil
	})
	// Translate the internal sentinel into the (false, nil) contract.
	if errors.Is(err, errIdempotentNoop) {
		return false, nil
	}
	if err != nil {
		return false, err
	}
	return inserted, nil
}
// errIdempotentNoop is an internal sentinel that tells withTx to roll
// back the transaction without surfacing an error to the caller. It
// must never escape this package — InsertEnqueue catches it on the
// way out and converts it to (inserted=false, err=nil).
var errIdempotentNoop = errors.New("mail store: idempotent noop")
// ClaimDue locks up to `limit` due rows with FOR UPDATE SKIP LOCKED
// and returns them with their full payload and recipient set. The
// supplied tx must be the worker's per-row transaction; the caller
// completes the work and commits. exclude is the list of delivery_ids
// already handled in the current tick — they are filtered out so a
// row whose retry lands at next_attempt_at <= now() is not re-claimed
// inside the same tick loop.
func (s *Store) ClaimDue(ctx context.Context, tx *sql.Tx, limit int, exclude ...uuid.UUID) ([]ClaimedDelivery, error) {
	d := table.MailDeliveries
	// Due = pending/retrying AND (no timer armed OR timer elapsed).
	condition := d.Status.IN(postgres.String(StatusPending), postgres.String(StatusRetrying)).
		AND(d.NextAttemptAt.IS_NULL().OR(d.NextAttemptAt.LT_EQ(postgres.NOW())))
	if len(exclude) > 0 {
		excludeExprs := make([]postgres.Expression, 0, len(exclude))
		for _, id := range exclude {
			excludeExprs = append(excludeExprs, postgres.UUID(id))
		}
		condition = condition.AND(d.DeliveryID.NOT_IN(excludeExprs...))
	}
	// Oldest-due first; COALESCE falls back to created_at for rows that
	// have never been scheduled.
	stmt := postgres.SELECT(deliveryColumns()).
		FROM(d).
		WHERE(condition).
		ORDER_BY(postgres.COALESCE(d.NextAttemptAt, d.CreatedAt).ASC()).
		LIMIT(int64(limit)).
		FOR(postgres.UPDATE().SKIP_LOCKED())
	var rows []model.MailDeliveries
	if err := stmt.QueryContext(ctx, tx, &rows); err != nil {
		return nil, fmt.Errorf("claim due: %w", err)
	}
	// Hydrate payload + recipients per claimed row inside the same tx
	// so the worker sees a consistent snapshot under the row lock.
	claimed := make([]ClaimedDelivery, 0, len(rows))
	for _, row := range rows {
		delivery := modelToDelivery(row)
		payload, err := s.loadPayloadTx(ctx, tx, delivery.PayloadID)
		if err != nil {
			return nil, err
		}
		recipients, err := s.listRecipientsTx(ctx, tx, delivery.DeliveryID)
		if err != nil {
			return nil, err
		}
		claimed = append(claimed, ClaimedDelivery{
			Delivery:   delivery,
			Payload:    payload,
			Recipients: recipients,
		})
	}
	return claimed, nil
}
// ClaimedDelivery bundles a locked delivery row with its payload and
// recipients so the worker has everything it needs in one structure.
type ClaimedDelivery struct {
	Delivery   Delivery
	Payload    Payload
	Recipients []Recipient
}
// RecordAttempt inserts a row into `mail_attempts` for the given
// delivery. attempt_no is derived from MAX(attempt_no) + 1 within the
// transaction, which keeps the column monotonic across resend cycles
// — the delivery's wire-visible `attempts` field counts only the
// current cycle (and resets on resend), while `mail_attempts` stays
// append-only forensic history. Returns the attempt_no assigned to
// the new row.
func (s *Store) RecordAttempt(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, startedAt time.Time, finishedAt time.Time, outcome string, errMsg string) (int32, error) {
	a := table.MailAttempts
	// Read the current max attempt_no for this delivery first; the
	// surrounding worker transaction guarantees no concurrent inserts on
	// the same delivery_id, so a simple read-then-write is sufficient
	// (and avoids the awkward correlated subquery inside INSERT...VALUES
	// that jet does not parenthesise).
	maxStmt := postgres.SELECT(postgres.MAXi(a.AttemptNo).AS("max")).
		FROM(a).
		WHERE(a.DeliveryID.EQ(postgres.UUID(deliveryID)))
	var maxRow struct {
		Max *int32 `alias:"max"` // nil when no prior attempts exist
	}
	if err := maxStmt.QueryContext(ctx, tx, &maxRow); err != nil {
		return 0, fmt.Errorf("record attempt: read max attempt_no: %w", err)
	}
	// First attempt of a delivery gets attempt_no = 1.
	nextNo := int32(1)
	if maxRow.Max != nil {
		nextNo = *maxRow.Max + 1
	}
	insertStmt := a.INSERT(
		a.AttemptID, a.DeliveryID, a.AttemptNo,
		a.StartedAt, a.FinishedAt, a.Outcome, a.Error,
	).VALUES(
		uuid.New(), deliveryID, nextNo,
		startedAt, finishedAt, outcome, errMsg,
	).RETURNING(a.AttemptNo)
	var inserted model.MailAttempts
	if err := insertStmt.QueryContext(ctx, tx, &inserted); err != nil {
		return 0, fmt.Errorf("record attempt: %w", err)
	}
	return inserted.AttemptNo, nil
}
// MarkSent flips the delivery to status='sent', bumps the attempt
// counter, stamps sent_at/updated_at, disarms next_attempt_at, and
// clears last_error — all in one UPDATE under the caller's tx.
func (s *Store) MarkSent(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, at time.Time) error {
	md := table.MailDeliveries
	update := md.UPDATE().
		SET(
			md.Status.SET(postgres.String(StatusSent)),
			md.Attempts.SET(md.Attempts.ADD(postgres.Int(1))),
			md.SentAt.SET(postgres.TimestampzT(at)),
			md.UpdatedAt.SET(postgres.TimestampzT(at)),
			md.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			md.LastError.SET(postgres.String("")),
		).
		WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID)))
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("mark sent: %w", err)
	}
	return nil
}
// ScheduleRetry flips the delivery to status='retrying', bumps the
// attempt counter, records the failure in last_error, and arms
// next_attempt_at so the worker re-claims the row later.
func (s *Store) ScheduleRetry(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, at time.Time, nextAt time.Time, errMsg string) error {
	md := table.MailDeliveries
	update := md.UPDATE().
		SET(
			md.Status.SET(postgres.String(StatusRetrying)),
			md.Attempts.SET(md.Attempts.ADD(postgres.Int(1))),
			md.NextAttemptAt.SET(postgres.TimestampzT(nextAt)),
			md.UpdatedAt.SET(postgres.TimestampzT(at)),
			md.LastError.SET(postgres.String(errMsg)),
		).
		WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID)))
	_, err := update.ExecContext(ctx, tx)
	if err != nil {
		return fmt.Errorf("schedule retry: %w", err)
	}
	return nil
}
// MarkDeadLettered moves the delivery to the terminal `dead_lettered`
// state and inserts the matching row into `mail_dead_letters` under
// the same transaction, so the flag and the archive row commit (or
// roll back) together.
func (s *Store) MarkDeadLettered(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID, at time.Time, reason string) error {
	md := table.MailDeliveries
	flip := md.UPDATE().
		SET(
			md.Status.SET(postgres.String(StatusDeadLettered)),
			md.Attempts.SET(md.Attempts.ADD(postgres.Int(1))),
			md.DeadLetteredAt.SET(postgres.TimestampzT(at)),
			md.UpdatedAt.SET(postgres.TimestampzT(at)),
			md.NextAttemptAt.SET(postgres.TimestampzExp(postgres.NULL)),
			md.LastError.SET(postgres.String(reason)),
		).
		WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID)))
	if _, err := flip.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("mark dead-lettered: %w", err)
	}

	mdl := table.MailDeadLetters
	archive := mdl.INSERT(
		mdl.DeadLetterID, mdl.DeliveryID, mdl.ArchivedAt, mdl.Reason,
	).VALUES(uuid.New(), deliveryID, at, reason)
	if _, err := archive.ExecContext(ctx, tx); err != nil {
		return fmt.Errorf("insert dead-letter: %w", err)
	}
	return nil
}
// CountByStatus returns a map keyed by the four status values so the
// worker can publish `mail_outbox_depth{state}` without scanning the
// whole table per metric tick. Statuses with no rows are present with
// an explicit zero.
func (s *Store) CountByStatus(ctx context.Context) (map[string]int64, error) {
	md := table.MailDeliveries
	query := postgres.SELECT(
		md.Status,
		postgres.COUNT(postgres.STAR).AS("count"),
	).FROM(md).GROUP_BY(md.Status)
	var grouped []struct {
		MailDeliveries model.MailDeliveries
		Count          int64 `alias:"count"`
	}
	if err := query.QueryContext(ctx, s.db, &grouped); err != nil {
		return nil, fmt.Errorf("count by status: %w", err)
	}
	// Seed every known status so missing groups still report zero.
	counts := make(map[string]int64, 4)
	for _, status := range []string{StatusPending, StatusRetrying, StatusSent, StatusDeadLettered} {
		counts[status] = 0
	}
	for _, g := range grouped {
		counts[g.MailDeliveries.Status] = g.Count
	}
	return counts, nil
}
// GetDelivery loads a single row by primary key. ErrDeliveryNotFound
// is returned when no row matches.
func (s *Store) GetDelivery(ctx context.Context, deliveryID uuid.UUID) (Delivery, error) {
	md := table.MailDeliveries
	query := postgres.SELECT(deliveryColumns()).
		FROM(md).
		WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID))).
		LIMIT(1)
	var rec model.MailDeliveries
	err := query.QueryContext(ctx, s.db, &rec)
	switch {
	case errors.Is(err, qrm.ErrNoRows):
		return Delivery{}, ErrDeliveryNotFound
	case err != nil:
		return Delivery{}, fmt.Errorf("get delivery: %w", err)
	}
	return modelToDelivery(rec), nil
}
// ListDeliveries returns the deliveries page in newest-first order
// (created_at, then delivery_id as tiebreaker) together with the total
// row count for pagination.
func (s *Store) ListDeliveries(ctx context.Context, offset, limit int) ([]Delivery, int64, error) {
	md := table.MailDeliveries
	total, err := countAll(ctx, s.db, md)
	if err != nil {
		return nil, 0, fmt.Errorf("count deliveries: %w", err)
	}
	query := postgres.SELECT(deliveryColumns()).
		FROM(md).
		ORDER_BY(md.CreatedAt.DESC(), md.DeliveryID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))
	var recs []model.MailDeliveries
	if err := query.QueryContext(ctx, s.db, &recs); err != nil {
		return nil, 0, fmt.Errorf("list deliveries: %w", err)
	}
	page := make([]Delivery, len(recs))
	for i := range recs {
		page[i] = modelToDelivery(recs[i])
	}
	return page, total, nil
}
// ListAttempts returns every attempt for the given delivery, ordered
// by attempt_no ascending. An empty result is returned without error.
func (s *Store) ListAttempts(ctx context.Context, deliveryID uuid.UUID) ([]Attempt, error) {
	ma := table.MailAttempts
	query := postgres.SELECT(
		ma.AttemptID, ma.DeliveryID, ma.AttemptNo,
		ma.StartedAt, ma.FinishedAt, ma.Outcome, ma.Error,
	).
		FROM(ma).
		WHERE(ma.DeliveryID.EQ(postgres.UUID(deliveryID))).
		ORDER_BY(ma.AttemptNo.ASC())
	var recs []model.MailAttempts
	if err := query.QueryContext(ctx, s.db, &recs); err != nil {
		return nil, fmt.Errorf("list attempts: %w", err)
	}
	attempts := make([]Attempt, len(recs))
	for i := range recs {
		attempts[i] = modelToAttempt(recs[i])
	}
	return attempts, nil
}
// ListDeadLetters returns the dead-letter page newest-first
// (archived_at, then dead_letter_id as tiebreaker) with the total
// row count for pagination.
func (s *Store) ListDeadLetters(ctx context.Context, offset, limit int) ([]DeadLetter, int64, error) {
	mdl := table.MailDeadLetters
	total, err := countAll(ctx, s.db, mdl)
	if err != nil {
		return nil, 0, fmt.Errorf("count dead-letters: %w", err)
	}
	query := postgres.SELECT(
		mdl.DeadLetterID, mdl.DeliveryID, mdl.ArchivedAt, mdl.Reason,
	).
		FROM(mdl).
		ORDER_BY(mdl.ArchivedAt.DESC(), mdl.DeadLetterID.DESC()).
		LIMIT(int64(limit)).OFFSET(int64(offset))
	var recs []model.MailDeadLetters
	if err := query.QueryContext(ctx, s.db, &recs); err != nil {
		return nil, 0, fmt.Errorf("list dead-letters: %w", err)
	}
	page := make([]DeadLetter, len(recs))
	for i, rec := range recs {
		page[i] = DeadLetter{
			DeadLetterID: rec.DeadLetterID,
			DeliveryID:   rec.DeliveryID,
			ArchivedAt:   rec.ArchivedAt,
			Reason:       rec.Reason,
		}
	}
	return page, total, nil
}
// ResendNonSent re-arms the delivery for another attempt cycle. The
// `status <> 'sent'` clause makes it the storage-level guard that
// matches the contract: ErrResendOnSent is returned when the row is
// already terminal-sent. ErrDeliveryNotFound surfaces when no row
// matches. On success the refreshed (pending, attempts=0) row is
// returned.
func (s *Store) ResendNonSent(ctx context.Context, deliveryID uuid.UUID, at time.Time) (Delivery, error) {
	var d Delivery
	err := withTx(ctx, s.db, func(tx *sql.Tx) error {
		md := table.MailDeliveries
		// Lock the row first so the status check and the re-arm update
		// are atomic with respect to the worker.
		lockStmt := postgres.SELECT(md.Status).
			FROM(md).
			WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID))).
			FOR(postgres.UPDATE())
		var locked model.MailDeliveries
		if err := lockStmt.QueryContext(ctx, tx, &locked); err != nil {
			if errors.Is(err, qrm.ErrNoRows) {
				return ErrDeliveryNotFound
			}
			return fmt.Errorf("lock delivery: %w", err)
		}
		// Terminal-sent rows are immutable by contract.
		if locked.Status == StatusSent {
			return ErrResendOnSent
		}
		// Reset the cycle: pending status, zeroed attempt counter,
		// timer armed at `at`, dead-letter flag and last_error cleared.
		updateStmt := md.UPDATE().
			SET(
				md.Status.SET(postgres.String(StatusPending)),
				md.Attempts.SET(postgres.Int(0)),
				md.NextAttemptAt.SET(postgres.TimestampzT(at)),
				md.DeadLetteredAt.SET(postgres.TimestampzExp(postgres.NULL)),
				md.LastError.SET(postgres.String("")),
				md.UpdatedAt.SET(postgres.TimestampzT(at)),
			).
			WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID)))
		if _, err := updateStmt.ExecContext(ctx, tx); err != nil {
			return fmt.Errorf("re-arm delivery: %w", err)
		}
		// Reload inside the same tx so the caller gets the post-update
		// state, not a stale snapshot.
		reloadStmt := postgres.SELECT(deliveryColumns()).
			FROM(md).
			WHERE(md.DeliveryID.EQ(postgres.UUID(deliveryID))).
			LIMIT(1)
		var refreshed model.MailDeliveries
		if err := reloadStmt.QueryContext(ctx, tx, &refreshed); err != nil {
			return fmt.Errorf("reload delivery: %w", err)
		}
		d = modelToDelivery(refreshed)
		return nil
	})
	if err != nil {
		return Delivery{}, err
	}
	return d, nil
}
// loadPayloadTx fetches the payload row for payloadID inside the
// caller's transaction.
func (s *Store) loadPayloadTx(ctx context.Context, tx *sql.Tx, payloadID uuid.UUID) (Payload, error) {
	mp := table.MailPayloads
	query := postgres.SELECT(
		mp.PayloadID, mp.ContentType, mp.Subject, mp.Body, mp.CreatedAt,
	).FROM(mp).
		WHERE(mp.PayloadID.EQ(postgres.UUID(payloadID))).
		LIMIT(1)
	var rec model.MailPayloads
	if err := query.QueryContext(ctx, tx, &rec); err != nil {
		return Payload{}, fmt.Errorf("load payload: %w", err)
	}
	payload := Payload{
		PayloadID:   rec.PayloadID,
		ContentType: rec.ContentType,
		Subject:     rec.Subject,
		Body:        rec.Body,
		CreatedAt:   rec.CreatedAt,
	}
	return payload, nil
}
// listRecipientsTx fetches the recipients of deliveryID inside the
// caller's transaction, ordered by recipient_id for determinism.
func (s *Store) listRecipientsTx(ctx context.Context, tx *sql.Tx, deliveryID uuid.UUID) ([]Recipient, error) {
	mr := table.MailRecipients
	query := postgres.SELECT(
		mr.RecipientID, mr.DeliveryID, mr.Address, mr.Kind,
	).FROM(mr).
		WHERE(mr.DeliveryID.EQ(postgres.UUID(deliveryID))).
		ORDER_BY(mr.RecipientID.ASC())
	var recs []model.MailRecipients
	if err := query.QueryContext(ctx, tx, &recs); err != nil {
		return nil, fmt.Errorf("list recipients: %w", err)
	}
	recipients := make([]Recipient, len(recs))
	for i, rec := range recs {
		recipients[i] = Recipient{
			RecipientID: rec.RecipientID,
			DeliveryID:  rec.DeliveryID,
			Address:     rec.Address,
			Kind:        rec.Kind,
		}
	}
	return recipients, nil
}
// withTx runs fn inside a Postgres transaction on db: commit when fn
// returns nil, rollback otherwise. A rollback failure is deliberately
// discarded because fn's own error is the more actionable one.
func withTx(ctx context.Context, db *sql.DB, fn func(tx *sql.Tx) error) error {
	tx, beginErr := db.BeginTx(ctx, nil)
	if beginErr != nil {
		return fmt.Errorf("mail store: begin tx: %w", beginErr)
	}
	if fnErr := fn(tx); fnErr != nil {
		// fn already failed; its error outranks a rollback failure.
		_ = tx.Rollback()
		return fnErr
	}
	if commitErr := tx.Commit(); commitErr != nil {
		return fmt.Errorf("mail store: commit tx: %w", commitErr)
	}
	return nil
}
// BeginTx exposes the package-level transaction helper to the worker
// so it can scope ClaimDue + RecordAttempt + Mark* under a single
// commit boundary. The caller owns Commit/Rollback.
func (s *Store) BeginTx(ctx context.Context) (*sql.Tx, error) {
	return s.db.BeginTx(ctx, nil)
}
// modelToDelivery projects a generated model row onto the public
// Delivery struct. Pointer fields are deep-copied so callers cannot
// mutate the underlying scan buffer.
func modelToDelivery(row model.MailDeliveries) Delivery {
	// clone copies a nullable timestamp so the result does not alias
	// the scan buffer.
	clone := func(src *time.Time) *time.Time {
		if src == nil {
			return nil
		}
		t := *src
		return &t
	}
	return Delivery{
		DeliveryID:     row.DeliveryID,
		TemplateID:     row.TemplateID,
		IdempotencyKey: row.IdempotencyKey,
		Status:         row.Status,
		Attempts:       row.Attempts,
		NextAttemptAt:  clone(row.NextAttemptAt),
		PayloadID:      row.PayloadID,
		LastError:      row.LastError,
		CreatedAt:      row.CreatedAt,
		UpdatedAt:      row.UpdatedAt,
		SentAt:         clone(row.SentAt),
		DeadLetteredAt: clone(row.DeadLetteredAt),
	}
}
// modelToAttempt projects a generated model row onto the public Attempt
// struct, copying the nullable FinishedAt so it does not alias the
// scan buffer.
func modelToAttempt(row model.MailAttempts) Attempt {
	var finished *time.Time
	if row.FinishedAt != nil {
		t := *row.FinishedAt
		finished = &t
	}
	return Attempt{
		AttemptID:  row.AttemptID,
		DeliveryID: row.DeliveryID,
		AttemptNo:  row.AttemptNo,
		StartedAt:  row.StartedAt,
		FinishedAt: finished,
		Outcome:    row.Outcome,
		Error:      row.Error,
	}
}
// countAll runs `SELECT COUNT(*) FROM <tbl>` through jet and returns
// the result as int64. The destination uses an alias-tagged scalar so
// QRM can map the un-prefixed alias produced by AS("count").
func countAll(ctx context.Context, db qrm.DB, tbl postgres.ReadableTable) (int64, error) {
	var result struct {
		Count int64 `alias:"count"`
	}
	query := postgres.SELECT(postgres.COUNT(postgres.STAR).AS("count")).FROM(tbl)
	if err := query.QueryContext(ctx, db, &result); err != nil {
		return 0, err
	}
	return result.Count, nil
}
+350
View File
@@ -0,0 +1,350 @@
package mail_test
import (
"context"
"database/sql"
"errors"
"net/url"
"testing"
"time"
"galaxy/backend/internal/mail"
backendpg "galaxy/backend/internal/postgres"
pgshared "galaxy/postgres"
"github.com/google/uuid"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
// Container/database fixtures shared by every test in this file.
const (
	pgImage    = "postgres:16-alpine" // pinned image for reproducible runs
	pgUser     = "galaxy"
	pgPassword = "galaxy"
	pgDatabase = "galaxy_backend"
	pgSchema   = "backend" // injected into the DSN via search_path
	pgStartup  = 90 * time.Second // container readiness budget
	pgOpTO     = 10 * time.Second // per-operation DB timeout
)
// startPostgres mirrors the auth_e2e_test scaffolding: spin up
// Postgres, apply migrations, return *sql.DB. The test is skipped
// (not failed) when no container runtime is available; all resources
// are torn down via t.Cleanup.
func startPostgres(t *testing.T) *sql.DB {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	t.Cleanup(cancel)
	pgContainer, err := tcpostgres.Run(ctx, pgImage,
		tcpostgres.WithDatabase(pgDatabase),
		tcpostgres.WithUsername(pgUser),
		tcpostgres.WithPassword(pgPassword),
		testcontainers.WithWaitStrategy(
			// The postgres image logs readiness twice (initdb restart),
			// hence WithOccurrence(2).
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pgStartup),
		),
	)
	if err != nil {
		t.Skipf("postgres testcontainer unavailable, skipping: %v", err)
	}
	t.Cleanup(func() {
		if termErr := testcontainers.TerminateContainer(pgContainer); termErr != nil {
			t.Errorf("terminate postgres container: %v", termErr)
		}
	})
	baseDSN, err := pgContainer.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		t.Fatalf("connection string: %v", err)
	}
	// Scope every connection to the backend schema.
	scopedDSN, err := dsnWithSearchPath(baseDSN, pgSchema)
	if err != nil {
		t.Fatalf("scope dsn: %v", err)
	}
	cfg := pgshared.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pgOpTO
	db, err := pgshared.OpenPrimary(ctx, cfg)
	if err != nil {
		t.Fatalf("open primary: %v", err)
	}
	t.Cleanup(func() { _ = db.Close() })
	if err := pgshared.Ping(ctx, db, cfg.OperationTimeout); err != nil {
		t.Fatalf("ping: %v", err)
	}
	if err := backendpg.ApplyMigrations(ctx, db); err != nil {
		t.Fatalf("apply migrations: %v", err)
	}
	return db
}
// dsnWithSearchPath returns baseDSN (URL form) with search_path set to
// schema, defaulting sslmode to "disable" when the DSN carries none.
func dsnWithSearchPath(baseDSN, schema string) (string, error) {
	u, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	query := u.Query()
	query.Set("search_path", schema)
	if query.Get("sslmode") == "" {
		query.Set("sslmode", "disable")
	}
	u.RawQuery = query.Encode()
	return u.String(), nil
}
// TestStoreInsertEnqueueRoundTrip verifies that a first enqueue
// inserts (inserted=true), a second enqueue with the same
// (template_id, idempotency_key) dedupes (inserted=false), and the
// stored row comes back pending with an armed next_attempt_at.
func TestStoreInsertEnqueueRoundTrip(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	store := mail.NewStore(db)
	ctx := context.Background()
	args := mail.EnqueueArgs{
		DeliveryID:     uuid.New(),
		TemplateID:     mail.TemplateLoginCode,
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{"alice@example.test"},
		ContentType:    "text/plain",
		Subject:        "hello",
		Body:           []byte("hi"),
	}
	inserted, err := store.InsertEnqueue(ctx, args)
	if err != nil {
		t.Fatalf("insert: %v", err)
	}
	if !inserted {
		t.Fatal("first insert must report inserted=true")
	}
	// Same idempotency key must dedupe, even with a fresh delivery id.
	args2 := args
	args2.DeliveryID = uuid.New()
	inserted2, err := store.InsertEnqueue(ctx, args2)
	if err != nil {
		t.Fatalf("insert retry: %v", err)
	}
	if inserted2 {
		t.Fatal("re-enqueue with same key must report inserted=false")
	}
	// The original row (first delivery id) survives the dedupe.
	d, err := store.GetDelivery(ctx, args.DeliveryID)
	if err != nil {
		t.Fatalf("get delivery: %v", err)
	}
	if d.Status != mail.StatusPending {
		t.Fatalf("status=%q want pending", d.Status)
	}
	if d.NextAttemptAt == nil {
		t.Fatal("next_attempt_at must be set on insert")
	}
}
// TestStoreClaimDueAndMarkSent drives the happy path: enqueue →
// claim under a tx (payload and recipients hydrated) → record a
// success attempt → MarkSent → commit, then asserts the terminal
// sent state and the single recorded attempt.
func TestStoreClaimDueAndMarkSent(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	store := mail.NewStore(db)
	ctx := context.Background()
	deliveryID := uuid.New()
	if _, err := store.InsertEnqueue(ctx, mail.EnqueueArgs{
		DeliveryID:     deliveryID,
		TemplateID:     mail.TemplateLoginCode,
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{"bob@example.test"},
		ContentType:    "text/plain",
		Subject:        "hello",
		Body:           []byte("hi"),
	}); err != nil {
		t.Fatalf("insert: %v", err)
	}
	tx, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin: %v", err)
	}
	// Rollback is a no-op after the Commit below; it only fires on
	// early Fatalf exits.
	t.Cleanup(func() { _ = tx.Rollback() })
	claimed, err := store.ClaimDue(ctx, tx, 5)
	if err != nil {
		t.Fatalf("claim: %v", err)
	}
	if len(claimed) != 1 {
		t.Fatalf("got %d claimed, want 1", len(claimed))
	}
	if claimed[0].Delivery.DeliveryID != deliveryID {
		t.Fatalf("claimed wrong delivery: %s", claimed[0].Delivery.DeliveryID)
	}
	if string(claimed[0].Payload.Body) != "hi" {
		t.Fatalf("payload body lost in round trip: %q", claimed[0].Payload.Body)
	}
	if len(claimed[0].Recipients) != 1 || claimed[0].Recipients[0].Address != "bob@example.test" {
		t.Fatalf("recipient lost: %+v", claimed[0].Recipients)
	}
	now := time.Now().UTC()
	if _, err := store.RecordAttempt(ctx, tx, deliveryID, now, now, mail.OutcomeSuccess, ""); err != nil {
		t.Fatalf("record attempt: %v", err)
	}
	if err := store.MarkSent(ctx, tx, deliveryID, now); err != nil {
		t.Fatalf("mark sent: %v", err)
	}
	if err := tx.Commit(); err != nil {
		t.Fatalf("commit: %v", err)
	}
	// Post-commit assertions on the terminal state.
	d, err := store.GetDelivery(ctx, deliveryID)
	if err != nil {
		t.Fatalf("get delivery: %v", err)
	}
	if d.Status != mail.StatusSent {
		t.Fatalf("status=%q want sent", d.Status)
	}
	if d.SentAt == nil {
		t.Fatal("sent_at must be set after MarkSent")
	}
	if d.Attempts != 1 {
		t.Fatalf("attempts=%d want 1", d.Attempts)
	}
	attempts, err := store.ListAttempts(ctx, deliveryID)
	if err != nil {
		t.Fatalf("list attempts: %v", err)
	}
	if len(attempts) != 1 || attempts[0].Outcome != mail.OutcomeSuccess {
		t.Fatalf("attempts=%+v", attempts)
	}
}
// TestStoreScheduleRetryThenDeadLetter exercises the failure path:
// claim → transient failure → ScheduleRetry (status retrying,
// last_error recorded), then MarkDeadLettered in a second tx (status
// dead_lettered, dead_lettered_at stamped, archive row present).
func TestStoreScheduleRetryThenDeadLetter(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	store := mail.NewStore(db)
	ctx := context.Background()
	deliveryID := uuid.New()
	if _, err := store.InsertEnqueue(ctx, mail.EnqueueArgs{
		DeliveryID:     deliveryID,
		TemplateID:     "test.template",
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{"carol@example.test"},
		ContentType:    "text/plain",
		Subject:        "hi",
		Body:           []byte("body"),
	}); err != nil {
		t.Fatalf("insert: %v", err)
	}
	// Tx 1: claim and schedule a retry after a transient failure.
	tx, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin tx 1: %v", err)
	}
	if _, err := store.ClaimDue(ctx, tx, 1); err != nil {
		t.Fatalf("claim 1: %v", err)
	}
	now := time.Now().UTC()
	if _, err := store.RecordAttempt(ctx, tx, deliveryID, now, now, mail.OutcomeTransientError, "boom"); err != nil {
		t.Fatalf("record attempt: %v", err)
	}
	if err := store.ScheduleRetry(ctx, tx, deliveryID, now, now.Add(2*time.Second), "boom"); err != nil {
		t.Fatalf("schedule retry: %v", err)
	}
	if err := tx.Commit(); err != nil {
		t.Fatalf("commit 1: %v", err)
	}
	d, err := store.GetDelivery(ctx, deliveryID)
	if err != nil {
		t.Fatalf("get delivery: %v", err)
	}
	if d.Status != mail.StatusRetrying {
		t.Fatalf("status=%q want retrying", d.Status)
	}
	if d.LastError != "boom" {
		t.Fatalf("last_error=%q want boom", d.LastError)
	}
	// Tx 2: dead-letter the delivery.
	tx2, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin tx 2: %v", err)
	}
	if err := store.MarkDeadLettered(ctx, tx2, deliveryID, now, "max attempts"); err != nil {
		t.Fatalf("mark dead-lettered: %v", err)
	}
	if err := tx2.Commit(); err != nil {
		t.Fatalf("commit 2: %v", err)
	}
	d, err = store.GetDelivery(ctx, deliveryID)
	if err != nil {
		t.Fatalf("get delivery 2: %v", err)
	}
	if d.Status != mail.StatusDeadLettered {
		t.Fatalf("status=%q want dead_lettered", d.Status)
	}
	if d.DeadLetteredAt == nil {
		t.Fatal("dead_lettered_at must be set")
	}
	// The archive row must exist alongside the status flip.
	_, total, err := store.ListDeadLetters(ctx, 0, 25)
	if err != nil {
		t.Fatalf("list dead letters: %v", err)
	}
	if total != 1 {
		t.Fatalf("dead-letter total=%d want 1", total)
	}
}
// TestStoreResendNonSent checks the three ResendNonSent outcomes:
// re-arming a pending row succeeds; a sent row returns
// ErrResendOnSent; a missing row returns ErrDeliveryNotFound.
func TestStoreResendNonSent(t *testing.T) {
	t.Parallel()
	db := startPostgres(t)
	store := mail.NewStore(db)
	ctx := context.Background()
	deliveryID := uuid.New()
	if _, err := store.InsertEnqueue(ctx, mail.EnqueueArgs{
		DeliveryID:     deliveryID,
		TemplateID:     "test.template",
		IdempotencyKey: uuid.NewString(),
		Recipients:     []string{"d@example.test"},
		ContentType:    "text/plain",
		Subject:        "hi",
		Body:           []byte("b"),
	}); err != nil {
		t.Fatalf("insert: %v", err)
	}
	// re-arm pending row -> ok.
	if _, err := store.ResendNonSent(ctx, deliveryID, time.Now().UTC()); err != nil {
		t.Fatalf("resend pending: %v", err)
	}
	// flip to sent and verify resend now errors.
	tx, err := store.BeginTx(ctx)
	if err != nil {
		t.Fatalf("begin: %v", err)
	}
	if _, err := store.ClaimDue(ctx, tx, 1); err != nil {
		t.Fatalf("claim: %v", err)
	}
	now := time.Now().UTC()
	if _, err := store.RecordAttempt(ctx, tx, deliveryID, now, now, mail.OutcomeSuccess, ""); err != nil {
		t.Fatalf("record attempt: %v", err)
	}
	if err := store.MarkSent(ctx, tx, deliveryID, now); err != nil {
		t.Fatalf("mark sent: %v", err)
	}
	if err := tx.Commit(); err != nil {
		t.Fatalf("commit: %v", err)
	}
	if _, err := store.ResendNonSent(ctx, deliveryID, time.Now().UTC()); !errors.Is(err, mail.ErrResendOnSent) {
		t.Fatalf("resend on sent: want ErrResendOnSent, got %v", err)
	}
	if _, err := store.ResendNonSent(ctx, uuid.New(), time.Now().UTC()); !errors.Is(err, mail.ErrDeliveryNotFound) {
		t.Fatalf("resend on missing: want ErrDeliveryNotFound, got %v", err)
	}
}
+230
View File
@@ -0,0 +1,230 @@
package mail
import (
"context"
"errors"
"math"
"math/rand/v2"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
)
// Worker drains the mail outbox: per tick it walks due rows under
// `SELECT … FOR UPDATE SKIP LOCKED`, dispatches each through the SMTP
// sender, and atomically updates the delivery + attempt rows.
// Implements `internal/app.Component`.
type Worker struct {
	svc *Service // supplies Store, SMTP sender, config, and logger via deps
}

// NewWorker constructs a Worker bound to svc.
func NewWorker(svc *Service) *Worker { return &Worker{svc: svc} }
// claimBatchSize bounds how many rows the worker processes per tick.
// 16 keeps each tick under a second on a developer machine while
// leaving headroom for transient SMTP back-pressure.
const claimBatchSize = 16
// Run drives the scan loop until ctx is cancelled. The first tick is
// the startup-drain pass mandated by `PLAN.md` §5.6; after that the
// loop fires every Config.WorkerInterval. Tick failures are logged
// and swallowed so one bad tick never kills the component; Run only
// returns on context cancellation (with a nil error).
func (w *Worker) Run(ctx context.Context) error {
	// Nil receiver is a no-op so a disabled worker can still be wired
	// into the component list.
	if w == nil {
		return nil
	}
	logger := w.svc.deps.Logger.Named("worker")
	if err := w.tick(ctx); err != nil && !errors.Is(err, context.Canceled) {
		logger.Warn("initial mail outbox drain failed", zap.Error(err))
	}
	ticker := time.NewTicker(w.svc.deps.Config.WorkerInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-ticker.C:
			if err := w.tick(ctx); err != nil && !errors.Is(err, context.Canceled) {
				logger.Warn("mail outbox tick failed", zap.Error(err))
			}
		}
	}
}
// Shutdown is a no-op: each per-row transaction is self-contained, so
// cancelling the Run context is enough to stop the loop. Any row
// already inside a Send call finishes its commit (or rolls back on
// context cancel) before Run returns.
func (w *Worker) Shutdown(_ context.Context) error {
	return nil
}
// Tick exposes a single drain pass for tests so they can drive the
// worker without depending on the ticker in Run.
func (w *Worker) Tick(ctx context.Context) error {
	return w.tick(ctx)
}
// tick processes up to claimBatchSize rows. Each row is handled in its
// own transaction so a slow SMTP send only holds one row lock at a
// time. The loop exits as soon as a pass claims zero rows or ctx is
// cancelled. Rows already handled in this tick are accumulated in
// `handled` and excluded from later claims, so a transient retry
// scheduled with next_attempt_at in the past cannot chew through a
// delivery's MaxAttempts budget within a single tick.
func (w *Worker) tick(ctx context.Context) error {
	handled := make([]uuid.UUID, 0, claimBatchSize)
	for i := 0; i < claimBatchSize; i++ {
		if err := ctx.Err(); err != nil {
			return err
		}
		more, id, err := w.processOne(ctx, handled)
		if err != nil {
			return err
		}
		if !more {
			// No due row left; end the tick early.
			return nil
		}
		handled = append(handled, id)
	}
	return nil
}
// processOne claims a single due row, dispatches it through the SMTP
// sender, and commits the resulting state transition in one
// transaction. Returns more=false when no row was due, so the caller
// can short-circuit the tick loop. The delivery_id of the processed
// row is returned so the tick loop can skip it on subsequent
// iterations.
//
// The failure disposition (permanent, give-up) is computed exactly
// once and shared between the in-transaction state transition and the
// post-commit admin notification, so the two can never drift apart.
func (w *Worker) processOne(ctx context.Context, exclude []uuid.UUID) (bool, uuid.UUID, error) {
	tx, err := w.svc.deps.Store.BeginTx(ctx)
	if err != nil {
		return false, uuid.Nil, err
	}
	defer func() {
		// Rollback is a no-op after Commit; this catches every error
		// path inside the function.
		_ = tx.Rollback()
	}()
	claimed, err := w.svc.deps.Store.ClaimDue(ctx, tx, 1, exclude...)
	if err != nil {
		return false, uuid.Nil, err
	}
	if len(claimed) == 0 {
		// No due row: signal the caller to stop this tick.
		return false, uuid.Nil, nil
	}
	c := claimed[0]
	logger := w.svc.deps.Logger.Named("worker").With(
		zap.String("delivery_id", c.Delivery.DeliveryID.String()),
		zap.String("template_id", c.Delivery.TemplateID),
	)
	now := w.svc.deps.Now()
	addresses := make([]string, 0, len(c.Recipients))
	for _, r := range c.Recipients {
		addresses = append(addresses, r.Address)
	}
	subject := ""
	if c.Payload.Subject != nil {
		subject = *c.Payload.Subject
	}
	out := OutboundMessage{
		To:          addresses,
		Subject:     subject,
		ContentType: c.Payload.ContentType,
		Body:        c.Payload.Body,
	}
	sendErr := w.svc.deps.SMTP.Send(ctx, out)
	finishedAt := w.svc.deps.Now()
	// cycleAttempt is 1-indexed: the attempts already persisted on the
	// row plus the attempt just made.
	cycleAttempt := c.Delivery.Attempts + 1
	maxAttempts := int32(w.svc.deps.Config.MaxAttempts)
	// Single source of truth for the failure disposition; reused after
	// Commit for the admin notification. Both stay false on success.
	var permanent, giveUp bool
	if sendErr != nil {
		permanent = IsPermanent(sendErr)
		giveUp = permanent || cycleAttempt >= maxAttempts
	}
	if sendErr == nil {
		attemptNo, err := w.svc.deps.Store.RecordAttempt(ctx, tx, c.Delivery.DeliveryID, now, finishedAt, OutcomeSuccess, "")
		if err != nil {
			return false, uuid.Nil, err
		}
		if err := w.svc.deps.Store.MarkSent(ctx, tx, c.Delivery.DeliveryID, finishedAt); err != nil {
			return false, uuid.Nil, err
		}
		logger.Info("mail delivery sent",
			zap.Int32("cycle_attempt", cycleAttempt),
			zap.Int32("attempt_no", attemptNo),
		)
	} else {
		outcome := OutcomeTransientError
		if permanent {
			outcome = OutcomePermanentError
		}
		attemptNo, err := w.svc.deps.Store.RecordAttempt(ctx, tx, c.Delivery.DeliveryID, now, finishedAt, outcome, sendErr.Error())
		if err != nil {
			return false, uuid.Nil, err
		}
		if giveUp {
			// The stored dead-letter reason carries the "permanent: "
			// prefix; the admin notification below deliberately sends
			// the raw error text.
			reason := sendErr.Error()
			if permanent {
				reason = "permanent: " + reason
			}
			if err := w.svc.deps.Store.MarkDeadLettered(ctx, tx, c.Delivery.DeliveryID, finishedAt, reason); err != nil {
				return false, uuid.Nil, err
			}
			logger.Warn("mail delivery dead-lettered",
				zap.Int32("cycle_attempt", cycleAttempt),
				zap.Int32("attempt_no", attemptNo),
				zap.Int32("max_attempts", maxAttempts),
				zap.Bool("permanent", permanent),
				zap.String("reason", reason),
			)
		} else {
			nextAt := finishedAt.Add(nextBackoff(int(cycleAttempt)))
			if err := w.svc.deps.Store.ScheduleRetry(ctx, tx, c.Delivery.DeliveryID, finishedAt, nextAt, sendErr.Error()); err != nil {
				return false, uuid.Nil, err
			}
			logger.Info("mail delivery retry scheduled",
				zap.Int32("cycle_attempt", cycleAttempt),
				zap.Int32("attempt_no", attemptNo),
				zap.Time("next_attempt_at", nextAt),
			)
		}
	}
	if err := tx.Commit(); err != nil {
		return false, uuid.Nil, err
	}
	// Notify admins only after the dead-letter transition is durably
	// committed, so a rolled-back transaction never produces a
	// notification.
	if giveUp {
		w.svc.deps.Admin.OnDeadLetter(ctx, c.Delivery.DeliveryID, c.Delivery.TemplateID, sendErr.Error())
	}
	return true, c.Delivery.DeliveryID, nil
}
// nextBackoff returns the wait between attempt N (1-indexed) and the
// next try. The delay grows exponentially from backoffBase, is capped
// at backoffMax, then gets a uniform ±backoffJitter shake to prevent
// retry storms; the result never drops below backoffBase.
func nextBackoff(attempt int) time.Duration {
	n := max(attempt, 1)
	base := float64(backoffBase)
	capped := min(base*math.Pow(backoffFactor, float64(n-1)), float64(backoffMax))
	// rand.Float64()*2-1 is uniform in [-1, 1); scaling by the jitter
	// fraction gives the symmetric shake.
	shaken := capped * (1 + (rand.Float64()*2-1)*backoffJitter)
	return time.Duration(max(shaken, base))
}
// Compile-time check that Worker satisfies the lifecycle interface
// shape used elsewhere (Run + Shutdown); a signature drift on either
// method becomes a build error here rather than a runtime wiring bug.
var _ interface {
	Run(context.Context) error
	Shutdown(context.Context) error
} = (*Worker)(nil)
+247
View File
@@ -0,0 +1,247 @@
package mail_test
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/backend/internal/config"
"galaxy/backend/internal/mail"
"github.com/google/uuid"
"go.uber.org/zap/zaptest"
)
// recordingSender is a SMTPSender stub with programmable per-call
// behaviour. Tests append behaviours; each Send pops the head. Once
// the behaviour queue is drained, every further Send succeeds.
type recordingSender struct {
	mu sync.Mutex
	// sent records every message passed to Send, in call order.
	sent []mail.OutboundMessage
	// behaviour is a FIFO of per-call results, consumed head-first.
	behaviour []func(mail.OutboundMessage) error
}
// newRecordingSender returns an empty recorder: with no behaviours
// queued, all Sends succeed.
func newRecordingSender() *recordingSender {
	return &recordingSender{}
}
// Send records msg, then pops and applies the next queued behaviour.
// An empty queue means unconditional success.
func (r *recordingSender) Send(_ context.Context, msg mail.OutboundMessage) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.sent = append(r.sent, msg)
	if len(r.behaviour) == 0 {
		return nil
	}
	var next func(mail.OutboundMessage) error
	next, r.behaviour = r.behaviour[0], r.behaviour[1:]
	return next(msg)
}
// snapshot returns a copy of every message recorded so far, safe to
// inspect after the lock is released.
func (r *recordingSender) snapshot() []mail.OutboundMessage {
	r.mu.Lock()
	defer r.mu.Unlock()
	out := make([]mail.OutboundMessage, 0, len(r.sent))
	out = append(out, r.sent...)
	return out
}
// recordingAdminNotifier captures every dead-letter notification call.
type recordingAdminNotifier struct {
	mu sync.Mutex
	// calls counts OnDeadLetter invocations; the arguments themselves
	// are not retained.
	calls int
}
// OnDeadLetter bumps the call counter; all arguments are ignored
// because the tests only assert how many times the hook fired.
func (r *recordingAdminNotifier) OnDeadLetter(_ context.Context, _ uuid.UUID, _, _ string) {
	r.mu.Lock()
	r.calls++
	r.mu.Unlock()
}
// count returns the number of dead-letter notifications seen so far.
func (r *recordingAdminNotifier) count() int {
	r.mu.Lock()
	n := r.calls
	r.mu.Unlock()
	return n
}
// buildService spins up a Service backed by a real Postgres
// testcontainer. The injected clock and configurable max-attempts let
// tests exercise the retry / dead-letter branches without real time;
// the hour-long WorkerInterval keeps the background ticker quiet so
// tests advance the worker only via explicit Tick calls.
func buildService(t *testing.T, sender mail.SMTPSender, admin mail.AdminNotifier, maxAttempts int, now func() time.Time) *mail.Service {
	t.Helper()
	deps := mail.Deps{
		Store:  mail.NewStore(startPostgres(t)),
		SMTP:   sender,
		Admin:  admin,
		Config: config.MailConfig{WorkerInterval: time.Hour, MaxAttempts: maxAttempts},
		Now:    now,
		Logger: zaptest.NewLogger(t),
	}
	return mail.NewService(deps)
}
// TestWorkerSuccessFirstAttempt covers the happy path: one enqueued
// login-code mail is delivered on the first tick, the delivery row
// ends up sent with attempts=1, and the admin notifier never fires.
func TestWorkerSuccessFirstAttempt(t *testing.T) {
	t.Parallel()
	// No queued behaviours: every Send on this stub succeeds.
	sender := newRecordingSender()
	admin := &recordingAdminNotifier{}
	svc := buildService(t, sender, admin, 3, time.Now)
	if err := svc.EnqueueLoginCode(context.Background(), "alice@example.test", "111111", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	worker := mail.NewWorker(svc)
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick: %v", err)
	}
	sent := sender.snapshot()
	if len(sent) != 1 {
		t.Fatalf("got %d sent messages, want 1", len(sent))
	}
	// The rendered message must carry at least a subject and a body.
	if sent[0].Subject == "" || len(sent[0].Body) == 0 {
		t.Fatalf("sent message missing fields: %+v", sent[0])
	}
	page, err := svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if len(page.Items) != 1 {
		t.Fatalf("want 1 delivery, got %d", len(page.Items))
	}
	if page.Items[0].Status != mail.StatusSent {
		t.Fatalf("status=%q want sent", page.Items[0].Status)
	}
	if page.Items[0].Attempts != 1 {
		t.Fatalf("attempts=%d want 1", page.Items[0].Attempts)
	}
	if admin.count() != 0 {
		t.Fatalf("admin notifier must not fire on success, got %d", admin.count())
	}
}
// TestWorkerTransientThenDeadLetter walks a delivery through one
// transient failure (leaving it retrying) and a second failure that
// exhausts MaxAttempts=2: the row must be dead-lettered with two
// recorded attempts, the admin notifier must fire exactly once, and
// the dead-letter listing must show one row.
func TestWorkerTransientThenDeadLetter(t *testing.T) {
	t.Parallel()
	sender := newRecordingSender()
	// Two queued transient failures, one consumed per tick.
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return errors.New("smtp transient #1") },
		func(mail.OutboundMessage) error { return errors.New("smtp transient #2") },
	}
	admin := &recordingAdminNotifier{}
	// Start the fake clock 2 hours behind wall-clock so the
	// `finishedAt + backoff` computed by ScheduleRetry lands in the
	// past relative to DB `now()` and the second tick re-claims the
	// row immediately.
	clock := time.Now().UTC().Add(-2 * time.Hour)
	svc := buildService(t, sender, admin, 2, func() time.Time { return clock })
	if err := svc.EnqueueLoginCode(context.Background(), "bob@example.test", "222222", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	worker := mail.NewWorker(svc)
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #1: %v", err)
	}
	// After one transient failure the row must be retrying, not dead.
	page, err := svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if got := page.Items[0].Status; got != mail.StatusRetrying {
		t.Fatalf("after first failure status=%q want retrying", got)
	}
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #2: %v", err)
	}
	page, err = svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list 2: %v", err)
	}
	if got := page.Items[0].Status; got != mail.StatusDeadLettered {
		t.Fatalf("after second failure status=%q want dead_lettered", got)
	}
	if page.Items[0].Attempts != 2 {
		t.Fatalf("attempts=%d want 2", page.Items[0].Attempts)
	}
	if admin.count() != 1 {
		t.Fatalf("admin notifier calls=%d want 1", admin.count())
	}
	// Check dead-letter row exists.
	dl, err := svc.AdminListDeadLetters(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list dead-letters: %v", err)
	}
	if dl.Total != 1 {
		t.Fatalf("dead-letter total=%d want 1", dl.Total)
	}
}
// TestWorkerPermanentDeadLettersImmediately asserts that a SendError
// flagged Permanent dead-letters the delivery on the very first
// attempt — MaxAttempts (5 here) must not grant further retries — and
// that the admin notifier fires exactly once.
func TestWorkerPermanentDeadLettersImmediately(t *testing.T) {
	t.Parallel()
	sender := newRecordingSender()
	// A single queued behaviour marks the first Send as a permanent
	// rejection.
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return &mail.SendError{Err: errors.New("rejected"), Permanent: true} },
	}
	admin := &recordingAdminNotifier{}
	svc := buildService(t, sender, admin, 5, time.Now)
	if err := svc.EnqueueLoginCode(context.Background(), "e@example.test", "333333", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	worker := mail.NewWorker(svc)
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick: %v", err)
	}
	page, err := svc.AdminListDeliveries(context.Background(), 1, 10)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if got := page.Items[0].Status; got != mail.StatusDeadLettered {
		t.Fatalf("status=%q want dead_lettered after permanent error", got)
	}
	if admin.count() != 1 {
		t.Fatalf("admin notifier calls=%d want 1", admin.count())
	}
}
// TestWorkerRespectsNextAttemptAt asserts that after a transient
// failure the worker does not re-claim the row while its
// next_attempt_at still lies in the future: the second tick must not
// produce another Send.
func TestWorkerRespectsNextAttemptAt(t *testing.T) {
	t.Parallel()
	sender := newRecordingSender()
	sender.behaviour = []func(mail.OutboundMessage) error{
		func(mail.OutboundMessage) error { return errors.New("transient") },
	}
	// Push the fake clock far into the future so the post-retry
	// next_attempt_at lands well past wall-clock now() and the second
	// tick deterministically skips the row.
	clock := time.Now().UTC().Add(24 * time.Hour)
	admin := &recordingAdminNotifier{}
	svc := buildService(t, sender, admin, 5, func() time.Time { return clock })
	if err := svc.EnqueueLoginCode(context.Background(), "f@example.test", "444444", 5*time.Minute); err != nil {
		t.Fatalf("enqueue: %v", err)
	}
	worker := mail.NewWorker(svc)
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #1: %v", err)
	}
	// Without advancing the clock the next tick must skip the row
	// because next_attempt_at > now().
	if err := worker.Tick(context.Background()); err != nil {
		t.Fatalf("tick #2: %v", err)
	}
	if got := len(sender.snapshot()); got != 1 {
		t.Fatalf("sender saw %d messages while still backing off, want 1", got)
	}
}