feat: mail service

This commit is contained in:
Ilia Denisov
2026-04-17 18:39:16 +02:00
committed by GitHub
parent 23ffcb7535
commit 5b7593e6f6
183 changed files with 31215 additions and 248 deletions
@@ -0,0 +1,781 @@
// Package executeattempt implements provider execution, retry planning, and
// terminal state handling for claimed delivery attempts.
package executeattempt
import (
"context"
"encoding/base64"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/mail/internal/domain/attempt"
"galaxy/mail/internal/domain/common"
deliverydomain "galaxy/mail/internal/domain/delivery"
"galaxy/mail/internal/logging"
"galaxy/mail/internal/ports"
"galaxy/mail/internal/service/acceptgenericdelivery"
"galaxy/mail/internal/service/renderdelivery"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
oteltrace "go.opentelemetry.io/otel/trace"
)
// ErrServiceUnavailable is the sentinel returned when attempt execution
// cannot safely load or persist durable state. Callers should match it with
// errors.Is.
var ErrServiceUnavailable = errors.New("execute attempt service unavailable")
// retryDelays lists the back-off applied after a retryable failure. The entry
// at index i is the delay scheduled after attempt number i+1; attempts beyond
// the last entry have no further retry.
var retryDelays = [3]time.Duration{
	0: 1 * time.Minute,
	1: 5 * time.Minute,
	2: 30 * time.Minute,
}
// Values written to the dead-letter entry once no retry delay remains.
const (
	retryExhaustedClassification = "retry_exhausted"
	retryRecoveryHint            = "check SMTP connectivity"
)

// Values recorded on an attempt that is recovered after its claim expired.
const (
	claimTTLClassification = "claim_ttl_expired"
	claimTTLSummary        = "attempt claim TTL expired"
)

// deadlineExceededDetail is the provider result "error" detail that turns a
// transient failure into a timed-out attempt.
const deadlineExceededDetail = "deadline_exceeded"

// tracerName identifies the tracer used for provider-send spans.
const tracerName = "galaxy/mail/executeattempt"
// WorkItem pairs one delivery with the concrete attempt that should be
// prepared, executed, or recovered. The two records must refer to the same
// delivery and agree on the attempt number; see validateCommon.
type WorkItem struct {
	// Delivery stores the owning logical delivery record.
	Delivery deliverydomain.Delivery
	// Attempt stores the concrete delivery attempt record. Its DeliveryID must
	// equal Delivery.DeliveryID and its AttemptNo must equal
	// Delivery.AttemptCount.
	Attempt attempt.Attempt
}
// ValidateForPreparation checks that item is eligible for claim-time
// preparation: the shared invariants hold, the attempt is still scheduled, and
// the delivery is either queued or rendered.
func (item WorkItem) ValidateForPreparation() error {
	commonErr := item.validateCommon()
	if commonErr != nil {
		return commonErr
	}
	if item.Attempt.Status != attempt.StatusScheduled {
		return fmt.Errorf("work attempt status must be %q", attempt.StatusScheduled)
	}

	status := item.Delivery.Status
	if status == deliverydomain.StatusQueued || status == deliverydomain.StatusRendered {
		return nil
	}
	return fmt.Errorf(
		"work delivery status must be %q or %q",
		deliverydomain.StatusQueued,
		deliverydomain.StatusRendered,
	)
}
// ValidateForExecution checks that item describes a claimed, in-flight
// provider execution: the shared invariants hold, the delivery is sending, and
// the attempt is in progress.
func (item WorkItem) ValidateForExecution() error {
	if err := item.validateCommon(); err != nil {
		return err
	}

	switch {
	case item.Delivery.Status != deliverydomain.StatusSending:
		return fmt.Errorf("work delivery status must be %q", deliverydomain.StatusSending)
	case item.Attempt.Status != attempt.StatusInProgress:
		return fmt.Errorf("work attempt status must be %q", attempt.StatusInProgress)
	default:
		return nil
	}
}
// validateCommon enforces the invariants shared by every work item: both
// records are individually valid, the attempt belongs to the delivery, and the
// delivery's attempt count equals the attempt number.
func (item WorkItem) validateCommon() error {
	if err := item.Delivery.Validate(); err != nil {
		return fmt.Errorf("work delivery: %w", err)
	}
	if err := item.Attempt.Validate(); err != nil {
		return fmt.Errorf("work attempt: %w", err)
	}

	switch {
	case item.Attempt.DeliveryID != item.Delivery.DeliveryID:
		return errors.New("work attempt delivery id must match delivery id")
	case item.Delivery.AttemptCount != item.Attempt.AttemptNo:
		return errors.New("work delivery attempt count must match attempt number")
	}
	return nil
}
// CommitStateInput stores one complete durable attempt outcome mutation: the
// updated delivery, the finished attempt, and at most one of a follow-up retry
// attempt or a dead-letter entry.
type CommitStateInput struct {
	// Delivery stores the mutated delivery record.
	Delivery deliverydomain.Delivery
	// Attempt stores the terminal current attempt record.
	Attempt attempt.Attempt
	// NextAttempt stores the optional next scheduled retry attempt. It is
	// mutually exclusive with DeadLetter.
	NextAttempt *attempt.Attempt
	// DeadLetter stores the optional dead-letter record when Delivery becomes
	// `dead_letter`.
	DeadLetter *deliverydomain.DeadLetterEntry
}
// Validate reports whether input stores one complete and internally
// consistent durable mutation.
//
// Checks run in a fixed order and the first violation is returned: record
// validity, attempt/delivery linkage, next-attempt rules, dead-letter state,
// and finally the allowed attempt status for the resulting delivery status.
func (input CommitStateInput) Validate() error {
	// Both records must be valid on their own before cross-checks run.
	if err := input.Delivery.Validate(); err != nil {
		return fmt.Errorf("delivery: %w", err)
	}
	if err := input.Attempt.Validate(); err != nil {
		return fmt.Errorf("attempt: %w", err)
	}
	// A commit always closes the current attempt.
	if !input.Attempt.Status.IsTerminal() {
		return errors.New("attempt status must be terminal")
	}
	if input.Attempt.DeliveryID != input.Delivery.DeliveryID {
		return errors.New("attempt delivery id must match delivery id")
	}
	if input.Delivery.LastAttemptStatus != input.Attempt.Status {
		return errors.New("delivery last attempt status must match attempt status")
	}
	if input.NextAttempt != nil {
		// With a retry, the delivery goes back to queued and its attempt count
		// already points at the new attempt, which must directly follow the
		// current one.
		if err := input.NextAttempt.Validate(); err != nil {
			return fmt.Errorf("next attempt: %w", err)
		}
		if input.NextAttempt.DeliveryID != input.Delivery.DeliveryID {
			return errors.New("next attempt delivery id must match delivery id")
		}
		if input.NextAttempt.Status != attempt.StatusScheduled {
			return fmt.Errorf("next attempt status must be %q", attempt.StatusScheduled)
		}
		if input.Delivery.Status != deliverydomain.StatusQueued {
			return fmt.Errorf("delivery status with next attempt must be %q", deliverydomain.StatusQueued)
		}
		if input.Delivery.AttemptCount != input.NextAttempt.AttemptNo {
			return errors.New("delivery attempt count must match next attempt number")
		}
		if input.NextAttempt.AttemptNo != input.Attempt.AttemptNo+1 {
			return errors.New("next attempt number must increment current attempt number")
		}
		if input.DeadLetter != nil {
			return errors.New("next attempt and dead-letter entry are mutually exclusive")
		}
	} else if input.Delivery.AttemptCount != input.Attempt.AttemptNo {
		// Without a retry, the attempt count still refers to the current attempt.
		return errors.New("delivery attempt count must match current attempt number without next attempt")
	}
	// The delivery status and the presence of the dead-letter entry are
	// checked together by the delivery domain package.
	if err := deliverydomain.ValidateDeadLetterState(input.Delivery, input.DeadLetter); err != nil {
		return fmt.Errorf("dead-letter state: %w", err)
	}
	// Each resulting delivery status only accepts specific terminal attempt
	// statuses.
	switch input.Delivery.Status {
	case deliverydomain.StatusSent:
		if input.Attempt.Status != attempt.StatusProviderAccepted {
			return fmt.Errorf("sent delivery requires attempt status %q", attempt.StatusProviderAccepted)
		}
	case deliverydomain.StatusSuppressed, deliverydomain.StatusFailed:
		if input.Attempt.Status != attempt.StatusProviderRejected {
			return fmt.Errorf(
				"%s delivery requires attempt status %q",
				input.Delivery.Status,
				attempt.StatusProviderRejected,
			)
		}
	case deliverydomain.StatusQueued:
		// Queued is only reachable here as a retry, so the next attempt is
		// mandatory.
		if input.NextAttempt == nil {
			return errors.New("queued delivery requires next attempt")
		}
		switch input.Attempt.Status {
		case attempt.StatusTransportFailed, attempt.StatusTimedOut:
		default:
			return fmt.Errorf(
				"queued delivery requires attempt status %q or %q",
				attempt.StatusTransportFailed,
				attempt.StatusTimedOut,
			)
		}
	case deliverydomain.StatusDeadLetter:
		switch input.Attempt.Status {
		case attempt.StatusTransportFailed, attempt.StatusTimedOut:
		default:
			return fmt.Errorf(
				"dead-letter delivery requires attempt status %q or %q",
				attempt.StatusTransportFailed,
				attempt.StatusTimedOut,
			)
		}
	default:
		return fmt.Errorf("unsupported delivery status %q for commit input", input.Delivery.Status)
	}
	return nil
}
// Renderer materializes template-mode deliveries before a scheduler claims an
// attempt for outbound execution.
type Renderer interface {
	// Execute renders or terminally fails one queued template-mode delivery.
	// Prepare treats a result whose Outcome is renderdelivery.OutcomeFailed as
	// "do not claim this attempt".
	Execute(context.Context, renderdelivery.Input) (renderdelivery.Result, error)
}
// PayloadLoader loads raw attachment payloads for a delivery.
type PayloadLoader interface {
	// LoadPayload returns the stored attachment payload bundle when one exists.
	// The boolean result reports whether a payload was found; the service
	// treats a missing payload for a delivery with attachments as
	// ErrServiceUnavailable.
	LoadPayload(context.Context, common.DeliveryID) (acceptgenericdelivery.DeliveryPayload, bool, error)
}
// Store persists durable attempt execution outcomes.
type Store interface {
	// Commit applies one complete durable attempt outcome mutation. The service
	// only passes inputs that already satisfied CommitStateInput.Validate, and
	// reports any returned error as ErrServiceUnavailable.
	Commit(context.Context, CommitStateInput) error
}
// Clock provides wall-clock time. The service uses it to stamp attempt and
// delivery completion times.
type Clock interface {
	// Now returns the current time.
	Now() time.Time
}
// Telemetry records low-cardinality attempt-execution metrics.
type Telemetry interface {
	// RecordDeliveryStatusTransition records one durable delivery status
	// transition. The service passes the committed delivery status followed by
	// the delivery source.
	RecordDeliveryStatusTransition(context.Context, string, string)
	// RecordAttemptOutcome records one durable terminal attempt outcome. The
	// service passes the committed attempt status followed by the delivery
	// source.
	RecordAttemptOutcome(context.Context, string, string)
	// RecordProviderSendDuration records one provider-send latency sample. The
	// service passes the provider name, the provider result classification,
	// and the measured send duration.
	RecordProviderSendDuration(context.Context, string, string, time.Duration)
}
// Config stores the dependencies used by Service. New rejects a nil Renderer,
// Provider, PayloadLoader, Store, or Clock and a non-positive AttemptTimeout;
// the remaining fields are optional.
type Config struct {
	// Renderer stores the template renderer used during pre-claim preparation.
	Renderer Renderer
	// Provider stores the outbound provider adapter.
	Provider ports.Provider
	// PayloadLoader loads raw attachment payloads for SMTP construction.
	PayloadLoader PayloadLoader
	// Store persists durable attempt execution outcomes.
	Store Store
	// Clock provides wall-clock timestamps.
	Clock Clock
	// Telemetry records low-cardinality attempt-execution metrics. When nil,
	// metric recording is skipped.
	Telemetry Telemetry
	// TracerProvider constructs the application span recorder used by provider
	// sends. When nil, New falls back to otel.GetTracerProvider().
	TracerProvider oteltrace.TracerProvider
	// Logger writes structured attempt-execution logs. When nil, New falls
	// back to slog.Default().
	Logger *slog.Logger
	// AttemptTimeout bounds one provider execution budget. It must be positive.
	AttemptTimeout time.Duration
}
// Service prepares template deliveries, executes claimed attempts, and
// applies retry policy. Construct it with New.
type Service struct {
	renderer      Renderer       // renders template-mode deliveries in Prepare
	provider      ports.Provider // sends the built message in Execute
	payloadLoader PayloadLoader  // loads attachment payloads in buildMessage
	store         Store          // persists committed outcomes
	clock         Clock          // source of completion timestamps
	// telemetry may be nil; the record* helpers skip recording in that case.
	telemetry      Telemetry
	tracerProvider oteltrace.TracerProvider // never nil after New
	logger         *slog.Logger             // tagged with component=execute_attempt by New
	attemptTimeout time.Duration            // timeout applied to each provider send
}
// New builds a Service from cfg.
//
// It returns an error when a required dependency (Renderer, Provider,
// PayloadLoader, Store, Clock) is nil or when AttemptTimeout is not positive.
// A nil TracerProvider falls back to the global OpenTelemetry provider and a
// nil Logger falls back to slog.Default(). Telemetry may be nil.
func New(cfg Config) (*Service, error) {
	if cfg.Renderer == nil {
		return nil, errors.New("new execute attempt service: nil renderer")
	}
	if cfg.Provider == nil {
		return nil, errors.New("new execute attempt service: nil provider")
	}
	if cfg.PayloadLoader == nil {
		return nil, errors.New("new execute attempt service: nil payload loader")
	}
	if cfg.Store == nil {
		return nil, errors.New("new execute attempt service: nil store")
	}
	if cfg.Clock == nil {
		return nil, errors.New("new execute attempt service: nil clock")
	}
	if cfg.AttemptTimeout <= 0 {
		return nil, errors.New("new execute attempt service: non-positive attempt timeout")
	}

	built := &Service{
		renderer:       cfg.Renderer,
		provider:       cfg.Provider,
		payloadLoader:  cfg.PayloadLoader,
		store:          cfg.Store,
		clock:          cfg.Clock,
		telemetry:      cfg.Telemetry,
		tracerProvider: cfg.TracerProvider,
		attemptTimeout: cfg.AttemptTimeout,
	}
	if built.tracerProvider == nil {
		built.tracerProvider = otel.GetTracerProvider()
	}

	baseLogger := cfg.Logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}
	built.logger = baseLogger.With("component", "execute_attempt")

	return built, nil
}
// Prepare makes sure a queued template-mode delivery has materialized content
// before its attempt is claimed.
//
// The boolean result reports whether the scheduler may proceed to claim the
// attempt. Deliveries that are not template-mode, are already rendered, or
// already carry materialized content are ready without rendering. Otherwise
// the renderer runs, and a failed render outcome yields (false, nil).
func (service *Service) Prepare(ctx context.Context, item WorkItem) (bool, error) {
	switch {
	case ctx == nil:
		return false, errors.New("prepare execute attempt: nil context")
	case service == nil:
		return false, errors.New("prepare execute attempt: nil service")
	}
	if err := item.ValidateForPreparation(); err != nil {
		return false, fmt.Errorf("prepare execute attempt: %w", err)
	}

	// Short-circuit evaluation keeps the content check from running when an
	// earlier condition already marks the delivery as ready.
	ready := item.Delivery.PayloadMode != deliverydomain.PayloadModeTemplate ||
		item.Delivery.Status == deliverydomain.StatusRendered ||
		item.Delivery.Content.ValidateMaterialized() == nil
	if ready {
		return true, nil
	}

	rendered, err := service.renderer.Execute(ctx, renderdelivery.Input{
		Delivery: item.Delivery,
		Attempt:  item.Attempt,
	})
	if err != nil {
		return false, fmt.Errorf("prepare execute attempt: %w", err)
	}
	return rendered.Outcome != renderdelivery.OutcomeFailed, nil
}
// Execute runs one claimed in-progress attempt through the provider and
// durably records the resulting outcome.
//
// It validates item for execution, builds the provider message, and sends it
// inside a "mail.provider_send" span under a context bounded by the configured
// attempt timeout. The provider result is mapped to a commit and persisted via
// the store; commit metrics and logs are emitted only after a successful
// commit.
//
// An error returned by the provider call itself, or a provider result that
// fails validation, is returned without committing any state. A store failure
// is reported as ErrServiceUnavailable.
func (service *Service) Execute(ctx context.Context, item WorkItem) error {
	if ctx == nil {
		return errors.New("execute attempt: nil context")
	}
	if service == nil {
		return errors.New("execute attempt: nil service")
	}
	if err := item.ValidateForExecution(); err != nil {
		return fmt.Errorf("execute attempt: %w", err)
	}
	message, err := service.buildMessage(ctx, item.Delivery)
	if err != nil {
		return err
	}
	// Start of the latency sample; measured with time.Now rather than the
	// injected clock.
	sendStartedAt := time.Now()
	sendCtx, span := service.tracerProvider.Tracer(tracerName).Start(
		ctx,
		"mail.provider_send",
		oteltrace.WithAttributes(
			attribute.String("mail.delivery_id", item.Delivery.DeliveryID.String()),
			attribute.String("mail.source", string(item.Delivery.Source)),
			attribute.Int("mail.attempt_no", item.Attempt.AttemptNo),
		),
	)
	// The template id attribute is only attached when the delivery has one.
	if !item.Delivery.TemplateID.IsZero() {
		span.SetAttributes(attribute.String("mail.template_id", item.Delivery.TemplateID.String()))
	}
	providerCtx, cancel := context.WithTimeout(sendCtx, service.attemptTimeout)
	// Deferred calls run in reverse order: the span ends first, then the
	// timeout context is released.
	defer cancel()
	defer span.End()
	result, err := service.provider.Send(providerCtx, message)
	if err != nil {
		span.RecordError(err)
		return fmt.Errorf("execute attempt: send provider message: %w", err)
	}
	if err := result.Validate(); err != nil {
		span.RecordError(err)
		return fmt.Errorf("execute attempt: provider result: %w", err)
	}
	providerName := providerNameFromSummary(result.Summary)
	sendDuration := time.Since(sendStartedAt)
	service.recordProviderSendDuration(sendCtx, providerName, string(result.Classification), sendDuration)
	span.SetAttributes(
		attribute.String("mail.provider", providerName),
		attribute.String("mail.provider_outcome", string(result.Classification)),
		attribute.String("mail.provider_summary", result.Summary),
	)
	commit, err := service.commitForProviderResult(item, result)
	if err != nil {
		return err
	}
	// The commit uses the caller's ctx, which does not carry the attempt
	// timeout applied to the provider call.
	if err := service.store.Commit(ctx, commit); err != nil {
		return fmt.Errorf("%w: commit attempt outcome: %v", ErrServiceUnavailable, err)
	}
	service.recordCommitMetrics(sendCtx, commit, item.Delivery.Source)
	service.logProviderResult(sendCtx, item, result, commit, providerName, sendDuration)
	return nil
}
// RecoverExpired closes one stale in-progress attempt as timed out, using the
// claim-TTL classification and summary, and applies the same retry policy as a
// runtime timeout. A store failure is reported as ErrServiceUnavailable.
func (service *Service) RecoverExpired(ctx context.Context, item WorkItem) error {
	switch {
	case ctx == nil:
		return errors.New("recover expired attempt: nil context")
	case service == nil:
		return errors.New("recover expired attempt: nil service")
	}
	if err := item.ValidateForExecution(); err != nil {
		return fmt.Errorf("recover expired attempt: %w", err)
	}

	recovered, err := service.commitForTimeout(item, claimTTLClassification, claimTTLSummary)
	if err != nil {
		return err
	}

	commitErr := service.store.Commit(ctx, recovered)
	if commitErr != nil {
		return fmt.Errorf("%w: commit recovered attempt outcome: %v", ErrServiceUnavailable, commitErr)
	}

	service.recordCommitMetrics(ctx, recovered, item.Delivery.Source)
	return nil
}
// buildMessage assembles the provider message for deliveryRecord.
//
// The delivery content must already be materialized. When the delivery lists
// attachments, the stored payload is loaded and each payload attachment must
// match the delivery's attachment metadata (filename, content type, size) at
// the same index before its base64 content is decoded. Payload loading,
// mismatch, and decoding problems are reported as ErrServiceUnavailable.
func (service *Service) buildMessage(ctx context.Context, deliveryRecord deliverydomain.Delivery) (ports.Message, error) {
	var none ports.Message

	message := ports.Message{
		Envelope: deliveryRecord.Envelope,
		Content:  deliveryRecord.Content,
	}
	if err := message.Content.ValidateMaterialized(); err != nil {
		return none, fmt.Errorf("execute attempt: delivery content: %w", err)
	}

	if expected := len(deliveryRecord.Attachments); expected > 0 {
		payload, found, err := service.payloadLoader.LoadPayload(ctx, deliveryRecord.DeliveryID)
		switch {
		case err != nil:
			return none, fmt.Errorf("%w: load delivery payload: %v", ErrServiceUnavailable, err)
		case !found:
			return none, fmt.Errorf("%w: delivery payload %q is missing", ErrServiceUnavailable, deliveryRecord.DeliveryID)
		case len(payload.Attachments) != expected:
			return none, fmt.Errorf(
				"%w: delivery payload attachment count %d mismatches delivery attachment count %d",
				ErrServiceUnavailable,
				len(payload.Attachments),
				expected,
			)
		}

		attachments := make([]ports.Attachment, 0, expected)
		for index := range payload.Attachments {
			stored := payload.Attachments[index]
			metadata := deliveryRecord.Attachments[index]

			sameMetadata := metadata.Filename == stored.Filename &&
				metadata.ContentType == stored.ContentType &&
				metadata.SizeBytes == stored.SizeBytes
			if !sameMetadata {
				return none, fmt.Errorf(
					"%w: delivery payload attachment %d metadata mismatches delivery audit metadata",
					ErrServiceUnavailable,
					index,
				)
			}

			decoded, decodeErr := base64.StdEncoding.DecodeString(stored.ContentBase64)
			if decodeErr != nil {
				return none, fmt.Errorf(
					"%w: decode delivery payload attachment %d: %v",
					ErrServiceUnavailable,
					index,
					decodeErr,
				)
			}
			attachments = append(attachments, ports.Attachment{
				Metadata: metadata,
				Content:  decoded,
			})
		}
		message.Attachments = attachments
	}

	if err := message.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: provider message: %w", err)
	}
	return message, nil
}
// commitForProviderResult maps a provider result classification to the commit
// that records it: accepted becomes sent, suppressed and permanent failures
// become provider-rejected terminal states, and transient failures go through
// the retry policy. A transient failure whose "error" detail equals
// deadlineExceededDetail is recorded as timed out.
func (service *Service) commitForProviderResult(item WorkItem, result ports.Result) (CommitStateInput, error) {
	summary := result.Summary

	switch result.Classification {
	case ports.ClassificationAccepted:
		return service.commitTerminal(item, attempt.StatusProviderAccepted, deliverydomain.StatusSent, summary, "")
	case ports.ClassificationSuppressed:
		return service.commitTerminal(item, attempt.StatusProviderRejected, deliverydomain.StatusSuppressed, summary, "suppressed")
	case ports.ClassificationPermanentFailure:
		return service.commitTerminal(item, attempt.StatusProviderRejected, deliverydomain.StatusFailed, summary, "permanent_failure")
	case ports.ClassificationTransientFailure:
		if result.Details["error"] == deadlineExceededDetail {
			return service.commitForRetryableResult(item, attempt.StatusTimedOut, deadlineExceededDetail, summary)
		}
		return service.commitForRetryableResult(item, attempt.StatusTransportFailed, "transient_failure", summary)
	}

	return CommitStateInput{}, fmt.Errorf("execute attempt: unsupported provider classification %q", result.Classification)
}
// commitForTimeout builds the retry-policy commit for an attempt that is
// closed as timed out, with the given classification and summary.
func (service *Service) commitForTimeout(item WorkItem, providerClassification, providerSummary string) (CommitStateInput, error) {
	timedOut := attempt.StatusTimedOut
	return service.commitForRetryableResult(item, timedOut, providerClassification, providerSummary)
}
// commitForRetryableResult closes the current attempt with attemptStatus and
// applies the retry policy. While a retry delay exists for the attempt number,
// the delivery is re-queued with a next attempt scheduled at finish time plus
// that delay. Once the delays are exhausted, the delivery is dead-lettered.
// Every intermediate record and the final commit are validated.
func (service *Service) commitForRetryableResult(
	item WorkItem,
	attemptStatus attempt.Status,
	providerClassification string,
	providerSummary string,
) (CommitStateInput, error) {
	var none CommitStateInput

	finishedAt := normalizedFinishedAt(service.clock.Now(), item.Attempt)

	finished := item.Attempt
	finished.Status = attemptStatus
	finished.FinishedAt = ptrTime(finishedAt)
	finished.ProviderClassification = providerClassification
	finished.ProviderSummary = providerSummary
	if err := finished.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build terminal attempt: %w", err)
	}

	// Fields shared by the queued and dead-letter delivery records.
	updated := item.Delivery
	updated.LastAttemptStatus = finished.Status
	updated.ProviderSummary = providerSummary
	updated.UpdatedAt = finishedAt

	delay, retryable := retryDelayForAttempt(finished.AttemptNo)
	if !retryable {
		updated.Status = deliverydomain.StatusDeadLetter
		updated.DeadLetteredAt = ptrTime(finishedAt)
		if err := updated.Validate(); err != nil {
			return none, fmt.Errorf("execute attempt: build dead-letter delivery: %w", err)
		}

		exhausted := CommitStateInput{
			Delivery: updated,
			Attempt:  finished,
			DeadLetter: &deliverydomain.DeadLetterEntry{
				DeliveryID:            updated.DeliveryID,
				FinalAttemptNo:        finished.AttemptNo,
				FailureClassification: retryExhaustedClassification,
				ProviderSummary:       providerSummary,
				CreatedAt:             finishedAt,
				RecoveryHint:          retryRecoveryHint,
			},
		}
		if err := exhausted.Validate(); err != nil {
			return none, fmt.Errorf("execute attempt: build dead-letter commit: %w", err)
		}
		return exhausted, nil
	}

	upcoming := attempt.Attempt{
		DeliveryID:   item.Delivery.DeliveryID,
		AttemptNo:    finished.AttemptNo + 1,
		ScheduledFor: finishedAt.Add(delay),
		Status:       attempt.StatusScheduled,
	}
	if err := upcoming.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build next attempt: %w", err)
	}

	updated.Status = deliverydomain.StatusQueued
	updated.AttemptCount = upcoming.AttemptNo
	if err := updated.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build queued delivery: %w", err)
	}

	requeued := CommitStateInput{
		Delivery:    updated,
		Attempt:     finished,
		NextAttempt: &upcoming,
	}
	if err := requeued.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build queued commit: %w", err)
	}
	return requeued, nil
}
// commitTerminal builds the commit for an outcome that ends the delivery
// without a retry. It closes the current attempt with attemptStatus, moves the
// delivery to deliveryStatus, and stamps the matching timestamp (SentAt,
// SuppressedAt, or FailedAt) with the normalized finish time.
func (service *Service) commitTerminal(
	item WorkItem,
	attemptStatus attempt.Status,
	deliveryStatus deliverydomain.Status,
	providerSummary string,
	providerClassification string,
) (CommitStateInput, error) {
	var none CommitStateInput

	finishedAt := normalizedFinishedAt(service.clock.Now(), item.Attempt)

	finished := item.Attempt
	finished.Status = attemptStatus
	finished.FinishedAt = ptrTime(finishedAt)
	finished.ProviderClassification = providerClassification
	finished.ProviderSummary = providerSummary
	if err := finished.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build terminal attempt: %w", err)
	}

	updated := item.Delivery
	updated.Status = deliveryStatus
	updated.LastAttemptStatus = finished.Status
	updated.ProviderSummary = providerSummary
	updated.UpdatedAt = finishedAt
	if deliveryStatus == deliverydomain.StatusSent {
		updated.SentAt = ptrTime(finishedAt)
	} else if deliveryStatus == deliverydomain.StatusSuppressed {
		updated.SuppressedAt = ptrTime(finishedAt)
	} else if deliveryStatus == deliverydomain.StatusFailed {
		updated.FailedAt = ptrTime(finishedAt)
	}
	if err := updated.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build terminal delivery: %w", err)
	}

	terminal := CommitStateInput{
		Delivery: updated,
		Attempt:  finished,
	}
	if err := terminal.Validate(); err != nil {
		return none, fmt.Errorf("execute attempt: build terminal commit: %w", err)
	}
	return terminal, nil
}
// retryDelayForAttempt returns the back-off to apply after the 1-based
// attemptNo fails. The boolean result is false when attemptNo falls outside
// the retry schedule, meaning no further retry is planned.
func retryDelayForAttempt(attemptNo int) (time.Duration, bool) {
	index := attemptNo - 1
	if index >= 0 && index < len(retryDelays) {
		return retryDelays[index], true
	}
	return 0, false
}
// normalizedFinishedAt converts now to UTC truncated to millisecond precision.
// When the attempt has a start time later than that value, the start time is
// returned instead, so the finish never precedes the start.
func normalizedFinishedAt(now time.Time, record attempt.Attempt) time.Time {
	truncated := now.UTC().Truncate(time.Millisecond)
	if started := record.StartedAt; started != nil && truncated.Before(*started) {
		return *started
	}
	return truncated
}
// ptrTime returns a pointer to a fresh copy of value.
func ptrTime(value time.Time) *time.Time {
	boxed := new(time.Time)
	*boxed = value
	return boxed
}
// recordCommitMetrics reports the committed delivery status and attempt status,
// each labelled with the delivery source. It is a no-op when the service or
// its telemetry is nil.
func (service *Service) recordCommitMetrics(ctx context.Context, commit CommitStateInput, source deliverydomain.Source) {
	if service != nil && service.telemetry != nil {
		sourceLabel := string(source)
		service.telemetry.RecordDeliveryStatusTransition(ctx, string(commit.Delivery.Status), sourceLabel)
		service.telemetry.RecordAttemptOutcome(ctx, string(commit.Attempt.Status), sourceLabel)
	}
}
// recordProviderSendDuration forwards one provider-send latency sample to
// telemetry. It is a no-op when the service or its telemetry is nil.
func (service *Service) recordProviderSendDuration(ctx context.Context, provider string, outcome string, duration time.Duration) {
	if service != nil && service.telemetry != nil {
		service.telemetry.RecordProviderSendDuration(ctx, provider, outcome, duration)
	}
}
// logProviderResult writes the structured log records for a committed provider
// result: always a "provider send completed" entry, plus a retry entry when a
// next attempt was scheduled and a warning when the delivery was dead-lettered.
// Each record carries the delivery/attempt attributes and trace attributes
// derived from ctx.
func (service *Service) logProviderResult(
	ctx context.Context,
	item WorkItem,
	result ports.Result,
	commit CommitStateInput,
	providerName string,
	sendDuration time.Duration,
) {
	// Duration in milliseconds with microsecond resolution.
	durationMS := float64(sendDuration.Microseconds()) / 1000

	completed := append(
		logging.DeliveryAttemptAttrs(item.Delivery, item.Attempt),
		"provider", providerName,
		"provider_outcome", string(result.Classification),
		"provider_summary", result.Summary,
		"delivery_status", string(commit.Delivery.Status),
		"attempt_status", string(commit.Attempt.Status),
		"duration_ms", durationMS,
	)
	completed = append(completed, logging.TraceAttrsFromContext(ctx)...)
	service.logger.Info("provider send completed", completed...)

	if next := commit.NextAttempt; next != nil {
		scheduled := append(
			logging.DeliveryAttemptAttrs(item.Delivery, item.Attempt),
			"next_attempt_no", next.AttemptNo,
			"next_scheduled_for", next.ScheduledFor,
			"provider_summary", result.Summary,
		)
		scheduled = append(scheduled, logging.TraceAttrsFromContext(ctx)...)
		service.logger.Info("delivery retry scheduled", scheduled...)
	}

	if entry := commit.DeadLetter; entry != nil {
		deadLettered := append(
			logging.DeliveryAttemptAttrs(item.Delivery, item.Attempt),
			"failure_classification", entry.FailureClassification,
			"recovery_hint", entry.RecoveryHint,
			"provider_summary", entry.ProviderSummary,
		)
		deadLettered = append(deadLettered, logging.TraceAttrsFromContext(ctx)...)
		service.logger.Warn("delivery moved to dead letter", deadLettered...)
	}
}
// providerNameFromSummary extracts the provider name from a summary made of
// whitespace-separated key=value tokens, for example
// "provider=smtp result=accepted". It returns the value of the first
// "provider" token with a non-empty value, or "unknown" when none is present.
//
// Tokens are split on any run of whitespace (spaces, tabs, newlines), so the
// returned name never contains whitespace or trailing tokens. This keeps the
// value safe to use as a low-cardinality metric label.
func providerNameFromSummary(summary string) string {
	for _, token := range strings.Fields(summary) {
		key, value, ok := strings.Cut(token, "=")
		if ok && key == "provider" && value != "" {
			return value
		}
	}
	return "unknown"
}
@@ -0,0 +1,570 @@
package executeattempt
import (
"bytes"
"context"
"log/slog"
"testing"
"time"
"galaxy/mail/internal/domain/attempt"
"galaxy/mail/internal/domain/common"
deliverydomain "galaxy/mail/internal/domain/delivery"
"galaxy/mail/internal/ports"
"galaxy/mail/internal/service/acceptgenericdelivery"
"galaxy/mail/internal/service/renderdelivery"
"github.com/stretchr/testify/require"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/sdk/trace/tracetest"
)
// TestServicePrepareRendersQueuedTemplateDelivery verifies that preparing a
// queued template delivery invokes the renderer exactly once and reports the
// attempt as ready when rendering succeeds.
func TestServicePrepareRendersQueuedTemplateDelivery(t *testing.T) {
	t.Parallel()

	rendered := queuedTemplateWorkItem(t).Delivery
	rendered.Status = deliverydomain.StatusRendered
	rendered.Content = deliverydomain.Content{
		Subject:  "Turn 54",
		TextBody: "Hello Pilot",
	}
	rendered.UpdatedAt = rendered.CreatedAt.Add(time.Minute)
	require.NoError(t, rendered.Validate())

	renderer := &stubRenderer{
		result: renderdelivery.Result{
			Outcome:            renderdelivery.OutcomeRendered,
			Delivery:           rendered,
			ResolvedLocale:     common.Locale("en"),
			TemplateVersion:    "sha256:template",
			LocaleFallbackUsed: false,
		},
	}
	cfg := Config{
		Renderer:       renderer,
		Provider:       stubProvider{},
		PayloadLoader:  stubPayloadLoader{},
		Store:          &stubStore{},
		Clock:          stubClock{now: rendered.UpdatedAt},
		AttemptTimeout: 15 * time.Second,
	}
	service := newTestService(t, cfg)

	ready, err := service.Prepare(context.Background(), queuedTemplateWorkItem(t))

	require.NoError(t, err)
	require.True(t, ready)
	require.Len(t, renderer.inputs, 1)
}
func TestServiceExecuteAcceptedRenderedDelivery(t *testing.T) {
t.Parallel()
store := &stubStore{}
service := newTestService(t, Config{
Renderer: &stubRenderer{},
Provider: stubProvider{
result: ports.Result{
Classification: ports.ClassificationAccepted,
Summary: "provider=smtp result=accepted",
},
},
PayloadLoader: stubPayloadLoader{},
Store: store,
Clock: stubClock{now: fixedNow().Add(time.Minute)},
AttemptTimeout: 15 * time.Second,
})
err := service.Execute(context.Background(), renderedWorkItem(t, 1))
require.NoError(t, err)
require.Len(t, store.inputs, 1)
require.Equal(t, deliverydomain.StatusSent, store.inputs[0].Delivery.Status)
require.Equal(t, attempt.StatusProviderAccepted, store.inputs[0].Attempt.Status)
require.Nil(t, store.inputs[0].NextAttempt)
require.Nil(t, store.inputs[0].DeadLetter)
}
func TestServiceExecuteMapsSuppressedToProviderRejected(t *testing.T) {
t.Parallel()
store := &stubStore{}
service := newTestService(t, Config{
Renderer: &stubRenderer{},
Provider: stubProvider{
result: ports.Result{
Classification: ports.ClassificationSuppressed,
Summary: "provider=stub result=suppressed script=policy_skip",
},
},
PayloadLoader: stubPayloadLoader{},
Store: store,
Clock: stubClock{now: fixedNow().Add(time.Minute)},
AttemptTimeout: 15 * time.Second,
})
err := service.Execute(context.Background(), renderedWorkItem(t, 1))
require.NoError(t, err)
require.Len(t, store.inputs, 1)
require.Equal(t, deliverydomain.StatusSuppressed, store.inputs[0].Delivery.Status)
require.Equal(t, attempt.StatusProviderRejected, store.inputs[0].Attempt.Status)
}
func TestServiceExecuteMapsPermanentFailureToFailed(t *testing.T) {
t.Parallel()
store := &stubStore{}
service := newTestService(t, Config{
Renderer: &stubRenderer{},
Provider: stubProvider{
result: ports.Result{
Classification: ports.ClassificationPermanentFailure,
Summary: "provider=smtp result=permanent_failure phase=data smtp_code=550",
},
},
PayloadLoader: stubPayloadLoader{},
Store: store,
Clock: stubClock{now: fixedNow().Add(time.Minute)},
AttemptTimeout: 15 * time.Second,
})
err := service.Execute(context.Background(), renderedWorkItem(t, 1))
require.NoError(t, err)
require.Len(t, store.inputs, 1)
require.Equal(t, deliverydomain.StatusFailed, store.inputs[0].Delivery.Status)
require.Equal(t, attempt.StatusProviderRejected, store.inputs[0].Attempt.Status)
require.Nil(t, store.inputs[0].DeadLetter)
}
// TestServiceExecuteBuildsRetryChainAndDeadLetter walks the retry schedule for
// a transient provider failure: attempts one to three each schedule the next
// attempt after the expected delay, and attempt four dead-letters the delivery
// with the "retry_exhausted" classification.
func TestServiceExecuteBuildsRetryChainAndDeadLetter(t *testing.T) {
	t.Parallel()
	tests := []struct {
		name                string
		attemptNo           int
		wantStatus          deliverydomain.Status
		wantAttemptStatus   attempt.Status
		wantNextAttemptNo   int
		wantNextDelay       time.Duration
		wantDeadLetterEntry bool
	}{
		{
			name:              "attempt one schedules retry after one minute",
			attemptNo:         1,
			wantStatus:        deliverydomain.StatusQueued,
			wantAttemptStatus: attempt.StatusTransportFailed,
			wantNextAttemptNo: 2,
			wantNextDelay:     time.Minute,
		},
		{
			name:              "attempt two schedules retry after five minutes",
			attemptNo:         2,
			wantStatus:        deliverydomain.StatusQueued,
			wantAttemptStatus: attempt.StatusTransportFailed,
			wantNextAttemptNo: 3,
			wantNextDelay:     5 * time.Minute,
		},
		{
			name:              "attempt three schedules retry after thirty minutes",
			attemptNo:         3,
			wantStatus:        deliverydomain.StatusQueued,
			wantAttemptStatus: attempt.StatusTransportFailed,
			wantNextAttemptNo: 4,
			wantNextDelay:     30 * time.Minute,
		},
		{
			name:                "attempt four becomes dead letter",
			attemptNo:           4,
			wantStatus:          deliverydomain.StatusDeadLetter,
			wantAttemptStatus:   attempt.StatusTransportFailed,
			wantDeadLetterEntry: true,
		},
	}
	// NOTE(review): the parallel subtests capture tt from the range loop; this
	// relies on per-iteration loop variables (Go 1.22+) — confirm the module's
	// go directive.
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			store := &stubStore{}
			service := newTestService(t, Config{
				Renderer: &stubRenderer{},
				Provider: stubProvider{
					result: ports.Result{
						Classification: ports.ClassificationTransientFailure,
						Summary:        "provider=smtp result=transient_failure phase=data smtp_code=451",
						Details: map[string]string{
							"phase": "data",
						},
					},
				},
				PayloadLoader:  stubPayloadLoader{},
				Store:          store,
				Clock:          stubClock{now: fixedNow().Add(time.Minute)},
				AttemptTimeout: 15 * time.Second,
			})
			workItem := renderedWorkItem(t, tt.attemptNo)
			err := service.Execute(context.Background(), workItem)
			require.NoError(t, err)
			require.Len(t, store.inputs, 1)
			input := store.inputs[0]
			require.Equal(t, tt.wantStatus, input.Delivery.Status)
			require.Equal(t, tt.wantAttemptStatus, input.Attempt.Status)
			// The dead-letter case has no next attempt to inspect, so it
			// returns early.
			if tt.wantDeadLetterEntry {
				require.NotNil(t, input.DeadLetter)
				require.Nil(t, input.NextAttempt)
				require.Equal(t, "retry_exhausted", input.DeadLetter.FailureClassification)
				return
			}
			require.NotNil(t, input.NextAttempt)
			require.Nil(t, input.DeadLetter)
			require.Equal(t, tt.wantNextAttemptNo, input.NextAttempt.AttemptNo)
			// The retry is scheduled relative to the committed finish time of
			// the current attempt.
			require.Equal(t, input.Attempt.FinishedAt.Add(tt.wantNextDelay), input.NextAttempt.ScheduledFor)
		})
	}
}
func TestServiceExecuteClassifiesDeadlineExceededAsTimedOut(t *testing.T) {
t.Parallel()
store := &stubStore{}
service := newTestService(t, Config{
Renderer: &stubRenderer{},
Provider: stubProvider{
result: ports.Result{
Classification: ports.ClassificationTransientFailure,
Summary: "provider=smtp result=transient_failure phase=context",
Details: map[string]string{
"error": "deadline_exceeded",
},
},
},
PayloadLoader: stubPayloadLoader{},
Store: store,
Clock: stubClock{now: fixedNow().Add(time.Minute)},
AttemptTimeout: 15 * time.Second,
})
err := service.Execute(context.Background(), renderedWorkItem(t, 1))
require.NoError(t, err)
require.Len(t, store.inputs, 1)
require.Equal(t, attempt.StatusTimedOut, store.inputs[0].Attempt.Status)
require.Equal(t, "deadline_exceeded", store.inputs[0].Attempt.ProviderClassification)
}
// TestServiceRecoverExpiredSchedulesTimedOutRetry checks that recovering an
// in-progress first attempt commits it as timed out with the claim-TTL
// classification and summary, and that a follow-up attempt is planned.
func TestServiceRecoverExpiredSchedulesTimedOutRetry(t *testing.T) {
	t.Parallel()

	commits := &stubStore{}
	cfg := Config{
		Renderer:       &stubRenderer{},
		Provider:       stubProvider{},
		PayloadLoader:  stubPayloadLoader{},
		Store:          commits,
		Clock:          stubClock{now: fixedNow().Add(time.Minute)},
		AttemptTimeout: 15 * time.Second,
	}
	svc := newTestService(t, cfg)

	require.NoError(t, svc.RecoverExpired(context.Background(), renderedWorkItem(t, 1)))

	// Exactly one state commit is expected for the recovered attempt.
	require.Len(t, commits.inputs, 1)
	committed := commits.inputs[0]
	require.Equal(t, attempt.StatusTimedOut, committed.Attempt.Status)
	require.Equal(t, "claim_ttl_expired", committed.Attempt.ProviderClassification)
	require.Equal(t, "attempt claim TTL expired", committed.Attempt.ProviderSummary)
	require.NotNil(t, committed.NextAttempt)
}
// TestServiceExecuteRecordsMetricsAndLogsProviderResult checks the
// observability side effects of an accepted send: telemetry recordings, the
// structured log fields, and the presence of a "mail.provider_send" span.
func TestServiceExecuteRecordsMetricsAndLogsProviderResult(t *testing.T) {
	t.Parallel()

	var (
		commits   = &stubStore{}
		metrics   = &stubTelemetry{}
		logOutput = &bytes.Buffer{}
		spans     = tracetest.NewSpanRecorder()
	)
	tracing := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(spans))
	accepted := ports.Result{
		Classification: ports.ClassificationAccepted,
		Summary:        "provider=smtp result=accepted",
	}

	svc := newTestService(t, Config{
		Renderer:       &stubRenderer{},
		Provider:       stubProvider{result: accepted},
		PayloadLoader:  stubPayloadLoader{},
		Store:          commits,
		Clock:          stubClock{now: fixedNow().Add(time.Minute)},
		Telemetry:      metrics,
		TracerProvider: tracing,
		Logger:         slog.New(slog.NewJSONHandler(logOutput, nil)),
		AttemptTimeout: 15 * time.Second,
	})

	require.NoError(t, svc.Execute(context.Background(), sendingTemplateWorkItem(t, 1)))

	// Each stub telemetry entry is recorded as "<first label>:<second label>".
	require.Equal(t, []string{"notification:sent"}, metrics.statuses)
	require.Equal(t, []string{"notification:provider_accepted"}, metrics.attempts)
	require.Equal(t, []string{"smtp:accepted"}, metrics.providerDurations)

	// The JSON log output must carry the delivery identity and trace fields.
	logged := logOutput.String()
	for _, fragment := range []string{
		`"delivery_id":"delivery-template-sending"`,
		`"source":"notification"`,
		`"template_id":"game.turn_ready"`,
		`"attempt_no":1`,
		`"otel_trace_id":`,
	} {
		require.Contains(t, logged, fragment)
	}

	require.True(t, hasExecuteSpanNamed(spans.Ended(), "mail.provider_send"))
}
func TestServiceExecuteReturnsServiceUnavailableOnMissingPayload(t *testing.T) {
t.Parallel()
service := newTestService(t, Config{
Renderer: &stubRenderer{},
Provider: stubProvider{
result: ports.Result{
Classification: ports.ClassificationAccepted,
Summary: "provider=smtp result=accepted",
},
},
PayloadLoader: stubPayloadLoader{},
Store: &stubStore{},
Clock: stubClock{now: fixedNow().Add(time.Minute)},
AttemptTimeout: 15 * time.Second,
})
workItem := renderedWorkItem(t, 1)
workItem.Delivery.Attachments = []common.AttachmentMetadata{
{Filename: "guide.txt", ContentType: "text/plain; charset=utf-8", SizeBytes: int64(len([]byte("read me")))},
}
require.NoError(t, workItem.Delivery.Validate())
err := service.Execute(context.Background(), workItem)
require.Error(t, err)
require.ErrorIs(t, err, ErrServiceUnavailable)
}
// stubRenderer is a Renderer test double that records each input it receives
// and answers with a preconfigured result and error.
type stubRenderer struct {
	// result is returned from every Execute call.
	result renderdelivery.Result
	// err is returned from every Execute call.
	err error
	// inputs accumulates the inputs passed to Execute, in call order.
	inputs []renderdelivery.Input
}

// Execute records input and returns the configured result and error.
func (r *stubRenderer) Execute(_ context.Context, input renderdelivery.Input) (renderdelivery.Result, error) {
	recorded := append(r.inputs, input)
	r.inputs = recorded
	return r.result, r.err
}
// stubProvider is a ports.Provider test double that answers every Send call
// with a preconfigured result and error.
type stubProvider struct {
	// result is returned from every Send call.
	result ports.Result
	// err is returned from every Send call.
	err error
	// inputs is never populated by Send: the stub is used by value and Send
	// has a value receiver, so nothing written inside the method could be
	// observed by the caller. The field is kept only so existing references
	// keep compiling.
	inputs []ports.Message
}

// Send ignores the message and returns the configured result and error.
//
// It deliberately does not record the message. With a value receiver, any
// append to provider.inputs would only modify the method's private copy of
// the stub and be discarded on return.
func (provider stubProvider) Send(_ context.Context, _ ports.Message) (ports.Result, error) {
	return provider.result, provider.err
}

// Close is a no-op that always reports success.
func (provider stubProvider) Close() error {
	return nil
}
// stubPayloadLoader is a PayloadLoader test double that returns the same
// preconfigured payload, found flag, and error for every lookup.
type stubPayloadLoader struct {
	// payload is returned from every LoadPayload call.
	payload acceptgenericdelivery.DeliveryPayload
	// found is the lookup flag returned from every LoadPayload call.
	found bool
	// err is returned from every LoadPayload call.
	err error
}

// LoadPayload ignores its arguments and returns the configured values.
func (l stubPayloadLoader) LoadPayload(_ context.Context, _ common.DeliveryID) (acceptgenericdelivery.DeliveryPayload, bool, error) {
	return l.payload, l.found, l.err
}
// stubStore is a Store test double that records every commit input and
// answers with a preconfigured error.
type stubStore struct {
	// inputs accumulates the inputs passed to Commit, in call order.
	inputs []CommitStateInput
	// err is returned from every Commit call.
	err error
}

// Commit records input and returns the configured error.
func (s *stubStore) Commit(_ context.Context, input CommitStateInput) error {
	recorded := append(s.inputs, input)
	s.inputs = recorded
	return s.err
}
// stubClock is a clock test double frozen at a fixed instant.
type stubClock struct {
	// now is the instant reported by every Now call.
	now time.Time
}

// Now returns the configured instant unchanged.
func (c stubClock) Now() time.Time {
	frozen := c.now
	return frozen
}
// stubTelemetry is a Telemetry test double that stores each recorded event
// as a "<label>:<label>" string so tests can compare plain string slices.
type stubTelemetry struct {
	// statuses holds "source:status" entries from RecordDeliveryStatusTransition.
	statuses []string
	// attempts holds "source:status" entries from RecordAttemptOutcome.
	attempts []string
	// providerDurations holds "provider:outcome" entries from
	// RecordProviderSendDuration; the measured duration itself is discarded.
	providerDurations []string
}

// RecordDeliveryStatusTransition appends "source:status" to statuses.
func (st *stubTelemetry) RecordDeliveryStatusTransition(_ context.Context, status, source string) {
	entry := source + ":" + status
	st.statuses = append(st.statuses, entry)
}

// RecordAttemptOutcome appends "source:status" to attempts.
func (st *stubTelemetry) RecordAttemptOutcome(_ context.Context, status, source string) {
	entry := source + ":" + status
	st.attempts = append(st.attempts, entry)
}

// RecordProviderSendDuration appends "provider:outcome" to providerDurations
// and ignores the duration argument.
func (st *stubTelemetry) RecordProviderSendDuration(_ context.Context, provider, outcome string, _ time.Duration) {
	entry := provider + ":" + outcome
	st.providerDurations = append(st.providerDurations, entry)
}
// newTestService builds a Service from cfg and fails the calling test
// immediately when construction returns an error.
func newTestService(t *testing.T, cfg Config) *Service {
	t.Helper()

	svc, buildErr := New(cfg)
	require.NoError(t, buildErr)

	return svc
}
// queuedTemplateWorkItem returns a validated work item for a queued,
// template-mode notification delivery paired with its scheduled first
// attempt. Both records are timestamped one minute before fixedNow.
func queuedTemplateWorkItem(t *testing.T) WorkItem {
	t.Helper()

	enqueuedAt := fixedNow().Add(-time.Minute)
	variables := map[string]any{
		"player": map[string]any{
			"name": "Pilot",
		},
		"turn_number": float64(54),
	}
	recipients := []common.Email{common.Email("pilot@example.com")}

	delivery := deliverydomain.Delivery{
		DeliveryID:        common.DeliveryID("delivery-template"),
		Source:            deliverydomain.SourceNotification,
		PayloadMode:       deliverydomain.PayloadModeTemplate,
		TemplateID:        common.TemplateID("game.turn_ready"),
		Envelope:          deliverydomain.Envelope{To: recipients},
		Locale:            common.Locale("en"),
		TemplateVariables: variables,
		IdempotencyKey:    common.IdempotencyKey("notification:delivery-template"),
		Status:            deliverydomain.StatusQueued,
		AttemptCount:      1,
		CreatedAt:         enqueuedAt,
		UpdatedAt:         enqueuedAt,
	}
	require.NoError(t, delivery.Validate())

	firstAttempt := attempt.Attempt{
		DeliveryID:   delivery.DeliveryID,
		AttemptNo:    1,
		ScheduledFor: enqueuedAt,
		Status:       attempt.StatusScheduled,
	}
	require.NoError(t, firstAttempt.Validate())

	return WorkItem{Delivery: delivery, Attempt: firstAttempt}
}
// renderedWorkItem returns a validated work item for a rendered-mode
// notification delivery in the sending state, paired with an in-progress
// attempt numbered attemptNo.
//
// The delivery is created attemptNo minutes before fixedNow; the attempt is
// scheduled at creation time and marked as started five seconds later.
func renderedWorkItem(t *testing.T, attemptNo int) WorkItem {
	t.Helper()

	age := time.Duration(attemptNo) * time.Minute
	createdAt := fixedNow().Add(-age)
	claimedAt := createdAt.Add(5 * time.Second)

	delivery := deliverydomain.Delivery{
		DeliveryID:  common.DeliveryID("delivery-rendered"),
		Source:      deliverydomain.SourceNotification,
		PayloadMode: deliverydomain.PayloadModeRendered,
		Envelope: deliverydomain.Envelope{
			To: []common.Email{common.Email("pilot@example.com")},
		},
		Content: deliverydomain.Content{
			Subject:  "Turn ready",
			TextBody: "Turn 54 is ready.",
		},
		IdempotencyKey: common.IdempotencyKey("notification:delivery-rendered"),
		Status:         deliverydomain.StatusSending,
		AttemptCount:   attemptNo,
		CreatedAt:      createdAt,
		UpdatedAt:      createdAt.Add(time.Second),
	}
	require.NoError(t, delivery.Validate())

	current := attempt.Attempt{
		DeliveryID:   delivery.DeliveryID,
		AttemptNo:    attemptNo,
		ScheduledFor: createdAt,
		StartedAt:    &claimedAt,
		Status:       attempt.StatusInProgress,
	}
	require.NoError(t, current.Validate())

	return WorkItem{Delivery: delivery, Attempt: current}
}
// sendingTemplateWorkItem returns a validated work item for a template-mode
// notification delivery in the sending state that already carries subject
// and text content, paired with an in-progress attempt numbered attemptNo.
//
// The delivery is created attemptNo minutes before fixedNow; the attempt is
// scheduled at creation time and marked as started five seconds later.
func sendingTemplateWorkItem(t *testing.T, attemptNo int) WorkItem {
	t.Helper()

	age := time.Duration(attemptNo) * time.Minute
	createdAt := fixedNow().Add(-age)
	claimedAt := createdAt.Add(5 * time.Second)

	delivery := deliverydomain.Delivery{
		DeliveryID:  common.DeliveryID("delivery-template-sending"),
		Source:      deliverydomain.SourceNotification,
		PayloadMode: deliverydomain.PayloadModeTemplate,
		TemplateID:  common.TemplateID("game.turn_ready"),
		Envelope: deliverydomain.Envelope{
			To: []common.Email{common.Email("pilot@example.com")},
		},
		Content: deliverydomain.Content{
			Subject:  "Turn ready",
			TextBody: "Turn 54 is ready.",
		},
		Locale:            common.Locale("en"),
		TemplateVariables: map[string]any{"turn_number": float64(54)},
		IdempotencyKey:    common.IdempotencyKey("notification:delivery-template-sending"),
		Status:            deliverydomain.StatusSending,
		AttemptCount:      attemptNo,
		CreatedAt:         createdAt,
		UpdatedAt:         createdAt.Add(time.Second),
	}
	require.NoError(t, delivery.Validate())

	current := attempt.Attempt{
		DeliveryID:   delivery.DeliveryID,
		AttemptNo:    attemptNo,
		ScheduledFor: createdAt,
		StartedAt:    &claimedAt,
		Status:       attempt.StatusInProgress,
	}
	require.NoError(t, current.Validate())

	return WorkItem{Delivery: delivery, Attempt: current}
}
// fixedNow returns the constant reference instant used by these tests:
// Unix second 1775121700, expressed in UTC.
func fixedNow() time.Time {
	const referenceUnixSeconds int64 = 1_775_121_700
	reference := time.Unix(referenceUnixSeconds, 0)
	return reference.UTC()
}
// Compile-time checks that every test double satisfies the interface it
// stands in for.
var (
	_ Renderer       = (*stubRenderer)(nil)
	_ ports.Provider = stubProvider{}
	_ PayloadLoader  = stubPayloadLoader{}
	_ Store          = (*stubStore)(nil)
	_ Telemetry      = (*stubTelemetry)(nil)
)
// hasExecuteSpanNamed reports whether any span in spans has exactly the
// given name. It returns false for an empty or nil slice.
func hasExecuteSpanNamed(spans []sdktrace.ReadOnlySpan, name string) bool {
	for i := range spans {
		if spans[i].Name() != name {
			continue
		}
		return true
	}

	return false
}