782 lines
26 KiB
Go
782 lines
26 KiB
Go
// Package executeattempt implements provider execution, retry planning, and
// terminal state handling for claimed delivery attempts.
|
|
package executeattempt
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"strings"
|
|
"time"
|
|
|
|
"galaxy/mail/internal/domain/attempt"
|
|
"galaxy/mail/internal/domain/common"
|
|
deliverydomain "galaxy/mail/internal/domain/delivery"
|
|
"galaxy/mail/internal/logging"
|
|
"galaxy/mail/internal/ports"
|
|
"galaxy/mail/internal/service/acceptgenericdelivery"
|
|
"galaxy/mail/internal/service/renderdelivery"
|
|
|
|
"go.opentelemetry.io/otel"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
oteltrace "go.opentelemetry.io/otel/trace"
|
|
)
|
|
|
|
// ErrServiceUnavailable is the sentinel wrapped into errors returned when
// attempt execution cannot safely load or persist durable state.
var ErrServiceUnavailable = errors.New("execute attempt service unavailable")
|
|
|
|
// retryDelays is the retry back-off schedule. The entry at index i is the wait
// applied after attempt number i+1 finishes with a retryable outcome; attempts
// beyond the end of the table get no further retry.
var retryDelays = [...]time.Duration{
	1 * time.Minute,
	5 * time.Minute,
	30 * time.Minute,
}
|
|
|
|
const (
	// Dead-letter entry values written when the retry schedule is exhausted.
	retryExhaustedClassification = "retry_exhausted"
	retryRecoveryHint            = "check SMTP connectivity"

	// Provider classification and summary recorded when a stale claim is
	// recovered by RecoverExpired.
	claimTTLClassification = "claim_ttl_expired"
	claimTTLSummary        = "attempt claim TTL expired"

	// deadlineExceededDetail is the provider result detail value that turns a
	// transient failure into a timed-out attempt.
	deadlineExceededDetail = "deadline_exceeded"

	// tracerName identifies the tracer used for provider-send spans.
	tracerName = "galaxy/mail/executeattempt"
)
|
|
|
|
// WorkItem stores one delivery together with the concrete attempt that should
|
|
// be prepared, executed, or recovered.
|
|
type WorkItem struct {
|
|
// Delivery stores the owning logical delivery record.
|
|
Delivery deliverydomain.Delivery
|
|
|
|
// Attempt stores the concrete delivery attempt record.
|
|
Attempt attempt.Attempt
|
|
}
|
|
|
|
// ValidateForPreparation reports whether item can be prepared for claim-time
|
|
// rendering decisions.
|
|
func (item WorkItem) ValidateForPreparation() error {
|
|
if err := item.validateCommon(); err != nil {
|
|
return err
|
|
}
|
|
if item.Attempt.Status != attempt.StatusScheduled {
|
|
return fmt.Errorf("work attempt status must be %q", attempt.StatusScheduled)
|
|
}
|
|
switch item.Delivery.Status {
|
|
case deliverydomain.StatusQueued, deliverydomain.StatusRendered:
|
|
default:
|
|
return fmt.Errorf(
|
|
"work delivery status must be %q or %q",
|
|
deliverydomain.StatusQueued,
|
|
deliverydomain.StatusRendered,
|
|
)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ValidateForExecution reports whether item represents one claimed in-flight
|
|
// provider execution.
|
|
func (item WorkItem) ValidateForExecution() error {
|
|
if err := item.validateCommon(); err != nil {
|
|
return err
|
|
}
|
|
if item.Delivery.Status != deliverydomain.StatusSending {
|
|
return fmt.Errorf("work delivery status must be %q", deliverydomain.StatusSending)
|
|
}
|
|
if item.Attempt.Status != attempt.StatusInProgress {
|
|
return fmt.Errorf("work attempt status must be %q", attempt.StatusInProgress)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (item WorkItem) validateCommon() error {
|
|
if err := item.Delivery.Validate(); err != nil {
|
|
return fmt.Errorf("work delivery: %w", err)
|
|
}
|
|
if err := item.Attempt.Validate(); err != nil {
|
|
return fmt.Errorf("work attempt: %w", err)
|
|
}
|
|
if item.Attempt.DeliveryID != item.Delivery.DeliveryID {
|
|
return errors.New("work attempt delivery id must match delivery id")
|
|
}
|
|
if item.Delivery.AttemptCount != item.Attempt.AttemptNo {
|
|
return errors.New("work delivery attempt count must match attempt number")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// CommitStateInput stores one complete durable attempt outcome mutation.
|
|
type CommitStateInput struct {
|
|
// Delivery stores the mutated delivery record.
|
|
Delivery deliverydomain.Delivery
|
|
|
|
// Attempt stores the terminal current attempt record.
|
|
Attempt attempt.Attempt
|
|
|
|
// NextAttempt stores the optional next scheduled retry attempt.
|
|
NextAttempt *attempt.Attempt
|
|
|
|
// DeadLetter stores the optional dead-letter record when Delivery becomes
|
|
// `dead_letter`.
|
|
DeadLetter *deliverydomain.DeadLetterEntry
|
|
}
|
|
|
|
// Validate reports whether input stores one complete and internally
|
|
// consistent durable mutation.
|
|
func (input CommitStateInput) Validate() error {
|
|
if err := input.Delivery.Validate(); err != nil {
|
|
return fmt.Errorf("delivery: %w", err)
|
|
}
|
|
if err := input.Attempt.Validate(); err != nil {
|
|
return fmt.Errorf("attempt: %w", err)
|
|
}
|
|
if !input.Attempt.Status.IsTerminal() {
|
|
return errors.New("attempt status must be terminal")
|
|
}
|
|
if input.Attempt.DeliveryID != input.Delivery.DeliveryID {
|
|
return errors.New("attempt delivery id must match delivery id")
|
|
}
|
|
if input.Delivery.LastAttemptStatus != input.Attempt.Status {
|
|
return errors.New("delivery last attempt status must match attempt status")
|
|
}
|
|
|
|
if input.NextAttempt != nil {
|
|
if err := input.NextAttempt.Validate(); err != nil {
|
|
return fmt.Errorf("next attempt: %w", err)
|
|
}
|
|
if input.NextAttempt.DeliveryID != input.Delivery.DeliveryID {
|
|
return errors.New("next attempt delivery id must match delivery id")
|
|
}
|
|
if input.NextAttempt.Status != attempt.StatusScheduled {
|
|
return fmt.Errorf("next attempt status must be %q", attempt.StatusScheduled)
|
|
}
|
|
if input.Delivery.Status != deliverydomain.StatusQueued {
|
|
return fmt.Errorf("delivery status with next attempt must be %q", deliverydomain.StatusQueued)
|
|
}
|
|
if input.Delivery.AttemptCount != input.NextAttempt.AttemptNo {
|
|
return errors.New("delivery attempt count must match next attempt number")
|
|
}
|
|
if input.NextAttempt.AttemptNo != input.Attempt.AttemptNo+1 {
|
|
return errors.New("next attempt number must increment current attempt number")
|
|
}
|
|
if input.DeadLetter != nil {
|
|
return errors.New("next attempt and dead-letter entry are mutually exclusive")
|
|
}
|
|
} else if input.Delivery.AttemptCount != input.Attempt.AttemptNo {
|
|
return errors.New("delivery attempt count must match current attempt number without next attempt")
|
|
}
|
|
|
|
if err := deliverydomain.ValidateDeadLetterState(input.Delivery, input.DeadLetter); err != nil {
|
|
return fmt.Errorf("dead-letter state: %w", err)
|
|
}
|
|
|
|
switch input.Delivery.Status {
|
|
case deliverydomain.StatusSent:
|
|
if input.Attempt.Status != attempt.StatusProviderAccepted {
|
|
return fmt.Errorf("sent delivery requires attempt status %q", attempt.StatusProviderAccepted)
|
|
}
|
|
case deliverydomain.StatusSuppressed, deliverydomain.StatusFailed:
|
|
if input.Attempt.Status != attempt.StatusProviderRejected {
|
|
return fmt.Errorf(
|
|
"%s delivery requires attempt status %q",
|
|
input.Delivery.Status,
|
|
attempt.StatusProviderRejected,
|
|
)
|
|
}
|
|
case deliverydomain.StatusQueued:
|
|
if input.NextAttempt == nil {
|
|
return errors.New("queued delivery requires next attempt")
|
|
}
|
|
switch input.Attempt.Status {
|
|
case attempt.StatusTransportFailed, attempt.StatusTimedOut:
|
|
default:
|
|
return fmt.Errorf(
|
|
"queued delivery requires attempt status %q or %q",
|
|
attempt.StatusTransportFailed,
|
|
attempt.StatusTimedOut,
|
|
)
|
|
}
|
|
case deliverydomain.StatusDeadLetter:
|
|
switch input.Attempt.Status {
|
|
case attempt.StatusTransportFailed, attempt.StatusTimedOut:
|
|
default:
|
|
return fmt.Errorf(
|
|
"dead-letter delivery requires attempt status %q or %q",
|
|
attempt.StatusTransportFailed,
|
|
attempt.StatusTimedOut,
|
|
)
|
|
}
|
|
default:
|
|
return fmt.Errorf("unsupported delivery status %q for commit input", input.Delivery.Status)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Renderer materializes template-mode deliveries before a scheduler claims an
|
|
// attempt for outbound execution.
|
|
type Renderer interface {
|
|
// Execute renders or terminally fails one queued template-mode delivery.
|
|
Execute(context.Context, renderdelivery.Input) (renderdelivery.Result, error)
|
|
}
|
|
|
|
// PayloadLoader loads raw attachment payloads for a delivery.
|
|
type PayloadLoader interface {
|
|
// LoadPayload returns the stored attachment payload bundle when one exists.
|
|
LoadPayload(context.Context, common.DeliveryID) (acceptgenericdelivery.DeliveryPayload, bool, error)
|
|
}
|
|
|
|
// Store persists durable attempt execution outcomes.
|
|
type Store interface {
|
|
// Commit applies one complete durable attempt outcome mutation.
|
|
Commit(context.Context, CommitStateInput) error
|
|
}
|
|
|
|
// Clock is the source of wall-clock time used to timestamp outcomes.
type Clock interface {
	// Now reports the current time.
	Now() time.Time
}
|
|
|
|
// Telemetry receives low-cardinality attempt-execution metrics.
type Telemetry interface {
	// RecordDeliveryStatusTransition records one durable delivery status
	// transition for the given delivery status and source.
	RecordDeliveryStatusTransition(ctx context.Context, status string, source string)

	// RecordAttemptOutcome records one durable terminal attempt outcome for
	// the given attempt status and source.
	RecordAttemptOutcome(ctx context.Context, status string, source string)

	// RecordProviderSendDuration records one provider-send latency sample for
	// the given provider name and provider outcome.
	RecordProviderSendDuration(ctx context.Context, provider string, outcome string, duration time.Duration)
}
|
|
|
|
// Config stores the dependencies used by Service.
|
|
type Config struct {
|
|
// Renderer stores the template renderer used during pre-claim preparation.
|
|
Renderer Renderer
|
|
|
|
// Provider stores the outbound provider adapter.
|
|
Provider ports.Provider
|
|
|
|
// PayloadLoader loads raw attachment payloads for SMTP construction.
|
|
PayloadLoader PayloadLoader
|
|
|
|
// Store persists durable attempt execution outcomes.
|
|
Store Store
|
|
|
|
// Clock provides wall-clock timestamps.
|
|
Clock Clock
|
|
|
|
// Telemetry records low-cardinality attempt-execution metrics.
|
|
Telemetry Telemetry
|
|
|
|
// TracerProvider constructs the application span recorder used by provider
|
|
// sends.
|
|
TracerProvider oteltrace.TracerProvider
|
|
|
|
// Logger writes structured attempt-execution logs.
|
|
Logger *slog.Logger
|
|
|
|
// AttemptTimeout bounds one provider execution budget.
|
|
AttemptTimeout time.Duration
|
|
}
|
|
|
|
// Service prepares template deliveries, executes claimed attempts, and
|
|
// applies retry policy.
|
|
type Service struct {
|
|
renderer Renderer
|
|
provider ports.Provider
|
|
payloadLoader PayloadLoader
|
|
store Store
|
|
clock Clock
|
|
telemetry Telemetry
|
|
tracerProvider oteltrace.TracerProvider
|
|
logger *slog.Logger
|
|
attemptTimeout time.Duration
|
|
}
|
|
|
|
// New constructs Service from cfg.
|
|
func New(cfg Config) (*Service, error) {
|
|
switch {
|
|
case cfg.Renderer == nil:
|
|
return nil, errors.New("new execute attempt service: nil renderer")
|
|
case cfg.Provider == nil:
|
|
return nil, errors.New("new execute attempt service: nil provider")
|
|
case cfg.PayloadLoader == nil:
|
|
return nil, errors.New("new execute attempt service: nil payload loader")
|
|
case cfg.Store == nil:
|
|
return nil, errors.New("new execute attempt service: nil store")
|
|
case cfg.Clock == nil:
|
|
return nil, errors.New("new execute attempt service: nil clock")
|
|
case cfg.AttemptTimeout <= 0:
|
|
return nil, errors.New("new execute attempt service: non-positive attempt timeout")
|
|
default:
|
|
tracerProvider := cfg.TracerProvider
|
|
if tracerProvider == nil {
|
|
tracerProvider = otel.GetTracerProvider()
|
|
}
|
|
logger := cfg.Logger
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
|
|
return &Service{
|
|
renderer: cfg.Renderer,
|
|
provider: cfg.Provider,
|
|
payloadLoader: cfg.PayloadLoader,
|
|
store: cfg.Store,
|
|
clock: cfg.Clock,
|
|
telemetry: cfg.Telemetry,
|
|
tracerProvider: tracerProvider,
|
|
logger: logger.With("component", "execute_attempt"),
|
|
attemptTimeout: cfg.AttemptTimeout,
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
// Prepare renders one template-mode queued delivery when its content has not
|
|
// been materialized yet. The boolean result reports whether the scheduler may
|
|
// proceed to claim the attempt.
|
|
func (service *Service) Prepare(ctx context.Context, item WorkItem) (bool, error) {
|
|
if ctx == nil {
|
|
return false, errors.New("prepare execute attempt: nil context")
|
|
}
|
|
if service == nil {
|
|
return false, errors.New("prepare execute attempt: nil service")
|
|
}
|
|
if err := item.ValidateForPreparation(); err != nil {
|
|
return false, fmt.Errorf("prepare execute attempt: %w", err)
|
|
}
|
|
if item.Delivery.PayloadMode != deliverydomain.PayloadModeTemplate {
|
|
return true, nil
|
|
}
|
|
if item.Delivery.Status == deliverydomain.StatusRendered {
|
|
return true, nil
|
|
}
|
|
if err := item.Delivery.Content.ValidateMaterialized(); err == nil {
|
|
return true, nil
|
|
}
|
|
|
|
result, err := service.renderer.Execute(ctx, renderdelivery.Input{
|
|
Delivery: item.Delivery,
|
|
Attempt: item.Attempt,
|
|
})
|
|
if err != nil {
|
|
return false, fmt.Errorf("prepare execute attempt: %w", err)
|
|
}
|
|
if result.Outcome == renderdelivery.OutcomeFailed {
|
|
return false, nil
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// Execute runs one claimed in-progress attempt through the provider and
|
|
// durably records the resulting outcome.
|
|
func (service *Service) Execute(ctx context.Context, item WorkItem) error {
|
|
if ctx == nil {
|
|
return errors.New("execute attempt: nil context")
|
|
}
|
|
if service == nil {
|
|
return errors.New("execute attempt: nil service")
|
|
}
|
|
if err := item.ValidateForExecution(); err != nil {
|
|
return fmt.Errorf("execute attempt: %w", err)
|
|
}
|
|
|
|
message, err := service.buildMessage(ctx, item.Delivery)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
sendStartedAt := time.Now()
|
|
sendCtx, span := service.tracerProvider.Tracer(tracerName).Start(
|
|
ctx,
|
|
"mail.provider_send",
|
|
oteltrace.WithAttributes(
|
|
attribute.String("mail.delivery_id", item.Delivery.DeliveryID.String()),
|
|
attribute.String("mail.source", string(item.Delivery.Source)),
|
|
attribute.Int("mail.attempt_no", item.Attempt.AttemptNo),
|
|
),
|
|
)
|
|
if !item.Delivery.TemplateID.IsZero() {
|
|
span.SetAttributes(attribute.String("mail.template_id", item.Delivery.TemplateID.String()))
|
|
}
|
|
providerCtx, cancel := context.WithTimeout(sendCtx, service.attemptTimeout)
|
|
defer cancel()
|
|
defer span.End()
|
|
|
|
result, err := service.provider.Send(providerCtx, message)
|
|
if err != nil {
|
|
span.RecordError(err)
|
|
return fmt.Errorf("execute attempt: send provider message: %w", err)
|
|
}
|
|
if err := result.Validate(); err != nil {
|
|
span.RecordError(err)
|
|
return fmt.Errorf("execute attempt: provider result: %w", err)
|
|
}
|
|
providerName := providerNameFromSummary(result.Summary)
|
|
sendDuration := time.Since(sendStartedAt)
|
|
service.recordProviderSendDuration(sendCtx, providerName, string(result.Classification), sendDuration)
|
|
span.SetAttributes(
|
|
attribute.String("mail.provider", providerName),
|
|
attribute.String("mail.provider_outcome", string(result.Classification)),
|
|
attribute.String("mail.provider_summary", result.Summary),
|
|
)
|
|
|
|
commit, err := service.commitForProviderResult(item, result)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := service.store.Commit(ctx, commit); err != nil {
|
|
return fmt.Errorf("%w: commit attempt outcome: %v", ErrServiceUnavailable, err)
|
|
}
|
|
service.recordCommitMetrics(sendCtx, commit, item.Delivery.Source)
|
|
service.logProviderResult(sendCtx, item, result, commit, providerName, sendDuration)
|
|
|
|
return nil
|
|
}
|
|
|
|
// RecoverExpired marks one stale in-progress attempt as expired and applies
|
|
// the same retry policy used for runtime timeouts.
|
|
func (service *Service) RecoverExpired(ctx context.Context, item WorkItem) error {
|
|
if ctx == nil {
|
|
return errors.New("recover expired attempt: nil context")
|
|
}
|
|
if service == nil {
|
|
return errors.New("recover expired attempt: nil service")
|
|
}
|
|
if err := item.ValidateForExecution(); err != nil {
|
|
return fmt.Errorf("recover expired attempt: %w", err)
|
|
}
|
|
|
|
commit, err := service.commitForTimeout(item, claimTTLClassification, claimTTLSummary)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := service.store.Commit(ctx, commit); err != nil {
|
|
return fmt.Errorf("%w: commit recovered attempt outcome: %v", ErrServiceUnavailable, err)
|
|
}
|
|
service.recordCommitMetrics(ctx, commit, item.Delivery.Source)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (service *Service) buildMessage(ctx context.Context, deliveryRecord deliverydomain.Delivery) (ports.Message, error) {
|
|
message := ports.Message{
|
|
Envelope: deliveryRecord.Envelope,
|
|
Content: deliveryRecord.Content,
|
|
}
|
|
if err := message.Content.ValidateMaterialized(); err != nil {
|
|
return ports.Message{}, fmt.Errorf("execute attempt: delivery content: %w", err)
|
|
}
|
|
if len(deliveryRecord.Attachments) == 0 {
|
|
if err := message.Validate(); err != nil {
|
|
return ports.Message{}, fmt.Errorf("execute attempt: provider message: %w", err)
|
|
}
|
|
return message, nil
|
|
}
|
|
|
|
payload, found, err := service.payloadLoader.LoadPayload(ctx, deliveryRecord.DeliveryID)
|
|
if err != nil {
|
|
return ports.Message{}, fmt.Errorf("%w: load delivery payload: %v", ErrServiceUnavailable, err)
|
|
}
|
|
if !found {
|
|
return ports.Message{}, fmt.Errorf("%w: delivery payload %q is missing", ErrServiceUnavailable, deliveryRecord.DeliveryID)
|
|
}
|
|
if len(payload.Attachments) != len(deliveryRecord.Attachments) {
|
|
return ports.Message{}, fmt.Errorf(
|
|
"%w: delivery payload attachment count %d mismatches delivery attachment count %d",
|
|
ErrServiceUnavailable,
|
|
len(payload.Attachments),
|
|
len(deliveryRecord.Attachments),
|
|
)
|
|
}
|
|
|
|
message.Attachments = make([]ports.Attachment, len(payload.Attachments))
|
|
for index, attachmentPayload := range payload.Attachments {
|
|
metadata := deliveryRecord.Attachments[index]
|
|
if metadata.Filename != attachmentPayload.Filename ||
|
|
metadata.ContentType != attachmentPayload.ContentType ||
|
|
metadata.SizeBytes != attachmentPayload.SizeBytes {
|
|
return ports.Message{}, fmt.Errorf(
|
|
"%w: delivery payload attachment %d metadata mismatches delivery audit metadata",
|
|
ErrServiceUnavailable,
|
|
index,
|
|
)
|
|
}
|
|
|
|
content, err := base64.StdEncoding.DecodeString(attachmentPayload.ContentBase64)
|
|
if err != nil {
|
|
return ports.Message{}, fmt.Errorf(
|
|
"%w: decode delivery payload attachment %d: %v",
|
|
ErrServiceUnavailable,
|
|
index,
|
|
err,
|
|
)
|
|
}
|
|
|
|
message.Attachments[index] = ports.Attachment{
|
|
Metadata: metadata,
|
|
Content: content,
|
|
}
|
|
}
|
|
if err := message.Validate(); err != nil {
|
|
return ports.Message{}, fmt.Errorf("execute attempt: provider message: %w", err)
|
|
}
|
|
|
|
return message, nil
|
|
}
|
|
|
|
func (service *Service) commitForProviderResult(item WorkItem, result ports.Result) (CommitStateInput, error) {
|
|
switch result.Classification {
|
|
case ports.ClassificationAccepted:
|
|
return service.commitTerminal(item, attempt.StatusProviderAccepted, deliverydomain.StatusSent, result.Summary, "")
|
|
case ports.ClassificationSuppressed:
|
|
return service.commitTerminal(item, attempt.StatusProviderRejected, deliverydomain.StatusSuppressed, result.Summary, "suppressed")
|
|
case ports.ClassificationPermanentFailure:
|
|
return service.commitTerminal(item, attempt.StatusProviderRejected, deliverydomain.StatusFailed, result.Summary, "permanent_failure")
|
|
case ports.ClassificationTransientFailure:
|
|
classification := attempt.StatusTransportFailed
|
|
providerClassification := "transient_failure"
|
|
if result.Details["error"] == deadlineExceededDetail {
|
|
classification = attempt.StatusTimedOut
|
|
providerClassification = deadlineExceededDetail
|
|
}
|
|
return service.commitForRetryableResult(item, classification, providerClassification, result.Summary)
|
|
default:
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: unsupported provider classification %q", result.Classification)
|
|
}
|
|
}
|
|
|
|
func (service *Service) commitForTimeout(item WorkItem, providerClassification string, providerSummary string) (CommitStateInput, error) {
|
|
return service.commitForRetryableResult(item, attempt.StatusTimedOut, providerClassification, providerSummary)
|
|
}
|
|
|
|
func (service *Service) commitForRetryableResult(
|
|
item WorkItem,
|
|
attemptStatus attempt.Status,
|
|
providerClassification string,
|
|
providerSummary string,
|
|
) (CommitStateInput, error) {
|
|
finishedAt := normalizedFinishedAt(service.clock.Now(), item.Attempt)
|
|
|
|
currentAttempt := item.Attempt
|
|
currentAttempt.Status = attemptStatus
|
|
currentAttempt.FinishedAt = ptrTime(finishedAt)
|
|
currentAttempt.ProviderClassification = providerClassification
|
|
currentAttempt.ProviderSummary = providerSummary
|
|
if err := currentAttempt.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build terminal attempt: %w", err)
|
|
}
|
|
|
|
nextDelay, ok := retryDelayForAttempt(currentAttempt.AttemptNo)
|
|
if ok {
|
|
nextScheduledFor := finishedAt.Add(nextDelay)
|
|
nextAttempt := attempt.Attempt{
|
|
DeliveryID: item.Delivery.DeliveryID,
|
|
AttemptNo: currentAttempt.AttemptNo + 1,
|
|
ScheduledFor: nextScheduledFor,
|
|
Status: attempt.StatusScheduled,
|
|
}
|
|
if err := nextAttempt.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build next attempt: %w", err)
|
|
}
|
|
|
|
deliveryRecord := item.Delivery
|
|
deliveryRecord.Status = deliverydomain.StatusQueued
|
|
deliveryRecord.AttemptCount = nextAttempt.AttemptNo
|
|
deliveryRecord.LastAttemptStatus = currentAttempt.Status
|
|
deliveryRecord.ProviderSummary = providerSummary
|
|
deliveryRecord.UpdatedAt = finishedAt
|
|
if err := deliveryRecord.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build queued delivery: %w", err)
|
|
}
|
|
|
|
input := CommitStateInput{
|
|
Delivery: deliveryRecord,
|
|
Attempt: currentAttempt,
|
|
NextAttempt: &nextAttempt,
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build queued commit: %w", err)
|
|
}
|
|
|
|
return input, nil
|
|
}
|
|
|
|
deliveryRecord := item.Delivery
|
|
deliveryRecord.Status = deliverydomain.StatusDeadLetter
|
|
deliveryRecord.LastAttemptStatus = currentAttempt.Status
|
|
deliveryRecord.ProviderSummary = providerSummary
|
|
deliveryRecord.UpdatedAt = finishedAt
|
|
deliveryRecord.DeadLetteredAt = ptrTime(finishedAt)
|
|
if err := deliveryRecord.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build dead-letter delivery: %w", err)
|
|
}
|
|
|
|
deadLetter := &deliverydomain.DeadLetterEntry{
|
|
DeliveryID: deliveryRecord.DeliveryID,
|
|
FinalAttemptNo: currentAttempt.AttemptNo,
|
|
FailureClassification: retryExhaustedClassification,
|
|
ProviderSummary: providerSummary,
|
|
CreatedAt: finishedAt,
|
|
RecoveryHint: retryRecoveryHint,
|
|
}
|
|
|
|
input := CommitStateInput{
|
|
Delivery: deliveryRecord,
|
|
Attempt: currentAttempt,
|
|
DeadLetter: deadLetter,
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build dead-letter commit: %w", err)
|
|
}
|
|
|
|
return input, nil
|
|
}
|
|
|
|
func (service *Service) commitTerminal(
|
|
item WorkItem,
|
|
attemptStatus attempt.Status,
|
|
deliveryStatus deliverydomain.Status,
|
|
providerSummary string,
|
|
providerClassification string,
|
|
) (CommitStateInput, error) {
|
|
finishedAt := normalizedFinishedAt(service.clock.Now(), item.Attempt)
|
|
|
|
currentAttempt := item.Attempt
|
|
currentAttempt.Status = attemptStatus
|
|
currentAttempt.FinishedAt = ptrTime(finishedAt)
|
|
currentAttempt.ProviderClassification = providerClassification
|
|
currentAttempt.ProviderSummary = providerSummary
|
|
if err := currentAttempt.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build terminal attempt: %w", err)
|
|
}
|
|
|
|
deliveryRecord := item.Delivery
|
|
deliveryRecord.Status = deliveryStatus
|
|
deliveryRecord.LastAttemptStatus = currentAttempt.Status
|
|
deliveryRecord.ProviderSummary = providerSummary
|
|
deliveryRecord.UpdatedAt = finishedAt
|
|
switch deliveryStatus {
|
|
case deliverydomain.StatusSent:
|
|
deliveryRecord.SentAt = ptrTime(finishedAt)
|
|
case deliverydomain.StatusSuppressed:
|
|
deliveryRecord.SuppressedAt = ptrTime(finishedAt)
|
|
case deliverydomain.StatusFailed:
|
|
deliveryRecord.FailedAt = ptrTime(finishedAt)
|
|
}
|
|
if err := deliveryRecord.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build terminal delivery: %w", err)
|
|
}
|
|
|
|
input := CommitStateInput{
|
|
Delivery: deliveryRecord,
|
|
Attempt: currentAttempt,
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return CommitStateInput{}, fmt.Errorf("execute attempt: build terminal commit: %w", err)
|
|
}
|
|
|
|
return input, nil
|
|
}
|
|
|
|
func retryDelayForAttempt(attemptNo int) (time.Duration, bool) {
|
|
if attemptNo < 1 || attemptNo > len(retryDelays) {
|
|
return 0, false
|
|
}
|
|
|
|
return retryDelays[attemptNo-1], true
|
|
}
|
|
|
|
func normalizedFinishedAt(now time.Time, record attempt.Attempt) time.Time {
|
|
finishedAt := now.UTC().Truncate(time.Millisecond)
|
|
if record.StartedAt != nil && finishedAt.Before(*record.StartedAt) {
|
|
return *record.StartedAt
|
|
}
|
|
|
|
return finishedAt
|
|
}
|
|
|
|
// ptrTime returns a pointer to a fresh copy of value.
func ptrTime(value time.Time) *time.Time {
	boxed := new(time.Time)
	*boxed = value

	return boxed
}
|
|
|
|
func (service *Service) recordCommitMetrics(ctx context.Context, commit CommitStateInput, source deliverydomain.Source) {
|
|
if service == nil || service.telemetry == nil {
|
|
return
|
|
}
|
|
|
|
service.telemetry.RecordDeliveryStatusTransition(ctx, string(commit.Delivery.Status), string(source))
|
|
service.telemetry.RecordAttemptOutcome(ctx, string(commit.Attempt.Status), string(source))
|
|
}
|
|
|
|
func (service *Service) recordProviderSendDuration(ctx context.Context, provider string, outcome string, duration time.Duration) {
|
|
if service == nil || service.telemetry == nil {
|
|
return
|
|
}
|
|
|
|
service.telemetry.RecordProviderSendDuration(ctx, provider, outcome, duration)
|
|
}
|
|
|
|
func (service *Service) logProviderResult(
|
|
ctx context.Context,
|
|
item WorkItem,
|
|
result ports.Result,
|
|
commit CommitStateInput,
|
|
providerName string,
|
|
sendDuration time.Duration,
|
|
) {
|
|
logArgs := logging.DeliveryAttemptAttrs(item.Delivery, item.Attempt)
|
|
logArgs = append(logArgs,
|
|
"provider", providerName,
|
|
"provider_outcome", string(result.Classification),
|
|
"provider_summary", result.Summary,
|
|
"delivery_status", string(commit.Delivery.Status),
|
|
"attempt_status", string(commit.Attempt.Status),
|
|
"duration_ms", float64(sendDuration.Microseconds())/1000,
|
|
)
|
|
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
|
|
service.logger.Info("provider send completed", logArgs...)
|
|
|
|
if commit.NextAttempt != nil {
|
|
retryArgs := logging.DeliveryAttemptAttrs(item.Delivery, item.Attempt)
|
|
retryArgs = append(retryArgs,
|
|
"next_attempt_no", commit.NextAttempt.AttemptNo,
|
|
"next_scheduled_for", commit.NextAttempt.ScheduledFor,
|
|
"provider_summary", result.Summary,
|
|
)
|
|
retryArgs = append(retryArgs, logging.TraceAttrsFromContext(ctx)...)
|
|
service.logger.Info("delivery retry scheduled", retryArgs...)
|
|
}
|
|
|
|
if commit.DeadLetter != nil {
|
|
deadLetterArgs := logging.DeliveryAttemptAttrs(item.Delivery, item.Attempt)
|
|
deadLetterArgs = append(deadLetterArgs,
|
|
"failure_classification", commit.DeadLetter.FailureClassification,
|
|
"recovery_hint", commit.DeadLetter.RecoveryHint,
|
|
"provider_summary", commit.DeadLetter.ProviderSummary,
|
|
)
|
|
deadLetterArgs = append(deadLetterArgs, logging.TraceAttrsFromContext(ctx)...)
|
|
service.logger.Warn("delivery moved to dead letter", deadLetterArgs...)
|
|
}
|
|
}
|
|
|
|
// providerNameFromSummary extracts the provider name from a provider summary
// made of whitespace-separated key=value tokens. It returns the value of the
// first "provider=<name>" token with a non-empty name, or "unknown" when no
// such token exists.
//
// Tokens are split on any run of whitespace (spaces, tabs, newlines) rather
// than on single spaces only, so a tab- or newline-delimited summary neither
// hides the provider token nor leaks the rest of the summary into the
// returned name, which is used as a metric label and span attribute.
func providerNameFromSummary(summary string) string {
	for _, token := range strings.Fields(summary) {
		key, value, found := strings.Cut(token, "=")
		// Fields never yields whitespace inside a token, so a plain
		// emptiness check is enough here.
		if found && key == "provider" && value != "" {
			return value
		}
	}

	return "unknown"
}
|