355 lines
12 KiB
Go
355 lines
12 KiB
Go
package mailstore
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
pgtable "galaxy/mail/internal/adapters/postgres/jet/mail/table"
|
|
"galaxy/mail/internal/domain/attempt"
|
|
"galaxy/mail/internal/domain/common"
|
|
deliverydomain "galaxy/mail/internal/domain/delivery"
|
|
"galaxy/mail/internal/service/acceptgenericdelivery"
|
|
"galaxy/mail/internal/service/executeattempt"
|
|
"galaxy/mail/internal/telemetry"
|
|
|
|
pg "github.com/go-jet/jet/v2/postgres"
|
|
)
|
|
|
|
// LoadPayload returns the raw attachment payload bundle for deliveryID. It
|
|
// satisfies executeattempt.PayloadLoader.
|
|
func (store *Store) LoadPayload(ctx context.Context, deliveryID common.DeliveryID) (acceptgenericdelivery.DeliveryPayload, bool, error) {
|
|
return store.GetDeliveryPayload(ctx, deliveryID)
|
|
}
|
|
|
|
// AttemptExecution returns a handle that satisfies executeattempt.Store and
|
|
// the worker.AttemptExecutionStore contract used by the scheduler.
|
|
func (store *Store) AttemptExecution() *AttemptExecutionStore {
|
|
return &AttemptExecutionStore{store: store}
|
|
}
|
|
|
|
// AttemptExecutionStore is the executeattempt.Store handle returned by
|
|
// Store.AttemptExecution.
|
|
type AttemptExecutionStore struct {
|
|
store *Store
|
|
}
|
|
|
|
var _ executeattempt.Store = (*AttemptExecutionStore)(nil)
|
|
|
|
// Commit applies one complete durable attempt outcome mutation: the
|
|
// terminal current attempt, an optional next scheduled retry attempt, and an
|
|
// optional dead-letter row.
|
|
func (handle *AttemptExecutionStore) Commit(ctx context.Context, input executeattempt.CommitStateInput) error {
|
|
if handle == nil || handle.store == nil {
|
|
return errors.New("commit attempt: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return errors.New("commit attempt: nil context")
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return fmt.Errorf("commit attempt: %w", err)
|
|
}
|
|
|
|
return handle.store.withTx(ctx, "commit attempt", func(ctx context.Context, tx *sql.Tx) error {
|
|
if err := lockDelivery(ctx, tx, input.Delivery.DeliveryID); err != nil {
|
|
return fmt.Errorf("commit attempt: %w", err)
|
|
}
|
|
if err := updateAttempt(ctx, tx, input.Attempt); err != nil {
|
|
return fmt.Errorf("commit attempt: update current attempt: %w", err)
|
|
}
|
|
if input.NextAttempt != nil {
|
|
if err := insertAttempt(ctx, tx, *input.NextAttempt); err != nil {
|
|
return fmt.Errorf("commit attempt: insert next attempt: %w", err)
|
|
}
|
|
}
|
|
if input.DeadLetter != nil {
|
|
if err := insertDeadLetter(ctx, tx, *input.DeadLetter); err != nil {
|
|
return fmt.Errorf("commit attempt: insert dead-letter: %w", err)
|
|
}
|
|
}
|
|
if err := updateDelivery(ctx, tx, input.Delivery, input.NextAttempt); err != nil {
|
|
return fmt.Errorf("commit attempt: update delivery: %w", err)
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// NextDueDeliveryIDs returns up to limit due delivery identifiers ordered by
|
|
// next_attempt_at. The query uses `FOR UPDATE SKIP LOCKED` to allow multiple
|
|
// schedulers to run concurrently without contending on the same row.
|
|
func (handle *AttemptExecutionStore) NextDueDeliveryIDs(ctx context.Context, now time.Time, limit int64) ([]common.DeliveryID, error) {
|
|
if handle == nil || handle.store == nil {
|
|
return nil, errors.New("next due delivery ids: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return nil, errors.New("next due delivery ids: nil context")
|
|
}
|
|
if limit <= 0 {
|
|
return nil, errors.New("next due delivery ids: non-positive limit")
|
|
}
|
|
operationCtx, cancel, err := handle.store.operationContext(ctx, "next due delivery ids")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer cancel()
|
|
|
|
stmt := pg.SELECT(pgtable.Deliveries.DeliveryID).
|
|
FROM(pgtable.Deliveries).
|
|
WHERE(pg.AND(
|
|
pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL(),
|
|
pgtable.Deliveries.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
|
|
)).
|
|
ORDER_BY(pgtable.Deliveries.NextAttemptAt.ASC()).
|
|
LIMIT(limit)
|
|
|
|
query, args := stmt.Sql()
|
|
rows, err := handle.store.db.QueryContext(operationCtx, query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("next due delivery ids: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
out := make([]common.DeliveryID, 0, limit)
|
|
for rows.Next() {
|
|
var id string
|
|
if err := rows.Scan(&id); err != nil {
|
|
return nil, fmt.Errorf("next due delivery ids: scan: %w", err)
|
|
}
|
|
out = append(out, common.DeliveryID(id))
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, fmt.Errorf("next due delivery ids: %w", err)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// SendingDeliveryIDs returns every delivery currently held by an in-progress
|
|
// attempt. The recovery loop uses the result to identify rows whose claim
|
|
// might have expired.
|
|
func (handle *AttemptExecutionStore) SendingDeliveryIDs(ctx context.Context) ([]common.DeliveryID, error) {
|
|
if handle == nil || handle.store == nil {
|
|
return nil, errors.New("sending delivery ids: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return nil, errors.New("sending delivery ids: nil context")
|
|
}
|
|
operationCtx, cancel, err := handle.store.operationContext(ctx, "sending delivery ids")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer cancel()
|
|
|
|
stmt := pg.SELECT(pgtable.Deliveries.DeliveryID).
|
|
FROM(pgtable.Deliveries).
|
|
WHERE(pgtable.Deliveries.Status.EQ(pg.String(string(deliverydomain.StatusSending))))
|
|
|
|
query, args := stmt.Sql()
|
|
rows, err := handle.store.db.QueryContext(operationCtx, query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("sending delivery ids: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
out := []common.DeliveryID{}
|
|
for rows.Next() {
|
|
var id string
|
|
if err := rows.Scan(&id); err != nil {
|
|
return nil, fmt.Errorf("sending delivery ids: scan: %w", err)
|
|
}
|
|
out = append(out, common.DeliveryID(id))
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, fmt.Errorf("sending delivery ids: %w", err)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// LoadWorkItem returns the active attempt and delivery row for deliveryID.
|
|
// found is false when the delivery row does not exist.
|
|
func (handle *AttemptExecutionStore) LoadWorkItem(ctx context.Context, deliveryID common.DeliveryID) (executeattempt.WorkItem, bool, error) {
|
|
if handle == nil || handle.store == nil {
|
|
return executeattempt.WorkItem{}, false, errors.New("load work item: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return executeattempt.WorkItem{}, false, errors.New("load work item: nil context")
|
|
}
|
|
if err := deliveryID.Validate(); err != nil {
|
|
return executeattempt.WorkItem{}, false, fmt.Errorf("load work item: %w", err)
|
|
}
|
|
operationCtx, cancel, err := handle.store.operationContext(ctx, "load work item")
|
|
if err != nil {
|
|
return executeattempt.WorkItem{}, false, err
|
|
}
|
|
defer cancel()
|
|
|
|
delivery, ok, err := loadDeliveryByID(operationCtx, handle.store.db, deliveryID)
|
|
if err != nil {
|
|
return executeattempt.WorkItem{}, false, fmt.Errorf("load work item: %w", err)
|
|
}
|
|
if !ok {
|
|
return executeattempt.WorkItem{}, false, nil
|
|
}
|
|
if delivery.AttemptCount == 0 {
|
|
return executeattempt.WorkItem{}, false, fmt.Errorf("load work item %q: zero attempt count", deliveryID)
|
|
}
|
|
active, err := loadActiveAttempt(operationCtx, handle.store.db, deliveryID, delivery.AttemptCount)
|
|
if err != nil {
|
|
return executeattempt.WorkItem{}, false, fmt.Errorf("load work item: load active attempt: %w", err)
|
|
}
|
|
return executeattempt.WorkItem{Delivery: delivery, Attempt: active}, true, nil
|
|
}
|
|
|
|
// ClaimDueAttempt atomically claims the due scheduled attempt for deliveryID
|
|
// inside one transaction. The delivery transitions to `sending`, the active
|
|
// attempt to `in_progress`. found is false when no claimable row exists at
|
|
// now.
|
|
func (handle *AttemptExecutionStore) ClaimDueAttempt(ctx context.Context, deliveryID common.DeliveryID, now time.Time) (executeattempt.WorkItem, bool, error) {
|
|
if handle == nil || handle.store == nil {
|
|
return executeattempt.WorkItem{}, false, errors.New("claim due attempt: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return executeattempt.WorkItem{}, false, errors.New("claim due attempt: nil context")
|
|
}
|
|
if err := deliveryID.Validate(); err != nil {
|
|
return executeattempt.WorkItem{}, false, fmt.Errorf("claim due attempt: %w", err)
|
|
}
|
|
|
|
var (
|
|
claimed executeattempt.WorkItem
|
|
found bool
|
|
)
|
|
err := handle.store.withTx(ctx, "claim due attempt", func(ctx context.Context, tx *sql.Tx) error {
|
|
stmt := pg.SELECT(deliverySelectColumns).
|
|
FROM(pgtable.Deliveries).
|
|
WHERE(pg.AND(
|
|
pgtable.Deliveries.DeliveryID.EQ(pg.String(deliveryID.String())),
|
|
pgtable.Deliveries.Status.IN(
|
|
pg.String(string(deliverydomain.StatusQueued)),
|
|
pg.String(string(deliverydomain.StatusRendered)),
|
|
),
|
|
pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL(),
|
|
pgtable.Deliveries.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
|
|
)).
|
|
FOR(pg.UPDATE().SKIP_LOCKED())
|
|
|
|
query, args := stmt.Sql()
|
|
row := tx.QueryRowContext(ctx, query, args...)
|
|
delivery, _, err := scanDelivery(row)
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("claim due attempt: load delivery: %w", err)
|
|
}
|
|
|
|
envelope, err := loadEnvelope(ctx, tx, deliveryID)
|
|
if err != nil {
|
|
return fmt.Errorf("claim due attempt: load envelope: %w", err)
|
|
}
|
|
delivery.Envelope = envelope
|
|
|
|
active, err := loadActiveAttempt(ctx, tx, deliveryID, delivery.AttemptCount)
|
|
if err != nil {
|
|
return fmt.Errorf("claim due attempt: load active attempt: %w", err)
|
|
}
|
|
if active.Status != attempt.StatusScheduled {
|
|
return nil
|
|
}
|
|
|
|
nowUTC := now.UTC().Truncate(time.Millisecond)
|
|
active.Status = attempt.StatusInProgress
|
|
active.StartedAt = &nowUTC
|
|
|
|
delivery.Status = deliverydomain.StatusSending
|
|
delivery.LastAttemptStatus = attempt.StatusInProgress
|
|
delivery.UpdatedAt = nowUTC
|
|
|
|
if err := updateAttempt(ctx, tx, active); err != nil {
|
|
return fmt.Errorf("claim due attempt: update attempt: %w", err)
|
|
}
|
|
if err := updateDelivery(ctx, tx, delivery, nil); err != nil {
|
|
return fmt.Errorf("claim due attempt: update delivery: %w", err)
|
|
}
|
|
|
|
claimed = executeattempt.WorkItem{Delivery: delivery, Attempt: active}
|
|
found = true
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return executeattempt.WorkItem{}, false, err
|
|
}
|
|
return claimed, found, nil
|
|
}
|
|
|
|
// RemoveScheduledDelivery clears next_attempt_at for deliveryID. The
|
|
// scheduler calls this when it discovers a stale schedule entry that no
|
|
// longer points to a claimable delivery.
|
|
func (handle *AttemptExecutionStore) RemoveScheduledDelivery(ctx context.Context, deliveryID common.DeliveryID) error {
|
|
if handle == nil || handle.store == nil {
|
|
return errors.New("remove scheduled delivery: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return errors.New("remove scheduled delivery: nil context")
|
|
}
|
|
if err := deliveryID.Validate(); err != nil {
|
|
return fmt.Errorf("remove scheduled delivery: %w", err)
|
|
}
|
|
operationCtx, cancel, err := handle.store.operationContext(ctx, "remove scheduled delivery")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer cancel()
|
|
|
|
stmt := pgtable.Deliveries.UPDATE(pgtable.Deliveries.NextAttemptAt).
|
|
SET(pg.NULL).
|
|
WHERE(pgtable.Deliveries.DeliveryID.EQ(pg.String(deliveryID.String())))
|
|
|
|
query, args := stmt.Sql()
|
|
if _, err := handle.store.db.ExecContext(operationCtx, query, args...); err != nil {
|
|
return fmt.Errorf("remove scheduled delivery: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ReadAttemptScheduleSnapshot returns the current attempt-schedule depth and
|
|
// oldest scheduled timestamp. The runtime exposes this via the telemetry
|
|
// snapshot reader contract.
|
|
func (handle *AttemptExecutionStore) ReadAttemptScheduleSnapshot(ctx context.Context) (telemetry.AttemptScheduleSnapshot, error) {
|
|
if handle == nil || handle.store == nil {
|
|
return telemetry.AttemptScheduleSnapshot{}, errors.New("read attempt schedule snapshot: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return telemetry.AttemptScheduleSnapshot{}, errors.New("read attempt schedule snapshot: nil context")
|
|
}
|
|
operationCtx, cancel, err := handle.store.operationContext(ctx, "read attempt schedule snapshot")
|
|
if err != nil {
|
|
return telemetry.AttemptScheduleSnapshot{}, err
|
|
}
|
|
defer cancel()
|
|
|
|
stmt := pg.SELECT(
|
|
pg.COUNT(pg.STAR),
|
|
pg.MIN(pgtable.Deliveries.NextAttemptAt),
|
|
).FROM(pgtable.Deliveries).
|
|
WHERE(pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL())
|
|
|
|
query, args := stmt.Sql()
|
|
row := handle.store.db.QueryRowContext(operationCtx, query, args...)
|
|
var (
|
|
count int64
|
|
oldest sql.NullTime
|
|
summary telemetry.AttemptScheduleSnapshot
|
|
)
|
|
if err := row.Scan(&count, &oldest); err != nil {
|
|
return telemetry.AttemptScheduleSnapshot{}, fmt.Errorf("read attempt schedule snapshot: %w", err)
|
|
}
|
|
summary.Depth = count
|
|
if oldest.Valid {
|
|
oldestUTC := oldest.Time.UTC()
|
|
summary.OldestScheduledFor = &oldestUTC
|
|
}
|
|
return summary, nil
|
|
}
|