package mailstore

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"time"

	pgtable "galaxy/mail/internal/adapters/postgres/jet/mail/table"
	"galaxy/mail/internal/domain/attempt"
	"galaxy/mail/internal/domain/common"
	deliverydomain "galaxy/mail/internal/domain/delivery"
	"galaxy/mail/internal/service/acceptgenericdelivery"
	"galaxy/mail/internal/service/executeattempt"
	"galaxy/mail/internal/telemetry"

	pg "github.com/go-jet/jet/v2/postgres"
)

// LoadPayload returns the raw attachment payload bundle for deliveryID. It
// satisfies executeattempt.PayloadLoader.
//
// The call is a direct delegation to Store.GetDeliveryPayload: the payload,
// the boolean flag, and the error are returned exactly as that method
// reports them.
func (store *Store) LoadPayload(ctx context.Context, deliveryID common.DeliveryID) (acceptgenericdelivery.DeliveryPayload, bool, error) {
	return store.GetDeliveryPayload(ctx, deliveryID)
}

// AttemptExecution returns a handle that satisfies executeattempt.Store and
// the worker.AttemptExecutionStore contract used by the scheduler.
//
// Every call allocates a new AttemptExecutionStore that wraps the receiver;
// the handle carries no state besides that pointer.
func (store *Store) AttemptExecution() *AttemptExecutionStore {
	return &AttemptExecutionStore{store: store}
}

// AttemptExecutionStore is the executeattempt.Store handle returned by
// Store.AttemptExecution.
type AttemptExecutionStore struct {
	// store is the wrapped Store. The handle's methods return an error
	// instead of dereferencing it when it is nil.
	store *Store
}

// Compile-time assertion that *AttemptExecutionStore implements
// executeattempt.Store.
var _ executeattempt.Store = (*AttemptExecutionStore)(nil)

// Commit applies one complete durable attempt outcome mutation: the
// terminal current attempt, an optional next scheduled retry attempt, and an
// optional dead-letter row.
//
// The input is validated first. All writes then run inside a single
// Store.withTx callback: the delivery row is locked, the current attempt is
// updated, the optional next attempt and dead-letter rows are inserted, and
// the delivery row is updated last.
func (handle *AttemptExecutionStore) Commit(ctx context.Context, input executeattempt.CommitStateInput) error { if handle == nil || handle.store == nil { return errors.New("commit attempt: nil store") } if ctx == nil { return errors.New("commit attempt: nil context") } if err := input.Validate(); err != nil { return fmt.Errorf("commit attempt: %w", err) } return handle.store.withTx(ctx, "commit attempt", func(ctx context.Context, tx *sql.Tx) error { if err := lockDelivery(ctx, tx, input.Delivery.DeliveryID); err != nil { return fmt.Errorf("commit attempt: %w", err) } if err := updateAttempt(ctx, tx, input.Attempt); err != nil { return fmt.Errorf("commit attempt: update current attempt: %w", err) } if input.NextAttempt != nil { if err := insertAttempt(ctx, tx, *input.NextAttempt); err != nil { return fmt.Errorf("commit attempt: insert next attempt: %w", err) } } if input.DeadLetter != nil { if err := insertDeadLetter(ctx, tx, *input.DeadLetter); err != nil { return fmt.Errorf("commit attempt: insert dead-letter: %w", err) } } if err := updateDelivery(ctx, tx, input.Delivery, input.NextAttempt); err != nil { return fmt.Errorf("commit attempt: update delivery: %w", err) } return nil }) } // NextDueDeliveryIDs returns up to limit due delivery identifiers ordered by // next_attempt_at. The query uses `FOR UPDATE SKIP LOCKED` to allow multiple // schedulers to run concurrently without contending on the same row. 
func (handle *AttemptExecutionStore) NextDueDeliveryIDs(ctx context.Context, now time.Time, limit int64) ([]common.DeliveryID, error) { if handle == nil || handle.store == nil { return nil, errors.New("next due delivery ids: nil store") } if ctx == nil { return nil, errors.New("next due delivery ids: nil context") } if limit <= 0 { return nil, errors.New("next due delivery ids: non-positive limit") } operationCtx, cancel, err := handle.store.operationContext(ctx, "next due delivery ids") if err != nil { return nil, err } defer cancel() stmt := pg.SELECT(pgtable.Deliveries.DeliveryID). FROM(pgtable.Deliveries). WHERE(pg.AND( pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL(), pgtable.Deliveries.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())), )). ORDER_BY(pgtable.Deliveries.NextAttemptAt.ASC()). LIMIT(limit) query, args := stmt.Sql() rows, err := handle.store.db.QueryContext(operationCtx, query, args...) if err != nil { return nil, fmt.Errorf("next due delivery ids: %w", err) } defer rows.Close() out := make([]common.DeliveryID, 0, limit) for rows.Next() { var id string if err := rows.Scan(&id); err != nil { return nil, fmt.Errorf("next due delivery ids: scan: %w", err) } out = append(out, common.DeliveryID(id)) } if err := rows.Err(); err != nil { return nil, fmt.Errorf("next due delivery ids: %w", err) } return out, nil } // SendingDeliveryIDs returns every delivery currently held by an in-progress // attempt. The recovery loop uses the result to identify rows whose claim // might have expired. func (handle *AttemptExecutionStore) SendingDeliveryIDs(ctx context.Context) ([]common.DeliveryID, error) { if handle == nil || handle.store == nil { return nil, errors.New("sending delivery ids: nil store") } if ctx == nil { return nil, errors.New("sending delivery ids: nil context") } operationCtx, cancel, err := handle.store.operationContext(ctx, "sending delivery ids") if err != nil { return nil, err } defer cancel() stmt := pg.SELECT(pgtable.Deliveries.DeliveryID). 
FROM(pgtable.Deliveries). WHERE(pgtable.Deliveries.Status.EQ(pg.String(string(deliverydomain.StatusSending)))) query, args := stmt.Sql() rows, err := handle.store.db.QueryContext(operationCtx, query, args...) if err != nil { return nil, fmt.Errorf("sending delivery ids: %w", err) } defer rows.Close() out := []common.DeliveryID{} for rows.Next() { var id string if err := rows.Scan(&id); err != nil { return nil, fmt.Errorf("sending delivery ids: scan: %w", err) } out = append(out, common.DeliveryID(id)) } if err := rows.Err(); err != nil { return nil, fmt.Errorf("sending delivery ids: %w", err) } return out, nil } // LoadWorkItem returns the active attempt and delivery row for deliveryID. // found is false when the delivery row does not exist. func (handle *AttemptExecutionStore) LoadWorkItem(ctx context.Context, deliveryID common.DeliveryID) (executeattempt.WorkItem, bool, error) { if handle == nil || handle.store == nil { return executeattempt.WorkItem{}, false, errors.New("load work item: nil store") } if ctx == nil { return executeattempt.WorkItem{}, false, errors.New("load work item: nil context") } if err := deliveryID.Validate(); err != nil { return executeattempt.WorkItem{}, false, fmt.Errorf("load work item: %w", err) } operationCtx, cancel, err := handle.store.operationContext(ctx, "load work item") if err != nil { return executeattempt.WorkItem{}, false, err } defer cancel() delivery, ok, err := loadDeliveryByID(operationCtx, handle.store.db, deliveryID) if err != nil { return executeattempt.WorkItem{}, false, fmt.Errorf("load work item: %w", err) } if !ok { return executeattempt.WorkItem{}, false, nil } if delivery.AttemptCount == 0 { return executeattempt.WorkItem{}, false, fmt.Errorf("load work item %q: zero attempt count", deliveryID) } active, err := loadActiveAttempt(operationCtx, handle.store.db, deliveryID, delivery.AttemptCount) if err != nil { return executeattempt.WorkItem{}, false, fmt.Errorf("load work item: load active attempt: %w", err) } 
return executeattempt.WorkItem{Delivery: delivery, Attempt: active}, true, nil } // ClaimDueAttempt atomically claims the due scheduled attempt for deliveryID // inside one transaction. The delivery transitions to `sending`, the active // attempt to `in_progress`. found is false when no claimable row exists at // now. func (handle *AttemptExecutionStore) ClaimDueAttempt(ctx context.Context, deliveryID common.DeliveryID, now time.Time) (executeattempt.WorkItem, bool, error) { if handle == nil || handle.store == nil { return executeattempt.WorkItem{}, false, errors.New("claim due attempt: nil store") } if ctx == nil { return executeattempt.WorkItem{}, false, errors.New("claim due attempt: nil context") } if err := deliveryID.Validate(); err != nil { return executeattempt.WorkItem{}, false, fmt.Errorf("claim due attempt: %w", err) } var ( claimed executeattempt.WorkItem found bool ) err := handle.store.withTx(ctx, "claim due attempt", func(ctx context.Context, tx *sql.Tx) error { stmt := pg.SELECT(deliverySelectColumns). FROM(pgtable.Deliveries). WHERE(pg.AND( pgtable.Deliveries.DeliveryID.EQ(pg.String(deliveryID.String())), pgtable.Deliveries.Status.IN( pg.String(string(deliverydomain.StatusQueued)), pg.String(string(deliverydomain.StatusRendered)), ), pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL(), pgtable.Deliveries.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())), )). FOR(pg.UPDATE().SKIP_LOCKED()) query, args := stmt.Sql() row := tx.QueryRowContext(ctx, query, args...) 
delivery, _, err := scanDelivery(row) if errors.Is(err, sql.ErrNoRows) { return nil } if err != nil { return fmt.Errorf("claim due attempt: load delivery: %w", err) } envelope, err := loadEnvelope(ctx, tx, deliveryID) if err != nil { return fmt.Errorf("claim due attempt: load envelope: %w", err) } delivery.Envelope = envelope active, err := loadActiveAttempt(ctx, tx, deliveryID, delivery.AttemptCount) if err != nil { return fmt.Errorf("claim due attempt: load active attempt: %w", err) } if active.Status != attempt.StatusScheduled { return nil } nowUTC := now.UTC().Truncate(time.Millisecond) active.Status = attempt.StatusInProgress active.StartedAt = &nowUTC delivery.Status = deliverydomain.StatusSending delivery.LastAttemptStatus = attempt.StatusInProgress delivery.UpdatedAt = nowUTC if err := updateAttempt(ctx, tx, active); err != nil { return fmt.Errorf("claim due attempt: update attempt: %w", err) } if err := updateDelivery(ctx, tx, delivery, nil); err != nil { return fmt.Errorf("claim due attempt: update delivery: %w", err) } claimed = executeattempt.WorkItem{Delivery: delivery, Attempt: active} found = true return nil }) if err != nil { return executeattempt.WorkItem{}, false, err } return claimed, found, nil } // RemoveScheduledDelivery clears next_attempt_at for deliveryID. The // scheduler calls this when it discovers a stale schedule entry that no // longer points to a claimable delivery. 
func (handle *AttemptExecutionStore) RemoveScheduledDelivery(ctx context.Context, deliveryID common.DeliveryID) error { if handle == nil || handle.store == nil { return errors.New("remove scheduled delivery: nil store") } if ctx == nil { return errors.New("remove scheduled delivery: nil context") } if err := deliveryID.Validate(); err != nil { return fmt.Errorf("remove scheduled delivery: %w", err) } operationCtx, cancel, err := handle.store.operationContext(ctx, "remove scheduled delivery") if err != nil { return err } defer cancel() stmt := pgtable.Deliveries.UPDATE(pgtable.Deliveries.NextAttemptAt). SET(pg.NULL). WHERE(pgtable.Deliveries.DeliveryID.EQ(pg.String(deliveryID.String()))) query, args := stmt.Sql() if _, err := handle.store.db.ExecContext(operationCtx, query, args...); err != nil { return fmt.Errorf("remove scheduled delivery: %w", err) } return nil } // ReadAttemptScheduleSnapshot returns the current attempt-schedule depth and // oldest scheduled timestamp. The runtime exposes this via the telemetry // snapshot reader contract. func (handle *AttemptExecutionStore) ReadAttemptScheduleSnapshot(ctx context.Context) (telemetry.AttemptScheduleSnapshot, error) { if handle == nil || handle.store == nil { return telemetry.AttemptScheduleSnapshot{}, errors.New("read attempt schedule snapshot: nil store") } if ctx == nil { return telemetry.AttemptScheduleSnapshot{}, errors.New("read attempt schedule snapshot: nil context") } operationCtx, cancel, err := handle.store.operationContext(ctx, "read attempt schedule snapshot") if err != nil { return telemetry.AttemptScheduleSnapshot{}, err } defer cancel() stmt := pg.SELECT( pg.COUNT(pg.STAR), pg.MIN(pgtable.Deliveries.NextAttemptAt), ).FROM(pgtable.Deliveries). WHERE(pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL()) query, args := stmt.Sql() row := handle.store.db.QueryRowContext(operationCtx, query, args...) 
var ( count int64 oldest sql.NullTime summary telemetry.AttemptScheduleSnapshot ) if err := row.Scan(&count, &oldest); err != nil { return telemetry.AttemptScheduleSnapshot{}, fmt.Errorf("read attempt schedule snapshot: %w", err) } summary.Depth = count if oldest.Valid { oldestUTC := oldest.Time.UTC() summary.OldestScheduledFor = &oldestUTC } return summary, nil }