Files
galaxy-game/mail/internal/adapters/postgres/mailstore/attempt_execution.go
T
2026-04-26 20:34:39 +02:00

355 lines
12 KiB
Go

package mailstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
pgtable "galaxy/mail/internal/adapters/postgres/jet/mail/table"
"galaxy/mail/internal/domain/attempt"
"galaxy/mail/internal/domain/common"
deliverydomain "galaxy/mail/internal/domain/delivery"
"galaxy/mail/internal/service/acceptgenericdelivery"
"galaxy/mail/internal/service/executeattempt"
"galaxy/mail/internal/telemetry"
pg "github.com/go-jet/jet/v2/postgres"
)
// LoadPayload fetches the raw attachment payload bundle recorded for
// deliveryID. It is a direct pass-through to GetDeliveryPayload and exists so
// that Store satisfies executeattempt.PayloadLoader.
func (store *Store) LoadPayload(ctx context.Context, deliveryID common.DeliveryID) (acceptgenericdelivery.DeliveryPayload, bool, error) {
	payload, ok, err := store.GetDeliveryPayload(ctx, deliveryID)
	return payload, ok, err
}
// AttemptExecution builds a fresh AttemptExecutionStore bound to store. The
// returned handle satisfies executeattempt.Store and the
// worker.AttemptExecutionStore contract used by the scheduler.
func (store *Store) AttemptExecution() *AttemptExecutionStore {
	handle := new(AttemptExecutionStore)
	handle.store = store
	return handle
}
// AttemptExecutionStore is the executeattempt.Store handle returned by
// Store.AttemptExecution. It carries no state of its own: every method
// operates through the parent Store it points at, and every method rejects a
// nil handle or a nil parent with an error instead of panicking.
type AttemptExecutionStore struct {
// store is the parent Store whose database handle and transaction helpers
// the methods of this type use.
store *Store
}
// Compile-time assertion that *AttemptExecutionStore implements
// executeattempt.Store.
var _ executeattempt.Store = (*AttemptExecutionStore)(nil)
// Commit durably records the outcome of one attempt as a single transaction.
//
// After input passes Validate, the following steps run in order inside one
// withTx call, and the first failing step aborts the remainder:
//
//  1. lock the delivery row identified by input.Delivery.DeliveryID;
//  2. update the current attempt (input.Attempt);
//  3. insert the next scheduled attempt when input.NextAttempt is set;
//  4. insert the dead-letter row when input.DeadLetter is set;
//  5. update the delivery row, passing input.NextAttempt along.
//
// Every error produced here is prefixed with "commit attempt". A nil handle,
// nil parent store, or nil ctx is reported as an error.
func (handle *AttemptExecutionStore) Commit(ctx context.Context, input executeattempt.CommitStateInput) error {
	switch {
	case handle == nil || handle.store == nil:
		return errors.New("commit attempt: nil store")
	case ctx == nil:
		return errors.New("commit attempt: nil context")
	}
	if invalid := input.Validate(); invalid != nil {
		return fmt.Errorf("commit attempt: %w", invalid)
	}

	// apply holds the transactional steps; the order below is significant
	// because the delivery row is locked before anything is written.
	apply := func(txCtx context.Context, tx *sql.Tx) error {
		lockErr := lockDelivery(txCtx, tx, input.Delivery.DeliveryID)
		if lockErr != nil {
			return fmt.Errorf("commit attempt: %w", lockErr)
		}

		currentErr := updateAttempt(txCtx, tx, input.Attempt)
		if currentErr != nil {
			return fmt.Errorf("commit attempt: update current attempt: %w", currentErr)
		}

		if next := input.NextAttempt; next != nil {
			if nextErr := insertAttempt(txCtx, tx, *next); nextErr != nil {
				return fmt.Errorf("commit attempt: insert next attempt: %w", nextErr)
			}
		}

		if deadLetter := input.DeadLetter; deadLetter != nil {
			if deadErr := insertDeadLetter(txCtx, tx, *deadLetter); deadErr != nil {
				return fmt.Errorf("commit attempt: insert dead-letter: %w", deadErr)
			}
		}

		deliveryErr := updateDelivery(txCtx, tx, input.Delivery, input.NextAttempt)
		if deliveryErr != nil {
			return fmt.Errorf("commit attempt: update delivery: %w", deliveryErr)
		}
		return nil
	}

	return handle.store.withTx(ctx, "commit attempt", apply)
}
// NextDueDeliveryIDs returns up to limit delivery identifiers whose
// next_attempt_at is set and not later than now (compared in UTC), ordered by
// next_attempt_at ascending.
//
// The query is a plain SELECT issued on the store's database handle and takes
// no row locks, so concurrent schedulers may receive overlapping identifiers.
// Rows are selected by next_attempt_at only; delivery status is not checked
// here. Exclusive ownership of a delivery is established afterwards by
// ClaimDueAttempt, which locks the row with `FOR UPDATE SKIP LOCKED`.
//
// limit must be positive. A nil handle, nil parent store, or nil ctx is
// reported as an error. On success the result is a non-nil slice, possibly
// empty.
func (handle *AttemptExecutionStore) NextDueDeliveryIDs(ctx context.Context, now time.Time, limit int64) ([]common.DeliveryID, error) {
	// maxPreallocatedIDs caps the capacity reserved before any row is read.
	// limit comes from the caller; reserving it verbatim would panic or
	// over-allocate for very large values even when few rows are due.
	const maxPreallocatedIDs = 1024

	if handle == nil || handle.store == nil {
		return nil, errors.New("next due delivery ids: nil store")
	}
	if ctx == nil {
		return nil, errors.New("next due delivery ids: nil context")
	}
	if limit <= 0 {
		return nil, errors.New("next due delivery ids: non-positive limit")
	}

	operationCtx, cancel, err := handle.store.operationContext(ctx, "next due delivery ids")
	if err != nil {
		return nil, err
	}
	defer cancel()

	stmt := pg.SELECT(pgtable.Deliveries.DeliveryID).
		FROM(pgtable.Deliveries).
		WHERE(pg.AND(
			pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL(),
			pgtable.Deliveries.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
		)).
		ORDER_BY(pgtable.Deliveries.NextAttemptAt.ASC()).
		LIMIT(limit)

	query, args := stmt.Sql()
	rows, err := handle.store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("next due delivery ids: %w", err)
	}
	defer rows.Close()

	// The SQL LIMIT still bounds the result; only the initial reservation is
	// capped, and append grows the slice if more rows arrive.
	capacity := limit
	if capacity > maxPreallocatedIDs {
		capacity = maxPreallocatedIDs
	}
	out := make([]common.DeliveryID, 0, capacity)
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err != nil {
			return nil, fmt.Errorf("next due delivery ids: scan: %w", err)
		}
		out = append(out, common.DeliveryID(id))
	}
	// rows.Next returns false on both exhaustion and failure; Err tells them
	// apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("next due delivery ids: %w", err)
	}
	return out, nil
}
// SendingDeliveryIDs lists the identifier of every delivery whose status is
// `sending`, i.e. every delivery currently held by an in-progress attempt.
// The recovery loop uses the result to identify rows whose claim might have
// expired.
//
// The result is always a non-nil slice on success, even when no delivery is
// in the sending state. A nil handle, nil parent store, or nil ctx is
// reported as an error.
func (handle *AttemptExecutionStore) SendingDeliveryIDs(ctx context.Context) ([]common.DeliveryID, error) {
	switch {
	case handle == nil || handle.store == nil:
		return nil, errors.New("sending delivery ids: nil store")
	case ctx == nil:
		return nil, errors.New("sending delivery ids: nil context")
	}

	opCtx, release, err := handle.store.operationContext(ctx, "sending delivery ids")
	if err != nil {
		return nil, err
	}
	defer release()

	sendingStatus := pg.String(string(deliverydomain.StatusSending))
	selectSending := pg.SELECT(pgtable.Deliveries.DeliveryID).
		FROM(pgtable.Deliveries).
		WHERE(pgtable.Deliveries.Status.EQ(sendingStatus))

	sqlText, params := selectSending.Sql()
	rows, err := handle.store.db.QueryContext(opCtx, sqlText, params...)
	if err != nil {
		return nil, fmt.Errorf("sending delivery ids: %w", err)
	}
	defer rows.Close()

	// Deliberately non-nil so callers always receive a slice value.
	ids := make([]common.DeliveryID, 0)
	for rows.Next() {
		var raw string
		if scanErr := rows.Scan(&raw); scanErr != nil {
			return nil, fmt.Errorf("sending delivery ids: scan: %w", scanErr)
		}
		ids = append(ids, common.DeliveryID(raw))
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, fmt.Errorf("sending delivery ids: %w", iterErr)
	}
	return ids, nil
}
// LoadWorkItem reads the delivery row for deliveryID together with its active
// attempt, which is looked up using the delivery's AttemptCount.
//
// The boolean result is false, with a nil error, when the delivery row does
// not exist. A delivery whose AttemptCount is zero is reported as an error,
// as are an invalid deliveryID, a nil handle, a nil parent store, and a nil
// ctx.
func (handle *AttemptExecutionStore) LoadWorkItem(ctx context.Context, deliveryID common.DeliveryID) (executeattempt.WorkItem, bool, error) {
	// none is the zero WorkItem returned on every non-success path.
	var none executeattempt.WorkItem

	switch {
	case handle == nil || handle.store == nil:
		return none, false, errors.New("load work item: nil store")
	case ctx == nil:
		return none, false, errors.New("load work item: nil context")
	}
	if invalid := deliveryID.Validate(); invalid != nil {
		return none, false, fmt.Errorf("load work item: %w", invalid)
	}

	opCtx, release, err := handle.store.operationContext(ctx, "load work item")
	if err != nil {
		return none, false, err
	}
	defer release()

	record, exists, err := loadDeliveryByID(opCtx, handle.store.db, deliveryID)
	switch {
	case err != nil:
		return none, false, fmt.Errorf("load work item: %w", err)
	case !exists:
		return none, false, nil
	case record.AttemptCount == 0:
		return none, false, fmt.Errorf("load work item %q: zero attempt count", deliveryID)
	}

	current, err := loadActiveAttempt(opCtx, handle.store.db, deliveryID, record.AttemptCount)
	if err != nil {
		return none, false, fmt.Errorf("load work item: load active attempt: %w", err)
	}

	item := executeattempt.WorkItem{Delivery: record, Attempt: current}
	return item, true, nil
}
// ClaimDueAttempt atomically claims the due scheduled attempt for deliveryID
// inside one transaction. The delivery transitions to `sending`, the active
// attempt to `in_progress`. found is false when no claimable row exists at
// now.
//
// A delivery is claimable when its status is `queued` or `rendered`, its
// next_attempt_at is set and not later than now (compared in UTC), its row is
// not locked by another transaction, and its active attempt is still
// `scheduled`. When any of these does not hold the method returns a zero
// WorkItem, found == false and a nil error without writing anything.
//
// On success the returned WorkItem carries the already-mutated delivery
// (including its envelope) and attempt values that were written.
//
// A nil handle, nil parent store, nil ctx, or invalid deliveryID is reported
// as an error. Errors raised inside the transaction are returned as produced
// by withTx.
func (handle *AttemptExecutionStore) ClaimDueAttempt(ctx context.Context, deliveryID common.DeliveryID, now time.Time) (executeattempt.WorkItem, bool, error) {
if handle == nil || handle.store == nil {
return executeattempt.WorkItem{}, false, errors.New("claim due attempt: nil store")
}
if ctx == nil {
return executeattempt.WorkItem{}, false, errors.New("claim due attempt: nil context")
}
if err := deliveryID.Validate(); err != nil {
return executeattempt.WorkItem{}, false, fmt.Errorf("claim due attempt: %w", err)
}
// Results are captured by the transaction closure below; they keep their
// zero values on every "nothing to claim" path.
var (
claimed executeattempt.WorkItem
found bool
)
err := handle.store.withTx(ctx, "claim due attempt", func(ctx context.Context, tx *sql.Tx) error {
// Select and lock the delivery row only if it is due and in a claimable
// status. SKIP LOCKED makes a row held by another transaction come back
// as "no rows" instead of blocking.
stmt := pg.SELECT(deliverySelectColumns).
FROM(pgtable.Deliveries).
WHERE(pg.AND(
pgtable.Deliveries.DeliveryID.EQ(pg.String(deliveryID.String())),
pgtable.Deliveries.Status.IN(
pg.String(string(deliverydomain.StatusQueued)),
pg.String(string(deliverydomain.StatusRendered)),
),
pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL(),
pgtable.Deliveries.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
)).
FOR(pg.UPDATE().SKIP_LOCKED())
query, args := stmt.Sql()
row := tx.QueryRowContext(ctx, query, args...)
// The second result of scanDelivery is discarded here.
delivery, _, err := scanDelivery(row)
if errors.Is(err, sql.ErrNoRows) {
// Not due, wrong status, missing, or locked elsewhere: nothing to claim.
return nil
}
if err != nil {
return fmt.Errorf("claim due attempt: load delivery: %w", err)
}
// The envelope is loaded separately and attached to the delivery value.
envelope, err := loadEnvelope(ctx, tx, deliveryID)
if err != nil {
return fmt.Errorf("claim due attempt: load envelope: %w", err)
}
delivery.Envelope = envelope
// NOTE(review): unlike LoadWorkItem, AttemptCount is not checked for zero
// before this lookup — confirm loadActiveAttempt handles that case.
active, err := loadActiveAttempt(ctx, tx, deliveryID, delivery.AttemptCount)
if err != nil {
return fmt.Errorf("claim due attempt: load active attempt: %w", err)
}
if active.Status != attempt.StatusScheduled {
// The active attempt is not waiting to run; leave everything untouched.
return nil
}
// One timestamp, UTC and truncated to milliseconds, is used for both
// records. NOTE(review): truncation presumably matches the stored
// timestamp precision — confirm.
nowUTC := now.UTC().Truncate(time.Millisecond)
active.Status = attempt.StatusInProgress
active.StartedAt = &nowUTC
delivery.Status = deliverydomain.StatusSending
delivery.LastAttemptStatus = attempt.StatusInProgress
delivery.UpdatedAt = nowUTC
if err := updateAttempt(ctx, tx, active); err != nil {
return fmt.Errorf("claim due attempt: update attempt: %w", err)
}
// No next attempt is passed. NOTE(review): presumably this clears
// next_attempt_at on the delivery row — confirm against updateDelivery.
if err := updateDelivery(ctx, tx, delivery, nil); err != nil {
return fmt.Errorf("claim due attempt: update delivery: %w", err)
}
claimed = executeattempt.WorkItem{Delivery: delivery, Attempt: active}
found = true
return nil
})
if err != nil {
// Discard any captured values so a failed transaction never leaks a claim.
return executeattempt.WorkItem{}, false, err
}
return claimed, found, nil
}
// RemoveScheduledDelivery sets next_attempt_at to NULL on the delivery row
// identified by deliveryID. The scheduler calls this when it discovers a
// stale schedule entry that no longer points to a claimable delivery.
//
// The number of affected rows is not inspected, so a missing delivery is not
// an error. A nil handle, nil parent store, nil ctx, or invalid deliveryID is
// reported as an error.
func (handle *AttemptExecutionStore) RemoveScheduledDelivery(ctx context.Context, deliveryID common.DeliveryID) error {
	switch {
	case handle == nil || handle.store == nil:
		return errors.New("remove scheduled delivery: nil store")
	case ctx == nil:
		return errors.New("remove scheduled delivery: nil context")
	}
	if invalid := deliveryID.Validate(); invalid != nil {
		return fmt.Errorf("remove scheduled delivery: %w", invalid)
	}

	opCtx, release, err := handle.store.operationContext(ctx, "remove scheduled delivery")
	if err != nil {
		return err
	}
	defer release()

	byID := pgtable.Deliveries.DeliveryID.EQ(pg.String(deliveryID.String()))
	clearSchedule := pgtable.Deliveries.
		UPDATE(pgtable.Deliveries.NextAttemptAt).
		SET(pg.NULL).
		WHERE(byID)

	sqlText, params := clearSchedule.Sql()
	_, err = handle.store.db.ExecContext(opCtx, sqlText, params...)
	if err != nil {
		return fmt.Errorf("remove scheduled delivery: %w", err)
	}
	return nil
}
// ReadAttemptScheduleSnapshot reports the current state of the attempt
// schedule: Depth is the number of deliveries with a non-NULL
// next_attempt_at, and OldestScheduledFor is the smallest such timestamp
// converted to UTC. OldestScheduledFor stays nil when no delivery is
// scheduled. The runtime exposes this via the telemetry snapshot reader
// contract.
//
// A nil handle, nil parent store, or nil ctx is reported as an error.
func (handle *AttemptExecutionStore) ReadAttemptScheduleSnapshot(ctx context.Context) (telemetry.AttemptScheduleSnapshot, error) {
	// none is the zero snapshot returned alongside every error.
	var none telemetry.AttemptScheduleSnapshot

	switch {
	case handle == nil || handle.store == nil:
		return none, errors.New("read attempt schedule snapshot: nil store")
	case ctx == nil:
		return none, errors.New("read attempt schedule snapshot: nil context")
	}

	opCtx, release, err := handle.store.operationContext(ctx, "read attempt schedule snapshot")
	if err != nil {
		return none, err
	}
	defer release()

	aggregate := pg.SELECT(
		pg.COUNT(pg.STAR),
		pg.MIN(pgtable.Deliveries.NextAttemptAt),
	).
		FROM(pgtable.Deliveries).
		WHERE(pgtable.Deliveries.NextAttemptAt.IS_NOT_NULL())
	sqlText, params := aggregate.Sql()

	var (
		depth    int64
		earliest sql.NullTime
	)
	err = handle.store.db.QueryRowContext(opCtx, sqlText, params...).Scan(&depth, &earliest)
	if err != nil {
		return none, fmt.Errorf("read attempt schedule snapshot: %w", err)
	}

	snapshot := telemetry.AttemptScheduleSnapshot{Depth: depth}
	// MIN over zero rows yields NULL, in which case the pointer stays nil.
	if earliest.Valid {
		earliestUTC := earliest.Time.UTC()
		snapshot.OldestScheduledFor = &earliestUTC
	}
	return snapshot, nil
}