263 lines
8.8 KiB
Go
263 lines
8.8 KiB
Go
package notificationstore
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
|
|
"galaxy/notification/internal/service/acceptintent"
|
|
"galaxy/notification/internal/service/routestate"
|
|
"galaxy/notification/internal/telemetry"
|
|
|
|
pg "github.com/go-jet/jet/v2/postgres"
|
|
)
|
|
|
|
// scheduledRouteKey synthesises a stable, human-readable key for one
// ScheduledRoute by joining the notification id and the route id with a
// single "/". Notification publishers do not interpret the key beyond
// requiring it to be non-empty (`ScheduledRoute.Validate`).
func scheduledRouteKey(notificationID string, routeID string) string {
	return notificationID + "/" + routeID
}
|
|
|
|
// ListDueRoutes returns up to limit routes whose `next_attempt_at` is at or
|
|
// before now. The query is non-locking; per-row contention is resolved by
|
|
// the lease (Redis) plus the optimistic-concurrency check inside `Complete*`.
|
|
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
|
|
if store == nil {
|
|
return nil, errors.New("list due routes: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return nil, errors.New("list due routes: nil context")
|
|
}
|
|
if err := routestate.ValidateUTCMillisecondTimestamp("list due routes now", now); err != nil {
|
|
return nil, err
|
|
}
|
|
if limit <= 0 {
|
|
return nil, errors.New("list due routes: limit must be positive")
|
|
}
|
|
|
|
operationCtx, cancel, err := store.operationContext(ctx, "list due routes")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer cancel()
|
|
|
|
stmt := pg.SELECT(pgtable.Routes.NotificationID, pgtable.Routes.RouteID).
|
|
FROM(pgtable.Routes).
|
|
WHERE(pg.AND(
|
|
pgtable.Routes.NextAttemptAt.IS_NOT_NULL(),
|
|
pgtable.Routes.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
|
|
)).
|
|
ORDER_BY(pgtable.Routes.NextAttemptAt.ASC()).
|
|
LIMIT(limit)
|
|
|
|
query, args := stmt.Sql()
|
|
rows, err := store.db.QueryContext(operationCtx, query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("list due routes: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
out := make([]routestate.ScheduledRoute, 0, limit)
|
|
for rows.Next() {
|
|
var (
|
|
notificationID string
|
|
routeID string
|
|
)
|
|
if err := rows.Scan(¬ificationID, &routeID); err != nil {
|
|
return nil, fmt.Errorf("list due routes: scan: %w", err)
|
|
}
|
|
out = append(out, routestate.ScheduledRoute{
|
|
RouteKey: scheduledRouteKey(notificationID, routeID),
|
|
NotificationID: notificationID,
|
|
RouteID: routeID,
|
|
})
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, fmt.Errorf("list due routes: %w", err)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// ReadRouteScheduleSnapshot returns the current depth of the route schedule
|
|
// (rows with non-NULL `next_attempt_at`) together with the oldest scheduled
|
|
// timestamp when one exists. The runtime exposes this through the telemetry
|
|
// snapshot reader.
|
|
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
|
|
if store == nil {
|
|
return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil context")
|
|
}
|
|
|
|
operationCtx, cancel, err := store.operationContext(ctx, "read route schedule snapshot")
|
|
if err != nil {
|
|
return telemetry.RouteScheduleSnapshot{}, err
|
|
}
|
|
defer cancel()
|
|
|
|
stmt := pg.SELECT(
|
|
pg.COUNT(pg.STAR),
|
|
pg.MIN(pgtable.Routes.NextAttemptAt),
|
|
).
|
|
FROM(pgtable.Routes).
|
|
WHERE(pgtable.Routes.NextAttemptAt.IS_NOT_NULL())
|
|
|
|
query, args := stmt.Sql()
|
|
row := store.db.QueryRowContext(operationCtx, query, args...)
|
|
var (
|
|
depth int64
|
|
oldest sql.NullTime
|
|
summary telemetry.RouteScheduleSnapshot
|
|
)
|
|
if err := row.Scan(&depth, &oldest); err != nil {
|
|
return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: %w", err)
|
|
}
|
|
summary.Depth = depth
|
|
if oldest.Valid {
|
|
oldestUTC := oldest.Time.UTC()
|
|
summary.OldestScheduledFor = &oldestUTC
|
|
}
|
|
return summary, nil
|
|
}
|
|
|
|
// CompleteRoutePublished marks the expected route as `published`,
|
|
// increments attempt_count, and clears retry/error fields. Optimistic
|
|
// concurrency on `updated_at` rejects races that happened since the
|
|
// publisher loaded the row; a mismatch surfaces as `routestate.ErrConflict`.
|
|
//
|
|
// Note: the outbound stream emission (XADD) happens in the publisher
|
|
// before this call. The store deliberately ignores the input.Stream and
|
|
// input.StreamValues fields — they are kept on the input only so the
|
|
// publisher can pass one struct around through its state machine.
|
|
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
|
|
if store == nil {
|
|
return errors.New("complete route published: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return errors.New("complete route published: nil context")
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return fmt.Errorf("complete route published: %w", err)
|
|
}
|
|
|
|
updated := input.ExpectedRoute
|
|
updated.Status = acceptintent.RouteStatusPublished
|
|
updated.AttemptCount++
|
|
updated.NextAttemptAt = time.Time{}
|
|
updated.LastErrorClassification = ""
|
|
updated.LastErrorMessage = ""
|
|
updated.LastErrorAt = time.Time{}
|
|
updated.UpdatedAt = input.PublishedAt
|
|
updated.PublishedAt = input.PublishedAt
|
|
updated.DeadLetteredAt = time.Time{}
|
|
|
|
return store.withTx(ctx, "complete route published", func(ctx context.Context, tx *sql.Tx) error {
|
|
rows, err := updateRouteIfMatching(ctx, tx, updated, input.ExpectedRoute.UpdatedAt)
|
|
if err != nil {
|
|
return fmt.Errorf("complete route published: %w", err)
|
|
}
|
|
if rows == 0 {
|
|
return routestate.ErrConflict
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// CompleteRouteFailed records one retryable publication failure: increments
|
|
// attempt_count, populates the last-error fields, and reschedules the row
|
|
// at `NextAttemptAt`.
|
|
func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error {
|
|
if store == nil {
|
|
return errors.New("complete route failed: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return errors.New("complete route failed: nil context")
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return fmt.Errorf("complete route failed: %w", err)
|
|
}
|
|
|
|
updated := input.ExpectedRoute
|
|
updated.Status = acceptintent.RouteStatusFailed
|
|
updated.AttemptCount++
|
|
updated.NextAttemptAt = input.NextAttemptAt
|
|
updated.LastErrorClassification = input.FailureClassification
|
|
updated.LastErrorMessage = input.FailureMessage
|
|
updated.LastErrorAt = input.FailedAt
|
|
updated.UpdatedAt = input.FailedAt
|
|
|
|
return store.withTx(ctx, "complete route failed", func(ctx context.Context, tx *sql.Tx) error {
|
|
rows, err := updateRouteIfMatching(ctx, tx, updated, input.ExpectedRoute.UpdatedAt)
|
|
if err != nil {
|
|
return fmt.Errorf("complete route failed: %w", err)
|
|
}
|
|
if rows == 0 {
|
|
return routestate.ErrConflict
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// CompleteRouteDeadLetter records one terminal publication failure:
|
|
// marks the route `dead_letter`, clears the schedule, and inserts the
|
|
// dead-letter audit row.
|
|
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error {
|
|
if store == nil {
|
|
return errors.New("complete route dead letter: nil store")
|
|
}
|
|
if ctx == nil {
|
|
return errors.New("complete route dead letter: nil context")
|
|
}
|
|
if err := input.Validate(); err != nil {
|
|
return fmt.Errorf("complete route dead letter: %w", err)
|
|
}
|
|
|
|
updated := input.ExpectedRoute
|
|
updated.Status = acceptintent.RouteStatusDeadLetter
|
|
updated.AttemptCount++
|
|
updated.NextAttemptAt = time.Time{}
|
|
updated.LastErrorClassification = input.FailureClassification
|
|
updated.LastErrorMessage = input.FailureMessage
|
|
updated.LastErrorAt = input.DeadLetteredAt
|
|
updated.UpdatedAt = input.DeadLetteredAt
|
|
updated.DeadLetteredAt = input.DeadLetteredAt
|
|
|
|
if updated.AttemptCount < updated.MaxAttempts {
|
|
return fmt.Errorf(
|
|
"complete route dead letter: final attempt count %d is below max attempts %d",
|
|
updated.AttemptCount,
|
|
updated.MaxAttempts,
|
|
)
|
|
}
|
|
|
|
return store.withTx(ctx, "complete route dead letter", func(ctx context.Context, tx *sql.Tx) error {
|
|
rows, err := updateRouteIfMatching(ctx, tx, updated, input.ExpectedRoute.UpdatedAt)
|
|
if err != nil {
|
|
return fmt.Errorf("complete route dead letter: %w", err)
|
|
}
|
|
if rows == 0 {
|
|
return routestate.ErrConflict
|
|
}
|
|
if err := insertDeadLetter(ctx, tx, deadLetterRow{
|
|
NotificationID: updated.NotificationID,
|
|
RouteID: updated.RouteID,
|
|
Channel: string(updated.Channel),
|
|
RecipientRef: updated.RecipientRef,
|
|
FinalAttemptCount: updated.AttemptCount,
|
|
MaxAttempts: updated.MaxAttempts,
|
|
FailureClassification: input.FailureClassification,
|
|
FailureMessage: input.FailureMessage,
|
|
RecoveryHint: input.RecoveryHint,
|
|
CreatedAt: input.DeadLetteredAt,
|
|
}); err != nil {
|
|
return fmt.Errorf("complete route dead letter: %w", err)
|
|
}
|
|
return nil
|
|
})
|
|
}
|