package notificationstore

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"time"

	pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
	"galaxy/notification/internal/service/acceptintent"
	"galaxy/notification/internal/service/routestate"
	"galaxy/notification/internal/telemetry"

	pg "github.com/go-jet/jet/v2/postgres"
)

// scheduledRouteKey synthesises a stable, human-readable key for one
// ScheduledRoute. Notification publishers do not interpret the key beyond
// requiring it to be non-empty (`ScheduledRoute.Validate`).
//
// The key is notificationID and routeID joined by a single "/".
func scheduledRouteKey(notificationID string, routeID string) string {
	return notificationID + "/" + routeID
}

// ListDueRoutes returns up to limit routes whose `next_attempt_at` is at or
// before now, ordered by ascending `next_attempt_at`. The query is
// non-locking; per-row contention is resolved by the lease (Redis) plus the
// optimistic-concurrency check inside `Complete*`.
//
// It returns an error when store or ctx is nil, when now is rejected by
// routestate.ValidateUTCMillisecondTimestamp, when limit is not positive,
// or when the query, a row scan, or row iteration fails. On success the
// returned slice is non-nil, possibly empty.
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
	if store == nil {
		return nil, errors.New("list due routes: nil store")
	}
	if ctx == nil {
		return nil, errors.New("list due routes: nil context")
	}
	if err := routestate.ValidateUTCMillisecondTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "list due routes")
	if err != nil {
		return nil, err
	}
	defer cancel()

	stmt := pg.SELECT(pgtable.Routes.NotificationID, pgtable.Routes.RouteID).
		FROM(pgtable.Routes).
		WHERE(pg.AND(
			pgtable.Routes.NextAttemptAt.IS_NOT_NULL(),
			pgtable.Routes.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
		)).
		ORDER_BY(pgtable.Routes.NextAttemptAt.ASC()).
		LIMIT(limit)

	query, args := stmt.Sql()
	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	defer rows.Close()

	// limit is caller-controlled and only an upper bound on the result size,
	// so cap the up-front allocation: a very large limit must not become a
	// very large (or out-of-range) slice capacity. append grows the slice if
	// more rows than this actually come back.
	const maxPrealloc = 1024
	capacity := limit
	if capacity > maxPrealloc {
		capacity = maxPrealloc
	}

	out := make([]routestate.ScheduledRoute, 0, capacity)
	for rows.Next() {
		var (
			notificationID string
			routeID        string
		)
		// Scan writes through its destinations, so both must be pointers.
		if err := rows.Scan(&notificationID, &routeID); err != nil {
			return nil, fmt.Errorf("list due routes: scan: %w", err)
		}
		out = append(out, routestate.ScheduledRoute{
			RouteKey:       scheduledRouteKey(notificationID, routeID),
			NotificationID: notificationID,
			RouteID:        routeID,
		})
	}
	// rows.Next returning false can also mean the iteration failed.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	return out, nil
}

// ReadRouteScheduleSnapshot returns the current depth of the route schedule
// (rows with non-NULL `next_attempt_at`) together with the oldest scheduled
// timestamp when one exists. The runtime exposes this through the telemetry
// snapshot reader.
//
// OldestScheduledFor is left nil when no row is scheduled; otherwise it
// points at the minimum `next_attempt_at`, converted to UTC. An error is
// returned when store or ctx is nil or when the query/scan fails, in which
// case the snapshot is the zero value.
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	if store == nil {
		return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil store")
	}
	if ctx == nil {
		return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil context")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "read route schedule snapshot")
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, err
	}
	defer cancel()

	stmt := pg.SELECT(
		pg.COUNT(pg.STAR),
		pg.MIN(pgtable.Routes.NextAttemptAt),
	).
		FROM(pgtable.Routes).
		WHERE(pgtable.Routes.NextAttemptAt.IS_NOT_NULL())

	query, args := stmt.Sql()
	row := store.db.QueryRowContext(operationCtx, query, args...)

	var (
		depth   int64
		oldest  sql.NullTime // MIN over zero rows is NULL
		summary telemetry.RouteScheduleSnapshot
	)
	if err := row.Scan(&depth, &oldest); err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: %w", err)
	}
	summary.Depth = depth
	if oldest.Valid {
		oldestUTC := oldest.Time.UTC()
		summary.OldestScheduledFor = &oldestUTC
	}
	return summary, nil
}

// CompleteRoutePublished marks the expected route as `published`,
// increments attempt_count, and clears retry/error fields. Optimistic
// concurrency on `updated_at` rejects races that happened since the
// publisher loaded the row; a mismatch surfaces as `routestate.ErrConflict`.
//
// Note: the outbound stream emission (XADD) happens in the publisher
// before this call. The store deliberately ignores the input.Stream and
// input.StreamValues fields — they are kept on the input only so the
// publisher can pass one struct around through its state machine.
//
// An error is also returned when store or ctx is nil, when input fails
// validation, or when the update itself fails.
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
	if store == nil {
		return errors.New("complete route published: nil store")
	}
	if ctx == nil {
		return errors.New("complete route published: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}

	// Build the row to persist from the route the publisher loaded.
	updated := input.ExpectedRoute
	updated.Status = acceptintent.RouteStatusPublished
	updated.AttemptCount++
	updated.NextAttemptAt = time.Time{}
	updated.LastErrorClassification = ""
	updated.LastErrorMessage = ""
	updated.LastErrorAt = time.Time{}
	updated.UpdatedAt = input.PublishedAt
	updated.PublishedAt = input.PublishedAt
	updated.DeadLetteredAt = time.Time{}

	return store.withTx(ctx, "complete route published", func(ctx context.Context, tx *sql.Tx) error {
		// The expected UpdatedAt is the concurrency token for the update.
		rows, err := updateRouteIfMatching(ctx, tx, updated, input.ExpectedRoute.UpdatedAt)
		if err != nil {
			return fmt.Errorf("complete route published: %w", err)
		}
		if rows == 0 {
			return routestate.ErrConflict
		}
		return nil
	})
}

// CompleteRouteFailed
records one retryable publication failure: increments // attempt_count, populates the last-error fields, and reschedules the row // at `NextAttemptAt`. func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error { if store == nil { return errors.New("complete route failed: nil store") } if ctx == nil { return errors.New("complete route failed: nil context") } if err := input.Validate(); err != nil { return fmt.Errorf("complete route failed: %w", err) } updated := input.ExpectedRoute updated.Status = acceptintent.RouteStatusFailed updated.AttemptCount++ updated.NextAttemptAt = input.NextAttemptAt updated.LastErrorClassification = input.FailureClassification updated.LastErrorMessage = input.FailureMessage updated.LastErrorAt = input.FailedAt updated.UpdatedAt = input.FailedAt return store.withTx(ctx, "complete route failed", func(ctx context.Context, tx *sql.Tx) error { rows, err := updateRouteIfMatching(ctx, tx, updated, input.ExpectedRoute.UpdatedAt) if err != nil { return fmt.Errorf("complete route failed: %w", err) } if rows == 0 { return routestate.ErrConflict } return nil }) } // CompleteRouteDeadLetter records one terminal publication failure: // marks the route `dead_letter`, clears the schedule, and inserts the // dead-letter audit row. 
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error { if store == nil { return errors.New("complete route dead letter: nil store") } if ctx == nil { return errors.New("complete route dead letter: nil context") } if err := input.Validate(); err != nil { return fmt.Errorf("complete route dead letter: %w", err) } updated := input.ExpectedRoute updated.Status = acceptintent.RouteStatusDeadLetter updated.AttemptCount++ updated.NextAttemptAt = time.Time{} updated.LastErrorClassification = input.FailureClassification updated.LastErrorMessage = input.FailureMessage updated.LastErrorAt = input.DeadLetteredAt updated.UpdatedAt = input.DeadLetteredAt updated.DeadLetteredAt = input.DeadLetteredAt if updated.AttemptCount < updated.MaxAttempts { return fmt.Errorf( "complete route dead letter: final attempt count %d is below max attempts %d", updated.AttemptCount, updated.MaxAttempts, ) } return store.withTx(ctx, "complete route dead letter", func(ctx context.Context, tx *sql.Tx) error { rows, err := updateRouteIfMatching(ctx, tx, updated, input.ExpectedRoute.UpdatedAt) if err != nil { return fmt.Errorf("complete route dead letter: %w", err) } if rows == 0 { return routestate.ErrConflict } if err := insertDeadLetter(ctx, tx, deadLetterRow{ NotificationID: updated.NotificationID, RouteID: updated.RouteID, Channel: string(updated.Channel), RecipientRef: updated.RecipientRef, FinalAttemptCount: updated.AttemptCount, MaxAttempts: updated.MaxAttempts, FailureClassification: input.FailureClassification, FailureMessage: input.FailureMessage, RecoveryHint: input.RecoveryHint, CreatedAt: input.DeadLetteredAt, }); err != nil { return fmt.Errorf("complete route dead letter: %w", err) } return nil }) }