Files
galaxy-game/notification/internal/adapters/postgres/notificationstore/scheduler.go
T
2026-04-26 20:34:39 +02:00

263 lines
8.8 KiB
Go

package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/routestate"
"galaxy/notification/internal/telemetry"
pg "github.com/go-jet/jet/v2/postgres"
)
// scheduledRouteKey derives the key of one ScheduledRoute by joining the
// notification id and the route id with a single "/" between them, which
// keeps the key stable and readable. Notification publishers only require
// the key to be non-empty (`ScheduledRoute.Validate`); they do not interpret
// its contents.
func scheduledRouteKey(notificationID, routeID string) string {
	const separator = "/"

	return notificationID + separator + routeID
}
// ListDueRoutes returns up to limit routes whose `next_attempt_at` is at or
// before now, ordered by ascending `next_attempt_at`. The query is
// non-locking; per-row contention is resolved by the lease (Redis) plus the
// optimistic-concurrency check inside `Complete*`.
//
// An error is returned when store or ctx is nil, when now is rejected by
// routestate.ValidateUTCMillisecondTimestamp, when limit is not positive, or
// when the query, a row scan, or row iteration fails. On success the
// returned slice is non-nil and may be empty.
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
	// maxPreallocatedRoutes bounds the up-front slice capacity. limit is
	// caller-supplied and only checked to be positive, so sizing the slice
	// directly from it would let a very large limit force a huge allocation
	// (or a makeslice panic) before a single row has been read. append grows
	// the slice beyond this bound when more rows actually arrive.
	const maxPreallocatedRoutes = 1024

	if store == nil {
		return nil, errors.New("list due routes: nil store")
	}
	if ctx == nil {
		return nil, errors.New("list due routes: nil context")
	}
	if err := routestate.ValidateUTCMillisecondTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "list due routes")
	if err != nil {
		return nil, err
	}
	defer cancel()

	stmt := pg.SELECT(pgtable.Routes.NotificationID, pgtable.Routes.RouteID).
		FROM(pgtable.Routes).
		WHERE(pg.AND(
			pgtable.Routes.NextAttemptAt.IS_NOT_NULL(),
			pgtable.Routes.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
		)).
		ORDER_BY(pgtable.Routes.NextAttemptAt.ASC()).
		LIMIT(limit)

	query, args := stmt.Sql()
	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	defer rows.Close()

	capacity := limit
	if capacity > maxPreallocatedRoutes {
		capacity = maxPreallocatedRoutes
	}
	out := make([]routestate.ScheduledRoute, 0, capacity)

	for rows.Next() {
		var (
			notificationID string
			routeID        string
		)
		if err := rows.Scan(&notificationID, &routeID); err != nil {
			return nil, fmt.Errorf("list due routes: scan: %w", err)
		}
		out = append(out, routestate.ScheduledRoute{
			RouteKey:       scheduledRouteKey(notificationID, routeID),
			NotificationID: notificationID,
			RouteID:        routeID,
		})
	}
	// rows.Next returning false can also mean iteration failed part-way.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	return out, nil
}
// ReadRouteScheduleSnapshot reports how many routes are currently scheduled
// (rows whose `next_attempt_at` is non-NULL) and, when at least one exists,
// the earliest scheduled timestamp converted to UTC. The runtime exposes the
// result through the telemetry snapshot reader.
//
// A zero snapshot and an error are returned when store or ctx is nil, when
// the operation context cannot be derived, or when the query fails.
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	var empty telemetry.RouteScheduleSnapshot

	switch {
	case store == nil:
		return empty, errors.New("read route schedule snapshot: nil store")
	case ctx == nil:
		return empty, errors.New("read route schedule snapshot: nil context")
	}

	queryCtx, release, err := store.operationContext(ctx, "read route schedule snapshot")
	if err != nil {
		return empty, err
	}
	defer release()

	selectStmt := pg.SELECT(
		pg.COUNT(pg.STAR),
		pg.MIN(pgtable.Routes.NextAttemptAt),
	).
		FROM(pgtable.Routes).
		WHERE(pgtable.Routes.NextAttemptAt.IS_NOT_NULL())
	sqlText, params := selectStmt.Sql()

	var (
		count    int64
		earliest sql.NullTime
	)
	if err := store.db.QueryRowContext(queryCtx, sqlText, params...).Scan(&count, &earliest); err != nil {
		return empty, fmt.Errorf("read route schedule snapshot: %w", err)
	}

	snapshot := telemetry.RouteScheduleSnapshot{Depth: count}
	if !earliest.Valid {
		// MIN over zero rows is NULL: nothing is scheduled, so the oldest
		// timestamp stays unset.
		return snapshot, nil
	}
	earliestUTC := earliest.Time.UTC()
	snapshot.OldestScheduledFor = &earliestUTC
	return snapshot, nil
}
// CompleteRoutePublished transitions the expected route to `published`:
// attempt_count is incremented, the retry schedule, last-error fields and
// dead-letter timestamp are cleared, and both updated_at and published_at
// are set to input.PublishedAt. The update is guarded by optimistic
// concurrency on `updated_at`; when the row changed since the publisher
// loaded it no row matches and `routestate.ErrConflict` is returned.
//
// Note: the publisher performs the outbound stream emission (XADD) before
// calling this method. input.Stream and input.StreamValues are deliberately
// ignored here — they live on the input only so the publisher can carry a
// single struct through its state machine.
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
	switch {
	case store == nil:
		return errors.New("complete route published: nil store")
	case ctx == nil:
		return errors.New("complete route published: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}

	// Zero time marks a timestamp column as cleared.
	var cleared time.Time

	next := input.ExpectedRoute
	next.AttemptCount++
	next.Status = acceptintent.RouteStatusPublished
	next.PublishedAt = input.PublishedAt
	next.UpdatedAt = input.PublishedAt
	next.NextAttemptAt = cleared
	next.DeadLetteredAt = cleared
	next.LastErrorAt = cleared
	next.LastErrorClassification = ""
	next.LastErrorMessage = ""

	apply := func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("complete route published: %w", err)
		case affected == 0:
			// No row matched the expected updated_at: someone else won.
			return routestate.ErrConflict
		default:
			return nil
		}
	}
	return store.withTx(ctx, "complete route published", apply)
}
// CompleteRouteFailed persists one retryable publication failure. The
// expected route is marked `failed`, attempt_count is incremented, the
// last-error fields are filled from the input, and the row is rescheduled
// at input.NextAttemptAt; updated_at becomes input.FailedAt.
//
// The update only applies when the stored row still carries the expected
// route's updated_at; otherwise `routestate.ErrConflict` is returned.
func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error {
	switch {
	case store == nil:
		return errors.New("complete route failed: nil store")
	case ctx == nil:
		return errors.New("complete route failed: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}

	next := input.ExpectedRoute
	next.AttemptCount++
	next.Status = acceptintent.RouteStatusFailed
	next.UpdatedAt = input.FailedAt
	next.NextAttemptAt = input.NextAttemptAt
	next.LastErrorAt = input.FailedAt
	next.LastErrorClassification = input.FailureClassification
	next.LastErrorMessage = input.FailureMessage

	apply := func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("complete route failed: %w", err)
		case affected == 0:
			// The row was modified after the publisher loaded it.
			return routestate.ErrConflict
		default:
			return nil
		}
	}
	return store.withTx(ctx, "complete route failed", apply)
}
// CompleteRouteDeadLetter persists one terminal publication failure. Within
// a single transaction it marks the expected route `dead_letter`, increments
// attempt_count, clears the retry schedule, stores the failure details, and
// inserts the dead-letter audit row.
//
// The call is rejected before touching the database when the incremented
// attempt count is still below the route's max attempts. A stale expected
// updated_at yields `routestate.ErrConflict`, in which case no audit row is
// inserted.
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error {
	switch {
	case store == nil:
		return errors.New("complete route dead letter: nil store")
	case ctx == nil:
		return errors.New("complete route dead letter: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}

	next := input.ExpectedRoute
	next.AttemptCount++
	// Dead-lettering is only legal once the attempt budget is exhausted.
	if next.AttemptCount < next.MaxAttempts {
		return fmt.Errorf(
			"complete route dead letter: final attempt count %d is below max attempts %d",
			next.AttemptCount,
			next.MaxAttempts,
		)
	}
	next.Status = acceptintent.RouteStatusDeadLetter
	next.NextAttemptAt = time.Time{}
	next.UpdatedAt = input.DeadLetteredAt
	next.DeadLetteredAt = input.DeadLetteredAt
	next.LastErrorAt = input.DeadLetteredAt
	next.LastErrorClassification = input.FailureClassification
	next.LastErrorMessage = input.FailureMessage

	apply := func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		if err != nil {
			return fmt.Errorf("complete route dead letter: %w", err)
		}
		if affected == 0 {
			return routestate.ErrConflict
		}

		// The route row is updated; record the audit entry in the same
		// transaction so both changes commit or roll back together.
		audit := deadLetterRow{
			NotificationID:        next.NotificationID,
			RouteID:               next.RouteID,
			Channel:               string(next.Channel),
			RecipientRef:          next.RecipientRef,
			FinalAttemptCount:     next.AttemptCount,
			MaxAttempts:           next.MaxAttempts,
			FailureClassification: input.FailureClassification,
			FailureMessage:        input.FailureMessage,
			RecoveryHint:          input.RecoveryHint,
			CreatedAt:             input.DeadLetteredAt,
		}
		if err := insertDeadLetter(txCtx, tx, audit); err != nil {
			return fmt.Errorf("complete route dead letter: %w", err)
		}
		return nil
	}
	return store.withTx(ctx, "complete route dead letter", apply)
}