// galaxy-game/notification/internal/adapters/postgres/notificationstore/scheduler.go

package notificationstore

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"time"

	pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
	"galaxy/notification/internal/service/acceptintent"
	"galaxy/notification/internal/service/routestate"
	"galaxy/notification/internal/telemetry"

	pg "github.com/go-jet/jet/v2/postgres"
)

// scheduledRouteKey builds the key of one ScheduledRoute by joining the
// notification identifier and the route identifier with a single "/".
// The result is stable for a given pair of inputs and readable in logs.
// Notification publishers only require the key to be non-empty
// (`ScheduledRoute.Validate`); they do not interpret it further.
func scheduledRouteKey(notificationID string, routeID string) string {
	const separator = "/"
	return notificationID + separator + routeID
}

// ListDueRoutes returns up to limit routes whose `next_attempt_at` is at or
// before now, ordered by ascending `next_attempt_at`. The query is
// non-locking; per-row contention is resolved by the lease (Redis) plus the
// optimistic-concurrency check inside `Complete*`.
//
// now must pass routestate.ValidateUTCMillisecondTimestamp and limit must be
// positive; a nil store or nil ctx is rejected. On success the returned slice
// is always non-nil (it is empty when no route is due). Query, scan and
// iteration failures are wrapped with the "list due routes" prefix.
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
	// maxPreallocatedRoutes bounds the up-front capacity of the result slice.
	// limit is caller-supplied and only checked for positivity, so sizing the
	// slice directly from it could request an enormous allocation (or make the
	// runtime panic in makeslice) before a single row has been read. append
	// grows the slice on demand when more rows are actually returned.
	const maxPreallocatedRoutes = 1024

	if store == nil {
		return nil, errors.New("list due routes: nil store")
	}
	if ctx == nil {
		return nil, errors.New("list due routes: nil context")
	}
	if err := routestate.ValidateUTCMillisecondTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "list due routes")
	if err != nil {
		return nil, err
	}
	defer cancel()

	stmt := pg.SELECT(pgtable.Routes.NotificationID, pgtable.Routes.RouteID).
		FROM(pgtable.Routes).
		WHERE(pg.AND(
			pgtable.Routes.NextAttemptAt.IS_NOT_NULL(),
			pgtable.Routes.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
		)).
		ORDER_BY(pgtable.Routes.NextAttemptAt.ASC()).
		LIMIT(limit)

	query, args := stmt.Sql()
	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	defer rows.Close()

	capacity := limit
	if capacity > maxPreallocatedRoutes {
		capacity = maxPreallocatedRoutes
	}
	out := make([]routestate.ScheduledRoute, 0, capacity)
	for rows.Next() {
		var (
			notificationID string
			routeID        string
		)
		if err := rows.Scan(&notificationID, &routeID); err != nil {
			return nil, fmt.Errorf("list due routes: scan: %w", err)
		}
		out = append(out, routestate.ScheduledRoute{
			RouteKey:       scheduledRouteKey(notificationID, routeID),
			NotificationID: notificationID,
			RouteID:        routeID,
		})
	}
	// rows.Next returning false can also mean the iteration failed; surface it.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	return out, nil
}

// ReadRouteScheduleSnapshot reports how many routes are currently scheduled
// (rows whose `next_attempt_at` is non-NULL) and, when at least one such row
// exists, the earliest scheduled timestamp converted to UTC. The runtime
// exposes this through the telemetry snapshot reader.
//
// A nil store or nil ctx is rejected, and any failure yields the zero
// snapshot together with the error.
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	var empty telemetry.RouteScheduleSnapshot
	switch {
	case store == nil:
		return empty, errors.New("read route schedule snapshot: nil store")
	case ctx == nil:
		return empty, errors.New("read route schedule snapshot: nil context")
	}

	queryCtx, release, err := store.operationContext(ctx, "read route schedule snapshot")
	if err != nil {
		return empty, err
	}
	defer release()

	// Only rows that still carry a schedule take part in the aggregate.
	scheduled := pgtable.Routes.NextAttemptAt.IS_NOT_NULL()
	sqlText, sqlArgs := pg.SELECT(
		pg.COUNT(pg.STAR),
		pg.MIN(pgtable.Routes.NextAttemptAt),
	).
		FROM(pgtable.Routes).
		WHERE(scheduled).
		Sql()

	var (
		count    int64
		earliest sql.NullTime
	)
	if err := store.db.QueryRowContext(queryCtx, sqlText, sqlArgs...).Scan(&count, &earliest); err != nil {
		return empty, fmt.Errorf("read route schedule snapshot: %w", err)
	}

	snapshot := telemetry.RouteScheduleSnapshot{Depth: count}
	if !earliest.Valid {
		// MIN over zero rows is NULL: there is no oldest timestamp to report.
		return snapshot, nil
	}
	earliestUTC := earliest.Time.UTC()
	snapshot.OldestScheduledFor = &earliestUTC
	return snapshot, nil
}

// CompleteRoutePublished persists a successful publication of the expected
// route: the status becomes `published`, attempt_count grows by one, the
// schedule, last-error and dead-letter fields are reset, and both
// `updated_at` and `published_at` take input.PublishedAt.
//
// The write is guarded by optimistic concurrency on `updated_at`: when no
// row matches the value the publisher loaded, the call returns
// `routestate.ErrConflict`.
//
// Note: the outbound stream emission (XADD) is performed by the publisher
// before this call. input.Stream and input.StreamValues are intentionally
// not read here; they stay on the input only so the publisher can carry a
// single struct through its state machine.
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
	switch {
	case store == nil:
		return errors.New("complete route published: nil store")
	case ctx == nil:
		return errors.New("complete route published: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}

	var unset time.Time

	next := input.ExpectedRoute
	next.AttemptCount++
	next.Status = acceptintent.RouteStatusPublished
	next.PublishedAt = input.PublishedAt
	next.UpdatedAt = input.PublishedAt
	// A published route carries no schedule, no error and no dead-letter mark.
	next.NextAttemptAt = unset
	next.LastErrorAt = unset
	next.DeadLetteredAt = unset
	next.LastErrorClassification = ""
	next.LastErrorMessage = ""

	persist := func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("complete route published: %w", err)
		case affected == 0:
			// Nothing matched the expected `updated_at`: someone else won.
			return routestate.ErrConflict
		default:
			return nil
		}
	}
	return store.withTx(ctx, "complete route published", persist)
}

// CompleteRouteFailed persists one retryable publication failure of the
// expected route: the status becomes `failed`, attempt_count grows by one,
// the last-error fields are filled from the input, `updated_at` takes
// input.FailedAt, and the row is rescheduled at input.NextAttemptAt.
//
// The update only applies when the stored `updated_at` still equals the one
// on input.ExpectedRoute; otherwise `routestate.ErrConflict` is returned.
func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error {
	switch {
	case store == nil:
		return errors.New("complete route failed: nil store")
	case ctx == nil:
		return errors.New("complete route failed: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}

	next := input.ExpectedRoute
	next.AttemptCount++
	next.Status = acceptintent.RouteStatusFailed
	// Record what went wrong and when.
	next.LastErrorClassification = input.FailureClassification
	next.LastErrorMessage = input.FailureMessage
	next.LastErrorAt = input.FailedAt
	next.UpdatedAt = input.FailedAt
	// Put the route back on the schedule for the next try.
	next.NextAttemptAt = input.NextAttemptAt

	persist := func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("complete route failed: %w", err)
		case affected == 0:
			// Nothing matched the expected `updated_at`: someone else won.
			return routestate.ErrConflict
		default:
			return nil
		}
	}
	return store.withTx(ctx, "complete route failed", persist)
}

// CompleteRouteDeadLetter persists one terminal publication failure of the
// expected route: the status becomes `dead_letter`, attempt_count grows by
// one, the schedule is cleared, the last-error fields are filled from the
// input, and a dead-letter audit row is inserted in the same transaction.
//
// The call is refused before touching the database when the resulting
// attempt count would still be below the route's max attempts. The update
// only applies when the stored `updated_at` still equals the one on
// input.ExpectedRoute; otherwise `routestate.ErrConflict` is returned and no
// audit row is inserted.
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error {
	switch {
	case store == nil:
		return errors.New("complete route dead letter: nil store")
	case ctx == nil:
		return errors.New("complete route dead letter: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}

	next := input.ExpectedRoute
	next.AttemptCount++
	next.Status = acceptintent.RouteStatusDeadLetter
	next.DeadLetteredAt = input.DeadLetteredAt
	next.UpdatedAt = input.DeadLetteredAt
	next.LastErrorAt = input.DeadLetteredAt
	next.LastErrorClassification = input.FailureClassification
	next.LastErrorMessage = input.FailureMessage
	// A dead-lettered route is never retried, so it leaves the schedule.
	next.NextAttemptAt = time.Time{}

	// Dead-lettering is only legitimate once the attempt budget is used up.
	if next.AttemptCount < next.MaxAttempts {
		return fmt.Errorf(
			"complete route dead letter: final attempt count %d is below max attempts %d",
			next.AttemptCount,
			next.MaxAttempts,
		)
	}

	persist := func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		if err != nil {
			return fmt.Errorf("complete route dead letter: %w", err)
		}
		if affected == 0 {
			// Nothing matched the expected `updated_at`: someone else won.
			return routestate.ErrConflict
		}

		audit := deadLetterRow{
			NotificationID:        next.NotificationID,
			RouteID:               next.RouteID,
			Channel:               string(next.Channel),
			RecipientRef:          next.RecipientRef,
			FinalAttemptCount:     next.AttemptCount,
			MaxAttempts:           next.MaxAttempts,
			FailureClassification: input.FailureClassification,
			FailureMessage:        input.FailureMessage,
			RecoveryHint:          input.RecoveryHint,
			CreatedAt:             input.DeadLetteredAt,
		}
		if err := insertDeadLetter(txCtx, tx, audit); err != nil {
			return fmt.Errorf("complete route dead letter: %w", err)
		}
		return nil
	}
	return store.withTx(ctx, "complete route dead letter", persist)
}