feat: use postgres

This commit is contained in:
Ilia Denisov
2026-04-26 20:34:39 +02:00
committed by GitHub
parent 48b0056b49
commit fe829285a6
365 changed files with 29223 additions and 24049 deletions
+29 -12
View File
@@ -8,11 +8,13 @@ import (
"strings"
"time"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/publishmail"
"galaxy/notification/internal/service/routestate"
"github.com/redis/go-redis/v9"
)
const (
@@ -24,7 +26,7 @@ const (
// by EmailPublisher.
type EmailRouteStateStore interface {
// ListDueRoutes loads due scheduled routes.
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
ListDueRoutes(context.Context, time.Time, int64) ([]routestate.ScheduledRoute, error)
// TryAcquireRouteLease attempts to acquire one temporary route lease.
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
@@ -39,13 +41,13 @@ type EmailRouteStateStore interface {
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
// CompleteRoutePublished records one successful publication.
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
CompleteRoutePublished(context.Context, routestate.CompleteRoutePublishedInput) error
// CompleteRouteFailed records one retryable publication failure.
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
CompleteRouteFailed(context.Context, routestate.CompleteRouteFailedInput) error
// CompleteRouteDeadLetter records one exhausted publication failure.
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
CompleteRouteDeadLetter(context.Context, routestate.CompleteRouteDeadLetterInput) error
}
// EmailCommandEncoder encodes one email-capable notification route into a
@@ -90,6 +92,10 @@ type EmailPublisherConfig struct {
// Clock provides wall-clock timestamps.
Clock Clock
// StreamPublisher emits the outbound mail-delivery command before the
// route's PostgreSQL state transition is committed.
StreamPublisher StreamPublisher
}
// EmailPublisher publishes due email routes into the Mail Service command
@@ -105,6 +111,7 @@ type EmailPublisher struct {
encoder EmailCommandEncoder
telemetry RoutePublisherTelemetry
clock Clock
streamPublisher StreamPublisher
workerToken string
logger *slog.Logger
}
@@ -114,6 +121,8 @@ func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPub
switch {
case cfg.Store == nil:
return nil, errors.New("new email publisher: nil store")
case cfg.StreamPublisher == nil:
return nil, errors.New("new email publisher: nil stream publisher")
case strings.TrimSpace(cfg.MailDeliveryCommandsStream) == "":
return nil, errors.New("new email publisher: mail delivery-commands stream must not be empty")
case cfg.RouteLeaseTTL <= 0:
@@ -157,6 +166,7 @@ func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPub
encoder: cfg.Encoder,
telemetry: cfg.Telemetry,
clock: cfg.Clock,
streamPublisher: cfg.StreamPublisher,
workerToken: workerToken,
logger: logger.With("component", "email_publisher", "stream", cfg.MailDeliveryCommandsStream),
}, nil
@@ -237,7 +247,7 @@ func (publisher *EmailPublisher) publishDueRoutes(ctx context.Context) (bool, er
return progress, nil
}
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute routestate.ScheduledRoute) (bool, error) {
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
if err != nil {
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
@@ -283,7 +293,14 @@ func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationPayloadEncoding, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
if err := publisher.streamPublisher.XAdd(ctx, &redis.XAddArgs{
Stream: publisher.mailDeliveryCommandsStream,
Values: command.Values(),
}).Err(); err != nil {
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
PublishedAt: publisher.now(),
@@ -312,7 +329,7 @@ func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
publisher.logger.Info("email route published", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
@@ -349,7 +366,7 @@ func (publisher *EmailPublisher) recordFailure(
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
if attemptNumber >= route.MaxAttempts {
err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
err := publisher.store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
DeadLetteredAt: failureAt,
@@ -362,7 +379,7 @@ func (publisher *EmailPublisher) recordFailure(
publisher.recordRouteDeadLetter(ctx, notification, route, classification)
publisher.logger.Warn("email route dead-lettered", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
@@ -370,7 +387,7 @@ func (publisher *EmailPublisher) recordFailure(
}
nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
err := publisher.store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
FailedAt: failureAt,
@@ -385,7 +402,7 @@ func (publisher *EmailPublisher) recordFailure(
logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
publisher.logger.Warn("email route failed and was rescheduled", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
@@ -1,232 +0,0 @@
package worker
import (
"context"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/require"
)
// TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending verifies
// that the publisher handles only email routes: the due email route transitions
// to published, the sibling push route stays pending, exactly one command lands
// on the mail stream with the expected envelope fields, and a "published"
// telemetry attempt is recorded.
func TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	// Seed one acceptance whose email route starts with attempt count 0.
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)
	// Wait until the background publisher marks the email route published.
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// The push route is not this publisher's concern and must remain pending.
	pushRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, pushRoute.Status)
	// Exactly one mail-delivery command must have reached the stream.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
// TestEmailPublisherRetriesMailStreamPublicationFailures verifies that a
// mail-stream write failure is recorded as a retryable route failure and that
// the publisher succeeds on a later attempt once the stream becomes writable.
func TestEmailPublisherRetriesMailStreamPublicationFailures(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	// Occupy the stream key with a plain string so XADD fails (wrong key type).
	require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)
	// First attempt must fail and leave the route rescheduled with one attempt.
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
	}, time.Second, 10*time.Millisecond)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "retry", emailFailureClassificationMailStreamWrite))
	require.True(t, fixture.telemetry.hasRouteRetry("email"))
	// Free the key; the retry should now publish on the second attempt.
	require.NoError(t, fixture.client.Del(context.Background(), fixture.mailStream).Err())
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
	}, 2*time.Second, 10*time.Millisecond)
	// Despite the earlier failure, exactly one command reaches the stream.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
// TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publisher replicas against the same store and stream and asserts the route
// lease guarantees the due route is published exactly once.
func TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	// Second replica shares the store and stream but runs on its own clock.
	otherPublisher, err := NewEmailPublisher(EmailPublisherConfig{
		Store:                      fixture.store,
		MailDeliveryCommandsStream: fixture.mailStream,
		RouteLeaseTTL:              200 * time.Millisecond,
		RouteBackoffMin:            20 * time.Millisecond,
		RouteBackoffMax:            20 * time.Millisecond,
		PollInterval:               10 * time.Millisecond,
		BatchSize:                  16,
		Clock:                      newSteppingClock(fixture.now, time.Millisecond),
	}, testWorkerLogger())
	require.NoError(t, err)
	first := runEmailPublisher(t, fixture.publisher)
	defer first.stop(t)
	second := runEmailPublisher(t, otherPublisher)
	defer second.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// With two competing replicas, only one command may appear on the stream.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
}
// TestEmailPublisherDeadLettersExhaustedRoute verifies that a route seeded with
// 6 prior attempts (MaxAttempts is 7 in the fixture input) is dead-lettered on
// the next persistent stream-write failure, with the matching failure
// classification persisted and reported to telemetry.
func TestEmailPublisherDeadLettersExhaustedRoute(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	// Seed the email route so its next failure exhausts the attempt budget.
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 6)))
	// Make every XADD fail by occupying the stream key with a string value.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 7
	}, time.Second, 10*time.Millisecond)
	// The persisted dead-letter record must carry the stream-write classification.
	deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "email:user:user-1")).Bytes()
	require.NoError(t, err)
	deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
	require.NoError(t, err)
	require.Equal(t, emailFailureClassificationMailStreamWrite, deadLetter.FailureClassification)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "dead_letter", emailFailureClassificationMailStreamWrite))
	require.True(t, fixture.telemetry.hasRouteDeadLetter("email", emailFailureClassificationMailStreamWrite))
}
// emailPublisherFixture bundles the shared dependencies for the email publisher
// tests: a miniredis-backed client and store, the publisher under test, and the
// recording telemetry sink.
type emailPublisherFixture struct {
	client     *redis.Client             // client connected to the per-test miniredis server
	store      *redisstate.AcceptanceStore // acceptance/route state store the publisher drains
	publisher  *EmailPublisher           // publisher under test
	mailStream string                    // name of the mail delivery-commands stream
	now        time.Time                 // fixed base instant used to seed acceptances
	clock      *steppingClock            // stepping clock driving the publisher
	telemetry  *recordingWorkerTelemetry // records publish attempts for assertions
}
// newEmailPublisherFixture starts a per-test miniredis server, builds the
// acceptance store, and constructs an EmailPublisher wired to a stepping clock
// and recording telemetry, returning everything bundled for the tests above.
func newEmailPublisherFixture(t *testing.T) emailPublisherFixture {
	t.Helper()
	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, client.Close())
	})
	store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	// Base instant matching the stream entry ID used throughout the tests.
	now := time.UnixMilli(1775121700000).UTC()
	clock := newSteppingClock(now, time.Millisecond)
	telemetry := &recordingWorkerTelemetry{}
	// Short lease/backoff/poll intervals keep the asynchronous tests fast.
	publisher, err := NewEmailPublisher(EmailPublisherConfig{
		Store:                      store,
		MailDeliveryCommandsStream: "mail:delivery_commands",
		RouteLeaseTTL:              200 * time.Millisecond,
		RouteBackoffMin:            20 * time.Millisecond,
		RouteBackoffMax:            20 * time.Millisecond,
		PollInterval:               10 * time.Millisecond,
		BatchSize:                  16,
		Telemetry:                  telemetry,
		Clock:                      clock,
	}, testWorkerLogger())
	require.NoError(t, err)
	return emailPublisherFixture{
		client:     client,
		store:      store,
		publisher:  publisher,
		mailStream: "mail:delivery_commands",
		now:        now,
		clock:      clock,
		telemetry:  telemetry,
	}
}
// validEmailAcceptanceInput builds a valid acceptance input from the push
// fixture and overrides the email route's attempt bookkeeping: AttemptCount is
// set to emailAttemptCount and MaxAttempts to 7. All other routes are left
// untouched.
func validEmailAcceptanceInput(now time.Time, emailAttemptCount int) acceptintent.CreateAcceptanceInput {
	input := validPushAcceptanceInput(now)
	for i := range input.Routes {
		route := &input.Routes[i]
		if route.RouteID == "email:user:user-1" {
			route.AttemptCount = emailAttemptCount
			route.MaxAttempts = 7
		}
	}
	return input
}
// runningEmailPublisher is a handle to a publisher started by
// runEmailPublisher: cancel stops its run context and resultCh delivers the
// value returned by Run.
type runningEmailPublisher struct {
	cancel   context.CancelFunc // cancels the publisher's run context
	resultCh chan error         // buffered channel receiving the Run result
}
// runEmailPublisher launches publisher.Run in a background goroutine under a
// cancellable context and returns a handle for stopping it and collecting the
// run result.
func runEmailPublisher(t *testing.T, publisher *EmailPublisher) runningEmailPublisher {
	t.Helper()
	ctx, cancel := context.WithCancel(context.Background())
	done := make(chan error, 1)
	go func(runCtx context.Context) {
		done <- publisher.Run(runCtx)
	}(ctx)
	return runningEmailPublisher{
		cancel:   cancel,
		resultCh: done,
	}
}
// stop cancels the publisher's run context and waits up to one second for Run
// to return; the run must terminate with context.Canceled.
func (r runningEmailPublisher) stop(t *testing.T) {
	t.Helper()
	r.cancel()
	deadline := time.After(time.Second)
	select {
	case <-deadline:
		require.FailNow(t, "email publisher did not stop")
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	}
}
@@ -1,422 +0,0 @@
package worker
import (
"context"
"errors"
"io"
"log/slog"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/config"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists verifies that
// a consumer without a persisted offset reads the stream from the beginning
// and accepts the first intent.
func TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	// The intent added before the consumer started must still be processed.
	require.Eventually(t, func() bool {
		_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), messageID)
		return err == nil && found
	}, time.Second, 10*time.Millisecond)
}
// TestIntentConsumerContinuesFromSavedOffsetAfterRestart verifies that a saved
// stream offset is honored: the entry covered by the offset is skipped and
// only later entries are accepted.
func TestIntentConsumerContinuesFromSavedOffsetAfterRestart(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	// Persist the first entry's ID as the already-consumed offset.
	require.NoError(t, fixture.offsetStore.Save(context.Background(), fixture.stream, firstID))
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), secondID)
		return err == nil && found
	}, time.Second, 10*time.Millisecond)
	// The entry at the stored offset must not have been processed again.
	_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset verifies that
// a second intent reusing the same idempotency key with a different payload is
// recorded as a malformed intent with code "idempotency_conflict", produces no
// notification, and does not block offset progress.
func TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// Both entries share one idempotency key (set inside addValidIntent) but
	// carry different payloads, so the second one must conflict with the first.
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(secondID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == "idempotency_conflict"
	}, time.Second, 10*time.Millisecond)
	// The offset must have advanced past the conflicting entry.
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, secondID, offset)
	// Only the first intent may have produced a notification.
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.True(t, found)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), secondID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerShutdownInterruptsBlockingRead verifies that cancelling the
// run context interrupts the consumer's blocking stream read and makes Run
// return context.Canceled within a second.
func TestIntentConsumerShutdownInterruptsBlockingRead(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{})
	ctx, cancel := context.WithCancel(context.Background())
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- fixture.consumer.Run(ctx)
	}()
	// Give the consumer time to enter its blocking read before cancelling.
	time.Sleep(50 * time.Millisecond)
	cancel()
	select {
	case err := <-resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-time.After(time.Second):
		require.FailNow(t, "intent consumer did not stop after shutdown")
	}
}
// TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset verifies that an
// intent whose recipient is unknown to the user directory is recorded as a
// malformed intent with the recipient-not-found code, produces no
// notification, and still advances the offset.
func TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset(t *testing.T) {
	t.Parallel()
	// Empty directory: every lookup yields acceptintent.ErrRecipientNotFound.
	fixture := newIntentConsumerFixture(t, stubUserDirectory{})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeRecipientNotFound
	}, time.Second, 10*time.Millisecond)
	// The offset must point at the rejected entry.
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, messageID, offset)
	// No notification may have been created for the rejected intent.
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset verifies that an
// intent missing its payload_json field is recorded as malformed with the
// invalid-payload code (preserving the originating stream entry ID), is not
// accepted, and does not stall the offset.
func TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// Hand-built entry deliberately omits "payload_json" so the consumer must
	// classify it as an invalid payload.
	messageID, err := fixture.client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: fixture.stream,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
		},
	}).Result()
	require.NoError(t, err)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeInvalidPayload &&
			entry.StreamEntryID == messageID
	}, time.Second, 10*time.Millisecond)
	// The offset must have advanced past the malformed entry.
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, messageID, offset)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents verifies the
// consumer emits telemetry for each processing outcome observed here: an
// accepted intent, a duplicate, and an idempotency conflict.
func TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// All three entries share one idempotency key: the first is accepted, the
	// byte-identical second is a duplicate, and the third (different payload)
	// conflicts — per the telemetry assertions below.
	addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	conflictID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(conflictID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict
	}, time.Second, 10*time.Millisecond)
	require.Eventually(t, func() bool {
		return fixture.telemetry.hasIntentOutcome("accepted") &&
			fixture.telemetry.hasIntentOutcome("duplicate") &&
			fixture.telemetry.hasMalformedIntent("idempotency_conflict")
	}, time.Second, 10*time.Millisecond)
}
// TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable
// verifies that an infrastructure failure (user directory unreachable) makes
// Run return the error without saving an offset or accepting the intent, so
// the entry will be reprocessed after a restart.
func TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		err: errors.New("user service unavailable"),
	})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- fixture.consumer.Run(ctx)
	}()
	// Run must terminate on its own, carrying the directory error.
	var runErr error
	require.Eventually(t, func() bool {
		select {
		case runErr = <-resultCh:
			return true
		default:
			return false
		}
	}, time.Second, 10*time.Millisecond)
	require.Error(t, runErr)
	require.ErrorContains(t, runErr, "user service unavailable")
	// Neither the offset nor a notification may have been persisted.
	_, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.False(t, found)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
// intentConsumerFixture bundles the shared dependencies for the intent
// consumer tests.
type intentConsumerFixture struct {
	client          *redis.Client                 // client connected to the per-test miniredis server
	stream          string                        // intent stream name read by the consumer
	acceptanceStore *redisstate.AcceptanceStore   // store receiving accepted notifications
	offsetStore     *redisstate.StreamOffsetStore // persists the consumer's stream offset
	consumer        *IntentConsumer               // consumer under test
	telemetry       *recordingWorkerTelemetry     // records outcome telemetry for assertions
}
// newIntentConsumerFixture starts a per-test miniredis server and wires the
// full intent-consumption stack around it: acceptance, malformed-intent, and
// offset stores, the acceptintent service backed by the supplied user
// directory, and the IntentConsumer under test.
func newIntentConsumerFixture(t *testing.T, userDirectory acceptintent.UserDirectory) intentConsumerFixture {
	t.Helper()
	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})
	acceptanceStore, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	malformedStore, err := redisstate.NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)
	offsetStore, err := redisstate.NewStreamOffsetStore(client)
	require.NoError(t, err)
	telemetry := &recordingWorkerTelemetry{}
	// The service clock is pinned to the instant used by the test payloads.
	service, err := acceptintent.New(acceptintent.Config{
		Store:            acceptanceStore,
		UserDirectory:    userDirectory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		Telemetry:        telemetry,
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting:     config.AdminRoutingConfig{},
	})
	require.NoError(t, err)
	// Short block timeout keeps shutdown and polling latency low in tests;
	// the consumer clock sits one millisecond after the service clock.
	consumer, err := NewIntentConsumer(IntentConsumerConfig{
		Client:            client,
		Stream:            "notification:intents",
		BlockTimeout:      25 * time.Millisecond,
		Acceptor:          service,
		MalformedRecorder: malformedStore,
		OffsetStore:       offsetStore,
		Telemetry:         telemetry,
		Clock:             fixedClock{now: time.UnixMilli(1775121700001).UTC()},
	}, slog.New(slog.NewTextHandler(io.Discard, nil)))
	require.NoError(t, err)
	return intentConsumerFixture{
		client:          client,
		stream:          "notification:intents",
		acceptanceStore: acceptanceStore,
		offsetStore:     offsetStore,
		consumer:        consumer,
		telemetry:       telemetry,
	}
}
// addValidIntent appends one well-formed "game.turn.ready" intent for user-1
// to the given stream, using payloadJSON as the payload_json field, and
// returns the new entry's stream ID.
func addValidIntent(t *testing.T, client *redis.Client, stream string, payloadJSON string) string {
	t.Helper()
	fields := map[string]any{
		"notification_type":       "game.turn.ready",
		"producer":                "game_master",
		"audience_kind":           "user",
		"recipient_user_ids_json": `["user-1"]`,
		"idempotency_key":         "game-123:turn-ready",
		"occurred_at_ms":          "1775121700000",
		"payload_json":            payloadJSON,
	}
	messageID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: stream,
		Values: fields,
	}).Result()
	require.NoError(t, err)
	return messageID
}
// runningIntentConsumer is a handle to a consumer started by
// runIntentConsumer: cancel stops its run context and resultCh delivers the
// value returned by Run.
type runningIntentConsumer struct {
	cancel   context.CancelFunc // cancels the consumer's run context
	resultCh chan error         // buffered channel receiving the Run result
}
// runIntentConsumer launches consumer.Run in a background goroutine under a
// cancellable context, pauses 50ms so the consumer can reach its blocking
// read, and returns a handle for stopping it and collecting the run result.
func runIntentConsumer(t *testing.T, consumer *IntentConsumer) runningIntentConsumer {
	t.Helper()
	ctx, cancel := context.WithCancel(context.Background())
	done := make(chan error, 1)
	go func(runCtx context.Context) {
		done <- consumer.Run(runCtx)
	}(ctx)
	// Give the consumer a head start before the caller acts on the stream.
	time.Sleep(50 * time.Millisecond)
	return runningIntentConsumer{
		cancel:   cancel,
		resultCh: done,
	}
}
// stop cancels the consumer's run context and waits up to one second for Run
// to return; the run must terminate with context.Canceled.
func (r runningIntentConsumer) stop(t *testing.T) {
	t.Helper()
	r.cancel()
	deadline := time.After(time.Second)
	select {
	case <-deadline:
		require.FailNow(t, "intent consumer did not stop")
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	}
}
type fixedClock struct {
now time.Time
}
func (clock fixedClock) Now() time.Time {
return clock.now
}
// stubUserDirectory is a test double for the user directory backed by a fixed
// map. A non-nil err makes every lookup fail with that error; otherwise
// unknown user IDs yield acceptintent.ErrRecipientNotFound.
type stubUserDirectory struct {
	records map[string]acceptintent.UserRecord // known users keyed by user ID
	err     error                              // forced error for every lookup, when non-nil
}

// GetUserByID resolves userID against the stub's fixed records.
func (d stubUserDirectory) GetUserByID(_ context.Context, userID string) (acceptintent.UserRecord, error) {
	if d.err != nil {
		return acceptintent.UserRecord{}, d.err
	}
	record, ok := d.records[userID]
	if !ok {
		return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
	}
	return record, nil
}
+34 -12
View File
@@ -10,11 +10,13 @@ import (
"strings"
"time"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/publishpush"
"galaxy/notification/internal/service/routestate"
"github.com/redis/go-redis/v9"
)
const (
@@ -29,7 +31,7 @@ const (
// PushPublisher.
type PushRouteStateStore interface {
// ListDueRoutes loads due scheduled routes.
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
ListDueRoutes(context.Context, time.Time, int64) ([]routestate.ScheduledRoute, error)
// TryAcquireRouteLease attempts to acquire one temporary route lease.
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
@@ -44,13 +46,13 @@ type PushRouteStateStore interface {
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
// CompleteRoutePublished records one successful publication.
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
CompleteRoutePublished(context.Context, routestate.CompleteRoutePublishedInput) error
// CompleteRouteFailed records one retryable publication failure.
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
CompleteRouteFailed(context.Context, routestate.CompleteRouteFailedInput) error
// CompleteRouteDeadLetter records one exhausted publication failure.
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
CompleteRouteDeadLetter(context.Context, routestate.CompleteRouteDeadLetterInput) error
}
// PushEventEncoder encodes one push-capable notification route into a
@@ -109,6 +111,10 @@ type PushPublisherConfig struct {
// Clock provides wall-clock timestamps.
Clock Clock
// StreamPublisher emits the outbound Gateway client-event before the
// route's PostgreSQL state transition is committed.
StreamPublisher StreamPublisher
}
// PushPublisher publishes due push routes into the Gateway client-events
@@ -125,6 +131,7 @@ type PushPublisher struct {
encoder PushEventEncoder
telemetry RoutePublisherTelemetry
clock Clock
streamPublisher StreamPublisher
workerToken string
logger *slog.Logger
}
@@ -134,6 +141,8 @@ func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublis
switch {
case cfg.Store == nil:
return nil, errors.New("new push publisher: nil store")
case cfg.StreamPublisher == nil:
return nil, errors.New("new push publisher: nil stream publisher")
case strings.TrimSpace(cfg.GatewayStream) == "":
return nil, errors.New("new push publisher: gateway stream must not be empty")
case cfg.GatewayStreamMaxLen <= 0:
@@ -180,6 +189,7 @@ func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublis
encoder: cfg.Encoder,
telemetry: cfg.Telemetry,
clock: cfg.Clock,
streamPublisher: cfg.StreamPublisher,
workerToken: workerToken,
logger: logger.With("component", "push_publisher", "stream", cfg.GatewayStream),
}, nil
@@ -260,7 +270,7 @@ func (publisher *PushPublisher) publishDueRoutes(ctx context.Context) (bool, err
return progress, nil
}
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute routestate.ScheduledRoute) (bool, error) {
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
if err != nil {
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
@@ -306,7 +316,19 @@ func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time,
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationPayloadEncoding, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
xaddArgs := &redis.XAddArgs{
Stream: publisher.gatewayStream,
Values: eventValues(event),
}
if publisher.gatewayStreamMaxLen > 0 {
xaddArgs.MaxLen = publisher.gatewayStreamMaxLen
xaddArgs.Approx = true
}
if err := publisher.streamPublisher.XAdd(ctx, xaddArgs).Err(); err != nil {
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
PublishedAt: publisher.now(),
@@ -335,7 +357,7 @@ func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time,
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
publisher.logger.Info("push route published", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
@@ -371,7 +393,7 @@ func (publisher *PushPublisher) recordFailure(
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
if attemptNumber >= route.MaxAttempts {
err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
err := publisher.store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
DeadLetteredAt: failureAt,
@@ -384,7 +406,7 @@ func (publisher *PushPublisher) recordFailure(
publisher.recordRouteDeadLetter(ctx, notification, route, classification)
publisher.logger.Warn("push route dead-lettered", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
@@ -392,7 +414,7 @@ func (publisher *PushPublisher) recordFailure(
}
nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
err := publisher.store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
FailedAt: failureAt,
@@ -407,7 +429,7 @@ func (publisher *PushPublisher) recordFailure(
logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
publisher.logger.Warn("push route failed and was rescheduled", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
@@ -1,318 +0,0 @@
package worker
import (
"context"
"io"
"log/slog"
"sync"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending verifies
// that the push publisher publishes the due push route to the gateway stream
// exactly once while leaving the email route for the same notification in the
// pending state (email routes belong to a different publisher).
func TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	// Seed one acceptance that fans out into a push route and an email route.
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)
	// Wait until the push route transitions to published.
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// The email route must be untouched by the push publisher.
	emailRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)
	// Exactly one gateway stream entry must exist, carrying the event fields.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
	require.Equal(t, "1775121700000-0/push:user:user-1", messages[0].Values["event_id"])
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
// TestPushPublisherRetriesGatewayStreamPublicationFailures verifies that a
// failed gateway stream write records one retryable failure (attempt 1) and
// that the publisher succeeds on the rescheduled attempt once the failure
// condition is removed.
func TestPushPublisherRetriesGatewayStreamPublicationFailures(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	// Occupy the stream key with a plain string so the stream append fails
	// with a wrong-type error, exercising the gateway-stream-write failure path.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)
	// First attempt must fail and be recorded as a retry.
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
	}, time.Second, 10*time.Millisecond)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "retry", pushFailureClassificationGatewayStreamWrite))
	require.True(t, fixture.telemetry.hasRouteRetry("push"))
	// Remove the conflicting key; the scheduled retry should now publish.
	require.NoError(t, fixture.client.Del(context.Background(), fixture.gatewayStream).Err())
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
	}, 2*time.Second, 10*time.Millisecond)
	// Exactly one stream entry despite the earlier failed attempt.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
// TestPushPublisherDeadLettersExhaustedRoute verifies that a route whose
// attempt budget is exhausted by one more failure is dead-lettered, and that
// the stored dead-letter payload records the failure classification.
func TestPushPublisherDeadLettersExhaustedRoute(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	input := validPushAcceptanceInput(fixture.now)
	// Pre-burn the push route's budget: 2 of 3 attempts already used, so the
	// next failure exhausts it.
	for index := range input.Routes {
		if input.Routes[index].RouteID == "push:user:user-1" {
			input.Routes[index].AttemptCount = 2
			input.Routes[index].MaxAttempts = 3
		}
	}
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), input))
	// Occupy the stream key with a plain string so every stream append fails.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 3
	}, time.Second, 10*time.Millisecond)
	// The persisted dead-letter must carry the stream-write classification.
	deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "push:user:user-1")).Bytes()
	require.NoError(t, err)
	deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
	require.NoError(t, err)
	require.Equal(t, pushFailureClassificationGatewayStreamWrite, deadLetter.FailureClassification)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "dead_letter", pushFailureClassificationGatewayStreamWrite))
	require.True(t, fixture.telemetry.hasRouteDeadLetter("push", pushFailureClassificationGatewayStreamWrite))
}
// TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas verifies
// that two publisher replicas sharing one store publish a due route exactly
// once: the route lease serializes the replicas.
func TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	// Second replica shares the store and stream but runs its own clock.
	otherPublisher, err := NewPushPublisher(PushPublisherConfig{
		Store:               fixture.store,
		GatewayStream:       fixture.gatewayStream,
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Clock:               newSteppingClock(fixture.now, time.Millisecond),
	}, testWorkerLogger())
	require.NoError(t, err)
	first := runPushPublisher(t, fixture.publisher)
	defer first.stop(t)
	second := runPushPublisher(t, otherPublisher)
	defer second.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// Exactly one stream entry despite two replicas racing for the route.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
}
// pushPublisherFixture bundles the Redis client, acceptance store, publisher
// under test, and supporting test doubles shared by the push publisher tests.
type pushPublisherFixture struct {
	client        *redis.Client              // raw client for direct stream/key inspection and fault injection
	store         *redisstate.AcceptanceStore // acceptance store backed by the fixture's miniredis
	publisher     *PushPublisher             // publisher under test, wired to store and telemetry
	gatewayStream string                     // name of the gateway stream the publisher writes to
	now           time.Time                  // fixed epoch used for all seeded timestamps
	clock         *steppingClock             // deterministic clock driving the publisher
	telemetry     *recordingWorkerTelemetry  // records publish/retry/dead-letter signals for assertions
}
// newPushPublisherFixture spins up a miniredis-backed acceptance store and a
// push publisher wired to it, using a deterministic stepping clock and a
// recording telemetry double. Client shutdown is registered via t.Cleanup.
func newPushPublisherFixture(t *testing.T) pushPublisherFixture {
	t.Helper()
	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})
	store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	// Fixed epoch keeps notification IDs and scheduling math deterministic.
	now := time.UnixMilli(1775121700000).UTC()
	clock := newSteppingClock(now, time.Millisecond)
	telemetry := &recordingWorkerTelemetry{}
	// Short lease/backoff/poll intervals keep the Eventually loops fast.
	publisher, err := NewPushPublisher(PushPublisherConfig{
		Store:               store,
		GatewayStream:       "gateway:client-events",
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Telemetry:           telemetry,
		Clock:               clock,
	}, testWorkerLogger())
	require.NoError(t, err)
	return pushPublisherFixture{
		client:        client,
		store:         store,
		publisher:     publisher,
		gatewayStream: "gateway:client-events",
		now:           now,
		clock:         clock,
		telemetry:     telemetry,
	}
}
// validPushAcceptanceInput builds the canonical acceptance fixture used by the
// push publisher tests: one notification for user-1 fanned out into a push
// route and an email route, with every timestamp anchored at now.
func validPushAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	const (
		notificationID = "1775121700000-0"
		idempotencyKey = "game-123:turn-54"
		fingerprint    = "sha256:deadbeef"
	)
	// Both routes share every field except identity, channel, and retry budget.
	baseRoute := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RecipientRef:   "user:user-1",
		Status:         acceptintent.RouteStatusPending,
		AttemptCount:   0,
		NextAttemptAt:  now,
		ResolvedEmail:  "pilot@example.com",
		ResolvedLocale: "en",
		CreatedAt:      now,
		UpdatedAt:      now,
	}
	pushRoute := baseRoute
	pushRoute.RouteID = "push:user:user-1"
	pushRoute.Channel = intentstream.ChannelPush
	pushRoute.MaxAttempts = 3
	emailRoute := baseRoute
	emailRoute.RouteID = "email:user:user-1"
	emailRoute.Channel = intentstream.ChannelEmail
	emailRoute.MaxAttempts = 7
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     notificationID,
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     idempotencyKey,
			RequestFingerprint: fingerprint,
			RequestID:          "request-1",
			TraceID:            "trace-1",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{pushRoute, emailRoute},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameMaster,
			IdempotencyKey:     idempotencyKey,
			NotificationID:     notificationID,
			RequestFingerprint: fingerprint,
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}
// runningPushPublisher is a handle to a publisher Run loop started in the
// background: cancel stops the loop and resultCh delivers Run's return value.
type runningPushPublisher struct {
	cancel   context.CancelFunc // cancels the context passed to Run
	resultCh chan error         // buffered (cap 1) so the goroutine never blocks on exit
}
// runPushPublisher starts publisher.Run in a background goroutine and returns
// a handle that can cancel the run and collect its result.
func runPushPublisher(t *testing.T, publisher *PushPublisher) runningPushPublisher {
	t.Helper()
	ctx, cancel := context.WithCancel(context.Background())
	// Buffered so the goroutine can exit even if nobody reads the result.
	done := make(chan error, 1)
	go func() { done <- publisher.Run(ctx) }()
	return runningPushPublisher{cancel: cancel, resultCh: done}
}
// stop cancels the background run and asserts that it terminates within one
// second with context.Canceled.
func (running runningPushPublisher) stop(t *testing.T) {
	t.Helper()
	running.cancel()
	deadline := time.After(time.Second)
	select {
	case <-deadline:
		require.FailNow(t, "push publisher did not stop")
	case err := <-running.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	}
}
type steppingClock struct {
mu sync.Mutex
current time.Time
step time.Duration
}
func newSteppingClock(start time.Time, step time.Duration) *steppingClock {
return &steppingClock{
current: start.UTC().Truncate(time.Millisecond),
step: step,
}
}
func (clock *steppingClock) Now() time.Time {
clock.mu.Lock()
defer clock.mu.Unlock()
now := clock.current
clock.current = clock.current.Add(clock.step).UTC().Truncate(time.Millisecond)
return now
}
// testWorkerLogger returns a structured logger that discards all output so
// worker tests stay silent.
func testWorkerLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
@@ -0,0 +1,161 @@
package worker
import (
"context"
"errors"
"fmt"
"log/slog"
"time"
)
// SQLRetentionStore performs the durable DELETE statements applied by the
// retention worker. Implementations are typically the umbrella PostgreSQL
// notification store; the interface keeps the worker decoupled from the
// store package.
type SQLRetentionStore interface {
	// DeleteRecordsOlderThan removes records rows whose accepted_at predates
	// cutoff and returns the number of rows deleted. Cascading FKs drop the
	// routes and dead_letters owned by the deleted rows.
	DeleteRecordsOlderThan(ctx context.Context, cutoff time.Time) (int64, error)

	// DeleteMalformedIntentsOlderThan removes malformed-intent rows whose
	// recorded_at predates cutoff and returns the number of rows deleted.
	DeleteMalformedIntentsOlderThan(ctx context.Context, cutoff time.Time) (int64, error)
}
// SQLRetentionConfig stores the dependencies and policy used by
// SQLRetentionWorker. All fields are required: NewSQLRetentionWorker rejects
// nil dependencies and non-positive durations.
type SQLRetentionConfig struct {
	// Store applies the durable DELETE statements.
	Store SQLRetentionStore

	// RecordRetention bounds how long records (and their cascaded routes and
	// dead_letters) survive after acceptance.
	RecordRetention time.Duration

	// MalformedIntentRetention bounds how long malformed-intent rows survive
	// after recorded_at.
	MalformedIntentRetention time.Duration

	// CleanupInterval stores the wall-clock period between two retention
	// passes.
	CleanupInterval time.Duration

	// Clock provides the wall-clock used to compute cutoff timestamps.
	Clock Clock
}
// SQLRetentionWorker periodically deletes records and malformed-intent rows
// whose retention window has expired. The worker replaces the per-key
// Redis EXPIRE eviction that maintained TTLs on the previous Redis-backed
// notification keyspace. Construct it with NewSQLRetentionWorker; the zero
// value is not usable.
type SQLRetentionWorker struct {
	store                    SQLRetentionStore // executes the DELETE statements
	recordRetention          time.Duration     // max age of records after acceptance
	malformedIntentRetention time.Duration     // max age of malformed-intent rows after recorded_at
	cleanupInterval          time.Duration     // period between retention passes
	clock                    Clock             // source of "now" for cutoff computation
	logger                   *slog.Logger      // tagged with component=sql_retention_worker
}
// NewSQLRetentionWorker validates cfg and constructs the periodic retention
// worker. It rejects a nil store or clock and any non-positive duration; a
// nil logger falls back to slog.Default().
func NewSQLRetentionWorker(cfg SQLRetentionConfig, logger *slog.Logger) (*SQLRetentionWorker, error) {
	if cfg.Store == nil {
		return nil, errors.New("new sql retention worker: nil store")
	}
	if cfg.RecordRetention <= 0 {
		return nil, errors.New("new sql retention worker: non-positive record retention")
	}
	if cfg.MalformedIntentRetention <= 0 {
		return nil, errors.New("new sql retention worker: non-positive malformed intent retention")
	}
	if cfg.CleanupInterval <= 0 {
		return nil, errors.New("new sql retention worker: non-positive cleanup interval")
	}
	if cfg.Clock == nil {
		return nil, errors.New("new sql retention worker: nil clock")
	}
	baseLogger := logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}
	worker := &SQLRetentionWorker{
		store:                    cfg.Store,
		recordRetention:          cfg.RecordRetention,
		malformedIntentRetention: cfg.MalformedIntentRetention,
		cleanupInterval:          cfg.CleanupInterval,
		clock:                    cfg.Clock,
		logger:                   baseLogger.With("component", "sql_retention_worker"),
	}
	return worker, nil
}
// Run executes the retention loop until ctx is canceled, returning ctx's
// error on shutdown. One pass runs immediately on startup, then one pass per
// cleanup interval.
func (worker *SQLRetentionWorker) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run sql retention worker: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	if worker == nil {
		return errors.New("run sql retention worker: nil worker")
	}
	worker.logger.Info("sql retention worker started",
		"record_retention", worker.recordRetention.String(),
		"malformed_intent_retention", worker.malformedIntentRetention.String(),
		"cleanup_interval", worker.cleanupInterval.String(),
	)
	defer worker.logger.Info("sql retention worker stopped")
	// Evict immediately on startup instead of waiting one full interval.
	worker.runOnce(ctx)
	ticker := time.NewTicker(worker.cleanupInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			worker.runOnce(ctx)
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
// Shutdown stops the retention worker within ctx. The worker owns no
// resources beyond Run's own loop, so this only validates ctx.
func (worker *SQLRetentionWorker) Shutdown(ctx context.Context) error {
	if ctx != nil {
		return nil
	}
	return errors.New("shutdown sql retention worker: nil context")
}
// runOnce executes one retention pass: it deletes records and malformed
// intents whose retention window has expired as of the worker clock's current
// UTC time. Failures are logged and do not abort the pass — the next tick
// retries naturally.
func (worker *SQLRetentionWorker) runOnce(ctx context.Context) {
	now := worker.clock.Now().UTC()
	worker.deleteExpired(ctx, "records", now.Add(-worker.recordRetention), worker.store.DeleteRecordsOlderThan)
	worker.deleteExpired(ctx, "malformed intents", now.Add(-worker.malformedIntentRetention), worker.store.DeleteMalformedIntentsOlderThan)
}

// deleteExpired applies one DELETE pass for the named row kind and logs the
// outcome: a warning when the delete fails, an info entry when rows were
// removed, and nothing when the pass was a no-op.
func (worker *SQLRetentionWorker) deleteExpired(
	ctx context.Context,
	subject string,
	cutoff time.Time,
	deleteOlderThan func(context.Context, time.Time) (int64, error),
) {
	deleted, err := deleteOlderThan(ctx, cutoff)
	if err != nil {
		worker.logger.Warn("delete expired "+subject+" failed",
			"cutoff", cutoff,
			"error", fmt.Sprintf("%v", err),
		)
		return
	}
	if deleted > 0 {
		worker.logger.Info("expired "+subject+" deleted",
			"cutoff", cutoff,
			"deleted", deleted,
		)
	}
}
@@ -0,0 +1,18 @@
package worker
import (
"context"
"github.com/redis/go-redis/v9"
)
// StreamPublisher abstracts the subset of the Redis Streams API used by the
// route publishers to emit one outbound stream entry. The default
// implementation in production wiring is `*redis.Client`. Tests substitute
// an in-memory fake.
type StreamPublisher interface {
	// XAdd appends one entry to the stream named in args and returns the
	// command result. Implementations must honour `args.MaxLen` plus
	// `args.Approx == true` for approximate trimming when the caller sets
	// them.
	XAdd(ctx context.Context, args *redis.XAddArgs) *redis.StringCmd
}