feat: use postgres
This commit is contained in:
@@ -8,11 +8,13 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/logging"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/publishmail"
|
||||
"galaxy/notification/internal/service/routestate"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -24,7 +26,7 @@ const (
|
||||
// by EmailPublisher.
|
||||
type EmailRouteStateStore interface {
|
||||
// ListDueRoutes loads due scheduled routes.
|
||||
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
|
||||
ListDueRoutes(context.Context, time.Time, int64) ([]routestate.ScheduledRoute, error)
|
||||
|
||||
// TryAcquireRouteLease attempts to acquire one temporary route lease.
|
||||
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
|
||||
@@ -39,13 +41,13 @@ type EmailRouteStateStore interface {
|
||||
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
|
||||
|
||||
// CompleteRoutePublished records one successful publication.
|
||||
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
|
||||
CompleteRoutePublished(context.Context, routestate.CompleteRoutePublishedInput) error
|
||||
|
||||
// CompleteRouteFailed records one retryable publication failure.
|
||||
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
|
||||
CompleteRouteFailed(context.Context, routestate.CompleteRouteFailedInput) error
|
||||
|
||||
// CompleteRouteDeadLetter records one exhausted publication failure.
|
||||
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
|
||||
CompleteRouteDeadLetter(context.Context, routestate.CompleteRouteDeadLetterInput) error
|
||||
}
|
||||
|
||||
// EmailCommandEncoder encodes one email-capable notification route into a
|
||||
@@ -90,6 +92,10 @@ type EmailPublisherConfig struct {
|
||||
|
||||
// Clock provides wall-clock timestamps.
|
||||
Clock Clock
|
||||
|
||||
// StreamPublisher emits the outbound mail-delivery command before the
|
||||
// route's PostgreSQL state transition is committed.
|
||||
StreamPublisher StreamPublisher
|
||||
}
|
||||
|
||||
// EmailPublisher publishes due email routes into the Mail Service command
|
||||
@@ -105,6 +111,7 @@ type EmailPublisher struct {
|
||||
encoder EmailCommandEncoder
|
||||
telemetry RoutePublisherTelemetry
|
||||
clock Clock
|
||||
streamPublisher StreamPublisher
|
||||
workerToken string
|
||||
logger *slog.Logger
|
||||
}
|
||||
@@ -114,6 +121,8 @@ func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPub
|
||||
switch {
|
||||
case cfg.Store == nil:
|
||||
return nil, errors.New("new email publisher: nil store")
|
||||
case cfg.StreamPublisher == nil:
|
||||
return nil, errors.New("new email publisher: nil stream publisher")
|
||||
case strings.TrimSpace(cfg.MailDeliveryCommandsStream) == "":
|
||||
return nil, errors.New("new email publisher: mail delivery-commands stream must not be empty")
|
||||
case cfg.RouteLeaseTTL <= 0:
|
||||
@@ -157,6 +166,7 @@ func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPub
|
||||
encoder: cfg.Encoder,
|
||||
telemetry: cfg.Telemetry,
|
||||
clock: cfg.Clock,
|
||||
streamPublisher: cfg.StreamPublisher,
|
||||
workerToken: workerToken,
|
||||
logger: logger.With("component", "email_publisher", "stream", cfg.MailDeliveryCommandsStream),
|
||||
}, nil
|
||||
@@ -237,7 +247,7 @@ func (publisher *EmailPublisher) publishDueRoutes(ctx context.Context) (bool, er
|
||||
return progress, nil
|
||||
}
|
||||
|
||||
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
|
||||
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute routestate.ScheduledRoute) (bool, error) {
|
||||
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
|
||||
@@ -283,7 +293,14 @@ func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time
|
||||
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationPayloadEncoding, err.Error())
|
||||
}
|
||||
|
||||
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
|
||||
if err := publisher.streamPublisher.XAdd(ctx, &redis.XAddArgs{
|
||||
Stream: publisher.mailDeliveryCommandsStream,
|
||||
Values: command.Values(),
|
||||
}).Err(); err != nil {
|
||||
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
|
||||
}
|
||||
|
||||
err = publisher.store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
|
||||
ExpectedRoute: route,
|
||||
LeaseToken: publisher.workerToken,
|
||||
PublishedAt: publisher.now(),
|
||||
@@ -312,7 +329,7 @@ func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time
|
||||
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
|
||||
publisher.logger.Info("email route published", logArgs...)
|
||||
return true, nil
|
||||
case errors.Is(err, redisstate.ErrConflict):
|
||||
case errors.Is(err, routestate.ErrConflict):
|
||||
return false, nil
|
||||
default:
|
||||
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
|
||||
@@ -349,7 +366,7 @@ func (publisher *EmailPublisher) recordFailure(
|
||||
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
|
||||
|
||||
if attemptNumber >= route.MaxAttempts {
|
||||
err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
|
||||
err := publisher.store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
|
||||
ExpectedRoute: route,
|
||||
LeaseToken: publisher.workerToken,
|
||||
DeadLetteredAt: failureAt,
|
||||
@@ -362,7 +379,7 @@ func (publisher *EmailPublisher) recordFailure(
|
||||
publisher.recordRouteDeadLetter(ctx, notification, route, classification)
|
||||
publisher.logger.Warn("email route dead-lettered", logArgs...)
|
||||
return true, nil
|
||||
case errors.Is(err, redisstate.ErrConflict):
|
||||
case errors.Is(err, routestate.ErrConflict):
|
||||
return false, nil
|
||||
default:
|
||||
return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
|
||||
@@ -370,7 +387,7 @@ func (publisher *EmailPublisher) recordFailure(
|
||||
}
|
||||
|
||||
nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
|
||||
err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
|
||||
err := publisher.store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
|
||||
ExpectedRoute: route,
|
||||
LeaseToken: publisher.workerToken,
|
||||
FailedAt: failureAt,
|
||||
@@ -385,7 +402,7 @@ func (publisher *EmailPublisher) recordFailure(
|
||||
logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
|
||||
publisher.logger.Warn("email route failed and was rescheduled", logArgs...)
|
||||
return true, nil
|
||||
case errors.Is(err, redisstate.ErrConflict):
|
||||
case errors.Is(err, routestate.ErrConflict):
|
||||
return false, nil
|
||||
default:
|
||||
return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
|
||||
|
||||
@@ -1,232 +0,0 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newEmailPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
|
||||
|
||||
running := runEmailPublisher(t, fixture.publisher)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
pushRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
|
||||
require.NoError(t, err)
|
||||
require.True(t, found)
|
||||
require.Equal(t, acceptintent.RouteStatusPending, pushRoute.Status)
|
||||
|
||||
messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
|
||||
require.Equal(t, "notification", messages[0].Values["source"])
|
||||
require.Equal(t, "template", messages[0].Values["payload_mode"])
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
|
||||
}
|
||||
|
||||
func TestEmailPublisherRetriesMailStreamPublicationFailures(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newEmailPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
|
||||
require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
|
||||
|
||||
running := runEmailPublisher(t, fixture.publisher)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "retry", emailFailureClassificationMailStreamWrite))
|
||||
require.True(t, fixture.telemetry.hasRouteRetry("email"))
|
||||
|
||||
require.NoError(t, fixture.client.Del(context.Background(), fixture.mailStream).Err())
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
|
||||
}, 2*time.Second, 10*time.Millisecond)
|
||||
|
||||
messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
|
||||
}
|
||||
|
||||
func TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newEmailPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
|
||||
|
||||
otherPublisher, err := NewEmailPublisher(EmailPublisherConfig{
|
||||
Store: fixture.store,
|
||||
MailDeliveryCommandsStream: fixture.mailStream,
|
||||
RouteLeaseTTL: 200 * time.Millisecond,
|
||||
RouteBackoffMin: 20 * time.Millisecond,
|
||||
RouteBackoffMax: 20 * time.Millisecond,
|
||||
PollInterval: 10 * time.Millisecond,
|
||||
BatchSize: 16,
|
||||
Clock: newSteppingClock(fixture.now, time.Millisecond),
|
||||
}, testWorkerLogger())
|
||||
require.NoError(t, err)
|
||||
|
||||
first := runEmailPublisher(t, fixture.publisher)
|
||||
defer first.stop(t)
|
||||
second := runEmailPublisher(t, otherPublisher)
|
||||
defer second.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
}
|
||||
|
||||
func TestEmailPublisherDeadLettersExhaustedRoute(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newEmailPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 6)))
|
||||
require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
|
||||
|
||||
running := runEmailPublisher(t, fixture.publisher)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 7
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "email:user:user-1")).Bytes()
|
||||
require.NoError(t, err)
|
||||
deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, emailFailureClassificationMailStreamWrite, deadLetter.FailureClassification)
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "dead_letter", emailFailureClassificationMailStreamWrite))
|
||||
require.True(t, fixture.telemetry.hasRouteDeadLetter("email", emailFailureClassificationMailStreamWrite))
|
||||
}
|
||||
|
||||
type emailPublisherFixture struct {
|
||||
client *redis.Client
|
||||
store *redisstate.AcceptanceStore
|
||||
publisher *EmailPublisher
|
||||
mailStream string
|
||||
now time.Time
|
||||
clock *steppingClock
|
||||
telemetry *recordingWorkerTelemetry
|
||||
}
|
||||
|
||||
func newEmailPublisherFixture(t *testing.T) emailPublisherFixture {
|
||||
t.Helper()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := redis.NewClient(&redis.Options{
|
||||
Addr: server.Addr(),
|
||||
Protocol: 2,
|
||||
DisableIdentity: true,
|
||||
})
|
||||
t.Cleanup(func() {
|
||||
require.NoError(t, client.Close())
|
||||
})
|
||||
|
||||
store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
now := time.UnixMilli(1775121700000).UTC()
|
||||
clock := newSteppingClock(now, time.Millisecond)
|
||||
telemetry := &recordingWorkerTelemetry{}
|
||||
publisher, err := NewEmailPublisher(EmailPublisherConfig{
|
||||
Store: store,
|
||||
MailDeliveryCommandsStream: "mail:delivery_commands",
|
||||
RouteLeaseTTL: 200 * time.Millisecond,
|
||||
RouteBackoffMin: 20 * time.Millisecond,
|
||||
RouteBackoffMax: 20 * time.Millisecond,
|
||||
PollInterval: 10 * time.Millisecond,
|
||||
BatchSize: 16,
|
||||
Telemetry: telemetry,
|
||||
Clock: clock,
|
||||
}, testWorkerLogger())
|
||||
require.NoError(t, err)
|
||||
|
||||
return emailPublisherFixture{
|
||||
client: client,
|
||||
store: store,
|
||||
publisher: publisher,
|
||||
mailStream: "mail:delivery_commands",
|
||||
now: now,
|
||||
clock: clock,
|
||||
telemetry: telemetry,
|
||||
}
|
||||
}
|
||||
|
||||
func validEmailAcceptanceInput(now time.Time, emailAttemptCount int) acceptintent.CreateAcceptanceInput {
|
||||
input := validPushAcceptanceInput(now)
|
||||
for index := range input.Routes {
|
||||
if input.Routes[index].RouteID != "email:user:user-1" {
|
||||
continue
|
||||
}
|
||||
input.Routes[index].AttemptCount = emailAttemptCount
|
||||
input.Routes[index].MaxAttempts = 7
|
||||
}
|
||||
|
||||
return input
|
||||
}
|
||||
|
||||
type runningEmailPublisher struct {
|
||||
cancel context.CancelFunc
|
||||
resultCh chan error
|
||||
}
|
||||
|
||||
func runEmailPublisher(t *testing.T, publisher *EmailPublisher) runningEmailPublisher {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- publisher.Run(ctx)
|
||||
}()
|
||||
|
||||
return runningEmailPublisher{
|
||||
cancel: cancel,
|
||||
resultCh: resultCh,
|
||||
}
|
||||
}
|
||||
|
||||
func (r runningEmailPublisher) stop(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
r.cancel()
|
||||
|
||||
select {
|
||||
case err := <-r.resultCh:
|
||||
require.ErrorIs(t, err, context.Canceled)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "email publisher did not stop")
|
||||
}
|
||||
}
|
||||
@@ -1,422 +0,0 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/config"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{
|
||||
records: map[string]acceptintent.UserRecord{
|
||||
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
|
||||
},
|
||||
})
|
||||
messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
|
||||
running := runIntentConsumer(t, fixture.consumer)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), messageID)
|
||||
return err == nil && found
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
}
|
||||
|
||||
func TestIntentConsumerContinuesFromSavedOffsetAfterRestart(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{
|
||||
records: map[string]acceptintent.UserRecord{
|
||||
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
|
||||
},
|
||||
})
|
||||
firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
require.NoError(t, fixture.offsetStore.Save(context.Background(), fixture.stream, firstID))
|
||||
secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
|
||||
running := runIntentConsumer(t, fixture.consumer)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), secondID)
|
||||
return err == nil && found
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), firstID)
|
||||
require.NoError(t, err)
|
||||
require.False(t, found)
|
||||
}
|
||||
|
||||
func TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{
|
||||
records: map[string]acceptintent.UserRecord{
|
||||
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
|
||||
},
|
||||
})
|
||||
firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
|
||||
running := runIntentConsumer(t, fixture.consumer)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(secondID)).Bytes()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
entry, err := redisstate.UnmarshalMalformedIntent(payload)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return entry.FailureCode == "idempotency_conflict"
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
|
||||
require.NoError(t, err)
|
||||
require.True(t, found)
|
||||
require.Equal(t, secondID, offset)
|
||||
|
||||
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), firstID)
|
||||
require.NoError(t, err)
|
||||
require.True(t, found)
|
||||
|
||||
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), secondID)
|
||||
require.NoError(t, err)
|
||||
require.False(t, found)
|
||||
}
|
||||
|
||||
func TestIntentConsumerShutdownInterruptsBlockingRead(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- fixture.consumer.Run(ctx)
|
||||
}()
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
cancel()
|
||||
|
||||
select {
|
||||
case err := <-resultCh:
|
||||
require.ErrorIs(t, err, context.Canceled)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "intent consumer did not stop after shutdown")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{})
|
||||
messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
|
||||
running := runIntentConsumer(t, fixture.consumer)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
entry, err := redisstate.UnmarshalMalformedIntent(payload)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return entry.FailureCode == malformedintent.FailureCodeRecipientNotFound
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
|
||||
require.NoError(t, err)
|
||||
require.True(t, found)
|
||||
require.Equal(t, messageID, offset)
|
||||
|
||||
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
|
||||
require.NoError(t, err)
|
||||
require.False(t, found)
|
||||
}
|
||||
|
||||
func TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{
|
||||
records: map[string]acceptintent.UserRecord{
|
||||
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
|
||||
},
|
||||
})
|
||||
messageID, err := fixture.client.XAdd(context.Background(), &redis.XAddArgs{
|
||||
Stream: fixture.stream,
|
||||
Values: map[string]any{
|
||||
"notification_type": "game.turn.ready",
|
||||
"producer": "game_master",
|
||||
"audience_kind": "user",
|
||||
"recipient_user_ids_json": `["user-1"]`,
|
||||
"idempotency_key": "game-123:turn-ready",
|
||||
"occurred_at_ms": "1775121700000",
|
||||
},
|
||||
}).Result()
|
||||
require.NoError(t, err)
|
||||
|
||||
running := runIntentConsumer(t, fixture.consumer)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
entry, err := redisstate.UnmarshalMalformedIntent(payload)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return entry.FailureCode == malformedintent.FailureCodeInvalidPayload &&
|
||||
entry.StreamEntryID == messageID
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
|
||||
require.NoError(t, err)
|
||||
require.True(t, found)
|
||||
require.Equal(t, messageID, offset)
|
||||
|
||||
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
|
||||
require.NoError(t, err)
|
||||
require.False(t, found)
|
||||
}
|
||||
|
||||
func TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{
|
||||
records: map[string]acceptintent.UserRecord{
|
||||
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
|
||||
},
|
||||
})
|
||||
addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
conflictID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
|
||||
running := runIntentConsumer(t, fixture.consumer)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(conflictID)).Bytes()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
entry, err := redisstate.UnmarshalMalformedIntent(payload)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
return fixture.telemetry.hasIntentOutcome("accepted") &&
|
||||
fixture.telemetry.hasIntentOutcome("duplicate") &&
|
||||
fixture.telemetry.hasMalformedIntent("idempotency_conflict")
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
}
|
||||
|
||||
func TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newIntentConsumerFixture(t, stubUserDirectory{
|
||||
err: errors.New("user service unavailable"),
|
||||
})
|
||||
messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- fixture.consumer.Run(ctx)
|
||||
}()
|
||||
|
||||
var runErr error
|
||||
require.Eventually(t, func() bool {
|
||||
select {
|
||||
case runErr = <-resultCh:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
require.Error(t, runErr)
|
||||
require.ErrorContains(t, runErr, "user service unavailable")
|
||||
|
||||
_, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
|
||||
require.NoError(t, err)
|
||||
require.False(t, found)
|
||||
|
||||
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
|
||||
require.NoError(t, err)
|
||||
require.False(t, found)
|
||||
}
|
||||
|
||||
type intentConsumerFixture struct {
|
||||
client *redis.Client
|
||||
stream string
|
||||
acceptanceStore *redisstate.AcceptanceStore
|
||||
offsetStore *redisstate.StreamOffsetStore
|
||||
consumer *IntentConsumer
|
||||
telemetry *recordingWorkerTelemetry
|
||||
}
|
||||
|
||||
func newIntentConsumerFixture(t *testing.T, userDirectory acceptintent.UserDirectory) intentConsumerFixture {
|
||||
t.Helper()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := redis.NewClient(&redis.Options{
|
||||
Addr: server.Addr(),
|
||||
Protocol: 2,
|
||||
DisableIdentity: true,
|
||||
})
|
||||
t.Cleanup(func() {
|
||||
assert.NoError(t, client.Close())
|
||||
})
|
||||
|
||||
acceptanceStore, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
malformedStore, err := redisstate.NewMalformedIntentStore(client, 72*time.Hour)
|
||||
require.NoError(t, err)
|
||||
offsetStore, err := redisstate.NewStreamOffsetStore(client)
|
||||
require.NoError(t, err)
|
||||
telemetry := &recordingWorkerTelemetry{}
|
||||
service, err := acceptintent.New(acceptintent.Config{
|
||||
Store: acceptanceStore,
|
||||
UserDirectory: userDirectory,
|
||||
Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()},
|
||||
Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
|
||||
Telemetry: telemetry,
|
||||
PushMaxAttempts: 3,
|
||||
EmailMaxAttempts: 7,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
AdminRouting: config.AdminRoutingConfig{},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
consumer, err := NewIntentConsumer(IntentConsumerConfig{
|
||||
Client: client,
|
||||
Stream: "notification:intents",
|
||||
BlockTimeout: 25 * time.Millisecond,
|
||||
Acceptor: service,
|
||||
MalformedRecorder: malformedStore,
|
||||
OffsetStore: offsetStore,
|
||||
Telemetry: telemetry,
|
||||
Clock: fixedClock{now: time.UnixMilli(1775121700001).UTC()},
|
||||
}, slog.New(slog.NewTextHandler(io.Discard, nil)))
|
||||
require.NoError(t, err)
|
||||
|
||||
return intentConsumerFixture{
|
||||
client: client,
|
||||
stream: "notification:intents",
|
||||
acceptanceStore: acceptanceStore,
|
||||
offsetStore: offsetStore,
|
||||
consumer: consumer,
|
||||
telemetry: telemetry,
|
||||
}
|
||||
}
|
||||
|
||||
func addValidIntent(t *testing.T, client *redis.Client, stream string, payloadJSON string) string {
|
||||
t.Helper()
|
||||
|
||||
messageID, err := client.XAdd(context.Background(), &redis.XAddArgs{
|
||||
Stream: stream,
|
||||
Values: map[string]any{
|
||||
"notification_type": "game.turn.ready",
|
||||
"producer": "game_master",
|
||||
"audience_kind": "user",
|
||||
"recipient_user_ids_json": `["user-1"]`,
|
||||
"idempotency_key": "game-123:turn-ready",
|
||||
"occurred_at_ms": "1775121700000",
|
||||
"payload_json": payloadJSON,
|
||||
},
|
||||
}).Result()
|
||||
require.NoError(t, err)
|
||||
|
||||
return messageID
|
||||
}
|
||||
|
||||
type runningIntentConsumer struct {
|
||||
cancel context.CancelFunc
|
||||
resultCh chan error
|
||||
}
|
||||
|
||||
func runIntentConsumer(t *testing.T, consumer *IntentConsumer) runningIntentConsumer {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- consumer.Run(ctx)
|
||||
}()
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
return runningIntentConsumer{
|
||||
cancel: cancel,
|
||||
resultCh: resultCh,
|
||||
}
|
||||
}
|
||||
|
||||
func (r runningIntentConsumer) stop(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
r.cancel()
|
||||
|
||||
select {
|
||||
case err := <-r.resultCh:
|
||||
require.ErrorIs(t, err, context.Canceled)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "intent consumer did not stop")
|
||||
}
|
||||
}
|
||||
|
||||
type fixedClock struct {
|
||||
now time.Time
|
||||
}
|
||||
|
||||
func (clock fixedClock) Now() time.Time {
|
||||
return clock.now
|
||||
}
|
||||
|
||||
type stubUserDirectory struct {
|
||||
records map[string]acceptintent.UserRecord
|
||||
err error
|
||||
}
|
||||
|
||||
func (directory stubUserDirectory) GetUserByID(_ context.Context, userID string) (acceptintent.UserRecord, error) {
|
||||
if directory.err != nil {
|
||||
return acceptintent.UserRecord{}, directory.err
|
||||
}
|
||||
if record, ok := directory.records[userID]; ok {
|
||||
return record, nil
|
||||
}
|
||||
|
||||
return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
|
||||
}
|
||||
@@ -10,11 +10,13 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/logging"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/publishpush"
|
||||
"galaxy/notification/internal/service/routestate"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -29,7 +31,7 @@ const (
|
||||
// PushPublisher.
|
||||
type PushRouteStateStore interface {
|
||||
// ListDueRoutes loads due scheduled routes.
|
||||
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
|
||||
ListDueRoutes(context.Context, time.Time, int64) ([]routestate.ScheduledRoute, error)
|
||||
|
||||
// TryAcquireRouteLease attempts to acquire one temporary route lease.
|
||||
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
|
||||
@@ -44,13 +46,13 @@ type PushRouteStateStore interface {
|
||||
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
|
||||
|
||||
// CompleteRoutePublished records one successful publication.
|
||||
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
|
||||
CompleteRoutePublished(context.Context, routestate.CompleteRoutePublishedInput) error
|
||||
|
||||
// CompleteRouteFailed records one retryable publication failure.
|
||||
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
|
||||
CompleteRouteFailed(context.Context, routestate.CompleteRouteFailedInput) error
|
||||
|
||||
// CompleteRouteDeadLetter records one exhausted publication failure.
|
||||
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
|
||||
CompleteRouteDeadLetter(context.Context, routestate.CompleteRouteDeadLetterInput) error
|
||||
}
|
||||
|
||||
// PushEventEncoder encodes one push-capable notification route into a
|
||||
@@ -109,6 +111,10 @@ type PushPublisherConfig struct {
|
||||
|
||||
// Clock provides wall-clock timestamps.
|
||||
Clock Clock
|
||||
|
||||
// StreamPublisher emits the outbound Gateway client-event before the
|
||||
// route's PostgreSQL state transition is committed.
|
||||
StreamPublisher StreamPublisher
|
||||
}
|
||||
|
||||
// PushPublisher publishes due push routes into the Gateway client-events
|
||||
@@ -125,6 +131,7 @@ type PushPublisher struct {
|
||||
encoder PushEventEncoder
|
||||
telemetry RoutePublisherTelemetry
|
||||
clock Clock
|
||||
streamPublisher StreamPublisher
|
||||
workerToken string
|
||||
logger *slog.Logger
|
||||
}
|
||||
@@ -134,6 +141,8 @@ func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublis
|
||||
switch {
|
||||
case cfg.Store == nil:
|
||||
return nil, errors.New("new push publisher: nil store")
|
||||
case cfg.StreamPublisher == nil:
|
||||
return nil, errors.New("new push publisher: nil stream publisher")
|
||||
case strings.TrimSpace(cfg.GatewayStream) == "":
|
||||
return nil, errors.New("new push publisher: gateway stream must not be empty")
|
||||
case cfg.GatewayStreamMaxLen <= 0:
|
||||
@@ -180,6 +189,7 @@ func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublis
|
||||
encoder: cfg.Encoder,
|
||||
telemetry: cfg.Telemetry,
|
||||
clock: cfg.Clock,
|
||||
streamPublisher: cfg.StreamPublisher,
|
||||
workerToken: workerToken,
|
||||
logger: logger.With("component", "push_publisher", "stream", cfg.GatewayStream),
|
||||
}, nil
|
||||
@@ -260,7 +270,7 @@ func (publisher *PushPublisher) publishDueRoutes(ctx context.Context) (bool, err
|
||||
return progress, nil
|
||||
}
|
||||
|
||||
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
|
||||
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute routestate.ScheduledRoute) (bool, error) {
|
||||
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
|
||||
@@ -306,7 +316,19 @@ func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time,
|
||||
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationPayloadEncoding, err.Error())
|
||||
}
|
||||
|
||||
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
|
||||
xaddArgs := &redis.XAddArgs{
|
||||
Stream: publisher.gatewayStream,
|
||||
Values: eventValues(event),
|
||||
}
|
||||
if publisher.gatewayStreamMaxLen > 0 {
|
||||
xaddArgs.MaxLen = publisher.gatewayStreamMaxLen
|
||||
xaddArgs.Approx = true
|
||||
}
|
||||
if err := publisher.streamPublisher.XAdd(ctx, xaddArgs).Err(); err != nil {
|
||||
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
|
||||
}
|
||||
|
||||
err = publisher.store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
|
||||
ExpectedRoute: route,
|
||||
LeaseToken: publisher.workerToken,
|
||||
PublishedAt: publisher.now(),
|
||||
@@ -335,7 +357,7 @@ func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time,
|
||||
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
|
||||
publisher.logger.Info("push route published", logArgs...)
|
||||
return true, nil
|
||||
case errors.Is(err, redisstate.ErrConflict):
|
||||
case errors.Is(err, routestate.ErrConflict):
|
||||
return false, nil
|
||||
default:
|
||||
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
|
||||
@@ -371,7 +393,7 @@ func (publisher *PushPublisher) recordFailure(
|
||||
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
|
||||
|
||||
if attemptNumber >= route.MaxAttempts {
|
||||
err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
|
||||
err := publisher.store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
|
||||
ExpectedRoute: route,
|
||||
LeaseToken: publisher.workerToken,
|
||||
DeadLetteredAt: failureAt,
|
||||
@@ -384,7 +406,7 @@ func (publisher *PushPublisher) recordFailure(
|
||||
publisher.recordRouteDeadLetter(ctx, notification, route, classification)
|
||||
publisher.logger.Warn("push route dead-lettered", logArgs...)
|
||||
return true, nil
|
||||
case errors.Is(err, redisstate.ErrConflict):
|
||||
case errors.Is(err, routestate.ErrConflict):
|
||||
return false, nil
|
||||
default:
|
||||
return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
|
||||
@@ -392,7 +414,7 @@ func (publisher *PushPublisher) recordFailure(
|
||||
}
|
||||
|
||||
nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
|
||||
err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
|
||||
err := publisher.store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
|
||||
ExpectedRoute: route,
|
||||
LeaseToken: publisher.workerToken,
|
||||
FailedAt: failureAt,
|
||||
@@ -407,7 +429,7 @@ func (publisher *PushPublisher) recordFailure(
|
||||
logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
|
||||
publisher.logger.Warn("push route failed and was rescheduled", logArgs...)
|
||||
return true, nil
|
||||
case errors.Is(err, redisstate.ErrConflict):
|
||||
case errors.Is(err, routestate.ErrConflict):
|
||||
return false, nil
|
||||
default:
|
||||
return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
|
||||
|
||||
@@ -1,318 +0,0 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newPushPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
|
||||
|
||||
running := runPushPublisher(t, fixture.publisher)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
emailRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
|
||||
require.NoError(t, err)
|
||||
require.True(t, found)
|
||||
require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)
|
||||
|
||||
messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
require.Equal(t, "user-1", messages[0].Values["user_id"])
|
||||
require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
|
||||
require.Equal(t, "1775121700000-0/push:user:user-1", messages[0].Values["event_id"])
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
|
||||
}
|
||||
|
||||
func TestPushPublisherRetriesGatewayStreamPublicationFailures(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newPushPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
|
||||
require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
|
||||
|
||||
running := runPushPublisher(t, fixture.publisher)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "retry", pushFailureClassificationGatewayStreamWrite))
|
||||
require.True(t, fixture.telemetry.hasRouteRetry("push"))
|
||||
|
||||
require.NoError(t, fixture.client.Del(context.Background(), fixture.gatewayStream).Err())
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
|
||||
}, 2*time.Second, 10*time.Millisecond)
|
||||
|
||||
messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
|
||||
}
|
||||
|
||||
func TestPushPublisherDeadLettersExhaustedRoute(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newPushPublisherFixture(t)
|
||||
input := validPushAcceptanceInput(fixture.now)
|
||||
for index := range input.Routes {
|
||||
if input.Routes[index].RouteID == "push:user:user-1" {
|
||||
input.Routes[index].AttemptCount = 2
|
||||
input.Routes[index].MaxAttempts = 3
|
||||
}
|
||||
}
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), input))
|
||||
require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
|
||||
|
||||
running := runPushPublisher(t, fixture.publisher)
|
||||
defer running.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 3
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "push:user:user-1")).Bytes()
|
||||
require.NoError(t, err)
|
||||
deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, pushFailureClassificationGatewayStreamWrite, deadLetter.FailureClassification)
|
||||
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "dead_letter", pushFailureClassificationGatewayStreamWrite))
|
||||
require.True(t, fixture.telemetry.hasRouteDeadLetter("push", pushFailureClassificationGatewayStreamWrite))
|
||||
}
|
||||
|
||||
func TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
fixture := newPushPublisherFixture(t)
|
||||
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
|
||||
|
||||
otherPublisher, err := NewPushPublisher(PushPublisherConfig{
|
||||
Store: fixture.store,
|
||||
GatewayStream: fixture.gatewayStream,
|
||||
GatewayStreamMaxLen: 1024,
|
||||
RouteLeaseTTL: 200 * time.Millisecond,
|
||||
RouteBackoffMin: 20 * time.Millisecond,
|
||||
RouteBackoffMax: 20 * time.Millisecond,
|
||||
PollInterval: 10 * time.Millisecond,
|
||||
BatchSize: 16,
|
||||
Clock: newSteppingClock(fixture.now, time.Millisecond),
|
||||
}, testWorkerLogger())
|
||||
require.NoError(t, err)
|
||||
|
||||
first := runPushPublisher(t, fixture.publisher)
|
||||
defer first.stop(t)
|
||||
second := runPushPublisher(t, otherPublisher)
|
||||
defer second.stop(t)
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
|
||||
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
|
||||
}, time.Second, 10*time.Millisecond)
|
||||
|
||||
messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
}
|
||||
|
||||
type pushPublisherFixture struct {
|
||||
client *redis.Client
|
||||
store *redisstate.AcceptanceStore
|
||||
publisher *PushPublisher
|
||||
gatewayStream string
|
||||
now time.Time
|
||||
clock *steppingClock
|
||||
telemetry *recordingWorkerTelemetry
|
||||
}
|
||||
|
||||
func newPushPublisherFixture(t *testing.T) pushPublisherFixture {
|
||||
t.Helper()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := redis.NewClient(&redis.Options{
|
||||
Addr: server.Addr(),
|
||||
Protocol: 2,
|
||||
DisableIdentity: true,
|
||||
})
|
||||
t.Cleanup(func() {
|
||||
assert.NoError(t, client.Close())
|
||||
})
|
||||
|
||||
store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
now := time.UnixMilli(1775121700000).UTC()
|
||||
clock := newSteppingClock(now, time.Millisecond)
|
||||
telemetry := &recordingWorkerTelemetry{}
|
||||
publisher, err := NewPushPublisher(PushPublisherConfig{
|
||||
Store: store,
|
||||
GatewayStream: "gateway:client-events",
|
||||
GatewayStreamMaxLen: 1024,
|
||||
RouteLeaseTTL: 200 * time.Millisecond,
|
||||
RouteBackoffMin: 20 * time.Millisecond,
|
||||
RouteBackoffMax: 20 * time.Millisecond,
|
||||
PollInterval: 10 * time.Millisecond,
|
||||
BatchSize: 16,
|
||||
Telemetry: telemetry,
|
||||
Clock: clock,
|
||||
}, testWorkerLogger())
|
||||
require.NoError(t, err)
|
||||
|
||||
return pushPublisherFixture{
|
||||
client: client,
|
||||
store: store,
|
||||
publisher: publisher,
|
||||
gatewayStream: "gateway:client-events",
|
||||
now: now,
|
||||
clock: clock,
|
||||
telemetry: telemetry,
|
||||
}
|
||||
}
|
||||
|
||||
func validPushAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
|
||||
return acceptintent.CreateAcceptanceInput{
|
||||
Notification: acceptintent.NotificationRecord{
|
||||
NotificationID: "1775121700000-0",
|
||||
NotificationType: intentstream.NotificationTypeGameTurnReady,
|
||||
Producer: intentstream.ProducerGameMaster,
|
||||
AudienceKind: intentstream.AudienceKindUser,
|
||||
RecipientUserIDs: []string{"user-1"},
|
||||
PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
|
||||
IdempotencyKey: "game-123:turn-54",
|
||||
RequestFingerprint: "sha256:deadbeef",
|
||||
RequestID: "request-1",
|
||||
TraceID: "trace-1",
|
||||
OccurredAt: now,
|
||||
AcceptedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
Routes: []acceptintent.NotificationRoute{
|
||||
{
|
||||
NotificationID: "1775121700000-0",
|
||||
RouteID: "push:user:user-1",
|
||||
Channel: intentstream.ChannelPush,
|
||||
RecipientRef: "user:user-1",
|
||||
Status: acceptintent.RouteStatusPending,
|
||||
AttemptCount: 0,
|
||||
MaxAttempts: 3,
|
||||
NextAttemptAt: now,
|
||||
ResolvedEmail: "pilot@example.com",
|
||||
ResolvedLocale: "en",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
{
|
||||
NotificationID: "1775121700000-0",
|
||||
RouteID: "email:user:user-1",
|
||||
Channel: intentstream.ChannelEmail,
|
||||
RecipientRef: "user:user-1",
|
||||
Status: acceptintent.RouteStatusPending,
|
||||
AttemptCount: 0,
|
||||
MaxAttempts: 7,
|
||||
NextAttemptAt: now,
|
||||
ResolvedEmail: "pilot@example.com",
|
||||
ResolvedLocale: "en",
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
},
|
||||
},
|
||||
Idempotency: acceptintent.IdempotencyRecord{
|
||||
Producer: intentstream.ProducerGameMaster,
|
||||
IdempotencyKey: "game-123:turn-54",
|
||||
NotificationID: "1775121700000-0",
|
||||
RequestFingerprint: "sha256:deadbeef",
|
||||
CreatedAt: now,
|
||||
ExpiresAt: now.Add(7 * 24 * time.Hour),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// runningPushPublisher is the handle for one PushPublisher goroutine started
// by runPushPublisher: cancel stops it, resultCh delivers the terminal error
// of its Run call.
type runningPushPublisher struct {
	cancel   context.CancelFunc
	resultCh chan error
}
|
||||
|
||||
func runPushPublisher(t *testing.T, publisher *PushPublisher) runningPushPublisher {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- publisher.Run(ctx)
|
||||
}()
|
||||
|
||||
return runningPushPublisher{
|
||||
cancel: cancel,
|
||||
resultCh: resultCh,
|
||||
}
|
||||
}
|
||||
|
||||
func (r runningPushPublisher) stop(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
r.cancel()
|
||||
|
||||
select {
|
||||
case err := <-r.resultCh:
|
||||
require.ErrorIs(t, err, context.Canceled)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "push publisher did not stop")
|
||||
}
|
||||
}
|
||||
|
||||
type steppingClock struct {
|
||||
mu sync.Mutex
|
||||
current time.Time
|
||||
step time.Duration
|
||||
}
|
||||
|
||||
func newSteppingClock(start time.Time, step time.Duration) *steppingClock {
|
||||
return &steppingClock{
|
||||
current: start.UTC().Truncate(time.Millisecond),
|
||||
step: step,
|
||||
}
|
||||
}
|
||||
|
||||
func (clock *steppingClock) Now() time.Time {
|
||||
clock.mu.Lock()
|
||||
defer clock.mu.Unlock()
|
||||
|
||||
now := clock.current
|
||||
clock.current = clock.current.Add(clock.step).UTC().Truncate(time.Millisecond)
|
||||
|
||||
return now
|
||||
}
|
||||
|
||||
// testWorkerLogger returns a logger that discards every record, keeping
// worker tests quiet.
func testWorkerLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
|
||||
@@ -0,0 +1,161 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SQLRetentionStore performs the durable DELETE statements applied by the
|
||||
// retention worker. Implementations are typically the umbrella PostgreSQL
|
||||
// notification store; the interface keeps the worker decoupled from the
|
||||
// store package.
|
||||
// SQLRetentionStore performs the durable DELETE statements applied by the
// retention worker. The production implementation is the umbrella PostgreSQL
// notification store; this interface keeps the worker decoupled from that
// package.
type SQLRetentionStore interface {
	// DeleteRecordsOlderThan removes record rows whose accepted_at predates
	// cutoff and reports how many rows were removed. Cascading foreign keys
	// also drop the routes and dead_letters owned by the deleted rows.
	DeleteRecordsOlderThan(ctx context.Context, cutoff time.Time) (int64, error)

	// DeleteMalformedIntentsOlderThan removes malformed-intent rows whose
	// recorded_at predates cutoff and reports how many rows were removed.
	DeleteMalformedIntentsOlderThan(ctx context.Context, cutoff time.Time) (int64, error)
}
|
||||
|
||||
// SQLRetentionConfig stores the dependencies and policy used by
|
||||
// SQLRetentionWorker.
|
||||
type SQLRetentionConfig struct {
|
||||
// Store applies the durable DELETE statements.
|
||||
Store SQLRetentionStore
|
||||
|
||||
// RecordRetention bounds how long records (and their cascaded routes and
|
||||
// dead_letters) survive after acceptance.
|
||||
RecordRetention time.Duration
|
||||
|
||||
// MalformedIntentRetention bounds how long malformed-intent rows survive
|
||||
// after recorded_at.
|
||||
MalformedIntentRetention time.Duration
|
||||
|
||||
// CleanupInterval stores the wall-clock period between two retention
|
||||
// passes.
|
||||
CleanupInterval time.Duration
|
||||
|
||||
// Clock provides the wall-clock used to compute cutoff timestamps.
|
||||
Clock Clock
|
||||
}
|
||||
|
||||
// SQLRetentionWorker periodically deletes records and malformed-intent rows
|
||||
// whose retention window has expired. The worker replaces the per-key
|
||||
// Redis EXPIRE eviction that maintained TTLs on the previous Redis-backed
|
||||
// notification keyspace.
|
||||
type SQLRetentionWorker struct {
|
||||
store SQLRetentionStore
|
||||
recordRetention time.Duration
|
||||
malformedIntentRetention time.Duration
|
||||
cleanupInterval time.Duration
|
||||
clock Clock
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
// NewSQLRetentionWorker constructs the periodic retention worker.
|
||||
func NewSQLRetentionWorker(cfg SQLRetentionConfig, logger *slog.Logger) (*SQLRetentionWorker, error) {
|
||||
switch {
|
||||
case cfg.Store == nil:
|
||||
return nil, errors.New("new sql retention worker: nil store")
|
||||
case cfg.RecordRetention <= 0:
|
||||
return nil, errors.New("new sql retention worker: non-positive record retention")
|
||||
case cfg.MalformedIntentRetention <= 0:
|
||||
return nil, errors.New("new sql retention worker: non-positive malformed intent retention")
|
||||
case cfg.CleanupInterval <= 0:
|
||||
return nil, errors.New("new sql retention worker: non-positive cleanup interval")
|
||||
case cfg.Clock == nil:
|
||||
return nil, errors.New("new sql retention worker: nil clock")
|
||||
}
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
return &SQLRetentionWorker{
|
||||
store: cfg.Store,
|
||||
recordRetention: cfg.RecordRetention,
|
||||
malformedIntentRetention: cfg.MalformedIntentRetention,
|
||||
cleanupInterval: cfg.CleanupInterval,
|
||||
clock: cfg.Clock,
|
||||
logger: logger.With("component", "sql_retention_worker"),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run starts the retention loop and blocks until ctx is canceled.
|
||||
func (worker *SQLRetentionWorker) Run(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("run sql retention worker: nil context")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if worker == nil {
|
||||
return errors.New("run sql retention worker: nil worker")
|
||||
}
|
||||
|
||||
worker.logger.Info("sql retention worker started",
|
||||
"record_retention", worker.recordRetention.String(),
|
||||
"malformed_intent_retention", worker.malformedIntentRetention.String(),
|
||||
"cleanup_interval", worker.cleanupInterval.String(),
|
||||
)
|
||||
defer worker.logger.Info("sql retention worker stopped")
|
||||
|
||||
// First pass runs immediately so a freshly started service does not wait
|
||||
// one full interval before evicting stale rows.
|
||||
worker.runOnce(ctx)
|
||||
|
||||
ticker := time.NewTicker(worker.cleanupInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
worker.runOnce(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown stops the retention worker within ctx.
|
||||
func (worker *SQLRetentionWorker) Shutdown(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("shutdown sql retention worker: nil context")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (worker *SQLRetentionWorker) runOnce(ctx context.Context) {
|
||||
now := worker.clock.Now().UTC()
|
||||
|
||||
recordCutoff := now.Add(-worker.recordRetention)
|
||||
if deleted, err := worker.store.DeleteRecordsOlderThan(ctx, recordCutoff); err != nil {
|
||||
worker.logger.Warn("delete expired records failed",
|
||||
"cutoff", recordCutoff,
|
||||
"error", fmt.Sprintf("%v", err),
|
||||
)
|
||||
} else if deleted > 0 {
|
||||
worker.logger.Info("expired records deleted",
|
||||
"cutoff", recordCutoff,
|
||||
"deleted", deleted,
|
||||
)
|
||||
}
|
||||
|
||||
malformedCutoff := now.Add(-worker.malformedIntentRetention)
|
||||
if deleted, err := worker.store.DeleteMalformedIntentsOlderThan(ctx, malformedCutoff); err != nil {
|
||||
worker.logger.Warn("delete expired malformed intents failed",
|
||||
"cutoff", malformedCutoff,
|
||||
"error", fmt.Sprintf("%v", err),
|
||||
)
|
||||
} else if deleted > 0 {
|
||||
worker.logger.Info("expired malformed intents deleted",
|
||||
"cutoff", malformedCutoff,
|
||||
"deleted", deleted,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// StreamPublisher abstracts the subset of the Redis Streams API used by the
|
||||
// route publishers to emit one outbound stream entry. The default
|
||||
// implementation in production wiring is `*redis.Client`. Tests substitute
|
||||
// an in-memory fake.
|
||||
type StreamPublisher interface {
|
||||
// XAdd appends one entry to the configured stream. Implementations must
|
||||
// honour `args.MaxLen` plus `args.Approx == true` for approximate trimming
|
||||
// when the caller sets them.
|
||||
XAdd(ctx context.Context, args *redis.XAddArgs) *redis.StringCmd
|
||||
}
|
||||
Reference in New Issue
Block a user