feat: notification service
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
// Package adapters reserves the adapter namespace of Notification Service.
|
||||
package adapters
|
||||
@@ -0,0 +1,72 @@
|
||||
// Package redisadapter provides the Redis client helpers used by Notification
|
||||
// Service runtime wiring.
|
||||
package redisadapter
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"galaxy/notification/internal/config"
|
||||
"galaxy/notification/internal/telemetry"
|
||||
|
||||
"github.com/redis/go-redis/extra/redisotel/v9"
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// NewClient constructs one Redis client from cfg.
|
||||
func NewClient(cfg config.RedisConfig) *redis.Client {
|
||||
return redis.NewClient(&redis.Options{
|
||||
Addr: cfg.Addr,
|
||||
Username: cfg.Username,
|
||||
Password: cfg.Password,
|
||||
DB: cfg.DB,
|
||||
TLSConfig: cfg.TLSConfig(),
|
||||
DialTimeout: cfg.OperationTimeout,
|
||||
ReadTimeout: cfg.OperationTimeout,
|
||||
WriteTimeout: cfg.OperationTimeout,
|
||||
})
|
||||
}
|
||||
|
||||
// InstrumentClient attaches Redis tracing and metrics exporters to client when
|
||||
// telemetryRuntime is available.
|
||||
func InstrumentClient(client *redis.Client, telemetryRuntime *telemetry.Runtime) error {
|
||||
if client == nil {
|
||||
return fmt.Errorf("instrument redis client: nil client")
|
||||
}
|
||||
if telemetryRuntime == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := redisotel.InstrumentTracing(
|
||||
client,
|
||||
redisotel.WithTracerProvider(telemetryRuntime.TracerProvider()),
|
||||
redisotel.WithDBStatement(false),
|
||||
); err != nil {
|
||||
return fmt.Errorf("instrument redis client tracing: %w", err)
|
||||
}
|
||||
if err := redisotel.InstrumentMetrics(
|
||||
client,
|
||||
redisotel.WithMeterProvider(telemetryRuntime.MeterProvider()),
|
||||
); err != nil {
|
||||
return fmt.Errorf("instrument redis client metrics: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ping performs the startup Redis connectivity check bounded by
|
||||
// cfg.OperationTimeout.
|
||||
func Ping(ctx context.Context, cfg config.RedisConfig, client *redis.Client) error {
|
||||
if client == nil {
|
||||
return fmt.Errorf("ping redis: nil client")
|
||||
}
|
||||
|
||||
pingCtx, cancel := context.WithTimeout(ctx, cfg.OperationTimeout)
|
||||
defer cancel()
|
||||
|
||||
if err := client.Ping(pingCtx).Err(); err != nil {
|
||||
return fmt.Errorf("ping redis: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// AcceptanceStore provides the Redis-backed durable storage used by the
// intent-acceptance use case.
type AcceptanceStore struct {
	// client is the shared Redis connection used for reads.
	client *redis.Client
	// writer performs the multi-key acceptance write transaction.
	writer *AtomicWriter
	// keys derives every Redis key name used by the store.
	keys Keyspace
	// cfg stores the retention (TTL) settings applied on writes.
	cfg AcceptanceConfig
}
|
||||
|
||||
// NewAcceptanceStore constructs one Redis-backed acceptance store.
|
||||
func NewAcceptanceStore(client *redis.Client, cfg AcceptanceConfig) (*AcceptanceStore, error) {
|
||||
if client == nil {
|
||||
return nil, errors.New("new notification acceptance store: nil redis client")
|
||||
}
|
||||
|
||||
writer, err := NewAtomicWriter(client, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new notification acceptance store: %w", err)
|
||||
}
|
||||
|
||||
return &AcceptanceStore{
|
||||
client: client,
|
||||
writer: writer,
|
||||
keys: Keyspace{},
|
||||
cfg: cfg,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CreateAcceptance stores one complete accepted notification write set in
|
||||
// Redis.
|
||||
func (store *AcceptanceStore) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
|
||||
if store == nil || store.client == nil || store.writer == nil {
|
||||
return errors.New("create notification acceptance: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("create notification acceptance: nil context")
|
||||
}
|
||||
if err := input.Validate(); err != nil {
|
||||
return fmt.Errorf("create notification acceptance: %w", err)
|
||||
}
|
||||
|
||||
err := store.writer.CreateAcceptance(ctx, input)
|
||||
if errors.Is(err, ErrConflict) {
|
||||
return fmt.Errorf("create notification acceptance: %w", acceptintent.ErrConflict)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("create notification acceptance: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetIdempotency loads one accepted idempotency scope from Redis.
|
||||
func (store *AcceptanceStore) GetIdempotency(ctx context.Context, producer intentstream.Producer, idempotencyKey string) (acceptintent.IdempotencyRecord, bool, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return acceptintent.IdempotencyRecord{}, false, errors.New("get notification idempotency: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return acceptintent.IdempotencyRecord{}, false, errors.New("get notification idempotency: nil context")
|
||||
}
|
||||
|
||||
payload, err := store.client.Get(ctx, store.keys.Idempotency(producer, idempotencyKey)).Bytes()
|
||||
switch {
|
||||
case errors.Is(err, redis.Nil):
|
||||
return acceptintent.IdempotencyRecord{}, false, nil
|
||||
case err != nil:
|
||||
return acceptintent.IdempotencyRecord{}, false, fmt.Errorf("get notification idempotency: %w", err)
|
||||
}
|
||||
|
||||
record, err := UnmarshalIdempotency(payload)
|
||||
if err != nil {
|
||||
return acceptintent.IdempotencyRecord{}, false, fmt.Errorf("get notification idempotency: %w", err)
|
||||
}
|
||||
|
||||
return record, true, nil
|
||||
}
|
||||
|
||||
// GetNotification loads one accepted notification record from Redis.
|
||||
func (store *AcceptanceStore) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return acceptintent.NotificationRecord{}, false, errors.New("get notification record: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return acceptintent.NotificationRecord{}, false, errors.New("get notification record: nil context")
|
||||
}
|
||||
|
||||
payload, err := store.client.Get(ctx, store.keys.Notification(notificationID)).Bytes()
|
||||
switch {
|
||||
case errors.Is(err, redis.Nil):
|
||||
return acceptintent.NotificationRecord{}, false, nil
|
||||
case err != nil:
|
||||
return acceptintent.NotificationRecord{}, false, fmt.Errorf("get notification record: %w", err)
|
||||
}
|
||||
|
||||
record, err := UnmarshalNotification(payload)
|
||||
if err != nil {
|
||||
return acceptintent.NotificationRecord{}, false, fmt.Errorf("get notification record: %w", err)
|
||||
}
|
||||
|
||||
return record, true, nil
|
||||
}
|
||||
|
||||
// GetRoute loads one accepted notification route by NotificationID and
|
||||
// RouteID.
|
||||
func (store *AcceptanceStore) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return acceptintent.NotificationRoute{}, false, errors.New("get notification route: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return acceptintent.NotificationRoute{}, false, errors.New("get notification route: nil context")
|
||||
}
|
||||
|
||||
payload, err := store.client.Get(ctx, store.keys.Route(notificationID, routeID)).Bytes()
|
||||
switch {
|
||||
case errors.Is(err, redis.Nil):
|
||||
return acceptintent.NotificationRoute{}, false, nil
|
||||
case err != nil:
|
||||
return acceptintent.NotificationRoute{}, false, fmt.Errorf("get notification route: %w", err)
|
||||
}
|
||||
|
||||
record, err := UnmarshalRoute(payload)
|
||||
if err != nil {
|
||||
return acceptintent.NotificationRoute{}, false, fmt.Errorf("get notification route: %w", err)
|
||||
}
|
||||
|
||||
return record, true, nil
|
||||
}
|
||||
@@ -0,0 +1,311 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log/slog"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/config"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestAcceptanceStoreCreateAcceptancePersistsNotificationRoutesAndSchedule
// verifies that one CreateAcceptance call against miniredis persists the
// notification record, the idempotency record, both routes, the schedule
// entry for the pending route only, and the configured TTLs.
func TestAcceptanceStoreCreateAcceptancePersistsNotificationRoutesAndSchedule(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	input := validAdminAcceptanceInput(now)

	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	// Notification record round-trips through the store API.
	notificationRecord, found, err := store.GetNotification(context.Background(), input.Notification.NotificationID)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, input.Notification.NotificationID, notificationRecord.NotificationID)

	// Idempotency record round-trips with its fingerprint intact.
	idempotencyRecord, found, err := store.GetIdempotency(context.Background(), input.Idempotency.Producer, input.Idempotency.IdempotencyKey)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, input.Idempotency.RequestFingerprint, idempotencyRecord.RequestFingerprint)

	// The push route is persisted as skipped (see validAdminAcceptanceInput).
	pushRoutePayload, err := client.Get(context.Background(), Keyspace{}.Route(input.Notification.NotificationID, "push:email:owner@example.com")).Bytes()
	require.NoError(t, err)
	pushRoute, err := UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, acceptintent.RouteStatusSkipped, pushRoute.Status)

	// The email route is persisted as pending.
	emailRouteKey := Keyspace{}.Route(input.Notification.NotificationID, "email:email:owner@example.com")
	emailRoutePayload, err := client.Get(context.Background(), emailRouteKey).Bytes()
	require.NoError(t, err)
	emailRoute, err := UnmarshalRoute(emailRoutePayload)
	require.NoError(t, err)
	require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)

	// Only the pending route is scheduled, scored by NextAttemptAt in ms.
	scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Len(t, scheduled, 1)
	require.Equal(t, emailRouteKey, scheduled[0].Member)
	require.Equal(t, float64(now.UnixMilli()), scheduled[0].Score)

	// TTL windows: notification and route keys carry RecordTTL (~24h) ...
	notificationTTL, err := client.PTTL(context.Background(), Keyspace{}.Notification(input.Notification.NotificationID)).Result()
	require.NoError(t, err)
	require.Greater(t, notificationTTL, 23*time.Hour)
	require.LessOrEqual(t, notificationTTL, 24*time.Hour)

	routeTTL, err := client.PTTL(context.Background(), emailRouteKey).Result()
	require.NoError(t, err)
	require.Greater(t, routeTTL, 23*time.Hour)
	require.LessOrEqual(t, routeTTL, 24*time.Hour)

	// ... while the idempotency key carries IdempotencyTTL (~7d).
	idempotencyTTL, err := client.PTTL(context.Background(), Keyspace{}.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)).Result()
	require.NoError(t, err)
	require.Greater(t, idempotencyTTL, 6*24*time.Hour)
	require.LessOrEqual(t, idempotencyTTL, 7*24*time.Hour)
}
|
||||
|
||||
// TestMalformedIntentStoreRecordPersistsEntry verifies that Record persists a
// malformed-intent entry under its stream-entry key with the configured TTL
// and that the entry round-trips through UnmarshalMalformedIntent.
func TestMalformedIntentStoreRecordPersistsEntry(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)

	entry := malformedintent.Entry{
		StreamEntryID:    "1775121700000-0",
		NotificationType: "game.turn.ready",
		Producer:         "game_master",
		IdempotencyKey:   "game-123:turn-54",
		FailureCode:      malformedintent.FailureCodeInvalidPayload,
		FailureMessage:   "payload_json.turn_number is required",
		RawFields: map[string]any{
			"notification_type": "game.turn.ready",
		},
		RecordedAt: time.UnixMilli(1775121700000).UTC(),
	}

	require.NoError(t, store.Record(context.Background(), entry))

	// The persisted payload decodes back to the recorded entry.
	payload, err := client.Get(context.Background(), Keyspace{}.MalformedIntent(entry.StreamEntryID)).Bytes()
	require.NoError(t, err)
	recordedEntry, err := UnmarshalMalformedIntent(payload)
	require.NoError(t, err)
	require.Equal(t, entry.StreamEntryID, recordedEntry.StreamEntryID)
	require.Equal(t, entry.FailureCode, recordedEntry.FailureCode)

	// The key carries the 72h retention passed to the store constructor.
	ttl, err := client.PTTL(context.Background(), Keyspace{}.MalformedIntent(entry.StreamEntryID)).Result()
	require.NoError(t, err)
	require.Greater(t, ttl, 71*time.Hour)
	require.LessOrEqual(t, ttl, 72*time.Hour)
}
|
||||
|
||||
// TestStreamOffsetStoreLoadAndSave verifies the offset store's not-found
// behavior before any save and the round-trip of a saved stream entry ID.
func TestStreamOffsetStoreLoadAndSave(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewStreamOffsetStore(client)
	require.NoError(t, err)

	// Loading before any save reports "not found" without an error.
	_, found, err := store.Load(context.Background(), "notification:intents")
	require.NoError(t, err)
	require.False(t, found)

	require.NoError(t, store.Save(context.Background(), "notification:intents", "1775121700000-0"))

	// The saved entry ID round-trips verbatim.
	entryID, found, err := store.Load(context.Background(), "notification:intents")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, "1775121700000-0", entryID)
}
|
||||
|
||||
// TestIntentStreamLagReaderReadsOldestUnprocessedEntry verifies that the lag
// reader reports the timestamp of the oldest stream entry after the saved
// offset, advancing as the offset advances, and reports nil once the offset
// has caught up with the stream tip.
func TestIntentStreamLagReaderReadsOldestUnprocessedEntry(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewStreamOffsetStore(client)
	require.NoError(t, err)
	reader, err := NewIntentStreamLagReader(store, "notification:intents")
	require.NoError(t, err)

	// Seed two stream entries with millisecond-timestamp IDs 1s apart.
	firstID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: "notification:intents",
		ID:     "1775121700000-0",
		Values: map[string]any{"payload": "first"},
	}).Result()
	require.NoError(t, err)
	secondID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: "notification:intents",
		ID:     "1775121701000-0",
		Values: map[string]any{"payload": "second"},
	}).Result()
	require.NoError(t, err)

	// With no saved offset, the first entry is the oldest unprocessed one.
	snapshot, err := reader.ReadIntentStreamLagSnapshot(context.Background())
	require.NoError(t, err)
	require.NotNil(t, snapshot.OldestUnprocessedAt)
	require.Equal(t, time.UnixMilli(1775121700000).UTC(), *snapshot.OldestUnprocessedAt)

	// After processing the first entry, the second becomes the oldest.
	require.NoError(t, store.Save(context.Background(), "notification:intents", firstID))
	snapshot, err = reader.ReadIntentStreamLagSnapshot(context.Background())
	require.NoError(t, err)
	require.NotNil(t, snapshot.OldestUnprocessedAt)
	require.Equal(t, time.UnixMilli(1775121701000).UTC(), *snapshot.OldestUnprocessedAt)

	// Fully caught up: no unprocessed entry remains.
	require.NoError(t, store.Save(context.Background(), "notification:intents", secondID))
	snapshot, err = reader.ReadIntentStreamLagSnapshot(context.Background())
	require.NoError(t, err)
	require.Nil(t, snapshot.OldestUnprocessedAt)
}
|
||||
|
||||
// TestAcceptanceStoreWorksWithAcceptIntentService wires the Redis-backed
// store into the real acceptintent service and verifies an end-to-end
// admin-email intent is accepted and its notification record is persisted.
func TestAcceptanceStoreWorksWithAcceptIntentService(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Service dependencies are deterministic stubs: a fixed clock, a user
	// directory with no users, and a discarded logger.
	service, err := acceptintent.New(acceptintent.Config{
		Store:            store,
		UserDirectory:    staticUserDirectory{},
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting: config.AdminRoutingConfig{
			LobbyApplicationSubmitted: []string{"owner@example.com"},
		},
	})
	require.NoError(t, err)

	result, err := service.Execute(context.Background(), acceptintent.AcceptInput{
		NotificationID: "1775121700000-0",
		Intent: intentstream.Intent{
			NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted,
			Producer:         intentstream.ProducerGameLobby,
			AudienceKind:     intentstream.AudienceKindAdminEmail,
			IdempotencyKey:   "game-456:application-submitted:user-42",
			OccurredAt:       time.UnixMilli(1775121700002).UTC(),
			PayloadJSON:      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
		},
	})
	require.NoError(t, err)
	require.Equal(t, acceptintent.OutcomeAccepted, result.Outcome)

	// The accepted intent is durably stored under its notification ID.
	record, found, err := store.GetNotification(context.Background(), "1775121700000-0")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, "1775121700000-0", record.NotificationID)
}
|
||||
|
||||
// fixedClock is a test clock whose Now always returns the same instant,
// keeping service timestamps deterministic.
type fixedClock struct {
	// now is the constant instant returned by Now.
	now time.Time
}

// Now returns the fixed instant configured at construction.
func (clock fixedClock) Now() time.Time {
	return clock.now
}
|
||||
|
||||
// validAdminAcceptanceInput builds a complete, valid CreateAcceptanceInput
// for an admin-email lobby-application intent, with every timestamp derived
// from now. It carries one skipped push route and one pending email route so
// callers can assert schedule behavior for each status.
func validAdminAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeLobbyApplicationSubmitted,
			Producer:           intentstream.ProducerGameLobby,
			AudienceKind:       intentstream.AudienceKindAdminEmail,
			PayloadJSON:        `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
			IdempotencyKey:     "game-456:application-submitted:user-42",
			RequestFingerprint: "sha256:deadbeef",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			{
				// Push route: skipped, so it must not be scheduled.
				NotificationID: "1775121700000-0",
				RouteID:        "push:email:owner@example.com",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "email:owner@example.com",
				Status:         acceptintent.RouteStatusSkipped,
				AttemptCount:   0,
				MaxAttempts:    3,
				ResolvedEmail:  "owner@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
				SkippedAt:      now,
			},
			{
				// Email route: pending with NextAttemptAt = now, so it must
				// be scheduled at score now.UnixMilli().
				NotificationID: "1775121700000-0",
				RouteID:        "email:email:owner@example.com",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "email:owner@example.com",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "owner@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameLobby,
			IdempotencyKey:     "game-456:application-submitted:user-42",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}
|
||||
|
||||
// newTestRedisClient returns a go-redis client connected to the given
// miniredis server and registers its Close with the test cleanup.
func newTestRedisClient(t *testing.T, server *miniredis.Miniredis) *redis.Client {
	t.Helper()

	client := redis.NewClient(&redis.Options{
		Addr: server.Addr(),
		// RESP2 and no CLIENT SETINFO handshake — presumably because
		// miniredis does not support the RESP3/identity commands; verify
		// against the miniredis version in go.mod.
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, client.Close())
	})

	return client
}
|
||||
|
||||
// staticUserDirectory is a stub user directory that knows no users.
type staticUserDirectory struct{}

// GetUserByID always reports the recipient as unknown; tests using it only
// exercise audiences that do not need user resolution.
func (staticUserDirectory) GetUserByID(context.Context, string) (acceptintent.UserRecord, error) {
	return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
}
|
||||
@@ -0,0 +1,157 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// AcceptanceConfig stores the retention settings applied to accepted durable
// notification state. All durations must be strictly positive; Validate
// rejects zero or negative values.
type AcceptanceConfig struct {
	// RecordTTL stores the retention period applied to notification and route
	// records.
	RecordTTL time.Duration

	// DeadLetterTTL stores the retention period applied to route dead-letter
	// entries.
	DeadLetterTTL time.Duration

	// IdempotencyTTL stores the retention period applied to idempotency
	// reservations.
	IdempotencyTTL time.Duration
}
|
||||
|
||||
// Validate reports whether cfg contains usable retention settings.
|
||||
func (cfg AcceptanceConfig) Validate() error {
|
||||
switch {
|
||||
case cfg.RecordTTL <= 0:
|
||||
return fmt.Errorf("record ttl must be positive")
|
||||
case cfg.DeadLetterTTL <= 0:
|
||||
return fmt.Errorf("dead-letter ttl must be positive")
|
||||
case cfg.IdempotencyTTL <= 0:
|
||||
return fmt.Errorf("idempotency ttl must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// AtomicWriter performs the minimal multi-key Redis mutations required by
// notification intent acceptance.
type AtomicWriter struct {
	// client is the Redis connection used for the WATCH/MULTI transaction.
	client *redis.Client
	// keys derives every Redis key name touched by a write.
	keys Keyspace
	// cfg supplies the TTLs applied to the written keys.
	cfg AcceptanceConfig
}
|
||||
|
||||
// NewAtomicWriter constructs a low-level Redis mutation helper.
|
||||
func NewAtomicWriter(client *redis.Client, cfg AcceptanceConfig) (*AtomicWriter, error) {
|
||||
if client == nil {
|
||||
return nil, errors.New("new notification redis atomic writer: nil client")
|
||||
}
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("new notification redis atomic writer: %w", err)
|
||||
}
|
||||
|
||||
return &AtomicWriter{
|
||||
client: client,
|
||||
keys: Keyspace{},
|
||||
cfg: cfg,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// CreateAcceptance stores one notification record, all derived routes, and
// the matching idempotency reservation in one optimistic Redis transaction.
// All keys are WATCHed and required to be absent; any pre-existing key or a
// concurrent write between WATCH and EXEC yields ErrConflict.
func (writer *AtomicWriter) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	if writer == nil || writer.client == nil {
		return errors.New("create notification acceptance in redis: nil writer")
	}
	if ctx == nil {
		return errors.New("create notification acceptance in redis: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}

	// Serialize everything before entering the transaction so encoding
	// failures never abort a half-built pipeline.
	notificationPayload, err := MarshalNotification(input.Notification)
	if err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}
	idempotencyPayload, err := MarshalIdempotency(input.Idempotency)
	if err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}

	routePayloads := make([][]byte, len(input.Routes))
	routeKeys := make([]string, len(input.Routes))
	scheduledRouteKeys := make([]string, 0, len(input.Routes))
	scheduledRouteScores := make([]float64, 0, len(input.Routes))
	for index, route := range input.Routes {
		payload, err := MarshalRoute(route)
		if err != nil {
			return fmt.Errorf("create notification acceptance in redis: route %d: %w", index, err)
		}
		routePayloads[index] = payload
		routeKeys[index] = writer.keys.Route(route.NotificationID, route.RouteID)
		// Only pending routes enter the delivery schedule, scored by their
		// next attempt time in epoch milliseconds (UTC).
		if route.Status == acceptintent.RouteStatusPending {
			scheduledRouteKeys = append(scheduledRouteKeys, routeKeys[index])
			scheduledRouteScores = append(scheduledRouteScores, float64(route.NextAttemptAt.UTC().UnixMilli()))
		}
	}

	notificationKey := writer.keys.Notification(input.Notification.NotificationID)
	idempotencyKey := writer.keys.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)
	watchKeys := append([]string{notificationKey, idempotencyKey}, routeKeys...)

	// Optimistic concurrency: WATCH every key we intend to create, verify
	// each is absent, then write them all in one MULTI/EXEC pipeline. A
	// concurrent modification of any watched key fails EXEC with
	// redis.TxFailedErr.
	watchErr := writer.client.Watch(ctx, func(tx *redis.Tx) error {
		for _, key := range watchKeys {
			if err := ensureKeyAbsent(ctx, tx, key); err != nil {
				return err
			}
		}

		_, err := tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, notificationKey, notificationPayload, writer.cfg.RecordTTL)
			pipe.Set(ctx, idempotencyKey, idempotencyPayload, writer.cfg.IdempotencyTTL)
			for index, routeKey := range routeKeys {
				pipe.Set(ctx, routeKey, routePayloads[index], writer.cfg.RecordTTL)
			}
			for index, routeKey := range scheduledRouteKeys {
				pipe.ZAdd(ctx, writer.keys.RouteSchedule(), redis.Z{
					Score:  scheduledRouteScores[index],
					Member: routeKey,
				})
			}

			return nil
		})

		return err
	}, watchKeys...)

	switch {
	case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr):
		// Both "key already exists" and "lost the optimistic race" surface
		// as the same conflict sentinel to callers.
		return ErrConflict
	case watchErr != nil:
		return fmt.Errorf("create notification acceptance in redis: %w", watchErr)
	default:
		return nil
	}
}
|
||||
|
||||
func ensureKeyAbsent(ctx context.Context, tx *redis.Tx, key string) error {
|
||||
exists, err := tx.Exists(ctx, key).Result()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if exists > 0 {
|
||||
return ErrConflict
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,547 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
)
|
||||
|
||||
// StreamOffset stores the persisted progress of the plain-XREAD intent
// consumer. It mirrors the streamOffsetJSON wire type.
type StreamOffset struct {
	// Stream stores the Redis Stream name.
	Stream string

	// LastProcessedEntryID stores the last durably processed Redis Stream entry
	// identifier.
	LastProcessedEntryID string

	// UpdatedAt stores when the offset record was last updated.
	UpdatedAt time.Time
}
|
||||
|
||||
// DeadLetterEntry stores one terminal route-publication failure recorded for
// later operator inspection. It mirrors the deadLetterEntryJSON wire type.
type DeadLetterEntry struct {
	// NotificationID stores the owning notification identifier.
	NotificationID string

	// RouteID stores the exhausted route identifier.
	RouteID string

	// Channel stores the failed route channel.
	Channel intentstream.Channel

	// RecipientRef stores the stable failed recipient slot identifier.
	RecipientRef string

	// FinalAttemptCount stores how many publication attempts were consumed.
	FinalAttemptCount int

	// MaxAttempts stores the configured retry budget for Channel.
	MaxAttempts int

	// FailureClassification stores the stable classified failure reason.
	FailureClassification string

	// FailureMessage stores the last failure detail.
	FailureMessage string

	// CreatedAt stores when the route moved to dead_letter.
	CreatedAt time.Time

	// RecoveryHint stores the optional operator-facing recovery hint.
	RecoveryHint string
}
|
||||
|
||||
// notificationRecordJSON is the strict JSON wire form of
// acceptintent.NotificationRecord; all timestamps are epoch milliseconds.
type notificationRecordJSON struct {
	NotificationID     string                        `json:"notification_id"`
	NotificationType   intentstream.NotificationType `json:"notification_type"`
	Producer           intentstream.Producer         `json:"producer"`
	AudienceKind       intentstream.AudienceKind     `json:"audience_kind"`
	RecipientUserIDs   []string                      `json:"recipient_user_ids,omitempty"`
	PayloadJSON        string                        `json:"payload_json"`
	IdempotencyKey     string                        `json:"idempotency_key"`
	RequestFingerprint string                        `json:"request_fingerprint"`
	RequestID          string                        `json:"request_id,omitempty"`
	TraceID            string                        `json:"trace_id,omitempty"`
	OccurredAtMS       int64                         `json:"occurred_at_ms"`
	AcceptedAtMS       int64                         `json:"accepted_at_ms"`
	UpdatedAtMS        int64                         `json:"updated_at_ms"`
}
|
||||
|
||||
// notificationRouteJSON is the strict JSON wire form of
// acceptintent.NotificationRoute; optional timestamps are pointers so absent
// values are omitted rather than encoded as zero.
type notificationRouteJSON struct {
	NotificationID          string                   `json:"notification_id"`
	RouteID                 string                   `json:"route_id"`
	Channel                 intentstream.Channel     `json:"channel"`
	RecipientRef            string                   `json:"recipient_ref"`
	Status                  acceptintent.RouteStatus `json:"status"`
	AttemptCount            int                      `json:"attempt_count"`
	MaxAttempts             int                      `json:"max_attempts"`
	NextAttemptAtMS         *int64                   `json:"next_attempt_at_ms,omitempty"`
	ResolvedEmail           string                   `json:"resolved_email,omitempty"`
	ResolvedLocale          string                   `json:"resolved_locale,omitempty"`
	LastErrorClassification string                   `json:"last_error_classification,omitempty"`
	LastErrorMessage        string                   `json:"last_error_message,omitempty"`
	LastErrorAtMS           *int64                   `json:"last_error_at_ms,omitempty"`
	CreatedAtMS             int64                    `json:"created_at_ms"`
	UpdatedAtMS             int64                    `json:"updated_at_ms"`
	PublishedAtMS           *int64                   `json:"published_at_ms,omitempty"`
	DeadLetteredAtMS        *int64                   `json:"dead_lettered_at_ms,omitempty"`
	SkippedAtMS             *int64                   `json:"skipped_at_ms,omitempty"`
}
|
||||
|
||||
// idempotencyRecordJSON is the strict JSON wire form of
// acceptintent.IdempotencyRecord; timestamps are epoch milliseconds.
type idempotencyRecordJSON struct {
	Producer           intentstream.Producer `json:"producer"`
	IdempotencyKey     string                `json:"idempotency_key"`
	NotificationID     string                `json:"notification_id"`
	RequestFingerprint string                `json:"request_fingerprint"`
	CreatedAtMS        int64                 `json:"created_at_ms"`
	ExpiresAtMS        int64                 `json:"expires_at_ms"`
}
|
||||
|
||||
// malformedIntentJSON is the strict JSON wire form of malformedintent.Entry.
// Type/producer/key are plain strings (not the typed enums) because a
// malformed intent may carry unparseable values.
type malformedIntentJSON struct {
	StreamEntryID    string                      `json:"stream_entry_id"`
	NotificationType string                      `json:"notification_type,omitempty"`
	Producer         string                      `json:"producer,omitempty"`
	IdempotencyKey   string                      `json:"idempotency_key,omitempty"`
	FailureCode      malformedintent.FailureCode `json:"failure_code"`
	FailureMessage   string                      `json:"failure_message"`
	RawFields        map[string]any              `json:"raw_fields_json"`
	RecordedAtMS     int64                       `json:"recorded_at_ms"`
}
|
||||
|
||||
// streamOffsetJSON is the strict JSON wire form of StreamOffset; UpdatedAtMS
// is epoch milliseconds.
type streamOffsetJSON struct {
	Stream               string `json:"stream"`
	LastProcessedEntryID string `json:"last_processed_entry_id"`
	UpdatedAtMS          int64  `json:"updated_at_ms"`
}
|
||||
|
||||
// deadLetterEntryJSON is the strict JSON wire form of DeadLetterEntry;
// CreatedAtMS is epoch milliseconds.
type deadLetterEntryJSON struct {
	NotificationID        string               `json:"notification_id"`
	RouteID               string               `json:"route_id"`
	Channel               intentstream.Channel `json:"channel"`
	RecipientRef          string               `json:"recipient_ref"`
	FinalAttemptCount     int                  `json:"final_attempt_count"`
	MaxAttempts           int                  `json:"max_attempts"`
	FailureClassification string               `json:"failure_classification"`
	FailureMessage        string               `json:"failure_message"`
	CreatedAtMS           int64                `json:"created_at_ms"`
	RecoveryHint          string               `json:"recovery_hint,omitempty"`
}
|
||||
|
||||
// MarshalNotification marshals one notification record into the strict JSON
// representation owned by Notification Service.
//
// The record is validated first so only invariant-satisfying state is ever
// persisted; time.Time fields are flattened to UTC Unix milliseconds.
func MarshalNotification(record acceptintent.NotificationRecord) ([]byte, error) {
	if err := record.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification record: %w", err)
	}

	return marshalStrictJSON(notificationRecordJSON{
		NotificationID:   record.NotificationID,
		NotificationType: record.NotificationType,
		Producer:         record.Producer,
		AudienceKind:     record.AudienceKind,
		// Copy the slice so the wire struct never aliases caller-owned memory.
		RecipientUserIDs:   append([]string(nil), record.RecipientUserIDs...),
		PayloadJSON:        record.PayloadJSON,
		IdempotencyKey:     record.IdempotencyKey,
		RequestFingerprint: record.RequestFingerprint,
		RequestID:          record.RequestID,
		TraceID:            record.TraceID,
		OccurredAtMS:       unixMilli(record.OccurredAt),
		AcceptedAtMS:       unixMilli(record.AcceptedAt),
		UpdatedAtMS:        unixMilli(record.UpdatedAt),
	})
}
|
||||
|
||||
// UnmarshalNotification unmarshals one strict JSON notification record.
//
// Unknown fields and trailing JSON are rejected (see unmarshalStrictJSON),
// millisecond timestamps are rehydrated as UTC time.Time values, and the
// resulting record is re-validated so corrupted persisted state never leaks
// to callers.
func UnmarshalNotification(payload []byte) (acceptintent.NotificationRecord, error) {
	var wire notificationRecordJSON
	if err := unmarshalStrictJSON(payload, &wire); err != nil {
		return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", err)
	}

	record := acceptintent.NotificationRecord{
		NotificationID:   wire.NotificationID,
		NotificationType: wire.NotificationType,
		Producer:         wire.Producer,
		AudienceKind:     wire.AudienceKind,
		// Copy the slice so the record never aliases the decoder-owned slice.
		RecipientUserIDs:   append([]string(nil), wire.RecipientUserIDs...),
		PayloadJSON:        wire.PayloadJSON,
		IdempotencyKey:     wire.IdempotencyKey,
		RequestFingerprint: wire.RequestFingerprint,
		RequestID:          wire.RequestID,
		TraceID:            wire.TraceID,
		OccurredAt:         time.UnixMilli(wire.OccurredAtMS).UTC(),
		AcceptedAt:         time.UnixMilli(wire.AcceptedAtMS).UTC(),
		UpdatedAt:          time.UnixMilli(wire.UpdatedAtMS).UTC(),
	}
	if err := record.Validate(); err != nil {
		return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", err)
	}

	return record, nil
}
|
||||
|
||||
// MarshalRoute marshals one notification route into the strict JSON
// representation owned by Notification Service.
//
// The route is validated first; required timestamps become UTC Unix
// milliseconds and optional ones become nullable pointers (omitted when the
// source time is zero — see optionalUnixMilli).
func MarshalRoute(route acceptintent.NotificationRoute) ([]byte, error) {
	if err := route.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification route: %w", err)
	}

	return marshalStrictJSON(notificationRouteJSON{
		NotificationID:          route.NotificationID,
		RouteID:                 route.RouteID,
		Channel:                 route.Channel,
		RecipientRef:            route.RecipientRef,
		Status:                  route.Status,
		AttemptCount:            route.AttemptCount,
		MaxAttempts:             route.MaxAttempts,
		NextAttemptAtMS:         optionalUnixMilli(route.NextAttemptAt),
		ResolvedEmail:           route.ResolvedEmail,
		ResolvedLocale:          route.ResolvedLocale,
		LastErrorClassification: route.LastErrorClassification,
		LastErrorMessage:        route.LastErrorMessage,
		LastErrorAtMS:           optionalUnixMilli(route.LastErrorAt),
		CreatedAtMS:             unixMilli(route.CreatedAt),
		UpdatedAtMS:             unixMilli(route.UpdatedAt),
		PublishedAtMS:           optionalUnixMilli(route.PublishedAt),
		DeadLetteredAtMS:        optionalUnixMilli(route.DeadLetteredAt),
		SkippedAtMS:             optionalUnixMilli(route.SkippedAt),
	})
}
|
||||
|
||||
// UnmarshalRoute unmarshals one strict JSON notification route.
|
||||
func UnmarshalRoute(payload []byte) (acceptintent.NotificationRoute, error) {
|
||||
var wire notificationRouteJSON
|
||||
if err := unmarshalStrictJSON(payload, &wire); err != nil {
|
||||
return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", err)
|
||||
}
|
||||
|
||||
route := acceptintent.NotificationRoute{
|
||||
NotificationID: wire.NotificationID,
|
||||
RouteID: wire.RouteID,
|
||||
Channel: wire.Channel,
|
||||
RecipientRef: wire.RecipientRef,
|
||||
Status: wire.Status,
|
||||
AttemptCount: wire.AttemptCount,
|
||||
MaxAttempts: wire.MaxAttempts,
|
||||
ResolvedEmail: wire.ResolvedEmail,
|
||||
ResolvedLocale: wire.ResolvedLocale,
|
||||
LastErrorClassification: wire.LastErrorClassification,
|
||||
LastErrorMessage: wire.LastErrorMessage,
|
||||
CreatedAt: time.UnixMilli(wire.CreatedAtMS).UTC(),
|
||||
UpdatedAt: time.UnixMilli(wire.UpdatedAtMS).UTC(),
|
||||
}
|
||||
if wire.NextAttemptAtMS != nil {
|
||||
route.NextAttemptAt = time.UnixMilli(*wire.NextAttemptAtMS).UTC()
|
||||
}
|
||||
if wire.LastErrorAtMS != nil {
|
||||
route.LastErrorAt = time.UnixMilli(*wire.LastErrorAtMS).UTC()
|
||||
}
|
||||
if wire.PublishedAtMS != nil {
|
||||
route.PublishedAt = time.UnixMilli(*wire.PublishedAtMS).UTC()
|
||||
}
|
||||
if wire.DeadLetteredAtMS != nil {
|
||||
route.DeadLetteredAt = time.UnixMilli(*wire.DeadLetteredAtMS).UTC()
|
||||
}
|
||||
if wire.SkippedAtMS != nil {
|
||||
route.SkippedAt = time.UnixMilli(*wire.SkippedAtMS).UTC()
|
||||
}
|
||||
if err := route.Validate(); err != nil {
|
||||
return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", err)
|
||||
}
|
||||
|
||||
return route, nil
|
||||
}
|
||||
|
||||
// MarshalIdempotency marshals one idempotency record into the strict JSON
// representation owned by Notification Service.
//
// The record is validated first; both timestamps become UTC Unix
// milliseconds.
func MarshalIdempotency(record acceptintent.IdempotencyRecord) ([]byte, error) {
	if err := record.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification idempotency record: %w", err)
	}

	return marshalStrictJSON(idempotencyRecordJSON{
		Producer:           record.Producer,
		IdempotencyKey:     record.IdempotencyKey,
		NotificationID:     record.NotificationID,
		RequestFingerprint: record.RequestFingerprint,
		CreatedAtMS:        unixMilli(record.CreatedAt),
		ExpiresAtMS:        unixMilli(record.ExpiresAt),
	})
}
|
||||
|
||||
// UnmarshalIdempotency unmarshals one strict JSON idempotency record.
//
// Millisecond timestamps are rehydrated as UTC time.Time values and the
// decoded record is re-validated before being returned.
func UnmarshalIdempotency(payload []byte) (acceptintent.IdempotencyRecord, error) {
	var wire idempotencyRecordJSON
	if err := unmarshalStrictJSON(payload, &wire); err != nil {
		return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err)
	}

	record := acceptintent.IdempotencyRecord{
		Producer:           wire.Producer,
		IdempotencyKey:     wire.IdempotencyKey,
		NotificationID:     wire.NotificationID,
		RequestFingerprint: wire.RequestFingerprint,
		CreatedAt:          time.UnixMilli(wire.CreatedAtMS).UTC(),
		ExpiresAt:          time.UnixMilli(wire.ExpiresAtMS).UTC(),
	}
	if err := record.Validate(); err != nil {
		return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err)
	}

	return record, nil
}
|
||||
|
||||
// MarshalDeadLetter marshals one dead-letter entry into the strict JSON
// representation owned by Notification Service.
//
// The entry is validated first; CreatedAt becomes a UTC Unix-millisecond
// timestamp and an empty RecoveryHint is omitted from the output.
func MarshalDeadLetter(entry DeadLetterEntry) ([]byte, error) {
	if err := entry.Validate(); err != nil {
		return nil, fmt.Errorf("marshal dead letter entry: %w", err)
	}

	return marshalStrictJSON(deadLetterEntryJSON{
		NotificationID:        entry.NotificationID,
		RouteID:               entry.RouteID,
		Channel:               entry.Channel,
		RecipientRef:          entry.RecipientRef,
		FinalAttemptCount:     entry.FinalAttemptCount,
		MaxAttempts:           entry.MaxAttempts,
		FailureClassification: entry.FailureClassification,
		FailureMessage:        entry.FailureMessage,
		CreatedAtMS:           unixMilli(entry.CreatedAt),
		RecoveryHint:          entry.RecoveryHint,
	})
}
|
||||
|
||||
// UnmarshalDeadLetter unmarshals one strict JSON dead-letter entry.
|
||||
func UnmarshalDeadLetter(payload []byte) (DeadLetterEntry, error) {
|
||||
var wire deadLetterEntryJSON
|
||||
if err := unmarshalStrictJSON(payload, &wire); err != nil {
|
||||
return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", err)
|
||||
}
|
||||
|
||||
entry := DeadLetterEntry{
|
||||
NotificationID: wire.NotificationID,
|
||||
RouteID: wire.RouteID,
|
||||
Channel: wire.Channel,
|
||||
RecipientRef: wire.RecipientRef,
|
||||
FinalAttemptCount: wire.FinalAttemptCount,
|
||||
MaxAttempts: wire.MaxAttempts,
|
||||
FailureClassification: wire.FailureClassification,
|
||||
FailureMessage: wire.FailureMessage,
|
||||
CreatedAt: time.UnixMilli(wire.CreatedAtMS).UTC(),
|
||||
RecoveryHint: wire.RecoveryHint,
|
||||
}
|
||||
if err := entry.Validate(); err != nil {
|
||||
return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", err)
|
||||
}
|
||||
|
||||
return entry, nil
|
||||
}
|
||||
|
||||
// MarshalMalformedIntent marshals one malformed-intent entry into the strict
// JSON representation owned by Notification Service.
//
// The entry is validated first. RawFields is deep-copied so the wire struct
// never aliases the caller's map.
func MarshalMalformedIntent(entry malformedintent.Entry) ([]byte, error) {
	if err := entry.Validate(); err != nil {
		return nil, fmt.Errorf("marshal malformed intent: %w", err)
	}

	return marshalStrictJSON(malformedIntentJSON{
		StreamEntryID:    entry.StreamEntryID,
		NotificationType: entry.NotificationType,
		Producer:         entry.Producer,
		IdempotencyKey:   entry.IdempotencyKey,
		FailureCode:      entry.FailureCode,
		FailureMessage:   entry.FailureMessage,
		RawFields:        cloneJSONObject(entry.RawFields),
		RecordedAtMS:     unixMilli(entry.RecordedAt),
	})
}
|
||||
|
||||
// UnmarshalMalformedIntent unmarshals one strict JSON malformed-intent entry.
//
// RawFields is deep-copied away from the decoder-owned map and the decoded
// entry is re-validated before being returned.
func UnmarshalMalformedIntent(payload []byte) (malformedintent.Entry, error) {
	var wire malformedIntentJSON
	if err := unmarshalStrictJSON(payload, &wire); err != nil {
		return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", err)
	}

	entry := malformedintent.Entry{
		StreamEntryID:    wire.StreamEntryID,
		NotificationType: wire.NotificationType,
		Producer:         wire.Producer,
		IdempotencyKey:   wire.IdempotencyKey,
		FailureCode:      wire.FailureCode,
		FailureMessage:   wire.FailureMessage,
		RawFields:        cloneJSONObject(wire.RawFields),
		RecordedAt:       time.UnixMilli(wire.RecordedAtMS).UTC(),
	}
	if err := entry.Validate(); err != nil {
		return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", err)
	}

	return entry, nil
}
|
||||
|
||||
// MarshalStreamOffset marshals one stream-offset record into the strict JSON
|
||||
// representation owned by Notification Service.
|
||||
func MarshalStreamOffset(offset StreamOffset) ([]byte, error) {
|
||||
if err := offset.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("marshal stream offset: %w", err)
|
||||
}
|
||||
|
||||
return marshalStrictJSON(streamOffsetJSON{
|
||||
Stream: offset.Stream,
|
||||
LastProcessedEntryID: offset.LastProcessedEntryID,
|
||||
UpdatedAtMS: unixMilli(offset.UpdatedAt),
|
||||
})
|
||||
}
|
||||
|
||||
// UnmarshalStreamOffset unmarshals one strict JSON stream-offset record.
|
||||
func UnmarshalStreamOffset(payload []byte) (StreamOffset, error) {
|
||||
var wire streamOffsetJSON
|
||||
if err := unmarshalStrictJSON(payload, &wire); err != nil {
|
||||
return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err)
|
||||
}
|
||||
|
||||
offset := StreamOffset{
|
||||
Stream: wire.Stream,
|
||||
LastProcessedEntryID: wire.LastProcessedEntryID,
|
||||
UpdatedAt: time.UnixMilli(wire.UpdatedAtMS).UTC(),
|
||||
}
|
||||
if err := offset.Validate(); err != nil {
|
||||
return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err)
|
||||
}
|
||||
|
||||
return offset, nil
|
||||
}
|
||||
|
||||
// Validate reports whether offset contains a complete persisted consumer
|
||||
// progress record.
|
||||
func (offset StreamOffset) Validate() error {
|
||||
if offset.Stream == "" {
|
||||
return fmt.Errorf("stream offset stream must not be empty")
|
||||
}
|
||||
if offset.LastProcessedEntryID == "" {
|
||||
return fmt.Errorf("stream offset last processed entry id must not be empty")
|
||||
}
|
||||
if offset.UpdatedAt.IsZero() {
|
||||
return fmt.Errorf("stream offset updated at must not be zero")
|
||||
}
|
||||
if !offset.UpdatedAt.Equal(offset.UpdatedAt.UTC()) {
|
||||
return fmt.Errorf("stream offset updated at must be UTC")
|
||||
}
|
||||
if !offset.UpdatedAt.Equal(offset.UpdatedAt.Truncate(time.Millisecond)) {
|
||||
return fmt.Errorf("stream offset updated at must use millisecond precision")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether entry contains a complete dead-letter record.
|
||||
func (entry DeadLetterEntry) Validate() error {
|
||||
if entry.NotificationID == "" {
|
||||
return fmt.Errorf("dead letter entry notification id must not be empty")
|
||||
}
|
||||
if entry.RouteID == "" {
|
||||
return fmt.Errorf("dead letter entry route id must not be empty")
|
||||
}
|
||||
if !entry.Channel.IsKnown() {
|
||||
return fmt.Errorf("dead letter entry channel %q is unsupported", entry.Channel)
|
||||
}
|
||||
if entry.RecipientRef == "" {
|
||||
return fmt.Errorf("dead letter entry recipient ref must not be empty")
|
||||
}
|
||||
if entry.FinalAttemptCount <= 0 {
|
||||
return fmt.Errorf("dead letter entry final attempt count must be positive")
|
||||
}
|
||||
if entry.MaxAttempts <= 0 {
|
||||
return fmt.Errorf("dead letter entry max attempts must be positive")
|
||||
}
|
||||
if entry.FailureClassification == "" {
|
||||
return fmt.Errorf("dead letter entry failure classification must not be empty")
|
||||
}
|
||||
if entry.FailureMessage == "" {
|
||||
return fmt.Errorf("dead letter entry failure message must not be empty")
|
||||
}
|
||||
if entry.CreatedAt.IsZero() {
|
||||
return fmt.Errorf("dead letter entry created at must not be zero")
|
||||
}
|
||||
if !entry.CreatedAt.Equal(entry.CreatedAt.UTC()) {
|
||||
return fmt.Errorf("dead letter entry created at must be UTC")
|
||||
}
|
||||
if !entry.CreatedAt.Equal(entry.CreatedAt.Truncate(time.Millisecond)) {
|
||||
return fmt.Errorf("dead letter entry created at must use millisecond precision")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// marshalStrictJSON marshals value with encoding/json. It exists as the
// named counterpart of unmarshalStrictJSON so every persistence path funnels
// through one marshal/unmarshal pair.
func marshalStrictJSON(value any) ([]byte, error) {
	return json.Marshal(value)
}
|
||||
|
||||
// unmarshalStrictJSON decodes payload into target, rejecting unknown fields
// and any trailing input so persisted records round-trip exactly.
func unmarshalStrictJSON(payload []byte, target any) error {
	// bytes.NewReader is the correct read-only view of payload;
	// bytes.NewBuffer implies ownership transfer of the slice and allocates a
	// larger Buffer value for no benefit here.
	decoder := json.NewDecoder(bytes.NewReader(payload))
	decoder.DisallowUnknownFields()

	if err := decoder.Decode(target); err != nil {
		return err
	}
	// A second Decode must report io.EOF; a nil error means a trailing JSON
	// value was present. json.Decoder returns the io.EOF sentinel unwrapped,
	// so direct comparison is safe.
	if err := decoder.Decode(&struct{}{}); err != io.EOF {
		if err == nil {
			return fmt.Errorf("unexpected trailing JSON input")
		}
		return err
	}

	return nil
}
|
||||
|
||||
func unixMilli(value time.Time) int64 {
|
||||
return value.UTC().UnixMilli()
|
||||
}
|
||||
|
||||
func optionalUnixMilli(value time.Time) *int64 {
|
||||
if value.IsZero() {
|
||||
return nil
|
||||
}
|
||||
millis := unixMilli(value)
|
||||
return &millis
|
||||
}
|
||||
|
||||
// cloneJSONObject deep-copies one decoded JSON object. A nil input yields an
// empty, non-nil map so callers can mutate the result safely.
func cloneJSONObject(value map[string]any) map[string]any {
	cloned := make(map[string]any, len(value))
	for key, raw := range value {
		cloned[key] = cloneJSONValue(raw)
	}
	return cloned
}

// cloneJSONValue deep-copies one decoded JSON value: objects and arrays are
// cloned recursively, while scalars (string, bool, float64, nil) are returned
// unchanged since they are immutable.
func cloneJSONValue(value any) any {
	if object, ok := value.(map[string]any); ok {
		return cloneJSONObject(object)
	}

	items, ok := value.([]any)
	if !ok {
		return value
	}
	cloned := make([]any, len(items))
	for index, item := range items {
		cloned[index] = cloneJSONValue(item)
	}
	return cloned
}
|
||||
@@ -0,0 +1,3 @@
|
||||
// Package redisstate defines the frozen Redis keyspace, strict JSON records,
|
||||
// and low-level mutation helpers used by Notification Service durable state.
|
||||
package redisstate
|
||||
@@ -0,0 +1,10 @@
|
||||
package redisstate
|
||||
|
||||
import "errors"
|
||||
|
||||
var (
	// ErrConflict reports that a Redis mutation could not be applied because
	// one of the watched or newly created keys already existed or changed
	// concurrently. It is a sentinel error; match it with errors.Is rather
	// than string comparison.
	ErrConflict = errors.New("redis state conflict")
)
|
||||
@@ -0,0 +1,105 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
)
|
||||
|
||||
const defaultPrefix = "notification:"
|
||||
|
||||
// Keyspace builds the frozen Notification Service Redis keys. All dynamic key
// segments are encoded with base64url so raw key structure does not depend on
// caller-provided characters.
//
// Keyspace is stateless; the zero value is ready to use.
type Keyspace struct{}
|
||||
|
||||
// Notification returns the primary Redis key for one notification_record.
|
||||
func (Keyspace) Notification(notificationID string) string {
|
||||
return defaultPrefix + "records:" + encodeKeyComponent(notificationID)
|
||||
}
|
||||
|
||||
// Route returns the primary Redis key for one notification_route.
|
||||
func (Keyspace) Route(notificationID string, routeID string) string {
|
||||
return defaultPrefix + "routes:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID)
|
||||
}
|
||||
|
||||
// ParseRoute returns the notification identifier and route identifier encoded
|
||||
// inside routeKey.
|
||||
func (Keyspace) ParseRoute(routeKey string) (string, string, error) {
|
||||
trimmedPrefix := defaultPrefix + "routes:"
|
||||
if !strings.HasPrefix(routeKey, trimmedPrefix) {
|
||||
return "", "", fmt.Errorf("parse route key: %q does not use %q prefix", routeKey, trimmedPrefix)
|
||||
}
|
||||
|
||||
encoded := strings.TrimPrefix(routeKey, trimmedPrefix)
|
||||
parts := strings.Split(encoded, ":")
|
||||
if len(parts) != 2 {
|
||||
return "", "", fmt.Errorf("parse route key: %q must contain exactly two encoded segments", routeKey)
|
||||
}
|
||||
|
||||
notificationID, err := decodeKeyComponent(parts[0])
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("parse route key: notification id: %w", err)
|
||||
}
|
||||
routeID, err := decodeKeyComponent(parts[1])
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("parse route key: route id: %w", err)
|
||||
}
|
||||
|
||||
return notificationID, routeID, nil
|
||||
}
|
||||
|
||||
// Idempotency returns the primary Redis key for one
|
||||
// notification_idempotency_record.
|
||||
func (Keyspace) Idempotency(producer intentstream.Producer, idempotencyKey string) string {
|
||||
return defaultPrefix + "idempotency:" + encodeKeyComponent(string(producer)) + ":" + encodeKeyComponent(idempotencyKey)
|
||||
}
|
||||
|
||||
// DeadLetter returns the primary Redis key for one
|
||||
// notification_dead_letter_entry.
|
||||
func (Keyspace) DeadLetter(notificationID string, routeID string) string {
|
||||
return defaultPrefix + "dead_letters:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID)
|
||||
}
|
||||
|
||||
// RouteLease returns the temporary Redis key used to coordinate exclusive
|
||||
// publication of one notification_route across replicas.
|
||||
func (Keyspace) RouteLease(notificationID string, routeID string) string {
|
||||
return defaultPrefix + "route_leases:" + encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID)
|
||||
}
|
||||
|
||||
// MalformedIntent returns the primary Redis key for one malformed-intent
|
||||
// record.
|
||||
func (Keyspace) MalformedIntent(streamEntryID string) string {
|
||||
return defaultPrefix + "malformed_intents:" + encodeKeyComponent(streamEntryID)
|
||||
}
|
||||
|
||||
// StreamOffset returns the primary Redis key for one persisted intent-consumer
|
||||
// offset.
|
||||
func (Keyspace) StreamOffset(stream string) string {
|
||||
return defaultPrefix + "stream_offsets:" + encodeKeyComponent(stream)
|
||||
}
|
||||
|
||||
// Intents returns the frozen ingress Redis Stream key.
|
||||
func (Keyspace) Intents() string {
|
||||
return defaultPrefix + "intents"
|
||||
}
|
||||
|
||||
// RouteSchedule returns the frozen route schedule sorted-set key.
|
||||
func (Keyspace) RouteSchedule() string {
|
||||
return defaultPrefix + "route_schedule"
|
||||
}
|
||||
|
||||
// encodeKeyComponent renders value as unpadded base64url so arbitrary caller
// strings (including ':') cannot alter the key structure.
func encodeKeyComponent(value string) string {
	raw := []byte(value)
	return base64.RawURLEncoding.EncodeToString(raw)
}
|
||||
|
||||
// decodeKeyComponent reverses encodeKeyComponent, returning the original
// caller-provided string or the base64 decoding error.
func decodeKeyComponent(value string) (string, error) {
	raw, err := base64.RawURLEncoding.DecodeString(value)
	if err != nil {
		return "", err
	}

	text := string(raw)
	return text, nil
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// MalformedIntentStore provides the Redis-backed storage used for
// operator-visible malformed-intent records.
type MalformedIntentStore struct {
	// client is the shared Redis connection; never nil after construction via
	// NewMalformedIntentStore.
	client *redis.Client
	// keys builds the frozen Redis key layout for this service.
	keys Keyspace
	// ttl bounds how long each malformed-intent record is retained; always
	// positive after construction.
	ttl time.Duration
}
|
||||
|
||||
// NewMalformedIntentStore constructs one Redis-backed malformed-intent store.
|
||||
func NewMalformedIntentStore(client *redis.Client, ttl time.Duration) (*MalformedIntentStore, error) {
|
||||
if client == nil {
|
||||
return nil, errors.New("new malformed intent store: nil redis client")
|
||||
}
|
||||
if ttl <= 0 {
|
||||
return nil, errors.New("new malformed intent store: non-positive ttl")
|
||||
}
|
||||
|
||||
return &MalformedIntentStore{
|
||||
client: client,
|
||||
keys: Keyspace{},
|
||||
ttl: ttl,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Record stores entry idempotently by its Redis Stream entry identifier.
|
||||
func (store *MalformedIntentStore) Record(ctx context.Context, entry malformedintent.Entry) error {
|
||||
if store == nil || store.client == nil {
|
||||
return errors.New("record malformed intent: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("record malformed intent: nil context")
|
||||
}
|
||||
if err := entry.Validate(); err != nil {
|
||||
return fmt.Errorf("record malformed intent: %w", err)
|
||||
}
|
||||
|
||||
payload, err := MarshalMalformedIntent(entry)
|
||||
if err != nil {
|
||||
return fmt.Errorf("record malformed intent: %w", err)
|
||||
}
|
||||
if err := store.client.Set(ctx, store.keys.MalformedIntent(entry.StreamEntryID), payload, store.ttl).Err(); err != nil {
|
||||
return fmt.Errorf("record malformed intent: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,657 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/telemetry"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// releaseRouteLeaseScript atomically deletes the lease key (KEYS[1]) only
// when its current value still equals the caller's owner token (ARGV[1]).
// This compare-and-delete must run as a single Lua script: a plain GET+DEL
// would let one replica delete a lease that expired and was re-acquired by
// another replica in between.
var releaseRouteLeaseScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) == ARGV[1] then
	return redis.call("DEL", KEYS[1])
end
return 0
`)
|
||||
|
||||
// completePublishedRouteScript atomically completes one successful route
// publication. Argument layout:
//
//	KEYS[1] route record key    ARGV[1] expected current route payload
//	KEYS[2] route lease key     ARGV[2] expected lease owner token
//	KEYS[3] schedule zset key   ARGV[3] outbound stream name
//	                            ARGV[4] stream MAXLEN (<= 0 disables trimming)
//	                            ARGV[5] updated (published) route payload
//	                            ARGV[6] field count; ARGV[7..] alternating
//	                            field/value pairs appended via XADD
//
// It returns 1 on success. It returns 0 — writing nothing — when either the
// stored route no longer matches ARGV[1] or the lease is no longer owned by
// ARGV[2]. On success it XADDs the outbound entry (with approximate "~"
// MAXLEN trimming when ARGV[4] > 0), overwrites the route payload with
// KEEPTTL, removes the route from the schedule zset, and deletes the lease.
var completePublishedRouteScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) ~= ARGV[1] then
	return 0
end
if redis.call("GET", KEYS[2]) ~= ARGV[2] then
	return 0
end
local field_count = tonumber(ARGV[6])
local values = {}
local index = 7
for _ = 1, field_count do
	table.insert(values, ARGV[index])
	table.insert(values, ARGV[index + 1])
	index = index + 2
end
if tonumber(ARGV[4]) > 0 then
	redis.call("XADD", ARGV[3], "MAXLEN", "~", ARGV[4], "*", unpack(values))
else
	redis.call("XADD", ARGV[3], "*", unpack(values))
end
redis.call("SET", KEYS[1], ARGV[5], "KEEPTTL")
redis.call("ZREM", KEYS[3], KEYS[1])
redis.call("DEL", KEYS[2])
return 1
`)
|
||||
|
||||
// ScheduledRoute stores one due route reference loaded from
// `notification:route_schedule`. Instances are produced by ListDueRoutes.
type ScheduledRoute struct {
	// RouteKey stores the full Redis route key scheduled for processing.
	RouteKey string

	// NotificationID stores the owning notification identifier (decoded from
	// RouteKey).
	NotificationID string

	// RouteID stores the scheduled route identifier (decoded from RouteKey).
	RouteID string
}
|
||||
|
||||
// CompleteRoutePublishedInput stores the data required to mark one route as
// published while atomically appending one outbound stream entry. It is
// consumed by CompleteRoutePublished, which validates it via Validate first.
type CompleteRoutePublishedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller; the mutation only applies if Redis still holds this exact state.
	ExpectedRoute acceptintent.NotificationRoute

	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string

	// PublishedAt stores when the publication attempt succeeded.
	PublishedAt time.Time

	// Stream stores the outbound Redis Stream name.
	Stream string

	// StreamMaxLen bounds Stream with approximate trimming when positive. Zero
	// disables trimming.
	StreamMaxLen int64

	// StreamValues stores the exact Redis Stream fields appended to Stream.
	StreamValues map[string]any
}
|
||||
|
||||
// CompleteRouteFailedInput stores the data required to record one retryable
// publication failure. It is the retry-path counterpart of
// CompleteRoutePublishedInput.
type CompleteRouteFailedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller.
	ExpectedRoute acceptintent.NotificationRoute

	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string

	// FailedAt stores when the publication attempt failed.
	FailedAt time.Time

	// NextAttemptAt stores the next scheduled retry time.
	NextAttemptAt time.Time

	// FailureClassification stores the classified publication failure kind.
	FailureClassification string

	// FailureMessage stores the detailed publication failure text.
	FailureMessage string
}
|
||||
|
||||
// CompleteRouteDeadLetterInput stores the data required to record one
// exhausted publication failure. It is the terminal-path counterpart of
// CompleteRoutePublishedInput.
type CompleteRouteDeadLetterInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller.
	ExpectedRoute acceptintent.NotificationRoute

	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string

	// DeadLetteredAt stores when the route exhausted its retry budget.
	DeadLetteredAt time.Time

	// FailureClassification stores the classified terminal failure kind.
	FailureClassification string

	// FailureMessage stores the detailed terminal failure text.
	FailureMessage string

	// RecoveryHint stores the optional operator-facing recovery guidance.
	RecoveryHint string
}
|
||||
|
||||
// ListDueRoutes loads up to limit scheduled routes whose next-attempt score is
|
||||
// due at or before now.
|
||||
func (store *AcceptanceStore) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]ScheduledRoute, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return nil, errors.New("list due routes: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return nil, errors.New("list due routes: nil context")
|
||||
}
|
||||
if err := validateRouteStateTimestamp("list due routes now", now); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if limit <= 0 {
|
||||
return nil, errors.New("list due routes: limit must be positive")
|
||||
}
|
||||
|
||||
members, err := store.client.ZRangeByScore(ctx, store.keys.RouteSchedule(), &redis.ZRangeBy{
|
||||
Min: "-inf",
|
||||
Max: strconv.FormatInt(now.UnixMilli(), 10),
|
||||
Count: limit,
|
||||
}).Result()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list due routes: %w", err)
|
||||
}
|
||||
|
||||
routes := make([]ScheduledRoute, 0, len(members))
|
||||
for _, member := range members {
|
||||
notificationID, routeID, err := store.keys.ParseRoute(member)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list due routes: %w", err)
|
||||
}
|
||||
routes = append(routes, ScheduledRoute{
|
||||
RouteKey: member,
|
||||
NotificationID: notificationID,
|
||||
RouteID: routeID,
|
||||
})
|
||||
}
|
||||
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
// ReadRouteScheduleSnapshot returns the current depth of the durable route
|
||||
// schedule together with its oldest scheduled timestamp when one exists.
|
||||
func (store *AcceptanceStore) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil context")
|
||||
}
|
||||
|
||||
depth, err := store.client.ZCard(ctx, store.keys.RouteSchedule()).Result()
|
||||
if err != nil {
|
||||
return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: depth: %w", err)
|
||||
}
|
||||
|
||||
snapshot := telemetry.RouteScheduleSnapshot{
|
||||
Depth: depth,
|
||||
}
|
||||
if depth == 0 {
|
||||
return snapshot, nil
|
||||
}
|
||||
|
||||
values, err := store.client.ZRangeWithScores(ctx, store.keys.RouteSchedule(), 0, 0).Result()
|
||||
if err != nil {
|
||||
return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: oldest scheduled entry: %w", err)
|
||||
}
|
||||
if len(values) == 0 {
|
||||
return snapshot, nil
|
||||
}
|
||||
|
||||
oldestScheduledFor := time.UnixMilli(int64(values[0].Score)).UTC()
|
||||
snapshot.OldestScheduledFor = &oldestScheduledFor
|
||||
return snapshot, nil
|
||||
}
|
||||
|
||||
// TryAcquireRouteLease attempts to acquire one temporary route lease owned by
|
||||
// token for ttl.
|
||||
func (store *AcceptanceStore) TryAcquireRouteLease(ctx context.Context, notificationID string, routeID string, token string, ttl time.Duration) (bool, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return false, errors.New("try acquire route lease: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return false, errors.New("try acquire route lease: nil context")
|
||||
}
|
||||
if notificationID == "" {
|
||||
return false, errors.New("try acquire route lease: notification id must not be empty")
|
||||
}
|
||||
if routeID == "" {
|
||||
return false, errors.New("try acquire route lease: route id must not be empty")
|
||||
}
|
||||
if token == "" {
|
||||
return false, errors.New("try acquire route lease: token must not be empty")
|
||||
}
|
||||
if ttl <= 0 {
|
||||
return false, errors.New("try acquire route lease: ttl must be positive")
|
||||
}
|
||||
|
||||
acquired, err := store.client.SetNX(ctx, store.keys.RouteLease(notificationID, routeID), token, ttl).Result()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("try acquire route lease: %w", err)
|
||||
}
|
||||
|
||||
return acquired, nil
|
||||
}
|
||||
|
||||
// ReleaseRouteLease releases one temporary route lease only when token still
|
||||
// matches the stored owner value.
|
||||
func (store *AcceptanceStore) ReleaseRouteLease(ctx context.Context, notificationID string, routeID string, token string) error {
|
||||
if store == nil || store.client == nil {
|
||||
return errors.New("release route lease: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("release route lease: nil context")
|
||||
}
|
||||
if notificationID == "" {
|
||||
return errors.New("release route lease: notification id must not be empty")
|
||||
}
|
||||
if routeID == "" {
|
||||
return errors.New("release route lease: route id must not be empty")
|
||||
}
|
||||
if token == "" {
|
||||
return errors.New("release route lease: token must not be empty")
|
||||
}
|
||||
|
||||
if err := releaseRouteLeaseScript.Run(
|
||||
ctx,
|
||||
store.client,
|
||||
[]string{store.keys.RouteLease(notificationID, routeID)},
|
||||
token,
|
||||
).Err(); err != nil {
|
||||
return fmt.Errorf("release route lease: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CompleteRoutePublished atomically appends one outbound stream entry and
|
||||
// marks the corresponding route as published.
|
||||
func (store *AcceptanceStore) CompleteRoutePublished(ctx context.Context, input CompleteRoutePublishedInput) error {
|
||||
if store == nil || store.client == nil {
|
||||
return errors.New("complete route published: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("complete route published: nil context")
|
||||
}
|
||||
if err := input.Validate(); err != nil {
|
||||
return fmt.Errorf("complete route published: %w", err)
|
||||
}
|
||||
|
||||
updatedRoute := input.ExpectedRoute
|
||||
updatedRoute.Status = acceptintent.RouteStatusPublished
|
||||
updatedRoute.AttemptCount++
|
||||
updatedRoute.NextAttemptAt = time.Time{}
|
||||
updatedRoute.LastErrorClassification = ""
|
||||
updatedRoute.LastErrorMessage = ""
|
||||
updatedRoute.LastErrorAt = time.Time{}
|
||||
updatedRoute.UpdatedAt = input.PublishedAt
|
||||
updatedRoute.PublishedAt = input.PublishedAt
|
||||
updatedRoute.DeadLetteredAt = time.Time{}
|
||||
payload, err := MarshalRoute(updatedRoute)
|
||||
if err != nil {
|
||||
return fmt.Errorf("complete route published: %w", err)
|
||||
}
|
||||
expectedPayload, err := MarshalRoute(input.ExpectedRoute)
|
||||
if err != nil {
|
||||
return fmt.Errorf("complete route published: %w", err)
|
||||
}
|
||||
streamArgs, err := flattenStreamValues(input.StreamValues)
|
||||
if err != nil {
|
||||
return fmt.Errorf("complete route published: %w", err)
|
||||
}
|
||||
|
||||
result, err := completePublishedRouteScript.Run(
|
||||
ctx,
|
||||
store.client,
|
||||
[]string{
|
||||
store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID),
|
||||
store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID),
|
||||
store.keys.RouteSchedule(),
|
||||
},
|
||||
append([]any{
|
||||
string(expectedPayload),
|
||||
input.LeaseToken,
|
||||
input.Stream,
|
||||
input.StreamMaxLen,
|
||||
string(payload),
|
||||
len(streamArgs) / 2,
|
||||
}, streamArgs...)...,
|
||||
).Int()
|
||||
switch {
|
||||
case errors.Is(err, redis.Nil):
|
||||
return ErrConflict
|
||||
case err != nil:
|
||||
return err
|
||||
case result != 1:
|
||||
return ErrConflict
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// CompleteRouteFailed atomically records one retryable publication failure and
|
||||
// reschedules the route.
|
||||
func (store *AcceptanceStore) CompleteRouteFailed(ctx context.Context, input CompleteRouteFailedInput) error {
|
||||
if store == nil || store.client == nil {
|
||||
return errors.New("complete route failed: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("complete route failed: nil context")
|
||||
}
|
||||
if err := input.Validate(); err != nil {
|
||||
return fmt.Errorf("complete route failed: %w", err)
|
||||
}
|
||||
|
||||
updatedRoute := input.ExpectedRoute
|
||||
updatedRoute.Status = acceptintent.RouteStatusFailed
|
||||
updatedRoute.AttemptCount++
|
||||
updatedRoute.NextAttemptAt = input.NextAttemptAt
|
||||
updatedRoute.LastErrorClassification = input.FailureClassification
|
||||
updatedRoute.LastErrorMessage = input.FailureMessage
|
||||
updatedRoute.LastErrorAt = input.FailedAt
|
||||
updatedRoute.UpdatedAt = input.FailedAt
|
||||
payload, err := MarshalRoute(updatedRoute)
|
||||
if err != nil {
|
||||
return fmt.Errorf("complete route failed: %w", err)
|
||||
}
|
||||
|
||||
return store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error {
|
||||
pipe.SetArgs(ctx, store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), payload, redis.SetArgs{KeepTTL: true})
|
||||
pipe.ZAdd(ctx, store.keys.RouteSchedule(), redis.Z{
|
||||
Score: float64(input.NextAttemptAt.UnixMilli()),
|
||||
Member: store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID),
|
||||
})
|
||||
pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID))
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// CompleteRouteDeadLetter atomically records one exhausted publication
// failure, stores the dead-letter entry, and removes the route from the
// retry schedule.
//
// The write runs through completeRouteMutation, so it only commits while the
// stored route still matches input.ExpectedRoute and input.LeaseToken still
// owns the route lease; otherwise the caller gets ErrConflict.
func (store *AcceptanceStore) CompleteRouteDeadLetter(ctx context.Context, input CompleteRouteDeadLetterInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route dead letter: nil store")
	}
	if ctx == nil {
		return errors.New("complete route dead letter: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}

	// Build the terminal route state: dead-letter status, the final attempt
	// counted, retry scheduling cleared, and failure details recorded.
	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusDeadLetter
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = time.Time{}
	updatedRoute.LastErrorClassification = input.FailureClassification
	updatedRoute.LastErrorMessage = input.FailureMessage
	updatedRoute.LastErrorAt = input.DeadLetteredAt
	updatedRoute.UpdatedAt = input.DeadLetteredAt
	updatedRoute.DeadLetteredAt = input.DeadLetteredAt
	// Guard: dead-lettering is only legal once the attempt budget is spent
	// (checked after the increment above, so this is the final count).
	if updatedRoute.AttemptCount < updatedRoute.MaxAttempts {
		return fmt.Errorf(
			"complete route dead letter: final attempt count %d is below max attempts %d",
			updatedRoute.AttemptCount,
			updatedRoute.MaxAttempts,
		)
	}

	routePayload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}
	// The dead-letter entry is a self-contained operator-facing record of
	// the terminal failure, keyed separately from the route itself.
	deadLetterPayload, err := MarshalDeadLetter(DeadLetterEntry{
		NotificationID: updatedRoute.NotificationID,
		RouteID: updatedRoute.RouteID,
		Channel: updatedRoute.Channel,
		RecipientRef: updatedRoute.RecipientRef,
		FinalAttemptCount: updatedRoute.AttemptCount,
		MaxAttempts: updatedRoute.MaxAttempts,
		FailureClassification: input.FailureClassification,
		FailureMessage: input.FailureMessage,
		CreatedAt: input.DeadLetteredAt,
		RecoveryHint: input.RecoveryHint,
	})
	if err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}

	// Under the lease-guarded transaction: persist the terminal route state
	// (keeping its TTL), store the dead-letter entry with its own TTL, take
	// the route off the retry schedule, and drop the lease.
	return store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error {
		pipe.SetArgs(ctx, store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), routePayload, redis.SetArgs{KeepTTL: true})
		pipe.Set(ctx, store.keys.DeadLetter(updatedRoute.NotificationID, updatedRoute.RouteID), deadLetterPayload, store.cfg.DeadLetterTTL)
		pipe.ZRem(ctx, store.keys.RouteSchedule(), store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID))
		pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID))
		return nil
	})
}
|
||||
|
||||
// completeRouteMutation applies mutate inside one optimistic Redis
// transaction that only commits while both the stored route payload and the
// lease owner are unchanged. It returns ErrConflict when either check fails
// or when the watched keys were modified concurrently.
func (store *AcceptanceStore) completeRouteMutation(
	ctx context.Context,
	expectedRoute acceptintent.NotificationRoute,
	leaseToken string,
	mutate func(redis.Pipeliner) error,
) error {
	routeKey := store.keys.Route(expectedRoute.NotificationID, expectedRoute.RouteID)
	leaseKey := store.keys.RouteLease(expectedRoute.NotificationID, expectedRoute.RouteID)

	watchErr := store.client.Watch(ctx, func(tx *redis.Tx) error {
		// A missing route key means another worker already completed or
		// expired this route; surface that as a conflict, not an error.
		currentRoute, err := loadWatchedRoute(ctx, tx, routeKey)
		switch {
		case errors.Is(err, redis.Nil):
			return ErrConflict
		case err != nil:
			return err
		}
		// Compare-and-swap guard: the caller's snapshot must still match
		// the stored payload byte-for-byte.
		if err := ensureRoutesEqual(expectedRoute, currentRoute); err != nil {
			return err
		}

		// The lease must still exist and still be owned by this caller.
		leaseValue, err := tx.Get(ctx, leaseKey).Result()
		switch {
		case errors.Is(err, redis.Nil):
			return ErrConflict
		case err != nil:
			return err
		case leaseValue != leaseToken:
			return ErrConflict
		}

		// Queue the caller-provided writes in MULTI/EXEC; the WATCH on
		// routeKey and leaseKey aborts the EXEC if either changed in the
		// meantime.
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			return mutate(pipe)
		})

		return err
	}, routeKey, leaseKey)

	// Normalize both explicit conflicts and aborted transactions to
	// ErrConflict so callers can retry uniformly.
	switch {
	case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr):
		return ErrConflict
	case watchErr != nil:
		return watchErr
	default:
		return nil
	}
}
|
||||
|
||||
func loadWatchedRoute(ctx context.Context, tx *redis.Tx, routeKey string) (acceptintent.NotificationRoute, error) {
|
||||
payload, err := tx.Get(ctx, routeKey).Bytes()
|
||||
if err != nil {
|
||||
return acceptintent.NotificationRoute{}, err
|
||||
}
|
||||
|
||||
return UnmarshalRoute(payload)
|
||||
}
|
||||
|
||||
func ensureRoutesEqual(expected acceptintent.NotificationRoute, actual acceptintent.NotificationRoute) error {
|
||||
expectedPayload, err := MarshalRoute(expected)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal expected route: %w", err)
|
||||
}
|
||||
actualPayload, err := MarshalRoute(actual)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal current route: %w", err)
|
||||
}
|
||||
if !bytes.Equal(expectedPayload, actualPayload) {
|
||||
return ErrConflict
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func validateCompletionRoute(route acceptintent.NotificationRoute) error {
|
||||
if err := route.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
switch route.Status {
|
||||
case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("route status %q is not completable", route.Status)
|
||||
}
|
||||
}
|
||||
|
||||
// validateStreamValues checks that values holds at least one entry and that
// every key is non-empty and every value is a non-empty string or byte slice.
func validateStreamValues(values map[string]any) error {
	if len(values) == 0 {
		return fmt.Errorf("stream values must not be empty")
	}

	// checkEntry validates one key/value pair.
	checkEntry := func(key string, raw any) error {
		if key == "" {
			return fmt.Errorf("stream values key must not be empty")
		}
		switch value := raw.(type) {
		case string:
			if len(value) > 0 {
				return nil
			}
			return fmt.Errorf("stream values %q must not be empty", key)
		case []byte:
			if len(value) > 0 {
				return nil
			}
			return fmt.Errorf("stream values %q must not be empty", key)
		default:
			return fmt.Errorf("stream values %q must be string or []byte", key)
		}
	}

	for key, raw := range values {
		if err := checkEntry(key, raw); err != nil {
			return err
		}
	}

	return nil
}
|
||||
|
||||
// flattenStreamValues converts values into the alternating key/value argument
// list expected by Redis stream commands. Keys are emitted in sorted order so
// the resulting argument layout is deterministic across calls.
func flattenStreamValues(values map[string]any) ([]any, error) {
	sortedKeys := make([]string, 0, len(values))
	for key := range values {
		sortedKeys = append(sortedKeys, key)
	}
	sort.Strings(sortedKeys)

	flattened := make([]any, 0, 2*len(sortedKeys))
	for _, key := range sortedKeys {
		flattened = append(flattened, key)
		flattened = append(flattened, values[key])
	}

	return flattened, nil
}
|
||||
|
||||
func validateRouteStateTimestamp(name string, value time.Time) error {
|
||||
if value.IsZero() {
|
||||
return fmt.Errorf("%s must not be zero", name)
|
||||
}
|
||||
if !value.Equal(value.UTC()) {
|
||||
return fmt.Errorf("%s must be UTC", name)
|
||||
}
|
||||
if !value.Equal(value.Truncate(time.Millisecond)) {
|
||||
return fmt.Errorf("%s must use millisecond precision", name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether route contains a complete due-route reference.
|
||||
func (route ScheduledRoute) Validate() error {
|
||||
if route.RouteKey == "" {
|
||||
return fmt.Errorf("scheduled route key must not be empty")
|
||||
}
|
||||
if route.NotificationID == "" {
|
||||
return fmt.Errorf("scheduled route notification id must not be empty")
|
||||
}
|
||||
if route.RouteID == "" {
|
||||
return fmt.Errorf("scheduled route route id must not be empty")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether input contains a complete published-route
|
||||
// transition.
|
||||
func (input CompleteRoutePublishedInput) Validate() error {
|
||||
if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
|
||||
return err
|
||||
}
|
||||
if input.LeaseToken == "" {
|
||||
return fmt.Errorf("lease token must not be empty")
|
||||
}
|
||||
if err := validateRouteStateTimestamp("published at", input.PublishedAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if input.Stream == "" {
|
||||
return fmt.Errorf("stream must not be empty")
|
||||
}
|
||||
if input.StreamMaxLen < 0 {
|
||||
return fmt.Errorf("stream max len must not be negative")
|
||||
}
|
||||
if err := validateStreamValues(input.StreamValues); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether input contains a complete retryable failure
|
||||
// transition.
|
||||
func (input CompleteRouteFailedInput) Validate() error {
|
||||
if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
|
||||
return err
|
||||
}
|
||||
if input.LeaseToken == "" {
|
||||
return fmt.Errorf("lease token must not be empty")
|
||||
}
|
||||
if err := validateRouteStateTimestamp("failed at", input.FailedAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateRouteStateTimestamp("next attempt at", input.NextAttemptAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if input.FailureClassification == "" {
|
||||
return fmt.Errorf("failure classification must not be empty")
|
||||
}
|
||||
if input.FailureMessage == "" {
|
||||
return fmt.Errorf("failure message must not be empty")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether input contains a complete dead-letter transition.
|
||||
func (input CompleteRouteDeadLetterInput) Validate() error {
|
||||
if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
|
||||
return err
|
||||
}
|
||||
if input.LeaseToken == "" {
|
||||
return fmt.Errorf("lease token must not be empty")
|
||||
}
|
||||
if err := validateRouteStateTimestamp("dead lettered at", input.DeadLetteredAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if input.FailureClassification == "" {
|
||||
return fmt.Errorf("failure classification must not be empty")
|
||||
}
|
||||
if input.FailureMessage == "" {
|
||||
return fmt.Errorf("failure message must not be empty")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,465 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAcceptanceStoreListDueRoutesLoadsScheduledMembers(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := newTestRedisClient(t, server)
|
||||
|
||||
store, err := NewAcceptanceStore(client, AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
now := time.UnixMilli(1775121700000).UTC()
|
||||
require.NoError(t, store.CreateAcceptance(context.Background(), validUserAcceptanceInput(now, 0)))
|
||||
|
||||
routes, err := store.ListDueRoutes(context.Background(), now, 10)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, routes, 2)
|
||||
require.ElementsMatch(t, []string{"push:user:user-1", "email:user:user-1"}, []string{routes[0].RouteID, routes[1].RouteID})
|
||||
|
||||
for _, route := range routes {
|
||||
require.NoError(t, route.Validate())
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcceptanceStoreReadRouteScheduleSnapshot(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := newTestRedisClient(t, server)
|
||||
|
||||
store, err := NewAcceptanceStore(client, AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
now := time.UnixMilli(1775121700000).UTC()
|
||||
require.NoError(t, store.CreateAcceptance(context.Background(), validUserAcceptanceInput(now, 0)))
|
||||
|
||||
snapshot, err := store.ReadRouteScheduleSnapshot(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, int64(2), snapshot.Depth)
|
||||
require.NotNil(t, snapshot.OldestScheduledFor)
|
||||
require.Equal(t, now, *snapshot.OldestScheduledFor)
|
||||
}
|
||||
|
||||
func TestAcceptanceStoreRouteLeaseAcquireReleaseAndExpire(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := newTestRedisClient(t, server)
|
||||
|
||||
store, err := NewAcceptanceStore(client, AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
acquired, err := store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-1", 2*time.Second)
|
||||
require.NoError(t, err)
|
||||
require.True(t, acquired)
|
||||
|
||||
acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-2", 2*time.Second)
|
||||
require.NoError(t, err)
|
||||
require.False(t, acquired)
|
||||
|
||||
require.NoError(t, store.ReleaseRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-1"))
|
||||
acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-3", 2*time.Second)
|
||||
require.NoError(t, err)
|
||||
require.True(t, acquired)
|
||||
|
||||
server.FastForward(3 * time.Second)
|
||||
acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-4", 2*time.Second)
|
||||
require.NoError(t, err)
|
||||
require.True(t, acquired)
|
||||
}
|
||||
|
||||
// Verifies the happy path of CompleteRoutePublished for a push route: the
// entry is appended to the (length-capped) gateway stream, the route becomes
// published with its attempt counted, it leaves the retry schedule, and the
// lease is consumed.
func TestAcceptanceStoreCompleteRoutePublishedAppendsTrimmedStreamEntryAndMarksRoutePublished(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL: 24 * time.Hour,
		DeadLetterTTL: 72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Seed one acceptance, then take the lease on its push route as a
	// publishing worker would.
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken: "token-1",
		PublishedAt: publishedAt,
		Stream: "gateway:client-events",
		StreamMaxLen: 1024,
		StreamValues: map[string]any{
			"user_id": "user-1",
			"event_type": "game.turn.ready",
			"event_id": input.Notification.NotificationID + "/push:user:user-1",
			"payload_bytes": []byte("payload-1"),
			"request_id": "request-1",
			"trace_id": "trace-1",
		},
	}))

	// The stored route reflects the published transition.
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)

	// Only the untouched email route remains on the retry schedule.
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)

	// Exactly one stream entry was appended with the expected fields.
	messages, err := client.XRange(context.Background(), "gateway:client-events", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])

	// The lease key was deleted as part of the atomic completion.
	leaseKey := Keyspace{}.RouteLease(input.Notification.NotificationID, "push:user:user-1")
	_, err = client.Get(context.Background(), leaseKey).Result()
	require.Error(t, err)
}
|
||||
|
||||
// Verifies CompleteRoutePublished for an email route publishing into the mail
// delivery-command stream with StreamMaxLen 0 (no trimming): the route is
// marked published and the command entry carries the expected fields.
func TestAcceptanceStoreCompleteRoutePublishedAppendsUntrimmedMailCommand(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL: 24 * time.Hour,
		DeadLetterTTL: 72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Seed one acceptance, then lease its email route.
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "email:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken: "token-1",
		PublishedAt: publishedAt,
		Stream: "mail:delivery_commands",
		// Zero means the stream is appended without a MAXLEN trim.
		StreamMaxLen: 0,
		StreamValues: map[string]any{
			"delivery_id": input.Notification.NotificationID + "/email:user:user-1",
			"source": "notification",
			"payload_mode": "template",
			"idempotency_key": "notification:" + input.Notification.NotificationID + "/email:user:user-1",
			"requested_at_ms": "1775121700000",
			"payload_json": `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		},
	}))

	// The stored route reflects the published transition.
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)

	// Exactly one mail command was appended with the expected fields.
	messages, err := client.XRange(context.Background(), "mail:delivery_commands", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
}
|
||||
|
||||
// Verifies CompleteRouteFailed: the route transitions to failed with the
// attempt counted and failure metadata stored, and it is rescheduled on the
// retry sorted set at the next attempt time.
func TestAcceptanceStoreCompleteRouteFailedReschedulesRoute(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL: 24 * time.Hour,
		DeadLetterTTL: 72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Seed one acceptance, then lease its push route.
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	// Record a retryable failure scheduled for two seconds later.
	failedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	nextAttemptAt := failedAt.Add(2 * time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteFailed(context.Background(), CompleteRouteFailedInput{
		ExpectedRoute: route,
		LeaseToken: "token-1",
		FailedAt: failedAt,
		NextAttemptAt: nextAttemptAt,
		FailureClassification: "gateway_stream_publish_failed",
		FailureMessage: "temporary outage",
	}))

	// The stored route reflects the failed transition.
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusFailed, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, nextAttemptAt, updatedRoute.NextAttemptAt)
	require.Equal(t, "gateway_stream_publish_failed", updatedRoute.LastErrorClassification)

	// Both routes remain on the schedule: the failed push route was re-added
	// for retry alongside the untouched email route.
	scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Len(t, scheduled, 2)
	require.Contains(t, []string{
		scheduled[0].Member.(string),
		scheduled[1].Member.(string),
	}, Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-1"))
}
|
||||
|
||||
// Verifies CompleteRouteDeadLetter: a push route seeded with 2 prior attempts
// (max 3) is dead-lettered on its final attempt, a dead-letter entry is
// persisted, and the route leaves the retry schedule.
func TestAcceptanceStoreCompleteRouteDeadLetterStoresTerminalFailure(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL: 24 * time.Hour,
		DeadLetterTTL: 72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Seed with pushAttemptCount=2 so the next (third) attempt exhausts the
	// push route's MaxAttempts of 3.
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 2)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
		ExpectedRoute: route,
		LeaseToken: "token-1",
		DeadLetteredAt: deadLetteredAt,
		FailureClassification: "payload_encoding_failed",
		FailureMessage: "payload is invalid",
	}))

	// The stored route reflects the terminal dead-letter transition.
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusDeadLetter, updatedRoute.Status)
	require.Equal(t, 3, updatedRoute.AttemptCount)
	require.Equal(t, deadLetteredAt, updatedRoute.DeadLetteredAt)

	// A dead-letter entry was written with the final failure details.
	payload, err := client.Get(context.Background(), Keyspace{}.DeadLetter(input.Notification.NotificationID, "push:user:user-1")).Bytes()
	require.NoError(t, err)
	entry, err := UnmarshalDeadLetter(payload)
	require.NoError(t, err)
	require.Equal(t, "payload_encoding_failed", entry.FailureClassification)
	require.Equal(t, 3, entry.FinalAttemptCount)

	// Only the untouched email route remains on the retry schedule.
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)
}
|
||||
|
||||
// Verifies that dead-lettering one route (push to user-1) leaves the sibling
// routes of the same notification — other channel, other recipient —
// untouched: they stay pending and remain on the retry schedule.
func TestAcceptanceStoreDeadLetterIsIsolatedByChannelAndRecipient(t *testing.T) {
	t.Parallel()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL: 24 * time.Hour,
		DeadLetterTTL: 72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Seed with pushAttemptCount=2 so the push:user-1 route can be
	// dead-lettered on its next attempt, and extend the fixture with a
	// second recipient carrying its own push and email routes.
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 2)
	input.Notification.RecipientUserIDs = []string{"user-1", "user-2"}
	input.Routes = append(input.Routes,
		acceptintent.NotificationRoute{
			NotificationID: input.Notification.NotificationID,
			RouteID: "push:user:user-2",
			Channel: intentstream.ChannelPush,
			RecipientRef: "user:user-2",
			Status: acceptintent.RouteStatusPending,
			AttemptCount: 0,
			MaxAttempts: 3,
			NextAttemptAt: now,
			ResolvedEmail: "second@example.com",
			ResolvedLocale: "en",
			CreatedAt: now,
			UpdatedAt: now,
		},
		acceptintent.NotificationRoute{
			NotificationID: input.Notification.NotificationID,
			RouteID: "email:user:user-2",
			Channel: intentstream.ChannelEmail,
			RecipientRef: "user:user-2",
			Status: acceptintent.RouteStatusPending,
			AttemptCount: 0,
			MaxAttempts: 7,
			NextAttemptAt: now,
			ResolvedEmail: "second@example.com",
			ResolvedLocale: "en",
			CreatedAt: now,
			UpdatedAt: now,
		},
	)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))

	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)

	deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
		ExpectedRoute: route,
		LeaseToken: "token-1",
		DeadLetteredAt: deadLetteredAt,
		FailureClassification: "gateway_stream_publish_failed",
		FailureMessage: "gateway unavailable",
	}))

	// The targeted route is terminal.
	deadLetterRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusDeadLetter, deadLetterRoute.Status)

	// Every sibling route is untouched: still stored and still pending.
	for _, routeID := range []string{"email:user:user-1", "push:user:user-2", "email:user:user-2"} {
		route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, routeID)
		require.NoError(t, err)
		require.True(t, found, "route %s should remain stored", routeID)
		require.Equal(t, acceptintent.RouteStatusPending, route.Status, "route %s should remain pending", routeID)
	}

	// Only the dead-lettered route left the retry schedule.
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.ElementsMatch(t, []string{
		Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1"),
		Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-2"),
		Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-2"),
	}, scheduled)
}
|
||||
|
||||
// validUserAcceptanceInput builds one fully-populated user-audience
// CreateAcceptanceInput fixture: a single game.turn.ready notification for
// user-1 with one push route and one email route plus its idempotency record.
// The push route's attempt count is parameterized so tests can start it at an
// arbitrary retry position; all timestamps are pinned to now.
func validUserAcceptanceInput(now time.Time, pushAttemptCount int) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			RequestID:          "request-1",
			TraceID:            "trace-1",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			// Push route: attempt count is caller-controlled.
			{
				NotificationID: "1775121700000-0",
				RouteID:        "push:user:user-1",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   pushAttemptCount,
				MaxAttempts:    3,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			// Email route: always fresh (zero attempts).
			{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameMaster,
			IdempotencyKey:     "game-123:turn-54",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			// Idempotency records are retained for seven days.
			ExpiresAt: now.Add(7 * 24 * time.Hour),
		},
	}
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package redisstate
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/telemetry"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// StreamOffsetStore provides the Redis-backed storage used for persisted
// plain-XREAD consumer progress.
type StreamOffsetStore struct {
	client *redis.Client // backing Redis connection; must be non-nil for use
	keys   Keyspace      // key-naming helper used to derive offset keys
}
|
||||
|
||||
// NewStreamOffsetStore constructs one Redis-backed stream-offset store.
|
||||
func NewStreamOffsetStore(client *redis.Client) (*StreamOffsetStore, error) {
|
||||
if client == nil {
|
||||
return nil, errors.New("new notification stream offset store: nil redis client")
|
||||
}
|
||||
|
||||
return &StreamOffsetStore{
|
||||
client: client,
|
||||
keys: Keyspace{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Load returns the last processed entry id for stream when one is stored.
|
||||
func (store *StreamOffsetStore) Load(ctx context.Context, stream string) (string, bool, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return "", false, errors.New("load notification stream offset: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return "", false, errors.New("load notification stream offset: nil context")
|
||||
}
|
||||
|
||||
payload, err := store.client.Get(ctx, store.keys.StreamOffset(stream)).Bytes()
|
||||
switch {
|
||||
case errors.Is(err, redis.Nil):
|
||||
return "", false, nil
|
||||
case err != nil:
|
||||
return "", false, fmt.Errorf("load notification stream offset: %w", err)
|
||||
}
|
||||
|
||||
offset, err := UnmarshalStreamOffset(payload)
|
||||
if err != nil {
|
||||
return "", false, fmt.Errorf("load notification stream offset: %w", err)
|
||||
}
|
||||
|
||||
return offset.LastProcessedEntryID, true, nil
|
||||
}
|
||||
|
||||
// Save stores the last processed entry id for stream.
|
||||
func (store *StreamOffsetStore) Save(ctx context.Context, stream string, entryID string) error {
|
||||
if store == nil || store.client == nil {
|
||||
return errors.New("save notification stream offset: nil store")
|
||||
}
|
||||
if ctx == nil {
|
||||
return errors.New("save notification stream offset: nil context")
|
||||
}
|
||||
|
||||
offset := StreamOffset{
|
||||
Stream: stream,
|
||||
LastProcessedEntryID: entryID,
|
||||
UpdatedAt: time.Now().UTC().Truncate(time.Millisecond),
|
||||
}
|
||||
payload, err := MarshalStreamOffset(offset)
|
||||
if err != nil {
|
||||
return fmt.Errorf("save notification stream offset: %w", err)
|
||||
}
|
||||
if err := store.client.Set(ctx, store.keys.StreamOffset(stream), payload, 0).Err(); err != nil {
|
||||
return fmt.Errorf("save notification stream offset: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IntentStreamLagReader provides Redis-backed lag snapshots for one intent
// stream.
type IntentStreamLagReader struct {
	store  *StreamOffsetStore // source of the persisted consumer offset
	stream string             // Redis Stream key the lag is measured against
}
|
||||
|
||||
// NewIntentStreamLagReader constructs a lag reader for stream using store.
|
||||
func NewIntentStreamLagReader(store *StreamOffsetStore, stream string) (*IntentStreamLagReader, error) {
|
||||
if store == nil || store.client == nil {
|
||||
return nil, errors.New("new notification intent stream lag reader: nil store")
|
||||
}
|
||||
if strings.TrimSpace(stream) == "" {
|
||||
return nil, errors.New("new notification intent stream lag reader: stream must not be empty")
|
||||
}
|
||||
|
||||
return &IntentStreamLagReader{
|
||||
store: store,
|
||||
stream: stream,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ReadIntentStreamLagSnapshot returns the oldest stream entry that is newer
|
||||
// than the persisted plain-XREAD consumer offset for the configured stream.
|
||||
func (reader *IntentStreamLagReader) ReadIntentStreamLagSnapshot(ctx context.Context) (telemetry.IntentStreamLagSnapshot, error) {
|
||||
if reader == nil || reader.store == nil {
|
||||
return telemetry.IntentStreamLagSnapshot{}, errors.New("read notification intent stream lag snapshot: nil reader")
|
||||
}
|
||||
if ctx == nil {
|
||||
return telemetry.IntentStreamLagSnapshot{}, errors.New("read notification intent stream lag snapshot: nil context")
|
||||
}
|
||||
|
||||
lastID, found, err := reader.store.Load(ctx, reader.stream)
|
||||
if err != nil {
|
||||
return telemetry.IntentStreamLagSnapshot{}, fmt.Errorf("read notification intent stream lag snapshot: %w", err)
|
||||
}
|
||||
|
||||
minID := "-"
|
||||
if found {
|
||||
minID = "(" + lastID
|
||||
}
|
||||
|
||||
messages, err := reader.store.client.XRangeN(ctx, reader.stream, minID, "+", 1).Result()
|
||||
if err != nil {
|
||||
return telemetry.IntentStreamLagSnapshot{}, fmt.Errorf("read notification intent stream lag snapshot: oldest entry: %w", err)
|
||||
}
|
||||
if len(messages) == 0 {
|
||||
return telemetry.IntentStreamLagSnapshot{}, nil
|
||||
}
|
||||
|
||||
oldestAt, err := streamEntryTime(messages[0].ID)
|
||||
if err != nil {
|
||||
return telemetry.IntentStreamLagSnapshot{}, fmt.Errorf("read notification intent stream lag snapshot: oldest entry id: %w", err)
|
||||
}
|
||||
|
||||
return telemetry.IntentStreamLagSnapshot{
|
||||
OldestUnprocessedAt: &oldestAt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func streamEntryTime(entryID string) (time.Time, error) {
|
||||
timestampText, _, ok := strings.Cut(entryID, "-")
|
||||
if !ok || strings.TrimSpace(timestampText) == "" {
|
||||
return time.Time{}, fmt.Errorf("entry id %q is not a Redis Stream id", entryID)
|
||||
}
|
||||
|
||||
timestampMS, err := strconv.ParseInt(timestampText, 10, 64)
|
||||
if err != nil {
|
||||
return time.Time{}, err
|
||||
}
|
||||
if timestampMS < 0 {
|
||||
return time.Time{}, fmt.Errorf("entry id %q has negative timestamp", entryID)
|
||||
}
|
||||
|
||||
return time.UnixMilli(timestampMS).UTC(), nil
|
||||
}
|
||||
@@ -0,0 +1,243 @@
|
||||
// Package userservice provides the trusted internal User Service HTTP client
|
||||
// used by Notification Service recipient enrichment.
|
||||
package userservice
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||
)
|
||||
|
||||
const (
	// getUserByIDPathSuffix is the printf template of the internal
	// user-lookup endpoint; its single %s slot takes the path-escaped user id.
	getUserByIDPathSuffix = "/api/v1/internal/users/%s"

	// subjectNotFoundErrorCode is the User Service error code that is mapped
	// to acceptintent.ErrRecipientNotFound on HTTP 404 responses.
	subjectNotFoundErrorCode = "subject_not_found"
)
|
||||
|
||||
// Config configures one HTTP-backed User Service enrichment client.
type Config struct {
	// BaseURL stores the absolute base URL of the trusted internal User Service
	// HTTP API. It must carry both a scheme and a host; a trailing slash is
	// trimmed during construction.
	BaseURL string

	// RequestTimeout bounds one outbound lookup request. It must be positive.
	RequestTimeout time.Duration
}
|
||||
|
||||
// Client resolves Notification Service recipients through the trusted
// internal User Service HTTP API.
type Client struct {
	baseURL              string        // normalized absolute base URL without trailing slash
	requestTimeout       time.Duration // per-request deadline applied in doRequest
	httpClient           *http.Client  // transport used for all lookups
	closeIdleConnections func()        // releases idle connections of the owned transport
}
|
||||
|
||||
// getUserByIDResponse mirrors the success envelope of the user-lookup
// endpoint. It is decoded leniently, so unknown response fields are tolerated.
type getUserByIDResponse struct {
	User userView `json:"user"`
}

// userView carries the subset of the user representation consumed here.
type userView struct {
	Email             string `json:"email"`
	PreferredLanguage string `json:"preferred_language"`
}

// errorEnvelope mirrors the error envelope returned on failure responses; a
// nil Error means the envelope was absent.
type errorEnvelope struct {
	Error *errorBody `json:"error"`
}

// errorBody carries the machine-readable code and human-readable message of
// one error response.
type errorBody struct {
	Code    string `json:"code"`
	Message string `json:"message"`
}
|
||||
|
||||
// NewClient constructs a User Service client that uses repository-standard
|
||||
// HTTP transport instrumentation through otelhttp.
|
||||
func NewClient(cfg Config) (*Client, error) {
|
||||
transport, ok := http.DefaultTransport.(*http.Transport)
|
||||
if !ok {
|
||||
return nil, errors.New("new notification user service client: default transport is not *http.Transport")
|
||||
}
|
||||
|
||||
baseTransport := transport.Clone()
|
||||
|
||||
return newClient(
|
||||
cfg,
|
||||
&http.Client{Transport: otelhttp.NewTransport(baseTransport)},
|
||||
baseTransport.CloseIdleConnections,
|
||||
)
|
||||
}
|
||||
|
||||
func newClient(cfg Config, httpClient *http.Client, closeIdleConnections func()) (*Client, error) {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.BaseURL) == "":
|
||||
return nil, errors.New("new notification user service client: base URL must not be empty")
|
||||
case cfg.RequestTimeout <= 0:
|
||||
return nil, errors.New("new notification user service client: request timeout must be positive")
|
||||
case httpClient == nil:
|
||||
return nil, errors.New("new notification user service client: http client must not be nil")
|
||||
}
|
||||
|
||||
parsedBaseURL, err := url.Parse(strings.TrimRight(strings.TrimSpace(cfg.BaseURL), "/"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new notification user service client: parse base URL: %w", err)
|
||||
}
|
||||
if parsedBaseURL.Scheme == "" || parsedBaseURL.Host == "" {
|
||||
return nil, errors.New("new notification user service client: base URL must be absolute")
|
||||
}
|
||||
|
||||
return &Client{
|
||||
baseURL: parsedBaseURL.String(),
|
||||
requestTimeout: cfg.RequestTimeout,
|
||||
httpClient: httpClient,
|
||||
closeIdleConnections: closeIdleConnections,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Close releases idle HTTP connections owned by the client transport.
|
||||
func (client *Client) Close() error {
|
||||
if client == nil || client.closeIdleConnections == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
client.closeIdleConnections()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetUserByID resolves the current user email and preferred language for the
// supplied stable userID.
//
// A 404 response carrying the subject_not_found error code is surfaced as
// acceptintent.ErrRecipientNotFound; every other non-200 outcome is reported
// as a plain error.
func (client *Client) GetUserByID(ctx context.Context, userID string) (acceptintent.UserRecord, error) {
	if client == nil || client.httpClient == nil {
		return acceptintent.UserRecord{}, errors.New("lookup user by id: nil client")
	}
	if ctx == nil {
		return acceptintent.UserRecord{}, errors.New("lookup user by id: nil context")
	}
	if err := ctx.Err(); err != nil {
		// Fail fast when the caller's context is already cancelled or expired.
		return acceptintent.UserRecord{}, err
	}
	if strings.TrimSpace(userID) == "" {
		return acceptintent.UserRecord{}, errors.New("lookup user by id: user id must not be empty")
	}

	// Path-escape the id so it cannot alter the request path shape.
	payload, statusCode, err := client.doRequest(ctx, http.MethodGet, fmt.Sprintf(getUserByIDPathSuffix, url.PathEscape(userID)))
	if err != nil {
		return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: %w", userID, err)
	}

	switch statusCode {
	case http.StatusOK:
		// Lenient decode: unknown success-response fields are tolerated.
		var response getUserByIDResponse
		if err := decodeJSONPayload(payload, &response); err != nil {
			return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: decode success response: %w", userID, err)
		}

		record := acceptintent.UserRecord{
			Email:             response.User.Email,
			PreferredLanguage: response.User.PreferredLanguage,
		}
		// An invalid record (e.g. malformed email) is a dependency failure,
		// deliberately distinct from a missing recipient.
		if err := record.Validate(); err != nil {
			return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: invalid success response: %w", userID, err)
		}

		return record, nil
	case http.StatusNotFound:
		errorCode, err := decodeErrorCode(payload)
		if err != nil {
			return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: decode error response: %w", userID, err)
		}
		if errorCode == subjectNotFoundErrorCode {
			// Wrap the sentinel so callers can match with errors.Is.
			return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: %w", userID, acceptintent.ErrRecipientNotFound)
		}

		return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: unexpected error code %q for status %d", userID, errorCode, statusCode)
	default:
		return acceptintent.UserRecord{}, fmt.Errorf("lookup user by id %q: unexpected HTTP status %d", userID, statusCode)
	}
}
|
||||
|
||||
func (client *Client) doRequest(ctx context.Context, method string, requestPath string) ([]byte, int, error) {
|
||||
attemptCtx, cancel := context.WithTimeout(ctx, client.requestTimeout)
|
||||
defer cancel()
|
||||
|
||||
request, err := http.NewRequestWithContext(attemptCtx, method, client.baseURL+requestPath, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("build request: %w", err)
|
||||
}
|
||||
|
||||
response, err := client.httpClient.Do(request)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer response.Body.Close()
|
||||
|
||||
payload, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("read response body: %w", err)
|
||||
}
|
||||
|
||||
return payload, response.StatusCode, nil
|
||||
}
|
||||
|
||||
func decodeErrorCode(payload []byte) (string, error) {
|
||||
var envelope errorEnvelope
|
||||
if err := decodeStrictJSONPayload(payload, &envelope); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if envelope.Error == nil {
|
||||
return "", errors.New("missing error object")
|
||||
}
|
||||
if strings.TrimSpace(envelope.Error.Code) == "" {
|
||||
return "", errors.New("missing error code")
|
||||
}
|
||||
|
||||
return envelope.Error.Code, nil
|
||||
}
|
||||
|
||||
// decodeJSONPayload decodes payload into target leniently (unknown fields are
// allowed) and rejects any trailing JSON input after the first value.
func decodeJSONPayload(payload []byte, target any) error {
	decoder := json.NewDecoder(bytes.NewReader(payload))

	if err := decoder.Decode(target); err != nil {
		return err
	}

	// A second decode must hit EOF; anything else means the payload carried
	// more than one JSON value. errors.Is (rather than ==) also matches a
	// wrapped io.EOF.
	if err := decoder.Decode(&struct{}{}); !errors.Is(err, io.EOF) {
		if err == nil {
			return errors.New("unexpected trailing JSON input")
		}

		return err
	}

	return nil
}
|
||||
|
||||
// decodeStrictJSONPayload decodes payload into target, rejecting unknown
// fields and any trailing JSON input after the first value.
func decodeStrictJSONPayload(payload []byte, target any) error {
	decoder := json.NewDecoder(bytes.NewReader(payload))
	decoder.DisallowUnknownFields()

	if err := decoder.Decode(target); err != nil {
		return err
	}

	// A second decode must hit EOF; anything else means the payload carried
	// more than one JSON value. errors.Is (rather than ==) also matches a
	// wrapped io.EOF.
	if err := decoder.Decode(&struct{}{}); !errors.Is(err, io.EOF) {
		if err == nil {
			return errors.New("unexpected trailing JSON input")
		}

		return err
	}

	return nil
}
|
||||
|
||||
var _ acceptintent.UserDirectory = (*Client)(nil)
|
||||
@@ -0,0 +1,219 @@
|
||||
package userservice
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestNewClient exercises the construction-time validation surface of the
// production constructor: one valid case plus each rejection message.
func TestNewClient(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name    string
		cfg     Config
		wantErr string // substring expected in the construction error; empty means success
	}{
		{
			name: "valid config",
			cfg: Config{
				BaseURL:        "http://127.0.0.1:8080",
				RequestTimeout: time.Second,
			},
		},
		{
			name: "empty base url",
			cfg: Config{
				RequestTimeout: time.Second,
			},
			wantErr: "base URL must not be empty",
		},
		{
			name: "relative base url",
			cfg: Config{
				BaseURL:        "/relative",
				RequestTimeout: time.Second,
			},
			wantErr: "base URL must be absolute",
		},
		{
			name: "non positive timeout",
			cfg: Config{
				BaseURL: "http://127.0.0.1:8080",
			},
			wantErr: "request timeout must be positive",
		},
	}

	for _, tt := range tests {
		tt := tt // pin range variable for the parallel subtest closure

		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			client, err := NewClient(tt.cfg)
			if tt.wantErr != "" {
				require.Error(t, err)
				assert.ErrorContains(t, err, tt.wantErr)
				return
			}

			require.NoError(t, err)
			// Successful construction must yield a closable client.
			assert.NoError(t, client.Close())
		})
	}
}
|
||||
|
||||
// TestClientGetUserByID drives GetUserByID against a local httptest server
// and pins the success path, the not-found sentinel mapping, the invalid
// email dependency failure, and the per-request timeout.
func TestClientGetUserByID(t *testing.T) {
	t.Parallel()

	t.Run("success", func(t *testing.T) {
		t.Parallel()

		var captured capturedRequest
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			captured = captureRequest(t, r)
			// Extra fields (user_id, time_zone) verify lenient decoding.
			writeJSON(t, w, http.StatusOK, map[string]any{
				"user": map[string]any{
					"user_id":            "user-123",
					"email":              "pilot@example.com",
					"preferred_language": "en-US",
					"time_zone":          "Europe/Kaliningrad",
				},
			})
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 250*time.Millisecond)

		record, err := client.GetUserByID(context.Background(), "user-123")
		require.NoError(t, err)
		require.Equal(t, acceptintent.UserRecord{
			Email:             "pilot@example.com",
			PreferredLanguage: "en-US",
		}, record)
		// The client must issue a GET to the internal lookup path.
		require.Equal(t, capturedRequest{
			Method: http.MethodGet,
			Path:   "/api/v1/internal/users/user-123",
		}, captured)
	})

	t.Run("subject not found", func(t *testing.T) {
		t.Parallel()

		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			writeJSON(t, w, http.StatusNotFound, map[string]any{
				"error": map[string]any{
					"code":    "subject_not_found",
					"message": "subject not found",
				},
			})
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 250*time.Millisecond)

		// The 404 + subject_not_found pair maps to the sentinel error.
		_, err := client.GetUserByID(context.Background(), "user-missing")
		require.Error(t, err)
		require.ErrorIs(t, err, acceptintent.ErrRecipientNotFound)
	})

	t.Run("invalid email is treated as dependency failure", func(t *testing.T) {
		t.Parallel()

		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			writeJSON(t, w, http.StatusOK, map[string]any{
				"user": map[string]any{
					"email":              "bad@@example.com",
					"preferred_language": "en",
				},
			})
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 250*time.Millisecond)

		// A malformed email must NOT look like a missing recipient.
		_, err := client.GetUserByID(context.Background(), "user-123")
		require.Error(t, err)
		require.NotErrorIs(t, err, acceptintent.ErrRecipientNotFound)
		require.ErrorContains(t, err, "invalid success response")
	})

	t.Run("timeout", func(t *testing.T) {
		t.Parallel()

		// The handler blocks until the client gives up, forcing the deadline.
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			<-r.Context().Done()
		}))
		defer server.Close()

		client := newTestClient(t, server.URL, 10*time.Millisecond)

		_, err := client.GetUserByID(context.Background(), "user-123")
		require.Error(t, err)
		require.NotErrorIs(t, err, acceptintent.ErrRecipientNotFound)
		require.ErrorContains(t, err, "context deadline exceeded")
	})
}
|
||||
|
||||
// capturedRequest records the observable request attributes asserted by the
// HTTP-level tests.
type capturedRequest struct {
	Method string // HTTP method observed by the test server
	Path   string // URL path observed by the test server
}
|
||||
|
||||
func newTestClient(t *testing.T, baseURL string, requestTimeout time.Duration) *Client {
|
||||
t.Helper()
|
||||
|
||||
client, err := newClient(
|
||||
Config{
|
||||
BaseURL: baseURL,
|
||||
RequestTimeout: requestTimeout,
|
||||
},
|
||||
&http.Client{Transport: http.DefaultTransport.(*http.Transport).Clone()},
|
||||
func() {},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
return client
|
||||
}
|
||||
|
||||
func captureRequest(t *testing.T, request *http.Request) capturedRequest {
|
||||
t.Helper()
|
||||
|
||||
_, err := io.ReadAll(request.Body)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, request.Body.Close())
|
||||
|
||||
return capturedRequest{
|
||||
Method: request.Method,
|
||||
Path: request.URL.Path,
|
||||
}
|
||||
}
|
||||
|
||||
func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) {
|
||||
t.Helper()
|
||||
|
||||
body, err := json.Marshal(payload)
|
||||
require.NoError(t, err)
|
||||
|
||||
writer.Header().Set("Content-Type", "application/json")
|
||||
writer.WriteHeader(statusCode)
|
||||
_, err = writer.Write(body)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestClientCloseIsNilSafe(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var nilClient *Client
|
||||
require.NoError(t, nilClient.Close())
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
// Package api reserves the transport-layer namespace of Notification Service.
|
||||
package api
|
||||
@@ -0,0 +1,147 @@
|
||||
// Package intentstream defines the frozen Redis Stream contract used for
|
||||
// Notification Service intent intake.
|
||||
package intentstream
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
"galaxy/notificationintent"
|
||||
)
|
||||
|
||||
const (
	// Frozen field names of the Redis Stream intent-ingress contract.
	fieldNotificationType = "notification_type"
	fieldProducer         = "producer"
	fieldAudienceKind     = "audience_kind"
	fieldRecipientUserIDs = "recipient_user_ids_json"
	fieldIdempotencyKey   = "idempotency_key"
	fieldOccurredAtMS     = "occurred_at_ms"
	fieldRequestID        = "request_id"
	fieldTraceID          = "trace_id"
	fieldPayloadJSON      = "payload_json"

	// defaultResolvedLocale is the fallback locale exposed via
	// DefaultResolvedLocale during the English-only rollout.
	defaultResolvedLocale = "en"
)
|
||||
|
||||
// NotificationType identifies one supported normalized notification type.
|
||||
type NotificationType = notificationintent.NotificationType
|
||||
|
||||
const (
|
||||
// NotificationTypeGeoReviewRecommended identifies the
|
||||
// `geo.review_recommended` notification.
|
||||
NotificationTypeGeoReviewRecommended = notificationintent.NotificationTypeGeoReviewRecommended
|
||||
|
||||
// NotificationTypeGameTurnReady identifies the `game.turn.ready`
|
||||
// notification.
|
||||
NotificationTypeGameTurnReady = notificationintent.NotificationTypeGameTurnReady
|
||||
|
||||
// NotificationTypeGameFinished identifies the `game.finished`
|
||||
// notification.
|
||||
NotificationTypeGameFinished = notificationintent.NotificationTypeGameFinished
|
||||
|
||||
// NotificationTypeGameGenerationFailed identifies the
|
||||
// `game.generation_failed` notification.
|
||||
NotificationTypeGameGenerationFailed = notificationintent.NotificationTypeGameGenerationFailed
|
||||
|
||||
// NotificationTypeLobbyRuntimePausedAfterStart identifies the
|
||||
// `lobby.runtime_paused_after_start` notification.
|
||||
NotificationTypeLobbyRuntimePausedAfterStart = notificationintent.NotificationTypeLobbyRuntimePausedAfterStart
|
||||
|
||||
// NotificationTypeLobbyApplicationSubmitted identifies the
|
||||
// `lobby.application.submitted` notification.
|
||||
NotificationTypeLobbyApplicationSubmitted = notificationintent.NotificationTypeLobbyApplicationSubmitted
|
||||
|
||||
// NotificationTypeLobbyMembershipApproved identifies the
|
||||
// `lobby.membership.approved` notification.
|
||||
NotificationTypeLobbyMembershipApproved = notificationintent.NotificationTypeLobbyMembershipApproved
|
||||
|
||||
// NotificationTypeLobbyMembershipRejected identifies the
|
||||
// `lobby.membership.rejected` notification.
|
||||
NotificationTypeLobbyMembershipRejected = notificationintent.NotificationTypeLobbyMembershipRejected
|
||||
|
||||
// NotificationTypeLobbyInviteCreated identifies the
|
||||
// `lobby.invite.created` notification.
|
||||
NotificationTypeLobbyInviteCreated = notificationintent.NotificationTypeLobbyInviteCreated
|
||||
|
||||
// NotificationTypeLobbyInviteRedeemed identifies the
|
||||
// `lobby.invite.redeemed` notification.
|
||||
NotificationTypeLobbyInviteRedeemed = notificationintent.NotificationTypeLobbyInviteRedeemed
|
||||
|
||||
// NotificationTypeLobbyInviteExpired identifies the
|
||||
// `lobby.invite.expired` notification.
|
||||
NotificationTypeLobbyInviteExpired = notificationintent.NotificationTypeLobbyInviteExpired
|
||||
)
|
||||
|
||||
// Producer identifies one supported upstream producer.
|
||||
type Producer = notificationintent.Producer
|
||||
|
||||
const (
|
||||
// ProducerGeoProfile identifies Geo Profile Service.
|
||||
ProducerGeoProfile = notificationintent.ProducerGeoProfile
|
||||
|
||||
// ProducerGameMaster identifies Game Master.
|
||||
ProducerGameMaster = notificationintent.ProducerGameMaster
|
||||
|
||||
// ProducerGameLobby identifies Game Lobby.
|
||||
ProducerGameLobby = notificationintent.ProducerGameLobby
|
||||
)
|
||||
|
||||
// AudienceKind identifies one supported target-audience kind.
|
||||
type AudienceKind = notificationintent.AudienceKind
|
||||
|
||||
const (
|
||||
// AudienceKindUser identifies user-targeted notifications.
|
||||
AudienceKindUser = notificationintent.AudienceKindUser
|
||||
|
||||
// AudienceKindAdminEmail identifies administrator-email notifications.
|
||||
AudienceKindAdminEmail = notificationintent.AudienceKindAdminEmail
|
||||
)
|
||||
|
||||
// Channel identifies one durable notification-delivery channel slot.
|
||||
type Channel = notificationintent.Channel
|
||||
|
||||
const (
|
||||
// ChannelPush identifies the push-delivery channel.
|
||||
ChannelPush = notificationintent.ChannelPush
|
||||
|
||||
// ChannelEmail identifies the email-delivery channel.
|
||||
ChannelEmail = notificationintent.ChannelEmail
|
||||
)
|
||||
|
||||
// Intent stores one normalized notification intent accepted from the Redis
|
||||
// Stream ingress contract.
|
||||
type Intent = notificationintent.Intent
|
||||
|
||||
// DecodeIntent validates one raw Redis Stream entry and returns the normalized
// notification intent frozen by the shared producer contract.
//
// It delegates to the shared notificationintent package so producers and this
// consumer agree on a single decoder implementation.
func DecodeIntent(fields map[string]any) (Intent, error) {
	return notificationintent.DecodeIntent(fields)
}
|
||||
|
||||
// ClassifyDecodeError maps one intake decoding or validation error to the
|
||||
// stable malformed-intent failure surface.
|
||||
func ClassifyDecodeError(err error) malformedintent.FailureCode {
|
||||
if err == nil {
|
||||
return malformedintent.FailureCodeInvalidIntent
|
||||
}
|
||||
|
||||
message := err.Error()
|
||||
switch {
|
||||
case strings.Contains(message, "payload_json"),
|
||||
strings.Contains(message, "turn_number"),
|
||||
strings.Contains(message, "final_turn_number"),
|
||||
strings.Contains(message, "failure_reason"),
|
||||
strings.Contains(message, "applicant_name"),
|
||||
strings.Contains(message, "inviter_name"),
|
||||
strings.Contains(message, "invitee_name"),
|
||||
strings.Contains(message, "review_reason"):
|
||||
return malformedintent.FailureCodeInvalidPayload
|
||||
default:
|
||||
return malformedintent.FailureCodeInvalidIntent
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultResolvedLocale returns the frozen fallback locale assigned when the
// current rollout has no supported exact user locale other than English.
//
// The value is the package-level defaultResolvedLocale constant ("en").
func DefaultResolvedLocale() string {
	return defaultResolvedLocale
}
|
||||
@@ -0,0 +1,145 @@
|
||||
package intentstream
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestDecodeIntentNormalizesUserRecipientsAndPayload pins the normalization
// behavior of DecodeIntent: recipient ids come back sorted, the payload JSON
// is canonicalized to sorted keys, and occurred_at_ms becomes a UTC time.
func TestDecodeIntentNormalizesUserRecipientsAndPayload(t *testing.T) {
	t.Parallel()

	fields := map[string]any{
		fieldNotificationType: NotificationTypeGameTurnReady.String(),
		fieldProducer:         ProducerGameMaster.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		fieldRecipientUserIDs: `["user-2","user-1"]`,
		fieldIdempotencyKey:   "game-123:turn-54",
		fieldOccurredAtMS:     "1775121700000",
		fieldPayloadJSON:      `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`,
		fieldRequestID:        "request-123",
		fieldTraceID:          "trace-123",
	}

	intent, err := DecodeIntent(fields)
	require.NoError(t, err)
	// Input order user-2, user-1 is normalized to sorted order.
	require.Equal(t, []string{"user-1", "user-2"}, intent.RecipientUserIDs)
	// Payload keys are canonicalized (sorted) regardless of input order.
	require.Equal(t, `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, intent.PayloadJSON)
	require.Equal(t, time.UnixMilli(1775121700000).UTC(), intent.OccurredAt)
}
|
||||
|
||||
// TestDecodeIntentCanonicalizesEquivalentPayloadJSON pins that two payloads
// differing only in JSON key order decode to the same canonical PayloadJSON.
func TestDecodeIntentCanonicalizesEquivalentPayloadJSON(t *testing.T) {
	t.Parallel()

	fieldsA := map[string]any{
		fieldNotificationType: NotificationTypeGameFinished.String(),
		fieldProducer:         ProducerGameMaster.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		fieldRecipientUserIDs: `["user-1"]`,
		fieldIdempotencyKey:   "game-123:finished",
		fieldOccurredAtMS:     "1775121700001",
		fieldPayloadJSON:      `{"game_id":"game-123","game_name":"Nebula Clash","final_turn_number":54}`,
	}
	// Same payload content, different key order and occurred-at timestamp.
	fieldsB := map[string]any{
		fieldNotificationType: NotificationTypeGameFinished.String(),
		fieldProducer:         ProducerGameMaster.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		fieldRecipientUserIDs: `["user-1"]`,
		fieldIdempotencyKey:   "game-123:finished",
		fieldOccurredAtMS:     "1775121709999",
		fieldPayloadJSON:      `{"final_turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`,
	}

	intentA, err := DecodeIntent(fieldsA)
	require.NoError(t, err)
	intentB, err := DecodeIntent(fieldsB)
	require.NoError(t, err)

	require.Equal(t, intentA.PayloadJSON, intentB.PayloadJSON)
}
|
||||
|
||||
// TestDecodeIntentRejectsUnsupportedTopLevelField verifies that an otherwise
// valid intent carrying an unknown top-level field is rejected and classified
// as an invalid-intent malformed failure.
func TestDecodeIntentRejectsUnsupportedTopLevelField(t *testing.T) {
	t.Parallel()

	fields := map[string]any{
		fieldNotificationType: NotificationTypeGameTurnReady.String(),
		fieldProducer:         ProducerGameMaster.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		fieldRecipientUserIDs: `["user-1"]`,
		fieldIdempotencyKey:   "game-123:turn-54",
		fieldOccurredAtMS:     "1775121700000",
		fieldPayloadJSON:      `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		// Not part of the intent schema; must trigger rejection.
		"unexpected": "boom",
	}

	_, err := DecodeIntent(fields)
	require.Error(t, err)
	require.Contains(t, err.Error(), "unsupported fields")
	require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err)))
}
|
||||
|
||||
// TestDecodeIntentRejectsDuplicateRecipientUserIDs verifies that a recipient
// list containing the same user id twice is rejected and classified as an
// invalid-intent malformed failure.
func TestDecodeIntentRejectsDuplicateRecipientUserIDs(t *testing.T) {
	t.Parallel()

	fields := map[string]any{
		fieldNotificationType: NotificationTypeGameTurnReady.String(),
		fieldProducer:         ProducerGameMaster.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		// "user-1" appears twice; the decoder must reject the duplicate.
		fieldRecipientUserIDs: `["user-1","user-1"]`,
		fieldIdempotencyKey:   "game-123:turn-54",
		fieldOccurredAtMS:     "1775121700000",
		fieldPayloadJSON:      `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
	}

	_, err := DecodeIntent(fields)
	require.Error(t, err)
	require.Contains(t, err.Error(), "duplicates user id")
	require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err)))
}
|
||||
|
||||
// TestDecodeIntentRejectsInvalidPayloadJSON verifies that a payload missing a
// required field (inviter_name for lobby invites) is rejected and classified
// as an invalid-payload malformed failure.
func TestDecodeIntentRejectsInvalidPayloadJSON(t *testing.T) {
	t.Parallel()

	fields := map[string]any{
		fieldNotificationType: NotificationTypeLobbyInviteCreated.String(),
		fieldProducer:         ProducerGameLobby.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		fieldRecipientUserIDs: `["user-1"]`,
		fieldIdempotencyKey:   "invite-created:user-1",
		fieldOccurredAtMS:     "1775121700000",
		// Deliberately omits "inviter_name", which the assertion below
		// expects the decoder to require for this notification type.
		fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","inviter_user_id":"user-2"}`,
	}

	_, err := DecodeIntent(fields)
	require.Error(t, err)
	require.Contains(t, err.Error(), "payload_json.inviter_name is required")
	require.Equal(t, malformedFailureCodeInvalidPayload(), string(ClassifyDecodeError(err)))
}
|
||||
|
||||
// TestDecodeIntentRejectsAdminRecipientsField verifies that an admin-email
// audience intent carrying a recipient_user_ids field is rejected and
// classified as an invalid-intent malformed failure.
func TestDecodeIntentRejectsAdminRecipientsField(t *testing.T) {
	t.Parallel()

	fields := map[string]any{
		fieldNotificationType: NotificationTypeGeoReviewRecommended.String(),
		fieldProducer:         ProducerGeoProfile.String(),
		fieldAudienceKind:     AudienceKindAdminEmail.String(),
		// Recipient user ids are only valid for the user audience kind; the
		// assertion below expects the decoder to reject their presence here.
		fieldRecipientUserIDs: `["user-1"]`,
		fieldIdempotencyKey:   "geo:user-1",
		fieldOccurredAtMS:     "1775121700000",
		fieldPayloadJSON:      `{"user_id":"user-1","user_email":"pilot@example.com","observed_country":"DE","usual_connection_country":"PL","review_reason":"country_mismatch"}`,
	}

	_, err := DecodeIntent(fields)
	require.Error(t, err)
	require.Contains(t, err.Error(), "must not be present")
	require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err)))
}
|
||||
|
||||
// malformedFailureCodeInvalidIntent returns the failure code that the decode
// classifier is expected to report for structurally invalid intents.
func malformedFailureCodeInvalidIntent() string {
	const code = "invalid_intent"
	return code
}
|
||||
|
||||
// malformedFailureCodeInvalidPayload returns the failure code that the decode
// classifier is expected to report for schema-invalid payload_json contents.
func malformedFailureCodeInvalidPayload() string {
	const code = "invalid_payload"
	return code
}
|
||||
@@ -0,0 +1,252 @@
|
||||
// Package internalhttp provides the private probe HTTP listener used by the
|
||||
// runnable Notification Service process.
|
||||
package internalhttp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/telemetry"
|
||||
|
||||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
)
|
||||
|
||||
// jsonContentType is the Content-Type header value written on every probe
// response body.
const jsonContentType = "application/json; charset=utf-8"

const (
	// HealthzPath is the private liveness probe route.
	HealthzPath = "/healthz"

	// ReadyzPath is the private readiness probe route.
	ReadyzPath = "/readyz"
)
|
||||
|
||||
// Config describes the private internal HTTP listener owned by Notification
|
||||
// Service.
|
||||
// Config describes the private internal HTTP listener owned by Notification
// Service. The zero value is not usable; see Validate for the constraints.
type Config struct {
	// Addr is the TCP listen address used by the private probe HTTP server.
	Addr string

	// ReadHeaderTimeout bounds how long the listener may spend reading request
	// headers before the server rejects the connection.
	ReadHeaderTimeout time.Duration

	// ReadTimeout bounds how long the listener may spend reading one request.
	ReadTimeout time.Duration

	// IdleTimeout bounds how long the listener keeps an idle keep-alive
	// connection open.
	IdleTimeout time.Duration
}
|
||||
|
||||
// Validate reports whether cfg contains a usable private HTTP listener
|
||||
// configuration.
|
||||
func (cfg Config) Validate() error {
|
||||
switch {
|
||||
case cfg.Addr == "":
|
||||
return errors.New("internal HTTP addr must not be empty")
|
||||
case cfg.ReadHeaderTimeout <= 0:
|
||||
return errors.New("internal HTTP read header timeout must be positive")
|
||||
case cfg.ReadTimeout <= 0:
|
||||
return errors.New("internal HTTP read timeout must be positive")
|
||||
case cfg.IdleTimeout <= 0:
|
||||
return errors.New("internal HTTP idle timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Dependencies describes the collaborators used by the private probe
|
||||
// transport layer.
|
||||
// Dependencies describes the collaborators used by the private probe
// transport layer. Both fields are optional; see the per-field notes.
type Dependencies struct {
	// Logger writes structured listener lifecycle logs. When nil, slog.Default
	// is used.
	Logger *slog.Logger

	// Telemetry records low-cardinality probe metrics and lifecycle events.
	// NOTE(review): NewServer accepts Dependencies{} with a nil Telemetry and
	// the server still calls its methods — presumably *telemetry.Runtime
	// methods are nil-receiver safe; confirm in the telemetry package.
	Telemetry *telemetry.Runtime
}
|
||||
|
||||
// Server owns the private probe HTTP listener exposed by Notification
|
||||
// Service.
|
||||
// Server owns the private probe HTTP listener exposed by Notification
// Service. Use NewServer to construct it; the zero value is not usable.
type Server struct {
	cfg Config

	handler http.Handler
	logger  *slog.Logger
	metrics *telemetry.Runtime

	// stateMu guards server and listener, which are published by Run and read
	// by Shutdown from a different goroutine.
	stateMu  sync.RWMutex
	server   *http.Server
	listener net.Listener
}
|
||||
|
||||
// NewServer constructs one private probe HTTP server for cfg and deps.
|
||||
func NewServer(cfg Config, deps Dependencies) (*Server, error) {
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("new internal HTTP server: %w", err)
|
||||
}
|
||||
|
||||
logger := deps.Logger
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
return &Server{
|
||||
cfg: cfg,
|
||||
handler: newHandler(logger, deps.Telemetry),
|
||||
logger: logger.With("component", "internal_http"),
|
||||
metrics: deps.Telemetry,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run binds the configured listener and serves the private probe surface until
|
||||
// Shutdown closes the server.
|
||||
// Run binds the configured listener and serves the private probe surface until
// Shutdown closes the server. ctx is only consulted up front: once serving has
// started, stopping the server is Shutdown's responsibility.
func (server *Server) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run internal HTTP server: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}

	listener, err := net.Listen("tcp", server.cfg.Addr)
	if err != nil {
		return fmt.Errorf("run internal HTTP server: listen on %q: %w", server.cfg.Addr, err)
	}

	httpServer := &http.Server{
		Handler:           server.handler,
		ReadHeaderTimeout: server.cfg.ReadHeaderTimeout,
		ReadTimeout:       server.cfg.ReadTimeout,
		IdleTimeout:       server.cfg.IdleTimeout,
	}

	// Publish the live server/listener under the lock so a concurrent
	// Shutdown call can observe and stop them.
	server.stateMu.Lock()
	server.server = httpServer
	server.listener = listener
	server.stateMu.Unlock()

	server.logger.Info("notification internal HTTP server started", "addr", listener.Addr().String())
	// context.Background() is used deliberately: lifecycle events should be
	// recorded even when the run context is already canceled.
	server.metrics.RecordInternalHTTPEvent(context.Background(), "started")

	// Clear the published state when serving ends so a later Shutdown is a
	// no-op instead of touching a dead server.
	defer func() {
		server.stateMu.Lock()
		server.server = nil
		server.listener = nil
		server.stateMu.Unlock()
	}()

	err = httpServer.Serve(listener)
	switch {
	case err == nil:
		return nil
	case errors.Is(err, http.ErrServerClosed):
		// ErrServerClosed is the normal result of a graceful Shutdown.
		server.logger.Info("notification internal HTTP server stopped")
		server.metrics.RecordInternalHTTPEvent(context.Background(), "stopped")
		return nil
	default:
		return fmt.Errorf("run internal HTTP server: serve on %q: %w", server.cfg.Addr, err)
	}
}
|
||||
|
||||
// Shutdown gracefully stops the private probe HTTP server within ctx.
|
||||
func (server *Server) Shutdown(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("shutdown internal HTTP server: nil context")
|
||||
}
|
||||
|
||||
server.stateMu.RLock()
|
||||
httpServer := server.server
|
||||
server.stateMu.RUnlock()
|
||||
|
||||
if httpServer == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := httpServer.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
return fmt.Errorf("shutdown internal HTTP server: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func newHandler(logger *slog.Logger, metrics *telemetry.Runtime) http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("GET "+HealthzPath, handleHealthz)
|
||||
mux.HandleFunc("GET "+ReadyzPath, handleReadyz)
|
||||
|
||||
return otelhttp.NewHandler(withObservability(mux, metrics), "notification.internal_http")
|
||||
}
|
||||
|
||||
func withObservability(next http.Handler, metrics *telemetry.Runtime) http.Handler {
|
||||
return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
|
||||
startedAt := time.Now()
|
||||
recorder := &statusRecorder{
|
||||
ResponseWriter: writer,
|
||||
statusCode: http.StatusOK,
|
||||
}
|
||||
|
||||
next.ServeHTTP(recorder, request)
|
||||
|
||||
route := request.Pattern
|
||||
switch recorder.statusCode {
|
||||
case http.StatusMethodNotAllowed:
|
||||
route = "method_not_allowed"
|
||||
case http.StatusNotFound:
|
||||
route = "not_found"
|
||||
case 0:
|
||||
route = "unmatched"
|
||||
}
|
||||
if route == "" {
|
||||
route = "unmatched"
|
||||
}
|
||||
|
||||
metrics.RecordInternalHTTPRequest(
|
||||
request.Context(),
|
||||
[]attribute.KeyValue{
|
||||
attribute.String("route", route),
|
||||
attribute.String("method", request.Method),
|
||||
attribute.String("status_code", strconv.Itoa(recorder.statusCode)),
|
||||
},
|
||||
time.Since(startedAt),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// handleHealthz answers the liveness probe with a fixed 200 {"status":"ok"}.
func handleHealthz(writer http.ResponseWriter, _ *http.Request) {
	writeStatusResponse(writer, http.StatusOK, "ok")
}
|
||||
|
||||
// handleReadyz answers the readiness probe with a fixed 200 {"status":"ready"}.
// NOTE(review): readiness is unconditional here — no dependency checks are
// consulted; confirm that is intentional.
func handleReadyz(writer http.ResponseWriter, _ *http.Request) {
	writeStatusResponse(writer, http.StatusOK, "ready")
}
|
||||
|
||||
// writeStatusResponse writes statusCode with a JSON body {"status": status}.
func writeStatusResponse(writer http.ResponseWriter, statusCode int, status string) {
	writer.Header().Set("Content-Type", jsonContentType)
	writer.WriteHeader(statusCode)
	// The encode error is deliberately dropped: headers are already sent, so
	// nothing useful can be done if writing the tiny probe body fails.
	_ = json.NewEncoder(writer).Encode(statusResponse{Status: status})
}
|
||||
|
||||
// statusResponse is the JSON body shape of every probe response.
type statusResponse struct {
	Status string `json:"status"`
}
|
||||
|
||||
// statusRecorder wraps an http.ResponseWriter to capture the status code the
// handler wrote, for the metrics layer in withObservability.
type statusRecorder struct {
	http.ResponseWriter
	// statusCode holds the last code passed to WriteHeader; callers
	// initialize it to http.StatusOK for handlers that never call WriteHeader.
	statusCode int
}
|
||||
|
||||
// WriteHeader records statusCode before forwarding it to the wrapped writer.
func (recorder *statusRecorder) WriteHeader(statusCode int) {
	recorder.statusCode = statusCode
	recorder.ResponseWriter.WriteHeader(statusCode)
}
|
||||
@@ -0,0 +1,272 @@
|
||||
package internalhttp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewServerRejectsInvalidConfiguration(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := Config{
|
||||
ReadHeaderTimeout: time.Second,
|
||||
ReadTimeout: time.Second,
|
||||
IdleTimeout: time.Second,
|
||||
}
|
||||
|
||||
_, err := NewServer(cfg, Dependencies{})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "addr")
|
||||
}
|
||||
|
||||
// TestServerRunAndShutdown verifies the basic lifecycle: Run serves until the
// probe answers, then Shutdown stops it and Run returns nil.
func TestServerRunAndShutdown(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	server, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	// Run blocks, so serve from a goroutine and collect its result.
	runErr := make(chan error, 1)
	go func() {
		runErr <- server.Run(context.Background())
	}()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	require.NoError(t, server.Shutdown(shutdownCtx))
	waitForServerRunResult(t, runErr)
}
|
||||
|
||||
func TestProbeRoutesReturnStableJSON(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := testConfig(t)
|
||||
server, err := NewServer(cfg, Dependencies{})
|
||||
require.NoError(t, err)
|
||||
|
||||
runErr := make(chan error, 1)
|
||||
go func() {
|
||||
runErr <- server.Run(context.Background())
|
||||
}()
|
||||
|
||||
client := newTestHTTPClient(t)
|
||||
waitForHealthzReady(t, client, cfg.Addr)
|
||||
|
||||
tests := []struct {
|
||||
path string
|
||||
status string
|
||||
}{
|
||||
{path: HealthzPath, status: "ok"},
|
||||
{path: ReadyzPath, status: "ready"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
|
||||
t.Run(tt.path, func(t *testing.T) {
|
||||
request, err := http.NewRequest(http.MethodGet, "http://"+cfg.Addr+tt.path, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
response, err := client.Do(request)
|
||||
require.NoError(t, err)
|
||||
defer response.Body.Close()
|
||||
|
||||
require.Equal(t, http.StatusOK, response.StatusCode)
|
||||
require.Equal(t, "application/json; charset=utf-8", response.Header.Get("Content-Type"))
|
||||
|
||||
var payload statusResponse
|
||||
require.NoError(t, json.NewDecoder(response.Body).Decode(&payload))
|
||||
require.Equal(t, tt.status, payload.Status)
|
||||
})
|
||||
}
|
||||
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
require.NoError(t, server.Shutdown(shutdownCtx))
|
||||
waitForServerRunResult(t, runErr)
|
||||
}
|
||||
|
||||
// TestServerDoesNotExposeMetricsOrUnknownRoutes verifies the probe listener
// serves only its two probe routes: /metrics and arbitrary paths must 404.
func TestServerDoesNotExposeMetricsOrUnknownRoutes(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	server, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	runErr := make(chan error, 1)
	go func() {
		runErr <- server.Run(context.Background())
	}()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	for _, path := range []string{"/metrics", "/unknown"} {
		request, err := http.NewRequest(http.MethodGet, "http://"+cfg.Addr+path, nil)
		require.NoError(t, err)

		response, err := client.Do(request)
		require.NoError(t, err)
		// Drain and close so the transport can reuse/close the connection.
		_, _ = io.ReadAll(response.Body)
		response.Body.Close()

		assert.Equalf(t, http.StatusNotFound, response.StatusCode, "path %s", path)
	}

	shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	require.NoError(t, server.Shutdown(shutdownCtx))
	waitForServerRunResult(t, runErr)
}
|
||||
|
||||
// TestServerPreservesStandardHEADBehavior verifies that a HEAD request to the
// GET-registered healthz route returns 200 with an empty body, per the
// standard library's mux/HEAD handling.
func TestServerPreservesStandardHEADBehavior(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	server, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	runErr := make(chan error, 1)
	go func() {
		runErr <- server.Run(context.Background())
	}()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	request, err := http.NewRequest(http.MethodHead, "http://"+cfg.Addr+HealthzPath, nil)
	require.NoError(t, err)

	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()

	body, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	require.Equal(t, http.StatusOK, response.StatusCode)
	// HEAD responses must carry no body.
	require.Empty(t, body)

	shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	require.NoError(t, server.Shutdown(shutdownCtx))
	waitForServerRunResult(t, runErr)
}
|
||||
|
||||
// TestServerUsesStandardMethodNotAllowedBehavior verifies that a POST to the
// GET-registered healthz route yields 405 with an Allow header listing GET
// and HEAD, per the Go 1.22 ServeMux method-pattern behavior.
func TestServerUsesStandardMethodNotAllowedBehavior(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	server, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	runErr := make(chan error, 1)
	go func() {
		runErr <- server.Run(context.Background())
	}()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	request, err := http.NewRequest(http.MethodPost, "http://"+cfg.Addr+HealthzPath, nil)
	require.NoError(t, err)

	response, err := client.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()
	// Drain so the connection can be reused/closed cleanly.
	_, _ = io.ReadAll(response.Body)

	require.Equal(t, http.StatusMethodNotAllowed, response.StatusCode)
	require.Contains(t, response.Header.Get("Allow"), http.MethodGet)
	require.Contains(t, response.Header.Get("Allow"), http.MethodHead)

	shutdownCtx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	require.NoError(t, server.Shutdown(shutdownCtx))
	waitForServerRunResult(t, runErr)
}
|
||||
|
||||
func testConfig(t *testing.T) Config {
|
||||
t.Helper()
|
||||
|
||||
return Config{
|
||||
Addr: mustFreeAddr(t),
|
||||
ReadHeaderTimeout: time.Second,
|
||||
ReadTimeout: 2 * time.Second,
|
||||
IdleTimeout: time.Minute,
|
||||
}
|
||||
}
|
||||
|
||||
func newTestHTTPClient(t *testing.T) *http.Client {
|
||||
t.Helper()
|
||||
|
||||
transport := &http.Transport{DisableKeepAlives: true}
|
||||
t.Cleanup(transport.CloseIdleConnections)
|
||||
|
||||
return &http.Client{
|
||||
Timeout: 250 * time.Millisecond,
|
||||
Transport: transport,
|
||||
}
|
||||
}
|
||||
|
||||
func waitForHealthzReady(t *testing.T, client *http.Client, addr string) {
|
||||
t.Helper()
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
request, err := http.NewRequest(http.MethodGet, "http://"+addr+HealthzPath, nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
response, err := client.Do(request)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer response.Body.Close()
|
||||
|
||||
payload, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return response.StatusCode == http.StatusOK && strings.Contains(string(payload), `"status":"ok"`)
|
||||
}, 5*time.Second, 25*time.Millisecond, "internal HTTP server did not become reachable")
|
||||
}
|
||||
|
||||
func waitForServerRunResult(t *testing.T, runErr <-chan error) {
|
||||
t.Helper()
|
||||
|
||||
var err error
|
||||
require.Eventually(t, func() bool {
|
||||
select {
|
||||
case err = <-runErr:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}, 5*time.Second, 10*time.Millisecond, "internal HTTP server did not stop")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func mustFreeAddr(t *testing.T) string {
|
||||
t.Helper()
|
||||
|
||||
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
assert.NoError(t, listener.Close())
|
||||
}()
|
||||
|
||||
return listener.Addr().String()
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
// Package app wires the Notification Service process lifecycle and
|
||||
// coordinates component startup and graceful shutdown.
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"galaxy/notification/internal/config"
|
||||
)
|
||||
|
||||
// Component is a long-lived Notification Service subsystem that participates
|
||||
// in coordinated startup and graceful shutdown.
|
||||
// Component is a long-lived Notification Service subsystem that participates
// in coordinated startup and graceful shutdown.
type Component interface {
	// Run starts the component and blocks until it stops. App treats a nil
	// return before shutdown begins as an unexpected early exit.
	Run(context.Context) error

	// Shutdown stops the component within the provided timeout-bounded context.
	Shutdown(context.Context) error
}
|
||||
|
||||
// App owns the process-level lifecycle of Notification Service and its
|
||||
// registered components.
|
||||
// App owns the process-level lifecycle of Notification Service and its
// registered components. Construct it with New; the component slice is a
// defensive copy, so later mutation of the caller's slice has no effect.
type App struct {
	cfg        config.Config
	components []Component
}
|
||||
|
||||
// New constructs App with a defensive copy of the supplied components.
|
||||
func New(cfg config.Config, components ...Component) *App {
|
||||
clonedComponents := append([]Component(nil), components...)
|
||||
|
||||
return &App{
|
||||
cfg: cfg,
|
||||
components: clonedComponents,
|
||||
}
|
||||
}
|
||||
|
||||
// Run starts all configured components, waits for cancellation or the first
|
||||
// component failure, and then executes best-effort graceful shutdown.
|
||||
// Run starts all configured components, waits for cancellation or the first
// component outcome, and then executes best-effort graceful shutdown. The
// returned error joins the run-phase error (if any) with shutdown and wait
// errors.
func (app *App) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run notification app: nil context")
	}
	if err := app.validate(); err != nil {
		return err
	}
	// With nothing to supervise, just block until the caller cancels.
	if len(app.components) == 0 {
		<-ctx.Done()
		return nil
	}

	runCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Buffered to len(components) so every goroutine can publish its result
	// without blocking, even though only the first result is consumed here.
	results := make(chan componentResult, len(app.components))
	var runWaitGroup sync.WaitGroup

	for index, component := range app.components {
		runWaitGroup.Add(1)

		go func(componentIndex int, component Component) {
			defer runWaitGroup.Done()
			results <- componentResult{
				index: componentIndex,
				err:   component.Run(runCtx),
			}
		}(index, component)
	}

	var runErr error

	// First event wins: either the parent context is canceled (normal stop)
	// or one component exits. Results from other components are not
	// inspected; their failures surface only via Shutdown/wait errors.
	select {
	case <-ctx.Done():
	case result := <-results:
		runErr = classifyComponentResult(ctx, result)
	}

	// Cancel the shared run context so the remaining components begin
	// stopping before Shutdown is invoked on each of them.
	cancel()

	shutdownErr := app.shutdownComponents()
	waitErr := app.waitForComponents(&runWaitGroup)

	return errors.Join(runErr, shutdownErr, waitErr)
}
|
||||
|
||||
// componentResult pairs a component's position in the registration order with
// the error its Run returned, so failures can be attributed by index.
type componentResult struct {
	index int
	err   error
}
|
||||
|
||||
func (app *App) validate() error {
|
||||
if app.cfg.ShutdownTimeout <= 0 {
|
||||
return fmt.Errorf("run notification app: shutdown timeout must be positive, got %s", app.cfg.ShutdownTimeout)
|
||||
}
|
||||
|
||||
for index, component := range app.components {
|
||||
if component == nil {
|
||||
return fmt.Errorf("run notification app: component %d is nil", index)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func classifyComponentResult(parentCtx context.Context, result componentResult) error {
|
||||
switch {
|
||||
case result.err == nil:
|
||||
if parentCtx.Err() != nil {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("run notification app: component %d exited without error before shutdown", result.index)
|
||||
case errors.Is(result.err, context.Canceled) && parentCtx.Err() != nil:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("run notification app: component %d: %w", result.index, result.err)
|
||||
}
|
||||
}
|
||||
|
||||
// shutdownComponents calls Shutdown on every component concurrently, each with
// its own ShutdownTimeout-bounded context, and returns the joined errors.
func (app *App) shutdownComponents() error {
	var shutdownWaitGroup sync.WaitGroup
	// Buffered to len(components) so no goroutine blocks on error delivery.
	errs := make(chan error, len(app.components))

	for index, component := range app.components {
		shutdownWaitGroup.Add(1)

		go func(componentIndex int, component Component) {
			defer shutdownWaitGroup.Done()

			// Each component gets a fresh timeout rather than sharing one
			// deadline across all shutdowns.
			shutdownCtx, cancel := context.WithTimeout(context.Background(), app.cfg.ShutdownTimeout)
			defer cancel()

			if err := component.Shutdown(shutdownCtx); err != nil {
				errs <- fmt.Errorf("shutdown notification component %d: %w", componentIndex, err)
			}
		}(index, component)
	}

	shutdownWaitGroup.Wait()
	close(errs)

	var joined error
	for err := range errs {
		joined = errors.Join(joined, err)
	}

	return joined
}
|
||||
|
||||
// waitForComponents waits up to ShutdownTimeout for every component's Run
// goroutine to return. On timeout it reports an error; note the waiter
// goroutine below then outlives this call until the components finally return.
func (app *App) waitForComponents(runWaitGroup *sync.WaitGroup) error {
	done := make(chan struct{})
	go func() {
		runWaitGroup.Wait()
		close(done)
	}()

	waitCtx, cancel := context.WithTimeout(context.Background(), app.cfg.ShutdownTimeout)
	defer cancel()

	select {
	case <-done:
		return nil
	case <-waitCtx.Done():
		return fmt.Errorf("wait for notification components: %w", waitCtx.Err())
	}
}
|
||||
@@ -0,0 +1,229 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
redisadapter "galaxy/notification/internal/adapters/redis"
|
||||
"galaxy/notification/internal/adapters/redisstate"
|
||||
userserviceadapter "galaxy/notification/internal/adapters/userservice"
|
||||
"galaxy/notification/internal/api/internalhttp"
|
||||
"galaxy/notification/internal/config"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/telemetry"
|
||||
"galaxy/notification/internal/worker"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// Runtime owns the runnable Notification Service process plus the cleanup
|
||||
// functions that release runtime resources after shutdown.
|
||||
// Runtime owns the runnable Notification Service process plus the cleanup
// functions that release runtime resources after shutdown. Construct it with
// NewRuntime and release it with Close.
type Runtime struct {
	cfg config.Config

	// app supervises every registered component's lifecycle.
	app *App

	probeServer    *internalhttp.Server
	telemetry      *telemetry.Runtime
	intentConsumer *worker.IntentConsumer
	pushPublisher  *worker.PushPublisher
	emailPublisher *worker.EmailPublisher

	// cleanupFns are appended in construction order; Close runs them in
	// reverse so dependents are released before their dependencies.
	cleanupFns []func() error
}
|
||||
|
||||
// NewRuntime constructs the runnable Notification Service process from cfg.
|
||||
// NewRuntime constructs the runnable Notification Service process from cfg:
// telemetry, the Redis client and its state stores, the user-service client,
// the accept-intent service, the stream workers, and the private probe HTTP
// server, all assembled into one App. On any failure the partially built
// runtime is cleaned up before the error is returned.
func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*Runtime, error) {
	if ctx == nil {
		return nil, fmt.Errorf("new notification runtime: nil context")
	}
	if err := cfg.Validate(); err != nil {
		return nil, fmt.Errorf("new notification runtime: %w", err)
	}
	if logger == nil {
		logger = slog.Default()
	}

	runtime := &Runtime{
		cfg: cfg,
	}
	// cleanupOnError releases everything registered so far and folds any
	// cleanup failure into the construction error.
	cleanupOnError := func(err error) (*Runtime, error) {
		if cleanupErr := runtime.Close(); cleanupErr != nil {
			return nil, fmt.Errorf("%w; cleanup: %w", err, cleanupErr)
		}

		return nil, err
	}

	// Telemetry first: later stages register instruments and snapshot readers
	// against it.
	telemetryRuntime, err := telemetry.NewProcess(ctx, telemetry.ProcessConfig{
		ServiceName:          cfg.Telemetry.ServiceName,
		TracesExporter:       cfg.Telemetry.TracesExporter,
		MetricsExporter:      cfg.Telemetry.MetricsExporter,
		TracesProtocol:       cfg.Telemetry.TracesProtocol,
		MetricsProtocol:      cfg.Telemetry.MetricsProtocol,
		StdoutTracesEnabled:  cfg.Telemetry.StdoutTracesEnabled,
		StdoutMetricsEnabled: cfg.Telemetry.StdoutMetricsEnabled,
	}, logger.With("component", "telemetry"))
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: telemetry: %w", err))
	}
	runtime.telemetry = telemetryRuntime
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
		defer cancel()
		return telemetryRuntime.Shutdown(shutdownCtx)
	})

	// Redis client: instrumented, registered for cleanup, then pinged so a
	// misconfigured address fails construction instead of first use.
	redisClient := redisadapter.NewClient(cfg.Redis)
	if err := redisadapter.InstrumentClient(redisClient, telemetryRuntime); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		err := redisClient.Close()
		// Already-closed is not an error during teardown.
		if errors.Is(err, redis.ErrClosed) {
			return nil
		}
		return err
	})
	if err := redisadapter.Ping(ctx, cfg.Redis, redisClient); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
	}

	// Redis-backed state stores shared by the accept service and workers.
	acceptanceStore, err := redisstate.NewAcceptanceStore(redisClient, redisstate.AcceptanceConfig{
		RecordTTL:      cfg.Retry.RecordTTL,
		DeadLetterTTL:  cfg.Retry.DeadLetterTTL,
		IdempotencyTTL: cfg.Retry.IdempotencyTTL,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: acceptance store: %w", err))
	}
	malformedIntentStore, err := redisstate.NewMalformedIntentStore(redisClient, cfg.Retry.DeadLetterTTL)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: malformed intent store: %w", err))
	}
	streamOffsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: stream offset store: %w", err))
	}
	intentStreamLagReader, err := redisstate.NewIntentStreamLagReader(streamOffsetStore, cfg.Streams.Intents)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: intent stream lag reader: %w", err))
	}
	// Expose store-backed snapshots to the telemetry gauges.
	telemetryRuntime.SetRouteScheduleSnapshotReader(acceptanceStore)
	telemetryRuntime.SetIntentStreamLagSnapshotReader(intentStreamLagReader)

	// User Service client used to resolve recipients.
	userDirectory, err := userserviceadapter.NewClient(userserviceadapter.Config{
		BaseURL:        cfg.UserService.BaseURL,
		RequestTimeout: cfg.UserService.Timeout,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: user service client: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, userDirectory.Close)

	// Accept-intent service. Clock is nil here; presumably the constructor
	// substitutes a real clock default — confirm in acceptintent.New.
	acceptIntentService, err := acceptintent.New(acceptintent.Config{
		Store:            acceptanceStore,
		UserDirectory:    userDirectory,
		Clock:            nil,
		Logger:           logger,
		Telemetry:        telemetryRuntime,
		PushMaxAttempts:  cfg.Retry.PushMaxAttempts,
		EmailMaxAttempts: cfg.Retry.EmailMaxAttempts,
		IdempotencyTTL:   cfg.Retry.IdempotencyTTL,
		AdminRouting:     cfg.AdminRouting,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: accept intent service: %w", err))
	}

	// Workers: intent consumer plus push and email publishers.
	intentConsumer, err := worker.NewIntentConsumer(worker.IntentConsumerConfig{
		Client:            redisClient,
		Stream:            cfg.Streams.Intents,
		BlockTimeout:      cfg.IntentsReadBlockTimeout,
		Acceptor:          acceptIntentService,
		MalformedRecorder: malformedIntentStore,
		OffsetStore:       streamOffsetStore,
		Telemetry:         telemetryRuntime,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: intent consumer: %w", err))
	}
	runtime.intentConsumer = intentConsumer
	pushPublisher, err := worker.NewPushPublisher(worker.PushPublisherConfig{
		Store:               acceptanceStore,
		GatewayStream:       cfg.Streams.GatewayClientEvents,
		GatewayStreamMaxLen: cfg.Streams.GatewayClientEventsStreamMaxLen,
		RouteLeaseTTL:       cfg.Retry.RouteLeaseTTL,
		RouteBackoffMin:     cfg.Retry.RouteBackoffMin,
		RouteBackoffMax:     cfg.Retry.RouteBackoffMax,
		Encoder:             nil,
		Telemetry:           telemetryRuntime,
		Clock:               nil,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: push publisher: %w", err))
	}
	runtime.pushPublisher = pushPublisher
	emailPublisher, err := worker.NewEmailPublisher(worker.EmailPublisherConfig{
		Store:                      acceptanceStore,
		MailDeliveryCommandsStream: cfg.Streams.MailDeliveryCommands,
		RouteLeaseTTL:              cfg.Retry.RouteLeaseTTL,
		RouteBackoffMin:            cfg.Retry.RouteBackoffMin,
		RouteBackoffMax:            cfg.Retry.RouteBackoffMax,
		Encoder:                    nil,
		Telemetry:                  telemetryRuntime,
		Clock:                      nil,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: email publisher: %w", err))
	}
	runtime.emailPublisher = emailPublisher

	// Private probe HTTP listener.
	probeServer, err := internalhttp.NewServer(internalhttp.Config{
		Addr:              cfg.InternalHTTP.Addr,
		ReadHeaderTimeout: cfg.InternalHTTP.ReadHeaderTimeout,
		ReadTimeout:       cfg.InternalHTTP.ReadTimeout,
		IdleTimeout:       cfg.InternalHTTP.IdleTimeout,
	}, internalhttp.Dependencies{
		Logger:    logger,
		Telemetry: telemetryRuntime,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: internal HTTP server: %w", err))
	}
	runtime.probeServer = probeServer
	// Assemble the supervised component set; App coordinates their lifecycle.
	runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher)

	return runtime, nil
}
|
||||
|
||||
// Run serves the private probe HTTP listener until ctx is canceled or one
|
||||
// component fails.
|
||||
func (runtime *Runtime) Run(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("run notification runtime: nil context")
|
||||
}
|
||||
if runtime == nil {
|
||||
return errors.New("run notification runtime: nil runtime")
|
||||
}
|
||||
if runtime.app == nil {
|
||||
return errors.New("run notification runtime: nil app")
|
||||
}
|
||||
|
||||
return runtime.app.Run(ctx)
|
||||
}
|
||||
|
||||
// Close releases every runtime dependency in reverse construction order.
|
||||
func (runtime *Runtime) Close() error {
|
||||
if runtime == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var joined error
|
||||
for index := len(runtime.cleanupFns) - 1; index >= 0; index-- {
|
||||
if err := runtime.cleanupFns[index](); err != nil {
|
||||
joined = errors.Join(joined, err)
|
||||
}
|
||||
}
|
||||
|
||||
return joined
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/config"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
testcontainers "github.com/testcontainers/testcontainers-go"
|
||||
rediscontainer "github.com/testcontainers/testcontainers-go/modules/redis"
|
||||
)
|
||||
|
||||
const (
	// realRuntimeSmokeEnv is the opt-in switch: the real-runtime smoke suite
	// only runs when this environment variable is set to "1".
	realRuntimeSmokeEnv = "NOTIFICATION_REAL_RUNTIME_SMOKE"
	// realRuntimeRedisImage is the Redis container image the smoke test boots.
	realRuntimeRedisImage = "redis:7"
)
|
||||
|
||||
func TestRealRuntimeCompatibility(t *testing.T) {
|
||||
if os.Getenv(realRuntimeSmokeEnv) != "1" {
|
||||
t.Skipf("set %s=1 to run the real runtime smoke suite", realRuntimeSmokeEnv)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
redisContainer, err := rediscontainer.Run(ctx, realRuntimeRedisImage)
|
||||
require.NoError(t, err)
|
||||
testcontainers.CleanupContainer(t, redisContainer)
|
||||
|
||||
redisAddr, err := redisContainer.Endpoint(ctx, "")
|
||||
require.NoError(t, err)
|
||||
|
||||
cfg := config.DefaultConfig()
|
||||
cfg.Redis.Addr = redisAddr
|
||||
cfg.UserService.BaseURL = "http://user-service.internal"
|
||||
cfg.InternalHTTP.Addr = mustFreeAddr(t)
|
||||
cfg.ShutdownTimeout = 2 * time.Second
|
||||
cfg.Telemetry.TracesExporter = "none"
|
||||
cfg.Telemetry.MetricsExporter = "none"
|
||||
|
||||
runtime, err := NewRuntime(context.Background(), cfg, testLogger())
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
require.NoError(t, runtime.Close())
|
||||
}()
|
||||
|
||||
runCtx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
runErrCh := make(chan error, 1)
|
||||
go func() {
|
||||
runErrCh <- runtime.Run(runCtx)
|
||||
}()
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: 500 * time.Millisecond,
|
||||
Transport: &http.Transport{
|
||||
DisableKeepAlives: true,
|
||||
},
|
||||
}
|
||||
t.Cleanup(client.CloseIdleConnections)
|
||||
|
||||
waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
|
||||
assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/healthz", http.StatusOK)
|
||||
assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/readyz", http.StatusOK)
|
||||
|
||||
cancel()
|
||||
waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
|
||||
}
|
||||
@@ -0,0 +1,581 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/config"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestNewRuntimeStartsProbeListenerAndStopsCleanly verifies that a fully wired
// runtime exposes /healthz and /readyz (and no /metrics route) on the internal
// listener, and shuts down without error when the run context is canceled.
func TestNewRuntimeStartsProbeListenerAndStopsCleanly(t *testing.T) {
	t.Parallel()

	// In-process Redis plus a stub user service; the handler body is empty
	// because this test never drives an intent through enrichment.
	redisServer := miniredis.RunT(t)
	userService := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	// Short blocking-read window keeps the consumer loop responsive to cancel.
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/healthz", http.StatusOK)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/readyz", http.StatusOK)
	// /metrics is intentionally absent from the internal listener.
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/metrics", http.StatusNotFound)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
|
||||
|
||||
// TestNewRuntimeFailsFastWhenRedisPingCheckFails verifies that construction
// aborts (nil runtime, "ping redis" in the error chain) when the configured
// Redis address has no listener behind it.
func TestNewRuntimeFailsFastWhenRedisPingCheckFails(t *testing.T) {
	t.Parallel()

	cfg := config.DefaultConfig()
	// mustFreeAddr returns an address whose listener is already closed, so
	// the startup PING cannot succeed.
	cfg.Redis.Addr = mustFreeAddr(t)
	cfg.UserService.BaseURL = "http://127.0.0.1:18080"
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.Nil(t, runtime)
	require.Error(t, err)
	assert.ErrorContains(t, err, "ping redis")
}
|
||||
|
||||
// TestNewRuntimeAcceptsIntentThroughConsumer pushes one user-audience intent
// onto the intents stream and waits until the consumer has written an email
// route enriched with the stub user service's email and locale.
func TestNewRuntimeAcceptsIntentThroughConsumer(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	// Separate raw client for seeding the stream and inspecting state;
	// Protocol 2 / DisableIdentity keep miniredis compatibility.
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	// Stub user service answers the enrichment lookup for user-1.
	userService := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
		writeJSON(t, writer, http.StatusOK, map[string]any{
			"user": map[string]any{
				"email":              "pilot@example.com",
				"preferred_language": "en-US",
			},
		})
	})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	// Seed one intent addressed to user-1.
	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		},
	}).Result()
	require.NoError(t, err)

	// Poll until the consumer has persisted the enriched email route. Note
	// the locale is normalized from "en-US" to "en".
	require.Eventually(t, func() bool {
		payload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
		if err != nil {
			return false
		}
		route, err := redisstate.UnmarshalRoute(payload)
		if err != nil {
			return false
		}
		return route.ResolvedEmail == "pilot@example.com" && route.ResolvedLocale == "en"
	}, time.Second, 10*time.Millisecond)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
|
||||
|
||||
// TestNewRuntimePublishesAcceptedPushAndEmailRoutes drives one user intent
// end-to-end and asserts both fan-out legs: the push publisher writes one
// gateway client event and the email publisher writes one mail delivery
// command, with both route records marked published after a single attempt.
func TestNewRuntimePublishesAcceptedPushAndEmailRoutes(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	// Raw client for seeding and inspection, independent of the runtime's own.
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	// Stub user service resolves user-1 to an email and preferred language.
	userService := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
		writeJSON(t, writer, http.StatusOK, map[string]any{
			"user": map[string]any{
				"email":              "pilot@example.com",
				"preferred_language": "en-US",
			},
		})
	})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	// Seed one intent carrying request/trace correlation IDs so their
	// propagation into both downstream streams can be asserted.
	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			"request_id":              "request-1",
			"trace_id":                "trace-1",
		},
	}).Result()
	require.NoError(t, err)

	// Wait for both route records to reach published/attempt-1 before
	// inspecting the streams, so the assertions below are race-free.
	require.Eventually(t, func() bool {
		pushPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:user:user-1")).Bytes()
		if err != nil {
			return false
		}
		pushRoute, err := redisstate.UnmarshalRoute(pushPayload)
		if err != nil {
			return false
		}

		emailPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
		if err != nil {
			return false
		}
		emailRoute, err := redisstate.UnmarshalRoute(emailPayload)
		if err != nil {
			return false
		}

		return pushRoute.Status == "published" && pushRoute.AttemptCount == 1 &&
			emailRoute.Status == "published" && emailRoute.AttemptCount == 1
	}, 2*time.Second, 10*time.Millisecond)

	pushRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:user:user-1")).Bytes()
	require.NoError(t, err)
	pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "published", string(pushRoute.Status))

	// The notification record supplies AcceptedAt, echoed below as
	// requested_at_ms on the mail command.
	notificationPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Notification(messageID)).Bytes()
	require.NoError(t, err)
	notificationRecord, err := redisstate.UnmarshalNotification(notificationPayload)
	require.NoError(t, err)

	emailRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
	require.NoError(t, err)
	emailRoute, err := redisstate.UnmarshalRoute(emailRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "published", string(emailRoute.Status))

	// Push leg: exactly one gateway client event with propagated IDs and a
	// non-empty payload; device_session_id must be absent for user fan-out.
	messages, err := redisClient.XRange(context.Background(), cfg.Streams.GatewayClientEvents, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
	require.Equal(t, messageID+"/push:user:user-1", messages[0].Values["event_id"])
	require.Equal(t, "request-1", messages[0].Values["request_id"])
	require.Equal(t, "trace-1", messages[0].Values["trace_id"])
	require.NotContains(t, messages[0].Values, "device_session_id")
	// The Redis client may surface stream values as string or []byte; accept
	// either, requiring only that the payload is non-empty.
	switch payload := messages[0].Values["payload_bytes"].(type) {
	case string:
		require.NotEmpty(t, payload)
	case []byte:
		require.NotEmpty(t, payload)
	default:
		require.Failf(t, "unexpected payload type", "payload_bytes has type %T", payload)
	}

	// Email leg: exactly one templated mail command keyed by the route ID.
	mailCommands, err := redisClient.XRange(context.Background(), cfg.Streams.MailDeliveryCommands, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, mailCommands, 1)
	require.Equal(t, messageID+"/email:user:user-1", mailCommands[0].Values["delivery_id"])
	require.Equal(t, "notification", mailCommands[0].Values["source"])
	require.Equal(t, "template", mailCommands[0].Values["payload_mode"])
	require.Equal(t, "notification:"+messageID+"/email:user:user-1", mailCommands[0].Values["idempotency_key"])
	require.Equal(t, strconv.FormatInt(notificationRecord.AcceptedAt.UnixMilli(), 10), mailCommands[0].Values["requested_at_ms"])
	require.Equal(t, "request-1", mailCommands[0].Values["request_id"])
	require.Equal(t, "trace-1", mailCommands[0].Values["trace_id"])
	require.JSONEq(t,
		`{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		mailCommands[0].Values["payload_json"].(string),
	)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
|
||||
|
||||
// TestNewRuntimePublishesAdminEmailRouteOnlyToMailService checks the
// admin_email audience: the configured admin address receives exactly one
// mail delivery command, the push route is marked skipped, and nothing is
// written to the gateway client-events stream.
func TestNewRuntimePublishesAdminEmailRouteOnlyToMailService(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	// Admin routing needs no user enrichment, so the stub handler is empty.
	userService := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	// One configured admin recipient for this notification type.
	cfg.AdminRouting.LobbyApplicationSubmitted = []string{"owner@example.com"}
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	// Seed an admin_email intent; note there is no recipient_user_ids_json.
	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type": "lobby.application.submitted",
			"producer":          "game_lobby",
			"audience_kind":     "admin_email",
			"idempotency_key":   "game-123:application-submitted:user-42",
			"occurred_at_ms":    "1775121700000",
			"payload_json":      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"}`,
		},
	}).Result()
	require.NoError(t, err)

	// Wait for the admin email route to be published before asserting.
	require.Eventually(t, func() bool {
		payload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:email:owner@example.com")).Bytes()
		if err != nil {
			return false
		}
		route, err := redisstate.UnmarshalRoute(payload)
		if err != nil {
			return false
		}

		return route.Status == "published" && route.AttemptCount == 1
	}, 2*time.Second, 10*time.Millisecond)

	// The push leg exists as a route record but is skipped for admin email.
	pushRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:email:owner@example.com")).Bytes()
	require.NoError(t, err)
	pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "skipped", string(pushRoute.Status))

	mailCommands, err := redisClient.XRange(context.Background(), cfg.Streams.MailDeliveryCommands, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, mailCommands, 1)
	require.Equal(t, messageID+"/email:email:owner@example.com", mailCommands[0].Values["delivery_id"])
	require.JSONEq(t,
		`{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"},"attachments":[]}`,
		mailCommands[0].Values["payload_json"].(string),
	)

	// No push fan-out for admin audiences: gateway stream stays empty.
	gatewayMessages, err := redisClient.XRange(context.Background(), cfg.Streams.GatewayClientEvents, "-", "+").Result()
	require.NoError(t, err)
	require.Empty(t, gatewayMessages)

	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
|
||||
|
||||
// TestNewRuntimeUsesConfiguredUserServiceTimeout verifies that a hanging user
// service is cut off by cfg.UserService.Timeout: Run fails with a deadline
// error, and neither the stream offset nor the notification record is
// persisted — leaving the intent eligible for reprocessing.
func TestNewRuntimeUsesConfiguredUserServiceTimeout(t *testing.T) {
	t.Parallel()

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})
	// The stub never responds; it blocks until the caller's request context
	// (which carries the configured timeout) is canceled.
	userService := newUserLookupServer(t, func(_ http.ResponseWriter, request *http.Request) {
		<-request.Context().Done()
	})
	defer userService.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisServer.Addr()
	cfg.UserService.BaseURL = userService.URL
	// Tight lookup timeout so the deadline trips quickly.
	cfg.UserService.Timeout = 20 * time.Millisecond
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()

	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()

	client := newTestHTTPClient(t)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)

	messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		},
	}).Result()
	require.NoError(t, err)

	// Run is expected to exit on its own with the lookup failure.
	var runErr error
	require.Eventually(t, func() bool {
		select {
		case runErr = <-runErrCh:
			return true
		default:
			return false
		}
	}, time.Second, 10*time.Millisecond)

	require.Error(t, runErr)
	require.ErrorContains(t, runErr, "context deadline exceeded")

	// Offset was never advanced, so the intent would be re-read on restart.
	offsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
	require.NoError(t, err)
	offset, found, err := offsetStore.Load(context.Background(), cfg.Streams.Intents)
	require.NoError(t, err)
	require.False(t, found)
	require.Empty(t, offset)

	// No notification record was persisted for the failed intent.
	_, err = redisClient.Get(context.Background(), redisstate.Keyspace{}.Notification(messageID)).Bytes()
	require.Error(t, err)
}
|
||||
|
||||
// testLogger builds a structured logger whose output is thrown away, keeping
// test logs quiet.
func testLogger() *slog.Logger {
	discardHandler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(discardHandler)
}
|
||||
|
||||
func newTestHTTPClient(t *testing.T) *http.Client {
|
||||
t.Helper()
|
||||
|
||||
transport := &http.Transport{DisableKeepAlives: true}
|
||||
t.Cleanup(transport.CloseIdleConnections)
|
||||
|
||||
return &http.Client{
|
||||
Timeout: 500 * time.Millisecond,
|
||||
Transport: transport,
|
||||
}
|
||||
}
|
||||
|
||||
func waitForRuntimeReady(t *testing.T, client *http.Client, addr string) {
|
||||
t.Helper()
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
request, err := http.NewRequest(http.MethodGet, "http://"+addr+"/readyz", nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
response, err := client.Do(request)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer response.Body.Close()
|
||||
_, _ = io.Copy(io.Discard, response.Body)
|
||||
|
||||
return response.StatusCode == http.StatusOK
|
||||
}, 5*time.Second, 25*time.Millisecond, "notification runtime did not become reachable")
|
||||
}
|
||||
|
||||
func waitForRunResult(t *testing.T, runErrCh <-chan error, waitTimeout time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
var err error
|
||||
require.Eventually(t, func() bool {
|
||||
select {
|
||||
case err = <-runErrCh:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}, waitTimeout, 10*time.Millisecond, "notification runtime did not stop")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func assertHTTPStatus(t *testing.T, client *http.Client, target string, want int) {
|
||||
t.Helper()
|
||||
|
||||
request, err := http.NewRequest(http.MethodGet, target, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
response, err := client.Do(request)
|
||||
require.NoError(t, err)
|
||||
defer response.Body.Close()
|
||||
_, _ = io.Copy(io.Discard, response.Body)
|
||||
|
||||
require.Equal(t, want, response.StatusCode)
|
||||
}
|
||||
|
||||
func mustFreeAddr(t *testing.T) string {
|
||||
t.Helper()
|
||||
|
||||
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
assert.NoError(t, listener.Close())
|
||||
}()
|
||||
|
||||
return listener.Addr().String()
|
||||
}
|
||||
|
||||
func newUserLookupServer(t *testing.T, handler func(http.ResponseWriter, *http.Request)) *httptest.Server {
|
||||
t.Helper()
|
||||
|
||||
return httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
|
||||
if request.Method != http.MethodGet {
|
||||
http.NotFound(writer, request)
|
||||
return
|
||||
}
|
||||
if request.URL.Path != "/api/v1/internal/users/user-1" {
|
||||
writeJSON(t, writer, http.StatusNotFound, map[string]any{
|
||||
"error": map[string]any{
|
||||
"code": "subject_not_found",
|
||||
"message": "subject not found",
|
||||
},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
handler(writer, request)
|
||||
}))
|
||||
}
|
||||
|
||||
func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) {
|
||||
t.Helper()
|
||||
|
||||
body, err := json.Marshal(payload)
|
||||
require.NoError(t, err)
|
||||
|
||||
writer.Header().Set("Content-Type", "application/json")
|
||||
writer.WriteHeader(statusCode)
|
||||
_, err = writer.Write(body)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
@@ -0,0 +1,839 @@
|
||||
// Package config loads the Notification Service process configuration from
|
||||
// environment variables.
|
||||
package config
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
netmail "net/mail"
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/telemetry"
|
||||
)
|
||||
|
||||
const (
	// Environment variable names, grouped by the configuration section they
	// feed. All service-specific variables carry the NOTIFICATION_ prefix.
	shutdownTimeoutEnvVar = "NOTIFICATION_SHUTDOWN_TIMEOUT"
	logLevelEnvVar        = "NOTIFICATION_LOG_LEVEL"

	// Internal (probe) HTTP listener.
	internalHTTPAddrEnvVar              = "NOTIFICATION_INTERNAL_HTTP_ADDR"
	internalHTTPReadHeaderTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
	internalHTTPReadTimeoutEnvVar       = "NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT"
	internalHTTPIdleTimeoutEnvVar       = "NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT"

	// Shared Redis client.
	redisAddrEnvVar             = "NOTIFICATION_REDIS_ADDR"
	redisUsernameEnvVar         = "NOTIFICATION_REDIS_USERNAME"
	redisPasswordEnvVar         = "NOTIFICATION_REDIS_PASSWORD"
	redisDBEnvVar               = "NOTIFICATION_REDIS_DB"
	redisTLSEnabledEnvVar       = "NOTIFICATION_REDIS_TLS_ENABLED"
	redisOperationTimeoutEnvVar = "NOTIFICATION_REDIS_OPERATION_TIMEOUT"

	// Stream names and consumer tuning.
	intentsStreamEnvVar                = "NOTIFICATION_INTENTS_STREAM"
	intentsReadBlockTimeoutEnvVar      = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
	gatewayClientEventsStreamEnvVar    = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
	gatewayClientEventsStreamMaxEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN"
	mailDeliveryCommandsStreamEnvVar   = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"

	// Retry and retention settings.
	pushRetryMaxAttemptsEnvVar  = "NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS"
	emailRetryMaxAttemptsEnvVar = "NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS"
	routeLeaseTTLEnvVar         = "NOTIFICATION_ROUTE_LEASE_TTL"
	routeBackoffMinEnvVar       = "NOTIFICATION_ROUTE_BACKOFF_MIN"
	routeBackoffMaxEnvVar       = "NOTIFICATION_ROUTE_BACKOFF_MAX"
	deadLetterTTLEnvVar         = "NOTIFICATION_DEAD_LETTER_TTL"
	recordTTLEnvVar             = "NOTIFICATION_RECORD_TTL"
	idempotencyTTLEnvVar        = "NOTIFICATION_IDEMPOTENCY_TTL"

	// User-enrichment dependency.
	userServiceBaseURLEnvVar = "NOTIFICATION_USER_SERVICE_BASE_URL"
	userServiceTimeoutEnvVar = "NOTIFICATION_USER_SERVICE_TIMEOUT"

	// Per-notification-type administrator email lists.
	adminEmailsGeoReviewRecommendedEnvVar      = "NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED"
	adminEmailsGameGenerationFailedEnvVar      = "NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED"
	adminEmailsLobbyRuntimePausedAfterEnvVar   = "NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START"
	adminEmailsLobbyApplicationSubmittedEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED"

	// OpenTelemetry configuration; the OTEL_* names follow the standard SDK
	// environment variables, the NOTIFICATION_OTEL_* ones are local toggles.
	otelServiceNameEnvVar                 = "OTEL_SERVICE_NAME"
	otelTracesExporterEnvVar              = "OTEL_TRACES_EXPORTER"
	otelMetricsExporterEnvVar             = "OTEL_METRICS_EXPORTER"
	otelExporterOTLPProtocolEnvVar        = "OTEL_EXPORTER_OTLP_PROTOCOL"
	otelExporterOTLPTracesProtocolEnvVar  = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
	otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
	otelStdoutTracesEnabledEnvVar         = "NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED"
	otelStdoutMetricsEnabledEnvVar        = "NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED"

	// Defaults used when the corresponding environment variable is unset.
	defaultShutdownTimeout       = 5 * time.Second
	defaultLogLevel              = "info"
	defaultInternalHTTPAddr      = ":8092"
	defaultReadHeaderTimeout     = 2 * time.Second
	defaultReadTimeout           = 10 * time.Second
	defaultIdleTimeout           = time.Minute
	defaultRedisDB               = 0
	defaultRedisOperationTimeout = 250 * time.Millisecond

	defaultIntentsStream                         = "notification:intents"
	defaultIntentsReadBlockTimeout               = 2 * time.Second
	defaultGatewayClientEventsStream             = "gateway:client-events"
	defaultGatewayClientEventsStreamMaxLen int64 = 1024
	defaultMailDeliveryCommandsStream            = "mail:delivery_commands"

	defaultPushRetryMaxAttempts  = 3
	defaultEmailRetryMaxAttempts = 7
	defaultRouteLeaseTTL         = 5 * time.Second
	defaultRouteBackoffMin       = time.Second
	defaultRouteBackoffMax       = 5 * time.Minute
	defaultDeadLetterTTL         = 720 * time.Hour
	defaultRecordTTL             = 720 * time.Hour
	defaultIdempotencyTTL        = 168 * time.Hour

	defaultUserServiceTimeout = time.Second
	defaultOTelServiceName    = "galaxy-notification"

	// Recognized telemetry exporter and OTLP protocol values.
	otelExporterNone         = "none"
	otelExporterOTLP         = "otlp"
	otelProtocolHTTPProtobuf = "http/protobuf"
	otelProtocolGRPC         = "grpc"
)
|
||||
|
||||
// Config stores the full Notification Service process configuration. Sections
// with their own Validate method (e.g. InternalHTTP, Redis) are validated
// individually — presumably by a top-level loader outside this view; confirm.
type Config struct {
	// ShutdownTimeout bounds graceful shutdown of every long-lived component.
	ShutdownTimeout time.Duration

	// Logging configures the process-wide structured logger.
	Logging LoggingConfig

	// InternalHTTP configures the private probe HTTP listener.
	InternalHTTP InternalHTTPConfig

	// Redis configures the shared Redis client used by the process.
	Redis RedisConfig

	// Streams stores the stable stream names reserved for notification ingress
	// and downstream publication.
	Streams StreamsConfig

	// IntentsReadBlockTimeout stores the maximum Redis Streams blocking read
	// window used by the intent consumer.
	IntentsReadBlockTimeout time.Duration

	// Retry stores the frozen retry and retention settings.
	Retry RetryConfig

	// UserService configures the trusted user-enrichment dependency.
	UserService UserServiceConfig

	// AdminRouting stores the type-specific configured administrator email
	// lists.
	AdminRouting AdminRoutingConfig

	// Telemetry configures the process-wide OpenTelemetry runtime.
	Telemetry TelemetryConfig
}
|
||||
|
||||
// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
	// Level stores the process log level accepted by log/slog
	// (e.g. "debug", "info", "warn", "error"; default is defaultLogLevel).
	Level string
}
|
||||
|
||||
// InternalHTTPConfig configures the private probe HTTP listener.
// All fields must be set to positive/non-empty values; see Validate.
type InternalHTTPConfig struct {
	// Addr stores the TCP listen address in host:port form.
	Addr string

	// ReadHeaderTimeout bounds request-header reading.
	ReadHeaderTimeout time.Duration

	// ReadTimeout bounds reading one request.
	ReadTimeout time.Duration

	// IdleTimeout bounds how long keep-alive connections stay open.
	IdleTimeout time.Duration
}
|
||||
|
||||
// Validate reports whether cfg stores a usable internal HTTP listener
|
||||
// configuration.
|
||||
func (cfg InternalHTTPConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Addr) == "":
|
||||
return fmt.Errorf("internal HTTP addr must not be empty")
|
||||
case !isTCPAddr(cfg.Addr):
|
||||
return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr)
|
||||
case cfg.ReadHeaderTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP read header timeout must be positive")
|
||||
case cfg.ReadTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP read timeout must be positive")
|
||||
case cfg.IdleTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP idle timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// RedisConfig configures the shared Redis client and its connection settings.
type RedisConfig struct {
	// Addr stores the Redis network address in host:port form.
	Addr string

	// Username stores the optional Redis ACL username.
	Username string

	// Password stores the optional Redis ACL password.
	Password string

	// DB stores the Redis logical database index; Validate rejects negative
	// values.
	DB int

	// TLSEnabled reports whether TLS must be used for Redis connections; see
	// TLSConfig for the resulting settings.
	TLSEnabled bool

	// OperationTimeout bounds one Redis round trip including the startup PING.
	OperationTimeout time.Duration
}
|
||||
|
||||
// TLSConfig returns the conservative TLS configuration used by the Redis
|
||||
// client when TLSEnabled is true.
|
||||
func (cfg RedisConfig) TLSConfig() *tls.Config {
|
||||
if !cfg.TLSEnabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &tls.Config{MinVersion: tls.VersionTLS12}
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Redis configuration.
|
||||
func (cfg RedisConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Addr) == "":
|
||||
return fmt.Errorf("redis addr must not be empty")
|
||||
case !isTCPAddr(cfg.Addr):
|
||||
return fmt.Errorf("redis addr %q must use host:port form", cfg.Addr)
|
||||
case cfg.DB < 0:
|
||||
return fmt.Errorf("redis db must not be negative")
|
||||
case cfg.OperationTimeout <= 0:
|
||||
return fmt.Errorf("redis operation timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// StreamsConfig stores the stable Redis Stream names used by Notification
// Service.
type StreamsConfig struct {
	// Intents stores the ingress intent stream.
	Intents string

	// GatewayClientEvents stores the downstream Gateway client-events stream.
	GatewayClientEvents string

	// GatewayClientEventsStreamMaxLen bounds the downstream Gateway
	// client-events stream with approximate trimming; Validate requires a
	// positive value.
	GatewayClientEventsStreamMaxLen int64

	// MailDeliveryCommands stores the downstream Mail Service command stream.
	MailDeliveryCommands string
}
|
||||
|
||||
// Validate reports whether cfg stores usable stream names.
|
||||
func (cfg StreamsConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Intents) == "":
|
||||
return fmt.Errorf("intents stream must not be empty")
|
||||
case strings.TrimSpace(cfg.GatewayClientEvents) == "":
|
||||
return fmt.Errorf("gateway client-events stream must not be empty")
|
||||
case cfg.GatewayClientEventsStreamMaxLen <= 0:
|
||||
return fmt.Errorf("gateway client-events stream max len must be positive")
|
||||
case strings.TrimSpace(cfg.MailDeliveryCommands) == "":
|
||||
return fmt.Errorf("mail delivery-commands stream must not be empty")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// RetryConfig stores the frozen retry budgets, backoff settings, and retention
// periods used by the service.
type RetryConfig struct {
	// PushMaxAttempts stores the route retry budget for the `push` channel.
	PushMaxAttempts int

	// EmailMaxAttempts stores the route retry budget for the `email` channel.
	EmailMaxAttempts int

	// RouteLeaseTTL stores the temporary route-lease lifetime used to avoid
	// duplicate publication across replicas.
	RouteLeaseTTL time.Duration

	// RouteBackoffMin stores the minimum retry backoff; Validate requires it
	// not to exceed RouteBackoffMax.
	RouteBackoffMin time.Duration

	// RouteBackoffMax stores the maximum retry backoff.
	RouteBackoffMax time.Duration

	// DeadLetterTTL stores the retention period for dead-letter and malformed
	// intent records.
	DeadLetterTTL time.Duration

	// RecordTTL stores the retention period for notification and route records.
	RecordTTL time.Duration

	// IdempotencyTTL stores the retention period for idempotency records.
	IdempotencyTTL time.Duration
}
|
||||
|
||||
// Validate reports whether cfg stores usable retry and retention settings.
|
||||
func (cfg RetryConfig) Validate() error {
|
||||
switch {
|
||||
case cfg.PushMaxAttempts <= 0:
|
||||
return fmt.Errorf("push retry max attempts must be positive")
|
||||
case cfg.EmailMaxAttempts <= 0:
|
||||
return fmt.Errorf("email retry max attempts must be positive")
|
||||
case cfg.RouteLeaseTTL <= 0:
|
||||
return fmt.Errorf("route lease ttl must be positive")
|
||||
case cfg.RouteBackoffMin <= 0:
|
||||
return fmt.Errorf("route backoff min must be positive")
|
||||
case cfg.RouteBackoffMax <= 0:
|
||||
return fmt.Errorf("route backoff max must be positive")
|
||||
case cfg.RouteBackoffMin > cfg.RouteBackoffMax:
|
||||
return fmt.Errorf("route backoff min must not exceed route backoff max")
|
||||
case cfg.DeadLetterTTL <= 0:
|
||||
return fmt.Errorf("dead-letter ttl must be positive")
|
||||
case cfg.RecordTTL <= 0:
|
||||
return fmt.Errorf("record ttl must be positive")
|
||||
case cfg.IdempotencyTTL <= 0:
|
||||
return fmt.Errorf("idempotency ttl must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// UserServiceConfig configures the trusted user-enrichment dependency.
type UserServiceConfig struct {
	// BaseURL stores the absolute base URL of the trusted User Service;
	// LoadFromEnv strips any trailing slashes (see normalizeBaseURL).
	BaseURL string

	// Timeout bounds one outbound User Service request.
	Timeout time.Duration
}
|
||||
|
||||
// Validate reports whether cfg stores a usable User Service configuration.
|
||||
func (cfg UserServiceConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.BaseURL) == "":
|
||||
return fmt.Errorf("user service base URL must not be empty")
|
||||
case !isAbsoluteHTTPURL(cfg.BaseURL):
|
||||
return fmt.Errorf("user service base URL %q must be an absolute http(s) URL", cfg.BaseURL)
|
||||
case cfg.Timeout <= 0:
|
||||
return fmt.Errorf("user service timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// AdminRoutingConfig stores the type-specific configured administrator email
// lists. Every entry must already be normalized (lowercase, bare address with
// no display name) — Validate enforces this.
type AdminRoutingConfig struct {
	// GeoReviewRecommended stores recipients for
	// `geo.review_recommended`.
	GeoReviewRecommended []string

	// GameGenerationFailed stores recipients for
	// `game.generation_failed`.
	GameGenerationFailed []string

	// LobbyRuntimePausedAfterStart stores recipients for
	// `lobby.runtime_paused_after_start`.
	LobbyRuntimePausedAfterStart []string

	// LobbyApplicationSubmitted stores recipients for public
	// `lobby.application.submitted` notifications.
	LobbyApplicationSubmitted []string
}
|
||||
|
||||
// Validate reports whether cfg stores valid normalized administrator email
|
||||
// lists.
|
||||
func (cfg AdminRoutingConfig) Validate() error {
|
||||
if err := validateNormalizedEmailList("geo.review_recommended", cfg.GeoReviewRecommended); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateNormalizedEmailList("game.generation_failed", cfg.GameGenerationFailed); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateNormalizedEmailList("lobby.runtime_paused_after_start", cfg.LobbyRuntimePausedAfterStart); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateNormalizedEmailList("lobby.application.submitted", cfg.LobbyApplicationSubmitted); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TelemetryConfig configures the Notification Service OpenTelemetry runtime.
type TelemetryConfig struct {
	// ServiceName overrides the default OpenTelemetry service name.
	ServiceName string

	// TracesExporter selects the external traces exporter. Supported values are
	// `none` and `otlp`.
	TracesExporter string

	// MetricsExporter selects the external metrics exporter. Supported values
	// are `none` and `otlp`.
	MetricsExporter string

	// TracesProtocol selects the OTLP traces protocol when TracesExporter is
	// `otlp`.
	TracesProtocol string

	// MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is
	// `otlp`.
	MetricsProtocol string

	// StdoutTracesEnabled enables the additional stdout trace exporter used for
	// local development and debugging.
	StdoutTracesEnabled bool

	// StdoutMetricsEnabled enables the additional stdout metric exporter used
	// for local development and debugging.
	StdoutMetricsEnabled bool
}
|
||||
|
||||
// Validate reports whether cfg contains a supported OpenTelemetry
|
||||
// configuration.
|
||||
func (cfg TelemetryConfig) Validate() error {
|
||||
return telemetry.ProcessConfig{
|
||||
ServiceName: cfg.ServiceName,
|
||||
TracesExporter: cfg.TracesExporter,
|
||||
MetricsExporter: cfg.MetricsExporter,
|
||||
TracesProtocol: cfg.TracesProtocol,
|
||||
MetricsProtocol: cfg.MetricsProtocol,
|
||||
StdoutTracesEnabled: cfg.StdoutTracesEnabled,
|
||||
StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
|
||||
}.Validate()
|
||||
}
|
||||
|
||||
// DefaultConfig returns the default Notification Service process
// configuration.
//
// Redis.Addr and UserService.BaseURL are intentionally left empty: they have
// no safe default and must come from the environment (Config.Validate rejects
// an empty value for either).
func DefaultConfig() Config {
	return Config{
		ShutdownTimeout: defaultShutdownTimeout,
		Logging: LoggingConfig{
			Level: defaultLogLevel,
		},
		InternalHTTP: InternalHTTPConfig{
			Addr:              defaultInternalHTTPAddr,
			ReadHeaderTimeout: defaultReadHeaderTimeout,
			ReadTimeout:       defaultReadTimeout,
			IdleTimeout:       defaultIdleTimeout,
		},
		Redis: RedisConfig{
			DB:               defaultRedisDB,
			OperationTimeout: defaultRedisOperationTimeout,
		},
		Streams: StreamsConfig{
			Intents:                         defaultIntentsStream,
			GatewayClientEvents:             defaultGatewayClientEventsStream,
			GatewayClientEventsStreamMaxLen: defaultGatewayClientEventsStreamMaxLen,
			MailDeliveryCommands:            defaultMailDeliveryCommandsStream,
		},
		IntentsReadBlockTimeout: defaultIntentsReadBlockTimeout,
		Retry: RetryConfig{
			PushMaxAttempts:  defaultPushRetryMaxAttempts,
			EmailMaxAttempts: defaultEmailRetryMaxAttempts,
			RouteLeaseTTL:    defaultRouteLeaseTTL,
			RouteBackoffMin:  defaultRouteBackoffMin,
			RouteBackoffMax:  defaultRouteBackoffMax,
			DeadLetterTTL:    defaultDeadLetterTTL,
			RecordTTL:        defaultRecordTTL,
			IdempotencyTTL:   defaultIdempotencyTTL,
		},
		UserService: UserServiceConfig{
			Timeout: defaultUserServiceTimeout,
		},
		// Telemetry defaults to exporting nothing; OTLP must be opted into
		// via environment variables.
		Telemetry: TelemetryConfig{
			ServiceName:     defaultOTelServiceName,
			TracesExporter:  otelExporterNone,
			MetricsExporter: otelExporterNone,
		},
	}
}
|
||||
|
||||
// LoadFromEnv loads the Notification Service process configuration from
// environment variables, applying documented defaults where appropriate.
//
// Unset variables keep their DefaultConfig value. Set variables are parsed
// (durations via time.ParseDuration, integers via strconv, booleans via
// strconv.ParseBool, email lists via parseEmailList); the first parse failure
// aborts loading. The assembled configuration is checked with cfg.Validate
// before being returned, so a successful return implies a consistent config.
func LoadFromEnv() (Config, error) {
	cfg := DefaultConfig()

	var err error

	cfg.ShutdownTimeout, err = loadDurationEnvWithDefault(shutdownTimeoutEnvVar, cfg.ShutdownTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Logging: the level string must be parseable by log/slog; reject early
	// so a typo never silently falls back to the default level.
	cfg.Logging.Level = loadStringEnvWithDefault(logLevelEnvVar, cfg.Logging.Level)
	if err := validateLogLevel(cfg.Logging.Level); err != nil {
		return Config{}, fmt.Errorf("load notification config: %s: %w", logLevelEnvVar, err)
	}

	// Internal probe HTTP listener.
	cfg.InternalHTTP.Addr = loadStringEnvWithDefault(internalHTTPAddrEnvVar, cfg.InternalHTTP.Addr)
	cfg.InternalHTTP.ReadHeaderTimeout, err = loadDurationEnvWithDefault(internalHTTPReadHeaderTimeoutEnvVar, cfg.InternalHTTP.ReadHeaderTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.InternalHTTP.ReadTimeout, err = loadDurationEnvWithDefault(internalHTTPReadTimeoutEnvVar, cfg.InternalHTTP.ReadTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.InternalHTTP.IdleTimeout, err = loadDurationEnvWithDefault(internalHTTPIdleTimeoutEnvVar, cfg.InternalHTTP.IdleTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Redis connection settings. Username and password are read verbatim via
	// os.Getenv (no trimming) so credentials containing significant
	// whitespace survive.
	cfg.Redis.Addr = loadStringEnvWithDefault(redisAddrEnvVar, cfg.Redis.Addr)
	cfg.Redis.Username = os.Getenv(redisUsernameEnvVar)
	cfg.Redis.Password = os.Getenv(redisPasswordEnvVar)
	cfg.Redis.DB, err = loadIntEnvWithDefault(redisDBEnvVar, cfg.Redis.DB)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Redis.TLSEnabled, err = loadBoolEnvWithDefault(redisTLSEnabledEnvVar, cfg.Redis.TLSEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Redis.OperationTimeout, err = loadDurationEnvWithDefault(redisOperationTimeoutEnvVar, cfg.Redis.OperationTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Stream names and intent-consumer read window.
	cfg.Streams.Intents = loadStringEnvWithDefault(intentsStreamEnvVar, cfg.Streams.Intents)
	cfg.Streams.GatewayClientEvents = loadStringEnvWithDefault(gatewayClientEventsStreamEnvVar, cfg.Streams.GatewayClientEvents)
	cfg.Streams.GatewayClientEventsStreamMaxLen, err = loadInt64EnvWithDefault(gatewayClientEventsStreamMaxEnvVar, cfg.Streams.GatewayClientEventsStreamMaxLen)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Streams.MailDeliveryCommands = loadStringEnvWithDefault(mailDeliveryCommandsStreamEnvVar, cfg.Streams.MailDeliveryCommands)
	cfg.IntentsReadBlockTimeout, err = loadDurationEnvWithDefault(intentsReadBlockTimeoutEnvVar, cfg.IntentsReadBlockTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Retry budgets, backoff window, and retention periods.
	cfg.Retry.PushMaxAttempts, err = loadIntEnvWithDefault(pushRetryMaxAttemptsEnvVar, cfg.Retry.PushMaxAttempts)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.EmailMaxAttempts, err = loadIntEnvWithDefault(emailRetryMaxAttemptsEnvVar, cfg.Retry.EmailMaxAttempts)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteLeaseTTL, err = loadDurationEnvWithDefault(routeLeaseTTLEnvVar, cfg.Retry.RouteLeaseTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteBackoffMin, err = loadDurationEnvWithDefault(routeBackoffMinEnvVar, cfg.Retry.RouteBackoffMin)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteBackoffMax, err = loadDurationEnvWithDefault(routeBackoffMaxEnvVar, cfg.Retry.RouteBackoffMax)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.DeadLetterTTL, err = loadDurationEnvWithDefault(deadLetterTTLEnvVar, cfg.Retry.DeadLetterTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RecordTTL, err = loadDurationEnvWithDefault(recordTTLEnvVar, cfg.Retry.RecordTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.IdempotencyTTL, err = loadDurationEnvWithDefault(idempotencyTTLEnvVar, cfg.Retry.IdempotencyTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// User Service: the base URL is normalized (trailing slashes stripped) so
	// request paths can be joined with a single "/".
	cfg.UserService.BaseURL = normalizeBaseURL(loadStringEnvWithDefault(userServiceBaseURLEnvVar, cfg.UserService.BaseURL))
	cfg.UserService.Timeout, err = loadDurationEnvWithDefault(userServiceTimeoutEnvVar, cfg.UserService.Timeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Administrator routing lists: parsed, normalized, and de-duplicated by
	// loadEmailListEnv.
	cfg.AdminRouting.GeoReviewRecommended, err = loadEmailListEnv(adminEmailsGeoReviewRecommendedEnvVar, cfg.AdminRouting.GeoReviewRecommended)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.GameGenerationFailed, err = loadEmailListEnv(adminEmailsGameGenerationFailedEnvVar, cfg.AdminRouting.GameGenerationFailed)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.LobbyRuntimePausedAfterStart, err = loadEmailListEnv(adminEmailsLobbyRuntimePausedAfterEnvVar, cfg.AdminRouting.LobbyRuntimePausedAfterStart)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.LobbyApplicationSubmitted, err = loadEmailListEnv(adminEmailsLobbyApplicationSubmittedEnvVar, cfg.AdminRouting.LobbyApplicationSubmitted)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	// Telemetry: per-signal OTLP protocol variables take precedence over the
	// generic OTEL_EXPORTER_OTLP_PROTOCOL-style fallback.
	cfg.Telemetry.ServiceName = loadStringEnvWithDefault(otelServiceNameEnvVar, cfg.Telemetry.ServiceName)
	cfg.Telemetry.TracesExporter = normalizeExporterValue(loadStringEnvWithDefault(otelTracesExporterEnvVar, cfg.Telemetry.TracesExporter))
	cfg.Telemetry.MetricsExporter = normalizeExporterValue(loadStringEnvWithDefault(otelMetricsExporterEnvVar, cfg.Telemetry.MetricsExporter))
	cfg.Telemetry.TracesProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPTracesProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.TracesExporter,
	)
	cfg.Telemetry.MetricsProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPMetricsProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.MetricsExporter,
	)
	cfg.Telemetry.StdoutTracesEnabled, err = loadBoolEnvWithDefault(otelStdoutTracesEnabledEnvVar, cfg.Telemetry.StdoutTracesEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Telemetry.StdoutMetricsEnabled, err = loadBoolEnvWithDefault(otelStdoutMetricsEnabledEnvVar, cfg.Telemetry.StdoutMetricsEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}

	if err := cfg.Validate(); err != nil {
		return Config{}, err
	}

	return cfg, nil
}
|
||||
|
||||
// Validate reports whether cfg contains a consistent Notification Service
|
||||
// process configuration.
|
||||
func (cfg Config) Validate() error {
|
||||
switch {
|
||||
case cfg.ShutdownTimeout <= 0:
|
||||
return fmt.Errorf("load notification config: %s must be positive", shutdownTimeoutEnvVar)
|
||||
case strings.TrimSpace(cfg.Redis.Addr) == "":
|
||||
return fmt.Errorf("load notification config: %s must not be empty", redisAddrEnvVar)
|
||||
case strings.TrimSpace(cfg.UserService.BaseURL) == "":
|
||||
return fmt.Errorf("load notification config: %s must not be empty", userServiceBaseURLEnvVar)
|
||||
}
|
||||
|
||||
if err := cfg.InternalHTTP.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %s", err)
|
||||
}
|
||||
if err := cfg.Redis.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %s", err)
|
||||
}
|
||||
if err := cfg.Streams.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %s", err)
|
||||
}
|
||||
if cfg.IntentsReadBlockTimeout <= 0 {
|
||||
return fmt.Errorf("load notification config: %s must be positive", intentsReadBlockTimeoutEnvVar)
|
||||
}
|
||||
if err := cfg.Retry.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %s", err)
|
||||
}
|
||||
if err := cfg.UserService.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %s", err)
|
||||
}
|
||||
if err := cfg.AdminRouting.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %s", err)
|
||||
}
|
||||
if err := cfg.Telemetry.Validate(); err != nil {
|
||||
return fmt.Errorf("load notification config: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// loadStringEnvWithDefault returns the trimmed value of environment variable
// name, or value when the variable is unset. A set-but-blank variable yields
// the empty string, not the default.
func loadStringEnvWithDefault(name string, value string) string {
	raw, ok := os.LookupEnv(name)
	if !ok {
		return value
	}

	return strings.TrimSpace(raw)
}
|
||||
|
||||
// loadDurationEnvWithDefault returns the Go-duration value of environment
// variable name, or value when the variable is unset. A set value that
// time.ParseDuration rejects yields an error naming the variable.
func loadDurationEnvWithDefault(name string, value time.Duration) (time.Duration, error) {
	if raw, ok := os.LookupEnv(name); ok {
		parsed, err := time.ParseDuration(strings.TrimSpace(raw))
		if err != nil {
			return 0, fmt.Errorf("%s: %w", name, err)
		}
		return parsed, nil
	}

	return value, nil
}
|
||||
|
||||
// loadIntEnvWithDefault returns the integer value of environment variable
// name, or value when the variable is unset. A set value that strconv.Atoi
// rejects yields an error naming the variable.
func loadIntEnvWithDefault(name string, value int) (int, error) {
	if raw, ok := os.LookupEnv(name); ok {
		parsed, err := strconv.Atoi(strings.TrimSpace(raw))
		if err != nil {
			return 0, fmt.Errorf("%s: %w", name, err)
		}
		return parsed, nil
	}

	return value, nil
}
|
||||
|
||||
// loadInt64EnvWithDefault returns the base-10 int64 value of environment
// variable name, or value when the variable is unset. A set value that
// strconv.ParseInt rejects yields an error naming the variable.
func loadInt64EnvWithDefault(name string, value int64) (int64, error) {
	if raw, ok := os.LookupEnv(name); ok {
		parsed, err := strconv.ParseInt(strings.TrimSpace(raw), 10, 64)
		if err != nil {
			return 0, fmt.Errorf("%s: %w", name, err)
		}
		return parsed, nil
	}

	return value, nil
}
|
||||
|
||||
// loadBoolEnvWithDefault returns the boolean value of environment variable
// name, or value when the variable is unset. A set value that
// strconv.ParseBool rejects yields an error naming the variable.
func loadBoolEnvWithDefault(name string, value bool) (bool, error) {
	if raw, ok := os.LookupEnv(name); ok {
		parsed, err := strconv.ParseBool(strings.TrimSpace(raw))
		if err != nil {
			return false, fmt.Errorf("%s: %w", name, err)
		}
		return parsed, nil
	}

	return value, nil
}
|
||||
|
||||
func loadEmailListEnv(name string, value []string) ([]string, error) {
|
||||
raw, ok := os.LookupEnv(name)
|
||||
if !ok {
|
||||
return append([]string(nil), value...), nil
|
||||
}
|
||||
|
||||
return parseEmailList(name, raw)
|
||||
}
|
||||
|
||||
func parseEmailList(name string, raw string) ([]string, error) {
|
||||
trimmed := strings.TrimSpace(raw)
|
||||
if trimmed == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
parts := strings.Split(trimmed, ",")
|
||||
addresses := make([]string, 0, len(parts))
|
||||
seen := make(map[string]struct{}, len(parts))
|
||||
for index, part := range parts {
|
||||
normalized, err := normalizeMailboxAddress(part)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s[%d]: %w", name, index, err)
|
||||
}
|
||||
if _, ok := seen[normalized]; ok {
|
||||
continue
|
||||
}
|
||||
seen[normalized] = struct{}{}
|
||||
addresses = append(addresses, normalized)
|
||||
}
|
||||
|
||||
return addresses, nil
|
||||
}
|
||||
|
||||
// normalizeMailboxAddress validates value as a bare RFC 5322 address and
// returns it lowercased. Empty input, unparseable addresses, and addresses
// carrying a display name (e.g. `Bob <bob@example.com>`) are rejected.
func normalizeMailboxAddress(value string) (string, error) {
	candidate := strings.TrimSpace(value)
	if candidate == "" {
		return "", fmt.Errorf("email address must not be empty")
	}

	parsed, err := netmail.ParseAddress(candidate)
	switch {
	case err != nil:
		return "", fmt.Errorf("invalid email address %q: %w", candidate, err)
	case parsed.Name != "":
		return "", fmt.Errorf("email address %q must not include a display name", candidate)
	}

	return strings.ToLower(parsed.Address), nil
}
|
||||
|
||||
func validateNormalizedEmailList(name string, values []string) error {
|
||||
for index, value := range values {
|
||||
normalized, err := normalizeMailboxAddress(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s[%d]: %w", name, index, err)
|
||||
}
|
||||
if normalized != value {
|
||||
return fmt.Errorf("%s[%d]: email address must already be normalized", name, index)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateLogLevel reports whether value (whitespace-trimmed) is a log level
// that log/slog can parse; parsing is delegated to slog.Level.UnmarshalText.
func validateLogLevel(value string) error {
	level := new(slog.Level)

	return level.UnmarshalText([]byte(strings.TrimSpace(value)))
}
|
||||
|
||||
func normalizeExporterValue(value string) string {
|
||||
switch strings.TrimSpace(value) {
|
||||
case "", otelExporterNone:
|
||||
return otelExporterNone
|
||||
default:
|
||||
return strings.TrimSpace(value)
|
||||
}
|
||||
}
|
||||
|
||||
func loadOTLPProtocol(primary string, fallback string, exporter string) string {
|
||||
protocol := strings.TrimSpace(primary)
|
||||
if protocol == "" {
|
||||
protocol = strings.TrimSpace(fallback)
|
||||
}
|
||||
if protocol == "" && exporter == otelExporterOTLP {
|
||||
return otelProtocolHTTPProtobuf
|
||||
}
|
||||
|
||||
return protocol
|
||||
}
|
||||
|
||||
// normalizeBaseURL trims surrounding whitespace and strips all trailing
// slashes from value, returning "" for blank input so emptiness checks stay
// simple downstream.
func normalizeBaseURL(value string) string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return strings.TrimRight(trimmed, "/")
	}

	return ""
}
|
||||
|
||||
// isAbsoluteHTTPURL reports whether value parses as an absolute URL with an
// http or https scheme and a non-empty host.
func isAbsoluteHTTPURL(value string) bool {
	parsed, err := url.Parse(strings.TrimSpace(value))
	if err != nil {
		return false
	}

	switch parsed.Scheme {
	case "http", "https":
		return parsed.Host != ""
	default:
		return false
	}
}
|
||||
|
||||
// isTCPAddr reports whether value (whitespace-trimmed) is a host:port
// address with a non-empty port. The host part may be empty (":8080" listens
// on all interfaces), so only the port is required.
func isTCPAddr(value string) bool {
	_, port, err := net.SplitHostPort(strings.TrimSpace(value))
	if err != nil {
		return false
	}

	return port != ""
}
|
||||
@@ -0,0 +1,252 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestLoadFromEnvUsesDefaults sets only the two required variables (Redis
// addr, User Service base URL) and checks every other field keeps its
// DefaultConfig value.
func TestLoadFromEnvUsesDefaults(t *testing.T) {
	t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
	t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")

	cfg, err := LoadFromEnv()
	require.NoError(t, err)

	defaults := DefaultConfig()
	require.Equal(t, defaults.ShutdownTimeout, cfg.ShutdownTimeout)
	require.Equal(t, defaults.Logging, cfg.Logging)
	require.Equal(t, defaults.InternalHTTP, cfg.InternalHTTP)
	require.Equal(t, "127.0.0.1:6379", cfg.Redis.Addr)
	require.Equal(t, defaults.Redis.DB, cfg.Redis.DB)
	require.Equal(t, defaults.Redis.OperationTimeout, cfg.Redis.OperationTimeout)
	require.Equal(t, defaults.Streams, cfg.Streams)
	require.Equal(t, defaults.Retry, cfg.Retry)
	require.Equal(t, UserServiceConfig{
		BaseURL: "http://user-service.internal",
		Timeout: defaults.UserService.Timeout,
	}, cfg.UserService)
	require.Equal(t, defaults.AdminRouting, cfg.AdminRouting)
	require.Equal(t, defaults.Telemetry, cfg.Telemetry)
}
|
||||
|
||||
// TestLoadFromEnvAppliesOverrides sets every supported environment variable
// and checks each one lands on the corresponding config field, including the
// side effects of loading: base-URL slash trimming, email normalization and
// de-duplication, and the shared OTLP protocol fallback.
func TestLoadFromEnvAppliesOverrides(t *testing.T) {
	t.Setenv(shutdownTimeoutEnvVar, "9s")
	t.Setenv(logLevelEnvVar, "debug")
	t.Setenv(internalHTTPAddrEnvVar, "127.0.0.1:18092")
	t.Setenv(internalHTTPReadHeaderTimeoutEnvVar, "3s")
	t.Setenv(internalHTTPReadTimeoutEnvVar, "11s")
	t.Setenv(internalHTTPIdleTimeoutEnvVar, "61s")
	t.Setenv(redisAddrEnvVar, "127.0.0.1:6380")
	t.Setenv(redisUsernameEnvVar, "alice")
	t.Setenv(redisPasswordEnvVar, "secret")
	t.Setenv(redisDBEnvVar, "3")
	t.Setenv(redisTLSEnabledEnvVar, "true")
	t.Setenv(redisOperationTimeoutEnvVar, "750ms")
	t.Setenv(intentsStreamEnvVar, "notification:test_intents")
	t.Setenv(intentsReadBlockTimeoutEnvVar, "3500ms")
	t.Setenv(gatewayClientEventsStreamEnvVar, "gateway:test_client-events")
	t.Setenv(gatewayClientEventsStreamMaxEnvVar, "2048")
	t.Setenv(mailDeliveryCommandsStreamEnvVar, "mail:test_delivery_commands")
	t.Setenv(pushRetryMaxAttemptsEnvVar, "5")
	t.Setenv(emailRetryMaxAttemptsEnvVar, "9")
	t.Setenv(routeLeaseTTLEnvVar, "7s")
	t.Setenv(routeBackoffMinEnvVar, "2s")
	t.Setenv(routeBackoffMaxEnvVar, "7m")
	t.Setenv(deadLetterTTLEnvVar, "120h")
	t.Setenv(recordTTLEnvVar, "240h")
	t.Setenv(idempotencyTTLEnvVar, "48h")
	t.Setenv(userServiceBaseURLEnvVar, "https://user-service.internal/api/")
	t.Setenv(userServiceTimeoutEnvVar, "1500ms")
	// Mixed case and duplicate entries exercise normalization + dedup.
	t.Setenv(adminEmailsGeoReviewRecommendedEnvVar, "First@example.com, second@example.com, first@example.com")
	t.Setenv(adminEmailsGameGenerationFailedEnvVar, "ops@example.com")
	t.Setenv(adminEmailsLobbyRuntimePausedAfterEnvVar, "pause@example.com, PAUSE@example.com")
	t.Setenv(adminEmailsLobbyApplicationSubmittedEnvVar, "owner@example.com, OWNER@example.com")
	t.Setenv(otelServiceNameEnvVar, "custom-notification")
	t.Setenv(otelTracesExporterEnvVar, "otlp")
	t.Setenv(otelMetricsExporterEnvVar, "otlp")
	// Only the shared protocol variable is set: both signals must inherit it.
	t.Setenv(otelExporterOTLPProtocolEnvVar, "grpc")
	t.Setenv(otelStdoutTracesEnabledEnvVar, "true")
	t.Setenv(otelStdoutMetricsEnabledEnvVar, "true")

	cfg, err := LoadFromEnv()
	require.NoError(t, err)

	require.Equal(t, 9*time.Second, cfg.ShutdownTimeout)
	require.Equal(t, "debug", cfg.Logging.Level)
	require.Equal(t, InternalHTTPConfig{
		Addr:              "127.0.0.1:18092",
		ReadHeaderTimeout: 3 * time.Second,
		ReadTimeout:       11 * time.Second,
		IdleTimeout:       61 * time.Second,
	}, cfg.InternalHTTP)
	require.Equal(t, RedisConfig{
		Addr:             "127.0.0.1:6380",
		Username:         "alice",
		Password:         "secret",
		DB:               3,
		TLSEnabled:       true,
		OperationTimeout: 750 * time.Millisecond,
	}, cfg.Redis)
	require.Equal(t, StreamsConfig{
		Intents:                         "notification:test_intents",
		GatewayClientEvents:             "gateway:test_client-events",
		GatewayClientEventsStreamMaxLen: 2048,
		MailDeliveryCommands:            "mail:test_delivery_commands",
	}, cfg.Streams)
	require.Equal(t, 3500*time.Millisecond, cfg.IntentsReadBlockTimeout)
	require.Equal(t, RetryConfig{
		PushMaxAttempts:  5,
		EmailMaxAttempts: 9,
		RouteLeaseTTL:    7 * time.Second,
		RouteBackoffMin:  2 * time.Second,
		RouteBackoffMax:  7 * time.Minute,
		DeadLetterTTL:    120 * time.Hour,
		RecordTTL:        240 * time.Hour,
		IdempotencyTTL:   48 * time.Hour,
	}, cfg.Retry)
	// The trailing slash from the env value must be stripped.
	require.Equal(t, UserServiceConfig{
		BaseURL: "https://user-service.internal/api",
		Timeout: 1500 * time.Millisecond,
	}, cfg.UserService)
	// Lowercased and de-duplicated, first-seen order preserved.
	require.Equal(t, AdminRoutingConfig{
		GeoReviewRecommended:         []string{"first@example.com", "second@example.com"},
		GameGenerationFailed:         []string{"ops@example.com"},
		LobbyRuntimePausedAfterStart: []string{"pause@example.com"},
		LobbyApplicationSubmitted:    []string{"owner@example.com"},
	}, cfg.AdminRouting)
	require.Equal(t, TelemetryConfig{
		ServiceName:          "custom-notification",
		TracesExporter:       "otlp",
		MetricsExporter:      "otlp",
		TracesProtocol:       "grpc",
		MetricsProtocol:      "grpc",
		StdoutTracesEnabled:  true,
		StdoutMetricsEnabled: true,
	}, cfg.Telemetry)
}
|
||||
|
||||
func TestLoadFromEnvRejectsInvalidValues(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
envName string
|
||||
envVal string
|
||||
}{
|
||||
{name: "invalid duration", envName: shutdownTimeoutEnvVar, envVal: "later"},
|
||||
{name: "invalid log level", envName: logLevelEnvVar, envVal: "verbose"},
|
||||
{name: "invalid redis db", envName: redisDBEnvVar, envVal: "db-three"},
|
||||
{name: "invalid redis tls", envName: redisTLSEnabledEnvVar, envVal: "sometimes"},
|
||||
{name: "invalid push retries", envName: pushRetryMaxAttemptsEnvVar, envVal: "many"},
|
||||
{name: "invalid email retries", envName: emailRetryMaxAttemptsEnvVar, envVal: "several"},
|
||||
{name: "invalid gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "many"},
|
||||
{name: "invalid user service timeout", envName: userServiceTimeoutEnvVar, envVal: "soon"},
|
||||
{name: "invalid intents read block timeout", envName: intentsReadBlockTimeoutEnvVar, envVal: "later"},
|
||||
{name: "invalid route lease ttl", envName: routeLeaseTTLEnvVar, envVal: "eventually"},
|
||||
{name: "invalid traces exporter", envName: otelTracesExporterEnvVar, envVal: "stdout"},
|
||||
{name: "invalid metrics protocol", envName: otelExporterOTLPMetricsProtocolEnvVar, envVal: "udp"},
|
||||
{name: "invalid stdout traces", envName: otelStdoutTracesEnabledEnvVar, envVal: "sometimes"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
|
||||
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
|
||||
t.Setenv(tt.envName, tt.envVal)
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRejectsMissingRequiredValues(t *testing.T) {
|
||||
t.Run("missing redis addr", func(t *testing.T) {
|
||||
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), redisAddrEnvVar)
|
||||
})
|
||||
|
||||
t.Run("missing user service base url", func(t *testing.T) {
|
||||
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), userServiceBaseURLEnvVar)
|
||||
})
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRejectsInvalidConfiguration(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
envName string
|
||||
envVal string
|
||||
want string
|
||||
}{
|
||||
{name: "invalid internal http addr", envName: internalHTTPAddrEnvVar, envVal: "127.0.0.1", want: "internal HTTP addr"},
|
||||
{name: "invalid redis addr", envName: redisAddrEnvVar, envVal: "127.0.0.1", want: "redis addr"},
|
||||
{name: "relative user service url", envName: userServiceBaseURLEnvVar, envVal: "/internal/users", want: "absolute http(s) URL"},
|
||||
{name: "invalid admin email", envName: adminEmailsGeoReviewRecommendedEnvVar, envVal: "broken-email", want: "invalid email address"},
|
||||
{name: "blank admin email slot", envName: adminEmailsGameGenerationFailedEnvVar, envVal: "ops@example.com, , second@example.com", want: "must not be empty"},
|
||||
{name: "invalid public application admin email", envName: adminEmailsLobbyApplicationSubmittedEnvVar, envVal: "Owner <owner@example.com>", want: "must not include a display name"},
|
||||
{name: "nonpositive gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "0", want: "must be positive"},
|
||||
{name: "backoff min above max", envName: routeBackoffMinEnvVar, envVal: "10m", want: "must not exceed"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
|
||||
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
|
||||
t.Setenv(routeBackoffMaxEnvVar, "5m")
|
||||
t.Setenv(tt.envName, tt.envVal)
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tt.want)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRejectsNonPositiveValues(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
envName string
|
||||
envVal string
|
||||
}{
|
||||
{name: "shutdown timeout", envName: shutdownTimeoutEnvVar, envVal: "0s"},
|
||||
{name: "read header timeout", envName: internalHTTPReadHeaderTimeoutEnvVar, envVal: "0s"},
|
||||
{name: "read timeout", envName: internalHTTPReadTimeoutEnvVar, envVal: "0s"},
|
||||
{name: "idle timeout", envName: internalHTTPIdleTimeoutEnvVar, envVal: "0s"},
|
||||
{name: "redis timeout", envName: redisOperationTimeoutEnvVar, envVal: "0s"},
|
||||
{name: "intents read block timeout", envName: intentsReadBlockTimeoutEnvVar, envVal: "0s"},
|
||||
{name: "push retries", envName: pushRetryMaxAttemptsEnvVar, envVal: "0"},
|
||||
{name: "email retries", envName: emailRetryMaxAttemptsEnvVar, envVal: "0"},
|
||||
{name: "gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "0"},
|
||||
{name: "route lease ttl", envName: routeLeaseTTLEnvVar, envVal: "0s"},
|
||||
{name: "route backoff min", envName: routeBackoffMinEnvVar, envVal: "0s"},
|
||||
{name: "route backoff max", envName: routeBackoffMaxEnvVar, envVal: "0s"},
|
||||
{name: "dead letter ttl", envName: deadLetterTTLEnvVar, envVal: "0s"},
|
||||
{name: "record ttl", envName: recordTTLEnvVar, envVal: "0s"},
|
||||
{name: "idempotency ttl", envName: idempotencyTTLEnvVar, envVal: "0s"},
|
||||
{name: "user service timeout", envName: userServiceTimeoutEnvVar, envVal: "0s"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
|
||||
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
|
||||
t.Setenv(tt.envName, tt.envVal)
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
// Package logging configures the Notification Service process logger and
|
||||
// provides context-aware helpers for trace fields.
|
||||
package logging
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// New constructs the process-wide JSON logger from level.
//
// level accepts any spelling understood by slog.Level.UnmarshalText (for
// example "debug", "info", "warn", "error"); surrounding whitespace is
// ignored. The returned logger emits JSON records to stdout.
func New(level string) (*slog.Logger, error) {
	trimmed := strings.TrimSpace(level)

	var parsed slog.Level
	if err := parsed.UnmarshalText([]byte(trimmed)); err != nil {
		return nil, fmt.Errorf("build logger: %w", err)
	}

	handler := slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
		Level: parsed,
	})

	return slog.New(handler), nil
}
|
||||
|
||||
// TraceAttrsFromContext returns slog key-value pairs for the active
|
||||
// OpenTelemetry span when ctx carries a valid span context.
|
||||
func TraceAttrsFromContext(ctx context.Context) []any {
|
||||
if ctx == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
spanContext := trace.SpanContextFromContext(ctx)
|
||||
if !spanContext.IsValid() {
|
||||
return nil
|
||||
}
|
||||
|
||||
return []any{
|
||||
"otel_trace_id", spanContext.TraceID().String(),
|
||||
"otel_span_id", spanContext.SpanID().String(),
|
||||
}
|
||||
}
|
||||
|
||||
// NotificationAttrs returns structured notification-identifying log fields.
|
||||
func NotificationAttrs(
|
||||
notificationID string,
|
||||
notificationType intentstream.NotificationType,
|
||||
producer intentstream.Producer,
|
||||
audienceKind intentstream.AudienceKind,
|
||||
idempotencyKey string,
|
||||
requestID string,
|
||||
traceID string,
|
||||
) []any {
|
||||
attrs := []any{
|
||||
"notification_id", notificationID,
|
||||
"notification_type", string(notificationType),
|
||||
"producer", string(producer),
|
||||
"audience_kind", string(audienceKind),
|
||||
"idempotency_key", idempotencyKey,
|
||||
}
|
||||
if strings.TrimSpace(requestID) != "" {
|
||||
attrs = append(attrs, "request_id", requestID)
|
||||
}
|
||||
if strings.TrimSpace(traceID) != "" {
|
||||
attrs = append(attrs, "trace_id", traceID)
|
||||
}
|
||||
|
||||
return attrs
|
||||
}
|
||||
|
||||
// IntentAttrs returns structured intent-identifying log fields when a durable
|
||||
// notification record does not yet exist.
|
||||
func IntentAttrs(intent intentstream.Intent) []any {
|
||||
attrs := []any{
|
||||
"notification_type", string(intent.NotificationType),
|
||||
"producer", string(intent.Producer),
|
||||
"audience_kind", string(intent.AudienceKind),
|
||||
"idempotency_key", intent.IdempotencyKey,
|
||||
}
|
||||
if strings.TrimSpace(intent.RequestID) != "" {
|
||||
attrs = append(attrs, "request_id", intent.RequestID)
|
||||
}
|
||||
if strings.TrimSpace(intent.TraceID) != "" {
|
||||
attrs = append(attrs, "trace_id", intent.TraceID)
|
||||
}
|
||||
|
||||
return attrs
|
||||
}
|
||||
|
||||
// RouteAttrs returns structured route-identifying log fields.
|
||||
func RouteAttrs(
|
||||
notificationID string,
|
||||
notificationType intentstream.NotificationType,
|
||||
producer intentstream.Producer,
|
||||
audienceKind intentstream.AudienceKind,
|
||||
idempotencyKey string,
|
||||
requestID string,
|
||||
traceID string,
|
||||
routeID string,
|
||||
channel intentstream.Channel,
|
||||
) []any {
|
||||
attrs := NotificationAttrs(notificationID, notificationType, producer, audienceKind, idempotencyKey, requestID, traceID)
|
||||
attrs = append(attrs,
|
||||
"route_id", routeID,
|
||||
"channel", string(channel),
|
||||
)
|
||||
|
||||
return attrs
|
||||
}
|
||||
@@ -0,0 +1,946 @@
|
||||
// Package acceptintent implements durable idempotent acceptance of normalized
|
||||
// notification intents.
|
||||
package acceptintent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
netmail "net/mail"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/config"
|
||||
"galaxy/notification/internal/logging"
|
||||
)
|
||||
|
||||
// Sentinel errors returned by this package; callers should match them with
// errors.Is rather than comparing message strings.
var (
	// ErrConflict reports that an idempotency scope already exists for
	// different normalized content (same producer and idempotency key, but a
	// different request fingerprint).
	ErrConflict = errors.New("accept intent conflict")

	// ErrRecipientNotFound reports that at least one user-targeted recipient
	// does not exist in the trusted User Service directory.
	ErrRecipientNotFound = errors.New("accept intent recipient not found")

	// ErrServiceUnavailable reports that durable acceptance could not be
	// completed or recovered safely; callers may retry later.
	ErrServiceUnavailable = errors.New("accept intent service unavailable")
)
|
||||
|
||||
// Outcome identifies the coarse intent-acceptance outcome reported in a
// Result.
type Outcome string

const (
	// OutcomeAccepted reports that the intent was durably accepted into local
	// notification state.
	OutcomeAccepted Outcome = "accepted"

	// OutcomeDuplicate reports that the intent matched already accepted
	// normalized content and therefore became a replay no-op.
	OutcomeDuplicate Outcome = "duplicate"
)
|
||||
|
||||
// RouteStatus identifies one stable notification-route state. The frozen set
// of values is enumerated below; IsKnown reports membership.
type RouteStatus string

const (
	// RouteStatusPending reports that the route is ready for first publication.
	RouteStatusPending RouteStatus = "pending"

	// RouteStatusPublished reports that the route was durably handed off.
	RouteStatusPublished RouteStatus = "published"

	// RouteStatusFailed reports that the last publish attempt failed and a
	// retry is scheduled.
	RouteStatusFailed RouteStatus = "failed"

	// RouteStatusDeadLetter reports that the route exhausted its retry budget.
	RouteStatusDeadLetter RouteStatus = "dead_letter"

	// RouteStatusSkipped reports that the route slot was durably materialized
	// but intentionally not emitted.
	RouteStatusSkipped RouteStatus = "skipped"
)
|
||||
|
||||
// Result stores the coarse outcome of one intent-acceptance attempt.
type Result struct {
	// Outcome stores the stable intent-acceptance outcome
	// (OutcomeAccepted or OutcomeDuplicate).
	Outcome Outcome
}
|
||||
|
||||
// NotificationRecord stores the primary durable notification record accepted
// from one normalized intent. See Validate for the field invariants.
type NotificationRecord struct {
	// NotificationID stores the stable notification identifier.
	NotificationID string

	// NotificationType stores the frozen notification vocabulary value.
	NotificationType intentstream.NotificationType

	// Producer stores the frozen producer identifier.
	Producer intentstream.Producer

	// AudienceKind stores the normalized audience selector.
	AudienceKind intentstream.AudienceKind

	// RecipientUserIDs stores the normalized recipient user set for
	// user-targeted intents. Must be non-empty for audience kind user and
	// empty for audience kind admin_email (enforced by Validate).
	RecipientUserIDs []string

	// PayloadJSON stores the canonical normalized payload JSON string.
	PayloadJSON string

	// IdempotencyKey stores the producer-owned idempotency key.
	IdempotencyKey string

	// RequestFingerprint stores the stable normalized request fingerprint.
	// NOTE(review): presumably a SHA-256 hex digest given the package's
	// crypto/sha256 and encoding/hex imports — confirm in requestFingerprint.
	RequestFingerprint string

	// RequestID stores the optional tracing request identifier.
	RequestID string

	// TraceID stores the optional tracing trace identifier.
	TraceID string

	// OccurredAt stores when the producer says the event happened.
	OccurredAt time.Time

	// AcceptedAt stores when Notification Service durably accepted the intent.
	AcceptedAt time.Time

	// UpdatedAt stores the last notification-record mutation timestamp.
	UpdatedAt time.Time
}
|
||||
|
||||
// NotificationRoute stores one durable route slot derived from an accepted
// notification. See Validate for the status-dependent timestamp invariants.
type NotificationRoute struct {
	// NotificationID stores the owning notification identifier.
	NotificationID string

	// RouteID stores the stable `<channel>:<recipient_ref>` identifier.
	RouteID string

	// Channel stores the route channel slot.
	Channel intentstream.Channel

	// RecipientRef stores the stable target slot identifier.
	RecipientRef string

	// Status stores the current route status.
	Status RouteStatus

	// AttemptCount stores how many publication attempts already ran.
	AttemptCount int

	// MaxAttempts stores the total retry budget for Channel.
	MaxAttempts int

	// NextAttemptAt stores the next scheduled publication time when Status is
	// RouteStatusPending or RouteStatusFailed. It must be the zero time for
	// skipped routes (enforced by Validate).
	NextAttemptAt time.Time

	// ResolvedEmail stores the already-known email target when available.
	ResolvedEmail string

	// ResolvedLocale stores the already-known locale when available.
	ResolvedLocale string

	// LastErrorClassification stores the optional last classified route error.
	LastErrorClassification string

	// LastErrorMessage stores the optional last route error message.
	LastErrorMessage string

	// LastErrorAt stores when the last route error happened.
	LastErrorAt time.Time

	// CreatedAt stores when the route was materialized.
	CreatedAt time.Time

	// UpdatedAt stores the last route mutation timestamp.
	UpdatedAt time.Time

	// PublishedAt stores when the route reached published; it remains the
	// zero time until that transition.
	PublishedAt time.Time

	// DeadLetteredAt stores when the route reached dead_letter; it remains
	// the zero time until that transition.
	DeadLetteredAt time.Time

	// SkippedAt stores when the route reached skipped; it remains the zero
	// time until that transition.
	SkippedAt time.Time
}
|
||||
|
||||
// IdempotencyRecord stores one durable `(producer, idempotency_key)`
// reservation binding that scope to a single accepted notification.
type IdempotencyRecord struct {
	// Producer stores the owning producer identifier.
	Producer intentstream.Producer

	// IdempotencyKey stores the producer-owned idempotency key.
	IdempotencyKey string

	// NotificationID stores the accepted notification identifier.
	NotificationID string

	// RequestFingerprint stores the stable normalized request fingerprint
	// used to distinguish replays from conflicting reuse of the key.
	RequestFingerprint string

	// CreatedAt stores when the reservation was created.
	CreatedAt time.Time

	// ExpiresAt stores when the reservation expires. Must be strictly after
	// CreatedAt (enforced by Validate).
	ExpiresAt time.Time
}
|
||||
|
||||
// AcceptInput stores one normalized intent plus its chosen notification
// identifier, forming the input of Service.Execute.
type AcceptInput struct {
	// NotificationID stores the stable accepted notification identifier.
	NotificationID string

	// Intent stores the normalized decoded ingress intent.
	Intent intentstream.Intent
}
|
||||
|
||||
// CreateAcceptanceInput stores the durable write set required to accept one
// notification intent. Store.CreateAcceptance persists the three parts
// atomically.
type CreateAcceptanceInput struct {
	// Notification stores the accepted notification record.
	Notification NotificationRecord

	// Routes stores every durable route slot derived from Notification.
	Routes []NotificationRoute

	// Idempotency stores the idempotency reservation bound to Notification.
	Idempotency IdempotencyRecord
}
|
||||
|
||||
// Store describes the durable storage required by the intent-acceptance use
|
||||
// case.
|
||||
type Store interface {
|
||||
// CreateAcceptance stores the complete durable write set for one intent
|
||||
// acceptance attempt. Implementations must wrap ErrConflict when the write
|
||||
// set races with already accepted state.
|
||||
CreateAcceptance(context.Context, CreateAcceptanceInput) error
|
||||
|
||||
// GetIdempotency loads one existing idempotency reservation.
|
||||
GetIdempotency(context.Context, intentstream.Producer, string) (IdempotencyRecord, bool, error)
|
||||
|
||||
// GetNotification loads one accepted notification by NotificationID.
|
||||
GetNotification(context.Context, string) (NotificationRecord, bool, error)
|
||||
}
|
||||
|
||||
// UserRecord stores the enrichment data resolved for one recipient user.
type UserRecord struct {
	// Email stores the current user email address.
	Email string

	// PreferredLanguage stores the current user preferred language tag.
	PreferredLanguage string
}

// Validate reports whether record contains usable recipient enrichment data:
// Email must be non-blank and parse as an RFC 5322 address.
// PreferredLanguage is not checked here.
func (record UserRecord) Validate() error {
	if strings.TrimSpace(record.Email) == "" {
		return errors.New("user record email must not be empty")
	}

	_, err := netmail.ParseAddress(record.Email)
	if err != nil {
		return fmt.Errorf("user record email: %w", err)
	}

	return nil
}
|
||||
|
||||
// UserDirectory resolves trusted recipient data from User Service. Missing
|
||||
// users must wrap ErrRecipientNotFound. Other failures are treated as
|
||||
// dependency unavailability.
|
||||
type UserDirectory interface {
|
||||
// GetUserByID loads one user by stable user identifier.
|
||||
GetUserByID(context.Context, string) (UserRecord, error)
|
||||
}
|
||||
|
||||
// Telemetry records low-cardinality intent-acceptance and user-enrichment
// outcomes. May be nil in Config; New does not require it.
type Telemetry interface {
	// RecordIntentOutcome records one accepted notification-intent outcome.
	// NOTE(review): the four string parameters are unnamed; from Execute they
	// presumably describe the notification (type/producer/audience) and the
	// outcome label — confirm against the implementing metrics adapter.
	RecordIntentOutcome(context.Context, string, string, string, string)

	// RecordUserEnrichmentAttempt records one User Service enrichment lookup
	// outcome.
	// NOTE(review): the two string parameters are unnamed here — confirm
	// their semantics against the implementing metrics adapter.
	RecordUserEnrichmentAttempt(context.Context, string, string)
}
|
||||
|
||||
// Clock provides the current wall-clock time. It exists as a seam so tests
// can inject deterministic time; New defaults it to the system clock.
type Clock interface {
	// Now returns the current time.
	Now() time.Time
}
|
||||
|
||||
// systemClock is the production Clock implementation backed by time.Now.
type systemClock struct{}

// Now returns the current wall-clock time.
func (systemClock) Now() time.Time {
	return time.Now()
}
|
||||
|
||||
// Config stores the dependencies and policies used by Service. Store and
// UserDirectory are mandatory; Clock, Logger, and Telemetry are optional
// (see New for the defaults).
type Config struct {
	// Store owns the durable accepted state. Required.
	Store Store

	// UserDirectory resolves recipient email and locale from User Service.
	// Required.
	UserDirectory UserDirectory

	// Clock provides wall-clock timestamps. Defaults to the system clock.
	Clock Clock

	// Logger writes structured acceptance logs. Defaults to slog.Default.
	Logger *slog.Logger

	// Telemetry records low-cardinality acceptance and enrichment outcomes.
	// May be nil.
	Telemetry Telemetry

	// PushMaxAttempts stores the retry budget for push routes. Must be
	// positive.
	PushMaxAttempts int

	// EmailMaxAttempts stores the retry budget for email routes. Must be
	// positive.
	EmailMaxAttempts int

	// IdempotencyTTL stores how long accepted idempotency scopes remain valid.
	// Must be positive.
	IdempotencyTTL time.Duration

	// AdminRouting stores the type-specific administrator email lists.
	// Validated by New.
	AdminRouting config.AdminRoutingConfig
}
|
||||
|
||||
// Service durably accepts normalized notification intents. Construct it with
// New; the zero value is not usable.
type Service struct {
	store            Store                     // durable accepted state
	userDirectory    UserDirectory             // recipient enrichment lookups
	clock            Clock                     // wall-clock source
	logger           *slog.Logger              // tagged with component=accept_intent
	telemetry        Telemetry                 // optional outcome metrics
	pushMaxAttempts  int                       // retry budget for push routes
	emailMaxAttempts int                       // retry budget for email routes
	idempotencyTTL   time.Duration             // lifetime of idempotency scopes
	adminRouting     config.AdminRoutingConfig // admin email lists per type
}
|
||||
|
||||
// New constructs Service from cfg.
|
||||
func New(cfg Config) (*Service, error) {
|
||||
if cfg.Store == nil {
|
||||
return nil, errors.New("new accept intent service: nil store")
|
||||
}
|
||||
if cfg.UserDirectory == nil {
|
||||
return nil, errors.New("new accept intent service: nil user directory")
|
||||
}
|
||||
if cfg.Clock == nil {
|
||||
cfg.Clock = systemClock{}
|
||||
}
|
||||
if cfg.PushMaxAttempts <= 0 {
|
||||
return nil, errors.New("new accept intent service: push max attempts must be positive")
|
||||
}
|
||||
if cfg.EmailMaxAttempts <= 0 {
|
||||
return nil, errors.New("new accept intent service: email max attempts must be positive")
|
||||
}
|
||||
if cfg.IdempotencyTTL <= 0 {
|
||||
return nil, errors.New("new accept intent service: idempotency ttl must be positive")
|
||||
}
|
||||
if cfg.Logger == nil {
|
||||
cfg.Logger = slog.Default()
|
||||
}
|
||||
if err := cfg.AdminRouting.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("new accept intent service: %w", err)
|
||||
}
|
||||
|
||||
return &Service{
|
||||
store: cfg.Store,
|
||||
userDirectory: cfg.UserDirectory,
|
||||
clock: cfg.Clock,
|
||||
logger: cfg.Logger.With("component", "accept_intent"),
|
||||
telemetry: cfg.Telemetry,
|
||||
pushMaxAttempts: cfg.PushMaxAttempts,
|
||||
emailMaxAttempts: cfg.EmailMaxAttempts,
|
||||
idempotencyTTL: cfg.IdempotencyTTL,
|
||||
adminRouting: cfg.AdminRouting,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Execute durably accepts one normalized intent.
//
// The flow is: validate input, fingerprint the normalized request, short-cut
// on an existing replay, build the durable write set (enriching recipients
// via User Service), persist it, and finally emit telemetry and one
// structured acceptance log line. Sentinel errors: ErrRecipientNotFound when
// a user-targeted recipient does not exist, ErrServiceUnavailable when
// durable acceptance could not be completed or recovered safely.
func (service *Service) Execute(ctx context.Context, input AcceptInput) (Result, error) {
	if ctx == nil {
		return Result{}, errors.New("accept intent: nil context")
	}
	if service == nil {
		return Result{}, errors.New("accept intent: nil service")
	}
	if err := input.Validate(); err != nil {
		return Result{}, fmt.Errorf("accept intent: %w", err)
	}

	// The fingerprint identifies the normalized request content so replays of
	// the same (producer, idempotency key) can be distinguished from
	// conflicting reuse.
	fingerprint, err := requestFingerprint(input.Intent)
	if err != nil {
		return Result{}, fmt.Errorf("accept intent: %w", err)
	}

	// Fast path: an existing idempotency reservation resolves the attempt as
	// a duplicate (or conflict) without doing any new work.
	if result, handled, err := service.resolveReplay(ctx, input, fingerprint); handled {
		return result, err
	}

	createInput, result, err := service.buildCreateInput(ctx, input, fingerprint)
	if err != nil {
		switch {
		case errors.Is(err, ErrRecipientNotFound):
			// Already a sentinel; return as-is so callers can match it.
			return Result{}, err
		case errors.Is(err, ErrServiceUnavailable):
			return Result{}, err
		default:
			return Result{}, fmt.Errorf("accept intent: %w", err)
		}
	}

	if err := service.store.CreateAcceptance(ctx, createInput); err != nil {
		if !errors.Is(err, ErrConflict) {
			return Result{}, fmt.Errorf("%w: create acceptance: %v", ErrServiceUnavailable, err)
		}

		// A conflict means another attempt won the race; resolve this attempt
		// against the state that attempt wrote.
		if replayResult, handled, replayErr := service.resolveReplay(ctx, input, fingerprint); handled {
			return replayResult, replayErr
		}

		// Conflict reported but no replay state is visible — treat as a
		// transient storage inconsistency rather than guessing an outcome.
		return Result{}, fmt.Errorf("%w: create acceptance conflict without replay state", ErrServiceUnavailable)
	}

	service.recordIntentOutcome(ctx, createInput.Notification, string(result.Outcome))

	logArgs := logging.NotificationAttrs(
		createInput.Notification.NotificationID,
		createInput.Notification.NotificationType,
		createInput.Notification.Producer,
		createInput.Notification.AudienceKind,
		createInput.Notification.IdempotencyKey,
		createInput.Notification.RequestID,
		createInput.Notification.TraceID,
	)
	logArgs = append(logArgs,
		"route_count", len(createInput.Routes),
		"outcome", string(result.Outcome),
	)
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
	service.logger.Info("notification intent accepted", logArgs...)

	return result, nil
}
|
||||
|
||||
// Validate reports whether result stores a supported intent-acceptance
|
||||
// outcome.
|
||||
func (result Result) Validate() error {
|
||||
switch result.Outcome {
|
||||
case OutcomeAccepted, OutcomeDuplicate:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("accept intent outcome %q is unsupported", result.Outcome)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate reports whether input contains a usable acceptance request.
|
||||
func (input AcceptInput) Validate() error {
|
||||
if strings.TrimSpace(input.NotificationID) == "" {
|
||||
return errors.New("accept input notification id must not be empty")
|
||||
}
|
||||
if err := input.Intent.Validate(); err != nil {
|
||||
return fmt.Errorf("accept input intent: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether record contains a complete notification record.
|
||||
func (record NotificationRecord) Validate() error {
|
||||
if strings.TrimSpace(record.NotificationID) == "" {
|
||||
return errors.New("notification record notification id must not be empty")
|
||||
}
|
||||
if !record.NotificationType.IsKnown() {
|
||||
return fmt.Errorf("notification record type %q is unsupported", record.NotificationType)
|
||||
}
|
||||
if !record.Producer.IsKnown() {
|
||||
return fmt.Errorf("notification record producer %q is unsupported", record.Producer)
|
||||
}
|
||||
if !record.AudienceKind.IsKnown() {
|
||||
return fmt.Errorf("notification record audience kind %q is unsupported", record.AudienceKind)
|
||||
}
|
||||
if strings.TrimSpace(record.PayloadJSON) == "" {
|
||||
return errors.New("notification record payload json must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(record.IdempotencyKey) == "" {
|
||||
return errors.New("notification record idempotency key must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(record.RequestFingerprint) == "" {
|
||||
return errors.New("notification record request fingerprint must not be empty")
|
||||
}
|
||||
if err := validateTimestamp("notification record occurred at", record.OccurredAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateTimestamp("notification record accepted at", record.AcceptedAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateTimestamp("notification record updated at", record.UpdatedAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if record.AudienceKind == intentstream.AudienceKindUser && len(record.RecipientUserIDs) == 0 {
|
||||
return errors.New("notification record recipient user ids must not be empty for audience kind user")
|
||||
}
|
||||
if record.AudienceKind == intentstream.AudienceKindAdminEmail && len(record.RecipientUserIDs) > 0 {
|
||||
return errors.New("notification record recipient user ids must be empty for audience kind admin_email")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether route contains a complete route record.
//
// Beyond the per-field checks, it enforces the status-dependent timestamp
// rules: pending/failed routes need a valid NextAttemptAt, while skipped
// routes must have a zero NextAttemptAt and a valid SkippedAt. Other statuses
// impose no extra timestamp constraints here.
func (route NotificationRoute) Validate() error {
	if strings.TrimSpace(route.NotificationID) == "" {
		return errors.New("notification route notification id must not be empty")
	}
	if strings.TrimSpace(route.RouteID) == "" {
		return errors.New("notification route route id must not be empty")
	}
	if !route.Channel.IsKnown() {
		return fmt.Errorf("notification route channel %q is unsupported", route.Channel)
	}
	if strings.TrimSpace(route.RecipientRef) == "" {
		return errors.New("notification route recipient ref must not be empty")
	}
	if !route.Status.IsKnown() {
		return fmt.Errorf("notification route status %q is unsupported", route.Status)
	}
	if route.AttemptCount < 0 {
		return errors.New("notification route attempt count must not be negative")
	}
	if route.MaxAttempts <= 0 {
		return errors.New("notification route max attempts must be positive")
	}
	if err := validateTimestamp("notification route created at", route.CreatedAt); err != nil {
		return err
	}
	if err := validateTimestamp("notification route updated at", route.UpdatedAt); err != nil {
		return err
	}
	// Status-dependent timestamp invariants.
	switch route.Status {
	case RouteStatusPending, RouteStatusFailed:
		// A retryable route must carry a concrete next-attempt schedule.
		if err := validateTimestamp("notification route next attempt at", route.NextAttemptAt); err != nil {
			return err
		}
	case RouteStatusSkipped:
		// A skipped route is terminal: no future attempt, but a skip time.
		if !route.NextAttemptAt.IsZero() {
			return errors.New("notification route next attempt at must be zero for skipped routes")
		}
		if err := validateTimestamp("notification route skipped at", route.SkippedAt); err != nil {
			return err
		}
	}

	return nil
}
|
||||
|
||||
// IsKnown reports whether status belongs to the frozen route-status surface.
|
||||
func (status RouteStatus) IsKnown() bool {
|
||||
switch status {
|
||||
case RouteStatusPending,
|
||||
RouteStatusPublished,
|
||||
RouteStatusFailed,
|
||||
RouteStatusDeadLetter,
|
||||
RouteStatusSkipped:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Validate reports whether record contains a complete idempotency record.
|
||||
func (record IdempotencyRecord) Validate() error {
|
||||
if !record.Producer.IsKnown() {
|
||||
return fmt.Errorf("idempotency record producer %q is unsupported", record.Producer)
|
||||
}
|
||||
if strings.TrimSpace(record.IdempotencyKey) == "" {
|
||||
return errors.New("idempotency record idempotency key must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(record.NotificationID) == "" {
|
||||
return errors.New("idempotency record notification id must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(record.RequestFingerprint) == "" {
|
||||
return errors.New("idempotency record request fingerprint must not be empty")
|
||||
}
|
||||
if err := validateTimestamp("idempotency record created at", record.CreatedAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateTimestamp("idempotency record expires at", record.ExpiresAt); err != nil {
|
||||
return err
|
||||
}
|
||||
if !record.ExpiresAt.After(record.CreatedAt) {
|
||||
return errors.New("idempotency record expires at must be after created at")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate reports whether input contains a consistent durable write set.
// It checks each embedded record on its own, then enforces the cross-record
// invariants: the idempotency record and every route must reference the same
// notification, and route ids must be unique within the set. Check order is
// deliberate — callers see the most specific failure first.
func (input CreateAcceptanceInput) Validate() error {
	if err := input.Notification.Validate(); err != nil {
		return fmt.Errorf("notification: %w", err)
	}
	if err := input.Idempotency.Validate(); err != nil {
		return fmt.Errorf("idempotency: %w", err)
	}
	// The idempotency record must point back at exactly this notification.
	if input.Idempotency.NotificationID != input.Notification.NotificationID {
		return errors.New("idempotency notification id must match notification record")
	}
	if input.Idempotency.Producer != input.Notification.Producer {
		return errors.New("idempotency producer must match notification record")
	}
	if input.Idempotency.IdempotencyKey != input.Notification.IdempotencyKey {
		return errors.New("idempotency key must match notification record")
	}
	if input.Idempotency.RequestFingerprint != input.Notification.RequestFingerprint {
		return errors.New("idempotency request fingerprint must match notification record")
	}

	// Track route ids already seen so duplicates are rejected.
	seenRouteIDs := make(map[string]struct{}, len(input.Routes))
	for index, route := range input.Routes {
		if err := route.Validate(); err != nil {
			return fmt.Errorf("routes[%d]: %w", index, err)
		}
		if route.NotificationID != input.Notification.NotificationID {
			return fmt.Errorf("routes[%d]: notification id must match notification record", index)
		}
		if _, ok := seenRouteIDs[route.RouteID]; ok {
			return fmt.Errorf("routes[%d]: route id %q is duplicated", index, route.RouteID)
		}
		seenRouteIDs[route.RouteID] = struct{}{}
		// User-audience routes additionally require an enriched recipient:
		// a user-scoped ref plus resolved email and locale.
		if input.Notification.AudienceKind == intentstream.AudienceKindUser {
			if !strings.HasPrefix(route.RecipientRef, "user:") {
				return fmt.Errorf("routes[%d]: recipient ref must use user: prefix for audience kind user", index)
			}
			if strings.TrimSpace(route.ResolvedEmail) == "" {
				return fmt.Errorf("routes[%d]: resolved email must not be empty for audience kind user", index)
			}
			if strings.TrimSpace(route.ResolvedLocale) == "" {
				return fmt.Errorf("routes[%d]: resolved locale must not be empty for audience kind user", index)
			}
		}
	}

	return nil
}
|
||||
|
||||
// buildCreateInput assembles the full durable write set (notification record,
// materialized routes, idempotency record) for an accepted intent, plus the
// Result to report. It validates its own output before returning so the store
// only ever receives consistent input.
func (service *Service) buildCreateInput(ctx context.Context, input AcceptInput, fingerprint string) (CreateAcceptanceInput, Result, error) {
	// Single timestamp for the whole write set; millisecond precision to
	// satisfy validateTimestamp.
	now := service.clock.Now().UTC().Truncate(time.Millisecond)

	record := NotificationRecord{
		NotificationID:   input.NotificationID,
		NotificationType: input.Intent.NotificationType,
		Producer:         input.Intent.Producer,
		AudienceKind:     input.Intent.AudienceKind,
		// Copy the recipient slice so later caller mutation cannot leak in.
		RecipientUserIDs:   append([]string(nil), input.Intent.RecipientUserIDs...),
		PayloadJSON:        input.Intent.PayloadJSON,
		IdempotencyKey:     input.Intent.IdempotencyKey,
		RequestFingerprint: fingerprint,
		RequestID:          input.Intent.RequestID,
		TraceID:            input.Intent.TraceID,
		OccurredAt:         input.Intent.OccurredAt,
		AcceptedAt:         now,
		UpdatedAt:          now,
	}

	routes, err := service.materializeRoutes(ctx, record, now)
	if err != nil {
		return CreateAcceptanceInput{}, Result{}, fmt.Errorf("materialize routes: %w", err)
	}

	createInput := CreateAcceptanceInput{
		Notification: record,
		Routes:       routes,
		Idempotency: IdempotencyRecord{
			Producer:           record.Producer,
			IdempotencyKey:     record.IdempotencyKey,
			NotificationID:     record.NotificationID,
			RequestFingerprint: fingerprint,
			CreatedAt:          now,
			ExpiresAt:          now.Add(service.idempotencyTTL),
		},
	}
	// Defensive self-check: never hand the store an inconsistent write set.
	if err := createInput.Validate(); err != nil {
		return CreateAcceptanceInput{}, Result{}, fmt.Errorf("build create acceptance input: %w", err)
	}

	result := Result{Outcome: OutcomeAccepted}
	if err := result.Validate(); err != nil {
		return CreateAcceptanceInput{}, Result{}, fmt.Errorf("build acceptance result: %w", err)
	}

	return createInput, result, nil
}
|
||||
|
||||
// materializeRoutes expands a notification record into its delivery routes.
// User audiences get one push and one email route per recipient (enriched via
// the user directory); admin-email audiences get one push and one email route
// per configured admin address, or a single synthetic skipped route when no
// addresses are configured so the acceptance is still auditable.
func (service *Service) materializeRoutes(ctx context.Context, record NotificationRecord, now time.Time) ([]NotificationRoute, error) {
	switch record.AudienceKind {
	case intentstream.AudienceKindUser:
		recipients, err := service.resolveRecipients(ctx, record.NotificationType, record.RecipientUserIDs)
		if err != nil {
			return nil, err
		}

		// Two routes (push + email) per recipient.
		routes := make([]NotificationRoute, 0, len(record.RecipientUserIDs)*2)
		for _, userID := range record.RecipientUserIDs {
			recipient := recipients[userID]
			recipientRef := "user:" + userID
			routes = append(routes,
				service.newRoute(record, now, intentstream.ChannelPush, recipientRef, recipient.Email, resolveLocale(recipient.PreferredLanguage)),
				service.newRoute(record, now, intentstream.ChannelEmail, recipientRef, recipient.Email, resolveLocale(recipient.PreferredLanguage)),
			)
		}
		return routes, nil
	case intentstream.AudienceKindAdminEmail:
		adminEmails := service.adminEmailsFor(record.NotificationType)
		if len(adminEmails) == 0 {
			// No admins configured: emit one synthetic skipped route so the
			// acceptance still leaves a trace of the missing configuration.
			return []NotificationRoute{
				service.newSyntheticAdminConfigRoute(record, now),
			}, nil
		}

		// Two routes (push + email) per admin address; admins always use the
		// default locale.
		routes := make([]NotificationRoute, 0, len(adminEmails)*2)
		for _, email := range adminEmails {
			recipientRef := "email:" + email
			routes = append(routes,
				service.newRoute(record, now, intentstream.ChannelPush, recipientRef, email, intentstream.DefaultResolvedLocale()),
				service.newRoute(record, now, intentstream.ChannelEmail, recipientRef, email, intentstream.DefaultResolvedLocale()),
			)
		}
		return routes, nil
	default:
		return nil, fmt.Errorf("unsupported audience kind %q", record.AudienceKind)
	}
}
|
||||
|
||||
func (service *Service) resolveRecipients(ctx context.Context, notificationType intentstream.NotificationType, userIDs []string) (map[string]UserRecord, error) {
|
||||
recipients := make(map[string]UserRecord, len(userIDs))
|
||||
for _, userID := range userIDs {
|
||||
record, err := service.userDirectory.GetUserByID(ctx, userID)
|
||||
switch {
|
||||
case err == nil:
|
||||
if err := record.Validate(); err != nil {
|
||||
service.recordUserEnrichmentAttempt(ctx, notificationType, "service_unavailable")
|
||||
return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrServiceUnavailable, userID, err)
|
||||
}
|
||||
service.recordUserEnrichmentAttempt(ctx, notificationType, "success")
|
||||
recipients[userID] = record
|
||||
case errors.Is(err, ErrRecipientNotFound):
|
||||
service.recordUserEnrichmentAttempt(ctx, notificationType, "recipient_not_found")
|
||||
return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrRecipientNotFound, userID, err)
|
||||
default:
|
||||
service.recordUserEnrichmentAttempt(ctx, notificationType, "service_unavailable")
|
||||
return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrServiceUnavailable, userID, err)
|
||||
}
|
||||
}
|
||||
|
||||
return recipients, nil
|
||||
}
|
||||
|
||||
func (service *Service) newRoute(
|
||||
record NotificationRecord,
|
||||
now time.Time,
|
||||
channel intentstream.Channel,
|
||||
recipientRef string,
|
||||
resolvedEmail string,
|
||||
resolvedLocale string,
|
||||
) NotificationRoute {
|
||||
route := NotificationRoute{
|
||||
NotificationID: record.NotificationID,
|
||||
RouteID: string(channel) + ":" + recipientRef,
|
||||
Channel: channel,
|
||||
RecipientRef: recipientRef,
|
||||
AttemptCount: 0,
|
||||
MaxAttempts: service.maxAttempts(channel),
|
||||
ResolvedEmail: resolvedEmail,
|
||||
ResolvedLocale: resolvedLocale,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
|
||||
if record.NotificationType.SupportsChannel(record.AudienceKind, channel) {
|
||||
route.Status = RouteStatusPending
|
||||
route.NextAttemptAt = now
|
||||
return route
|
||||
}
|
||||
|
||||
route.Status = RouteStatusSkipped
|
||||
route.SkippedAt = now
|
||||
return route
|
||||
}
|
||||
|
||||
// newSyntheticAdminConfigRoute builds a single already-skipped email route
// that stands in for an admin-email audience with no configured recipients.
// The "config:" recipient ref marks the route as pointing at missing
// configuration rather than at a real mailbox.
func (service *Service) newSyntheticAdminConfigRoute(record NotificationRecord, now time.Time) NotificationRoute {
	recipientRef := "config:" + string(record.NotificationType)
	return NotificationRoute{
		NotificationID: record.NotificationID,
		RouteID:        string(intentstream.ChannelEmail) + ":" + recipientRef,
		Channel:        intentstream.ChannelEmail,
		RecipientRef:   recipientRef,
		Status:         RouteStatusSkipped,
		AttemptCount:   0,
		MaxAttempts:    service.emailMaxAttempts,
		CreatedAt:      now,
		UpdatedAt:      now,
		// Skipped immediately; NextAttemptAt stays zero by design.
		SkippedAt: now,
	}
}
|
||||
|
||||
func (service *Service) adminEmailsFor(notificationType intentstream.NotificationType) []string {
|
||||
switch notificationType {
|
||||
case intentstream.NotificationTypeGeoReviewRecommended:
|
||||
return append([]string(nil), service.adminRouting.GeoReviewRecommended...)
|
||||
case intentstream.NotificationTypeGameGenerationFailed:
|
||||
return append([]string(nil), service.adminRouting.GameGenerationFailed...)
|
||||
case intentstream.NotificationTypeLobbyRuntimePausedAfterStart:
|
||||
return append([]string(nil), service.adminRouting.LobbyRuntimePausedAfterStart...)
|
||||
case intentstream.NotificationTypeLobbyApplicationSubmitted:
|
||||
return append([]string(nil), service.adminRouting.LobbyApplicationSubmitted...)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (service *Service) maxAttempts(channel intentstream.Channel) int {
|
||||
switch channel {
|
||||
case intentstream.ChannelPush:
|
||||
return service.pushMaxAttempts
|
||||
case intentstream.ChannelEmail:
|
||||
return service.emailMaxAttempts
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func resolveLocale(preferredLanguage string) string {
|
||||
if preferredLanguage == intentstream.DefaultResolvedLocale() {
|
||||
return intentstream.DefaultResolvedLocale()
|
||||
}
|
||||
|
||||
return intentstream.DefaultResolvedLocale()
|
||||
}
|
||||
|
||||
// resolveReplay checks whether input replays an already-accepted intent.
// Returns (result, true, nil) for an equivalent replay (OutcomeDuplicate),
// (zero, false, nil) when the idempotency scope is new, and (zero, true, err)
// when the scope exists but the request conflicts or state cannot be loaded.
// The second return value tells the caller whether the request was handled.
func (service *Service) resolveReplay(ctx context.Context, input AcceptInput, fingerprint string) (Result, bool, error) {
	record, found, err := service.store.GetIdempotency(ctx, input.Intent.Producer, input.Intent.IdempotencyKey)
	if err != nil {
		return Result{}, true, fmt.Errorf("%w: load idempotency: %v", ErrServiceUnavailable, err)
	}
	if !found {
		// New idempotency scope — not a replay; caller proceeds to accept.
		return Result{}, false, nil
	}
	// Same scope but different request content is a hard conflict.
	if record.RequestFingerprint != fingerprint {
		return Result{}, true, fmt.Errorf("%w: request conflicts with current state", ErrConflict)
	}

	notificationRecord, found, err := service.store.GetNotification(ctx, record.NotificationID)
	if err != nil {
		return Result{}, true, fmt.Errorf("%w: load notification: %v", ErrServiceUnavailable, err)
	}
	if !found {
		// An idempotency record without its notification indicates torn state.
		return Result{}, true, fmt.Errorf("%w: notification %q is missing for idempotency scope", ErrServiceUnavailable, record.NotificationID)
	}

	if notificationRecord.NotificationID != record.NotificationID {
		return Result{}, true, fmt.Errorf("%w: replay notification id mismatch", ErrServiceUnavailable)
	}

	result := Result{Outcome: OutcomeDuplicate}
	if err := result.Validate(); err != nil {
		return Result{}, true, fmt.Errorf("%w: %v", ErrServiceUnavailable, err)
	}

	// Duplicates are counted in telemetry just like accepted intents.
	service.recordIntentOutcome(ctx, notificationRecord, string(result.Outcome))

	logArgs := logging.NotificationAttrs(
		notificationRecord.NotificationID,
		notificationRecord.NotificationType,
		notificationRecord.Producer,
		notificationRecord.AudienceKind,
		notificationRecord.IdempotencyKey,
		notificationRecord.RequestID,
		notificationRecord.TraceID,
	)
	logArgs = append(logArgs,
		"outcome", string(result.Outcome),
	)
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
	service.logger.Info("notification intent replay resolved", logArgs...)

	return result, true, nil
}
|
||||
|
||||
// requestFingerprint derives a stable "sha256:<hex>" fingerprint of the
// intent's semantic content (type, audience, recipients, payload), used to
// detect conflicting reuse of an idempotency scope. Request/trace ids and
// timestamps are deliberately excluded so retries fingerprint identically.
// NOTE: the struct field order and json tags define the canonical encoding;
// changing them would invalidate all stored fingerprints.
func requestFingerprint(intent intentstream.Intent) (string, error) {
	if err := intent.Validate(); err != nil {
		return "", err
	}

	normalized := struct {
		NotificationType intentstream.NotificationType `json:"notification_type"`
		AudienceKind     intentstream.AudienceKind     `json:"audience_kind"`
		RecipientUserIDs []string                      `json:"recipient_user_ids,omitempty"`
		PayloadJSON      json.RawMessage               `json:"payload_json"`
	}{
		NotificationType: intent.NotificationType,
		AudienceKind:     intent.AudienceKind,
		// Copy so marshaling never aliases the caller's slice.
		RecipientUserIDs: append([]string(nil), intent.RecipientUserIDs...),
		// RawMessage embeds the payload verbatim instead of re-encoding it.
		PayloadJSON: json.RawMessage(intent.PayloadJSON),
	}

	payload, err := json.Marshal(normalized)
	if err != nil {
		return "", fmt.Errorf("marshal request fingerprint: %w", err)
	}

	sum := sha256.Sum256(payload)

	return "sha256:" + hex.EncodeToString(sum[:]), nil
}
|
||||
|
||||
func (service *Service) recordIntentOutcome(ctx context.Context, record NotificationRecord, outcome string) {
|
||||
if service == nil || service.telemetry == nil || strings.TrimSpace(outcome) == "" {
|
||||
return
|
||||
}
|
||||
|
||||
service.telemetry.RecordIntentOutcome(
|
||||
ctx,
|
||||
string(record.NotificationType),
|
||||
string(record.Producer),
|
||||
string(record.AudienceKind),
|
||||
outcome,
|
||||
)
|
||||
}
|
||||
|
||||
func (service *Service) recordUserEnrichmentAttempt(ctx context.Context, notificationType intentstream.NotificationType, result string) {
|
||||
if service == nil || service.telemetry == nil || strings.TrimSpace(result) == "" {
|
||||
return
|
||||
}
|
||||
|
||||
service.telemetry.RecordUserEnrichmentAttempt(ctx, string(notificationType), result)
|
||||
}
|
||||
|
||||
func validateTimestamp(name string, value time.Time) error {
|
||||
if value.IsZero() {
|
||||
return fmt.Errorf("%s must not be zero", name)
|
||||
}
|
||||
if !value.Equal(value.UTC()) {
|
||||
return fmt.Errorf("%s must be UTC", name)
|
||||
}
|
||||
if !value.Equal(value.Truncate(time.Millisecond)) {
|
||||
return fmt.Errorf("%s must use millisecond precision", name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,613 @@
|
||||
package acceptintent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/config"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestServiceAcceptsIntentAndMaterializesUserRoutes verifies the happy path
// for a user-audience intent: the notification is persisted with sorted
// recipients, push+email routes are materialized per recipient with the
// configured retry budgets, and directory lookups happen once per user.
func TestServiceAcceptsIntentAndMaterializesUserRoutes(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(map[string]UserRecord{
		"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
		"user-2": {Email: "two@example.com", PreferredLanguage: "en-US"},
	})
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// Recipients are deliberately passed out of order to assert sorting.
	result, err := service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-2", "user-1"}, "request-123", "trace-123", time.UnixMilli(1775121700001).UTC()),
	})
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, result.Outcome)
	require.Len(t, store.createInputs, 1)

	createInput := store.createInputs[0]
	require.Equal(t, "1775121700000-0", createInput.Notification.NotificationID)
	require.Equal(t, []string{"user-1", "user-2"}, createInput.Notification.RecipientUserIDs)
	require.Equal(t, `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, createInput.Notification.PayloadJSON)
	// Two recipients x two channels.
	require.Len(t, createInput.Routes, 4)
	pushUser1 := routeByID(t, createInput.Routes, "push:user:user-1")
	emailUser1 := routeByID(t, createInput.Routes, "email:user:user-1")
	pushUser2 := routeByID(t, createInput.Routes, "push:user:user-2")
	emailUser2 := routeByID(t, createInput.Routes, "email:user:user-2")
	require.Equal(t, RouteStatusPending, pushUser1.Status)
	require.Equal(t, 3, pushUser1.MaxAttempts)
	require.Equal(t, "one@example.com", pushUser1.ResolvedEmail)
	require.Equal(t, "en", pushUser1.ResolvedLocale)
	require.Equal(t, RouteStatusPending, emailUser1.Status)
	require.Equal(t, 7, emailUser1.MaxAttempts)
	require.Equal(t, "one@example.com", emailUser1.ResolvedEmail)
	require.Equal(t, "en", emailUser1.ResolvedLocale)
	require.Equal(t, "two@example.com", pushUser2.ResolvedEmail)
	// "en-US" preference still resolves to the default locale "en".
	require.Equal(t, "en", pushUser2.ResolvedLocale)
	require.Equal(t, "two@example.com", emailUser2.ResolvedEmail)
	require.Equal(t, "en", emailUser2.ResolvedLocale)
	require.Equal(t, []string{"user-1", "user-2"}, directory.lookups)
}
|
||||
|
||||
// TestServiceTreatsEquivalentReplayAsDuplicate verifies that a second intent
// with the same idempotency scope and equivalent content (only request/trace
// ids and OccurredAt differ) resolves to OutcomeDuplicate without a second
// store write or directory lookup.
func TestServiceTreatsEquivalentReplayAsDuplicate(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(map[string]UserRecord{
		"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
	})
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	firstInput := AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "request-1", "trace-1", time.UnixMilli(1775121700001).UTC()),
	}
	// Same payload and scope; different request/trace ids and occurred-at,
	// which are excluded from the fingerprint.
	secondInput := AcceptInput{
		NotificationID: "1775121700001-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "request-2", "trace-2", time.UnixMilli(1775121799999).UTC()),
	}

	firstResult, err := service.Execute(context.Background(), firstInput)
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, firstResult.Outcome)

	secondResult, err := service.Execute(context.Background(), secondInput)
	require.NoError(t, err)
	require.Equal(t, OutcomeDuplicate, secondResult.Outcome)
	// Only the first call wrote to the store or hit the directory.
	require.Len(t, store.createInputs, 1)
	require.Equal(t, []string{"user-1"}, directory.lookups)
}
|
||||
|
||||
// TestServiceRejectsConflictOnSameIdempotencyScope verifies that reusing an
// idempotency scope with different content (turn 54 vs 55 changes the
// request fingerprint) fails with ErrConflict instead of being treated as a
// duplicate.
func TestServiceRejectsConflictOnSameIdempotencyScope(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(map[string]UserRecord{
		"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
	})
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	_, err = service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
	})
	require.NoError(t, err)

	// Same scope, different payload content — must conflict.
	_, err = service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700002-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":55}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700002).UTC()),
	})
	require.ErrorIs(t, err, ErrConflict)
}
|
||||
|
||||
// TestServiceMaterializesPublicLobbyApplicationAdminRoutes verifies that an
// admin-email audience materializes push+email routes per configured admin
// address (push skipped by the channel matrix, email pending), using the
// default locale and no user-directory lookups.
func TestServiceMaterializesPublicLobbyApplicationAdminRoutes(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(nil)
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting: config.AdminRoutingConfig{
			LobbyApplicationSubmitted: []string{"owner@example.com"},
		},
	})
	require.NoError(t, err)

	result, err := service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validPublicApplicationIntent(),
	})
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, result.Outcome)
	require.Len(t, store.createInputs, 1)
	require.Len(t, store.createInputs[0].Routes, 2)

	pushRoute := routeByID(t, store.createInputs[0].Routes, "push:email:owner@example.com")
	emailRoute := routeByID(t, store.createInputs[0].Routes, "email:email:owner@example.com")

	// Push is not in the channel matrix for this type/audience.
	require.Equal(t, RouteStatusSkipped, pushRoute.Status)
	require.Equal(t, "owner@example.com", pushRoute.ResolvedEmail)
	require.Equal(t, "en", pushRoute.ResolvedLocale)
	require.Equal(t, RouteStatusPending, emailRoute.Status)
	require.Equal(t, "owner@example.com", emailRoute.ResolvedEmail)
	require.Equal(t, "en", emailRoute.ResolvedLocale)
	// Admin audiences never touch the user directory.
	require.Empty(t, directory.lookups)
}
|
||||
|
||||
// TestServiceMaterializesSyntheticAdminConfigRouteWhenListIsEmpty verifies
// that an admin-email audience with no configured recipients still persists a
// single synthetic, already-skipped email route keyed by "config:<type>".
func TestServiceMaterializesSyntheticAdminConfigRouteWhenListIsEmpty(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(nil)
	// AdminRouting is deliberately left zero so no recipients resolve.
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	result, err := service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validPublicApplicationIntent(),
	})
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, result.Outcome)
	require.Len(t, store.createInputs, 1)
	require.Len(t, store.createInputs[0].Routes, 1)

	route := store.createInputs[0].Routes[0]
	require.Equal(t, "email:config:lobby.application.submitted", route.RouteID)
	require.Equal(t, RouteStatusSkipped, route.Status)
	require.Equal(t, 7, route.MaxAttempts)
	// Skipped synthetic routes never schedule an attempt.
	require.True(t, route.NextAttemptAt.IsZero())
	require.Empty(t, directory.lookups)
}
|
||||
|
||||
// TestServiceMaterializesChannelMatrixAndRetryBudgets is a table-driven check
// that the per-type/per-audience channel matrix decides pending vs skipped,
// and that each channel carries its configured retry budget (push 3, email 7).
func TestServiceMaterializesChannelMatrixAndRetryBudgets(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	tests := []struct {
		name         string
		intent       intentstream.Intent
		adminRouting config.AdminRoutingConfig
		// wantRoutes maps route id to the expected status and retry budget.
		wantRoutes map[string]struct {
			status      RouteStatus
			maxAttempts int
		}
	}{
		{
			// Turn-ready supports both channels for users.
			name: "user push and email",
			intent: validTurnReadyIntent(
				`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
				[]string{"user-1"},
				"",
				"",
				now,
			),
			wantRoutes: map[string]struct {
				status      RouteStatus
				maxAttempts int
			}{
				"push:user:user-1":  {status: RouteStatusPending, maxAttempts: 3},
				"email:user:user-1": {status: RouteStatusPending, maxAttempts: 7},
			},
		},
		{
			// Invite-expired is email-only for users; push gets skipped.
			name: "user email only",
			intent: intentstream.Intent{
				NotificationType: intentstream.NotificationTypeLobbyInviteExpired,
				Producer:         intentstream.ProducerGameLobby,
				AudienceKind:     intentstream.AudienceKindUser,
				RecipientUserIDs: []string{"user-1"},
				IdempotencyKey:   "game-123:invite-expired",
				OccurredAt:       now,
				PayloadJSON:      `{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"}`,
			},
			wantRoutes: map[string]struct {
				status      RouteStatus
				maxAttempts int
			}{
				"push:user:user-1":  {status: RouteStatusSkipped, maxAttempts: 3},
				"email:user:user-1": {status: RouteStatusPending, maxAttempts: 7},
			},
		},
		{
			// Admin-email audiences are email-only as well.
			name: "admin email only",
			intent: intentstream.Intent{
				NotificationType: intentstream.NotificationTypeGeoReviewRecommended,
				Producer:         intentstream.ProducerGeoProfile,
				AudienceKind:     intentstream.AudienceKindAdminEmail,
				IdempotencyKey:   "geo:user-1",
				OccurredAt:       now,
				PayloadJSON:      `{"observed_country":"DE","review_reason":"country_mismatch","usual_connection_country":"PL","user_email":"pilot@example.com","user_id":"user-1"}`,
			},
			adminRouting: config.AdminRoutingConfig{
				GeoReviewRecommended: []string{"admin@example.com"},
			},
			wantRoutes: map[string]struct {
				status      RouteStatus
				maxAttempts int
			}{
				"push:email:admin@example.com":  {status: RouteStatusSkipped, maxAttempts: 3},
				"email:email:admin@example.com": {status: RouteStatusPending, maxAttempts: 7},
			},
		},
	}

	for _, tt := range tests {
		tt := tt // pin the loop variable for the parallel subtest
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			store := newRecordingStore()
			directory := newStaticUserDirectory(map[string]UserRecord{
				"user-1": {Email: "pilot@example.com", PreferredLanguage: "fr-FR"},
			})
			service, err := New(Config{
				Store:            store,
				UserDirectory:    directory,
				Clock:            fixedClock{now: now},
				PushMaxAttempts:  3,
				EmailMaxAttempts: 7,
				IdempotencyTTL:   7 * 24 * time.Hour,
				AdminRouting:     tt.adminRouting,
			})
			require.NoError(t, err)

			result, err := service.Execute(context.Background(), AcceptInput{
				NotificationID: "1775121700000-0",
				Intent:         tt.intent,
			})
			require.NoError(t, err)
			require.Equal(t, OutcomeAccepted, result.Outcome)
			require.Len(t, store.createInputs, 1)
			require.Len(t, store.createInputs[0].Routes, len(tt.wantRoutes))

			for routeID, want := range tt.wantRoutes {
				route := routeByID(t, store.createInputs[0].Routes, routeID)
				require.Equal(t, want.status, route.Status)
				require.Equal(t, want.maxAttempts, route.MaxAttempts)
			}
		})
	}
}
|
||||
|
||||
// TestServiceReturnsRecipientNotFoundForMissingUser verifies that a user
// intent aimed at an unknown user fails with ErrRecipientNotFound after one
// directory lookup and writes nothing to the store.
func TestServiceReturnsRecipientNotFoundForMissingUser(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	// Empty directory: every lookup misses.
	directory := newStaticUserDirectory(nil)
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	_, err = service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-missing"}, "", "", time.UnixMilli(1775121700001).UTC()),
	})
	require.ErrorIs(t, err, ErrRecipientNotFound)
	require.Empty(t, store.createInputs)
	require.Equal(t, []string{"user-missing"}, directory.lookups)
}
|
||||
|
||||
// TestServiceReturnsServiceUnavailableWhenDirectoryFails verifies that a
// user-directory infrastructure failure maps to ErrServiceUnavailable and
// prevents any durable write.
func TestServiceReturnsServiceUnavailableWhenDirectoryFails(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(nil)
	// Force an infrastructure-style failure for this user id.
	directory.errByUserID["user-1"] = errors.New("user service unavailable")
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	_, err = service.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
	})
	require.ErrorIs(t, err, ErrServiceUnavailable)
	require.Empty(t, store.createInputs)
}
|
||||
|
||||
// TestServiceRecordsIntentAndUserEnrichmentTelemetry verifies that an
// accepted intent and its duplicate replay each emit one intent-outcome
// sample, while user enrichment is recorded only for the first (replays skip
// the directory).
func TestServiceRecordsIntentAndUserEnrichmentTelemetry(t *testing.T) {
	t.Parallel()

	store := newRecordingStore()
	directory := newStaticUserDirectory(map[string]UserRecord{
		"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
	})
	telemetry := &recordingTelemetry{}
	service, err := New(Config{
		Store:            store,
		UserDirectory:    directory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Telemetry:        telemetry,
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	input := AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
	}
	result, err := service.Execute(context.Background(), input)
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, result.Outcome)

	// Re-submit the same intent under a new notification id: a replay.
	duplicateInput := input
	duplicateInput.NotificationID = "1775121700001-0"
	result, err = service.Execute(context.Background(), duplicateInput)
	require.NoError(t, err)
	require.Equal(t, OutcomeDuplicate, result.Outcome)

	require.Equal(t, []intentOutcomeRecord{
		{
			notificationType: "game.turn.ready",
			producer:         "game_master",
			audienceKind:     "user",
			outcome:          "accepted",
		},
		{
			notificationType: "game.turn.ready",
			producer:         "game_master",
			audienceKind:     "user",
			outcome:          "duplicate",
		},
	}, telemetry.intentOutcomes)
	// Enrichment ran only on the first execution.
	require.Equal(t, []userEnrichmentRecord{
		{notificationType: "game.turn.ready", result: "success"},
	}, telemetry.userEnrichment)
}
|
||||
|
||||
// TestServiceRecordsUserEnrichmentFailureTelemetry verifies that each
// directory failure mode is labeled correctly in the enrichment telemetry:
// a missing user as "recipient_not_found", an infrastructure error as
// "service_unavailable".
func TestServiceRecordsUserEnrichmentFailureTelemetry(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name      string
		directory *staticUserDirectory
		want      string
	}{
		{
			name:      "recipient not found",
			directory: newStaticUserDirectory(nil),
			want:      "recipient_not_found",
		},
		{
			name: "service unavailable",
			directory: func() *staticUserDirectory {
				directory := newStaticUserDirectory(nil)
				directory.errByUserID["user-1"] = errors.New("user service unavailable")
				return directory
			}(),
			want: "service_unavailable",
		},
	}

	for _, tt := range tests {
		tt := tt // pin the loop variable for the parallel subtest
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			telemetry := &recordingTelemetry{}
			service, err := New(Config{
				Store:            newRecordingStore(),
				UserDirectory:    tt.directory,
				Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
				Telemetry:        telemetry,
				PushMaxAttempts:  3,
				EmailMaxAttempts: 7,
				IdempotencyTTL:   7 * 24 * time.Hour,
			})
			require.NoError(t, err)

			_, err = service.Execute(context.Background(), AcceptInput{
				NotificationID: "1775121700000-0",
				Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
			})
			require.Error(t, err)
			require.Equal(t, []userEnrichmentRecord{
				{notificationType: "game.turn.ready", result: tt.want},
			}, telemetry.userEnrichment)
		})
	}
}
|
||||
|
||||
// recordingStore is an in-memory Store test double. It records every
// successful CreateAcceptance input and serves reads from plain maps.
type recordingStore struct {
	// createInputs captures each accepted CreateAcceptance call in order.
	createInputs []CreateAcceptanceInput
	// idempotency is keyed by "<producer>:<idempotency_key>".
	idempotency map[string]IdempotencyRecord
	// notifications is keyed by notification id.
	notifications map[string]NotificationRecord
}
|
||||
|
||||
func newRecordingStore() *recordingStore {
|
||||
return &recordingStore{
|
||||
idempotency: make(map[string]IdempotencyRecord),
|
||||
notifications: make(map[string]NotificationRecord),
|
||||
}
|
||||
}
|
||||
|
||||
func (store *recordingStore) CreateAcceptance(_ context.Context, input CreateAcceptanceInput) error {
|
||||
if err := input.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
key := string(input.Idempotency.Producer) + ":" + input.Idempotency.IdempotencyKey
|
||||
if _, ok := store.idempotency[key]; ok {
|
||||
return ErrConflict
|
||||
}
|
||||
|
||||
store.createInputs = append(store.createInputs, input)
|
||||
store.idempotency[key] = input.Idempotency
|
||||
store.notifications[input.Notification.NotificationID] = input.Notification
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (store *recordingStore) GetIdempotency(_ context.Context, producer intentstream.Producer, idempotencyKey string) (IdempotencyRecord, bool, error) {
|
||||
record, ok := store.idempotency[string(producer)+":"+idempotencyKey]
|
||||
return record, ok, nil
|
||||
}
|
||||
|
||||
func (store *recordingStore) GetNotification(_ context.Context, notificationID string) (NotificationRecord, bool, error) {
|
||||
record, ok := store.notifications[notificationID]
|
||||
return record, ok, nil
|
||||
}
|
||||
|
||||
// fixedClock is a clock test double that always reports the same instant,
// keeping timestamp assertions deterministic.
type fixedClock struct {
	// now is the instant returned by every Now call.
	now time.Time
}
|
||||
|
||||
// Now returns the fixed instant configured on the clock.
func (clock fixedClock) Now() time.Time {
	return clock.now
}
|
||||
|
||||
func validTurnReadyIntent(payload string, recipients []string, requestID string, traceID string, occurredAt time.Time) intentstream.Intent {
|
||||
sorted := append([]string(nil), recipients...)
|
||||
if len(sorted) == 2 && sorted[0] == "user-2" {
|
||||
sorted[0], sorted[1] = sorted[1], sorted[0]
|
||||
}
|
||||
return intentstream.Intent{
|
||||
NotificationType: intentstream.NotificationTypeGameTurnReady,
|
||||
Producer: intentstream.ProducerGameMaster,
|
||||
AudienceKind: intentstream.AudienceKindUser,
|
||||
RecipientUserIDs: sorted,
|
||||
IdempotencyKey: "game-123:turn-54",
|
||||
OccurredAt: occurredAt.UTC().Truncate(time.Millisecond),
|
||||
RequestID: requestID,
|
||||
TraceID: traceID,
|
||||
PayloadJSON: payload,
|
||||
}
|
||||
}
|
||||
|
||||
func validPublicApplicationIntent() intentstream.Intent {
|
||||
return intentstream.Intent{
|
||||
NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted,
|
||||
Producer: intentstream.ProducerGameLobby,
|
||||
AudienceKind: intentstream.AudienceKindAdminEmail,
|
||||
IdempotencyKey: "game-456:application-submitted:user-42",
|
||||
OccurredAt: time.UnixMilli(1775121700002).UTC(),
|
||||
PayloadJSON: `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
|
||||
}
|
||||
}
|
||||
|
||||
func routeByID(t *testing.T, routes []NotificationRoute, routeID string) NotificationRoute {
|
||||
t.Helper()
|
||||
|
||||
for _, route := range routes {
|
||||
if route.RouteID == routeID {
|
||||
return route
|
||||
}
|
||||
}
|
||||
|
||||
t.Fatalf("route %q not found", routeID)
|
||||
return NotificationRoute{}
|
||||
}
|
||||
|
||||
// staticUserDirectory is a user-directory test double backed by fixed maps.
type staticUserDirectory struct {
	// records maps user id to the record returned on lookup.
	records map[string]UserRecord
	// errByUserID forces an error for specific user ids, taking precedence
	// over records.
	errByUserID map[string]error
	// lookups records every requested user id in call order.
	lookups []string
}
|
||||
|
||||
func newStaticUserDirectory(records map[string]UserRecord) *staticUserDirectory {
|
||||
return &staticUserDirectory{
|
||||
records: records,
|
||||
errByUserID: make(map[string]error),
|
||||
}
|
||||
}
|
||||
|
||||
func (directory *staticUserDirectory) GetUserByID(_ context.Context, userID string) (UserRecord, error) {
|
||||
directory.lookups = append(directory.lookups, userID)
|
||||
if err, ok := directory.errByUserID[userID]; ok {
|
||||
return UserRecord{}, err
|
||||
}
|
||||
record, ok := directory.records[userID]
|
||||
if !ok {
|
||||
return UserRecord{}, ErrRecipientNotFound
|
||||
}
|
||||
|
||||
return record, nil
|
||||
}
|
||||
|
||||
// recordingTelemetry is a telemetry test double that appends every reported
// outcome to in-memory slices for later assertions.
type recordingTelemetry struct {
	// intentOutcomes records RecordIntentOutcome calls in order.
	intentOutcomes []intentOutcomeRecord
	// userEnrichment records RecordUserEnrichmentAttempt calls in order.
	userEnrichment []userEnrichmentRecord
}
|
||||
|
||||
func (telemetry *recordingTelemetry) RecordIntentOutcome(_ context.Context, notificationType string, producer string, audienceKind string, outcome string) {
|
||||
telemetry.intentOutcomes = append(telemetry.intentOutcomes, intentOutcomeRecord{
|
||||
notificationType: notificationType,
|
||||
producer: producer,
|
||||
audienceKind: audienceKind,
|
||||
outcome: outcome,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingTelemetry) RecordUserEnrichmentAttempt(_ context.Context, notificationType string, result string) {
|
||||
telemetry.userEnrichment = append(telemetry.userEnrichment, userEnrichmentRecord{
|
||||
notificationType: notificationType,
|
||||
result: result,
|
||||
})
|
||||
}
|
||||
|
||||
// intentOutcomeRecord captures one RecordIntentOutcome call's arguments.
type intentOutcomeRecord struct {
	notificationType string
	producer         string
	audienceKind     string
	outcome          string
}
|
||||
|
||||
// userEnrichmentRecord captures one RecordUserEnrichmentAttempt call's
// arguments.
type userEnrichmentRecord struct {
	notificationType string
	result           string
}
|
||||
@@ -0,0 +1,3 @@
|
||||
// Package service reserves the application-service namespace of Notification
|
||||
// Service.
|
||||
package service
|
||||
@@ -0,0 +1,135 @@
|
||||
// Package malformedintent defines the operator-visible record used for
|
||||
// malformed notification intents.
|
||||
package malformedintent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FailureCode identifies one stable malformed-intent rejection reason.
//
// The set of codes is frozen; extend it only together with IsKnown so
// Validate keeps accepting every declared code.
type FailureCode string

const (
	// FailureCodeInvalidIntent reports malformed top-level intent fields or an
	// invalid normalized envelope.
	FailureCodeInvalidIntent FailureCode = "invalid_intent"

	// FailureCodeInvalidPayload reports malformed or schema-invalid
	// `payload_json`.
	FailureCodeInvalidPayload FailureCode = "invalid_payload"

	// FailureCodeIdempotencyConflict reports a duplicate idempotency scope that
	// conflicts with already accepted normalized content.
	FailureCodeIdempotencyConflict FailureCode = "idempotency_conflict"

	// FailureCodeRecipientNotFound reports that a user-targeted recipient user
	// id could not be resolved through User Service.
	FailureCodeRecipientNotFound FailureCode = "recipient_not_found"
)
|
||||
|
||||
// Entry stores one operator-visible malformed notification-intent record.
//
// Validate enforces the invariants: a non-empty stream entry id, a known
// failure code, a trimmed non-empty failure message, a non-nil RawFields map
// that encodes as a JSON object, and a UTC millisecond-precision RecordedAt.
type Entry struct {
	// StreamEntryID stores the Redis Stream entry identifier of the rejected
	// intent.
	StreamEntryID string

	// NotificationType stores the optional raw notification type extracted from
	// the rejected entry.
	NotificationType string

	// Producer stores the optional raw producer value extracted from the
	// rejected entry.
	Producer string

	// IdempotencyKey stores the optional raw idempotency key extracted from the
	// rejected entry.
	IdempotencyKey string

	// FailureCode stores the stable rejection classification.
	FailureCode FailureCode

	// FailureMessage stores the detailed validation or decode failure.
	FailureMessage string

	// RawFields stores the raw top-level stream fields captured for operator
	// inspection. It must be JSON-marshalable to an object.
	RawFields map[string]any

	// RecordedAt stores when the malformed intent was durably recorded.
	RecordedAt time.Time
}
|
||||
|
||||
// IsKnown reports whether code belongs to the frozen malformed-intent
|
||||
// rejection surface.
|
||||
func (code FailureCode) IsKnown() bool {
|
||||
switch code {
|
||||
case FailureCodeInvalidIntent, FailureCodeInvalidPayload, FailureCodeIdempotencyConflict, FailureCodeRecipientNotFound:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Validate reports whether entry contains a complete malformed-intent record.
|
||||
func (entry Entry) Validate() error {
|
||||
if strings.TrimSpace(entry.StreamEntryID) == "" {
|
||||
return fmt.Errorf("malformed intent stream entry id must not be empty")
|
||||
}
|
||||
if !entry.FailureCode.IsKnown() {
|
||||
return fmt.Errorf("malformed intent failure code %q is unsupported", entry.FailureCode)
|
||||
}
|
||||
if strings.TrimSpace(entry.FailureMessage) == "" {
|
||||
return fmt.Errorf("malformed intent failure message must not be empty")
|
||||
}
|
||||
if strings.TrimSpace(entry.FailureMessage) != entry.FailureMessage {
|
||||
return fmt.Errorf("malformed intent failure message must not contain surrounding whitespace")
|
||||
}
|
||||
if entry.RawFields == nil {
|
||||
return fmt.Errorf("malformed intent raw fields must not be nil")
|
||||
}
|
||||
if err := validateJSONObject("malformed intent raw fields", entry.RawFields); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := validateTimestamp("malformed intent recorded at", entry.RecordedAt); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateJSONObject reports whether value marshals to (and round-trips as)
// a JSON object. name prefixes every error for operator context.
func validateJSONObject(name string, value map[string]any) error {
	encoded, marshalErr := json.Marshal(value)
	switch {
	case marshalErr != nil:
		return fmt.Errorf("%s: %w", name, marshalErr)
	case string(encoded) == "null":
		// A nil map encodes as JSON null, not an object.
		return fmt.Errorf("%s must encode as a JSON object", name)
	}

	var roundTripped map[string]any
	if unmarshalErr := json.Unmarshal(encoded, &roundTripped); unmarshalErr != nil {
		return fmt.Errorf("%s: %w", name, unmarshalErr)
	}
	if roundTripped == nil {
		return fmt.Errorf("%s must encode as a JSON object", name)
	}

	return nil
}
|
||||
|
||||
func validateTimestamp(name string, value time.Time) error {
|
||||
if value.IsZero() {
|
||||
return fmt.Errorf("%s must not be zero", name)
|
||||
}
|
||||
if !value.Equal(value.UTC()) {
|
||||
return fmt.Errorf("%s must be UTC", name)
|
||||
}
|
||||
if !value.Equal(value.Truncate(time.Millisecond)) {
|
||||
return fmt.Errorf("%s must use millisecond precision", name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
// Package publishmail encodes accepted email routes into Mail Service generic
|
||||
// asynchronous template commands.
|
||||
package publishmail
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
netmail "net/mail"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
)
|
||||
|
||||
const (
	// commandSourceNotification tags every published command as originating
	// from Notification Service.
	commandSourceNotification = "notification"
	// commandPayloadModeTemplate selects Mail Service's template rendering
	// mode for the command payload.
	commandPayloadModeTemplate = "template"
)
|
||||
|
||||
// Command stores one Mail Service-compatible template delivery command
// produced from a durable notification email route.
type Command struct {
	// DeliveryID stores the stable route-level delivery identifier
	// ("<notification_id>/<route_id>").
	DeliveryID string

	// IdempotencyKey stores the stable Mail Service deduplication key
	// ("notification:" + DeliveryID).
	IdempotencyKey string

	// RequestedAt stores when Notification Service durably accepted the
	// notification intent.
	RequestedAt time.Time

	// PayloadJSON stores the fully encoded template-mode command payload.
	PayloadJSON string

	// RequestID stores the optional correlation identifier; empty when the
	// intent carried none.
	RequestID string

	// TraceID stores the optional tracing correlation identifier; empty when
	// the intent carried none.
	TraceID string
}
|
||||
|
||||
// Values returns the Redis Stream fields appended to the Mail Service command
|
||||
// stream for Command.
|
||||
func (command Command) Values() map[string]any {
|
||||
values := map[string]any{
|
||||
"delivery_id": command.DeliveryID,
|
||||
"source": commandSourceNotification,
|
||||
"payload_mode": commandPayloadModeTemplate,
|
||||
"idempotency_key": command.IdempotencyKey,
|
||||
"requested_at_ms": strconv.FormatInt(command.RequestedAt.UTC().UnixMilli(), 10),
|
||||
"payload_json": command.PayloadJSON,
|
||||
}
|
||||
if command.RequestID != "" {
|
||||
values["request_id"] = command.RequestID
|
||||
}
|
||||
if command.TraceID != "" {
|
||||
values["trace_id"] = command.TraceID
|
||||
}
|
||||
|
||||
return values
|
||||
}
|
||||
|
||||
// Encoder converts one accepted notification record plus its email route into
// one Mail Service-compatible generic template command. The zero value is
// ready to use; Encoder holds no state.
type Encoder struct{}
|
||||
|
||||
// Encode converts notification plus route into one template delivery command.
//
// It validates both records, requires them to reference the same
// notification, and rejects non-email routes as well as notification types
// whose audience does not support email. Errors caused by the stored payload
// or resolved recipient data carry the stable "payload_encoding_failed"
// marker in their message.
func (Encoder) Encode(notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) (Command, error) {
	if err := notification.Validate(); err != nil {
		return Command{}, fmt.Errorf("encode mail command: %w", err)
	}
	if err := route.Validate(); err != nil {
		return Command{}, fmt.Errorf("encode mail command: %w", err)
	}
	if notification.NotificationID != route.NotificationID {
		return Command{}, fmt.Errorf("encode mail command: notification id %q does not match route notification id %q", notification.NotificationID, route.NotificationID)
	}
	if route.Channel != intentstream.ChannelEmail {
		return Command{}, fmt.Errorf("encode mail command: route channel %q is unsupported", route.Channel)
	}
	if !notification.NotificationType.SupportsChannel(notification.AudienceKind, intentstream.ChannelEmail) {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: notification type %q does not support email", notification.NotificationType)
	}

	// Normalize the route's resolved recipient data before encoding.
	recipientEmail, err := normalizedRecipientEmail(route.ResolvedEmail)
	if err != nil {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", err)
	}
	locale, err := normalizedLocale(route.ResolvedLocale)
	if err != nil {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", err)
	}
	variables, err := payloadVariables(notification.PayloadJSON)
	if err != nil {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", err)
	}

	// The notification type doubles as the Mail Service template id; the
	// normalized payload becomes the template variables verbatim. Ancillary
	// lists are empty slices (never nil) so they encode as [] rather than
	// null.
	payloadJSON, err := json.Marshal(templatePayloadJSON{
		To:          []string{recipientEmail},
		Cc:          []string{},
		Bcc:         []string{},
		ReplyTo:     []string{},
		TemplateID:  string(notification.NotificationType),
		Locale:      locale,
		Variables:   variables,
		Attachments: []templateAttachmentJSON{},
	})
	if err != nil {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: marshal payload_json: %w", err)
	}

	return Command{
		DeliveryID:     notification.NotificationID + "/" + route.RouteID,
		IdempotencyKey: "notification:" + notification.NotificationID + "/" + route.RouteID,
		RequestedAt:    notification.AcceptedAt,
		PayloadJSON:    string(payloadJSON),
		RequestID:      notification.RequestID,
		TraceID:        notification.TraceID,
	}, nil
}
|
||||
|
||||
// templatePayloadJSON mirrors the Mail Service template-mode payload schema.
// Slice fields are populated with empty (non-nil) slices so they encode as
// [] rather than null.
type templatePayloadJSON struct {
	To          []string                 `json:"to"`
	Cc          []string                 `json:"cc"`
	Bcc         []string                 `json:"bcc"`
	ReplyTo     []string                 `json:"reply_to"`
	TemplateID  string                   `json:"template_id"`
	Locale      string                   `json:"locale"`
	Variables   json.RawMessage          `json:"variables"`
	Attachments []templateAttachmentJSON `json:"attachments"`
}
|
||||
|
||||
// templateAttachmentJSON mirrors the Mail Service attachment schema. The
// encoder currently always emits an empty attachments list.
type templateAttachmentJSON struct {
	Filename      string `json:"filename"`
	ContentType   string `json:"content_type"`
	ContentBase64 string `json:"content_base64"`
}
|
||||
|
||||
// normalizedRecipientEmail validates that value is a bare, parseable email
// address (no display name, no extra decoration) and returns it unchanged.
func normalizedRecipientEmail(value string) (string, error) {
	if strings.TrimSpace(value) == "" {
		return "", fmt.Errorf("resolved email must not be empty")
	}

	parsed, parseErr := netmail.ParseAddress(value)
	switch {
	case parseErr != nil:
		return "", fmt.Errorf("resolved email %q must be valid: %w", value, parseErr)
	case parsed.Name != "" || parsed.Address != value:
		// Any difference between the input and the parsed address means the
		// input carried a display name or other decoration.
		return "", fmt.Errorf("resolved email %q must not include a display name", value)
	default:
		return value, nil
	}
}
|
||||
|
||||
// normalizedLocale validates that value is non-empty and carries no
// surrounding whitespace, returning it unchanged.
func normalizedLocale(value string) (string, error) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return "", fmt.Errorf("resolved locale must not be empty")
	}
	if trimmed != value {
		return "", fmt.Errorf("resolved locale %q must not contain surrounding whitespace", value)
	}

	return value, nil
}
|
||||
|
||||
// payloadVariables verifies that payloadJSON decodes as a JSON object and
// returns the original bytes untouched, so key order and formatting are
// preserved in the outgoing command.
func payloadVariables(payloadJSON string) (json.RawMessage, error) {
	var decoded map[string]json.RawMessage
	if err := json.Unmarshal([]byte(payloadJSON), &decoded); err != nil {
		return nil, fmt.Errorf("decode payload_json: %w", err)
	}
	// JSON null leaves the map nil; only real objects are accepted.
	if decoded == nil {
		return nil, fmt.Errorf("payload_json must be a JSON object")
	}

	return json.RawMessage(payloadJSON), nil
}
|
||||
@@ -0,0 +1,275 @@
|
||||
package publishmail
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestEncoderEncodesUserAndAdminEmailCommands pins the full golden output of
// Encode for both audience kinds: delivery id, idempotency key, payload JSON,
// and the Redis Stream field map.
func TestEncoderEncodesUserAndAdminEmailCommands(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	tests := []struct {
		name            string
		notification    acceptintent.NotificationRecord
		route           acceptintent.NotificationRoute
		wantDeliveryID  string
		wantIdempotency string
		wantPayloadJSON string
	}{
		{
			// User-targeted route: recipient resolved from a user id.
			name: "user route",
			notification: acceptintent.NotificationRecord{
				NotificationID:     "1775121700000-0",
				NotificationType:   intentstream.NotificationTypeGameTurnReady,
				Producer:           intentstream.ProducerGameMaster,
				AudienceKind:       intentstream.AudienceKindUser,
				RecipientUserIDs:   []string{"user-1"},
				PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
				IdempotencyKey:     "game-123:turn-54",
				RequestFingerprint: "sha256:deadbeef",
				AcceptedAt:         now,
				OccurredAt:         now,
				UpdatedAt:          now,
			},
			route: acceptintent.NotificationRoute{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			wantDeliveryID:  "1775121700000-0/email:user:user-1",
			wantIdempotency: "notification:1775121700000-0/email:user:user-1",
			wantPayloadJSON: `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		},
		{
			// Admin-email route: recipient is a direct email address.
			name: "admin route",
			notification: acceptintent.NotificationRecord{
				NotificationID:     "1775121700001-0",
				NotificationType:   intentstream.NotificationTypeLobbyApplicationSubmitted,
				Producer:           intentstream.ProducerGameLobby,
				AudienceKind:       intentstream.AudienceKindAdminEmail,
				PayloadJSON:        `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
				IdempotencyKey:     "game-456:application-submitted:user-42",
				RequestFingerprint: "sha256:cafebabe",
				AcceptedAt:         now,
				OccurredAt:         now,
				UpdatedAt:          now,
			},
			route: acceptintent.NotificationRoute{
				NotificationID: "1775121700001-0",
				RouteID:        "email:email:owner@example.com",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "email:owner@example.com",
				Status:         acceptintent.RouteStatusPending,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "owner@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			wantDeliveryID:  "1775121700001-0/email:email:owner@example.com",
			wantIdempotency: "notification:1775121700001-0/email:email:owner@example.com",
			wantPayloadJSON: `{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"},"attachments":[]}`,
		},
	}

	for _, tt := range tests {
		tt := tt

		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			command, err := Encoder{}.Encode(tt.notification, tt.route)
			require.NoError(t, err)
			require.Equal(t, tt.wantDeliveryID, command.DeliveryID)
			require.Equal(t, tt.wantIdempotency, command.IdempotencyKey)
			require.Equal(t, now, command.RequestedAt)
			require.JSONEq(t, tt.wantPayloadJSON, command.PayloadJSON)

			// The stream field map must mirror the command fields plus the
			// fixed source/payload-mode markers.
			values := command.Values()
			require.Equal(t, tt.wantDeliveryID, values["delivery_id"])
			require.Equal(t, "notification", values["source"])
			require.Equal(t, "template", values["payload_mode"])
			require.Equal(t, tt.wantIdempotency, values["idempotency_key"])
			require.Equal(t, "1775121700000", values["requested_at_ms"])
		})
	}
}
|
||||
|
||||
// TestEncoderPropagatesTracingMetadata verifies that request_id and trace_id
// from the notification record surface in the stream field map.
func TestEncoderPropagatesTracingMetadata(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	command, err := Encoder{}.Encode(
		acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			RequestID:          "request-1",
			TraceID:            "trace-1",
			AcceptedAt:         now,
			OccurredAt:         now,
			UpdatedAt:          now,
		},
		acceptintent.NotificationRoute{
			NotificationID: "1775121700000-0",
			RouteID:        "email:user:user-1",
			Channel:        intentstream.ChannelEmail,
			RecipientRef:   "user:user-1",
			Status:         acceptintent.RouteStatusPending,
			MaxAttempts:    7,
			NextAttemptAt:  now,
			ResolvedEmail:  "pilot@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.NoError(t, err)

	values := command.Values()
	require.Equal(t, "request-1", values["request_id"])
	require.Equal(t, "trace-1", values["trace_id"])
}
|
||||
|
||||
// TestEncoderPreservesNormalizedPayloadAsTemplateVariables verifies that the
// stored payload JSON is carried verbatim into the command's "variables"
// object (numbers decode to float64 under encoding/json's default rules).
func TestEncoderPreservesNormalizedPayloadAsTemplateVariables(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	command, err := Encoder{}.Encode(
		acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameFinished,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"final_turn_number":81,"game_id":"game-123","game_name":"Nebula Clash"}`,
			IdempotencyKey:     "game-123:final",
			RequestFingerprint: "sha256:deadbeef",
			AcceptedAt:         now,
			OccurredAt:         now,
			UpdatedAt:          now,
		},
		acceptintent.NotificationRoute{
			NotificationID: "1775121700000-0",
			RouteID:        "email:user:user-1",
			Channel:        intentstream.ChannelEmail,
			RecipientRef:   "user:user-1",
			Status:         acceptintent.RouteStatusPending,
			MaxAttempts:    7,
			NextAttemptAt:  now,
			ResolvedEmail:  "pilot@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.NoError(t, err)

	var payload struct {
		Variables map[string]any `json:"variables"`
	}
	require.NoError(t, json.Unmarshal([]byte(command.PayloadJSON), &payload))
	require.Equal(t, map[string]any{
		"final_turn_number": float64(81),
		"game_id":           "game-123",
		"game_name":         "Nebula Clash",
	}, payload.Variables)
}
|
||||
|
||||
// TestEncoderUsesEmptyAncillaryEnvelopeFields verifies that cc/bcc/reply_to
// and attachments are encoded as empty arrays (never null) in the payload.
func TestEncoderUsesEmptyAncillaryEnvelopeFields(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	command, err := Encoder{}.Encode(
		acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeLobbyInviteExpired,
			Producer:           intentstream.ProducerGameLobby,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"}`,
			IdempotencyKey:     "game-123:invite-expired",
			RequestFingerprint: "sha256:deadbeef",
			AcceptedAt:         now,
			OccurredAt:         now,
			UpdatedAt:          now,
		},
		acceptintent.NotificationRoute{
			NotificationID: "1775121700000-0",
			RouteID:        "email:user:user-1",
			Channel:        intentstream.ChannelEmail,
			RecipientRef:   "user:user-1",
			Status:         acceptintent.RouteStatusPending,
			MaxAttempts:    7,
			NextAttemptAt:  now,
			ResolvedEmail:  "pilot@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.NoError(t, err)

	require.JSONEq(
		t,
		`{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.invite.expired","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"},"attachments":[]}`,
		command.PayloadJSON,
	)
}
|
||||
|
||||
// TestEncoderRejectsInvalidRouteForMailPublication verifies that a push route
// is rejected by the mail encoder with a stable channel-unsupported error.
func TestEncoderRejectsInvalidRouteForMailPublication(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	_, err := Encoder{}.Encode(
		acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			AcceptedAt:         now,
			OccurredAt:         now,
			UpdatedAt:          now,
		},
		acceptintent.NotificationRoute{
			NotificationID: "1775121700000-0",
			RouteID:        "push:user:user-1",
			Channel:        intentstream.ChannelPush,
			RecipientRef:   "user:user-1",
			Status:         acceptintent.RouteStatusPending,
			MaxAttempts:    3,
			NextAttemptAt:  now,
			ResolvedEmail:  "pilot@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.Error(t, err)
	require.ErrorContains(t, err, `route channel "push" is unsupported`)
}
|
||||
@@ -0,0 +1,221 @@
|
||||
// Package publishpush encodes user-facing notification routes into Gateway
|
||||
// client-event payloads.
|
||||
package publishpush
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/transcoder"
|
||||
)
|
||||
|
||||
// Event stores one Gateway-compatible client event produced from a
// user-targeted notification route.
type Event struct {
	// UserID stores the authenticated user fan-out target, extracted from the
	// route's recipient reference.
	UserID string

	// EventType stores the stable client-facing event type (the notification
	// type string).
	EventType string

	// EventID stores the stable route-level event identifier
	// ("<notification_id>/<route_id>").
	EventID string

	// PayloadBytes stores the encoded FlatBuffers payload bytes.
	PayloadBytes []byte

	// RequestID stores the optional correlation identifier; empty when the
	// notification carried none.
	RequestID string

	// TraceID stores the optional tracing correlation identifier; empty when
	// the notification carried none.
	TraceID string
}
|
||||
|
||||
// Encoder maps one supported notification_type to the corresponding checked-in
// FlatBuffers payload encoder. The zero value is ready to use; Encoder holds
// no state.
type Encoder struct{}
|
||||
|
||||
// Encode converts one accepted notification record plus its push route into a
// Gateway-compatible client event.
//
// Both records are validated first; only push-channel routes are accepted.
// The fan-out user id is derived from the route's recipient reference, and
// the payload is re-encoded from the stored JSON into the FlatBuffers form
// the Gateway expects.
func (Encoder) Encode(notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) (Event, error) {
	if err := notification.Validate(); err != nil {
		return Event{}, fmt.Errorf("encode push event: %w", err)
	}
	if err := route.Validate(); err != nil {
		return Event{}, fmt.Errorf("encode push event: %w", err)
	}
	if route.Channel != intentstream.ChannelPush {
		return Event{}, fmt.Errorf("encode push event: route channel %q is unsupported", route.Channel)
	}

	userID, err := userIDFromRecipientRef(route.RecipientRef)
	if err != nil {
		return Event{}, fmt.Errorf("encode push event: %w", err)
	}

	payloadBytes, err := encodePayload(notification.NotificationType, notification.PayloadJSON)
	if err != nil {
		return Event{}, fmt.Errorf("encode push event: %w", err)
	}

	return Event{
		UserID:       userID,
		EventType:    string(notification.NotificationType),
		EventID:      notification.NotificationID + "/" + route.RouteID,
		PayloadBytes: payloadBytes,
		RequestID:    notification.RequestID,
		TraceID:      notification.TraceID,
	}, nil
}
|
||||
|
||||
func encodePayload(notificationType intentstream.NotificationType, payloadJSON string) ([]byte, error) {
|
||||
switch notificationType {
|
||||
case intentstream.NotificationTypeGameTurnReady:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
TurnNumber int64 `json:"turn_number"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
if payload.TurnNumber < 1 {
|
||||
return nil, errors.New("payload_encoding_failed: turn_number must be at least 1")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.GameTurnReadyEventToPayload(&transcoder.GameTurnReadyEvent{
|
||||
GameID: payload.GameID,
|
||||
TurnNumber: payload.TurnNumber,
|
||||
}))
|
||||
case intentstream.NotificationTypeGameFinished:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
FinalTurnNumber int64 `json:"final_turn_number"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
if payload.FinalTurnNumber < 1 {
|
||||
return nil, errors.New("payload_encoding_failed: final_turn_number must be at least 1")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.GameFinishedEventToPayload(&transcoder.GameFinishedEvent{
|
||||
GameID: payload.GameID,
|
||||
FinalTurnNumber: payload.FinalTurnNumber,
|
||||
}))
|
||||
case intentstream.NotificationTypeLobbyApplicationSubmitted:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
ApplicantUserID string `json:"applicant_user_id"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
if payload.ApplicantUserID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: applicant_user_id is empty")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.LobbyApplicationSubmittedEventToPayload(&transcoder.LobbyApplicationSubmittedEvent{
|
||||
GameID: payload.GameID,
|
||||
ApplicantUserID: payload.ApplicantUserID,
|
||||
}))
|
||||
case intentstream.NotificationTypeLobbyMembershipApproved:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.LobbyMembershipApprovedEventToPayload(&transcoder.LobbyMembershipApprovedEvent{
|
||||
GameID: payload.GameID,
|
||||
}))
|
||||
case intentstream.NotificationTypeLobbyMembershipRejected:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.LobbyMembershipRejectedEventToPayload(&transcoder.LobbyMembershipRejectedEvent{
|
||||
GameID: payload.GameID,
|
||||
}))
|
||||
case intentstream.NotificationTypeLobbyInviteCreated:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
InviterUserID string `json:"inviter_user_id"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
if payload.InviterUserID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: inviter_user_id is empty")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.LobbyInviteCreatedEventToPayload(&transcoder.LobbyInviteCreatedEvent{
|
||||
GameID: payload.GameID,
|
||||
InviterUserID: payload.InviterUserID,
|
||||
}))
|
||||
case intentstream.NotificationTypeLobbyInviteRedeemed:
|
||||
var payload struct {
|
||||
GameID string `json:"game_id"`
|
||||
InviteeUserID string `json:"invitee_user_id"`
|
||||
}
|
||||
if err := decodePayload(payloadJSON, &payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if payload.GameID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: game_id is empty")
|
||||
}
|
||||
if payload.InviteeUserID == "" {
|
||||
return nil, errors.New("payload_encoding_failed: invitee_user_id is empty")
|
||||
}
|
||||
return wrapPayloadEncoding(transcoder.LobbyInviteRedeemedEventToPayload(&transcoder.LobbyInviteRedeemedEvent{
|
||||
GameID: payload.GameID,
|
||||
InviteeUserID: payload.InviteeUserID,
|
||||
}))
|
||||
default:
|
||||
return nil, fmt.Errorf("payload_encoding_failed: notification type %q does not support push", notificationType)
|
||||
}
|
||||
}
|
||||
|
||||
// decodePayload unmarshals the stored payload_json document into target,
// tagging any JSON decode failure with the payload_encoding_failed prefix.
func decodePayload(payloadJSON string, target any) error {
	err := json.Unmarshal([]byte(payloadJSON), target)
	if err == nil {
		return nil
	}

	return fmt.Errorf("payload_encoding_failed: decode payload_json: %w", err)
}
|
||||
|
||||
// wrapPayloadEncoding tags a transcoder encoding failure with the
// payload_encoding_failed prefix and otherwise passes payload through
// unchanged.
func wrapPayloadEncoding(payload []byte, err error) ([]byte, error) {
	if err == nil {
		return payload, nil
	}

	return nil, fmt.Errorf("payload_encoding_failed: %w", err)
}
|
||||
|
||||
// userIDFromRecipientRef extracts the user ID from a "user:<id>"-shaped
// recipient reference, rejecting refs that lack the prefix or carry an empty
// ID.
func userIDFromRecipientRef(recipientRef string) (string, error) {
	const prefix = "user:"
	if strings.HasPrefix(recipientRef, prefix) {
		if userID := recipientRef[len(prefix):]; userID != "" {
			return userID, nil
		}
	}

	return "", fmt.Errorf("recipient_ref %q is not user-targeted", recipientRef)
}
|
||||
@@ -0,0 +1,186 @@
|
||||
package publishpush
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/transcoder"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestEncoderEncodesSupportedPushNotificationTypes drives Encoder.Encode
// through every push-capable notification type and checks both the Event
// envelope (user ID, event type, composite event ID, request/trace IDs) and
// the transcoder round-trip of the encoded payload bytes. The stored
// payload_json deliberately carries extra fields (game_name, *_name) that
// the push payload is expected to drop.
func TestEncoderEncodesSupportedPushNotificationTypes(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	tests := []struct {
		name             string
		notificationType intentstream.NotificationType
		payloadJSON      string
		// assertPayload decodes the encoded payload bytes with the matching
		// transcoder reader and verifies the type-specific fields survived.
		assertPayload func(*testing.T, []byte)
	}{
		{
			name:             "game turn ready",
			notificationType: intentstream.NotificationTypeGameTurnReady,
			payloadJSON:      `{"game_id":"game-1","game_name":"Nebula Clash","turn_number":54}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToGameTurnReadyEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-1", event.GameID)
				require.Equal(t, int64(54), event.TurnNumber)
			},
		},
		{
			name:             "game finished",
			notificationType: intentstream.NotificationTypeGameFinished,
			payloadJSON:      `{"final_turn_number":81,"game_id":"game-2","game_name":"Nova"}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToGameFinishedEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-2", event.GameID)
				require.Equal(t, int64(81), event.FinalTurnNumber)
			},
		},
		{
			name:             "lobby application submitted",
			notificationType: intentstream.NotificationTypeLobbyApplicationSubmitted,
			payloadJSON:      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-2","game_id":"game-3","game_name":"Orion Front"}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToLobbyApplicationSubmittedEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-3", event.GameID)
				require.Equal(t, "user-2", event.ApplicantUserID)
			},
		},
		{
			name:             "lobby membership approved",
			notificationType: intentstream.NotificationTypeLobbyMembershipApproved,
			payloadJSON:      `{"game_id":"game-4","game_name":"Ares"}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToLobbyMembershipApprovedEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-4", event.GameID)
			},
		},
		{
			name:             "lobby membership rejected",
			notificationType: intentstream.NotificationTypeLobbyMembershipRejected,
			payloadJSON:      `{"game_id":"game-5","game_name":"Atlas"}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToLobbyMembershipRejectedEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-5", event.GameID)
			},
		},
		{
			name:             "lobby invite created",
			notificationType: intentstream.NotificationTypeLobbyInviteCreated,
			payloadJSON:      `{"game_id":"game-6","game_name":"Vega","inviter_name":"Nova Pilot","inviter_user_id":"user-9"}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToLobbyInviteCreatedEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-6", event.GameID)
				require.Equal(t, "user-9", event.InviterUserID)
			},
		},
		{
			name:             "lobby invite redeemed",
			notificationType: intentstream.NotificationTypeLobbyInviteRedeemed,
			payloadJSON:      `{"game_id":"game-7","game_name":"Lyra","invitee_name":"Skipper","invitee_user_id":"user-10"}`,
			assertPayload: func(t *testing.T, payload []byte) {
				t.Helper()
				event, err := transcoder.PayloadToLobbyInviteRedeemedEvent(payload)
				require.NoError(t, err)
				require.Equal(t, "game-7", event.GameID)
				require.Equal(t, "user-10", event.InviteeUserID)
			},
		},
	}

	for _, tt := range tests {
		// Capture the range variable so each parallel subtest closes over its
		// own copy (required before Go 1.22 loop-variable semantics).
		tt := tt

		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			event, err := Encoder{}.Encode(
				acceptintent.NotificationRecord{
					NotificationID:     "1775121700000-0",
					NotificationType:   tt.notificationType,
					Producer:           tt.notificationType.ExpectedProducer(),
					AudienceKind:       intentstream.AudienceKindUser,
					RecipientUserIDs:   []string{"user-1"},
					PayloadJSON:        tt.payloadJSON,
					IdempotencyKey:     "idem-1",
					RequestFingerprint: "sha256:deadbeef",
					RequestID:          "request-1",
					TraceID:            "trace-1",
					OccurredAt:         now,
					AcceptedAt:         now,
					UpdatedAt:          now,
				},
				acceptintent.NotificationRoute{
					NotificationID: "1775121700000-0",
					RouteID:        "push:user:user-1",
					Channel:        intentstream.ChannelPush,
					RecipientRef:   "user:user-1",
					Status:         acceptintent.RouteStatusPending,
					MaxAttempts:    3,
					NextAttemptAt:  now,
					CreatedAt:      now,
					UpdatedAt:      now,
				},
			)
			require.NoError(t, err)
			require.Equal(t, "user-1", event.UserID)
			require.Equal(t, string(tt.notificationType), event.EventType)
			// EventID is the notification ID joined with the route ID.
			require.Equal(t, "1775121700000-0/push:user:user-1", event.EventID)
			require.Equal(t, "request-1", event.RequestID)
			require.Equal(t, "trace-1", event.TraceID)
			require.NotEmpty(t, event.PayloadBytes)
			tt.assertPayload(t, event.PayloadBytes)
		})
	}
}
|
||||
|
||||
// TestEncoderRejectsInvalidStoredPayload verifies that Encoder.Encode
// surfaces a payload_encoding_failed error when the stored payload_json
// violates the push payload's field validation (here: empty game_id and a
// turn_number of 0 for a game-turn-ready notification).
func TestEncoderRejectsInvalidStoredPayload(t *testing.T) {
	t.Parallel()

	now := time.UnixMilli(1775121700000).UTC()
	_, err := Encoder{}.Encode(
		acceptintent.NotificationRecord{
			NotificationID:   "1775121700000-0",
			NotificationType: intentstream.NotificationTypeGameTurnReady,
			Producer:         intentstream.ProducerGameMaster,
			AudienceKind:     intentstream.AudienceKindUser,
			RecipientUserIDs: []string{"user-1"},
			// Invalid on two counts: empty game_id and turn_number below 1.
			PayloadJSON:        `{"game_id":"","game_name":"Nebula Clash","turn_number":0}`,
			IdempotencyKey:     "idem-1",
			RequestFingerprint: "sha256:deadbeef",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		acceptintent.NotificationRoute{
			NotificationID: "1775121700000-0",
			RouteID:        "push:user:user-1",
			Channel:        intentstream.ChannelPush,
			RecipientRef:   "user:user-1",
			Status:         acceptintent.RouteStatusPending,
			MaxAttempts:    3,
			NextAttemptAt:  now,
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.Error(t, err)
	require.ErrorContains(t, err, "payload_encoding_failed")
}
|
||||
@@ -0,0 +1,694 @@
|
||||
// Package telemetry provides lightweight OpenTelemetry helpers and
|
||||
// low-cardinality Notification Service instruments.
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
||||
"go.opentelemetry.io/otel/exporters/stdout/stdoutmetric"
|
||||
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/propagation"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
oteltrace "go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// meterName is the instrumentation scope under which every Notification
// Service custom instrument is created.
const meterName = "galaxy/notification"

const (
	// defaultServiceName is used when ProcessConfig.ServiceName is blank.
	defaultServiceName = "galaxy-notification"

	processExporterNone         = "none"
	processExporterOTLP         = "otlp"
	processProtocolHTTPProtobuf = "http/protobuf"
	processProtocolGRPC         = "grpc"
)

// ProcessConfig configures the process-wide OpenTelemetry runtime.
type ProcessConfig struct {
	// ServiceName overrides the default OpenTelemetry service name.
	ServiceName string

	// TracesExporter selects the external traces exporter. Supported values are
	// `none` and `otlp`.
	TracesExporter string

	// MetricsExporter selects the external metrics exporter. Supported values
	// are `none` and `otlp`.
	MetricsExporter string

	// TracesProtocol selects the OTLP traces protocol when TracesExporter is
	// `otlp`.
	TracesProtocol string

	// MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is
	// `otlp`.
	MetricsProtocol string

	// StdoutTracesEnabled enables the additional stdout trace exporter used for
	// local development and debugging.
	StdoutTracesEnabled bool

	// StdoutMetricsEnabled enables the additional stdout metric exporter used
	// for local development and debugging.
	StdoutMetricsEnabled bool
}

// Validate reports whether cfg contains a supported OpenTelemetry exporter
// configuration. Exporters are checked before protocols, and traces before
// metrics, so the first offending field determines the error.
func (cfg ProcessConfig) Validate() error {
	exporters := []struct {
		kind  string
		value string
	}{
		{kind: "traces", value: cfg.TracesExporter},
		{kind: "metrics", value: cfg.MetricsExporter},
	}
	for _, exporter := range exporters {
		switch exporter.value {
		case processExporterNone, processExporterOTLP:
		default:
			return fmt.Errorf("unsupported %s exporter %q", exporter.kind, exporter.value)
		}
	}

	// An empty protocol is allowed and means "use the exporter default".
	protocols := []struct {
		kind  string
		value string
	}{
		{kind: "traces", value: cfg.TracesProtocol},
		{kind: "metrics", value: cfg.MetricsProtocol},
	}
	for _, protocol := range protocols {
		switch protocol.value {
		case "", processProtocolHTTPProtobuf, processProtocolGRPC:
		default:
			return fmt.Errorf("unsupported OTLP %s protocol %q", protocol.kind, protocol.value)
		}
	}

	return nil
}
|
||||
|
||||
// Runtime owns the Notification Service OpenTelemetry providers and
// low-cardinality custom instruments.
type Runtime struct {
	// Providers exposed via TracerProvider()/MeterProvider() for wiring into
	// instrumented dependencies.
	tracerProvider oteltrace.TracerProvider
	meterProvider  metric.MeterProvider

	// shutdownMu guards the one-shot shutdown bookkeeping below.
	shutdownMu   sync.Mutex
	shutdownDone bool
	shutdownErr  error
	// shutdownFns run in reverse registration order during Shutdown.
	shutdownFns []func(context.Context) error

	// routeScheduleReader is installed at runtime via
	// SetRouteScheduleSnapshotReader; the RWMutex guards the swap against the
	// observable-gauge callback reads.
	routeScheduleReaderMu sync.RWMutex
	routeScheduleReader   RouteScheduleSnapshotReader

	// intentStreamLagReader is installed via
	// SetIntentStreamLagSnapshotReader under the same pattern.
	intentStreamLagReaderMu sync.RWMutex
	intentStreamLagReader   IntentStreamLagSnapshotReader

	// Custom counters/histograms created by buildRuntime. A zero-value
	// Runtime leaves these nil; construct via New/NewWithProviders/NewProcess.
	internalHTTPRequests  metric.Int64Counter
	internalHTTPDuration  metric.Float64Histogram
	internalHTTPLifecycle metric.Int64Counter
	intentOutcomes        metric.Int64Counter
	malformedIntents      metric.Int64Counter
	userEnrichment        metric.Int64Counter
	routePublishAttempts  metric.Int64Counter
	routeRetries          metric.Int64Counter
	routeDeadLetters      metric.Int64Counter
}
|
||||
|
||||
// RouteScheduleSnapshot stores the current observable state of the durable
// notification route schedule.
type RouteScheduleSnapshot struct {
	// Depth stores how many route keys are currently present in the route
	// schedule.
	Depth int64

	// OldestScheduledFor stores the oldest currently scheduled due time when
	// one exists. A nil value is treated as "nothing scheduled".
	OldestScheduledFor *time.Time
}

// RouteScheduleSnapshotReader loads one current route-schedule snapshot for
// observable gauge reporting.
//
// NOTE(review): implementations are invoked from the metrics SDK's
// observable callback, so they should be quick and honor the supplied
// context — confirm against the concrete reader.
type RouteScheduleSnapshotReader interface {
	// ReadRouteScheduleSnapshot returns the current route-schedule depth and
	// its oldest scheduled timestamp when one exists.
	ReadRouteScheduleSnapshot(context.Context) (RouteScheduleSnapshot, error)
}

// IntentStreamLagSnapshot stores the current observable lag of the plain-XREAD
// notification-intent consumer.
type IntentStreamLagSnapshot struct {
	// OldestUnprocessedAt stores the Redis Stream timestamp of the oldest
	// entry that has not yet been durably processed. A nil value means no
	// backlog is observable.
	OldestUnprocessedAt *time.Time
}

// IntentStreamLagSnapshotReader loads one current intent-stream lag snapshot
// for observable gauge reporting.
type IntentStreamLagSnapshotReader interface {
	// ReadIntentStreamLagSnapshot returns the oldest unprocessed stream entry
	// timestamp when one exists.
	ReadIntentStreamLagSnapshot(context.Context) (IntentStreamLagSnapshot, error)
}
|
||||
|
||||
// New constructs a lightweight telemetry runtime around meterProvider for
|
||||
// tests and embedded use cases that do not need process-level exporter wiring.
|
||||
func New(meterProvider metric.MeterProvider) (*Runtime, error) {
|
||||
return NewWithProviders(meterProvider, nil)
|
||||
}
|
||||
|
||||
// NewWithProviders constructs a telemetry runtime around explicitly supplied
|
||||
// meterProvider and tracerProvider values.
|
||||
func NewWithProviders(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider) (*Runtime, error) {
|
||||
if meterProvider == nil {
|
||||
meterProvider = otel.GetMeterProvider()
|
||||
}
|
||||
if tracerProvider == nil {
|
||||
tracerProvider = otel.GetTracerProvider()
|
||||
}
|
||||
if meterProvider == nil {
|
||||
return nil, errors.New("new notification telemetry runtime: nil meter provider")
|
||||
}
|
||||
if tracerProvider == nil {
|
||||
return nil, errors.New("new notification telemetry runtime: nil tracer provider")
|
||||
}
|
||||
|
||||
return buildRuntime(meterProvider, tracerProvider, nil)
|
||||
}
|
||||
|
||||
// NewProcess constructs the process-wide Notification Service OpenTelemetry
|
||||
// runtime from cfg, installs the resulting providers globally, and returns the
|
||||
// runtime.
|
||||
func NewProcess(ctx context.Context, cfg ProcessConfig, logger *slog.Logger) (*Runtime, error) {
|
||||
if ctx == nil {
|
||||
return nil, errors.New("new notification telemetry process: nil context")
|
||||
}
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("new notification telemetry process: %w", err)
|
||||
}
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
serviceName := strings.TrimSpace(cfg.ServiceName)
|
||||
if serviceName == "" {
|
||||
serviceName = defaultServiceName
|
||||
}
|
||||
|
||||
res := resource.NewSchemaless(attribute.String("service.name", serviceName))
|
||||
|
||||
tracerProvider, err := newTracerProvider(ctx, res, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new notification telemetry process: tracer provider: %w", err)
|
||||
}
|
||||
meterProvider, err := newMeterProvider(ctx, res, cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new notification telemetry process: meter provider: %w", err)
|
||||
}
|
||||
|
||||
otel.SetTracerProvider(tracerProvider)
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
||||
propagation.TraceContext{},
|
||||
propagation.Baggage{},
|
||||
))
|
||||
|
||||
runtime, err := buildRuntime(meterProvider, tracerProvider, []func(context.Context) error{
|
||||
meterProvider.Shutdown,
|
||||
tracerProvider.Shutdown,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new notification telemetry process: runtime: %w", err)
|
||||
}
|
||||
|
||||
logger.Info("notification telemetry configured",
|
||||
"service_name", serviceName,
|
||||
"traces_exporter", cfg.TracesExporter,
|
||||
"metrics_exporter", cfg.MetricsExporter,
|
||||
)
|
||||
|
||||
return runtime, nil
|
||||
}
|
||||
|
||||
// TracerProvider returns the runtime tracer provider.
|
||||
func (runtime *Runtime) TracerProvider() oteltrace.TracerProvider {
|
||||
if runtime == nil || runtime.tracerProvider == nil {
|
||||
return otel.GetTracerProvider()
|
||||
}
|
||||
|
||||
return runtime.tracerProvider
|
||||
}
|
||||
|
||||
// MeterProvider returns the runtime meter provider.
|
||||
func (runtime *Runtime) MeterProvider() metric.MeterProvider {
|
||||
if runtime == nil || runtime.meterProvider == nil {
|
||||
return otel.GetMeterProvider()
|
||||
}
|
||||
|
||||
return runtime.meterProvider
|
||||
}
|
||||
|
||||
// Shutdown flushes and stops the configured telemetry providers. Shutdown is
|
||||
// idempotent.
|
||||
func (runtime *Runtime) Shutdown(ctx context.Context) error {
|
||||
if runtime == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
runtime.shutdownMu.Lock()
|
||||
if runtime.shutdownDone {
|
||||
err := runtime.shutdownErr
|
||||
runtime.shutdownMu.Unlock()
|
||||
return err
|
||||
}
|
||||
runtime.shutdownDone = true
|
||||
runtime.shutdownMu.Unlock()
|
||||
|
||||
var shutdownErr error
|
||||
for index := len(runtime.shutdownFns) - 1; index >= 0; index-- {
|
||||
shutdownErr = errors.Join(shutdownErr, runtime.shutdownFns[index](ctx))
|
||||
}
|
||||
|
||||
runtime.shutdownMu.Lock()
|
||||
runtime.shutdownErr = shutdownErr
|
||||
runtime.shutdownMu.Unlock()
|
||||
|
||||
return shutdownErr
|
||||
}
|
||||
|
||||
// RecordInternalHTTPRequest records one internal HTTP request outcome.
|
||||
func (runtime *Runtime) RecordInternalHTTPRequest(ctx context.Context, attrs []attribute.KeyValue, duration time.Duration) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
options := metric.WithAttributes(attrs...)
|
||||
runtime.internalHTTPRequests.Add(normalizeContext(ctx), 1, options)
|
||||
runtime.internalHTTPDuration.Record(normalizeContext(ctx), duration.Seconds()*1000, options)
|
||||
}
|
||||
|
||||
// RecordInternalHTTPEvent records one internal HTTP server lifecycle event.
|
||||
func (runtime *Runtime) RecordInternalHTTPEvent(ctx context.Context, event string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.internalHTTPLifecycle.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(attribute.String("event", strings.TrimSpace(event))),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordIntentOutcome records one accepted notification-intent outcome.
|
||||
func (runtime *Runtime) RecordIntentOutcome(ctx context.Context, notificationType string, producer string, audienceKind string, outcome string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.intentOutcomes.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
|
||||
attribute.String("producer", cleanAttribute(producer, "unknown")),
|
||||
attribute.String("audience_kind", cleanAttribute(audienceKind, "unknown")),
|
||||
attribute.String("outcome", cleanAttribute(outcome, "unknown")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordMalformedIntent records one malformed or rejected notification intent.
|
||||
func (runtime *Runtime) RecordMalformedIntent(ctx context.Context, failureCode string, notificationType string, producer string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.malformedIntents.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("failure_code", cleanAttribute(failureCode, "unknown")),
|
||||
attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
|
||||
attribute.String("producer", cleanAttribute(producer, "unknown")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordUserEnrichmentAttempt records one User Service enrichment lookup
|
||||
// outcome.
|
||||
func (runtime *Runtime) RecordUserEnrichmentAttempt(ctx context.Context, notificationType string, result string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.userEnrichment.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
|
||||
attribute.String("result", cleanAttribute(result, "unknown")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordRoutePublishAttempt records one route publication attempt outcome.
|
||||
func (runtime *Runtime) RecordRoutePublishAttempt(ctx context.Context, channel string, notificationType string, result string, failureClassification string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.routePublishAttempts.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("channel", cleanAttribute(channel, "unknown")),
|
||||
attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
|
||||
attribute.String("result", cleanAttribute(result, "unknown")),
|
||||
attribute.String("failure_classification", cleanAttribute(failureClassification, "none")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordRouteRetry records one route retry scheduling event.
|
||||
func (runtime *Runtime) RecordRouteRetry(ctx context.Context, channel string, notificationType string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.routeRetries.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("channel", cleanAttribute(channel, "unknown")),
|
||||
attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordRouteDeadLetter records one route transition to dead_letter.
|
||||
func (runtime *Runtime) RecordRouteDeadLetter(ctx context.Context, channel string, notificationType string, failureClassification string) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.routeDeadLetters.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("channel", cleanAttribute(channel, "unknown")),
|
||||
attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
|
||||
attribute.String("failure_classification", cleanAttribute(failureClassification, "unknown")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// SetRouteScheduleSnapshotReader installs the route-schedule reader used by
|
||||
// the observable route schedule gauges.
|
||||
func (runtime *Runtime) SetRouteScheduleSnapshotReader(reader RouteScheduleSnapshotReader) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.routeScheduleReaderMu.Lock()
|
||||
runtime.routeScheduleReader = reader
|
||||
runtime.routeScheduleReaderMu.Unlock()
|
||||
}
|
||||
|
||||
// SetIntentStreamLagSnapshotReader installs the intent-stream lag reader used
|
||||
// by the observable lag gauge.
|
||||
func (runtime *Runtime) SetIntentStreamLagSnapshotReader(reader IntentStreamLagSnapshotReader) {
|
||||
if runtime == nil {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.intentStreamLagReaderMu.Lock()
|
||||
runtime.intentStreamLagReader = reader
|
||||
runtime.intentStreamLagReaderMu.Unlock()
|
||||
}
|
||||
|
||||
func buildRuntime(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider, shutdownFns []func(context.Context) error) (*Runtime, error) {
|
||||
meter := meterProvider.Meter(meterName)
|
||||
runtime := &Runtime{
|
||||
tracerProvider: tracerProvider,
|
||||
meterProvider: meterProvider,
|
||||
shutdownFns: append([]func(context.Context) error(nil), shutdownFns...),
|
||||
}
|
||||
|
||||
internalHTTPRequests, err := meter.Int64Counter("notification.internal_http.requests")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: internal_http.requests: %w", err)
|
||||
}
|
||||
internalHTTPDuration, err := meter.Float64Histogram("notification.internal_http.duration_ms", metric.WithUnit("ms"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: internal_http.duration_ms: %w", err)
|
||||
}
|
||||
internalHTTPLifecycle, err := meter.Int64Counter("notification.internal_http.lifecycle")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: internal_http.lifecycle: %w", err)
|
||||
}
|
||||
intentOutcomes, err := meter.Int64Counter("notification.intent.outcomes")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: intent.outcomes: %w", err)
|
||||
}
|
||||
malformedIntents, err := meter.Int64Counter("notification.intent.malformed")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: intent.malformed: %w", err)
|
||||
}
|
||||
userEnrichment, err := meter.Int64Counter("notification.user_enrichment.attempts")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: user_enrichment.attempts: %w", err)
|
||||
}
|
||||
routePublishAttempts, err := meter.Int64Counter("notification.route.publish_attempts")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: route.publish_attempts: %w", err)
|
||||
}
|
||||
routeRetries, err := meter.Int64Counter("notification.route.retries")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: route.retries: %w", err)
|
||||
}
|
||||
routeDeadLetters, err := meter.Int64Counter("notification.route.dead_letters")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: route.dead_letters: %w", err)
|
||||
}
|
||||
routeScheduleDepth, err := meter.Int64ObservableGauge("notification.route_schedule.depth")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: route_schedule.depth: %w", err)
|
||||
}
|
||||
routeScheduleOldestAge, err := meter.Int64ObservableGauge("notification.route_schedule.oldest_age_ms", metric.WithUnit("ms"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: route_schedule.oldest_age_ms: %w", err)
|
||||
}
|
||||
intentStreamOldestUnprocessedAge, err := meter.Int64ObservableGauge("notification.intent_stream.oldest_unprocessed_age_ms", metric.WithUnit("ms"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: intent_stream.oldest_unprocessed_age_ms: %w", err)
|
||||
}
|
||||
registration, err := meter.RegisterCallback(func(ctx context.Context, observer metric.Observer) error {
|
||||
runtime.observeRouteSchedule(ctx, observer, routeScheduleDepth, routeScheduleOldestAge)
|
||||
runtime.observeIntentStreamLag(ctx, observer, intentStreamOldestUnprocessedAge)
|
||||
return nil
|
||||
}, routeScheduleDepth, routeScheduleOldestAge, intentStreamOldestUnprocessedAge)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build notification telemetry runtime: observable callbacks: %w", err)
|
||||
}
|
||||
runtime.shutdownFns = append(runtime.shutdownFns, func(context.Context) error {
|
||||
return registration.Unregister()
|
||||
})
|
||||
|
||||
runtime.internalHTTPRequests = internalHTTPRequests
|
||||
runtime.internalHTTPDuration = internalHTTPDuration
|
||||
runtime.internalHTTPLifecycle = internalHTTPLifecycle
|
||||
runtime.intentOutcomes = intentOutcomes
|
||||
runtime.malformedIntents = malformedIntents
|
||||
runtime.userEnrichment = userEnrichment
|
||||
runtime.routePublishAttempts = routePublishAttempts
|
||||
runtime.routeRetries = routeRetries
|
||||
runtime.routeDeadLetters = routeDeadLetters
|
||||
|
||||
return runtime, nil
|
||||
}
|
||||
|
||||
func newTracerProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig) (*sdktrace.TracerProvider, error) {
|
||||
options := []sdktrace.TracerProviderOption{
|
||||
sdktrace.WithResource(res),
|
||||
}
|
||||
|
||||
if exporter, err := traceExporter(ctx, cfg); err != nil {
|
||||
return nil, err
|
||||
} else if exporter != nil {
|
||||
options = append(options, sdktrace.WithBatcher(exporter))
|
||||
}
|
||||
|
||||
if cfg.StdoutTracesEnabled {
|
||||
exporter, err := stdouttrace.New(stdouttrace.WithWriter(os.Stdout))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("stdout traces exporter: %w", err)
|
||||
}
|
||||
options = append(options, sdktrace.WithBatcher(exporter))
|
||||
}
|
||||
|
||||
return sdktrace.NewTracerProvider(options...), nil
|
||||
}
|
||||
|
||||
func newMeterProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig) (*sdkmetric.MeterProvider, error) {
|
||||
options := []sdkmetric.Option{
|
||||
sdkmetric.WithResource(res),
|
||||
}
|
||||
|
||||
if exporter, err := metricExporter(ctx, cfg); err != nil {
|
||||
return nil, err
|
||||
} else if exporter != nil {
|
||||
options = append(options, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)))
|
||||
}
|
||||
|
||||
if cfg.StdoutMetricsEnabled {
|
||||
exporter, err := stdoutmetric.New(stdoutmetric.WithWriter(os.Stdout))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("stdout metrics exporter: %w", err)
|
||||
}
|
||||
options = append(options, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)))
|
||||
}
|
||||
|
||||
return sdkmetric.NewMeterProvider(options...), nil
|
||||
}
|
||||
|
||||
func traceExporter(ctx context.Context, cfg ProcessConfig) (sdktrace.SpanExporter, error) {
|
||||
if cfg.TracesExporter != processExporterOTLP {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch normalizeProtocol(cfg.TracesProtocol) {
|
||||
case processProtocolGRPC:
|
||||
exporter, err := otlptracegrpc.New(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp grpc traces exporter: %w", err)
|
||||
}
|
||||
return exporter, nil
|
||||
default:
|
||||
exporter, err := otlptracehttp.New(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp http traces exporter: %w", err)
|
||||
}
|
||||
return exporter, nil
|
||||
}
|
||||
}
|
||||
|
||||
func metricExporter(ctx context.Context, cfg ProcessConfig) (sdkmetric.Exporter, error) {
|
||||
if cfg.MetricsExporter != processExporterOTLP {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch normalizeProtocol(cfg.MetricsProtocol) {
|
||||
case processProtocolGRPC:
|
||||
exporter, err := otlpmetricgrpc.New(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp grpc metrics exporter: %w", err)
|
||||
}
|
||||
return exporter, nil
|
||||
default:
|
||||
exporter, err := otlpmetrichttp.New(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp http metrics exporter: %w", err)
|
||||
}
|
||||
return exporter, nil
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeProtocol(value string) string {
|
||||
switch strings.TrimSpace(value) {
|
||||
case processProtocolGRPC:
|
||||
return processProtocolGRPC
|
||||
default:
|
||||
return processProtocolHTTPProtobuf
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeContext(ctx context.Context) context.Context {
|
||||
if ctx == nil {
|
||||
return context.Background()
|
||||
}
|
||||
|
||||
return ctx
|
||||
}
|
||||
|
||||
// cleanAttribute trims value and returns fallback when nothing remains after
// trimming.
func cleanAttribute(value string, fallback string) string {
	if trimmed := strings.TrimSpace(value); trimmed != "" {
		return trimmed
	}
	return fallback
}
|
||||
|
||||
// observeRouteSchedule reports the route-schedule depth and oldest-entry age
// gauges on each metrics collection. When no snapshot reader is registered,
// or the read fails, both gauges fall back to zero so the series stays
// continuous; read errors go to the global OTel error handler instead of
// aborting the callback.
func (runtime *Runtime) observeRouteSchedule(
	ctx context.Context,
	observer metric.Observer,
	depthGauge metric.Int64ObservableGauge,
	oldestAgeGauge metric.Int64ObservableGauge,
) {
	// Defaults observed when no reader is set or the read fails.
	depth := int64(0)
	oldestAge := int64(0)

	reader := runtime.currentRouteScheduleReader()
	if reader != nil {
		snapshot, err := reader.ReadRouteScheduleSnapshot(ctx)
		if err != nil {
			otel.Handle(fmt.Errorf("observe notification route schedule: %w", err))
		} else {
			// Only positive depths are reported; anything else keeps zero.
			if snapshot.Depth > 0 {
				depth = snapshot.Depth
			}
			if snapshot.OldestScheduledFor != nil {
				oldestAge = time.Since(snapshot.OldestScheduledFor.UTC()).Milliseconds()
				// Clamp future-dated entries to zero age.
				if oldestAge < 0 {
					oldestAge = 0
				}
			}
		}
	}

	observer.ObserveInt64(depthGauge, depth)
	observer.ObserveInt64(oldestAgeGauge, oldestAge)
}
|
||||
|
||||
// observeIntentStreamLag reports the intent-stream oldest-unprocessed-entry
// age gauge on each metrics collection. Without a registered reader, or on a
// read error (forwarded to the global OTel error handler), the gauge falls
// back to zero.
func (runtime *Runtime) observeIntentStreamLag(
	ctx context.Context,
	observer metric.Observer,
	oldestUnprocessedAgeGauge metric.Int64ObservableGauge,
) {
	// Default observed when no reader is set or the read fails.
	oldestAge := int64(0)

	reader := runtime.currentIntentStreamLagReader()
	if reader != nil {
		snapshot, err := reader.ReadIntentStreamLagSnapshot(ctx)
		if err != nil {
			otel.Handle(fmt.Errorf("observe notification intent stream lag: %w", err))
		} else if snapshot.OldestUnprocessedAt != nil {
			oldestAge = time.Since(snapshot.OldestUnprocessedAt.UTC()).Milliseconds()
			// Clamp future-dated entries to zero age.
			if oldestAge < 0 {
				oldestAge = 0
			}
		}
	}

	observer.ObserveInt64(oldestUnprocessedAgeGauge, oldestAge)
}
|
||||
|
||||
func (runtime *Runtime) currentRouteScheduleReader() RouteScheduleSnapshotReader {
|
||||
runtime.routeScheduleReaderMu.RLock()
|
||||
defer runtime.routeScheduleReaderMu.RUnlock()
|
||||
return runtime.routeScheduleReader
|
||||
}
|
||||
|
||||
func (runtime *Runtime) currentIntentStreamLagReader() IntentStreamLagSnapshotReader {
|
||||
runtime.intentStreamLagReaderMu.RLock()
|
||||
defer runtime.intentStreamLagReaderMu.RUnlock()
|
||||
return runtime.intentStreamLagReader
|
||||
}
|
||||
@@ -0,0 +1,228 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/metric/metricdata"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
)
|
||||
|
||||
// TestRuntimeRecordsMetrics drives every Runtime recording helper (twice
// where attribute sets differ), wires stub snapshot readers for the
// observable gauges, and asserts each metric series through a manual reader.
func TestRuntimeRecordsMetrics(t *testing.T) {
	t.Parallel()

	// A manual reader lets the test collect on demand instead of exporting.
	reader := sdkmetric.NewManualReader()
	meterProvider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader))
	tracerProvider := sdktrace.NewTracerProvider()

	runtime, err := NewWithProviders(meterProvider, tracerProvider)
	require.NoError(t, err)

	// Record one sample for each counter family.
	runtime.RecordInternalHTTPRequest(context.Background(), []attribute.KeyValue{
		attribute.String("route", "/healthz"),
		attribute.String("method", "GET"),
		attribute.String("edge_outcome", "success"),
	}, 5*time.Millisecond)
	runtime.RecordInternalHTTPEvent(context.Background(), "started")
	runtime.RecordIntentOutcome(context.Background(), "game.turn.ready", "game_master", "user", "accepted")
	runtime.RecordIntentOutcome(context.Background(), "game.turn.ready", "game_master", "user", "duplicate")
	runtime.RecordMalformedIntent(context.Background(), "idempotency_conflict", "game.turn.ready", "game_master")
	runtime.RecordUserEnrichmentAttempt(context.Background(), "game.turn.ready", "success")
	runtime.RecordUserEnrichmentAttempt(context.Background(), "game.turn.ready", "recipient_not_found")
	runtime.RecordRoutePublishAttempt(context.Background(), "push", "game.turn.ready", "published", "")
	runtime.RecordRoutePublishAttempt(context.Background(), "email", "game.turn.ready", "retry", "mail_stream_publish_failed")
	runtime.RecordRouteRetry(context.Background(), "email", "game.turn.ready")
	runtime.RecordRouteDeadLetter(context.Background(), "email", "game.turn.ready", "mail_stream_publish_failed")
	// Stub readers feed the observable gauges; past timestamps keep the
	// derived ages strictly positive when the callback fires at collection.
	scheduledAt := time.Now().Add(-time.Second).UTC()
	unprocessedAt := time.Now().Add(-2 * time.Second).UTC()
	runtime.SetRouteScheduleSnapshotReader(stubRouteScheduleSnapshotReader{
		snapshot: RouteScheduleSnapshot{
			Depth:              3,
			OldestScheduledFor: &scheduledAt,
		},
	})
	runtime.SetIntentStreamLagSnapshotReader(stubIntentStreamLagSnapshotReader{
		snapshot: IntentStreamLagSnapshot{
			OldestUnprocessedAt: &unprocessedAt,
		},
	})

	assertMetricCount(t, reader, "notification.internal_http.requests", map[string]string{
		"route":        "/healthz",
		"method":       "GET",
		"edge_outcome": "success",
	}, 1)
	assertMetricCount(t, reader, "notification.internal_http.lifecycle", map[string]string{
		"event": "started",
	}, 1)
	assertMetricCount(t, reader, "notification.intent.outcomes", map[string]string{
		"notification_type": "game.turn.ready",
		"producer":          "game_master",
		"audience_kind":     "user",
		"outcome":           "accepted",
	}, 1)
	assertMetricCount(t, reader, "notification.intent.outcomes", map[string]string{
		"notification_type": "game.turn.ready",
		"producer":          "game_master",
		"audience_kind":     "user",
		"outcome":           "duplicate",
	}, 1)
	assertMetricCount(t, reader, "notification.intent.malformed", map[string]string{
		"failure_code":      "idempotency_conflict",
		"notification_type": "game.turn.ready",
		"producer":          "game_master",
	}, 1)
	assertMetricCount(t, reader, "notification.user_enrichment.attempts", map[string]string{
		"notification_type": "game.turn.ready",
		"result":            "success",
	}, 1)
	assertMetricCount(t, reader, "notification.user_enrichment.attempts", map[string]string{
		"notification_type": "game.turn.ready",
		"result":            "recipient_not_found",
	}, 1)
	assertMetricCount(t, reader, "notification.route.publish_attempts", map[string]string{
		"channel":                "push",
		"notification_type":      "game.turn.ready",
		"result":                 "published",
		"failure_classification": "none",
	}, 1)
	assertMetricCount(t, reader, "notification.route.publish_attempts", map[string]string{
		"channel":                "email",
		"notification_type":      "game.turn.ready",
		"result":                 "retry",
		"failure_classification": "mail_stream_publish_failed",
	}, 1)
	assertMetricCount(t, reader, "notification.route.retries", map[string]string{
		"channel":           "email",
		"notification_type": "game.turn.ready",
	}, 1)
	assertMetricCount(t, reader, "notification.route.dead_letters", map[string]string{
		"channel":                "email",
		"notification_type":      "game.turn.ready",
		"failure_classification": "mail_stream_publish_failed",
	}, 1)
	// Observable gauges: depth comes straight from the stub snapshot; the
	// two age gauges are time-derived, so only positivity is asserted.
	assertGaugeValue(t, reader, "notification.route_schedule.depth", nil, 3)
	assertGaugePositive(t, reader, "notification.route_schedule.oldest_age_ms", nil)
	assertGaugePositive(t, reader, "notification.intent_stream.oldest_unprocessed_age_ms", nil)
}
|
||||
|
||||
func assertMetricCount(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string, wantValue int64) {
|
||||
t.Helper()
|
||||
|
||||
var resourceMetrics metricdata.ResourceMetrics
|
||||
require.NoError(t, reader.Collect(context.Background(), &resourceMetrics))
|
||||
|
||||
for _, scopeMetrics := range resourceMetrics.ScopeMetrics {
|
||||
for _, metric := range scopeMetrics.Metrics {
|
||||
if metric.Name != metricName {
|
||||
continue
|
||||
}
|
||||
|
||||
sum, ok := metric.Data.(metricdata.Sum[int64])
|
||||
require.True(t, ok)
|
||||
|
||||
for _, point := range sum.DataPoints {
|
||||
if hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) {
|
||||
assert.Equal(t, wantValue, point.Value)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
require.Failf(t, "test failed", "metric %q with attrs %v not found", metricName, wantAttrs)
|
||||
}
|
||||
|
||||
func assertGaugeValue(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string, wantValue int64) {
|
||||
t.Helper()
|
||||
|
||||
var resourceMetrics metricdata.ResourceMetrics
|
||||
require.NoError(t, reader.Collect(context.Background(), &resourceMetrics))
|
||||
|
||||
for _, scopeMetrics := range resourceMetrics.ScopeMetrics {
|
||||
for _, metric := range scopeMetrics.Metrics {
|
||||
if metric.Name != metricName {
|
||||
continue
|
||||
}
|
||||
|
||||
gauge, ok := metric.Data.(metricdata.Gauge[int64])
|
||||
require.True(t, ok)
|
||||
|
||||
for _, point := range gauge.DataPoints {
|
||||
if hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) {
|
||||
assert.Equal(t, wantValue, point.Value)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
require.Failf(t, "test failed", "gauge %q with attrs %v not found", metricName, wantAttrs)
|
||||
}
|
||||
|
||||
func assertGaugePositive(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string) {
|
||||
t.Helper()
|
||||
|
||||
var resourceMetrics metricdata.ResourceMetrics
|
||||
require.NoError(t, reader.Collect(context.Background(), &resourceMetrics))
|
||||
|
||||
for _, scopeMetrics := range resourceMetrics.ScopeMetrics {
|
||||
for _, metric := range scopeMetrics.Metrics {
|
||||
if metric.Name != metricName {
|
||||
continue
|
||||
}
|
||||
|
||||
gauge, ok := metric.Data.(metricdata.Gauge[int64])
|
||||
require.True(t, ok)
|
||||
|
||||
for _, point := range gauge.DataPoints {
|
||||
if hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) {
|
||||
assert.Greater(t, point.Value, int64(0))
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
require.Failf(t, "test failed", "gauge %q with attrs %v not found", metricName, wantAttrs)
|
||||
}
|
||||
|
||||
func hasMetricAttributes(values []attribute.KeyValue, want map[string]string) bool {
|
||||
if len(want) == 0 {
|
||||
return len(values) == 0
|
||||
}
|
||||
if len(values) != len(want) {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, value := range values {
|
||||
if want[string(value.Key)] != value.Value.AsString() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// stubRouteScheduleSnapshotReader returns a canned snapshot (and optional
// error) so tests can drive the route-schedule gauges deterministically.
type stubRouteScheduleSnapshotReader struct {
	snapshot RouteScheduleSnapshot // snapshot returned by every read
	err      error                 // error returned by every read
}
|
||||
|
||||
func (reader stubRouteScheduleSnapshotReader) ReadRouteScheduleSnapshot(context.Context) (RouteScheduleSnapshot, error) {
|
||||
return reader.snapshot, reader.err
|
||||
}
|
||||
|
||||
// stubIntentStreamLagSnapshotReader returns a canned snapshot (and optional
// error) so tests can drive the intent-stream lag gauge deterministically.
type stubIntentStreamLagSnapshotReader struct {
	snapshot IntentStreamLagSnapshot // snapshot returned by every read
	err      error                   // error returned by every read
}
|
||||
|
||||
func (reader stubIntentStreamLagSnapshotReader) ReadIntentStreamLagSnapshot(context.Context) (IntentStreamLagSnapshot, error) {
|
||||
return reader.snapshot, reader.err
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
// Package worker provides the long-lived background components used by the
|
||||
// runnable Notification Service process.
|
||||
package worker
|
||||
@@ -0,0 +1,421 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/logging"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/publishmail"
|
||||
)
|
||||
|
||||
// Failure classifications recorded when an email route publication fails.
const (
	// emailFailureClassificationPayloadEncoding marks routes whose payload
	// could not be encoded into a Mail Service command.
	emailFailureClassificationPayloadEncoding = "payload_encoding_failed"
	// emailFailureClassificationMailStreamWrite marks routes whose command
	// could not be written to the mail delivery-commands stream.
	emailFailureClassificationMailStreamWrite = "mail_stream_publish_failed"
)
|
||||
|
||||
// EmailRouteStateStore describes the durable route-state operations required
// by EmailPublisher. Implementations own all persistence; the publisher only
// sequences the calls.
type EmailRouteStateStore interface {
	// ListDueRoutes loads scheduled routes due at the given reference time,
	// capped at the given batch size.
	ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)

	// TryAcquireRouteLease attempts to acquire one temporary route lease for
	// (notificationID, routeID) under the given worker token and TTL; the
	// boolean reports whether the lease was obtained.
	TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)

	// ReleaseRouteLease best-effort releases one temporary route lease held
	// under the given worker token.
	ReleaseRouteLease(context.Context, string, string, string) error

	// GetNotification loads one accepted notification; the boolean reports
	// whether it exists.
	GetNotification(context.Context, string) (acceptintent.NotificationRecord, bool, error)

	// GetRoute loads one accepted notification route; the boolean reports
	// whether it exists.
	GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)

	// CompleteRoutePublished records one successful publication.
	CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error

	// CompleteRouteFailed records one retryable publication failure.
	CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error

	// CompleteRouteDeadLetter records one exhausted publication failure.
	CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
}
|
||||
|
||||
// EmailCommandEncoder encodes one email-capable notification route into a
// Mail Service-compatible generic command.
type EmailCommandEncoder interface {
	// Encode converts notification plus route to one outbound command. An
	// encoding error is treated by the publisher as a payload-encoding
	// failure and routed through retry/dead-letter handling.
	Encode(acceptintent.NotificationRecord, acceptintent.NotificationRoute) (publishmail.Command, error)
}
|
||||
|
||||
// EmailPublisherConfig stores the dependencies and policies used by
// EmailPublisher. Store, MailDeliveryCommandsStream, RouteLeaseTTL,
// RouteBackoffMin and RouteBackoffMax are required; the remaining fields
// receive defaults in NewEmailPublisher when left unset.
type EmailPublisherConfig struct {
	// Store owns the durable route-state transitions.
	Store EmailRouteStateStore

	// MailDeliveryCommandsStream stores the outbound Mail Service command
	// stream name.
	MailDeliveryCommandsStream string

	// RouteLeaseTTL stores the temporary route-lease lifetime.
	RouteLeaseTTL time.Duration

	// RouteBackoffMin stores the minimum retry backoff.
	RouteBackoffMin time.Duration

	// RouteBackoffMax stores the maximum retry backoff.
	RouteBackoffMax time.Duration

	// PollInterval stores how long the worker waits before the next due-route
	// scan when no progress was made. Defaulted when non-positive.
	PollInterval time.Duration

	// BatchSize stores the maximum number of due schedule members loaded per
	// scan. Defaulted when non-positive.
	BatchSize int64

	// Encoder stores the email command encoder. Defaults to
	// publishmail.Encoder when nil.
	Encoder EmailCommandEncoder

	// Telemetry records route publication counters. Optional; recording is
	// skipped when nil.
	Telemetry RoutePublisherTelemetry

	// Clock provides wall-clock timestamps. Defaults to the system clock
	// when nil.
	Clock Clock
}
|
||||
|
||||
// EmailPublisher publishes due email routes into the Mail Service command
// stream with retry and dead-letter handling. Construct it with
// NewEmailPublisher and drive it with Run.
type EmailPublisher struct {
	store                      EmailRouteStateStore    // durable route-state transitions
	mailDeliveryCommandsStream string                  // outbound Mail Service command stream
	routeLeaseTTL              time.Duration           // temporary route-lease lifetime
	routeBackoffMin            time.Duration           // minimum retry backoff
	routeBackoffMax            time.Duration           // maximum retry backoff
	pollInterval               time.Duration           // wait between idle due-route scans
	batchSize                  int64                   // max due schedule members per scan
	encoder                    EmailCommandEncoder     // route -> mail command encoder
	telemetry                  RoutePublisherTelemetry // optional publication counters
	clock                      Clock                   // wall-clock source
	workerToken                string                  // lease-owner token for this worker
	logger                     *slog.Logger            // component-scoped logger
}
|
||||
|
||||
// NewEmailPublisher constructs the email publication worker.
|
||||
func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPublisher, error) {
|
||||
switch {
|
||||
case cfg.Store == nil:
|
||||
return nil, errors.New("new email publisher: nil store")
|
||||
case strings.TrimSpace(cfg.MailDeliveryCommandsStream) == "":
|
||||
return nil, errors.New("new email publisher: mail delivery-commands stream must not be empty")
|
||||
case cfg.RouteLeaseTTL <= 0:
|
||||
return nil, errors.New("new email publisher: route lease ttl must be positive")
|
||||
case cfg.RouteBackoffMin <= 0:
|
||||
return nil, errors.New("new email publisher: route backoff min must be positive")
|
||||
case cfg.RouteBackoffMax <= 0:
|
||||
return nil, errors.New("new email publisher: route backoff max must be positive")
|
||||
case cfg.RouteBackoffMin > cfg.RouteBackoffMax:
|
||||
return nil, errors.New("new email publisher: route backoff min must not exceed route backoff max")
|
||||
}
|
||||
if cfg.PollInterval <= 0 {
|
||||
cfg.PollInterval = defaultPushPublisherPollInterval
|
||||
}
|
||||
if cfg.BatchSize <= 0 {
|
||||
cfg.BatchSize = defaultPushPublisherBatchSize
|
||||
}
|
||||
if cfg.Clock == nil {
|
||||
cfg.Clock = systemClock{}
|
||||
}
|
||||
if cfg.Encoder == nil {
|
||||
cfg.Encoder = publishmail.Encoder{}
|
||||
}
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
workerToken, err := newWorkerToken()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new email publisher: %w", err)
|
||||
}
|
||||
|
||||
return &EmailPublisher{
|
||||
store: cfg.Store,
|
||||
mailDeliveryCommandsStream: cfg.MailDeliveryCommandsStream,
|
||||
routeLeaseTTL: cfg.RouteLeaseTTL,
|
||||
routeBackoffMin: cfg.RouteBackoffMin,
|
||||
routeBackoffMax: cfg.RouteBackoffMax,
|
||||
pollInterval: cfg.PollInterval,
|
||||
batchSize: cfg.BatchSize,
|
||||
encoder: cfg.Encoder,
|
||||
telemetry: cfg.Telemetry,
|
||||
clock: cfg.Clock,
|
||||
workerToken: workerToken,
|
||||
logger: logger.With("component", "email_publisher", "stream", cfg.MailDeliveryCommandsStream),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run starts the email publication loop and blocks until ctx is canceled or
// an unexpected publication error occurs.
//
// Loop policy: after a scan that made progress the next scan starts
// immediately; after an idle scan the worker waits out pollInterval (or
// until ctx ends). Cancellation surfaced through the store as a context
// error is translated back into ctx.Err so callers observe a clean stop.
func (publisher *EmailPublisher) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run email publisher: nil context")
	}
	if err := ctx.Err(); err != nil {
		// Already canceled: do not start the loop at all.
		return err
	}
	if publisher == nil {
		return errors.New("run email publisher: nil publisher")
	}

	publisher.logger.Info("email publisher started",
		"poll_interval", publisher.pollInterval.String(),
		"batch_size", publisher.batchSize,
	)

	for {
		progress, err := publisher.publishDueRoutes(ctx)
		switch {
		case err == nil && progress:
			// Work was done; scan again without waiting.
			continue
		case err == nil:
			// Idle scan: wait out the poll interval, stopping early on ctx.
			if waitErr := waitWithContext(ctx, publisher.pollInterval); waitErr != nil {
				publisher.logger.Info("email publisher stopped")
				return waitErr
			}
		case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)):
			// Cancellation propagated through the store; report ctx's error.
			publisher.logger.Info("email publisher stopped")
			return ctx.Err()
		default:
			return fmt.Errorf("run email publisher: %w", err)
		}
	}
}
|
||||
|
||||
// Shutdown stops the email publisher within ctx. The worker relies on context
|
||||
// cancellation and a bounded polling interval, so it has no dedicated
|
||||
// resources to release here.
|
||||
func (publisher *EmailPublisher) Shutdown(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("shutdown email publisher: nil context")
|
||||
}
|
||||
if publisher == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (publisher *EmailPublisher) publishDueRoutes(ctx context.Context) (bool, error) {
|
||||
now := publisher.now()
|
||||
|
||||
dueRoutes, err := publisher.store.ListDueRoutes(ctx, now, publisher.batchSize)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
progress := false
|
||||
for _, dueRoute := range dueRoutes {
|
||||
if !strings.HasPrefix(dueRoute.RouteID, "email:") {
|
||||
continue
|
||||
}
|
||||
|
||||
processed, err := publisher.publishRoute(ctx, now, dueRoute)
|
||||
if err != nil {
|
||||
return progress, err
|
||||
}
|
||||
progress = progress || processed
|
||||
}
|
||||
|
||||
return progress, nil
|
||||
}
|
||||
|
||||
// publishRoute attempts to publish one due email route end-to-end: acquire a
// lease, load the notification and route, encode the command, and durably
// mark the route published (or failed). The boolean reports whether the
// route was actually processed; lost leases and concurrent state conflicts
// return (false, nil) so the scan simply moves on.
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
	acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
	if err != nil {
		return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
	}
	if !acquired {
		// Another worker holds the lease; skip without error.
		return false, nil
	}
	defer func() {
		// Best-effort release on a fresh context so it still runs when ctx
		// is already canceled; bounded by the lease TTL.
		releaseCtx, cancel := context.WithTimeout(context.Background(), publisher.routeLeaseTTL)
		defer cancel()
		_ = publisher.store.ReleaseRouteLease(releaseCtx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken)
	}()

	notification, found, err := publisher.store.GetNotification(ctx, dueRoute.NotificationID)
	if err != nil {
		return false, fmt.Errorf("load notification %q: %w", dueRoute.NotificationID, err)
	}
	if !found {
		return false, fmt.Errorf("notification %q is missing for route %q", dueRoute.NotificationID, dueRoute.RouteID)
	}

	route, found, err := publisher.store.GetRoute(ctx, dueRoute.NotificationID, dueRoute.RouteID)
	if err != nil {
		return false, fmt.Errorf("load route %q: %w", dueRoute.RouteID, err)
	}
	if !found {
		return false, fmt.Errorf("route %q is missing for notification %q", dueRoute.RouteID, dueRoute.NotificationID)
	}
	// Guard against non-email routes that slipped past the ID-prefix filter.
	if route.Channel != intentstream.ChannelEmail {
		return false, nil
	}
	// Only pending or previously failed routes are publishable.
	switch route.Status {
	case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed:
	default:
		return false, nil
	}
	// Respect the per-route retry backoff.
	if route.NextAttemptAt.After(now) {
		return false, nil
	}

	command, err := publisher.encoder.Encode(notification, route)
	if err != nil {
		// Encoding failures go through the retry/dead-letter path.
		return publisher.recordFailure(ctx, notification, route, emailFailureClassificationPayloadEncoding, err.Error())
	}

	err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    publisher.workerToken,
		PublishedAt:   publisher.now(),
		Stream:        publisher.mailDeliveryCommandsStream,
		StreamMaxLen:  0,
		StreamValues:  command.Values(),
	})
	switch {
	case err == nil:
		publisher.recordPublishAttempt(ctx, notification, route, "published", "")
		logArgs := logging.RouteAttrs(
			notification.NotificationID,
			notification.NotificationType,
			notification.Producer,
			notification.AudienceKind,
			notification.IdempotencyKey,
			notification.RequestID,
			notification.TraceID,
			route.RouteID,
			route.Channel,
		)
		logArgs = append(logArgs,
			"delivery_id", command.DeliveryID,
			"resolved_email", route.ResolvedEmail,
		)
		logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
		publisher.logger.Info("email route published", logArgs...)
		return true, nil
	case errors.Is(err, redisstate.ErrConflict):
		// Route state changed underneath us; treat as not processed.
		return false, nil
	default:
		return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
	}
}
|
||||
|
||||
// recordFailure durably records one failed publication attempt for route:
// the route is dead-lettered when this attempt exhausts MaxAttempts,
// otherwise it is rescheduled with bounded backoff. The boolean mirrors
// publishRoute's contract — true when route state changed, (false, nil) on a
// concurrent-modification conflict.
func (publisher *EmailPublisher) recordFailure(
	ctx context.Context,
	notification acceptintent.NotificationRecord,
	route acceptintent.NotificationRoute,
	classification string,
	message string,
) (bool, error) {
	failureAt := publisher.now()
	// The attempt being recorded is one past the stored attempt count.
	attemptNumber := route.AttemptCount + 1
	logArgs := logging.RouteAttrs(
		notification.NotificationID,
		notification.NotificationType,
		notification.Producer,
		notification.AudienceKind,
		notification.IdempotencyKey,
		notification.RequestID,
		notification.TraceID,
		route.RouteID,
		route.Channel,
	)
	logArgs = append(logArgs,
		"resolved_email", route.ResolvedEmail,
		"failure_classification", classification,
		"failure_message", strings.TrimSpace(message),
		"attempt_number", attemptNumber,
		"max_attempts", route.MaxAttempts,
	)
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)

	// Attempts exhausted: move the route to the dead-letter state.
	if attemptNumber >= route.MaxAttempts {
		err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
			ExpectedRoute:         route,
			LeaseToken:            publisher.workerToken,
			DeadLetteredAt:        failureAt,
			FailureClassification: classification,
			FailureMessage:        strings.TrimSpace(message),
		})
		switch {
		case err == nil:
			publisher.recordPublishAttempt(ctx, notification, route, "dead_letter", classification)
			publisher.recordRouteDeadLetter(ctx, notification, route, classification)
			publisher.logger.Warn("email route dead-lettered", logArgs...)
			return true, nil
		case errors.Is(err, redisstate.ErrConflict):
			// Route state changed underneath us; not processed.
			return false, nil
		default:
			return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
		}
	}

	// Retryable: reschedule with bounded backoff, normalized like now().
	nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
	err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            publisher.workerToken,
		FailedAt:              failureAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: classification,
		FailureMessage:        strings.TrimSpace(message),
	})
	switch {
	case err == nil:
		publisher.recordPublishAttempt(ctx, notification, route, "retry", classification)
		publisher.recordRouteRetry(ctx, notification, route)
		logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
		publisher.logger.Warn("email route failed and was rescheduled", logArgs...)
		return true, nil
	case errors.Is(err, redisstate.ErrConflict):
		// Route state changed underneath us; not processed.
		return false, nil
	default:
		return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
	}
}
|
||||
|
||||
func (publisher *EmailPublisher) now() time.Time {
|
||||
return publisher.clock.Now().UTC().Truncate(time.Millisecond)
|
||||
}
|
||||
|
||||
func (publisher *EmailPublisher) recordPublishAttempt(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, result string, classification string) {
|
||||
if publisher == nil || publisher.telemetry == nil {
|
||||
return
|
||||
}
|
||||
|
||||
publisher.telemetry.RecordRoutePublishAttempt(ctx, string(route.Channel), string(notification.NotificationType), result, classification)
|
||||
}
|
||||
|
||||
func (publisher *EmailPublisher) recordRouteRetry(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) {
|
||||
if publisher == nil || publisher.telemetry == nil {
|
||||
return
|
||||
}
|
||||
|
||||
publisher.telemetry.RecordRouteRetry(ctx, string(route.Channel), string(notification.NotificationType))
|
||||
}
|
||||
|
||||
func (publisher *EmailPublisher) recordRouteDeadLetter(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, classification string) {
|
||||
if publisher == nil || publisher.telemetry == nil {
|
||||
return
|
||||
}
|
||||
|
||||
publisher.telemetry.RecordRouteDeadLetter(ctx, string(route.Channel), string(notification.NotificationType), classification)
|
||||
}
|
||||
@@ -0,0 +1,232 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending verifies
// that the publisher publishes exactly the due email route to the mail stream
// (with the expected envelope fields) while the sibling push route for the
// same notification stays pending.
func TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending(t *testing.T) {
	t.Parallel()

	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))

	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)

	// Wait until the background publisher has transitioned the email route.
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)

	// The push route must not be touched by the email publisher.
	pushRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, pushRoute.Status)

	// Exactly one delivery command must have been written to the mail stream.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
|
||||
|
||||
// TestEmailPublisherRetriesMailStreamPublicationFailures verifies that a
// failed mail-stream write marks the route failed, records retry telemetry,
// and that the route is published on a later attempt once the failure clears.
func TestEmailPublisherRetriesMailStreamPublicationFailures(t *testing.T) {
	t.Parallel()

	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	// Occupy the stream key with a string so XADD fails with WRONGTYPE.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())

	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
	}, time.Second, 10*time.Millisecond)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "retry", emailFailureClassificationMailStreamWrite))
	require.True(t, fixture.telemetry.hasRouteRetry("email"))

	// Clear the blocking key so the next attempt can XADD successfully.
	require.NoError(t, fixture.client.Del(context.Background(), fixture.mailStream).Err())

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
	}, 2*time.Second, 10*time.Millisecond)

	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
|
||||
|
||||
// TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publisher replicas against the same store and verifies the route lease
// ensures the delivery command is written to the mail stream exactly once.
func TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
	t.Parallel()

	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))

	// Second replica sharing the same store and stream; it intentionally has
	// its own clock so the two replicas race realistically.
	otherPublisher, err := NewEmailPublisher(EmailPublisherConfig{
		Store:                      fixture.store,
		MailDeliveryCommandsStream: fixture.mailStream,
		RouteLeaseTTL:              200 * time.Millisecond,
		RouteBackoffMin:            20 * time.Millisecond,
		RouteBackoffMax:            20 * time.Millisecond,
		PollInterval:               10 * time.Millisecond,
		BatchSize:                  16,
		Clock:                      newSteppingClock(fixture.now, time.Millisecond),
	}, testWorkerLogger())
	require.NoError(t, err)

	first := runEmailPublisher(t, fixture.publisher)
	defer first.stop(t)
	second := runEmailPublisher(t, otherPublisher)
	defer second.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)

	// Despite two racing replicas, exactly one message may exist.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
}
|
||||
|
||||
// TestEmailPublisherDeadLettersExhaustedRoute verifies that a route entering
// its final allowed attempt (6 of 7 already used) is dead-lettered after one
// more mail-stream write failure, with the failure classification persisted
// and telemetry recorded.
func TestEmailPublisherDeadLettersExhaustedRoute(t *testing.T) {
	t.Parallel()

	fixture := newEmailPublisherFixture(t)
	// Pre-load the email route with 6 prior attempts out of MaxAttempts 7.
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 6)))
	// Occupy the stream key with a string so XADD fails with WRONGTYPE.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())

	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 7
	}, time.Second, 10*time.Millisecond)

	deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "email:user:user-1")).Bytes()
	require.NoError(t, err)
	deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
	require.NoError(t, err)
	require.Equal(t, emailFailureClassificationMailStreamWrite, deadLetter.FailureClassification)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "dead_letter", emailFailureClassificationMailStreamWrite))
	require.True(t, fixture.telemetry.hasRouteDeadLetter("email", emailFailureClassificationMailStreamWrite))
}
|
||||
|
||||
// emailPublisherFixture bundles the miniredis-backed dependencies shared by
// the email-publisher tests: the raw Redis client, the acceptance store, the
// publisher under test, the mail stream name, and the stepping clock (plus
// its fixed start time) together with the recording telemetry sink.
type emailPublisherFixture struct {
	client     *redis.Client
	store      *redisstate.AcceptanceStore
	publisher  *EmailPublisher
	mailStream string
	now        time.Time
	clock      *steppingClock
	telemetry  *recordingWorkerTelemetry
}
|
||||
|
||||
func newEmailPublisherFixture(t *testing.T) emailPublisherFixture {
|
||||
t.Helper()
|
||||
|
||||
server := miniredis.RunT(t)
|
||||
client := redis.NewClient(&redis.Options{
|
||||
Addr: server.Addr(),
|
||||
Protocol: 2,
|
||||
DisableIdentity: true,
|
||||
})
|
||||
t.Cleanup(func() {
|
||||
require.NoError(t, client.Close())
|
||||
})
|
||||
|
||||
store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
|
||||
RecordTTL: 24 * time.Hour,
|
||||
DeadLetterTTL: 72 * time.Hour,
|
||||
IdempotencyTTL: 7 * 24 * time.Hour,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
now := time.UnixMilli(1775121700000).UTC()
|
||||
clock := newSteppingClock(now, time.Millisecond)
|
||||
telemetry := &recordingWorkerTelemetry{}
|
||||
publisher, err := NewEmailPublisher(EmailPublisherConfig{
|
||||
Store: store,
|
||||
MailDeliveryCommandsStream: "mail:delivery_commands",
|
||||
RouteLeaseTTL: 200 * time.Millisecond,
|
||||
RouteBackoffMin: 20 * time.Millisecond,
|
||||
RouteBackoffMax: 20 * time.Millisecond,
|
||||
PollInterval: 10 * time.Millisecond,
|
||||
BatchSize: 16,
|
||||
Telemetry: telemetry,
|
||||
Clock: clock,
|
||||
}, testWorkerLogger())
|
||||
require.NoError(t, err)
|
||||
|
||||
return emailPublisherFixture{
|
||||
client: client,
|
||||
store: store,
|
||||
publisher: publisher,
|
||||
mailStream: "mail:delivery_commands",
|
||||
now: now,
|
||||
clock: clock,
|
||||
telemetry: telemetry,
|
||||
}
|
||||
}
|
||||
|
||||
func validEmailAcceptanceInput(now time.Time, emailAttemptCount int) acceptintent.CreateAcceptanceInput {
|
||||
input := validPushAcceptanceInput(now)
|
||||
for index := range input.Routes {
|
||||
if input.Routes[index].RouteID != "email:user:user-1" {
|
||||
continue
|
||||
}
|
||||
input.Routes[index].AttemptCount = emailAttemptCount
|
||||
input.Routes[index].MaxAttempts = 7
|
||||
}
|
||||
|
||||
return input
|
||||
}
|
||||
|
||||
// runningEmailPublisher is a handle to a publisher started in a background
// goroutine: cancel stops it and resultCh delivers its Run return value.
type runningEmailPublisher struct {
	cancel   context.CancelFunc
	resultCh chan error
}
|
||||
|
||||
// runEmailPublisher starts publisher.Run in a background goroutine and
// returns a handle used to stop it and observe its exit error.
func runEmailPublisher(t *testing.T, publisher *EmailPublisher) runningEmailPublisher {
	t.Helper()

	ctx, cancel := context.WithCancel(context.Background())
	// Buffer of 1 lets the goroutine exit even if stop is never called.
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- publisher.Run(ctx)
	}()

	return runningEmailPublisher{
		cancel:   cancel,
		resultCh: resultCh,
	}
}
|
||||
|
||||
// stop cancels the running publisher and asserts it exits with
// context.Canceled within one second.
func (r runningEmailPublisher) stop(t *testing.T) {
	t.Helper()

	r.cancel()

	select {
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-time.After(time.Second):
		require.FailNow(t, "email publisher did not stop")
	}
}
|
||||
@@ -0,0 +1,331 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/logging"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// AcceptIntentUseCase accepts one normalized notification intent.
type AcceptIntentUseCase interface {
	// Execute durably accepts one normalized notification intent keyed by
	// its stream entry id and returns the acceptance outcome, or an error
	// when acceptance fails.
	Execute(context.Context, acceptintent.AcceptInput) (acceptintent.Result, error)
}
|
||||
|
||||
// MalformedIntentRecorder stores one operator-visible malformed-intent record.
type MalformedIntentRecorder interface {
	// Record persists entry idempotently, keyed by its stream entry id, and
	// returns an error only when persistence fails.
	Record(context.Context, malformedintent.Entry) error
}
|
||||
|
||||
// StreamOffsetStore stores the last durably processed entry id of one plain
// XREAD consumer.
type StreamOffsetStore interface {
	// Load returns the last processed entry id for the named stream; the
	// bool reports whether an offset was stored at all.
	Load(context.Context, string) (string, bool, error)

	// Save stores the last processed entry id for the named stream.
	Save(context.Context, string, string) error
}
|
||||
|
||||
// IntentConsumerTelemetry records low-cardinality stream-consumer events.
type IntentConsumerTelemetry interface {
	// RecordMalformedIntent records one malformed or rejected notification
	// intent. The string arguments are, in order, the failure code, the
	// notification type, and the producer.
	RecordMalformedIntent(context.Context, string, string, string)
}
|
||||
|
||||
// Clock provides the current wall-clock time; it exists as a seam so tests
// can substitute a deterministic clock.
type Clock interface {
	// Now returns the current time.
	Now() time.Time
}
|
||||
|
||||
// systemClock is the default Clock implementation backed by time.Now.
type systemClock struct{}

// Now returns the current system time.
func (systemClock) Now() time.Time {
	return time.Now()
}
|
||||
|
||||
// IntentConsumerConfig stores the dependencies used by IntentConsumer. All
// fields except Telemetry and Clock are required; NewIntentConsumer rejects
// missing required fields and defaults Clock to the system clock.
type IntentConsumerConfig struct {
	// Client stores the Redis client used for XREAD.
	Client *redis.Client

	// Stream stores the Redis Stream name to consume.
	Stream string

	// BlockTimeout stores the blocking XREAD timeout; must be positive.
	BlockTimeout time.Duration

	// Acceptor durably accepts valid notification intents.
	Acceptor AcceptIntentUseCase

	// MalformedRecorder persists operator-visible malformed-intent entries.
	MalformedRecorder MalformedIntentRecorder

	// OffsetStore stores the last durably processed stream entry id.
	OffsetStore StreamOffsetStore

	// Telemetry records malformed-intent counters. Optional; nil disables
	// telemetry.
	Telemetry IntentConsumerTelemetry

	// Clock provides wall-clock timestamps for malformed-intent records.
	// Optional; defaults to the system clock.
	Clock Clock
}
|
||||
|
||||
// IntentConsumer stores the Redis Streams consumer used for notification
// intent intake. Its fields mirror IntentConsumerConfig after validation and
// defaulting; see that type for field documentation.
type IntentConsumer struct {
	client            *redis.Client
	stream            string
	blockTimeout      time.Duration
	acceptor          AcceptIntentUseCase
	malformedRecorder MalformedIntentRecorder
	offsetStore       StreamOffsetStore
	telemetry         IntentConsumerTelemetry
	clock             Clock
	logger            *slog.Logger
}
|
||||
|
||||
// NewIntentConsumer constructs the notification-intent consumer.
|
||||
func NewIntentConsumer(cfg IntentConsumerConfig, logger *slog.Logger) (*IntentConsumer, error) {
|
||||
switch {
|
||||
case cfg.Client == nil:
|
||||
return nil, errors.New("new intent consumer: nil redis client")
|
||||
case strings.TrimSpace(cfg.Stream) == "":
|
||||
return nil, errors.New("new intent consumer: stream must not be empty")
|
||||
case cfg.BlockTimeout <= 0:
|
||||
return nil, errors.New("new intent consumer: block timeout must be positive")
|
||||
case cfg.Acceptor == nil:
|
||||
return nil, errors.New("new intent consumer: nil acceptor")
|
||||
case cfg.MalformedRecorder == nil:
|
||||
return nil, errors.New("new intent consumer: nil malformed recorder")
|
||||
case cfg.OffsetStore == nil:
|
||||
return nil, errors.New("new intent consumer: nil offset store")
|
||||
}
|
||||
if cfg.Clock == nil {
|
||||
cfg.Clock = systemClock{}
|
||||
}
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
return &IntentConsumer{
|
||||
client: cfg.Client,
|
||||
stream: cfg.Stream,
|
||||
blockTimeout: cfg.BlockTimeout,
|
||||
acceptor: cfg.Acceptor,
|
||||
malformedRecorder: cfg.MalformedRecorder,
|
||||
offsetStore: cfg.OffsetStore,
|
||||
telemetry: cfg.Telemetry,
|
||||
clock: cfg.Clock,
|
||||
logger: logger.With("component", "intent_consumer", "stream", cfg.Stream),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run starts the intent consumer and blocks until ctx is canceled or Redis
|
||||
// returns an unexpected error.
|
||||
func (consumer *IntentConsumer) Run(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("run intent consumer: nil context")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if consumer == nil || consumer.client == nil {
|
||||
return errors.New("run intent consumer: nil consumer")
|
||||
}
|
||||
|
||||
lastID, found, err := consumer.offsetStore.Load(ctx, consumer.stream)
|
||||
if err != nil {
|
||||
return fmt.Errorf("run intent consumer: load stream offset: %w", err)
|
||||
}
|
||||
if !found {
|
||||
lastID = "0-0"
|
||||
}
|
||||
|
||||
consumer.logger.Info("intent consumer started", "block_timeout", consumer.blockTimeout.String(), "start_entry_id", lastID)
|
||||
|
||||
for {
|
||||
streams, err := consumer.client.XRead(ctx, &redis.XReadArgs{
|
||||
Streams: []string{consumer.stream, lastID},
|
||||
Count: 1,
|
||||
Block: consumer.blockTimeout,
|
||||
}).Result()
|
||||
switch {
|
||||
case err == nil:
|
||||
for _, stream := range streams {
|
||||
for _, message := range stream.Messages {
|
||||
if err := consumer.handleMessage(ctx, message); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := consumer.offsetStore.Save(ctx, consumer.stream, message.ID); err != nil {
|
||||
return fmt.Errorf("run intent consumer: save stream offset: %w", err)
|
||||
}
|
||||
lastID = message.ID
|
||||
}
|
||||
}
|
||||
case errors.Is(err, redis.Nil):
|
||||
continue
|
||||
case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, redis.ErrClosed)):
|
||||
consumer.logger.Info("intent consumer stopped")
|
||||
return ctx.Err()
|
||||
case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded), errors.Is(err, redis.ErrClosed):
|
||||
return fmt.Errorf("run intent consumer: %w", err)
|
||||
default:
|
||||
return fmt.Errorf("run intent consumer: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (consumer *IntentConsumer) handleMessage(ctx context.Context, message redis.XMessage) error {
|
||||
rawFields := cloneRawFields(message.Values)
|
||||
|
||||
intent, err := intentstream.DecodeIntent(rawFields)
|
||||
if err != nil {
|
||||
return consumer.recordMalformed(
|
||||
ctx,
|
||||
message.ID,
|
||||
rawFields,
|
||||
intentstream.ClassifyDecodeError(err),
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
||||
result, err := consumer.acceptor.Execute(ctx, acceptintent.AcceptInput{
|
||||
NotificationID: message.ID,
|
||||
Intent: intent,
|
||||
})
|
||||
switch {
|
||||
case err == nil:
|
||||
logArgs := []any{
|
||||
"stream_entry_id", message.ID,
|
||||
"notification_id", message.ID,
|
||||
}
|
||||
logArgs = append(logArgs, logging.IntentAttrs(intent)...)
|
||||
logArgs = append(logArgs,
|
||||
"outcome", string(result.Outcome),
|
||||
)
|
||||
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
|
||||
consumer.logger.Info("notification intent handled", logArgs...)
|
||||
return nil
|
||||
case errors.Is(err, acceptintent.ErrConflict):
|
||||
return consumer.recordMalformed(ctx, message.ID, rawFields, malformedintent.FailureCodeIdempotencyConflict, err)
|
||||
case errors.Is(err, acceptintent.ErrRecipientNotFound):
|
||||
return consumer.recordMalformed(ctx, message.ID, rawFields, malformedintent.FailureCodeRecipientNotFound, err)
|
||||
case errors.Is(err, acceptintent.ErrServiceUnavailable):
|
||||
return fmt.Errorf("handle intent %q: %w", message.ID, err)
|
||||
default:
|
||||
return fmt.Errorf("handle intent %q: %w", message.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// recordMalformed durably stores a malformed or rejected intent for operator
// inspection, bumps the malformed-intent counter when telemetry is wired,
// and logs a warning. It returns nil on success so the caller advances the
// stream offset past the rejected entry; only a failure to persist the
// record is returned as an error.
func (consumer *IntentConsumer) recordMalformed(
	ctx context.Context,
	streamEntryID string,
	rawFields map[string]any,
	failureCode malformedintent.FailureCode,
	cause error,
) error {
	// Identity fields are best-effort: empty when absent or not string-like.
	// RawFields is deep-copied so the stored record is isolated from later
	// mutation of the caller's map.
	entry := malformedintent.Entry{
		StreamEntryID:    streamEntryID,
		NotificationType: optionalRawString(rawFields, "notification_type"),
		Producer:         optionalRawString(rawFields, "producer"),
		IdempotencyKey:   optionalRawString(rawFields, "idempotency_key"),
		FailureCode:      failureCode,
		FailureMessage:   strings.TrimSpace(cause.Error()),
		RawFields:        cloneRawFields(rawFields),
		RecordedAt:       consumer.clock.Now().UTC().Truncate(time.Millisecond),
	}
	if err := consumer.malformedRecorder.Record(ctx, entry); err != nil {
		return fmt.Errorf("record malformed intent %q: %w", streamEntryID, err)
	}
	// Telemetry is optional wiring; skip when absent.
	if consumer.telemetry != nil {
		consumer.telemetry.RecordMalformedIntent(ctx, string(failureCode), entry.NotificationType, entry.Producer)
	}

	logArgs := []any{
		"stream_entry_id", streamEntryID,
		"notification_type", entry.NotificationType,
		"producer", entry.Producer,
		"idempotency_key", entry.IdempotencyKey,
		"failure_code", string(entry.FailureCode),
		"failure_message", entry.FailureMessage,
	}
	// Include the producer-supplied trace id when present, plus any trace
	// attributes carried by ctx.
	if traceID := optionalRawString(rawFields, "trace_id"); traceID != "" {
		logArgs = append(logArgs, "trace_id", traceID)
	}
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
	consumer.logger.Warn("notification intent rejected", logArgs...)

	return nil
}
|
||||
|
||||
func cloneRawFields(values map[string]any) map[string]any {
|
||||
if values == nil {
|
||||
return map[string]any{}
|
||||
}
|
||||
|
||||
cloned := make(map[string]any, len(values))
|
||||
for key, value := range values {
|
||||
cloned[key] = cloneRawValue(value)
|
||||
}
|
||||
|
||||
return cloned
|
||||
}
|
||||
|
||||
func cloneRawValue(value any) any {
|
||||
switch typed := value.(type) {
|
||||
case map[string]any:
|
||||
return cloneRawFields(typed)
|
||||
case []any:
|
||||
cloned := make([]any, len(typed))
|
||||
for index, item := range typed {
|
||||
cloned[index] = cloneRawValue(item)
|
||||
}
|
||||
return cloned
|
||||
default:
|
||||
return typed
|
||||
}
|
||||
}
|
||||
|
||||
// optionalRawString returns the value stored under key as a string. Missing
// keys and values that are neither string nor []byte yield "".
func optionalRawString(values map[string]any, key string) string {
	switch typed := values[key].(type) {
	case string:
		return typed
	case []byte:
		return string(typed)
	default:
		return ""
	}
}
|
||||
|
||||
// Shutdown stops the intent consumer within ctx. The consumer relies on
|
||||
// context cancellation and a bounded block timeout, so it has no dedicated
|
||||
// resources to release here.
|
||||
func (consumer *IntentConsumer) Shutdown(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("shutdown intent consumer: nil context")
|
||||
}
|
||||
if consumer == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,422 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/config"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/malformedintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists verifies
// that without a stored offset the consumer reads the stream from the
// beginning and accepts a pre-existing valid intent.
func TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// The intent is added before the consumer starts; only a "0-0" start
	// position can pick it up.
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), messageID)
		return err == nil && found
	}, time.Second, 10*time.Millisecond)
}
|
||||
|
||||
// TestIntentConsumerContinuesFromSavedOffsetAfterRestart verifies that the
// consumer resumes after a stored offset: the entry at the saved offset is
// skipped and only the later entry is accepted.
func TestIntentConsumerContinuesFromSavedOffsetAfterRestart(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	// Simulate a previous run that already processed the first entry.
	require.NoError(t, fixture.offsetStore.Save(context.Background(), fixture.stream, firstID))
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), secondID)
		return err == nil && found
	}, time.Second, 10*time.Millisecond)

	// The first entry was skipped, so no acceptance record exists for it.
	_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.False(t, found)
}
|
||||
|
||||
// TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset verifies
// that a second intent reusing an idempotency key with different content is
// stored as a malformed-intent record and that the offset still advances
// past it, while only the first intent is accepted.
func TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// Both entries share the idempotency key but carry different payloads
	// (turn 54 vs 55), so the second one conflicts.
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(secondID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == "idempotency_conflict"
	}, time.Second, 10*time.Millisecond)

	// The offset advanced past the conflicting entry.
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, secondID, offset)

	// Only the first intent produced an acceptance record.
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.True(t, found)

	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), secondID)
	require.NoError(t, err)
	require.False(t, found)
}
|
||||
|
||||
// TestIntentConsumerShutdownInterruptsBlockingRead verifies that canceling
// the run context interrupts a blocking XREAD and that Run returns
// context.Canceled promptly.
func TestIntentConsumerShutdownInterruptsBlockingRead(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{})

	ctx, cancel := context.WithCancel(context.Background())
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- fixture.consumer.Run(ctx)
	}()

	// Give the consumer time to enter a blocking read before canceling.
	time.Sleep(50 * time.Millisecond)
	cancel()

	select {
	case err := <-resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-time.After(time.Second):
		require.FailNow(t, "intent consumer did not stop after shutdown")
	}
}
|
||||
|
||||
// TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset verifies that
// an intent whose recipient is unknown to the user directory is stored as a
// malformed-intent record, the offset advances past it, and no acceptance
// record is created.
func TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset(t *testing.T) {
	t.Parallel()

	// Empty directory: every recipient lookup misses.
	fixture := newIntentConsumerFixture(t, stubUserDirectory{})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeRecipientNotFound
	}, time.Second, 10*time.Millisecond)

	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, messageID, offset)

	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
|
||||
|
||||
// TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset verifies that a
// stream entry failing decode is recorded as an invalid-payload
// malformed-intent entry, the offset advances past it, and no acceptance
// record is created.
func TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// XAdd a hand-built entry that is missing fields the decoder requires,
	// so intent decoding fails.
	messageID, err := fixture.client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: fixture.stream,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
		},
	}).Result()
	require.NoError(t, err)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeInvalidPayload &&
			entry.StreamEntryID == messageID
	}, time.Second, 10*time.Millisecond)

	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, messageID, offset)

	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
|
||||
|
||||
// TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents verifies
// that the accepted, duplicate, and idempotency-conflict paths each produce
// their corresponding telemetry samples.
func TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// Same payload twice -> accepted then duplicate; changed payload under
	// the same idempotency key -> conflict.
	addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	conflictID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(conflictID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict
	}, time.Second, 10*time.Millisecond)

	require.Eventually(t, func() bool {
		return fixture.telemetry.hasIntentOutcome("accepted") &&
			fixture.telemetry.hasIntentOutcome("duplicate") &&
			fixture.telemetry.hasMalformedIntent("idempotency_conflict")
	}, time.Second, 10*time.Millisecond)
}
|
||||
|
||||
// TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable
// verifies that a user-directory outage (a retryable failure) stops the run
// with an error and neither advances the stream offset nor creates an
// acceptance record, so the entry is replayed on restart.
func TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		err: errors.New("user service unavailable"),
	})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	resultCh := make(chan error, 1)
	go func() {
		resultCh <- fixture.consumer.Run(ctx)
	}()

	// Run must terminate on its own (not via cancel) with the wrapped
	// directory error.
	var runErr error
	require.Eventually(t, func() bool {
		select {
		case runErr = <-resultCh:
			return true
		default:
			return false
		}
	}, time.Second, 10*time.Millisecond)

	require.Error(t, runErr)
	require.ErrorContains(t, runErr, "user service unavailable")

	// No offset was saved, so the entry will be re-read after a restart.
	_, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.False(t, found)

	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
|
||||
|
||||
// intentConsumerFixture bundles the miniredis-backed dependencies shared by
// the intent-consumer tests: the raw Redis client, the consumed stream name,
// the acceptance and offset stores, the consumer under test, and the
// recording telemetry sink.
type intentConsumerFixture struct {
	client          *redis.Client
	stream          string
	acceptanceStore *redisstate.AcceptanceStore
	offsetStore     *redisstate.StreamOffsetStore
	consumer        *IntentConsumer
	telemetry       *recordingWorkerTelemetry
}
|
||||
|
||||
// newIntentConsumerFixture wires an IntentConsumer against a fresh miniredis
// instance with the supplied user directory, registering cleanup on t.
// Fixed clocks keep all persisted timestamps deterministic.
func newIntentConsumerFixture(t *testing.T, userDirectory acceptintent.UserDirectory) intentConsumerFixture {
	t.Helper()

	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})

	acceptanceStore, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	malformedStore, err := redisstate.NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)
	offsetStore, err := redisstate.NewStreamOffsetStore(client)
	require.NoError(t, err)
	telemetry := &recordingWorkerTelemetry{}
	service, err := acceptintent.New(acceptintent.Config{
		Store:            acceptanceStore,
		UserDirectory:    userDirectory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		Telemetry:        telemetry,
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting:     config.AdminRoutingConfig{},
	})
	require.NoError(t, err)
	consumer, err := NewIntentConsumer(IntentConsumerConfig{
		Client:            client,
		Stream:            "notification:intents",
		BlockTimeout:      25 * time.Millisecond,
		Acceptor:          service,
		MalformedRecorder: malformedStore,
		OffsetStore:       offsetStore,
		Telemetry:         telemetry,
		// One millisecond after the service clock so consumer-side
		// timestamps are distinguishable from acceptance timestamps.
		Clock: fixedClock{now: time.UnixMilli(1775121700001).UTC()},
	}, slog.New(slog.NewTextHandler(io.Discard, nil)))
	require.NoError(t, err)

	return intentConsumerFixture{
		client:          client,
		stream:          "notification:intents",
		acceptanceStore: acceptanceStore,
		offsetStore:     offsetStore,
		consumer:        consumer,
		telemetry:       telemetry,
	}
}
|
||||
|
||||
// addValidIntent appends one well-formed notification intent entry to stream
// and returns the generated Redis stream message ID.
func addValidIntent(t *testing.T, client *redis.Client, stream string, payloadJSON string) string {
	t.Helper()

	messageID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: stream,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            payloadJSON,
		},
	}).Result()
	require.NoError(t, err)

	return messageID
}
|
||||
|
||||
// runningIntentConsumer holds the handles needed to stop a consumer started
// by runIntentConsumer and observe its exit result.
type runningIntentConsumer struct {
	cancel   context.CancelFunc // stops the consumer's Run context
	resultCh chan error         // receives the value returned by Run
}
|
||||
|
||||
// runIntentConsumer starts consumer.Run on a background goroutine and returns
// the handles for stopping it and reading its result.
func runIntentConsumer(t *testing.T, consumer *IntentConsumer) runningIntentConsumer {
	t.Helper()

	ctx, cancel := context.WithCancel(context.Background())
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- consumer.Run(ctx)
	}()

	// NOTE(review): fixed sleep presumably gives the consumer time to begin
	// polling before the test proceeds — confirm no caller depends on the
	// exact 50ms value.
	time.Sleep(50 * time.Millisecond)

	return runningIntentConsumer{
		cancel:   cancel,
		resultCh: resultCh,
	}
}
|
||||
|
||||
// stop cancels the consumer's context and asserts that Run returns
// context.Canceled within one second.
func (r runningIntentConsumer) stop(t *testing.T) {
	t.Helper()

	r.cancel()

	select {
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-time.After(time.Second):
		require.FailNow(t, "intent consumer did not stop")
	}
}
|
||||
|
||||
// fixedClock is a Clock that always reports the same instant, keeping
// persisted timestamps deterministic in tests.
type fixedClock struct {
	now time.Time // the constant instant returned by Now
}

// Now returns the fixed instant.
func (clock fixedClock) Now() time.Time {
	return clock.now
}
|
||||
|
||||
// stubUserDirectory is an in-memory user directory for tests: it serves
// canned records per user ID, or fails every lookup when err is set.
type stubUserDirectory struct {
	records map[string]acceptintent.UserRecord // per-user canned responses
	err     error                              // when non-nil, returned for every lookup
}
|
||||
|
||||
func (directory stubUserDirectory) GetUserByID(_ context.Context, userID string) (acceptintent.UserRecord, error) {
|
||||
if directory.err != nil {
|
||||
return acceptintent.UserRecord{}, directory.err
|
||||
}
|
||||
if record, ok := directory.records[userID]; ok {
|
||||
return record, nil
|
||||
}
|
||||
|
||||
return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
|
||||
}
|
||||
@@ -0,0 +1,499 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/logging"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
"galaxy/notification/internal/service/publishpush"
|
||||
)
|
||||
|
||||
const (
	// defaultPushPublisherPollInterval applies when PushPublisherConfig leaves
	// PollInterval unset.
	defaultPushPublisherPollInterval = 100 * time.Millisecond
	// defaultPushPublisherBatchSize applies when PushPublisherConfig leaves
	// BatchSize unset.
	defaultPushPublisherBatchSize = 64

	// pushFailureClassificationPayloadEncoding marks failures while encoding
	// a route into a Gateway client event.
	pushFailureClassificationPayloadEncoding = "payload_encoding_failed"
	// pushFailureClassificationGatewayStreamWrite marks failures while
	// writing the encoded event to the Gateway stream.
	pushFailureClassificationGatewayStreamWrite = "gateway_stream_publish_failed"
)
|
||||
|
||||
// PushRouteStateStore describes the durable route-state operations required by
// PushPublisher.
type PushRouteStateStore interface {
	// ListDueRoutes loads scheduled routes due at the given instant, bounded
	// by the given batch size.
	ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)

	// TryAcquireRouteLease attempts to acquire one temporary route lease.
	// Arguments are notification ID, route ID, worker token, and lease TTL;
	// it reports whether the lease was obtained.
	TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)

	// ReleaseRouteLease best-effort releases one temporary route lease held
	// under the given notification ID, route ID, and worker token.
	ReleaseRouteLease(context.Context, string, string, string) error

	// GetNotification loads one accepted notification by ID, reporting
	// whether it exists.
	GetNotification(context.Context, string) (acceptintent.NotificationRecord, bool, error)

	// GetRoute loads one accepted notification route by notification ID and
	// route ID, reporting whether it exists.
	GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)

	// CompleteRoutePublished records one successful publication.
	CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error

	// CompleteRouteFailed records one retryable publication failure.
	CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error

	// CompleteRouteDeadLetter records one exhausted publication failure.
	CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
}
|
||||
|
||||
// PushEventEncoder encodes one push-capable notification route into a
// Gateway-compatible client event.
type PushEventEncoder interface {
	// Encode converts the notification plus one of its routes into one
	// outbound event. An error is treated as a publication failure with the
	// payload-encoding classification.
	Encode(acceptintent.NotificationRecord, acceptintent.NotificationRoute) (publishpush.Event, error)
}
|
||||
|
||||
// RoutePublisherTelemetry records low-cardinality route publication outcomes.
type RoutePublisherTelemetry interface {
	// RecordRoutePublishAttempt records one route publication attempt.
	// Arguments are channel, notification type, result, and failure
	// classification (empty on success).
	RecordRoutePublishAttempt(context.Context, string, string, string, string)

	// RecordRouteRetry records one route retry scheduling event for the
	// given channel and notification type.
	RecordRouteRetry(context.Context, string, string)

	// RecordRouteDeadLetter records one route transition to dead_letter for
	// the given channel, notification type, and failure classification.
	RecordRouteDeadLetter(context.Context, string, string, string)
}
|
||||
|
||||
// PushPublisherConfig stores the dependencies and policies used by
// PushPublisher. Store, GatewayStream, GatewayStreamMaxLen, RouteLeaseTTL,
// and the backoff bounds are mandatory; the remaining fields default in
// NewPushPublisher.
type PushPublisherConfig struct {
	// Store owns the durable route-state transitions. Required.
	Store PushRouteStateStore

	// GatewayStream stores the outbound Gateway client-events stream name.
	// Required.
	GatewayStream string

	// GatewayStreamMaxLen bounds GatewayStream with approximate trimming.
	// Must be positive.
	GatewayStreamMaxLen int64

	// RouteLeaseTTL stores the temporary route-lease lifetime. Must be
	// positive.
	RouteLeaseTTL time.Duration

	// RouteBackoffMin stores the minimum retry backoff. Must be positive and
	// not exceed RouteBackoffMax.
	RouteBackoffMin time.Duration

	// RouteBackoffMax stores the maximum retry backoff. Must be positive.
	RouteBackoffMax time.Duration

	// PollInterval stores how long the worker waits before the next due-route
	// scan when no progress was made. Defaults to
	// defaultPushPublisherPollInterval when non-positive.
	PollInterval time.Duration

	// BatchSize stores the maximum number of due schedule members loaded per
	// scan. Defaults to defaultPushPublisherBatchSize when non-positive.
	BatchSize int64

	// Encoder stores the push payload encoder. Defaults to
	// publishpush.Encoder{} when nil.
	Encoder PushEventEncoder

	// Telemetry records route publication counters. Optional; nil disables
	// telemetry.
	Telemetry RoutePublisherTelemetry

	// Clock provides wall-clock timestamps. Defaults to the system clock
	// when nil.
	Clock Clock
}
|
||||
|
||||
// PushPublisher publishes due push routes into the Gateway client-events
// stream with retry and dead-letter handling. All fields are populated by
// NewPushPublisher and treated as read-only afterwards.
type PushPublisher struct {
	store               PushRouteStateStore
	gatewayStream       string
	gatewayStreamMaxLen int64
	routeLeaseTTL       time.Duration
	routeBackoffMin     time.Duration
	routeBackoffMax     time.Duration
	pollInterval        time.Duration
	batchSize           int64
	encoder             PushEventEncoder
	telemetry           RoutePublisherTelemetry
	clock               Clock
	// workerToken uniquely identifies this publisher instance; it is used as
	// the lease token for acquiring and releasing route leases.
	workerToken string
	logger      *slog.Logger
}
|
||||
|
||||
// NewPushPublisher constructs the push publication worker.
|
||||
func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublisher, error) {
|
||||
switch {
|
||||
case cfg.Store == nil:
|
||||
return nil, errors.New("new push publisher: nil store")
|
||||
case strings.TrimSpace(cfg.GatewayStream) == "":
|
||||
return nil, errors.New("new push publisher: gateway stream must not be empty")
|
||||
case cfg.GatewayStreamMaxLen <= 0:
|
||||
return nil, errors.New("new push publisher: gateway stream max len must be positive")
|
||||
case cfg.RouteLeaseTTL <= 0:
|
||||
return nil, errors.New("new push publisher: route lease ttl must be positive")
|
||||
case cfg.RouteBackoffMin <= 0:
|
||||
return nil, errors.New("new push publisher: route backoff min must be positive")
|
||||
case cfg.RouteBackoffMax <= 0:
|
||||
return nil, errors.New("new push publisher: route backoff max must be positive")
|
||||
case cfg.RouteBackoffMin > cfg.RouteBackoffMax:
|
||||
return nil, errors.New("new push publisher: route backoff min must not exceed route backoff max")
|
||||
}
|
||||
if cfg.PollInterval <= 0 {
|
||||
cfg.PollInterval = defaultPushPublisherPollInterval
|
||||
}
|
||||
if cfg.BatchSize <= 0 {
|
||||
cfg.BatchSize = defaultPushPublisherBatchSize
|
||||
}
|
||||
if cfg.Clock == nil {
|
||||
cfg.Clock = systemClock{}
|
||||
}
|
||||
if cfg.Encoder == nil {
|
||||
cfg.Encoder = publishpush.Encoder{}
|
||||
}
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
workerToken, err := newWorkerToken()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new push publisher: %w", err)
|
||||
}
|
||||
|
||||
return &PushPublisher{
|
||||
store: cfg.Store,
|
||||
gatewayStream: cfg.GatewayStream,
|
||||
gatewayStreamMaxLen: cfg.GatewayStreamMaxLen,
|
||||
routeLeaseTTL: cfg.RouteLeaseTTL,
|
||||
routeBackoffMin: cfg.RouteBackoffMin,
|
||||
routeBackoffMax: cfg.RouteBackoffMax,
|
||||
pollInterval: cfg.PollInterval,
|
||||
batchSize: cfg.BatchSize,
|
||||
encoder: cfg.Encoder,
|
||||
telemetry: cfg.Telemetry,
|
||||
clock: cfg.Clock,
|
||||
workerToken: workerToken,
|
||||
logger: logger.With("component", "push_publisher", "stream", cfg.GatewayStream),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Run starts the push publication loop and blocks until ctx is canceled or an
// unexpected publication error occurs.
func (publisher *PushPublisher) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run push publisher: nil context")
	}
	// An already-ended context short-circuits before any work happens.
	if err := ctx.Err(); err != nil {
		return err
	}
	if publisher == nil {
		return errors.New("run push publisher: nil publisher")
	}

	publisher.logger.Info("push publisher started",
		"poll_interval", publisher.pollInterval.String(),
		"batch_size", publisher.batchSize,
	)

	for {
		progress, err := publisher.publishDueRoutes(ctx)
		switch {
		case err == nil && progress:
			// Work was done; scan again immediately in case more routes
			// became due.
			continue
		case err == nil:
			// Idle scan: sleep for one poll interval unless ctx ends first.
			if waitErr := waitWithContext(ctx, publisher.pollInterval); waitErr != nil {
				publisher.logger.Info("push publisher stopped")
				return waitErr
			}
		case ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)):
			// Shutdown surfaced through a store/publish call as a context
			// error; treat it as a clean stop rather than a failure.
			publisher.logger.Info("push publisher stopped")
			return ctx.Err()
		default:
			return fmt.Errorf("run push publisher: %w", err)
		}
	}
}
|
||||
|
||||
// Shutdown stops the push publisher within ctx. The worker relies on context
|
||||
// cancellation and a bounded polling interval, so it has no dedicated
|
||||
// resources to release here.
|
||||
func (publisher *PushPublisher) Shutdown(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("shutdown push publisher: nil context")
|
||||
}
|
||||
if publisher == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (publisher *PushPublisher) publishDueRoutes(ctx context.Context) (bool, error) {
|
||||
now := publisher.now()
|
||||
|
||||
dueRoutes, err := publisher.store.ListDueRoutes(ctx, now, publisher.batchSize)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
progress := false
|
||||
for _, dueRoute := range dueRoutes {
|
||||
if !strings.HasPrefix(dueRoute.RouteID, "push:") {
|
||||
continue
|
||||
}
|
||||
|
||||
processed, err := publisher.publishRoute(ctx, now, dueRoute)
|
||||
if err != nil {
|
||||
return progress, err
|
||||
}
|
||||
progress = progress || processed
|
||||
}
|
||||
|
||||
return progress, nil
|
||||
}
|
||||
|
||||
// publishRoute attempts to publish one due route under a temporary lease. It
// reports whether the route reached an outcome this attempt (published,
// rescheduled, or dead-lettered); lease contention, stale schedule entries,
// non-push routes, and conflicting concurrent updates report false without
// error.
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
	// Take the route lease first so concurrent replicas cannot publish the
	// same route twice.
	acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
	if err != nil {
		return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
	}
	if !acquired {
		// Another worker holds the lease; skip silently.
		return false, nil
	}
	defer func() {
		// Best-effort release on a fresh context so cleanup still runs when
		// ctx is already canceled; errors are deliberately ignored because
		// the lease expires on its own after routeLeaseTTL.
		releaseCtx, cancel := context.WithTimeout(context.Background(), publisher.routeLeaseTTL)
		defer cancel()
		_ = publisher.store.ReleaseRouteLease(releaseCtx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken)
	}()

	notification, found, err := publisher.store.GetNotification(ctx, dueRoute.NotificationID)
	if err != nil {
		return false, fmt.Errorf("load notification %q: %w", dueRoute.NotificationID, err)
	}
	if !found {
		return false, fmt.Errorf("notification %q is missing for route %q", dueRoute.NotificationID, dueRoute.RouteID)
	}

	route, found, err := publisher.store.GetRoute(ctx, dueRoute.NotificationID, dueRoute.RouteID)
	if err != nil {
		return false, fmt.Errorf("load route %q: %w", dueRoute.RouteID, err)
	}
	if !found {
		return false, fmt.Errorf("route %q is missing for notification %q", dueRoute.RouteID, dueRoute.NotificationID)
	}
	// Re-validate against the durable route record: the schedule entry may
	// have gone stale between the scan and lease acquisition.
	if route.Channel != intentstream.ChannelPush {
		return false, nil
	}
	switch route.Status {
	case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed:
	default:
		// Already published or dead-lettered; nothing to do.
		return false, nil
	}
	if route.NextAttemptAt.After(now) {
		return false, nil
	}

	event, err := publisher.encoder.Encode(notification, route)
	if err != nil {
		return publisher.recordFailure(ctx, notification, route, pushFailureClassificationPayloadEncoding, err.Error())
	}

	err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    publisher.workerToken,
		PublishedAt:   publisher.now(),
		Stream:        publisher.gatewayStream,
		StreamMaxLen:  publisher.gatewayStreamMaxLen,
		StreamValues:  eventValues(event),
	})
	switch {
	case err == nil:
		publisher.recordPublishAttempt(ctx, notification, route, "published", "")
		logArgs := logging.RouteAttrs(
			notification.NotificationID,
			notification.NotificationType,
			notification.Producer,
			notification.AudienceKind,
			notification.IdempotencyKey,
			notification.RequestID,
			notification.TraceID,
			route.RouteID,
			route.Channel,
		)
		logArgs = append(logArgs,
			"event_id", event.EventID,
			"user_id", event.UserID,
		)
		logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
		publisher.logger.Info("push route published", logArgs...)
		return true, nil
	case errors.Is(err, redisstate.ErrConflict):
		// The route changed under us (for example another worker completed
		// it); drop this attempt without error.
		return false, nil
	default:
		return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
	}
}
|
||||
|
||||
// recordFailure transitions route after a failed publication attempt: it
// reschedules the route with bounded exponential backoff or, once attempts
// are exhausted, dead-letters it. It reports whether the transition was
// recorded; a redisstate.ErrConflict (route changed concurrently) reports
// false without error.
func (publisher *PushPublisher) recordFailure(
	ctx context.Context,
	notification acceptintent.NotificationRecord,
	route acceptintent.NotificationRoute,
	classification string,
	message string,
) (bool, error) {
	failureAt := publisher.now()
	// The attempt that just failed, counted on top of prior attempts.
	attemptNumber := route.AttemptCount + 1
	logArgs := logging.RouteAttrs(
		notification.NotificationID,
		notification.NotificationType,
		notification.Producer,
		notification.AudienceKind,
		notification.IdempotencyKey,
		notification.RequestID,
		notification.TraceID,
		route.RouteID,
		route.Channel,
	)
	logArgs = append(logArgs,
		"failure_classification", classification,
		"failure_message", strings.TrimSpace(message),
		"attempt_number", attemptNumber,
		"max_attempts", route.MaxAttempts,
	)
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)

	// Exhausted: move the route to dead_letter instead of rescheduling.
	if attemptNumber >= route.MaxAttempts {
		err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
			ExpectedRoute:         route,
			LeaseToken:            publisher.workerToken,
			DeadLetteredAt:        failureAt,
			FailureClassification: classification,
			FailureMessage:        strings.TrimSpace(message),
		})
		switch {
		case err == nil:
			publisher.recordPublishAttempt(ctx, notification, route, "dead_letter", classification)
			publisher.recordRouteDeadLetter(ctx, notification, route, classification)
			publisher.logger.Warn("push route dead-lettered", logArgs...)
			return true, nil
		case errors.Is(err, redisstate.ErrConflict):
			return false, nil
		default:
			return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
		}
	}

	// Retryable: schedule the next attempt after a backoff delay.
	nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
	err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            publisher.workerToken,
		FailedAt:              failureAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: classification,
		FailureMessage:        strings.TrimSpace(message),
	})
	switch {
	case err == nil:
		publisher.recordPublishAttempt(ctx, notification, route, "retry", classification)
		publisher.recordRouteRetry(ctx, notification, route)
		logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
		publisher.logger.Warn("push route failed and was rescheduled", logArgs...)
		return true, nil
	case errors.Is(err, redisstate.ErrConflict):
		return false, nil
	default:
		return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
	}
}
|
||||
|
||||
func eventValues(event publishpush.Event) map[string]any {
|
||||
values := map[string]any{
|
||||
"user_id": event.UserID,
|
||||
"event_type": event.EventType,
|
||||
"event_id": event.EventID,
|
||||
"payload_bytes": append([]byte(nil), event.PayloadBytes...),
|
||||
}
|
||||
if event.RequestID != "" {
|
||||
values["request_id"] = event.RequestID
|
||||
}
|
||||
if event.TraceID != "" {
|
||||
values["trace_id"] = event.TraceID
|
||||
}
|
||||
|
||||
return values
|
||||
}
|
||||
|
||||
func routeBackoffDelay(attemptNumber int, minBackoff time.Duration, maxBackoff time.Duration) time.Duration {
|
||||
delay := minBackoff
|
||||
for step := 1; step < attemptNumber; step++ {
|
||||
if delay >= maxBackoff/2 {
|
||||
return maxBackoff
|
||||
}
|
||||
delay *= 2
|
||||
}
|
||||
if delay < minBackoff {
|
||||
return minBackoff
|
||||
}
|
||||
if delay > maxBackoff {
|
||||
return maxBackoff
|
||||
}
|
||||
|
||||
return delay
|
||||
}
|
||||
|
||||
func waitWithContext(ctx context.Context, delay time.Duration) error {
|
||||
timer := time.NewTimer(delay)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func newWorkerToken() (string, error) {
|
||||
buffer := make([]byte, 16)
|
||||
if _, err := rand.Read(buffer); err != nil {
|
||||
return "", fmt.Errorf("generate worker token: %w", err)
|
||||
}
|
||||
|
||||
return hex.EncodeToString(buffer), nil
|
||||
}
|
||||
|
||||
func (publisher *PushPublisher) now() time.Time {
|
||||
return publisher.clock.Now().UTC().Truncate(time.Millisecond)
|
||||
}
|
||||
|
||||
func (publisher *PushPublisher) recordPublishAttempt(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, result string, classification string) {
|
||||
if publisher == nil || publisher.telemetry == nil {
|
||||
return
|
||||
}
|
||||
|
||||
publisher.telemetry.RecordRoutePublishAttempt(ctx, string(route.Channel), string(notification.NotificationType), result, classification)
|
||||
}
|
||||
|
||||
func (publisher *PushPublisher) recordRouteRetry(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) {
|
||||
if publisher == nil || publisher.telemetry == nil {
|
||||
return
|
||||
}
|
||||
|
||||
publisher.telemetry.RecordRouteRetry(ctx, string(route.Channel), string(notification.NotificationType))
|
||||
}
|
||||
|
||||
func (publisher *PushPublisher) recordRouteDeadLetter(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, classification string) {
|
||||
if publisher == nil || publisher.telemetry == nil {
|
||||
return
|
||||
}
|
||||
|
||||
publisher.telemetry.RecordRouteDeadLetter(ctx, string(route.Channel), string(notification.NotificationType), classification)
|
||||
}
|
||||
@@ -0,0 +1,318 @@
|
||||
package worker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
redisstate "galaxy/notification/internal/adapters/redisstate"
|
||||
"galaxy/notification/internal/api/intentstream"
|
||||
"galaxy/notification/internal/service/acceptintent"
|
||||
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/redis/go-redis/v9"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending verifies
// that the worker publishes only the push route of an accepted notification
// into the Gateway stream and leaves the sibling email route untouched.
func TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending(t *testing.T) {
	t.Parallel()

	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))

	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)

	// The email route belongs to a different worker and must stay pending.
	emailRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)

	// Exactly one Gateway event with the expected identity fields.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
	require.Equal(t, "1775121700000-0/push:user:user-1", messages[0].Values["event_id"])
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
|
||||
|
||||
// TestPushPublisherRetriesGatewayStreamPublicationFailures verifies that a
// Gateway stream write failure reschedules the route with a retry and that
// the second attempt succeeds once the failure is cleared.
func TestPushPublisherRetriesGatewayStreamPublicationFailures(t *testing.T) {
	t.Parallel()

	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	// Occupying the stream key with a plain string makes the stream write
	// fail with a wrong-type error, simulating a Gateway stream outage.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())

	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
	}, time.Second, 10*time.Millisecond)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "retry", pushFailureClassificationGatewayStreamWrite))
	require.True(t, fixture.telemetry.hasRouteRetry("push"))

	// Clear the blocking key so the rescheduled attempt can succeed.
	require.NoError(t, fixture.client.Del(context.Background(), fixture.gatewayStream).Err())

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
	}, 2*time.Second, 10*time.Millisecond)

	// Exactly one event must land in the stream despite the retry.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
|
||||
|
||||
// TestPushPublisherDeadLettersExhaustedRoute verifies that a route whose next
// failure reaches MaxAttempts is moved to dead_letter with the stream-write
// classification persisted in the dead-letter record.
func TestPushPublisherDeadLettersExhaustedRoute(t *testing.T) {
	t.Parallel()

	fixture := newPushPublisherFixture(t)
	input := validPushAcceptanceInput(fixture.now)
	// Pre-load the push route with two spent attempts so the next failure
	// is the third and final one.
	for index := range input.Routes {
		if input.Routes[index].RouteID == "push:user:user-1" {
			input.Routes[index].AttemptCount = 2
			input.Routes[index].MaxAttempts = 3
		}
	}
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), input))
	// Block the stream key so every publication attempt fails.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())

	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 3
	}, time.Second, 10*time.Millisecond)

	// The persisted dead-letter record carries the failure classification.
	deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "push:user:user-1")).Bytes()
	require.NoError(t, err)
	deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
	require.NoError(t, err)
	require.Equal(t, pushFailureClassificationGatewayStreamWrite, deadLetter.FailureClassification)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "dead_letter", pushFailureClassificationGatewayStreamWrite))
	require.True(t, fixture.telemetry.hasRouteDeadLetter("push", pushFailureClassificationGatewayStreamWrite))
}
|
||||
|
||||
// TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publisher replicas against the same store and verifies the route lease
// ensures the route is published into the Gateway stream exactly once.
func TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
	t.Parallel()

	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))

	// Second replica shares the store and stream but gets its own worker
	// token (assigned inside NewPushPublisher).
	otherPublisher, err := NewPushPublisher(PushPublisherConfig{
		Store:               fixture.store,
		GatewayStream:       fixture.gatewayStream,
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Clock:               newSteppingClock(fixture.now, time.Millisecond),
	}, testWorkerLogger())
	require.NoError(t, err)

	first := runPushPublisher(t, fixture.publisher)
	defer first.stop(t)
	second := runPushPublisher(t, otherPublisher)
	defer second.stop(t)

	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)

	// Despite two competing replicas, only one event may exist.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
}
|
||||
|
||||
// pushPublisherFixture bundles the miniredis-backed collaborators shared by
// the push publisher tests.
type pushPublisherFixture struct {
	client        *redis.Client               // client connected to the per-test miniredis server
	store         *redisstate.AcceptanceStore // durable acceptance/route state
	publisher     *PushPublisher              // worker under test
	gatewayStream string                      // outbound Gateway client-events stream name
	now           time.Time                   // base instant used to build acceptance inputs
	clock         *steppingClock              // deterministic clock advancing per reading
	telemetry     *recordingWorkerTelemetry   // in-memory telemetry recorder for assertions
}
|
||||
|
||||
// newPushPublisherFixture wires a PushPublisher against a fresh miniredis
// instance with short lease/backoff/poll settings so tests converge quickly,
// registering cleanup on t.
func newPushPublisherFixture(t *testing.T) pushPublisherFixture {
	t.Helper()

	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})

	store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	clock := newSteppingClock(now, time.Millisecond)
	telemetry := &recordingWorkerTelemetry{}
	publisher, err := NewPushPublisher(PushPublisherConfig{
		Store:               store,
		GatewayStream:       "gateway:client-events",
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Telemetry:           telemetry,
		Clock:               clock,
	}, testWorkerLogger())
	require.NoError(t, err)

	return pushPublisherFixture{
		client:        client,
		store:         store,
		publisher:     publisher,
		gatewayStream: "gateway:client-events",
		now:           now,
		clock:         clock,
		telemetry:     telemetry,
	}
}
|
||||
|
||||
// validPushAcceptanceInput builds a fully-populated, well-formed acceptance
// input anchored at now: one notification for user-1 with a push route and an
// email route (both pending), plus a matching idempotency record that expires
// seven days after now.
func validPushAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			RequestID:          "request-1",
			TraceID:            "trace-1",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			// Push route: 3 attempts max, immediately eligible (NextAttemptAt=now).
			{
				NotificationID: "1775121700000-0",
				RouteID:        "push:user:user-1",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    3,
				NextAttemptAt:  now,
				// NOTE(review): a resolved email on a push route looks odd —
				// presumably routes carry all resolved contact data regardless
				// of channel; confirm against the route model.
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			// Email route for the same notification: higher attempt budget (7).
			{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		// Idempotency record mirrors the notification's key/fingerprint and
		// expires after the store's 7-day idempotency TTL.
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameMaster,
			IdempotencyKey:     "game-123:turn-54",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}
|
||||
|
||||
// runningPushPublisher tracks a publisher started by runPushPublisher: cancel
// stops its context, and resultCh carries the single error returned by Run.
type runningPushPublisher struct {
	cancel   context.CancelFunc
	resultCh chan error
}
|
||||
|
||||
func runPushPublisher(t *testing.T, publisher *PushPublisher) runningPushPublisher {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- publisher.Run(ctx)
|
||||
}()
|
||||
|
||||
return runningPushPublisher{
|
||||
cancel: cancel,
|
||||
resultCh: resultCh,
|
||||
}
|
||||
}
|
||||
|
||||
func (r runningPushPublisher) stop(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
r.cancel()
|
||||
|
||||
select {
|
||||
case err := <-r.resultCh:
|
||||
require.ErrorIs(t, err, context.Canceled)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "push publisher did not stop")
|
||||
}
|
||||
}
|
||||
|
||||
// steppingClock is a deterministic test clock: every Now() call returns the
// current instant and then advances it by a fixed step. mu makes it safe for
// concurrent use by the publisher's goroutines.
type steppingClock struct {
	mu      sync.Mutex
	current time.Time     // next instant Now() will return
	step    time.Duration // advance applied after each Now() call
}
|
||||
|
||||
func newSteppingClock(start time.Time, step time.Duration) *steppingClock {
|
||||
return &steppingClock{
|
||||
current: start.UTC().Truncate(time.Millisecond),
|
||||
step: step,
|
||||
}
|
||||
}
|
||||
|
||||
func (clock *steppingClock) Now() time.Time {
|
||||
clock.mu.Lock()
|
||||
defer clock.mu.Unlock()
|
||||
|
||||
now := clock.current
|
||||
clock.current = clock.current.Add(clock.step).UTC().Truncate(time.Millisecond)
|
||||
|
||||
return now
|
||||
}
|
||||
|
||||
// testWorkerLogger returns a logger that discards all output, keeping worker
// code paths exercised without polluting test logs.
func testWorkerLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
|
||||
@@ -0,0 +1,184 @@
|
||||
package worker
|
||||
|
||||
import (
	"context"
	"slices"
	"sync"
)
|
||||
|
||||
// recordingWorkerTelemetry is a thread-safe in-memory telemetry test double:
// each Record* method appends a value record under mu, and the has* helpers
// query what was captured.
// NOTE(review): presumably satisfies the worker's telemetry interface —
// confirm against the PushPublisherConfig.Telemetry field type.
type recordingWorkerTelemetry struct {
	mu sync.Mutex

	intentOutcomes       []intentOutcomeTelemetryRecord
	malformedIntents     []malformedIntentTelemetryRecord
	userEnrichment       []userEnrichmentTelemetryRecord
	routePublishAttempts []routePublishTelemetryRecord
	routeRetries         []routeTelemetryRecord
	routeDeadLetters     []routeDeadLetterTelemetryRecord
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) RecordIntentOutcome(_ context.Context, notificationType string, producer string, audienceKind string, outcome string) {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
telemetry.intentOutcomes = append(telemetry.intentOutcomes, intentOutcomeTelemetryRecord{
|
||||
notificationType: notificationType,
|
||||
producer: producer,
|
||||
audienceKind: audienceKind,
|
||||
outcome: outcome,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) RecordMalformedIntent(_ context.Context, failureCode string, notificationType string, producer string) {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
telemetry.malformedIntents = append(telemetry.malformedIntents, malformedIntentTelemetryRecord{
|
||||
failureCode: failureCode,
|
||||
notificationType: notificationType,
|
||||
producer: producer,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) RecordUserEnrichmentAttempt(_ context.Context, notificationType string, result string) {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
telemetry.userEnrichment = append(telemetry.userEnrichment, userEnrichmentTelemetryRecord{
|
||||
notificationType: notificationType,
|
||||
result: result,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) RecordRoutePublishAttempt(_ context.Context, channel string, notificationType string, result string, failureClassification string) {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
telemetry.routePublishAttempts = append(telemetry.routePublishAttempts, routePublishTelemetryRecord{
|
||||
channel: channel,
|
||||
notificationType: notificationType,
|
||||
result: result,
|
||||
failureClassification: failureClassification,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) RecordRouteRetry(_ context.Context, channel string, notificationType string) {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
telemetry.routeRetries = append(telemetry.routeRetries, routeTelemetryRecord{
|
||||
channel: channel,
|
||||
notificationType: notificationType,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) RecordRouteDeadLetter(_ context.Context, channel string, notificationType string, failureClassification string) {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
telemetry.routeDeadLetters = append(telemetry.routeDeadLetters, routeDeadLetterTelemetryRecord{
|
||||
channel: channel,
|
||||
notificationType: notificationType,
|
||||
failureClassification: failureClassification,
|
||||
})
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) hasIntentOutcome(outcome string) bool {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
for _, record := range telemetry.intentOutcomes {
|
||||
if record.outcome == outcome {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) hasMalformedIntent(failureCode string) bool {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
for _, record := range telemetry.malformedIntents {
|
||||
if record.failureCode == failureCode {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) hasRoutePublishAttempt(channel string, result string, failureClassification string) bool {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
for _, record := range telemetry.routePublishAttempts {
|
||||
if record.channel == channel && record.result == result && record.failureClassification == failureClassification {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) hasRouteRetry(channel string) bool {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
for _, record := range telemetry.routeRetries {
|
||||
if record.channel == channel {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (telemetry *recordingWorkerTelemetry) hasRouteDeadLetter(channel string, failureClassification string) bool {
|
||||
telemetry.mu.Lock()
|
||||
defer telemetry.mu.Unlock()
|
||||
|
||||
for _, record := range telemetry.routeDeadLetters {
|
||||
if record.channel == channel && record.failureClassification == failureClassification {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// intentOutcomeTelemetryRecord is one captured RecordIntentOutcome call.
type intentOutcomeTelemetryRecord struct {
	notificationType string
	producer         string
	audienceKind     string
	outcome          string
}
|
||||
|
||||
// malformedIntentTelemetryRecord is one captured RecordMalformedIntent call.
type malformedIntentTelemetryRecord struct {
	failureCode      string
	notificationType string
	producer         string
}
|
||||
|
||||
// userEnrichmentTelemetryRecord is one captured RecordUserEnrichmentAttempt
// call.
type userEnrichmentTelemetryRecord struct {
	notificationType string
	result           string
}
|
||||
|
||||
// routePublishTelemetryRecord is one captured RecordRoutePublishAttempt call.
type routePublishTelemetryRecord struct {
	channel               string
	notificationType      string
	result                string
	failureClassification string
}
|
||||
|
||||
// routeTelemetryRecord is one captured RecordRouteRetry call.
type routeTelemetryRecord struct {
	channel          string
	notificationType string
}
|
||||
|
||||
// routeDeadLetterTelemetryRecord is one captured RecordRouteDeadLetter call.
type routeDeadLetterTelemetryRecord struct {
	channel               string
	notificationType      string
	failureClassification string
}
|
||||
Reference in New Issue
Block a user