feat: notification service

This commit is contained in:
Ilia Denisov
2026-04-22 08:49:45 +02:00
committed by GitHub
parent 5b7593e6f6
commit 32dc29359a
135 changed files with 21828 additions and 130 deletions
+2
View File
@@ -0,0 +1,2 @@
// Package adapters reserves the adapter namespace of Notification Service.
package adapters
@@ -0,0 +1,72 @@
// Package redisadapter provides the Redis client helpers used by Notification
// Service runtime wiring.
package redisadapter
import (
"context"
"fmt"
"galaxy/notification/internal/config"
"galaxy/notification/internal/telemetry"
"github.com/redis/go-redis/extra/redisotel/v9"
"github.com/redis/go-redis/v9"
)
// NewClient builds a Redis client from the connection settings in cfg.
//
// cfg.OperationTimeout is applied uniformly as the dial, read, and write
// timeout of the returned client.
func NewClient(cfg config.RedisConfig) *redis.Client {
	timeout := cfg.OperationTimeout
	options := redis.Options{
		Addr:         cfg.Addr,
		Username:     cfg.Username,
		Password:     cfg.Password,
		DB:           cfg.DB,
		TLSConfig:    cfg.TLSConfig(),
		DialTimeout:  timeout,
		ReadTimeout:  timeout,
		WriteTimeout: timeout,
	}

	return redis.NewClient(&options)
}
// InstrumentClient wires redisotel tracing and metrics instrumentation into
// client using the providers exposed by telemetryRuntime.
//
// A nil client is an error. A nil telemetryRuntime is not: the client is left
// untouched and nil is returned. Tracing is instrumented before metrics, and
// the first failure is returned wrapped with the step that failed.
func InstrumentClient(client *redis.Client, telemetryRuntime *telemetry.Runtime) error {
	switch {
	case client == nil:
		return fmt.Errorf("instrument redis client: nil client")
	case telemetryRuntime == nil:
		return nil
	}

	tracingErr := redisotel.InstrumentTracing(
		client,
		redisotel.WithTracerProvider(telemetryRuntime.TracerProvider()),
		redisotel.WithDBStatement(false),
	)
	if tracingErr != nil {
		return fmt.Errorf("instrument redis client tracing: %w", tracingErr)
	}

	metricsErr := redisotel.InstrumentMetrics(
		client,
		redisotel.WithMeterProvider(telemetryRuntime.MeterProvider()),
	)
	if metricsErr != nil {
		return fmt.Errorf("instrument redis client metrics: %w", metricsErr)
	}

	return nil
}
// Ping issues one Redis PING through client as a connectivity check.
//
// The call runs under a context derived from ctx that expires after
// cfg.OperationTimeout. A nil client, or any PING failure, is reported as an
// error prefixed with "ping redis".
func Ping(ctx context.Context, cfg config.RedisConfig, client *redis.Client) error {
	if client == nil {
		return fmt.Errorf("ping redis: nil client")
	}

	boundedCtx, release := context.WithTimeout(ctx, cfg.OperationTimeout)
	defer release()

	err := client.Ping(boundedCtx).Err()
	if err == nil {
		return nil
	}

	return fmt.Errorf("ping redis: %w", err)
}
@@ -0,0 +1,140 @@
package redisstate
import (
"context"
"errors"
"fmt"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/redis/go-redis/v9"
)
// AcceptanceStore provides the Redis-backed durable storage used by the
// intent-acceptance use case.
//
// Lookups (GetIdempotency, GetNotification, GetRoute) read through client
// directly; the multi-key acceptance write is delegated to writer.
type AcceptanceStore struct {
// client is the Redis connection used by the Get* lookups.
client *redis.Client
// writer performs the write set behind CreateAcceptance.
writer *AtomicWriter
// keys builds the Redis key names of the stored records.
keys Keyspace
// cfg keeps the retention settings the store was constructed with.
cfg AcceptanceConfig
}
// NewAcceptanceStore returns an acceptance store backed by client.
//
// It fails when client is nil or when NewAtomicWriter rejects client or cfg;
// the latter error is wrapped with the constructor's own prefix.
func NewAcceptanceStore(client *redis.Client, cfg AcceptanceConfig) (*AcceptanceStore, error) {
	if client == nil {
		return nil, errors.New("new notification acceptance store: nil redis client")
	}

	atomicWriter, writerErr := NewAtomicWriter(client, cfg)
	if writerErr != nil {
		return nil, fmt.Errorf("new notification acceptance store: %w", writerErr)
	}

	store := new(AcceptanceStore)
	store.client = client
	store.writer = atomicWriter
	store.keys = Keyspace{}
	store.cfg = cfg

	return store, nil
}
// CreateAcceptance validates input and hands it to the store's AtomicWriter
// to be persisted in Redis.
//
// A writer result matching ErrConflict is translated into
// acceptintent.ErrConflict so callers can test for the service-level
// sentinel; every other failure is wrapped unchanged. A nil store (or a store
// missing its client or writer) and a nil ctx are rejected up front.
func (store *AcceptanceStore) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	if store == nil || store.client == nil || store.writer == nil {
		return errors.New("create notification acceptance: nil store")
	}
	if ctx == nil {
		return errors.New("create notification acceptance: nil context")
	}

	if validationErr := input.Validate(); validationErr != nil {
		return fmt.Errorf("create notification acceptance: %w", validationErr)
	}

	writeErr := store.writer.CreateAcceptance(ctx, input)
	switch {
	case writeErr == nil:
		return nil
	case errors.Is(writeErr, ErrConflict):
		return fmt.Errorf("create notification acceptance: %w", acceptintent.ErrConflict)
	default:
		return fmt.Errorf("create notification acceptance: %w", writeErr)
	}
}
// GetIdempotency reads the idempotency record stored for producer and
// idempotencyKey.
//
// The boolean result reports whether the key exists: a missing key yields
// (zero record, false, nil). Redis failures and payloads that
// UnmarshalIdempotency rejects are returned as wrapped errors.
func (store *AcceptanceStore) GetIdempotency(ctx context.Context, producer intentstream.Producer, idempotencyKey string) (acceptintent.IdempotencyRecord, bool, error) {
	var missing acceptintent.IdempotencyRecord

	if store == nil || store.client == nil {
		return missing, false, errors.New("get notification idempotency: nil store")
	}
	if ctx == nil {
		return missing, false, errors.New("get notification idempotency: nil context")
	}

	redisKey := store.keys.Idempotency(producer, idempotencyKey)
	raw, getErr := store.client.Get(ctx, redisKey).Bytes()
	if errors.Is(getErr, redis.Nil) {
		// redis.Nil signals an absent key, which is not a failure here.
		return missing, false, nil
	}
	if getErr != nil {
		return missing, false, fmt.Errorf("get notification idempotency: %w", getErr)
	}

	decoded, decodeErr := UnmarshalIdempotency(raw)
	if decodeErr != nil {
		return missing, false, fmt.Errorf("get notification idempotency: %w", decodeErr)
	}

	return decoded, true, nil
}
// GetNotification reads the notification record stored under notificationID.
//
// The boolean result reports whether the key exists: a missing key yields
// (zero record, false, nil). Redis failures and payloads that
// UnmarshalNotification rejects are returned as wrapped errors.
func (store *AcceptanceStore) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) {
	var missing acceptintent.NotificationRecord

	if store == nil || store.client == nil {
		return missing, false, errors.New("get notification record: nil store")
	}
	if ctx == nil {
		return missing, false, errors.New("get notification record: nil context")
	}

	redisKey := store.keys.Notification(notificationID)
	raw, getErr := store.client.Get(ctx, redisKey).Bytes()
	if errors.Is(getErr, redis.Nil) {
		// redis.Nil signals an absent key, which is not a failure here.
		return missing, false, nil
	}
	if getErr != nil {
		return missing, false, fmt.Errorf("get notification record: %w", getErr)
	}

	decoded, decodeErr := UnmarshalNotification(raw)
	if decodeErr != nil {
		return missing, false, fmt.Errorf("get notification record: %w", decodeErr)
	}

	return decoded, true, nil
}
// GetRoute reads the route record identified by notificationID and routeID.
//
// The boolean result reports whether the key exists: a missing key yields
// (zero route, false, nil). Redis failures and payloads that UnmarshalRoute
// rejects are returned as wrapped errors.
func (store *AcceptanceStore) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	var missing acceptintent.NotificationRoute

	if store == nil || store.client == nil {
		return missing, false, errors.New("get notification route: nil store")
	}
	if ctx == nil {
		return missing, false, errors.New("get notification route: nil context")
	}

	redisKey := store.keys.Route(notificationID, routeID)
	raw, getErr := store.client.Get(ctx, redisKey).Bytes()
	if errors.Is(getErr, redis.Nil) {
		// redis.Nil signals an absent key, which is not a failure here.
		return missing, false, nil
	}
	if getErr != nil {
		return missing, false, fmt.Errorf("get notification route: %w", getErr)
	}

	decoded, decodeErr := UnmarshalRoute(raw)
	if decodeErr != nil {
		return missing, false, fmt.Errorf("get notification route: %w", decodeErr)
	}

	return decoded, true, nil
}
@@ -0,0 +1,311 @@
package redisstate
import (
"context"
"io"
"log/slog"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/config"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/require"
)
func TestAcceptanceStoreCreateAcceptancePersistsNotificationRoutesAndSchedule(t *testing.T) {
t.Parallel()
server := miniredis.RunT(t)
client := newTestRedisClient(t, server)
store, err := NewAcceptanceStore(client, AcceptanceConfig{
RecordTTL: 24 * time.Hour,
DeadLetterTTL: 72 * time.Hour,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
now := time.UnixMilli(1775121700000).UTC()
input := validAdminAcceptanceInput(now)
require.NoError(t, store.CreateAcceptance(context.Background(), input))
notificationRecord, found, err := store.GetNotification(context.Background(), input.Notification.NotificationID)
require.NoError(t, err)
require.True(t, found)
require.Equal(t, input.Notification.NotificationID, notificationRecord.NotificationID)
idempotencyRecord, found, err := store.GetIdempotency(context.Background(), input.Idempotency.Producer, input.Idempotency.IdempotencyKey)
require.NoError(t, err)
require.True(t, found)
require.Equal(t, input.Idempotency.RequestFingerprint, idempotencyRecord.RequestFingerprint)
pushRoutePayload, err := client.Get(context.Background(), Keyspace{}.Route(input.Notification.NotificationID, "push:email:owner@example.com")).Bytes()
require.NoError(t, err)
pushRoute, err := UnmarshalRoute(pushRoutePayload)
require.NoError(t, err)
require.Equal(t, acceptintent.RouteStatusSkipped, pushRoute.Status)
emailRouteKey := Keyspace{}.Route(input.Notification.NotificationID, "email:email:owner@example.com")
emailRoutePayload, err := client.Get(context.Background(), emailRouteKey).Bytes()
require.NoError(t, err)
emailRoute, err := UnmarshalRoute(emailRoutePayload)
require.NoError(t, err)
require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)
scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
require.NoError(t, err)
require.Len(t, scheduled, 1)
require.Equal(t, emailRouteKey, scheduled[0].Member)
require.Equal(t, float64(now.UnixMilli()), scheduled[0].Score)
notificationTTL, err := client.PTTL(context.Background(), Keyspace{}.Notification(input.Notification.NotificationID)).Result()
require.NoError(t, err)
require.Greater(t, notificationTTL, 23*time.Hour)
require.LessOrEqual(t, notificationTTL, 24*time.Hour)
routeTTL, err := client.PTTL(context.Background(), emailRouteKey).Result()
require.NoError(t, err)
require.Greater(t, routeTTL, 23*time.Hour)
require.LessOrEqual(t, routeTTL, 24*time.Hour)
idempotencyTTL, err := client.PTTL(context.Background(), Keyspace{}.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)).Result()
require.NoError(t, err)
require.Greater(t, idempotencyTTL, 6*24*time.Hour)
require.LessOrEqual(t, idempotencyTTL, 7*24*time.Hour)
}
// TestMalformedIntentStoreRecordPersistsEntry records one malformed-intent
// entry and checks that it can be read back from its Redis key, decodes with
// the same stream entry ID and failure code, and carries a TTL in the
// (71h, 72h] window.
func TestMalformedIntentStoreRecordPersistsEntry(t *testing.T) {
	t.Parallel()

	ctx := context.Background()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)

	rawFields := map[string]any{
		"notification_type": "game.turn.ready",
	}
	entry := malformedintent.Entry{
		StreamEntryID:    "1775121700000-0",
		NotificationType: "game.turn.ready",
		Producer:         "game_master",
		IdempotencyKey:   "game-123:turn-54",
		FailureCode:      malformedintent.FailureCodeInvalidPayload,
		FailureMessage:   "payload_json.turn_number is required",
		RawFields:        rawFields,
		RecordedAt:       time.UnixMilli(1775121700000).UTC(),
	}

	require.NoError(t, store.Record(ctx, entry))

	entryKey := Keyspace{}.MalformedIntent(entry.StreamEntryID)

	payload, err := client.Get(ctx, entryKey).Bytes()
	require.NoError(t, err)

	decoded, err := UnmarshalMalformedIntent(payload)
	require.NoError(t, err)
	require.Equal(t, entry.StreamEntryID, decoded.StreamEntryID)
	require.Equal(t, entry.FailureCode, decoded.FailureCode)

	remaining, err := client.PTTL(ctx, entryKey).Result()
	require.NoError(t, err)
	require.Greater(t, remaining, 71*time.Hour)
	require.LessOrEqual(t, remaining, 72*time.Hour)
}
// TestStreamOffsetStoreLoadAndSave checks that Load reports "not found"
// before any offset is saved and returns the saved entry ID afterwards.
func TestStreamOffsetStoreLoadAndSave(t *testing.T) {
	t.Parallel()

	const (
		stream  = "notification:intents"
		entryID = "1775121700000-0"
	)
	ctx := context.Background()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewStreamOffsetStore(client)
	require.NoError(t, err)

	_, found, err := store.Load(ctx, stream)
	require.NoError(t, err)
	require.False(t, found)

	require.NoError(t, store.Save(ctx, stream, entryID))

	loaded, found, err := store.Load(ctx, stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, entryID, loaded)
}
// TestIntentStreamLagReaderReadsOldestUnprocessedEntry appends two stream
// entries and checks that the lag snapshot tracks the saved offset: it points
// at the first entry with no offset saved, at the second once the first is
// saved, and is nil once the second is saved too.
func TestIntentStreamLagReaderReadsOldestUnprocessedEntry(t *testing.T) {
	t.Parallel()

	const stream = "notification:intents"
	ctx := context.Background()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewStreamOffsetStore(client)
	require.NoError(t, err)

	reader, err := NewIntentStreamLagReader(store, stream)
	require.NoError(t, err)

	// appendEntry adds one entry with an explicit ID and returns the ID
	// reported by Redis.
	appendEntry := func(id string, payload string) string {
		t.Helper()

		storedID, addErr := client.XAdd(ctx, &redis.XAddArgs{
			Stream: stream,
			ID:     id,
			Values: map[string]any{"payload": payload},
		}).Result()
		require.NoError(t, addErr)

		return storedID
	}

	firstID := appendEntry("1775121700000-0", "first")
	secondID := appendEntry("1775121701000-0", "second")

	snapshot, err := reader.ReadIntentStreamLagSnapshot(ctx)
	require.NoError(t, err)
	require.NotNil(t, snapshot.OldestUnprocessedAt)
	require.Equal(t, time.UnixMilli(1775121700000).UTC(), *snapshot.OldestUnprocessedAt)

	require.NoError(t, store.Save(ctx, stream, firstID))

	snapshot, err = reader.ReadIntentStreamLagSnapshot(ctx)
	require.NoError(t, err)
	require.NotNil(t, snapshot.OldestUnprocessedAt)
	require.Equal(t, time.UnixMilli(1775121701000).UTC(), *snapshot.OldestUnprocessedAt)

	require.NoError(t, store.Save(ctx, stream, secondID))

	snapshot, err = reader.ReadIntentStreamLagSnapshot(ctx)
	require.NoError(t, err)
	require.Nil(t, snapshot.OldestUnprocessedAt)
}
// TestAcceptanceStoreWorksWithAcceptIntentService plugs the Redis-backed
// store into the acceptintent service, executes one admin-email intent, and
// checks that the outcome is "accepted" and the notification record can be
// read back through the store.
func TestAcceptanceStoreWorksWithAcceptIntentService(t *testing.T) {
	t.Parallel()

	const notificationID = "1775121700000-0"
	ctx := context.Background()

	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)

	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	discardLogger := slog.New(slog.NewTextHandler(io.Discard, nil))
	clock := fixedClock{now: time.UnixMilli(1775121700000).UTC()}

	service, err := acceptintent.New(acceptintent.Config{
		Store:            store,
		UserDirectory:    staticUserDirectory{},
		Clock:            clock,
		Logger:           discardLogger,
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting: config.AdminRoutingConfig{
			LobbyApplicationSubmitted: []string{"owner@example.com"},
		},
	})
	require.NoError(t, err)

	intent := intentstream.Intent{
		NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted,
		Producer:         intentstream.ProducerGameLobby,
		AudienceKind:     intentstream.AudienceKindAdminEmail,
		IdempotencyKey:   "game-456:application-submitted:user-42",
		OccurredAt:       time.UnixMilli(1775121700002).UTC(),
		PayloadJSON:      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
	}

	result, err := service.Execute(ctx, acceptintent.AcceptInput{
		NotificationID: notificationID,
		Intent:         intent,
	})
	require.NoError(t, err)
	require.Equal(t, acceptintent.OutcomeAccepted, result.Outcome)

	stored, found, err := store.GetNotification(ctx, notificationID)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, notificationID, stored.NotificationID)
}
// fixedClock is a test clock that always reports the same instant.
type fixedClock struct {
	// now is the instant returned by every Now call.
	now time.Time
}

// Now returns the instant the clock was constructed with.
func (c fixedClock) Now() time.Time {
	return c.now
}
// validAdminAcceptanceInput returns an acceptance write set for one
// admin-email "lobby application submitted" notification, with every
// timestamp set to now.
//
// The set contains a skipped push route, a pending email route due at now,
// and an idempotency record that expires seven days after now.
func validAdminAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	const (
		notificationID = "1775121700000-0"
		idempotencyKey = "game-456:application-submitted:user-42"
		fingerprint    = "sha256:deadbeef"
		recipientRef   = "email:owner@example.com"
		ownerEmail     = "owner@example.com"
		ownerLocale    = "en"
	)

	notification := acceptintent.NotificationRecord{
		NotificationID:     notificationID,
		NotificationType:   intentstream.NotificationTypeLobbyApplicationSubmitted,
		Producer:           intentstream.ProducerGameLobby,
		AudienceKind:       intentstream.AudienceKindAdminEmail,
		PayloadJSON:        `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
		IdempotencyKey:     idempotencyKey,
		RequestFingerprint: fingerprint,
		OccurredAt:         now,
		AcceptedAt:         now,
		UpdatedAt:          now,
	}

	skippedPushRoute := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RouteID:        "push:email:owner@example.com",
		Channel:        intentstream.ChannelPush,
		RecipientRef:   recipientRef,
		Status:         acceptintent.RouteStatusSkipped,
		AttemptCount:   0,
		MaxAttempts:    3,
		ResolvedEmail:  ownerEmail,
		ResolvedLocale: ownerLocale,
		CreatedAt:      now,
		UpdatedAt:      now,
		SkippedAt:      now,
	}

	pendingEmailRoute := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RouteID:        "email:email:owner@example.com",
		Channel:        intentstream.ChannelEmail,
		RecipientRef:   recipientRef,
		Status:         acceptintent.RouteStatusPending,
		AttemptCount:   0,
		MaxAttempts:    7,
		NextAttemptAt:  now,
		ResolvedEmail:  ownerEmail,
		ResolvedLocale: ownerLocale,
		CreatedAt:      now,
		UpdatedAt:      now,
	}

	idempotency := acceptintent.IdempotencyRecord{
		Producer:           intentstream.ProducerGameLobby,
		IdempotencyKey:     idempotencyKey,
		NotificationID:     notificationID,
		RequestFingerprint: fingerprint,
		CreatedAt:          now,
		ExpiresAt:          now.Add(7 * 24 * time.Hour),
	}

	return acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{skippedPushRoute, pendingEmailRoute},
		Idempotency:  idempotency,
	}
}
// newTestRedisClient returns a go-redis client connected to the given
// miniredis server, with Protocol pinned to 2 and client identity disabled.
// The client is closed via t.Cleanup, and a Close error fails the test.
func newTestRedisClient(t *testing.T, server *miniredis.Miniredis) *redis.Client {
	t.Helper()

	options := &redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	}
	client := redis.NewClient(options)

	t.Cleanup(func() {
		closeErr := client.Close()
		require.NoError(t, closeErr)
	})

	return client
}
// staticUserDirectory is a test user directory that knows no users.
type staticUserDirectory struct{}

// GetUserByID ignores its arguments and always reports
// acceptintent.ErrRecipientNotFound alongside a zero UserRecord.
func (staticUserDirectory) GetUserByID(_ context.Context, _ string) (acceptintent.UserRecord, error) {
	var none acceptintent.UserRecord

	return none, acceptintent.ErrRecipientNotFound
}
@@ -0,0 +1,157 @@
package redisstate
import (
"context"
"errors"
"fmt"
"time"
"galaxy/notification/internal/service/acceptintent"
"github.com/redis/go-redis/v9"
)
// AcceptanceConfig carries the retention periods applied to the durable state
// written when a notification intent is accepted.
type AcceptanceConfig struct {
	// RecordTTL is how long notification and route records are retained.
	RecordTTL time.Duration

	// DeadLetterTTL is how long route dead-letter entries are retained.
	DeadLetterTTL time.Duration

	// IdempotencyTTL is how long idempotency reservations are retained.
	IdempotencyTTL time.Duration
}

// Validate checks the retention settings in declaration order and returns an
// error naming the first one that is zero or negative. It returns nil when
// every setting is positive.
func (cfg AcceptanceConfig) Validate() error {
	if cfg.RecordTTL <= 0 {
		return fmt.Errorf("record ttl must be positive")
	}
	if cfg.DeadLetterTTL <= 0 {
		return fmt.Errorf("dead-letter ttl must be positive")
	}
	if cfg.IdempotencyTTL <= 0 {
		return fmt.Errorf("idempotency ttl must be positive")
	}

	return nil
}
// AtomicWriter performs the minimal multi-key Redis mutations required by
// notification intent acceptance.
//
// Construct it with NewAtomicWriter, which rejects a nil client and an
// invalid AcceptanceConfig.
type AtomicWriter struct {
// client is the Redis connection the transaction runs on.
client *redis.Client
// keys builds the Redis key names written by CreateAcceptance.
keys Keyspace
// cfg supplies the TTLs applied to the written records.
cfg AcceptanceConfig
}
// NewAtomicWriter returns a writer that runs acceptance mutations on client
// using the retention settings in cfg.
//
// It fails when client is nil or when cfg.Validate reports a non-positive
// TTL; the validation error is wrapped with the constructor's prefix.
func NewAtomicWriter(client *redis.Client, cfg AcceptanceConfig) (*AtomicWriter, error) {
	if client == nil {
		return nil, errors.New("new notification redis atomic writer: nil client")
	}

	validationErr := cfg.Validate()
	if validationErr != nil {
		return nil, fmt.Errorf("new notification redis atomic writer: %w", validationErr)
	}

	writer := new(AtomicWriter)
	writer.client = client
	writer.keys = Keyspace{}
	writer.cfg = cfg

	return writer, nil
}
// CreateAcceptance writes one notification record, every route derived from
// it, and the matching idempotency record inside a single WATCH-guarded Redis
// transaction.
//
// All payloads are encoded before the transaction starts. Inside it, every
// target key must be absent; the writes are then queued with TxPipelined.
// Notification and route keys get cfg.RecordTTL, the idempotency key gets
// cfg.IdempotencyTTL, and each route whose status is RouteStatusPending is
// added to the route schedule sorted set, scored by its NextAttemptAt in Unix
// milliseconds.
//
// It returns ErrConflict when a target key already exists or when Redis
// reports redis.TxFailedErr. Every other failure is wrapped with the
// operation prefix.
func (writer *AtomicWriter) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	if writer == nil || writer.client == nil {
		return errors.New("create notification acceptance in redis: nil writer")
	}
	if ctx == nil {
		return errors.New("create notification acceptance in redis: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}

	encodedNotification, err := MarshalNotification(input.Notification)
	if err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}
	encodedIdempotency, err := MarshalIdempotency(input.Idempotency)
	if err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}

	routeCount := len(input.Routes)
	routeKeys := make([]string, 0, routeCount)
	encodedRoutes := make([][]byte, 0, routeCount)
	schedule := make([]redis.Z, 0, routeCount)
	for position := range input.Routes {
		route := input.Routes[position]

		encoded, err := MarshalRoute(route)
		if err != nil {
			return fmt.Errorf("create notification acceptance in redis: route %d: %w", position, err)
		}

		key := writer.keys.Route(route.NotificationID, route.RouteID)
		routeKeys = append(routeKeys, key)
		encodedRoutes = append(encodedRoutes, encoded)

		// Only pending routes are scheduled for a publication attempt.
		if route.Status != acceptintent.RouteStatusPending {
			continue
		}
		schedule = append(schedule, redis.Z{
			Score:  float64(route.NextAttemptAt.UTC().UnixMilli()),
			Member: key,
		})
	}

	notificationKey := writer.keys.Notification(input.Notification.NotificationID)
	idempotencyKey := writer.keys.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)

	watched := make([]string, 0, 2+routeCount)
	watched = append(watched, notificationKey, idempotencyKey)
	watched = append(watched, routeKeys...)

	// queueWrites enqueues the full write set on the transaction pipeline.
	queueWrites := func(pipe redis.Pipeliner) error {
		pipe.Set(ctx, notificationKey, encodedNotification, writer.cfg.RecordTTL)
		pipe.Set(ctx, idempotencyKey, encodedIdempotency, writer.cfg.IdempotencyTTL)
		for position, key := range routeKeys {
			pipe.Set(ctx, key, encodedRoutes[position], writer.cfg.RecordTTL)
		}
		for _, member := range schedule {
			pipe.ZAdd(ctx, writer.keys.RouteSchedule(), member)
		}
		return nil
	}

	// transaction runs under WATCH on every target key: first prove that
	// none of them exists, then execute the queued writes.
	transaction := func(tx *redis.Tx) error {
		for _, key := range watched {
			if err := ensureKeyAbsent(ctx, tx, key); err != nil {
				return err
			}
		}

		_, err := tx.TxPipelined(ctx, queueWrites)
		return err
	}

	watchErr := writer.client.Watch(ctx, transaction, watched...)
	if watchErr == nil {
		return nil
	}
	if errors.Is(watchErr, ErrConflict) || errors.Is(watchErr, redis.TxFailedErr) {
		return ErrConflict
	}

	return fmt.Errorf("create notification acceptance in redis: %w", watchErr)
}
// ensureKeyAbsent asks Redis, through tx, whether key exists. It returns
// ErrConflict when it does, the EXISTS error unchanged when the command
// fails, and nil when the key is absent.
func ensureKeyAbsent(ctx context.Context, tx *redis.Tx, key string) error {
	count, err := tx.Exists(ctx, key).Result()
	switch {
	case err != nil:
		return err
	case count > 0:
		return ErrConflict
	default:
		return nil
	}
}
@@ -0,0 +1,547 @@
package redisstate
import (
"bytes"
"encoding/json"
"fmt"
"io"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
)
// StreamOffset stores the persisted progress of the plain-XREAD intent
// consumer: which stream it reads, the last entry it durably processed, and
// when that progress was last written.
type StreamOffset struct {
// Stream stores the Redis Stream name.
Stream string
// LastProcessedEntryID stores the last durably processed Redis Stream entry
// identifier.
LastProcessedEntryID string
// UpdatedAt stores when the offset record was last updated.
UpdatedAt time.Time
}
// DeadLetterEntry stores one terminal route-publication failure recorded for
// later operator inspection.
//
// MarshalDeadLetter and UnmarshalDeadLetter convert it to and from its JSON
// wire form; MarshalDeadLetter calls the entry's Validate method (defined
// outside this excerpt) before encoding.
type DeadLetterEntry struct {
// NotificationID stores the owning notification identifier.
NotificationID string
// RouteID stores the exhausted route identifier.
RouteID string
// Channel stores the failed route channel.
Channel intentstream.Channel
// RecipientRef stores the stable failed recipient slot identifier.
RecipientRef string
// FinalAttemptCount stores how many publication attempts were consumed.
FinalAttemptCount int
// MaxAttempts stores the configured retry budget for Channel.
MaxAttempts int
// FailureClassification stores the stable classified failure reason.
FailureClassification string
// FailureMessage stores the last failure detail.
FailureMessage string
// CreatedAt stores when the route moved to dead_letter.
CreatedAt time.Time
// RecoveryHint stores the optional operator-facing recovery hint.
RecoveryHint string
}
// notificationRecordJSON is the JSON wire shape that MarshalNotification
// writes and UnmarshalNotification reads for an
// acceptintent.NotificationRecord. The *AtMS fields hold Unix milliseconds.
type notificationRecordJSON struct {
NotificationID string `json:"notification_id"`
NotificationType intentstream.NotificationType `json:"notification_type"`
Producer intentstream.Producer `json:"producer"`
AudienceKind intentstream.AudienceKind `json:"audience_kind"`
RecipientUserIDs []string `json:"recipient_user_ids,omitempty"`
PayloadJSON string `json:"payload_json"`
IdempotencyKey string `json:"idempotency_key"`
RequestFingerprint string `json:"request_fingerprint"`
RequestID string `json:"request_id,omitempty"`
TraceID string `json:"trace_id,omitempty"`
OccurredAtMS int64 `json:"occurred_at_ms"`
AcceptedAtMS int64 `json:"accepted_at_ms"`
UpdatedAtMS int64 `json:"updated_at_ms"`
}
// notificationRouteJSON is the JSON wire shape that MarshalRoute writes and
// UnmarshalRoute reads for an acceptintent.NotificationRoute.
//
// The *AtMS fields hold Unix milliseconds. The pointer-typed ones are
// optional: they are omitted from the JSON when nil, and UnmarshalRoute
// leaves the corresponding time field at its zero value when they are absent.
type notificationRouteJSON struct {
NotificationID string `json:"notification_id"`
RouteID string `json:"route_id"`
Channel intentstream.Channel `json:"channel"`
RecipientRef string `json:"recipient_ref"`
Status acceptintent.RouteStatus `json:"status"`
AttemptCount int `json:"attempt_count"`
MaxAttempts int `json:"max_attempts"`
NextAttemptAtMS *int64 `json:"next_attempt_at_ms,omitempty"`
ResolvedEmail string `json:"resolved_email,omitempty"`
ResolvedLocale string `json:"resolved_locale,omitempty"`
LastErrorClassification string `json:"last_error_classification,omitempty"`
LastErrorMessage string `json:"last_error_message,omitempty"`
LastErrorAtMS *int64 `json:"last_error_at_ms,omitempty"`
CreatedAtMS int64 `json:"created_at_ms"`
UpdatedAtMS int64 `json:"updated_at_ms"`
PublishedAtMS *int64 `json:"published_at_ms,omitempty"`
DeadLetteredAtMS *int64 `json:"dead_lettered_at_ms,omitempty"`
SkippedAtMS *int64 `json:"skipped_at_ms,omitempty"`
}
// idempotencyRecordJSON is the JSON wire shape that MarshalIdempotency writes
// and UnmarshalIdempotency reads for an acceptintent.IdempotencyRecord. The
// *AtMS fields hold Unix milliseconds.
type idempotencyRecordJSON struct {
Producer intentstream.Producer `json:"producer"`
IdempotencyKey string `json:"idempotency_key"`
NotificationID string `json:"notification_id"`
RequestFingerprint string `json:"request_fingerprint"`
CreatedAtMS int64 `json:"created_at_ms"`
ExpiresAtMS int64 `json:"expires_at_ms"`
}
// malformedIntentJSON is the JSON wire shape of one recorded malformed
// intent.
//
// NOTE(review): the codec that uses this type is outside this excerpt;
// presumably RecordedAtMS holds Unix milliseconds like the other *MS fields
// in this file — confirm. RawFields is encoded under the "raw_fields_json"
// key.
type malformedIntentJSON struct {
StreamEntryID string `json:"stream_entry_id"`
NotificationType string `json:"notification_type,omitempty"`
Producer string `json:"producer,omitempty"`
IdempotencyKey string `json:"idempotency_key,omitempty"`
FailureCode malformedintent.FailureCode `json:"failure_code"`
FailureMessage string `json:"failure_message"`
RawFields map[string]any `json:"raw_fields_json"`
RecordedAtMS int64 `json:"recorded_at_ms"`
}
// streamOffsetJSON is the JSON wire shape of a StreamOffset.
//
// NOTE(review): the codec that uses this type is outside this excerpt;
// presumably UpdatedAtMS holds Unix milliseconds like the other *MS fields in
// this file — confirm.
type streamOffsetJSON struct {
Stream string `json:"stream"`
LastProcessedEntryID string `json:"last_processed_entry_id"`
UpdatedAtMS int64 `json:"updated_at_ms"`
}
// deadLetterEntryJSON is the JSON wire shape that MarshalDeadLetter writes
// and UnmarshalDeadLetter reads for a DeadLetterEntry. CreatedAtMS holds Unix
// milliseconds; RecoveryHint is omitted from the JSON when empty.
type deadLetterEntryJSON struct {
NotificationID string `json:"notification_id"`
RouteID string `json:"route_id"`
Channel intentstream.Channel `json:"channel"`
RecipientRef string `json:"recipient_ref"`
FinalAttemptCount int `json:"final_attempt_count"`
MaxAttempts int `json:"max_attempts"`
FailureClassification string `json:"failure_classification"`
FailureMessage string `json:"failure_message"`
CreatedAtMS int64 `json:"created_at_ms"`
RecoveryHint string `json:"recovery_hint,omitempty"`
}
// MarshalNotification validates record and encodes it, through
// marshalStrictJSON, into the JSON form owned by Notification Service.
//
// The recipient list is copied before encoding, and the three timestamps are
// converted with the unixMilli helper. A record that fails Validate is
// rejected with a wrapped error and nothing is encoded.
func MarshalNotification(record acceptintent.NotificationRecord) ([]byte, error) {
	if err := record.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification record: %w", err)
	}

	recipients := append([]string(nil), record.RecipientUserIDs...)
	wire := notificationRecordJSON{
		NotificationID:     record.NotificationID,
		NotificationType:   record.NotificationType,
		Producer:           record.Producer,
		AudienceKind:       record.AudienceKind,
		RecipientUserIDs:   recipients,
		PayloadJSON:        record.PayloadJSON,
		IdempotencyKey:     record.IdempotencyKey,
		RequestFingerprint: record.RequestFingerprint,
		RequestID:          record.RequestID,
		TraceID:            record.TraceID,
		OccurredAtMS:       unixMilli(record.OccurredAt),
		AcceptedAtMS:       unixMilli(record.AcceptedAt),
		UpdatedAtMS:        unixMilli(record.UpdatedAt),
	}

	return marshalStrictJSON(wire)
}
// UnmarshalNotification decodes payload, through unmarshalStrictJSON, into a
// notification record and validates the result.
//
// Millisecond timestamps are converted to UTC time values. Decoding and
// validation failures are returned wrapped, together with a zero record.
func UnmarshalNotification(payload []byte) (acceptintent.NotificationRecord, error) {
	var wire notificationRecordJSON
	if err := unmarshalStrictJSON(payload, &wire); err != nil {
		return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", err)
	}

	// fromMillis converts a Unix-millisecond value to a UTC instant.
	fromMillis := func(millis int64) time.Time {
		return time.UnixMilli(millis).UTC()
	}

	recipients := append([]string(nil), wire.RecipientUserIDs...)
	record := acceptintent.NotificationRecord{
		NotificationID:     wire.NotificationID,
		NotificationType:   wire.NotificationType,
		Producer:           wire.Producer,
		AudienceKind:       wire.AudienceKind,
		RecipientUserIDs:   recipients,
		PayloadJSON:        wire.PayloadJSON,
		IdempotencyKey:     wire.IdempotencyKey,
		RequestFingerprint: wire.RequestFingerprint,
		RequestID:          wire.RequestID,
		TraceID:            wire.TraceID,
		OccurredAt:         fromMillis(wire.OccurredAtMS),
		AcceptedAt:         fromMillis(wire.AcceptedAtMS),
		UpdatedAt:          fromMillis(wire.UpdatedAtMS),
	}

	validationErr := record.Validate()
	if validationErr != nil {
		return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", validationErr)
	}

	return record, nil
}
// MarshalRoute validates route and encodes it, through marshalStrictJSON,
// into the JSON form owned by Notification Service.
//
// Required timestamps are converted with unixMilli and optional ones with
// optionalUnixMilli. A route that fails Validate is rejected with a wrapped
// error and nothing is encoded.
func MarshalRoute(route acceptintent.NotificationRoute) ([]byte, error) {
	if err := route.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification route: %w", err)
	}

	wire := notificationRouteJSON{
		NotificationID:          route.NotificationID,
		RouteID:                 route.RouteID,
		Channel:                 route.Channel,
		RecipientRef:            route.RecipientRef,
		Status:                  route.Status,
		AttemptCount:            route.AttemptCount,
		MaxAttempts:             route.MaxAttempts,
		NextAttemptAtMS:         optionalUnixMilli(route.NextAttemptAt),
		ResolvedEmail:           route.ResolvedEmail,
		ResolvedLocale:          route.ResolvedLocale,
		LastErrorClassification: route.LastErrorClassification,
		LastErrorMessage:        route.LastErrorMessage,
		LastErrorAtMS:           optionalUnixMilli(route.LastErrorAt),
		CreatedAtMS:             unixMilli(route.CreatedAt),
		UpdatedAtMS:             unixMilli(route.UpdatedAt),
		PublishedAtMS:           optionalUnixMilli(route.PublishedAt),
		DeadLetteredAtMS:        optionalUnixMilli(route.DeadLetteredAt),
		SkippedAtMS:             optionalUnixMilli(route.SkippedAt),
	}

	return marshalStrictJSON(wire)
}
// UnmarshalRoute decodes payload, through unmarshalStrictJSON, into a
// notification route and validates the result.
//
// Millisecond timestamps are converted to UTC time values; an optional
// timestamp that is absent from the payload leaves the matching field at the
// zero time. Decoding and validation failures are returned wrapped, together
// with a zero route.
func UnmarshalRoute(payload []byte) (acceptintent.NotificationRoute, error) {
	var wire notificationRouteJSON
	if err := unmarshalStrictJSON(payload, &wire); err != nil {
		return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", err)
	}

	// optionalTime maps an absent (nil) millisecond field to the zero time.
	optionalTime := func(millis *int64) time.Time {
		if millis == nil {
			return time.Time{}
		}
		return time.UnixMilli(*millis).UTC()
	}

	route := acceptintent.NotificationRoute{
		NotificationID:          wire.NotificationID,
		RouteID:                 wire.RouteID,
		Channel:                 wire.Channel,
		RecipientRef:            wire.RecipientRef,
		Status:                  wire.Status,
		AttemptCount:            wire.AttemptCount,
		MaxAttempts:             wire.MaxAttempts,
		NextAttemptAt:           optionalTime(wire.NextAttemptAtMS),
		ResolvedEmail:           wire.ResolvedEmail,
		ResolvedLocale:          wire.ResolvedLocale,
		LastErrorClassification: wire.LastErrorClassification,
		LastErrorMessage:        wire.LastErrorMessage,
		LastErrorAt:             optionalTime(wire.LastErrorAtMS),
		CreatedAt:               time.UnixMilli(wire.CreatedAtMS).UTC(),
		UpdatedAt:               time.UnixMilli(wire.UpdatedAtMS).UTC(),
		PublishedAt:             optionalTime(wire.PublishedAtMS),
		DeadLetteredAt:          optionalTime(wire.DeadLetteredAtMS),
		SkippedAt:               optionalTime(wire.SkippedAtMS),
	}

	validationErr := route.Validate()
	if validationErr != nil {
		return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", validationErr)
	}

	return route, nil
}
// MarshalIdempotency validates record and encodes it, through
// marshalStrictJSON, into the JSON form owned by Notification Service.
//
// Both timestamps are converted with the unixMilli helper. A record that
// fails Validate is rejected with a wrapped error and nothing is encoded.
func MarshalIdempotency(record acceptintent.IdempotencyRecord) ([]byte, error) {
	if err := record.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification idempotency record: %w", err)
	}

	wire := idempotencyRecordJSON{
		Producer:           record.Producer,
		IdempotencyKey:     record.IdempotencyKey,
		NotificationID:     record.NotificationID,
		RequestFingerprint: record.RequestFingerprint,
		CreatedAtMS:        unixMilli(record.CreatedAt),
		ExpiresAtMS:        unixMilli(record.ExpiresAt),
	}

	return marshalStrictJSON(wire)
}
// UnmarshalIdempotency decodes payload, through unmarshalStrictJSON, into an
// idempotency record and validates the result.
//
// Millisecond timestamps are converted to UTC time values. Decoding and
// validation failures are returned wrapped, together with a zero record.
func UnmarshalIdempotency(payload []byte) (acceptintent.IdempotencyRecord, error) {
	var wire idempotencyRecordJSON
	if err := unmarshalStrictJSON(payload, &wire); err != nil {
		return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err)
	}

	createdAt := time.UnixMilli(wire.CreatedAtMS).UTC()
	expiresAt := time.UnixMilli(wire.ExpiresAtMS).UTC()
	record := acceptintent.IdempotencyRecord{
		Producer:           wire.Producer,
		IdempotencyKey:     wire.IdempotencyKey,
		NotificationID:     wire.NotificationID,
		RequestFingerprint: wire.RequestFingerprint,
		CreatedAt:          createdAt,
		ExpiresAt:          expiresAt,
	}

	validationErr := record.Validate()
	if validationErr != nil {
		return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", validationErr)
	}

	return record, nil
}
// MarshalDeadLetter validates entry and encodes it as the strict JSON
// dead-letter document owned by Notification Service.
func MarshalDeadLetter(entry DeadLetterEntry) ([]byte, error) {
	if validationErr := entry.Validate(); validationErr != nil {
		return nil, fmt.Errorf("marshal dead letter entry: %w", validationErr)
	}

	var wire deadLetterEntryJSON
	wire.NotificationID = entry.NotificationID
	wire.RouteID = entry.RouteID
	wire.Channel = entry.Channel
	wire.RecipientRef = entry.RecipientRef
	wire.FinalAttemptCount = entry.FinalAttemptCount
	wire.MaxAttempts = entry.MaxAttempts
	wire.FailureClassification = entry.FailureClassification
	wire.FailureMessage = entry.FailureMessage
	wire.CreatedAtMS = unixMilli(entry.CreatedAt)
	wire.RecoveryHint = entry.RecoveryHint
	return marshalStrictJSON(wire)
}
// UnmarshalDeadLetter decodes one strict JSON dead-letter entry and validates
// the result. The creation timestamp is restored as a UTC time.
func UnmarshalDeadLetter(payload []byte) (DeadLetterEntry, error) {
	var decoded deadLetterEntryJSON
	if decodeErr := unmarshalStrictJSON(payload, &decoded); decodeErr != nil {
		return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", decodeErr)
	}

	result := DeadLetterEntry{
		NotificationID:        decoded.NotificationID,
		RouteID:               decoded.RouteID,
		Channel:               decoded.Channel,
		RecipientRef:          decoded.RecipientRef,
		FinalAttemptCount:     decoded.FinalAttemptCount,
		MaxAttempts:           decoded.MaxAttempts,
		FailureClassification: decoded.FailureClassification,
		FailureMessage:        decoded.FailureMessage,
		CreatedAt:             time.UnixMilli(decoded.CreatedAtMS).UTC(),
		RecoveryHint:          decoded.RecoveryHint,
	}
	if validationErr := result.Validate(); validationErr != nil {
		return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", validationErr)
	}
	return result, nil
}
// MarshalMalformedIntent validates entry and encodes it as the strict JSON
// malformed-intent document owned by Notification Service. RawFields is
// deep-copied before encoding.
func MarshalMalformedIntent(entry malformedintent.Entry) ([]byte, error) {
	if validationErr := entry.Validate(); validationErr != nil {
		return nil, fmt.Errorf("marshal malformed intent: %w", validationErr)
	}

	rawFields := cloneJSONObject(entry.RawFields)
	recordedAtMS := unixMilli(entry.RecordedAt)
	wire := malformedIntentJSON{
		StreamEntryID:    entry.StreamEntryID,
		NotificationType: entry.NotificationType,
		Producer:         entry.Producer,
		IdempotencyKey:   entry.IdempotencyKey,
		FailureCode:      entry.FailureCode,
		FailureMessage:   entry.FailureMessage,
		RawFields:        rawFields,
		RecordedAtMS:     recordedAtMS,
	}
	return marshalStrictJSON(wire)
}
// UnmarshalMalformedIntent decodes one strict JSON malformed-intent entry and
// validates the result. RawFields is deep-copied and the recording timestamp
// is restored as a UTC time.
func UnmarshalMalformedIntent(payload []byte) (malformedintent.Entry, error) {
	var decoded malformedIntentJSON
	if decodeErr := unmarshalStrictJSON(payload, &decoded); decodeErr != nil {
		return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", decodeErr)
	}

	result := malformedintent.Entry{
		StreamEntryID:    decoded.StreamEntryID,
		NotificationType: decoded.NotificationType,
		Producer:         decoded.Producer,
		IdempotencyKey:   decoded.IdempotencyKey,
		FailureCode:      decoded.FailureCode,
		FailureMessage:   decoded.FailureMessage,
		RawFields:        cloneJSONObject(decoded.RawFields),
		RecordedAt:       time.UnixMilli(decoded.RecordedAtMS).UTC(),
	}
	if validationErr := result.Validate(); validationErr != nil {
		return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", validationErr)
	}
	return result, nil
}
// MarshalStreamOffset validates offset and encodes it as the strict JSON
// stream-offset document owned by Notification Service.
func MarshalStreamOffset(offset StreamOffset) ([]byte, error) {
	if validationErr := offset.Validate(); validationErr != nil {
		return nil, fmt.Errorf("marshal stream offset: %w", validationErr)
	}

	wire := streamOffsetJSON{
		Stream:               offset.Stream,
		LastProcessedEntryID: offset.LastProcessedEntryID,
		UpdatedAtMS:          unixMilli(offset.UpdatedAt),
	}
	return marshalStrictJSON(wire)
}
// UnmarshalStreamOffset decodes one strict JSON stream-offset record and
// validates the result. The update timestamp is restored as a UTC time.
func UnmarshalStreamOffset(payload []byte) (StreamOffset, error) {
	var decoded streamOffsetJSON
	if decodeErr := unmarshalStrictJSON(payload, &decoded); decodeErr != nil {
		return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", decodeErr)
	}

	result := StreamOffset{
		Stream:               decoded.Stream,
		LastProcessedEntryID: decoded.LastProcessedEntryID,
		UpdatedAt:            time.UnixMilli(decoded.UpdatedAtMS).UTC(),
	}
	if validationErr := result.Validate(); validationErr != nil {
		return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", validationErr)
	}
	return result, nil
}
// Validate reports whether offset contains a complete persisted consumer
// progress record.
//
// Stream and LastProcessedEntryID must be non-empty. UpdatedAt must be
// non-zero, carry the UTC location, and have no sub-millisecond component.
func (offset StreamOffset) Validate() error {
	switch {
	case offset.Stream == "":
		return fmt.Errorf("stream offset stream must not be empty")
	case offset.LastProcessedEntryID == "":
		return fmt.Errorf("stream offset last processed entry id must not be empty")
	case offset.UpdatedAt.IsZero():
		return fmt.Errorf("stream offset updated at must not be zero")
	// Time.Equal compares instants only, so comparing a time with its own
	// UTC() form can never fail; the location has to be inspected directly.
	case offset.UpdatedAt.Location() != time.UTC:
		return fmt.Errorf("stream offset updated at must be UTC")
	case !offset.UpdatedAt.Equal(offset.UpdatedAt.Truncate(time.Millisecond)):
		return fmt.Errorf("stream offset updated at must use millisecond precision")
	}

	return nil
}
// Validate reports whether entry contains a complete dead-letter record.
//
// All identifying and failure fields must be non-empty, the channel must be
// known, both attempt counters must be positive, and CreatedAt must be
// non-zero, carry the UTC location, and have no sub-millisecond component.
// RecoveryHint is optional and is not checked.
func (entry DeadLetterEntry) Validate() error {
	switch {
	case entry.NotificationID == "":
		return fmt.Errorf("dead letter entry notification id must not be empty")
	case entry.RouteID == "":
		return fmt.Errorf("dead letter entry route id must not be empty")
	case !entry.Channel.IsKnown():
		return fmt.Errorf("dead letter entry channel %q is unsupported", entry.Channel)
	case entry.RecipientRef == "":
		return fmt.Errorf("dead letter entry recipient ref must not be empty")
	case entry.FinalAttemptCount <= 0:
		return fmt.Errorf("dead letter entry final attempt count must be positive")
	case entry.MaxAttempts <= 0:
		return fmt.Errorf("dead letter entry max attempts must be positive")
	case entry.FailureClassification == "":
		return fmt.Errorf("dead letter entry failure classification must not be empty")
	case entry.FailureMessage == "":
		return fmt.Errorf("dead letter entry failure message must not be empty")
	case entry.CreatedAt.IsZero():
		return fmt.Errorf("dead letter entry created at must not be zero")
	// Time.Equal compares instants only, so comparing a time with its own
	// UTC() form can never fail; the location has to be inspected directly.
	case entry.CreatedAt.Location() != time.UTC:
		return fmt.Errorf("dead letter entry created at must be UTC")
	case !entry.CreatedAt.Equal(entry.CreatedAt.Truncate(time.Millisecond)):
		return fmt.Errorf("dead letter entry created at must use millisecond precision")
	}

	return nil
}
// marshalStrictJSON encodes value with encoding/json and returns the encoder's
// result unchanged.
func marshalStrictJSON(value any) ([]byte, error) {
	encoded, err := json.Marshal(value)
	return encoded, err
}
// unmarshalStrictJSON decodes payload into target and rejects anything that is
// not exactly one JSON value: unknown object fields fail the decode, and any
// further JSON token after the first value is reported as
// "unexpected trailing JSON input". Trailing whitespace is accepted.
func unmarshalStrictJSON(payload []byte, target any) error {
	decoder := json.NewDecoder(bytes.NewReader(payload))
	decoder.DisallowUnknownFields()
	if err := decoder.Decode(target); err != nil {
		return err
	}

	// Probe for a following token instead of decoding into a throwaway
	// struct: a second Decode would report trailing objects, arrays, or
	// scalars as unknown-field or type errors rather than as trailing input.
	switch _, err := decoder.Token(); {
	case err == io.EOF:
		return nil
	case err != nil:
		return err
	default:
		return fmt.Errorf("unexpected trailing JSON input")
	}
}
// unixMilli returns value as Unix milliseconds after normalizing it to UTC.
func unixMilli(value time.Time) int64 {
	inUTC := value.UTC()
	return inUTC.UnixMilli()
}

// optionalUnixMilli returns nil for the zero time and otherwise a pointer to
// value expressed as Unix milliseconds.
func optionalUnixMilli(value time.Time) *int64 {
	if !value.IsZero() {
		millis := unixMilli(value)
		return &millis
	}
	return nil
}
// cloneJSONObject returns a deep copy of value. A nil input yields an empty,
// non-nil map.
func cloneJSONObject(value map[string]any) map[string]any {
	// Ranging over a nil map is a no-op, so the nil case needs no branch.
	copied := make(map[string]any, len(value))
	for field, item := range value {
		copied[field] = cloneJSONValue(item)
	}
	return copied
}

// cloneJSONValue deep-copies nested map[string]any and []any containers and
// returns every other value unchanged.
func cloneJSONValue(value any) any {
	if object, isObject := value.(map[string]any); isObject {
		return cloneJSONObject(object)
	}
	if list, isList := value.([]any); isList {
		copied := make([]any, 0, len(list))
		for _, item := range list {
			copied = append(copied, cloneJSONValue(item))
		}
		return copied
	}
	return value
}
@@ -0,0 +1,3 @@
// Package redisstate defines the frozen Redis keyspace, strict JSON records,
// and low-level mutation helpers used by Notification Service durable state.
package redisstate
@@ -0,0 +1,10 @@
package redisstate
import "errors"
var (
	// ErrConflict reports that a Redis mutation could not be applied because
	// one of the watched or newly created keys already existed or changed
	// concurrently. Route completion helpers also return it when the stored
	// route payload or the route-lease token no longer matches what the
	// caller expected.
	ErrConflict = errors.New("redis state conflict")
)
@@ -0,0 +1,105 @@
package redisstate
import (
"encoding/base64"
"fmt"
"strings"
"galaxy/notification/internal/api/intentstream"
)
// defaultPrefix is prepended to every Redis key built by Keyspace.
const defaultPrefix = "notification:"
// Keyspace builds the frozen Notification Service Redis keys. All dynamic key
// segments are encoded with unpadded base64url so raw key structure does not
// depend on caller-provided characters. The type carries no state, so the
// zero value is ready to use.
type Keyspace struct{}
// Notification builds the primary Redis key under which one
// notification_record is stored.
func (Keyspace) Notification(notificationID string) string {
	encodedID := encodeKeyComponent(notificationID)
	return defaultPrefix + "records:" + encodedID
}
// Route builds the primary Redis key under which one notification_route is
// stored. ParseRoute reverses this encoding.
func (Keyspace) Route(notificationID string, routeID string) string {
	prefix := defaultPrefix + "routes:"
	encodedNotificationID := encodeKeyComponent(notificationID)
	encodedRouteID := encodeKeyComponent(routeID)
	return prefix + encodedNotificationID + ":" + encodedRouteID
}
// ParseRoute decodes routeKey back into the notification identifier and route
// identifier it was built from. The key must start with the route prefix and
// contain exactly two colon-separated base64url segments after it.
func (Keyspace) ParseRoute(routeKey string) (string, string, error) {
	routePrefix := defaultPrefix + "routes:"
	if !strings.HasPrefix(routeKey, routePrefix) {
		return "", "", fmt.Errorf("parse route key: %q does not use %q prefix", routeKey, routePrefix)
	}

	// A limit of three is enough to tell "exactly two segments" apart from
	// both fewer and more.
	segments := strings.SplitN(routeKey[len(routePrefix):], ":", 3)
	if len(segments) != 2 {
		return "", "", fmt.Errorf("parse route key: %q must contain exactly two encoded segments", routeKey)
	}

	notificationID, decodeErr := decodeKeyComponent(segments[0])
	if decodeErr != nil {
		return "", "", fmt.Errorf("parse route key: notification id: %w", decodeErr)
	}
	routeID, decodeErr := decodeKeyComponent(segments[1])
	if decodeErr != nil {
		return "", "", fmt.Errorf("parse route key: route id: %w", decodeErr)
	}
	return notificationID, routeID, nil
}
// Idempotency builds the primary Redis key under which one
// notification_idempotency_record is stored for producer and idempotencyKey.
func (Keyspace) Idempotency(producer intentstream.Producer, idempotencyKey string) string {
	prefix := defaultPrefix + "idempotency:"
	encodedProducer := encodeKeyComponent(string(producer))
	encodedKey := encodeKeyComponent(idempotencyKey)
	return prefix + encodedProducer + ":" + encodedKey
}
// DeadLetter builds the primary Redis key under which one
// notification_dead_letter_entry is stored.
func (Keyspace) DeadLetter(notificationID string, routeID string) string {
	prefix := defaultPrefix + "dead_letters:"
	encodedNotificationID := encodeKeyComponent(notificationID)
	encodedRouteID := encodeKeyComponent(routeID)
	return prefix + encodedNotificationID + ":" + encodedRouteID
}
// RouteLease builds the temporary Redis key that coordinates exclusive
// publication of one notification_route across replicas.
func (Keyspace) RouteLease(notificationID string, routeID string) string {
	prefix := defaultPrefix + "route_leases:"
	encodedNotificationID := encodeKeyComponent(notificationID)
	encodedRouteID := encodeKeyComponent(routeID)
	return prefix + encodedNotificationID + ":" + encodedRouteID
}
// MalformedIntent builds the primary Redis key under which one
// malformed-intent record is stored, keyed by its stream entry identifier.
func (Keyspace) MalformedIntent(streamEntryID string) string {
	encodedEntryID := encodeKeyComponent(streamEntryID)
	return defaultPrefix + "malformed_intents:" + encodedEntryID
}
// StreamOffset builds the primary Redis key under which the persisted
// intent-consumer offset for stream is stored.
func (Keyspace) StreamOffset(stream string) string {
	encodedStream := encodeKeyComponent(stream)
	return defaultPrefix + "stream_offsets:" + encodedStream
}
// Intents returns the fixed key of the ingress Redis Stream.
func (Keyspace) Intents() string {
	const suffix = "intents"
	return defaultPrefix + suffix
}
// RouteSchedule returns the fixed key of the route schedule sorted set.
func (Keyspace) RouteSchedule() string {
	const suffix = "route_schedule"
	return defaultPrefix + suffix
}
// encodeKeyComponent encodes value as unpadded base64url so it can be embedded
// in a Redis key without introducing separator characters.
func encodeKeyComponent(value string) string {
	raw := []byte(value)
	encoded := make([]byte, base64.RawURLEncoding.EncodedLen(len(raw)))
	base64.RawURLEncoding.Encode(encoded, raw)
	return string(encoded)
}
// decodeKeyComponent reverses encodeKeyComponent. It returns the decoder's
// error unchanged when value is not valid unpadded base64url.
func decodeKeyComponent(value string) (string, error) {
	raw, decodeErr := base64.RawURLEncoding.DecodeString(value)
	if decodeErr == nil {
		return string(raw), nil
	}
	return "", decodeErr
}
@@ -0,0 +1,59 @@
package redisstate
import (
"context"
"errors"
"fmt"
"time"
"galaxy/notification/internal/service/malformedintent"
"github.com/redis/go-redis/v9"
)
// MalformedIntentStore provides the Redis-backed storage used for
// operator-visible malformed-intent records.
type MalformedIntentStore struct {
	// client is the Redis client every write goes through.
	client *redis.Client
	// keys builds the Redis key for each stored record.
	keys Keyspace
	// ttl is the expiration applied to every stored record.
	ttl time.Duration
}
// NewMalformedIntentStore builds a Redis-backed malformed-intent store whose
// records expire after ttl. It rejects a nil client and a non-positive ttl.
func NewMalformedIntentStore(client *redis.Client, ttl time.Duration) (*MalformedIntentStore, error) {
	switch {
	case client == nil:
		return nil, errors.New("new malformed intent store: nil redis client")
	case ttl <= 0:
		return nil, errors.New("new malformed intent store: non-positive ttl")
	}

	store := &MalformedIntentStore{client: client, ttl: ttl}
	store.keys = Keyspace{}
	return store, nil
}
// Record writes entry under the key derived from its Redis Stream entry
// identifier, using the store TTL. Writing the same entry again overwrites
// the previous value, which makes the call idempotent per stream entry.
func (store *MalformedIntentStore) Record(ctx context.Context, entry malformedintent.Entry) error {
	switch {
	case store == nil || store.client == nil:
		return errors.New("record malformed intent: nil store")
	case ctx == nil:
		return errors.New("record malformed intent: nil context")
	}
	if validationErr := entry.Validate(); validationErr != nil {
		return fmt.Errorf("record malformed intent: %w", validationErr)
	}

	encoded, marshalErr := MarshalMalformedIntent(entry)
	if marshalErr != nil {
		return fmt.Errorf("record malformed intent: %w", marshalErr)
	}

	recordKey := store.keys.MalformedIntent(entry.StreamEntryID)
	if setErr := store.client.Set(ctx, recordKey, encoded, store.ttl).Err(); setErr != nil {
		return fmt.Errorf("record malformed intent: %w", setErr)
	}
	return nil
}
@@ -0,0 +1,657 @@
package redisstate
import (
"bytes"
"context"
"errors"
"fmt"
"sort"
"strconv"
"time"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/telemetry"
"github.com/redis/go-redis/v9"
)
// releaseRouteLeaseScript deletes the lease key KEYS[1] only when its current
// value equals the owner token ARGV[1]. It returns the DEL result in that
// case and 0 when the value differs or the key is missing.
var releaseRouteLeaseScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) == ARGV[1] then
return redis.call("DEL", KEYS[1])
end
return 0
`)
// completePublishedRouteScript marks one route as published in a single
// script invocation. It returns 0 without changing anything when the stored
// route differs from the expected payload or the lease is held by another
// token; otherwise it appends the stream entry, overwrites the route while
// keeping its TTL, removes the route from the schedule, deletes the lease,
// and returns 1.
//
// Arguments as passed by CompleteRoutePublished:
//
//	KEYS[1]  route key
//	KEYS[2]  route-lease key
//	KEYS[3]  route schedule sorted-set key
//	ARGV[1]  expected current route payload
//	ARGV[2]  lease owner token
//	ARGV[3]  outbound stream name
//	ARGV[4]  stream MAXLEN (approximate trimming when > 0, none otherwise)
//	ARGV[5]  updated route payload
//	ARGV[6]  number of stream field/value pairs
//	ARGV[7+] alternating stream field names and values
var completePublishedRouteScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) ~= ARGV[1] then
return 0
end
if redis.call("GET", KEYS[2]) ~= ARGV[2] then
return 0
end
local field_count = tonumber(ARGV[6])
local values = {}
local index = 7
for _ = 1, field_count do
table.insert(values, ARGV[index])
table.insert(values, ARGV[index + 1])
index = index + 2
end
if tonumber(ARGV[4]) > 0 then
redis.call("XADD", ARGV[3], "MAXLEN", "~", ARGV[4], "*", unpack(values))
else
redis.call("XADD", ARGV[3], "*", unpack(values))
end
redis.call("SET", KEYS[1], ARGV[5], "KEEPTTL")
redis.call("ZREM", KEYS[3], KEYS[1])
redis.call("DEL", KEYS[2])
return 1
`)
// ScheduledRoute stores one due route reference loaded from
// `notification:route_schedule`.
type ScheduledRoute struct {
	// RouteKey stores the full Redis route key scheduled for processing, as
	// it appears as a member of the schedule sorted set.
	RouteKey string
	// NotificationID stores the owning notification identifier decoded from
	// RouteKey.
	NotificationID string
	// RouteID stores the scheduled route identifier decoded from RouteKey.
	RouteID string
}
// CompleteRoutePublishedInput stores the data required to mark one route as
// published while atomically appending one outbound stream entry.
type CompleteRoutePublishedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller. It must be valid and in pending or failed status; the mutation
	// is rejected as a conflict when the stored route differs from it.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	// It must not be empty.
	LeaseToken string
	// PublishedAt stores when the publication attempt succeeded. It must be
	// non-zero and use millisecond precision.
	PublishedAt time.Time
	// Stream stores the outbound Redis Stream name. It must not be empty.
	Stream string
	// StreamMaxLen bounds Stream with approximate trimming when positive. Zero
	// disables trimming. Negative values are rejected.
	StreamMaxLen int64
	// StreamValues stores the exact Redis Stream fields appended to Stream.
	// It must be non-empty, and every value must be a non-empty string or
	// []byte.
	StreamValues map[string]any
}
// CompleteRouteFailedInput stores the data required to record one retryable
// publication failure.
type CompleteRouteFailedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller. It must be valid and in pending or failed status.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	// It must not be empty.
	LeaseToken string
	// FailedAt stores when the publication attempt failed. It must be
	// non-zero and use millisecond precision.
	FailedAt time.Time
	// NextAttemptAt stores the next scheduled retry time. Its Unix
	// millisecond value becomes the route's score in the route schedule.
	NextAttemptAt time.Time
	// FailureClassification stores the classified publication failure kind.
	// It must not be empty.
	FailureClassification string
	// FailureMessage stores the detailed publication failure text. It must
	// not be empty.
	FailureMessage string
}
// CompleteRouteDeadLetterInput stores the data required to record one
// exhausted publication failure.
type CompleteRouteDeadLetterInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller. It must be valid and in pending or failed status.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	// It must not be empty.
	LeaseToken string
	// DeadLetteredAt stores when the route exhausted its retry budget. It
	// must be non-zero and use millisecond precision.
	DeadLetteredAt time.Time
	// FailureClassification stores the classified terminal failure kind. It
	// must not be empty.
	FailureClassification string
	// FailureMessage stores the detailed terminal failure text. It must not
	// be empty.
	FailureMessage string
	// RecoveryHint stores the optional operator-facing recovery guidance. It
	// is copied into the dead-letter entry and is not checked by Validate.
	RecoveryHint string
}
// ListDueRoutes returns at most limit routes from the route schedule whose
// next-attempt score is at or before now. Every schedule member must be a
// parseable route key; the first member that is not fails the whole call.
func (store *AcceptanceStore) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]ScheduledRoute, error) {
	switch {
	case store == nil || store.client == nil:
		return nil, errors.New("list due routes: nil store")
	case ctx == nil:
		return nil, errors.New("list due routes: nil context")
	}
	if err := validateRouteStateTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}

	dueRange := &redis.ZRangeBy{
		Min:   "-inf",
		Max:   strconv.FormatInt(now.UnixMilli(), 10),
		Count: limit,
	}
	routeKeys, err := store.client.ZRangeByScore(ctx, store.keys.RouteSchedule(), dueRange).Result()
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}

	due := make([]ScheduledRoute, len(routeKeys))
	for i, routeKey := range routeKeys {
		notificationID, routeID, parseErr := store.keys.ParseRoute(routeKey)
		if parseErr != nil {
			return nil, fmt.Errorf("list due routes: %w", parseErr)
		}
		due[i] = ScheduledRoute{
			RouteKey:       routeKey,
			NotificationID: notificationID,
			RouteID:        routeID,
		}
	}
	return due, nil
}
// ReadRouteScheduleSnapshot reports how many routes are currently scheduled
// and, when the schedule is not empty, the timestamp of its lowest-scored
// entry.
func (store *AcceptanceStore) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	switch {
	case store == nil || store.client == nil:
		return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil store")
	case ctx == nil:
		return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil context")
	}

	scheduleKey := store.keys.RouteSchedule()
	depth, err := store.client.ZCard(ctx, scheduleKey).Result()
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: depth: %w", err)
	}

	snapshot := telemetry.RouteScheduleSnapshot{Depth: depth}
	if depth == 0 {
		return snapshot, nil
	}

	head, err := store.client.ZRangeWithScores(ctx, scheduleKey, 0, 0).Result()
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: oldest scheduled entry: %w", err)
	}
	// The set may have been drained between the two reads; then only the
	// depth is reported.
	if len(head) > 0 {
		oldest := time.UnixMilli(int64(head[0].Score)).UTC()
		snapshot.OldestScheduledFor = &oldest
	}
	return snapshot, nil
}
// TryAcquireRouteLease tries to create the route-lease key for the given
// route with token as its value and ttl as its expiration. It reports true
// only when the key did not exist and was created by this call.
func (store *AcceptanceStore) TryAcquireRouteLease(ctx context.Context, notificationID string, routeID string, token string, ttl time.Duration) (bool, error) {
	switch {
	case store == nil || store.client == nil:
		return false, errors.New("try acquire route lease: nil store")
	case ctx == nil:
		return false, errors.New("try acquire route lease: nil context")
	case notificationID == "":
		return false, errors.New("try acquire route lease: notification id must not be empty")
	case routeID == "":
		return false, errors.New("try acquire route lease: route id must not be empty")
	case token == "":
		return false, errors.New("try acquire route lease: token must not be empty")
	case ttl <= 0:
		return false, errors.New("try acquire route lease: ttl must be positive")
	}

	leaseKey := store.keys.RouteLease(notificationID, routeID)
	created, err := store.client.SetNX(ctx, leaseKey, token, ttl).Result()
	if err != nil {
		return false, fmt.Errorf("try acquire route lease: %w", err)
	}
	return created, nil
}
// ReleaseRouteLease deletes the route-lease key for the given route, but only
// when the stored value still equals token. A lease held by another token, or
// no lease at all, is left untouched and is not reported as an error.
func (store *AcceptanceStore) ReleaseRouteLease(ctx context.Context, notificationID string, routeID string, token string) error {
	switch {
	case store == nil || store.client == nil:
		return errors.New("release route lease: nil store")
	case ctx == nil:
		return errors.New("release route lease: nil context")
	case notificationID == "":
		return errors.New("release route lease: notification id must not be empty")
	case routeID == "":
		return errors.New("release route lease: route id must not be empty")
	case token == "":
		return errors.New("release route lease: token must not be empty")
	}

	leaseKeys := []string{store.keys.RouteLease(notificationID, routeID)}
	runErr := releaseRouteLeaseScript.Run(ctx, store.client, leaseKeys, token).Err()
	if runErr != nil {
		return fmt.Errorf("release route lease: %w", runErr)
	}
	return nil
}
// CompleteRoutePublished atomically appends one outbound stream entry and
// marks the corresponding route as published.
//
// The updated route has its attempt count incremented, its retry and error
// fields cleared, and PublishedAt/UpdatedAt set to input.PublishedAt. The
// change is applied by completePublishedRouteScript, which also removes the
// route from the schedule and deletes the lease.
//
// It returns ErrConflict when the stored route no longer matches
// input.ExpectedRoute or the lease is no longer held by input.LeaseToken.
// Every other failure is wrapped with the "complete route published" prefix.
func (store *AcceptanceStore) CompleteRoutePublished(ctx context.Context, input CompleteRoutePublishedInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route published: nil store")
	}
	if ctx == nil {
		return errors.New("complete route published: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}

	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusPublished
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = time.Time{}
	updatedRoute.LastErrorClassification = ""
	updatedRoute.LastErrorMessage = ""
	updatedRoute.LastErrorAt = time.Time{}
	updatedRoute.UpdatedAt = input.PublishedAt
	updatedRoute.PublishedAt = input.PublishedAt
	updatedRoute.DeadLetteredAt = time.Time{}

	payload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	// The script compares the stored route byte-for-byte with this payload.
	expectedPayload, err := MarshalRoute(input.ExpectedRoute)
	if err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	streamArgs, err := flattenStreamValues(input.StreamValues)
	if err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}

	result, err := completePublishedRouteScript.Run(
		ctx,
		store.client,
		[]string{
			store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID),
			store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID),
			store.keys.RouteSchedule(),
		},
		append([]any{
			string(expectedPayload),
			input.LeaseToken,
			input.Stream,
			input.StreamMaxLen,
			string(payload),
			len(streamArgs) / 2,
		}, streamArgs...)...,
	).Int()
	switch {
	case errors.Is(err, redis.Nil):
		return ErrConflict
	case err != nil:
		// Wrapped like every other failure of this method so callers can
		// tell which operation the Redis error belongs to.
		return fmt.Errorf("complete route published: %w", err)
	case result != 1:
		return ErrConflict
	default:
		return nil
	}
}
// CompleteRouteFailed atomically records one retryable publication failure and
// reschedules the route.
//
// The updated route has its attempt count incremented, its status set to
// failed, and its last-error fields filled from input. In one transaction the
// route is overwritten (keeping its TTL), rescheduled at input.NextAttemptAt,
// and its lease is deleted.
//
// It returns ErrConflict when the stored route or lease no longer matches.
// Every other failure is wrapped with the "complete route failed" prefix.
func (store *AcceptanceStore) CompleteRouteFailed(ctx context.Context, input CompleteRouteFailedInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route failed: nil store")
	}
	if ctx == nil {
		return errors.New("complete route failed: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}

	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusFailed
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = input.NextAttemptAt
	updatedRoute.LastErrorClassification = input.FailureClassification
	updatedRoute.LastErrorMessage = input.FailureMessage
	updatedRoute.LastErrorAt = input.FailedAt
	updatedRoute.UpdatedAt = input.FailedAt

	payload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}

	routeKey := store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID)
	err = store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error {
		pipe.SetArgs(ctx, routeKey, payload, redis.SetArgs{KeepTTL: true})
		pipe.ZAdd(ctx, store.keys.RouteSchedule(), redis.Z{
			Score:  float64(input.NextAttemptAt.UnixMilli()),
			Member: routeKey,
		})
		pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID))
		return nil
	})
	switch {
	case err == nil:
		return nil
	case errors.Is(err, ErrConflict):
		// Keep the bare sentinel so existing equality checks still match.
		return ErrConflict
	default:
		return fmt.Errorf("complete route failed: %w", err)
	}
}
// CompleteRouteDeadLetter atomically records one exhausted publication
// failure, stores the dead-letter entry, and removes the route from the
// retry schedule.
//
// The updated route has its attempt count incremented and its status set to
// dead letter. The call is rejected when the incremented attempt count is
// still below the route's max attempts. In one transaction the route is
// overwritten (keeping its TTL), the dead-letter entry is written with the
// configured dead-letter TTL, the route is removed from the schedule, and its
// lease is deleted.
//
// It returns ErrConflict when the stored route or lease no longer matches.
// Every other failure is wrapped with the "complete route dead letter" prefix.
func (store *AcceptanceStore) CompleteRouteDeadLetter(ctx context.Context, input CompleteRouteDeadLetterInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route dead letter: nil store")
	}
	if ctx == nil {
		return errors.New("complete route dead letter: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}

	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusDeadLetter
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = time.Time{}
	updatedRoute.LastErrorClassification = input.FailureClassification
	updatedRoute.LastErrorMessage = input.FailureMessage
	updatedRoute.LastErrorAt = input.DeadLetteredAt
	updatedRoute.UpdatedAt = input.DeadLetteredAt
	updatedRoute.DeadLetteredAt = input.DeadLetteredAt
	if updatedRoute.AttemptCount < updatedRoute.MaxAttempts {
		return fmt.Errorf(
			"complete route dead letter: final attempt count %d is below max attempts %d",
			updatedRoute.AttemptCount,
			updatedRoute.MaxAttempts,
		)
	}

	routePayload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}
	deadLetterPayload, err := MarshalDeadLetter(DeadLetterEntry{
		NotificationID:        updatedRoute.NotificationID,
		RouteID:               updatedRoute.RouteID,
		Channel:               updatedRoute.Channel,
		RecipientRef:          updatedRoute.RecipientRef,
		FinalAttemptCount:     updatedRoute.AttemptCount,
		MaxAttempts:           updatedRoute.MaxAttempts,
		FailureClassification: input.FailureClassification,
		FailureMessage:        input.FailureMessage,
		CreatedAt:             input.DeadLetteredAt,
		RecoveryHint:          input.RecoveryHint,
	})
	if err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}

	routeKey := store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID)
	err = store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error {
		pipe.SetArgs(ctx, routeKey, routePayload, redis.SetArgs{KeepTTL: true})
		pipe.Set(ctx, store.keys.DeadLetter(updatedRoute.NotificationID, updatedRoute.RouteID), deadLetterPayload, store.cfg.DeadLetterTTL)
		pipe.ZRem(ctx, store.keys.RouteSchedule(), routeKey)
		pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID))
		return nil
	})
	switch {
	case err == nil:
		return nil
	case errors.Is(err, ErrConflict):
		// Keep the bare sentinel so existing equality checks still match.
		return ErrConflict
	default:
		return fmt.Errorf("complete route dead letter: %w", err)
	}
}
// completeRouteMutation applies mutate inside a Redis transaction that
// watches the route key and the route-lease key of expectedRoute.
//
// The commands queued by mutate run only when the stored route still marshals
// to the same payload as expectedRoute and the lease key still holds
// leaseToken. A missing key, a mismatch, or redis.TxFailedErr is reported as
// ErrConflict; any other error is returned unchanged.
func (store *AcceptanceStore) completeRouteMutation(
	ctx context.Context,
	expectedRoute acceptintent.NotificationRoute,
	leaseToken string,
	mutate func(redis.Pipeliner) error,
) error {
	routeKey := store.keys.Route(expectedRoute.NotificationID, expectedRoute.RouteID)
	leaseKey := store.keys.RouteLease(expectedRoute.NotificationID, expectedRoute.RouteID)
	watchErr := store.client.Watch(ctx, func(tx *redis.Tx) error {
		// Re-read the route under WATCH; a missing route is a conflict.
		currentRoute, err := loadWatchedRoute(ctx, tx, routeKey)
		switch {
		case errors.Is(err, redis.Nil):
			return ErrConflict
		case err != nil:
			return err
		}
		if err := ensureRoutesEqual(expectedRoute, currentRoute); err != nil {
			return err
		}
		// The caller must still own the lease; a missing or foreign lease is
		// a conflict.
		leaseValue, err := tx.Get(ctx, leaseKey).Result()
		switch {
		case errors.Is(err, redis.Nil):
			return ErrConflict
		case err != nil:
			return err
		case leaseValue != leaseToken:
			return ErrConflict
		}
		// Queue the caller's writes and execute them as one transaction.
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			return mutate(pipe)
		})
		return err
	}, routeKey, leaseKey)
	switch {
	case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr):
		return ErrConflict
	case watchErr != nil:
		return watchErr
	default:
		return nil
	}
}
// loadWatchedRoute reads routeKey through tx and decodes the stored payload
// into a route. The read error, including redis.Nil for a missing key, is
// returned unchanged.
func loadWatchedRoute(ctx context.Context, tx *redis.Tx, routeKey string) (acceptintent.NotificationRoute, error) {
	raw, getErr := tx.Get(ctx, routeKey).Bytes()
	if getErr == nil {
		return UnmarshalRoute(raw)
	}
	return acceptintent.NotificationRoute{}, getErr
}
// ensureRoutesEqual returns ErrConflict when expected and actual do not
// marshal to identical JSON payloads, and a wrapped marshal error when either
// route cannot be encoded.
func ensureRoutesEqual(expected acceptintent.NotificationRoute, actual acceptintent.NotificationRoute) error {
	want, err := MarshalRoute(expected)
	if err != nil {
		return fmt.Errorf("marshal expected route: %w", err)
	}
	got, err := MarshalRoute(actual)
	if err != nil {
		return fmt.Errorf("marshal current route: %w", err)
	}

	if bytes.Equal(want, got) {
		return nil
	}
	return ErrConflict
}
// validateCompletionRoute accepts only valid routes whose status is pending
// or failed, the two states a publication attempt can complete from.
func validateCompletionRoute(route acceptintent.NotificationRoute) error {
	if validationErr := route.Validate(); validationErr != nil {
		return validationErr
	}

	completable := route.Status == acceptintent.RouteStatusPending ||
		route.Status == acceptintent.RouteStatusFailed
	if !completable {
		return fmt.Errorf("route status %q is not completable", route.Status)
	}
	return nil
}
// validateStreamValues requires at least one stream field and checks that
// every field has a non-empty name and a non-empty string or []byte value.
func validateStreamValues(values map[string]any) error {
	if len(values) == 0 {
		return fmt.Errorf("stream values must not be empty")
	}

	for field, raw := range values {
		if field == "" {
			return fmt.Errorf("stream values key must not be empty")
		}

		var blank bool
		switch content := raw.(type) {
		case string:
			blank = content == ""
		case []byte:
			blank = len(content) == 0
		default:
			return fmt.Errorf("stream values %q must be string or []byte", field)
		}
		if blank {
			return fmt.Errorf("stream values %q must not be empty", field)
		}
	}
	return nil
}
// flattenStreamValues turns values into an alternating field/value argument
// list ordered by field name, so the same map always yields the same
// arguments. The returned error is always nil.
func flattenStreamValues(values map[string]any) ([]any, error) {
	fields := make([]string, 0, len(values))
	for field := range values {
		fields = append(fields, field)
	}
	sort.Strings(fields)

	flat := make([]any, 2*len(fields))
	for i, field := range fields {
		flat[2*i] = field
		flat[2*i+1] = values[field]
	}
	return flat, nil
}
// validateRouteStateTimestamp checks that value is usable as a persisted
// route-state timestamp: non-zero, carrying the UTC location, and without a
// sub-millisecond component. name is used as the subject of the error text.
func validateRouteStateTimestamp(name string, value time.Time) error {
	switch {
	case value.IsZero():
		return fmt.Errorf("%s must not be zero", name)
	// Time.Equal compares instants only, so comparing a time with its own
	// UTC() form can never fail; the location has to be inspected directly.
	case value.Location() != time.UTC:
		return fmt.Errorf("%s must be UTC", name)
	case !value.Equal(value.Truncate(time.Millisecond)):
		return fmt.Errorf("%s must use millisecond precision", name)
	}

	return nil
}
// Validate reports whether route is a complete due-route reference, meaning
// that its route key, notification id, and route id are all non-empty.
func (route ScheduledRoute) Validate() error {
	switch {
	case route.RouteKey == "":
		return fmt.Errorf("scheduled route key must not be empty")
	case route.NotificationID == "":
		return fmt.Errorf("scheduled route notification id must not be empty")
	case route.RouteID == "":
		return fmt.Errorf("scheduled route route id must not be empty")
	}
	return nil
}
// Validate reports whether input describes a complete published-route
// transition. It checks the expected route, lease token, publication
// timestamp, stream name, stream length bound, and stream values, in that
// order, and returns the first failure.
func (input CompleteRoutePublishedInput) Validate() error {
	if routeErr := validateCompletionRoute(input.ExpectedRoute); routeErr != nil {
		return routeErr
	}
	if input.LeaseToken == "" {
		return fmt.Errorf("lease token must not be empty")
	}
	if timeErr := validateRouteStateTimestamp("published at", input.PublishedAt); timeErr != nil {
		return timeErr
	}
	switch {
	case input.Stream == "":
		return fmt.Errorf("stream must not be empty")
	case input.StreamMaxLen < 0:
		return fmt.Errorf("stream max len must not be negative")
	}
	return validateStreamValues(input.StreamValues)
}
// Validate reports whether input describes a complete retryable failure
// transition. It checks the expected route, lease token, both timestamps, and
// the failure details, in that order, and returns the first failure.
func (input CompleteRouteFailedInput) Validate() error {
	if routeErr := validateCompletionRoute(input.ExpectedRoute); routeErr != nil {
		return routeErr
	}
	if input.LeaseToken == "" {
		return fmt.Errorf("lease token must not be empty")
	}
	if timeErr := validateRouteStateTimestamp("failed at", input.FailedAt); timeErr != nil {
		return timeErr
	}
	if timeErr := validateRouteStateTimestamp("next attempt at", input.NextAttemptAt); timeErr != nil {
		return timeErr
	}
	switch {
	case input.FailureClassification == "":
		return fmt.Errorf("failure classification must not be empty")
	case input.FailureMessage == "":
		return fmt.Errorf("failure message must not be empty")
	}
	return nil
}
// Validate checks that input describes a complete dead-letter transition: a
// valid expected route, a lease token, a valid dead-letter timestamp, and a
// non-empty failure classification and message. The first failing check
// determines the returned error.
func (input CompleteRouteDeadLetterInput) Validate() error {
	routeErr := validateCompletionRoute(input.ExpectedRoute)
	switch {
	case routeErr != nil:
		return routeErr
	case input.LeaseToken == "":
		return fmt.Errorf("lease token must not be empty")
	}

	timestampErr := validateRouteStateTimestamp("dead lettered at", input.DeadLetteredAt)
	switch {
	case timestampErr != nil:
		return timestampErr
	case input.FailureClassification == "":
		return fmt.Errorf("failure classification must not be empty")
	case input.FailureMessage == "":
		return fmt.Errorf("failure message must not be empty")
	}
	return nil
}
@@ -0,0 +1,465 @@
package redisstate
import (
"context"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/stretchr/testify/require"
)
// TestAcceptanceStoreListDueRoutesLoadsScheduledMembers verifies that both
// routes created by one acceptance are listed as valid due routes.
func TestAcceptanceStoreListDueRoutesLoadsScheduledMembers(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	redisServer := miniredis.RunT(t)
	store, err := NewAcceptanceStore(newTestRedisClient(t, redisServer), AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	require.NoError(t, store.CreateAcceptance(ctx, validUserAcceptanceInput(now, 0)))

	dueRoutes, err := store.ListDueRoutes(ctx, now, 10)
	require.NoError(t, err)
	require.Len(t, dueRoutes, 2)

	listedRouteIDs := []string{dueRoutes[0].RouteID, dueRoutes[1].RouteID}
	require.ElementsMatch(t, []string{"push:user:user-1", "email:user:user-1"}, listedRouteIDs)
	for _, dueRoute := range dueRoutes {
		require.NoError(t, dueRoute.Validate())
	}
}
// TestAcceptanceStoreReadRouteScheduleSnapshot verifies that the schedule
// snapshot reports both created routes and the acceptance time as the oldest
// scheduled instant.
func TestAcceptanceStoreReadRouteScheduleSnapshot(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	redisServer := miniredis.RunT(t)
	store, err := NewAcceptanceStore(newTestRedisClient(t, redisServer), AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	require.NoError(t, store.CreateAcceptance(ctx, validUserAcceptanceInput(now, 0)))

	got, err := store.ReadRouteScheduleSnapshot(ctx)
	require.NoError(t, err)
	require.Equal(t, int64(2), got.Depth)
	require.NotNil(t, got.OldestScheduledFor)
	require.Equal(t, now, *got.OldestScheduledFor)
}
// TestAcceptanceStoreRouteLeaseAcquireReleaseAndExpire verifies that a route
// lease is exclusive while held and becomes available again after an explicit
// release or after its TTL elapses.
func TestAcceptanceStoreRouteLeaseAcquireReleaseAndExpire(t *testing.T) {
	t.Parallel()

	const (
		notificationID = "1775121700000-0"
		routeID        = "push:user:user-1"
		leaseTTL       = 2 * time.Second
	)

	ctx := context.Background()
	redisServer := miniredis.RunT(t)
	store, err := NewAcceptanceStore(newTestRedisClient(t, redisServer), AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// The first holder wins the lease.
	acquired, err := store.TryAcquireRouteLease(ctx, notificationID, routeID, "token-1", leaseTTL)
	require.NoError(t, err)
	require.True(t, acquired)

	// A competing token is refused while the lease is held.
	acquired, err = store.TryAcquireRouteLease(ctx, notificationID, routeID, "token-2", leaseTTL)
	require.NoError(t, err)
	require.False(t, acquired)

	// After the holder releases, a new token can acquire.
	require.NoError(t, store.ReleaseRouteLease(ctx, notificationID, routeID, "token-1"))
	acquired, err = store.TryAcquireRouteLease(ctx, notificationID, routeID, "token-3", leaseTTL)
	require.NoError(t, err)
	require.True(t, acquired)

	// Once the TTL has passed, the lease is free without a release.
	redisServer.FastForward(3 * time.Second)
	acquired, err = store.TryAcquireRouteLease(ctx, notificationID, routeID, "token-4", leaseTTL)
	require.NoError(t, err)
	require.True(t, acquired)
}
func TestAcceptanceStoreCompleteRoutePublishedAppendsTrimmedStreamEntryAndMarksRoutePublished(t *testing.T) {
t.Parallel()
server := miniredis.RunT(t)
client := newTestRedisClient(t, server)
store, err := NewAcceptanceStore(client, AcceptanceConfig{
RecordTTL: 24 * time.Hour,
DeadLetterTTL: 72 * time.Hour,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
now := time.UnixMilli(1775121700000).UTC()
input := validUserAcceptanceInput(now, 0)
require.NoError(t, store.CreateAcceptance(context.Background(), input))
acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
require.NoError(t, err)
require.True(t, acquired)
route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
ExpectedRoute: route,
LeaseToken: "token-1",
PublishedAt: publishedAt,
Stream: "gateway:client-events",
StreamMaxLen: 1024,
StreamValues: map[string]any{
"user_id": "user-1",
"event_type": "game.turn.ready",
"event_id": input.Notification.NotificationID + "/push:user:user-1",
"payload_bytes": []byte("payload-1"),
"request_id": "request-1",
"trace_id": "trace-1",
},
}))
updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
require.Equal(t, 1, updatedRoute.AttemptCount)
require.Equal(t, publishedAt, updatedRoute.PublishedAt)
scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
require.NoError(t, err)
require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)
messages, err := client.XRange(context.Background(), "gateway:client-events", "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
require.Equal(t, "user-1", messages[0].Values["user_id"])
require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
leaseKey := Keyspace{}.RouteLease(input.Notification.NotificationID, "push:user:user-1")
_, err = client.Get(context.Background(), leaseKey).Result()
require.Error(t, err)
}
// TestAcceptanceStoreCompleteRoutePublishedAppendsUntrimmedMailCommand
// verifies that completing the email route with a zero stream length cap
// marks it published and appends one mail command entry.
func TestAcceptanceStoreCompleteRoutePublishedAppendsUntrimmedMailCommand(t *testing.T) {
	t.Parallel()

	const (
		routeID    = "email:user:user-1"
		leaseToken = "token-1"
		stream     = "mail:delivery_commands"
	)

	ctx := context.Background()
	redisServer := miniredis.RunT(t)
	client := newTestRedisClient(t, redisServer)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	notificationID := input.Notification.NotificationID
	require.NoError(t, store.CreateAcceptance(ctx, input))

	acquired, err := store.TryAcquireRouteLease(ctx, notificationID, routeID, leaseToken, 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)

	expectedRoute, found, err := store.GetRoute(ctx, notificationID, routeID)
	require.NoError(t, err)
	require.True(t, found)

	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	deliveryID := notificationID + "/" + routeID
	completion := CompleteRoutePublishedInput{
		ExpectedRoute: expectedRoute,
		LeaseToken:    leaseToken,
		PublishedAt:   publishedAt,
		Stream:        stream,
		StreamMaxLen:  0,
		StreamValues: map[string]any{
			"delivery_id":     deliveryID,
			"source":          "notification",
			"payload_mode":    "template",
			"idempotency_key": "notification:" + deliveryID,
			"requested_at_ms": "1775121700000",
			"payload_json":    `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		},
	}
	require.NoError(t, store.CompleteRoutePublished(ctx, completion))

	// The stored route reflects the publication.
	updatedRoute, found, err := store.GetRoute(ctx, notificationID, routeID)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)

	// Exactly one command was appended to the mail stream.
	messages, err := client.XRange(ctx, stream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
}
func TestAcceptanceStoreCompleteRouteFailedReschedulesRoute(t *testing.T) {
t.Parallel()
server := miniredis.RunT(t)
client := newTestRedisClient(t, server)
store, err := NewAcceptanceStore(client, AcceptanceConfig{
RecordTTL: 24 * time.Hour,
DeadLetterTTL: 72 * time.Hour,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
now := time.UnixMilli(1775121700000).UTC()
input := validUserAcceptanceInput(now, 0)
require.NoError(t, store.CreateAcceptance(context.Background(), input))
acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
require.NoError(t, err)
require.True(t, acquired)
route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
failedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
nextAttemptAt := failedAt.Add(2 * time.Second).UTC().Truncate(time.Millisecond)
require.NoError(t, store.CompleteRouteFailed(context.Background(), CompleteRouteFailedInput{
ExpectedRoute: route,
LeaseToken: "token-1",
FailedAt: failedAt,
NextAttemptAt: nextAttemptAt,
FailureClassification: "gateway_stream_publish_failed",
FailureMessage: "temporary outage",
}))
updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
require.Equal(t, acceptintent.RouteStatusFailed, updatedRoute.Status)
require.Equal(t, 1, updatedRoute.AttemptCount)
require.Equal(t, nextAttemptAt, updatedRoute.NextAttemptAt)
require.Equal(t, "gateway_stream_publish_failed", updatedRoute.LastErrorClassification)
scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
require.NoError(t, err)
require.Len(t, scheduled, 2)
require.Contains(t, []string{
scheduled[0].Member.(string),
scheduled[1].Member.(string),
}, Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-1"))
}
func TestAcceptanceStoreCompleteRouteDeadLetterStoresTerminalFailure(t *testing.T) {
t.Parallel()
server := miniredis.RunT(t)
client := newTestRedisClient(t, server)
store, err := NewAcceptanceStore(client, AcceptanceConfig{
RecordTTL: 24 * time.Hour,
DeadLetterTTL: 72 * time.Hour,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
now := time.UnixMilli(1775121700000).UTC()
input := validUserAcceptanceInput(now, 2)
require.NoError(t, store.CreateAcceptance(context.Background(), input))
acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
require.NoError(t, err)
require.True(t, acquired)
route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
ExpectedRoute: route,
LeaseToken: "token-1",
DeadLetteredAt: deadLetteredAt,
FailureClassification: "payload_encoding_failed",
FailureMessage: "payload is invalid",
}))
updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
require.Equal(t, acceptintent.RouteStatusDeadLetter, updatedRoute.Status)
require.Equal(t, 3, updatedRoute.AttemptCount)
require.Equal(t, deadLetteredAt, updatedRoute.DeadLetteredAt)
payload, err := client.Get(context.Background(), Keyspace{}.DeadLetter(input.Notification.NotificationID, "push:user:user-1")).Bytes()
require.NoError(t, err)
entry, err := UnmarshalDeadLetter(payload)
require.NoError(t, err)
require.Equal(t, "payload_encoding_failed", entry.FailureClassification)
require.Equal(t, 3, entry.FinalAttemptCount)
scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
require.NoError(t, err)
require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)
}
func TestAcceptanceStoreDeadLetterIsIsolatedByChannelAndRecipient(t *testing.T) {
t.Parallel()
server := miniredis.RunT(t)
client := newTestRedisClient(t, server)
store, err := NewAcceptanceStore(client, AcceptanceConfig{
RecordTTL: 24 * time.Hour,
DeadLetterTTL: 72 * time.Hour,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
now := time.UnixMilli(1775121700000).UTC()
input := validUserAcceptanceInput(now, 2)
input.Notification.RecipientUserIDs = []string{"user-1", "user-2"}
input.Routes = append(input.Routes,
acceptintent.NotificationRoute{
NotificationID: input.Notification.NotificationID,
RouteID: "push:user:user-2",
Channel: intentstream.ChannelPush,
RecipientRef: "user:user-2",
Status: acceptintent.RouteStatusPending,
AttemptCount: 0,
MaxAttempts: 3,
NextAttemptAt: now,
ResolvedEmail: "second@example.com",
ResolvedLocale: "en",
CreatedAt: now,
UpdatedAt: now,
},
acceptintent.NotificationRoute{
NotificationID: input.Notification.NotificationID,
RouteID: "email:user:user-2",
Channel: intentstream.ChannelEmail,
RecipientRef: "user:user-2",
Status: acceptintent.RouteStatusPending,
AttemptCount: 0,
MaxAttempts: 7,
NextAttemptAt: now,
ResolvedEmail: "second@example.com",
ResolvedLocale: "en",
CreatedAt: now,
UpdatedAt: now,
},
)
require.NoError(t, store.CreateAcceptance(context.Background(), input))
acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
require.NoError(t, err)
require.True(t, acquired)
route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
ExpectedRoute: route,
LeaseToken: "token-1",
DeadLetteredAt: deadLetteredAt,
FailureClassification: "gateway_stream_publish_failed",
FailureMessage: "gateway unavailable",
}))
deadLetterRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
require.NoError(t, err)
require.True(t, found)
require.Equal(t, acceptintent.RouteStatusDeadLetter, deadLetterRoute.Status)
for _, routeID := range []string{"email:user:user-1", "push:user:user-2", "email:user:user-2"} {
route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, routeID)
require.NoError(t, err)
require.True(t, found, "route %s should remain stored", routeID)
require.Equal(t, acceptintent.RouteStatusPending, route.Status, "route %s should remain pending", routeID)
}
scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
require.NoError(t, err)
require.ElementsMatch(t, []string{
Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1"),
Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-2"),
Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-2"),
}, scheduled)
}
// validUserAcceptanceInput builds the acceptance fixture shared by the store
// tests: one user-audience notification with a pending push route and a
// pending email route for user-1, plus the matching idempotency record.
// now is used for every timestamp; pushAttemptCount seeds the push route's
// attempt counter.
func validUserAcceptanceInput(now time.Time, pushAttemptCount int) acceptintent.CreateAcceptanceInput {
	const (
		notificationID     = "1775121700000-0"
		idempotencyKey     = "game-123:turn-54"
		requestFingerprint = "sha256:deadbeef"
		recipientRef       = "user:user-1"
		resolvedEmail      = "pilot@example.com"
		resolvedLocale     = "en"
	)

	notification := acceptintent.NotificationRecord{
		NotificationID:     notificationID,
		NotificationType:   intentstream.NotificationTypeGameTurnReady,
		Producer:           intentstream.ProducerGameMaster,
		AudienceKind:       intentstream.AudienceKindUser,
		RecipientUserIDs:   []string{"user-1"},
		PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		IdempotencyKey:     idempotencyKey,
		RequestFingerprint: requestFingerprint,
		RequestID:          "request-1",
		TraceID:            "trace-1",
		OccurredAt:         now,
		AcceptedAt:         now,
		UpdatedAt:          now,
	}

	pushRoute := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RouteID:        "push:user:user-1",
		Channel:        intentstream.ChannelPush,
		RecipientRef:   recipientRef,
		Status:         acceptintent.RouteStatusPending,
		AttemptCount:   pushAttemptCount,
		MaxAttempts:    3,
		NextAttemptAt:  now,
		ResolvedEmail:  resolvedEmail,
		ResolvedLocale: resolvedLocale,
		CreatedAt:      now,
		UpdatedAt:      now,
	}

	emailRoute := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RouteID:        "email:user:user-1",
		Channel:        intentstream.ChannelEmail,
		RecipientRef:   recipientRef,
		Status:         acceptintent.RouteStatusPending,
		AttemptCount:   0,
		MaxAttempts:    7,
		NextAttemptAt:  now,
		ResolvedEmail:  resolvedEmail,
		ResolvedLocale: resolvedLocale,
		CreatedAt:      now,
		UpdatedAt:      now,
	}

	idempotency := acceptintent.IdempotencyRecord{
		Producer:           intentstream.ProducerGameMaster,
		IdempotencyKey:     idempotencyKey,
		NotificationID:     notificationID,
		RequestFingerprint: requestFingerprint,
		CreatedAt:          now,
		ExpiresAt:          now.Add(7 * 24 * time.Hour),
	}

	return acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{pushRoute, emailRoute},
		Idempotency:  idempotency,
	}
}
@@ -0,0 +1,160 @@
package redisstate
import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"time"
"galaxy/notification/internal/telemetry"
"github.com/redis/go-redis/v9"
)
// StreamOffsetStore provides the Redis-backed storage used for persisted
// plain-XREAD consumer progress.
type StreamOffsetStore struct {
// client is the Redis connection used to read and write offset records.
client *redis.Client
// keys builds the Redis key under which each stream's offset is stored.
keys Keyspace
}
// NewStreamOffsetStore builds a stream-offset store on top of client. It
// returns an error when client is nil.
func NewStreamOffsetStore(client *redis.Client) (*StreamOffsetStore, error) {
	if client == nil {
		return nil, errors.New("new notification stream offset store: nil redis client")
	}

	store := new(StreamOffsetStore)
	store.client = client
	store.keys = Keyspace{}
	return store, nil
}
// Load fetches the last processed entry id persisted for stream. The boolean
// result is false, with a nil error, when no offset has been stored yet.
func (store *StreamOffsetStore) Load(ctx context.Context, stream string) (string, bool, error) {
	switch {
	case store == nil || store.client == nil:
		return "", false, errors.New("load notification stream offset: nil store")
	case ctx == nil:
		return "", false, errors.New("load notification stream offset: nil context")
	}

	raw, err := store.client.Get(ctx, store.keys.StreamOffset(stream)).Bytes()
	if errors.Is(err, redis.Nil) {
		// A missing key means nothing was persisted, which is not a failure.
		return "", false, nil
	}
	if err != nil {
		return "", false, fmt.Errorf("load notification stream offset: %w", err)
	}

	record, err := UnmarshalStreamOffset(raw)
	if err != nil {
		return "", false, fmt.Errorf("load notification stream offset: %w", err)
	}
	return record.LastProcessedEntryID, true, nil
}
// Save persists entryID as the last processed entry id for stream. The record
// is stamped with the current UTC time truncated to milliseconds and written
// without an expiration.
func (store *StreamOffsetStore) Save(ctx context.Context, stream string, entryID string) error {
	switch {
	case store == nil || store.client == nil:
		return errors.New("save notification stream offset: nil store")
	case ctx == nil:
		return errors.New("save notification stream offset: nil context")
	}

	encoded, err := MarshalStreamOffset(StreamOffset{
		Stream:               stream,
		LastProcessedEntryID: entryID,
		UpdatedAt:            time.Now().UTC().Truncate(time.Millisecond),
	})
	if err != nil {
		return fmt.Errorf("save notification stream offset: %w", err)
	}

	err = store.client.Set(ctx, store.keys.StreamOffset(stream), encoded, 0).Err()
	if err != nil {
		return fmt.Errorf("save notification stream offset: %w", err)
	}
	return nil
}
// IntentStreamLagReader provides Redis-backed lag snapshots for one intent
// stream.
type IntentStreamLagReader struct {
// store supplies both the persisted consumer offset and the Redis client
// used to query the stream.
store *StreamOffsetStore
// stream is the name of the stream whose lag is reported.
stream string
}
// NewIntentStreamLagReader builds a lag reader that reports on stream using
// store. It returns an error when store (or its client) is nil or when stream
// is blank.
func NewIntentStreamLagReader(store *StreamOffsetStore, stream string) (*IntentStreamLagReader, error) {
	switch {
	case store == nil || store.client == nil:
		return nil, errors.New("new notification intent stream lag reader: nil store")
	case strings.TrimSpace(stream) == "":
		return nil, errors.New("new notification intent stream lag reader: stream must not be empty")
	}

	reader := &IntentStreamLagReader{store: store, stream: stream}
	return reader, nil
}
// ReadIntentStreamLagSnapshot reports the timestamp of the oldest entry in
// the configured stream that lies after the persisted consumer offset. When
// no offset is stored, the oldest entry of the whole stream is used. The
// snapshot is empty when no such entry exists.
func (reader *IntentStreamLagReader) ReadIntentStreamLagSnapshot(ctx context.Context) (telemetry.IntentStreamLagSnapshot, error) {
	var empty telemetry.IntentStreamLagSnapshot

	switch {
	case reader == nil || reader.store == nil:
		return empty, errors.New("read notification intent stream lag snapshot: nil reader")
	case ctx == nil:
		return empty, errors.New("read notification intent stream lag snapshot: nil context")
	}

	lastID, found, err := reader.store.Load(ctx, reader.stream)
	if err != nil {
		return empty, fmt.Errorf("read notification intent stream lag snapshot: %w", err)
	}

	// "-" starts at the beginning of the stream; the "(" prefix makes the
	// stored id an exclusive lower bound.
	rangeStart := "-"
	if found {
		rangeStart = "(" + lastID
	}

	entries, err := reader.store.client.XRangeN(ctx, reader.stream, rangeStart, "+", 1).Result()
	if err != nil {
		return empty, fmt.Errorf("read notification intent stream lag snapshot: oldest entry: %w", err)
	}
	if len(entries) == 0 {
		return empty, nil
	}

	oldestAt, err := streamEntryTime(entries[0].ID)
	if err != nil {
		return empty, fmt.Errorf("read notification intent stream lag snapshot: oldest entry id: %w", err)
	}
	return telemetry.IntentStreamLagSnapshot{OldestUnprocessedAt: &oldestAt}, nil
}
// streamEntryTime extracts the millisecond timestamp that precedes the first
// "-" in entryID and returns it as a UTC time. It fails when entryID has no
// "-" separator, when the timestamp part is blank, or when that part is not a
// base-10 integer.
func streamEntryTime(entryID string) (time.Time, error) {
	millisText, _, hasSeparator := strings.Cut(entryID, "-")
	if !hasSeparator || strings.TrimSpace(millisText) == "" {
		return time.Time{}, fmt.Errorf("entry id %q is not a Redis Stream id", entryID)
	}

	millis, err := strconv.ParseInt(millisText, 10, 64)
	switch {
	case err != nil:
		return time.Time{}, err
	case millis < 0:
		return time.Time{}, fmt.Errorf("entry id %q has negative timestamp", entryID)
	}
	return time.UnixMilli(millis).UTC(), nil
}
@@ -0,0 +1,243 @@
// Package userservice provides the trusted internal User Service HTTP client
// used by Notification Service recipient enrichment.
package userservice
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"galaxy/notification/internal/service/acceptintent"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)
const (
// getUserByIDPathSuffix is the format pattern for the lookup request path
// appended to the client base URL; %s receives the path-escaped user id.
getUserByIDPathSuffix = "/api/v1/internal/users/%s"
// subjectNotFoundErrorCode is the error code in a 404 response body that
// GetUserByID reports as acceptintent.ErrRecipientNotFound.
subjectNotFoundErrorCode = "subject_not_found"
)
// Config configures one HTTP-backed User Service enrichment client.
type Config struct {
// BaseURL stores the absolute base URL of the trusted internal User Service
// HTTP API. Client construction rejects a blank value or one without a
// scheme and host; surrounding whitespace and trailing slashes are trimmed.
BaseURL string
// RequestTimeout bounds one outbound lookup request. Client construction
// rejects zero or negative values.
RequestTimeout time.Duration
}
// Client resolves Notification Service recipients through the trusted
// internal User Service HTTP API.
type Client struct {
// baseURL is the normalized absolute base URL that request paths are
// appended to.
baseURL string
// requestTimeout is the deadline applied to each outbound request.
requestTimeout time.Duration
// httpClient performs the outbound HTTP requests.
httpClient *http.Client
// closeIdleConnections, when non-nil, is invoked by Close.
closeIdleConnections func()
}
// getUserByIDResponse is the JSON body decoded from a 200 lookup response.
type getUserByIDResponse struct {
User userView `json:"user"`
}
// userView holds the user fields the client reads from a lookup response.
// The success body is decoded without rejecting unknown fields, so any other
// fields present in the response are ignored.
type userView struct {
Email string `json:"email"`
PreferredLanguage string `json:"preferred_language"`
}
// errorEnvelope is the JSON body decoded from a 404 lookup response. Error is
// a pointer so that a missing "error" object can be detected.
type errorEnvelope struct {
Error *errorBody `json:"error"`
}
// errorBody carries the error code and message of an error response.
type errorBody struct {
Code string `json:"code"`
Message string `json:"message"`
}
// NewClient builds a User Service client from cfg. The client owns a clone of
// http.DefaultTransport wrapped with otelhttp instrumentation, and Close
// releases that clone's idle connections.
func NewClient(cfg Config) (*Client, error) {
	defaultTransport, isHTTPTransport := http.DefaultTransport.(*http.Transport)
	if !isHTTPTransport {
		return nil, errors.New("new notification user service client: default transport is not *http.Transport")
	}

	// Clone so that closing idle connections never touches the shared default.
	ownTransport := defaultTransport.Clone()
	instrumented := &http.Client{Transport: otelhttp.NewTransport(ownTransport)}
	return newClient(cfg, instrumented, ownTransport.CloseIdleConnections)
}
// newClient validates cfg and assembles a Client around httpClient.
// closeIdleConnections is stored as the hook invoked by Close and may be nil.
// The base URL is trimmed of surrounding whitespace and trailing slashes and
// must be absolute.
func newClient(cfg Config, httpClient *http.Client, closeIdleConnections func()) (*Client, error) {
	trimmedBaseURL := strings.TrimSpace(cfg.BaseURL)
	if trimmedBaseURL == "" {
		return nil, errors.New("new notification user service client: base URL must not be empty")
	}
	if cfg.RequestTimeout <= 0 {
		return nil, errors.New("new notification user service client: request timeout must be positive")
	}
	if httpClient == nil {
		return nil, errors.New("new notification user service client: http client must not be nil")
	}

	normalized, err := url.Parse(strings.TrimRight(trimmedBaseURL, "/"))
	if err != nil {
		return nil, fmt.Errorf("new notification user service client: parse base URL: %w", err)
	}
	if normalized.Scheme == "" || normalized.Host == "" {
		return nil, errors.New("new notification user service client: base URL must be absolute")
	}

	client := &Client{
		baseURL:              normalized.String(),
		requestTimeout:       cfg.RequestTimeout,
		httpClient:           httpClient,
		closeIdleConnections: closeIdleConnections,
	}
	return client, nil
}
// Close invokes the idle-connection cleanup hook when one is configured. It
// is safe to call on a nil client and always returns nil.
func (client *Client) Close() error {
	if client != nil && client.closeIdleConnections != nil {
		client.closeIdleConnections()
	}
	return nil
}
// GetUserByID looks up the email and preferred language of the user
// identified by userID.
//
// A 200 response is decoded and validated before being returned. A 404
// response with the subject-not-found error code yields an error wrapping
// acceptintent.ErrRecipientNotFound. Any other status, error code, or
// malformed body is returned as a plain error.
func (client *Client) GetUserByID(ctx context.Context, userID string) (acceptintent.UserRecord, error) {
	var none acceptintent.UserRecord

	switch {
	case client == nil || client.httpClient == nil:
		return none, errors.New("lookup user by id: nil client")
	case ctx == nil:
		return none, errors.New("lookup user by id: nil context")
	}
	if err := ctx.Err(); err != nil {
		return none, err
	}
	if strings.TrimSpace(userID) == "" {
		return none, errors.New("lookup user by id: user id must not be empty")
	}

	requestPath := fmt.Sprintf(getUserByIDPathSuffix, url.PathEscape(userID))
	body, status, err := client.doRequest(ctx, http.MethodGet, requestPath)
	if err != nil {
		return none, fmt.Errorf("lookup user by id %q: %w", userID, err)
	}

	if status == http.StatusOK {
		var decoded getUserByIDResponse
		if err := decodeJSONPayload(body, &decoded); err != nil {
			return none, fmt.Errorf("lookup user by id %q: decode success response: %w", userID, err)
		}

		record := acceptintent.UserRecord{
			Email:             decoded.User.Email,
			PreferredLanguage: decoded.User.PreferredLanguage,
		}
		if err := record.Validate(); err != nil {
			return none, fmt.Errorf("lookup user by id %q: invalid success response: %w", userID, err)
		}
		return record, nil
	}

	if status != http.StatusNotFound {
		return none, fmt.Errorf("lookup user by id %q: unexpected HTTP status %d", userID, status)
	}

	// Only a 404 carrying the expected error code means "recipient not found".
	code, err := decodeErrorCode(body)
	if err != nil {
		return none, fmt.Errorf("lookup user by id %q: decode error response: %w", userID, err)
	}
	if code != subjectNotFoundErrorCode {
		return none, fmt.Errorf("lookup user by id %q: unexpected error code %q for status %d", userID, code, status)
	}
	return none, fmt.Errorf("lookup user by id %q: %w", userID, acceptintent.ErrRecipientNotFound)
}
// doRequest sends one body-less request for requestPath relative to the
// client base URL, bounded by the client request timeout, and returns the
// full response body together with the HTTP status code.
func (client *Client) doRequest(ctx context.Context, method string, requestPath string) ([]byte, int, error) {
	boundedCtx, cancel := context.WithTimeout(ctx, client.requestTimeout)
	defer cancel()

	targetURL := client.baseURL + requestPath
	outbound, err := http.NewRequestWithContext(boundedCtx, method, targetURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("build request: %w", err)
	}

	inbound, err := client.httpClient.Do(outbound)
	if err != nil {
		return nil, 0, err
	}
	defer inbound.Body.Close()

	body, err := io.ReadAll(inbound.Body)
	if err != nil {
		return nil, 0, fmt.Errorf("read response body: %w", err)
	}
	return body, inbound.StatusCode, nil
}
// decodeErrorCode strictly decodes payload as an error envelope and returns
// its error code. It fails when the payload does not decode, when the error
// object is absent, or when the code is blank.
func decodeErrorCode(payload []byte) (string, error) {
	var decoded errorEnvelope
	if err := decodeStrictJSONPayload(payload, &decoded); err != nil {
		return "", err
	}

	switch {
	case decoded.Error == nil:
		return "", errors.New("missing error object")
	case strings.TrimSpace(decoded.Error.Code) == "":
		return "", errors.New("missing error code")
	}
	return decoded.Error.Code, nil
}
// decodeJSONPayload decodes exactly one JSON value from payload into target.
// Unknown object fields are tolerated; any further input after the first
// value, other than whitespace, is an error.
func decodeJSONPayload(payload []byte, target any) error {
	dec := json.NewDecoder(bytes.NewReader(payload))
	if err := dec.Decode(target); err != nil {
		return err
	}

	// A second decode must hit end of input; anything else is trailing data.
	switch err := dec.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// decodeStrictJSONPayload decodes exactly one JSON value from payload into
// target, rejecting unknown object fields. Any further input after the first
// value, other than whitespace, is an error.
func decodeStrictJSONPayload(payload []byte, target any) error {
	dec := json.NewDecoder(bytes.NewReader(payload))
	dec.DisallowUnknownFields()
	if err := dec.Decode(target); err != nil {
		return err
	}

	// A second decode must hit end of input; anything else is trailing data.
	switch err := dec.Decode(&struct{}{}); err {
	case io.EOF:
		return nil
	case nil:
		return errors.New("unexpected trailing JSON input")
	default:
		return err
	}
}
// Compile-time check that *Client implements acceptintent.UserDirectory.
var _ acceptintent.UserDirectory = (*Client)(nil)
@@ -0,0 +1,219 @@
package userservice
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"testing"
"time"
"galaxy/notification/internal/service/acceptintent"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewClient verifies that NewClient accepts a valid configuration and
// rejects an empty base URL, a relative base URL, and a non-positive timeout.
func TestNewClient(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name    string
		cfg     Config
		wantErr string
	}{
		{
			name: "valid config",
			cfg: Config{
				BaseURL:        "http://127.0.0.1:8080",
				RequestTimeout: time.Second,
			},
		},
		{
			name:    "empty base url",
			cfg:     Config{RequestTimeout: time.Second},
			wantErr: "base URL must not be empty",
		},
		{
			name: "relative base url",
			cfg: Config{
				BaseURL:        "/relative",
				RequestTimeout: time.Second,
			},
			wantErr: "base URL must be absolute",
		},
		{
			name:    "non positive timeout",
			cfg:     Config{BaseURL: "http://127.0.0.1:8080"},
			wantErr: "request timeout must be positive",
		},
	}

	for _, testCase := range cases {
		// Copy the loop variable so each parallel subtest sees its own case.
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			t.Parallel()

			client, err := NewClient(testCase.cfg)
			if testCase.wantErr == "" {
				require.NoError(t, err)
				assert.NoError(t, client.Close())
				return
			}

			require.Error(t, err)
			assert.ErrorContains(t, err, testCase.wantErr)
		})
	}
}
// TestClientGetUserByID exercises GetUserByID against stub HTTP servers for
// the success, subject-not-found, invalid-response, and timeout paths.
func TestClientGetUserByID(t *testing.T) {
	t.Parallel()

	const lookupTimeout = 250 * time.Millisecond

	t.Run("success", func(t *testing.T) {
		t.Parallel()

		var seen capturedRequest
		handler := func(w http.ResponseWriter, r *http.Request) {
			seen = captureRequest(t, r)
			writeJSON(t, w, http.StatusOK, map[string]any{
				"user": map[string]any{
					"user_id":            "user-123",
					"email":              "pilot@example.com",
					"preferred_language": "en-US",
					"time_zone":          "Europe/Kaliningrad",
				},
			})
		}
		server := httptest.NewServer(http.HandlerFunc(handler))
		defer server.Close()

		client := newTestClient(t, server.URL, lookupTimeout)
		record, err := client.GetUserByID(context.Background(), "user-123")
		require.NoError(t, err)

		wantRecord := acceptintent.UserRecord{
			Email:             "pilot@example.com",
			PreferredLanguage: "en-US",
		}
		require.Equal(t, wantRecord, record)

		wantRequest := capturedRequest{
			Method: http.MethodGet,
			Path:   "/api/v1/internal/users/user-123",
		}
		require.Equal(t, wantRequest, seen)
	})

	t.Run("subject not found", func(t *testing.T) {
		t.Parallel()

		handler := func(w http.ResponseWriter, r *http.Request) {
			writeJSON(t, w, http.StatusNotFound, map[string]any{
				"error": map[string]any{
					"code":    "subject_not_found",
					"message": "subject not found",
				},
			})
		}
		server := httptest.NewServer(http.HandlerFunc(handler))
		defer server.Close()

		client := newTestClient(t, server.URL, lookupTimeout)
		_, err := client.GetUserByID(context.Background(), "user-missing")
		require.Error(t, err)
		require.ErrorIs(t, err, acceptintent.ErrRecipientNotFound)
	})

	t.Run("invalid email is treated as dependency failure", func(t *testing.T) {
		t.Parallel()

		handler := func(w http.ResponseWriter, r *http.Request) {
			writeJSON(t, w, http.StatusOK, map[string]any{
				"user": map[string]any{
					"email":              "bad@@example.com",
					"preferred_language": "en",
				},
			})
		}
		server := httptest.NewServer(http.HandlerFunc(handler))
		defer server.Close()

		client := newTestClient(t, server.URL, lookupTimeout)
		_, err := client.GetUserByID(context.Background(), "user-123")
		require.Error(t, err)
		require.NotErrorIs(t, err, acceptintent.ErrRecipientNotFound)
		require.ErrorContains(t, err, "invalid success response")
	})

	t.Run("timeout", func(t *testing.T) {
		t.Parallel()

		// The handler never answers; it only returns once the request context
		// is done.
		handler := func(w http.ResponseWriter, r *http.Request) {
			<-r.Context().Done()
		}
		server := httptest.NewServer(http.HandlerFunc(handler))
		defer server.Close()

		client := newTestClient(t, server.URL, 10*time.Millisecond)
		_, err := client.GetUserByID(context.Background(), "user-123")
		require.Error(t, err)
		require.NotErrorIs(t, err, acceptintent.ErrRecipientNotFound)
		require.ErrorContains(t, err, "context deadline exceeded")
	})
}
// capturedRequest holds the request attributes that test cases compare
// against their expectations.
type capturedRequest struct {
// Method is the HTTP method of the observed request.
Method string
// Path is the URL path of the observed request.
Path string
}
// newTestClient builds a Client aimed at baseURL with the given request
// timeout and fails the test immediately when construction returns an error.
// The client is given a clone of http.DefaultTransport and a no-op callback
// as its third constructor argument.
func newTestClient(t *testing.T, baseURL string, requestTimeout time.Duration) *Client {
	t.Helper()

	cfg := Config{
		BaseURL:        baseURL,
		RequestTimeout: requestTimeout,
	}
	transport := http.DefaultTransport.(*http.Transport).Clone()
	httpClient := &http.Client{Transport: transport}

	built, err := newClient(cfg, httpClient, func() {})
	require.NoError(t, err)

	return built
}
// captureRequest drains and closes the body of request and returns its method
// and URL path.
//
// Failures are reported with t.Errorf instead of a FailNow-based assertion so
// the helper stays safe to call from HTTP handler goroutines: the testing
// package only permits FailNow from the goroutine running the test. The test
// is still marked as failed, and the captured values are returned regardless.
func captureRequest(t *testing.T, request *http.Request) capturedRequest {
	t.Helper()

	if _, err := io.ReadAll(request.Body); err != nil {
		t.Errorf("read request body: %v", err)
	}
	if err := request.Body.Close(); err != nil {
		t.Errorf("close request body: %v", err)
	}

	return capturedRequest{
		Method: request.Method,
		Path:   request.URL.Path,
	}
}
// writeJSON marshals payload and writes it to writer with the given status
// code and an "application/json" content type.
//
// The helper is called from httptest handler goroutines, so failures are
// reported with t.Errorf rather than a FailNow-based assertion: the testing
// package only permits FailNow from the goroutine running the test. When the
// payload cannot be marshaled the handler answers 500 so the client side of
// the test observes a definite response.
func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) {
	t.Helper()

	body, err := json.Marshal(payload)
	if err != nil {
		t.Errorf("marshal JSON response: %v", err)
		writer.WriteHeader(http.StatusInternalServerError)
		return
	}

	writer.Header().Set("Content-Type", "application/json")
	writer.WriteHeader(statusCode)
	if _, err := writer.Write(body); err != nil {
		t.Errorf("write JSON response: %v", err)
	}
}
func TestClientCloseIsNilSafe(t *testing.T) {
t.Parallel()
var nilClient *Client
require.NoError(t, nilClient.Close())
}
+2
View File
@@ -0,0 +1,2 @@
// Package api reserves the transport-layer namespace of Notification Service.
package api
@@ -0,0 +1,147 @@
// Package intentstream defines the frozen Redis Stream contract used for
// Notification Service intent intake.
package intentstream
import (
"strings"
"galaxy/notification/internal/service/malformedintent"
"galaxy/notificationintent"
)
// Top-level field names of one intent stream entry, followed by the fallback
// locale returned by DefaultResolvedLocale.
const (
// fieldNotificationType names the field carrying the notification type.
fieldNotificationType = "notification_type"
// fieldProducer names the field carrying the upstream producer.
fieldProducer = "producer"
// fieldAudienceKind names the field carrying the target-audience kind.
fieldAudienceKind = "audience_kind"
// fieldRecipientUserIDs names the field carrying the recipient user ids
// encoded as a JSON array string.
fieldRecipientUserIDs = "recipient_user_ids_json"
// fieldIdempotencyKey names the field carrying the idempotency key.
fieldIdempotencyKey = "idempotency_key"
// fieldOccurredAtMS names the field carrying the occurrence time in Unix
// milliseconds.
fieldOccurredAtMS = "occurred_at_ms"
// fieldRequestID names the field carrying the request identifier.
fieldRequestID = "request_id"
// fieldTraceID names the field carrying the trace identifier.
fieldTraceID = "trace_id"
// fieldPayloadJSON names the field carrying the type-specific payload as a
// JSON string.
fieldPayloadJSON = "payload_json"
// defaultResolvedLocale is the value returned by DefaultResolvedLocale.
defaultResolvedLocale = "en"
)
// NotificationType identifies one supported normalized notification type. It
// is an alias of notificationintent.NotificationType, so values are
// interchangeable with the shared contract package.
type NotificationType = notificationintent.NotificationType
// Re-exported notification type values of the shared contract package.
const (
// NotificationTypeGeoReviewRecommended identifies the
// `geo.review_recommended` notification.
NotificationTypeGeoReviewRecommended = notificationintent.NotificationTypeGeoReviewRecommended
// NotificationTypeGameTurnReady identifies the `game.turn.ready`
// notification.
NotificationTypeGameTurnReady = notificationintent.NotificationTypeGameTurnReady
// NotificationTypeGameFinished identifies the `game.finished`
// notification.
NotificationTypeGameFinished = notificationintent.NotificationTypeGameFinished
// NotificationTypeGameGenerationFailed identifies the
// `game.generation_failed` notification.
NotificationTypeGameGenerationFailed = notificationintent.NotificationTypeGameGenerationFailed
// NotificationTypeLobbyRuntimePausedAfterStart identifies the
// `lobby.runtime_paused_after_start` notification.
NotificationTypeLobbyRuntimePausedAfterStart = notificationintent.NotificationTypeLobbyRuntimePausedAfterStart
// NotificationTypeLobbyApplicationSubmitted identifies the
// `lobby.application.submitted` notification.
NotificationTypeLobbyApplicationSubmitted = notificationintent.NotificationTypeLobbyApplicationSubmitted
// NotificationTypeLobbyMembershipApproved identifies the
// `lobby.membership.approved` notification.
NotificationTypeLobbyMembershipApproved = notificationintent.NotificationTypeLobbyMembershipApproved
// NotificationTypeLobbyMembershipRejected identifies the
// `lobby.membership.rejected` notification.
NotificationTypeLobbyMembershipRejected = notificationintent.NotificationTypeLobbyMembershipRejected
// NotificationTypeLobbyInviteCreated identifies the
// `lobby.invite.created` notification.
NotificationTypeLobbyInviteCreated = notificationintent.NotificationTypeLobbyInviteCreated
// NotificationTypeLobbyInviteRedeemed identifies the
// `lobby.invite.redeemed` notification.
NotificationTypeLobbyInviteRedeemed = notificationintent.NotificationTypeLobbyInviteRedeemed
// NotificationTypeLobbyInviteExpired identifies the
// `lobby.invite.expired` notification.
NotificationTypeLobbyInviteExpired = notificationintent.NotificationTypeLobbyInviteExpired
)
// Producer identifies one supported upstream producer. It is an alias of
// notificationintent.Producer.
type Producer = notificationintent.Producer
// Re-exported producer values of the shared contract package.
const (
// ProducerGeoProfile identifies Geo Profile Service.
ProducerGeoProfile = notificationintent.ProducerGeoProfile
// ProducerGameMaster identifies Game Master.
ProducerGameMaster = notificationintent.ProducerGameMaster
// ProducerGameLobby identifies Game Lobby.
ProducerGameLobby = notificationintent.ProducerGameLobby
)
// AudienceKind identifies one supported target-audience kind. It is an alias
// of notificationintent.AudienceKind.
type AudienceKind = notificationintent.AudienceKind
// Re-exported audience kinds of the shared contract package.
const (
// AudienceKindUser identifies user-targeted notifications.
AudienceKindUser = notificationintent.AudienceKindUser
// AudienceKindAdminEmail identifies administrator-email notifications.
AudienceKindAdminEmail = notificationintent.AudienceKindAdminEmail
)
// Channel identifies one durable notification-delivery channel slot. It is an
// alias of notificationintent.Channel.
type Channel = notificationintent.Channel
// Re-exported delivery channels of the shared contract package.
const (
// ChannelPush identifies the push-delivery channel.
ChannelPush = notificationintent.ChannelPush
// ChannelEmail identifies the email-delivery channel.
ChannelEmail = notificationintent.ChannelEmail
)
// Intent stores one normalized notification intent accepted from the Redis
// Stream ingress contract. It is an alias of notificationintent.Intent.
type Intent = notificationintent.Intent
// DecodeIntent validates one raw Redis Stream entry and returns the normalized
// notification intent frozen by the shared producer contract.
//
// fields maps top-level entry field names to their raw values. The call is
// forwarded unchanged to notificationintent.DecodeIntent, whose result and
// error are returned as-is.
func DecodeIntent(fields map[string]any) (Intent, error) {
return notificationintent.DecodeIntent(fields)
}
// ClassifyDecodeError translates one intake decoding or validation error into
// the stable malformed-intent failure code.
//
// Classification is driven by the error text: a message that mentions
// payload_json or one of the payload field names yields
// FailureCodeInvalidPayload; every other error, including nil, yields
// FailureCodeInvalidIntent.
func ClassifyDecodeError(err error) malformedintent.FailureCode {
	if err == nil {
		return malformedintent.FailureCodeInvalidIntent
	}

	payloadMarkers := [...]string{
		"payload_json",
		"turn_number",
		"final_turn_number",
		"failure_reason",
		"applicant_name",
		"inviter_name",
		"invitee_name",
		"review_reason",
	}

	text := err.Error()
	for _, marker := range payloadMarkers {
		if strings.Contains(text, marker) {
			return malformedintent.FailureCodeInvalidPayload
		}
	}

	return malformedintent.FailureCodeInvalidIntent
}
// DefaultResolvedLocale returns the frozen fallback locale assigned when the
// current rollout has no supported exact user locale other than English. The
// returned value is the package constant defaultResolvedLocale.
func DefaultResolvedLocale() string {
return defaultResolvedLocale
}
@@ -0,0 +1,145 @@
package intentstream
import (
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestDecodeIntentNormalizesUserRecipientsAndPayload(t *testing.T) {
t.Parallel()
fields := map[string]any{
fieldNotificationType: NotificationTypeGameTurnReady.String(),
fieldProducer: ProducerGameMaster.String(),
fieldAudienceKind: AudienceKindUser.String(),
fieldRecipientUserIDs: `["user-2","user-1"]`,
fieldIdempotencyKey: "game-123:turn-54",
fieldOccurredAtMS: "1775121700000",
fieldPayloadJSON: `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`,
fieldRequestID: "request-123",
fieldTraceID: "trace-123",
}
intent, err := DecodeIntent(fields)
require.NoError(t, err)
require.Equal(t, []string{"user-1", "user-2"}, intent.RecipientUserIDs)
require.Equal(t, `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, intent.PayloadJSON)
require.Equal(t, time.UnixMilli(1775121700000).UTC(), intent.OccurredAt)
}
// TestDecodeIntentCanonicalizesEquivalentPayloadJSON decodes two entries whose
// payloads differ only in key order (and whose occurrence times differ) and
// expects both intents to carry the same PayloadJSON.
func TestDecodeIntentCanonicalizesEquivalentPayloadJSON(t *testing.T) {
	t.Parallel()

	// entryWith builds an otherwise identical entry around the two varying
	// fields.
	entryWith := func(occurredAtMS string, payload string) map[string]any {
		return map[string]any{
			fieldNotificationType: NotificationTypeGameFinished.String(),
			fieldProducer:         ProducerGameMaster.String(),
			fieldAudienceKind:     AudienceKindUser.String(),
			fieldRecipientUserIDs: `["user-1"]`,
			fieldIdempotencyKey:   "game-123:finished",
			fieldOccurredAtMS:     occurredAtMS,
			fieldPayloadJSON:      payload,
		}
	}

	first, err := DecodeIntent(entryWith(
		"1775121700001",
		`{"game_id":"game-123","game_name":"Nebula Clash","final_turn_number":54}`,
	))
	require.NoError(t, err)

	second, err := DecodeIntent(entryWith(
		"1775121709999",
		`{"final_turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`,
	))
	require.NoError(t, err)

	require.Equal(t, first.PayloadJSON, second.PayloadJSON)
}
// TestDecodeIntentRejectsUnsupportedTopLevelField adds an unknown top-level
// field to an otherwise valid entry and expects an "unsupported fields" error
// classified as an invalid intent.
func TestDecodeIntentRejectsUnsupportedTopLevelField(t *testing.T) {
	t.Parallel()

	entry := map[string]any{
		"unexpected":          "boom",
		fieldNotificationType: NotificationTypeGameTurnReady.String(),
		fieldProducer:         ProducerGameMaster.String(),
		fieldAudienceKind:     AudienceKindUser.String(),
		fieldRecipientUserIDs: `["user-1"]`,
		fieldIdempotencyKey:   "game-123:turn-54",
		fieldOccurredAtMS:     "1775121700000",
		fieldPayloadJSON:      `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
	}

	_, decodeErr := DecodeIntent(entry)
	require.Error(t, decodeErr)
	require.Contains(t, decodeErr.Error(), "unsupported fields")

	gotCode := string(ClassifyDecodeError(decodeErr))
	require.Equal(t, malformedFailureCodeInvalidIntent(), gotCode)
}
func TestDecodeIntentRejectsDuplicateRecipientUserIDs(t *testing.T) {
t.Parallel()
fields := map[string]any{
fieldNotificationType: NotificationTypeGameTurnReady.String(),
fieldProducer: ProducerGameMaster.String(),
fieldAudienceKind: AudienceKindUser.String(),
fieldRecipientUserIDs: `["user-1","user-1"]`,
fieldIdempotencyKey: "game-123:turn-54",
fieldOccurredAtMS: "1775121700000",
fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
}
_, err := DecodeIntent(fields)
require.Error(t, err)
require.Contains(t, err.Error(), "duplicates user id")
require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err)))
}
func TestDecodeIntentRejectsInvalidPayloadJSON(t *testing.T) {
t.Parallel()
fields := map[string]any{
fieldNotificationType: NotificationTypeLobbyInviteCreated.String(),
fieldProducer: ProducerGameLobby.String(),
fieldAudienceKind: AudienceKindUser.String(),
fieldRecipientUserIDs: `["user-1"]`,
fieldIdempotencyKey: "invite-created:user-1",
fieldOccurredAtMS: "1775121700000",
fieldPayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","inviter_user_id":"user-2"}`,
}
_, err := DecodeIntent(fields)
require.Error(t, err)
require.Contains(t, err.Error(), "payload_json.inviter_name is required")
require.Equal(t, malformedFailureCodeInvalidPayload(), string(ClassifyDecodeError(err)))
}
func TestDecodeIntentRejectsAdminRecipientsField(t *testing.T) {
t.Parallel()
fields := map[string]any{
fieldNotificationType: NotificationTypeGeoReviewRecommended.String(),
fieldProducer: ProducerGeoProfile.String(),
fieldAudienceKind: AudienceKindAdminEmail.String(),
fieldRecipientUserIDs: `["user-1"]`,
fieldIdempotencyKey: "geo:user-1",
fieldOccurredAtMS: "1775121700000",
fieldPayloadJSON: `{"user_id":"user-1","user_email":"pilot@example.com","observed_country":"DE","usual_connection_country":"PL","review_reason":"country_mismatch"}`,
}
_, err := DecodeIntent(fields)
require.Error(t, err)
require.Contains(t, err.Error(), "must not be present")
require.Equal(t, malformedFailureCodeInvalidIntent(), string(ClassifyDecodeError(err)))
}
// malformedFailureCodeInvalidIntent returns the literal string the tests
// expect ClassifyDecodeError to produce for intent-level failures.
func malformedFailureCodeInvalidIntent() string {
return "invalid_intent"
}
// malformedFailureCodeInvalidPayload returns the literal string the tests
// expect ClassifyDecodeError to produce for payload-level failures.
func malformedFailureCodeInvalidPayload() string {
return "invalid_payload"
}
@@ -0,0 +1,252 @@
// Package internalhttp provides the private probe HTTP listener used by the
// runnable Notification Service process.
package internalhttp
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"strconv"
"sync"
"time"
"galaxy/notification/internal/telemetry"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/attribute"
)
// jsonContentType is the Content-Type header value written by
// writeStatusResponse.
const jsonContentType = "application/json; charset=utf-8"
// Paths of the probe routes registered by newHandler.
const (
// HealthzPath is the private liveness probe route.
HealthzPath = "/healthz"
// ReadyzPath is the private readiness probe route.
ReadyzPath = "/readyz"
)
// Config carries the listener settings of the private internal HTTP server
// owned by Notification Service.
type Config struct {
	// Addr is the TCP address the private probe HTTP server listens on.
	Addr string

	// ReadHeaderTimeout limits the time the listener may spend reading
	// request headers before the server rejects the connection.
	ReadHeaderTimeout time.Duration

	// ReadTimeout limits the time the listener may spend reading one request.
	ReadTimeout time.Duration

	// IdleTimeout limits how long an idle keep-alive connection stays open.
	IdleTimeout time.Duration
}

// Validate returns nil when cfg describes a usable private HTTP listener.
// Otherwise it reports the first violation found, checking Addr,
// ReadHeaderTimeout, ReadTimeout and IdleTimeout in that order.
func (cfg Config) Validate() error {
	if cfg.Addr == "" {
		return errors.New("internal HTTP addr must not be empty")
	}
	if cfg.ReadHeaderTimeout <= 0 {
		return errors.New("internal HTTP read header timeout must be positive")
	}
	if cfg.ReadTimeout <= 0 {
		return errors.New("internal HTTP read timeout must be positive")
	}
	if cfg.IdleTimeout <= 0 {
		return errors.New("internal HTTP idle timeout must be positive")
	}
	return nil
}
// Dependencies describes the collaborators used by the private probe
// transport layer. Both fields are optional from the caller's point of view:
// NewServer substitutes slog.Default for a nil Logger and stores Telemetry
// as given.
type Dependencies struct {
// Logger writes structured listener lifecycle logs. When nil, slog.Default
// is used.
Logger *slog.Logger
// Telemetry records low-cardinality probe metrics and lifecycle events.
Telemetry *telemetry.Runtime
}
// Server owns the private probe HTTP listener exposed by Notification
// Service.
type Server struct {
// cfg is the validated listener configuration supplied to NewServer.
cfg Config
// handler is the HTTP handler built once by NewServer and served by Run.
handler http.Handler
// logger writes lifecycle logs tagged with component=internal_http.
logger *slog.Logger
// metrics records listener lifecycle events.
metrics *telemetry.Runtime
// stateMu guards server and listener, which Run sets while serving and
// clears on return, and which Shutdown reads.
stateMu sync.RWMutex
server *http.Server
listener net.Listener
}
// NewServer builds one private probe HTTP server from cfg and deps.
//
// cfg is validated first and a validation failure is returned wrapped. A nil
// deps.Logger is replaced by slog.Default. The returned server does not
// listen until Run is called.
func NewServer(cfg Config, deps Dependencies) (*Server, error) {
	if err := cfg.Validate(); err != nil {
		return nil, fmt.Errorf("new internal HTTP server: %w", err)
	}

	baseLogger := deps.Logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}

	built := &Server{
		cfg:     cfg,
		metrics: deps.Telemetry,
	}
	built.handler = newHandler(baseLogger, deps.Telemetry)
	built.logger = baseLogger.With("component", "internal_http")

	return built, nil
}
// Run binds the configured listener and serves the private probe surface
// until Shutdown closes the server.
//
// ctx is only inspected before listening: a nil context or an already
// finished context makes Run return immediately. Once serving has started,
// stopping the server is the job of Shutdown. A stop caused by Shutdown
// (http.ErrServerClosed) is reported as success.
func (server *Server) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run internal HTTP server: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}

	addr := server.cfg.Addr
	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return fmt.Errorf("run internal HTTP server: listen on %q: %w", addr, err)
	}

	httpServer := &http.Server{
		Handler:           server.handler,
		ReadHeaderTimeout: server.cfg.ReadHeaderTimeout,
		ReadTimeout:       server.cfg.ReadTimeout,
		IdleTimeout:       server.cfg.IdleTimeout,
	}

	// publish swaps the active server and listener under the state lock so
	// Shutdown observes a consistent pair.
	publish := func(active *http.Server, bound net.Listener) {
		server.stateMu.Lock()
		server.server = active
		server.listener = bound
		server.stateMu.Unlock()
	}
	publish(httpServer, listener)

	server.logger.Info("notification internal HTTP server started", "addr", listener.Addr().String())
	server.metrics.RecordInternalHTTPEvent(context.Background(), "started")

	defer publish(nil, nil)

	serveErr := httpServer.Serve(listener)
	if serveErr == nil {
		return nil
	}
	if errors.Is(serveErr, http.ErrServerClosed) {
		server.logger.Info("notification internal HTTP server stopped")
		server.metrics.RecordInternalHTTPEvent(context.Background(), "stopped")
		return nil
	}
	return fmt.Errorf("run internal HTTP server: serve on %q: %w", addr, serveErr)
}
// Shutdown gracefully stops the private probe HTTP server within ctx.
//
// It is a no-op returning nil when no server is currently registered by Run.
// http.ErrServerClosed from the underlying shutdown is treated as success;
// any other error is returned wrapped.
func (server *Server) Shutdown(ctx context.Context) error {
	if ctx == nil {
		return errors.New("shutdown internal HTTP server: nil context")
	}

	server.stateMu.RLock()
	active := server.server
	server.stateMu.RUnlock()

	if active == nil {
		return nil
	}

	err := active.Shutdown(ctx)
	if err == nil || errors.Is(err, http.ErrServerClosed) {
		return nil
	}
	return fmt.Errorf("shutdown internal HTTP server: %w", err)
}
// newHandler assembles the probe handler chain: a ServeMux exposing the GET
// liveness and readiness routes, wrapped by withObservability and then by the
// otelhttp handler named "notification.internal_http".
//
// logger is accepted but not used by the current implementation.
func newHandler(logger *slog.Logger, metrics *telemetry.Runtime) http.Handler {
	routes := http.NewServeMux()
	routes.HandleFunc("GET "+HealthzPath, handleHealthz)
	routes.HandleFunc("GET "+ReadyzPath, handleReadyz)

	observed := withObservability(routes, metrics)
	return otelhttp.NewHandler(observed, "notification.internal_http")
}
// withObservability wraps next so that every request is reported to metrics
// with its route, method, status code and duration.
//
// The route label is the pattern recorded on the request after next has run,
// except that 405 and 404 responses are labelled "method_not_allowed" and
// "not_found", and a request without any pattern is labelled "unmatched".
//
// The recorded status code starts at 200 and only changes through
// statusRecorder.WriteHeader. net/http rejects status codes outside 100-999
// by panicking, so a zero status can never reach the labelling step; the
// former dedicated branch for it was unreachable and has been dropped.
func withObservability(next http.Handler, metrics *telemetry.Runtime) http.Handler {
	return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
		startedAt := time.Now()
		recorder := &statusRecorder{
			ResponseWriter: writer,
			statusCode:     http.StatusOK,
		}

		next.ServeHTTP(recorder, request)

		route := request.Pattern
		switch recorder.statusCode {
		case http.StatusMethodNotAllowed:
			route = "method_not_allowed"
		case http.StatusNotFound:
			route = "not_found"
		}
		if route == "" {
			route = "unmatched"
		}

		metrics.RecordInternalHTTPRequest(
			request.Context(),
			[]attribute.KeyValue{
				attribute.String("route", route),
				attribute.String("method", request.Method),
				attribute.String("status_code", strconv.Itoa(recorder.statusCode)),
			},
			time.Since(startedAt),
		)
	})
}
// handleHealthz answers the liveness probe with HTTP 200 and the status "ok".
// The request itself is not inspected.
func handleHealthz(writer http.ResponseWriter, _ *http.Request) {
writeStatusResponse(writer, http.StatusOK, "ok")
}
// handleReadyz answers the readiness probe with HTTP 200 and the status
// "ready". The request itself is not inspected, and no dependency checks are
// performed here.
func handleReadyz(writer http.ResponseWriter, _ *http.Request) {
writeStatusResponse(writer, http.StatusOK, "ready")
}
// writeStatusResponse sends statusCode together with a JSON body of the form
// {"status": status} and the jsonContentType content type.
func writeStatusResponse(writer http.ResponseWriter, statusCode int, status string) {
	headers := writer.Header()
	headers.Set("Content-Type", jsonContentType)
	writer.WriteHeader(statusCode)

	// The header is already sent at this point, so an encoding or write
	// failure cannot be reported to the client; it is deliberately ignored.
	encoder := json.NewEncoder(writer)
	_ = encoder.Encode(statusResponse{Status: status})
}
// statusResponse is the JSON body written by writeStatusResponse.
type statusResponse struct {
// Status is serialized under the "status" key.
Status string `json:"status"`
}
// statusRecorder decorates an http.ResponseWriter and remembers the status
// code most recently passed to WriteHeader.
type statusRecorder struct {
	http.ResponseWriter

	// statusCode is the last status handed to WriteHeader; callers choose
	// the initial value when constructing the recorder.
	statusCode int
}

// WriteHeader stores statusCode and then forwards it to the wrapped writer.
func (recorder *statusRecorder) WriteHeader(statusCode int) {
	recorder.statusCode = statusCode

	wrapped := recorder.ResponseWriter
	wrapped.WriteHeader(statusCode)
}
@@ -0,0 +1,272 @@
package internalhttp
import (
"context"
"encoding/json"
"io"
"net"
"net/http"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewServerRejectsInvalidConfiguration omits the listen address and
// expects NewServer to fail with an error that mentions "addr".
func TestNewServerRejectsInvalidConfiguration(t *testing.T) {
	t.Parallel()

	withoutAddr := Config{
		ReadHeaderTimeout: time.Second,
		ReadTimeout:       time.Second,
		IdleTimeout:       time.Second,
	}

	_, buildErr := NewServer(withoutAddr, Dependencies{})
	require.Error(t, buildErr)
	assert.Contains(t, buildErr.Error(), "addr")
}
// TestServerRunAndShutdown starts the server, waits until the liveness probe
// answers, shuts the server down and expects Run to return without error.
func TestServerRunAndShutdown(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	probeServer, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	finished := make(chan error, 1)
	go func() {
		finished <- probeServer.Run(context.Background())
	}()

	httpClient := newTestHTTPClient(t)
	waitForHealthzReady(t, httpClient, cfg.Addr)

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, probeServer.Shutdown(stopCtx))
	waitForServerRunResult(t, finished)
}
// TestProbeRoutesReturnStableJSON requests both probe routes and expects HTTP
// 200, the JSON content type and the route-specific status value.
func TestProbeRoutesReturnStableJSON(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	probeServer, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	finished := make(chan error, 1)
	go func() {
		finished <- probeServer.Run(context.Background())
	}()

	httpClient := newTestHTTPClient(t)
	waitForHealthzReady(t, httpClient, cfg.Addr)

	probes := []struct {
		path   string
		status string
	}{
		{path: HealthzPath, status: "ok"},
		{path: ReadyzPath, status: "ready"},
	}

	// The subtests are not parallel, so each one completes inside t.Run
	// before the loop advances and before the server is shut down.
	for _, probe := range probes {
		t.Run(probe.path, func(t *testing.T) {
			target := "http://" + cfg.Addr + probe.path
			request, err := http.NewRequest(http.MethodGet, target, nil)
			require.NoError(t, err)

			response, err := httpClient.Do(request)
			require.NoError(t, err)
			defer response.Body.Close()

			require.Equal(t, http.StatusOK, response.StatusCode)
			require.Equal(t, "application/json; charset=utf-8", response.Header.Get("Content-Type"))

			var decoded statusResponse
			require.NoError(t, json.NewDecoder(response.Body).Decode(&decoded))
			require.Equal(t, probe.status, decoded.Status)
		})
	}

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, probeServer.Shutdown(stopCtx))
	waitForServerRunResult(t, finished)
}
// TestServerDoesNotExposeMetricsOrUnknownRoutes expects GET /metrics and GET
// /unknown to be answered with HTTP 404.
func TestServerDoesNotExposeMetricsOrUnknownRoutes(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	probeServer, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	finished := make(chan error, 1)
	go func() {
		finished <- probeServer.Run(context.Background())
	}()

	httpClient := newTestHTTPClient(t)
	waitForHealthzReady(t, httpClient, cfg.Addr)

	hiddenPaths := []string{"/metrics", "/unknown"}
	for _, hidden := range hiddenPaths {
		request, err := http.NewRequest(http.MethodGet, "http://"+cfg.Addr+hidden, nil)
		require.NoError(t, err)

		response, err := httpClient.Do(request)
		require.NoError(t, err)

		// Drain and close the body before asserting; the payload is irrelevant.
		_, _ = io.ReadAll(response.Body)
		response.Body.Close()

		assert.Equalf(t, http.StatusNotFound, response.StatusCode, "path %s", hidden)
	}

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, probeServer.Shutdown(stopCtx))
	waitForServerRunResult(t, finished)
}
// TestServerPreservesStandardHEADBehavior sends HEAD to the liveness route
// and expects HTTP 200 with an empty body.
func TestServerPreservesStandardHEADBehavior(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	probeServer, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	finished := make(chan error, 1)
	go func() {
		finished <- probeServer.Run(context.Background())
	}()

	httpClient := newTestHTTPClient(t)
	waitForHealthzReady(t, httpClient, cfg.Addr)

	target := "http://" + cfg.Addr + HealthzPath
	request, err := http.NewRequest(http.MethodHead, target, nil)
	require.NoError(t, err)

	response, err := httpClient.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()

	payload, err := io.ReadAll(response.Body)
	require.NoError(t, err)
	require.Equal(t, http.StatusOK, response.StatusCode)
	require.Empty(t, payload)

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, probeServer.Shutdown(stopCtx))
	waitForServerRunResult(t, finished)
}
// TestServerUsesStandardMethodNotAllowedBehavior sends POST to the liveness
// route and expects HTTP 405 with an Allow header listing GET and HEAD.
func TestServerUsesStandardMethodNotAllowedBehavior(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	probeServer, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	finished := make(chan error, 1)
	go func() {
		finished <- probeServer.Run(context.Background())
	}()

	httpClient := newTestHTTPClient(t)
	waitForHealthzReady(t, httpClient, cfg.Addr)

	target := "http://" + cfg.Addr + HealthzPath
	request, err := http.NewRequest(http.MethodPost, target, nil)
	require.NoError(t, err)

	response, err := httpClient.Do(request)
	require.NoError(t, err)
	defer response.Body.Close()

	// Drain the body; only status and headers matter here.
	_, _ = io.ReadAll(response.Body)

	require.Equal(t, http.StatusMethodNotAllowed, response.StatusCode)
	require.Contains(t, response.Header.Get("Allow"), http.MethodGet)
	require.Contains(t, response.Header.Get("Allow"), http.MethodHead)

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, probeServer.Shutdown(stopCtx))
	waitForServerRunResult(t, finished)
}
// testConfig returns a valid listener configuration bound to an address
// obtained from mustFreeAddr.
func testConfig(t *testing.T) Config {
	t.Helper()

	var cfg Config
	cfg.Addr = mustFreeAddr(t)
	cfg.ReadHeaderTimeout = time.Second
	cfg.ReadTimeout = 2 * time.Second
	cfg.IdleTimeout = time.Minute
	return cfg
}
// newTestHTTPClient returns an HTTP client with a 250ms timeout whose
// transport has keep-alives disabled; idle connections are closed when the
// test finishes.
func newTestHTTPClient(t *testing.T) *http.Client {
	t.Helper()

	noKeepAlive := &http.Transport{DisableKeepAlives: true}
	t.Cleanup(noKeepAlive.CloseIdleConnections)

	httpClient := &http.Client{
		Transport: noKeepAlive,
		Timeout:   250 * time.Millisecond,
	}
	return httpClient
}
func waitForHealthzReady(t *testing.T, client *http.Client, addr string) {
t.Helper()
require.Eventually(t, func() bool {
request, err := http.NewRequest(http.MethodGet, "http://"+addr+HealthzPath, nil)
if err != nil {
return false
}
response, err := client.Do(request)
if err != nil {
return false
}
defer response.Body.Close()
payload, err := io.ReadAll(response.Body)
if err != nil {
return false
}
return response.StatusCode == http.StatusOK && strings.Contains(string(payload), `"status":"ok"`)
}, 5*time.Second, 25*time.Millisecond, "internal HTTP server did not become reachable")
}
// waitForServerRunResult polls runErr every 10ms for up to five seconds and
// requires that a result arrives and that the received error is nil.
func waitForServerRunResult(t *testing.T, runErr <-chan error) {
	t.Helper()

	var received error

	// arrived makes one non-blocking receive attempt.
	arrived := func() bool {
		select {
		case received = <-runErr:
			return true
		default:
		}
		return false
	}

	require.Eventually(t, arrived, 5*time.Second, 10*time.Millisecond, "internal HTTP server did not stop")
	require.NoError(t, received)
}
// mustFreeAddr opens a listener on 127.0.0.1 with an OS-chosen port, records
// its address, closes the listener and returns the address.
func mustFreeAddr(t *testing.T) string {
	t.Helper()

	probe, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)

	addr := probe.Addr().String()
	assert.NoError(t, probe.Close())

	return addr
}
+168
View File
@@ -0,0 +1,168 @@
// Package app wires the Notification Service process lifecycle and
// coordinates component startup and graceful shutdown.
package app
import (
"context"
"errors"
"fmt"
"sync"
"galaxy/notification/internal/config"
)
// Component is a long-lived Notification Service subsystem that participates
// in coordinated startup and graceful shutdown. App.Run calls Run on its own
// goroutine per component and later calls Shutdown on every component.
type Component interface {
// Run starts the component and blocks until it stops.
Run(context.Context) error
// Shutdown stops the component within the provided timeout-bounded context.
Shutdown(context.Context) error
}
// App owns the process-level lifecycle of Notification Service and its
// registered components.
type App struct {
// cfg supplies the shutdown timeout used while stopping components.
cfg config.Config
// components are started by Run; log and error messages identify each
// one by its index in this slice.
components []Component
}
// New builds an App for cfg that owns its own copy of the components slice,
// so later changes to the caller's slice do not affect the App.
func New(cfg config.Config, components ...Component) *App {
	built := &App{cfg: cfg}
	built.components = append(built.components, components...)
	return built
}
// Run launches every configured component on its own goroutine, blocks until
// ctx is done or the first component returns, and then performs best-effort
// graceful shutdown.
//
// With no components registered, Run simply waits for ctx to finish. The
// returned error joins the classified first component result, shutdown
// errors, and a timeout while waiting for component goroutines to exit.
func (app *App) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run notification app: nil context")
	}
	if err := app.validate(); err != nil {
		return err
	}
	if len(app.components) == 0 {
		<-ctx.Done()
		return nil
	}

	runCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	// The channel holds one slot per component so late results never block
	// their goroutine after Run stopped receiving.
	outcomes := make(chan componentResult, len(app.components))

	var running sync.WaitGroup
	launch := func(position int, target Component) {
		defer running.Done()
		outcomes <- componentResult{
			index: position,
			err:   target.Run(runCtx),
		}
	}
	for position, target := range app.components {
		running.Add(1)
		go launch(position, target)
	}

	var runErr error
	select {
	case <-ctx.Done():
	case first := <-outcomes:
		runErr = classifyComponentResult(ctx, first)
	}

	cancel()
	shutdownErr := app.shutdownComponents()
	waitErr := app.waitForComponents(&running)

	return errors.Join(runErr, shutdownErr, waitErr)
}
// componentResult carries the outcome of one component's Run call.
type componentResult struct {
// index is the component's position in App.components.
index int
// err is the value returned by the component's Run.
err error
}
// validate rejects a non-positive shutdown timeout and any nil component,
// reporting the index of the first nil entry.
func (app *App) validate() error {
	if timeout := app.cfg.ShutdownTimeout; timeout <= 0 {
		return fmt.Errorf("run notification app: shutdown timeout must be positive, got %s", timeout)
	}

	for position := range app.components {
		if app.components[position] == nil {
			return fmt.Errorf("run notification app: component %d is nil", position)
		}
	}

	return nil
}
// classifyComponentResult decides whether the first component result is a
// failure of the application run.
//
// A nil error, or a context.Canceled error, counts as a clean stop only when
// parentCtx is already done. A nil error while parentCtx is still live is
// reported as a premature exit; every other error is wrapped with the
// component index.
func classifyComponentResult(parentCtx context.Context, result componentResult) error {
	if result.err == nil {
		if parentCtx.Err() != nil {
			return nil
		}
		return fmt.Errorf("run notification app: component %d exited without error before shutdown", result.index)
	}

	if errors.Is(result.err, context.Canceled) && parentCtx.Err() != nil {
		return nil
	}

	return fmt.Errorf("run notification app: component %d: %w", result.index, result.err)
}
// shutdownComponents calls Shutdown on all components concurrently, giving
// each its own context bounded by the configured shutdown timeout, waits for
// every call to return and joins the reported errors.
func (app *App) shutdownComponents() error {
	// One buffered slot per component lets every goroutine report without
	// blocking.
	failures := make(chan error, len(app.components))

	var stopping sync.WaitGroup
	stop := func(position int, target Component) {
		defer stopping.Done()

		stopCtx, release := context.WithTimeout(context.Background(), app.cfg.ShutdownTimeout)
		defer release()

		err := target.Shutdown(stopCtx)
		if err == nil {
			return
		}
		failures <- fmt.Errorf("shutdown notification component %d: %w", position, err)
	}
	for position, target := range app.components {
		stopping.Add(1)
		go stop(position, target)
	}

	stopping.Wait()
	close(failures)

	var combined error
	for failure := range failures {
		combined = errors.Join(combined, failure)
	}
	return combined
}
// waitForComponents blocks until every component goroutine tracked by
// runWaitGroup has returned, or until the configured shutdown timeout
// elapses, in which case the timeout error is returned wrapped.
func (app *App) waitForComponents(runWaitGroup *sync.WaitGroup) error {
	allStopped := make(chan struct{})
	go func() {
		defer close(allStopped)
		runWaitGroup.Wait()
	}()

	deadlineCtx, release := context.WithTimeout(context.Background(), app.cfg.ShutdownTimeout)
	defer release()

	select {
	case <-deadlineCtx.Done():
		return fmt.Errorf("wait for notification components: %w", deadlineCtx.Err())
	case <-allStopped:
		return nil
	}
}
+229
View File
@@ -0,0 +1,229 @@
package app
import (
"context"
"errors"
"fmt"
"log/slog"
redisadapter "galaxy/notification/internal/adapters/redis"
"galaxy/notification/internal/adapters/redisstate"
userserviceadapter "galaxy/notification/internal/adapters/userservice"
"galaxy/notification/internal/api/internalhttp"
"galaxy/notification/internal/config"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/telemetry"
"galaxy/notification/internal/worker"
"github.com/redis/go-redis/v9"
)
// Runtime owns the runnable Notification Service process plus the cleanup
// functions that release runtime resources after shutdown.
type Runtime struct {
// cfg is the configuration the runtime was built from.
cfg config.Config
// app is the component lifecycle coordinator.
app *App
// probeServer is the private probe HTTP listener.
probeServer *internalhttp.Server
// telemetry is the process telemetry runtime created by NewRuntime.
telemetry *telemetry.Runtime
// intentConsumer, pushPublisher and emailPublisher are the worker
// components of the process.
intentConsumer *worker.IntentConsumer
pushPublisher *worker.PushPublisher
emailPublisher *worker.EmailPublisher
// cleanupFns collects the resource-release callbacks that NewRuntime
// registers as it acquires resources.
cleanupFns []func() error
}
// NewRuntime constructs the runnable Notification Service process from cfg.
//
// It validates cfg and then builds, in order: the telemetry runtime, the
// shared Redis client (instrumented and verified with a startup ping), the
// Redis-backed stores, the user service client, the accept-intent service, the
// intent consumer, the push and email publishers, and the internal probe HTTP
// server. The resulting components are handed to New to form the App that Run
// drives.
//
// ctx must not be nil; it is passed to telemetry startup and to the Redis
// ping. A nil logger is replaced with slog.Default().
//
// On any construction failure every cleanup function registered so far is run
// through Close, the returned *Runtime is nil, and a cleanup failure is wrapped
// into the returned error alongside the original cause.
func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*Runtime, error) {
	if ctx == nil {
		return nil, fmt.Errorf("new notification runtime: nil context")
	}
	if err := cfg.Validate(); err != nil {
		return nil, fmt.Errorf("new notification runtime: %w", err)
	}
	if logger == nil {
		logger = slog.Default()
	}

	runtime := &Runtime{
		cfg: cfg,
	}
	// cleanupOnError releases whatever has been registered so far and reports
	// both the construction failure and any cleanup failure.
	cleanupOnError := func(err error) (*Runtime, error) {
		if cleanupErr := runtime.Close(); cleanupErr != nil {
			return nil, fmt.Errorf("%w; cleanup: %w", err, cleanupErr)
		}

		return nil, err
	}

	telemetryRuntime, err := telemetry.NewProcess(ctx, telemetry.ProcessConfig{
		ServiceName:          cfg.Telemetry.ServiceName,
		TracesExporter:       cfg.Telemetry.TracesExporter,
		MetricsExporter:      cfg.Telemetry.MetricsExporter,
		TracesProtocol:       cfg.Telemetry.TracesProtocol,
		MetricsProtocol:      cfg.Telemetry.MetricsProtocol,
		StdoutTracesEnabled:  cfg.Telemetry.StdoutTracesEnabled,
		StdoutMetricsEnabled: cfg.Telemetry.StdoutMetricsEnabled,
	}, logger.With("component", "telemetry"))
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: telemetry: %w", err))
	}
	runtime.telemetry = telemetryRuntime
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		// The caller's ctx may already be canceled at cleanup time, so the
		// shutdown gets its own bounded context.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
		defer cancel()

		return telemetryRuntime.Shutdown(shutdownCtx)
	})

	redisClient := redisadapter.NewClient(cfg.Redis)
	// Register the close callback before any step that can fail, so an
	// instrumentation or ping error still releases the client.
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		err := redisClient.Close()
		if errors.Is(err, redis.ErrClosed) {
			return nil
		}

		return err
	})
	if err := redisadapter.InstrumentClient(redisClient, telemetryRuntime); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
	}
	if err := redisadapter.Ping(ctx, cfg.Redis, redisClient); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
	}

	acceptanceStore, err := redisstate.NewAcceptanceStore(redisClient, redisstate.AcceptanceConfig{
		RecordTTL:      cfg.Retry.RecordTTL,
		DeadLetterTTL:  cfg.Retry.DeadLetterTTL,
		IdempotencyTTL: cfg.Retry.IdempotencyTTL,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: acceptance store: %w", err))
	}
	malformedIntentStore, err := redisstate.NewMalformedIntentStore(redisClient, cfg.Retry.DeadLetterTTL)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: malformed intent store: %w", err))
	}
	streamOffsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: stream offset store: %w", err))
	}
	intentStreamLagReader, err := redisstate.NewIntentStreamLagReader(streamOffsetStore, cfg.Streams.Intents)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: intent stream lag reader: %w", err))
	}
	telemetryRuntime.SetRouteScheduleSnapshotReader(acceptanceStore)
	telemetryRuntime.SetIntentStreamLagSnapshotReader(intentStreamLagReader)

	userDirectory, err := userserviceadapter.NewClient(userserviceadapter.Config{
		BaseURL:        cfg.UserService.BaseURL,
		RequestTimeout: cfg.UserService.Timeout,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: user service client: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, userDirectory.Close)

	// NOTE(review): Clock and Encoder are passed as nil below; presumably the
	// constructors substitute defaults — confirm in the respective packages.
	acceptIntentService, err := acceptintent.New(acceptintent.Config{
		Store:            acceptanceStore,
		UserDirectory:    userDirectory,
		Clock:            nil,
		Logger:           logger,
		Telemetry:        telemetryRuntime,
		PushMaxAttempts:  cfg.Retry.PushMaxAttempts,
		EmailMaxAttempts: cfg.Retry.EmailMaxAttempts,
		IdempotencyTTL:   cfg.Retry.IdempotencyTTL,
		AdminRouting:     cfg.AdminRouting,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: accept intent service: %w", err))
	}

	intentConsumer, err := worker.NewIntentConsumer(worker.IntentConsumerConfig{
		Client:            redisClient,
		Stream:            cfg.Streams.Intents,
		BlockTimeout:      cfg.IntentsReadBlockTimeout,
		Acceptor:          acceptIntentService,
		MalformedRecorder: malformedIntentStore,
		OffsetStore:       streamOffsetStore,
		Telemetry:         telemetryRuntime,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: intent consumer: %w", err))
	}
	runtime.intentConsumer = intentConsumer

	pushPublisher, err := worker.NewPushPublisher(worker.PushPublisherConfig{
		Store:               acceptanceStore,
		GatewayStream:       cfg.Streams.GatewayClientEvents,
		GatewayStreamMaxLen: cfg.Streams.GatewayClientEventsStreamMaxLen,
		RouteLeaseTTL:       cfg.Retry.RouteLeaseTTL,
		RouteBackoffMin:     cfg.Retry.RouteBackoffMin,
		RouteBackoffMax:     cfg.Retry.RouteBackoffMax,
		Encoder:             nil,
		Telemetry:           telemetryRuntime,
		Clock:               nil,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: push publisher: %w", err))
	}
	runtime.pushPublisher = pushPublisher

	emailPublisher, err := worker.NewEmailPublisher(worker.EmailPublisherConfig{
		Store:                      acceptanceStore,
		MailDeliveryCommandsStream: cfg.Streams.MailDeliveryCommands,
		RouteLeaseTTL:              cfg.Retry.RouteLeaseTTL,
		RouteBackoffMin:            cfg.Retry.RouteBackoffMin,
		RouteBackoffMax:            cfg.Retry.RouteBackoffMax,
		Encoder:                    nil,
		Telemetry:                  telemetryRuntime,
		Clock:                      nil,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: email publisher: %w", err))
	}
	runtime.emailPublisher = emailPublisher

	probeServer, err := internalhttp.NewServer(internalhttp.Config{
		Addr:              cfg.InternalHTTP.Addr,
		ReadHeaderTimeout: cfg.InternalHTTP.ReadHeaderTimeout,
		ReadTimeout:       cfg.InternalHTTP.ReadTimeout,
		IdleTimeout:       cfg.InternalHTTP.IdleTimeout,
	}, internalhttp.Dependencies{
		Logger:    logger,
		Telemetry: telemetryRuntime,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: internal HTTP server: %w", err))
	}
	runtime.probeServer = probeServer

	runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher)

	return runtime, nil
}
// Run delegates to the underlying App and returns whatever its Run returns.
// According to the constructor wiring, that App serves the private probe HTTP
// listener together with the intent consumer and both publishers.
//
// A nil ctx, a nil receiver, or a Runtime without an App is rejected with an
// error instead of a panic.
func (runtime *Runtime) Run(ctx context.Context) error {
	switch {
	case ctx == nil:
		return errors.New("run notification runtime: nil context")
	case runtime == nil:
		return errors.New("run notification runtime: nil runtime")
	case runtime.app == nil:
		return errors.New("run notification runtime: nil app")
	}

	return runtime.app.Run(ctx)
}
// Close runs the registered cleanup functions from the most recently
// registered to the first, so dependencies are released in reverse
// construction order. Every function is invoked even when an earlier one
// fails; the failures are combined with errors.Join. Close on a nil receiver
// is a no-op.
func (runtime *Runtime) Close() error {
	if runtime == nil {
		return nil
	}

	var combined error
	for remaining := len(runtime.cleanupFns); remaining > 0; remaining-- {
		release := runtime.cleanupFns[remaining-1]
		err := release()
		if err == nil {
			continue
		}
		combined = errors.Join(combined, err)
	}

	return combined
}
@@ -0,0 +1,72 @@
package app
import (
"context"
"net/http"
"os"
"testing"
"time"
"galaxy/notification/internal/config"
"github.com/stretchr/testify/require"
testcontainers "github.com/testcontainers/testcontainers-go"
rediscontainer "github.com/testcontainers/testcontainers-go/modules/redis"
)
const (
// realRuntimeSmokeEnv names the environment variable that must equal "1" for
// TestRealRuntimeCompatibility to run; otherwise the test is skipped.
realRuntimeSmokeEnv = "NOTIFICATION_REAL_RUNTIME_SMOKE"
// realRuntimeRedisImage is the container image handed to the Redis
// testcontainers module by the smoke test.
realRuntimeRedisImage = "redis:7"
)
// TestRealRuntimeCompatibility boots the runtime against a Redis container
// and checks that the probe endpoints answer and that the runtime stops
// without error. It only runs when NOTIFICATION_REAL_RUNTIME_SMOKE is "1".
func TestRealRuntimeCompatibility(t *testing.T) {
	if enabled := os.Getenv(realRuntimeSmokeEnv); enabled != "1" {
		t.Skipf("set %s=1 to run the real runtime smoke suite", realRuntimeSmokeEnv)
	}

	ctx := context.Background()

	container, err := rediscontainer.Run(ctx, realRuntimeRedisImage)
	require.NoError(t, err)
	testcontainers.CleanupContainer(t, container)

	endpoint, err := container.Endpoint(ctx, "")
	require.NoError(t, err)

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = endpoint
	cfg.UserService.BaseURL = "http://user-service.internal"
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 2 * time.Second
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, err := NewRuntime(ctx, cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, rt.Close())
	}()

	runCtx, stop := context.WithCancel(ctx)
	defer stop()

	results := make(chan error, 1)
	go func() {
		results <- rt.Run(runCtx)
	}()

	transport := &http.Transport{DisableKeepAlives: true}
	httpClient := &http.Client{
		Timeout:   500 * time.Millisecond,
		Transport: transport,
	}
	t.Cleanup(httpClient.CloseIdleConnections)

	waitForRuntimeReady(t, httpClient, cfg.InternalHTTP.Addr)

	baseURL := "http://" + cfg.InternalHTTP.Addr
	assertHTTPStatus(t, httpClient, baseURL+"/healthz", http.StatusOK)
	assertHTTPStatus(t, httpClient, baseURL+"/readyz", http.StatusOK)

	stop()
	waitForRunResult(t, results, cfg.ShutdownTimeout+2*time.Second)
}
+581
View File
@@ -0,0 +1,581 @@
package app
import (
"context"
"encoding/json"
"io"
"log/slog"
"net"
"net/http"
"net/http/httptest"
"strconv"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/config"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewRuntimeStartsProbeListenerAndStopsCleanly starts the runtime against
// an in-memory Redis, checks the probe endpoints (including that /metrics is
// not served), and verifies that canceling the run context stops the runtime
// without error.
func TestNewRuntimeStartsProbeListenerAndStopsCleanly(t *testing.T) {
	t.Parallel()

	miniRedis := miniredis.RunT(t)
	userLookup := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
	defer userLookup.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = miniRedis.Addr()
	cfg.UserService.BaseURL = userLookup.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, rt.Close())
	}()

	runCtx, stop := context.WithCancel(context.Background())
	defer stop()

	results := make(chan error, 1)
	go func() {
		results <- rt.Run(runCtx)
	}()

	httpClient := newTestHTTPClient(t)
	waitForRuntimeReady(t, httpClient, cfg.InternalHTTP.Addr)

	baseURL := "http://" + cfg.InternalHTTP.Addr
	probes := []struct {
		path string
		want int
	}{
		{path: "/healthz", want: http.StatusOK},
		{path: "/readyz", want: http.StatusOK},
		{path: "/metrics", want: http.StatusNotFound},
	}
	for _, probe := range probes {
		assertHTTPStatus(t, httpClient, baseURL+probe.path, probe.want)
	}

	stop()
	waitForRunResult(t, results, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimeFailsFastWhenRedisPingCheckFails points the runtime at an
// address nobody listens on and expects construction to fail with a
// "ping redis" error and a nil runtime.
func TestNewRuntimeFailsFastWhenRedisPingCheckFails(t *testing.T) {
	t.Parallel()

	cfg := config.DefaultConfig()
	// mustFreeAddr releases the port again, so the Redis dial is refused.
	cfg.Redis.Addr = mustFreeAddr(t)
	cfg.UserService.BaseURL = "http://127.0.0.1:18080"
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, buildErr := NewRuntime(context.Background(), cfg, testLogger())

	require.Nil(t, rt)
	require.Error(t, buildErr)
	assert.ErrorContains(t, buildErr, "ping redis")
}
// TestNewRuntimeAcceptsIntentThroughConsumer appends one user-audience intent
// to the ingress stream and waits until the email route stored for it carries
// the email address and locale resolved from the stubbed user service.
func TestNewRuntimeAcceptsIntentThroughConsumer(t *testing.T) {
	t.Parallel()

	miniRedis := miniredis.RunT(t)
	probeRedis := redis.NewClient(&redis.Options{
		Addr:            miniRedis.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, probeRedis.Close())
	})

	userLookup := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
		writeJSON(t, writer, http.StatusOK, map[string]any{
			"user": map[string]any{
				"email":              "pilot@example.com",
				"preferred_language": "en-US",
			},
		})
	})
	defer userLookup.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = miniRedis.Addr()
	cfg.UserService.BaseURL = userLookup.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, rt.Close())
	}()

	runCtx, stop := context.WithCancel(context.Background())
	defer stop()

	results := make(chan error, 1)
	go func() {
		results <- rt.Run(runCtx)
	}()

	httpClient := newTestHTTPClient(t)
	waitForRuntimeReady(t, httpClient, cfg.InternalHTTP.Addr)

	bg := context.Background()
	intent := &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		},
	}
	messageID, err := probeRedis.XAdd(bg, intent).Result()
	require.NoError(t, err)

	emailRouteKey := redisstate.Keyspace{}.Route(messageID, "email:user:user-1")
	routeResolved := func() bool {
		raw, getErr := probeRedis.Get(bg, emailRouteKey).Bytes()
		if getErr != nil {
			return false
		}
		stored, decodeErr := redisstate.UnmarshalRoute(raw)
		if decodeErr != nil {
			return false
		}

		return stored.ResolvedEmail == "pilot@example.com" && stored.ResolvedLocale == "en"
	}
	require.Eventually(t, routeResolved, time.Second, 10*time.Millisecond)

	stop()
	waitForRunResult(t, results, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimePublishesAcceptedPushAndEmailRoutes appends one user-audience
// intent and verifies the full pipeline: both the push and the email route
// reach the "published" status after one attempt, exactly one gateway client
// event is emitted, and exactly one mail delivery command is emitted with the
// expected fields.
func TestNewRuntimePublishesAcceptedPushAndEmailRoutes(t *testing.T) {
	t.Parallel()

	miniRedis := miniredis.RunT(t)
	probeRedis := redis.NewClient(&redis.Options{
		Addr:            miniRedis.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, probeRedis.Close())
	})

	userLookup := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
		writeJSON(t, writer, http.StatusOK, map[string]any{
			"user": map[string]any{
				"email":              "pilot@example.com",
				"preferred_language": "en-US",
			},
		})
	})
	defer userLookup.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = miniRedis.Addr()
	cfg.UserService.BaseURL = userLookup.URL
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, rt.Close())
	}()

	runCtx, stop := context.WithCancel(context.Background())
	defer stop()

	results := make(chan error, 1)
	go func() {
		results <- rt.Run(runCtx)
	}()

	httpClient := newTestHTTPClient(t)
	waitForRuntimeReady(t, httpClient, cfg.InternalHTTP.Addr)

	bg := context.Background()
	intent := &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			"request_id":              "request-1",
			"trace_id":                "trace-1",
		},
	}
	messageID, err := probeRedis.XAdd(bg, intent).Result()
	require.NoError(t, err)

	pushRouteKey := redisstate.Keyspace{}.Route(messageID, "push:user:user-1")
	emailRouteKey := redisstate.Keyspace{}.Route(messageID, "email:user:user-1")

	// Wait until both routes have been published by their workers.
	bothPublished := func() bool {
		rawPush, getErr := probeRedis.Get(bg, pushRouteKey).Bytes()
		if getErr != nil {
			return false
		}
		push, decodeErr := redisstate.UnmarshalRoute(rawPush)
		if decodeErr != nil {
			return false
		}
		rawEmail, getErr := probeRedis.Get(bg, emailRouteKey).Bytes()
		if getErr != nil {
			return false
		}
		email, decodeErr := redisstate.UnmarshalRoute(rawEmail)
		if decodeErr != nil {
			return false
		}

		pushDone := push.Status == "published" && push.AttemptCount == 1
		emailDone := email.Status == "published" && email.AttemptCount == 1
		return pushDone && emailDone
	}
	require.Eventually(t, bothPublished, 2*time.Second, 10*time.Millisecond)

	pushRoutePayload, err := probeRedis.Get(bg, pushRouteKey).Bytes()
	require.NoError(t, err)
	pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "published", string(pushRoute.Status))

	notificationPayload, err := probeRedis.Get(bg, redisstate.Keyspace{}.Notification(messageID)).Bytes()
	require.NoError(t, err)
	notificationRecord, err := redisstate.UnmarshalNotification(notificationPayload)
	require.NoError(t, err)

	emailRoutePayload, err := probeRedis.Get(bg, emailRouteKey).Bytes()
	require.NoError(t, err)
	emailRoute, err := redisstate.UnmarshalRoute(emailRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "published", string(emailRoute.Status))

	// Gateway side: exactly one client event, addressed to the user.
	gatewayEvents, err := probeRedis.XRange(bg, cfg.Streams.GatewayClientEvents, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, gatewayEvents, 1)
	event := gatewayEvents[0].Values
	require.Equal(t, "user-1", event["user_id"])
	require.Equal(t, "game.turn.ready", event["event_type"])
	require.Equal(t, messageID+"/push:user:user-1", event["event_id"])
	require.Equal(t, "request-1", event["request_id"])
	require.Equal(t, "trace-1", event["trace_id"])
	require.NotContains(t, event, "device_session_id")
	switch payload := event["payload_bytes"].(type) {
	case string:
		require.NotEmpty(t, payload)
	case []byte:
		require.NotEmpty(t, payload)
	default:
		require.Failf(t, "unexpected payload type", "payload_bytes has type %T", payload)
	}

	// Mail side: exactly one delivery command carrying the template payload.
	mailCommands, err := probeRedis.XRange(bg, cfg.Streams.MailDeliveryCommands, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, mailCommands, 1)
	command := mailCommands[0].Values
	acceptedAtMS := strconv.FormatInt(notificationRecord.AcceptedAt.UnixMilli(), 10)
	require.Equal(t, messageID+"/email:user:user-1", command["delivery_id"])
	require.Equal(t, "notification", command["source"])
	require.Equal(t, "template", command["payload_mode"])
	require.Equal(t, "notification:"+messageID+"/email:user:user-1", command["idempotency_key"])
	require.Equal(t, acceptedAtMS, command["requested_at_ms"])
	require.Equal(t, "request-1", command["request_id"])
	require.Equal(t, "trace-1", command["trace_id"])
	require.JSONEq(t,
		`{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		command["payload_json"].(string),
	)

	stop()
	waitForRunResult(t, results, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimePublishesAdminEmailRouteOnlyToMailService appends one
// admin_email intent and verifies that the configured administrator address
// receives a mail delivery command, that the matching push route is stored as
// "skipped", and that nothing is written to the gateway client-events stream.
func TestNewRuntimePublishesAdminEmailRouteOnlyToMailService(t *testing.T) {
	t.Parallel()

	miniRedis := miniredis.RunT(t)
	probeRedis := redis.NewClient(&redis.Options{
		Addr:            miniRedis.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, probeRedis.Close())
	})

	userLookup := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
	defer userLookup.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = miniRedis.Addr()
	cfg.UserService.BaseURL = userLookup.URL
	cfg.AdminRouting.LobbyApplicationSubmitted = []string{"owner@example.com"}
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, rt.Close())
	}()

	runCtx, stop := context.WithCancel(context.Background())
	defer stop()

	results := make(chan error, 1)
	go func() {
		results <- rt.Run(runCtx)
	}()

	httpClient := newTestHTTPClient(t)
	waitForRuntimeReady(t, httpClient, cfg.InternalHTTP.Addr)

	bg := context.Background()
	intent := &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type": "lobby.application.submitted",
			"producer":          "game_lobby",
			"audience_kind":     "admin_email",
			"idempotency_key":   "game-123:application-submitted:user-42",
			"occurred_at_ms":    "1775121700000",
			"payload_json":      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"}`,
		},
	}
	messageID, err := probeRedis.XAdd(bg, intent).Result()
	require.NoError(t, err)

	emailRouteKey := redisstate.Keyspace{}.Route(messageID, "email:email:owner@example.com")
	emailPublished := func() bool {
		raw, getErr := probeRedis.Get(bg, emailRouteKey).Bytes()
		if getErr != nil {
			return false
		}
		stored, decodeErr := redisstate.UnmarshalRoute(raw)
		if decodeErr != nil {
			return false
		}

		return stored.Status == "published" && stored.AttemptCount == 1
	}
	require.Eventually(t, emailPublished, 2*time.Second, 10*time.Millisecond)

	pushRouteKey := redisstate.Keyspace{}.Route(messageID, "push:email:owner@example.com")
	pushRoutePayload, err := probeRedis.Get(bg, pushRouteKey).Bytes()
	require.NoError(t, err)
	pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, "skipped", string(pushRoute.Status))

	mailCommands, err := probeRedis.XRange(bg, cfg.Streams.MailDeliveryCommands, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, mailCommands, 1)
	command := mailCommands[0].Values
	require.Equal(t, messageID+"/email:email:owner@example.com", command["delivery_id"])
	require.JSONEq(t,
		`{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"},"attachments":[]}`,
		command["payload_json"].(string),
	)

	gatewayEvents, err := probeRedis.XRange(bg, cfg.Streams.GatewayClientEvents, "-", "+").Result()
	require.NoError(t, err)
	require.Empty(t, gatewayEvents)

	stop()
	waitForRunResult(t, results, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimeUsesConfiguredUserServiceTimeout stubs a user service that
// never answers, configures a 20ms user-service timeout, and expects Run to
// fail with a deadline error without storing a stream offset or a
// notification record for the submitted intent.
func TestNewRuntimeUsesConfiguredUserServiceTimeout(t *testing.T) {
	t.Parallel()

	miniRedis := miniredis.RunT(t)
	probeRedis := redis.NewClient(&redis.Options{
		Addr:            miniRedis.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, probeRedis.Close())
	})

	// The handler only returns once the request context is done, so the
	// client-side timeout is what ends the lookup.
	userLookup := newUserLookupServer(t, func(_ http.ResponseWriter, request *http.Request) {
		<-request.Context().Done()
	})
	defer userLookup.Close()

	cfg := config.DefaultConfig()
	cfg.Redis.Addr = miniRedis.Addr()
	cfg.UserService.BaseURL = userLookup.URL
	cfg.UserService.Timeout = 20 * time.Millisecond
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 10 * time.Second
	cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	rt, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, rt.Close())
	}()

	runCtx, stop := context.WithCancel(context.Background())
	defer stop()

	results := make(chan error, 1)
	go func() {
		results <- rt.Run(runCtx)
	}()

	httpClient := newTestHTTPClient(t)
	waitForRuntimeReady(t, httpClient, cfg.InternalHTTP.Addr)

	bg := context.Background()
	intent := &redis.XAddArgs{
		Stream: cfg.Streams.Intents,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		},
	}
	messageID, err := probeRedis.XAdd(bg, intent).Result()
	require.NoError(t, err)

	var runErr error
	runReturned := func() bool {
		select {
		case runErr = <-results:
			return true
		default:
			return false
		}
	}
	require.Eventually(t, runReturned, time.Second, 10*time.Millisecond)
	require.Error(t, runErr)
	require.ErrorContains(t, runErr, "context deadline exceeded")

	offsets, err := redisstate.NewStreamOffsetStore(probeRedis)
	require.NoError(t, err)
	offset, found, err := offsets.Load(bg, cfg.Streams.Intents)
	require.NoError(t, err)
	require.False(t, found)
	require.Empty(t, offset)

	_, err = probeRedis.Get(bg, redisstate.Keyspace{}.Notification(messageID)).Bytes()
	require.Error(t, err)
}
// testLogger returns a text-format logger whose output is written to
// io.Discard, keeping test output free of runtime log lines.
func testLogger() *slog.Logger {
	discardHandler := slog.NewTextHandler(io.Discard, nil)

	return slog.New(discardHandler)
}
// newTestHTTPClient returns an HTTP client with a 500ms timeout and keep-alives
// disabled. Idle connections are closed when the test finishes.
func newTestHTTPClient(t *testing.T) *http.Client {
	t.Helper()

	client := &http.Client{
		Timeout:   500 * time.Millisecond,
		Transport: &http.Transport{DisableKeepAlives: true},
	}
	// Closing idle connections on the client forwards to its transport.
	t.Cleanup(client.CloseIdleConnections)

	return client
}
// waitForRuntimeReady polls GET http://<addr>/readyz every 25ms for up to 5s
// and fails the test if the endpoint never answers with 200 OK.
func waitForRuntimeReady(t *testing.T, client *http.Client, addr string) {
	t.Helper()

	readyURL := "http://" + addr + "/readyz"
	ready := func() bool {
		response, err := client.Get(readyURL)
		if err != nil {
			return false
		}
		defer response.Body.Close()
		// Drain the body before closing it.
		_, _ = io.Copy(io.Discard, response.Body)

		return response.StatusCode == http.StatusOK
	}

	require.Eventually(t, ready, 5*time.Second, 25*time.Millisecond, "notification runtime did not become reachable")
}
// waitForRunResult polls runErrCh every 10ms for up to waitTimeout, fails the
// test if no result arrives in time, and requires the received result to be a
// nil error.
func waitForRunResult(t *testing.T, runErrCh <-chan error, waitTimeout time.Duration) {
	t.Helper()

	var runErr error
	stopped := func() bool {
		select {
		case runErr = <-runErrCh:
			return true
		default:
			return false
		}
	}

	require.Eventually(t, stopped, waitTimeout, 10*time.Millisecond, "notification runtime did not stop")
	require.NoError(t, runErr)
}
// assertHTTPStatus issues a GET request to target through client and requires
// the response status code to equal want.
func assertHTTPStatus(t *testing.T, client *http.Client, target string, want int) {
	t.Helper()

	response, err := client.Get(target)
	require.NoError(t, err)
	defer response.Body.Close()

	// Drain the body before closing it.
	_, _ = io.Copy(io.Discard, response.Body)

	got := response.StatusCode
	require.Equal(t, want, got)
}
// mustFreeAddr asks the kernel for an unused loopback TCP port, releases it
// again, and returns the "host:port" string. The port is not reserved after
// the call returns.
func mustFreeAddr(t *testing.T) string {
	t.Helper()

	probe, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)

	addr := probe.Addr().String()
	assert.NoError(t, probe.Close())

	return addr
}
// newUserLookupServer starts an httptest server that stubs the user lookup
// endpoint. Non-GET requests get a plain 404, GET requests for any path other
// than /api/v1/internal/users/user-1 get a JSON subject_not_found 404, and
// everything else is forwarded to handler. The caller closes the server.
func newUserLookupServer(t *testing.T, handler func(http.ResponseWriter, *http.Request)) *httptest.Server {
	t.Helper()

	const knownUserPath = "/api/v1/internal/users/user-1"

	route := func(writer http.ResponseWriter, request *http.Request) {
		switch {
		case request.Method != http.MethodGet:
			http.NotFound(writer, request)
		case request.URL.Path != knownUserPath:
			writeJSON(t, writer, http.StatusNotFound, map[string]any{
				"error": map[string]any{
					"code":    "subject_not_found",
					"message": "subject not found",
				},
			})
		default:
			handler(writer, request)
		}
	}

	return httptest.NewServer(http.HandlerFunc(route))
}
func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) {
t.Helper()
body, err := json.Marshal(payload)
require.NoError(t, err)
writer.Header().Set("Content-Type", "application/json")
writer.WriteHeader(statusCode)
_, err = writer.Write(body)
require.NoError(t, err)
}
+839
View File
@@ -0,0 +1,839 @@
// Package config loads the Notification Service process configuration from
// environment variables.
package config
import (
"crypto/tls"
"fmt"
"log/slog"
"net"
netmail "net/mail"
"net/url"
"os"
"strconv"
"strings"
"time"
"galaxy/notification/internal/telemetry"
)
const (
// Names of the service-specific environment variables.
shutdownTimeoutEnvVar = "NOTIFICATION_SHUTDOWN_TIMEOUT"
logLevelEnvVar = "NOTIFICATION_LOG_LEVEL"
internalHTTPAddrEnvVar = "NOTIFICATION_INTERNAL_HTTP_ADDR"
internalHTTPReadHeaderTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
internalHTTPReadTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT"
internalHTTPIdleTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT"
redisAddrEnvVar = "NOTIFICATION_REDIS_ADDR"
redisUsernameEnvVar = "NOTIFICATION_REDIS_USERNAME"
redisPasswordEnvVar = "NOTIFICATION_REDIS_PASSWORD"
redisDBEnvVar = "NOTIFICATION_REDIS_DB"
redisTLSEnabledEnvVar = "NOTIFICATION_REDIS_TLS_ENABLED"
redisOperationTimeoutEnvVar = "NOTIFICATION_REDIS_OPERATION_TIMEOUT"
intentsStreamEnvVar = "NOTIFICATION_INTENTS_STREAM"
intentsReadBlockTimeoutEnvVar = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
gatewayClientEventsStreamEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
gatewayClientEventsStreamMaxEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN"
mailDeliveryCommandsStreamEnvVar = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"
pushRetryMaxAttemptsEnvVar = "NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS"
emailRetryMaxAttemptsEnvVar = "NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS"
routeLeaseTTLEnvVar = "NOTIFICATION_ROUTE_LEASE_TTL"
routeBackoffMinEnvVar = "NOTIFICATION_ROUTE_BACKOFF_MIN"
routeBackoffMaxEnvVar = "NOTIFICATION_ROUTE_BACKOFF_MAX"
deadLetterTTLEnvVar = "NOTIFICATION_DEAD_LETTER_TTL"
recordTTLEnvVar = "NOTIFICATION_RECORD_TTL"
idempotencyTTLEnvVar = "NOTIFICATION_IDEMPOTENCY_TTL"
userServiceBaseURLEnvVar = "NOTIFICATION_USER_SERVICE_BASE_URL"
userServiceTimeoutEnvVar = "NOTIFICATION_USER_SERVICE_TIMEOUT"
// Names of the environment variables holding the per-notification-type
// administrator email lists.
adminEmailsGeoReviewRecommendedEnvVar = "NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED"
adminEmailsGameGenerationFailedEnvVar = "NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED"
adminEmailsLobbyRuntimePausedAfterEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START"
adminEmailsLobbyApplicationSubmittedEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED"
// Names of the OpenTelemetry environment variables. The OTEL_* names carry no
// service prefix; the two stdout toggles are service-specific.
otelServiceNameEnvVar = "OTEL_SERVICE_NAME"
otelTracesExporterEnvVar = "OTEL_TRACES_EXPORTER"
otelMetricsExporterEnvVar = "OTEL_METRICS_EXPORTER"
otelExporterOTLPProtocolEnvVar = "OTEL_EXPORTER_OTLP_PROTOCOL"
otelExporterOTLPTracesProtocolEnvVar = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
otelStdoutTracesEnabledEnvVar = "NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED"
otelStdoutMetricsEnabledEnvVar = "NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED"
// Default values.
// NOTE(review): presumably applied by DefaultConfig when the matching
// variable is unset — confirm against the loader, which is outside this view.
defaultShutdownTimeout = 5 * time.Second
defaultLogLevel = "info"
defaultInternalHTTPAddr = ":8092"
defaultReadHeaderTimeout = 2 * time.Second
defaultReadTimeout = 10 * time.Second
defaultIdleTimeout = time.Minute
defaultRedisDB = 0
defaultRedisOperationTimeout = 250 * time.Millisecond
defaultIntentsStream = "notification:intents"
defaultIntentsReadBlockTimeout = 2 * time.Second
defaultGatewayClientEventsStream = "gateway:client-events"
defaultGatewayClientEventsStreamMaxLen int64 = 1024
defaultMailDeliveryCommandsStream = "mail:delivery_commands"
defaultPushRetryMaxAttempts = 3
defaultEmailRetryMaxAttempts = 7
defaultRouteLeaseTTL = 5 * time.Second
defaultRouteBackoffMin = time.Second
defaultRouteBackoffMax = 5 * time.Minute
// 720h is 30 days; 168h is 7 days.
defaultDeadLetterTTL = 720 * time.Hour
defaultRecordTTL = 720 * time.Hour
defaultIdempotencyTTL = 168 * time.Hour
defaultUserServiceTimeout = time.Second
defaultOTelServiceName = "galaxy-notification"
// Exporter and protocol identifiers for the telemetry settings.
otelExporterNone = "none"
otelExporterOTLP = "otlp"
otelProtocolHTTPProtobuf = "http/protobuf"
otelProtocolGRPC = "grpc"
)
// Config stores the full Notification Service process configuration.
//
// The runtime constructor calls Validate on the value and refuses to start
// when it reports an error.
type Config struct {
// ShutdownTimeout bounds graceful shutdown of every long-lived component.
// The runtime also uses it to bound telemetry shutdown during cleanup.
ShutdownTimeout time.Duration
// Logging configures the process-wide structured logger.
Logging LoggingConfig
// InternalHTTP configures the private probe HTTP listener.
InternalHTTP InternalHTTPConfig
// Redis configures the shared Redis client used by the process.
Redis RedisConfig
// Streams stores the stable stream names reserved for notification ingress
// and downstream publication.
Streams StreamsConfig
// IntentsReadBlockTimeout stores the maximum Redis Streams blocking read
// window used by the intent consumer.
IntentsReadBlockTimeout time.Duration
// Retry stores the frozen retry and retention settings.
Retry RetryConfig
// UserService configures the trusted user-enrichment dependency.
UserService UserServiceConfig
// AdminRouting stores the type-specific configured administrator email
// lists.
AdminRouting AdminRoutingConfig
// Telemetry configures the process-wide OpenTelemetry runtime.
Telemetry TelemetryConfig
}
// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
// Level stores the process log level accepted by log/slog.
// NOTE(review): presumably populated from NOTIFICATION_LOG_LEVEL with "info"
// as the fallback — confirm against the loader.
Level string
}
// InternalHTTPConfig configures the private probe HTTP listener.
//
// Validate requires a non-empty Addr that passes the package's host:port
// check and strictly positive timeouts.
type InternalHTTPConfig struct {
// Addr stores the TCP listen address.
Addr string
// ReadHeaderTimeout bounds request-header reading.
ReadHeaderTimeout time.Duration
// ReadTimeout bounds reading one request.
ReadTimeout time.Duration
// IdleTimeout bounds how long keep-alive connections stay open.
IdleTimeout time.Duration
}
// Validate checks the internal HTTP listener settings and returns the first
// violation found: an empty address, an address rejected by the host:port
// check, or a non-positive read-header, read, or idle timeout. It returns nil
// when every check passes.
func (cfg InternalHTTPConfig) Validate() error {
	if strings.TrimSpace(cfg.Addr) == "" {
		return fmt.Errorf("internal HTTP addr must not be empty")
	}
	if !isTCPAddr(cfg.Addr) {
		return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr)
	}
	if cfg.ReadHeaderTimeout <= 0 {
		return fmt.Errorf("internal HTTP read header timeout must be positive")
	}
	if cfg.ReadTimeout <= 0 {
		return fmt.Errorf("internal HTTP read timeout must be positive")
	}
	if cfg.IdleTimeout <= 0 {
		return fmt.Errorf("internal HTTP idle timeout must be positive")
	}

	return nil
}
// RedisConfig configures the shared Redis client and its connection settings.
//
// Validate requires a host:port address, a non-negative DB index, and a
// positive operation timeout.
type RedisConfig struct {
// Addr stores the Redis network address in host:port form.
Addr string
// Username stores the optional Redis ACL username.
Username string
// Password stores the optional Redis ACL password.
Password string
// DB stores the Redis logical database index.
DB int
// TLSEnabled reports whether TLS must be used for Redis connections. When
// false, TLSConfig returns nil.
TLSEnabled bool
// OperationTimeout bounds one Redis round trip including the startup PING.
OperationTimeout time.Duration
}
// TLSConfig builds the TLS settings for the Redis client. It returns a
// configuration that enforces TLS 1.2 as the minimum version when TLSEnabled
// is set, and nil otherwise.
func (cfg RedisConfig) TLSConfig() *tls.Config {
	if cfg.TLSEnabled {
		return &tls.Config{MinVersion: tls.VersionTLS12}
	}
	return nil
}
// Validate checks the Redis settings and returns the first problem found: a
// blank or non host:port address, a negative DB index, or a non-positive
// operation timeout. It returns nil when cfg is usable.
func (cfg RedisConfig) Validate() error {
	if strings.TrimSpace(cfg.Addr) == "" {
		return fmt.Errorf("redis addr must not be empty")
	}
	if !isTCPAddr(cfg.Addr) {
		return fmt.Errorf("redis addr %q must use host:port form", cfg.Addr)
	}
	if cfg.DB < 0 {
		return fmt.Errorf("redis db must not be negative")
	}
	if cfg.OperationTimeout <= 0 {
		return fmt.Errorf("redis operation timeout must be positive")
	}
	return nil
}
// StreamsConfig stores the stable Redis Stream names used by Notification
// Service.
//
// Validate requires every stream name to be non-blank and the max length to
// be positive.
type StreamsConfig struct {
// Intents stores the ingress intent stream.
Intents string
// GatewayClientEvents stores the downstream Gateway client-events stream.
GatewayClientEvents string
// GatewayClientEventsStreamMaxLen bounds the downstream Gateway
// client-events stream with approximate trimming.
GatewayClientEventsStreamMaxLen int64
// MailDeliveryCommands stores the downstream Mail Service command stream.
MailDeliveryCommands string
}
// Validate checks the stream settings and returns the first problem found: a
// blank intents, gateway client-events, or mail delivery-commands stream
// name, or a non-positive gateway client-events max length. It returns nil
// when cfg is usable.
func (cfg StreamsConfig) Validate() error {
	if strings.TrimSpace(cfg.Intents) == "" {
		return fmt.Errorf("intents stream must not be empty")
	}
	if strings.TrimSpace(cfg.GatewayClientEvents) == "" {
		return fmt.Errorf("gateway client-events stream must not be empty")
	}
	if cfg.GatewayClientEventsStreamMaxLen <= 0 {
		return fmt.Errorf("gateway client-events stream max len must be positive")
	}
	if strings.TrimSpace(cfg.MailDeliveryCommands) == "" {
		return fmt.Errorf("mail delivery-commands stream must not be empty")
	}
	return nil
}
// RetryConfig stores the frozen retry budgets, backoff settings, and retention
// periods used by the service.
//
// Validate requires every field to be positive and RouteBackoffMin not to
// exceed RouteBackoffMax.
type RetryConfig struct {
// PushMaxAttempts stores the route retry budget for the `push` channel.
PushMaxAttempts int
// EmailMaxAttempts stores the route retry budget for the `email` channel.
EmailMaxAttempts int
// RouteLeaseTTL stores the temporary route-lease lifetime used to avoid
// duplicate publication across replicas.
RouteLeaseTTL time.Duration
// RouteBackoffMin stores the minimum retry backoff.
RouteBackoffMin time.Duration
// RouteBackoffMax stores the maximum retry backoff.
RouteBackoffMax time.Duration
// DeadLetterTTL stores the retention period for dead-letter and malformed
// intent records.
DeadLetterTTL time.Duration
// RecordTTL stores the retention period for notification and route records.
RecordTTL time.Duration
// IdempotencyTTL stores the retention period for idempotency records.
IdempotencyTTL time.Duration
}
// Validate checks the retry and retention settings. The checks run in a
// fixed order and the first failing one decides the returned error; nil is
// returned when every value is positive and the minimum backoff does not
// exceed the maximum.
func (cfg RetryConfig) Validate() error {
	checks := []struct {
		failed bool
		reason string
	}{
		{cfg.PushMaxAttempts <= 0, "push retry max attempts must be positive"},
		{cfg.EmailMaxAttempts <= 0, "email retry max attempts must be positive"},
		{cfg.RouteLeaseTTL <= 0, "route lease ttl must be positive"},
		{cfg.RouteBackoffMin <= 0, "route backoff min must be positive"},
		{cfg.RouteBackoffMax <= 0, "route backoff max must be positive"},
		{cfg.RouteBackoffMin > cfg.RouteBackoffMax, "route backoff min must not exceed route backoff max"},
		{cfg.DeadLetterTTL <= 0, "dead-letter ttl must be positive"},
		{cfg.RecordTTL <= 0, "record ttl must be positive"},
		{cfg.IdempotencyTTL <= 0, "idempotency ttl must be positive"},
	}
	for _, check := range checks {
		if check.failed {
			return fmt.Errorf("%s", check.reason)
		}
	}
	return nil
}
// UserServiceConfig configures the trusted user-enrichment dependency.
//
// Validate requires an absolute http(s) base URL and a positive timeout.
type UserServiceConfig struct {
// BaseURL stores the absolute base URL of the trusted User Service.
// LoadFromEnv trims surrounding whitespace and trailing slashes from it.
BaseURL string
// Timeout bounds one outbound User Service request.
Timeout time.Duration
}
// Validate checks the User Service settings and returns the first problem
// found: a blank base URL, a base URL that is not an absolute http(s) URL, or
// a non-positive timeout. It returns nil when cfg is usable.
func (cfg UserServiceConfig) Validate() error {
	if strings.TrimSpace(cfg.BaseURL) == "" {
		return fmt.Errorf("user service base URL must not be empty")
	}
	if !isAbsoluteHTTPURL(cfg.BaseURL) {
		return fmt.Errorf("user service base URL %q must be an absolute http(s) URL", cfg.BaseURL)
	}
	if cfg.Timeout <= 0 {
		return fmt.Errorf("user service timeout must be positive")
	}
	return nil
}
// AdminRoutingConfig stores the type-specific configured administrator email
// lists.
//
// Validate requires every stored address to already be in normalized form
// (trimmed, lowercase, without a display name).
type AdminRoutingConfig struct {
// GeoReviewRecommended stores recipients for
// `geo.review_recommended`.
GeoReviewRecommended []string
// GameGenerationFailed stores recipients for
// `game.generation_failed`.
GameGenerationFailed []string
// LobbyRuntimePausedAfterStart stores recipients for
// `lobby.runtime_paused_after_start`.
LobbyRuntimePausedAfterStart []string
// LobbyApplicationSubmitted stores recipients for public
// `lobby.application.submitted` notifications.
LobbyApplicationSubmitted []string
}
// Validate checks each administrator email list in turn and returns the
// first error reported by validateNormalizedEmailList, labelled with the
// notification type the list belongs to. It returns nil when every list is
// valid.
func (cfg AdminRoutingConfig) Validate() error {
	lists := []struct {
		notificationType string
		recipients       []string
	}{
		{"geo.review_recommended", cfg.GeoReviewRecommended},
		{"game.generation_failed", cfg.GameGenerationFailed},
		{"lobby.runtime_paused_after_start", cfg.LobbyRuntimePausedAfterStart},
		{"lobby.application.submitted", cfg.LobbyApplicationSubmitted},
	}
	for _, list := range lists {
		if err := validateNormalizedEmailList(list.notificationType, list.recipients); err != nil {
			return err
		}
	}
	return nil
}
// TelemetryConfig configures the Notification Service OpenTelemetry runtime.
//
// Validate delegates to telemetry.ProcessConfig.Validate with the same field
// values.
type TelemetryConfig struct {
// ServiceName overrides the default OpenTelemetry service name.
ServiceName string
// TracesExporter selects the external traces exporter. Supported values are
// `none` and `otlp`.
TracesExporter string
// MetricsExporter selects the external metrics exporter. Supported values
// are `none` and `otlp`.
MetricsExporter string
// TracesProtocol selects the OTLP traces protocol when TracesExporter is
// `otlp`.
TracesProtocol string
// MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is
// `otlp`.
MetricsProtocol string
// StdoutTracesEnabled enables the additional stdout trace exporter used for
// local development and debugging.
StdoutTracesEnabled bool
// StdoutMetricsEnabled enables the additional stdout metric exporter used
// for local development and debugging.
StdoutMetricsEnabled bool
}
// Validate copies cfg field by field into a telemetry.ProcessConfig and
// returns whatever that type's Validate method reports.
func (cfg TelemetryConfig) Validate() error {
	processConfig := telemetry.ProcessConfig{
		ServiceName:          cfg.ServiceName,
		TracesExporter:       cfg.TracesExporter,
		MetricsExporter:      cfg.MetricsExporter,
		TracesProtocol:       cfg.TracesProtocol,
		MetricsProtocol:      cfg.MetricsProtocol,
		StdoutTracesEnabled:  cfg.StdoutTracesEnabled,
		StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
	}
	return processConfig.Validate()
}
// DefaultConfig returns the baseline Notification Service configuration
// before any environment overrides are applied.
//
// Redis.Addr, UserService.BaseURL, and the administrator routing lists are
// left at their zero values. Both telemetry exporters start as `none`, and
// the OTLP protocols and stdout exporters stay unset.
func DefaultConfig() Config {
	var cfg Config

	cfg.ShutdownTimeout = defaultShutdownTimeout
	cfg.Logging.Level = defaultLogLevel

	cfg.InternalHTTP.Addr = defaultInternalHTTPAddr
	cfg.InternalHTTP.ReadHeaderTimeout = defaultReadHeaderTimeout
	cfg.InternalHTTP.ReadTimeout = defaultReadTimeout
	cfg.InternalHTTP.IdleTimeout = defaultIdleTimeout

	cfg.Redis.DB = defaultRedisDB
	cfg.Redis.OperationTimeout = defaultRedisOperationTimeout

	cfg.Streams.Intents = defaultIntentsStream
	cfg.Streams.GatewayClientEvents = defaultGatewayClientEventsStream
	cfg.Streams.GatewayClientEventsStreamMaxLen = defaultGatewayClientEventsStreamMaxLen
	cfg.Streams.MailDeliveryCommands = defaultMailDeliveryCommandsStream

	cfg.IntentsReadBlockTimeout = defaultIntentsReadBlockTimeout

	cfg.Retry.PushMaxAttempts = defaultPushRetryMaxAttempts
	cfg.Retry.EmailMaxAttempts = defaultEmailRetryMaxAttempts
	cfg.Retry.RouteLeaseTTL = defaultRouteLeaseTTL
	cfg.Retry.RouteBackoffMin = defaultRouteBackoffMin
	cfg.Retry.RouteBackoffMax = defaultRouteBackoffMax
	cfg.Retry.DeadLetterTTL = defaultDeadLetterTTL
	cfg.Retry.RecordTTL = defaultRecordTTL
	cfg.Retry.IdempotencyTTL = defaultIdempotencyTTL

	cfg.UserService.Timeout = defaultUserServiceTimeout

	cfg.Telemetry.ServiceName = defaultOTelServiceName
	cfg.Telemetry.TracesExporter = otelExporterNone
	cfg.Telemetry.MetricsExporter = otelExporterNone

	return cfg
}
// LoadFromEnv loads the Notification Service process configuration from
// environment variables, applying documented defaults where appropriate.
//
// It starts from DefaultConfig, overlays each environment variable that is
// set, and finally runs Config.Validate on the result. Any parse or
// validation failure aborts loading: the zero Config is returned together
// with an error prefixed with "load notification config:".
func LoadFromEnv() (Config, error) {
cfg := DefaultConfig()
var err error
// Process lifecycle and logging. The log level is validated right away so
// the error can name its environment variable.
cfg.ShutdownTimeout, err = loadDurationEnvWithDefault(shutdownTimeoutEnvVar, cfg.ShutdownTimeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Logging.Level = loadStringEnvWithDefault(logLevelEnvVar, cfg.Logging.Level)
if err := validateLogLevel(cfg.Logging.Level); err != nil {
return Config{}, fmt.Errorf("load notification config: %s: %w", logLevelEnvVar, err)
}
// Internal probe HTTP listener.
cfg.InternalHTTP.Addr = loadStringEnvWithDefault(internalHTTPAddrEnvVar, cfg.InternalHTTP.Addr)
cfg.InternalHTTP.ReadHeaderTimeout, err = loadDurationEnvWithDefault(internalHTTPReadHeaderTimeoutEnvVar, cfg.InternalHTTP.ReadHeaderTimeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.InternalHTTP.ReadTimeout, err = loadDurationEnvWithDefault(internalHTTPReadTimeoutEnvVar, cfg.InternalHTTP.ReadTimeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.InternalHTTP.IdleTimeout, err = loadDurationEnvWithDefault(internalHTTPIdleTimeoutEnvVar, cfg.InternalHTTP.IdleTimeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// Redis connection. Username and password are read verbatim with
// os.Getenv, so they are not trimmed and default to the empty string.
cfg.Redis.Addr = loadStringEnvWithDefault(redisAddrEnvVar, cfg.Redis.Addr)
cfg.Redis.Username = os.Getenv(redisUsernameEnvVar)
cfg.Redis.Password = os.Getenv(redisPasswordEnvVar)
cfg.Redis.DB, err = loadIntEnvWithDefault(redisDBEnvVar, cfg.Redis.DB)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Redis.TLSEnabled, err = loadBoolEnvWithDefault(redisTLSEnabledEnvVar, cfg.Redis.TLSEnabled)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Redis.OperationTimeout, err = loadDurationEnvWithDefault(redisOperationTimeoutEnvVar, cfg.Redis.OperationTimeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// Stream names and the intent consumer's blocking read window.
cfg.Streams.Intents = loadStringEnvWithDefault(intentsStreamEnvVar, cfg.Streams.Intents)
cfg.Streams.GatewayClientEvents = loadStringEnvWithDefault(gatewayClientEventsStreamEnvVar, cfg.Streams.GatewayClientEvents)
cfg.Streams.GatewayClientEventsStreamMaxLen, err = loadInt64EnvWithDefault(gatewayClientEventsStreamMaxEnvVar, cfg.Streams.GatewayClientEventsStreamMaxLen)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Streams.MailDeliveryCommands = loadStringEnvWithDefault(mailDeliveryCommandsStreamEnvVar, cfg.Streams.MailDeliveryCommands)
cfg.IntentsReadBlockTimeout, err = loadDurationEnvWithDefault(intentsReadBlockTimeoutEnvVar, cfg.IntentsReadBlockTimeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// Retry budgets, backoff bounds, and retention periods.
cfg.Retry.PushMaxAttempts, err = loadIntEnvWithDefault(pushRetryMaxAttemptsEnvVar, cfg.Retry.PushMaxAttempts)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.EmailMaxAttempts, err = loadIntEnvWithDefault(emailRetryMaxAttemptsEnvVar, cfg.Retry.EmailMaxAttempts)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.RouteLeaseTTL, err = loadDurationEnvWithDefault(routeLeaseTTLEnvVar, cfg.Retry.RouteLeaseTTL)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.RouteBackoffMin, err = loadDurationEnvWithDefault(routeBackoffMinEnvVar, cfg.Retry.RouteBackoffMin)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.RouteBackoffMax, err = loadDurationEnvWithDefault(routeBackoffMaxEnvVar, cfg.Retry.RouteBackoffMax)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.DeadLetterTTL, err = loadDurationEnvWithDefault(deadLetterTTLEnvVar, cfg.Retry.DeadLetterTTL)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.RecordTTL, err = loadDurationEnvWithDefault(recordTTLEnvVar, cfg.Retry.RecordTTL)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Retry.IdempotencyTTL, err = loadDurationEnvWithDefault(idempotencyTTLEnvVar, cfg.Retry.IdempotencyTTL)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// User Service dependency. The base URL is trimmed and stripped of
// trailing slashes before validation.
cfg.UserService.BaseURL = normalizeBaseURL(loadStringEnvWithDefault(userServiceBaseURLEnvVar, cfg.UserService.BaseURL))
cfg.UserService.Timeout, err = loadDurationEnvWithDefault(userServiceTimeoutEnvVar, cfg.UserService.Timeout)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// Administrator recipient lists, parsed as comma-separated addresses.
cfg.AdminRouting.GeoReviewRecommended, err = loadEmailListEnv(adminEmailsGeoReviewRecommendedEnvVar, cfg.AdminRouting.GeoReviewRecommended)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.AdminRouting.GameGenerationFailed, err = loadEmailListEnv(adminEmailsGameGenerationFailedEnvVar, cfg.AdminRouting.GameGenerationFailed)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.AdminRouting.LobbyRuntimePausedAfterStart, err = loadEmailListEnv(adminEmailsLobbyRuntimePausedAfterEnvVar, cfg.AdminRouting.LobbyRuntimePausedAfterStart)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.AdminRouting.LobbyApplicationSubmitted, err = loadEmailListEnv(adminEmailsLobbyApplicationSubmittedEnvVar, cfg.AdminRouting.LobbyApplicationSubmitted)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// Telemetry. Each signal's protocol comes from its signal-specific variable
// first and the shared OTLP protocol variable second.
cfg.Telemetry.ServiceName = loadStringEnvWithDefault(otelServiceNameEnvVar, cfg.Telemetry.ServiceName)
cfg.Telemetry.TracesExporter = normalizeExporterValue(loadStringEnvWithDefault(otelTracesExporterEnvVar, cfg.Telemetry.TracesExporter))
cfg.Telemetry.MetricsExporter = normalizeExporterValue(loadStringEnvWithDefault(otelMetricsExporterEnvVar, cfg.Telemetry.MetricsExporter))
cfg.Telemetry.TracesProtocol = loadOTLPProtocol(
os.Getenv(otelExporterOTLPTracesProtocolEnvVar),
os.Getenv(otelExporterOTLPProtocolEnvVar),
cfg.Telemetry.TracesExporter,
)
cfg.Telemetry.MetricsProtocol = loadOTLPProtocol(
os.Getenv(otelExporterOTLPMetricsProtocolEnvVar),
os.Getenv(otelExporterOTLPProtocolEnvVar),
cfg.Telemetry.MetricsExporter,
)
cfg.Telemetry.StdoutTracesEnabled, err = loadBoolEnvWithDefault(otelStdoutTracesEnabledEnvVar, cfg.Telemetry.StdoutTracesEnabled)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
cfg.Telemetry.StdoutMetricsEnabled, err = loadBoolEnvWithDefault(otelStdoutMetricsEnabledEnvVar, cfg.Telemetry.StdoutMetricsEnabled)
if err != nil {
return Config{}, fmt.Errorf("load notification config: %w", err)
}
// Validate already prefixes its errors, so the result is returned as is.
if err := cfg.Validate(); err != nil {
return Config{}, err
}
return cfg, nil
}
// Validate reports whether cfg contains a consistent Notification Service
// process configuration.
//
// The required top-level values are checked first so that their errors name
// the responsible environment variable; the nested sections are then
// validated one after another and the first failure is returned. Every
// returned error is prefixed with "load notification config:". Section
// errors are wrapped with %w so callers can inspect the underlying cause
// with errors.Is and errors.As; the rendered message is the same as with %s.
func (cfg Config) Validate() error {
	switch {
	case cfg.ShutdownTimeout <= 0:
		return fmt.Errorf("load notification config: %s must be positive", shutdownTimeoutEnvVar)
	case strings.TrimSpace(cfg.Redis.Addr) == "":
		return fmt.Errorf("load notification config: %s must not be empty", redisAddrEnvVar)
	case strings.TrimSpace(cfg.UserService.BaseURL) == "":
		return fmt.Errorf("load notification config: %s must not be empty", userServiceBaseURLEnvVar)
	}

	if err := cfg.InternalHTTP.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Redis.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Streams.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if cfg.IntentsReadBlockTimeout <= 0 {
		return fmt.Errorf("load notification config: %s must be positive", intentsReadBlockTimeoutEnvVar)
	}
	if err := cfg.Retry.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.UserService.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.AdminRouting.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Telemetry.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	return nil
}
// loadStringEnvWithDefault returns the whitespace-trimmed value of the
// environment variable name when it is set (even to an empty string), and
// value when the variable is not set at all.
func loadStringEnvWithDefault(name string, value string) string {
	text, present := os.LookupEnv(name)
	if !present {
		return value
	}
	return strings.TrimSpace(text)
}
// loadDurationEnvWithDefault parses the environment variable name with
// time.ParseDuration after trimming whitespace. It returns value when the
// variable is not set, and a zero duration plus an error prefixed with the
// variable name when the text cannot be parsed.
func loadDurationEnvWithDefault(name string, value time.Duration) (time.Duration, error) {
	if text, present := os.LookupEnv(name); present {
		duration, parseErr := time.ParseDuration(strings.TrimSpace(text))
		if parseErr != nil {
			return 0, fmt.Errorf("%s: %w", name, parseErr)
		}
		return duration, nil
	}
	return value, nil
}
// loadIntEnvWithDefault parses the environment variable name as a decimal
// int after trimming whitespace. It returns value when the variable is not
// set, and zero plus an error prefixed with the variable name when the text
// cannot be parsed.
func loadIntEnvWithDefault(name string, value int) (int, error) {
	if text, present := os.LookupEnv(name); present {
		number, parseErr := strconv.Atoi(strings.TrimSpace(text))
		if parseErr != nil {
			return 0, fmt.Errorf("%s: %w", name, parseErr)
		}
		return number, nil
	}
	return value, nil
}
// loadInt64EnvWithDefault parses the environment variable name as a base-10
// int64 after trimming whitespace. It returns value when the variable is not
// set, and zero plus an error prefixed with the variable name when the text
// cannot be parsed.
func loadInt64EnvWithDefault(name string, value int64) (int64, error) {
	if text, present := os.LookupEnv(name); present {
		number, parseErr := strconv.ParseInt(strings.TrimSpace(text), 10, 64)
		if parseErr != nil {
			return 0, fmt.Errorf("%s: %w", name, parseErr)
		}
		return number, nil
	}
	return value, nil
}
// loadBoolEnvWithDefault parses the environment variable name with
// strconv.ParseBool after trimming whitespace. It returns value when the
// variable is not set, and false plus an error prefixed with the variable
// name when the text cannot be parsed.
func loadBoolEnvWithDefault(name string, value bool) (bool, error) {
	if text, present := os.LookupEnv(name); present {
		flag, parseErr := strconv.ParseBool(strings.TrimSpace(text))
		if parseErr != nil {
			return false, fmt.Errorf("%s: %w", name, parseErr)
		}
		return flag, nil
	}
	return value, nil
}
// loadEmailListEnv parses the environment variable name with parseEmailList
// when it is set. Otherwise it returns a copy of value, so the caller never
// shares a backing array with the default list.
func loadEmailListEnv(name string, value []string) ([]string, error) {
	if text, present := os.LookupEnv(name); present {
		return parseEmailList(name, text)
	}
	return append([]string(nil), value...), nil
}
// parseEmailList splits raw on commas and normalizes every entry with
// normalizeMailboxAddress. Duplicates (compared after normalization) are
// dropped while first-seen order is kept. A blank raw value yields a nil
// list; an invalid entry yields an error labelled with name and the entry's
// zero-based position.
func parseEmailList(name string, raw string) ([]string, error) {
	list := strings.TrimSpace(raw)
	if list == "" {
		return nil, nil
	}

	entries := strings.Split(list, ",")
	unique := make(map[string]struct{}, len(entries))
	result := make([]string, 0, len(entries))
	for position, entry := range entries {
		address, err := normalizeMailboxAddress(entry)
		if err != nil {
			return nil, fmt.Errorf("%s[%d]: %w", name, position, err)
		}
		if _, duplicate := unique[address]; !duplicate {
			unique[address] = struct{}{}
			result = append(result, address)
		}
	}
	return result, nil
}
// normalizeMailboxAddress trims value, parses it with net/mail, and returns
// the lowercased address. It fails when the trimmed value is empty, cannot
// be parsed, or carries a display name.
func normalizeMailboxAddress(value string) (string, error) {
	candidate := strings.TrimSpace(value)
	if candidate == "" {
		return "", fmt.Errorf("email address must not be empty")
	}

	mailbox, err := netmail.ParseAddress(candidate)
	switch {
	case err != nil:
		return "", fmt.Errorf("invalid email address %q: %w", candidate, err)
	case mailbox.Name != "":
		return "", fmt.Errorf("email address %q must not include a display name", candidate)
	}
	return strings.ToLower(mailbox.Address), nil
}
// validateNormalizedEmailList verifies that every entry of values is a valid
// mailbox address that already equals its normalized form. The first
// offending entry produces an error labelled with name and the entry's
// zero-based position.
func validateNormalizedEmailList(name string, values []string) error {
	for position := range values {
		canonical, err := normalizeMailboxAddress(values[position])
		switch {
		case err != nil:
			return fmt.Errorf("%s[%d]: %w", name, position, err)
		case canonical != values[position]:
			return fmt.Errorf("%s[%d]: email address must already be normalized", name, position)
		}
	}
	return nil
}
// validateLogLevel reports whether the trimmed value can be parsed as a
// slog.Level, returning the parse error unchanged when it cannot.
func validateLogLevel(value string) error {
	text := []byte(strings.TrimSpace(value))
	return new(slog.Level).UnmarshalText(text)
}
// normalizeExporterValue trims value and maps a blank result to the `none`
// exporter; any other value is returned trimmed.
func normalizeExporterValue(value string) string {
	if exporter := strings.TrimSpace(value); exporter != "" && exporter != otelExporterNone {
		return exporter
	}
	return otelExporterNone
}
// loadOTLPProtocol picks the OTLP protocol for one signal. The trimmed
// primary value wins, then the trimmed fallback. When both are blank the
// result is http/protobuf for the `otlp` exporter and the empty string for
// any other exporter.
func loadOTLPProtocol(primary string, fallback string, exporter string) string {
	for _, candidate := range []string{primary, fallback} {
		if protocol := strings.TrimSpace(candidate); protocol != "" {
			return protocol
		}
	}
	if exporter == otelExporterOTLP {
		return otelProtocolHTTPProtobuf
	}
	return ""
}
// normalizeBaseURL trims surrounding whitespace from value and then removes
// every trailing slash. A blank value yields the empty string.
func normalizeBaseURL(value string) string {
	return strings.TrimRight(strings.TrimSpace(value), "/")
}
// isAbsoluteHTTPURL reports whether the trimmed value parses as a URL with an
// http or https scheme and a non-empty host.
func isAbsoluteHTTPURL(value string) bool {
	target, err := url.Parse(strings.TrimSpace(value))
	if err != nil {
		return false
	}
	switch target.Scheme {
	case "http", "https":
		return target.Host != ""
	default:
		return false
	}
}
// isTCPAddr reports whether value, after trimming surrounding whitespace, has
// the host:port form accepted by net.SplitHostPort with a non-empty port.
//
// An empty host is allowed, so listen addresses such as ":8080" are accepted.
// The port is not checked for being numeric or in range.
func isTCPAddr(value string) bool {
	_, port, err := net.SplitHostPort(strings.TrimSpace(value))
	return err == nil && port != ""
}
+252
View File
@@ -0,0 +1,252 @@
package config
import (
"testing"
"time"
"github.com/stretchr/testify/require"
)
// TestLoadFromEnvUsesDefaults verifies that LoadFromEnv returns the values of
// DefaultConfig for every section when only the two required variables (the
// Redis address and the User Service base URL) are set.
func TestLoadFromEnvUsesDefaults(t *testing.T) {
	t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
	t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")

	cfg, err := LoadFromEnv()
	require.NoError(t, err)

	defaults := DefaultConfig()
	require.Equal(t, defaults.ShutdownTimeout, cfg.ShutdownTimeout)
	require.Equal(t, defaults.Logging, cfg.Logging)
	require.Equal(t, defaults.InternalHTTP, cfg.InternalHTTP)
	require.Equal(t, "127.0.0.1:6379", cfg.Redis.Addr)
	require.Equal(t, defaults.Redis.DB, cfg.Redis.DB)
	require.Equal(t, defaults.Redis.OperationTimeout, cfg.Redis.OperationTimeout)
	require.Equal(t, defaults.Streams, cfg.Streams)
	// The blocking read window is a top-level field, so the section
	// comparisons above and below do not cover it.
	require.Equal(t, defaults.IntentsReadBlockTimeout, cfg.IntentsReadBlockTimeout)
	require.Equal(t, defaults.Retry, cfg.Retry)
	require.Equal(t, UserServiceConfig{
		BaseURL: "http://user-service.internal",
		Timeout: defaults.UserService.Timeout,
	}, cfg.UserService)
	require.Equal(t, defaults.AdminRouting, cfg.AdminRouting)
	require.Equal(t, defaults.Telemetry, cfg.Telemetry)
}
// TestLoadFromEnvAppliesOverrides sets every supported environment variable
// and verifies that LoadFromEnv reflects each override in the returned
// configuration, including the normalization applied to some values.
func TestLoadFromEnvAppliesOverrides(t *testing.T) {
t.Setenv(shutdownTimeoutEnvVar, "9s")
t.Setenv(logLevelEnvVar, "debug")
t.Setenv(internalHTTPAddrEnvVar, "127.0.0.1:18092")
t.Setenv(internalHTTPReadHeaderTimeoutEnvVar, "3s")
t.Setenv(internalHTTPReadTimeoutEnvVar, "11s")
t.Setenv(internalHTTPIdleTimeoutEnvVar, "61s")
t.Setenv(redisAddrEnvVar, "127.0.0.1:6380")
t.Setenv(redisUsernameEnvVar, "alice")
t.Setenv(redisPasswordEnvVar, "secret")
t.Setenv(redisDBEnvVar, "3")
t.Setenv(redisTLSEnabledEnvVar, "true")
t.Setenv(redisOperationTimeoutEnvVar, "750ms")
t.Setenv(intentsStreamEnvVar, "notification:test_intents")
t.Setenv(intentsReadBlockTimeoutEnvVar, "3500ms")
t.Setenv(gatewayClientEventsStreamEnvVar, "gateway:test_client-events")
t.Setenv(gatewayClientEventsStreamMaxEnvVar, "2048")
t.Setenv(mailDeliveryCommandsStreamEnvVar, "mail:test_delivery_commands")
t.Setenv(pushRetryMaxAttemptsEnvVar, "5")
t.Setenv(emailRetryMaxAttemptsEnvVar, "9")
t.Setenv(routeLeaseTTLEnvVar, "7s")
t.Setenv(routeBackoffMinEnvVar, "2s")
t.Setenv(routeBackoffMaxEnvVar, "7m")
t.Setenv(deadLetterTTLEnvVar, "120h")
t.Setenv(recordTTLEnvVar, "240h")
t.Setenv(idempotencyTTLEnvVar, "48h")
// The trailing slash is expected to be stripped from the loaded base URL.
t.Setenv(userServiceBaseURLEnvVar, "https://user-service.internal/api/")
t.Setenv(userServiceTimeoutEnvVar, "1500ms")
// Mixed-case and repeated addresses are expected to be lowercased and
// deduplicated.
t.Setenv(adminEmailsGeoReviewRecommendedEnvVar, "First@example.com, second@example.com, first@example.com")
t.Setenv(adminEmailsGameGenerationFailedEnvVar, "ops@example.com")
t.Setenv(adminEmailsLobbyRuntimePausedAfterEnvVar, "pause@example.com, PAUSE@example.com")
t.Setenv(adminEmailsLobbyApplicationSubmittedEnvVar, "owner@example.com, OWNER@example.com")
t.Setenv(otelServiceNameEnvVar, "custom-notification")
t.Setenv(otelTracesExporterEnvVar, "otlp")
t.Setenv(otelMetricsExporterEnvVar, "otlp")
// Only the shared protocol variable is set; it is expected to apply to both
// traces and metrics.
t.Setenv(otelExporterOTLPProtocolEnvVar, "grpc")
t.Setenv(otelStdoutTracesEnabledEnvVar, "true")
t.Setenv(otelStdoutMetricsEnabledEnvVar, "true")
cfg, err := LoadFromEnv()
require.NoError(t, err)
require.Equal(t, 9*time.Second, cfg.ShutdownTimeout)
require.Equal(t, "debug", cfg.Logging.Level)
require.Equal(t, InternalHTTPConfig{
Addr: "127.0.0.1:18092",
ReadHeaderTimeout: 3 * time.Second,
ReadTimeout: 11 * time.Second,
IdleTimeout: 61 * time.Second,
}, cfg.InternalHTTP)
require.Equal(t, RedisConfig{
Addr: "127.0.0.1:6380",
Username: "alice",
Password: "secret",
DB: 3,
TLSEnabled: true,
OperationTimeout: 750 * time.Millisecond,
}, cfg.Redis)
require.Equal(t, StreamsConfig{
Intents: "notification:test_intents",
GatewayClientEvents: "gateway:test_client-events",
GatewayClientEventsStreamMaxLen: 2048,
MailDeliveryCommands: "mail:test_delivery_commands",
}, cfg.Streams)
require.Equal(t, 3500*time.Millisecond, cfg.IntentsReadBlockTimeout)
require.Equal(t, RetryConfig{
PushMaxAttempts: 5,
EmailMaxAttempts: 9,
RouteLeaseTTL: 7 * time.Second,
RouteBackoffMin: 2 * time.Second,
RouteBackoffMax: 7 * time.Minute,
DeadLetterTTL: 120 * time.Hour,
RecordTTL: 240 * time.Hour,
IdempotencyTTL: 48 * time.Hour,
}, cfg.Retry)
require.Equal(t, UserServiceConfig{
BaseURL: "https://user-service.internal/api",
Timeout: 1500 * time.Millisecond,
}, cfg.UserService)
require.Equal(t, AdminRoutingConfig{
GeoReviewRecommended: []string{"first@example.com", "second@example.com"},
GameGenerationFailed: []string{"ops@example.com"},
LobbyRuntimePausedAfterStart: []string{"pause@example.com"},
LobbyApplicationSubmitted: []string{"owner@example.com"},
}, cfg.AdminRouting)
require.Equal(t, TelemetryConfig{
ServiceName: "custom-notification",
TracesExporter: "otlp",
MetricsExporter: "otlp",
TracesProtocol: "grpc",
MetricsProtocol: "grpc",
StdoutTracesEnabled: true,
StdoutMetricsEnabled: true,
}, cfg.Telemetry)
}
// TestLoadFromEnvRejectsInvalidValues verifies that LoadFromEnv fails when a
// single environment variable holds a value that cannot be parsed or is not
// supported, while the required variables are set to valid values.
func TestLoadFromEnvRejectsInvalidValues(t *testing.T) {
	type testCase struct {
		name    string
		envName string
		envVal  string
	}

	cases := []testCase{
		{"invalid duration", shutdownTimeoutEnvVar, "later"},
		{"invalid log level", logLevelEnvVar, "verbose"},
		{"invalid redis db", redisDBEnvVar, "db-three"},
		{"invalid redis tls", redisTLSEnabledEnvVar, "sometimes"},
		{"invalid push retries", pushRetryMaxAttemptsEnvVar, "many"},
		{"invalid email retries", emailRetryMaxAttemptsEnvVar, "several"},
		{"invalid gateway client events stream max len", gatewayClientEventsStreamMaxEnvVar, "many"},
		{"invalid user service timeout", userServiceTimeoutEnvVar, "soon"},
		{"invalid intents read block timeout", intentsReadBlockTimeoutEnvVar, "later"},
		{"invalid route lease ttl", routeLeaseTTLEnvVar, "eventually"},
		{"invalid traces exporter", otelTracesExporterEnvVar, "stdout"},
		{"invalid metrics protocol", otelExporterOTLPMetricsProtocolEnvVar, "udp"},
		{"invalid stdout traces", otelStdoutTracesEnabledEnvVar, "sometimes"},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
			t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
			t.Setenv(tc.envName, tc.envVal)

			_, err := LoadFromEnv()
			require.Error(t, err)
		})
	}
}
// TestLoadFromEnvRejectsMissingRequiredValues verifies that LoadFromEnv
// fails, and names the missing variable in the error, when one of the two
// required variables is not set by the test.
func TestLoadFromEnvRejectsMissingRequiredValues(t *testing.T) {
	cases := []struct {
		name       string
		setEnv     string
		setVal     string
		missingEnv string
	}{
		{"missing redis addr", userServiceBaseURLEnvVar, "http://user-service.internal", redisAddrEnvVar},
		{"missing user service base url", redisAddrEnvVar, "127.0.0.1:6379", userServiceBaseURLEnvVar},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Setenv(tc.setEnv, tc.setVal)

			_, err := LoadFromEnv()
			require.Error(t, err)
			require.Contains(t, err.Error(), tc.missingEnv)
		})
	}
}
// TestLoadFromEnvRejectsInvalidConfiguration verifies that values which
// parse but fail validation are rejected with an error containing the
// expected fragment. Every case runs with the route backoff maximum set to
// 5m so that the 10m minimum in the last case exceeds it.
func TestLoadFromEnvRejectsInvalidConfiguration(t *testing.T) {
	type testCase struct {
		name    string
		envName string
		envVal  string
		want    string
	}

	cases := []testCase{
		{"invalid internal http addr", internalHTTPAddrEnvVar, "127.0.0.1", "internal HTTP addr"},
		{"invalid redis addr", redisAddrEnvVar, "127.0.0.1", "redis addr"},
		{"relative user service url", userServiceBaseURLEnvVar, "/internal/users", "absolute http(s) URL"},
		{"invalid admin email", adminEmailsGeoReviewRecommendedEnvVar, "broken-email", "invalid email address"},
		{"blank admin email slot", adminEmailsGameGenerationFailedEnvVar, "ops@example.com, , second@example.com", "must not be empty"},
		{"invalid public application admin email", adminEmailsLobbyApplicationSubmittedEnvVar, "Owner <owner@example.com>", "must not include a display name"},
		{"nonpositive gateway client events stream max len", gatewayClientEventsStreamMaxEnvVar, "0", "must be positive"},
		{"backoff min above max", routeBackoffMinEnvVar, "10m", "must not exceed"},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
			t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
			t.Setenv(routeBackoffMaxEnvVar, "5m")
			t.Setenv(tc.envName, tc.envVal)

			_, err := LoadFromEnv()
			require.Error(t, err)
			require.Contains(t, err.Error(), tc.want)
		})
	}
}
// TestLoadFromEnvRejectsNonPositiveValues verifies that LoadFromEnv fails
// when any timeout, TTL, retry budget, or stream length is set to zero.
func TestLoadFromEnvRejectsNonPositiveValues(t *testing.T) {
	type testCase struct {
		name    string
		envName string
		envVal  string
	}

	cases := []testCase{
		{"shutdown timeout", shutdownTimeoutEnvVar, "0s"},
		{"read header timeout", internalHTTPReadHeaderTimeoutEnvVar, "0s"},
		{"read timeout", internalHTTPReadTimeoutEnvVar, "0s"},
		{"idle timeout", internalHTTPIdleTimeoutEnvVar, "0s"},
		{"redis timeout", redisOperationTimeoutEnvVar, "0s"},
		{"intents read block timeout", intentsReadBlockTimeoutEnvVar, "0s"},
		{"push retries", pushRetryMaxAttemptsEnvVar, "0"},
		{"email retries", emailRetryMaxAttemptsEnvVar, "0"},
		{"gateway client events stream max len", gatewayClientEventsStreamMaxEnvVar, "0"},
		{"route lease ttl", routeLeaseTTLEnvVar, "0s"},
		{"route backoff min", routeBackoffMinEnvVar, "0s"},
		{"route backoff max", routeBackoffMaxEnvVar, "0s"},
		{"dead letter ttl", deadLetterTTLEnvVar, "0s"},
		{"record ttl", recordTTLEnvVar, "0s"},
		{"idempotency ttl", idempotencyTTLEnvVar, "0s"},
		{"user service timeout", userServiceTimeoutEnvVar, "0s"},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
			t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
			t.Setenv(tc.envName, tc.envVal)

			_, err := LoadFromEnv()
			require.Error(t, err)
		})
	}
}
+112
View File
@@ -0,0 +1,112 @@
// Package logging configures the Notification Service process logger and
// provides context-aware helpers for trace fields.
package logging
import (
"context"
"fmt"
"log/slog"
"os"
"strings"
"galaxy/notification/internal/api/intentstream"
"go.opentelemetry.io/otel/trace"
)
// New builds the process-wide logger. level is trimmed of surrounding
// whitespace and parsed with slog.Level.UnmarshalText; a level that cannot be
// parsed yields an error prefixed with "build logger". The returned logger
// writes JSON records to os.Stdout at the parsed minimum level.
func New(level string) (*slog.Logger, error) {
	var parsed slog.Level

	trimmed := strings.TrimSpace(level)
	err := parsed.UnmarshalText([]byte(trimmed))
	if err != nil {
		return nil, fmt.Errorf("build logger: %w", err)
	}

	options := &slog.HandlerOptions{Level: parsed}
	handler := slog.NewJSONHandler(os.Stdout, options)
	return slog.New(handler), nil
}
// TraceAttrsFromContext extracts the OpenTelemetry span context from ctx and
// returns it as slog key-value pairs ("otel_trace_id", "otel_span_id"). It
// returns nil when ctx is nil or the span context is not valid.
func TraceAttrsFromContext(ctx context.Context) []any {
	if ctx != nil {
		if sc := trace.SpanContextFromContext(ctx); sc.IsValid() {
			return []any{
				"otel_trace_id", sc.TraceID().String(),
				"otel_span_id", sc.SpanID().String(),
			}
		}
	}
	return nil
}
// NotificationAttrs builds the slog key-value pairs that identify one
// notification. The notification id, type, producer, audience kind, and
// idempotency key are always present; "request_id" and "trace_id" are appended
// only when the corresponding argument is not blank.
func NotificationAttrs(
	notificationID string,
	notificationType intentstream.NotificationType,
	producer intentstream.Producer,
	audienceKind intentstream.AudienceKind,
	idempotencyKey string,
	requestID string,
	traceID string,
) []any {
	fields := []any{
		"notification_id", notificationID,
		"notification_type", string(notificationType),
		"producer", string(producer),
		"audience_kind", string(audienceKind),
		"idempotency_key", idempotencyKey,
	}

	// Optional correlation fields, emitted in this order when non-blank.
	optional := [...]struct{ key, value string }{
		{key: "request_id", value: requestID},
		{key: "trace_id", value: traceID},
	}
	for _, field := range optional {
		if strings.TrimSpace(field.value) == "" {
			continue
		}
		fields = append(fields, field.key, field.value)
	}

	return fields
}
// IntentAttrs builds the slog key-value pairs that identify one intent before
// a durable notification record (and therefore a notification id) exists.
// "request_id" and "trace_id" are appended only when the intent carries a
// non-blank value for them.
func IntentAttrs(intent intentstream.Intent) []any {
	fields := []any{
		"notification_type", string(intent.NotificationType),
		"producer", string(intent.Producer),
		"audience_kind", string(intent.AudienceKind),
		"idempotency_key", intent.IdempotencyKey,
	}

	if requestID := intent.RequestID; strings.TrimSpace(requestID) != "" {
		fields = append(fields, "request_id", requestID)
	}
	if traceID := intent.TraceID; strings.TrimSpace(traceID) != "" {
		fields = append(fields, "trace_id", traceID)
	}

	return fields
}
// RouteAttrs builds the slog key-value pairs that identify one route: every
// field produced by NotificationAttrs for the owning notification, followed by
// "route_id" and "channel".
func RouteAttrs(
	notificationID string,
	notificationType intentstream.NotificationType,
	producer intentstream.Producer,
	audienceKind intentstream.AudienceKind,
	idempotencyKey string,
	requestID string,
	traceID string,
	routeID string,
	channel intentstream.Channel,
) []any {
	base := NotificationAttrs(
		notificationID,
		notificationType,
		producer,
		audienceKind,
		idempotencyKey,
		requestID,
		traceID,
	)
	return append(base, "route_id", routeID, "channel", string(channel))
}
@@ -0,0 +1,946 @@
// Package acceptintent implements durable idempotent acceptance of normalized
// notification intents.
package acceptintent
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"log/slog"
netmail "net/mail"
"strings"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/config"
"galaxy/notification/internal/logging"
)
// Sentinel errors surfaced (wrapped) by Service.Execute. Callers should match
// them with errors.Is rather than by comparing error text.
var (
// ErrConflict reports that an idempotency scope already exists for
// different normalized content. It is returned when the stored request
// fingerprint differs from the fingerprint of the incoming intent.
ErrConflict = errors.New("accept intent conflict")
// ErrRecipientNotFound reports that at least one user-targeted recipient
// does not exist in the trusted User Service directory.
ErrRecipientNotFound = errors.New("accept intent recipient not found")
// ErrServiceUnavailable reports that durable acceptance could not be
// completed or recovered safely. Store failures, user-directory failures
// other than ErrRecipientNotFound, and inconsistent replay state are all
// wrapped with this sentinel.
ErrServiceUnavailable = errors.New("accept intent service unavailable")
)
// Outcome identifies the coarse intent-acceptance outcome. Result.Validate
// accepts only the constants declared below.
type Outcome string
const (
// OutcomeAccepted reports that the intent was durably accepted into local
// notification state.
OutcomeAccepted Outcome = "accepted"
// OutcomeDuplicate reports that the intent matched already accepted
// normalized content and therefore became a replay no-op.
OutcomeDuplicate Outcome = "duplicate"
)
// RouteStatus identifies one stable notification-route state. Routes created
// during acceptance start as either RouteStatusPending or RouteStatusSkipped;
// the remaining states are declared here but not assigned by this package's
// acceptance path.
type RouteStatus string
const (
// RouteStatusPending reports that the route is ready for first publication.
RouteStatusPending RouteStatus = "pending"
// RouteStatusPublished reports that the route was durably handed off.
RouteStatusPublished RouteStatus = "published"
// RouteStatusFailed reports that the last publish attempt failed and a
// retry is scheduled.
RouteStatusFailed RouteStatus = "failed"
// RouteStatusDeadLetter reports that the route exhausted its retry budget.
RouteStatusDeadLetter RouteStatus = "dead_letter"
// RouteStatusSkipped reports that the route slot was durably materialized
// but intentionally not emitted.
RouteStatusSkipped RouteStatus = "skipped"
)
// Result stores the coarse outcome of one intent-acceptance attempt. The zero
// value is returned alongside every error and does not pass Validate.
type Result struct {
// Outcome stores the stable intent-acceptance outcome.
Outcome Outcome
}
// NotificationRecord stores the primary durable notification record accepted
// from one normalized intent. Validate describes the invariants a complete
// record must satisfy.
type NotificationRecord struct {
// NotificationID stores the stable notification identifier.
NotificationID string
// NotificationType stores the frozen notification vocabulary value.
NotificationType intentstream.NotificationType
// Producer stores the frozen producer identifier.
Producer intentstream.Producer
// AudienceKind stores the normalized audience selector.
AudienceKind intentstream.AudienceKind
// RecipientUserIDs stores the normalized recipient user set for
// user-targeted intents. It must be non-empty for the user audience and
// empty for the admin_email audience.
RecipientUserIDs []string
// PayloadJSON stores the canonical normalized payload JSON string.
PayloadJSON string
// IdempotencyKey stores the producer-owned idempotency key.
IdempotencyKey string
// RequestFingerprint stores the stable normalized request fingerprint.
RequestFingerprint string
// RequestID stores the optional tracing request identifier.
RequestID string
// TraceID stores the optional tracing trace identifier.
TraceID string
// OccurredAt stores when the producer says the event happened.
OccurredAt time.Time
// AcceptedAt stores when Notification Service durably accepted the intent.
AcceptedAt time.Time
// UpdatedAt stores the last notification-record mutation timestamp. It
// equals AcceptedAt when the record is first built.
UpdatedAt time.Time
}
// NotificationRoute stores one durable route slot derived from an accepted
// notification. One slot exists per (channel, recipient) pair.
type NotificationRoute struct {
// NotificationID stores the owning notification identifier.
NotificationID string
// RouteID stores the stable `<channel>:<recipient_ref>` identifier.
RouteID string
// Channel stores the route channel slot.
Channel intentstream.Channel
// RecipientRef stores the stable target slot identifier. Acceptance builds
// it as `user:<user_id>`, `email:<address>`, or `config:<notification_type>`
// (the last for the synthetic route used when no admin emails are
// configured).
RecipientRef string
// Status stores the current route status.
Status RouteStatus
// AttemptCount stores how many publication attempts already ran. New
// routes start at zero.
AttemptCount int
// MaxAttempts stores the total retry budget for Channel.
MaxAttempts int
// NextAttemptAt stores the next scheduled publication time when Status is
// RouteStatusPending or RouteStatusFailed. It must be zero for skipped
// routes.
NextAttemptAt time.Time
// ResolvedEmail stores the already-known email target when available.
ResolvedEmail string
// ResolvedLocale stores the already-known locale when available.
ResolvedLocale string
// LastErrorClassification stores the optional last classified route error.
LastErrorClassification string
// LastErrorMessage stores the optional last route error message.
LastErrorMessage string
// LastErrorAt stores when the last route error happened.
LastErrorAt time.Time
// CreatedAt stores when the route was materialized.
CreatedAt time.Time
// UpdatedAt stores the last route mutation timestamp.
UpdatedAt time.Time
// PublishedAt stores when the route reached published.
PublishedAt time.Time
// DeadLetteredAt stores when the route reached dead_letter.
DeadLetteredAt time.Time
// SkippedAt stores when the route reached skipped.
SkippedAt time.Time
}
// IdempotencyRecord stores one durable `(producer, idempotency_key)`
// reservation. It links the reservation to the accepted notification and to
// the fingerprint of the request that created it, so that later requests with
// the same key can be classified as a replay or a conflict.
type IdempotencyRecord struct {
// Producer stores the owning producer identifier.
Producer intentstream.Producer
// IdempotencyKey stores the producer-owned idempotency key.
IdempotencyKey string
// NotificationID stores the accepted notification identifier.
NotificationID string
// RequestFingerprint stores the stable normalized request fingerprint.
RequestFingerprint string
// CreatedAt stores when the reservation was created.
CreatedAt time.Time
// ExpiresAt stores when the reservation expires. It must be strictly after
// CreatedAt.
ExpiresAt time.Time
}
// AcceptInput stores one normalized intent plus its chosen notification
// identifier. It is the argument of Service.Execute.
type AcceptInput struct {
// NotificationID stores the stable accepted notification identifier. It
// must not be blank.
NotificationID string
// Intent stores the normalized decoded ingress intent.
Intent intentstream.Intent
}
// CreateAcceptanceInput stores the durable write set required to accept one
// notification intent. The three parts must be mutually consistent; see
// Validate for the cross-checks.
type CreateAcceptanceInput struct {
// Notification stores the accepted notification record.
Notification NotificationRecord
// Routes stores every durable route slot derived from Notification. Route
// identifiers must be unique within the slice.
Routes []NotificationRoute
// Idempotency stores the idempotency reservation bound to Notification.
Idempotency IdempotencyRecord
}
// Store describes the durable storage required by the intent-acceptance use
// case.
type Store interface {
// CreateAcceptance stores the complete durable write set for one intent
// acceptance attempt. Implementations must wrap ErrConflict when the write
// set races with already accepted state. Any other error is reported by
// Service.Execute as ErrServiceUnavailable.
CreateAcceptance(context.Context, CreateAcceptanceInput) error
// GetIdempotency loads one existing idempotency reservation for the given
// producer and idempotency key. The boolean reports whether a reservation
// was found.
GetIdempotency(context.Context, intentstream.Producer, string) (IdempotencyRecord, bool, error)
// GetNotification loads one accepted notification by NotificationID. The
// boolean reports whether the notification was found.
GetNotification(context.Context, string) (NotificationRecord, bool, error)
}
// UserRecord carries the enrichment data resolved for one recipient user.
type UserRecord struct {
	// Email is the user's current email address.
	Email string
	// PreferredLanguage is the user's current preferred language tag.
	PreferredLanguage string
}

// Validate reports whether record holds usable recipient enrichment data. The
// email must be non-blank and must parse with net/mail.ParseAddress;
// PreferredLanguage is not inspected.
func (record UserRecord) Validate() error {
	if len(strings.TrimSpace(record.Email)) == 0 {
		return errors.New("user record email must not be empty")
	}

	_, parseErr := netmail.ParseAddress(record.Email)
	if parseErr != nil {
		return fmt.Errorf("user record email: %w", parseErr)
	}

	return nil
}
// UserDirectory resolves trusted recipient data from User Service. Missing
// users must wrap ErrRecipientNotFound. Other failures are treated as
// dependency unavailability. A returned record that fails UserRecord.Validate
// is also treated as dependency unavailability by the service.
type UserDirectory interface {
// GetUserByID loads one user by stable user identifier.
GetUserByID(context.Context, string) (UserRecord, error)
}
// Telemetry records low-cardinality intent-acceptance and user-enrichment
// outcomes. A nil Telemetry is allowed in Config; the service then records
// nothing.
type Telemetry interface {
// RecordIntentOutcome records one accepted notification-intent outcome.
// The service passes the string arguments in this order: notification
// type, producer, audience kind, outcome.
RecordIntentOutcome(context.Context, string, string, string, string)
// RecordUserEnrichmentAttempt records one User Service enrichment lookup
// outcome. The service passes the notification type followed by the
// result, which is one of "success", "recipient_not_found", or
// "service_unavailable".
RecordUserEnrichmentAttempt(context.Context, string, string)
}
// Clock provides the current wall-clock time. It exists so callers can supply
// a fixed time source through Config.
type Clock interface {
// Now returns the current time.
Now() time.Time
}
// systemClock is the Clock used when Config.Clock is nil.
type systemClock struct{}
// Now returns time.Now().
func (systemClock) Now() time.Time {
return time.Now()
}
// Config stores the dependencies and policies used by Service. New validates
// it and fills in defaults for the optional Clock and Logger fields.
type Config struct {
// Store owns the durable accepted state. Required.
Store Store
// UserDirectory resolves recipient email and locale from User Service.
// Required.
UserDirectory UserDirectory
// Clock provides wall-clock timestamps. Defaults to the system clock when
// nil.
Clock Clock
// Logger writes structured acceptance logs. Defaults to slog.Default()
// when nil.
Logger *slog.Logger
// Telemetry records low-cardinality acceptance and enrichment outcomes.
// May be nil, in which case nothing is recorded.
Telemetry Telemetry
// PushMaxAttempts stores the retry budget for push routes. Must be
// positive.
PushMaxAttempts int
// EmailMaxAttempts stores the retry budget for email routes. Must be
// positive.
EmailMaxAttempts int
// IdempotencyTTL stores how long accepted idempotency scopes remain valid.
// Must be positive.
IdempotencyTTL time.Duration
// AdminRouting stores the type-specific administrator email lists. It must
// pass its own Validate.
AdminRouting config.AdminRoutingConfig
}
// Service durably accepts normalized notification intents. Construct it with
// New; the fields mirror the validated Config values.
type Service struct {
// store persists and loads accepted state.
store Store
// userDirectory resolves recipient enrichment data.
userDirectory UserDirectory
// clock supplies acceptance timestamps.
clock Clock
// logger is tagged with component=accept_intent by New.
logger *slog.Logger
// telemetry may be nil; the record helpers check for that.
telemetry Telemetry
// pushMaxAttempts is the retry budget assigned to push routes.
pushMaxAttempts int
// emailMaxAttempts is the retry budget assigned to email routes.
emailMaxAttempts int
// idempotencyTTL is added to the acceptance time to compute reservation
// expiry.
idempotencyTTL time.Duration
// adminRouting holds the per-notification-type administrator email lists.
adminRouting config.AdminRoutingConfig
}
// New validates cfg and builds a Service from it.
//
// Store and UserDirectory are required; PushMaxAttempts, EmailMaxAttempts, and
// IdempotencyTTL must be positive; AdminRouting must pass its own validation.
// A nil Clock falls back to the system clock and a nil Logger to
// slog.Default(). Telemetry may be nil. The service logger is tagged with
// component=accept_intent.
func New(cfg Config) (*Service, error) {
	// Required dependencies and policy values, checked in a fixed order so the
	// first violated rule determines the returned error.
	switch {
	case cfg.Store == nil:
		return nil, errors.New("new accept intent service: nil store")
	case cfg.UserDirectory == nil:
		return nil, errors.New("new accept intent service: nil user directory")
	case cfg.PushMaxAttempts <= 0:
		return nil, errors.New("new accept intent service: push max attempts must be positive")
	case cfg.EmailMaxAttempts <= 0:
		return nil, errors.New("new accept intent service: email max attempts must be positive")
	case cfg.IdempotencyTTL <= 0:
		return nil, errors.New("new accept intent service: idempotency ttl must be positive")
	}
	if err := cfg.AdminRouting.Validate(); err != nil {
		return nil, fmt.Errorf("new accept intent service: %w", err)
	}

	// Optional dependencies with defaults.
	clock := cfg.Clock
	if clock == nil {
		clock = systemClock{}
	}
	baseLogger := cfg.Logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}

	service := &Service{
		store:            cfg.Store,
		userDirectory:    cfg.UserDirectory,
		clock:            clock,
		logger:           baseLogger.With("component", "accept_intent"),
		telemetry:        cfg.Telemetry,
		pushMaxAttempts:  cfg.PushMaxAttempts,
		emailMaxAttempts: cfg.EmailMaxAttempts,
		idempotencyTTL:   cfg.IdempotencyTTL,
		adminRouting:     cfg.AdminRouting,
	}
	return service, nil
}
// Execute durably accepts one normalized intent.
//
// The flow is: validate input, compute the request fingerprint, and check for
// an existing idempotency reservation. A matching reservation short-circuits
// as a duplicate (or as ErrConflict when the fingerprint differs). Otherwise
// the write set is built and stored; if the store reports ErrConflict, the
// replay lookup runs once more to classify the race. ErrRecipientNotFound and
// ErrServiceUnavailable from route materialization are returned as-is so
// callers can match them with errors.Is.
func (service *Service) Execute(ctx context.Context, input AcceptInput) (Result, error) {
	switch {
	case ctx == nil:
		return Result{}, errors.New("accept intent: nil context")
	case service == nil:
		return Result{}, errors.New("accept intent: nil service")
	}

	if err := input.Validate(); err != nil {
		return Result{}, fmt.Errorf("accept intent: %w", err)
	}
	fingerprint, err := requestFingerprint(input.Intent)
	if err != nil {
		return Result{}, fmt.Errorf("accept intent: %w", err)
	}

	// Fast path: the idempotency scope is already reserved.
	replay, handled, replayErr := service.resolveReplay(ctx, input, fingerprint)
	if handled {
		return replay, replayErr
	}

	createInput, accepted, err := service.buildCreateInput(ctx, input, fingerprint)
	if err != nil {
		if errors.Is(err, ErrRecipientNotFound) || errors.Is(err, ErrServiceUnavailable) {
			return Result{}, err
		}
		return Result{}, fmt.Errorf("accept intent: %w", err)
	}

	if err := service.store.CreateAcceptance(ctx, createInput); err != nil {
		if !errors.Is(err, ErrConflict) {
			return Result{}, fmt.Errorf("%w: create acceptance: %v", ErrServiceUnavailable, err)
		}
		// A concurrent writer won the race; re-read the reservation to decide
		// between duplicate and conflict.
		raced, resolved, racedErr := service.resolveReplay(ctx, input, fingerprint)
		if resolved {
			return raced, racedErr
		}
		return Result{}, fmt.Errorf("%w: create acceptance conflict without replay state", ErrServiceUnavailable)
	}

	notification := createInput.Notification
	outcome := string(accepted.Outcome)
	service.recordIntentOutcome(ctx, notification, outcome)

	fields := logging.NotificationAttrs(
		notification.NotificationID,
		notification.NotificationType,
		notification.Producer,
		notification.AudienceKind,
		notification.IdempotencyKey,
		notification.RequestID,
		notification.TraceID,
	)
	fields = append(fields, "route_count", len(createInput.Routes), "outcome", outcome)
	fields = append(fields, logging.TraceAttrsFromContext(ctx)...)
	service.logger.Info("notification intent accepted", fields...)

	return accepted, nil
}
// Validate reports whether result carries one of the supported outcomes,
// OutcomeAccepted or OutcomeDuplicate.
func (result Result) Validate() error {
	if result.Outcome == OutcomeAccepted || result.Outcome == OutcomeDuplicate {
		return nil
	}
	return fmt.Errorf("accept intent outcome %q is unsupported", result.Outcome)
}
// Validate reports whether input is a usable acceptance request: the
// notification id must not be blank and the intent must pass its own
// validation.
func (input AcceptInput) Validate() error {
	if id := strings.TrimSpace(input.NotificationID); id == "" {
		return errors.New("accept input notification id must not be empty")
	}

	intentErr := input.Intent.Validate()
	if intentErr == nil {
		return nil
	}
	return fmt.Errorf("accept input intent: %w", intentErr)
}
// Validate reports whether record is a complete notification record.
//
// Checks run in a fixed order and the first failure is returned: identifier
// and vocabulary fields, non-blank payload / idempotency key / fingerprint,
// the three timestamps (via validateTimestamp), and finally the rule that the
// user audience requires recipients while the admin_email audience forbids
// them.
func (record NotificationRecord) Validate() error {
	switch {
	case strings.TrimSpace(record.NotificationID) == "":
		return errors.New("notification record notification id must not be empty")
	case !record.NotificationType.IsKnown():
		return fmt.Errorf("notification record type %q is unsupported", record.NotificationType)
	case !record.Producer.IsKnown():
		return fmt.Errorf("notification record producer %q is unsupported", record.Producer)
	case !record.AudienceKind.IsKnown():
		return fmt.Errorf("notification record audience kind %q is unsupported", record.AudienceKind)
	case strings.TrimSpace(record.PayloadJSON) == "":
		return errors.New("notification record payload json must not be empty")
	case strings.TrimSpace(record.IdempotencyKey) == "":
		return errors.New("notification record idempotency key must not be empty")
	case strings.TrimSpace(record.RequestFingerprint) == "":
		return errors.New("notification record request fingerprint must not be empty")
	}

	occurredErr := validateTimestamp("notification record occurred at", record.OccurredAt)
	if occurredErr != nil {
		return occurredErr
	}
	acceptedErr := validateTimestamp("notification record accepted at", record.AcceptedAt)
	if acceptedErr != nil {
		return acceptedErr
	}
	updatedErr := validateTimestamp("notification record updated at", record.UpdatedAt)
	if updatedErr != nil {
		return updatedErr
	}

	recipientCount := len(record.RecipientUserIDs)
	if recipientCount == 0 && record.AudienceKind == intentstream.AudienceKindUser {
		return errors.New("notification record recipient user ids must not be empty for audience kind user")
	}
	if recipientCount > 0 && record.AudienceKind == intentstream.AudienceKindAdminEmail {
		return errors.New("notification record recipient user ids must be empty for audience kind admin_email")
	}
	return nil
}
// Validate reports whether route is a complete route record.
//
// Identifier, channel, recipient ref, status, and attempt counters are checked
// first, then the created/updated timestamps. Status-specific rules follow:
// pending and failed routes need a valid NextAttemptAt; skipped routes need a
// zero NextAttemptAt and a valid SkippedAt. Published and dead-letter routes
// have no additional checks here.
func (route NotificationRoute) Validate() error {
	switch {
	case strings.TrimSpace(route.NotificationID) == "":
		return errors.New("notification route notification id must not be empty")
	case strings.TrimSpace(route.RouteID) == "":
		return errors.New("notification route route id must not be empty")
	case !route.Channel.IsKnown():
		return fmt.Errorf("notification route channel %q is unsupported", route.Channel)
	case strings.TrimSpace(route.RecipientRef) == "":
		return errors.New("notification route recipient ref must not be empty")
	case !route.Status.IsKnown():
		return fmt.Errorf("notification route status %q is unsupported", route.Status)
	case route.AttemptCount < 0:
		return errors.New("notification route attempt count must not be negative")
	case route.MaxAttempts <= 0:
		return errors.New("notification route max attempts must be positive")
	}

	createdErr := validateTimestamp("notification route created at", route.CreatedAt)
	if createdErr != nil {
		return createdErr
	}
	updatedErr := validateTimestamp("notification route updated at", route.UpdatedAt)
	if updatedErr != nil {
		return updatedErr
	}

	if route.Status == RouteStatusPending || route.Status == RouteStatusFailed {
		return validateTimestamp("notification route next attempt at", route.NextAttemptAt)
	}
	if route.Status == RouteStatusSkipped {
		if !route.NextAttemptAt.IsZero() {
			return errors.New("notification route next attempt at must be zero for skipped routes")
		}
		return validateTimestamp("notification route skipped at", route.SkippedAt)
	}
	return nil
}
// IsKnown reports whether status is one of the declared RouteStatus
// constants.
func (status RouteStatus) IsKnown() bool {
	known := [...]RouteStatus{
		RouteStatusPending,
		RouteStatusPublished,
		RouteStatusFailed,
		RouteStatusDeadLetter,
		RouteStatusSkipped,
	}
	for _, candidate := range known {
		if status == candidate {
			return true
		}
	}
	return false
}
// Validate reports whether record is a complete idempotency reservation: a
// known producer, non-blank key / notification id / fingerprint, valid
// created and expiry timestamps, and an expiry strictly after creation.
func (record IdempotencyRecord) Validate() error {
	switch {
	case !record.Producer.IsKnown():
		return fmt.Errorf("idempotency record producer %q is unsupported", record.Producer)
	case strings.TrimSpace(record.IdempotencyKey) == "":
		return errors.New("idempotency record idempotency key must not be empty")
	case strings.TrimSpace(record.NotificationID) == "":
		return errors.New("idempotency record notification id must not be empty")
	case strings.TrimSpace(record.RequestFingerprint) == "":
		return errors.New("idempotency record request fingerprint must not be empty")
	}

	createdErr := validateTimestamp("idempotency record created at", record.CreatedAt)
	if createdErr != nil {
		return createdErr
	}
	expiresErr := validateTimestamp("idempotency record expires at", record.ExpiresAt)
	if expiresErr != nil {
		return expiresErr
	}

	if record.ExpiresAt.After(record.CreatedAt) {
		return nil
	}
	return errors.New("idempotency record expires at must be after created at")
}
// Validate reports whether input is a consistent durable write set.
//
// The notification and idempotency records must each be valid and must agree
// on notification id, producer, idempotency key, and request fingerprint.
// Every route must be valid, belong to the notification, and have a unique
// route id. For the user audience each route must additionally use a "user:"
// recipient ref and carry a non-blank resolved email and locale.
func (input CreateAcceptanceInput) Validate() error {
	notification, idempotency := input.Notification, input.Idempotency

	if err := notification.Validate(); err != nil {
		return fmt.Errorf("notification: %w", err)
	}
	if err := idempotency.Validate(); err != nil {
		return fmt.Errorf("idempotency: %w", err)
	}

	switch {
	case idempotency.NotificationID != notification.NotificationID:
		return errors.New("idempotency notification id must match notification record")
	case idempotency.Producer != notification.Producer:
		return errors.New("idempotency producer must match notification record")
	case idempotency.IdempotencyKey != notification.IdempotencyKey:
		return errors.New("idempotency key must match notification record")
	case idempotency.RequestFingerprint != notification.RequestFingerprint:
		return errors.New("idempotency request fingerprint must match notification record")
	}

	userAudience := notification.AudienceKind == intentstream.AudienceKindUser
	seen := make(map[string]struct{}, len(input.Routes))
	for position, route := range input.Routes {
		if err := route.Validate(); err != nil {
			return fmt.Errorf("routes[%d]: %w", position, err)
		}
		if route.NotificationID != notification.NotificationID {
			return fmt.Errorf("routes[%d]: notification id must match notification record", position)
		}
		if _, duplicate := seen[route.RouteID]; duplicate {
			return fmt.Errorf("routes[%d]: route id %q is duplicated", position, route.RouteID)
		}
		seen[route.RouteID] = struct{}{}

		if !userAudience {
			continue
		}
		switch {
		case !strings.HasPrefix(route.RecipientRef, "user:"):
			return fmt.Errorf("routes[%d]: recipient ref must use user: prefix for audience kind user", position)
		case strings.TrimSpace(route.ResolvedEmail) == "":
			return fmt.Errorf("routes[%d]: resolved email must not be empty for audience kind user", position)
		case strings.TrimSpace(route.ResolvedLocale) == "":
			return fmt.Errorf("routes[%d]: resolved locale must not be empty for audience kind user", position)
		}
	}
	return nil
}
// buildCreateInput assembles and validates the durable write set for a new
// acceptance: the notification record, its routes, and the idempotency
// reservation. All timestamps derive from a single clock reading converted to
// UTC and truncated to millisecond precision. On success the returned Result
// has OutcomeAccepted; on any error both returned values are zero.
func (service *Service) buildCreateInput(ctx context.Context, input AcceptInput, fingerprint string) (CreateAcceptanceInput, Result, error) {
	intent := input.Intent
	acceptedAt := service.clock.Now().UTC().Truncate(time.Millisecond)

	notification := NotificationRecord{
		NotificationID:   input.NotificationID,
		NotificationType: intent.NotificationType,
		Producer:         intent.Producer,
		AudienceKind:     intent.AudienceKind,
		// Copy so the record does not alias the caller's slice.
		RecipientUserIDs:   append([]string(nil), intent.RecipientUserIDs...),
		PayloadJSON:        intent.PayloadJSON,
		IdempotencyKey:     intent.IdempotencyKey,
		RequestFingerprint: fingerprint,
		RequestID:          intent.RequestID,
		TraceID:            intent.TraceID,
		OccurredAt:         intent.OccurredAt,
		AcceptedAt:         acceptedAt,
		UpdatedAt:          acceptedAt,
	}

	routes, err := service.materializeRoutes(ctx, notification, acceptedAt)
	if err != nil {
		return CreateAcceptanceInput{}, Result{}, fmt.Errorf("materialize routes: %w", err)
	}

	reservation := IdempotencyRecord{
		Producer:           notification.Producer,
		IdempotencyKey:     notification.IdempotencyKey,
		NotificationID:     notification.NotificationID,
		RequestFingerprint: fingerprint,
		CreatedAt:          acceptedAt,
		ExpiresAt:          acceptedAt.Add(service.idempotencyTTL),
	}
	writeSet := CreateAcceptanceInput{
		Notification: notification,
		Routes:       routes,
		Idempotency:  reservation,
	}
	if err := writeSet.Validate(); err != nil {
		return CreateAcceptanceInput{}, Result{}, fmt.Errorf("build create acceptance input: %w", err)
	}

	accepted := Result{Outcome: OutcomeAccepted}
	if err := accepted.Validate(); err != nil {
		return CreateAcceptanceInput{}, Result{}, fmt.Errorf("build acceptance result: %w", err)
	}
	return writeSet, accepted, nil
}
// materializeRoutes derives the route slots for record.
//
// For the user audience it resolves every recipient through the user
// directory and emits a push route followed by an email route per user, in
// recipient order. For the admin_email audience it emits the same pair per
// configured administrator address, or a single synthetic skipped route when
// no address is configured for the notification type. Any other audience kind
// is an error.
func (service *Service) materializeRoutes(ctx context.Context, record NotificationRecord, now time.Time) ([]NotificationRoute, error) {
	// Channel order determines route order within each recipient.
	channels := [...]intentstream.Channel{intentstream.ChannelPush, intentstream.ChannelEmail}

	if record.AudienceKind == intentstream.AudienceKindUser {
		resolved, err := service.resolveRecipients(ctx, record.NotificationType, record.RecipientUserIDs)
		if err != nil {
			return nil, err
		}

		routes := make([]NotificationRoute, 0, len(record.RecipientUserIDs)*2)
		for _, userID := range record.RecipientUserIDs {
			user := resolved[userID]
			ref := "user:" + userID
			locale := resolveLocale(user.PreferredLanguage)
			for _, channel := range channels {
				routes = append(routes, service.newRoute(record, now, channel, ref, user.Email, locale))
			}
		}
		return routes, nil
	}

	if record.AudienceKind == intentstream.AudienceKindAdminEmail {
		adminEmails := service.adminEmailsFor(record.NotificationType)
		if len(adminEmails) == 0 {
			synthetic := service.newSyntheticAdminConfigRoute(record, now)
			return []NotificationRoute{synthetic}, nil
		}

		routes := make([]NotificationRoute, 0, len(adminEmails)*2)
		for _, address := range adminEmails {
			ref := "email:" + address
			locale := intentstream.DefaultResolvedLocale()
			for _, channel := range channels {
				routes = append(routes, service.newRoute(record, now, channel, ref, address, locale))
			}
		}
		return routes, nil
	}

	return nil, fmt.Errorf("unsupported audience kind %q", record.AudienceKind)
}
// resolveRecipients looks up every user id in the user directory and returns
// the records keyed by user id. It stops at the first failure: a lookup error
// wrapping ErrRecipientNotFound is reported as ErrRecipientNotFound; any other
// lookup error, and any returned record that fails validation, is reported as
// ErrServiceUnavailable. One enrichment-attempt telemetry sample is recorded
// per lookup.
func (service *Service) resolveRecipients(ctx context.Context, notificationType intentstream.NotificationType, userIDs []string) (map[string]UserRecord, error) {
	resolved := make(map[string]UserRecord, len(userIDs))

	for _, userID := range userIDs {
		user, lookupErr := service.userDirectory.GetUserByID(ctx, userID)
		if lookupErr != nil {
			sentinel, outcome := ErrServiceUnavailable, "service_unavailable"
			if errors.Is(lookupErr, ErrRecipientNotFound) {
				sentinel, outcome = ErrRecipientNotFound, "recipient_not_found"
			}
			service.recordUserEnrichmentAttempt(ctx, notificationType, outcome)
			return nil, fmt.Errorf("%w: resolve recipient %q: %v", sentinel, userID, lookupErr)
		}

		// A record that came back but is unusable counts as a dependency
		// failure rather than a missing recipient.
		if invalid := user.Validate(); invalid != nil {
			service.recordUserEnrichmentAttempt(ctx, notificationType, "service_unavailable")
			return nil, fmt.Errorf("%w: resolve recipient %q: %v", ErrServiceUnavailable, userID, invalid)
		}

		service.recordUserEnrichmentAttempt(ctx, notificationType, "success")
		resolved[userID] = user
	}

	return resolved, nil
}
// newRoute builds one route slot for record on channel. The route id is
// "<channel>:<recipientRef>" and the retry budget comes from maxAttempts. When
// the notification type supports the channel for the record's audience the
// route starts pending with NextAttemptAt set to now; otherwise it is created
// as skipped with SkippedAt set to now.
func (service *Service) newRoute(
	record NotificationRecord,
	now time.Time,
	channel intentstream.Channel,
	recipientRef string,
	resolvedEmail string,
	resolvedLocale string,
) NotificationRoute {
	slot := NotificationRoute{
		NotificationID: record.NotificationID,
		RouteID:        string(channel) + ":" + recipientRef,
		Channel:        channel,
		RecipientRef:   recipientRef,
		AttemptCount:   0,
		MaxAttempts:    service.maxAttempts(channel),
		ResolvedEmail:  resolvedEmail,
		ResolvedLocale: resolvedLocale,
		CreatedAt:      now,
		UpdatedAt:      now,
	}

	if !record.NotificationType.SupportsChannel(record.AudienceKind, channel) {
		slot.Status = RouteStatusSkipped
		slot.SkippedAt = now
		return slot
	}

	slot.Status = RouteStatusPending
	slot.NextAttemptAt = now
	return slot
}
// newSyntheticAdminConfigRoute builds the single placeholder route used when
// no administrator email is configured for record's notification type. It is
// an email-channel route addressed to "config:<notification_type>", created
// directly in the skipped state with no resolved email or locale.
func (service *Service) newSyntheticAdminConfigRoute(record NotificationRecord, now time.Time) NotificationRoute {
	ref := "config:" + string(record.NotificationType)

	var placeholder NotificationRoute
	placeholder.NotificationID = record.NotificationID
	placeholder.RouteID = string(intentstream.ChannelEmail) + ":" + ref
	placeholder.Channel = intentstream.ChannelEmail
	placeholder.RecipientRef = ref
	placeholder.Status = RouteStatusSkipped
	placeholder.AttemptCount = 0
	placeholder.MaxAttempts = service.emailMaxAttempts
	placeholder.CreatedAt = now
	placeholder.UpdatedAt = now
	placeholder.SkippedAt = now
	return placeholder
}
// adminEmailsFor returns a copy of the administrator email list configured for
// notificationType, or nil when the type has no admin routing entry.
func (service *Service) adminEmailsFor(notificationType intentstream.NotificationType) []string {
	var configured []string

	switch notificationType {
	case intentstream.NotificationTypeGeoReviewRecommended:
		configured = service.adminRouting.GeoReviewRecommended
	case intentstream.NotificationTypeGameGenerationFailed:
		configured = service.adminRouting.GameGenerationFailed
	case intentstream.NotificationTypeLobbyRuntimePausedAfterStart:
		configured = service.adminRouting.LobbyRuntimePausedAfterStart
	case intentstream.NotificationTypeLobbyApplicationSubmitted:
		configured = service.adminRouting.LobbyApplicationSubmitted
	default:
		return nil
	}

	// Copy so callers cannot mutate the service configuration.
	return append([]string(nil), configured...)
}
// maxAttempts returns the configured retry budget for channel: the push budget
// for the push channel, the email budget for the email channel, and zero for
// any other channel.
func (service *Service) maxAttempts(channel intentstream.Channel) int {
	if channel == intentstream.ChannelPush {
		return service.pushMaxAttempts
	}
	if channel == intentstream.ChannelEmail {
		return service.emailMaxAttempts
	}
	return 0
}
// resolveLocale maps a recipient's preferred language to the locale stored on
// a route. Every input resolves to intentstream.DefaultResolvedLocale(): the
// previous implementation compared preferredLanguage against the default but
// returned the default from both branches, so the comparison had no effect and
// has been removed. preferredLanguage is kept in the signature so per-language
// resolution can be added later without touching callers.
func resolveLocale(preferredLanguage string) string {
	return intentstream.DefaultResolvedLocale()
}
// resolveReplay checks whether input's (producer, idempotency key) scope is
// already reserved and, if so, classifies the request.
//
// The boolean result reports whether the request was fully handled here:
//   - (zero, false, nil): no reservation exists; the caller should proceed
//     with a fresh acceptance.
//   - (duplicate, true, nil): a reservation with the same fingerprint exists
//     and its notification was loaded; the request is a replay no-op.
//   - (zero, true, err): the caller must stop and return err, which wraps
//     ErrConflict for a fingerprint mismatch or ErrServiceUnavailable for
//     store failures and inconsistent replay state.
func (service *Service) resolveReplay(ctx context.Context, input AcceptInput, fingerprint string) (Result, bool, error) {
record, found, err := service.store.GetIdempotency(ctx, input.Intent.Producer, input.Intent.IdempotencyKey)
if err != nil {
return Result{}, true, fmt.Errorf("%w: load idempotency: %v", ErrServiceUnavailable, err)
}
if !found {
return Result{}, false, nil
}
// Same key, different normalized content: reject instead of replaying.
if record.RequestFingerprint != fingerprint {
return Result{}, true, fmt.Errorf("%w: request conflicts with current state", ErrConflict)
}
notificationRecord, found, err := service.store.GetNotification(ctx, record.NotificationID)
if err != nil {
return Result{}, true, fmt.Errorf("%w: load notification: %v", ErrServiceUnavailable, err)
}
// A reservation without its notification is inconsistent durable state.
if !found {
return Result{}, true, fmt.Errorf("%w: notification %q is missing for idempotency scope", ErrServiceUnavailable, record.NotificationID)
}
if notificationRecord.NotificationID != record.NotificationID {
return Result{}, true, fmt.Errorf("%w: replay notification id mismatch", ErrServiceUnavailable)
}
result := Result{Outcome: OutcomeDuplicate}
if err := result.Validate(); err != nil {
return Result{}, true, fmt.Errorf("%w: %v", ErrServiceUnavailable, err)
}
// Telemetry and log fields come from the stored notification, not from the
// incoming request.
service.recordIntentOutcome(ctx, notificationRecord, string(result.Outcome))
logArgs := logging.NotificationAttrs(
notificationRecord.NotificationID,
notificationRecord.NotificationType,
notificationRecord.Producer,
notificationRecord.AudienceKind,
notificationRecord.IdempotencyKey,
notificationRecord.RequestID,
notificationRecord.TraceID,
)
logArgs = append(logArgs,
"outcome", string(result.Outcome),
)
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
service.logger.Info("notification intent replay resolved", logArgs...)
return result, true, nil
}
// requestFingerprint returns a stable digest of the content-bearing parts of
// intent, formatted as "sha256:<hex>". The digest covers the notification
// type, audience kind, recipient user ids (omitted when empty), and the raw
// payload JSON, serialized as one JSON object in that field order. Producer,
// idempotency key, and tracing identifiers are not part of the digest. The
// intent is validated first and a validation error is returned unchanged.
func requestFingerprint(intent intentstream.Intent) (string, error) {
	if err := intent.Validate(); err != nil {
		return "", err
	}

	// Field order and tags define the serialized form and therefore the
	// digest; do not reorder.
	type fingerprintContent struct {
		NotificationType intentstream.NotificationType `json:"notification_type"`
		AudienceKind     intentstream.AudienceKind     `json:"audience_kind"`
		RecipientUserIDs []string                      `json:"recipient_user_ids,omitempty"`
		PayloadJSON      json.RawMessage               `json:"payload_json"`
	}

	content := fingerprintContent{
		NotificationType: intent.NotificationType,
		AudienceKind:     intent.AudienceKind,
		RecipientUserIDs: append([]string(nil), intent.RecipientUserIDs...),
		PayloadJSON:      json.RawMessage(intent.PayloadJSON),
	}

	encoded, err := json.Marshal(content)
	if err != nil {
		return "", fmt.Errorf("marshal request fingerprint: %w", err)
	}

	digest := sha256.Sum256(encoded)
	return "sha256:" + hex.EncodeToString(digest[:]), nil
}
// recordIntentOutcome forwards one intent outcome to the telemetry sink,
// labelled with the notification type, producer, and audience kind of record.
// It does nothing when service or its telemetry sink is nil, or when outcome
// is blank after trimming whitespace.
func (service *Service) recordIntentOutcome(ctx context.Context, record NotificationRecord, outcome string) {
	if service == nil || service.telemetry == nil {
		return
	}
	if strings.TrimSpace(outcome) == "" {
		return
	}

	notificationType := string(record.NotificationType)
	producer := string(record.Producer)
	audienceKind := string(record.AudienceKind)
	service.telemetry.RecordIntentOutcome(ctx, notificationType, producer, audienceKind, outcome)
}
// recordUserEnrichmentAttempt forwards one user-enrichment result for
// notificationType to the telemetry sink. It does nothing when service or its
// telemetry sink is nil, or when result is blank after trimming whitespace.
func (service *Service) recordUserEnrichmentAttempt(ctx context.Context, notificationType intentstream.NotificationType, result string) {
	if service == nil || service.telemetry == nil {
		return
	}
	if strings.TrimSpace(result) == "" {
		return
	}

	service.telemetry.RecordUserEnrichmentAttempt(ctx, string(notificationType), result)
}
// validateTimestamp reports whether value is an acceptable record timestamp.
// The value must be non-zero, must carry a zero UTC offset, and must not
// contain a sub-millisecond component. name prefixes the returned error
// message.
func validateTimestamp(name string, value time.Time) error {
	if value.IsZero() {
		return fmt.Errorf("%s must not be zero", name)
	}
	// Time.Equal compares instants only and ignores the location, so comparing
	// value with value.UTC() can never fail. Inspect the zone offset instead.
	if _, offsetSeconds := value.Zone(); offsetSeconds != 0 {
		return fmt.Errorf("%s must be UTC", name)
	}
	if !value.Equal(value.Truncate(time.Millisecond)) {
		return fmt.Errorf("%s must use millisecond precision", name)
	}
	return nil
}
@@ -0,0 +1,613 @@
package acceptintent
import (
"context"
"errors"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/config"
"github.com/stretchr/testify/require"
)
// TestServiceAcceptsIntentAndMaterializesUserRoutes checks that accepting one
// user-audience intent for two recipients stores exactly one acceptance with
// the expected notification id, recipient list, and payload, and that a push
// and an email route exist per recipient carrying the directory email, the
// expected locale, and the configured retry budgets.
func TestServiceAcceptsIntentAndMaterializesUserRoutes(t *testing.T) {
	t.Parallel()

	const (
		notificationID = "1775121700000-0"
		payload        = `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`
	)

	acceptances := newRecordingStore()
	users := newStaticUserDirectory(map[string]UserRecord{
		"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
		"user-2": {Email: "two@example.com", PreferredLanguage: "en-US"},
	})
	svc, err := New(Config{
		Store:            acceptances,
		UserDirectory:    users,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	intent := validTurnReadyIntent(payload, []string{"user-2", "user-1"}, "request-123", "trace-123", time.UnixMilli(1775121700001).UTC())
	got, err := svc.Execute(context.Background(), AcceptInput{
		NotificationID: notificationID,
		Intent:         intent,
	})
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, got.Outcome)

	require.Len(t, acceptances.createInputs, 1)
	stored := acceptances.createInputs[0]
	require.Equal(t, notificationID, stored.Notification.NotificationID)
	require.Equal(t, []string{"user-1", "user-2"}, stored.Notification.RecipientUserIDs)
	require.Equal(t, payload, stored.Notification.PayloadJSON)
	require.Len(t, stored.Routes, 4)

	firstPush := routeByID(t, stored.Routes, "push:user:user-1")
	firstEmail := routeByID(t, stored.Routes, "email:user:user-1")
	secondPush := routeByID(t, stored.Routes, "push:user:user-2")
	secondEmail := routeByID(t, stored.Routes, "email:user:user-2")

	// Routes of user-1: status, retry budget, and enrichment.
	require.Equal(t, RouteStatusPending, firstPush.Status)
	require.Equal(t, 3, firstPush.MaxAttempts)
	require.Equal(t, "one@example.com", firstPush.ResolvedEmail)
	require.Equal(t, "en", firstPush.ResolvedLocale)
	require.Equal(t, RouteStatusPending, firstEmail.Status)
	require.Equal(t, 7, firstEmail.MaxAttempts)
	require.Equal(t, "one@example.com", firstEmail.ResolvedEmail)
	require.Equal(t, "en", firstEmail.ResolvedLocale)

	// Routes of user-2: the en-US preference is expected to resolve to "en".
	require.Equal(t, "two@example.com", secondPush.ResolvedEmail)
	require.Equal(t, "en", secondPush.ResolvedLocale)
	require.Equal(t, "two@example.com", secondEmail.ResolvedEmail)
	require.Equal(t, "en", secondEmail.ResolvedLocale)

	require.Equal(t, []string{"user-1", "user-2"}, users.lookups)
}
// TestServiceTreatsEquivalentReplayAsDuplicate checks that a second intent
// with the same content but a different notification id, request id, trace
// id, and occurrence time is reported as a duplicate, without a second
// acceptance being stored or a second user lookup being made.
func TestServiceTreatsEquivalentReplayAsDuplicate(t *testing.T) {
	t.Parallel()

	const payload = `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`

	acceptances := newRecordingStore()
	users := newStaticUserDirectory(map[string]UserRecord{
		"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
	})
	svc, err := New(Config{
		Store:            acceptances,
		UserDirectory:    users,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	original := AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(payload, []string{"user-1"}, "request-1", "trace-1", time.UnixMilli(1775121700001).UTC()),
	}
	replay := AcceptInput{
		NotificationID: "1775121700001-0",
		Intent:         validTurnReadyIntent(payload, []string{"user-1"}, "request-2", "trace-2", time.UnixMilli(1775121799999).UTC()),
	}

	accepted, err := svc.Execute(context.Background(), original)
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, accepted.Outcome)

	duplicate, err := svc.Execute(context.Background(), replay)
	require.NoError(t, err)
	require.Equal(t, OutcomeDuplicate, duplicate.Outcome)

	require.Len(t, acceptances.createInputs, 1)
	require.Equal(t, []string{"user-1"}, users.lookups)
}
func TestServiceRejectsConflictOnSameIdempotencyScope(t *testing.T) {
t.Parallel()
store := newRecordingStore()
directory := newStaticUserDirectory(map[string]UserRecord{
"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
})
service, err := New(Config{
Store: store,
UserDirectory: directory,
Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()},
PushMaxAttempts: 3,
EmailMaxAttempts: 7,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
_, err = service.Execute(context.Background(), AcceptInput{
NotificationID: "1775121700000-0",
Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
})
require.NoError(t, err)
_, err = service.Execute(context.Background(), AcceptInput{
NotificationID: "1775121700002-0",
Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":55}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700002).UTC()),
})
require.ErrorIs(t, err, ErrConflict)
}
// TestServiceMaterializesPublicLobbyApplicationAdminRoutes checks that an
// admin-email intent with one configured admin address produces a skipped
// push route and a pending email route for that address, both with locale
// "en", and that the user directory is never consulted.
func TestServiceMaterializesPublicLobbyApplicationAdminRoutes(t *testing.T) {
	t.Parallel()

	const adminEmail = "owner@example.com"

	acceptances := newRecordingStore()
	users := newStaticUserDirectory(nil)
	svc, err := New(Config{
		Store:            acceptances,
		UserDirectory:    users,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting: config.AdminRoutingConfig{
			LobbyApplicationSubmitted: []string{adminEmail},
		},
	})
	require.NoError(t, err)

	got, err := svc.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validPublicApplicationIntent(),
	})
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, got.Outcome)

	require.Len(t, acceptances.createInputs, 1)
	routes := acceptances.createInputs[0].Routes
	require.Len(t, routes, 2)

	push := routeByID(t, routes, "push:email:owner@example.com")
	email := routeByID(t, routes, "email:email:owner@example.com")

	require.Equal(t, RouteStatusSkipped, push.Status)
	require.Equal(t, adminEmail, push.ResolvedEmail)
	require.Equal(t, "en", push.ResolvedLocale)

	require.Equal(t, RouteStatusPending, email.Status)
	require.Equal(t, adminEmail, email.ResolvedEmail)
	require.Equal(t, "en", email.ResolvedLocale)

	require.Empty(t, users.lookups)
}
// TestServiceMaterializesSyntheticAdminConfigRouteWhenListIsEmpty checks that
// an admin-email intent accepted without any configured admin address yields a
// single skipped "email:config:…" route with the email retry budget and a zero
// next-attempt time, and that the user directory is never consulted.
func TestServiceMaterializesSyntheticAdminConfigRouteWhenListIsEmpty(t *testing.T) {
	t.Parallel()

	acceptances := newRecordingStore()
	users := newStaticUserDirectory(nil)
	svc, err := New(Config{
		Store:            acceptances,
		UserDirectory:    users,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	got, err := svc.Execute(context.Background(), AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validPublicApplicationIntent(),
	})
	require.NoError(t, err)
	require.Equal(t, OutcomeAccepted, got.Outcome)

	require.Len(t, acceptances.createInputs, 1)
	routes := acceptances.createInputs[0].Routes
	require.Len(t, routes, 1)

	synthetic := routes[0]
	require.Equal(t, "email:config:lobby.application.submitted", synthetic.RouteID)
	require.Equal(t, RouteStatusSkipped, synthetic.Status)
	require.Equal(t, 7, synthetic.MaxAttempts)
	require.True(t, synthetic.NextAttemptAt.IsZero())

	require.Empty(t, users.lookups)
}
func TestServiceMaterializesChannelMatrixAndRetryBudgets(t *testing.T) {
t.Parallel()
now := time.UnixMilli(1775121700000).UTC()
tests := []struct {
name string
intent intentstream.Intent
adminRouting config.AdminRoutingConfig
wantRoutes map[string]struct {
status RouteStatus
maxAttempts int
}
}{
{
name: "user push and email",
intent: validTurnReadyIntent(
`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
[]string{"user-1"},
"",
"",
now,
),
wantRoutes: map[string]struct {
status RouteStatus
maxAttempts int
}{
"push:user:user-1": {status: RouteStatusPending, maxAttempts: 3},
"email:user:user-1": {status: RouteStatusPending, maxAttempts: 7},
},
},
{
name: "user email only",
intent: intentstream.Intent{
NotificationType: intentstream.NotificationTypeLobbyInviteExpired,
Producer: intentstream.ProducerGameLobby,
AudienceKind: intentstream.AudienceKindUser,
RecipientUserIDs: []string{"user-1"},
IdempotencyKey: "game-123:invite-expired",
OccurredAt: now,
PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"}`,
},
wantRoutes: map[string]struct {
status RouteStatus
maxAttempts int
}{
"push:user:user-1": {status: RouteStatusSkipped, maxAttempts: 3},
"email:user:user-1": {status: RouteStatusPending, maxAttempts: 7},
},
},
{
name: "admin email only",
intent: intentstream.Intent{
NotificationType: intentstream.NotificationTypeGeoReviewRecommended,
Producer: intentstream.ProducerGeoProfile,
AudienceKind: intentstream.AudienceKindAdminEmail,
IdempotencyKey: "geo:user-1",
OccurredAt: now,
PayloadJSON: `{"observed_country":"DE","review_reason":"country_mismatch","usual_connection_country":"PL","user_email":"pilot@example.com","user_id":"user-1"}`,
},
adminRouting: config.AdminRoutingConfig{
GeoReviewRecommended: []string{"admin@example.com"},
},
wantRoutes: map[string]struct {
status RouteStatus
maxAttempts int
}{
"push:email:admin@example.com": {status: RouteStatusSkipped, maxAttempts: 3},
"email:email:admin@example.com": {status: RouteStatusPending, maxAttempts: 7},
},
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
store := newRecordingStore()
directory := newStaticUserDirectory(map[string]UserRecord{
"user-1": {Email: "pilot@example.com", PreferredLanguage: "fr-FR"},
})
service, err := New(Config{
Store: store,
UserDirectory: directory,
Clock: fixedClock{now: now},
PushMaxAttempts: 3,
EmailMaxAttempts: 7,
IdempotencyTTL: 7 * 24 * time.Hour,
AdminRouting: tt.adminRouting,
})
require.NoError(t, err)
result, err := service.Execute(context.Background(), AcceptInput{
NotificationID: "1775121700000-0",
Intent: tt.intent,
})
require.NoError(t, err)
require.Equal(t, OutcomeAccepted, result.Outcome)
require.Len(t, store.createInputs, 1)
require.Len(t, store.createInputs[0].Routes, len(tt.wantRoutes))
for routeID, want := range tt.wantRoutes {
route := routeByID(t, store.createInputs[0].Routes, routeID)
require.Equal(t, want.status, route.Status)
require.Equal(t, want.maxAttempts, route.MaxAttempts)
}
})
}
}
// TestServiceReturnsRecipientNotFoundForMissingUser checks that an intent
// addressed to a user unknown to the directory fails with
// ErrRecipientNotFound after one lookup and stores no acceptance.
func TestServiceReturnsRecipientNotFoundForMissingUser(t *testing.T) {
	t.Parallel()

	acceptances := newRecordingStore()
	users := newStaticUserDirectory(nil)
	svc, err := New(Config{
		Store:            acceptances,
		UserDirectory:    users,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	input := AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-missing"}, "", "", time.UnixMilli(1775121700001).UTC()),
	}
	_, err = svc.Execute(context.Background(), input)

	require.ErrorIs(t, err, ErrRecipientNotFound)
	require.Empty(t, acceptances.createInputs)
	require.Equal(t, []string{"user-missing"}, users.lookups)
}
// TestServiceReturnsServiceUnavailableWhenDirectoryFails checks that a
// directory lookup error for the recipient surfaces as ErrServiceUnavailable
// and that no acceptance is stored.
func TestServiceReturnsServiceUnavailableWhenDirectoryFails(t *testing.T) {
	t.Parallel()

	acceptances := newRecordingStore()
	users := newStaticUserDirectory(nil)
	users.errByUserID["user-1"] = errors.New("user service unavailable")

	svc, err := New(Config{
		Store:            acceptances,
		UserDirectory:    users,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	input := AcceptInput{
		NotificationID: "1775121700000-0",
		Intent:         validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
	}
	_, err = svc.Execute(context.Background(), input)

	require.ErrorIs(t, err, ErrServiceUnavailable)
	require.Empty(t, acceptances.createInputs)
}
func TestServiceRecordsIntentAndUserEnrichmentTelemetry(t *testing.T) {
t.Parallel()
store := newRecordingStore()
directory := newStaticUserDirectory(map[string]UserRecord{
"user-1": {Email: "one@example.com", PreferredLanguage: "en"},
})
telemetry := &recordingTelemetry{}
service, err := New(Config{
Store: store,
UserDirectory: directory,
Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()},
Telemetry: telemetry,
PushMaxAttempts: 3,
EmailMaxAttempts: 7,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
input := AcceptInput{
NotificationID: "1775121700000-0",
Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
}
result, err := service.Execute(context.Background(), input)
require.NoError(t, err)
require.Equal(t, OutcomeAccepted, result.Outcome)
duplicateInput := input
duplicateInput.NotificationID = "1775121700001-0"
result, err = service.Execute(context.Background(), duplicateInput)
require.NoError(t, err)
require.Equal(t, OutcomeDuplicate, result.Outcome)
require.Equal(t, []intentOutcomeRecord{
{
notificationType: "game.turn.ready",
producer: "game_master",
audienceKind: "user",
outcome: "accepted",
},
{
notificationType: "game.turn.ready",
producer: "game_master",
audienceKind: "user",
outcome: "duplicate",
},
}, telemetry.intentOutcomes)
require.Equal(t, []userEnrichmentRecord{
{notificationType: "game.turn.ready", result: "success"},
}, telemetry.userEnrichment)
}
func TestServiceRecordsUserEnrichmentFailureTelemetry(t *testing.T) {
t.Parallel()
tests := []struct {
name string
directory *staticUserDirectory
want string
}{
{
name: "recipient not found",
directory: newStaticUserDirectory(nil),
want: "recipient_not_found",
},
{
name: "service unavailable",
directory: func() *staticUserDirectory {
directory := newStaticUserDirectory(nil)
directory.errByUserID["user-1"] = errors.New("user service unavailable")
return directory
}(),
want: "service_unavailable",
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
telemetry := &recordingTelemetry{}
service, err := New(Config{
Store: newRecordingStore(),
UserDirectory: tt.directory,
Clock: fixedClock{now: time.UnixMilli(1775121700000).UTC()},
Telemetry: telemetry,
PushMaxAttempts: 3,
EmailMaxAttempts: 7,
IdempotencyTTL: 7 * 24 * time.Hour,
})
require.NoError(t, err)
_, err = service.Execute(context.Background(), AcceptInput{
NotificationID: "1775121700000-0",
Intent: validTurnReadyIntent(`{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`, []string{"user-1"}, "", "", time.UnixMilli(1775121700001).UTC()),
})
require.Error(t, err)
require.Equal(t, []userEnrichmentRecord{
{notificationType: "game.turn.ready", result: tt.want},
}, telemetry.userEnrichment)
})
}
}
// recordingStore is an in-memory store test double. It keeps every accepted
// create input in call order and indexes idempotency records by
// "<producer>:<idempotency key>" and notifications by notification id.
type recordingStore struct {
	createInputs  []CreateAcceptanceInput
	idempotency   map[string]IdempotencyRecord
	notifications map[string]NotificationRecord
}

// newRecordingStore returns an empty recordingStore with both indexes
// initialized.
func newRecordingStore() *recordingStore {
	s := new(recordingStore)
	s.idempotency = map[string]IdempotencyRecord{}
	s.notifications = map[string]NotificationRecord{}
	return s
}

// CreateAcceptance validates input and records it. It returns the validation
// error for invalid input and ErrConflict when the idempotency scope of input
// has already been recorded.
func (s *recordingStore) CreateAcceptance(_ context.Context, input CreateAcceptanceInput) error {
	if validationErr := input.Validate(); validationErr != nil {
		return validationErr
	}

	scope := string(input.Idempotency.Producer) + ":" + input.Idempotency.IdempotencyKey
	if _, taken := s.idempotency[scope]; taken {
		return ErrConflict
	}

	s.createInputs = append(s.createInputs, input)
	s.idempotency[scope] = input.Idempotency
	s.notifications[input.Notification.NotificationID] = input.Notification
	return nil
}

// GetIdempotency returns the idempotency record stored for producer and
// idempotencyKey together with whether it exists. The error is always nil.
func (s *recordingStore) GetIdempotency(_ context.Context, producer intentstream.Producer, idempotencyKey string) (IdempotencyRecord, bool, error) {
	scope := string(producer) + ":" + idempotencyKey
	found, exists := s.idempotency[scope]
	return found, exists, nil
}

// GetNotification returns the notification stored under notificationID
// together with whether it exists. The error is always nil.
func (s *recordingStore) GetNotification(_ context.Context, notificationID string) (NotificationRecord, bool, error) {
	found, exists := s.notifications[notificationID]
	return found, exists, nil
}
// fixedClock is a clock test double that always reports the same instant.
type fixedClock struct {
	now time.Time
}

// Now returns the instant fixedClock was constructed with.
func (c fixedClock) Now() time.Time {
	frozen := c.now
	return frozen
}
// validTurnReadyIntent builds a game.turn.ready user intent from Game Master
// with the fixed idempotency key "game-123:turn-54". The recipient list is
// copied; a two-element list that starts with "user-2" has its elements
// swapped, and no other reordering is applied. occurredAt is converted to UTC
// and truncated to millisecond precision.
func validTurnReadyIntent(payload string, recipients []string, requestID string, traceID string, occurredAt time.Time) intentstream.Intent {
	ordered := append([]string(nil), recipients...)
	if len(ordered) == 2 && ordered[0] == "user-2" {
		first, second := ordered[0], ordered[1]
		ordered[0], ordered[1] = second, first
	}

	var intent intentstream.Intent
	intent.NotificationType = intentstream.NotificationTypeGameTurnReady
	intent.Producer = intentstream.ProducerGameMaster
	intent.AudienceKind = intentstream.AudienceKindUser
	intent.RecipientUserIDs = ordered
	intent.IdempotencyKey = "game-123:turn-54"
	intent.OccurredAt = occurredAt.UTC().Truncate(time.Millisecond)
	intent.RequestID = requestID
	intent.TraceID = traceID
	intent.PayloadJSON = payload
	return intent
}
// validPublicApplicationIntent builds a fixed lobby.application.submitted
// admin-email intent from Game Lobby with no recipient user ids.
func validPublicApplicationIntent() intentstream.Intent {
	var intent intentstream.Intent
	intent.NotificationType = intentstream.NotificationTypeLobbyApplicationSubmitted
	intent.Producer = intentstream.ProducerGameLobby
	intent.AudienceKind = intentstream.AudienceKindAdminEmail
	intent.IdempotencyKey = "game-456:application-submitted:user-42"
	intent.OccurredAt = time.UnixMilli(1775121700002).UTC()
	intent.PayloadJSON = `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`
	return intent
}
// routeByID returns the first route in routes whose RouteID equals routeID.
// It fails the test immediately when no such route exists.
func routeByID(t *testing.T, routes []NotificationRoute, routeID string) NotificationRoute {
	t.Helper()

	for index := range routes {
		if routes[index].RouteID != routeID {
			continue
		}
		return routes[index]
	}

	t.Fatalf("route %q not found", routeID)
	return NotificationRoute{}
}
// staticUserDirectory is a user-directory test double backed by fixed maps.
// It remembers every requested user id in call order.
type staticUserDirectory struct {
	records     map[string]UserRecord
	errByUserID map[string]error
	lookups     []string
}

// newStaticUserDirectory returns a directory serving records, with an empty
// per-user error map that tests may fill in afterwards.
func newStaticUserDirectory(records map[string]UserRecord) *staticUserDirectory {
	directory := new(staticUserDirectory)
	directory.records = records
	directory.errByUserID = map[string]error{}
	return directory
}

// GetUserByID records the lookup and then returns, in order of precedence,
// the error configured for userID, the stored record, or
// ErrRecipientNotFound when neither exists.
func (d *staticUserDirectory) GetUserByID(_ context.Context, userID string) (UserRecord, error) {
	d.lookups = append(d.lookups, userID)

	if failure, configured := d.errByUserID[userID]; configured {
		return UserRecord{}, failure
	}
	if user, known := d.records[userID]; known {
		return user, nil
	}
	return UserRecord{}, ErrRecipientNotFound
}
// recordingTelemetry is a telemetry test double that appends every reported
// measurement to an in-memory slice in call order.
type recordingTelemetry struct {
	intentOutcomes []intentOutcomeRecord
	userEnrichment []userEnrichmentRecord
}

// RecordIntentOutcome appends one intent outcome built from the arguments.
func (rec *recordingTelemetry) RecordIntentOutcome(_ context.Context, notificationType string, producer string, audienceKind string, outcome string) {
	entry := intentOutcomeRecord{
		notificationType: notificationType,
		producer:         producer,
		audienceKind:     audienceKind,
		outcome:          outcome,
	}
	rec.intentOutcomes = append(rec.intentOutcomes, entry)
}

// RecordUserEnrichmentAttempt appends one user-enrichment attempt built from
// the arguments.
func (rec *recordingTelemetry) RecordUserEnrichmentAttempt(_ context.Context, notificationType string, result string) {
	entry := userEnrichmentRecord{
		notificationType: notificationType,
		result:           result,
	}
	rec.userEnrichment = append(rec.userEnrichment, entry)
}

// intentOutcomeRecord captures the arguments of one RecordIntentOutcome call.
type intentOutcomeRecord struct {
	notificationType string
	producer         string
	audienceKind     string
	outcome          string
}

// userEnrichmentRecord captures the arguments of one
// RecordUserEnrichmentAttempt call.
type userEnrichmentRecord struct {
	notificationType string
	result           string
}
+3
View File
@@ -0,0 +1,3 @@
// Package service reserves the application-service namespace of Notification
// Service.
package service
@@ -0,0 +1,135 @@
// Package malformedintent defines the operator-visible record used for
// malformed notification intents.
package malformedintent
import (
"encoding/json"
"fmt"
"strings"
"time"
)
// FailureCode identifies one stable malformed-intent rejection reason. The
// supported values are the FailureCode constants declared below; IsKnown
// reports whether a value is one of them.
type FailureCode string
const (
// FailureCodeInvalidIntent reports malformed top-level intent fields or an
// invalid normalized envelope.
FailureCodeInvalidIntent FailureCode = "invalid_intent"
// FailureCodeInvalidPayload reports malformed or schema-invalid
// `payload_json`.
FailureCodeInvalidPayload FailureCode = "invalid_payload"
// FailureCodeIdempotencyConflict reports a duplicate idempotency scope that
// conflicts with already accepted normalized content.
FailureCodeIdempotencyConflict FailureCode = "idempotency_conflict"
// FailureCodeRecipientNotFound reports that a user-targeted recipient user
// id could not be resolved through User Service.
FailureCodeRecipientNotFound FailureCode = "recipient_not_found"
)
// Entry stores one operator-visible malformed notification-intent record.
// Validate reports whether an Entry is complete.
type Entry struct {
// StreamEntryID stores the Redis Stream entry identifier of the rejected
// intent. Validate rejects a blank value.
StreamEntryID string
// NotificationType stores the optional raw notification type extracted from
// the rejected entry.
NotificationType string
// Producer stores the optional raw producer value extracted from the
// rejected entry.
Producer string
// IdempotencyKey stores the optional raw idempotency key extracted from the
// rejected entry.
IdempotencyKey string
// FailureCode stores the stable rejection classification. Validate accepts
// only values for which IsKnown returns true.
FailureCode FailureCode
// FailureMessage stores the detailed validation or decode failure. Validate
// requires it to be non-blank and free of surrounding whitespace.
FailureMessage string
// RawFields stores the raw top-level stream fields captured for operator
// inspection. Validate requires a non-nil map that encodes as a JSON object.
RawFields map[string]any
// RecordedAt stores when the malformed intent was durably recorded.
// Validate requires a non-zero value with millisecond precision.
RecordedAt time.Time
}
// IsKnown reports whether code is one of the four declared FailureCode
// constants that make up the frozen malformed-intent rejection surface.
func (code FailureCode) IsKnown() bool {
	return code == FailureCodeInvalidIntent ||
		code == FailureCodeInvalidPayload ||
		code == FailureCodeIdempotencyConflict ||
		code == FailureCodeRecipientNotFound
}
// Validate reports whether entry is a complete malformed-intent record. It
// checks, in order: a non-blank stream entry id, a known failure code, a
// non-blank failure message without surrounding whitespace, non-nil raw fields
// that encode as a JSON object, and a valid recorded-at timestamp. The first
// failed check determines the returned error.
func (entry Entry) Validate() error {
	trimmedMessage := strings.TrimSpace(entry.FailureMessage)

	switch {
	case strings.TrimSpace(entry.StreamEntryID) == "":
		return fmt.Errorf("malformed intent stream entry id must not be empty")
	case !entry.FailureCode.IsKnown():
		return fmt.Errorf("malformed intent failure code %q is unsupported", entry.FailureCode)
	case trimmedMessage == "":
		return fmt.Errorf("malformed intent failure message must not be empty")
	case trimmedMessage != entry.FailureMessage:
		return fmt.Errorf("malformed intent failure message must not contain surrounding whitespace")
	case entry.RawFields == nil:
		return fmt.Errorf("malformed intent raw fields must not be nil")
	}

	if err := validateJSONObject("malformed intent raw fields", entry.RawFields); err != nil {
		return err
	}
	return validateTimestamp("malformed intent recorded at", entry.RecordedAt)
}
// validateJSONObject reports whether value can be JSON-encoded and decodes
// back into a non-nil object. name prefixes every returned error message.
func validateJSONObject(name string, value map[string]any) error {
	encoded, marshalErr := json.Marshal(value)
	if marshalErr != nil {
		return fmt.Errorf("%s: %w", name, marshalErr)
	}
	// A nil map encodes as the JSON literal null, which is not an object.
	if string(encoded) == "null" {
		return fmt.Errorf("%s must encode as a JSON object", name)
	}

	roundTripped := map[string]any(nil)
	if unmarshalErr := json.Unmarshal(encoded, &roundTripped); unmarshalErr != nil {
		return fmt.Errorf("%s: %w", name, unmarshalErr)
	}
	if roundTripped == nil {
		return fmt.Errorf("%s must encode as a JSON object", name)
	}
	return nil
}
// validateTimestamp reports whether value is an acceptable record timestamp.
// The value must be non-zero, must carry a zero UTC offset, and must not
// contain a sub-millisecond component. name prefixes the returned error
// message.
func validateTimestamp(name string, value time.Time) error {
	if value.IsZero() {
		return fmt.Errorf("%s must not be zero", name)
	}
	// Time.Equal compares instants only and ignores the location, so comparing
	// value with value.UTC() can never fail. Inspect the zone offset instead.
	if _, offsetSeconds := value.Zone(); offsetSeconds != 0 {
		return fmt.Errorf("%s must be UTC", name)
	}
	if !value.Equal(value.Truncate(time.Millisecond)) {
		return fmt.Errorf("%s must use millisecond precision", name)
	}
	return nil
}
@@ -0,0 +1,178 @@
// Package publishmail encodes accepted email routes into Mail Service generic
// asynchronous template commands.
package publishmail
import (
"encoding/json"
"fmt"
netmail "net/mail"
"strconv"
"strings"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
)
// Fixed field values written into every command produced by this package.
const (
	commandSourceNotification  = "notification"
	commandPayloadModeTemplate = "template"
)

// Command stores one Mail Service-compatible template delivery command
// produced from a durable notification email route.
type Command struct {
	// DeliveryID stores the stable route-level delivery identifier.
	DeliveryID string

	// IdempotencyKey stores the stable Mail Service deduplication key.
	IdempotencyKey string

	// RequestedAt stores when Notification Service durably accepted the
	// notification intent.
	RequestedAt time.Time

	// PayloadJSON stores the fully encoded template-mode command payload.
	PayloadJSON string

	// RequestID stores the optional correlation identifier.
	RequestID string

	// TraceID stores the optional tracing correlation identifier.
	TraceID string
}

// Values returns the Redis Stream fields appended to the Mail Service command
// stream for Command. RequestedAt is written as decimal Unix milliseconds.
// The request_id and trace_id fields are present only when the corresponding
// Command field is non-empty.
func (command Command) Values() map[string]any {
	requestedAtMS := strconv.FormatInt(command.RequestedAt.UTC().UnixMilli(), 10)

	fields := map[string]any{
		"delivery_id":     command.DeliveryID,
		"source":          commandSourceNotification,
		"payload_mode":    commandPayloadModeTemplate,
		"idempotency_key": command.IdempotencyKey,
		"requested_at_ms": requestedAtMS,
		"payload_json":    command.PayloadJSON,
	}
	if requestID := command.RequestID; requestID != "" {
		fields["request_id"] = requestID
	}
	if traceID := command.TraceID; traceID != "" {
		fields["trace_id"] = traceID
	}
	return fields
}
// Encoder converts one accepted notification record plus its email route into
// one Mail Service-compatible generic template command.
type Encoder struct{}

// Encode converts notification plus route into one template delivery command.
// It fails when either record is invalid, when their notification ids differ,
// when route is not an email route, when the notification type does not
// support email for its audience, or when the resolved email, resolved locale,
// or payload cannot be encoded. The delivery id is
// "<notification id>/<route id>" and the idempotency key is that id prefixed
// with "notification:".
func (Encoder) Encode(notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) (Command, error) {
	// encodingFailure wraps cause with the payload_encoding_failed marker.
	encodingFailure := func(cause error) (Command, error) {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: %w", cause)
	}

	if validationErr := notification.Validate(); validationErr != nil {
		return Command{}, fmt.Errorf("encode mail command: %w", validationErr)
	}
	if validationErr := route.Validate(); validationErr != nil {
		return Command{}, fmt.Errorf("encode mail command: %w", validationErr)
	}

	switch {
	case notification.NotificationID != route.NotificationID:
		return Command{}, fmt.Errorf("encode mail command: notification id %q does not match route notification id %q", notification.NotificationID, route.NotificationID)
	case route.Channel != intentstream.ChannelEmail:
		return Command{}, fmt.Errorf("encode mail command: route channel %q is unsupported", route.Channel)
	case !notification.NotificationType.SupportsChannel(notification.AudienceKind, intentstream.ChannelEmail):
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: notification type %q does not support email", notification.NotificationType)
	}

	recipient, err := normalizedRecipientEmail(route.ResolvedEmail)
	if err != nil {
		return encodingFailure(err)
	}
	resolvedLocale, err := normalizedLocale(route.ResolvedLocale)
	if err != nil {
		return encodingFailure(err)
	}
	templateVariables, err := payloadVariables(notification.PayloadJSON)
	if err != nil {
		return encodingFailure(err)
	}

	// The list fields are explicit empty slices so they encode as [] and not
	// as null.
	body := templatePayloadJSON{
		To:          []string{recipient},
		Cc:          []string{},
		Bcc:         []string{},
		ReplyTo:     []string{},
		TemplateID:  string(notification.NotificationType),
		Locale:      resolvedLocale,
		Variables:   templateVariables,
		Attachments: []templateAttachmentJSON{},
	}
	encodedBody, err := json.Marshal(body)
	if err != nil {
		return Command{}, fmt.Errorf("encode mail command: payload_encoding_failed: marshal payload_json: %w", err)
	}

	deliveryID := notification.NotificationID + "/" + route.RouteID
	return Command{
		DeliveryID:     deliveryID,
		IdempotencyKey: "notification:" + deliveryID,
		RequestedAt:    notification.AcceptedAt,
		PayloadJSON:    string(encodedBody),
		RequestID:      notification.RequestID,
		TraceID:        notification.TraceID,
	}, nil
}
// templatePayloadJSON is the JSON document that Encode marshals into
// Command.PayloadJSON. Encode fills To with the single resolved recipient,
// sets TemplateID to the notification type, passes the notification payload
// through as Variables, and sets Cc, Bcc, ReplyTo, and Attachments to empty
// slices.
type templatePayloadJSON struct {
To []string `json:"to"`
Cc []string `json:"cc"`
Bcc []string `json:"bcc"`
ReplyTo []string `json:"reply_to"`
TemplateID string `json:"template_id"`
Locale string `json:"locale"`
// Variables holds the raw JSON of the template variables object.
Variables json.RawMessage `json:"variables"`
Attachments []templateAttachmentJSON `json:"attachments"`
}
// templateAttachmentJSON is the JSON shape of one attachment inside
// templatePayloadJSON. Encode always emits an empty attachment list, so no
// value of this type is populated in this file.
type templateAttachmentJSON struct {
Filename string `json:"filename"`
ContentType string `json:"content_type"`
ContentBase64 string `json:"content_base64"`
}
// normalizedRecipientEmail returns value unchanged when it is a bare mailbox
// address. It fails when value is blank, when it does not parse as a mail
// address, or when the parsed address carries a display name or differs from
// value in any way.
func normalizedRecipientEmail(value string) (string, error) {
	if len(strings.TrimSpace(value)) == 0 {
		return "", fmt.Errorf("resolved email must not be empty")
	}

	address, parseErr := netmail.ParseAddress(value)
	switch {
	case parseErr != nil:
		return "", fmt.Errorf("resolved email %q must be valid: %w", value, parseErr)
	case address.Name != "", address.Address != value:
		return "", fmt.Errorf("resolved email %q must not include a display name", value)
	}
	return value, nil
}
// normalizedLocale returns value unchanged when it is non-blank and has no
// leading or trailing whitespace; otherwise it returns an error naming the
// violated rule.
func normalizedLocale(value string) (string, error) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return "", fmt.Errorf("resolved locale must not be empty")
	}
	if trimmed != value {
		return "", fmt.Errorf("resolved locale %q must not contain surrounding whitespace", value)
	}
	return value, nil
}
// payloadVariables checks that payloadJSON decodes as a JSON object and
// returns the original text, unmodified, as raw JSON. It fails when the text
// does not decode into an object or decodes to null.
func payloadVariables(payloadJSON string) (json.RawMessage, error) {
	// Decoding is used only as a shape check; the decoded value is discarded.
	decoded := map[string]json.RawMessage(nil)
	if decodeErr := json.Unmarshal([]byte(payloadJSON), &decoded); decodeErr != nil {
		return nil, fmt.Errorf("decode payload_json: %w", decodeErr)
	}
	if decoded == nil {
		return nil, fmt.Errorf("payload_json must be a JSON object")
	}
	return json.RawMessage(payloadJSON), nil
}
@@ -0,0 +1,275 @@
package publishmail
import (
"encoding/json"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/stretchr/testify/require"
)
// TestEncoderEncodesUserAndAdminEmailCommands encodes one user email route and
// one admin email route and checks the delivery id, idempotency key, requested
// time, payload JSON, and the fixed stream fields of each resulting command.
func TestEncoderEncodesUserAndAdminEmailCommands(t *testing.T) {
	t.Parallel()

	acceptedAt := time.UnixMilli(1775121700000).UTC()

	cases := []struct {
		name            string
		notification    acceptintent.NotificationRecord
		route           acceptintent.NotificationRoute
		wantDeliveryID  string
		wantIdempotency string
		wantPayloadJSON string
	}{
		{
			name: "user route",
			notification: acceptintent.NotificationRecord{
				NotificationID:     "1775121700000-0",
				NotificationType:   intentstream.NotificationTypeGameTurnReady,
				Producer:           intentstream.ProducerGameMaster,
				AudienceKind:       intentstream.AudienceKindUser,
				RecipientUserIDs:   []string{"user-1"},
				PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
				IdempotencyKey:     "game-123:turn-54",
				RequestFingerprint: "sha256:deadbeef",
				AcceptedAt:         acceptedAt,
				OccurredAt:         acceptedAt,
				UpdatedAt:          acceptedAt,
			},
			route: acceptintent.NotificationRoute{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				MaxAttempts:    7,
				NextAttemptAt:  acceptedAt,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      acceptedAt,
				UpdatedAt:      acceptedAt,
			},
			wantDeliveryID:  "1775121700000-0/email:user:user-1",
			wantIdempotency: "notification:1775121700000-0/email:user:user-1",
			wantPayloadJSON: `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		},
		{
			name: "admin route",
			notification: acceptintent.NotificationRecord{
				NotificationID:     "1775121700001-0",
				NotificationType:   intentstream.NotificationTypeLobbyApplicationSubmitted,
				Producer:           intentstream.ProducerGameLobby,
				AudienceKind:       intentstream.AudienceKindAdminEmail,
				PayloadJSON:        `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
				IdempotencyKey:     "game-456:application-submitted:user-42",
				RequestFingerprint: "sha256:cafebabe",
				AcceptedAt:         acceptedAt,
				OccurredAt:         acceptedAt,
				UpdatedAt:          acceptedAt,
			},
			route: acceptintent.NotificationRoute{
				NotificationID: "1775121700001-0",
				RouteID:        "email:email:owner@example.com",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "email:owner@example.com",
				Status:         acceptintent.RouteStatusPending,
				MaxAttempts:    7,
				NextAttemptAt:  acceptedAt,
				ResolvedEmail:  "owner@example.com",
				ResolvedLocale: "en",
				CreatedAt:      acceptedAt,
				UpdatedAt:      acceptedAt,
			},
			wantDeliveryID:  "1775121700001-0/email:email:owner@example.com",
			wantIdempotency: "notification:1775121700001-0/email:email:owner@example.com",
			wantPayloadJSON: `{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"},"attachments":[]}`,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := Encoder{}.Encode(tc.notification, tc.route)
			require.NoError(t, err)

			// Command fields.
			require.Equal(t, tc.wantDeliveryID, got.DeliveryID)
			require.Equal(t, tc.wantIdempotency, got.IdempotencyKey)
			require.Equal(t, acceptedAt, got.RequestedAt)
			require.JSONEq(t, tc.wantPayloadJSON, got.PayloadJSON)

			// Stream fields derived from the command.
			fields := got.Values()
			require.Equal(t, tc.wantDeliveryID, fields["delivery_id"])
			require.Equal(t, "notification", fields["source"])
			require.Equal(t, "template", fields["payload_mode"])
			require.Equal(t, tc.wantIdempotency, fields["idempotency_key"])
			require.Equal(t, "1775121700000", fields["requested_at_ms"])
		})
	}
}
func TestEncoderPropagatesTracingMetadata(t *testing.T) {
t.Parallel()
now := time.UnixMilli(1775121700000).UTC()
command, err := Encoder{}.Encode(
acceptintent.NotificationRecord{
NotificationID: "1775121700000-0",
NotificationType: intentstream.NotificationTypeGameTurnReady,
Producer: intentstream.ProducerGameMaster,
AudienceKind: intentstream.AudienceKindUser,
RecipientUserIDs: []string{"user-1"},
PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
IdempotencyKey: "game-123:turn-54",
RequestFingerprint: "sha256:deadbeef",
RequestID: "request-1",
TraceID: "trace-1",
AcceptedAt: now,
OccurredAt: now,
UpdatedAt: now,
},
acceptintent.NotificationRoute{
NotificationID: "1775121700000-0",
RouteID: "email:user:user-1",
Channel: intentstream.ChannelEmail,
RecipientRef: "user:user-1",
Status: acceptintent.RouteStatusPending,
MaxAttempts: 7,
NextAttemptAt: now,
ResolvedEmail: "pilot@example.com",
ResolvedLocale: "en",
CreatedAt: now,
UpdatedAt: now,
},
)
require.NoError(t, err)
values := command.Values()
require.Equal(t, "request-1", values["request_id"])
require.Equal(t, "trace-1", values["trace_id"])
}
func TestEncoderPreservesNormalizedPayloadAsTemplateVariables(t *testing.T) {
t.Parallel()
now := time.UnixMilli(1775121700000).UTC()
command, err := Encoder{}.Encode(
acceptintent.NotificationRecord{
NotificationID: "1775121700000-0",
NotificationType: intentstream.NotificationTypeGameFinished,
Producer: intentstream.ProducerGameMaster,
AudienceKind: intentstream.AudienceKindUser,
RecipientUserIDs: []string{"user-1"},
PayloadJSON: `{"final_turn_number":81,"game_id":"game-123","game_name":"Nebula Clash"}`,
IdempotencyKey: "game-123:final",
RequestFingerprint: "sha256:deadbeef",
AcceptedAt: now,
OccurredAt: now,
UpdatedAt: now,
},
acceptintent.NotificationRoute{
NotificationID: "1775121700000-0",
RouteID: "email:user:user-1",
Channel: intentstream.ChannelEmail,
RecipientRef: "user:user-1",
Status: acceptintent.RouteStatusPending,
MaxAttempts: 7,
NextAttemptAt: now,
ResolvedEmail: "pilot@example.com",
ResolvedLocale: "en",
CreatedAt: now,
UpdatedAt: now,
},
)
require.NoError(t, err)
var payload struct {
Variables map[string]any `json:"variables"`
}
require.NoError(t, json.Unmarshal([]byte(command.PayloadJSON), &payload))
require.Equal(t, map[string]any{
"final_turn_number": float64(81),
"game_id": "game-123",
"game_name": "Nebula Clash",
}, payload.Variables)
}
func TestEncoderUsesEmptyAncillaryEnvelopeFields(t *testing.T) {
t.Parallel()
now := time.UnixMilli(1775121700000).UTC()
command, err := Encoder{}.Encode(
acceptintent.NotificationRecord{
NotificationID: "1775121700000-0",
NotificationType: intentstream.NotificationTypeLobbyInviteExpired,
Producer: intentstream.ProducerGameLobby,
AudienceKind: intentstream.AudienceKindUser,
RecipientUserIDs: []string{"user-1"},
PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"}`,
IdempotencyKey: "game-123:invite-expired",
RequestFingerprint: "sha256:deadbeef",
AcceptedAt: now,
OccurredAt: now,
UpdatedAt: now,
},
acceptintent.NotificationRoute{
NotificationID: "1775121700000-0",
RouteID: "email:user:user-1",
Channel: intentstream.ChannelEmail,
RecipientRef: "user:user-1",
Status: acceptintent.RouteStatusPending,
MaxAttempts: 7,
NextAttemptAt: now,
ResolvedEmail: "pilot@example.com",
ResolvedLocale: "en",
CreatedAt: now,
UpdatedAt: now,
},
)
require.NoError(t, err)
require.JSONEq(
t,
`{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.invite.expired","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","invitee_name":"Nova Pilot","invitee_user_id":"user-2"},"attachments":[]}`,
command.PayloadJSON,
)
}
func TestEncoderRejectsInvalidRouteForMailPublication(t *testing.T) {
t.Parallel()
now := time.UnixMilli(1775121700000).UTC()
_, err := Encoder{}.Encode(
acceptintent.NotificationRecord{
NotificationID: "1775121700000-0",
NotificationType: intentstream.NotificationTypeGameTurnReady,
Producer: intentstream.ProducerGameMaster,
AudienceKind: intentstream.AudienceKindUser,
RecipientUserIDs: []string{"user-1"},
PayloadJSON: `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
IdempotencyKey: "game-123:turn-54",
RequestFingerprint: "sha256:deadbeef",
AcceptedAt: now,
OccurredAt: now,
UpdatedAt: now,
},
acceptintent.NotificationRoute{
NotificationID: "1775121700000-0",
RouteID: "push:user:user-1",
Channel: intentstream.ChannelPush,
RecipientRef: "user:user-1",
Status: acceptintent.RouteStatusPending,
MaxAttempts: 3,
NextAttemptAt: now,
ResolvedEmail: "pilot@example.com",
ResolvedLocale: "en",
CreatedAt: now,
UpdatedAt: now,
},
)
require.Error(t, err)
require.ErrorContains(t, err, `route channel "push" is unsupported`)
}
@@ -0,0 +1,221 @@
// Package publishpush encodes user-facing notification routes into Gateway
// client-event payloads.
package publishpush
import (
"encoding/json"
"errors"
"fmt"
"strings"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"galaxy/transcoder"
)
// Event stores one Gateway-compatible client event produced from a
// user-targeted notification route. Values are produced by Encoder.Encode.
type Event struct {
	// UserID stores the authenticated user fan-out target. Encode derives it
	// from the route recipient reference by stripping the "user:" prefix.
	UserID string
	// EventType stores the stable client-facing event type. Encode sets it to
	// the string form of the notification type.
	EventType string
	// EventID stores the stable route-level event identifier. Encode builds it
	// as "<notification id>/<route id>".
	EventID string
	// PayloadBytes stores the encoded FlatBuffers payload bytes.
	PayloadBytes []byte
	// RequestID stores the optional correlation identifier copied from the
	// notification record.
	RequestID string
	// TraceID stores the optional tracing correlation identifier copied from
	// the notification record.
	TraceID string
}
// Encoder translates accepted notifications into Gateway client events,
// selecting the checked-in FlatBuffers payload encoder that matches the
// notification_type. It carries no state.
type Encoder struct{}

// Encode builds the Gateway-compatible client event for one accepted
// notification record and its push route.
//
// Both inputs are validated first. The route must use the push channel and its
// recipient reference must be user-targeted. Every failure is reported with the
// "encode push event" prefix together with a zero Event.
func (Encoder) Encode(notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) (Event, error) {
	// fail wraps an underlying error with the shared operation prefix.
	fail := func(cause error) (Event, error) {
		return Event{}, fmt.Errorf("encode push event: %w", cause)
	}

	if err := notification.Validate(); err != nil {
		return fail(err)
	}
	if err := route.Validate(); err != nil {
		return fail(err)
	}
	if route.Channel != intentstream.ChannelPush {
		return Event{}, fmt.Errorf("encode push event: route channel %q is unsupported", route.Channel)
	}

	targetUserID, err := userIDFromRecipientRef(route.RecipientRef)
	if err != nil {
		return fail(err)
	}
	encoded, err := encodePayload(notification.NotificationType, notification.PayloadJSON)
	if err != nil {
		return fail(err)
	}

	event := Event{
		UserID:       targetUserID,
		EventType:    string(notification.NotificationType),
		EventID:      notification.NotificationID + "/" + route.RouteID,
		PayloadBytes: encoded,
		RequestID:    notification.RequestID,
		TraceID:      notification.TraceID,
	}
	return event, nil
}
// encodePayload decodes the stored payload JSON for notificationType, checks
// the fields required by the push event, and returns the payload bytes produced
// by the matching transcoder encoder.
//
// Each notification type decodes into its own narrow struct, so JSON members
// that the push event does not use are ignored. Every error message starts with
// "payload_encoding_failed". Notification types without a case here are
// rejected as not supporting push.
func encodePayload(notificationType intentstream.NotificationType, payloadJSON string) ([]byte, error) {
	// Every supported type requires game_id, so the error value is shared.
	errEmptyGameID := errors.New("payload_encoding_failed: game_id is empty")

	switch notificationType {
	case intentstream.NotificationTypeGameTurnReady:
		var fields struct {
			GameID     string `json:"game_id"`
			TurnNumber int64  `json:"turn_number"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		switch {
		case fields.GameID == "":
			return nil, errEmptyGameID
		case fields.TurnNumber < 1:
			return nil, errors.New("payload_encoding_failed: turn_number must be at least 1")
		}
		event := transcoder.GameTurnReadyEvent{
			GameID:     fields.GameID,
			TurnNumber: fields.TurnNumber,
		}
		return wrapPayloadEncoding(transcoder.GameTurnReadyEventToPayload(&event))

	case intentstream.NotificationTypeGameFinished:
		var fields struct {
			GameID          string `json:"game_id"`
			FinalTurnNumber int64  `json:"final_turn_number"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		switch {
		case fields.GameID == "":
			return nil, errEmptyGameID
		case fields.FinalTurnNumber < 1:
			return nil, errors.New("payload_encoding_failed: final_turn_number must be at least 1")
		}
		event := transcoder.GameFinishedEvent{
			GameID:          fields.GameID,
			FinalTurnNumber: fields.FinalTurnNumber,
		}
		return wrapPayloadEncoding(transcoder.GameFinishedEventToPayload(&event))

	case intentstream.NotificationTypeLobbyApplicationSubmitted:
		var fields struct {
			GameID          string `json:"game_id"`
			ApplicantUserID string `json:"applicant_user_id"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		switch {
		case fields.GameID == "":
			return nil, errEmptyGameID
		case fields.ApplicantUserID == "":
			return nil, errors.New("payload_encoding_failed: applicant_user_id is empty")
		}
		event := transcoder.LobbyApplicationSubmittedEvent{
			GameID:          fields.GameID,
			ApplicantUserID: fields.ApplicantUserID,
		}
		return wrapPayloadEncoding(transcoder.LobbyApplicationSubmittedEventToPayload(&event))

	case intentstream.NotificationTypeLobbyMembershipApproved:
		var fields struct {
			GameID string `json:"game_id"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		if fields.GameID == "" {
			return nil, errEmptyGameID
		}
		event := transcoder.LobbyMembershipApprovedEvent{GameID: fields.GameID}
		return wrapPayloadEncoding(transcoder.LobbyMembershipApprovedEventToPayload(&event))

	case intentstream.NotificationTypeLobbyMembershipRejected:
		var fields struct {
			GameID string `json:"game_id"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		if fields.GameID == "" {
			return nil, errEmptyGameID
		}
		event := transcoder.LobbyMembershipRejectedEvent{GameID: fields.GameID}
		return wrapPayloadEncoding(transcoder.LobbyMembershipRejectedEventToPayload(&event))

	case intentstream.NotificationTypeLobbyInviteCreated:
		var fields struct {
			GameID        string `json:"game_id"`
			InviterUserID string `json:"inviter_user_id"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		switch {
		case fields.GameID == "":
			return nil, errEmptyGameID
		case fields.InviterUserID == "":
			return nil, errors.New("payload_encoding_failed: inviter_user_id is empty")
		}
		event := transcoder.LobbyInviteCreatedEvent{
			GameID:        fields.GameID,
			InviterUserID: fields.InviterUserID,
		}
		return wrapPayloadEncoding(transcoder.LobbyInviteCreatedEventToPayload(&event))

	case intentstream.NotificationTypeLobbyInviteRedeemed:
		var fields struct {
			GameID        string `json:"game_id"`
			InviteeUserID string `json:"invitee_user_id"`
		}
		if err := decodePayload(payloadJSON, &fields); err != nil {
			return nil, err
		}
		switch {
		case fields.GameID == "":
			return nil, errEmptyGameID
		case fields.InviteeUserID == "":
			return nil, errors.New("payload_encoding_failed: invitee_user_id is empty")
		}
		event := transcoder.LobbyInviteRedeemedEvent{
			GameID:        fields.GameID,
			InviteeUserID: fields.InviteeUserID,
		}
		return wrapPayloadEncoding(transcoder.LobbyInviteRedeemedEventToPayload(&event))
	}

	return nil, fmt.Errorf("payload_encoding_failed: notification type %q does not support push", notificationType)
}
// decodePayload unmarshals payloadJSON into target, which must be a pointer
// accepted by encoding/json. A decoding failure is wrapped with the
// "payload_encoding_failed: decode payload_json" prefix.
func decodePayload(payloadJSON string, target any) error {
	err := json.Unmarshal([]byte(payloadJSON), target)
	if err == nil {
		return nil
	}
	return fmt.Errorf("payload_encoding_failed: decode payload_json: %w", err)
}
// wrapPayloadEncoding adapts the (payload, error) result of an encoder call.
// On success the payload is returned unchanged. On failure the payload is
// dropped and the error is wrapped with the "payload_encoding_failed" prefix.
func wrapPayloadEncoding(payload []byte, err error) ([]byte, error) {
	if err == nil {
		return payload, nil
	}
	return nil, fmt.Errorf("payload_encoding_failed: %w", err)
}
// userIDFromRecipientRef extracts the user identifier from a recipient
// reference of the form "user:<id>". A reference without the "user:" prefix, or
// with nothing after it, is rejected as not user-targeted.
func userIDFromRecipientRef(recipientRef string) (string, error) {
	const userPrefix = "user:"

	if strings.HasPrefix(recipientRef, userPrefix) && len(recipientRef) > len(userPrefix) {
		return recipientRef[len(userPrefix):], nil
	}
	return "", fmt.Errorf("recipient_ref %q is not user-targeted", recipientRef)
}
@@ -0,0 +1,186 @@
package publishpush
import (
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"galaxy/transcoder"
"github.com/stretchr/testify/require"
)
// TestEncoderEncodesSupportedPushNotificationTypes encodes one push event per
// supported notification type and decodes the resulting payload bytes with the
// matching transcoder decoder to confirm the fields survive the round trip.
func TestEncoderEncodesSupportedPushNotificationTypes(t *testing.T) {
	t.Parallel()

	fixedTime := time.UnixMilli(1775121700000).UTC()

	cases := []struct {
		name             string
		notificationType intentstream.NotificationType
		payloadJSON      string
		assertPayload    func(*testing.T, []byte)
	}{
		{
			name:             "game turn ready",
			notificationType: intentstream.NotificationTypeGameTurnReady,
			payloadJSON:      `{"game_id":"game-1","game_name":"Nebula Clash","turn_number":54}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToGameTurnReadyEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-1", decoded.GameID)
				require.Equal(t, int64(54), decoded.TurnNumber)
			},
		},
		{
			name:             "game finished",
			notificationType: intentstream.NotificationTypeGameFinished,
			payloadJSON:      `{"final_turn_number":81,"game_id":"game-2","game_name":"Nova"}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToGameFinishedEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-2", decoded.GameID)
				require.Equal(t, int64(81), decoded.FinalTurnNumber)
			},
		},
		{
			name:             "lobby application submitted",
			notificationType: intentstream.NotificationTypeLobbyApplicationSubmitted,
			payloadJSON:      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-2","game_id":"game-3","game_name":"Orion Front"}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToLobbyApplicationSubmittedEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-3", decoded.GameID)
				require.Equal(t, "user-2", decoded.ApplicantUserID)
			},
		},
		{
			name:             "lobby membership approved",
			notificationType: intentstream.NotificationTypeLobbyMembershipApproved,
			payloadJSON:      `{"game_id":"game-4","game_name":"Ares"}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToLobbyMembershipApprovedEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-4", decoded.GameID)
			},
		},
		{
			name:             "lobby membership rejected",
			notificationType: intentstream.NotificationTypeLobbyMembershipRejected,
			payloadJSON:      `{"game_id":"game-5","game_name":"Atlas"}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToLobbyMembershipRejectedEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-5", decoded.GameID)
			},
		},
		{
			name:             "lobby invite created",
			notificationType: intentstream.NotificationTypeLobbyInviteCreated,
			payloadJSON:      `{"game_id":"game-6","game_name":"Vega","inviter_name":"Nova Pilot","inviter_user_id":"user-9"}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToLobbyInviteCreatedEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-6", decoded.GameID)
				require.Equal(t, "user-9", decoded.InviterUserID)
			},
		},
		{
			name:             "lobby invite redeemed",
			notificationType: intentstream.NotificationTypeLobbyInviteRedeemed,
			payloadJSON:      `{"game_id":"game-7","game_name":"Lyra","invitee_name":"Skipper","invitee_user_id":"user-10"}`,
			assertPayload: func(t *testing.T, raw []byte) {
				t.Helper()
				decoded, err := transcoder.PayloadToLobbyInviteRedeemedEvent(raw)
				require.NoError(t, err)
				require.Equal(t, "game-7", decoded.GameID)
				require.Equal(t, "user-10", decoded.InviteeUserID)
			},
		},
	}

	for _, tc := range cases {
		// Shadow the loop variable so parallel subtests each see their own case
		// on toolchains that predate per-iteration loop variables.
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			record := acceptintent.NotificationRecord{
				NotificationID:     "1775121700000-0",
				NotificationType:   tc.notificationType,
				Producer:           tc.notificationType.ExpectedProducer(),
				AudienceKind:       intentstream.AudienceKindUser,
				RecipientUserIDs:   []string{"user-1"},
				PayloadJSON:        tc.payloadJSON,
				IdempotencyKey:     "idem-1",
				RequestFingerprint: "sha256:deadbeef",
				RequestID:          "request-1",
				TraceID:            "trace-1",
				OccurredAt:         fixedTime,
				AcceptedAt:         fixedTime,
				UpdatedAt:          fixedTime,
			}
			pushRoute := acceptintent.NotificationRoute{
				NotificationID: "1775121700000-0",
				RouteID:        "push:user:user-1",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				MaxAttempts:    3,
				NextAttemptAt:  fixedTime,
				CreatedAt:      fixedTime,
				UpdatedAt:      fixedTime,
			}

			got, err := Encoder{}.Encode(record, pushRoute)
			require.NoError(t, err)

			require.Equal(t, "user-1", got.UserID)
			require.Equal(t, string(tc.notificationType), got.EventType)
			require.Equal(t, "1775121700000-0/push:user:user-1", got.EventID)
			require.Equal(t, "request-1", got.RequestID)
			require.Equal(t, "trace-1", got.TraceID)
			require.NotEmpty(t, got.PayloadBytes)
			tc.assertPayload(t, got.PayloadBytes)
		})
	}
}
func TestEncoderRejectsInvalidStoredPayload(t *testing.T) {
t.Parallel()
now := time.UnixMilli(1775121700000).UTC()
_, err := Encoder{}.Encode(
acceptintent.NotificationRecord{
NotificationID: "1775121700000-0",
NotificationType: intentstream.NotificationTypeGameTurnReady,
Producer: intentstream.ProducerGameMaster,
AudienceKind: intentstream.AudienceKindUser,
RecipientUserIDs: []string{"user-1"},
PayloadJSON: `{"game_id":"","game_name":"Nebula Clash","turn_number":0}`,
IdempotencyKey: "idem-1",
RequestFingerprint: "sha256:deadbeef",
OccurredAt: now,
AcceptedAt: now,
UpdatedAt: now,
},
acceptintent.NotificationRoute{
NotificationID: "1775121700000-0",
RouteID: "push:user:user-1",
Channel: intentstream.ChannelPush,
RecipientRef: "user:user-1",
Status: acceptintent.RouteStatusPending,
MaxAttempts: 3,
NextAttemptAt: now,
CreatedAt: now,
UpdatedAt: now,
},
)
require.Error(t, err)
require.ErrorContains(t, err, "payload_encoding_failed")
}
+694
View File
@@ -0,0 +1,694 @@
// Package telemetry provides lightweight OpenTelemetry helpers and
// low-cardinality Notification Service instruments.
package telemetry
import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"strings"
"sync"
"time"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/exporters/stdout/stdoutmetric"
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
oteltrace "go.opentelemetry.io/otel/trace"
)
// meterName is the instrumentation scope name passed to MeterProvider.Meter
// when the runtime creates its instruments.
const meterName = "galaxy/notification"
const (
	// defaultServiceName is the service.name used when ProcessConfig leaves
	// ServiceName blank.
	defaultServiceName = "galaxy-notification"

	// Exporter selectors accepted by ProcessConfig.
	processExporterNone = "none"
	processExporterOTLP = "otlp"

	// OTLP transport protocols accepted by ProcessConfig.
	processProtocolHTTPProtobuf = "http/protobuf"
	processProtocolGRPC         = "grpc"
)

// ProcessConfig describes how the process-wide OpenTelemetry runtime is
// assembled.
type ProcessConfig struct {
	// ServiceName overrides the default OpenTelemetry service name.
	ServiceName string

	// TracesExporter selects the external traces exporter. Supported values are
	// `none` and `otlp`.
	TracesExporter string

	// MetricsExporter selects the external metrics exporter. Supported values
	// are `none` and `otlp`.
	MetricsExporter string

	// TracesProtocol selects the OTLP traces protocol when TracesExporter is
	// `otlp`. An empty value passes validation.
	TracesProtocol string

	// MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is
	// `otlp`. An empty value passes validation.
	MetricsProtocol string

	// StdoutTracesEnabled enables the additional stdout trace exporter used for
	// local development and debugging.
	StdoutTracesEnabled bool

	// StdoutMetricsEnabled enables the additional stdout metric exporter used
	// for local development and debugging.
	StdoutMetricsEnabled bool
}

// Validate checks the exporter and protocol selections in cfg and returns an
// error naming the first unsupported value. Exporters are checked before
// protocols, and traces before metrics. Exporters must be `none` or `otlp`;
// protocols may be empty, `http/protobuf`, or `grpc`.
func (cfg ProcessConfig) Validate() error {
	if cfg.TracesExporter != processExporterNone && cfg.TracesExporter != processExporterOTLP {
		return fmt.Errorf("unsupported traces exporter %q", cfg.TracesExporter)
	}
	if cfg.MetricsExporter != processExporterNone && cfg.MetricsExporter != processExporterOTLP {
		return fmt.Errorf("unsupported metrics exporter %q", cfg.MetricsExporter)
	}

	switch cfg.TracesProtocol {
	case "", processProtocolHTTPProtobuf, processProtocolGRPC:
	default:
		return fmt.Errorf("unsupported OTLP traces protocol %q", cfg.TracesProtocol)
	}
	switch cfg.MetricsProtocol {
	case "", processProtocolHTTPProtobuf, processProtocolGRPC:
	default:
		return fmt.Errorf("unsupported OTLP metrics protocol %q", cfg.MetricsProtocol)
	}

	return nil
}
// Runtime owns the Notification Service OpenTelemetry providers and
// low-cardinality custom instruments. It contains mutexes and is handled
// through *Runtime; all methods tolerate a nil receiver.
type Runtime struct {
	// tracerProvider and meterProvider are the providers handed out by the
	// TracerProvider and MeterProvider accessors.
	tracerProvider oteltrace.TracerProvider
	meterProvider metric.MeterProvider
	// shutdownMu guards shutdownDone and shutdownErr.
	shutdownMu sync.Mutex
	// shutdownDone records that Shutdown has already been entered.
	shutdownDone bool
	// shutdownErr stores the joined result of the shutdown functions.
	shutdownErr error
	// shutdownFns are invoked by Shutdown in reverse order.
	shutdownFns []func(context.Context) error
	// routeScheduleReaderMu guards routeScheduleReader, which is installed
	// through SetRouteScheduleSnapshotReader.
	routeScheduleReaderMu sync.RWMutex
	routeScheduleReader RouteScheduleSnapshotReader
	// intentStreamLagReaderMu guards intentStreamLagReader, which is installed
	// through SetIntentStreamLagSnapshotReader.
	intentStreamLagReaderMu sync.RWMutex
	intentStreamLagReader IntentStreamLagSnapshotReader
	// internalHTTPRequests counts internal HTTP requests
	// (notification.internal_http.requests).
	internalHTTPRequests metric.Int64Counter
	// internalHTTPDuration records internal HTTP request durations in
	// milliseconds (notification.internal_http.duration_ms).
	internalHTTPDuration metric.Float64Histogram
	// internalHTTPLifecycle counts internal HTTP server lifecycle events
	// (notification.internal_http.lifecycle).
	internalHTTPLifecycle metric.Int64Counter
	// intentOutcomes counts notification-intent outcomes
	// (notification.intent.outcomes).
	intentOutcomes metric.Int64Counter
	// malformedIntents counts malformed or rejected intents
	// (notification.intent.malformed).
	malformedIntents metric.Int64Counter
	// userEnrichment counts User Service enrichment lookups
	// (notification.user_enrichment.attempts).
	userEnrichment metric.Int64Counter
	// routePublishAttempts counts route publication attempts
	// (notification.route.publish_attempts).
	routePublishAttempts metric.Int64Counter
	// routeRetries counts route retry scheduling events
	// (notification.route.retries).
	routeRetries metric.Int64Counter
	// routeDeadLetters counts route transitions to dead_letter
	// (notification.route.dead_letters).
	routeDeadLetters metric.Int64Counter
}
// RouteScheduleSnapshot stores the current observable state of the durable
// notification route schedule. Values are produced by a
// RouteScheduleSnapshotReader.
type RouteScheduleSnapshot struct {
	// Depth stores how many route keys are currently present in the route
	// schedule.
	Depth int64
	// OldestScheduledFor stores the oldest currently scheduled due time when
	// one exists. It is a pointer so that absence can be represented;
	// presumably nil when the schedule is empty — confirm against the reader
	// implementation.
	OldestScheduledFor *time.Time
}
// RouteScheduleSnapshotReader loads one current route-schedule snapshot for
// observable gauge reporting. An implementation is handed to the runtime
// through Runtime.SetRouteScheduleSnapshotReader.
type RouteScheduleSnapshotReader interface {
	// ReadRouteScheduleSnapshot returns the current route-schedule depth and
	// its oldest scheduled timestamp when one exists, or an error when the
	// snapshot cannot be loaded.
	ReadRouteScheduleSnapshot(context.Context) (RouteScheduleSnapshot, error)
}
// IntentStreamLagSnapshot stores the current observable lag of the plain-XREAD
// notification-intent consumer. Values are produced by an
// IntentStreamLagSnapshotReader.
type IntentStreamLagSnapshot struct {
	// OldestUnprocessedAt stores the Redis Stream timestamp of the oldest
	// entry that has not yet been durably processed. It is a pointer so that
	// absence can be represented; presumably nil when nothing is pending —
	// confirm against the reader implementation.
	OldestUnprocessedAt *time.Time
}
// IntentStreamLagSnapshotReader loads one current intent-stream lag snapshot
// for observable gauge reporting. An implementation is handed to the runtime
// through Runtime.SetIntentStreamLagSnapshotReader.
type IntentStreamLagSnapshotReader interface {
	// ReadIntentStreamLagSnapshot returns the oldest unprocessed stream entry
	// timestamp when one exists, or an error when the snapshot cannot be
	// loaded.
	ReadIntentStreamLagSnapshot(context.Context) (IntentStreamLagSnapshot, error)
}
// New builds a lightweight telemetry runtime on top of meterProvider. It is
// meant for tests and embedded use cases that need no process-level exporter
// wiring. No tracer provider is supplied, so NewWithProviders substitutes the
// globally registered one.
func New(meterProvider metric.MeterProvider) (*Runtime, error) {
	// A nil tracer provider asks NewWithProviders to fall back to the global.
	var tracerProvider oteltrace.TracerProvider
	return NewWithProviders(meterProvider, tracerProvider)
}
// NewWithProviders builds a telemetry runtime from the supplied meterProvider
// and tracerProvider. A nil argument is replaced by the corresponding globally
// registered OpenTelemetry provider. An error is returned if a provider is
// still nil after that substitution. The resulting runtime has no shutdown
// functions attached.
func NewWithProviders(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider) (*Runtime, error) {
	if meterProvider == nil {
		meterProvider = otel.GetMeterProvider()
	}
	if tracerProvider == nil {
		tracerProvider = otel.GetTracerProvider()
	}

	switch {
	case meterProvider == nil:
		return nil, errors.New("new notification telemetry runtime: nil meter provider")
	case tracerProvider == nil:
		return nil, errors.New("new notification telemetry runtime: nil tracer provider")
	}

	return buildRuntime(meterProvider, tracerProvider, nil)
}
// NewProcess constructs the process-wide Notification Service OpenTelemetry
// runtime from cfg, installs the resulting providers globally, and returns the
// runtime.
//
// ctx must be non-nil and cfg must pass Validate. A nil logger falls back to
// slog.Default(). A blank cfg.ServiceName falls back to defaultServiceName.
//
// Providers that were already constructed are shut down again when a later
// construction step fails, so a failed call does not leak exporter resources.
// Any error from that cleanup is joined onto the returned error.
func NewProcess(ctx context.Context, cfg ProcessConfig, logger *slog.Logger) (*Runtime, error) {
	if ctx == nil {
		return nil, errors.New("new notification telemetry process: nil context")
	}
	if err := cfg.Validate(); err != nil {
		return nil, fmt.Errorf("new notification telemetry process: %w", err)
	}
	if logger == nil {
		logger = slog.Default()
	}

	serviceName := strings.TrimSpace(cfg.ServiceName)
	if serviceName == "" {
		serviceName = defaultServiceName
	}
	res := resource.NewSchemaless(attribute.String("service.name", serviceName))

	tracerProvider, err := newTracerProvider(ctx, res, cfg)
	if err != nil {
		return nil, fmt.Errorf("new notification telemetry process: tracer provider: %w", err)
	}
	meterProvider, err := newMeterProvider(ctx, res, cfg)
	if err != nil {
		// The tracer provider already exists; release it before bailing out.
		if shutdownErr := tracerProvider.Shutdown(ctx); shutdownErr != nil {
			err = errors.Join(err, shutdownErr)
		}
		return nil, fmt.Errorf("new notification telemetry process: meter provider: %w", err)
	}

	otel.SetTracerProvider(tracerProvider)
	otel.SetMeterProvider(meterProvider)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	))

	runtime, err := buildRuntime(meterProvider, tracerProvider, []func(context.Context) error{
		meterProvider.Shutdown,
		tracerProvider.Shutdown,
	})
	if err != nil {
		// No runtime exists to own the providers, so stop them here in the same
		// order Runtime.Shutdown would use (tracer first, then meter).
		if shutdownErr := errors.Join(tracerProvider.Shutdown(ctx), meterProvider.Shutdown(ctx)); shutdownErr != nil {
			err = errors.Join(err, shutdownErr)
		}
		return nil, fmt.Errorf("new notification telemetry process: runtime: %w", err)
	}

	logger.Info("notification telemetry configured",
		"service_name", serviceName,
		"traces_exporter", cfg.TracesExporter,
		"metrics_exporter", cfg.MetricsExporter,
	)

	return runtime, nil
}
// TracerProvider returns the tracer provider owned by the runtime. A nil
// runtime, or one without a tracer provider, yields the globally registered
// OpenTelemetry tracer provider instead.
func (runtime *Runtime) TracerProvider() oteltrace.TracerProvider {
	if runtime != nil && runtime.tracerProvider != nil {
		return runtime.tracerProvider
	}
	return otel.GetTracerProvider()
}
// MeterProvider returns the meter provider owned by the runtime. A nil
// runtime, or one without a meter provider, yields the globally registered
// OpenTelemetry meter provider instead.
func (runtime *Runtime) MeterProvider() metric.MeterProvider {
	if runtime != nil && runtime.meterProvider != nil {
		return runtime.meterProvider
	}
	return otel.GetMeterProvider()
}
// Shutdown flushes and stops the configured telemetry providers by invoking
// the registered shutdown functions in reverse order and joining their errors.
//
// Shutdown is idempotent: only the first call runs the shutdown functions, and
// every later call returns the result recorded by that first call. The mutex is
// held for the whole operation, so a concurrent caller waits for the in-flight
// shutdown to finish instead of returning a premature nil error. A nil runtime
// is a no-op.
func (runtime *Runtime) Shutdown(ctx context.Context) error {
	if runtime == nil {
		return nil
	}

	runtime.shutdownMu.Lock()
	defer runtime.shutdownMu.Unlock()

	if runtime.shutdownDone {
		return runtime.shutdownErr
	}
	// Mark completion up front so the functions run at most once even if one
	// of them panics.
	runtime.shutdownDone = true

	var shutdownErr error
	for index := len(runtime.shutdownFns) - 1; index >= 0; index-- {
		shutdownErr = errors.Join(shutdownErr, runtime.shutdownFns[index](ctx))
	}
	runtime.shutdownErr = shutdownErr

	return shutdownErr
}
// RecordInternalHTTPRequest counts one internal HTTP request and records its
// duration in milliseconds, tagging both measurements with attrs. A nil runtime
// is a no-op.
func (runtime *Runtime) RecordInternalHTTPRequest(ctx context.Context, attrs []attribute.KeyValue, duration time.Duration) {
	if runtime == nil {
		return
	}

	measurementAttrs := metric.WithAttributes(attrs...)
	recordCtx := normalizeContext(ctx)
	elapsedMillis := duration.Seconds() * 1000

	runtime.internalHTTPRequests.Add(recordCtx, 1, measurementAttrs)
	runtime.internalHTTPDuration.Record(recordCtx, elapsedMillis, measurementAttrs)
}
// RecordInternalHTTPEvent counts one internal HTTP server lifecycle event. The
// event name is trimmed of surrounding whitespace and attached as the "event"
// attribute. A nil runtime is a no-op.
func (runtime *Runtime) RecordInternalHTTPEvent(ctx context.Context, event string) {
	if runtime == nil {
		return
	}

	eventAttr := attribute.String("event", strings.TrimSpace(event))
	runtime.internalHTTPLifecycle.Add(normalizeContext(ctx), 1, metric.WithAttributes(eventAttr))
}
// RecordIntentOutcome counts one accepted notification-intent outcome, tagged
// with notification_type, producer, audience_kind and outcome. Each value goes
// through cleanAttribute with "unknown" as its second argument (presumably the
// fallback for blank values). A nil runtime is a no-op.
func (runtime *Runtime) RecordIntentOutcome(ctx context.Context, notificationType string, producer string, audienceKind string, outcome string) {
	if runtime == nil {
		return
	}

	attrs := []attribute.KeyValue{
		attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
		attribute.String("producer", cleanAttribute(producer, "unknown")),
		attribute.String("audience_kind", cleanAttribute(audienceKind, "unknown")),
		attribute.String("outcome", cleanAttribute(outcome, "unknown")),
	}
	runtime.intentOutcomes.Add(normalizeContext(ctx), 1, metric.WithAttributes(attrs...))
}
// RecordMalformedIntent counts one malformed or rejected notification intent,
// tagged with failure_code, notification_type and producer. Each value goes
// through cleanAttribute with "unknown" as its second argument (presumably the
// fallback for blank values). A nil runtime is a no-op.
func (runtime *Runtime) RecordMalformedIntent(ctx context.Context, failureCode string, notificationType string, producer string) {
	if runtime == nil {
		return
	}

	attrs := []attribute.KeyValue{
		attribute.String("failure_code", cleanAttribute(failureCode, "unknown")),
		attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
		attribute.String("producer", cleanAttribute(producer, "unknown")),
	}
	runtime.malformedIntents.Add(normalizeContext(ctx), 1, metric.WithAttributes(attrs...))
}
// RecordUserEnrichmentAttempt counts one User Service enrichment lookup
// outcome, tagged with notification_type and result. Each value goes through
// cleanAttribute with "unknown" as its second argument (presumably the fallback
// for blank values). A nil runtime is a no-op.
func (runtime *Runtime) RecordUserEnrichmentAttempt(ctx context.Context, notificationType string, result string) {
	if runtime == nil {
		return
	}

	attrs := []attribute.KeyValue{
		attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
		attribute.String("result", cleanAttribute(result, "unknown")),
	}
	runtime.userEnrichment.Add(normalizeContext(ctx), 1, metric.WithAttributes(attrs...))
}
// RecordRoutePublishAttempt counts one route publication attempt outcome,
// tagged with channel, notification_type, result and failure_classification.
// Values go through cleanAttribute; the second argument is "unknown" for the
// first three and "none" for failure_classification (presumably the fallbacks
// for blank values). A nil runtime is a no-op.
func (runtime *Runtime) RecordRoutePublishAttempt(ctx context.Context, channel string, notificationType string, result string, failureClassification string) {
	if runtime == nil {
		return
	}

	attrs := []attribute.KeyValue{
		attribute.String("channel", cleanAttribute(channel, "unknown")),
		attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
		attribute.String("result", cleanAttribute(result, "unknown")),
		attribute.String("failure_classification", cleanAttribute(failureClassification, "none")),
	}
	runtime.routePublishAttempts.Add(normalizeContext(ctx), 1, metric.WithAttributes(attrs...))
}
// RecordRouteRetry counts one route retry scheduling event, tagged with channel
// and notification_type. Each value goes through cleanAttribute with "unknown"
// as its second argument (presumably the fallback for blank values). A nil
// runtime is a no-op.
func (runtime *Runtime) RecordRouteRetry(ctx context.Context, channel string, notificationType string) {
	if runtime == nil {
		return
	}

	attrs := []attribute.KeyValue{
		attribute.String("channel", cleanAttribute(channel, "unknown")),
		attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
	}
	runtime.routeRetries.Add(normalizeContext(ctx), 1, metric.WithAttributes(attrs...))
}
// RecordRouteDeadLetter counts one route transition to dead_letter, tagged with
// channel, notification_type and failure_classification. Each value goes
// through cleanAttribute with "unknown" as its second argument (presumably the
// fallback for blank values). A nil runtime is a no-op.
func (runtime *Runtime) RecordRouteDeadLetter(ctx context.Context, channel string, notificationType string, failureClassification string) {
	if runtime == nil {
		return
	}

	attrs := []attribute.KeyValue{
		attribute.String("channel", cleanAttribute(channel, "unknown")),
		attribute.String("notification_type", cleanAttribute(notificationType, "unknown")),
		attribute.String("failure_classification", cleanAttribute(failureClassification, "unknown")),
	}
	runtime.routeDeadLetters.Add(normalizeContext(ctx), 1, metric.WithAttributes(attrs...))
}
// SetRouteScheduleSnapshotReader stores reader as the source for the
// observable route schedule gauges, replacing any previously installed reader.
// The write happens under routeScheduleReaderMu. A nil runtime is a no-op.
func (runtime *Runtime) SetRouteScheduleSnapshotReader(reader RouteScheduleSnapshotReader) {
	if runtime == nil {
		return
	}

	runtime.routeScheduleReaderMu.Lock()
	defer runtime.routeScheduleReaderMu.Unlock()

	runtime.routeScheduleReader = reader
}
// SetIntentStreamLagSnapshotReader stores reader as the source for the
// observable intent-stream lag gauge, replacing any previously installed
// reader. The write happens under intentStreamLagReaderMu. A nil runtime is a
// no-op.
func (runtime *Runtime) SetIntentStreamLagSnapshotReader(reader IntentStreamLagSnapshotReader) {
	if runtime == nil {
		return
	}

	runtime.intentStreamLagReaderMu.Lock()
	defer runtime.intentStreamLagReaderMu.Unlock()

	runtime.intentStreamLagReader = reader
}
// buildRuntime assembles a Runtime on top of the supplied providers.
//
// It obtains a meter named meterName, creates every counter, histogram, and
// observable gauge the service reports, and registers one callback that feeds
// the route-schedule and intent-stream gauges. A function that unregisters
// that callback is appended to the runtime's shutdown functions. shutdownFns
// is copied, so the caller's slice is not aliased.
//
// The first instrument or callback registration that fails aborts
// construction; its error is returned wrapped and no Runtime is returned.
func buildRuntime(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider, shutdownFns []func(context.Context) error) (*Runtime, error) {
	// wrap prefixes a registration failure with the label of what failed.
	wrap := func(label string, err error) error {
		return fmt.Errorf("build notification telemetry runtime: %s: %w", label, err)
	}

	meter := meterProvider.Meter(meterName)
	runtime := &Runtime{
		tracerProvider: tracerProvider,
		meterProvider:  meterProvider,
		shutdownFns:    append([]func(context.Context) error(nil), shutdownFns...),
	}

	var err error

	// Synchronous instruments are stored on the runtime as they are created.
	if runtime.internalHTTPRequests, err = meter.Int64Counter("notification.internal_http.requests"); err != nil {
		return nil, wrap("internal_http.requests", err)
	}
	if runtime.internalHTTPDuration, err = meter.Float64Histogram("notification.internal_http.duration_ms", metric.WithUnit("ms")); err != nil {
		return nil, wrap("internal_http.duration_ms", err)
	}
	if runtime.internalHTTPLifecycle, err = meter.Int64Counter("notification.internal_http.lifecycle"); err != nil {
		return nil, wrap("internal_http.lifecycle", err)
	}
	if runtime.intentOutcomes, err = meter.Int64Counter("notification.intent.outcomes"); err != nil {
		return nil, wrap("intent.outcomes", err)
	}
	if runtime.malformedIntents, err = meter.Int64Counter("notification.intent.malformed"); err != nil {
		return nil, wrap("intent.malformed", err)
	}
	if runtime.userEnrichment, err = meter.Int64Counter("notification.user_enrichment.attempts"); err != nil {
		return nil, wrap("user_enrichment.attempts", err)
	}
	if runtime.routePublishAttempts, err = meter.Int64Counter("notification.route.publish_attempts"); err != nil {
		return nil, wrap("route.publish_attempts", err)
	}
	if runtime.routeRetries, err = meter.Int64Counter("notification.route.retries"); err != nil {
		return nil, wrap("route.retries", err)
	}
	if runtime.routeDeadLetters, err = meter.Int64Counter("notification.route.dead_letters"); err != nil {
		return nil, wrap("route.dead_letters", err)
	}

	// Observable gauges are only referenced by the callback below, so they
	// stay local instead of being stored on the runtime.
	scheduleDepthGauge, err := meter.Int64ObservableGauge("notification.route_schedule.depth")
	if err != nil {
		return nil, wrap("route_schedule.depth", err)
	}
	scheduleOldestAgeGauge, err := meter.Int64ObservableGauge("notification.route_schedule.oldest_age_ms", metric.WithUnit("ms"))
	if err != nil {
		return nil, wrap("route_schedule.oldest_age_ms", err)
	}
	streamOldestAgeGauge, err := meter.Int64ObservableGauge("notification.intent_stream.oldest_unprocessed_age_ms", metric.WithUnit("ms"))
	if err != nil {
		return nil, wrap("intent_stream.oldest_unprocessed_age_ms", err)
	}

	observe := func(ctx context.Context, observer metric.Observer) error {
		runtime.observeRouteSchedule(ctx, observer, scheduleDepthGauge, scheduleOldestAgeGauge)
		runtime.observeIntentStreamLag(ctx, observer, streamOldestAgeGauge)
		return nil
	}
	registration, err := meter.RegisterCallback(observe, scheduleDepthGauge, scheduleOldestAgeGauge, streamOldestAgeGauge)
	if err != nil {
		return nil, wrap("observable callbacks", err)
	}
	runtime.shutdownFns = append(runtime.shutdownFns, func(context.Context) error {
		return registration.Unregister()
	})

	return runtime, nil
}
// newTracerProvider builds the SDK tracer provider for the process.
//
// The provider always carries res. A batching OTLP exporter is attached when
// traceExporter returns one for cfg, and a batching stdout exporter is
// attached when cfg.StdoutTracesEnabled is set.
//
// All exporters are constructed before any batch processor is created, so a
// late construction failure cannot leave a processor running. If the stdout
// exporter cannot be built, an OTLP exporter that was already created is shut
// down before the error is returned.
func newTracerProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig) (*sdktrace.TracerProvider, error) {
	otlpExporter, err := traceExporter(ctx, cfg)
	if err != nil {
		return nil, err
	}

	exporters := make([]sdktrace.SpanExporter, 0, 2)
	if otlpExporter != nil {
		exporters = append(exporters, otlpExporter)
	}

	if cfg.StdoutTracesEnabled {
		stdoutExporter, err := stdouttrace.New(stdouttrace.WithWriter(os.Stdout))
		if err != nil {
			if otlpExporter != nil {
				// Best effort: the construction error is the one worth
				// reporting, so a secondary shutdown failure is dropped.
				_ = otlpExporter.Shutdown(ctx)
			}
			return nil, fmt.Errorf("stdout traces exporter: %w", err)
		}
		exporters = append(exporters, stdoutExporter)
	}

	options := make([]sdktrace.TracerProviderOption, 0, 1+len(exporters))
	options = append(options, sdktrace.WithResource(res))
	for _, exporter := range exporters {
		options = append(options, sdktrace.WithBatcher(exporter))
	}

	return sdktrace.NewTracerProvider(options...), nil
}
// newMeterProvider builds the SDK meter provider for the process.
//
// The provider always carries res. A periodic reader over an OTLP exporter is
// attached when metricExporter returns one for cfg, and a periodic reader
// over a stdout exporter is attached when cfg.StdoutMetricsEnabled is set.
//
// All exporters are constructed before any periodic reader is created, so a
// late construction failure cannot leave a reader running. If the stdout
// exporter cannot be built, an OTLP exporter that was already created is shut
// down before the error is returned.
func newMeterProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig) (*sdkmetric.MeterProvider, error) {
	otlpExporter, err := metricExporter(ctx, cfg)
	if err != nil {
		return nil, err
	}

	exporters := make([]sdkmetric.Exporter, 0, 2)
	if otlpExporter != nil {
		exporters = append(exporters, otlpExporter)
	}

	if cfg.StdoutMetricsEnabled {
		stdoutExporter, err := stdoutmetric.New(stdoutmetric.WithWriter(os.Stdout))
		if err != nil {
			if otlpExporter != nil {
				// Best effort: the construction error is the one worth
				// reporting, so a secondary shutdown failure is dropped.
				_ = otlpExporter.Shutdown(ctx)
			}
			return nil, fmt.Errorf("stdout metrics exporter: %w", err)
		}
		exporters = append(exporters, stdoutExporter)
	}

	options := make([]sdkmetric.Option, 0, 1+len(exporters))
	options = append(options, sdkmetric.WithResource(res))
	for _, exporter := range exporters {
		options = append(options, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)))
	}

	return sdkmetric.NewMeterProvider(options...), nil
}
// traceExporter returns the OTLP span exporter selected by cfg.
//
// It returns (nil, nil) when cfg.TracesExporter is not the OTLP exporter. The
// gRPC exporter is used when the normalized cfg.TracesProtocol is gRPC; every
// other protocol value gets the HTTP exporter. Construction failures are
// returned wrapped.
func traceExporter(ctx context.Context, cfg ProcessConfig) (sdktrace.SpanExporter, error) {
	if cfg.TracesExporter != processExporterOTLP {
		return nil, nil
	}

	if normalizeProtocol(cfg.TracesProtocol) == processProtocolGRPC {
		grpcExporter, err := otlptracegrpc.New(ctx)
		if err != nil {
			return nil, fmt.Errorf("otlp grpc traces exporter: %w", err)
		}
		return grpcExporter, nil
	}

	httpExporter, err := otlptracehttp.New(ctx)
	if err != nil {
		return nil, fmt.Errorf("otlp http traces exporter: %w", err)
	}
	return httpExporter, nil
}
// metricExporter returns the OTLP metric exporter selected by cfg.
//
// It returns (nil, nil) when cfg.MetricsExporter is not the OTLP exporter.
// The gRPC exporter is used when the normalized cfg.MetricsProtocol is gRPC;
// every other protocol value gets the HTTP exporter. Construction failures
// are returned wrapped.
func metricExporter(ctx context.Context, cfg ProcessConfig) (sdkmetric.Exporter, error) {
	if cfg.MetricsExporter != processExporterOTLP {
		return nil, nil
	}

	if normalizeProtocol(cfg.MetricsProtocol) == processProtocolGRPC {
		grpcExporter, err := otlpmetricgrpc.New(ctx)
		if err != nil {
			return nil, fmt.Errorf("otlp grpc metrics exporter: %w", err)
		}
		return grpcExporter, nil
	}

	httpExporter, err := otlpmetrichttp.New(ctx)
	if err != nil {
		return nil, fmt.Errorf("otlp http metrics exporter: %w", err)
	}
	return httpExporter, nil
}
// normalizeProtocol maps a configured protocol name onto one of the two
// supported values: the gRPC protocol when value, ignoring surrounding
// whitespace, equals processProtocolGRPC, and processProtocolHTTPProtobuf for
// anything else (including the empty string).
func normalizeProtocol(value string) string {
	if strings.TrimSpace(value) == processProtocolGRPC {
		return processProtocolGRPC
	}
	return processProtocolHTTPProtobuf
}
// normalizeContext returns ctx unchanged when it is non-nil and the
// background context otherwise, so callers can always hand the result to the
// metric instruments.
func normalizeContext(ctx context.Context) context.Context {
	if ctx != nil {
		return ctx
	}
	return context.Background()
}
// cleanAttribute returns value with surrounding whitespace removed, or
// fallback when nothing is left after trimming.
func cleanAttribute(value string, fallback string) string {
	if cleaned := strings.TrimSpace(value); cleaned != "" {
		return cleaned
	}
	return fallback
}
// observeRouteSchedule reports the current route-schedule depth and the age,
// in milliseconds, of the oldest scheduled entry.
//
// Both gauges are always observed. They report zero when no reader is
// installed, when the reader fails (the error is forwarded to the global
// OpenTelemetry error handler), or when the snapshot carries no positive
// depth or no oldest timestamp. A timestamp in the future is reported as an
// age of zero.
func (runtime *Runtime) observeRouteSchedule(
	ctx context.Context,
	observer metric.Observer,
	depthGauge metric.Int64ObservableGauge,
	oldestAgeGauge metric.Int64ObservableGauge,
) {
	var depth, oldestAgeMs int64

	if reader := runtime.currentRouteScheduleReader(); reader != nil {
		snapshot, err := reader.ReadRouteScheduleSnapshot(ctx)
		switch {
		case err != nil:
			otel.Handle(fmt.Errorf("observe notification route schedule: %w", err))
		default:
			if snapshot.Depth > 0 {
				depth = snapshot.Depth
			}
			if oldest := snapshot.OldestScheduledFor; oldest != nil {
				if age := time.Since(oldest.UTC()).Milliseconds(); age > 0 {
					oldestAgeMs = age
				}
			}
		}
	}

	observer.ObserveInt64(depthGauge, depth)
	observer.ObserveInt64(oldestAgeGauge, oldestAgeMs)
}
// observeIntentStreamLag reports the age, in milliseconds, of the oldest
// unprocessed intent-stream entry.
//
// The gauge is always observed. It reports zero when no reader is installed,
// when the reader fails (the error is forwarded to the global OpenTelemetry
// error handler), or when the snapshot carries no oldest timestamp. A
// timestamp in the future is reported as an age of zero.
func (runtime *Runtime) observeIntentStreamLag(
	ctx context.Context,
	observer metric.Observer,
	oldestUnprocessedAgeGauge metric.Int64ObservableGauge,
) {
	var oldestAgeMs int64

	if reader := runtime.currentIntentStreamLagReader(); reader != nil {
		snapshot, err := reader.ReadIntentStreamLagSnapshot(ctx)
		switch {
		case err != nil:
			otel.Handle(fmt.Errorf("observe notification intent stream lag: %w", err))
		case snapshot.OldestUnprocessedAt != nil:
			if age := time.Since(snapshot.OldestUnprocessedAt.UTC()).Milliseconds(); age > 0 {
				oldestAgeMs = age
			}
		}
	}

	observer.ObserveInt64(oldestUnprocessedAgeGauge, oldestAgeMs)
}
// currentRouteScheduleReader returns the installed route-schedule reader,
// read under the reader's read lock. The result is nil when no reader has
// been installed.
func (runtime *Runtime) currentRouteScheduleReader() RouteScheduleSnapshotReader {
	runtime.routeScheduleReaderMu.RLock()
	installed := runtime.routeScheduleReader
	runtime.routeScheduleReaderMu.RUnlock()

	return installed
}
// currentIntentStreamLagReader returns the installed intent-stream lag
// reader, read under the reader's read lock. The result is nil when no reader
// has been installed.
func (runtime *Runtime) currentIntentStreamLagReader() IntentStreamLagSnapshotReader {
	runtime.intentStreamLagReaderMu.RLock()
	installed := runtime.intentStreamLagReader
	runtime.intentStreamLagReaderMu.RUnlock()

	return installed
}
@@ -0,0 +1,228 @@
package telemetry
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/attribute"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
)
// TestRuntimeRecordsMetrics drives every recording method of Runtime once (or
// twice with different attributes), installs stub snapshot readers, and then
// checks through a manual reader that each counter data point equals one and
// that the observable gauges report the stubbed schedule depth and positive
// ages.
func TestRuntimeRecordsMetrics(t *testing.T) {
	t.Parallel()

	reader := sdkmetric.NewManualReader()
	runtime, err := NewWithProviders(
		sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)),
		sdktrace.NewTracerProvider(),
	)
	require.NoError(t, err)

	ctx := context.Background()

	runtime.RecordInternalHTTPRequest(ctx, []attribute.KeyValue{
		attribute.String("route", "/healthz"),
		attribute.String("method", "GET"),
		attribute.String("edge_outcome", "success"),
	}, 5*time.Millisecond)
	runtime.RecordInternalHTTPEvent(ctx, "started")
	runtime.RecordIntentOutcome(ctx, "game.turn.ready", "game_master", "user", "accepted")
	runtime.RecordIntentOutcome(ctx, "game.turn.ready", "game_master", "user", "duplicate")
	runtime.RecordMalformedIntent(ctx, "idempotency_conflict", "game.turn.ready", "game_master")
	runtime.RecordUserEnrichmentAttempt(ctx, "game.turn.ready", "success")
	runtime.RecordUserEnrichmentAttempt(ctx, "game.turn.ready", "recipient_not_found")
	runtime.RecordRoutePublishAttempt(ctx, "push", "game.turn.ready", "published", "")
	runtime.RecordRoutePublishAttempt(ctx, "email", "game.turn.ready", "retry", "mail_stream_publish_failed")
	runtime.RecordRouteRetry(ctx, "email", "game.turn.ready")
	runtime.RecordRouteDeadLetter(ctx, "email", "game.turn.ready", "mail_stream_publish_failed")

	// Timestamps in the past make both age gauges strictly positive.
	scheduledAt := time.Now().Add(-time.Second).UTC()
	unprocessedAt := time.Now().Add(-2 * time.Second).UTC()
	runtime.SetRouteScheduleSnapshotReader(stubRouteScheduleSnapshotReader{
		snapshot: RouteScheduleSnapshot{
			Depth:              3,
			OldestScheduledFor: &scheduledAt,
		},
	})
	runtime.SetIntentStreamLagSnapshotReader(stubIntentStreamLagSnapshotReader{
		snapshot: IntentStreamLagSnapshot{
			OldestUnprocessedAt: &unprocessedAt,
		},
	})

	// Every counter data point below was incremented exactly once.
	counterExpectations := []struct {
		metricName string
		attrs      map[string]string
	}{
		{"notification.internal_http.requests", map[string]string{
			"route":        "/healthz",
			"method":       "GET",
			"edge_outcome": "success",
		}},
		{"notification.internal_http.lifecycle", map[string]string{
			"event": "started",
		}},
		{"notification.intent.outcomes", map[string]string{
			"notification_type": "game.turn.ready",
			"producer":          "game_master",
			"audience_kind":     "user",
			"outcome":           "accepted",
		}},
		{"notification.intent.outcomes", map[string]string{
			"notification_type": "game.turn.ready",
			"producer":          "game_master",
			"audience_kind":     "user",
			"outcome":           "duplicate",
		}},
		{"notification.intent.malformed", map[string]string{
			"failure_code":      "idempotency_conflict",
			"notification_type": "game.turn.ready",
			"producer":          "game_master",
		}},
		{"notification.user_enrichment.attempts", map[string]string{
			"notification_type": "game.turn.ready",
			"result":            "success",
		}},
		{"notification.user_enrichment.attempts", map[string]string{
			"notification_type": "game.turn.ready",
			"result":            "recipient_not_found",
		}},
		{"notification.route.publish_attempts", map[string]string{
			"channel":                "push",
			"notification_type":      "game.turn.ready",
			"result":                 "published",
			"failure_classification": "none",
		}},
		{"notification.route.publish_attempts", map[string]string{
			"channel":                "email",
			"notification_type":      "game.turn.ready",
			"result":                 "retry",
			"failure_classification": "mail_stream_publish_failed",
		}},
		{"notification.route.retries", map[string]string{
			"channel":           "email",
			"notification_type": "game.turn.ready",
		}},
		{"notification.route.dead_letters", map[string]string{
			"channel":                "email",
			"notification_type":      "game.turn.ready",
			"failure_classification": "mail_stream_publish_failed",
		}},
	}
	for _, expectation := range counterExpectations {
		assertMetricCount(t, reader, expectation.metricName, expectation.attrs, 1)
	}

	assertGaugeValue(t, reader, "notification.route_schedule.depth", nil, 3)
	assertGaugePositive(t, reader, "notification.route_schedule.oldest_age_ms", nil)
	assertGaugePositive(t, reader, "notification.intent_stream.oldest_unprocessed_age_ms", nil)
}
// assertMetricCount collects from reader and asserts that the int64 sum
// metric named metricName has a data point whose attributes match wantAttrs
// and whose value equals wantValue. The test fails immediately when
// collection fails, when the metric is not an int64 sum, or when no matching
// data point exists.
func assertMetricCount(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string, wantValue int64) {
	t.Helper()

	var collected metricdata.ResourceMetrics
	require.NoError(t, reader.Collect(context.Background(), &collected))

	for _, scope := range collected.ScopeMetrics {
		for _, candidate := range scope.Metrics {
			if candidate.Name == metricName {
				sum, isSum := candidate.Data.(metricdata.Sum[int64])
				require.True(t, isSum)

				for _, point := range sum.DataPoints {
					if !hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) {
						continue
					}
					assert.Equal(t, wantValue, point.Value)
					return
				}
			}
		}
	}

	require.Failf(t, "test failed", "metric %q with attrs %v not found", metricName, wantAttrs)
}
// assertGaugeValue collects from reader and asserts that the int64 gauge
// named metricName has a data point whose attributes match wantAttrs and
// whose value equals wantValue. The test fails immediately when collection
// fails, when the metric is not an int64 gauge, or when no matching data
// point exists.
func assertGaugeValue(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string, wantValue int64) {
	t.Helper()

	var collected metricdata.ResourceMetrics
	require.NoError(t, reader.Collect(context.Background(), &collected))

	for _, scope := range collected.ScopeMetrics {
		for _, candidate := range scope.Metrics {
			if candidate.Name == metricName {
				gauge, isGauge := candidate.Data.(metricdata.Gauge[int64])
				require.True(t, isGauge)

				for _, point := range gauge.DataPoints {
					if !hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) {
						continue
					}
					assert.Equal(t, wantValue, point.Value)
					return
				}
			}
		}
	}

	require.Failf(t, "test failed", "gauge %q with attrs %v not found", metricName, wantAttrs)
}
// assertGaugePositive collects from reader and asserts that the int64 gauge
// named metricName has a data point whose attributes match wantAttrs and
// whose value is greater than zero. The test fails immediately when
// collection fails, when the metric is not an int64 gauge, or when no
// matching data point exists.
func assertGaugePositive(t *testing.T, reader *sdkmetric.ManualReader, metricName string, wantAttrs map[string]string) {
	t.Helper()

	var collected metricdata.ResourceMetrics
	require.NoError(t, reader.Collect(context.Background(), &collected))

	for _, scope := range collected.ScopeMetrics {
		for _, candidate := range scope.Metrics {
			if candidate.Name == metricName {
				gauge, isGauge := candidate.Data.(metricdata.Gauge[int64])
				require.True(t, isGauge)

				for _, point := range gauge.DataPoints {
					if !hasMetricAttributes(point.Attributes.ToSlice(), wantAttrs) {
						continue
					}
					assert.Greater(t, point.Value, int64(0))
					return
				}
			}
		}
	}

	require.Failf(t, "test failed", "gauge %q with attrs %v not found", metricName, wantAttrs)
}
// hasMetricAttributes reports whether values carries exactly the attributes
// described by want: the same number of entries, every key present in want,
// and every value equal to the expected string. An empty or nil want matches
// only an empty attribute set.
//
// Keys are looked up with the comma-ok form so that an unexpected key whose
// value is the empty string cannot match a missing map entry.
func hasMetricAttributes(values []attribute.KeyValue, want map[string]string) bool {
	if len(values) != len(want) {
		return false
	}

	for _, value := range values {
		expected, known := want[string(value.Key)]
		if !known || expected != value.Value.AsString() {
			return false
		}
	}

	return true
}
// stubRouteScheduleSnapshotReader is a test double that answers every read
// with a fixed snapshot and error.
type stubRouteScheduleSnapshotReader struct {
	// snapshot is the value returned by ReadRouteScheduleSnapshot.
	snapshot RouteScheduleSnapshot
	// err is the error returned by ReadRouteScheduleSnapshot.
	err error
}

// ReadRouteScheduleSnapshot returns the configured snapshot and error,
// ignoring the context.
func (stub stubRouteScheduleSnapshotReader) ReadRouteScheduleSnapshot(context.Context) (RouteScheduleSnapshot, error) {
	return stub.snapshot, stub.err
}
// stubIntentStreamLagSnapshotReader is a test double that answers every read
// with a fixed snapshot and error.
type stubIntentStreamLagSnapshotReader struct {
	// snapshot is the value returned by ReadIntentStreamLagSnapshot.
	snapshot IntentStreamLagSnapshot
	// err is the error returned by ReadIntentStreamLagSnapshot.
	err error
}

// ReadIntentStreamLagSnapshot returns the configured snapshot and error,
// ignoring the context.
func (stub stubIntentStreamLagSnapshotReader) ReadIntentStreamLagSnapshot(context.Context) (IntentStreamLagSnapshot, error) {
	return stub.snapshot, stub.err
}
+3
View File
@@ -0,0 +1,3 @@
// Package worker provides the long-lived background components used by the
// runnable Notification Service process.
package worker
@@ -0,0 +1,421 @@
package worker
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/publishmail"
)
// Failure classifications recorded by EmailPublisher when a route cannot be
// published. They are passed to the route store and to telemetry.
const (
// emailFailureClassificationPayloadEncoding is used when the command encoder
// returns an error for the notification and route.
emailFailureClassificationPayloadEncoding = "payload_encoding_failed"
// emailFailureClassificationMailStreamWrite is used when recording the
// publication in the store fails with anything other than a conflict.
emailFailureClassificationMailStreamWrite = "mail_stream_publish_failed"
)
// EmailRouteStateStore describes the durable route-state operations required
// by EmailPublisher.
//
// Parameter meanings below are taken from how EmailPublisher calls each
// method in this file.
type EmailRouteStateStore interface {
// ListDueRoutes loads due scheduled routes. EmailPublisher passes the
// current time and its batch size.
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
// TryAcquireRouteLease attempts to acquire one temporary route lease.
// EmailPublisher passes the notification ID, route ID, its worker token, and
// the lease TTL; a false result with a nil error makes it skip the route.
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
// ReleaseRouteLease best-effort releases one temporary route lease.
// EmailPublisher passes the notification ID, route ID, and its worker token.
ReleaseRouteLease(context.Context, string, string, string) error
// GetNotification loads one accepted notification by notification ID. The
// boolean result reports whether the notification was found.
GetNotification(context.Context, string) (acceptintent.NotificationRecord, bool, error)
// GetRoute loads one accepted notification route by notification ID and
// route ID. The boolean result reports whether the route was found.
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
// CompleteRoutePublished records one successful publication.
// EmailPublisher treats redisstate.ErrConflict from this call as "nothing
// to do" rather than as a failure.
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
// CompleteRouteFailed records one retryable publication failure.
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
// CompleteRouteDeadLetter records one exhausted publication failure.
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
}
// EmailCommandEncoder encodes one email-capable notification route into a
// Mail Service-compatible generic command.
type EmailCommandEncoder interface {
// Encode converts notification plus route to one outbound command. An error
// makes EmailPublisher record the attempt as a payload-encoding failure.
Encode(acceptintent.NotificationRecord, acceptintent.NotificationRoute) (publishmail.Command, error)
}
// EmailPublisherConfig stores the dependencies and policies used by
// EmailPublisher. NewEmailPublisher validates the required fields and fills
// in defaults for the optional ones.
type EmailPublisherConfig struct {
// Store owns the durable route-state transitions. Required.
Store EmailRouteStateStore
// MailDeliveryCommandsStream stores the outbound Mail Service command
// stream name. Required; must not be blank.
MailDeliveryCommandsStream string
// RouteLeaseTTL stores the temporary route-lease lifetime. Must be positive.
RouteLeaseTTL time.Duration
// RouteBackoffMin stores the minimum retry backoff. Must be positive and
// must not exceed RouteBackoffMax.
RouteBackoffMin time.Duration
// RouteBackoffMax stores the maximum retry backoff. Must be positive.
RouteBackoffMax time.Duration
// PollInterval stores how long the worker waits before the next due-route
// scan when no progress was made. Non-positive values are replaced by
// defaultPushPublisherPollInterval.
PollInterval time.Duration
// BatchSize stores the maximum number of due schedule members loaded per
// scan. Non-positive values are replaced by defaultPushPublisherBatchSize.
BatchSize int64
// Encoder stores the email command encoder. Nil selects
// publishmail.Encoder{}.
Encoder EmailCommandEncoder
// Telemetry records route publication counters. Optional; when nil no
// counters are recorded.
Telemetry RoutePublisherTelemetry
// Clock provides wall-clock timestamps. Nil selects systemClock{}.
Clock Clock
}
// EmailPublisher publishes due email routes into the Mail Service command
// stream with retry and dead-letter handling.
type EmailPublisher struct {
// store performs lease handling, lookups, and route-state transitions.
store EmailRouteStateStore
// mailDeliveryCommandsStream is the stream name passed to the store when a
// route is recorded as published.
mailDeliveryCommandsStream string
// routeLeaseTTL is the lease lifetime and also bounds the lease release call.
routeLeaseTTL time.Duration
// routeBackoffMin and routeBackoffMax bound the delay before the next
// attempt after a retryable failure.
routeBackoffMin time.Duration
routeBackoffMax time.Duration
// pollInterval is the wait between scans that made no progress.
pollInterval time.Duration
// batchSize is the limit passed to ListDueRoutes.
batchSize int64
// encoder builds the outbound command for a notification and route.
encoder EmailCommandEncoder
// telemetry receives publication counters; it may be nil.
telemetry RoutePublisherTelemetry
// clock supplies the timestamps returned by now.
clock Clock
// workerToken identifies this publisher as lease owner in store calls.
workerToken string
// logger carries the "component" and "stream" attributes set at construction.
logger *slog.Logger
}
// NewEmailPublisher validates cfg and constructs the email publication
// worker.
//
// It returns an error when the store is nil, the stream name is blank, the
// lease TTL or either backoff bound is not positive, the minimum backoff
// exceeds the maximum, or a worker token cannot be generated. A non-positive
// poll interval or batch size, a nil clock, a nil encoder, and a nil logger
// are replaced by their defaults.
func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPublisher, error) {
	if cfg.Store == nil {
		return nil, errors.New("new email publisher: nil store")
	}
	if strings.TrimSpace(cfg.MailDeliveryCommandsStream) == "" {
		return nil, errors.New("new email publisher: mail delivery-commands stream must not be empty")
	}
	if cfg.RouteLeaseTTL <= 0 {
		return nil, errors.New("new email publisher: route lease ttl must be positive")
	}
	if cfg.RouteBackoffMin <= 0 {
		return nil, errors.New("new email publisher: route backoff min must be positive")
	}
	if cfg.RouteBackoffMax <= 0 {
		return nil, errors.New("new email publisher: route backoff max must be positive")
	}
	if cfg.RouteBackoffMin > cfg.RouteBackoffMax {
		return nil, errors.New("new email publisher: route backoff min must not exceed route backoff max")
	}

	pollInterval := cfg.PollInterval
	if pollInterval <= 0 {
		pollInterval = defaultPushPublisherPollInterval
	}
	batchSize := cfg.BatchSize
	if batchSize <= 0 {
		batchSize = defaultPushPublisherBatchSize
	}
	clock := cfg.Clock
	if clock == nil {
		clock = systemClock{}
	}
	encoder := cfg.Encoder
	if encoder == nil {
		encoder = publishmail.Encoder{}
	}
	if logger == nil {
		logger = slog.Default()
	}

	token, err := newWorkerToken()
	if err != nil {
		return nil, fmt.Errorf("new email publisher: %w", err)
	}

	publisher := &EmailPublisher{
		store:                      cfg.Store,
		mailDeliveryCommandsStream: cfg.MailDeliveryCommandsStream,
		routeLeaseTTL:              cfg.RouteLeaseTTL,
		routeBackoffMin:            cfg.RouteBackoffMin,
		routeBackoffMax:            cfg.RouteBackoffMax,
		pollInterval:               pollInterval,
		batchSize:                  batchSize,
		encoder:                    encoder,
		telemetry:                  cfg.Telemetry,
		clock:                      clock,
		workerToken:                token,
		logger:                     logger.With("component", "email_publisher", "stream", cfg.MailDeliveryCommandsStream),
	}
	return publisher, nil
}
// Run drives the email publication loop and blocks until ctx is canceled or
// an unexpected publication error occurs.
//
// Each iteration scans for due routes. When a scan made progress the next
// scan starts immediately; otherwise the loop waits for the poll interval. A
// cancellation or deadline error observed while ctx is done is reported as
// ctx.Err(); any other scan error is returned wrapped. A nil ctx, an already
// finished ctx, and a nil publisher are rejected up front.
func (publisher *EmailPublisher) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run email publisher: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	if publisher == nil {
		return errors.New("run email publisher: nil publisher")
	}

	publisher.logger.Info("email publisher started",
		"poll_interval", publisher.pollInterval.String(),
		"batch_size", publisher.batchSize,
	)

	for {
		progress, err := publisher.publishDueRoutes(ctx)
		if err != nil {
			if ctx.Err() != nil && (errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)) {
				publisher.logger.Info("email publisher stopped")
				return ctx.Err()
			}
			return fmt.Errorf("run email publisher: %w", err)
		}

		if progress {
			// More work may be due right away; rescan without waiting.
			continue
		}

		if waitErr := waitWithContext(ctx, publisher.pollInterval); waitErr != nil {
			publisher.logger.Info("email publisher stopped")
			return waitErr
		}
	}
}
// Shutdown stops the email publisher within ctx. The worker stops through
// context cancellation and a bounded polling interval, so there is nothing
// to release here: the method only rejects a nil ctx and otherwise returns
// nil, for a nil publisher as well.
func (publisher *EmailPublisher) Shutdown(ctx context.Context) error {
	if ctx == nil {
		return errors.New("shutdown email publisher: nil context")
	}

	// Nil and non-nil publishers are handled alike: no resources are held.
	return nil
}
// publishDueRoutes runs one scan: it loads up to batchSize due routes as of
// the current time and attempts to publish each one whose route ID starts
// with "email:".
//
// The returned flag reports whether at least one route transition was
// recorded. The first error stops the scan and is returned together with the
// progress made so far.
func (publisher *EmailPublisher) publishDueRoutes(ctx context.Context) (bool, error) {
	scanTime := publisher.now()

	candidates, err := publisher.store.ListDueRoutes(ctx, scanTime, publisher.batchSize)
	if err != nil {
		return false, err
	}

	madeProgress := false
	for _, candidate := range candidates {
		if strings.HasPrefix(candidate.RouteID, "email:") {
			processed, err := publisher.publishRoute(ctx, scanTime, candidate)
			if err != nil {
				return madeProgress, err
			}
			if processed {
				madeProgress = true
			}
		}
	}

	return madeProgress, nil
}
// publishRoute attempts to publish one due route while holding its lease.
//
// The returned flag reports whether a route transition was recorded
// (published, rescheduled, or dead-lettered). It is false when the lease
// could not be acquired, when the stored route is not an email route in
// pending or failed status that is due at now, or when the store reports a
// conflict. Failures while acquiring the lease or loading state, and a
// missing notification or route, are returned as errors. Encoding and
// publication failures are handed to recordFailure.
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
	notificationID, routeID := dueRoute.NotificationID, dueRoute.RouteID

	leased, err := publisher.store.TryAcquireRouteLease(ctx, notificationID, routeID, publisher.workerToken, publisher.routeLeaseTTL)
	if err != nil {
		return false, fmt.Errorf("acquire route lease %q: %w", routeID, err)
	}
	if !leased {
		return false, nil
	}
	// The release runs on its own bounded context instead of ctx, and its
	// result is deliberately ignored because releasing is best effort.
	defer func() {
		releaseCtx, cancel := context.WithTimeout(context.Background(), publisher.routeLeaseTTL)
		defer cancel()
		_ = publisher.store.ReleaseRouteLease(releaseCtx, notificationID, routeID, publisher.workerToken)
	}()

	notification, found, err := publisher.store.GetNotification(ctx, notificationID)
	if err != nil {
		return false, fmt.Errorf("load notification %q: %w", notificationID, err)
	}
	if !found {
		return false, fmt.Errorf("notification %q is missing for route %q", notificationID, routeID)
	}

	route, found, err := publisher.store.GetRoute(ctx, notificationID, routeID)
	if err != nil {
		return false, fmt.Errorf("load route %q: %w", routeID, err)
	}
	if !found {
		return false, fmt.Errorf("route %q is missing for notification %q", routeID, notificationID)
	}

	// Re-check eligibility against the stored route: the schedule entry alone
	// does not prove the route is still a due, unpublished email route.
	publishable := route.Status == acceptintent.RouteStatusPending || route.Status == acceptintent.RouteStatusFailed
	if route.Channel != intentstream.ChannelEmail || !publishable || route.NextAttemptAt.After(now) {
		return false, nil
	}

	command, err := publisher.encoder.Encode(notification, route)
	if err != nil {
		return publisher.recordFailure(ctx, notification, route, emailFailureClassificationPayloadEncoding, err.Error())
	}

	err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    publisher.workerToken,
		PublishedAt:   publisher.now(),
		Stream:        publisher.mailDeliveryCommandsStream,
		StreamMaxLen:  0,
		StreamValues:  command.Values(),
	})
	if errors.Is(err, redisstate.ErrConflict) {
		return false, nil
	}
	if err != nil {
		return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
	}

	publisher.recordPublishAttempt(ctx, notification, route, "published", "")

	logArgs := logging.RouteAttrs(
		notification.NotificationID,
		notification.NotificationType,
		notification.Producer,
		notification.AudienceKind,
		notification.IdempotencyKey,
		notification.RequestID,
		notification.TraceID,
		route.RouteID,
		route.Channel,
	)
	logArgs = append(logArgs,
		"delivery_id", command.DeliveryID,
		"resolved_email", route.ResolvedEmail,
	)
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
	publisher.logger.Info("email route published", logArgs...)

	return true, nil
}
// recordFailure persists one failed publication attempt for route.
//
// The attempt number is the route's attempt count plus one. When it reaches
// route.MaxAttempts the route is dead-lettered; otherwise it is rescheduled
// after a backoff delay bounded by the publisher's minimum and maximum
// backoff. Telemetry and a warning log entry are emitted only after the
// store accepted the transition.
//
// The returned flag is true when a transition was recorded and false when
// the store reported a conflict. Any other store error is returned wrapped.
func (publisher *EmailPublisher) recordFailure(
	ctx context.Context,
	notification acceptintent.NotificationRecord,
	route acceptintent.NotificationRoute,
	classification string,
	message string,
) (bool, error) {
	failureAt := publisher.now()
	attemptNumber := route.AttemptCount + 1
	failureMessage := strings.TrimSpace(message)

	logArgs := logging.RouteAttrs(
		notification.NotificationID,
		notification.NotificationType,
		notification.Producer,
		notification.AudienceKind,
		notification.IdempotencyKey,
		notification.RequestID,
		notification.TraceID,
		route.RouteID,
		route.Channel,
	)
	logArgs = append(logArgs,
		"resolved_email", route.ResolvedEmail,
		"failure_classification", classification,
		"failure_message", failureMessage,
		"attempt_number", attemptNumber,
		"max_attempts", route.MaxAttempts,
	)
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)

	if attemptNumber >= route.MaxAttempts {
		// Attempts are exhausted: move the route to dead_letter.
		err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
			ExpectedRoute:         route,
			LeaseToken:            publisher.workerToken,
			DeadLetteredAt:        failureAt,
			FailureClassification: classification,
			FailureMessage:        failureMessage,
		})
		if errors.Is(err, redisstate.ErrConflict) {
			return false, nil
		}
		if err != nil {
			return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
		}

		publisher.recordPublishAttempt(ctx, notification, route, "dead_letter", classification)
		publisher.recordRouteDeadLetter(ctx, notification, route, classification)
		publisher.logger.Warn("email route dead-lettered", logArgs...)
		return true, nil
	}

	backoff := routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)
	nextAttemptAt := failureAt.Add(backoff).UTC().Truncate(time.Millisecond)

	err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            publisher.workerToken,
		FailedAt:              failureAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: classification,
		FailureMessage:        failureMessage,
	})
	if errors.Is(err, redisstate.ErrConflict) {
		return false, nil
	}
	if err != nil {
		return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
	}

	publisher.recordPublishAttempt(ctx, notification, route, "retry", classification)
	publisher.recordRouteRetry(ctx, notification, route)
	logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
	publisher.logger.Warn("email route failed and was rescheduled", logArgs...)
	return true, nil
}
// now returns the publisher clock's current time converted to UTC and
// truncated to millisecond precision.
func (publisher *EmailPublisher) now() time.Time {
	current := publisher.clock.Now().UTC()
	return current.Truncate(time.Millisecond)
}
// recordPublishAttempt forwards one publication attempt to telemetry with
// the route's channel, the notification type, the result, and the failure
// classification. It does nothing when the publisher or its telemetry is
// nil.
func (publisher *EmailPublisher) recordPublishAttempt(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, result string, classification string) {
	if publisher != nil && publisher.telemetry != nil {
		publisher.telemetry.RecordRoutePublishAttempt(ctx, string(route.Channel), string(notification.NotificationType), result, classification)
	}
}
// recordRouteRetry forwards one retry scheduling event to telemetry with the
// route's channel and the notification type. It does nothing when the
// publisher or its telemetry is nil.
func (publisher *EmailPublisher) recordRouteRetry(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) {
	if publisher != nil && publisher.telemetry != nil {
		publisher.telemetry.RecordRouteRetry(ctx, string(route.Channel), string(notification.NotificationType))
	}
}
// recordRouteDeadLetter forwards one dead-letter transition to telemetry
// with the route's channel, the notification type, and the failure
// classification. It does nothing when the publisher or its telemetry is
// nil.
func (publisher *EmailPublisher) recordRouteDeadLetter(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, classification string) {
	if publisher != nil && publisher.telemetry != nil {
		publisher.telemetry.RecordRouteDeadLetter(ctx, string(route.Channel), string(notification.NotificationType), classification)
	}
}
@@ -0,0 +1,232 @@
package worker
import (
"context"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/require"
)
// TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending accepts
// one notification, runs the publisher, and checks that the email route
// becomes published with exactly one command on the mail stream while the
// push route of the same notification stays pending.
func TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending(t *testing.T) {
	t.Parallel()

	const (
		notificationID = "1775121700000-0"
		emailRouteID   = "email:user:user-1"
		pushRouteID    = "push:user:user-1"
	)
	ctx := context.Background()

	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(ctx, validEmailAcceptanceInput(fixture.now, 0)))

	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)

	emailRoutePublished := func() bool {
		route, found, err := fixture.store.GetRoute(ctx, notificationID, emailRouteID)
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}
	require.Eventually(t, emailRoutePublished, time.Second, 10*time.Millisecond)

	pushRoute, found, err := fixture.store.GetRoute(ctx, notificationID, pushRouteID)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, pushRoute.Status)

	messages, err := fixture.client.XRange(ctx, fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)

	published := messages[0].Values
	require.Equal(t, "1775121700000-0/email:user:user-1", published["delivery_id"])
	require.Equal(t, "notification", published["source"])
	require.Equal(t, "template", published["payload_mode"])

	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
// TestEmailPublisherRetriesMailStreamPublicationFailures checks that a failed
// mail-stream write moves the route to failed with one attempt, and that the
// route is published on the second attempt once the stream key is usable again.
func TestEmailPublisherRetriesMailStreamPublicationFailures(t *testing.T) {
t.Parallel()
fixture := newEmailPublisherFixture(t)
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
// Occupy the mail stream key with a plain string value; the test expects the
// stream publication to fail while this value is in place.
require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
running := runEmailPublisher(t, fixture.publisher)
defer running.stop(t)
// Wait for the first attempt to be recorded as a failure.
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
}, time.Second, 10*time.Millisecond)
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "retry", emailFailureClassificationMailStreamWrite))
require.True(t, fixture.telemetry.hasRouteRetry("email"))
// Remove the blocking key so the next attempt can write to the stream.
require.NoError(t, fixture.client.Del(context.Background(), fixture.mailStream).Err())
// Wait for the retry to succeed; the attempt counter must then read 2.
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
}, 2*time.Second, 10*time.Millisecond)
messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
// TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publishers against the same store and mail stream and checks that only one
// mail command is written for the single email route.
func TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
t.Parallel()
fixture := newEmailPublisherFixture(t)
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
// The second publisher shares the store and stream with the fixture publisher
// but gets its own stepping clock and no telemetry recorder.
otherPublisher, err := NewEmailPublisher(EmailPublisherConfig{
Store: fixture.store,
MailDeliveryCommandsStream: fixture.mailStream,
RouteLeaseTTL: 200 * time.Millisecond,
RouteBackoffMin: 20 * time.Millisecond,
RouteBackoffMax: 20 * time.Millisecond,
PollInterval: 10 * time.Millisecond,
BatchSize: 16,
Clock: newSteppingClock(fixture.now, time.Millisecond),
}, testWorkerLogger())
require.NoError(t, err)
// Run both publishers concurrently so they compete for the same route.
first := runEmailPublisher(t, fixture.publisher)
defer first.stop(t)
second := runEmailPublisher(t, otherPublisher)
defer second.stop(t)
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
}, time.Second, 10*time.Millisecond)
// Only one command may exist in the stream at the moment the route is published.
messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
}
// TestEmailPublisherDeadLettersExhaustedRoute checks that a route which fails
// on its last allowed attempt is moved to dead_letter and that a dead-letter
// record with the mail-stream-write classification is stored.
func TestEmailPublisherDeadLettersExhaustedRoute(t *testing.T) {
t.Parallel()
fixture := newEmailPublisherFixture(t)
// Seed the email route with 6 recorded attempts; validEmailAcceptanceInput
// sets its attempt limit to 7, so one more failure exhausts it.
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 6)))
// Occupy the mail stream key with a plain string value; the test expects the
// stream publication to fail while this value is in place.
require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
running := runEmailPublisher(t, fixture.publisher)
defer running.stop(t)
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 7
}, time.Second, 10*time.Millisecond)
// Read the stored dead-letter record directly from Redis and decode it.
deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "email:user:user-1")).Bytes()
require.NoError(t, err)
deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
require.NoError(t, err)
require.Equal(t, emailFailureClassificationMailStreamWrite, deadLetter.FailureClassification)
require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "dead_letter", emailFailureClassificationMailStreamWrite))
require.True(t, fixture.telemetry.hasRouteDeadLetter("email", emailFailureClassificationMailStreamWrite))
}
// emailPublisherFixture bundles the collaborators shared by the email
// publisher tests.
type emailPublisherFixture struct {
// client is the Redis client connected to the per-test miniredis server.
client *redis.Client
// store is the acceptance store used to seed and inspect route state.
store *redisstate.AcceptanceStore
// publisher is the email publisher under test.
publisher *EmailPublisher
// mailStream is the stream name the publisher is configured to write to.
mailStream string
// now is the instant the fixture clock starts from.
now time.Time
// clock is the stepping clock handed to the publisher.
clock *steppingClock
// telemetry is the recorder handed to the publisher as its telemetry sink.
telemetry *recordingWorkerTelemetry
}
// newEmailPublisherFixture starts a per-test miniredis server and wires a
// Redis client, an acceptance store, a stepping clock, a telemetry recorder,
// and an email publisher on top of it. The Redis client is closed through
// t.Cleanup.
func newEmailPublisherFixture(t *testing.T) emailPublisherFixture {
	t.Helper()

	const mailStream = "mail:delivery_commands"

	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, client.Close())
	})

	store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	// All time-dependent collaborators start from the same fixed instant.
	now := time.UnixMilli(1775121700000).UTC()
	fixture := emailPublisherFixture{
		client:     client,
		store:      store,
		mailStream: mailStream,
		now:        now,
		clock:      newSteppingClock(now, time.Millisecond),
		telemetry:  &recordingWorkerTelemetry{},
	}

	fixture.publisher, err = NewEmailPublisher(EmailPublisherConfig{
		Store:                      store,
		MailDeliveryCommandsStream: mailStream,
		RouteLeaseTTL:              200 * time.Millisecond,
		RouteBackoffMin:            20 * time.Millisecond,
		RouteBackoffMax:            20 * time.Millisecond,
		PollInterval:               10 * time.Millisecond,
		BatchSize:                  16,
		Telemetry:                  fixture.telemetry,
		Clock:                      fixture.clock,
	}, testWorkerLogger())
	require.NoError(t, err)

	return fixture
}
// validEmailAcceptanceInput returns the input built by
// validPushAcceptanceInput with the route "email:user:user-1" adjusted: its
// attempt counter is set to emailAttemptCount and its attempt limit to 7.
// Other routes are left unchanged.
func validEmailAcceptanceInput(now time.Time, emailAttemptCount int) acceptintent.CreateAcceptanceInput {
	input := validPushAcceptanceInput(now)
	for index := range input.Routes {
		route := &input.Routes[index]
		if route.RouteID == "email:user:user-1" {
			route.AttemptCount = emailAttemptCount
			route.MaxAttempts = 7
		}
	}
	return input
}
// runningEmailPublisher is the handle of an email publisher started by
// runEmailPublisher.
type runningEmailPublisher struct {
// cancel cancels the context passed to the publisher's Run.
cancel context.CancelFunc
// resultCh receives the value returned by Run.
resultCh chan error
}
// runEmailPublisher starts publisher.Run on its own goroutine with a
// cancellable background context and returns a handle for stopping it. The
// result channel is buffered so the goroutine can exit even when nobody reads
// the result.
func runEmailPublisher(t *testing.T, publisher *EmailPublisher) runningEmailPublisher {
	t.Helper()

	ctx, cancel := context.WithCancel(context.Background())
	running := runningEmailPublisher{
		cancel:   cancel,
		resultCh: make(chan error, 1),
	}
	go func() {
		running.resultCh <- publisher.Run(ctx)
	}()

	return running
}
// stop cancels the publisher context and waits up to one second for Run to
// return. The test fails when Run returns anything other than an error
// matching context.Canceled, or when it does not return in time.
func (r runningEmailPublisher) stop(t *testing.T) {
	t.Helper()

	r.cancel()

	timeout := time.After(time.Second)
	select {
	case <-timeout:
		require.FailNow(t, "email publisher did not stop")
	case runErr := <-r.resultCh:
		require.ErrorIs(t, runErr, context.Canceled)
	}
}
@@ -0,0 +1,331 @@
package worker
import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"github.com/redis/go-redis/v9"
)
// AcceptIntentUseCase accepts one normalized notification intent.
//
// IntentConsumer records acceptintent.ErrConflict and
// acceptintent.ErrRecipientNotFound as malformed intents and keeps consuming;
// any other error returned by Execute stops the consumer.
type AcceptIntentUseCase interface {
// Execute durably accepts one normalized notification intent and reports
// the acceptance result.
Execute(context.Context, acceptintent.AcceptInput) (acceptintent.Result, error)
}
// MalformedIntentRecorder stores one operator-visible malformed-intent record.
// A Record error stops the consumer before the stream offset is advanced.
type MalformedIntentRecorder interface {
// Record persists entry idempotently by stream entry id.
Record(context.Context, malformedintent.Entry) error
}
// StreamOffsetStore stores the last durably processed entry id of one plain
// XREAD consumer.
type StreamOffsetStore interface {
// Load returns the last processed entry id for the named stream and whether
// one is stored. IntentConsumer starts from "0-0" when none is stored.
Load(context.Context, string) (string, bool, error)
// Save stores the last processed entry id for a stream. The consumer passes
// the stream name first and the entry id second.
Save(context.Context, string, string) error
}
// IntentConsumerTelemetry records low-cardinality stream-consumer events.
type IntentConsumerTelemetry interface {
// RecordMalformedIntent records one malformed or rejected notification
// intent. The consumer passes, in order, the failure code, the raw
// notification type, and the raw producer of the rejected entry.
RecordMalformedIntent(context.Context, string, string, string)
}
// Clock provides the current wall-clock time. It lets callers substitute the
// time source; a nil Clock in IntentConsumerConfig defaults to systemClock.
type Clock interface {
// Now returns the current time.
Now() time.Time
}
// systemClock is the Clock implementation backed by time.Now.
type systemClock struct{}

// Now returns the time reported by time.Now.
func (systemClock) Now() time.Time {
	current := time.Now()
	return current
}
// IntentConsumerConfig stores the dependencies used by IntentConsumer.
// NewIntentConsumer requires Client, Stream, BlockTimeout, Acceptor,
// MalformedRecorder, and OffsetStore; Telemetry and Clock are optional.
type IntentConsumerConfig struct {
// Client stores the Redis client used for XREAD. Required.
Client *redis.Client
// Stream stores the Redis Stream name to consume. Must not be blank.
Stream string
// BlockTimeout stores the blocking XREAD timeout. Must be positive.
BlockTimeout time.Duration
// Acceptor durably accepts valid notification intents. Required.
Acceptor AcceptIntentUseCase
// MalformedRecorder persists operator-visible malformed-intent entries.
// Required.
MalformedRecorder MalformedIntentRecorder
// OffsetStore stores the last durably processed stream entry id. Required.
OffsetStore StreamOffsetStore
// Telemetry records malformed-intent counters. Optional; nil disables
// telemetry recording.
Telemetry IntentConsumerTelemetry
// Clock provides wall-clock timestamps for malformed-intent records.
// Optional; nil selects the system clock.
Clock Clock
}
// IntentConsumer stores the Redis Streams consumer used for notification
// intent intake. Construct it with NewIntentConsumer.
type IntentConsumer struct {
// client is the Redis client used for XREAD.
client *redis.Client
// stream is the name of the consumed stream.
stream string
// blockTimeout is the blocking timeout passed to every XREAD call.
blockTimeout time.Duration
// acceptor accepts successfully decoded intents.
acceptor AcceptIntentUseCase
// malformedRecorder persists rejected entries.
malformedRecorder MalformedIntentRecorder
// offsetStore loads and saves the last processed entry id.
offsetStore StreamOffsetStore
// telemetry may be nil, in which case no telemetry is recorded.
telemetry IntentConsumerTelemetry
// clock timestamps malformed-intent records.
clock Clock
// logger carries the "component" and "stream" attributes.
logger *slog.Logger
}
// NewIntentConsumer validates cfg and builds the notification-intent consumer.
//
// It returns an error when the Redis client, acceptor, malformed recorder, or
// offset store is nil, when the stream name is blank, or when the block
// timeout is not positive. A nil Clock falls back to the system clock and a
// nil logger to slog.Default. Telemetry may be left nil.
func NewIntentConsumer(cfg IntentConsumerConfig, logger *slog.Logger) (*IntentConsumer, error) {
	if cfg.Client == nil {
		return nil, errors.New("new intent consumer: nil redis client")
	}
	if strings.TrimSpace(cfg.Stream) == "" {
		return nil, errors.New("new intent consumer: stream must not be empty")
	}
	if cfg.BlockTimeout <= 0 {
		return nil, errors.New("new intent consumer: block timeout must be positive")
	}
	if cfg.Acceptor == nil {
		return nil, errors.New("new intent consumer: nil acceptor")
	}
	if cfg.MalformedRecorder == nil {
		return nil, errors.New("new intent consumer: nil malformed recorder")
	}
	if cfg.OffsetStore == nil {
		return nil, errors.New("new intent consumer: nil offset store")
	}

	clock := cfg.Clock
	if clock == nil {
		clock = systemClock{}
	}
	if logger == nil {
		logger = slog.Default()
	}

	consumer := &IntentConsumer{
		client:            cfg.Client,
		stream:            cfg.Stream,
		blockTimeout:      cfg.BlockTimeout,
		acceptor:          cfg.Acceptor,
		malformedRecorder: cfg.MalformedRecorder,
		offsetStore:       cfg.OffsetStore,
		telemetry:         cfg.Telemetry,
		clock:             clock,
		logger:            logger.With("component", "intent_consumer", "stream", cfg.Stream),
	}
	return consumer, nil
}
// Run consumes the configured stream one entry at a time until ctx is
// canceled or an unexpected error occurs.
//
// Reading resumes after the stored offset, or from "0-0" when no offset is
// stored. Each entry is handled first and its id is saved as the new offset
// afterwards, so an entry whose handling fails is not skipped. When ctx ends
// and the read fails with a cancellation, deadline, or closed-client error,
// Run returns ctx.Err(); every other read error is returned wrapped.
func (consumer *IntentConsumer) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run intent consumer: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	if consumer == nil || consumer.client == nil {
		return errors.New("run intent consumer: nil consumer")
	}

	lastID, found, err := consumer.offsetStore.Load(ctx, consumer.stream)
	if err != nil {
		return fmt.Errorf("run intent consumer: load stream offset: %w", err)
	}
	if !found {
		lastID = "0-0"
	}

	consumer.logger.Info("intent consumer started", "block_timeout", consumer.blockTimeout.String(), "start_entry_id", lastID)

	for {
		streams, readErr := consumer.client.XRead(ctx, &redis.XReadArgs{
			Streams: []string{consumer.stream, lastID},
			Count:   1,
			Block:   consumer.blockTimeout,
		}).Result()
		if readErr != nil {
			// redis.Nil is treated as "nothing to process"; read again.
			if errors.Is(readErr, redis.Nil) {
				continue
			}

			interrupted := errors.Is(readErr, context.Canceled) ||
				errors.Is(readErr, context.DeadlineExceeded) ||
				errors.Is(readErr, redis.ErrClosed)
			if ctx.Err() != nil && interrupted {
				consumer.logger.Info("intent consumer stopped")
				return ctx.Err()
			}

			return fmt.Errorf("run intent consumer: %w", readErr)
		}

		for _, stream := range streams {
			for _, message := range stream.Messages {
				if err := consumer.handleMessage(ctx, message); err != nil {
					return err
				}
				// Persist the offset only after the entry was handled.
				if err := consumer.offsetStore.Save(ctx, consumer.stream, message.ID); err != nil {
					return fmt.Errorf("run intent consumer: save stream offset: %w", err)
				}
				lastID = message.ID
			}
		}
	}
}
// handleMessage decodes one stream entry and hands it to the acceptor, using
// the stream entry id as the notification id.
//
// Decode failures, idempotency conflicts, and unknown recipients are recorded
// as malformed intents; in those cases the result of recordMalformed is
// returned, which is nil on success so the caller can advance the offset.
// Every other acceptor error, including acceptintent.ErrServiceUnavailable, is
// returned wrapped so the entry is not skipped.
func (consumer *IntentConsumer) handleMessage(ctx context.Context, message redis.XMessage) error {
	rawFields := cloneRawFields(message.Values)

	intent, decodeErr := intentstream.DecodeIntent(rawFields)
	if decodeErr != nil {
		return consumer.recordMalformed(
			ctx,
			message.ID,
			rawFields,
			intentstream.ClassifyDecodeError(decodeErr),
			decodeErr,
		)
	}

	result, err := consumer.acceptor.Execute(ctx, acceptintent.AcceptInput{
		NotificationID: message.ID,
		Intent:         intent,
	})
	if err != nil {
		if errors.Is(err, acceptintent.ErrConflict) {
			return consumer.recordMalformed(ctx, message.ID, rawFields, malformedintent.FailureCodeIdempotencyConflict, err)
		}
		if errors.Is(err, acceptintent.ErrRecipientNotFound) {
			return consumer.recordMalformed(ctx, message.ID, rawFields, malformedintent.FailureCodeRecipientNotFound, err)
		}
		// Everything else is surfaced to the caller unchanged apart from the
		// added context.
		return fmt.Errorf("handle intent %q: %w", message.ID, err)
	}

	logArgs := []any{
		"stream_entry_id", message.ID,
		"notification_id", message.ID,
	}
	logArgs = append(logArgs, logging.IntentAttrs(intent)...)
	logArgs = append(logArgs, "outcome", string(result.Outcome))
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
	consumer.logger.Info("notification intent handled", logArgs...)

	return nil
}
// recordMalformed persists one rejected stream entry, counts it in telemetry
// when a telemetry sink is configured, and logs a warning.
//
// The notification type, producer, and idempotency key are taken from
// rawFields when present as string or []byte values. The failure message is
// the trimmed text of cause, which must be non-nil. The record timestamp is
// the clock time in UTC truncated to milliseconds. An error is returned only
// when the recorder fails.
func (consumer *IntentConsumer) recordMalformed(
	ctx context.Context,
	streamEntryID string,
	rawFields map[string]any,
	failureCode malformedintent.FailureCode,
	cause error,
) error {
	notificationType := optionalRawString(rawFields, "notification_type")
	producer := optionalRawString(rawFields, "producer")
	idempotencyKey := optionalRawString(rawFields, "idempotency_key")
	failureMessage := strings.TrimSpace(cause.Error())

	entry := malformedintent.Entry{
		StreamEntryID:    streamEntryID,
		NotificationType: notificationType,
		Producer:         producer,
		IdempotencyKey:   idempotencyKey,
		FailureCode:      failureCode,
		FailureMessage:   failureMessage,
		RawFields:        cloneRawFields(rawFields),
		RecordedAt:       consumer.clock.Now().UTC().Truncate(time.Millisecond),
	}
	if err := consumer.malformedRecorder.Record(ctx, entry); err != nil {
		return fmt.Errorf("record malformed intent %q: %w", streamEntryID, err)
	}

	if consumer.telemetry != nil {
		consumer.telemetry.RecordMalformedIntent(ctx, string(failureCode), notificationType, producer)
	}

	logArgs := []any{
		"stream_entry_id", streamEntryID,
		"notification_type", notificationType,
		"producer", producer,
		"idempotency_key", idempotencyKey,
		"failure_code", string(failureCode),
		"failure_message", failureMessage,
	}
	// Include the trace id supplied in the entry itself, when there is one.
	if traceID := optionalRawString(rawFields, "trace_id"); traceID != "" {
		logArgs = append(logArgs, "trace_id", traceID)
	}
	logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
	consumer.logger.Warn("notification intent rejected", logArgs...)

	return nil
}
// cloneRawFields returns a deep copy of values. A nil map yields an empty,
// non-nil map so callers can always index the result.
func cloneRawFields(values map[string]any) map[string]any {
	if values == nil {
		return map[string]any{}
	}

	cloned := make(map[string]any, len(values))
	for key, value := range values {
		cloned[key] = cloneRawValue(value)
	}

	return cloned
}

// cloneRawValue returns a copy of value that shares no mutable state with it.
// Nested maps and slices are copied recursively and byte slices are duplicated
// so later writes to the source bytes cannot change the clone. Values of any
// other type are returned as they are.
func cloneRawValue(value any) any {
	switch typed := value.(type) {
	case map[string]any:
		return cloneRawFields(typed)
	case []any:
		cloned := make([]any, len(typed))
		for index, item := range typed {
			cloned[index] = cloneRawValue(item)
		}
		return cloned
	case []byte:
		// Keep a nil byte slice nil; otherwise copy into fresh backing storage.
		if typed == nil {
			return typed
		}
		cloned := make([]byte, len(typed))
		copy(cloned, typed)
		return cloned
	default:
		return typed
	}
}
// optionalRawString returns values[key] as a string when it is stored as a
// string or a []byte. A missing key or a value of any other type yields the
// empty string.
func optionalRawString(values map[string]any, key string) string {
	// A missing key produces a nil interface, which matches neither case.
	switch typed := values[key].(type) {
	case string:
		return typed
	case []byte:
		return string(typed)
	}
	return ""
}
// Shutdown exists for lifecycle symmetry: the consumer stops through context
// cancellation and a bounded block timeout, so there is nothing to release
// here. It rejects a nil ctx and otherwise returns nil, including when called
// on a nil consumer.
func (consumer *IntentConsumer) Shutdown(ctx context.Context) error {
	if ctx == nil {
		return errors.New("shutdown intent consumer: nil context")
	}

	// The receiver is never dereferenced, so a nil consumer is safe here.
	return nil
}
@@ -0,0 +1,422 @@
package worker
import (
"context"
"errors"
"io"
"log/slog"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/config"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists checks that an
// entry added before the consumer starts is accepted when no offset has been
// saved for the stream.
func TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists(t *testing.T) {
t.Parallel()
fixture := newIntentConsumerFixture(t, stubUserDirectory{
records: map[string]acceptintent.UserRecord{
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
},
})
// The entry is written before the consumer starts and no offset is saved.
messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
running := runIntentConsumer(t, fixture.consumer)
defer running.stop(t)
// The notification is stored under the stream entry id once accepted.
require.Eventually(t, func() bool {
_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), messageID)
return err == nil && found
}, time.Second, 10*time.Millisecond)
}
// TestIntentConsumerContinuesFromSavedOffsetAfterRestart checks that the
// consumer resumes after the saved offset: the entry at the offset is skipped
// and only the following entry is accepted.
func TestIntentConsumerContinuesFromSavedOffsetAfterRestart(t *testing.T) {
t.Parallel()
fixture := newIntentConsumerFixture(t, stubUserDirectory{
records: map[string]acceptintent.UserRecord{
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
},
})
firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
// Mark the first entry as already processed without ever accepting it.
require.NoError(t, fixture.offsetStore.Save(context.Background(), fixture.stream, firstID))
secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
running := runIntentConsumer(t, fixture.consumer)
defer running.stop(t)
require.Eventually(t, func() bool {
_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), secondID)
return err == nil && found
}, time.Second, 10*time.Millisecond)
// The entry at the saved offset must not have been processed again.
_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), firstID)
require.NoError(t, err)
require.False(t, found)
}
// TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset checks that a
// second intent reusing the idempotency key with a different payload is
// recorded as an idempotency conflict, that the offset still advances past it,
// and that only the first intent is stored as a notification.
func TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset(t *testing.T) {
	t.Parallel()

	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})

	// Both intents share the idempotency key set by addValidIntent but carry
	// different payloads.
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)

	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)

	// Wait until the malformed-intent record of the second entry is stored.
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(secondID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		// Compare against the shared constant, as the production code and the
		// sibling telemetry test do, instead of a duplicated string literal.
		return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict
	}, time.Second, 10*time.Millisecond)

	// The rejected entry must still move the stored offset forward.
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, secondID, offset)

	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.True(t, found)

	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), secondID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerShutdownInterruptsBlockingRead checks that canceling the
// context makes Run return an error matching context.Canceled within one
// second, with no entries in the stream.
func TestIntentConsumerShutdownInterruptsBlockingRead(t *testing.T) {
t.Parallel()
fixture := newIntentConsumerFixture(t, stubUserDirectory{})
ctx, cancel := context.WithCancel(context.Background())
resultCh := make(chan error, 1)
go func() {
resultCh <- fixture.consumer.Run(ctx)
}()
// Wait briefly before canceling, presumably so the consumer is already
// inside its read loop.
time.Sleep(50 * time.Millisecond)
cancel()
select {
case err := <-resultCh:
require.ErrorIs(t, err, context.Canceled)
case <-time.After(time.Second):
require.FailNow(t, "intent consumer did not stop after shutdown")
}
}
// TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset checks that an
// intent for an unknown user is recorded with the recipient-not-found failure
// code, that the offset advances past it, and that no notification is stored.
func TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset(t *testing.T) {
t.Parallel()
// An empty stub directory reports every user as not found.
fixture := newIntentConsumerFixture(t, stubUserDirectory{})
messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
running := runIntentConsumer(t, fixture.consumer)
defer running.stop(t)
// Wait until the malformed-intent record of the entry is stored.
require.Eventually(t, func() bool {
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
if err != nil {
return false
}
entry, err := redisstate.UnmarshalMalformedIntent(payload)
if err != nil {
return false
}
return entry.FailureCode == malformedintent.FailureCodeRecipientNotFound
}, time.Second, 10*time.Millisecond)
// The rejected entry must still move the stored offset forward.
offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
require.NoError(t, err)
require.True(t, found)
require.Equal(t, messageID, offset)
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
require.NoError(t, err)
require.False(t, found)
}
// TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset checks that an
// entry rejected as an invalid payload is recorded under its stream entry id,
// that the offset advances past it, and that no notification is stored.
func TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset(t *testing.T) {
t.Parallel()
fixture := newIntentConsumerFixture(t, stubUserDirectory{
records: map[string]acceptintent.UserRecord{
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
},
})
// Same fields as addValidIntent, except that "payload_json" is left out.
messageID, err := fixture.client.XAdd(context.Background(), &redis.XAddArgs{
Stream: fixture.stream,
Values: map[string]any{
"notification_type": "game.turn.ready",
"producer": "game_master",
"audience_kind": "user",
"recipient_user_ids_json": `["user-1"]`,
"idempotency_key": "game-123:turn-ready",
"occurred_at_ms": "1775121700000",
},
}).Result()
require.NoError(t, err)
running := runIntentConsumer(t, fixture.consumer)
defer running.stop(t)
// Wait until the malformed-intent record of the entry is stored.
require.Eventually(t, func() bool {
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
if err != nil {
return false
}
entry, err := redisstate.UnmarshalMalformedIntent(payload)
if err != nil {
return false
}
return entry.FailureCode == malformedintent.FailureCodeInvalidPayload &&
entry.StreamEntryID == messageID
}, time.Second, 10*time.Millisecond)
// The rejected entry must still move the stored offset forward.
offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
require.NoError(t, err)
require.True(t, found)
require.Equal(t, messageID, offset)
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
require.NoError(t, err)
require.False(t, found)
}
// TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents checks that
// the telemetry recorder sees the "accepted" and "duplicate" outcomes and an
// "idempotency_conflict" malformed intent for three entries sharing one
// idempotency key.
func TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents(t *testing.T) {
t.Parallel()
fixture := newIntentConsumerFixture(t, stubUserDirectory{
records: map[string]acceptintent.UserRecord{
"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
},
})
// The first two entries are identical; the third reuses the idempotency key
// with a different payload.
addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
conflictID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
running := runIntentConsumer(t, fixture.consumer)
defer running.stop(t)
// The conflict record is for the last entry, so the earlier entries have
// been handled by the time it appears.
require.Eventually(t, func() bool {
payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(conflictID)).Bytes()
if err != nil {
return false
}
entry, err := redisstate.UnmarshalMalformedIntent(payload)
if err != nil {
return false
}
return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict
}, time.Second, 10*time.Millisecond)
// Telemetry is polled separately from the stored record.
require.Eventually(t, func() bool {
return fixture.telemetry.hasIntentOutcome("accepted") &&
fixture.telemetry.hasIntentOutcome("duplicate") &&
fixture.telemetry.hasMalformedIntent("idempotency_conflict")
}, time.Second, 10*time.Millisecond)
}
// TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable
// checks that a user-directory failure makes Run return an error carrying the
// directory message, without saving an offset or storing a notification.
func TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable(t *testing.T) {
t.Parallel()
// The stub directory fails every lookup with this error.
fixture := newIntentConsumerFixture(t, stubUserDirectory{
err: errors.New("user service unavailable"),
})
messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
resultCh := make(chan error, 1)
go func() {
resultCh <- fixture.consumer.Run(ctx)
}()
// Run is expected to return on its own, without the context being canceled.
var runErr error
require.Eventually(t, func() bool {
select {
case runErr = <-resultCh:
return true
default:
return false
}
}, time.Second, 10*time.Millisecond)
require.Error(t, runErr)
require.ErrorContains(t, runErr, "user service unavailable")
// No offset may be saved for the entry that could not be handled.
_, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
require.NoError(t, err)
require.False(t, found)
_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
require.NoError(t, err)
require.False(t, found)
}
// intentConsumerFixture bundles the collaborators shared by the intent
// consumer tests.
type intentConsumerFixture struct {
// client is the Redis client connected to the per-test miniredis server.
client *redis.Client
// stream is the name of the stream the consumer reads from.
stream string
// acceptanceStore is used to check which notifications were accepted.
acceptanceStore *redisstate.AcceptanceStore
// offsetStore is the offset store handed to the consumer.
offsetStore *redisstate.StreamOffsetStore
// consumer is the intent consumer under test.
consumer *IntentConsumer
// telemetry is the recorder shared by the accept service and the consumer.
telemetry *recordingWorkerTelemetry
}
// newIntentConsumerFixture starts a per-test miniredis server and wires the
// Redis-backed stores, an accept-intent service using userDirectory, and an
// intent consumer reading "notification:intents" with a 25ms block timeout.
// The service and the consumer share one telemetry recorder and use fixed
// clocks one millisecond apart. The Redis client is closed through t.Cleanup.
func newIntentConsumerFixture(t *testing.T, userDirectory acceptintent.UserDirectory) intentConsumerFixture {
	t.Helper()

	const stream = "notification:intents"

	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})

	acceptanceStore, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	malformedStore, err := redisstate.NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)

	offsetStore, err := redisstate.NewStreamOffsetStore(client)
	require.NoError(t, err)

	fixture := intentConsumerFixture{
		client:          client,
		stream:          stream,
		acceptanceStore: acceptanceStore,
		offsetStore:     offsetStore,
		telemetry:       &recordingWorkerTelemetry{},
	}

	service, err := acceptintent.New(acceptintent.Config{
		Store:            acceptanceStore,
		UserDirectory:    userDirectory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		Telemetry:        fixture.telemetry,
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting:     config.AdminRoutingConfig{},
	})
	require.NoError(t, err)

	fixture.consumer, err = NewIntentConsumer(IntentConsumerConfig{
		Client:            client,
		Stream:            stream,
		BlockTimeout:      25 * time.Millisecond,
		Acceptor:          service,
		MalformedRecorder: malformedStore,
		OffsetStore:       offsetStore,
		Telemetry:         fixture.telemetry,
		Clock:             fixedClock{now: time.UnixMilli(1775121700001).UTC()},
	}, slog.New(slog.NewTextHandler(io.Discard, nil)))
	require.NoError(t, err)

	return fixture
}
// addValidIntent appends one "game.turn.ready" intent for user-1 to stream and
// returns the stream entry id. Every call uses the same idempotency key
// "game-123:turn-ready"; only payloadJSON varies between calls.
func addValidIntent(t *testing.T, client *redis.Client, stream string, payloadJSON string) string {
	t.Helper()

	fields := map[string]any{
		"notification_type":       "game.turn.ready",
		"producer":                "game_master",
		"audience_kind":           "user",
		"recipient_user_ids_json": `["user-1"]`,
		"idempotency_key":         "game-123:turn-ready",
		"occurred_at_ms":          "1775121700000",
		"payload_json":            payloadJSON,
	}
	entryID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: stream,
		Values: fields,
	}).Result()
	require.NoError(t, err)

	return entryID
}
// runningIntentConsumer is the handle of an intent consumer started by
// runIntentConsumer.
type runningIntentConsumer struct {
// cancel cancels the context passed to the consumer's Run.
cancel context.CancelFunc
// resultCh receives the value returned by Run.
resultCh chan error
}
// runIntentConsumer starts consumer.Run on its own goroutine with a
// cancellable background context, waits 50ms, and returns a handle for
// stopping it. The result channel is buffered so the goroutine can exit even
// when nobody reads the result.
func runIntentConsumer(t *testing.T, consumer *IntentConsumer) runningIntentConsumer {
	t.Helper()

	ctx, cancel := context.WithCancel(context.Background())
	running := runningIntentConsumer{
		cancel:   cancel,
		resultCh: make(chan error, 1),
	}
	go func() {
		running.resultCh <- consumer.Run(ctx)
	}()

	// Pause before handing control back, presumably so the consumer has
	// started reading by the time the test continues.
	time.Sleep(50 * time.Millisecond)

	return running
}
// stop cancels the consumer context and waits up to one second for Run to
// return. The test fails when Run returns anything other than an error
// matching context.Canceled, or when it does not return in time.
func (r runningIntentConsumer) stop(t *testing.T) {
	t.Helper()

	r.cancel()

	timeout := time.After(time.Second)
	select {
	case <-timeout:
		require.FailNow(t, "intent consumer did not stop")
	case runErr := <-r.resultCh:
		require.ErrorIs(t, runErr, context.Canceled)
	}
}
// fixedClock is a Clock that always reports the same instant.
type fixedClock struct {
	// now is the instant returned by every Now call.
	now time.Time
}

// Now returns the configured instant unchanged.
func (clock fixedClock) Now() time.Time {
	frozen := clock.now
	return frozen
}
// stubUserDirectory is an in-memory user directory for tests.
type stubUserDirectory struct {
	// records maps user ids to the records returned for them.
	records map[string]acceptintent.UserRecord
	// err, when non-nil, is returned by every lookup.
	err error
}

// GetUserByID returns the configured error when one is set. Otherwise it
// returns the record stored for userID, or acceptintent.ErrRecipientNotFound
// when the user id is unknown.
func (directory stubUserDirectory) GetUserByID(_ context.Context, userID string) (acceptintent.UserRecord, error) {
	if directory.err != nil {
		return acceptintent.UserRecord{}, directory.err
	}

	record, known := directory.records[userID]
	if !known {
		return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
	}
	return record, nil
}
@@ -0,0 +1,499 @@
package worker
import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/publishpush"
)
// Defaults and failure classifications used by PushPublisher.
const (
// defaultPushPublisherPollInterval replaces a non-positive PollInterval.
defaultPushPublisherPollInterval = 100 * time.Millisecond
// defaultPushPublisherBatchSize replaces a non-positive BatchSize.
defaultPushPublisherBatchSize = 64
// pushFailureClassificationPayloadEncoding labels, judging by its name and
// value, failures to encode the push payload.
pushFailureClassificationPayloadEncoding = "payload_encoding_failed"
// pushFailureClassificationGatewayStreamWrite labels, judging by its name
// and value, failures to publish to the Gateway stream.
pushFailureClassificationGatewayStreamWrite = "gateway_stream_publish_failed"
)
// PushRouteStateStore describes the durable route-state operations required by
// PushPublisher.
type PushRouteStateStore interface {
// ListDueRoutes loads due scheduled routes. The time and count arguments
// are presumably the reference instant and the batch limit; confirm
// against the store implementation.
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
// TryAcquireRouteLease attempts to acquire one temporary route lease and
// reports whether it was acquired. The string arguments are presumably the
// notification id, route id, and owner token; confirm against the store
// implementation.
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
// ReleaseRouteLease best-effort releases one temporary route lease.
ReleaseRouteLease(context.Context, string, string, string) error
// GetNotification loads one accepted notification by id and reports
// whether it exists.
GetNotification(context.Context, string) (acceptintent.NotificationRecord, bool, error)
// GetRoute loads one accepted notification route by notification id and
// route id and reports whether it exists.
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
// CompleteRoutePublished records one successful publication.
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
// CompleteRouteFailed records one retryable publication failure.
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
// CompleteRouteDeadLetter records one exhausted publication failure.
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
}
// PushEventEncoder encodes one push-capable notification route into a
// Gateway-compatible client event. NewPushPublisher falls back to
// publishpush.Encoder when none is configured.
type PushEventEncoder interface {
// Encode converts notification plus route to one outbound event.
Encode(acceptintent.NotificationRecord, acceptintent.NotificationRoute) (publishpush.Event, error)
}
// RoutePublisherTelemetry records low-cardinality route publication outcomes.
type RoutePublisherTelemetry interface {
// RecordRoutePublishAttempt records one route publication attempt outcome.
// NOTE(review): the four string arguments are presumably channel,
// notification type, outcome, and failure classification; confirm against
// the call sites.
RecordRoutePublishAttempt(context.Context, string, string, string, string)
// RecordRouteRetry records one route retry scheduling event. EmailPublisher
// passes the route channel followed by the notification type.
RecordRouteRetry(context.Context, string, string)
// RecordRouteDeadLetter records one route transition to dead_letter.
// EmailPublisher passes the route channel, the notification type, and the
// failure classification, in that order.
RecordRouteDeadLetter(context.Context, string, string, string)
}
// PushPublisherConfig stores the dependencies and policies used by
// PushPublisher. NewPushPublisher rejects a nil Store, a blank GatewayStream,
// and non-positive GatewayStreamMaxLen, RouteLeaseTTL, RouteBackoffMin, or
// RouteBackoffMax; the remaining fields have defaults.
type PushPublisherConfig struct {
// Store owns the durable route-state transitions. Required.
Store PushRouteStateStore
// GatewayStream stores the outbound Gateway client-events stream name.
// Must not be blank.
GatewayStream string
// GatewayStreamMaxLen bounds GatewayStream with approximate trimming.
// Must be positive.
GatewayStreamMaxLen int64
// RouteLeaseTTL stores the temporary route-lease lifetime. Must be
// positive.
RouteLeaseTTL time.Duration
// RouteBackoffMin stores the minimum retry backoff. Must be positive and
// must not exceed RouteBackoffMax.
RouteBackoffMin time.Duration
// RouteBackoffMax stores the maximum retry backoff. Must be positive.
RouteBackoffMax time.Duration
// PollInterval stores how long the worker waits before the next due-route
// scan when no progress was made. Non-positive values select
// defaultPushPublisherPollInterval.
PollInterval time.Duration
// BatchSize stores the maximum number of due schedule members loaded per
// scan. Non-positive values select defaultPushPublisherBatchSize.
BatchSize int64
// Encoder stores the push payload encoder. Nil selects
// publishpush.Encoder.
Encoder PushEventEncoder
// Telemetry records route publication counters.
Telemetry RoutePublisherTelemetry
// Clock provides wall-clock timestamps. Nil selects the system clock.
Clock Clock
}
// PushPublisher publishes due push routes into the Gateway client-events
// stream with retry and dead-letter handling.
//
// Instances are built by NewPushPublisher, which validates the configuration
// and fills in defaults; the fields mirror PushPublisherConfig.
type PushPublisher struct {
// store performs route lookups, lease handling, and state transitions.
store PushRouteStateStore
// gatewayStream and gatewayStreamMaxLen are passed to the store on every
// successful publication.
gatewayStream string
gatewayStreamMaxLen int64
// routeLeaseTTL is the lease lifetime and the timeout of the release call.
routeLeaseTTL time.Duration
// routeBackoffMin and routeBackoffMax bound the retry delay.
routeBackoffMin time.Duration
routeBackoffMax time.Duration
// pollInterval is the idle wait between scans that made no progress.
pollInterval time.Duration
// batchSize limits how many due routes one scan loads.
batchSize int64
encoder PushEventEncoder
// telemetry may be nil; the record* helpers check for that.
telemetry RoutePublisherTelemetry
clock Clock
// workerToken is generated once per publisher and used as the lease token
// for every route this worker handles.
workerToken string
logger *slog.Logger
}
// NewPushPublisher validates cfg, applies defaults to the optional settings,
// and constructs the push publication worker.
//
// It returns an error for the first required setting that is missing or out
// of range, or when the per-worker lease token cannot be generated. A nil
// logger falls back to slog.Default().
func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublisher, error) {
	// Required settings: the first violated rule decides the error.
	if cfg.Store == nil {
		return nil, errors.New("new push publisher: nil store")
	}
	if strings.TrimSpace(cfg.GatewayStream) == "" {
		return nil, errors.New("new push publisher: gateway stream must not be empty")
	}
	if cfg.GatewayStreamMaxLen <= 0 {
		return nil, errors.New("new push publisher: gateway stream max len must be positive")
	}
	if cfg.RouteLeaseTTL <= 0 {
		return nil, errors.New("new push publisher: route lease ttl must be positive")
	}
	if cfg.RouteBackoffMin <= 0 {
		return nil, errors.New("new push publisher: route backoff min must be positive")
	}
	if cfg.RouteBackoffMax <= 0 {
		return nil, errors.New("new push publisher: route backoff max must be positive")
	}
	if cfg.RouteBackoffMin > cfg.RouteBackoffMax {
		return nil, errors.New("new push publisher: route backoff min must not exceed route backoff max")
	}

	// Optional settings fall back to package defaults.
	pollInterval := cfg.PollInterval
	if pollInterval <= 0 {
		pollInterval = defaultPushPublisherPollInterval
	}
	batchSize := cfg.BatchSize
	if batchSize <= 0 {
		batchSize = defaultPushPublisherBatchSize
	}
	clock := cfg.Clock
	if clock == nil {
		clock = systemClock{}
	}
	encoder := cfg.Encoder
	if encoder == nil {
		encoder = publishpush.Encoder{}
	}
	if logger == nil {
		logger = slog.Default()
	}

	// The token identifies this worker as the owner of the leases it takes.
	token, err := newWorkerToken()
	if err != nil {
		return nil, fmt.Errorf("new push publisher: %w", err)
	}

	publisher := &PushPublisher{
		store:               cfg.Store,
		gatewayStream:       cfg.GatewayStream,
		gatewayStreamMaxLen: cfg.GatewayStreamMaxLen,
		routeLeaseTTL:       cfg.RouteLeaseTTL,
		routeBackoffMin:     cfg.RouteBackoffMin,
		routeBackoffMax:     cfg.RouteBackoffMax,
		pollInterval:        pollInterval,
		batchSize:           batchSize,
		encoder:             encoder,
		telemetry:           cfg.Telemetry,
		clock:               clock,
		workerToken:         token,
		logger:              logger.With("component", "push_publisher", "stream", cfg.GatewayStream),
	}
	return publisher, nil
}
// Run drives the push publication loop and blocks until ctx is canceled or an
// unexpected publication error occurs.
//
// A scan that made progress is followed immediately by another scan; an idle
// scan is followed by a wait of pollInterval. When ctx ends, Run logs the stop
// and returns the context error. Any other error is returned wrapped.
func (publisher *PushPublisher) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run push publisher: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	if publisher == nil {
		return errors.New("run push publisher: nil publisher")
	}

	publisher.logger.Info("push publisher started",
		"poll_interval", publisher.pollInterval.String(),
		"batch_size", publisher.batchSize,
	)

	for {
		madeProgress, err := publisher.publishDueRoutes(ctx)
		if err != nil {
			// Only treat the error as a clean stop when ctx itself has ended
			// and the error is a context error.
			contextEnded := errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)
			if ctx.Err() != nil && contextEnded {
				publisher.logger.Info("push publisher stopped")
				return ctx.Err()
			}
			return fmt.Errorf("run push publisher: %w", err)
		}
		if madeProgress {
			// More routes may be due; scan again without waiting.
			continue
		}

		if waitErr := waitWithContext(ctx, publisher.pollInterval); waitErr != nil {
			publisher.logger.Info("push publisher stopped")
			return waitErr
		}
	}
}
// Shutdown stops the push publisher within ctx. The worker relies on context
// cancellation and a bounded polling interval, so there is nothing to release
// here: the only failure mode is a nil ctx.
func (publisher *PushPublisher) Shutdown(ctx context.Context) error {
	if ctx != nil {
		// Nil and non-nil publishers alike hold no dedicated resources.
		return nil
	}
	return errors.New("shutdown push publisher: nil context")
}
// publishDueRoutes runs one scan: it loads up to batchSize due routes and
// tries to publish every one whose route ID starts with "push:".
//
// The returned bool reports whether at least one route changed state during
// the scan. The first error aborts the scan and is returned together with the
// progress made so far.
//
// NOTE(review): the "push:" filter is applied after the batch limit, so a
// batch filled with other route kinds would yield no push work — confirm how
// ListDueRoutes orders and scopes its results.
func (publisher *PushPublisher) publishDueRoutes(ctx context.Context) (bool, error) {
	scanTime := publisher.now()

	candidates, err := publisher.store.ListDueRoutes(ctx, scanTime, publisher.batchSize)
	if err != nil {
		return false, err
	}

	var madeProgress bool
	for _, candidate := range candidates {
		if !strings.HasPrefix(candidate.RouteID, "push:") {
			continue
		}

		handled, publishErr := publisher.publishRoute(ctx, scanTime, candidate)
		if publishErr != nil {
			return madeProgress, publishErr
		}
		if handled {
			madeProgress = true
		}
	}

	return madeProgress, nil
}
// publishRoute attempts one publication of dueRoute while holding its route
// lease under this worker's token.
//
// now is the scan timestamp taken by the caller; it is used only for the
// NextAttemptAt due check. The returned bool reports whether the route was
// published, rescheduled, or dead-lettered by this call. It is false when the
// lease could not be acquired, the stored route is not an eligible push route,
// or a completion call returned redisstate.ErrConflict. A non-nil error is
// returned for store failures and for a missing notification or route record.
//
// NOTE(review): a missing notification or route is returned as an error, which
// Run treats as fatal for the whole worker — confirm that is intended.
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
if err != nil {
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
}
if !acquired {
// The lease was not granted to this worker; skip the route.
return false, nil
}
// Best-effort release on every exit path. A fresh background context is used
// so the release is still attempted after ctx has been canceled; the release
// error is deliberately ignored.
defer func() {
releaseCtx, cancel := context.WithTimeout(context.Background(), publisher.routeLeaseTTL)
defer cancel()
_ = publisher.store.ReleaseRouteLease(releaseCtx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken)
}()
notification, found, err := publisher.store.GetNotification(ctx, dueRoute.NotificationID)
if err != nil {
return false, fmt.Errorf("load notification %q: %w", dueRoute.NotificationID, err)
}
if !found {
return false, fmt.Errorf("notification %q is missing for route %q", dueRoute.NotificationID, dueRoute.RouteID)
}
route, found, err := publisher.store.GetRoute(ctx, dueRoute.NotificationID, dueRoute.RouteID)
if err != nil {
return false, fmt.Errorf("load route %q: %w", dueRoute.RouteID, err)
}
if !found {
return false, fmt.Errorf("route %q is missing for notification %q", dueRoute.RouteID, dueRoute.NotificationID)
}
// Re-validate against the stored route: only push routes that are pending or
// failed and already due are published.
if route.Channel != intentstream.ChannelPush {
return false, nil
}
switch route.Status {
case acceptintent.RouteStatusPending, acceptintent.RouteStatusFailed:
default:
return false, nil
}
if route.NextAttemptAt.After(now) {
return false, nil
}
event, err := publisher.encoder.Encode(notification, route)
if err != nil {
// Encoding failures count as a failed attempt and are retried or
// dead-lettered like stream write failures.
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationPayloadEncoding, err.Error())
}
// The store receives the stream name, length bound, and values together with
// the expected route and lease token — presumably so the stream write and
// the route transition happen in one store operation; confirm in redisstate.
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
PublishedAt: publisher.now(),
Stream: publisher.gatewayStream,
StreamMaxLen: publisher.gatewayStreamMaxLen,
StreamValues: eventValues(event),
})
switch {
case err == nil:
publisher.recordPublishAttempt(ctx, notification, route, "published", "")
logArgs := logging.RouteAttrs(
notification.NotificationID,
notification.NotificationType,
notification.Producer,
notification.AudienceKind,
notification.IdempotencyKey,
notification.RequestID,
notification.TraceID,
route.RouteID,
route.Channel,
)
logArgs = append(logArgs,
"event_id", event.EventID,
"user_id", event.UserID,
)
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
publisher.logger.Info("push route published", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
// The store reported a conflict; leave the route untouched and report no
// progress.
return false, nil
default:
// Any other store error is treated as a gateway stream write failure.
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
}
}
// recordFailure persists one failed publication attempt for route.
//
// The attempt number is route.AttemptCount + 1. When it reaches
// route.MaxAttempts the route is dead-lettered; otherwise it is rescheduled
// after a backoff derived from the attempt number. The returned bool reports
// whether the transition was stored. A redisstate.ErrConflict from the store
// yields (false, nil); any other store error is returned wrapped.
func (publisher *PushPublisher) recordFailure(
	ctx context.Context,
	notification acceptintent.NotificationRecord,
	route acceptintent.NotificationRoute,
	classification string,
	message string,
) (bool, error) {
	failedAt := publisher.now()
	attempt := route.AttemptCount + 1
	trimmedMessage := strings.TrimSpace(message)

	fields := logging.RouteAttrs(
		notification.NotificationID,
		notification.NotificationType,
		notification.Producer,
		notification.AudienceKind,
		notification.IdempotencyKey,
		notification.RequestID,
		notification.TraceID,
		route.RouteID,
		route.Channel,
	)
	fields = append(fields,
		"failure_classification", classification,
		"failure_message", trimmedMessage,
		"attempt_number", attempt,
		"max_attempts", route.MaxAttempts,
	)
	fields = append(fields, logging.TraceAttrsFromContext(ctx)...)

	// Attempts exhausted: move the route to dead_letter.
	if attempt >= route.MaxAttempts {
		err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
			ExpectedRoute:         route,
			LeaseToken:            publisher.workerToken,
			DeadLetteredAt:        failedAt,
			FailureClassification: classification,
			FailureMessage:        trimmedMessage,
		})
		if err != nil {
			if errors.Is(err, redisstate.ErrConflict) {
				return false, nil
			}
			return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
		}

		publisher.recordPublishAttempt(ctx, notification, route, "dead_letter", classification)
		publisher.recordRouteDeadLetter(ctx, notification, route, classification)
		publisher.logger.Warn("push route dead-lettered", fields...)
		return true, nil
	}

	// Attempts remain: schedule the next one after the backoff delay.
	backoff := routeBackoffDelay(attempt, publisher.routeBackoffMin, publisher.routeBackoffMax)
	nextAttemptAt := failedAt.Add(backoff).UTC().Truncate(time.Millisecond)

	err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            publisher.workerToken,
		FailedAt:              failedAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: classification,
		FailureMessage:        trimmedMessage,
	})
	if err != nil {
		if errors.Is(err, redisstate.ErrConflict) {
			return false, nil
		}
		return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
	}

	publisher.recordPublishAttempt(ctx, notification, route, "retry", classification)
	publisher.recordRouteRetry(ctx, notification, route)
	fields = append(fields, "next_attempt_at", nextAttemptAt)
	publisher.logger.Warn("push route failed and was rescheduled", fields...)
	return true, nil
}
// eventValues flattens event into the field map written to the gateway
// stream. The payload bytes are copied so the map does not alias the event's
// slice; request_id and trace_id are included only when non-empty.
func eventValues(event publishpush.Event) map[string]any {
	payloadCopy := append([]byte(nil), event.PayloadBytes...)

	fields := map[string]any{
		"event_id":      event.EventID,
		"event_type":    event.EventType,
		"user_id":       event.UserID,
		"payload_bytes": payloadCopy,
	}

	if requestID := event.RequestID; requestID != "" {
		fields["request_id"] = requestID
	}
	if traceID := event.TraceID; traceID != "" {
		fields["trace_id"] = traceID
	}

	return fields
}
func routeBackoffDelay(attemptNumber int, minBackoff time.Duration, maxBackoff time.Duration) time.Duration {
delay := minBackoff
for step := 1; step < attemptNumber; step++ {
if delay >= maxBackoff/2 {
return maxBackoff
}
delay *= 2
}
if delay < minBackoff {
return minBackoff
}
if delay > maxBackoff {
return maxBackoff
}
return delay
}
// waitWithContext blocks for delay or until ctx is done, whichever comes
// first. It returns nil when the delay elapsed and ctx.Err() when ctx ended.
func waitWithContext(ctx context.Context, delay time.Duration) error {
	pause := time.NewTimer(delay)
	defer pause.Stop()

	select {
	case <-pause.C:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// newWorkerToken returns 16 random bytes from rand.Read encoded as a
// 32-character lowercase hex string. It is used to identify one worker as a
// lease owner.
func newWorkerToken() (string, error) {
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", fmt.Errorf("generate worker token: %w", err)
	}

	token := hex.EncodeToString(raw[:])
	return token, nil
}
// now reads the configured clock and normalizes the result to UTC with
// millisecond precision.
func (publisher *PushPublisher) now() time.Time {
	current := publisher.clock.Now()
	return current.UTC().Truncate(time.Millisecond)
}
// recordPublishAttempt forwards one publication attempt outcome to telemetry.
// It does nothing when the publisher or its telemetry sink is nil.
func (publisher *PushPublisher) recordPublishAttempt(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, result string, classification string) {
	if publisher != nil && publisher.telemetry != nil {
		channel := string(route.Channel)
		notificationType := string(notification.NotificationType)
		publisher.telemetry.RecordRoutePublishAttempt(ctx, channel, notificationType, result, classification)
	}
}
// recordRouteRetry forwards one retry scheduling event to telemetry. It does
// nothing when the publisher or its telemetry sink is nil.
func (publisher *PushPublisher) recordRouteRetry(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute) {
	if publisher != nil && publisher.telemetry != nil {
		channel := string(route.Channel)
		notificationType := string(notification.NotificationType)
		publisher.telemetry.RecordRouteRetry(ctx, channel, notificationType)
	}
}
// recordRouteDeadLetter forwards one dead-letter transition to telemetry. It
// does nothing when the publisher or its telemetry sink is nil.
func (publisher *PushPublisher) recordRouteDeadLetter(ctx context.Context, notification acceptintent.NotificationRecord, route acceptintent.NotificationRoute, classification string) {
	if publisher != nil && publisher.telemetry != nil {
		channel := string(route.Channel)
		notificationType := string(notification.NotificationType)
		publisher.telemetry.RecordRouteDeadLetter(ctx, channel, notificationType, classification)
	}
}
@@ -0,0 +1,318 @@
package worker
import (
"context"
"io"
"log/slog"
"sync"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending verifies
// that a running publisher moves the due push route to published, writes
// exactly one event to the gateway stream, records a "published" telemetry
// attempt, and leaves the email route of the same notification pending.
func TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending(t *testing.T) {
t.Parallel()
fixture := newPushPublisherFixture(t)
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
running := runPushPublisher(t, fixture.publisher)
defer running.stop(t)
// Wait for the background worker to publish the push route.
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
}, time.Second, 10*time.Millisecond)
// The email route must not have been touched by the push worker.
emailRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
require.NoError(t, err)
require.True(t, found)
require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)
// Exactly one event, carrying the expected identifying fields.
messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
require.Equal(t, "user-1", messages[0].Values["user_id"])
require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
require.Equal(t, "1775121700000-0/push:user:user-1", messages[0].Values["event_id"])
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
// TestPushPublisherRetriesGatewayStreamPublicationFailures verifies that a
// failing gateway stream write marks the route failed with one attempt and
// records retry telemetry, and that the route is published on the second
// attempt once the stream key becomes writable.
func TestPushPublisherRetriesGatewayStreamPublicationFailures(t *testing.T) {
t.Parallel()
fixture := newPushPublisherFixture(t)
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
// Occupy the stream key with a plain string value so that publishing to it
// fails.
require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
running := runPushPublisher(t, fixture.publisher)
defer running.stop(t)
// First attempt: the route ends up failed with AttemptCount == 1.
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
}, time.Second, 10*time.Millisecond)
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "retry", pushFailureClassificationGatewayStreamWrite))
require.True(t, fixture.telemetry.hasRouteRetry("push"))
// Remove the blocking key so the retry can succeed.
require.NoError(t, fixture.client.Del(context.Background(), fixture.gatewayStream).Err())
// Second attempt: the route ends up published with AttemptCount == 2.
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
}, 2*time.Second, 10*time.Millisecond)
messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
// TestPushPublisherDeadLettersExhaustedRoute verifies that a push route whose
// next failure is its last allowed attempt is moved to dead_letter, that a
// dead-letter record with the failure classification is stored, and that the
// matching telemetry is recorded.
func TestPushPublisherDeadLettersExhaustedRoute(t *testing.T) {
t.Parallel()
fixture := newPushPublisherFixture(t)
input := validPushAcceptanceInput(fixture.now)
// Start the push route at 2 of 3 attempts so the next failure exhausts it.
for index := range input.Routes {
if input.Routes[index].RouteID == "push:user:user-1" {
input.Routes[index].AttemptCount = 2
input.Routes[index].MaxAttempts = 3
}
}
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), input))
// Occupy the stream key with a plain string value so that publishing to it
// fails.
require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
running := runPushPublisher(t, fixture.publisher)
defer running.stop(t)
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 3
}, time.Second, 10*time.Millisecond)
// The dead-letter record is read straight from Redis and decoded.
deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "push:user:user-1")).Bytes()
require.NoError(t, err)
deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
require.NoError(t, err)
require.Equal(t, pushFailureClassificationGatewayStreamWrite, deadLetter.FailureClassification)
require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "dead_letter", pushFailureClassificationGatewayStreamWrite))
require.True(t, fixture.telemetry.hasRouteDeadLetter("push", pushFailureClassificationGatewayStreamWrite))
}
// TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publishers against the same store and gateway stream and verifies that the
// push route is published with exactly one stream message.
func TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
t.Parallel()
fixture := newPushPublisherFixture(t)
require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
// The second publisher shares the store and stream but has its own clock and
// no telemetry sink.
otherPublisher, err := NewPushPublisher(PushPublisherConfig{
Store: fixture.store,
GatewayStream: fixture.gatewayStream,
GatewayStreamMaxLen: 1024,
RouteLeaseTTL: 200 * time.Millisecond,
RouteBackoffMin: 20 * time.Millisecond,
RouteBackoffMax: 20 * time.Millisecond,
PollInterval: 10 * time.Millisecond,
BatchSize: 16,
Clock: newSteppingClock(fixture.now, time.Millisecond),
}, testWorkerLogger())
require.NoError(t, err)
first := runPushPublisher(t, fixture.publisher)
defer first.stop(t)
second := runPushPublisher(t, otherPublisher)
defer second.stop(t)
require.Eventually(t, func() bool {
route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
return err == nil && found && route.Status == acceptintent.RouteStatusPublished
}, time.Second, 10*time.Millisecond)
// Both workers raced for the route; only one event may have been written.
messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
}
// pushPublisherFixture bundles the collaborators created by
// newPushPublisherFixture so tests can drive the publisher and inspect Redis
// state and telemetry.
type pushPublisherFixture struct {
// client talks to the per-test miniredis server.
client *redis.Client
// store is the acceptance store backed by client.
store *redisstate.AcceptanceStore
// publisher is the worker under test.
publisher *PushPublisher
// gatewayStream is the stream name the publisher writes to.
gatewayStream string
// now is the fixed start time of clock.
now time.Time
// clock is the stepping clock handed to publisher.
clock *steppingClock
// telemetry records the calls made by publisher.
telemetry *recordingWorkerTelemetry
}
// newPushPublisherFixture starts a per-test miniredis server and wires a
// Redis client, an acceptance store, a recording telemetry sink, a stepping
// clock, and a PushPublisher with short lease, backoff, and poll settings.
// The Redis client is closed through t.Cleanup.
func newPushPublisherFixture(t *testing.T) pushPublisherFixture {
	t.Helper()

	const gatewayStream = "gateway:client-events"

	redisServer := miniredis.RunT(t)
	redisClient := redis.NewClient(&redis.Options{
		Addr:            redisServer.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, redisClient.Close())
	})

	acceptanceStore, err := redisstate.NewAcceptanceStore(redisClient, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)

	start := time.UnixMilli(1775121700000).UTC()
	fixture := pushPublisherFixture{
		client:        redisClient,
		store:         acceptanceStore,
		gatewayStream: gatewayStream,
		now:           start,
		clock:         newSteppingClock(start, time.Millisecond),
		telemetry:     &recordingWorkerTelemetry{},
	}

	fixture.publisher, err = NewPushPublisher(PushPublisherConfig{
		Store:               acceptanceStore,
		GatewayStream:       gatewayStream,
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Telemetry:           fixture.telemetry,
		Clock:               fixture.clock,
	}, testWorkerLogger())
	require.NoError(t, err)

	return fixture
}
// validPushAcceptanceInput builds one accepted "game turn ready" notification
// for user-1 with two pending routes that are due at now: a push route with
// three allowed attempts and an email route with seven.
func validPushAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	const (
		notificationID = "1775121700000-0"
		idempotencyKey = "game-123:turn-54"
		fingerprint    = "sha256:deadbeef"
	)

	pushRoute := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RouteID:        "push:user:user-1",
		Channel:        intentstream.ChannelPush,
		RecipientRef:   "user:user-1",
		Status:         acceptintent.RouteStatusPending,
		AttemptCount:   0,
		MaxAttempts:    3,
		NextAttemptAt:  now,
		ResolvedEmail:  "pilot@example.com",
		ResolvedLocale: "en",
		CreatedAt:      now,
		UpdatedAt:      now,
	}

	// The email route differs from the push route only in identity, channel,
	// and attempt budget.
	emailRoute := pushRoute
	emailRoute.RouteID = "email:user:user-1"
	emailRoute.Channel = intentstream.ChannelEmail
	emailRoute.MaxAttempts = 7

	notification := acceptintent.NotificationRecord{
		NotificationID:     notificationID,
		NotificationType:   intentstream.NotificationTypeGameTurnReady,
		Producer:           intentstream.ProducerGameMaster,
		AudienceKind:       intentstream.AudienceKindUser,
		RecipientUserIDs:   []string{"user-1"},
		PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
		IdempotencyKey:     idempotencyKey,
		RequestFingerprint: fingerprint,
		RequestID:          "request-1",
		TraceID:            "trace-1",
		OccurredAt:         now,
		AcceptedAt:         now,
		UpdatedAt:          now,
	}

	idempotency := acceptintent.IdempotencyRecord{
		Producer:           intentstream.ProducerGameMaster,
		IdempotencyKey:     idempotencyKey,
		NotificationID:     notificationID,
		RequestFingerprint: fingerprint,
		CreatedAt:          now,
		ExpiresAt:          now.Add(7 * 24 * time.Hour),
	}

	return acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{pushRoute, emailRoute},
		Idempotency:  idempotency,
	}
}
// runningPushPublisher is the handle for a publisher started by
// runPushPublisher.
type runningPushPublisher struct {
// cancel cancels the context passed to Run.
cancel context.CancelFunc
// resultCh receives the single value returned by Run.
resultCh chan error
}
// runPushPublisher starts publisher.Run in a goroutine under a cancellable
// context and returns the handle used to stop it. The result channel is
// buffered so the goroutine can finish even if nobody reads the result.
func runPushPublisher(t *testing.T, publisher *PushPublisher) runningPushPublisher {
	t.Helper()

	runCtx, stopRun := context.WithCancel(context.Background())
	handle := runningPushPublisher{
		cancel:   stopRun,
		resultCh: make(chan error, 1),
	}

	go func() {
		handle.resultCh <- publisher.Run(runCtx)
	}()

	return handle
}
// stop cancels the publisher's context and requires Run to return
// context.Canceled within one second.
func (r runningPushPublisher) stop(t *testing.T) {
	t.Helper()

	r.cancel()

	timeout := time.After(time.Second)
	select {
	case <-timeout:
		require.FailNow(t, "push publisher did not stop")
	case runErr := <-r.resultCh:
		require.ErrorIs(t, runErr, context.Canceled)
	}
}
// steppingClock is a test clock whose Now method returns the current value
// and then advances it by a fixed step.
type steppingClock struct {
// mu guards current.
mu sync.Mutex
// current is the value the next Now call returns.
current time.Time
// step is added to current after every Now call.
step time.Duration
}
// newSteppingClock returns a clock that starts at start, normalized to UTC
// with millisecond precision, and advances by step on every read.
func newSteppingClock(start time.Time, step time.Duration) *steppingClock {
	clock := new(steppingClock)
	clock.current = start.UTC().Truncate(time.Millisecond)
	clock.step = step
	return clock
}
// Now returns the clock's current time and then moves the clock forward by
// one step, keeping UTC and millisecond precision.
func (clock *steppingClock) Now() time.Time {
	clock.mu.Lock()
	defer clock.mu.Unlock()

	reading := clock.current
	advanced := reading.Add(clock.step)
	clock.current = advanced.UTC().Truncate(time.Millisecond)
	return reading
}
// testWorkerLogger returns a text logger whose output is discarded.
func testWorkerLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
@@ -0,0 +1,184 @@
package worker
import (
"context"
"sync"
)
// recordingWorkerTelemetry is a test double that appends every telemetry call
// to an in-memory slice so tests can assert on what was recorded.
type recordingWorkerTelemetry struct {
// mu guards all slices below.
mu sync.Mutex
intentOutcomes []intentOutcomeTelemetryRecord
malformedIntents []malformedIntentTelemetryRecord
userEnrichment []userEnrichmentTelemetryRecord
routePublishAttempts []routePublishTelemetryRecord
routeRetries []routeTelemetryRecord
routeDeadLetters []routeDeadLetterTelemetryRecord
}
// RecordIntentOutcome stores one intent outcome call. The context is ignored.
func (telemetry *recordingWorkerTelemetry) RecordIntentOutcome(_ context.Context, notificationType string, producer string, audienceKind string, outcome string) {
	record := intentOutcomeTelemetryRecord{
		notificationType: notificationType,
		producer:         producer,
		audienceKind:     audienceKind,
		outcome:          outcome,
	}

	telemetry.mu.Lock()
	telemetry.intentOutcomes = append(telemetry.intentOutcomes, record)
	telemetry.mu.Unlock()
}
// RecordMalformedIntent stores one malformed intent call. The context is
// ignored.
func (telemetry *recordingWorkerTelemetry) RecordMalformedIntent(_ context.Context, failureCode string, notificationType string, producer string) {
	record := malformedIntentTelemetryRecord{
		failureCode:      failureCode,
		notificationType: notificationType,
		producer:         producer,
	}

	telemetry.mu.Lock()
	telemetry.malformedIntents = append(telemetry.malformedIntents, record)
	telemetry.mu.Unlock()
}
// RecordUserEnrichmentAttempt stores one user enrichment attempt call. The
// context is ignored.
func (telemetry *recordingWorkerTelemetry) RecordUserEnrichmentAttempt(_ context.Context, notificationType string, result string) {
	record := userEnrichmentTelemetryRecord{
		notificationType: notificationType,
		result:           result,
	}

	telemetry.mu.Lock()
	telemetry.userEnrichment = append(telemetry.userEnrichment, record)
	telemetry.mu.Unlock()
}
// RecordRoutePublishAttempt stores one route publication attempt call. The
// context is ignored.
func (telemetry *recordingWorkerTelemetry) RecordRoutePublishAttempt(_ context.Context, channel string, notificationType string, result string, failureClassification string) {
	record := routePublishTelemetryRecord{
		channel:               channel,
		notificationType:      notificationType,
		result:                result,
		failureClassification: failureClassification,
	}

	telemetry.mu.Lock()
	telemetry.routePublishAttempts = append(telemetry.routePublishAttempts, record)
	telemetry.mu.Unlock()
}
// RecordRouteRetry stores one route retry call. The context is ignored.
func (telemetry *recordingWorkerTelemetry) RecordRouteRetry(_ context.Context, channel string, notificationType string) {
	record := routeTelemetryRecord{
		channel:          channel,
		notificationType: notificationType,
	}

	telemetry.mu.Lock()
	telemetry.routeRetries = append(telemetry.routeRetries, record)
	telemetry.mu.Unlock()
}
// RecordRouteDeadLetter stores one route dead-letter call. The context is
// ignored.
func (telemetry *recordingWorkerTelemetry) RecordRouteDeadLetter(_ context.Context, channel string, notificationType string, failureClassification string) {
	record := routeDeadLetterTelemetryRecord{
		channel:               channel,
		notificationType:      notificationType,
		failureClassification: failureClassification,
	}

	telemetry.mu.Lock()
	telemetry.routeDeadLetters = append(telemetry.routeDeadLetters, record)
	telemetry.mu.Unlock()
}
// hasIntentOutcome reports whether any recorded intent outcome equals outcome.
func (telemetry *recordingWorkerTelemetry) hasIntentOutcome(outcome string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	matched := false
	for index := 0; index < len(telemetry.intentOutcomes) && !matched; index++ {
		matched = telemetry.intentOutcomes[index].outcome == outcome
	}
	return matched
}
// hasMalformedIntent reports whether any recorded malformed intent carries
// failureCode.
func (telemetry *recordingWorkerTelemetry) hasMalformedIntent(failureCode string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	matched := false
	for index := 0; index < len(telemetry.malformedIntents) && !matched; index++ {
		matched = telemetry.malformedIntents[index].failureCode == failureCode
	}
	return matched
}
// hasRoutePublishAttempt reports whether a publication attempt with exactly
// this channel, result, and failure classification was recorded. The
// notification type is not compared.
func (telemetry *recordingWorkerTelemetry) hasRoutePublishAttempt(channel string, result string, failureClassification string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	matched := false
	for index := 0; index < len(telemetry.routePublishAttempts) && !matched; index++ {
		candidate := telemetry.routePublishAttempts[index]
		matched = candidate.channel == channel &&
			candidate.result == result &&
			candidate.failureClassification == failureClassification
	}
	return matched
}
// hasRouteRetry reports whether any retry was recorded for channel.
func (telemetry *recordingWorkerTelemetry) hasRouteRetry(channel string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	matched := false
	for index := 0; index < len(telemetry.routeRetries) && !matched; index++ {
		matched = telemetry.routeRetries[index].channel == channel
	}
	return matched
}
// hasRouteDeadLetter reports whether a dead-letter transition with this
// channel and failure classification was recorded. The notification type is
// not compared.
func (telemetry *recordingWorkerTelemetry) hasRouteDeadLetter(channel string, failureClassification string) bool {
	telemetry.mu.Lock()
	defer telemetry.mu.Unlock()

	matched := false
	for index := 0; index < len(telemetry.routeDeadLetters) && !matched; index++ {
		candidate := telemetry.routeDeadLetters[index]
		matched = candidate.channel == channel &&
			candidate.failureClassification == failureClassification
	}
	return matched
}
// intentOutcomeTelemetryRecord captures the arguments of one
// RecordIntentOutcome call.
type intentOutcomeTelemetryRecord struct {
notificationType string
producer string
audienceKind string
outcome string
}
// malformedIntentTelemetryRecord captures the arguments of one
// RecordMalformedIntent call.
type malformedIntentTelemetryRecord struct {
failureCode string
notificationType string
producer string
}
// userEnrichmentTelemetryRecord captures the arguments of one
// RecordUserEnrichmentAttempt call.
type userEnrichmentTelemetryRecord struct {
notificationType string
result string
}
// routePublishTelemetryRecord captures the arguments of one
// RecordRoutePublishAttempt call.
type routePublishTelemetryRecord struct {
channel string
notificationType string
result string
// failureClassification is empty for the "published" result recorded by the
// push publisher.
failureClassification string
}
// routeTelemetryRecord captures the arguments of one RecordRouteRetry call.
type routeTelemetryRecord struct {
channel string
notificationType string
}
// routeDeadLetterTelemetryRecord captures the arguments of one
// RecordRouteDeadLetter call.
type routeDeadLetterTelemetryRecord struct {
channel string
notificationType string
failureClassification string
}