feat: use postgres

This commit is contained in:
Ilia Denisov
2026-04-26 20:34:39 +02:00
committed by GitHub
parent 48b0056b49
commit fe829285a6
365 changed files with 29223 additions and 24049 deletions
@@ -0,0 +1,25 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// DeadLetters is the go-jet row model for the notification.dead_letters
// table: one operator-visible record per route that exhausted automated
// delivery handling (see the migration's dead_letters DDL).
type DeadLetters struct {
	NotificationID        string `sql:"primary_key"`
	RouteID               string `sql:"primary_key"`
	Channel               string
	RecipientRef          string
	FinalAttemptCount     int32
	MaxAttempts           int32
	FailureClassification string
	FailureMessage        string
	RecoveryHint          string
	CreatedAt             time.Time
}
@@ -0,0 +1,19 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// GooseDbVersion is the go-jet row model for the goose_db_version
// bookkeeping table; presumably maintained by the goose migration tool
// itself (the table is not created by the migrations shown here — verify).
type GooseDbVersion struct {
	ID        int32 `sql:"primary_key"`
	VersionID int64
	IsApplied bool
	Tstamp    time.Time
}
@@ -0,0 +1,23 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// MalformedIntents is the go-jet row model for the
// notification.malformed_intents table: operator-visible records for stream
// entries the intent validator could not accept.
type MalformedIntents struct {
	StreamEntryID    string `sql:"primary_key"`
	NotificationType string
	Producer         string
	IdempotencyKey   string
	FailureCode      string
	FailureMessage   string
	RawFields        string
	RecordedAt       time.Time
}
@@ -0,0 +1,29 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// Records is the go-jet row model for the notification.records table: one
// durable notification record per accepted intent. The (producer,
// idempotency_key) pair is UNIQUE in the schema, so the row doubles as the
// idempotency reservation.
type Records struct {
	NotificationID       string `sql:"primary_key"`
	NotificationType     string
	Producer             string
	AudienceKind         string
	RecipientUserIds     string
	PayloadJSON          string
	IdempotencyKey       string
	RequestFingerprint   string
	RequestID            string
	TraceID              string
	OccurredAt           time.Time
	AcceptedAt           time.Time
	UpdatedAt            time.Time
	IdempotencyExpiresAt time.Time
}
@@ -0,0 +1,33 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package model
import (
"time"
)
// Routes is the go-jet row model for the notification.routes table: one row
// per (notification_id, route_id) delivery slot. Pointer fields map to the
// nullable timestamptz columns in the migration DDL.
type Routes struct {
	NotificationID          string `sql:"primary_key"`
	RouteID                 string `sql:"primary_key"`
	Channel                 string
	RecipientRef            string
	Status                  string
	AttemptCount            int32
	MaxAttempts             int32
	NextAttemptAt           *time.Time
	ResolvedEmail           string
	ResolvedLocale          string
	LastErrorClassification string
	LastErrorMessage        string
	LastErrorAt             *time.Time
	CreatedAt               time.Time
	UpdatedAt               time.Time
	PublishedAt             *time.Time
	DeadLetteredAt          *time.Time
	SkippedAt               *time.Time
}
@@ -0,0 +1,105 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// DeadLetters is the ready-to-use SQL builder for the
// notification.dead_letters table; UseSchema can rebind the schema name.
var DeadLetters = newDeadLettersTable("notification", "dead_letters", "")

type deadLettersTable struct {
	postgres.Table

	// Columns
	NotificationID        postgres.ColumnString
	RouteID               postgres.ColumnString
	Channel               postgres.ColumnString
	RecipientRef          postgres.ColumnString
	FinalAttemptCount     postgres.ColumnInteger
	MaxAttempts           postgres.ColumnInteger
	FailureClassification postgres.ColumnString
	FailureMessage        postgres.ColumnString
	RecoveryHint          postgres.ColumnString
	CreatedAt             postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// DeadLettersTable pairs the table with an EXCLUDED pseudo-table (table name
// "excluded") for building ON CONFLICT ... DO UPDATE clauses.
type DeadLettersTable struct {
	deadLettersTable

	EXCLUDED deadLettersTable
}

// AS creates new DeadLettersTable with assigned alias
func (a DeadLettersTable) AS(alias string) *DeadLettersTable {
	return newDeadLettersTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new DeadLettersTable with assigned schema name
func (a DeadLettersTable) FromSchema(schemaName string) *DeadLettersTable {
	return newDeadLettersTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new DeadLettersTable with assigned table prefix
func (a DeadLettersTable) WithPrefix(prefix string) *DeadLettersTable {
	return newDeadLettersTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new DeadLettersTable with assigned table suffix
func (a DeadLettersTable) WithSuffix(suffix string) *DeadLettersTable {
	return newDeadLettersTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

func newDeadLettersTable(schemaName, tableName, alias string) *DeadLettersTable {
	return &DeadLettersTable{
		deadLettersTable: newDeadLettersTableImpl(schemaName, tableName, alias),
		EXCLUDED:         newDeadLettersTableImpl("", "excluded", ""),
	}
}

func newDeadLettersTableImpl(schemaName, tableName, alias string) deadLettersTable {
	var (
		NotificationIDColumn        = postgres.StringColumn("notification_id")
		RouteIDColumn               = postgres.StringColumn("route_id")
		ChannelColumn               = postgres.StringColumn("channel")
		RecipientRefColumn          = postgres.StringColumn("recipient_ref")
		FinalAttemptCountColumn     = postgres.IntegerColumn("final_attempt_count")
		MaxAttemptsColumn           = postgres.IntegerColumn("max_attempts")
		FailureClassificationColumn = postgres.StringColumn("failure_classification")
		FailureMessageColumn        = postgres.StringColumn("failure_message")
		RecoveryHintColumn          = postgres.StringColumn("recovery_hint")
		CreatedAtColumn             = postgres.TimestampzColumn("created_at")
		allColumns                  = postgres.ColumnList{NotificationIDColumn, RouteIDColumn, ChannelColumn, RecipientRefColumn, FinalAttemptCountColumn, MaxAttemptsColumn, FailureClassificationColumn, FailureMessageColumn, RecoveryHintColumn, CreatedAtColumn}
		mutableColumns              = postgres.ColumnList{ChannelColumn, RecipientRefColumn, FinalAttemptCountColumn, MaxAttemptsColumn, FailureClassificationColumn, FailureMessageColumn, RecoveryHintColumn, CreatedAtColumn}
		// defaultColumns mirrors the columns with a database-side DEFAULT
		// in the migration (recovery_hint DEFAULT '').
		defaultColumns = postgres.ColumnList{RecoveryHintColumn}
	)

	return deadLettersTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		NotificationID:        NotificationIDColumn,
		RouteID:               RouteIDColumn,
		Channel:               ChannelColumn,
		RecipientRef:          RecipientRefColumn,
		FinalAttemptCount:     FinalAttemptCountColumn,
		MaxAttempts:           MaxAttemptsColumn,
		FailureClassification: FailureClassificationColumn,
		FailureMessage:        FailureMessageColumn,
		RecoveryHint:          RecoveryHintColumn,
		CreatedAt:             CreatedAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,87 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// GooseDbVersion is the ready-to-use SQL builder for the
// notification.goose_db_version table; UseSchema can rebind the schema name.
var GooseDbVersion = newGooseDbVersionTable("notification", "goose_db_version", "")

type gooseDbVersionTable struct {
	postgres.Table

	// Columns
	ID        postgres.ColumnInteger
	VersionID postgres.ColumnInteger
	IsApplied postgres.ColumnBool
	Tstamp    postgres.ColumnTimestamp

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// GooseDbVersionTable pairs the table with an EXCLUDED pseudo-table (table
// name "excluded") for building ON CONFLICT ... DO UPDATE clauses.
type GooseDbVersionTable struct {
	gooseDbVersionTable

	EXCLUDED gooseDbVersionTable
}

// AS creates new GooseDbVersionTable with assigned alias
func (a GooseDbVersionTable) AS(alias string) *GooseDbVersionTable {
	return newGooseDbVersionTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new GooseDbVersionTable with assigned schema name
func (a GooseDbVersionTable) FromSchema(schemaName string) *GooseDbVersionTable {
	return newGooseDbVersionTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new GooseDbVersionTable with assigned table prefix
func (a GooseDbVersionTable) WithPrefix(prefix string) *GooseDbVersionTable {
	return newGooseDbVersionTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new GooseDbVersionTable with assigned table suffix
func (a GooseDbVersionTable) WithSuffix(suffix string) *GooseDbVersionTable {
	return newGooseDbVersionTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

func newGooseDbVersionTable(schemaName, tableName, alias string) *GooseDbVersionTable {
	return &GooseDbVersionTable{
		gooseDbVersionTable: newGooseDbVersionTableImpl(schemaName, tableName, alias),
		EXCLUDED:            newGooseDbVersionTableImpl("", "excluded", ""),
	}
}

func newGooseDbVersionTableImpl(schemaName, tableName, alias string) gooseDbVersionTable {
	var (
		IDColumn        = postgres.IntegerColumn("id")
		VersionIDColumn = postgres.IntegerColumn("version_id")
		IsAppliedColumn = postgres.BoolColumn("is_applied")
		TstampColumn    = postgres.TimestampColumn("tstamp")
		allColumns      = postgres.ColumnList{IDColumn, VersionIDColumn, IsAppliedColumn, TstampColumn}
		mutableColumns  = postgres.ColumnList{VersionIDColumn, IsAppliedColumn, TstampColumn}
		// defaultColumns: tstamp presumably has a database-side DEFAULT set
		// by goose itself — the table DDL is not in this repo's migrations.
		defaultColumns = postgres.ColumnList{TstampColumn}
	)

	return gooseDbVersionTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		ID:        IDColumn,
		VersionID: VersionIDColumn,
		IsApplied: IsAppliedColumn,
		Tstamp:    TstampColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,99 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// MalformedIntents is the ready-to-use SQL builder for the
// notification.malformed_intents table; UseSchema can rebind the schema name.
var MalformedIntents = newMalformedIntentsTable("notification", "malformed_intents", "")

type malformedIntentsTable struct {
	postgres.Table

	// Columns
	StreamEntryID    postgres.ColumnString
	NotificationType postgres.ColumnString
	Producer         postgres.ColumnString
	IdempotencyKey   postgres.ColumnString
	FailureCode      postgres.ColumnString
	FailureMessage   postgres.ColumnString
	RawFields        postgres.ColumnString
	RecordedAt       postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// MalformedIntentsTable pairs the table with an EXCLUDED pseudo-table (table
// name "excluded") for building ON CONFLICT ... DO UPDATE clauses.
type MalformedIntentsTable struct {
	malformedIntentsTable

	EXCLUDED malformedIntentsTable
}

// AS creates new MalformedIntentsTable with assigned alias
func (a MalformedIntentsTable) AS(alias string) *MalformedIntentsTable {
	return newMalformedIntentsTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new MalformedIntentsTable with assigned schema name
func (a MalformedIntentsTable) FromSchema(schemaName string) *MalformedIntentsTable {
	return newMalformedIntentsTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new MalformedIntentsTable with assigned table prefix
func (a MalformedIntentsTable) WithPrefix(prefix string) *MalformedIntentsTable {
	return newMalformedIntentsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new MalformedIntentsTable with assigned table suffix
func (a MalformedIntentsTable) WithSuffix(suffix string) *MalformedIntentsTable {
	return newMalformedIntentsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

func newMalformedIntentsTable(schemaName, tableName, alias string) *MalformedIntentsTable {
	return &MalformedIntentsTable{
		malformedIntentsTable: newMalformedIntentsTableImpl(schemaName, tableName, alias),
		EXCLUDED:              newMalformedIntentsTableImpl("", "excluded", ""),
	}
}

func newMalformedIntentsTableImpl(schemaName, tableName, alias string) malformedIntentsTable {
	var (
		StreamEntryIDColumn    = postgres.StringColumn("stream_entry_id")
		NotificationTypeColumn = postgres.StringColumn("notification_type")
		ProducerColumn         = postgres.StringColumn("producer")
		IdempotencyKeyColumn   = postgres.StringColumn("idempotency_key")
		FailureCodeColumn      = postgres.StringColumn("failure_code")
		FailureMessageColumn   = postgres.StringColumn("failure_message")
		RawFieldsColumn        = postgres.StringColumn("raw_fields")
		RecordedAtColumn       = postgres.TimestampzColumn("recorded_at")
		allColumns             = postgres.ColumnList{StreamEntryIDColumn, NotificationTypeColumn, ProducerColumn, IdempotencyKeyColumn, FailureCodeColumn, FailureMessageColumn, RawFieldsColumn, RecordedAtColumn}
		mutableColumns         = postgres.ColumnList{NotificationTypeColumn, ProducerColumn, IdempotencyKeyColumn, FailureCodeColumn, FailureMessageColumn, RawFieldsColumn, RecordedAtColumn}
		// defaultColumns mirrors the columns with a database-side DEFAULT ''
		// in the migration (notification_type, producer, idempotency_key).
		defaultColumns = postgres.ColumnList{NotificationTypeColumn, ProducerColumn, IdempotencyKeyColumn}
	)

	return malformedIntentsTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		StreamEntryID:    StreamEntryIDColumn,
		NotificationType: NotificationTypeColumn,
		Producer:         ProducerColumn,
		IdempotencyKey:   IdempotencyKeyColumn,
		FailureCode:      FailureCodeColumn,
		FailureMessage:   FailureMessageColumn,
		RawFields:        RawFieldsColumn,
		RecordedAt:       RecordedAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,117 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// Records is the ready-to-use SQL builder for the notification.records
// table; UseSchema can rebind the schema name.
var Records = newRecordsTable("notification", "records", "")

type recordsTable struct {
	postgres.Table

	// Columns
	NotificationID       postgres.ColumnString
	NotificationType     postgres.ColumnString
	Producer             postgres.ColumnString
	AudienceKind         postgres.ColumnString
	RecipientUserIds     postgres.ColumnString
	PayloadJSON          postgres.ColumnString
	IdempotencyKey       postgres.ColumnString
	RequestFingerprint   postgres.ColumnString
	RequestID            postgres.ColumnString
	TraceID              postgres.ColumnString
	OccurredAt           postgres.ColumnTimestampz
	AcceptedAt           postgres.ColumnTimestampz
	UpdatedAt            postgres.ColumnTimestampz
	IdempotencyExpiresAt postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// RecordsTable pairs the table with an EXCLUDED pseudo-table (table name
// "excluded") for building ON CONFLICT ... DO UPDATE clauses.
type RecordsTable struct {
	recordsTable

	EXCLUDED recordsTable
}

// AS creates new RecordsTable with assigned alias
func (a RecordsTable) AS(alias string) *RecordsTable {
	return newRecordsTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new RecordsTable with assigned schema name
func (a RecordsTable) FromSchema(schemaName string) *RecordsTable {
	return newRecordsTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new RecordsTable with assigned table prefix
func (a RecordsTable) WithPrefix(prefix string) *RecordsTable {
	return newRecordsTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new RecordsTable with assigned table suffix
func (a RecordsTable) WithSuffix(suffix string) *RecordsTable {
	return newRecordsTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

func newRecordsTable(schemaName, tableName, alias string) *RecordsTable {
	return &RecordsTable{
		recordsTable: newRecordsTableImpl(schemaName, tableName, alias),
		EXCLUDED:     newRecordsTableImpl("", "excluded", ""),
	}
}

func newRecordsTableImpl(schemaName, tableName, alias string) recordsTable {
	var (
		NotificationIDColumn       = postgres.StringColumn("notification_id")
		NotificationTypeColumn     = postgres.StringColumn("notification_type")
		ProducerColumn             = postgres.StringColumn("producer")
		AudienceKindColumn         = postgres.StringColumn("audience_kind")
		RecipientUserIdsColumn     = postgres.StringColumn("recipient_user_ids")
		PayloadJSONColumn          = postgres.StringColumn("payload_json")
		IdempotencyKeyColumn       = postgres.StringColumn("idempotency_key")
		RequestFingerprintColumn   = postgres.StringColumn("request_fingerprint")
		RequestIDColumn            = postgres.StringColumn("request_id")
		TraceIDColumn              = postgres.StringColumn("trace_id")
		OccurredAtColumn           = postgres.TimestampzColumn("occurred_at")
		AcceptedAtColumn           = postgres.TimestampzColumn("accepted_at")
		UpdatedAtColumn            = postgres.TimestampzColumn("updated_at")
		IdempotencyExpiresAtColumn = postgres.TimestampzColumn("idempotency_expires_at")
		allColumns                 = postgres.ColumnList{NotificationIDColumn, NotificationTypeColumn, ProducerColumn, AudienceKindColumn, RecipientUserIdsColumn, PayloadJSONColumn, IdempotencyKeyColumn, RequestFingerprintColumn, RequestIDColumn, TraceIDColumn, OccurredAtColumn, AcceptedAtColumn, UpdatedAtColumn, IdempotencyExpiresAtColumn}
		mutableColumns             = postgres.ColumnList{NotificationTypeColumn, ProducerColumn, AudienceKindColumn, RecipientUserIdsColumn, PayloadJSONColumn, IdempotencyKeyColumn, RequestFingerprintColumn, RequestIDColumn, TraceIDColumn, OccurredAtColumn, AcceptedAtColumn, UpdatedAtColumn, IdempotencyExpiresAtColumn}
		// defaultColumns mirrors the columns with a database-side DEFAULT in
		// the migration (recipient_user_ids '[]', request_id '', trace_id '').
		defaultColumns = postgres.ColumnList{RecipientUserIdsColumn, RequestIDColumn, TraceIDColumn}
	)

	return recordsTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		NotificationID:       NotificationIDColumn,
		NotificationType:     NotificationTypeColumn,
		Producer:             ProducerColumn,
		AudienceKind:         AudienceKindColumn,
		RecipientUserIds:     RecipientUserIdsColumn,
		PayloadJSON:          PayloadJSONColumn,
		IdempotencyKey:       IdempotencyKeyColumn,
		RequestFingerprint:   RequestFingerprintColumn,
		RequestID:            RequestIDColumn,
		TraceID:              TraceIDColumn,
		OccurredAt:           OccurredAtColumn,
		AcceptedAt:           AcceptedAtColumn,
		UpdatedAt:            UpdatedAtColumn,
		IdempotencyExpiresAt: IdempotencyExpiresAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,129 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
import (
"github.com/go-jet/jet/v2/postgres"
)
// Routes is the ready-to-use SQL builder for the notification.routes table;
// UseSchema can rebind the schema name.
var Routes = newRoutesTable("notification", "routes", "")

type routesTable struct {
	postgres.Table

	// Columns
	NotificationID          postgres.ColumnString
	RouteID                 postgres.ColumnString
	Channel                 postgres.ColumnString
	RecipientRef            postgres.ColumnString
	Status                  postgres.ColumnString
	AttemptCount            postgres.ColumnInteger
	MaxAttempts             postgres.ColumnInteger
	NextAttemptAt           postgres.ColumnTimestampz
	ResolvedEmail           postgres.ColumnString
	ResolvedLocale          postgres.ColumnString
	LastErrorClassification postgres.ColumnString
	LastErrorMessage        postgres.ColumnString
	LastErrorAt             postgres.ColumnTimestampz
	CreatedAt               postgres.ColumnTimestampz
	UpdatedAt               postgres.ColumnTimestampz
	PublishedAt             postgres.ColumnTimestampz
	DeadLetteredAt          postgres.ColumnTimestampz
	SkippedAt               postgres.ColumnTimestampz

	AllColumns     postgres.ColumnList
	MutableColumns postgres.ColumnList
	DefaultColumns postgres.ColumnList
}

// RoutesTable pairs the table with an EXCLUDED pseudo-table (table name
// "excluded") for building ON CONFLICT ... DO UPDATE clauses.
type RoutesTable struct {
	routesTable

	EXCLUDED routesTable
}

// AS creates new RoutesTable with assigned alias
func (a RoutesTable) AS(alias string) *RoutesTable {
	return newRoutesTable(a.SchemaName(), a.TableName(), alias)
}

// Schema creates new RoutesTable with assigned schema name
func (a RoutesTable) FromSchema(schemaName string) *RoutesTable {
	return newRoutesTable(schemaName, a.TableName(), a.Alias())
}

// WithPrefix creates new RoutesTable with assigned table prefix
func (a RoutesTable) WithPrefix(prefix string) *RoutesTable {
	return newRoutesTable(a.SchemaName(), prefix+a.TableName(), a.TableName())
}

// WithSuffix creates new RoutesTable with assigned table suffix
func (a RoutesTable) WithSuffix(suffix string) *RoutesTable {
	return newRoutesTable(a.SchemaName(), a.TableName()+suffix, a.TableName())
}

func newRoutesTable(schemaName, tableName, alias string) *RoutesTable {
	return &RoutesTable{
		routesTable: newRoutesTableImpl(schemaName, tableName, alias),
		EXCLUDED:    newRoutesTableImpl("", "excluded", ""),
	}
}

func newRoutesTableImpl(schemaName, tableName, alias string) routesTable {
	var (
		NotificationIDColumn          = postgres.StringColumn("notification_id")
		RouteIDColumn                 = postgres.StringColumn("route_id")
		ChannelColumn                 = postgres.StringColumn("channel")
		RecipientRefColumn            = postgres.StringColumn("recipient_ref")
		StatusColumn                  = postgres.StringColumn("status")
		AttemptCountColumn            = postgres.IntegerColumn("attempt_count")
		MaxAttemptsColumn             = postgres.IntegerColumn("max_attempts")
		NextAttemptAtColumn           = postgres.TimestampzColumn("next_attempt_at")
		ResolvedEmailColumn           = postgres.StringColumn("resolved_email")
		ResolvedLocaleColumn          = postgres.StringColumn("resolved_locale")
		LastErrorClassificationColumn = postgres.StringColumn("last_error_classification")
		LastErrorMessageColumn        = postgres.StringColumn("last_error_message")
		LastErrorAtColumn             = postgres.TimestampzColumn("last_error_at")
		CreatedAtColumn               = postgres.TimestampzColumn("created_at")
		UpdatedAtColumn               = postgres.TimestampzColumn("updated_at")
		PublishedAtColumn             = postgres.TimestampzColumn("published_at")
		DeadLetteredAtColumn          = postgres.TimestampzColumn("dead_lettered_at")
		SkippedAtColumn               = postgres.TimestampzColumn("skipped_at")
		allColumns                    = postgres.ColumnList{NotificationIDColumn, RouteIDColumn, ChannelColumn, RecipientRefColumn, StatusColumn, AttemptCountColumn, MaxAttemptsColumn, NextAttemptAtColumn, ResolvedEmailColumn, ResolvedLocaleColumn, LastErrorClassificationColumn, LastErrorMessageColumn, LastErrorAtColumn, CreatedAtColumn, UpdatedAtColumn, PublishedAtColumn, DeadLetteredAtColumn, SkippedAtColumn}
		mutableColumns                = postgres.ColumnList{ChannelColumn, RecipientRefColumn, StatusColumn, AttemptCountColumn, MaxAttemptsColumn, NextAttemptAtColumn, ResolvedEmailColumn, ResolvedLocaleColumn, LastErrorClassificationColumn, LastErrorMessageColumn, LastErrorAtColumn, CreatedAtColumn, UpdatedAtColumn, PublishedAtColumn, DeadLetteredAtColumn, SkippedAtColumn}
		// defaultColumns mirrors the columns with a database-side DEFAULT in
		// the migration (attempt_count 0, the four ''-defaulted text columns).
		defaultColumns = postgres.ColumnList{AttemptCountColumn, ResolvedEmailColumn, ResolvedLocaleColumn, LastErrorClassificationColumn, LastErrorMessageColumn}
	)

	return routesTable{
		Table: postgres.NewTable(schemaName, tableName, alias, allColumns...),

		//Columns
		NotificationID:          NotificationIDColumn,
		RouteID:                 RouteIDColumn,
		Channel:                 ChannelColumn,
		RecipientRef:            RecipientRefColumn,
		Status:                  StatusColumn,
		AttemptCount:            AttemptCountColumn,
		MaxAttempts:             MaxAttemptsColumn,
		NextAttemptAt:           NextAttemptAtColumn,
		ResolvedEmail:           ResolvedEmailColumn,
		ResolvedLocale:          ResolvedLocaleColumn,
		LastErrorClassification: LastErrorClassificationColumn,
		LastErrorMessage:        LastErrorMessageColumn,
		LastErrorAt:             LastErrorAtColumn,
		CreatedAt:               CreatedAtColumn,
		UpdatedAt:               UpdatedAtColumn,
		PublishedAt:             PublishedAtColumn,
		DeadLetteredAt:          DeadLetteredAtColumn,
		SkippedAt:               SkippedAtColumn,

		AllColumns:     allColumns,
		MutableColumns: mutableColumns,
		DefaultColumns: defaultColumns,
	}
}
@@ -0,0 +1,18 @@
//
// Code generated by go-jet DO NOT EDIT.
//
// WARNING: Changes to this file may cause incorrect behavior
// and will be lost if the code is regenerated
//
package table
// UseSchema sets a new schema name for all generated table SQL builder types. It is recommended to invoke
// this method only once at the beginning of the program.
//
// NOTE(review): the package-level table variables are reassigned here without
// any synchronization, so calling this concurrently with query building is a
// data race — confirm it is only invoked during startup.
func UseSchema(schema string) {
	DeadLetters = DeadLetters.FromSchema(schema)
	GooseDbVersion = GooseDbVersion.FromSchema(schema)
	MalformedIntents = MalformedIntents.FromSchema(schema)
	Records = Records.FromSchema(schema)
	Routes = Routes.FromSchema(schema)
}
@@ -0,0 +1,105 @@
-- +goose Up

-- records holds one durable notification record per accepted intent. The
-- (producer, idempotency_key) UNIQUE constraint replaces the previous Redis
-- idempotency keyspace: the durable row IS the idempotency reservation.
CREATE TABLE records (
    notification_id        text PRIMARY KEY,
    notification_type      text NOT NULL,
    producer               text NOT NULL,
    audience_kind          text NOT NULL,
    recipient_user_ids     jsonb NOT NULL DEFAULT '[]'::jsonb,
    payload_json           text NOT NULL,
    idempotency_key        text NOT NULL,
    request_fingerprint    text NOT NULL,
    request_id             text NOT NULL DEFAULT '',
    trace_id               text NOT NULL DEFAULT '',
    occurred_at            timestamptz NOT NULL,
    accepted_at            timestamptz NOT NULL,
    updated_at             timestamptz NOT NULL,
    idempotency_expires_at timestamptz NOT NULL,
    CONSTRAINT records_idempotency_unique UNIQUE (producer, idempotency_key)
);

-- Newest-first listing index used by operator/audit reads.
CREATE INDEX records_listing_idx
    ON records (accepted_at DESC, notification_id DESC);

-- routes stores one row per (notification_id, route_id). next_attempt_at is
-- non-NULL only while the row is a scheduling candidate (status pending or
-- failed); the partial index keeps the scheduler scan tight.
CREATE TABLE routes (
    notification_id           text NOT NULL
        REFERENCES records(notification_id) ON DELETE CASCADE,
    route_id                  text NOT NULL,
    channel                   text NOT NULL,
    recipient_ref             text NOT NULL,
    status                    text NOT NULL,
    attempt_count             integer NOT NULL DEFAULT 0,
    max_attempts              integer NOT NULL,
    next_attempt_at           timestamptz,
    resolved_email            text NOT NULL DEFAULT '',
    resolved_locale           text NOT NULL DEFAULT '',
    last_error_classification text NOT NULL DEFAULT '',
    last_error_message        text NOT NULL DEFAULT '',
    last_error_at             timestamptz,
    created_at                timestamptz NOT NULL,
    updated_at                timestamptz NOT NULL,
    published_at              timestamptz,
    dead_lettered_at          timestamptz,
    skipped_at                timestamptz,
    PRIMARY KEY (notification_id, route_id)
);

-- Drives the publishers' due-route pull. Partial predicate keeps the index
-- narrow: terminal rows (published / dead_letter / skipped) never appear.
-- NOTE(review): this relies on the application NULLing next_attempt_at when
-- a route reaches a terminal status — confirm in the route-state writer.
CREATE INDEX routes_due_idx
    ON routes (next_attempt_at)
    WHERE next_attempt_at IS NOT NULL;

-- Coarse status / channel filters used by operator views.
CREATE INDEX routes_status_idx ON routes (status);
CREATE INDEX routes_channel_idx ON routes (channel);

-- dead_letters carries the operator-visible record for one route that
-- exhausted automated handling. Cascade tied to the parent route row so a
-- record-level retention DELETE clears dependent dead-letter rows naturally.
CREATE TABLE dead_letters (
    notification_id        text NOT NULL,
    route_id               text NOT NULL,
    channel                text NOT NULL,
    recipient_ref          text NOT NULL,
    final_attempt_count    integer NOT NULL,
    max_attempts           integer NOT NULL,
    failure_classification text NOT NULL,
    failure_message        text NOT NULL,
    recovery_hint          text NOT NULL DEFAULT '',
    created_at             timestamptz NOT NULL,
    PRIMARY KEY (notification_id, route_id),
    FOREIGN KEY (notification_id, route_id)
        REFERENCES routes(notification_id, route_id) ON DELETE CASCADE
);

CREATE INDEX dead_letters_listing_idx
    ON dead_letters (created_at DESC, notification_id DESC, route_id DESC);

-- malformed_intents stores operator-visible records for stream entries the
-- intent validator could not accept. Independent retention pass.
CREATE TABLE malformed_intents (
    stream_entry_id   text PRIMARY KEY,
    notification_type text NOT NULL DEFAULT '',
    producer          text NOT NULL DEFAULT '',
    idempotency_key   text NOT NULL DEFAULT '',
    failure_code      text NOT NULL,
    failure_message   text NOT NULL,
    raw_fields        jsonb NOT NULL,
    recorded_at       timestamptz NOT NULL
);

CREATE INDEX malformed_intents_listing_idx
    ON malformed_intents (recorded_at DESC, stream_entry_id DESC);

-- +goose Down
-- Drop in reverse dependency order so foreign keys never block the teardown.
DROP TABLE IF EXISTS malformed_intents;
DROP TABLE IF EXISTS dead_letters;
DROP TABLE IF EXISTS routes;
DROP TABLE IF EXISTS records;
@@ -0,0 +1,19 @@
// Package migrations exposes the embedded goose migration files used by
// Notification Service to provision its `notification` schema in PostgreSQL.
//
// The embedded filesystem is consumed by `pkg/postgres.RunMigrations` during
// notification-service startup and by `cmd/jetgen` when regenerating the
// `internal/adapters/postgres/jet/` code against a transient PostgreSQL
// instance.
package migrations
import "embed"
// fs holds every *.sql migration file embedded at build time.
// NOTE(review): the variable name shadows the standard `io/fs` package if
// that package is ever imported here — consider renaming on next touch.
//go:embed *.sql
var fs embed.FS

// FS returns the embedded filesystem containing every numbered goose
// migration shipped with Notification Service.
func FS() embed.FS {
	return fs
}
@@ -0,0 +1,118 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
)
// Compile-time assertion that *Store satisfies acceptintent.Store: a build
// break here means the adapter drifted from the service-layer contract. The
// runtime wiring depends on this so the accept-intent service can consume
// the PostgreSQL adapter directly.
var _ acceptintent.Store = (*Store)(nil)
// CreateAcceptance persists one notification record plus every derived route
// slot atomically, inside a single BEGIN … COMMIT transaction. A losing
// idempotency race (unique violation on the records insert) is reported as
// acceptintent.ErrConflict; all other failures are wrapped with operation
// context.
func (store *Store) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	switch {
	case store == nil:
		return errors.New("create notification acceptance: nil store")
	case ctx == nil:
		return errors.New("create notification acceptance: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("create notification acceptance: %w", err)
	}

	// All writes happen through this closure so withTx controls commit,
	// rollback, and the operation deadline.
	writeAcceptance := func(txCtx context.Context, tx *sql.Tx) error {
		err := insertRecord(txCtx, tx, input.Notification, input.Idempotency.ExpiresAt)
		switch {
		case err != nil && isUniqueViolation(err):
			// The (producer, idempotency_key) UNIQUE constraint fired:
			// another acceptance already holds the reservation.
			return acceptintent.ErrConflict
		case err != nil:
			return fmt.Errorf("create notification acceptance: insert record: %w", err)
		}
		for i, route := range input.Routes {
			if err := insertRoute(txCtx, tx, route); err != nil {
				return fmt.Errorf("create notification acceptance: insert route[%d]: %w", i, err)
			}
		}
		return nil
	}
	return store.withTx(ctx, "create notification acceptance", writeAcceptance)
}
// GetIdempotency fetches the accepted idempotency reservation keyed by
// `(producer, idempotency_key)`. Because the records row IS the reservation,
// the lookup reads the records table and projects the relevant subset into
// an IdempotencyRecord. The boolean reports whether a reservation exists.
func (store *Store) GetIdempotency(ctx context.Context, producer intentstream.Producer, idempotencyKey string) (acceptintent.IdempotencyRecord, bool, error) {
	var zero acceptintent.IdempotencyRecord
	switch {
	case store == nil:
		return zero, false, errors.New("get notification idempotency: nil store")
	case ctx == nil:
		return zero, false, errors.New("get notification idempotency: nil context")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "get notification idempotency")
	if err != nil {
		return zero, false, err
	}
	defer cancel()

	row, found, err := loadIdempotencyByKey(operationCtx, store.db, string(producer), idempotencyKey)
	switch {
	case err != nil:
		return zero, false, err
	case !found:
		return zero, false, nil
	}
	return idempotencyRecordFromScanned(row), true, nil
}
// GetNotification fetches one accepted notification record by its
// NotificationID. The boolean reports whether the record exists.
func (store *Store) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) {
	var zero acceptintent.NotificationRecord
	switch {
	case store == nil:
		return zero, false, errors.New("get notification record: nil store")
	case ctx == nil:
		return zero, false, errors.New("get notification record: nil context")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "get notification record")
	if err != nil {
		return zero, false, err
	}
	defer cancel()

	row, found, err := loadRecord(operationCtx, store.db, notificationID)
	switch {
	case err != nil:
		return zero, false, err
	case !found:
		return zero, false, nil
	}
	return row.Record, true, nil
}
// GetRoute fetches one accepted notification route by the composite key
// `(notificationID, routeID)`; required by the publisher worker contracts.
// The boolean reports whether the route exists.
func (store *Store) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	var zero acceptintent.NotificationRoute
	switch {
	case store == nil:
		return zero, false, errors.New("get notification route: nil store")
	case ctx == nil:
		return zero, false, errors.New("get notification route: nil context")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "get notification route")
	if err != nil {
		return zero, false, err
	}
	defer cancel()

	return loadRoute(operationCtx, store.db, notificationID, routeID)
}
@@ -0,0 +1,65 @@
package notificationstore
import (
"encoding/json"
"fmt"
)
// marshalRecipientUserIDs encodes the user-id list as the JSONB bytes for
// the `records.recipient_user_ids` column. A nil slice is written as `[]`
// (never `null`) so the NOT NULL column round-trips across equality tests.
func marshalRecipientUserIDs(userIDs []string) ([]byte, error) {
	// json.Marshal would emit `null` for a nil slice; short-circuit to the
	// canonical empty-array encoding instead.
	if userIDs == nil {
		return []byte("[]"), nil
	}
	encoded, err := json.Marshal(userIDs)
	if err != nil {
		return nil, fmt.Errorf("marshal recipient user ids: %w", err)
	}
	return encoded, nil
}
// unmarshalRecipientUserIDs decodes the JSONB recipient user-id list.
// Empty payloads, JSON null, and `[]` all normalize to a nil slice so the
// read path matches what the service layer accepts (`nil` and empty are
// equivalent for audience_kind != user_set).
func unmarshalRecipientUserIDs(payload []byte) ([]string, error) {
	var userIDs []string
	if len(payload) > 0 {
		if err := json.Unmarshal(payload, &userIDs); err != nil {
			return nil, fmt.Errorf("unmarshal recipient user ids: %w", err)
		}
	}
	// Collapse decoded-but-empty lists to nil for a single canonical form.
	if len(userIDs) == 0 {
		return nil, nil
	}
	return userIDs, nil
}
// marshalRawFields encodes fields as the JSONB payload for the
// `malformed_intents.raw_fields` column. A nil map encodes as `{}`; keys
// are serialised verbatim so operator queries can later match any of them.
func marshalRawFields(fields map[string]any) ([]byte, error) {
	toEncode := fields
	if toEncode == nil {
		toEncode = map[string]any{}
	}
	encoded, err := json.Marshal(toEncode)
	if err != nil {
		return nil, fmt.Errorf("marshal raw fields: %w", err)
	}
	return encoded, nil
}
// unmarshalRawFields decodes the malformed_intents.raw_fields column. The
// result is always a non-nil map; a NULL/empty column becomes an empty {}.
func unmarshalRawFields(payload []byte) (map[string]any, error) {
	decoded := map[string]any{}
	if len(payload) == 0 {
		return decoded, nil
	}
	if err := json.Unmarshal(payload, &decoded); err != nil {
		return nil, fmt.Errorf("unmarshal raw fields: %w", err)
	}
	return decoded, nil
}
@@ -0,0 +1,61 @@
package notificationstore
import (
"context"
"database/sql"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
)
// deadLetterRow stores the column values written to one dead_letters row.
// Kept package-private because the public surface is the routestate
// CompleteRouteDeadLetterInput shape; this struct is only the on-disk
// projection.
type deadLetterRow struct {
	NotificationID        string // first half of the composite primary key
	RouteID               string // second half of the composite primary key
	Channel               string
	RecipientRef          string
	FinalAttemptCount     int // attempt count at the moment the route dead-lettered
	MaxAttempts           int
	FailureClassification string
	FailureMessage        string
	RecoveryHint          string
	CreatedAt             time.Time // normalised to UTC by insertDeadLetter before writing
}
// insertDeadLetter persists one dead-letter audit row within the caller's
// open transaction. Duplicate inserts for the same `(notification_id,
// route_id)` pair are rejected by the table's composite PRIMARY KEY.
func insertDeadLetter(ctx context.Context, tx *sql.Tx, row deadLetterRow) error {
	deadLetters := pgtable.DeadLetters
	stmt := deadLetters.INSERT(
		deadLetters.NotificationID,
		deadLetters.RouteID,
		deadLetters.Channel,
		deadLetters.RecipientRef,
		deadLetters.FinalAttemptCount,
		deadLetters.MaxAttempts,
		deadLetters.FailureClassification,
		deadLetters.FailureMessage,
		deadLetters.RecoveryHint,
		deadLetters.CreatedAt,
	).VALUES(
		row.NotificationID,
		row.RouteID,
		row.Channel,
		row.RecipientRef,
		row.FinalAttemptCount,
		row.MaxAttempts,
		row.FailureClassification,
		row.FailureMessage,
		row.RecoveryHint,
		row.CreatedAt.UTC(),
	)
	query, args := stmt.Sql()
	_, execErr := tx.ExecContext(ctx, query, args...)
	return execErr
}
@@ -0,0 +1,200 @@
package notificationstore
import (
"context"
"database/sql"
"net/url"
"os"
"sync"
"testing"
"time"
"galaxy/notification/internal/adapters/postgres/migrations"
"galaxy/postgres"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
// Package-wide test-environment tunables. One PostgreSQL container is
// started lazily (see ensurePostgresEnv) and shared by every test in the
// package.
const (
	pkgPostgresImage    = "postgres:16-alpine" // pinned server image for reproducible runs
	pkgSuperUser        = "galaxy"             // container superuser used only for provisioning
	pkgSuperPassword    = "galaxy"
	pkgSuperDatabase    = "galaxy_notification"
	pkgServiceRole      = "notificationservice" // login role the store connects as
	pkgServicePassword  = "notificationservice"
	pkgServiceSchema    = "notification"    // search_path applied to the service-role DSN
	pkgContainerStartup = 90 * time.Second  // max wait for the container readiness log
	pkgOperationTimeout = 10 * time.Second  // per-operation DB timeout used across the tests
)

// Singleton state guarding the one shared container start. The error is
// sticky: once startup fails, every later test observes the same failure.
var (
	pkgContainerOnce sync.Once
	pkgContainerErr  error
	pkgContainerEnv  *postgresEnv
)

// postgresEnv bundles the running container, the service-scoped DSN, and
// the open connection pool the package's tests share.
type postgresEnv struct {
	container *tcpostgres.PostgresContainer
	dsn       string
	pool      *sql.DB
}
// ensurePostgresEnv starts the shared postgres container exactly once per
// test binary and returns the package environment. When startup failed
// (commonly because Docker is unavailable) the calling test is skipped
// rather than failed; the stored error makes every subsequent test skip
// the same way.
func ensurePostgresEnv(t testing.TB) *postgresEnv {
	t.Helper()
	pkgContainerOnce.Do(func() {
		pkgContainerEnv, pkgContainerErr = startPostgresEnv()
	})
	if pkgContainerErr != nil {
		t.Skipf("postgres container start failed (Docker unavailable?): %v", pkgContainerErr)
	}
	return pkgContainerEnv
}
// startPostgresEnv boots the PostgreSQL container, provisions the service
// role and schema, opens the service-scoped pool, pings it, and runs the
// migrations. Every failure path tears the container (and, once open, the
// pool) down again before returning the error.
func startPostgresEnv() (*postgresEnv, error) {
	ctx := context.Background()
	container, err := tcpostgres.Run(ctx, pkgPostgresImage,
		tcpostgres.WithDatabase(pkgSuperDatabase),
		tcpostgres.WithUsername(pkgSuperUser),
		tcpostgres.WithPassword(pkgSuperPassword),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pkgContainerStartup),
		),
	)
	if err != nil {
		return nil, err
	}
	// Shared cleanup for every failure path below.
	terminate := func() { _ = testcontainers.TerminateContainer(container) }
	baseDSN, err := container.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		terminate()
		return nil, err
	}
	if err := provisionRoleAndSchema(ctx, baseDSN); err != nil {
		terminate()
		return nil, err
	}
	scopedDSN, err := dsnForServiceRole(baseDSN)
	if err != nil {
		terminate()
		return nil, err
	}
	cfg := postgres.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pkgOperationTimeout
	pool, err := postgres.OpenPrimary(ctx, cfg)
	if err != nil {
		terminate()
		return nil, err
	}
	if err := postgres.Ping(ctx, pool, pkgOperationTimeout); err != nil {
		_ = pool.Close()
		terminate()
		return nil, err
	}
	if err := postgres.RunMigrations(ctx, pool, migrations.FS(), "."); err != nil {
		_ = pool.Close()
		terminate()
		return nil, err
	}
	return &postgresEnv{container: container, dsn: scopedDSN, pool: pool}, nil
}
// provisionRoleAndSchema connects with the superuser DSN and creates the
// service login role plus the notification schema it owns. The DO block
// and IF NOT EXISTS guards keep the statements safe to re-run.
func provisionRoleAndSchema(ctx context.Context, baseDSN string) error {
	cfg := postgres.DefaultConfig()
	cfg.PrimaryDSN = baseDSN
	cfg.OperationTimeout = pkgOperationTimeout
	admin, err := postgres.OpenPrimary(ctx, cfg)
	if err != nil {
		return err
	}
	defer func() { _ = admin.Close() }()
	ddl := []string{
		`DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'notificationservice') THEN
CREATE ROLE notificationservice LOGIN PASSWORD 'notificationservice';
END IF;
END $$;`,
		`CREATE SCHEMA IF NOT EXISTS notification AUTHORIZATION notificationservice;`,
		`GRANT USAGE ON SCHEMA notification TO notificationservice;`,
	}
	for _, statement := range ddl {
		if _, err := admin.ExecContext(ctx, statement); err != nil {
			return err
		}
	}
	return nil
}
// dsnForServiceRole rewrites the superuser DSN so it authenticates as the
// service role and pins search_path to the notification schema. Only the
// scheme, host, and database path survive from the original URL.
func dsnForServiceRole(baseDSN string) (string, error) {
	base, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}
	query := url.Values{}
	query.Set("search_path", pkgServiceSchema)
	query.Set("sslmode", "disable")
	serviceURL := url.URL{
		Scheme:   base.Scheme,
		User:     url.UserPassword(pkgServiceRole, pkgServicePassword),
		Host:     base.Host,
		Path:     base.Path,
		RawQuery: query.Encode(),
	}
	return serviceURL.String(), nil
}
// newTestStore returns a Store wired to the package-scoped pool. The
// notification-owned tables are truncated first, so every test begins
// from an empty database while all tests share one container start.
func newTestStore(t *testing.T) *Store {
	t.Helper()
	env := ensurePostgresEnv(t)
	truncateAll(t, env.pool)
	cfg := Config{DB: env.pool, OperationTimeout: pkgOperationTimeout}
	store, err := New(cfg)
	if err != nil {
		t.Fatalf("new store: %v", err)
	}
	return store
}
// truncateAll empties every notification-owned table and restarts identity
// sequences so the next test observes a pristine database. CASCADE lets
// Postgres clear dependent rows regardless of the listing order.
func truncateAll(t *testing.T, db *sql.DB) {
	t.Helper()
	statement := `TRUNCATE TABLE
malformed_intents,
dead_letters,
routes,
records
RESTART IDENTITY CASCADE`
	if _, err := db.ExecContext(context.Background(), statement); err != nil {
		t.Fatalf("truncate tables: %v", err)
	}
}
// TestMain wraps the package's test run so that the container and pool
// started lazily by the first test are torn down on the way out, even
// when individual tests panic.
func TestMain(m *testing.M) {
	exitCode := m.Run()
	if env := pkgContainerEnv; env != nil {
		if env.pool != nil {
			_ = env.pool.Close()
		}
		if env.container != nil {
			_ = testcontainers.TerminateContainer(env.container)
		}
	}
	os.Exit(exitCode)
}
@@ -0,0 +1,68 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgconn"
)
// pgUniqueViolationCode is the SQLSTATE PostgreSQL reports when an INSERT
// or UPDATE breaks a UNIQUE constraint.
const pgUniqueViolationCode = "23505"

// isUniqueViolation reports whether err (anywhere in its chain) is a
// PostgreSQL unique-violation error, independent of constraint name.
func isUniqueViolation(err error) bool {
	var pgErr *pgconn.PgError
	if errors.As(err, &pgErr) {
		return pgErr.Code == pgUniqueViolationCode
	}
	return false
}
// isNoRows reports whether err wraps sql.ErrNoRows. The load helpers use
// it to translate the driver's "no rows" result into a found=false return
// instead of surfacing an error.
func isNoRows(err error) bool {
	return errors.Is(err, sql.ErrNoRows)
}
// nullableTime returns t.UTC() when non-zero, otherwise nil so the column
// is bound as SQL NULL. The notification domain uses zero-valued time.Time
// to express "absent" timestamps (no pointers), so the helper centralises
// the boundary translation.
func nullableTime(t time.Time) any {
if t.IsZero() {
return nil
}
return t.UTC()
}
// timeFromNullable copies an optional sql.NullTime read from PostgreSQL
// into a domain time.Time, applying the global UTC normalisation rule.
// Invalid (NULL) values become the zero time.Time.
func timeFromNullable(value sql.NullTime) time.Time {
if !value.Valid {
return time.Time{}
}
return value.Time.UTC()
}
// withTimeout derives a child context bounded by timeout and prefixes
// context errors with operation. Callers must always invoke the returned
// cancel.
func withTimeout(ctx context.Context, operation string, timeout time.Duration) (context.Context, context.CancelFunc, error) {
if ctx == nil {
return nil, nil, fmt.Errorf("%s: nil context", operation)
}
if err := ctx.Err(); err != nil {
return nil, nil, fmt.Errorf("%s: %w", operation, err)
}
if timeout <= 0 {
return nil, nil, fmt.Errorf("%s: operation timeout must be positive", operation)
}
bounded, cancel := context.WithTimeout(ctx, timeout)
return bounded, cancel, nil
}
@@ -0,0 +1,131 @@
package notificationstore
import (
"context"
"errors"
"fmt"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/malformedintent"
pg "github.com/go-jet/jet/v2/postgres"
)
// Record stores entry idempotently, keyed by stream entry id, satisfying
// `worker.MalformedIntentRecorder`. Recording the same `stream_entry_id`
// twice is a silent no-op thanks to `ON CONFLICT DO NOTHING`.
func (store *Store) Record(ctx context.Context, entry malformedintent.Entry) error {
	if store == nil {
		return errors.New("record malformed intent: nil store")
	}
	if ctx == nil {
		return errors.New("record malformed intent: nil context")
	}
	if err := entry.Validate(); err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}
	encodedFields, err := marshalRawFields(entry.RawFields)
	if err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}
	boundedCtx, cancel, err := store.operationContext(ctx, "record malformed intent")
	if err != nil {
		return err
	}
	defer cancel()
	intents := pgtable.MalformedIntents
	stmt := intents.INSERT(
		intents.StreamEntryID,
		intents.NotificationType,
		intents.Producer,
		intents.IdempotencyKey,
		intents.FailureCode,
		intents.FailureMessage,
		intents.RawFields,
		intents.RecordedAt,
	).VALUES(
		entry.StreamEntryID,
		entry.NotificationType,
		entry.Producer,
		entry.IdempotencyKey,
		string(entry.FailureCode),
		entry.FailureMessage,
		encodedFields,
		entry.RecordedAt.UTC(),
	).ON_CONFLICT(intents.StreamEntryID).DO_NOTHING()
	query, args := stmt.Sql()
	if _, execErr := store.db.ExecContext(boundedCtx, query, args...); execErr != nil {
		return fmt.Errorf("record malformed intent: %w", execErr)
	}
	return nil
}
// GetMalformedIntent loads one malformed-intent entry by stream entry id.
// The boolean result is false when no such row exists.
func (store *Store) GetMalformedIntent(ctx context.Context, streamEntryID string) (malformedintent.Entry, bool, error) {
	if store == nil {
		return malformedintent.Entry{}, false, errors.New("get malformed intent: nil store")
	}
	if ctx == nil {
		return malformedintent.Entry{}, false, errors.New("get malformed intent: nil context")
	}
	boundedCtx, cancel, err := store.operationContext(ctx, "get malformed intent")
	if err != nil {
		return malformedintent.Entry{}, false, err
	}
	defer cancel()
	intents := pgtable.MalformedIntents
	stmt := pg.SELECT(
		intents.NotificationType,
		intents.Producer,
		intents.IdempotencyKey,
		intents.FailureCode,
		intents.FailureMessage,
		intents.RawFields,
		intents.RecordedAt,
	).FROM(intents).
		WHERE(intents.StreamEntryID.EQ(pg.String(streamEntryID)))
	query, args := stmt.Sql()
	entry := malformedintent.Entry{StreamEntryID: streamEntryID}
	var (
		rawFailureCode string
		rawFields      []byte
	)
	scanErr := store.db.QueryRowContext(boundedCtx, query, args...).Scan(
		&entry.NotificationType,
		&entry.Producer,
		&entry.IdempotencyKey,
		&rawFailureCode,
		&entry.FailureMessage,
		&rawFields,
		&entry.RecordedAt,
	)
	if scanErr != nil {
		if isNoRows(scanErr) {
			return malformedintent.Entry{}, false, nil
		}
		return malformedintent.Entry{}, false, fmt.Errorf("get malformed intent: %w", scanErr)
	}
	entry.FailureCode = malformedintent.FailureCode(rawFailureCode)
	entry.RecordedAt = entry.RecordedAt.UTC()
	decodedFields, err := unmarshalRawFields(rawFields)
	if err != nil {
		return malformedintent.Entry{}, false, fmt.Errorf("get malformed intent: %w", err)
	}
	entry.RawFields = decodedFields
	return entry, true, nil
}
@@ -0,0 +1,223 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"galaxy/notification/internal/api/intentstream"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
pg "github.com/go-jet/jet/v2/postgres"
)
// recordSelectColumns is the canonical SELECT list for the records table.
// Its order MUST stay in lockstep with scanRecord's Scan argument order;
// change both together or reads will silently misassign columns.
var recordSelectColumns = pg.ColumnList{
	pgtable.Records.NotificationID,
	pgtable.Records.NotificationType,
	pgtable.Records.Producer,
	pgtable.Records.AudienceKind,
	pgtable.Records.RecipientUserIds,
	pgtable.Records.PayloadJSON,
	pgtable.Records.IdempotencyKey,
	pgtable.Records.RequestFingerprint,
	pgtable.Records.RequestID,
	pgtable.Records.TraceID,
	pgtable.Records.OccurredAt,
	pgtable.Records.AcceptedAt,
	pgtable.Records.UpdatedAt,
	pgtable.Records.IdempotencyExpiresAt,
}

// rowScanner abstracts *sql.Row and *sql.Rows so scanRecord/scanRoute can
// be shared between single-row lookups and iterated result sets.
type rowScanner interface {
	// Scan copies the current row's columns into dest, one per column.
	Scan(dest ...any) error
}

// scannedRecord stores the columns scanned from the records table plus the
// idempotency_expires_at value the service layer feeds back into the
// IdempotencyRecord constructed from the same row.
type scannedRecord struct {
	Record               acceptintent.NotificationRecord // domain record decoded from the row
	IdempotencyExpiresAt time.Time                       // records.idempotency_expires_at, normalised to UTC
}
// scanRecord reads one records row from rs, converting raw columns into
// the domain types and normalising every timestamp to UTC. sql.ErrNoRows
// is returned verbatim so callers can distinguish "no row" from a hard
// error. The Scan argument order must match recordSelectColumns.
func scanRecord(rs rowScanner) (scannedRecord, error) {
	var (
		out         scannedRecord
		rawType     string
		rawProducer string
		rawAudience string
		rawUserIDs  []byte
	)
	rec := &out.Record
	if err := rs.Scan(
		&rec.NotificationID,
		&rawType,
		&rawProducer,
		&rawAudience,
		&rawUserIDs,
		&rec.PayloadJSON,
		&rec.IdempotencyKey,
		&rec.RequestFingerprint,
		&rec.RequestID,
		&rec.TraceID,
		&rec.OccurredAt,
		&rec.AcceptedAt,
		&rec.UpdatedAt,
		&out.IdempotencyExpiresAt,
	); err != nil {
		return scannedRecord{}, err
	}
	userIDs, err := unmarshalRecipientUserIDs(rawUserIDs)
	if err != nil {
		return scannedRecord{}, err
	}
	rec.NotificationType = intentstream.NotificationType(rawType)
	rec.Producer = intentstream.Producer(rawProducer)
	rec.AudienceKind = intentstream.AudienceKind(rawAudience)
	rec.RecipientUserIDs = userIDs
	rec.OccurredAt = rec.OccurredAt.UTC()
	rec.AcceptedAt = rec.AcceptedAt.UTC()
	rec.UpdatedAt = rec.UpdatedAt.UTC()
	out.IdempotencyExpiresAt = out.IdempotencyExpiresAt.UTC()
	return out, nil
}
// insertRecord writes one records row plus its idempotency expiry inside
// an open transaction. The (producer, idempotency_key) UNIQUE constraint
// acts as the idempotency reservation; the caller translates
// `isUniqueViolation` failures into `acceptintent.ErrConflict`.
func insertRecord(ctx context.Context, tx *sql.Tx, record acceptintent.NotificationRecord, idempotencyExpiresAt time.Time) error {
	if err := record.Validate(); err != nil {
		return fmt.Errorf("insert record: %w", err)
	}
	encodedUsers, err := marshalRecipientUserIDs(record.RecipientUserIDs)
	if err != nil {
		return fmt.Errorf("insert record: %w", err)
	}
	records := pgtable.Records
	stmt := records.INSERT(
		records.NotificationID,
		records.NotificationType,
		records.Producer,
		records.AudienceKind,
		records.RecipientUserIds,
		records.PayloadJSON,
		records.IdempotencyKey,
		records.RequestFingerprint,
		records.RequestID,
		records.TraceID,
		records.OccurredAt,
		records.AcceptedAt,
		records.UpdatedAt,
		records.IdempotencyExpiresAt,
	).VALUES(
		record.NotificationID,
		string(record.NotificationType),
		string(record.Producer),
		string(record.AudienceKind),
		encodedUsers,
		record.PayloadJSON,
		record.IdempotencyKey,
		record.RequestFingerprint,
		record.RequestID,
		record.TraceID,
		record.OccurredAt.UTC(),
		record.AcceptedAt.UTC(),
		record.UpdatedAt.UTC(),
		idempotencyExpiresAt.UTC(),
	)
	query, args := stmt.Sql()
	_, execErr := tx.ExecContext(ctx, query, args...)
	return execErr
}
// loadRecord fetches the records row for notificationID through the
// store's default pool. The boolean result is false when no row exists.
func loadRecord(ctx context.Context, db *sql.DB, notificationID string) (scannedRecord, bool, error) {
	stmt := pg.SELECT(recordSelectColumns).
		FROM(pgtable.Records).
		WHERE(pgtable.Records.NotificationID.EQ(pg.String(notificationID)))
	query, args := stmt.Sql()
	scanned, err := scanRecord(db.QueryRowContext(ctx, query, args...))
	switch {
	case isNoRows(err):
		return scannedRecord{}, false, nil
	case err != nil:
		return scannedRecord{}, false, fmt.Errorf("load notification record: %w", err)
	}
	return scanned, true, nil
}
// loadIdempotencyByKey fetches the records row that owns the
// `(producer, idempotency_key)` reservation. The boolean result is false
// when no reservation matches.
func loadIdempotencyByKey(ctx context.Context, db *sql.DB, producer string, idempotencyKey string) (scannedRecord, bool, error) {
	stmt := pg.SELECT(recordSelectColumns).
		FROM(pgtable.Records).
		WHERE(pg.AND(
			pgtable.Records.Producer.EQ(pg.String(producer)),
			pgtable.Records.IdempotencyKey.EQ(pg.String(idempotencyKey)),
		))
	query, args := stmt.Sql()
	scanned, err := scanRecord(db.QueryRowContext(ctx, query, args...))
	switch {
	case isNoRows(err):
		return scannedRecord{}, false, nil
	case err != nil:
		return scannedRecord{}, false, fmt.Errorf("load notification idempotency: %w", err)
	}
	return scanned, true, nil
}
// idempotencyRecordFromScanned projects a scanned records row into the
// IdempotencyRecord shape. CreatedAt mirrors AcceptedAt because the
// durable row itself is the idempotency reservation.
func idempotencyRecordFromScanned(scanned scannedRecord) acceptintent.IdempotencyRecord {
	rec := scanned.Record
	return acceptintent.IdempotencyRecord{
		Producer:           rec.Producer,
		IdempotencyKey:     rec.IdempotencyKey,
		NotificationID:     rec.NotificationID,
		RequestFingerprint: rec.RequestFingerprint,
		CreatedAt:          rec.AcceptedAt,
		ExpiresAt:          scanned.IdempotencyExpiresAt,
	}
}
// errRecordNotFound is the package-private sentinel returned by helpers
// when a row required by an in-progress transaction is not found.
// NOTE(review): no matching site is visible in this chunk — confirm
// callers compare it with errors.Is before relying on the message text.
var errRecordNotFound = errors.New("record not found")
@@ -0,0 +1,67 @@
package notificationstore
import (
"context"
"errors"
"fmt"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
pg "github.com/go-jet/jet/v2/postgres"
)
// DeleteRecordsOlderThan removes records rows whose `accepted_at` predates
// cutoff. The records FK CASCADE clears the dependent routes and
// dead_letters rows in the same statement.
func (store *Store) DeleteRecordsOlderThan(ctx context.Context, cutoff time.Time) (int64, error) {
if store == nil {
return 0, errors.New("delete notification records: nil store")
}
operationCtx, cancel, err := store.operationContext(ctx, "delete notification records")
if err != nil {
return 0, err
}
defer cancel()
stmt := pgtable.Records.DELETE().
WHERE(pgtable.Records.AcceptedAt.LT(pg.TimestampzT(cutoff.UTC())))
query, args := stmt.Sql()
result, err := store.db.ExecContext(operationCtx, query, args...)
if err != nil {
return 0, fmt.Errorf("delete notification records: %w", err)
}
rows, err := result.RowsAffected()
if err != nil {
return 0, fmt.Errorf("delete notification records: rows affected: %w", err)
}
return rows, nil
}
// DeleteMalformedIntentsOlderThan removes malformed-intent rows whose
// `recorded_at` predates cutoff.
func (store *Store) DeleteMalformedIntentsOlderThan(ctx context.Context, cutoff time.Time) (int64, error) {
if store == nil {
return 0, errors.New("delete malformed intents: nil store")
}
operationCtx, cancel, err := store.operationContext(ctx, "delete malformed intents")
if err != nil {
return 0, err
}
defer cancel()
stmt := pgtable.MalformedIntents.DELETE().
WHERE(pgtable.MalformedIntents.RecordedAt.LT(pg.TimestampzT(cutoff.UTC())))
query, args := stmt.Sql()
result, err := store.db.ExecContext(operationCtx, query, args...)
if err != nil {
return 0, fmt.Errorf("delete malformed intents: %w", err)
}
rows, err := result.RowsAffected()
if err != nil {
return 0, fmt.Errorf("delete malformed intents: rows affected: %w", err)
}
return rows, nil
}
@@ -0,0 +1,248 @@
package notificationstore
import (
"context"
"database/sql"
"fmt"
"time"
"galaxy/notification/internal/api/intentstream"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
pg "github.com/go-jet/jet/v2/postgres"
)
// routeSelectColumns is the canonical SELECT list for the routes table.
// Its order MUST stay in lockstep with scanRoute's Scan argument order;
// change both together or reads will silently misassign columns.
var routeSelectColumns = pg.ColumnList{
	pgtable.Routes.NotificationID,
	pgtable.Routes.RouteID,
	pgtable.Routes.Channel,
	pgtable.Routes.RecipientRef,
	pgtable.Routes.Status,
	pgtable.Routes.AttemptCount,
	pgtable.Routes.MaxAttempts,
	pgtable.Routes.NextAttemptAt,
	pgtable.Routes.ResolvedEmail,
	pgtable.Routes.ResolvedLocale,
	pgtable.Routes.LastErrorClassification,
	pgtable.Routes.LastErrorMessage,
	pgtable.Routes.LastErrorAt,
	pgtable.Routes.CreatedAt,
	pgtable.Routes.UpdatedAt,
	pgtable.Routes.PublishedAt,
	pgtable.Routes.DeadLetteredAt,
	pgtable.Routes.SkippedAt,
}
// scanRoute reads one routes row from rs, converting raw columns into the
// domain types, mapping nullable timestamps through timeFromNullable, and
// normalising the mandatory timestamps to UTC. The Scan argument order
// must match routeSelectColumns.
func scanRoute(rs rowScanner) (acceptintent.NotificationRoute, error) {
	var (
		route          acceptintent.NotificationRoute
		rawChannel     string
		rawStatus      string
		nextAttemptAt  sql.NullTime
		lastErrorAt    sql.NullTime
		publishedAt    sql.NullTime
		deadLetteredAt sql.NullTime
		skippedAt      sql.NullTime
	)
	if err := rs.Scan(
		&route.NotificationID,
		&route.RouteID,
		&rawChannel,
		&route.RecipientRef,
		&rawStatus,
		&route.AttemptCount,
		&route.MaxAttempts,
		&nextAttemptAt,
		&route.ResolvedEmail,
		&route.ResolvedLocale,
		&route.LastErrorClassification,
		&route.LastErrorMessage,
		&lastErrorAt,
		&route.CreatedAt,
		&route.UpdatedAt,
		&publishedAt,
		&deadLetteredAt,
		&skippedAt,
	); err != nil {
		return acceptintent.NotificationRoute{}, err
	}
	route.Channel = intentstream.Channel(rawChannel)
	route.Status = acceptintent.RouteStatus(rawStatus)
	route.NextAttemptAt = timeFromNullable(nextAttemptAt)
	route.LastErrorAt = timeFromNullable(lastErrorAt)
	route.CreatedAt = route.CreatedAt.UTC()
	route.UpdatedAt = route.UpdatedAt.UTC()
	route.PublishedAt = timeFromNullable(publishedAt)
	route.DeadLetteredAt = timeFromNullable(deadLetteredAt)
	route.SkippedAt = timeFromNullable(skippedAt)
	return route, nil
}
// insertRoute writes one route row inside the caller's open transaction,
// validating the route first and binding optional timestamps as NULL via
// nullableTime.
func insertRoute(ctx context.Context, tx *sql.Tx, route acceptintent.NotificationRoute) error {
	if err := route.Validate(); err != nil {
		return fmt.Errorf("insert route: %w", err)
	}
	routesTable := pgtable.Routes
	stmt := routesTable.INSERT(
		routesTable.NotificationID,
		routesTable.RouteID,
		routesTable.Channel,
		routesTable.RecipientRef,
		routesTable.Status,
		routesTable.AttemptCount,
		routesTable.MaxAttempts,
		routesTable.NextAttemptAt,
		routesTable.ResolvedEmail,
		routesTable.ResolvedLocale,
		routesTable.LastErrorClassification,
		routesTable.LastErrorMessage,
		routesTable.LastErrorAt,
		routesTable.CreatedAt,
		routesTable.UpdatedAt,
		routesTable.PublishedAt,
		routesTable.DeadLetteredAt,
		routesTable.SkippedAt,
	).VALUES(
		route.NotificationID,
		route.RouteID,
		string(route.Channel),
		route.RecipientRef,
		string(route.Status),
		route.AttemptCount,
		route.MaxAttempts,
		nullableTime(route.NextAttemptAt),
		route.ResolvedEmail,
		route.ResolvedLocale,
		route.LastErrorClassification,
		route.LastErrorMessage,
		nullableTime(route.LastErrorAt),
		route.CreatedAt.UTC(),
		route.UpdatedAt.UTC(),
		nullableTime(route.PublishedAt),
		nullableTime(route.DeadLetteredAt),
		nullableTime(route.SkippedAt),
	)
	query, args := stmt.Sql()
	_, execErr := tx.ExecContext(ctx, query, args...)
	return execErr
}
// loadRoute fetches one route row by its composite key through the pool.
// The boolean result is false when no matching row exists.
func loadRoute(ctx context.Context, db *sql.DB, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	stmt := pg.SELECT(routeSelectColumns).
		FROM(pgtable.Routes).
		WHERE(pg.AND(
			pgtable.Routes.NotificationID.EQ(pg.String(notificationID)),
			pgtable.Routes.RouteID.EQ(pg.String(routeID)),
		))
	query, args := stmt.Sql()
	route, err := scanRoute(db.QueryRowContext(ctx, query, args...))
	switch {
	case isNoRows(err):
		return acceptintent.NotificationRoute{}, false, nil
	case err != nil:
		return acceptintent.NotificationRoute{}, false, fmt.Errorf("load notification route: %w", err)
	}
	return route, true, nil
}
// loadRouteTx fetches one route row by its composite key inside an open
// transaction. The boolean result is false when no matching row exists.
func loadRouteTx(ctx context.Context, tx *sql.Tx, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	stmt := pg.SELECT(routeSelectColumns).
		FROM(pgtable.Routes).
		WHERE(pg.AND(
			pgtable.Routes.NotificationID.EQ(pg.String(notificationID)),
			pgtable.Routes.RouteID.EQ(pg.String(routeID)),
		))
	query, args := stmt.Sql()
	route, err := scanRoute(tx.QueryRowContext(ctx, query, args...))
	switch {
	case isNoRows(err):
		return acceptintent.NotificationRoute{}, false, nil
	case err != nil:
		return acceptintent.NotificationRoute{}, false, fmt.Errorf("load notification route: %w", err)
	}
	return route, true, nil
}
// updateRouteIfMatching writes the mutable route columns back inside an
// open transaction, gated on `updated_at = expectedUpdatedAt`. The
// returned count is the number of rows actually updated; zero signals an
// optimistic-concurrency loss (the row changed since it was loaded).
func updateRouteIfMatching(ctx context.Context, tx *sql.Tx, route acceptintent.NotificationRoute, expectedUpdatedAt time.Time) (int64, error) {
	routesTable := pgtable.Routes
	stmt := routesTable.UPDATE(
		routesTable.Status,
		routesTable.AttemptCount,
		routesTable.NextAttemptAt,
		routesTable.ResolvedEmail,
		routesTable.ResolvedLocale,
		routesTable.LastErrorClassification,
		routesTable.LastErrorMessage,
		routesTable.LastErrorAt,
		routesTable.UpdatedAt,
		routesTable.PublishedAt,
		routesTable.DeadLetteredAt,
		routesTable.SkippedAt,
	).SET(
		string(route.Status),
		route.AttemptCount,
		nullableTime(route.NextAttemptAt),
		route.ResolvedEmail,
		route.ResolvedLocale,
		route.LastErrorClassification,
		route.LastErrorMessage,
		nullableTime(route.LastErrorAt),
		route.UpdatedAt.UTC(),
		nullableTime(route.PublishedAt),
		nullableTime(route.DeadLetteredAt),
		nullableTime(route.SkippedAt),
	).WHERE(pg.AND(
		routesTable.NotificationID.EQ(pg.String(route.NotificationID)),
		routesTable.RouteID.EQ(pg.String(route.RouteID)),
		routesTable.UpdatedAt.EQ(pg.TimestampzT(expectedUpdatedAt.UTC())),
	))
	query, args := stmt.Sql()
	result, err := tx.ExecContext(ctx, query, args...)
	if err != nil {
		return 0, err
	}
	rowCount, err := result.RowsAffected()
	if err != nil {
		return 0, err
	}
	return rowCount, nil
}
@@ -0,0 +1,262 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/routestate"
"galaxy/notification/internal/telemetry"
pg "github.com/go-jet/jet/v2/postgres"
)
// scheduledRouteKey synthesises a stable, human-readable key for one
// ScheduledRoute by joining the two identifiers with "/". Notification
// publishers do not interpret the key beyond requiring it to be non-empty
// (`ScheduledRoute.Validate`).
func scheduledRouteKey(notificationID string, routeID string) string {
	return notificationID + "/" + routeID
}
// ListDueRoutes returns up to limit routes whose `next_attempt_at` is at
// or before now, oldest first. The query takes no row locks; contention
// between workers is resolved by the lease (Redis) plus the optimistic-
// concurrency check inside the `Complete*` methods.
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
	switch {
	case store == nil:
		return nil, errors.New("list due routes: nil store")
	case ctx == nil:
		return nil, errors.New("list due routes: nil context")
	}
	if err := routestate.ValidateUTCMillisecondTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}
	boundedCtx, cancel, err := store.operationContext(ctx, "list due routes")
	if err != nil {
		return nil, err
	}
	defer cancel()
	routesTable := pgtable.Routes
	stmt := pg.SELECT(routesTable.NotificationID, routesTable.RouteID).
		FROM(routesTable).
		WHERE(pg.AND(
			routesTable.NextAttemptAt.IS_NOT_NULL(),
			routesTable.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
		)).
		ORDER_BY(routesTable.NextAttemptAt.ASC()).
		LIMIT(limit)
	query, args := stmt.Sql()
	rows, err := store.db.QueryContext(boundedCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	defer rows.Close()
	due := make([]routestate.ScheduledRoute, 0, limit)
	for rows.Next() {
		var notificationID, routeID string
		if err := rows.Scan(&notificationID, &routeID); err != nil {
			return nil, fmt.Errorf("list due routes: scan: %w", err)
		}
		due = append(due, routestate.ScheduledRoute{
			RouteKey:       scheduledRouteKey(notificationID, routeID),
			NotificationID: notificationID,
			RouteID:        routeID,
		})
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	return due, nil
}
// ReadRouteScheduleSnapshot reports the current depth of the route
// schedule (rows with a non-NULL `next_attempt_at`) and, when at least
// one such row exists, the oldest scheduled timestamp in UTC. The runtime
// surfaces this through the telemetry snapshot reader.
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	var snapshot telemetry.RouteScheduleSnapshot
	if store == nil {
		return snapshot, errors.New("read route schedule snapshot: nil store")
	}
	if ctx == nil {
		return snapshot, errors.New("read route schedule snapshot: nil context")
	}
	boundedCtx, cancel, err := store.operationContext(ctx, "read route schedule snapshot")
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, err
	}
	defer cancel()
	stmt := pg.SELECT(
		pg.COUNT(pg.STAR),
		pg.MIN(pgtable.Routes.NextAttemptAt),
	).
		FROM(pgtable.Routes).
		WHERE(pgtable.Routes.NextAttemptAt.IS_NOT_NULL())
	query, args := stmt.Sql()
	var (
		depth  int64
		oldest sql.NullTime
	)
	if err := store.db.QueryRowContext(boundedCtx, query, args...).Scan(&depth, &oldest); err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: %w", err)
	}
	snapshot.Depth = depth
	if oldest.Valid {
		oldestUTC := oldest.Time.UTC()
		snapshot.OldestScheduledFor = &oldestUTC
	}
	return snapshot, nil
}
// CompleteRoutePublished marks the expected route as `published`,
// increments attempt_count, and clears every retry/error field. Optimistic
// concurrency on `updated_at` rejects races that happened since the
// publisher loaded the row; a mismatch surfaces as `routestate.ErrConflict`.
//
// Note: the outbound stream emission (XADD) happens in the publisher
// before this call. The store deliberately ignores the input.Stream and
// input.StreamValues fields — they exist on the input only so the
// publisher can carry one struct through its state machine.
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
	switch {
	case store == nil:
		return errors.New("complete route published: nil store")
	case ctx == nil:
		return errors.New("complete route published: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	// Post-transition row image: published status, one more attempt, and
	// all retry/error bookkeeping reset to zero values.
	next := input.ExpectedRoute
	next.Status = acceptintent.RouteStatusPublished
	next.AttemptCount++
	next.NextAttemptAt = time.Time{}
	next.LastErrorClassification = ""
	next.LastErrorMessage = ""
	next.LastErrorAt = time.Time{}
	next.UpdatedAt = input.PublishedAt
	next.PublishedAt = input.PublishedAt
	next.DeadLetteredAt = time.Time{}
	return store.withTx(ctx, "complete route published", func(ctx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(ctx, tx, next, input.ExpectedRoute.UpdatedAt)
		if err != nil {
			return fmt.Errorf("complete route published: %w", err)
		}
		if affected == 0 {
			// Zero rows touched means updated_at no longer matched.
			return routestate.ErrConflict
		}
		return nil
	})
}
// CompleteRouteFailed records one retryable publication failure: increments
// attempt_count, populates the last-error fields, and reschedules the row
// at `NextAttemptAt`.
func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error {
	switch {
	case store == nil:
		return errors.New("complete route failed: nil store")
	case ctx == nil:
		return errors.New("complete route failed: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}
	// Post-transition row image: failed status, one more attempt, failure
	// details recorded, and the next retry scheduled.
	rescheduled := input.ExpectedRoute
	rescheduled.Status = acceptintent.RouteStatusFailed
	rescheduled.AttemptCount++
	rescheduled.NextAttemptAt = input.NextAttemptAt
	rescheduled.LastErrorClassification = input.FailureClassification
	rescheduled.LastErrorMessage = input.FailureMessage
	rescheduled.LastErrorAt = input.FailedAt
	rescheduled.UpdatedAt = input.FailedAt
	return store.withTx(ctx, "complete route failed", func(ctx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(ctx, tx, rescheduled, input.ExpectedRoute.UpdatedAt)
		if err != nil {
			return fmt.Errorf("complete route failed: %w", err)
		}
		if affected == 0 {
			// updated_at mismatch: another actor changed the row first.
			return routestate.ErrConflict
		}
		return nil
	})
}
// CompleteRouteDeadLetter records one terminal publication failure:
// marks the route `dead_letter`, clears the schedule, and inserts the
// dead-letter audit row inside the same transaction.
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error {
	if store == nil {
		return errors.New("complete route dead letter: nil store")
	}
	if ctx == nil {
		return errors.New("complete route dead letter: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}
	// Dead-lettering is only legal once the retry budget is exhausted;
	// reject caller bugs that would dead-letter a route with attempts left.
	finalAttempts := input.ExpectedRoute.AttemptCount + 1
	if finalAttempts < input.ExpectedRoute.MaxAttempts {
		return fmt.Errorf(
			"complete route dead letter: final attempt count %d is below max attempts %d",
			finalAttempts,
			input.ExpectedRoute.MaxAttempts,
		)
	}
	// Post-transition row image: terminal status, schedule cleared, failure
	// details and dead-letter timestamp recorded.
	terminal := input.ExpectedRoute
	terminal.Status = acceptintent.RouteStatusDeadLetter
	terminal.AttemptCount = finalAttempts
	terminal.NextAttemptAt = time.Time{}
	terminal.LastErrorClassification = input.FailureClassification
	terminal.LastErrorMessage = input.FailureMessage
	terminal.LastErrorAt = input.DeadLetteredAt
	terminal.UpdatedAt = input.DeadLetteredAt
	terminal.DeadLetteredAt = input.DeadLetteredAt
	return store.withTx(ctx, "complete route dead letter", func(ctx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(ctx, tx, terminal, input.ExpectedRoute.UpdatedAt)
		if err != nil {
			return fmt.Errorf("complete route dead letter: %w", err)
		}
		if affected == 0 {
			return routestate.ErrConflict
		}
		if err := insertDeadLetter(ctx, tx, deadLetterRow{
			NotificationID:        terminal.NotificationID,
			RouteID:               terminal.RouteID,
			Channel:               string(terminal.Channel),
			RecipientRef:          terminal.RecipientRef,
			FinalAttemptCount:     terminal.AttemptCount,
			MaxAttempts:           terminal.MaxAttempts,
			FailureClassification: input.FailureClassification,
			FailureMessage:        input.FailureMessage,
			RecoveryHint:          input.RecoveryHint,
			CreatedAt:             input.DeadLetteredAt,
		}); err != nil {
			return fmt.Errorf("complete route dead letter: %w", err)
		}
		return nil
	})
}
@@ -0,0 +1,126 @@
// Package notificationstore implements the PostgreSQL-backed source-of-truth
// persistence used by Notification Service.
//
// The package owns the on-disk shape of the `notification` schema (defined
// in `galaxy/notification/internal/adapters/postgres/migrations`) and
// translates the schema-agnostic Store interfaces declared by the
// `internal/service/acceptintent` use case and the route publishers into
// concrete `database/sql` operations driven by the pgx driver. Atomic
// composite operations (acceptance, route-completion transitions) execute
// inside explicit `BEGIN … COMMIT` transactions; per-row lifecycle
// transitions use optimistic concurrency on the `updated_at` token rather
// than retaining a `FOR UPDATE` lock across the publisher's outbound stream
// emission.
//
// Stage 5 of `PG_PLAN.md` migrates Notification Service away from
// Redis-backed durable state. The inbound `notification:intents` Redis
// Stream and its consumer offset, the outbound `gateway:client-events` and
// `mail:delivery_commands` Redis Streams, and the short-lived
// `route_leases:*` exclusivity hint all remain on Redis; this store is no
// longer aware of any of them.
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
)
// Config configures one PostgreSQL-backed notification store instance. The
// store does not own the underlying *sql.DB lifecycle: the caller (typically
// the service runtime) opens, instruments, migrates, and closes the pool.
// The store only borrows the pool and bounds individual round trips with
// OperationTimeout.
//
// Both fields are required: New rejects a nil DB and a non-positive
// OperationTimeout, so a zero Config never constructs a Store.
type Config struct {
	// DB stores the connection pool the store uses for every query.
	DB *sql.DB
	// OperationTimeout bounds one round trip. The store creates a derived
	// context for each operation so callers cannot starve the pool with an
	// unbounded ctx. Multi-statement transactions inherit this bound for the
	// whole BEGIN … COMMIT span.
	OperationTimeout time.Duration
}
// Store persists Notification Service durable state in PostgreSQL and
// exposes the per-use-case Store interfaces required by acceptance,
// publication completion, malformed-intent recording, and the periodic
// retention worker.
//
// Construct instances with New; the zero value is unusable (no pool, no
// positive timeout).
type Store struct {
	// db is the caller-owned connection pool; the store never closes it.
	db *sql.DB
	// operationTimeout bounds each round trip and each whole transaction.
	operationTimeout time.Duration
}
// New constructs one PostgreSQL-backed notification store from cfg. It
// rejects a nil connection pool and a non-positive operation timeout so
// every later operation can rely on both invariants.
func New(cfg Config) (*Store, error) {
	switch {
	case cfg.DB == nil:
		return nil, errors.New("new postgres notification store: db must not be nil")
	case cfg.OperationTimeout <= 0:
		return nil, errors.New("new postgres notification store: operation timeout must be positive")
	}
	store := &Store{db: cfg.DB, operationTimeout: cfg.OperationTimeout}
	return store, nil
}
// Close is a no-op for the PostgreSQL-backed store: the connection pool is
// owned by the caller (the runtime) and closed once the runtime shuts down.
// The accessor remains so the runtime wiring can treat the store like the
// previous Redis-backed implementation.
//
// It always returns nil.
func (store *Store) Close() error {
	return nil
}
// Ping verifies that the configured PostgreSQL backend is reachable. It
// runs `db.PingContext` under the configured operation timeout.
func (store *Store) Ping(ctx context.Context) error {
	// Guard the nil receiver and nil context explicitly, matching the other
	// store methods (e.g. ListDueRoutes), instead of panicking downstream.
	if store == nil {
		return errors.New("ping postgres notification store: nil store")
	}
	if ctx == nil {
		return errors.New("ping postgres notification store: nil context")
	}
	operationCtx, cancel, err := withTimeout(ctx, "ping postgres notification store", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()
	if err := store.db.PingContext(operationCtx); err != nil {
		return fmt.Errorf("ping postgres notification store: %w", err)
	}
	return nil
}
// withTx runs fn inside a BEGIN … COMMIT transaction bounded by the store's
// operation timeout. It rolls back on any error or panic and returns
// whatever fn returned. The transaction uses the default isolation level
// (`READ COMMITTED`); per-row contention is resolved through optimistic
// concurrency on `updated_at` rather than `SELECT … FOR UPDATE`.
func (store *Store) withTx(ctx context.Context, operation string, fn func(ctx context.Context, tx *sql.Tx) error) error {
	operationCtx, cancel, err := withTimeout(ctx, operation, store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()
	tx, err := store.db.BeginTx(operationCtx, nil)
	if err != nil {
		return fmt.Errorf("%s: begin: %w", operation, err)
	}
	// The deferred Rollback makes the panic guarantee in the doc comment
	// real: if fn panics, the transaction is still rolled back while the
	// panic unwinds. After a successful Commit it is a harmless no-op that
	// returns sql.ErrTxDone, which we deliberately ignore.
	defer func() { _ = tx.Rollback() }()
	if err := fn(operationCtx, tx); err != nil {
		return err
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("%s: commit: %w", operation, err)
	}
	return nil
}
// operationContext bounds one read or write that does not need a
// transaction envelope (single statement). It mirrors store.withTx for
// non-transactional callers.
//
// Callers must invoke the returned CancelFunc (typically via defer) once
// the operation finishes, as every call site in this file does.
func (store *Store) operationContext(ctx context.Context, operation string) (context.Context, context.CancelFunc, error) {
	return withTimeout(ctx, operation, store.operationTimeout)
}
@@ -0,0 +1,567 @@
package notificationstore
import (
	"context"
	"errors"
	"strconv"
	"testing"
	"time"

	"galaxy/notification/internal/api/intentstream"
	"galaxy/notification/internal/service/acceptintent"
	"galaxy/notification/internal/service/malformedintent"
	"galaxy/notification/internal/service/routestate"
)
// TestPing checks that a freshly constructed store can reach its backend.
func TestPing(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	if err := store.Ping(ctx); err != nil {
		t.Fatalf("ping: %v", err)
	}
}
// TestCreateAcceptanceAndReads persists one notification with two routes
// plus its idempotency row in a single acceptance, then reads each record
// back and checks the fields that must round-trip through the SQL encoding
// (payload JSON, recipient array, expiry and schedule timestamps).
func TestCreateAcceptanceAndReads(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	// Millisecond truncation keeps timestamps comparable after storage.
	now := time.Now().UTC().Truncate(time.Millisecond)
	notification := newNotification(t, "n-1", now)
	pushRoute := newPendingRoute(notification.NotificationID, "push:user-1", intentstream.ChannelPush, "user-1", now)
	emailRoute := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	idem := newIdempotency(notification, now)
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{pushRoute, emailRoute},
		Idempotency:  idem,
	}); err != nil {
		t.Fatalf("create acceptance: %v", err)
	}
	gotNotification, found, err := store.GetNotification(ctx, notification.NotificationID)
	if err != nil || !found {
		t.Fatalf("get notification: found=%v err=%v", found, err)
	}
	if gotNotification.PayloadJSON != notification.PayloadJSON {
		t.Fatalf("notification payload mismatch: got %q want %q", gotNotification.PayloadJSON, notification.PayloadJSON)
	}
	if len(gotNotification.RecipientUserIDs) != 1 || gotNotification.RecipientUserIDs[0] != "user-1" {
		t.Fatalf("recipient_user_ids round-trip: %#v", gotNotification.RecipientUserIDs)
	}
	gotIdem, found, err := store.GetIdempotency(ctx, notification.Producer, notification.IdempotencyKey)
	if err != nil || !found {
		t.Fatalf("get idempotency: found=%v err=%v", found, err)
	}
	if gotIdem.NotificationID != notification.NotificationID {
		t.Fatalf("idempotency notification id mismatch: got %q want %q", gotIdem.NotificationID, notification.NotificationID)
	}
	if !gotIdem.ExpiresAt.Equal(idem.ExpiresAt) {
		t.Fatalf("idempotency expires_at mismatch: got %v want %v", gotIdem.ExpiresAt, idem.ExpiresAt)
	}
	// Spot-check one of the two routes; channel and schedule are the fields
	// the publishers depend on.
	gotRoute, found, err := store.GetRoute(ctx, notification.NotificationID, pushRoute.RouteID)
	if err != nil || !found {
		t.Fatalf("get push route: found=%v err=%v", found, err)
	}
	if gotRoute.Channel != intentstream.ChannelPush {
		t.Fatalf("push route channel mismatch: got %q", gotRoute.Channel)
	}
	if !gotRoute.NextAttemptAt.Equal(pushRoute.NextAttemptAt) {
		t.Fatalf("push route next_attempt_at mismatch: got %v want %v", gotRoute.NextAttemptAt, pushRoute.NextAttemptAt)
	}
}
// TestCreateAcceptanceIdempotencyConflict verifies that a second acceptance
// reusing the same (producer, idempotency key) pair under a different
// notification id is rejected with acceptintent.ErrConflict.
func TestCreateAcceptanceIdempotencyConflict(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)
	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "push:user-1", intentstream.ChannelPush, "user-1", now)
	first := acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}
	if err := store.CreateAcceptance(ctx, first); err != nil {
		t.Fatalf("first acceptance: %v", err)
	}
	// Build a distinct notification (new id, later timestamps) that still
	// presents the first notification's idempotency key.
	clone := notification
	clone.NotificationID = "n-2"
	cloneRoute := route
	cloneRoute.NotificationID = clone.NotificationID
	clone.AcceptedAt = now.Add(time.Second)
	clone.UpdatedAt = clone.AcceptedAt
	cloneIdem := newIdempotency(clone, now.Add(time.Second))
	cloneIdem.IdempotencyKey = notification.IdempotencyKey
	err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: clone,
		Routes:       []acceptintent.NotificationRoute{cloneRoute},
		Idempotency:  cloneIdem,
	})
	if !errors.Is(err, acceptintent.ErrConflict) {
		t.Fatalf("expected acceptintent.ErrConflict, got %v", err)
	}
}
// TestListDueRoutes seeds one route scheduled in the past and one in the
// future, then checks that ListDueRoutes at the current instant returns
// only the past one.
func TestListDueRoutes(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	base := time.Now().UTC().Truncate(time.Millisecond)
	pastNotification := newNotification(t, "past", base)
	pastRoute := newPendingRoute(pastNotification.NotificationID, "push:past", intentstream.ChannelPush, "user-1", base.Add(-time.Minute))
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: pastNotification,
		Routes:       []acceptintent.NotificationRoute{pastRoute},
		Idempotency:  newIdempotency(pastNotification, base),
	}); err != nil {
		t.Fatalf("acceptance past: %v", err)
	}
	futureNotification := newNotification(t, "future", base)
	// A distinct idempotency key keeps the second acceptance from
	// conflicting with the first.
	futureNotification.IdempotencyKey = "key-future"
	futureRoute := newPendingRoute(futureNotification.NotificationID, "push:future", intentstream.ChannelPush, "user-2", base.Add(time.Hour))
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: futureNotification,
		Routes:       []acceptintent.NotificationRoute{futureRoute},
		Idempotency:  newIdempotency(futureNotification, base),
	}); err != nil {
		t.Fatalf("acceptance future: %v", err)
	}
	due, err := store.ListDueRoutes(ctx, base, 10)
	if err != nil {
		t.Fatalf("list due routes: %v", err)
	}
	if len(due) != 1 {
		t.Fatalf("expected one due route, got %d", len(due))
	}
	if due[0].NotificationID != "past" || due[0].RouteID != "push:past" {
		t.Fatalf("unexpected due route: %#v", due[0])
	}
}
// TestCompleteRoutePublishedHappyPath drives one pending route through
// CompleteRoutePublished and checks the resulting row: published status,
// incremented attempt count, cleared schedule, recorded published_at.
func TestCompleteRoutePublishedHappyPath(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)
	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}); err != nil {
		t.Fatalf("acceptance: %v", err)
	}
	publishedAt := now.Add(time.Second)
	err := store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    "token",
		PublishedAt:   publishedAt,
		Stream:        "mail:delivery_commands",
		StreamValues:  map[string]any{"k": "v"},
	})
	if err != nil {
		t.Fatalf("complete published: %v", err)
	}
	// Assert found explicitly: a missing row must fail here with a clear
	// message, not later as a confusing zero-value status mismatch.
	got, found, err := store.GetRoute(ctx, route.NotificationID, route.RouteID)
	if err != nil || !found {
		t.Fatalf("get route: found=%v err=%v", found, err)
	}
	if got.Status != acceptintent.RouteStatusPublished {
		t.Fatalf("expected status published, got %q", got.Status)
	}
	if got.AttemptCount != 1 {
		t.Fatalf("expected attempt_count 1, got %d", got.AttemptCount)
	}
	if !got.NextAttemptAt.IsZero() {
		t.Fatalf("expected next_attempt_at cleared, got %v", got.NextAttemptAt)
	}
	if !got.PublishedAt.Equal(publishedAt) {
		t.Fatalf("expected published_at %v, got %v", publishedAt, got.PublishedAt)
	}
}
// TestCompleteRoutePublishedConflictOnUpdatedAtMismatch exercises the
// optimistic-concurrency guard: completing a route with a stale
// `updated_at` token must surface routestate.ErrConflict.
func TestCompleteRoutePublishedConflictOnUpdatedAtMismatch(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)
	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	acceptance := acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}
	if err := store.CreateAcceptance(ctx, acceptance); err != nil {
		t.Fatalf("acceptance: %v", err)
	}
	// Present an expected-route image whose updated_at no longer matches
	// the stored row.
	staleRoute := route
	staleRoute.UpdatedAt = now.Add(-time.Minute)
	completeErr := store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
		ExpectedRoute: staleRoute,
		LeaseToken:    "token",
		PublishedAt:   now.Add(time.Second),
		Stream:        "mail:delivery_commands",
		StreamValues:  map[string]any{"k": "v"},
	})
	if !errors.Is(completeErr, routestate.ErrConflict) {
		t.Fatalf("expected routestate.ErrConflict, got %v", completeErr)
	}
}
// TestCompleteRouteFailedReschedule records one retryable failure and
// checks the row: failed status, incremented attempt count, rescheduled
// next_attempt_at, and the recorded failure classification.
func TestCompleteRouteFailedReschedule(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)
	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}); err != nil {
		t.Fatalf("acceptance: %v", err)
	}
	failedAt := now.Add(time.Second)
	nextAttemptAt := now.Add(2 * time.Minute)
	err := store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            "token",
		FailedAt:              failedAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: "smtp_temporary_failure",
		FailureMessage:        "graylisted",
	})
	if err != nil {
		t.Fatalf("complete failed: %v", err)
	}
	// Assert found explicitly: a missing row must fail here with a clear
	// message instead of tripping a misleading status assertion below.
	got, found, err := store.GetRoute(ctx, route.NotificationID, route.RouteID)
	if err != nil || !found {
		t.Fatalf("get route: found=%v err=%v", found, err)
	}
	if got.Status != acceptintent.RouteStatusFailed {
		t.Fatalf("expected status failed, got %q", got.Status)
	}
	if got.AttemptCount != 1 {
		t.Fatalf("expected attempt_count 1, got %d", got.AttemptCount)
	}
	if !got.NextAttemptAt.Equal(nextAttemptAt) {
		t.Fatalf("expected next_attempt_at %v, got %v", nextAttemptAt, got.NextAttemptAt)
	}
	if got.LastErrorClassification != "smtp_temporary_failure" {
		t.Fatalf("expected error classification, got %q", got.LastErrorClassification)
	}
}
// TestCompleteRouteDeadLetter dead-letters a route whose single-attempt
// budget is exhausted and verifies both the updated route row and the
// dead_letters audit row written in the same transaction.
func TestCompleteRouteDeadLetter(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)
	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	route.MaxAttempts = 1 // single attempt budget so the first failure is terminal.
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}); err != nil {
		t.Fatalf("acceptance: %v", err)
	}
	deadAt := now.Add(time.Second)
	err := store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
		ExpectedRoute:         route,
		LeaseToken:            "token",
		DeadLetteredAt:        deadAt,
		FailureClassification: "smtp_permanent_failure",
		FailureMessage:        "rejected",
		RecoveryHint:          "manual review",
	})
	if err != nil {
		t.Fatalf("complete dead letter: %v", err)
	}
	// Assert found explicitly: a missing row must fail here with a clear
	// message rather than as a zero-value status mismatch below.
	got, found, err := store.GetRoute(ctx, route.NotificationID, route.RouteID)
	if err != nil || !found {
		t.Fatalf("get route: found=%v err=%v", found, err)
	}
	if got.Status != acceptintent.RouteStatusDeadLetter {
		t.Fatalf("expected status dead_letter, got %q", got.Status)
	}
	if !got.DeadLetteredAt.Equal(deadAt) {
		t.Fatalf("expected dead_lettered_at %v, got %v", deadAt, got.DeadLetteredAt)
	}
	// Check that the dead_letters audit row was inserted.
	row := store.db.QueryRow(`SELECT failure_classification, recovery_hint FROM dead_letters WHERE notification_id = $1 AND route_id = $2`,
		route.NotificationID, route.RouteID)
	var classification string
	var hint string
	if err := row.Scan(&classification, &hint); err != nil {
		t.Fatalf("scan dead_letter row: %v", err)
	}
	if classification != "smtp_permanent_failure" || hint != "manual review" {
		t.Fatalf("dead_letter row mismatch: classification=%q hint=%q", classification, hint)
	}
}
// TestReadRouteScheduleSnapshot seeds three scheduled routes (one in the
// past, two in the future) and checks that the snapshot reports depth 3
// with the oldest scheduled timestamp.
func TestReadRouteScheduleSnapshot(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	base := time.Now().UTC().Truncate(time.Millisecond)
	for index, offset := range []time.Duration{-time.Hour, time.Minute, 2 * time.Minute} {
		notification := newNotification(t, idString("n-", index), base)
		// Unique idempotency key per acceptance to avoid conflicts.
		notification.IdempotencyKey = idString("key-", index)
		route := newPendingRoute(notification.NotificationID, idString("push:user-", index), intentstream.ChannelPush, idString("user-", index), base.Add(offset))
		if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
			Notification: notification,
			Routes:       []acceptintent.NotificationRoute{route},
			Idempotency:  newIdempotency(notification, base),
		}); err != nil {
			t.Fatalf("acceptance %d: %v", index, err)
		}
	}
	snap, err := store.ReadRouteScheduleSnapshot(ctx)
	if err != nil {
		t.Fatalf("read snapshot: %v", err)
	}
	if snap.Depth != 3 {
		t.Fatalf("expected depth 3, got %d", snap.Depth)
	}
	if snap.OldestScheduledFor == nil {
		t.Fatalf("expected oldest scheduled time, got nil")
	}
	if !snap.OldestScheduledFor.Equal(base.Add(-time.Hour)) {
		t.Fatalf("expected oldest %v, got %v", base.Add(-time.Hour), *snap.OldestScheduledFor)
	}
}
// TestMalformedIntentRecordAndGet records a malformed-intent entry, records
// it a second time to confirm Record is idempotent, and reads it back.
func TestMalformedIntentRecordAndGet(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)
	entry := malformedintent.Entry{
		StreamEntryID:    "stream-1",
		NotificationType: "game.turn.ready",
		Producer:         "game-master",
		IdempotencyKey:   "key-1",
		FailureCode:      malformedintent.FailureCodeInvalidPayload,
		FailureMessage:   "decode failed",
		RawFields:        map[string]any{"raw_payload": "abc"},
		RecordedAt:       now,
	}
	if err := store.Record(ctx, entry); err != nil {
		t.Fatalf("record malformed: %v", err)
	}
	// idempotent re-record
	if err := store.Record(ctx, entry); err != nil {
		t.Fatalf("record malformed twice: %v", err)
	}
	got, found, err := store.GetMalformedIntent(ctx, entry.StreamEntryID)
	if err != nil || !found {
		t.Fatalf("get malformed: found=%v err=%v", found, err)
	}
	if got.FailureCode != malformedintent.FailureCodeInvalidPayload {
		t.Fatalf("failure_code mismatch: %q", got.FailureCode)
	}
	if got.FailureMessage != entry.FailureMessage {
		t.Fatalf("failure_message mismatch: %q", got.FailureMessage)
	}
}
// TestRetentionDeletesAndCascade seeds one 30-day-old notification (with a
// dead-lettered route) and one fresh notification, runs the retention
// delete with a 7-day cutoff, and verifies that the old notification plus
// its cascaded routes and dead_letters rows are gone while the fresh
// notification survives.
func TestRetentionDeletesAndCascade(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	old := time.Now().UTC().Add(-30 * 24 * time.Hour).Truncate(time.Millisecond)
	fresh := time.Now().UTC().Truncate(time.Millisecond)
	oldNotification := newNotification(t, "old", old)
	oldNotification.IdempotencyKey = "key-old"
	oldRoute := newPendingRoute(oldNotification.NotificationID, "push:user-old", intentstream.ChannelPush, "user-old", old)
	// Single-attempt budget so the dead letter below is legal.
	oldRoute.MaxAttempts = 1
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: oldNotification,
		Routes:       []acceptintent.NotificationRoute{oldRoute},
		Idempotency:  newIdempotency(oldNotification, old),
	}); err != nil {
		t.Fatalf("acceptance old: %v", err)
	}
	if err := store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
		ExpectedRoute:         oldRoute,
		LeaseToken:            "token",
		DeadLetteredAt:        old.Add(time.Second),
		FailureClassification: "smtp_permanent_failure",
		FailureMessage:        "rejected",
	}); err != nil {
		t.Fatalf("dead letter old: %v", err)
	}
	freshNotification := newNotification(t, "fresh", fresh)
	freshNotification.IdempotencyKey = "key-fresh"
	freshRoute := newPendingRoute(freshNotification.NotificationID, "push:user-fresh", intentstream.ChannelPush, "user-fresh", fresh)
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: freshNotification,
		Routes:       []acceptintent.NotificationRoute{freshRoute},
		Idempotency:  newIdempotency(freshNotification, fresh),
	}); err != nil {
		t.Fatalf("acceptance fresh: %v", err)
	}
	cutoff := time.Now().UTC().Add(-7 * 24 * time.Hour)
	deleted, err := store.DeleteRecordsOlderThan(ctx, cutoff)
	if err != nil {
		t.Fatalf("delete records: %v", err)
	}
	if deleted != 1 {
		t.Fatalf("expected 1 deleted, got %d", deleted)
	}
	if _, found, err := store.GetNotification(ctx, "old"); err != nil || found {
		t.Fatalf("old notification should be gone: found=%v err=%v", found, err)
	}
	// Confirm cascade emptied routes/dead_letters for old notification.
	var routeCount int
	if err := store.db.QueryRow(`SELECT COUNT(*) FROM routes WHERE notification_id = 'old'`).Scan(&routeCount); err != nil {
		t.Fatalf("count routes: %v", err)
	}
	if routeCount != 0 {
		t.Fatalf("expected 0 cascaded routes, got %d", routeCount)
	}
	var deadCount int
	if err := store.db.QueryRow(`SELECT COUNT(*) FROM dead_letters WHERE notification_id = 'old'`).Scan(&deadCount); err != nil {
		t.Fatalf("count dead letters: %v", err)
	}
	if deadCount != 0 {
		t.Fatalf("expected 0 cascaded dead letters, got %d", deadCount)
	}
	// Fresh notification stays.
	if _, found, err := store.GetNotification(ctx, "fresh"); err != nil || !found {
		t.Fatalf("fresh notification missing: found=%v err=%v", found, err)
	}
}
// TestDeleteMalformedIntentsOlderThan records one 30-day-old and one fresh
// malformed-intent entry, deletes with a 7-day cutoff, and checks that only
// the old entry is removed.
func TestDeleteMalformedIntentsOlderThan(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	old := time.Now().UTC().Add(-30 * 24 * time.Hour).Truncate(time.Millisecond)
	fresh := time.Now().UTC().Truncate(time.Millisecond)
	oldEntry := malformedintent.Entry{
		StreamEntryID:  "stream-old",
		FailureCode:    malformedintent.FailureCodeInvalidPayload,
		FailureMessage: "decode failed",
		RawFields:      map[string]any{},
		RecordedAt:     old,
	}
	if err := store.Record(ctx, oldEntry); err != nil {
		t.Fatalf("record old: %v", err)
	}
	freshEntry := malformedintent.Entry{
		StreamEntryID:  "stream-fresh",
		FailureCode:    malformedintent.FailureCodeInvalidPayload,
		FailureMessage: "decode failed",
		RawFields:      map[string]any{},
		RecordedAt:     fresh,
	}
	if err := store.Record(ctx, freshEntry); err != nil {
		t.Fatalf("record fresh: %v", err)
	}
	cutoff := time.Now().UTC().Add(-7 * 24 * time.Hour)
	deleted, err := store.DeleteMalformedIntentsOlderThan(ctx, cutoff)
	if err != nil {
		t.Fatalf("delete: %v", err)
	}
	if deleted != 1 {
		t.Fatalf("expected 1 deleted, got %d", deleted)
	}
	if _, found, err := store.GetMalformedIntent(ctx, "stream-old"); err != nil || found {
		t.Fatalf("old malformed intent should be gone: found=%v err=%v", found, err)
	}
	if _, found, err := store.GetMalformedIntent(ctx, "stream-fresh"); err != nil || !found {
		t.Fatalf("fresh malformed intent missing: found=%v err=%v", found, err)
	}
}
// ---- helpers ----
// newNotification builds one acceptance-ready notification record whose
// key-derived fields (idempotency key, request fingerprint) incorporate id
// and whose timestamps are all set to occurred.
func newNotification(t testing.TB, id string, occurred time.Time) acceptintent.NotificationRecord {
	t.Helper()
	record := acceptintent.NotificationRecord{
		NotificationID:   id,
		NotificationType: intentstream.NotificationTypeGameTurnReady,
		Producer:         intentstream.ProducerGameMaster,
		AudienceKind:     intentstream.AudienceKindUser,
		RecipientUserIDs: []string{"user-1"},
		PayloadJSON:      `{"a":1}`,
	}
	record.IdempotencyKey = "key-" + id
	record.RequestFingerprint = "fp-" + id
	record.OccurredAt = occurred
	record.AcceptedAt = occurred
	record.UpdatedAt = occurred
	return record
}
// newIdempotency derives the idempotency row CreateAcceptance expects for
// record, valid for seven days from createdAt.
func newIdempotency(record acceptintent.NotificationRecord, createdAt time.Time) acceptintent.IdempotencyRecord {
	const ttl = 7 * 24 * time.Hour
	idem := acceptintent.IdempotencyRecord{
		Producer:           record.Producer,
		IdempotencyKey:     record.IdempotencyKey,
		NotificationID:     record.NotificationID,
		RequestFingerprint: record.RequestFingerprint,
	}
	idem.CreatedAt = createdAt
	idem.ExpiresAt = createdAt.Add(ttl)
	return idem
}
// newPendingRoute builds one pending route for notificationID addressed to
// recipient over channel, scheduled for its first attempt at dueAt with a
// three-attempt budget.
func newPendingRoute(notificationID string, routeID string, channel intentstream.Channel, recipient string, dueAt time.Time) acceptintent.NotificationRoute {
	route := acceptintent.NotificationRoute{
		NotificationID: notificationID,
		RouteID:        routeID,
		Channel:        channel,
		RecipientRef:   "user:" + recipient,
		Status:         acceptintent.RouteStatusPending,
		AttemptCount:   0,
		MaxAttempts:    3,
		ResolvedEmail:  recipient + "@example.com",
		ResolvedLocale: "en",
	}
	route.NextAttemptAt = dueAt
	route.CreatedAt = dueAt
	route.UpdatedAt = dueAt
	return route
}
// idString returns prefix followed by the decimal representation of index,
// e.g. idString("n-", 2) == "n-2". Using strconv.Itoa generalizes the
// previous hand-enumerated 0–2 cases to any index, so loops over larger
// fixtures keep producing distinct identifiers.
func idString(prefix string, index int) string {
	return prefix + strconv.Itoa(index)
}
@@ -0,0 +1,86 @@
// Package routepublisher composes one PostgreSQL-backed route-state store
// (notificationstore) with one Redis-backed lease store (redisstate.LeaseStore)
// behind the publisher worker contracts. The composition lets push and email
// publishers keep their existing one-store dependency while Stage 5 of
// `PG_PLAN.md` splits durable state to PostgreSQL and the short-lived
// per-replica exclusivity lease to Redis.
package routepublisher
import (
"context"
"errors"
"time"
"galaxy/notification/internal/adapters/postgres/notificationstore"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/routestate"
"galaxy/notification/internal/telemetry"
)
// Store delegates each route-publisher method to either the durable state
// store (PostgreSQL) or the lease store (Redis), preserving the umbrella
// contract consumed by `worker.PushPublisher` and `worker.EmailPublisher`.
//
// Construct instances with New; both dependencies are required.
type Store struct {
	// state owns durable route/notification rows and dead-letter persistence (PostgreSQL).
	state *notificationstore.Store
	// leases owns the short-lived per-replica exclusivity hint (Redis).
	leases *redisstate.LeaseStore
}
// New constructs one composite route-publisher store. Both dependencies are
// required: the SQL store owns route lifecycle and dead-letter persistence,
// and the lease store owns the short-lived per-replica exclusivity hint
// retained on Redis per PG_PLAN.md §5.
func New(state *notificationstore.Store, leases *redisstate.LeaseStore) (*Store, error) {
	switch {
	case state == nil:
		return nil, errors.New("new route publisher store: nil notification state store")
	case leases == nil:
		return nil, errors.New("new route publisher store: nil lease store")
	}
	composite := &Store{state: state, leases: leases}
	return composite, nil
}
// ListDueRoutes delegates to the SQL store, which owns the durable route
// schedule (`next_attempt_at`).
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
	return store.state.ListDueRoutes(ctx, now, limit)
}
// TryAcquireRouteLease delegates to the Redis lease store, which holds the
// short-lived per-replica exclusivity hint.
func (store *Store) TryAcquireRouteLease(ctx context.Context, notificationID string, routeID string, token string, ttl time.Duration) (bool, error) {
	return store.leases.TryAcquireRouteLease(ctx, notificationID, routeID, token, ttl)
}
// ReleaseRouteLease delegates to the Redis lease store, releasing the hint
// acquired via TryAcquireRouteLease.
func (store *Store) ReleaseRouteLease(ctx context.Context, notificationID string, routeID string, token string) error {
	return store.leases.ReleaseRouteLease(ctx, notificationID, routeID, token)
}
// GetNotification delegates to the SQL store, which owns durable notification
// records.
func (store *Store) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) {
	return store.state.GetNotification(ctx, notificationID)
}
// GetRoute delegates to the SQL store, which owns durable route records.
func (store *Store) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	return store.state.GetRoute(ctx, notificationID, routeID)
}
// CompleteRoutePublished delegates to the SQL store, which owns route
// lifecycle transitions.
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
	return store.state.CompleteRoutePublished(ctx, input)
}
// CompleteRouteFailed delegates to the SQL store, which owns route lifecycle
// transitions.
func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error {
	return store.state.CompleteRouteFailed(ctx, input)
}
// CompleteRouteDeadLetter delegates to the SQL store, which owns dead-letter
// persistence.
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error {
	return store.state.CompleteRouteDeadLetter(ctx, input)
}
// ReadRouteScheduleSnapshot delegates to the SQL store, which owns the
// durable route schedule consumed by telemetry.
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	return store.state.ReadRouteScheduleSnapshot(ctx)
}
+11 -16
View File
@@ -1,5 +1,6 @@
// Package redisadapter provides the Redis client helpers used by Notification
// Service runtime wiring.
// Service runtime wiring. The helpers wrap `pkg/redisconn` so the runtime
// keeps the same construction surface across the Stage 5 migration.
package redisadapter
import (
@@ -8,27 +9,21 @@ import (
"galaxy/notification/internal/config"
"galaxy/notification/internal/telemetry"
"galaxy/redisconn"
"github.com/redis/go-redis/extra/redisotel/v9"
"github.com/redis/go-redis/v9"
)
// NewClient constructs one Redis client from cfg.
// NewClient constructs one Redis client from cfg using the shared
// `pkg/redisconn` helper, which enforces the master/replica/password env-var
// shape.
func NewClient(cfg config.RedisConfig) *redis.Client {
return redis.NewClient(&redis.Options{
Addr: cfg.Addr,
Username: cfg.Username,
Password: cfg.Password,
DB: cfg.DB,
TLSConfig: cfg.TLSConfig(),
DialTimeout: cfg.OperationTimeout,
ReadTimeout: cfg.OperationTimeout,
WriteTimeout: cfg.OperationTimeout,
})
return redisconn.NewMasterClient(cfg.Conn)
}
// InstrumentClient attaches Redis tracing and metrics exporters to client when
// telemetryRuntime is available.
// InstrumentClient attaches Redis tracing and metrics exporters to client
// when telemetryRuntime is available.
func InstrumentClient(client *redis.Client, telemetryRuntime *telemetry.Runtime) error {
if client == nil {
return fmt.Errorf("instrument redis client: nil client")
@@ -55,13 +50,13 @@ func InstrumentClient(client *redis.Client, telemetryRuntime *telemetry.Runtime)
}
// Ping performs the startup Redis connectivity check bounded by
// cfg.OperationTimeout.
// cfg.Conn.OperationTimeout.
func Ping(ctx context.Context, cfg config.RedisConfig, client *redis.Client) error {
if client == nil {
return fmt.Errorf("ping redis: nil client")
}
pingCtx, cancel := context.WithTimeout(ctx, cfg.OperationTimeout)
pingCtx, cancel := context.WithTimeout(ctx, cfg.Conn.OperationTimeout)
defer cancel()
if err := client.Ping(pingCtx).Err(); err != nil {
@@ -1,140 +0,0 @@
package redisstate
import (
"context"
"errors"
"fmt"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/redis/go-redis/v9"
)
// AcceptanceStore provides the Redis-backed durable storage used by the
// intent-acceptance use case.
type AcceptanceStore struct {
	// client is the shared Redis connection used for reads.
	client *redis.Client
	// writer performs the multi-key optimistic write transactions.
	writer *AtomicWriter
	// keys builds the Redis key names for all record kinds.
	keys Keyspace
	// cfg holds the retention (TTL) settings applied to writes.
	cfg AcceptanceConfig
}
// NewAcceptanceStore constructs one Redis-backed acceptance store.
func NewAcceptanceStore(client *redis.Client, cfg AcceptanceConfig) (*AcceptanceStore, error) {
	if client == nil {
		return nil, errors.New("new notification acceptance store: nil redis client")
	}
	atomicWriter, writerErr := NewAtomicWriter(client, cfg)
	if writerErr != nil {
		return nil, fmt.Errorf("new notification acceptance store: %w", writerErr)
	}
	store := &AcceptanceStore{
		client: client,
		writer: atomicWriter,
		keys:   Keyspace{},
		cfg:    cfg,
	}
	return store, nil
}
// CreateAcceptance stores one complete accepted notification write set in
// Redis.
func (store *AcceptanceStore) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	if store == nil || store.client == nil || store.writer == nil {
		return errors.New("create notification acceptance: nil store")
	}
	if ctx == nil {
		return errors.New("create notification acceptance: nil context")
	}
	if validateErr := input.Validate(); validateErr != nil {
		return fmt.Errorf("create notification acceptance: %w", validateErr)
	}
	// Map the writer's conflict sentinel onto the service-level sentinel so
	// callers only depend on acceptintent.ErrConflict.
	switch writeErr := store.writer.CreateAcceptance(ctx, input); {
	case errors.Is(writeErr, ErrConflict):
		return fmt.Errorf("create notification acceptance: %w", acceptintent.ErrConflict)
	case writeErr != nil:
		return fmt.Errorf("create notification acceptance: %w", writeErr)
	default:
		return nil
	}
}
// GetIdempotency loads one accepted idempotency scope from Redis.
func (store *AcceptanceStore) GetIdempotency(ctx context.Context, producer intentstream.Producer, idempotencyKey string) (acceptintent.IdempotencyRecord, bool, error) {
	var none acceptintent.IdempotencyRecord
	if store == nil || store.client == nil {
		return none, false, errors.New("get notification idempotency: nil store")
	}
	if ctx == nil {
		return none, false, errors.New("get notification idempotency: nil context")
	}
	key := store.keys.Idempotency(producer, idempotencyKey)
	payload, getErr := store.client.Get(ctx, key).Bytes()
	// redis.Nil means the key is absent, which is a clean not-found result.
	if errors.Is(getErr, redis.Nil) {
		return none, false, nil
	}
	if getErr != nil {
		return none, false, fmt.Errorf("get notification idempotency: %w", getErr)
	}
	record, decodeErr := UnmarshalIdempotency(payload)
	if decodeErr != nil {
		return none, false, fmt.Errorf("get notification idempotency: %w", decodeErr)
	}
	return record, true, nil
}
// GetNotification loads one accepted notification record from Redis.
func (store *AcceptanceStore) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) {
	var none acceptintent.NotificationRecord
	if store == nil || store.client == nil {
		return none, false, errors.New("get notification record: nil store")
	}
	if ctx == nil {
		return none, false, errors.New("get notification record: nil context")
	}
	payload, getErr := store.client.Get(ctx, store.keys.Notification(notificationID)).Bytes()
	// redis.Nil means the key is absent, which is a clean not-found result.
	if errors.Is(getErr, redis.Nil) {
		return none, false, nil
	}
	if getErr != nil {
		return none, false, fmt.Errorf("get notification record: %w", getErr)
	}
	record, decodeErr := UnmarshalNotification(payload)
	if decodeErr != nil {
		return none, false, fmt.Errorf("get notification record: %w", decodeErr)
	}
	return record, true, nil
}
// GetRoute loads one accepted notification route by NotificationID and
// RouteID.
func (store *AcceptanceStore) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	var none acceptintent.NotificationRoute
	if store == nil || store.client == nil {
		return none, false, errors.New("get notification route: nil store")
	}
	if ctx == nil {
		return none, false, errors.New("get notification route: nil context")
	}
	payload, getErr := store.client.Get(ctx, store.keys.Route(notificationID, routeID)).Bytes()
	// redis.Nil means the key is absent, which is a clean not-found result.
	if errors.Is(getErr, redis.Nil) {
		return none, false, nil
	}
	if getErr != nil {
		return none, false, fmt.Errorf("get notification route: %w", getErr)
	}
	record, decodeErr := UnmarshalRoute(payload)
	if decodeErr != nil {
		return none, false, fmt.Errorf("get notification route: %w", decodeErr)
	}
	return record, true, nil
}
@@ -1,311 +0,0 @@
package redisstate
import (
"context"
"io"
"log/slog"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/config"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/require"
)
// TestAcceptanceStoreCreateAcceptancePersistsNotificationRoutesAndSchedule
// verifies that one CreateAcceptance call persists the notification record,
// the idempotency reservation, every route, schedules only the pending route,
// and applies the configured TTLs.
func TestAcceptanceStoreCreateAcceptancePersistsNotificationRoutesAndSchedule(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	input := validAdminAcceptanceInput(now)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))
	// The notification record round-trips through GetNotification.
	notificationRecord, found, err := store.GetNotification(context.Background(), input.Notification.NotificationID)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, input.Notification.NotificationID, notificationRecord.NotificationID)
	// The idempotency reservation round-trips through GetIdempotency.
	idempotencyRecord, found, err := store.GetIdempotency(context.Background(), input.Idempotency.Producer, input.Idempotency.IdempotencyKey)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, input.Idempotency.RequestFingerprint, idempotencyRecord.RequestFingerprint)
	// The skipped push route is persisted with status "skipped".
	pushRoutePayload, err := client.Get(context.Background(), Keyspace{}.Route(input.Notification.NotificationID, "push:email:owner@example.com")).Bytes()
	require.NoError(t, err)
	pushRoute, err := UnmarshalRoute(pushRoutePayload)
	require.NoError(t, err)
	require.Equal(t, acceptintent.RouteStatusSkipped, pushRoute.Status)
	emailRouteKey := Keyspace{}.Route(input.Notification.NotificationID, "email:email:owner@example.com")
	emailRoutePayload, err := client.Get(context.Background(), emailRouteKey).Bytes()
	require.NoError(t, err)
	emailRoute, err := UnmarshalRoute(emailRoutePayload)
	require.NoError(t, err)
	require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)
	// Only the pending email route lands in the route-schedule sorted set,
	// scored with its next-attempt time in unix milliseconds.
	scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Len(t, scheduled, 1)
	require.Equal(t, emailRouteKey, scheduled[0].Member)
	require.Equal(t, float64(now.UnixMilli()), scheduled[0].Score)
	// Each persisted key carries the TTL configured above.
	notificationTTL, err := client.PTTL(context.Background(), Keyspace{}.Notification(input.Notification.NotificationID)).Result()
	require.NoError(t, err)
	require.Greater(t, notificationTTL, 23*time.Hour)
	require.LessOrEqual(t, notificationTTL, 24*time.Hour)
	routeTTL, err := client.PTTL(context.Background(), emailRouteKey).Result()
	require.NoError(t, err)
	require.Greater(t, routeTTL, 23*time.Hour)
	require.LessOrEqual(t, routeTTL, 24*time.Hour)
	idempotencyTTL, err := client.PTTL(context.Background(), Keyspace{}.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)).Result()
	require.NoError(t, err)
	require.Greater(t, idempotencyTTL, 6*24*time.Hour)
	require.LessOrEqual(t, idempotencyTTL, 7*24*time.Hour)
}
// TestMalformedIntentStoreRecordPersistsEntry verifies that Record stores one
// malformed-intent entry under its stream-entry key with the configured TTL.
func TestMalformedIntentStoreRecordPersistsEntry(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)
	entry := malformedintent.Entry{
		StreamEntryID:    "1775121700000-0",
		NotificationType: "game.turn.ready",
		Producer:         "game_master",
		IdempotencyKey:   "game-123:turn-54",
		FailureCode:      malformedintent.FailureCodeInvalidPayload,
		FailureMessage:   "payload_json.turn_number is required",
		RawFields: map[string]any{
			"notification_type": "game.turn.ready",
		},
		RecordedAt: time.UnixMilli(1775121700000).UTC(),
	}
	require.NoError(t, store.Record(context.Background(), entry))
	// The entry round-trips through its strict JSON representation.
	payload, err := client.Get(context.Background(), Keyspace{}.MalformedIntent(entry.StreamEntryID)).Bytes()
	require.NoError(t, err)
	recordedEntry, err := UnmarshalMalformedIntent(payload)
	require.NoError(t, err)
	require.Equal(t, entry.StreamEntryID, recordedEntry.StreamEntryID)
	require.Equal(t, entry.FailureCode, recordedEntry.FailureCode)
	// The key carries the 72h retention configured above.
	ttl, err := client.PTTL(context.Background(), Keyspace{}.MalformedIntent(entry.StreamEntryID)).Result()
	require.NoError(t, err)
	require.Greater(t, ttl, 71*time.Hour)
	require.LessOrEqual(t, ttl, 72*time.Hour)
}
// TestStreamOffsetStoreLoadAndSave verifies that Load reports not-found
// before the first Save and returns the saved entry ID afterwards.
func TestStreamOffsetStoreLoadAndSave(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewStreamOffsetStore(client)
	require.NoError(t, err)
	// No offset has been saved yet.
	_, found, err := store.Load(context.Background(), "notification:intents")
	require.NoError(t, err)
	require.False(t, found)
	require.NoError(t, store.Save(context.Background(), "notification:intents", "1775121700000-0"))
	entryID, found, err := store.Load(context.Background(), "notification:intents")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, "1775121700000-0", entryID)
}
// TestIntentStreamLagReaderReadsOldestUnprocessedEntry verifies that the lag
// snapshot tracks the oldest stream entry not yet covered by the saved
// offset, and becomes nil once everything is processed.
func TestIntentStreamLagReaderReadsOldestUnprocessedEntry(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewStreamOffsetStore(client)
	require.NoError(t, err)
	reader, err := NewIntentStreamLagReader(store, "notification:intents")
	require.NoError(t, err)
	firstID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: "notification:intents",
		ID:     "1775121700000-0",
		Values: map[string]any{"payload": "first"},
	}).Result()
	require.NoError(t, err)
	secondID, err := client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: "notification:intents",
		ID:     "1775121701000-0",
		Values: map[string]any{"payload": "second"},
	}).Result()
	require.NoError(t, err)
	// With no offset saved, the first entry is the oldest unprocessed one.
	snapshot, err := reader.ReadIntentStreamLagSnapshot(context.Background())
	require.NoError(t, err)
	require.NotNil(t, snapshot.OldestUnprocessedAt)
	require.Equal(t, time.UnixMilli(1775121700000).UTC(), *snapshot.OldestUnprocessedAt)
	// After processing the first entry, the second becomes the oldest.
	require.NoError(t, store.Save(context.Background(), "notification:intents", firstID))
	snapshot, err = reader.ReadIntentStreamLagSnapshot(context.Background())
	require.NoError(t, err)
	require.NotNil(t, snapshot.OldestUnprocessedAt)
	require.Equal(t, time.UnixMilli(1775121701000).UTC(), *snapshot.OldestUnprocessedAt)
	// After processing everything, there is no unprocessed entry.
	require.NoError(t, store.Save(context.Background(), "notification:intents", secondID))
	snapshot, err = reader.ReadIntentStreamLagSnapshot(context.Background())
	require.NoError(t, err)
	require.Nil(t, snapshot.OldestUnprocessedAt)
}
// TestAcceptanceStoreWorksWithAcceptIntentService wires the Redis-backed
// acceptance store into the real acceptintent service and verifies that one
// admin-email intent is accepted and persisted end to end.
func TestAcceptanceStoreWorksWithAcceptIntentService(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	service, err := acceptintent.New(acceptintent.Config{
		Store:            store,
		UserDirectory:    staticUserDirectory{},
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting: config.AdminRoutingConfig{
			LobbyApplicationSubmitted: []string{"owner@example.com"},
		},
	})
	require.NoError(t, err)
	result, err := service.Execute(context.Background(), acceptintent.AcceptInput{
		NotificationID: "1775121700000-0",
		Intent: intentstream.Intent{
			NotificationType: intentstream.NotificationTypeLobbyApplicationSubmitted,
			Producer:         intentstream.ProducerGameLobby,
			AudienceKind:     intentstream.AudienceKindAdminEmail,
			IdempotencyKey:   "game-456:application-submitted:user-42",
			OccurredAt:       time.UnixMilli(1775121700002).UTC(),
			PayloadJSON:      `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
		},
	})
	require.NoError(t, err)
	require.Equal(t, acceptintent.OutcomeAccepted, result.Outcome)
	// The accepted record is readable back from the store.
	record, found, err := store.GetNotification(context.Background(), "1775121700000-0")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, "1775121700000-0", record.NotificationID)
}
// fixedClock is a test clock that always reports the same instant.
type fixedClock struct {
	// now is the instant returned by every Now call.
	now time.Time
}

// Now returns the fixed instant.
func (clock fixedClock) Now() time.Time {
	return clock.now
}
// validAdminAcceptanceInput builds a complete acceptance write set pinned to
// now: one admin-email notification with a skipped push route, a pending
// email route, and the matching idempotency reservation.
func validAdminAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeLobbyApplicationSubmitted,
			Producer:           intentstream.ProducerGameLobby,
			AudienceKind:       intentstream.AudienceKindAdminEmail,
			PayloadJSON:        `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-456","game_name":"Orion Front"}`,
			IdempotencyKey:     "game-456:application-submitted:user-42",
			RequestFingerprint: "sha256:deadbeef",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			// Push route: already skipped, so it must not be scheduled.
			{
				NotificationID: "1775121700000-0",
				RouteID:        "push:email:owner@example.com",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "email:owner@example.com",
				Status:         acceptintent.RouteStatusSkipped,
				AttemptCount:   0,
				MaxAttempts:    3,
				ResolvedEmail:  "owner@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
				SkippedAt:      now,
			},
			// Email route: pending with a next attempt due at now.
			{
				NotificationID: "1775121700000-0",
				RouteID:        "email:email:owner@example.com",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "email:owner@example.com",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "owner@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameLobby,
			IdempotencyKey:     "game-456:application-submitted:user-42",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}
// newTestRedisClient connects one go-redis client to the given miniredis
// server and closes it automatically when the test ends.
func newTestRedisClient(t *testing.T, server *miniredis.Miniredis) *redis.Client {
	t.Helper()
	client := redis.NewClient(&redis.Options{
		Addr: server.Addr(),
		// RESP2 with client identification disabled — NOTE(review): this
		// looks chosen for miniredis compatibility; confirm if changed.
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, client.Close())
	})
	return client
}
// staticUserDirectory is a user-directory stub whose lookups always report
// acceptintent.ErrRecipientNotFound.
type staticUserDirectory struct{}

// GetUserByID always returns ErrRecipientNotFound.
func (staticUserDirectory) GetUserByID(context.Context, string) (acceptintent.UserRecord, error) {
	return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
}
@@ -1,157 +0,0 @@
package redisstate
import (
"context"
"errors"
"fmt"
"time"
"galaxy/notification/internal/service/acceptintent"
"github.com/redis/go-redis/v9"
)
// AcceptanceConfig stores the retention settings applied to accepted durable
// notification state.
type AcceptanceConfig struct {
// RecordTTL stores the retention period applied to notification and route
// records.
RecordTTL time.Duration
// DeadLetterTTL stores the retention period applied to route dead-letter
// entries.
DeadLetterTTL time.Duration
// IdempotencyTTL stores the retention period applied to idempotency
// reservations.
IdempotencyTTL time.Duration
}
// Validate reports whether cfg contains usable retention settings.
func (cfg AcceptanceConfig) Validate() error {
switch {
case cfg.RecordTTL <= 0:
return fmt.Errorf("record ttl must be positive")
case cfg.DeadLetterTTL <= 0:
return fmt.Errorf("dead-letter ttl must be positive")
case cfg.IdempotencyTTL <= 0:
return fmt.Errorf("idempotency ttl must be positive")
default:
return nil
}
}
// AtomicWriter performs the minimal multi-key Redis mutations required by
// notification intent acceptance.
type AtomicWriter struct {
	// client is the Redis connection the write transactions run on.
	client *redis.Client
	// keys builds the Redis key names for all record kinds.
	keys Keyspace
	// cfg holds the TTLs applied to each written key.
	cfg AcceptanceConfig
}
// NewAtomicWriter constructs a low-level Redis mutation helper.
func NewAtomicWriter(client *redis.Client, cfg AcceptanceConfig) (*AtomicWriter, error) {
	if client == nil {
		return nil, errors.New("new notification redis atomic writer: nil client")
	}
	if validationErr := cfg.Validate(); validationErr != nil {
		return nil, fmt.Errorf("new notification redis atomic writer: %w", validationErr)
	}
	writer := &AtomicWriter{
		client: client,
		keys:   Keyspace{},
		cfg:    cfg,
	}
	return writer, nil
}
// CreateAcceptance stores one notification record, all derived routes, and
// the matching idempotency reservation in one optimistic Redis transaction.
// All keys are WATCHed and required to be absent before the MULTI/EXEC runs;
// any concurrent write or pre-existing key surfaces as ErrConflict.
func (writer *AtomicWriter) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	if writer == nil || writer.client == nil {
		return errors.New("create notification acceptance in redis: nil writer")
	}
	if ctx == nil {
		return errors.New("create notification acceptance in redis: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}
	// Serialize everything up front so the transaction body does no work that
	// can fail for non-Redis reasons.
	notificationPayload, err := MarshalNotification(input.Notification)
	if err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}
	idempotencyPayload, err := MarshalIdempotency(input.Idempotency)
	if err != nil {
		return fmt.Errorf("create notification acceptance in redis: %w", err)
	}
	routePayloads := make([][]byte, len(input.Routes))
	routeKeys := make([]string, len(input.Routes))
	scheduledRouteKeys := make([]string, 0, len(input.Routes))
	scheduledRouteScores := make([]float64, 0, len(input.Routes))
	for index, route := range input.Routes {
		payload, err := MarshalRoute(route)
		if err != nil {
			return fmt.Errorf("create notification acceptance in redis: route %d: %w", index, err)
		}
		routePayloads[index] = payload
		routeKeys[index] = writer.keys.Route(route.NotificationID, route.RouteID)
		// Only pending routes enter the schedule, scored by their next
		// attempt time in unix milliseconds.
		if route.Status == acceptintent.RouteStatusPending {
			scheduledRouteKeys = append(scheduledRouteKeys, routeKeys[index])
			scheduledRouteScores = append(scheduledRouteScores, float64(route.NextAttemptAt.UTC().UnixMilli()))
		}
	}
	notificationKey := writer.keys.Notification(input.Notification.NotificationID)
	idempotencyKey := writer.keys.Idempotency(input.Idempotency.Producer, input.Idempotency.IdempotencyKey)
	watchKeys := append([]string{notificationKey, idempotencyKey}, routeKeys...)
	watchErr := writer.client.Watch(ctx, func(tx *redis.Tx) error {
		// Acceptance is create-only: every target key must be absent.
		for _, key := range watchKeys {
			if err := ensureKeyAbsent(ctx, tx, key); err != nil {
				return err
			}
		}
		_, err := tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, notificationKey, notificationPayload, writer.cfg.RecordTTL)
			pipe.Set(ctx, idempotencyKey, idempotencyPayload, writer.cfg.IdempotencyTTL)
			for index, routeKey := range routeKeys {
				pipe.Set(ctx, routeKey, routePayloads[index], writer.cfg.RecordTTL)
			}
			for index, routeKey := range scheduledRouteKeys {
				pipe.ZAdd(ctx, writer.keys.RouteSchedule(), redis.Z{
					Score:  scheduledRouteScores[index],
					Member: routeKey,
				})
			}
			return nil
		})
		return err
	}, watchKeys...)
	// A failed EXEC (watched key touched concurrently) is a conflict too.
	switch {
	case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr):
		return ErrConflict
	case watchErr != nil:
		return fmt.Errorf("create notification acceptance in redis: %w", watchErr)
	default:
		return nil
	}
}
// ensureKeyAbsent returns ErrConflict when key already exists within tx.
func ensureKeyAbsent(ctx context.Context, tx *redis.Tx, key string) error {
	count, existsErr := tx.Exists(ctx, key).Result()
	switch {
	case existsErr != nil:
		return existsErr
	case count > 0:
		return ErrConflict
	default:
		return nil
	}
}
@@ -6,10 +6,6 @@ import (
"fmt"
"io"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
)
// StreamOffset stores the persisted progress of the plain-XREAD intent
@@ -18,412 +14,14 @@ type StreamOffset struct {
// Stream stores the Redis Stream name.
Stream string
// LastProcessedEntryID stores the last durably processed Redis Stream entry
// identifier.
// LastProcessedEntryID stores the last durably processed Redis Stream
// entry identifier.
LastProcessedEntryID string
// UpdatedAt stores when the offset record was last updated.
UpdatedAt time.Time
}
// DeadLetterEntry stores one terminal route-publication failure recorded for
// later operator inspection. Entries are serialized with MarshalDeadLetter.
type DeadLetterEntry struct {
	// NotificationID stores the owning notification identifier.
	NotificationID string
	// RouteID stores the exhausted route identifier.
	RouteID string
	// Channel stores the failed route channel.
	Channel intentstream.Channel
	// RecipientRef stores the stable failed recipient slot identifier.
	RecipientRef string
	// FinalAttemptCount stores how many publication attempts were consumed.
	FinalAttemptCount int
	// MaxAttempts stores the configured retry budget for Channel.
	MaxAttempts int
	// FailureClassification stores the stable classified failure reason.
	FailureClassification string
	// FailureMessage stores the last failure detail.
	FailureMessage string
	// CreatedAt stores when the route moved to dead_letter.
	CreatedAt time.Time
	// RecoveryHint stores the optional operator-facing recovery hint.
	RecoveryHint string
}
// notificationRecordJSON is the strict JSON wire form of
// acceptintent.NotificationRecord; timestamps are unix milliseconds.
type notificationRecordJSON struct {
	NotificationID     string                        `json:"notification_id"`
	NotificationType   intentstream.NotificationType `json:"notification_type"`
	Producer           intentstream.Producer         `json:"producer"`
	AudienceKind       intentstream.AudienceKind     `json:"audience_kind"`
	RecipientUserIDs   []string                      `json:"recipient_user_ids,omitempty"`
	PayloadJSON        string                        `json:"payload_json"`
	IdempotencyKey     string                        `json:"idempotency_key"`
	RequestFingerprint string                        `json:"request_fingerprint"`
	RequestID          string                        `json:"request_id,omitempty"`
	TraceID            string                        `json:"trace_id,omitempty"`
	OccurredAtMS       int64                         `json:"occurred_at_ms"`
	AcceptedAtMS       int64                         `json:"accepted_at_ms"`
	UpdatedAtMS        int64                         `json:"updated_at_ms"`
}
// notificationRouteJSON is the strict JSON wire form of
// acceptintent.NotificationRoute; optional timestamps are pointers so absent
// values are omitted, all in unix milliseconds.
type notificationRouteJSON struct {
	NotificationID          string                   `json:"notification_id"`
	RouteID                 string                   `json:"route_id"`
	Channel                 intentstream.Channel     `json:"channel"`
	RecipientRef            string                   `json:"recipient_ref"`
	Status                  acceptintent.RouteStatus `json:"status"`
	AttemptCount            int                      `json:"attempt_count"`
	MaxAttempts             int                      `json:"max_attempts"`
	NextAttemptAtMS         *int64                   `json:"next_attempt_at_ms,omitempty"`
	ResolvedEmail           string                   `json:"resolved_email,omitempty"`
	ResolvedLocale          string                   `json:"resolved_locale,omitempty"`
	LastErrorClassification string                   `json:"last_error_classification,omitempty"`
	LastErrorMessage        string                   `json:"last_error_message,omitempty"`
	LastErrorAtMS           *int64                   `json:"last_error_at_ms,omitempty"`
	CreatedAtMS             int64                    `json:"created_at_ms"`
	UpdatedAtMS             int64                    `json:"updated_at_ms"`
	PublishedAtMS           *int64                   `json:"published_at_ms,omitempty"`
	DeadLetteredAtMS        *int64                   `json:"dead_lettered_at_ms,omitempty"`
	SkippedAtMS             *int64                   `json:"skipped_at_ms,omitempty"`
}
// idempotencyRecordJSON is the strict JSON wire form of
// acceptintent.IdempotencyRecord; timestamps are unix milliseconds.
type idempotencyRecordJSON struct {
	Producer           intentstream.Producer `json:"producer"`
	IdempotencyKey     string                `json:"idempotency_key"`
	NotificationID     string                `json:"notification_id"`
	RequestFingerprint string                `json:"request_fingerprint"`
	CreatedAtMS        int64                 `json:"created_at_ms"`
	ExpiresAtMS        int64                 `json:"expires_at_ms"`
}
// malformedIntentJSON is the strict JSON wire form of a recorded malformed
// intent; RecordedAtMS is unix milliseconds.
type malformedIntentJSON struct {
	StreamEntryID    string                      `json:"stream_entry_id"`
	NotificationType string                      `json:"notification_type,omitempty"`
	Producer         string                      `json:"producer,omitempty"`
	IdempotencyKey   string                      `json:"idempotency_key,omitempty"`
	FailureCode      malformedintent.FailureCode `json:"failure_code"`
	FailureMessage   string                      `json:"failure_message"`
	RawFields        map[string]any              `json:"raw_fields_json"`
	RecordedAtMS     int64                       `json:"recorded_at_ms"`
}
// streamOffsetJSON is the strict JSON wire form of StreamOffset; UpdatedAtMS
// is unix milliseconds.
type streamOffsetJSON struct {
	Stream               string `json:"stream"`
	LastProcessedEntryID string `json:"last_processed_entry_id"`
	UpdatedAtMS          int64  `json:"updated_at_ms"`
}
// deadLetterEntryJSON is the strict JSON wire form of DeadLetterEntry;
// CreatedAtMS is unix milliseconds.
type deadLetterEntryJSON struct {
	NotificationID        string               `json:"notification_id"`
	RouteID               string               `json:"route_id"`
	Channel               intentstream.Channel `json:"channel"`
	RecipientRef          string               `json:"recipient_ref"`
	FinalAttemptCount     int                  `json:"final_attempt_count"`
	MaxAttempts           int                  `json:"max_attempts"`
	FailureClassification string               `json:"failure_classification"`
	FailureMessage        string               `json:"failure_message"`
	CreatedAtMS           int64                `json:"created_at_ms"`
	RecoveryHint          string               `json:"recovery_hint,omitempty"`
}
// MarshalNotification marshals one notification record into the strict JSON
// representation owned by Notification Service.
func MarshalNotification(record acceptintent.NotificationRecord) ([]byte, error) {
	if err := record.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification record: %w", err)
	}
	// Copy the recipient slice so the wire struct never aliases the input.
	recipients := append([]string(nil), record.RecipientUserIDs...)
	wire := notificationRecordJSON{
		NotificationID:     record.NotificationID,
		NotificationType:   record.NotificationType,
		Producer:           record.Producer,
		AudienceKind:       record.AudienceKind,
		RecipientUserIDs:   recipients,
		PayloadJSON:        record.PayloadJSON,
		IdempotencyKey:     record.IdempotencyKey,
		RequestFingerprint: record.RequestFingerprint,
		RequestID:          record.RequestID,
		TraceID:            record.TraceID,
		OccurredAtMS:       unixMilli(record.OccurredAt),
		AcceptedAtMS:       unixMilli(record.AcceptedAt),
		UpdatedAtMS:        unixMilli(record.UpdatedAt),
	}
	return marshalStrictJSON(wire)
}
// UnmarshalNotification unmarshals one strict JSON notification record.
func UnmarshalNotification(payload []byte) (acceptintent.NotificationRecord, error) {
	fail := func(err error) (acceptintent.NotificationRecord, error) {
		return acceptintent.NotificationRecord{}, fmt.Errorf("unmarshal notification record: %w", err)
	}
	var decoded notificationRecordJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return fail(err)
	}
	record := acceptintent.NotificationRecord{
		NotificationID:     decoded.NotificationID,
		NotificationType:   decoded.NotificationType,
		Producer:           decoded.Producer,
		AudienceKind:       decoded.AudienceKind,
		RecipientUserIDs:   append([]string(nil), decoded.RecipientUserIDs...),
		PayloadJSON:        decoded.PayloadJSON,
		IdempotencyKey:     decoded.IdempotencyKey,
		RequestFingerprint: decoded.RequestFingerprint,
		RequestID:          decoded.RequestID,
		TraceID:            decoded.TraceID,
		OccurredAt:         time.UnixMilli(decoded.OccurredAtMS).UTC(),
		AcceptedAt:         time.UnixMilli(decoded.AcceptedAtMS).UTC(),
		UpdatedAt:          time.UnixMilli(decoded.UpdatedAtMS).UTC(),
	}
	// Re-validate so a corrupt payload never escapes as a usable record.
	if err := record.Validate(); err != nil {
		return fail(err)
	}
	return record, nil
}
// MarshalRoute marshals one notification route into the strict JSON
// representation owned by Notification Service.
func MarshalRoute(route acceptintent.NotificationRoute) ([]byte, error) {
	if err := route.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification route: %w", err)
	}
	wire := notificationRouteJSON{
		NotificationID:          route.NotificationID,
		RouteID:                 route.RouteID,
		Channel:                 route.Channel,
		RecipientRef:            route.RecipientRef,
		Status:                  route.Status,
		AttemptCount:            route.AttemptCount,
		MaxAttempts:             route.MaxAttempts,
		NextAttemptAtMS:         optionalUnixMilli(route.NextAttemptAt),
		ResolvedEmail:           route.ResolvedEmail,
		ResolvedLocale:          route.ResolvedLocale,
		LastErrorClassification: route.LastErrorClassification,
		LastErrorMessage:        route.LastErrorMessage,
		LastErrorAtMS:           optionalUnixMilli(route.LastErrorAt),
		CreatedAtMS:             unixMilli(route.CreatedAt),
		UpdatedAtMS:             unixMilli(route.UpdatedAt),
		PublishedAtMS:           optionalUnixMilli(route.PublishedAt),
		DeadLetteredAtMS:        optionalUnixMilli(route.DeadLetteredAt),
		SkippedAtMS:             optionalUnixMilli(route.SkippedAt),
	}
	return marshalStrictJSON(wire)
}
// UnmarshalRoute unmarshals one strict JSON notification route.
func UnmarshalRoute(payload []byte) (acceptintent.NotificationRoute, error) {
	fail := func(err error) (acceptintent.NotificationRoute, error) {
		return acceptintent.NotificationRoute{}, fmt.Errorf("unmarshal notification route: %w", err)
	}
	var decoded notificationRouteJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return fail(err)
	}
	// setWhenPresent converts an optional unix-millisecond field, leaving the
	// destination zero when the wire value is absent.
	setWhenPresent := func(dst *time.Time, ms *int64) {
		if ms != nil {
			*dst = time.UnixMilli(*ms).UTC()
		}
	}
	route := acceptintent.NotificationRoute{
		NotificationID:          decoded.NotificationID,
		RouteID:                 decoded.RouteID,
		Channel:                 decoded.Channel,
		RecipientRef:            decoded.RecipientRef,
		Status:                  decoded.Status,
		AttemptCount:            decoded.AttemptCount,
		MaxAttempts:             decoded.MaxAttempts,
		ResolvedEmail:           decoded.ResolvedEmail,
		ResolvedLocale:          decoded.ResolvedLocale,
		LastErrorClassification: decoded.LastErrorClassification,
		LastErrorMessage:        decoded.LastErrorMessage,
		CreatedAt:               time.UnixMilli(decoded.CreatedAtMS).UTC(),
		UpdatedAt:               time.UnixMilli(decoded.UpdatedAtMS).UTC(),
	}
	setWhenPresent(&route.NextAttemptAt, decoded.NextAttemptAtMS)
	setWhenPresent(&route.LastErrorAt, decoded.LastErrorAtMS)
	setWhenPresent(&route.PublishedAt, decoded.PublishedAtMS)
	setWhenPresent(&route.DeadLetteredAt, decoded.DeadLetteredAtMS)
	setWhenPresent(&route.SkippedAt, decoded.SkippedAtMS)
	// Re-validate so a corrupt payload never escapes as a usable route.
	if err := route.Validate(); err != nil {
		return fail(err)
	}
	return route, nil
}
// MarshalIdempotency marshals one idempotency record into the strict JSON
// representation owned by Notification Service.
func MarshalIdempotency(record acceptintent.IdempotencyRecord) ([]byte, error) {
	if err := record.Validate(); err != nil {
		return nil, fmt.Errorf("marshal notification idempotency record: %w", err)
	}
	wire := idempotencyRecordJSON{
		Producer:           record.Producer,
		IdempotencyKey:     record.IdempotencyKey,
		NotificationID:     record.NotificationID,
		RequestFingerprint: record.RequestFingerprint,
		CreatedAtMS:        unixMilli(record.CreatedAt),
		ExpiresAtMS:        unixMilli(record.ExpiresAt),
	}
	return marshalStrictJSON(wire)
}
// UnmarshalIdempotency unmarshals one strict JSON idempotency record.
func UnmarshalIdempotency(payload []byte) (acceptintent.IdempotencyRecord, error) {
	var decoded idempotencyRecordJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err)
	}
	parsed := acceptintent.IdempotencyRecord{
		Producer:           decoded.Producer,
		IdempotencyKey:     decoded.IdempotencyKey,
		NotificationID:     decoded.NotificationID,
		RequestFingerprint: decoded.RequestFingerprint,
		CreatedAt:          time.UnixMilli(decoded.CreatedAtMS).UTC(),
		ExpiresAt:          time.UnixMilli(decoded.ExpiresAtMS).UTC(),
	}
	if err := parsed.Validate(); err != nil {
		return acceptintent.IdempotencyRecord{}, fmt.Errorf("unmarshal notification idempotency record: %w", err)
	}
	return parsed, nil
}
// MarshalDeadLetter marshals one dead-letter entry into the strict JSON
// representation owned by Notification Service.
func MarshalDeadLetter(entry DeadLetterEntry) ([]byte, error) {
	if err := entry.Validate(); err != nil {
		return nil, fmt.Errorf("marshal dead letter entry: %w", err)
	}
	wire := deadLetterEntryJSON{
		NotificationID:        entry.NotificationID,
		RouteID:               entry.RouteID,
		Channel:               entry.Channel,
		RecipientRef:          entry.RecipientRef,
		FinalAttemptCount:     entry.FinalAttemptCount,
		MaxAttempts:           entry.MaxAttempts,
		FailureClassification: entry.FailureClassification,
		FailureMessage:        entry.FailureMessage,
		CreatedAtMS:           unixMilli(entry.CreatedAt),
		RecoveryHint:          entry.RecoveryHint,
	}
	return marshalStrictJSON(wire)
}
// UnmarshalDeadLetter unmarshals one strict JSON dead-letter entry.
func UnmarshalDeadLetter(payload []byte) (DeadLetterEntry, error) {
	var decoded deadLetterEntryJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", err)
	}
	parsed := DeadLetterEntry{
		NotificationID:        decoded.NotificationID,
		RouteID:               decoded.RouteID,
		Channel:               decoded.Channel,
		RecipientRef:          decoded.RecipientRef,
		FinalAttemptCount:     decoded.FinalAttemptCount,
		MaxAttempts:           decoded.MaxAttempts,
		FailureClassification: decoded.FailureClassification,
		FailureMessage:        decoded.FailureMessage,
		CreatedAt:             time.UnixMilli(decoded.CreatedAtMS).UTC(),
		RecoveryHint:          decoded.RecoveryHint,
	}
	if err := parsed.Validate(); err != nil {
		return DeadLetterEntry{}, fmt.Errorf("unmarshal dead letter entry: %w", err)
	}
	return parsed, nil
}
// MarshalMalformedIntent marshals one malformed-intent entry into the strict
// JSON representation owned by Notification Service.
func MarshalMalformedIntent(entry malformedintent.Entry) ([]byte, error) {
	if err := entry.Validate(); err != nil {
		return nil, fmt.Errorf("marshal malformed intent: %w", err)
	}
	wire := malformedIntentJSON{
		StreamEntryID:    entry.StreamEntryID,
		NotificationType: entry.NotificationType,
		Producer:         entry.Producer,
		IdempotencyKey:   entry.IdempotencyKey,
		FailureCode:      entry.FailureCode,
		FailureMessage:   entry.FailureMessage,
		RawFields:        cloneJSONObject(entry.RawFields),
		RecordedAtMS:     unixMilli(entry.RecordedAt),
	}
	return marshalStrictJSON(wire)
}
// UnmarshalMalformedIntent unmarshals one strict JSON malformed-intent entry.
func UnmarshalMalformedIntent(payload []byte) (malformedintent.Entry, error) {
	var decoded malformedIntentJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", err)
	}
	parsed := malformedintent.Entry{
		StreamEntryID:    decoded.StreamEntryID,
		NotificationType: decoded.NotificationType,
		Producer:         decoded.Producer,
		IdempotencyKey:   decoded.IdempotencyKey,
		FailureCode:      decoded.FailureCode,
		FailureMessage:   decoded.FailureMessage,
		RawFields:        cloneJSONObject(decoded.RawFields),
		RecordedAt:       time.UnixMilli(decoded.RecordedAtMS).UTC(),
	}
	if err := parsed.Validate(); err != nil {
		return malformedintent.Entry{}, fmt.Errorf("unmarshal malformed intent: %w", err)
	}
	return parsed, nil
}
// MarshalStreamOffset marshals one stream-offset record into the strict JSON
// representation owned by Notification Service.
func MarshalStreamOffset(offset StreamOffset) ([]byte, error) {
	if err := offset.Validate(); err != nil {
		return nil, fmt.Errorf("marshal stream offset: %w", err)
	}
	wire := streamOffsetJSON{
		Stream:               offset.Stream,
		LastProcessedEntryID: offset.LastProcessedEntryID,
		UpdatedAtMS:          unixMilli(offset.UpdatedAt),
	}
	return marshalStrictJSON(wire)
}
// UnmarshalStreamOffset unmarshals one strict JSON stream-offset record.
func UnmarshalStreamOffset(payload []byte) (StreamOffset, error) {
	var decoded streamOffsetJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err)
	}
	parsed := StreamOffset{
		Stream:               decoded.Stream,
		LastProcessedEntryID: decoded.LastProcessedEntryID,
		UpdatedAt:            time.UnixMilli(decoded.UpdatedAtMS).UTC(),
	}
	if err := parsed.Validate(); err != nil {
		return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err)
	}
	return parsed, nil
}
// Validate reports whether offset contains a complete persisted consumer
// progress record.
func (offset StreamOffset) Validate() error {
@@ -446,43 +44,43 @@ func (offset StreamOffset) Validate() error {
return nil
}
// Validate reports whether entry contains a complete dead-letter record.
func (entry DeadLetterEntry) Validate() error {
if entry.NotificationID == "" {
return fmt.Errorf("dead letter entry notification id must not be empty")
}
if entry.RouteID == "" {
return fmt.Errorf("dead letter entry route id must not be empty")
}
if !entry.Channel.IsKnown() {
return fmt.Errorf("dead letter entry channel %q is unsupported", entry.Channel)
}
if entry.RecipientRef == "" {
return fmt.Errorf("dead letter entry recipient ref must not be empty")
}
if entry.FinalAttemptCount <= 0 {
return fmt.Errorf("dead letter entry final attempt count must be positive")
}
if entry.MaxAttempts <= 0 {
return fmt.Errorf("dead letter entry max attempts must be positive")
}
if entry.FailureClassification == "" {
return fmt.Errorf("dead letter entry failure classification must not be empty")
}
if entry.FailureMessage == "" {
return fmt.Errorf("dead letter entry failure message must not be empty")
}
if entry.CreatedAt.IsZero() {
return fmt.Errorf("dead letter entry created at must not be zero")
}
if !entry.CreatedAt.Equal(entry.CreatedAt.UTC()) {
return fmt.Errorf("dead letter entry created at must be UTC")
}
if !entry.CreatedAt.Equal(entry.CreatedAt.Truncate(time.Millisecond)) {
return fmt.Errorf("dead letter entry created at must use millisecond precision")
// streamOffsetJSON is the strict JSON wire form of one persisted
// intent-consumer stream offset.
type streamOffsetJSON struct {
	// Stream holds the stream name the offset belongs to.
	Stream string `json:"stream"`
	// LastProcessedEntryID holds the identifier of the last processed entry.
	LastProcessedEntryID string `json:"last_processed_entry_id"`
	// UpdatedAtMS holds the update instant as Unix milliseconds (UTC).
	UpdatedAtMS int64 `json:"updated_at_ms"`
}
// MarshalStreamOffset marshals one stream-offset record into the strict JSON
// representation owned by Notification Service. The offset is validated
// before encoding so persisted payloads are always well-formed.
func MarshalStreamOffset(offset StreamOffset) ([]byte, error) {
	if err := offset.Validate(); err != nil {
		return nil, fmt.Errorf("marshal stream offset: %w", err)
	}
	// Fix: a stray single-value `return nil` (a merge artifact) sat before
	// this return; it could not compile against the two-value signature and
	// made the real return unreachable, so it has been removed.
	return marshalStrictJSON(streamOffsetJSON{
		Stream:               offset.Stream,
		LastProcessedEntryID: offset.LastProcessedEntryID,
		UpdatedAtMS:          offset.UpdatedAt.UTC().UnixMilli(),
	})
}
// UnmarshalStreamOffset unmarshals one strict JSON stream-offset record.
func UnmarshalStreamOffset(payload []byte) (StreamOffset, error) {
	var decoded streamOffsetJSON
	if err := unmarshalStrictJSON(payload, &decoded); err != nil {
		return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err)
	}
	parsed := StreamOffset{
		Stream:               decoded.Stream,
		LastProcessedEntryID: decoded.LastProcessedEntryID,
		UpdatedAt:            time.UnixMilli(decoded.UpdatedAtMS).UTC(),
	}
	if err := parsed.Validate(); err != nil {
		return StreamOffset{}, fmt.Errorf("unmarshal stream offset: %w", err)
	}
	return parsed, nil
}
func marshalStrictJSON(value any) ([]byte, error) {
@@ -505,43 +103,3 @@ func unmarshalStrictJSON(payload []byte, target any) error {
return nil
}
func unixMilli(value time.Time) int64 {
return value.UTC().UnixMilli()
}
// optionalUnixMilli converts value to a pointer-to-Unix-milliseconds, using
// nil to represent the zero time so optional timestamps stay omittable.
func optionalUnixMilli(value time.Time) *int64 {
	if value.IsZero() {
		return nil
	}
	converted := unixMilli(value)
	return &converted
}
// cloneJSONObject deep-copies one decoded JSON object, mapping nil to an
// empty (non-nil) map so callers always receive a usable object.
func cloneJSONObject(value map[string]any) map[string]any {
	if value == nil {
		return map[string]any{}
	}
	copied := make(map[string]any, len(value))
	for name, nested := range value {
		copied[name] = cloneJSONValue(nested)
	}
	return copied
}
// cloneJSONValue deep-copies one decoded JSON value. Objects and arrays are
// copied recursively; every other value is returned unchanged.
func cloneJSONValue(value any) any {
	if object, ok := value.(map[string]any); ok {
		return cloneJSONObject(object)
	}
	items, ok := value.([]any)
	if !ok {
		return value
	}
	copied := make([]any, len(items))
	for index := range items {
		copied[index] = cloneJSONValue(items[index])
	}
	return copied
}
@@ -1,10 +1,10 @@
package redisstate
import "errors"
import "galaxy/notification/internal/service/routestate"
var (
// ErrConflict reports that a Redis mutation could not be applied because
// one of the watched or newly created keys already existed or changed
// concurrently.
ErrConflict = errors.New("redis state conflict")
)
// ErrConflict reports that a Redis mutation could not be applied because
// one of the watched or newly created keys already existed or changed
// concurrently. Aliased to routestate.ErrConflict so the publisher
// boundary uses one stable sentinel regardless of which storage backend
// drives the mutation.
var ErrConflict = routestate.ErrConflict
@@ -2,79 +2,25 @@ package redisstate
import (
"encoding/base64"
"fmt"
"strings"
"galaxy/notification/internal/api/intentstream"
)
const defaultPrefix = "notification:"
// Keyspace builds the frozen Notification Service Redis keys. All dynamic key
// segments are encoded with base64url so raw key structure does not depend on
// caller-provided characters.
// Keyspace builds the Notification Service Redis keys retained after the
// Stage 5 PostgreSQL migration: only the route lease, the persisted stream
// offset, and the inbound intent stream key are managed here. Durable
// notification state lives in the `notification` PostgreSQL schema.
//
// Dynamic key segments are encoded with base64url so raw key structure
// does not depend on caller-provided characters.
type Keyspace struct{}
// Notification returns the primary Redis key for one notification_record.
func (Keyspace) Notification(notificationID string) string {
	encoded := encodeKeyComponent(notificationID)
	return defaultPrefix + "records:" + encoded
}
// Route returns the primary Redis key for one notification_route.
func (Keyspace) Route(notificationID string, routeID string) string {
	suffix := encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID)
	return defaultPrefix + "routes:" + suffix
}
// ParseRoute returns the notification identifier and route identifier encoded
// inside routeKey.
func (Keyspace) ParseRoute(routeKey string) (string, string, error) {
	routePrefix := defaultPrefix + "routes:"
	if !strings.HasPrefix(routeKey, routePrefix) {
		return "", "", fmt.Errorf("parse route key: %q does not use %q prefix", routeKey, routePrefix)
	}
	// Exactly two base64url segments separated by one colon are expected.
	segments := strings.Split(strings.TrimPrefix(routeKey, routePrefix), ":")
	if len(segments) != 2 {
		return "", "", fmt.Errorf("parse route key: %q must contain exactly two encoded segments", routeKey)
	}
	notificationID, err := decodeKeyComponent(segments[0])
	if err != nil {
		return "", "", fmt.Errorf("parse route key: notification id: %w", err)
	}
	routeID, err := decodeKeyComponent(segments[1])
	if err != nil {
		return "", "", fmt.Errorf("parse route key: route id: %w", err)
	}
	return notificationID, routeID, nil
}
// Idempotency returns the primary Redis key for one
// notification_idempotency_record.
func (Keyspace) Idempotency(producer intentstream.Producer, idempotencyKey string) string {
	suffix := encodeKeyComponent(string(producer)) + ":" + encodeKeyComponent(idempotencyKey)
	return defaultPrefix + "idempotency:" + suffix
}
// DeadLetter returns the primary Redis key for one
// notification_dead_letter_entry.
func (Keyspace) DeadLetter(notificationID string, routeID string) string {
	suffix := encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID)
	return defaultPrefix + "dead_letters:" + suffix
}
// RouteLease returns the temporary Redis key used to coordinate exclusive
// publication of one notification_route across replicas.
func (Keyspace) RouteLease(notificationID string, routeID string) string {
	suffix := encodeKeyComponent(notificationID) + ":" + encodeKeyComponent(routeID)
	return defaultPrefix + "route_leases:" + suffix
}
// MalformedIntent returns the primary Redis key for one malformed-intent
// record.
func (Keyspace) MalformedIntent(streamEntryID string) string {
	encoded := encodeKeyComponent(streamEntryID)
	return defaultPrefix + "malformed_intents:" + encoded
}
// StreamOffset returns the primary Redis key for one persisted intent-consumer
// offset.
func (Keyspace) StreamOffset(stream string) string {
@@ -86,20 +32,6 @@ func (Keyspace) Intents() string {
return defaultPrefix + "intents"
}
// RouteSchedule returns the frozen route schedule sorted-set key.
func (Keyspace) RouteSchedule() string {
	const suffix = "route_schedule"
	return defaultPrefix + suffix
}
// encodeKeyComponent encodes one caller-provided key segment with unpadded
// base64url so arbitrary input characters cannot alter the key structure.
func encodeKeyComponent(value string) string {
	raw := []byte(value)
	return base64.RawURLEncoding.EncodeToString(raw)
}
// decodeKeyComponent reverses encodeKeyComponent, reporting an error when
// value is not valid unpadded base64url.
func decodeKeyComponent(value string) (string, error) {
	raw, err := base64.RawURLEncoding.DecodeString(value)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
@@ -0,0 +1,108 @@
package redisstate
import (
"context"
"errors"
"fmt"
"time"
"github.com/redis/go-redis/v9"
)
// releaseRouteLeaseScript releases the route lease only when the supplied
// token still owns it. The Lua script gates the DEL on the SET value match
// so a publisher that lost the lease (TTL expiry, replica swap) cannot
// clear another worker's claim.
var releaseRouteLeaseScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) == ARGV[1] then
return redis.call("DEL", KEYS[1])
end
return 0
`)
// LeaseStore owns the short-lived route lease keys that coordinate exclusive
// route publication across replicas. The lease lives on Redis as a per-route
// SETNX-with-TTL token; releasing it requires the same token via a Lua
// script that compares the stored value before deleting it.
//
// LeaseStore is intentionally separate from the durable route-state storage
// so the publishers can compose one storage-layer adapter (PostgreSQL since
// Stage 5) with the runtime-coordination layer that stays on Redis per
// `ARCHITECTURE.md §Persistence Backends`.
type LeaseStore struct {
	// client issues the Redis commands backing lease acquisition and release.
	client *redis.Client
	// keys derives the route-lease key names.
	keys Keyspace
}
// NewLeaseStore constructs one Redis-backed lease store.
func NewLeaseStore(client *redis.Client) (*LeaseStore, error) {
	if client == nil {
		return nil, errors.New("new notification lease store: nil redis client")
	}
	store := &LeaseStore{client: client, keys: Keyspace{}}
	return store, nil
}
// TryAcquireRouteLease attempts to acquire one temporary route lease owned
// by token for ttl. The lease key auto-expires; a publisher whose work
// outlives the TTL must accept that another replica may pick the route up.
func (store *LeaseStore) TryAcquireRouteLease(ctx context.Context, notificationID string, routeID string, token string, ttl time.Duration) (bool, error) {
	// Reject unusable receivers and arguments up front with stable messages.
	switch {
	case store == nil || store.client == nil:
		return false, errors.New("try acquire route lease: nil store")
	case ctx == nil:
		return false, errors.New("try acquire route lease: nil context")
	case notificationID == "":
		return false, errors.New("try acquire route lease: notification id must not be empty")
	case routeID == "":
		return false, errors.New("try acquire route lease: route id must not be empty")
	case token == "":
		return false, errors.New("try acquire route lease: token must not be empty")
	case ttl <= 0:
		return false, errors.New("try acquire route lease: ttl must be positive")
	}
	// SETNX-with-TTL makes the acquisition atomic across replicas.
	leaseKey := store.keys.RouteLease(notificationID, routeID)
	acquired, err := store.client.SetNX(ctx, leaseKey, token, ttl).Result()
	if err != nil {
		return false, fmt.Errorf("try acquire route lease: %w", err)
	}
	return acquired, nil
}
// ReleaseRouteLease releases one temporary route lease only when token still
// matches the stored owner value; releasing a lease the caller no longer owns
// is a silent no-op.
func (store *LeaseStore) ReleaseRouteLease(ctx context.Context, notificationID string, routeID string, token string) error {
	// Reject unusable receivers and arguments up front with stable messages.
	switch {
	case store == nil || store.client == nil:
		return errors.New("release route lease: nil store")
	case ctx == nil:
		return errors.New("release route lease: nil context")
	case notificationID == "":
		return errors.New("release route lease: notification id must not be empty")
	case routeID == "":
		return errors.New("release route lease: route id must not be empty")
	case token == "":
		return errors.New("release route lease: token must not be empty")
	}
	// The Lua script compares the stored token before deleting the key, so
	// a stale owner cannot clear another worker's claim.
	leaseKey := store.keys.RouteLease(notificationID, routeID)
	runErr := releaseRouteLeaseScript.Run(ctx, store.client, []string{leaseKey}, token).Err()
	if runErr != nil {
		return fmt.Errorf("release route lease: %w", runErr)
	}
	return nil
}
@@ -1,59 +0,0 @@
package redisstate
import (
"context"
"errors"
"fmt"
"time"
"galaxy/notification/internal/service/malformedintent"
"github.com/redis/go-redis/v9"
)
// MalformedIntentStore provides the Redis-backed storage used for
// operator-visible malformed-intent records.
type MalformedIntentStore struct {
	// client issues the Redis commands backing the store.
	client *redis.Client
	// keys derives the malformed-intent key names.
	keys Keyspace
	// ttl bounds how long each recorded entry is retained in Redis.
	ttl time.Duration
}
// NewMalformedIntentStore constructs one Redis-backed malformed-intent store.
func NewMalformedIntentStore(client *redis.Client, ttl time.Duration) (*MalformedIntentStore, error) {
	switch {
	case client == nil:
		return nil, errors.New("new malformed intent store: nil redis client")
	case ttl <= 0:
		return nil, errors.New("new malformed intent store: non-positive ttl")
	}
	store := &MalformedIntentStore{
		client: client,
		keys:   Keyspace{},
		ttl:    ttl,
	}
	return store, nil
}
// Record stores entry idempotently by its Redis Stream entry identifier.
func (store *MalformedIntentStore) Record(ctx context.Context, entry malformedintent.Entry) error {
	switch {
	case store == nil || store.client == nil:
		return errors.New("record malformed intent: nil store")
	case ctx == nil:
		return errors.New("record malformed intent: nil context")
	}
	if err := entry.Validate(); err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}
	payload, err := MarshalMalformedIntent(entry)
	if err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}
	// SET on the stream-entry-id key makes repeated records overwrite the
	// same value, keeping the operation idempotent.
	recordKey := store.keys.MalformedIntent(entry.StreamEntryID)
	if err := store.client.Set(ctx, recordKey, payload, store.ttl).Err(); err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}
	return nil
}
@@ -1,657 +0,0 @@
package redisstate
import (
"bytes"
"context"
"errors"
"fmt"
"sort"
"strconv"
"time"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/telemetry"
"github.com/redis/go-redis/v9"
)
// releaseRouteLeaseScript deletes the route-lease key only when its stored
// value still equals the caller-supplied token (ARGV[1]), so a worker that
// lost the lease cannot clear another worker's claim.
var releaseRouteLeaseScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) == ARGV[1] then
return redis.call("DEL", KEYS[1])
end
return 0
`)
// completePublishedRouteScript performs the publish transition atomically:
//  1. return 0 unless the stored route payload equals ARGV[1];
//  2. return 0 unless the lease value equals ARGV[2];
//  3. XADD the flattened field/value pairs (count ARGV[6], pairs from
//     ARGV[7] onward) to stream ARGV[3], trimming approximately to
//     ARGV[4] entries when that bound is positive;
//  4. overwrite the route payload with ARGV[5] while keeping its TTL;
//  5. remove the route from the schedule set and delete the lease.
var completePublishedRouteScript = redis.NewScript(`
if redis.call("GET", KEYS[1]) ~= ARGV[1] then
return 0
end
if redis.call("GET", KEYS[2]) ~= ARGV[2] then
return 0
end
local field_count = tonumber(ARGV[6])
local values = {}
local index = 7
for _ = 1, field_count do
table.insert(values, ARGV[index])
table.insert(values, ARGV[index + 1])
index = index + 2
end
if tonumber(ARGV[4]) > 0 then
redis.call("XADD", ARGV[3], "MAXLEN", "~", ARGV[4], "*", unpack(values))
else
redis.call("XADD", ARGV[3], "*", unpack(values))
end
redis.call("SET", KEYS[1], ARGV[5], "KEEPTTL")
redis.call("ZREM", KEYS[3], KEYS[1])
redis.call("DEL", KEYS[2])
return 1
`)
// ScheduledRoute stores one due route reference loaded from
// `notification:route_schedule`.
//
// It carries both the raw Redis member (RouteKey) and its decoded identifier
// pair so callers do not need to re-parse the key.
type ScheduledRoute struct {
	// RouteKey stores the full Redis route key scheduled for processing.
	RouteKey string
	// NotificationID stores the owning notification identifier.
	NotificationID string
	// RouteID stores the scheduled route identifier.
	RouteID string
}
// CompleteRoutePublishedInput stores the data required to mark one route as
// published while atomically appending one outbound stream entry.
//
// CompleteRoutePublished checks the input via its Validate method before
// applying the transition.
type CompleteRoutePublishedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string
	// PublishedAt stores when the publication attempt succeeded.
	PublishedAt time.Time
	// Stream stores the outbound Redis Stream name.
	Stream string
	// StreamMaxLen bounds Stream with approximate trimming when positive. Zero
	// disables trimming.
	StreamMaxLen int64
	// StreamValues stores the exact Redis Stream fields appended to Stream.
	StreamValues map[string]any
}
// CompleteRouteFailedInput stores the data required to record one retryable
// publication failure.
//
// CompleteRouteFailed checks the input via its Validate method before
// applying the transition.
type CompleteRouteFailedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string
	// FailedAt stores when the publication attempt failed.
	FailedAt time.Time
	// NextAttemptAt stores the next scheduled retry time.
	NextAttemptAt time.Time
	// FailureClassification stores the classified publication failure kind.
	FailureClassification string
	// FailureMessage stores the detailed publication failure text.
	FailureMessage string
}
// CompleteRouteDeadLetterInput stores the data required to record one
// exhausted publication failure.
//
// CompleteRouteDeadLetter checks the input via its Validate method before
// applying the transition.
type CompleteRouteDeadLetterInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string
	// DeadLetteredAt stores when the route exhausted its retry budget.
	DeadLetteredAt time.Time
	// FailureClassification stores the classified terminal failure kind.
	FailureClassification string
	// FailureMessage stores the detailed terminal failure text.
	FailureMessage string
	// RecoveryHint stores the optional operator-facing recovery guidance.
	RecoveryHint string
}
// ListDueRoutes loads up to limit scheduled routes whose next-attempt score is
// due at or before now.
func (store *AcceptanceStore) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]ScheduledRoute, error) {
	switch {
	case store == nil || store.client == nil:
		return nil, errors.New("list due routes: nil store")
	case ctx == nil:
		return nil, errors.New("list due routes: nil context")
	}
	if err := validateRouteStateTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}
	// Schedule members are scored by their next-attempt time in Unix millis.
	query := &redis.ZRangeBy{
		Min:   "-inf",
		Max:   strconv.FormatInt(now.UnixMilli(), 10),
		Count: limit,
	}
	members, err := store.client.ZRangeByScore(ctx, store.keys.RouteSchedule(), query).Result()
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	due := make([]ScheduledRoute, 0, len(members))
	for _, routeKey := range members {
		notificationID, routeID, parseErr := store.keys.ParseRoute(routeKey)
		if parseErr != nil {
			return nil, fmt.Errorf("list due routes: %w", parseErr)
		}
		due = append(due, ScheduledRoute{
			RouteKey:       routeKey,
			NotificationID: notificationID,
			RouteID:        routeID,
		})
	}
	return due, nil
}
// ReadRouteScheduleSnapshot returns the current depth of the durable route
// schedule together with its oldest scheduled timestamp when one exists.
func (store *AcceptanceStore) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	if store == nil || store.client == nil {
		return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil store")
	}
	if ctx == nil {
		return telemetry.RouteScheduleSnapshot{}, errors.New("read route schedule snapshot: nil context")
	}
	scheduleKey := store.keys.RouteSchedule()
	depth, err := store.client.ZCard(ctx, scheduleKey).Result()
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: depth: %w", err)
	}
	snapshot := telemetry.RouteScheduleSnapshot{Depth: depth}
	if depth == 0 {
		return snapshot, nil
	}
	// The lowest-scored member carries the oldest next-attempt timestamp.
	oldest, err := store.client.ZRangeWithScores(ctx, scheduleKey, 0, 0).Result()
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("read route schedule snapshot: oldest scheduled entry: %w", err)
	}
	if len(oldest) == 0 {
		return snapshot, nil
	}
	oldestScheduledFor := time.UnixMilli(int64(oldest[0].Score)).UTC()
	snapshot.OldestScheduledFor = &oldestScheduledFor
	return snapshot, nil
}
// TryAcquireRouteLease attempts to acquire one temporary route lease owned by
// token for ttl.
func (store *AcceptanceStore) TryAcquireRouteLease(ctx context.Context, notificationID string, routeID string, token string, ttl time.Duration) (bool, error) {
	// Reject unusable receivers and arguments up front with stable messages.
	switch {
	case store == nil || store.client == nil:
		return false, errors.New("try acquire route lease: nil store")
	case ctx == nil:
		return false, errors.New("try acquire route lease: nil context")
	case notificationID == "":
		return false, errors.New("try acquire route lease: notification id must not be empty")
	case routeID == "":
		return false, errors.New("try acquire route lease: route id must not be empty")
	case token == "":
		return false, errors.New("try acquire route lease: token must not be empty")
	case ttl <= 0:
		return false, errors.New("try acquire route lease: ttl must be positive")
	}
	// SETNX-with-TTL makes the acquisition atomic across replicas.
	leaseKey := store.keys.RouteLease(notificationID, routeID)
	acquired, err := store.client.SetNX(ctx, leaseKey, token, ttl).Result()
	if err != nil {
		return false, fmt.Errorf("try acquire route lease: %w", err)
	}
	return acquired, nil
}
// ReleaseRouteLease releases one temporary route lease only when token still
// matches the stored owner value.
func (store *AcceptanceStore) ReleaseRouteLease(ctx context.Context, notificationID string, routeID string, token string) error {
	// Reject unusable receivers and arguments up front with stable messages.
	switch {
	case store == nil || store.client == nil:
		return errors.New("release route lease: nil store")
	case ctx == nil:
		return errors.New("release route lease: nil context")
	case notificationID == "":
		return errors.New("release route lease: notification id must not be empty")
	case routeID == "":
		return errors.New("release route lease: route id must not be empty")
	case token == "":
		return errors.New("release route lease: token must not be empty")
	}
	// The Lua script compares the stored token before deleting the key, so
	// a stale owner cannot clear another worker's claim.
	leaseKey := store.keys.RouteLease(notificationID, routeID)
	runErr := releaseRouteLeaseScript.Run(ctx, store.client, []string{leaseKey}, token).Err()
	if runErr != nil {
		return fmt.Errorf("release route lease: %w", runErr)
	}
	return nil
}
// CompleteRoutePublished atomically appends one outbound stream entry and
// marks the corresponding route as published.
//
// The whole transition runs inside one Lua script so the route-payload
// compare, the lease-ownership check, the XADD, the route rewrite, and the
// schedule removal happen together or not at all. ErrConflict reports that
// the route payload or lease changed between the caller's read and this call.
func (store *AcceptanceStore) CompleteRoutePublished(ctx context.Context, input CompleteRoutePublishedInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route published: nil store")
	}
	if ctx == nil {
		return errors.New("complete route published: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	// Derive the post-publication route state: published status, one more
	// attempt, and all retry/error bookkeeping cleared.
	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusPublished
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = time.Time{}
	updatedRoute.LastErrorClassification = ""
	updatedRoute.LastErrorMessage = ""
	updatedRoute.LastErrorAt = time.Time{}
	updatedRoute.UpdatedAt = input.PublishedAt
	updatedRoute.PublishedAt = input.PublishedAt
	updatedRoute.DeadLetteredAt = time.Time{}
	payload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	// The script compares the stored payload byte-for-byte against this
	// snapshot, so the expected route is marshaled the same canonical way.
	expectedPayload, err := MarshalRoute(input.ExpectedRoute)
	if err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	streamArgs, err := flattenStreamValues(input.StreamValues)
	if err != nil {
		return fmt.Errorf("complete route published: %w", err)
	}
	// KEYS: route payload, route lease, schedule sorted set.
	// ARGV: expected payload, lease token, stream name, stream max length,
	// new payload, field count, then the flattened field/value pairs.
	result, err := completePublishedRouteScript.Run(
		ctx,
		store.client,
		[]string{
			store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID),
			store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID),
			store.keys.RouteSchedule(),
		},
		append([]any{
			string(expectedPayload),
			input.LeaseToken,
			input.Stream,
			input.StreamMaxLen,
			string(payload),
			len(streamArgs) / 2,
		}, streamArgs...)...,
	).Int()
	// A zero script result means one of its guard checks failed.
	switch {
	case errors.Is(err, redis.Nil):
		return ErrConflict
	case err != nil:
		return err
	case result != 1:
		return ErrConflict
	default:
		return nil
	}
}
// CompleteRouteFailed atomically records one retryable publication failure and
// reschedules the route.
func (store *AcceptanceStore) CompleteRouteFailed(ctx context.Context, input CompleteRouteFailedInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route failed: nil store")
	}
	if ctx == nil {
		return errors.New("complete route failed: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}
	// Derive the post-failure route state: failed status, one more attempt,
	// the classified error recorded, and the next retry scheduled.
	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusFailed
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = input.NextAttemptAt
	updatedRoute.LastErrorClassification = input.FailureClassification
	updatedRoute.LastErrorMessage = input.FailureMessage
	updatedRoute.LastErrorAt = input.FailedAt
	updatedRoute.UpdatedAt = input.FailedAt
	payload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route failed: %w", err)
	}
	// Under the WATCH-verified route/lease check in completeRouteMutation:
	// rewrite the route payload (keeping its TTL), reschedule the route by
	// its next-attempt time, and release the lease.
	return store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error {
		pipe.SetArgs(ctx, store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), payload, redis.SetArgs{KeepTTL: true})
		pipe.ZAdd(ctx, store.keys.RouteSchedule(), redis.Z{
			Score:  float64(input.NextAttemptAt.UnixMilli()),
			Member: store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID),
		})
		pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID))
		return nil
	})
}
// CompleteRouteDeadLetter atomically records one exhausted publication
// failure, stores the dead-letter entry, and removes the route from the
// retry schedule.
func (store *AcceptanceStore) CompleteRouteDeadLetter(ctx context.Context, input CompleteRouteDeadLetterInput) error {
	if store == nil || store.client == nil {
		return errors.New("complete route dead letter: nil store")
	}
	if ctx == nil {
		return errors.New("complete route dead letter: nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}
	// Derive the terminal route state: dead-letter status, the final attempt
	// counted, no further retry scheduled.
	updatedRoute := input.ExpectedRoute
	updatedRoute.Status = acceptintent.RouteStatusDeadLetter
	updatedRoute.AttemptCount++
	updatedRoute.NextAttemptAt = time.Time{}
	updatedRoute.LastErrorClassification = input.FailureClassification
	updatedRoute.LastErrorMessage = input.FailureMessage
	updatedRoute.LastErrorAt = input.DeadLetteredAt
	updatedRoute.UpdatedAt = input.DeadLetteredAt
	updatedRoute.DeadLetteredAt = input.DeadLetteredAt
	// Guard: a route may only dead-letter once its retry budget is spent.
	if updatedRoute.AttemptCount < updatedRoute.MaxAttempts {
		return fmt.Errorf(
			"complete route dead letter: final attempt count %d is below max attempts %d",
			updatedRoute.AttemptCount,
			updatedRoute.MaxAttempts,
		)
	}
	routePayload, err := MarshalRoute(updatedRoute)
	if err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}
	deadLetterPayload, err := MarshalDeadLetter(DeadLetterEntry{
		NotificationID:        updatedRoute.NotificationID,
		RouteID:               updatedRoute.RouteID,
		Channel:               updatedRoute.Channel,
		RecipientRef:          updatedRoute.RecipientRef,
		FinalAttemptCount:     updatedRoute.AttemptCount,
		MaxAttempts:           updatedRoute.MaxAttempts,
		FailureClassification: input.FailureClassification,
		FailureMessage:        input.FailureMessage,
		CreatedAt:             input.DeadLetteredAt,
		RecoveryHint:          input.RecoveryHint,
	})
	if err != nil {
		return fmt.Errorf("complete route dead letter: %w", err)
	}
	// Under the WATCH-verified route/lease check in completeRouteMutation:
	// rewrite the route payload (keeping its TTL), persist the dead-letter
	// entry under its own TTL, drop the route from the schedule, and
	// release the lease.
	return store.completeRouteMutation(ctx, input.ExpectedRoute, input.LeaseToken, func(pipe redis.Pipeliner) error {
		pipe.SetArgs(ctx, store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID), routePayload, redis.SetArgs{KeepTTL: true})
		pipe.Set(ctx, store.keys.DeadLetter(updatedRoute.NotificationID, updatedRoute.RouteID), deadLetterPayload, store.cfg.DeadLetterTTL)
		pipe.ZRem(ctx, store.keys.RouteSchedule(), store.keys.Route(updatedRoute.NotificationID, updatedRoute.RouteID))
		pipe.Del(ctx, store.keys.RouteLease(updatedRoute.NotificationID, updatedRoute.RouteID))
		return nil
	})
}
// completeRouteMutation applies mutate inside a Redis MULTI/EXEC transaction
// guarded by an optimistic WATCH on the route key and its lease key.
//
// The pipeline only commits when, at commit time, the route key still exists,
// its stored payload equals expectedRoute, and the lease key still holds
// leaseToken. Any violation — including a concurrent write detected by WATCH
// (redis.TxFailedErr) — is surfaced as ErrConflict so callers can re-read the
// route and retry.
func (store *AcceptanceStore) completeRouteMutation(
	ctx context.Context,
	expectedRoute acceptintent.NotificationRoute,
	leaseToken string,
	mutate func(redis.Pipeliner) error,
) error {
	routeKey := store.keys.Route(expectedRoute.NotificationID, expectedRoute.RouteID)
	leaseKey := store.keys.RouteLease(expectedRoute.NotificationID, expectedRoute.RouteID)
	watchErr := store.client.Watch(ctx, func(tx *redis.Tx) error {
		currentRoute, err := loadWatchedRoute(ctx, tx, routeKey)
		switch {
		case errors.Is(err, redis.Nil):
			// Route key vanished between scheduling and completion.
			return ErrConflict
		case err != nil:
			return err
		}
		if err := ensureRoutesEqual(expectedRoute, currentRoute); err != nil {
			return err
		}
		leaseValue, err := tx.Get(ctx, leaseKey).Result()
		switch {
		case errors.Is(err, redis.Nil):
			// Lease expired; another worker may have taken over the route.
			return ErrConflict
		case err != nil:
			return err
		case leaseValue != leaseToken:
			// Lease was re-acquired under a different token.
			return ErrConflict
		}
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			return mutate(pipe)
		})
		return err
	}, routeKey, leaseKey)
	switch {
	case errors.Is(watchErr, ErrConflict), errors.Is(watchErr, redis.TxFailedErr):
		// TxFailedErr means a watched key changed before EXEC; map it to the
		// same conflict signal as the explicit checks above.
		return ErrConflict
	case watchErr != nil:
		return watchErr
	default:
		return nil
	}
}
// loadWatchedRoute reads and decodes the route payload stored under routeKey
// using the supplied watched transaction. Redis errors (including redis.Nil
// for a missing key) are returned unchanged for the caller to classify.
func loadWatchedRoute(ctx context.Context, tx *redis.Tx, routeKey string) (acceptintent.NotificationRoute, error) {
	raw, readErr := tx.Get(ctx, routeKey).Bytes()
	if readErr != nil {
		var zero acceptintent.NotificationRoute
		return zero, readErr
	}
	return UnmarshalRoute(raw)
}
// ensureRoutesEqual returns nil when expected and actual serialize to the
// same canonical payload, and ErrConflict when they differ. Marshalling
// failures are wrapped so the two sides are distinguishable in logs.
func ensureRoutesEqual(expected acceptintent.NotificationRoute, actual acceptintent.NotificationRoute) error {
	wantPayload, marshalErr := MarshalRoute(expected)
	if marshalErr != nil {
		return fmt.Errorf("marshal expected route: %w", marshalErr)
	}
	gotPayload, marshalErr := MarshalRoute(actual)
	if marshalErr != nil {
		return fmt.Errorf("marshal current route: %w", marshalErr)
	}
	if bytes.Equal(wantPayload, gotPayload) {
		return nil
	}
	return ErrConflict
}
// validateCompletionRoute checks that route is well formed and currently in
// a status from which a completion transition is allowed (pending or failed).
func validateCompletionRoute(route acceptintent.NotificationRoute) error {
	if err := route.Validate(); err != nil {
		return err
	}
	status := route.Status
	if status == acceptintent.RouteStatusPending || status == acceptintent.RouteStatusFailed {
		return nil
	}
	return fmt.Errorf("route status %q is not completable", status)
}
// validateStreamValues reports whether values is a non-empty field map whose
// keys are non-empty strings and whose values are non-empty strings or byte
// slices — the only value shapes the stream publishers write here.
//
// Constant messages use errors.New; fmt.Errorf is reserved for messages that
// interpolate the offending key.
func validateStreamValues(values map[string]any) error {
	if len(values) == 0 {
		return errors.New("stream values must not be empty")
	}
	for key, raw := range values {
		if key == "" {
			return errors.New("stream values key must not be empty")
		}
		switch typed := raw.(type) {
		case string:
			if typed == "" {
				return fmt.Errorf("stream values %q must not be empty", key)
			}
		case []byte:
			if len(typed) == 0 {
				return fmt.Errorf("stream values %q must not be empty", key)
			}
		default:
			// Reject other types so value encoding stays explicit at the call
			// site instead of being delegated to the client's formatting.
			return fmt.Errorf("stream values %q must be string or []byte", key)
		}
	}
	return nil
}
// flattenStreamValues converts values into the alternating key/value slice
// form consumed by XADD, with entries ordered by key so the emitted field
// order is deterministic. The error result is currently always nil.
func flattenStreamValues(values map[string]any) ([]any, error) {
	sortedKeys := make([]string, 0, len(values))
	for key := range values {
		sortedKeys = append(sortedKeys, key)
	}
	sort.Strings(sortedKeys)
	flattened := make([]any, 0, 2*len(sortedKeys))
	for _, key := range sortedKeys {
		flattened = append(flattened, key, values[key])
	}
	return flattened, nil
}
func validateRouteStateTimestamp(name string, value time.Time) error {
if value.IsZero() {
return fmt.Errorf("%s must not be zero", name)
}
if !value.Equal(value.UTC()) {
return fmt.Errorf("%s must be UTC", name)
}
if !value.Equal(value.Truncate(time.Millisecond)) {
return fmt.Errorf("%s must use millisecond precision", name)
}
return nil
}
// Validate reports whether route carries every field required to dereference
// a due route: the schedule member key, the notification id, and the route
// id.
func (route ScheduledRoute) Validate() error {
	switch {
	case route.RouteKey == "":
		return fmt.Errorf("scheduled route key must not be empty")
	case route.NotificationID == "":
		return fmt.Errorf("scheduled route notification id must not be empty")
	case route.RouteID == "":
		return fmt.Errorf("scheduled route route id must not be empty")
	default:
		return nil
	}
}
// Validate reports whether input describes a full published-route
// transition: a completable route, a lease token, a UTC millisecond
// timestamp, a target stream, a non-negative trim bound, and well-formed
// stream values. Checks run in that order; the first failure is returned.
func (input CompleteRoutePublishedInput) Validate() error {
	if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
		return err
	}
	if input.LeaseToken == "" {
		return fmt.Errorf("lease token must not be empty")
	}
	if err := validateRouteStateTimestamp("published at", input.PublishedAt); err != nil {
		return err
	}
	if input.Stream == "" {
		return fmt.Errorf("stream must not be empty")
	}
	if input.StreamMaxLen < 0 {
		return fmt.Errorf("stream max len must not be negative")
	}
	return validateStreamValues(input.StreamValues)
}
// Validate reports whether input describes a full retryable-failure
// transition: a completable route, a lease token, UTC millisecond failure
// and retry timestamps, and non-empty failure details. Checks run in that
// order; the first failure is returned.
func (input CompleteRouteFailedInput) Validate() error {
	if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
		return err
	}
	if input.LeaseToken == "" {
		return fmt.Errorf("lease token must not be empty")
	}
	timestamps := []struct {
		name  string
		value time.Time
	}{
		{name: "failed at", value: input.FailedAt},
		{name: "next attempt at", value: input.NextAttemptAt},
	}
	for _, stamp := range timestamps {
		if err := validateRouteStateTimestamp(stamp.name, stamp.value); err != nil {
			return err
		}
	}
	if input.FailureClassification == "" {
		return fmt.Errorf("failure classification must not be empty")
	}
	if input.FailureMessage == "" {
		return fmt.Errorf("failure message must not be empty")
	}
	return nil
}
// Validate reports whether input describes a full dead-letter transition: a
// completable route, a lease token, a UTC millisecond timestamp, and
// non-empty failure details. Checks run in that order; the first failure is
// returned.
func (input CompleteRouteDeadLetterInput) Validate() error {
	if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
		return err
	}
	switch {
	case input.LeaseToken == "":
		return fmt.Errorf("lease token must not be empty")
	}
	if err := validateRouteStateTimestamp("dead lettered at", input.DeadLetteredAt); err != nil {
		return err
	}
	switch {
	case input.FailureClassification == "":
		return fmt.Errorf("failure classification must not be empty")
	case input.FailureMessage == "":
		return fmt.Errorf("failure message must not be empty")
	default:
		return nil
	}
}
@@ -1,465 +0,0 @@
package redisstate
import (
"context"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/stretchr/testify/require"
)
// TestAcceptanceStoreListDueRoutesLoadsScheduledMembers verifies that both
// routes created by the fixture acceptance are returned once due and that
// each returned route passes its own validation.
func TestAcceptanceStoreListDueRoutesLoadsScheduledMembers(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	require.NoError(t, store.CreateAcceptance(context.Background(), validUserAcceptanceInput(now, 0)))
	routes, err := store.ListDueRoutes(context.Background(), now, 10)
	require.NoError(t, err)
	// The fixture schedules one push route and one email route for user-1.
	require.Len(t, routes, 2)
	require.ElementsMatch(t, []string{"push:user:user-1", "email:user:user-1"}, []string{routes[0].RouteID, routes[1].RouteID})
	for _, route := range routes {
		require.NoError(t, route.Validate())
	}
}
// TestAcceptanceStoreReadRouteScheduleSnapshot verifies that the schedule
// snapshot reports the depth of the route schedule and the oldest scheduled
// timestamp after a fixture acceptance is created.
func TestAcceptanceStoreReadRouteScheduleSnapshot(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	require.NoError(t, store.CreateAcceptance(context.Background(), validUserAcceptanceInput(now, 0)))
	snapshot, err := store.ReadRouteScheduleSnapshot(context.Background())
	require.NoError(t, err)
	// Two routes scheduled at the same instant: depth 2, oldest == now.
	require.Equal(t, int64(2), snapshot.Depth)
	require.NotNil(t, snapshot.OldestScheduledFor)
	require.Equal(t, now, *snapshot.OldestScheduledFor)
}
// TestAcceptanceStoreRouteLeaseAcquireReleaseAndExpire verifies the route
// lease lifecycle: a held lease blocks other tokens, an explicit release
// frees it, and TTL expiry (simulated via miniredis FastForward) frees it
// without a release.
func TestAcceptanceStoreRouteLeaseAcquireReleaseAndExpire(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	acquired, err := store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-1", 2*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	// A second token must not steal a held lease.
	acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-2", 2*time.Second)
	require.NoError(t, err)
	require.False(t, acquired)
	require.NoError(t, store.ReleaseRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-1"))
	acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-3", 2*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	// Advance past the 2s TTL so the lease expires server-side.
	server.FastForward(3 * time.Second)
	acquired, err = store.TryAcquireRouteLease(context.Background(), "1775121700000-0", "push:user:user-1", "token-4", 2*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
}
// TestAcceptanceStoreCompleteRoutePublishedAppendsTrimmedStreamEntryAndMarksRoutePublished
// verifies that completing a push route as published updates the route
// record, appends one entry to the gateway stream, removes the route from
// the schedule, and deletes its lease key.
func TestAcceptanceStoreCompleteRoutePublishedAppendsTrimmedStreamEntryAndMarksRoutePublished(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))
	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    "token-1",
		PublishedAt:   publishedAt,
		Stream:        "gateway:client-events",
		StreamMaxLen:  1024,
		StreamValues: map[string]any{
			"user_id":       "user-1",
			"event_type":    "game.turn.ready",
			"event_id":      input.Notification.NotificationID + "/push:user:user-1",
			"payload_bytes": []byte("payload-1"),
			"request_id":    "request-1",
			"trace_id":      "trace-1",
		},
	}))
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)
	// Only the untouched email route should remain scheduled.
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)
	messages, err := client.XRange(context.Background(), "gateway:client-events", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
	// The lease key must be gone after completion (Get returns redis.Nil).
	leaseKey := Keyspace{}.RouteLease(input.Notification.NotificationID, "push:user:user-1")
	_, err = client.Get(context.Background(), leaseKey).Result()
	require.Error(t, err)
}
// TestAcceptanceStoreCompleteRoutePublishedAppendsUntrimmedMailCommand
// verifies that completing an email route as published appends a mail
// delivery command to the mail stream with StreamMaxLen 0 (no trimming) and
// marks the route published.
func TestAcceptanceStoreCompleteRoutePublishedAppendsUntrimmedMailCommand(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))
	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "email:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	publishedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRoutePublished(context.Background(), CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    "token-1",
		PublishedAt:   publishedAt,
		Stream:        "mail:delivery_commands",
		// Zero means the mail stream is never trimmed on publish.
		StreamMaxLen: 0,
		StreamValues: map[string]any{
			"delivery_id":     input.Notification.NotificationID + "/email:user:user-1",
			"source":          "notification",
			"payload_mode":    "template",
			"idempotency_key": "notification:" + input.Notification.NotificationID + "/email:user:user-1",
			"requested_at_ms": "1775121700000",
			"payload_json":    `{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
		},
	}))
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPublished, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, publishedAt, updatedRoute.PublishedAt)
	messages, err := client.XRange(context.Background(), "mail:delivery_commands", "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
}
// TestAcceptanceStoreCompleteRouteFailedReschedulesRoute verifies that a
// retryable failure records the failure details, increments the attempt
// count, and re-adds the route to the schedule at the next attempt time.
func TestAcceptanceStoreCompleteRouteFailedReschedulesRoute(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 0)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))
	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	failedAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	nextAttemptAt := failedAt.Add(2 * time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteFailed(context.Background(), CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            "token-1",
		FailedAt:              failedAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: "gateway_stream_publish_failed",
		FailureMessage:        "temporary outage",
	}))
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusFailed, updatedRoute.Status)
	require.Equal(t, 1, updatedRoute.AttemptCount)
	require.Equal(t, nextAttemptAt, updatedRoute.NextAttemptAt)
	require.Equal(t, "gateway_stream_publish_failed", updatedRoute.LastErrorClassification)
	// Both the rescheduled push route and the untouched email route remain
	// in the schedule.
	scheduled, err := client.ZRangeWithScores(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Len(t, scheduled, 2)
	require.Contains(t, []string{
		scheduled[0].Member.(string),
		scheduled[1].Member.(string),
	}, Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-1"))
}
// TestAcceptanceStoreCompleteRouteDeadLetterStoresTerminalFailure verifies
// that dead-lettering a route (fixture starts at attempt count 2 of max 3)
// marks the route terminal, writes a dead-letter entry, and removes the
// route from the schedule.
func TestAcceptanceStoreCompleteRouteDeadLetterStoresTerminalFailure(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	// pushAttemptCount 2: the dead-letter transition adds the final attempt.
	input := validUserAcceptanceInput(now, 2)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))
	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
		ExpectedRoute:         route,
		LeaseToken:            "token-1",
		DeadLetteredAt:        deadLetteredAt,
		FailureClassification: "payload_encoding_failed",
		FailureMessage:        "payload is invalid",
	}))
	updatedRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusDeadLetter, updatedRoute.Status)
	require.Equal(t, 3, updatedRoute.AttemptCount)
	require.Equal(t, deadLetteredAt, updatedRoute.DeadLetteredAt)
	payload, err := client.Get(context.Background(), Keyspace{}.DeadLetter(input.Notification.NotificationID, "push:user:user-1")).Bytes()
	require.NoError(t, err)
	entry, err := UnmarshalDeadLetter(payload)
	require.NoError(t, err)
	require.Equal(t, "payload_encoding_failed", entry.FailureClassification)
	require.Equal(t, 3, entry.FinalAttemptCount)
	// Only the untouched email route should remain scheduled.
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.Equal(t, []string{Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1")}, scheduled)
}
// TestAcceptanceStoreDeadLetterIsIsolatedByChannelAndRecipient verifies that
// dead-lettering one route (push to user-1) leaves the sibling routes for
// the same notification — other channels and other recipients — pending and
// still scheduled.
func TestAcceptanceStoreDeadLetterIsIsolatedByChannelAndRecipient(t *testing.T) {
	t.Parallel()
	server := miniredis.RunT(t)
	client := newTestRedisClient(t, server)
	store, err := NewAcceptanceStore(client, AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	input := validUserAcceptanceInput(now, 2)
	// Extend the fixture with a second recipient so isolation across both
	// channel and recipient can be asserted.
	input.Notification.RecipientUserIDs = []string{"user-1", "user-2"}
	input.Routes = append(input.Routes,
		acceptintent.NotificationRoute{
			NotificationID: input.Notification.NotificationID,
			RouteID:        "push:user:user-2",
			Channel:        intentstream.ChannelPush,
			RecipientRef:   "user:user-2",
			Status:         acceptintent.RouteStatusPending,
			AttemptCount:   0,
			MaxAttempts:    3,
			NextAttemptAt:  now,
			ResolvedEmail:  "second@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
		acceptintent.NotificationRoute{
			NotificationID: input.Notification.NotificationID,
			RouteID:        "email:user:user-2",
			Channel:        intentstream.ChannelEmail,
			RecipientRef:   "user:user-2",
			Status:         acceptintent.RouteStatusPending,
			AttemptCount:   0,
			MaxAttempts:    7,
			NextAttemptAt:  now,
			ResolvedEmail:  "second@example.com",
			ResolvedLocale: "en",
			CreatedAt:      now,
			UpdatedAt:      now,
		},
	)
	require.NoError(t, store.CreateAcceptance(context.Background(), input))
	acquired, err := store.TryAcquireRouteLease(context.Background(), input.Notification.NotificationID, "push:user:user-1", "token-1", 5*time.Second)
	require.NoError(t, err)
	require.True(t, acquired)
	route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	deadLetteredAt := now.Add(time.Second).UTC().Truncate(time.Millisecond)
	require.NoError(t, store.CompleteRouteDeadLetter(context.Background(), CompleteRouteDeadLetterInput{
		ExpectedRoute:         route,
		LeaseToken:            "token-1",
		DeadLetteredAt:        deadLetteredAt,
		FailureClassification: "gateway_stream_publish_failed",
		FailureMessage:        "gateway unavailable",
	}))
	deadLetterRoute, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusDeadLetter, deadLetterRoute.Status)
	// Every sibling route is untouched: still stored and still pending.
	for _, routeID := range []string{"email:user:user-1", "push:user:user-2", "email:user:user-2"} {
		route, found, err := store.GetRoute(context.Background(), input.Notification.NotificationID, routeID)
		require.NoError(t, err)
		require.True(t, found, "route %s should remain stored", routeID)
		require.Equal(t, acceptintent.RouteStatusPending, route.Status, "route %s should remain pending", routeID)
	}
	scheduled, err := client.ZRange(context.Background(), Keyspace{}.RouteSchedule(), 0, -1).Result()
	require.NoError(t, err)
	require.ElementsMatch(t, []string{
		Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-1"),
		Keyspace{}.Route(input.Notification.NotificationID, "push:user:user-2"),
		Keyspace{}.Route(input.Notification.NotificationID, "email:user:user-2"),
	}, scheduled)
}
// validUserAcceptanceInput builds the shared test fixture: one notification
// for user-1 with a push route (max 3 attempts, attempt count supplied by
// pushAttemptCount) and an email route (max 7 attempts), plus the matching
// idempotency record expiring after seven days.
func validUserAcceptanceInput(now time.Time, pushAttemptCount int) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			RequestID:          "request-1",
			TraceID:            "trace-1",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			{
				NotificationID: "1775121700000-0",
				RouteID:        "push:user:user-1",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				// Parameterized so dead-letter tests can start near the cap.
				AttemptCount:   pushAttemptCount,
				MaxAttempts:    3,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameMaster,
			IdempotencyKey:     "game-123:turn-54",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}
+82 -18
View File
@@ -5,7 +5,11 @@ import (
"errors"
"fmt"
"log/slog"
"time"
"galaxy/notification/internal/adapters/postgres/migrations"
"galaxy/notification/internal/adapters/postgres/notificationstore"
"galaxy/notification/internal/adapters/postgres/routepublisher"
redisadapter "galaxy/notification/internal/adapters/redis"
"galaxy/notification/internal/adapters/redisstate"
userserviceadapter "galaxy/notification/internal/adapters/userservice"
@@ -14,10 +18,16 @@ import (
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/telemetry"
"galaxy/notification/internal/worker"
"galaxy/postgres"
"github.com/redis/go-redis/v9"
)
// systemClock satisfies the worker.Clock contract for runtime wiring.
type systemClock struct{}
func (systemClock) Now() time.Time { return time.Now() }
// Runtime owns the runnable Notification Service process plus the cleanup
// functions that release runtime resources after shutdown.
type Runtime struct {
@@ -25,16 +35,24 @@ type Runtime struct {
app *App
probeServer *internalhttp.Server
telemetry *telemetry.Runtime
intentConsumer *worker.IntentConsumer
pushPublisher *worker.PushPublisher
emailPublisher *worker.EmailPublisher
probeServer *internalhttp.Server
telemetry *telemetry.Runtime
intentConsumer *worker.IntentConsumer
pushPublisher *worker.PushPublisher
emailPublisher *worker.EmailPublisher
retentionWorker *worker.SQLRetentionWorker
cleanupFns []func() error
}
// NewRuntime constructs the runnable Notification Service process from cfg.
//
// PostgreSQL migrations apply strictly before any HTTP listener becomes
// ready. The runtime opens one shared `*redis.Client` consumed by the intent
// consumer (XREAD), the publishers (outbound XADDs), the route lease store,
// and the persisted stream offset store. Per PG_PLAN.md §5 the durable
// notification state lives in PostgreSQL while the lease key, the consumer
// offset, and the streams themselves remain on Redis.
func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*Runtime, error) {
if ctx == nil {
return nil, fmt.Errorf("new notification runtime: nil context")
@@ -91,17 +109,42 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
}
acceptanceStore, err := redisstate.NewAcceptanceStore(redisClient, redisstate.AcceptanceConfig{
RecordTTL: cfg.Retry.RecordTTL,
DeadLetterTTL: cfg.Retry.DeadLetterTTL,
IdempotencyTTL: cfg.Retry.IdempotencyTTL,
pgPool, err := postgres.OpenPrimary(ctx, cfg.Postgres.Conn,
postgres.WithTracerProvider(telemetryRuntime.TracerProvider()),
postgres.WithMeterProvider(telemetryRuntime.MeterProvider()),
)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: open postgres: %w", err))
}
runtime.cleanupFns = append(runtime.cleanupFns, pgPool.Close)
unregisterPGStats, err := postgres.InstrumentDBStats(pgPool,
postgres.WithMeterProvider(telemetryRuntime.MeterProvider()),
)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: instrument postgres: %w", err))
}
runtime.cleanupFns = append(runtime.cleanupFns, func() error {
unregisterPGStats()
return nil
})
if err := postgres.Ping(ctx, pgPool, cfg.Postgres.Conn.OperationTimeout); err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: ping postgres: %w", err))
}
if err := postgres.RunMigrations(ctx, pgPool, migrations.FS(), "."); err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: run postgres migrations: %w", err))
}
notificationStore, err := notificationstore.New(notificationstore.Config{
DB: pgPool,
OperationTimeout: cfg.Postgres.Conn.OperationTimeout,
})
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: acceptance store: %w", err))
return cleanupOnError(fmt.Errorf("new notification runtime: notification store: %w", err))
}
malformedIntentStore, err := redisstate.NewMalformedIntentStore(redisClient, cfg.Retry.DeadLetterTTL)
leaseStore, err := redisstate.NewLeaseStore(redisClient)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: malformed intent store: %w", err))
return cleanupOnError(fmt.Errorf("new notification runtime: lease store: %w", err))
}
streamOffsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
if err != nil {
@@ -111,8 +154,14 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: intent stream lag reader: %w", err))
}
telemetryRuntime.SetRouteScheduleSnapshotReader(acceptanceStore)
publisherStore, err := routepublisher.New(notificationStore, leaseStore)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: route publisher store: %w", err))
}
telemetryRuntime.SetRouteScheduleSnapshotReader(notificationStore)
telemetryRuntime.SetIntentStreamLagSnapshotReader(intentStreamLagReader)
userDirectory, err := userserviceadapter.NewClient(userserviceadapter.Config{
BaseURL: cfg.UserService.BaseURL,
RequestTimeout: cfg.UserService.Timeout,
@@ -121,8 +170,9 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
return cleanupOnError(fmt.Errorf("new notification runtime: user service client: %w", err))
}
runtime.cleanupFns = append(runtime.cleanupFns, userDirectory.Close)
acceptIntentService, err := acceptintent.New(acceptintent.Config{
Store: acceptanceStore,
Store: notificationStore,
UserDirectory: userDirectory,
Clock: nil,
Logger: logger,
@@ -140,7 +190,7 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
Stream: cfg.Streams.Intents,
BlockTimeout: cfg.IntentsReadBlockTimeout,
Acceptor: acceptIntentService,
MalformedRecorder: malformedIntentStore,
MalformedRecorder: notificationStore,
OffsetStore: streamOffsetStore,
Telemetry: telemetryRuntime,
}, logger)
@@ -149,7 +199,7 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
}
runtime.intentConsumer = intentConsumer
pushPublisher, err := worker.NewPushPublisher(worker.PushPublisherConfig{
Store: acceptanceStore,
Store: publisherStore,
GatewayStream: cfg.Streams.GatewayClientEvents,
GatewayStreamMaxLen: cfg.Streams.GatewayClientEventsStreamMaxLen,
RouteLeaseTTL: cfg.Retry.RouteLeaseTTL,
@@ -158,13 +208,14 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
Encoder: nil,
Telemetry: telemetryRuntime,
Clock: nil,
StreamPublisher: redisClient,
}, logger)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: push publisher: %w", err))
}
runtime.pushPublisher = pushPublisher
emailPublisher, err := worker.NewEmailPublisher(worker.EmailPublisherConfig{
Store: acceptanceStore,
Store: publisherStore,
MailDeliveryCommandsStream: cfg.Streams.MailDeliveryCommands,
RouteLeaseTTL: cfg.Retry.RouteLeaseTTL,
RouteBackoffMin: cfg.Retry.RouteBackoffMin,
@@ -172,12 +223,25 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
Encoder: nil,
Telemetry: telemetryRuntime,
Clock: nil,
StreamPublisher: redisClient,
}, logger)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: email publisher: %w", err))
}
runtime.emailPublisher = emailPublisher
retentionWorker, err := worker.NewSQLRetentionWorker(worker.SQLRetentionConfig{
Store: notificationStore,
RecordRetention: cfg.Retention.RecordRetention,
MalformedIntentRetention: cfg.Retention.MalformedIntentRetention,
CleanupInterval: cfg.Retention.CleanupInterval,
Clock: systemClock{},
}, logger)
if err != nil {
return cleanupOnError(fmt.Errorf("new notification runtime: sql retention worker: %w", err))
}
runtime.retentionWorker = retentionWorker
probeServer, err := internalhttp.NewServer(internalhttp.Config{
Addr: cfg.InternalHTTP.Addr,
ReadHeaderTimeout: cfg.InternalHTTP.ReadHeaderTimeout,
@@ -191,7 +255,7 @@ func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*R
return cleanupOnError(fmt.Errorf("new notification runtime: internal HTTP server: %w", err))
}
runtime.probeServer = probeServer
runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher)
runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher, retentionWorker)
return runtime, nil
}
@@ -1,72 +0,0 @@
package app
import (
"context"
"net/http"
"os"
"testing"
"time"
"galaxy/notification/internal/config"
"github.com/stretchr/testify/require"
testcontainers "github.com/testcontainers/testcontainers-go"
rediscontainer "github.com/testcontainers/testcontainers-go/modules/redis"
)
const (
	// realRuntimeSmokeEnv gates the container-backed smoke suite; it only
	// runs when this variable is set to "1".
	realRuntimeSmokeEnv = "NOTIFICATION_REAL_RUNTIME_SMOKE"
	// realRuntimeRedisImage is the Redis image the smoke test launches via
	// testcontainers.
	realRuntimeRedisImage = "redis:7"
)
// TestRealRuntimeCompatibility boots the full runtime against a real Redis
// container and asserts the health and readiness probes respond, then shuts
// down cleanly. Skipped unless the smoke-suite env var is set to "1".
func TestRealRuntimeCompatibility(t *testing.T) {
	if os.Getenv(realRuntimeSmokeEnv) != "1" {
		t.Skipf("set %s=1 to run the real runtime smoke suite", realRuntimeSmokeEnv)
	}
	ctx := context.Background()
	redisContainer, err := rediscontainer.Run(ctx, realRuntimeRedisImage)
	require.NoError(t, err)
	testcontainers.CleanupContainer(t, redisContainer)
	redisAddr, err := redisContainer.Endpoint(ctx, "")
	require.NoError(t, err)
	cfg := config.DefaultConfig()
	cfg.Redis.Addr = redisAddr
	// The user service is never called during probe checks, so a
	// non-routable base URL is sufficient here.
	cfg.UserService.BaseURL = "http://user-service.internal"
	cfg.InternalHTTP.Addr = mustFreeAddr(t)
	cfg.ShutdownTimeout = 2 * time.Second
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"
	runtime, err := NewRuntime(context.Background(), cfg, testLogger())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, runtime.Close())
	}()
	runCtx, cancel := context.WithCancel(context.Background())
	defer cancel()
	runErrCh := make(chan error, 1)
	go func() {
		runErrCh <- runtime.Run(runCtx)
	}()
	client := &http.Client{
		Timeout: 500 * time.Millisecond,
		Transport: &http.Transport{
			DisableKeepAlives: true,
		},
	}
	t.Cleanup(client.CloseIdleConnections)
	waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/healthz", http.StatusOK)
	assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/readyz", http.StatusOK)
	cancel()
	waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
-581
View File
@@ -1,581 +0,0 @@
package app
import (
"context"
"encoding/json"
"io"
"log/slog"
"net"
"net/http"
"net/http/httptest"
"strconv"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/config"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewRuntimeStartsProbeListenerAndStopsCleanly verifies that a runtime
// built against miniredis serves /healthz and /readyz with 200, returns 404
// for the unregistered /metrics path, and exits Run cleanly on cancellation.
func TestNewRuntimeStartsProbeListenerAndStopsCleanly(t *testing.T) {
t.Parallel()
redisServer := miniredis.RunT(t)
// The user lookup stub never needs to answer; no intent reaches it here.
userService := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
defer userService.Close()
cfg := config.DefaultConfig()
cfg.Redis.Addr = redisServer.Addr()
cfg.UserService.BaseURL = userService.URL
cfg.InternalHTTP.Addr = mustFreeAddr(t)
cfg.ShutdownTimeout = 10 * time.Second
// Short blocking-read window keeps the consumer loop responsive in tests.
cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
cfg.Telemetry.TracesExporter = "none"
cfg.Telemetry.MetricsExporter = "none"
runtime, err := NewRuntime(context.Background(), cfg, testLogger())
require.NoError(t, err)
defer func() {
require.NoError(t, runtime.Close())
}()
runCtx, cancel := context.WithCancel(context.Background())
defer cancel()
runErrCh := make(chan error, 1)
go func() {
runErrCh <- runtime.Run(runCtx)
}()
client := newTestHTTPClient(t)
waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/healthz", http.StatusOK)
assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/readyz", http.StatusOK)
// /metrics is intentionally not exposed on the internal listener.
assertHTTPStatus(t, client, "http://"+cfg.InternalHTTP.Addr+"/metrics", http.StatusNotFound)
cancel()
waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimeFailsFastWhenRedisPingCheckFails verifies that NewRuntime
// refuses to build a runtime when the startup PING against the configured
// Redis address fails: it returns a nil runtime and an error that mentions
// "ping redis". The address points at a port that nothing listens on.
func TestNewRuntimeFailsFastWhenRedisPingCheckFails(t *testing.T) {
t.Parallel()
cfg := config.DefaultConfig()
// A freshly released ephemeral port: connection attempts are refused.
cfg.Redis.Addr = mustFreeAddr(t)
cfg.UserService.BaseURL = "http://127.0.0.1:18080"
cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
cfg.Telemetry.TracesExporter = "none"
cfg.Telemetry.MetricsExporter = "none"
rt, err := NewRuntime(context.Background(), cfg, testLogger())
require.Nil(t, rt)
require.Error(t, err)
assert.ErrorContains(t, err, "ping redis")
}
// TestNewRuntimeAcceptsIntentThroughConsumer pushes one well-formed intent
// onto the intents stream and verifies the consumer enriches the recipient
// through the user-service stub: the stored email route carries the resolved
// email and a locale normalized from "en-US" to "en".
func TestNewRuntimeAcceptsIntentThroughConsumer(t *testing.T) {
t.Parallel()
redisServer := miniredis.RunT(t)
redisClient := redis.NewClient(&redis.Options{
Addr: redisServer.Addr(),
Protocol: 2,
DisableIdentity: true,
})
t.Cleanup(func() {
assert.NoError(t, redisClient.Close())
})
// Stub user lookup: user-1 resolves to this email and preferred language.
userService := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
writeJSON(t, writer, http.StatusOK, map[string]any{
"user": map[string]any{
"email": "pilot@example.com",
"preferred_language": "en-US",
},
})
})
defer userService.Close()
cfg := config.DefaultConfig()
cfg.Redis.Addr = redisServer.Addr()
cfg.UserService.BaseURL = userService.URL
cfg.InternalHTTP.Addr = mustFreeAddr(t)
cfg.ShutdownTimeout = 10 * time.Second
cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
cfg.Telemetry.TracesExporter = "none"
cfg.Telemetry.MetricsExporter = "none"
runtime, err := NewRuntime(context.Background(), cfg, testLogger())
require.NoError(t, err)
defer func() {
require.NoError(t, runtime.Close())
}()
runCtx, cancel := context.WithCancel(context.Background())
defer cancel()
runErrCh := make(chan error, 1)
go func() {
runErrCh <- runtime.Run(runCtx)
}()
client := newTestHTTPClient(t)
waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
// Inject the intent directly into the stream the consumer reads from.
messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
Stream: cfg.Streams.Intents,
Values: map[string]any{
"notification_type": "game.turn.ready",
"producer": "game_master",
"audience_kind": "user",
"recipient_user_ids_json": `["user-1"]`,
"idempotency_key": "game-123:turn-ready",
"occurred_at_ms": "1775121700000",
"payload_json": `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
},
}).Result()
require.NoError(t, err)
// Poll until the email route for user-1 appears with the enriched fields.
require.Eventually(t, func() bool {
payload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
if err != nil {
return false
}
route, err := redisstate.UnmarshalRoute(payload)
if err != nil {
return false
}
return route.ResolvedEmail == "pilot@example.com" && route.ResolvedLocale == "en"
}, time.Second, 10*time.Millisecond)
cancel()
waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimePublishesAcceptedPushAndEmailRoutes drives one user-audience
// intent end to end and verifies both fan-out legs: the push route lands on
// the gateway client-events stream and the email route on the mail delivery
// commands stream, each with the expected correlation fields, while both
// stored routes end in status "published" after exactly one attempt.
func TestNewRuntimePublishesAcceptedPushAndEmailRoutes(t *testing.T) {
t.Parallel()
redisServer := miniredis.RunT(t)
redisClient := redis.NewClient(&redis.Options{
Addr: redisServer.Addr(),
Protocol: 2,
DisableIdentity: true,
})
t.Cleanup(func() {
assert.NoError(t, redisClient.Close())
})
// Stub user lookup so user-1 enriches to a known email and locale.
userService := newUserLookupServer(t, func(writer http.ResponseWriter, request *http.Request) {
writeJSON(t, writer, http.StatusOK, map[string]any{
"user": map[string]any{
"email": "pilot@example.com",
"preferred_language": "en-US",
},
})
})
defer userService.Close()
cfg := config.DefaultConfig()
cfg.Redis.Addr = redisServer.Addr()
cfg.UserService.BaseURL = userService.URL
cfg.InternalHTTP.Addr = mustFreeAddr(t)
cfg.ShutdownTimeout = 10 * time.Second
cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
cfg.Telemetry.TracesExporter = "none"
cfg.Telemetry.MetricsExporter = "none"
runtime, err := NewRuntime(context.Background(), cfg, testLogger())
require.NoError(t, err)
defer func() {
require.NoError(t, runtime.Close())
}()
runCtx, cancel := context.WithCancel(context.Background())
defer cancel()
runErrCh := make(chan error, 1)
go func() {
runErrCh <- runtime.Run(runCtx)
}()
client := newTestHTTPClient(t)
waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
// Inject one intent carrying request/trace IDs so propagation can be checked.
messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
Stream: cfg.Streams.Intents,
Values: map[string]any{
"notification_type": "game.turn.ready",
"producer": "game_master",
"audience_kind": "user",
"recipient_user_ids_json": `["user-1"]`,
"idempotency_key": "game-123:turn-ready",
"occurred_at_ms": "1775121700000",
"payload_json": `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
"request_id": "request-1",
"trace_id": "trace-1",
},
}).Result()
require.NoError(t, err)
// Wait until BOTH routes are published with a single attempt each.
require.Eventually(t, func() bool {
pushPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:user:user-1")).Bytes()
if err != nil {
return false
}
pushRoute, err := redisstate.UnmarshalRoute(pushPayload)
if err != nil {
return false
}
emailPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
if err != nil {
return false
}
emailRoute, err := redisstate.UnmarshalRoute(emailPayload)
if err != nil {
return false
}
return pushRoute.Status == "published" && pushRoute.AttemptCount == 1 &&
emailRoute.Status == "published" && emailRoute.AttemptCount == 1
}, 2*time.Second, 10*time.Millisecond)
// Re-read the settled state for the hard assertions below.
pushRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:user:user-1")).Bytes()
require.NoError(t, err)
pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
require.NoError(t, err)
require.Equal(t, "published", string(pushRoute.Status))
notificationPayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Notification(messageID)).Bytes()
require.NoError(t, err)
notificationRecord, err := redisstate.UnmarshalNotification(notificationPayload)
require.NoError(t, err)
emailRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:user:user-1")).Bytes()
require.NoError(t, err)
emailRoute, err := redisstate.UnmarshalRoute(emailRoutePayload)
require.NoError(t, err)
require.Equal(t, "published", string(emailRoute.Status))
// Push leg: exactly one gateway client event with propagated IDs and no
// device_session_id (the fan-out targets the user, not one session).
messages, err := redisClient.XRange(context.Background(), cfg.Streams.GatewayClientEvents, "-", "+").Result()
require.NoError(t, err)
require.Len(t, messages, 1)
require.Equal(t, "user-1", messages[0].Values["user_id"])
require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
require.Equal(t, messageID+"/push:user:user-1", messages[0].Values["event_id"])
require.Equal(t, "request-1", messages[0].Values["request_id"])
require.Equal(t, "trace-1", messages[0].Values["trace_id"])
require.NotContains(t, messages[0].Values, "device_session_id")
// The Redis client may surface stream values as string or []byte.
switch payload := messages[0].Values["payload_bytes"].(type) {
case string:
require.NotEmpty(t, payload)
case []byte:
require.NotEmpty(t, payload)
default:
require.Failf(t, "unexpected payload type", "payload_bytes has type %T", payload)
}
// Email leg: exactly one mail delivery command with deterministic
// delivery/idempotency identifiers derived from the message and route.
mailCommands, err := redisClient.XRange(context.Background(), cfg.Streams.MailDeliveryCommands, "-", "+").Result()
require.NoError(t, err)
require.Len(t, mailCommands, 1)
require.Equal(t, messageID+"/email:user:user-1", mailCommands[0].Values["delivery_id"])
require.Equal(t, "notification", mailCommands[0].Values["source"])
require.Equal(t, "template", mailCommands[0].Values["payload_mode"])
require.Equal(t, "notification:"+messageID+"/email:user:user-1", mailCommands[0].Values["idempotency_key"])
require.Equal(t, strconv.FormatInt(notificationRecord.AcceptedAt.UnixMilli(), 10), mailCommands[0].Values["requested_at_ms"])
require.Equal(t, "request-1", mailCommands[0].Values["request_id"])
require.Equal(t, "trace-1", mailCommands[0].Values["trace_id"])
require.JSONEq(t,
`{"to":["pilot@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"game.turn.ready","locale":"en","variables":{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54},"attachments":[]}`,
mailCommands[0].Values["payload_json"].(string),
)
cancel()
waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimePublishesAdminEmailRouteOnlyToMailService verifies the
// admin_email audience: the configured admin recipient gets exactly one mail
// delivery command, the push route is recorded as "skipped", and nothing is
// written to the gateway client-events stream.
func TestNewRuntimePublishesAdminEmailRouteOnlyToMailService(t *testing.T) {
t.Parallel()
redisServer := miniredis.RunT(t)
redisClient := redis.NewClient(&redis.Options{
Addr: redisServer.Addr(),
Protocol: 2,
DisableIdentity: true,
})
t.Cleanup(func() {
assert.NoError(t, redisClient.Close())
})
// No user lookup happens for admin_email audiences; the stub stays silent.
userService := newUserLookupServer(t, func(http.ResponseWriter, *http.Request) {})
defer userService.Close()
cfg := config.DefaultConfig()
cfg.Redis.Addr = redisServer.Addr()
cfg.UserService.BaseURL = userService.URL
// Route lobby.application.submitted intents to this admin address.
cfg.AdminRouting.LobbyApplicationSubmitted = []string{"owner@example.com"}
cfg.InternalHTTP.Addr = mustFreeAddr(t)
cfg.ShutdownTimeout = 10 * time.Second
cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
cfg.Telemetry.TracesExporter = "none"
cfg.Telemetry.MetricsExporter = "none"
runtime, err := NewRuntime(context.Background(), cfg, testLogger())
require.NoError(t, err)
defer func() {
require.NoError(t, runtime.Close())
}()
runCtx, cancel := context.WithCancel(context.Background())
defer cancel()
runErrCh := make(chan error, 1)
go func() {
runErrCh <- runtime.Run(runCtx)
}()
client := newTestHTTPClient(t)
waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
// Inject an admin_email intent with no recipient_user_ids_json.
messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
Stream: cfg.Streams.Intents,
Values: map[string]any{
"notification_type": "lobby.application.submitted",
"producer": "game_lobby",
"audience_kind": "admin_email",
"idempotency_key": "game-123:application-submitted:user-42",
"occurred_at_ms": "1775121700000",
"payload_json": `{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"}`,
},
}).Result()
require.NoError(t, err)
// Wait for the admin email route to publish with a single attempt.
require.Eventually(t, func() bool {
payload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "email:email:owner@example.com")).Bytes()
if err != nil {
return false
}
route, err := redisstate.UnmarshalRoute(payload)
if err != nil {
return false
}
return route.Status == "published" && route.AttemptCount == 1
}, 2*time.Second, 10*time.Millisecond)
// The push leg must exist but be explicitly skipped for admin email.
pushRoutePayload, err := redisClient.Get(context.Background(), redisstate.Keyspace{}.Route(messageID, "push:email:owner@example.com")).Bytes()
require.NoError(t, err)
pushRoute, err := redisstate.UnmarshalRoute(pushRoutePayload)
require.NoError(t, err)
require.Equal(t, "skipped", string(pushRoute.Status))
mailCommands, err := redisClient.XRange(context.Background(), cfg.Streams.MailDeliveryCommands, "-", "+").Result()
require.NoError(t, err)
require.Len(t, mailCommands, 1)
require.Equal(t, messageID+"/email:email:owner@example.com", mailCommands[0].Values["delivery_id"])
require.JSONEq(t,
`{"to":["owner@example.com"],"cc":[],"bcc":[],"reply_to":[],"template_id":"lobby.application.submitted","locale":"en","variables":{"applicant_name":"Nova Pilot","applicant_user_id":"user-42","game_id":"game-123","game_name":"Nebula Clash"},"attachments":[]}`,
mailCommands[0].Values["payload_json"].(string),
)
// No client event may leak to the gateway stream for admin-only routing.
gatewayMessages, err := redisClient.XRange(context.Background(), cfg.Streams.GatewayClientEvents, "-", "+").Result()
require.NoError(t, err)
require.Empty(t, gatewayMessages)
cancel()
waitForRunResult(t, runErrCh, cfg.ShutdownTimeout+2*time.Second)
}
// TestNewRuntimeUsesConfiguredUserServiceTimeout verifies that a hanging user
// service trips the configured lookup timeout: Run exits with a context
// deadline error, the intents stream offset is never committed, and no
// notification record is stored for the injected message.
func TestNewRuntimeUsesConfiguredUserServiceTimeout(t *testing.T) {
t.Parallel()
redisServer := miniredis.RunT(t)
redisClient := redis.NewClient(&redis.Options{
Addr: redisServer.Addr(),
Protocol: 2,
DisableIdentity: true,
})
t.Cleanup(func() {
assert.NoError(t, redisClient.Close())
})
// The stub blocks until the request context is cancelled, guaranteeing the
// client-side timeout (not a server response) ends the lookup.
userService := newUserLookupServer(t, func(_ http.ResponseWriter, request *http.Request) {
<-request.Context().Done()
})
defer userService.Close()
cfg := config.DefaultConfig()
cfg.Redis.Addr = redisServer.Addr()
cfg.UserService.BaseURL = userService.URL
// Tiny timeout so the hang is detected almost immediately.
cfg.UserService.Timeout = 20 * time.Millisecond
cfg.InternalHTTP.Addr = mustFreeAddr(t)
cfg.ShutdownTimeout = 10 * time.Second
cfg.IntentsReadBlockTimeout = 25 * time.Millisecond
cfg.Telemetry.TracesExporter = "none"
cfg.Telemetry.MetricsExporter = "none"
runtime, err := NewRuntime(context.Background(), cfg, testLogger())
require.NoError(t, err)
defer func() {
require.NoError(t, runtime.Close())
}()
runCtx, cancel := context.WithCancel(context.Background())
defer cancel()
runErrCh := make(chan error, 1)
go func() {
runErrCh <- runtime.Run(runCtx)
}()
client := newTestHTTPClient(t)
waitForRuntimeReady(t, client, cfg.InternalHTTP.Addr)
messageID, err := redisClient.XAdd(context.Background(), &redis.XAddArgs{
Stream: cfg.Streams.Intents,
Values: map[string]any{
"notification_type": "game.turn.ready",
"producer": "game_master",
"audience_kind": "user",
"recipient_user_ids_json": `["user-1"]`,
"idempotency_key": "game-123:turn-ready",
"occurred_at_ms": "1775121700000",
"payload_json": `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
},
}).Result()
require.NoError(t, err)
// Run must terminate on its own with the lookup deadline error.
var runErr error
require.Eventually(t, func() bool {
select {
case runErr = <-runErrCh:
return true
default:
return false
}
}, time.Second, 10*time.Millisecond)
require.Error(t, runErr)
require.ErrorContains(t, runErr, "context deadline exceeded")
// The failed intent must not have advanced the stream offset...
offsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
require.NoError(t, err)
offset, found, err := offsetStore.Load(context.Background(), cfg.Streams.Intents)
require.NoError(t, err)
require.False(t, found)
require.Empty(t, offset)
// ...and no notification record may exist for the message.
_, err = redisClient.Get(context.Background(), redisstate.Keyspace{}.Notification(messageID)).Bytes()
require.Error(t, err)
}
// testLogger returns a structured logger that discards all output so the
// runtime under test stays silent during the suite.
func testLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
// newTestHTTPClient builds a short-timeout HTTP client with keep-alives
// disabled and registers idle-connection cleanup on the test, so sockets do
// not linger across parallel test cases.
func newTestHTTPClient(t *testing.T) *http.Client {
	t.Helper()
	tr := &http.Transport{DisableKeepAlives: true}
	t.Cleanup(tr.CloseIdleConnections)
	client := &http.Client{
		Timeout:   500 * time.Millisecond,
		Transport: tr,
	}
	return client
}
// waitForRuntimeReady blocks until GET /readyz on addr answers 200 OK,
// failing the test if the runtime is not reachable within five seconds. The
// response body is drained and closed on every probe so connections recycle.
func waitForRuntimeReady(t *testing.T, client *http.Client, addr string) {
	t.Helper()
	probe := func() bool {
		request, err := http.NewRequest(http.MethodGet, "http://"+addr+"/readyz", nil)
		if err != nil {
			return false
		}
		response, err := client.Do(request)
		if err != nil {
			return false
		}
		defer response.Body.Close()
		_, _ = io.Copy(io.Discard, response.Body)
		return response.StatusCode == http.StatusOK
	}
	require.Eventually(t, probe, 5*time.Second, 25*time.Millisecond, "notification runtime did not become reachable")
}
// waitForRunResult waits up to waitTimeout for the runtime's Run result on
// runErrCh and requires it to be a nil error (clean shutdown).
func waitForRunResult(t *testing.T, runErrCh <-chan error, waitTimeout time.Duration) {
	t.Helper()
	var runErr error
	received := func() bool {
		// Non-blocking receive: Eventually keeps polling until Run reports.
		select {
		case runErr = <-runErrCh:
			return true
		default:
			return false
		}
	}
	require.Eventually(t, received, waitTimeout, 10*time.Millisecond, "notification runtime did not stop")
	require.NoError(t, runErr)
}
// assertHTTPStatus issues a GET to target through client and requires the
// response status to equal want, draining and closing the body so the
// connection can be reused or torn down cleanly.
func assertHTTPStatus(t *testing.T, client *http.Client, target string, want int) {
	t.Helper()
	req, err := http.NewRequest(http.MethodGet, target, nil)
	require.NoError(t, err)
	resp, err := client.Do(req)
	require.NoError(t, err)
	defer resp.Body.Close()
	_, _ = io.Copy(io.Discard, resp.Body)
	require.Equal(t, want, resp.StatusCode)
}
// mustFreeAddr reserves an ephemeral localhost TCP port and returns its
// host:port string, closing the listener before returning. As with any
// free-port helper, another process could grab the port before the caller
// binds it; tests accept that small race.
func mustFreeAddr(t *testing.T) string {
	t.Helper()
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := listener.Addr().String()
	defer func() {
		assert.NoError(t, listener.Close())
	}()
	return addr
}
// newUserLookupServer starts an httptest server emulating the trusted User
// Service lookup endpoint. Only GET requests for user-1's canonical path are
// forwarded to handler; non-GET methods get a plain 404 and any other path
// receives a JSON subject_not_found error envelope.
func newUserLookupServer(t *testing.T, handler func(http.ResponseWriter, *http.Request)) *httptest.Server {
	t.Helper()
	route := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet {
			http.NotFound(w, r)
			return
		}
		if r.URL.Path == "/api/v1/internal/users/user-1" {
			handler(w, r)
			return
		}
		writeJSON(t, w, http.StatusNotFound, map[string]any{
			"error": map[string]any{
				"code":    "subject_not_found",
				"message": "subject not found",
			},
		})
	})
	return httptest.NewServer(route)
}
// writeJSON marshals payload and writes it as an application/json response
// with statusCode, failing the test if marshalling or writing fails. The
// Content-Type header is set before WriteHeader, as required by net/http.
func writeJSON(t *testing.T, writer http.ResponseWriter, statusCode int, payload any) {
	t.Helper()
	encoded, err := json.Marshal(payload)
	require.NoError(t, err)
	writer.Header().Set("Content-Type", "application/json")
	writer.WriteHeader(statusCode)
	_, writeErr := writer.Write(encoded)
	require.NoError(t, writeErr)
}
+129 -365
View File
@@ -3,21 +3,21 @@
package config
import (
"crypto/tls"
"fmt"
"log/slog"
"net"
netmail "net/mail"
"net/url"
"os"
"strconv"
"strings"
"time"
"galaxy/notification/internal/telemetry"
"galaxy/postgres"
"galaxy/redisconn"
)
const (
envPrefix = "NOTIFICATION"
shutdownTimeoutEnvVar = "NOTIFICATION_SHUTDOWN_TIMEOUT"
logLevelEnvVar = "NOTIFICATION_LOG_LEVEL"
@@ -26,28 +26,23 @@ const (
internalHTTPReadTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT"
internalHTTPIdleTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT"
redisAddrEnvVar = "NOTIFICATION_REDIS_ADDR"
redisUsernameEnvVar = "NOTIFICATION_REDIS_USERNAME"
redisPasswordEnvVar = "NOTIFICATION_REDIS_PASSWORD"
redisDBEnvVar = "NOTIFICATION_REDIS_DB"
redisTLSEnabledEnvVar = "NOTIFICATION_REDIS_TLS_ENABLED"
redisOperationTimeoutEnvVar = "NOTIFICATION_REDIS_OPERATION_TIMEOUT"
intentsStreamEnvVar = "NOTIFICATION_INTENTS_STREAM"
intentsReadBlockTimeoutEnvVar = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
gatewayClientEventsStreamEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
intentsStreamEnvVar = "NOTIFICATION_INTENTS_STREAM"
intentsReadBlockTimeoutEnvVar = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
gatewayClientEventsStreamEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
gatewayClientEventsStreamMaxEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN"
mailDeliveryCommandsStreamEnvVar = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"
mailDeliveryCommandsStreamEnvVar = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"
pushRetryMaxAttemptsEnvVar = "NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS"
emailRetryMaxAttemptsEnvVar = "NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS"
routeLeaseTTLEnvVar = "NOTIFICATION_ROUTE_LEASE_TTL"
routeBackoffMinEnvVar = "NOTIFICATION_ROUTE_BACKOFF_MIN"
routeBackoffMaxEnvVar = "NOTIFICATION_ROUTE_BACKOFF_MAX"
deadLetterTTLEnvVar = "NOTIFICATION_DEAD_LETTER_TTL"
recordTTLEnvVar = "NOTIFICATION_RECORD_TTL"
idempotencyTTLEnvVar = "NOTIFICATION_IDEMPOTENCY_TTL"
recordRetentionEnvVar = "NOTIFICATION_RECORD_RETENTION"
malformedIntentRetentionEnvVar = "NOTIFICATION_MALFORMED_INTENT_RETENTION"
cleanupIntervalEnvVar = "NOTIFICATION_CLEANUP_INTERVAL"
userServiceBaseURLEnvVar = "NOTIFICATION_USER_SERVICE_BASE_URL"
userServiceTimeoutEnvVar = "NOTIFICATION_USER_SERVICE_TIMEOUT"
@@ -71,24 +66,24 @@ const (
defaultReadHeaderTimeout = 2 * time.Second
defaultReadTimeout = 10 * time.Second
defaultIdleTimeout = time.Minute
defaultRedisDB = 0
defaultRedisOperationTimeout = 250 * time.Millisecond
defaultIntentsStream = "notification:intents"
defaultIntentsReadBlockTimeout = 2 * time.Second
defaultGatewayClientEventsStream = "gateway:client-events"
defaultIntentsStream = "notification:intents"
defaultIntentsReadBlockTimeout = 2 * time.Second
defaultGatewayClientEventsStream = "gateway:client-events"
defaultGatewayClientEventsStreamMaxLen int64 = 1024
defaultMailDeliveryCommandsStream = "mail:delivery_commands"
defaultMailDeliveryCommandsStream = "mail:delivery_commands"
defaultPushRetryMaxAttempts = 3
defaultEmailRetryMaxAttempts = 7
defaultRouteLeaseTTL = 5 * time.Second
defaultRouteBackoffMin = time.Second
defaultRouteBackoffMax = 5 * time.Minute
defaultDeadLetterTTL = 720 * time.Hour
defaultRecordTTL = 720 * time.Hour
defaultIdempotencyTTL = 168 * time.Hour
defaultRecordRetention = 30 * 24 * time.Hour
defaultMalformedIntentRetention = 90 * 24 * time.Hour
defaultCleanupInterval = time.Hour
defaultUserServiceTimeout = time.Second
defaultOTelServiceName = "galaxy-notification"
@@ -109,20 +104,29 @@ type Config struct {
// InternalHTTP configures the private probe HTTP listener.
InternalHTTP InternalHTTPConfig
// Redis configures the shared Redis client used by the process.
// Redis configures the shared Redis connection topology and the inbound
// `notification:intents` stream plus the outbound stream names. Durable
// notification state lives in PostgreSQL after Stage 5 of `PG_PLAN.md`.
Redis RedisConfig
// Streams stores the stable stream names reserved for notification ingress
// and downstream publication.
// Postgres configures the PostgreSQL-backed durable store consumed via
// `pkg/postgres`.
Postgres PostgresConfig
// Streams stores the stable Redis Stream names reserved for ingress and
// downstream publication.
Streams StreamsConfig
// IntentsReadBlockTimeout stores the maximum Redis Streams blocking read
// window used by the intent consumer.
IntentsReadBlockTimeout time.Duration
// Retry stores the frozen retry and retention settings.
// Retry stores the frozen retry settings used by the route publishers.
Retry RetryConfig
// Retention stores the periodic SQL retention worker configuration.
Retention RetentionConfig
// UserService configures the trusted user-enrichment dependency.
UserService UserServiceConfig
@@ -174,51 +178,29 @@ func (cfg InternalHTTPConfig) Validate() error {
}
}
// RedisConfig configures the shared Redis client and its connection settings.
// RedisConfig configures the Notification Service Redis connection topology.
// Per-call timeouts live in `Conn.OperationTimeout`.
type RedisConfig struct {
// Addr stores the Redis network address.
Addr string
// Username stores the optional Redis ACL username.
Username string
// Password stores the optional Redis ACL password.
Password string
// DB stores the Redis logical database index.
DB int
// TLSEnabled reports whether TLS must be used for Redis connections.
TLSEnabled bool
// OperationTimeout bounds one Redis round trip including the startup PING.
OperationTimeout time.Duration
}
// TLSConfig returns the conservative TLS configuration used by the Redis
// client when TLSEnabled is true.
func (cfg RedisConfig) TLSConfig() *tls.Config {
if !cfg.TLSEnabled {
return nil
}
return &tls.Config{MinVersion: tls.VersionTLS12}
// Conn carries the connection topology (master, replicas, password, db,
// per-call timeout). Loaded via redisconn.LoadFromEnv("NOTIFICATION").
Conn redisconn.Config
}
// Validate reports whether cfg stores a usable Redis configuration.
func (cfg RedisConfig) Validate() error {
switch {
case strings.TrimSpace(cfg.Addr) == "":
return fmt.Errorf("redis addr must not be empty")
case !isTCPAddr(cfg.Addr):
return fmt.Errorf("redis addr %q must use host:port form", cfg.Addr)
case cfg.DB < 0:
return fmt.Errorf("redis db must not be negative")
case cfg.OperationTimeout <= 0:
return fmt.Errorf("redis operation timeout must be positive")
default:
return nil
}
return cfg.Conn.Validate()
}
// PostgresConfig configures the PostgreSQL-backed durable store.
type PostgresConfig struct {
// Conn stores the primary plus replica DSN topology and pool tuning.
// Loaded via postgres.LoadFromEnv("NOTIFICATION").
Conn postgres.Config
}
// Validate reports whether cfg stores a usable PostgreSQL configuration,
// delegating entirely to the embedded connection config's own validation.
func (cfg PostgresConfig) Validate() error {
if err := cfg.Conn.Validate(); err != nil {
return err
}
return nil
}
// StreamsConfig stores the stable Redis Stream names used by Notification
@@ -254,8 +236,8 @@ func (cfg StreamsConfig) Validate() error {
}
}
// RetryConfig stores the frozen retry budgets, backoff settings, and retention
// periods used by the service.
// RetryConfig stores the frozen retry budgets, backoff settings, and the
// per-acceptance idempotency window.
type RetryConfig struct {
// PushMaxAttempts stores the route retry budget for the `push` channel.
PushMaxAttempts int
@@ -273,18 +255,13 @@ type RetryConfig struct {
// RouteBackoffMax stores the maximum retry backoff.
RouteBackoffMax time.Duration
// DeadLetterTTL stores the retention period for dead-letter and malformed
// intent records.
DeadLetterTTL time.Duration
// RecordTTL stores the retention period for notification and route records.
RecordTTL time.Duration
// IdempotencyTTL stores the retention period for idempotency records.
// IdempotencyTTL stores the per-acceptance idempotency window the service
// layer applies to the durable `idempotency_expires_at` column on the
// `records` table.
IdempotencyTTL time.Duration
}
// Validate reports whether cfg stores usable retry and retention settings.
// Validate reports whether cfg stores usable retry settings.
func (cfg RetryConfig) Validate() error {
switch {
case cfg.PushMaxAttempts <= 0:
@@ -299,10 +276,6 @@ func (cfg RetryConfig) Validate() error {
return fmt.Errorf("route backoff max must be positive")
case cfg.RouteBackoffMin > cfg.RouteBackoffMax:
return fmt.Errorf("route backoff min must not exceed route backoff max")
case cfg.DeadLetterTTL <= 0:
return fmt.Errorf("dead-letter ttl must be positive")
case cfg.RecordTTL <= 0:
return fmt.Errorf("record ttl must be positive")
case cfg.IdempotencyTTL <= 0:
return fmt.Errorf("idempotency ttl must be positive")
default:
@@ -310,6 +283,36 @@ func (cfg RetryConfig) Validate() error {
}
}
// RetentionConfig stores the durable retention windows applied by the
// periodic SQL retention worker.
type RetentionConfig struct {
// RecordRetention bounds how long records (and their cascaded routes and
// dead_letters) survive after acceptance.
RecordRetention time.Duration
// MalformedIntentRetention bounds how long malformed-intent rows survive
// after their original `recorded_at`.
MalformedIntentRetention time.Duration
// CleanupInterval stores the wall-clock period between two retention
// passes of the worker.
CleanupInterval time.Duration
}
// Validate reports whether cfg stores a usable retention configuration.
// Every window must be strictly positive; error messages name the
// environment variable that carries the offending value.
func (cfg RetentionConfig) Validate() error {
if cfg.RecordRetention <= 0 {
return fmt.Errorf("%s must be positive", recordRetentionEnvVar)
}
if cfg.MalformedIntentRetention <= 0 {
return fmt.Errorf("%s must be positive", malformedIntentRetentionEnvVar)
}
if cfg.CleanupInterval <= 0 {
return fmt.Errorf("%s must be positive", cleanupIntervalEnvVar)
}
return nil
}
// UserServiceConfig configures the trusted user-enrichment dependency.
type UserServiceConfig struct {
// BaseURL stores the absolute base URL of the trusted User Service.
@@ -336,12 +339,10 @@ func (cfg UserServiceConfig) Validate() error {
// AdminRoutingConfig stores the type-specific configured administrator email
// lists.
type AdminRoutingConfig struct {
// GeoReviewRecommended stores recipients for
// `geo.review_recommended`.
// GeoReviewRecommended stores recipients for `geo.review_recommended`.
GeoReviewRecommended []string
// GameGenerationFailed stores recipients for
// `game.generation_failed`.
// GameGenerationFailed stores recipients for `game.generation_failed`.
GameGenerationFailed []string
// LobbyRuntimePausedAfterStart stores recipients for
@@ -431,14 +432,16 @@ func DefaultConfig() Config {
IdleTimeout: defaultIdleTimeout,
},
Redis: RedisConfig{
DB: defaultRedisDB,
OperationTimeout: defaultRedisOperationTimeout,
Conn: redisconn.DefaultConfig(),
},
Postgres: PostgresConfig{
Conn: postgres.DefaultConfig(),
},
Streams: StreamsConfig{
Intents: defaultIntentsStream,
GatewayClientEvents: defaultGatewayClientEventsStream,
Intents: defaultIntentsStream,
GatewayClientEvents: defaultGatewayClientEventsStream,
GatewayClientEventsStreamMaxLen: defaultGatewayClientEventsStreamMaxLen,
MailDeliveryCommands: defaultMailDeliveryCommandsStream,
MailDeliveryCommands: defaultMailDeliveryCommandsStream,
},
IntentsReadBlockTimeout: defaultIntentsReadBlockTimeout,
Retry: RetryConfig{
@@ -447,10 +450,13 @@ func DefaultConfig() Config {
RouteLeaseTTL: defaultRouteLeaseTTL,
RouteBackoffMin: defaultRouteBackoffMin,
RouteBackoffMax: defaultRouteBackoffMax,
DeadLetterTTL: defaultDeadLetterTTL,
RecordTTL: defaultRecordTTL,
IdempotencyTTL: defaultIdempotencyTTL,
},
Retention: RetentionConfig{
RecordRetention: defaultRecordRetention,
MalformedIntentRetention: defaultMalformedIntentRetention,
CleanupInterval: defaultCleanupInterval,
},
UserService: UserServiceConfig{
Timeout: defaultUserServiceTimeout,
},
@@ -462,167 +468,21 @@ func DefaultConfig() Config {
}
}
// LoadFromEnv loads the Notification Service process configuration from
// environment variables, applying documented defaults where appropriate.
func LoadFromEnv() (Config, error) {
	cfg := DefaultConfig()
	var err error
	// Process-wide shutdown grace period.
	cfg.ShutdownTimeout, err = loadDurationEnvWithDefault(shutdownTimeoutEnvVar, cfg.ShutdownTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Log level is validated eagerly so a typo fails startup here rather
	// than surfacing later when the logger is constructed.
	cfg.Logging.Level = loadStringEnvWithDefault(logLevelEnvVar, cfg.Logging.Level)
	if err := validateLogLevel(cfg.Logging.Level); err != nil {
		return Config{}, fmt.Errorf("load notification config: %s: %w", logLevelEnvVar, err)
	}
	// Internal HTTP server: bind address plus header/read/idle timeouts.
	cfg.InternalHTTP.Addr = loadStringEnvWithDefault(internalHTTPAddrEnvVar, cfg.InternalHTTP.Addr)
	cfg.InternalHTTP.ReadHeaderTimeout, err = loadDurationEnvWithDefault(internalHTTPReadHeaderTimeoutEnvVar, cfg.InternalHTTP.ReadHeaderTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.InternalHTTP.ReadTimeout, err = loadDurationEnvWithDefault(internalHTTPReadTimeoutEnvVar, cfg.InternalHTTP.ReadTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.InternalHTTP.IdleTimeout, err = loadDurationEnvWithDefault(internalHTTPIdleTimeoutEnvVar, cfg.InternalHTTP.IdleTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Redis connection settings. Username/password use plain Getenv and so
	// default to empty strings when unset (unlike the trimming loaders).
	cfg.Redis.Addr = loadStringEnvWithDefault(redisAddrEnvVar, cfg.Redis.Addr)
	cfg.Redis.Username = os.Getenv(redisUsernameEnvVar)
	cfg.Redis.Password = os.Getenv(redisPasswordEnvVar)
	cfg.Redis.DB, err = loadIntEnvWithDefault(redisDBEnvVar, cfg.Redis.DB)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Redis.TLSEnabled, err = loadBoolEnvWithDefault(redisTLSEnabledEnvVar, cfg.Redis.TLSEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Redis.OperationTimeout, err = loadDurationEnvWithDefault(redisOperationTimeoutEnvVar, cfg.Redis.OperationTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Stream names and the gateway client-events stream length bound.
	cfg.Streams.Intents = loadStringEnvWithDefault(intentsStreamEnvVar, cfg.Streams.Intents)
	cfg.Streams.GatewayClientEvents = loadStringEnvWithDefault(gatewayClientEventsStreamEnvVar, cfg.Streams.GatewayClientEvents)
	cfg.Streams.GatewayClientEventsStreamMaxLen, err = loadInt64EnvWithDefault(gatewayClientEventsStreamMaxEnvVar, cfg.Streams.GatewayClientEventsStreamMaxLen)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Streams.MailDeliveryCommands = loadStringEnvWithDefault(mailDeliveryCommandsStreamEnvVar, cfg.Streams.MailDeliveryCommands)
	cfg.IntentsReadBlockTimeout, err = loadDurationEnvWithDefault(intentsReadBlockTimeoutEnvVar, cfg.IntentsReadBlockTimeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Retry policy: per-channel attempt budgets plus the lease, backoff,
	// and TTL settings.
	cfg.Retry.PushMaxAttempts, err = loadIntEnvWithDefault(pushRetryMaxAttemptsEnvVar, cfg.Retry.PushMaxAttempts)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.EmailMaxAttempts, err = loadIntEnvWithDefault(emailRetryMaxAttemptsEnvVar, cfg.Retry.EmailMaxAttempts)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteLeaseTTL, err = loadDurationEnvWithDefault(routeLeaseTTLEnvVar, cfg.Retry.RouteLeaseTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteBackoffMin, err = loadDurationEnvWithDefault(routeBackoffMinEnvVar, cfg.Retry.RouteBackoffMin)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RouteBackoffMax, err = loadDurationEnvWithDefault(routeBackoffMaxEnvVar, cfg.Retry.RouteBackoffMax)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.DeadLetterTTL, err = loadDurationEnvWithDefault(deadLetterTTLEnvVar, cfg.Retry.DeadLetterTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.RecordTTL, err = loadDurationEnvWithDefault(recordTTLEnvVar, cfg.Retry.RecordTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Retry.IdempotencyTTL, err = loadDurationEnvWithDefault(idempotencyTTLEnvVar, cfg.Retry.IdempotencyTTL)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// User-service client; the base URL passes through normalizeBaseURL.
	cfg.UserService.BaseURL = normalizeBaseURL(loadStringEnvWithDefault(userServiceBaseURLEnvVar, cfg.UserService.BaseURL))
	cfg.UserService.Timeout, err = loadDurationEnvWithDefault(userServiceTimeoutEnvVar, cfg.UserService.Timeout)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Administrator routing email lists, one env var per notification type.
	cfg.AdminRouting.GeoReviewRecommended, err = loadEmailListEnv(adminEmailsGeoReviewRecommendedEnvVar, cfg.AdminRouting.GeoReviewRecommended)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.GameGenerationFailed, err = loadEmailListEnv(adminEmailsGameGenerationFailedEnvVar, cfg.AdminRouting.GameGenerationFailed)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.LobbyRuntimePausedAfterStart, err = loadEmailListEnv(adminEmailsLobbyRuntimePausedAfterEnvVar, cfg.AdminRouting.LobbyRuntimePausedAfterStart)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.AdminRouting.LobbyApplicationSubmitted, err = loadEmailListEnv(adminEmailsLobbyApplicationSubmittedEnvVar, cfg.AdminRouting.LobbyApplicationSubmitted)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Telemetry: exporter selection plus per-signal OTLP protocol; the
	// shared OTLP protocol env var is the fallback for both signals.
	cfg.Telemetry.ServiceName = loadStringEnvWithDefault(otelServiceNameEnvVar, cfg.Telemetry.ServiceName)
	cfg.Telemetry.TracesExporter = normalizeExporterValue(loadStringEnvWithDefault(otelTracesExporterEnvVar, cfg.Telemetry.TracesExporter))
	cfg.Telemetry.MetricsExporter = normalizeExporterValue(loadStringEnvWithDefault(otelMetricsExporterEnvVar, cfg.Telemetry.MetricsExporter))
	cfg.Telemetry.TracesProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPTracesProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.TracesExporter,
	)
	cfg.Telemetry.MetricsProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPMetricsProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.MetricsExporter,
	)
	cfg.Telemetry.StdoutTracesEnabled, err = loadBoolEnvWithDefault(otelStdoutTracesEnabledEnvVar, cfg.Telemetry.StdoutTracesEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	cfg.Telemetry.StdoutMetricsEnabled, err = loadBoolEnvWithDefault(otelStdoutMetricsEnabledEnvVar, cfg.Telemetry.StdoutMetricsEnabled)
	if err != nil {
		return Config{}, fmt.Errorf("load notification config: %w", err)
	}
	// Cross-field validation runs last so every env override is in place.
	if err := cfg.Validate(); err != nil {
		return Config{}, err
	}
	return cfg, nil
}
// Validate reports whether cfg contains a consistent Notification Service
// process configuration.
func (cfg Config) Validate() error {
switch {
case cfg.ShutdownTimeout <= 0:
if cfg.ShutdownTimeout <= 0 {
return fmt.Errorf("load notification config: %s must be positive", shutdownTimeoutEnvVar)
case strings.TrimSpace(cfg.Redis.Addr) == "":
return fmt.Errorf("load notification config: %s must not be empty", redisAddrEnvVar)
case strings.TrimSpace(cfg.UserService.BaseURL) == "":
return fmt.Errorf("load notification config: %s must not be empty", userServiceBaseURLEnvVar)
}
if err := cfg.InternalHTTP.Validate(); err != nil {
return fmt.Errorf("load notification config: %s", err)
}
if err := cfg.Redis.Validate(); err != nil {
return fmt.Errorf("load notification config: %s", err)
return fmt.Errorf("load notification config: %w", err)
}
if err := cfg.Postgres.Validate(); err != nil {
return fmt.Errorf("load notification config: %w", err)
}
if err := cfg.Streams.Validate(); err != nil {
return fmt.Errorf("load notification config: %s", err)
@@ -633,6 +493,9 @@ func (cfg Config) Validate() error {
if err := cfg.Retry.Validate(); err != nil {
return fmt.Errorf("load notification config: %s", err)
}
if err := cfg.Retention.Validate(); err != nil {
return fmt.Errorf("load notification config: %s", err)
}
if err := cfg.UserService.Validate(); err != nil {
return fmt.Errorf("load notification config: %s", err)
}
@@ -646,77 +509,35 @@ func (cfg Config) Validate() error {
return nil
}
func loadStringEnvWithDefault(name string, value string) string {
if raw, ok := os.LookupEnv(name); ok {
return strings.TrimSpace(raw)
func validateNormalizedEmailList(name string, values []string) error {
for index, value := range values {
normalized, err := normalizeMailboxAddress(value)
if err != nil {
return fmt.Errorf("%s[%d]: %w", name, index, err)
}
if normalized != value {
return fmt.Errorf("%s[%d]: email address must already be normalized", name, index)
}
}
return value
return nil
}
func loadDurationEnvWithDefault(name string, value time.Duration) (time.Duration, error) {
raw, ok := os.LookupEnv(name)
if !ok {
return value, nil
func normalizeMailboxAddress(value string) (string, error) {
trimmed := strings.TrimSpace(value)
if trimmed == "" {
return "", fmt.Errorf("email address must not be empty")
}
parsed, err := time.ParseDuration(strings.TrimSpace(raw))
parsed, err := netmail.ParseAddress(trimmed)
if err != nil {
return 0, fmt.Errorf("%s: %w", name, err)
return "", fmt.Errorf("invalid email address %q: %w", trimmed, err)
}
if parsed.Name != "" {
return "", fmt.Errorf("email address %q must not include a display name", trimmed)
}
return parsed, nil
}
// loadIntEnvWithDefault reads the integer environment variable name,
// returning value unchanged when the variable is unset. Surrounding
// whitespace is trimmed before parsing; a parse failure is reported with
// the variable name for context.
func loadIntEnvWithDefault(name string, value int) (int, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return value, nil
	}
	trimmed := strings.TrimSpace(raw)
	n, convErr := strconv.Atoi(trimmed)
	if convErr != nil {
		return 0, fmt.Errorf("%s: %w", name, convErr)
	}
	return n, nil
}
// loadInt64EnvWithDefault reads the int64 environment variable name,
// returning value unchanged when the variable is unset. The raw value is
// trimmed and parsed in base 10; a parse failure is reported with the
// variable name for context.
func loadInt64EnvWithDefault(name string, value int64) (int64, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return value, nil
	}
	n, convErr := strconv.ParseInt(strings.TrimSpace(raw), 10, 64)
	if convErr != nil {
		return 0, fmt.Errorf("%s: %w", name, convErr)
	}
	return n, nil
}
// loadBoolEnvWithDefault reads the boolean environment variable name,
// returning value unchanged when the variable is unset. Accepts whatever
// strconv.ParseBool accepts (1/t/true/0/f/false, any case) after trimming
// whitespace; a parse failure is reported with the variable name.
func loadBoolEnvWithDefault(name string, value bool) (bool, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return value, nil
	}
	b, convErr := strconv.ParseBool(strings.TrimSpace(raw))
	if convErr != nil {
		return false, fmt.Errorf("%s: %w", name, convErr)
	}
	return b, nil
}
func loadEmailListEnv(name string, value []string) ([]string, error) {
raw, ok := os.LookupEnv(name)
if !ok {
return append([]string(nil), value...), nil
}
return parseEmailList(name, raw)
return strings.ToLower(parsed.Address), nil
}
func parseEmailList(name string, raw string) ([]string, error) {
@@ -743,63 +564,6 @@ func parseEmailList(name string, raw string) ([]string, error) {
return addresses, nil
}
// normalizeMailboxAddress validates value as a bare RFC 5322 mailbox and
// returns it in canonical form: trimmed of surrounding whitespace and
// lowercased. Empty input, unparsable addresses, and addresses carrying a
// display name ("Alice <a@b>") are all rejected.
func normalizeMailboxAddress(value string) (string, error) {
	candidate := strings.TrimSpace(value)
	if candidate == "" {
		return "", fmt.Errorf("email address must not be empty")
	}
	addr, parseErr := netmail.ParseAddress(candidate)
	switch {
	case parseErr != nil:
		return "", fmt.Errorf("invalid email address %q: %w", candidate, parseErr)
	case addr.Name != "":
		return "", fmt.Errorf("email address %q must not include a display name", candidate)
	}
	return strings.ToLower(addr.Address), nil
}
// validateNormalizedEmailList checks every entry of values is a valid
// mailbox address already stored in canonical (trimmed, lowercased) form.
// The returned error names the offending list entry by index, prefixed
// with name for context.
func validateNormalizedEmailList(name string, values []string) error {
	for i, entry := range values {
		canonical, err := normalizeMailboxAddress(entry)
		if err != nil {
			return fmt.Errorf("%s[%d]: %w", name, i, err)
		}
		if canonical != entry {
			return fmt.Errorf("%s[%d]: email address must already be normalized", name, i)
		}
	}
	return nil
}
// validateLogLevel reports whether value (after trimming) names a level
// that log/slog can parse, e.g. "debug", "info", "warn", "error".
func validateLogLevel(value string) error {
	trimmed := strings.TrimSpace(value)
	var lvl slog.Level
	return lvl.UnmarshalText([]byte(trimmed))
}
// normalizeExporterValue canonicalizes an OTEL exporter selection: empty
// or whitespace-only input (and the explicit "none" value) maps to the
// "none" exporter, anything else is returned trimmed.
//
// Fix: the original computed strings.TrimSpace(value) twice (once in the
// switch, once in the default branch); trim once into a local instead.
func normalizeExporterValue(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return otelExporterNone
	}
	return trimmed
}
// loadOTLPProtocol picks the OTLP wire protocol for one telemetry signal.
// The signal-specific value wins, then the shared fallback; when neither
// is set and the signal actually uses the OTLP exporter, the default
// http/protobuf protocol applies. Otherwise the empty string is returned.
func loadOTLPProtocol(primary string, fallback string, exporter string) string {
	for _, candidate := range []string{primary, fallback} {
		if trimmed := strings.TrimSpace(candidate); trimmed != "" {
			return trimmed
		}
	}
	if exporter == otelExporterOTLP {
		return otelProtocolHTTPProtobuf
	}
	return ""
}
func normalizeBaseURL(value string) string {
trimmed := strings.TrimSpace(value)
if trimmed == "" {
+142 -41
View File
@@ -4,12 +4,42 @@ import (
"testing"
"time"
"galaxy/postgres"
"galaxy/redisconn"
"github.com/stretchr/testify/require"
)
func TestLoadFromEnvUsesDefaults(t *testing.T) {
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
// Environment variable names for the Redis and PostgreSQL connection
// settings consumed by the test suite. The Redis TLS/username entries are
// kept only to assert that the loader rejects these deprecated variables.
const (
	envRedisMasterAddr   = "NOTIFICATION_REDIS_MASTER_ADDR"
	envRedisReplicaAddrs = "NOTIFICATION_REDIS_REPLICA_ADDRS"
	envRedisPassword     = "NOTIFICATION_REDIS_PASSWORD"
	envRedisDB           = "NOTIFICATION_REDIS_DB"
	envRedisOpTimeout    = "NOTIFICATION_REDIS_OPERATION_TIMEOUT"
	envRedisTLSEnabled   = "NOTIFICATION_REDIS_TLS_ENABLED"
	envRedisUsername     = "NOTIFICATION_REDIS_USERNAME"

	envPostgresPrimaryDSN   = "NOTIFICATION_POSTGRES_PRIMARY_DSN"
	envPostgresOpTimeout    = "NOTIFICATION_POSTGRES_OPERATION_TIMEOUT"
	envPostgresMaxOpenConns = "NOTIFICATION_POSTGRES_MAX_OPEN_CONNS"
	envPostgresMaxIdleConns = "NOTIFICATION_POSTGRES_MAX_IDLE_CONNS"
	envPostgresConnMaxLife  = "NOTIFICATION_POSTGRES_CONN_MAX_LIFETIME"
)

// defaultPrimaryDSN is the well-formed PostgreSQL DSN the tests supply for
// the required NOTIFICATION_POSTGRES_PRIMARY_DSN variable.
const (
	defaultPrimaryDSN = "postgres://notificationservice:notificationservice@127.0.0.1:5432/galaxy?search_path=notification&sslmode=disable"
)
// setRequiredConnEnv seeds the environment variables without which
// LoadFromEnv fails validation: the Redis master address and password, the
// Postgres primary DSN, and the user-service base URL. t.Setenv restores
// the previous values automatically when the test ends.
func setRequiredConnEnv(t *testing.T) {
	t.Helper()
	required := []struct{ name, value string }{
		{envRedisMasterAddr, "127.0.0.1:6379"},
		{envRedisPassword, "secret"},
		{envPostgresPrimaryDSN, defaultPrimaryDSN},
		{userServiceBaseURLEnvVar, "http://user-service.internal"},
	}
	for _, ev := range required {
		t.Setenv(ev.name, ev.value)
	}
}
func TestLoadFromEnvUsesDefaults(t *testing.T) {
setRequiredConnEnv(t)
cfg, err := LoadFromEnv()
require.NoError(t, err)
@@ -18,11 +48,14 @@ func TestLoadFromEnvUsesDefaults(t *testing.T) {
require.Equal(t, defaults.ShutdownTimeout, cfg.ShutdownTimeout)
require.Equal(t, defaults.Logging, cfg.Logging)
require.Equal(t, defaults.InternalHTTP, cfg.InternalHTTP)
require.Equal(t, "127.0.0.1:6379", cfg.Redis.Addr)
require.Equal(t, defaults.Redis.DB, cfg.Redis.DB)
require.Equal(t, defaults.Redis.OperationTimeout, cfg.Redis.OperationTimeout)
require.Equal(t, "127.0.0.1:6379", cfg.Redis.Conn.MasterAddr)
require.Equal(t, "secret", cfg.Redis.Conn.Password)
require.Equal(t, defaults.Redis.Conn.DB, cfg.Redis.Conn.DB)
require.Equal(t, defaults.Redis.Conn.OperationTimeout, cfg.Redis.Conn.OperationTimeout)
require.Equal(t, defaultPrimaryDSN, cfg.Postgres.Conn.PrimaryDSN)
require.Equal(t, defaults.Streams, cfg.Streams)
require.Equal(t, defaults.Retry, cfg.Retry)
require.Equal(t, defaults.Retention, cfg.Retention)
require.Equal(t, UserServiceConfig{
BaseURL: "http://user-service.internal",
Timeout: defaults.UserService.Timeout,
@@ -38,12 +71,19 @@ func TestLoadFromEnvAppliesOverrides(t *testing.T) {
t.Setenv(internalHTTPReadHeaderTimeoutEnvVar, "3s")
t.Setenv(internalHTTPReadTimeoutEnvVar, "11s")
t.Setenv(internalHTTPIdleTimeoutEnvVar, "61s")
t.Setenv(redisAddrEnvVar, "127.0.0.1:6380")
t.Setenv(redisUsernameEnvVar, "alice")
t.Setenv(redisPasswordEnvVar, "secret")
t.Setenv(redisDBEnvVar, "3")
t.Setenv(redisTLSEnabledEnvVar, "true")
t.Setenv(redisOperationTimeoutEnvVar, "750ms")
t.Setenv(envRedisMasterAddr, "127.0.0.1:6380")
t.Setenv(envRedisReplicaAddrs, "127.0.0.1:6381,127.0.0.1:6382")
t.Setenv(envRedisPassword, "topsecret")
t.Setenv(envRedisDB, "3")
t.Setenv(envRedisOpTimeout, "750ms")
t.Setenv(envPostgresPrimaryDSN, defaultPrimaryDSN)
t.Setenv(envPostgresOpTimeout, "1500ms")
t.Setenv(envPostgresMaxOpenConns, "32")
t.Setenv(envPostgresMaxIdleConns, "8")
t.Setenv(envPostgresConnMaxLife, "45m")
t.Setenv(intentsStreamEnvVar, "notification:test_intents")
t.Setenv(intentsReadBlockTimeoutEnvVar, "3500ms")
t.Setenv(gatewayClientEventsStreamEnvVar, "gateway:test_client-events")
@@ -54,9 +94,10 @@ func TestLoadFromEnvAppliesOverrides(t *testing.T) {
t.Setenv(routeLeaseTTLEnvVar, "7s")
t.Setenv(routeBackoffMinEnvVar, "2s")
t.Setenv(routeBackoffMaxEnvVar, "7m")
t.Setenv(deadLetterTTLEnvVar, "120h")
t.Setenv(recordTTLEnvVar, "240h")
t.Setenv(idempotencyTTLEnvVar, "48h")
t.Setenv(recordRetentionEnvVar, "21d")
t.Setenv(malformedIntentRetentionEnvVar, "168h")
t.Setenv(cleanupIntervalEnvVar, "30m")
t.Setenv(userServiceBaseURLEnvVar, "https://user-service.internal/api/")
t.Setenv(userServiceTimeoutEnvVar, "1500ms")
t.Setenv(adminEmailsGeoReviewRecommendedEnvVar, "First@example.com, second@example.com, first@example.com")
@@ -70,6 +111,9 @@ func TestLoadFromEnvAppliesOverrides(t *testing.T) {
t.Setenv(otelStdoutTracesEnabledEnvVar, "true")
t.Setenv(otelStdoutMetricsEnabledEnvVar, "true")
// Time package does not support `21d`; use 504h directly.
t.Setenv(recordRetentionEnvVar, "504h")
cfg, err := LoadFromEnv()
require.NoError(t, err)
@@ -82,18 +126,28 @@ func TestLoadFromEnvAppliesOverrides(t *testing.T) {
IdleTimeout: 61 * time.Second,
}, cfg.InternalHTTP)
require.Equal(t, RedisConfig{
Addr: "127.0.0.1:6380",
Username: "alice",
Password: "secret",
DB: 3,
TLSEnabled: true,
OperationTimeout: 750 * time.Millisecond,
Conn: redisconn.Config{
MasterAddr: "127.0.0.1:6380",
ReplicaAddrs: []string{"127.0.0.1:6381", "127.0.0.1:6382"},
Password: "topsecret",
DB: 3,
OperationTimeout: 750 * time.Millisecond,
},
}, cfg.Redis)
require.Equal(t, PostgresConfig{
Conn: postgres.Config{
PrimaryDSN: defaultPrimaryDSN,
OperationTimeout: 1500 * time.Millisecond,
MaxOpenConns: 32,
MaxIdleConns: 8,
ConnMaxLifetime: 45 * time.Minute,
},
}, cfg.Postgres)
require.Equal(t, StreamsConfig{
Intents: "notification:test_intents",
GatewayClientEvents: "gateway:test_client-events",
Intents: "notification:test_intents",
GatewayClientEvents: "gateway:test_client-events",
GatewayClientEventsStreamMaxLen: 2048,
MailDeliveryCommands: "mail:test_delivery_commands",
MailDeliveryCommands: "mail:test_delivery_commands",
}, cfg.Streams)
require.Equal(t, 3500*time.Millisecond, cfg.IntentsReadBlockTimeout)
require.Equal(t, RetryConfig{
@@ -102,10 +156,13 @@ func TestLoadFromEnvAppliesOverrides(t *testing.T) {
RouteLeaseTTL: 7 * time.Second,
RouteBackoffMin: 2 * time.Second,
RouteBackoffMax: 7 * time.Minute,
DeadLetterTTL: 120 * time.Hour,
RecordTTL: 240 * time.Hour,
IdempotencyTTL: 48 * time.Hour,
}, cfg.Retry)
require.Equal(t, RetentionConfig{
RecordRetention: 504 * time.Hour,
MalformedIntentRetention: 168 * time.Hour,
CleanupInterval: 30 * time.Minute,
}, cfg.Retention)
require.Equal(t, UserServiceConfig{
BaseURL: "https://user-service.internal/api",
Timeout: 1500 * time.Millisecond,
@@ -127,6 +184,27 @@ func TestLoadFromEnvAppliesOverrides(t *testing.T) {
}, cfg.Telemetry)
}
// TestLoadFromEnvRejectsDeprecatedRedisVars verifies that the loader
// hard-fails — naming the offending variable in the error — when either
// deprecated Redis env var (TLS toggle or username) is still set.
func TestLoadFromEnvRejectsDeprecatedRedisVars(t *testing.T) {
	cases := []struct {
		name   string
		envVar string
	}{
		{name: "tls enabled rejected", envVar: envRedisTLSEnabled},
		{name: "username rejected", envVar: envRedisUsername},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			setRequiredConnEnv(t)
			t.Setenv(tc.envVar, "true")

			_, err := LoadFromEnv()
			require.Error(t, err)
			require.Contains(t, err.Error(), tc.envVar)
		})
	}
}
func TestLoadFromEnvRejectsInvalidValues(t *testing.T) {
tests := []struct {
name string
@@ -135,14 +213,16 @@ func TestLoadFromEnvRejectsInvalidValues(t *testing.T) {
}{
{name: "invalid duration", envName: shutdownTimeoutEnvVar, envVal: "later"},
{name: "invalid log level", envName: logLevelEnvVar, envVal: "verbose"},
{name: "invalid redis db", envName: redisDBEnvVar, envVal: "db-three"},
{name: "invalid redis tls", envName: redisTLSEnabledEnvVar, envVal: "sometimes"},
{name: "invalid redis db", envName: envRedisDB, envVal: "db-three"},
{name: "invalid push retries", envName: pushRetryMaxAttemptsEnvVar, envVal: "many"},
{name: "invalid email retries", envName: emailRetryMaxAttemptsEnvVar, envVal: "several"},
{name: "invalid gateway client events stream max len", envName: gatewayClientEventsStreamMaxEnvVar, envVal: "many"},
{name: "invalid user service timeout", envName: userServiceTimeoutEnvVar, envVal: "soon"},
{name: "invalid intents read block timeout", envName: intentsReadBlockTimeoutEnvVar, envVal: "later"},
{name: "invalid route lease ttl", envName: routeLeaseTTLEnvVar, envVal: "eventually"},
{name: "invalid record retention", envName: recordRetentionEnvVar, envVal: "later"},
{name: "invalid malformed intent retention", envName: malformedIntentRetentionEnvVar, envVal: "later"},
{name: "invalid cleanup interval", envName: cleanupIntervalEnvVar, envVal: "later"},
{name: "invalid traces exporter", envName: otelTracesExporterEnvVar, envVal: "stdout"},
{name: "invalid metrics protocol", envName: otelExporterOTLPMetricsProtocolEnvVar, envVal: "udp"},
{name: "invalid stdout traces", envName: otelStdoutTracesEnabledEnvVar, envVal: "sometimes"},
@@ -152,8 +232,7 @@ func TestLoadFromEnvRejectsInvalidValues(t *testing.T) {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
setRequiredConnEnv(t)
t.Setenv(tt.envName, tt.envVal)
_, err := LoadFromEnv()
@@ -163,20 +242,44 @@ func TestLoadFromEnvRejectsInvalidValues(t *testing.T) {
}
func TestLoadFromEnvRejectsMissingRequiredValues(t *testing.T) {
t.Run("missing redis addr", func(t *testing.T) {
t.Run("missing redis master addr", func(t *testing.T) {
t.Setenv(envRedisPassword, "secret")
t.Setenv(envPostgresPrimaryDSN, defaultPrimaryDSN)
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
_, err := LoadFromEnv()
require.Error(t, err)
require.Contains(t, err.Error(), redisAddrEnvVar)
require.Contains(t, err.Error(), envRedisMasterAddr)
})
t.Run("missing user service base url", func(t *testing.T) {
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
t.Run("missing redis password", func(t *testing.T) {
t.Setenv(envRedisMasterAddr, "127.0.0.1:6379")
t.Setenv(envPostgresPrimaryDSN, defaultPrimaryDSN)
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
_, err := LoadFromEnv()
require.Error(t, err)
require.Contains(t, err.Error(), userServiceBaseURLEnvVar)
require.Contains(t, err.Error(), envRedisPassword)
})
t.Run("missing postgres primary dsn", func(t *testing.T) {
t.Setenv(envRedisMasterAddr, "127.0.0.1:6379")
t.Setenv(envRedisPassword, "secret")
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
_, err := LoadFromEnv()
require.Error(t, err)
require.Contains(t, err.Error(), envPostgresPrimaryDSN)
})
t.Run("missing user service base url", func(t *testing.T) {
t.Setenv(envRedisMasterAddr, "127.0.0.1:6379")
t.Setenv(envRedisPassword, "secret")
t.Setenv(envPostgresPrimaryDSN, defaultPrimaryDSN)
_, err := LoadFromEnv()
require.Error(t, err)
require.Contains(t, err.Error(), "user service base URL")
})
}
@@ -188,7 +291,6 @@ func TestLoadFromEnvRejectsInvalidConfiguration(t *testing.T) {
want string
}{
{name: "invalid internal http addr", envName: internalHTTPAddrEnvVar, envVal: "127.0.0.1", want: "internal HTTP addr"},
{name: "invalid redis addr", envName: redisAddrEnvVar, envVal: "127.0.0.1", want: "redis addr"},
{name: "relative user service url", envName: userServiceBaseURLEnvVar, envVal: "/internal/users", want: "absolute http(s) URL"},
{name: "invalid admin email", envName: adminEmailsGeoReviewRecommendedEnvVar, envVal: "broken-email", want: "invalid email address"},
{name: "blank admin email slot", envName: adminEmailsGameGenerationFailedEnvVar, envVal: "ops@example.com, , second@example.com", want: "must not be empty"},
@@ -201,8 +303,7 @@ func TestLoadFromEnvRejectsInvalidConfiguration(t *testing.T) {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
setRequiredConnEnv(t)
t.Setenv(routeBackoffMaxEnvVar, "5m")
t.Setenv(tt.envName, tt.envVal)
@@ -223,7 +324,7 @@ func TestLoadFromEnvRejectsNonPositiveValues(t *testing.T) {
{name: "read header timeout", envName: internalHTTPReadHeaderTimeoutEnvVar, envVal: "0s"},
{name: "read timeout", envName: internalHTTPReadTimeoutEnvVar, envVal: "0s"},
{name: "idle timeout", envName: internalHTTPIdleTimeoutEnvVar, envVal: "0s"},
{name: "redis timeout", envName: redisOperationTimeoutEnvVar, envVal: "0s"},
{name: "redis timeout", envName: envRedisOpTimeout, envVal: "0s"},
{name: "intents read block timeout", envName: intentsReadBlockTimeoutEnvVar, envVal: "0s"},
{name: "push retries", envName: pushRetryMaxAttemptsEnvVar, envVal: "0"},
{name: "email retries", envName: emailRetryMaxAttemptsEnvVar, envVal: "0"},
@@ -231,9 +332,10 @@ func TestLoadFromEnvRejectsNonPositiveValues(t *testing.T) {
{name: "route lease ttl", envName: routeLeaseTTLEnvVar, envVal: "0s"},
{name: "route backoff min", envName: routeBackoffMinEnvVar, envVal: "0s"},
{name: "route backoff max", envName: routeBackoffMaxEnvVar, envVal: "0s"},
{name: "dead letter ttl", envName: deadLetterTTLEnvVar, envVal: "0s"},
{name: "record ttl", envName: recordTTLEnvVar, envVal: "0s"},
{name: "idempotency ttl", envName: idempotencyTTLEnvVar, envVal: "0s"},
{name: "record retention", envName: recordRetentionEnvVar, envVal: "0s"},
{name: "malformed intent retention", envName: malformedIntentRetentionEnvVar, envVal: "0s"},
{name: "cleanup interval", envName: cleanupIntervalEnvVar, envVal: "0s"},
{name: "user service timeout", envName: userServiceTimeoutEnvVar, envVal: "0s"},
}
@@ -241,8 +343,7 @@ func TestLoadFromEnvRejectsNonPositiveValues(t *testing.T) {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Setenv(redisAddrEnvVar, "127.0.0.1:6379")
t.Setenv(userServiceBaseURLEnvVar, "http://user-service.internal")
setRequiredConnEnv(t)
t.Setenv(tt.envName, tt.envVal)
_, err := LoadFromEnv()
+262
View File
@@ -0,0 +1,262 @@
package config
import (
"fmt"
"log/slog"
"os"
"strconv"
"strings"
"time"
"galaxy/postgres"
"galaxy/redisconn"
)
// LoadFromEnv builds Config from environment variables and validates the
// resulting configuration. Connection topology for Redis and PostgreSQL is
// delegated to the shared `pkg/redisconn` and `pkg/postgres` LoadFromEnv
// helpers — the Redis loader hard-fails on the deprecated
// `NOTIFICATION_REDIS_TLS_ENABLED` / `NOTIFICATION_REDIS_USERNAME` env vars;
// the Postgres loader requires a primary DSN.
func LoadFromEnv() (Config, error) {
	cfg := DefaultConfig()
	var err error
	// Process-wide shutdown grace period.
	cfg.ShutdownTimeout, err = durationEnv(shutdownTimeoutEnvVar, cfg.ShutdownTimeout)
	if err != nil {
		return Config{}, err
	}
	cfg.Logging.Level = stringEnv(logLevelEnvVar, cfg.Logging.Level)
	// Internal HTTP server: bind address plus header/read/idle timeouts.
	cfg.InternalHTTP.Addr = stringEnv(internalHTTPAddrEnvVar, cfg.InternalHTTP.Addr)
	cfg.InternalHTTP.ReadHeaderTimeout, err = durationEnv(internalHTTPReadHeaderTimeoutEnvVar, cfg.InternalHTTP.ReadHeaderTimeout)
	if err != nil {
		return Config{}, err
	}
	cfg.InternalHTTP.ReadTimeout, err = durationEnv(internalHTTPReadTimeoutEnvVar, cfg.InternalHTTP.ReadTimeout)
	if err != nil {
		return Config{}, err
	}
	cfg.InternalHTTP.IdleTimeout, err = durationEnv(internalHTTPIdleTimeoutEnvVar, cfg.InternalHTTP.IdleTimeout)
	if err != nil {
		return Config{}, err
	}
	// Connection topology comes from the shared loaders; both use the
	// service-wide env prefix so variable names stay consistent.
	redisConn, err := redisconn.LoadFromEnv(envPrefix)
	if err != nil {
		return Config{}, err
	}
	cfg.Redis.Conn = redisConn
	pgConn, err := postgres.LoadFromEnv(envPrefix)
	if err != nil {
		return Config{}, err
	}
	cfg.Postgres.Conn = pgConn
	// Stream names and the gateway client-events stream length bound.
	cfg.Streams.Intents = stringEnv(intentsStreamEnvVar, cfg.Streams.Intents)
	cfg.Streams.GatewayClientEvents = stringEnv(gatewayClientEventsStreamEnvVar, cfg.Streams.GatewayClientEvents)
	cfg.Streams.GatewayClientEventsStreamMaxLen, err = int64Env(gatewayClientEventsStreamMaxEnvVar, cfg.Streams.GatewayClientEventsStreamMaxLen)
	if err != nil {
		return Config{}, err
	}
	cfg.Streams.MailDeliveryCommands = stringEnv(mailDeliveryCommandsStreamEnvVar, cfg.Streams.MailDeliveryCommands)
	cfg.IntentsReadBlockTimeout, err = durationEnv(intentsReadBlockTimeoutEnvVar, cfg.IntentsReadBlockTimeout)
	if err != nil {
		return Config{}, err
	}
	// Retry policy: per-channel attempt budgets plus lease/backoff and
	// idempotency settings.
	cfg.Retry.PushMaxAttempts, err = intEnv(pushRetryMaxAttemptsEnvVar, cfg.Retry.PushMaxAttempts)
	if err != nil {
		return Config{}, err
	}
	cfg.Retry.EmailMaxAttempts, err = intEnv(emailRetryMaxAttemptsEnvVar, cfg.Retry.EmailMaxAttempts)
	if err != nil {
		return Config{}, err
	}
	cfg.Retry.RouteLeaseTTL, err = durationEnv(routeLeaseTTLEnvVar, cfg.Retry.RouteLeaseTTL)
	if err != nil {
		return Config{}, err
	}
	cfg.Retry.RouteBackoffMin, err = durationEnv(routeBackoffMinEnvVar, cfg.Retry.RouteBackoffMin)
	if err != nil {
		return Config{}, err
	}
	cfg.Retry.RouteBackoffMax, err = durationEnv(routeBackoffMaxEnvVar, cfg.Retry.RouteBackoffMax)
	if err != nil {
		return Config{}, err
	}
	cfg.Retry.IdempotencyTTL, err = durationEnv(idempotencyTTLEnvVar, cfg.Retry.IdempotencyTTL)
	if err != nil {
		return Config{}, err
	}
	// Retention windows for persisted records plus the cleanup cadence.
	cfg.Retention.RecordRetention, err = durationEnv(recordRetentionEnvVar, cfg.Retention.RecordRetention)
	if err != nil {
		return Config{}, err
	}
	cfg.Retention.MalformedIntentRetention, err = durationEnv(malformedIntentRetentionEnvVar, cfg.Retention.MalformedIntentRetention)
	if err != nil {
		return Config{}, err
	}
	cfg.Retention.CleanupInterval, err = durationEnv(cleanupIntervalEnvVar, cfg.Retention.CleanupInterval)
	if err != nil {
		return Config{}, err
	}
	// User-service client; the base URL passes through normalizeBaseURL.
	cfg.UserService.BaseURL = normalizeBaseURL(stringEnv(userServiceBaseURLEnvVar, cfg.UserService.BaseURL))
	cfg.UserService.Timeout, err = durationEnv(userServiceTimeoutEnvVar, cfg.UserService.Timeout)
	if err != nil {
		return Config{}, err
	}
	// Administrator routing email lists, one env var per notification type.
	cfg.AdminRouting.GeoReviewRecommended, err = emailListEnv(adminEmailsGeoReviewRecommendedEnvVar, cfg.AdminRouting.GeoReviewRecommended)
	if err != nil {
		return Config{}, err
	}
	cfg.AdminRouting.GameGenerationFailed, err = emailListEnv(adminEmailsGameGenerationFailedEnvVar, cfg.AdminRouting.GameGenerationFailed)
	if err != nil {
		return Config{}, err
	}
	cfg.AdminRouting.LobbyRuntimePausedAfterStart, err = emailListEnv(adminEmailsLobbyRuntimePausedAfterEnvVar, cfg.AdminRouting.LobbyRuntimePausedAfterStart)
	if err != nil {
		return Config{}, err
	}
	cfg.AdminRouting.LobbyApplicationSubmitted, err = emailListEnv(adminEmailsLobbyApplicationSubmittedEnvVar, cfg.AdminRouting.LobbyApplicationSubmitted)
	if err != nil {
		return Config{}, err
	}
	// Telemetry: exporter selection plus per-signal OTLP protocol; the
	// shared OTLP protocol env var is the fallback for both signals.
	cfg.Telemetry.ServiceName = stringEnv(otelServiceNameEnvVar, cfg.Telemetry.ServiceName)
	cfg.Telemetry.TracesExporter = normalizeExporterValue(stringEnv(otelTracesExporterEnvVar, cfg.Telemetry.TracesExporter))
	cfg.Telemetry.MetricsExporter = normalizeExporterValue(stringEnv(otelMetricsExporterEnvVar, cfg.Telemetry.MetricsExporter))
	cfg.Telemetry.TracesProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPTracesProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.TracesExporter,
	)
	cfg.Telemetry.MetricsProtocol = loadOTLPProtocol(
		os.Getenv(otelExporterOTLPMetricsProtocolEnvVar),
		os.Getenv(otelExporterOTLPProtocolEnvVar),
		cfg.Telemetry.MetricsExporter,
	)
	cfg.Telemetry.StdoutTracesEnabled, err = boolEnv(otelStdoutTracesEnabledEnvVar, cfg.Telemetry.StdoutTracesEnabled)
	if err != nil {
		return Config{}, err
	}
	cfg.Telemetry.StdoutMetricsEnabled, err = boolEnv(otelStdoutMetricsEnabledEnvVar, cfg.Telemetry.StdoutMetricsEnabled)
	if err != nil {
		return Config{}, err
	}
	// Log level validation happens after all loading; stringEnv already
	// trimmed the value, so only parseability is checked here.
	if err := validateLogLevel(cfg.Logging.Level); err != nil {
		return Config{}, fmt.Errorf("load notification config: %s: %w", logLevelEnvVar, err)
	}
	// Cross-field validation runs last so every env override is in place.
	if err := cfg.Validate(); err != nil {
		return Config{}, err
	}
	return cfg, nil
}
// stringEnv returns the whitespace-trimmed value of the environment
// variable name, or fallback when the variable is not set at all. A
// variable that is set but blank deliberately yields the empty string
// rather than the fallback.
func stringEnv(name string, fallback string) string {
	if raw, ok := os.LookupEnv(name); ok {
		return strings.TrimSpace(raw)
	}
	return fallback
}
func durationEnv(name string, fallback time.Duration) (time.Duration, error) {
value, ok := os.LookupEnv(name)
if !ok {
return fallback, nil
}
parsed, err := time.ParseDuration(strings.TrimSpace(value))
if err != nil {
return 0, fmt.Errorf("%s: %w", name, err)
}
return parsed, nil
}
// intEnv reads the environment variable name as an int, returning
// fallback when the variable is unset. The value is trimmed before
// parsing; a parse failure is reported with the variable name.
func intEnv(name string, fallback int) (int, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}
	trimmed := strings.TrimSpace(raw)
	n, convErr := strconv.Atoi(trimmed)
	if convErr != nil {
		return 0, fmt.Errorf("%s: %w", name, convErr)
	}
	return n, nil
}
// int64Env reads the environment variable name as a base-10 int64,
// returning fallback when the variable is unset. The value is trimmed
// before parsing; a parse failure is reported with the variable name.
func int64Env(name string, fallback int64) (int64, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}
	n, convErr := strconv.ParseInt(strings.TrimSpace(raw), 10, 64)
	if convErr != nil {
		return 0, fmt.Errorf("%s: %w", name, convErr)
	}
	return n, nil
}
// boolEnv reads the environment variable name as a bool using
// strconv.ParseBool semantics ("1", "t", "true", "0", "false", …). Unset
// variables yield fallback; unparsable values yield an error naming the
// variable.
func boolEnv(name string, fallback bool) (bool, error) {
	raw, found := os.LookupEnv(name)
	if !found {
		return fallback, nil
	}
	parsed, parseErr := strconv.ParseBool(strings.TrimSpace(raw))
	if parseErr != nil {
		return false, fmt.Errorf("%s: %w", name, parseErr)
	}
	return parsed, nil
}
// emailListEnv reads the environment variable name as an email list via
// parseEmailList. When the variable is unset it returns a defensive copy
// of fallback so callers cannot mutate the configured default slice.
func emailListEnv(name string, fallback []string) ([]string, error) {
	raw, found := os.LookupEnv(name)
	if !found {
		defensiveCopy := append([]string(nil), fallback...)
		return defensiveCopy, nil
	}
	return parseEmailList(name, raw)
}
// validateLogLevel reports whether value names a level recognized by
// log/slog (slog.Level.UnmarshalText accepts names such as "debug",
// "info", "warn", "error", with optional numeric offsets).
func validateLogLevel(value string) error {
	level := slog.Level(0)
	return level.UnmarshalText([]byte(strings.TrimSpace(value)))
}
// normalizeExporterValue canonicalizes an OTEL exporter setting: blank
// input and the none sentinel both collapse to otelExporterNone; anything
// else is returned with surrounding whitespace trimmed.
func normalizeExporterValue(value string) string {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" || trimmed == otelExporterNone {
		return otelExporterNone
	}
	return trimmed
}
// loadOTLPProtocol resolves the effective OTLP protocol: the
// signal-specific setting (primary) wins over the generic one (fallback).
// When neither is set and the exporter is OTLP, the OTLP default
// http/protobuf is used; otherwise the empty string is returned.
func loadOTLPProtocol(primary string, fallback string, exporter string) string {
	protocol := strings.TrimSpace(primary)
	if protocol == "" {
		protocol = strings.TrimSpace(fallback)
	}
	if protocol != "" {
		return protocol
	}
	if exporter == otelExporterOTLP {
		return otelProtocolHTTPProtobuf
	}
	return ""
}
@@ -0,0 +1,254 @@
// Package routestate carries the value types and inputs used by the route
// publishers to drive notification-route lifecycle transitions. The types
// are storage-agnostic: they were originally defined inside the Redis
// adapter package but were lifted here as part of the Stage 5 PostgreSQL
// migration so the publisher contracts can be satisfied by either a
// Redis-backed or a PostgreSQL-backed adapter (or a composite that splits
// state and lease storage between the two backends).
package routestate
import (
"errors"
"fmt"
"time"
"galaxy/notification/internal/service/acceptintent"
)
// ErrConflict reports that a route-state mutation lost an optimistic
// concurrency check (the row, the lease, or both no longer match the value
// the caller observed when it claimed the work). Publishers treat this as a
// no-op: the work was either already finished by another replica or has been
// rescheduled. Match it with errors.Is so wrapped returns remain detectable.
var ErrConflict = errors.New("route state conflict")
// ScheduledRoute carries one due route reference returned by a route-state
// store that exposes the schedule.
type ScheduledRoute struct {
	// RouteKey stores the implementation-specific scheduling key. Redis
	// adapters set this to the full sorted-set member; SQL adapters set it to
	// a synthetic "<notificationID>/<routeID>" string. Tests only require it
	// to be non-empty and stable.
	RouteKey string
	// NotificationID stores the owning notification identifier.
	NotificationID string
	// RouteID stores the scheduled route identifier.
	RouteID string
}

// Validate reports whether route contains a complete due-route reference:
// all three identifiers must be non-empty. It returns nil on success.
func (route ScheduledRoute) Validate() error {
	// errors.New instead of fmt.Errorf: these messages contain no format
	// verbs (staticcheck S1039).
	if route.RouteKey == "" {
		return errors.New("scheduled route key must not be empty")
	}
	if route.NotificationID == "" {
		return errors.New("scheduled route notification id must not be empty")
	}
	if route.RouteID == "" {
		return errors.New("scheduled route route id must not be empty")
	}
	return nil
}
// CompleteRoutePublishedInput carries the data required to mark one route as
// published while atomically appending one outbound stream entry. A store
// rejects the transition with ErrConflict when ExpectedRoute or LeaseToken
// no longer match its current state.
type CompleteRoutePublishedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller. The store uses it as the optimistic-concurrency token.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string
	// PublishedAt stores when the publication attempt succeeded.
	PublishedAt time.Time
	// Stream stores the outbound Redis Stream name.
	Stream string
	// StreamMaxLen bounds Stream with approximate trimming when positive. Zero
	// disables trimming.
	StreamMaxLen int64
	// StreamValues stores the exact Redis Stream fields appended to Stream.
	// Values must be non-empty strings or byte slices (see Validate).
	StreamValues map[string]any
}
// Validate reports whether input contains a complete published-route
// transition: a completable route, a held lease token, a UTC
// millisecond-precision publication timestamp, a named stream with a
// non-negative trim bound, and non-empty stream fields.
func (input CompleteRoutePublishedInput) Validate() error {
	if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
		return err
	}
	// errors.New instead of fmt.Errorf: these messages contain no format
	// verbs (staticcheck S1039).
	if input.LeaseToken == "" {
		return errors.New("lease token must not be empty")
	}
	if err := validateRouteStateTimestamp("published at", input.PublishedAt); err != nil {
		return err
	}
	if input.Stream == "" {
		return errors.New("stream must not be empty")
	}
	if input.StreamMaxLen < 0 {
		return errors.New("stream max len must not be negative")
	}
	if err := validateStreamValues(input.StreamValues); err != nil {
		return err
	}
	return nil
}
// CompleteRouteFailedInput carries the data required to record one retryable
// publication failure and reschedule the route for a later attempt.
type CompleteRouteFailedInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller. The store uses it as the optimistic-concurrency token.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string
	// FailedAt stores when the publication attempt failed.
	FailedAt time.Time
	// NextAttemptAt stores the next scheduled retry time.
	NextAttemptAt time.Time
	// FailureClassification stores the classified publication failure kind.
	FailureClassification string
	// FailureMessage stores the detailed publication failure text.
	FailureMessage string
}
// Validate reports whether input contains a complete retryable failure
// transition: a completable route, a held lease token, UTC
// millisecond-precision failure and retry timestamps, and a non-empty
// failure classification and message.
func (input CompleteRouteFailedInput) Validate() error {
	if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
		return err
	}
	// errors.New instead of fmt.Errorf: these messages contain no format
	// verbs (staticcheck S1039).
	if input.LeaseToken == "" {
		return errors.New("lease token must not be empty")
	}
	if err := validateRouteStateTimestamp("failed at", input.FailedAt); err != nil {
		return err
	}
	if err := validateRouteStateTimestamp("next attempt at", input.NextAttemptAt); err != nil {
		return err
	}
	if input.FailureClassification == "" {
		return errors.New("failure classification must not be empty")
	}
	if input.FailureMessage == "" {
		return errors.New("failure message must not be empty")
	}
	return nil
}
// CompleteRouteDeadLetterInput carries the data required to record one
// exhausted publication failure (the route's retry budget is spent).
type CompleteRouteDeadLetterInput struct {
	// ExpectedRoute stores the current route state previously loaded by the
	// caller. The store uses it as the optimistic-concurrency token.
	ExpectedRoute acceptintent.NotificationRoute
	// LeaseToken stores the route-lease owner token that must still be held.
	LeaseToken string
	// DeadLetteredAt stores when the route exhausted its retry budget.
	DeadLetteredAt time.Time
	// FailureClassification stores the classified terminal failure kind.
	FailureClassification string
	// FailureMessage stores the detailed terminal failure text.
	FailureMessage string
	// RecoveryHint stores the optional operator-facing recovery guidance.
	// It is not checked by Validate and may be empty.
	RecoveryHint string
}
// Validate reports whether input contains a complete dead-letter transition:
// a completable route, a held lease token, a UTC millisecond-precision
// timestamp, and a non-empty failure classification and message.
// RecoveryHint is optional and not validated.
func (input CompleteRouteDeadLetterInput) Validate() error {
	if err := validateCompletionRoute(input.ExpectedRoute); err != nil {
		return err
	}
	// errors.New instead of fmt.Errorf: these messages contain no format
	// verbs (staticcheck S1039).
	if input.LeaseToken == "" {
		return errors.New("lease token must not be empty")
	}
	if err := validateRouteStateTimestamp("dead lettered at", input.DeadLetteredAt); err != nil {
		return err
	}
	if input.FailureClassification == "" {
		return errors.New("failure classification must not be empty")
	}
	if input.FailureMessage == "" {
		return errors.New("failure message must not be empty")
	}
	return nil
}
// ValidateUTCMillisecondTimestamp reports whether value is a non-zero UTC
// time truncated to millisecond precision. Exposed for callers that need the
// same boundary check the routestate inputs apply.
func ValidateUTCMillisecondTimestamp(name string, value time.Time) error {
return validateRouteStateTimestamp(name, value)
}
func validateRouteStateTimestamp(name string, value time.Time) error {
if value.IsZero() {
return fmt.Errorf("%s must not be zero", name)
}
if !value.Equal(value.UTC()) {
return fmt.Errorf("%s must be UTC", name)
}
if !value.Equal(value.Truncate(time.Millisecond)) {
return fmt.Errorf("%s must use millisecond precision", name)
}
return nil
}
// validateCompletionRoute checks that route is internally valid and in a
// status from which a completion transition is legal: only pending and
// failed routes may be completed.
func validateCompletionRoute(route acceptintent.NotificationRoute) error {
	if err := route.Validate(); err != nil {
		return err
	}
	completable := route.Status == acceptintent.RouteStatusPending ||
		route.Status == acceptintent.RouteStatusFailed
	if completable {
		return nil
	}
	return fmt.Errorf("route status %q is not completable", route.Status)
}
// validateStreamValues checks the outbound stream field map: it must be
// non-empty, every key must be non-empty, and every value must be a
// non-empty string or []byte (the only value shapes the publishers write).
func validateStreamValues(values map[string]any) error {
	if len(values) == 0 {
		// errors.New instead of fmt.Errorf: no format verbs (staticcheck S1039).
		return errors.New("stream values must not be empty")
	}
	for key, raw := range values {
		if key == "" {
			return errors.New("stream values key must not be empty")
		}
		switch typed := raw.(type) {
		case string:
			if typed == "" {
				return fmt.Errorf("stream values %q must not be empty", key)
			}
		case []byte:
			if len(typed) == 0 {
				return fmt.Errorf("stream values %q must not be empty", key)
			}
		default:
			return fmt.Errorf("stream values %q must be string or []byte", key)
		}
	}
	return nil
}
+29 -12
View File
@@ -8,11 +8,13 @@ import (
"strings"
"time"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/publishmail"
"galaxy/notification/internal/service/routestate"
"github.com/redis/go-redis/v9"
)
const (
@@ -24,7 +26,7 @@ const (
// by EmailPublisher.
type EmailRouteStateStore interface {
// ListDueRoutes loads due scheduled routes.
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
ListDueRoutes(context.Context, time.Time, int64) ([]routestate.ScheduledRoute, error)
// TryAcquireRouteLease attempts to acquire one temporary route lease.
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
@@ -39,13 +41,13 @@ type EmailRouteStateStore interface {
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
// CompleteRoutePublished records one successful publication.
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
CompleteRoutePublished(context.Context, routestate.CompleteRoutePublishedInput) error
// CompleteRouteFailed records one retryable publication failure.
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
CompleteRouteFailed(context.Context, routestate.CompleteRouteFailedInput) error
// CompleteRouteDeadLetter records one exhausted publication failure.
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
CompleteRouteDeadLetter(context.Context, routestate.CompleteRouteDeadLetterInput) error
}
// EmailCommandEncoder encodes one email-capable notification route into a
@@ -90,6 +92,10 @@ type EmailPublisherConfig struct {
// Clock provides wall-clock timestamps.
Clock Clock
// StreamPublisher emits the outbound mail-delivery command before the
// route's PostgreSQL state transition is committed.
StreamPublisher StreamPublisher
}
// EmailPublisher publishes due email routes into the Mail Service command
@@ -105,6 +111,7 @@ type EmailPublisher struct {
encoder EmailCommandEncoder
telemetry RoutePublisherTelemetry
clock Clock
streamPublisher StreamPublisher
workerToken string
logger *slog.Logger
}
@@ -114,6 +121,8 @@ func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPub
switch {
case cfg.Store == nil:
return nil, errors.New("new email publisher: nil store")
case cfg.StreamPublisher == nil:
return nil, errors.New("new email publisher: nil stream publisher")
case strings.TrimSpace(cfg.MailDeliveryCommandsStream) == "":
return nil, errors.New("new email publisher: mail delivery-commands stream must not be empty")
case cfg.RouteLeaseTTL <= 0:
@@ -157,6 +166,7 @@ func NewEmailPublisher(cfg EmailPublisherConfig, logger *slog.Logger) (*EmailPub
encoder: cfg.Encoder,
telemetry: cfg.Telemetry,
clock: cfg.Clock,
streamPublisher: cfg.StreamPublisher,
workerToken: workerToken,
logger: logger.With("component", "email_publisher", "stream", cfg.MailDeliveryCommandsStream),
}, nil
@@ -237,7 +247,7 @@ func (publisher *EmailPublisher) publishDueRoutes(ctx context.Context) (bool, er
return progress, nil
}
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute routestate.ScheduledRoute) (bool, error) {
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
if err != nil {
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
@@ -283,7 +293,14 @@ func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationPayloadEncoding, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
if err := publisher.streamPublisher.XAdd(ctx, &redis.XAddArgs{
Stream: publisher.mailDeliveryCommandsStream,
Values: command.Values(),
}).Err(); err != nil {
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
PublishedAt: publisher.now(),
@@ -312,7 +329,7 @@ func (publisher *EmailPublisher) publishRoute(ctx context.Context, now time.Time
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
publisher.logger.Info("email route published", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return publisher.recordFailure(ctx, notification, route, emailFailureClassificationMailStreamWrite, err.Error())
@@ -349,7 +366,7 @@ func (publisher *EmailPublisher) recordFailure(
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
if attemptNumber >= route.MaxAttempts {
err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
err := publisher.store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
DeadLetteredAt: failureAt,
@@ -362,7 +379,7 @@ func (publisher *EmailPublisher) recordFailure(
publisher.recordRouteDeadLetter(ctx, notification, route, classification)
publisher.logger.Warn("email route dead-lettered", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
@@ -370,7 +387,7 @@ func (publisher *EmailPublisher) recordFailure(
}
nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
err := publisher.store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
FailedAt: failureAt,
@@ -385,7 +402,7 @@ func (publisher *EmailPublisher) recordFailure(
logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
publisher.logger.Warn("email route failed and was rescheduled", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
@@ -1,232 +0,0 @@
package worker
import (
"context"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/require"
)
// TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending verifies
// that the publisher transitions only the email route to published, appends
// exactly one mail-delivery command to the outbound stream, and leaves the
// sibling push route untouched in pending.
func TestEmailPublisherPublishesDueEmailRouteAndLeavesPushRoutePending(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)
	// The publisher works asynchronously on its poll interval; poll until
	// the email route reaches published.
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// The push route must remain pending: this publisher owns only email.
	pushRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, pushRoute.Status)
	// Exactly one stream entry proves no duplicate publication occurred, and
	// the entry carries the expected command fields.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "1775121700000-0/email:user:user-1", messages[0].Values["delivery_id"])
	require.Equal(t, "notification", messages[0].Values["source"])
	require.Equal(t, "template", messages[0].Values["payload_mode"])
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
// TestEmailPublisherRetriesMailStreamPublicationFailures verifies that a
// stream-write failure records a retryable failure with the mail-stream
// classification, and that after the obstacle is removed the next attempt
// succeeds with exactly one stream entry.
func TestEmailPublisherRetriesMailStreamPublicationFailures(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	// Setting the stream key to a plain string makes XADD fail with a
	// wrong-type error, forcing the publish attempt to fail.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
	}, time.Second, 10*time.Millisecond)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "retry", emailFailureClassificationMailStreamWrite))
	require.True(t, fixture.telemetry.hasRouteRetry("email"))
	// Remove the conflicting key so the rescheduled attempt can succeed.
	require.NoError(t, fixture.client.Del(context.Background(), fixture.mailStream).Err())
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
	}, 2*time.Second, 10*time.Millisecond)
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "published", ""))
}
// TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publisher replicas against the same store and asserts the route lease
// guarantees exactly one mail-delivery command is emitted.
func TestEmailPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 0)))
	// Second replica shares the store and stream but has its own clock; note
	// it is built without Telemetry, which NewEmailPublisher must tolerate.
	otherPublisher, err := NewEmailPublisher(EmailPublisherConfig{
		Store:                      fixture.store,
		MailDeliveryCommandsStream: fixture.mailStream,
		RouteLeaseTTL:              200 * time.Millisecond,
		RouteBackoffMin:            20 * time.Millisecond,
		RouteBackoffMax:            20 * time.Millisecond,
		PollInterval:               10 * time.Millisecond,
		BatchSize:                  16,
		Clock:                      newSteppingClock(fixture.now, time.Millisecond),
	}, testWorkerLogger())
	require.NoError(t, err)
	first := runEmailPublisher(t, fixture.publisher)
	defer first.stop(t)
	second := runEmailPublisher(t, otherPublisher)
	defer second.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// A single entry in the stream proves only one replica won the lease.
	messages, err := fixture.client.XRange(context.Background(), fixture.mailStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
}
// TestEmailPublisherDeadLettersExhaustedRoute seeds a route at 6 of 7
// attempts, forces the stream write to keep failing, and asserts the final
// failed attempt transitions the route to dead-letter with a persisted
// dead-letter record and matching telemetry.
func TestEmailPublisherDeadLettersExhaustedRoute(t *testing.T) {
	t.Parallel()
	fixture := newEmailPublisherFixture(t)
	// Start one attempt short of the budget so a single failure exhausts it.
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validEmailAcceptanceInput(fixture.now, 6)))
	// Occupying the stream key with a plain string makes every XADD fail.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.mailStream, "wrong-type", 0).Err())
	running := runEmailPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 7
	}, time.Second, 10*time.Millisecond)
	// The dead-letter payload must be persisted under the keyspace key and
	// classified as a mail-stream write failure.
	deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "email:user:user-1")).Bytes()
	require.NoError(t, err)
	deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
	require.NoError(t, err)
	require.Equal(t, emailFailureClassificationMailStreamWrite, deadLetter.FailureClassification)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("email", "dead_letter", emailFailureClassificationMailStreamWrite))
	require.True(t, fixture.telemetry.hasRouteDeadLetter("email", emailFailureClassificationMailStreamWrite))
}
// emailPublisherFixture bundles the collaborators each email-publisher test
// needs: the raw Redis client, the acceptance store built on it, the
// publisher under test, and the shared clock/telemetry doubles.
type emailPublisherFixture struct {
	client *redis.Client
	store  *redisstate.AcceptanceStore
	// publisher is the EmailPublisher under test.
	publisher *EmailPublisher
	// mailStream is the outbound mail delivery-commands stream name.
	mailStream string
	// now is the fixed base instant the fixture clock starts from.
	now   time.Time
	clock *steppingClock
	// telemetry records publish attempts/retries/dead-letters for assertions.
	telemetry *recordingWorkerTelemetry
}
// newEmailPublisherFixture wires a miniredis-backed acceptance store, a
// stepping clock pinned to a fixed epoch, recording telemetry, and an
// EmailPublisher configured with short lease/backoff/poll intervals so the
// tests converge quickly.
func newEmailPublisherFixture(t *testing.T) emailPublisherFixture {
	t.Helper()
	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		require.NoError(t, client.Close())
	})
	store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	// The fixed instant matches the stream-entry id these tests assert on
	// ("1775121700000-0").
	now := time.UnixMilli(1775121700000).UTC()
	clock := newSteppingClock(now, time.Millisecond)
	telemetry := &recordingWorkerTelemetry{}
	publisher, err := NewEmailPublisher(EmailPublisherConfig{
		Store:                      store,
		MailDeliveryCommandsStream: "mail:delivery_commands",
		RouteLeaseTTL:              200 * time.Millisecond,
		RouteBackoffMin:            20 * time.Millisecond,
		RouteBackoffMax:            20 * time.Millisecond,
		PollInterval:               10 * time.Millisecond,
		BatchSize:                  16,
		Telemetry:                  telemetry,
		Clock:                      clock,
	}, testWorkerLogger())
	require.NoError(t, err)
	return emailPublisherFixture{
		client:     client,
		store:      store,
		publisher:  publisher,
		mailStream: "mail:delivery_commands",
		now:        now,
		clock:      clock,
		telemetry:  telemetry,
	}
}
// validEmailAcceptanceInput builds the shared push acceptance fixture and
// overrides its email route ("email:user:user-1") so the route starts with
// emailAttemptCount attempts out of a budget of 7.
func validEmailAcceptanceInput(now time.Time, emailAttemptCount int) acceptintent.CreateAcceptanceInput {
	input := validPushAcceptanceInput(now)
	for index := range input.Routes {
		if input.Routes[index].RouteID == "email:user:user-1" {
			input.Routes[index].AttemptCount = emailAttemptCount
			input.Routes[index].MaxAttempts = 7
		}
	}
	return input
}
// runningEmailPublisher is a handle to a background EmailPublisher.Run
// goroutine: cancel stops it, resultCh receives its exit error.
type runningEmailPublisher struct {
	cancel   context.CancelFunc
	resultCh chan error
}
// runEmailPublisher starts publisher.Run in a background goroutine and
// returns a handle that can cancel it and collect its exit error.
func runEmailPublisher(t *testing.T, publisher *EmailPublisher) runningEmailPublisher {
	t.Helper()
	ctx, cancel := context.WithCancel(context.Background())
	handle := runningEmailPublisher{
		cancel:   cancel,
		resultCh: make(chan error, 1),
	}
	go func() {
		handle.resultCh <- publisher.Run(ctx)
	}()
	return handle
}
// stop cancels the publisher and requires that Run exits with
// context.Canceled within one second.
func (r runningEmailPublisher) stop(t *testing.T) {
	t.Helper()
	r.cancel()
	deadline := time.After(time.Second)
	select {
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-deadline:
		require.FailNow(t, "email publisher did not stop")
	}
}
@@ -1,422 +0,0 @@
package worker
import (
"context"
"errors"
"io"
"log/slog"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/config"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists verifies
// that with no persisted offset the consumer reads the intent stream from
// the beginning and accepts the first entry.
func TestIntentConsumerStartsFromZeroOffsetWhenNoStoredOffsetExists(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	// The consumer processes asynchronously; poll until the notification
	// keyed by the stream entry id appears in the acceptance store.
	require.Eventually(t, func() bool {
		_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), messageID)
		return err == nil && found
	}, time.Second, 10*time.Millisecond)
}
// TestIntentConsumerContinuesFromSavedOffsetAfterRestart pre-saves the
// offset at the first entry and verifies the consumer processes only
// entries after it: the second intent is accepted, the first is not.
func TestIntentConsumerContinuesFromSavedOffsetAfterRestart(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	// Simulate a prior run having consumed firstID.
	require.NoError(t, fixture.offsetStore.Save(context.Background(), fixture.stream, firstID))
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), secondID)
		return err == nil && found
	}, time.Second, 10*time.Millisecond)
	// The entry before the saved offset must not have been (re)accepted.
	_, found, err := fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset adds two
// intents with different payloads (presumably sharing an idempotency key —
// addValidIntent's key construction is defined below): the first is
// accepted, the second is recorded as an idempotency_conflict malformed
// intent, and the offset still advances past it.
func TestIntentConsumerRecordsIdempotencyConflictsAndAdvancesOffset(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	firstID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	secondID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	// Wait for the conflicting second entry to land in the malformed-intent
	// keyspace with the expected failure code.
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(secondID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == "idempotency_conflict"
	}, time.Second, 10*time.Millisecond)
	// The offset must point past the conflicting entry so it is not retried.
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, secondID, offset)
	// Only the first intent was accepted as a notification.
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), firstID)
	require.NoError(t, err)
	require.True(t, found)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), secondID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerShutdownInterruptsBlockingRead cancels the consumer
// while it is blocked on an empty stream and requires Run to return
// context.Canceled promptly.
func TestIntentConsumerShutdownInterruptsBlockingRead(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{})
	ctx, cancel := context.WithCancel(context.Background())
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- fixture.consumer.Run(ctx)
	}()
	// Give the consumer time to enter its blocking read before cancelling.
	time.Sleep(50 * time.Millisecond)
	cancel()
	select {
	case err := <-resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-time.After(time.Second):
		require.FailNow(t, "intent consumer did not stop after shutdown")
	}
}
// TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset uses an empty
// user directory so the recipient cannot be resolved; the intent must be
// recorded as a recipient_not_found malformed intent, the offset must still
// advance, and no notification may be accepted.
func TestIntentConsumerRecordsRecipientNotFoundAndAdvancesOffset(t *testing.T) {
	t.Parallel()
	// Empty stub directory: every lookup misses.
	fixture := newIntentConsumerFixture(t, stubUserDirectory{})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeRecipientNotFound
	}, time.Second, 10*time.Millisecond)
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, messageID, offset)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset adds a stream
// entry missing its payload field; the consumer must classify it as
// invalid_payload, keep the stream entry id on the malformed record,
// advance the offset, and accept nothing.
func TestIntentConsumerRecordsMalformedIntentAndAdvancesOffset(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// Hand-built entry: structurally valid envelope fields but no payload,
	// so acceptance rejects it as an invalid payload.
	messageID, err := fixture.client.XAdd(context.Background(), &redis.XAddArgs{
		Stream: fixture.stream,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
		},
	}).Result()
	require.NoError(t, err)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(messageID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeInvalidPayload &&
			entry.StreamEntryID == messageID
	}, time.Second, 10*time.Millisecond)
	offset, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, messageID, offset)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
// TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents feeds an
// accepted intent, an exact duplicate, and a conflicting intent, then
// asserts the recording telemetry saw all three outcomes ("accepted",
// "duplicate", and a malformed "idempotency_conflict").
func TestIntentConsumerRecordsTelemetryForOutcomesAndMalformedIntents(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		records: map[string]acceptintent.UserRecord{
			"user-1": {Email: "pilot@example.com", PreferredLanguage: "en"},
		},
	})
	// Same payload twice -> accepted then duplicate; a different payload
	// under the same key -> conflict.
	addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	conflictID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":55,"game_name":"Nebula Clash","game_id":"game-123"}`)
	running := runIntentConsumer(t, fixture.consumer)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		payload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.MalformedIntent(conflictID)).Bytes()
		if err != nil {
			return false
		}
		entry, err := redisstate.UnmarshalMalformedIntent(payload)
		if err != nil {
			return false
		}
		return entry.FailureCode == malformedintent.FailureCodeIdempotencyConflict
	}, time.Second, 10*time.Millisecond)
	require.Eventually(t, func() bool {
		return fixture.telemetry.hasIntentOutcome("accepted") &&
			fixture.telemetry.hasIntentOutcome("duplicate") &&
			fixture.telemetry.hasMalformedIntent("idempotency_conflict")
	}, time.Second, 10*time.Millisecond)
}
// TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable
// makes every directory lookup fail with an infrastructure error and asserts
// the consumer surfaces the error from Run without saving an offset or
// accepting the intent — so the entry is retried after restart.
func TestIntentConsumerStopsWithoutAdvancingOffsetWhenUserDirectoryIsUnavailable(t *testing.T) {
	t.Parallel()
	fixture := newIntentConsumerFixture(t, stubUserDirectory{
		err: errors.New("user service unavailable"),
	})
	messageID := addValidIntent(t, fixture.client, fixture.stream, `{"turn_number":54,"game_name":"Nebula Clash","game_id":"game-123"}`)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- fixture.consumer.Run(ctx)
	}()
	// Run must exit on its own (not via cancel) with the directory error.
	var runErr error
	require.Eventually(t, func() bool {
		select {
		case runErr = <-resultCh:
			return true
		default:
			return false
		}
	}, time.Second, 10*time.Millisecond)
	require.Error(t, runErr)
	require.ErrorContains(t, runErr, "user service unavailable")
	// No offset saved and no notification accepted: the entry stays pending.
	_, found, err := fixture.offsetStore.Load(context.Background(), fixture.stream)
	require.NoError(t, err)
	require.False(t, found)
	_, found, err = fixture.acceptanceStore.GetNotification(context.Background(), messageID)
	require.NoError(t, err)
	require.False(t, found)
}
// intentConsumerFixture bundles the shared dependencies every intent-consumer
// test needs: a miniredis-backed client, the consumer under test, and the
// stores plus telemetry the tests assert on.
type intentConsumerFixture struct {
	client *redis.Client
	// stream is the intent stream name the consumer reads from.
	stream string
	acceptanceStore *redisstate.AcceptanceStore
	offsetStore *redisstate.StreamOffsetStore
	consumer *IntentConsumer
	// telemetry records worker metrics for assertions.
	telemetry *recordingWorkerTelemetry
}
// newIntentConsumerFixture wires an IntentConsumer against a fresh miniredis
// instance with fixed clocks and discarded logs. userDirectory lets each test
// inject its own recipient-resolution behavior. All resources are cleaned up
// via t.Cleanup.
func newIntentConsumerFixture(t *testing.T, userDirectory acceptintent.UserDirectory) intentConsumerFixture {
	t.Helper()
	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})
	// All stores share one Redis client.
	acceptanceStore, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	malformedStore, err := redisstate.NewMalformedIntentStore(client, 72*time.Hour)
	require.NoError(t, err)
	offsetStore, err := redisstate.NewStreamOffsetStore(client)
	require.NoError(t, err)
	telemetry := &recordingWorkerTelemetry{}
	service, err := acceptintent.New(acceptintent.Config{
		Store:            acceptanceStore,
		UserDirectory:    userDirectory,
		Clock:            fixedClock{now: time.UnixMilli(1775121700000).UTC()},
		Logger:           slog.New(slog.NewTextHandler(io.Discard, nil)),
		Telemetry:        telemetry,
		PushMaxAttempts:  3,
		EmailMaxAttempts: 7,
		IdempotencyTTL:   7 * 24 * time.Hour,
		AdminRouting:     config.AdminRoutingConfig{},
	})
	require.NoError(t, err)
	// NOTE(review): the consumer clock is 1ms ahead of the service clock —
	// presumably to keep consumer-side timestamps distinct; confirm.
	consumer, err := NewIntentConsumer(IntentConsumerConfig{
		Client:            client,
		Stream:            "notification:intents",
		BlockTimeout:      25 * time.Millisecond,
		Acceptor:          service,
		MalformedRecorder: malformedStore,
		OffsetStore:       offsetStore,
		Telemetry:         telemetry,
		Clock:             fixedClock{now: time.UnixMilli(1775121700001).UTC()},
	}, slog.New(slog.NewTextHandler(io.Discard, nil)))
	require.NoError(t, err)
	return intentConsumerFixture{
		client:          client,
		stream:          "notification:intents",
		acceptanceStore: acceptanceStore,
		offsetStore:     offsetStore,
		consumer:        consumer,
		telemetry:       telemetry,
	}
}
// addValidIntent appends one well-formed notification-intent entry carrying
// payloadJSON to the given stream and returns the entry ID Redis assigned.
func addValidIntent(t *testing.T, client *redis.Client, stream string, payloadJSON string) string {
	t.Helper()
	entry := redis.XAddArgs{
		Stream: stream,
		Values: map[string]any{
			"notification_type":       "game.turn.ready",
			"producer":                "game_master",
			"audience_kind":           "user",
			"recipient_user_ids_json": `["user-1"]`,
			"idempotency_key":         "game-123:turn-ready",
			"occurred_at_ms":          "1775121700000",
			"payload_json":            payloadJSON,
		},
	}
	id, err := client.XAdd(context.Background(), &entry).Result()
	require.NoError(t, err)
	return id
}
// runningIntentConsumer holds the handles for a consumer launched by
// runIntentConsumer: cancel stops it, resultCh delivers Run's exit error.
type runningIntentConsumer struct {
	cancel context.CancelFunc
	resultCh chan error
}
// runIntentConsumer starts consumer.Run in a goroutine on a cancellable
// background context and returns handles to stop it and collect its result.
func runIntentConsumer(t *testing.T, consumer *IntentConsumer) runningIntentConsumer {
	t.Helper()
	ctx, cancel := context.WithCancel(context.Background())
	resultCh := make(chan error, 1)
	go func() {
		resultCh <- consumer.Run(ctx)
	}()
	// NOTE(review): fixed 50ms warm-up — presumably to let the consumer reach
	// its first blocking read before the test adds entries. Callers already
	// poll with require.Eventually, so confirm this sleep is still needed.
	time.Sleep(50 * time.Millisecond)
	return runningIntentConsumer{
		cancel: cancel,
		resultCh: resultCh,
	}
}
// stop cancels the consumer and asserts that Run exits with
// context.Canceled within one second.
func (r runningIntentConsumer) stop(t *testing.T) {
	t.Helper()
	r.cancel()
	deadline := time.After(time.Second)
	select {
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-deadline:
		require.FailNow(t, "intent consumer did not stop")
	}
}
type fixedClock struct {
now time.Time
}
func (clock fixedClock) Now() time.Time {
return clock.now
}
// stubUserDirectory is a canned acceptintent.UserDirectory: it either fails
// every lookup with a fixed error or resolves users from an in-memory map.
type stubUserDirectory struct {
	records map[string]acceptintent.UserRecord
	err     error
}

// GetUserByID returns the configured error when set, the matching record when
// present, and acceptintent.ErrRecipientNotFound otherwise.
func (d stubUserDirectory) GetUserByID(_ context.Context, userID string) (acceptintent.UserRecord, error) {
	if d.err != nil {
		return acceptintent.UserRecord{}, d.err
	}
	record, ok := d.records[userID]
	if !ok {
		return acceptintent.UserRecord{}, acceptintent.ErrRecipientNotFound
	}
	return record, nil
}
+34 -12
View File
@@ -10,11 +10,13 @@ import (
"strings"
"time"
"galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/logging"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/publishpush"
"galaxy/notification/internal/service/routestate"
"github.com/redis/go-redis/v9"
)
const (
@@ -29,7 +31,7 @@ const (
// PushPublisher.
type PushRouteStateStore interface {
// ListDueRoutes loads due scheduled routes.
ListDueRoutes(context.Context, time.Time, int64) ([]redisstate.ScheduledRoute, error)
ListDueRoutes(context.Context, time.Time, int64) ([]routestate.ScheduledRoute, error)
// TryAcquireRouteLease attempts to acquire one temporary route lease.
TryAcquireRouteLease(context.Context, string, string, string, time.Duration) (bool, error)
@@ -44,13 +46,13 @@ type PushRouteStateStore interface {
GetRoute(context.Context, string, string) (acceptintent.NotificationRoute, bool, error)
// CompleteRoutePublished records one successful publication.
CompleteRoutePublished(context.Context, redisstate.CompleteRoutePublishedInput) error
CompleteRoutePublished(context.Context, routestate.CompleteRoutePublishedInput) error
// CompleteRouteFailed records one retryable publication failure.
CompleteRouteFailed(context.Context, redisstate.CompleteRouteFailedInput) error
CompleteRouteFailed(context.Context, routestate.CompleteRouteFailedInput) error
// CompleteRouteDeadLetter records one exhausted publication failure.
CompleteRouteDeadLetter(context.Context, redisstate.CompleteRouteDeadLetterInput) error
CompleteRouteDeadLetter(context.Context, routestate.CompleteRouteDeadLetterInput) error
}
// PushEventEncoder encodes one push-capable notification route into a
@@ -109,6 +111,10 @@ type PushPublisherConfig struct {
// Clock provides wall-clock timestamps.
Clock Clock
// StreamPublisher emits the outbound Gateway client-event before the
// route's PostgreSQL state transition is committed.
StreamPublisher StreamPublisher
}
// PushPublisher publishes due push routes into the Gateway client-events
@@ -125,6 +131,7 @@ type PushPublisher struct {
encoder PushEventEncoder
telemetry RoutePublisherTelemetry
clock Clock
streamPublisher StreamPublisher
workerToken string
logger *slog.Logger
}
@@ -134,6 +141,8 @@ func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublis
switch {
case cfg.Store == nil:
return nil, errors.New("new push publisher: nil store")
case cfg.StreamPublisher == nil:
return nil, errors.New("new push publisher: nil stream publisher")
case strings.TrimSpace(cfg.GatewayStream) == "":
return nil, errors.New("new push publisher: gateway stream must not be empty")
case cfg.GatewayStreamMaxLen <= 0:
@@ -180,6 +189,7 @@ func NewPushPublisher(cfg PushPublisherConfig, logger *slog.Logger) (*PushPublis
encoder: cfg.Encoder,
telemetry: cfg.Telemetry,
clock: cfg.Clock,
streamPublisher: cfg.StreamPublisher,
workerToken: workerToken,
logger: logger.With("component", "push_publisher", "stream", cfg.GatewayStream),
}, nil
@@ -260,7 +270,7 @@ func (publisher *PushPublisher) publishDueRoutes(ctx context.Context) (bool, err
return progress, nil
}
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute redisstate.ScheduledRoute) (bool, error) {
func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time, dueRoute routestate.ScheduledRoute) (bool, error) {
acquired, err := publisher.store.TryAcquireRouteLease(ctx, dueRoute.NotificationID, dueRoute.RouteID, publisher.workerToken, publisher.routeLeaseTTL)
if err != nil {
return false, fmt.Errorf("acquire route lease %q: %w", dueRoute.RouteID, err)
@@ -306,7 +316,19 @@ func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time,
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationPayloadEncoding, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, redisstate.CompleteRoutePublishedInput{
xaddArgs := &redis.XAddArgs{
Stream: publisher.gatewayStream,
Values: eventValues(event),
}
if publisher.gatewayStreamMaxLen > 0 {
xaddArgs.MaxLen = publisher.gatewayStreamMaxLen
xaddArgs.Approx = true
}
if err := publisher.streamPublisher.XAdd(ctx, xaddArgs).Err(); err != nil {
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
}
err = publisher.store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
PublishedAt: publisher.now(),
@@ -335,7 +357,7 @@ func (publisher *PushPublisher) publishRoute(ctx context.Context, now time.Time,
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
publisher.logger.Info("push route published", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return publisher.recordFailure(ctx, notification, route, pushFailureClassificationGatewayStreamWrite, err.Error())
@@ -371,7 +393,7 @@ func (publisher *PushPublisher) recordFailure(
logArgs = append(logArgs, logging.TraceAttrsFromContext(ctx)...)
if attemptNumber >= route.MaxAttempts {
err := publisher.store.CompleteRouteDeadLetter(ctx, redisstate.CompleteRouteDeadLetterInput{
err := publisher.store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
DeadLetteredAt: failureAt,
@@ -384,7 +406,7 @@ func (publisher *PushPublisher) recordFailure(
publisher.recordRouteDeadLetter(ctx, notification, route, classification)
publisher.logger.Warn("push route dead-lettered", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("dead-letter route %q: %w", route.RouteID, err)
@@ -392,7 +414,7 @@ func (publisher *PushPublisher) recordFailure(
}
nextAttemptAt := failureAt.Add(routeBackoffDelay(attemptNumber, publisher.routeBackoffMin, publisher.routeBackoffMax)).UTC().Truncate(time.Millisecond)
err := publisher.store.CompleteRouteFailed(ctx, redisstate.CompleteRouteFailedInput{
err := publisher.store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
ExpectedRoute: route,
LeaseToken: publisher.workerToken,
FailedAt: failureAt,
@@ -407,7 +429,7 @@ func (publisher *PushPublisher) recordFailure(
logArgs = append(logArgs, "next_attempt_at", nextAttemptAt)
publisher.logger.Warn("push route failed and was rescheduled", logArgs...)
return true, nil
case errors.Is(err, redisstate.ErrConflict):
case errors.Is(err, routestate.ErrConflict):
return false, nil
default:
return false, fmt.Errorf("reschedule route %q: %w", route.RouteID, err)
@@ -1,318 +0,0 @@
package worker
import (
"context"
"io"
"log/slog"
"sync"
"testing"
"time"
redisstate "galaxy/notification/internal/adapters/redisstate"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"github.com/alicebob/miniredis/v2"
"github.com/redis/go-redis/v9"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending verifies
// that the publisher emits exactly one gateway client-event for the due push
// route, transitions that route to published, and leaves the sibling email
// route untouched in pending state.
func TestPushPublisherPublishesDuePushRouteAndLeavesEmailRoutePending(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// The email route must still be pending — the push worker must not
	// publish it.
	emailRoute, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "email:user:user-1")
	require.NoError(t, err)
	require.True(t, found)
	require.Equal(t, acceptintent.RouteStatusPending, emailRoute.Status)
	// Exactly one stream entry with the expected envelope fields.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.Equal(t, "user-1", messages[0].Values["user_id"])
	require.Equal(t, "game.turn.ready", messages[0].Values["event_type"])
	require.Equal(t, "1775121700000-0/push:user:user-1", messages[0].Values["event_id"])
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
// TestPushPublisherRetriesGatewayStreamPublicationFailures verifies the retry
// path: occupying the stream key with a wrong Redis type forces XADD to fail,
// the route is marked failed with one attempt, and after the key is deleted
// the publisher succeeds on the next attempt.
func TestPushPublisherRetriesGatewayStreamPublicationFailures(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	// A string value at the stream key makes XADD return a WRONGTYPE error.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusFailed && route.AttemptCount == 1
	}, time.Second, 10*time.Millisecond)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "retry", pushFailureClassificationGatewayStreamWrite))
	require.True(t, fixture.telemetry.hasRouteRetry("push"))
	// Clear the obstruction; the rescheduled attempt should now publish.
	require.NoError(t, fixture.client.Del(context.Background(), fixture.gatewayStream).Err())
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished && route.AttemptCount == 2
	}, 2*time.Second, 10*time.Millisecond)
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "published", ""))
}
// TestPushPublisherDeadLettersExhaustedRoute verifies that a route entering
// its final allowed attempt (2 of 3 already used) is dead-lettered after one
// more gateway write failure, with the failure classification persisted and
// reported to telemetry.
func TestPushPublisherDeadLettersExhaustedRoute(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	input := validPushAcceptanceInput(fixture.now)
	// Pre-consume attempts so the next failure exhausts the route.
	for index := range input.Routes {
		if input.Routes[index].RouteID == "push:user:user-1" {
			input.Routes[index].AttemptCount = 2
			input.Routes[index].MaxAttempts = 3
		}
	}
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), input))
	// Wrong-typed key forces the gateway XADD to fail.
	require.NoError(t, fixture.client.Set(context.Background(), fixture.gatewayStream, "wrong-type", 0).Err())
	running := runPushPublisher(t, fixture.publisher)
	defer running.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusDeadLetter && route.AttemptCount == 3
	}, time.Second, 10*time.Millisecond)
	deadLetterPayload, err := fixture.client.Get(context.Background(), redisstate.Keyspace{}.DeadLetter("1775121700000-0", "push:user:user-1")).Bytes()
	require.NoError(t, err)
	deadLetter, err := redisstate.UnmarshalDeadLetter(deadLetterPayload)
	require.NoError(t, err)
	require.Equal(t, pushFailureClassificationGatewayStreamWrite, deadLetter.FailureClassification)
	require.True(t, fixture.telemetry.hasRoutePublishAttempt("push", "dead_letter", pushFailureClassificationGatewayStreamWrite))
	require.True(t, fixture.telemetry.hasRouteDeadLetter("push", pushFailureClassificationGatewayStreamWrite))
}
// TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas runs two
// publisher replicas against the same store and asserts the route lease lets
// exactly one of them emit the gateway event.
func TestPushPublisherLeasePreventsDuplicatePublicationAcrossReplicas(t *testing.T) {
	t.Parallel()
	fixture := newPushPublisherFixture(t)
	require.NoError(t, fixture.store.CreateAcceptance(context.Background(), validPushAcceptanceInput(fixture.now)))
	// Second replica: same store and stream, its own clock.
	otherPublisher, err := NewPushPublisher(PushPublisherConfig{
		Store:               fixture.store,
		GatewayStream:       fixture.gatewayStream,
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Clock:               newSteppingClock(fixture.now, time.Millisecond),
	}, testWorkerLogger())
	require.NoError(t, err)
	first := runPushPublisher(t, fixture.publisher)
	defer first.stop(t)
	second := runPushPublisher(t, otherPublisher)
	defer second.stop(t)
	require.Eventually(t, func() bool {
		route, found, err := fixture.store.GetRoute(context.Background(), "1775121700000-0", "push:user:user-1")
		return err == nil && found && route.Status == acceptintent.RouteStatusPublished
	}, time.Second, 10*time.Millisecond)
	// Despite two replicas racing, exactly one event reaches the stream.
	messages, err := fixture.client.XRange(context.Background(), fixture.gatewayStream, "-", "+").Result()
	require.NoError(t, err)
	require.Len(t, messages, 1)
}
// pushPublisherFixture bundles the shared dependencies of each push-publisher
// test: a miniredis-backed client, the publisher under test, its store, and
// the clock/telemetry used for assertions.
type pushPublisherFixture struct {
	client *redis.Client
	store *redisstate.AcceptanceStore
	publisher *PushPublisher
	// gatewayStream is the outbound client-events stream name.
	gatewayStream string
	// now is the stepping clock's starting instant.
	now time.Time
	clock *steppingClock
	telemetry *recordingWorkerTelemetry
}
// newPushPublisherFixture wires a PushPublisher against a fresh miniredis
// instance with a stepping clock and recording telemetry. All resources are
// released via t.Cleanup.
func newPushPublisherFixture(t *testing.T) pushPublisherFixture {
	t.Helper()
	server := miniredis.RunT(t)
	client := redis.NewClient(&redis.Options{
		Addr:            server.Addr(),
		Protocol:        2,
		DisableIdentity: true,
	})
	t.Cleanup(func() {
		assert.NoError(t, client.Close())
	})
	store, err := redisstate.NewAcceptanceStore(client, redisstate.AcceptanceConfig{
		RecordTTL:      24 * time.Hour,
		DeadLetterTTL:  72 * time.Hour,
		IdempotencyTTL: 7 * 24 * time.Hour,
	})
	require.NoError(t, err)
	now := time.UnixMilli(1775121700000).UTC()
	// Each Now() call advances by 1ms so successive timestamps are strictly
	// increasing.
	clock := newSteppingClock(now, time.Millisecond)
	telemetry := &recordingWorkerTelemetry{}
	publisher, err := NewPushPublisher(PushPublisherConfig{
		Store:               store,
		GatewayStream:       "gateway:client-events",
		GatewayStreamMaxLen: 1024,
		RouteLeaseTTL:       200 * time.Millisecond,
		RouteBackoffMin:     20 * time.Millisecond,
		RouteBackoffMax:     20 * time.Millisecond,
		PollInterval:        10 * time.Millisecond,
		BatchSize:           16,
		Telemetry:           telemetry,
		Clock:               clock,
	}, testWorkerLogger())
	require.NoError(t, err)
	return pushPublisherFixture{
		client:        client,
		store:         store,
		publisher:     publisher,
		gatewayStream: "gateway:client-events",
		now:           now,
		clock:         clock,
		telemetry:     telemetry,
	}
}
// validPushAcceptanceInput builds one accepted notification for user-1 with
// two pending routes — push (3 max attempts) and email (7 max attempts) —
// all timestamped at now, together with its idempotency record.
func validPushAcceptanceInput(now time.Time) acceptintent.CreateAcceptanceInput {
	return acceptintent.CreateAcceptanceInput{
		Notification: acceptintent.NotificationRecord{
			NotificationID:     "1775121700000-0",
			NotificationType:   intentstream.NotificationTypeGameTurnReady,
			Producer:           intentstream.ProducerGameMaster,
			AudienceKind:       intentstream.AudienceKindUser,
			RecipientUserIDs:   []string{"user-1"},
			PayloadJSON:        `{"game_id":"game-123","game_name":"Nebula Clash","turn_number":54}`,
			IdempotencyKey:     "game-123:turn-54",
			RequestFingerprint: "sha256:deadbeef",
			RequestID:          "request-1",
			TraceID:            "trace-1",
			OccurredAt:         now,
			AcceptedAt:         now,
			UpdatedAt:          now,
		},
		Routes: []acceptintent.NotificationRoute{
			// Push route: due immediately (NextAttemptAt == now).
			{
				NotificationID: "1775121700000-0",
				RouteID:        "push:user:user-1",
				Channel:        intentstream.ChannelPush,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    3,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
			// Email route: same recipient, handled by the email channel.
			{
				NotificationID: "1775121700000-0",
				RouteID:        "email:user:user-1",
				Channel:        intentstream.ChannelEmail,
				RecipientRef:   "user:user-1",
				Status:         acceptintent.RouteStatusPending,
				AttemptCount:   0,
				MaxAttempts:    7,
				NextAttemptAt:  now,
				ResolvedEmail:  "pilot@example.com",
				ResolvedLocale: "en",
				CreatedAt:      now,
				UpdatedAt:      now,
			},
		},
		Idempotency: acceptintent.IdempotencyRecord{
			Producer:           intentstream.ProducerGameMaster,
			IdempotencyKey:     "game-123:turn-54",
			NotificationID:     "1775121700000-0",
			RequestFingerprint: "sha256:deadbeef",
			CreatedAt:          now,
			ExpiresAt:          now.Add(7 * 24 * time.Hour),
		},
	}
}
// runningPushPublisher holds the handles for a publisher launched by
// runPushPublisher: cancel stops it, resultCh delivers Run's exit error.
type runningPushPublisher struct {
	cancel context.CancelFunc
	resultCh chan error
}
// runPushPublisher launches publisher.Run on a fresh cancellable background
// context and returns handles for stopping it and collecting its exit error.
func runPushPublisher(t *testing.T, publisher *PushPublisher) runningPushPublisher {
	t.Helper()
	ctx, cancel := context.WithCancel(context.Background())
	done := make(chan error, 1)
	go func(runCtx context.Context) {
		done <- publisher.Run(runCtx)
	}(ctx)
	return runningPushPublisher{cancel: cancel, resultCh: done}
}
// stop cancels the publisher and asserts that Run exits with
// context.Canceled within one second.
func (r runningPushPublisher) stop(t *testing.T) {
	t.Helper()
	r.cancel()
	deadline := time.After(time.Second)
	select {
	case err := <-r.resultCh:
		require.ErrorIs(t, err, context.Canceled)
	case <-deadline:
		require.FailNow(t, "push publisher did not stop")
	}
}
type steppingClock struct {
mu sync.Mutex
current time.Time
step time.Duration
}
func newSteppingClock(start time.Time, step time.Duration) *steppingClock {
return &steppingClock{
current: start.UTC().Truncate(time.Millisecond),
step: step,
}
}
func (clock *steppingClock) Now() time.Time {
clock.mu.Lock()
defer clock.mu.Unlock()
now := clock.current
clock.current = clock.current.Add(clock.step).UTC().Truncate(time.Millisecond)
return now
}
// testWorkerLogger returns a logger that discards all output, keeping worker
// tests silent.
func testWorkerLogger() *slog.Logger {
	handler := slog.NewTextHandler(io.Discard, nil)
	return slog.New(handler)
}
@@ -0,0 +1,161 @@
package worker
import (
"context"
"errors"
"fmt"
"log/slog"
"time"
)
// SQLRetentionStore performs the durable DELETE statements applied by the
// retention worker. Implementations are typically the umbrella PostgreSQL
// notification store; the interface keeps the worker decoupled from the
// store package.
type SQLRetentionStore interface {
	// DeleteRecordsOlderThan removes records rows whose accepted_at predates
	// cutoff. Cascading FKs drop routes and dead_letters owned by the deleted
	// rows. It returns the number of records rows deleted.
	DeleteRecordsOlderThan(ctx context.Context, cutoff time.Time) (int64, error)

	// DeleteMalformedIntentsOlderThan removes malformed-intent rows whose
	// recorded_at predates cutoff and returns the number of rows deleted.
	DeleteMalformedIntentsOlderThan(ctx context.Context, cutoff time.Time) (int64, error)
}
// SQLRetentionConfig stores the dependencies and policy used by
// SQLRetentionWorker. Every field is required: NewSQLRetentionWorker rejects
// nil dependencies and non-positive durations.
type SQLRetentionConfig struct {
	// Store applies the durable DELETE statements.
	Store SQLRetentionStore

	// RecordRetention bounds how long records (and their cascaded routes and
	// dead_letters) survive after acceptance.
	RecordRetention time.Duration

	// MalformedIntentRetention bounds how long malformed-intent rows survive
	// after recorded_at.
	MalformedIntentRetention time.Duration

	// CleanupInterval stores the wall-clock period between two retention
	// passes.
	CleanupInterval time.Duration

	// Clock provides the wall-clock used to compute cutoff timestamps.
	Clock Clock
}
// SQLRetentionWorker periodically deletes records and malformed-intent rows
// whose retention window has expired. The worker replaces the per-key
// Redis EXPIRE eviction that maintained TTLs on the previous Redis-backed
// notification keyspace.
//
// Construct via NewSQLRetentionWorker; the zero value is not usable.
type SQLRetentionWorker struct {
	store SQLRetentionStore // executes the DELETE statements
	recordRetention time.Duration // max age of records rows
	malformedIntentRetention time.Duration // max age of malformed-intent rows
	cleanupInterval time.Duration // period between retention passes
	clock Clock // source of "now" for cutoff computation
	logger *slog.Logger // pre-tagged with the component name
}
// NewSQLRetentionWorker constructs the periodic retention worker.
//
// Every dependency and duration in cfg is mandatory; a nil logger falls back
// to slog.Default(). The returned worker's logger is tagged with the
// component name.
func NewSQLRetentionWorker(cfg SQLRetentionConfig, logger *slog.Logger) (*SQLRetentionWorker, error) {
	if cfg.Store == nil {
		return nil, errors.New("new sql retention worker: nil store")
	}
	if cfg.RecordRetention <= 0 {
		return nil, errors.New("new sql retention worker: non-positive record retention")
	}
	if cfg.MalformedIntentRetention <= 0 {
		return nil, errors.New("new sql retention worker: non-positive malformed intent retention")
	}
	if cfg.CleanupInterval <= 0 {
		return nil, errors.New("new sql retention worker: non-positive cleanup interval")
	}
	if cfg.Clock == nil {
		return nil, errors.New("new sql retention worker: nil clock")
	}
	log := logger
	if log == nil {
		log = slog.Default()
	}
	return &SQLRetentionWorker{
		store:                    cfg.Store,
		recordRetention:          cfg.RecordRetention,
		malformedIntentRetention: cfg.MalformedIntentRetention,
		cleanupInterval:          cfg.CleanupInterval,
		clock:                    cfg.Clock,
		logger:                   log.With("component", "sql_retention_worker"),
	}, nil
}
// Run starts the retention loop and blocks until ctx is canceled.
//
// One pass executes immediately, then one per cleanupInterval. Run returns
// ctx.Err() when the context ends, or a descriptive error for a nil receiver
// or nil context.
func (worker *SQLRetentionWorker) Run(ctx context.Context) error {
	// Guard the receiver before anything else so a nil worker can never
	// reach a field access below, regardless of the ctx argument's state.
	if worker == nil {
		return errors.New("run sql retention worker: nil worker")
	}
	if ctx == nil {
		return errors.New("run sql retention worker: nil context")
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	worker.logger.Info("sql retention worker started",
		"record_retention", worker.recordRetention.String(),
		"malformed_intent_retention", worker.malformedIntentRetention.String(),
		"cleanup_interval", worker.cleanupInterval.String(),
	)
	defer worker.logger.Info("sql retention worker stopped")
	// First pass runs immediately so a freshly started service does not wait
	// one full interval before evicting stale rows.
	worker.runOnce(ctx)
	ticker := time.NewTicker(worker.cleanupInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
			worker.runOnce(ctx)
		}
	}
}
// Shutdown stops the retention worker within ctx. The loop itself terminates
// via the context passed to Run, so this method only validates its argument.
func (worker *SQLRetentionWorker) Shutdown(ctx context.Context) error {
	if ctx != nil {
		return nil
	}
	return errors.New("shutdown sql retention worker: nil context")
}
// runOnce executes a single retention pass: it deletes records older than the
// record-retention cutoff, then malformed intents older than their cutoff.
// Store failures are logged and swallowed so one bad pass never stops the
// loop in Run.
func (worker *SQLRetentionWorker) runOnce(ctx context.Context) {
	now := worker.clock.Now().UTC()
	recordCutoff := now.Add(-worker.recordRetention)
	if deleted, err := worker.store.DeleteRecordsOlderThan(ctx, recordCutoff); err != nil {
		worker.logger.Warn("delete expired records failed",
			"cutoff", recordCutoff,
			"error", fmt.Sprintf("%v", err),
		)
	} else if deleted > 0 {
		// Log only when rows were actually removed to keep steady-state
		// output quiet.
		worker.logger.Info("expired records deleted",
			"cutoff", recordCutoff,
			"deleted", deleted,
		)
	}
	malformedCutoff := now.Add(-worker.malformedIntentRetention)
	if deleted, err := worker.store.DeleteMalformedIntentsOlderThan(ctx, malformedCutoff); err != nil {
		worker.logger.Warn("delete expired malformed intents failed",
			"cutoff", malformedCutoff,
			"error", fmt.Sprintf("%v", err),
		)
	} else if deleted > 0 {
		worker.logger.Info("expired malformed intents deleted",
			"cutoff", malformedCutoff,
			"deleted", deleted,
		)
	}
}
@@ -0,0 +1,18 @@
package worker
import (
"context"
"github.com/redis/go-redis/v9"
)
// StreamPublisher abstracts the subset of the Redis Streams API used by the
// route publishers to emit one outbound stream entry. The default
// implementation in production wiring is `*redis.Client`. Tests substitute
// an in-memory fake.
type StreamPublisher interface {
	// XAdd appends one entry to the configured stream. Implementations must
	// honour `args.MaxLen` plus `args.Approx == true` for approximate trimming
	// when the caller sets them. The returned StringCmd carries either the
	// assigned entry ID or the write error.
	XAdd(ctx context.Context, args *redis.XAddArgs) *redis.StringCmd
}