feat: use postgres

This commit is contained in:
Ilia Denisov
2026-04-26 20:34:39 +02:00
committed by GitHub
parent 48b0056b49
commit fe829285a6
365 changed files with 29223 additions and 24049 deletions
@@ -0,0 +1,118 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
)
// Compile-time assertion that *Store implements acceptintent.Store: the
// build fails on this line if the method set ever drifts from the
// interface. The runtime wiring depends on this so the accept-intent
// service can consume the PostgreSQL adapter directly.
var _ acceptintent.Store = (*Store)(nil)
// CreateAcceptance persists one accepted notification together with every
// route derived from it. All inserts run inside the transaction managed by
// store.withTx, so either the record and all of its routes are written or
// nothing is.
//
// A nil receiver, a nil ctx, or an input that fails Validate is rejected
// before any database work starts. A unique violation raised while
// inserting the record is reported as `acceptintent.ErrConflict`; any other
// failure is wrapped with the step that produced it.
func (store *Store) CreateAcceptance(ctx context.Context, input acceptintent.CreateAcceptanceInput) error {
	switch {
	case store == nil:
		return errors.New("create notification acceptance: nil store")
	case ctx == nil:
		return errors.New("create notification acceptance: nil context")
	}
	if validationErr := input.Validate(); validationErr != nil {
		return fmt.Errorf("create notification acceptance: %w", validationErr)
	}

	// persist is the unit of work executed inside the transaction.
	persist := func(txCtx context.Context, tx *sql.Tx) error {
		recordErr := insertRecord(txCtx, tx, input.Notification, input.Idempotency.ExpiresAt)
		switch {
		case recordErr == nil:
			// Record stored; continue with the routes.
		case isUniqueViolation(recordErr):
			return acceptintent.ErrConflict
		default:
			return fmt.Errorf("create notification acceptance: insert record: %w", recordErr)
		}

		for position, slot := range input.Routes {
			if routeErr := insertRoute(txCtx, tx, slot); routeErr != nil {
				return fmt.Errorf("create notification acceptance: insert route[%d]: %w", position, routeErr)
			}
		}
		return nil
	}

	return store.withTx(ctx, "create notification acceptance", persist)
}
// GetIdempotency looks up the idempotency reservation owned by
// `(producer, idempotencyKey)`. The reservation is not stored separately:
// it is projected from the matching records row via
// idempotencyRecordFromScanned.
//
// The boolean result is false (with a nil error) when no row matches. A nil
// receiver or nil ctx is reported as an error.
func (store *Store) GetIdempotency(ctx context.Context, producer intentstream.Producer, idempotencyKey string) (acceptintent.IdempotencyRecord, bool, error) {
	var none acceptintent.IdempotencyRecord

	switch {
	case store == nil:
		return none, false, errors.New("get notification idempotency: nil store")
	case ctx == nil:
		return none, false, errors.New("get notification idempotency: nil context")
	}

	boundedCtx, release, err := store.operationContext(ctx, "get notification idempotency")
	if err != nil {
		return none, false, err
	}
	defer release()

	row, ok, err := loadIdempotencyByKey(boundedCtx, store.db, string(producer), idempotencyKey)
	if err != nil || !ok {
		// err is nil on the plain "not found" path.
		return none, false, err
	}
	return idempotencyRecordFromScanned(row), true, nil
}
// GetNotification fetches the accepted notification identified by
// notificationID. The boolean result is false (with a nil error) when no
// such row exists; a nil receiver or nil ctx is reported as an error.
func (store *Store) GetNotification(ctx context.Context, notificationID string) (acceptintent.NotificationRecord, bool, error) {
	var none acceptintent.NotificationRecord

	switch {
	case store == nil:
		return none, false, errors.New("get notification record: nil store")
	case ctx == nil:
		return none, false, errors.New("get notification record: nil context")
	}

	boundedCtx, release, err := store.operationContext(ctx, "get notification record")
	if err != nil {
		return none, false, err
	}
	defer release()

	row, ok, err := loadRecord(boundedCtx, store.db, notificationID)
	if err != nil || !ok {
		// err is nil on the plain "not found" path.
		return none, false, err
	}
	return row.Record, true, nil
}
// GetRoute fetches the route stored under `(notificationID, routeID)`.
// Required by the publisher worker contracts. The lookup itself is
// delegated to loadRoute, whose results are returned unchanged; a nil
// receiver or nil ctx is reported as an error.
func (store *Store) GetRoute(ctx context.Context, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	switch {
	case store == nil:
		return acceptintent.NotificationRoute{}, false, errors.New("get notification route: nil store")
	case ctx == nil:
		return acceptintent.NotificationRoute{}, false, errors.New("get notification route: nil context")
	}

	boundedCtx, release, err := store.operationContext(ctx, "get notification route")
	if err != nil {
		return acceptintent.NotificationRoute{}, false, err
	}
	defer release()

	route, found, err := loadRoute(boundedCtx, store.db, notificationID, routeID)
	return route, found, err
}
@@ -0,0 +1,65 @@
package notificationstore
import (
"encoding/json"
"fmt"
)
// marshalRecipientUserIDs encodes userIDs as the JSON array stored in the
// `records.recipient_user_ids` JSONB column. A nil slice is encoded as `[]`
// rather than `null`, so nil and empty inputs produce the same bytes.
func marshalRecipientUserIDs(userIDs []string) ([]byte, error) {
	ids := userIDs
	if ids == nil {
		// encoding/json renders a nil slice as `null`; substitute an empty one.
		ids = make([]string, 0)
	}

	encoded, err := json.Marshal(ids)
	if err != nil {
		return nil, fmt.Errorf("marshal recipient user ids: %w", err)
	}
	return encoded, nil
}
// unmarshalRecipientUserIDs decodes the JSONB recipient user-id list. An
// empty payload and a payload that decodes to zero elements both yield a
// nil slice, so the read path matches what the service layer accepts
// (`nil` and an empty `[]` are equivalent for audience_kind != user_set).
// Malformed JSON is reported as a wrapped error.
func unmarshalRecipientUserIDs(payload []byte) ([]string, error) {
	if len(payload) == 0 {
		return nil, nil
	}

	var decoded []string
	err := json.Unmarshal(payload, &decoded)
	switch {
	case err != nil:
		return nil, fmt.Errorf("unmarshal recipient user ids: %w", err)
	case len(decoded) == 0:
		// Collapse `[]` (and `null`) to nil.
		return nil, nil
	default:
		return decoded, nil
	}
}
// marshalRawFields encodes fields as the JSON object stored in the
// `malformed_intents.raw_fields` JSONB column. The map is serialised
// verbatim so future operator queries can match arbitrary keys; a nil map
// is encoded as `{}` rather than `null`.
func marshalRawFields(fields map[string]any) ([]byte, error) {
	object := fields
	if object == nil {
		// encoding/json renders a nil map as `null`; substitute an empty one.
		object = make(map[string]any)
	}

	encoded, err := json.Marshal(object)
	if err != nil {
		return nil, fmt.Errorf("marshal raw fields: %w", err)
	}
	return encoded, nil
}
// unmarshalRawFields decodes the malformed_intents.raw_fields column into a
// map that is never nil on success: an empty payload, the JSON literal
// `null`, and `{}` all yield an empty map. Malformed JSON (or a JSON value
// that is not an object) is reported as a wrapped error together with a nil
// map.
func unmarshalRawFields(payload []byte) (map[string]any, error) {
	fields := map[string]any{}
	if len(payload) == 0 {
		return fields, nil
	}
	if err := json.Unmarshal(payload, &fields); err != nil {
		return nil, fmt.Errorf("unmarshal raw fields: %w", err)
	}
	// json.Unmarshal resets a map destination to nil when the document is
	// the JSON literal `null`; restore the non-nil guarantee so callers can
	// write to the result without a nil-map panic.
	if fields == nil {
		fields = map[string]any{}
	}
	return fields, nil
}
@@ -0,0 +1,61 @@
package notificationstore
import (
"context"
"database/sql"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
)
// deadLetterRow stores the column values written to one dead_letters row.
// Kept package-private because the public surface is the routestate
// CompleteRouteDeadLetterInput shape; this struct is only the on-disk
// projection. Each field is bound to the dead_letters column of the same
// name by insertDeadLetter.
type deadLetterRow struct {
// NotificationID and RouteID together identify the route the row belongs to.
NotificationID string
RouteID string
Channel string
RecipientRef string
FinalAttemptCount int
MaxAttempts int
FailureClassification string
FailureMessage string
RecoveryHint string
// CreatedAt is converted to UTC by insertDeadLetter before it is written.
CreatedAt time.Time
}
// insertDeadLetter adds one dead-letter audit row using the caller's open
// transaction. The composite PRIMARY KEY guards against duplicate inserts
// for the same `(notification_id, route_id)` pair. row.CreatedAt is
// normalised to UTC before binding; the driver error, if any, is returned
// unwrapped.
func insertDeadLetter(ctx context.Context, tx *sql.Tx, row deadLetterRow) error {
	deadLetters := pgtable.DeadLetters

	insert := deadLetters.INSERT(
		deadLetters.NotificationID,
		deadLetters.RouteID,
		deadLetters.Channel,
		deadLetters.RecipientRef,
		deadLetters.FinalAttemptCount,
		deadLetters.MaxAttempts,
		deadLetters.FailureClassification,
		deadLetters.FailureMessage,
		deadLetters.RecoveryHint,
		deadLetters.CreatedAt,
	).VALUES(
		row.NotificationID,
		row.RouteID,
		row.Channel,
		row.RecipientRef,
		row.FinalAttemptCount,
		row.MaxAttempts,
		row.FailureClassification,
		row.FailureMessage,
		row.RecoveryHint,
		row.CreatedAt.UTC(),
	)

	sqlText, sqlArgs := insert.Sql()
	_, err := tx.ExecContext(ctx, sqlText, sqlArgs...)
	return err
}
@@ -0,0 +1,200 @@
package notificationstore
import (
"context"
"database/sql"
"net/url"
"os"
"sync"
"testing"
"time"
"galaxy/notification/internal/adapters/postgres/migrations"
"galaxy/postgres"
testcontainers "github.com/testcontainers/testcontainers-go"
tcpostgres "github.com/testcontainers/testcontainers-go/modules/postgres"
"github.com/testcontainers/testcontainers-go/wait"
)
// Settings for the package-scoped PostgreSQL test container.
const (
// Image handed to tcpostgres.Run.
pkgPostgresImage = "postgres:16-alpine"
// Superuser credentials and database the container is created with.
pkgSuperUser = "galaxy"
pkgSuperPassword = "galaxy"
pkgSuperDatabase = "galaxy_notification"
// Role, password and schema used by dsnForServiceRole to build the scoped
// DSN. NOTE(review): provisionRoleAndSchema spells the same role, password
// and schema as literals inside its SQL; keep the two in sync.
pkgServiceRole = "notificationservice"
pkgServicePassword = "notificationservice"
pkgServiceSchema = "notification"
// Upper bound passed to the container wait strategy.
pkgContainerStartup = 90 * time.Second
// Operation timeout applied to the pools and to the Store under test.
pkgOperationTimeout = 10 * time.Second
)
// Package-scoped container state shared by every test in the package.
var (
// pkgContainerOnce makes ensurePostgresEnv call startPostgresEnv at most once.
pkgContainerOnce sync.Once
// pkgContainerErr and pkgContainerEnv hold the outcome of that single call.
pkgContainerErr error
pkgContainerEnv *postgresEnv
)
// postgresEnv bundles the resources produced by startPostgresEnv: the
// running container, the service-role DSN, and the pool opened with that
// DSN (migrations are applied through this pool).
type postgresEnv struct {
container *tcpostgres.PostgresContainer
dsn string
pool *sql.DB
}
// ensurePostgresEnv returns the shared PostgreSQL environment, starting it
// on the first call. The start is attempted only once per test binary; when
// it failed, every caller is skipped with the recorded error instead of
// failing.
func ensurePostgresEnv(t testing.TB) *postgresEnv {
	t.Helper()

	pkgContainerOnce.Do(func() {
		env, err := startPostgresEnv()
		pkgContainerEnv, pkgContainerErr = env, err
	})

	if startErr := pkgContainerErr; startErr != nil {
		t.Skipf("postgres container start failed (Docker unavailable?): %v", startErr)
	}
	return pkgContainerEnv
}
// startPostgresEnv boots a PostgreSQL container, provisions the service
// role and schema with the superuser DSN, opens a pool scoped to the
// service role, verifies it with a ping, and applies the migrations.
//
// On any failure the container (and the pool, once opened) is released
// before the error is returned, so a failed start leaves nothing running.
func startPostgresEnv() (*postgresEnv, error) {
	ctx := context.Background()

	container, err := tcpostgres.Run(ctx, pkgPostgresImage,
		tcpostgres.WithDatabase(pkgSuperDatabase),
		tcpostgres.WithUsername(pkgSuperUser),
		tcpostgres.WithPassword(pkgSuperPassword),
		testcontainers.WithWaitStrategy(
			wait.ForLog("database system is ready to accept connections").
				WithOccurrence(2).
				WithStartupTimeout(pkgContainerStartup),
		),
	)

	// abort terminates the container and reports cause. It is also used for
	// a failed Run: Run can return a created container together with an
	// error (for example when the wait strategy times out), and
	// TerminateContainer tolerates a nil container.
	abort := func(cause error) (*postgresEnv, error) {
		_ = testcontainers.TerminateContainer(container)
		return nil, cause
	}
	if err != nil {
		return abort(err)
	}

	baseDSN, err := container.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		return abort(err)
	}
	if err := provisionRoleAndSchema(ctx, baseDSN); err != nil {
		return abort(err)
	}
	scopedDSN, err := dsnForServiceRole(baseDSN)
	if err != nil {
		return abort(err)
	}

	cfg := postgres.DefaultConfig()
	cfg.PrimaryDSN = scopedDSN
	cfg.OperationTimeout = pkgOperationTimeout
	pool, err := postgres.OpenPrimary(ctx, cfg)
	if err != nil {
		return abort(err)
	}
	if err := postgres.Ping(ctx, pool, pkgOperationTimeout); err != nil {
		_ = pool.Close()
		return abort(err)
	}
	if err := postgres.RunMigrations(ctx, pool, migrations.FS(), "."); err != nil {
		_ = pool.Close()
		return abort(err)
	}

	return &postgresEnv{
		container: container,
		dsn:       scopedDSN,
		pool:      pool,
	}, nil
}
// provisionRoleAndSchema connects with baseDSN and runs three statements in
// order: create the service role when it does not exist yet, create the
// service schema owned by that role, and grant the role USAGE on the
// schema. It stops at, and returns, the first error. The connection is
// closed before returning.
func provisionRoleAndSchema(ctx context.Context, baseDSN string) error {
	adminCfg := postgres.DefaultConfig()
	adminCfg.PrimaryDSN = baseDSN
	adminCfg.OperationTimeout = pkgOperationTimeout

	admin, err := postgres.OpenPrimary(ctx, adminCfg)
	if err != nil {
		return err
	}
	defer func() { _ = admin.Close() }()

	for _, ddl := range []string{
		`DO $$ BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'notificationservice') THEN
CREATE ROLE notificationservice LOGIN PASSWORD 'notificationservice';
END IF;
END $$;`,
		`CREATE SCHEMA IF NOT EXISTS notification AUTHORIZATION notificationservice;`,
		`GRANT USAGE ON SCHEMA notification TO notificationservice;`,
	} {
		if _, execErr := admin.ExecContext(ctx, ddl); execErr != nil {
			return execErr
		}
	}
	return nil
}
// dsnForServiceRole derives the service-role DSN from baseDSN. Scheme, host
// and path are copied from baseDSN; the credentials are replaced with the
// service role's, and the query string is rebuilt from scratch with
// `search_path` set to the service schema and `sslmode=disable`. Any query
// parameters present on baseDSN are discarded.
func dsnForServiceRole(baseDSN string) (string, error) {
	base, err := url.Parse(baseDSN)
	if err != nil {
		return "", err
	}

	query := url.Values{
		"search_path": {pkgServiceSchema},
		"sslmode":     {"disable"},
	}
	scoped := &url.URL{
		Scheme:   base.Scheme,
		User:     url.UserPassword(pkgServiceRole, pkgServicePassword),
		Host:     base.Host,
		Path:     base.Path,
		RawQuery: query.Encode(),
	}
	return scoped.String(), nil
}
// newTestStore builds a Store on top of the package-scoped pool. The
// notification-owned tables are truncated on every call, so each test
// starts from empty tables while all tests share one container start. The
// test is failed immediately when the Store cannot be constructed.
func newTestStore(t *testing.T) *Store {
	t.Helper()

	pool := ensurePostgresEnv(t).pool
	truncateAll(t, pool)

	cfg := Config{DB: pool, OperationTimeout: pkgOperationTimeout}
	store, err := New(cfg)
	if err != nil {
		t.Fatalf("new store: %v", err)
	}
	return store
}
// truncateAll empties the malformed_intents, dead_letters, routes and
// records tables in one TRUNCATE statement (with RESTART IDENTITY CASCADE)
// and fails the test when the statement cannot be executed.
func truncateAll(t *testing.T, db *sql.DB) {
	t.Helper()

	const wipe = `TRUNCATE TABLE
malformed_intents,
dead_letters,
routes,
records
RESTART IDENTITY CASCADE`

	_, err := db.ExecContext(context.Background(), wipe)
	if err != nil {
		t.Fatalf("truncate tables: %v", err)
	}
}
// TestMain is the package's test entry point. It runs the tests and, once
// m.Run has returned, closes the shared pool and terminates the container
// that the first test started (if any) before exiting with the tests' exit
// code.
//
// NOTE(review): the teardown only runs when m.Run returns. A panic inside a
// test aborts the test binary before that point, so this cleanup should not
// be relied on in that case.
func TestMain(m *testing.M) {
	exitCode := m.Run()

	if env := pkgContainerEnv; env != nil {
		if env.pool != nil {
			_ = env.pool.Close()
		}
		if env.container != nil {
			_ = testcontainers.TerminateContainer(env.container)
		}
	}

	os.Exit(exitCode)
}
@@ -0,0 +1,68 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgconn"
)
// pgUniqueViolationCode is the SQLSTATE (`unique_violation`) returned by
// PostgreSQL when a UNIQUE constraint is violated by INSERT or UPDATE.
// isUniqueViolation compares it against pgconn.PgError.Code.
const pgUniqueViolationCode = "23505"
// isUniqueViolation reports whether err, or any error it wraps, is a
// *pgconn.PgError carrying the unique-violation SQLSTATE. The constraint
// name is not inspected.
func isUniqueViolation(err error) bool {
	var pgErr *pgconn.PgError
	return errors.As(err, &pgErr) && pgErr.Code == pgUniqueViolationCode
}
// isNoRows reports whether err is, or wraps, sql.ErrNoRows. A nil err
// yields false.
func isNoRows(err error) bool {
	matched := errors.Is(err, sql.ErrNoRows)
	return matched
}
// nullableTime converts a domain timestamp into a bind argument: a
// non-zero t becomes t.UTC(), a zero t becomes nil so the column is bound
// as SQL NULL. The notification domain uses zero-valued time.Time to
// express "absent" timestamps (no pointers), so the helper centralises the
// boundary translation.
func nullableTime(t time.Time) any {
	if !t.IsZero() {
		return t.UTC()
	}
	return nil
}
// timeFromNullable converts an optional sql.NullTime read from PostgreSQL
// into a domain time.Time. A valid value is returned in UTC; an invalid
// (NULL) value becomes the zero time.Time, whatever value.Time holds.
func timeFromNullable(value sql.NullTime) time.Time {
	var absent time.Time
	if value.Valid {
		return value.Time.UTC()
	}
	return absent
}
// withTimeout returns a child of ctx that expires after timeout, together
// with its cancel function. Callers must always invoke the returned cancel.
//
// No child is created, and a nil context and nil cancel are returned with
// an error prefixed by operation, when ctx is nil, when ctx is already done
// (the context error is wrapped), or when timeout is not positive. The
// checks are applied in that order.
func withTimeout(ctx context.Context, operation string, timeout time.Duration) (context.Context, context.CancelFunc, error) {
	if ctx == nil {
		return nil, nil, fmt.Errorf("%s: nil context", operation)
	}

	switch ctxErr := ctx.Err(); {
	case ctxErr != nil:
		return nil, nil, fmt.Errorf("%s: %w", operation, ctxErr)
	case timeout <= 0:
		return nil, nil, fmt.Errorf("%s: operation timeout must be positive", operation)
	}

	child, release := context.WithTimeout(ctx, timeout)
	return child, release, nil
}
@@ -0,0 +1,131 @@
package notificationstore
import (
"context"
"errors"
"fmt"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/malformedintent"
pg "github.com/go-jet/jet/v2/postgres"
)
// Record persists one malformed-intent entry keyed by its stream entry id.
// The helper satisfies `worker.MalformedIntentRecorder`. Writing an entry
// whose `stream_entry_id` already exists is a silent no-op via
// `ON CONFLICT DO NOTHING`.
//
// The entry is validated and its raw fields are encoded before the
// operation context is derived, so invalid input never reaches the
// database. entry.RecordedAt is normalised to UTC before binding.
func (store *Store) Record(ctx context.Context, entry malformedintent.Entry) error {
	switch {
	case store == nil:
		return errors.New("record malformed intent: nil store")
	case ctx == nil:
		return errors.New("record malformed intent: nil context")
	}
	if validationErr := entry.Validate(); validationErr != nil {
		return fmt.Errorf("record malformed intent: %w", validationErr)
	}
	encodedFields, err := marshalRawFields(entry.RawFields)
	if err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}

	boundedCtx, release, err := store.operationContext(ctx, "record malformed intent")
	if err != nil {
		return err
	}
	defer release()

	intents := pgtable.MalformedIntents
	insert := intents.INSERT(
		intents.StreamEntryID,
		intents.NotificationType,
		intents.Producer,
		intents.IdempotencyKey,
		intents.FailureCode,
		intents.FailureMessage,
		intents.RawFields,
		intents.RecordedAt,
	).VALUES(
		entry.StreamEntryID,
		entry.NotificationType,
		entry.Producer,
		entry.IdempotencyKey,
		string(entry.FailureCode),
		entry.FailureMessage,
		encodedFields,
		entry.RecordedAt.UTC(),
	).ON_CONFLICT(intents.StreamEntryID).DO_NOTHING()

	sqlText, sqlArgs := insert.Sql()
	_, err = store.db.ExecContext(boundedCtx, sqlText, sqlArgs...)
	if err != nil {
		return fmt.Errorf("record malformed intent: %w", err)
	}
	return nil
}
// GetMalformedIntent fetches the malformed-intent entry stored under
// streamEntryID. The boolean result is false (with a nil error) when no
// such row exists. On success RecordedAt is returned in UTC and RawFields
// holds the decoded raw_fields column.
func (store *Store) GetMalformedIntent(ctx context.Context, streamEntryID string) (malformedintent.Entry, bool, error) {
	var none malformedintent.Entry

	switch {
	case store == nil:
		return none, false, errors.New("get malformed intent: nil store")
	case ctx == nil:
		return none, false, errors.New("get malformed intent: nil context")
	}

	boundedCtx, release, err := store.operationContext(ctx, "get malformed intent")
	if err != nil {
		return none, false, err
	}
	defer release()

	intents := pgtable.MalformedIntents
	selectStmt := pg.SELECT(
		intents.NotificationType,
		intents.Producer,
		intents.IdempotencyKey,
		intents.FailureCode,
		intents.FailureMessage,
		intents.RawFields,
		intents.RecordedAt,
	).FROM(intents).
		WHERE(intents.StreamEntryID.EQ(pg.String(streamEntryID)))
	sqlText, sqlArgs := selectStmt.Sql()

	// Scan targets, in SELECT order. RecordedAt is scanned straight into
	// the entry; the key column is not selected, so it is seeded here.
	var (
		kind, producer, key, code, message string
		encodedFields                      []byte
	)
	entry := malformedintent.Entry{StreamEntryID: streamEntryID}

	scanErr := store.db.QueryRowContext(boundedCtx, sqlText, sqlArgs...).Scan(
		&kind,
		&producer,
		&key,
		&code,
		&message,
		&encodedFields,
		&entry.RecordedAt,
	)
	switch {
	case scanErr == nil:
		// Row found; assemble the entry below.
	case isNoRows(scanErr):
		return none, false, nil
	default:
		return none, false, fmt.Errorf("get malformed intent: %w", scanErr)
	}

	decodedFields, err := unmarshalRawFields(encodedFields)
	if err != nil {
		return none, false, fmt.Errorf("get malformed intent: %w", err)
	}

	entry.NotificationType = kind
	entry.Producer = producer
	entry.IdempotencyKey = key
	entry.FailureCode = malformedintent.FailureCode(code)
	entry.FailureMessage = message
	entry.RecordedAt = entry.RecordedAt.UTC()
	entry.RawFields = decodedFields
	return entry, true, nil
}
@@ -0,0 +1,223 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"galaxy/notification/internal/api/intentstream"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
pg "github.com/go-jet/jet/v2/postgres"
)
// recordSelectColumns is the canonical SELECT list for the records table.
// Its order must stay in lock-step with the destinations scanRecord passes
// to Scan, because columns are matched by position; change both together.
var recordSelectColumns = pg.ColumnList{
pgtable.Records.NotificationID,
pgtable.Records.NotificationType,
pgtable.Records.Producer,
pgtable.Records.AudienceKind,
pgtable.Records.RecipientUserIds,
pgtable.Records.PayloadJSON,
pgtable.Records.IdempotencyKey,
pgtable.Records.RequestFingerprint,
pgtable.Records.RequestID,
pgtable.Records.TraceID,
pgtable.Records.OccurredAt,
pgtable.Records.AcceptedAt,
pgtable.Records.UpdatedAt,
pgtable.Records.IdempotencyExpiresAt,
}
// rowScanner abstracts *sql.Row and *sql.Rows so scanRecord/scanRoute can be
// shared across both single-row reads and iterated reads. Both standard
// library types provide this Scan method.
type rowScanner interface {
// Scan copies the current row's columns into dest, matched by position.
Scan(dest ...any) error
}
// scannedRecord stores the columns scanned from the records table plus the
// idempotency_expires_at value the service layer feeds back into the
// IdempotencyRecord constructed from the same row.
type scannedRecord struct {
// Record is the notification record built from the row by scanRecord.
Record acceptintent.NotificationRecord
// IdempotencyExpiresAt is the row's idempotency_expires_at, in UTC.
IdempotencyExpiresAt time.Time
}
// scanRecord reads one records row from rs, expecting the columns in
// recordSelectColumns order. The Scan error (sql.ErrNoRows included) is
// returned verbatim so callers can distinguish "no row" from a hard error.
// Every timestamp in the result is normalised to UTC, and the recipient
// list is decoded with unmarshalRecipientUserIDs.
func scanRecord(rs rowScanner) (scannedRecord, error) {
	// Scan targets, declared in SELECT order.
	var row struct {
		notificationID       string
		notificationType     string
		producer             string
		audienceKind         string
		recipients           []byte
		payloadJSON          string
		idempotencyKey       string
		requestFingerprint   string
		requestID            string
		traceID              string
		occurredAt           time.Time
		acceptedAt           time.Time
		updatedAt            time.Time
		idempotencyExpiresAt time.Time
	}

	scanErr := rs.Scan(
		&row.notificationID,
		&row.notificationType,
		&row.producer,
		&row.audienceKind,
		&row.recipients,
		&row.payloadJSON,
		&row.idempotencyKey,
		&row.requestFingerprint,
		&row.requestID,
		&row.traceID,
		&row.occurredAt,
		&row.acceptedAt,
		&row.updatedAt,
		&row.idempotencyExpiresAt,
	)
	if scanErr != nil {
		return scannedRecord{}, scanErr
	}

	recipientIDs, err := unmarshalRecipientUserIDs(row.recipients)
	if err != nil {
		return scannedRecord{}, err
	}

	var out scannedRecord
	out.IdempotencyExpiresAt = row.idempotencyExpiresAt.UTC()
	out.Record = acceptintent.NotificationRecord{
		NotificationID:     row.notificationID,
		NotificationType:   intentstream.NotificationType(row.notificationType),
		Producer:           intentstream.Producer(row.producer),
		AudienceKind:       intentstream.AudienceKind(row.audienceKind),
		RecipientUserIDs:   recipientIDs,
		PayloadJSON:        row.payloadJSON,
		IdempotencyKey:     row.idempotencyKey,
		RequestFingerprint: row.requestFingerprint,
		RequestID:          row.requestID,
		TraceID:            row.traceID,
		OccurredAt:         row.occurredAt.UTC(),
		AcceptedAt:         row.acceptedAt.UTC(),
		UpdatedAt:          row.updatedAt.UTC(),
	}
	return out, nil
}
// insertRecord stores one records row, including its idempotency expiry,
// using the caller's open transaction. The (producer, idempotency_key)
// UNIQUE constraint is the idempotency reservation; the caller maps
// `isUniqueViolation` errors to `acceptintent.ErrConflict`.
//
// Validation and encoding failures are wrapped with "insert record"; the
// driver error is returned unwrapped so the caller can inspect it. All
// timestamps are normalised to UTC before binding.
func insertRecord(ctx context.Context, tx *sql.Tx, record acceptintent.NotificationRecord, idempotencyExpiresAt time.Time) error {
	if validationErr := record.Validate(); validationErr != nil {
		return fmt.Errorf("insert record: %w", validationErr)
	}
	encodedRecipients, err := marshalRecipientUserIDs(record.RecipientUserIDs)
	if err != nil {
		return fmt.Errorf("insert record: %w", err)
	}

	records := pgtable.Records
	insert := records.INSERT(
		records.NotificationID,
		records.NotificationType,
		records.Producer,
		records.AudienceKind,
		records.RecipientUserIds,
		records.PayloadJSON,
		records.IdempotencyKey,
		records.RequestFingerprint,
		records.RequestID,
		records.TraceID,
		records.OccurredAt,
		records.AcceptedAt,
		records.UpdatedAt,
		records.IdempotencyExpiresAt,
	).VALUES(
		record.NotificationID,
		string(record.NotificationType),
		string(record.Producer),
		string(record.AudienceKind),
		encodedRecipients,
		record.PayloadJSON,
		record.IdempotencyKey,
		record.RequestFingerprint,
		record.RequestID,
		record.TraceID,
		record.OccurredAt.UTC(),
		record.AcceptedAt.UTC(),
		record.UpdatedAt.UTC(),
		idempotencyExpiresAt.UTC(),
	)

	sqlText, sqlArgs := insert.Sql()
	_, err = tx.ExecContext(ctx, sqlText, sqlArgs...)
	return err
}
// loadRecord reads the records row identified by notificationID through
// db. The boolean result is false (with a nil error) when no such row
// exists; any other failure is wrapped with "load notification record".
func loadRecord(ctx context.Context, db *sql.DB, notificationID string) (scannedRecord, bool, error) {
	byID := pgtable.Records.NotificationID.EQ(pg.String(notificationID))
	sqlText, sqlArgs := pg.SELECT(recordSelectColumns).
		FROM(pgtable.Records).
		WHERE(byID).
		Sql()

	found, err := scanRecord(db.QueryRowContext(ctx, sqlText, sqlArgs...))
	switch {
	case isNoRows(err):
		return scannedRecord{}, false, nil
	case err != nil:
		return scannedRecord{}, false, fmt.Errorf("load notification record: %w", err)
	default:
		return found, true, nil
	}
}
// loadIdempotencyByKey reads the records row that owns the
// `(producer, idempotency_key)` reservation. The boolean result is false
// (with a nil error) when nothing matches; any other failure is wrapped
// with "load notification idempotency".
func loadIdempotencyByKey(ctx context.Context, db *sql.DB, producer string, idempotencyKey string) (scannedRecord, bool, error) {
	byReservation := pg.AND(
		pgtable.Records.Producer.EQ(pg.String(producer)),
		pgtable.Records.IdempotencyKey.EQ(pg.String(idempotencyKey)),
	)
	sqlText, sqlArgs := pg.SELECT(recordSelectColumns).
		FROM(pgtable.Records).
		WHERE(byReservation).
		Sql()

	found, err := scanRecord(db.QueryRowContext(ctx, sqlText, sqlArgs...))
	switch {
	case isNoRows(err):
		return scannedRecord{}, false, nil
	case err != nil:
		return scannedRecord{}, false, fmt.Errorf("load notification idempotency: %w", err)
	default:
		return found, true, nil
	}
}
// idempotencyRecordFromScanned projects a scanned records row into the
// IdempotencyRecord shape. CreatedAt mirrors the record's AcceptedAt
// because the durable row is the idempotency reservation; ExpiresAt comes
// from the row's idempotency expiry.
func idempotencyRecordFromScanned(scanned scannedRecord) acceptintent.IdempotencyRecord {
	source := scanned.Record

	var reservation acceptintent.IdempotencyRecord
	reservation.Producer = source.Producer
	reservation.IdempotencyKey = source.IdempotencyKey
	reservation.NotificationID = source.NotificationID
	reservation.RequestFingerprint = source.RequestFingerprint
	reservation.CreatedAt = source.AcceptedAt
	reservation.ExpiresAt = scanned.IdempotencyExpiresAt
	return reservation
}
// errRecordNotFound is the package-private sentinel returned by helpers
// when a row required by an in-progress transaction is not found. Compare
// against it with errors.Is.
var errRecordNotFound = errors.New("record not found")
@@ -0,0 +1,67 @@
package notificationstore
import (
"context"
"errors"
"fmt"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
pg "github.com/go-jet/jet/v2/postgres"
)
// DeleteRecordsOlderThan removes records rows whose `accepted_at` predates
// cutoff (strictly earlier; cutoff is compared in UTC) and returns the
// number of records rows deleted. The records FK CASCADE clears the
// dependent routes and dead_letters rows in the same statement.
//
// A nil receiver or nil ctx is reported as an error before any database
// work starts.
func (store *Store) DeleteRecordsOlderThan(ctx context.Context, cutoff time.Time) (int64, error) {
	if store == nil {
		return 0, errors.New("delete notification records: nil store")
	}
	// Reject a nil context explicitly, matching the guard used by the
	// store's other guarded methods, instead of relying on operationContext.
	if ctx == nil {
		return 0, errors.New("delete notification records: nil context")
	}
	operationCtx, cancel, err := store.operationContext(ctx, "delete notification records")
	if err != nil {
		return 0, err
	}
	defer cancel()

	stmt := pgtable.Records.DELETE().
		WHERE(pgtable.Records.AcceptedAt.LT(pg.TimestampzT(cutoff.UTC())))
	query, args := stmt.Sql()

	result, err := store.db.ExecContext(operationCtx, query, args...)
	if err != nil {
		return 0, fmt.Errorf("delete notification records: %w", err)
	}
	rows, err := result.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("delete notification records: rows affected: %w", err)
	}
	return rows, nil
}
// DeleteMalformedIntentsOlderThan removes malformed-intent rows whose
// `recorded_at` predates cutoff (strictly earlier; cutoff is compared in
// UTC) and returns the number of rows deleted.
//
// A nil receiver or nil ctx is reported as an error before any database
// work starts.
func (store *Store) DeleteMalformedIntentsOlderThan(ctx context.Context, cutoff time.Time) (int64, error) {
	if store == nil {
		return 0, errors.New("delete malformed intents: nil store")
	}
	// Reject a nil context explicitly, matching the guard used by the
	// store's other guarded methods, instead of relying on operationContext.
	if ctx == nil {
		return 0, errors.New("delete malformed intents: nil context")
	}
	operationCtx, cancel, err := store.operationContext(ctx, "delete malformed intents")
	if err != nil {
		return 0, err
	}
	defer cancel()

	stmt := pgtable.MalformedIntents.DELETE().
		WHERE(pgtable.MalformedIntents.RecordedAt.LT(pg.TimestampzT(cutoff.UTC())))
	query, args := stmt.Sql()

	result, err := store.db.ExecContext(operationCtx, query, args...)
	if err != nil {
		return 0, fmt.Errorf("delete malformed intents: %w", err)
	}
	rows, err := result.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("delete malformed intents: rows affected: %w", err)
	}
	return rows, nil
}
@@ -0,0 +1,248 @@
package notificationstore
import (
"context"
"database/sql"
"fmt"
"time"
"galaxy/notification/internal/api/intentstream"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
pg "github.com/go-jet/jet/v2/postgres"
)
// routeSelectColumns is the canonical SELECT list for the routes table.
// Its order must stay in lock-step with the destinations scanRoute passes
// to Scan, because columns are matched by position; change both together.
var routeSelectColumns = pg.ColumnList{
pgtable.Routes.NotificationID,
pgtable.Routes.RouteID,
pgtable.Routes.Channel,
pgtable.Routes.RecipientRef,
pgtable.Routes.Status,
pgtable.Routes.AttemptCount,
pgtable.Routes.MaxAttempts,
pgtable.Routes.NextAttemptAt,
pgtable.Routes.ResolvedEmail,
pgtable.Routes.ResolvedLocale,
pgtable.Routes.LastErrorClassification,
pgtable.Routes.LastErrorMessage,
pgtable.Routes.LastErrorAt,
pgtable.Routes.CreatedAt,
pgtable.Routes.UpdatedAt,
pgtable.Routes.PublishedAt,
pgtable.Routes.DeadLetteredAt,
pgtable.Routes.SkippedAt,
}
// scanRoute reads one routes row from rs, expecting the columns in
// routeSelectColumns order. The Scan error is returned verbatim. Nullable
// timestamps are read as sql.NullTime and mapped through timeFromNullable
// (NULL becomes the zero time); created_at and updated_at are normalised
// to UTC.
func scanRoute(rs rowScanner) (acceptintent.NotificationRoute, error) {
	// Scan targets, declared in SELECT order.
	var row struct {
		notificationID          string
		routeID                 string
		channel                 string
		recipientRef            string
		status                  string
		attemptCount            int
		maxAttempts             int
		nextAttemptAt           sql.NullTime
		resolvedEmail           string
		resolvedLocale          string
		lastErrorClassification string
		lastErrorMessage        string
		lastErrorAt             sql.NullTime
		createdAt               time.Time
		updatedAt               time.Time
		publishedAt             sql.NullTime
		deadLetteredAt          sql.NullTime
		skippedAt               sql.NullTime
	}

	scanErr := rs.Scan(
		&row.notificationID,
		&row.routeID,
		&row.channel,
		&row.recipientRef,
		&row.status,
		&row.attemptCount,
		&row.maxAttempts,
		&row.nextAttemptAt,
		&row.resolvedEmail,
		&row.resolvedLocale,
		&row.lastErrorClassification,
		&row.lastErrorMessage,
		&row.lastErrorAt,
		&row.createdAt,
		&row.updatedAt,
		&row.publishedAt,
		&row.deadLetteredAt,
		&row.skippedAt,
	)
	if scanErr != nil {
		return acceptintent.NotificationRoute{}, scanErr
	}

	route := acceptintent.NotificationRoute{
		NotificationID:          row.notificationID,
		RouteID:                 row.routeID,
		Channel:                 intentstream.Channel(row.channel),
		RecipientRef:            row.recipientRef,
		Status:                  acceptintent.RouteStatus(row.status),
		AttemptCount:            row.attemptCount,
		MaxAttempts:             row.maxAttempts,
		NextAttemptAt:           timeFromNullable(row.nextAttemptAt),
		ResolvedEmail:           row.resolvedEmail,
		ResolvedLocale:          row.resolvedLocale,
		LastErrorClassification: row.lastErrorClassification,
		LastErrorMessage:        row.lastErrorMessage,
		LastErrorAt:             timeFromNullable(row.lastErrorAt),
		CreatedAt:               row.createdAt.UTC(),
		UpdatedAt:               row.updatedAt.UTC(),
		PublishedAt:             timeFromNullable(row.publishedAt),
		DeadLetteredAt:          timeFromNullable(row.deadLetteredAt),
		SkippedAt:               timeFromNullable(row.skippedAt),
	}
	return route, nil
}
// insertRoute stores one routes row using the caller's open transaction.
// The route is validated first (failures are wrapped with "insert route");
// the driver error is returned unwrapped. Zero-valued optional timestamps
// are bound as SQL NULL via nullableTime, and created_at/updated_at are
// normalised to UTC.
func insertRoute(ctx context.Context, tx *sql.Tx, route acceptintent.NotificationRoute) error {
	if validationErr := route.Validate(); validationErr != nil {
		return fmt.Errorf("insert route: %w", validationErr)
	}

	routes := pgtable.Routes
	insert := routes.INSERT(
		routes.NotificationID,
		routes.RouteID,
		routes.Channel,
		routes.RecipientRef,
		routes.Status,
		routes.AttemptCount,
		routes.MaxAttempts,
		routes.NextAttemptAt,
		routes.ResolvedEmail,
		routes.ResolvedLocale,
		routes.LastErrorClassification,
		routes.LastErrorMessage,
		routes.LastErrorAt,
		routes.CreatedAt,
		routes.UpdatedAt,
		routes.PublishedAt,
		routes.DeadLetteredAt,
		routes.SkippedAt,
	).VALUES(
		route.NotificationID,
		route.RouteID,
		string(route.Channel),
		route.RecipientRef,
		string(route.Status),
		route.AttemptCount,
		route.MaxAttempts,
		nullableTime(route.NextAttemptAt),
		route.ResolvedEmail,
		route.ResolvedLocale,
		route.LastErrorClassification,
		route.LastErrorMessage,
		nullableTime(route.LastErrorAt),
		route.CreatedAt.UTC(),
		route.UpdatedAt.UTC(),
		nullableTime(route.PublishedAt),
		nullableTime(route.DeadLetteredAt),
		nullableTime(route.SkippedAt),
	)

	sqlText, sqlArgs := insert.Sql()
	_, err := tx.ExecContext(ctx, sqlText, sqlArgs...)
	return err
}
// loadRoute reads the routes row identified by the composite key
// `(notificationID, routeID)` through db. The boolean result is false (with
// a nil error) when no matching row exists; any other failure is wrapped
// with "load notification route".
func loadRoute(ctx context.Context, db *sql.DB, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	byKey := pg.AND(
		pgtable.Routes.NotificationID.EQ(pg.String(notificationID)),
		pgtable.Routes.RouteID.EQ(pg.String(routeID)),
	)
	sqlText, sqlArgs := pg.SELECT(routeSelectColumns).
		FROM(pgtable.Routes).
		WHERE(byKey).
		Sql()

	found, err := scanRoute(db.QueryRowContext(ctx, sqlText, sqlArgs...))
	switch {
	case isNoRows(err):
		return acceptintent.NotificationRoute{}, false, nil
	case err != nil:
		return acceptintent.NotificationRoute{}, false, fmt.Errorf("load notification route: %w", err)
	default:
		return found, true, nil
	}
}
// loadRouteTx is the transactional counterpart of loadRoute: it reads the
// routes row identified by `(notificationID, routeID)` through the caller's
// open transaction. The boolean result is false (with a nil error) when no
// matching row exists; any other failure is wrapped with
// "load notification route".
func loadRouteTx(ctx context.Context, tx *sql.Tx, notificationID string, routeID string) (acceptintent.NotificationRoute, bool, error) {
	byKey := pg.AND(
		pgtable.Routes.NotificationID.EQ(pg.String(notificationID)),
		pgtable.Routes.RouteID.EQ(pg.String(routeID)),
	)
	sqlText, sqlArgs := pg.SELECT(routeSelectColumns).
		FROM(pgtable.Routes).
		WHERE(byKey).
		Sql()

	found, err := scanRoute(tx.QueryRowContext(ctx, sqlText, sqlArgs...))
	switch {
	case isNoRows(err):
		return acceptintent.NotificationRoute{}, false, nil
	case err != nil:
		return acceptintent.NotificationRoute{}, false, fmt.Errorf("load notification route: %w", err)
	default:
		return found, true, nil
	}
}
// updateRouteIfMatching persists the mutable columns of route inside tx, but
// only for the row whose composite key matches and whose `updated_at` still
// equals expectedUpdatedAt. It returns the number of rows the statement
// affected; zero means another writer changed the row first (an
// optimistic-concurrency loss). Errors are returned unwrapped so that callers
// can attach their own operation prefix.
func updateRouteIfMatching(ctx context.Context, tx *sql.Tx, route acceptintent.NotificationRoute, expectedUpdatedAt time.Time) (int64, error) {
	// The row is addressed by its composite key and additionally guarded by
	// the updated_at value the caller originally observed.
	guard := pg.AND(
		pgtable.Routes.NotificationID.EQ(pg.String(route.NotificationID)),
		pgtable.Routes.RouteID.EQ(pg.String(route.RouteID)),
		pgtable.Routes.UpdatedAt.EQ(pg.TimestampzT(expectedUpdatedAt.UTC())),
	)

	update := pgtable.Routes.UPDATE(
		pgtable.Routes.Status,
		pgtable.Routes.AttemptCount,
		pgtable.Routes.NextAttemptAt,
		pgtable.Routes.ResolvedEmail,
		pgtable.Routes.ResolvedLocale,
		pgtable.Routes.LastErrorClassification,
		pgtable.Routes.LastErrorMessage,
		pgtable.Routes.LastErrorAt,
		pgtable.Routes.UpdatedAt,
		pgtable.Routes.PublishedAt,
		pgtable.Routes.DeadLetteredAt,
		pgtable.Routes.SkippedAt,
	).SET(
		string(route.Status),
		route.AttemptCount,
		nullableTime(route.NextAttemptAt),
		route.ResolvedEmail,
		route.ResolvedLocale,
		route.LastErrorClassification,
		route.LastErrorMessage,
		nullableTime(route.LastErrorAt),
		route.UpdatedAt.UTC(),
		nullableTime(route.PublishedAt),
		nullableTime(route.DeadLetteredAt),
		nullableTime(route.SkippedAt),
	).WHERE(guard)

	query, args := update.Sql()
	outcome, err := tx.ExecContext(ctx, query, args...)
	if err != nil {
		return 0, err
	}
	affected, err := outcome.RowsAffected()
	if err != nil {
		return 0, err
	}
	return affected, nil
}
@@ -0,0 +1,262 @@
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
pgtable "galaxy/notification/internal/adapters/postgres/jet/notification/table"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/routestate"
"galaxy/notification/internal/telemetry"
pg "github.com/go-jet/jet/v2/postgres"
)
// scheduledRouteKey builds the key carried by one ScheduledRoute by joining
// the notification id and the route id with a single "/" separator. The
// inputs are not escaped or validated here.
func scheduledRouteKey(notificationID string, routeID string) string {
	const separator = "/"
	return notificationID + separator + routeID
}
// ListDueRoutes returns up to limit routes whose `next_attempt_at` is set and
// is at or before now, ordered by ascending `next_attempt_at`. The query is
// non-locking; per-row contention is resolved by the lease (Redis) plus the
// optimistic-concurrency check inside `Complete*`.
//
// It returns an error when the store or ctx is nil, when now fails
// routestate.ValidateUTCMillisecondTimestamp, when limit is not positive, or
// when the query or row scan fails. On success the returned slice is non-nil
// and may be empty.
func (store *Store) ListDueRoutes(ctx context.Context, now time.Time, limit int64) ([]routestate.ScheduledRoute, error) {
	// maxPreallocatedRoutes bounds the capacity reserved up front. The
	// caller-supplied limit only caps the SQL result size; using it directly
	// as a slice capacity would let a very large limit trigger a huge
	// allocation (or a makeslice panic) before a single row is read.
	const maxPreallocatedRoutes = 1024

	if store == nil {
		return nil, errors.New("list due routes: nil store")
	}
	if ctx == nil {
		return nil, errors.New("list due routes: nil context")
	}
	if err := routestate.ValidateUTCMillisecondTimestamp("list due routes now", now); err != nil {
		return nil, err
	}
	if limit <= 0 {
		return nil, errors.New("list due routes: limit must be positive")
	}

	operationCtx, cancel, err := store.operationContext(ctx, "list due routes")
	if err != nil {
		return nil, err
	}
	defer cancel()

	stmt := pg.SELECT(pgtable.Routes.NotificationID, pgtable.Routes.RouteID).
		FROM(pgtable.Routes).
		WHERE(pg.AND(
			pgtable.Routes.NextAttemptAt.IS_NOT_NULL(),
			pgtable.Routes.NextAttemptAt.LT_EQ(pg.TimestampzT(now.UTC())),
		)).
		ORDER_BY(pgtable.Routes.NextAttemptAt.ASC()).
		LIMIT(limit)
	query, args := stmt.Sql()

	rows, err := store.db.QueryContext(operationCtx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	defer rows.Close()

	capacity := limit
	if capacity > maxPreallocatedRoutes {
		capacity = maxPreallocatedRoutes
	}
	out := make([]routestate.ScheduledRoute, 0, capacity)
	for rows.Next() {
		var (
			notificationID string
			routeID        string
		)
		if err := rows.Scan(&notificationID, &routeID); err != nil {
			return nil, fmt.Errorf("list due routes: scan: %w", err)
		}
		out = append(out, routestate.ScheduledRoute{
			RouteKey:       scheduledRouteKey(notificationID, routeID),
			NotificationID: notificationID,
			RouteID:        routeID,
		})
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("list due routes: %w", err)
	}
	return out, nil
}
// ReadRouteScheduleSnapshot reports how many routes currently carry a
// non-NULL `next_attempt_at` and, when at least one does, the earliest such
// timestamp converted to UTC. OldestScheduledFor stays nil when the
// aggregate returns NULL. The runtime exposes this through the telemetry
// snapshot reader.
func (store *Store) ReadRouteScheduleSnapshot(ctx context.Context) (telemetry.RouteScheduleSnapshot, error) {
	const operation = "read route schedule snapshot"

	switch {
	case store == nil:
		return telemetry.RouteScheduleSnapshot{}, errors.New(operation + ": nil store")
	case ctx == nil:
		return telemetry.RouteScheduleSnapshot{}, errors.New(operation + ": nil context")
	}

	operationCtx, cancel, err := store.operationContext(ctx, operation)
	if err != nil {
		return telemetry.RouteScheduleSnapshot{}, err
	}
	defer cancel()

	query, args := pg.SELECT(
		pg.COUNT(pg.STAR),
		pg.MIN(pgtable.Routes.NextAttemptAt),
	).
		FROM(pgtable.Routes).
		WHERE(pgtable.Routes.NextAttemptAt.IS_NOT_NULL()).
		Sql()

	var (
		scheduled int64
		earliest  sql.NullTime
	)
	if err := store.db.QueryRowContext(operationCtx, query, args...).Scan(&scheduled, &earliest); err != nil {
		return telemetry.RouteScheduleSnapshot{}, fmt.Errorf("%s: %w", operation, err)
	}

	snapshot := telemetry.RouteScheduleSnapshot{Depth: scheduled}
	if earliest.Valid {
		earliestUTC := earliest.Time.UTC()
		snapshot.OldestScheduledFor = &earliestUTC
	}
	return snapshot, nil
}
// CompleteRoutePublished transitions the expected route to `published`: it
// bumps attempt_count, clears the next-attempt, last-error, and dead-letter
// fields, and stamps both updated_at and published_at with input.PublishedAt.
// The write is gated on the `updated_at` value carried by
// input.ExpectedRoute; when no row matches, the call returns
// `routestate.ErrConflict`.
//
// Note: the outbound stream emission (XADD) is performed by the publisher
// before this call. input.Stream and input.StreamValues are not read here;
// they exist on the input so the publisher can thread one struct through its
// state machine.
func (store *Store) CompleteRoutePublished(ctx context.Context, input routestate.CompleteRoutePublishedInput) error {
	const operation = "complete route published"

	switch {
	case store == nil:
		return errors.New(operation + ": nil store")
	case ctx == nil:
		return errors.New(operation + ": nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("%s: %w", operation, err)
	}

	// Build the post-transition row from a copy of the expected one.
	next := input.ExpectedRoute
	next.Status = acceptintent.RouteStatusPublished
	next.AttemptCount++
	next.PublishedAt = input.PublishedAt
	next.UpdatedAt = input.PublishedAt
	next.NextAttemptAt = time.Time{}
	next.LastErrorAt = time.Time{}
	next.DeadLetteredAt = time.Time{}
	next.LastErrorClassification = ""
	next.LastErrorMessage = ""

	return store.withTx(ctx, operation, func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("%s: %w", operation, err)
		case affected == 0:
			return routestate.ErrConflict
		}
		return nil
	})
}
// CompleteRouteFailed stores the outcome of one retryable publication
// failure: the route becomes `failed`, attempt_count is bumped, the
// last-error fields are filled from the input, and the row is rescheduled at
// input.NextAttemptAt. The write is gated on the `updated_at` value carried
// by input.ExpectedRoute; when no row matches, the call returns
// `routestate.ErrConflict`.
func (store *Store) CompleteRouteFailed(ctx context.Context, input routestate.CompleteRouteFailedInput) error {
	const operation = "complete route failed"

	switch {
	case store == nil:
		return errors.New(operation + ": nil store")
	case ctx == nil:
		return errors.New(operation + ": nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("%s: %w", operation, err)
	}

	// Build the post-transition row from a copy of the expected one.
	next := input.ExpectedRoute
	next.Status = acceptintent.RouteStatusFailed
	next.AttemptCount++
	next.NextAttemptAt = input.NextAttemptAt
	next.UpdatedAt = input.FailedAt
	next.LastErrorAt = input.FailedAt
	next.LastErrorClassification = input.FailureClassification
	next.LastErrorMessage = input.FailureMessage

	return store.withTx(ctx, operation, func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("%s: %w", operation, err)
		case affected == 0:
			return routestate.ErrConflict
		}
		return nil
	})
}
// CompleteRouteDeadLetter stores the outcome of one terminal publication
// failure. Inside a single transaction it marks the route `dead_letter`
// (bumping attempt_count, clearing the schedule, and filling the last-error
// fields) and inserts the matching dead-letter audit row.
//
// The call is rejected before any database work when the post-increment
// attempt count is still below the route's max attempts. The route update is
// gated on the `updated_at` value carried by input.ExpectedRoute; when no row
// matches, the call returns `routestate.ErrConflict` and no audit row is
// inserted.
func (store *Store) CompleteRouteDeadLetter(ctx context.Context, input routestate.CompleteRouteDeadLetterInput) error {
	const operation = "complete route dead letter"

	switch {
	case store == nil:
		return errors.New(operation + ": nil store")
	case ctx == nil:
		return errors.New(operation + ": nil context")
	}
	if err := input.Validate(); err != nil {
		return fmt.Errorf("%s: %w", operation, err)
	}

	// Build the post-transition row from a copy of the expected one.
	next := input.ExpectedRoute
	next.Status = acceptintent.RouteStatusDeadLetter
	next.AttemptCount++
	next.NextAttemptAt = time.Time{}
	next.DeadLetteredAt = input.DeadLetteredAt
	next.UpdatedAt = input.DeadLetteredAt
	next.LastErrorAt = input.DeadLetteredAt
	next.LastErrorClassification = input.FailureClassification
	next.LastErrorMessage = input.FailureMessage

	// Dead-lettering is only legal once the attempt budget is exhausted.
	if next.AttemptCount < next.MaxAttempts {
		return fmt.Errorf(
			"%s: final attempt count %d is below max attempts %d",
			operation,
			next.AttemptCount,
			next.MaxAttempts,
		)
	}

	audit := deadLetterRow{
		NotificationID:        next.NotificationID,
		RouteID:               next.RouteID,
		Channel:               string(next.Channel),
		RecipientRef:          next.RecipientRef,
		FinalAttemptCount:     next.AttemptCount,
		MaxAttempts:           next.MaxAttempts,
		FailureClassification: input.FailureClassification,
		FailureMessage:        input.FailureMessage,
		RecoveryHint:          input.RecoveryHint,
		CreatedAt:             input.DeadLetteredAt,
	}

	return store.withTx(ctx, operation, func(txCtx context.Context, tx *sql.Tx) error {
		affected, err := updateRouteIfMatching(txCtx, tx, next, input.ExpectedRoute.UpdatedAt)
		switch {
		case err != nil:
			return fmt.Errorf("%s: %w", operation, err)
		case affected == 0:
			return routestate.ErrConflict
		}
		if err := insertDeadLetter(txCtx, tx, audit); err != nil {
			return fmt.Errorf("%s: %w", operation, err)
		}
		return nil
	})
}
@@ -0,0 +1,126 @@
// Package notificationstore implements the PostgreSQL-backed source-of-truth
// persistence used by Notification Service.
//
// The package owns the on-disk shape of the `notification` schema (defined
// in `galaxy/notification/internal/adapters/postgres/migrations`) and
// translates the schema-agnostic Store interfaces declared by the
// `internal/service/acceptintent` use case and the route publishers into
// concrete `database/sql` operations driven by the pgx driver. Atomic
// composite operations (acceptance, route-completion transitions) execute
// inside explicit `BEGIN … COMMIT` transactions; per-row lifecycle
// transitions use optimistic concurrency on the `updated_at` token rather
// than retaining a `FOR UPDATE` lock across the publisher's outbound stream
// emission.
//
// Stage 5 of `PG_PLAN.md` migrates Notification Service away from
// Redis-backed durable state. The inbound `notification:intents` Redis
// Stream and its consumer offset, the outbound `gateway:client-events` and
// `mail:delivery_commands` Redis Streams, and the short-lived
// `route_leases:*` exclusivity hint all remain on Redis; this store is no
// longer aware of any of them.
package notificationstore
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
)
// Config configures one PostgreSQL-backed notification store instance. The
// store does not own the underlying *sql.DB lifecycle: the caller (typically
// the service runtime) opens, instruments, migrates, and closes the pool.
// The store only borrows the pool and bounds individual round trips with
// OperationTimeout. New rejects a Config whose DB is nil or whose
// OperationTimeout is not positive.
type Config struct {
// DB stores the connection pool the store uses for every query. It must
// not be nil.
DB *sql.DB
// OperationTimeout bounds one round trip. The store creates a derived
// context for each operation so callers cannot starve the pool with an
// unbounded ctx. Multi-statement transactions inherit this bound for the
// whole BEGIN … COMMIT span. It must be positive.
OperationTimeout time.Duration
}
// Store persists Notification Service durable state in PostgreSQL and
// exposes the per-use-case Store interfaces required by acceptance,
// publication completion, malformed-intent recording, and the periodic
// retention worker. Construct it with New; the fields are copied from Config.
type Store struct {
// db is the borrowed connection pool (Config.DB); the store never closes it.
db *sql.DB
// operationTimeout is the per-operation deadline (Config.OperationTimeout)
// applied through withTimeout.
operationTimeout time.Duration
}
// New builds a PostgreSQL-backed notification store from cfg. It returns an
// error when cfg.DB is nil or cfg.OperationTimeout is not positive; the pool
// itself is not contacted here.
func New(cfg Config) (*Store, error) {
	switch {
	case cfg.DB == nil:
		return nil, errors.New("new postgres notification store: db must not be nil")
	case cfg.OperationTimeout <= 0:
		return nil, errors.New("new postgres notification store: operation timeout must be positive")
	}

	store := &Store{
		db:               cfg.DB,
		operationTimeout: cfg.OperationTimeout,
	}
	return store, nil
}
// Close is a no-op for the PostgreSQL-backed store and always returns nil:
// the connection pool is owned by the caller (the runtime) and closed once
// the runtime shuts down. The method remains so the runtime wiring can treat
// the store like the previous Redis-backed implementation.
func (store *Store) Close() error {
return nil
}
// Ping verifies that the configured PostgreSQL backend is reachable. It
// runs `db.PingContext` under the configured operation timeout.
//
// It returns an error when the store is nil, when the bounded context cannot
// be derived, or when the ping itself fails (wrapped with the
// "ping postgres notification store" prefix).
func (store *Store) Ping(ctx context.Context) error {
	// Guard the receiver like the other Store methods do, so a nil store
	// reports an error instead of panicking on the field access below.
	if store == nil {
		return errors.New("ping postgres notification store: nil store")
	}

	operationCtx, cancel, err := withTimeout(ctx, "ping postgres notification store", store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()

	if err := store.db.PingContext(operationCtx); err != nil {
		return fmt.Errorf("ping postgres notification store: %w", err)
	}
	return nil
}
// withTx runs fn inside a BEGIN … COMMIT transaction bounded by the store's
// operation timeout. It rolls back on any error or panic and returns
// whatever fn returned. The transaction uses the default isolation level
// (`READ COMMITTED`); per-row contention is resolved through optimistic
// concurrency on `updated_at` rather than `SELECT … FOR UPDATE`.
//
// Errors from fn are returned unchanged so sentinel errors stay comparable
// with errors.Is; begin and commit failures are wrapped with the operation
// name. A panic raised by fn propagates to the caller after the rollback.
func (store *Store) withTx(ctx context.Context, operation string, fn func(ctx context.Context, tx *sql.Tx) error) error {
	operationCtx, cancel, err := withTimeout(ctx, operation, store.operationTimeout)
	if err != nil {
		return err
	}
	defer cancel()

	tx, err := store.db.BeginTx(operationCtx, nil)
	if err != nil {
		return fmt.Errorf("%s: begin: %w", operation, err)
	}

	// Roll back explicitly on every non-committed exit, including a panic in
	// fn. This defer runs before cancel (LIFO), so the rollback is issued
	// while the operation context is still live instead of depending on the
	// asynchronous rollback that context cancellation triggers.
	committed := false
	defer func() {
		if !committed {
			// Best effort: the original failure is what the caller needs to
			// see, and after a failed Commit this only yields sql.ErrTxDone.
			_ = tx.Rollback()
		}
	}()

	if err := fn(operationCtx, tx); err != nil {
		return err
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("%s: commit: %w", operation, err)
	}
	committed = true
	return nil
}
// operationContext derives the bounded context for a single-statement read
// or write that does not need a transaction envelope. It delegates to
// withTimeout with the store's configured operation timeout, which is the
// same bound store.withTx applies to transactional callers.
func (store *Store) operationContext(ctx context.Context, operation string) (context.Context, context.CancelFunc, error) {
	timeout := store.operationTimeout
	return withTimeout(ctx, operation, timeout)
}
@@ -0,0 +1,567 @@
package notificationstore
import (
"context"
"errors"
"testing"
"time"
"galaxy/notification/internal/api/intentstream"
"galaxy/notification/internal/service/acceptintent"
"galaxy/notification/internal/service/malformedintent"
"galaxy/notification/internal/service/routestate"
)
// TestPing checks that a freshly built test store can reach its backend.
func TestPing(t *testing.T) {
	err := newTestStore(t).Ping(context.Background())
	if err != nil {
		t.Fatalf("ping: %v", err)
	}
}
// TestCreateAcceptanceAndReads accepts one notification with a push and an
// email route, then reads the notification, its idempotency reservation, and
// the push route back and compares them with what was written.
func TestCreateAcceptanceAndReads(t *testing.T) {
	var (
		store = newTestStore(t)
		ctx   = context.Background()
		now   = time.Now().UTC().Truncate(time.Millisecond)
	)

	record := newNotification(t, "n-1", now)
	push := newPendingRoute(record.NotificationID, "push:user-1", intentstream.ChannelPush, "user-1", now)
	email := newPendingRoute(record.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	reservation := newIdempotency(record, now)

	input := acceptintent.CreateAcceptanceInput{
		Notification: record,
		Routes:       []acceptintent.NotificationRoute{push, email},
		Idempotency:  reservation,
	}
	if err := store.CreateAcceptance(ctx, input); err != nil {
		t.Fatalf("create acceptance: %v", err)
	}

	// Notification row.
	storedRecord, found, err := store.GetNotification(ctx, record.NotificationID)
	if err != nil || !found {
		t.Fatalf("get notification: found=%v err=%v", found, err)
	}
	if storedRecord.PayloadJSON != record.PayloadJSON {
		t.Fatalf("notification payload mismatch: got %q want %q", storedRecord.PayloadJSON, record.PayloadJSON)
	}
	if len(storedRecord.RecipientUserIDs) != 1 || storedRecord.RecipientUserIDs[0] != "user-1" {
		t.Fatalf("recipient_user_ids round-trip: %#v", storedRecord.RecipientUserIDs)
	}

	// Idempotency reservation.
	storedReservation, found, err := store.GetIdempotency(ctx, record.Producer, record.IdempotencyKey)
	if err != nil || !found {
		t.Fatalf("get idempotency: found=%v err=%v", found, err)
	}
	if storedReservation.NotificationID != record.NotificationID {
		t.Fatalf("idempotency notification id mismatch: got %q want %q", storedReservation.NotificationID, record.NotificationID)
	}
	if !storedReservation.ExpiresAt.Equal(reservation.ExpiresAt) {
		t.Fatalf("idempotency expires_at mismatch: got %v want %v", storedReservation.ExpiresAt, reservation.ExpiresAt)
	}

	// Push route.
	storedPush, found, err := store.GetRoute(ctx, record.NotificationID, push.RouteID)
	if err != nil || !found {
		t.Fatalf("get push route: found=%v err=%v", found, err)
	}
	if storedPush.Channel != intentstream.ChannelPush {
		t.Fatalf("push route channel mismatch: got %q", storedPush.Channel)
	}
	if !storedPush.NextAttemptAt.Equal(push.NextAttemptAt) {
		t.Fatalf("push route next_attempt_at mismatch: got %v want %v", storedPush.NextAttemptAt, push.NextAttemptAt)
	}
}
// TestCreateAcceptanceIdempotencyConflict accepts one notification and then
// submits a second one with a different notification id but the same
// producer and idempotency key, expecting acceptintent.ErrConflict.
func TestCreateAcceptanceIdempotencyConflict(t *testing.T) {
	var (
		store = newTestStore(t)
		ctx   = context.Background()
		now   = time.Now().UTC().Truncate(time.Millisecond)
	)

	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "push:user-1", intentstream.ChannelPush, "user-1", now)
	original := acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}
	if err := store.CreateAcceptance(ctx, original); err != nil {
		t.Fatalf("first acceptance: %v", err)
	}

	// Same idempotency key, new notification id, accepted one second later.
	later := now.Add(time.Second)
	duplicate := notification
	duplicate.NotificationID = "n-2"
	duplicate.AcceptedAt = later
	duplicate.UpdatedAt = later

	duplicateRoute := route
	duplicateRoute.NotificationID = duplicate.NotificationID

	duplicateIdem := newIdempotency(duplicate, later)
	duplicateIdem.IdempotencyKey = notification.IdempotencyKey

	replay := acceptintent.CreateAcceptanceInput{
		Notification: duplicate,
		Routes:       []acceptintent.NotificationRoute{duplicateRoute},
		Idempotency:  duplicateIdem,
	}
	if err := store.CreateAcceptance(ctx, replay); !errors.Is(err, acceptintent.ErrConflict) {
		t.Fatalf("expected acceptintent.ErrConflict, got %v", err)
	}
}
// TestListDueRoutes stores one route scheduled a minute in the past and one
// scheduled an hour in the future, then expects ListDueRoutes at the base
// time to return only the past route.
func TestListDueRoutes(t *testing.T) {
	var (
		store = newTestStore(t)
		ctx   = context.Background()
		base  = time.Now().UTC().Truncate(time.Millisecond)
	)

	accept := func(label string, record acceptintent.NotificationRecord, route acceptintent.NotificationRoute) {
		t.Helper()
		err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
			Notification: record,
			Routes:       []acceptintent.NotificationRoute{route},
			Idempotency:  newIdempotency(record, base),
		})
		if err != nil {
			t.Fatalf("acceptance %s: %v", label, err)
		}
	}

	pastNotification := newNotification(t, "past", base)
	pastRoute := newPendingRoute(pastNotification.NotificationID, "push:past", intentstream.ChannelPush, "user-1", base.Add(-time.Minute))
	accept("past", pastNotification, pastRoute)

	futureNotification := newNotification(t, "future", base)
	futureNotification.IdempotencyKey = "key-future"
	futureRoute := newPendingRoute(futureNotification.NotificationID, "push:future", intentstream.ChannelPush, "user-2", base.Add(time.Hour))
	accept("future", futureNotification, futureRoute)

	due, err := store.ListDueRoutes(ctx, base, 10)
	if err != nil {
		t.Fatalf("list due routes: %v", err)
	}
	if len(due) != 1 {
		t.Fatalf("expected one due route, got %d", len(due))
	}
	if only := due[0]; only.NotificationID != "past" || only.RouteID != "push:past" {
		t.Fatalf("unexpected due route: %#v", only)
	}
}
// TestCompleteRoutePublishedHappyPath accepts one email route, completes it
// as published, and verifies the persisted status, attempt count, cleared
// schedule, and published_at timestamp.
func TestCompleteRoutePublishedHappyPath(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)

	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}); err != nil {
		t.Fatalf("acceptance: %v", err)
	}

	publishedAt := now.Add(time.Second)
	err := store.CompleteRoutePublished(ctx, routestate.CompleteRoutePublishedInput{
		ExpectedRoute: route,
		LeaseToken:    "token",
		PublishedAt:   publishedAt,
		Stream:        "mail:delivery_commands",
		StreamValues:  map[string]any{"k": "v"},
	})
	if err != nil {
		t.Fatalf("complete published: %v", err)
	}

	// Check found as well as err: a missing row would otherwise surface as a
	// misleading field mismatch on the zero-value route.
	got, found, err := store.GetRoute(ctx, route.NotificationID, route.RouteID)
	if err != nil || !found {
		t.Fatalf("get route: found=%v err=%v", found, err)
	}
	if got.Status != acceptintent.RouteStatusPublished {
		t.Fatalf("expected status published, got %q", got.Status)
	}
	if got.AttemptCount != 1 {
		t.Fatalf("expected attempt_count 1, got %d", got.AttemptCount)
	}
	if !got.NextAttemptAt.IsZero() {
		t.Fatalf("expected next_attempt_at cleared, got %v", got.NextAttemptAt)
	}
	if !got.PublishedAt.Equal(publishedAt) {
		t.Fatalf("expected published_at %v, got %v", publishedAt, got.PublishedAt)
	}
}
// TestCompleteRoutePublishedConflictOnUpdatedAtMismatch completes a route
// using an expected row whose updated_at differs from the stored one and
// expects routestate.ErrConflict.
func TestCompleteRoutePublishedConflictOnUpdatedAtMismatch(t *testing.T) {
	var (
		store = newTestStore(t)
		ctx   = context.Background()
		now   = time.Now().UTC().Truncate(time.Millisecond)
	)

	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	accepted := acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}
	if err := store.CreateAcceptance(ctx, accepted); err != nil {
		t.Fatalf("acceptance: %v", err)
	}

	// Deliberately present an updated_at token that no longer matches the row.
	outdated := route
	outdated.UpdatedAt = now.Add(-time.Minute)

	completion := routestate.CompleteRoutePublishedInput{
		ExpectedRoute: outdated,
		LeaseToken:    "token",
		PublishedAt:   now.Add(time.Second),
		Stream:        "mail:delivery_commands",
		StreamValues:  map[string]any{"k": "v"},
	}
	if err := store.CompleteRoutePublished(ctx, completion); !errors.Is(err, routestate.ErrConflict) {
		t.Fatalf("expected routestate.ErrConflict, got %v", err)
	}
}
// TestCompleteRouteFailedReschedule accepts one email route, records a
// retryable failure, and verifies the persisted status, attempt count,
// rescheduled next_attempt_at, and error classification.
func TestCompleteRouteFailedReschedule(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)

	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}); err != nil {
		t.Fatalf("acceptance: %v", err)
	}

	failedAt := now.Add(time.Second)
	nextAttemptAt := now.Add(2 * time.Minute)
	err := store.CompleteRouteFailed(ctx, routestate.CompleteRouteFailedInput{
		ExpectedRoute:         route,
		LeaseToken:            "token",
		FailedAt:              failedAt,
		NextAttemptAt:         nextAttemptAt,
		FailureClassification: "smtp_temporary_failure",
		FailureMessage:        "graylisted",
	})
	if err != nil {
		t.Fatalf("complete failed: %v", err)
	}

	// Check found as well as err: a missing row would otherwise surface as a
	// misleading field mismatch on the zero-value route.
	got, found, err := store.GetRoute(ctx, route.NotificationID, route.RouteID)
	if err != nil || !found {
		t.Fatalf("get route: found=%v err=%v", found, err)
	}
	if got.Status != acceptintent.RouteStatusFailed {
		t.Fatalf("expected status failed, got %q", got.Status)
	}
	if got.AttemptCount != 1 {
		t.Fatalf("expected attempt_count 1, got %d", got.AttemptCount)
	}
	if !got.NextAttemptAt.Equal(nextAttemptAt) {
		t.Fatalf("expected next_attempt_at %v, got %v", nextAttemptAt, got.NextAttemptAt)
	}
	if got.LastErrorClassification != "smtp_temporary_failure" {
		t.Fatalf("expected error classification, got %q", got.LastErrorClassification)
	}
}
// TestCompleteRouteDeadLetter accepts one email route with a single-attempt
// budget, dead-letters it, and verifies both the persisted route state and
// the inserted dead_letters audit row.
func TestCompleteRouteDeadLetter(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	now := time.Now().UTC().Truncate(time.Millisecond)

	notification := newNotification(t, "n-1", now)
	route := newPendingRoute(notification.NotificationID, "email:user-1", intentstream.ChannelEmail, "user-1", now)
	route.MaxAttempts = 1 // single attempt budget so the first failure is terminal.
	if err := store.CreateAcceptance(ctx, acceptintent.CreateAcceptanceInput{
		Notification: notification,
		Routes:       []acceptintent.NotificationRoute{route},
		Idempotency:  newIdempotency(notification, now),
	}); err != nil {
		t.Fatalf("acceptance: %v", err)
	}

	deadAt := now.Add(time.Second)
	err := store.CompleteRouteDeadLetter(ctx, routestate.CompleteRouteDeadLetterInput{
		ExpectedRoute:         route,
		LeaseToken:            "token",
		DeadLetteredAt:        deadAt,
		FailureClassification: "smtp_permanent_failure",
		FailureMessage:        "rejected",
		RecoveryHint:          "manual review",
	})
	if err != nil {
		t.Fatalf("complete dead letter: %v", err)
	}

	// Check found as well as err: a missing row would otherwise surface as a
	// misleading field mismatch on the zero-value route.
	got, found, err := store.GetRoute(ctx, route.NotificationID, route.RouteID)
	if err != nil || !found {
		t.Fatalf("get route: found=%v err=%v", found, err)
	}
	if got.Status != acceptintent.RouteStatusDeadLetter {
		t.Fatalf("expected status dead_letter, got %q", got.Status)
	}
	if !got.DeadLetteredAt.Equal(deadAt) {
		t.Fatalf("expected dead_lettered_at %v, got %v", deadAt, got.DeadLetteredAt)
	}

	// Check that the dead_letters audit row was inserted. The query runs under
	// the test context like every other store call in this test.
	row := store.db.QueryRowContext(ctx,
		`SELECT failure_classification, recovery_hint FROM dead_letters WHERE notification_id = $1 AND route_id = $2`,
		route.NotificationID, route.RouteID)
	var (
		classification string
		hint           string
	)
	if err := row.Scan(&classification, &hint); err != nil {
		t.Fatalf("scan dead_letter row: %v", err)
	}
	if classification != "smtp_permanent_failure" || hint != "manual review" {
		t.Fatalf("dead_letter row mismatch: classification=%q hint=%q", classification, hint)
	}
}
// TestReadRouteScheduleSnapshot stores three scheduled routes (one an hour
// in the past, two in the future) and expects a snapshot depth of three with
// the past timestamp reported as the oldest.
func TestReadRouteScheduleSnapshot(t *testing.T) {
	var (
		store = newTestStore(t)
		ctx   = context.Background()
		base  = time.Now().UTC().Truncate(time.Millisecond)
	)

	offsets := []time.Duration{-time.Hour, time.Minute, 2 * time.Minute}
	for index, offset := range offsets {
		notification := newNotification(t, idString("n-", index), base)
		notification.IdempotencyKey = idString("key-", index)
		route := newPendingRoute(notification.NotificationID, idString("push:user-", index), intentstream.ChannelPush, idString("user-", index), base.Add(offset))

		input := acceptintent.CreateAcceptanceInput{
			Notification: notification,
			Routes:       []acceptintent.NotificationRoute{route},
			Idempotency:  newIdempotency(notification, base),
		}
		if err := store.CreateAcceptance(ctx, input); err != nil {
			t.Fatalf("acceptance %d: %v", index, err)
		}
	}

	snapshot, err := store.ReadRouteScheduleSnapshot(ctx)
	if err != nil {
		t.Fatalf("read snapshot: %v", err)
	}
	if snapshot.Depth != 3 {
		t.Fatalf("expected depth 3, got %d", snapshot.Depth)
	}
	if snapshot.OldestScheduledFor == nil {
		t.Fatalf("expected oldest scheduled time, got nil")
	}
	if wantOldest := base.Add(-time.Hour); !snapshot.OldestScheduledFor.Equal(wantOldest) {
		t.Fatalf("expected oldest %v, got %v", wantOldest, *snapshot.OldestScheduledFor)
	}
}
// TestMalformedIntentRecordAndGet records the same malformed-intent entry
// twice (the second call must also succeed) and then reads it back by its
// stream entry id.
func TestMalformedIntentRecordAndGet(t *testing.T) {
	var (
		store = newTestStore(t)
		ctx   = context.Background()
		now   = time.Now().UTC().Truncate(time.Millisecond)
	)

	entry := malformedintent.Entry{
		StreamEntryID:    "stream-1",
		NotificationType: "game.turn.ready",
		Producer:         "game-master",
		IdempotencyKey:   "key-1",
		FailureCode:      malformedintent.FailureCodeInvalidPayload,
		FailureMessage:   "decode failed",
		RawFields:        map[string]any{"raw_payload": "abc"},
		RecordedAt:       now,
	}

	// The second pass exercises the idempotent re-record path.
	for _, attempt := range []string{"record malformed", "record malformed twice"} {
		if err := store.Record(ctx, entry); err != nil {
			t.Fatalf("%s: %v", attempt, err)
		}
	}

	stored, found, err := store.GetMalformedIntent(ctx, entry.StreamEntryID)
	if err != nil || !found {
		t.Fatalf("get malformed: found=%v err=%v", found, err)
	}
	if stored.FailureCode != malformedintent.FailureCodeInvalidPayload {
		t.Fatalf("failure_code mismatch: %q", stored.FailureCode)
	}
	if stored.FailureMessage != entry.FailureMessage {
		t.Fatalf("failure_message mismatch: %q", stored.FailureMessage)
	}
}
// TestRetentionDeletesAndCascade stores one 30-day-old dead-lettered
// notification and one fresh notification, deletes records older than seven
// days, and checks that only the old notification disappears and that no
// routes or dead_letters rows remain for it.
func TestRetentionDeletesAndCascade(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()
	old := time.Now().UTC().Add(-30 * 24 * time.Hour).Truncate(time.Millisecond)
	fresh := time.Now().UTC().Truncate(time.Millisecond)

	// Old notification: accepted, then dead-lettered on its only attempt.
	oldNotification := newNotification(t, "old", old)
	oldNotification.IdempotencyKey = "key-old"
	oldRoute := newPendingRoute(oldNotification.NotificationID, "push:user-old", intentstream.ChannelPush, "user-old", old)
	oldRoute.MaxAttempts = 1
	oldAcceptance := acceptintent.CreateAcceptanceInput{
		Notification: oldNotification,
		Routes:       []acceptintent.NotificationRoute{oldRoute},
		Idempotency:  newIdempotency(oldNotification, old),
	}
	if err := store.CreateAcceptance(ctx, oldAcceptance); err != nil {
		t.Fatalf("acceptance old: %v", err)
	}
	oldDeadLetter := routestate.CompleteRouteDeadLetterInput{
		ExpectedRoute:         oldRoute,
		LeaseToken:            "token",
		DeadLetteredAt:        old.Add(time.Second),
		FailureClassification: "smtp_permanent_failure",
		FailureMessage:        "rejected",
	}
	if err := store.CompleteRouteDeadLetter(ctx, oldDeadLetter); err != nil {
		t.Fatalf("dead letter old: %v", err)
	}

	// Fresh notification: accepted only.
	freshNotification := newNotification(t, "fresh", fresh)
	freshNotification.IdempotencyKey = "key-fresh"
	freshRoute := newPendingRoute(freshNotification.NotificationID, "push:user-fresh", intentstream.ChannelPush, "user-fresh", fresh)
	freshAcceptance := acceptintent.CreateAcceptanceInput{
		Notification: freshNotification,
		Routes:       []acceptintent.NotificationRoute{freshRoute},
		Idempotency:  newIdempotency(freshNotification, fresh),
	}
	if err := store.CreateAcceptance(ctx, freshAcceptance); err != nil {
		t.Fatalf("acceptance fresh: %v", err)
	}

	cutoff := time.Now().UTC().Add(-7 * 24 * time.Hour)
	deleted, err := store.DeleteRecordsOlderThan(ctx, cutoff)
	if err != nil {
		t.Fatalf("delete records: %v", err)
	}
	if deleted != 1 {
		t.Fatalf("expected 1 deleted, got %d", deleted)
	}
	if _, found, err := store.GetNotification(ctx, "old"); err != nil || found {
		t.Fatalf("old notification should be gone: found=%v err=%v", found, err)
	}

	// Confirm cascade emptied routes/dead_letters for old notification.
	cascaded := []struct {
		label string
		query string
	}{
		{label: "routes", query: `SELECT COUNT(*) FROM routes WHERE notification_id = 'old'`},
		{label: "dead letters", query: `SELECT COUNT(*) FROM dead_letters WHERE notification_id = 'old'`},
	}
	for _, check := range cascaded {
		var remaining int
		if err := store.db.QueryRow(check.query).Scan(&remaining); err != nil {
			t.Fatalf("count %s: %v", check.label, err)
		}
		if remaining != 0 {
			t.Fatalf("expected 0 cascaded %s, got %d", check.label, remaining)
		}
	}

	// Fresh notification stays.
	if _, found, err := store.GetNotification(ctx, "fresh"); err != nil || !found {
		t.Fatalf("fresh notification missing: found=%v err=%v", found, err)
	}
}
// TestDeleteMalformedIntentsOlderThan records one malformed intent dated 30
// days in the past and one dated now, deletes everything older than a 7-day
// cutoff, and verifies that exactly the old entry was removed while the fresh
// one is still retrievable.
func TestDeleteMalformedIntentsOlderThan(t *testing.T) {
	store := newTestStore(t)
	ctx := context.Background()

	// Both timestamps are truncated to millisecond precision before use.
	staleAt := time.Now().UTC().Add(-30 * 24 * time.Hour).Truncate(time.Millisecond)
	recentAt := time.Now().UTC().Truncate(time.Millisecond)

	// The two fixtures differ only in stream entry id and recording time.
	entryAt := func(streamEntryID string, recordedAt time.Time) malformedintent.Entry {
		return malformedintent.Entry{
			StreamEntryID:  streamEntryID,
			FailureCode:    malformedintent.FailureCodeInvalidPayload,
			FailureMessage: "decode failed",
			RawFields:      map[string]any{},
			RecordedAt:     recordedAt,
		}
	}

	if err := store.Record(ctx, entryAt("stream-old", staleAt)); err != nil {
		t.Fatalf("record old: %v", err)
	}
	if err := store.Record(ctx, entryAt("stream-fresh", recentAt)); err != nil {
		t.Fatalf("record fresh: %v", err)
	}

	// The cutoff sits between the two recorded timestamps.
	threshold := time.Now().UTC().Add(-7 * 24 * time.Hour)
	removed, err := store.DeleteMalformedIntentsOlderThan(ctx, threshold)
	if err != nil {
		t.Fatalf("delete: %v", err)
	}
	if removed != 1 {
		t.Fatalf("expected 1 deleted, got %d", removed)
	}

	// The old entry must no longer be found.
	_, staleFound, staleErr := store.GetMalformedIntent(ctx, "stream-old")
	if staleErr != nil || staleFound {
		t.Fatalf("old malformed intent should be gone: found=%v err=%v", staleFound, staleErr)
	}

	// The fresh entry must survive the cleanup.
	_, recentFound, recentErr := store.GetMalformedIntent(ctx, "stream-fresh")
	if recentErr != nil || !recentFound {
		t.Fatalf("fresh malformed intent missing: found=%v err=%v", recentFound, recentErr)
	}
}
// ---- helpers ----
// newNotification returns a NotificationRecord fixture for a game-turn-ready
// notification produced by the game master and addressed to the single user
// "user-1". The idempotency key and request fingerprint are derived from id
// ("key-"+id and "fp-"+id), and occurred is used for the OccurredAt,
// AcceptedAt, and UpdatedAt timestamps alike.
func newNotification(t testing.TB, id string, occurred time.Time) acceptintent.NotificationRecord {
	t.Helper()

	var record acceptintent.NotificationRecord

	// Identity and classification.
	record.NotificationID = id
	record.NotificationType = intentstream.NotificationTypeGameTurnReady
	record.Producer = intentstream.ProducerGameMaster
	record.AudienceKind = intentstream.AudienceKindUser
	record.RecipientUserIDs = []string{"user-1"}
	record.PayloadJSON = `{"a":1}`

	// Idempotency fields are derived from the notification id.
	record.IdempotencyKey = "key-" + id
	record.RequestFingerprint = "fp-" + id

	// All three timestamps share the same instant.
	record.OccurredAt = occurred
	record.AcceptedAt = occurred
	record.UpdatedAt = occurred

	return record
}
// newIdempotency returns the IdempotencyRecord fixture matching record: the
// producer, idempotency key, notification id, and request fingerprint are
// copied from it, CreatedAt is createdAt, and ExpiresAt is seven days later.
func newIdempotency(record acceptintent.NotificationRecord, createdAt time.Time) acceptintent.IdempotencyRecord {
	const lifetime = 7 * 24 * time.Hour

	reservation := acceptintent.IdempotencyRecord{
		Producer:           record.Producer,
		IdempotencyKey:     record.IdempotencyKey,
		NotificationID:     record.NotificationID,
		RequestFingerprint: record.RequestFingerprint,
	}
	reservation.CreatedAt = createdAt
	reservation.ExpiresAt = createdAt.Add(lifetime)
	return reservation
}
// newPendingRoute returns a NotificationRoute fixture in pending status with
// zero attempts made out of a maximum of three. The recipient reference is
// "user:"+recipient, the resolved email is recipient+"@example.com", the
// locale is "en", and dueAt is used for NextAttemptAt, CreatedAt, and
// UpdatedAt.
func newPendingRoute(notificationID string, routeID string, channel intentstream.Channel, recipient string, dueAt time.Time) acceptintent.NotificationRoute {
	var route acceptintent.NotificationRoute

	// Addressing.
	route.NotificationID = notificationID
	route.RouteID = routeID
	route.Channel = channel
	route.RecipientRef = "user:" + recipient
	route.ResolvedEmail = recipient + "@example.com"
	route.ResolvedLocale = "en"

	// Delivery state: nothing attempted yet.
	route.Status = acceptintent.RouteStatusPending
	route.AttemptCount = 0
	route.MaxAttempts = 3

	// Scheduling and bookkeeping timestamps all start at dueAt.
	route.NextAttemptAt = dueAt
	route.CreatedAt = dueAt
	route.UpdatedAt = dueAt

	return route
}
// idString returns prefix followed by the decimal digit of index when index
// is 0, 1, or 2. Every other index (including negative values) yields
// prefix followed by "n", so such indexes are not distinguishable from one
// another.
func idString(prefix string, index int) string {
	digits := [...]string{"0", "1", "2"}
	if index >= 0 && index < len(digits) {
		return prefix + digits[index]
	}
	return prefix + "n"
}