package app

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"time"

	"galaxy/notification/internal/adapters/postgres/migrations"
	"galaxy/notification/internal/adapters/postgres/notificationstore"
	"galaxy/notification/internal/adapters/postgres/routepublisher"
	redisadapter "galaxy/notification/internal/adapters/redis"
	"galaxy/notification/internal/adapters/redisstate"
	userserviceadapter "galaxy/notification/internal/adapters/userservice"
	"galaxy/notification/internal/api/internalhttp"
	"galaxy/notification/internal/config"
	"galaxy/notification/internal/service/acceptintent"
	"galaxy/notification/internal/telemetry"
	"galaxy/notification/internal/worker"
	"galaxy/postgres"

	"github.com/redis/go-redis/v9"
)

// systemClock satisfies the worker.Clock contract for runtime wiring.
// It simply forwards to the real wall clock.
type systemClock struct{}

func (systemClock) Now() time.Time { return time.Now() }

// Runtime owns the runnable Notification Service process plus the cleanup
// functions that release runtime resources after shutdown.
type Runtime struct {
	cfg config.Config
	// app aggregates the probe server and workers into one runnable unit.
	app         *App
	probeServer *internalhttp.Server
	telemetry   *telemetry.Runtime
	// Stream workers: intent ingestion plus the outbound push/email publishers.
	intentConsumer  *worker.IntentConsumer
	pushPublisher   *worker.PushPublisher
	emailPublisher  *worker.EmailPublisher
	retentionWorker *worker.SQLRetentionWorker
	// cleanupFns is executed in reverse order by Close; entries are appended
	// in construction order inside NewRuntime.
	cleanupFns []func() error
}

// NewRuntime constructs the runnable Notification Service process from cfg.
//
// PostgreSQL migrations apply strictly before any HTTP listener becomes
// ready. The runtime opens one shared `*redis.Client` consumed by the intent
// consumer (XREAD), the publishers (outbound XADDs), the route lease store,
// and the persisted stream offset store. Per PG_PLAN.md §5 the durable
// notification state lives in PostgreSQL while the lease key, the consumer
// offset, and the streams themselves remain on Redis.
func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*Runtime, error) { if ctx == nil { return nil, fmt.Errorf("new notification runtime: nil context") } if err := cfg.Validate(); err != nil { return nil, fmt.Errorf("new notification runtime: %w", err) } if logger == nil { logger = slog.Default() } runtime := &Runtime{ cfg: cfg, } cleanupOnError := func(err error) (*Runtime, error) { if cleanupErr := runtime.Close(); cleanupErr != nil { return nil, fmt.Errorf("%w; cleanup: %w", err, cleanupErr) } return nil, err } telemetryRuntime, err := telemetry.NewProcess(ctx, telemetry.ProcessConfig{ ServiceName: cfg.Telemetry.ServiceName, TracesExporter: cfg.Telemetry.TracesExporter, MetricsExporter: cfg.Telemetry.MetricsExporter, TracesProtocol: cfg.Telemetry.TracesProtocol, MetricsProtocol: cfg.Telemetry.MetricsProtocol, StdoutTracesEnabled: cfg.Telemetry.StdoutTracesEnabled, StdoutMetricsEnabled: cfg.Telemetry.StdoutMetricsEnabled, }, logger.With("component", "telemetry")) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: telemetry: %w", err)) } runtime.telemetry = telemetryRuntime runtime.cleanupFns = append(runtime.cleanupFns, func() error { shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout) defer cancel() return telemetryRuntime.Shutdown(shutdownCtx) }) redisClient := redisadapter.NewClient(cfg.Redis) if err := redisadapter.InstrumentClient(redisClient, telemetryRuntime); err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: %w", err)) } runtime.cleanupFns = append(runtime.cleanupFns, func() error { err := redisClient.Close() if errors.Is(err, redis.ErrClosed) { return nil } return err }) if err := redisadapter.Ping(ctx, cfg.Redis, redisClient); err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: %w", err)) } pgPool, err := postgres.OpenPrimary(ctx, cfg.Postgres.Conn, postgres.WithTracerProvider(telemetryRuntime.TracerProvider()), 
postgres.WithMeterProvider(telemetryRuntime.MeterProvider()), ) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: open postgres: %w", err)) } runtime.cleanupFns = append(runtime.cleanupFns, pgPool.Close) unregisterPGStats, err := postgres.InstrumentDBStats(pgPool, postgres.WithMeterProvider(telemetryRuntime.MeterProvider()), ) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: instrument postgres: %w", err)) } runtime.cleanupFns = append(runtime.cleanupFns, func() error { unregisterPGStats() return nil }) if err := postgres.Ping(ctx, pgPool, cfg.Postgres.Conn.OperationTimeout); err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: ping postgres: %w", err)) } if err := postgres.RunMigrations(ctx, pgPool, migrations.FS(), "."); err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: run postgres migrations: %w", err)) } notificationStore, err := notificationstore.New(notificationstore.Config{ DB: pgPool, OperationTimeout: cfg.Postgres.Conn.OperationTimeout, }) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: notification store: %w", err)) } leaseStore, err := redisstate.NewLeaseStore(redisClient) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: lease store: %w", err)) } streamOffsetStore, err := redisstate.NewStreamOffsetStore(redisClient) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: stream offset store: %w", err)) } intentStreamLagReader, err := redisstate.NewIntentStreamLagReader(streamOffsetStore, cfg.Streams.Intents) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: intent stream lag reader: %w", err)) } publisherStore, err := routepublisher.New(notificationStore, leaseStore) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: route publisher store: %w", err)) } telemetryRuntime.SetRouteScheduleSnapshotReader(notificationStore) 
telemetryRuntime.SetIntentStreamLagSnapshotReader(intentStreamLagReader) userDirectory, err := userserviceadapter.NewClient(userserviceadapter.Config{ BaseURL: cfg.UserService.BaseURL, RequestTimeout: cfg.UserService.Timeout, }) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: user service client: %w", err)) } runtime.cleanupFns = append(runtime.cleanupFns, userDirectory.Close) acceptIntentService, err := acceptintent.New(acceptintent.Config{ Store: notificationStore, UserDirectory: userDirectory, Clock: nil, Logger: logger, Telemetry: telemetryRuntime, PushMaxAttempts: cfg.Retry.PushMaxAttempts, EmailMaxAttempts: cfg.Retry.EmailMaxAttempts, IdempotencyTTL: cfg.Retry.IdempotencyTTL, AdminRouting: cfg.AdminRouting, }) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: accept intent service: %w", err)) } intentConsumer, err := worker.NewIntentConsumer(worker.IntentConsumerConfig{ Client: redisClient, Stream: cfg.Streams.Intents, BlockTimeout: cfg.IntentsReadBlockTimeout, Acceptor: acceptIntentService, MalformedRecorder: notificationStore, OffsetStore: streamOffsetStore, Telemetry: telemetryRuntime, }, logger) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: intent consumer: %w", err)) } runtime.intentConsumer = intentConsumer pushPublisher, err := worker.NewPushPublisher(worker.PushPublisherConfig{ Store: publisherStore, GatewayStream: cfg.Streams.GatewayClientEvents, GatewayStreamMaxLen: cfg.Streams.GatewayClientEventsStreamMaxLen, RouteLeaseTTL: cfg.Retry.RouteLeaseTTL, RouteBackoffMin: cfg.Retry.RouteBackoffMin, RouteBackoffMax: cfg.Retry.RouteBackoffMax, Encoder: nil, Telemetry: telemetryRuntime, Clock: nil, StreamPublisher: redisClient, }, logger) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: push publisher: %w", err)) } runtime.pushPublisher = pushPublisher emailPublisher, err := worker.NewEmailPublisher(worker.EmailPublisherConfig{ Store: 
publisherStore, MailDeliveryCommandsStream: cfg.Streams.MailDeliveryCommands, RouteLeaseTTL: cfg.Retry.RouteLeaseTTL, RouteBackoffMin: cfg.Retry.RouteBackoffMin, RouteBackoffMax: cfg.Retry.RouteBackoffMax, Encoder: nil, Telemetry: telemetryRuntime, Clock: nil, StreamPublisher: redisClient, }, logger) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: email publisher: %w", err)) } runtime.emailPublisher = emailPublisher retentionWorker, err := worker.NewSQLRetentionWorker(worker.SQLRetentionConfig{ Store: notificationStore, RecordRetention: cfg.Retention.RecordRetention, MalformedIntentRetention: cfg.Retention.MalformedIntentRetention, CleanupInterval: cfg.Retention.CleanupInterval, Clock: systemClock{}, }, logger) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: sql retention worker: %w", err)) } runtime.retentionWorker = retentionWorker probeServer, err := internalhttp.NewServer(internalhttp.Config{ Addr: cfg.InternalHTTP.Addr, ReadHeaderTimeout: cfg.InternalHTTP.ReadHeaderTimeout, ReadTimeout: cfg.InternalHTTP.ReadTimeout, IdleTimeout: cfg.InternalHTTP.IdleTimeout, }, internalhttp.Dependencies{ Logger: logger, Telemetry: telemetryRuntime, }) if err != nil { return cleanupOnError(fmt.Errorf("new notification runtime: internal HTTP server: %w", err)) } runtime.probeServer = probeServer runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher, retentionWorker) return runtime, nil } // Run serves the private probe HTTP listener until ctx is canceled or one // component fails. func (runtime *Runtime) Run(ctx context.Context) error { if ctx == nil { return errors.New("run notification runtime: nil context") } if runtime == nil { return errors.New("run notification runtime: nil runtime") } if runtime.app == nil { return errors.New("run notification runtime: nil app") } return runtime.app.Run(ctx) } // Close releases every runtime dependency in reverse construction order. 
func (runtime *Runtime) Close() error { if runtime == nil { return nil } var joined error for index := len(runtime.cleanupFns) - 1; index >= 0; index-- { if err := runtime.cleanupFns[index](); err != nil { joined = errors.Join(joined, err) } } return joined }