// galaxy-game/notification/internal/app/runtime.go

package app

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"time"

	"galaxy/notification/internal/adapters/postgres/migrations"
	"galaxy/notification/internal/adapters/postgres/notificationstore"
	"galaxy/notification/internal/adapters/postgres/routepublisher"
	redisadapter "galaxy/notification/internal/adapters/redis"
	"galaxy/notification/internal/adapters/redisstate"
	userserviceadapter "galaxy/notification/internal/adapters/userservice"
	"galaxy/notification/internal/api/internalhttp"
	"galaxy/notification/internal/config"
	"galaxy/notification/internal/service/acceptintent"
	"galaxy/notification/internal/telemetry"
	"galaxy/notification/internal/worker"
	"galaxy/postgres"

	"github.com/redis/go-redis/v9"
)

// systemClock satisfies the worker.Clock contract for runtime wiring.
type systemClock struct{}

func (systemClock) Now() time.Time { return time.Now() }

// Runtime owns the runnable Notification Service process plus the cleanup
// functions that release runtime resources after shutdown.
type Runtime struct {
	cfg             config.Config
	app             *App
	probeServer     *internalhttp.Server
	telemetry       *telemetry.Runtime
	intentConsumer  *worker.IntentConsumer
	pushPublisher   *worker.PushPublisher
	emailPublisher  *worker.EmailPublisher
	retentionWorker *worker.SQLRetentionWorker
	cleanupFns      []func() error
}

// NewRuntime constructs the runnable Notification Service process from cfg.
//
// PostgreSQL migrations apply strictly before any HTTP listener becomes
// ready. The runtime opens one shared *redis.Client consumed by the intent
// consumer (XREAD), the publishers (outbound XADDs), the route lease store,
// and the persisted stream offset store. Per PG_PLAN.md §5 the durable
// notification state lives in PostgreSQL while the lease key, the consumer
// offset, and the streams themselves remain on Redis.
func NewRuntime(ctx context.Context, cfg config.Config, logger *slog.Logger) (*Runtime, error) {
	if ctx == nil {
		return nil, errors.New("new notification runtime: nil context")
	}
	if err := cfg.Validate(); err != nil {
		return nil, fmt.Errorf("new notification runtime: %w", err)
	}
	if logger == nil {
		logger = slog.Default()
	}

	runtime := &Runtime{
		cfg: cfg,
	}
	// cleanupOnError unwinds every resource acquired so far before
	// surfacing the construction error.
	cleanupOnError := func(err error) (*Runtime, error) {
		if cleanupErr := runtime.Close(); cleanupErr != nil {
			return nil, fmt.Errorf("%w; cleanup: %w", err, cleanupErr)
		}
		return nil, err
	}
	telemetryRuntime, err := telemetry.NewProcess(ctx, telemetry.ProcessConfig{
		ServiceName:          cfg.Telemetry.ServiceName,
		TracesExporter:       cfg.Telemetry.TracesExporter,
		MetricsExporter:      cfg.Telemetry.MetricsExporter,
		TracesProtocol:       cfg.Telemetry.TracesProtocol,
		MetricsProtocol:      cfg.Telemetry.MetricsProtocol,
		StdoutTracesEnabled:  cfg.Telemetry.StdoutTracesEnabled,
		StdoutMetricsEnabled: cfg.Telemetry.StdoutMetricsEnabled,
	}, logger.With("component", "telemetry"))
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: telemetry: %w", err))
	}
	runtime.telemetry = telemetryRuntime
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
		defer cancel()
		return telemetryRuntime.Shutdown(shutdownCtx)
	})
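
	// One shared Redis client serves the intent consumer (XREAD), both
	// publishers (outbound XADDs), the route lease store, and the persisted
	// stream offset store; its cleanup tolerates an already-closed client.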
	redisClient := redisadapter.NewClient(cfg.Redis)
	if err := redisadapter.InstrumentClient(redisClient, telemetryRuntime); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		err := redisClient.Close()
		if errors.Is(err, redis.ErrClosed) {
			return nil
		}
		return err
	})
	if err := redisadapter.Ping(ctx, cfg.Redis, redisClient); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: %w", err))
	}
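
	// The PostgreSQL pool opens with tracing and metrics wired to the shared
	// telemetry runtime; the pool stats instrumentation registered below is
	// unregistered again during cleanup.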
	pgPool, err := postgres.OpenPrimary(ctx, cfg.Postgres.Conn,
		postgres.WithTracerProvider(telemetryRuntime.TracerProvider()),
		postgres.WithMeterProvider(telemetryRuntime.MeterProvider()),
	)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: open postgres: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, pgPool.Close)
	unregisterPGStats, err := postgres.InstrumentDBStats(pgPool,
		postgres.WithMeterProvider(telemetryRuntime.MeterProvider()),
	)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: instrument postgres: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, func() error {
		unregisterPGStats()
		return nil
	})
	if err := postgres.Ping(ctx, pgPool, cfg.Postgres.Conn.OperationTimeout); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: ping postgres: %w", err))
	}
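
	// Migrations run after a successful ping and strictly before the probe
	// listener is even constructed, so readiness never precedes the schema.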
	if err := postgres.RunMigrations(ctx, pgPool, migrations.FS(), "."); err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: run postgres migrations: %w", err))
	}
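
	// Durable notification state lives in PostgreSQL (PG_PLAN.md §5).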
	notificationStore, err := notificationstore.New(notificationstore.Config{
		DB:               pgPool,
		OperationTimeout: cfg.Postgres.Conn.OperationTimeout,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: notification store: %w", err))
	}
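
	// Route leases and the consumer's stream offset stay on Redis; the lag
	// reader derives intent-stream lag from the persisted offset.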
	leaseStore, err := redisstate.NewLeaseStore(redisClient)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: lease store: %w", err))
	}
	streamOffsetStore, err := redisstate.NewStreamOffsetStore(redisClient)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: stream offset store: %w", err))
	}
	intentStreamLagReader, err := redisstate.NewIntentStreamLagReader(streamOffsetStore, cfg.Streams.Intents)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: intent stream lag reader: %w", err))
	}
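
	// routepublisher composes the PostgreSQL store with the Redis lease store
	// into the single storage surface both publishers consume.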
	publisherStore, err := routepublisher.New(notificationStore, leaseStore)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: route publisher store: %w", err))
	}
	telemetryRuntime.SetRouteScheduleSnapshotReader(notificationStore)
	telemetryRuntime.SetIntentStreamLagSnapshotReader(intentStreamLagReader)
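
	// The user service client acts as the user directory during intent
	// acceptance and is closed on shutdown.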
	userDirectory, err := userserviceadapter.NewClient(userserviceadapter.Config{
		BaseURL:        cfg.UserService.BaseURL,
		RequestTimeout: cfg.UserService.Timeout,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: user service client: %w", err))
	}
	runtime.cleanupFns = append(runtime.cleanupFns, userDirectory.Close)
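
	// Retry budgets, the idempotency TTL, and admin routing come from cfg; a
	// nil Clock defers to whatever default acceptintent.New applies.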
	acceptIntentService, err := acceptintent.New(acceptintent.Config{
		Store:            notificationStore,
		UserDirectory:    userDirectory,
		Clock:            nil,
		Logger:           logger,
		Telemetry:        telemetryRuntime,
		PushMaxAttempts:  cfg.Retry.PushMaxAttempts,
		EmailMaxAttempts: cfg.Retry.EmailMaxAttempts,
		IdempotencyTTL:   cfg.Retry.IdempotencyTTL,
		AdminRouting:     cfg.AdminRouting,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: accept intent service: %w", err))
	}
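
	// The intent consumer blocks on XREAD against the intents stream, hands
	// entries to the accept service, records malformed payloads, and
	// persists its read offset across restarts.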
	intentConsumer, err := worker.NewIntentConsumer(worker.IntentConsumerConfig{
		Client:            redisClient,
		Stream:            cfg.Streams.Intents,
		BlockTimeout:      cfg.IntentsReadBlockTimeout,
		Acceptor:          acceptIntentService,
		MalformedRecorder: notificationStore,
		OffsetStore:       streamOffsetStore,
		Telemetry:         telemetryRuntime,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: intent consumer: %w", err))
	}
	runtime.intentConsumer = intentConsumer
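
	// The push publisher XADDs client events onto the length-capped gateway
	// stream under a route lease; nil Encoder and Clock defer to whatever
	// defaults worker.NewPushPublisher applies.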
	pushPublisher, err := worker.NewPushPublisher(worker.PushPublisherConfig{
		Store:               publisherStore,
		GatewayStream:       cfg.Streams.GatewayClientEvents,
		GatewayStreamMaxLen: cfg.Streams.GatewayClientEventsStreamMaxLen,
		RouteLeaseTTL:       cfg.Retry.RouteLeaseTTL,
		RouteBackoffMin:     cfg.Retry.RouteBackoffMin,
		RouteBackoffMax:     cfg.Retry.RouteBackoffMax,
		Encoder:             nil,
		Telemetry:           telemetryRuntime,
		Clock:               nil,
		StreamPublisher:     redisClient,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: push publisher: %w", err))
	}
	runtime.pushPublisher = pushPublisher
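
	// The email publisher mirrors the push publisher's lease and backoff
	// wiring but targets the mail delivery commands stream.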
	emailPublisher, err := worker.NewEmailPublisher(worker.EmailPublisherConfig{
		Store:                      publisherStore,
		MailDeliveryCommandsStream: cfg.Streams.MailDeliveryCommands,
		RouteLeaseTTL:              cfg.Retry.RouteLeaseTTL,
		RouteBackoffMin:            cfg.Retry.RouteBackoffMin,
		RouteBackoffMax:            cfg.Retry.RouteBackoffMax,
		Encoder:                    nil,
		Telemetry:                  telemetryRuntime,
		Clock:                      nil,
		StreamPublisher:            redisClient,
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: email publisher: %w", err))
	}
	runtime.emailPublisher = emailPublisher
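
	// The retention worker prunes aged notification records and malformed
	// intents on cfg.Retention's cleanup interval; it takes the concrete
	// systemClock rather than relying on a default.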
	retentionWorker, err := worker.NewSQLRetentionWorker(worker.SQLRetentionConfig{
		Store:                    notificationStore,
		RecordRetention:          cfg.Retention.RecordRetention,
		MalformedIntentRetention: cfg.Retention.MalformedIntentRetention,
		CleanupInterval:          cfg.Retention.CleanupInterval,
		Clock:                    systemClock{},
	}, logger)
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: sql retention worker: %w", err))
	}
	runtime.retentionWorker = retentionWorker
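
	// The internal HTTP server hosts the private probe endpoints that Run
	// ultimately serves.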
	probeServer, err := internalhttp.NewServer(internalhttp.Config{
		Addr:              cfg.InternalHTTP.Addr,
		ReadHeaderTimeout: cfg.InternalHTTP.ReadHeaderTimeout,
		ReadTimeout:       cfg.InternalHTTP.ReadTimeout,
		IdleTimeout:       cfg.InternalHTTP.IdleTimeout,
	}, internalhttp.Dependencies{
		Logger:    logger,
		Telemetry: telemetryRuntime,
	})
	if err != nil {
		return cleanupOnError(fmt.Errorf("new notification runtime: internal HTTP server: %w", err))
	}
	runtime.probeServer = probeServer
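
	// App bundles the probe server and the four workers into the single
	// runnable unit that Run drives.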
	runtime.app = New(cfg, probeServer, intentConsumer, pushPublisher, emailPublisher, retentionWorker)
	return runtime, nil
}

// Run serves the private probe HTTP listener until ctx is canceled or one
// component fails.
func (runtime *Runtime) Run(ctx context.Context) error {
	if runtime == nil {
		return errors.New("run notification runtime: nil runtime")
	}
	if ctx == nil {
		return errors.New("run notification runtime: nil context")
	}
	if runtime.app == nil {
		return errors.New("run notification runtime: nil app")
	}
	return runtime.app.Run(ctx)
}

// Close releases every runtime dependency in reverse construction order.
func (runtime *Runtime) Close() error {
	if runtime == nil {
		return nil
	}
	var joined error
	for index := len(runtime.cleanupFns) - 1; index >= 0; index-- {
		if err := runtime.cleanupFns[index](); err != nil {
			joined = errors.Join(joined, err)
		}
	}
	return joined
}