// Package config loads the Notification Service process configuration from
// environment variables.
package config

import (
	"fmt"
	"net"
	netmail "net/mail"
	"net/url"
	"strings"
	"time"

	"galaxy/notification/internal/telemetry"
	"galaxy/postgres"
	"galaxy/redisconn"
)

const (
	// envPrefix is the prefix shared by the service-specific environment
	// variable names declared below.
	envPrefix = "NOTIFICATION"

	// Process-level settings.
	shutdownTimeoutEnvVar = "NOTIFICATION_SHUTDOWN_TIMEOUT"
	logLevelEnvVar        = "NOTIFICATION_LOG_LEVEL"

	// Internal HTTP listener settings.
	internalHTTPAddrEnvVar              = "NOTIFICATION_INTERNAL_HTTP_ADDR"
	internalHTTPReadHeaderTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
	internalHTTPReadTimeoutEnvVar       = "NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT"
	internalHTTPIdleTimeoutEnvVar       = "NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT"

	// Redis Stream names and the intent consumer read window.
	intentsStreamEnvVar                = "NOTIFICATION_INTENTS_STREAM"
	intentsReadBlockTimeoutEnvVar      = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
	gatewayClientEventsStreamEnvVar    = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
	gatewayClientEventsStreamMaxEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN"
	mailDeliveryCommandsStreamEnvVar   = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"

	// Retry budgets, route lease, backoff bounds, and idempotency window.
	pushRetryMaxAttemptsEnvVar  = "NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS"
	emailRetryMaxAttemptsEnvVar = "NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS"
	routeLeaseTTLEnvVar         = "NOTIFICATION_ROUTE_LEASE_TTL"
	routeBackoffMinEnvVar       = "NOTIFICATION_ROUTE_BACKOFF_MIN"
	routeBackoffMaxEnvVar       = "NOTIFICATION_ROUTE_BACKOFF_MAX"
	idempotencyTTLEnvVar        = "NOTIFICATION_IDEMPOTENCY_TTL"

	// Retention worker settings. These names are also embedded in the
	// messages produced by RetentionConfig.Validate.
	recordRetentionEnvVar          = "NOTIFICATION_RECORD_RETENTION"
	malformedIntentRetentionEnvVar = "NOTIFICATION_MALFORMED_INTENT_RETENTION"
	cleanupIntervalEnvVar          = "NOTIFICATION_CLEANUP_INTERVAL"

	// User Service dependency settings.
	userServiceBaseURLEnvVar = "NOTIFICATION_USER_SERVICE_BASE_URL"
	userServiceTimeoutEnvVar = "NOTIFICATION_USER_SERVICE_TIMEOUT"

	// Administrator recipient lists, one variable per notification type.
	adminEmailsGeoReviewRecommendedEnvVar      = "NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED"
	adminEmailsGameGenerationFailedEnvVar      = "NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED"
	adminEmailsLobbyRuntimePausedAfterEnvVar   = "NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START"
	adminEmailsLobbyApplicationSubmittedEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED"

	// OpenTelemetry settings. The OTEL_* names carry no service prefix; only
	// the two stdout toggles are service-specific.
	otelServiceNameEnvVar                 = "OTEL_SERVICE_NAME"
	otelTracesExporterEnvVar              = "OTEL_TRACES_EXPORTER"
	otelMetricsExporterEnvVar             = "OTEL_METRICS_EXPORTER"
	otelExporterOTLPProtocolEnvVar        = "OTEL_EXPORTER_OTLP_PROTOCOL"
	otelExporterOTLPTracesProtocolEnvVar  = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
	otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
	otelStdoutTracesEnabledEnvVar         = "NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED"
	otelStdoutMetricsEnabledEnvVar        = "NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED"

	// Process-level defaults.
	defaultShutdownTimeout = 5 * time.Second
	defaultLogLevel        = "info"

	// Internal HTTP listener defaults.
	defaultInternalHTTPAddr  = ":8092"
	defaultReadHeaderTimeout = 2 * time.Second
	defaultReadTimeout       = 10 * time.Second
	defaultIdleTimeout       = time.Minute

	// Stream defaults.
	defaultIntentsStream                         = "notification:intents"
	defaultIntentsReadBlockTimeout               = 2 * time.Second
	defaultGatewayClientEventsStream             = "gateway:client-events"
	defaultGatewayClientEventsStreamMaxLen int64 = 1024
	defaultMailDeliveryCommandsStream            = "mail:delivery_commands"

	// Retry defaults. The idempotency window of 168 hours equals seven days.
	defaultPushRetryMaxAttempts  = 3
	defaultEmailRetryMaxAttempts = 7
	defaultRouteLeaseTTL         = 5 * time.Second
	defaultRouteBackoffMin       = time.Second
	defaultRouteBackoffMax       = 5 * time.Minute
	defaultIdempotencyTTL        = 168 * time.Hour

	// Retention defaults: 30 days for records, 90 days for malformed intents,
	// with one cleanup pass per hour.
	defaultRecordRetention          = 30 * 24 * time.Hour
	defaultMalformedIntentRetention = 90 * 24 * time.Hour
	defaultCleanupInterval          = time.Hour

	// User Service default. No default base URL is declared here.
	defaultUserServiceTimeout = time.Second

	// OpenTelemetry default service name.
	defaultOTelServiceName = "galaxy-notification"

	// OpenTelemetry exporter and OTLP protocol identifiers.
	otelExporterNone         = "none"
	otelExporterOTLP         = "otlp"
	otelProtocolHTTPProtobuf = "http/protobuf"
	otelProtocolGRPC         = "grpc"
)

// Config stores the full Notification Service process configuration.
//
// DefaultConfig returns a value populated with the defaults declared above,
// and Validate checks each section in turn.
type Config struct {
	// ShutdownTimeout bounds graceful shutdown of every long-lived component.
	ShutdownTimeout time.Duration

	// Logging configures the process-wide structured logger.
	Logging LoggingConfig

	// InternalHTTP configures the private probe HTTP listener.
	InternalHTTP InternalHTTPConfig

	// Redis configures the shared Redis connection topology and the inbound
	// `notification:intents` stream plus the outbound stream names. Durable
	// notification state lives in PostgreSQL after Stage 5 of `PG_PLAN.md`.
	Redis RedisConfig

	// Postgres configures the PostgreSQL-backed durable store consumed via
	// `pkg/postgres`.
	Postgres PostgresConfig

	// Streams stores the stable Redis Stream names reserved for ingress and
	// downstream publication.
	Streams StreamsConfig

	// IntentsReadBlockTimeout stores the maximum Redis Streams blocking read
	// window used by the intent consumer.
	IntentsReadBlockTimeout time.Duration

	// Retry stores the frozen retry settings used by the route publishers.
	Retry RetryConfig

	// Retention stores the periodic SQL retention worker configuration.
	Retention RetentionConfig

	// UserService configures the trusted user-enrichment dependency.
	UserService UserServiceConfig

	// AdminRouting stores the type-specific configured administrator email
	// lists.
	AdminRouting AdminRoutingConfig

	// Telemetry configures the process-wide OpenTelemetry runtime.
	Telemetry TelemetryConfig
}

// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
	// Level stores the process log level accepted by log/slog.
	Level string
}

// InternalHTTPConfig configures the private probe HTTP listener.
type InternalHTTPConfig struct {
	// Addr stores the TCP listen address in host:port form; the host part may
	// be empty, as in the default ":8092".
	Addr string

	// ReadHeaderTimeout bounds request-header reading.
	ReadHeaderTimeout time.Duration

	// ReadTimeout bounds reading one request.
	ReadTimeout time.Duration

	// IdleTimeout bounds how long keep-alive connections stay open.
	IdleTimeout time.Duration
}

// Validate reports whether cfg stores a usable internal HTTP listener
// configuration.
func (cfg InternalHTTPConfig) Validate() error { switch { case strings.TrimSpace(cfg.Addr) == "": return fmt.Errorf("internal HTTP addr must not be empty") case !isTCPAddr(cfg.Addr): return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr) case cfg.ReadHeaderTimeout <= 0: return fmt.Errorf("internal HTTP read header timeout must be positive") case cfg.ReadTimeout <= 0: return fmt.Errorf("internal HTTP read timeout must be positive") case cfg.IdleTimeout <= 0: return fmt.Errorf("internal HTTP idle timeout must be positive") default: return nil } } // RedisConfig configures the Notification Service Redis connection topology. // Per-call timeouts live in `Conn.OperationTimeout`. type RedisConfig struct { // Conn carries the connection topology (master, replicas, password, db, // per-call timeout). Loaded via redisconn.LoadFromEnv("NOTIFICATION"). Conn redisconn.Config } // Validate reports whether cfg stores a usable Redis configuration. func (cfg RedisConfig) Validate() error { return cfg.Conn.Validate() } // PostgresConfig configures the PostgreSQL-backed durable store. type PostgresConfig struct { // Conn stores the primary plus replica DSN topology and pool tuning. // Loaded via postgres.LoadFromEnv("NOTIFICATION"). Conn postgres.Config } // Validate reports whether cfg stores a usable PostgreSQL configuration. func (cfg PostgresConfig) Validate() error { return cfg.Conn.Validate() } // StreamsConfig stores the stable Redis Stream names used by Notification // Service. type StreamsConfig struct { // Intents stores the ingress intent stream. Intents string // GatewayClientEvents stores the downstream Gateway client-events stream. GatewayClientEvents string // GatewayClientEventsStreamMaxLen bounds the downstream Gateway // client-events stream with approximate trimming. GatewayClientEventsStreamMaxLen int64 // MailDeliveryCommands stores the downstream Mail Service command stream. 
MailDeliveryCommands string } // Validate reports whether cfg stores usable stream names. func (cfg StreamsConfig) Validate() error { switch { case strings.TrimSpace(cfg.Intents) == "": return fmt.Errorf("intents stream must not be empty") case strings.TrimSpace(cfg.GatewayClientEvents) == "": return fmt.Errorf("gateway client-events stream must not be empty") case cfg.GatewayClientEventsStreamMaxLen <= 0: return fmt.Errorf("gateway client-events stream max len must be positive") case strings.TrimSpace(cfg.MailDeliveryCommands) == "": return fmt.Errorf("mail delivery-commands stream must not be empty") default: return nil } } // RetryConfig stores the frozen retry budgets, backoff settings, and the // per-acceptance idempotency window. type RetryConfig struct { // PushMaxAttempts stores the route retry budget for the `push` channel. PushMaxAttempts int // EmailMaxAttempts stores the route retry budget for the `email` channel. EmailMaxAttempts int // RouteLeaseTTL stores the temporary route-lease lifetime used to avoid // duplicate publication across replicas. RouteLeaseTTL time.Duration // RouteBackoffMin stores the minimum retry backoff. RouteBackoffMin time.Duration // RouteBackoffMax stores the maximum retry backoff. RouteBackoffMax time.Duration // IdempotencyTTL stores the per-acceptance idempotency window the service // layer applies to the durable `idempotency_expires_at` column on the // `records` table. IdempotencyTTL time.Duration } // Validate reports whether cfg stores usable retry settings. 
func (cfg RetryConfig) Validate() error { switch { case cfg.PushMaxAttempts <= 0: return fmt.Errorf("push retry max attempts must be positive") case cfg.EmailMaxAttempts <= 0: return fmt.Errorf("email retry max attempts must be positive") case cfg.RouteLeaseTTL <= 0: return fmt.Errorf("route lease ttl must be positive") case cfg.RouteBackoffMin <= 0: return fmt.Errorf("route backoff min must be positive") case cfg.RouteBackoffMax <= 0: return fmt.Errorf("route backoff max must be positive") case cfg.RouteBackoffMin > cfg.RouteBackoffMax: return fmt.Errorf("route backoff min must not exceed route backoff max") case cfg.IdempotencyTTL <= 0: return fmt.Errorf("idempotency ttl must be positive") default: return nil } } // RetentionConfig stores the durable retention windows applied by the // periodic SQL retention worker. type RetentionConfig struct { // RecordRetention bounds how long records (and their cascaded routes and // dead_letters) survive after acceptance. RecordRetention time.Duration // MalformedIntentRetention bounds how long malformed-intent rows survive // after their original `recorded_at`. MalformedIntentRetention time.Duration // CleanupInterval stores the wall-clock period between two retention // passes. CleanupInterval time.Duration } // Validate reports whether cfg stores a usable retention configuration. func (cfg RetentionConfig) Validate() error { switch { case cfg.RecordRetention <= 0: return fmt.Errorf("%s must be positive", recordRetentionEnvVar) case cfg.MalformedIntentRetention <= 0: return fmt.Errorf("%s must be positive", malformedIntentRetentionEnvVar) case cfg.CleanupInterval <= 0: return fmt.Errorf("%s must be positive", cleanupIntervalEnvVar) default: return nil } } // UserServiceConfig configures the trusted user-enrichment dependency. type UserServiceConfig struct { // BaseURL stores the absolute base URL of the trusted User Service. BaseURL string // Timeout bounds one outbound User Service request. 
Timeout time.Duration } // Validate reports whether cfg stores a usable User Service configuration. func (cfg UserServiceConfig) Validate() error { switch { case strings.TrimSpace(cfg.BaseURL) == "": return fmt.Errorf("user service base URL must not be empty") case !isAbsoluteHTTPURL(cfg.BaseURL): return fmt.Errorf("user service base URL %q must be an absolute http(s) URL", cfg.BaseURL) case cfg.Timeout <= 0: return fmt.Errorf("user service timeout must be positive") default: return nil } } // AdminRoutingConfig stores the type-specific configured administrator email // lists. type AdminRoutingConfig struct { // GeoReviewRecommended stores recipients for `geo.review_recommended`. GeoReviewRecommended []string // GameGenerationFailed stores recipients for `game.generation_failed`. GameGenerationFailed []string // LobbyRuntimePausedAfterStart stores recipients for // `lobby.runtime_paused_after_start`. LobbyRuntimePausedAfterStart []string // LobbyApplicationSubmitted stores recipients for public // `lobby.application.submitted` notifications. LobbyApplicationSubmitted []string } // Validate reports whether cfg stores valid normalized administrator email // lists. func (cfg AdminRoutingConfig) Validate() error { if err := validateNormalizedEmailList("geo.review_recommended", cfg.GeoReviewRecommended); err != nil { return err } if err := validateNormalizedEmailList("game.generation_failed", cfg.GameGenerationFailed); err != nil { return err } if err := validateNormalizedEmailList("lobby.runtime_paused_after_start", cfg.LobbyRuntimePausedAfterStart); err != nil { return err } if err := validateNormalizedEmailList("lobby.application.submitted", cfg.LobbyApplicationSubmitted); err != nil { return err } return nil } // TelemetryConfig configures the Notification Service OpenTelemetry runtime. type TelemetryConfig struct { // ServiceName overrides the default OpenTelemetry service name. ServiceName string // TracesExporter selects the external traces exporter. 
Supported values are // `none` and `otlp`. TracesExporter string // MetricsExporter selects the external metrics exporter. Supported values // are `none` and `otlp`. MetricsExporter string // TracesProtocol selects the OTLP traces protocol when TracesExporter is // `otlp`. TracesProtocol string // MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is // `otlp`. MetricsProtocol string // StdoutTracesEnabled enables the additional stdout trace exporter used for // local development and debugging. StdoutTracesEnabled bool // StdoutMetricsEnabled enables the additional stdout metric exporter used // for local development and debugging. StdoutMetricsEnabled bool } // Validate reports whether cfg contains a supported OpenTelemetry // configuration. func (cfg TelemetryConfig) Validate() error { return telemetry.ProcessConfig{ ServiceName: cfg.ServiceName, TracesExporter: cfg.TracesExporter, MetricsExporter: cfg.MetricsExporter, TracesProtocol: cfg.TracesProtocol, MetricsProtocol: cfg.MetricsProtocol, StdoutTracesEnabled: cfg.StdoutTracesEnabled, StdoutMetricsEnabled: cfg.StdoutMetricsEnabled, }.Validate() } // DefaultConfig returns the default Notification Service process // configuration. 
func DefaultConfig() Config {
	// Start from the zero value: fields not assigned below (the User Service
	// base URL, the admin recipient lists, the OTLP protocols, and the stdout
	// exporter toggles) keep their zero values.
	var cfg Config

	cfg.ShutdownTimeout = defaultShutdownTimeout
	cfg.Logging.Level = defaultLogLevel

	cfg.InternalHTTP.Addr = defaultInternalHTTPAddr
	cfg.InternalHTTP.ReadHeaderTimeout = defaultReadHeaderTimeout
	cfg.InternalHTTP.ReadTimeout = defaultReadTimeout
	cfg.InternalHTTP.IdleTimeout = defaultIdleTimeout

	// Connection defaults come from the shared connection packages.
	cfg.Redis.Conn = redisconn.DefaultConfig()
	cfg.Postgres.Conn = postgres.DefaultConfig()

	cfg.Streams.Intents = defaultIntentsStream
	cfg.Streams.GatewayClientEvents = defaultGatewayClientEventsStream
	cfg.Streams.GatewayClientEventsStreamMaxLen = defaultGatewayClientEventsStreamMaxLen
	cfg.Streams.MailDeliveryCommands = defaultMailDeliveryCommandsStream
	cfg.IntentsReadBlockTimeout = defaultIntentsReadBlockTimeout

	cfg.Retry.PushMaxAttempts = defaultPushRetryMaxAttempts
	cfg.Retry.EmailMaxAttempts = defaultEmailRetryMaxAttempts
	cfg.Retry.RouteLeaseTTL = defaultRouteLeaseTTL
	cfg.Retry.RouteBackoffMin = defaultRouteBackoffMin
	cfg.Retry.RouteBackoffMax = defaultRouteBackoffMax
	cfg.Retry.IdempotencyTTL = defaultIdempotencyTTL

	cfg.Retention.RecordRetention = defaultRecordRetention
	cfg.Retention.MalformedIntentRetention = defaultMalformedIntentRetention
	cfg.Retention.CleanupInterval = defaultCleanupInterval

	cfg.UserService.Timeout = defaultUserServiceTimeout

	cfg.Telemetry.ServiceName = defaultOTelServiceName
	cfg.Telemetry.TracesExporter = otelExporterNone
	cfg.Telemetry.MetricsExporter = otelExporterNone

	return cfg
}

// Validate reports whether cfg contains a consistent Notification Service
// process configuration.
func (cfg Config) Validate() error { if cfg.ShutdownTimeout <= 0 { return fmt.Errorf("load notification config: %s must be positive", shutdownTimeoutEnvVar) } if err := cfg.InternalHTTP.Validate(); err != nil { return fmt.Errorf("load notification config: %s", err) } if err := cfg.Redis.Validate(); err != nil { return fmt.Errorf("load notification config: %w", err) } if err := cfg.Postgres.Validate(); err != nil { return fmt.Errorf("load notification config: %w", err) } if err := cfg.Streams.Validate(); err != nil { return fmt.Errorf("load notification config: %s", err) } if cfg.IntentsReadBlockTimeout <= 0 { return fmt.Errorf("load notification config: %s must be positive", intentsReadBlockTimeoutEnvVar) } if err := cfg.Retry.Validate(); err != nil { return fmt.Errorf("load notification config: %s", err) } if err := cfg.Retention.Validate(); err != nil { return fmt.Errorf("load notification config: %s", err) } if err := cfg.UserService.Validate(); err != nil { return fmt.Errorf("load notification config: %s", err) } if err := cfg.AdminRouting.Validate(); err != nil { return fmt.Errorf("load notification config: %s", err) } if err := cfg.Telemetry.Validate(); err != nil { return fmt.Errorf("load notification config: %w", err) } return nil } func validateNormalizedEmailList(name string, values []string) error { for index, value := range values { normalized, err := normalizeMailboxAddress(value) if err != nil { return fmt.Errorf("%s[%d]: %w", name, index, err) } if normalized != value { return fmt.Errorf("%s[%d]: email address must already be normalized", name, index) } } return nil } func normalizeMailboxAddress(value string) (string, error) { trimmed := strings.TrimSpace(value) if trimmed == "" { return "", fmt.Errorf("email address must not be empty") } parsed, err := netmail.ParseAddress(trimmed) if err != nil { return "", fmt.Errorf("invalid email address %q: %w", trimmed, err) } if parsed.Name != "" { return "", fmt.Errorf("email address %q must not include a 
display name", trimmed) } return strings.ToLower(parsed.Address), nil } func parseEmailList(name string, raw string) ([]string, error) { trimmed := strings.TrimSpace(raw) if trimmed == "" { return nil, nil } parts := strings.Split(trimmed, ",") addresses := make([]string, 0, len(parts)) seen := make(map[string]struct{}, len(parts)) for index, part := range parts { normalized, err := normalizeMailboxAddress(part) if err != nil { return nil, fmt.Errorf("%s[%d]: %w", name, index, err) } if _, ok := seen[normalized]; ok { continue } seen[normalized] = struct{}{} addresses = append(addresses, normalized) } return addresses, nil } func normalizeBaseURL(value string) string { trimmed := strings.TrimSpace(value) if trimmed == "" { return "" } return strings.TrimRight(trimmed, "/") } func isAbsoluteHTTPURL(value string) bool { parsed, err := url.Parse(strings.TrimSpace(value)) if err != nil { return false } if parsed.Scheme != "http" && parsed.Scheme != "https" { return false } return parsed.Host != "" } func isTCPAddr(value string) bool { host, port, err := net.SplitHostPort(strings.TrimSpace(value)) if err != nil { return false } if port == "" { return false } if host == "" { return true } return true }