// Source: galaxy-game/notification/internal/config/config.go (2026-04-26 20:34:39 +02:00, 604 lines, 20 KiB, Go)

// Package config loads the Notification Service process configuration from
// environment variables.
package config
import (
"fmt"
"net"
netmail "net/mail"
"net/url"
"strings"
"time"
"galaxy/notification/internal/telemetry"
"galaxy/postgres"
"galaxy/redisconn"
)
// Environment variable names, built-in defaults, and telemetry identifiers
// used by the Notification Service configuration.
const (
// envPrefix matches the prefix shared by the NOTIFICATION_* variable names
// below.
// NOTE(review): presumably also the prefix handed to redisconn.LoadFromEnv
// and postgres.LoadFromEnv — confirm in the loader.
envPrefix = "NOTIFICATION"
// Process lifecycle and logging variables.
shutdownTimeoutEnvVar = "NOTIFICATION_SHUTDOWN_TIMEOUT"
logLevelEnvVar = "NOTIFICATION_LOG_LEVEL"
// Internal HTTP listener variables.
internalHTTPAddrEnvVar = "NOTIFICATION_INTERNAL_HTTP_ADDR"
internalHTTPReadHeaderTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
internalHTTPReadTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_READ_TIMEOUT"
internalHTTPIdleTimeoutEnvVar = "NOTIFICATION_INTERNAL_HTTP_IDLE_TIMEOUT"
// Redis Stream names, the intent consumer blocking-read window, and the
// Gateway client-events stream length bound.
intentsStreamEnvVar = "NOTIFICATION_INTENTS_STREAM"
intentsReadBlockTimeoutEnvVar = "NOTIFICATION_INTENTS_READ_BLOCK_TIMEOUT"
gatewayClientEventsStreamEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM"
gatewayClientEventsStreamMaxEnvVar = "NOTIFICATION_GATEWAY_CLIENT_EVENTS_STREAM_MAX_LEN"
mailDeliveryCommandsStreamEnvVar = "NOTIFICATION_MAIL_DELIVERY_COMMANDS_STREAM"
// Route retry budgets, lease lifetime, backoff bounds, and idempotency
// window variables.
pushRetryMaxAttemptsEnvVar = "NOTIFICATION_PUSH_RETRY_MAX_ATTEMPTS"
emailRetryMaxAttemptsEnvVar = "NOTIFICATION_EMAIL_RETRY_MAX_ATTEMPTS"
routeLeaseTTLEnvVar = "NOTIFICATION_ROUTE_LEASE_TTL"
routeBackoffMinEnvVar = "NOTIFICATION_ROUTE_BACKOFF_MIN"
routeBackoffMaxEnvVar = "NOTIFICATION_ROUTE_BACKOFF_MAX"
idempotencyTTLEnvVar = "NOTIFICATION_IDEMPOTENCY_TTL"
// Retention windows and cleanup cadence variables. These names also appear
// in the error messages produced by RetentionConfig.Validate.
recordRetentionEnvVar = "NOTIFICATION_RECORD_RETENTION"
malformedIntentRetentionEnvVar = "NOTIFICATION_MALFORMED_INTENT_RETENTION"
cleanupIntervalEnvVar = "NOTIFICATION_CLEANUP_INTERVAL"
// User Service dependency variables.
userServiceBaseURLEnvVar = "NOTIFICATION_USER_SERVICE_BASE_URL"
userServiceTimeoutEnvVar = "NOTIFICATION_USER_SERVICE_TIMEOUT"
// Administrator email list variables, one per notification type.
adminEmailsGeoReviewRecommendedEnvVar = "NOTIFICATION_ADMIN_EMAILS_GEO_REVIEW_RECOMMENDED"
adminEmailsGameGenerationFailedEnvVar = "NOTIFICATION_ADMIN_EMAILS_GAME_GENERATION_FAILED"
adminEmailsLobbyRuntimePausedAfterEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_RUNTIME_PAUSED_AFTER_START"
adminEmailsLobbyApplicationSubmittedEnvVar = "NOTIFICATION_ADMIN_EMAILS_LOBBY_APPLICATION_SUBMITTED"
// OpenTelemetry variables. The OTEL_* names carry no service prefix; the
// two stdout toggles are service-prefixed.
otelServiceNameEnvVar = "OTEL_SERVICE_NAME"
otelTracesExporterEnvVar = "OTEL_TRACES_EXPORTER"
otelMetricsExporterEnvVar = "OTEL_METRICS_EXPORTER"
otelExporterOTLPProtocolEnvVar = "OTEL_EXPORTER_OTLP_PROTOCOL"
otelExporterOTLPTracesProtocolEnvVar = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
otelStdoutTracesEnabledEnvVar = "NOTIFICATION_OTEL_STDOUT_TRACES_ENABLED"
otelStdoutMetricsEnabledEnvVar = "NOTIFICATION_OTEL_STDOUT_METRICS_ENABLED"
// Defaults applied by DefaultConfig.
defaultShutdownTimeout = 5 * time.Second
defaultLogLevel = "info"
defaultInternalHTTPAddr = ":8092"
defaultReadHeaderTimeout = 2 * time.Second
defaultReadTimeout = 10 * time.Second
defaultIdleTimeout = time.Minute
defaultIntentsStream = "notification:intents"
defaultIntentsReadBlockTimeout = 2 * time.Second
defaultGatewayClientEventsStream = "gateway:client-events"
defaultGatewayClientEventsStreamMaxLen int64 = 1024
defaultMailDeliveryCommandsStream = "mail:delivery_commands"
defaultPushRetryMaxAttempts = 3
defaultEmailRetryMaxAttempts = 7
defaultRouteLeaseTTL = 5 * time.Second
defaultRouteBackoffMin = time.Second
defaultRouteBackoffMax = 5 * time.Minute
// 168 hours is seven days.
defaultIdempotencyTTL = 168 * time.Hour
defaultRecordRetention = 30 * 24 * time.Hour
defaultMalformedIntentRetention = 90 * 24 * time.Hour
defaultCleanupInterval = time.Hour
defaultUserServiceTimeout = time.Second
defaultOTelServiceName = "galaxy-notification"
// Exporter identifiers; `none` and `otlp` are the values TelemetryConfig
// documents as supported.
otelExporterNone = "none"
otelExporterOTLP = "otlp"
// OTLP protocol identifiers.
// NOTE(review): presumably the accepted values for TracesProtocol and
// MetricsProtocol — confirm against telemetry.ProcessConfig.Validate.
otelProtocolHTTPProtobuf = "http/protobuf"
otelProtocolGRPC = "grpc"
)
// Config stores the full Notification Service process configuration.
// DefaultConfig returns the baseline values and Validate checks a populated
// value section by section.
type Config struct {
// ShutdownTimeout bounds graceful shutdown of every long-lived component.
ShutdownTimeout time.Duration
// Logging configures the process-wide structured logger.
Logging LoggingConfig
// InternalHTTP configures the private probe HTTP listener.
InternalHTTP InternalHTTPConfig
// Redis configures the shared Redis connection topology. Stream names are
// carried separately in Streams. Durable notification state lives in
// PostgreSQL after Stage 5 of `PG_PLAN.md`.
Redis RedisConfig
// Postgres configures the PostgreSQL-backed durable store consumed via
// `pkg/postgres`.
Postgres PostgresConfig
// Streams stores the stable Redis Stream names reserved for ingress and
// downstream publication.
Streams StreamsConfig
// IntentsReadBlockTimeout stores the maximum Redis Streams blocking read
// window used by the intent consumer.
IntentsReadBlockTimeout time.Duration
// Retry stores the frozen retry settings used by the route publishers.
Retry RetryConfig
// Retention stores the periodic SQL retention worker configuration.
Retention RetentionConfig
// UserService configures the trusted user-enrichment dependency.
UserService UserServiceConfig
// AdminRouting stores the type-specific configured administrator email
// lists.
AdminRouting AdminRoutingConfig
// Telemetry configures the process-wide OpenTelemetry runtime.
Telemetry TelemetryConfig
}
// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
// Level stores the process log level accepted by log/slog. DefaultConfig
// sets it to "info"; Config.Validate does not inspect it.
// NOTE(review): presumably parsed where the logger is constructed —
// confirm that an unknown level is rejected there.
Level string
}
// InternalHTTPConfig holds the settings of the private probe HTTP listener.
type InternalHTTPConfig struct {
	// Addr is the TCP listen address in host:port form; the host part may be
	// empty, as in ":8092".
	Addr string
	// ReadHeaderTimeout limits how long reading request headers may take.
	ReadHeaderTimeout time.Duration
	// ReadTimeout limits how long reading a single request may take.
	ReadTimeout time.Duration
	// IdleTimeout limits how long a keep-alive connection may stay open.
	IdleTimeout time.Duration
}

// Validate checks the listener settings and returns the first problem found:
// an empty address, an address that is not in host:port form, or a
// non-positive timeout. It returns nil when every field is usable.
func (cfg InternalHTTPConfig) Validate() error {
	if strings.TrimSpace(cfg.Addr) == "" {
		return fmt.Errorf("internal HTTP addr must not be empty")
	}
	if !isTCPAddr(cfg.Addr) {
		return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr)
	}
	if cfg.ReadHeaderTimeout <= 0 {
		return fmt.Errorf("internal HTTP read header timeout must be positive")
	}
	if cfg.ReadTimeout <= 0 {
		return fmt.Errorf("internal HTTP read timeout must be positive")
	}
	if cfg.IdleTimeout <= 0 {
		return fmt.Errorf("internal HTTP idle timeout must be positive")
	}
	return nil
}
// RedisConfig configures the Notification Service Redis connection topology.
// Per-call timeouts live in `Conn.OperationTimeout`. DefaultConfig seeds Conn
// from redisconn.DefaultConfig.
type RedisConfig struct {
// Conn carries the connection topology (master, replicas, password, db,
// per-call timeout). Loaded via redisconn.LoadFromEnv("NOTIFICATION").
Conn redisconn.Config
}
// Validate reports whether cfg stores a usable Redis configuration. It
// delegates entirely to Conn.Validate and returns that result unchanged.
func (cfg RedisConfig) Validate() error {
return cfg.Conn.Validate()
}
// PostgresConfig configures the PostgreSQL-backed durable store. DefaultConfig
// seeds Conn from postgres.DefaultConfig.
type PostgresConfig struct {
// Conn stores the primary plus replica DSN topology and pool tuning.
// Loaded via postgres.LoadFromEnv("NOTIFICATION").
Conn postgres.Config
}
// Validate reports whether cfg stores a usable PostgreSQL configuration. It
// delegates entirely to Conn.Validate and returns that result unchanged.
func (cfg PostgresConfig) Validate() error {
return cfg.Conn.Validate()
}
// StreamsConfig names the Redis Streams the Notification Service reads from
// and publishes to.
type StreamsConfig struct {
	// Intents is the name of the ingress intent stream.
	Intents string
	// GatewayClientEvents is the name of the downstream Gateway client-events
	// stream.
	GatewayClientEvents string
	// GatewayClientEventsStreamMaxLen is the length bound applied to the
	// Gateway client-events stream with approximate trimming.
	GatewayClientEventsStreamMaxLen int64
	// MailDeliveryCommands is the name of the downstream Mail Service command
	// stream.
	MailDeliveryCommands string
}

// Validate returns the first problem found in cfg: a blank stream name or a
// non-positive Gateway stream length bound. Names made only of whitespace
// count as blank. It returns nil when every field is usable.
func (cfg StreamsConfig) Validate() error {
	if strings.TrimSpace(cfg.Intents) == "" {
		return fmt.Errorf("intents stream must not be empty")
	}
	if strings.TrimSpace(cfg.GatewayClientEvents) == "" {
		return fmt.Errorf("gateway client-events stream must not be empty")
	}
	if cfg.GatewayClientEventsStreamMaxLen <= 0 {
		return fmt.Errorf("gateway client-events stream max len must be positive")
	}
	if strings.TrimSpace(cfg.MailDeliveryCommands) == "" {
		return fmt.Errorf("mail delivery-commands stream must not be empty")
	}
	return nil
}
// RetryConfig groups the frozen retry budgets, the route lease and backoff
// settings, and the per-acceptance idempotency window.
type RetryConfig struct {
	// PushMaxAttempts is the route retry budget for the `push` channel.
	PushMaxAttempts int
	// EmailMaxAttempts is the route retry budget for the `email` channel.
	EmailMaxAttempts int
	// RouteLeaseTTL is the lifetime of the temporary route lease used to avoid
	// duplicate publication across replicas.
	RouteLeaseTTL time.Duration
	// RouteBackoffMin is the smallest retry backoff.
	RouteBackoffMin time.Duration
	// RouteBackoffMax is the largest retry backoff.
	RouteBackoffMax time.Duration
	// IdempotencyTTL is the per-acceptance idempotency window the service
	// layer applies to the durable `idempotency_expires_at` column on the
	// `records` table.
	IdempotencyTTL time.Duration
}

// Validate returns the first problem found in cfg. Every budget and duration
// must be positive, and RouteBackoffMin must not be greater than
// RouteBackoffMax (equal values are accepted). It returns nil when every
// field is usable.
func (cfg RetryConfig) Validate() error {
	if cfg.PushMaxAttempts <= 0 {
		return fmt.Errorf("push retry max attempts must be positive")
	}
	if cfg.EmailMaxAttempts <= 0 {
		return fmt.Errorf("email retry max attempts must be positive")
	}
	if cfg.RouteLeaseTTL <= 0 {
		return fmt.Errorf("route lease ttl must be positive")
	}
	if cfg.RouteBackoffMin <= 0 {
		return fmt.Errorf("route backoff min must be positive")
	}
	if cfg.RouteBackoffMax <= 0 {
		return fmt.Errorf("route backoff max must be positive")
	}
	if cfg.RouteBackoffMin > cfg.RouteBackoffMax {
		return fmt.Errorf("route backoff min must not exceed route backoff max")
	}
	if cfg.IdempotencyTTL <= 0 {
		return fmt.Errorf("idempotency ttl must be positive")
	}
	return nil
}
// RetentionConfig holds the durable retention windows applied by the periodic
// SQL retention worker.
type RetentionConfig struct {
	// RecordRetention is how long records (and their cascaded routes and
	// dead_letters) survive after acceptance.
	RecordRetention time.Duration
	// MalformedIntentRetention is how long malformed-intent rows survive after
	// their original `recorded_at`.
	MalformedIntentRetention time.Duration
	// CleanupInterval is the wall-clock period between two retention passes.
	CleanupInterval time.Duration
}

// Validate requires every retention setting to be positive. The first
// non-positive field is reported using the name of its environment variable;
// nil is returned when all three are usable.
func (cfg RetentionConfig) Validate() error {
	// Checked in declaration order so the first failing field wins.
	checks := []struct {
		value  time.Duration
		envVar string
	}{
		{cfg.RecordRetention, recordRetentionEnvVar},
		{cfg.MalformedIntentRetention, malformedIntentRetentionEnvVar},
		{cfg.CleanupInterval, cleanupIntervalEnvVar},
	}
	for _, check := range checks {
		if check.value <= 0 {
			return fmt.Errorf("%s must be positive", check.envVar)
		}
	}
	return nil
}
// UserServiceConfig holds the settings of the trusted user-enrichment
// dependency.
type UserServiceConfig struct {
	// BaseURL is the absolute http(s) base URL of the trusted User Service.
	BaseURL string
	// Timeout limits the duration of one outbound User Service request.
	Timeout time.Duration
}

// Validate returns the first problem found in cfg: a blank base URL, a base
// URL that is not an absolute http(s) URL, or a non-positive timeout. It
// returns nil when both fields are usable.
func (cfg UserServiceConfig) Validate() error {
	if strings.TrimSpace(cfg.BaseURL) == "" {
		return fmt.Errorf("user service base URL must not be empty")
	}
	if !isAbsoluteHTTPURL(cfg.BaseURL) {
		return fmt.Errorf("user service base URL %q must be an absolute http(s) URL", cfg.BaseURL)
	}
	if cfg.Timeout <= 0 {
		return fmt.Errorf("user service timeout must be positive")
	}
	return nil
}
// AdminRoutingConfig holds one administrator email list per notification
// type.
type AdminRoutingConfig struct {
	// GeoReviewRecommended lists recipients for `geo.review_recommended`.
	GeoReviewRecommended []string
	// GameGenerationFailed lists recipients for `game.generation_failed`.
	GameGenerationFailed []string
	// LobbyRuntimePausedAfterStart lists recipients for
	// `lobby.runtime_paused_after_start`.
	LobbyRuntimePausedAfterStart []string
	// LobbyApplicationSubmitted lists recipients for public
	// `lobby.application.submitted` notifications.
	LobbyApplicationSubmitted []string
}

// Validate checks that every configured list contains only valid, already
// normalized email addresses. Lists are checked in declaration order and the
// first failure is returned, labeled with the notification type and the
// index of the offending entry. Empty lists are accepted.
func (cfg AdminRoutingConfig) Validate() error {
	lists := []struct {
		notificationType string
		recipients       []string
	}{
		{"geo.review_recommended", cfg.GeoReviewRecommended},
		{"game.generation_failed", cfg.GameGenerationFailed},
		{"lobby.runtime_paused_after_start", cfg.LobbyRuntimePausedAfterStart},
		{"lobby.application.submitted", cfg.LobbyApplicationSubmitted},
	}
	for _, list := range lists {
		if err := validateNormalizedEmailList(list.notificationType, list.recipients); err != nil {
			return err
		}
	}
	return nil
}
// TelemetryConfig holds the OpenTelemetry runtime settings of the
// Notification Service.
type TelemetryConfig struct {
	// ServiceName is the OpenTelemetry service name; setting it overrides the
	// default.
	ServiceName string
	// TracesExporter names the external traces exporter. Supported values are
	// `none` and `otlp`.
	TracesExporter string
	// MetricsExporter names the external metrics exporter. Supported values
	// are `none` and `otlp`.
	MetricsExporter string
	// TracesProtocol names the OTLP traces protocol used when TracesExporter
	// is `otlp`.
	TracesProtocol string
	// MetricsProtocol names the OTLP metrics protocol used when
	// MetricsExporter is `otlp`.
	MetricsProtocol string
	// StdoutTracesEnabled turns on the additional stdout trace exporter used
	// for local development and debugging.
	StdoutTracesEnabled bool
	// StdoutMetricsEnabled turns on the additional stdout metric exporter used
	// for local development and debugging.
	StdoutMetricsEnabled bool
}

// Validate copies cfg field by field into a telemetry.ProcessConfig and
// returns the result of that value's Validate method, so the accepted
// settings are whatever the telemetry package accepts.
func (cfg TelemetryConfig) Validate() error {
	process := telemetry.ProcessConfig{
		ServiceName:          cfg.ServiceName,
		TracesExporter:       cfg.TracesExporter,
		MetricsExporter:      cfg.MetricsExporter,
		TracesProtocol:       cfg.TracesProtocol,
		MetricsProtocol:      cfg.MetricsProtocol,
		StdoutTracesEnabled:  cfg.StdoutTracesEnabled,
		StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
	}
	return process.Validate()
}
// DefaultConfig builds the baseline Notification Service configuration from
// the package defaults.
//
// UserService.BaseURL, the AdminRouting lists, and the telemetry protocols
// are left at their zero values. Because UserServiceConfig.Validate rejects
// an empty base URL, the returned value does not pass Config.Validate until
// a base URL is supplied.
func DefaultConfig() Config {
	var cfg Config

	cfg.ShutdownTimeout = defaultShutdownTimeout
	cfg.Logging.Level = defaultLogLevel

	cfg.InternalHTTP.Addr = defaultInternalHTTPAddr
	cfg.InternalHTTP.ReadHeaderTimeout = defaultReadHeaderTimeout
	cfg.InternalHTTP.ReadTimeout = defaultReadTimeout
	cfg.InternalHTTP.IdleTimeout = defaultIdleTimeout

	// Connection defaults come from the shared connection packages.
	cfg.Redis.Conn = redisconn.DefaultConfig()
	cfg.Postgres.Conn = postgres.DefaultConfig()

	cfg.Streams.Intents = defaultIntentsStream
	cfg.Streams.GatewayClientEvents = defaultGatewayClientEventsStream
	cfg.Streams.GatewayClientEventsStreamMaxLen = defaultGatewayClientEventsStreamMaxLen
	cfg.Streams.MailDeliveryCommands = defaultMailDeliveryCommandsStream
	cfg.IntentsReadBlockTimeout = defaultIntentsReadBlockTimeout

	cfg.Retry.PushMaxAttempts = defaultPushRetryMaxAttempts
	cfg.Retry.EmailMaxAttempts = defaultEmailRetryMaxAttempts
	cfg.Retry.RouteLeaseTTL = defaultRouteLeaseTTL
	cfg.Retry.RouteBackoffMin = defaultRouteBackoffMin
	cfg.Retry.RouteBackoffMax = defaultRouteBackoffMax
	cfg.Retry.IdempotencyTTL = defaultIdempotencyTTL

	cfg.Retention.RecordRetention = defaultRecordRetention
	cfg.Retention.MalformedIntentRetention = defaultMalformedIntentRetention
	cfg.Retention.CleanupInterval = defaultCleanupInterval

	cfg.UserService.Timeout = defaultUserServiceTimeout

	// Both external exporters default to `none`.
	cfg.Telemetry.ServiceName = defaultOTelServiceName
	cfg.Telemetry.TracesExporter = otelExporterNone
	cfg.Telemetry.MetricsExporter = otelExporterNone

	return cfg
}
// Validate reports whether cfg contains a consistent Notification Service
// process configuration.
//
// Sections are checked in a fixed order and the first failure is returned,
// prefixed with "load notification config:". Every section error is wrapped
// with %w, so callers can inspect the underlying cause with errors.Is and
// errors.As regardless of which section failed. The Logging section is not
// checked here.
func (cfg Config) Validate() error {
	if cfg.ShutdownTimeout <= 0 {
		return fmt.Errorf("load notification config: %s must be positive", shutdownTimeoutEnvVar)
	}
	if err := cfg.InternalHTTP.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Redis.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Postgres.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Streams.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if cfg.IntentsReadBlockTimeout <= 0 {
		return fmt.Errorf("load notification config: %s must be positive", intentsReadBlockTimeoutEnvVar)
	}
	if err := cfg.Retry.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Retention.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.UserService.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	// Admin list errors may wrap a net/mail parse error; %w keeps that chain
	// reachable from the returned error.
	if err := cfg.AdminRouting.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	if err := cfg.Telemetry.Validate(); err != nil {
		return fmt.Errorf("load notification config: %w", err)
	}
	return nil
}
// validateNormalizedEmailList checks that every entry of values is a valid
// mailbox address that is already in normalized form, meaning that
// normalizing it again yields the same string. The first offending entry is
// reported as "name[index]: ...". A nil or empty list is accepted.
func validateNormalizedEmailList(name string, values []string) error {
	for i := range values {
		canonical, err := normalizeMailboxAddress(values[i])
		switch {
		case err != nil:
			return fmt.Errorf("%s[%d]: %w", name, i, err)
		case canonical != values[i]:
			return fmt.Errorf("%s[%d]: email address must already be normalized", name, i)
		}
	}
	return nil
}
// normalizeMailboxAddress returns value as a bare, lower-cased mailbox
// address. Surrounding whitespace is ignored. It returns an error when the
// input is blank, cannot be parsed by net/mail, or carries a display name.
func normalizeMailboxAddress(value string) (string, error) {
	candidate := strings.TrimSpace(value)
	if len(candidate) == 0 {
		return "", fmt.Errorf("email address must not be empty")
	}

	mailbox, parseErr := netmail.ParseAddress(candidate)
	switch {
	case parseErr != nil:
		return "", fmt.Errorf("invalid email address %q: %w", candidate, parseErr)
	case mailbox.Name != "":
		return "", fmt.Errorf("email address %q must not include a display name", candidate)
	}

	return strings.ToLower(mailbox.Address), nil
}
// parseEmailList splits a comma-separated list of mailbox addresses,
// normalizes every entry, and drops duplicates while keeping first-seen
// order. A blank input yields a nil slice and no error. The first entry that
// fails normalization aborts parsing and is reported as "name[index]: ...";
// that includes empty entries such as the one produced by a trailing comma.
func parseEmailList(name string, raw string) ([]string, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil, nil
	}

	fields := strings.Split(raw, ",")
	unique := make(map[string]struct{}, len(fields))
	result := make([]string, 0, len(fields))
	for i := range fields {
		addr, err := normalizeMailboxAddress(fields[i])
		if err != nil {
			return nil, fmt.Errorf("%s[%d]: %w", name, i, err)
		}
		if _, dup := unique[addr]; !dup {
			unique[addr] = struct{}{}
			result = append(result, addr)
		}
	}
	return result, nil
}
// normalizeBaseURL strips surrounding whitespace from value and then removes
// every trailing slash. A blank input yields the empty string.
func normalizeBaseURL(value string) string {
	return strings.TrimRight(strings.TrimSpace(value), "/")
}
// isAbsoluteHTTPURL reports whether value, ignoring surrounding whitespace,
// parses as a URL with an http or https scheme and a non-empty host.
func isAbsoluteHTTPURL(value string) bool {
	u, err := url.Parse(strings.TrimSpace(value))
	if err != nil {
		return false
	}
	switch u.Scheme {
	case "http", "https":
		return u.Host != ""
	default:
		return false
	}
}
// isTCPAddr reports whether value, ignoring surrounding whitespace, splits
// into host:port form with a non-empty port. The host part may be empty, so
// wildcard listen addresses such as ":8092" are accepted. The port is only
// required to be present; it is not checked for being numeric.
func isTCPAddr(value string) bool {
	_, port, err := net.SplitHostPort(strings.TrimSpace(value))
	if err != nil {
		return false
	}
	return port != ""
}