Files
galaxy-game/lobby/internal/config/config.go
T
2026-04-28 20:39:18 +02:00

545 lines
20 KiB
Go

// Package config loads the Game Lobby Service process configuration from
// environment variables.
package config
import (
"fmt"
"strings"
"time"
"galaxy/lobby/internal/domain/engineimage"
"galaxy/lobby/internal/telemetry"
"galaxy/postgres"
"galaxy/redisconn"
)
const (
// envPrefix is the service prefix that every LOBBY_* variable name below
// starts with.
envPrefix = "LOBBY"
// Process-level settings.
shutdownTimeoutEnvVar = "LOBBY_SHUTDOWN_TIMEOUT"
logLevelEnvVar = "LOBBY_LOG_LEVEL"
// Public and internal HTTP listener settings.
publicHTTPAddrEnvVar = "LOBBY_PUBLIC_HTTP_ADDR"
publicHTTPReadHeaderTimeoutEnvVar = "LOBBY_PUBLIC_HTTP_READ_HEADER_TIMEOUT"
publicHTTPReadTimeoutEnvVar = "LOBBY_PUBLIC_HTTP_READ_TIMEOUT"
publicHTTPIdleTimeoutEnvVar = "LOBBY_PUBLIC_HTTP_IDLE_TIMEOUT"
internalHTTPAddrEnvVar = "LOBBY_INTERNAL_HTTP_ADDR"
internalHTTPReadHeaderTimeoutEnvVar = "LOBBY_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
internalHTTPReadTimeoutEnvVar = "LOBBY_INTERNAL_HTTP_READ_TIMEOUT"
internalHTTPIdleTimeoutEnvVar = "LOBBY_INTERNAL_HTTP_IDLE_TIMEOUT"
// Redis Streams keys and blocking-read timeouts.
gmEventsStreamEnvVar = "LOBBY_GM_EVENTS_STREAM"
gmEventsReadBlockTimeoutEnvVar = "LOBBY_GM_EVENTS_READ_BLOCK_TIMEOUT"
userLifecycleStreamEnvVar = "LOBBY_USER_LIFECYCLE_STREAM"
userLifecycleReadBlockTimeoutEnvVar = "LOBBY_USER_LIFECYCLE_READ_BLOCK_TIMEOUT"
runtimeStartJobsStreamEnvVar = "LOBBY_RUNTIME_START_JOBS_STREAM"
runtimeStopJobsStreamEnvVar = "LOBBY_RUNTIME_STOP_JOBS_STREAM"
runtimeJobResultsStreamEnvVar = "LOBBY_RUNTIME_JOB_RESULTS_STREAM"
// NOTE(review): this identifier ends in "Env" while every sibling ends in
// "EnvVar"; rename only together with all of its references.
runtimeJobResultsReadBlockTimeoutEnv = "LOBBY_RUNTIME_JOB_RESULTS_READ_BLOCK_TIMEOUT"
notificationIntentsStreamEnvVar = "LOBBY_NOTIFICATION_INTENTS_STREAM"
// Synchronous client settings for User Service and Game Master.
userServiceBaseURLEnvVar = "LOBBY_USER_SERVICE_BASE_URL"
userServiceTimeoutEnvVar = "LOBBY_USER_SERVICE_TIMEOUT"
gmBaseURLEnvVar = "LOBBY_GM_BASE_URL"
gmTimeoutEnvVar = "LOBBY_GM_TIMEOUT"
// Worker intervals, Race Name Directory backend selector, and the engine
// image template.
enrollmentAutomationIntervalEnvVar = "LOBBY_ENROLLMENT_AUTOMATION_INTERVAL"
raceNameDirectoryBackendEnvVar = "LOBBY_RACE_NAME_DIRECTORY_BACKEND"
raceNameExpirationIntervalEnvVar = "LOBBY_RACE_NAME_EXPIRATION_INTERVAL"
engineImageTemplateEnvVar = "LOBBY_ENGINE_IMAGE_TEMPLATE"
// OpenTelemetry settings. The OTEL_* names carry no LOBBY_ prefix; only the
// two stdout toggles are service-prefixed.
otelServiceNameEnvVar = "OTEL_SERVICE_NAME"
otelTracesExporterEnvVar = "OTEL_TRACES_EXPORTER"
otelMetricsExporterEnvVar = "OTEL_METRICS_EXPORTER"
otelExporterOTLPProtocolEnvVar = "OTEL_EXPORTER_OTLP_PROTOCOL"
otelExporterOTLPTracesProtocolEnvVar = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
otelStdoutTracesEnabledEnvVar = "LOBBY_OTEL_STDOUT_TRACES_ENABLED"
otelStdoutMetricsEnabledEnvVar = "LOBBY_OTEL_STDOUT_METRICS_ENABLED"
// Default values installed by DefaultConfig. The three read/idle timeout
// defaults are shared by the public and internal HTTP listeners.
defaultShutdownTimeout = 30 * time.Second
defaultLogLevel = "info"
defaultPublicHTTPAddr = ":8094"
defaultInternalHTTPAddr = ":8095"
defaultReadHeaderTimeout = 2 * time.Second
defaultReadTimeout = 10 * time.Second
defaultIdleTimeout = time.Minute
defaultGMEventsStream = "gm:lobby_events"
defaultGMEventsReadBlockTimeout = 2 * time.Second
defaultUserLifecycleStream = "user:lifecycle_events"
defaultUserLifecycleReadBlockTimeout = 2 * time.Second
defaultRuntimeStartJobsStream = "runtime:start_jobs"
defaultRuntimeStopJobsStream = "runtime:stop_jobs"
defaultRuntimeJobResultsStream = "runtime:job_results"
defaultRuntimeJobResultsReadBlockTimeout = 2 * time.Second
defaultNotificationIntentsStream = "notification:intents"
defaultUserServiceTimeout = time.Second
defaultGMTimeout = 5 * time.Second
defaultEnrollmentAutomationInterval = 30 * time.Second
defaultRaceNameExpirationInterval = time.Hour
// defaultEngineImageTemplate appends the engineimage placeholder as the
// image tag.
defaultEngineImageTemplate = "galaxy/game:" + engineimage.VersionPlaceholder
defaultOTelServiceName = "galaxy-lobby"
// RaceNameDirectoryBackendPostgres selects the PostgreSQL-backed
// Race Name Directory adapter. It is the default production backend
// after PG_PLAN.md §6B.
RaceNameDirectoryBackendPostgres = "postgres"
// RaceNameDirectoryBackendStub selects the in-process Race Name
// Directory stub used by unit tests that do not need PostgreSQL.
RaceNameDirectoryBackendStub = "stub"
)
// Config stores the full Game Lobby Service process configuration.
// DefaultConfig returns a value with every default applied.
type Config struct {
// ShutdownTimeout bounds graceful shutdown of every long-lived component.
ShutdownTimeout time.Duration
// Logging configures the process-wide structured logger.
Logging LoggingConfig
// PublicHTTP configures the public authenticated HTTP listener that serves
// gateway-forwarded player commands.
PublicHTTP PublicHTTPConfig
// InternalHTTP configures the trusted internal HTTP listener that serves
// Game Master registration and admin operations.
InternalHTTP InternalHTTPConfig
// Redis configures the shared Redis client and the Redis Streams keys
// consumed by the runnable service skeleton and its future workers.
Redis RedisConfig
// Postgres configures the PostgreSQL-backed durable store consumed via
// `pkg/postgres`.
Postgres PostgresConfig
// UserService configures the synchronous User Service eligibility client.
UserService UserServiceConfig
// GM configures the synchronous Game Master registration client.
GM GMConfig
// EnrollmentAutomation configures the periodic enrollment automation
// worker.
EnrollmentAutomation EnrollmentAutomationConfig
// RaceNameDirectory configures the Race Name Directory backend
// selector. It governs which adapter is wired by the runtime:
// PostgreSQL-backed persistence in production (the default), an
// in-process stub for tests that do not need PostgreSQL.
RaceNameDirectory RaceNameDirectoryConfig
// PendingRegistration configures the periodic worker that releases
// every pending_registration whose eligible_until has passed.
PendingRegistration PendingRegistrationConfig
// RuntimeManager configures the Runtime Manager publisher contract.
RuntimeManager RuntimeManagerConfig
// Telemetry configures the process-wide OpenTelemetry runtime.
Telemetry TelemetryConfig
}
// RaceNameDirectoryConfig configures which Race Name Directory adapter
// is wired into the runtime.
type RaceNameDirectoryConfig struct {
// Backend selects the Race Name Directory adapter. Accepted values
// are RaceNameDirectoryBackendPostgres and RaceNameDirectoryBackendStub.
// DefaultConfig selects RaceNameDirectoryBackendPostgres; Validate
// rejects the empty string and any other value.
Backend string
}
// Validate checks the Race Name Directory backend selector. It returns nil
// for the two supported backends, a dedicated error for an empty selector,
// and an error naming both accepted values for anything else.
func (cfg RaceNameDirectoryConfig) Validate() error {
	backend := cfg.Backend

	// Supported selectors pass straight through.
	if backend == RaceNameDirectoryBackendPostgres || backend == RaceNameDirectoryBackendStub {
		return nil
	}

	// An unset selector gets its own message so operators can tell it
	// apart from a misspelled one.
	if backend == "" {
		return fmt.Errorf("race name directory backend must not be empty")
	}

	return fmt.Errorf(
		"race name directory backend %q must be one of %q or %q",
		backend,
		RaceNameDirectoryBackendPostgres,
		RaceNameDirectoryBackendStub,
	)
}
// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
// Level stores the process log level accepted by log/slog.
// DefaultConfig sets it to "info".
Level string
}
// PublicHTTPConfig configures the public authenticated HTTP listener.
// Validate requires a non-blank host:port Addr and positive timeouts.
type PublicHTTPConfig struct {
// Addr stores the TCP listen address. DefaultConfig sets it to ":8094".
Addr string
// ReadHeaderTimeout bounds request-header reading.
ReadHeaderTimeout time.Duration
// ReadTimeout bounds reading one request.
ReadTimeout time.Duration
// IdleTimeout bounds how long keep-alive connections stay open.
IdleTimeout time.Duration
}
// Validate checks the public HTTP listener settings in a fixed order and
// returns the first problem found: blank address, address rejected by
// isTCPAddr, then each non-positive timeout (read header, read, idle).
// It returns nil when every check passes.
func (cfg PublicHTTPConfig) Validate() error {
	if strings.TrimSpace(cfg.Addr) == "" {
		return fmt.Errorf("public HTTP addr must not be empty")
	}
	if !isTCPAddr(cfg.Addr) {
		return fmt.Errorf("public HTTP addr %q must use host:port form", cfg.Addr)
	}
	if cfg.ReadHeaderTimeout <= 0 {
		return fmt.Errorf("public HTTP read header timeout must be positive")
	}
	if cfg.ReadTimeout <= 0 {
		return fmt.Errorf("public HTTP read timeout must be positive")
	}
	if cfg.IdleTimeout <= 0 {
		return fmt.Errorf("public HTTP idle timeout must be positive")
	}
	return nil
}
// InternalHTTPConfig configures the trusted internal HTTP listener.
// Validate requires a non-blank host:port Addr and positive timeouts.
type InternalHTTPConfig struct {
// Addr stores the TCP listen address. DefaultConfig sets it to ":8095".
Addr string
// ReadHeaderTimeout bounds request-header reading.
ReadHeaderTimeout time.Duration
// ReadTimeout bounds reading one request.
ReadTimeout time.Duration
// IdleTimeout bounds how long keep-alive connections stay open.
IdleTimeout time.Duration
}
// Validate checks the internal HTTP listener settings in a fixed order and
// returns the first problem found: blank address, address rejected by
// isTCPAddr, then each non-positive timeout (read header, read, idle).
// It returns nil when every check passes.
func (cfg InternalHTTPConfig) Validate() error {
	addr := cfg.Addr

	if strings.TrimSpace(addr) == "" {
		return fmt.Errorf("internal HTTP addr must not be empty")
	}
	if !isTCPAddr(addr) {
		return fmt.Errorf("internal HTTP addr %q must use host:port form", addr)
	}
	if cfg.ReadHeaderTimeout <= 0 {
		return fmt.Errorf("internal HTTP read header timeout must be positive")
	}
	if cfg.ReadTimeout <= 0 {
		return fmt.Errorf("internal HTTP read timeout must be positive")
	}
	if cfg.IdleTimeout <= 0 {
		return fmt.Errorf("internal HTTP idle timeout must be positive")
	}
	return nil
}
// RedisConfig configures the Game Lobby Redis connection topology and the
// Redis Stream names Lobby reads from / writes to. Per-call timeouts and
// connection topology live inside `Conn`.
type RedisConfig struct {
// Conn carries the connection topology (master, replicas, password, db,
// per-call timeout). Loaded via redisconn.LoadFromEnv("LOBBY"); rejects
// the deprecated LOBBY_REDIS_TLS_ENABLED / LOBBY_REDIS_USERNAME env vars
// at startup.
Conn redisconn.Config
// GMEventsStream stores the Redis Streams key for Game Master runtime
// events consumed by Lobby.
GMEventsStream string
// GMEventsReadBlockTimeout bounds the maximum blocking read window on
// GMEventsStream.
GMEventsReadBlockTimeout time.Duration
// RuntimeStartJobsStream stores the Redis Streams key Lobby writes start
// jobs to.
RuntimeStartJobsStream string
// RuntimeStopJobsStream stores the Redis Streams key Lobby writes stop
// jobs to. Lobby publishes stop jobs only from the orphan-container
// path inside the runtime job result worker.
RuntimeStopJobsStream string
// RuntimeJobResultsStream stores the Redis Streams key Lobby reads
// runtime job results from.
RuntimeJobResultsStream string
// RuntimeJobResultsReadBlockTimeout bounds the maximum blocking read window
// on RuntimeJobResultsStream.
RuntimeJobResultsReadBlockTimeout time.Duration
// NotificationIntentsStream stores the Redis Streams key Lobby writes
// notification intents to.
NotificationIntentsStream string
// UserLifecycleStream stores the Redis Streams key Lobby reads
// User Service lifecycle events from. The stream is consumed by the
// cascade worker.
UserLifecycleStream string
// UserLifecycleReadBlockTimeout bounds the maximum blocking read
// window on UserLifecycleStream.
UserLifecycleReadBlockTimeout time.Duration
}
// Validate checks the Redis settings. The connection topology is validated
// first and its error is returned unchanged. After that every stream key
// must be non-blank and every read-block timeout positive; the first
// failing field, in declaration order of the checks below, is reported.
func (cfg RedisConfig) Validate() error {
	if err := cfg.Conn.Validate(); err != nil {
		return err
	}

	// Game Master events consumer.
	if strings.TrimSpace(cfg.GMEventsStream) == "" {
		return fmt.Errorf("redis gm events stream must not be empty")
	}
	if cfg.GMEventsReadBlockTimeout <= 0 {
		return fmt.Errorf("redis gm events read block timeout must be positive")
	}

	// Runtime job streams.
	if strings.TrimSpace(cfg.RuntimeStartJobsStream) == "" {
		return fmt.Errorf("redis runtime start jobs stream must not be empty")
	}
	if strings.TrimSpace(cfg.RuntimeStopJobsStream) == "" {
		return fmt.Errorf("redis runtime stop jobs stream must not be empty")
	}
	if strings.TrimSpace(cfg.RuntimeJobResultsStream) == "" {
		return fmt.Errorf("redis runtime job results stream must not be empty")
	}
	if cfg.RuntimeJobResultsReadBlockTimeout <= 0 {
		return fmt.Errorf("redis runtime job results read block timeout must be positive")
	}

	// Notification intents publisher.
	if strings.TrimSpace(cfg.NotificationIntentsStream) == "" {
		return fmt.Errorf("redis notification intents stream must not be empty")
	}

	// User lifecycle consumer.
	if strings.TrimSpace(cfg.UserLifecycleStream) == "" {
		return fmt.Errorf("redis user lifecycle stream must not be empty")
	}
	if cfg.UserLifecycleReadBlockTimeout <= 0 {
		return fmt.Errorf("redis user lifecycle read block timeout must be positive")
	}

	return nil
}
// PostgresConfig configures the PostgreSQL-backed durable store consumed via
// `pkg/postgres`. Topology and pool tuning live in `Conn`; loaded via
// `postgres.LoadFromEnv("LOBBY")`.
type PostgresConfig struct {
// Conn carries the primary plus replica DSN topology and pool tuning.
// DefaultConfig seeds it from postgres.DefaultConfig, and Validate
// delegates to its own Validate method.
Conn postgres.Config
}
// Validate checks the PostgreSQL settings by delegating to the embedded
// connection configuration and returning its result unchanged.
func (cfg PostgresConfig) Validate() error {
	connErr := cfg.Conn.Validate()
	return connErr
}
// UserServiceConfig configures the synchronous User Service eligibility
// client used by the application flow.
type UserServiceConfig struct {
// BaseURL stores the User Service root URL. DefaultConfig leaves it
// empty, and Validate rejects an empty value, so it must be supplied.
BaseURL string
// Timeout bounds one User Service request. DefaultConfig sets it to one
// second.
Timeout time.Duration
}
// Validate checks the User Service client settings. It reports, in order,
// a blank base URL, a base URL rejected by isHTTPURL, and a non-positive
// timeout; it returns nil when all three checks pass.
func (cfg UserServiceConfig) Validate() error {
	if strings.TrimSpace(cfg.BaseURL) == "" {
		return fmt.Errorf("user service base url must not be empty")
	}
	if !isHTTPURL(cfg.BaseURL) {
		return fmt.Errorf("user service base url %q must be an absolute http(s) URL", cfg.BaseURL)
	}
	if cfg.Timeout <= 0 {
		return fmt.Errorf("user service timeout must be positive")
	}
	return nil
}
// GMConfig configures the synchronous Game Master registration client.
type GMConfig struct {
// BaseURL stores the Game Master root URL. DefaultConfig leaves it
// empty, and Validate rejects an empty value, so it must be supplied.
BaseURL string
// Timeout bounds one Game Master request. DefaultConfig sets it to five
// seconds.
Timeout time.Duration
}
// Validate checks the Game Master client settings. It reports, in order,
// a blank base URL, a base URL rejected by isHTTPURL, and a non-positive
// timeout; it returns nil when all three checks pass.
func (cfg GMConfig) Validate() error {
	baseURL := cfg.BaseURL

	if strings.TrimSpace(baseURL) == "" {
		return fmt.Errorf("gm base url must not be empty")
	}
	if !isHTTPURL(baseURL) {
		return fmt.Errorf("gm base url %q must be an absolute http(s) URL", baseURL)
	}
	if cfg.Timeout <= 0 {
		return fmt.Errorf("gm timeout must be positive")
	}
	return nil
}
// EnrollmentAutomationConfig holds the settings of the periodic enrollment
// automation worker.
type EnrollmentAutomationConfig struct {
	// Interval is the time between two enrollment automation ticks. It
	// must be strictly positive.
	Interval time.Duration
}

// Validate returns nil when the tick interval is strictly positive and an
// error otherwise.
func (cfg EnrollmentAutomationConfig) Validate() error {
	if cfg.Interval > 0 {
		return nil
	}
	return fmt.Errorf("enrollment automation interval must be positive")
}
// PendingRegistrationConfig holds the settings of the periodic worker that
// releases expired Race Name Directory pending_registration entries.
type PendingRegistrationConfig struct {
	// Interval is the time between two pending-registration expiration
	// ticks. It must be strictly positive.
	Interval time.Duration
}

// Validate returns nil when the expiration tick interval is strictly
// positive and an error otherwise. The error text speaks of the "race name
// expiration interval" rather than of pending registrations.
func (cfg PendingRegistrationConfig) Validate() error {
	if cfg.Interval > 0 {
		return nil
	}
	return fmt.Errorf("race name expiration interval must be positive")
}
// RuntimeManagerConfig configures the Lobby-side Runtime Manager
// publisher contract. Lobby resolves the Docker image reference it
// publishes on `runtime:start_jobs` from a per-game
// `target_engine_version` and the configured EngineImageTemplate.
type RuntimeManagerConfig struct {
// EngineImageTemplate stores the Docker reference template applied
// to a game's `target_engine_version`. The string must contain the
// literal placeholder `{engine_version}`; Lobby fails fast at
// startup otherwise.
// Validate accepts the template only if engineimage.NewResolver does.
// DefaultConfig sets it to "galaxy/game:" followed by
// engineimage.VersionPlaceholder.
EngineImageTemplate string
}
// Validate checks the Runtime Manager publisher settings by building an
// engineimage resolver from the template. The resolver is discarded; only
// the construction error matters, and it is wrapped with an
// "engine image template" prefix.
func (cfg RuntimeManagerConfig) Validate() error {
	_, err := engineimage.NewResolver(cfg.EngineImageTemplate)
	if err == nil {
		return nil
	}
	return fmt.Errorf("engine image template: %w", err)
}
// TelemetryConfig configures the Game Lobby Service OpenTelemetry runtime.
// Validate copies every field into a telemetry.ProcessConfig and delegates
// to it.
type TelemetryConfig struct {
// ServiceName overrides the default OpenTelemetry service name.
// DefaultConfig sets it to "galaxy-lobby".
ServiceName string
// TracesExporter selects the external traces exporter. Supported values are
// `none` and `otlp`. DefaultConfig sets it to "none".
TracesExporter string
// MetricsExporter selects the external metrics exporter. Supported values
// are `none` and `otlp`. DefaultConfig sets it to "none".
MetricsExporter string
// TracesProtocol selects the OTLP traces protocol when TracesExporter is
// `otlp`.
TracesProtocol string
// MetricsProtocol selects the OTLP metrics protocol when MetricsExporter is
// `otlp`.
MetricsProtocol string
// StdoutTracesEnabled enables the additional stdout trace exporter used
// for local development and debugging.
StdoutTracesEnabled bool
// StdoutMetricsEnabled enables the additional stdout metric exporter used
// for local development and debugging.
StdoutMetricsEnabled bool
}
// Validate checks the OpenTelemetry settings. It mirrors every field into a
// telemetry.ProcessConfig and returns whatever that type's Validate
// reports.
func (cfg TelemetryConfig) Validate() error {
	process := telemetry.ProcessConfig{
		ServiceName:          cfg.ServiceName,
		TracesExporter:       cfg.TracesExporter,
		MetricsExporter:      cfg.MetricsExporter,
		TracesProtocol:       cfg.TracesProtocol,
		MetricsProtocol:      cfg.MetricsProtocol,
		StdoutTracesEnabled:  cfg.StdoutTracesEnabled,
		StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
	}
	return process.Validate()
}
// DefaultConfig builds the baseline Game Lobby Service configuration from
// the package-level default constants and the redisconn / postgres default
// connection settings.
//
// Fields without a default keep their zero value. In particular
// UserService.BaseURL and GM.BaseURL stay empty, so their Validate methods
// reject the result until both are set. The telemetry protocols and stdout
// exporter toggles are also left at their zero values.
func DefaultConfig() Config {
	var cfg Config

	// Process-level settings.
	cfg.ShutdownTimeout = defaultShutdownTimeout
	cfg.Logging.Level = defaultLogLevel

	// Both HTTP listeners share the same timeout defaults and differ only
	// in their listen address.
	cfg.PublicHTTP.Addr = defaultPublicHTTPAddr
	cfg.PublicHTTP.ReadHeaderTimeout = defaultReadHeaderTimeout
	cfg.PublicHTTP.ReadTimeout = defaultReadTimeout
	cfg.PublicHTTP.IdleTimeout = defaultIdleTimeout

	cfg.InternalHTTP.Addr = defaultInternalHTTPAddr
	cfg.InternalHTTP.ReadHeaderTimeout = defaultReadHeaderTimeout
	cfg.InternalHTTP.ReadTimeout = defaultReadTimeout
	cfg.InternalHTTP.IdleTimeout = defaultIdleTimeout

	// Redis connection plus stream keys and blocking-read windows.
	cfg.Redis.Conn = redisconn.DefaultConfig()
	cfg.Redis.GMEventsStream = defaultGMEventsStream
	cfg.Redis.GMEventsReadBlockTimeout = defaultGMEventsReadBlockTimeout
	cfg.Redis.RuntimeStartJobsStream = defaultRuntimeStartJobsStream
	cfg.Redis.RuntimeStopJobsStream = defaultRuntimeStopJobsStream
	cfg.Redis.RuntimeJobResultsStream = defaultRuntimeJobResultsStream
	cfg.Redis.RuntimeJobResultsReadBlockTimeout = defaultRuntimeJobResultsReadBlockTimeout
	cfg.Redis.NotificationIntentsStream = defaultNotificationIntentsStream
	cfg.Redis.UserLifecycleStream = defaultUserLifecycleStream
	cfg.Redis.UserLifecycleReadBlockTimeout = defaultUserLifecycleReadBlockTimeout

	// PostgreSQL connection.
	cfg.Postgres.Conn = postgres.DefaultConfig()

	// Synchronous clients: only the timeouts have defaults.
	cfg.UserService.Timeout = defaultUserServiceTimeout
	cfg.GM.Timeout = defaultGMTimeout

	// Periodic workers and Race Name Directory backend.
	cfg.EnrollmentAutomation.Interval = defaultEnrollmentAutomationInterval
	cfg.RaceNameDirectory.Backend = RaceNameDirectoryBackendPostgres
	cfg.PendingRegistration.Interval = defaultRaceNameExpirationInterval

	// Runtime Manager publisher.
	cfg.RuntimeManager.EngineImageTemplate = defaultEngineImageTemplate

	// Telemetry: external exporters are off by default.
	cfg.Telemetry.ServiceName = defaultOTelServiceName
	cfg.Telemetry.TracesExporter = "none"
	cfg.Telemetry.MetricsExporter = "none"

	return cfg
}