feat: runtime manager
This commit is contained in:
@@ -0,0 +1,632 @@
|
||||
// Package config loads the Runtime Manager process configuration from
|
||||
// environment variables.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/postgres"
|
||||
"galaxy/redisconn"
|
||||
"galaxy/rtmanager/internal/telemetry"
|
||||
)
|
||||
|
||||
const (
|
||||
envPrefix = "RTMANAGER"
|
||||
|
||||
shutdownTimeoutEnvVar = "RTMANAGER_SHUTDOWN_TIMEOUT"
|
||||
logLevelEnvVar = "RTMANAGER_LOG_LEVEL"
|
||||
|
||||
internalHTTPAddrEnvVar = "RTMANAGER_INTERNAL_HTTP_ADDR"
|
||||
internalHTTPReadHeaderTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
|
||||
internalHTTPReadTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_READ_TIMEOUT"
|
||||
internalHTTPWriteTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_WRITE_TIMEOUT"
|
||||
internalHTTPIdleTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_IDLE_TIMEOUT"
|
||||
|
||||
dockerHostEnvVar = "RTMANAGER_DOCKER_HOST"
|
||||
dockerAPIVersionEnvVar = "RTMANAGER_DOCKER_API_VERSION"
|
||||
dockerNetworkEnvVar = "RTMANAGER_DOCKER_NETWORK"
|
||||
dockerLogDriverEnvVar = "RTMANAGER_DOCKER_LOG_DRIVER"
|
||||
dockerLogOptsEnvVar = "RTMANAGER_DOCKER_LOG_OPTS"
|
||||
imagePullPolicyEnvVar = "RTMANAGER_IMAGE_PULL_POLICY"
|
||||
|
||||
defaultCPUQuotaEnvVar = "RTMANAGER_DEFAULT_CPU_QUOTA"
|
||||
defaultMemoryEnvVar = "RTMANAGER_DEFAULT_MEMORY"
|
||||
defaultPIDsLimitEnvVar = "RTMANAGER_DEFAULT_PIDS_LIMIT"
|
||||
containerStopTimeoutSecondsEnvVar = "RTMANAGER_CONTAINER_STOP_TIMEOUT_SECONDS"
|
||||
containerRetentionDaysEnvVar = "RTMANAGER_CONTAINER_RETENTION_DAYS"
|
||||
engineStateMountPathEnvVar = "RTMANAGER_ENGINE_STATE_MOUNT_PATH"
|
||||
engineStateEnvNameEnvVar = "RTMANAGER_ENGINE_STATE_ENV_NAME"
|
||||
gameStateDirModeEnvVar = "RTMANAGER_GAME_STATE_DIR_MODE"
|
||||
gameStateOwnerUIDEnvVar = "RTMANAGER_GAME_STATE_OWNER_UID"
|
||||
gameStateOwnerGIDEnvVar = "RTMANAGER_GAME_STATE_OWNER_GID"
|
||||
gameStateRootEnvVar = "RTMANAGER_GAME_STATE_ROOT"
|
||||
|
||||
startJobsStreamEnvVar = "RTMANAGER_REDIS_START_JOBS_STREAM"
|
||||
stopJobsStreamEnvVar = "RTMANAGER_REDIS_STOP_JOBS_STREAM"
|
||||
jobResultsStreamEnvVar = "RTMANAGER_REDIS_JOB_RESULTS_STREAM"
|
||||
healthEventsStreamEnvVar = "RTMANAGER_REDIS_HEALTH_EVENTS_STREAM"
|
||||
notificationIntentsStreamEnv = "RTMANAGER_NOTIFICATION_INTENTS_STREAM"
|
||||
streamBlockTimeoutEnvVar = "RTMANAGER_STREAM_BLOCK_TIMEOUT"
|
||||
|
||||
inspectIntervalEnvVar = "RTMANAGER_INSPECT_INTERVAL"
|
||||
probeIntervalEnvVar = "RTMANAGER_PROBE_INTERVAL"
|
||||
probeTimeoutEnvVar = "RTMANAGER_PROBE_TIMEOUT"
|
||||
probeFailuresThresholdEnvVar = "RTMANAGER_PROBE_FAILURES_THRESHOLD"
|
||||
|
||||
reconcileIntervalEnvVar = "RTMANAGER_RECONCILE_INTERVAL"
|
||||
cleanupIntervalEnvVar = "RTMANAGER_CLEANUP_INTERVAL"
|
||||
|
||||
gameLeaseTTLSecondsEnvVar = "RTMANAGER_GAME_LEASE_TTL_SECONDS"
|
||||
|
||||
lobbyInternalBaseURLEnvVar = "RTMANAGER_LOBBY_INTERNAL_BASE_URL"
|
||||
lobbyInternalTimeoutEnvVar = "RTMANAGER_LOBBY_INTERNAL_TIMEOUT"
|
||||
|
||||
otelServiceNameEnvVar = "OTEL_SERVICE_NAME"
|
||||
otelTracesExporterEnvVar = "OTEL_TRACES_EXPORTER"
|
||||
otelMetricsExporterEnvVar = "OTEL_METRICS_EXPORTER"
|
||||
otelExporterOTLPProtocolEnvVar = "OTEL_EXPORTER_OTLP_PROTOCOL"
|
||||
otelExporterOTLPTracesProtocolEnvVar = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
|
||||
otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
|
||||
otelStdoutTracesEnabledEnvVar = "RTMANAGER_OTEL_STDOUT_TRACES_ENABLED"
|
||||
otelStdoutMetricsEnabledEnvVar = "RTMANAGER_OTEL_STDOUT_METRICS_ENABLED"
|
||||
|
||||
defaultShutdownTimeout = 30 * time.Second
|
||||
defaultLogLevel = "info"
|
||||
defaultInternalHTTPAddr = ":8096"
|
||||
defaultReadHeaderTimeout = 2 * time.Second
|
||||
defaultReadTimeout = 5 * time.Second
|
||||
defaultWriteTimeout = 15 * time.Second
|
||||
defaultIdleTimeout = 60 * time.Second
|
||||
|
||||
defaultDockerHost = "unix:///var/run/docker.sock"
|
||||
defaultDockerNetwork = "galaxy-net"
|
||||
defaultDockerLogDriver = "json-file"
|
||||
defaultImagePullPolicy = ImagePullPolicyIfMissing
|
||||
|
||||
defaultCPUQuota = 1.0
|
||||
defaultMemory = "512m"
|
||||
defaultPIDsLimit = 512
|
||||
defaultContainerStopTimeout = 30 * time.Second
|
||||
defaultContainerRetention = 30 * 24 * time.Hour
|
||||
defaultEngineStateMountPath = "/var/lib/galaxy-game"
|
||||
defaultEngineStateEnvName = "GAME_STATE_PATH"
|
||||
defaultGameStateDirMode = 0o750
|
||||
|
||||
defaultStartJobsStream = "runtime:start_jobs"
|
||||
defaultStopJobsStream = "runtime:stop_jobs"
|
||||
defaultJobResultsStream = "runtime:job_results"
|
||||
defaultHealthEventsStream = "runtime:health_events"
|
||||
defaultNotificationIntentsKey = "notification:intents"
|
||||
defaultStreamBlockTimeout = 5 * time.Second
|
||||
|
||||
defaultInspectInterval = 30 * time.Second
|
||||
defaultProbeInterval = 15 * time.Second
|
||||
defaultProbeTimeout = 2 * time.Second
|
||||
defaultProbeFailuresThreshold = 3
|
||||
|
||||
defaultReconcileInterval = 5 * time.Minute
|
||||
defaultCleanupInterval = time.Hour
|
||||
|
||||
defaultGameLeaseTTL = 60 * time.Second
|
||||
|
||||
defaultLobbyInternalTimeout = 2 * time.Second
|
||||
|
||||
defaultOTelServiceName = "galaxy-rtmanager"
|
||||
)
|
||||
|
||||
// ImagePullPolicy names one of the supported image pull policies. The
// start service validates a producer-supplied `image_ref` against this
// policy at start time.
type ImagePullPolicy string

// The pull policies frozen by `rtmanager/README.md` §Configuration.
const (
	ImagePullPolicyIfMissing ImagePullPolicy = "if_missing"
	ImagePullPolicyAlways    ImagePullPolicy = "always"
	ImagePullPolicyNever     ImagePullPolicy = "never"
)

// Validate returns nil when p equals one of the frozen pull policies and
// an error naming the accepted values otherwise.
func (p ImagePullPolicy) Validate() error {
	known := p == ImagePullPolicyIfMissing ||
		p == ImagePullPolicyAlways ||
		p == ImagePullPolicyNever
	if known {
		return nil
	}

	return fmt.Errorf("image pull policy %q must be one of %q, %q, %q",
		p, ImagePullPolicyIfMissing, ImagePullPolicyAlways, ImagePullPolicyNever)
}
|
||||
|
||||
// Config stores the full Runtime Manager process configuration.
|
||||
type Config struct {
|
||||
// ShutdownTimeout bounds graceful shutdown of every long-lived
|
||||
// component.
|
||||
ShutdownTimeout time.Duration
|
||||
|
||||
// Logging configures the process-wide structured logger.
|
||||
Logging LoggingConfig
|
||||
|
||||
// InternalHTTP configures the trusted internal HTTP listener that
|
||||
// serves probes and the GM/Admin REST surface.
|
||||
InternalHTTP InternalHTTPConfig
|
||||
|
||||
// Docker configures the Docker SDK client RTM uses to drive the local
|
||||
// Docker daemon.
|
||||
Docker DockerConfig
|
||||
|
||||
// Postgres configures the PostgreSQL-backed durable store consumed via
|
||||
// `pkg/postgres`.
|
||||
Postgres PostgresConfig
|
||||
|
||||
// Redis configures the shared Redis connection topology consumed via
|
||||
// `pkg/redisconn`.
|
||||
Redis RedisConfig
|
||||
|
||||
// Streams stores the stable Redis Stream names RTM reads from and
|
||||
// writes to.
|
||||
Streams StreamsConfig
|
||||
|
||||
// Container stores the per-container defaults applied at start time
|
||||
// when the resolved image does not declare its own labels.
|
||||
Container ContainerConfig
|
||||
|
||||
// Health configures the periodic health-monitoring workers (events
|
||||
// listener, inspect, active probe).
|
||||
Health HealthConfig
|
||||
|
||||
// Cleanup configures the reconciler and container-cleanup workers.
|
||||
Cleanup CleanupConfig
|
||||
|
||||
// Coordination configures the per-game Redis lease used to serialise
|
||||
// operations across all entry points.
|
||||
Coordination CoordinationConfig
|
||||
|
||||
// Lobby configures the synchronous Lobby internal REST client used by
|
||||
// the start service for ancillary lookups.
|
||||
Lobby LobbyConfig
|
||||
|
||||
// Telemetry configures the process-wide OpenTelemetry runtime.
|
||||
Telemetry TelemetryConfig
|
||||
}
|
||||
|
||||
// LoggingConfig holds the settings of the process-wide structured logger.
type LoggingConfig struct {
	// Level is the process log level, in a form accepted by log/slog.
	Level string
}
|
||||
|
||||
// InternalHTTPConfig configures the trusted internal HTTP listener.
|
||||
type InternalHTTPConfig struct {
|
||||
// Addr stores the TCP listen address.
|
||||
Addr string
|
||||
|
||||
// ReadHeaderTimeout bounds request-header reading.
|
||||
ReadHeaderTimeout time.Duration
|
||||
|
||||
// ReadTimeout bounds reading one request.
|
||||
ReadTimeout time.Duration
|
||||
|
||||
// WriteTimeout bounds writing one response.
|
||||
WriteTimeout time.Duration
|
||||
|
||||
// IdleTimeout bounds how long keep-alive connections stay open.
|
||||
IdleTimeout time.Duration
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable internal HTTP listener
|
||||
// configuration.
|
||||
func (cfg InternalHTTPConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Addr) == "":
|
||||
return fmt.Errorf("internal HTTP addr must not be empty")
|
||||
case !isTCPAddr(cfg.Addr):
|
||||
return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr)
|
||||
case cfg.ReadHeaderTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP read header timeout must be positive")
|
||||
case cfg.ReadTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP read timeout must be positive")
|
||||
case cfg.WriteTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP write timeout must be positive")
|
||||
case cfg.IdleTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP idle timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// DockerConfig configures the Docker SDK client.
|
||||
type DockerConfig struct {
|
||||
// Host stores the Docker daemon endpoint (e.g.
|
||||
// `unix:///var/run/docker.sock`).
|
||||
Host string
|
||||
|
||||
// APIVersion overrides the Docker API version. Empty lets the SDK
|
||||
// negotiate.
|
||||
APIVersion string
|
||||
|
||||
// Network stores the user-defined Docker bridge network containers
|
||||
// attach to. Provisioned outside RTM; missing network is a fail-fast
|
||||
// condition at startup.
|
||||
Network string
|
||||
|
||||
// LogDriver stores the Docker logging driver applied to engine
|
||||
// containers.
|
||||
LogDriver string
|
||||
|
||||
// LogOpts stores the comma-separated `key=value` driver options.
|
||||
LogOpts string
|
||||
|
||||
// PullPolicy stores the configured image pull policy.
|
||||
PullPolicy ImagePullPolicy
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Docker configuration.
|
||||
func (cfg DockerConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Host) == "":
|
||||
return fmt.Errorf("docker host must not be empty")
|
||||
case strings.TrimSpace(cfg.Network) == "":
|
||||
return fmt.Errorf("docker network must not be empty")
|
||||
case strings.TrimSpace(cfg.LogDriver) == "":
|
||||
return fmt.Errorf("docker log driver must not be empty")
|
||||
}
|
||||
return cfg.PullPolicy.Validate()
|
||||
}
|
||||
|
||||
// PostgresConfig configures the PostgreSQL-backed durable store consumed
|
||||
// via `pkg/postgres`.
|
||||
type PostgresConfig struct {
|
||||
// Conn carries the primary plus replica DSN topology and pool tuning.
|
||||
Conn postgres.Config
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable PostgreSQL configuration.
|
||||
func (cfg PostgresConfig) Validate() error {
|
||||
return cfg.Conn.Validate()
|
||||
}
|
||||
|
||||
// RedisConfig configures the Runtime Manager Redis connection topology.
|
||||
type RedisConfig struct {
|
||||
// Conn carries the connection topology (master, replicas, password,
|
||||
// db, per-call timeout).
|
||||
Conn redisconn.Config
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Redis configuration.
|
||||
func (cfg RedisConfig) Validate() error {
|
||||
return cfg.Conn.Validate()
|
||||
}
|
||||
|
||||
// StreamsConfig holds the stable Redis Stream names used by Runtime
// Manager, plus the blocking read window for consumers.
type StreamsConfig struct {
	// StartJobs is the Redis Streams key Lobby writes start jobs to.
	StartJobs string

	// StopJobs is the Redis Streams key Lobby writes stop jobs to.
	StopJobs string

	// JobResults is the Redis Streams key RTM writes job outcomes to.
	JobResults string

	// HealthEvents is the Redis Streams key RTM publishes technical
	// health events to.
	HealthEvents string

	// NotificationIntents is the Redis Streams key RTM publishes
	// admin-only notification intents to.
	NotificationIntents string

	// BlockTimeout is the maximum blocking read window for stream
	// consumers.
	BlockTimeout time.Duration
}

// Validate requires every stream name to be non-blank (checked in
// declaration order) and BlockTimeout to be positive. The first violation
// is returned.
func (cfg StreamsConfig) Validate() error {
	names := []struct {
		label string
		key   string
	}{
		{"start jobs", cfg.StartJobs},
		{"stop jobs", cfg.StopJobs},
		{"job results", cfg.JobResults},
		{"health events", cfg.HealthEvents},
		{"notification intents", cfg.NotificationIntents},
	}
	for _, stream := range names {
		if strings.TrimSpace(stream.key) == "" {
			return fmt.Errorf("redis %s stream must not be empty", stream.label)
		}
	}

	if cfg.BlockTimeout <= 0 {
		return fmt.Errorf("redis stream block timeout must be positive")
	}
	return nil
}
|
||||
|
||||
// ContainerConfig stores the per-container defaults applied at start
// time. Resource defaults apply when the resolved engine image does not
// expose `com.galaxy.cpu_quota` / `com.galaxy.memory` /
// `com.galaxy.pids_limit` labels.
type ContainerConfig struct {
	// DefaultCPUQuota is the fallback `--cpus` value applied when the
	// image does not declare `com.galaxy.cpu_quota`. It must be a
	// positive, finite number.
	DefaultCPUQuota float64

	// DefaultMemory is the fallback `--memory` value applied when the
	// image does not declare `com.galaxy.memory`.
	DefaultMemory string

	// DefaultPIDsLimit is the fallback `--pids-limit` value applied
	// when the image does not declare `com.galaxy.pids_limit`.
	DefaultPIDsLimit int

	// StopTimeout bounds graceful container stop before Docker fires
	// SIGKILL.
	StopTimeout time.Duration

	// Retention stores the TTL after which `status=stopped` containers
	// are removed by the cleanup worker.
	Retention time.Duration

	// EngineStateMountPath is the in-container path the per-game state
	// directory is bind-mounted to.
	EngineStateMountPath string

	// EngineStateEnvName is the env-var name forwarded to the engine
	// pointing at EngineStateMountPath.
	EngineStateEnvName string

	// GameStateDirMode stores the unix permissions applied to the
	// per-game state directory on creation.
	GameStateDirMode uint32

	// GameStateOwnerUID stores the unix uid applied to the per-game
	// state directory on creation.
	GameStateOwnerUID int

	// GameStateOwnerGID stores the unix gid applied to the per-game
	// state directory on creation.
	GameStateOwnerGID int

	// GameStateRoot is the host path under which per-game state
	// directories are created.
	GameStateRoot string
}

// Validate reports whether cfg stores usable container defaults. Checks
// run in declaration order and the first violation is returned.
//
// GameStateOwnerUID and GameStateOwnerGID are not validated here.
func (cfg ContainerConfig) Validate() error {
	switch {
	case cfg.DefaultCPUQuota <= 0:
		return fmt.Errorf("default cpu quota must be positive")
	case cfg.DefaultCPUQuota*0 != 0:
		// NaN compares false against every bound, so `<= 0` alone lets
		// NaN (and +Inf) through. Multiplying by zero yields 0 for every
		// finite value and NaN for NaN/±Inf, which rejects both without
		// needing an extra import.
		return fmt.Errorf("default cpu quota must be a finite number")
	case strings.TrimSpace(cfg.DefaultMemory) == "":
		return fmt.Errorf("default memory must not be empty")
	case cfg.DefaultPIDsLimit <= 0:
		return fmt.Errorf("default pids limit must be positive")
	case cfg.StopTimeout <= 0:
		return fmt.Errorf("container stop timeout must be positive")
	case cfg.Retention <= 0:
		return fmt.Errorf("container retention must be positive")
	case strings.TrimSpace(cfg.EngineStateMountPath) == "":
		return fmt.Errorf("engine state mount path must not be empty")
	case strings.TrimSpace(cfg.EngineStateEnvName) == "":
		return fmt.Errorf("engine state env name must not be empty")
	case cfg.GameStateDirMode == 0:
		return fmt.Errorf("game state dir mode must be non-zero")
	case strings.TrimSpace(cfg.GameStateRoot) == "":
		return fmt.Errorf("game state root must not be empty")
	case !strings.HasPrefix(strings.TrimSpace(cfg.GameStateRoot), "/"):
		return fmt.Errorf("game state root %q must be an absolute path", cfg.GameStateRoot)
	default:
		return nil
	}
}
|
||||
|
||||
// HealthConfig holds the settings of the periodic health-monitoring
// workers (Docker events listener, periodic inspect, active probe).
type HealthConfig struct {
	// InspectInterval is the time between two periodic Docker inspect
	// passes.
	InspectInterval time.Duration

	// ProbeInterval is the time between two engine `/healthz` probe
	// rounds.
	ProbeInterval time.Duration

	// ProbeTimeout limits one engine `/healthz` request.
	ProbeTimeout time.Duration

	// ProbeFailuresThreshold is the number of consecutive failures that
	// triggers a `probe_failed` event.
	ProbeFailuresThreshold int
}

// Validate requires every interval, the probe timeout and the failure
// threshold to be positive, returning the first violation in declaration
// order.
func (cfg HealthConfig) Validate() error {
	durations := []struct {
		label string
		value time.Duration
	}{
		{"inspect interval", cfg.InspectInterval},
		{"probe interval", cfg.ProbeInterval},
		{"probe timeout", cfg.ProbeTimeout},
	}
	for _, d := range durations {
		if d.value <= 0 {
			return fmt.Errorf("%s must be positive", d.label)
		}
	}

	if cfg.ProbeFailuresThreshold <= 0 {
		return fmt.Errorf("probe failures threshold must be positive")
	}
	return nil
}
|
||||
|
||||
// CleanupConfig holds the settings of the reconciler and
// container-cleanup workers.
type CleanupConfig struct {
	// ReconcileInterval is the time between two reconciler passes.
	ReconcileInterval time.Duration

	// CleanupInterval is the time between two container-cleanup passes.
	CleanupInterval time.Duration
}

// Validate requires both intervals to be positive; the reconcile interval
// is checked first.
func (cfg CleanupConfig) Validate() error {
	if cfg.ReconcileInterval <= 0 {
		return fmt.Errorf("reconcile interval must be positive")
	}
	if cfg.CleanupInterval <= 0 {
		return fmt.Errorf("cleanup interval must be positive")
	}
	return nil
}
|
||||
|
||||
// CoordinationConfig holds the settings of the per-game Redis lease.
type CoordinationConfig struct {
	// GameLeaseTTL is the per-game lease lifetime, renewed every
	// half-TTL while an operation runs.
	GameLeaseTTL time.Duration
}

// Validate returns nil when the lease TTL is positive and an error
// otherwise.
func (cfg CoordinationConfig) Validate() error {
	if cfg.GameLeaseTTL > 0 {
		return nil
	}
	return fmt.Errorf("game lease ttl must be positive")
}
|
||||
|
||||
// LobbyConfig configures the synchronous Lobby internal REST client.
|
||||
type LobbyConfig struct {
|
||||
// BaseURL stores the trusted Lobby internal listener base URL.
|
||||
BaseURL string
|
||||
|
||||
// Timeout bounds one Lobby internal request.
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Lobby client
|
||||
// configuration.
|
||||
func (cfg LobbyConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.BaseURL) == "":
|
||||
return fmt.Errorf("lobby internal base url must not be empty")
|
||||
case !isHTTPURL(cfg.BaseURL):
|
||||
return fmt.Errorf("lobby internal base url %q must be an absolute http(s) URL", cfg.BaseURL)
|
||||
case cfg.Timeout <= 0:
|
||||
return fmt.Errorf("lobby internal timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// TelemetryConfig configures the Runtime Manager OpenTelemetry runtime.
|
||||
type TelemetryConfig struct {
|
||||
// ServiceName overrides the default OpenTelemetry service name.
|
||||
ServiceName string
|
||||
|
||||
// TracesExporter selects the external traces exporter. Supported
|
||||
// values are `none` and `otlp`.
|
||||
TracesExporter string
|
||||
|
||||
// MetricsExporter selects the external metrics exporter. Supported
|
||||
// values are `none` and `otlp`.
|
||||
MetricsExporter string
|
||||
|
||||
// TracesProtocol selects the OTLP traces protocol when
|
||||
// TracesExporter is `otlp`.
|
||||
TracesProtocol string
|
||||
|
||||
// MetricsProtocol selects the OTLP metrics protocol when
|
||||
// MetricsExporter is `otlp`.
|
||||
MetricsProtocol string
|
||||
|
||||
// StdoutTracesEnabled enables the additional stdout trace exporter
|
||||
// used for local development and debugging.
|
||||
StdoutTracesEnabled bool
|
||||
|
||||
// StdoutMetricsEnabled enables the additional stdout metric
|
||||
// exporter used for local development and debugging.
|
||||
StdoutMetricsEnabled bool
|
||||
}
|
||||
|
||||
// Validate reports whether cfg contains a supported OpenTelemetry
|
||||
// configuration.
|
||||
func (cfg TelemetryConfig) Validate() error {
|
||||
return telemetry.ProcessConfig{
|
||||
ServiceName: cfg.ServiceName,
|
||||
TracesExporter: cfg.TracesExporter,
|
||||
MetricsExporter: cfg.MetricsExporter,
|
||||
TracesProtocol: cfg.TracesProtocol,
|
||||
MetricsProtocol: cfg.MetricsProtocol,
|
||||
StdoutTracesEnabled: cfg.StdoutTracesEnabled,
|
||||
StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
|
||||
}.Validate()
|
||||
}
|
||||
|
||||
// DefaultConfig returns the default Runtime Manager process configuration.
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
ShutdownTimeout: defaultShutdownTimeout,
|
||||
Logging: LoggingConfig{
|
||||
Level: defaultLogLevel,
|
||||
},
|
||||
InternalHTTP: InternalHTTPConfig{
|
||||
Addr: defaultInternalHTTPAddr,
|
||||
ReadHeaderTimeout: defaultReadHeaderTimeout,
|
||||
ReadTimeout: defaultReadTimeout,
|
||||
WriteTimeout: defaultWriteTimeout,
|
||||
IdleTimeout: defaultIdleTimeout,
|
||||
},
|
||||
Docker: DockerConfig{
|
||||
Host: defaultDockerHost,
|
||||
Network: defaultDockerNetwork,
|
||||
LogDriver: defaultDockerLogDriver,
|
||||
PullPolicy: defaultImagePullPolicy,
|
||||
},
|
||||
Postgres: PostgresConfig{
|
||||
Conn: postgres.DefaultConfig(),
|
||||
},
|
||||
Redis: RedisConfig{
|
||||
Conn: redisconn.DefaultConfig(),
|
||||
},
|
||||
Streams: StreamsConfig{
|
||||
StartJobs: defaultStartJobsStream,
|
||||
StopJobs: defaultStopJobsStream,
|
||||
JobResults: defaultJobResultsStream,
|
||||
HealthEvents: defaultHealthEventsStream,
|
||||
NotificationIntents: defaultNotificationIntentsKey,
|
||||
BlockTimeout: defaultStreamBlockTimeout,
|
||||
},
|
||||
Container: ContainerConfig{
|
||||
DefaultCPUQuota: defaultCPUQuota,
|
||||
DefaultMemory: defaultMemory,
|
||||
DefaultPIDsLimit: defaultPIDsLimit,
|
||||
StopTimeout: defaultContainerStopTimeout,
|
||||
Retention: defaultContainerRetention,
|
||||
EngineStateMountPath: defaultEngineStateMountPath,
|
||||
EngineStateEnvName: defaultEngineStateEnvName,
|
||||
GameStateDirMode: defaultGameStateDirMode,
|
||||
},
|
||||
Health: HealthConfig{
|
||||
InspectInterval: defaultInspectInterval,
|
||||
ProbeInterval: defaultProbeInterval,
|
||||
ProbeTimeout: defaultProbeTimeout,
|
||||
ProbeFailuresThreshold: defaultProbeFailuresThreshold,
|
||||
},
|
||||
Cleanup: CleanupConfig{
|
||||
ReconcileInterval: defaultReconcileInterval,
|
||||
CleanupInterval: defaultCleanupInterval,
|
||||
},
|
||||
Coordination: CoordinationConfig{
|
||||
GameLeaseTTL: defaultGameLeaseTTL,
|
||||
},
|
||||
Lobby: LobbyConfig{
|
||||
Timeout: defaultLobbyInternalTimeout,
|
||||
},
|
||||
Telemetry: TelemetryConfig{
|
||||
ServiceName: defaultOTelServiceName,
|
||||
TracesExporter: "none",
|
||||
MetricsExporter: "none",
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// validEnv sets, for the duration of the calling test, the environment
// variables the tests in this file supply for a successful LoadFromEnv.
func validEnv(t *testing.T) {
	t.Helper()

	required := [][2]string{
		{"RTMANAGER_POSTGRES_PRIMARY_DSN", "postgres://rtm:secret@localhost:5432/galaxy?search_path=rtmanager&sslmode=disable"},
		{"RTMANAGER_REDIS_MASTER_ADDR", "localhost:6379"},
		{"RTMANAGER_REDIS_PASSWORD", "secret"},
		{"RTMANAGER_GAME_STATE_ROOT", "/var/lib/galaxy/games"},
		{"RTMANAGER_LOBBY_INTERNAL_BASE_URL", "http://lobby:8095"},
	}
	for _, kv := range required {
		t.Setenv(kv[0], kv[1])
	}
}
|
||||
|
||||
func TestLoadFromEnvAcceptsDefaults(t *testing.T) {
|
||||
validEnv(t)
|
||||
|
||||
cfg, err := LoadFromEnv()
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, ":8096", cfg.InternalHTTP.Addr)
|
||||
require.Equal(t, "unix:///var/run/docker.sock", cfg.Docker.Host)
|
||||
require.Equal(t, "galaxy-net", cfg.Docker.Network)
|
||||
require.Equal(t, "json-file", cfg.Docker.LogDriver)
|
||||
require.Equal(t, ImagePullPolicyIfMissing, cfg.Docker.PullPolicy)
|
||||
require.Equal(t, "runtime:start_jobs", cfg.Streams.StartJobs)
|
||||
require.Equal(t, "runtime:stop_jobs", cfg.Streams.StopJobs)
|
||||
require.Equal(t, "runtime:job_results", cfg.Streams.JobResults)
|
||||
require.Equal(t, "runtime:health_events", cfg.Streams.HealthEvents)
|
||||
require.Equal(t, "notification:intents", cfg.Streams.NotificationIntents)
|
||||
require.Equal(t, 30*time.Second, cfg.Container.StopTimeout)
|
||||
require.Equal(t, 30*24*time.Hour, cfg.Container.Retention)
|
||||
require.Equal(t, "/var/lib/galaxy-game", cfg.Container.EngineStateMountPath)
|
||||
require.Equal(t, "GAME_STATE_PATH", cfg.Container.EngineStateEnvName)
|
||||
require.EqualValues(t, 0o750, cfg.Container.GameStateDirMode)
|
||||
require.Equal(t, 60*time.Second, cfg.Coordination.GameLeaseTTL)
|
||||
require.Equal(t, "http://lobby:8095", cfg.Lobby.BaseURL)
|
||||
require.Equal(t, 2*time.Second, cfg.Lobby.Timeout)
|
||||
require.Equal(t, "galaxy-rtmanager", cfg.Telemetry.ServiceName)
|
||||
}
|
||||
|
||||
func TestLoadFromEnvHonoursOverrides(t *testing.T) {
|
||||
validEnv(t)
|
||||
t.Setenv("RTMANAGER_INTERNAL_HTTP_ADDR", ":9000")
|
||||
t.Setenv("RTMANAGER_DOCKER_NETWORK", "custom-net")
|
||||
t.Setenv("RTMANAGER_IMAGE_PULL_POLICY", "always")
|
||||
t.Setenv("RTMANAGER_REDIS_START_JOBS_STREAM", "custom:start_jobs")
|
||||
t.Setenv("RTMANAGER_GAME_LEASE_TTL_SECONDS", "120")
|
||||
t.Setenv("RTMANAGER_CONTAINER_STOP_TIMEOUT_SECONDS", "45")
|
||||
t.Setenv("RTMANAGER_CONTAINER_RETENTION_DAYS", "7")
|
||||
t.Setenv("RTMANAGER_GAME_STATE_DIR_MODE", "0700")
|
||||
|
||||
cfg, err := LoadFromEnv()
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, ":9000", cfg.InternalHTTP.Addr)
|
||||
require.Equal(t, "custom-net", cfg.Docker.Network)
|
||||
require.Equal(t, ImagePullPolicyAlways, cfg.Docker.PullPolicy)
|
||||
require.Equal(t, "custom:start_jobs", cfg.Streams.StartJobs)
|
||||
require.Equal(t, 120*time.Second, cfg.Coordination.GameLeaseTTL)
|
||||
require.Equal(t, 45*time.Second, cfg.Container.StopTimeout)
|
||||
require.Equal(t, 7*24*time.Hour, cfg.Container.Retention)
|
||||
require.EqualValues(t, 0o700, cfg.Container.GameStateDirMode)
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRejectsUnknownPullPolicy(t *testing.T) {
|
||||
validEnv(t)
|
||||
t.Setenv("RTMANAGER_IMAGE_PULL_POLICY", "weekly")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "image pull policy")
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRequiresGameStateRoot(t *testing.T) {
|
||||
t.Setenv("RTMANAGER_POSTGRES_PRIMARY_DSN", "postgres://rtm:secret@localhost:5432/galaxy")
|
||||
t.Setenv("RTMANAGER_REDIS_MASTER_ADDR", "localhost:6379")
|
||||
t.Setenv("RTMANAGER_REDIS_PASSWORD", "secret")
|
||||
t.Setenv("RTMANAGER_LOBBY_INTERNAL_BASE_URL", "http://lobby:8095")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "RTMANAGER_GAME_STATE_ROOT")
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRequiresLobbyBaseURL(t *testing.T) {
|
||||
t.Setenv("RTMANAGER_POSTGRES_PRIMARY_DSN", "postgres://rtm:secret@localhost:5432/galaxy")
|
||||
t.Setenv("RTMANAGER_REDIS_MASTER_ADDR", "localhost:6379")
|
||||
t.Setenv("RTMANAGER_REDIS_PASSWORD", "secret")
|
||||
t.Setenv("RTMANAGER_GAME_STATE_ROOT", "/var/lib/galaxy/games")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "RTMANAGER_LOBBY_INTERNAL_BASE_URL")
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRejectsRelativeStateRoot(t *testing.T) {
|
||||
validEnv(t)
|
||||
t.Setenv("RTMANAGER_GAME_STATE_ROOT", "relative/path")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "absolute path")
|
||||
}
|
||||
|
||||
func TestLoadFromEnvRejectsBadLogLevel(t *testing.T) {
|
||||
validEnv(t)
|
||||
t.Setenv("RTMANAGER_LOG_LEVEL", "verbose")
|
||||
|
||||
_, err := LoadFromEnv()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "RTMANAGER_LOG_LEVEL")
|
||||
}
|
||||
|
||||
func TestImagePullPolicyValidate(t *testing.T) {
|
||||
require.NoError(t, ImagePullPolicyIfMissing.Validate())
|
||||
require.NoError(t, ImagePullPolicyAlways.Validate())
|
||||
require.NoError(t, ImagePullPolicyNever.Validate())
|
||||
require.Error(t, ImagePullPolicy("monthly").Validate())
|
||||
}
|
||||
|
||||
func TestInternalHTTPValidateRejectsBadAddr(t *testing.T) {
|
||||
cfg := DefaultConfig().InternalHTTP
|
||||
cfg.Addr = "not-an-addr"
|
||||
err := cfg.Validate()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "host:port")
|
||||
}
|
||||
|
||||
func TestStreamsValidateRequiresAllNames(t *testing.T) {
|
||||
cfg := DefaultConfig().Streams
|
||||
cfg.StartJobs = " "
|
||||
err := cfg.Validate()
|
||||
require.Error(t, err)
|
||||
require.True(t, strings.Contains(err.Error(), "start jobs"))
|
||||
}
|
||||
@@ -0,0 +1,319 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/postgres"
|
||||
"galaxy/redisconn"
|
||||
)
|
||||
|
||||
// LoadFromEnv builds Config from environment variables and validates the
|
||||
// resulting configuration.
|
||||
func LoadFromEnv() (Config, error) {
|
||||
cfg := DefaultConfig()
|
||||
|
||||
var err error
|
||||
|
||||
cfg.ShutdownTimeout, err = durationEnv(shutdownTimeoutEnvVar, cfg.ShutdownTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
cfg.Logging.Level = stringEnv(logLevelEnvVar, cfg.Logging.Level)
|
||||
|
||||
cfg.InternalHTTP.Addr = stringEnv(internalHTTPAddrEnvVar, cfg.InternalHTTP.Addr)
|
||||
cfg.InternalHTTP.ReadHeaderTimeout, err = durationEnv(internalHTTPReadHeaderTimeoutEnvVar, cfg.InternalHTTP.ReadHeaderTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.InternalHTTP.ReadTimeout, err = durationEnv(internalHTTPReadTimeoutEnvVar, cfg.InternalHTTP.ReadTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.InternalHTTP.WriteTimeout, err = durationEnv(internalHTTPWriteTimeoutEnvVar, cfg.InternalHTTP.WriteTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.InternalHTTP.IdleTimeout, err = durationEnv(internalHTTPIdleTimeoutEnvVar, cfg.InternalHTTP.IdleTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
cfg.Docker.Host = stringEnv(dockerHostEnvVar, cfg.Docker.Host)
|
||||
cfg.Docker.APIVersion = stringEnv(dockerAPIVersionEnvVar, cfg.Docker.APIVersion)
|
||||
cfg.Docker.Network = stringEnv(dockerNetworkEnvVar, cfg.Docker.Network)
|
||||
cfg.Docker.LogDriver = stringEnv(dockerLogDriverEnvVar, cfg.Docker.LogDriver)
|
||||
cfg.Docker.LogOpts = stringEnv(dockerLogOptsEnvVar, cfg.Docker.LogOpts)
|
||||
if raw, ok := os.LookupEnv(imagePullPolicyEnvVar); ok {
|
||||
cfg.Docker.PullPolicy = ImagePullPolicy(strings.TrimSpace(raw))
|
||||
}
|
||||
|
||||
pgConn, err := postgres.LoadFromEnv(envPrefix)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Postgres.Conn = pgConn
|
||||
|
||||
redisConn, err := redisconn.LoadFromEnv(envPrefix)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Redis.Conn = redisConn
|
||||
|
||||
cfg.Streams.StartJobs = stringEnv(startJobsStreamEnvVar, cfg.Streams.StartJobs)
|
||||
cfg.Streams.StopJobs = stringEnv(stopJobsStreamEnvVar, cfg.Streams.StopJobs)
|
||||
cfg.Streams.JobResults = stringEnv(jobResultsStreamEnvVar, cfg.Streams.JobResults)
|
||||
cfg.Streams.HealthEvents = stringEnv(healthEventsStreamEnvVar, cfg.Streams.HealthEvents)
|
||||
cfg.Streams.NotificationIntents = stringEnv(notificationIntentsStreamEnv, cfg.Streams.NotificationIntents)
|
||||
cfg.Streams.BlockTimeout, err = durationEnv(streamBlockTimeoutEnvVar, cfg.Streams.BlockTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
cfg.Container.DefaultCPUQuota, err = floatEnv(defaultCPUQuotaEnvVar, cfg.Container.DefaultCPUQuota)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Container.DefaultMemory = stringEnv(defaultMemoryEnvVar, cfg.Container.DefaultMemory)
|
||||
cfg.Container.DefaultPIDsLimit, err = intEnv(defaultPIDsLimitEnvVar, cfg.Container.DefaultPIDsLimit)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Container.StopTimeout, err = secondsEnv(containerStopTimeoutSecondsEnvVar, cfg.Container.StopTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Container.Retention, err = daysEnv(containerRetentionDaysEnvVar, cfg.Container.Retention)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Container.EngineStateMountPath = stringEnv(engineStateMountPathEnvVar, cfg.Container.EngineStateMountPath)
|
||||
cfg.Container.EngineStateEnvName = stringEnv(engineStateEnvNameEnvVar, cfg.Container.EngineStateEnvName)
|
||||
cfg.Container.GameStateDirMode, err = octalUint32Env(gameStateDirModeEnvVar, cfg.Container.GameStateDirMode)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Container.GameStateOwnerUID, err = intEnv(gameStateOwnerUIDEnvVar, cfg.Container.GameStateOwnerUID)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Container.GameStateOwnerGID, err = intEnv(gameStateOwnerGIDEnvVar, cfg.Container.GameStateOwnerGID)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
root, ok := os.LookupEnv(gameStateRootEnvVar)
|
||||
if !ok || strings.TrimSpace(root) == "" {
|
||||
return Config{}, fmt.Errorf("%s must be set", gameStateRootEnvVar)
|
||||
}
|
||||
cfg.Container.GameStateRoot = strings.TrimSpace(root)
|
||||
|
||||
cfg.Health.InspectInterval, err = durationEnv(inspectIntervalEnvVar, cfg.Health.InspectInterval)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Health.ProbeInterval, err = durationEnv(probeIntervalEnvVar, cfg.Health.ProbeInterval)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Health.ProbeTimeout, err = durationEnv(probeTimeoutEnvVar, cfg.Health.ProbeTimeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Health.ProbeFailuresThreshold, err = intEnv(probeFailuresThresholdEnvVar, cfg.Health.ProbeFailuresThreshold)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
cfg.Cleanup.ReconcileInterval, err = durationEnv(reconcileIntervalEnvVar, cfg.Cleanup.ReconcileInterval)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Cleanup.CleanupInterval, err = durationEnv(cleanupIntervalEnvVar, cfg.Cleanup.CleanupInterval)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
cfg.Coordination.GameLeaseTTL, err = secondsEnv(gameLeaseTTLSecondsEnvVar, cfg.Coordination.GameLeaseTTL)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
lobbyURL, ok := os.LookupEnv(lobbyInternalBaseURLEnvVar)
|
||||
if !ok || strings.TrimSpace(lobbyURL) == "" {
|
||||
return Config{}, fmt.Errorf("%s must be set", lobbyInternalBaseURLEnvVar)
|
||||
}
|
||||
cfg.Lobby.BaseURL = strings.TrimSpace(lobbyURL)
|
||||
cfg.Lobby.Timeout, err = durationEnv(lobbyInternalTimeoutEnvVar, cfg.Lobby.Timeout)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
cfg.Telemetry.ServiceName = stringEnv(otelServiceNameEnvVar, cfg.Telemetry.ServiceName)
|
||||
cfg.Telemetry.TracesExporter = normalizeExporterValue(stringEnv(otelTracesExporterEnvVar, cfg.Telemetry.TracesExporter))
|
||||
cfg.Telemetry.MetricsExporter = normalizeExporterValue(stringEnv(otelMetricsExporterEnvVar, cfg.Telemetry.MetricsExporter))
|
||||
cfg.Telemetry.TracesProtocol = normalizeProtocolValue(
|
||||
os.Getenv(otelExporterOTLPTracesProtocolEnvVar),
|
||||
os.Getenv(otelExporterOTLPProtocolEnvVar),
|
||||
cfg.Telemetry.TracesProtocol,
|
||||
)
|
||||
cfg.Telemetry.MetricsProtocol = normalizeProtocolValue(
|
||||
os.Getenv(otelExporterOTLPMetricsProtocolEnvVar),
|
||||
os.Getenv(otelExporterOTLPProtocolEnvVar),
|
||||
cfg.Telemetry.MetricsProtocol,
|
||||
)
|
||||
cfg.Telemetry.StdoutTracesEnabled, err = boolEnv(otelStdoutTracesEnabledEnvVar, cfg.Telemetry.StdoutTracesEnabled)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
cfg.Telemetry.StdoutMetricsEnabled, err = boolEnv(otelStdoutMetricsEnabledEnvVar, cfg.Telemetry.StdoutMetricsEnabled)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// stringEnv returns the whitespace-trimmed value of the environment variable
// name, or fallback when the variable is not set at all. A variable that is
// set but blank yields the empty string rather than fallback.
func stringEnv(name string, fallback string) string {
	if raw, present := os.LookupEnv(name); present {
		return strings.TrimSpace(raw)
	}

	return fallback
}
|
||||
|
||||
// durationEnv reads the environment variable name as a Go duration string
// (the syntax accepted by time.ParseDuration, e.g. "1500ms").
//
// It returns fallback when the variable is not set. A set value that does not
// parse yields a zero duration and an error prefixed with the variable name.
func durationEnv(name string, fallback time.Duration) (time.Duration, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}

	text := strings.TrimSpace(raw)
	duration, parseErr := time.ParseDuration(text)
	if parseErr != nil {
		return 0, fmt.Errorf("%s: parse duration: %w", name, parseErr)
	}

	return duration, nil
}
|
||||
|
||||
// secondsEnv reads the environment variable name as a whole, positive number
// of seconds and converts it to a time.Duration.
//
// It returns fallback when the variable is not set. A set value yields a zero
// duration and an error prefixed with the variable name when it is not an
// integer, is zero or negative, or is too large to be represented as a
// time.Duration.
func secondsEnv(name string, fallback time.Duration) (time.Duration, error) {
	value, ok := os.LookupEnv(name)
	if !ok {
		return fallback, nil
	}

	parsed, err := strconv.Atoi(strings.TrimSpace(value))
	if err != nil {
		return 0, fmt.Errorf("%s: parse seconds: %w", name, err)
	}
	if parsed <= 0 {
		return 0, fmt.Errorf("%s: must be positive", name)
	}

	// time.Duration counts nanoseconds in an int64, so multiplying a large
	// enough second count would wrap around silently; reject it instead.
	const maxSeconds = int64(1<<63-1) / int64(time.Second)
	if int64(parsed) > maxSeconds {
		return 0, fmt.Errorf("%s: must not exceed %d seconds", name, maxSeconds)
	}

	return time.Duration(parsed) * time.Second, nil
}
|
||||
|
||||
// daysEnv reads the environment variable name as a whole, positive number of
// days and converts it to a time.Duration of that many 24-hour periods.
//
// It returns fallback when the variable is not set. A set value yields a zero
// duration and an error prefixed with the variable name when it is not an
// integer, is zero or negative, or is too large to be represented as a
// time.Duration.
func daysEnv(name string, fallback time.Duration) (time.Duration, error) {
	value, ok := os.LookupEnv(name)
	if !ok {
		return fallback, nil
	}

	parsed, err := strconv.Atoi(strings.TrimSpace(value))
	if err != nil {
		return 0, fmt.Errorf("%s: parse days: %w", name, err)
	}
	if parsed <= 0 {
		return 0, fmt.Errorf("%s: must be positive", name)
	}

	// time.Duration counts nanoseconds in an int64, so multiplying a large
	// enough day count would wrap around silently; reject it instead.
	const maxDays = int64(1<<63-1) / int64(24*time.Hour)
	if int64(parsed) > maxDays {
		return 0, fmt.Errorf("%s: must not exceed %d days", name, maxDays)
	}

	return time.Duration(parsed) * 24 * time.Hour, nil
}
|
||||
|
||||
// intEnv reads the environment variable name as a base-10 integer.
//
// It returns fallback when the variable is not set. A set value that does not
// parse yields zero and an error prefixed with the variable name.
func intEnv(name string, fallback int) (int, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}

	text := strings.TrimSpace(raw)
	number, parseErr := strconv.Atoi(text)
	if parseErr != nil {
		return 0, fmt.Errorf("%s: parse int: %w", name, parseErr)
	}

	return number, nil
}
|
||||
|
||||
// floatEnv reads the environment variable name as a 64-bit floating-point
// number.
//
// It returns fallback when the variable is not set. A set value that does not
// parse yields zero and an error prefixed with the variable name.
func floatEnv(name string, fallback float64) (float64, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}

	text := strings.TrimSpace(raw)
	number, parseErr := strconv.ParseFloat(text, 64)
	if parseErr != nil {
		return 0, fmt.Errorf("%s: parse float: %w", name, parseErr)
	}

	return number, nil
}
|
||||
|
||||
// boolEnv reads the environment variable name as a boolean using the
// spellings accepted by strconv.ParseBool.
//
// It returns fallback when the variable is not set. A set value that does not
// parse yields false and an error prefixed with the variable name.
func boolEnv(name string, fallback bool) (bool, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}

	text := strings.TrimSpace(raw)
	flag, parseErr := strconv.ParseBool(text)
	if parseErr != nil {
		return false, fmt.Errorf("%s: parse bool: %w", name, parseErr)
	}

	return flag, nil
}
|
||||
|
||||
// octalUint32Env reads the environment variable name as an unsigned base-8
// number that fits in 32 bits (for example "750").
//
// It returns fallback when the variable is not set. A set value that does not
// parse yields zero and an error prefixed with the variable name.
func octalUint32Env(name string, fallback uint32) (uint32, error) {
	raw, present := os.LookupEnv(name)
	if !present {
		return fallback, nil
	}

	text := strings.TrimSpace(raw)
	// bitSize 32 makes ParseUint reject anything that would not fit uint32,
	// so the conversion below cannot truncate.
	bits, parseErr := strconv.ParseUint(text, 8, 32)
	if parseErr != nil {
		return 0, fmt.Errorf("%s: parse octal: %w", name, parseErr)
	}

	return uint32(bits), nil
}
|
||||
|
||||
// normalizeExporterValue trims value and maps a blank exporter name to
// "none"; any other value is returned trimmed and otherwise unchanged.
func normalizeExporterValue(value string) string {
	if exporter := strings.TrimSpace(value); exporter != "" {
		return exporter
	}

	return "none"
}
|
||||
|
||||
// normalizeProtocolValue picks the first non-blank value among primary and
// fallback, in that order, and returns it trimmed. When both are blank it
// returns defaultValue trimmed, which may itself be empty.
func normalizeProtocolValue(primary string, fallback string, defaultValue string) string {
	for _, candidate := range []string{primary, fallback} {
		if chosen := strings.TrimSpace(candidate); chosen != "" {
			return chosen
		}
	}

	return strings.TrimSpace(defaultValue)
}
|
||||
@@ -0,0 +1,93 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Validate reports whether cfg stores a usable Runtime Manager process
|
||||
// configuration.
|
||||
func (cfg Config) Validate() error {
|
||||
if cfg.ShutdownTimeout <= 0 {
|
||||
return fmt.Errorf("%s must be positive", shutdownTimeoutEnvVar)
|
||||
}
|
||||
if err := validateSlogLevel(cfg.Logging.Level); err != nil {
|
||||
return fmt.Errorf("%s: %w", logLevelEnvVar, err)
|
||||
}
|
||||
if err := cfg.InternalHTTP.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Docker.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Postgres.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Redis.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Streams.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Container.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Health.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Cleanup.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Coordination.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Lobby.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cfg.Telemetry.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateSlogLevel reports whether level, after trimming whitespace, is a
// level name that slog.Level can unmarshal. The returned error quotes the
// value as given and wraps the slog parse error.
func validateSlogLevel(level string) error {
	var parsed slog.Level

	parseErr := parsed.UnmarshalText([]byte(strings.TrimSpace(level)))
	if parseErr == nil {
		return nil
	}

	return fmt.Errorf("invalid slog level %q: %w", level, parseErr)
}
|
||||
|
||||
// isTCPAddr reports whether value, after trimming whitespace, splits into a
// host:port pair with a non-empty port. The host may be empty (as in ":8080")
// but must not contain a space. The port is not checked to be numeric.
func isTCPAddr(value string) bool {
	host, port, splitErr := net.SplitHostPort(strings.TrimSpace(value))

	// An empty host contains no space, so the last term also accepts it.
	return splitErr == nil && port != "" && !strings.Contains(host, " ")
}
|
||||
|
||||
// isHTTPURL reports whether value, after trimming whitespace, parses as a URL
// whose scheme is http or https and whose host part is non-empty.
func isHTTPURL(value string) bool {
	target, parseErr := url.Parse(strings.TrimSpace(value))
	if parseErr != nil {
		return false
	}

	switch target.Scheme {
	case "http", "https":
		return target.Host != ""
	default:
		return false
	}
}
|
||||
Reference in New Issue
Block a user