633 lines
21 KiB
Go
633 lines
21 KiB
Go
// Package config loads the Runtime Manager process configuration from
// environment variables.
|
|
package config
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"galaxy/postgres"
|
|
"galaxy/redisconn"
|
|
"galaxy/rtmanager/internal/telemetry"
|
|
)
|
|
|
|
const (
|
|
envPrefix = "RTMANAGER"
|
|
|
|
shutdownTimeoutEnvVar = "RTMANAGER_SHUTDOWN_TIMEOUT"
|
|
logLevelEnvVar = "RTMANAGER_LOG_LEVEL"
|
|
|
|
internalHTTPAddrEnvVar = "RTMANAGER_INTERNAL_HTTP_ADDR"
|
|
internalHTTPReadHeaderTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
|
|
internalHTTPReadTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_READ_TIMEOUT"
|
|
internalHTTPWriteTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_WRITE_TIMEOUT"
|
|
internalHTTPIdleTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_IDLE_TIMEOUT"
|
|
|
|
dockerHostEnvVar = "RTMANAGER_DOCKER_HOST"
|
|
dockerAPIVersionEnvVar = "RTMANAGER_DOCKER_API_VERSION"
|
|
dockerNetworkEnvVar = "RTMANAGER_DOCKER_NETWORK"
|
|
dockerLogDriverEnvVar = "RTMANAGER_DOCKER_LOG_DRIVER"
|
|
dockerLogOptsEnvVar = "RTMANAGER_DOCKER_LOG_OPTS"
|
|
imagePullPolicyEnvVar = "RTMANAGER_IMAGE_PULL_POLICY"
|
|
|
|
defaultCPUQuotaEnvVar = "RTMANAGER_DEFAULT_CPU_QUOTA"
|
|
defaultMemoryEnvVar = "RTMANAGER_DEFAULT_MEMORY"
|
|
defaultPIDsLimitEnvVar = "RTMANAGER_DEFAULT_PIDS_LIMIT"
|
|
containerStopTimeoutSecondsEnvVar = "RTMANAGER_CONTAINER_STOP_TIMEOUT_SECONDS"
|
|
containerRetentionDaysEnvVar = "RTMANAGER_CONTAINER_RETENTION_DAYS"
|
|
engineStateMountPathEnvVar = "RTMANAGER_ENGINE_STATE_MOUNT_PATH"
|
|
engineStateEnvNameEnvVar = "RTMANAGER_ENGINE_STATE_ENV_NAME"
|
|
gameStateDirModeEnvVar = "RTMANAGER_GAME_STATE_DIR_MODE"
|
|
gameStateOwnerUIDEnvVar = "RTMANAGER_GAME_STATE_OWNER_UID"
|
|
gameStateOwnerGIDEnvVar = "RTMANAGER_GAME_STATE_OWNER_GID"
|
|
gameStateRootEnvVar = "RTMANAGER_GAME_STATE_ROOT"
|
|
|
|
startJobsStreamEnvVar = "RTMANAGER_REDIS_START_JOBS_STREAM"
|
|
stopJobsStreamEnvVar = "RTMANAGER_REDIS_STOP_JOBS_STREAM"
|
|
jobResultsStreamEnvVar = "RTMANAGER_REDIS_JOB_RESULTS_STREAM"
|
|
healthEventsStreamEnvVar = "RTMANAGER_REDIS_HEALTH_EVENTS_STREAM"
|
|
notificationIntentsStreamEnv = "RTMANAGER_NOTIFICATION_INTENTS_STREAM"
|
|
streamBlockTimeoutEnvVar = "RTMANAGER_STREAM_BLOCK_TIMEOUT"
|
|
|
|
inspectIntervalEnvVar = "RTMANAGER_INSPECT_INTERVAL"
|
|
probeIntervalEnvVar = "RTMANAGER_PROBE_INTERVAL"
|
|
probeTimeoutEnvVar = "RTMANAGER_PROBE_TIMEOUT"
|
|
probeFailuresThresholdEnvVar = "RTMANAGER_PROBE_FAILURES_THRESHOLD"
|
|
|
|
reconcileIntervalEnvVar = "RTMANAGER_RECONCILE_INTERVAL"
|
|
cleanupIntervalEnvVar = "RTMANAGER_CLEANUP_INTERVAL"
|
|
|
|
gameLeaseTTLSecondsEnvVar = "RTMANAGER_GAME_LEASE_TTL_SECONDS"
|
|
|
|
lobbyInternalBaseURLEnvVar = "RTMANAGER_LOBBY_INTERNAL_BASE_URL"
|
|
lobbyInternalTimeoutEnvVar = "RTMANAGER_LOBBY_INTERNAL_TIMEOUT"
|
|
|
|
otelServiceNameEnvVar = "OTEL_SERVICE_NAME"
|
|
otelTracesExporterEnvVar = "OTEL_TRACES_EXPORTER"
|
|
otelMetricsExporterEnvVar = "OTEL_METRICS_EXPORTER"
|
|
otelExporterOTLPProtocolEnvVar = "OTEL_EXPORTER_OTLP_PROTOCOL"
|
|
otelExporterOTLPTracesProtocolEnvVar = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
|
|
otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
|
|
otelStdoutTracesEnabledEnvVar = "RTMANAGER_OTEL_STDOUT_TRACES_ENABLED"
|
|
otelStdoutMetricsEnabledEnvVar = "RTMANAGER_OTEL_STDOUT_METRICS_ENABLED"
|
|
|
|
defaultShutdownTimeout = 30 * time.Second
|
|
defaultLogLevel = "info"
|
|
defaultInternalHTTPAddr = ":8096"
|
|
defaultReadHeaderTimeout = 2 * time.Second
|
|
defaultReadTimeout = 5 * time.Second
|
|
defaultWriteTimeout = 15 * time.Second
|
|
defaultIdleTimeout = 60 * time.Second
|
|
|
|
defaultDockerHost = "unix:///var/run/docker.sock"
|
|
defaultDockerNetwork = "galaxy-net"
|
|
defaultDockerLogDriver = "json-file"
|
|
defaultImagePullPolicy = ImagePullPolicyIfMissing
|
|
|
|
defaultCPUQuota = 1.0
|
|
defaultMemory = "512m"
|
|
defaultPIDsLimit = 512
|
|
defaultContainerStopTimeout = 30 * time.Second
|
|
defaultContainerRetention = 30 * 24 * time.Hour
|
|
defaultEngineStateMountPath = "/var/lib/galaxy-game"
|
|
defaultEngineStateEnvName = "GAME_STATE_PATH"
|
|
defaultGameStateDirMode = 0o750
|
|
|
|
defaultStartJobsStream = "runtime:start_jobs"
|
|
defaultStopJobsStream = "runtime:stop_jobs"
|
|
defaultJobResultsStream = "runtime:job_results"
|
|
defaultHealthEventsStream = "runtime:health_events"
|
|
defaultNotificationIntentsKey = "notification:intents"
|
|
defaultStreamBlockTimeout = 5 * time.Second
|
|
|
|
defaultInspectInterval = 30 * time.Second
|
|
defaultProbeInterval = 15 * time.Second
|
|
defaultProbeTimeout = 2 * time.Second
|
|
defaultProbeFailuresThreshold = 3
|
|
|
|
defaultReconcileInterval = 5 * time.Minute
|
|
defaultCleanupInterval = time.Hour
|
|
|
|
defaultGameLeaseTTL = 60 * time.Second
|
|
|
|
defaultLobbyInternalTimeout = 2 * time.Second
|
|
|
|
defaultOTelServiceName = "galaxy-rtmanager"
|
|
)
|
|
|
|
// ImagePullPolicy enumerates the supported image pull policies. The start
// service validates a producer-supplied `image_ref` against this policy at
// start time.
type ImagePullPolicy string

// Supported pull policies, frozen by `rtmanager/README.md` §Configuration.
const (
	ImagePullPolicyIfMissing ImagePullPolicy = "if_missing"
	ImagePullPolicyAlways    ImagePullPolicy = "always"
	ImagePullPolicyNever     ImagePullPolicy = "never"
)

// Validate reports whether p is one of the frozen pull policies.
//
// It returns nil for ImagePullPolicyIfMissing, ImagePullPolicyAlways and
// ImagePullPolicyNever. Any other value, including the empty string, yields
// an error that quotes the rejected value and lists the accepted ones. The
// comparison is exact: no trimming or case folding is applied.
func (p ImagePullPolicy) Validate() error {
	switch p {
	case ImagePullPolicyIfMissing, ImagePullPolicyAlways, ImagePullPolicyNever:
		return nil
	default:
		return fmt.Errorf("image pull policy %q must be one of %q, %q, %q",
			p, ImagePullPolicyIfMissing, ImagePullPolicyAlways, ImagePullPolicyNever)
	}
}
|
|
|
|
// Config stores the full Runtime Manager process configuration.
|
|
type Config struct {
|
|
// ShutdownTimeout bounds graceful shutdown of every long-lived
|
|
// component.
|
|
ShutdownTimeout time.Duration
|
|
|
|
// Logging configures the process-wide structured logger.
|
|
Logging LoggingConfig
|
|
|
|
// InternalHTTP configures the trusted internal HTTP listener that
|
|
// serves probes and the GM/Admin REST surface.
|
|
InternalHTTP InternalHTTPConfig
|
|
|
|
// Docker configures the Docker SDK client RTM uses to drive the local
|
|
// Docker daemon.
|
|
Docker DockerConfig
|
|
|
|
// Postgres configures the PostgreSQL-backed durable store consumed via
|
|
// `pkg/postgres`.
|
|
Postgres PostgresConfig
|
|
|
|
// Redis configures the shared Redis connection topology consumed via
|
|
// `pkg/redisconn`.
|
|
Redis RedisConfig
|
|
|
|
// Streams stores the stable Redis Stream names RTM reads from and
|
|
// writes to.
|
|
Streams StreamsConfig
|
|
|
|
// Container stores the per-container defaults applied at start time
|
|
// when the resolved image does not declare its own labels.
|
|
Container ContainerConfig
|
|
|
|
// Health configures the periodic health-monitoring workers (events
|
|
// listener, inspect, active probe).
|
|
Health HealthConfig
|
|
|
|
// Cleanup configures the reconciler and container-cleanup workers.
|
|
Cleanup CleanupConfig
|
|
|
|
// Coordination configures the per-game Redis lease used to serialise
|
|
// operations across all entry points.
|
|
Coordination CoordinationConfig
|
|
|
|
// Lobby configures the synchronous Lobby internal REST client used by
|
|
// the start service for ancillary lookups.
|
|
Lobby LobbyConfig
|
|
|
|
// Telemetry configures the process-wide OpenTelemetry runtime.
|
|
Telemetry TelemetryConfig
|
|
}
|
|
|
|
// LoggingConfig configures the process-wide structured logger.
type LoggingConfig struct {
	// Level stores the process log level accepted by log/slog.
	Level string
}
|
|
|
|
// InternalHTTPConfig configures the trusted internal HTTP listener.
|
|
type InternalHTTPConfig struct {
|
|
// Addr stores the TCP listen address.
|
|
Addr string
|
|
|
|
// ReadHeaderTimeout bounds request-header reading.
|
|
ReadHeaderTimeout time.Duration
|
|
|
|
// ReadTimeout bounds reading one request.
|
|
ReadTimeout time.Duration
|
|
|
|
// WriteTimeout bounds writing one response.
|
|
WriteTimeout time.Duration
|
|
|
|
// IdleTimeout bounds how long keep-alive connections stay open.
|
|
IdleTimeout time.Duration
|
|
}
|
|
|
|
// Validate reports whether cfg stores a usable internal HTTP listener
|
|
// configuration.
|
|
func (cfg InternalHTTPConfig) Validate() error {
|
|
switch {
|
|
case strings.TrimSpace(cfg.Addr) == "":
|
|
return fmt.Errorf("internal HTTP addr must not be empty")
|
|
case !isTCPAddr(cfg.Addr):
|
|
return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr)
|
|
case cfg.ReadHeaderTimeout <= 0:
|
|
return fmt.Errorf("internal HTTP read header timeout must be positive")
|
|
case cfg.ReadTimeout <= 0:
|
|
return fmt.Errorf("internal HTTP read timeout must be positive")
|
|
case cfg.WriteTimeout <= 0:
|
|
return fmt.Errorf("internal HTTP write timeout must be positive")
|
|
case cfg.IdleTimeout <= 0:
|
|
return fmt.Errorf("internal HTTP idle timeout must be positive")
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// DockerConfig configures the Docker SDK client.
|
|
type DockerConfig struct {
|
|
// Host stores the Docker daemon endpoint (e.g.
|
|
// `unix:///var/run/docker.sock`).
|
|
Host string
|
|
|
|
// APIVersion overrides the Docker API version. Empty lets the SDK
|
|
// negotiate.
|
|
APIVersion string
|
|
|
|
// Network stores the user-defined Docker bridge network containers
|
|
// attach to. Provisioned outside RTM; missing network is a fail-fast
|
|
// condition at startup.
|
|
Network string
|
|
|
|
// LogDriver stores the Docker logging driver applied to engine
|
|
// containers.
|
|
LogDriver string
|
|
|
|
// LogOpts stores the comma-separated `key=value` driver options.
|
|
LogOpts string
|
|
|
|
// PullPolicy stores the configured image pull policy.
|
|
PullPolicy ImagePullPolicy
|
|
}
|
|
|
|
// Validate reports whether cfg stores a usable Docker configuration.
|
|
func (cfg DockerConfig) Validate() error {
|
|
switch {
|
|
case strings.TrimSpace(cfg.Host) == "":
|
|
return fmt.Errorf("docker host must not be empty")
|
|
case strings.TrimSpace(cfg.Network) == "":
|
|
return fmt.Errorf("docker network must not be empty")
|
|
case strings.TrimSpace(cfg.LogDriver) == "":
|
|
return fmt.Errorf("docker log driver must not be empty")
|
|
}
|
|
return cfg.PullPolicy.Validate()
|
|
}
|
|
|
|
// PostgresConfig configures the PostgreSQL-backed durable store consumed
|
|
// via `pkg/postgres`.
|
|
type PostgresConfig struct {
|
|
// Conn carries the primary plus replica DSN topology and pool tuning.
|
|
Conn postgres.Config
|
|
}
|
|
|
|
// Validate reports whether cfg stores a usable PostgreSQL configuration.
|
|
func (cfg PostgresConfig) Validate() error {
|
|
return cfg.Conn.Validate()
|
|
}
|
|
|
|
// RedisConfig configures the Runtime Manager Redis connection topology.
|
|
type RedisConfig struct {
|
|
// Conn carries the connection topology (master, replicas, password,
|
|
// db, per-call timeout).
|
|
Conn redisconn.Config
|
|
}
|
|
|
|
// Validate reports whether cfg stores a usable Redis configuration.
|
|
func (cfg RedisConfig) Validate() error {
|
|
return cfg.Conn.Validate()
|
|
}
|
|
|
|
// StreamsConfig stores the stable Redis Stream names used by Runtime
// Manager.
type StreamsConfig struct {
	// StartJobs stores the Redis Streams key Lobby writes start jobs to.
	StartJobs string

	// StopJobs stores the Redis Streams key Lobby writes stop jobs to.
	StopJobs string

	// JobResults stores the Redis Streams key RTM writes job outcomes
	// to.
	JobResults string

	// HealthEvents stores the Redis Streams key RTM publishes
	// technical health events to.
	HealthEvents string

	// NotificationIntents stores the Redis Streams key RTM publishes
	// admin-only notification intents to.
	NotificationIntents string

	// BlockTimeout bounds the maximum blocking read window for stream
	// consumers.
	BlockTimeout time.Duration
}

// Validate reports whether cfg stores usable stream names.
//
// Every stream name must be non-blank and BlockTimeout must be strictly
// positive. Fields are checked in declaration order and only the first
// failure is returned.
func (cfg StreamsConfig) Validate() error {
	switch {
	case strings.TrimSpace(cfg.StartJobs) == "":
		return fmt.Errorf("redis start jobs stream must not be empty")
	case strings.TrimSpace(cfg.StopJobs) == "":
		return fmt.Errorf("redis stop jobs stream must not be empty")
	case strings.TrimSpace(cfg.JobResults) == "":
		return fmt.Errorf("redis job results stream must not be empty")
	case strings.TrimSpace(cfg.HealthEvents) == "":
		return fmt.Errorf("redis health events stream must not be empty")
	case strings.TrimSpace(cfg.NotificationIntents) == "":
		return fmt.Errorf("redis notification intents stream must not be empty")
	case cfg.BlockTimeout <= 0:
		return fmt.Errorf("redis stream block timeout must be positive")
	default:
		return nil
	}
}
|
|
|
|
// ContainerConfig stores the per-container defaults applied at start
// time. Resource defaults apply when the resolved engine image does not
// expose `com.galaxy.cpu_quota` / `com.galaxy.memory` /
// `com.galaxy.pids_limit` labels.
type ContainerConfig struct {
	// DefaultCPUQuota is the fallback `--cpus` value applied when the
	// image does not declare `com.galaxy.cpu_quota`.
	DefaultCPUQuota float64

	// DefaultMemory is the fallback `--memory` value applied when the
	// image does not declare `com.galaxy.memory`.
	DefaultMemory string

	// DefaultPIDsLimit is the fallback `--pids-limit` value applied
	// when the image does not declare `com.galaxy.pids_limit`.
	DefaultPIDsLimit int

	// StopTimeout bounds graceful container stop before Docker fires
	// SIGKILL.
	StopTimeout time.Duration

	// Retention stores the TTL after which `status=stopped` containers
	// are removed by the cleanup worker.
	Retention time.Duration

	// EngineStateMountPath is the in-container path the per-game state
	// directory is bind-mounted to. Validate requires it to be absolute.
	EngineStateMountPath string

	// EngineStateEnvName is the env-var name forwarded to the engine
	// pointing at EngineStateMountPath.
	EngineStateEnvName string

	// GameStateDirMode stores the unix permissions applied to the
	// per-game state directory on creation.
	GameStateDirMode uint32

	// GameStateOwnerUID stores the unix uid applied to the per-game
	// state directory on creation.
	GameStateOwnerUID int

	// GameStateOwnerGID stores the unix gid applied to the per-game
	// state directory on creation.
	GameStateOwnerGID int

	// GameStateRoot is the host path under which per-game state
	// directories are created. Validate requires it to be absolute.
	GameStateRoot string
}

// Validate reports whether cfg stores usable container defaults.
//
// Numeric and duration fields must be strictly positive, string fields must
// be non-blank, GameStateDirMode must be non-zero, and both GameStateRoot
// and EngineStateMountPath must start with "/". GameStateOwnerUID and
// GameStateOwnerGID are not inspected. Only the first failure is returned.
func (cfg ContainerConfig) Validate() error {
	switch {
	case cfg.DefaultCPUQuota <= 0:
		return fmt.Errorf("default cpu quota must be positive")
	case strings.TrimSpace(cfg.DefaultMemory) == "":
		return fmt.Errorf("default memory must not be empty")
	case cfg.DefaultPIDsLimit <= 0:
		return fmt.Errorf("default pids limit must be positive")
	case cfg.StopTimeout <= 0:
		return fmt.Errorf("container stop timeout must be positive")
	case cfg.Retention <= 0:
		return fmt.Errorf("container retention must be positive")
	case strings.TrimSpace(cfg.EngineStateMountPath) == "":
		return fmt.Errorf("engine state mount path must not be empty")
	case strings.TrimSpace(cfg.EngineStateEnvName) == "":
		return fmt.Errorf("engine state env name must not be empty")
	case cfg.GameStateDirMode == 0:
		return fmt.Errorf("game state dir mode must be non-zero")
	case strings.TrimSpace(cfg.GameStateRoot) == "":
		return fmt.Errorf("game state root must not be empty")
	case !strings.HasPrefix(strings.TrimSpace(cfg.GameStateRoot), "/"):
		return fmt.Errorf("game state root %q must be an absolute path", cfg.GameStateRoot)
	// A bind-mount target inside the container has to be absolute as well.
	// The check sits last so every pre-existing error keeps its precedence.
	case !strings.HasPrefix(strings.TrimSpace(cfg.EngineStateMountPath), "/"):
		return fmt.Errorf("engine state mount path %q must be an absolute path", cfg.EngineStateMountPath)
	default:
		return nil
	}
}
|
|
|
|
// HealthConfig configures the periodic health-monitoring workers
// (Docker events listener, periodic inspect, active probe).
type HealthConfig struct {
	// InspectInterval is the period between two periodic Docker inspect
	// passes.
	InspectInterval time.Duration

	// ProbeInterval is the period between two engine `/healthz` probe
	// rounds.
	ProbeInterval time.Duration

	// ProbeTimeout bounds one engine `/healthz` request.
	ProbeTimeout time.Duration

	// ProbeFailuresThreshold is the consecutive-failure count that
	// triggers a `probe_failed` event.
	ProbeFailuresThreshold int
}

// Validate reports whether cfg stores usable health-monitoring settings.
//
// All four fields must be strictly positive. They are checked in
// declaration order and only the first failure is returned.
func (cfg HealthConfig) Validate() error {
	switch {
	case cfg.InspectInterval <= 0:
		return fmt.Errorf("inspect interval must be positive")
	case cfg.ProbeInterval <= 0:
		return fmt.Errorf("probe interval must be positive")
	case cfg.ProbeTimeout <= 0:
		return fmt.Errorf("probe timeout must be positive")
	case cfg.ProbeFailuresThreshold <= 0:
		return fmt.Errorf("probe failures threshold must be positive")
	default:
		return nil
	}
}
|
|
|
|
// CleanupConfig configures the reconciler and container-cleanup workers.
type CleanupConfig struct {
	// ReconcileInterval is the period between two reconciler passes.
	ReconcileInterval time.Duration

	// CleanupInterval is the period between two container-cleanup
	// passes.
	CleanupInterval time.Duration
}

// Validate reports whether cfg stores usable cleanup settings.
//
// Both intervals must be strictly positive; ReconcileInterval is checked
// first and only the first failure is returned.
func (cfg CleanupConfig) Validate() error {
	switch {
	case cfg.ReconcileInterval <= 0:
		return fmt.Errorf("reconcile interval must be positive")
	case cfg.CleanupInterval <= 0:
		return fmt.Errorf("cleanup interval must be positive")
	default:
		return nil
	}
}
|
|
|
|
// CoordinationConfig configures the per-game Redis lease.
type CoordinationConfig struct {
	// GameLeaseTTL bounds the per-game lease lifetime renewed every
	// half-TTL while an operation runs.
	GameLeaseTTL time.Duration
}

// Validate reports whether cfg stores a usable lease configuration.
// GameLeaseTTL must be strictly positive.
func (cfg CoordinationConfig) Validate() error {
	if cfg.GameLeaseTTL <= 0 {
		return fmt.Errorf("game lease ttl must be positive")
	}
	return nil
}
|
|
|
|
// LobbyConfig configures the synchronous Lobby internal REST client.
|
|
type LobbyConfig struct {
|
|
// BaseURL stores the trusted Lobby internal listener base URL.
|
|
BaseURL string
|
|
|
|
// Timeout bounds one Lobby internal request.
|
|
Timeout time.Duration
|
|
}
|
|
|
|
// Validate reports whether cfg stores a usable Lobby client
|
|
// configuration.
|
|
func (cfg LobbyConfig) Validate() error {
|
|
switch {
|
|
case strings.TrimSpace(cfg.BaseURL) == "":
|
|
return fmt.Errorf("lobby internal base url must not be empty")
|
|
case !isHTTPURL(cfg.BaseURL):
|
|
return fmt.Errorf("lobby internal base url %q must be an absolute http(s) URL", cfg.BaseURL)
|
|
case cfg.Timeout <= 0:
|
|
return fmt.Errorf("lobby internal timeout must be positive")
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// TelemetryConfig configures the Runtime Manager OpenTelemetry runtime.
|
|
type TelemetryConfig struct {
|
|
// ServiceName overrides the default OpenTelemetry service name.
|
|
ServiceName string
|
|
|
|
// TracesExporter selects the external traces exporter. Supported
|
|
// values are `none` and `otlp`.
|
|
TracesExporter string
|
|
|
|
// MetricsExporter selects the external metrics exporter. Supported
|
|
// values are `none` and `otlp`.
|
|
MetricsExporter string
|
|
|
|
// TracesProtocol selects the OTLP traces protocol when
|
|
// TracesExporter is `otlp`.
|
|
TracesProtocol string
|
|
|
|
// MetricsProtocol selects the OTLP metrics protocol when
|
|
// MetricsExporter is `otlp`.
|
|
MetricsProtocol string
|
|
|
|
// StdoutTracesEnabled enables the additional stdout trace exporter
|
|
// used for local development and debugging.
|
|
StdoutTracesEnabled bool
|
|
|
|
// StdoutMetricsEnabled enables the additional stdout metric
|
|
// exporter used for local development and debugging.
|
|
StdoutMetricsEnabled bool
|
|
}
|
|
|
|
// Validate reports whether cfg contains a supported OpenTelemetry
|
|
// configuration.
|
|
func (cfg TelemetryConfig) Validate() error {
|
|
return telemetry.ProcessConfig{
|
|
ServiceName: cfg.ServiceName,
|
|
TracesExporter: cfg.TracesExporter,
|
|
MetricsExporter: cfg.MetricsExporter,
|
|
TracesProtocol: cfg.TracesProtocol,
|
|
MetricsProtocol: cfg.MetricsProtocol,
|
|
StdoutTracesEnabled: cfg.StdoutTracesEnabled,
|
|
StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
|
|
}.Validate()
|
|
}
|
|
|
|
// DefaultConfig returns the default Runtime Manager process configuration.
|
|
func DefaultConfig() Config {
|
|
return Config{
|
|
ShutdownTimeout: defaultShutdownTimeout,
|
|
Logging: LoggingConfig{
|
|
Level: defaultLogLevel,
|
|
},
|
|
InternalHTTP: InternalHTTPConfig{
|
|
Addr: defaultInternalHTTPAddr,
|
|
ReadHeaderTimeout: defaultReadHeaderTimeout,
|
|
ReadTimeout: defaultReadTimeout,
|
|
WriteTimeout: defaultWriteTimeout,
|
|
IdleTimeout: defaultIdleTimeout,
|
|
},
|
|
Docker: DockerConfig{
|
|
Host: defaultDockerHost,
|
|
Network: defaultDockerNetwork,
|
|
LogDriver: defaultDockerLogDriver,
|
|
PullPolicy: defaultImagePullPolicy,
|
|
},
|
|
Postgres: PostgresConfig{
|
|
Conn: postgres.DefaultConfig(),
|
|
},
|
|
Redis: RedisConfig{
|
|
Conn: redisconn.DefaultConfig(),
|
|
},
|
|
Streams: StreamsConfig{
|
|
StartJobs: defaultStartJobsStream,
|
|
StopJobs: defaultStopJobsStream,
|
|
JobResults: defaultJobResultsStream,
|
|
HealthEvents: defaultHealthEventsStream,
|
|
NotificationIntents: defaultNotificationIntentsKey,
|
|
BlockTimeout: defaultStreamBlockTimeout,
|
|
},
|
|
Container: ContainerConfig{
|
|
DefaultCPUQuota: defaultCPUQuota,
|
|
DefaultMemory: defaultMemory,
|
|
DefaultPIDsLimit: defaultPIDsLimit,
|
|
StopTimeout: defaultContainerStopTimeout,
|
|
Retention: defaultContainerRetention,
|
|
EngineStateMountPath: defaultEngineStateMountPath,
|
|
EngineStateEnvName: defaultEngineStateEnvName,
|
|
GameStateDirMode: defaultGameStateDirMode,
|
|
},
|
|
Health: HealthConfig{
|
|
InspectInterval: defaultInspectInterval,
|
|
ProbeInterval: defaultProbeInterval,
|
|
ProbeTimeout: defaultProbeTimeout,
|
|
ProbeFailuresThreshold: defaultProbeFailuresThreshold,
|
|
},
|
|
Cleanup: CleanupConfig{
|
|
ReconcileInterval: defaultReconcileInterval,
|
|
CleanupInterval: defaultCleanupInterval,
|
|
},
|
|
Coordination: CoordinationConfig{
|
|
GameLeaseTTL: defaultGameLeaseTTL,
|
|
},
|
|
Lobby: LobbyConfig{
|
|
Timeout: defaultLobbyInternalTimeout,
|
|
},
|
|
Telemetry: TelemetryConfig{
|
|
ServiceName: defaultOTelServiceName,
|
|
TracesExporter: "none",
|
|
MetricsExporter: "none",
|
|
},
|
|
}
|
|
}
|