feat: runtime manager
This commit is contained in:
@@ -0,0 +1,632 @@
|
||||
// Package config loads the Runtime Manager process configuration from
|
||||
// environment variables.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"galaxy/postgres"
|
||||
"galaxy/redisconn"
|
||||
"galaxy/rtmanager/internal/telemetry"
|
||||
)
|
||||
|
||||
const (
|
||||
envPrefix = "RTMANAGER"
|
||||
|
||||
shutdownTimeoutEnvVar = "RTMANAGER_SHUTDOWN_TIMEOUT"
|
||||
logLevelEnvVar = "RTMANAGER_LOG_LEVEL"
|
||||
|
||||
internalHTTPAddrEnvVar = "RTMANAGER_INTERNAL_HTTP_ADDR"
|
||||
internalHTTPReadHeaderTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_READ_HEADER_TIMEOUT"
|
||||
internalHTTPReadTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_READ_TIMEOUT"
|
||||
internalHTTPWriteTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_WRITE_TIMEOUT"
|
||||
internalHTTPIdleTimeoutEnvVar = "RTMANAGER_INTERNAL_HTTP_IDLE_TIMEOUT"
|
||||
|
||||
dockerHostEnvVar = "RTMANAGER_DOCKER_HOST"
|
||||
dockerAPIVersionEnvVar = "RTMANAGER_DOCKER_API_VERSION"
|
||||
dockerNetworkEnvVar = "RTMANAGER_DOCKER_NETWORK"
|
||||
dockerLogDriverEnvVar = "RTMANAGER_DOCKER_LOG_DRIVER"
|
||||
dockerLogOptsEnvVar = "RTMANAGER_DOCKER_LOG_OPTS"
|
||||
imagePullPolicyEnvVar = "RTMANAGER_IMAGE_PULL_POLICY"
|
||||
|
||||
defaultCPUQuotaEnvVar = "RTMANAGER_DEFAULT_CPU_QUOTA"
|
||||
defaultMemoryEnvVar = "RTMANAGER_DEFAULT_MEMORY"
|
||||
defaultPIDsLimitEnvVar = "RTMANAGER_DEFAULT_PIDS_LIMIT"
|
||||
containerStopTimeoutSecondsEnvVar = "RTMANAGER_CONTAINER_STOP_TIMEOUT_SECONDS"
|
||||
containerRetentionDaysEnvVar = "RTMANAGER_CONTAINER_RETENTION_DAYS"
|
||||
engineStateMountPathEnvVar = "RTMANAGER_ENGINE_STATE_MOUNT_PATH"
|
||||
engineStateEnvNameEnvVar = "RTMANAGER_ENGINE_STATE_ENV_NAME"
|
||||
gameStateDirModeEnvVar = "RTMANAGER_GAME_STATE_DIR_MODE"
|
||||
gameStateOwnerUIDEnvVar = "RTMANAGER_GAME_STATE_OWNER_UID"
|
||||
gameStateOwnerGIDEnvVar = "RTMANAGER_GAME_STATE_OWNER_GID"
|
||||
gameStateRootEnvVar = "RTMANAGER_GAME_STATE_ROOT"
|
||||
|
||||
startJobsStreamEnvVar = "RTMANAGER_REDIS_START_JOBS_STREAM"
|
||||
stopJobsStreamEnvVar = "RTMANAGER_REDIS_STOP_JOBS_STREAM"
|
||||
jobResultsStreamEnvVar = "RTMANAGER_REDIS_JOB_RESULTS_STREAM"
|
||||
healthEventsStreamEnvVar = "RTMANAGER_REDIS_HEALTH_EVENTS_STREAM"
|
||||
notificationIntentsStreamEnv = "RTMANAGER_NOTIFICATION_INTENTS_STREAM"
|
||||
streamBlockTimeoutEnvVar = "RTMANAGER_STREAM_BLOCK_TIMEOUT"
|
||||
|
||||
inspectIntervalEnvVar = "RTMANAGER_INSPECT_INTERVAL"
|
||||
probeIntervalEnvVar = "RTMANAGER_PROBE_INTERVAL"
|
||||
probeTimeoutEnvVar = "RTMANAGER_PROBE_TIMEOUT"
|
||||
probeFailuresThresholdEnvVar = "RTMANAGER_PROBE_FAILURES_THRESHOLD"
|
||||
|
||||
reconcileIntervalEnvVar = "RTMANAGER_RECONCILE_INTERVAL"
|
||||
cleanupIntervalEnvVar = "RTMANAGER_CLEANUP_INTERVAL"
|
||||
|
||||
gameLeaseTTLSecondsEnvVar = "RTMANAGER_GAME_LEASE_TTL_SECONDS"
|
||||
|
||||
lobbyInternalBaseURLEnvVar = "RTMANAGER_LOBBY_INTERNAL_BASE_URL"
|
||||
lobbyInternalTimeoutEnvVar = "RTMANAGER_LOBBY_INTERNAL_TIMEOUT"
|
||||
|
||||
otelServiceNameEnvVar = "OTEL_SERVICE_NAME"
|
||||
otelTracesExporterEnvVar = "OTEL_TRACES_EXPORTER"
|
||||
otelMetricsExporterEnvVar = "OTEL_METRICS_EXPORTER"
|
||||
otelExporterOTLPProtocolEnvVar = "OTEL_EXPORTER_OTLP_PROTOCOL"
|
||||
otelExporterOTLPTracesProtocolEnvVar = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"
|
||||
otelExporterOTLPMetricsProtocolEnvVar = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL"
|
||||
otelStdoutTracesEnabledEnvVar = "RTMANAGER_OTEL_STDOUT_TRACES_ENABLED"
|
||||
otelStdoutMetricsEnabledEnvVar = "RTMANAGER_OTEL_STDOUT_METRICS_ENABLED"
|
||||
|
||||
defaultShutdownTimeout = 30 * time.Second
|
||||
defaultLogLevel = "info"
|
||||
defaultInternalHTTPAddr = ":8096"
|
||||
defaultReadHeaderTimeout = 2 * time.Second
|
||||
defaultReadTimeout = 5 * time.Second
|
||||
defaultWriteTimeout = 15 * time.Second
|
||||
defaultIdleTimeout = 60 * time.Second
|
||||
|
||||
defaultDockerHost = "unix:///var/run/docker.sock"
|
||||
defaultDockerNetwork = "galaxy-net"
|
||||
defaultDockerLogDriver = "json-file"
|
||||
defaultImagePullPolicy = ImagePullPolicyIfMissing
|
||||
|
||||
defaultCPUQuota = 1.0
|
||||
defaultMemory = "512m"
|
||||
defaultPIDsLimit = 512
|
||||
defaultContainerStopTimeout = 30 * time.Second
|
||||
defaultContainerRetention = 30 * 24 * time.Hour
|
||||
defaultEngineStateMountPath = "/var/lib/galaxy-game"
|
||||
defaultEngineStateEnvName = "GAME_STATE_PATH"
|
||||
defaultGameStateDirMode = 0o750
|
||||
|
||||
defaultStartJobsStream = "runtime:start_jobs"
|
||||
defaultStopJobsStream = "runtime:stop_jobs"
|
||||
defaultJobResultsStream = "runtime:job_results"
|
||||
defaultHealthEventsStream = "runtime:health_events"
|
||||
defaultNotificationIntentsKey = "notification:intents"
|
||||
defaultStreamBlockTimeout = 5 * time.Second
|
||||
|
||||
defaultInspectInterval = 30 * time.Second
|
||||
defaultProbeInterval = 15 * time.Second
|
||||
defaultProbeTimeout = 2 * time.Second
|
||||
defaultProbeFailuresThreshold = 3
|
||||
|
||||
defaultReconcileInterval = 5 * time.Minute
|
||||
defaultCleanupInterval = time.Hour
|
||||
|
||||
defaultGameLeaseTTL = 60 * time.Second
|
||||
|
||||
defaultLobbyInternalTimeout = 2 * time.Second
|
||||
|
||||
defaultOTelServiceName = "galaxy-rtmanager"
|
||||
)
|
||||
|
||||
// ImagePullPolicy names one of the supported image pull policies. The start
// service validates a producer-supplied `image_ref` against this policy at
// start time.
type ImagePullPolicy string

// The pull policies frozen by `rtmanager/README.md` §Configuration.
const (
	ImagePullPolicyIfMissing ImagePullPolicy = "if_missing"
	ImagePullPolicyAlways    ImagePullPolicy = "always"
	ImagePullPolicyNever     ImagePullPolicy = "never"
)

// Validate returns nil when p is one of the frozen pull policies and a
// descriptive error listing the accepted values otherwise.
func (p ImagePullPolicy) Validate() error {
	if p == ImagePullPolicyIfMissing || p == ImagePullPolicyAlways || p == ImagePullPolicyNever {
		return nil
	}

	return fmt.Errorf("image pull policy %q must be one of %q, %q, %q",
		p, ImagePullPolicyIfMissing, ImagePullPolicyAlways, ImagePullPolicyNever)
}
|
||||
|
||||
// Config stores the full Runtime Manager process configuration.
|
||||
type Config struct {
|
||||
// ShutdownTimeout bounds graceful shutdown of every long-lived
|
||||
// component.
|
||||
ShutdownTimeout time.Duration
|
||||
|
||||
// Logging configures the process-wide structured logger.
|
||||
Logging LoggingConfig
|
||||
|
||||
// InternalHTTP configures the trusted internal HTTP listener that
|
||||
// serves probes and the GM/Admin REST surface.
|
||||
InternalHTTP InternalHTTPConfig
|
||||
|
||||
// Docker configures the Docker SDK client RTM uses to drive the local
|
||||
// Docker daemon.
|
||||
Docker DockerConfig
|
||||
|
||||
// Postgres configures the PostgreSQL-backed durable store consumed via
|
||||
// `pkg/postgres`.
|
||||
Postgres PostgresConfig
|
||||
|
||||
// Redis configures the shared Redis connection topology consumed via
|
||||
// `pkg/redisconn`.
|
||||
Redis RedisConfig
|
||||
|
||||
// Streams stores the stable Redis Stream names RTM reads from and
|
||||
// writes to.
|
||||
Streams StreamsConfig
|
||||
|
||||
// Container stores the per-container defaults applied at start time
|
||||
// when the resolved image does not declare its own labels.
|
||||
Container ContainerConfig
|
||||
|
||||
// Health configures the periodic health-monitoring workers (events
|
||||
// listener, inspect, active probe).
|
||||
Health HealthConfig
|
||||
|
||||
// Cleanup configures the reconciler and container-cleanup workers.
|
||||
Cleanup CleanupConfig
|
||||
|
||||
// Coordination configures the per-game Redis lease used to serialise
|
||||
// operations across all entry points.
|
||||
Coordination CoordinationConfig
|
||||
|
||||
// Lobby configures the synchronous Lobby internal REST client used by
|
||||
// the start service for ancillary lookups.
|
||||
Lobby LobbyConfig
|
||||
|
||||
// Telemetry configures the process-wide OpenTelemetry runtime.
|
||||
Telemetry TelemetryConfig
|
||||
}
|
||||
|
||||
// LoggingConfig holds the settings of the process-wide structured logger.
type LoggingConfig struct {
	// Level is the process log level, in a form accepted by log/slog.
	Level string
}
|
||||
|
||||
// InternalHTTPConfig configures the trusted internal HTTP listener.
|
||||
type InternalHTTPConfig struct {
|
||||
// Addr stores the TCP listen address.
|
||||
Addr string
|
||||
|
||||
// ReadHeaderTimeout bounds request-header reading.
|
||||
ReadHeaderTimeout time.Duration
|
||||
|
||||
// ReadTimeout bounds reading one request.
|
||||
ReadTimeout time.Duration
|
||||
|
||||
// WriteTimeout bounds writing one response.
|
||||
WriteTimeout time.Duration
|
||||
|
||||
// IdleTimeout bounds how long keep-alive connections stay open.
|
||||
IdleTimeout time.Duration
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable internal HTTP listener
|
||||
// configuration.
|
||||
func (cfg InternalHTTPConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Addr) == "":
|
||||
return fmt.Errorf("internal HTTP addr must not be empty")
|
||||
case !isTCPAddr(cfg.Addr):
|
||||
return fmt.Errorf("internal HTTP addr %q must use host:port form", cfg.Addr)
|
||||
case cfg.ReadHeaderTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP read header timeout must be positive")
|
||||
case cfg.ReadTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP read timeout must be positive")
|
||||
case cfg.WriteTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP write timeout must be positive")
|
||||
case cfg.IdleTimeout <= 0:
|
||||
return fmt.Errorf("internal HTTP idle timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// DockerConfig configures the Docker SDK client.
|
||||
type DockerConfig struct {
|
||||
// Host stores the Docker daemon endpoint (e.g.
|
||||
// `unix:///var/run/docker.sock`).
|
||||
Host string
|
||||
|
||||
// APIVersion overrides the Docker API version. Empty lets the SDK
|
||||
// negotiate.
|
||||
APIVersion string
|
||||
|
||||
// Network stores the user-defined Docker bridge network containers
|
||||
// attach to. Provisioned outside RTM; missing network is a fail-fast
|
||||
// condition at startup.
|
||||
Network string
|
||||
|
||||
// LogDriver stores the Docker logging driver applied to engine
|
||||
// containers.
|
||||
LogDriver string
|
||||
|
||||
// LogOpts stores the comma-separated `key=value` driver options.
|
||||
LogOpts string
|
||||
|
||||
// PullPolicy stores the configured image pull policy.
|
||||
PullPolicy ImagePullPolicy
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Docker configuration.
|
||||
func (cfg DockerConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.Host) == "":
|
||||
return fmt.Errorf("docker host must not be empty")
|
||||
case strings.TrimSpace(cfg.Network) == "":
|
||||
return fmt.Errorf("docker network must not be empty")
|
||||
case strings.TrimSpace(cfg.LogDriver) == "":
|
||||
return fmt.Errorf("docker log driver must not be empty")
|
||||
}
|
||||
return cfg.PullPolicy.Validate()
|
||||
}
|
||||
|
||||
// PostgresConfig configures the PostgreSQL-backed durable store consumed
|
||||
// via `pkg/postgres`.
|
||||
type PostgresConfig struct {
|
||||
// Conn carries the primary plus replica DSN topology and pool tuning.
|
||||
Conn postgres.Config
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable PostgreSQL configuration.
|
||||
func (cfg PostgresConfig) Validate() error {
|
||||
return cfg.Conn.Validate()
|
||||
}
|
||||
|
||||
// RedisConfig configures the Runtime Manager Redis connection topology.
|
||||
type RedisConfig struct {
|
||||
// Conn carries the connection topology (master, replicas, password,
|
||||
// db, per-call timeout).
|
||||
Conn redisconn.Config
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Redis configuration.
|
||||
func (cfg RedisConfig) Validate() error {
|
||||
return cfg.Conn.Validate()
|
||||
}
|
||||
|
||||
// StreamsConfig holds the stable Redis Stream names used by Runtime
// Manager together with the blocking-read window for its consumers.
type StreamsConfig struct {
	// StartJobs is the Redis Streams key Lobby writes start jobs to.
	StartJobs string

	// StopJobs is the Redis Streams key Lobby writes stop jobs to.
	StopJobs string

	// JobResults is the Redis Streams key RTM writes job outcomes to.
	JobResults string

	// HealthEvents is the Redis Streams key RTM publishes technical
	// health events to.
	HealthEvents string

	// NotificationIntents is the Redis Streams key RTM publishes
	// admin-only notification intents to.
	NotificationIntents string

	// BlockTimeout is the maximum blocking read window for stream
	// consumers.
	BlockTimeout time.Duration
}

// Validate checks that every stream name is non-blank and that the block
// timeout is strictly positive. The first violated rule is returned.
func (cfg StreamsConfig) Validate() error {
	if strings.TrimSpace(cfg.StartJobs) == "" {
		return fmt.Errorf("redis start jobs stream must not be empty")
	}
	if strings.TrimSpace(cfg.StopJobs) == "" {
		return fmt.Errorf("redis stop jobs stream must not be empty")
	}
	if strings.TrimSpace(cfg.JobResults) == "" {
		return fmt.Errorf("redis job results stream must not be empty")
	}
	if strings.TrimSpace(cfg.HealthEvents) == "" {
		return fmt.Errorf("redis health events stream must not be empty")
	}
	if strings.TrimSpace(cfg.NotificationIntents) == "" {
		return fmt.Errorf("redis notification intents stream must not be empty")
	}
	if cfg.BlockTimeout <= 0 {
		return fmt.Errorf("redis stream block timeout must be positive")
	}

	return nil
}
|
||||
|
||||
// ContainerConfig stores the per-container defaults applied at start
// time. Resource defaults apply when the resolved engine image does not
// expose `com.galaxy.cpu_quota` / `com.galaxy.memory` /
// `com.galaxy.pids_limit` labels.
type ContainerConfig struct {
	// DefaultCPUQuota is the fallback `--cpus` value applied when the
	// image does not declare `com.galaxy.cpu_quota`.
	DefaultCPUQuota float64

	// DefaultMemory is the fallback `--memory` value applied when the
	// image does not declare `com.galaxy.memory`.
	DefaultMemory string

	// DefaultPIDsLimit is the fallback `--pids-limit` value applied
	// when the image does not declare `com.galaxy.pids_limit`.
	DefaultPIDsLimit int

	// StopTimeout bounds graceful container stop before Docker fires
	// SIGKILL.
	StopTimeout time.Duration

	// Retention stores the TTL after which `status=stopped` containers
	// are removed by the cleanup worker.
	Retention time.Duration

	// EngineStateMountPath is the in-container path the per-game state
	// directory is bind-mounted to. It must be absolute.
	EngineStateMountPath string

	// EngineStateEnvName is the env-var name forwarded to the engine
	// pointing at EngineStateMountPath.
	EngineStateEnvName string

	// GameStateDirMode stores the unix permissions applied to the
	// per-game state directory on creation.
	GameStateDirMode uint32

	// GameStateOwnerUID stores the unix uid applied to the per-game
	// state directory on creation.
	GameStateOwnerUID int

	// GameStateOwnerGID stores the unix gid applied to the per-game
	// state directory on creation.
	GameStateOwnerGID int

	// GameStateRoot is the host path under which per-game state
	// directories are created. It must be absolute.
	GameStateRoot string
}

// Validate reports whether cfg stores usable container defaults.
//
// Numeric and duration settings must be strictly positive, string
// settings must be non-blank, the directory mode must be non-zero, and
// both EngineStateMountPath and GameStateRoot must be absolute
// (slash-prefixed) paths. GameStateOwnerUID and GameStateOwnerGID are not
// checked. The first violated rule is returned.
func (cfg ContainerConfig) Validate() error {
	switch {
	// Written as a negated `>` on purpose: NaN compares false against
	// everything, so `<= 0` would silently accept it.
	case !(cfg.DefaultCPUQuota > 0):
		return fmt.Errorf("default cpu quota must be positive")
	case strings.TrimSpace(cfg.DefaultMemory) == "":
		return fmt.Errorf("default memory must not be empty")
	case cfg.DefaultPIDsLimit <= 0:
		return fmt.Errorf("default pids limit must be positive")
	case cfg.StopTimeout <= 0:
		return fmt.Errorf("container stop timeout must be positive")
	case cfg.Retention <= 0:
		return fmt.Errorf("container retention must be positive")
	case strings.TrimSpace(cfg.EngineStateMountPath) == "":
		return fmt.Errorf("engine state mount path must not be empty")
	// The mount path is a bind-mount target inside the container; Docker
	// only accepts absolute container paths, so fail at configuration
	// time instead of at the first container start.
	case !strings.HasPrefix(strings.TrimSpace(cfg.EngineStateMountPath), "/"):
		return fmt.Errorf("engine state mount path %q must be an absolute path", cfg.EngineStateMountPath)
	case strings.TrimSpace(cfg.EngineStateEnvName) == "":
		return fmt.Errorf("engine state env name must not be empty")
	case cfg.GameStateDirMode == 0:
		return fmt.Errorf("game state dir mode must be non-zero")
	case strings.TrimSpace(cfg.GameStateRoot) == "":
		return fmt.Errorf("game state root must not be empty")
	case !strings.HasPrefix(strings.TrimSpace(cfg.GameStateRoot), "/"):
		return fmt.Errorf("game state root %q must be an absolute path", cfg.GameStateRoot)
	default:
		return nil
	}
}
|
||||
|
||||
// HealthConfig holds the settings of the periodic health-monitoring
// workers (Docker events listener, periodic inspect, active probe).
type HealthConfig struct {
	// InspectInterval is the period between two periodic Docker inspect
	// passes.
	InspectInterval time.Duration

	// ProbeInterval is the period between two engine `/healthz` probe
	// rounds.
	ProbeInterval time.Duration

	// ProbeTimeout is the upper bound on one engine `/healthz` request.
	ProbeTimeout time.Duration

	// ProbeFailuresThreshold is the number of consecutive failures that
	// triggers a `probe_failed` event.
	ProbeFailuresThreshold int
}

// Validate checks that every interval, the probe timeout and the failure
// threshold are strictly positive. The first violated rule is returned.
func (cfg HealthConfig) Validate() error {
	if cfg.InspectInterval <= 0 {
		return fmt.Errorf("inspect interval must be positive")
	}
	if cfg.ProbeInterval <= 0 {
		return fmt.Errorf("probe interval must be positive")
	}
	if cfg.ProbeTimeout <= 0 {
		return fmt.Errorf("probe timeout must be positive")
	}
	if cfg.ProbeFailuresThreshold <= 0 {
		return fmt.Errorf("probe failures threshold must be positive")
	}

	return nil
}
|
||||
|
||||
// CleanupConfig holds the settings of the reconciler and
// container-cleanup workers.
type CleanupConfig struct {
	// ReconcileInterval is the period between two reconciler passes.
	ReconcileInterval time.Duration

	// CleanupInterval is the period between two container-cleanup
	// passes.
	CleanupInterval time.Duration
}

// Validate checks that both intervals are strictly positive. The
// reconcile interval is checked first.
func (cfg CleanupConfig) Validate() error {
	if cfg.ReconcileInterval <= 0 {
		return fmt.Errorf("reconcile interval must be positive")
	}
	if cfg.CleanupInterval <= 0 {
		return fmt.Errorf("cleanup interval must be positive")
	}

	return nil
}
|
||||
|
||||
// CoordinationConfig holds the settings of the per-game Redis lease.
type CoordinationConfig struct {
	// GameLeaseTTL is the per-game lease lifetime; the lease is renewed
	// every half-TTL while an operation runs.
	GameLeaseTTL time.Duration
}

// Validate checks that the lease TTL is strictly positive.
func (cfg CoordinationConfig) Validate() error {
	if cfg.GameLeaseTTL > 0 {
		return nil
	}

	return fmt.Errorf("game lease ttl must be positive")
}
|
||||
|
||||
// LobbyConfig configures the synchronous Lobby internal REST client.
|
||||
type LobbyConfig struct {
|
||||
// BaseURL stores the trusted Lobby internal listener base URL.
|
||||
BaseURL string
|
||||
|
||||
// Timeout bounds one Lobby internal request.
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// Validate reports whether cfg stores a usable Lobby client
|
||||
// configuration.
|
||||
func (cfg LobbyConfig) Validate() error {
|
||||
switch {
|
||||
case strings.TrimSpace(cfg.BaseURL) == "":
|
||||
return fmt.Errorf("lobby internal base url must not be empty")
|
||||
case !isHTTPURL(cfg.BaseURL):
|
||||
return fmt.Errorf("lobby internal base url %q must be an absolute http(s) URL", cfg.BaseURL)
|
||||
case cfg.Timeout <= 0:
|
||||
return fmt.Errorf("lobby internal timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// TelemetryConfig configures the Runtime Manager OpenTelemetry runtime.
|
||||
type TelemetryConfig struct {
|
||||
// ServiceName overrides the default OpenTelemetry service name.
|
||||
ServiceName string
|
||||
|
||||
// TracesExporter selects the external traces exporter. Supported
|
||||
// values are `none` and `otlp`.
|
||||
TracesExporter string
|
||||
|
||||
// MetricsExporter selects the external metrics exporter. Supported
|
||||
// values are `none` and `otlp`.
|
||||
MetricsExporter string
|
||||
|
||||
// TracesProtocol selects the OTLP traces protocol when
|
||||
// TracesExporter is `otlp`.
|
||||
TracesProtocol string
|
||||
|
||||
// MetricsProtocol selects the OTLP metrics protocol when
|
||||
// MetricsExporter is `otlp`.
|
||||
MetricsProtocol string
|
||||
|
||||
// StdoutTracesEnabled enables the additional stdout trace exporter
|
||||
// used for local development and debugging.
|
||||
StdoutTracesEnabled bool
|
||||
|
||||
// StdoutMetricsEnabled enables the additional stdout metric
|
||||
// exporter used for local development and debugging.
|
||||
StdoutMetricsEnabled bool
|
||||
}
|
||||
|
||||
// Validate reports whether cfg contains a supported OpenTelemetry
|
||||
// configuration.
|
||||
func (cfg TelemetryConfig) Validate() error {
|
||||
return telemetry.ProcessConfig{
|
||||
ServiceName: cfg.ServiceName,
|
||||
TracesExporter: cfg.TracesExporter,
|
||||
MetricsExporter: cfg.MetricsExporter,
|
||||
TracesProtocol: cfg.TracesProtocol,
|
||||
MetricsProtocol: cfg.MetricsProtocol,
|
||||
StdoutTracesEnabled: cfg.StdoutTracesEnabled,
|
||||
StdoutMetricsEnabled: cfg.StdoutMetricsEnabled,
|
||||
}.Validate()
|
||||
}
|
||||
|
||||
// DefaultConfig returns the default Runtime Manager process configuration.
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
ShutdownTimeout: defaultShutdownTimeout,
|
||||
Logging: LoggingConfig{
|
||||
Level: defaultLogLevel,
|
||||
},
|
||||
InternalHTTP: InternalHTTPConfig{
|
||||
Addr: defaultInternalHTTPAddr,
|
||||
ReadHeaderTimeout: defaultReadHeaderTimeout,
|
||||
ReadTimeout: defaultReadTimeout,
|
||||
WriteTimeout: defaultWriteTimeout,
|
||||
IdleTimeout: defaultIdleTimeout,
|
||||
},
|
||||
Docker: DockerConfig{
|
||||
Host: defaultDockerHost,
|
||||
Network: defaultDockerNetwork,
|
||||
LogDriver: defaultDockerLogDriver,
|
||||
PullPolicy: defaultImagePullPolicy,
|
||||
},
|
||||
Postgres: PostgresConfig{
|
||||
Conn: postgres.DefaultConfig(),
|
||||
},
|
||||
Redis: RedisConfig{
|
||||
Conn: redisconn.DefaultConfig(),
|
||||
},
|
||||
Streams: StreamsConfig{
|
||||
StartJobs: defaultStartJobsStream,
|
||||
StopJobs: defaultStopJobsStream,
|
||||
JobResults: defaultJobResultsStream,
|
||||
HealthEvents: defaultHealthEventsStream,
|
||||
NotificationIntents: defaultNotificationIntentsKey,
|
||||
BlockTimeout: defaultStreamBlockTimeout,
|
||||
},
|
||||
Container: ContainerConfig{
|
||||
DefaultCPUQuota: defaultCPUQuota,
|
||||
DefaultMemory: defaultMemory,
|
||||
DefaultPIDsLimit: defaultPIDsLimit,
|
||||
StopTimeout: defaultContainerStopTimeout,
|
||||
Retention: defaultContainerRetention,
|
||||
EngineStateMountPath: defaultEngineStateMountPath,
|
||||
EngineStateEnvName: defaultEngineStateEnvName,
|
||||
GameStateDirMode: defaultGameStateDirMode,
|
||||
},
|
||||
Health: HealthConfig{
|
||||
InspectInterval: defaultInspectInterval,
|
||||
ProbeInterval: defaultProbeInterval,
|
||||
ProbeTimeout: defaultProbeTimeout,
|
||||
ProbeFailuresThreshold: defaultProbeFailuresThreshold,
|
||||
},
|
||||
Cleanup: CleanupConfig{
|
||||
ReconcileInterval: defaultReconcileInterval,
|
||||
CleanupInterval: defaultCleanupInterval,
|
||||
},
|
||||
Coordination: CoordinationConfig{
|
||||
GameLeaseTTL: defaultGameLeaseTTL,
|
||||
},
|
||||
Lobby: LobbyConfig{
|
||||
Timeout: defaultLobbyInternalTimeout,
|
||||
},
|
||||
Telemetry: TelemetryConfig{
|
||||
ServiceName: defaultOTelServiceName,
|
||||
TracesExporter: "none",
|
||||
MetricsExporter: "none",
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user