package app

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log/slog"
	"net/http"
	"time"

	"galaxy/rtmanager/internal/adapters/docker"
	"galaxy/rtmanager/internal/adapters/healtheventspublisher"
	"galaxy/rtmanager/internal/adapters/jobresultspublisher"
	"galaxy/rtmanager/internal/adapters/lobbyclient"
	"galaxy/rtmanager/internal/adapters/notificationpublisher"
	"galaxy/rtmanager/internal/adapters/postgres/healthsnapshotstore"
	"galaxy/rtmanager/internal/adapters/postgres/operationlogstore"
	"galaxy/rtmanager/internal/adapters/postgres/runtimerecordstore"
	"galaxy/rtmanager/internal/adapters/redisstate/gamelease"
	"galaxy/rtmanager/internal/adapters/redisstate/streamoffsets"
	"galaxy/rtmanager/internal/config"
	"galaxy/rtmanager/internal/ports"
	"galaxy/rtmanager/internal/service/cleanupcontainer"
	"galaxy/rtmanager/internal/service/patchruntime"
	"galaxy/rtmanager/internal/service/restartruntime"
	"galaxy/rtmanager/internal/service/startruntime"
	"galaxy/rtmanager/internal/service/stopruntime"
	"galaxy/rtmanager/internal/telemetry"
	"galaxy/rtmanager/internal/worker/containercleanup"
	"galaxy/rtmanager/internal/worker/dockerevents"
	"galaxy/rtmanager/internal/worker/dockerinspect"
	"galaxy/rtmanager/internal/worker/healthprobe"
	"galaxy/rtmanager/internal/worker/reconcile"
	"galaxy/rtmanager/internal/worker/startjobsconsumer"
	"galaxy/rtmanager/internal/worker/stopjobsconsumer"

	dockerclient "github.com/docker/docker/client"
	"github.com/redis/go-redis/v9"
	"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)

// wiring owns the process-level singletons constructed once during
// `NewRuntime` and consumed by every worker and HTTP handler.
//
// The struct exposes typed accessors so callers can grab the store /
// adapter / service singletons without depending on internal fields.
type wiring struct {
	cfg          config.Config
	redisClient  *redis.Client
	pgPool       *sql.DB
	dockerClient *dockerclient.Client
	clock        func() time.Time
	logger       *slog.Logger
	telemetry    *telemetry.Runtime

	// Persistence stores.
	runtimeRecordStore  *runtimerecordstore.Store
	operationLogStore   *operationlogstore.Store
	healthSnapshotStore *healthsnapshotstore.Store
	streamOffsetStore   *streamoffsets.Store
	gameLeaseStore      *gamelease.Store

	// External adapters.
	dockerAdapter         *docker.Client
	lobbyClient           *lobbyclient.Client
	notificationPublisher *notificationpublisher.Publisher
	healthEventsPublisher *healtheventspublisher.Publisher
	jobResultsPublisher   *jobresultspublisher.Publisher

	// Service layer.
	startRuntimeService     *startruntime.Service
	stopRuntimeService      *stopruntime.Service
	restartRuntimeService   *restartruntime.Service
	patchRuntimeService     *patchruntime.Service
	cleanupContainerService *cleanupcontainer.Service

	// Worker layer.
	startJobsConsumer      *startjobsconsumer.Consumer
	stopJobsConsumer       *stopjobsconsumer.Consumer
	dockerEventsListener   *dockerevents.Listener
	healthProbeWorker      *healthprobe.Worker
	dockerInspectWorker    *dockerinspect.Worker
	reconciler             *reconcile.Reconciler
	containerCleanupWorker *containercleanup.Worker

	// closers releases adapter-level resources at runtime shutdown.
	closers []func() error
}
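// A typical construction sequence, sketched for illustration only (the
// authoritative caller lives in internal/app/runtime.go and may differ;
// `tel` stands in for the *telemetry.Runtime the caller already owns):
//
//	w, err := newWiring(cfg, redisClient, pgPool, dockerClient, time.Now, logger, tel)
//	if err != nil {
//		return err
//	}
//	defer func() { _ = w.close() }()
//	if err := w.registerTelemetryGauges(); err != nil {
//		return err
//	}
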
// newWiring constructs the process-level dependency set: the persistence
// stores, the external adapters, the service layer, and the worker
// layer. It validates every required collaborator so callers can rely
// on them being non-nil.
func newWiring(
	cfg config.Config,
	redisClient *redis.Client,
	pgPool *sql.DB,
	dockerClient *dockerclient.Client,
	clock func() time.Time,
	logger *slog.Logger,
	telemetryRuntime *telemetry.Runtime,
) (*wiring, error) {
	if redisClient == nil {
		return nil, errors.New("new rtmanager wiring: nil redis client")
	}
	if pgPool == nil {
		return nil, errors.New("new rtmanager wiring: nil postgres pool")
	}
	if dockerClient == nil {
		return nil, errors.New("new rtmanager wiring: nil docker client")
	}
	if clock == nil {
		clock = time.Now
	}
	if logger == nil {
		logger = slog.Default()
	}
	if telemetryRuntime == nil {
		return nil, errors.New("new rtmanager wiring: nil telemetry runtime")
	}

	w := &wiring{
		cfg:          cfg,
		redisClient:  redisClient,
		pgPool:       pgPool,
		dockerClient: dockerClient,
		clock:        clock,
		logger:       logger,
		telemetry:    telemetryRuntime,
	}

	if err := w.buildPersistence(); err != nil {
		return nil, fmt.Errorf("new rtmanager wiring: %w", err)
	}
	if err := w.buildAdapters(); err != nil {
		_ = w.close()
		return nil, fmt.Errorf("new rtmanager wiring: %w", err)
	}
	if err := w.buildServices(); err != nil {
		_ = w.close()
		return nil, fmt.Errorf("new rtmanager wiring: %w", err)
	}
	if err := w.buildWorkers(); err != nil {
		_ = w.close()
		return nil, fmt.Errorf("new rtmanager wiring: %w", err)
	}
	return w, nil
}

func (w *wiring) buildPersistence() error {
	runtimeStore, err := runtimerecordstore.New(runtimerecordstore.Config{
		DB:               w.pgPool,
		OperationTimeout: w.cfg.Postgres.Conn.OperationTimeout,
	})
	if err != nil {
		return fmt.Errorf("runtime record store: %w", err)
	}
	w.runtimeRecordStore = runtimeStore

	operationStore, err := operationlogstore.New(operationlogstore.Config{
		DB:               w.pgPool,
		OperationTimeout: w.cfg.Postgres.Conn.OperationTimeout,
	})
	if err != nil {
		return fmt.Errorf("operation log store: %w", err)
	}
	w.operationLogStore = operationStore

	snapshotStore, err := healthsnapshotstore.New(healthsnapshotstore.Config{
		DB:               w.pgPool,
		OperationTimeout: w.cfg.Postgres.Conn.OperationTimeout,
	})
	if err != nil {
		return fmt.Errorf("health snapshot store: %w", err)
	}
	w.healthSnapshotStore = snapshotStore

	offsetStore, err := streamoffsets.New(streamoffsets.Config{Client: w.redisClient})
	if err != nil {
		return fmt.Errorf("stream offset store: %w", err)
	}
	w.streamOffsetStore = offsetStore

	leaseStore, err := gamelease.New(gamelease.Config{Client: w.redisClient})
	if err != nil {
		return fmt.Errorf("game lease store: %w", err)
	}
	w.gameLeaseStore = leaseStore
	return nil
}

func (w *wiring) buildAdapters() error {
	dockerAdapter, err := docker.NewClient(docker.Config{
		Docker:    w.dockerClient,
		LogDriver: w.cfg.Docker.LogDriver,
		LogOpts:   w.cfg.Docker.LogOpts,
		Clock:     w.clock,
	})
	if err != nil {
		return fmt.Errorf("docker adapter: %w", err)
	}
	w.dockerAdapter = dockerAdapter

	lobby, err := lobbyclient.NewClient(lobbyclient.Config{
		BaseURL:        w.cfg.Lobby.BaseURL,
		RequestTimeout: w.cfg.Lobby.Timeout,
	})
	if err != nil {
		return fmt.Errorf("lobby client: %w", err)
	}
	w.lobbyClient = lobby
	w.closers = append(w.closers, lobby.Close)

	notificationPub, err := notificationpublisher.NewPublisher(notificationpublisher.Config{
		Client: w.redisClient,
		Stream: w.cfg.Streams.NotificationIntents,
	})
	if err != nil {
		return fmt.Errorf("notification publisher: %w", err)
	}
	w.notificationPublisher = notificationPub

	healthPub, err := healtheventspublisher.NewPublisher(healtheventspublisher.Config{
		Client:    w.redisClient,
		Snapshots: w.healthSnapshotStore,
		Stream:    w.cfg.Streams.HealthEvents,
	})
	if err != nil {
		return fmt.Errorf("health events publisher: %w", err)
	}
	w.healthEventsPublisher = healthPub

	jobResultsPub, err := jobresultspublisher.NewPublisher(jobresultspublisher.Config{
		Client: w.redisClient,
		Stream: w.cfg.Streams.JobResults,
	})
	if err != nil {
		return fmt.Errorf("job results publisher: %w", err)
	}
	w.jobResultsPublisher = jobResultsPub
	return nil
}
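// Adapters that own network resources register a closer so close() can
// release them in reverse construction order; the lobby client above is
// the only one today. A hypothetical future adapter would follow the
// same pattern (`someadapter` is an invented name for illustration):
//
//	adapter, err := someadapter.New(...)
//	if err != nil {
//		return fmt.Errorf("some adapter: %w", err)
//	}
//	w.someAdapter = adapter
//	w.closers = append(w.closers, adapter.Close)
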
func (w *wiring) buildServices() error {
	startService, err := startruntime.NewService(startruntime.Dependencies{
		RuntimeRecords: w.runtimeRecordStore,
		OperationLogs:  w.operationLogStore,
		Docker:         w.dockerAdapter,
		Leases:         w.gameLeaseStore,
		HealthEvents:   w.healthEventsPublisher,
		Notifications:  w.notificationPublisher,
		Lobby:          w.lobbyClient,
		Container:      w.cfg.Container,
		DockerCfg:      w.cfg.Docker,
		Coordination:   w.cfg.Coordination,
		Telemetry:      w.telemetry,
		Logger:         w.logger,
		Clock:          w.clock,
	})
	if err != nil {
		return fmt.Errorf("start runtime service: %w", err)
	}
	w.startRuntimeService = startService

	stopService, err := stopruntime.NewService(stopruntime.Dependencies{
		RuntimeRecords: w.runtimeRecordStore,
		OperationLogs:  w.operationLogStore,
		Docker:         w.dockerAdapter,
		Leases:         w.gameLeaseStore,
		HealthEvents:   w.healthEventsPublisher,
		Container:      w.cfg.Container,
		Coordination:   w.cfg.Coordination,
		Telemetry:      w.telemetry,
		Logger:         w.logger,
		Clock:          w.clock,
	})
	if err != nil {
		return fmt.Errorf("stop runtime service: %w", err)
	}
	w.stopRuntimeService = stopService

	restartService, err := restartruntime.NewService(restartruntime.Dependencies{
		RuntimeRecords: w.runtimeRecordStore,
		OperationLogs:  w.operationLogStore,
		Docker:         w.dockerAdapter,
		Leases:         w.gameLeaseStore,
		StopService:    stopService,
		StartService:   startService,
		Coordination:   w.cfg.Coordination,
		Telemetry:      w.telemetry,
		Logger:         w.logger,
		Clock:          w.clock,
	})
	if err != nil {
		return fmt.Errorf("restart runtime service: %w", err)
	}
	w.restartRuntimeService = restartService

	patchService, err := patchruntime.NewService(patchruntime.Dependencies{
		RuntimeRecords: w.runtimeRecordStore,
		OperationLogs:  w.operationLogStore,
		Docker:         w.dockerAdapter,
		Leases:         w.gameLeaseStore,
		StopService:    stopService,
		StartService:   startService,
		Coordination:   w.cfg.Coordination,
		Telemetry:      w.telemetry,
		Logger:         w.logger,
		Clock:          w.clock,
	})
	if err != nil {
		return fmt.Errorf("patch runtime service: %w", err)
	}
	w.patchRuntimeService = patchService

	cleanupService, err := cleanupcontainer.NewService(cleanupcontainer.Dependencies{
		RuntimeRecords: w.runtimeRecordStore,
		OperationLogs:  w.operationLogStore,
		Docker:         w.dockerAdapter,
		Leases:         w.gameLeaseStore,
		Coordination:   w.cfg.Coordination,
		Telemetry:      w.telemetry,
		Logger:         w.logger,
		Clock:          w.clock,
	})
	if err != nil {
		return fmt.Errorf("cleanup container service: %w", err)
	}
	w.cleanupContainerService = cleanupService
	return nil
}
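// restartruntime and patchruntime deliberately compose the stop and
// start services instead of re-implementing container orchestration.
// A future composite operation would be wired the same way (a sketch;
// `newop` is a hypothetical package name):
//
//	svc, err := newop.NewService(newop.Dependencies{
//		StopService:  stopService,
//		StartService: startService,
//		Coordination: w.cfg.Coordination,
//		Logger:       w.logger,
//	})
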
// buildWorkers constructs the asynchronous workers: the Lobby ↔ RTM
// stream consumers plus the Docker event listener, health probes,
// reconciler, and container cleanup worker. All of them participate in
// the process lifecycle as `app.Component`s; `internal/app/runtime.go`
// passes them into `app.New` alongside the internal HTTP server.
func (w *wiring) buildWorkers() error {
	startConsumer, err := startjobsconsumer.NewConsumer(startjobsconsumer.Config{
		Client:       w.redisClient,
		Stream:       w.cfg.Streams.StartJobs,
		BlockTimeout: w.cfg.Streams.BlockTimeout,
		StartService: w.startRuntimeService,
		JobResults:   w.jobResultsPublisher,
		OffsetStore:  w.streamOffsetStore,
		Logger:       w.logger,
	})
	if err != nil {
		return fmt.Errorf("start jobs consumer: %w", err)
	}
	w.startJobsConsumer = startConsumer

	stopConsumer, err := stopjobsconsumer.NewConsumer(stopjobsconsumer.Config{
		Client:       w.redisClient,
		Stream:       w.cfg.Streams.StopJobs,
		BlockTimeout: w.cfg.Streams.BlockTimeout,
		StopService:  w.stopRuntimeService,
		JobResults:   w.jobResultsPublisher,
		OffsetStore:  w.streamOffsetStore,
		Logger:       w.logger,
	})
	if err != nil {
		return fmt.Errorf("stop jobs consumer: %w", err)
	}
	w.stopJobsConsumer = stopConsumer

	eventsListener, err := dockerevents.NewListener(dockerevents.Dependencies{
		Docker:         w.dockerAdapter,
		RuntimeRecords: w.runtimeRecordStore,
		HealthEvents:   w.healthEventsPublisher,
		Telemetry:      w.telemetry,
		Clock:          w.clock,
		Logger:         w.logger,
	})
	if err != nil {
		return fmt.Errorf("docker events listener: %w", err)
	}
	w.dockerEventsListener = eventsListener

	probeHTTPClient, err := newProbeHTTPClient(w.telemetry)
	if err != nil {
		return fmt.Errorf("health probe http client: %w", err)
	}
	probeWorker, err := healthprobe.NewWorker(healthprobe.Dependencies{
		RuntimeRecords:    w.runtimeRecordStore,
		HealthEvents:      w.healthEventsPublisher,
		HTTPClient:        probeHTTPClient,
		Telemetry:         w.telemetry,
		Interval:          w.cfg.Health.ProbeInterval,
		ProbeTimeout:      w.cfg.Health.ProbeTimeout,
		FailuresThreshold: w.cfg.Health.ProbeFailuresThreshold,
		Clock:             w.clock,
		Logger:            w.logger,
	})
	if err != nil {
		return fmt.Errorf("health probe worker: %w", err)
	}
	w.healthProbeWorker = probeWorker

	inspectWorker, err := dockerinspect.NewWorker(dockerinspect.Dependencies{
		Docker:         w.dockerAdapter,
		RuntimeRecords: w.runtimeRecordStore,
		HealthEvents:   w.healthEventsPublisher,
		Telemetry:      w.telemetry,
		Interval:       w.cfg.Health.InspectInterval,
		Clock:          w.clock,
		Logger:         w.logger,
	})
	if err != nil {
		return fmt.Errorf("docker inspect worker: %w", err)
	}
	w.dockerInspectWorker = inspectWorker

	reconciler, err := reconcile.NewReconciler(reconcile.Dependencies{
		Docker:         w.dockerAdapter,
		RuntimeRecords: w.runtimeRecordStore,
		OperationLogs:  w.operationLogStore,
		HealthEvents:   w.healthEventsPublisher,
		Leases:         w.gameLeaseStore,
		Telemetry:      w.telemetry,
		DockerCfg:      w.cfg.Docker,
		ContainerCfg:   w.cfg.Container,
		Coordination:   w.cfg.Coordination,
		Interval:       w.cfg.Cleanup.ReconcileInterval,
		Clock:          w.clock,
		Logger:         w.logger,
	})
	if err != nil {
		return fmt.Errorf("reconciler: %w", err)
	}
	w.reconciler = reconciler

	cleanupWorker, err := containercleanup.NewWorker(containercleanup.Dependencies{
		RuntimeRecords: w.runtimeRecordStore,
		Cleanup:        w.cleanupContainerService,
		Retention:      w.cfg.Container.Retention,
		Interval:       w.cfg.Cleanup.CleanupInterval,
		Clock:          w.clock,
		Logger:         w.logger,
	})
	if err != nil {
		return fmt.Errorf("container cleanup worker: %w", err)
	}
	w.containerCleanupWorker = cleanupWorker
	return nil
}
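// Every worker built above is expected to satisfy the Component
// interface consumed by app.New. An illustrative registration (the
// authoritative list lives in internal/app/runtime.go and may differ):
//
//	components := []Component{
//		w.startJobsConsumer, w.stopJobsConsumer, w.dockerEventsListener,
//		w.healthProbeWorker, w.dockerInspectWorker, w.reconciler,
//		w.containerCleanupWorker,
//	}
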
// newProbeHTTPClient constructs the otelhttp-instrumented HTTP client
// the active health probe uses to call engine `/healthz`. It clones
// http.DefaultTransport so the instrumented client never shares mutable
// transport state with the rest of the process (mirroring the lobby
// internal client).
func newProbeHTTPClient(telemetryRuntime *telemetry.Runtime) (*http.Client, error) {
	transport, ok := http.DefaultTransport.(*http.Transport)
	if !ok {
		return nil, errors.New("default http transport is not *http.Transport")
	}
	cloned := transport.Clone()
	instrumented := otelhttp.NewTransport(cloned,
		otelhttp.WithTracerProvider(telemetryRuntime.TracerProvider()),
		otelhttp.WithMeterProvider(telemetryRuntime.MeterProvider()),
	)
	return &http.Client{Transport: instrumented}, nil
}

// registerTelemetryGauges installs the runtime-records-by-status gauge
// callback so the telemetry runtime can observe the persistent store
// without holding a strong reference to the wiring.
func (w *wiring) registerTelemetryGauges() error {
	probe := newRuntimeRecordsProbe(w.runtimeRecordStore)
	return w.telemetry.RegisterGauges(telemetry.GaugeDependencies{
		RuntimeRecordsByStatus: probe,
		Logger:                 w.logger,
	})
}

// close releases adapter-level resources owned by the wiring layer, in
// reverse construction order. It returns the joined error of every
// closer; the caller is expected to invoke it once during process
// shutdown.
func (w *wiring) close() error {
	var joined error
	for index := len(w.closers) - 1; index >= 0; index-- {
		if err := w.closers[index](); err != nil {
			joined = errors.Join(joined, err)
		}
	}
	w.closers = nil
	return joined
}

// runtimeRecordsProbe adapts runtimerecordstore.Store to
// telemetry.RuntimeRecordsByStatusProbe by translating the typed status
// keys into the string keys the gauge expects.
type runtimeRecordsProbe struct {
	store *runtimerecordstore.Store
}

func newRuntimeRecordsProbe(store *runtimerecordstore.Store) *runtimeRecordsProbe {
	return &runtimeRecordsProbe{store: store}
}

func (p *runtimeRecordsProbe) CountByStatus(ctx context.Context) (map[string]int, error) {
	if p == nil || p.store == nil {
		return nil, errors.New("runtime records probe: nil store")
	}
	counts, err := p.store.CountByStatus(ctx)
	if err != nil {
		return nil, err
	}
	out := make(map[string]int, len(counts))
	for status, count := range counts {
		out[string(status)] = count
	}
	return out, nil
}

// Compile-time assertions that the constructed adapters satisfy the
// expected port surfaces; these prevent silent regressions when a
// port shape changes.
var (
	_ ports.RuntimeRecordStore          = (*runtimerecordstore.Store)(nil)
	_ ports.OperationLogStore           = (*operationlogstore.Store)(nil)
	_ ports.HealthSnapshotStore         = (*healthsnapshotstore.Store)(nil)
	_ ports.StreamOffsetStore           = (*streamoffsets.Store)(nil)
	_ ports.GameLeaseStore              = (*gamelease.Store)(nil)
	_ ports.DockerClient                = (*docker.Client)(nil)
	_ ports.LobbyInternalClient         = (*lobbyclient.Client)(nil)
	_ ports.NotificationIntentPublisher = (*notificationpublisher.Publisher)(nil)
	_ ports.HealthEventPublisher        = (*healtheventspublisher.Publisher)(nil)
	_ ports.JobResultPublisher          = (*jobresultspublisher.Publisher)(nil)

	_ Component = (*reconcile.Reconciler)(nil)
	_ Component = (*containercleanup.Worker)(nil)

	_ containercleanup.Cleaner = (*cleanupcontainer.Service)(nil)
)
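// When a new adapter is introduced, extending the assertion block above
// keeps port drift a compile-time error rather than a runtime surprise,
// e.g. (a sketch; `SomeNewPort` and `somenewadapter` are hypothetical
// names):
//
//	var _ ports.SomeNewPort = (*somenewadapter.Adapter)(nil)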