galaxy-game/rtmanager/internal/service/startruntime/service.go

// Package startruntime implements the `start` lifecycle operation owned
// by Runtime Manager. The service is the single orchestrator behind
// both the asynchronous `runtime:start_jobs` consumer and the
// synchronous `POST /api/v1/internal/runtimes/{game_id}/start` REST
// handler; both callers obtain a deterministic Result with a stable
// `Outcome` / `ErrorCode` pair.
//
// Lifecycle and failure-mode semantics follow `rtmanager/README.md
// §Lifecycles → Start`. Design rationale is captured in
// `rtmanager/docs/services.md`.
package startruntime

import (
	"context"
	"crypto/rand"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"galaxy/notificationintent"
	"galaxy/rtmanager/internal/config"
	"galaxy/rtmanager/internal/domain/health"
	"galaxy/rtmanager/internal/domain/operation"
	"galaxy/rtmanager/internal/domain/runtime"
	"galaxy/rtmanager/internal/logging"
	"galaxy/rtmanager/internal/ports"
	"galaxy/rtmanager/internal/telemetry"

	"github.com/distribution/reference"
)

// Container labels applied to every engine container created by the
// start service. Frozen by `rtmanager/README.md §Container Model`.
const (
	LabelOwner          = "com.galaxy.owner"
	LabelOwnerValue     = "rtmanager"
	LabelKind           = "com.galaxy.kind"
	LabelKindValue      = "game-engine"
	LabelGameID         = "com.galaxy.game_id"
	LabelEngineImageRef = "com.galaxy.engine_image_ref"
	LabelStartedAtMs    = "com.galaxy.started_at_ms"

	// Image labels read at start time to derive resource limits.
	imageLabelCPUQuota  = "com.galaxy.cpu_quota"
	imageLabelMemory    = "com.galaxy.memory"
	imageLabelPIDsLimit = "com.galaxy.pids_limit"

	// HostnamePrefix is the constant prefix used to build the per-game
	// container hostname (`galaxy-game-{game_id}`). The full hostname
	// also forms the container name; restart and patch keep the same
	// value so the engine endpoint stays stable across container
	// recreates.
	HostnamePrefix = "galaxy-game-"

	// EngineStateBackCompatEnvName is the secondary env var name v1
	// engines accept for the bind-mounted state directory. Always set
	// alongside the configured primary name to honour the v1 backward
	// compatibility commitment in `rtmanager/README.md §Container Model`.
	EngineStateBackCompatEnvName = "STORAGE_PATH"

	// leaseReleaseTimeout bounds the deferred lease-release call. A
	// fresh background context is used so the release runs even when
	// the request context was already canceled.
	leaseReleaseTimeout = 5 * time.Second
)

// Input stores the per-call arguments for one start operation.
type Input struct {
	// GameID identifies the platform game to start.
	GameID string

	// ImageRef stores the producer-resolved Docker reference of the
	// engine image. Validated against `distribution/reference` before
	// any Docker work.
	ImageRef string

	// OpSource classifies how the request entered Runtime Manager.
	// Required: every operation_log entry carries an op_source.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference
	// (Redis Stream entry id, REST request id, admin user id). Empty
	// when the caller does not provide one.
	SourceRef string
}

// Validate reports whether input carries the structural invariants the
// service requires.
func (input Input) Validate() error {
	if strings.TrimSpace(input.GameID) == "" {
		return fmt.Errorf("game id must not be empty")
	}
	if strings.TrimSpace(input.ImageRef) == "" {
		return fmt.Errorf("image ref must not be empty")
	}
	if !input.OpSource.IsKnown() {
		return fmt.Errorf("op source %q is unsupported", input.OpSource)
	}
	return nil
}

// Result stores the deterministic outcome of one Handle call.
type Result struct {
	// Record carries the runtime record installed by the operation.
	// Populated on success and on idempotent replay (`replay_no_op`);
	// zero on failure.
	Record runtime.RuntimeRecord

	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome

	// ErrorCode stores the stable error code on failure, or
	// `replay_no_op` on idempotent replay. Empty for fresh successes.
	ErrorCode string

	// ErrorMessage stores the operator-readable detail on failure.
	// Empty for successes.
	ErrorMessage string
}

// Dependencies groups the collaborators required by Service.
type Dependencies struct {
	// RuntimeRecords reads and installs the durable runtime record.
	RuntimeRecords ports.RuntimeRecordStore

	// OperationLogs records the success / failure audit entry.
	OperationLogs ports.OperationLogStore

	// Docker drives the Docker daemon (network check, pull, inspect,
	// run, remove).
	Docker ports.DockerClient

	// Leases serialises operations against the same game id.
	Leases ports.GameLeaseStore

	// HealthEvents publishes `runtime:health_events` and upserts the
	// matching `health_snapshots` row.
	HealthEvents ports.HealthEventPublisher

	// Notifications publishes admin-only failure intents.
	Notifications ports.NotificationIntentPublisher

	// Lobby provides best-effort diagnostic context for the started
	// game. May be nil; the start operation does not depend on it.
	Lobby ports.LobbyInternalClient

	// Container groups the per-container defaults and state-directory
	// settings consumed at start time.
	Container config.ContainerConfig

	// Docker groups the Docker daemon settings (network, log driver,
	// pull policy) consumed at start time.
	DockerCfg config.DockerConfig

	// Coordination supplies the per-game lease TTL.
	Coordination config.CoordinationConfig

	// Telemetry records start outcomes, lease latency, and health
	// event counters. Required.
	Telemetry *telemetry.Runtime

	// Logger records structured service-level events. Defaults to
	// `slog.Default()` when nil.
	Logger *slog.Logger

	// Clock supplies the wall-clock used for operation timestamps.
	// Defaults to `time.Now` when nil.
	Clock func() time.Time

	// NewToken supplies a unique opaque lease token. Defaults to a
	// 32-byte random base64url string when nil. Tests may override.
	NewToken func() string

	// PrepareStateDir creates the per-game state directory and
	// returns its absolute host path. Defaults to a real-filesystem
	// implementation that honours Container.GameStateRoot,
	// Container.GameStateDirMode, and Container.GameStateOwner{UID,GID}.
	// Tests override to point at a temporary directory.
	PrepareStateDir func(gameID string) (string, error)
}

// Service executes the start lifecycle operation.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	operationLogs  ports.OperationLogStore
	docker         ports.DockerClient
	leases         ports.GameLeaseStore
	healthEvents   ports.HealthEventPublisher
	notifications  ports.NotificationIntentPublisher
	lobby          ports.LobbyInternalClient

	containerCfg    config.ContainerConfig
	dockerCfg       config.DockerConfig
	leaseTTL        time.Duration

	telemetry *telemetry.Runtime
	logger    *slog.Logger

	clock           func() time.Time
	newToken        func() string
	prepareStateDir func(gameID string) (string, error)
}

// NewService constructs one Service from deps.
func NewService(deps Dependencies) (*Service, error) {
	switch {
	case deps.RuntimeRecords == nil:
		return nil, errors.New("new start runtime service: nil runtime records")
	case deps.OperationLogs == nil:
		return nil, errors.New("new start runtime service: nil operation logs")
	case deps.Docker == nil:
		return nil, errors.New("new start runtime service: nil docker client")
	case deps.Leases == nil:
		return nil, errors.New("new start runtime service: nil lease store")
	case deps.HealthEvents == nil:
		return nil, errors.New("new start runtime service: nil health events publisher")
	case deps.Notifications == nil:
		return nil, errors.New("new start runtime service: nil notification publisher")
	case deps.Telemetry == nil:
		return nil, errors.New("new start runtime service: nil telemetry runtime")
	}
	if err := deps.Container.Validate(); err != nil {
		return nil, fmt.Errorf("new start runtime service: container config: %w", err)
	}
	if err := deps.DockerCfg.Validate(); err != nil {
		return nil, fmt.Errorf("new start runtime service: docker config: %w", err)
	}
	if err := deps.Coordination.Validate(); err != nil {
		return nil, fmt.Errorf("new start runtime service: coordination config: %w", err)
	}

	clock := deps.Clock
	if clock == nil {
		clock = time.Now
	}
	logger := deps.Logger
	if logger == nil {
		logger = slog.Default()
	}
	logger = logger.With("service", "rtmanager.startruntime")

	newToken := deps.NewToken
	if newToken == nil {
		newToken = defaultTokenGenerator()
	}
	prepareStateDir := deps.PrepareStateDir
	if prepareStateDir == nil {
		prepareStateDir = newDefaultStateDirPreparer(deps.Container)
	}

	return &Service{
		runtimeRecords:  deps.RuntimeRecords,
		operationLogs:   deps.OperationLogs,
		docker:          deps.Docker,
		leases:          deps.Leases,
		healthEvents:    deps.HealthEvents,
		notifications:   deps.Notifications,
		lobby:           deps.Lobby,
		containerCfg:    deps.Container,
		dockerCfg:       deps.DockerCfg,
		leaseTTL:        deps.Coordination.GameLeaseTTL,
		telemetry:       deps.Telemetry,
		logger:          logger,
		clock:           clock,
		newToken:        newToken,
		prepareStateDir: prepareStateDir,
	}, nil
}

// Handle executes one start operation end-to-end. The Go-level error
// return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome — fresh success, idempotent
// replay, or any of the stable failure modes — flows through Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("start runtime: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("start runtime: nil context")
	}

	opStartedAt := service.clock().UTC()

	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeStartConfigInvalid,
			errorMessage:     err.Error(),
			notificationType: notificationintent.NotificationTypeRuntimeStartConfigInvalid,
		}), nil
	}

	token := service.newToken()
	leaseStart := service.clock()
	acquired, err := service.leases.TryAcquire(ctx, input.GameID, token, service.leaseTTL)
	service.telemetry.RecordLeaseAcquireLatency(ctx, service.clock().Sub(leaseStart))
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    ErrorCodeServiceUnavailable,
			errorMessage: fmt.Sprintf("acquire game lease: %s", err.Error()),
		}), nil
	}
	if !acquired {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    ErrorCodeConflict,
			errorMessage: "another lifecycle operation is in progress for this game",
		}), nil
	}
	defer service.releaseLease(ctx, input.GameID, token)

	return service.runUnderLease(ctx, input, opStartedAt)
}

// Run executes the start lifecycle assuming the per-game lease is
// already held by the caller. The method is reserved for orchestrator
// services in `internal/service/` that compose start with another
// operation under a single outer lease (restart and patch). External
// callers must use Handle, which acquires and releases the lease
// itself.
//
// Run still validates input and reports business outcomes through
// Result; the Go-level error return is reserved for non-business
// failures (nil context, nil receiver). Operation log entries,
// telemetry counters, health events and admin-only notification
// intents fire identically to Handle.
func (service *Service) Run(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("start runtime: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("start runtime: nil context")
	}

	opStartedAt := service.clock().UTC()

	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeStartConfigInvalid,
			errorMessage:     err.Error(),
			notificationType: notificationintent.NotificationTypeRuntimeStartConfigInvalid,
		}), nil
	}

	return service.runUnderLease(ctx, input, opStartedAt)
}

// runUnderLease executes the post-validation, lease-protected start
// steps shared by Handle and Run. Callers must validate input and
// acquire the lease (when applicable) before invocation.
func (service *Service) runUnderLease(ctx context.Context, input Input, opStartedAt time.Time) (Result, error) {
	existing, hasExisting, err := service.loadExisting(ctx, input.GameID)
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    ErrorCodeInternal,
			errorMessage: fmt.Sprintf("load runtime record: %s", err.Error()),
		}), nil
	}
	if hasExisting && existing.Status == runtime.StatusRunning {
		if existing.CurrentImageRef == input.ImageRef {
			return service.recordReplayNoOp(ctx, opStartedAt, input, existing), nil
		}
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    ErrorCodeConflict,
			errorMessage: fmt.Sprintf("runtime already running with image_ref %q", existing.CurrentImageRef),
		}), nil
	}

	service.fetchLobbyDiagnostic(ctx, input.GameID)

	if err := validateImageRef(input.ImageRef); err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeStartConfigInvalid,
			errorMessage:     fmt.Sprintf("invalid image_ref: %s", err.Error()),
			notificationType: notificationintent.NotificationTypeRuntimeStartConfigInvalid,
		}), nil
	}

	if err := service.docker.EnsureNetwork(ctx, service.dockerCfg.Network); err != nil {
		if errors.Is(err, ports.ErrNetworkMissing) {
			return service.recordFailure(ctx, failureCtx{
				opStartedAt:      opStartedAt,
				input:            input,
				errorCode:        ErrorCodeStartConfigInvalid,
				errorMessage:     fmt.Sprintf("docker network %q is missing", service.dockerCfg.Network),
				notificationType: notificationintent.NotificationTypeRuntimeStartConfigInvalid,
			}), nil
		}
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    ErrorCodeServiceUnavailable,
			errorMessage: fmt.Sprintf("ensure docker network: %s", err.Error()),
		}), nil
	}

	if err := service.docker.PullImage(ctx, input.ImageRef, ports.PullPolicy(service.dockerCfg.PullPolicy)); err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeImagePullFailed,
			errorMessage:     err.Error(),
			notificationType: notificationintent.NotificationTypeRuntimeImagePullFailed,
		}), nil
	}

	imageInspect, err := service.docker.InspectImage(ctx, input.ImageRef)
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeImagePullFailed,
			errorMessage:     fmt.Sprintf("inspect image: %s", err.Error()),
			notificationType: notificationintent.NotificationTypeRuntimeImagePullFailed,
		}), nil
	}
	cpuQuota, memory, pidsLimit := service.resolveLimits(imageInspect.Labels)

	statePath, err := service.prepareStateDir(input.GameID)
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeStartConfigInvalid,
			errorMessage:     fmt.Sprintf("prepare state directory: %s", err.Error()),
			notificationType: notificationintent.NotificationTypeRuntimeStartConfigInvalid,
		}), nil
	}

	hostname := containerHostname(input.GameID)
	spec := ports.RunSpec{
		Name:     hostname,
		Image:    input.ImageRef,
		Hostname: hostname,
		Network:  service.dockerCfg.Network,
		Env:      service.buildEnv(),
		Labels:   service.buildLabels(input.GameID, input.ImageRef, opStartedAt),
		BindMounts: []ports.BindMount{{
			HostPath:  statePath,
			MountPath: service.containerCfg.EngineStateMountPath,
			ReadOnly:  false,
		}},
		LogDriver: service.dockerCfg.LogDriver,
		LogOpts:   parseLogOpts(service.dockerCfg.LogOpts),
		CPUQuota:  cpuQuota,
		Memory:    memory,
		PIDsLimit: pidsLimit,
	}
	runResult, err := service.docker.Run(ctx, spec)
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeContainerStartFailed,
			errorMessage:     err.Error(),
			notificationType: notificationintent.NotificationTypeRuntimeContainerStartFailed,
		}), nil
	}

	createdAt := opStartedAt
	if hasExisting && !existing.CreatedAt.IsZero() {
		createdAt = existing.CreatedAt
	}
	startedAt := runResult.StartedAt
	record := runtime.RuntimeRecord{
		GameID:             input.GameID,
		Status:             runtime.StatusRunning,
		CurrentContainerID: runResult.ContainerID,
		CurrentImageRef:    input.ImageRef,
		EngineEndpoint:     runResult.EngineEndpoint,
		StatePath:          statePath,
		DockerNetwork:      service.dockerCfg.Network,
		StartedAt:          &startedAt,
		LastOpAt:           startedAt,
		CreatedAt:          createdAt,
	}
	if err := service.runtimeRecords.Upsert(ctx, record); err != nil {
		service.bestEffortRemove(input.GameID, runResult.ContainerID)
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:      opStartedAt,
			input:            input,
			errorCode:        ErrorCodeContainerStartFailed,
			errorMessage:     fmt.Sprintf("upsert runtime record: %s", err.Error()),
			containerID:      runResult.ContainerID,
			notificationType: notificationintent.NotificationTypeRuntimeContainerStartFailed,
		}), nil
	}

	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:      input.GameID,
		OpKind:      operation.OpKindStart,
		OpSource:    input.OpSource,
		SourceRef:   input.SourceRef,
		ImageRef:    input.ImageRef,
		ContainerID: runResult.ContainerID,
		Outcome:     operation.OutcomeSuccess,
		StartedAt:   opStartedAt,
		FinishedAt:  &finishedAt,
	})
	service.bestEffortPublishHealth(ctx, ports.HealthEventEnvelope{
		GameID:      input.GameID,
		ContainerID: runResult.ContainerID,
		EventType:   health.EventTypeContainerStarted,
		OccurredAt:  startedAt,
		Details:     containerStartedDetails(input.ImageRef),
	})

	service.telemetry.RecordStartOutcome(ctx, string(operation.OutcomeSuccess), "", string(input.OpSource))
	service.telemetry.RecordHealthEvent(ctx, string(health.EventTypeContainerStarted))

	logArgs := []any{
		"game_id", input.GameID,
		"container_id", runResult.ContainerID,
		"image_ref", input.ImageRef,
		"op_source", string(input.OpSource),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime started", logArgs...)

	return Result{
		Record:  record,
		Outcome: operation.OutcomeSuccess,
	}, nil
}

// failureCtx groups the inputs to recordFailure so the Handle method
// stays readable.
type failureCtx struct {
	opStartedAt      time.Time
	input            Input
	errorCode        string
	errorMessage     string
	containerID      string
	notificationType notificationintent.NotificationType
}

// recordFailure records the failure operation_log entry, publishes the
// matching admin-only notification intent (when applicable), and emits
// telemetry. All side effects are best-effort; a downstream failure is
// logged but does not change the returned Result.
func (service *Service) recordFailure(ctx context.Context, fc failureCtx) Result {
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       fc.input.GameID,
		OpKind:       operation.OpKindStart,
		OpSource:     fc.input.OpSource,
		SourceRef:    fc.input.SourceRef,
		ImageRef:     fc.input.ImageRef,
		ContainerID:  fc.containerID,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    fc.errorCode,
		ErrorMessage: fc.errorMessage,
		StartedAt:    fc.opStartedAt,
		FinishedAt:   &finishedAt,
	})

	if fc.notificationType != "" {
		service.bestEffortNotify(ctx, fc)
	}

	service.telemetry.RecordStartOutcome(ctx, string(operation.OutcomeFailure), fc.errorCode, string(fc.input.OpSource))

	logArgs := []any{
		"game_id", fc.input.GameID,
		"image_ref", fc.input.ImageRef,
		"op_source", string(fc.input.OpSource),
		"error_code", fc.errorCode,
		"error_message", fc.errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "runtime start failed", logArgs...)

	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    fc.errorCode,
		ErrorMessage: fc.errorMessage,
	}
}

// recordReplayNoOp records the idempotent replay outcome and returns
// the existing record. The operation_log entry is appended best-effort
// so audit history captures the replay; telemetry counts the call as a
// successful start with `error_code=replay_no_op`.
func (service *Service) recordReplayNoOp(ctx context.Context, opStartedAt time.Time, input Input, existing runtime.RuntimeRecord) Result {
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:      input.GameID,
		OpKind:      operation.OpKindStart,
		OpSource:    input.OpSource,
		SourceRef:   input.SourceRef,
		ImageRef:    input.ImageRef,
		ContainerID: existing.CurrentContainerID,
		Outcome:     operation.OutcomeSuccess,
		ErrorCode:   ErrorCodeReplayNoOp,
		StartedAt:   opStartedAt,
		FinishedAt:  &finishedAt,
	})
	service.telemetry.RecordStartOutcome(ctx, string(operation.OutcomeSuccess), ErrorCodeReplayNoOp, string(input.OpSource))

	logArgs := []any{
		"game_id", input.GameID,
		"container_id", existing.CurrentContainerID,
		"image_ref", input.ImageRef,
		"op_source", string(input.OpSource),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime start replay no-op", logArgs...)

	return Result{
		Record:    existing,
		Outcome:   operation.OutcomeSuccess,
		ErrorCode: ErrorCodeReplayNoOp,
	}
}

// loadExisting reads the runtime record for gameID. The boolean return
// reports whether a record exists; ErrNotFound is translated to
// (zero, false, nil) so the caller does not branch on the sentinel
// elsewhere.
func (service *Service) loadExisting(ctx context.Context, gameID string) (runtime.RuntimeRecord, bool, error) {
	record, err := service.runtimeRecords.Get(ctx, gameID)
	switch {
	case errors.Is(err, runtime.ErrNotFound):
		return runtime.RuntimeRecord{}, false, nil
	case err != nil:
		return runtime.RuntimeRecord{}, false, err
	default:
		return record, true, nil
	}
}

// fetchLobbyDiagnostic best-effort enriches the request log with the
// Lobby-side game record. A nil Lobby client or any transport failure
// is logged and the start operation continues.
func (service *Service) fetchLobbyDiagnostic(ctx context.Context, gameID string) {
	if service.lobby == nil {
		return
	}
	record, err := service.lobby.GetGame(ctx, gameID)
	if err != nil {
		service.logger.DebugContext(ctx, "lobby diagnostic fetch failed",
			"game_id", gameID,
			"err", err.Error(),
		)
		return
	}
	service.logger.DebugContext(ctx, "lobby diagnostic fetched",
		"game_id", gameID,
		"lobby_status", record.Status,
		"lobby_target_engine_version", record.TargetEngineVersion,
	)
}

// resolveLimits derives the per-container resource limits from the
// resolved image's labels with config-driven fallbacks. Unparseable
// label values silently fall back to the configured default; operators
// see the chosen value through `rtmanager.docker_op_latency` and start
// logs.
func (service *Service) resolveLimits(labels map[string]string) (cpuQuota float64, memory string, pidsLimit int) {
	cpuQuota = service.containerCfg.DefaultCPUQuota
	memory = service.containerCfg.DefaultMemory
	pidsLimit = service.containerCfg.DefaultPIDsLimit

	if raw, ok := labels[imageLabelCPUQuota]; ok {
		if value, err := strconv.ParseFloat(raw, 64); err == nil && value > 0 {
			cpuQuota = value
		}
	}
	if raw, ok := labels[imageLabelMemory]; ok && strings.TrimSpace(raw) != "" {
		memory = raw
	}
	if raw, ok := labels[imageLabelPIDsLimit]; ok {
		if value, err := strconv.Atoi(raw); err == nil && value > 0 {
			pidsLimit = value
		}
	}
	return cpuQuota, memory, pidsLimit
}

// buildEnv assembles the env-var map handed to the engine. Both the
// configured primary name and `STORAGE_PATH` are set per
// `rtmanager/README.md §Container Model` v1 backward compatibility.
func (service *Service) buildEnv() map[string]string {
	mount := service.containerCfg.EngineStateMountPath
	env := map[string]string{
		service.containerCfg.EngineStateEnvName: mount,
		EngineStateBackCompatEnvName:            mount,
	}
	return env
}

// buildLabels assembles the container labels per
// `rtmanager/README.md §Container Model`.
func (service *Service) buildLabels(gameID, imageRef string, startedAt time.Time) map[string]string {
	return map[string]string{
		LabelOwner:          LabelOwnerValue,
		LabelKind:           LabelKindValue,
		LabelGameID:         gameID,
		LabelEngineImageRef: imageRef,
		LabelStartedAtMs:    strconv.FormatInt(startedAt.UTC().UnixMilli(), 10),
	}
}

// releaseLease releases the per-game lease in a fresh background
// context so a canceled request context does not leave the lease
// pinned for its TTL.
func (service *Service) releaseLease(ctx context.Context, gameID, token string) {
	cleanupCtx, cancel := context.WithTimeout(context.Background(), leaseReleaseTimeout)
	defer cancel()
	if err := service.leases.Release(cleanupCtx, gameID, token); err != nil {
		service.logger.WarnContext(ctx, "release game lease",
			"game_id", gameID,
			"err", err.Error(),
		)
	}
}

// bestEffortAppend writes one operation_log entry. A failure is logged
// and discarded; the durable runtime record (or its absence) remains
// the source of truth.
func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	if _, err := service.operationLogs.Append(ctx, entry); err != nil {
		service.logger.ErrorContext(ctx, "append operation log",
			"game_id", entry.GameID,
			"op_kind", string(entry.OpKind),
			"outcome", string(entry.Outcome),
			"error_code", entry.ErrorCode,
			"err", err.Error(),
		)
	}
}

// bestEffortPublishHealth emits one health event + snapshot upsert.
// Failures degrade silently per `rtmanager/README.md §Notification
// Contracts`; the runtime record remains the source of truth.
func (service *Service) bestEffortPublishHealth(ctx context.Context, envelope ports.HealthEventEnvelope) {
	if err := service.healthEvents.Publish(ctx, envelope); err != nil {
		service.logger.ErrorContext(ctx, "publish health event",
			"game_id", envelope.GameID,
			"container_id", envelope.ContainerID,
			"event_type", string(envelope.EventType),
			"err", err.Error(),
		)
	}
}

// bestEffortNotify publishes one admin-only failure intent. Failures
// degrade silently because the source business state already reflects
// the outcome.
func (service *Service) bestEffortNotify(ctx context.Context, fc failureCtx) {
	intent, err := buildFailureIntent(fc, service.clock().UTC())
	if err != nil {
		service.logger.ErrorContext(ctx, "build notification intent",
			"game_id", fc.input.GameID,
			"notification_type", string(fc.notificationType),
			"err", err.Error(),
		)
		return
	}
	if err := service.notifications.Publish(ctx, intent); err != nil {
		service.logger.ErrorContext(ctx, "publish notification intent",
			"game_id", fc.input.GameID,
			"notification_type", string(fc.notificationType),
			"err", err.Error(),
		)
		return
	}
	service.telemetry.RecordNotificationIntent(ctx, string(fc.notificationType))
}

// bestEffortRemove forces removal of a container left running by a
// failed start that progressed past Run but failed to register the
// runtime record. Failures degrade silently — the reconciler adopts
// orphans the periodic pass observes.
func (service *Service) bestEffortRemove(gameID, containerID string) {
	cleanupCtx, cancel := context.WithTimeout(context.Background(), leaseReleaseTimeout)
	defer cancel()
	if err := service.docker.Remove(cleanupCtx, containerID); err != nil {
		service.logger.ErrorContext(cleanupCtx, "rollback container after upsert failure",
			"game_id", gameID,
			"container_id", containerID,
			"err", err.Error(),
		)
	}
}

// containerHostname builds the per-game hostname that doubles as the
// Docker container name.
func containerHostname(gameID string) string {
	return HostnamePrefix + gameID
}

// containerStartedDetails builds the `details` payload required by the
// `container_started` AsyncAPI variant.
func containerStartedDetails(imageRef string) json.RawMessage {
	payload := map[string]string{"image_ref": imageRef}
	encoded, _ := json.Marshal(payload)
	return encoded
}

// validateImageRef rejects malformed Docker references before any
// daemon round-trip. The validation surfaces as `start_config_invalid`;
// daemon-side rejections after a valid parse are reported as
// `image_pull_failed`.
func validateImageRef(ref string) error {
	if strings.TrimSpace(ref) == "" {
		return fmt.Errorf("image ref must not be empty")
	}
	if _, err := reference.ParseNormalizedNamed(ref); err != nil {
		return err
	}
	return nil
}

// parseLogOpts turns the `key=value,key2=value2` shape of the
// `RTMANAGER_DOCKER_LOG_OPTS` config into a map suitable for the
// Docker SDK. Empty input returns nil so the SDK uses driver defaults.
func parseLogOpts(raw string) map[string]string {
	if strings.TrimSpace(raw) == "" {
		return nil
	}
	out := make(map[string]string)
	for part := range strings.SplitSeq(raw, ",") {
		entry := strings.TrimSpace(part)
		if entry == "" {
			continue
		}
		index := strings.IndexByte(entry, '=')
		if index <= 0 {
			continue
		}
		out[entry[:index]] = entry[index+1:]
	}
	if len(out) == 0 {
		return nil
	}
	return out
}

// buildFailureIntent constructs the admin-only notification intent for
// fc. The idempotency key is scoped per (notification_type, game_id,
// image_ref, attempted_at_ms) so the same failure observed twice is
// recognised as a duplicate by Notification Service.
func buildFailureIntent(fc failureCtx, attemptedAt time.Time) (notificationintent.Intent, error) {
	attemptedAtMs := attemptedAt.UnixMilli()
	idempotencyKey := fmt.Sprintf("%s.%s.%d", fc.notificationType, fc.input.GameID, attemptedAtMs)
	metadata := notificationintent.Metadata{
		IdempotencyKey: idempotencyKey,
		OccurredAt:     attemptedAt,
	}

	switch fc.notificationType {
	case notificationintent.NotificationTypeRuntimeImagePullFailed:
		return notificationintent.NewRuntimeImagePullFailedIntent(metadata, notificationintent.RuntimeImagePullFailedPayload{
			GameID:        fc.input.GameID,
			ImageRef:      fc.input.ImageRef,
			ErrorCode:     fc.errorCode,
			ErrorMessage:  fc.errorMessage,
			AttemptedAtMs: attemptedAtMs,
		})
	case notificationintent.NotificationTypeRuntimeContainerStartFailed:
		return notificationintent.NewRuntimeContainerStartFailedIntent(metadata, notificationintent.RuntimeContainerStartFailedPayload{
			GameID:        fc.input.GameID,
			ImageRef:      fc.input.ImageRef,
			ErrorCode:     fc.errorCode,
			ErrorMessage:  fc.errorMessage,
			AttemptedAtMs: attemptedAtMs,
		})
	case notificationintent.NotificationTypeRuntimeStartConfigInvalid:
		return notificationintent.NewRuntimeStartConfigInvalidIntent(metadata, notificationintent.RuntimeStartConfigInvalidPayload{
			GameID:        fc.input.GameID,
			ImageRef:      fc.input.ImageRef,
			ErrorCode:     fc.errorCode,
			ErrorMessage:  fc.errorMessage,
			AttemptedAtMs: attemptedAtMs,
		})
	default:
		return notificationintent.Intent{}, fmt.Errorf("unsupported notification type %q", fc.notificationType)
	}
}

// defaultTokenGenerator returns a function that produces 32-byte
// base64url-encoded tokens. The randomness source is `crypto/rand`;
// failures fall back to a deterministic-looking but invalid token so
// the caller observes a TryAcquire collision rather than a panic on a
// degraded entropy source.
func defaultTokenGenerator() func() string {
	return func() string {
		var buf [32]byte
		if _, err := rand.Read(buf[:]); err != nil {
			return "rtmanager-fallback-token"
		}
		return base64.RawURLEncoding.EncodeToString(buf[:])
	}
}

// newDefaultStateDirPreparer returns a function that creates the
// per-game state directory under cfg.GameStateRoot with the configured
// permissions and ownership. The function is overridable through
// Dependencies.PrepareStateDir; tests inject a temporary-dir fake.
func newDefaultStateDirPreparer(cfg config.ContainerConfig) func(gameID string) (string, error) {
	mode := os.FileMode(cfg.GameStateDirMode)
	uid := cfg.GameStateOwnerUID
	gid := cfg.GameStateOwnerGID
	root := cfg.GameStateRoot
	return func(gameID string) (string, error) {
		path := filepath.Join(root, gameID)
		if err := os.MkdirAll(path, mode); err != nil {
			return "", fmt.Errorf("create state dir %q: %w", path, err)
		}
		if err := os.Chmod(path, mode); err != nil {
			return "", fmt.Errorf("chmod state dir %q: %w", path, err)
		}
		if err := os.Chown(path, uid, gid); err != nil {
			return "", fmt.Errorf("chown state dir %q: %w", path, err)
		}
		return path, nil
	}
}