// Package cleanupcontainer implements the `cleanup_container` lifecycle
// operation owned by Runtime Manager. The service removes the Docker
// container of an already-stopped runtime and transitions the record
// to `removed`. It refuses to operate on a still-running runtime —
// callers must stop first.
//
// Two callers exercise this surface: the administrative
// `DELETE /api/v1/internal/runtimes/{game_id}/container` endpoint, and
// the periodic container-cleanup worker that walks
// `runtime_records.status='stopped'` rows older than
// `RTMANAGER_CONTAINER_RETENTION_DAYS`. Both paths flow through Handle.
//
// Lifecycle and failure-mode semantics follow `rtmanager/README.md
// §Lifecycles → Cleanup`. Design rationale is captured in
// `rtmanager/docs/services.md`.
package cleanupcontainer

import (
	"context"
	"crypto/rand"
	"encoding/base64"
	"errors"
	"fmt"
	"log/slog"
	"strings"
	"time"

	"galaxy/rtmanager/internal/config"
	"galaxy/rtmanager/internal/domain/operation"
	"galaxy/rtmanager/internal/domain/runtime"
	"galaxy/rtmanager/internal/logging"
	"galaxy/rtmanager/internal/ports"
	"galaxy/rtmanager/internal/service/startruntime"
	"galaxy/rtmanager/internal/telemetry"
)

const leaseReleaseTimeout = 5 * time.Second

// Input stores the per-call arguments for one cleanup operation.
type Input struct {
	// GameID identifies the platform game whose container is removed.
	GameID string

	// OpSource classifies how the request entered Runtime Manager.
	// Required: every operation_log entry carries an op_source.
	OpSource operation.OpSource

	// SourceRef stores the optional opaque per-source reference (REST
	// request id, admin user id). Empty for the periodic auto-TTL
	// caller.
	SourceRef string
}

// Validate reports whether input carries the structural invariants the
// service requires.
func (input Input) Validate() error {
	if strings.TrimSpace(input.GameID) == "" {
		return fmt.Errorf("game id must not be empty")
	}
	if !input.OpSource.IsKnown() {
		return fmt.Errorf("op source %q is unsupported", input.OpSource)
	}
	return nil
}

// Result stores the deterministic outcome of one Handle call.
type Result struct {
	// Record carries the updated runtime record on success and on
	// idempotent replay; zero on failure.
	Record runtime.RuntimeRecord

	// Outcome reports whether the operation completed (success) or
	// produced a stable failure code.
	Outcome operation.Outcome

	// ErrorCode stores the stable error code on failure, or
	// `replay_no_op` on idempotent replay. Empty for fresh successes.
	ErrorCode string

	// ErrorMessage stores the operator-readable detail on failure.
	ErrorMessage string
}

// Dependencies groups the collaborators required by Service.
type Dependencies struct {
	RuntimeRecords ports.RuntimeRecordStore
	OperationLogs  ports.OperationLogStore
	Docker         ports.DockerClient
	Leases         ports.GameLeaseStore

	Coordination config.CoordinationConfig

	Telemetry *telemetry.Runtime
	Logger    *slog.Logger
	Clock     func() time.Time
	NewToken  func() string
}

// Service executes the cleanup_container lifecycle operation.
type Service struct {
	runtimeRecords ports.RuntimeRecordStore
	operationLogs  ports.OperationLogStore
	docker         ports.DockerClient
	leases         ports.GameLeaseStore

	leaseTTL time.Duration

	telemetry *telemetry.Runtime
	logger    *slog.Logger

	clock    func() time.Time
	newToken func() string
}

// NewService constructs one Service from deps.
func NewService(deps Dependencies) (*Service, error) {
	switch {
	case deps.RuntimeRecords == nil:
		return nil, errors.New("new cleanup container service: nil runtime records")
	case deps.OperationLogs == nil:
		return nil, errors.New("new cleanup container service: nil operation logs")
	case deps.Docker == nil:
		return nil, errors.New("new cleanup container service: nil docker client")
	case deps.Leases == nil:
		return nil, errors.New("new cleanup container service: nil lease store")
	case deps.Telemetry == nil:
		return nil, errors.New("new cleanup container service: nil telemetry runtime")
	}
	if err := deps.Coordination.Validate(); err != nil {
		return nil, fmt.Errorf("new cleanup container service: coordination config: %w", err)
	}

	clock := deps.Clock
	if clock == nil {
		clock = time.Now
	}
	logger := deps.Logger
	if logger == nil {
		logger = slog.Default()
	}
	logger = logger.With("service", "rtmanager.cleanupcontainer")

	newToken := deps.NewToken
	if newToken == nil {
		newToken = defaultTokenGenerator()
	}

	return &Service{
		runtimeRecords: deps.RuntimeRecords,
		operationLogs:  deps.OperationLogs,
		docker:         deps.Docker,
		leases:         deps.Leases,
		leaseTTL:       deps.Coordination.GameLeaseTTL,
		telemetry:      deps.Telemetry,
		logger:         logger,
		clock:          clock,
		newToken:       newToken,
	}, nil
}

// Handle executes one cleanup operation end-to-end. The Go-level error
// return is reserved for non-business failures (nil context, nil
// receiver). Every business outcome — success, idempotent replay, or
// any of the stable failure modes — flows through Result.
func (service *Service) Handle(ctx context.Context, input Input) (Result, error) {
	if service == nil {
		return Result{}, errors.New("cleanup container: nil service")
	}
	if ctx == nil {
		return Result{}, errors.New("cleanup container: nil context")
	}

	opStartedAt := service.clock().UTC()

	if err := input.Validate(); err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeInvalidRequest,
			errorMessage: err.Error(),
		}), nil
	}

	token := service.newToken()
	leaseStart := service.clock()
	acquired, err := service.leases.TryAcquire(ctx, input.GameID, token, service.leaseTTL)
	service.telemetry.RecordLeaseAcquireLatency(ctx, service.clock().Sub(leaseStart))
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeServiceUnavailable,
			errorMessage: fmt.Sprintf("acquire game lease: %s", err.Error()),
		}), nil
	}
	if !acquired {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeConflict,
			errorMessage: "another lifecycle operation is in progress for this game",
		}), nil
	}
	defer service.releaseLease(ctx, input.GameID, token)

	return service.runUnderLease(ctx, input, opStartedAt)
}

// runUnderLease executes the lease-protected cleanup steps.
func (service *Service) runUnderLease(ctx context.Context, input Input, opStartedAt time.Time) (Result, error) {
	existing, err := service.runtimeRecords.Get(ctx, input.GameID)
	if errors.Is(err, runtime.ErrNotFound) {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeNotFound,
			errorMessage: fmt.Sprintf("runtime record for game %q does not exist", input.GameID),
		}), nil
	}
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeInternal,
			errorMessage: fmt.Sprintf("load runtime record: %s", err.Error()),
		}), nil
	}

	switch existing.Status {
	case runtime.StatusRemoved:
		return service.recordReplayNoOp(ctx, opStartedAt, input, existing), nil
	case runtime.StatusRunning:
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeConflict,
			errorMessage: fmt.Sprintf("runtime for game %q is running; stop the runtime first", input.GameID),
			containerID:  existing.CurrentContainerID,
			imageRef:     existing.CurrentImageRef,
		}), nil
	case runtime.StatusStopped:
		// proceed
	default:
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeInternal,
			errorMessage: fmt.Sprintf("runtime record has unsupported status %q", existing.Status),
		}), nil
	}

	if existing.CurrentContainerID != "" {
		if err := service.docker.Remove(ctx, existing.CurrentContainerID); err != nil {
			return service.recordFailure(ctx, failureCtx{
				opStartedAt:  opStartedAt,
				input:        input,
				errorCode:    startruntime.ErrorCodeServiceUnavailable,
				errorMessage: fmt.Sprintf("docker remove: %s", err.Error()),
				containerID:  existing.CurrentContainerID,
				imageRef:     existing.CurrentImageRef,
			}), nil
		}
	}

	updateNow := service.clock().UTC()
	err = service.runtimeRecords.UpdateStatus(ctx, ports.UpdateStatusInput{
		GameID:              input.GameID,
		ExpectedFrom:        runtime.StatusStopped,
		ExpectedContainerID: existing.CurrentContainerID,
		To:                  runtime.StatusRemoved,
		Now:                 updateNow,
	})
	if errors.Is(err, runtime.ErrConflict) {
		// CAS race: another caller (reconciler dispose, concurrent admin)
		// already moved the record. The desired terminal state was
		// reached by another path.
		return service.recordReplayNoOp(ctx, opStartedAt, input, existing), nil
	}
	if errors.Is(err, runtime.ErrNotFound) {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeNotFound,
			errorMessage: fmt.Sprintf("runtime record for game %q vanished mid-cleanup", input.GameID),
			containerID:  existing.CurrentContainerID,
			imageRef:     existing.CurrentImageRef,
		}), nil
	}
	if err != nil {
		return service.recordFailure(ctx, failureCtx{
			opStartedAt:  opStartedAt,
			input:        input,
			errorCode:    startruntime.ErrorCodeInternal,
			errorMessage: fmt.Sprintf("update runtime status: %s", err.Error()),
			containerID:  existing.CurrentContainerID,
			imageRef:     existing.CurrentImageRef,
		}), nil
	}

	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:      input.GameID,
		OpKind:      operation.OpKindCleanupContainer,
		OpSource:    input.OpSource,
		SourceRef:   input.SourceRef,
		ImageRef:    existing.CurrentImageRef,
		ContainerID: existing.CurrentContainerID,
		Outcome:     operation.OutcomeSuccess,
		StartedAt:   opStartedAt,
		FinishedAt:  &finishedAt,
	})
	service.telemetry.RecordCleanupOutcome(ctx, string(operation.OutcomeSuccess), string(input.OpSource))

	record := existing
	record.Status = runtime.StatusRemoved
	record.CurrentContainerID = ""
	removedAt := updateNow
	record.RemovedAt = &removedAt
	record.LastOpAt = updateNow

	logArgs := []any{
		"game_id", input.GameID,
		"container_id", existing.CurrentContainerID,
		"op_source", string(input.OpSource),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime container cleaned up", logArgs...)

	return Result{
		Record:  record,
		Outcome: operation.OutcomeSuccess,
	}, nil
}

// recordReplayNoOp records the idempotent replay outcome and returns the
// existing record unchanged.
func (service *Service) recordReplayNoOp(ctx context.Context, opStartedAt time.Time, input Input, existing runtime.RuntimeRecord) Result {
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:      input.GameID,
		OpKind:      operation.OpKindCleanupContainer,
		OpSource:    input.OpSource,
		SourceRef:   input.SourceRef,
		ImageRef:    existing.CurrentImageRef,
		ContainerID: existing.CurrentContainerID,
		Outcome:     operation.OutcomeSuccess,
		ErrorCode:   startruntime.ErrorCodeReplayNoOp,
		StartedAt:   opStartedAt,
		FinishedAt:  &finishedAt,
	})
	service.telemetry.RecordCleanupOutcome(ctx, string(operation.OutcomeSuccess), string(input.OpSource))

	logArgs := []any{
		"game_id", input.GameID,
		"container_id", existing.CurrentContainerID,
		"op_source", string(input.OpSource),
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.InfoContext(ctx, "runtime cleanup replay no-op", logArgs...)

	return Result{
		Record:    existing,
		Outcome:   operation.OutcomeSuccess,
		ErrorCode: startruntime.ErrorCodeReplayNoOp,
	}
}

// failureCtx groups the inputs to recordFailure.
type failureCtx struct {
	opStartedAt  time.Time
	input        Input
	errorCode    string
	errorMessage string
	containerID  string
	imageRef     string
}

func (service *Service) recordFailure(ctx context.Context, fc failureCtx) Result {
	finishedAt := service.clock().UTC()
	service.bestEffortAppend(ctx, operation.OperationEntry{
		GameID:       fc.input.GameID,
		OpKind:       operation.OpKindCleanupContainer,
		OpSource:     fc.input.OpSource,
		SourceRef:    fc.input.SourceRef,
		ImageRef:     fc.imageRef,
		ContainerID:  fc.containerID,
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    fc.errorCode,
		ErrorMessage: fc.errorMessage,
		StartedAt:    fc.opStartedAt,
		FinishedAt:   &finishedAt,
	})
	service.telemetry.RecordCleanupOutcome(ctx, string(operation.OutcomeFailure), string(fc.input.OpSource))

	logArgs := []any{
		"game_id", fc.input.GameID,
		"op_source", string(fc.input.OpSource),
		"error_code", fc.errorCode,
		"error_message", fc.errorMessage,
	}
	logArgs = append(logArgs, logging.ContextAttrs(ctx)...)
	service.logger.WarnContext(ctx, "runtime cleanup failed", logArgs...)

	return Result{
		Outcome:      operation.OutcomeFailure,
		ErrorCode:    fc.errorCode,
		ErrorMessage: fc.errorMessage,
	}
}

func (service *Service) releaseLease(ctx context.Context, gameID, token string) {
	cleanupCtx, cancel := context.WithTimeout(context.Background(), leaseReleaseTimeout)
	defer cancel()
	if err := service.leases.Release(cleanupCtx, gameID, token); err != nil {
		service.logger.WarnContext(ctx, "release game lease",
			"game_id", gameID,
			"err", err.Error(),
		)
	}
}

func (service *Service) bestEffortAppend(ctx context.Context, entry operation.OperationEntry) {
	if _, err := service.operationLogs.Append(ctx, entry); err != nil {
		service.logger.ErrorContext(ctx, "append operation log",
			"game_id", entry.GameID,
			"op_kind", string(entry.OpKind),
			"outcome", string(entry.Outcome),
			"error_code", entry.ErrorCode,
			"err", err.Error(),
		)
	}
}

func defaultTokenGenerator() func() string {
	return func() string {
		var buf [32]byte
		if _, err := rand.Read(buf[:]); err != nil {
			return "rtmanager-fallback-token"
		}
		return base64.RawURLEncoding.EncodeToString(buf[:])
	}
}