feat: runtime manager
This commit is contained in:
@@ -0,0 +1,245 @@
|
||||
// Package operation defines the runtime-operation audit-log domain types
// owned by Runtime Manager.
//
// One OperationEntry maps to one row of the `operation_log` PostgreSQL
// table (see
// `galaxy/rtmanager/internal/adapters/postgres/migrations/00001_init.sql`).
// The OpKind / OpSource / Outcome enums match the SQL CHECK constraints
// verbatim and feed the telemetry counters declared in
// `galaxy/rtmanager/README.md §Observability`.
|
||||
package operation
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// OpKind identifies the kind of operation Runtime Manager performed.
//
// Per the package documentation, the string values mirror the SQL CHECK
// constraint of the `operation_log` table verbatim.
type OpKind string

const (
	// OpKindStart records a start lifecycle operation.
	OpKindStart OpKind = "start"

	// OpKindStop records a stop lifecycle operation.
	OpKindStop OpKind = "stop"

	// OpKindRestart records a restart lifecycle operation
	// (recreate with the same image_ref).
	OpKindRestart OpKind = "restart"

	// OpKindPatch records a semver-patch lifecycle operation
	// (recreate with a new image_ref).
	OpKindPatch OpKind = "patch"

	// OpKindCleanupContainer records a container removal performed by
	// the cleanup TTL worker or the admin DELETE endpoint.
	OpKindCleanupContainer OpKind = "cleanup_container"

	// OpKindReconcileAdopt records that the reconciler discovered an
	// unrecorded container labelled `com.galaxy.owner=rtmanager` and
	// inserted a runtime record for it.
	OpKindReconcileAdopt OpKind = "reconcile_adopt"

	// OpKindReconcileDispose records that the reconciler observed a
	// running record whose container is missing in Docker and marked it
	// as removed.
	OpKindReconcileDispose OpKind = "reconcile_dispose"
)

// IsKnown reports whether kind belongs to the frozen op-kind vocabulary.
// The empty string and any value not declared above yield false.
func (kind OpKind) IsKnown() bool {
	switch kind {
	case OpKindStart,
		OpKindStop,
		OpKindRestart,
		OpKindPatch,
		OpKindCleanupContainer,
		OpKindReconcileAdopt,
		OpKindReconcileDispose:
		return true
	default:
		return false
	}
}

// AllOpKinds returns the frozen list of every op-kind value. The slice
// order is stable across calls, and each call returns a freshly allocated
// slice, so callers may modify the result without affecting later calls.
func AllOpKinds() []OpKind {
	return []OpKind{
		OpKindStart,
		OpKindStop,
		OpKindRestart,
		OpKindPatch,
		OpKindCleanupContainer,
		OpKindReconcileAdopt,
		OpKindReconcileDispose,
	}
}
|
||||
|
||||
// OpSource identifies where one operation entered Runtime Manager.
//
// Per the package documentation, the string values mirror the SQL CHECK
// constraint of the `operation_log` table verbatim.
type OpSource string

const (
	// OpSourceLobbyStream identifies entries triggered by the
	// `runtime:start_jobs` or `runtime:stop_jobs` Redis Stream consumer.
	OpSourceLobbyStream OpSource = "lobby_stream"

	// OpSourceGMRest identifies entries triggered by Game Master through
	// the internal REST surface.
	OpSourceGMRest OpSource = "gm_rest"

	// OpSourceAdminRest identifies entries triggered by Admin Service
	// through the internal REST surface.
	OpSourceAdminRest OpSource = "admin_rest"

	// OpSourceAutoTTL identifies entries triggered by the periodic
	// container-cleanup worker.
	OpSourceAutoTTL OpSource = "auto_ttl"

	// OpSourceAutoReconcile identifies entries triggered by the
	// reconciler at startup or on its periodic interval.
	OpSourceAutoReconcile OpSource = "auto_reconcile"
)

// IsKnown reports whether source belongs to the frozen op-source
// vocabulary. The empty string and any value not declared above yield
// false.
func (source OpSource) IsKnown() bool {
	switch source {
	case OpSourceLobbyStream,
		OpSourceGMRest,
		OpSourceAdminRest,
		OpSourceAutoTTL,
		OpSourceAutoReconcile:
		return true
	default:
		return false
	}
}

// AllOpSources returns the frozen list of every op-source value. The
// slice order is stable across calls, and each call returns a freshly
// allocated slice, so callers may modify the result without affecting
// later calls.
func AllOpSources() []OpSource {
	return []OpSource{
		OpSourceLobbyStream,
		OpSourceGMRest,
		OpSourceAdminRest,
		OpSourceAutoTTL,
		OpSourceAutoReconcile,
	}
}
|
||||
|
||||
// Outcome reports the high-level outcome of one operation.
//
// Per the package documentation, the string values mirror the SQL CHECK
// constraint of the `operation_log` table verbatim.
type Outcome string

const (
	// OutcomeSuccess reports that the operation completed without
	// surfacing an error.
	OutcomeSuccess Outcome = "success"

	// OutcomeFailure reports that the operation surfaced a stable error
	// code recorded in OperationEntry.ErrorCode.
	OutcomeFailure Outcome = "failure"
)

// IsKnown reports whether outcome belongs to the frozen outcome
// vocabulary. The empty string and any value not declared above yield
// false.
func (outcome Outcome) IsKnown() bool {
	switch outcome {
	case OutcomeSuccess, OutcomeFailure:
		return true
	default:
		return false
	}
}

// AllOutcomes returns the frozen list of every outcome value. Each call
// returns a freshly allocated slice in a stable order.
func AllOutcomes() []Outcome {
	return []Outcome{OutcomeSuccess, OutcomeFailure}
}
|
||||
|
||||
// OperationEntry stores one append-only audit row of the `operation_log`
|
||||
// table. ID is zero on records that have not been persisted yet; the
|
||||
// store assigns it from the table's bigserial column. FinishedAt is a
|
||||
// pointer because the column is nullable for in-flight rows even though
|
||||
// the lifecycle services finalise the row in the same transaction.
|
||||
type OperationEntry struct {
|
||||
// ID identifies the persisted row. Zero before persistence.
|
||||
ID int64
|
||||
|
||||
// GameID identifies the platform game this operation acted on.
|
||||
GameID string
|
||||
|
||||
// OpKind classifies what the operation did.
|
||||
OpKind OpKind
|
||||
|
||||
// OpSource classifies how the operation entered Runtime Manager.
|
||||
OpSource OpSource
|
||||
|
||||
// SourceRef stores an opaque per-source reference such as a Redis
|
||||
// Stream entry id, a REST request id, or an admin user id. Empty
|
||||
// when the source does not provide one.
|
||||
SourceRef string
|
||||
|
||||
// ImageRef stores the engine image reference associated with the
|
||||
// operation, when applicable. Empty for operations that do not
|
||||
// touch an image (e.g., cleanup_container).
|
||||
ImageRef string
|
||||
|
||||
// ContainerID stores the Docker container id observed at the time
|
||||
// of the operation, when applicable.
|
||||
ContainerID string
|
||||
|
||||
// Outcome reports whether the operation succeeded or failed.
|
||||
Outcome Outcome
|
||||
|
||||
// ErrorCode stores the stable error code on failure. Empty on
|
||||
// success.
|
||||
ErrorCode string
|
||||
|
||||
// ErrorMessage stores the operator-readable detail on failure.
|
||||
// Empty on success.
|
||||
ErrorMessage string
|
||||
|
||||
// StartedAt stores the wall-clock at which the operation began.
|
||||
StartedAt time.Time
|
||||
|
||||
// FinishedAt stores the wall-clock at which the operation
|
||||
// finalised. Nil for in-flight rows.
|
||||
FinishedAt *time.Time
|
||||
}
|
||||
|
||||
// Validate reports whether entry satisfies the operation-log invariants
|
||||
// implied by the SQL CHECK constraints and the README §Persistence
|
||||
// Layout.
|
||||
func (entry OperationEntry) Validate() error {
|
||||
if strings.TrimSpace(entry.GameID) == "" {
|
||||
return fmt.Errorf("game id must not be empty")
|
||||
}
|
||||
if !entry.OpKind.IsKnown() {
|
||||
return fmt.Errorf("op kind %q is unsupported", entry.OpKind)
|
||||
}
|
||||
if !entry.OpSource.IsKnown() {
|
||||
return fmt.Errorf("op source %q is unsupported", entry.OpSource)
|
||||
}
|
||||
if !entry.Outcome.IsKnown() {
|
||||
return fmt.Errorf("outcome %q is unsupported", entry.Outcome)
|
||||
}
|
||||
if entry.StartedAt.IsZero() {
|
||||
return fmt.Errorf("started at must not be zero")
|
||||
}
|
||||
if entry.FinishedAt != nil {
|
||||
if entry.FinishedAt.IsZero() {
|
||||
return fmt.Errorf("finished at must not be zero when present")
|
||||
}
|
||||
if entry.FinishedAt.Before(entry.StartedAt) {
|
||||
return fmt.Errorf("finished at must not be before started at")
|
||||
}
|
||||
}
|
||||
if entry.Outcome == OutcomeFailure && strings.TrimSpace(entry.ErrorCode) == "" {
|
||||
return fmt.Errorf("error code must not be empty for failure entries")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user