feat: gamemaster
This commit is contained in:
@@ -0,0 +1,392 @@
|
||||
// Package internalhttp provides the trusted internal HTTP listener
|
||||
// used by the runnable Game Master process. It exposes the `/healthz`
|
||||
// and `/readyz` probes plus every internal REST operation declared in
|
||||
// `gamemaster/api/internal-openapi.yaml`. Per-operation handlers live
|
||||
// in the nested `handlers` package; this file owns the listener
|
||||
// lifecycle and the probe routes only.
|
||||
package internalhttp
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"galaxy/gamemaster/internal/api/internalhttp/handlers"
|
||||
"galaxy/gamemaster/internal/telemetry"
|
||||
|
||||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
)
|
||||
|
||||
const jsonContentType = "application/json; charset=utf-8"
|
||||
|
||||
// errorCodeServiceUnavailable mirrors the stable error code declared in
|
||||
// `gamemaster/api/internal-openapi.yaml` §Error Model.
|
||||
const errorCodeServiceUnavailable = "service_unavailable"
|
||||
|
||||
// HealthzPath and ReadyzPath are the internal probe routes documented in
|
||||
// `gamemaster/api/internal-openapi.yaml`.
|
||||
const (
|
||||
HealthzPath = "/healthz"
|
||||
ReadyzPath = "/readyz"
|
||||
)
|
||||
|
||||
// ReadinessProbe reports whether the dependencies the listener guards
|
||||
// (PostgreSQL, Redis) are reachable. A non-nil error is reported to the
|
||||
// caller as `503 service_unavailable` with the wrapped message.
|
||||
type ReadinessProbe interface {
|
||||
Check(ctx context.Context) error
|
||||
}
|
||||
|
||||
// Config describes the trusted internal HTTP listener owned by Game
|
||||
// Master.
|
||||
type Config struct {
|
||||
// Addr is the TCP listen address used by the internal HTTP server.
|
||||
Addr string
|
||||
|
||||
// ReadHeaderTimeout bounds how long the listener may spend reading
|
||||
// request headers before the server rejects the connection.
|
||||
ReadHeaderTimeout time.Duration
|
||||
|
||||
// ReadTimeout bounds how long the listener may spend reading one
|
||||
// request.
|
||||
ReadTimeout time.Duration
|
||||
|
||||
// WriteTimeout bounds how long the listener may spend writing one
|
||||
// response.
|
||||
WriteTimeout time.Duration
|
||||
|
||||
// IdleTimeout bounds how long the listener keeps an idle keep-alive
|
||||
// connection open.
|
||||
IdleTimeout time.Duration
|
||||
}
|
||||
|
||||
// Validate reports whether cfg contains a usable internal HTTP listener
|
||||
// configuration.
|
||||
func (cfg Config) Validate() error {
|
||||
switch {
|
||||
case cfg.Addr == "":
|
||||
return errors.New("internal HTTP addr must not be empty")
|
||||
case cfg.ReadHeaderTimeout <= 0:
|
||||
return errors.New("internal HTTP read header timeout must be positive")
|
||||
case cfg.ReadTimeout <= 0:
|
||||
return errors.New("internal HTTP read timeout must be positive")
|
||||
case cfg.WriteTimeout <= 0:
|
||||
return errors.New("internal HTTP write timeout must be positive")
|
||||
case cfg.IdleTimeout <= 0:
|
||||
return errors.New("internal HTTP idle timeout must be positive")
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Dependencies describes the collaborators used by the internal HTTP
|
||||
// transport layer. The probe-only fields (Logger, Telemetry,
|
||||
// Readiness) drive `/healthz` and `/readyz`; the remaining fields
|
||||
// pass through to the per-operation handlers registered by
|
||||
// `handlers.Register`.
|
||||
type Dependencies struct {
|
||||
// Logger writes structured listener lifecycle logs. When nil,
|
||||
// slog.Default is used.
|
||||
Logger *slog.Logger
|
||||
|
||||
// Telemetry records low-cardinality probe metrics and lifecycle
|
||||
// events.
|
||||
Telemetry *telemetry.Runtime
|
||||
|
||||
// Readiness reports whether PG / Redis are reachable. A nil
|
||||
// readiness probe makes `/readyz` always answer `200`; the runtime
|
||||
// always supplies a real probe in production wiring.
|
||||
Readiness ReadinessProbe
|
||||
|
||||
// RuntimeRecords backs the read-only list/get runtime endpoints.
|
||||
RuntimeRecords handlers.RuntimeRecordsReader
|
||||
|
||||
// RegisterRuntime is the orchestrator for `internalRegisterRuntime`.
|
||||
RegisterRuntime handlers.RegisterRuntimeService
|
||||
|
||||
// ForceNextTurn drives the synchronous force-next-turn flow.
|
||||
ForceNextTurn handlers.ForceNextTurnService
|
||||
|
||||
// StopRuntime drives the admin stop flow.
|
||||
StopRuntime handlers.StopRuntimeService
|
||||
|
||||
// PatchRuntime drives the admin patch flow.
|
||||
PatchRuntime handlers.PatchRuntimeService
|
||||
|
||||
// BanishRace drives the engine race-banish flow.
|
||||
BanishRace handlers.BanishRaceService
|
||||
|
||||
// InvalidateMemberships purges the in-process membership cache.
|
||||
InvalidateMemberships handlers.MembershipInvalidator
|
||||
|
||||
// GameLiveness returns the current runtime status without
|
||||
// contacting the engine.
|
||||
GameLiveness handlers.LivenessService
|
||||
|
||||
// EngineVersions exposes the multi-method engine-version registry
|
||||
// service.
|
||||
EngineVersions handlers.EngineVersionService
|
||||
|
||||
// CommandExecute forwards a player command batch to the engine.
|
||||
CommandExecute handlers.CommandExecuteService
|
||||
|
||||
// PutOrders forwards a player order batch to the engine.
|
||||
PutOrders handlers.OrderPutService
|
||||
|
||||
// GetReport reads a per-player turn report from the engine.
|
||||
GetReport handlers.ReportGetService
|
||||
}
|
||||
|
||||
// Server owns the trusted internal HTTP listener exposed by Game Master.
|
||||
type Server struct {
|
||||
cfg Config
|
||||
|
||||
handler http.Handler
|
||||
logger *slog.Logger
|
||||
metrics *telemetry.Runtime
|
||||
|
||||
stateMu sync.RWMutex
|
||||
server *http.Server
|
||||
listener net.Listener
|
||||
}
|
||||
|
||||
// NewServer constructs one trusted internal HTTP server for cfg and deps.
|
||||
func NewServer(cfg Config, deps Dependencies) (*Server, error) {
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("new internal HTTP server: %w", err)
|
||||
}
|
||||
|
||||
logger := deps.Logger
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
|
||||
return &Server{
|
||||
cfg: cfg,
|
||||
handler: newHandler(deps, logger),
|
||||
logger: logger.With("component", "internal_http"),
|
||||
metrics: deps.Telemetry,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Addr returns the currently bound listener address after Run is called.
|
||||
// It returns an empty string if the server has not yet bound a listener.
|
||||
func (server *Server) Addr() string {
|
||||
server.stateMu.RLock()
|
||||
defer server.stateMu.RUnlock()
|
||||
if server.listener == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
return server.listener.Addr().String()
|
||||
}
|
||||
|
||||
// Run binds the configured listener and serves the internal HTTP surface
|
||||
// until Shutdown closes the server.
|
||||
func (server *Server) Run(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("run internal HTTP server: nil context")
|
||||
}
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
listener, err := net.Listen("tcp", server.cfg.Addr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("run internal HTTP server: listen on %q: %w", server.cfg.Addr, err)
|
||||
}
|
||||
|
||||
httpServer := &http.Server{
|
||||
Handler: server.handler,
|
||||
ReadHeaderTimeout: server.cfg.ReadHeaderTimeout,
|
||||
ReadTimeout: server.cfg.ReadTimeout,
|
||||
WriteTimeout: server.cfg.WriteTimeout,
|
||||
IdleTimeout: server.cfg.IdleTimeout,
|
||||
}
|
||||
|
||||
server.stateMu.Lock()
|
||||
server.server = httpServer
|
||||
server.listener = listener
|
||||
server.stateMu.Unlock()
|
||||
|
||||
server.logger.Info("gamemaster internal HTTP server started", "addr", listener.Addr().String())
|
||||
|
||||
defer func() {
|
||||
server.stateMu.Lock()
|
||||
server.server = nil
|
||||
server.listener = nil
|
||||
server.stateMu.Unlock()
|
||||
}()
|
||||
|
||||
err = httpServer.Serve(listener)
|
||||
switch {
|
||||
case err == nil:
|
||||
return nil
|
||||
case errors.Is(err, http.ErrServerClosed):
|
||||
server.logger.Info("gamemaster internal HTTP server stopped")
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("run internal HTTP server: serve on %q: %w", server.cfg.Addr, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown gracefully stops the internal HTTP server within ctx.
|
||||
func (server *Server) Shutdown(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("shutdown internal HTTP server: nil context")
|
||||
}
|
||||
|
||||
server.stateMu.RLock()
|
||||
httpServer := server.server
|
||||
server.stateMu.RUnlock()
|
||||
|
||||
if httpServer == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := httpServer.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
return fmt.Errorf("shutdown internal HTTP server: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func newHandler(deps Dependencies, logger *slog.Logger) http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("GET "+HealthzPath, handleHealthz)
|
||||
mux.HandleFunc("GET "+ReadyzPath, handleReadyz(deps.Readiness, logger))
|
||||
handlers.Register(mux, handlers.Dependencies{
|
||||
Logger: logger,
|
||||
RuntimeRecords: deps.RuntimeRecords,
|
||||
RegisterRuntime: deps.RegisterRuntime,
|
||||
ForceNextTurn: deps.ForceNextTurn,
|
||||
StopRuntime: deps.StopRuntime,
|
||||
PatchRuntime: deps.PatchRuntime,
|
||||
BanishRace: deps.BanishRace,
|
||||
InvalidateMemberships: deps.InvalidateMemberships,
|
||||
GameLiveness: deps.GameLiveness,
|
||||
EngineVersions: deps.EngineVersions,
|
||||
CommandExecute: deps.CommandExecute,
|
||||
PutOrders: deps.PutOrders,
|
||||
GetReport: deps.GetReport,
|
||||
})
|
||||
|
||||
metrics := deps.Telemetry
|
||||
options := []otelhttp.Option{}
|
||||
if metrics != nil {
|
||||
options = append(options,
|
||||
otelhttp.WithTracerProvider(metrics.TracerProvider()),
|
||||
otelhttp.WithMeterProvider(metrics.MeterProvider()),
|
||||
)
|
||||
}
|
||||
|
||||
return otelhttp.NewHandler(withObservability(mux, metrics), "gamemaster.internal_http", options...)
|
||||
}
|
||||
|
||||
func withObservability(next http.Handler, metrics *telemetry.Runtime) http.Handler {
|
||||
return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
|
||||
startedAt := time.Now()
|
||||
recorder := &statusRecorder{
|
||||
ResponseWriter: writer,
|
||||
statusCode: http.StatusOK,
|
||||
}
|
||||
|
||||
next.ServeHTTP(recorder, request)
|
||||
|
||||
route := request.Pattern
|
||||
switch recorder.statusCode {
|
||||
case http.StatusMethodNotAllowed:
|
||||
route = "method_not_allowed"
|
||||
case http.StatusNotFound:
|
||||
route = "not_found"
|
||||
case 0:
|
||||
route = "unmatched"
|
||||
}
|
||||
if route == "" {
|
||||
route = "unmatched"
|
||||
}
|
||||
|
||||
if metrics != nil {
|
||||
metrics.RecordInternalHTTPRequest(
|
||||
request.Context(),
|
||||
[]attribute.KeyValue{
|
||||
attribute.String("route", route),
|
||||
attribute.String("method", request.Method),
|
||||
attribute.String("status_code", strconv.Itoa(recorder.statusCode)),
|
||||
},
|
||||
time.Since(startedAt),
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func handleHealthz(writer http.ResponseWriter, _ *http.Request) {
|
||||
writeStatusResponse(writer, http.StatusOK, "ok")
|
||||
}
|
||||
|
||||
func handleReadyz(probe ReadinessProbe, logger *slog.Logger) http.HandlerFunc {
|
||||
return func(writer http.ResponseWriter, request *http.Request) {
|
||||
if probe == nil {
|
||||
writeStatusResponse(writer, http.StatusOK, "ready")
|
||||
return
|
||||
}
|
||||
|
||||
if err := probe.Check(request.Context()); err != nil {
|
||||
logger.WarnContext(request.Context(), "gamemaster readiness probe failed",
|
||||
"err", err.Error(),
|
||||
)
|
||||
writeServiceUnavailable(writer, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
writeStatusResponse(writer, http.StatusOK, "ready")
|
||||
}
|
||||
}
|
||||
|
||||
func writeStatusResponse(writer http.ResponseWriter, statusCode int, status string) {
|
||||
writer.Header().Set("Content-Type", jsonContentType)
|
||||
writer.WriteHeader(statusCode)
|
||||
_ = json.NewEncoder(writer).Encode(statusResponse{Status: status})
|
||||
}
|
||||
|
||||
func writeServiceUnavailable(writer http.ResponseWriter, message string) {
|
||||
writer.Header().Set("Content-Type", jsonContentType)
|
||||
writer.WriteHeader(http.StatusServiceUnavailable)
|
||||
_ = json.NewEncoder(writer).Encode(errorResponse{
|
||||
Error: errorBody{
|
||||
Code: errorCodeServiceUnavailable,
|
||||
Message: message,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
type statusResponse struct {
|
||||
Status string `json:"status"`
|
||||
}
|
||||
|
||||
type errorBody struct {
|
||||
Code string `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
type errorResponse struct {
|
||||
Error errorBody `json:"error"`
|
||||
}
|
||||
|
||||
type statusRecorder struct {
|
||||
http.ResponseWriter
|
||||
statusCode int
|
||||
}
|
||||
|
||||
func (recorder *statusRecorder) WriteHeader(statusCode int) {
|
||||
recorder.statusCode = statusCode
|
||||
recorder.ResponseWriter.WriteHeader(statusCode)
|
||||
}
|
||||
Reference in New Issue
Block a user