364 lines
10 KiB
Go
364 lines
10 KiB
Go
// Package internalhttp provides the trusted internal HTTP listener used
// by the runnable Runtime Manager process. It exposes `/healthz` and
// `/readyz` plus the GM/Admin REST surface backed by the lifecycle
// services in `internal/service/`.
|
|
package internalhttp
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"net"
|
|
"net/http"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
"galaxy/rtmanager/internal/api/internalhttp/handlers"
|
|
"galaxy/rtmanager/internal/ports"
|
|
"galaxy/rtmanager/internal/telemetry"
|
|
|
|
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
)
|
|
|
|
// jsonContentType is the Content-Type header value set on the JSON
// bodies written by the response helpers in this file.
const jsonContentType = "application/json; charset=utf-8"
|
|
|
|
// errorCodeServiceUnavailable is the stable error code placed in the body
// of `503` responses. It mirrors the value declared under `§Error Model`
// in `rtmanager/api/internal-openapi.yaml`.
const errorCodeServiceUnavailable = "service_unavailable"
|
|
|
|
// Internal probe routes, as documented in
// `rtmanager/api/internal-openapi.yaml`.
const (
	// HealthzPath is the route served by the health probe handler.
	HealthzPath = "/healthz"

	// ReadyzPath is the route served by the readiness probe handler.
	ReadyzPath = "/readyz"
)
|
|
|
|
// ReadinessProbe is implemented by the component that verifies the
// dependencies guarded by the listener (PostgreSQL, Redis, Docker) can be
// reached.
type ReadinessProbe interface {
	// Check returns nil when the dependencies are reachable. A non-nil
	// error is surfaced to the `/readyz` caller as
	// `503 service_unavailable` carrying the error's message.
	Check(ctx context.Context) error
}
|
|
|
|
// Config holds the settings of the trusted internal HTTP listener owned
// by Runtime Manager. Validate reports whether a value is usable.
type Config struct {
	// Addr is the TCP address the internal HTTP server listens on.
	Addr string

	// ReadHeaderTimeout caps the time spent reading request headers
	// before the server rejects the connection.
	ReadHeaderTimeout time.Duration

	// ReadTimeout caps the time spent reading a single request.
	ReadTimeout time.Duration

	// WriteTimeout caps the time spent writing a single response.
	WriteTimeout time.Duration

	// IdleTimeout caps how long an idle keep-alive connection is kept
	// open.
	IdleTimeout time.Duration
}
|
|
|
|
// Validate reports whether cfg contains a usable internal HTTP listener
|
|
// configuration.
|
|
func (cfg Config) Validate() error {
|
|
switch {
|
|
case cfg.Addr == "":
|
|
return errors.New("internal HTTP addr must not be empty")
|
|
case cfg.ReadHeaderTimeout <= 0:
|
|
return errors.New("internal HTTP read header timeout must be positive")
|
|
case cfg.ReadTimeout <= 0:
|
|
return errors.New("internal HTTP read timeout must be positive")
|
|
case cfg.WriteTimeout <= 0:
|
|
return errors.New("internal HTTP write timeout must be positive")
|
|
case cfg.IdleTimeout <= 0:
|
|
return errors.New("internal HTTP idle timeout must be positive")
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Dependencies describes the collaborators used by the internal HTTP
|
|
// transport layer. The listener still works when the lifecycle service
|
|
// fields are zero — handlers register but each returns
|
|
// `500 internal_error` until the runtime wires the real services.
|
|
type Dependencies struct {
|
|
// Logger writes structured listener lifecycle logs. When nil,
|
|
// slog.Default is used.
|
|
Logger *slog.Logger
|
|
|
|
// Telemetry records low-cardinality probe metrics and lifecycle
|
|
// events.
|
|
Telemetry *telemetry.Runtime
|
|
|
|
// Readiness reports whether PG / Redis / Docker are reachable. A
|
|
// nil readiness probe makes `/readyz` always answer `200`; the
|
|
// runtime always supplies a real probe in production wiring.
|
|
Readiness ReadinessProbe
|
|
|
|
// RuntimeRecords backs the read-only list/get handlers. When nil
|
|
// those routes return `500 internal_error`.
|
|
RuntimeRecords ports.RuntimeRecordStore
|
|
|
|
// StartRuntime, StopRuntime, RestartRuntime, PatchRuntime, and
|
|
// CleanupContainer back the lifecycle handlers. Each accepts a
|
|
// narrow interface so tests can pass `mockgen`-generated mocks;
|
|
// production wiring passes the concrete `*<lifecycle>.Service`
|
|
// pointer.
|
|
StartRuntime handlers.StartService
|
|
StopRuntime handlers.StopService
|
|
RestartRuntime handlers.RestartService
|
|
PatchRuntime handlers.PatchService
|
|
CleanupContainer handlers.CleanupService
|
|
}
|
|
|
|
// Server owns the trusted internal HTTP listener exposed by Runtime
|
|
// Manager.
|
|
type Server struct {
|
|
cfg Config
|
|
|
|
handler http.Handler
|
|
logger *slog.Logger
|
|
metrics *telemetry.Runtime
|
|
|
|
stateMu sync.RWMutex
|
|
server *http.Server
|
|
listener net.Listener
|
|
}
|
|
|
|
// NewServer constructs one trusted internal HTTP server for cfg and deps.
|
|
func NewServer(cfg Config, deps Dependencies) (*Server, error) {
|
|
if err := cfg.Validate(); err != nil {
|
|
return nil, fmt.Errorf("new internal HTTP server: %w", err)
|
|
}
|
|
|
|
logger := deps.Logger
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
|
|
return &Server{
|
|
cfg: cfg,
|
|
handler: newHandler(deps, logger),
|
|
logger: logger.With("component", "internal_http"),
|
|
metrics: deps.Telemetry,
|
|
}, nil
|
|
}
|
|
|
|
// Addr returns the currently bound listener address after Run is called.
|
|
// It returns an empty string if the server has not yet bound a listener.
|
|
func (server *Server) Addr() string {
|
|
server.stateMu.RLock()
|
|
defer server.stateMu.RUnlock()
|
|
if server.listener == nil {
|
|
return ""
|
|
}
|
|
|
|
return server.listener.Addr().String()
|
|
}
|
|
|
|
// Run binds the configured listener and serves the internal HTTP surface
|
|
// until Shutdown closes the server.
|
|
func (server *Server) Run(ctx context.Context) error {
|
|
if ctx == nil {
|
|
return errors.New("run internal HTTP server: nil context")
|
|
}
|
|
if err := ctx.Err(); err != nil {
|
|
return err
|
|
}
|
|
|
|
listener, err := net.Listen("tcp", server.cfg.Addr)
|
|
if err != nil {
|
|
return fmt.Errorf("run internal HTTP server: listen on %q: %w", server.cfg.Addr, err)
|
|
}
|
|
|
|
httpServer := &http.Server{
|
|
Handler: server.handler,
|
|
ReadHeaderTimeout: server.cfg.ReadHeaderTimeout,
|
|
ReadTimeout: server.cfg.ReadTimeout,
|
|
WriteTimeout: server.cfg.WriteTimeout,
|
|
IdleTimeout: server.cfg.IdleTimeout,
|
|
}
|
|
|
|
server.stateMu.Lock()
|
|
server.server = httpServer
|
|
server.listener = listener
|
|
server.stateMu.Unlock()
|
|
|
|
server.logger.Info("rtmanager internal HTTP server started", "addr", listener.Addr().String())
|
|
|
|
defer func() {
|
|
server.stateMu.Lock()
|
|
server.server = nil
|
|
server.listener = nil
|
|
server.stateMu.Unlock()
|
|
}()
|
|
|
|
err = httpServer.Serve(listener)
|
|
switch {
|
|
case err == nil:
|
|
return nil
|
|
case errors.Is(err, http.ErrServerClosed):
|
|
server.logger.Info("rtmanager internal HTTP server stopped")
|
|
return nil
|
|
default:
|
|
return fmt.Errorf("run internal HTTP server: serve on %q: %w", server.cfg.Addr, err)
|
|
}
|
|
}
|
|
|
|
// Shutdown gracefully stops the internal HTTP server within ctx.
|
|
func (server *Server) Shutdown(ctx context.Context) error {
|
|
if ctx == nil {
|
|
return errors.New("shutdown internal HTTP server: nil context")
|
|
}
|
|
|
|
server.stateMu.RLock()
|
|
httpServer := server.server
|
|
server.stateMu.RUnlock()
|
|
|
|
if httpServer == nil {
|
|
return nil
|
|
}
|
|
|
|
if err := httpServer.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
|
return fmt.Errorf("shutdown internal HTTP server: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func newHandler(deps Dependencies, logger *slog.Logger) http.Handler {
|
|
mux := http.NewServeMux()
|
|
mux.HandleFunc("GET "+HealthzPath, handleHealthz)
|
|
mux.HandleFunc("GET "+ReadyzPath, handleReadyz(deps.Readiness, logger))
|
|
|
|
handlers.Register(mux, handlers.Dependencies{
|
|
Logger: logger,
|
|
RuntimeRecords: deps.RuntimeRecords,
|
|
StartRuntime: deps.StartRuntime,
|
|
StopRuntime: deps.StopRuntime,
|
|
RestartRuntime: deps.RestartRuntime,
|
|
PatchRuntime: deps.PatchRuntime,
|
|
CleanupContainer: deps.CleanupContainer,
|
|
})
|
|
|
|
metrics := deps.Telemetry
|
|
options := []otelhttp.Option{}
|
|
if metrics != nil {
|
|
options = append(options,
|
|
otelhttp.WithTracerProvider(metrics.TracerProvider()),
|
|
otelhttp.WithMeterProvider(metrics.MeterProvider()),
|
|
)
|
|
}
|
|
|
|
return otelhttp.NewHandler(withObservability(mux, metrics), "rtmanager.internal_http", options...)
|
|
}
|
|
|
|
func withObservability(next http.Handler, metrics *telemetry.Runtime) http.Handler {
|
|
return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
|
|
startedAt := time.Now()
|
|
recorder := &statusRecorder{
|
|
ResponseWriter: writer,
|
|
statusCode: http.StatusOK,
|
|
}
|
|
|
|
next.ServeHTTP(recorder, request)
|
|
|
|
route := request.Pattern
|
|
switch recorder.statusCode {
|
|
case http.StatusMethodNotAllowed:
|
|
route = "method_not_allowed"
|
|
case http.StatusNotFound:
|
|
route = "not_found"
|
|
case 0:
|
|
route = "unmatched"
|
|
}
|
|
if route == "" {
|
|
route = "unmatched"
|
|
}
|
|
|
|
if metrics != nil {
|
|
metrics.RecordInternalHTTPRequest(
|
|
request.Context(),
|
|
[]attribute.KeyValue{
|
|
attribute.String("route", route),
|
|
attribute.String("method", request.Method),
|
|
attribute.String("status_code", strconv.Itoa(recorder.statusCode)),
|
|
},
|
|
time.Since(startedAt),
|
|
)
|
|
}
|
|
})
|
|
}
|
|
|
|
func handleHealthz(writer http.ResponseWriter, _ *http.Request) {
|
|
writeStatusResponse(writer, http.StatusOK, "ok")
|
|
}
|
|
|
|
func handleReadyz(probe ReadinessProbe, logger *slog.Logger) http.HandlerFunc {
|
|
return func(writer http.ResponseWriter, request *http.Request) {
|
|
if probe == nil {
|
|
writeStatusResponse(writer, http.StatusOK, "ready")
|
|
return
|
|
}
|
|
|
|
if err := probe.Check(request.Context()); err != nil {
|
|
logger.WarnContext(request.Context(), "rtmanager readiness probe failed",
|
|
"err", err.Error(),
|
|
)
|
|
writeServiceUnavailable(writer, err.Error())
|
|
return
|
|
}
|
|
|
|
writeStatusResponse(writer, http.StatusOK, "ready")
|
|
}
|
|
}
|
|
|
|
func writeStatusResponse(writer http.ResponseWriter, statusCode int, status string) {
|
|
writer.Header().Set("Content-Type", jsonContentType)
|
|
writer.WriteHeader(statusCode)
|
|
_ = json.NewEncoder(writer).Encode(statusResponse{Status: status})
|
|
}
|
|
|
|
func writeServiceUnavailable(writer http.ResponseWriter, message string) {
|
|
writer.Header().Set("Content-Type", jsonContentType)
|
|
writer.WriteHeader(http.StatusServiceUnavailable)
|
|
_ = json.NewEncoder(writer).Encode(errorResponse{
|
|
Error: errorBody{
|
|
Code: errorCodeServiceUnavailable,
|
|
Message: message,
|
|
},
|
|
})
|
|
}
|
|
|
|
// statusResponse is the JSON body of a successful probe response.
type statusResponse struct {
	// Status is the probe outcome, serialized under the `status` key.
	Status string `json:"status"`
}
|
|
|
|
// errorBody is the payload nested under the `error` key of an error
// response.
type errorBody struct {
	// Code is the error code, serialized under the `code` key.
	Code string `json:"code"`
	// Message is the human-readable description, serialized under the
	// `message` key.
	Message string `json:"message"`
}
|
|
|
|
type errorResponse struct {
|
|
Error errorBody `json:"error"`
|
|
}
|
|
|
|
// statusRecorder wraps an http.ResponseWriter and remembers the status
// code of the response so it can be reported after the handler returns.
//
// Only the first final status is recorded. Later WriteHeader calls, and
// WriteHeader calls made after the body has started, do not change the
// recorded value, so it reflects the first status the handler committed
// rather than the last one it attempted.
type statusRecorder struct {
	http.ResponseWriter

	// statusCode is the status reported for the response. Callers may
	// seed it with the implicit default (http.StatusOK) so that handlers
	// which never write anything are still reported with a valid code.
	statusCode int

	// headerCommitted is true once a final status has been recorded,
	// either explicitly through WriteHeader or implicitly through the
	// first Write.
	headerCommitted bool
}

// WriteHeader records statusCode as the response status, unless a final
// status has already been recorded, and forwards the call to the wrapped
// writer.
//
// Informational 1xx codes other than 101 Switching Protocols are
// forwarded but not recorded, because a final status still follows them.
func (recorder *statusRecorder) WriteHeader(statusCode int) {
	informational := statusCode >= 100 && statusCode <= 199 &&
		statusCode != http.StatusSwitchingProtocols
	if !recorder.headerCommitted && !informational {
		recorder.statusCode = statusCode
		recorder.headerCommitted = true
	}

	recorder.ResponseWriter.WriteHeader(statusCode)
}

// Write forwards data to the wrapped writer. When no final status has
// been recorded yet, it records the implicit http.StatusOK first, so a
// WriteHeader call made after the body has started cannot change the
// recorded status.
func (recorder *statusRecorder) Write(data []byte) (int, error) {
	if !recorder.headerCommitted {
		recorder.statusCode = http.StatusOK
		recorder.headerCommitted = true
	}

	return recorder.ResponseWriter.Write(data)
}

// Unwrap returns the wrapped writer so that http.ResponseController can
// reach optional capabilities (flushing, deadlines) of the underlying
// writer.
func (recorder *statusRecorder) Unwrap() http.ResponseWriter {
	return recorder.ResponseWriter
}
|