// Package telemetry owns the OpenTelemetry runtime for the backend process. // // New constructs the configured tracer and meter providers, registers them // globally, and exposes a Shutdown method for orderly process exit. The // supported exporter set follows README ยง4: // // - traces: none, otlp (gRPC or HTTP/protobuf), stdout // - metrics: none, otlp (gRPC or HTTP/protobuf), stdout, prometheus // // When metrics use the prometheus exporter, the runtime also retains the // Prometheus HTTP handler and listen address so a separate metrics listener // (`internal/metricsapi`) can serve the scrape endpoint. package telemetry import ( "context" "errors" "fmt" "net/http" "galaxy/backend/internal/config" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" otelprom "go.opentelemetry.io/otel/exporters/prometheus" "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/propagation" sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) // Runtime owns the shared OpenTelemetry providers and the optional Prometheus // scrape handler. type Runtime struct { logger *zap.Logger tracerProvider *sdktrace.TracerProvider meterProvider *sdkmetric.MeterProvider promHandler http.Handler prometheusListenAddr string } // New constructs the backend telemetry runtime, registers global providers, // and wires the optional Prometheus scrape handler. Callers must invoke // Runtime.Shutdown during process exit. func New(ctx context.Context, logger *zap.Logger, cfg config.TelemetryConfig) (*Runtime, error) { if logger == nil { logger = zap.NewNop() } res, err := resource.New( ctx, resource.WithAttributes(attribute.String("service.name", cfg.ServiceName)), ) if err != nil { return nil, fmt.Errorf("build telemetry resource: %w", err) } tracerProvider, err := newTracerProvider(ctx, cfg, res) if err != nil { return nil, fmt.Errorf("build tracer provider: %w", err) } meterProvider, promHandler, err := newMeterProvider(ctx, cfg, res) if err != nil { // Tracer provider was already constructed; release its resources before // surfacing the meter-provider error. _ = tracerProvider.Shutdown(ctx) return nil, fmt.Errorf("build meter provider: %w", err) } otel.SetTracerProvider(tracerProvider) otel.SetMeterProvider(meterProvider) otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( propagation.TraceContext{}, propagation.Baggage{}, )) rt := &Runtime{ logger: logger, tracerProvider: tracerProvider, meterProvider: meterProvider, promHandler: promHandler, } if cfg.MetricsExporter == "prometheus" { rt.prometheusListenAddr = cfg.PrometheusListenAddr } return rt, nil } // TracerProvider returns the runtime tracer provider, falling back to the // global one when r is not initialised. func (r *Runtime) TracerProvider() trace.TracerProvider { if r == nil || r.tracerProvider == nil { return otel.GetTracerProvider() } return r.tracerProvider } // MeterProvider returns the runtime meter provider, falling back to the // global one when r is not initialised. func (r *Runtime) MeterProvider() metric.MeterProvider { if r == nil || r.meterProvider == nil { return otel.GetMeterProvider() } return r.meterProvider } // Handler returns the Prometheus scrape handler when the metrics exporter is // `prometheus`, or http.NotFoundHandler() otherwise. The metricsapi server // uses this when a Prometheus listener is enabled. func (r *Runtime) Handler() http.Handler { if r == nil || r.promHandler == nil { return http.NotFoundHandler() } return r.promHandler } // PrometheusListenAddr returns the configured Prometheus listen address, or // the empty string when the Prometheus exporter is not selected. func (r *Runtime) PrometheusListenAddr() string { if r == nil { return "" } return r.prometheusListenAddr } // Shutdown flushes both providers within ctx. func (r *Runtime) Shutdown(ctx context.Context) error { if r == nil { return nil } var err error if r.meterProvider != nil { err = errors.Join(err, r.meterProvider.Shutdown(ctx)) } if r.tracerProvider != nil { err = errors.Join(err, r.tracerProvider.Shutdown(ctx)) } return err } // TraceFieldsFromContext returns zap fields for the active OpenTelemetry span // when ctx carries a valid span context. The helper is collocated with the // telemetry runtime so observers do not need to import the OTel API directly. func TraceFieldsFromContext(ctx context.Context) []zap.Field { if ctx == nil { return nil } spanContext := trace.SpanContextFromContext(ctx) if !spanContext.IsValid() { return nil } return []zap.Field{ zap.String("otel_trace_id", spanContext.TraceID().String()), zap.String("otel_span_id", spanContext.SpanID().String()), } } func newTracerProvider(ctx context.Context, cfg config.TelemetryConfig, res *resource.Resource) (*sdktrace.TracerProvider, error) { switch cfg.TracesExporter { case "none": return sdktrace.NewTracerProvider(sdktrace.WithResource(res)), nil case "stdout": exporter, err := stdouttrace.New() if err != nil { return nil, fmt.Errorf("stdout trace exporter: %w", err) } return sdktrace.NewTracerProvider( sdktrace.WithBatcher(exporter), sdktrace.WithResource(res), ), nil case "otlp": exporter, err := newOTLPTraceExporter(ctx, cfg) if err != nil { return nil, err } return sdktrace.NewTracerProvider( sdktrace.WithBatcher(exporter), sdktrace.WithResource(res), ), nil default: return nil, fmt.Errorf("unsupported traces exporter %q", cfg.TracesExporter) } } func newOTLPTraceExporter(ctx context.Context, cfg config.TelemetryConfig) (sdktrace.SpanExporter, error) { switch cfg.Protocol { case "grpc": opts := []otlptracegrpc.Option{} if cfg.Endpoint != "" { opts = append(opts, otlptracegrpc.WithEndpoint(cfg.Endpoint)) } exporter, err := otlptracegrpc.New(ctx, opts...) if err != nil { return nil, fmt.Errorf("otlp grpc trace exporter: %w", err) } return exporter, nil case "http/protobuf": opts := []otlptracehttp.Option{} if cfg.Endpoint != "" { opts = append(opts, otlptracehttp.WithEndpoint(cfg.Endpoint)) } exporter, err := otlptracehttp.New(ctx, opts...) if err != nil { return nil, fmt.Errorf("otlp http trace exporter: %w", err) } return exporter, nil default: return nil, fmt.Errorf("unsupported otel protocol %q", cfg.Protocol) } } func newMeterProvider(ctx context.Context, cfg config.TelemetryConfig, res *resource.Resource) (*sdkmetric.MeterProvider, http.Handler, error) { switch cfg.MetricsExporter { case "none": return sdkmetric.NewMeterProvider(sdkmetric.WithResource(res)), nil, nil case "stdout": exporter, err := stdoutmetric.New() if err != nil { return nil, nil, fmt.Errorf("stdout metric exporter: %w", err) } return sdkmetric.NewMeterProvider( sdkmetric.WithResource(res), sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)), ), nil, nil case "otlp": exporter, err := newOTLPMetricExporter(ctx, cfg) if err != nil { return nil, nil, err } return sdkmetric.NewMeterProvider( sdkmetric.WithResource(res), sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)), ), nil, nil case "prometheus": registry := prometheus.NewRegistry() exporter, err := otelprom.New(otelprom.WithRegisterer(registry)) if err != nil { return nil, nil, fmt.Errorf("prometheus metric exporter: %w", err) } mp := sdkmetric.NewMeterProvider( sdkmetric.WithResource(res), sdkmetric.WithReader(exporter), ) handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{}) return mp, handler, nil default: return nil, nil, fmt.Errorf("unsupported metrics exporter %q", cfg.MetricsExporter) } } func newOTLPMetricExporter(ctx context.Context, cfg config.TelemetryConfig) (sdkmetric.Exporter, error) { switch cfg.Protocol { case "grpc": opts := []otlpmetricgrpc.Option{} if cfg.Endpoint != "" { opts = append(opts, otlpmetricgrpc.WithEndpoint(cfg.Endpoint)) } exporter, err := otlpmetricgrpc.New(ctx, opts...) if err != nil { return nil, fmt.Errorf("otlp grpc metric exporter: %w", err) } return exporter, nil case "http/protobuf": opts := []otlpmetrichttp.Option{} if cfg.Endpoint != "" { opts = append(opts, otlpmetrichttp.WithEndpoint(cfg.Endpoint)) } exporter, err := otlpmetrichttp.New(ctx, opts...) if err != nil { return nil, fmt.Errorf("otlp http metric exporter: %w", err) } return exporter, nil default: return nil, fmt.Errorf("unsupported otel protocol %q", cfg.Protocol) } }