feat: user service
This commit is contained in:
@@ -0,0 +1,549 @@
|
||||
// Package telemetry provides shared OpenTelemetry runtime helpers and
|
||||
// low-cardinality user-service instruments.
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
||||
otelprom "go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/exporters/stdout/stdoutmetric"
|
||||
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/propagation"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
oteltrace "go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// meterName is the name passed to MeterProvider.Meter when the user-service
// instruments are created.
const meterName = "galaxy/user"
|
||||
|
||||
// Built-in default and the configuration values accepted by
// ProcessConfig.Validate.
const (
	// defaultServiceName is substituted when ProcessConfig.ServiceName is blank.
	defaultServiceName = "galaxy-user"

	// Exporter selectors for ProcessConfig.TracesExporter and
	// ProcessConfig.MetricsExporter.
	processExporterNone = "none"
	processExporterOTLP = "otlp"

	// OTLP transport selectors for ProcessConfig.TracesProtocol and
	// ProcessConfig.MetricsProtocol.
	processProtocolHTTPProtobuf = "http/protobuf"
	processProtocolGRPC         = "grpc"
)
|
||||
|
||||
// ProcessConfig describes how the process-wide OpenTelemetry runtime built by
// NewProcess should be wired.
type ProcessConfig struct {
	// ServiceName is reported as the `service.name` resource attribute. A
	// blank value makes NewProcess fall back to the built-in default name.
	ServiceName string

	// TracesExporter chooses the external traces exporter. Validate accepts
	// `none` and `otlp`.
	TracesExporter string

	// MetricsExporter chooses the external metrics exporter. Validate accepts
	// `none` and `otlp`.
	MetricsExporter string

	// TracesProtocol chooses the OTLP transport for traces when
	// TracesExporter is `otlp`. Validate accepts the empty string,
	// `http/protobuf`, and `grpc`.
	TracesProtocol string

	// MetricsProtocol chooses the OTLP transport for metrics when
	// MetricsExporter is `otlp`. Validate accepts the empty string,
	// `http/protobuf`, and `grpc`.
	MetricsProtocol string

	// StdoutTracesEnabled adds a stdout trace exporter, intended for local
	// development and debugging.
	StdoutTracesEnabled bool

	// StdoutMetricsEnabled adds a stdout metric exporter, intended for local
	// development and debugging.
	StdoutMetricsEnabled bool
}
|
||||
|
||||
// Validate reports whether cfg contains a supported OpenTelemetry exporter
|
||||
// configuration.
|
||||
func (cfg ProcessConfig) Validate() error {
|
||||
switch cfg.TracesExporter {
|
||||
case processExporterNone, processExporterOTLP:
|
||||
default:
|
||||
return fmt.Errorf("unsupported traces exporter %q", cfg.TracesExporter)
|
||||
}
|
||||
|
||||
switch cfg.MetricsExporter {
|
||||
case processExporterNone, processExporterOTLP:
|
||||
default:
|
||||
return fmt.Errorf("unsupported metrics exporter %q", cfg.MetricsExporter)
|
||||
}
|
||||
|
||||
if cfg.TracesProtocol != "" && cfg.TracesProtocol != processProtocolHTTPProtobuf && cfg.TracesProtocol != processProtocolGRPC {
|
||||
return fmt.Errorf("unsupported OTLP traces protocol %q", cfg.TracesProtocol)
|
||||
}
|
||||
if cfg.MetricsProtocol != "" && cfg.MetricsProtocol != processProtocolHTTPProtobuf && cfg.MetricsProtocol != processProtocolGRPC {
|
||||
return fmt.Errorf("unsupported OTLP metrics protocol %q", cfg.MetricsProtocol)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Runtime owns the user-service OpenTelemetry providers, the Prometheus
|
||||
// metrics handler, and the custom low-cardinality instruments.
|
||||
type Runtime struct {
|
||||
tracerProvider oteltrace.TracerProvider
|
||||
meterProvider metric.MeterProvider
|
||||
promHandler http.Handler
|
||||
|
||||
shutdownMu sync.Mutex
|
||||
shutdownDone bool
|
||||
shutdownErr error
|
||||
shutdownFns []func(context.Context) error
|
||||
|
||||
internalHTTPRequests metric.Int64Counter
|
||||
internalHTTPDuration metric.Float64Histogram
|
||||
authResolutionOutcomes metric.Int64Counter
|
||||
userCreationOutcomes metric.Int64Counter
|
||||
raceNameReservationConflicts metric.Int64Counter
|
||||
entitlementMutations metric.Int64Counter
|
||||
sanctionMutations metric.Int64Counter
|
||||
limitMutations metric.Int64Counter
|
||||
eventPublicationFailures metric.Int64Counter
|
||||
}
|
||||
|
||||
// New constructs a lightweight telemetry runtime around meterProvider for
|
||||
// tests and embedded use cases that do not need process-level exporter wiring.
|
||||
func New(meterProvider metric.MeterProvider) (*Runtime, error) {
|
||||
return NewWithProviders(meterProvider, nil)
|
||||
}
|
||||
|
||||
// NewWithProviders constructs a telemetry runtime around explicitly supplied
|
||||
// meterProvider and tracerProvider values.
|
||||
func NewWithProviders(meterProvider metric.MeterProvider, tracerProvider oteltrace.TracerProvider) (*Runtime, error) {
|
||||
if meterProvider == nil {
|
||||
meterProvider = otel.GetMeterProvider()
|
||||
}
|
||||
if tracerProvider == nil {
|
||||
tracerProvider = otel.GetTracerProvider()
|
||||
}
|
||||
if meterProvider == nil {
|
||||
return nil, errors.New("new user telemetry runtime: nil meter provider")
|
||||
}
|
||||
if tracerProvider == nil {
|
||||
return nil, errors.New("new user telemetry runtime: nil tracer provider")
|
||||
}
|
||||
|
||||
return buildRuntime(meterProvider, tracerProvider, http.NotFoundHandler(), nil)
|
||||
}
|
||||
|
||||
// NewProcess constructs the process-wide user-service OpenTelemetry runtime
|
||||
// from cfg, installs the resulting providers globally, and returns the
|
||||
// runtime.
|
||||
func NewProcess(ctx context.Context, cfg ProcessConfig, logger *slog.Logger) (*Runtime, error) {
|
||||
return newProcess(ctx, cfg, logger, os.Stdout, os.Stdout)
|
||||
}
|
||||
|
||||
// TracerProvider returns the runtime tracer provider.
|
||||
func (r *Runtime) TracerProvider() oteltrace.TracerProvider {
|
||||
if r == nil || r.tracerProvider == nil {
|
||||
return otel.GetTracerProvider()
|
||||
}
|
||||
|
||||
return r.tracerProvider
|
||||
}
|
||||
|
||||
// MeterProvider returns the runtime meter provider.
|
||||
func (r *Runtime) MeterProvider() metric.MeterProvider {
|
||||
if r == nil || r.meterProvider == nil {
|
||||
return otel.GetMeterProvider()
|
||||
}
|
||||
|
||||
return r.meterProvider
|
||||
}
|
||||
|
||||
// Handler returns the Prometheus handler that should be mounted on the admin
|
||||
// listener.
|
||||
func (r *Runtime) Handler() http.Handler {
|
||||
if r == nil || r.promHandler == nil {
|
||||
return http.NotFoundHandler()
|
||||
}
|
||||
|
||||
return r.promHandler
|
||||
}
|
||||
|
||||
// Shutdown flushes and stops the configured telemetry providers. Shutdown is
|
||||
// idempotent.
|
||||
func (r *Runtime) Shutdown(ctx context.Context) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
r.shutdownMu.Lock()
|
||||
if r.shutdownDone {
|
||||
err := r.shutdownErr
|
||||
r.shutdownMu.Unlock()
|
||||
return err
|
||||
}
|
||||
r.shutdownDone = true
|
||||
r.shutdownMu.Unlock()
|
||||
|
||||
var shutdownErr error
|
||||
for index := len(r.shutdownFns) - 1; index >= 0; index-- {
|
||||
shutdownErr = errors.Join(shutdownErr, r.shutdownFns[index](ctx))
|
||||
}
|
||||
|
||||
r.shutdownMu.Lock()
|
||||
r.shutdownErr = shutdownErr
|
||||
r.shutdownMu.Unlock()
|
||||
|
||||
return shutdownErr
|
||||
}
|
||||
|
||||
// RecordInternalHTTPRequest records one internal HTTP request outcome.
|
||||
func (r *Runtime) RecordInternalHTTPRequest(ctx context.Context, attrs []attribute.KeyValue, duration time.Duration) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
options := metric.WithAttributes(attrs...)
|
||||
r.internalHTTPRequests.Add(normalizeContext(ctx), 1, options)
|
||||
r.internalHTTPDuration.Record(normalizeContext(ctx), duration.Seconds()*1000, options)
|
||||
}
|
||||
|
||||
// RecordAuthResolutionOutcome records one auth-facing resolution outcome.
|
||||
func (r *Runtime) RecordAuthResolutionOutcome(ctx context.Context, operation string, outcome string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.authResolutionOutcomes.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("operation", strings.TrimSpace(operation)),
|
||||
attribute.String("outcome", strings.TrimSpace(outcome)),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordUserCreationOutcome records one ensure-by-email coarse outcome.
|
||||
func (r *Runtime) RecordUserCreationOutcome(ctx context.Context, outcome string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.userCreationOutcomes.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(attribute.String("outcome", strings.TrimSpace(outcome))),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordRaceNameReservationConflict records one race-name reservation conflict
|
||||
// for operation.
|
||||
func (r *Runtime) RecordRaceNameReservationConflict(ctx context.Context, operation string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.raceNameReservationConflicts.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(attribute.String("operation", strings.TrimSpace(operation))),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordEntitlementMutation records one entitlement command outcome.
|
||||
func (r *Runtime) RecordEntitlementMutation(ctx context.Context, command string, outcome string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.entitlementMutations.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("command", strings.TrimSpace(command)),
|
||||
attribute.String("outcome", strings.TrimSpace(outcome)),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordSanctionMutation records one sanction command outcome.
|
||||
func (r *Runtime) RecordSanctionMutation(ctx context.Context, command string, outcome string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.sanctionMutations.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("command", strings.TrimSpace(command)),
|
||||
attribute.String("outcome", strings.TrimSpace(outcome)),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordLimitMutation records one limit command outcome.
|
||||
func (r *Runtime) RecordLimitMutation(ctx context.Context, command string, outcome string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.limitMutations.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(
|
||||
attribute.String("command", strings.TrimSpace(command)),
|
||||
attribute.String("outcome", strings.TrimSpace(outcome)),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordEventPublicationFailure records one post-commit auxiliary event
|
||||
// publication failure.
|
||||
func (r *Runtime) RecordEventPublicationFailure(ctx context.Context, eventType string) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
|
||||
r.eventPublicationFailures.Add(
|
||||
normalizeContext(ctx),
|
||||
1,
|
||||
metric.WithAttributes(attribute.String("event_type", strings.TrimSpace(eventType))),
|
||||
)
|
||||
}
|
||||
|
||||
func newProcess(ctx context.Context, cfg ProcessConfig, logger *slog.Logger, stdoutTraceWriter io.Writer, stdoutMetricWriter io.Writer) (*Runtime, error) {
|
||||
if ctx == nil {
|
||||
return nil, errors.New("new user telemetry process: nil context")
|
||||
}
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("new user telemetry process: %w", err)
|
||||
}
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
if strings.TrimSpace(cfg.ServiceName) == "" {
|
||||
cfg.ServiceName = defaultServiceName
|
||||
}
|
||||
|
||||
res, err := resource.New(
|
||||
ctx,
|
||||
resource.WithAttributes(attribute.String("service.name", cfg.ServiceName)),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new user telemetry process: resource: %w", err)
|
||||
}
|
||||
|
||||
tracerProvider, err := newTracerProvider(ctx, res, cfg, stdoutTraceWriter)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new user telemetry process: tracer provider: %w", err)
|
||||
}
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
prometheusExporter, err := otelprom.New(otelprom.WithRegisterer(registry))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new user telemetry process: prometheus exporter: %w", err)
|
||||
}
|
||||
|
||||
meterProvider, err := newMeterProvider(ctx, res, cfg, prometheusExporter, stdoutMetricWriter)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new user telemetry process: meter provider: %w", err)
|
||||
}
|
||||
|
||||
otel.SetTracerProvider(tracerProvider)
|
||||
otel.SetMeterProvider(meterProvider)
|
||||
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
||||
propagation.TraceContext{},
|
||||
propagation.Baggage{},
|
||||
))
|
||||
|
||||
runtime, err := buildRuntime(
|
||||
meterProvider,
|
||||
tracerProvider,
|
||||
promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
|
||||
[]func(context.Context) error{
|
||||
meterProvider.Shutdown,
|
||||
tracerProvider.Shutdown,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new user telemetry process: %w", err)
|
||||
}
|
||||
|
||||
logger.InfoContext(ctx, "user telemetry configured",
|
||||
"service_name", cfg.ServiceName,
|
||||
"traces_exporter", cfg.TracesExporter,
|
||||
"metrics_exporter", cfg.MetricsExporter,
|
||||
"stdout_traces_enabled", cfg.StdoutTracesEnabled,
|
||||
"stdout_metrics_enabled", cfg.StdoutMetricsEnabled,
|
||||
)
|
||||
|
||||
return runtime, nil
|
||||
}
|
||||
|
||||
func buildRuntime(
|
||||
meterProvider metric.MeterProvider,
|
||||
tracerProvider oteltrace.TracerProvider,
|
||||
promHandler http.Handler,
|
||||
shutdownFns []func(context.Context) error,
|
||||
) (*Runtime, error) {
|
||||
meter := meterProvider.Meter(meterName)
|
||||
|
||||
internalHTTPRequests, err := meter.Int64Counter("user.internal_http.requests")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: internal_http.requests: %w", err)
|
||||
}
|
||||
internalHTTPDuration, err := meter.Float64Histogram("user.internal_http.duration", metric.WithUnit("ms"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: internal_http.duration: %w", err)
|
||||
}
|
||||
authResolutionOutcomes, err := meter.Int64Counter("user.auth_resolution.outcomes")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: auth_resolution.outcomes: %w", err)
|
||||
}
|
||||
userCreationOutcomes, err := meter.Int64Counter("user.user_creation.outcomes")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: user_creation.outcomes: %w", err)
|
||||
}
|
||||
raceNameReservationConflicts, err := meter.Int64Counter("user.race_name.reservation_conflicts")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: race_name.reservation_conflicts: %w", err)
|
||||
}
|
||||
entitlementMutations, err := meter.Int64Counter("user.entitlement.mutations")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: entitlement.mutations: %w", err)
|
||||
}
|
||||
sanctionMutations, err := meter.Int64Counter("user.sanction.mutations")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: sanction.mutations: %w", err)
|
||||
}
|
||||
limitMutations, err := meter.Int64Counter("user.limit.mutations")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: limit.mutations: %w", err)
|
||||
}
|
||||
eventPublicationFailures, err := meter.Int64Counter("user.event_publication_failures")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build user telemetry runtime: event_publication_failures: %w", err)
|
||||
}
|
||||
|
||||
if promHandler == nil {
|
||||
promHandler = http.NotFoundHandler()
|
||||
}
|
||||
|
||||
return &Runtime{
|
||||
tracerProvider: tracerProvider,
|
||||
meterProvider: meterProvider,
|
||||
promHandler: promHandler,
|
||||
shutdownFns: shutdownFns,
|
||||
internalHTTPRequests: internalHTTPRequests,
|
||||
internalHTTPDuration: internalHTTPDuration,
|
||||
authResolutionOutcomes: authResolutionOutcomes,
|
||||
userCreationOutcomes: userCreationOutcomes,
|
||||
raceNameReservationConflicts: raceNameReservationConflicts,
|
||||
entitlementMutations: entitlementMutations,
|
||||
sanctionMutations: sanctionMutations,
|
||||
limitMutations: limitMutations,
|
||||
eventPublicationFailures: eventPublicationFailures,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newTracerProvider(ctx context.Context, res *resource.Resource, cfg ProcessConfig, stdoutWriter io.Writer) (*sdktrace.TracerProvider, error) {
|
||||
options := []sdktrace.TracerProviderOption{sdktrace.WithResource(res)}
|
||||
|
||||
if cfg.TracesExporter == processExporterOTLP {
|
||||
exporter, err := newOTLPTraceExporter(ctx, cfg.TracesProtocol)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
options = append(options, sdktrace.WithBatcher(exporter))
|
||||
}
|
||||
if cfg.StdoutTracesEnabled {
|
||||
exporter, err := stdouttrace.New(
|
||||
stdouttrace.WithPrettyPrint(),
|
||||
stdouttrace.WithWriter(stdoutWriter),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
options = append(options, sdktrace.WithBatcher(exporter))
|
||||
}
|
||||
|
||||
return sdktrace.NewTracerProvider(options...), nil
|
||||
}
|
||||
|
||||
func newMeterProvider(
|
||||
ctx context.Context,
|
||||
res *resource.Resource,
|
||||
cfg ProcessConfig,
|
||||
prometheusExporter sdkmetric.Reader,
|
||||
stdoutWriter io.Writer,
|
||||
) (*sdkmetric.MeterProvider, error) {
|
||||
options := []sdkmetric.Option{
|
||||
sdkmetric.WithResource(res),
|
||||
sdkmetric.WithReader(prometheusExporter),
|
||||
}
|
||||
|
||||
if cfg.MetricsExporter == processExporterOTLP {
|
||||
exporter, err := newOTLPMetricExporter(ctx, cfg.MetricsProtocol)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
options = append(options, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)))
|
||||
}
|
||||
if cfg.StdoutMetricsEnabled {
|
||||
exporter, err := stdoutmetric.New(
|
||||
stdoutmetric.WithPrettyPrint(),
|
||||
stdoutmetric.WithWriter(stdoutWriter),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
options = append(options, sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)))
|
||||
}
|
||||
|
||||
return sdkmetric.NewMeterProvider(options...), nil
|
||||
}
|
||||
|
||||
func newOTLPTraceExporter(ctx context.Context, protocol string) (sdktrace.SpanExporter, error) {
|
||||
switch protocol {
|
||||
case "", processProtocolHTTPProtobuf:
|
||||
return otlptracehttp.New(ctx)
|
||||
case processProtocolGRPC:
|
||||
return otlptracegrpc.New(ctx)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported OTLP traces protocol %q", protocol)
|
||||
}
|
||||
}
|
||||
|
||||
func newOTLPMetricExporter(ctx context.Context, protocol string) (sdkmetric.Exporter, error) {
|
||||
switch protocol {
|
||||
case "", processProtocolHTTPProtobuf:
|
||||
return otlpmetrichttp.New(ctx)
|
||||
case processProtocolGRPC:
|
||||
return otlpmetricgrpc.New(ctx)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported OTLP metrics protocol %q", protocol)
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeContext returns ctx unchanged when it is non-nil and
// context.Background() otherwise, so callers can always pass the result on.
func normalizeContext(ctx context.Context) context.Context {
	if ctx != nil {
		return ctx
	}

	return context.Background()
}
|
||||
Reference in New Issue
Block a user