feat: notification service

This commit is contained in:
Ilia Denisov
2026-04-22 08:49:45 +02:00
committed by GitHub
parent 5b7593e6f6
commit 32dc29359a
135 changed files with 21828 additions and 130 deletions
@@ -0,0 +1,252 @@
// Package internalhttp provides the private probe HTTP listener used by the
// runnable Notification Service process.
package internalhttp
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"strconv"
"sync"
"time"
"galaxy/notification/internal/telemetry"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/attribute"
)
// Route paths and response metadata shared by the private probe handlers.
const (
	// HealthzPath is the route served for private liveness probes.
	HealthzPath = "/healthz"

	// ReadyzPath is the route served for private readiness probes.
	ReadyzPath = "/readyz"

	// jsonContentType is the Content-Type header value set on probe
	// responses.
	jsonContentType = "application/json; charset=utf-8"
)
// Config holds the listen address and connection timeouts of the private
// internal HTTP listener owned by Notification Service.
type Config struct {
	// Addr is the TCP address the private probe HTTP server listens on.
	Addr string

	// ReadHeaderTimeout limits how long the listener may spend reading the
	// headers of a request before the server rejects the connection.
	ReadHeaderTimeout time.Duration

	// ReadTimeout limits how long the listener may spend reading one whole
	// request.
	ReadTimeout time.Duration

	// IdleTimeout limits how long an idle keep-alive connection is kept
	// open.
	IdleTimeout time.Duration
}

// Validate checks that cfg can be used to start the private HTTP listener.
//
// Addr must be non-empty and every timeout must be strictly positive. Fields
// are checked in declaration order and the error describes the first
// violation found; nil is returned when all fields are acceptable.
func (cfg Config) Validate() error {
	if cfg.Addr == "" {
		return errors.New("internal HTTP addr must not be empty")
	}
	if cfg.ReadHeaderTimeout <= 0 {
		return errors.New("internal HTTP read header timeout must be positive")
	}
	if cfg.ReadTimeout <= 0 {
		return errors.New("internal HTTP read timeout must be positive")
	}
	if cfg.IdleTimeout <= 0 {
		return errors.New("internal HTTP idle timeout must be positive")
	}
	return nil
}
// Dependencies groups the collaborators injected into the private probe
// transport layer.
type Dependencies struct {
	// Logger receives structured listener lifecycle logs. A nil Logger is
	// replaced with slog.Default.
	Logger *slog.Logger

	// Telemetry records low-cardinality probe metrics and lifecycle events.
	Telemetry *telemetry.Runtime
}
// Server owns the private probe HTTP listener exposed by Notification
// Service. Construct it with NewServer, start it with Run and stop it with
// Shutdown.
type Server struct {
	// Set once by NewServer and not reassigned afterwards.
	cfg     Config
	handler http.Handler
	logger  *slog.Logger
	metrics *telemetry.Runtime

	// stateMu guards server and listener. Run sets both before serving and
	// resets them to nil when it returns; Shutdown reads server under the
	// read lock.
	stateMu  sync.RWMutex
	server   *http.Server
	listener net.Listener
}
// NewServer builds one private probe HTTP server from cfg and deps.
//
// It returns a wrapped validation error when cfg is not usable. When
// deps.Logger is nil, slog.Default is used instead. No listener is bound
// here; that happens in Run.
func NewServer(cfg Config, deps Dependencies) (*Server, error) {
	err := cfg.Validate()
	if err != nil {
		return nil, fmt.Errorf("new internal HTTP server: %w", err)
	}

	baseLogger := deps.Logger
	if baseLogger == nil {
		baseLogger = slog.Default()
	}

	built := &Server{
		cfg:     cfg,
		handler: newHandler(baseLogger, deps.Telemetry),
		// Lifecycle logs carry a fixed component attribute.
		logger:  baseLogger.With("component", "internal_http"),
		metrics: deps.Telemetry,
	}
	return built, nil
}
// Run binds the configured TCP listener and serves the private probe surface,
// blocking until the server stops.
//
// ctx is inspected only before binding: a nil ctx produces an error and an
// already-finished ctx produces ctx.Err(). Cancelling ctx afterwards does not
// stop the server; call Shutdown for that. Run returns nil when serving ends
// with http.ErrServerClosed, and a wrapped error when listening or serving
// fails for any other reason.
func (server *Server) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run internal HTTP server: nil context")
	}
	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr
	}

	addr := server.cfg.Addr
	ln, listenErr := net.Listen("tcp", addr)
	if listenErr != nil {
		return fmt.Errorf("run internal HTTP server: listen on %q: %w", addr, listenErr)
	}

	srv := &http.Server{
		Handler:           server.handler,
		ReadHeaderTimeout: server.cfg.ReadHeaderTimeout,
		ReadTimeout:       server.cfg.ReadTimeout,
		IdleTimeout:       server.cfg.IdleTimeout,
	}

	// Publish the running server so that Shutdown can reach it.
	server.stateMu.Lock()
	server.server, server.listener = srv, ln
	server.stateMu.Unlock()

	server.logger.Info("notification internal HTTP server started", "addr", ln.Addr().String())
	server.metrics.RecordInternalHTTPEvent(context.Background(), "started")

	// Withdraw the published state once serving has ended.
	defer func() {
		server.stateMu.Lock()
		defer server.stateMu.Unlock()
		server.server, server.listener = nil, nil
	}()

	serveErr := srv.Serve(ln)
	if serveErr == nil {
		return nil
	}
	if !errors.Is(serveErr, http.ErrServerClosed) {
		return fmt.Errorf("run internal HTTP server: serve on %q: %w", addr, serveErr)
	}

	// http.ErrServerClosed is the expected result of a shutdown, not a
	// failure.
	server.logger.Info("notification internal HTTP server stopped")
	server.metrics.RecordInternalHTTPEvent(context.Background(), "stopped")
	return nil
}
// Shutdown gracefully stops the private probe HTTP server, waiting at most
// until ctx is done.
//
// It returns an error for a nil ctx, nil when no server is currently
// published by Run, and a wrapped error when the underlying shutdown fails
// with anything other than http.ErrServerClosed.
func (server *Server) Shutdown(ctx context.Context) error {
	if ctx == nil {
		return errors.New("shutdown internal HTTP server: nil context")
	}

	// Copy the pointer under the read lock so the potentially slow shutdown
	// below runs without holding stateMu.
	server.stateMu.RLock()
	running := server.server
	server.stateMu.RUnlock()

	if running == nil {
		return nil
	}

	err := running.Shutdown(ctx)
	if err == nil || errors.Is(err, http.ErrServerClosed) {
		return nil
	}
	return fmt.Errorf("shutdown internal HTTP server: %w", err)
}
// newHandler assembles the probe handler chain: a ServeMux answering GET on
// HealthzPath and ReadyzPath, wrapped first by withObservability and then by
// otelhttp.NewHandler under the name "notification.internal_http".
//
// The logger argument is accepted but not used by the current
// implementation.
func newHandler(logger *slog.Logger, metrics *telemetry.Runtime) http.Handler {
	routes := http.NewServeMux()
	routes.Handle(http.MethodGet+" "+HealthzPath, http.HandlerFunc(handleHealthz))
	routes.Handle(http.MethodGet+" "+ReadyzPath, http.HandlerFunc(handleReadyz))

	observed := withObservability(routes, metrics)
	return otelhttp.NewHandler(observed, "notification.internal_http")
}
// withObservability wraps next so that every request is reported to metrics
// with its route, method, status code and elapsed time.
//
// The route label is "method_not_allowed" for 405 responses, "not_found" for
// 404 responses, otherwise the value of request.Pattern read after next has
// returned, falling back to "unmatched" when that value is empty or the
// recorded status is 0.
func withObservability(next http.Handler, metrics *telemetry.Runtime) http.Handler {
	observe := func(writer http.ResponseWriter, request *http.Request) {
		begin := time.Now()

		// Start from 200: that is what is reported when the handler never
		// calls WriteHeader itself.
		rec := &statusRecorder{ResponseWriter: writer, statusCode: http.StatusOK}
		next.ServeHTTP(rec, request)

		// Collapse error responses into fixed labels instead of using the
		// request pattern.
		route := request.Pattern
		switch status := rec.statusCode; {
		case status == http.StatusMethodNotAllowed:
			route = "method_not_allowed"
		case status == http.StatusNotFound:
			route = "not_found"
		case status == 0, route == "":
			route = "unmatched"
		}

		labels := []attribute.KeyValue{
			attribute.String("route", route),
			attribute.String("method", request.Method),
			attribute.String("status_code", strconv.Itoa(rec.statusCode)),
		}
		metrics.RecordInternalHTTPRequest(request.Context(), labels, time.Since(begin))
	}
	return http.HandlerFunc(observe)
}
// handleHealthz answers the liveness probe with HTTP 200 and the JSON status
// "ok". The incoming request is not inspected.
func handleHealthz(w http.ResponseWriter, _ *http.Request) {
	const liveStatus = "ok"
	writeStatusResponse(w, http.StatusOK, liveStatus)
}
// handleReadyz answers the readiness probe with HTTP 200 and the JSON status
// "ready". The incoming request is not inspected.
func handleReadyz(w http.ResponseWriter, _ *http.Request) {
	const readyStatus = "ready"
	writeStatusResponse(w, http.StatusOK, readyStatus)
}
func writeStatusResponse(writer http.ResponseWriter, statusCode int, status string) {
writer.Header().Set("Content-Type", jsonContentType)
writer.WriteHeader(statusCode)
_ = json.NewEncoder(writer).Encode(statusResponse{Status: status})
}
// statusResponse is the JSON body returned by the probe routes. It
// serializes as an object with a single "status" string member.
type statusResponse struct {
	Status string `json:"status"`
}
// statusRecorder wraps an http.ResponseWriter and remembers the status code
// that was actually committed to the client, so it can be reported once the
// handler has returned.
//
// Creators are expected to initialise statusCode to http.StatusOK, which is
// the status reported when a handler writes a body (or nothing at all)
// without ever calling WriteHeader.
type statusRecorder struct {
	http.ResponseWriter

	// statusCode is the final status of the response.
	statusCode int

	// wroteHeader reports whether the final header has been committed, either
	// by an explicit WriteHeader call or implicitly by the first Write. Later
	// WriteHeader calls are superfluous and must not change statusCode.
	wroteHeader bool
}

// WriteHeader records statusCode when it is the first final status of the
// response and always forwards the call to the wrapped writer.
//
// Informational 1xx codes other than 101 are forwarded but not recorded,
// because a final status still follows them. Calls made after the header has
// been committed are forwarded but leave the recorded status untouched, since
// only the first final header reaches the client.
func (recorder *statusRecorder) WriteHeader(statusCode int) {
	informational := statusCode >= 100 && statusCode <= 199 &&
		statusCode != http.StatusSwitchingProtocols
	if !recorder.wroteHeader && !informational {
		recorder.statusCode = statusCode
		recorder.wroteHeader = true
	}
	recorder.ResponseWriter.WriteHeader(statusCode)
}

// Write forwards body to the wrapped writer. A Write without a preceding
// WriteHeader commits the header implicitly, so the recorded status is frozen
// at its current value.
func (recorder *statusRecorder) Write(body []byte) (int, error) {
	recorder.wroteHeader = true
	return recorder.ResponseWriter.Write(body)
}

// Unwrap returns the wrapped writer so that http.ResponseController can reach
// optional interfaces implemented by it.
func (recorder *statusRecorder) Unwrap() http.ResponseWriter {
	return recorder.ResponseWriter
}
@@ -0,0 +1,272 @@
package internalhttp
import (
"context"
"encoding/json"
"io"
"net"
"net/http"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewServerRejectsInvalidConfiguration verifies that NewServer fails with
// an error mentioning "addr" when the listen address is left empty.
func TestNewServerRejectsInvalidConfiguration(t *testing.T) {
	t.Parallel()

	// Every timeout is valid, so the missing Addr is the only violation.
	missingAddr := Config{
		ReadHeaderTimeout: time.Second,
		ReadTimeout:       time.Second,
		IdleTimeout:       time.Second,
	}

	_, gotErr := NewServer(missingAddr, Dependencies{})

	require.Error(t, gotErr)
	assert.Contains(t, gotErr.Error(), "addr")
}
// TestServerRunAndShutdown verifies the basic lifecycle: Run serves the
// healthz probe, Shutdown succeeds, and Run then returns without an error.
func TestServerRunAndShutdown(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	srv, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	// Buffered so the Run goroutine can always deliver its result.
	done := make(chan error, 1)
	go func() { done <- srv.Run(context.Background()) }()

	waitForHealthzReady(t, newTestHTTPClient(t), cfg.Addr)

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, srv.Shutdown(stopCtx))
	waitForServerRunResult(t, done)
}
// TestProbeRoutesReturnStableJSON verifies that each probe route answers GET
// with HTTP 200, the JSON content type and the expected status value.
func TestProbeRoutesReturnStableJSON(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	srv, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	// Buffered so the Run goroutine can always deliver its result.
	done := make(chan error, 1)
	go func() { done <- srv.Run(context.Background()) }()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	probes := []struct {
		path   string
		status string
	}{
		{path: HealthzPath, status: "ok"},
		{path: ReadyzPath, status: "ready"},
	}

	// Subtests run sequentially, so the loop variable can be used directly.
	for _, probe := range probes {
		t.Run(probe.path, func(t *testing.T) {
			resp, err := client.Get("http://" + cfg.Addr + probe.path)
			require.NoError(t, err)
			defer resp.Body.Close()

			require.Equal(t, http.StatusOK, resp.StatusCode)
			require.Equal(t, "application/json; charset=utf-8", resp.Header.Get("Content-Type"))

			var got statusResponse
			require.NoError(t, json.NewDecoder(resp.Body).Decode(&got))
			require.Equal(t, probe.status, got.Status)
		})
	}

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, srv.Shutdown(stopCtx))
	waitForServerRunResult(t, done)
}
// TestServerDoesNotExposeMetricsOrUnknownRoutes verifies that GET requests to
// /metrics and /unknown are answered with HTTP 404.
func TestServerDoesNotExposeMetricsOrUnknownRoutes(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	srv, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	// Buffered so the Run goroutine can always deliver its result.
	done := make(chan error, 1)
	go func() { done <- srv.Run(context.Background()) }()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	unregistered := [...]string{"/metrics", "/unknown"}
	for _, path := range unregistered {
		resp, err := client.Get("http://" + cfg.Addr + path)
		require.NoError(t, err)

		// Drain and close inside the loop so no body stays open across
		// iterations.
		_, _ = io.Copy(io.Discard, resp.Body)
		resp.Body.Close()

		assert.Equalf(t, http.StatusNotFound, resp.StatusCode, "path %s", path)
	}

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, srv.Shutdown(stopCtx))
	waitForServerRunResult(t, done)
}
// TestServerPreservesStandardHEADBehavior verifies that a HEAD request to the
// healthz route is answered with HTTP 200 and an empty body.
func TestServerPreservesStandardHEADBehavior(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	srv, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	// Buffered so the Run goroutine can always deliver its result.
	done := make(chan error, 1)
	go func() { done <- srv.Run(context.Background()) }()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	resp, err := client.Head("http://" + cfg.Addr + HealthzPath)
	require.NoError(t, err)
	defer resp.Body.Close()

	payload, err := io.ReadAll(resp.Body)
	require.NoError(t, err)
	require.Equal(t, http.StatusOK, resp.StatusCode)
	require.Empty(t, payload)

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, srv.Shutdown(stopCtx))
	waitForServerRunResult(t, done)
}
// TestServerUsesStandardMethodNotAllowedBehavior verifies that a POST to the
// healthz route is answered with HTTP 405 and an Allow header that lists both
// GET and HEAD.
func TestServerUsesStandardMethodNotAllowedBehavior(t *testing.T) {
	t.Parallel()

	cfg := testConfig(t)
	srv, err := NewServer(cfg, Dependencies{})
	require.NoError(t, err)

	// Buffered so the Run goroutine can always deliver its result.
	done := make(chan error, 1)
	go func() { done <- srv.Run(context.Background()) }()

	client := newTestHTTPClient(t)
	waitForHealthzReady(t, client, cfg.Addr)

	post, err := http.NewRequest(http.MethodPost, "http://"+cfg.Addr+HealthzPath, nil)
	require.NoError(t, err)

	resp, err := client.Do(post)
	require.NoError(t, err)
	defer resp.Body.Close()
	_, _ = io.Copy(io.Discard, resp.Body)

	require.Equal(t, http.StatusMethodNotAllowed, resp.StatusCode)

	allow := resp.Header.Get("Allow")
	require.Contains(t, allow, http.MethodGet)
	require.Contains(t, allow, http.MethodHead)

	stopCtx, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	require.NoError(t, srv.Shutdown(stopCtx))
	waitForServerRunResult(t, done)
}
// testConfig returns a valid Config whose Addr comes from mustFreeAddr and
// whose timeouts are fixed test values.
func testConfig(t *testing.T) Config {
	t.Helper()

	cfg := Config{
		Addr:              mustFreeAddr(t),
		ReadHeaderTimeout: time.Second,
		ReadTimeout:       2 * time.Second,
		IdleTimeout:       time.Minute,
	}
	return cfg
}
// newTestHTTPClient returns an HTTP client with a 250ms timeout and
// keep-alives disabled. Its transport's idle connections are closed when the
// test finishes.
func newTestHTTPClient(t *testing.T) *http.Client {
	t.Helper()

	tr := &http.Transport{DisableKeepAlives: true}
	t.Cleanup(tr.CloseIdleConnections)

	client := &http.Client{
		Transport: tr,
		Timeout:   250 * time.Millisecond,
	}
	return client
}
func waitForHealthzReady(t *testing.T, client *http.Client, addr string) {
t.Helper()
require.Eventually(t, func() bool {
request, err := http.NewRequest(http.MethodGet, "http://"+addr+HealthzPath, nil)
if err != nil {
return false
}
response, err := client.Do(request)
if err != nil {
return false
}
defer response.Body.Close()
payload, err := io.ReadAll(response.Body)
if err != nil {
return false
}
return response.StatusCode == http.StatusOK && strings.Contains(string(payload), `"status":"ok"`)
}, 5*time.Second, 25*time.Millisecond, "internal HTTP server did not become reachable")
}
// waitForServerRunResult polls runErr every 10ms for up to 5s, fails the test
// when no value arrives in time, and otherwise requires the received error to
// be nil.
func waitForServerRunResult(t *testing.T, runErr <-chan error) {
	t.Helper()

	var result error

	// received performs one non-blocking receive and stores the value on
	// success.
	received := func() bool {
		select {
		case result = <-runErr:
			return true
		default:
			return false
		}
	}

	require.Eventually(t, received, 5*time.Second, 10*time.Millisecond, "internal HTTP server did not stop")
	require.NoError(t, result)
}
// mustFreeAddr returns a loopback address with a port the operating system
// handed out for a temporary listener.
//
// The temporary listener is closed before the function returns, so the port
// is not reserved: something else may bind it before the caller does.
func mustFreeAddr(t *testing.T) string {
	t.Helper()

	probe, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)

	addr := probe.Addr().String()
	assert.NoError(t, probe.Close())
	return addr
}