feat: edge gateway service

This commit is contained in:
Ilia Denisov
2026-04-02 19:18:42 +02:00
committed by GitHub
parent 8cde99936c
commit 436c97a38b
95 changed files with 20504 additions and 57 deletions
+178
View File
@@ -0,0 +1,178 @@
// Package app wires the gateway process lifecycle and coordinates component
// startup and graceful shutdown.
package app
import (
"context"
"errors"
"fmt"
"sync"
"galaxy/gateway/internal/config"
)
// Component is a long-lived gateway subsystem that participates in coordinated
// startup and graceful shutdown.
//
// App.Run calls Run once per component in its own goroutine and calls Shutdown
// once per component after the run context has been cancelled, so Shutdown may
// execute while Run is still in progress.
type Component interface {
// Run starts the component and blocks until it stops. The context passed by
// App.Run is cancelled when the parent context ends or when any component
// exits. A nil return while the parent context is still live is reported by
// App.Run as an unexpected early exit.
Run(context.Context) error
// Shutdown stops the component within the provided timeout-bounded context.
// App passes a context limited by the configured ShutdownTimeout; a non-nil
// error is included in the error returned by App.Run.
Shutdown(context.Context) error
}
// App owns the process-level lifecycle of the gateway and its registered
// components. It is built by New and driven by Run.
type App struct {
// cfg is the gateway configuration. The lifecycle code in this file reads
// only cfg.ShutdownTimeout from it.
cfg config.Config
// components holds the registered subsystems in registration order. Error
// messages identify a component by its index in this slice.
components []Component
}
// New builds an App around cfg and the given components.
//
// The variadic slice is copied into the App, so later writes to the caller's
// backing array do not change which components the App manages. No validation
// happens here; Run checks the configuration and the components before it
// starts anything.
func New(cfg config.Config, components ...Component) *App {
	app := &App{cfg: cfg}
	// Appending to the nil field allocates fresh storage owned by the App.
	app.components = append(app.components, components...)
	return app
}
// Run starts every registered component, blocks until ctx is cancelled or the
// first component exits, and then performs best-effort graceful shutdown of
// all components.
//
// It returns an error when ctx is nil or validation fails (nothing is started
// in that case). Otherwise the result joins, in this order: the classified
// error of the first component exit (if that is what ended the wait), any
// Shutdown failures, and a timeout error if Run goroutines did not return
// within the shutdown budget. With no components registered, Run simply waits
// for ctx to be cancelled and returns nil.
func (a *App) Run(ctx context.Context) error {
	if ctx == nil {
		return errors.New("run gateway app: nil context")
	}
	if err := a.validate(); err != nil {
		return err
	}

	total := len(a.components)
	if total == 0 {
		// Nothing to supervise: park until the caller cancels.
		<-ctx.Done()
		return nil
	}

	runCtx, stopAll := context.WithCancel(ctx)
	defer stopAll()

	// One buffer slot per component so a send never blocks, even though only
	// the first exit is received below.
	exits := make(chan componentResult, total)

	var running sync.WaitGroup
	running.Add(total)
	for i, c := range a.components {
		go func(i int, c Component) {
			defer running.Done()
			err := c.Run(runCtx)
			exits <- componentResult{index: i, err: err}
		}(i, c)
	}

	var firstExitErr error
	select {
	case first := <-exits:
		firstExitErr = classifyComponentResult(ctx, first)
	case <-ctx.Done():
		// Parent cancellation is the normal stop signal, not a failure.
	}

	// Order matters: cancel the run context, then request graceful shutdown,
	// then wait for the Run goroutines to drain.
	stopAll()
	shutdownErr := a.shutdownComponents()
	drainErr := a.waitForComponents(&running)

	return errors.Join(firstExitErr, shutdownErr, drainErr)
}
// componentResult records how one component's Run call ended. Every Run
// goroutine started by App.Run sends one of these, but App.Run only receives
// the first to arrive.
type componentResult struct {
// index is the component's position in App.components.
index int
// err is the value returned by the component's Run method.
err error
}
// validate checks the preconditions Run relies on before any goroutine is
// started: the shutdown timeout must be strictly positive and no registered
// component may be nil. It returns a descriptive error for the first violation
// found, or nil when the App is safe to run.
func (a *App) validate() error {
	if timeout := a.cfg.ShutdownTimeout; timeout <= 0 {
		return fmt.Errorf("run gateway app: shutdown timeout must be positive, got %s", timeout)
	}

	for i := range a.components {
		if a.components[i] != nil {
			continue
		}
		return fmt.Errorf("run gateway app: component %d is nil", i)
	}

	return nil
}
// classifyComponentResult maps the first component exit into the error that
// should control the application result.
//
// Once parentCtx is done the application is already stopping, so the exit is
// treated as clean (nil) when the component returned nil, an error matching
// context.Canceled, or an error matching parentCtx.Err() itself. The last case
// covers a parent that ended by deadline: a component that returns its
// context's error then yields context.DeadlineExceeded, which is a normal stop
// rather than a failure.
//
// While parentCtx is still live every exit is a failure: a nil error is
// reported as an unexpected early exit, and a non-nil error is wrapped with
// the component index so errors.Is and errors.As still reach the original.
func classifyComponentResult(parentCtx context.Context, result componentResult) error {
	parentErr := parentCtx.Err()
	stopping := parentErr != nil

	switch {
	case result.err == nil && stopping:
		return nil
	case result.err == nil:
		return fmt.Errorf("run gateway app: component %d exited without error before shutdown", result.index)
	case stopping && (errors.Is(result.err, context.Canceled) || errors.Is(result.err, parentErr)):
		// The component merely echoed the stop request it was given.
		return nil
	default:
		return fmt.Errorf("run gateway app: component %d: %w", result.index, result.err)
	}
}
// shutdownComponents invokes Shutdown on every registered component and
// returns the joined failures, or nil when all of them succeed.
//
// The calls run concurrently, one goroutine per component, and each receives
// its own context bounded by cfg.ShutdownTimeout so one slow component does not
// consume another's budget. The method returns only after every Shutdown call
// has returned. Failures are joined in the order they were reported.
func (a *App) shutdownComponents() error {
	// Sized for the worst case so a failing goroutine never blocks on send.
	failures := make(chan error, len(a.components))

	var pending sync.WaitGroup
	pending.Add(len(a.components))
	for i, c := range a.components {
		go func(i int, c Component) {
			defer pending.Done()

			ctx, release := context.WithTimeout(context.Background(), a.cfg.ShutdownTimeout)
			defer release()

			err := c.Shutdown(ctx)
			if err == nil {
				return
			}
			failures <- fmt.Errorf("shutdown gateway component %d: %w", i, err)
		}(i, c)
	}

	pending.Wait()
	close(failures)

	var combined error
	for failure := range failures {
		combined = errors.Join(combined, failure)
	}
	return combined
}
// waitForComponents blocks until every Run goroutine tracked by runWG has
// returned, or until cfg.ShutdownTimeout elapses, whichever comes first.
//
// It returns nil when the goroutines finish in time and an error wrapping the
// timeout context's error otherwise. On timeout the helper goroutine stays
// parked on runWG.Wait until the components eventually return.
func (a *App) waitForComponents(runWG *sync.WaitGroup) error {
	drained := make(chan struct{})
	go func() {
		defer close(drained)
		runWG.Wait()
	}()

	budget, release := context.WithTimeout(context.Background(), a.cfg.ShutdownTimeout)
	defer release()

	select {
	case <-budget.Done():
		return fmt.Errorf("wait for gateway components: %w", budget.Err())
	case <-drained:
		return nil
	}
}
+268
View File
@@ -0,0 +1,268 @@
package app
import (
"context"
"errors"
"sync"
"testing"
"time"
"galaxy/gateway/internal/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestAppRunWaitsForCancellationWithoutComponents verifies that an App with no
// components keeps Run blocked until the context is cancelled and then returns
// nil.
func TestAppRunWaitsForCancellationWithoutComponents(t *testing.T) {
	t.Parallel()

	ctx, stop := context.WithCancel(context.Background())
	defer stop()

	app := New(config.Config{ShutdownTimeout: 50 * time.Millisecond})
	done := make(chan error, 1)
	go func() { done <- app.Run(ctx) }()

	// Run has nothing that could fail, so any return during this quiet period
	// means it did not wait for cancellation.
	select {
	case <-time.After(50 * time.Millisecond):
	case err := <-done:
		require.FailNowf(t, "Run() returned early", "error=%v", err)
	}

	stop()

	select {
	case <-time.After(time.Second):
		require.FailNow(t, "Run() did not return after cancellation")
	case err := <-done:
		require.NoError(t, err)
	}
}
func TestAppRunCancelsComponentsAndCallsShutdownOnce(t *testing.T) {
t.Parallel()
first := newLifecycleComponent()
second := newLifecycleComponent()
application := New(
config.Config{ShutdownTimeout: time.Second},
first,
second,
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
resultCh := make(chan error, 1)
go func() {
resultCh <- application.Run(ctx)
}()
first.waitStarted(t)
second.waitStarted(t)
cancel()
select {
case err := <-resultCh:
require.NoError(t, err)
case <-time.After(time.Second):
require.FailNow(t, "Run() did not return after cancellation")
}
first.waitRunExited(t)
second.waitRunExited(t)
assert.Equal(t, 1, first.shutdownCalls())
assert.Equal(t, 1, second.shutdownCalls())
}
// TestAppRunReturnsComponentErrorAndStillShutsDown verifies that when one
// component's Run fails, App.Run surfaces that error, the remaining component
// is stopped, and every component still receives exactly one Shutdown call.
func TestAppRunReturnsComponentErrorAndStillShutsDown(t *testing.T) {
	t.Parallel()

	wantErr := errors.New("boom")
	broken := newFailingComponent(wantErr)
	healthy := newLifecycleComponent()

	app := New(
		config.Config{ShutdownTimeout: time.Second},
		broken,
		healthy,
	)

	ctx, stop := context.WithCancel(context.Background())
	defer stop()

	done := make(chan error, 1)
	go func() { done <- app.Run(ctx) }()

	// Trigger the failure only after both components are running, so the
	// healthy one is genuinely interrupted by it.
	broken.waitStarted(t)
	healthy.waitStarted(t)
	broken.releaseRun()

	select {
	case <-time.After(time.Second):
		require.FailNow(t, "Run() did not return after component failure")
	case err := <-done:
		require.Error(t, err)
		assert.ErrorIs(t, err, wantErr)
	}

	broken.waitRunExited(t)
	healthy.waitRunExited(t)

	assert.Equal(t, 1, broken.shutdownCalls())
	assert.Equal(t, 1, healthy.shutdownCalls())
}
// lifecycleComponent is a test double whose Run blocks until its context is
// cancelled and Shutdown has been called. It counts Shutdown invocations so
// tests can assert on them.
type lifecycleComponent struct {
// startedCh is closed by Run as soon as it begins executing.
startedCh chan struct{}
// runDoneCh is closed when Run returns.
runDoneCh chan struct{}
// stopCh is closed by the first Shutdown call and releases Run.
stopCh chan struct{}
// shutdownMu guards shutdownCnt.
shutdownMu sync.Mutex
// shutdownCnt is the number of Shutdown calls observed so far.
shutdownCnt int
}
// newLifecycleComponent returns a lifecycleComponent with all of its signal
// channels allocated and open, and a zero Shutdown count.
func newLifecycleComponent() *lifecycleComponent {
	c := new(lifecycleComponent)
	c.startedCh = make(chan struct{})
	c.runDoneCh = make(chan struct{})
	c.stopCh = make(chan struct{})
	return c
}
// Run signals that the component has started, then blocks until ctx is done
// and, after that, until Shutdown has closed the stop channel. It always
// returns nil and closes runDoneCh on the way out.
func (c *lifecycleComponent) Run(ctx context.Context) error {
	close(c.startedCh)
	defer close(c.runDoneCh)

	// Both gates must open, in this order, before Run may return.
	for _, gate := range []<-chan struct{}{ctx.Done(), c.stopCh} {
		<-gate
	}
	return nil
}
// Shutdown counts the invocation and, on the first call only, closes the stop
// channel so Run can return. Repeated calls are safe and only bump the
// counter. It ignores its context and always returns nil.
func (c *lifecycleComponent) Shutdown(context.Context) error {
	c.shutdownMu.Lock()
	defer c.shutdownMu.Unlock()

	firstCall := c.shutdownCnt == 0
	c.shutdownCnt++
	if firstCall {
		// Closing twice would panic, hence the first-call guard.
		close(c.stopCh)
	}
	return nil
}
// waitStarted returns once Run has closed startedCh. If that does not happen
// within one second it fails the test immediately.
func (c *lifecycleComponent) waitStarted(t *testing.T) {
	t.Helper()

	timeout := time.NewTimer(time.Second)
	defer timeout.Stop()

	select {
	case <-timeout.C:
		require.FailNow(t, "component did not start")
	case <-c.startedCh:
	}
}
// waitRunExited returns once Run has closed runDoneCh. If that does not happen
// within one second it fails the test immediately.
func (c *lifecycleComponent) waitRunExited(t *testing.T) {
	t.Helper()

	timeout := time.NewTimer(time.Second)
	defer timeout.Stop()

	select {
	case <-timeout.C:
		require.FailNow(t, "component run did not exit")
	case <-c.runDoneCh:
	}
}
// shutdownCalls reports how many times Shutdown has been invoked so far. The
// counter is read under shutdownMu, so it is safe to call while Shutdown may
// still be running in another goroutine.
func (c *lifecycleComponent) shutdownCalls() int {
	c.shutdownMu.Lock()
	calls := c.shutdownCnt
	c.shutdownMu.Unlock()
	return calls
}
// failingComponent is a test double whose Run returns a predefined error once
// the test releases it. It ignores context cancellation and counts Shutdown
// invocations so tests can assert on them.
type failingComponent struct {
// startedCh is closed by Run as soon as it begins executing.
startedCh chan struct{}
// releaseCh is closed by releaseRun to let Run return.
releaseCh chan struct{}
// runDoneCh is closed when Run returns.
runDoneCh chan struct{}
// shutdownMu guards shutdownCnt.
shutdownMu sync.Mutex
// shutdownCnt is the number of Shutdown calls observed so far.
shutdownCnt int
// err is the value Run returns after being released.
err error
}
// newFailingComponent returns a failingComponent whose Run will return err
// once releaseRun is called. All signal channels start out open.
func newFailingComponent(err error) *failingComponent {
	c := &failingComponent{err: err}
	c.startedCh = make(chan struct{})
	c.releaseCh = make(chan struct{})
	c.runDoneCh = make(chan struct{})
	return c
}
// Run signals that the component has started, blocks until releaseRun closes
// the release channel, and then returns the configured error. The context is
// ignored, so cancellation alone never unblocks it. runDoneCh is closed when
// Run returns.
func (c *failingComponent) Run(context.Context) error {
	close(c.startedCh)
	defer close(c.runDoneCh)

	<-c.releaseCh

	failure := c.err
	return failure
}
// Shutdown counts the invocation and returns nil. Unlike releaseRun, it does
// not unblock Run. The context is ignored.
func (c *failingComponent) Shutdown(context.Context) error {
	c.shutdownMu.Lock()
	c.shutdownCnt++
	c.shutdownMu.Unlock()
	return nil
}
// waitStarted returns once Run has closed startedCh. If that does not happen
// within one second it fails the test immediately.
func (c *failingComponent) waitStarted(t *testing.T) {
	t.Helper()

	timeout := time.NewTimer(time.Second)
	defer timeout.Stop()

	select {
	case <-timeout.C:
		require.FailNow(t, "failing component did not start")
	case <-c.startedCh:
	}
}
// releaseRun closes the release channel so a blocked Run returns its
// configured error. It must be called at most once per component, because
// closing an already closed channel panics.
func (c *failingComponent) releaseRun() {
	gate := c.releaseCh
	close(gate)
}
// waitRunExited returns once Run has closed runDoneCh. If that does not happen
// within one second it fails the test immediately.
func (c *failingComponent) waitRunExited(t *testing.T) {
	t.Helper()

	timeout := time.NewTimer(time.Second)
	defer timeout.Stop()

	select {
	case <-timeout.C:
		require.FailNow(t, "failing component run did not exit")
	case <-c.runDoneCh:
	}
}
// shutdownCalls reports how many times Shutdown has been invoked so far. The
// counter is read under shutdownMu, so it is safe to call while Shutdown may
// still be running in another goroutine.
func (c *failingComponent) shutdownCalls() int {
	c.shutdownMu.Lock()
	calls := c.shutdownCnt
	c.shutdownMu.Unlock()
	return calls
}