feat: edge gateway service
This commit is contained in:
@@ -0,0 +1,178 @@
|
||||
// Package app wires the gateway process lifecycle and coordinates component
|
||||
// startup and graceful shutdown.
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"galaxy/gateway/internal/config"
|
||||
)
|
||||
|
||||
// Component describes a long-lived gateway subsystem whose startup and
// graceful shutdown are coordinated by App.
type Component interface {
	// Run starts the component and blocks until the component stops.
	Run(ctx context.Context) error

	// Shutdown stops the component. The supplied context is bounded by a
	// timeout, so implementations should return once it expires.
	Shutdown(ctx context.Context) error
}
|
||||
|
||||
// App owns the process-level lifecycle of the gateway and its registered
|
||||
// components.
|
||||
type App struct {
|
||||
cfg config.Config
|
||||
components []Component
|
||||
}
|
||||
|
||||
// New constructs an App with a defensive copy of the supplied components.
|
||||
func New(cfg config.Config, components ...Component) *App {
|
||||
clonedComponents := append([]Component(nil), components...)
|
||||
|
||||
return &App{
|
||||
cfg: cfg,
|
||||
components: clonedComponents,
|
||||
}
|
||||
}
|
||||
|
||||
// Run starts all configured components, waits for cancellation or the first
|
||||
// component failure, and then executes best-effort graceful shutdown for every
|
||||
// component.
|
||||
func (a *App) Run(ctx context.Context) error {
|
||||
if ctx == nil {
|
||||
return errors.New("run gateway app: nil context")
|
||||
}
|
||||
if err := a.validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(a.components) == 0 {
|
||||
<-ctx.Done()
|
||||
return nil
|
||||
}
|
||||
|
||||
runCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
results := make(chan componentResult, len(a.components))
|
||||
var runWG sync.WaitGroup
|
||||
|
||||
for idx, component := range a.components {
|
||||
runWG.Add(1)
|
||||
|
||||
go func(index int, component Component) {
|
||||
defer runWG.Done()
|
||||
results <- componentResult{
|
||||
index: index,
|
||||
err: component.Run(runCtx),
|
||||
}
|
||||
}(idx, component)
|
||||
}
|
||||
|
||||
var runErr error
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case result := <-results:
|
||||
runErr = classifyComponentResult(ctx, result)
|
||||
}
|
||||
|
||||
cancel()
|
||||
|
||||
shutdownErr := a.shutdownComponents()
|
||||
waitErr := a.waitForComponents(&runWG)
|
||||
|
||||
return errors.Join(runErr, shutdownErr, waitErr)
|
||||
}
|
||||
|
||||
// componentResult pairs the value returned by a component's Run with the
// position of that component in the App's component list.
type componentResult struct {
	index int   // position of the component within App.components
	err   error // value returned by the component's Run
}
|
||||
|
||||
// validate confirms that the App has a safe shutdown budget and no nil
|
||||
// components before goroutines are started.
|
||||
func (a *App) validate() error {
|
||||
if a.cfg.ShutdownTimeout <= 0 {
|
||||
return fmt.Errorf("run gateway app: shutdown timeout must be positive, got %s", a.cfg.ShutdownTimeout)
|
||||
}
|
||||
|
||||
for idx, component := range a.components {
|
||||
if component == nil {
|
||||
return fmt.Errorf("run gateway app: component %d is nil", idx)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// classifyComponentResult maps the first component exit into the error that
|
||||
// should control the application result.
|
||||
func classifyComponentResult(parentCtx context.Context, result componentResult) error {
|
||||
switch {
|
||||
case result.err == nil:
|
||||
if parentCtx.Err() != nil {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("run gateway app: component %d exited without error before shutdown", result.index)
|
||||
case errors.Is(result.err, context.Canceled) && parentCtx.Err() != nil:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("run gateway app: component %d: %w", result.index, result.err)
|
||||
}
|
||||
}
|
||||
|
||||
// shutdownComponents calls Shutdown on every registered component using a fresh
|
||||
// timeout-bounded context per component and joins any shutdown failures.
|
||||
func (a *App) shutdownComponents() error {
|
||||
var shutdownWG sync.WaitGroup
|
||||
errs := make(chan error, len(a.components))
|
||||
|
||||
for idx, component := range a.components {
|
||||
shutdownWG.Add(1)
|
||||
|
||||
go func(index int, component Component) {
|
||||
defer shutdownWG.Done()
|
||||
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), a.cfg.ShutdownTimeout)
|
||||
defer cancel()
|
||||
|
||||
if err := component.Shutdown(shutdownCtx); err != nil {
|
||||
errs <- fmt.Errorf("shutdown gateway component %d: %w", index, err)
|
||||
}
|
||||
}(idx, component)
|
||||
}
|
||||
|
||||
shutdownWG.Wait()
|
||||
close(errs)
|
||||
|
||||
var joined error
|
||||
for err := range errs {
|
||||
joined = errors.Join(joined, err)
|
||||
}
|
||||
|
||||
return joined
|
||||
}
|
||||
|
||||
// waitForComponents waits for running components to return after shutdown and
|
||||
// reports when they outlive the configured shutdown budget.
|
||||
func (a *App) waitForComponents(runWG *sync.WaitGroup) error {
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
runWG.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
waitCtx, cancel := context.WithTimeout(context.Background(), a.cfg.ShutdownTimeout)
|
||||
defer cancel()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
return nil
|
||||
case <-waitCtx.Done():
|
||||
return fmt.Errorf("wait for gateway components: %w", waitCtx.Err())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,268 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"galaxy/gateway/internal/config"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAppRunWaitsForCancellationWithoutComponents(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
application := New(config.Config{ShutdownTimeout: 50 * time.Millisecond})
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- application.Run(ctx)
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-resultCh:
|
||||
require.FailNowf(t, "Run() returned early", "error=%v", err)
|
||||
case <-time.After(50 * time.Millisecond):
|
||||
}
|
||||
|
||||
cancel()
|
||||
|
||||
select {
|
||||
case err := <-resultCh:
|
||||
require.NoError(t, err)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "Run() did not return after cancellation")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppRunCancelsComponentsAndCallsShutdownOnce(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
first := newLifecycleComponent()
|
||||
second := newLifecycleComponent()
|
||||
|
||||
application := New(
|
||||
config.Config{ShutdownTimeout: time.Second},
|
||||
first,
|
||||
second,
|
||||
)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- application.Run(ctx)
|
||||
}()
|
||||
|
||||
first.waitStarted(t)
|
||||
second.waitStarted(t)
|
||||
|
||||
cancel()
|
||||
|
||||
select {
|
||||
case err := <-resultCh:
|
||||
require.NoError(t, err)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "Run() did not return after cancellation")
|
||||
}
|
||||
|
||||
first.waitRunExited(t)
|
||||
second.waitRunExited(t)
|
||||
|
||||
assert.Equal(t, 1, first.shutdownCalls())
|
||||
assert.Equal(t, 1, second.shutdownCalls())
|
||||
}
|
||||
|
||||
func TestAppRunReturnsComponentErrorAndStillShutsDown(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
runErr := errors.New("boom")
|
||||
failing := newFailingComponent(runErr)
|
||||
blocking := newLifecycleComponent()
|
||||
|
||||
application := New(
|
||||
config.Config{ShutdownTimeout: time.Second},
|
||||
failing,
|
||||
blocking,
|
||||
)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
resultCh := make(chan error, 1)
|
||||
go func() {
|
||||
resultCh <- application.Run(ctx)
|
||||
}()
|
||||
|
||||
failing.waitStarted(t)
|
||||
blocking.waitStarted(t)
|
||||
failing.releaseRun()
|
||||
|
||||
select {
|
||||
case err := <-resultCh:
|
||||
require.Error(t, err)
|
||||
assert.ErrorIs(t, err, runErr)
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "Run() did not return after component failure")
|
||||
}
|
||||
|
||||
failing.waitRunExited(t)
|
||||
blocking.waitRunExited(t)
|
||||
|
||||
assert.Equal(t, 1, failing.shutdownCalls())
|
||||
assert.Equal(t, 1, blocking.shutdownCalls())
|
||||
}
|
||||
|
||||
// lifecycleComponent is a test component whose Run keeps blocking until the
// application has called Shutdown on it.
type lifecycleComponent struct {
	startedCh chan struct{} // closed when Run begins
	runDoneCh chan struct{} // closed when Run returns
	stopCh    chan struct{} // closed by the first Shutdown call

	shutdownMu  sync.Mutex // guards shutdownCnt
	shutdownCnt int        // number of Shutdown invocations observed
}
|
||||
|
||||
// newLifecycleComponent builds a component that exits Run only after Shutdown
|
||||
// signals its stop channel.
|
||||
func newLifecycleComponent() *lifecycleComponent {
|
||||
return &lifecycleComponent{
|
||||
startedCh: make(chan struct{}),
|
||||
runDoneCh: make(chan struct{}),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Run marks the component as started, waits for cancellation, and then blocks
|
||||
// until Shutdown releases the stop channel.
|
||||
func (c *lifecycleComponent) Run(ctx context.Context) error {
|
||||
close(c.startedCh)
|
||||
defer close(c.runDoneCh)
|
||||
|
||||
<-ctx.Done()
|
||||
<-c.stopCh
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shutdown records the call and releases the run loop.
|
||||
func (c *lifecycleComponent) Shutdown(context.Context) error {
|
||||
c.shutdownMu.Lock()
|
||||
defer c.shutdownMu.Unlock()
|
||||
|
||||
c.shutdownCnt++
|
||||
if c.shutdownCnt == 1 {
|
||||
close(c.stopCh)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// waitStarted blocks until Run has started or fails the test on timeout.
|
||||
func (c *lifecycleComponent) waitStarted(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
select {
|
||||
case <-c.startedCh:
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "component did not start")
|
||||
}
|
||||
}
|
||||
|
||||
// waitRunExited blocks until Run exits or fails the test on timeout.
|
||||
func (c *lifecycleComponent) waitRunExited(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
select {
|
||||
case <-c.runDoneCh:
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "component run did not exit")
|
||||
}
|
||||
}
|
||||
|
||||
// shutdownCalls returns the number of observed Shutdown invocations.
|
||||
func (c *lifecycleComponent) shutdownCalls() int {
|
||||
c.shutdownMu.Lock()
|
||||
defer c.shutdownMu.Unlock()
|
||||
|
||||
return c.shutdownCnt
|
||||
}
|
||||
|
||||
// failingComponent is a test component whose Run returns a preset error once
// the test releases it; it also counts Shutdown invocations.
type failingComponent struct {
	startedCh chan struct{} // closed when Run begins
	releaseCh chan struct{} // closed by the test to let Run return
	runDoneCh chan struct{} // closed when Run returns

	shutdownMu  sync.Mutex // guards shutdownCnt
	shutdownCnt int        // number of Shutdown invocations observed

	err error // value returned by Run
}
|
||||
|
||||
// newFailingComponent builds a component whose Run returns err after release.
|
||||
func newFailingComponent(err error) *failingComponent {
|
||||
return &failingComponent{
|
||||
startedCh: make(chan struct{}),
|
||||
releaseCh: make(chan struct{}),
|
||||
runDoneCh: make(chan struct{}),
|
||||
err: err,
|
||||
}
|
||||
}
|
||||
|
||||
// Run waits until the test releases it and then returns the configured error.
|
||||
func (c *failingComponent) Run(context.Context) error {
|
||||
close(c.startedCh)
|
||||
defer close(c.runDoneCh)
|
||||
|
||||
<-c.releaseCh
|
||||
return c.err
|
||||
}
|
||||
|
||||
// Shutdown records that the application attempted graceful shutdown.
|
||||
func (c *failingComponent) Shutdown(context.Context) error {
|
||||
c.shutdownMu.Lock()
|
||||
defer c.shutdownMu.Unlock()
|
||||
|
||||
c.shutdownCnt++
|
||||
return nil
|
||||
}
|
||||
|
||||
// waitStarted blocks until Run has started or fails the test on timeout.
|
||||
func (c *failingComponent) waitStarted(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
select {
|
||||
case <-c.startedCh:
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "failing component did not start")
|
||||
}
|
||||
}
|
||||
|
||||
// releaseRun allows Run to return its configured error.
|
||||
func (c *failingComponent) releaseRun() {
|
||||
close(c.releaseCh)
|
||||
}
|
||||
|
||||
// waitRunExited blocks until Run exits or fails the test on timeout.
|
||||
func (c *failingComponent) waitRunExited(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
select {
|
||||
case <-c.runDoneCh:
|
||||
case <-time.After(time.Second):
|
||||
require.FailNow(t, "failing component run did not exit")
|
||||
}
|
||||
}
|
||||
|
||||
// shutdownCalls returns the number of observed Shutdown invocations.
|
||||
func (c *failingComponent) shutdownCalls() int {
|
||||
c.shutdownMu.Lock()
|
||||
defer c.shutdownMu.Unlock()
|
||||
|
||||
return c.shutdownCnt
|
||||
}
|
||||
Reference in New Issue
Block a user