package postgres import ( "context" "database/sql" "database/sql/driver" "errors" "fmt" "io/fs" "strings" "sync" "time" "github.com/pressly/goose/v3" "scrabble/backend/internal/postgres/migrations" ) // schemaName is the Postgres schema owned by the backend service. Every backend // table lives here, and the DSN pins search_path to it. const schemaName = "backend" // migrationRetryAttempts and migrationRetryBackoff bound the transient-error // retry around ApplyMigrations. A freshly started Postgres — notably a test // container — can reset a pooled connection moments after it reports ready, // which surfaces as "bad connection" mid-migration; a handful of quick retries // ride over that without masking real failures. const ( migrationRetryAttempts = 5 migrationRetryBackoff = 250 * time.Millisecond ) // gooseMu serialises access to goose's package-level filesystem state so a // second caller in the same process cannot race on goose.SetBaseFS. var gooseMu sync.Mutex // ApplyMigrations runs every pending Up migration embedded in the backend // binary against db. The schema is created upfront so goose's bookkeeping table // (`goose_db_version`, scoped to the DSN search_path) has somewhere to land // before the first migration runs; migration 00001_init.sql re-asserts the // schema with IF NOT EXISTS, so the double-create is idempotent. // // The apply is retried on transient connection errors. Both steps are // idempotent, so a retry after a dropped connection resumes from the last // committed migration. 
func ApplyMigrations(ctx context.Context, db *sql.DB) error { return retryOnTransient(ctx, migrationRetryAttempts, migrationRetryBackoff, func() error { if _, err := db.ExecContext(ctx, "CREATE SCHEMA IF NOT EXISTS "+schemaName); err != nil { return fmt.Errorf("ensure backend schema: %w", err) } if err := runMigrations(ctx, db, migrations.Migrations(), "."); err != nil { return fmt.Errorf("apply backend migrations: %w", err) } return nil }) } // runMigrations applies every pending Up migration found under dir inside fsys // against db. The PostgreSQL dialect is forced; goose's package-level base FS is // restored on the way out so a second caller in the same process is safe. dir // is "." when the migration files sit at the embed root. func runMigrations(ctx context.Context, db *sql.DB, fsys fs.FS, dir string) error { if db == nil { return errors.New("run migrations: nil db") } if fsys == nil { return errors.New("run migrations: nil fs") } gooseMu.Lock() defer gooseMu.Unlock() goose.SetBaseFS(fsys) defer goose.SetBaseFS(nil) if err := goose.SetDialect("postgres"); err != nil { return fmt.Errorf("run migrations: set dialect: %w", err) } if err := goose.UpContext(ctx, db, dir); err != nil { return fmt.Errorf("run migrations: %w", err) } return nil } // retryOnTransient runs op up to attempts times, retrying only when op fails // with a transient connection error — a dropped, reset, or refused connection, // as opposed to a deterministic SQL error. It waits backoff between attempts and // stops early if ctx is cancelled. 
func retryOnTransient(ctx context.Context, attempts int, backoff time.Duration, op func() error) error {
	// The loop has no upper bound in its header on purpose: op always runs at
	// least once, even when attempts is zero or negative. A bounded loop would
	// be skipped entirely in that case and report success without doing any
	// work.
	for attempt := 1; ; attempt++ {
		err := op()
		if err == nil {
			return nil
		}
		// Give up once the budget is spent, or immediately for errors that a
		// retry cannot fix.
		if attempt >= attempts || !isTransientConnError(err) {
			return err
		}

		// An explicit timer (rather than time.After) can be stopped as soon as
		// the context is cancelled instead of lingering for the full backoff.
		timer := time.NewTimer(backoff)
		select {
		case <-ctx.Done():
			timer.Stop()
			// Keep both the last operation error and the cancellation cause
			// visible to errors.Is.
			return errors.Join(err, ctx.Err())
		case <-timer.C:
		}
	}
}

// isTransientConnError reports whether err is a transient connection-level
// failure worth retrying, leaving deterministic SQL errors (syntax, constraint
// violations) to fail fast.
//
// An error qualifies when driver.ErrBadConn is anywhere in its chain, or when
// its message contains, case-insensitively, one of a fixed set of
// connection-failure phrases. The text match covers errors that carry the
// condition only in their message. A nil err is never transient.
func isTransientConnError(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, driver.ErrBadConn) {
		return true
	}
	msg := strings.ToLower(err.Error())
	for _, s := range []string{
		"bad connection",
		"connection refused",
		"connection reset",
		"broken pipe",
		"server closed the connection",
	} {
		if strings.Contains(msg, s) {
			return true
		}
	}
	return false
}