diplomail (Stage E): LibreTranslate client + async translation worker
Synchronous translation on read (Stage D) blocks the HTTP handler on translator I/O. Stage E switches to "send moments-fast, deliver when translated": recipients whose preferred_language differs from the detected body_lang are inserted with available_at=NULL, and an async worker turns them on once a LibreTranslate call materialises the cache row (or fails terminally after 5 retries). Schema delta on diplomail_recipients: available_at, translation_attempts, next_translation_attempt_at, plus a snapshot recipient_preferred_language so the worker queries do not need a join. Read paths (ListInbox, GetMessage, UnreadCount) filter on available_at IS NOT NULL. Push fan-out is moved from Service to the worker so the recipient only sees the toast when the inbox row is actually visible. Translator backend is now a configurable choice: empty BACKEND_DIPLOMAIL_TRANSLATOR_URL → noop (deliver original); populated → LibreTranslate HTTP client. Per-attempt timeout, max attempts, and worker interval all live in DiplomailConfig. The HTTP client itself is unit-tested via httptest (happy path, BCP47 normalisation, unsupported pair, 5xx, identical src/dst, missing URL); worker delivery + fallback paths are covered by the testcontainers-backed e2e tests in diplomail_e2e_test.go. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
package translator
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LibreTranslateEngine is the engine identifier persisted in
|
||||
// `diplomail_translations.translator` for cache rows produced by the
|
||||
// LibreTranslate client.
|
||||
const LibreTranslateEngine = "libretranslate"
|
||||
|
||||
// LibreTranslateConfig configures the HTTP client. URL is the base
|
||||
// of the deployed instance (without `/translate`). Timeout bounds a
|
||||
// single HTTP request; the worker layers retry / backoff on top.
|
||||
type LibreTranslateConfig struct {
|
||||
URL string
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// ErrUnsupportedLanguagePair classifies a LibreTranslate 400 response
|
||||
// that indicates the engine cannot translate between the requested
|
||||
// source / target codes. The worker treats this as terminal: no
|
||||
// further retries, deliver the original.
|
||||
var ErrUnsupportedLanguagePair = errors.New("translator: language pair not supported by libretranslate")
|
||||
|
||||
// NewLibreTranslate constructs a Translator that posts to
|
||||
// `<URL>/translate`. Returns an error when URL is empty so wiring
|
||||
// catches "translator misconfigured" at startup rather than at
|
||||
// first-translation-attempt.
|
||||
func NewLibreTranslate(cfg LibreTranslateConfig) (Translator, error) {
|
||||
url := strings.TrimRight(strings.TrimSpace(cfg.URL), "/")
|
||||
if url == "" {
|
||||
return nil, errors.New("translator: libretranslate URL must be set")
|
||||
}
|
||||
timeout := cfg.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10 * time.Second
|
||||
}
|
||||
return &libreTranslate{
|
||||
endpoint: url + "/translate",
|
||||
client: &http.Client{Timeout: timeout},
|
||||
}, nil
|
||||
}
|
||||
|
||||
type libreTranslate struct {
|
||||
endpoint string
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
// requestBody is the LibreTranslate POST /translate input shape.
|
||||
// `q` is sent as a two-element array so the engine returns one
|
||||
// translation per element in the same call (subject + body).
|
||||
type requestBody struct {
|
||||
Q []string `json:"q"`
|
||||
Source string `json:"source"`
|
||||
Target string `json:"target"`
|
||||
Format string `json:"format"`
|
||||
}
|
||||
|
||||
// responseBody is the LibreTranslate output shape when `q` is an
|
||||
// array. The single-string-q variant is a different shape; we never
|
||||
// emit a single-q request so the client always sees the array form.
|
||||
type responseBody struct {
|
||||
TranslatedText []string `json:"translatedText"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// Translate posts subject + body to LibreTranslate, normalising the
|
||||
// language codes and classifying the response. The 400 / unsupported-
|
||||
// pair path is signalled by `ErrUnsupportedLanguagePair`. All other
|
||||
// HTTP errors (timeout, 5xx, network failure) come back as wrapped
|
||||
// errors so the worker can backoff and retry.
|
||||
func (l *libreTranslate) Translate(ctx context.Context, srcLang, dstLang, subject, body string) (Result, error) {
|
||||
src := normaliseLanguageCode(srcLang)
|
||||
dst := normaliseLanguageCode(dstLang)
|
||||
if src == "" || dst == "" {
|
||||
return Result{}, fmt.Errorf("translator: missing source or target language (src=%q dst=%q)", srcLang, dstLang)
|
||||
}
|
||||
if src == dst {
|
||||
return Result{Subject: subject, Body: body, Engine: NoopEngine}, nil
|
||||
}
|
||||
|
||||
reqBody, err := json.Marshal(requestBody{
|
||||
Q: []string{subject, body},
|
||||
Source: src,
|
||||
Target: dst,
|
||||
Format: "text",
|
||||
})
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: marshal request: %w", err)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, l.endpoint, bytes.NewReader(reqBody))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := l.client.Do(req)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: do request: %w", err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
raw, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: read response: %w", err)
|
||||
}
|
||||
if resp.StatusCode == http.StatusBadRequest {
|
||||
return Result{}, fmt.Errorf("%w: %s", ErrUnsupportedLanguagePair, strings.TrimSpace(string(raw)))
|
||||
}
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return Result{}, fmt.Errorf("translator: libretranslate http %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
|
||||
}
|
||||
|
||||
var out responseBody
|
||||
if err := json.Unmarshal(raw, &out); err != nil {
|
||||
return Result{}, fmt.Errorf("translator: unmarshal response: %w", err)
|
||||
}
|
||||
if out.Error != "" {
|
||||
return Result{}, fmt.Errorf("translator: libretranslate error: %s", out.Error)
|
||||
}
|
||||
if len(out.TranslatedText) != 2 {
|
||||
return Result{}, fmt.Errorf("translator: libretranslate returned %d strings, want 2", len(out.TranslatedText))
|
||||
}
|
||||
return Result{
|
||||
Subject: out.TranslatedText[0],
|
||||
Body: out.TranslatedText[1],
|
||||
Engine: LibreTranslateEngine,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// normaliseLanguageCode collapses a BCP 47 tag to the ISO 639-1 base
|
||||
// that LibreTranslate expects (`en-US` → `en`, `EN` → `en`). The
|
||||
// helper is mirrored on the diplomail service side; both sides need
|
||||
// to use the same normalisation so cache keys line up.
|
||||
func normaliseLanguageCode(tag string) string {
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag == "" {
|
||||
return ""
|
||||
}
|
||||
if i := strings.IndexAny(tag, "-_"); i > 0 {
|
||||
tag = tag[:i]
|
||||
}
|
||||
return strings.ToLower(tag)
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
package translator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLibreTranslateHappyPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
var (
|
||||
requestSource string
|
||||
requestTarget string
|
||||
requestQ []string
|
||||
requestFormat string
|
||||
)
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
body, _ := io.ReadAll(r.Body)
|
||||
var in requestBody
|
||||
if err := json.Unmarshal(body, &in); err != nil {
|
||||
t.Errorf("unmarshal: %v", err)
|
||||
}
|
||||
requestSource = in.Source
|
||||
requestTarget = in.Target
|
||||
requestQ = in.Q
|
||||
requestFormat = in.Format
|
||||
_ = json.NewEncoder(w).Encode(responseBody{
|
||||
TranslatedText: []string{"[ru] " + in.Q[0], "[ru] " + in.Q[1]},
|
||||
})
|
||||
}))
|
||||
t.Cleanup(server.Close)
|
||||
|
||||
tr, err := NewLibreTranslate(LibreTranslateConfig{URL: server.URL, Timeout: 2 * time.Second})
|
||||
if err != nil {
|
||||
t.Fatalf("new: %v", err)
|
||||
}
|
||||
res, err := tr.Translate(context.Background(), "en", "ru", "Hello", "World")
|
||||
if err != nil {
|
||||
t.Fatalf("translate: %v", err)
|
||||
}
|
||||
if res.Engine != LibreTranslateEngine {
|
||||
t.Fatalf("engine = %q, want %q", res.Engine, LibreTranslateEngine)
|
||||
}
|
||||
if res.Subject != "[ru] Hello" || res.Body != "[ru] World" {
|
||||
t.Fatalf("result = %+v", res)
|
||||
}
|
||||
if requestSource != "en" || requestTarget != "ru" || requestFormat != "text" {
|
||||
t.Fatalf("request fields: src=%q dst=%q fmt=%q", requestSource, requestTarget, requestFormat)
|
||||
}
|
||||
if len(requestQ) != 2 || requestQ[0] != "Hello" || requestQ[1] != "World" {
|
||||
t.Fatalf("request q = %v", requestQ)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibreTranslateNormalisesLanguageCodes(t *testing.T) {
|
||||
t.Parallel()
|
||||
var src, dst string
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
body, _ := io.ReadAll(r.Body)
|
||||
var in requestBody
|
||||
_ = json.Unmarshal(body, &in)
|
||||
src, dst = in.Source, in.Target
|
||||
_ = json.NewEncoder(w).Encode(responseBody{TranslatedText: []string{"a", "b"}})
|
||||
}))
|
||||
t.Cleanup(server.Close)
|
||||
|
||||
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
|
||||
if _, err := tr.Translate(context.Background(), "EN-US", "ru-RU", "x", "y"); err != nil {
|
||||
t.Fatalf("translate: %v", err)
|
||||
}
|
||||
if src != "en" || dst != "ru" {
|
||||
t.Fatalf("normalised codes src=%q dst=%q, want en/ru", src, dst)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibreTranslateUnsupportedPair(t *testing.T) {
|
||||
t.Parallel()
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
_, _ = w.Write([]byte(`{"error":"language not supported"}`))
|
||||
}))
|
||||
t.Cleanup(server.Close)
|
||||
|
||||
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
|
||||
_, err := tr.Translate(context.Background(), "en", "xx", "subject", "body")
|
||||
if !errors.Is(err, ErrUnsupportedLanguagePair) {
|
||||
t.Fatalf("err = %v, want ErrUnsupportedLanguagePair", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibreTranslateServerError(t *testing.T) {
|
||||
t.Parallel()
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
_, _ = w.Write([]byte("kaboom"))
|
||||
}))
|
||||
t.Cleanup(server.Close)
|
||||
|
||||
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
|
||||
_, err := tr.Translate(context.Background(), "en", "ru", "subject", "body")
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got nil")
|
||||
}
|
||||
if errors.Is(err, ErrUnsupportedLanguagePair) {
|
||||
t.Fatalf("err mis-classified as unsupported pair: %v", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "500") {
|
||||
t.Fatalf("err = %v, want mention of 500", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibreTranslateSameSourceAndTargetIsNoop(t *testing.T) {
|
||||
t.Parallel()
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
t.Errorf("translator should not call the server for identical src/dst: %s", r.URL.Path)
|
||||
}))
|
||||
t.Cleanup(server.Close)
|
||||
|
||||
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
|
||||
res, err := tr.Translate(context.Background(), "en", "EN", "x", "y")
|
||||
if err != nil {
|
||||
t.Fatalf("translate: %v", err)
|
||||
}
|
||||
if res.Engine != NoopEngine {
|
||||
t.Fatalf("engine = %q, want %q", res.Engine, NoopEngine)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibreTranslateRequiresURL(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, err := NewLibreTranslate(LibreTranslateConfig{URL: ""})
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for empty URL")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user