diplomail (Stage E): LibreTranslate client + async translation worker
Tests · Go / test (push) Successful in 1m59s
Tests · Go / test (pull_request) Successful in 2m1s
Tests · Integration / integration (pull_request) Successful in 1m37s

Synchronous translation on read (Stage D) blocks the HTTP handler on
translator I/O. Stage E switches to "send moments-fast, deliver
when translated": recipients whose preferred_language differs from
the detected body_lang are inserted with available_at=NULL, and an
async worker turns them on once a LibreTranslate call materialises
the cache row (or fails terminally after 5 retries).

Schema delta on diplomail_recipients: available_at,
translation_attempts, next_translation_attempt_at, plus a snapshot
recipient_preferred_language so the worker queries do not need a
join. Read paths (ListInbox, GetMessage, UnreadCount) filter on
available_at IS NOT NULL. Push fan-out is moved from Service to the
worker so the recipient only sees the toast when the inbox row is
actually visible.

Translator backend is now a configurable choice: empty
BACKEND_DIPLOMAIL_TRANSLATOR_URL → noop (deliver original);
populated → LibreTranslate HTTP client. Per-attempt timeout, max
attempts, and worker interval all live in DiplomailConfig. The HTTP
client itself is unit-tested via httptest (happy path, BCP47
normalisation, unsupported pair, 5xx, identical src/dst, missing
URL); worker delivery + fallback paths are covered by the
testcontainers-backed e2e tests in diplomail_e2e_test.go.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ilia Denisov
2026-05-15 20:15:28 +02:00
parent e22f4b7800
commit 9f7c9099bc
16 changed files with 1222 additions and 155 deletions
@@ -0,0 +1,154 @@
package translator
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// LibreTranslateEngine is the engine identifier persisted in
// `diplomail_translations.translator` for cache rows produced by the
// LibreTranslate client.
const LibreTranslateEngine = "libretranslate"
// LibreTranslateConfig configures the HTTP client. URL is the base
// of the deployed instance (without `/translate`). Timeout bounds a
// single HTTP request; the worker layers retry / backoff on top.
type LibreTranslateConfig struct {
URL string
Timeout time.Duration
}
// ErrUnsupportedLanguagePair classifies a LibreTranslate 400 response
// that indicates the engine cannot translate between the requested
// source / target codes. The worker treats this as terminal: no
// further retries, deliver the original.
var ErrUnsupportedLanguagePair = errors.New("translator: language pair not supported by libretranslate")
// NewLibreTranslate constructs a Translator that posts to
// `<URL>/translate`. Returns an error when URL is empty so wiring
// catches "translator misconfigured" at startup rather than at
// first-translation-attempt.
func NewLibreTranslate(cfg LibreTranslateConfig) (Translator, error) {
url := strings.TrimRight(strings.TrimSpace(cfg.URL), "/")
if url == "" {
return nil, errors.New("translator: libretranslate URL must be set")
}
timeout := cfg.Timeout
if timeout <= 0 {
timeout = 10 * time.Second
}
return &libreTranslate{
endpoint: url + "/translate",
client: &http.Client{Timeout: timeout},
}, nil
}
type libreTranslate struct {
endpoint string
client *http.Client
}
// requestBody is the LibreTranslate POST /translate input shape.
// `q` is sent as a two-element array so the engine returns one
// translation per element in the same call (subject + body).
type requestBody struct {
Q []string `json:"q"`
Source string `json:"source"`
Target string `json:"target"`
Format string `json:"format"`
}
// responseBody is the LibreTranslate output shape when `q` is an
// array. The single-string-q variant is a different shape; we never
// emit a single-q request so the client always sees the array form.
type responseBody struct {
TranslatedText []string `json:"translatedText"`
Error string `json:"error,omitempty"`
}
// Translate posts subject + body to LibreTranslate, normalising the
// language codes and classifying the response. The 400 / unsupported-
// pair path is signalled by `ErrUnsupportedLanguagePair`. All other
// HTTP errors (timeout, 5xx, network failure) come back as wrapped
// errors so the worker can backoff and retry.
func (l *libreTranslate) Translate(ctx context.Context, srcLang, dstLang, subject, body string) (Result, error) {
src := normaliseLanguageCode(srcLang)
dst := normaliseLanguageCode(dstLang)
if src == "" || dst == "" {
return Result{}, fmt.Errorf("translator: missing source or target language (src=%q dst=%q)", srcLang, dstLang)
}
if src == dst {
return Result{Subject: subject, Body: body, Engine: NoopEngine}, nil
}
reqBody, err := json.Marshal(requestBody{
Q: []string{subject, body},
Source: src,
Target: dst,
Format: "text",
})
if err != nil {
return Result{}, fmt.Errorf("translator: marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, l.endpoint, bytes.NewReader(reqBody))
if err != nil {
return Result{}, fmt.Errorf("translator: build request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
resp, err := l.client.Do(req)
if err != nil {
return Result{}, fmt.Errorf("translator: do request: %w", err)
}
defer func() { _ = resp.Body.Close() }()
raw, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
if err != nil {
return Result{}, fmt.Errorf("translator: read response: %w", err)
}
if resp.StatusCode == http.StatusBadRequest {
return Result{}, fmt.Errorf("%w: %s", ErrUnsupportedLanguagePair, strings.TrimSpace(string(raw)))
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return Result{}, fmt.Errorf("translator: libretranslate http %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
}
var out responseBody
if err := json.Unmarshal(raw, &out); err != nil {
return Result{}, fmt.Errorf("translator: unmarshal response: %w", err)
}
if out.Error != "" {
return Result{}, fmt.Errorf("translator: libretranslate error: %s", out.Error)
}
if len(out.TranslatedText) != 2 {
return Result{}, fmt.Errorf("translator: libretranslate returned %d strings, want 2", len(out.TranslatedText))
}
return Result{
Subject: out.TranslatedText[0],
Body: out.TranslatedText[1],
Engine: LibreTranslateEngine,
}, nil
}
// normaliseLanguageCode collapses a BCP 47 tag to the ISO 639-1 base
// that LibreTranslate expects (`en-US` → `en`, `EN` → `en`). The
// helper is mirrored on the diplomail service side; both sides need
// to use the same normalisation so cache keys line up.
func normaliseLanguageCode(tag string) string {
tag = strings.TrimSpace(tag)
if tag == "" {
return ""
}
if i := strings.IndexAny(tag, "-_"); i > 0 {
tag = tag[:i]
}
return strings.ToLower(tag)
}
@@ -0,0 +1,141 @@
package translator
import (
"context"
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
func TestLibreTranslateHappyPath(t *testing.T) {
t.Parallel()
var (
requestSource string
requestTarget string
requestQ []string
requestFormat string
)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
body, _ := io.ReadAll(r.Body)
var in requestBody
if err := json.Unmarshal(body, &in); err != nil {
t.Errorf("unmarshal: %v", err)
}
requestSource = in.Source
requestTarget = in.Target
requestQ = in.Q
requestFormat = in.Format
_ = json.NewEncoder(w).Encode(responseBody{
TranslatedText: []string{"[ru] " + in.Q[0], "[ru] " + in.Q[1]},
})
}))
t.Cleanup(server.Close)
tr, err := NewLibreTranslate(LibreTranslateConfig{URL: server.URL, Timeout: 2 * time.Second})
if err != nil {
t.Fatalf("new: %v", err)
}
res, err := tr.Translate(context.Background(), "en", "ru", "Hello", "World")
if err != nil {
t.Fatalf("translate: %v", err)
}
if res.Engine != LibreTranslateEngine {
t.Fatalf("engine = %q, want %q", res.Engine, LibreTranslateEngine)
}
if res.Subject != "[ru] Hello" || res.Body != "[ru] World" {
t.Fatalf("result = %+v", res)
}
if requestSource != "en" || requestTarget != "ru" || requestFormat != "text" {
t.Fatalf("request fields: src=%q dst=%q fmt=%q", requestSource, requestTarget, requestFormat)
}
if len(requestQ) != 2 || requestQ[0] != "Hello" || requestQ[1] != "World" {
t.Fatalf("request q = %v", requestQ)
}
}
func TestLibreTranslateNormalisesLanguageCodes(t *testing.T) {
t.Parallel()
var src, dst string
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
body, _ := io.ReadAll(r.Body)
var in requestBody
_ = json.Unmarshal(body, &in)
src, dst = in.Source, in.Target
_ = json.NewEncoder(w).Encode(responseBody{TranslatedText: []string{"a", "b"}})
}))
t.Cleanup(server.Close)
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
if _, err := tr.Translate(context.Background(), "EN-US", "ru-RU", "x", "y"); err != nil {
t.Fatalf("translate: %v", err)
}
if src != "en" || dst != "ru" {
t.Fatalf("normalised codes src=%q dst=%q, want en/ru", src, dst)
}
}
func TestLibreTranslateUnsupportedPair(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte(`{"error":"language not supported"}`))
}))
t.Cleanup(server.Close)
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
_, err := tr.Translate(context.Background(), "en", "xx", "subject", "body")
if !errors.Is(err, ErrUnsupportedLanguagePair) {
t.Fatalf("err = %v, want ErrUnsupportedLanguagePair", err)
}
}
func TestLibreTranslateServerError(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte("kaboom"))
}))
t.Cleanup(server.Close)
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
_, err := tr.Translate(context.Background(), "en", "ru", "subject", "body")
if err == nil {
t.Fatalf("expected error, got nil")
}
if errors.Is(err, ErrUnsupportedLanguagePair) {
t.Fatalf("err mis-classified as unsupported pair: %v", err)
}
if !strings.Contains(err.Error(), "500") {
t.Fatalf("err = %v, want mention of 500", err)
}
}
func TestLibreTranslateSameSourceAndTargetIsNoop(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
t.Errorf("translator should not call the server for identical src/dst: %s", r.URL.Path)
}))
t.Cleanup(server.Close)
tr, _ := NewLibreTranslate(LibreTranslateConfig{URL: server.URL})
res, err := tr.Translate(context.Background(), "en", "EN", "x", "y")
if err != nil {
t.Fatalf("translate: %v", err)
}
if res.Engine != NoopEngine {
t.Fatalf("engine = %q, want %q", res.Engine, NoopEngine)
}
}
func TestLibreTranslateRequiresURL(t *testing.T) {
t.Parallel()
_, err := NewLibreTranslate(LibreTranslateConfig{URL: ""})
if err == nil {
t.Fatalf("expected error for empty URL")
}
}