diplomail (Stage E): LibreTranslate client + async translation worker
Tests · Go / test (push) Successful in 1m59s
Tests · Go / test (pull_request) Successful in 2m1s
Tests · Integration / integration (pull_request) Successful in 1m37s

Synchronous translation on read (Stage D) blocks the HTTP handler on
translator I/O. Stage E switches to a "send immediately, deliver once
translated" model: recipients whose preferred_language differs from
the detected body_lang are inserted with available_at=NULL, and an
async worker makes those rows visible once a LibreTranslate call
materialises the cache row (or the translation fails terminally after
5 retries).

Schema delta on diplomail_recipients: available_at,
translation_attempts, next_translation_attempt_at, plus a snapshot
recipient_preferred_language so the worker queries do not need a
join. Read paths (ListInbox, GetMessage, UnreadCount) filter on
available_at IS NOT NULL. Push fan-out is moved from Service to the
worker so the recipient only sees the toast when the inbox row is
actually visible.

Translator backend is now a configurable choice: empty
BACKEND_DIPLOMAIL_TRANSLATOR_URL → noop (deliver original);
populated → LibreTranslate HTTP client. Per-attempt timeout, max
attempts, and worker interval all live in DiplomailConfig. The HTTP
client itself is unit-tested via httptest (happy path, BCP47
normalisation, unsupported pair, 5xx, identical src/dst, missing
URL); worker delivery + fallback paths are covered by the
testcontainers-backed e2e tests in diplomail_e2e_test.go.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ilia Denisov
2026-05-15 20:15:28 +02:00
parent e22f4b7800
commit 9f7c9099bc
16 changed files with 1222 additions and 155 deletions
@@ -0,0 +1,154 @@
package translator
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// LibreTranslateEngine is the engine identifier persisted in
// `diplomail_translations.translator` for cache rows produced by the
// LibreTranslate client.
//
// Translate reports it in Result.Engine whenever the text was actually
// translated by the remote service; the identical-language shortcut
// reports NoopEngine instead.
const LibreTranslateEngine = "libretranslate"
// LibreTranslateConfig configures the HTTP client built by
// NewLibreTranslate.
//
// URL is the base of the deployed instance (without `/translate`).
// The constructor strips surrounding whitespace and trailing slashes,
// appends `/translate` itself, and rejects an empty value.
//
// Timeout bounds a single HTTP request; the worker layers retry /
// backoff on top. A zero or negative Timeout makes the constructor
// fall back to 10 seconds.
type LibreTranslateConfig struct {
// URL is the base address of the LibreTranslate instance.
URL string
// Timeout is the per-request limit handed to the underlying http.Client.
Timeout time.Duration
}
// ErrUnsupportedLanguagePair classifies a LibreTranslate 400 response
// that indicates the engine cannot translate between the requested
// source / target codes. The worker treats this as terminal: no
// further retries, deliver the original.
//
// Translate returns it wrapped together with the response body, so
// callers must match it with errors.Is rather than with ==.
//
// NOTE(review): Translate maps every HTTP 400 to this sentinel, not
// only the ones whose body mentions an unsupported language.
var ErrUnsupportedLanguagePair = errors.New("translator: language pair not supported by libretranslate")
// NewLibreTranslate builds a Translator that issues its requests
// against `<URL>/translate`.
//
// cfg.URL is trimmed of surrounding whitespace and trailing slashes
// before the path is appended. An empty result is rejected with an
// error so that a misconfigured translator is detected while wiring
// the service, not on the first translation attempt.
//
// cfg.Timeout becomes the http.Client timeout; a zero or negative
// value is replaced by a 10 second default.
func NewLibreTranslate(cfg LibreTranslateConfig) (Translator, error) {
	base := strings.TrimSpace(cfg.URL)
	base = strings.TrimRight(base, "/")
	if base == "" {
		return nil, errors.New("translator: libretranslate URL must be set")
	}

	// Start from the default and override only with a usable value.
	httpClient := &http.Client{Timeout: 10 * time.Second}
	if cfg.Timeout > 0 {
		httpClient.Timeout = cfg.Timeout
	}

	engine := &libreTranslate{
		endpoint: base + "/translate",
		client:   httpClient,
	}
	return engine, nil
}
// libreTranslate is the LibreTranslate-backed value returned by
// NewLibreTranslate; its Translate method performs the HTTP call.
type libreTranslate struct {
// endpoint is the full request URL: the configured base with
// `/translate` appended.
endpoint string
// client sends the POST; NewLibreTranslate sets its Timeout.
client *http.Client
}
// requestBody is the LibreTranslate POST /translate input shape.
// `q` is sent as a two-element array so the engine returns one
// translation per element in the same call (subject + body).
type requestBody struct {
// Q holds the texts to translate; Translate fills it with the
// subject followed by the body.
Q []string `json:"q"`
// Source is the normalised source language code.
Source string `json:"source"`
// Target is the normalised target language code.
Target string `json:"target"`
// Format is the input format; Translate always sends "text".
Format string `json:"format"`
}
// responseBody is the LibreTranslate output shape when `q` is an
// array. The single-string-q variant is a different shape; we never
// emit a single-q request so the client always sees the array form.
type responseBody struct {
// TranslatedText carries the translations; Translate requires
// exactly two entries and reads them as subject, then body.
TranslatedText []string `json:"translatedText"`
// Error is an engine-reported message; Translate turns a non-empty
// value on a 2xx response into an error.
Error string `json:"error,omitempty"`
}
// Translate sends subject and body to LibreTranslate in a single POST
// and returns their translations.
//
// srcLang and dstLang are first reduced with normaliseLanguageCode. If
// either comes out empty an error is returned. If both come out equal
// the inputs are handed back untouched with Engine set to NoopEngine
// and no request is made.
//
// Failure classification:
//   - HTTP 400 is returned as an error wrapping
//     ErrUnsupportedLanguagePair (match it with errors.Is).
//   - Everything else (transport failures, timeouts, any other
//     non-2xx status, undecodable or unexpectedly shaped payloads) is
//     returned as an ordinary wrapped error so the worker can back
//     off and retry.
func (l *libreTranslate) Translate(ctx context.Context, srcLang, dstLang, subject, body string) (Result, error) {
	from, to := normaliseLanguageCode(srcLang), normaliseLanguageCode(dstLang)
	switch {
	case from == "" || to == "":
		return Result{}, fmt.Errorf("translator: missing source or target language (src=%q dst=%q)", srcLang, dstLang)
	case from == to:
		// Nothing to translate; skip the network round trip.
		return Result{Subject: subject, Body: body, Engine: NoopEngine}, nil
	}

	// Subject and body travel together as a two-element `q`, and the
	// answer is read back in the same order.
	in := requestBody{
		Q:      []string{subject, body},
		Source: from,
		Target: to,
		Format: "text",
	}
	payload, err := json.Marshal(in)
	if err != nil {
		return Result{}, fmt.Errorf("translator: marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, l.endpoint, bytes.NewReader(payload))
	if err != nil {
		return Result{}, fmt.Errorf("translator: build request: %w", err)
	}
	for _, name := range []string{"Content-Type", "Accept"} {
		httpReq.Header.Set(name, "application/json")
	}

	httpResp, err := l.client.Do(httpReq)
	if err != nil {
		return Result{}, fmt.Errorf("translator: do request: %w", err)
	}
	// The close error is deliberately ignored: the body has been
	// consumed (or abandoned) by the time this runs.
	defer func() { _ = httpResp.Body.Close() }()

	// Read at most 1 MiB. The body is read before the status is
	// inspected because the error paths quote it.
	const maxResponseBytes = 1 << 20
	data, err := io.ReadAll(io.LimitReader(httpResp.Body, maxResponseBytes))
	if err != nil {
		return Result{}, fmt.Errorf("translator: read response: %w", err)
	}

	switch status := httpResp.StatusCode; {
	case status == http.StatusBadRequest:
		// Terminal for the worker: surfaced through the sentinel.
		return Result{}, fmt.Errorf("%w: %s", ErrUnsupportedLanguagePair, strings.TrimSpace(string(data)))
	case status < 200 || status >= 300:
		return Result{}, fmt.Errorf("translator: libretranslate http %d: %s", status, strings.TrimSpace(string(data)))
	}

	var parsed responseBody
	if err := json.Unmarshal(data, &parsed); err != nil {
		return Result{}, fmt.Errorf("translator: unmarshal response: %w", err)
	}
	if parsed.Error != "" {
		return Result{}, fmt.Errorf("translator: libretranslate error: %s", parsed.Error)
	}
	if got := len(parsed.TranslatedText); got != 2 {
		return Result{}, fmt.Errorf("translator: libretranslate returned %d strings, want 2", got)
	}

	translated := Result{
		Subject: parsed.TranslatedText[0],
		Body:    parsed.TranslatedText[1],
		Engine:  LibreTranslateEngine,
	}
	return translated, nil
}
// normaliseLanguageCode reduces a language tag to the lower-cased
// primary subtag that LibreTranslate expects (`en-US` → `en`,
// `pt_BR` → `pt`, `EN` → `en`).
//
// Surrounding whitespace is dropped, then everything from the first
// `-` or `_` onwards is discarded. A tag whose very first character is
// a separator is not cut at all, only lower-cased. Blank input yields
// the empty string.
//
// The diplomail service side carries a mirror of this helper; the two
// must normalise identically so that cache keys line up.
func normaliseLanguageCode(tag string) string {
	trimmed := strings.TrimSpace(tag)
	switch sep := strings.IndexAny(trimmed, "-_"); {
	case sep > 0:
		// Keep only the primary subtag in front of the separator.
		return strings.ToLower(trimmed[:sep])
	default:
		// No separator, or one in first position: use the tag whole.
		return strings.ToLower(trimmed)
	}
}