diplomail (Stage E): LibreTranslate client + async translation worker
Synchronous translation on read (Stage D) blocks the HTTP handler on translator I/O. Stage E switches to "send moments-fast, deliver when translated": recipients whose preferred_language differs from the detected body_lang are inserted with available_at=NULL, and an async worker turns them on once a LibreTranslate call materialises the cache row (or fails terminally after 5 retries). Schema delta on diplomail_recipients: available_at, translation_attempts, next_translation_attempt_at, plus a snapshot recipient_preferred_language so the worker queries do not need a join. Read paths (ListInbox, GetMessage, UnreadCount) filter on available_at IS NOT NULL. Push fan-out is moved from Service to the worker so the recipient only sees the toast when the inbox row is actually visible. Translator backend is now a configurable choice: empty BACKEND_DIPLOMAIL_TRANSLATOR_URL → noop (deliver original); populated → LibreTranslate HTTP client. Per-attempt timeout, max attempts, and worker interval all live in DiplomailConfig. The HTTP client itself is unit-tested via httptest (happy path, BCP47 normalisation, unsupported pair, 5xx, identical src/dst, missing URL); worker delivery + fallback paths are covered by the testcontainers-backed e2e tests in diplomail_e2e_test.go. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
package translator
|
||||
|
||||
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)
|
||||
|
||||
// LibreTranslateEngine names this backend in persisted data: cache rows
// written to `diplomail_translations.translator` by the LibreTranslate
// client carry this value.
const LibreTranslateEngine = "libretranslate"
|
||||
|
||||
// LibreTranslateConfig holds the settings for the LibreTranslate HTTP
// client.
type LibreTranslateConfig struct {
	// URL is the base address of the deployed instance, without the
	// `/translate` suffix.
	URL string
	// Timeout caps one HTTP request. Retry and backoff are layered on
	// top by the worker, not by this client.
	Timeout time.Duration
}
|
||||
|
||||
// ErrUnsupportedLanguagePair is the sentinel for a LibreTranslate 400
// response, which signals that the engine cannot translate between the
// requested source and target codes. The worker treats it as terminal:
// it stops retrying and delivers the original text.
var ErrUnsupportedLanguagePair = errors.New("translator: language pair not supported by libretranslate")
|
||||
|
||||
// NewLibreTranslate constructs a Translator that posts to
|
||||
// `<URL>/translate`. Returns an error when URL is empty so wiring
|
||||
// catches "translator misconfigured" at startup rather than at
|
||||
// first-translation-attempt.
|
||||
func NewLibreTranslate(cfg LibreTranslateConfig) (Translator, error) {
|
||||
url := strings.TrimRight(strings.TrimSpace(cfg.URL), "/")
|
||||
if url == "" {
|
||||
return nil, errors.New("translator: libretranslate URL must be set")
|
||||
}
|
||||
timeout := cfg.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 10 * time.Second
|
||||
}
|
||||
return &libreTranslate{
|
||||
endpoint: url + "/translate",
|
||||
client: &http.Client{Timeout: timeout},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// libreTranslate is the LibreTranslate-backed client returned by
// NewLibreTranslate.
type libreTranslate struct {
	// endpoint is the full URL requests are posted to: the configured
	// base URL with `/translate` appended.
	endpoint string
	// client performs the HTTP calls; its Timeout bounds each request.
	client *http.Client
}
|
||||
|
||||
// requestBody mirrors the JSON input of LibreTranslate's POST
// /translate endpoint.
type requestBody struct {
	// Q carries two elements (subject, then body) so that a single call
	// yields one translation per element.
	Q []string `json:"q"`
	// Source is the language code of the text in Q.
	Source string `json:"source"`
	// Target is the language code to translate into.
	Target string `json:"target"`
	// Format is the content format of Q; this client sends "text".
	Format string `json:"format"`
}
|
||||
|
||||
// responseBody mirrors the JSON that LibreTranslate returns when `q` was
// sent as an array. A single-string `q` produces a different shape, but
// this client never sends one, so only the array form is modelled.
type responseBody struct {
	// TranslatedText holds one translation per element of the request's
	// `q` array.
	TranslatedText []string `json:"translatedText"`
	// Error is the engine's error message, if it reported one.
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// Translate posts subject + body to LibreTranslate, normalising the
|
||||
// language codes and classifying the response. The 400 / unsupported-
|
||||
// pair path is signalled by `ErrUnsupportedLanguagePair`. All other
|
||||
// HTTP errors (timeout, 5xx, network failure) come back as wrapped
|
||||
// errors so the worker can backoff and retry.
|
||||
func (l *libreTranslate) Translate(ctx context.Context, srcLang, dstLang, subject, body string) (Result, error) {
|
||||
src := normaliseLanguageCode(srcLang)
|
||||
dst := normaliseLanguageCode(dstLang)
|
||||
if src == "" || dst == "" {
|
||||
return Result{}, fmt.Errorf("translator: missing source or target language (src=%q dst=%q)", srcLang, dstLang)
|
||||
}
|
||||
if src == dst {
|
||||
return Result{Subject: subject, Body: body, Engine: NoopEngine}, nil
|
||||
}
|
||||
|
||||
reqBody, err := json.Marshal(requestBody{
|
||||
Q: []string{subject, body},
|
||||
Source: src,
|
||||
Target: dst,
|
||||
Format: "text",
|
||||
})
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: marshal request: %w", err)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, l.endpoint, bytes.NewReader(reqBody))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := l.client.Do(req)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: do request: %w", err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
raw, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("translator: read response: %w", err)
|
||||
}
|
||||
if resp.StatusCode == http.StatusBadRequest {
|
||||
return Result{}, fmt.Errorf("%w: %s", ErrUnsupportedLanguagePair, strings.TrimSpace(string(raw)))
|
||||
}
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return Result{}, fmt.Errorf("translator: libretranslate http %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
|
||||
}
|
||||
|
||||
var out responseBody
|
||||
if err := json.Unmarshal(raw, &out); err != nil {
|
||||
return Result{}, fmt.Errorf("translator: unmarshal response: %w", err)
|
||||
}
|
||||
if out.Error != "" {
|
||||
return Result{}, fmt.Errorf("translator: libretranslate error: %s", out.Error)
|
||||
}
|
||||
if len(out.TranslatedText) != 2 {
|
||||
return Result{}, fmt.Errorf("translator: libretranslate returned %d strings, want 2", len(out.TranslatedText))
|
||||
}
|
||||
return Result{
|
||||
Subject: out.TranslatedText[0],
|
||||
Body: out.TranslatedText[1],
|
||||
Engine: LibreTranslateEngine,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// normaliseLanguageCode reduces a BCP 47 style tag to the lower-cased
// primary subtag that LibreTranslate expects (`en-US` → `en`,
// `EN` → `en`). Surrounding whitespace is ignored and both `-` and `_`
// count as subtag separators; blank input yields "". A tag that starts
// with a separator is not cut, only lower-cased.
//
// The diplomail service side mirrors this helper; the two must apply the
// same normalisation so that cache keys line up.
func normaliseLanguageCode(tag string) string {
	trimmed := strings.TrimSpace(tag)
	cut := strings.IndexAny(trimmed, "-_")
	if cut <= 0 {
		// No separator, or a leading one: keep the whole (possibly
		// empty) tag.
		return strings.ToLower(trimmed)
	}
	return strings.ToLower(trimmed[:cut])
}
|
||||
Reference in New Issue
Block a user