9f7c9099bc
Synchronous translation on read (Stage D) blocks the HTTP handler on translator I/O. Stage E switches to "send moments-fast, deliver when translated": recipients whose preferred_language differs from the detected body_lang are inserted with available_at=NULL, and an async worker turns them on once a LibreTranslate call materialises the cache row (or fails terminally after 5 retries). Schema delta on diplomail_recipients: available_at, translation_attempts, next_translation_attempt_at, plus a snapshot recipient_preferred_language so the worker queries do not need a join. Read paths (ListInbox, GetMessage, UnreadCount) filter on available_at IS NOT NULL. Push fan-out is moved from Service to the worker so the recipient only sees the toast when the inbox row is actually visible. Translator backend is now a configurable choice: empty BACKEND_DIPLOMAIL_TRANSLATOR_URL → noop (deliver original); populated → LibreTranslate HTTP client. Per-attempt timeout, max attempts, and worker interval all live in DiplomailConfig. The HTTP client itself is unit-tested via httptest (happy path, BCP47 normalisation, unsupported pair, 5xx, identical src/dst, missing URL); worker delivery + fallback paths are covered by the testcontainers-backed e2e tests in diplomail_e2e_test.go. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
155 lines
5.1 KiB
Go
155 lines
5.1 KiB
Go
package translator
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// LibreTranslateEngine names the engine that produced a translation.
// Translate stamps it on every Result that came back from a
// LibreTranslate call, and it is the value stored in
// `diplomail_translations.translator` for the matching cache rows.
const LibreTranslateEngine = "libretranslate"
|
|
|
|
// LibreTranslateConfig carries the settings needed to build the
// LibreTranslate HTTP client.
type LibreTranslateConfig struct {
	// URL is the base address of the deployed LibreTranslate instance,
	// without the trailing `/translate` path segment.
	URL string

	// Timeout bounds one HTTP request. Retry and backoff are layered on
	// top by the worker, not by this client.
	Timeout time.Duration
}
|
|
|
|
// ErrUnsupportedLanguagePair is the sentinel returned (wrapped) when
// LibreTranslate answers with HTTP 400, which this client interprets
// as "the engine cannot translate between the requested source and
// target codes". Callers should match it with errors.Is. The worker
// treats it as terminal: it stops retrying and delivers the original
// message.
var ErrUnsupportedLanguagePair = errors.New("translator: language pair not supported by libretranslate")
|
|
|
|
// NewLibreTranslate constructs a Translator that posts to
|
|
// `<URL>/translate`. Returns an error when URL is empty so wiring
|
|
// catches "translator misconfigured" at startup rather than at
|
|
// first-translation-attempt.
|
|
func NewLibreTranslate(cfg LibreTranslateConfig) (Translator, error) {
|
|
url := strings.TrimRight(strings.TrimSpace(cfg.URL), "/")
|
|
if url == "" {
|
|
return nil, errors.New("translator: libretranslate URL must be set")
|
|
}
|
|
timeout := cfg.Timeout
|
|
if timeout <= 0 {
|
|
timeout = 10 * time.Second
|
|
}
|
|
return &libreTranslate{
|
|
endpoint: url + "/translate",
|
|
client: &http.Client{Timeout: timeout},
|
|
}, nil
|
|
}
|
|
|
|
// libreTranslate is the LibreTranslate-backed translator returned by
// NewLibreTranslate.
type libreTranslate struct {
	// endpoint is the full URL requests are posted to (base URL plus
	// `/translate`).
	endpoint string

	// client performs the HTTP calls; its Timeout bounds each request.
	client *http.Client
}
|
|
|
|
// requestBody mirrors the JSON payload accepted by LibreTranslate's
// POST /translate endpoint.
type requestBody struct {
	// Q holds the texts to translate. Translate always sends exactly
	// two elements (subject, then body) so that one call yields one
	// translation per element.
	Q []string `json:"q"`

	// Source and Target are the language codes to translate from / to.
	Source string `json:"source"`
	Target string `json:"target"`

	// Format is the content format of Q; Translate sends "text".
	Format string `json:"format"`
}
|
|
|
|
// responseBody mirrors the JSON LibreTranslate returns when the
// request's `q` was an array. A single-string `q` produces a different
// shape, but this client never sends one, so only the array form is
// modelled here.
type responseBody struct {
	// TranslatedText has one entry per element of the request's `q`,
	// in the same order.
	TranslatedText []string `json:"translatedText"`

	// Error is the engine-reported failure message, if any.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// Translate posts subject + body to LibreTranslate, normalising the
|
|
// language codes and classifying the response. The 400 / unsupported-
|
|
// pair path is signalled by `ErrUnsupportedLanguagePair`. All other
|
|
// HTTP errors (timeout, 5xx, network failure) come back as wrapped
|
|
// errors so the worker can backoff and retry.
|
|
func (l *libreTranslate) Translate(ctx context.Context, srcLang, dstLang, subject, body string) (Result, error) {
|
|
src := normaliseLanguageCode(srcLang)
|
|
dst := normaliseLanguageCode(dstLang)
|
|
if src == "" || dst == "" {
|
|
return Result{}, fmt.Errorf("translator: missing source or target language (src=%q dst=%q)", srcLang, dstLang)
|
|
}
|
|
if src == dst {
|
|
return Result{Subject: subject, Body: body, Engine: NoopEngine}, nil
|
|
}
|
|
|
|
reqBody, err := json.Marshal(requestBody{
|
|
Q: []string{subject, body},
|
|
Source: src,
|
|
Target: dst,
|
|
Format: "text",
|
|
})
|
|
if err != nil {
|
|
return Result{}, fmt.Errorf("translator: marshal request: %w", err)
|
|
}
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, l.endpoint, bytes.NewReader(reqBody))
|
|
if err != nil {
|
|
return Result{}, fmt.Errorf("translator: build request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
resp, err := l.client.Do(req)
|
|
if err != nil {
|
|
return Result{}, fmt.Errorf("translator: do request: %w", err)
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
raw, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
|
if err != nil {
|
|
return Result{}, fmt.Errorf("translator: read response: %w", err)
|
|
}
|
|
if resp.StatusCode == http.StatusBadRequest {
|
|
return Result{}, fmt.Errorf("%w: %s", ErrUnsupportedLanguagePair, strings.TrimSpace(string(raw)))
|
|
}
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return Result{}, fmt.Errorf("translator: libretranslate http %d: %s", resp.StatusCode, strings.TrimSpace(string(raw)))
|
|
}
|
|
|
|
var out responseBody
|
|
if err := json.Unmarshal(raw, &out); err != nil {
|
|
return Result{}, fmt.Errorf("translator: unmarshal response: %w", err)
|
|
}
|
|
if out.Error != "" {
|
|
return Result{}, fmt.Errorf("translator: libretranslate error: %s", out.Error)
|
|
}
|
|
if len(out.TranslatedText) != 2 {
|
|
return Result{}, fmt.Errorf("translator: libretranslate returned %d strings, want 2", len(out.TranslatedText))
|
|
}
|
|
return Result{
|
|
Subject: out.TranslatedText[0],
|
|
Body: out.TranslatedText[1],
|
|
Engine: LibreTranslateEngine,
|
|
}, nil
|
|
}
|
|
|
|
// normaliseLanguageCode reduces a BCP 47 style tag to the lower-cased
// base code LibreTranslate expects (`en-US` → `en`, `EN` → `en`).
//
// Surrounding whitespace is trimmed first; an empty or all-whitespace
// tag yields "". Everything from the first `-` or `_` onwards is then
// dropped, unless that separator is the very first character, in which
// case the tag is left uncut. The remainder is lower-cased.
//
// The same normalisation is mirrored on the diplomail service side;
// both sides must agree so that cache keys line up.
func normaliseLanguageCode(tag string) string {
	trimmed := strings.TrimSpace(tag)
	if trimmed == "" {
		return ""
	}

	base := trimmed
	for pos := 0; pos < len(trimmed); pos++ {
		if c := trimmed[pos]; c != '-' && c != '_' {
			continue
		}
		// Only the first separator matters, and a leading separator
		// leaves the tag untouched.
		if pos > 0 {
			base = trimmed[:pos]
		}
		break
	}
	return strings.ToLower(base)
}
|