package translator import ( "bytes" "context" "encoding/json" "errors" "fmt" "io" "net/http" "strings" "time" ) // LibreTranslateEngine is the engine identifier persisted in // `diplomail_translations.translator` for cache rows produced by the // LibreTranslate client. const LibreTranslateEngine = "libretranslate" // LibreTranslateConfig configures the HTTP client. URL is the base // of the deployed instance (without `/translate`). Timeout bounds a // single HTTP request; the worker layers retry / backoff on top. type LibreTranslateConfig struct { URL string Timeout time.Duration } // ErrUnsupportedLanguagePair classifies a LibreTranslate 400 response // that indicates the engine cannot translate between the requested // source / target codes. The worker treats this as terminal: no // further retries, deliver the original. var ErrUnsupportedLanguagePair = errors.New("translator: language pair not supported by libretranslate") // NewLibreTranslate constructs a Translator that posts to // `/translate`. Returns an error when URL is empty so wiring // catches "translator misconfigured" at startup rather than at // first-translation-attempt. func NewLibreTranslate(cfg LibreTranslateConfig) (Translator, error) { url := strings.TrimRight(strings.TrimSpace(cfg.URL), "/") if url == "" { return nil, errors.New("translator: libretranslate URL must be set") } timeout := cfg.Timeout if timeout <= 0 { timeout = 10 * time.Second } return &libreTranslate{ endpoint: url + "/translate", client: &http.Client{Timeout: timeout}, }, nil } type libreTranslate struct { endpoint string client *http.Client } // requestBody is the LibreTranslate POST /translate input shape. // `q` is sent as a two-element array so the engine returns one // translation per element in the same call (subject + body). type requestBody struct { Q []string `json:"q"` Source string `json:"source"` Target string `json:"target"` Format string `json:"format"` } // responseBody is the LibreTranslate output shape when `q` is an // array. The single-string-q variant is a different shape; we never // emit a single-q request so the client always sees the array form. type responseBody struct { TranslatedText []string `json:"translatedText"` Error string `json:"error,omitempty"` } // Translate posts subject + body to LibreTranslate, normalising the // language codes and classifying the response. The 400 / unsupported- // pair path is signalled by `ErrUnsupportedLanguagePair`. All other // HTTP errors (timeout, 5xx, network failure) come back as wrapped // errors so the worker can backoff and retry. func (l *libreTranslate) Translate(ctx context.Context, srcLang, dstLang, subject, body string) (Result, error) { src := normaliseLanguageCode(srcLang) dst := normaliseLanguageCode(dstLang) if src == "" || dst == "" { return Result{}, fmt.Errorf("translator: missing source or target language (src=%q dst=%q)", srcLang, dstLang) } if src == dst { return Result{Subject: subject, Body: body, Engine: NoopEngine}, nil } reqBody, err := json.Marshal(requestBody{ Q: []string{subject, body}, Source: src, Target: dst, Format: "text", }) if err != nil { return Result{}, fmt.Errorf("translator: marshal request: %w", err) } req, err := http.NewRequestWithContext(ctx, http.MethodPost, l.endpoint, bytes.NewReader(reqBody)) if err != nil { return Result{}, fmt.Errorf("translator: build request: %w", err) } req.Header.Set("Content-Type", "application/json") req.Header.Set("Accept", "application/json") resp, err := l.client.Do(req) if err != nil { return Result{}, fmt.Errorf("translator: do request: %w", err) } defer func() { _ = resp.Body.Close() }() raw, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) if err != nil { return Result{}, fmt.Errorf("translator: read response: %w", err) } if resp.StatusCode == http.StatusBadRequest { return Result{}, fmt.Errorf("%w: %s", ErrUnsupportedLanguagePair, strings.TrimSpace(string(raw))) } if resp.StatusCode < 200 || resp.StatusCode >= 300 { return Result{}, fmt.Errorf("translator: libretranslate http %d: %s", resp.StatusCode, strings.TrimSpace(string(raw))) } var out responseBody if err := json.Unmarshal(raw, &out); err != nil { return Result{}, fmt.Errorf("translator: unmarshal response: %w", err) } if out.Error != "" { return Result{}, fmt.Errorf("translator: libretranslate error: %s", out.Error) } if len(out.TranslatedText) != 2 { return Result{}, fmt.Errorf("translator: libretranslate returned %d strings, want 2", len(out.TranslatedText)) } return Result{ Subject: out.TranslatedText[0], Body: out.TranslatedText[1], Engine: LibreTranslateEngine, }, nil } // normaliseLanguageCode collapses a BCP 47 tag to the ISO 639-1 base // that LibreTranslate expects (`en-US` → `en`, `EN` → `en`). The // helper is mirrored on the diplomail service side; both sides need // to use the same normalisation so cache keys line up. func normaliseLanguageCode(tag string) string { tag = strings.TrimSpace(tag) if tag == "" { return "" } if i := strings.IndexAny(tag, "-_"); i > 0 { tag = tag[:i] } return strings.ToLower(tag) }