deploy(ttsreader): enable phase6 biblical routing

2026-05-06 19:46:25 -05:00
parent bf6f542569
commit c2eb37dee9
2 changed files with 202 additions and 8 deletions
--- a/apps/fc-ttsreader/biblical-tts/app.py
+++ b/apps/fc-ttsreader/biblical-tts/app.py
@@ -30,6 +30,7 @@ import logging
 import re
 import shlex
 import subprocess
 import unicodedata
 from typing import Optional
 from fastapi import FastAPI, HTTPException
@@ -60,6 +61,189 @@ class TtsRequest(BaseModel):
    volume: int = 100     # 0-200
 # Matches one code point in U+0590-U+05FF. _prepare_synthesis_input uses it
 # to check whether the text contains any Hebrew-block character at all.
 HEBREW_CHAR_RE = re.compile(r"[\u0590-\u05FF]")
 # Matches a maximal run of U+0590-U+05FF code points; each run is handed to
 # _transliterate_hebrew_word as one "word". The range is not limited to
 # letters, so a run can also carry combining points and punctuation that
 # live in the same range.
 HEBREW_WORD_RE = re.compile(r"[\u0590-\u05FF]+")
 # eSpeak-NG's Hebrew voice can spell unpointed Hebrew as Unicode character
 # names on some builds. For source-text study reads, prefer a stable
 # scholarly transliteration so words sound like words even without niqqud.
 #
 # Keys are matched by _transliterate_hebrew_word against the word after its
 # combining marks have been stripped, so every key is spelled without niqqud.
 # Some words appear in both a short and a plene spelling that map to the same
 # reading. A few entries already include the article ("ha-...") because the
 # reading differs from simply prefixing the bare word (eretz vs. ha-aretz).
 # The Tetragrammaton maps to "Adonai", the customary substitute reading.
 HEBREW_WORD_TRANSLITERATIONS = {
    "אב": "av",
    "אבא": "abba",
    "אברהם": "Avraham",
    "אדמה": "adamah",
    "אדני": "Adonai",
    "אדם": "adam",
    "אור": "or",
    "אלהים": "Elohim",
    "אלוהים": "Elohim",
    "אמן": "amen",
    "אם": "em",
    "אמת": "emet",
    "ארץ": "eretz",
    "אש": "esh",
    "את": "et",
    "בית": "beit",
    "בן": "ben",
    "ברא": "bara",
    "בראשית": "bereshit",
    "ברית": "berit",
    "ברוך": "barukh",
    "בת": "bat",
    "גוי": "goy",
    "גוים": "goyim",
    "גויים": "goyim",
    "דבר": "davar",
    "דברים": "devarim",
    "דוד": "David",
    "הלל": "hallel",
    "הארץ": "ha-aretz",
    "הברית": "ha-berit",
    "החדשה": "ha-chadashah",
    "השמים": "ha-shamayim",
    "השמיים": "ha-shamayim",
    "ויאמר": "vayomer",
    "יהוה": "Adonai",
    "יוסף": "Yosef",
    "יוחנן": "Yochanan",
    "ישראל": "Yisrael",
    "ישוע": "Yeshua",
    "יצחק": "Yitzchak",
    "יעקב": "Yaakov",
    "ירושלים": "Yerushalayim",
    "כהן": "kohen",
    "כהנים": "kohanim",
    "מים": "mayim",
    "מות": "mavet",
    "מושיע": "moshia",
    "מלך": "melekh",
    "מלכות": "malkhut",
    "מרים": "Miriam",
    "משה": "Moshe",
    "משיח": "Mashiach",
    "נביא": "navi",
    "נביאים": "neviim",
    "עם": "am",
    "עולם": "olam",
    "צדק": "tzedek",
    "קדוש": "qadosh",
    "קדושים": "qedoshim",
    "קול": "qol",
    "רוח": "ruach",
    "שאול": "Shaul",
    "שמים": "shamayim",
    "שמיים": "shamayim",
    "שמעון": "Shimon",
    "שלום": "Shalom",
    "תורה": "torah",
    "חכמה": "chokhmah",
    "חסד": "chesed",
    "חיים": "chayim",
    "חושך": "choshekh",
 }
 # Per-letter values used by _fallback_hebrew_transliteration for words that
 # are not in HEBREW_WORD_TRANSLITERATIONS. The input has no niqqud, so each
 # letter gets a single fixed value: alef and ayin both read "a", and final
 # forms share the value of their regular letter except pe ("p") versus final
 # pe ("f"). Position-dependent readings of he, yod and vav are applied by
 # the fallback function itself, not by this table.
 HEBREW_LETTERS = {
    "א": "a",
    "ב": "b",
    "ג": "g",
    "ד": "d",
    "ה": "h",
    "ו": "v",
    "ז": "z",
    "ח": "kh",
    "ט": "t",
    "י": "y",
    "כ": "kh",
    "ך": "kh",
    "ל": "l",
    "מ": "m",
    "ם": "m",
    "נ": "n",
    "ן": "n",
    "ס": "s",
    "ע": "a",
    "פ": "p",
    "ף": "f",
    "צ": "ts",
    "ץ": "ts",
    "ק": "q",
    "ר": "r",
    "ש": "sh",
    "ת": "t",
 }
 # Latin vowel letters. The fallback transliteration checks the edge
 # characters of adjacent tokens against this set to decide whether a helper
 # vowel has to be inserted between them.
 HEBREW_VOWELISH = set("aeiou")
 def _strip_hebrew_marks(value: str) -> str:
    decomposed = unicodedata.normalize("NFD", value)
    return "".join(
        ch for ch in decomposed
        if unicodedata.category(ch) != "Mn" and ch not in {"׳", "״", "־"}
    )
 def _fallback_hebrew_transliteration(word: str) -> str:
    """Build a rough Latin reading for an unpointed Hebrew word.

    Each letter found in ``HEBREW_LETTERS`` is replaced by its table value,
    with three positional overrides: a word-final he is read "ah", and a yod
    or vav that is not the first letter is read "i" or "o". Afterwards an
    "a" is inserted between any two neighbouring tokens that would otherwise
    put two consonant sounds side by side, so the result stays pronounceable.

    Characters that are not in ``HEBREW_LETTERS`` are ignored. If the word
    contains no known letter at all, it is returned unchanged.
    """
    # Work on the known letters only, so "first" and "last" refer to letters
    # and a stray trailing sign (e.g. sof pasuq) cannot hide a final he.
    letters = [ch for ch in word if ch in HEBREW_LETTERS]
    if not letters:
        return word
    last = len(letters) - 1
    tokens = []
    for index, ch in enumerate(letters):
        if ch == "ה" and index == last:
            tokens.append("ah")
        elif ch == "י" and index > 0:
            tokens.append("i")
        elif ch == "ו" and index > 0:
            tokens.append("o")
        else:
            tokens.append(HEBREW_LETTERS[ch])
    spoken = []
    # Pair every token with its successor ("" after the last one).
    for token, following in zip(tokens, tokens[1:] + [""]):
        spoken.append(token)
        if (
            following
            and token[-1:] not in HEBREW_VOWELISH
            and following[:1] not in HEBREW_VOWELISH
        ):
            spoken.append("a")
    return "".join(spoken)
 def _transliterate_hebrew_segment(segment: str) -> str:
    """Transliterate one maqaf-free Hebrew word; return "" if nothing is left.

    Lookup order: the exact mark-stripped spelling in
    ``HEBREW_WORD_TRANSLITERATIONS``, then the same word without a leading
    vav ("ve-") or he ("ha-"), then the letter-by-letter fallback.
    """
    normalized = _strip_hebrew_marks(segment)
    if not normalized:
        return ""
    direct = HEBREW_WORD_TRANSLITERATIONS.get(normalized)
    if direct:
        return direct
    # A one-letter prefix is only peeled off when the remainder is a known
    # word; vav is tried before he, matching the original precedence.
    for prefix, spoken_prefix in (("ו", "ve"), ("ה", "ha")):
        if normalized.startswith(prefix) and len(normalized) > 1:
            rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
            if rest:
                return f"{spoken_prefix}-{rest}"
    return _fallback_hebrew_transliteration(normalized)

 def _transliterate_hebrew_word(match: re.Match[str]) -> str:
    """Regex replacement callback: return the Latin reading of one match.

    The matched run may contain maqaf (U+05BE), the Hebrew hyphen, because
    it lies inside the matched code-point range. Stripping it would fuse two
    words into one unknown word, so the run is split on maqaf, every part is
    transliterated on its own, and the parts are re-joined with "-". A run
    without maqaf is handled as a single part.

    Returns the original matched text when nothing transliterable remains
    (for example a run consisting only of marks or punctuation).
    """
    original = match.group(0)
    pieces = [
        spoken
        for spoken in map(_transliterate_hebrew_segment, original.split("\u05be"))
        if spoken
    ]
    return "-".join(pieces) if pieces else original
 def _prepare_synthesis_input(text: str, language: str, voice: str) -> tuple[str, str]:
    if language.lower().startswith("he") and HEBREW_CHAR_RE.search(text):
        spoken = HEBREW_WORD_RE.sub(_transliterate_hebrew_word, text)
        return spoken, "en-us"
    return text, voice
 def _resolve_voice(req: TtsRequest) -> str:
    if req.voice:
        return req.voice.strip()
@@ -115,14 +299,15 @@ def tts(req: TtsRequest) -> Response:
        raise HTTPException(status_code=400, detail="text is required")
    voice = _resolve_voice(req)
    spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
    args = [
        "--stdout",
-        "-v", voice,
+        "-v", synth_voice,
        "-s", str(max(80, min(450, req.rate))),
        "-p", str(max(0, min(99, req.pitch))),
        "-a", str(max(0, min(200, req.volume))),
    ]
-    wav = _run_espeak(args, req.text.encode("utf-8"))
+    wav = _run_espeak(args, spoken_text.encode("utf-8"))
    if not wav:
        raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
    return Response(content=wav, media_type="audio/wav")
@@ -153,9 +338,9 @@ def tts(req: TtsRequest) -> Response:
 PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
-def _estimate_total_ms(req: TtsRequest, voice: str) -> int:
+def _estimate_total_ms(req: TtsRequest, voice: str, spoken_text: str) -> int:
    args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
-    out = _run_espeak(args, req.text.encode("utf-8"))
+    out = _run_espeak(args, spoken_text.encode("utf-8"))
    text = out.decode("utf-8", errors="replace")
    total = 0
    for match in PHONEME_DURATION_RE.finditer(text):
@@ -175,7 +360,8 @@ def timings(req: TtsRequest):
    if not req.text.strip():
        raise HTTPException(status_code=400, detail="text is required")
    voice = _resolve_voice(req)
-    total_ms = _estimate_total_ms(req, voice)
+    spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
    total_ms = _estimate_total_ms(req, synth_voice, spoken_text)
    # Distribute total_ms across whitespace-split words proportional to
    # character count. Punctuation-only tokens are folded into the previous
@@ -204,7 +390,7 @@ def timings(req: TtsRequest):
        {
            "text": req.text,
            "language": req.language,
-            "voice": voice,
+            "voice": synth_voice,
            "words": out_words,
            "durationMs": total_ms,
        }
--- a/apps/fc-ttsreader/fc-ttsreader.yaml
+++ b/apps/fc-ttsreader/fc-ttsreader.yaml
@@ -359,7 +359,7 @@ spec:
        runAsUser: 1654
      containers:
        - name: biblical-tts
-          image: localhost/fc-biblical-tts:v1
+          image: localhost/fc-biblical-tts:v20260506-hebrew-translit
          imagePullPolicy: Never
          ports:
            - containerPort: 10402
@@ -532,7 +532,7 @@ spec:
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: web
-          image: localhost/fc-ttsreader-web:v202605061500
+          image: localhost/fc-ttsreader-web:v20260506-phase6
          imagePullPolicy: Never
          ports:
            - containerPort: 5217
@@ -568,6 +568,14 @@ spec:
              value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
            - name: TtsReader__Kokoro__TimeoutSeconds
              value: "120"
            - name: FlowerCore__Tts__BiblicalTts__Enabled
              value: "true"
            - name: FlowerCore__Tts__BiblicalTts__BaseUrl
              value: "http://ttsreader-biblical.fc-ttsreader.svc.cluster.local.:10402"
            - name: FlowerCore__Tts__BiblicalTts__TimeoutSeconds
              value: "60"
            - name: FlowerCore__Tts__BiblicalTts__DefaultLanguage
              value: "grc"
            - name: Speech__Alignment__Enabled
              # Cluster-native faster-whisper (Lane F, 2026-04-25). The
              # ttsreader-align deployment in this manifest wraps