deploy(ttsreader): enable phase6 biblical routing

2026-05-06 19:46:25 -05:00
parent bf6f542569
commit c2eb37dee9
2 changed files with 202 additions and 8 deletions
--- a/apps/fc-ttsreader/biblical-tts/app.py
+++ b/apps/fc-ttsreader/biblical-tts/app.py
@@ -30,6 +30,7 @@ import logging
 import re
 import shlex
 import subprocess
+import unicodedata
 from typing import Optional

 from fastapi import FastAPI, HTTPException
@@ -60,6 +61,189 @@ class TtsRequest(BaseModel):
    volume: int = 100     # 0-200


+# Matches one code point in the Unicode Hebrew block (U+0590-U+05FF); used to
+# check whether a text contains any Hebrew at all.
+HEBREW_CHAR_RE = re.compile(r"[\u0590-\u05FF]")
+# Matches a maximal run of Hebrew-block code points. The range is the whole
+# block, so a run can include combining points/accents and Hebrew punctuation
+# (for example maqaf), not only letters.
+HEBREW_WORD_RE = re.compile(r"[\u0590-\u05FF]+")
+
+# eSpeak-NG's Hebrew voice can spell unpointed Hebrew as Unicode character
+# names on some builds. For source-text study reads, prefer a stable
+# scholarly transliteration so words sound like words even without niqqud.
+#
+# Keys are Hebrew spellings without points or accents: the lookup in
+# _transliterate_hebrew_word runs on the output of _strip_hebrew_marks.
+# Values are the Latin-script readings sent to the synthesizer. Some words are
+# listed under more than one spelling so that either form resolves to the same
+# reading, and the four-letter divine name yields "Adonai".
+HEBREW_WORD_TRANSLITERATIONS = {
+    "אב": "av",
+    "אבא": "abba",
+    "אברהם": "Avraham",
+    "אדמה": "adamah",
+    "אדני": "Adonai",
+    "אדם": "adam",
+    "אור": "or",
+    "אלהים": "Elohim",
+    "אלוהים": "Elohim",
+    "אמן": "amen",
+    "אם": "em",
+    "אמת": "emet",
+    "ארץ": "eretz",
+    "אש": "esh",
+    "את": "et",
+    "בית": "beit",
+    "בן": "ben",
+    "ברא": "bara",
+    "בראשית": "bereshit",
+    "ברית": "berit",
+    "ברוך": "barukh",
+    "בת": "bat",
+    "גוי": "goy",
+    "גוים": "goyim",
+    "גויים": "goyim",
+    "דבר": "davar",
+    "דברים": "devarim",
+    "דוד": "David",
+    "הלל": "hallel",
+    "הארץ": "ha-aretz",
+    "הברית": "ha-berit",
+    "החדשה": "ha-chadashah",
+    "השמים": "ha-shamayim",
+    "השמיים": "ha-shamayim",
+    "ויאמר": "vayomer",
+    "יהוה": "Adonai",
+    "יוסף": "Yosef",
+    "יוחנן": "Yochanan",
+    "ישראל": "Yisrael",
+    "ישוע": "Yeshua",
+    "יצחק": "Yitzchak",
+    "יעקב": "Yaakov",
+    "ירושלים": "Yerushalayim",
+    "כהן": "kohen",
+    "כהנים": "kohanim",
+    "מים": "mayim",
+    "מות": "mavet",
+    "מושיע": "moshia",
+    "מלך": "melekh",
+    "מלכות": "malkhut",
+    "מרים": "Miriam",
+    "משה": "Moshe",
+    "משיח": "Mashiach",
+    "נביא": "navi",
+    "נביאים": "neviim",
+    "עם": "am",
+    "עולם": "olam",
+    "צדק": "tzedek",
+    "קדוש": "qadosh",
+    "קדושים": "qedoshim",
+    "קול": "qol",
+    "רוח": "ruach",
+    "שאול": "Shaul",
+    "שמים": "shamayim",
+    "שמיים": "shamayim",
+    "שמעון": "Shimon",
+    "שלום": "Shalom",
+    "תורה": "torah",
+    "חכמה": "chokhmah",
+    "חסד": "chesed",
+    "חיים": "chayim",
+    "חושך": "choshekh",
+}
+
+# Letter-by-letter Latin values used by _fallback_hebrew_transliteration for
+# words that have no entry in HEBREW_WORD_TRANSLITERATIONS. Final letter forms
+# are listed next to their base forms; characters absent from this table are
+# skipped by the fallback.
+HEBREW_LETTERS = {
+    "א": "a",
+    "ב": "b",
+    "ג": "g",
+    "ד": "d",
+    "ה": "h",
+    "ו": "v",
+    "ז": "z",
+    "ח": "kh",
+    "ט": "t",
+    "י": "y",
+    "כ": "kh",
+    "ך": "kh",
+    "ל": "l",
+    "מ": "m",
+    "ם": "m",
+    "נ": "n",
+    "ן": "n",
+    "ס": "s",
+    "ע": "a",
+    "פ": "p",
+    "ף": "f",
+    "צ": "ts",
+    "ץ": "ts",
+    "ק": "q",
+    "ר": "r",
+    "ש": "sh",
+    "ת": "t",
+}
+
+# Latin vowel letters. The fallback inserts a filler "a" between two adjacent
+# tokens only when neither side of the boundary is one of these.
+HEBREW_VOWELISH = {"a", "e", "i", "o", "u"}
+
+
+def _strip_hebrew_marks(value: str) -> str:
+    """Return *value* without combining marks and a few Hebrew punctuation signs.
+
+    The text is decomposed (NFD) first so precomposed characters expose their
+    combining marks. Every code point in Unicode category ``Mn`` is dropped,
+    together with the three punctuation characters listed below.
+    """
+    dropped_punctuation = {"׳", "״", "־"}
+    kept: list[str] = []
+    for ch in unicodedata.normalize("NFD", value):
+        if unicodedata.category(ch) == "Mn":
+            continue
+        if ch in dropped_punctuation:
+            continue
+        kept.append(ch)
+    return "".join(kept)
+
+
+def _fallback_hebrew_transliteration(word: str) -> str:
+    """Spell *word* letter by letter using the HEBREW_LETTERS table.
+
+    Characters missing from the table are ignored. A word-final he is read
+    "ah", and yod / vav anywhere after the first position are read "i" / "o".
+    A filler "a" is inserted wherever two neighbouring tokens would otherwise
+    meet without a vowel letter on either side. If no character maps to a
+    token, *word* is returned unchanged.
+    """
+    last_position = len(word) - 1
+    pieces: list[str] = []
+    for position, letter in enumerate(word):
+        base = HEBREW_LETTERS.get(letter)
+        if base is None:
+            continue
+        if letter == "ה" and position == last_position:
+            base = "ah"
+        elif position > 0 and letter == "י":
+            base = "i"
+        elif position > 0 and letter == "ו":
+            base = "o"
+        pieces.append(base)
+
+    if not pieces:
+        return word
+
+    # Pair every token with its successor; the last one is paired with "".
+    output: list[str] = []
+    for current, following in zip(pieces, pieces[1:] + [""]):
+        output.append(current)
+        touches_vowel = (
+            current[-1:] in HEBREW_VOWELISH or following[:1] in HEBREW_VOWELISH
+        )
+        if following and not touches_vowel:
+            output.append("a")
+    return "".join(output)
+
+
+# Hebrew punctuation that separates or ends words and is not a combining mark:
+# maqaf (U+05BE), paseq (U+05C0), sof pasuq (U+05C3) and nun hafukha (U+05C6).
+_HEBREW_SEGMENT_BREAK_RE = re.compile("[\u05BE\u05C0\u05C3\u05C6]")
+
+
+def _transliterate_hebrew_segment(normalized: str) -> str:
+    """Return the spoken form of one mark-free Hebrew word."""
+    direct = HEBREW_WORD_TRANSLITERATIONS.get(normalized)
+    if direct:
+        return direct
+
+    # Retry without a one-letter prefix: vav ("ve-") first, then he ("ha-").
+    for prefix, spoken_prefix in (("\u05D5", "ve-"), ("\u05D4", "ha-")):
+        if normalized.startswith(prefix) and len(normalized) > 1:
+            rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
+            if rest:
+                return f"{spoken_prefix}{rest}"
+
+    return _fallback_hebrew_transliteration(normalized)
+
+
+def _transliterate_hebrew_word(match: re.Match[str]) -> str:
+    """Replacement callback: turn one run of Hebrew characters into Latin text.
+
+    The run matched by HEBREW_WORD_RE may hold several words joined by maqaf
+    and may end in sof pasuq. Looking the whole run up as one key would fuse
+    joined words together and make verse-final words miss the dictionary, so
+    the run is split on that punctuation and each word is handled separately.
+
+    Args:
+        match: A match whose group 0 is a run of Hebrew-block characters.
+
+    Returns:
+        The words' transliterations joined with "-" (no whitespace is added,
+        so the run stays a single whitespace-delimited token), followed by "."
+        if the run contained a sof pasuq. A run that yields no word is returned
+        unchanged, or as "." when it was only a sof pasuq.
+    """
+    original = match.group(0)
+
+    spoken_parts: list[str] = []
+    for segment in _HEBREW_SEGMENT_BREAK_RE.split(original):
+        normalized = _strip_hebrew_marks(segment)
+        if normalized:
+            spoken_parts.append(_transliterate_hebrew_segment(normalized))
+
+    # Sof pasuq closes a verse; give the English voice a sentence break instead.
+    verse_end = "." if "\u05C3" in original else ""
+    if not spoken_parts:
+        return verse_end or original
+    return "-".join(spoken_parts) + verse_end
+
+
+def _prepare_synthesis_input(text: str, language: str, voice: str) -> tuple[str, str]:
+    """Choose the text and voice that are actually handed to the synthesizer.
+
+    When *language* starts with "he" (case-insensitive) and *text* contains at
+    least one Hebrew-block character, every Hebrew run is transliterated and
+    the voice is switched to "en-us". Otherwise *text* and *voice* are
+    returned untouched.
+    """
+    wants_hebrew = language.lower().startswith("he")
+    if not wants_hebrew or HEBREW_CHAR_RE.search(text) is None:
+        return text, voice
+
+    transliterated = HEBREW_WORD_RE.sub(_transliterate_hebrew_word, text)
+    return transliterated, "en-us"
+
+
 def _resolve_voice(req: TtsRequest) -> str:
    if req.voice:
        return req.voice.strip()
@@ -115,14 +299,15 @@ def tts(req: TtsRequest) -> Response:
        raise HTTPException(status_code=400, detail="text is required")

    voice = _resolve_voice(req)
+    spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
    args = [
        "--stdout",
-        "-v", voice,
+        "-v", synth_voice,
        "-s", str(max(80, min(450, req.rate))),
        "-p", str(max(0, min(99, req.pitch))),
        "-a", str(max(0, min(200, req.volume))),
    ]
-    wav = _run_espeak(args, req.text.encode("utf-8"))
+    wav = _run_espeak(args, spoken_text.encode("utf-8"))
    if not wav:
        raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
    return Response(content=wav, media_type="audio/wav")
@@ -153,9 +338,9 @@ def tts(req: TtsRequest) -> Response:
 PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)


-def _estimate_total_ms(req: TtsRequest, voice: str) -> int:
+def _estimate_total_ms(req: TtsRequest, voice: str, spoken_text: str) -> int:
    args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
-    out = _run_espeak(args, req.text.encode("utf-8"))
+    out = _run_espeak(args, spoken_text.encode("utf-8"))
    text = out.decode("utf-8", errors="replace")
    total = 0
    for match in PHONEME_DURATION_RE.finditer(text):
@@ -175,7 +360,8 @@ def timings(req: TtsRequest):
    if not req.text.strip():
        raise HTTPException(status_code=400, detail="text is required")
    voice = _resolve_voice(req)
-    total_ms = _estimate_total_ms(req, voice)
+    spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
+    total_ms = _estimate_total_ms(req, synth_voice, spoken_text)

    # Distribute total_ms across whitespace-split words proportional to
    # character count. Punctuation-only tokens are folded into the previous
@@ -204,7 +390,7 @@ def timings(req: TtsRequest):
        {
            "text": req.text,
            "language": req.language,
-            "voice": voice,
+            "voice": synth_voice,
            "words": out_words,
            "durationMs": total_ms,
        }