deploy(ttsreader): enable phase6 biblical routing
This commit is contained in:
@@ -30,6 +30,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import unicodedata
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
@@ -60,6 +61,189 @@ class TtsRequest(BaseModel):
|
|||||||
volume: int = 100 # 0-200
|
volume: int = 100 # 0-200
|
||||||
|
|
||||||
|
|
||||||
|
HEBREW_CHAR_RE = re.compile(r"[\u0590-\u05FF]")
|
||||||
|
HEBREW_WORD_RE = re.compile(r"[\u0590-\u05FF]+")
|
||||||
|
|
||||||
|
# eSpeak-NG's Hebrew voice can spell unpointed Hebrew as Unicode character
|
||||||
|
# names on some builds. For source-text study reads, prefer a stable
|
||||||
|
# scholarly transliteration so words sound like words even without niqqud.
|
||||||
|
HEBREW_WORD_TRANSLITERATIONS = {
|
||||||
|
"אב": "av",
|
||||||
|
"אבא": "abba",
|
||||||
|
"אברהם": "Avraham",
|
||||||
|
"אדמה": "adamah",
|
||||||
|
"אדני": "Adonai",
|
||||||
|
"אדם": "adam",
|
||||||
|
"אור": "or",
|
||||||
|
"אלהים": "Elohim",
|
||||||
|
"אלוהים": "Elohim",
|
||||||
|
"אמן": "amen",
|
||||||
|
"אם": "em",
|
||||||
|
"אמת": "emet",
|
||||||
|
"ארץ": "eretz",
|
||||||
|
"אש": "esh",
|
||||||
|
"את": "et",
|
||||||
|
"בית": "beit",
|
||||||
|
"בן": "ben",
|
||||||
|
"ברא": "bara",
|
||||||
|
"בראשית": "bereshit",
|
||||||
|
"ברית": "berit",
|
||||||
|
"ברוך": "barukh",
|
||||||
|
"בת": "bat",
|
||||||
|
"גוי": "goy",
|
||||||
|
"גוים": "goyim",
|
||||||
|
"גויים": "goyim",
|
||||||
|
"דבר": "davar",
|
||||||
|
"דברים": "devarim",
|
||||||
|
"דוד": "David",
|
||||||
|
"הלל": "hallel",
|
||||||
|
"הארץ": "ha-aretz",
|
||||||
|
"הברית": "ha-berit",
|
||||||
|
"החדשה": "ha-chadashah",
|
||||||
|
"השמים": "ha-shamayim",
|
||||||
|
"השמיים": "ha-shamayim",
|
||||||
|
"ויאמר": "vayomer",
|
||||||
|
"יהוה": "Adonai",
|
||||||
|
"יוסף": "Yosef",
|
||||||
|
"יוחנן": "Yochanan",
|
||||||
|
"ישראל": "Yisrael",
|
||||||
|
"ישוע": "Yeshua",
|
||||||
|
"יצחק": "Yitzchak",
|
||||||
|
"יעקב": "Yaakov",
|
||||||
|
"ירושלים": "Yerushalayim",
|
||||||
|
"כהן": "kohen",
|
||||||
|
"כהנים": "kohanim",
|
||||||
|
"מים": "mayim",
|
||||||
|
"מות": "mavet",
|
||||||
|
"מושיע": "moshia",
|
||||||
|
"מלך": "melekh",
|
||||||
|
"מלכות": "malkhut",
|
||||||
|
"מרים": "Miriam",
|
||||||
|
"משה": "Moshe",
|
||||||
|
"משיח": "Mashiach",
|
||||||
|
"נביא": "navi",
|
||||||
|
"נביאים": "neviim",
|
||||||
|
"עם": "am",
|
||||||
|
"עולם": "olam",
|
||||||
|
"צדק": "tzedek",
|
||||||
|
"קדוש": "qadosh",
|
||||||
|
"קדושים": "qedoshim",
|
||||||
|
"קול": "qol",
|
||||||
|
"רוח": "ruach",
|
||||||
|
"שאול": "Shaul",
|
||||||
|
"שמים": "shamayim",
|
||||||
|
"שמיים": "shamayim",
|
||||||
|
"שמעון": "Shimon",
|
||||||
|
"שלום": "Shalom",
|
||||||
|
"תורה": "torah",
|
||||||
|
"חכמה": "chokhmah",
|
||||||
|
"חסד": "chesed",
|
||||||
|
"חיים": "chayim",
|
||||||
|
"חושך": "choshekh",
|
||||||
|
}
|
||||||
|
|
||||||
|
HEBREW_LETTERS = {
|
||||||
|
"א": "a",
|
||||||
|
"ב": "b",
|
||||||
|
"ג": "g",
|
||||||
|
"ד": "d",
|
||||||
|
"ה": "h",
|
||||||
|
"ו": "v",
|
||||||
|
"ז": "z",
|
||||||
|
"ח": "kh",
|
||||||
|
"ט": "t",
|
||||||
|
"י": "y",
|
||||||
|
"כ": "kh",
|
||||||
|
"ך": "kh",
|
||||||
|
"ל": "l",
|
||||||
|
"מ": "m",
|
||||||
|
"ם": "m",
|
||||||
|
"נ": "n",
|
||||||
|
"ן": "n",
|
||||||
|
"ס": "s",
|
||||||
|
"ע": "a",
|
||||||
|
"פ": "p",
|
||||||
|
"ף": "f",
|
||||||
|
"צ": "ts",
|
||||||
|
"ץ": "ts",
|
||||||
|
"ק": "q",
|
||||||
|
"ר": "r",
|
||||||
|
"ש": "sh",
|
||||||
|
"ת": "t",
|
||||||
|
}
|
||||||
|
|
||||||
|
HEBREW_VOWELISH = {"a", "e", "i", "o", "u"}
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_hebrew_marks(value: str) -> str:
|
||||||
|
decomposed = unicodedata.normalize("NFD", value)
|
||||||
|
return "".join(
|
||||||
|
ch for ch in decomposed
|
||||||
|
if unicodedata.category(ch) != "Mn" and ch not in {"׳", "״", "־"}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fallback_hebrew_transliteration(word: str) -> str:
    """Romanise *word* letter by letter when no dictionary entry exists.

    Each character is looked up in ``HEBREW_LETTERS``; characters without
    an entry are ignored. Three positional overrides apply: a word-final
    he is rendered "ah", and a yod or vav anywhere after the first
    character is rendered as the vowel "i" or "o". An "a" is then placed
    between any two adjacent sounds where the first does not end in a
    vowel and the second does not start with one.

    Args:
        word: A Hebrew word, expected to be already stripped of marks.

    Returns:
        The romanised word, or *word* unchanged when none of its
        characters has an entry in ``HEBREW_LETTERS``.
    """
    # Letters that are read as vowels when they are not word-initial.
    medial_vowels = {"י": "i", "ו": "o"}
    final_position = len(word) - 1

    sounds: list[str] = []
    for position, letter in enumerate(word):
        sound = HEBREW_LETTERS.get(letter)
        if sound is None:
            continue
        if letter == "ה" and position == final_position:
            sound = "ah"
        elif position > 0:
            sound = medial_vowels.get(letter, sound)
        sounds.append(sound)

    if not sounds:
        return word

    pieces: list[str] = []
    # Pair every sound with its successor; the last one pairs with "".
    for current, following in zip(sounds, sounds[1:] + [""]):
        pieces.append(current)
        needs_helper_vowel = (
            bool(following)
            and current[-1:] not in HEBREW_VOWELISH
            and following[:1] not in HEBREW_VOWELISH
        )
        if needs_helper_vowel:
            pieces.append("a")
    return "".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def _transliterate_hebrew_segment(normalized: str) -> str:
    """Romanise one mark-free Hebrew word.

    Lookup order: the exact word in ``HEBREW_WORD_TRANSLITERATIONS``;
    then the word minus a leading vav (rendered "ve-"); then the word
    minus a leading he (rendered "ha-"); finally the letter-by-letter
    fallback.
    """
    direct = HEBREW_WORD_TRANSLITERATIONS.get(normalized)
    if direct:
        return direct

    if len(normalized) > 1:
        if normalized.startswith("ו"):
            rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
            if rest:
                return f"ve-{rest}"
        if normalized.startswith("ה"):
            rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
            if rest:
                return f"ha-{rest}"

    return _fallback_hebrew_transliteration(normalized)


def _transliterate_hebrew_word(match: re.Match[str]) -> str:
    """Regex substitution callback: romanise one run of Hebrew characters.

    The matched run is split on maqaf (U+05BE) before normalisation.
    Maqaf lies inside the Hebrew block, so a compound such as two words
    joined by maqaf arrives here as a single match. Stripping the maqaf
    first would fuse the parts into one string that matches no
    dictionary entry. Each part is romanised on its own and the results
    are joined with "-", which keeps the compound a single
    whitespace-delimited token.

    Args:
        match: A match whose group 0 is the Hebrew run to convert.

    Returns:
        The romanised text, or the original matched text when nothing
        remains after marks and punctuation are stripped.
    """
    original = match.group(0)

    spoken_parts: list[str] = []
    for segment in original.split("\u05be"):  # maqaf
        normalized = _strip_hebrew_marks(segment)
        if normalized:
            spoken_parts.append(_transliterate_hebrew_segment(normalized))

    if not spoken_parts:
        return original
    return "-".join(spoken_parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_synthesis_input(text: str, language: str, voice: str) -> tuple[str, str]:
    """Choose the text and voice that are handed to the synthesiser.

    When *language* starts with "he" (case-insensitive) and *text*
    contains at least one character in the Hebrew block, every Hebrew run
    is replaced by its romanisation and the voice is switched to "en-us".
    In every other case the inputs are passed through untouched.

    Args:
        text: The text requested for synthesis.
        language: The requested language code.
        voice: The voice already resolved for the request.

    Returns:
        A ``(spoken_text, synth_voice)`` tuple.
    """
    is_hebrew_request = language.lower().startswith("he")
    if not is_hebrew_request or HEBREW_CHAR_RE.search(text) is None:
        return text, voice

    romanized = HEBREW_WORD_RE.sub(_transliterate_hebrew_word, text)
    return romanized, "en-us"
|
||||||
|
|
||||||
|
|
||||||
def _resolve_voice(req: TtsRequest) -> str:
|
def _resolve_voice(req: TtsRequest) -> str:
|
||||||
if req.voice:
|
if req.voice:
|
||||||
return req.voice.strip()
|
return req.voice.strip()
|
||||||
@@ -115,14 +299,15 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
|
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
args = [
|
args = [
|
||||||
"--stdout",
|
"--stdout",
|
||||||
"-v", voice,
|
"-v", synth_voice,
|
||||||
"-s", str(max(80, min(450, req.rate))),
|
"-s", str(max(80, min(450, req.rate))),
|
||||||
"-p", str(max(0, min(99, req.pitch))),
|
"-p", str(max(0, min(99, req.pitch))),
|
||||||
"-a", str(max(0, min(200, req.volume))),
|
"-a", str(max(0, min(200, req.volume))),
|
||||||
]
|
]
|
||||||
wav = _run_espeak(args, req.text.encode("utf-8"))
|
wav = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
if not wav:
|
if not wav:
|
||||||
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
||||||
return Response(content=wav, media_type="audio/wav")
|
return Response(content=wav, media_type="audio/wav")
|
||||||
@@ -153,9 +338,9 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
def _estimate_total_ms(req: TtsRequest, voice: str) -> int:
|
def _estimate_total_ms(req: TtsRequest, voice: str, spoken_text: str) -> int:
|
||||||
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
||||||
out = _run_espeak(args, req.text.encode("utf-8"))
|
out = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
text = out.decode("utf-8", errors="replace")
|
text = out.decode("utf-8", errors="replace")
|
||||||
total = 0
|
total = 0
|
||||||
for match in PHONEME_DURATION_RE.finditer(text):
|
for match in PHONEME_DURATION_RE.finditer(text):
|
||||||
@@ -175,7 +360,8 @@ def timings(req: TtsRequest):
|
|||||||
if not req.text.strip():
|
if not req.text.strip():
|
||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
total_ms = _estimate_total_ms(req, voice)
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
|
total_ms = _estimate_total_ms(req, synth_voice, spoken_text)
|
||||||
|
|
||||||
# Distribute total_ms across whitespace-split words proportional to
|
# Distribute total_ms across whitespace-split words proportional to
|
||||||
# character count. Punctuation-only tokens are folded into the previous
|
# character count. Punctuation-only tokens are folded into the previous
|
||||||
@@ -204,7 +390,7 @@ def timings(req: TtsRequest):
|
|||||||
{
|
{
|
||||||
"text": req.text,
|
"text": req.text,
|
||||||
"language": req.language,
|
"language": req.language,
|
||||||
"voice": voice,
|
"voice": synth_voice,
|
||||||
"words": out_words,
|
"words": out_words,
|
||||||
"durationMs": total_ms,
|
"durationMs": total_ms,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -359,7 +359,7 @@ spec:
|
|||||||
runAsUser: 1654
|
runAsUser: 1654
|
||||||
containers:
|
containers:
|
||||||
- name: biblical-tts
|
- name: biblical-tts
|
||||||
image: localhost/fc-biblical-tts:v1
|
image: localhost/fc-biblical-tts:v20260506-hebrew-translit
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 10402
|
- containerPort: 10402
|
||||||
@@ -532,7 +532,7 @@ spec:
|
|||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: localhost/fc-ttsreader-web:v202605061500
|
image: localhost/fc-ttsreader-web:v20260506-phase6
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5217
|
- containerPort: 5217
|
||||||
@@ -568,6 +568,14 @@ spec:
|
|||||||
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
||||||
- name: TtsReader__Kokoro__TimeoutSeconds
|
- name: TtsReader__Kokoro__TimeoutSeconds
|
||||||
value: "120"
|
value: "120"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__BaseUrl
|
||||||
|
value: "http://ttsreader-biblical.fc-ttsreader.svc.cluster.local.:10402"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__TimeoutSeconds
|
||||||
|
value: "60"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__DefaultLanguage
|
||||||
|
value: "grc"
|
||||||
- name: Speech__Alignment__Enabled
|
- name: Speech__Alignment__Enabled
|
||||||
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
||||||
# ttsreader-align deployment in this manifest wraps
|
# ttsreader-align deployment in this manifest wraps
|
||||||
|
|||||||
Reference in New Issue
Block a user