diff --git a/apps/fc-ttsreader/fc-ttsreader.yaml b/apps/fc-ttsreader/fc-ttsreader.yaml index d91aa58..b756e2c 100644 --- a/apps/fc-ttsreader/fc-ttsreader.yaml +++ b/apps/fc-ttsreader/fc-ttsreader.yaml @@ -169,7 +169,7 @@ spec: runAsUser: 1654 containers: - name: align - image: localhost/fc-speech-align:v2 + image: localhost/fc-speech-align:v3 imagePullPolicy: Never ports: - containerPort: 9200 diff --git a/apps/fc-ttsreader/speech-align/app.py b/apps/fc-ttsreader/speech-align/app.py index 092bb48..70652eb 100644 --- a/apps/fc-ttsreader/speech-align/app.py +++ b/apps/fc-ttsreader/speech-align/app.py @@ -128,10 +128,17 @@ async def align(audio: UploadFile = File(...), language: str = Form(DEFAULT_LANG for segment in segments: text_parts.append(segment.text.strip()) for word in (segment.words or []): + # Field names MUST match the FlowerCore.Shared.Speech contract: + # `text` / `startMs` / `endMs`. The deployed FasterWhisperAlignmentClient + # ignores any other names — see Common's + # FasterWhisperAlignmentResponse / FasterWhisperWord. Times are rounded (not truncated) to whole milliseconds so float artefacts such as 1.005 * 1000 == 1004.999... do not lose a millisecond. words.append({ - "word": word.word.strip(), - "startSeconds": float(word.start or 0.0), - "endSeconds": float(word.end or 0.0), + "text": word.word.strip(), + "startMs": int(round((word.start or 0.0) * 1000)), + "endMs": int(round((word.end or 0.0) * 1000)), + # Confidence is informational and ignored by the C# client today, + # but kept on the wire for future scoring + fc-align operators + # that want to surface low-confidence words. "confidence": float(getattr(word, "probability", 0.0) or 0.0), })