From 9df26620b8a1f6996132d0a525588b1ce768d3ac Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Sat, 25 Apr 2026 10:28:21 -0500 Subject: [PATCH] fc-ttsreader: disable Whisper, fall back to estimator until backend is reachable The cluster-wide pod cannot reach BLUEJAY-WS speaches on 10.0.56.20:9200 because the rootless+host-net podman setup binds only 127.0.0.1 on the WSL machine; nothing listens on the LAN-facing interface. The openai-compatible Backend value also relied on a Common change still on feat/shared-indexing rather than master, so the deployed image's Shared.Speech only knows the FC-native /align shape. Disable Speech:Alignment for now. EstimatedAlignmentClient kicks in and keeps /api/v1/voices/preview-with-timings returning word-aligned JSON, just with uniform-distribution timings instead of real Whisper output. Re-enable once either: (a) a backend speaking the FC-native /align shape (e.g. an aiohttp /align shim) is reachable from cluster pods, or (b) speaches is reachable from cluster pods (LAN-routable bind on BLUEJAY-WS, or running on a node that's actually reachable) AND Common's openai-compatible Backend lands on master and a new TtsReader image ships; (b) also needs the Speech__Alignment__Backend and Speech__Alignment__Model env vars removed here to be restored. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/fc-ttsreader/fc-ttsreader.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/apps/fc-ttsreader/fc-ttsreader.yaml b/apps/fc-ttsreader/fc-ttsreader.yaml index e04f81e..9d58115 100644 --- a/apps/fc-ttsreader/fc-ttsreader.yaml +++ b/apps/fc-ttsreader/fc-ttsreader.yaml @@ -173,18 +173,18 @@ spec: - name: TtsReader__Kokoro__TimeoutSeconds value: "120" - name: Speech__Alignment__Enabled - value: "true" - - name: Speech__Alignment__Backend - # speaches container on BLUEJAY-WS speaks the OpenAI-compatible - # /v1/audio/transcriptions contract; FasterWhisperAlignmentClient - # adapts the verbose_json response into the FlowerCore shape. - # Switch to "fc-align" once a native /align backend is deployed. 
- value: "openai-compatible" + # Off until either: + # (a) a native /align backend is deployed inside the cluster, or + # (b) the BLUEJAY-WS host exposes the speaches container on the + # LAN-routable bind (10.0.56.20:9200, not just 127.0.0.1) + # AND Common ships the openai-compatible Backend support + # (currently on feat/shared-indexing, not on master). + # While disabled, /preview-with-timings still returns word timings + # via EstimatedAlignmentClient — slightly less accurate, but the + # UI can still drive word-level highlight playback. + value: "false" - name: Speech__Alignment__BaseUrl value: "http://10.0.56.20:9200" - - name: Speech__Alignment__Model - # Tag understood by speaches (faster-whisper-server). - value: "Systran/faster-whisper-base.en" - name: Speech__Alignment__TimeoutSeconds value: "120" - name: TtsReader__Ollama__BaseUrl