From d03a92407d37a506536c40b080a09fa53948ce3e Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Sun, 14 Jun 2026 14:14:59 -0500 Subject: [PATCH] gx10/tts: persist Piper /tts source + manifest (telephony TTS port baseline) Dockerfile (linux/arm64, en_US-amy-medium baked), tts_service.py (16kHz/16-bit/mono WAV, numpy resample 22050->16000), gx10-tts.yaml (CPU NodePort 30850, no GPU request), README (build/import/cutover/verify on the GX10 cluster). --- gx10/tts/Dockerfile | 31 ++++++++ gx10/tts/README.md | 59 ++++++++++++++++ gx10/tts/gx10-tts.yaml | 81 +++++++++++++++++++++ gx10/tts/tts_service.py | 153 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 324 insertions(+) create mode 100644 gx10/tts/Dockerfile create mode 100644 gx10/tts/README.md create mode 100644 gx10/tts/gx10-tts.yaml create mode 100644 gx10/tts/tts_service.py diff --git a/gx10/tts/Dockerfile b/gx10/tts/Dockerfile new file mode 100644 index 0000000..678b8cd --- /dev/null +++ b/gx10/tts/Dockerfile @@ -0,0 +1,31 @@ +# GX10 Piper TTS — linux/arm64 (built natively on the GX10 / DGX Spark, aarch64). +# Serves the telephony /tts contract: POST {"text"} -> 16 kHz/16-bit/mono WAV. +# Voice baked into the image so there is no runtime HuggingFace dependency. +FROM python:3.12-slim + +# espeak-ng is the phonemizer backend piper-tts uses at synthesis time. +RUN apt-get update \ + && apt-get install -y --no-install-recommends espeak-ng ca-certificates curl \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir piper-tts flask numpy + +# Bake the voice model (en_US-amy-medium, 22.05 kHz native) into the image. 
+ARG PIPER_VOICE=en_US-amy-medium +ARG VOICE_BASE=https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/amy/medium +RUN mkdir -p /voices \ + && curl -sSL -o "/voices/${PIPER_VOICE}.onnx" "${VOICE_BASE}/${PIPER_VOICE}.onnx" \ + && curl -sSL -o "/voices/${PIPER_VOICE}.onnx.json" "${VOICE_BASE}/${PIPER_VOICE}.onnx.json" \ + && test -s "/voices/${PIPER_VOICE}.onnx" \ + && test -s "/voices/${PIPER_VOICE}.onnx.json" + +COPY tts_service.py /app/tts_service.py +WORKDIR /app + +ENV TTS_PORT=8500 \ + PIPER_VOICE=en_US-amy-medium \ + VOICES_DIR=/voices \ + TARGET_RATE=16000 + +EXPOSE 8500 +CMD ["python", "tts_service.py"] diff --git a/gx10/tts/README.md b/gx10/tts/README.md new file mode 100644 index 0000000..a5cca2b --- /dev/null +++ b/gx10/tts/README.md @@ -0,0 +1,59 @@ +# GX10 Piper TTS — telephony `/tts` endpoint + +CPU Piper TTS serving the telephony `/tts` contract on the **GX10 RKE2 cluster** +(ASUS Ascent GX10 / NVIDIA DGX Spark, ARM64, `10.0.56.14`). This is the +telephony-TTS-port-to-GX10 (P1) baseline: edge1 parity at higher quality, zero +GPU/aarch64 risk, frees telephony off the slow edge1 Pi 5. + +## What it is +- `tts_service.py` — Flask app: `POST /tts {"text"}` → **16 kHz / 16-bit / mono WAV** + (canonical 44-byte header) + `GET /health`. Voice `en_US-amy-medium` (22.05 kHz + native) is numpy-resampled to 16 kHz so it drops straight onto Asterisk's + `.sln16` path (telephony strips the 44-byte header). Same wire contract as the + edge1 `speech-pipeline` `/tts`, just the TTS half (no STT/Wyoming). +- `Dockerfile` — `linux/arm64`, voice baked in (no runtime HuggingFace dep). +- `gx10-tts.yaml` — Namespace `tts` + Deployment (CPU-only, **no GPU request** so it + co-resides with the GPU-holding Ollama pod) + NodePort Service. 
+ +## This cluster is NOT under the old-cluster ArgoCD (yet) +Apply manually with the GX10's own kubectl: +```bash +ssh -J noc1 -i ~/.ssh/fcadmin_ed25519 bluejay@10.0.56.14 +export KUBECONFIG=/etc/rancher/rke2/rke2.yaml +K=/var/lib/rancher/rke2/bin/kubectl +$K apply -f gx10-tts.yaml +``` + +## Build + import (native arm64 on the GX10) +```bash +docker build -t localhost/fc-gx10-tts:v20260614 . +docker save localhost/fc-gx10-tts:v20260614 -o /tmp/t.tar +sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/t.tar +# manifest uses imagePullPolicy: Never (image lives in containerd, no registry) +``` + +## Telephony cutover (reversible) +Endpoint telephony hits: **`http://10.0.56.14:30850`** (NodePort, MGMT VLAN 56). +In `apps/telephony/telephony.yaml`: +1. Deployment env `Tts__PiperUrl=http://10.0.56.14:30850` — **this is the real lever**; + env vars override `appsettings.Production.json`, so the configmap `Tts` block alone + is inert (it was shadowed by a drifted live env `Tts__PiperUrl=edge1`). +2. NetworkPolicy egress to `10.0.56.14/32:30850` (telephony-web is `hostNetwork`, so this + only matters for non-hostNetwork pods; harmless either way). +3. edge1 (`10.0.57.17:8500`) stays warm — **rollback = set `Tts__PiperUrl` back to it**. + The TTS circuit breaker + `MapTextToSound` canned-prompt fallback mean a bad endpoint + degrades gracefully, never to silence. + +## Verify (not a manual call) +```bash +FLOWERCORE_SIP_TEST_MODE=required dotnet.exe test \ + FlowerCore.Telephony/tests/FlowerCore.Telephony.SipTests/FlowerCore.Telephony.SipTests.csproj \ + --filter FullyQualifiedName~Call_Star100_ReceivesAudibleAudioStream +``` +A passing audible test alone is NOT sufficient (edge1 also produces audible audio) — +confirm the **GX10 TTS pod's own access log** (`kubectl -n tts logs deploy/gx10-tts`) +shows `POST /tts 200` during the call, and telephony-web logs target `10.0.56.14:30850`. 
+ +## Voice upgrade (follow-on) +Operator's pick is **Kokoro**; needs GPU time-slicing (Ollama holds the GB10 GPU; MPS is +refuted on GB10) OR Kokoro-CPU behind a `/tts` shim. This Piper baseline stays as the floor. diff --git a/gx10/tts/gx10-tts.yaml b/gx10/tts/gx10-tts.yaml new file mode 100644 index 0000000..bcad1d7 --- /dev/null +++ b/gx10/tts/gx10-tts.yaml @@ -0,0 +1,81 @@ +# GX10 Piper TTS — telephony /tts endpoint on the GX10 RKE2 cluster. +# Applied DIRECTLY via the GX10's own kubectl (KUBECONFIG=/etc/rancher/rke2/rke2.yaml); +# the GX10 cluster is NOT yet under the old-cluster ArgoCD. CPU-only (no GPU request) +# so it co-resides with the GPU-holding Ollama pod without contending for the GB10. +# Image is imported into RKE2 containerd (imagePullPolicy: Never). +# Telephony reaches it at http://10.0.56.14:30850 (NodePort, MGMT VLAN 56). +apiVersion: v1 +kind: Namespace +metadata: + name: tts +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gx10-tts + namespace: tts + labels: + app: gx10-tts +spec: + replicas: 1 + selector: + matchLabels: + app: gx10-tts + template: + metadata: + labels: + app: gx10-tts + spec: + containers: + - name: tts + image: localhost/fc-gx10-tts:v20260614 + imagePullPolicy: Never + ports: + - containerPort: 8500 + name: http + env: + - name: TTS_PORT + value: "8500" + - name: PIPER_VOICE + value: "en_US-amy-medium" + - name: TARGET_RATE + value: "16000" + readinessProbe: + httpGet: + path: /health + port: 8500 + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 3 + livenessProbe: + httpGet: + path: /health + port: 8500 + initialDelaySeconds: 10 + periodSeconds: 20 + timeoutSeconds: 5 + resources: + requests: + cpu: "500m" + memory: "512Mi" + limits: + cpu: "4" + memory: "2Gi" +--- +apiVersion: v1 +kind: Service +metadata: + name: gx10-tts + namespace: tts + labels: + app: gx10-tts +spec: + type: NodePort + selector: + app: gx10-tts + ports: + - name: http + port: 8500 + targetPort: 8500 + nodePort: 30850 + 
#!/usr/bin/env python3
"""GX10 Piper TTS microservice — telephony /tts contract.

POST /tts {"text": "..."}  -> 16 kHz / 16-bit / mono WAV (canonical 44-byte header)
GET  /health               -> JSON status

The telephony AsteriskProvider strips the 44-byte WAV header and writes the
remainder as a `.sln16` (signed-linear 16 kHz) file that Asterisk transcodes to
any codec. So the response MUST be 16 kHz / 16-bit / mono. The en_US-amy-medium
voice is 22.05 kHz native, so we resample to 16 kHz (a 22.05 kHz stream treated
as 16 kHz plays ~1.38x too fast). This is a drop-in upgrade over edge1's
en_US-amy-low (16 kHz native, lower quality), keeping the exact wire contract.
"""
import io
import logging
import os
import sys
import threading
import wave

import numpy as np
from flask import Flask, Response, jsonify, request

# Runtime configuration; every value can be overridden through the environment.
API_PORT = int(os.environ.get("TTS_PORT", "8500"))
PIPER_VOICE = os.environ.get("PIPER_VOICE", "en_US-amy-medium")
VOICES_DIR = os.environ.get("VOICES_DIR", "/voices")
TARGET_RATE = int(os.environ.get("TARGET_RATE", "16000"))

# Upper bound on the stripped request text accepted by POST /tts.
MAX_TEXT_CHARS = 10000

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    stream=sys.stdout,
)
log = logging.getLogger("gx10-tts")

# Shared process-wide state.
piper_voice_obj = None          # loaded voice; set once by load_piper()
piper_loaded = False            # True only after a successful load
piper_lock = threading.Lock()   # serialises synthesis on the shared voice
native_rate = None              # sample rate of the most recent synthesis

app = Flask(__name__)


def load_piper():
    """Load the Piper voice model from VOICES_DIR into module state.

    Called once from the ``__main__`` guard before the server starts. The load
    itself takes no lock; only synthesis is serialised (see
    ``synthesize_chunks``).

    Fail-soft: a missing model file or any exception raised while importing or
    loading Piper is logged and leaves ``piper_loaded`` False, so the process
    keeps running and ``/tts`` answers 503 instead of crashing.
    """
    global piper_voice_obj, piper_loaded
    try:
        # Imported lazily so a broken piper install is reported through the
        # same fail-soft path instead of killing the process at import time.
        from piper import PiperVoice

        model_path = os.path.join(VOICES_DIR, f"{PIPER_VOICE}.onnx")
        if not os.path.isfile(model_path):
            log.error("Piper voice model not found at %s — TTS disabled", model_path)
            piper_loaded = False
            return
        log.info("Loading Piper voice %s from %s", PIPER_VOICE, model_path)
        piper_voice_obj = PiperVoice.load(model_path)
        piper_loaded = True
        log.info("Piper voice loaded")
    except Exception as exc:  # noqa: BLE001 — fail-soft, /health reports it
        # log.exception keeps the traceback, which the plain message dropped.
        log.exception("Failed to load Piper: %s", exc)
        piper_loaded = False


def synthesize_chunks(text):
    """Synthesize ``text`` and return the produced audio chunks as a list.

    The loaded voice is shared by all request threads, so the call is made
    while holding ``piper_lock``. The result is materialised inside the lock so
    no lazy work on the voice happens after the lock is released.
    """
    with piper_lock:
        return list(piper_voice_obj.synthesize(text))


def resample_i16(pcm_i16, src_rate, dst_rate):
    """Linear-interpolation resample of int16 PCM (matches edge1's STT resample).

    Args:
        pcm_i16: 1-D numpy array of int16 samples.
        src_rate: sample rate of ``pcm_i16`` in Hz.
        dst_rate: desired output sample rate in Hz.

    Returns:
        A numpy int16 array. The input is returned unchanged when the rates
        already match or the input is empty; an empty array is returned when
        the computed output length is not positive.
    """
    if src_rate == dst_rate or len(pcm_i16) == 0:
        return pcm_i16
    audio = pcm_i16.astype(np.float32)
    target_len = int(round(len(audio) * dst_rate / src_rate))
    if target_len <= 0:
        return np.zeros(0, dtype=np.int16)
    # Sample the source at target_len evenly spaced positions from the first
    # to the last input sample.
    idx = np.linspace(0, len(audio) - 1, target_len)
    res = np.interp(idx, np.arange(len(audio)), audio)
    # Round and clamp before the cast so values stay inside the int16 range.
    return np.clip(np.round(res), -32768, 32767).astype(np.int16)


def _extract_text(payload):
    """Validate a decoded JSON body and return ``(text, error)``.

    Exactly one element of the pair is not None: the stripped text on success,
    or the message for a 400 response on failure. Bodies that are not JSON
    objects and ``text`` values that are not strings are rejected here so they
    cannot raise inside the request handler.
    """
    if not isinstance(payload, dict) or "text" not in payload:
        return None, "Missing required field: text"
    raw = payload["text"]
    if not isinstance(raw, str):
        return None, "Field 'text' must be a string"
    text = raw.strip()
    if not text:
        return None, "Text field is empty"
    if len(text) > MAX_TEXT_CHARS:
        return None, f"Text too long (max {MAX_TEXT_CHARS} characters)"
    return text, None


def _encode_wav(pcm_i16, rate):
    """Wrap int16 mono PCM samples in a WAV container and return the bytes."""
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(rate)
        wav_file.writeframes(pcm_i16.tobytes())
    return buf.getvalue()


@app.route("/health", methods=["GET"])
def health():
    """Report service status as JSON.

    Always answers HTTP 200; whether the voice actually loaded is carried in
    the ``loaded`` field. ``native_rate`` stays null until the first successful
    synthesis has recorded it.
    """
    # NOTE(review): a probe that only checks the status code will treat this
    # endpoint as healthy even when ``loaded`` is false — confirm that is the
    # intended fail-soft behaviour for the deployment's probes.
    return jsonify({
        "status": "ok",
        "voice": PIPER_VOICE,
        "loaded": piper_loaded,
        "target_rate": TARGET_RATE,
        "native_rate": native_rate,
    })


@app.route("/tts", methods=["POST"])
def tts():
    """Text -> 16 kHz/16-bit/mono WAV. Mirrors the edge1 speech-pipeline contract.

    Responses:
        200: ``audio/wav`` body at TARGET_RATE.
        400: body is not a JSON object, ``text`` is missing, not a string,
             empty after stripping, or longer than MAX_TEXT_CHARS.
        500: synthesis produced no audio, produced non 16-bit-mono PCM, or
             raised an exception.
        503: the Piper voice is not loaded.
    """
    global native_rate

    if not piper_loaded:
        return jsonify({"error": "Piper TTS model not loaded"}), 503

    # silent=True yields None for a missing or malformed JSON body rather than
    # raising; _extract_text turns that into a 400.
    text, error = _extract_text(request.get_json(silent=True))
    if error is not None:
        return jsonify({"error": error}), 400

    try:
        chunks = synthesize_chunks(text)
        if not chunks:
            return jsonify({"error": "No audio produced"}), 500

        # The first chunk's format is taken as representative of the stream.
        first = chunks[0]
        native_rate = first.sample_rate

        if first.sample_width != 2 or first.sample_channels != 1:
            return jsonify({
                "error": f"Unexpected PCM format: width={first.sample_width} "
                         f"channels={first.sample_channels} (need 16-bit mono)"
            }), 500

        pcm = np.frombuffer(
            b"".join(c.audio_int16_bytes for c in chunks), dtype=np.int16
        )
        out = resample_i16(pcm, native_rate, TARGET_RATE)

        return Response(
            _encode_wav(out, TARGET_RATE),
            mimetype="audio/wav",
            headers={"Content-Disposition": 'inline; filename="speech.wav"'},
        )
    except Exception as exc:  # noqa: BLE001 — request boundary, report as 500
        log.exception("TTS synthesis failed: %s", exc)
        return jsonify({"error": f"Synthesis failed: {exc}"}), 500


if __name__ == "__main__":
    log.info(
        "GX10 TTS starting on port %d (voice=%s -> %d Hz)",
        API_PORT, PIPER_VOICE, TARGET_RATE,
    )
    load_piper()
    # threaded=True lets requests overlap; synthesis itself is still
    # serialised by piper_lock.
    app.run(host="0.0.0.0", port=API_PORT, threaded=True)