# GX10 Piper TTS — linux/arm64 (built natively on the GX10 / DGX Spark, aarch64).
# Serves the telephony /tts contract: POST {"text"} -> 16 kHz/16-bit/mono WAV.
# Voice baked into the image so there is no runtime HuggingFace dependency.
FROM python:3.12-slim

# espeak-ng is the phonemizer backend piper-tts uses at synthesis time.
# ca-certificates and curl are only needed for the HTTPS voice download below.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        espeak-ng \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): these packages are unpinned, so rebuilds can pick up new
# releases; pin exact versions (or use a requirements file) once the set that
# works with tts_service.py is confirmed.
RUN pip install --no-cache-dir piper-tts flask numpy

# Bake the voice model (en_US-amy-medium, 22.05 kHz native) into the image.
# VOICE_BASE is specific to the voice's directory on HuggingFace, so overriding
# PIPER_VOICE at build time requires overriding VOICE_BASE to match.
ARG PIPER_VOICE=en_US-amy-medium
ARG VOICE_BASE=https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/amy/medium

# -f makes curl exit non-zero on HTTP errors; without it a 404/5xx error page
# would be saved as the model and still satisfy the non-empty checks.
RUN mkdir -p /voices \
    && curl -fsSL -o "/voices/${PIPER_VOICE}.onnx" "${VOICE_BASE}/${PIPER_VOICE}.onnx" \
    && curl -fsSL -o "/voices/${PIPER_VOICE}.onnx.json" "${VOICE_BASE}/${PIPER_VOICE}.onnx.json" \
    && test -s "/voices/${PIPER_VOICE}.onnx" \
    && test -s "/voices/${PIPER_VOICE}.onnx.json"

# Dedicated unprivileged account with a fixed UID/GID so orchestrators can
# verify the container does not run as root. It gets a real home directory in
# case a library wants a per-user cache location.
RUN groupadd --system --gid 10001 tts \
    && useradd --system --uid 10001 --gid tts --create-home --home-dir /home/tts tts

WORKDIR /app
COPY tts_service.py /app/tts_service.py

# Runtime configuration for tts_service.py. PIPER_VOICE is derived from the
# build ARG so the advertised voice always matches the files baked into
# VOICES_DIR (default remains en_US-amy-medium). TARGET_RATE is the 16 kHz
# output rate of the /tts contract.
ENV TTS_PORT=8500 \
    PIPER_VOICE=${PIPER_VOICE} \
    VOICES_DIR=/voices \
    TARGET_RATE=16000

# Drop privileges for the running service; 8500 is an unprivileged port.
USER tts

EXPOSE 8500

CMD ["python", "tts_service.py"]