deploy(chat): route wave5 chat ollama to edge1

This commit is contained in:
Andrew Stoltz
2026-06-13 22:59:18 -05:00
parent 50a3ee5e8e
commit 9dd170a9ac

View File

@@ -30,18 +30,15 @@ data:
FlowerCore__Auth__Oidc__Audience: "chat"
FlowerCore__Auth__Oidc__ClientId: "chat"
FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db"
# Ollama target. Switched 2026-04-25 from edge1 Pi5 (10.0.57.17) to BLUEJAY-WS
# workstation (10.0.56.20, RX 9070 XT 16GB, OLLAMA_HOST=0.0.0.0:11434, Vulkan
# backend per feedback_rdna4_vulkan_broken). The Pi5 was timing out every team-
# round speaker at the 300s per-turn cap (live-proven 2026-04-25 03:53 UTC,
# see feedback_chat_team_round_edge1_too_slow). Workstation has gemma3:4b for
# the Cheap tier, plus gemma3:27b/phi4:14b/qwen3:14b for Default/Balanced/Deep.
# Piper TTS stays on edge1 below (different service, Pi handles TTS fine).
FlowerCore__AI__OllamaBaseUrl: "http://10.0.56.20:11434"
FlowerCore__AI__DefaultModelName: "phi4:14b"
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.56.20:11434"
# Ollama target. BLUEJAY-WS (10.0.56.20) remains the faster Ollama host, but
# this lane proved that Chat pods time out reaching 10.0.56.20:11434. Keep
# generation and behavior-rule checks on the cluster-routable edge1 endpoint
# (10.0.57.17) until that route is fixed, and choose models that edge1
# actually hosts.
FlowerCore__AI__OllamaBaseUrl: "http://10.0.57.17:11434"
FlowerCore__AI__DefaultModelName: "qwen2.5-coder:7b"
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.57.17:11434"
ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.17:11434"
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:12b"
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:4b"
FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true"
FlowerCore__AI__Memory__UseOllamaEmbeddings: "true"
FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"