deploy(chat): route wave5 chat ollama to edge1

2026-06-13 22:59:18 -05:00
parent 50a3ee5e8e
commit 9dd170a9ac
1 changed files with 8 additions and 11 deletions
--- a/apps/fc-chat/fc-chat.yaml
+++ b/apps/fc-chat/fc-chat.yaml
@@ -30,18 +30,15 @@ data:
  FlowerCore__Auth__Oidc__Audience: "chat"
  FlowerCore__Auth__Oidc__ClientId: "chat"
  FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db"
-  # Ollama target. Switched 2026-04-25 from edge1 Pi5 (10.0.57.17) to BLUEJAY-WS
-  # workstation (10.0.56.20, RX 9070 XT 16GB, OLLAMA_HOST=0.0.0.0:11434, Vulkan
-  # backend per feedback_rdna4_vulkan_broken). The Pi5 was timing out every team-
-  # round speaker at the 300s per-turn cap (live-proven 2026-04-25 03:53 UTC,
-  # see feedback_chat_team_round_edge1_too_slow). Workstation has gemma3:4b for
-  # the Cheap tier, plus gemma3:27b/phi4:14b/qwen3:14b for Default/Balanced/Deep.
-  # Piper TTS stays on edge1 below (different service, Pi handles TTS fine).
-  FlowerCore__AI__OllamaBaseUrl: "http://10.0.56.20:11434"
-  FlowerCore__AI__DefaultModelName: "phi4:14b"
-  ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.56.20:11434"
+  # Ollama target. BLUEJAY-WS (10.0.56.20) is still the faster Ollama host, but
+  # this lane proved that Chat pods time out reaching 10.0.56.20:11434. Keep
+  # generation and behavior-rule checks on the cluster-routable edge1 endpoint
+  # (10.0.57.17) until that route is fixed; use only models that edge1 hosts.
+  FlowerCore__AI__OllamaBaseUrl: "http://10.0.57.17:11434"
+  FlowerCore__AI__DefaultModelName: "qwen2.5-coder:7b"
+  ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.57.17:11434"
  ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.17:11434"
-  ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:12b"
+  ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:4b"
  FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true"
  FlowerCore__AI__Memory__UseOllamaEmbeddings: "true"
  FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"