From 9dd170a9ac9d1986d64ab072ddf817dc015cbcf0 Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Sat, 13 Jun 2026 22:59:18 -0500 Subject: [PATCH] deploy(chat): route wave5 chat ollama to edge1 --- apps/fc-chat/fc-chat.yaml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/apps/fc-chat/fc-chat.yaml b/apps/fc-chat/fc-chat.yaml index 4ba190a..96674e7 100644 --- a/apps/fc-chat/fc-chat.yaml +++ b/apps/fc-chat/fc-chat.yaml @@ -30,18 +30,15 @@ data: FlowerCore__Auth__Oidc__Audience: "chat" FlowerCore__Auth__Oidc__ClientId: "chat" FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db" - # Ollama target. Switched 2026-04-25 from edge1 Pi5 (10.0.57.17) to BLUEJAY-WS - # workstation (10.0.56.20, RX 9070 XT 16GB, OLLAMA_HOST=0.0.0.0:11434, Vulkan - # backend per feedback_rdna4_vulkan_broken). The Pi5 was timing out every team- - # round speaker at the 300s per-turn cap (live-proven 2026-04-25 03:53 UTC, - # see feedback_chat_team_round_edge1_too_slow). Workstation has gemma3:4b for - # the Cheap tier, plus gemma3:27b/phi4:14b/qwen3:14b for Default/Balanced/Deep. - # Piper TTS stays on edge1 below (different service, Pi handles TTS fine). - FlowerCore__AI__OllamaBaseUrl: "http://10.0.56.20:11434" - FlowerCore__AI__DefaultModelName: "phi4:14b" - ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.56.20:11434" + # Ollama target: edge1 Pi5 (10.0.57.17). BLUEJAY-WS is the faster host, but this + # lane proved Chat pods time out reaching 10.0.56.20:11434. Keep generation and + # behavior-rule checks (primary and fallback) on cluster-routable edge1 until that + # route is fixed. Use models edge1 hosts; edge1 hit the 300s per-turn cap (2026-04-25). 
+ FlowerCore__AI__OllamaBaseUrl: "http://10.0.57.17:11434" + FlowerCore__AI__DefaultModelName: "qwen2.5-coder:7b" + ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.57.17:11434" ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.17:11434" - ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:12b" + ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:4b" FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true" FlowerCore__AI__Memory__UseOllamaEmbeddings: "true" FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"