deploy(chat): route wave5 chat ollama to edge1
This commit is contained in:
@@ -30,18 +30,15 @@ data:
|
||||
FlowerCore__Auth__Oidc__Audience: "chat"
|
||||
FlowerCore__Auth__Oidc__ClientId: "chat"
|
||||
FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db"
|
||||
# Ollama target. Switched 2026-04-25 from edge1 Pi5 (10.0.57.17) to BLUEJAY-WS
|
||||
# workstation (10.0.56.20, RX 9070 XT 16GB, OLLAMA_HOST=0.0.0.0:11434, Vulkan
|
||||
# backend per feedback_rdna4_vulkan_broken). The Pi5 was timing out every team-
|
||||
# round speaker at the 300s per-turn cap (live-proven 2026-04-25 03:53 UTC,
|
||||
# see feedback_chat_team_round_edge1_too_slow). Workstation has gemma3:4b for
|
||||
# the Cheap tier, plus gemma3:27b/phi4:14b/qwen3:14b for Default/Balanced/Deep.
|
||||
# Piper TTS stays on edge1 below (different service, Pi handles TTS fine).
|
||||
FlowerCore__AI__OllamaBaseUrl: "http://10.0.56.20:11434"
|
||||
FlowerCore__AI__DefaultModelName: "phi4:14b"
|
||||
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.56.20:11434"
|
||||
# Ollama target. BLUEJAY-WS remains faster from the workstation, but this lane
|
||||
# proved Chat pods time out reaching 10.0.56.20:11434. Keep generation and
|
||||
# behavior-rule checks on the cluster-routable edge1 endpoint until that route
|
||||
# is fixed; choose models that edge1 actually hosts.
|
||||
FlowerCore__AI__OllamaBaseUrl: "http://10.0.57.17:11434"
|
||||
FlowerCore__AI__DefaultModelName: "qwen2.5-coder:7b"
|
||||
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.57.17:11434"
|
||||
ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.17:11434"
|
||||
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:12b"
|
||||
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:4b"
|
||||
FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true"
|
||||
FlowerCore__AI__Memory__UseOllamaEmbeddings: "true"
|
||||
FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"
|
||||
|
||||
Reference in New Issue
Block a user