diff --git a/apps/fc-llm-bridge/fc-llm-bridge.yaml b/apps/fc-llm-bridge/fc-llm-bridge.yaml
index aca6f1f..9182dc6 100644
--- a/apps/fc-llm-bridge/fc-llm-bridge.yaml
+++ b/apps/fc-llm-bridge/fc-llm-bridge.yaml
@@ -164,9 +164,16 @@ spec:
                   name: fc-llm-bridge-api-keys
                   key: spare-2
                   optional: true
-            # Shared.Chat — Ollama (edge1 Pi 5 + AI HAT+, matches bridge default)
+            # Shared.Chat — GX10 Ollama via the INFRA-VLAN NodePort (10.0.56.14:30976),
+            # NOT the PROD-VLAN MetalLB VIP (10.0.57.201:11434). The cross-VLAN path to
+            # the VIP MTU-black-holes LARGE requests: Agent Zero's full prompt (458-line
+            # system prompt + 108 MCP tool descriptions ~150KB) times out / resets mid-
+            # stream there ("Connection reset by peer" in OllamaClient.ChatStreamAsync),
+            # which made AZ loop on "you have sent the same message again". The NodePort is
+            # same-VLAN as the old cluster (no inter-VLAN hop) and carries 150KB fine.
+            # (Small chat/embed requests still work on the VIP; only big agentic prompts broke.)
             - name: FlowerCore__Chat__OllamaBaseUrl
-              value: "http://10.0.57.201:11434"
+              value: "http://10.0.56.14:30976"
             - name: FlowerCore__Chat__HttpTimeout
               value: "00:05:00"
             # Tier routing override (Wiring A, 2026-06-14): repoint Agent Zero's
@@ -175,7 +182,7 @@ spec:
             # Anthropic key is currently 401); Cheap was gemma3:4b which CANNOT
             # call tools (400 does not support tools) — fatal for an agentic loop.
             # qwen2.5 instruct supports the tool-calling loop; GX10 has the memory.
-            # OllamaBaseUrl above already points at the GX10 VIP (10.0.57.201).
+            # OllamaBaseUrl above points at the GX10 NodePort (10.0.56.14:30976).
             - name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Balanced__Provider
               value: "Ollama"
             - name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Balanced__Model