From 0c8e6ee8ab99313f1bd1f24553d28f68e6810d30 Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Sun, 14 Jun 2026 02:38:17 -0500 Subject: [PATCH] agent-zero(models): tool-capable qwen2.5 on GX10 via fc-llm-bridge (Wiring A) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent Zero's agentic tool-loop ran on cloud Anthropic Sonnet (the bridge's Anthropic key is currently 401) + gemma3:4b util (gemma3 returns 400 "does not support tools" — fatal for the loop). Repoint the bridge ModelRouter tiers: Balanced -> Ollama qwen2.5:14b (AZ chat) and Cheap -> qwen2.5:7b (AZ util), both on the GX10 VIP 10.0.57.201 (already the bridge OllamaBaseUrl). Env-only, no rebuild; Wiring A keeps the budget ledger + cache. Also: AZ chat ctx -> 32768, browser -> qwen2.5:7b (text/tool-capable, vision off), AGENT_NAME -> "Blue Jay" (the NUC role is retired). qwen2.5:7b + :14b pulled + warm-pinned on the GX10. Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/agent-zero/agent-zero.yaml | 8 ++++---- apps/fc-llm-bridge/fc-llm-bridge.yaml | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/apps/agent-zero/agent-zero.yaml b/apps/agent-zero/agent-zero.yaml index d1dce8f..f62cf67 100644 --- a/apps/agent-zero/agent-zero.yaml +++ b/apps/agent-zero/agent-zero.yaml @@ -248,7 +248,7 @@ spec: # use the bridge's Ollama-compatible root via OLLAMA_HOST. 
mkdir -p /a0/usr/plugins/_model_config cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG' - {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}} + {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":32768,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":32768}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}} MODELCFG # Strip heredoc indentation sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json @@ -285,7 +285,7 @@ spec: env: # Agent identity - name: AGENT_NAME - value: "Blue Jay (NUC)" + value: "Blue Jay" # Chat model — routed through FlowerCore LLM Bridge (ADR-088) # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep) # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint. 
@@ -344,7 +344,7 @@ spec: - name: A0_SET_browser_model_provider value: "ollama" - name: A0_SET_browser_model_name - value: "gemma3:4b" + value: "qwen2.5:7b" - name: A0_SET_browser_model_api_base value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" - name: A0_SET_browser_model_api_key @@ -353,7 +353,7 @@ spec: name: fc-llm-bridge-api-keys key: agent-zero-k8s - name: A0_SET_browser_model_vision - value: "true" + value: "false" - name: OLLAMA_HOST value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" - name: FLOWERCORE_AGENTZERO_OLLAMA_URL diff --git a/apps/fc-llm-bridge/fc-llm-bridge.yaml b/apps/fc-llm-bridge/fc-llm-bridge.yaml index ca20f1c..aca6f1f 100644 --- a/apps/fc-llm-bridge/fc-llm-bridge.yaml +++ b/apps/fc-llm-bridge/fc-llm-bridge.yaml @@ -169,6 +169,21 @@ spec: value: "http://10.0.57.201:11434" - name: FlowerCore__Chat__HttpTimeout value: "00:05:00" + # Tier routing override (Wiring A, 2026-06-14): repoint Agent Zero's + # chat (Balanced) + util (Cheap) tiers to the GX10's tool-capable + # local qwen2.5. Balanced was Anthropic Sonnet (cloud/cost, and the + # Anthropic key is currently 401); Cheap was gemma3:4b which CANNOT + # call tools (HTTP 400 "does not support tools") — fatal for an agentic loop. + # qwen2.5 instruct supports the tool-calling loop; GX10 has the memory. + # OllamaBaseUrl above already points at the GX10 VIP (10.0.57.201). + - name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Balanced__Provider + value: "Ollama" + - name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Balanced__Model + value: "qwen2.5:14b" + - name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Cheap__Provider + value: "Ollama" + - name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Cheap__Model + value: "qwen2.5:7b" # Shared.Chat — Anthropic - name: FlowerCore__Chat__Anthropic__Enabled value: "true"