From 0d5a1fd530af7e47ffba4e8865ae852529754b2d Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Wed, 29 Apr 2026 19:14:01 -0500 Subject: [PATCH] fix(agent-zero): route util and embed through llm bridge --- apps/agent-zero/agent-zero.yaml | 43 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/apps/agent-zero/agent-zero.yaml b/apps/agent-zero/agent-zero.yaml index 04ca1f9..03ce0cc 100644 --- a/apps/agent-zero/agent-zero.yaml +++ b/apps/agent-zero/agent-zero.yaml @@ -92,14 +92,16 @@ subjects: # ============================================================================= # Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile) # ============================================================================= -# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only +# Chat / utility / embedding lanes route through fc-llm-bridge. Browser keeps +# a local nginx proxy to edge1 Pi 5 + AI HAT+ until the bridge grows a live +# Vision route and the in-pod tools stop calling Ollama directly. # Blue Jay profile with 21 tools, 3 prompts, 4 extensions --- -# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing). +# FC LLM Bridge API key for Agent Zero (ADR-088 chat / util / embed routing). # Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s). -# Consumed by the chat_model only; util / embedding / browser stay on local -# Ollama via the 127.0.0.1 sidecar proxy. +# Consumed by the OpenAI-compatible chat / util / embedding lanes. Browser +# stays on the local Ollama sidecar until fc:vision is configured on the bridge. apiVersion: onepassword.com/v1 kind: OnePasswordItem metadata: @@ -295,13 +297,14 @@ spec: # The _model_config plugin reads config.json (NOT config.yaml). # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat, # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet). 
- # api_key comes from A0_SET_chat_model_api_key env var (overrides - # config.json). util + embedding go to local 127.0.0.1 nginx - # proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation - # is private dev hardware, intentionally not in the cluster path). + # api_key comes from OPENAI_API_KEY / A0_SET_chat_model_api_key. + # Utility + embedding now share the same bridge surface so Agent + # Zero stops talking to Ollama directly for those model lanes. + # Browser stays on the local 127.0.0.1 proxy until the bridge has + # a live Vision route and the in-pod tools stop calling Ollama. mkdir -p /a0/usr/plugins/_model_config cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG' - {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}} + {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}} MODELCFG # Strip heredoc indentation sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json @@ -325,8 +328,9 @@ spec: # Chat model — routed through FlowerCore LLM Bridge (ADR-088) # so spend is tracked and tier aliases 
(fc:cheap/fc:balanced/fc:deep) # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint. - # Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy - # for zero-latency, zero-cost small-model traffic. + # Utility + embedding now share the bridge/auth surface too. + # Browser stays on local Ollama until the bridge has a live + # Vision route and the in-pod tools stop calling Ollama directly. - name: A0_SET_chat_model_provider value: "openai" - name: A0_SET_chat_model_name @@ -354,21 +358,22 @@ spec: value: '{"temperature": 0, "num_ctx": 8192}' - # Utility model — fast small helper tier through the same proxy + # Utility model — fast small helper tier via the bridge's fc:cheap alias - name: A0_SET_util_model_provider - value: "ollama" + value: "openai" - name: A0_SET_util_model_name - value: "qwen2.5:1.5b" + value: "fc:cheap" - name: A0_SET_util_model_api_base - value: "http://127.0.0.1:11434" + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1" - name: A0_SET_util_model_kwargs value: '{"num_ctx": 2048}' - # Embedding model — nomic through the same proxy + # Embedding model — bridge alias to nomic-embed-text on edge1 - name: A0_SET_embed_model_provider - value: "ollama" + value: "openai" - name: A0_SET_embed_model_name - value: "nomic-embed-text" + value: "fc:embedding" - name: A0_SET_embed_model_api_base - value: "http://127.0.0.1:11434" - # Browser model — small Gemma candidate through the same proxy + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1" + # Browser model — small Gemma candidate stays on the local proxy + # until fc:vision is configured on the bridge. - name: A0_SET_browser_model_provider value: "ollama" - name: A0_SET_browser_model_name