fix(agent-zero): route util and embed through llm bridge
This commit is contained in:
@@ -92,14 +92,16 @@ subjects:
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only
|
# Chat / utility / embedding lanes route through fc-llm-bridge. Browser keeps
|
||||||
|
# a local nginx proxy to edge1 Pi 5 + AI HAT+ until the bridge grows a live
|
||||||
|
# Vision route and the in-pod tools stop calling Ollama directly.
|
||||||
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
|
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
|
||||||
|
|
||||||
---
|
---
|
||||||
# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing).
|
# FC LLM Bridge API key for Agent Zero (ADR-088 chat / util / embed routing).
|
||||||
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
|
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
|
||||||
# Consumed by the chat_model only; util / embedding / browser stay on local
|
# Consumed by the OpenAI-compatible chat / util / embedding lanes. Browser
|
||||||
# Ollama via the 127.0.0.1 sidecar proxy.
|
# stays on the local Ollama sidecar until fc:vision is configured on the bridge.
|
||||||
apiVersion: onepassword.com/v1
|
apiVersion: onepassword.com/v1
|
||||||
kind: OnePasswordItem
|
kind: OnePasswordItem
|
||||||
metadata:
|
metadata:
|
||||||
@@ -295,13 +297,14 @@ spec:
|
|||||||
# The _model_config plugin reads config.json (NOT config.yaml).
|
# The _model_config plugin reads config.json (NOT config.yaml).
|
||||||
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
||||||
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
||||||
# api_key comes from A0_SET_chat_model_api_key env var (overrides
|
# api_key comes from OPENAI_API_KEY / A0_SET_chat_model_api_key.
|
||||||
# config.json). util + embedding go to local 127.0.0.1 nginx
|
# Utility + embedding now share the same bridge surface so Agent
|
||||||
# proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation
|
# Zero stops talking to Ollama directly for those model lanes.
|
||||||
# is private dev hardware, intentionally not in the cluster path).
|
# Browser stays on the local 127.0.0.1 proxy until the bridge has
|
||||||
|
# a live Vision route and the in-pod tools stop calling Ollama.
|
||||||
mkdir -p /a0/usr/plugins/_model_config
|
mkdir -p /a0/usr/plugins/_model_config
|
||||||
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
||||||
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
|
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
|
||||||
MODELCFG
|
MODELCFG
|
||||||
# Strip heredoc indentation
|
# Strip heredoc indentation
|
||||||
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
||||||
@@ -325,8 +328,9 @@ spec:
|
|||||||
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
|
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
|
||||||
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
|
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
|
||||||
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
|
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
|
||||||
# Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy
|
# Utility + embedding now share the bridge/auth surface too.
|
||||||
# for zero-latency, zero-cost small-model traffic.
|
# Browser stays on local Ollama until the bridge has a live
|
||||||
|
# Vision route and the in-pod tools stop calling Ollama directly.
|
||||||
- name: A0_SET_chat_model_provider
|
- name: A0_SET_chat_model_provider
|
||||||
value: "openai"
|
value: "openai"
|
||||||
- name: A0_SET_chat_model_name
|
- name: A0_SET_chat_model_name
|
||||||
@@ -354,21 +358,22 @@ spec:
|
|||||||
value: '{"temperature": 0, "num_ctx": 8192}'
|
value: '{"temperature": 0, "num_ctx": 8192}'
|
||||||
# Utility model — fast small helper tier through the same proxy
|
# Utility model — fast small helper tier through the same proxy
|
||||||
- name: A0_SET_util_model_provider
|
- name: A0_SET_util_model_provider
|
||||||
value: "ollama"
|
value: "openai"
|
||||||
- name: A0_SET_util_model_name
|
- name: A0_SET_util_model_name
|
||||||
value: "qwen2.5:1.5b"
|
value: "fc:cheap"
|
||||||
- name: A0_SET_util_model_api_base
|
- name: A0_SET_util_model_api_base
|
||||||
value: "http://127.0.0.1:11434"
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
|
||||||
- name: A0_SET_util_model_kwargs
|
- name: A0_SET_util_model_kwargs
|
||||||
value: '{"num_ctx": 2048}'
|
value: '{"num_ctx": 2048}'
|
||||||
# Embedding model — nomic through the same proxy
|
# Embedding model — bridge alias to nomic-embed-text on edge1
|
||||||
- name: A0_SET_embed_model_provider
|
- name: A0_SET_embed_model_provider
|
||||||
value: "ollama"
|
value: "openai"
|
||||||
- name: A0_SET_embed_model_name
|
- name: A0_SET_embed_model_name
|
||||||
value: "nomic-embed-text"
|
value: "fc:embedding"
|
||||||
- name: A0_SET_embed_model_api_base
|
- name: A0_SET_embed_model_api_base
|
||||||
value: "http://127.0.0.1:11434"
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
|
||||||
# Browser model — small Gemma candidate through the same proxy
|
# Browser model — small Gemma candidate stays on the local proxy
|
||||||
|
# until fc:vision is configured on the bridge.
|
||||||
- name: A0_SET_browser_model_provider
|
- name: A0_SET_browser_model_provider
|
||||||
value: "ollama"
|
value: "ollama"
|
||||||
- name: A0_SET_browser_model_name
|
- name: A0_SET_browser_model_name
|
||||||
|
|||||||
Reference in New Issue
Block a user