refactor(agent-zero): drop ollama-proxy sidecar (Phase 3)

This commit is contained in:
Andrew Stoltz
2026-04-29 20:27:28 -05:00
parent 1b633f57b2
commit 4a309cbf0b
3 changed files with 57 additions and 98 deletions

View File

@@ -92,14 +92,14 @@ subjects:
# =============================================================================
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
# =============================================================================
# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
# Connects directly to fc-llm-bridge for chat + util + embeddings + browser.
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions.
---
# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing).
# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing).
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
# Consumed by the chat_model only; util / embedding / browser stay on local
# Ollama via the 127.0.0.1 sidecar proxy.
# Consumed by chat, util, embeddings, browser, and corpus-search requests
# that traverse fc-llm-bridge.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
@@ -150,19 +150,18 @@ spec:
spec:
serviceAccountName: agent-zero
initContainers:
# Wait for edge1 Ollama to be reachable before starting Agent Zero.
# (Workstation Ollama is intentionally NOT in the cluster path.)
- name: wait-for-ollama
# Wait for fc-llm-bridge to be reachable before starting Agent Zero.
- name: wait-for-llm-bridge
image: busybox:1.37
command: ["sh", "-c"]
args:
- |
echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
echo "edge1 Ollama not ready yet, retrying in 5s..."
echo "Waiting for fc-llm-bridge..."
until wget -qO- --timeout=2 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz >/dev/null 2>&1; do
echo "fc-llm-bridge not ready yet, retrying in 5s..."
sleep 5
done
echo "edge1 Ollama is reachable."
echo "fc-llm-bridge is reachable."
# Assemble the Blue Jay profile directory structure from ConfigMaps.
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
- name: setup-bluejay
@@ -209,73 +208,6 @@ spec:
- name: bluejay-theme
mountPath: /tmp/bluejay-theme
containers:
- name: ollama-proxy
image: nginx:1.27-alpine
command: ["/bin/sh", "-c"]
args:
- |
cat > /etc/nginx/nginx.conf <<'NGINX'
worker_processes 1;
events { worker_connections 1024; }
http {
upstream ollama_upstream {
# edge1 Pi 5 + AI HAT+ is the SOLE upstream.
# Workstation Ollama (BLUEJAY-WS) is private dev hardware and
# MUST NOT be added back here without explicit operator decision —
# adding it would expose the workstation to cluster traffic.
server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
keepalive 16;
}
server {
listen 11434;
# Local healthcheck — proves nginx itself is alive.
# Must NOT depend on upstream so liveness doesn't restart
# the container when edge1 is slow/offline.
location = /healthz {
access_log off;
return 200 'ok\n';
default_type text/plain;
}
location / {
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_set_header Host $host;
proxy_connect_timeout 5s;
proxy_read_timeout 600s;
proxy_send_timeout 600s;
proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
proxy_pass http://ollama_upstream;
}
}
}
NGINX
exec nginx -g 'daemon off;'
ports:
- containerPort: 11434
# Readiness probe DOES check upstream so K8s only routes traffic
# when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
# slower TCP handshake under load (was timeoutSeconds=1 default →
# 172 historic restarts when the workstation primary path went down,
# before the cluster was repointed to edge1-only on 2026-04-27).
readinessProbe:
httpGet:
path: /api/tags
port: 11434
initialDelaySeconds: 5
periodSeconds: 15
timeoutSeconds: 5
failureThreshold: 3
# Liveness probe hits ONLY local healthz — restarts the container
# only when nginx itself is dead. Decoupling liveness from upstream
# eliminates restart-loops caused by transient upstream outages.
livenessProbe:
httpGet:
path: /healthz
port: 11434
initialDelaySeconds: 10
periodSeconds: 30
timeoutSeconds: 3
failureThreshold: 3
- name: agent-zero
image: agent0ai/agent-zero:latest
command: ["/bin/bash", "-c"]
@@ -296,12 +228,11 @@ spec:
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
# api_key comes from A0_SET_chat_model_api_key env var (overrides
# config.json). util + embedding go to local 127.0.0.1 nginx
# proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation
# is private dev hardware, intentionally not in the cluster path).
# config.json). Utility / embedding / browser all point at the
# same bridge root and use Ollama-compatible endpoints there.
mkdir -p /a0/usr/plugins/_model_config
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080","kwargs":{}}}
MODELCFG
# Strip heredoc indentation
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
@@ -325,8 +256,8 @@ spec:
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
# Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy
# for zero-latency, zero-cost small-model traffic.
# Utility / embedding / browser now traverse fc-llm-bridge too so
# Agent Zero no longer needs a local Ollama proxy sidecar.
- name: A0_SET_chat_model_provider
value: "openai"
- name: A0_SET_chat_model_name
@@ -348,6 +279,11 @@ spec:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
- name: FC_LLM_BRIDGE_API_KEY
valueFrom:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
- name: A0_SET_chat_model_ctx_length
value: "8192"
- name: A0_SET_chat_model_kwargs
@@ -358,7 +294,12 @@ spec:
- name: A0_SET_util_model_name
value: "qwen2.5:1.5b"
- name: A0_SET_util_model_api_base
value: "http://127.0.0.1:11434"
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: A0_SET_util_model_api_key
valueFrom:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
- name: A0_SET_util_model_kwargs
value: '{"num_ctx": 2048}'
# Embedding model — nomic through the same proxy
@@ -367,16 +308,30 @@ spec:
- name: A0_SET_embed_model_name
value: "nomic-embed-text"
- name: A0_SET_embed_model_api_base
value: "http://127.0.0.1:11434"
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: A0_SET_embed_model_api_key
valueFrom:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
# Browser model — small Gemma candidate through the same proxy
- name: A0_SET_browser_model_provider
value: "ollama"
- name: A0_SET_browser_model_name
value: "gemma3:4b"
- name: A0_SET_browser_model_api_base
value: "http://127.0.0.1:11434"
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: A0_SET_browser_model_api_key
valueFrom:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
- name: A0_SET_browser_model_vision
value: "true"
- name: OLLAMA_HOST
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: FLOWERCORE_AGENTZERO_OLLAMA_URL
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
# Agent profile — Blue Jay personality, tools, and system prompt
- name: A0_SET_agent_profile
value: "bluejay"
@@ -452,7 +407,7 @@ spec:
command:
- /bin/bash
- -c
- "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
- "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz > /dev/null"
periodSeconds: 30
failureThreshold: 2
resources:
@@ -590,13 +545,6 @@ spec:
protocol: UDP
- port: 53
protocol: TCP
# Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
# is private dev hardware and intentionally not allowlisted)
- to:
- ipBlock:
cidr: 10.0.57.17/32
ports:
- port: 11434
# Print.Web on edge2
- to:
- ipBlock: